collapse/0000755000176200001440000000000015202770267012064 5ustar liggesuserscollapse/tests/0000755000176200001440000000000015202627536013226 5ustar liggesuserscollapse/tests/testthat/0000755000176200001440000000000015202770267015066 5ustar liggesuserscollapse/tests/testthat/test-BY.R0000644000176200001440000003417014777170131016505 0ustar liggesuserscontext("BY") bmean <- base::mean bsd <- stats::sd bsum <- base::sum bmin <- base::min bmax <- base::max bscale <- base::scale # rm(list = ls()) set.seed(101) x <- rnorm(100) xNA <- x xNA[sample.int(100,20)] <- NA fuo <- sample.int(10, 100, TRUE) fo <- as.factor(sort(fuo)) fuo <- as.factor(fuo) g <- GRP(mtcars, ~ cyl + vs + am) f2uo <- sample.int(6, 32, TRUE) f2o <- as.factor(sort(f2uo)) f2uo <- as.factor(f2uo) m <- as.matrix(mtcars) mNA <- na_insert(m) mtcNA <- na_insert(mtcars) na20 <- function(x) { x[is.na(x)] <- 0 x } myscale <- function(x, na.rm = FALSE) (x - mean.default(x, na.rm = na.rm)) / bsd(x, na.rm = na.rm) mysumf <- function(x, na.rm = FALSE) c(N = bsum(!is.na(x)), Mean = bmean(x, na.rm = na.rm), SD = bsd(x, na.rm = na.rm), Min = bmin(x, na.rm = na.rm), Max = bmax(x, na.rm = na.rm)) options(warn = -1) test_that("BY.default works as intended", { for (f in list(fuo, fo)) { # No missing values expect_equal(BY(x, f, bsum), fsum(x, f)) expect_equal(BY(x, f, bsum, return = "list"), as.list(fsum(x, f))) expect_equal(BY(x, f, bmean), fmean(x, f)) expect_equal(BY(x, f, bmean, return = "list"), as.list(fmean(x, f))) # BY(x, f, bscale) expect_equal(BY(x, f, bscale, use.g.names = FALSE), fscale(x, f)) expect_equal(BY(x, f, log, use.g.names = FALSE), log(x)) expect_equal(BY(x, f, quantile), unlist(lapply(split(x, f), quantile))) expect_equal(BY(x, f, quantile, expand.wide = TRUE), t(sapply(split(x, f), quantile))) expect_equal(BY(x, f, quantile, return = "list"), lapply(split(x, f), quantile)) expect_equal(BY(x, f, quantile, return = "list", expand.wide = TRUE), lapply(split(x, f), quantile)) # This should have no effect !! # Missing values removed expect_equal(BY(xNA, f, bsum, na.rm = TRUE), na20(fsum(xNA, f))) expect_equal(BY(xNA, f, bsum, return = "list", na.rm = TRUE), as.list(na20(fsum(xNA, f)))) expect_equal(BY(xNA, f, bmean, na.rm = TRUE), fmean(xNA, f)) expect_equal(BY(xNA, f, bmean, return = "list", na.rm = TRUE), as.list(fmean(xNA, f))) expect_equal(BY(xNA, f, bscale, use.g.names = FALSE), fscale(xNA, f)) expect_equal(BY(xNA, f, quantile, na.rm = TRUE), unlist(lapply(split(xNA, f), quantile, na.rm = TRUE))) expect_equal(BY(xNA, f, quantile, expand.wide = TRUE, na.rm = TRUE), t(sapply(split(xNA, f), quantile, na.rm = TRUE))) expect_equal(BY(xNA, f, quantile, return = "list", na.rm = TRUE), lapply(split(xNA, f), quantile, na.rm = TRUE)) expect_equal(BY(xNA, f, quantile, return = "list", expand.wide = TRUE, na.rm = TRUE), lapply(split(xNA, f), quantile, na.rm = TRUE)) # This should have no effect !! # Missing values kept expect_equal(BY(xNA, f, bsum), fsum(xNA, f, na.rm = FALSE)) expect_equal(BY(xNA, f, bsum, return = "list"), as.list(fsum(xNA, f, na.rm = FALSE))) expect_equal(BY(xNA, f, bmean), fmean(xNA, f, na.rm = FALSE)) expect_equal(BY(xNA, f, bmean, return = "list"), as.list(fmean(xNA, f, na.rm = FALSE))) expect_equal(BY(xNA, f, myscale, use.g.names = FALSE), fscale(xNA, f, na.rm = FALSE)) expect_equal(BY(xNA, f, mysumf), unlist(lapply(split(xNA, f), mysumf))) expect_equal(BY(xNA, f, mysumf, expand.wide = TRUE), t(sapply(split(xNA, f), mysumf))) expect_equal(BY(xNA, f, mysumf, return = "list"), lapply(split(xNA, f), mysumf)) expect_equal(BY(xNA, f, mysumf, return = "list", expand.wide = TRUE), lapply(split(xNA, f), mysumf)) # This should have no effect !! } }) test_that("BY.matrix works as intended", { for (f in list(g, f2uo, f2o)) { # No missing values expect_equal(BY(m, f, bsum), fsum(m, f)) expect_equal(BY(m, f, bsum, return = "data.frame"), qDF(fsum(m, f))) expect_equal(BY(m, f, bmean), fmean(m, f)) expect_equal(BY(m, f, bmean, return = "data.frame"), qDF(fmean(m, f))) expect_true(all_obj_equal(BY(m, f, bscale), BY(m, f, bscale, use.g.names = FALSE), fscale(m, f))) expect_true(all_obj_equal(BY(m, f, log), BY(m, f, log, use.g.names = FALSE), log(m))) # Missing values kept expect_equal(BY(mNA, f, bsum), fsum(mNA, f, na.rm = FALSE)) expect_equal(BY(mNA, f, bsum, return = "data.frame"), qDF(fsum(mNA, f, na.rm = FALSE))) expect_equal(BY(mNA, f, bmean), fmean(mNA, f, na.rm = FALSE)) expect_equal(BY(mNA, f, bmean, return = "data.frame"), qDF(fmean(mNA, f, na.rm = FALSE))) } for (f in list(f2uo, f2o)) { expect_equal(BY(m, f, quantile), qM(lapply(mctl(m, names = TRUE), function(x) unlist(lapply(split(x, f), quantile))))) expect_equal(setDimnames(BY(m, f, quantile, expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mctl(m, names = TRUE), function(x) do.call(rbind, lapply(split(x, f), quantile)))), NULL)) expect_equal(BY(m, f, quantile, return = "data.frame"), qDF(qM(lapply(mctl(m, names = TRUE), function(x) unlist(lapply(split(x, f), quantile)))))) expect_equal(unname(BY(m, f, quantile, return = "data.frame", expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mctl(m, names = TRUE), function(x) do.call(rbind, lapply(split(x, f), quantile))))))) # Missing values removed expect_equal(BY(mNA, f, bsum, na.rm = TRUE), na20(fsum(mNA, f))) expect_equal(BY(mNA, f, bsum, return = "data.frame", na.rm = TRUE), qDF(na20(fsum(mNA, f)))) expect_equal(BY(mNA, f, bmean, na.rm = TRUE), fmean(mNA, f)) expect_equal(BY(mNA, f, bmean, return = "data.frame", na.rm = TRUE), qDF(fmean(mNA, f))) expect_true(all_obj_equal(BY(mNA, f, bscale), BY(mNA, f, bscale, use.g.names = FALSE), fscale(mNA, f))) expect_true(all_obj_equal(BY(mNA, f, log), BY(mNA, f, log, use.g.names = FALSE), log(mNA))) expect_equal(BY(mNA, f, quantile, na.rm = TRUE), qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f), quantile, na.rm = TRUE))))) expect_equal(setDimnames(BY(mNA, f, quantile, expand.wide = TRUE, na.rm = TRUE), NULL), setDimnames(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f), quantile, na.rm = TRUE)))), NULL)) expect_equal(BY(mNA, f, quantile, return = "data.frame", na.rm = TRUE), qDF(qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f), quantile, na.rm = TRUE)))))) expect_equal(unname(BY(mNA, f, quantile, return = "data.frame", expand.wide = TRUE, na.rm = TRUE)), unname(qDF(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f), quantile, na.rm = TRUE))))))) # Missing values kept expect_equal(BY(mNA, f, mysumf), qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f), mysumf))))) expect_equal(setDimnames(BY(mNA, f, mysumf, expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f), mysumf)))), NULL)) expect_equal(BY(mNA, f, mysumf, return = "data.frame"), qDF(qM(lapply(mctl(mNA, names = TRUE), function(x) unlist(lapply(split(x, f), mysumf)))))) expect_equal(unname(BY(mNA, f, mysumf, return = "data.frame", expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mctl(mNA, names = TRUE), function(x) do.call(rbind, lapply(split(x, f), mysumf))))))) } }) test_that("BY.data.frame works as intended", { condsetrn <- function(x) if(is.null(rownames(x))) setRownames(x) else x for (f in list(g, f2uo, f2o)) { # No missing values expect_equal(BY(mtcars, f, bsum), fsum(mtcars, f)) expect_equal(BY(mtcars, f, bsum, return = "matrix"), condsetrn(qM(fsum(mtcars, f)))) expect_equal(BY(mtcars, f, bmean), fmean(mtcars, f)) expect_equal(BY(mtcars, f, bmean, return = "matrix"), condsetrn(qM(fmean(mtcars, f)))) # BY(mtcars, f, bscale) expect_equal(BY(mtcars, f, bscale, use.g.names = FALSE), fscale(mtcars, f)) expect_equal(BY(mtcars, f, log, use.g.names = FALSE), log(mtcars)) # Missing values removed expect_equal(BY(mtcNA, f, bsum, na.rm = TRUE), na20(fsum(mtcNA, f))) expect_equal(BY(mtcNA, f, bsum, return = "matrix", na.rm = TRUE), condsetrn(na20(qM(fsum(mtcNA, f))))) expect_equal(BY(mtcNA, f, bmean, na.rm = TRUE), fmean(mtcNA, f)) expect_equal(BY(mtcNA, f, bmean, return = "matrix", na.rm = TRUE), condsetrn(qM(fmean(mtcNA, f)))) expect_equal(BY(mtcNA, f, bscale, use.g.names = FALSE), fscale(mtcNA, f)) expect_equal(BY(mtcNA, f, log, use.g.names = FALSE), log(mtcNA)) # Missing values kept expect_equal(BY(mtcNA, f, bsum), fsum(mtcNA, f, na.rm = FALSE)) expect_equal(BY(mtcNA, f, bsum, return = "matrix"), condsetrn(qM(fsum(mtcNA, f, na.rm = FALSE)))) expect_equal(BY(mtcNA, f, bmean), fmean(mtcNA, f, na.rm = FALSE)) expect_equal(BY(mtcNA, f, bmean, return = "matrix"), condsetrn(qM(fmean(mtcNA, f, na.rm = FALSE)))) } for (f in list(f2uo, f2o)) { # No missing values expect_equal(BY(mtcars, f, quantile), qDF(qM(lapply(mtcars, function(x) unlist(lapply(split(x, f), quantile)))))) expect_equal(unname(BY(mtcars, f, quantile, expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mtcars, function(x) do.call(rbind, lapply(split(x, f), quantile))))))) expect_equal(BY(mtcars, f, quantile, return = "matrix"), qM(lapply(mtcars, function(x) unlist(lapply(split(x, f), quantile))))) expect_equal(setDimnames(BY(mtcars, f, quantile, return = "matrix", expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mtcars, function(x) do.call(rbind, lapply(split(x, f), quantile)))), NULL)) # Missing values removed expect_equal(BY(mtcNA, f, quantile, na.rm = TRUE), qDF(qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f), quantile, na.rm = TRUE)))))) expect_equal(unname(BY(mtcNA, f, quantile, expand.wide = TRUE, na.rm = TRUE)), unname(qDF(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f), quantile, na.rm = TRUE))))))) expect_equal(BY(mtcNA, f, quantile, return = "matrix", na.rm = TRUE), qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f), quantile, na.rm = TRUE))))) expect_equal(setDimnames(BY(mtcNA, f, quantile, return = "matrix", expand.wide = TRUE, na.rm = TRUE), NULL), setDimnames(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f), quantile, na.rm = TRUE)))), NULL)) # Missing values kept expect_equal(BY(mtcNA, f, mysumf), qDF(qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f), mysumf)))))) expect_equal(unname(BY(mtcNA, f, mysumf, expand.wide = TRUE)), unname(qDF(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f), mysumf))))))) expect_equal(BY(mtcNA, f, mysumf, return = "matrix"), qM(lapply(mtcNA, function(x) unlist(lapply(split(x, f), mysumf))))) expect_equal(setDimnames(BY(mtcNA, f, mysumf, return = "matrix", expand.wide = TRUE), NULL), setDimnames(do.call(cbind, lapply(mtcNA, function(x) do.call(rbind, lapply(split(x, f), mysumf)))), NULL)) } }) test_that("Output type is as expected", { expect_true(is.atomic(BY(x, fuo, bsum))) expect_true(is.atomic(BY(xNA, fuo, bsum, na.rm = TRUE))) expect_true(is.matrix(BY(mtcars, g, bsum, return = "matrix"))) expect_true(is.data.frame(BY(m, g, bsum, return = "data.frame"))) # BY(mtcars, g, quantile, expand.wide = TRUE, return = "list") expect_equal(BY(mtcars, g, quantile, return = "list", expand.wide = TRUE), BY(m, g, quantile, return = "list", expand.wide = TRUE)) }) test_that("BY matrix <> data.frame conversions run seamlessly", { expect_equal(BY(mtcars, g, bsum, return = "matrix"), BY(m, g, bsum)) expect_equal(BY(mtcars, g, bsum, return = "matrix", use.g.names = FALSE), BY(m, g, bsum, use.g.names = FALSE)) expect_equal(BY(m, g, bsum, return = "data.frame"), BY(mtcars, g, bsum)) expect_equal(BY(m, g, bsum, return = "data.frame", use.g.names = FALSE), BY(mtcars, g, bsum, use.g.names = FALSE)) expect_equal(BY(mtcars, g, log, return = "matrix"), BY(m, g, log)) expect_equal(BY(mtcars, g, log, return = "matrix", use.g.names = FALSE), BY(m, g, log, use.g.names = FALSE)) expect_equal(BY(m, g, log, return = "data.frame"), BY(mtcars, g, log)) expect_equal(BY(m, g, log, return = "data.frame", use.g.names = FALSE), BY(mtcars, g, log, use.g.names = FALSE)) expect_equal(BY(mtcars, g, quantile, return = "matrix"), BY(m, g, quantile)) expect_equal(BY(mtcars, g, quantile, return = "matrix", use.g.names = FALSE), BY(m, g, quantile, use.g.names = FALSE)) expect_equal(BY(m, g, quantile, return = "data.frame"), BY(mtcars, g, quantile)) expect_equal(BY(m, g, quantile, return = "data.frame", use.g.names = FALSE), BY(mtcars, g, quantile, use.g.names = FALSE)) }) test_that("BY produces errors for wrong input", { expect_error(BY(~bla, g, bsum)) # Not supported type expect_error(BY(1, g, bsum)) # This only gives a warning in gsplit: g is too long expect_error(BY(x, g, bsum)) # This only gives a warning in gsplit: g is too short expect_error(BY(letters, sample.int(5, length(letters), TRUE), bsum)) # wrong type expect_error(BY(x, f, sum2)) # unknown object expect_error(BY(x, f, "sum2")) # unknown object expect_error(BY(x, f, log, bla = 1)) # unknown function argument expect_error(BY(x, f, bsum, return = "bla")) # unknown return option expect_error(BY(m, g, sum2)) # unknown object expect_error(BY(m, g, "sum2")) # unknown object expect_error(BY(m, g, log, bla = 1)) # unknown function argument expect_error(BY(m, g, bsum, return = "bla")) # unknown return option expect_error(BY(mtcars, g, sum2)) # unknown object expect_error(BY(mtcars, g, "sum2")) # unknown object expect_error(BY(mtcars, g, log, bla = 1)) # unknown function argument expect_error(BY(mtcars, g, bsum, return = "bla")) # unknown return option expect_error(BY(mtcars, ~g, bsum)) # Not supported type expect_error(BY(m, ~g, bsum)) # Not supported type expect_error(BY(x, ~g, bsum)) # Not supported type }) test_that("no row-names are generated for data.table's (only)", { mtcDT <- qDT(mtcars) for(FUN in list(bsum, quantile, identity)) { expect_false(is.character(attr(BY(mtcDT, g, FUN), "row.names"))) if(!identical(FUN, identity)) { expect_true(is.character(attr(BY(mtcDT, g, FUN, return = "data.frame"), "row.names"))) expect_true(is.character(dimnames(BY(mtcDT, g, FUN, return = "matrix"))[[1L]])) } expect_false(is.character(attr(BY(mtcDT, g, FUN, use.g.names = FALSE), "row.names"))) expect_false(is.character(attr(BY(mtcDT, g, FUN, use.g.names = FALSE, return = "data.frame"), "row.names"))) expect_false(is.character(dimnames(BY(mtcDT, g, FUN, use.g.names = FALSE, return = "matrix"))[[1L]])) } }) options(warn = 1) collapse/tests/testthat/test-whichv.R0000644000176200001440000001046714777170131017466 0ustar liggesuserscontext("anyv, allv, whichv, setv, copyv etc.") # d <- replace_NA(wlddev, cols = 9:13) test_that("whichv works well", { expect_identical(whichv(wlddev$country, "Chad"), which(wlddev$country == "Chad")) expect_identical(whichv(wlddev$country, "Chad", invert = TRUE), which(wlddev$country != "Chad")) expect_identical(whichNA(wlddev$PCGDP), which(is.na(wlddev$PCGDP))) expect_identical(whichNA(wlddev$PCGDP, invert = TRUE), which(!is.na(wlddev$PCGDP))) expect_identical(whichv(is.na(wlddev$PCGDP), FALSE), which(!is.na(wlddev$PCGDP))) }) test_that("anyv, allv and whichv work properly", { for(i in seq_along(wlddev)) { vec <- .subset2(wlddev, i) v <- vec[trunc(runif(1L, 1L, length(vec)))] if(is.na(v)) v <- flast(vec) expect_identical(which(vec == v), whichv(vec, v)) if(!anyNA(vec)) expect_identical(which(vec != v), whichv(vec, v, TRUE)) expect_identical(all(vec == v), allv(vec, v)) expect_identical(any(vec == v), anyv(vec, v)) vecNA <- is.na(vec) expect_identical(which(vecNA), whichNA(vec)) expect_identical(which(!vecNA), whichNA(vec, TRUE)) expect_identical(all(vecNA), allNA(vec)) expect_identical(any(vecNA), anyNA(vec)) } if(identical(Sys.getenv("NCRAN"), "TRUE")) { expect_true(allv(rep(0.0004, 1000), 0.0004)) expect_false(allv(rep(0.0004, 1000), 0.0005)) } }) if(requireNamespace("data.table", quietly = TRUE)) { wldcopy <- data.table::copy(wlddev) mtccopy <- data.table::copy(mtcars) test_that("setv and copyv work properly", { for(FUN in list(copyv, setv)) { for(i in seq_along(wlddev)) { # print(i) vec <- .subset2(wlddev, i) v <- vec[trunc(runif(1L, 1L, length(vec)))] r <- vec[trunc(runif(1L, 1L, length(vec)))] if(is.na(v)) v <- flast(vec) vl <- vec == v nvl <- vec != v vna <- is.na(vec) expect_identical(FUN(vec, v, r), replace(vec, vl, r)) expect_identical(FUN(vec, which(vl), r, vind1 = TRUE), replace(vec, which(vl), r)) expect_identical(FUN(vec, 10:1000, r), replace(vec, 10:1000, r)) expect_identical(FUN(vec, NA, r), replace(vec, vna, r)) expect_identical(FUN(vec, vl, r), replace(vec, vl, r)) expect_identical(FUN(vec, 258L, r, vind1 = TRUE), replace(vec, 258L, r)) expect_identical(FUN(vec, vl, r, invert = TRUE), replace(vec, !vl, r)) expect_identical(FUN(vec, which(nvl), r), replace(vec, which(nvl), r)) expect_error(FUN(vec, which(vl), r, invert = TRUE, vind1 = TRUE)) # expect_error(FUN(vec, which(nvl), r, invert = TRUE)) if(anyNA(vl)) { setv(vl, NA, FALSE) setv(nvl, NA, FALSE) } expect_identical(FUN(vec, v, vec), replace(vec, vl, vec[vl])) expect_identical(FUN(vec, NA, vec), replace(vec, vna, vec[vna])) expect_identical(FUN(vec, vl, vec), replace(vec, vl, vec[vl])) expect_identical(FUN(vec, vl, vec, invert = TRUE), replace(vec, nvl, vec[nvl])) expect_identical(FUN(vec, which(vl), vec), replace(vec, vl, vec[vl])) expect_identical(FUN(vec, which(nvl), vec), replace(vec, nvl, vec[nvl])) # expect_error(FUN(vec, which(nvl), vec, invert = TRUE)) } replr <- function(x, i, v) { x[i, ] <- v x } expect_identical(FUN(mtcars, 1, 2), replace(mtcars, mtcars == 1, 2)) expect_identical(FUN(mtcars, 1, 2, invert = TRUE), replace(mtcars, mtcars != 1, 2)) if(identical(FUN, copyv)) expect_visible(FUN(mtcars, 1, mtcars$mpg, invert = TRUE)) else expect_invisible(FUN(mtcars, 1, mtcars$mpg, invert = TRUE)) expect_identical(FUN(mtcars, 23L, mtcars$mpg, vind1 = TRUE), replr(mtcars, 23L, mtcars$mpg[23L])) expect_identical(FUN(mtcars, 3:6, mtcars$mpg), replr(mtcars, 3:6, mtcars$mpg[3:6])) expect_identical(FUN(mtcars, 23L, mtcars, vind1 = TRUE), replr(mtcars, 23L, mtcars[23L, ])) expect_identical(FUN(mtcars, 3:6, mtcars), replr(mtcars, 3:6, mtcars[3:6, ])) expect_error(FUN(mtcars, 23, mtcars$mpg[4:10])) expect_warning(FUN(mtcars, 23, mtcars[4:10])) expect_error(FUN(mtcars, 23L, mtcars$mpg[4:10], vind1 = TRUE)) expect_warning(FUN(mtcars, 23L, mtcars[4:10], vind1 = TRUE)) expect_error(FUN(mtcars, 3:6, mtcars$mpg[4:10])) expect_warning(FUN(mtcars, 3:6, mtcars[4:10])) if(identical(FUN, copyv)) { expect_identical(wlddev, wldcopy) expect_identical(mtcars, mtccopy) } } }) wlddev <- wldcopy mtcars <- mtccopy } collapse/tests/testthat/test-fmedian.R0000644000176200001440000012162614777170131017601 0ustar liggesuserscontext("fmedian and fnth") bmean <- base::mean bsum <- base::sum bmin <- base::min bmax <- base::max bmedian <- stats::median # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- as.integer(round(10*abs(rnorm(100)))) # -> Numeric precision issues in R wdat <- as.integer(round(10*abs(rnorm(32)))) xNA <- x wNA <- w xNA[sample.int(100,20)] <- NA wNA[is.na(xNA)] <- NA # only missing weights if x also missing f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27, 1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" nth <- function(x, n, na.rm = FALSE) { if(na.rm) { if(n > 1) n <- (n-1)/(length(x)-1L) x <- na_rm(x) if(!length(x)) return(NA_real_) } else { if(anyNA(x)) return(NA_real_) } if(n < 1) { n <- as.integer((length(x)-1L)*n)+1L if(n < 2L) return(bmin(x)) } sort(x, partial = n)[n] } wnth <- function(x, n = 0.5, w, na.rm = FALSE, ties = "mean") { cc <- complete.cases(x, w) if(na.rm) { x <- x[cc] w <- w[cc] if(!length(x)) return(NA_real_) } else if(!all(cc)) return(NA_real_) sumwh <- bsum(w) * n if(sumwh == 0) return(NA_real_) if(length(x) < 2L) return(x) lp1 <- function(x) if(length(x)) x[length(x)] + 1L else 1L mean2 <- function(x) bsum(x) / length(x) o <- radixorder(x) csumw <- base::cumsum(w[o]) if(csumw[1L] > sumwh) return(x[o[1L]]) switch(ties, mean = mean2(x[o[lp1(which(csumw < sumwh)):lp1(which(csumw <= sumwh))]]), min = x[o[lp1(which(csumw < sumwh))]], max = x[o[lp1(which(csumw <= sumwh))]]) } wmedian <- function(x, w, na.rm = FALSE) wnth(x, 0.5, w, na.rm, "mean") # matrixStats::weightedMedian(x, w, ties = ties) -> doesn't always properly average if ties = "mean"... for (FUN in 1:2) { if(FUN == 2L) { if(Sys.getenv("OMP") == "TRUE") { fmedian <- function(x, ...) collapse::fmedian(x, ..., nthreads = 2L) } else break } test_that("fmedian performs like base::median", { for(t in c(1L, 5:9)) { # All quantile methods should give the same median value estimate expect_equal(fmedian(NA, ties = t), as.double(bmedian(NA))) expect_equal(fmedian(NA, na.rm = FALSE, ties = t), as.double(bmedian(NA))) expect_equal(fmedian(1, ties = t), bmedian(1, na.rm = TRUE)) expect_equal(fmedian(1:3, ties = t), bmedian(1:3, na.rm = TRUE)) expect_equal(fmedian(-1:1, ties = t), bmedian(-1:1, na.rm = TRUE)) expect_equal(fmedian(1, na.rm = FALSE, ties = t), bmedian(1)) expect_equal(fmedian(1:3, na.rm = FALSE, ties = t), bmedian(1:3)) expect_equal(fmedian(-1:1, na.rm = FALSE, ties = t), bmedian(-1:1)) expect_equal(fmedian(x, ties = t), bmedian(x, na.rm = TRUE)) expect_equal(fmedian(x, na.rm = FALSE, ties = t), bmedian(x)) expect_equal(fmedian(xNA, na.rm = FALSE, ties = t), bmedian(xNA)) expect_equal(fmedian(xNA, ties = t), bmedian(xNA, na.rm = TRUE)) expect_equal(fmedian(mtcars, ties = t), fmedian(m)) expect_equal(fmedian(m, ties = t), dapply(m, bmedian, na.rm = TRUE)) expect_equal(fmedian(m, na.rm = FALSE, ties = t), dapply(m, bmedian)) expect_equal(fmedian(mNA, na.rm = FALSE, ties = t), dapply(mNA, bmedian)) expect_equal(fmedian(mNA, ties = t), dapply(mNA, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars, ties = t), dapply(mtcars, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars, na.rm = FALSE, ties = t), dapply(mtcars, bmedian)) expect_equal(fmedian(mtcNA, na.rm = FALSE, ties = t), dapply(mtcNA, bmedian)) expect_equal(fmedian(mtcNA, ties = t), dapply(mtcNA, bmedian, na.rm = TRUE)) expect_equal(fmedian(x, f, ties = t), BY(x, f, bmedian, na.rm = TRUE)) expect_equal(fmedian(x, f, na.rm = FALSE, ties = t), BY(x, f, bmedian)) expect_equal(fmedian(xNA, f, na.rm = FALSE, ties = t), BY(xNA, f, bmedian)) expect_equal(fmedian(xNA, f, ties = t), BY(xNA, f, bmedian, na.rm = TRUE)) expect_equal(fmedian(m, g, ties = t), BY(m, g, bmedian, na.rm = TRUE)) expect_equal(fmedian(m, g, na.rm = FALSE, ties = t), BY(m, g, bmedian)) expect_equal(fmedian(mNA, g, na.rm = FALSE, ties = t), BY(mNA, g, bmedian)) expect_equal(fmedian(mNA, g, ties = t), BY(mNA, g, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars, g, ties = t), BY(mtcars, g, bmedian, na.rm = TRUE)) expect_equal(fmedian(mtcars, g, na.rm = FALSE, ties = t), BY(mtcars, g, bmedian)) expect_equal(fmedian(mtcNA, g, na.rm = FALSE, ties = t), BY(mtcNA, g, bmedian)) expect_equal(fmedian(mtcNA, g, ties = t), BY(mtcNA, g, bmedian, na.rm = TRUE)) } }) test_that("fmedian performs like fmedian with weights all equal", { expect_equal(fmedian(NA), fmedian(NA, w = 1)) expect_equal(fmedian(NA, na.rm = FALSE), fmedian(NA, w = 1, na.rm = FALSE)) expect_equal(fmedian(1), fmedian(1, w = 3)) expect_equal(fmedian(1:3), fmedian(1:3, w = rep(1,3))) expect_equal(fmedian(-1:1), fmedian(-1:1, w = rep(4.2,3))) expect_equal(fmedian(1, na.rm = FALSE), fmedian(1, w = 5, na.rm = FALSE)) expect_equal(fmedian(1:3, na.rm = FALSE), fmedian(1:3, w = rep(1, 3), na.rm = FALSE)) expect_equal(fmedian(-1:1, na.rm = FALSE), fmedian(-1:1, w = rep(12, 3), na.rm = FALSE)) expect_equal(fmedian(x), fmedian(x, w = rep(1,100))) expect_equal(fmedian(x, na.rm = FALSE), fmedian(x, w = rep(1, 100), na.rm = FALSE)) expect_equal(fmedian(xNA, na.rm = FALSE), fmedian(xNA, w = rep(5, 100), na.rm = FALSE)) expect_equal(fmedian(xNA), fmedian(xNA, w = rep(4, 100))) expect_equal(fmedian(m), fmedian(m, w = rep(6587, 32))) expect_equal(fmedian(m, na.rm = FALSE), fmedian(m, w = rep(6587, 32), na.rm = FALSE)) expect_equal(fmedian(mNA, na.rm = FALSE), fmedian(mNA, w = rep(6587, 32), na.rm = FALSE)) expect_equal(fmedian(mNA), fmedian(mNA, w = rep(6587, 32))) expect_equal(fmedian(mtcars), fmedian(mtcars, w = rep(6787, 32))) expect_equal(fmedian(mtcars, na.rm = FALSE), fmedian(mtcars, w = rep(6787, 32), na.rm = FALSE)) expect_equal(fmedian(mtcNA, na.rm = FALSE), fmedian(mtcNA, w = rep(6787, 32), na.rm = FALSE)) expect_equal(fmedian(mtcNA), fmedian(mtcNA, w = rep(6787, 32))) expect_equal(fmedian(x, f), fmedian(x, f, rep(547,100))) expect_equal(fmedian(x, f, na.rm = FALSE), fmedian(x, f, rep(6, 100), na.rm = FALSE)) expect_equal(fmedian(xNA, f, na.rm = FALSE), fmedian(xNA, f, rep(52,100), na.rm = FALSE)) expect_equal(fmedian(xNA, f), fmedian(xNA, f, rep(5997456,100))) expect_equal(fmedian(m, g), fmedian(m, g, rep(546,32))) expect_equal(fmedian(m, g, na.rm = FALSE), fmedian(m, g, rep(1,32), na.rm = FALSE)) expect_equal(fmedian(mNA, g, na.rm = FALSE), fmedian(mNA, g, rep(5,32), na.rm = FALSE)) expect_equal(fmedian(mNA, g), fmedian(mNA, g, rep(1,32))) expect_equal(fmedian(mtcars, g), fmedian(mtcars, g, rep(53,32))) expect_equal(fmedian(mtcars, g, na.rm = FALSE), fmedian(mtcars, g, rep(546,32), na.rm = FALSE)) expect_equal(fmedian(mtcNA, g, na.rm = FALSE), fmedian(mtcNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fmedian(mtcNA, g), fmedian(mtcNA, g, rep(999,32))) }) test_that("fmedian with weights performs like wmedian (defined above)", { # complete weights expect_equal(fmedian(NA, w = 1), wmedian(NA_real_, 1)) expect_equal(fmedian(NA, w = 1, na.rm = FALSE), wmedian(NA_real_, 1)) expect_equal(fmedian(1, w = 1), wmedian(1, w = 1)) expect_equal(fmedian(1:3, w = 1:3), wmedian(1:3, 1:3)) expect_equal(fmedian(-1:1, w = 1:3), wmedian(-1:1, 1:3)) expect_equal(fmedian(1, w = 1, na.rm = FALSE), wmedian(1, 1)) expect_equal(fmedian(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wmedian(1:3, c(0.99,3454,1.111))) expect_equal(fmedian(-1:1, w = 1:3, na.rm = FALSE), wmedian(-1:1, 1:3)) expect_equal(fmedian(x, w = w), wmedian(x, w)) expect_equal(fmedian(x, w = w, na.rm = FALSE), wmedian(x, w)) expect_equal(fmedian(xNA, w = w, na.rm = FALSE), wmedian(xNA, w)) expect_equal(fmedian(xNA, w = w), wmedian(xNA, w, na.rm = TRUE)) expect_equal(fmedian(mtcars, w = wdat), fmedian(m, w = wdat)) expect_equal(fmedian(m, w = wdat), dapply(m, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(m, w = wdat, na.rm = FALSE), dapply(m, wmedian, wdat)) expect_equal(fmedian(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wmedian, wdat)) expect_equal(fmedian(mNA, w = wdat), dapply(mNA, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(mtcars, w = wdat), dapply(mtcars, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wmedian, wdat)) expect_equal(fmedian(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wmedian, wdat)) expect_equal(fmedian(mtcNA, w = wdat), dapply(mtcNA, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(x, f, w), BY(x, f, wmedian, w)) expect_equal(fmedian(x, f, w, na.rm = FALSE), BY(x, f, wmedian, w)) expect_equal(fmedian(xNA, f, w, na.rm = FALSE), BY(xNA, f, wmedian, w)) expect_equal(fmedian(xNA, f, w), BY(xNA, f, wmedian, w, na.rm = TRUE)) expect_equal(fmedian(m, g, wdat), BY(m, gf, wmedian, wdat)) expect_equal(fmedian(m, g, wdat, na.rm = FALSE), BY(m, gf, wmedian, wdat)) expect_equal(fmedian(mNA, g, wdat, na.rm = FALSE), BY(mNA, gf, wmedian, wdat)) expect_equal(fmedian(mNA, g, wdat), BY(mNA, gf, wmedian, wdat, na.rm = TRUE)) expect_equal(fmedian(mtcars, g, wdat), BY(mtcars, gf, wmedian, wdat)) expect_equal(fmedian(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, gf, wmedian, wdat)) expect_equal(fmedian(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, gf, wmedian, wdat)) expect_equal(fmedian(mtcNA, g, wdat), BY(mtcNA, gf, wmedian, wdat, na.rm = TRUE)) # missing weights: Only supported if x is also missing... expect_equal(fmedian(NA, w = NA), wmedian(NA_real_, NA_real_)) expect_equal(fmedian(NA, w = NA, na.rm = FALSE), wmedian(NA_real_, NA_real_)) expect_equal(fmedian(xNA, w = wNA, na.rm = FALSE), wmedian(xNA, wNA)) expect_equal(fmedian(xNA, w = wNA), wmedian(xNA, wNA, na.rm = TRUE)) expect_equal(fmedian(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wmedian, wNA)) expect_equal(fmedian(xNA, f, wNA), BY(xNA, f, wmedian, wNA, na.rm = TRUE)) }) test_that("fmedian performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmedian(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g), simplify = FALSE))) }) test_that("fmedian with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmedian(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fmedian with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmedian(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmedian(xNA, f, wNA), simplify = FALSE))) }) test_that("fmedian handles special values in the right way", { expect_equal(fmedian(NA), NA_real_) expect_equal(fmedian(NaN), NaN) expect_equal(fmedian(Inf), Inf) expect_equal(fmedian(-Inf), -Inf) expect_equal(fmedian(TRUE), 1) expect_equal(fmedian(FALSE), 0) expect_equal(fmedian(NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(NaN, na.rm = FALSE), NaN) expect_equal(fmedian(Inf, na.rm = FALSE), Inf) expect_equal(fmedian(-Inf, na.rm = FALSE), -Inf) expect_equal(fmedian(TRUE, na.rm = FALSE), 1) expect_equal(fmedian(FALSE, na.rm = FALSE), 0) expect_equal(fmedian(c(1,NA)), 1) expect_equal(fmedian(c(1,NaN)), 1) expect_equal(fmedian(c(1,Inf)), Inf) expect_equal(fmedian(c(1,-Inf)), -Inf) expect_equal(fmedian(c(FALSE,TRUE)), 0.5) expect_equal(fmedian(c(FALSE,FALSE)), 0) expect_equal(fmedian(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fmedian(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fmedian(c(FALSE,TRUE), na.rm = FALSE), 0.5) expect_equal(fmedian(c(FALSE,FALSE), na.rm = FALSE), 0) }) test_that("fmedian with weights handles special values in the right way", { expect_equal(fmedian(NA, w = 1), NA_real_) expect_equal(fmedian(NaN, w = 1), NaN) expect_equal(fmedian(Inf, w = 1), Inf) expect_equal(fmedian(-Inf, w = 1), -Inf) expect_equal(fmedian(TRUE, w = 1), 1) expect_equal(fmedian(FALSE, w = 1), 0) expect_equal(fmedian(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fmedian(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fmedian(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fmedian(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fmedian(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fmedian(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fmedian(NA, w = NA), NA_real_) expect_equal(fmedian(NaN, w = NA), NA_real_) expect_equal(fmedian(Inf, w = NA), NA_real_) expect_equal(fmedian(-Inf, w = NA), NA_real_) expect_equal(fmedian(TRUE, w = NA), NA_real_) expect_equal(fmedian(FALSE, w = NA), NA_real_) expect_equal(fmedian(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmedian(FALSE, w = NA, na.rm = FALSE), NA_real_) # expect_equal(fmedian(1:3, w = c(1,Inf,3)), 2) # wmedian gives 2 !!!!!! # expect_equal(fmedian(1:3, w = c(1,-Inf,3)), 1) # wmedian gives 3 !!!!!! # expect_equal(fmedian(1:3, w = c(1,Inf,3), na.rm = FALSE), 2) # expect_equal(fmedian(1:3, w = c(1,-Inf,3), na.rm = FALSE), 3) }) test_that("fmedian produces errors for wrong input", { expect_warning(fmedian("a")) expect_equal(fmedian(NA_character_), NA_real_) expect_error(fmedian(mNAc)) expect_error(fmedian(mNAc, f)) expect_error(fmedian(1:2,1:3)) expect_error(fmedian(m,1:31)) expect_error(fmedian(mtcars,1:31)) expect_error(fmedian(mtcars, w = 1:31)) expect_warning(fmedian("a", w = 1)) expect_error(fmedian(1:2, w = 1:3)) expect_equal(fmedian(NA_character_, w = 1), NA_real_) expect_error(fmedian(mNAc, w = wdat)) expect_error(fmedian(mNAc, f, wdat)) expect_error(fmedian(mNA, w = 1:33)) expect_error(fmedian(1:2,1:2, 1:3)) expect_error(fmedian(m,1:32,1:20)) expect_error(fmedian(mtcars,1:32,1:10)) expect_error(fmedian(1:2, w = c("a","b"))) expect_error(fmedian(wlddev)) expect_error(fmedian(wlddev, w = wlddev$year)) expect_error(fmedian(wlddev, wlddev$iso3c)) expect_error(fmedian(wlddev, wlddev$iso3c, wlddev$year)) }) } # fnth g <- GRP(mtcars, ~ cyl) gf <- as_factor_GRP(g) for (FUN in 1:2) { if(FUN == 2L) { if(Sys.getenv("OMP") == "TRUE") { fnth <- function(x, ...) collapse::fnth(x, ..., nthreads = 2L) } else break } test_that("fnth gives a proper lower/upper/average weighted median on complete data", { expect_equal(fnth(1:3, w = c(3,1,1), ties = "mean"), 1) expect_true(all_identical( fnth(1:3, w = c(3,1,1), ties = "mean"), fnth(1:3, w = c(3,1,1), ties = "min"), fnth(1:3, w = c(3,1,1), ties = "max"), fnth(1:3, w = c(3,1,1), ties = "mean", na.rm = FALSE), fnth(1:3, w = c(3,1,1), ties = "min", na.rm = FALSE), fnth(1:3, w = c(3,1,1), ties = "max", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "mean"), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "min"), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "max"), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "mean", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(3,1,1), use.g.names = FALSE, ties = "max", na.rm = FALSE))) expect_identical(fnth(1:3, w = c(1,1,3), ties = "mean"), 3) expect_true(all_identical( fnth(1:3, w = c(1,1,3), ties = "mean"), fnth(1:3, w = c(1,1,3), ties = "min"), fnth(1:3, w = c(1,1,3), ties = "max"), fnth(1:3, w = c(1,1,3), ties = "mean", na.rm = FALSE), fnth(1:3, w = c(1,1,3), ties = "min", na.rm = FALSE), fnth(1:3, w = c(1,1,3), ties = "max", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "mean"), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "min"), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "max"), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "mean", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(1:3, g = rep(1,3), w = c(1,1,3), use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = c(0.15, 0.1, 0.2, 0.3, 0.25) y = seq_len(5) # [order(rnorm(5))] expect_identical(fnth(y, w = w, ties = "mean"), 4) expect_true(all_identical(4, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "mean"), fnth(y, w = w, ties = "min"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min"), fnth(y, w = w, ties = "max"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max"), fnth(y, w = w, na.rm = FALSE, ties = "mean"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE, ties = "mean"), fnth(y, w = w, ties = "min", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(y, w = w, ties = "max", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = c(0.15, 0.2, 0.3, 0.25) y = seq_len(4) # [order(rnorm(4))] expect_identical(fnth(y, w = w, ties = "mean"), 3) expect_true(all_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "mean"), fnth(y, w = w, ties = "min"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min"), fnth(y, w = w, ties = "max"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max"), fnth(y, w = w, na.rm = FALSE, ties = "mean"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE, ties = "mean"), fnth(y, w = w, ties = "min", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(y, w = w, ties = "max", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = rep(0.25, 4) expect_identical(fnth(y, w = w, ties = "mean"), 2.5) expect_identical(2.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "mean")) expect_identical(fnth(y, w = w, ties = "min"), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min")) expect_identical(fnth(y, w = w, ties = "max"), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max")) expect_identical(fnth(y, w = w, na.rm = FALSE, ties = "mean"), 2.5) expect_identical(2.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE, ties = "mean")) expect_identical(fnth(y, w = w, ties = "min", na.rm = FALSE), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "max", na.rm = FALSE), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE)) w = rep(0.25, 5) y = seq_len(5) #[order(rnorm(5))] expect_identical(fnth(y, w = w), 3) expect_true(all_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "mean"), fnth(y, w = w, ties = "min"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min"), fnth(y, w = w, ties = "max"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max"), fnth(y, w = w, na.rm = FALSE, ties = "mean"), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE, ties = "mean"), fnth(y, w = w, ties = "min", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE), fnth(y, w = w, ties = "max", na.rm = FALSE), fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE))) w = c(0.25, 0.25, 0, 0.25, 0.25) expect_identical(fnth(y, w = w, ties = "mean"), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "mean")) expect_identical(fnth(y, w = w, ties = "min"), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min")) expect_identical(fnth(y, w = w, ties = "max"), 4) expect_identical(4, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max")) expect_identical(fnth(y, w = w, na.rm = FALSE, ties = "mean"), 3) expect_identical(3, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE, ties = "mean")) expect_identical(fnth(y, w = w, ties = "min", na.rm = FALSE), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "max", na.rm = FALSE), 4) expect_identical(4, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE)) w = c(0.25, 0.25, 0, 0, 0.25, 0.25) y = seq_len(6) # [order(rnorm(6))] expect_identical(fnth(y, w = w, ties = "mean"), 3.5) expect_identical(3.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "mean")) expect_identical(fnth(y, w = w, ties = "min"), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min")) expect_identical(fnth(y, w = w, ties = "max"), 5) expect_identical(5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max")) expect_identical(fnth(y, w = w, na.rm = FALSE, ties = "mean"), 3.5) expect_identical(3.5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, na.rm = FALSE, ties = "mean")) expect_identical(fnth(y, w = w, ties = "min", na.rm = FALSE), 2) expect_identical(2, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "min", na.rm = FALSE)) expect_identical(fnth(y, w = w, ties = "max", na.rm = FALSE), 5) expect_identical(5, fnth(y, g = rep(1, length(y)), w = w, use.g.names = FALSE, ties = "max", na.rm = FALSE)) }) test_that("fnth performs like nth (defined above)", { n = 2 expect_error(fnth(NA, n)) expect_error(fnth(NA, n, na.rm = FALSE)) expect_error(fnth(1, n)) expect_equal(fnth(1:3, n), nth(1:3, n, na.rm = TRUE)) expect_equal(fnth(-1:1, n), nth(-1:1, n, na.rm = TRUE)) expect_equal(fnth(1:3, n, na.rm = FALSE), nth(1:3, n)) expect_equal(fnth(-1:1, n, na.rm = FALSE), nth(-1:1, n)) expect_equal(fnth(x, n), nth(x, n, na.rm = TRUE)) expect_equal(fnth(x, n, na.rm = FALSE), nth(x, n)) expect_equal(fnth(xNA, n, na.rm = FALSE), nth(xNA, n)) expect_equal(fnth(xNA, n), nth(xNA, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n), fnth(m, n)) expect_equal(fnth(m, n), dapply(m, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(m, n, na.rm = FALSE), dapply(m, nth, n)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(mNA, n, na.rm = FALSE), dapply(mNA, nth, n)) expect_equal(fnth(mNA, n), dapply(mNA, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n), dapply(mtcars, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(mtcars, n, na.rm = FALSE), dapply(mtcars, nth, n)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(mtcNA, n, na.rm = FALSE), dapply(mtcNA, nth, n)) expect_equal(fnth(mtcNA, n), dapply(mtcNA, nth, n, na.rm = TRUE)) f2 <- as.factor(rep(1:10, each = 10)[order(rnorm(100))]) expect_equal(fnth(x, n, f2), BY(x, f2, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(x, n, f2, na.rm = FALSE), BY(x, f2, nth, n)) # failed on oldrel-windows-ix86+x86_64 g2 <- GRP(rep(1:2, each = 16)[order(rnorm(32))]) expect_equal(fnth(m, n, g2), BY(m, g2, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(m, n, g2, na.rm = FALSE), BY(m, g2, nth, n)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(mtcars, n, g2), BY(mtcars, g2, nth, n, na.rm = TRUE)) # failed on oldrel-windows-ix86+x86_64 expect_equal(fnth(mtcars, n, g2, na.rm = FALSE), BY(mtcars, g2, nth, n)) # failed on oldrel-windows-ix86+x86_64 for(i in 1:5) { n = runif(1, min = 1, max = 999) / 1000 # Probability needed for nth to work with groups expect_equal(fnth(1:3, n, ties = "min"), nth(1:3, n, na.rm = TRUE)) expect_equal(fnth(-1:1, n, ties = "min"), nth(-1:1, n, na.rm = TRUE)) expect_equal(fnth(1:3, n, na.rm = FALSE, ties = "min"), nth(1:3, n)) expect_equal(fnth(-1:1, n, na.rm = FALSE, ties = "min"), nth(-1:1, n)) expect_equal(fnth(x, n, ties = "min"), nth(x, n, na.rm = TRUE)) expect_equal(fnth(x, n, na.rm = FALSE, ties = "min"), nth(x, n)) expect_equal(fnth(xNA, n, na.rm = FALSE, ties = "min"), nth(xNA, n)) expect_equal(fnth(xNA, n, ties = "min"), nth(xNA, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, ties = "min"), fnth(m, n, ties = "min")) expect_equal(fnth(m, n, ties = "min"), dapply(m, nth, n, na.rm = TRUE)) expect_equal(fnth(m, n, na.rm = FALSE, ties = "min"), dapply(m, nth, n)) expect_equal(fnth(mNA, n, na.rm = FALSE, ties = "min"), dapply(mNA, nth, n)) expect_equal(fnth(mNA, n, ties = "min"), dapply(mNA, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, ties = "min"), dapply(mtcars, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, na.rm = FALSE, ties = "min"), dapply(mtcars, nth, n)) expect_equal(fnth(mtcNA, n, na.rm = FALSE, ties = "min"), dapply(mtcNA, nth, n)) expect_equal(fnth(mtcNA, n, ties = "min"), dapply(mtcNA, nth, n, na.rm = TRUE)) expect_equal(fnth(xNA, n, f2, na.rm = FALSE, ties = "min"), BY(xNA, f2, nth, n)) expect_equal(fnth(xNA, n, f2, ties = "min"), BY(xNA, f2, nth, n, na.rm = TRUE)) expect_equal(fnth(m, n, g, ties = "min"), BY(m, g, nth, n, na.rm = TRUE)) expect_equal(fnth(m, n, g, na.rm = FALSE, ties = "min"), BY(m, g, nth, n)) expect_equal(fnth(mNA, n, g, na.rm = FALSE, ties = "min"), BY(mNA, g, nth, n)) expect_equal(fnth(mNA, n, g, ties = "min"), BY(mNA, g, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, g, ties = "min"), BY(mtcars, g, nth, n, na.rm = TRUE)) expect_equal(fnth(mtcars, n, g, na.rm = FALSE, ties = "min"), BY(mtcars, g, nth, n)) expect_equal(fnth(mtcNA, n, g, na.rm = FALSE, ties = "min"), BY(mtcNA, g, nth, n)) expect_equal(fnth(mtcNA, n, g, ties = "min"), BY(mtcNA, g, nth, n, na.rm = TRUE)) } }) test_that("fnth matrix and data.frame method work alike", { for(i in 1:3) { n = runif(1, min = 1, max = 999) / 1000 expect_equal(fnth(mtcars, n, ties = "min"), fnth(m, n, ties = "min")) expect_equal(fnth(mtcars, n), fnth(m, n)) expect_equal(fnth(mtcars, n, ties = "max"), fnth(m, n, ties = "max")) expect_equal(fnth(mtcNA, n, ties = "min"), fnth(mNA, n, ties = "min")) expect_equal(fnth(mtcNA, n), fnth(mNA, n)) expect_equal(fnth(mtcNA, n, ties = "max"), fnth(mNA, n, ties = "max")) expect_equal(qM(fnth(mtcars, n, g, ties = "min")), fnth(m, n, g, ties = "min")) expect_equal(qM(fnth(mtcars, n, g)), fnth(m, n, g)) expect_equal(qM(fnth(mtcars, n, g, ties = "max")), fnth(m, n, g, ties = "max")) expect_equal(qM(fnth(mtcNA, n, g, ties = "min")), fnth(mNA, n, g, ties = "min")) expect_equal(qM(fnth(mtcNA, n, g)), fnth(mNA, n, g)) expect_equal(qM(fnth(mtcNA, n, g, ties = "max")), fnth(mNA, n, g, ties = "max")) expect_equal(fnth(mtcars, n, w = wdat, ties = "min"), fnth(m, n, w = wdat, ties = "min")) expect_equal(fnth(mtcars, n, w = wdat), fnth(m, n, w = wdat)) expect_equal(fnth(mtcars, n, w = wdat, ties = "max"), fnth(m, n, w = wdat, ties = "max")) expect_equal(fnth(mtcNA, n, w = wdat, ties = "min"), fnth(mNA, n, w = wdat, ties = "min")) expect_equal(fnth(mtcNA, n, w = wdat), fnth(mNA, n, w = wdat)) expect_equal(fnth(mtcNA, n, w = wdat, ties = "max"), fnth(mNA, n, w = wdat, ties = "max")) expect_equal(qM(fnth(mtcars, n, g, wdat, ties = "min")), fnth(m, n, g, wdat, ties = "min")) expect_equal(qM(fnth(mtcars, n, g, wdat)), fnth(m, n, g, wdat)) expect_equal(qM(fnth(mtcars, n, g, wdat, ties = "max")), fnth(m, n, g, wdat, ties = "max")) expect_equal(qM(fnth(mtcNA, n, g, wdat, ties = "min")), fnth(mNA, n, g, wdat, ties = "min")) expect_equal(qM(fnth(mtcNA, n, g, wdat)), fnth(mNA, n, g, wdat)) expect_equal(qM(fnth(mtcNA, n, g, wdat, ties = "max")), fnth(mNA, n, g, wdat, ties = "max")) } }) test_that("fnth performs like fnth with weights all equal", { for(t in c("min","max")) { # "mean", # already tested above.. # for(i in 1:3) { n = 0.5 # round(runif(1, min = 1, max = 999) / 1000, 3) # other numbers than 0.5 do not work and cannot work.. expect_equal(fnth(NA, n, ties = t), fnth(NA, n, w = 1, ties = t)) expect_equal(fnth(NA, n, na.rm = FALSE, ties = t), fnth(NA, n, w = 1, na.rm = FALSE, ties = t)) expect_equal(fnth(1, n, ties = t), fnth(1, n, w = 3, ties = t)) expect_equal(fnth(1:3, n, ties = t), fnth(1:3, n, w = rep(1,3), ties = t)) expect_equal(fnth(-1:1, n, ties = t), fnth(-1:1, n, w = rep(4.2,3), ties = t)) expect_equal(fnth(1, n, na.rm = FALSE, ties = t), fnth(1, n, w = 5, na.rm = FALSE, ties = t)) expect_equal(fnth(1:3, n, na.rm = FALSE, ties = t), fnth(1:3, n, w = rep(1, 3), na.rm = FALSE, ties = t)) expect_equal(fnth(-1:1, n, na.rm = FALSE, ties = t), fnth(-1:1, n, w = rep(12, 3), na.rm = FALSE, ties = t)) expect_equal(fnth(x, n, ties = t), fnth(x, n, w = rep(1,100), ties = t)) expect_equal(fnth(x, n, na.rm = FALSE, ties = t), fnth(x, n, w = rep(1, 100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, na.rm = FALSE, ties = t), fnth(xNA, n, w = rep(5, 100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, ties = t), fnth(xNA, n, w = rep(4, 100), ties = t)) expect_equal(fnth(m, n, ties = t), fnth(m, n, w = rep(6587, 32), ties = t)) expect_equal(fnth(m, n, na.rm = FALSE, ties = t), fnth(m, n, w = rep(6587, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, na.rm = FALSE, ties = t), fnth(mNA, n, w = rep(6587, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, ties = t), fnth(mNA, n, w = rep(6587, 32), ties = t)) expect_equal(fnth(mtcars, n, ties = t), fnth(mtcars, n, w = rep(6787, 32), ties = t)) expect_equal(fnth(mtcars, n, na.rm = FALSE, ties = t), fnth(mtcars, n, w = rep(6787, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, na.rm = FALSE, ties = t), fnth(mtcNA, n, w = rep(6787, 32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, ties = t), fnth(mtcNA, n, w = rep(6787, 32), ties = t)) expect_equal(fnth(x, n, f, ties = t), fnth(x, n, f, rep(547,100), ties = t)) expect_equal(fnth(x, n, f, na.rm = FALSE, ties = t), fnth(x, n, f, rep(6, 100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, f, na.rm = FALSE, ties = t), fnth(xNA, n, f, rep(52,100), na.rm = FALSE, ties = t)) expect_equal(fnth(xNA, n, f, ties = t), fnth(xNA, n, f, rep(5997456,100), ties = t)) expect_equal(fnth(m, n, g, ties = t), fnth(m, n, g, rep(546,32), ties = t)) expect_equal(fnth(m, n, g, na.rm = FALSE, ties = t), fnth(m, n, g, rep(1,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, g, na.rm = FALSE, ties = t), fnth(mNA, n, g, rep(5,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mNA, n, g, ties = t), fnth(mNA, n, g, rep(1,32), ties = t)) expect_equal(fnth(mtcars, n, g, ties = t), fnth(mtcars, n, g, rep(53,32), ties = t)) expect_equal(fnth(mtcars, n, g, na.rm = FALSE, ties = t), fnth(mtcars, n, g, rep(546,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, g, na.rm = FALSE, ties = t), fnth(mtcNA, n, g, rep(1,32), na.rm = FALSE, ties = t)) expect_equal(fnth(mtcNA, n, g, ties = t), fnth(mtcNA, n, g, rep(999,32), ties = t)) #} } }) test_that("fnth with weights performs like wnth (defined above)", { for(t in c("mean","min","max")) { # print(t) for(i in 1:3) { n = round(runif(1, min = 1, max = 999) / 1000, 3) # complete weights expect_equal(fnth(NA, n, w = 1, ties = t), wnth(NA_real_, n, 1, ties = t)) expect_equal(fnth(NA, n, w = 1, na.rm = FALSE, ties = t), wnth(NA_real_, n, 1, ties = t)) expect_equal(fnth(1, n, w = 1, ties = t), wnth(1, n, w = 1, ties = t)) expect_equal(fnth(1:3, n, w = 1:3, ties = t), wnth(1:3, n, 1:3, ties = t)) expect_equal(fnth(-1:1, n, w = 1:3, ties = t), wnth(-1:1, n, 1:3, ties = t)) expect_equal(fnth(1, n, w = 1, na.rm = FALSE, ties = t), wnth(1, n, 1, ties = t)) expect_equal(fnth(1:3, n, w = c(0.99,3454,1.111), na.rm = FALSE, ties = t), wnth(1:3, n, c(0.99,3454,1.111), ties = t)) expect_equal(fnth(-1:1, n, w = 1:3, na.rm = FALSE, ties = t), wnth(-1:1, n, 1:3, ties = t)) expect_equal(fnth(x, n, w = w, ties = t), wnth(x, n, w, ties = t)) expect_equal(fnth(x, n, w = w, na.rm = FALSE, ties = t), wnth(x, n, w, ties = t)) expect_equal(fnth(xNA, n, w = w, na.rm = FALSE, ties = t), wnth(xNA, n, w, ties = t)) expect_equal(fnth(xNA, n, w = w, ties = t), wnth(xNA, n, w, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, w = wdat, ties = t), fnth(m, n, w = wdat, ties = t)) expect_equal(fnth(m, n, w = wdat, ties = t), dapply(m, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(m, n, w = wdat, na.rm = FALSE, ties = t), dapply(m, wnth, n, wdat, ties = t)) expect_equal(fnth(mNA, n, w = wdat, na.rm = FALSE, ties = t), dapply(mNA, wnth, n, wdat, ties = t)) expect_equal(fnth(mNA, n, w = wdat, ties = t), dapply(mNA, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, w = wdat, ties = t), dapply(mtcars, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, w = wdat, na.rm = FALSE, ties = t), dapply(mtcars, wnth, n, wdat, ties = t)) expect_equal(fnth(mtcNA, n, w = wdat, na.rm = FALSE, ties = t), dapply(mtcNA, wnth, n, wdat, ties = t)) expect_equal(fnth(mtcNA, n, w = wdat, ties = t), dapply(mtcNA, wnth, n, wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(x, n, f, w, ties = t), BY(x, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(x, n, f, w, na.rm = FALSE, ties = t), BY(x, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(xNA, n, f, w, na.rm = FALSE, ties = t), BY(xNA, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(xNA, n, f, w, ties = t), BY(xNA, f, wnth, n = n, w = w, na.rm = TRUE, ties = t)) expect_equal(fnth(m, n, g, wdat, ties = t), BY(m, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(m, n, g, wdat, na.rm = FALSE, ties = t), BY(m, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mNA, n, g, wdat, na.rm = FALSE, ties = t), BY(mNA, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mNA, n, g, wdat, ties = t), BY(mNA, gf, wnth, n = n, w = wdat, na.rm = TRUE, ties = t)) expect_equal(fnth(mtcars, n, g, wdat, ties = t), BY(mtcars, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mtcars, n, g, wdat, na.rm = FALSE, ties = t), BY(mtcars, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mtcNA, n, g, wdat, na.rm = FALSE, ties = t), BY(mtcNA, gf, wnth, n = n, w = wdat, ties = t)) expect_equal(fnth(mtcNA, n, g, wdat, ties = t), BY(mtcNA, gf, wnth, w = wdat, n = n, na.rm = TRUE, ties = t)) # missing weights: Only supported if x is also missing... expect_equal(fnth(NA, n, w = NA, ties = t), wnth(NA_real_, n, NA_real_, ties = t)) expect_equal(fnth(NA, n, w = NA, na.rm = FALSE, ties = t), wnth(NA_real_, n, NA_real_, ties = t)) expect_equal(fnth(xNA, n, w = wNA, na.rm = FALSE, ties = t), wnth(xNA, n, wNA, ties = t)) expect_equal(fnth(xNA, n, w = wNA, ties = t), wnth(xNA, n, wNA, na.rm = TRUE, ties = t)) expect_equal(fnth(xNA, n, f, wNA, na.rm = FALSE, ties = t), BY(xNA, f, wnth, n = n, w = w, ties = t)) expect_equal(fnth(xNA, n, f, wNA, ties = t), BY(xNA, f, wnth, n = n, w = w, na.rm = TRUE, ties = t)) } } }) test_that("fnth properly deals with missing data", { expect_equal(fnth(NA), NA_real_) expect_equal(fnth(NA, na.rm = FALSE), NA_real_) expect_equal(fnth(rep(NA, 2), w = 1:2), NA_real_) expect_equal(fnth(rep(NA, 2), w = 1:2), NA_real_) expect_equal(fnth(NA, w = 1), NA_real_) expect_equal(fnth(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fnth(1), 1) expect_equal(fnth(1, na.rm = FALSE), 1) expect_error(fnth(1:2, w = rep(NA, 2))) expect_error(fnth(1:2, w = c(1, NA))) expect_error(fnth(1:2, w = c(NA, 1))) }) } collapse/tests/testthat/test-list-processing.R0000644000176200001440000001264714777170131021325 0ustar liggesuserscontext("list-processing") NCRAN <- Sys.getenv("NCRAN") == "TRUE" l <- lm(mpg ~cyl + vs + am, mtcars) # str(l, give.attr = FALSE) is.regular <- function(x) is.atomic(x) || is.list(x) test_that("atomic_elem and list_elem work well", { expect_equal(atomic_elem(l), unclass(l)[sapply(l, is.atomic)]) expect_equal(list_elem(l), unclass(l)[sapply(l, is.list)]) expect_equal(atomic_elem(l, keep.class = TRUE), `oldClass<-`(unclass(l)[sapply(l, is.atomic)], oldClass(l))) expect_equal(list_elem(l, keep.class = TRUE), `oldClass<-`(unclass(l)[sapply(l, is.list)], oldClass(l))) for(i in 1:6) expect_equal(atomic_elem(l, keep.class = TRUE, return = i), get_vars(l, is.atomic, return = i)) for(i in 1:6) expect_equal(list_elem(l, keep.class = TRUE, return = i), get_vars(l, is.list, return = i)) expect_identical(`atomic_elem<-`(l, atomic_elem(l)), l) expect_identical(`list_elem<-`(l, list_elem(l)), l) expect_error(`atomic_elem<-`(l, list_elem(l))) expect_error(`list_elem<-`(l, atomic_elem(l))) }) test_that("ldepth works well", { expect_identical(ldepth(list(mtcars), DF.as.list = FALSE), 1L) expect_identical(ldepth(list(mtcars), DF.as.list = TRUE), 2L) expect_identical(ldepth(list(mtcars, l), DF.as.list = FALSE), 3L) expect_identical(ldepth(list(mtcars, l), DF.as.list = TRUE), 3L) expect_identical(ldepth(list(list(list(mtcars)), l), DF.as.list = FALSE), 3L) expect_identical(ldepth(list(list(list(mtcars)), l), DF.as.list = TRUE), 4L) }) test_that("rapply2d works well", { l2 <- list(qM(mtcars), list(qM(mtcars), as.matrix(mtcars))) expect_equal(rapply2d(l2, fmean), rapply(l2, fmean, how = "list")) expect_equal(rapply2d(l[-length(l)], is.regular), rapply(l[-length(l)], is.regular, how = "list")) }) test_that("get_elem works well", { # Could still add more tests.. if(NCRAN) expect_true(is.matrix(get_elem(l, is.matrix))) if(NCRAN) expect_true(is.matrix(get_elem(list(list(list(l))), is.matrix))) if(NCRAN) expect_false(is.matrix(get_elem(list(list(list(l))), is.matrix, keep.tree = TRUE))) l2 <- list(list(2,list("a",1)),list(1,list("b",2))) expect_identical(get_elem(l2, is.character), list("a", "b")) expect_identical(get_elem(l2, is.character, keep.tree = TRUE), list(list(list("a")),list(list("b")))) expect_identical(get_elem(l, "residuals"), resid(l)) expect_identical(get_elem(l, "fit", regex = TRUE), fitted(l)) expect_equal(get_elem(l, "tol"), 1e-7) expect_identical(get_elem(mtcars, 1), mtcars[[1]]) expect_identical(get_elem(mtcars, 1, DF.as.list = TRUE), as.list(ss(mtcars, 1))) expect_true(length(get_elem(get_elem(l, is.matrix, invert = TRUE), is.matrix)) == 0L) expect_true(length(get_elem(get_elem(l, is.data.frame, invert = TRUE), is.data.frame)) == 0L) expect_true(length(get_elem(l, "pivot")) > 1L) expect_true(length(get_elem(get_elem(l, "pivot", invert = TRUE), "pivot")) == 0L) expect_true(length(get_elem(get_elem(l, "piv", regex = TRUE, invert = TRUE), "pivot")) == 0L) expect_true(length(get_elem(get_elem(l, "piv", regex = TRUE, invert = TRUE), "piv", regex = TRUE)) == 0L) expect_false(length(get_elem(get_elem(l, "piv", regex = TRUE, invert = TRUE), "tol")) == 0L) expect_equal(get_elem(list(list(a = 1), list(b = "a")), "b"), "a") expect_equal(get_elem(list(list(a = 1), list(b = "a")), "b", invert = TRUE), 1) expect_equal(get_elem(list(list(a = 1), list(b = "a")), "b", keep.tree = TRUE), list(list(b = "a"))) expect_equal(get_elem(list(list(a = 1), list(b = "a")), "b", invert = TRUE, keep.tree = TRUE), list(list(a = 1))) expect_equal(get_elem(list(list(a = 1), list(b = "a")), is.character), "a") expect_equal(get_elem(list(list(a = 1), list(b = "a")), is.character, invert = TRUE), 1) expect_equal(get_elem(list(list(a = 1), list(b = "a")), is.character, keep.tree = TRUE), list(list(b = "a"))) expect_equal(get_elem(list(list(a = 1), list(b = "a")), is.character, invert = TRUE, keep.tree = TRUE), list(list(a = 1))) }) if(NCRAN) test_that("reg_elem and irreg_elem work well", { expect_true(is_unlistable(reg_elem(l))) expect_false(is_unlistable(irreg_elem(l))) expect_true(is_unlistable(reg_elem(list(l), keep.tree = FALSE))) expect_true(is_unlistable(reg_elem(list(l), keep.tree = TRUE))) expect_false(is_unlistable(irreg_elem(list(l), keep.tree = FALSE))) expect_false(is_unlistable(irreg_elem(list(l), keep.tree = TRUE))) }) if(NCRAN) test_that("has_elem works well", { expect_true(has_elem(l, is.matrix)) expect_true(has_elem(l, is.data.frame)) expect_true(has_elem(l, is.data.frame, DF.as.list = TRUE)) expect_true(has_elem(l, is_categorical)) expect_false(has_elem(l, is_date)) expect_false(has_elem(l, is_qG)) expect_false(has_elem(l, "am", recursive = FALSE)) expect_false(has_elem(l, "pivot", recursive = FALSE)) expect_true(has_elem(l, "pivot")) expect_true(has_elem(l, "am", DF.as.list = TRUE)) expect_false(has_elem(l, "am")) expect_true(has_elem(l, "tol")) expect_false(has_elem(l, "mod")) expect_true(has_elem(l, "mod", regex = TRUE)) expect_true(has_elem(l, "vot", regex = TRUE)) expect_false(has_elem(l, "piv", regex = TRUE, recursive = FALSE)) }) test_that("coercions in rbindlist", { expect_true(allv(vtypes(unlist2d(list(dapply(mtcars, as.integer), mtcars), idcols = FALSE)), "double")) expect_true(allv(vtypes(unlist2d(list(mtcars, dapply(mtcars, as.integer)), idcols = FALSE)), "double")) expect_true(allv(vtypes(unlist2d(list(dapply(mtcars, as.integer), dapply(mtcars, as.integer)), idcols = FALSE)), "integer")) }) collapse/tests/testthat/test-indexing.R0000644000176200001440000001217614777170131020002 0ustar liggesuserscontext("indexing") wldi <- iby(wlddev, country, year) test_that("unindexing and reindexing work well", { expect_equal(wlddev, unindex(wldi)) expect_equal(wlddev$PCGDP, unindex(wldi$PCGDP)) expect_equal(wlddev$region, unindex(wldi$region)) expect_equal(wldi, reindex(wldi)) expect_equal(wldi$PCGDP, reindex(wldi$PCGDP)) expect_equal(wldi$region, reindex(wldi$region)) expect_equal(wldi, reindex(wlddev, ix(wldi))) expect_equal(wldi$PCGDP, reindex(wldi$PCGDP, ix(wldi$PCGDP))) expect_equal(wldi$region, reindex(wldi$region, ix(wldi$region))) }) require(magrittr) # attach here for next two tests test_that("subsetting works well", { skip_if_not_installed("magrittr") expect_equal(fsubset(wldi, iso3c %in% c("KEN", "USA", "CHN")), findex_by(fsubset(wlddev, iso3c %in% c("KEN", "USA", "CHN")), country, year)) expect_equal(fsubset(wldi, iso3c %in% c("KEN", "USA", "CHN"), country, year, PCGDP, POP), findex_by(fsubset(wlddev, iso3c %in% c("KEN", "USA", "CHN"), country, year, PCGDP, POP), country, year)) expect_equal(wldi[wldi$iso3c %in% c("KEN", "USA", "CHN"), ] %>% setRownames(), ss(wlddev, wlddev$iso3c %in% c("KEN", "USA", "CHN")) %>% findex_by(country, year) %>% dapply(`attr<-`, "label", NULL)) expect_true(all_obj_equal(wldi[.c(country, year, PCGDP, POP)], wldi[, .c(country, year, PCGDP, POP)], wlddev[.c(country, year, PCGDP, POP)] %>% findex_by(country, year))) expect_equal(wldi$PCGDP[5:1000], reindex(wlddev$PCGDP[5:1000], ix(wldi)[5:1000, ])) expect_equal(wldi$PCGDP[100], wlddev$PCGDP[100]) expect_equal(wldi$PCGDP[[100]], wlddev$PCGDP[[100]]) }) test_that("indexed data.table works well", { skip_if_not_installed("magrittr") skip_if_not_installed("data.table") library(data.table) wlddt <- qDT(wlddev) wldidt <- wlddt %>% findex_by(iso3c, year) expect_equal(unindex(wldidt[1:1000]), wlddt[1:1000]) expect_equal(unindex(wldidt[year > 2000]), wlddt[year > 2000]) expect_equal(wldidt[, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country], wlddt[, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country]) expect_equal(wldidt[, lapply(.SD, sum, na.rm = TRUE), by = country, .SDcols = .c(PCGDP, LIFEEX)], wlddt[, lapply(.SD, sum, na.rm = TRUE), by = country, .SDcols = .c(PCGDP, LIFEEX)]) expect_equal(wldidt[year > 2000, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country], wlddt[year > 2000, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country]) # 'unclass' because of 'invisible' class expect_equal(unclass(unindex(wldidt[, PCGDP_growth_5Y := G(PCGDP, 5, power = 1/5)])), unclass(wlddt[, PCGDP_growth_5Y := G(PCGDP, 5, 1, iso3c, year, power = 1/5)])) expect_equal(unindex(wldidt[, PCGDP_growth_5Y := G(PCGDP, 5, power = 1/5)][1:5]), wlddt[, PCGDP_growth_5Y := G(PCGDP, 5, 1, iso3c, year, power = 1/5)][1:5]) expect_equal(unindex(wldidt[, PCGDP_growth_5Y := G(PCGDP, 5, power = 1/5)][, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country]), wlddt[, PCGDP_growth_5Y := G(PCGDP, 5, 1, iso3c, year, power = 1/5)][, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country]) expect_equal(unclass(unindex(wldidt[, .c(PCGDP_growth_5Y, LIFEEX_growth_5Y) := lapply(slt(.SD, PCGDP, LIFEEX), G, 5, power = 1/5)])), unclass(wlddt[, .c(PCGDP_growth_5Y, LIFEEX_growth_5Y) := lapply(slt(.SD, PCGDP, LIFEEX), G, 5, 1, iso3c, year, power = 1/5)])) }) test_that("data selection by type works well", { for (FUN in list(num_vars, cat_vars, char_vars, logi_vars, fact_vars, date_vars)) expect_equal(names(FUN(wlddev)), names(FUN(wldi))) }) test_that("descriptives work well", { expect_equal(descr(wlddev), `attr<-`(descr(wldi), "name", "wlddev")) expect_equal(qsu(wlddev, pid = wlddev$country), qsu(wldi)) expect_equal(varying(wlddev, by = ~country), varying(wldi)) expect_equal(qtable(r = wlddev$region, i = wlddev$income), qtable(r = wldi$region, i = wldi$income)) expect_equal(pwcor(nv(wlddev)), pwcor(nv(wldi))) }) test_that("Id variables are properly preserved in operator methods", { wld1i <- findex_by(fsubset(wlddev, iso3c %==% "DEU"), year) GGDCii <- findex_by(GGDC10S, Variable, Country, Year) GGDCi <- findex_by(GGDC10S, Variable, Country, Year, interact.ids = FALSE) for(FUN in list(L, F, D, Dlog, G, B, W, STD)) { expect_identical(names(FUN(wld1i, cols = "PCGDP", stub = FALSE)), c("year", "PCGDP")) expect_identical(names(FUN(wld1i, cols = "PCGDP", keep.ids = FALSE, stub = FALSE)), "PCGDP") expect_identical(names(FUN(wldi, cols = "PCGDP", stub = FALSE)), c("country", "year", "PCGDP")) expect_identical(names(FUN(wldi, cols = "PCGDP", keep.ids = FALSE, stub = FALSE)), "PCGDP") expect_identical(names(FUN(GGDCi, cols = "SUM", stub = FALSE)), c("Country", "Variable", "Year", "SUM")) expect_identical(names(FUN(GGDCi, cols = "SUM", keep.ids = FALSE, stub = FALSE)), "SUM") expect_identical(names(FUN(GGDCii, cols = "SUM", stub = FALSE)), c("Country", "Variable", "Year", "SUM")) expect_identical(names(FUN(GGDCii, cols = "SUM", keep.ids = FALSE, stub = FALSE)), "SUM") } }) collapse/tests/testthat/test-psmat-psacf.R0000644000176200001440000002563214777170131020414 0ustar liggesuserscontext("psmat and psacf") # rm(list = ls()) options(warn = -1) test_that("psmat works as intended", { expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year), psmat(wlddev, PCGDP ~ iso3c, ~ year)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year), psmat(wlddev[9], wlddev$iso3c, wlddev$year)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year), psmat(wlddev, ~ iso3c, ~ year, cols = 9:12)) # without year expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c), psmat(wlddev, PCGDP ~ iso3c)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c), psmat(wlddev[9], wlddev$iso3c)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c), psmat(wlddev, ~ iso3c, cols = 9:12)) # only nid's expect_identical(psmat(wlddev$PCGDP, 216), psmat(wlddev[9], 216)) expect_identical(psmat(wlddev[9:12], 216), psmat(wlddev, 216, cols = 9:12)) # TRANSPOSE expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year, transpose = TRUE), `attr<-`(t(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year)), "transpose", TRUE)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev, PCGDP ~ iso3c, ~ year, transpose = TRUE)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev[9], wlddev$iso3c, wlddev$year, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, transpose = TRUE), psmat(wlddev, ~ iso3c, ~ year, cols = 9:12, transpose = TRUE)) # without year expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, transpose = TRUE), psmat(wlddev, PCGDP ~ iso3c, transpose = TRUE)) expect_identical(psmat(wlddev$PCGDP, wlddev$iso3c, transpose = TRUE), psmat(wlddev[9], wlddev$iso3c, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, transpose = TRUE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, transpose = TRUE), psmat(wlddev, ~ iso3c, cols = 9:12, transpose = TRUE)) # only nid's expect_identical(psmat(wlddev$PCGDP, 216, transpose = TRUE), psmat(wlddev[9], 216, transpose = TRUE)) expect_identical(psmat(wlddev[9:12], 216, transpose = TRUE), psmat(wlddev, 216, cols = 9:12, transpose = TRUE)) # LIST-OUTPUT expect_true(is.array(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year))) expect_true(is.list(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, array = FALSE))) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, array = FALSE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, array = FALSE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year, array = FALSE), psmat(wlddev, ~ iso3c, ~ year, cols = 9:12, array = FALSE)) # without year expect_identical(psmat(wlddev[9:12], wlddev$iso3c, array = FALSE), psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, array = FALSE)) expect_identical(psmat(wlddev[9:12], wlddev$iso3c, array = FALSE), psmat(wlddev, ~ iso3c, cols = 9:12, array = FALSE)) # only nid's expect_identical(psmat(wlddev[9:12], 216, array = FALSE), psmat(wlddev, 216, cols = 9:12, array = FALSE)) }) test_that("psacf works as intended", { x <- na_rm(wlddev$PCGDP) expect_equal(unclass(psacf(x, rep(1,length(x)), seq_along(x), lag.max = 12, plot = FALSE))[1:4], unclass(acf(x, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-3) expect_equal(unclass(psacf(x, rep(1,length(x)), seq_along(x), type = "covariance", lag.max = 12, gscale = FALSE, plot = FALSE))[1:4], unclass(acf(x, type = "covariance", lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-3) expect_equal(unclass(pspacf(x, rep(1,length(x)), seq_along(x), lag.max = 12, plot = FALSE))[1:4], unclass(pacf(x, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-3) dat <- na_omit(get_vars(wlddev, c(9:10,12))) expect_equal(unclass(psacf(dat, rep(1,nrow(dat)), seq_row(dat), lag.max = 12, plot = FALSE))[1:4], unclass(acf(dat, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-2) expect_equal(unclass(psacf(dat, rep(1,nrow(dat)), seq_row(dat), type = "covariance", lag.max = 12, gscale = FALSE, plot = FALSE))[1:4], unclass(acf(dat, type = "covariance", lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-2) # expect_equal(unclass(pspacf(dat, rep(1,nrow(dat)), seq_row(dat), lag.max = 12, plot = FALSE))[1:4], unclass(pacf(dat, lag.max = 12, plot = FALSE))[1:4], tolerance = 1e-2) # This is strange !!!! expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev[9], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(psacf(wlddev, ~ iso3c, ~ year, cols = 9:12, plot = FALSE))[1:4]) # equality expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) # without year expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev[9], wlddev$iso3c, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(psacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(psacf(wlddev, ~ iso3c, cols = 9:12, plot = FALSE))[1:4]) }) test_that("pspacf works as intended", { expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev[9], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, wlddev$year, plot = FALSE))[1:4], unclass(pspacf(wlddev, ~ iso3c, ~ year, cols = 9:12, plot = FALSE))[1:4]) # equality expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year, plot = FALSE))[1:4]) # without year expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev$PCGDP, wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev[9], wlddev$iso3c, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, plot = FALSE))[1:4]) expect_equal(unclass(pspacf(wlddev[9:12], wlddev$iso3c, plot = FALSE))[1:4], unclass(pspacf(wlddev, ~ iso3c, cols = 9:12, plot = FALSE))[1:4]) }) test_that("psmat gives errors for wrong input", { # wrong lengths expect_error(psmat(wlddev$PCGDP, wlddev$iso3c[-1], wlddev$year)) expect_error(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year[-1])) expect_error(psmat(wlddev[9:12], wlddev$iso3c[-1], wlddev$year)) expect_error(psmat(wlddev[9:12], wlddev$iso3c, wlddev$year[-1])) # without year expect_error(psmat(wlddev$PCGDP, wlddev$iso3c[-1])) expect_error(psmat(wlddev[9:12], wlddev$iso3c[-1])) # only nid's expect_error(psmat(wlddev$PCGDP, 218)) expect_error(psmat(wlddev[9:12], 218)) # wrong formula expect_error(psmat(wlddev, PCGDP2 ~ iso3c, ~ year)) expect_error(psmat(wlddev, PCGDP ~ iso3c2, ~ year)) expect_error(psmat(wlddev, PCGDP ~ iso3c, ~ year2)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year + bla)) # without year expect_error(psmat(wlddev, PCGDP2 ~ iso3c)) expect_error(psmat(wlddev, PCGDP ~ iso3c2)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year)) expect_error(psmat(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year)) # cols expect_error(psmat(wlddev, ~ iso3c, ~ year, cols = 14)) expect_error(psmat(wlddev, ~ iso3c, ~ year, cols = "bla")) expect_visible(psmat(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric))) expect_error(psmat(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric)[-1])) }) test_that("psacf gives errors for wrong input", { # wrong lengths expect_error(psacf(wlddev$PCGDP, wlddev$iso3c[-1], wlddev$year, plot = FALSE)) expect_error(psacf(wlddev$PCGDP, wlddev$iso3c, wlddev$year[-1], plot = FALSE)) expect_error(psacf(wlddev[9:12], wlddev$iso3c[-1], wlddev$year, plot = FALSE)) expect_error(psacf(wlddev[9:12], wlddev$iso3c, wlddev$year[-1], plot = FALSE)) # without year expect_error(psacf(wlddev$PCGDP, wlddev$iso3c[-1], plot = FALSE)) expect_error(psacf(wlddev[9:12], wlddev$iso3c[-1], plot = FALSE)) # this should give error... expect_error(psacf(wlddev$PCGDP, 218, plot = FALSE)) expect_error(psacf(wlddev[9:12], 218, plot = FALSE)) # wrong formula expect_error(psacf(wlddev, PCGDP2 ~ iso3c, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP ~ iso3c2, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP ~ iso3c, ~ year2, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c, ~ year + bla, plot = FALSE)) # without year expect_error(psacf(wlddev, PCGDP2 ~ iso3c, plot = FALSE)) expect_error(psacf(wlddev, PCGDP ~ iso3c2, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA + bla ~ iso3c, ~ year, plot = FALSE)) expect_error(psacf(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + bla, ~ year, plot = FALSE)) # cols expect_error(psacf(wlddev, ~ iso3c, ~ year, cols = 14, plot = FALSE)) expect_error(psacf(wlddev, ~ iso3c, ~ year, cols = "bla", plot = FALSE)) expect_visible(psacf(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric), plot = FALSE)) expect_error(psacf(wlddev, ~ iso3c, ~ year, cols = sapply(wlddev, is.numeric)[-1], plot = FALSE)) }) options(warn = 1) collapse/tests/testthat/test-fHDbetween-fHDwithin-HDB-HDW.R0000644000176200001440000016557015202513156023104 0ustar liggesuserscontext("fhdbetween / HDB and fhdwithin / HDW") # rm(list = ls()) # TODO: Sort out why certain tests fail... failtests = FALSE options(warn = -1) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(rep(1:10, each = 10)) g <- as.factor(rep(c(1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,7,7,7,7,7,7,7,10,10,10,10,10,10,10,10,10,10))) mtcNA <- na_insert(mtcars) mtcNA[1,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" baseresid <- function(y, X, na.rm = FALSE) { y <- qM(y) if(is.list(X)) X <- do.call(cbind, X) X <- cbind(Intercept = 1L, X) if(na.rm) { cc <- complete.cases(y, X) y <- y[cc, , drop = FALSE] X <- X[cc, , drop = FALSE] } drop(qr.resid(qr.default(X), y)) } basefitted <- function(y, X, na.rm = FALSE) { y <- qM(y) if(is.list(X)) X <- do.call(cbind, X) X <- cbind(Intercept = 1L, X) if(na.rm) { cc <- complete.cases(y, X) y <- y[cc, , drop = FALSE] X <- X[cc, , drop = FALSE] } drop(qr.fitted(qr.default(X), y)) } # fhdbetween and fhdwithin test_that("fhdbetween with one factor performs like fbetween", { expect_equal(fhdbetween(x, f), fbetween(x, f)) expect_equal(fhdbetween(x, f, na.rm = FALSE), fbetween(x, f, na.rm = FALSE)) expect_equal(fhdbetween(xNA, f, na.rm = FALSE), fbetween(xNA, f, na.rm = FALSE)) expect_equal(`attributes<-`(fhdbetween(xNA, f, fill = TRUE), NULL), fbetween(xNA, f)) expect_equal(fhdbetween(m, g), fbetween(m, g)) expect_equal(fhdbetween(m, g, na.rm = FALSE), fbetween(m, g, na.rm = FALSE)) expect_equal(fhdbetween(mNA, g, na.rm = FALSE), fbetween(mNA, g, na.rm = FALSE)) # expect_equal(fhdbetween(mNA, g, fill = TRUE), fbetween(mNA, g)) # not matching, fhdbetween matrix is not variable.wise expect_equal(fhdbetween(mtcars, g), fbetween(mtcars, g)) expect_equal(fhdbetween(mtcars, g, na.rm = FALSE), fbetween(mtcars, g, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, na.rm = FALSE), fbetween(mtcNA, g, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, variable.wise = TRUE), fbetween(mtcNA, g)) # with weights expect_equal(fhdbetween(x, f, w), fbetween(x, f, w)) expect_equal(fhdbetween(x, f, w, na.rm = FALSE), fbetween(x, f, w, na.rm = FALSE)) expect_equal(fhdbetween(xNA, f, w, na.rm = FALSE), fbetween(xNA, f, w, na.rm = FALSE)) expect_equal(`attributes<-`(fhdbetween(xNA, f, w, fill = TRUE), NULL), fbetween(xNA, f, w)) expect_equal(fhdbetween(m, g, wdat), fbetween(m, g, wdat)) expect_equal(fhdbetween(m, g, wdat, na.rm = FALSE), fbetween(m, g, wdat, na.rm = FALSE)) expect_equal(fhdbetween(mNA, g, wdat, na.rm = FALSE), fbetween(mNA, g, wdat, na.rm = FALSE)) # expect_equal(fhdbetween(mNA, g, fill = TRUE), fbetween(mNA, g)) # not matching, fhdbetween matrix is not variable.wise expect_equal(fhdbetween(mtcars, g, wdat), fbetween(mtcars, g, wdat)) expect_equal(fhdbetween(mtcars, g, wdat, na.rm = FALSE), fbetween(mtcars, g, wdat, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, wdat, na.rm = FALSE), fbetween(mtcNA, g, wdat, na.rm = FALSE)) expect_equal(fhdbetween(mtcNA, g, wdat, variable.wise = TRUE), fbetween(mtcNA, g, wdat)) }) test_that("fhdwithin with one factor performs like fwithin", { expect_equal(fhdwithin(x, f), fwithin(x, f)) expect_equal(fhdwithin(x, f, na.rm = FALSE), fwithin(x, f, na.rm = FALSE)) expect_equal(fhdwithin(xNA, f, na.rm = FALSE), fwithin(xNA, f, na.rm = FALSE)) expect_equal(`attributes<-`(fhdwithin(xNA, f, fill = TRUE), NULL), fwithin(xNA, f)) expect_equal(fhdwithin(m, g), fwithin(m, g)) expect_equal(fhdwithin(m, g, na.rm = FALSE), fwithin(m, g, na.rm = FALSE)) expect_equal(fhdwithin(mNA, g, na.rm = FALSE), fwithin(mNA, g, na.rm = FALSE)) # expect_equal(fhdwithin(mNA, g, fill = TRUE), fwithin(mNA, g)) # not matching, fhdwithin matrix is not variable.wise expect_equal(fhdwithin(mtcars, g), fwithin(mtcars, g)) expect_equal(fhdwithin(mtcars, g, na.rm = FALSE), fwithin(mtcars, g, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, na.rm = FALSE), fwithin(mtcNA, g, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, variable.wise = TRUE), fwithin(mtcNA, g)) # with weights expect_equal(fhdwithin(x, f, w), fwithin(x, f, w)) expect_equal(fhdwithin(x, f, w, na.rm = FALSE), fwithin(x, f, w, na.rm = FALSE)) expect_equal(fhdwithin(xNA, f, w, na.rm = FALSE), fwithin(xNA, f, w, na.rm = FALSE)) expect_equal(`attributes<-`(fhdwithin(xNA, f, w, fill = TRUE), NULL), fwithin(xNA, f, w)) expect_equal(fhdwithin(m, g, wdat), fwithin(m, g, wdat)) expect_equal(fhdwithin(m, g, wdat, na.rm = FALSE), fwithin(m, g, wdat, na.rm = FALSE)) expect_equal(fhdwithin(mNA, g, wdat, na.rm = FALSE), fwithin(mNA, g, wdat, na.rm = FALSE)) # expect_equal(fhdwithin(mNA, g, wdat, fill = TRUE), fwithin(mNA, g)) # not matching, wdat, fhdwithin matrix is not variable.wise expect_equal(fhdwithin(mtcars, g, wdat), fwithin(mtcars, g, wdat)) expect_equal(fhdwithin(mtcars, g, wdat, na.rm = FALSE), fwithin(mtcars, g, wdat, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, wdat, na.rm = FALSE), fwithin(mtcNA, g, wdat, na.rm = FALSE)) expect_equal(fhdwithin(mtcNA, g, wdat, variable.wise = TRUE), fwithin(mtcNA, g, wdat)) }) set.seed(101) f2 <- qF(sample.int(10, 100, TRUE)) fl <- list(f, f2) g2 <- qF(sample.int(5, 32, TRUE)) gl <- list(g, g2) # This is to fool very silly checks on CRAN scanning the code of the tests if(identical(Sys.getenv("LOCAL"), "TRUE")) demeanlist <- eval(parse(text = paste0("lfe", ":", ":", "demeanlist"))) tol <- if(identical(Sys.getenv("LOCAL"), "TRUE")) 1e-5 else 1e-4 has_fixest <- tryCatch(requireNamespace("fixest", quietly = TRUE), error = function(e) FALSE) if(has_fixest) { demean <- fixest::demean # eval(parse(text = paste0("fixest", ":", ":", "demean"))) # lfe is back on CRAN: This now also seems to produce a warning !!!!!!! if(identical(Sys.getenv("LOCAL"), "TRUE")) test_that("fhdbetween with two factors performs like demeanlist", { expect_equal(fhdbetween(x, fl), demeanlist(x, fl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(xNA, fl), demeanlist(xNA, fl, means = TRUE, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(xNA, fl, fill = TRUE)) expect_equal(fhdbetween(m, gl), demeanlist(m, gl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(mNA, gl, na.rm = FALSE), demeanlist(mNA, gl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(mNA, gl), demeanlist(mNA, gl, means = TRUE, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(mNA, gl, fill = TRUE)) expect_equal(fhdbetween(mtcars, gl), demeanlist(mtcars, gl, means = TRUE), tolerance = tol) expect_equal(fhdbetween(mtcNA, gl, na.rm = FALSE), demeanlist(mtcNA, gl, means = TRUE), tolerance = tol) expect_equal(setRownames(fhdbetween(mtcNA, gl)), demeanlist(mtcNA, gl, means = TRUE, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(mtcNA, gl, fill = TRUE)) expect_visible(fhdbetween(mtcNA, gl, variable.wise = TRUE)) # With weights expect_equal(fhdbetween(x, fl, w), drop(x - demean(x, fl, weights = w)), tolerance = tol) expect_equal(unattrib(fhdbetween(xNA, fl, w)), drop(na_rm(xNA) - demean(xNA, fl, weights = w, na.rm = TRUE)), tolerance = tol) expect_visible(fhdbetween(xNA, fl, w, fill = TRUE)) expect_equal(fhdbetween(m, gl, wdat), m - demean(m, gl, weights = wdat), tolerance = tol) expect_equal(fhdbetween(mNA, gl, wdat, na.rm = FALSE), demeanlist(mNA, gl, weights = wdat, means = TRUE), tolerance = tol) expect_equal(unattrib(fhdbetween(mNA, gl, wdat)), unattrib(na_omit(mNA) - demean(mNA, gl, weights = wdat, na.rm = TRUE)), tolerance = tol) expect_visible(fhdbetween(mNA, gl, wdat, fill = TRUE)) # This one is a bug in demean and will be fixed soon... expect_equal(fhdbetween(mtcars, gl, wdat), mtcars %c-% demean(mtcars, gl, weights = wdat), tolerance = tol) expect_equal(fhdbetween(mtcNA, gl, na.rm = FALSE), demeanlist(mtcNA, gl, weights = wdat, means = TRUE), tolerance = tol) # Same here expect_equal(unattrib(fhdbetween(mtcNA, gl, wdat)), unattrib(na_omit(mtcNA) %c-% demean(mtcNA, gl, weights = wdat, na.rm = TRUE)), tolerance = tol) expect_visible(fhdbetween(mtcNA, gl, wdat, fill = TRUE)) expect_visible(fhdbetween(mtcNA, gl, wdat, variable.wise = TRUE)) }) test_that("fhdwithin with two factors performs like demean", { expect_equal(fhdwithin(x, fl), drop(demean(x, fl)), tolerance = tol) expect_equal(unattrib(fhdwithin(xNA, fl)), unattrib(demean(xNA, fl, na.rm = TRUE)), tolerance = tol) expect_identical(length(fhdwithin(xNA, fl, fill = TRUE)), length(xNA)) expect_equal(unattrib(fhdwithin(m, gl)), unattrib(demean(m, gl)), tolerance = tol) # expect_equal(fhdwithin(mNA, gl, na.rm = FALSE), demean(mNA, gl), tolerance = tol) # can break R expect_equal(unattrib(fhdwithin(mNA, gl)), unattrib(demean(mNA, gl, na.rm = TRUE)), tolerance = tol) expect_identical(nrow(fhdwithin(mNA, gl, fill = TRUE)), nrow(mNA)) expect_equal(unattrib(fhdwithin(mtcars, gl)), unattrib(demean(mtcars, gl)), tolerance = tol) # expect_equal(fhdwithin(mtcNA, gl, na.rm = FALSE), demean(mtcNA, gl), tolerance = tol) # can break R expect_equal(unattrib(fhdwithin(mtcNA, gl)), unattrib(demean(mtcNA, gl, na.rm = TRUE)), tolerance = tol) expect_equal(fnrow(fhdwithin(mtcNA, gl, fill = TRUE)), fnrow(mtcNA)) expect_identical(fnrow(fhdwithin(mtcNA, gl, variable.wise = TRUE)), fnrow(mtcNA)) # With weights expect_equal(fhdwithin(x, fl, w), drop(demean(x, fl, weights = w)), tolerance = tol) expect_equal(unattrib(fhdwithin(xNA, fl, w)), unattrib(demean(xNA, fl, weights = w, na.rm = TRUE)), tolerance = tol) expect_identical(length(fhdwithin(xNA, fl, w, fill = TRUE)), length(xNA)) expect_equal(unattrib(fhdwithin(m, gl, wdat)), unattrib(demean(m, gl, weights = wdat)), tolerance = tol) # expect_equal(fhdwithin(mNA, gl, wdat, na.rm = FALSE), demean(mNA, gl, weights = wdat), tolerance = tol) # can break R cc <- complete.cases(mNA) expect_equal(unattrib(fhdwithin(mNA, gl, wdat)), unattrib(demean(mNA[cc, ], lapply(gl, .subset, cc), weights = wdat[cc])), tolerance = tol) expect_identical(nrow(fhdwithin(mNA, gl, wdat, fill = TRUE)), nrow(mNA)) # Smae here, bug to be fixed in demean() expect_equal(unattrib(fhdwithin(mtcars, gl, wdat)), unattrib(demean(mtcars, gl, weights = wdat)), tolerance = tol) # expect_equal(fhdwithin(mtcNA, gl, wdat, na.rm = FALSE), demean(mtcNA, gl, weights = wdat), tolerance = tol) # can break R # Also bug expect_equal(unattrib(fhdwithin(mtcNA, gl, wdat)), unattrib(demean(mtcNA, gl, weights = wdat, na.rm = TRUE)), tolerance = 1e-3) expect_equal(fnrow(fhdwithin(mtcNA, gl, wdat, fill = TRUE)), fnrow(mtcNA)) expect_identical(fnrow(fhdwithin(mtcNA, gl, wdat, variable.wise = TRUE)), fnrow(mtcNA)) }) } x2 <- 3 * x + rnorm(100) test_that("fhdbetween with only continuous variables performs like basefitted (defined above)", { expect_equal(fhdbetween(x, x2), basefitted(x, x2), tolerance = tol) expect_equal(`attr<-`(fhdbetween(xNA, x2), "na.rm", NULL), basefitted(xNA, x2, na.rm = TRUE), tolerance = tol) expect_visible(fhdbetween(xNA, x2, fill = TRUE)) expect_equal(fhdbetween(m, m), fhdbetween(m, mtcars), tolerance = tol) expect_equal(fhdbetween(m, m), basefitted(m, m), tolerance = tol) expect_equal(`attr<-`(fhdbetween(mNA, m, lm.method = "qr"), "na.rm", NULL), basefitted(mNA, m, na.rm = TRUE), tolerance = tol) expect_equal(fhdbetween(mNA, m, fill = TRUE, lm.method = "qr"), fhdbetween(mNA, mtcars, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdbetween(mtcars, mtcars), fhdbetween(mtcars, m), tolerance = tol) expect_equal(fhdbetween(mtcars, mtcars), qDF(basefitted(mtcars, mtcars)), tolerance = tol) expect_equal(`attr<-`(fhdbetween(mtcNA, mtcars, lm.method = "qr"), "na.rm", NULL), qDF(basefitted(mtcNA, mtcars, na.rm = TRUE)), tolerance = tol) expect_equal(fhdbetween(mtcNA, mtcars, fill = TRUE, lm.method = "qr"), fhdbetween(mtcNA, m, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdbetween(mtcNA, mtcars, variable.wise = TRUE), fhdbetween(mtcNA, m, variable.wise = TRUE), tolerance = tol) }) test_that("fhdwithin with only continuous variables performs like baseresid (defined above)", { expect_equal(fhdwithin(x, x2), baseresid(x, x2), tolerance = tol) expect_equal(`attr<-`(fhdwithin(xNA, x2), "na.rm", NULL), baseresid(xNA, x2, na.rm = TRUE), tolerance = tol) expect_visible(fhdwithin(xNA, x2, fill = TRUE)) expect_equal(fhdwithin(m, m), fhdwithin(m, mtcars), tolerance = tol) expect_equal(fhdwithin(m, m), baseresid(m, m), tolerance = tol) expect_equal(`attr<-`(fhdwithin(mNA, m, lm.method = "qr"), "na.rm", NULL), baseresid(mNA, m, na.rm = TRUE), tolerance = tol) expect_equal(fhdwithin(mNA, m, fill = TRUE, lm.method = "qr"), fhdwithin(mNA, mtcars, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdwithin(mtcars, mtcars), fhdwithin(mtcars, m), tolerance = tol) expect_equal(fhdwithin(mtcars, mtcars), qDF(baseresid(mtcars, mtcars)), tolerance = tol) expect_equal(`attr<-`(fhdwithin(mtcNA, mtcars, lm.method = "qr"), "na.rm", NULL), qDF(baseresid(mtcNA, mtcars, na.rm = TRUE)), tolerance = tol) expect_equal(fhdwithin(mtcNA, mtcars, fill = TRUE, lm.method = "qr"), fhdwithin(mtcNA, m, fill = TRUE, lm.method = "qr"), tolerance = tol) expect_equal(fhdwithin(mtcNA, mtcars, variable.wise = TRUE), fhdwithin(mtcNA, m, variable.wise = TRUE), tolerance = tol) }) if(has_fixest) { data <- wlddev data$year <- qF(data$year) data <- get_vars(data, c("iso3c","year","region","income","PCGDP","LIFEEX","ODA")) ww <- abs(rnorm(fnrow(data))) wi <- abs(rnorm(fnrow(iris))) test_that("fhdbetween with multiple variables performs like lm", { expect_equal(fhdbetween(iris$Sepal.Length, iris[-1]), `names<-`(fitted(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(fhdbetween(iris[1], iris[-1])[[1]], `names<-`(fitted(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdbetween(iris[1:2], iris[-(1:2)]))), fitted(lm(cbind(Sepal.Length, Sepal.Width) ~., iris)), tolerance = tol) expect_equal(`attributes<-`(fhdbetween(data$PCGDP, data[-5]), NULL), `attributes<-`(fitted(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdbetween(data$PCGDP, data[-5], fill = TRUE)) expect_equal(`attributes<-`(fhdbetween(data[5], data[-5])[[1]], NULL), `attributes<-`(fitted(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdbetween(data[5], data[-5], fill = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data[-(5:6)]))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data))), tolerance = tol) expect_visible(fhdbetween(data[5:6], data[-(5:6)], fill = TRUE)) expect_visible(fhdbetween(data[5:6], data[-(5:6)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:7], data[-(5:7)]))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX, ODA) ~., data))), tolerance = tol) expect_visible(fhdbetween(data[5:7], data[-(5:7)], fill = TRUE)) expect_visible(fhdbetween(data[5:7], data[-(5:7)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data$ODA))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data[5:7]))), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], fill = TRUE), fhdbetween(data[5:6], data$ODA, fill = TRUE), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], variable.wise = TRUE), fhdbetween(data[5:6], data$ODA, variable.wise = TRUE), tolerance = tol) # With weights expect_equal(fhdbetween(iris$Sepal.Length, iris[-1], wi), `names<-`(fitted(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(fhdbetween(iris[1], iris[-1], wi)[[1]], `names<-`(fitted(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdbetween(iris[1:2], iris[-(1:2)], wi))), fitted(lm(cbind(Sepal.Length, Sepal.Width) ~., iris, weights = wi)), tolerance = tol) expect_equal(`attributes<-`(fhdbetween(data$PCGDP, data[-5], ww), NULL), `attributes<-`(fitted(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdbetween(data$PCGDP, data[-5], ww, fill = TRUE)) expect_equal(`attributes<-`(fhdbetween(data[5], data[-5], ww)[[1]], NULL), `attributes<-`(fitted(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdbetween(data[5], data[-5], ww, fill = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data[-(5:6)], ww))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdbetween(data[5:6], data[-(5:6)], ww, fill = TRUE)) expect_visible(fhdbetween(data[5:6], data[-(5:6)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:7], data[-(5:7)], ww))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX, ODA) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdbetween(data[5:7], data[-(5:7)], ww, fill = TRUE)) expect_visible(fhdbetween(data[5:7], data[-(5:7)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdbetween(data[5:6], data$ODA, ww))), setRownames(fitted(lm(cbind(PCGDP, LIFEEX) ~., data[5:7], weights = ww))), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], ww, fill = TRUE), fhdbetween(data[5:6], data$ODA, ww, fill = TRUE), tolerance = tol) expect_equal(fhdbetween(data[5:6], data[7], ww, variable.wise = TRUE), fhdbetween(data[5:6], data$ODA, ww, variable.wise = TRUE), tolerance = tol) }) test_that("fhdwithin with multiple variables performs like lm", { expect_equal(fhdwithin(iris$Sepal.Length, iris[-1]), `names<-`(resid(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(fhdwithin(iris[1], iris[-1])[[1]], `names<-`(resid(lm(Sepal.Length ~., iris)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdwithin(iris[1:2], iris[-(1:2)]))), resid(lm(cbind(Sepal.Length, Sepal.Width) ~., iris)), tolerance = tol) expect_equal(`attributes<-`(fhdwithin(data$PCGDP, data[-5]), NULL), `attributes<-`(resid(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdwithin(data$PCGDP, data[-5], fill = TRUE)) expect_equal(`attributes<-`(fhdwithin(data[5], data[-5])[[1]], NULL), `attributes<-`(resid(lm(PCGDP ~., data)), NULL), tolerance = tol) expect_visible(fhdwithin(data[5], data[-5], fill = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data[-(5:6)]))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data))), tolerance = tol) expect_visible(fhdwithin(data[5:6], data[-(5:6)], fill = TRUE)) expect_visible(fhdwithin(data[5:6], data[-(5:6)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:7], data[-(5:7)]))), setRownames(resid(lm(cbind(PCGDP, LIFEEX, ODA) ~., data))), tolerance = tol) expect_visible(fhdwithin(data[5:7], data[-(5:7)], fill = TRUE)) expect_visible(fhdwithin(data[5:7], data[-(5:7)], variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data$ODA))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data[5:7]))), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], fill = TRUE), fhdwithin(data[5:6], data$ODA, fill = TRUE), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], variable.wise = TRUE), fhdwithin(data[5:6], data$ODA, variable.wise = TRUE), tolerance = tol) # With weights expect_equal(fhdwithin(iris$Sepal.Length, iris[-1], wi), `names<-`(resid(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(fhdwithin(iris[1], iris[-1], wi)[[1]], `names<-`(resid(lm(Sepal.Length ~., iris, weights = wi)), NULL), tolerance = tol) expect_equal(setRownames(qM(fhdwithin(iris[1:2], iris[-(1:2)], wi))), resid(lm(cbind(Sepal.Length, Sepal.Width) ~., iris, weights = wi)), tolerance = tol) expect_equal(`attributes<-`(fhdwithin(data$PCGDP, data[-5], ww), NULL), `attributes<-`(resid(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdwithin(data$PCGDP, data[-5], ww, fill = TRUE)) expect_equal(`attributes<-`(fhdwithin(data[5], data[-5], ww)[[1]], NULL), `attributes<-`(resid(lm(PCGDP ~., data, weights = ww)), NULL), tolerance = tol) expect_visible(fhdwithin(data[5], data[-5], ww, fill = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data[-(5:6)], ww))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdwithin(data[5:6], data[-(5:6)], ww, fill = TRUE)) expect_visible(fhdwithin(data[5:6], data[-(5:6)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:7], data[-(5:7)], ww))), setRownames(resid(lm(cbind(PCGDP, LIFEEX, ODA) ~., data, weights = ww))), tolerance = tol) expect_visible(fhdwithin(data[5:7], data[-(5:7)], ww, fill = TRUE)) expect_visible(fhdwithin(data[5:7], data[-(5:7)], ww, variable.wise = TRUE)) expect_equal(setRownames(qM(fhdwithin(data[5:6], data$ODA, ww))), setRownames(resid(lm(cbind(PCGDP, LIFEEX) ~., data[5:7], weights = ww))), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], ww, fill = TRUE), fhdwithin(data[5:6], data$ODA, ww, fill = TRUE), tolerance = tol) expect_equal(fhdwithin(data[5:6], data[7], ww, variable.wise = TRUE), fhdwithin(data[5:6], data$ODA, ww, variable.wise = TRUE), tolerance = tol) }) } test_that("fhdbetween produces errors for wrong input", { expect_visible(fhdbetween(1:2,1:2)) expect_error(fhdbetween("a", 1)) expect_error(fhdbetween(mNAc, f)) expect_error(fhdbetween(1:2,1:3)) expect_error(fhdbetween(m,1:31)) expect_error(fhdbetween(mNA,1:31)) expect_error(fhdbetween(mtcars,1:31)) # expect_warning(fhdbetween(1:2, 1:2, bla = 1)) expect_error(fhdbetween(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]))) expect_visible(fhdbetween(1:2,1:2, na.rm = FALSE)) expect_error(fhdbetween("a", 1, na.rm = FALSE)) expect_error(fhdbetween(mNAc, f, na.rm = FALSE)) expect_error(fhdbetween(1:2,1:3, na.rm = FALSE)) expect_error(fhdbetween(m,1:31, na.rm = FALSE)) expect_error(fhdbetween(mNA,1:31, na.rm = FALSE)) expect_error(fhdbetween(mtcars,1:31, na.rm = FALSE)) # expect_warning(fhdbetween(1:2, 1:2, bla = 1, na.rm = FALSE)) # expect_error(fhdbetween(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]), na.rm = FALSE)) # breaks R }) test_that("fhdwithin produces errors for wrong input", { expect_visible(fhdwithin(1:2,1:2)) expect_error(fhdwithin("a", 1)) expect_error(fhdwithin(mNAc, f)) expect_error(fhdwithin(1:2,1:3)) expect_error(fhdwithin(m,1:31)) expect_error(fhdwithin(mNA,1:31)) expect_error(fhdwithin(mtcars,1:31)) # expect_warning(fhdwithin(1:2, 1:2, bla = 1)) expect_error(fhdwithin(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]))) expect_visible(fhdwithin(1:2,1:2, na.rm = FALSE)) expect_error(fhdwithin("a", 1, na.rm = FALSE)) expect_error(fhdwithin(mNAc, f, na.rm = FALSE)) expect_error(fhdwithin(1:2,1:3, na.rm = FALSE)) expect_error(fhdwithin(m,1:31, na.rm = FALSE)) expect_error(fhdwithin(mNA,1:31, na.rm = FALSE)) expect_error(fhdwithin(mtcars,1:31, na.rm = FALSE)) # expect_warning(fhdwithin(1:2, 1:2, bla = 1, na.rm = FALSE)) # expect_error(fhdwithin(wlddev, list(wlddev$iso3c, wlddev$income[1:10000]), na.rm = FALSE)) # segfault !!! }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { # HDB and HDW test_that("HDW data.frame method (formula input) performs properly", { # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear*wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars))[2:3], tolerance = tol) # multiple factors - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(vs):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(vs):carb, mtcars))[2:3], tolerance = tol) # multiple factors - continuous without including factor 2 expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(vs):wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(vs):wt, mtcars))[2:3], tolerance = tol) # multiple factors - continuous without including factor 3 expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ am + qF(cyl):carb + qF(vs):wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + am + qF(cyl):carb + qF(vs):wt, mtcars))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous full interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl)*carb, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, ~ cyl + vs + am, stub = FALSE)))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):vs:gear + factor(am):carb + wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars))[2:3]) # With weights # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ carb*gear*wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars, weights = wdat))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl):carb + qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous full interaction if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ qF(cyl)*carb, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, ~ cyl + vs + am, wdat, stub = FALSE), weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):vs:gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) }) test_that("HDW data.frame method (formula input) with 2-sided formula performs properly", { # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear*wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars))[2:3], tolerance = tol) # factor - continuous full interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl)*carb, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, ~ cyl + vs + am, stub = FALSE)))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):vs:gear + factor(am):carb + wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars))[2:3]) # With weights # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ carb*gear*wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcars, weights = wdat))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcars, weights = wdat))[2:3], tolerance = tol) # factor - continuous full interaction if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ qF(cyl)*carb, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcars, weights = wdat))[2:3], tolerance = tol) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp , W(mtcars, mpg + hp + disp ~ cyl + vs + am, wdat, stub = FALSE), weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcars, weights = wdat))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):vs:gear + factor(am):carb + wt, wdat, stub = FALSE, lm.method = "qr"), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcars, mpg + hp + disp ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcars, weights = wdat))[2:3]) }) test_that("HDW data.frame method (formula input) with 2-sided formula and missing values performs properly", { # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcNA))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcNA))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear*wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcNA))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcNA))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcNA))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcNA))[2:3], tolerance = tol) # factor - continuous full interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl)*carb, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcNA))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcNA))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcNA))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcNA))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcNA))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions : Somestimes test fails, I don't know why (maybe demeanlist numeric problem) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcNA))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcNA))[2:3], tolerance = 1) # faile R CMD Arch i386 (32 Bit) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcNA))[2:3], tolerance = 1e-2) # 3-way interaction continuous-factor: error if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, ~ factor(cyl):vs:gear + factor(am):carb + wt, stub = FALSE, lm.method = "qr")))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcNA))[2:3]) # 3-way interaction factor-continuous: error if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, stub = FALSE)))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcNA))[2:3]) # With weights # simple lm, continuous vars expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb + gear + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb + gear + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # continuous interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # continuous 3-way interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ carb*gear*wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + carb*gear*wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl), mtcNA, weights = wdat))[2:3], tolerance = tol) # factor - continuous without including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb, mtcNA, weights = wdat))[2:3], tolerance = tol) # factor - continuous including factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl):carb + qF(cyl), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl):carb + qF(cyl), mtcNA, weights = wdat))[2:3], tolerance = tol) # factor - continuous full interaction if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ qF(cyl)*carb, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + qF(cyl)*carb, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am), mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects + factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am), mtcNA, weights = wdat))[2:3], tolerance = tol) # 3 way factor interaction expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):factor(am), mtcNA, weights = wdat))[2:3], tolerance = tol) if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):factor(am), wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp , W(mtcNA, mpg + hp + disp ~ cyl + vs + am, wdat, stub = FALSE), weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variable expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs) + factor(am) + carb + gear + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and full interactions if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs)*gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs)*gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and factor-continuous interactions + factor interactions expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs) + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs) + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # HD fixed effects and continuous variables and polynomaial interactions if(failtests) expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl) + factor(vs):poly(gear,2) + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) # 3-way interaction continuous-factor expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):vs:gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):vs:gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3]) # 3-way interaction factor-continuous expect_equal(coef(lm(mpg ~ hp + disp, HDW(mtcNA, mpg + hp + disp ~ factor(cyl):factor(vs):gear + factor(am):carb + wt, wdat, stub = FALSE, fill = TRUE), weights = wdat))[2:3], coef(lm(mpg ~ hp + disp + factor(cyl):factor(vs):gear + factor(am):carb + wt, mtcNA, weights = wdat))[2:3], tolerance = tol) }) test_that("HDW weighted computations work like lm", { # ... if(failtests) expect_equal( unname(resid(lm(mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) if(failtests) expect_equal( unname(resid(lm(mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, mtcars))), HDW(mtcars, mpg ~ factor(cyl)*carb + factor(vs) + hp + gear, lm.method = "qr")[, 1], tolerance = 1e-4) expect_equal( unname(resid(lm(mpg ~ factor(vs) + hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ factor(vs) + hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) expect_equal( unname(resid(lm(mpg ~ factor(cyl) + factor(vs) + hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ factor(cyl) + factor(vs) + hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) expect_equal( unname(resid(lm(mpg ~ hp + gear, weights = wt, mtcars))), HDW(mtcars, mpg ~ hp + gear, mtcars$wt)[, 1], tolerance = 1e-4) }) } test_that("HDB data.frame method (formula input) throw errors", { expect_error(HDB(mtcars, ~ cyl + vs1)) expect_error(HDB(mtcars, mpg1 + hp ~ cyl + vs)) expect_error(HDB(mtcars, ~ cyl + vs, cols = 13)) expect_error(HDB(mtcars, ~ cyl + vs, cols = "mpg2")) }) test_that("HDW data.frame method (formula input) throw errors", { expect_error(HDW(mtcars, ~ cyl + vs1)) expect_error(HDW(mtcars, mpg1 + hp ~ cyl + vs)) expect_error(HDW(mtcars, ~ cyl + vs, cols = 13)) expect_error(HDW(mtcars, ~ cyl + vs, cols = "mpg2")) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) test_that("Indexed data methods", { wldi = findex_by(wlddev, iso3c, year) expect_true(inherits(HDW(wldi$PCGDP), "indexed_series")) expect_true(inherits(HDW(wldi$PCGDP, fill = FALSE), "indexed_series")) expect_true(inherits(HDB(wldi$PCGDP), "indexed_series")) expect_true(inherits(HDB(wldi$PCGDP, fill = FALSE), "indexed_series")) expect_true(inherits(HDW(wldi$date), "indexed_series")) expect_true(inherits(HDW(wldi$date, fill = FALSE), "indexed_series")) expect_true(inherits(HDB(wldi$date), "indexed_series")) expect_true(inherits(HDB(wldi$date, fill = FALSE), "indexed_series")) fl <- unclass(findex(wldi)) expect_equal(unattrib(HDW(wldi$PCGDP)), unattrib(HDW(wlddev$PCGDP, fl, fill = TRUE))) expect_equal(unattrib(HDW(wldi$PCGDP, fill = FALSE)), unattrib(HDW(wlddev$PCGDP, fl))) expect_equal(unattrib(HDB(wldi$PCGDP)), unattrib(HDB(wlddev$PCGDP, fl, fill = TRUE))) expect_equal(unattrib(HDB(wldi$PCGDP, fill = FALSE)), unattrib(HDB(wlddev$PCGDP, fl))) for(f in c("HDW", "HDB")) { # print(f) FUN <- match.fun(f) cdat = FUN(wldi, stub = FALSE) expect_equal(lapply(cdat, unattrib), lapply(FUN(wlddev, ~ iso3c + qF(year), variable.wise = TRUE, stub = FALSE), unattrib)) expect_equal(lapply(slt(cdat, PCGDP:POP), unattrib), lapply(FUN(slt(wlddev, PCGDP:POP), fl, variable.wise = TRUE, stub = FALSE), unattrib)) expect_true(inherits(cdat, "indexed_frame")) expect_true(inherits(cdat$PCGDP, "indexed_series")) expect_true(fnrow(cdat) == fnrow(wldi)) expect_identical(findex(cdat), findex(wldi)) expect_true(fnrow(findex(cdat)) == fnrow(findex(cdat$PCGDP))) cdat = FUN(wldi, variable.wise = FALSE, stub = FALSE) expect_equal(lapply(cdat, unattrib), lapply(FUN(wlddev, ~ iso3c + qF(year), fill = TRUE, stub = FALSE), unattrib)) expect_equal(lapply(slt(cdat, PCGDP:POP), unattrib), lapply(FUN(slt(wlddev, PCGDP:POP), fl, fill = TRUE, stub = FALSE), unattrib)) expect_true(inherits(cdat, "indexed_frame")) expect_true(inherits(cdat$PCGDP, "indexed_series")) expect_true(fnrow(cdat) == fnrow(wldi)) expect_identical(findex(cdat), findex(wldi)) expect_true(fnrow(findex(cdat)) == fnrow(findex(cdat$PCGDP))) cdat = FUN(wldi, fill = FALSE, stub = FALSE) expect_equal(lapply(cdat, unattrib), lapply(FUN(wlddev, ~ iso3c + qF(year), stub = FALSE), unattrib)) expect_equal(lapply(slt(cdat, PCGDP:POP), unattrib), lapply(FUN(slt(wlddev, PCGDP:POP), fl, stub = FALSE), unattrib)) expect_true(inherits(cdat, "indexed_frame")) expect_true(inherits(cdat$PCGDP, "indexed_series")) expect_false(fnrow(cdat) == fnrow(wldi)) expect_true(fnrow(findex(cdat)) == fnrow(cdat)) expect_true(fnrow(findex(cdat)) == fnrow(findex(cdat$PCGDP))) } }) options(warn = 1) collapse/tests/testthat/test-small-helper.R0000644000176200001440000000164415202504542020547 0ustar liggesuserscontext("small helpers") test_that("missing_cases works", { x <- c(1, NA, 3, NaN) expect_equal(missing_cases(x), c(FALSE, TRUE, FALSE, TRUE)) expect_true(any(missing_cases(data.frame(a = x, b = c(1, 2, NA, 4))[3:4, , drop = FALSE]))) expect_equal(missing_cases(x, count = TRUE), c(0L, 1L, 0L, 1L)) }) test_that("na_omit removes missing rows", { d <- data.frame(a = c(1, NA, 3), b = c(1, 2, NA)) expect_equal(nrow(na_omit(d)), 1L) expect_equal(na_omit(c(1, NA, 3)), c(1, 3)) }) test_that("massign and %=% assign multiple names", { env <- new.env() massign(c("x", "y"), list(3, 4), envir = env) expect_equal(env$x, 3) expect_equal(env$y, 4) local({ c("a", "b") %=% list(1, 2) expect_equal(a, 1) expect_equal(b, 2) }) }) test_that("copyv and setv modify data", { d <- data.frame(a = 1:3, b = 4:6) dc <- copyv(d, 0, 0) expect_equal(dc, d) setv(d, 2L, 0L) expect_equal(d$a[2], 0L) }) collapse/tests/testthat/test-descr.R0000644000176200001440000000340215202504542017254 0ustar liggesuserscontext("descr") test_that("descr returns expected structure", { d <- descr(wlddev, cols = c("iso3c", "POP", "LIFEEX")) expect_s3_class(d, "descr") expect_equal(attr(d, "name"), "wlddev") expect_equal(attr(d, "names"), c("iso3c", "POP", "LIFEEX")) expect_true("Table" %in% names(d$iso3c) || "Stats" %in% names(d$iso3c)) expect_true(is.numeric(d$POP$Stats) || is.list(d$POP$Stats) || is.matrix(d$POP$Stats)) }) test_that("descr grouped by works", { d <- descr(wlddev, by = ~ region, cols = c("POP", "LIFEEX")) expect_s3_class(d, "descr") expect_true(!is.null(attr(d, "groups"))) expect_true(length(d) >= 2L) }) test_that("descr with weights", { wld <- transform(wlddev, POP = replace_NA(POP)) d <- descr(wld, cols = c("POP", "LIFEEX"), w = ~ POP) expect_s3_class(d, "descr") expect_equal(length(attr(d, "weights")), nrow(wld)) }) test_that("descr flags control output", { d1 <- descr(wlddev, cols = "iso3c", table = FALSE) expect_null(d1$iso3c$Table) d2 <- descr(wlddev, cols = "POP", Ndistinct = FALSE, higher = FALSE) expect_s3_class(d2, "descr") d3 <- descr(wlddev, cols = "iso3c", sort.table = "value") expect_s3_class(d3, "descr") d4 <- descr(wlddev, cols = "iso3c", sort.table = "none") expect_s3_class(d4, "descr") }) test_that("descr S3 methods work", { d <- descr(wlddev, cols = c("iso3c", "POP")) expect_output(print(d)) df <- as.data.frame(d) expect_true(is.data.frame(df)) expect_true(nrow(df) >= 1L) }) test_that("descr grouped_df method works", { g <- fgroup_by(fsubset(wlddev, iso3c %in% c("USA", "DEU", "JPN")), region) d <- descr(fselect(g, POP, LIFEEX)) expect_s3_class(d, "descr") }) test_that("descr errors for invalid sort.table", { expect_error(descr(wlddev, cols = "iso3c", sort.table = "invalid")) }) collapse/tests/testthat/test-collap.R0000644000176200001440000013351415107162574017447 0ustar liggesuserscontext("collap") bsum <- base::sum bmean <- base::mean # rm(list = ls()) options(warn = -1) g <- GRP(wlddev, ~ country + decade) oa <- function(x) setAttrib(unattrib(x), attributes(x)[c("names", "row.names", "class")]) # Should use above, but sometimes still gives errors if(Sys.getenv("NCRAN") != "TRUE") oa <- function(x) setNames(unattrib(x), names(x)) Mode <- function(x, na.rm = FALSE) { if(na.rm) x <- x[!is.na(x)] ux <- unique(x) ux[which.max(tabulate(match(x, ux)))] } # TODO: What about other return options and weighted multi-function aggregation ? And what about grouped_df method.. test_that("collap performs as intended in simple uses", { expect_equal(collap(mtcars, mtcars$cyl, keep.by = FALSE), fmean(mtcars, mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(mtcars, mtcars[2], keep.by = FALSE), fmean(mtcars, mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(mtcars, ~cyl), fmean(mtcars, mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(mtcars, ~cyl, keep.by = FALSE), fmean(mtcars[-2], mtcars$cyl, use.g.names = FALSE)) expect_equal(collap(iris, ~Species, keep.by = FALSE), fmean(iris[-5], iris$Species, use.g.names = FALSE)) expect_equal(collap(airquality, ~Month, keep.by = FALSE), fmean(airquality[-5], airquality$Month, use.g.names = FALSE)) expect_equal(oa(collap(wlddev, ~ country + decade, keep.col.order = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) expect_equal(oa(collap(wlddev, ~ country + decade, keep.col.order = FALSE, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(4,9:13,2:3,6:8))]) expect_equal(oa(collap(wlddev, g, keep.by = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], keep.by = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) }) test_that("collap preserves data attributes", { expect_identical(lapply(collap(wlddev, ~country), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~country, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~country, fmax)), vtypes(wlddev)) expect_identical(lapply(collap(wlddev, ~iso3c), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~iso3c, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~iso3c, fmax)), vtypes(wlddev)) expect_identical(lapply(collap(wlddev, ~date), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~date, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~date, fmax)), vtypes(wlddev)) expect_identical(lapply(collap(wlddev, ~country + decade), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collap(wlddev, ~country + decade, fmin)), vclasses(wlddev)) expect_identical(vtypes(collap(wlddev, ~country + decade, fmax)), vtypes(wlddev)) }) # if(Sys.getenv("NCRAN") == "TRUE") test_that("collap performs as intended in simple uses with base/stats functions", { expect_equal(oa(collap(mtcars, mtcars$cyl, bsum, keep.by = FALSE)), oa(fsum(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~cyl, mean.default)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~cyl, bmean)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, mtcars[2], bsum, keep.by = FALSE)), oa(fsum(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~cyl, bsum, keep.by = FALSE)), oa(fsum(mtcars[-2], mtcars$cyl, use.g.names = FALSE))) expect_equal(unattrib(collap(iris, ~Species, bsum, keep.by = FALSE)), unattrib(fsum(iris[-5], iris$Species, use.g.names = FALSE))) expect_equal(oa(collap(airquality, ~Month, bsum, na.rm = TRUE, keep.by = FALSE)), oa(fsum(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, bsum, Mode, na.rm = TRUE, keep.col.order = FALSE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, bsum, Mode, na.rm = TRUE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) }) test_that("collap using 2-sided formula or cols performs as intended", { expect_equal(oa(collap(mtcars, mpg ~ cyl, keep.by = FALSE)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, mpg ~ cyl, keep.by = FALSE, cols = 300:1000)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) # cols is ignored, as should be expect_equal(oa(collap(mtcars, ~ cyl, keep.by = FALSE, cols = 1)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collap(mtcars, wt + mpg ~ cyl + vs + am, keep.by = FALSE)), oa(fmean(mtcars[c("mpg","wt")], mtcars[c("cyl","vs","am")], use.g.names = FALSE))) expect_equal(oa(collap(mtcars, ~ cyl + vs + am, keep.by = FALSE, cols = c(6,1))), oa(fmean(mtcars[c("mpg","wt")], mtcars[c("cyl","vs","am")], use.g.names = FALSE))) expect_equal(oa(collap(iris, Sepal.Length + Sepal.Width ~ Species, keep.by = FALSE)), oa(fmean(iris[1:2], iris$Species, use.g.names = FALSE))) expect_equal(oa(collap(airquality, ~ Month, keep.by = FALSE)), oa(fmean(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collap(airquality, ~ Month, keep.by = FALSE, cols = 1:3)), oa(fmean(airquality[1:3], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:13)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA + POP ~ country + decade))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:13)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8))), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade))) expect_false(identical(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8))), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, g, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collap(wlddev, wlddev[c("country","decade")], cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) }) test_that("collap multi-function aggregation performs as intended", { expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) if(Sys.getenv("NCRAN") == "TRUE") expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian), list(fmode, flast), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE))[order(c(1,5,4,9:13,4,9:13,2:3,6:8))]))) expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fmedian), list(fmode, flast)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,4,9:13,2:3,6:8,2:3,6:8))])) }) test_that("collap custom aggregation performs as intended", { expect_equal(unname(oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8), keep.col.order = FALSE))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE))))) expect_equal(unname(oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8)))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE)))[order(c(1,5,9:13,9:10,7:8))])) expect_equal(oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collap(wlddev, ~ country + decade, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collap(wlddev, ~ country + decade, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) expect_equal(oa(collap(wlddev, g, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collap(wlddev, ~ country + decade, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collap(wlddev, g, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collap(wlddev, g, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) expect_equal(names(collap(wlddev, g, custom = list(fmean = c(GDP = "PCGDP"), fsd = c("LIFEEX", GN = "GINI"), flast = "date"), keep.by = FALSE, keep.col.order = FALSE)), .c(GDP, LIFEEX, GN, date)) }) test_that("collap weighted aggregations work as intended", { # Not keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE, keep.by = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE, keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), g$group.vars)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) if(Sys.getenv("NCRAN") == "TRUE") test_that("collap multi-function aggregation with weights performs as intended", { expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), w = ~ POP, keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), list(fmode, flast), w = ~ POP, keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), w = ~ POP, wFUN = list(fsum, fmax)))), unname(oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8))])) expect_equal(unname(oa(collap(wlddev, ~ country + decade, list(fmean, fsd), list(fmode, flast), w = ~ POP, wFUN = list(fsum, fmax)))), unname(oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8,2:3,6:8))])) }) v1 <- c("year","PCGDP","LIFEEX","GINI","ODA") v2 <- c("iso3c","date","region","income", "OECD") test_that("collap weighted customized aggregation works as intended", { # Not keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(unattrib(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, give.names = FALSE)), unattrib(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), g$group.vars)]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collap(wlddev, ~ country + decade, w = ~ POP, keep.by = FALSE, keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) test_that("collap gives informative errors", { expect_error(collap(~cyl, ~cyl)) # nah, need to give error in qDF expect_error(collap(wlddev, 1:3)) # only gives error in fmean.. a bit late.. expect_error(collap(wlddev, "country")) # same thing expect_error(collap(wlddev, ~ country1)) expect_error(collap(wlddev, ~ country, w = ~bla)) expect_error(collap(wlddev, ~ country, w = ~POP, wFUN = bsum)) expect_error(collap(wlddev, ~ country + year + bla)) expect_error(collap(wlddev, bla ~ country)) expect_warning(collap(wlddev, ~ country, bla = 1)) # passes to fmean.data.frame which give the error. # expect_error(collap(wlddev, ~ country, bsum, cols = 9:13, bla = 1)) # This is an issue, bsum(1:3, bla = 1) does not give an error expect_error(collap(wlddev, mtcars$cyl)) # again fmean error.. expect_error(collap(wlddev, ~iso3c, cols = 9:14)) # expect_error(collap(wlddev, ~iso3c, cols = 0:1)) # no error.. expect_error(collap(wlddev, ~iso3c, cols = c("PCGDP","bla"))) expect_error(collap(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX1"))) expect_error(collap(wlddev, ~iso3c, custom = ~ PCGDP)) expect_error(collap(wlddev, ~iso3c, custom = list(fmean, fmode))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:14, fmode = 4:6))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, 4:6))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, fmode2 = 4:6))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, fmode = c("GINI","bla")))) expect_error(collap(wlddev, ~iso3c, custom = list(fmean = 9:13, fmode = c("GINI","PCGDP2")))) }) # Note: one more thing to test is performance with vector-valued functions... # Testing collapv v <- c(1, 5) test_that("collapv performs as intended in simple uses", { expect_equal(oa(collapv(mtcars, 2)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, 2, keep.by = FALSE)), oa(fmean(mtcars[-2], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(iris, "Species", keep.by = FALSE)), oa(fmean(iris[-5], iris$Species, use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", keep.by = FALSE)), oa(fmean(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(wlddev, v, keep.col.order = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) expect_equal(oa(collapv(wlddev, v, keep.col.order = FALSE, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, keep.by = FALSE)), oa(cbind(fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(4,9:13,2:3,6:8))]) expect_equal(names(collapv(wlddev, v, custom = list(fmean = c(GDP = "PCGDP"), fsd = c("LIFEEX", GN = "GINI"), flast = "date"), keep.by = FALSE, keep.col.order = FALSE)), .c(GDP, LIFEEX, GN, date)) }) test_that("collapv preserves data attributes", { expect_identical(lapply(collapv(wlddev, 1), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, 1, fmax)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, 1, fmin)), vtypes(wlddev)) expect_identical(lapply(collapv(wlddev, "iso3c"), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, "iso3c", fmax)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, "iso3c", fmin)), vtypes(wlddev)) expect_identical(lapply(collapv(wlddev, "date"), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, "date", ffirst)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, "date", flast)), vtypes(wlddev)) expect_identical(lapply(collapv(wlddev, v), attributes), lapply(wlddev, attributes)) expect_identical(vclasses(collapv(wlddev, v, flast)), vclasses(wlddev)) expect_identical(vtypes(collapv(wlddev, v, ffirst)), vtypes(wlddev)) }) # if(Sys.getenv("NCRAN") == "TRUE") test_that("collapv performs as intended in simple uses with base/stats functions", { expect_equal(oa(collapv(mtcars, "cyl", mean.default)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, "cyl", bmean)), oa(fmean(mtcars, mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, 2, bsum, keep.by = FALSE)), oa(fsum(mtcars[-2], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(iris, 5, bsum, keep.by = FALSE)), oa(fsum(iris[-5], iris$Species, use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", bsum, na.rm = TRUE, keep.by = FALSE)), oa(fsum(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(wlddev, v, bsum, Mode, na.rm = TRUE, keep.col.order = FALSE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, bsum, Mode, na.rm = TRUE)), oa(cbind(g$groups, BY(get_vars(wlddev, c(4,9:13)), g, bsum, na.rm = TRUE, use.g.names = FALSE), BY(get_vars(wlddev, c(2:3,6:8)), g, Mode, na.rm = TRUE, use.g.names = FALSE)))[order(c(1,5,4,9:13,2:3,6:8))]) }) test_that("collapv using cols performs as intended", { expect_equal(oa(collapv(mtcars, 2, keep.by = FALSE, cols = 1)), oa(fmean(mtcars["mpg"], mtcars$cyl, use.g.names = FALSE))) expect_equal(oa(collapv(mtcars, c("cyl", "vs", "am"), keep.by = FALSE, cols = c(6,1))), oa(fmean(mtcars[c("mpg","wt")], mtcars[c("cyl","vs","am")], use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", keep.by = FALSE)), oa(fmean(airquality[-5], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(airquality, "Month", keep.by = FALSE, cols = 1:3)), oa(fmean(airquality[1:3], airquality$Month, use.g.names = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = 9:12)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade))) expect_equal(oa(collapv(wlddev, v, cols = 9:13)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = c(2:3,6:8))), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade))) expect_false(identical(collapv(wlddev, v, cols = c(2:3,6:8)), collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = 9:12, keep.by = FALSE)), oa(collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ country + decade, keep.by = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = 9:13, keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = 9:13, keep.col.order = FALSE, keep.by = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, iso3c + date + region + income + OECD ~ country + decade, keep.by = FALSE))) expect_equal(oa(collapv(wlddev, v, cols = c(2:3,6:8), keep.by = FALSE)), oa(collap(wlddev, ~ country + decade, cols = c(2:3,6:8), keep.col.order = FALSE, keep.by = FALSE))) }) test_that("collapv multi-function aggregation performs as intended", { expect_equal(oa(collapv(wlddev, v, list(fmean, fmedian), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, list(fmean, fmedian), list(fmode, flast), keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collapv(wlddev, v, list(fmean, fmedian)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,4,9:13,2:3,6:8))])) expect_equal(unname(oa(collapv(wlddev, v, list(fmean, fmedian), list(fmode, flast)))), unname(oa(cbind(g$groups, fmean(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmedian(get_vars(wlddev, c(4,9:13)), g, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,4,9:13,4,9:13,2:3,6:8,2:3,6:8))])) }) test_that("collapv custom aggregation performs as intended", { expect_equal(unname(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8), keep.col.order = FALSE))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE))))) expect_equal(unname(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8)))), unname(oa(cbind(g$groups, fmean(wlddev[9:13], g, use.g.names = FALSE), fsd(wlddev[9:10], g, use.g.names = FALSE), fmode(wlddev[7:8], g, use.g.names = FALSE)))[order(c(1,5,9:13,9:10,7:8))])) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = 9:10, fmode = 7:8))), oa(collapv(wlddev, v, custom = list(fmean = 9:13, fsd = c("PCGDP","LIFEEX"), fmode = 7:8)))) expect_equal(oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))), oa(collapv(wlddev, v, custom = list(fmean = "PCGDP", fsd = 10:11, flast = "date")))) }) test_that("collapv weighted aggregations work as intended", { # Not keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE, keep.by = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE, keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP")), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(unattrib(collapv(wlddev, v, w = "POP", keep.by = FALSE)), unattrib(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE))[setdiff(names(wlddev), g$group.vars)])) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.w = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.by = FALSE, keep.w = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) if(Sys.getenv("NCRAN") == "TRUE") test_that("collapv multi-function aggregation with weights performs as intended", { expect_equal(oa(collapv(wlddev, v, list(fmean, fsd), w = "POP", keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, list(fmean, fsd), list(fmode, flast), w = "POP", keep.col.order = FALSE, give.names = FALSE)), oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))) # with column ordering: expect_equal(unname(oa(collapv(wlddev, v, list(fmean, fsd), w = "POP", wFUN = list(fsum, fmax)))), unname(oa(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8))])) expect_equal(unattrib(collapv(wlddev, v, list(fmean, fsd), list(fmode, flast), w = "POP", wFUN = list(fsum, fmax))), unattrib(cbind(g$groups, fsum(get_vars(wlddev, 13), g, use.g.names = FALSE), fmax(get_vars(wlddev, 13), g, use.g.names = FALSE), fmean(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fsd(get_vars(wlddev, c(4,9:12)), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c(2:3,6:8)), g, wlddev$POP, use.g.names = FALSE), flast(get_vars(wlddev, c(2:3,6:8)), g, use.g.names = FALSE)))[order(c(1,5,13,13,4,9:12,4,9:12,2:3,6:8,2:3,6:8))]) }) v1 <- c("year","PCGDP","LIFEEX","GINI","ODA") v2 <- c("iso3c","date","region","income", "OECD") test_that("collapv weighted customized aggregation works as intended", { # Not keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(unattrib(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, give.names = FALSE)), unattrib(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), keep.col.order = FALSE, keep.by = FALSE, keep.w = FALSE, give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))) # keeping order ... expect_equal(oa(collapv(wlddev, v, w = "POP", custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[names(wlddev)]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.by = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fsum(get_vars(wlddev, "POP"), g, use.g.names = FALSE), fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), g$group.vars)]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(g$groups, fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), "POP")]) expect_equal(oa(collapv(wlddev, v, w = "POP", keep.by = FALSE, keep.w = FALSE, custom = list(fmean = v1, fmode = v2), give.names = FALSE)), oa(add_vars(fmean(get_vars(wlddev, c("year","PCGDP","LIFEEX","GINI","ODA")), g, wlddev$POP, use.g.names = FALSE), fmode(get_vars(wlddev, c("iso3c","date","region","income", "OECD")), g, wlddev$POP, use.g.names = FALSE)))[setdiff(names(wlddev), c(g$group.vars, "POP"))]) }) test_that("collapv gives informative errors", { expect_error(collapv(~cyl, ~cyl)) # nah, need to give error in qDF expect_error(collapv(wlddev, ~ country)) # same thing expect_error(collapv(wlddev, 14)) expect_error(collapv(wlddev, 1, w = 14)) expect_error(collapv(wlddev, 1, w = "bla")) expect_error(collapv(wlddev, 1, w = 13, wFUN = bsum)) expect_error(collapv(wlddev, c(1,0))) expect_error(collapv(wlddev, c(1,14))) expect_warning(collapv(wlddev, 1, bla = 1)) # passes to fmean.data.frame which give the error. expect_error(collapv(wlddev, 2, cols = 9:14)) expect_error(collapv(wlddev, 2, cols = c("PCGDP","bla"))) expect_error(collapv(wlddev, 2, cols = c("PCGDP","LIFEEX1"))) expect_error(collapv(wlddev, 2, custom = ~ PCGDP)) expect_error(collapv(wlddev, 2, custom = list(fmean, fmode))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:14, fmode = 4:6))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:14, 4:6))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:14, fmode2 = 4:6))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:13, fmode = c("GINI","bla")))) expect_error(collapv(wlddev, 2, custom = list(fmean = 9:13, fmode = c("GINI","PCGDP2")))) }) options(warn = 1) collapse/tests/testthat/test-fsubset-ftransform.R0000644000176200001440000001532414777170131022025 0ustar liggesuserscontext("fsubset and ftransform") # rm(list = ls()) set.seed(101) v <- na_insert(mtcars$mpg) m <- na_insert(as.matrix(mtcars)) test_that("fsubset works like base::subset for vectors and matrices", { expect_equal(fsubset(v, 1:3), v[1:3]) expect_equal(fsubset(v, -(1:3)), v[-(1:3)]) expect_equal(fsubset(v, 4:8), v[4:8]) expect_equal(fsubset(v, v > 16), v[v > 16 & !is.na(v)]) expect_equal(fsubset(m, 1:3), m[1:3, ]) expect_equal(fsubset(m, v > 16), m[v > 16, ]) expect_equal(fsubset(m, -(4:8)), m[-(4:8), ]) expect_equal(fsubset(m, -(4:8), 1:5), m[-(4:8), 1:5]) expect_equal(fsubset(m, v > 16 & !is.na(v), mpg:vs), subset(m, v > 16 & !is.na(v), mpg:vs)) expect_equal(fsubset(m, v > 16 & !is.na(v), mpg, cyl:vs), subset(m, v > 16 & !is.na(v), c(mpg, cyl:vs))) expect_equal(fsubset(m, v > 16 & !is.na(v), -mpg), subset(m, v > 16 & !is.na(v), -mpg)) expect_equal(fsubset(m, v > 16 & !is.na(v), -(mpg:vs)), subset(m, v > 16 & !is.na(v), -(mpg:vs))) }) test_that("fsubset works like base::subset for data frames", { expect_equal(unattrib(fsubset(airquality, Ozone > 42)), unattrib(subset(airquality, Ozone > 42))) expect_equal(unattrib(fsubset(airquality, Temp > 80, Ozone, Temp)), unattrib(subset(airquality, Temp > 80, select = c(Ozone, Temp)))) expect_equal(unattrib(fsubset(airquality, Day == 1, -Temp)), unattrib(subset(airquality, Day == 1, select = -Temp))) expect_equal(unattrib(fsubset(airquality, Day == 1, -(Day:Temp))), unattrib(subset(airquality, Day == 1, -(Day:Temp)))) expect_equal(unattrib(fsubset(airquality, Day == 1, Ozone:Wind)), unattrib(subset(airquality, Day == 1, Ozone:Wind))) expect_equal(unattrib(fsubset(airquality, Day == 1 & !is.na(Ozone), Ozone:Wind, Month)), unattrib(subset(airquality, Day == 1 & !is.na(Ozone), c(Ozone:Wind, Month)))) }) test_that("fsubset column renaming", { expect_equal(names(fsubset(airquality, Temp > 90, OZ = Ozone, Temp)), .c(OZ, Temp)) expect_equal(names(fsubset(mtcars, cyl == 4, bla = cyl)), "bla") }) test_that("ss works like an improved version of [", { # replaced setRownames wit unattrib because of unexplained test failures on some systems expect_equal(ss(airquality, 1:100, 1:3), airquality[1:100, 1:3]) expect_equal(unattrib(ss(airquality, -(1:100), 1:3)), unattrib(airquality[-(1:100), 1:3])) expect_equal(ss(airquality, 1:100, -(1:3)), airquality[1:100, -(1:3)]) expect_equal(unattrib(ss(airquality, -(1:100), -(1:3))), unattrib(airquality[-(1:100), -(1:3)])) nam <- names(airquality)[2:5] set.seed(101) v <- sample.int(fnrow(airquality), 100) expect_equal(unattrib(ss(airquality, v, nam)), unattrib(airquality[v, nam, drop = FALSE])) expect_equal(unattrib(ss(airquality, -v, nam)), unattrib(airquality[-v, nam, drop = FALSE])) set.seed(101) vl <- sample(c(TRUE, FALSE), fnrow(airquality), replace = TRUE) cl <- sample(c(TRUE, FALSE), fncol(airquality), replace = TRUE) expect_equal(unattrib(ss(airquality, vl, nam)), unattrib(airquality[vl, nam, drop = FALSE])) expect_equal(unattrib(ss(airquality, vl, cl)), unattrib(airquality[vl, cl, drop = FALSE])) set.seed(101) vl <- na_insert(vl) cl[4L] <- NA expect_equal(unattrib(ss(airquality, vl, nam)), unattrib(airquality[vl & !is.na(vl), nam, drop = FALSE])) expect_equal(unattrib(ss(airquality, vl, cl)), unattrib(airquality[vl & !is.na(vl), cl & !is.na(cl), drop = FALSE])) expect_equal(ss(mtcars, -(1:3)), mtcars[-(1:3), ]) expect_equal(ss(mtcars, -c(5, 14)), mtcars[-c(5, 14), ]) }) test_that("ftransform works like base::transform", { expect_equal(ftransform(airquality, Ozone = -Ozone), transform(airquality, Ozone = -Ozone)) expect_equal(ftransform(airquality, new = Ozone / Wind * 100), transform(airquality, new = Ozone / Wind * 100)) expect_equal(ftransform(airquality, new = -Ozone, Temp = (Temp-32)/1.8), transform(airquality, new = -Ozone, Temp = (Temp-32)/1.8)) expect_equal(ftransform(airquality, new = -Ozone, new2 = 1, Temp = NULL), transform(airquality, new = -Ozone, new2 = 1, Temp = NULL)) expect_equal(ftransform(airquality, Ozone = NULL, Temp = NULL), transform(airquality, Ozone = NULL, Temp = NULL)) }) test_that("fcompute works well", { expect_equal(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3), ftransform(airquality[1:3], new = -Ozone, new2 = 1)) expect_equal(names(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3)), .c(Ozone, Solar.R, Wind, new, new2)) expect_equal(names(fcompute(airquality, new = -Ozone, new2 = 1)), .c(new, new2)) expect_equal(names(fcompute(airquality, Ozone = -Ozone, new = 1, keep = 1:3)), .c(Ozone, Solar.R, Wind, new)) }) test_that("fcomputev works well", { expect_equal(fcomputev(iris, is.numeric, log), dapply(nv(iris), log)) expect_equal(fcomputev(iris, is.numeric, fcumsum, apply = FALSE), fcumsum(nv(iris))) expect_equal(fcomputev(iris, is.numeric, `/`, Sepal.Length), nv(iris) %c/% iris$Sepal.Length) expect_equal(fcomputev(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE), fmean(nv(iris), iris$Species, TRA = "replace")) expect_equal(fcomputev(iris, is.numeric, log, keep = "Species"), colorder(ftransformv(iris, is.numeric, log), Species)) expect_equal(fcomputev(iris, is.numeric, log, keep = names(iris)), ftransformv(iris, is.numeric, log)) expect_equal(fcomputev(iris, is.numeric, fcumsum, apply = FALSE, keep = "Species"), colorder(ftransformv(iris, is.numeric, fcumsum, apply = FALSE), Species)) expect_equal(fcomputev(iris, is.numeric, fcumsum, apply = FALSE, keep = names(iris)), ftransformv(iris, is.numeric, fcumsum, apply = FALSE)) expect_equal(fcomputev(iris, is.numeric, `/`, Sepal.Length, keep = "Species"), colorder(ftransformv(iris, is.numeric, `/`, Sepal.Length), Species)) expect_equal(fcomputev(iris, is.numeric, `/`, Sepal.Length, keep = names(iris)), ftransformv(iris, is.numeric, `/`, Sepal.Length)) expect_equal(fcomputev(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE, keep = "Species"), colorder(ftransformv(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE), Species)) expect_equal(fcomputev(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE, keep = names(iris)), ftransformv(iris, is.numeric, fmean, Species, TRA = "replace", apply = FALSE)) }) # Still do wrong input... test_that("fsubset error for wrong input", { # expect_error(fsubset(mtcars, mpg)) expect_warning(fsubset(mtcars, mpg:cyl)) expect_error(fsubset(mtcars, "mpg")) expect_error(fsubset(mtcars, TRUE)) expect_error(fsubset(mtcars, mpg > 15, cyl < 4)) expect_error(fsubset(mtcars, mpg > 15, TRUE)) expect_error(fsubset(mtcars, mpg > 15, 35)) expect_error(fsubset(mtcars, mpg > 15, ~mpg)) }) collapse/tests/testthat/test-GRP.R0000644000176200001440000010075215202504365016615 0ustar liggesuserscontext("radixorder, GRP, qF, qG") # print(str(wlddev)) # rm(list = ls()) NCRAN <- Sys.getenv("NCRAN") == "TRUE" set.seed(101) mtcNA <- na_insert(na_insert(na_insert(mtcars), 0.05, value = Inf), 0.05, value = -Inf) wlddev2 <- slt(wlddev, -date) num_vars(wlddev2) <- round(num_vars(wlddev2), 8) num_vars(wlddev2) <- na_insert(na_insert(num_vars(wlddev2), 0.01, value = Inf), 0.01, value = -Inf) wldNA <- na_insert(wlddev2) GGDCNA <- na_insert(GGDC10S) unlab <- function(x) `attr<-`(x, "label", NULL) test_that("radixorder works like order(.., method = 'radix')", { wldNA$ones = 1 wldNA$sequ = 1:fnrow(wldNA) # Ordering single variable expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = NA))), lapply(wldNA, order, method = "radix", na.last = NA)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = NA))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = NA)) # get starts expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, starts = TRUE))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, starts = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE, starts = TRUE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE, starts = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) # get group.sizes expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, group.sizes = TRUE))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) # get starts and group.sizes expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix")) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", na.last = FALSE)) expect_identical(lapply(wldNA, function(x) unattrib(radixorder(x, decreasing = TRUE, na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(wldNA, order, method = "radix", decreasing = TRUE, na.last = FALSE)) randcols <- function(n = 3) replicate(n, sample.int(11, sample.int(5, 1)), simplify = FALSE) order2 <- function(x, ...) do.call(order, c(gv(wldNA, x), list(...))) # Ordering by multiple variables rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x)))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = NA))), lapply(rc, order2, method = "radix", na.last = NA)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = NA))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = NA)) # get starts expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), starts = TRUE))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, starts = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE, starts = TRUE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE, starts = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) # get group.sizes expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), group.sizes = TRUE))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) # get starts and group.sizes expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix")) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", na.last = FALSE)) rc <- randcols() expect_identical(lapply(rc, function(x) unattrib(radixorderv(gv(wldNA, x), decreasing = TRUE, na.last = FALSE, starts = TRUE, group.sizes = TRUE))), lapply(rc, order2, method = "radix", decreasing = TRUE, na.last = FALSE)) }) test_that("GRP works as intended", { withr::local_locale(c(LC_COLLATE = "C")) expect_visible(GRP(unname(as.list(mtcars)))) expect_visible(GRP(unname(as.list(mtcars)), 8:9)) expect_equal(GRPnames(GRP(mtcars$cyl)), c("4","6","8")) expect_equal(GRPnames(GRP(mtcars$cyl), FALSE), c(4, 6, 8)) expect_identical(GRPnames(GRP(mtcars$cyl, return.groups = FALSE)), NULL) expect_output(print(GRP(mtcars, ~ cyl + am))) expect_output(print(GRP(mtcars, ~ cyl + am, return.groups = FALSE))) # expect_invisible(plot(GRP(mtcars, ~ cyl + am))) expect_identical(GRP(GRP(mtcars$mpg)), GRP(mtcars$mpg)) expect_identical(GRP.default(GRP(mtcars$mpg)), GRP(mtcars$mpg)) expect_equal(GRP(mtcars$mpg)[[2]], unattrib(as.factor(mtcars$mpg))) expect_equal(GRP(mtcars$cyl)[[2]], unattrib(as.factor(mtcars$cyl))) expect_equal(GRP(wlddev2$country)[[2]], unattrib(as.factor(wlddev2$country))) expect_equal(GRP(wlddev2$PCGDP)[[2]], unattrib(factor(wlddev2$PCGDP, exclude = NULL))) expect_equal(GRP(mtcars$mpg)[[1]], attributes(qG(mtcars$mpg))[[1]]) expect_equal(GRP(mtcars$cyl)[[1]], attributes(qG(mtcars$cyl))[[1]]) expect_equal(GRP(wlddev2$country)[[1]], attributes(qG(wlddev2$country))[[1]]) expect_equal(GRP(wlddev2$PCGDP)[[1]], attributes(qG(wlddev2$PCGDP, na.exclude = FALSE))[[1]]) expect_equal(GRP(mtcars$mpg)[[4]][[1]], attributes(qG(mtcars$mpg, return.groups = TRUE))[["groups"]]) expect_equal(GRP(mtcars$cyl)[[4]][[1]], attributes(qG(mtcars$cyl, return.groups = TRUE))[["groups"]]) expect_equal(GRP(wlddev2$country)[[4]][[1]], attributes(qG(wlddev2$country, return.groups = TRUE))[["groups"]]) expect_equal(GRP(wlddev2$PCGDP)[[4]][[1]], attributes(qG(wlddev2$PCGDP, na.exclude = FALSE, return.groups = TRUE))[["groups"]]) expect_visible(GRP(1:10)) expect_visible(GRP(1:10, decreasing = TRUE)) expect_visible(GRP(mtcNA$mpg)) expect_visible(GRP(mtcNA$mpg, return.groups = FALSE)) expect_visible(GRP(mtcNA$mpg, return.groups = FALSE, return.order = TRUE)) expect_visible(GRP(mtcNA$mpg, na.last = FALSE)) expect_visible(GRP(mtcNA$mpg, na.last = FALSE, decreasing = TRUE)) expect_visible(GRP(mtcNA$mpg, na.last = FALSE, decreasing = TRUE, return.order = TRUE)) expect_visible(GRP(list(a = 1:3, b = 1:3))) expect_visible(GRP(mtcars)) expect_visible(GRP(mtcNA)) expect_visible(GRP(mtcNA, return.groups = FALSE)) expect_visible(GRP(mtcNA, return.groups = FALSE, return.order = TRUE)) expect_visible(GRP(mtcNA, na.last = FALSE)) expect_visible(GRP(mtcNA, na.last = FALSE, decreasing = TRUE)) expect_visible(GRP(mtcNA, na.last = FALSE, decreasing = TRUE, return.order = TRUE)) expect_visible(GRP(wlddev2)) expect_visible(GRP(wlddev2, return.groups = FALSE)) expect_true(all_obj_equal(GRP(mtcars, ~ cyl + vs + am)[1:7], GRP(mtcars, c("cyl","vs","am"))[1:7], GRP(mtcars, c(2,8:9))[1:7])) }) test_that("GRP gives errors for wrong input", { expect_error(GRP(mtcars$mpg, na.last = NA)) expect_error(GRP(~ bla)) expect_error(GRP(1:10, 1)) expect_error(GRP(1:10, ~ cyl)) expect_error(GRP(1:10, "cyl")) # expect_error(GRP(mtcars, TRUE)) expect_error(GRP(mtcars, ~ cyl + bla)) expect_error(GRP(mtcars, c("bal","cyl"))) expect_error(GRP(mtcars, 11:12)) expect_error(GRP(list(a = 1:3, b = 1:4))) expect_visible(GRP(mtcars, ~ cyl + vs, order = -1L)) }) test_that("fgroup_by works as intended", { ca <- function(x) { nam <- names(x[[4L]]) attributes(x[[4L]]) <- NULL names(x[[4L]]) <- nam x } expect_output(print(fgroup_by(mtcars, cyl, vs, am))) expect_equal(GRP(fgroup_by(mtcars, cyl, vs, am)), ca(GRP(mtcars, ~ cyl + vs + am, call = FALSE))) expect_equal(GRP(fgroup_by(mtcars, c("cyl", "vs", "am"))), ca(GRP(mtcars, ~ cyl + vs + am, call = FALSE))) expect_equal(GRP(fgroup_by(mtcars, c(2, 8:9))), ca(GRP(mtcars, ~ cyl + vs + am, call = FALSE))) expect_identical(fungroup(fgroup_by(mtcars, cyl, vs, am)), mtcars) expect_equal(fgroup_by(fgroup_by(mtcars, cyl, vs, am), cyl), fgroup_by(mtcars, cyl)) # The issue is that GRP.grouped_df does not reclass the groups... take up another time. # This is to fool very silly checks on CRAN scanning the code of the tests # group_by <- eval(parse(text = paste0("dplyr", ":", ":", "group_by"))) # expect_equal(GRP(group_by(mtcars, cyl, vs, am), call = FALSE), GRP(as.list(mtcars), ~ cyl + vs + am, call = FALSE)) # expect_equal(GRP(group_by(mtcNA, cyl, vs, am)), GRP(mtcNA, ~ cyl + vs + am, call = NULL)) # expect_equal(GRP(group_by(GGDC10S, Variable, Country)), GRP(GGDC10S, ~ Variable + Country, call = FALSE)) # expect_equal(GRP(group_by(GGDCNA, Variable, Country)), GRP(GGDCNA, ~ Variable + Country, call = NULL)) # expect_equal(GRP(group_by(wlddev, region, year)), GRP(wlddev, ~ region + year, call = NULL)) # expect_equal(GRP(group_by(wldNA, region, year)), GRP(wldNA, ~ region + year, call = NULL)) }) gdat <- gby(GGDCNA, Variable, Country) test_that("fgroup_vars works as intended", { expect_identical(fgroup_vars(gdat), slt(GGDCNA, Variable, Country)) expect_identical(fgroup_vars(gdat, "unique"), funique(slt(GGDCNA, Variable, Country), sort = TRUE)) expect_identical(fgroup_vars(gdat, "names"), .c(Variable, Country)) expect_identical(fgroup_vars(gdat, "indices"), c(4L, 1L)) expect_identical(fgroup_vars(gdat, "named_indices"), setNames(c(4L, 1L), .c(Variable, Country))) expect_identical(fgroup_vars(gdat, "logical"), `[<-`(logical(fncol(GGDCNA)), c(4L, 1L), TRUE)) expect_identical(fgroup_vars(gdat, "named_logical"), setNames(`[<-`(logical(fncol(GGDCNA)), c(4L, 1L), TRUE), names(GGDC10S))) expect_error(fgroup_vars(gdat, "bla")) }) test_that("GRP <> factor conversions run seamlessly", { expect_identical(unclass(iris$Species), unclass(as_factor_GRP(GRP(iris$Species)))) # as_factor_GRP always adds class "na.included" expect_identical(unclass(wlddev$iso3c[1:200]), unclass(as_factor_GRP(GRP(wlddev$iso3c[1:200])))) # as_factor_GRP always adds class "na.included" expect_identical(unclass(fdroplevels(wlddev$iso3c[1:200])), unclass(as_factor_GRP(GRP(wlddev$iso3c[1:200], drop = TRUE)))) # as_factor_GRP always adds class "na.included" expect_identical(unclass(`vlabels<-`(wlddev2$iso3c, "label", NULL)), unclass(as_factor_GRP(GRP(wlddev2$iso3c)))) set.seed(101) int <- sample.int(10,100,TRUE) expect_identical(unclass(qF(int)), unclass(as_factor_GRP(GRP(int)))) expect_identical(unclass(qF(int)), unclass(as_factor_GRP(GRP(qF(int))))) intNA <- int set.seed(101) intNA[sample(100,20)] <- NA expect_identical(unclass(qF(intNA, na.exclude = FALSE)), unclass(as_factor_GRP(GRP(intNA)))) expect_identical(unclass(qF(intNA, na.exclude = FALSE)), unclass(as_factor_GRP(GRP(qF(intNA))))) dblNA <- as.double(intNA) if(NCRAN) expect_false(unattrib(identical(unclass(qF(dblNA)), unclass(as_factor_GRP(GRP(dblNA)))))) # qF with na.exclude = TRUE retains double NA's... if(NCRAN) expect_false(unattrib(identical(unclass(qF(dblNA)), unclass(as_factor_GRP(GRP(qF(dblNA))))))) expect_identical(qF(dblNA, na.exclude = FALSE), as_factor_GRP(GRP(dblNA))) expect_identical(qF(dblNA, na.exclude = FALSE), as_factor_GRP(GRP(qF(dblNA)))) expect_identical(qF(dblNA, na.exclude = FALSE), as_factor_GRP(GRP(qF(dblNA, na.exclude = FALSE)))) }) # could also do qG to GRP, but qG is same as factor.. and is a programmers function anyway.. test_that("qF and qG work as intended", { withr::local_locale(c(LC_COLLATE = "C")) af <- lapply(wlddev2, function(x) as.factor(x)) expect_equal(af[!fact_vars(wlddev2, "logical")], lapply(gv(wlddev2, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "radix")))) expect_equal(af[!fact_vars(wlddev2, "logical")], lapply(gv(wlddev2, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "hash")))) af <- lapply(af, unattrib) expect_identical(af, lapply(wlddev2, function(x) unattrib(qF(x, method = "radix")))) expect_identical(af, lapply(wlddev2, function(x) unattrib(qF(x, method = "hash")))) expect_identical(af, lapply(wlddev2, function(x) unattrib(qG(x, method = "radix")))) expect_identical(af, lapply(wlddev2, function(x) unattrib(qG(x, method = "hash")))) afNA <- lapply(wldNA, function(x) as.factor(x)) expect_equal(afNA[!fact_vars(wlddev2, "logical")], lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "radix")))) expect_equal(afNA[!fact_vars(wlddev2, "logical")], lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unlab(qF(x, method = "hash")))) afNA <- lapply(afNA, unattrib) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qF(x, method = "radix")))) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qF(x, method = "hash")))) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qG(x, method = "radix")))) expect_identical(afNA, lapply(wldNA, function(x) unattrib(qG(x, method = "hash")))) afnoNA <- lapply(wldNA, function(x) factor(x, exclude = NULL)) expect_equal(lapply(afnoNA[!fact_vars(wlddev2, "logical")], unclass), lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unclass(unlab(qF(x, method = "radix", na.exclude = FALSE))))) expect_equal(lapply(afnoNA[!fact_vars(wlddev2, "logical")], unclass), lapply(gv(wldNA, !fact_vars(wlddev2, "logical")), function(x) unclass(unlab(qF(x, method = "hash", na.exclude = FALSE))))) afnoNA <- lapply(afnoNA, unattrib) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qF(x, method = "radix", na.exclude = FALSE)))) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qF(x, method = "hash", na.exclude = FALSE)))) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qG(x, method = "radix", na.exclude = FALSE)))) expect_identical(afnoNA, lapply(wldNA, function(x) unattrib(qG(x, method = "hash", na.exclude = FALSE)))) countryf <- as.factor(wlddev2$country) expect_identical(countryf, unlab(qF(wlddev2$country))) expect_identical(countryf, unlab(qF(wlddev2$country, method = "radix"))) expect_identical(countryf, unlab(qF(wlddev2$country, method = "hash"))) # identical(as.factor(wlddev2$iso3c), wlddev2$iso3c) expect_identical(levels(wlddev2$iso3c), levels(unlab(qF(wlddev2$iso3c)))) expect_identical(unattrib(wlddev2$iso3c), unattrib(unlab(qF(wlddev2$iso3c)))) expect_identical(class(wlddev2$iso3c), class(unlab(qF(wlddev2$iso3c)))) expect_equal(lapply(wlddev2, function(x) qF(x, method = "radix")), lapply(wlddev2, function(x) qF(x, method = "hash"))) expect_equal(lapply(wldNA, function(x) qF(x, method = "radix")), lapply(wldNA, function(x) qF(x, method = "hash"))) expect_equal(lapply(wlddev2, function(x) qG(x, method = "radix")), lapply(wlddev2, function(x) qG(x, method = "hash"))) expect_equal(lapply(wldNA, function(x) qG(x, method = "radix")), lapply(wldNA, function(x) qG(x, method = "hash"))) expect_equal(lapply(wlddev2, function(x) qF(x, method = "radix", na.exclude = FALSE)), lapply(wlddev2, function(x) qF(x, method = "hash", na.exclude = FALSE))) expect_equal(lapply(wldNA, function(x) qF(x, method = "radix", na.exclude = FALSE)), lapply(wldNA, function(x) qF(x, method = "hash", na.exclude = FALSE))) expect_equal(lapply(wlddev2, function(x) qG(x, method = "radix", na.exclude = FALSE)), lapply(wlddev2, function(x) qG(x, method = "hash", na.exclude = FALSE))) expect_equal(lapply(wldNA, function(x) qG(x, method = "radix", na.exclude = FALSE)), lapply(wldNA, function(x) qG(x, method = "hash", na.exclude = FALSE))) # Testing reordering of factor levels expect_identical(qF(wlddev$iso3c), wlddev$iso3c) riso3 <- rev(wlddev$iso3c) expect_identical(qF(riso3), riso3) expect_identical(qF(riso3, sort = FALSE), factor(riso3, levels = funique(riso3))) iso3na <- na_insert(wlddev$iso3c) expect_identical(qF(iso3na), iso3na) expect_identical(unclass(qF(iso3na, na.exclude = FALSE, keep.attr = FALSE)), unclass(addNA(iso3na))) riso3na <- na_insert(riso3) expect_identical(qF(riso3na), riso3na) expect_identical(unclass(qF(riso3na, na.exclude = FALSE, keep.attr = FALSE)), unclass(addNA(riso3na))) expect_identical(qF(riso3na, sort = FALSE), factor(riso3na, levels = funique(riso3))) expect_identical(unclass(qF(riso3na, sort = FALSE, na.exclude = FALSE)), unclass(factor(riso3na, levels = funique(riso3na), exclude = NULL))) expect_identical(unclass(qF(riso3na, sort = FALSE, na.exclude = FALSE)), unclass(factor(riso3na, levels = unique(riso3na), exclude = NULL))) }) # Could still refine this code, but is not at all critical !! date <- qG(wlddev$date, return.groups = TRUE) dateg <- GRP(date, call = FALSE) dateg$ordered <- NULL date <- wlddev$date vlabels(date) <- NULL dateg2 <- GRP(date, call = FALSE) dateg2$ordered <- NULL test_that("GRP <> qG and factor <> qG conversions work", { # expect_equal(dateg, dateg2) expect_equal(qF(unattrib(wlddev$country)), as_factor_qG(qG(unattrib(wlddev$country), return.groups = TRUE))) expect_equal(qF(unattrib(wlddev$country)), qF(qG(unattrib(wlddev$country), return.groups = TRUE))) expect_equal(qG(unattrib(wlddev$country)), qG(qF(unattrib(wlddev$country)))) expect_equal(qG(unattrib(wlddev$country), return.groups = TRUE), qG(qF(unattrib(wlddev$country)), return.groups = TRUE)) }) base_group <- function(x, sort = FALSE, group.sizes = FALSE) { if(sort) o <- if(is.list(x)) do.call(order, c(x, list(method = "radix"))) else order(x, method = "radix") if(is.list(x)) x <- do.call(paste, c(x, list(sep = "."))) ux <- unique(if(sort) x[o] else x) r <- match(x, ux) attr(r, "N.groups") <- length(ux) if(group.sizes) attr(r, "group.sizes") <- tabulate(r, length(ux)) if(!sort) oldClass(r) <- c("qG", "na.included") r } test_that("group() works as intended", { wlduo <- wlddev[order(rnorm(nrow(wlddev))), ] wlduoNA <- na_insert(wlduo) dlist <- c(mtcNA, wlddev, wlduo, GGDCNA, airquality) # Single grouping variable expect_identical(lapply(dlist, group, group.sizes = TRUE), lapply(dlist, base_group, group.sizes = TRUE)) # Multiple grouping variables g <- replicate(70, sample.int(11, sample.int(6, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) group(.subset(mtcars, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(mtcars, i), group.sizes = TRUE))) expect_identical(lapply(g, function(i) group(.subset(mtcNA, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(mtcNA, i), group.sizes = TRUE))) g <- replicate(50, sample.int(13, sample.int(4, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) group(.subset(wlduo, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(wlduo, i), group.sizes = TRUE))) expect_identical(lapply(g, function(i) group(.subset(wlduoNA, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(wlduoNA, i), group.sizes = TRUE))) g <- replicate(50, sample.int(13, 3, replace = TRUE), simplify = FALSE) expect_identical(lapply(g, function(i) group(.subset(wlduo, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(wlduo, i), group.sizes = TRUE))) expect_identical(lapply(g, function(i) group(.subset(wlduoNA, i), group.sizes = TRUE)), lapply(g, function(i) base_group(.subset(wlduoNA, i), group.sizes = TRUE))) # Positive and negative values give the same grouping nwld <- nv(wlduo) expect_identical(lapply(nwld, group), lapply(nwld %c*% -1, group)) expect_visible(group(nwld %c*% -1)) expect_visible(group(nwld[c(4,2,3)] %c*% -1)) expect_equal(group(0), base_group(0)) expect_equal(group(1), base_group(1)) expect_equal(group(0L), base_group(0L)) expect_equal(group(1L), base_group(1L)) expect_equal(group(Inf), base_group(Inf)) expect_equal(group(-Inf), base_group(-Inf)) expect_equal(group(c(NaN, NA, 0, 1, Inf, -Inf)), base_group(c(NaN, NA, 0, 1, Inf, -Inf))) expect_equal(group(NA_integer_), base_group(NA_integer_)) expect_equal(group(NA_real_), base_group(NA_real_)) expect_equal(group(NaN), base_group(NaN)) expect_equal(group(NA), base_group(NA)) expect_equal(group(NA_character_), base_group(NA_character_)) }) GRP2 <- function(x) { g <- GRP.default(x, sort = TRUE, return.groups = FALSE, call = FALSE) r <- g[[2]] attr(r, "N.groups") <- g[[1]] attr(r, "group.sizes") <- g[[3]] r } qG2 <- function(x, method = "auto", sort = TRUE) unclass(qG(x, na.exclude = FALSE, sort = sort, method = method)) test_that("GRP2() and qG2 work as intended", { wlduo <- wlddev[order(rnorm(nrow(wldNA))), ] dlist <- c(mtcNA, wlddev, wlduo, GGDCNA, airquality) # Single grouping variable expect_identical(lapply(dlist, GRP2), lapply(dlist, base_group, sort = TRUE, group.sizes = TRUE)) bgres <- lapply(dlist, base_group, sort = TRUE) expect_identical(lapply(dlist, qG2), bgres) expect_identical(lapply(dlist, qG2, method = "hash"), bgres) expect_identical(lapply(dlist, qG2, method = "radix"), bgres) expect_true(all_identical(qG2(wlduo$country, method = "radix", sort = FALSE), qG2(wlduo$country, method = "hash", sort = FALSE), unclass(base_group(wlduo$country, sort = FALSE)))) # Multiple grouping variables g <- replicate(50, sample.int(11, sample.int(6, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(mtcars, i))), lapply(g, function(i) base_group(.subset(mtcars, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, sample.int(4, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, 3, replace = TRUE), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) }) test_that("GRP2() works as intended", { wlduo <- wlddev[order(rnorm(nrow(wldNA))), ] dlist <- c(mtcNA, wlddev, wlduo, GGDCNA, airquality) # Single grouping variable expect_identical(lapply(dlist, GRP2), lapply(dlist, base_group, sort = TRUE, group.sizes = TRUE)) # Multiple grouping variables g <- replicate(50, sample.int(11, sample.int(6, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(mtcars, i))), lapply(g, function(i) base_group(.subset(mtcars, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, sample.int(4, 1)), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) g <- replicate(30, sample.int(13, 3, replace = TRUE), simplify = FALSE) expect_identical(lapply(g, function(i) GRP2(.subset(wlduo, i))), lapply(g, function(i) base_group(.subset(wlduo, i), sort = TRUE, group.sizes = TRUE))) }) # This is a bit odd test, but there have been some issues here in the past... test_that("Single groups works correctly", { g <- replicate(30, qG(rep(1, 10)), simplify = FALSE) expect_true(all_identical(g)) expect_true(all(sapply(g, attr, "N.groups") == 1L)) g <- replicate(30, qG(rep(1, 10), na.exclude = FALSE), simplify = FALSE) expect_true(all_identical(g)) expect_true(all(sapply(g, attr, "N.groups") == 1L)) g <- replicate(30, qG(replace(rep(1, 10), 3:4, NA_real_)), simplify = FALSE) expect_true(all_identical(g)) expect_true(all(sapply(g, attr, "N.groups") == 1L)) g <- replicate(30, qG(replace(rep(1, 10), 3:4, NA_real_), na.exclude = FALSE), simplify = FALSE) expect_true(all_identical(g)) expect_true(all(sapply(g, attr, "N.groups") == 2L)) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { # This is to fool very silly checks on CRAN scanning the code of the tests pwlddev <- eval(parse(text = paste0("plm", ":", ":", "pdata.frame(wlddev, index = c('iso3c', 'year'))"))) iso3c <- eval(parse(text = paste0("plm", ":", ":", "index(pwlddev, 1L)"))) year <- eval(parse(text = paste0("plm", ":", ":", "index(pwlddev, 2L)"))) test_that("GRP pseries and pdata.frame methods work as intended", { expect_equal(GRP(pwlddev, call = FALSE), GRP(iso3c, call = FALSE)) expect_equal(GRP(pwlddev$PCGDP, call = FALSE), GRP(pwlddev, call = FALSE)) expect_equal(GRP(pwlddev, effect = "year", call = FALSE), GRP(year, call = FALSE)) expect_equal(GRP(pwlddev$PCGDP, effect = "year", call = FALSE), GRP(pwlddev, effect = "year", call = FALSE)) }) } fl <- slt(wlddev, region, income) set.seed(101) flNA <- na_insert(fl) test_that("finteraction works as intended", { expect_equal(`oldClass<-`(finteraction(fl), "factor"), base::interaction(fl, drop = TRUE, lex.order = TRUE)) expect_equal(`oldClass<-`(finteraction(ss(fl, 1:300)), "factor"), base::interaction(ss(fl, 1:300), drop = TRUE, lex.order = TRUE)) # missing levels expect_equal(unattrib(finteraction(fl, factor = FALSE, sort = TRUE)), unattrib(base::interaction(fl, drop = TRUE, lex.order = TRUE))) expect_equal(unattrib(finteraction(fl, factor = FALSE, sort = FALSE)), unattrib(group(fl))) # Missing value behavior is always different !! # expect_equal(`oldClass<-`(finteraction(flNA), "factor"), factor(base::interaction(flNA, drop = TRUE, lex.order = TRUE), exclude = NULL)) # expect_equal(`oldClass<-`(finteraction(ss(flNA, 1:300)), "factor"), base::interaction(ss(flNA, 1:300), drop = TRUE, lex.order = TRUE)) }) wld150 <- ss(wlddev, 1:150) vlabels(wld150) <- NULL set.seed(101) wldNA150 <- na_insert(ss(wlddev, 1:150)) vlabels(wldNA150) <- NULL test_that("fdroplevels works as intended", { expect_identical(fdroplevels(wld150), droplevels(wld150)) expect_identical(fdroplevels(wldNA150), droplevels(wldNA150)) expect_identical(fdroplevels(wld150$iso3c), droplevels(wld150$iso3c)) expect_identical(fdroplevels(wldNA150$iso3c), droplevels(wldNA150$iso3c)) expect_message(fdroplevels(1:3)) # expect_warning(fdroplevels(wld150, bla = 1)) # expect_warning(fdroplevels(wld150$iso3c, bla = 1)) expect_error(fdroplevels.factor(wld150$country)) }) # Note: Should extend with other than just character data.. rctry <- wlddev$country[order(rnorm(length(wlddev$country)))] set.seed(101) rctryNA <- na_insert(rctry) rdat <- sbt(GGDC10S, order(rnorm(length(Variable))), Variable, Country) vlabels(rdat) <- NULL vlabels(rdat, "format.stata") <- NULL set.seed(101) rdatNA <- na_insert(rdat) test_that("funique works well", { withr::local_locale(c(LC_COLLATE = "C")) expect_equal(funique(rctry), unique(rctry)) expect_equal(funique(rctry, sort = TRUE), sort(unique(rctry))) expect_equal(funique(rctryNA), unique(rctryNA)) expect_equal(funique(rctryNA, sort = TRUE), c(sort(unique(rctryNA)), NA)) expect_equal(funique(mtcars[.c(cyl, vs, am)]), unique(mtcars[.c(cyl, vs, am)])) expect_equal(funique(mtcNA[.c(cyl, vs, am)]), unique(mtcNA[.c(cyl, vs, am)])) expect_equal(funique(rdat), setRownames(unique(rdat))) expect_equal(funique(rdat, sort = TRUE), roworderv(unique(rdat))) expect_equal(funique(rdatNA), setRownames(unique(rdatNA))) expect_equal(funique(rdatNA, sort = TRUE), roworderv(unique(rdatNA))) expect_equal(lapply(wlddev, function(x) unattrib(base::unique(x))), lapply(wlddev, function(x) unattrib(funique(x)))) expect_equal(lapply(wldNA, function(x) unattrib(base::unique(x))), lapply(wldNA, function(x) unattrib(funique(x)))) expect_equal(lapply(GGDC10S, function(x) unattrib(base::unique(x))), lapply(GGDC10S, function(x) unattrib(funique(x)))) }) test_that("GRP.default(drop = FALSE) preserves unused factor levels", { df <- data.frame( f1 = factor(c("a", "b", "a"), levels = c("a", "b", "c")), f2 = factor(c("x", "x", "y"), levels = c("x", "y", "z")), v = c(1, 2, 3) ) # Basic Cartesian product g <- GRP.default(df, ~ f1 + f2, drop = FALSE) expect_equal(g$N.groups, 9L) expect_equal(g$group.sizes, c(1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)) expect_equal(length(g$group.id), 3L) expect_false(anyNA(g$group.id)) expect_equal(nrow(g$groups), 9L) expect_equal(names(g$groups), c("f1", "f2")) # group.starts: first occurrence, 0 for unobserved expect_equal(g$group.starts[g$group.sizes == 0L], rep(0L, 6L)) # drop = TRUE returns observed groups only gt <- GRP.default(df, ~ f1 + f2, drop = TRUE) expect_equal(gt$N.groups, 3L) # Falls through to default path when no factor columns df2 <- data.frame(x = c(1, 2, 1), y = c(10, 20, 10)) g2 <- GRP.default(df2, drop = FALSE) expect_equal(g2$N.groups, GRP.default(df2, drop = TRUE)$N.groups) # Factor with NAs: NA becomes an explicit level df3 <- data.frame(f = factor(c("a", "b", NA, "a"), levels = c("a", "b", "c"))) g3 <- GRP.default(df3, ~ f, drop = FALSE) expect_equal(g3$N.groups, 4L) expect_false(anyNA(g3$group.id)) # Mix factor + non-factor: non-factor uses observed unique values df4 <- data.frame( f = factor(c("a", "b", "a", "c"), levels = c("a", "b", "c", "d")), x = c(10, 20, 10, 20) ) g4 <- GRP.default(df4, ~ f + x, drop = FALSE) expect_equal(g4$N.groups, 8L) # 4 levels x 2 observed values expect_equal(sum(g4$group.sizes), 4L) # fgroup_by with .drop = FALSE gdf <- fgroup_by(df, f1, f2, .drop = FALSE) gg <- attr(gdf, "groups") expect_equal(gg$N.groups, 9L) # Aggregation through grouped df preserves empty groups (as NA / 0) res <- fsum(fgroup_by(df, f1, f2, .drop = FALSE), v) expect_equal(nrow(res), 9L) # fcount preserves levels fc <- fcount(df, f1, f2, drop = FALSE) expect_equal(nrow(fc), 9L) expect_equal(fc$N, c(1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)) # data.table class is preserved if(requireNamespace("data.table", quietly = TRUE)) { dt <- data.table::as.data.table(df) fcd <- fcount(dt, f1, f2, drop = FALSE) expect_true(data.table::is.data.table(fcd)) } # tibble class is preserved if(requireNamespace("tibble", quietly = TRUE)) { tb <- tibble::as_tibble(df) fct <- fcount(tb, f1, f2, drop = FALSE) expect_true(inherits(fct, "tbl_df")) } # collap with drop = FALSE rc <- collap(df, ~ f1 + f2, drop = FALSE) expect_equal(nrow(rc), 9L) }) collapse/tests/testthat/test-roworder-colorder-rename.R0000644000176200001440000001423714777170131023114 0ustar liggesuserscontext("roworder, colorder, frename") test_that("roworder works as intended", { expect_identical(roworder(mtcars, cyl, -hp), mtcars[with(mtcars, order(cyl, -hp)), ]) expect_identical(roworder(airquality, Month, -Ozone), setRownames(airquality[with(airquality, order(Month, -Ozone)), ])) expect_identical(fnrow(roworder(airquality, Month, -Ozone, na.last = NA)), 116L) # Removes the missing values in Ozone ## Same in standard evaluation expect_identical(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE)), roworder(airquality, Month, -Ozone)) ## Custom reordering expect_identical(roworderv(mtcars, neworder = 3:4), rbind(mtcars[3:4, ], mtcars[-(3:4), ])) # Bring rows 3 and 4 to the front expect_identical(roworderv(mtcars, neworder = 3:4, pos = "end"), rbind(mtcars[-(3:4), ], mtcars[3:4, ])) # Bring them to the end expect_identical(roworderv(mtcars, neworder = mtcars$vs == 1), rbind(mtcars[mtcars$vs == 1, ], mtcars[mtcars$vs != 1, ])) # Bring rows with vs == 1 to the top expect_identical(ss(roworderv(mtcars, neworder = c(8, 2), pos = "exchange"), c(2,8)), ss(mtcars, c(8,2))) }) if(identical(Sys.getenv("NCRAN"), "TRUE") && requireNamespace("magrittr", quietly = TRUE)) { library(magrittr) test_that("colorder works as intended", { expect_identical(colorder(mtcars, vs, cyl:hp, am), fselect(mtcars, vs, cyl:hp, am, return = "indices") %>% {cbind(mtcars[.], mtcars[-.])}) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "end"), fselect(mtcars, vs, cyl:hp, am, return = "indices") %>% {cbind(mtcars[-.], mtcars[.])}) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "exchange"), fselect(mtcars, vs, cyl:hp, am, return = "indices") %>% {`get_vars<-`(mtcars, sort(.), value = mtcars[.])}) ## Same in standard evaluation expect_identical(colorder(mtcars, vs, cyl:hp, am), colorderv(mtcars, c(8, 2:4, 9))) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "end"), colorderv(mtcars, c(8, 2:4, 9), pos = "end")) expect_identical(colorder(mtcars, vs, cyl:hp, am, pos = "exchange"), colorderv(mtcars, c(8, 2:4, 9), pos = "exchange")) expect_identical(colorder(mtcars, vs, cyl, am), colorderv(mtcars, c("vs", "cyl|am"), regex = TRUE)) }) } test_that("frename works as intended", { ## Using tagged expressions expect_equal(frename(iris, Sepal.Length = SL, Sepal.Width = SW, Petal.Length = PL, Petal.Width = PW), setNames(iris, .c(SL, SW, PL, PW, Species))) expect_equal(frename(iris, Sepal.Length = "S L", Sepal.Width = "S W", Petal.Length = "P L", Petal.Width = "P W"), setNames(iris, c("S L", "S W", "P L", "P W", "Species"))) ## Using a function expect_equal(frename(iris, tolower), setNames(iris, tolower(names(iris)))) expect_equal(frename(iris, tolower, cols = 1:2), setNames(iris, c(tolower(names(iris)[1:2]), names(iris)[-(1:2)]))) expect_equal(frename(iris, tolower, cols = is.numeric), setNames(iris, c(tolower(names(iris)[1:4]), names(iris)[-(1:4)]))) expect_equal(frename(iris, paste, "new", sep = "_", cols = 1:2), setNames(iris, c(paste(names(iris)[1:2], "new", sep = "_"), names(iris)[-(1:2)]))) ## Using vectors of names and programming expect_equal(frename(iris, tolower), frename(iris, tolower(names(iris)), .nse = FALSE)) newname = "sepal_length" expect_equal(frename(iris, Sepal.Length = newname, .nse = FALSE), setNames(iris, c(newname, names(iris)[-1L]))) newnames = c("sepal_length", "sepal_width") expect_true(all_obj_equal(frename(iris, newnames, cols = 1:2), frename(iris, newnames, cols = 1:2, .nse = FALSE), setNames(iris, c(newnames, names(iris)[-(1:2)])))) newnames = c(Sepal.Length = "sepal_length", Sepal.Width = "sepal_width") expect_equal(frename(iris, newnames, .nse = FALSE), setNames(iris, c(newnames, names(iris)[-(1:2)]))) if(requireNamespace("data.table", quietly = TRUE)) { ## Renaming by reference iris2 <- data.table::copy(iris) setrename(iris2, tolower) expect_equal(iris2, setNames(iris, tolower(names(iris)))) iris2 <- data.table::copy(iris) setrename(iris2, tolower, cols = 1:2) expect_equal(iris2, setNames(iris, c(tolower(names(iris)[1:2]), names(iris)[-(1:2)]))) iris2 <- data.table::copy(iris) setrename(iris2, tolower, cols = is.numeric) expect_equal(iris2, setNames(iris, c(tolower(names(iris)[1:4]), names(iris)[-(1:4)]))) iris2 <- data.table::copy(iris) setrename(iris2, paste, "new", sep = "_", cols = 1:2) expect_equal(iris2, setNames(iris, c(paste(names(iris)[1:2], "new", sep = "_"), names(iris)[-(1:2)]))) rm(iris2) nam <- toupper(names(iris)) # Relabelling with functions iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, tolower) expect_equal(iris2, setLabels(iris, tolower(nam))) iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, tolower, cols = 1:2) expect_equal(iris2, setLabels(iris, c(tolower(nam[1:2]), nam[-(1:2)]))) iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, tolower, cols = is.numeric) expect_equal(iris2, setLabels(iris, c(tolower(nam[1:4]), nam[5]))) iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, paste, "new", sep = "_", cols = 1:2) expect_equal(iris2, setLabels(iris, c(paste(nam[1:2], "new", sep = "_"), nam[-(1:2)]))) # Relabelling other iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, Sepal.Length = "sepal.length", Sepal.Width = "sepal.width") expect_equal(iris2, setLabels(iris, c(tolower(nam[1:2]), nam[-(1:2)]))) iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, tolower(nam)) expect_equal(iris2, setLabels(iris, tolower(nam))) iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, tolower(nam[1:2]), cols = 1:2) expect_equal(iris2, setLabels(iris, c(tolower(nam[1:2]), nam[-(1:2)]))) iris2 <- data.table::copy(setLabels(iris, nam)) setrelabel(iris2, setNames(tolower(nam[1:2]), c("Sepal.Length", "Sepal.Width"))) expect_equal(iris2, setLabels(iris, c(tolower(nam[1:2]), nam[-(1:2)]))) vlabels(iris) <- NULL rm(iris2) } }) collapse/tests/testthat/test-setop.R0000644000176200001440000001565014777170131017327 0ustar liggesuserscontext("setop") d <- mtcars$mpg dc <- copyv(d, 0, 0) i <- as.integer(mtcars$cyl) ic <- copyv(i, 0, 0) dm <- as.matrix(mtcars) + 1 dmc <- copyv(dm, 0, 0) im <- dm storage.mode(im) <- "integer" imc <- copyv(im, 0, 0) dr <- dm[nrow(dm), ] ir <- im[nrow(im), ] ddf <- mtcars %c+% 1 idf <- dapply(ddf, as.integer) ddfc <- copyv(ddf, 0, 0) idfc <- copyv(idf, 0, 0) ops <- c("+", "-", "*", "/") test_that("setop works in scalar-vector operations", { expect_equal(i %+=% 2 %-=% 2, ic) expect_equal(i %+=% 2L %-=% 2L, ic) expect_equal(i %*=% 2 %/=% 2, ic) expect_equal(i %*=% 2L %/=% 2L, ic) expect_equal(d %+=% 2 %-=% 2, dc) expect_equal(d %+=% 2L %-=% 2L, dc) expect_equal(d %*=% 2 %/=% 2, dc) expect_equal(d %*=% 2L %/=% 2L, dc) expect_equal(i %+=% dc %-=% trunc(dc), ic) # Problem: The computation creates a decimal which is then rounded down... expect_equal(i %+=% ic %-=% ic, ic) expect_equal(i %*=% dc %/=% trunc(dc), ic) expect_equal(i %*=% ic %/=% ic, ic) expect_equal(d %+=% dc %-=% dc, dc) expect_equal(d %+=% ic %-=% ic, dc) expect_equal(d %*=% dc %/=% dc, dc) expect_equal(d %*=% ic %/=% ic, dc) expect_identical(i, ic) expect_equal(d, dc) # Same with setop function for(o in ops) setop(i, o, 2); expect_identical(i, ic) for(o in ops) setop(d, o, 2); expect_equal(d, dc) for(o in ops) setop(i, o, 2L); expect_identical(i, ic) for(o in ops) setop(d, o, 2L); expect_equal(d, dc) for(o in ops) setop(i, o, trunc(dc)); expect_identical(i, ic) for(o in ops) setop(d, o, dc); expect_equal(d, dc) for(o in ops) setop(i, o, ic); expect_identical(i, ic) for(o in ops) setop(d, o, ic); expect_equal(d, dc) }) test_that("setop works in scalar-vector-matrix operations", { # Matrix & Scalar expect_equal(im %+=% 2 %-=% 2, imc) expect_equal(im %+=% 2L %-=% 2L, imc) expect_equal(im %*=% 2 %/=% 2, imc) expect_equal(im %*=% 2L %/=% 2L, imc) expect_equal(dm %+=% 2 %-=% 2, dmc) expect_equal(dm %+=% 2L %-=% 2L, dmc) expect_equal(dm %*=% 2 %/=% 2, dmc) expect_equal(dm %*=% 2L %/=% 2L, dmc) # Matrix & Vector expect_equal(im %+=% trunc(dc) %-=% trunc(dc), imc) expect_equal(im %+=% ic %-=% ic, imc) expect_equal(im %*=% trunc(dc) %/=% trunc(dc), imc) expect_equal(im %*=% ic %/=% ic, imc) expect_equal(dm %+=% dc %-=% dc, dmc) expect_equal(dm %+=% ic %-=% ic, dmc) expect_equal(dm %*=% dc %/=% dc, dmc) expect_equal(dm %*=% ic %/=% ic, dmc) # Matrix & Matrix expect_equal(im %+=% trunc(dmc) %-=% trunc(dmc), imc) expect_equal(im %+=% imc %-=% imc, imc) expect_equal(im %*=% trunc(dmc) %/=% trunc(dmc), imc) expect_equal(im %*=% imc %/=% imc, imc) expect_equal(dm %+=% dmc %-=% dmc, dmc) expect_equal(dm %+=% imc %-=% imc, dmc) expect_equal(dm %*=% dmc %/=% dmc, dmc) expect_equal(dm %*=% imc %/=% imc, dmc) expect_identical(im, imc) expect_equal(dm, dmc) # Same with setop function # Matrix & Scalar for(o in ops) setop(im, o, 2); expect_identical(im, imc) for(o in ops) setop(dm, o, 2); expect_equal(dm, dmc) for(o in ops) setop(im, o, 2L); expect_identical(im, imc) for(o in ops) setop(dm, o, 2L); expect_equal(dm, dmc) # Matrix & Vector for(o in ops) setop(im, o, trunc(dc)); expect_identical(im, imc) for(o in ops) setop(dm, o, dc); expect_equal(dm, dmc) for(o in ops) setop(im, o, ic); expect_identical(im, imc) for(o in ops) setop(dm, o, ic); expect_equal(dm, dmc) # Matrix & Matrix for(o in ops) setop(im, o, trunc(dmc)); expect_identical(im, imc) for(o in ops) setop(dm, o, dmc); expect_equal(dm, dmc) for(o in ops) setop(im, o, imc); expect_identical(im, imc) for(o in ops) setop(dm, o, imc); expect_equal(dm, dmc) # Row-wise Matrix & Vector for(o in ops) setop(im, o, trunc(dr), rowwise = TRUE); expect_identical(im, imc) for(o in ops) setop(dm, o, dr, rowwise = TRUE); expect_equal(dm, dmc) for(o in ops) setop(im, o, ir, rowwise = TRUE); expect_identical(im, imc) for(o in ops) setop(dm, o, ir, rowwise = TRUE); expect_equal(dm, dmc) # Comparison with TRA (only for doubles) if(requireNamespace("data.table", quietly = TRUE)) { for(o in ops) { expect_equal(setop(dm, o, dr, rowwise = TRUE), TRA(dmc, dr, o)) dm <- data.table::copy(dmc) expect_equal(setop(dm, o, ir, rowwise = TRUE), TRA(dmc, ir, o)) dm <- data.table::copy(dmc) } } }) test_that("setop works in operations involving data frames", { # DF & Scalar expect_equal(idf %+=% 2 %-=% 2, idfc) expect_equal(idf %+=% 2L %-=% 2L, idfc) expect_equal(idf %*=% 2 %/=% 2, idfc) expect_equal(idf %*=% 2L %/=% 2L, idfc) expect_equal(ddf %+=% 2 %-=% 2, ddfc) expect_equal(ddf %+=% 2L %-=% 2L, ddfc) expect_equal(ddf %*=% 2 %/=% 2, ddfc) expect_equal(ddf %*=% 2L %/=% 2L, ddfc) # DF & Vector expect_equal(idf %+=% trunc(dc) %-=% trunc(dc), idfc) expect_equal(idf %+=% ic %-=% ic, idfc) expect_equal(idf %*=% trunc(dc) %/=% trunc(dc), idfc) expect_equal(idf %*=% ic %/=% ic, idfc) expect_equal(ddf %+=% dc %-=% dc, ddfc) expect_equal(ddf %+=% ic %-=% ic, ddfc) expect_equal(ddf %*=% dc %/=% dc, ddfc) expect_equal(ddf %*=% ic %/=% ic, ddfc) # DF & DF expect_equal(idf %+=% trunc(ddfc) %-=% trunc(ddfc), idfc) expect_equal(idf %+=% idfc %-=% idfc, idfc) expect_equal(idf %*=% trunc(ddfc) %/=% trunc(ddfc), idfc) expect_equal(idf %*=% idfc %/=% idfc, idfc) expect_equal(ddf %+=% ddfc %-=% ddfc, ddfc) expect_equal(ddf %+=% idfc %-=% idfc, ddfc) expect_equal(ddf %*=% ddfc %/=% ddfc, ddfc) expect_equal(ddf %*=% idfc %/=% idfc, ddfc) expect_identical(idf, idfc) expect_equal(ddf, ddfc) # Same with setop function # DF & Scalar for(o in ops) setop(idf, o, 2); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, 2); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, 2L); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, 2L); expect_equal(ddf, ddfc) # DF & Vector for(o in ops) setop(idf, o, trunc(dc)); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, dc); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, ic); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, ic); expect_equal(ddf, ddfc) # DF & DF for(o in ops) setop(idf, o, trunc(ddfc)); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, ddfc); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, idfc); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, idfc); expect_equal(ddf, ddfc) # Row-wise DF & Vector for(o in ops) setop(idf, o, trunc(dr), rowwise = TRUE); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, dr, rowwise = TRUE); expect_equal(ddf, ddfc) for(o in ops) setop(idf, o, ir, rowwise = TRUE); expect_identical(idf, idfc) for(o in ops) setop(ddf, o, ir, rowwise = TRUE); expect_equal(ddf, ddfc) # Comparison with TRA (only for doubles) if(requireNamespace("data.table", quietly = TRUE)) { for(o in ops) { expect_equal(setop(ddf, o, dr, rowwise = TRUE), TRA(ddfc, dr, o)) ddf <- data.table::copy(ddfc) expect_equal(setop(ddf, o, ir, rowwise = TRUE), TRA(ddfc, ir, o)) ddf <- data.table::copy(ddfc) } } }) collapse/tests/testthat/test-miscellaneous-issues.R0000644000176200001440000006442215114165043022342 0ustar liggesuserscontext("miscellaneous issues") # rm(list = ls()) options(warn = -1) F <- getNamespace("collapse")$F if(identical(Sys.getenv("NCRAN"), "TRUE")) { test_that("Using a factor with unused levels does not pose a problem to flag, fdiff or fgrowth (#25)", { wlddev2 <- subset(wlddev, iso3c %in% c("ALB", "AFG", "DZA")) wlddev3 <- droplevels(wlddev2) expect_identical(L(wlddev3, 1, LIFEEX~iso3c, ~year), L(wlddev3, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(L(wlddev3, -1:1, LIFEEX~iso3c, ~year), L(wlddev3, -1:1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, 1, ~iso3c, ~year, cols="LIFEEX")), L(wlddev3, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, -1:1, ~iso3c, ~year, cols="LIFEEX")), L(wlddev3, -1:1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX")), D(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")), D(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX")), Dlog(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")), Dlog(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), D(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), D(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(G(wlddev2, 1, 1, ~iso3c, ~year, cols="LIFEEX")), G(wlddev3, 1, 1, ~iso3c, ~year, cols="LIFEEX")) expect_identical(droplevels(G(wlddev2, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")), G(wlddev3, -1:1, 1:2, ~iso3c, ~year, cols="LIFEEX")) expect_identical(L(wlddev3, 1, LIFEEX~iso3c), L(wlddev3, 1, ~iso3c, cols="LIFEEX")) expect_identical(L(wlddev3, -1:1, LIFEEX~iso3c), L(wlddev3, -1:1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, 1, ~iso3c, cols="LIFEEX")), L(wlddev3, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(L(wlddev2, -1:1, ~iso3c, cols="LIFEEX")), L(wlddev3, -1:1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, cols="LIFEEX")), D(wlddev3, 1, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX")), D(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, cols="LIFEEX")), Dlog(wlddev3, 1, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX")), Dlog(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(D(wlddev2, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)), D(wlddev3, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(D(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)), D(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, 1, 1, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(Dlog(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)), Dlog(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX", rho = 0.95)) expect_identical(droplevels(G(wlddev2, 1, 1, ~iso3c, cols="LIFEEX")), G(wlddev3, 1, 1, ~iso3c, cols="LIFEEX")) expect_identical(droplevels(G(wlddev2, -1:1, 1:2, ~iso3c, cols="LIFEEX")), G(wlddev3, -1:1, 1:2, ~iso3c, cols="LIFEEX")) }) test_that("Using a factor with unused levels does not pose a problem to statistical functions", { wlddev2 <- fsubset(wlddev, iso3c %in% c("ALB", "AFG", "DZA")) d <- nv(wlddev2) m <- qM(d) v <- d$PCGDP w <- rep(1, length(v)) f <- wlddev2$iso3c lev <- levels(f) fd <- fdroplevels(f) levd <- levels(fd) # Testing BY: expect_equal(attr(BY(d, f, sum), "row.names"), lev) expect_equal(dimnames(BY(m, f, sum))[[1L]], lev) expect_equal(names(BY(v, f, sum)), lev) # Fast Statistical Functions for(i in .FAST_STAT_FUN) { # print(i) FUN <- match.fun(i) expect_equal(attr(FUN(d, g = f), "row.names"), lev) expect_equal(dimnames(FUN(m, g = f))[[1L]], lev) expect_equal(names(FUN(v, g = f)), lev) expect_equal(attr(FUN(d, g = fd), "row.names"), levd) expect_equal(dimnames(FUN(m, g = fd))[[1L]], levd) expect_equal(names(FUN(v, g = fd)), levd) if(i != "fnobs") { expect_equal(attr(FUN(d, g = f, na.rm = FALSE), "row.names"), lev) expect_equal(dimnames(FUN(m, g = f, na.rm = FALSE))[[1L]], lev) expect_equal(names(FUN(v, g = f, na.rm = FALSE)), lev) } if(i %in% c("fsum", "fprod", "fmean", "fmedian", "fnth", "fmode", "fvar", "fsd")) { expect_equal(attr(FUN(d, g = f, w = w), "row.names"), lev) expect_equal(dimnames(FUN(m, g = f, w = w))[[1L]], lev) expect_equal(names(FUN(v, g = f, w = w)), lev) expect_equal(attr(FUN(d, g = f, w = w, na.rm = FALSE), "row.names"), lev) expect_equal(dimnames(FUN(m, g = f, w = w, na.rm = FALSE))[[1L]], lev) expect_equal(names(FUN(v, g = f, w = w, na.rm = FALSE)), lev) expect_equal(FUN(d, g = f, w = w), FUN(d, g = f)) expect_equal(FUN(m, g = f, w = w), FUN(m, g = f)) expect_equal(FUN(v, g = f, w = w), FUN(v, g = f)) } } # Other Statistical Functions for(i in setdiff(c(.FAST_FUN, .OPERATOR_FUN), .FAST_STAT_FUN)) { # print(i) FUN <- match.fun(i) if(grepl("hd", i, ignore.case = TRUE)) { expect_equal(FUN(d, fl = f), FUN(d, fl = fd)) expect_equal(FUN(m, fl = f), FUN(m, fl = fd)) expect_equal(FUN(v, fl = f), FUN(v, fl = fd)) expect_equal(FUN(d, fl = f, na.rm = FALSE), FUN(d, fl = fd, na.rm = FALSE)) expect_equal(FUN(m, fl = f, na.rm = FALSE), FUN(m, fl = fd, na.rm = FALSE)) expect_equal(FUN(v, fl = f, na.rm = FALSE), FUN(v, fl = fd, na.rm = FALSE)) expect_equal(FUN(d, fl = f, w = w), FUN(d, fl = fd)) expect_equal(FUN(m, fl = f, w = w), FUN(m, fl = fd)) expect_equal(FUN(v, fl = f, w = w), FUN(v, fl = fd)) expect_equal(FUN(d, fl = f, w = w, na.rm = FALSE), FUN(d, fl = fd, na.rm = FALSE)) expect_equal(FUN(m, fl = f, w = w, na.rm = FALSE), FUN(m, fl = fd, na.rm = FALSE)) expect_equal(FUN(v, fl = f, w = w, na.rm = FALSE), FUN(v, fl = fd, na.rm = FALSE)) } else { expect_equal(FUN(d, g = f), FUN(d, g = fd)) expect_equal(FUN(m, g = f), FUN(m, g = fd)) expect_equal(FUN(v, g = f), FUN(v, g = fd)) expect_equal(FUN(d, g = f, na.rm = FALSE), FUN(d, g = fd, na.rm = FALSE)) expect_equal(FUN(m, g = f, na.rm = FALSE), FUN(m, g = fd, na.rm = FALSE)) expect_equal(FUN(v, g = f, na.rm = FALSE), FUN(v, g = fd, na.rm = FALSE)) if(i %in% c("fscale", "STD", "fbetween", "B", "fwithin", "W")) { expect_equal(FUN(d, g = f, w = w), FUN(d, g = fd)) expect_equal(FUN(m, g = f, w = w), FUN(m, g = fd)) expect_equal(FUN(v, g = f, w = w), FUN(v, g = fd)) expect_equal(FUN(d, g = f, w = w, na.rm = FALSE), FUN(d, g = fd, na.rm = FALSE)) expect_equal(FUN(m, g = f, w = w, na.rm = FALSE), FUN(m, g = fd, na.rm = FALSE)) expect_equal(FUN(v, g = f, w = w, na.rm = FALSE), FUN(v, g = fd, na.rm = FALSE)) } } } }) test_that("Testing grouped_df methods", { skip_if_not_installed("magrittr") library(magrittr) for(sortg in c(TRUE, FALSE)) { for(retgrp in c(TRUE, FALSE)) { gdf <- wlddev %>% fsubset(year > 1990, region, income, PCGDP:ODA) %>% fgroup_by(region, income, return.groups = retgrp, sort = sortg) gdf[["wgt"]] <- round(abs(10*rnorm(fnrow(gdf))), 1) expect_visible(gdf %>% fmean) expect_visible(gdf %>% fmean(wgt)) expect_equal(gdf %>% fmean(wgt) %>% slt(-sum.wgt), gdf %>% fmean(wgt, keep.w = FALSE)) expect_visible(gdf %>% fmedian) expect_visible(gdf %>% fmedian(wgt)) expect_equal(gdf %>% fmedian(wgt) %>% slt(-sum.wgt), gdf %>% fmedian(wgt, keep.w = FALSE)) expect_visible(gdf %>% fnth) expect_visible(gdf %>% fnth(0.75)) expect_visible(gdf %>% fnth(0.75, wgt)) expect_equal(gdf %>% fnth(0.75, wgt) %>% slt(-sum.wgt), gdf %>% fnth(0.75, wgt, keep.w = FALSE)) expect_visible(gdf %>% fmode) expect_visible(gdf %>% fmode(wgt)) expect_equal(gdf %>% fmode(wgt) %>% slt(-sum.wgt), gdf %>% fmode(wgt, keep.w = FALSE)) expect_visible(gdf %>% fsum) expect_visible(gdf %>% fsum(wgt)) expect_equal(gdf %>% fsum(wgt) %>% slt(-sum.wgt), gdf %>% fsum(wgt, keep.w = FALSE)) expect_visible(gdf %>% fprod) expect_visible(gdf %>% fprod(wgt)) expect_equal(gdf %>% fprod(wgt) %>% slt(-prod.wgt), gdf %>% fprod(wgt, keep.w = FALSE)) expect_visible(gdf %>% fsd) expect_visible(gdf %>% fsd(wgt)) expect_equal(gdf %>% fsd(wgt) %>% slt(-sum.wgt), gdf %>% fsd(wgt, keep.w = FALSE)) expect_visible(gdf %>% fvar) expect_visible(gdf %>% fvar(wgt)) expect_equal(gdf %>% fvar(wgt) %>% slt(-sum.wgt), gdf %>% fvar(wgt, keep.w = FALSE)) expect_visible(gdf %>% fmin) expect_visible(gdf %>% fmax) expect_visible(gdf %>% ffirst) expect_visible(gdf %>% flast) expect_visible(gdf %>% fnobs) expect_visible(gdf %>% fndistinct) expect_visible(gdf %>% collapg) expect_visible(gdf %>% varying) expect_visible(gdf %>% varying(any_group = FALSE)) expect_visible(gdf %>% fmean(w = wgt)) # good? expect_equal(gdf %>% collapg(w = wgt) %>% slt(-wgt), gdf %>% collapg(w = wgt, keep.w = FALSE)) expect_visible(gdf %>% fscale) expect_visible(gdf %>% fscale(wgt)) expect_equal(gdf %>% fscale(wgt) %>% slt(-wgt), gdf %>% fscale(wgt, keep.w = FALSE)) expect_visible(gdf %>% STD) expect_visible(gdf %>% STD(wgt)) expect_equal(gdf %>% STD(wgt) %>% slt(-wgt), gdf %>% STD(wgt, keep.w = FALSE)) expect_equal(gdf %>% fscale, gdf %>% STD(stub = FALSE)) expect_visible(gdf %>% fbetween) expect_visible(gdf %>% fbetween(wgt)) expect_equal(gdf %>% fbetween(wgt) %>% slt(-wgt), gdf %>% fbetween(wgt, keep.w = FALSE)) expect_visible(gdf %>% B) expect_visible(gdf %>% B(wgt)) expect_equal(gdf %>% B(wgt) %>% slt(-wgt), gdf %>% B(wgt, keep.w = FALSE)) expect_equal(gdf %>% fbetween, gdf %>% B(stub = FALSE)) expect_visible(gdf %>% fwithin) expect_visible(gdf %>% fwithin(wgt)) expect_equal(gdf %>% fwithin(wgt) %>% slt(-wgt), gdf %>% fwithin(wgt, keep.w = FALSE)) expect_visible(gdf %>% W) expect_visible(gdf %>% W(wgt)) expect_equal(gdf %>% W(wgt) %>% slt(-wgt), gdf %>% W(wgt, keep.w = FALSE)) expect_equal(gdf %>% fwithin, gdf %>% W(stub = FALSE)) expect_visible(gdf %>% fcumsum) expect_visible(gdf %>% flag) expect_visible(gdf %>% L) expect_visible(gdf %>% F) expect_true(all_obj_equal(gdf %>% flag, gdf %>% L(stubs = FALSE), gdf %>% F(-1, stubs = FALSE))) expect_true(all_obj_equal(gdf %>% flag(-3:3), gdf %>% L(-3:3), gdf %>% F(3:-3))) expect_visible(gdf %>% fdiff) expect_visible(gdf %>% D) expect_true(all_obj_equal(gdf %>% fdiff, gdf %>% D(stubs = FALSE))) expect_equal(gdf %>% fdiff(-2:2, 1:2), gdf %>% D(-2:2, 1:2)) expect_visible(gdf %>% fdiff(rho = 0.95)) expect_visible(gdf %>% fdiff(-2:2, 1:2, rho = 0.95)) expect_visible(gdf %>% fdiff(log = TRUE)) expect_visible(gdf %>% fdiff(-2:2, 1:2, log = TRUE)) expect_visible(gdf %>% fdiff(log = TRUE, rho = 0.95)) expect_visible(gdf %>% fdiff(-2:2, 1:2, log = TRUE, rho = 0.95)) expect_visible(gdf %>% fgrowth) expect_visible(gdf %>% G) expect_true(all_obj_equal(gdf %>% fgrowth, gdf %>% G(stubs = FALSE))) expect_equal(gdf %>% fgrowth(-2:2, 1:2), gdf %>% G(-2:2, 1:2)) expect_visible(gdf %>% fgrowth(scale = 1)) expect_visible(gdf %>% fgrowth(-2:2, 1:2, scale = 1)) expect_visible(gdf %>% fgrowth(logdiff = TRUE)) expect_visible(gdf %>% fgrowth(-2:2, 1:2, logdiff = TRUE)) expect_visible(gdf %>% fgrowth(logdiff = TRUE, scale = 1)) expect_visible(gdf %>% fgrowth(-2:2, 1:2, logdiff = TRUE, scale = 1)) expect_equal(BY(gby(iris,Species), sum), BY(nv(gby(iris,Species)), sum)) } } }) # Also better not run on CRAN... test_that("0-length vectors give expected output", { funs <- .c(fsum, fprod, fmean, fmedian, fmin, fmax, fnth, fcumsum, fbetween, fwithin, fscale) for(i in funs) { FUN <- match.fun(i) if(i %!in% .c(fsum, fmin, fmax, fcumsum)) { expect_true(all_identical(FUN(numeric(0)), FUN(integer(0)), numeric(0))) } else { expect_identical(FUN(numeric(0)), numeric(0)) expect_identical(FUN(integer(0)), integer(0)) } } funs <- .c(fmode, ffirst, flast) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), numeric(0)) expect_identical(FUN(integer(0)), integer(0)) expect_identical(FUN(character(0)), character(0)) expect_identical(FUN(logical(0)), logical(0)) expect_identical(FUN(factor(0)), factor(0)) } funs <- .c(fvar, fsd) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), NA_real_) expect_identical(FUN(integer(0)), NA_real_) } funs <- .c(fnobs, fndistinct) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), 0L) expect_identical(FUN(integer(0)), 0L) } funs <- .c(flag, fdiff, fgrowth) for(i in funs) { FUN <- match.fun(i) expect_error(FUN(numeric(0))) expect_error(FUN(integer(0))) } funs <- .c(groupid, seqid) for(i in funs) { FUN <- match.fun(i) expect_identical(FUN(numeric(0)), integer(0)) expect_identical(FUN(integer(0)), integer(0)) } expect_identical(varying(numeric(0)), FALSE) expect_identical(TRA(numeric(0), 1), numeric(0)) }) } X <- matrix(rnorm(1000), ncol = 10) g <- qG(sample.int(10, 100, TRUE)) gf <- as_factor_qG(g) funs <- grep("hd|log", c(.FAST_FUN, .OPERATOR_FUN), ignore.case = TRUE, invert = TRUE, value = TRUE) test_that("functions work on plain matrices", { F <- getNamespace("collapse")$F for(i in funs) { expect_visible(match.fun(i)(X)) expect_visible(match.fun(i)(X, g = g)) expect_visible(match.fun(i)(X, g = gf)) expect_visible(match.fun(i)(X, g = g, use.g.names = FALSE)) expect_visible(match.fun(i)(X, g = gf, use.g.names = FALSE)) } }) Xl <- mctl(X) test_that("functions work on plain lists", { F <- getNamespace("collapse")$F for(i in funs) { expect_visible(match.fun(i)(Xl)) expect_visible(match.fun(i)(Xl, g = g, by = g)) expect_visible(match.fun(i)(Xl, g = gf, by = gf)) expect_visible(match.fun(i)(X, g = g, by = g, use.g.names = FALSE)) expect_visible(match.fun(i)(X, g = gf, by = gf, use.g.names = FALSE)) } }) test_that("time series functions work inside lm", { expect_equal(unname(coef(lm(mpg ~ L(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + L(cyl, 1) + L(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ F(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + F(cyl, 1) + F(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ D(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + D(cyl, 1) + D(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ G(cyl, 0:2), mtcars))), unname(coef(lm(mpg ~ cyl + G(cyl, 1) + G(cyl, 2), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(L(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + L(cyl, 2) + L(cyl, 3), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(F(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + cyl + F(cyl, 1), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(D(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + L(D(cyl)) + L(D(cyl, 2)), mtcars)))) expect_equal(unname(coef(lm(mpg ~ L(G(cyl, 0:2)), mtcars))), unname(coef(lm(mpg ~ L(cyl) + L(G(cyl)) + L(G(cyl, 2)), mtcars)))) }) test_that("functions using welfords method properly deal with zero weights", { for(g in list(NULL, rep(1L, 3))) { expect_equal(unattrib(fvar(x = c(2, 1, 0), g = g, w = c(1, 1, 0), na.rm = TRUE)), 0.5) expect_equal(unattrib(fvar(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), 2) expect_equal(unattrib(fsd(x = c(2, 1, 0), g = g, w = c(1, 1, 0), na.rm = TRUE)), sqrt(0.5)) expect_equal(unattrib(fsd(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), sqrt(2)) expect_equal(unattrib(fscale(x = c(2, 1, 0), g = g, w = c(1, 1, 0), na.rm = TRUE)), (c(2, 1, 0)-1.5)/sqrt(0.5)) expect_equal(unattrib(fscale(x = c(2, 1, 3), g = g, w = c(0, 1, 1), na.rm = FALSE)), (c(2, 1, 3)-2)/sqrt(2)) expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0)))[-2L], c(2, 1.5, sqrt(0.5), 1, 2)) expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1)))[-2L], c(2, 2, sqrt(2), 1, 3)) expect_equal(unattrib(qsu(x = c(2, 1, 0), g = g, w = c(1, 1, 0), higher = TRUE))[c(1L, 3:6)], c(2, 1.5, sqrt(0.5), 1, 2)) expect_equal(unattrib(qsu(x = c(2, 1, 3), g = g, w = c(0, 1, 1), higher = TRUE))[c(1L, 3:6)], c(2, 2, sqrt(2), 1, 3)) } }) test_that("singleton groups are handled properly by all statistical functions", { w <- rep(1, fnrow(wlddev)) # Ordered g <- GRP(seq_row(wlddev), return.groups = FALSE) expect_equal(fmode(wlddev, g), wlddev) expect_equal(fmode(wlddev, g, w), wlddev) expect_equal(ffirst(wlddev, g), wlddev) expect_equal(flast(wlddev, g), wlddev) expect_equal(dapply(fndistinct(wlddev, g), unattrib), dapply(wlddev, function(x) as.integer(!is.na(x)))) expect_equal(fmode(wlddev, g, na.rm = FALSE), wlddev) expect_equal(fmode(wlddev, g, w, na.rm = FALSE), wlddev) expect_equal(ffirst(wlddev, g, na.rm = FALSE), wlddev) expect_equal(flast(wlddev, g, na.rm = FALSE), wlddev) expect_equal(dapply(fndistinct(wlddev, g, na.rm = FALSE), unattrib), dapply(wlddev, function(x) rep(1L, length(x)))) for(FUN in list(fmean, fmedian, fnth, fsum, fprod, fmin, fmax, fbetween, fcumsum)) { # print(FUN) expect_equal(FUN(nv(wlddev), g = g), nv(wlddev)) expect_equal(FUN(nv(wlddev), g = g, na.rm = FALSE), nv(wlddev)) expect_equal(FUN(nv(wlddev), g = g, w = w), nv(wlddev)) expect_equal(FUN(nv(wlddev), g = g, w = w, na.rm = FALSE), nv(wlddev)) } for(FUN in list(fvar, fsd, fscale, flag, fdiff, fgrowth)) { expect_true(all(dapply(FUN(nv(wlddev), g = g), allNA))) expect_true(all(dapply(FUN(nv(wlddev), g = g, na.rm = FALSE), allNA))) expect_true(all(dapply(FUN(nv(wlddev), g = g, w = w, n = -1), allNA))) expect_true(all(dapply(FUN(nv(wlddev), g = g, w = w, n = -1, na.rm = FALSE), allNA))) } # Unordered o <- radixorder(rnorm(fnrow(wlddev))) g <- GRP(o, return.groups = FALSE) wlduo <- setRownames(ss(wlddev, radixorder(o))) expect_equal(fmode(wlddev, g), wlduo) expect_equal(fmode(wlddev, g, w), wlduo) expect_equal(ffirst(wlddev, g), wlduo) expect_equal(flast(wlddev, g), wlduo) expect_equal(dapply(fndistinct(wlddev, g), unattrib), dapply(wlduo, function(x) as.integer(!is.na(x)))) expect_equal(fmode(wlddev, g, na.rm = FALSE), wlduo) expect_equal(fmode(wlddev, g, w, na.rm = FALSE), wlduo) expect_equal(ffirst(wlddev, g, na.rm = FALSE), wlduo) expect_equal(flast(wlddev, g, na.rm = FALSE), wlduo) expect_equal(dapply(fndistinct(wlddev, g, na.rm = FALSE), unattrib), dapply(wlduo, function(x) rep(1L, length(x)))) for(FUN in list(fmean, fmedian, fnth, fsum, fprod, fmin, fmax)) { # print(FUN) expect_equal(FUN(nv(wlddev), g = g), nv(wlduo)) expect_equal(FUN(nv(wlddev), g = g, na.rm = FALSE), nv(wlduo)) expect_equal(FUN(nv(wlddev), g = g, w = w), nv(wlduo)) expect_equal(FUN(nv(wlddev), g = g, w = w, na.rm = FALSE), nv(wlduo)) } for(FUN in list(fbetween, fcumsum)) { expect_equal(FUN(nv(wlddev), g), nv(wlddev)) expect_equal(FUN(nv(wlddev), g, na.rm = FALSE), nv(wlddev)) expect_equal(FUN(nv(wlddev), g, w), nv(wlddev)) expect_equal(FUN(nv(wlddev), g, w, na.rm = FALSE), nv(wlddev)) } for(FUN in list(fvar, fsd, fscale, flag, fdiff, fgrowth)) { expect_true(all(dapply(FUN(nv(wlddev), g = g), allNA))) expect_true(all(dapply(FUN(nv(wlddev), g = g, na.rm = FALSE), allNA))) expect_true(all(dapply(FUN(nv(wlddev), g = g, w = w, n = -1), allNA))) expect_true(all(dapply(FUN(nv(wlddev), g = g, w = w, n = -1, na.rm = FALSE), allNA))) } }) test_that("functions work for data frames with zero rows", { mtc0 <- qDF(mtcars)[NULL, ] expect_equal(mtc0, funique(mtc0)) expect_equal(mtc0, funique(mtc0, sort = TRUE)) expect_equal(mtc0, roworderv(mtc0)) expect_visible(colorder(mtc0, mpg, hp)) expect_visible(GRP(mtc0)) expect_visible(fgroup_by(mtc0, cyl, vs, am)) expect_visible(GRP(mtc0, sort = FALSE)) expect_visible(fgroup_by(mtc0, cyl, vs, am, sort = FALSE)) expect_visible(fduplicated(mtc0)) expect_false(any_duplicated(mtc0)) expect_visible(fselect(mtc0, hp, carb)) expect_visible(get_vars(mtc0, 9:8)) }) test_that("issue with integer followed by NA #432", { for (f in setdiff(.FAST_STAT_FUN, c("fvar", "fsd", "fnobs", "fndistinct"))) { # if(!isTRUE(all.equal(match.fun(f)(c(10L, NA)), 10L))) print(f) expect_equal(match.fun(f)(c(10L, NA)), 10L) expect_equal(match.fun(f)(c(NA, 10L)), 10L) expect_equal(match.fun(f)(c(10, NA)), 10) expect_equal(match.fun(f)(c(NA, 10)), 10) expect_equal(match.fun(f)(c(10L, NA), g = rep(1L, 2), use.g.names = FALSE), 10L) expect_equal(match.fun(f)(c(NA, 10L), g = rep(1L, 2), use.g.names = FALSE), 10L) expect_equal(match.fun(f)(c(10, NA), g = rep(1L, 2), use.g.names = FALSE), 10) expect_equal(match.fun(f)(c(NA, 10), g = rep(1L, 2), use.g.names = FALSE), 10) # na.rm = FALSE if(f %!in% c("fmode", "ffirst")) expect_equal(match.fun(f)(c(10L, NA), na.rm = FALSE), NA_integer_) if(f != "flast") expect_equal(match.fun(f)(c(NA, 10L), na.rm = FALSE), NA_integer_) if(f %!in% c("fmode", "ffirst")) expect_equal(match.fun(f)(c(10, NA), na.rm = FALSE), NA_real_) if(f != "flast") expect_equal(match.fun(f)(c(NA, 10), na.rm = FALSE), NA_real_) # Some functions are optimized and don't check here # expect_equal(match.fun(f)(c(10L, NA), g = rep(1L, 2), na.rm = FALSE, use.g.names = FALSE), NA_integer_) # expect_equal(match.fun(f)(c(NA, 10L), g = rep(1L, 2), na.rm = FALSE, use.g.names = FALSE), NA_integer_) if(f %!in% c("fmode", "ffirst")) expect_equal(match.fun(f)(c(10, NA), g = rep(1L, 2), na.rm = FALSE, use.g.names = FALSE), NA_real_) if(f != "flast") expect_equal(match.fun(f)(c(NA, 10), g = rep(1L, 2), na.rm = FALSE, use.g.names = FALSE), NA_real_) } skip_if_not(Sys.getenv("OMP") == "TRUE") for (f in c("fsum", "fmean", "fmode", "fnth", "fmedian")) { expect_equal(match.fun(f)(c(10L, rep(NA_integer_, 1e5)), nthreads = 2L), 10L) expect_equal(match.fun(f)(c(rep(NA_integer_, 1e5), 10L), nthreads = 2L), 10L) expect_equal(match.fun(f)(c(10, rep(NA_real_, 1e5)), nthreads = 2L), 10) expect_equal(match.fun(f)(c(rep(NA_real_, 1e5), 10), nthreads = 2L), 10) expect_equal(match.fun(f)(c(10L, rep(NA_integer_, 1e5)), g = rep(1L, 1e5+1), nthreads = 2L, use.g.names = FALSE), 10L) expect_equal(match.fun(f)(c(rep(NA_integer_, 1e5), 10L), g = rep(1L, 1e5+1), nthreads = 2L, use.g.names = FALSE), 10L) expect_equal(match.fun(f)(c(10, rep(NA_real_, 1e5)), g = rep(1L, 1e5+1), nthreads = 2L, use.g.names = FALSE), 10) expect_equal(match.fun(f)(c(rep(NA_real_, 1e5), 10), g = rep(1L, 1e5+1), nthreads = 2L, use.g.names = FALSE), 10) # na.rm = FALSE expect_equal(match.fun(f)(c(10L, rep(NA_integer_, 1e5)), na.rm = FALSE, nthreads = 2L), NA_integer_) expect_equal(match.fun(f)(c(rep(NA_integer_, 1e5), 10L), na.rm = FALSE, nthreads = 2L), NA_integer_) expect_equal(match.fun(f)(c(10, rep(NA_real_, 1e5)), na.rm = FALSE, nthreads = 2L), NA_real_) expect_equal(match.fun(f)(c(rep(NA_real_, 1e5), 10), na.rm = FALSE, nthreads = 2L), NA_real_) # Some functions are optimized and don't check here # expect_equal(match.fun(f)(c(10L, rep(NA_integer_, 1e5)), g = rep(1L, 1e5+1), na.rm = FALSE, nthreads = 2L, use.g.names = FALSE), NA_integer_) # expect_equal(match.fun(f)(c(rep(NA_integer_, 1e5), 10L), g = rep(1L, 1e5+1), na.rm = FALSE, nthreads = 2L, use.g.names = FALSE), NA_integer_) expect_equal(match.fun(f)(c(10, rep(NA_real_, 1e5)), g = rep(1L, 1e5+1), na.rm = FALSE, nthreads = 2L, use.g.names = FALSE), NA_real_) expect_equal(match.fun(f)(c(rep(NA_real_, 1e5), 10), g = rep(1L, 1e5+1), na.rm = FALSE, nthreads = 2L, use.g.names = FALSE), NA_real_) } }) test_that("fmedian ties handled properly with weights", { x <- c(1, 2, 3, 4) w <- c(2.5, 2.4, 3.8, 1.1) expect_equal(c(fmedian(x, w = w, ties = "mean"), fmedian(x, w = w, ties = "min"), fmedian(x, w = w, ties = "max")), c(2.5, 2, 3)) w <- c(2.5, 2.4, 3.7, 1.2) expect_equal(c(fmedian(x, w = w, ties = "mean"), fmedian(x, w = w, ties = "min"), fmedian(x, w = w, ties = "max")), c(2.5, 2, 3)) }) test_that("Misc bugs", { expect_visible(qF(c(4L, 1L, NA), sort = FALSE)) expect_equal(fmatch(factor(NA, exclude = NULL), NA), 1L) # #675 expect_equal(fmatch(factor(NA), NA), 1L) expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt)) expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt, higher = TRUE)) expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt, array = FALSE)) expect_visible(qsu(mtcars$mpg, mtcars$cyl, mtcars$vs, mtcars$wt, higher = TRUE, array = FALSE)) df1 <- data.frame(x = 1:3, y = 4:6) df2 <- data.frame(x = 2:5, z = 2:5) expect_visible(join(df1, df2, require = list(x = 1, y = 1, fail = "message"), verbose = 0, multiple = TRUE)) }) test_that("Pivot with integers", { #803 iris_long <- pivot(iris, "Species") iris_long$value <- round(iris_long$value) # Double iris_long$value_int = as.integer(iris_long$value) # Integer for (f in c("sum", "mean")) { expect_equal( pivot( data = iris_long, ids = "Species", values = "value", how = "wider", # Pivoting to wide format FUN = "sum" ), pivot( data = iris_long, ids = "Species", values = "value_int", how = "wider", # Pivoting to wide format FUN = "sum" )) } }) options(warn = 1) collapse/tests/testthat/test-pivot.R0000644000176200001440000001121514777170131017327 0ustar liggesuserscontext("pivot") skip_if_not_installed("data.table") library(data.table) mtcDT <- qDT(mtcars) mtcnaDT <- qDT(na_insert(mtcars)) irisDT <- qDT(iris) wldDT <- qDT(wlddev) GGDCDT <- qDT(GGDC10S) rmnic <- function(x) { if(!length(fci <- fact_vars(x, "indices"))) return(x) for (i in fci) oldClass(x[[i]]) <- setdiff(oldClass(x[[i]]), "na.included") x } test_that("long pivots work properly", { # No id's expect_identical(rmnic(pivot(mtcDT)), melt(mtcDT, measure.vars = seq_along(mtcDT))) expect_identical(rmnic(pivot(mtcDT, values = 3:11)), melt(mtcDT, measure.vars = 3:11)) expect_identical(rmnic(pivot(mtcnaDT, na.rm = TRUE)), melt(mtcnaDT, measure.vars = seq_along(mtcnaDT), na.rm = TRUE)) expect_identical(rmnic(pivot(mtcnaDT, values = 3:11, na.rm = TRUE)), melt(mtcnaDT, measure.vars = 3:11, na.rm = TRUE)) expect_identical(names(pivot(gv(wlddev, 9:10), labels = TRUE)), c("variable", "label", "value")) expect_identical(names(pivot(gv(wlddev, 9:10), labels = "bla")), c("variable", "bla", "value")) expect_identical(names(pivot(gv(wlddev, 9:10), labels = TRUE, na.rm = TRUE)), c("variable", "label", "value")) expect_identical(names(pivot(gv(wlddev, 9:10), labels = "bla", na.rm = TRUE)), c("variable", "bla", "value")) expect_warning(pivot(mtcnaDT, check.dups = TRUE)) # with ids expect_identical(rmnic(pivot(irisDT, "Species")), melt(irisDT, "Species")) expect_identical(rmnic(setLabels(pivot(wldDT, 1:8), NULL)), setLabels(melt(wldDT, 1:8), NULL)) expect_identical(rmnic(setLabels(pivot(wldDT, 1:8, na.rm = TRUE), NULL)), setLabels(melt(wldDT, 1:8, na.rm = TRUE), NULL)) expect_warning(pivot(irisDT, "Species", check.dups = TRUE)) # with labels expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, labels = TRUE)), c("iso3c", "year", "variable", "label", "value")) expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, names = list("var", "val"), labels = "lab")), c("iso3c", "year", "var", "lab", "val")) expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, names = list(value = "val"), labels = "lab")), c("iso3c", "year", "variable", "lab", "val")) expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, names = list(variable = "var"), labels = "lab")), c("iso3c", "year", "var", "lab", "value")) }) test_that("wide pivots work properly", { # 1 column expect_identical(qDF(dcast(wldDT, iso3c ~ year, value.var = "PCGDP")), qDF(pivot(wldDT, "iso3c", "PCGDP", "year", how = "wider", sort = "ids"))) expect_identical(qDF(dcast(wldDT, country ~ year, value.var = "PCGDP")), qDF(pivot(wldDT, "country", "PCGDP", "year", how = "wider"))) # 2 columns expect_identical(qDF(dcast(wldDT, iso3c ~ year, value.var = c("PCGDP", "LIFEEX"))), qDF(pivot(wldDT, "iso3c", c("PCGDP", "LIFEEX"), "year", how = "wider", sort = "ids"))) expect_identical(qDF(dcast(wldDT, country ~ year, value.var = c("PCGDP", "LIFEEX"))), qDF(pivot(wldDT, "country", c("PCGDP", "LIFEEX"), "year", how = "wider"))) # pivot(wlddev, "iso3c", "PCGDP", "year", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names")) # pivot(wlddev, "iso3c", "PCGDP", "year", "decade", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names")) # pivot(wlddev, "iso3c", c("PCGDP", "LIFEEX"), "year", "decade", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names")) # pivot(wlddev, "iso3c", c("PCGDP", "LIFEEX"), "year", "decade", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names"), transpose = c("cols", "names")) # 1 column: sum, mean, min, max for (f in .c(sum, mean, min, max)) { expect_equal(dapply(dcast(wldDT[is.finite(PCGDP)], income ~ year, value.var = "PCGDP", fun = match.fun(f)), unattrib, return = "data.frame"), dapply(pivot(wldDT, "income", "PCGDP", "year", how = "wider", FUN = f, na.rm = TRUE, sort = TRUE), unattrib, return = "data.frame")) } for (f in .c(sum, mean, min, max)) { expect_equal(dapply(dcast(wldDT[is.finite(PCGDP)], income ~ year, value.var = "PCGDP", fun = match.fun(f)), unattrib, return = "data.frame"), dapply(pivot(wldDT, "income", "PCGDP", "year", how = "wider", FUN = match.fun(f), na.rm = TRUE, sort = TRUE), unattrib, return = "data.frame")) } for (f in .c(sum, mean, min, max)) { expect_equal(dapply(dcast(wldDT[is.finite(PCGDP)], income ~ year, value.var = "PCGDP", fun = match.fun(f)), unattrib, return = "data.frame"), dapply(pivot(wldDT, "income", "PCGDP", "year", how = "wider", FUN = match.fun(paste0("f", f)), na.rm = TRUE, sort = TRUE), unattrib, return = "data.frame")) } }) collapse/tests/testthat/test-fprod.R0000644000176200001440000004370714777170131017313 0ustar liggesuserscontext("fprod") bprod <- base::prod # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(5*rnorm(100)) wdat <- abs(5*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" na21 <- function(x) { x[is.na(x)] <- 1 x } wprod <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) if(!any(cc)) return(NA_real_) x <- x[cc] w <- w[cc] } bprod(x*w) } test_that("fprod performs like base::prod", { expect_equal(fprod(NA), as.double(bprod(NA))) expect_equal(fprod(NA, na.rm = FALSE), as.double(bprod(NA))) expect_equal(fprod(1), bprod(1, na.rm = TRUE)) expect_equal(fprod(1:3), bprod(1:3, na.rm = TRUE)) expect_equal(fprod(-1:1), bprod(-1:1, na.rm = TRUE)) expect_equal(fprod(1, na.rm = FALSE), bprod(1)) expect_equal(fprod(1:3, na.rm = FALSE), bprod(1:3)) expect_equal(fprod(-1:1, na.rm = FALSE), bprod(-1:1)) expect_equal(fprod(x), bprod(x, na.rm = TRUE)) expect_equal(fprod(x, na.rm = FALSE), bprod(x)) expect_equal(fprod(xNA, na.rm = FALSE), bprod(xNA)) expect_equal(fprod(xNA), bprod(xNA, na.rm = TRUE)) expect_equal(fprod(mtcars), fprod(m)) expect_equal(fprod(m), dapply(m, bprod, na.rm = TRUE)) expect_equal(fprod(m, na.rm = FALSE), dapply(m, bprod)) expect_equal(fprod(mNA, na.rm = FALSE), dapply(mNA, bprod)) expect_equal(fprod(mNA), dapply(mNA, bprod, na.rm = TRUE)) expect_equal(fprod(mtcars), dapply(mtcars, bprod, na.rm = TRUE)) expect_equal(fprod(mtcars, na.rm = FALSE), dapply(mtcars, bprod)) expect_equal(fprod(mtcNA, na.rm = FALSE), dapply(mtcNA, bprod)) expect_equal(fprod(mtcNA), dapply(mtcNA, bprod, na.rm = TRUE)) expect_equal(fprod(x, f), BY(x, f, bprod, na.rm = TRUE)) expect_equal(fprod(x, f, na.rm = FALSE), BY(x, f, bprod)) expect_equal(fprod(xNA, f, na.rm = FALSE), BY(xNA, f, bprod)) expect_equal(na21(fprod(xNA, f)), BY(xNA, f, bprod, na.rm = TRUE)) expect_equal(fprod(m, g), BY(m, g, bprod, na.rm = TRUE)) expect_equal(fprod(m, g, na.rm = FALSE), BY(m, g, bprod)) expect_equal(fprod(mNA, g, na.rm = FALSE), BY(mNA, g, bprod)) expect_equal(na21(fprod(mNA, g)), BY(mNA, g, bprod, na.rm = TRUE)) # bprod(NA, na.rm = TRUE) gives 1 expect_equal(fprod(mtcars, g), BY(mtcars, g, bprod, na.rm = TRUE)) expect_equal(fprod(mtcars, g, na.rm = FALSE), BY(mtcars, g, bprod)) expect_equal(fprod(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bprod)) expect_equal(na21(fprod(mtcNA, g)), BY(mtcNA, g, bprod, na.rm = TRUE)) # bprod(NA, na.rm = TRUE) gives 1 }) test_that("fprod with weights performs like wprod (defined above)", { # complete weights expect_equal(fprod(NA, w = 1), wprod(NA, 1)) expect_equal(fprod(NA, w = 1, na.rm = FALSE), wprod(NA, 1)) expect_equal(fprod(1, w = 1), wprod(1, w = 1)) expect_equal(fprod(1:3, w = 1:3), wprod(1:3, 1:3)) expect_equal(fprod(-1:1, w = 1:3), wprod(-1:1, 1:3)) expect_equal(fprod(1, w = 1, na.rm = FALSE), wprod(1, 1)) expect_equal(fprod(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wprod(1:3, c(0.99,3454,1.111))) expect_equal(fprod(-1:1, w = 1:3, na.rm = FALSE), wprod(-1:1, 1:3)) expect_equal(fprod(x, w = w), wprod(x, w)) expect_equal(fprod(x, w = w, na.rm = FALSE), wprod(x, w)) expect_equal(fprod(xNA, w = w, na.rm = FALSE), wprod(xNA, w)) expect_equal(fprod(xNA, w = w), wprod(xNA, w, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdat), fprod(m, w = wdat)) expect_equal(fprod(m, w = wdat), dapply(m, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(m, w = wdat, na.rm = FALSE), dapply(m, wprod, wdat)) expect_equal(fprod(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wprod, wdat)) expect_equal(fprod(mNA, w = wdat), dapply(mNA, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdat), dapply(mtcars, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wprod, wdat)) expect_equal(fprod(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wprod, wdat)) expect_equal(fprod(mtcNA, w = wdat), dapply(mtcNA, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(x, f, w), BY(x, f, wprod, w)) expect_equal(fprod(x, f, w, na.rm = FALSE), BY(x, f, wprod, w)) expect_equal(fprod(xNA, f, w, na.rm = FALSE), BY(xNA, f, wprod, w)) expect_equal(fprod(xNA, f, w), BY(xNA, f, wprod, w, na.rm = TRUE)) expect_equal(fprod(m, g, wdat), BY(m, gf, wprod, wdat)) expect_equal(fprod(m, g, wdat, na.rm = FALSE), BY(m, gf, wprod, wdat)) expect_equal(fprod(mNA, g, wdat, na.rm = FALSE), BY(mNA, gf, wprod, wdat)) expect_equal(fprod(mNA, g, wdat), BY(mNA, gf, wprod, wdat, na.rm = TRUE)) expect_equal(fprod(mtcars, g, wdat), BY(mtcars, gf, wprod, wdat)) expect_equal(fprod(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, gf, wprod, wdat)) expect_equal(fprod(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, gf, wprod, wdat)) expect_equal(fprod(mtcNA, g, wdat), BY(mtcNA, gf, wprod, wdat, na.rm = TRUE)) # missing weights expect_equal(fprod(NA, w = NA), wprod(NA, NA)) expect_equal(fprod(NA, w = NA, na.rm = FALSE), wprod(NA, NA)) expect_equal(fprod(1, w = NA), wprod(1, w = NA)) expect_equal(fprod(1:3, w = c(NA,1:2)), wprod(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fprod(-1:1, w = c(NA,1:2)), wprod(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fprod(1, w = NA, na.rm = FALSE), wprod(1, NA)) expect_equal(fprod(1:3, w = c(NA,1:2), na.rm = FALSE), wprod(1:3, c(NA,1:2))) expect_equal(fprod(-1:1, w = c(NA,1:2), na.rm = FALSE), wprod(-1:1, c(NA,1:2))) expect_equal(fprod(x, w = wNA), wprod(x, wNA, na.rm = TRUE)) expect_equal(fprod(x, w = wNA, na.rm = FALSE), wprod(x, wNA)) expect_equal(fprod(xNA, w = wNA, na.rm = FALSE), wprod(xNA, wNA)) expect_equal(fprod(xNA, w = wNA), wprod(xNA, wNA, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdatNA), fprod(m, w = wdatNA)) expect_equal(fprod(m, w = wdatNA), dapply(m, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(m, w = wdatNA, na.rm = FALSE), dapply(m, wprod, wdatNA)) expect_equal(fprod(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wprod, wdatNA)) expect_equal(fprod(mNA, w = wdatNA), dapply(mNA, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdatNA), dapply(mtcars, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wprod, wdatNA)) expect_equal(fprod(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wprod, wdatNA)) expect_equal(fprod(mtcNA, w = wdatNA), dapply(mtcNA, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(x, f, wNA), BY(x, f, wprod, wNA, na.rm = TRUE)) expect_equal(fprod(x, f, wNA, na.rm = FALSE), BY(x, f, wprod, wNA)) expect_equal(fprod(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wprod, wNA)) expect_equal(fprod(xNA, f, wNA), BY(xNA, f, wprod, wNA, na.rm = TRUE)) expect_equal(fprod(m, g, wdatNA), BY(m, gf, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(m, g, wdatNA, na.rm = FALSE), BY(m, gf, wprod, wdatNA)) expect_equal(fprod(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, gf, wprod, wdatNA)) expect_equal(fprod(mNA, g, wdatNA), BY(mNA, gf, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, g, wdatNA), BY(mtcars, gf, wprod, wdatNA, na.rm = TRUE)) expect_equal(fprod(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, gf, wprod, wdatNA)) expect_equal(fprod(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, gf, wprod, wdatNA)) expect_equal(fprod(mtcNA, g, wdatNA), BY(mtcNA, gf, wprod, wdatNA, na.rm = TRUE)) }) test_that("fprod performs numerically stable", { expect_true(all_obj_equal(replicate(50, fprod(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g), simplify = FALSE))) }) test_that("fprod with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fprod(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fprod with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fprod(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fprod(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fprod handles special values in the right way", { expect_equal(fprod(NA), NA_real_) expect_equal(fprod(NaN), NaN) expect_equal(fprod(Inf), Inf) expect_equal(fprod(-Inf), -Inf) expect_equal(fprod(TRUE), 1) expect_equal(fprod(FALSE), 0) expect_equal(fprod(NA, na.rm = FALSE), NA_real_) expect_equal(fprod(NaN, na.rm = FALSE), NaN) expect_equal(fprod(Inf, na.rm = FALSE), Inf) expect_equal(fprod(-Inf, na.rm = FALSE), -Inf) expect_equal(fprod(TRUE, na.rm = FALSE), 1) expect_equal(fprod(FALSE, na.rm = FALSE), 0) expect_equal(fprod(c(1,NA)), 1) expect_equal(fprod(c(1,NaN)), 1) expect_equal(fprod(c(1,Inf)), Inf) expect_equal(fprod(c(1,-Inf)), -Inf) expect_equal(fprod(c(FALSE,TRUE)), 0) expect_equal(fprod(c(TRUE,TRUE)), 1) expect_equal(fprod(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fprod(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fprod(c(FALSE,TRUE), na.rm = FALSE), 0) expect_equal(fprod(c(TRUE,TRUE), na.rm = FALSE), 1) }) test_that("fprod with weights handles special values in the right way", { expect_equal(fprod(NA, w = 1), NA_real_) expect_equal(fprod(NaN, w = 1), NaN) expect_equal(fprod(Inf, w = 1), Inf) expect_equal(fprod(-Inf, w = 1), -Inf) expect_equal(fprod(TRUE, w = 1), 1) expect_equal(fprod(FALSE, w = 1), 0) expect_equal(fprod(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fprod(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fprod(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fprod(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fprod(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fprod(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fprod(NA, w = NA), NA_real_) expect_equal(fprod(NaN, w = NA), NA_real_) expect_equal(fprod(Inf, w = NA), NA_real_) expect_equal(fprod(-Inf, w = NA), NA_real_) expect_equal(fprod(TRUE, w = NA), NA_real_) expect_equal(fprod(FALSE, w = NA), NA_real_) expect_equal(fprod(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fprod(1:3, w = c(1,Inf,3)), Inf) expect_equal(fprod(1:3, w = c(1,-Inf,3)), -Inf) expect_equal(fprod(1:3, w = c(1,Inf,3), na.rm = FALSE), Inf) expect_equal(fprod(1:3, w = c(1,-Inf,3), na.rm = FALSE), -Inf) }) test_that("fprod produces errors for wrong input", { expect_error(fprod("a")) expect_error(fprod(NA_character_)) expect_error(fprod(mNAc)) expect_error(fprod(mNAc, f)) expect_error(fprod(1:2,1:3)) expect_error(fprod(m,1:31)) expect_error(fprod(mtcars,1:31)) expect_error(fprod(mtcars, w = 1:31)) expect_error(fprod("a", w = 1)) expect_error(fprod(1:2, w = 1:3)) expect_error(fprod(NA_character_, w = 1)) expect_error(fprod(mNAc, w = wdat)) expect_error(fprod(mNAc, f, wdat)) expect_error(fprod(mNA, w = 1:33)) expect_error(fprod(1:2,1:2, 1:3)) expect_error(fprod(m,1:32,1:20)) expect_error(fprod(mtcars,1:32,1:10)) expect_error(fprod(1:2, w = c("a","b"))) expect_error(fprod(wlddev)) expect_error(fprod(wlddev, w = wlddev$year)) expect_error(fprod(wlddev, wlddev$iso3c)) expect_error(fprod(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-join.R0000644000176200001440000001401615115712014017114 0ustar liggesuserscontext("join") df1 <- data.frame( id1 = c(1, 1, 2, 3), id2 = c("a", "b", "b", "c"), name = c("John", "Jane", "Bob", "Carl"), age = c(35, 28, 42, 50) ) df2 <- data.frame( id1 = c(1, 2, 3, 3), id2 = c("a", "b", "c", "e"), salary = c(60000, 55000, 70000, 80000), dept = c("IT", "Marketing", "Sales", "IT") ) opts <- set_collapse(verbose = 0) for (sort in c(FALSE, TRUE)) { expect_identical(join(df1, df2, how = "inner", sort = sort), merge(df1, df2)) expect_identical(join(df1, df2, how = "left", sort = sort), merge(df1, df2, all.x = TRUE)) expect_identical(join(df1, df2, how = "right", sort = sort), merge(df1, df2, all.y = TRUE)) expect_identical(join(df1, df2, how = "full", sort = sort), merge(df1, df2, all = TRUE)) } expect_identical(names(join(df1, df2, on = "id2", how = "full", keep.col.order = FALSE, column = TRUE))[1:2], c("id2", ".join")) expect_identical(names(join(df1, df2, on = "id2", how = "full", keep.col.order = FALSE, column = TRUE, multiple = TRUE))[1:2], c("id2", ".join")) expect_identical(names(join(df1, df2, on = "id2", how = "right", keep.col.order = FALSE, column = TRUE))[1:2], c("id2", ".join")) expect_identical(names(join(df1, df2, on = "id2", how = "right", keep.col.order = FALSE, column = TRUE, multiple = TRUE))[1:2], c("id2", ".join")) # Different types of joins # https://github.com/fastverse/collapse/issues/503 x1 = data.frame( id = c(1L, 1L, 2L, 3L, NA_integer_), t = c(1L, 2L, 1L, 2L, NA_integer_), x = 11:15 ) y1 = data.frame( id = c(1,2, 4), y = c(11L, 15L, 16) ) for(i in c("l","i","r","f","s","a")) { expect_identical(capture.output(join(df1, df2, how = i, verbose = 1))[-1], capture.output(join(df1, df2, how = i, verbose = 0))) expect_identical(capture.output(join(x1, y1, how = i, verbose = 1))[-1], capture.output(join(x1, y1, how = i, verbose = 0))) } df1 = na_insert(df1, 0.3) df2 = na_insert(df2, 0.3) for(i in c("l","i","r","f","s","a")) { expect_identical(capture.output(join(df1, df2, how = i, verbose = 1))[-1], capture.output(join(df1, df2, how = i, verbose = 0))) } sort_merge <- function(..., sort = FALSE) { res = merge(...) if(sort) return(roworder(res, id1, id2)) res } expect_identical(join(df1, df2, how = "inner", sort = TRUE), sort_merge(df1, df2, sort = TRUE)) expect_identical(join(df1, df2, how = "left", sort = TRUE), sort_merge(df1, df2, all.x = TRUE, sort = TRUE)) expect_identical(join(df1, df2, how = "right", sort = TRUE), sort_merge(df1, df2, all.y = TRUE, sort = TRUE)) ###################################### # Rigorous Testing Sort-Merge-Join ###################################### sort_join <- function(x, y, on, ...) { res = join(x, y, on, ...) roworderv(res, on) } random_df_pair <- function(df, replace = FALSE, max.cols = 1) { d <- dim(df) cols <- sample.int(d[2L], if(is.na(max.cols)) as.integer(1 + d[2L] * 0.75 * runif(1)) else max.cols) rows_x <- sample.int(d[1L], as.integer(1 + d[1L] * runif(1)), replace) rows_table <- sample.int(d[1L], as.integer(1 + d[1L] * runif(1)), replace) list(ss(df, rows_x, cols), ss(df, rows_table, cols), rows_x, rows_table, cols) } join_identical <- function(df, replace = FALSE, max.cols = 1, sort = TRUE, ...) { data <- random_df_pair(df, replace, max.cols) x <- data[[1]] y <- data[[2]] cols <- data[[5]] nam <- names(df) rem <- nam[-cols] if(length(rem) > 2L) { rem_x <- sample(rem, as.integer(length(rem)/2)) rem_y <- setdiff(rem, rem_x) av(x) <- ss(df, data[[3]], rem_x) av(y) <- ss(df, data[[4]], rem_y) } if(sort) { id <- tryCatch(identical(join(x, y, on = nam[cols], sort = TRUE, ...), sort_join(x, y, on = nam[cols], overid = 2L, ...)), error = function(e) FALSE) } else { id <- identical(join(x, y, on = nam[cols], sort = FALSE, overid = 2L, ...), merge(x, y, by = nam[cols], all.x = TRUE, ...)) } if(id) TRUE else list(x, y, nam[cols]) } # (d <- join_identical(wlddev)) wldna <- na_insert(wlddev) wldcc <- replace_NA(wlddev) test_that("sort merge join works well with single vectors", { for (h in c("l","i","r","f","s","a")) { for (r in c(FALSE, TRUE)) { # r = replace expect_true(all(replicate(100, join_identical(wlddev, r, how = h)))) expect_true(all(replicate(100, join_identical(wldna, r, how = h)))) expect_true(all(replicate(100, join_identical(wldcc, r, how = h)))) } } }) # (d <- join_identical(wlddev[1:8], FALSE, max.cols = 4)) wldna <- na_insert(wlddev) wldcc <- replace_NA(wlddev) NCRAN <- Sys.getenv("NCRAN") == "TRUE" test_that("sort merge join works well with multiple vectors", { for (h in c("l", if(NCRAN) c("i","r","f","s","a") else NULL)) { for (r in c(FALSE, TRUE)) { # r = replace expect_true(all(replicate(100, join_identical(wlddev, r, max.cols = NA, how = h)))) expect_true(all(replicate(100, join_identical(wldna, r, max.cols = NA, how = h)))) expect_true(all(replicate(100, join_identical(wldcc, r, max.cols = NA, how = h)))) } } }) # Testing misc. issues: factors with integers and doubles d1 = mtcars |> fcompute(v1 = mpg, g = qF(seq_len(32)+100)) d2 = mtcars |> fcompute(v2 = mpg, g = seq_len(32)+100L) expect_true(all_identical(with(join(d1, d2, verbose = 0), list(v1, v2)))) expect_true(all_identical(with(join(d1, d2, verbose = 0, sort = TRUE), list(v1, v2)))) d2 = mtcars |> fcompute(v2 = mpg, g = seq_len(32)+100) expect_true(all_identical(with(join(d1, d2, verbose = 0), list(v1, v2)))) expect_true(all_identical(with(join(d1, d2, verbose = 0, sort = TRUE), list(v1, v2)))) if(requireNamespace("bit64", quietly = TRUE)) test_that("join() works with integer64", { t1 <- data.frame(id = 1:5) t2 <- fmutate(t1, id = bit64::as.integer64(id)) for (h in c("l", "r", "i", "f")) { expect_identical(fnrow(join(t1, t2, how = h, sort = FALSE, verbose = 0)), 5L) expect_identical(fnrow(join(t2, t1, how = h, sort = FALSE, verbose = 0)), 5L) expect_identical(fnrow(join(t1, t2, how = h, sort = TRUE, verbose = 0)), 5L) expect_identical(fnrow(join(t2, t1, how = h, sort = TRUE, verbose = 0)), 5L) } }) set_collapse(opts) collapse/tests/testthat/test-fmean.R0000644000176200001440000005542114777170131017263 0ustar liggesuserscontext("fmean") bmean <- base::mean bsum <- base::sum # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" wmean <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) x <- x[cc] w <- w[cc] } bsum(x*w)/bsum(w) } for (nth in 1:2) { if(nth == 2L) { if(Sys.getenv("OMP") == "TRUE") { fmean <- function(x, ...) collapse::fmean(x, ..., nthreads = 2L) } else break } test_that("fmean performs like base::mean", { expect_equal(fmean(NA), bmean(NA)) expect_equal(fmean(NA, na.rm = FALSE), bmean(NA)) expect_equal(fmean(1), bmean(1, na.rm = TRUE)) expect_equal(fmean(1:3), bmean(1:3, na.rm = TRUE)) expect_equal(fmean(-1:1), bmean(-1:1, na.rm = TRUE)) expect_equal(fmean(1, na.rm = FALSE), bmean(1)) expect_equal(fmean(1:3, na.rm = FALSE), bmean(1:3)) expect_equal(fmean(-1:1, na.rm = FALSE), bmean(-1:1)) expect_equal(fmean(x), bmean(x, na.rm = TRUE)) expect_equal(fmean(x, na.rm = FALSE), bmean(x)) expect_equal(fmean(xNA, na.rm = FALSE), bmean(xNA)) expect_equal(fmean(xNA), bmean(xNA, na.rm = TRUE)) expect_equal(fmean(mtcars), fmean(m)) expect_equal(fmean(m), dapply(m, bmean, na.rm = TRUE)) expect_equal(fmean(m, na.rm = FALSE), dapply(m, bmean)) expect_equal(fmean(mNA, na.rm = FALSE), dapply(mNA, bmean)) expect_equal(fmean(mNA), dapply(mNA, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars), dapply(mtcars, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars, na.rm = FALSE), dapply(mtcars, bmean)) expect_equal(fmean(mtcNA, na.rm = FALSE), dapply(mtcNA, bmean)) expect_equal(fmean(mtcNA), dapply(mtcNA, bmean, na.rm = TRUE)) expect_equal(fmean(x, f), BY(x, f, bmean, na.rm = TRUE)) expect_equal(fmean(x, f, na.rm = FALSE), BY(x, f, bmean)) expect_equal(fmean(xNA, f, na.rm = FALSE), BY(xNA, f, bmean)) expect_equal(fmean(xNA, f), BY(xNA, f, bmean, na.rm = TRUE)) expect_equal(fmean(m, g), BY(m, g, bmean, na.rm = TRUE)) expect_equal(fmean(m, g, na.rm = FALSE), BY(m, g, bmean)) expect_equal(fmean(mNA, g, na.rm = FALSE), BY(mNA, g, bmean)) expect_equal(fmean(mNA, g), BY(mNA, g, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars, g), BY(mtcars, g, bmean, na.rm = TRUE)) expect_equal(fmean(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmean)) expect_equal(fmean(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmean)) expect_equal(fmean(mtcNA, g), BY(mtcNA, g, bmean, na.rm = TRUE)) }) test_that("fmean with weights performs as intended (unbiased)", { expect_equal(fmean(c(2,2,4,5,5,5)), fmean(c(2,4,5), w = c(2,1,3))) expect_equal(fmean(c(2,2,4,5,5,5), na.rm = FALSE), fmean(c(2,4,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(2.456,2.456,4.123,5.009,5.009,5.009)), fmean(c(2.456,4.123,5.009), w = c(2,1,3))) expect_equal(fmean(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE), fmean(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(2,2,NA,5,5,5)), fmean(c(2,NA,5), w = c(2,1,3))) expect_equal(fmean(c(2,2,NA,5,5,5), na.rm = FALSE), fmean(c(2,NA,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(2,2,NA,5,5,5)), fmean(c(2,4,5), w = c(2,NA,3))) expect_equal(fmean(c(2,2,NA,5,5,5), na.rm = FALSE), fmean(c(2,4,5), w = c(2,NA,3), na.rm = FALSE)) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009)), fmean(c(NA,4.123,5.009), w = c(2,1,3))) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmean(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009)), fmean(c(2.456,4.123,5.009), w = c(NA,1,3))) expect_equal(fmean(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmean(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fmean(v, f), fmean(vs, fs, w)) expect_equal(fmean(v, f, na.rm = FALSE), fmean(vs, fs, w, na.rm = FALSE)) expect_equal(fmean(v2, f), fmean(v2s, fs, w)) expect_equal(fmean(v2, f, na.rm = FALSE), fmean(v2s, fs, w, na.rm = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fmean(v, f), fmean(vs, fs, w)) expect_equal(fmean(v, f, na.rm = FALSE), fmean(vs, fs, w, na.rm = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fmean(v, f), fmean(vs, fs, w)) expect_equal(fmean(v, f, na.rm = FALSE), fmean(vs, fs, w, na.rm = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fmean(v2, f), fmean(v2s, fs, w)) expect_equal(fmean(v2, f, na.rm = FALSE), fmean(v2s, fs, w, na.rm = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fmean(v2, f), fmean(v2s, fs, w)) expect_equal(fmean(v2, f, na.rm = FALSE), fmean(v2s, fs, w, na.rm = FALSE)) }) test_that("fmean performs like fmean with weights all equal", { expect_equal(fmean(NA), fmean(NA, w = 0.99999999)) expect_equal(fmean(NA, na.rm = FALSE), fmean(NA, w = 2.946, na.rm = FALSE)) expect_equal(fmean(1), fmean(1, w = 3)) expect_equal(fmean(1:3), fmean(1:3, w = rep(0.999,3))) expect_equal(fmean(-1:1), fmean(-1:1, w = rep(4.2,3))) expect_equal(fmean(1, na.rm = FALSE), fmean(1, w = 5, na.rm = FALSE)) expect_equal(fmean(1:3, na.rm = FALSE), fmean(1:3, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fmean(-1:1, na.rm = FALSE), fmean(-1:1, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fmean(x), fmean(x, w = rep(1,100))) expect_equal(fmean(x, na.rm = FALSE), fmean(x, w = rep(1.44565, 100), na.rm = FALSE)) expect_equal(fmean(xNA, na.rm = FALSE), fmean(xNA, w = rep(4.676587, 100), na.rm = FALSE)) expect_equal(fmean(xNA), fmean(xNA, w = rep(4.676587, 100))) expect_equal(fmean(m), fmean(m, w = rep(6587.3454, 32))) expect_equal(fmean(m, na.rm = FALSE), fmean(m, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fmean(mNA, na.rm = FALSE), fmean(mNA, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fmean(mNA), fmean(mNA, w = rep(6587.3454, 32))) expect_equal(fmean(mtcars), fmean(mtcars, w = rep(6787.3454, 32))) expect_equal(fmean(mtcars, na.rm = FALSE), fmean(mtcars, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fmean(mtcNA, na.rm = FALSE), fmean(mtcNA, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fmean(mtcNA), fmean(mtcNA, w = rep(6787.3454, 32))) expect_equal(fmean(x, f), fmean(x, f, rep(546.78,100))) expect_equal(fmean(x, f, na.rm = FALSE), fmean(x, f, rep(5.88,100), na.rm = FALSE)) expect_equal(fmean(xNA, f, na.rm = FALSE), fmean(xNA, f, rep(52.7,100), na.rm = FALSE)) expect_equal(fmean(xNA, f), fmean(xNA, f, rep(5997456,100))) expect_equal(fmean(m, g), fmean(m, g, rep(546.78,32))) expect_equal(fmean(m, g, na.rm = FALSE), fmean(m, g, rep(0.0001,32), na.rm = FALSE)) expect_equal(fmean(mNA, g, na.rm = FALSE), fmean(mNA, g, rep(5.7,32), na.rm = FALSE)) expect_equal(fmean(mNA, g), fmean(mNA, g, rep(1.1,32))) expect_equal(fmean(mtcars, g), fmean(mtcars, g, rep(53,32))) expect_equal(fmean(mtcars, g, na.rm = FALSE), fmean(mtcars, g, rep(546.78,32), na.rm = FALSE)) expect_equal(fmean(mtcNA, g, na.rm = FALSE), fmean(mtcNA, g, rep(0.999999,32), na.rm = FALSE)) expect_equal(fmean(mtcNA, g), fmean(mtcNA, g, rep(999.9999,32))) }) test_that("fmean with weights performs like wmean (defined above)", { # complete weights expect_equal(fmean(NA, w = 1), wmean(NA, 1)) expect_equal(fmean(NA, w = 1, na.rm = FALSE), wmean(NA, 1)) expect_equal(fmean(1, w = 1), wmean(1, w = 1)) expect_equal(fmean(1:3, w = 1:3), wmean(1:3, 1:3)) expect_equal(fmean(-1:1, w = 1:3), wmean(-1:1, 1:3)) expect_equal(fmean(1, w = 1, na.rm = FALSE), wmean(1, 1)) expect_equal(fmean(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wmean(1:3, c(0.99,3454,1.111))) expect_equal(fmean(-1:1, w = 1:3, na.rm = FALSE), wmean(-1:1, 1:3)) expect_equal(fmean(x, w = w), wmean(x, w)) expect_equal(fmean(x, w = w, na.rm = FALSE), wmean(x, w)) expect_equal(fmean(xNA, w = w, na.rm = FALSE), wmean(xNA, w)) expect_equal(fmean(xNA, w = w), wmean(xNA, w, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdat), fmean(m, w = wdat)) expect_equal(fmean(m, w = wdat), dapply(m, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(m, w = wdat, na.rm = FALSE), dapply(m, wmean, wdat)) expect_equal(fmean(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wmean, wdat)) expect_equal(fmean(mNA, w = wdat), dapply(mNA, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdat), dapply(mtcars, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wmean, wdat)) expect_equal(fmean(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wmean, wdat)) expect_equal(fmean(mtcNA, w = wdat), dapply(mtcNA, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(x, f, w), BY(x, f, wmean, w)) expect_equal(fmean(x, f, w, na.rm = FALSE), BY(x, f, wmean, w)) expect_equal(fmean(xNA, f, w, na.rm = FALSE), BY(xNA, f, wmean, w)) expect_equal(fmean(xNA, f, w), BY(xNA, f, wmean, w, na.rm = TRUE)) expect_equal(fmean(m, g, wdat), BY(m, gf, wmean, wdat)) expect_equal(fmean(m, g, wdat, na.rm = FALSE), BY(m, gf, wmean, wdat)) expect_equal(fmean(mNA, g, wdat, na.rm = FALSE), BY(mNA, gf, wmean, wdat)) expect_equal(fmean(mNA, g, wdat), BY(mNA, gf, wmean, wdat, na.rm = TRUE)) expect_equal(fmean(mtcars, g, wdat), BY(mtcars, gf, wmean, wdat)) expect_equal(fmean(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, gf, wmean, wdat)) expect_equal(fmean(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, gf, wmean, wdat)) expect_equal(fmean(mtcNA, g, wdat), BY(mtcNA, gf, wmean, wdat, na.rm = TRUE)) # missing weights expect_equal(fmean(NA, w = NA), wmean(NA, NA)) expect_equal(fmean(NA, w = NA, na.rm = FALSE), wmean(NA, NA)) expect_equal(fmean(1, w = NA), wmean(1, w = NA)) expect_equal(fmean(1:3, w = c(NA,1:2)), wmean(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fmean(-1:1, w = c(NA,1:2)), wmean(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fmean(1, w = NA, na.rm = FALSE), wmean(1, NA)) expect_equal(fmean(1:3, w = c(NA,1:2), na.rm = FALSE), wmean(1:3, c(NA,1:2))) expect_equal(fmean(-1:1, w = c(NA,1:2), na.rm = FALSE), wmean(-1:1, c(NA,1:2))) expect_equal(fmean(x, w = wNA), wmean(x, wNA, na.rm = TRUE)) expect_equal(fmean(x, w = wNA, na.rm = FALSE), wmean(x, wNA)) expect_equal(fmean(xNA, w = wNA, na.rm = FALSE), wmean(xNA, wNA)) expect_equal(fmean(xNA, w = wNA), wmean(xNA, wNA, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdatNA), fmean(m, w = wdatNA)) expect_equal(fmean(m, w = wdatNA), dapply(m, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(m, w = wdatNA, na.rm = FALSE), dapply(m, wmean, wdatNA)) expect_equal(fmean(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wmean, wdatNA)) expect_equal(fmean(mNA, w = wdatNA), dapply(mNA, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdatNA), dapply(mtcars, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wmean, wdatNA)) expect_equal(fmean(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wmean, wdatNA)) expect_equal(fmean(mtcNA, w = wdatNA), dapply(mtcNA, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(x, f, wNA), BY(x, f, wmean, wNA, na.rm = TRUE)) expect_equal(fmean(x, f, wNA, na.rm = FALSE), BY(x, f, wmean, wNA)) expect_equal(fmean(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wmean, wNA)) expect_equal(fmean(xNA, f, wNA), BY(xNA, f, wmean, wNA, na.rm = TRUE)) expect_equal(fmean(m, g, wdatNA), BY(m, gf, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(m, g, wdatNA, na.rm = FALSE), BY(m, gf, wmean, wdatNA)) expect_equal(fmean(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, gf, wmean, wdatNA)) expect_equal(fmean(mNA, g, wdatNA), BY(mNA, gf, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, g, wdatNA), BY(mtcars, gf, wmean, wdatNA, na.rm = TRUE)) expect_equal(fmean(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, gf, wmean, wdatNA)) expect_equal(fmean(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, gf, wmean, wdatNA)) expect_equal(fmean(mtcNA, g, wdatNA), BY(mtcNA, gf, wmean, wdatNA, na.rm = TRUE)) }) test_that("fmean performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmean(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g), simplify = FALSE))) }) test_that("fmean with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmean(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fmean with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmean(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmean(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fmean handles special values in the right way", { expect_equal(fmean(NA), NA_real_) expect_equal(fmean(NaN), NaN) expect_equal(fmean(Inf), Inf) expect_equal(fmean(-Inf), -Inf) expect_equal(fmean(TRUE), 1) expect_equal(fmean(FALSE), 0) expect_equal(fmean(NA, na.rm = FALSE), NA_real_) expect_equal(fmean(NaN, na.rm = FALSE), NaN) expect_equal(fmean(Inf, na.rm = FALSE), Inf) expect_equal(fmean(-Inf, na.rm = FALSE), -Inf) expect_equal(fmean(TRUE, na.rm = FALSE), 1) expect_equal(fmean(FALSE, na.rm = FALSE), 0) expect_equal(fmean(c(1,NA)), 1) expect_equal(fmean(c(1,NaN)), 1) expect_equal(fmean(c(1,Inf)), Inf) expect_equal(fmean(c(1,-Inf)), -Inf) expect_equal(fmean(c(FALSE,TRUE)), 0.5) expect_equal(fmean(c(FALSE,FALSE)), 0) expect_equal(fmean(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fmean(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fmean(c(FALSE,TRUE), na.rm = FALSE), 0.5) expect_equal(fmean(c(FALSE,FALSE), na.rm = FALSE), 0) }) test_that("fmean with weights handles special values in the right way", { expect_equal(fmean(NA, w = 1), NA_real_) expect_equal(fmean(NaN, w = 1), NaN) expect_equal(fmean(Inf, w = 1), Inf) expect_equal(fmean(-Inf, w = 1), -Inf) expect_equal(fmean(TRUE, w = 1), 1) expect_equal(fmean(FALSE, w = 1), 0) expect_equal(fmean(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fmean(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fmean(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fmean(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fmean(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fmean(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fmean(NA, w = NA), NA_real_) expect_equal(fmean(NaN, w = NA), NA_real_) expect_equal(fmean(Inf, w = NA), NA_real_) expect_equal(fmean(-Inf, w = NA), NA_real_) expect_equal(fmean(TRUE, w = NA), NA_real_) expect_equal(fmean(FALSE, w = NA), NA_real_) expect_equal(fmean(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fmean(1:3, w = c(1,Inf,3)), NaN) expect_equal(fmean(1:3, w = c(1,-Inf,3)), NaN) expect_equal(fmean(1:3, w = c(1,Inf,3), na.rm = FALSE), NaN) expect_equal(fmean(1:3, w = c(1,-Inf,3), na.rm = FALSE), NaN) }) test_that("fmean produces errors for wrong input", { expect_error(fmean("a")) expect_error(fmean(NA_character_)) expect_error(fmean(mNAc)) expect_error(fmean(mNAc, f)) expect_error(fmean(1:2,1:3)) expect_error(fmean(m,1:31)) expect_error(fmean(mtcars,1:31)) expect_error(fmean(mtcars, w = 1:31)) expect_error(fmean("a", w = 1)) expect_error(fmean(1:2, w = 1:3)) expect_error(fmean(NA_character_, w = 1)) expect_error(fmean(mNAc, w = wdat)) expect_error(fmean(mNAc, f, wdat)) expect_error(fmean(mNA, w = 1:33)) expect_error(fmean(1:2,1:2, 1:3)) expect_error(fmean(m,1:32,1:20)) expect_error(fmean(mtcars,1:32,1:10)) expect_error(fmean(1:2, w = c("a","b"))) expect_error(fmean(wlddev)) expect_error(fmean(wlddev, w = wlddev$year)) expect_error(fmean(wlddev, wlddev$iso3c)) expect_error(fmean(wlddev, wlddev$iso3c, wlddev$year)) }) } collapse/tests/testthat/test-qsu.R0000644000176200001440000002654514777170131017012 0ustar liggesuserscontext("qsu") # rm(list = ls()) bmean <- base::mean bsd <- stats::sd bsum <- base::sum bstats <- function(x) { if(!is.numeric(x)) return(c(N = bsum(!is.na(x)), Mean = NA_real_, SD = NA_real_, Min = NA_real_, Max = NA_real_)) c(N = bsum(!is.na(x)), Mean = bmean(x, na.rm = TRUE), SD = bsd(x, na.rm = TRUE), `names<-`(range(x, na.rm = TRUE), c("Min", "Max"))) } base_qsu <- function(x, g = NULL) { if(is.atomic(x) && !is.matrix(x)) return(`oldClass<-`(bstats(x), c("qsu", "table"))) if(is.null(g)) { r <- t(dapply(x, bstats, return = "matrix")) return(`oldClass<-`(r, c("qsu", "matrix", "table"))) } r <- simplify2array(BY(x, g, bstats, return = "list", expand.wide = TRUE)) return(`oldClass<-`(r, c("qsu", "array", "table"))) } wldNA <- na_insert(wlddev) xNA <- na_insert(rnorm(100)) ones <- rep(1, fnrow(wlddev)) for(i in 1:2) { if(i == 1L) qsu <- function(x, ...) collapse::qsu(x, ..., stable.algo = FALSE) if(i == 2L) qsu <- collapse::qsu test_that("qsu works properly for simple cases (including unit groups and weights)", { expect_equal(qsu(1:10), base_qsu(1:10)) expect_equal(qsu(10:1), base_qsu(10:1)) expect_equal(qsu(xNA), base_qsu(xNA)) expect_equal(qsu(wlddev), base_qsu(wlddev)) expect_equal(qsu(wldNA), base_qsu(wldNA)) expect_equal(qsu(GGDC10S), base_qsu(GGDC10S)) expect_equal(qsu(1:10, w = rep(1, 10))[-2L], base_qsu(1:10)) expect_equal(qsu(10:1, w = rep(1, 10))[-2L], base_qsu(10:1)) expect_equal(qsu(xNA, w = rep(1, 100))[-2L], base_qsu(xNA)) expect_equal(qsu(wlddev, w = ones)[,-2L], base_qsu(wlddev)) expect_equal(qsu(wldNA, w = ones)[,-2L], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)))[,-2L], base_qsu(GGDC10S)) expect_equal(unattrib(qsu(1:10, g = rep(1, 10))), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10))), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100))), unattrib(base_qsu(xNA))) expect_equal(unattrib(qsu(wlddev, by = ones)), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around... expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10)))[-2L], unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10)))[-2L], unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100)))[-2L], unattrib(base_qsu(xNA))) expect_equal(qsu(wldNA, w = ones)[,-2L], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)))[,-2L], base_qsu(GGDC10S)) expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones)))[,-2L], unclass(base_qsu(wldNA))) expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)))))[,-2L], unclass(base_qsu(GGDC10S))) }) } rm(qsu) test_that("qsu works properly for simple cases with higher-order statistics (including unit groups and weights)", { expect_equal(qsu(1:10, higher = TRUE)[1:5], base_qsu(1:10)) expect_equal(qsu(10:1, higher = TRUE)[1:5], base_qsu(10:1)) expect_equal(qsu(xNA, higher = TRUE)[1:5], base_qsu(xNA)) expect_equal(qsu(wlddev, higher = TRUE)[,1:5], base_qsu(wlddev)) expect_equal(qsu(wldNA, higher = TRUE)[,1:5], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, higher = TRUE)[,1:5], base_qsu(GGDC10S)) expect_equal(qsu(1:10, w = rep(1, 10), higher = TRUE)[c(1L, 3:6)], base_qsu(1:10)) expect_equal(qsu(10:1, w = rep(1, 10), higher = TRUE)[c(1L, 3:6)], base_qsu(10:1)) expect_equal(qsu(xNA, w = rep(1, 100), higher = TRUE)[c(1L, 3:6)], base_qsu(xNA)) expect_equal(qsu(wlddev, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wlddev)) expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6)], base_qsu(GGDC10S)) expect_equal(unattrib(qsu(1:10, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10), higher = TRUE)[1:5]), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100), higher = TRUE)[1:5]), unattrib(base_qsu(xNA))) expect_equal(unattrib(qsu(wlddev, by = ones, higher = TRUE)[1:5, ]), unattrib(t(base_qsu(wlddev)))) # This should be an array... or oriented the other way around... expect_equal(unattrib(qsu(1:10, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(1:10))) expect_equal(unattrib(qsu(10:1, g = rep(1, 10), w = rep(1, 10), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(10:1))) expect_equal(unattrib(qsu(xNA, g = rep(1, 100), w = rep(1, 100), higher = TRUE)[c(1L, 3:6)]), unattrib(base_qsu(xNA))) expect_equal(qsu(wldNA, w = ones, higher = TRUE)[,c(1L, 3:6)], base_qsu(wldNA)) expect_equal(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6)], base_qsu(GGDC10S)) expect_equal(t(unclass(qsu(wldNA, w = ones, by = ones, higher = TRUE)[c(1L, 3:6),])), unclass(base_qsu(wldNA))) expect_equal(t(unclass(qsu(GGDC10S, w = rep(1, fnrow(GGDC10S)), by = rep(1, fnrow(GGDC10S)), higher = TRUE)))[,c(1L, 3:6)], unclass(base_qsu(GGDC10S))) }) wtd.sd <- function(x, w) sqrt(bsum(w * (x - weighted.mean(x, w))^2)/bsum(w)) wtd.skewness <- function(x, w) (bsum(w * (x - weighted.mean(x, w))^3)/bsum(w))/wtd.sd(x, w)^3 wtd.kurtosis <- function(x, w) ((bsum(w * (x - weighted.mean(x, w))^4)/bsum(w))/wtd.sd(x, w)^4) base_w_qsu <- function(x, w) { if(!is.numeric(x)) return(c(N = bsum(!is.na(x)), Mean = NA_real_, SD = NA_real_, Min = NA_real_, Max = NA_real_, Skew = NA_real_, Kurt = NA_real_)) cc <- complete.cases(x, w) if(!all(cc)) { x <- x[cc] w <- w[cc] } res <- c(N = length(x), Mean = weighted.mean(x, w), SD = fsd(x, w = w), `names<-`(range(x, na.rm = TRUE), c("Min", "Max")), Skew = wtd.skewness(x, w), Kurt = wtd.kurtosis(x, w)) class(res) <- c("qsu", "table") res } test_that("Proper performance of weighted statsistics", { x <- mtcars$mpg w <- ceiling(mtcars$wt*10) wx <- rep(x, w) expect_equal(base_w_qsu(x, w)[-1L], qsu(wx, higher = TRUE)[-1L]) expect_equal(qsu(wx)[-1L], qsu(x, w = w)[-(1:2)]) expect_equal(qsu(wx, higher = TRUE)[-1L], qsu(x, w = w, higher = TRUE)[-(1:2)]) expect_equal(drop(qsu(wx, g = rep(1L, length(wx)), higher = TRUE))[-1L], drop(qsu(x, g = rep(1L, length(x)), w = w, higher = TRUE))[-(1:2)]) }) g <- GRP(wlddev, ~ income) p <- GRP(wlddev, ~ iso3c) for(i in 1:2) { if(i == 1L) qsu <- function(x, ...) collapse::qsu(x, ..., stable.algo = FALSE) if(i == 2L) qsu <- collapse::qsu test_that("qsu works properly for grouped and panel data computations", { # Grouped Statistics expect_equal(qsu(wldNA, g), base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable), base_qsu(GGDC10S, GGDC10S$Variable)) # Grouped and Weighted Statistics expect_equal(qsu(wldNA, g, w = ones)[,-2L,], base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)))[,-2L,], base_qsu(GGDC10S, GGDC10S$Variable)) # Panel Data Statistics ps <- qsu(wldNA, pid = p, cols = is.numeric) expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p)))) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Weighted Panel Data Statistics ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric)[,-2L,] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1])) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Grouped Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, cols = is.numeric) expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) # Grouped and Weighted Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric)[,-2L,,] expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) }) } rm(qsu) test_that("qsu works properly for grouped and panel data computations with higher-order statistics", { # Grouped Statistics expect_equal(qsu(wldNA, g, higher = TRUE)[,1:5,], base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable, higher = TRUE)[,1:5,], base_qsu(GGDC10S, GGDC10S$Variable)) # Grouped and Weighted Statistics expect_equal(qsu(wldNA, g, w = ones, higher = TRUE)[,c(1L, 3:6),], base_qsu(wldNA, g)) expect_equal(qsu(GGDC10S, GGDC10S$Variable, w = rep(1, fnrow(GGDC10S)), higher = TRUE)[,c(1L, 3:6),], base_qsu(GGDC10S, GGDC10S$Variable)) # Panel Data Statistics ps <- qsu(wldNA, pid = p, cols = is.numeric, higher = TRUE)[,1:5,] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) expect_equal(unattrib(t(ps["Between",,])), unattrib(base_qsu(fmean(nv(wldNA), p)))) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Weighted Panel Data Statistics ps <- qsu(wldNA, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,c(1L, 3:6),] expect_equal(unattrib(t(ps["Overall",,])), unattrib(base_qsu(nv(wldNA)))) # TODO: Figure out why this test fails !!!!!! # expect_equal(unattrib(t(ps["Between",-1,])), unattrib(base_qsu(fbetween(nv(wldNA), p))[,-1])) expect_equal(unattrib(t(ps["Within", -1,])), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"))[, -1])) # Grouped Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, cols = is.numeric, higher = TRUE)[,1:5,,] expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) # Grouped and Weighted Panel Data Statistics ps <- qsu(wldNA, by = g, pid = p, w = ones, cols = is.numeric, higher = TRUE)[,c(1L, 3:6),,] expect_equal(unattrib(ps[,,"Overall",]), unattrib(base_qsu(nv(wldNA), g))) expect_equal(unattrib(ps[,-1,"Between",]), unattrib(base_qsu(fbetween(nv(wldNA), p), g)[,-1,])) expect_equal(unattrib(ps[,-1,"Within",]), unattrib(base_qsu(fwithin(nv(wldNA), p, mean = "overall.mean"), g)[,-1,])) }) # Make more tests!! See also collapse general TODO ! test_that("qsu gives errors for wrong input", { expect_error(qsu(wlddev$year, 2:4)) expect_error(qsu(wlddev$year, pid = 2:4)) expect_error(qsu(wlddev, 2:4)) expect_error(qsu(wlddev, pid = 2:4)) expect_error(qsu(wlddev$year, letters)) expect_error(qsu(wlddev$year, pid = letters)) expect_error(qsu(wlddev, letters)) expect_error(qsu(wlddev, pid = letters)) expect_error(qsu(wlddev, ~ iso3c + bla)) expect_error(qsu(wlddev, pid = ~ iso3c + bla)) expect_visible(qsu(wlddev, PCGDP ~ region + income)) expect_visible(qsu(wlddev, pid = PCGDP ~ region + income)) expect_equal(qsu(wlddev, PCGDP ~ region + income, ~ iso3c), qsu(wlddev, ~ region + income, pid = PCGDP ~ iso3c)) expect_error(qsu(wlddev, cols = 9:14)) expect_error(qsu(wlddev, cols = c("PCGDP","bla"))) }) collapse/tests/testthat/test-fcumsum.R0000644000176200001440000006501414777170131017653 0ustar liggesuserscontext("fcumsum") # rm(liso = ls()) set.seed(101) x <- abs(1000*rnorm(100)) xNA <- x xNA[sample.int(100, 20)] <- NA xNA[1L] <- NA f <- as.factor(rep(1:10, each = 10)) t <- as.factor(rep(1:100)) data <- wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ] settransform(data, ODA = NULL, POP = NULL) # Too large (integer overflow) g <- GRP(droplevels(data$iso3c)) td <- as.factor(data$year) dataNA <- na_insert(data) m <- as.matrix(data) suppressWarnings(storage.mode(m) <- "numeric") mNAc <- as.matrix(dataNA) mNA <- mNAc suppressWarnings(storage.mode(mNA) <- "numeric") # Creatung unordered data: o = order(rnorm(100)) xuo = x[o] xNAuo = xNA[o] fuo = f[o] tuo = t[o] t2uo = seq_len(100)[o] o = order(o) od = order(rnorm(length(td))) muo = m[od, ] mNAuo <- mNA[od, ] datauo = data[od, ] dataNAuo = dataNA[od, ] guo = as_factor_GRP(g)[od] tduo = td[od] t2duo = seq_along(od)[od] od = order(od) bcumsum <- base::cumsum if(requireNamespace("data.table", quietly = TRUE)) { basecumsum <- function(x, na.rm = TRUE, fill = FALSE) { ax <- attributes(x) if(!na.rm || !anyNA(x)) return(`attributes<-`(bcumsum(x), ax)) cc <- which(!is.na(x)) x[cc] <- bcumsum(x[cc]) if(!fill) return(x) if(is.na(x[1L])) x[1L] <- 0L data.table::nafill(x, type = "locf") } test_that("fcumsum performs like basecumsum", { # No groups, no ordering expect_equal(fcumsum(-10:10), basecumsum(-10:10)) expect_equal(fcumsum(-10:10, na.rm = FALSE), basecumsum(-10:10, na.rm = FALSE)) expect_equal(fcumsum(-10:10, fill = TRUE), basecumsum(-10:10, fill = TRUE)) expect_equal(fcumsum(x), basecumsum(x)) expect_equal(fcumsum(x, na.rm = FALSE), basecumsum(x, na.rm = FALSE)) expect_equal(fcumsum(x, fill = TRUE), basecumsum(x, fill = TRUE)) expect_equal(fcumsum(xNA), basecumsum(xNA)) expect_equal(fcumsum(xNA, na.rm = FALSE), basecumsum(xNA, na.rm = FALSE)) expect_equal(fcumsum(xNA, fill = TRUE), basecumsum(xNA, fill = TRUE)) expect_equal(fcumsum(m), dapply(m, basecumsum)) expect_equal(fcumsum(m, na.rm = FALSE), dapply(m, basecumsum, na.rm = FALSE)) expect_equal(fcumsum(m, fill = TRUE), dapply(m, basecumsum, fill = TRUE)) expect_equal(fcumsum(mNA), dapply(mNA, basecumsum)) expect_equal(fcumsum(mNA, na.rm = FALSE), dapply(mNA, basecumsum, na.rm = FALSE)) expect_equal(fcumsum(mNA, fill = TRUE), dapply(mNA, basecumsum, fill = TRUE)) expect_equal(fcumsum(num_vars(data)), dapply(num_vars(data), basecumsum)) expect_equal(fcumsum(num_vars(data), na.rm = FALSE), dapply(num_vars(data), basecumsum, na.rm = FALSE)) expect_equal(fcumsum(num_vars(data), fill = TRUE), dapply(num_vars(data), basecumsum, fill = TRUE)) expect_equal(fcumsum(num_vars(dataNA)), dapply(num_vars(dataNA), basecumsum)) expect_equal(fcumsum(num_vars(dataNA), na.rm = FALSE), dapply(num_vars(dataNA), basecumsum, na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNA), fill = TRUE), dapply(num_vars(dataNA), basecumsum, fill = TRUE)) # With groups, no ordering expect_equal(fcumsum(x, f), BY(x, f, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(x, na.rm = FALSE, f), BY(x, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(x, f, fill = TRUE), BY(x, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(xNA, f), BY(xNA, f, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(xNA, na.rm = FALSE, f), BY(xNA, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(xNA, f, fill = TRUE), BY(xNA, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(m, g), BY(m, g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(m, na.rm = FALSE, g), BY(m, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(m, g, fill = TRUE), BY(m, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(mNA, g), BY(mNA, g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(mNA, na.rm = FALSE, g), BY(mNA, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(mNA, g, fill = TRUE), BY(mNA, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(data), g), BY(num_vars(data), g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(data), na.rm = FALSE, g), BY(num_vars(data), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(data), g, fill = TRUE), BY(num_vars(data), g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(dataNA), g), BY(num_vars(dataNA), g, basecumsum, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(dataNA), g, na.rm = FALSE), BY(num_vars(dataNA), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_equal(fcumsum(num_vars(dataNA), g, fill = TRUE), BY(num_vars(dataNA), g, basecumsum, fill = TRUE, use.g.names = FALSE)) }) } test_that("fcumsum correctly handles unordered time-series and panel-series computations", { # With ordering, no groups: 1 expect_equal(fcumsum(x, o = 1:100), fcumsum(x)) expect_equal(fcumsum(x, o = 1:100, na.rm = FALSE), fcumsum(x, na.rm = FALSE)) expect_equal(fcumsum(x, o = 1:100, fill = TRUE), fcumsum(x, fill = TRUE)) expect_equal(fcumsum(xNA, o = 1:100), fcumsum(xNA)) expect_equal(fcumsum(xNA, o = 1:100, na.rm = FALSE), fcumsum(xNA, na.rm = FALSE)) expect_equal(fcumsum(xNA, o = 1:100, fill = TRUE), fcumsum(xNA, fill = TRUE)) expect_equal(fcumsum(m, o = seq_row(m)), fcumsum(m)) expect_equal(fcumsum(m, o = seq_row(m), na.rm = FALSE), fcumsum(m, na.rm = FALSE)) expect_equal(fcumsum(m, o = seq_row(m), fill = TRUE), fcumsum(m, fill = TRUE)) expect_equal(fcumsum(mNA, o = seq_row(m)), fcumsum(mNA)) expect_equal(fcumsum(mNA, o = seq_row(m), na.rm = FALSE), fcumsum(mNA, na.rm = FALSE)) expect_equal(fcumsum(mNA, o = seq_row(m), fill = TRUE), fcumsum(mNA, fill = TRUE)) expect_equal(fcumsum(num_vars(data), o = seq_row(data)), fcumsum(num_vars(data))) expect_equal(fcumsum(num_vars(data), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(data), na.rm = FALSE)) expect_equal(fcumsum(num_vars(data), o = seq_row(data), fill = TRUE), fcumsum(num_vars(data), fill = TRUE)) expect_equal(fcumsum(num_vars(dataNA), o = seq_row(data)), fcumsum(num_vars(dataNA))) expect_equal(fcumsum(num_vars(dataNA), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNA), o = seq_row(data), fill = TRUE), fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering, no groups: 2 expect_equal(fcumsum(xuo, o = t2uo)[o], fcumsum(x)) expect_equal(fcumsum(xuo, o = t2uo, na.rm = FALSE)[o], fcumsum(x, na.rm = FALSE)) expect_equal(fcumsum(xuo, o = t2uo, fill = TRUE)[o], fcumsum(x, fill = TRUE)) expect_equal(fcumsum(xNAuo, o = t2uo)[o], fcumsum(xNA)) expect_equal(fcumsum(xNAuo, o = t2uo, na.rm = FALSE)[o], fcumsum(xNA, na.rm = FALSE)) expect_equal(fcumsum(xNAuo, o = t2uo, fill = TRUE)[o], fcumsum(xNA, fill = TRUE)) expect_equal(fcumsum(muo, o = t2duo)[od, ], fcumsum(m)) expect_equal(fcumsum(muo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(m, na.rm = FALSE)) expect_equal(fcumsum(muo, o = t2duo, fill = TRUE)[od, ], fcumsum(m, fill = TRUE)) expect_equal(fcumsum(mNAuo, o = t2duo)[od, ], fcumsum(mNA)) expect_equal(fcumsum(mNAuo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(mNA, na.rm = FALSE)) expect_equal(fcumsum(mNAuo, o = t2duo, fill = TRUE)[od, ], fcumsum(mNA, fill = TRUE)) expect_equal(fcumsum(num_vars(datauo), o = t2duo)[od, ], fcumsum(num_vars(data))) expect_equal(fcumsum(num_vars(datauo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), na.rm = FALSE)) expect_equal(fcumsum(num_vars(datauo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(data), fill = TRUE)) expect_equal(fcumsum(num_vars(dataNAuo), o = t2duo)[od, ], fcumsum(num_vars(dataNA))) expect_equal(fcumsum(num_vars(dataNAuo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNAuo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering and groups expect_equal(fcumsum(xuo, fuo, tuo)[o], fcumsum(x, f, t)) expect_equal(fcumsum(xuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(x, f, t, na.rm = FALSE)) expect_equal(fcumsum(xuo, fuo, tuo, fill = TRUE)[o], fcumsum(x, f, t, fill = TRUE)) expect_equal(fcumsum(xNAuo, fuo, tuo)[o], fcumsum(xNA, f, t)) expect_equal(fcumsum(xNAuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(xNA, f, t, na.rm = FALSE)) expect_equal(fcumsum(xNAuo, fuo, tuo, fill = TRUE)[o], fcumsum(xNA, f, t, fill = TRUE)) expect_equal(fcumsum(muo, guo, tduo)[od, ], fcumsum(m, g, td)) expect_equal(fcumsum(muo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(m, g, td, na.rm = FALSE)) expect_equal(fcumsum(muo, guo, tduo, fill = TRUE)[od, ], fcumsum(m, g, td, fill = TRUE)) expect_equal(fcumsum(mNAuo, guo, tduo)[od, ], fcumsum(mNA, g, td)) expect_equal(fcumsum(mNAuo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(mNA, g, td, na.rm = FALSE)) expect_equal(fcumsum(mNAuo, guo, tduo, fill = TRUE)[od, ], fcumsum(mNA, g, td, fill = TRUE)) expect_equal(fcumsum(num_vars(datauo), guo, tduo)[od, ], fcumsum(num_vars(data), g, td)) expect_equal(fcumsum(num_vars(datauo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), g, td, na.rm = FALSE)) expect_equal(fcumsum(num_vars(datauo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(data), g, td, fill = TRUE)) expect_equal(fcumsum(num_vars(dataNAuo), guo, tduo)[od, ], fcumsum(num_vars(dataNA), g, td)) expect_equal(fcumsum(num_vars(dataNAuo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), g, td, na.rm = FALSE)) expect_equal(fcumsum(num_vars(dataNAuo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), g, td, fill = TRUE)) }) test_that("fcumsum performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fcumsum(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(data)), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(dataNA)), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(x, f, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNA, f, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(m, g, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(mNA, g, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(data), g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(data), g, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(dataNA), g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(dataNA), g, fill = TRUE), simplify = FALSE))) }) test_that("fcumsum performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fcumsum(xuo, o = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xNAuo, o = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(muo, o = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(datauo), o = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xuo, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(xuo, fuo, tuo, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(muo, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(muo, guo, tduo, fill = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(datauo), guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fcumsum(nv(datauo), guo, tduo, fill = TRUE), simplify = FALSE))) }) # Testing integer methods test_that("Integer overflow gives error", { expect_error(fcumsum(1:1e5)) expect_error(fcumsum(-1:-1e5)) }) x <- as.integer(x) xNA <- as.integer(xNA) storage.mode(m) <- "integer" storage.mode(mNA) <- "integer" settransformv(data, is.numeric, as.integer) settransformv(dataNA, is.numeric, as.integer) xuo <- as.integer(xuo) xNAuo <- as.integer(xNAuo) storage.mode(muo) <- "integer" storage.mode(mNAuo) <- "integer" settransformv(datauo, is.numeric, as.integer) settransformv(dataNAuo, is.numeric, as.integer) if(requireNamespace("data.table", quietly = TRUE)) { test_that("fcumsum with integers performs like basecumsum", { # No groups, no ordering expect_identical(fcumsum(x), basecumsum(x)) expect_identical(fcumsum(x, na.rm = FALSE), basecumsum(x, na.rm = FALSE)) expect_identical(fcumsum(x, fill = TRUE), basecumsum(x, fill = TRUE)) expect_identical(fcumsum(xNA), basecumsum(xNA)) expect_identical(fcumsum(xNA, na.rm = FALSE), basecumsum(xNA, na.rm = FALSE)) expect_identical(fcumsum(xNA, fill = TRUE), basecumsum(xNA, fill = TRUE)) expect_identical(fcumsum(m), dapply(m, basecumsum)) expect_identical(fcumsum(m, na.rm = FALSE), dapply(m, basecumsum, na.rm = FALSE)) expect_identical(fcumsum(m, fill = TRUE), dapply(m, basecumsum, fill = TRUE)) expect_identical(fcumsum(mNA), dapply(mNA, basecumsum)) expect_identical(fcumsum(mNA, na.rm = FALSE), dapply(mNA, basecumsum, na.rm = FALSE)) expect_identical(fcumsum(mNA, fill = TRUE), dapply(mNA, basecumsum, fill = TRUE)) expect_identical(fcumsum(num_vars(data)), dapply(num_vars(data), basecumsum)) expect_identical(fcumsum(num_vars(data), na.rm = FALSE), dapply(num_vars(data), basecumsum, na.rm = FALSE)) expect_identical(fcumsum(num_vars(data), fill = TRUE), dapply(num_vars(data), basecumsum, fill = TRUE)) expect_identical(fcumsum(num_vars(dataNA)), dapply(num_vars(dataNA), basecumsum)) expect_identical(fcumsum(num_vars(dataNA), na.rm = FALSE), dapply(num_vars(dataNA), basecumsum, na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNA), fill = TRUE), dapply(num_vars(dataNA), basecumsum, fill = TRUE)) # With groups, no ordering expect_identical(fcumsum(x, f), BY(x, f, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(x, na.rm = FALSE, f), BY(x, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(x, f, fill = TRUE), BY(x, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(xNA, f), BY(xNA, f, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(xNA, na.rm = FALSE, f), BY(xNA, f, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(xNA, f, fill = TRUE), BY(xNA, f, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(m, g), BY(m, g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(m, na.rm = FALSE, g), BY(m, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(m, g, fill = TRUE), BY(m, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(mNA, g), BY(mNA, g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(mNA, na.rm = FALSE, g), BY(mNA, g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(mNA, g, fill = TRUE), BY(mNA, g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(data), g), BY(num_vars(data), g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(data), na.rm = FALSE, g), BY(num_vars(data), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(data), g, fill = TRUE), BY(num_vars(data), g, basecumsum, fill = TRUE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(dataNA), g), BY(num_vars(dataNA), g, basecumsum, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(dataNA), g, na.rm = FALSE), BY(num_vars(dataNA), g, basecumsum, na.rm = FALSE, use.g.names = FALSE)) expect_identical(fcumsum(num_vars(dataNA), g, fill = TRUE), BY(num_vars(dataNA), g, basecumsum, fill = TRUE, use.g.names = FALSE)) }) } test_that("fcumsum with integers correctly handles unordered time-series and panel-series computations", { # With ordering, no groups: 1 expect_identical(fcumsum(x, o = 1:100), fcumsum(x)) expect_identical(fcumsum(x, o = 1:100, na.rm = FALSE), fcumsum(x, na.rm = FALSE)) expect_identical(fcumsum(x, o = 1:100, fill = TRUE), fcumsum(x, fill = TRUE)) expect_identical(fcumsum(xNA, o = 1:100), fcumsum(xNA)) expect_identical(fcumsum(xNA, o = 1:100, na.rm = FALSE), fcumsum(xNA, na.rm = FALSE)) expect_identical(fcumsum(xNA, o = 1:100, fill = TRUE), fcumsum(xNA, fill = TRUE)) expect_identical(fcumsum(m, o = seq_row(m)), fcumsum(m)) expect_identical(fcumsum(m, o = seq_row(m), na.rm = FALSE), fcumsum(m, na.rm = FALSE)) expect_identical(fcumsum(m, o = seq_row(m), fill = TRUE), fcumsum(m, fill = TRUE)) expect_identical(fcumsum(mNA, o = seq_row(m)), fcumsum(mNA)) expect_identical(fcumsum(mNA, o = seq_row(m), na.rm = FALSE), fcumsum(mNA, na.rm = FALSE)) expect_identical(fcumsum(mNA, o = seq_row(m), fill = TRUE), fcumsum(mNA, fill = TRUE)) expect_identical(fcumsum(num_vars(data), o = seq_row(data)), fcumsum(num_vars(data))) expect_identical(fcumsum(num_vars(data), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(data), na.rm = FALSE)) expect_identical(fcumsum(num_vars(data), o = seq_row(data), fill = TRUE), fcumsum(num_vars(data), fill = TRUE)) expect_identical(fcumsum(num_vars(dataNA), o = seq_row(data)), fcumsum(num_vars(dataNA))) expect_identical(fcumsum(num_vars(dataNA), o = seq_row(data), na.rm = FALSE), fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNA), o = seq_row(data), fill = TRUE), fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering, no groups: 2 expect_identical(fcumsum(xuo, o = t2uo)[o], fcumsum(x)) expect_identical(fcumsum(xuo, o = t2uo, na.rm = FALSE)[o], fcumsum(x, na.rm = FALSE)) expect_identical(fcumsum(xuo, o = t2uo, fill = TRUE)[o], fcumsum(x, fill = TRUE)) expect_identical(fcumsum(xNAuo, o = t2uo)[o], fcumsum(xNA)) expect_identical(fcumsum(xNAuo, o = t2uo, na.rm = FALSE)[o], fcumsum(xNA, na.rm = FALSE)) expect_identical(fcumsum(xNAuo, o = t2uo, fill = TRUE)[o], fcumsum(xNA, fill = TRUE)) expect_identical(fcumsum(muo, o = t2duo)[od, ], fcumsum(m)) expect_identical(fcumsum(muo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(m, na.rm = FALSE)) expect_identical(fcumsum(muo, o = t2duo, fill = TRUE)[od, ], fcumsum(m, fill = TRUE)) expect_identical(fcumsum(mNAuo, o = t2duo)[od, ], fcumsum(mNA)) expect_identical(fcumsum(mNAuo, o = t2duo, na.rm = FALSE)[od, ], fcumsum(mNA, na.rm = FALSE)) expect_identical(fcumsum(mNAuo, o = t2duo, fill = TRUE)[od, ], fcumsum(mNA, fill = TRUE)) expect_identical(fcumsum(num_vars(datauo), o = t2duo)[od, ], fcumsum(num_vars(data))) expect_identical(fcumsum(num_vars(datauo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), na.rm = FALSE)) expect_identical(fcumsum(num_vars(datauo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(data), fill = TRUE)) expect_identical(fcumsum(num_vars(dataNAuo), o = t2duo)[od, ], fcumsum(num_vars(dataNA))) expect_identical(fcumsum(num_vars(dataNAuo), o = t2duo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNAuo), o = t2duo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), fill = TRUE)) # With ordering and groups expect_identical(fcumsum(xuo, fuo, tuo)[o], fcumsum(x, f, t)) expect_identical(fcumsum(xuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(x, f, t, na.rm = FALSE)) expect_identical(fcumsum(xuo, fuo, tuo, fill = TRUE)[o], fcumsum(x, f, t, fill = TRUE)) expect_identical(fcumsum(xNAuo, fuo, tuo)[o], fcumsum(xNA, f, t)) expect_identical(fcumsum(xNAuo, fuo, tuo, na.rm = FALSE)[o], fcumsum(xNA, f, t, na.rm = FALSE)) expect_identical(fcumsum(xNAuo, fuo, tuo, fill = TRUE)[o], fcumsum(xNA, f, t, fill = TRUE)) expect_identical(fcumsum(muo, guo, tduo)[od, ], fcumsum(m, g, td)) expect_identical(fcumsum(muo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(m, g, td, na.rm = FALSE)) expect_identical(fcumsum(muo, guo, tduo, fill = TRUE)[od, ], fcumsum(m, g, td, fill = TRUE)) expect_identical(fcumsum(mNAuo, guo, tduo)[od, ], fcumsum(mNA, g, td)) expect_identical(fcumsum(mNAuo, guo, tduo, na.rm = FALSE)[od, ], fcumsum(mNA, g, td, na.rm = FALSE)) expect_identical(fcumsum(mNAuo, guo, tduo, fill = TRUE)[od, ], fcumsum(mNA, g, td, fill = TRUE)) expect_identical(fcumsum(num_vars(datauo), guo, tduo)[od, ], fcumsum(num_vars(data), g, td)) expect_identical(fcumsum(num_vars(datauo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(data), g, td, na.rm = FALSE)) expect_identical(fcumsum(num_vars(datauo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(data), g, td, fill = TRUE)) expect_identical(fcumsum(num_vars(dataNAuo), guo, tduo)[od, ], fcumsum(num_vars(dataNA), g, td)) expect_identical(fcumsum(num_vars(dataNAuo), guo, tduo, na.rm = FALSE)[od, ], fcumsum(num_vars(dataNA), g, td, na.rm = FALSE)) expect_identical(fcumsum(num_vars(dataNAuo), guo, tduo, fill = TRUE)[od, ], fcumsum(num_vars(dataNA), g, td, fill = TRUE)) }) test_that("fcumsum with integers performs numerically stable in ordered computations", { expect_true(all_identical(replicate(50, fcumsum(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(data)), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(dataNA)), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(x, f, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNA, f, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(m, g, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(mNA, g, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(data), g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(data), g, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(dataNA), g), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(dataNA), g, fill = TRUE), simplify = FALSE))) }) test_that("fcumsum with integers performs numerically stable in unordered computations", { expect_true(all_identical(replicate(50, fcumsum(xuo, o = t2uo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xNAuo, o = t2uo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(muo, o = t2duo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(datauo), o = t2duo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xuo, fuo, tuo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(xuo, fuo, tuo, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(muo, guo, tduo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(muo, guo, tduo, fill = TRUE), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(datauo), guo, tduo), simplify = FALSE))) expect_true(all_identical(replicate(50, fcumsum(nv(datauo), guo, tduo, fill = TRUE), simplify = FALSE))) }) test_that("fcumsum handles special values in the right way", { expect_identical(fcumsum(c(NaN,NaN)), c(NaN,NaN)) expect_identical(fcumsum(c(Inf,Inf)), c(Inf,Inf)) expect_identical(fcumsum(c(Inf,-Inf)), c(Inf,NaN)) expect_identical(fcumsum(c(FALSE,TRUE)), c(0L,1L)) expect_identical(fcumsum(c(TRUE,FALSE)), c(1L,1L)) expect_identical(fcumsum(c(1,NA)), c(1,NA)) expect_identical(fcumsum(c(NA,1)), c(NA,1)) expect_identical(fcumsum(c(1L,NA)), c(1L,NA)) expect_identical(fcumsum(c(NA,1L)), c(NA,1L)) expect_identical(fcumsum(c(NaN,1)), c(NaN,1)) expect_identical(fcumsum(c(1,NaN)), c(1, NaN)) expect_identical(fcumsum(c(Inf,1)), c(Inf,Inf)) expect_identical(fcumsum(c(1,Inf)), c(1,Inf)) expect_identical(fcumsum(c(Inf,NA)), c(Inf,NA)) expect_identical(fcumsum(c(NA,Inf)), c(NA, Inf)) }) test_that("fcumsum produces errors for wrong input", { # type: normally guaranteed by C++ expect_error(fcumsum(mNAc)) expect_error(fcumsum(wlddev)) expect_error(fcumsum(mNAc, f)) expect_error(fcumsum(x, "1")) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fcumsum(1:3, o = 1:2)) expect_error(fcumsum(1:3, o = 1:4)) expect_error(fcumsum(1:3, g = 1:2)) expect_error(fcumsum(1:3, g = 1:4)) expect_error(fcumsum(1:4, g = c(1,1,2,2), o = c(1,2,1))) expect_error(fcumsum(1:4, g = c(1,2,2), o = c(1,2,1,2))) }) x <- as.integer(wlddev$year * 1000000L) set.seed(101) xNA <- na_insert(x) g <- wlddev$iso3c o <- seq_along(x) test_that("Integer overflow errors", { # Slightly exceeding INT_MIN and INT_MAX expect_error(fcumsum(c(-2147483646L, -2L))) expect_error(fcumsum(c(-2147483646L, -2L), na.rm = FALSE)) expect_error(fcumsum(c(-2147483646L, -2L), fill = TRUE)) expect_error(fcumsum(c(2147483646L, 2L))) expect_error(fcumsum(c(2147483646L, 2L), na.rm = FALSE)) expect_error(fcumsum(c(2147483646L, 2L), fill = TRUE)) # No groups expect_error(fcumsum(x)) expect_error(fcumsum(x, na.rm = FALSE)) expect_error(fcumsum(x, fill = TRUE)) expect_error(fcumsum(xNA)) expect_error(fcumsum(xNA, fill = TRUE)) # With groups expect_error(fcumsum(x, g)) expect_error(fcumsum(x, g, na.rm = FALSE)) expect_error(fcumsum(x, g, fill = TRUE)) expect_error(fcumsum(xNA, g)) expect_error(fcumsum(xNA, g, fill = TRUE)) # No groups: Ordered expect_error(fcumsum(x, o = o, check.o = FALSE)) expect_error(fcumsum(x, o = o, check.o = FALSE, na.rm = FALSE)) expect_error(fcumsum(x, o = o, check.o = FALSE, fill = TRUE)) expect_error(fcumsum(xNA, o = o, check.o = FALSE)) expect_error(fcumsum(xNA, o = o, check.o = FALSE, fill = TRUE)) # With groups: Ordered expect_error(fcumsum(x, g, o = o, check.o = FALSE)) expect_error(fcumsum(x, g, o = o, check.o = FALSE, na.rm = FALSE)) expect_error(fcumsum(x, g, o = o, check.o = FALSE, fill = TRUE)) expect_error(fcumsum(xNA, g, o = o, check.o = FALSE)) expect_error(fcumsum(xNA, g, o = o, check.o = FALSE, fill = TRUE)) }) collapse/tests/testthat/test-fcount.R0000644000176200001440000000446015202504542017457 0ustar liggesuserscontext("fcount and fcountv") mt <- mtcars fcount_sort <- function(x, ..., sort = FALSE) { r <- fcount(x, ..., sort = sort) r[do.call(order, r[, setdiff(names(r), "N"), drop = FALSE]), , drop = FALSE] } test_that("fcount and fcountv agree", { expect_equal(fcount(mt, cyl, vs, am), fcountv(mt, cols = .c(cyl, vs, am))) expect_equal(fcountv(mt, cols = c("cyl", "vs", "am")), fcount(mt, cyl, vs, am)) }) test_that("fcount matches dplyr::count", { skip_if_not_installed("dplyr") mtt <- dplyr::as_tibble(mt) d <- dplyr::count(mtt, cyl, vs, am, name = "N", sort = FALSE) expect_equal(unattrib(fcount_sort(mt, cyl, vs, am)), unattrib(as.data.frame(d))) }) test_that("fcount add modes work", { full <- fcount(mt, cyl, vs, am) added <- fcount(mt, cyl, vs, am, add = TRUE) expect_equal(nrow(added), nrow(mt)) expect_equal(added$N, full$N[match(interaction(added[, c("cyl", "vs", "am")], drop = TRUE, lex.order = TRUE), interaction(full[, c("cyl", "vs", "am")], drop = TRUE, lex.order = TRUE))]) gv <- fcount(mt, cyl, vs, am, add = "group_vars") expect_equal(nrow(gv), nrow(mt)) expect_equal(ncol(gv), 4L) expect_equal(gv[, c("cyl", "vs", "am")], mt[, c("cyl", "vs", "am")]) expect_equal(fcount(mt, cyl, vs, am, add = "gv"), gv) }) test_that("fcount sorting changes row order", { uns <- fcount(mt, cyl, vs, am, sort = FALSE) srt <- fcount(mt, cyl, vs, am, sort = TRUE) expect_false(identical(unattrib(uns), unattrib(srt))) }) test_that("fcount with weights", { w <- runif(nrow(mt)) r <- fcount(mt, cyl, w = w) expect_equal(sum(r$N), sum(w), tolerance = 1e-10) }) test_that("fcount with grouped data", { g <- fgroup_by(mt, cyl, vs, am) expect_equal(unattrib(fcount_sort(g)), unattrib(fcount_sort(mt, cyl, vs, am))) expect_equal(nrow(fcount(g, add = TRUE)), nrow(mt)) expect_equal(nrow(fcount(g, add = "group_vars")), nrow(mt)) }) test_that("fcount works on data.table", { skip_if_not_installed("data.table") dt <- data.table::as.data.table(mt) expect_true(data.table::is.data.table(fcount(dt, cyl, vs, am))) expect_equal(fcount_sort(qDF(fcount(dt, cyl, vs, am))), fcount_sort(qDF(fcount(mt, cyl, vs, am)))) }) test_that("fcount errors for invalid input", { expect_error(fcount(mt, cyl, add = "invalid")) expect_error(fcountv(mt, w = "notacol")) }) collapse/tests/testthat/test-flm-fFtest.R0000644000176200001440000001025514777170131020200 0ustar liggesuserscontext("flm and fFtest") y <- mtcars$mpg x <- qM(mtcars[c("cyl","vs","am","carb","hp")]) w <- mtcars$wt lmr <- lm(mpg ~ cyl + vs + am + carb + hp, mtcars) lmw <- lm(mpg ~ cyl + vs + am + carb + hp, weights = wt, mtcars) NCRAN <- identical(Sys.getenv("NCRAN"), "TRUE") test_that("flm works as intended", { if(NCRAN) for(i in 1:6) expect_equal(drop(flm(y, x, add.icpt = TRUE, method = i)), coef(lmr)) if(NCRAN) for(i in 1:6) expect_equal(drop(flm(y, x, w, add.icpt = TRUE, method = i)), coef(lmw)) expect_equal(flm(y, x, method = 1L, return.raw = TRUE), .lm.fit(x, y)) expect_equal(flm(y, x, method = 2L, return.raw = TRUE), solve(crossprod(x), crossprod(x, y))) expect_equal(flm(y, x, method = 3L, return.raw = TRUE), qr.coef(qr(x), y)) expect_equal(flm(y, x, method = 5L, return.raw = TRUE), cinv(crossprod(x)) %*% crossprod(x, y)) if(NCRAN) { # This is to fool very silly checks on CRAN scanning the code of the tests afmlp <- eval(parse(text = paste0("RcppArmadillo", ":", ":", "fastLmPure"))) efmlp <- eval(parse(text = paste0("RcppEigen", ":", ":", "fastLmPure"))) expect_equal(flm(y, x, method = 4L, return.raw = TRUE), afmlp(x, y)) expect_equal(flm(y, x, method = 6L, return.raw = TRUE), efmlp(x, y, 3L)) } if(NCRAN) for(i in 1:6) expect_visible(flm(y, x, w, method = i, return.raw = TRUE)) ym <- cbind(y, y) for(i in c(1:3, 5L)) expect_visible(flm(ym, x, w, method = i)) expect_error(flm(y[-1L], x, w)) expect_error(flm(y, x, w[-1L])) expect_error(flm(y, x[-1L, ], w)) }) test_that("fFtest works as intended", { r <- fFtest(iris$Sepal.Length, gv(iris, -1L)) rlm <- summary(lm(Sepal.Length ~., iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) # Same with weights: w <- abs(rnorm(fnrow(iris))) r <- fFtest(iris$Sepal.Length, gv(iris, -1L), w = w) rlm <- summary(lm(Sepal.Length ~., weights = w, iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) # Repeat with missing values set.seed(101) iris <- na_insert(iris) r <- fFtest(iris$Sepal.Length, gv(iris, -1L)) rlm <- summary(lm(Sepal.Length ~., iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) # Same with weights: set.seed(101) w <- na_insert(w) r <- fFtest(iris$Sepal.Length, gv(iris, -1L), w = w) rlm <- summary(lm(Sepal.Length ~., weights = w, iris)) expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)]))) rm(iris) if(NCRAN) { r <- fFtest(wlddev$PCGDP, qF(wlddev$year), wlddev[c("iso3c","LIFEEX")]) # Same test done using lm: data <- na_omit(get_vars(wlddev, c("iso3c","year","PCGDP","LIFEEX")), na.attr = TRUE) full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), data) rest <- lm(PCGDP ~ LIFEEX + iso3c, data) ranv <- anova(rest, full) expect_equal(unattrib(r[1L, 1:4]), unlist(summary(full)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(unattrib(r[2L, 1:4]), unlist(summary(rest)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(rev(unattrib(r[1:2, 3L])), ranv$Res.Df) expect_equal(r[3L, 2L], na_rm(ranv$Df)) expect_equal(r[3L, 4L], na_rm(ranv$F)) expect_equal(r[3L, 5L], na_rm(ranv$`Pr(>F)`)) # Same with weights: w <- abs(rnorm(fnrow(wlddev))) r <- fFtest(wlddev$PCGDP, qF(wlddev$year), wlddev[c("iso3c","LIFEEX")], w) full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), weights = w[-attr(data, "na.action")], data) rest <- lm(PCGDP ~ LIFEEX + iso3c, weights = w[-attr(data, "na.action")], data) ranv <- anova(rest, full) expect_equal(unattrib(r[1L, 1:4]), unlist(summary(full)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(unattrib(r[2L, 1:4]), unlist(summary(rest)[c("r.squared", "fstatistic")], use.names = FALSE)[c(1L, 3:4, 2L)]) expect_equal(rev(unattrib(r[1:2, 3L])), ranv$Res.Df) expect_equal(r[3L, 2L], na_rm(ranv$Df)) expect_equal(r[3L, 4L], na_rm(ranv$F)) expect_equal(r[3L, 5L], na_rm(ranv$`Pr(>F)`)) } }) collapse/tests/testthat/test-fmode.R0000644000176200001440000007246714777170131017300 0ustar liggesuserscontext("fmode") # rm(list = ls()) set.seed(101) x <- round(abs(10*rnorm(100))) w <- as.integer(round(abs(10*rnorm(100)))) # round(abs(rnorm(100)), 1) -> Numeric precision issues in R xNA <- x wNA <- w xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) data <- wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ] l <- nrow(data) g <- GRP(droplevels(data$iso3c)) gf <- as_factor_GRP(g) dataNA <- na_insert(data) m <- as.matrix(num_vars(data)) # without num_vars also works for ties = "first" mNA <- as.matrix(num_vars(dataNA)) wdat <- as.integer(round(10*abs(rnorm(l)))) # round(abs(rnorm(l)), 1) -> Numeric precision issues in R wdatNA <- wdat wdatNA[sample.int(l, floor(l/5))] <- NA ncv <- !char_vars(data, "logical") getdata <- function(first) if(first) data else gv(data, ncv) getdataNA <- function(first) if(first) dataNA else gv(dataNA, ncv) # seteltNA <- function(x,i,j) { # x[i,j] <- NA # x # } whichmax <- function(x) which(as.integer(x) == as.integer(max(x))) # This solves numeric precision issues minwa <- function(x) { xna <- unattrib(x) if(anyNA(xna)) { if(is.integer(xna)) return(`attributes<-`(NA_integer_, attributes(x))) # if(is.character(xna)) return(`attributes<-`(NA_character_, attributes(x))) if(is.numeric(xna)) { xna <- na_rm(xna) if(!length(xna)) return(`attributes<-`(NA_real_, attributes(x))) } } `attributes<-`(`storage.mode<-`(base::min(xna), storage.mode(x)), attributes(x)) } maxwa <- function(x) { xna <- unattrib(x) if(is.numeric(xna) && anyNA(xna)) { xna <- na_rm(xna) if(!length(xna)) return(`attributes<-`(NA_real_, attributes(x))) } `attributes<-`(`storage.mode<-`(base::max(xna), storage.mode(x)), attributes(x)) } if(identical(Sys.getenv("NCRAN"), "TRUE")) { # This is to fool very silly checks on CRAN scanning the code of the tests rowidv <- eval(parse(text = paste0("data.table", ":", ":", "rowidv"))) # firstmode <- function(x) { # ox <- sort(x) # ox[which.max(rowidv(ox))] # } unam <- function(x) `names<-`(x, NULL) Mode <- function(x, na.rm = FALSE, ties = "first") { if(na.rm) { miss <- is.na(x) if(all(miss)) return(x[1L]) x <- x[!miss] } o <- radixorder(x) ox <- unam(x)[o] switch(ties, first = unam(x)[which.max(rowidv(ox)[radixorder(o)])], last = unam(x)[which.max(rowidv(ox)[radixorder(o, decreasing = TRUE)])], min = minwa(ox[whichmax(rowidv(ox))]), max = maxwa(ox[whichmax(rowidv(ox))]), stop("Unknown ties option")) } } # Mode <- function(x, na.rm = FALSE, ties = "first") { # if(na.rm) x <- x[!is.na(x)] # ux <- unique(x) # switch(ties, # first = ux[which.max(tabulate(match(x, ux)))], # min = minwa(ux[whichmax(tabulate(match(x, ux)))]), # max = maxwa(ux[whichmax(tabulate(match(x, ux)))]), # stop("Unknown ties option")) # } wMode <- function(x, w, na.rm = FALSE, ties = "first") { ax <- attributes(x) cc <- complete.cases(x, w) if(!any(cc)) return(`storage.mode<-`(NA, storage.mode(x))) if(na.rm) { w <- w[cc] x <- x[cc] } g <- GRP.default(x, call = FALSE) switch(ties, first = { o <- radixorder(unlist(gsplit(seq_along(w), g), use.names = FALSE)) sw <- unlist(lapply(gsplit(w, g), base::cumsum), use.names = FALSE)[o] fsubset.default(x, which.max(sw)) }, min = minwa(fsubset.default(g[["groups"]][[1L]], whichmax(fsum.default(w, g, use.g.names = FALSE)))), max = maxwa(fsubset.default(g[["groups"]][[1L]], whichmax(fsum.default(w, g, use.g.names = FALSE)))), stop("Unknown ties option")) # storage.mode(res) <- storage.mode(x) # `attributes<-`(res, ax) } for (nth in 1:2) { if(nth == 2L) { if(Sys.getenv("OMP") == "TRUE") { fmode <- function(x, ...) collapse::fmode(x, ..., nthreads = 2L) } else break } if(identical(Sys.getenv("NCRAN"), "TRUE")) { test_that("fmode performs like Mode (defined above)", { for(t in c("first","min","max")) { # print(t) tf <- t == "first" expect_equal(fmode(NA, ties = t), Mode(NA, ties = t)) expect_equal(fmode(NA, na.rm = FALSE, ties = t), Mode(NA, ties = t)) expect_equal(fmode(1, ties = t), Mode(1, na.rm = TRUE, ties = t)) expect_equal(fmode(1:3, ties = t), Mode(1:3, na.rm = TRUE, ties = t)) expect_equal(fmode(-1:1, ties = t), Mode(-1:1, na.rm = TRUE, ties = t)) expect_equal(fmode(1, na.rm = FALSE, ties = t), Mode(1, ties = t)) expect_equal(fmode(1:3, na.rm = FALSE, ties = t), Mode(1:3, ties = t)) expect_equal(fmode(-1:1, na.rm = FALSE, ties = t), Mode(-1:1, ties = t)) expect_equal(fmode(x, ties = t), Mode(x, na.rm = TRUE, ties = t)) expect_equal(fmode(x, na.rm = FALSE, ties = t), Mode(x, ties = t)) if(tf) expect_equal(fmode(xNA, na.rm = FALSE, ties = t), Mode(xNA, ties = t)) expect_equal(fmode(xNA, ties = t), Mode(xNA, na.rm = TRUE, ties = t)) # expect_equal(as.character(fmode(data, drop = FALSE)), fmode(m)) expect_equal(fmode(m, ties = t), dapply(m, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(m, na.rm = FALSE, ties = t), dapply(m, Mode, ties = t)) if(tf) expect_equal(fmode(mNA, na.rm = FALSE, ties = t), dapply(mNA, Mode, ties = t)) expect_equal(fmode(mNA, ties = t), dapply(mNA, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), ties = t, drop = FALSE), dapply(getdata(tf), Mode, na.rm = TRUE, ties = t, drop = FALSE)) expect_equal(fmode(getdata(tf), na.rm = FALSE, ties = t, drop = FALSE), dapply(getdata(tf), Mode, ties = t, drop = FALSE)) if(tf) expect_equal(fmode(dataNA, na.rm = FALSE, ties = t, drop = FALSE), dapply(dataNA, Mode, ties = t, drop = FALSE)) expect_equal(fmode(getdataNA(tf), ties = t, drop = FALSE), dapply(getdataNA(tf), Mode, na.rm = TRUE, ties = t, drop = FALSE)) expect_equal(fmode(x, f, ties = t), BY(x, f, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(x, f, na.rm = FALSE, ties = t), BY(x, f, Mode, ties = t)) if(tf) expect_equal(fmode(xNA, f, na.rm = FALSE, ties = t), BY(xNA, f, Mode, ties = t)) expect_equal(fmode(xNA, f, ties = t), BY(xNA, f, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, ties = t), BY(m, g, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, na.rm = FALSE, ties = t), BY(m, g, Mode, ties = t)) if(tf) expect_equal(fmode(mNA, g, na.rm = FALSE), BY(mNA, g, Mode)) # Mode gives NA expect_equal(fmode(mNA, g, ties = t), BY(mNA, g, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, ties = t), BY(getdata(tf), g, Mode, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, na.rm = FALSE, ties = t), BY(getdata(tf), g, Mode, ties = t)) if(tf) expect_equal(fmode(dataNA, g, na.rm = FALSE), BY(dataNA, g, Mode)) # Mode gives NA expect_equal(fmode(getdataNA(tf), g, ties = t), BY(getdataNA(tf), g, Mode, na.rm = TRUE, ties = t)) } }) } test_that("fmode with weights performs as intended (unbiased)", { expect_equal(fmode(c(2,2,4,5,5,5)), fmode(c(2,4,5), w = c(2,1,3))) expect_equal(fmode(c(2,2,4,5,5,5), na.rm = FALSE), fmode(c(2,4,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(2.456,2.456,4.123,5.009,5.009,5.009)), fmode(c(2.456,4.123,5.009), w = c(2,1,3))) expect_equal(fmode(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE), fmode(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(2,2,NA,5,5,5)), fmode(c(2,NA,5), w = c(2,1,3))) expect_equal(fmode(c(2,2,NA,5,5,5), na.rm = FALSE), fmode(c(2,NA,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(2,2,NA,5,5,5)), fmode(c(2,4,5), w = c(2,NA,3))) expect_equal(fmode(c(2,2,NA,5,5,5), na.rm = FALSE), fmode(c(2,4,5), w = c(2,NA,3), na.rm = FALSE)) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009)), fmode(c(NA,4.123,5.009), w = c(2,1,3))) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmode(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009)), fmode(c(2.456,4.123,5.009), w = c(NA,1,3))) expect_equal(fmode(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fmode(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fmode(v, f), fmode(vs, fs, w)) expect_equal(fmode(v, f, na.rm = FALSE), fmode(vs, fs, w, na.rm = FALSE)) expect_equal(fmode(v2, f), fmode(v2s, fs, w)) expect_equal(fmode(v2, f, na.rm = FALSE), fmode(v2s, fs, w, na.rm = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fmode(v, f), fmode(vs, fs, w)) expect_equal(fmode(v, f, na.rm = FALSE), fmode(vs, fs, w, na.rm = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fmode(v, f), fmode(vs, fs, w)) expect_equal(fmode(v, f, na.rm = FALSE), fmode(vs, fs, w, na.rm = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fmode(v2, f), fmode(v2s, fs, w)) expect_equal(fmode(v2, f, na.rm = FALSE), fmode(v2s, fs, w, na.rm = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fmode(v2, f), fmode(v2s, fs, w)) expect_equal(fmode(v2, f, na.rm = FALSE), fmode(v2s, fs, w, na.rm = FALSE)) }) test_that("fmode performs like fmode with weights all equal", { for(t in c("first","min","max")) { expect_equal(fmode(NA, ties = t), fmode(NA, w = 0.9, ties = t)) expect_equal(fmode(NA, na.rm = FALSE, ties = t), fmode(NA, w = 2.946, na.rm = FALSE, ties = t)) expect_equal(fmode(1, ties = t), fmode(1, w = 3, ties = t)) expect_equal(fmode(1:3, ties = t), fmode(1:3, w = rep(0.9,3), ties = t)) expect_equal(fmode(-1:1, ties = t), fmode(-1:1, w = rep(4.2,3), ties = t)) expect_equal(fmode(1, na.rm = FALSE, ties = t), fmode(1, w = 5, na.rm = FALSE, ties = t)) expect_equal(fmode(1:3, na.rm = FALSE, ties = t), fmode(1:3, w = rep(1.4, 3), na.rm = FALSE, ties = t)) expect_equal(fmode(-1:1, na.rm = FALSE, ties = t), fmode(-1:1, w = rep(1.4, 3), na.rm = FALSE, ties = t)) expect_equal(fmode(x, ties = t), fmode(x, w = rep(1,100), ties = t)) expect_equal(fmode(x, na.rm = FALSE, ties = t), fmode(x, w = rep(1.4, 100), na.rm = FALSE, ties = t)) # failed on patched solaris... expect_equal(fmode(xNA, na.rm = FALSE, ties = t), fmode(xNA, w = rep(4.6, 100), na.rm = FALSE, ties = t)) expect_equal(fmode(xNA, ties = t), fmode(xNA, w = rep(4.6, 100), ties = t)) # failed on patched solaris... expect_equal(fmode(m, ties = t), fmode(m, w = rep(6587, l), ties = t)) expect_equal(fmode(m, na.rm = FALSE, ties = t), fmode(m, w = rep(6587, l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, na.rm = FALSE, ties = t), fmode(mNA, w = rep(6587, l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, ties = t), fmode(mNA, w = rep(6587, l), ties = t)) expect_equal(fmode(data, ties = t), fmode(data, w = rep(6787, l), ties = t)) expect_equal(fmode(data, na.rm = FALSE, ties = t), fmode(data, w = rep(6787, l), na.rm = FALSE, ties = t)) expect_equal(fmode(dataNA, na.rm = FALSE, ties = t), fmode(dataNA, w = rep(6787, l), na.rm = FALSE, ties = t)) expect_equal(fmode(dataNA, ties = t), fmode(dataNA, w = rep(6787, l), ties = t)) expect_equal(fmode(x, f, ties = t), fmode(x, f, rep(546,100), ties = t)) expect_equal(fmode(x, f, na.rm = FALSE, ties = t), fmode(x, f, rep(5,100), na.rm = FALSE, ties = t)) expect_equal(fmode(xNA, f, na.rm = FALSE, ties = t), fmode(xNA, f, rep(52.7,100), na.rm = FALSE, ties = t)) # Failed sometimes for some reason... v. 1.5.1 error expect_equal(fmode(xNA, f, ties = t), fmode(xNA, f, rep(599,100), ties = t)) expect_equal(fmode(m, g, ties = t), fmode(m, g, rep(546,l), ties = t)) expect_equal(fmode(m, g, na.rm = FALSE, ties = t), fmode(m, g, rep(1,l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, g, na.rm = FALSE, ties = t), fmode(mNA, g, rep(7,l), na.rm = FALSE, ties = t)) expect_equal(fmode(mNA, g, ties = t), fmode(mNA, g, rep(1,l), ties = t)) expect_equal(fmode(data, g, ties = t), fmode(data, g, rep(53,l), ties = t)) expect_equal(fmode(data, g, na.rm = FALSE, ties = t), fmode(data, g, rep(546,l), na.rm = FALSE, ties = t)) expect_equal(fmode(dataNA, g, na.rm = FALSE, ties = t), fmode(dataNA, g, rep(1,l), na.rm = FALSE, ties = t)) # rep(0.999999,l) failed CRAN Arch i386 expect_equal(fmode(dataNA, g, ties = t), fmode(dataNA, g, rep(999,l), ties = t)) # rep(999.9999,l) failed CRAN Arch i386 } }) test_that("fmode with weights performs like wMode (defined above)", { for(t in c("first","min","max")) { # print(t) tf <- t == "first" # complete weights expect_equal(fmode(NA, w = 1, ties = t), wMode(NA, 1, ties = t)) expect_equal(fmode(NA, w = 1, na.rm = FALSE, ties = t), wMode(NA, 1, ties = t)) expect_equal(fmode(1, w = 1, ties = t), wMode(1, w = 1, ties = t)) expect_equal(fmode(1:3, w = 1:3, ties = t), wMode(1:3, 1:3, ties = t)) expect_equal(fmode(-1:1, w = 1:3, ties = t), wMode(-1:1, 1:3, ties = t)) expect_equal(fmode(1, w = 1, na.rm = FALSE, ties = t), wMode(1, 1, ties = t)) expect_equal(fmode(1:3, w = c(0.99,3454,1.111), na.rm = FALSE, ties = t), wMode(1:3, c(0.99,3454,1.111), ties = t)) expect_equal(fmode(-1:1, w = 1:3, na.rm = FALSE, ties = t), wMode(-1:1, 1:3, ties = t)) expect_equal(fmode(x, w = w, ties = t), wMode(x, w, ties = t)) expect_equal(fmode(x, w = w, na.rm = FALSE, ties = t), wMode(x, w, ties = t)) if(tf) expect_equal(fmode(xNA, w = w, na.rm = FALSE, ties = t), wMode(xNA, w, ties = t)) expect_equal(fmode(xNA, w = w, ties = t), wMode(xNA, w, na.rm = TRUE, ties = t)) # expect_equal(fmode(data, w = wdat, drop = FALSE, ties = t), fmode(m, w = wdat, ties = t)) expect_equal(fmode(m, w = wdat, ties = t), dapply(m, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(m, w = wdat, na.rm = FALSE, ties = t), dapply(m, wMode, wdat, ties = t)) if(tf) expect_equal(fmode(mNA, w = wdat, na.rm = FALSE, ties = t), dapply(mNA, wMode, wdat, ties = t)) expect_equal(fmode(mNA, w = wdat, ties = t), dapply(mNA, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), w = wdat, drop = FALSE, ties = t), dapply(getdata(tf), wMode, wdat, na.rm = TRUE, drop = FALSE, ties = t)) expect_equal(fmode(getdata(tf), w = wdat, na.rm = FALSE, drop = FALSE, ties = t), dapply(getdata(tf), wMode, wdat, drop = FALSE, ties = t)) if(tf) expect_equal(fmode(dataNA, w = wdat, na.rm = FALSE, drop = FALSE, ties = t), dapply(dataNA, wMode, wdat, drop = FALSE, ties = t)) expect_equal(fmode(getdataNA(tf), w = wdat, drop = FALSE, ties = t), dapply(getdataNA(tf), wMode, wdat, na.rm = TRUE, drop = FALSE, ties = t)) expect_equal(fmode(x, f, w, ties = t), BY(x, f, wMode, w, ties = t)) expect_equal(fmode(x, f, w, na.rm = FALSE, ties = t), BY(x, f, wMode, w, ties = t)) if(tf) expect_equal(fmode(xNA, f, w, na.rm = FALSE, ties = t), BY(xNA, f, wMode, w, ties = t)) expect_equal(fmode(xNA, f, w, ties = t), BY(xNA, f, wMode, w, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, wdat, ties = t), BY(m, gf, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(m, g, wdat, na.rm = FALSE, ties = t), BY(m, gf, wMode, wdat, ties = t)) if(tf) expect_equal(fmode(mNA, g, wdat, na.rm = FALSE, ties = t), BY(mNA, gf, wMode, wdat, ties = t)) expect_equal(fmode(mNA, g, wdat, ties = t), BY(mNA, gf, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, wdat, ties = t), BY(getdata(tf), gf, wMode, wdat, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, wdat, na.rm = FALSE, ties = t), BY(getdata(tf), gf, wMode, wdat, ties = t)) if(tf) expect_equal(fmode(dataNA, g, wdat, na.rm = FALSE, ties = t), BY(dataNA, gf, wMode, wdat, ties = t)) expect_equal(fmode(getdataNA(tf), g, wdat, ties = t), BY(getdataNA(tf), gf, wMode, wdat, na.rm = TRUE, ties = t)) # missing weights: # missing weights are summed : wsum is NA.... fmode does not properly deal with missing weights if na.rm = FALSE expect_equal(fmode(NA, w = NA, ties = t), wMode(NA, NA, ties = t)) # expect_equal(fmode(1, w = NA, ties = t), wMode(1, w = NA, ties = t)) expect_equal(fmode(1:3, w = c(NA,1:2), ties = t), wMode(1:3, c(NA,1:2), na.rm = TRUE, ties = t)) expect_equal(fmode(-1:1, w = c(NA,1:2), ties = t), wMode(-1:1, c(NA,1:2), na.rm = TRUE, ties = t)) expect_equal(fmode(x, w = wNA, ties = t), wMode(x, wNA, na.rm = TRUE, ties = t)) expect_equal(fmode(xNA, w = wNA, ties = t), wMode(xNA, wNA, na.rm = TRUE, ties = t)) # expect_equal(fmode(data, w = wdatNA, ties = t), fmode(m, w = wdatNA, ties = t)) expect_equal(fmode(m, w = wdatNA, ties = t), dapply(m, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(mNA, w = wdatNA, ties = t), dapply(mNA, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), w = wdatNA, ties = t, drop = FALSE), dapply(getdata(tf), wMode, wdatNA, na.rm = TRUE, ties = t, drop = FALSE)) expect_equal(fmode(getdataNA(tf), w = wdatNA, ties = t, drop = FALSE), dapply(getdataNA(tf), wMode, wdatNA, na.rm = TRUE, ties = t, drop = FALSE)) expect_equal(fmode(x, f, wNA, ties = t), BY(x, f, wMode, wNA, na.rm = TRUE, ties = t)) # failed on MAC OSX expect_equal(fmode(xNA, f, wNA, ties = t), BY(xNA, f, wMode, wNA, na.rm = TRUE, ties = t)) # failed on mac OSX... expect_equal(fmode(m, g, wdatNA, ties = t), BY(m, gf, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(mNA, g, wdatNA, ties = t), BY(mNA, gf, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(getdata(tf), g, wdatNA, ties = t), BY(getdata(tf), gf, wMode, wdatNA, na.rm = TRUE, ties = t)) expect_equal(fmode(getdataNA(tf), g, wdatNA, ties = t), BY(getdataNA(tf), gf, wMode, wdatNA, na.rm = TRUE, ties = t)) } }) test_that("fmode performs numerically stable", { for(t in c("first","min","max")) { expect_true(all_obj_equal(replicate(50, fmode(1, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, ties = t), simplify = FALSE))) } }) test_that("fmode with complete weights performs numerically stable", { for(t in c("first","min","max")) { expect_true(all_obj_equal(replicate(50, fmode(1, w = 1, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = 1, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = 1, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, w, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, w, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdat, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdat, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdat, ties = t), simplify = FALSE))) } }) test_that("fmode with missing weights performs numerically stable", { for(t in c("first","min","max")) { expect_true(all_obj_equal(replicate(50, fmode(1, w = NA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = NA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(NA, w = NA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, w = wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, w = wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, w = wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(x, f, wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, wNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(xNA, f, wNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(m, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(mNA, g, wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdatNA, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(data, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdatNA, na.rm = FALSE, ties = t), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmode(dataNA, g, wdatNA, ties = t), simplify = FALSE))) } }) test_that("fmode handles special values in the right way", { expect_equal(fmode(NA), NA) expect_equal(fmode(NaN), NaN) expect_equal(fmode(Inf), Inf) expect_equal(fmode(-Inf), -Inf) expect_equal(fmode(TRUE), TRUE) expect_equal(fmode(FALSE), FALSE) expect_equal(fmode(NA, na.rm = FALSE), NA) expect_equal(fmode(NaN, na.rm = FALSE), NaN) expect_equal(fmode(Inf, na.rm = FALSE), Inf) expect_equal(fmode(-Inf, na.rm = FALSE), -Inf) expect_equal(fmode(TRUE, na.rm = FALSE), TRUE) expect_equal(fmode(FALSE, na.rm = FALSE), FALSE) expect_equal(fmode(c(1,NA)), 1) expect_equal(fmode(c(1,NaN)), 1) expect_equal(fmode(c(1,Inf)), 1) expect_equal(fmode(c(1,-Inf)), 1) expect_equal(fmode(c(FALSE,TRUE)), FALSE) expect_equal(fmode(c(FALSE,FALSE)), FALSE) expect_equal(fmode(c(1,Inf), na.rm = FALSE), 1) expect_equal(fmode(c(1,-Inf), na.rm = FALSE), 1) expect_equal(fmode(c(FALSE,TRUE), na.rm = FALSE), FALSE) expect_equal(fmode(c(FALSE,FALSE), na.rm = FALSE), FALSE) }) test_that("fmode with weights handles special values in the right way", { expect_equal(fmode(NA, w = 1), NA) expect_equal(fmode(NaN, w = 1), NaN) expect_equal(fmode(Inf, w = 1), Inf) expect_equal(fmode(-Inf, w = 1), -Inf) expect_equal(fmode(TRUE, w = 1), TRUE) expect_equal(fmode(FALSE, w = 1), FALSE) expect_equal(fmode(NA, w = 1, na.rm = FALSE), NA) expect_equal(fmode(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fmode(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fmode(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fmode(TRUE, w = 1, na.rm = FALSE), TRUE) expect_equal(fmode(FALSE, w = 1, na.rm = FALSE), FALSE) expect_equal(fmode(NA, w = NA), NA) expect_equal(fmode(NaN, w = NA), NaN) expect_equal(fmode(Inf, w = NA), Inf) expect_equal(fmode(-Inf, w = NA), -Inf) expect_equal(fmode(TRUE, w = NA), TRUE) expect_equal(fmode(FALSE, w = NA), FALSE) expect_equal(fmode(NA, w = NA, na.rm = FALSE), NA) expect_equal(fmode(NaN, w = NA, na.rm = FALSE), NaN) expect_equal(fmode(Inf, w = NA, na.rm = FALSE), Inf) expect_equal(fmode(-Inf, w = NA, na.rm = FALSE), -Inf) expect_equal(fmode(TRUE, w = NA, na.rm = FALSE), TRUE) expect_equal(fmode(FALSE, w = NA, na.rm = FALSE), FALSE) expect_equal(fmode(1:3, w = c(1,Inf,3)), 2) expect_equal(fmode(1:3, w = c(1,-Inf,3)), 3) expect_equal(fmode(1:3, w = c(1,Inf,3), na.rm = FALSE), 2) expect_equal(fmode(1:3, w = c(1,-Inf,3), na.rm = FALSE), 3) }) test_that("fmode produces errors for wrong input", { expect_visible(fmode("a")) expect_visible(fmode(NA_character_)) expect_visible(fmode(mNA)) expect_error(fmode(mNA, f)) expect_error(fmode(1:2,1:3)) expect_error(fmode(m,1:31)) expect_error(fmode(data,1:31)) expect_error(fmode(data, w = 1:31)) expect_visible(fmode("a", w = 1)) expect_error(fmode(1:2, w = 1:3)) expect_visible(fmode(NA_character_, w = 1)) expect_visible(fmode(mNA, w = wdat)) expect_error(fmode(mNA, f, wdat)) expect_error(fmode(mNA, w = 1:33)) expect_error(fmode(1:2,1:2, 1:3)) expect_error(fmode(m,1:32,1:20)) expect_error(fmode(data,1:32,1:10)) expect_error(fmode(1:2, w = c("a","b"))) expect_visible(fmode(wlddev)) expect_visible(fmode(wlddev, w = wlddev$year, drop = FALSE)) expect_visible(fmode(wlddev, wlddev$iso3c)) expect_visible(fmode(wlddev, wlddev$iso3c, wlddev$year)) }) } test_that("Singleton group optimization works properly", { g <- GRP(as.character(seq_row(mtcars))) w <- mtcars$wt expect_equal(unattrib(fmode(mtcars$mpg, g)), mtcars$mpg[g$order]) expect_equal(unattrib(fmode(mtcars$mpg, g, w)), mtcars$mpg[g$order]) g <- GRP(seq_row(mtcars)) expect_equal(unattrib(fmode(mtcars$mpg, g)), mtcars$mpg[g$order]) expect_equal(unattrib(fmode(mtcars$mpg, g, w)), mtcars$mpg[g$order]) g <- GRP(sample.int(100, 32)) expect_equal(unattrib(fmode(mtcars$mpg, g)), mtcars$mpg[g$order]) expect_equal(unattrib(fmode(mtcars$mpg, g, w)), mtcars$mpg[g$order]) }) collapse/tests/testthat/test-fsum.R0000644000176200001440000010122514777170131017141 0ustar liggesuserscontext("fsum") bsum <- base::sum # TODO: # identical(as.integer(fsum(td, g)), unname(fsum(t, g))) # str(fsum(m)) # Do integer checks using identical, not all.equal.. # rm(list = ls()) set.seed(101) x <- rnorm(100) * 1000 w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" na20 <- function(x) { x[is.na(x)] <- 0L x } condan20 <- function(x, cond) if(cond) dapply(x, na20) else x wsum <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) if(!any(cc)) return(NA_real_) x <- x[cc] w <- w[cc] } bsum(x*w) } for (nth in 1:2) { if(nth == 2L) { if(Sys.getenv("OMP") == "TRUE") { fsum <- function(x, ...) collapse::fsum(x, ..., nthreads = 2L) } else break } for(fill_arg in 1:2) { if(fill_arg == 2L) fsum <- function(x, ...) collapse::fsum(x, ..., fill = TRUE) test_that("fsum performs like base::sum and base::colSums", { expect_equal(fsum(NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(NA, na.rm = FALSE), bsum(NA)) expect_equal(fsum(1), bsum(1, na.rm = TRUE)) expect_identical(fsum(1:3), bsum(1:3, na.rm = TRUE)) expect_identical(fsum(-1:1), bsum(-1:1, na.rm = TRUE)) expect_equal(fsum(1, na.rm = FALSE), bsum(1)) expect_identical(fsum(1:3, na.rm = FALSE), bsum(1:3)) expect_identical(fsum(-1:1, na.rm = FALSE), bsum(-1:1)) expect_equal(fsum(x), bsum(x, na.rm = TRUE)) expect_equal(fsum(x, na.rm = FALSE), bsum(x)) expect_equal(fsum(xNA, na.rm = FALSE), bsum(xNA)) expect_equal(fsum(xNA), bsum(xNA, na.rm = TRUE)) expect_equal(fsum(mtcars), fsum(m)) expect_equal(fsum(m), colSums(m, na.rm = TRUE)) expect_equal(fsum(m, na.rm = FALSE), colSums(m)) expect_equal(fsum(mNA, na.rm = FALSE), colSums(mNA)) expect_equal(fsum(mNA), colSums(mNA, na.rm = TRUE)) expect_equal(fsum(mtcars), dapply(mtcars, bsum, na.rm = TRUE)) expect_equal(fsum(mtcars, na.rm = FALSE), dapply(mtcars, bsum)) expect_equal(fsum(mtcNA, na.rm = FALSE), dapply(mtcNA, bsum)) expect_equal(fsum(mtcNA), dapply(mtcNA, bsum, na.rm = TRUE)) expect_equal(fsum(x, f), BY(x, f, bsum, na.rm = TRUE)) expect_equal(fsum(x, f, na.rm = FALSE), BY(x, f, bsum)) expect_equal(fsum(xNA, f, na.rm = FALSE), BY(xNA, f, bsum)) expect_equal(na20(fsum(xNA, f)), BY(xNA, f, bsum, na.rm = TRUE)) expect_equal(fsum(m, g), BY(m, g, bsum, na.rm = TRUE)) expect_equal(fsum(m, g, na.rm = FALSE), BY(m, g, bsum)) expect_equal(fsum(mNA, g, na.rm = FALSE), BY(mNA, g, bsum)) expect_equal(na20(fsum(mNA, g)), BY(mNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 expect_equal(fsum(mtcars, g), BY(mtcars, g, bsum, na.rm = TRUE)) expect_equal(fsum(mtcars, g, na.rm = FALSE), BY(mtcars, g, bsum)) expect_equal(fsum(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bsum)) expect_equal(na20(fsum(mtcNA, g)), BY(mtcNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 }) test_that("fsum with weights performs like wsum (defined above)", { # complete weights expect_equal(fsum(NA, w = 1), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(NA, w = 1, na.rm = FALSE), wsum(NA, 1)) expect_equal(fsum(1, w = 1), wsum(1, w = 1)) expect_equal(fsum(1:3, w = 1:3), wsum(1:3, 1:3)) expect_equal(fsum(-1:1, w = 1:3), wsum(-1:1, 1:3)) expect_equal(fsum(1, w = 1, na.rm = FALSE), wsum(1, 1)) expect_equal(fsum(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wsum(1:3, c(0.99,3454,1.111))) expect_equal(fsum(-1:1, w = 1:3, na.rm = FALSE), wsum(-1:1, 1:3)) expect_equal(fsum(x, w = w), wsum(x, w)) expect_equal(fsum(x, w = w, na.rm = FALSE), wsum(x, w)) expect_equal(fsum(xNA, w = w, na.rm = FALSE), wsum(xNA, w)) expect_equal(fsum(xNA, w = w), wsum(xNA, w, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), fsum(m, w = wdat)) expect_equal(fsum(m, w = wdat), dapply(m, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(m, w = wdat, na.rm = FALSE), dapply(m, wsum, wdat)) expect_equal(fsum(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wsum, wdat)) expect_equal(fsum(mNA, w = wdat), dapply(mNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), dapply(mtcars, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat), dapply(mtcNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(x, f, w), BY(x, f, wsum, w)) expect_equal(fsum(x, f, w, na.rm = FALSE), BY(x, f, wsum, w)) expect_equal(fsum(xNA, f, w, na.rm = FALSE), BY(xNA, f, wsum, w)) expect_equal(fsum(xNA, f, w), BY(xNA, f, wsum, w, na.rm = TRUE)) expect_equal(fsum(m, g, wdat), BY(m, gf, wsum, wdat)) expect_equal(fsum(m, g, wdat, na.rm = FALSE), BY(m, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat, na.rm = FALSE), BY(mNA, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat), condan20(BY(mNA, gf, wsum, wdat, na.rm = TRUE), fill_arg == 2L)) expect_equal(fsum(mtcars, g, wdat), BY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat), condan20(BY(mtcNA, gf, wsum, wdat, na.rm = TRUE), fill_arg == 2L)) # missing weights expect_equal(fsum(NA, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(NA, w = NA, na.rm = FALSE), wsum(NA, NA)) expect_equal(fsum(1, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(1:3, w = c(NA,1:2)), wsum(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fsum(-1:1, w = c(NA,1:2)), wsum(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fsum(1, w = NA, na.rm = FALSE), wsum(1, NA)) expect_equal(fsum(1:3, w = c(NA,1:2), na.rm = FALSE), wsum(1:3, c(NA,1:2))) expect_equal(fsum(-1:1, w = c(NA,1:2), na.rm = FALSE), wsum(-1:1, c(NA,1:2))) expect_equal(fsum(x, w = wNA), wsum(x, wNA, na.rm = TRUE)) expect_equal(fsum(x, w = wNA, na.rm = FALSE), wsum(x, wNA)) expect_equal(fsum(xNA, w = wNA, na.rm = FALSE), wsum(xNA, wNA)) expect_equal(fsum(xNA, w = wNA), wsum(xNA, wNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), fsum(m, w = wdatNA)) expect_equal(fsum(m, w = wdatNA), dapply(m, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, w = wdatNA, na.rm = FALSE), dapply(m, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA), dapply(mNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), dapply(mtcars, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA), dapply(mtcNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA), BY(x, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA, na.rm = FALSE), BY(x, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA), BY(xNA, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA), BY(m, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA, na.rm = FALSE), BY(m, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA), condan20(BY(mNA, gf, wsum, wdatNA, na.rm = TRUE), fill_arg == 2L)) expect_equal(fsum(mtcars, g, wdatNA), BY(mtcars, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA), condan20(BY(mtcNA, gf, wsum, wdatNA, na.rm = TRUE), fill_arg == 2L)) }) test_that("fsum performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g), simplify = FALSE))) }) test_that("fsum with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fsum with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fsum handles special values in the right way", { expect_equal(fsum(NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(NaN), if(fill_arg == 1L) NaN else 0) expect_equal(fsum(Inf), Inf) expect_equal(fsum(-Inf), -Inf) expect_equal(fsum(TRUE), 1) expect_equal(fsum(FALSE), 0) expect_equal(fsum(NA, na.rm = FALSE), NA_real_) expect_equal(fsum(NaN, na.rm = FALSE), NaN) expect_equal(fsum(Inf, na.rm = FALSE), Inf) expect_equal(fsum(-Inf, na.rm = FALSE), -Inf) expect_equal(fsum(TRUE, na.rm = FALSE), 1) expect_equal(fsum(FALSE, na.rm = FALSE), 0) expect_equal(fsum(c(1,NA)), 1) expect_equal(fsum(c(1,NaN)), 1) expect_equal(fsum(c(1,Inf)), Inf) expect_equal(fsum(c(1,-Inf)), -Inf) expect_equal(fsum(c(FALSE,TRUE)), 1) expect_equal(fsum(c(TRUE,TRUE)), 2) expect_equal(fsum(c(1,Inf), na.rm = FALSE), Inf) expect_equal(fsum(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(fsum(c(FALSE,TRUE), na.rm = FALSE), 1) expect_equal(fsum(c(TRUE,TRUE), na.rm = FALSE), 2) }) test_that("fsum with weights handles special values in the right way", { expect_equal(fsum(NA, w = 1), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(NaN, w = 1), if(fill_arg == 1L) NaN else 0) expect_equal(fsum(Inf, w = 1), Inf) expect_equal(fsum(-Inf, w = 1), -Inf) expect_equal(fsum(TRUE, w = 1), 1) expect_equal(fsum(FALSE, w = 1), 0) expect_equal(fsum(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fsum(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fsum(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fsum(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fsum(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fsum(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fsum(NA, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(NaN, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(Inf, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(-Inf, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(TRUE, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(FALSE, w = NA), if(fill_arg == 1L) NA_real_ else 0) expect_equal(fsum(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fsum(1:3, w = c(1,Inf,3)), Inf) expect_equal(fsum(1:3, w = c(1,-Inf,3)), -Inf) expect_equal(fsum(1:3, w = c(1,Inf,3), na.rm = FALSE), Inf) expect_equal(fsum(1:3, w = c(1,-Inf,3), na.rm = FALSE), -Inf) }) test_that("fsum produces errors for wrong input", { expect_error(fsum("a")) expect_error(fsum(NA_character_)) expect_error(fsum(mNAc)) expect_error(fsum(mNAc, f)) expect_error(fsum(1:2,1:3)) expect_error(fsum(m,1:31)) expect_error(fsum(mtcars,1:31)) expect_error(fsum(mtcars, w = 1:31)) expect_error(fsum("a", w = 1)) expect_error(fsum(1:2, w = 1:3)) expect_error(fsum(NA_character_, w = 1)) expect_error(fsum(mNAc, w = wdat)) expect_error(fsum(mNAc, f, wdat)) expect_error(fsum(mNA, w = 1:33)) expect_error(fsum(1:2,1:2, 1:3)) expect_error(fsum(m,1:32,1:20)) expect_error(fsum(mtcars,1:32,1:10)) expect_error(fsum(1:2, w = c("a","b"))) expect_error(fsum(wlddev)) expect_error(fsum(wlddev, w = wlddev$year)) expect_error(fsum(wlddev, wlddev$iso3c)) expect_error(fsum(wlddev, wlddev$iso3c, wlddev$year)) }) # Testing fsum with integers... x <- as.integer(x) xNA <- as.integer(xNA) mtcars <- dapply(mtcars, as.integer) mtcNA <- dapply(mtcNA, as.integer) storage.mode(m) <- "integer" storage.mode(mNA) <- "integer" toint <- function(x) { storage.mode(x) <- "integer" x } test_that("fsum with integers performs like base::sum and base::colSums", { expect_identical(fsum(x), bsum(x, na.rm = TRUE)) expect_identical(fsum(x, na.rm = FALSE), bsum(x)) expect_identical(fsum(xNA, na.rm = FALSE), bsum(xNA)) expect_identical(fsum(xNA), bsum(xNA, na.rm = TRUE)) expect_identical(toint(fsum(mtcars)), fsum(m)) expect_identical(fsum(m), toint(colSums(m, na.rm = TRUE))) expect_identical(fsum(m, na.rm = FALSE), toint(colSums(m))) expect_identical(fsum(mNA, na.rm = FALSE), toint(colSums(mNA))) expect_identical(fsum(mNA), toint(colSums(mNA, na.rm = TRUE))) expect_identical(toint(fsum(mtcars)), dapply(mtcars, bsum, na.rm = TRUE)) expect_identical(toint(fsum(mtcars, na.rm = FALSE)), dapply(mtcars, bsum)) expect_identical(toint(fsum(mtcNA, na.rm = FALSE)), dapply(mtcNA, bsum)) expect_identical(toint(fsum(mtcNA)), dapply(mtcNA, bsum, na.rm = TRUE)) expect_identical(fsum(x, f), BY(x, f, bsum, na.rm = TRUE)) expect_identical(fsum(x, f, na.rm = FALSE), BY(x, f, bsum)) expect_identical(fsum(xNA, f, na.rm = FALSE), BY(xNA, f, bsum)) expect_identical(na20(fsum(xNA, f)), BY(xNA, f, bsum, na.rm = TRUE)) expect_identical(fsum(m, g), BY(m, g, bsum, na.rm = TRUE)) expect_identical(fsum(m, g, na.rm = FALSE), BY(m, g, bsum)) expect_identical(fsum(mNA, g, na.rm = FALSE), BY(mNA, g, bsum)) expect_identical(na20(fsum(mNA, g)), BY(mNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 expect_identical(fsum(mtcars, g), BY(mtcars, g, bsum, na.rm = TRUE)) expect_identical(fsum(mtcars, g, na.rm = FALSE), BY(mtcars, g, bsum)) expect_identical(fsum(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bsum)) expect_identical(na20(fsum(mtcNA, g)), BY(mtcNA, g, bsum, na.rm = TRUE)) # error, bsum(NA) give 0 }) test_that("fsum with integers and weights performs like wsum (defined above)", { # complete weights expect_equal(fsum(x, w = w), wsum(x, w)) expect_equal(fsum(x, w = w, na.rm = FALSE), wsum(x, w)) expect_equal(fsum(xNA, w = w, na.rm = FALSE), wsum(xNA, w)) expect_equal(fsum(xNA, w = w), wsum(xNA, w, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), fsum(m, w = wdat)) expect_equal(fsum(m, w = wdat), dapply(m, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(m, w = wdat, na.rm = FALSE), dapply(m, wsum, wdat)) expect_equal(fsum(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wsum, wdat)) expect_equal(fsum(mNA, w = wdat), dapply(mNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat), dapply(mtcars, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wsum, wdat)) expect_equal(fsum(mtcNA, w = wdat), dapply(mtcNA, wsum, wdat, na.rm = TRUE)) expect_equal(fsum(x, f, w), BY(x, f, wsum, w)) expect_equal(fsum(x, f, w, na.rm = FALSE), BY(x, f, wsum, w)) expect_equal(fsum(xNA, f, w, na.rm = FALSE), BY(xNA, f, wsum, w)) expect_equal(fsum(xNA, f, w), BY(xNA, f, wsum, w, na.rm = TRUE)) expect_equal(fsum(m, g, wdat), BY(m, gf, wsum, wdat)) expect_equal(fsum(m, g, wdat, na.rm = FALSE), BY(m, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat, na.rm = FALSE), BY(mNA, gf, wsum, wdat)) expect_equal(fsum(mNA, g, wdat), condan20(BY(mNA, gf, wsum, wdat, na.rm = TRUE), fill_arg == 2L)) expect_equal(fsum(mtcars, g, wdat), BY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, gf, wsum, wdat)) expect_equal(fsum(mtcNA, g, wdat), condan20(BY(mtcNA, gf, wsum, wdat, na.rm = TRUE), fill_arg == 2L)) # missing weights expect_equal(fsum(x, w = wNA), wsum(x, wNA, na.rm = TRUE)) expect_equal(fsum(x, w = wNA, na.rm = FALSE), wsum(x, wNA)) expect_equal(fsum(xNA, w = wNA, na.rm = FALSE), wsum(xNA, wNA)) expect_equal(fsum(xNA, w = wNA), wsum(xNA, wNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), fsum(m, w = wdatNA)) expect_equal(fsum(m, w = wdatNA), dapply(m, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, w = wdatNA, na.rm = FALSE), dapply(m, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wsum, wdatNA)) expect_equal(fsum(mNA, w = wdatNA), dapply(mNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA), dapply(mtcars, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wsum, wdatNA)) expect_equal(fsum(mtcNA, w = wdatNA), dapply(mtcNA, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA), BY(x, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(x, f, wNA, na.rm = FALSE), BY(x, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wsum, wNA)) expect_equal(fsum(xNA, f, wNA), BY(xNA, f, wsum, wNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA), BY(m, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(m, g, wdatNA, na.rm = FALSE), BY(m, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, gf, wsum, wdatNA)) expect_equal(fsum(mNA, g, wdatNA), condan20(BY(mNA, gf, wsum, wdatNA, na.rm = TRUE), fill_arg == 2L)) expect_equal(fsum(mtcars, g, wdatNA), BY(mtcars, gf, wsum, wdatNA, na.rm = TRUE)) expect_equal(fsum(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, gf, wsum, wdatNA)) expect_equal(fsum(mtcNA, g, wdatNA), condan20(BY(mtcNA, gf, wsum, wdatNA, na.rm = TRUE), fill_arg == 2L)) }) test_that("fsum performs numerically stable", { expect_true(all_identical(replicate(50, fsum(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fsum(mtcNA, g), simplify = FALSE))) }) test_that("fsum with integers and complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fsum with integers and missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsum(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fsum with integers produces errors for wrong input", { expect_error(fsum(m,1:31)) expect_error(fsum(mtcars,1:31)) expect_error(fsum(mtcars, w = 1:31)) expect_error(fsum(mNA, w = 1:33)) expect_error(fsum(m,1:32,1:20)) expect_error(fsum(mtcars,1:32,1:10)) }) test_that("Miscellaneous Issues with Integers", { expect_identical(fsum(NA_integer_), if(fill_arg == 1L) NA_integer_ else 0L) expect_identical(fsum(NA_integer_, na.rm = FALSE), NA_integer_) expect_identical(fsum(c(NA_integer_, NA_integer_)), if(fill_arg == 1L) NA_integer_ else 0L) expect_identical(fsum(c(NA_integer_, NA_integer_), na.rm = FALSE), NA_integer_) expect_identical(fsum(c(NA_integer_, 1L)), 1L) expect_identical(fsum(c(NA_integer_, 1L), na.rm = FALSE), NA_integer_) expect_identical(fsum(c(-2147483646L, -2L)), -2147483648) expect_identical(fsum(c(-2147483646L, -2L), na.rm = FALSE), -2147483648) expect_identical(fsum(-c(-2147483646L, -2L)), 2147483648) expect_identical(fsum(-c(-2147483646L, -2L), na.rm = FALSE), 2147483648) }) z <- as.integer(wlddev$year*1000000L) set.seed(101) zNA <- na_insert(z) gz <- wlddev$iso3c test_that("Integer overflow errors", { # With groups expect_error(fsum(z, gz)) expect_error(fsum(z, gz, na.rm = FALSE)) expect_error(fsum(zNA, gz)) expect_error(fsum(zNA, gz, na.rm = FALSE)) }) # Recreating doubles before next iteration... set.seed(101) x <- rnorm(100) * 1000 xNA <- x xNA[sample.int(100,20)] <- NA rm(mtcars) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) if(fill_arg == 2L) rm(fsum) } } test_that("fill arg works as intended", { expect_equal(fsum(NA, fill = TRUE), 0) expect_equal(fsum(c(NA, NA), fill = TRUE), 0) expect_equal(fsum(NA, w = 1, fill = TRUE), 0) expect_equal(fsum(c(NA, NA), w = 1:2, fill = TRUE), 0) expect_equal(unattrib(fsum(NA, 1, fill = TRUE)), 0) expect_equal(unattrib(fsum(c(NA, NA), 1:2, fill = TRUE)), c(0, 0)) expect_equal(unattrib(fsum(NA, 1, 1, fill = TRUE)), 0) expect_equal(unattrib(fsum(c(NA, NA), 1:2, 1:2, fill = TRUE)), c(0, 0)) }) collapse/tests/testthat/test-qtab.R0000644000176200001440000002270214777170131017120 0ustar liggesuserscontext("qtab") withr::local_locale(c(LC_COLLATE = "C")) set.seed(101) wldNA <- na_insert(wlddev) qtable <- function(...) { r <- qtab(...) oldClass(r) <- "table" attr(r, "sorted") <- NULL attr(r, "weighted") <- NULL r } ones <- alloc(1L, fnrow(wlddev)) attach(wlddev) expect_equal(table(region, income), qtable(region, income)) expect_equal(table(income, region), qtable(income, region)) expect_equal(table(region, income, OECD), qtable(region, income, OECD)) expect_equal(table(decade, region, income, OECD), qtable(decade, region, income, OECD)) expect_equal(table(decade, country), qtable(decade, country)) expect_equal(table(iso3c, country), qtable(iso3c, country)) expect_equal(table(iso3c, decade), qtable(iso3c, decade)) expect_equal(table(iso3c, OECD), qtable(iso3c, OECD)) expect_equal(table(region, income), qtable(region, income, w = ones)) expect_equal(table(income, region), qtable(income, region, w = ones)) expect_equal(table(region, income, OECD), qtable(region, income, OECD, w = ones)) expect_equal(table(decade, region, income, OECD), qtable(decade, region, income, OECD, w = ones)) expect_equal(table(decade, country), qtable(decade, country, w = ones)) expect_equal(table(iso3c, country), qtable(iso3c, country, w = ones)) expect_equal(table(iso3c, decade), qtable(iso3c, decade, w = ones)) expect_equal(table(iso3c, OECD), qtable(iso3c, OECD, w = ones)) expect_equal(qtable(region, income, w = ones), qtable(region, income, w = ones, wFUN = sum)) expect_equal(qtable(income, region, w = ones), qtable(income, region, w = ones, wFUN = sum)) expect_equal(qtable(region, income, OECD, w = ones), qtable(region, income, OECD, w = ones, wFUN = sum)) expect_equal(qtable(decade, region, income, OECD, w = ones), qtable(decade, region, income, OECD, w = ones, wFUN = sum)) expect_equal(qtable(decade, country, w = ones), qtable(decade, country, w = ones, wFUN = sum)) expect_equal(qtable(iso3c, country, w = ones), qtable(iso3c, country, w = ones, wFUN = sum)) expect_equal(qtable(iso3c, decade, w = ones), qtable(iso3c, decade, w = ones, wFUN = sum)) expect_equal(qtable(iso3c, OECD, w = ones), qtable(iso3c, OECD, w = ones, wFUN = sum)) expect_equal(qtable(region, income, w = ones), replace_NA(qtable(region, income, w = ones, wFUN = fsum))) expect_equal(qtable(income, region, w = ones), replace_NA(qtable(income, region, w = ones, wFUN = fsum))) expect_equal(qtable(region, income, OECD, w = ones), replace_NA(qtable(region, income, OECD, w = ones, wFUN = fsum))) expect_equal(qtable(decade, region, income, OECD, w = ones), replace_NA(qtable(decade, region, income, OECD, w = ones, wFUN = fsum))) expect_equal(qtable(decade, country, w = ones), replace_NA(qtable(decade, country, w = ones, wFUN = fsum))) expect_equal(qtable(iso3c, country, w = ones), replace_NA(qtable(iso3c, country, w = ones, wFUN = fsum))) expect_equal(qtable(iso3c, decade, w = ones), replace_NA(qtable(iso3c, decade, w = ones, wFUN = fsum))) expect_equal(qtable(iso3c, OECD, w = ones), replace_NA(qtable(iso3c, OECD, w = ones, wFUN = fsum))) detach(wlddev) attach(wldNA) expect_equal(table(region, income), qtable(region, income)) expect_equal(table(income, region), qtable(income, region)) expect_equal(table(region, income, OECD), qtable(region, income, OECD)) expect_equal(table(decade, region, income, OECD), qtable(decade, region, income, OECD)) expect_equal(table(decade, country), qtable(decade, country)) expect_equal(table(iso3c, country), qtable(iso3c, country)) expect_equal(table(iso3c, decade), qtable(iso3c, decade)) expect_equal(table(iso3c, OECD), qtable(iso3c, OECD)) expect_equal(table(region, income), qtable(region, income, w = ones)) expect_equal(table(income, region), qtable(income, region, w = ones)) expect_equal(table(region, income, OECD), qtable(region, income, OECD, w = ones)) expect_equal(table(decade, region, income, OECD), qtable(decade, region, income, OECD, w = ones)) expect_equal(table(decade, country), qtable(decade, country, w = ones)) expect_equal(table(iso3c, country), qtable(iso3c, country, w = ones)) expect_equal(table(iso3c, decade), qtable(iso3c, decade, w = ones)) expect_equal(table(iso3c, OECD), qtable(iso3c, OECD, w = ones)) expect_equal(qtable(region, income, w = ones), qtable(region, income, w = ones, wFUN = sum)) expect_equal(qtable(income, region, w = ones), qtable(income, region, w = ones, wFUN = sum)) expect_equal(qtable(region, income, OECD, w = ones), qtable(region, income, OECD, w = ones, wFUN = sum)) expect_equal(qtable(decade, region, income, OECD, w = ones), qtable(decade, region, income, OECD, w = ones, wFUN = sum)) expect_equal(qtable(decade, country, w = ones), qtable(decade, country, w = ones, wFUN = sum)) expect_equal(qtable(iso3c, country, w = ones), qtable(iso3c, country, w = ones, wFUN = sum)) expect_equal(qtable(iso3c, decade, w = ones), qtable(iso3c, decade, w = ones, wFUN = sum)) expect_equal(qtable(iso3c, OECD, w = ones), qtable(iso3c, OECD, w = ones, wFUN = sum)) expect_equal(qtable(region, income, w = ones), replace_NA(qtable(region, income, w = ones, wFUN = fsum))) expect_equal(qtable(income, region, w = ones), replace_NA(qtable(income, region, w = ones, wFUN = fsum))) expect_equal(qtable(region, income, OECD, w = ones), replace_NA(qtable(region, income, OECD, w = ones, wFUN = fsum))) expect_equal(qtable(decade, region, income, OECD, w = ones), replace_NA(qtable(decade, region, income, OECD, w = ones, wFUN = fsum))) expect_equal(qtable(decade, country, w = ones), replace_NA(qtable(decade, country, w = ones, wFUN = fsum))) expect_equal(qtable(iso3c, country, w = ones), replace_NA(qtable(iso3c, country, w = ones, wFUN = fsum))) expect_equal(qtable(iso3c, decade, w = ones), replace_NA(qtable(iso3c, decade, w = ones, wFUN = fsum))) expect_equal(qtable(iso3c, OECD, w = ones), replace_NA(qtable(iso3c, OECD, w = ones, wFUN = fsum))) expect_equal(table(region, income, useNA = "ifany"), qtable(region, income, na.exclude = FALSE)) expect_equal(table(income, region, useNA = "ifany"), qtable(income, region, na.exclude = FALSE)) expect_equal(table(region, income, OECD, useNA = "ifany"), qtable(region, income, OECD, na.exclude = FALSE)) expect_equal(table(decade, region, income, OECD, useNA = "ifany"), qtable(decade, region, income, OECD, na.exclude = FALSE)) expect_equal(table(decade, country, useNA = "ifany"), qtable(decade, country, na.exclude = FALSE)) expect_equal(table(iso3c, country, useNA = "ifany"), qtable(iso3c, country, na.exclude = FALSE)) expect_equal(table(iso3c, decade, useNA = "ifany"), qtable(iso3c, decade, na.exclude = FALSE)) expect_equal(table(iso3c, OECD, useNA = "ifany"), qtable(iso3c, OECD, na.exclude = FALSE)) expect_equal(table(region, income, useNA = "ifany"), qtable(region, income, w = ones, na.exclude = FALSE)) expect_equal(table(income, region, useNA = "ifany"), qtable(income, region, w = ones, na.exclude = FALSE)) expect_equal(table(region, income, OECD, useNA = "ifany"), qtable(region, income, OECD, w = ones, na.exclude = FALSE)) expect_equal(table(decade, region, income, OECD, useNA = "ifany"), qtable(decade, region, income, OECD, w = ones, na.exclude = FALSE)) expect_equal(table(decade, country, useNA = "ifany"), qtable(decade, country, w = ones, na.exclude = FALSE)) expect_equal(table(iso3c, country, useNA = "ifany"), qtable(iso3c, country, w = ones, na.exclude = FALSE)) expect_equal(table(iso3c, decade, useNA = "ifany"), qtable(iso3c, decade, w = ones, na.exclude = FALSE)) expect_equal(table(iso3c, OECD, useNA = "ifany"), qtable(iso3c, OECD, w = ones, na.exclude = FALSE)) expect_equal(table(region, income, useNA = "ifany"), qtable(region, income, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(income, region, useNA = "ifany"), qtable(income, region, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(region, income, OECD, useNA = "ifany"), qtable(region, income, OECD, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(decade, region, income, OECD, useNA = "ifany"), qtable(decade, region, income, OECD, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(decade, country, useNA = "ifany"), qtable(decade, country, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(iso3c, country, useNA = "ifany"), qtable(iso3c, country, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(iso3c, decade, useNA = "ifany"), qtable(iso3c, decade, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(iso3c, OECD, useNA = "ifany"), qtable(iso3c, OECD, w = ones, wFUN = sum, na.exclude = FALSE)) expect_equal(table(region, income, useNA = "ifany"), replace_NA(qtable(region, income, w = ones, wFUN = fsum, na.exclude = FALSE))) expect_equal(table(income, region, useNA = "ifany"), replace_NA(qtable(income, region, w = ones, wFUN = fsum, na.exclude = FALSE))) expect_equal(table(region, income, OECD, useNA = "ifany"), replace_NA(qtable(region, income, OECD, w = ones, wFUN = fsum, na.exclude = FALSE))) expect_equal(table(decade, region, income, OECD, useNA = "ifany"), replace_NA(qtable(decade, region, income, OECD, w = ones, wFUN = fsum, na.exclude = FALSE))) expect_equal(table(decade, country, useNA = "ifany"), replace_NA(qtable(decade, country, w = ones, wFUN = fsum, na.exclude = FALSE))) expect_equal(table(iso3c, country, useNA = "ifany"), replace_NA(qtable(iso3c, country, w = ones, wFUN = fsum, na.exclude = FALSE))) expect_equal(table(iso3c, decade, useNA = "ifany"), replace_NA(qtable(iso3c, decade, w = ones, wFUN = fsum, na.exclude = FALSE))) expect_equal(table(iso3c, OECD, useNA = "ifany"), replace_NA(qtable(iso3c, OECD, w = ones, wFUN = fsum, na.exclude = FALSE))) detach(wldNA) rm(qtable) collapse/tests/testthat/test-fdiff-fgrowth-D-G.R0000644000176200001440000021343414777170131021276 0ustar liggesuserscontext("fdiff / D and fgrowth / G") # rm(list = ls()) # TODO: test computations on irregular time series and panels set.seed(101) x <- abs(10*rnorm(100)) xNA <- x xNA[sample.int(100, 20)] <- NA f <- as.factor(rep(1:10, each = 10)) t <- as.factor(rep(1:10, 10)) data <- setRownames(wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ]) g <- GRP(droplevels(data$iso3c)) td <- as.factor(data$year) data <- num_vars(data) dataNA <- na_insert(data) m <- qM(data) mNA <- qM(dataNA) mNAc <- mNA storage.mode(mNAc) <- "character" # Creatung unordered data: o = order(rnorm(100)) xuo = x[o] xNAuo = xNA[o] fuo = f[o] tuo = t[o] t2uo = seq_len(100)[o] o = order(o) od = order(rnorm(length(td))) muo = m[od, ] datauo = data[od, ] guo = as_factor_GRP(g)[od] tduo = td[od] t2duo = seq_along(od)[od] od = order(od) basediff <- function(x, n = 1, diff = 1) c(rep(NA_real_, n * diff), diff.default(x, n, diff)) baselogdiff <- function(x, n = 1) c(rep(NA_real_, n), diff.default(log(x), n)*100) basegrowth <- function(x, n = 1) c(rep(NA_real_, n), diff.default(x, n)/x[1:(length(x)-n)]*100) # fdiff test_that("fdiff performs like basediff", { expect_equal(fdiff(1:10), basediff(1:10)) expect_equal(fdiff(1:10, 2), basediff(1:10, 2)) expect_equal(fdiff(1:10, 1, 2), basediff(1:10, 1, 2)) expect_equal(fdiff(1:10, 2, 2), basediff(1:10, 2, 2)) expect_equal(fdiff(-1:1), basediff(-1:1)) expect_equal(fdiff(x), basediff(x)) expect_equal(fdiff(x, 2, 2), basediff(x, 2, 2)) expect_equal(fdiff(xNA), basediff(xNA)) expect_equal(fdiff(xNA, 2, 2), basediff(xNA, 2, 2)) expect_equal(qM(fdiff(data)), setRownames(fdiff(m), NULL)) expect_equal(fdiff(m, stubs = FALSE), dapply(m, basediff)) expect_equal(fdiff(m, 2, 2, stubs = FALSE), dapply(m, basediff, 2, 2)) expect_equal(fdiff(mNA, stubs = FALSE), dapply(mNA, basediff)) expect_equal(fdiff(mNA, 2, 2, stubs = FALSE), dapply(mNA, basediff, 2, 2)) expect_equal(fdiff(data, stubs = FALSE), dapply(data, basediff)) expect_equal(fdiff(data, 2, 2, stubs = FALSE), dapply(data, basediff, 2, 2)) expect_equal(fdiff(dataNA, stubs = FALSE), dapply(dataNA, basediff)) expect_equal(fdiff(dataNA, 2, 2, stubs = FALSE), dapply(dataNA, basediff, 2, 2)) expect_equal(fdiff(x, 1, 1, f), BY(x, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(x, 2, 2, f), BY(x, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(xNA, 1, 1, f), BY(xNA, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(xNA, 2, 2, f), BY(xNA, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(m, 1, 1, g, stubs = FALSE), BY(m, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(m, 2, 2, g, stubs = FALSE), BY(m, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(mNA, 1, 1, g, stubs = FALSE), BY(mNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(mNA, 2, 2, g, stubs = FALSE), BY(mNA, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(data, 1, 1, g, stubs = FALSE), BY(data, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(data, 2, 2, g, stubs = FALSE), BY(data, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 1, 1, g, stubs = FALSE), BY(dataNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 2, 2, g, stubs = FALSE), BY(dataNA, g, basediff, 2, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-differences !! expect_equal(fdiff(x, 1, 1, f, t), BY(x, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(x, 2, 2, f, t), BY(x, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(xNA, 1, 1, f, t), BY(xNA, f, basediff, use.g.names = FALSE)) expect_equal(fdiff(xNA, 2, 2, f, t), BY(xNA, f, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(m, 1, 1, g, td, stubs = FALSE), BY(m, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(m, 2, 2, g, td, stubs = FALSE), BY(m, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(mNA, 1, 1, g, td, stubs = FALSE), BY(mNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(mNA, 2, 2, g, td, stubs = FALSE), BY(mNA, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(data, 1, 1, g, td, stubs = FALSE), BY(data, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(data, 2, 2, g, td, stubs = FALSE), BY(data, g, basediff, 2, 2, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 1, 1, g, td, stubs = FALSE), BY(dataNA, g, basediff, use.g.names = FALSE)) expect_equal(fdiff(dataNA, 2, 2, g, td, stubs = FALSE), BY(dataNA, g, basediff, 2, 2, use.g.names = FALSE)) }) test_that("fdiff performs lagged/leaded and iterated (panel-) vector differences without errors", { expect_visible(fdiff(1:10, -2:2)) expect_visible(fdiff(1:10, 1:2)) expect_visible(fdiff(1:10, -1:-2)) expect_visible(fdiff(1:10, 0)) expect_visible(fdiff(1:10, -2:2, 2)) expect_visible(fdiff(1:10, 1:2, 2)) expect_visible(fdiff(1:10, -1:-2, 2)) expect_visible(fdiff(1:10, 0, 2)) expect_visible(fdiff(1:10, -2:2, 1:2)) expect_visible(fdiff(1:10, 1:2, 1:2)) expect_visible(fdiff(1:10, -1:-2, 1:2)) expect_visible(fdiff(1:10, 0, 1:2)) expect_visible(fdiff(xNA, -2:2)) expect_visible(fdiff(xNA, 1:2)) expect_visible(fdiff(xNA, -1:-2)) expect_visible(fdiff(xNA, 0)) expect_visible(fdiff(xNA, -2:2, 2)) expect_visible(fdiff(xNA, 1:2, 2)) expect_visible(fdiff(xNA, -1:-2, 2)) expect_visible(fdiff(xNA, 0, 2)) expect_visible(fdiff(xNA, -2:2, 1:2)) expect_visible(fdiff(xNA, 1:2, 1:2)) expect_visible(fdiff(xNA, -1:-2, 1:2)) expect_visible(fdiff(xNA, 0, 1:2)) expect_visible(fdiff(xNA, -2:2, 1, f)) expect_visible(fdiff(xNA, 1:2, 1, f)) expect_visible(fdiff(xNA, -1:-2, 1, f)) expect_visible(fdiff(xNA, 0, 1, f)) expect_visible(fdiff(xNA, -2:2, 2, f)) expect_visible(fdiff(xNA, 1:2, 2, f)) expect_visible(fdiff(xNA, -1:-2, 2, f)) expect_visible(fdiff(xNA, 0, 2, f)) expect_visible(fdiff(xNA, -2:2, 1:2, f)) expect_visible(fdiff(xNA, 1:2, 1:2, f)) expect_visible(fdiff(xNA, -1:-2, 1:2, f)) expect_visible(fdiff(xNA, 0, 1:2, f)) expect_visible(fdiff(xNA, -2:2, 1, f, t)) expect_visible(fdiff(xNA, 1:2, 1, f, t)) expect_visible(fdiff(xNA, -1:-2, 1, f, t)) expect_visible(fdiff(xNA, 0, 1, f, t)) expect_visible(fdiff(xNA, -2:2, 2, f, t)) expect_visible(fdiff(xNA, 1:2, 2, f, t)) expect_visible(fdiff(xNA, -1:-2, 2, f, t)) expect_visible(fdiff(xNA, 0, 2, f, t)) expect_visible(fdiff(xNA, -2:2, 1:2, f, t)) expect_visible(fdiff(xNA, 1:2, 1:2, f, t)) expect_visible(fdiff(xNA, -1:-2, 1:2, f, t)) expect_visible(fdiff(xNA, 0, 1:2, f, t)) }) test_that("fdiff performs lagged/leaded and iterated (panel-) matrix differences without errors", { expect_visible(fdiff(m, -2:2)) expect_visible(fdiff(m, 1:2)) expect_visible(fdiff(m, -1:-2)) expect_visible(fdiff(m, 0)) expect_visible(fdiff(m, -2:2, 2)) expect_visible(fdiff(m, 1:2, 2)) expect_visible(fdiff(m, -1:-2, 2)) expect_visible(fdiff(m, 0, 2)) expect_visible(fdiff(m, -2:2, 1:2)) expect_visible(fdiff(m, 1:2, 1:2)) expect_visible(fdiff(m, -1:-2, 1:2)) expect_visible(fdiff(m, 0, 1:2)) expect_visible(fdiff(m, -2:2, 1, g)) expect_visible(fdiff(m, 1:2, 1, g)) expect_visible(fdiff(m, -1:-2, 1, g)) expect_visible(fdiff(m, 0, 1, g)) expect_visible(fdiff(m, -2:2, 2, g)) expect_visible(fdiff(m, 1:2, 2, g)) expect_visible(fdiff(m, -1:-2, 2, g)) expect_visible(fdiff(m, 0, 2, g)) expect_visible(fdiff(m, -2:2, 1:2, g)) expect_visible(fdiff(m, 1:2, 1:2, g)) expect_visible(fdiff(m, -1:-2, 1:2, g)) expect_visible(fdiff(m, 0, 1:2, g)) expect_visible(fdiff(m, -2:2, 1, g, td)) expect_visible(fdiff(m, 1:2, 1, g, td)) expect_visible(fdiff(m, -1:-2, 1, g, td)) expect_visible(fdiff(m, 0, 1, g, td)) expect_visible(fdiff(m, -2:2, 2, g, td)) expect_visible(fdiff(m, 1:2, 2, g, td)) expect_visible(fdiff(m, -1:-2, 2, g, td)) expect_visible(fdiff(m, 0, 2, g, td)) expect_visible(fdiff(m, -2:2, 1:2, g, td)) expect_visible(fdiff(m, 1:2, 1:2, g, td)) expect_visible(fdiff(m, -1:-2, 1:2, g, td)) expect_visible(fdiff(m, 0, 1:2, g, td)) }) test_that("fdiff performs lagged/leaded and iterated (panel-) data.frame differences without errors", { expect_visible(fdiff(data, -2:2)) expect_visible(fdiff(data, 1:2)) expect_visible(fdiff(data, -1:-2)) expect_visible(fdiff(data, 0)) expect_visible(fdiff(data, -2:2, 2)) expect_visible(fdiff(data, 1:2, 2)) expect_visible(fdiff(data, -1:-2, 2)) expect_visible(fdiff(data, 0, 2)) expect_visible(fdiff(data, -2:2, 1:2)) expect_visible(fdiff(data, 1:2, 1:2)) expect_visible(fdiff(data, -1:-2, 1:2)) expect_visible(fdiff(data, 0, 1:2)) expect_visible(fdiff(data, -2:2, 1, g)) expect_visible(fdiff(data, 1:2, 1, g)) expect_visible(fdiff(data, -1:-2, 1, g)) expect_visible(fdiff(data, 0, 1, g)) expect_visible(fdiff(data, -2:2, 2, g)) expect_visible(fdiff(data, 1:2, 2, g)) expect_visible(fdiff(data, -1:-2, 2, g)) expect_visible(fdiff(data, 0, 2, g)) expect_visible(fdiff(data, -2:2, 1:2, g)) expect_visible(fdiff(data, 1:2, 1:2, g)) expect_visible(fdiff(data, -1:-2, 1:2, g)) expect_visible(fdiff(data, 0, 1:2, g)) expect_visible(fdiff(data, -2:2, 1, g, td)) expect_visible(fdiff(data, 1:2, 1, g, td)) expect_visible(fdiff(data, -1:-2, 1, g, td)) expect_visible(fdiff(data, 0, 1, g, td)) expect_visible(fdiff(data, -2:2, 2, g, td)) expect_visible(fdiff(data, 1:2, 2, g, td)) expect_visible(fdiff(data, -1:-2, 2, g, td)) expect_visible(fdiff(data, 0, 2, g, td)) expect_visible(fdiff(data, -2:2, 1:2, g, td)) expect_visible(fdiff(data, 1:2, 1:2, g, td)) expect_visible(fdiff(data, -1:-2, 1:2, g, td)) expect_visible(fdiff(data, 0, 1:2, g, td)) }) test_that("fdiff correctly handles unordered time-series and panel-series computations", { expect_equal(fdiff(x, -2:2, 1:2, t = 1:100), fdiff(x, -2:2, 1:2)) expect_equal(fdiff(xNA, -2:2, 1:2, t = 1:100), fdiff(xNA, -2:2, 1:2)) expect_equal(fdiff(m, -2:2, 1:2, t = seq_along(td)), fdiff(m, -2:2, 1:2)) expect_equal(fdiff(data, -2:2, 1:2, t = seq_along(td)), fdiff(data, -2:2, 1:2)) expect_equal(fdiff(xuo, -2:2, 1:2, t = t2uo)[o,], unclass(fdiff(x, -2:2, 1:2))) expect_equal(fdiff(xNAuo, -2:2, 1:2, t = t2uo)[o,], unclass(fdiff(xNA, -2:2, 1:2))) expect_equal(fdiff(muo, -2:2, 1:2, t = t2duo)[od,], unclass(fdiff(m, -2:2, 1:2))) expect_equal(fdiff(datauo, -2:2, 1:2, t = t2duo)[od,], fdiff(data, -2:2, 1:2)) expect_equal(fdiff(xuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fdiff(x, -2:2, 1:2, f, t))) expect_equal(fdiff(xNAuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fdiff(xNA, -2:2, 1:2, f, t))) expect_equal(fdiff(muo, -2:2, 1:2, guo, tduo)[od,], unclass(fdiff(m, -2:2, 1:2, g, td))) expect_equal(fdiff(datauo, -2:2, 1:2, guo, tduo)[od,], fdiff(data, -2:2, 1:2, g, td)) }) test_that("fdiff performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fdiff(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(x, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(x, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNA, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNA, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(m, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(m, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(mNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(mNA, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(data, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(data, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(dataNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(dataNA, -2:2, 1:2, g), simplify = FALSE))) }) test_that("fdiff performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fdiff(xuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xNAuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(muo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(datauo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xuo, 1, 1, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(xuo, -2:2, 1:2, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(muo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(muo, -2:2, 1:2, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(datauo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fdiff(datauo, -2:2, 1:2, guo, tduo), simplify = FALSE))) }) test_that("fdiff handles special values in the right way", { expect_equal(fdiff(c(1,NA)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(NA,1)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(NaN,1)), c(NA_real_,NaN)) expect_equal(fdiff(c(1,NaN)), c(NA_real_,NaN)) expect_equal(fdiff(c(Inf,1)), c(NA,-Inf)) expect_equal(fdiff(c(1,Inf)), c(NA,Inf)) expect_equal(fdiff(c(Inf,NA)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(NA,Inf)), c(NA_real_,NA_real_)) expect_equal(fdiff(c(Inf,-Inf)), c(NA,-Inf)) expect_equal(fdiff(c(-Inf,Inf)), c(NA,Inf)) expect_equal(fdiff(c(Inf,Inf)), c(NA,NaN)) expect_equal(fdiff(c(TRUE,TRUE)), c(NA_real_,0)) expect_equal(fdiff(c(TRUE,FALSE)), c(NA_real_,-1)) expect_equal(fdiff(c(FALSE,TRUE)), c(NA_real_,1)) }) test_that("fdiff produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(fdiff("a")); 1 expect_error(fdiff(NA_character_)); 2 expect_error(fdiff(mNAc)); 3 expect_error(fdiff(wlddev)); 4 expect_error(fdiff(mNAc, f)); 5 expect_error(fdiff(x, "1", "2")); 6 # if n*diff equals or exceeds length(x), should give error expect_error(fdiff(x,100)); 7 expect_error(fdiff(x,1,100)); 8 expect_error(fdiff(x,50,2)); 9 expect_error(fdiff(x,20,5)); 10 # if n*diff exceeds average group size, should give error # expect_warning(fdiff(x,11,1,f)); 11 -> Some fail on i386 !! # expect_warning(fdiff(x,1,11,f)); 12 # expect_warning(fdiff(x,1,11,f,t)); 13 # expect_warning(fdiff(x,11,1,f,t)); 14 # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(fdiff(x,c(1,1))); 15 expect_error(fdiff(x,c(-1,-1))); 16 expect_error(fdiff(x,1,c(1,1))); 17 expect_error(fdiff(x,1,c(-1,-1))); 18 expect_error(fdiff(x,1,2:1)); 19 expect_error(fdiff(x,1,0)); 20 expect_error(fdiff(x,1,-1)); 21 expect_error(fdiff(x,f)); 22 # common source of error probably is passing the factor in a wrong slot expect_error(fdiff(x,1,f)); 23 expect_error(fdiff(x,c(1,1),1,f)); 24 expect_error(fdiff(x,c(1,1),1,f,t)); 25 expect_error(fdiff(x,1,c(1,1),f)); 26 expect_error(fdiff(x,1,c(1,1),f,t)); 27 expect_error(fdiff(x,1,2:1,f)); 28 expect_error(fdiff(x,1,2:1,f,t)); 29 expect_error(fdiff(x,1,0,f)); 30 expect_error(fdiff(x,1,0,f,t)); 31 expect_error(fdiff(x,1,-1,f)); 32 expect_error(fdiff(x,1,-1,f,t)); 33 # repeated values or gaps in time-variable should give error expect_error(fdiff(1:3, t = c(1,1,2))); 34 expect_error(fdiff(1:3, t = c(1,2,2))); 35 expect_error(fdiff(1:3, t = c(1,2,1))); 36 expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))); 37 expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))); 38 expect_error(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))); 39 expect_visible(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))); 40 expect_error(fdiff(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))); 40 expect_error(fdiff(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))); 41 expect_visible(fdiff(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))); 42 expect_error(fdiff(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))); 42 # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fdiff(1:3, t = 1:2)); 43 expect_error(fdiff(1:3, t = 1:4)); 44 expect_error(fdiff(1:3, g = 1:2)); 45 expect_error(fdiff(1:3, g = 1:4)); 46 expect_error(fdiff(1:4, g = c(1,1,2,2), t = c(1,2,1))); 47 expect_error(fdiff(1:4, g = c(1,2,2), t = c(1,2,1,2))); 48 }) # D test_that("D produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(D("a")) expect_error(D(NA_character_)) expect_error(D(mNAc)) expect_visible(D(wlddev)) expect_error(D(mNAc, f)) expect_error(D(x, "1", "2")) # if n*diff equals or exceeds length(x), should give error expect_error(D(x,100)) expect_error(D(x,1,100)) expect_error(D(x,50,2)) expect_error(D(x,20,5)) # if n*diff exceeds average group size, should give error # expect_warning(D(x,11,1,f)) -> Some fail on i386 # expect_warning(D(x,1,11,f)) # expect_warning(D(x,1,11,f,t)) # expect_warning(D(x,11,1,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(D(x,c(1,1))) expect_error(D(x,c(-1,-1))) expect_error(D(x,1,c(1,1))) expect_error(D(x,1,c(-1,-1))) expect_error(D(x,1,2:1)) expect_error(D(x,1,0)) expect_error(D(x,1,-1)) expect_error(D(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(D(x,1,f)) expect_error(D(x,c(1,1),1,f)) expect_error(D(x,c(1,1),1,f,t)) expect_error(D(x,1,c(1,1),f)) expect_error(D(x,1,c(1,1),f,t)) expect_error(D(x,1,2:1,f)) expect_error(D(x,1,2:1,f,t)) expect_error(D(x,1,0,f)) expect_error(D(x,1,0,f,t)) expect_error(D(x,1,-1,f)) expect_error(D(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(D(1:3, t = c(1,1,2))) expect_error(D(1:3, t = c(1,2,2))) expect_error(D(1:3, t = c(1,2,1))) expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(D(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(D(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(D(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(D(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) expect_error(D(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(D(1:3, t = 1:2)) expect_error(D(1:3, t = 1:4)) expect_error(D(1:3, g = 1:2)) expect_error(D(1:3, g = 1:4)) expect_error(D(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(D(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) test_that("D.data.frame method is foolproof", { expect_visible(D(wlddev)) expect_visible(D(wlddev, by = wlddev$iso3c)) expect_error(D(wlddev, t = ~year)) expect_visible(D(wlddev, 1, 1, wlddev$iso3c)) expect_visible(D(wlddev, 1,1, ~iso3c)) expect_error(D(wlddev, 1, ~iso3c)) expect_visible(D(wlddev, 1, 1, ~iso3c + region)) expect_visible(D(wlddev, 1,1, wlddev$iso3c, wlddev$year)) expect_visible(D(wlddev, 1,1, ~iso3c, ~year)) expect_visible(D(wlddev, cols = 9:12)) expect_visible(D(wlddev, 1,1,~iso3c, cols = 9:12)) expect_visible(D(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(D(wlddev, 1,1,~iso3c, ~year, cols = 9:12)) expect_visible(D(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(D(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(D(wlddev, cols = NULL)) expect_error(D(wlddev, 1,1,wlddev$iso3c, cols = NULL)) expect_error(D(wlddev, 1,1,~iso3c, cols = NULL)) expect_error(D(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(D(wlddev, 1,1,~iso3c, ~year, cols = NULL)) expect_error(D(wlddev, cols = 9:14)) expect_error(D(wlddev, 1,1,~iso3c, ~year, cols = 9:14)) expect_error(D(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(D(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_warning(D(wlddev, w = 4)) expect_error(D(wlddev, t = "year")) expect_warning(D(wlddev, g = ~year2)) expect_error(D(wlddev, t = ~year + region)) expect_error(D(wlddev, data)) expect_error(D(wlddev, 1,1,"iso3c")) expect_error(D(wlddev, 1,1,~iso3c2)) expect_error(D(wlddev, 1,1,~iso3c + bla)) expect_error(D(wlddev, 1,1,t = rnorm(30))) expect_warning(D(wlddev, 1,1,g = rnorm(30))) expect_error(D(wlddev, 1,1,mtcars$mpg, 1:29)) expect_error(D(wlddev, 1,1,mtcars$mpg, mtcars$cyl)) # this gives a repeated values error first because length(g) == length(t) expect_error(D(wlddev,1,1, ~iso3c2, ~year2)) expect_error(D(wlddev, cols = ~bla)) expect_visible(D(wlddev, 1,1,wlddev$iso3c, ~year, cols = 9:12)) expect_visible(D(wlddev, 1,1,~iso3c, wlddev$year, cols = 9:12)) expect_error(D(wlddev, 1,1,wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(D(wlddev, 2,1,~iso3c3, ~year, cols = 9:12)) expect_error(D(wlddev, cols = c("PC3GDP","LIFEEX"))) }) # fgrowth test_that("fgrowth performs like basegrowth", { expect_equal(fgrowth(1:10), basegrowth(1:10)) expect_equal(fgrowth(1:10, 2), basegrowth(1:10, 2)) expect_equal(fgrowth(-1:1), basegrowth(-1:1)) expect_equal(fgrowth(x), basegrowth(x)) expect_equal(fgrowth(x, 2), basegrowth(x, 2)) expect_equal(fgrowth(xNA), basegrowth(xNA)) expect_equal(fgrowth(xNA, 2), basegrowth(xNA, 2)) expect_equal(qM(fgrowth(data)), setRownames(fgrowth(m), NULL)) expect_equal(fgrowth(m, stubs = FALSE), dapply(m, basegrowth)) expect_equal(fgrowth(m, 2, stubs = FALSE), dapply(m, basegrowth, 2)) expect_equal(fgrowth(mNA, stubs = FALSE), dapply(mNA, basegrowth)) expect_equal(fgrowth(mNA, 2, stubs = FALSE), dapply(mNA, basegrowth, 2)) expect_equal(fgrowth(data, stubs = FALSE), dapply(data, basegrowth)) expect_equal(fgrowth(data, 2, stubs = FALSE), dapply(data, basegrowth, 2)) expect_equal(fgrowth(dataNA, stubs = FALSE), dapply(dataNA, basegrowth)) expect_equal(fgrowth(dataNA, 2, stubs = FALSE), dapply(dataNA, basegrowth, 2)) expect_equal(fgrowth(x, 1, 1, f), BY(x, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f), BY(x, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f), BY(xNA, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f), BY(xNA, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, stubs = FALSE), BY(m, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, stubs = FALSE), BY(m, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, stubs = FALSE), BY(mNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, stubs = FALSE), BY(mNA, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, stubs = FALSE), BY(data, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, stubs = FALSE), BY(data, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, stubs = FALSE), BY(dataNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, stubs = FALSE), BY(dataNA, g, basegrowth, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-growtherences !! expect_equal(fgrowth(x, 1, 1, f, t), BY(x, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f, t), BY(x, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f, t), BY(xNA, f, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f, t), BY(xNA, f, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, td, stubs = FALSE), BY(m, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, td, stubs = FALSE), BY(m, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, td, stubs = FALSE), BY(mNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, td, stubs = FALSE), BY(mNA, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, td, stubs = FALSE), BY(data, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, td, stubs = FALSE), BY(data, g, basegrowth, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, td, stubs = FALSE), BY(dataNA, g, basegrowth, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, td, stubs = FALSE), BY(dataNA, g, basegrowth, 2, use.g.names = FALSE)) }) test_that("fgrowth performs lagged/leaded and iterated (panel-) vector growth reates without errors", { expect_visible(fgrowth(1:10, -2:2)) expect_visible(fgrowth(1:10, 1:2)) expect_visible(fgrowth(1:10, -1:-2)) expect_visible(fgrowth(1:10, 0)) expect_visible(fgrowth(1:10, -2:2, 2)) expect_visible(fgrowth(1:10, 1:2, 2)) expect_visible(fgrowth(1:10, -1:-2, 2)) expect_visible(fgrowth(1:10, 0, 2)) expect_visible(fgrowth(1:10, -2:2, 1:2)) expect_visible(fgrowth(1:10, 1:2, 1:2)) expect_visible(fgrowth(1:10, -1:-2, 1:2)) expect_visible(fgrowth(1:10, 0, 1:2)) expect_visible(fgrowth(xNA, -2:2)) expect_visible(fgrowth(xNA, 1:2)) expect_visible(fgrowth(xNA, -1:-2)) expect_visible(fgrowth(xNA, 0)) expect_visible(fgrowth(xNA, -2:2, 2)) expect_visible(fgrowth(xNA, 1:2, 2)) expect_visible(fgrowth(xNA, -1:-2, 2)) expect_visible(fgrowth(xNA, 0, 2)) expect_visible(fgrowth(xNA, -2:2, 1:2)) expect_visible(fgrowth(xNA, 1:2, 1:2)) expect_visible(fgrowth(xNA, -1:-2, 1:2)) expect_visible(fgrowth(xNA, 0, 1:2)) expect_visible(fgrowth(xNA, -2:2, 1, f)) expect_visible(fgrowth(xNA, 1:2, 1, f)) expect_visible(fgrowth(xNA, -1:-2, 1, f)) expect_visible(fgrowth(xNA, 0, 1, f)) expect_visible(fgrowth(xNA, -2:2, 2, f)) expect_visible(fgrowth(xNA, 1:2, 2, f)) expect_visible(fgrowth(xNA, -1:-2, 2, f)) expect_visible(fgrowth(xNA, 0, 2, f)) expect_visible(fgrowth(xNA, -2:2, 1:2, f)) expect_visible(fgrowth(xNA, 1:2, 1:2, f)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f)) expect_visible(fgrowth(xNA, 0, 1:2, f)) expect_visible(fgrowth(xNA, -2:2, 1, f, t)) expect_visible(fgrowth(xNA, 1:2, 1, f, t)) expect_visible(fgrowth(xNA, -1:-2, 1, f, t)) expect_visible(fgrowth(xNA, 0, 1, f, t)) expect_visible(fgrowth(xNA, -2:2, 2, f, t)) expect_visible(fgrowth(xNA, 1:2, 2, f, t)) expect_visible(fgrowth(xNA, -1:-2, 2, f, t)) expect_visible(fgrowth(xNA, 0, 2, f, t)) expect_visible(fgrowth(xNA, -2:2, 1:2, f, t)) expect_visible(fgrowth(xNA, 1:2, 1:2, f, t)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f, t)) expect_visible(fgrowth(xNA, 0, 1:2, f, t)) }) test_that("fgrowth performs lagged/leaded and iterated (panel-) matrix growth rates without errors", { expect_visible(fgrowth(m, -2:2)) expect_visible(fgrowth(m, 1:2)) expect_visible(fgrowth(m, -1:-2)) expect_visible(fgrowth(m, 0)) expect_visible(fgrowth(m, -2:2, 2)) expect_visible(fgrowth(m, 1:2, 2)) expect_visible(fgrowth(m, -1:-2, 2)) expect_visible(fgrowth(m, 0, 2)) expect_visible(fgrowth(m, -2:2, 1:2)) expect_visible(fgrowth(m, 1:2, 1:2)) expect_visible(fgrowth(m, -1:-2, 1:2)) expect_visible(fgrowth(m, 0, 1:2)) expect_visible(fgrowth(m, -2:2, 1, g)) expect_visible(fgrowth(m, 1:2, 1, g)) expect_visible(fgrowth(m, -1:-2, 1, g)) expect_visible(fgrowth(m, 0, 1, g)) expect_visible(fgrowth(m, -2:2, 2, g)) expect_visible(fgrowth(m, 1:2, 2, g)) expect_visible(fgrowth(m, -1:-2, 2, g)) expect_visible(fgrowth(m, 0, 2, g)) expect_visible(fgrowth(m, -2:2, 1:2, g)) expect_visible(fgrowth(m, 1:2, 1:2, g)) expect_visible(fgrowth(m, -1:-2, 1:2, g)) expect_visible(fgrowth(m, 0, 1:2, g)) expect_visible(fgrowth(m, -2:2, 1, g, td)) expect_visible(fgrowth(m, 1:2, 1, g, td)) expect_visible(fgrowth(m, -1:-2, 1, g, td)) expect_visible(fgrowth(m, 0, 1, g, td)) expect_visible(fgrowth(m, -2:2, 2, g, td)) expect_visible(fgrowth(m, 1:2, 2, g, td)) expect_visible(fgrowth(m, -1:-2, 2, g, td)) expect_visible(fgrowth(m, 0, 2, g, td)) expect_visible(fgrowth(m, -2:2, 1:2, g, td)) expect_visible(fgrowth(m, 1:2, 1:2, g, td)) expect_visible(fgrowth(m, -1:-2, 1:2, g, td)) expect_visible(fgrowth(m, 0, 1:2, g, td)) }) test_that("fgrowth performs lagged/leaded and iterated (panel-) data.frame growth rates without errors", { expect_visible(fgrowth(data, -2:2)) expect_visible(fgrowth(data, 1:2)) expect_visible(fgrowth(data, -1:-2)) expect_visible(fgrowth(data, 0)) expect_visible(fgrowth(data, -2:2, 2)) expect_visible(fgrowth(data, 1:2, 2)) expect_visible(fgrowth(data, -1:-2, 2)) expect_visible(fgrowth(data, 0, 2)) expect_visible(fgrowth(data, -2:2, 1:2)) expect_visible(fgrowth(data, 1:2, 1:2)) expect_visible(fgrowth(data, -1:-2, 1:2)) expect_visible(fgrowth(data, 0, 1:2)) expect_visible(fgrowth(data, -2:2, 1, g)) expect_visible(fgrowth(data, 1:2, 1, g)) expect_visible(fgrowth(data, -1:-2, 1, g)) expect_visible(fgrowth(data, 0, 1, g)) expect_visible(fgrowth(data, -2:2, 2, g)) expect_visible(fgrowth(data, 1:2, 2, g)) expect_visible(fgrowth(data, -1:-2, 2, g)) expect_visible(fgrowth(data, 0, 2, g)) expect_visible(fgrowth(data, -2:2, 1:2, g)) expect_visible(fgrowth(data, 1:2, 1:2, g)) expect_visible(fgrowth(data, -1:-2, 1:2, g)) expect_visible(fgrowth(data, 0, 1:2, g)) expect_visible(fgrowth(data, -2:2, 1, g, td)) expect_visible(fgrowth(data, 1:2, 1, g, td)) expect_visible(fgrowth(data, -1:-2, 1, g, td)) expect_visible(fgrowth(data, 0, 1, g, td)) expect_visible(fgrowth(data, -2:2, 2, g, td)) expect_visible(fgrowth(data, 1:2, 2, g, td)) expect_visible(fgrowth(data, -1:-2, 2, g, td)) expect_visible(fgrowth(data, 0, 2, g, td)) expect_visible(fgrowth(data, -2:2, 1:2, g, td)) expect_visible(fgrowth(data, 1:2, 1:2, g, td)) expect_visible(fgrowth(data, -1:-2, 1:2, g, td)) expect_visible(fgrowth(data, 0, 1:2, g, td)) }) test_that("fgrowth correctly handles unordered time-series and panel-series computations", { expect_equal(fgrowth(x, -2:2, 1:2, t = 1:100), fgrowth(x, -2:2, 1:2)) expect_equal(fgrowth(xNA, -2:2, 1:2, t = 1:100), fgrowth(xNA, -2:2, 1:2)) expect_equal(fgrowth(m, -2:2, 1:2, t = seq_along(td)), fgrowth(m, -2:2, 1:2)) expect_equal(fgrowth(data, -2:2, 1:2, t = seq_along(td)), fgrowth(data, -2:2, 1:2)) expect_equal(fgrowth(xuo, -2:2, 1:2, t = t2uo)[o,], unclass(fgrowth(x, -2:2, 1:2))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, t = t2uo)[o,], unclass(fgrowth(xNA, -2:2, 1:2))) expect_equal(fgrowth(muo, -2:2, 1:2, t = t2duo)[od,], unclass(fgrowth(m, -2:2, 1:2))) expect_equal(fgrowth(datauo, -2:2, 1:2, t = t2duo)[od,], fgrowth(data, -2:2, 1:2)) expect_equal(fgrowth(xuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fgrowth(x, -2:2, 1:2, f, t))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, fuo, tuo)[o,], unclass(fgrowth(xNA, -2:2, 1:2, f, t))) expect_equal(fgrowth(muo, -2:2, 1:2, guo, tduo)[od,], unclass(fgrowth(m, -2:2, 1:2, g, td))) expect_equal(fgrowth(datauo, -2:2, 1:2, guo, tduo)[od,], fgrowth(data, -2:2, 1:2, g, td)) }) test_that("fgrowth performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, 1, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, -2:2, 1:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, -2:2, 1:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, 1, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, -2:2, 1:2, g), simplify = FALSE))) }) test_that("fgrowth performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(xuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNAuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, 1, 1, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, -2:2, 1:2, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, -2:2, 1:2, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, 1, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, -2:2, 1:2, guo, tduo), simplify = FALSE))) }) test_that("fgrowth handles special values in the right way", { expect_equal(fgrowth(c(1,NA)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(NA,1)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(NaN,1)), c(NA_real_,NaN)) expect_equal(fgrowth(c(1,NaN)), c(NA_real_,NaN)) expect_equal(fgrowth(c(Inf,1)), c(NA,NaN)) expect_equal(fgrowth(c(1,Inf)), c(NA,Inf)) expect_equal(fgrowth(c(Inf,NA)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(NA,Inf)), c(NA_real_,NA_real_)) expect_equal(fgrowth(c(Inf,-Inf)), c(NA,NaN)) expect_equal(fgrowth(c(-Inf,Inf)), c(NA,NaN)) expect_equal(fgrowth(c(Inf,Inf)), c(NA,NaN)) expect_equal(fgrowth(c(TRUE,TRUE)), c(NA_real_,0)) expect_equal(fgrowth(c(TRUE,FALSE)), c(NA_real_,-100)) expect_equal(fgrowth(c(FALSE,TRUE)), c(NA_real_,Inf)) }) test_that("fgrowth produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(fgrowth("a")) expect_error(fgrowth(NA_character_)) expect_error(fgrowth(mNAc)) expect_error(fgrowth(wlddev)) expect_error(fgrowth(mNAc, f)) expect_error(fgrowth(x, "1", "2")) # if n*growth equals or exceeds length(x), should give error expect_error(fgrowth(x,100)) expect_error(fgrowth(x,1,100)) expect_error(fgrowth(x,50,2)) expect_error(fgrowth(x,20,5)) # if n*growth exceeds average group size, should give error # expect_warning(fgrowth(x,11,1,f)) -> some fail on i386 # expect_warning(fgrowth(x,1,11,f)) # expect_warning(fgrowth(x,1,11,f,t)) # expect_warning(fgrowth(x,11,1,f,t)) # passing repeated n-values or non-positive or non-consecutive growth values should give error expect_error(fgrowth(x,c(1,1))) expect_error(fgrowth(x,c(-1,-1))) expect_error(fgrowth(x,1,c(1,1))) expect_error(fgrowth(x,1,c(-1,-1))) expect_error(fgrowth(x,1,2:1)) expect_error(fgrowth(x,1,0)) expect_error(fgrowth(x,1,-1)) expect_error(fgrowth(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(fgrowth(x,1,f)) expect_error(fgrowth(x,c(1,1),1,f)) expect_error(fgrowth(x,c(1,1),1,f,t)) expect_error(fgrowth(x,1,c(1,1),f)) expect_error(fgrowth(x,1,c(1,1),f,t)) expect_error(fgrowth(x,1,2:1,f)) expect_error(fgrowth(x,1,2:1,f,t)) expect_error(fgrowth(x,1,0,f)) expect_error(fgrowth(x,1,0,f,t)) expect_error(fgrowth(x,1,-1,f)) expect_error(fgrowth(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(fgrowth(1:3, t = c(1,1,2))) expect_error(fgrowth(1:3, t = c(1,2,2))) expect_error(fgrowth(1:3, t = c(1,2,1))) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(fgrowth(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fgrowth(1:3, t = 1:2)) expect_error(fgrowth(1:3, t = 1:4)) expect_error(fgrowth(1:3, g = 1:2)) expect_error(fgrowth(1:3, g = 1:4)) expect_error(fgrowth(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(fgrowth(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) # G test_that("G produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(G("a")) expect_error(G(NA_character_)) expect_error(G(mNAc)) expect_visible(G(wlddev)) expect_error(G(mNAc, f)) expect_error(G(x, "1", "2")) # if n*diff equals or exceeds length(x), should give error expect_error(G(x,100)) expect_error(G(x,1,100)) expect_error(G(x,50,2)) expect_error(G(x,20,5)) # if n*diff exceeds average group size, should give error # expect_warning(G(x,11,1,f)) -> Some fail on i386 # expect_warning(G(x,1,11,f)) # expect_warning(G(x,1,11,f,t)) # expect_warning(G(x,11,1,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(G(x,c(1,1))) expect_error(G(x,c(-1,-1))) expect_error(G(x,1,c(1,1))) expect_error(G(x,1,c(-1,-1))) expect_error(G(x,1,2:1)) expect_error(G(x,1,0)) expect_error(G(x,1,-1)) expect_error(G(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(G(x,1,f)) expect_error(G(x,c(1,1),1,f)) expect_error(G(x,c(1,1),1,f,t)) expect_error(G(x,1,c(1,1),f)) expect_error(G(x,1,c(1,1),f,t)) expect_error(G(x,1,2:1,f)) expect_error(G(x,1,2:1,f,t)) expect_error(G(x,1,0,f)) expect_error(G(x,1,0,f,t)) expect_error(G(x,1,-1,f)) expect_error(G(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(G(1:3, t = c(1,1,2))) expect_error(G(1:3, t = c(1,2,2))) expect_error(G(1:3, t = c(1,2,1))) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(G(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(G(1:3, t = 1:2)) expect_error(G(1:3, t = 1:4)) expect_error(G(1:3, g = 1:2)) expect_error(G(1:3, g = 1:4)) expect_error(G(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(G(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) test_that("G.data.frame method is foolproof", { expect_visible(G(wlddev)) expect_visible(G(wlddev, by = wlddev$iso3c)) expect_error(G(wlddev, t = ~year)) expect_visible(G(wlddev, 1, 1, wlddev$iso3c)) expect_visible(G(wlddev, 1,1, ~iso3c)) expect_error(G(wlddev, 1, ~iso3c)) expect_visible(G(wlddev, 1, 1, ~iso3c + region)) expect_visible(G(wlddev, 1,1, wlddev$iso3c, wlddev$year)) expect_visible(G(wlddev, 1,1, ~iso3c, ~year)) expect_visible(G(wlddev, cols = 9:12)) expect_visible(G(wlddev, 1,1,~iso3c, cols = 9:12)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = 9:12)) expect_visible(G(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(G(wlddev, cols = NULL)) expect_error(G(wlddev, 1,1,wlddev$iso3c, cols = NULL)) expect_error(G(wlddev, 1,1,~iso3c, cols = NULL)) expect_error(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = NULL)) expect_error(G(wlddev, cols = 9:14)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = 9:14)) expect_error(G(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_warning(G(wlddev, w = 4)) expect_error(G(wlddev, t = "year")) expect_warning(G(wlddev, g = ~year2)) expect_error(G(wlddev, t = ~year + region)) expect_error(G(wlddev, data)) expect_error(G(wlddev, 1,1,"iso3c")) expect_error(G(wlddev, 1,1,~iso3c2)) expect_error(G(wlddev, 1,1,~iso3c + bla)) expect_error(G(wlddev, 1,1,t = rnorm(30))) expect_warning(G(wlddev, 1,1,g = rnorm(30))) expect_error(G(wlddev, 1,1,mtcars$mpg, 1:29)) expect_error(G(wlddev, 1,1,mtcars$mpg, mtcars$cyl)) # this gives a repeated values error first because length(g) == length(t) expect_error(G(wlddev,1,1, ~iso3c2, ~year2)) expect_error(G(wlddev, cols = ~bla)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, ~year, cols = 9:12)) expect_visible(G(wlddev, 1,1,~iso3c, wlddev$year, cols = 9:12)) expect_error(G(wlddev, 1,1,wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(G(wlddev, 2,1,~iso3c3, ~year, cols = 9:12)) expect_error(G(wlddev, cols = c("PC3GDP","LIFEEX"))) }) # fgrowth with logdiff option: test_that("fgrowth with logdiff performs like baselogdiff", { expect_equal(fgrowth(1:10, logdiff = TRUE), baselogdiff(1:10)) expect_equal(fgrowth(1:10, 2, logdiff = TRUE), baselogdiff(1:10, 2)) # expect_equal(fgrowth(-1:1, logdiff = TRUE), suppressWarnings(baselogdiff(-1:1))) # NaN -Inf mismatch expect_equal(fgrowth(x, logdiff = TRUE), baselogdiff(x)) expect_equal(fgrowth(x, 2, logdiff = TRUE), baselogdiff(x, 2)) expect_equal(fgrowth(xNA, logdiff = TRUE), baselogdiff(xNA)) expect_equal(fgrowth(xNA, 2, logdiff = TRUE), baselogdiff(xNA, 2)) expect_equal(qM(fgrowth(data, logdiff = TRUE)), setRownames(fgrowth(m, logdiff = TRUE), NULL)) expect_equal(fgrowth(m, stubs = FALSE, logdiff = TRUE), dapply(m, baselogdiff)) expect_equal(fgrowth(m, 2, stubs = FALSE, logdiff = TRUE), dapply(m, baselogdiff, 2)) expect_equal(fgrowth(mNA, stubs = FALSE, logdiff = TRUE), dapply(mNA, baselogdiff)) expect_equal(fgrowth(mNA, 2, stubs = FALSE, logdiff = TRUE), dapply(mNA, baselogdiff, 2)) expect_equal(fgrowth(x, 1, 1, f, logdiff = TRUE), BY(x, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f, logdiff = TRUE), BY(x, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f, logdiff = TRUE), BY(xNA, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f, logdiff = TRUE), BY(xNA, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-growtherences !! expect_equal(fgrowth(x, 1, 1, f, t, logdiff = TRUE), BY(x, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(x, 2, 1, f, t, logdiff = TRUE), BY(x, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 1, 1, f, t, logdiff = TRUE), BY(xNA, f, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(xNA, 2, 1, f, t, logdiff = TRUE), BY(xNA, f, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(m, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(m, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(m, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(mNA, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(mNA, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(data, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(data, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(data, g, baselogdiff, 2, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 1, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, use.g.names = FALSE)) expect_equal(fgrowth(dataNA, 2, 1, g, td, stubs = FALSE, logdiff = TRUE), BY(dataNA, g, baselogdiff, 2, use.g.names = FALSE)) }) test_that("fgrowth with logdiff performs lagged/leaded and iterated (panel-) vector growth reates without errors", { expect_visible(fgrowth(1:10, -2:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 0, logdiff = TRUE)) expect_visible(fgrowth(1:10, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(1:10, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1:2, f, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -2:2, 1:2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 1:2, 1:2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, -1:-2, 1:2, f, t, logdiff = TRUE)) expect_visible(fgrowth(xNA, 0, 1:2, f, t, logdiff = TRUE)) }) test_that("fgrowth with logdiff performs lagged/leaded and iterated (panel-) matrix growth rates without errors", { expect_visible(fgrowth(m, -2:2, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(m, 0, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1, g, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -2:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 1:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, -1:-2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(m, 0, 1:2, g, td, logdiff = TRUE)) }) test_that("fgrowth with logdiff performs lagged/leaded and iterated (panel-) data.frame growth rates without errors", { expect_visible(fgrowth(data, -2:2, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, logdiff = TRUE)) expect_visible(fgrowth(data, 0, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 2, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 2, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 2, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 2, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1:2, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1, g, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1:2, g, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -2:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 1:2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, -1:-2, 1:2, g, td, logdiff = TRUE)) expect_visible(fgrowth(data, 0, 1:2, g, td, logdiff = TRUE)) }) test_that("fgrowth with logdiff correctly handles unordered time-series and panel-series computations", { expect_equal(fgrowth(x, -2:2, 1:2, t = 1:100, logdiff = TRUE), fgrowth(x, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(xNA, -2:2, 1:2, t = 1:100, logdiff = TRUE), fgrowth(xNA, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(m, -2:2, 1:2, t = seq_along(td), logdiff = TRUE), fgrowth(m, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(data, -2:2, 1:2, t = seq_along(td), logdiff = TRUE), fgrowth(data, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(xuo, -2:2, 1:2, t = t2uo, logdiff = TRUE)[o,], unclass(fgrowth(x, -2:2, 1:2, logdiff = TRUE))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, t = t2uo, logdiff = TRUE)[o,], unclass(fgrowth(xNA, -2:2, 1:2, logdiff = TRUE))) expect_equal(fgrowth(muo, -2:2, 1:2, t = t2duo, logdiff = TRUE)[od,], unclass(fgrowth(m, -2:2, 1:2, logdiff = TRUE))) expect_equal(fgrowth(datauo, -2:2, 1:2, t = t2duo, logdiff = TRUE)[od,], fgrowth(data, -2:2, 1:2, logdiff = TRUE)) expect_equal(fgrowth(xuo, -2:2, 1:2, fuo, tuo, logdiff = TRUE)[o,], unclass(fgrowth(x, -2:2, 1:2, f, t, logdiff = TRUE))) expect_equal(fgrowth(xNAuo, -2:2, 1:2, fuo, tuo, logdiff = TRUE)[o,], unclass(fgrowth(xNA, -2:2, 1:2, f, t, logdiff = TRUE))) expect_equal(fgrowth(muo, -2:2, 1:2, guo, tduo, logdiff = TRUE)[od,], unclass(fgrowth(m, -2:2, 1:2, g, td, logdiff = TRUE))) expect_equal(fgrowth(datauo, -2:2, 1:2, guo, tduo, logdiff = TRUE)[od,], fgrowth(data, -2:2, 1:2, g, td, logdiff = TRUE)) }) test_that("fgrowth with logdiff performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(x, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, 1, 1, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(x, -2:2, 1:2, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, 1, 1, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNA, -2:2, 1:2, f, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(m, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(mNA, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(data, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, 1, 1, g, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(dataNA, -2:2, 1:2, g, logdiff = TRUE), simplify = FALSE))) }) test_that("fgrowth with logdiff performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, fgrowth(xuo, t = t2uo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xNAuo, t = t2uo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, t = t2duo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, t = t2duo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, 1, 1, fuo, tuo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(xuo, -2:2, 1:2, fuo, tuo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, 1, 1, guo, tduo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(muo, -2:2, 1:2, guo, tduo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, 1, 1, guo, tduo, logdiff = TRUE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fgrowth(datauo, -2:2, 1:2, guo, tduo, logdiff = TRUE), simplify = FALSE))) }) options(warn = -1) test_that("fgrowth with logdiff handles special values in the right way", { expect_equal(fgrowth(c(1,NA), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(NA,1), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(NaN,1), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(1,NaN), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(Inf,1), logdiff = TRUE), c(NA,-Inf)) # ?? expect_equal(fgrowth(c(1,Inf), logdiff = TRUE), c(NA,Inf)) expect_equal(fgrowth(c(Inf,NA), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(NA,Inf), logdiff = TRUE), c(NA_real_,NaN)) expect_equal(fgrowth(c(Inf,-Inf), logdiff = TRUE), c(NA,NaN)) expect_equal(fgrowth(c(-Inf,Inf), logdiff = TRUE), c(NA,NaN)) expect_equal(fgrowth(c(Inf,Inf), logdiff = TRUE), c(NA,NaN)) expect_equal(fgrowth(c(TRUE,TRUE), logdiff = TRUE), c(NA_real_,0)) expect_equal(fgrowth(c(TRUE,FALSE), logdiff = TRUE), c(NA_real_,-Inf)) # ?? expect_equal(fgrowth(c(FALSE,TRUE), logdiff = TRUE), c(NA_real_,Inf)) }) test_that("fgrowth with logdiff produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(fgrowth("a", logdiff = TRUE)) expect_error(fgrowth(NA_character_, logdiff = TRUE)) expect_error(fgrowth(mNAc, logdiff = TRUE)) expect_error(fgrowth(wlddev, logdiff = TRUE)) expect_error(fgrowth(mNAc, f, logdiff = TRUE)) expect_error(fgrowth(x, "1", "2", logdiff = TRUE)) # if n*growth equals or exceeds length(x), should give error expect_error(fgrowth(x,100, logdiff = TRUE)) expect_error(fgrowth(x,1,100, logdiff = TRUE)) expect_error(fgrowth(x,50,2, logdiff = TRUE)) expect_error(fgrowth(x,20,5, logdiff = TRUE)) # if n*growth exceeds average group size, should give error # expect_warning(fgrowth(x,11,1,f, logdiff = TRUE)) -> some fail on i386 # expect_warning(fgrowth(x,1,11,f, logdiff = TRUE)) # expect_warning(fgrowth(x,1,11,f,t, logdiff = TRUE)) # expect_warning(fgrowth(x,11,1,f,t, logdiff = TRUE)) # passing repeated n-values or non-positive or non-consecutive growth values should give error expect_error(fgrowth(x,c(1,1), logdiff = TRUE)) expect_error(fgrowth(x,c(-1,-1), logdiff = TRUE)) expect_error(fgrowth(x,1,c(1,1), logdiff = TRUE)) expect_error(fgrowth(x,1,c(-1,-1), logdiff = TRUE)) expect_error(fgrowth(x,1,2:1, logdiff = TRUE)) expect_error(fgrowth(x,1,0, logdiff = TRUE)) expect_error(fgrowth(x,1,-1, logdiff = TRUE)) expect_error(fgrowth(x,f, logdiff = TRUE)) # common source of error probably is passing the factor in a wrong slot expect_error(fgrowth(x,1,f, logdiff = TRUE)) expect_error(fgrowth(x,c(1,1),1,f, logdiff = TRUE)) expect_error(fgrowth(x,c(1,1),1,f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,c(1,1),f, logdiff = TRUE)) expect_error(fgrowth(x,1,c(1,1),f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,2:1,f, logdiff = TRUE)) expect_error(fgrowth(x,1,2:1,f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,0,f, logdiff = TRUE)) expect_error(fgrowth(x,1,0,f,t, logdiff = TRUE)) expect_error(fgrowth(x,1,-1,f, logdiff = TRUE)) expect_error(fgrowth(x,1,-1,f,t, logdiff = TRUE)) # repeated values or gaps in time-variable should give error expect_error(fgrowth(1:3, t = c(1,1,2), logdiff = TRUE)) expect_error(fgrowth(1:3, t = c(1,2,2), logdiff = TRUE)) expect_error(fgrowth(1:3, t = c(1,2,1), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4), logdiff = TRUE)) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4), logdiff = TRUE)) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_visible(fgrowth(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_error(fgrowth(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(fgrowth(1:3, t = 1:2, logdiff = TRUE)) expect_error(fgrowth(1:3, t = 1:4, logdiff = TRUE)) expect_error(fgrowth(1:3, g = 1:2, logdiff = TRUE)) expect_error(fgrowth(1:3, g = 1:4, logdiff = TRUE)) expect_error(fgrowth(1:4, g = c(1,1,2,2), t = c(1,2,1), logdiff = TRUE)) expect_error(fgrowth(1:4, g = c(1,2,2), t = c(1,2,1,2), logdiff = TRUE)) }) # G with logdiff test_that("G with logdiff produces errors for wrong input", { # wrong type: normally guaranteed by C++ expect_error(G("a", logdiff = TRUE)) expect_error(G(NA_character_, logdiff = TRUE)) expect_error(G(mNAc, logdiff = TRUE)) expect_visible(G(wlddev, logdiff = TRUE)) expect_error(G(mNAc, f, logdiff = TRUE)) expect_error(G(x, "1", "2", logdiff = TRUE)) # if n*diff equals or exceeds length(x), should give error expect_error(G(x,100, logdiff = TRUE)) expect_error(G(x,1,100, logdiff = TRUE)) expect_error(G(x,50,2, logdiff = TRUE)) expect_error(G(x,20,5, logdiff = TRUE)) # if n*diff exceeds average group size, should give error # expect_warning(G(x,11,1,f, logdiff = TRUE)) -> Some fail on i386 # expect_warning(G(x,1,11,f, logdiff = TRUE)) # expect_warning(G(x,1,11,f,t, logdiff = TRUE)) # expect_warning(G(x,11,1,f,t, logdiff = TRUE)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(G(x,c(1,1), logdiff = TRUE)) expect_error(G(x,c(-1,-1), logdiff = TRUE)) expect_error(G(x,1,c(1,1), logdiff = TRUE)) expect_error(G(x,1,c(-1,-1), logdiff = TRUE)) expect_error(G(x,1,2:1, logdiff = TRUE)) expect_error(G(x,1,0, logdiff = TRUE)) expect_error(G(x,1,-1, logdiff = TRUE)) expect_error(G(x,f, logdiff = TRUE)) # common source of error probably is passing the factor in a wrong slot expect_error(G(x,1,f, logdiff = TRUE)) expect_error(G(x,c(1,1),1,f, logdiff = TRUE)) expect_error(G(x,c(1,1),1,f,t, logdiff = TRUE)) expect_error(G(x,1,c(1,1),f, logdiff = TRUE)) expect_error(G(x,1,c(1,1),f,t, logdiff = TRUE)) expect_error(G(x,1,2:1,f, logdiff = TRUE)) expect_error(G(x,1,2:1,f,t, logdiff = TRUE)) expect_error(G(x,1,0,f, logdiff = TRUE)) expect_error(G(x,1,0,f,t, logdiff = TRUE)) expect_error(G(x,1,-1,f, logdiff = TRUE)) expect_error(G(x,1,-1,f,t, logdiff = TRUE)) # repeated values or gaps in time-variable should give error expect_error(G(1:3, t = c(1,1,2), logdiff = TRUE)) expect_error(G(1:3, t = c(1,2,2), logdiff = TRUE)) expect_error(G(1:3, t = c(1,2,1), logdiff = TRUE)) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4), logdiff = TRUE)) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4), logdiff = TRUE)) expect_error(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4), logdiff = TRUE)) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_visible(G(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) expect_error(G(1:10, diff = 2, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4), logdiff = TRUE)) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(G(1:3, t = 1:2, logdiff = TRUE)) expect_error(G(1:3, t = 1:4, logdiff = TRUE)) expect_error(G(1:3, g = 1:2, logdiff = TRUE)) expect_error(G(1:3, g = 1:4, logdiff = TRUE)) expect_error(G(1:4, g = c(1,1,2,2), t = c(1,2,1), logdiff = TRUE)) expect_error(G(1:4, g = c(1,2,2), t = c(1,2,1,2), logdiff = TRUE)) }) test_that("G.data.frame method with logdiff is foolproof", { expect_visible(G(wlddev, logdiff = TRUE)) expect_visible(G(wlddev, by = wlddev$iso3c, logdiff = TRUE)) expect_error(G(wlddev, t = ~year, logdiff = TRUE)) expect_visible(G(wlddev, 1, 1, wlddev$iso3c, logdiff = TRUE)) expect_visible(G(wlddev, 1,1, ~iso3c, logdiff = TRUE)) expect_error(G(wlddev, 1, ~iso3c, logdiff = TRUE)) expect_visible(G(wlddev, 1, 1, ~iso3c + region, logdiff = TRUE)) expect_visible(G(wlddev, 1,1, wlddev$iso3c, wlddev$year, logdiff = TRUE)) expect_visible(G(wlddev, 1,1, ~iso3c, ~year, logdiff = TRUE)) expect_visible(G(wlddev, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"), logdiff = TRUE)) expect_error(G(wlddev, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,wlddev$iso3c, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,wlddev$iso3c, wlddev$year, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = NULL, logdiff = TRUE)) expect_error(G(wlddev, cols = 9:14, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = 9:14, logdiff = TRUE)) expect_error(G(wlddev, cols = c("PCGDP","LIFEEX","bla"), logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"), logdiff = TRUE)) expect_warning(G(wlddev, w = 4, logdiff = TRUE)) expect_error(G(wlddev, t = "year", logdiff = TRUE)) expect_warning(G(wlddev, g = ~year2, logdiff = TRUE)) expect_error(G(wlddev, t = ~year + region, logdiff = TRUE)) expect_error(G(wlddev, data, logdiff = TRUE)) expect_error(G(wlddev, 1,1,"iso3c", logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c2, logdiff = TRUE)) expect_error(G(wlddev, 1,1,~iso3c + bla, logdiff = TRUE)) expect_error(G(wlddev, 1,1,t = rnorm(30), logdiff = TRUE)) expect_warning(G(wlddev, 1,1,g = rnorm(30), logdiff = TRUE)) expect_error(G(wlddev, 1,1,mtcars$mpg, 1:29, logdiff = TRUE)) expect_error(G(wlddev, 1,1,mtcars$mpg, mtcars$cyl, logdiff = TRUE)) # this gives a repeated values error first because length(g) == length(t) expect_error(G(wlddev,1,1, ~iso3c2, ~year2, logdiff = TRUE)) expect_error(G(wlddev, cols = ~bla, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,wlddev$iso3c, ~year, cols = 9:12, logdiff = TRUE)) expect_visible(G(wlddev, 1,1,~iso3c, wlddev$year, cols = 9:12, logdiff = TRUE)) expect_error(G(wlddev, 1,1,wlddev$iso3c, ~year + bla, cols = 9:12, logdiff = TRUE)) expect_error(G(wlddev, 2,1,~iso3c3, ~year, cols = 9:12, logdiff = TRUE)) expect_error(G(wlddev, cols = c("PC3GDP","LIFEEX"), logdiff = TRUE)) }) options(warn = 1) collapse/tests/testthat/test-misc.R0000644000176200001440000005026415202513161017114 0ustar liggesuserscontext("Misc") # rm(list = ls()) set.seed(101) m <- na_insert(qM(mtcars)) F <- getNamespace("collapse")$F test_that("descr, pwcor, pwcov, pwnobs", { expect_visible(descr(wlddev)) expect_equal(lapply(wlddev, descr) %>% get_elem("i") %>% unattrib(), unattrib(descr(wlddev))) expect_visible(as.data.frame(descr(wlddev))) expect_output(print(descr(wlddev))) expect_visible(descr(GGDC10S)) expect_output(print(pwcor(nv(wlddev)))) expect_output(print(pwcor(nv(wlddev), N = TRUE))) expect_output(print(pwcor(nv(wlddev), P = TRUE))) expect_output(print(pwcor(nv(wlddev), N = TRUE, P = TRUE))) expect_output(print(pwcor(nv(wlddev), N = TRUE, P = TRUE, use = "complete.obs"))) expect_visible(pwcor(nv(GGDC10S))) expect_visible(pwcov(nv(wlddev))) expect_output(print(pwcov(nv(wlddev)))) expect_output(print(pwcov(nv(wlddev), N = TRUE))) expect_output(print(pwcov(nv(wlddev), P = TRUE))) expect_output(print(pwcov(nv(wlddev), N = TRUE, P = TRUE))) expect_output(print(pwcov(nv(wlddev), N = TRUE, P = TRUE, use = "complete.obs"))) expect_visible(pwnobs(wlddev)) expect_visible(pwnobs(GGDC10S)) expect_visible(descr(m)) expect_visible(pwcor(m)) expect_visible(pwcov(m)) expect_visible(pwnobs(m)) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { if(identical(Sys.getenv("LOCAL"), "TRUE")) test_that("weighted correlations are correct", { skip_if_not_installed("weights") skip_if_not_installed("cluster") # This is to fool very silly checks on CRAN scanning the code of the tests wtd.cors <- eval(parse(text = paste0("weights", ":", ":", "wtd.cors"))) wtd.cor <- eval(parse(text = paste0("weights", ":", ":", "wtd.cor"))) w <- abs(rnorm(fnrow(wlddev))) cc <- which(!missing_cases(nv(wlddev))) expect_equal(unclass(pwcor(nv(wlddev), w = w)), wtd.cors(nv(wlddev), w = w)) expect_equal(unclass(pwcor(nv(wlddev), w = w)), cov2cor(unclass(pwcov(nv(wlddev), w = w)))) expect_true(all_obj_equal(unclass(pwcor(ss(nv(wlddev), cc), w = w[cc])), cov2cor(unclass(pwcov(ss(nv(wlddev), cc), w = w[cc]))), unclass(pwcor(nv(wlddev), w = w, use = "complete.obs")), wtd.cors(ss(nv(wlddev), cc), w = w[cc]), cov.wt(ss(nv(wlddev), cc), w[cc], cor = TRUE)$cor)) suppressWarnings( expect_true(all_obj_equal(replace_NA(pwcor(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE)$P, 0), replace_NA(pwcov(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE)$P, 0), replace_NA(pwcor(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE, use = "complete.obs")$P, 0), replace_NA(pwcov(ss(nv(wlddev), cc), w = w[cc], P = TRUE, array = FALSE, use = "complete.obs")$P, 0), wtd.cor(ss(nv(wlddev), cc), w = w[cc])$p.value))) expect_true(all_obj_equal(unclass(pwcov(ss(nv(wlddev), cc), w = w[cc])), unclass(pwcov(nv(wlddev), w = w, use = "complete.obs")))) expect_equal(cov.wt(ss(nv(wlddev), cc), w[cc])$cov, unclass(pwcov(nv(wlddev), w = w, use = "complete.obs")), tolerance = 1e-3) }) test_that("na_rm works well", { set.seed(101) expect_equal(sapply(na_insert(wlddev), function(x) vtypes(na_rm(x))), vtypes(wlddev)) expect_equal(sapply(na_insert(wlddev), function(x) vlabels(na_rm(x))), vlabels(wlddev)) expect_equal(sapply(na_insert(wlddev), function(x) vclasses(na_rm(x))), vclasses(wlddev)) wldNA <- na_insert(wlddev) expect_equal(lengths(lapply(wldNA, na_rm)), fnobs(wldNA)) expect_equal(lapply(wldNA, na_rm), lapply(wldNA, function(x) copyMostAttrib(x[!is.na(x)], x))) rm(wldNA) expect_equal(na_rm(list(list(), 1,2,3)), list(1,2,3)) expect_equal(na_rm(list(1,2,NULL,3)), list(1,2,3)) }) } test_that("vlabels works well", { expect_equal(wlddev, setLabels(wlddev, vlabels(wlddev))) }) test_that("adding and removing stubs works", { expect_identical(rm_stub(add_stub(iris, "df"), "df"), iris) expect_identical(rm_stub(add_stub(iris, "df", pre = FALSE), "df", pre = FALSE), iris) expect_identical(rm_stub(add_stub(iris, "df", pre = FALSE), "df", regex = TRUE), iris) expect_identical(rm_stub(names(iris), "Sepal")[1], ".Length") expect_identical(rm_stub(names(iris), "Width", pre = FALSE)[4], "Petal.") expect_identical(rm_stub(names(iris), "Width", regex = TRUE)[4], "Petal.") }) test_that("zoo dispatch works well", { skip_if_not_installed("zoo") tsm <- zoo::as.zoo(EuStockMarkets) set.seed(101) f <- qF(sample.int(5, nrow(tsm), TRUE)) NCOL2 <- function(x) if(length(d <- dim(x)) > 1L) d[2L] else length(x) for(i in setdiff(c(.FAST_FUN, .OPERATOR_FUN), c("fnth","flag","L","F", "fdiff","D","Dlog", "fgrowth","G"))) expect_equal(NCOL2(match.fun(i)(tsm, f)), 4L) expect_equal(NCOL2(fnth(tsm, 0.5, f)), 4L) expect_equal(NCOL2(BY(tsm, f, sum)), 4L) expect_equal(nrow(qsu(tsm)), 4L) for(i in c("flag", "L", "fdiff", "D", "Dlog", "fgrowth", "G")) expect_true(all(is.na(match.fun(i)(tsm)[1L, ]))) }) test_that("units support works well", { v = abs(rnorm(5)) m = abs(matrix(rnorm(25), 5)) g = qF(c(1,1,2,3,3)) attributes(v) <- list(units = structure(list(numerator = "m", denominator = character(0)), class = "symbolic_units"), class = "units") attributes(m) <- list(dim = c(5L, 5L), units = structure(list(numerator = "m", denominator = character(0)), class = "symbolic_units"), class = "units") for (f in setdiff(c(.FAST_FUN, .OPERATOR_FUN), c("fnobs", "fndistinct", "F"))) { # print(f) FUN = match.fun(f) if (!startsWith(f, "fhd") && !startsWith(f, "HD")) { expect_true(inherits(FUN(v), "units")) expect_true(inherits(FUN(m), "units")) } if (f %in% c("fnth","flag","L","fdiff","D","Dlog", "fgrowth","G")) { expect_true(inherits(FUN(v, g = g), "units")) expect_true(inherits(FUN(m, g = g), "units")) } else { expect_true(inherits(FUN(v, g), "units")) expect_true(inherits(FUN(m, g), "units")) } } }) m <- qM(mtcars) v <- mtcars$mpg f <- qF(mtcars$cyl) fcc <- qF(mtcars$cyl, na.exclude = FALSE) g <- GRP(mtcars, ~ cyl) gl <- mtcars["cyl"] gmtc <- fgroup_by(mtcars, cyl) test_that("fast functions give same result using different grouping mechanisms", { for(i in .FAST_STAT_FUN) { # print(i) FUN <- match.fun(i) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl), FUN(v, g = f), FUN(v, g = fcc), FUN(v, g = g), FUN(v, g = gl))) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl, use.g.names = FALSE), FUN(v, g = f, use.g.names = FALSE), FUN(v, g = fcc, use.g.names = FALSE), FUN(v, g = g, use.g.names = FALSE), FUN(v, g = gl, use.g.names = FALSE))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl), FUN(m, g = f), FUN(m, g = fcc), FUN(m, g = g), FUN(m, g = gl))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl, use.g.names = FALSE), FUN(m, g = f, use.g.names = FALSE), FUN(m, g = fcc, use.g.names = FALSE), FUN(m, g = g, use.g.names = FALSE), FUN(m, g = gl, use.g.names = FALSE))) expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl), FUN(mtcars, g = f), FUN(mtcars, g = fcc), FUN(mtcars, g = g), FUN(mtcars, g = gl))) if(Sys.getenv("NCRAN") == "TRUE") expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl, use.g.names = FALSE), FUN(mtcars, g = f, use.g.names = FALSE), FUN(mtcars, g = fcc, use.g.names = FALSE), FUN(mtcars, g = g, use.g.names = FALSE), FUN(mtcars, g = gl, use.g.names = FALSE))) if(Sys.getenv("NCRAN") == "TRUE") expect_true(all_obj_equal(gv(FUN(mtcars, g = mtcars$cyl, use.g.names = FALSE), -2), gv(FUN(gmtc), -1), gv(FUN(gv(gmtc,-2)), -1), FUN(gv(gmtc,-2), keep.group_vars = FALSE), FUN(gmtc, keep.group_vars = FALSE))) expect_equal(FUN(v, TRA = 2L), TRA(v, FUN(v), 2L)) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl, TRA = 1L), TRA(v, FUN(v, g = mtcars$cyl), 1L, mtcars$cyl), FUN(v, g = f, TRA = 1L), TRA(v, FUN(v, g = f), 1L, f), FUN(v, g = fcc, TRA = 1L), TRA(v, FUN(v, g = fcc), 1L, fcc), FUN(v, g = g, TRA = 1L), TRA(v, FUN(v, g = g), 1L, g), FUN(v, g = gl, TRA = 1L), TRA(v, FUN(v, g = gl), 1L, gl))) expect_equal(FUN(m, TRA = 2L), TRA(m, FUN(m), 2L)) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl, TRA = 1L), TRA(m, FUN(m, g = mtcars$cyl), 1L, mtcars$cyl), FUN(m, g = f, TRA = 1L), TRA(m, FUN(m, g = f), 1L, f), FUN(m, g = fcc, TRA = 1L), TRA(m, FUN(m, g = fcc), 1L, fcc), FUN(m, g = g, TRA = 1L), TRA(m, FUN(m, g = g), 1L, g), FUN(m, g = gl, TRA = 1L), TRA(m, FUN(m, g = gl), 1L, gl))) expect_equal(FUN(mtcars, TRA = 2L), TRA(mtcars, FUN(mtcars), 2L)) expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl, TRA = 1L), TRA(mtcars, FUN(mtcars, g = mtcars$cyl), 1L, mtcars$cyl), FUN(mtcars, g = f, TRA = 1L), TRA(mtcars, FUN(mtcars, g = f), 1L, f), FUN(mtcars, g = fcc, TRA = 1L), TRA(mtcars, FUN(mtcars, g = fcc), 1L, fcc), FUN(mtcars, g = g, TRA = 1L), TRA(mtcars, FUN(mtcars, g = g), 1L, g), FUN(mtcars, g = gl, TRA = 1L), TRA(mtcars, FUN(mtcars, g = gl), 1L, gl))) expect_equal(colorder(FUN(gmtc, TRA = 1L), mpg, cyl), TRA(gmtc, FUN(gmtc), 1L)) expect_equal(FUN(fselect(gmtc, -cyl), TRA = 1L), TRA(fselect(gmtc, -cyl), FUN(gmtc, keep.group_vars = FALSE), 1L)) } for(i in setdiff(.FAST_FUN, c(.FAST_STAT_FUN, "fhdbetween", "fhdwithin"))) { FUN <- match.fun(i) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl), FUN(v, g = f), FUN(v, g = fcc), FUN(v, g = g), FUN(v, g = gl))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl), FUN(m, g = f), FUN(m, g = fcc), FUN(m, g = g), FUN(m, g = gl))) expect_true(all_obj_equal(FUN(mtcars, g = mtcars$cyl), FUN(mtcars, g = f), FUN(mtcars, g = fcc), FUN(mtcars, g = g), FUN(mtcars, g = gl))) } for(i in c("STD", "B", "W", "L", "D", "Dlog", "G")) { FUN <- match.fun(i) expect_true(all_obj_equal(FUN(v, g = mtcars$cyl), FUN(v, g = f), FUN(v, g = fcc), FUN(v, g = g), FUN(v, g = gl))) expect_true(all_obj_equal(FUN(m, g = mtcars$cyl), FUN(m, g = f), FUN(m, g = fcc), FUN(m, g = g), FUN(m, g = gl))) expect_true(all_obj_equal(FUN(mtcars, by = mtcars$cyl), FUN(mtcars, by = f), FUN(mtcars, by = fcc), FUN(mtcars, by = g), FUN(mtcars, by = gl))) } }) l <- as.list(mtcars) test_that("list and df methods give same results", { for (i in setdiff(c(.FAST_FUN, .OPERATOR_FUN), c("fhdbetween", "fhdwithin", "HDB", "HDW"))) { FUN <- match.fun(i) expect_equal(unattrib(FUN(mtcars)), unattrib(FUN(l))) } }) w <- mtcars$wt wFUNs <- c("fmean","fmedian","fsum","fprod","fmode","fvar","fsd","fscale","STD","fbetween","B","fwithin","W") test_that("fast functions give appropriate warnings", { for (i in setdiff(c(.FAST_FUN, .OPERATOR_FUN, "qsu"), c("fhdbetween", "fhdwithin", "HDB", "HDW"))) { FUN <- match.fun(i) expect_warning(FUN(v, bla = 1)) expect_warning(FUN(m, bla = 1)) expect_warning(FUN(mtcars, bla = 1)) expect_warning(FUN(gmtc, bla = 1)) if(i %in% wFUNs) { expect_warning(FUN(gmtc, bla = 1)) expect_error(FUN(gmtc, cyl)) # weight same as grouping variable if(i %in% .FAST_STAT_FUN) expect_true(names(FUN(gmtc, wt))[2L] == if(i == "fprod") "prod.wt" else "sum.wt") # weight same as grouping variable } } }) test_that("fselect and fsubset cannot easily be confuesed", { # expect_error(suppressWarnings(fsubset(mtcars, mpg:vs, wt))) expect_error(fselect(mtcars, mpg == 1)) }) test_that("frange works well", { xd <- rnorm(1e5) xdNA <- na_insert(xd) xi <- as.integer(xd*1000) xiNA <- na_insert(xi) expect_equal(frange(xd, na.rm = FALSE), range(xd)) expect_equal(frange(xd), range(xd, na.rm = TRUE)) expect_equal(frange(xdNA, na.rm = FALSE), range(xdNA)) expect_equal(frange(xdNA), range(xdNA, na.rm = TRUE)) expect_equal(frange(xi, na.rm = FALSE), range(xi)) expect_equal(frange(xi), range(xi, na.rm = TRUE)) expect_equal(frange(xiNA, na.rm = FALSE), range(xiNA)) expect_equal(frange(xiNA), range(xiNA, na.rm = TRUE)) expect_identical(frange(numeric(0)), rep(NA_real_, 2L)) expect_identical(frange(integer(0)), rep(NA_integer_, 2L)) }) # TODO: Test other cols options and formula options !!! options(warn = -1) test_that("operator methods column selection since v1.8.1 works as intended", { nnvw <- names(nv(wlddev)) wldi <- colorder(iby(wlddev, iso3c, year), year, pos = "end") wld1i <- colorder(iby(sbt(wlddev, iso3c %==% "DEU"), year), year, pos = "end") nnvg <- names(nv(GGDC10S)) ggdc3i <- findex_by(GGDC10S, Variable, Country, Year, interact.ids = FALSE) ggdc3ii <- findex_by(GGDC10S, Variable, Country, Year) for(op in list(L, F, D, Dlog, G, B, W, STD)) { expect_equal(names(op(wlddev, stub = FALSE)), nnvw) expect_equal(names(op(wlddev, by = ~ iso3c, stub = FALSE)), c("iso3c", nnvw)) expect_equal(names(op(wlddev, by = ~ iso3c, stub = FALSE, keep.by = FALSE, keep.ids = FALSE)), nnvw) expect_equal(names(op(wlddev, by = ~ decade, stub = FALSE)), c("decade", nnvw[nnvw != "decade"])) expect_equal(names(op(wlddev, by = ~ decade, stub = FALSE, keep.by = FALSE, keep.ids = FALSE)), nnvw[nnvw != "decade"]) expect_equal(names(op(wldi, stub = FALSE)), c("iso3c", nnvw)) expect_equal(names(op(wldi, stub = FALSE, keep.by = FALSE, keep.ids = FALSE)), nnvw[nnvw != "year"]) expect_equal(names(op(wld1i, stub = FALSE)), nnvw) expect_equal(names(op(wld1i, stub = FALSE, keep.by = FALSE, keep.ids = FALSE)), nnvw[nnvw != "year"]) expect_equal(names(op(ggdc3i, stub = FALSE)), c("Country", "Variable", nnvg)) expect_equal(names(op(ggdc3i, stub = FALSE, keep.by = FALSE, keep.ids = FALSE)), nnvg[-1L]) expect_equal(names(op(ggdc3ii, stub = FALSE)), c("Country", "Variable", nnvg)) expect_equal(names(op(ggdc3ii, stub = FALSE, keep.by = FALSE, keep.ids = FALSE)), nnvg[-1L]) } wlduo <- colorder(wlddev, year, pos = "end") wld1uo <- sbt(wlduo, iso3c %==% "DEU") for(op in list(L, F, D, Dlog, G)) { expect_equal(names(op(wld1uo, t = ~ year, stubs = FALSE)), nnvw) expect_equal(names(op(wld1uo, t = ~ year, stubs = FALSE, keep.ids = FALSE)), nnvw[-1L]) expect_equal(names(op(wld1uo, by = ~ iso3c, t = ~ year, stubs = FALSE)), c("iso3c", nnvw)) expect_equal(names(op(wld1uo, by = ~ iso3c, t = ~ year, stubs = FALSE, keep.ids = FALSE)), nnvw[-1L]) } for(op in list(B, W, STD)) { expect_equal(names(op(wld1uo, w = ~ year, stub = FALSE)), nnvw) expect_equal(names(op(wld1uo, w = ~ year, stub = FALSE, keep.w = FALSE)), nnvw[-1L]) expect_equal(names(op(wld1uo, by = ~ iso3c, w = ~ year, stub = FALSE)), c("iso3c", nnvw)) expect_equal(names(op(wld1uo, by = ~ iso3c, w = ~ year, stub = FALSE, keep.by = FALSE)), nnvw) expect_equal(names(op(wld1uo, by = ~ iso3c, w = ~ year, stub = FALSE, keep.w = FALSE)), c("iso3c", nnvw[-1L])) expect_equal(names(op(wld1uo, by = ~ iso3c, w = ~ year, stub = FALSE, keep.by = FALSE, keep.w = FALSE)), nnvw[-1L]) expect_equal(names(op(wldi, w = ~POP, stub = FALSE)), c("iso3c", "year", "POP", nnvw[-c(1, 7)])) expect_equal(names(op(wldi, w = ~POP, stub = FALSE, keep.ids = FALSE)), c("POP", nnvw[-c(1, 7)])) expect_equal(names(op(wldi, w = ~POP, stub = FALSE, keep.w = FALSE)), c("iso3c", "year", nnvw[-c(1, 7)])) expect_equal(names(op(wldi, w = ~POP, stub = FALSE, keep.ids = FALSE, keep.w = FALSE)), nnvw[-c(1, 7)]) expect_equal(names(op(wld1i, w = ~POP, stub = FALSE)), c("year", "POP", nnvw[-c(1, 7)])) expect_equal(names(op(wld1i, w = ~POP, stub = FALSE, keep.ids = FALSE)), c("POP", nnvw[-c(1, 7)])) expect_equal(names(op(wld1i, w = ~POP, stub = FALSE, keep.w = FALSE)), c("year", nnvw[-c(1, 7)])) expect_equal(names(op(wld1i, w = ~POP, stub = FALSE, keep.ids = FALSE, keep.w = FALSE)), nnvw[-c(1, 7)]) } for(op in list(HDB, HDW)) { expect_equal(names(op(wlddev, wlddev$iso3c, stub = FALSE)), nnvw) expect_equal(names(op(wlddev, ~ iso3c, stub = FALSE)), nnvw) expect_equal(names(op(wlddev, ~ year, stub = FALSE)), nnvw[-1]) if(identical(Sys.getenv("NCRAN"), "TRUE")) expect_equal(names(op(wldi, stub = FALSE)), nnvw[-1]) } }) options(warn = 1) test_that("all_funs works properly", { expect_identical(all_funs(quote(fmean(vars, na.rm = FALSE))), "fmean") expect_identical(all_funs(quote(fmean(vars, g = col, w = col, na.rm = FALSE))), "fmean") expect_identical(all_funs(quote(fmean(vars, g = col, w = col, na.rm = FALSE)- fmode(var2))), c("-", "fmean", "fmode")) expect_identical(all_funs(quote(q/p)), "/") expect_identical(all_funs(quote(q-p)), "-") expect_identical(all_funs(quote(b-c/i(u))), c("-", "/", "i")) expect_identical(all_funs(quote(i/f(j/p(k/g(h))))), c("/", "f", "/", "p", "/", "g")) expect_identical(all_funs(quote(1-f(1-j/p(1-k/g(h))))), c("-","f", "-", "/", "p", "-", "/", "g")) expect_identical(all_funs(quote(i(u)-b/p(z-u/log(a)))), c("-", "i", "/", "p", "-", "/", "log")) expect_identical(all_funs(quote(sum(x) + fmean(x) * e - 1 / fmedian(z))), c("-", "+", "sum", "*", "fmean", "/", "fmedian")) expect_identical(all_funs(quote(sum(z)/2+4+e+g+h+(p/sum(u))+(q-y))), c("+", "+", "+", "+", "+", "+", "/", "sum", "(", "/", "sum", "(", "-")) expect_identical(all_funs(quote(mean(fmax(min(fmode(mpg))))/fmean(mpg) + e + f + 1 + fsd(hp) + sum(bla) / 20)), c("+", "+", "+", "+", "+", "/", "mean", "fmax", "min", "fmode", "fmean", "fsd", "/", "sum")) }) test_that("fdist works properly", { expect_equal(fdist(m), fdist(mtcars)) expect_equal(fdist(m), fdist(m, method = 1L)) expect_equal(fdist(m, method = "euclidean_squared"), fdist(m, method = 2L)) expect_equal(fdist(m), `attr<-`(dist(m), "call", NULL)) expect_equal(unattrib(fdist(m, method = "euclidean_squared")), unattrib(dist(m))^2) expect_equal(fdist(m, fmean(m)), unattrib(sqrt(colSums((t(m) - fmean(m))^2)))) expect_equal(fdist(m, fmean(m), method = "euclidean_squared"), unattrib(colSums((t(m) - fmean(m))^2))) expect_equal(fdist(m[, 1], m[, 3]), sqrt(sum((m[, 1] - m[, 3])^2))) expect_equal(fdist(m[, 1], m[, 3], method = "euclidean_squared"), sum((m[, 1] - m[, 3])^2)) if(Sys.getenv("OMP") == "TRUE") { oldopts = set_collapse(nthreads = 2) expect_equal(fdist(m), fdist(mtcars)) expect_equal(fdist(m), fdist(m, method = 1L)) expect_equal(fdist(m, method = "euclidean_squared"), fdist(m, method = 2L)) expect_equal(fdist(m), `attr<-`(dist(m), "call", NULL)) expect_equal(unattrib(fdist(m, method = "euclidean_squared")), unattrib(dist(m))^2) expect_equal(fdist(m, fmean(m)), unattrib(sqrt(colSums((t(m) - fmean(m))^2)))) expect_equal(fdist(m, fmean(m), method = "euclidean_squared"), unattrib(colSums((t(m) - fmean(m))^2))) expect_equal(fdist(m[, 1], m[, 3]), sqrt(sum((m[, 1] - m[, 3])^2))) expect_equal(fdist(m[, 1], m[, 3], method = "euclidean_squared"), sum((m[, 1] - m[, 3])^2)) set_collapse(oldopts) } }) test_that("rowbind", { expect_equal(rowbind(mtcars, mtcars), setRownames(rbind(mtcars, mtcars))) expect_equal(rowbind(list(mtcars, mtcars)), setRownames(rbind(mtcars, mtcars))) expect_equal(rowbind(mtcars, mtcars), unlist2d(list(mtcars, mtcars), idcols = FALSE)) expect_equal(rowbind(mtcars, mtcars, idcol = "id"), unlist2d(list(mtcars, mtcars), idcols = "id")) expect_equal(rowbind(mtcars, mtcars, row.names = "car"), unlist2d(list(mtcars, mtcars), idcols = FALSE, row.names = "car")) expect_equal(rowbind(mtcars, mtcars, idcol = "id", row.names = "car"), unlist2d(list(mtcars, mtcars), idcols = "id", row.names = "car")) expect_equal(rowbind(a = mtcars, b = mtcars, idcol = "id"), unlist2d(list(a = mtcars, b = mtcars), idcols = "id", id.factor = TRUE)) expect_equal(rowbind(a = mtcars, b = mtcars, idcol = "id", id.factor = FALSE), unlist2d(list(a = mtcars, b = mtcars), idcols = "id")) }) if (requireNamespace("bit64", quietly = TRUE)) test_that("rowbind + integer64", { # https://github.com/fastverse/collapse/issues/697 x <- data.frame(a = bit64::as.integer64(1)) xi <- data.frame(a = 1L) xd <- data.frame(a = 1) expect_equal(rowbind(x, xi), setRownames(rbind(x, x))) expect_equal(rowbind(x, xd), setRownames(rbind(x, x))) }) collapse/tests/testthat/test-seqid-groupid.R0000644000176200001440000001657114777170131020754 0ustar liggesuserscontext("seqid, groupid") # rm(list = ls()) x <- c(1:10, 1:10) test_that("seqid performas as expected", { expect_identical(unattrib(seqid(x)), rep(1:2, each = 10)) expect_identical(unattrib(seqid(x)), unattrib(seqid(x, na.skip = TRUE))) expect_identical(unattrib(seqid(c(1, NA, 3), na.skip = TRUE)), as.integer(c(1, NA, 2))) expect_identical(unattrib(seqid(c(1, NA, 2), na.skip = TRUE)), as.integer(c(1, NA, 1))) expect_identical(unattrib(seqid(c(1, NA, 3), na.skip = TRUE, skip.seq = TRUE)), as.integer(c(1, NA, 1))) expect_identical(unattrib(seqid(c(1, NA, 2), na.skip = TRUE, skip.seq = TRUE)), as.integer(c(1, NA, 2))) expect_identical(unattrib(seqid(x)), unattrib(seqid(x, na.skip = TRUE))) set.seed(101) xNA <- na_insert(x, prop = 0.15) expect_true(!anyNA(seqid(xNA))) expect_identical(is.na(seqid(xNA, na.skip = TRUE)), is.na(xNA)) xNA2 <- xNA xNA2[c(1,20)] <- NA_integer_ expect_true(!anyNA(seqid(xNA2))) expect_identical(is.na(seqid(xNA2, na.skip = TRUE)), is.na(xNA2)) # Start at 0 expect_equal(seqid(x, start = 0)[1], 0L) expect_equal(seqid(x, na.skip = TRUE, start = 0)[1], 0L) expect_identical(unclass(seqid(x, start = 0)), unclass(seqid(x, na.skip = TRUE, start = 0))) o <- order(rnorm(20)) xuo <- x[o] xNAuo <- xNA[o] xNA2uo <- xNA2[o] o <- order(o) expect_identical(x, xuo[o]) expect_identical(xNA, xNAuo[o]) expect_identical(xNA2, xNA2uo[o]) # seqid(xuo) # seqid(xuo, na.skip = TRUE) # seqid(xNAuo) # seqid(xNAuo, na.skip = TRUE) # seqid(xNA2uo) # seqid(xNA2uo, na.skip = TRUE) expect_identical(seqid(xuo, o)[o], unattrib(seqid(x))) expect_identical(seqid(xuo, o, na.skip = TRUE)[o], unattrib(seqid(x, na.skip = TRUE))) expect_identical(seqid(xNAuo, o)[o], unattrib(seqid(xNA))) expect_identical(seqid(xNAuo, o, na.skip = TRUE)[o], unattrib(seqid(xNA, na.skip = TRUE))) expect_identical(seqid(xNA2uo, o)[o], unattrib(seqid(xNA2))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE)[o], unattrib(seqid(xNA2, na.skip = TRUE))) # Check o expect_identical(seqid(xuo, o, check.o = FALSE)[o], unattrib(seqid(x))) expect_identical(seqid(xuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(seqid(x, na.skip = TRUE))) expect_identical(seqid(xNAuo, o, check.o = FALSE)[o], unattrib(seqid(xNA))) expect_identical(seqid(xNAuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(seqid(xNA, na.skip = TRUE))) expect_identical(seqid(xNA2uo, o, check.o = FALSE)[o], unattrib(seqid(xNA2))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(seqid(xNA2, na.skip = TRUE))) # Start at 0 expect_identical(seqid(xuo, o, start = 0)[o], unattrib(seqid(x, start = 0))) expect_identical(seqid(xuo, o, na.skip = TRUE, start = 0)[o], unattrib(seqid(x, na.skip = TRUE, start = 0))) expect_identical(seqid(xNAuo, o, start = 0)[o], unattrib(seqid(xNA, start = 0))) expect_identical(seqid(xNAuo, o, na.skip = TRUE, start = 0)[o], unattrib(seqid(xNA, na.skip = TRUE, start = 0))) expect_identical(seqid(xNA2uo, o, start = 0)[o], unattrib(seqid(xNA2, start = 0))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE, start = 0)[o], unattrib(seqid(xNA2, na.skip = TRUE, start = 0))) # Check o, start at 0 expect_identical(seqid(xuo, o, check.o = FALSE, start = 0)[o], unattrib(seqid(x, start = 0))) expect_identical(seqid(xuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(seqid(x, na.skip = TRUE, start = 0))) expect_identical(seqid(xNAuo, o, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA, start = 0))) expect_identical(seqid(xNAuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA, na.skip = TRUE, start = 0))) expect_identical(seqid(xNA2uo, o, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA2, start = 0))) expect_identical(seqid(xNA2uo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(seqid(xNA2, na.skip = TRUE, start = 0))) }) # Testing groupid ----------------------- x <- rep(5:6, each = 10) test_that("groupid performas as expected", { # groupid(x) # groupid(x, na.skip = TRUE) set.seed(101) xNA <- na_insert(x, prop = 0.15) # groupid(xNA) # desirable behavior ?? # groupid(xNA, na.skip = TRUE) # -> Yes !! xNA2 <- xNA xNA2[c(1,20)] <- NA_integer_ # groupid(xNA2) # groupid(xNA2, na.skip = TRUE) # This was an issue !! expect_identical(groupid(c(NA,NA,1.343,NA,NA)), groupid(c(NA,NA,1L,NA,NA))) expect_true(allNA(replicate(500, groupid(NA, na.skip = TRUE)))) #335 expect_equal(unattrib(groupid(c(NA, NA), na.skip = TRUE)), c(NA_integer_, NA_integer_)) expect_equal(unattrib(groupid(c(NA, "a"), na.skip = TRUE)), c(NA, 1L)) expect_equal(unattrib(groupid(c(NA, NA, "a"), na.skip = TRUE)), c(NA, NA, 1L)) # Start at 0 # groupid(x, start = 0) # groupid(x, na.skip = TRUE, start = 0) # groupid(xNA, start = 0) # groupid(xNA, na.skip = TRUE, start = 0) # groupid(xNA2, start = 0) # groupid(xNA2, na.skip = TRUE, start = 0) o <- order(rnorm(20)) xuo <- x[o] xNAuo <- xNA[o] xNA2uo <- xNA2[o] o <- order(o) expect_identical(x, xuo[o]) expect_identical(xNA, xNAuo[o]) expect_identical(xNA2, xNA2uo[o]) # groupid(xuo) # groupid(xuo, na.skip = TRUE) # groupid(xNAuo) # groupid(xNAuo, na.skip = TRUE) # groupid(xNA2uo) # groupid(xNA2uo, na.skip = TRUE) expect_identical(groupid(xuo, o)[o], unattrib(groupid(x))) expect_identical(groupid(xuo, o, na.skip = TRUE)[o], unattrib(groupid(x, na.skip = TRUE))) expect_identical(groupid(xNAuo, o)[o], unattrib(groupid(xNA))) expect_identical(groupid(xNAuo, o, na.skip = TRUE)[o], unattrib(groupid(xNA, na.skip = TRUE))) expect_identical(groupid(xNA2uo, o)[o], unattrib(groupid(xNA2))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE)[o], unattrib(groupid(xNA2, na.skip = TRUE))) # Check o expect_identical(groupid(xuo, o, check.o = FALSE)[o], unattrib(groupid(x))) expect_identical(groupid(xuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(groupid(x, na.skip = TRUE))) expect_identical(groupid(xNAuo, o, check.o = FALSE)[o], unattrib(groupid(xNA))) expect_identical(groupid(xNAuo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(groupid(xNA, na.skip = TRUE))) expect_identical(groupid(xNA2uo, o, check.o = FALSE)[o], unattrib(groupid(xNA2))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE, check.o = FALSE)[o], unattrib(groupid(xNA2, na.skip = TRUE))) # Start at 0 expect_identical(groupid(xuo, o, start = 0)[o], unattrib(groupid(x, start = 0))) expect_identical(groupid(xuo, o, na.skip = TRUE, start = 0)[o], unattrib(groupid(x, na.skip = TRUE, start = 0))) expect_identical(groupid(xNAuo, o, start = 0)[o], unattrib(groupid(xNA, start = 0))) expect_identical(groupid(xNAuo, o, na.skip = TRUE, start = 0)[o], unattrib(groupid(xNA, na.skip = TRUE, start = 0))) expect_identical(groupid(xNA2uo, o, start = 0)[o], unattrib(groupid(xNA2, start = 0))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE, start = 0)[o], unattrib(groupid(xNA2, na.skip = TRUE, start = 0))) # Check o, start at 0 expect_identical(groupid(xuo, o, check.o = FALSE, start = 0)[o], unattrib(groupid(x, start = 0))) expect_identical(groupid(xuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(groupid(x, na.skip = TRUE, start = 0))) expect_identical(groupid(xNAuo, o, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA, start = 0))) expect_identical(groupid(xNAuo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA, na.skip = TRUE, start = 0))) expect_identical(groupid(xNA2uo, o, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA2, start = 0))) expect_identical(groupid(xNA2uo, o, na.skip = TRUE, check.o = FALSE, start = 0)[o], unattrib(groupid(xNA2, na.skip = TRUE, start = 0))) }) collapse/tests/testthat/test-quick-conversion.R0000644000176200001440000001314714777170131021473 0ustar liggesuserscontext("quick-conversion") NCRAN <- identical(Sys.getenv("NCRAN"), "TRUE") # rm(list = ls()) set.seed(101) x <- rnorm(10) xNA <- x xNA[c(3,10)] <- NA f <- sample.int(3, 10, TRUE) fNA <- f fNA[c(3,10)] <- NA l1 <- replicate(10, rnorm(10), simplify = FALSE) l2 <- as.list(mtcars) m <- as.matrix(mtcars) m2 <- replicate(10, rnorm(10)) # Test this (plain matrix) # X = sweep(d, 2L, colMeans(qM(d), na.rm = TRUE), "replace_fill") setdfdt <- function(x) { attr(x, "row.names") <- .set_row_names(length(x[[1L]])) class(x) <- c("data.table","data.frame") alc(x) } test_that("conversions to factor run smoothly", { expect_identical(ordered(as.factor(x)), qF(x, ordered = TRUE)) expect_identical(ordered(as.factor(f)), qF(f, ordered = TRUE)) expect_identical(as.integer(as.factor(xNA)), as.integer(qF(xNA, ordered = TRUE))) expect_identical(as.integer(as.factor(fNA)), as.integer(qF(fNA, ordered = TRUE))) expect_identical(as.integer(as.factor(x)), as.integer(qG(x, ordered = TRUE))) expect_identical(as.integer(as.factor(f)), as.integer(qF(f, ordered = TRUE))) expect_identical(as.integer(as.factor(xNA)), as.integer(qG(xNA, ordered = TRUE))) expect_identical(as.integer(qF(fNA, ordered = TRUE)), as.integer(qG(fNA, ordered = TRUE))) }) test_that("conversions to matrix run smoothly", { expect_identical(do.call(cbind, l1), qM(l1)) expect_identical(do.call(cbind, l2), qM(l2)) expect_identical(as.matrix(mtcars), qM(mtcars)) expect_identical(`dimnames<-`(as.matrix(x), list(NULL, "x")), qM(x)) expect_identical(qM(m), m) expect_identical(qM(m2), m2) expect_identical(mtcars, qDF(qM(qDF(mtcars, "car"), "car"))) expect_identical(qM(mtcars), qM(qDF(mtcars, "car"), 1)) expect_identical(mtcars, qDF(qM(qDF(mtcars, "car"), "car", keep.attr = TRUE))) expect_identical(qM(mtcars), qM(qDF(mtcars, "car"), 1, keep.attr = TRUE)) expect_identical(setRownames(qM(GGDC10S, is.character), NULL), as.matrix(num_vars(GGDC10S))) expect_identical(setRownames(qM(GGDC10S, is.character, keep.attr = TRUE), NULL), as.matrix(num_vars(GGDC10S))) }) test_that("conversions to data.frame / data.table run smoothly", { expect_identical(setNames(as.data.frame(l1), paste0("V",1:10)), qDF(l1)) expect_identical(as.data.frame(l2), qDF(l2)) expect_identical(as.data.frame(m), qDF(m)) expect_identical(as.data.frame(m2), qDF(m2)) expect_identical(as.data.frame(x), qDF(x)) expect_identical(qDF(mtcars), mtcars) expect_identical(setdfdt(setNames(as.data.frame(l1), paste0("V",1:10))), qDT(l1)) expect_identical(setdfdt(as.data.frame(l2)), qDT(l2)) expect_identical(setdfdt(as.data.frame(m)), qDT(m)) expect_identical(setdfdt(as.data.frame(m2)), qDT(m2)) expect_identical(setdfdt(as.data.frame(x)), qDT(x)) expect_identical(qDT(mtcars), setdfdt(mtcars)) }) test_that("double-conversions are ok", { expect_identical(qDF(qDT(mtcars)), setRownames(mtcars)) expect_identical(qM(qDT(m)), setRownames(m, NULL)) expect_identical(qM(qDF(m)), m) }) test_that("mrtl and mctl work well", { expect_equal(mctl(m), lapply(seq_col(m), function(i) unattrib(m[, i]))) expect_equal(mctl(m, TRUE), setNames(lapply(seq_col(m), function(i) unattrib(m[, i])), colnames(m))) expect_equal(mctl(m, TRUE, "data.frame"), mtcars) expect_equal(mctl(m, TRUE, "data.table"), qDT(mtcars)) expect_equal(mctl(m, FALSE, "data.frame"), setRownames(setNames(mtcars, paste0("V", seq_col(mtcars))))) expect_equal(mctl(m, FALSE, "data.table"), qDT(setNames(mtcars, paste0("V", seq_col(mtcars))))) expect_equal(mrtl(m), lapply(seq_row(m), function(i) unattrib(m[i, ]))) expect_equal(mrtl(m, TRUE), setNames(lapply(seq_row(m), function(i) unattrib(m[i, ])), rownames(m))) expect_equal(mrtl(m, TRUE, "data.frame"), as.data.frame(t(m))) expect_equal(mrtl(m, TRUE, "data.table"), qDT(as.data.frame(t(m)))) expect_equal(mrtl(m, FALSE, "data.frame"), setRownames(setNames(as.data.frame(t(m)), paste0("V", seq_row(mtcars))))) expect_equal(mrtl(m, FALSE, "data.table"), qDT(setNames(as.data.frame(t(m)), paste0("V", seq_row(mtcars))))) }) test_that("qM keep.attr and class options work as intended", { expect_identical(qM(m), m) expect_identical(qM(m, keep.attr = TRUE), m) expect_identical(qM(m, keep.attr = TRUE, class = "matrix"), `oldClass<-`(m, "matrix")) expect_identical(qM(m, class = "matrix"), `oldClass<-`(m, "matrix")) expect_identical(qM(mtcars), m) expect_identical(qM(mtcars, keep.attr = TRUE), m) expect_identical(qM(mtcars, keep.attr = TRUE, class = "matrix"), `oldClass<-`(m, "matrix")) expect_identical(qM(mtcars, class = "matrix"), `oldClass<-`(m, "matrix")) gmtcars <- `attr<-`(fgroup_by(mtcars, cyl, vs, am), "was.tibble", NULL) expect_identical(qM(gmtcars), m) expect_identical(qM(gmtcars, keep.attr = TRUE), `attr<-`(m, "groups", attr(gmtcars, "groups"))) expect_identical(qM(gmtcars, keep.attr = TRUE, class = "matrix"), `oldClass<-`(`attr<-`(m, "groups", attr(gmtcars, "groups")), "matrix")) expect_identical(qM(gmtcars, class = "matrix"), `oldClass<-`(m, "matrix")) if(NCRAN) { expect_identical(qM(EuStockMarkets, keep.attr = TRUE), EuStockMarkets) expect_identical(qM(EuStockMarkets), unclass(`attr<-`(EuStockMarkets, "tsp", NULL))) expect_false(identical(qM(EuStockMarkets), EuStockMarkets)) expect_false(identical(qM(EuStockMarkets, keep.attr = TRUE, class = "matrix"), EuStockMarkets)) tsl <- list(a = AirPassengers, b = AirPassengers) expect_identical(qM(tsl, keep.attr = TRUE), do.call(cbind, tsl)) expect_identical(qM(tsl), unclass(`attr<-`(do.call(cbind, tsl), "tsp", NULL))) expect_false(identical(qM(tsl), do.call(cbind, tsl))) expect_false(identical(qM(tsl, keep.attr = TRUE, class = "matrix"), do.call(cbind, tsl))) } }) collapse/tests/testthat/test-ffirst-flast.R0000644000176200001440000003516215000542453020567 0ustar liggesuserscontext("ffirst and flast") # TODO: Check matrix with list columns !! # Benchmark with groups: Bettr to check missing x ??? # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100 * rnorm(100)) xNA <- x wNA <- w xNA[sample.int(100, 20)] <- NA wNA[sample.int(100, 20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) data <- wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ] l <- nrow(data) g <- GRP(droplevels(data$iso3c)) dataNA <- na_insert(data) m <- as.matrix(data) mNA <- as.matrix(dataNA) data$LC <- as.list(data$PCGDP) dataNA$LC <- lapply(na_insert(data["LC"][[1]]), function(x) if(is.na(x)) NULL else x) basefirst <- function(x, na.rm = FALSE) { if(is.list(x)) return(if(na.rm) x[which(lengths(x) > 0L)[1L]] else x[1L]) if(na.rm) x[which(!is.na(x))[1L]] else x[1L] } baselast <- function(x, na.rm = FALSE) { lst <- function(x) x[length(x)] if(is.list(x)) return(if(na.rm) x[lst(which(lengths(x) > 0L))] else lst(x)) if(na.rm && !all(na <- is.na(x))) x[lst(which(!na))] else lst(x) } # ffirst test_that("ffirst performs like basefirst (defined above)", { expect_equal(ffirst(NA), basefirst(NA)) expect_equal(ffirst(NA, na.rm = FALSE), basefirst(NA)) expect_equal(ffirst(1), basefirst(1, na.rm = TRUE)) expect_equal(ffirst(1:3), basefirst(1:3, na.rm = TRUE)) expect_equal(ffirst(-1:1), basefirst(-1:1, na.rm = TRUE)) expect_equal(ffirst(1, na.rm = FALSE), basefirst(1)) expect_equal(ffirst(1:3, na.rm = FALSE), basefirst(1:3)) expect_equal(ffirst(-1:1, na.rm = FALSE), basefirst(-1:1)) expect_equal(ffirst(x), basefirst(x, na.rm = TRUE)) expect_equal(ffirst(x, na.rm = FALSE), basefirst(x)) expect_equal(ffirst(m[, 1]), basefirst(m[, 1])) expect_equal(ffirst(xNA, na.rm = FALSE), basefirst(xNA)) expect_equal(ffirst(xNA), basefirst(xNA, na.rm = TRUE)) expect_equal(ffirst(mNA[, 1]), basefirst(mNA[, 1], na.rm = TRUE)) expect_equal(ffirst(m), dapply(m, basefirst, na.rm = TRUE)) expect_equal(ffirst(m, na.rm = FALSE), dapply(m, basefirst)) expect_equal(ffirst(mNA, na.rm = FALSE), dapply(mNA, basefirst)) expect_equal(ffirst(mNA), dapply(mNA, basefirst, na.rm = TRUE)) expect_equal(ffirst(data, drop = FALSE), dapply(data, basefirst, na.rm = TRUE, drop = FALSE)) expect_equal(ffirst(data, na.rm = FALSE, drop = FALSE), dapply(data, basefirst, drop = FALSE)) expect_equal(ffirst(dataNA, na.rm = FALSE, drop = FALSE), dapply(dataNA, basefirst, drop = FALSE)) expect_equal(ffirst(dataNA, drop = FALSE), dapply(dataNA, basefirst, na.rm = TRUE, drop = FALSE)) expect_equal(ffirst(x, f), BY(x, f, basefirst, na.rm = TRUE)) expect_equal(ffirst(x, f, na.rm = FALSE), BY(x, f, basefirst)) expect_equal(ffirst(xNA, f, na.rm = FALSE), BY(xNA, f, basefirst)) expect_equal(ffirst(xNA, f), BY(xNA, f, basefirst, na.rm = TRUE)) expect_equal(ffirst(m, na.rm = FALSE), m[1L, ]) expect_equal(ffirst(m, na.rm = FALSE, drop = FALSE), setRownames(m[1L, , drop = FALSE], NULL)) expect_equal(ffirst(m, g), BY(setRownames(m, NULL), g, basefirst, na.rm = TRUE)) expect_equal(ffirst(m, g, na.rm = FALSE), BY(setRownames(m, NULL), g, basefirst)) expect_equal(ffirst(mNA, g, na.rm = FALSE), BY(setRownames(mNA, NULL), g, basefirst)) expect_equal(ffirst(mNA, g), BY(setRownames(mNA, NULL), g, basefirst, na.rm = TRUE)) expect_equal(ffirst(data, na.rm = FALSE, drop = FALSE), setRownames(data[1L, ])) expect_equal(ffirst(data, g, use.g.names = FALSE), BY(data, g, basefirst, na.rm = TRUE, use.g.names = FALSE)) expect_equal(setRownames(ffirst(data, g, na.rm = FALSE)), BY(data, g, basefirst, use.g.names = FALSE)) expect_equal(setRownames(ffirst(dataNA, g, na.rm = FALSE)), BY(dataNA, g, basefirst, use.g.names = FALSE)) expect_equal(ffirst(dataNA, g, use.g.names = FALSE), BY(dataNA, g, basefirst, na.rm = TRUE, use.g.names = FALSE)) }) test_that("ffirst performs numerically stable", { expect_true(all_obj_equal(replicate(50, ffirst(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(data, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, ffirst(dataNA, g), simplify = FALSE))) }) test_that("ffirst handles special values in the right way", { expect_equal(ffirst(NA), NA) expect_equal(ffirst(NaN), NaN) expect_equal(ffirst(Inf), Inf) expect_equal(ffirst(-Inf), -Inf) expect_equal(ffirst(TRUE), TRUE) expect_equal(ffirst(FALSE), FALSE) expect_equal(ffirst(NA, na.rm = FALSE), NA) expect_equal(ffirst(NaN, na.rm = FALSE), NaN) expect_equal(ffirst(Inf, na.rm = FALSE), Inf) expect_equal(ffirst(-Inf, na.rm = FALSE), -Inf) expect_equal(ffirst(TRUE, na.rm = FALSE), TRUE) expect_equal(ffirst(FALSE, na.rm = FALSE), FALSE) expect_equal(ffirst(c(1,NA)), 1) expect_equal(ffirst(c(1,NaN)), 1) expect_equal(ffirst(c(1,Inf)), 1) expect_equal(ffirst(c(1,-Inf)), 1) expect_equal(ffirst(c(FALSE,TRUE)), FALSE) expect_equal(ffirst(c(TRUE,FALSE)), TRUE) expect_equal(ffirst(c(1,Inf), na.rm = FALSE), 1) expect_equal(ffirst(c(1,-Inf), na.rm = FALSE), 1) expect_equal(ffirst(c(FALSE,TRUE), na.rm = FALSE), FALSE) expect_equal(ffirst(c(TRUE,FALSE), na.rm = FALSE), TRUE) }) test_that("ffirst produces errors for wrong input", { expect_visible(ffirst("a")) expect_visible(ffirst(NA_character_)) expect_visible(ffirst(mNA)) expect_error(ffirst(mNA, f)) expect_error(ffirst(1:2,1:3)) expect_error(ffirst(m,1:31)) expect_error(ffirst(data,1:31)) expect_warning(ffirst("a", w = 1)) expect_warning(ffirst(1:2, w = 1:3)) expect_warning(ffirst(NA_character_, w = 1)) expect_warning(ffirst(mNA, w = wdat)) expect_error(ffirst(mNA, f, 2)) expect_warning(ffirst(mNA, w = 1:33)) expect_error(ffirst(1:2,1:2, 1:3)) expect_error(ffirst(m,1:32,1:20)) expect_error(ffirst(data,1:32,1:10)) expect_warning(ffirst(1:2, w = c("a","b"))) expect_visible(ffirst(wlddev)) expect_warning(ffirst(wlddev, w = wlddev$year, drop = FALSE)) expect_visible(ffirst(wlddev, wlddev$iso3c)) expect_error(ffirst(wlddev, wlddev$iso3c, wlddev$year)) }) # flast test_that("flast performs like baselast (defined above)", { expect_equal(flast(NA), baselast(NA)) expect_equal(flast(NA, na.rm = FALSE), baselast(NA)) expect_equal(flast(1), baselast(1, na.rm = TRUE)) expect_equal(flast(1:3), baselast(1:3, na.rm = TRUE)) expect_equal(flast(-1:1), baselast(-1:1, na.rm = TRUE)) expect_equal(flast(1, na.rm = FALSE), baselast(1)) expect_equal(flast(1:3, na.rm = FALSE), baselast(1:3)) expect_equal(flast(-1:1, na.rm = FALSE), baselast(-1:1)) expect_equal(flast(x), baselast(x, na.rm = TRUE)) expect_equal(flast(x, na.rm = FALSE), baselast(x)) expect_equal(flast(m[, 1]), baselast(m[, 1])) expect_equal(flast(xNA, na.rm = FALSE), baselast(xNA)) expect_equal(flast(xNA), baselast(xNA, na.rm = TRUE)) expect_equal(flast(mNA[, 1]), baselast(mNA[, 1], na.rm = TRUE)) expect_equal(flast(m), dapply(m, baselast, na.rm = TRUE)) expect_equal(flast(m, na.rm = FALSE), dapply(m, baselast)) expect_equal(flast(mNA, na.rm = FALSE), dapply(mNA, baselast)) expect_equal(flast(mNA), dapply(mNA, baselast, na.rm = TRUE)) expect_equal(flast(data, drop = FALSE), dapply(data, baselast, na.rm = TRUE, drop = FALSE)) expect_equal(flast(data, na.rm = FALSE, drop = FALSE), dapply(data, baselast, drop = FALSE)) expect_equal(flast(dataNA, na.rm = FALSE, drop = FALSE), dapply(dataNA, baselast, drop = FALSE)) expect_equal(flast(dataNA, drop = FALSE), dapply(dataNA, baselast, na.rm = TRUE, drop = FALSE)) expect_equal(flast(x, f), BY(x, f, baselast, na.rm = TRUE)) expect_equal(flast(x, f, na.rm = FALSE), BY(x, f, baselast)) expect_equal(flast(xNA, f, na.rm = FALSE), BY(xNA, f, baselast)) expect_equal(flast(xNA, f), BY(xNA, f, baselast, na.rm = TRUE)) expect_equal(flast(m, na.rm = FALSE), m[nrow(m), ]) expect_equal(flast(m, na.rm = FALSE, drop = FALSE), setRownames(m[nrow(m), , drop = FALSE], NULL)) expect_equal(flast(m, g), BY(setRownames(m, NULL), g, baselast, na.rm = TRUE)) expect_equal(flast(m, g, na.rm = FALSE), BY(setRownames(m, NULL), g, baselast)) expect_equal(flast(mNA, g, na.rm = FALSE), BY(setRownames(mNA, NULL), g, baselast)) expect_equal(flast(mNA, g), BY(setRownames(mNA, NULL), g, baselast, na.rm = TRUE)) expect_equal(flast(data, na.rm = FALSE, drop = FALSE), setRownames(data[nrow(data), ])) expect_equal(flast(data, g, use.g.names = FALSE), BY(data, g, baselast, na.rm = TRUE, use.g.names = FALSE)) expect_equal(setRownames(flast(data, g, na.rm = FALSE, use.g.names = FALSE)), BY(data, g, baselast, use.g.names = FALSE)) expect_equal(setRownames(flast(dataNA, g, na.rm = FALSE, use.g.names = FALSE)), BY(dataNA, g, baselast, use.g.names = FALSE)) expect_equal(flast(dataNA, g, use.g.names = FALSE), BY(dataNA, g, baselast, na.rm = TRUE, use.g.names = FALSE)) }) test_that("flast performs numerically stable", { expect_true(all_obj_equal(replicate(50, flast(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(data, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flast(dataNA, g), simplify = FALSE))) }) test_that("flast handles special values in the right way", { expect_equal(flast(NA), NA) expect_equal(flast(NaN), NaN) expect_equal(flast(Inf), Inf) expect_equal(flast(-Inf), -Inf) expect_equal(flast(TRUE), TRUE) expect_equal(flast(FALSE), FALSE) expect_equal(flast(NA, na.rm = FALSE), NA) expect_equal(flast(NaN, na.rm = FALSE), NaN) expect_equal(flast(Inf, na.rm = FALSE), Inf) expect_equal(flast(-Inf, na.rm = FALSE), -Inf) expect_equal(flast(TRUE, na.rm = FALSE), TRUE) expect_equal(flast(FALSE, na.rm = FALSE), FALSE) expect_equal(flast(c(1,NA)), 1) expect_equal(flast(c(1,NaN)), 1) expect_equal(flast(c(1,Inf)), Inf) expect_equal(flast(c(1,-Inf)), -Inf) expect_equal(flast(c(FALSE,TRUE)), TRUE) expect_equal(flast(c(TRUE,FALSE)), FALSE) expect_equal(flast(c(1,Inf), na.rm = FALSE), Inf) expect_equal(flast(c(1,-Inf), na.rm = FALSE), -Inf) expect_equal(flast(c(FALSE,TRUE), na.rm = FALSE), TRUE) expect_equal(flast(c(TRUE,FALSE), na.rm = FALSE), FALSE) }) test_that("flast produces errors for wrong input", { expect_visible(flast("a")) expect_visible(flast(NA_character_)) expect_visible(flast(mNA)) expect_error(flast(mNA, f)) expect_error(flast(1:2,1:3)) expect_error(flast(m,1:31)) expect_error(flast(data,1:31)) expect_warning(flast("a", w = 1)) expect_warning(flast(1:2, w = 1:3)) expect_warning(flast(NA_character_, w = 1)) expect_warning(flast(mNA, w = wdat)) expect_error(flast(mNA, f, wdat)) expect_warning(flast(mNA, w = 1:33)) expect_error(flast(1:2,1:2, 1:3)) expect_error(flast(m,1:32,1:20)) expect_error(flast(data,1:32,1:10)) expect_warning(flast(1:2, w = c("a","b"))) expect_visible(flast(wlddev)) expect_warning(flast(wlddev, w = wlddev$year, drop = FALSE)) expect_visible(flast(wlddev, wlddev$iso3c)) expect_error(flast(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-dapply.R0000644000176200001440000000673414777170131017471 0ustar liggesuserscontext("dapply") # rm(list = ls()) test_that("All common uses of dapply can be performed, as per examples", { # data.frame expect_equal(dapply(mtcars, force), mtcars) expect_equal(dapply(`attr<-`(mtcars, "bla", 1), force), `attr<-`(mtcars, "bla", 1)) expect_equal(dapply(`attr<-`(mtcars, "bla", 1), force, MARGIN = 1), `attr<-`(mtcars, "bla", 1)) expect_visible(dapply(mtcars, log)) expect_true(is.matrix(dapply(mtcars, log, return = "matrix"))) # matrix m <- as.matrix(mtcars) expect_equal(dapply(m, force), m) expect_equal(dapply(EuStockMarkets, force), EuStockMarkets) expect_equal(dapply(EuStockMarkets, force, MARGIN = 1), EuStockMarkets) expect_visible(dapply(m, log)) expect_true(is.data.frame(dapply(m, log, return = "data.frame"))) # matrix <> data.frame conversions expect_equal(dapply(mtcars, log, return = "matrix"), dapply(m, log)) expect_equal(dapply(mtcars, log, return = "matrix", MARGIN = 1), dapply(m, log, MARGIN = 1)) expect_equal(dapply(m, log, return = "data.frame"), dapply(mtcars, log)) expect_equal(dapply(m, log, return = "data.frame", MARGIN = 1), dapply(mtcars, log, MARGIN = 1)) expect_equal(dapply(mtcars, quantile, return = "matrix"), dapply(m, quantile)) expect_equal(dapply(mtcars, quantile, return = "matrix", MARGIN = 1), dapply(m, quantile, MARGIN = 1)) expect_equal(dapply(m, quantile, return = "data.frame"), dapply(mtcars, quantile)) expect_equal(dapply(m, quantile, return = "data.frame", MARGIN = 1), dapply(mtcars, quantile, MARGIN = 1)) # scalar function gives atomic vector expect_true(is.atomic(dapply(mtcars, sum))) expect_equal(dapply(m, sum), dapply(mtcars, sum)) expect_true(is.atomic(dapply(mtcars, sum, MARGIN = 1))) expect_equal(dapply(m, sum, MARGIN = 1), dapply(mtcars, sum, MARGIN = 1)) # drop = FALSE retains object structure expect_true(is.data.frame(dapply(mtcars, sum, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, sum, MARGIN = 1, drop = FALSE))) expect_true(is.matrix(dapply(m, sum, drop = FALSE))) expect_true(is.matrix(dapply(m, sum, MARGIN = 1, drop = FALSE))) # matrix <> data.frame conversions without drop dimensions expect_equal(dapply(m, sum, drop = FALSE), dapply(mtcars, sum, return = "matrix", drop = FALSE)) expect_equal(dapply(mtcars, sum, drop = FALSE), dapply(m, sum, return = "data.frame", drop = FALSE)) # ... but if function is vector value, drop = FALSE does nothing expect_true(is.data.frame(dapply(mtcars, log, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, log, MARGIN = 1, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, quantile, drop = FALSE))) expect_true(is.data.frame(dapply(mtcars, quantile, MARGIN = 1, drop = FALSE))) expect_true(is.matrix(dapply(m, log, drop = FALSE))) expect_true(is.matrix(dapply(m, log, MARGIN = 1, drop = FALSE))) expect_true(is.matrix(dapply(m, quantile, drop = FALSE))) expect_true(is.matrix(dapply(m, quantile, MARGIN = 1, drop = FALSE))) # passing additional arguments works: dapply(mtcars, weighted.mean, mtcars$hp, na.rm = TRUE) dapply(m, weighted.mean, mtcars$hp, na.rm = TRUE) }) test_that("dapply produces errors for wrong input", { expect_error(dapply("a", sum)) expect_error(dapply(~ y, sum)) expect_error(dapply(iris3, sum)) expect_error(dapply(mtcars, sum2)) expect_error(dapply(mtcars, sum, MARGIN = 3)) expect_error(dapply(mtcars, sum, MARGIN = 1:2)) expect_error(dapply(mtcars, sum, MARGIN = "a")) expect_error(dapply(mtcars, sum, return = "bla", drop = FALSE)) }) collapse/tests/testthat/test-fbetween-fwithin-B-W.R0000644000176200001440000020677014777170131022032 0ustar liggesuserscontext("fbetween / B and fwithin / W") # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(rep(1:10, each = 10)) g <- as.factor(rep(c(1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,7,7,7,7,7,7,7,10,10,10,10,10,10,10,10,10,10))) mtcNA <- na_insert(mtcars) mtcNA[1,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" # x = rnorm(1e7) # xNA = x # xNA[sample.int(1e7,1e6)] <- NA # w = abs(100*rnorm(1e7)) # wNA = w # wNA[sample.int(1e7,1e6)] <- NA # microbenchmark(fwithin(xNA), fbetween(xNA), fbetween(xNA, w = w), fwithin(xNA, w = w), fbetween(xNA, w = wNA), fwithin(xNA, w = wNA)) # Unit: milliseconds # expr min lq mean median uq max neval cld # fwithin(xNA) 59.89809 61.45215 81.20188 63.21997 65.99563 303.5464 100 a # fbetween(xNA) 71.32829 73.00953 86.06850 74.51227 77.79108 275.6274 100 ab # fbetween(xNA, w = w) 81.95167 84.85050 106.61714 86.65870 90.92104 314.8245 100 cd # fwithin(xNA, w = w) 71.24841 73.72264 88.08572 75.32935 80.46232 279.5597 100 a c # fbetween(xNA, w = wNA) 90.99712 93.71455 107.38818 95.91545 98.16989 328.8951 100 d # fwithin(xNA, w = wNA) 80.13678 83.62511 103.55614 86.22361 93.18352 301.7070 100 bcd bsum <- base::sum between <- function(x, na.rm = FALSE) { if(!na.rm) return(ave(x)) cc <- !is.na(x) x[cc] <- ave(x[cc]) return(x) } within <- function(x, na.rm = FALSE, mean = 0) { if(!na.rm) return(x - ave(x) + mean) cc <- !is.na(x) m <- bsum(x[cc]) / bsum(cc) return(x - m + mean) } # NOTE: This is what fbetween and fwithin currently do: If missing values, compute weighted mean on available obs, and center x using it. But don't insert additional missing values in x for missing weights .. wbetween <- function(x, w, na.rm = FALSE) { if(na.rm) { xcc <- !is.na(x) cc <- xcc & !is.na(w) w <- w[cc] wm <- bsum(w * x[cc]) / bsum(w) x[xcc] <- rep(wm, bsum(xcc)) return(x) } else { wm <- bsum(w * x) / bsum(w) return(rep(wm, length(x))) } } wwithin <- function(x, w, na.rm = FALSE, mean = 0) { if(na.rm) { cc <- complete.cases(x, w) w <- w[cc] wm <- bsum(w * x[cc]) / bsum(w) } else wm <- bsum(w * x) / bsum(w) return(x - wm + mean) } # fbetween test_that("fbetween performs like between", { expect_equal(fbetween(NA), as.double(between(NA))) expect_equal(fbetween(NA, na.rm = FALSE), as.double(between(NA))) expect_equal(fbetween(1), between(1, na.rm = TRUE)) expect_equal(fbetween(1:3), between(1:3, na.rm = TRUE)) expect_equal(fbetween(-1:1), between(-1:1, na.rm = TRUE)) expect_equal(fbetween(1, na.rm = FALSE), between(1)) expect_equal(fbetween(1:3, na.rm = FALSE), between(1:3)) expect_equal(fbetween(-1:1, na.rm = FALSE), between(-1:1)) expect_equal(fbetween(x), between(x, na.rm = TRUE)) expect_equal(fbetween(x, na.rm = FALSE), between(x)) expect_equal(fbetween(xNA, na.rm = FALSE), between(xNA)) expect_equal(fbetween(xNA), between(xNA, na.rm = TRUE)) expect_equal(qM(fbetween(mtcars)), fbetween(m)) expect_equal(fbetween(m), dapply(m, between, na.rm = TRUE)) expect_equal(fbetween(m, na.rm = FALSE), dapply(m, between)) expect_equal(fbetween(mNA, na.rm = FALSE), dapply(mNA, between)) expect_equal(fbetween(mNA), dapply(mNA, between, na.rm = TRUE)) expect_equal(fbetween(mtcars), dapply(mtcars, between, na.rm = TRUE)) expect_equal(fbetween(mtcars, na.rm = FALSE), dapply(mtcars, between)) expect_equal(fbetween(mtcNA, na.rm = FALSE), dapply(mtcNA, between)) expect_equal(fbetween(mtcNA), dapply(mtcNA, between, na.rm = TRUE)) expect_equal(fbetween(x, f), BY(x, f, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fbetween(x, f, na.rm = FALSE), BY(x, f, between, use.g.names = FALSE)) expect_equal(fbetween(xNA, f, na.rm = FALSE), BY(xNA, f, between, use.g.names = FALSE)) expect_equal(fbetween(xNA, f), BY(xNA, f, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fbetween(m, g), BY(m, g, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fbetween(m, g, na.rm = FALSE), BY(m, g, between, use.g.names = FALSE)) expect_equal(fbetween(mNA, g, na.rm = FALSE), BY(mNA, g, between, use.g.names = FALSE)) expect_equal(fbetween(mNA, g), BY(mNA, g, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fbetween(mtcars, g), BY(mtcars, g, between, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fbetween(mtcars, g, na.rm = FALSE), BY(mtcars, g, between, use.g.names = FALSE)) expect_equal(fbetween(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, between, use.g.names = FALSE)) expect_equal(fbetween(mtcNA, g), BY(mtcNA, g, between, na.rm = TRUE, use.g.names = FALSE)) }) test_that("fbetween performs like fbetween with weights all equal", { expect_equal(fbetween(NA), fbetween(NA, w = 0.99999999)) expect_equal(fbetween(NA, na.rm = FALSE), fbetween(NA, w = 2.946, na.rm = FALSE)) expect_equal(fbetween(1), fbetween(1, w = 3)) expect_equal(fbetween(1:3), fbetween(1:3, w = rep(0.999,3))) expect_equal(fbetween(-1:1), fbetween(-1:1, w = rep(4.2,3))) expect_equal(fbetween(1, na.rm = FALSE), fbetween(1, w = 5, na.rm = FALSE)) expect_equal(fbetween(1:3, na.rm = FALSE), fbetween(1:3, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fbetween(-1:1, na.rm = FALSE), fbetween(-1:1, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fbetween(x), fbetween(x, w = rep(1,100))) expect_equal(fbetween(x, na.rm = FALSE), fbetween(x, w = rep(1.44565, 100), na.rm = FALSE)) expect_equal(fbetween(xNA, na.rm = FALSE), fbetween(xNA, w = rep(4.676587, 100), na.rm = FALSE)) expect_equal(fbetween(xNA), fbetween(xNA, w = rep(4.676587, 100))) expect_equal(fbetween(m), fbetween(m, w = rep(6587.3454, 32))) expect_equal(fbetween(m, na.rm = FALSE), fbetween(m, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mNA, na.rm = FALSE), fbetween(mNA, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mNA), fbetween(mNA, w = rep(6587.3454, 32))) expect_equal(fbetween(mtcars), fbetween(mtcars, w = rep(6787.3454, 32))) expect_equal(fbetween(mtcars, na.rm = FALSE), fbetween(mtcars, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mtcNA, na.rm = FALSE), fbetween(mtcNA, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fbetween(mtcNA), fbetween(mtcNA, w = rep(6787.3454, 32))) expect_equal(fbetween(x, f), fbetween(x, f, rep(546.78,100))) expect_equal(fbetween(x, f, na.rm = FALSE), fbetween(x, f, rep(5.88,100), na.rm = FALSE)) expect_equal(fbetween(xNA, f, na.rm = FALSE), fbetween(xNA, f, rep(52.7,100), na.rm = FALSE)) expect_equal(fbetween(xNA, f), fbetween(xNA, f, rep(5997456,100))) expect_equal(fbetween(m, g), fbetween(m, g, rep(546.78,32))) expect_equal(fbetween(m, g, na.rm = FALSE), fbetween(m, g, rep(0.0001,32), na.rm = FALSE)) expect_equal(fbetween(mNA, g, na.rm = FALSE), fbetween(mNA, g, rep(5.7,32), na.rm = FALSE)) expect_equal(fbetween(mNA, g), fbetween(mNA, g, rep(1.1,32))) expect_equal(fbetween(mtcars, g), fbetween(mtcars, g, rep(53,32))) expect_equal(fbetween(mtcars, g, na.rm = FALSE), fbetween(mtcars, g, rep(546.78,32), na.rm = FALSE)) expect_equal(fbetween(mtcNA, g, na.rm = FALSE), fbetween(mtcNA, g, rep(0.999999,32), na.rm = FALSE)) expect_equal(fbetween(mtcNA, g), fbetween(mtcNA, g, rep(999.9999,32))) }) test_that("fbetween with weights performs like wbetween (defined above)", { # complete weights expect_equal(fbetween(NA, w = 1), wbetween(NA, 1)) expect_equal(fbetween(NA, w = 1, na.rm = FALSE), wbetween(NA, 1)) expect_equal(fbetween(1, w = 1), wbetween(1, w = 1)) expect_equal(fbetween(1:3, w = 1:3), wbetween(1:3, 1:3)) expect_equal(fbetween(-1:1, w = 1:3), wbetween(-1:1, 1:3)) expect_equal(fbetween(1, w = 1, na.rm = FALSE), wbetween(1, 1)) expect_equal(fbetween(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wbetween(1:3, c(0.99,3454,1.111))) expect_equal(fbetween(-1:1, w = 1:3, na.rm = FALSE), wbetween(-1:1, 1:3)) expect_equal(fbetween(x, w = w), wbetween(x, w)) expect_equal(fbetween(x, w = w, na.rm = FALSE), wbetween(x, w)) expect_equal(fbetween(xNA, w = w, na.rm = FALSE), wbetween(xNA, w)) expect_equal(fbetween(xNA, w = w), wbetween(xNA, w, na.rm = TRUE)) expect_equal(qM(fbetween(mtcars, w = wdat)), fbetween(m, w = wdat)) expect_equal(fbetween(m, w = wdat), dapply(m, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(m, w = wdat, na.rm = FALSE), dapply(m, wbetween, wdat)) expect_equal(fbetween(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wbetween, wdat)) expect_equal(fbetween(mNA, w = wdat), dapply(mNA, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdat), dapply(mtcars, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wbetween, wdat)) expect_equal(fbetween(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wbetween, wdat)) expect_equal(fbetween(mtcNA, w = wdat), dapply(mtcNA, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(x, f, w), BY(x, f, wbetween, w)) expect_equal(fbetween(x, f, w, na.rm = FALSE), BY(x, f, wbetween, w)) expect_equal(fbetween(xNA, f, w, na.rm = FALSE), BY(xNA, f, wbetween, w)) expect_equal(fbetween(xNA, f, w), BY(xNA, f, wbetween, w, na.rm = TRUE)) expect_equal(fbetween(m, g, wdat), BY(m, g, wbetween, wdat)) expect_equal(fbetween(m, g, wdat, na.rm = FALSE), BY(m, g, wbetween, wdat)) expect_equal(fbetween(mNA, g, wdat, na.rm = FALSE), BY(mNA, g, wbetween, wdat)) expect_equal(fbetween(mNA, g, wdat), BY(mNA, g, wbetween, wdat, na.rm = TRUE)) expect_equal(fbetween(mtcars, g, wdat), BY(mtcars, g, wbetween, wdat)) expect_equal(fbetween(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, g, wbetween, wdat)) expect_equal(fbetween(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, g, wbetween, wdat)) expect_equal(fbetween(mtcNA, g, wdat), BY(mtcNA, g, wbetween, wdat, na.rm = TRUE)) # missing weights expect_equal(fbetween(NA, w = NA), wbetween(NA, NA)) expect_equal(fbetween(NA, w = NA, na.rm = FALSE), wbetween(NA, NA)) expect_equal(fbetween(1, w = NA), wbetween(1, w = NA)) expect_equal(fbetween(1:3, w = c(NA,1:2)), wbetween(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fbetween(-1:1, w = c(NA,1:2)), wbetween(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fbetween(1, w = NA, na.rm = FALSE), wbetween(1, NA)) expect_equal(fbetween(1:3, w = c(NA,1:2), na.rm = FALSE), wbetween(1:3, c(NA,1:2))) expect_equal(fbetween(-1:1, w = c(NA,1:2), na.rm = FALSE), wbetween(-1:1, c(NA,1:2))) expect_equal(fbetween(x, w = wNA), wbetween(x, wNA, na.rm = TRUE)) expect_equal(fbetween(x, w = wNA, na.rm = FALSE), wbetween(x, wNA)) expect_equal(fbetween(xNA, w = wNA, na.rm = FALSE), wbetween(xNA, wNA)) expect_equal(fbetween(xNA, w = wNA), wbetween(xNA, wNA, na.rm = TRUE)) expect_equal(qM(fbetween(mtcars, w = wdatNA)), fbetween(m, w = wdatNA)) expect_equal(fbetween(m, w = wdatNA), dapply(m, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(m, w = wdatNA, na.rm = FALSE), dapply(m, wbetween, wdatNA)) expect_equal(fbetween(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wbetween, wdatNA)) expect_equal(fbetween(mNA, w = wdatNA), dapply(mNA, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdatNA), dapply(mtcars, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, w = wdatNA), dapply(mtcNA, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(x, f, wNA), BY(x, f, wbetween, wNA, na.rm = TRUE)) expect_equal(fbetween(x, f, wNA, na.rm = FALSE), BY(x, f, wbetween, wNA)) expect_equal(fbetween(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wbetween, wNA)) expect_equal(fbetween(xNA, f, wNA), BY(xNA, f, wbetween, wNA, na.rm = TRUE)) expect_equal(fbetween(m, g, wdatNA), BY(m, g, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(m, g, wdatNA, na.rm = FALSE), BY(m, g, wbetween, wdatNA)) expect_equal(fbetween(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, g, wbetween, wdatNA)) expect_equal(fbetween(mNA, g, wdatNA), BY(mNA, g, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, g, wdatNA), BY(mtcars, g, wbetween, wdatNA, na.rm = TRUE)) expect_equal(fbetween(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, g, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, g, wbetween, wdatNA)) expect_equal(fbetween(mtcNA, g, wdatNA), BY(mtcNA, g, wbetween, wdatNA, na.rm = TRUE)) }) test_that("fbetween performs numerically stable", { expect_true(all_obj_equal(replicate(50, fbetween(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g), simplify = FALSE))) }) test_that("fbetween with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fbetween(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fbetween with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fbetween(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fbetween(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fbetween handles special values in the right way", { expect_equal(fbetween(NA), NA_real_) expect_equal(fbetween(NaN), NaN) expect_equal(fbetween(Inf), Inf) expect_equal(fbetween(c(Inf,Inf)), c(Inf,Inf)) expect_equal(fbetween(-Inf), -Inf) expect_equal(fbetween(c(-Inf,-Inf)), c(-Inf,-Inf)) expect_equal(fbetween(TRUE), 1) expect_equal(fbetween(FALSE), 0) expect_equal(fbetween(NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(NaN, na.rm = FALSE), NaN) expect_equal(fbetween(Inf, na.rm = FALSE), Inf) expect_equal(fbetween(c(Inf,Inf), na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(-Inf, na.rm = FALSE), -Inf) expect_equal(fbetween(c(-Inf,-Inf), na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, na.rm = FALSE), 1) expect_equal(fbetween(FALSE, na.rm = FALSE), 0) expect_equal(fbetween(c(1,NA)), c(1,NA_real_)) expect_equal(fbetween(c(1,NaN)), c(1,NaN)) expect_equal(fbetween(c(1,Inf)), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf)), c(-Inf,-Inf)) expect_equal(fbetween(c(1,Inf), na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(c(NA,-Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(FALSE,FALSE), na.rm = FALSE), c(0,0)) expect_equal(fbetween(c(1,NA), na.rm = FALSE), c(NA_real_,NA_real_)) }) test_that("fbetween with weights handles special values in the right way", { expect_equal(fbetween(NA, w = 1), NA_real_) expect_equal(fbetween(NaN, w = 1), NaN) expect_equal(fbetween(Inf, w = 1), Inf) expect_equal(fbetween(c(Inf,Inf), w = 1:2), c(Inf,Inf)) expect_equal(fbetween(-Inf, w = 1), -Inf) expect_equal(fbetween(c(-Inf,-Inf), w = 1:2), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, w = 1), 1) expect_equal(fbetween(FALSE, w = 1), 0) expect_equal(fbetween(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fbetween(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fbetween(Inf, w = 1, na.rm = FALSE), Inf) expect_equal(fbetween(c(Inf,Inf), w = 1:2, na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(-Inf, w = 1, na.rm = FALSE), -Inf) expect_equal(fbetween(c(-Inf,-Inf), w = 1:2, na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, w = 1, na.rm = FALSE), 1) expect_equal(fbetween(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fbetween(c(1,NA), w = 1:2), c(1,NA_real_)) expect_equal(fbetween(c(1,NaN), w = 1:2), c(1,NaN)) expect_equal(fbetween(c(1,Inf), w = 1:2), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), w = 1:2), c(-Inf,-Inf)) expect_equal(fbetween(c(1,Inf), w = 1:2, na.rm = FALSE), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), w = 1:2, na.rm = FALSE), c(-Inf,-Inf)) expect_equal(fbetween(c(NA,-Inf), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(FALSE,FALSE), w = 1:2, na.rm = FALSE), c(0,0)) expect_equal(fbetween(c(1,NA), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,-Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(1,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(NA, w = NA), NA_real_) expect_equal(fbetween(NaN, w = NA), NaN) expect_equal(fbetween(Inf, w = NA), NA_real_) expect_equal(fbetween(c(Inf,Inf), w = c(NA,2)), c(Inf,Inf)) expect_equal(fbetween(-Inf, w = NA), NA_real_) expect_equal(fbetween(c(-Inf,-Inf), w = c(NA,2)), c(-Inf,-Inf)) expect_equal(fbetween(TRUE, w = NA), NA_real_) expect_equal(fbetween(FALSE, w = NA), NA_real_) expect_equal(fbetween(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(c(Inf,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(c(-Inf,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fbetween(c(1,NA), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,NaN), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,Inf), w = c(NA,2)), c(Inf,Inf)) expect_equal(fbetween(c(1,-Inf), w = c(NA,2)), c(-Inf,-Inf)) expect_equal(fbetween(c(1,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(NA,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(FALSE,FALSE), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(c(1,NA), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,-Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fbetween(1:3, w = c(NA,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) }) test_that("fbetween produces errors for wrong input", { expect_error(fbetween("a")) expect_error(fbetween(NA_character_)) expect_error(fbetween(mNAc)) expect_error(fbetween(mNAc, f)) expect_error(fbetween(1:2,1:3)) expect_error(fbetween(m,1:31)) expect_error(fbetween(mtcars,1:31)) expect_error(fbetween(mtcars, w = 1:31)) expect_error(fbetween("a", w = 1)) expect_error(fbetween(1:2, w = 1:3)) expect_error(fbetween(NA_character_, w = 1)) expect_error(fbetween(mNAc, w = wdat)) expect_error(fbetween(mNAc, f, wdat)) expect_error(fbetween(mNA, w = 1:33)) expect_error(fbetween(1:2,1:2, 1:3)) expect_error(fbetween(m,1:32,1:20)) expect_error(fbetween(mtcars,1:32,1:10)) expect_error(fbetween(1:2, w = c("a","b"))) expect_error(fbetween(wlddev)) expect_error(fbetween(wlddev, w = wlddev$year)) expect_error(fbetween(wlddev, wlddev$iso3c)) expect_error(fbetween(wlddev, wlddev$iso3c, wlddev$year)) }) # B test_that("B produces errors for wrong input", { expect_error(B("a")) expect_error(B(NA_character_)) expect_error(B(mNAc)) expect_error(B(mNAc, f)) expect_error(B(1:2,1:3)) expect_error(B(m,1:31)) expect_error(B(mtcars,1:31)) expect_error(B(mtcars, w = 1:31)) expect_error(B("a", w = 1)) expect_error(B(1:2, w = c("a","b"))) expect_error(B(1:2, w = 1:3)) expect_error(B(NA_character_, w = 1)) expect_error(B(mNAc, w = wdat)) expect_error(B(mNAc, f, wdat)) expect_error(B(mNA, w = 1:33)) expect_error(B(mtcNA, w = 1:33)) expect_error(B(1:2,1:2, 1:3)) expect_error(B(m,1:32,1:20)) expect_error(B(mtcars,1:32,1:10)) expect_error(B(1:2, 1:3, 1:2)) expect_error(B(m,1:31,1:32)) expect_error(B(mtcars,1:33,1:32)) }) test_that("B.data.frame method is foolproof", { expect_visible(B(wlddev)) expect_visible(B(wlddev, w = wlddev$year)) expect_visible(B(wlddev, w = ~year)) expect_visible(B(wlddev, wlddev$iso3c)) expect_visible(B(wlddev, ~iso3c)) expect_visible(B(wlddev, ~iso3c + region)) expect_visible(B(wlddev, wlddev$iso3c, wlddev$year)) expect_visible(B(wlddev, ~iso3c, ~year)) expect_visible(B(wlddev, cols = 9:12)) expect_visible(B(wlddev, w = wlddev$year, cols = 9:12)) expect_visible(B(wlddev, w = ~year, cols = 9:12)) expect_visible(B(wlddev, wlddev$iso3c, cols = 9:12)) expect_visible(B(wlddev, ~iso3c, cols = 9:12)) expect_visible(B(wlddev, wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(B(wlddev, ~iso3c, ~year, cols = 9:12)) expect_visible(B(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, w = ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(B(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(B(wlddev, cols = NULL)) expect_error(B(wlddev, w = wlddev$year, cols = NULL)) expect_error(B(wlddev, w = ~year, cols = NULL)) expect_error(B(wlddev, wlddev$iso3c, cols = NULL)) expect_error(B(wlddev, ~iso3c, cols = NULL)) expect_error(B(wlddev, wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(B(wlddev, ~iso3c, ~year, cols = NULL)) expect_error(B(wlddev, cols = 9:14)) expect_error(B(wlddev, w = wlddev$year, cols = 9:14)) expect_error(B(wlddev, w = ~year, cols = 9:14)) expect_error(B(wlddev, wlddev$iso3c, cols = 9:14)) expect_error(B(wlddev, ~iso3c, cols = 9:14)) expect_error(B(wlddev, wlddev$iso3c, wlddev$year, cols = 9:14)) expect_error(B(wlddev, ~iso3c, ~year, cols = 9:14)) expect_error(B(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, w = ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(B(wlddev, w = mtcars)) expect_error(B(wlddev, w = 4)) expect_error(B(wlddev, w = "year")) expect_error(B(wlddev, w = ~year2)) # suppressWarnings(expect_error(B(wlddev, w = ~year + region))) expect_error(B(wlddev, mtcars)) expect_error(B(wlddev, 2)) expect_error(B(wlddev, "iso3c")) expect_error(B(wlddev, ~iso3c2)) expect_error(B(wlddev, ~iso3c + bla)) expect_error(B(wlddev, mtcars$mpg, mtcars$cyl)) expect_error(B(wlddev, 2, 4)) expect_error(B(wlddev, ~iso3c2, ~year2)) expect_error(B(wlddev, cols = ~bla)) expect_error(B(wlddev, w = ~bla, cols = 9:12)) expect_error(B(wlddev, w = 4, cols = 9:12)) expect_error(B(wlddev, w = "year", cols = 9:12)) expect_error(B(wlddev, w = ~yewar, cols = 9:12)) expect_error(B(wlddev, mtcars$mpg, cols = 9:12)) expect_error(B(wlddev, ~iso3c + ss, cols = 9:12)) expect_error(B(wlddev, 2, cols = 9:12)) expect_error(B(wlddev, "iso3c", cols = 9:12)) expect_error(B(wlddev, wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(B(wlddev, ~iso3c3, ~year, cols = 9:12)) expect_error(B(wlddev, cols = c("PC3GDP","LIFEEX"))) }) # fwithin test_that("fwithin performs like within", { expect_equal(fwithin(NA), as.double(within(NA))) expect_equal(fwithin(NA, na.rm = FALSE), as.double(within(NA))) expect_equal(fwithin(1), within(1, na.rm = TRUE)) expect_equal(fwithin(1:3), within(1:3, na.rm = TRUE)) expect_equal(fwithin(-1:1), within(-1:1, na.rm = TRUE)) expect_equal(fwithin(1, na.rm = FALSE), within(1)) expect_equal(fwithin(1:3, na.rm = FALSE), within(1:3)) expect_equal(fwithin(-1:1, na.rm = FALSE), within(-1:1)) expect_equal(fwithin(x), within(x, na.rm = TRUE)) expect_equal(fwithin(x, na.rm = FALSE), within(x)) expect_equal(fwithin(xNA, na.rm = FALSE), within(xNA)) expect_equal(fwithin(xNA), within(xNA, na.rm = TRUE)) expect_equal(qM(fwithin(mtcars)), fwithin(m)) expect_equal(fwithin(m), dapply(m, within, na.rm = TRUE)) expect_equal(fwithin(m, na.rm = FALSE), dapply(m, within)) expect_equal(fwithin(mNA, na.rm = FALSE), dapply(mNA, within)) expect_equal(fwithin(mNA), dapply(mNA, within, na.rm = TRUE)) expect_equal(fwithin(mtcars), dapply(mtcars, within, na.rm = TRUE)) expect_equal(fwithin(mtcars, na.rm = FALSE), dapply(mtcars, within)) expect_equal(fwithin(mtcNA, na.rm = FALSE), dapply(mtcNA, within)) expect_equal(fwithin(mtcNA), dapply(mtcNA, within, na.rm = TRUE)) expect_equal(fwithin(x, f), BY(x, f, within, na.rm = TRUE)) expect_equal(fwithin(x, f, na.rm = FALSE), BY(x, f, within)) expect_equal(fwithin(xNA, f, na.rm = FALSE), BY(xNA, f, within)) expect_equal(fwithin(xNA, f), BY(xNA, f, within, na.rm = TRUE)) expect_equal(fwithin(m, g), BY(m, g, within, na.rm = TRUE)) expect_equal(fwithin(m, g, na.rm = FALSE), BY(m, g, within)) expect_equal(fwithin(mNA, g, na.rm = FALSE), BY(mNA, g, within)) expect_equal(fwithin(mNA, g), BY(mNA, g, within, na.rm = TRUE)) expect_equal(fwithin(mtcars, g), BY(mtcars, g, within, na.rm = TRUE)) expect_equal(fwithin(mtcars, g, na.rm = FALSE), BY(mtcars, g, within)) expect_equal(fwithin(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, within)) expect_equal(fwithin(mtcNA, g), BY(mtcNA, g, within, na.rm = TRUE)) }) test_that("fwithin with custom mean performs like within (defined above)", { expect_equal(fwithin(x, mean = 4.8456), within(x, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, na.rm = FALSE, mean = 4.8456), within(x, mean = 4.8456)) expect_equal(fwithin(xNA, na.rm = FALSE, mean = 4.8456), within(xNA, mean = 4.8456)) expect_equal(fwithin(xNA, mean = 4.8456), within(xNA, na.rm = TRUE, mean = 4.8456)) expect_equal(qM(fwithin(mtcars, mean = 4.8456)), fwithin(m, mean = 4.8456)) expect_equal(fwithin(m, mean = 4.8456), dapply(m, within, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, na.rm = FALSE, mean = 4.8456), dapply(m, within, mean = 4.8456)) expect_equal(fwithin(mNA, na.rm = FALSE, mean = 4.8456), dapply(mNA, within, mean = 4.8456)) expect_equal(fwithin(mNA, mean = 4.8456), dapply(mNA, within, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, mean = 4.8456), BY(x, f, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(x, f, na.rm = FALSE, mean = 4.8456), BY(x, f, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(xNA, f, na.rm = FALSE, mean = 4.8456), BY(xNA, f, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(xNA, f, mean = 4.8456), BY(xNA, f, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(m, g, mean = 4.8456), BY(m, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(m, g, na.rm = FALSE, mean = 4.8456), BY(m, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(mNA, g, na.rm = FALSE, mean = 4.8456), BY(mNA, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(mNA, g, mean = 4.8456), BY(mNA, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, mean = 4.8456), BY(mtcars, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, na.rm = FALSE, mean = 4.8456), BY(mtcars, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, na.rm = FALSE, mean = 4.8456), BY(mtcNA, g, within, use.g.names = FALSE, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, mean = 4.8456), BY(mtcNA, g, within, na.rm = TRUE, use.g.names = FALSE, mean = 4.8456)) }) test_that("Centering on overall mean performs as intended", { expect_equal(fwithin(x, f, mean = "overall.mean"), BY(x, f, within, na.rm = TRUE, use.g.names = FALSE) + ave(x)) expect_equal(fwithin(x, f, na.rm = FALSE, mean = "overall.mean"), BY(x, f, within, use.g.names = FALSE) + ave(x)) # expect_equal(fwithin(xNA, f, na.rm = FALSE, mean = "overall.mean"), BY(xNA, f, within, use.g.names = FALSE) + B(xNA)) # Not the same !! expect_equal(fwithin(xNA, f, mean = "overall.mean"), BY(xNA, f, within, na.rm = TRUE, use.g.names = FALSE) + B(xNA)) expect_equal(fwithin(m, g, mean = "overall.mean"), BY(m, g, within, na.rm = TRUE, use.g.names = FALSE) + B(m)) expect_equal(fwithin(m, g, na.rm = FALSE, mean = "overall.mean"), BY(m, g, within, use.g.names = FALSE) + B(m)) # expect_equal(fwithin(mNA, g, na.rm = FALSE, mean = "overall.mean"), BY(mNA, g, within, use.g.names = FALSE) + B(mNA)) expect_equal(fwithin(mNA, g, mean = "overall.mean"), BY(mNA, g, within, na.rm = TRUE, use.g.names = FALSE) + B(mNA)) expect_equal(fwithin(mtcars, g, mean = "overall.mean"), BY(mtcars, g, within, na.rm = TRUE, use.g.names = FALSE) + B(mtcars)) expect_equal(fwithin(mtcars, g, na.rm = FALSE, mean = "overall.mean"), BY(mtcars, g, within, use.g.names = FALSE) + B(mtcars)) # expect_equal(fwithin(mtcNA, g, na.rm = FALSE, mean = "overall.mean"), BY(mtcNA, g, within, use.g.names = FALSE) + B(mtcNA)) expect_equal(fwithin(mtcNA, g, mean = "overall.mean"), BY(mtcNA, g, within, na.rm = TRUE, use.g.names = FALSE)+ B(mtcNA)) }) test_that("fwithin performs like fwithin with weights all equal", { expect_equal(fwithin(NA), fwithin(NA, w = 0.99999999)) expect_equal(fwithin(NA, na.rm = FALSE), fwithin(NA, w = 2.946, na.rm = FALSE)) expect_equal(fwithin(1), fwithin(1, w = 3)) expect_equal(fwithin(1:3), fwithin(1:3, w = rep(0.999,3))) expect_equal(fwithin(-1:1), fwithin(-1:1, w = rep(4.2,3))) expect_equal(fwithin(1, na.rm = FALSE), fwithin(1, w = 5, na.rm = FALSE)) expect_equal(fwithin(1:3, na.rm = FALSE), fwithin(1:3, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fwithin(-1:1, na.rm = FALSE), fwithin(-1:1, w = rep(1.44565, 3), na.rm = FALSE)) expect_equal(fwithin(x), fwithin(x, w = rep(1,100))) expect_equal(fwithin(x, na.rm = FALSE), fwithin(x, w = rep(1.44565, 100), na.rm = FALSE)) expect_equal(fwithin(xNA, na.rm = FALSE), fwithin(xNA, w = rep(4.676587, 100), na.rm = FALSE)) expect_equal(fwithin(xNA), fwithin(xNA, w = rep(4.676587, 100))) expect_equal(fwithin(m), fwithin(m, w = rep(6587.3454, 32))) expect_equal(fwithin(m, na.rm = FALSE), fwithin(m, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mNA, na.rm = FALSE), fwithin(mNA, w = rep(6587.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mNA), fwithin(mNA, w = rep(6587.3454, 32))) expect_equal(fwithin(mtcars), fwithin(mtcars, w = rep(6787.3454, 32))) expect_equal(fwithin(mtcars, na.rm = FALSE), fwithin(mtcars, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mtcNA, na.rm = FALSE), fwithin(mtcNA, w = rep(6787.3454, 32), na.rm = FALSE)) expect_equal(fwithin(mtcNA), fwithin(mtcNA, w = rep(6787.3454, 32))) expect_equal(fwithin(x, f), fwithin(x, f, rep(546.78,100))) expect_equal(fwithin(x, f, na.rm = FALSE), fwithin(x, f, rep(5.88,100), na.rm = FALSE)) expect_equal(fwithin(xNA, f, na.rm = FALSE), fwithin(xNA, f, rep(52.7,100), na.rm = FALSE)) expect_equal(fwithin(xNA, f), fwithin(xNA, f, rep(5997456,100))) expect_equal(fwithin(m, g), fwithin(m, g, rep(546.78,32))) expect_equal(fwithin(m, g, na.rm = FALSE), fwithin(m, g, rep(0.0001,32), na.rm = FALSE)) expect_equal(fwithin(mNA, g, na.rm = FALSE), fwithin(mNA, g, rep(5.7,32), na.rm = FALSE)) expect_equal(fwithin(mNA, g), fwithin(mNA, g, rep(1.1,32))) expect_equal(fwithin(mtcars, g), fwithin(mtcars, g, rep(53,32))) expect_equal(fwithin(mtcars, g, na.rm = FALSE), fwithin(mtcars, g, rep(546.78,32), na.rm = FALSE)) expect_equal(fwithin(mtcNA, g, na.rm = FALSE), fwithin(mtcNA, g, rep(0.999999,32), na.rm = FALSE)) expect_equal(fwithin(mtcNA, g), fwithin(mtcNA, g, rep(999.9999,32))) }) test_that("fwithin with weights performs like wwithin (defined above)", { # complete weights expect_equal(fwithin(NA, w = 1), wwithin(NA, 1)) expect_equal(fwithin(NA, w = 1, na.rm = FALSE), wwithin(NA, 1)) expect_equal(fwithin(1, w = 1), wwithin(1, w = 1)) expect_equal(fwithin(1:3, w = 1:3), wwithin(1:3, 1:3)) expect_equal(fwithin(-1:1, w = 1:3), wwithin(-1:1, 1:3)) expect_equal(fwithin(1, w = 1, na.rm = FALSE), wwithin(1, 1)) expect_equal(fwithin(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wwithin(1:3, c(0.99,3454,1.111))) expect_equal(fwithin(-1:1, w = 1:3, na.rm = FALSE), wwithin(-1:1, 1:3)) expect_equal(fwithin(x, w = w), wwithin(x, w)) expect_equal(fwithin(x, w = w, na.rm = FALSE), wwithin(x, w)) expect_equal(fwithin(xNA, w = w, na.rm = FALSE), wwithin(xNA, w)) expect_equal(fwithin(xNA, w = w), wwithin(xNA, w, na.rm = TRUE)) expect_equal(qM(fwithin(mtcars, w = wdat)), fwithin(m, w = wdat)) expect_equal(fwithin(m, w = wdat), dapply(m, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(m, w = wdat, na.rm = FALSE), dapply(m, wwithin, wdat)) expect_equal(fwithin(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wwithin, wdat)) expect_equal(fwithin(mNA, w = wdat), dapply(mNA, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdat), dapply(mtcars, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wwithin, wdat)) expect_equal(fwithin(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wwithin, wdat)) expect_equal(fwithin(mtcNA, w = wdat), dapply(mtcNA, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(x, f, w), BY(x, f, wwithin, w)) expect_equal(fwithin(x, f, w, na.rm = FALSE), BY(x, f, wwithin, w)) expect_equal(fwithin(xNA, f, w, na.rm = FALSE), BY(xNA, f, wwithin, w)) expect_equal(fwithin(xNA, f, w), BY(xNA, f, wwithin, w, na.rm = TRUE)) expect_equal(fwithin(m, g, wdat), BY(m, g, wwithin, wdat)) expect_equal(fwithin(m, g, wdat, na.rm = FALSE), BY(m, g, wwithin, wdat)) expect_equal(fwithin(mNA, g, wdat, na.rm = FALSE), BY(mNA, g, wwithin, wdat)) expect_equal(fwithin(mNA, g, wdat), BY(mNA, g, wwithin, wdat, na.rm = TRUE)) expect_equal(fwithin(mtcars, g, wdat), BY(mtcars, g, wwithin, wdat)) expect_equal(fwithin(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, g, wwithin, wdat)) expect_equal(fwithin(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, g, wwithin, wdat)) expect_equal(fwithin(mtcNA, g, wdat), BY(mtcNA, g, wwithin, wdat, na.rm = TRUE)) # missing weights expect_equal(fwithin(NA, w = NA), wwithin(NA, NA)) expect_equal(fwithin(NA, w = NA, na.rm = FALSE), wwithin(NA, NA)) expect_equal(fwithin(1, w = NA), wwithin(1, w = NA)) expect_equal(fwithin(1:3, w = c(NA,1:2)), wwithin(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fwithin(-1:1, w = c(NA,1:2)), wwithin(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fwithin(1, w = NA, na.rm = FALSE), wwithin(1, NA)) expect_equal(fwithin(1:3, w = c(NA,1:2), na.rm = FALSE), wwithin(1:3, c(NA,1:2))) expect_equal(fwithin(-1:1, w = c(NA,1:2), na.rm = FALSE), wwithin(-1:1, c(NA,1:2))) expect_equal(fwithin(x, w = wNA), wwithin(x, wNA, na.rm = TRUE)) expect_equal(fwithin(x, w = wNA, na.rm = FALSE), wwithin(x, wNA)) expect_equal(fwithin(xNA, w = wNA, na.rm = FALSE), wwithin(xNA, wNA)) expect_equal(fwithin(xNA, w = wNA), wwithin(xNA, wNA, na.rm = TRUE)) expect_equal(qM(fwithin(mtcars, w = wdatNA)), fwithin(m, w = wdatNA)) expect_equal(fwithin(m, w = wdatNA), dapply(m, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(m, w = wdatNA, na.rm = FALSE), dapply(m, wwithin, wdatNA)) expect_equal(fwithin(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wwithin, wdatNA)) expect_equal(fwithin(mNA, w = wdatNA), dapply(mNA, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdatNA), dapply(mtcars, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, w = wdatNA), dapply(mtcNA, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(x, f, wNA), BY(x, f, wwithin, wNA, na.rm = TRUE)) expect_equal(fwithin(x, f, wNA, na.rm = FALSE), BY(x, f, wwithin, wNA)) expect_equal(fwithin(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wwithin, wNA)) expect_equal(fwithin(xNA, f, wNA), BY(xNA, f, wwithin, wNA, na.rm = TRUE)) expect_equal(fwithin(m, g, wdatNA), BY(m, g, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(m, g, wdatNA, na.rm = FALSE), BY(m, g, wwithin, wdatNA)) expect_equal(fwithin(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, g, wwithin, wdatNA)) expect_equal(fwithin(mNA, g, wdatNA), BY(mNA, g, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, g, wdatNA), BY(mtcars, g, wwithin, wdatNA, na.rm = TRUE)) expect_equal(fwithin(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, g, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, g, wwithin, wdatNA)) expect_equal(fwithin(mtcNA, g, wdatNA), BY(mtcNA, g, wwithin, wdatNA, na.rm = TRUE)) }) test_that("fwithin with custom mean and weights performs like wwithin (defined above)", { # complete weights expect_equal(fwithin(x, w = w, mean = 4.8456), wwithin(x, w, mean = 4.8456)) expect_equal(fwithin(x, w = w, na.rm = FALSE, mean = 4.8456), wwithin(x, w, mean = 4.8456)) expect_equal(fwithin(xNA, w = w, na.rm = FALSE, mean = 4.8456), wwithin(xNA, w, mean = 4.8456)) expect_equal(fwithin(xNA, w = w, mean = 4.8456), wwithin(xNA, w, na.rm = TRUE, mean = 4.8456)) expect_equal(qM(fwithin(mtcars, w = wdat, mean = 4.8456)), fwithin(m, w = wdat, mean = 4.8456)) expect_equal(fwithin(m, w = wdat, mean = 4.8456), dapply(m, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(m, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(mNA, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdat, mean = 4.8456), dapply(mNA, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdat, mean = 4.8456), dapply(mtcars, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(mtcars, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdat, na.rm = FALSE, mean = 4.8456), dapply(mtcNA, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdat, mean = 4.8456), dapply(mtcNA, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, w, mean = 4.8456), BY(x, f, wwithin, w, mean = 4.8456)) expect_equal(fwithin(x, f, w, na.rm = FALSE, mean = 4.8456), BY(x, f, wwithin, w, mean = 4.8456)) expect_equal(fwithin(xNA, f, w, na.rm = FALSE, mean = 4.8456), BY(xNA, f, wwithin, w, mean = 4.8456)) expect_equal(fwithin(xNA, f, w, mean = 4.8456), BY(xNA, f, wwithin, w, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, g, wdat, mean = 4.8456), BY(m, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(m, g, wdat, na.rm = FALSE, mean = 4.8456), BY(m, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdat, na.rm = FALSE, mean = 4.8456), BY(mNA, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdat, mean = 4.8456), BY(mNA, g, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdat, mean = 4.8456), BY(mtcars, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdat, na.rm = FALSE, mean = 4.8456), BY(mtcars, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdat, na.rm = FALSE, mean = 4.8456), BY(mtcNA, g, wwithin, wdat, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdat, mean = 4.8456), BY(mtcNA, g, wwithin, wdat, na.rm = TRUE, mean = 4.8456)) # missing weights expect_equal(fwithin(x, w = wNA, mean = 4.8456), wwithin(x, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, w = wNA, na.rm = FALSE, mean = 4.8456), wwithin(x, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, w = wNA, na.rm = FALSE, mean = 4.8456), wwithin(xNA, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, w = wNA, mean = 4.8456), wwithin(xNA, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(qM(fwithin(mtcars, w = wdatNA, mean = 4.8456)), fwithin(m, w = wdatNA, mean = 4.8456)) expect_equal(fwithin(m, w = wdatNA, mean = 4.8456), dapply(m, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(m, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(mNA, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, w = wdatNA, mean = 4.8456), dapply(mNA, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdatNA, mean = 4.8456), dapply(mtcars, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(mtcars, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdatNA, na.rm = FALSE, mean = 4.8456), dapply(mtcNA, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, w = wdatNA, mean = 4.8456), dapply(mtcNA, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, wNA, mean = 4.8456), BY(x, f, wwithin, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(x, f, wNA, na.rm = FALSE, mean = 4.8456), BY(x, f, wwithin, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, f, wNA, na.rm = FALSE, mean = 4.8456), BY(xNA, f, wwithin, wNA, mean = 4.8456)) expect_equal(fwithin(xNA, f, wNA, mean = 4.8456), BY(xNA, f, wwithin, wNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, g, wdatNA, mean = 4.8456), BY(m, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(m, g, wdatNA, na.rm = FALSE, mean = 4.8456), BY(m, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdatNA, na.rm = FALSE, mean = 4.8456), BY(mNA, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mNA, g, wdatNA, mean = 4.8456), BY(mNA, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdatNA, mean = 4.8456), BY(mtcars, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) expect_equal(fwithin(mtcars, g, wdatNA, na.rm = FALSE, mean = 4.8456), BY(mtcars, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdatNA, na.rm = FALSE, mean = 4.8456), BY(mtcNA, g, wwithin, wdatNA, mean = 4.8456)) expect_equal(fwithin(mtcNA, g, wdatNA, mean = 4.8456), BY(mtcNA, g, wwithin, wdatNA, na.rm = TRUE, mean = 4.8456)) }) test_that("Weighted centering on overall weighted mean performs as intended", { # complete weights expect_equal(fwithin(x, f, w, mean = "overall.mean"), as.numeric(mapply(wwithin, split(x, f), split(w, f), na.rm = TRUE)) + B(x, w = w)) expect_equal(fwithin(x, f, w, na.rm = FALSE, mean = "overall.mean"), as.numeric(mapply(wwithin, split(x, f), split(w, f))) + B(x, w = w)) # expect_equal(fwithin(xNA, f, w, na.rm = FALSE, mean = "overall.mean"), as.numeric(mapply(wwithin, split(xNA, f), split(w, f))) + B(xNA, w = w)) # Not the same !! expect_equal(fwithin(xNA, f, w, mean = "overall.mean"), as.numeric(mapply(wwithin, split(xNA, f), split(w, f), na.rm = TRUE)) + B(xNA, w = w)) }) # Do more than this to test the rest ... test_that("fwithin performs numerically stable", { expect_true(all_obj_equal(replicate(50, fwithin(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g), simplify = FALSE))) }) test_that("fwithin with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fwithin(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fwithin with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fwithin(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fwithin(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fwithin handles special values in the right way", { expect_equal(fwithin(NA), NA_real_) expect_equal(fwithin(NaN), NaN) expect_equal(fwithin(Inf), NaN) expect_equal(fwithin(c(Inf,Inf)), c(NaN,NaN)) expect_equal(fwithin(-Inf), NaN) expect_equal(fwithin(c(-Inf,-Inf)), c(NaN,NaN)) expect_equal(fwithin(TRUE), 0) expect_equal(fwithin(FALSE), 0) expect_equal(fwithin(NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(NaN, na.rm = FALSE), NaN) expect_equal(fwithin(Inf, na.rm = FALSE), NaN) expect_equal(fwithin(c(Inf,Inf), na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(-Inf, na.rm = FALSE), NaN) expect_equal(fwithin(c(-Inf,-Inf), na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(TRUE, na.rm = FALSE), 0) expect_equal(fwithin(FALSE, na.rm = FALSE), 0) expect_equal(fwithin(c(1,NA)), c(0,NA_real_)) expect_equal(fwithin(c(1,NaN)), c(0,NaN)) expect_equal(fwithin(c(1,Inf)), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf)), c(Inf,NaN)) expect_equal(fwithin(c(1,Inf), na.rm = FALSE), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), na.rm = FALSE), c(Inf,NaN)) expect_equal(fwithin(c(NA,-Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(TRUE,TRUE), na.rm = FALSE), c(0,0)) expect_equal(fwithin(c(1,NA), na.rm = FALSE), c(NA_real_,NA_real_)) }) test_that("fwithin with weights handles special values in the right way", { expect_equal(fwithin(NA, w = 1), NA_real_) expect_equal(fwithin(NaN, w = 1), NaN) expect_equal(fwithin(Inf, w = 1), NaN) expect_equal(fwithin(c(Inf,Inf), w = 1:2), c(NaN,NaN)) expect_equal(fwithin(-Inf, w = 1), NaN) expect_equal(fwithin(c(-Inf,-Inf), w = 1:2), c(NaN,NaN)) expect_equal(fwithin(TRUE, w = 1), 0) expect_equal(fwithin(FALSE, w = 1), 0) expect_equal(fwithin(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fwithin(NaN, w = 1, na.rm = FALSE), NaN) expect_equal(fwithin(Inf, w = 1, na.rm = FALSE), NaN) expect_equal(fwithin(c(Inf,Inf), w = 1:2, na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(-Inf, w = 1, na.rm = FALSE), NaN) expect_equal(fwithin(c(-Inf,-Inf), w = 1:2, na.rm = FALSE), c(NaN,NaN)) expect_equal(fwithin(TRUE, w = 1, na.rm = FALSE), 0) expect_equal(fwithin(FALSE, w = 1, na.rm = FALSE), 0) expect_equal(fwithin(c(1,NA), w = 1:2), c(0,NA)) expect_equal(fwithin(c(1,NaN), w = 1:2), c(0,NaN)) expect_equal(fwithin(c(1,Inf), w = 1:2), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), w = 1:2), c(Inf,NaN)) expect_equal(fwithin(c(1,Inf), w = 1:2, na.rm = FALSE), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), w = 1:2, na.rm = FALSE), c(Inf,NaN)) expect_equal(fwithin(c(NA,-Inf), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(FALSE,FALSE), w = 1:2, na.rm = FALSE), c(0,0)) expect_equal(fwithin(c(1,NA), w = 1:2, na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(1:3, w = c(1,Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(1,-Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(1,Inf,3), na.rm = FALSE), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(1,-Inf,3), na.rm = FALSE), c(NaN,NaN,NaN)) expect_equal(fwithin(NA, w = NA), NA_real_) expect_equal(fwithin(NaN, w = NA), NaN) expect_equal(fwithin(Inf, w = NA), NaN) expect_equal(fwithin(c(Inf,Inf), w = c(NA,2)), c(NaN,NaN)) expect_equal(fwithin(-Inf, w = NA), NA_real_) expect_equal(fwithin(c(-Inf,-Inf), w = c(NA,2)), c(NaN,NaN)) expect_equal(fwithin(TRUE, w = NA), NA_real_) expect_equal(fwithin(FALSE, w = NA), NA_real_) expect_equal(fwithin(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(c(Inf,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(c(-Inf,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fwithin(c(1,NA), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,NaN), w = c(NA,2)), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,Inf), w = c(NA,2)), c(-Inf,NaN)) expect_equal(fwithin(c(1,-Inf), w = c(NA,2)), c(Inf,NaN)) expect_equal(fwithin(c(1,Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(NA,-Inf), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(FALSE,FALSE), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(c(1,NA), w = c(NA,2), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fwithin(1:3, w = c(NA,Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(NA,-Inf,3)), c(NaN,NaN,NaN)) expect_equal(fwithin(1:3, w = c(NA,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fwithin(1:3, w = c(NA,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) }) test_that("fwithin produces errors for wrong input", { expect_error(fwithin("a")) expect_error(fwithin(NA_character_)) expect_error(fwithin(mNAc)) expect_error(fwithin(mNAc, f)) expect_error(fwithin(1:2,1:3)) expect_error(fwithin(m,1:31)) expect_error(fwithin(mtcars,1:31)) expect_error(fwithin(mtcars, w = 1:31)) expect_error(fwithin("a", w = 1)) expect_error(fwithin(1:2, w = 1:3)) expect_error(fwithin(NA_character_, w = 1)) expect_error(fwithin(mNAc, w = wdat)) expect_error(fwithin(mNAc, f, wdat)) expect_error(fwithin(mNA, w = 1:33)) expect_error(fwithin(1:2,1:2, 1:3)) expect_error(fwithin(m,1:32,1:20)) expect_error(fwithin(mtcars,1:32,1:10)) expect_error(fwithin(1:2, w = c("a","b"))) expect_error(fwithin(wlddev)) expect_error(fwithin(wlddev, w = wlddev$year)) expect_error(fwithin(wlddev, wlddev$iso3c)) expect_error(fwithin(wlddev, wlddev$iso3c, wlddev$year)) }) test_that("fwithin shoots errors for wrong input to mean", { expect_error(fwithin(x, mean = FALSE)) expect_error(fwithin(m, mean = FALSE)) expect_error(fwithin(mtcars, mean = FALSE)) expect_error(fwithin(x, mean = "overall.mean")) expect_error(fwithin(m, mean = "overall.mean")) expect_error(fwithin(mtcars, mean = "overall.mean")) expect_error(fwithin(m, mean = fmean(m))) expect_error(fwithin(mtcars, mean = fmean(mtcars))) }) # W test_that("W produces errors for wrong input", { expect_error(W("a")) expect_error(W(NA_character_)) expect_error(W(mNAc)) expect_error(W(mNAc, f)) expect_error(W(1:2,1:3)) expect_error(W(m,1:31)) expect_error(W(mtcars,1:31)) expect_error(W(mtcars, w = 1:31)) expect_error(W("a", w = 1)) expect_error(W(1:2, w = c("a","b"))) expect_error(W(1:2, w = 1:3)) expect_error(W(NA_character_, w = 1)) expect_error(W(mNAc, w = wdat)) expect_error(W(mNAc, f, wdat)) expect_error(W(mNA, w = 1:33)) expect_error(W(mtcNA, w = 1:33)) expect_error(W(1:2,1:2, 1:3)) expect_error(W(m,1:32,1:20)) expect_error(W(mtcars,1:32,1:10)) expect_error(W(1:2, 1:3, 1:2)) expect_error(W(m,1:31,1:32)) expect_error(W(mtcars,1:33,1:32)) }) test_that("W.data.frame method is foolproof", { expect_visible(W(wlddev)) expect_visible(W(wlddev, w = wlddev$year)) expect_visible(W(wlddev, w = ~year)) expect_visible(W(wlddev, wlddev$iso3c)) expect_visible(W(wlddev, ~iso3c)) expect_visible(W(wlddev, ~iso3c + region)) expect_visible(W(wlddev, wlddev$iso3c, wlddev$year)) expect_visible(W(wlddev, ~iso3c, ~year)) expect_visible(W(wlddev, cols = 9:12)) expect_visible(W(wlddev, w = wlddev$year, cols = 9:12)) expect_visible(W(wlddev, w = ~year, cols = 9:12)) expect_visible(W(wlddev, wlddev$iso3c, cols = 9:12)) expect_visible(W(wlddev, ~iso3c, cols = 9:12)) expect_visible(W(wlddev, wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(W(wlddev, ~iso3c, ~year, cols = 9:12)) expect_visible(W(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, w = ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(W(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(W(wlddev, cols = NULL)) expect_error(W(wlddev, w = wlddev$year, cols = NULL)) expect_error(W(wlddev, w = ~year, cols = NULL)) expect_error(W(wlddev, wlddev$iso3c, cols = NULL)) expect_error(W(wlddev, ~iso3c, cols = NULL)) expect_error(W(wlddev, wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(W(wlddev, ~iso3c, ~year, cols = NULL)) expect_error(W(wlddev, cols = 9:14)) expect_error(W(wlddev, w = wlddev$year, cols = 9:14)) expect_error(W(wlddev, w = ~year, cols = 9:14)) expect_error(W(wlddev, wlddev$iso3c, cols = 9:14)) expect_error(W(wlddev, ~iso3c, cols = 9:14)) expect_error(W(wlddev, wlddev$iso3c, wlddev$year, cols = 9:14)) expect_error(W(wlddev, ~iso3c, ~year, cols = 9:14)) expect_error(W(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, w = ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(W(wlddev, w = mtcars)) expect_error(W(wlddev, w = 4)) expect_error(W(wlddev, w = "year")) expect_error(W(wlddev, w = ~year2)) # suppressWarnings(expect_error(W(wlddev, w = ~year + region))) expect_error(W(wlddev, mtcars)) expect_error(W(wlddev, 2)) expect_error(W(wlddev, "iso3c")) expect_error(W(wlddev, ~iso3c2)) expect_error(W(wlddev, ~iso3c + bla)) expect_error(W(wlddev, mtcars$mpg, mtcars$cyl)) expect_error(W(wlddev, 2, 4)) expect_error(W(wlddev, ~iso3c2, ~year2)) expect_error(W(wlddev, cols = ~bla)) expect_error(W(wlddev, w = ~bla, cols = 9:12)) expect_error(W(wlddev, w = 4, cols = 9:12)) expect_error(W(wlddev, w = "year", cols = 9:12)) expect_error(W(wlddev, w = ~yewar, cols = 9:12)) expect_error(W(wlddev, mtcars$mpg, cols = 9:12)) expect_error(W(wlddev, ~iso3c + ss, cols = 9:12)) expect_error(W(wlddev, 2, cols = 9:12)) expect_error(W(wlddev, "iso3c", cols = 9:12)) expect_error(W(wlddev, wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(W(wlddev, ~iso3c3, ~year, cols = 9:12)) expect_error(W(wlddev, cols = c("PC3GDP","LIFEEX"))) }) collapse/tests/testthat/test-fslice.R0000644000176200001440000000571014777170131017436 0ustar liggesuserscontext("fslice") data("iris") test_that("fslice works with integers and no grouping", { N <- c(1, 5, 17) for (n in N) { # first expect_equal( dplyr::slice_head(iris, n = n), fslice(iris, n = n) ) expect_equal( dplyr::slice_head(iris, n = n), fslice(iris, n = n, how = "first") ) # last expect_equal( setRownames(dplyr::slice_tail(iris, n = n)), fslice(iris, n = n, how = "last") ) # min expect_equal( iris |> dplyr::slice_min(Petal.Length, n = n, with_ties = FALSE), fslice(iris, n = n, how = "min", order.by = "Petal.Length") ) # max expect_equal( iris |> dplyr::slice_max(Petal.Length, n = n, with_ties = FALSE), fslice(iris, n = n, how = "max", order.by = "Petal.Length") ) } }) test_that("fslice works with proportions and no grouping", { N <- c(0.5, 0.75) for (n in N) { # first expect_equal( dplyr::slice_head(iris, prop = n), fslice(iris, n = n) ) expect_equal( dplyr::slice_head(iris, prop = n), fslice(iris, n = n, how = "first") ) # last expect_equal( setRownames(dplyr::slice_tail(iris, prop = n)), fslice(iris, n = n, how = "last") ) # min expect_equal( iris |> dplyr::slice_min(Petal.Length, prop = n, with_ties = FALSE), fslice(iris, n = n, how = "min", order.by = "Petal.Length") ) # max expect_equal( iris |> dplyr::slice_max(Petal.Length, prop = n, with_ties = FALSE), fslice(iris, n = n, how = "max", order.by = "Petal.Length") ) } }) test_that("fslice works with grouping", { N <- c(1, 5, 17) for (n in N) { # first expect_equal( iris |> dplyr::group_by(Species) |> dplyr::slice_head(n = n) |> qDF(), fslice(iris, "Species", n = n, how = "first") ) # last expect_equal( iris |> dplyr::group_by(Species) |> dplyr::slice_tail(n = n) |> qDF(), fslice(iris, "Species", n = n, how = "last") ) # min expect_equal( iris |> dplyr::group_by(Species) |> dplyr::slice_min(Petal.Length, n = n, with_ties = FALSE) |> qDF(), fslice(iris, "Species", n = n, how = "min", order.by = "Petal.Length") ) # max expect_equal( iris |> dplyr::group_by(Species) |> dplyr::slice_max(Petal.Length, n = n, with_ties = FALSE) |> qDF(), fslice(iris, "Species", n = n, how = "max", order.by = "Petal.Length") ) } }) test_that("fslice works with ties", { N <- 1 # c(1, 5, 17) for (n in N) { # min expect_equal( iris |> dplyr::group_by(Species) |> dplyr::slice_min(Petal.Length, n = n, with_ties = TRUE) |> qDF(), fslice(iris, "Species", n = n, how = "min", order.by = "Petal.Length", with.ties = TRUE) ) # max expect_equal( iris |> dplyr::group_by(Species) |> dplyr::slice_max(Petal.Length, n = n, with_ties = TRUE) |> qDF(), fslice(iris, "Species", n = n, how = "max", order.by = "Petal.Length", with.ties = TRUE) ) } }) collapse/tests/testthat/test-sf.R0000644000176200001440000000460114777170131016577 0ustar liggesuserscontext("collapse and sf") if(Sys.getenv("NMAC") == "TRUE" && requireNamespace(paste0("s", "f"), quietly = TRUE)) { eval(parse(text = paste0("libr", "ary(", "sf)"))) nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) test_that("sf methods work properly", { expect_visible(nc %>% fgroup_by(AREA)) expect_visible(nc %>% fgroup_by(AREA) %>% fgroup_vars) expect_visible(descr(nc)) expect_visible(qsu(nc)) expect_visible(varying(nc)) expect_true(any(names(num_vars(nc)) == "geometry")) expect_true(any(names(fselect(nc, AREA, NAME:FIPSNO)) == "geometry")) expect_true(any(names(gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO"))) == "geometry")) expect_true(any(names(fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO)) == "geometry")) expect_true(any(names(ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO"))) == "geometry")) expect_true(inherits(rsplit(nc, AREA ~ SID74)[[1L]], "sf")) expect_equal(names(`nv<-`(nc, NULL)), c("NAME", "FIPS", "geometry")) # nv(nc) <- NULL expect_equal(tfmv(nc, is.numeric, log), tfmv(nc, is.numeric, log, apply = FALSE)) expect_equal(length(nc %>% gby(NAME) %>% varying), length(nc) - 2L) expect_true(is.data.frame(nc %>% gby(NAME) %>% varying(any_group = FALSE))) expect_visible(funique(nc, cols = 1)) expect_true(length(fcompute(nc, log_AREA = log(AREA))) == 2L) expect_true(length(fcomputev(nc, "AREA", log)) == 2L) expect_true(length(fcomputev(nc, "AREA", log, keep = "PERIMETER")) == 3L) expect_true(length(fcomputev(nc, "AREA", fscale, apply = FALSE)) == 2L) expect_true(length(fcomputev(nc, "AREA", fscale, apply = FALSE, keep = "PERIMETER")) == 3L) expect_true(inherits(nc %>% fgroup_by(SID74) %>% fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = st_union(geometry)), "sf")) }) test_that("rbinding and mutating sf works well", { expect_identical(nc, nc %>% fgroup_by(AREA) %>% fmutate((.data)) %>% fungroup()) expect_identical(funique(nc, "AREA"), nc %>% fgroup_by(AREA, sort = FALSE) %>% ffirst(na.rm = FALSE)) expect_identical(roworder(nc, AREA), nc %>% rsplit(~ AREA, keep.by = TRUE) %>% unlist2d(FALSE) %>% copyMostAttrib(nc)) expect_identical(roworder(nc, AREA), nc %>% rsplit(~ AREA) %>% unlist2d("AREA") %>% fmutate(AREA = as.double(AREA)) %>% copyMostAttrib(nc)) }) } collapse/tests/testthat/test-fvar-fsd.R0000644000176200001440000016322114777170131017703 0ustar liggesuserscontext("fvar and fsd") bvar <- stats::var bsd <- stats::sd bsum <- base::sum # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) gf <- as_factor_GRP(g) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" na20 <- function(x) { x[is.na(x)] <- 0 x } # This is correct, including Bessels correction. wvar <- function(x, w, na.rm = FALSE) { if(na.rm) { cc <- complete.cases(x, w) x <- x[cc] # if(length(x) < 2L) return(NA_real_) w <- w[cc] } # else if(length(x) < 2L) return(if(is.na(x)) NA_real_ else 0) bsum(w*(x-weighted.mean(x,w))^2)/(bsum(w)-1) } # fvar using Welford's Algorithm (default) test_that("fvar performs like base::var", { expect_equal(fvar(NA), bvar(NA)) expect_equal(fvar(NA, na.rm = FALSE), bvar(NA)) expect_equal(fvar(1), bvar(1, na.rm = TRUE)) expect_equal(fvar(1:3), bvar(1:3, na.rm = TRUE)) expect_equal(fvar(-1:1), bvar(-1:1, na.rm = TRUE)) expect_equal(fvar(1, na.rm = FALSE), bvar(1)) expect_equal(fvar(1:3, na.rm = FALSE), bvar(1:3)) expect_equal(fvar(-1:1, na.rm = FALSE), bvar(-1:1)) expect_equal(fvar(x), bvar(x, na.rm = TRUE)) expect_equal(fvar(x, na.rm = FALSE), bvar(x)) expect_equal(fvar(xNA, na.rm = FALSE), bvar(xNA)) expect_equal(fvar(xNA), bvar(xNA, na.rm = TRUE)) expect_equal(fvar(mtcars), fvar(m)) expect_equal(fvar(m), dapply(m, bvar, na.rm = TRUE)) expect_equal(fvar(m, na.rm = FALSE), dapply(m, bvar)) expect_equal(fvar(mNA, na.rm = FALSE), dapply(mNA, bvar)) expect_equal(fvar(mNA), dapply(mNA, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars), dapply(mtcars, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, na.rm = FALSE), dapply(mtcars, bvar)) expect_equal(fvar(mtcNA, na.rm = FALSE), dapply(mtcNA, bvar)) expect_equal(fvar(mtcNA), dapply(mtcNA, bvar, na.rm = TRUE)) expect_equal(fvar(x, f), BY(x, f, bvar, na.rm = TRUE)) expect_equal(fvar(x, f, na.rm = FALSE), BY(x, f, bvar)) expect_equal(fvar(xNA, f, na.rm = FALSE), BY(xNA, f, bvar)) expect_equal(fvar(xNA, f), BY(xNA, f, bvar, na.rm = TRUE)) expect_equal(fvar(m, g), BY(m, g, bvar, na.rm = TRUE)) expect_equal(fvar(m, g, na.rm = FALSE), BY(m, g, bvar)) expect_equal(fvar(mNA, g, na.rm = FALSE), BY(mNA, g, bvar)) expect_equal(fvar(mNA, g), BY(mNA, g, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, g), BY(mtcars, g, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, g, na.rm = FALSE), BY(mtcars, g, bvar)) expect_equal(fvar(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bvar)) expect_equal(fvar(mtcNA, g), BY(mtcNA, g, bvar, na.rm = TRUE)) }) test_that("fvar with weights performs as intended (unbiased)", { expect_equal(fvar(c(2,2,4,5,5,5)), fvar(c(2,4,5), w = c(2,1,3))) expect_equal(fvar(c(2,2,4,5,5,5), na.rm = FALSE), fvar(c(2,4,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009)), fvar(c(2.456,4.123,5.009), w = c(2,1,3))) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE), fvar(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5)), fvar(c(2,NA,5), w = c(2,1,3))) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE), fvar(c(2,NA,5), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5)), fvar(c(2,4,5), w = c(2,NA,3))) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE), fvar(c(2,4,5), w = c(2,NA,3), na.rm = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009)), fvar(c(NA,4.123,5.009), w = c(2,1,3))) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fvar(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009)), fvar(c(2.456,4.123,5.009), w = c(NA,1,3))) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE), fvar(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fvar(v, f), fvar(vs, fs, w)) expect_equal(fvar(v, f, na.rm = FALSE), fvar(vs, fs, w, na.rm = FALSE)) expect_equal(fvar(v2, f), fvar(v2s, fs, w)) expect_equal(fvar(v2, f, na.rm = FALSE), fvar(v2s, fs, w, na.rm = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fvar(v, f), fvar(vs, fs, w)) expect_equal(fvar(v, f, na.rm = FALSE), fvar(vs, fs, w, na.rm = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fvar(v, f), fvar(vs, fs, w)) expect_equal(fvar(v, f, na.rm = FALSE), fvar(vs, fs, w, na.rm = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fvar(v2, f), fvar(v2s, fs, w)) expect_equal(fvar(v2, f, na.rm = FALSE), fvar(v2s, fs, w, na.rm = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fvar(v2, f), fvar(v2s, fs, w)) expect_equal(fvar(v2, f, na.rm = FALSE), fvar(v2s, fs, w, na.rm = FALSE)) }) test_that("fvar performs like fvar with unit weights", { expect_equal(fvar(NA), fvar(NA, w = 1)) expect_equal(fvar(NA, na.rm = FALSE), fvar(NA, w = 1, na.rm = FALSE)) expect_equal(fvar(1), fvar(1, w = 1)) expect_equal(fvar(1:3), fvar(1:3, w = rep(1,3))) expect_equal(fvar(-1:1), fvar(-1:1, w = rep(1,3))) expect_equal(fvar(1, na.rm = FALSE), fvar(1, w = 1, na.rm = FALSE)) expect_equal(fvar(1:3, na.rm = FALSE), fvar(1:3, w = rep(1, 3), na.rm = FALSE)) expect_equal(fvar(-1:1, na.rm = FALSE), fvar(-1:1, w = rep(1, 3), na.rm = FALSE)) expect_equal(fvar(x), fvar(x, w = rep(1,100))) expect_equal(fvar(x, na.rm = FALSE), fvar(x, w = rep(1, 100), na.rm = FALSE)) expect_equal(fvar(xNA, na.rm = FALSE), fvar(xNA, w = rep(1, 100), na.rm = FALSE)) expect_equal(fvar(xNA), fvar(xNA, w = rep(1, 100))) expect_equal(fvar(m), fvar(m, w = rep(1, 32))) expect_equal(fvar(m, na.rm = FALSE), fvar(m, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mNA, na.rm = FALSE), fvar(mNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mNA), fvar(mNA, w = rep(1, 32))) expect_equal(fvar(mtcars), fvar(mtcars, w = rep(1, 32))) expect_equal(fvar(mtcars, na.rm = FALSE), fvar(mtcars, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mtcNA, na.rm = FALSE), fvar(mtcNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fvar(mtcNA), fvar(mtcNA, w = rep(1, 32))) expect_equal(fvar(x, f), fvar(x, f, rep(1,100))) expect_equal(fvar(x, f, na.rm = FALSE), fvar(x, f, rep(1,100), na.rm = FALSE)) expect_equal(fvar(xNA, f, na.rm = FALSE), fvar(xNA, f, rep(1,100), na.rm = FALSE)) expect_equal(fvar(xNA, f), fvar(xNA, f, rep(1,100))) expect_equal(fvar(m, g), fvar(m, g, rep(1,32))) expect_equal(fvar(m, g, na.rm = FALSE), fvar(m, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mNA, g, na.rm = FALSE), fvar(mNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mNA, g), fvar(mNA, g, rep(1,32))) expect_equal(fvar(mtcars, g), fvar(mtcars, g, rep(1,32))) expect_equal(fvar(mtcars, g, na.rm = FALSE), fvar(mtcars, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mtcNA, g, na.rm = FALSE), fvar(mtcNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fvar(mtcNA, g), fvar(mtcNA, g, rep(1,32))) }) test_that("fvar with weights performs like wvar (defined above)", { # complete weights expect_equal(fvar(NA, w = 1), wvar(NA, 1)) expect_equal(fvar(NA, w = 1, na.rm = FALSE), wvar(NA, 1)) expect_equal(fvar(1, w = 1), wvar(1, w = 1)) expect_equal(fvar(1:3, w = 1:3), wvar(1:3, 1:3)) expect_equal(fvar(-1:1, w = 1:3), wvar(-1:1, 1:3)) expect_equal(fvar(1, w = 1, na.rm = FALSE), wvar(1, 1)) expect_equal(fvar(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wvar(1:3, c(0.99,3454,1.111))) expect_equal(fvar(-1:1, w = 1:3, na.rm = FALSE), wvar(-1:1, 1:3)) expect_equal(fvar(x, w = w), wvar(x, w)) expect_equal(fvar(x, w = w, na.rm = FALSE), wvar(x, w)) expect_equal(fvar(xNA, w = w, na.rm = FALSE), wvar(xNA, w)) expect_equal(fvar(xNA, w = w), wvar(xNA, w, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat), fvar(m, w = wdat)) expect_equal(fvar(m, w = wdat), dapply(m, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(m, w = wdat, na.rm = FALSE), dapply(m, wvar, wdat)) expect_equal(fvar(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wvar, wdat)) expect_equal(fvar(mNA, w = wdat), dapply(mNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat), dapply(mtcars, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat), dapply(mtcNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(x, f, w), BY(x, f, wvar, w)) expect_equal(fvar(x, f, w, na.rm = FALSE), BY(x, f, wvar, w)) expect_equal(fvar(xNA, f, w, na.rm = FALSE), BY(xNA, f, wvar, w)) expect_equal(na20(fvar(xNA, f, w)), na20(BY(xNA, f, wvar, w, na.rm = TRUE))) expect_equal(fvar(m, g, wdat), BY(m, gf, wvar, wdat)) expect_equal(fvar(m, g, wdat, na.rm = FALSE), BY(m, gf, wvar, wdat)) expect_equal(fvar(mNA, g, wdat, na.rm = FALSE), BY(mNA, gf, wvar, wdat)) expect_equal(na20(fvar(mNA, g, wdat)), na20(BY(mNA, gf, wvar, wdat, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdat), BY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, gf, wvar, wdat)) expect_equal(na20(fvar(mtcNA, g, wdat)), na20(BY(mtcNA, gf, wvar, wdat, na.rm = TRUE))) # missing weights expect_equal(fvar(NA, w = NA), wvar(NA, NA)) expect_equal(fvar(NA, w = NA, na.rm = FALSE), wvar(NA, NA)) expect_equal(fvar(1, w = NA), wvar(1, w = NA)) expect_equal(fvar(1:3, w = c(NA,1:2)), wvar(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(-1:1, w = c(NA,1:2)), wvar(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(1, w = NA, na.rm = FALSE), wvar(1, NA)) expect_equal(fvar(1:3, w = c(NA,1:2), na.rm = FALSE), wvar(1:3, c(NA,1:2))) expect_equal(fvar(-1:1, w = c(NA,1:2), na.rm = FALSE), wvar(-1:1, c(NA,1:2))) expect_equal(fvar(x, w = wNA), wvar(x, wNA, na.rm = TRUE)) expect_equal(fvar(x, w = wNA, na.rm = FALSE), wvar(x, wNA)) expect_equal(fvar(xNA, w = wNA, na.rm = FALSE), wvar(xNA, wNA)) expect_equal(fvar(xNA, w = wNA), wvar(xNA, wNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA), fvar(m, w = wdatNA)) expect_equal(fvar(m, w = wdatNA), dapply(m, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(m, w = wdatNA, na.rm = FALSE), dapply(m, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA), dapply(mNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA), dapply(mtcars, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA), dapply(mtcNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(na20(fvar(x, f, wNA)), na20(BY(x, f, wvar, wNA, na.rm = TRUE))) expect_equal(fvar(x, f, wNA, na.rm = FALSE), BY(x, f, wvar, wNA)) expect_equal(fvar(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wvar, wNA)) expect_equal(na20(fvar(xNA, f, wNA)), na20(BY(xNA, f, wvar, wNA, na.rm = TRUE))) expect_equal(na20(fvar(m, g, wdatNA)), na20(BY(m, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(m, g, wdatNA, na.rm = FALSE), BY(m, gf, wvar, wdatNA)) expect_equal(fvar(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mNA, g, wdatNA)), na20(BY(mNA, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(na20(fvar(mtcars, g, wdatNA)), na20(BY(mtcars, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, gf, wvar, wdatNA)) expect_equal(fvar(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mtcNA, g, wdatNA)), na20(BY(mtcNA, gf, wvar, wdatNA, na.rm = TRUE))) }) test_that("fvar performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g), simplify = FALSE))) }) test_that("fvar with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fvar with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fvar handles special values in the right way", { expect_equal(fvar(NA), NA_real_) expect_equal(fvar(NaN), NA_real_) expect_equal(fvar(Inf), NA_real_) expect_equal(fvar(-Inf), NA_real_) expect_equal(fvar(TRUE), NA_real_) expect_equal(fvar(FALSE), NA_real_) expect_equal(fvar(NA, na.rm = FALSE), NA_real_) expect_equal(fvar(NaN, na.rm = FALSE), NA_real_) expect_equal(fvar(Inf, na.rm = FALSE), NA_real_) expect_equal(fvar(-Inf, na.rm = FALSE), NA_real_) expect_equal(fvar(TRUE, na.rm = FALSE), NA_real_) expect_equal(fvar(FALSE, na.rm = FALSE), NA_real_) expect_equal(fvar(c(1,NA)), NA_real_) expect_equal(fvar(c(1,NaN)), NA_real_) expect_equal(fvar(c(1,Inf)), NA_real_) expect_equal(fvar(c(1,-Inf)), NA_real_) expect_equal(fvar(c(FALSE,TRUE)), 0.5) expect_equal(fvar(c(FALSE,FALSE)), 0) expect_equal(fvar(c(1,Inf), na.rm = FALSE), NA_real_) expect_equal(fvar(c(1,-Inf), na.rm = FALSE), NA_real_) expect_equal(fvar(c(FALSE,TRUE), na.rm = FALSE), 0.5) expect_equal(fvar(c(FALSE,FALSE), na.rm = FALSE), 0) }) test_that("fvar with weights handles special values in the right way", { expect_equal(fvar(NA, w = 1), NA_real_) expect_equal(fvar(NaN, w = 1), NA_real_) expect_equal(fvar(Inf, w = 1), NA_real_) expect_equal(fvar(-Inf, w = 1), NA_real_) expect_equal(fvar(TRUE, w = 1), NA_real_) expect_equal(fvar(FALSE, w = 1), NA_real_) expect_equal(fvar(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(NaN, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(-Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(TRUE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(FALSE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fvar(NA, w = NA), NA_real_) expect_equal(fvar(NaN, w = NA), NA_real_) expect_equal(fvar(Inf, w = NA), NA_real_) expect_equal(fvar(-Inf, w = NA), NA_real_) expect_equal(fvar(TRUE, w = NA), NA_real_) expect_equal(fvar(FALSE, w = NA), NA_real_) expect_equal(fvar(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3)), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3)), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3), na.rm = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3), na.rm = FALSE), NA_real_) }) test_that("fvar produces errors for wrong input", { expect_error(fvar("a")) expect_error(fvar(NA_character_)) expect_error(fvar(mNAc)) expect_error(fvar(mNAc, f)) expect_error(fvar(1:2,1:3)) expect_error(fvar(m,1:31)) expect_error(fvar(mtcars,1:31)) expect_error(fvar(mtcars, w = 1:31)) expect_error(fvar("a", w = 1)) expect_error(fvar(1:2, w = 1:3)) expect_error(fvar(NA_character_, w = 1)) expect_error(fvar(mNAc, w = wdat)) expect_error(fvar(mNAc, f, wdat)) expect_error(fvar(mNA, w = 1:33)) expect_error(fvar(1:2,1:2, 1:3)) expect_error(fvar(m,1:32,1:20)) expect_error(fvar(mtcars,1:32,1:10)) expect_error(fvar(1:2, w = c("a","b"))) expect_error(fvar(wlddev)) expect_error(fvar(wlddev, w = wlddev$year)) expect_error(fvar(wlddev, wlddev$iso3c)) expect_error(fvar(wlddev, wlddev$iso3c, wlddev$year)) }) # Repeating all tests for the other algorithm test_that("fvar with direct algorithm performs like base::var", { expect_equal(fvar(NA, stable.algo = FALSE), bvar(NA)) expect_equal(fvar(NA, na.rm = FALSE, stable.algo = FALSE), bvar(NA)) expect_equal(fvar(1, stable.algo = FALSE), bvar(1, na.rm = TRUE)) expect_equal(fvar(1:3, stable.algo = FALSE), bvar(1:3, na.rm = TRUE)) expect_equal(fvar(-1:1, stable.algo = FALSE), bvar(-1:1, na.rm = TRUE)) expect_equal(fvar(1, na.rm = FALSE, stable.algo = FALSE), bvar(1)) expect_equal(fvar(1:3, na.rm = FALSE, stable.algo = FALSE), bvar(1:3)) expect_equal(fvar(-1:1, na.rm = FALSE, stable.algo = FALSE), bvar(-1:1)) expect_equal(fvar(x, stable.algo = FALSE), bvar(x, na.rm = TRUE)) expect_equal(fvar(x, na.rm = FALSE, stable.algo = FALSE), bvar(x)) expect_equal(fvar(xNA, na.rm = FALSE, stable.algo = FALSE), bvar(xNA)) expect_equal(fvar(xNA, stable.algo = FALSE), bvar(xNA, na.rm = TRUE)) expect_equal(fvar(mtcars, stable.algo = FALSE), fvar(m)) expect_equal(fvar(m, stable.algo = FALSE), dapply(m, bvar, na.rm = TRUE)) expect_equal(fvar(m, na.rm = FALSE, stable.algo = FALSE), dapply(m, bvar)) expect_equal(fvar(mNA, na.rm = FALSE, stable.algo = FALSE), dapply(mNA, bvar)) expect_equal(fvar(mNA, stable.algo = FALSE), dapply(mNA, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, stable.algo = FALSE), dapply(mtcars, bvar, na.rm = TRUE)) expect_equal(fvar(mtcars, na.rm = FALSE, stable.algo = FALSE), dapply(mtcars, bvar)) expect_equal(fvar(mtcNA, na.rm = FALSE, stable.algo = FALSE), dapply(mtcNA, bvar)) expect_equal(fvar(mtcNA, stable.algo = FALSE), dapply(mtcNA, bvar, na.rm = TRUE)) expect_equal(fvar(x, f, stable.algo = FALSE), BY(x, f, bvar, na.rm = TRUE)) expect_equal(fvar(x, f, na.rm = FALSE, stable.algo = FALSE), BY(x, f, bvar)) expect_equal(fvar(xNA, f, na.rm = FALSE, stable.algo = FALSE), BY(xNA, f, bvar)) expect_equal(fvar(xNA, f, stable.algo = FALSE), BY(xNA, f, bvar, na.rm = TRUE)) # failed? # expect_equal(fvar(m, g, stable.algo = FALSE), BY(m, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(m, g, na.rm = FALSE, stable.algo = FALSE), BY(m, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mNA, g, na.rm = FALSE, stable.algo = FALSE), BY(mNA, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mNA, g, stable.algo = FALSE), BY(mNA, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcars, g, stable.algo = FALSE), BY(mtcars, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcars, g, na.rm = FALSE, stable.algo = FALSE), BY(mtcars, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcNA, g, na.rm = FALSE, stable.algo = FALSE), BY(mtcNA, g, bvar)) # failed on arch i386 CMD check and patched-solaris-x86 # expect_equal(fvar(mtcNA, g, stable.algo = FALSE), BY(mtcNA, g, bvar, na.rm = TRUE)) # failed on arch i386 CMD check and patched-solaris-x86 }) test_that("fvar with direct algorithm and weights performs as intended (unbiased)", { expect_equal(fvar(c(2,2,4,5,5,5), stable.algo = FALSE), fvar(c(2,4,5), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(2,2,4,5,5,5), na.rm = FALSE, stable.algo = FALSE), fvar(c(2,4,5), w = c(2,1,3), na.rm = FALSE), stable.algo = FALSE) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009), stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(2.456,2.456,4.123,5.009,5.009,5.009), na.rm = FALSE, stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(2,1,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), stable.algo = FALSE), fvar(c(2,NA,5), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE, stable.algo = FALSE), fvar(c(2,NA,5), w = c(2,1,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), stable.algo = FALSE), fvar(c(2,4,5), w = c(2,NA,3), stable.algo = FALSE)) expect_equal(fvar(c(2,2,NA,5,5,5), na.rm = FALSE, stable.algo = FALSE), fvar(c(2,4,5), w = c(2,NA,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), stable.algo = FALSE), fvar(c(NA,4.123,5.009), w = c(2,1,3), stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE, stable.algo = FALSE), fvar(c(NA,4.123,5.009), w = c(2,1,3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(NA,1,3), stable.algo = FALSE)) expect_equal(fvar(c(NA,NA,4.123,5.009,5.009,5.009), na.rm = FALSE, stable.algo = FALSE), fvar(c(2.456,4.123,5.009), w = c(NA,1,3), na.rm = FALSE, stable.algo = FALSE)) f <- as.factor(rep(1:2, each = 6)); fs <- as.factor(rep(1:2, each = 3)) v <- c(2,2,4,5,5,5,2,2,4,5,5,5); vs <- c(2,4,5,2,4,5); w <- c(2,1,3,2,1,3) v2 <- c(2.456,2.456,4.123,5.009,5.009,5.009,2.456,2.456,4.123,5.009,5.009,5.009); v2s <- c(2.456,4.123,5.009,2.456,4.123,5.009) expect_equal(fvar(v, f, stable.algo = FALSE), fvar(vs, fs, w, stable.algo = FALSE)) expect_equal(fvar(v, f, na.rm = FALSE, stable.algo = FALSE), fvar(vs, fs, w, na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(v2, f, stable.algo = FALSE), fvar(v2s, fs, w, stable.algo = FALSE)) expect_equal(fvar(v2, f, na.rm = FALSE, stable.algo = FALSE), fvar(v2s, fs, w, na.rm = FALSE, stable.algo = FALSE)) v[c(3,9)] <- NA; vs[c(2,5)] <- NA expect_equal(fvar(v, f, stable.algo = FALSE), fvar(vs, fs, w, stable.algo = FALSE)) expect_equal(fvar(v, f, na.rm = FALSE, stable.algo = FALSE), fvar(vs, fs, w, na.rm = FALSE, stable.algo = FALSE)) vs[c(2,5)] <- 4; w[c(2,5)] <- NA expect_equal(fvar(v, f, stable.algo = FALSE), fvar(vs, fs, w, stable.algo = FALSE)) expect_equal(fvar(v, f, na.rm = FALSE, stable.algo = FALSE), fvar(vs, fs, w, na.rm = FALSE, stable.algo = FALSE)) w[c(2,5)] <- 1; v2[c(1:2,7:8)] <- NA; v2s[c(1,4)] <- NA expect_equal(fvar(v2, f, stable.algo = FALSE), fvar(v2s, fs, w, stable.algo = FALSE)) expect_equal(fvar(v2, f, na.rm = FALSE, stable.algo = FALSE), fvar(v2s, fs, w, na.rm = FALSE, stable.algo = FALSE)) v2s[c(1,4)] <- 2.456; w[c(1,4)] <- NA expect_equal(fvar(v2, f, stable.algo = FALSE), fvar(v2s, fs, w, stable.algo = FALSE)) expect_equal(fvar(v2, f, na.rm = FALSE, stable.algo = FALSE), fvar(v2s, fs, w, na.rm = FALSE, stable.algo = FALSE)) }) test_that("fvar with direct algorithm performs like fvar with unit weights", { expect_equal(fvar(NA, stable.algo = FALSE), fvar(NA, w = 1, stable.algo = FALSE)) expect_equal(fvar(NA, na.rm = FALSE, stable.algo = FALSE), fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(1, stable.algo = FALSE), fvar(1, w = 1, stable.algo = FALSE)) expect_equal(fvar(1:3, stable.algo = FALSE), fvar(1:3, w = rep(1,3), stable.algo = FALSE)) expect_equal(fvar(-1:1, stable.algo = FALSE), fvar(-1:1, w = rep(1,3), stable.algo = FALSE)) expect_equal(fvar(1, na.rm = FALSE, stable.algo = FALSE), fvar(1, w = 1, na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(1:3, na.rm = FALSE, stable.algo = FALSE), fvar(1:3, w = rep(1, 3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(-1:1, na.rm = FALSE, stable.algo = FALSE), fvar(-1:1, w = rep(1, 3), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(x, stable.algo = FALSE), fvar(x, w = rep(1,100), stable.algo = FALSE)) expect_equal(fvar(x, na.rm = FALSE, stable.algo = FALSE), fvar(x, w = rep(1, 100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, na.rm = FALSE, stable.algo = FALSE), fvar(xNA, w = rep(1, 100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, stable.algo = FALSE), fvar(xNA, w = rep(1, 100), stable.algo = FALSE)) expect_equal(fvar(m, stable.algo = FALSE), fvar(m, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(m, na.rm = FALSE, stable.algo = FALSE), fvar(m, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, na.rm = FALSE, stable.algo = FALSE), fvar(mNA, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, stable.algo = FALSE), fvar(mNA, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(mtcars, stable.algo = FALSE), fvar(mtcars, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(mtcars, na.rm = FALSE, stable.algo = FALSE), fvar(mtcars, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, na.rm = FALSE, stable.algo = FALSE), fvar(mtcNA, w = rep(1, 32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, stable.algo = FALSE), fvar(mtcNA, w = rep(1, 32), stable.algo = FALSE)) expect_equal(fvar(x, f, stable.algo = FALSE), fvar(x, f, rep(1,100), stable.algo = FALSE)) expect_equal(fvar(x, f, na.rm = FALSE, stable.algo = FALSE), fvar(x, f, rep(1,100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, f, na.rm = FALSE, stable.algo = FALSE), fvar(xNA, f, rep(1,100), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(xNA, f, stable.algo = FALSE), fvar(xNA, f, rep(1,100), stable.algo = FALSE)) expect_equal(fvar(m, g, stable.algo = FALSE), fvar(m, g, rep(1,32), stable.algo = FALSE)) expect_equal(fvar(m, g, na.rm = FALSE, stable.algo = FALSE), fvar(m, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, g, na.rm = FALSE, stable.algo = FALSE), fvar(mNA, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mNA, g, stable.algo = FALSE), fvar(mNA, g, rep(1,32), stable.algo = FALSE)) expect_equal(fvar(mtcars, g, stable.algo = FALSE), fvar(mtcars, g, rep(1,32), stable.algo = FALSE)) expect_equal(fvar(mtcars, g, na.rm = FALSE, stable.algo = FALSE), fvar(mtcars, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, g, na.rm = FALSE, stable.algo = FALSE), fvar(mtcNA, g, rep(1,32), na.rm = FALSE, stable.algo = FALSE)) expect_equal(fvar(mtcNA, g, stable.algo = FALSE), fvar(mtcNA, g, rep(1,32), stable.algo = FALSE)) }) test_that("fvar with weights performs like wvar (defined above)", { # complete weights expect_equal(fvar(NA, w = 1, stable.algo = FALSE), wvar(NA, 1)) expect_equal(fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE), wvar(NA, 1)) expect_equal(fvar(1, w = 1, stable.algo = FALSE), wvar(1, w = 1)) expect_equal(fvar(1:3, w = 1:3, stable.algo = FALSE), wvar(1:3, 1:3)) expect_equal(fvar(-1:1, w = 1:3, stable.algo = FALSE), wvar(-1:1, 1:3)) expect_equal(fvar(1, w = 1, na.rm = FALSE, stable.algo = FALSE), wvar(1, 1)) expect_equal(fvar(1:3, w = c(0.99,3454,1.111), na.rm = FALSE, stable.algo = FALSE), wvar(1:3, c(0.99,3454,1.111))) expect_equal(fvar(-1:1, w = 1:3, na.rm = FALSE, stable.algo = FALSE), wvar(-1:1, 1:3)) expect_equal(fvar(x, w = w, stable.algo = FALSE), wvar(x, w)) expect_equal(fvar(x, w = w, na.rm = FALSE, stable.algo = FALSE), wvar(x, w)) expect_equal(fvar(xNA, w = w, na.rm = FALSE, stable.algo = FALSE), wvar(xNA, w)) expect_equal(fvar(xNA, w = w, stable.algo = FALSE), wvar(xNA, w, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, stable.algo = FALSE), fvar(m, w = wdat)) expect_equal(fvar(m, w = wdat, stable.algo = FALSE), dapply(m, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(m, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(m, wvar, wdat)) expect_equal(fvar(mNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(mNA, wvar, wdat)) expect_equal(fvar(mNA, w = wdat, stable.algo = FALSE), dapply(mNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, stable.algo = FALSE), dapply(mtcars, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(mtcars, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), dapply(mtcNA, wvar, wdat)) expect_equal(fvar(mtcNA, w = wdat, stable.algo = FALSE), dapply(mtcNA, wvar, wdat, na.rm = TRUE)) expect_equal(fvar(x, f, w, stable.algo = FALSE), BY(x, f, wvar, w)) expect_equal(fvar(x, f, w, na.rm = FALSE, stable.algo = FALSE), BY(x, f, wvar, w)) expect_equal(fvar(xNA, f, w, na.rm = FALSE, stable.algo = FALSE), BY(xNA, f, wvar, w)) expect_equal(na20(fvar(xNA, f, w, stable.algo = FALSE)), na20(BY(xNA, f, wvar, w, na.rm = TRUE))) expect_equal(fvar(m, g, wdat, stable.algo = FALSE), BY(m, gf, wvar, wdat)) expect_equal(fvar(m, g, wdat, na.rm = FALSE, stable.algo = FALSE), BY(m, gf, wvar, wdat)) expect_equal(fvar(mNA, g, wdat, na.rm = FALSE), BY(mNA, gf, wvar, wdat)) expect_equal(na20(fvar(mNA, g, wdat, stable.algo = FALSE)), na20(BY(mNA, gf, wvar, wdat, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdat, stable.algo = FALSE), BY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcars, g, wdat, na.rm = FALSE, stable.algo = FALSE), BY(mtcars, gf, wvar, wdat)) expect_equal(fvar(mtcNA, g, wdat, na.rm = FALSE, stable.algo = FALSE), BY(mtcNA, gf, wvar, wdat)) expect_equal(na20(fvar(mtcNA, g, wdat, stable.algo = FALSE)), na20(BY(mtcNA, gf, wvar, wdat, na.rm = TRUE))) # missing weights expect_equal(fvar(NA, w = NA, stable.algo = FALSE), wvar(NA, NA)) expect_equal(fvar(NA, w = NA, na.rm = FALSE, stable.algo = FALSE), wvar(NA, NA)) expect_equal(fvar(1, w = NA, stable.algo = FALSE), wvar(1, w = NA)) expect_equal(fvar(1:3, w = c(NA,1:2), stable.algo = FALSE), wvar(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(-1:1, w = c(NA,1:2), stable.algo = FALSE), wvar(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fvar(1, w = NA, na.rm = FALSE, stable.algo = FALSE), wvar(1, NA)) expect_equal(fvar(1:3, w = c(NA,1:2), na.rm = FALSE, stable.algo = FALSE), wvar(1:3, c(NA,1:2))) expect_equal(fvar(-1:1, w = c(NA,1:2), na.rm = FALSE, stable.algo = FALSE), wvar(-1:1, c(NA,1:2))) expect_equal(fvar(x, w = wNA, stable.algo = FALSE), wvar(x, wNA, na.rm = TRUE)) expect_equal(fvar(x, w = wNA, na.rm = FALSE, stable.algo = FALSE), wvar(x, wNA)) expect_equal(fvar(xNA, w = wNA, na.rm = FALSE, stable.algo = FALSE), wvar(xNA, wNA)) expect_equal(fvar(xNA, w = wNA, stable.algo = FALSE), wvar(xNA, wNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, stable.algo = FALSE), fvar(m, w = wdatNA)) expect_equal(fvar(m, w = wdatNA, stable.algo = FALSE), dapply(m, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(m, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(m, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(mNA, wvar, wdatNA)) expect_equal(fvar(mNA, w = wdatNA, stable.algo = FALSE), dapply(mNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, stable.algo = FALSE), dapply(mtcars, wvar, wdatNA, na.rm = TRUE)) expect_equal(fvar(mtcars, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(mtcars, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), dapply(mtcNA, wvar, wdatNA)) expect_equal(fvar(mtcNA, w = wdatNA, stable.algo = FALSE), dapply(mtcNA, wvar, wdatNA, na.rm = TRUE)) expect_equal(na20(fvar(x, f, wNA, stable.algo = FALSE)), na20(BY(x, f, wvar, wNA, na.rm = TRUE))) expect_equal(fvar(x, f, wNA, na.rm = FALSE, stable.algo = FALSE), BY(x, f, wvar, wNA)) expect_equal(fvar(xNA, f, wNA, na.rm = FALSE, stable.algo = FALSE), BY(xNA, f, wvar, wNA)) expect_equal(na20(fvar(xNA, f, wNA, stable.algo = FALSE)), na20(BY(xNA, f, wvar, wNA, na.rm = TRUE))) expect_equal(na20(fvar(m, g, wdatNA, stable.algo = FALSE)), na20(BY(m, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(m, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), BY(m, gf, wvar, wdatNA)) expect_equal(fvar(mNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), BY(mNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mNA, g, wdatNA, stable.algo = FALSE)), na20(BY(mNA, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(na20(fvar(mtcars, g, wdatNA, stable.algo = FALSE)), na20(BY(mtcars, gf, wvar, wdatNA, na.rm = TRUE))) expect_equal(fvar(mtcars, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), BY(mtcars, gf, wvar, wdatNA)) expect_equal(fvar(mtcNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), BY(mtcNA, gf, wvar, wdatNA)) expect_equal(na20(fvar(mtcNA, g, wdatNA, stable.algo = FALSE)), na20(BY(mtcNA, gf, wvar, wdatNA, na.rm = TRUE))) }) test_that("fvar with direct algorithm performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, stable.algo = FALSE), simplify = FALSE))) }) test_that("fvar with direct algorithm and complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = 1, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, w, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdat, stable.algo = FALSE), simplify = FALSE))) }) test_that("fvar with direct algorithm and missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fvar(1, w = NA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(NA, w = NA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, w = wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, w = wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, w = wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(x, f, wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(xNA, f, wNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(m, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mNA, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcars, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA, na.rm = FALSE, stable.algo = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fvar(mtcNA, g, wdatNA, stable.algo = FALSE), simplify = FALSE))) }) test_that("fvar with direct algorithm handles special values in the right way", { expect_equal(fvar(NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,NA), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,NaN), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,Inf), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,-Inf), stable.algo = FALSE), NA_real_) expect_equal(fvar(c(FALSE,TRUE), stable.algo = FALSE), 0.5) expect_equal(fvar(c(FALSE,FALSE), stable.algo = FALSE), 0) expect_equal(fvar(c(1,Inf), na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(c(1,-Inf), na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(c(FALSE,TRUE), na.rm = FALSE, stable.algo = FALSE), 0.5) expect_equal(fvar(c(FALSE,FALSE), na.rm = FALSE, stable.algo = FALSE), 0) }) test_that("fvar with direct algorithm and weights handles special values in the right way", { expect_equal(fvar(NA, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = 1, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = 1, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = NA, stable.algo = FALSE), NA_real_) expect_equal(fvar(NA, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(NaN, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(Inf, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(-Inf, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(TRUE, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(FALSE, w = NA, na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3), stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3), stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,Inf,3), na.rm = FALSE, stable.algo = FALSE), NA_real_) expect_equal(fvar(1:3, w = c(1,-Inf,3), na.rm = FALSE, stable.algo = FALSE), NA_real_) }) test_that("fvar with direct algorithm produces errors for wrong input", { expect_error(fvar("a", stable.algo = FALSE)) expect_error(fvar(NA_character_, stable.algo = FALSE)) expect_error(fvar(mNAc, stable.algo = FALSE)) expect_error(fvar(mNAc, f, stable.algo = FALSE)) expect_error(fvar(1:2,1:3, stable.algo = FALSE)) expect_error(fvar(m,1:31, stable.algo = FALSE)) expect_error(fvar(mtcars,1:31, stable.algo = FALSE)) expect_error(fvar(mtcars, w = 1:31, stable.algo = FALSE)) expect_error(fvar("a", w = 1, stable.algo = FALSE)) expect_error(fvar(1:2, w = 1:3, stable.algo = FALSE)) expect_error(fvar(NA_character_, w = 1, stable.algo = FALSE)) expect_error(fvar(mNAc, w = wdat, stable.algo = FALSE)) expect_error(fvar(mNAc, f, wdat, stable.algo = FALSE)) expect_error(fvar(mNA, w = 1:33, stable.algo = FALSE)) expect_error(fvar(1:2,1:2, 1:3, stable.algo = FALSE)) expect_error(fvar(m,1:32,1:20, stable.algo = FALSE)) expect_error(fvar(mtcars,1:32,1:10, stable.algo = FALSE)) expect_error(fvar(1:2, w = c("a","b"), stable.algo = FALSE)) expect_error(fvar(wlddev, stable.algo = FALSE)) expect_error(fvar(wlddev, w = wlddev$year, stable.algo = FALSE)) expect_error(fvar(wlddev, wlddev$iso3c, stable.algo = FALSE)) expect_error(fvar(wlddev, wlddev$iso3c, wlddev$year, stable.algo = FALSE)) }) # fsd (not necessary to test in the same way because it's just sqrt(fvar())) test_that("fsd performs like base::sd", { expect_equal(fsd(NA), bsd(NA)) expect_equal(fsd(NA, na.rm = FALSE), bsd(NA)) expect_equal(fsd(1), bsd(1, na.rm = TRUE)) expect_equal(fsd(1:3), bsd(1:3, na.rm = TRUE)) expect_equal(fsd(-1:1), bsd(-1:1, na.rm = TRUE)) expect_equal(fsd(1, na.rm = FALSE), bsd(1)) expect_equal(fsd(1:3, na.rm = FALSE), bsd(1:3)) expect_equal(fsd(-1:1, na.rm = FALSE), bsd(-1:1)) expect_equal(fsd(x), bsd(x, na.rm = TRUE)) expect_equal(fsd(x, na.rm = FALSE), bsd(x)) expect_equal(fsd(xNA, na.rm = FALSE), bsd(xNA)) expect_equal(fsd(xNA), bsd(xNA, na.rm = TRUE)) expect_equal(fsd(mtcars), fsd(m)) expect_equal(fsd(m), dapply(m, bsd, na.rm = TRUE)) expect_equal(fsd(m, na.rm = FALSE), dapply(m, bsd)) expect_equal(fsd(mNA, na.rm = FALSE), dapply(mNA, bsd)) expect_equal(fsd(mNA), dapply(mNA, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars), dapply(mtcars, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars, na.rm = FALSE), dapply(mtcars, bsd)) expect_equal(fsd(mtcNA, na.rm = FALSE), dapply(mtcNA, bsd)) expect_equal(fsd(mtcNA), dapply(mtcNA, bsd, na.rm = TRUE)) expect_equal(fsd(x, f), BY(x, f, bsd, na.rm = TRUE)) expect_equal(fsd(x, f, na.rm = FALSE), BY(x, f, bsd)) expect_equal(fsd(xNA, f, na.rm = FALSE), BY(xNA, f, bsd)) expect_equal(fsd(xNA, f), BY(xNA, f, bsd, na.rm = TRUE)) expect_equal(fsd(m, g), BY(m, g, bsd, na.rm = TRUE)) expect_equal(fsd(m, g, na.rm = FALSE), BY(m, g, bsd)) expect_equal(fsd(mNA, g, na.rm = FALSE), BY(mNA, g, bsd)) expect_equal(fsd(mNA, g), BY(mNA, g, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars, g), BY(mtcars, g, bsd, na.rm = TRUE)) expect_equal(fsd(mtcars, g, na.rm = FALSE), BY(mtcars, g, bsd)) expect_equal(fsd(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bsd)) expect_equal(fsd(mtcNA, g), BY(mtcNA, g, bsd, na.rm = TRUE)) }) test_that("fsd performs numerically stable", { expect_true(all_obj_equal(replicate(50, fsd(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fsd(mtcNA, g), simplify = FALSE))) }) test_that("fsd handles special values in the right way", { expect_equal(fsd(NA), NA_real_) expect_equal(fsd(NaN), NA_real_) expect_equal(fsd(Inf), NA_real_) expect_equal(fsd(-Inf), NA_real_) expect_equal(fsd(TRUE), NA_real_) expect_equal(fsd(FALSE), NA_real_) expect_equal(fsd(NA, na.rm = FALSE), NA_real_) expect_equal(fsd(NaN, na.rm = FALSE), NA_real_) expect_equal(fsd(Inf, na.rm = FALSE), NA_real_) expect_equal(fsd(-Inf, na.rm = FALSE), NA_real_) expect_equal(fsd(TRUE, na.rm = FALSE), NA_real_) expect_equal(fsd(FALSE, na.rm = FALSE), NA_real_) }) test_that("fsd produces errors for wrong input", { expect_error(fsd("a")) expect_error(fsd(NA_character_)) expect_error(fsd(mNAc)) expect_error(fsd(mNAc, f)) expect_error(fsd(1:2,1:3)) expect_error(fsd(m,1:31)) expect_error(fsd(mtcars,1:31)) expect_error(fsd(mtcars, w = 1:31)) expect_error(fsd("a", w = 1)) expect_error(fsd(1:2, w = 1:3)) expect_error(fsd(NA_character_, w = 1)) expect_error(fsd(mNAc, w = wdat)) expect_error(fsd(mNAc, f, wdat)) expect_error(fsd(mNA, w = 1:33)) expect_error(fsd(1:2,1:2, 1:3)) expect_error(fsd(m,1:32,1:20)) expect_error(fsd(mtcars,1:32,1:10)) expect_error(fsd(1:2, w = c("a","b"))) expect_error(fsd(wlddev)) expect_error(fsd(wlddev, w = wlddev$year)) expect_error(fsd(wlddev, wlddev$iso3c)) expect_error(fsd(wlddev, wlddev$iso3c, wlddev$year)) }) collapse/tests/testthat/test-splitting.R0000644000176200001440000000553514777170131020213 0ustar liggesuserscontext("gsplit and rsplit") wld2 <- wlddev oldClass(wld2) <- NULL vlabels(wld2) <- NULL f <- wld2$iso3c ind <- 1:1000 fss <- f[ind] fl <- wld2[c("region", "income")] flss <- ss(fl, ind) test_that("gsplit / rsplit work like split", { for(i in seq_col(wld2)) { expect_equal(gsplit(wld2[[i]], f, TRUE), split(wld2[[i]], f)) expect_equal(gsplit(wld2[[i]], f, FALSE), `names<-`(split(wld2[[i]], f), NULL)) expect_equal(gsplit(wld2[[i]][ind], fss, TRUE), split(wld2[[i]][ind], fss)) expect_equal(rsplit(wld2[[i]][ind], fss), split(wld2[[i]][ind], fss, drop = TRUE)) # factor list expect_true(all_obj_equal(gsplit(wld2[[i]], fl, TRUE), rsplit(wld2[[i]], fl, flatten = TRUE), unlist(rsplit(wld2[[i]], fl), recursive = FALSE), split(wld2[[i]], fl, drop = TRUE, lex.order = TRUE))) expect_true(all_obj_equal(gsplit(wld2[[i]][ind], flss, TRUE), rsplit(wld2[[i]][ind], flss, flatten = TRUE), unlist(rsplit(wld2[[i]][ind], flss), recursive = FALSE), split(wld2[[i]][ind], flss, drop = TRUE, lex.order = TRUE))) } }) test_that("rsplit matrix method works as intended", { m = qM(nv(GGDC10S)) fl = lapply(GGDC10S[c("Country", "Variable")], qF, sort = FALSE) expect_equal(lapply(rsplit(m, GGDC10S$Country), unattrib), split(m, GGDC10S$Country)) expect_equal(lapply(rsplit(m, itn(fl), flatten = TRUE), unattrib), split(m, itn(fl))) expect_equal(rsplit(m, fl, flatten = TRUE), unlist(rsplit(m, fl), FALSE)) expect_true(all(vapply(rsplit(m, c(fl, GGDC10S["Year"]), flatten = TRUE), is.matrix, TRUE))) expect_true(!any(vapply(rsplit(m, c(fl, GGDC10S["Year"]), flatten = TRUE, drop.dim = TRUE), is.matrix, TRUE))) }) test_that("rsplit data frame method works as intended", { expect_equal(rsplit(mtcars, mtcars$cyl), split(mtcars, mtcars$cyl)) expect_equal(rsplit(mtcars, mpg ~ cyl), split(mtcars$mpg, mtcars$cyl)) expect_equal(rsplit(mtcars, mpg ~ cyl, simplify = FALSE), split(mtcars["mpg"], mtcars$cyl)) expect_true(all_obj_equal(rsplit(mtcars, mtcars[.c(cyl, vs, am)], flatten = TRUE), rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE, keep.by = TRUE), unlist(unlist(rsplit(mtcars, mtcars[.c(cyl, vs, am)]), FALSE), FALSE), unlist(unlist(rsplit(mtcars, ~ cyl + vs + am, keep.by = TRUE), FALSE), FALSE), split(mtcars, mtcars[.c(cyl, vs, am)], drop = TRUE, lex.order = TRUE))) expect_true(all_obj_equal(rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE), unlist(unlist(rsplit(mtcars, ~ cyl + vs + am), FALSE), FALSE), split(mtcars[names(mtcars) %!in% .c(cyl, vs, am)], mtcars[.c(cyl, vs, am)], drop = TRUE, lex.order = TRUE))) }) collapse/tests/testthat/test-flag-L-F.R0000644000176200001440000005304614777170131017463 0ustar liggesuserscontext("flag / L / F") # rm(list = ls()) # TODO: test computations on irregular time series and panels set.seed(101) x <- abs(10*rnorm(100)) xNA <- x xNA[sample.int(100, 20)] <- NA f <- as.factor(rep(1:10, each = 10)) t <- as.factor(rep(1:10, 10)) data <- setRownames(wlddev[wlddev$iso3c %in% c("BLZ","IND","USA","SRB","GRL"), ]) g <- GRP(droplevels(data$iso3c)) td <- as.factor(data$year) dataNA <- na_insert(data) m <- qM(data) suppressWarnings(storage.mode(m) <- "numeric") mNAc <- qM(dataNA) mNA <- mNAc suppressWarnings(storage.mode(mNA) <- "numeric") # Creatung unordered data: o = order(rnorm(100)) xuo = x[o] xNAuo = xNA[o] fuo = f[o] tuo = t[o] t2uo = seq_len(100)[o] o = order(o) od = order(rnorm(length(td))) muo = m[od, ] datauo = data[od, ] guo = as_factor_GRP(g)[od] tduo = td[od] t2duo = seq_along(od)[od] od = order(od) baselag <- function(x, n = 1) c(rep(NA_real_, n), x[1:(length(x)-n)]) baselead <- function(x, n = 1) c(rep(NA_real_, n), x[1:(length(x)-n)]) # flag test_that("flag performs like baselag", { expect_equal(flag(1:10), baselag(1:10)) expect_equal(flag(1:10, 2), baselag(1:10, 2)) expect_equal(flag(-1:1), baselag(-1:1)) expect_equal(flag(x), baselag(x)) expect_equal(flag(x, 2), baselag(x, 2)) expect_equal(flag(xNA), baselag(xNA)) expect_equal(flag(xNA, 2), baselag(xNA, 2)) expect_equal(flag(m, stubs = FALSE), dapply(m, baselag)) expect_equal(flag(m, 2, stubs = FALSE), dapply(m, baselag, 2)) expect_equal(flag(mNA, stubs = FALSE), dapply(mNA, baselag)) expect_equal(flag(mNA, 2, stubs = FALSE), dapply(mNA, baselag, 2)) expect_equal(flag(num_vars(data), stubs = FALSE), dapply(num_vars(data), baselag)) expect_equal(flag(num_vars(data), 2, stubs = FALSE), dapply(num_vars(data), baselag, 2)) expect_equal(flag(num_vars(dataNA), stubs = FALSE), dapply(num_vars(dataNA), baselag)) expect_equal(flag(num_vars(dataNA), 2, stubs = FALSE), dapply(num_vars(dataNA), baselag, 2)) expect_equal(flag(x, 1, f), BY(x, f, baselag, use.g.names = FALSE)) expect_equal(flag(x, 2, f), BY(x, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(xNA, 1, f), BY(xNA, f, baselag, use.g.names = FALSE)) expect_equal(flag(xNA, 2, f), BY(xNA, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(m, 1, g, stubs = FALSE), BY(m, g, baselag, use.g.names = FALSE)) expect_equal(flag(m, 2, g, stubs = FALSE), BY(m, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(mNA, 1, g, stubs = FALSE), BY(mNA, g, baselag, use.g.names = FALSE)) expect_equal(flag(mNA, 2, g, stubs = FALSE), BY(mNA, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 1, g, stubs = FALSE), BY(num_vars(data), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 2, g, stubs = FALSE), BY(num_vars(data), g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 1, g, stubs = FALSE), BY(num_vars(dataNA), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 2, g, stubs = FALSE), BY(num_vars(dataNA), g, baselag, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-lags !! expect_equal(flag(x, 1, f, t), BY(x, f, baselag, use.g.names = FALSE)) expect_equal(flag(x, 2, f, t), BY(x, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(xNA, 1, f, t), BY(xNA, f, baselag, use.g.names = FALSE)) expect_equal(flag(xNA, 2, f, t), BY(xNA, f, baselag, 2, use.g.names = FALSE)) expect_equal(flag(m, 1, g, td, stubs = FALSE), BY(m, g, baselag, use.g.names = FALSE)) expect_equal(flag(m, 2, g, td, stubs = FALSE), BY(m, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(mNA, 1, g, td, stubs = FALSE), BY(mNA, g, baselag, use.g.names = FALSE)) expect_equal(flag(mNA, 2, g, td, stubs = FALSE), BY(mNA, g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 1, g, td, stubs = FALSE), BY(num_vars(data), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(data), 2, g, td, stubs = FALSE), BY(num_vars(data), g, baselag, 2, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 1, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselag, use.g.names = FALSE)) expect_equal(flag(num_vars(dataNA), 2, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselag, 2, use.g.names = FALSE)) }) test_that("flag performs (panel-) vector lags and leads without errors", { expect_visible(flag(1:10, -2:2)) expect_visible(flag(1:10, 1:2)) expect_visible(flag(1:10, -1:-2)) expect_visible(flag(1:10, 0)) expect_visible(flag(xNA, -2:2)) expect_visible(flag(xNA, 1:2)) expect_visible(flag(xNA, -1:-2)) expect_visible(flag(xNA, 0)) expect_visible(flag(xNA, -2:2, f)) expect_visible(flag(xNA, 1:2, f)) expect_visible(flag(xNA, -1:-2, f)) expect_visible(flag(xNA, 0, f)) expect_visible(flag(xNA, -2:2, f, t)) expect_visible(flag(xNA, 1:2, f, t)) expect_visible(flag(xNA, -1:-2, f, t)) expect_visible(flag(xNA, 0, f, t)) }) test_that("flag performs (panel-) matrix lags and leads without errors", { expect_visible(flag(m, -2:2)) expect_visible(flag(m, 1:2)) expect_visible(flag(m, -1:-2)) expect_visible(flag(m, 0)) expect_visible(flag(m, -2:2, g)) expect_visible(flag(m, 1:2, g)) expect_visible(flag(m, -1:-2, g)) expect_visible(flag(m, 0, g)) expect_visible(flag(m, -2:2, g, td)) expect_visible(flag(m, 1:2, g, td)) expect_visible(flag(m, -1:-2, g, td)) expect_visible(flag(m, 0, g, td)) }) test_that("flag performs (panel-) data.frame lags and leads without errors", { expect_visible(flag(data, -2:2)) expect_visible(flag(data, 1:2)) expect_visible(flag(data, -1:-2)) expect_visible(flag(data, 0)) expect_visible(flag(data, -2:2, g)) expect_visible(flag(data, 1:2, g)) expect_visible(flag(data, -1:-2, g)) expect_visible(flag(data, 0, g)) expect_visible(flag(data, -2:2, g, td)) expect_visible(flag(data, 1:2, g, td)) expect_visible(flag(data, -1:-2, g, td)) expect_visible(flag(data, 0, g, td)) }) test_that("flag correctly handles unordered time-series and panel-series computations", { expect_equal(flag(x, -2:2, t = 1:100), flag(x, -2:2)) expect_equal(flag(xNA, -2:2, t = 1:100), flag(xNA, -2:2)) expect_equal(flag(m, -2:2, t = seq_along(td)), flag(m, -2:2)) expect_equal(flag(data, -2:2, t = seq_along(td)), flag(data, -2:2)) expect_equal(flag(xuo, -2:2, t = t2uo)[o,], unclass(flag(x, -2:2))) expect_equal(flag(xNAuo, -2:2, t = t2uo)[o,], unclass(flag(xNA, -2:2))) expect_equal(flag(muo, -2:2, t = t2duo)[od,], unclass(flag(m, -2:2))) expect_equal(flag(datauo, -2:2, t = t2duo)[od,], flag(data, -2:2)) expect_equal(flag(xuo, -2:2, fuo, tuo)[o,], unclass(flag(x, -2:2, f, t))) expect_equal(flag(xNAuo, -2:2, fuo, tuo)[o,], unclass(flag(xNA, -2:2, f, t))) expect_equal(flag(muo, -2:2, guo, tduo)[od,], unclass(flag(m, -2:2, g, td))) expect_equal(flag(datauo, -2:2, guo, tduo)[od,], flag(data, -2:2, g, td)) }) test_that("flag performs numerically stable in ordered computations", { expect_true(all_obj_equal(replicate(50, flag(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(x, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(x, -2:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNA, 1, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNA, -2:2, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(m, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(m, -2:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(mNA, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(mNA, -2:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(data, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(data, -2:2, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(dataNA, 1, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(dataNA, -2:2, g), simplify = FALSE))) }) test_that("flag performs numerically stable in unordered computations", { expect_true(all_obj_equal(replicate(50, flag(xuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xNAuo, t = t2uo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(muo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(datauo, t = t2duo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xuo, 1, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(xuo, -2:2, fuo, tuo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(muo, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(muo, -2:2, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(datauo, 1, guo, tduo), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, flag(datauo, -2:2, guo, tduo), simplify = FALSE))) }) test_that("flag handles special values in the right way", { # zero expect_equal(flag(c("a","b"),0), c("a","b")) expect_equal(flag(c(NaN,NaN),0), c(NaN,NaN)) expect_equal(flag(c(Inf,Inf),0), c(Inf,Inf)) expect_equal(flag(c(FALSE,TRUE),0), c(FALSE,TRUE)) expect_equal(flag(c(TRUE,FALSE),0), c(TRUE,FALSE)) # lags expect_equal(flag(c("a","b")), c(NA,"a")) expect_equal(flag(c(1,NA)), c(NA_real_,1)) expect_equal(flag(c(NA,1)), c(NA_real_,NA_real_)) expect_equal(flag(c(NaN,1)), c(NA_real_,NaN)) expect_equal(flag(c(1,NaN)), c(NA_real_,1)) expect_equal(flag(c(Inf,1)), c(NA,Inf)) expect_equal(flag(c(1,Inf)), c(NA,1)) expect_equal(flag(c(Inf,NA)), c(NA_real_,Inf)) expect_equal(flag(c(NA,Inf)), c(NA_real_,NA_real_)) expect_equal(flag(c(Inf,-Inf)), c(NA,Inf)) expect_equal(flag(c(-Inf,Inf)), c(NA,-Inf)) expect_equal(flag(c(Inf,Inf)), c(NA,Inf)) expect_equal(flag(c(TRUE,TRUE)), c(NA,TRUE)) expect_equal(flag(c(TRUE,FALSE)), c(NA,TRUE)) expect_equal(flag(c(FALSE,TRUE)), c(NA,FALSE)) # leads expect_equal(flag(c("a","b"),-1), c("b",NA)) expect_equal(flag(c(1,NA),-1), c(NA_real_,NA_real_)) expect_equal(flag(c(NA,1),-1), c(1,NA_real_)) expect_equal(flag(c(NaN,1),-1), c(1,NA_real_)) expect_equal(flag(c(1,NaN),-1), c(NaN,NA_real_)) expect_equal(flag(c(Inf,1),-1), c(1,NA_real_)) expect_equal(flag(c(1,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(Inf,NA),-1), c(NA_real_,NA_real_)) expect_equal(flag(c(NA,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(Inf,-Inf),-1), c(-Inf,NA_real_)) expect_equal(flag(c(-Inf,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(Inf,Inf),-1), c(Inf,NA_real_)) expect_equal(flag(c(TRUE,TRUE),-1), c(TRUE,NA)) expect_equal(flag(c(TRUE,FALSE),-1), c(FALSE,NA)) expect_equal(flag(c(FALSE,TRUE),-1), c(TRUE,NA)) }) test_that("flag produces errors for wrong input", { # type: normally guaranteed by C++ expect_visible(flag(mNAc)) expect_visible(flag(wlddev)) expect_error(flag(mNAc, f)) expect_error(flag(x, "1")) # if n exceeds length(x), should give error expect_error(flag(x,101)) expect_error(flag(x,-101)) # if n exceeds average group size, should give error # expect_warning(flag(x,11,f)) # Some fail on i386 ?? # expect_warning(flag(x,11,f,t)) # expect_warning(flag(x,-11,f)) # expect_warning(flag(x,-11,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(flag(x,c(1,1))) expect_error(flag(x,c(-1,-1))) expect_visible(flag(x,2:1)) expect_visible(flag(x,0)) expect_error(flag(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(flag(x,c(1,1),f)) expect_error(flag(x,c(1,1),f,t)) expect_visible(flag(x,2:1,f)) expect_visible(flag(x,2:1,f,t)) expect_visible(flag(x,0,f)) expect_visible(flag(x,0,f,t)) expect_error(flag(x,1,1)) # wrong inputs: passing a non-existent difference argument.. expect_error(flag(x,1,1)) expect_error(flag(x,1,1,f)) expect_error(flag(x,1,1,f,t)) expect_error(flag(x,1,-1,f)) expect_error(flag(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(flag(1:3, t = c(1,1,2))) expect_error(flag(1:3, t = c(1,2,2))) expect_error(flag(1:3, t = c(1,2,1))) expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(flag(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(flag(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(flag(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(flag(1:3, t = 1:2)) expect_error(flag(1:3, t = 1:4)) expect_error(flag(1:3, g = 1:2)) expect_error(flag(1:3, g = 1:4)) expect_error(flag(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(flag(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) # L and F F <- getNamespace("collapse")$F test_that("F performs like baselead", { expect_equal(F(1:10, -1), baselead(1:10)) expect_equal(F(1:10, -2), baselead(1:10, 2)) expect_equal(F(-1:1, -1), baselead(-1:1)) expect_equal(F(x, -1), baselead(x)) expect_equal(F(x, -2), baselead(x, 2)) expect_equal(F(xNA, -1), baselead(xNA)) expect_equal(F(xNA, -2), baselead(xNA, 2)) expect_equal(F(m, -1, stubs = FALSE), dapply(m, baselead)) expect_equal(F(m, -2, stubs = FALSE), dapply(m, baselead, 2)) expect_equal(F(mNA, -1, stubs = FALSE), dapply(mNA, baselead)) expect_equal(F(mNA, -2, stubs = FALSE), dapply(mNA, baselead, 2)) expect_equal(F(num_vars(data), -1, stubs = FALSE), dapply(num_vars(data), baselead)) expect_equal(F(num_vars(data), -2, stubs = FALSE), dapply(num_vars(data), baselead, 2)) expect_equal(F(num_vars(dataNA), -1, stubs = FALSE), dapply(num_vars(dataNA), baselead)) expect_equal(F(num_vars(dataNA), -2, stubs = FALSE), dapply(num_vars(dataNA), baselead, 2)) expect_equal(F(x, -1, f), BY(x, f, baselead, use.g.names = FALSE)) expect_equal(F(x, -2, f), BY(x, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(xNA, -1, f), BY(xNA, f, baselead, use.g.names = FALSE)) expect_equal(F(xNA, -2, f), BY(xNA, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(m, -1, g, stubs = FALSE), BY(m, g, baselead, use.g.names = FALSE)) expect_equal(F(m, -2, g, stubs = FALSE), BY(m, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(mNA, -1, g, stubs = FALSE), BY(mNA, g, baselead, use.g.names = FALSE)) expect_equal(F(mNA, -2, g, stubs = FALSE), BY(mNA, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(data), -1, g, stubs = FALSE), BY(num_vars(data), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(data), -2, g, stubs = FALSE), BY(num_vars(data), g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -1, g, stubs = FALSE), BY(num_vars(dataNA), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -2, g, stubs = FALSE), BY(num_vars(dataNA), g, baselead, 2, use.g.names = FALSE)) # Adding time-variable: Computing fully identified panel-lags !! expect_equal(F(x, -1, f, t), BY(x, f, baselead, use.g.names = FALSE)) expect_equal(F(x, -2, f, t), BY(x, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(xNA, -1, f, t), BY(xNA, f, baselead, use.g.names = FALSE)) expect_equal(F(xNA, -2, f, t), BY(xNA, f, baselead, 2, use.g.names = FALSE)) expect_equal(F(m, -1, g, td, stubs = FALSE), BY(m, g, baselead, use.g.names = FALSE)) expect_equal(F(m, -2, g, td, stubs = FALSE), BY(m, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(mNA, -1, g, td, stubs = FALSE), BY(mNA, g, baselead, use.g.names = FALSE)) expect_equal(F(mNA, -2, g, td, stubs = FALSE), BY(mNA, g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(data), -1, g, td, stubs = FALSE), BY(num_vars(data), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(data), -2, g, td, stubs = FALSE), BY(num_vars(data), g, baselead, 2, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -1, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselead, use.g.names = FALSE)) expect_equal(F(num_vars(dataNA), -2, g, td, stubs = FALSE), BY(num_vars(dataNA), g, baselead, 2, use.g.names = FALSE)) }) test_that("L and F do the opposite of one another", { expect_equal(L(1:10, -2:2), F(1:10, 2:-2)) expect_equal(L(m, -2:2), F(m, 2:-2)) expect_equal(L(data, -2:2), F(data, 2:-2)) }) test_that("L produces errors for wrong input", { # type: normally guaranteed by C++ expect_visible(L(mNAc)) expect_visible(L(wlddev)) expect_error(L(mNAc, f)) expect_error(L(x, "1")) # if n exceeds length(x), should give error expect_error(L(x,101)) expect_error(L(x,-101)) # if n exceeds average group size, should give error # expect_warning(L(x,11,f)) -> some fail on i336 # expect_warning(L(x,11,f,t)) # expect_warning(L(x,-11,f)) # expect_warning(L(x,-11,f,t)) # passing repeated n-values or non-positive or non-consecutive diff values should give error expect_error(L(x,c(1,1))) expect_error(L(x,c(-1,-1))) expect_visible(L(x,2:1)) expect_visible(L(x,0)) expect_error(L(x,f)) # common source of error probably is passing the factor in a wrong slot expect_error(L(x,c(1,1),f)) expect_error(L(x,c(1,1),f,t)) expect_visible(L(x,2:1,f)) expect_visible(L(x,2:1,f,t)) expect_visible(L(x,0,f)) expect_visible(L(x,0,f,t)) expect_error(L(x,1,1)) # wrong inputs: passing a non-existent difference argument.. expect_error(L(x,1,1)) expect_error(L(x,1,1,f)) expect_error(L(x,1,1,f,t)) expect_error(L(x,1,-1,f)) expect_error(L(x,1,-1,f,t)) # repeated values or gaps in time-variable should give error expect_error(L(1:3, t = c(1,1,2))) expect_error(L(1:3, t = c(1,2,2))) expect_error(L(1:3, t = c(1,2,1))) expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,2,1:3,1:4))) # This is the only possible statement which does not throw a reteated timevar error because the first C++ index is 0, and omap is also initialized with 0's. expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,1,1,1:3,1:4))) expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1,1,3,4))) expect_error(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,2,1:3,1:4))) expect_visible(L(1:10, g = c(1,1,1,2,2,2,3,3,3,3), t = c(1,2,4,1:3,1:4))) expect_error(L(1:10, g = c(1,2,1,2,2,2,3,3,3,3), t = c(1:3,1:3,1:4))) expect_visible(L(1:10, g = c(1,1,1,2,2,2,3,3,4,3), t = c(1:3,1:3,1:4))) # The usual stuff: Wrongly sized grouping vectors or time-variables expect_error(L(1:3, t = 1:2)) expect_error(L(1:3, t = 1:4)) expect_error(L(1:3, g = 1:2)) expect_error(L(1:3, g = 1:4)) expect_error(L(1:4, g = c(1,1,2,2), t = c(1,2,1))) expect_error(L(1:4, g = c(1,2,2), t = c(1,2,1,2))) }) test_that("L.data.frame method is foolproof", { expect_visible(L(wlddev)) expect_visible(L(wlddev, by = wlddev$iso3c)) expect_error(L(wlddev, t = ~year)) expect_visible(L(wlddev, 1, wlddev$iso3c)) expect_visible(L(wlddev, -2:2, ~iso3c)) expect_visible(L(wlddev, 1, ~iso3c + region)) expect_visible(L(wlddev, -2:2, wlddev$iso3c, wlddev$year)) expect_visible(L(wlddev, -2:2, ~iso3c, ~year)) expect_visible(L(wlddev, cols = 9:12)) expect_visible(L(wlddev, -1:1,~iso3c, cols = 9:12)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(L(wlddev, -1:1,~iso3c, ~year, cols = 9:12)) expect_visible(L(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, -1:1,~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(L(wlddev, cols = NULL)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, cols = NULL)) expect_visible(L(wlddev, -1:1,~iso3c, cols = NULL)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, wlddev$year, cols = NULL)) expect_visible(L(wlddev, -1:1,~iso3c, ~year, cols = NULL)) expect_error(L(wlddev, cols = 9:14)) expect_error(L(wlddev, -1:1,~iso3c, ~year, cols = 9:14)) expect_error(L(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(L(wlddev, -1:1,~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_warning(L(wlddev, w = 4)) expect_warning(L(wlddev, g = 4)) expect_error(L(wlddev, t = "year")) expect_error(L(wlddev, by = ~year2)) expect_error(L(wlddev, t = ~year + region)) expect_error(L(wlddev, data)) expect_error(L(wlddev, -1:1,"iso3c")) expect_error(L(wlddev, -1:1,~iso3c2)) expect_error(L(wlddev, -1:1,~iso3c + bla)) expect_error(L(wlddev, -1:1,t = rnorm(30))) expect_error(L(wlddev, -1:1,by = rnorm(30))) expect_error(L(wlddev, -1:1,mtcars$mpg, 1:29)) expect_error(L(wlddev, -1:1,mtcars$mpg, mtcars$cyl)) # this gives a repeated values error first because length(g) == length(t) expect_error(L(wlddev,-1:1, ~iso3c2, ~year2)) expect_error(L(wlddev, cols = ~bla)) expect_visible(L(wlddev, -1:1,wlddev$iso3c, ~year, cols = 9:12)) expect_visible(L(wlddev, -1:1,~iso3c, wlddev$year, cols = 9:12)) expect_error(L(wlddev, -1:1,wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(L(wlddev, -1:1,~iso3c3, ~year, cols = 9:12)) expect_error(L(wlddev, cols = c("PC3GDP","LIFEEX"))) }) collapse/tests/testthat/test-collapse-options.R0000644000176200001440000000342115202504542021450 0ustar liggesuserscontext("collapse options") test_that("get_collapse and set_collapse round-trip", { old <- get_collapse() on.exit(set_collapse(old), add = TRUE) set_collapse(na.rm = FALSE, sort = FALSE, digits = 5, verbose = 0, stub = FALSE) expect_equal(get_collapse("na.rm"), FALSE) expect_equal(get_collapse("sort"), FALSE) expect_equal(get_collapse(c("digits", "verbose", "stub")), stats::setNames(list(5L, 0L, FALSE), c("digits", "verbose", "stub"))) set_collapse(list(na.rm = old$na.rm, sort = old$sort, digits = old$digits, verbose = old$verbose, stub = old$stub)) }) test_that("set_collapse validates options", { old <- get_collapse() on.exit(set_collapse(old), add = TRUE) expect_error(set_collapse(nthreads = 0)) expect_error(set_collapse(na.rm = NA)) expect_error(set_collapse(sort = NA)) expect_error(set_collapse(digits = -1L)) expect_error(set_collapse(verbose = -1L)) expect_error(set_collapse(unknown = TRUE)) }) test_that("set_collapse stable.algo round-trip", { old <- get_collapse() on.exit(set_collapse(old), add = TRUE) set_collapse(stable.algo = FALSE) expect_equal(get_collapse("stable.algo"), FALSE) set_collapse(stable.algo = old$stable.algo) }) test_that("set_collapse returns previous options invisibly", { old <- get_collapse() prev <- set_collapse(verbose = old$verbose) expect_equal(prev, old) set_collapse(old) }) test_that("set_collapse mask manip smoke test", { old <- get_collapse() on.exit({ set_collapse(mask = old$mask, remove = old$remove) }, add = TRUE) if(length(old$mask)) set_collapse(mask = NULL, remove = NULL) set_collapse(mask = "manip") expect_equal(get_collapse("mask"), "manip") expect_true(exists("fselect", where = asNamespace("collapse"), inherits = FALSE)) }) collapse/tests/testthat/test-attribute-handling.R0000644000176200001440000007215714777170131021767 0ustar liggesuserscontext("Attribute Handling") v <- wlddev$PCGDP date <- wlddev$date fac <- wlddev$region g1 <- GRP(wlddev$country) m <- qM(mtcars) gmtc <- fgroup_by(mtcars, cyl, vs, am) gm <- qM(gmtc, keep.attr = TRUE) g2 <- GRP(mtcars, ~ cyl + vs + am) # gDTmtc <- fgroup_by(qDT(mtcars), cyl, vs, am) set.seed(101) f1 <- sample.int(5, length(AirPassengers), replace = TRUE) f2 <- sample.int(5, nrow(EuStockMarkets), replace = TRUE) # numFUN <- setdiff(.FAST_STAT_FUN, c("fnth", "fmode", "ffirst", "flast", "fmin", "fmax")) countFUN <- c("fnobs", "fndistinct") F <- getNamespace("collapse")$F test_that("statistical functions handle attributes properly", { for(i in setdiff(.FAST_STAT_FUN, "fnth")) { # print(i) FUN <- match.fun(i) if(i %!in% c("fvar", "fsd", countFUN)) { expect_identical(attributes(FUN(v)), attributes(v)) expect_identical(attributes(FUN(date)), attributes(date)) } if(i %!in% c("fsum", "fvar", "fsd", countFUN)) expect_identical(attributes(FUN(fac)), attributes(fac)) if(i != "fmode") expect_true(is.null(attributes(FUN(AirPassengers)))) expect_identical(attributes(FUN(EuStockMarkets)), list(names = colnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, drop = FALSE)), list(dim = c(1L, 4L), dimnames = list(NULL, colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m)), list(names = colnames(m))) expect_identical(attributes(FUN(m, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(gm)), list(names = colnames(m))) expect_identical(attributes(FUN(gm, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(mtcars)), list(names = names(mtcars))) expect_identical(attributes(FUN(mtcars, drop = FALSE)), `[[<-`(attributes(mtcars), "row.names", 1L)) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"))), list(names = names(mtcars))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), drop = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", 1L), "class", "data.frame")) # Grouped expect_identical(attributes(FUN(v, g1, use.g.names = FALSE)), attributes(v)) expect_identical(attributes(FUN(v, g1)), c(attributes(v), list(names = unattrib(GRPnames(g1))))) expect_identical(attributes(FUN(date, g1, use.g.names = FALSE)), if(i %!in% countFUN) attributes(date) else list(label = vlabels(date))) expect_identical(attributes(FUN(date, g1)), if(i %!in% countFUN) c(attributes(date), list(names = unattrib(GRPnames(g1)))) else list(label = vlabels(date), names = unattrib(GRPnames(g1)))) expect_identical(attributes(FUN(fac, g1, use.g.names = FALSE)), if(i %!in% countFUN) attributes(fac) else list(label = vlabels(fac))) expect_identical(attributes(FUN(fac, g1)), if(i %!in% countFUN) c(attributes(fac), list(names = unattrib(GRPnames(g1)))) else list(label = vlabels(fac), names = unattrib(GRPnames(g1)))) if(i != "fmode") expect_identical(attributes(FUN(AirPassengers, f1, use.g.names = FALSE)), NULL) if(i != "fmode") expect_identical(attributes(FUN(AirPassengers, f1)), list(names = as.character(1:5))) expect_identical(attributes(FUN(EuStockMarkets, f2, use.g.names = FALSE)), list(dim = c(5L, 4L), dimnames = dimnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, f2)), list(dim = c(5L, 4L), dimnames = list(as.character(1:5), colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m, g2, use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(m, g2)), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)))) expect_identical(attributes(FUN(gm, attr(gm, "groups"), use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(gm, attr(gm, "groups"))), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)), groups = attr(gm, "groups"))) if(Sys.getenv("NCRAN") == "TRUE") { expect_identical(attributes(FUN(mtcars, g2, use.g.names = FALSE)), `[[<-`(attributes(mtcars), "row.names", value = seq_len(g2[[1L]]))) expect_identical(attributes(FUN(mtcars, g2)), `[[<-`(attributes(mtcars), "row.names", value = GRPnames(g2))) } expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2, use.g.names = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", value = seq_len(g2[[1L]])), "class", "data.frame")) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", GRPnames(g2)), "class", "data.frame")) expect_identical(attributes(FUN(gmtc)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, use.g.names = TRUE)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = GRPnames(g2), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE)), list(names = names(mtcars)[-c(2,8:9)], row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE, use.g.names = TRUE)), list(names = names(mtcars)[-c(2,8:9)], row.names = GRPnames(g2), class = "data.frame")) } for(i in c("fmode", "ffirst", "flast")) { # print(i) FUN <- match.fun(i) for(k in names(wlddev)) expect_identical(attributes(FUN(wlddev[[k]])), attributes(wlddev[[k]])) if(i != "fmode") expect_identical(attributes(FUN(AirPassengers)), NULL) expect_identical(attributes(FUN(EuStockMarkets)), list(names = colnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, drop = FALSE)), list(dim = c(1L, 4L), dimnames = list(NULL, colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m)), list(names = colnames(m))) expect_identical(attributes(FUN(m, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(gm)), list(names = colnames(m))) expect_identical(attributes(FUN(gm, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(wlddev)), list(names = names(wlddev))) expect_identical(attributes(FUN(wlddev, drop = FALSE)), `[[<-`(attributes(wlddev), "row.names", 1L)) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"))), list(names = names(mtcars))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), drop = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", 1L), "class", "data.frame")) # Grouped for(k in names(wlddev)) expect_identical(attributes(FUN(wlddev[[k]], g1, use.g.names = FALSE)), attributes(wlddev[[k]])) for(k in names(wlddev)) expect_identical(attributes(FUN(wlddev[[k]], g1)), c(attributes(wlddev[[k]]), list(names = unattrib(GRPnames(g1))))) if(i != "fmode") expect_identical(attributes(FUN(AirPassengers, f1, use.g.names = FALSE)), NULL) if(i != "fmode") expect_identical(attributes(FUN(AirPassengers, f1)), list(names = as.character(1:5))) expect_identical(attributes(FUN(EuStockMarkets, f2, use.g.names = FALSE)), list(dim = c(5L, 4L), dimnames = dimnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, f2)), list(dim = c(5L, 4L), dimnames = list(as.character(1:5), colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m, g2, use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(m, g2)), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)))) expect_identical(attributes(FUN(gm, attr(gm, "groups"), use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(gm, attr(gm, "groups"))), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(wlddev, g1, use.g.names = FALSE)), `[[<-`(attributes(wlddev), "row.names", value = seq_len(g1[[1L]]))) expect_identical(attributes(FUN(wlddev, g1)), `[[<-`(attributes(wlddev), "row.names", value = GRPnames(g1))) if(Sys.getenv("NCRAN") == "TRUE") { expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2, use.g.names = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", value = seq_len(g2[[1L]])), "class", "data.frame")) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", GRPnames(g2)), "class", "data.frame")) } expect_identical(attributes(FUN(gmtc)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, use.g.names = TRUE)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = GRPnames(g2), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE)), list(names = names(mtcars)[-c(2,8:9)], row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE, use.g.names = TRUE)), list(names = names(mtcars)[-c(2,8:9)], row.names = GRPnames(g2), class = "data.frame")) } for(i in c("fmin", "fmax")) { # print(i) FUN <- match.fun(i) for(k in num_vars(wlddev, "names")) expect_identical(attributes(FUN(wlddev[[k]])), attributes(wlddev[[k]])) expect_identical(attributes(FUN(AirPassengers)), NULL) expect_identical(attributes(FUN(EuStockMarkets)), list(names = colnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, drop = FALSE)), list(dim = c(1L, 4L), dimnames = list(NULL, colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m)), list(names = colnames(m))) expect_identical(attributes(FUN(m, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(gm)), list(names = colnames(m))) expect_identical(attributes(FUN(gm, drop = FALSE)), list(dim = c(1L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(nv(wlddev))), list(names = nv(wlddev, "names"))) expect_identical(attributes(FUN(nv(wlddev), drop = FALSE)), `[[<-`(attributes(nv(wlddev)), "row.names", 1L)) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"))), list(names = names(mtcars))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), drop = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", 1L), "class", "data.frame")) # Grouped for(k in num_vars(wlddev, "names")) expect_identical(attributes(FUN(wlddev[[k]], g1, use.g.names = FALSE)), attributes(wlddev[[k]])) for(k in num_vars(wlddev, "names")) expect_identical(attributes(FUN(wlddev[[k]], g1)), c(attributes(wlddev[[k]]), list(names = unattrib(GRPnames(g1))))) expect_identical(attributes(FUN(AirPassengers, f1, use.g.names = FALSE)), NULL) expect_identical(attributes(FUN(AirPassengers, f1)), list(names = as.character(1:5))) expect_identical(attributes(FUN(EuStockMarkets, f2, use.g.names = FALSE)), list(dim = c(5L, 4L), dimnames = dimnames(EuStockMarkets))) expect_identical(attributes(FUN(EuStockMarkets, f2)), list(dim = c(5L, 4L), dimnames = list(as.character(1:5), colnames(EuStockMarkets)))) expect_identical(attributes(FUN(m, g2, use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)))) expect_identical(attributes(FUN(m, g2)), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)))) expect_identical(attributes(FUN(gm, attr(gm, "groups"), use.g.names = FALSE)), list(dim = c(7L, 11L), dimnames = list(NULL, colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(gm, attr(gm, "groups"))), list(dim = c(7L, 11L), dimnames = list(GRPnames(g2), colnames(m)), groups = attr(gm, "groups"))) expect_identical(attributes(FUN(nv(wlddev), g1, use.g.names = FALSE)), `[[<-`(attributes(nv(wlddev)), "row.names", value = seq_len(g1[[1L]]))) expect_identical(attributes(FUN(nv(wlddev), g1)), `[[<-`(attributes(nv(wlddev)), "row.names", value = GRPnames(g1))) if(Sys.getenv("NCRAN") == "TRUE") { expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2, use.g.names = FALSE)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", value = seq_len(g2[[1L]])), "class", "data.frame")) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g2)), `[[<-`(`[[<-`(attributes(gmtc), "row.names", GRPnames(g2)), "class", "data.frame")) } expect_identical(attributes(FUN(gmtc)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, use.g.names = TRUE)), list(names = c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2,8:9)]), row.names = GRPnames(g2), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE)), list(names = names(mtcars)[-c(2,8:9)], row.names = seq_len(g2[[1L]]), class = "data.frame")) expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE, use.g.names = TRUE)), list(names = names(mtcars)[-c(2,8:9)], row.names = GRPnames(g2), class = "data.frame")) } }) transFUN <- setdiff(c(.FAST_FUN, .OPERATOR_FUN), c(.FAST_STAT_FUN, "fhdbetween", "fhdwithin", "HDB", "HDW")) options(collapse_unused_arg_action = "none", warn = -1) test_that("preservation of difftime (and related classes)", { v <- diff(wlddev$date) av <- attributes(v) v <- c(NA, v) attributes(v) <- av vd <- qDT(v) g <- group(wlddev$iso3c) w <- abs(rnorm(length(v))) + 5 for(i in setdiff(.FAST_STAT_FUN, c("fnobs", "fndistinct"))) { # print(i) FUN <- match.fun(i) for(t in list(NULL, "replace_fill")) { expect_identical(attributes(FUN(v, TRA = t)), av) expect_identical(attributes(FUN(v, g = g, use.g.names = FALSE, TRA = t)), av) expect_identical(attributes(FUN(v, na.rm = FALSE, TRA = t)), av) expect_identical(attributes(FUN(v, g = g, na.rm = FALSE, use.g.names = FALSE, TRA = t)), av) expect_identical(attributes(FUN(vd, drop = FALSE, TRA = t)[[1L]]), av) expect_identical(attributes(FUN(vd, g = g, use.g.names = FALSE, TRA = t)[[1L]]), av) expect_identical(attributes(FUN(vd, drop = FALSE, na.rm = FALSE, TRA = t)[[1L]]), av) expect_identical(attributes(FUN(vd, g = g, na.rm = FALSE, use.g.names = FALSE, TRA = t)[[1L]]), av) if(i %in% c("fsum", "fprod", "fmean", "fmedian", "fnth", "fmode", "fvar", "fsd")) { expect_identical(attributes(FUN(v, w = w, TRA = t)), av) expect_identical(attributes(FUN(v, g = g, w = w, use.g.names = FALSE, TRA = t)), av) expect_identical(attributes(FUN(v, w = w, na.rm = FALSE, TRA = t)), av) expect_identical(attributes(FUN(v, g = g, w = w, na.rm = FALSE, use.g.names = FALSE, TRA = t)), av) expect_identical(attributes(FUN(vd, drop = FALSE, w = w, TRA = t)[[1L]]), av) expect_identical(attributes(FUN(vd, g = g, w = w, use.g.names = FALSE, TRA = t)[[1L]]), av) expect_identical(attributes(FUN(vd, drop = FALSE, w = w, na.rm = FALSE, TRA = t)[[1L]]), av) expect_identical(attributes(FUN(vd, g = g, w = w, na.rm = FALSE, use.g.names = FALSE, TRA = t)[[1L]]), av) } } } for(i in c("fnobs", "fndistinct")) { FUN <- match.fun(i) for(t in list(NULL, "replace_fill")) { expect_false(identical(attributes(FUN(v, TRA = t)), av)) expect_false(identical(attributes(FUN(v, g, use.g.names = FALSE, TRA = t)), av)) expect_false(identical(attributes(FUN(vd, TRA = t)[[1L]]), av)) expect_false(identical(attributes(FUN(vd, g, use.g.names = FALSE, TRA = t)[[1L]]), av)) } } for(i in setdiff(c(.FAST_FUN, .OPERATOR_FUN), c(.FAST_STAT_FUN, "fhdbetween", "HDB", "fhdwithin", "HDW", "Dlog"))) { # print(i) FUN <- match.fun(i) expect_identical(attributes(FUN(v)), av) expect_identical(attributes(FUN(v, g = g, by = g)), av) expect_identical(attributes(FUN(vd, cols = NULL)[[1L]]), av) expect_identical(attributes(FUN(vd, g = g, by = g, cols = NULL)[[1L]]), av) if(i %in% c("fscale", "STD", "fbetween", "B", "fwithin", "W")) { expect_identical(attributes(FUN(v, w = w)), av) expect_identical(attributes(FUN(v, g = g, by = g, w = w)), av) expect_identical(attributes(FUN(vd, w = w, cols = NULL)[[1L]]), av) expect_identical(attributes(FUN(vd, g = g, by = g, w = w, cols = NULL)[[1L]]), av) } } }) test_that("transformation functions preserve all attributes", { for(i in transFUN) { # print(i) FUN <- match.fun(i) for(k in if(i %in% c("flag","L","F")) names(wlddev) else num_vars(wlddev, "names")) expect_identical(attributes(FUN(wlddev[[k]])), attributes(wlddev[[k]])) expect_identical(attributes(FUN(AirPassengers)), attributes(AirPassengers)) expect_identical(attributes(FUN(EuStockMarkets, stubs = FALSE, stub = FALSE)), attributes(EuStockMarkets)) expect_identical(attributes(FUN(m, stubs = FALSE, stub = FALSE)), attributes(m)) expect_identical(attributes(FUN(gm, stubs = FALSE, stub = FALSE)), attributes(gm)) expect_identical(attributes(FUN(if(i == "flag") wlddev else num_vars(wlddev), stubs = FALSE, stub = FALSE)), attributes(if(i == "flag") wlddev else num_vars(wlddev))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), stubs = FALSE, stub = FALSE)), attributes(`oldClass<-`(gmtc, "data.frame"))) # Grouped for(k in if(i == "flag") names(wlddev) else num_vars(wlddev, "names")) expect_identical(attributes(FUN(wlddev[[k]], g = g1)), attributes(wlddev[[k]])) expect_identical(attributes(FUN(AirPassengers, g = f1)), attributes(AirPassengers)) expect_identical(attributes(FUN(EuStockMarkets, g = f2, stubs = FALSE, stub = FALSE)), attributes(EuStockMarkets)) expect_identical(attributes(FUN(m, g = g2, stubs = FALSE, stub = FALSE)), attributes(m)) expect_identical(attributes(FUN(gm, g = attr(gm, "groups"), stubs = FALSE, stub = FALSE)), attributes(gm)) expect_identical(attributes(FUN(if(i == "flag") wlddev else num_vars(wlddev), g = g1, by = g1, stubs = FALSE, stub = FALSE)), attributes(if(i == "flag") wlddev else num_vars(wlddev))) expect_identical(attributes(FUN(`oldClass<-`(gmtc, "data.frame"), g = g2, by = g2, stubs = FALSE, stub = FALSE)), `[[<-`(attributes(gmtc), "class", "data.frame")) expect_identical(attributes(if(i %in% c("B","W", "STD")) FUN(gmtc, stub = FALSE) else FUN(gmtc, stubs = FALSE)), `[[<-`(attributes(gmtc), "names", c(fgroup_vars(gmtc, "names"), names(mtcars)[-c(2L ,8:9)]))) if(i %in% c("fcumsum", "flag", "L", "F", "fdiff", "D", "Dlog", "fgrowth", "G")) expect_identical(attributes(FUN(gmtc, keep.ids = FALSE, stubs = FALSE)), `[[<-`(attributes(gmtc), "names", names(mtcars)[-c(2L ,8:9)])) else expect_identical(attributes(FUN(gmtc, keep.group_vars = FALSE, stub = FALSE)), `[[<-`(attributes(gmtc), "names", names(mtcars)[-c(2L ,8:9)])) } }) options(collapse_unused_arg_action = "warning", warn = 1) test_that("TRA attribute preservation works well", { # Default Vector Method expect_equal(attributes(TRA(AirPassengers, 1, "replace_NA")), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, 1L, "replace_NA"))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, 1, "replace")), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, 1L, "replace"))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, 1, "replace_fill")), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, 1L, "replace_fill"))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, 1, "-")), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, 1L, "-")), attributes(AirPassengers)) # Integer -> Coerced to double in numeric operation set.seed(101) f <- qF(sample.int(5L, length(AirPassengers), TRUE), na.exclude = FALSE) num <- unclass(fmean(AirPassengers, f)); int <- fnobs(AirPassengers, f) expect_equal(attributes(TRA(AirPassengers, num, "replace_NA", f)), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, int, "replace_NA", f))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, num, "replace", f)), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, int, "replace", f))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, num, "replace_fill", f)), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, int, "replace_fill", f))[[1]], tsp(AirPassengers)) # Integer -> Change of type !! expect_equal(attributes(TRA(AirPassengers, num, "-", f)), attributes(AirPassengers)) # Double expect_equal(attributes(TRA(AirPassengers, int, "-", f)), attributes(AirPassengers)) # Integer -> Coerced to double in numeric operation # Matrix Method expect_equal(attributes(TRA(EuStockMarkets, rep(1, 4L), "replace_NA")), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, rep(1L, 4L), "replace_NA"))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, rep(1, 4L), "replace")), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, rep(1L, 4L), "replace"))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, rep(1, 4L), "replace_fill")), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, rep(1L, 4L), "replace_fill"))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, rep(1, 4L), "-")), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, rep(1L, 4L), "-")), attributes(EuStockMarkets)) # Integer -> Coerced to double in numeric operation set.seed(101) f <- qF(sample.int(5L, nrow(EuStockMarkets), TRUE), na.exclude = FALSE) num <- unclass(fmean(EuStockMarkets, f)); int <- fnobs(EuStockMarkets, f) expect_equal(attributes(TRA(EuStockMarkets, num, "replace_NA", f)), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, int, "replace_NA", f))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, num, "replace", f)), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, int, "replace", f))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, num, "replace_fill", f)), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, int, "replace_fill", f))[["tsp"]], tsp(EuStockMarkets)) # Integer -> Change of type !! expect_equal(attributes(TRA(EuStockMarkets, num, "-", f)), attributes(EuStockMarkets)) # Double expect_equal(attributes(TRA(EuStockMarkets, int, "-", f)), attributes(EuStockMarkets)) # Integer -> Coerced to double in numeric operation # Data Frame Method # CATEGORICAL # Simple expect_equal(vclasses(unattrib(fndistinct(wlddev, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fndistinct(wlddev, TRA = "replace"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, TRA = "replace"))), rep("integer", 13L)) expect_equal(lapply(ffirst(wlddev, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(ffirst(wlddev, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, TRA = "replace"), attributes), lapply(wlddev, attributes)) # Grouped expect_equal(vclasses(unattrib(fndistinct(wlddev, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fndistinct(wlddev, wlddev$iso3c, TRA = "replace"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", 13L)) expect_equal(vclasses(unattrib(fnobs(wlddev, wlddev$iso3c, TRA = "replace"))), rep("integer", 13L)) expect_equal(lapply(ffirst(wlddev, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(ffirst(wlddev, wlddev$iso3c, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(flast(wlddev, wlddev$iso3c, TRA = "replace"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(wlddev, attributes)) expect_equal(lapply(fmode(wlddev, wlddev$iso3c, TRA = "replace"), attributes), lapply(wlddev, attributes)) # Numeric nwld <- num_vars(wlddev) # Simple expect_equal(vclasses(fndistinct(nwld, TRA = "replace_NA")), vclasses(nwld)) expect_equal(vclasses(unattrib(fndistinct(nwld, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, TRA = "-"))), rep("numeric", fncol(nwld))) expect_equal(vclasses(fnobs(nwld, TRA = "replace_NA")), vclasses(nwld)) expect_equal(vclasses(unattrib(fnobs(nwld, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, TRA = "%%"))), rep("numeric", fncol(nwld))) expect_equal(lapply(fmean(nwld, TRA = "replace_NA"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, TRA = "+"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, TRA = "replace_NA"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, TRA = "/"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, TRA = "replace_NA"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, TRA = "-"), attributes), lapply(nwld, attributes)) # Grouped expect_equal(vclasses(fndistinct(nwld, wlddev$iso3c, TRA = "replace_NA")), vclasses(nwld)) expect_equal(vclasses(unattrib(fndistinct(nwld, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, wlddev$iso3c, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fndistinct(nwld, wlddev$iso3c, TRA = "-%%"))), rep("numeric", fncol(nwld))) expect_equal(vclasses(fnobs(nwld, wlddev$iso3c, TRA = "replace_NA")), vclasses(nwld)) expect_equal(vclasses(unattrib(fnobs(nwld, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, wlddev$iso3c, TRA = "replace"))), rep("integer", fncol(nwld))) expect_equal(vclasses(unattrib(fnobs(nwld, wlddev$iso3c, TRA = "*"))), rep("numeric", fncol(nwld))) expect_equal(lapply(fmean(nwld, wlddev$iso3c, TRA = "replace_NA"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, wlddev$iso3c, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmean(nwld, wlddev$iso3c, TRA = "-+"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, wlddev$iso3c, TRA = "replace_NA"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, wlddev$iso3c, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fsd(nwld, wlddev$iso3c, TRA = "/"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, wlddev$iso3c, TRA = "replace_NA"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, wlddev$iso3c, TRA = "replace_fill"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, wlddev$iso3c, TRA = "replace"), attributes), lapply(nwld, attributes)) expect_equal(lapply(fmedian(nwld, wlddev$iso3c, TRA = "+"), attributes), lapply(nwld, attributes)) }) collapse/tests/testthat/test-recode-replace.R0000644000176200001440000001477514777170131021056 0ustar liggesuserscontext("recode, replace") gmtc <- fgroup_by(mtcars, cyl) test_that("replace_na and replace_inf work well", { expect_equal(replace_na(airquality, 0), `[<-`(airquality, is.na(airquality), value = 0)) expect_equal(replace_na(airquality, 0, cols = 1:2), `[<-`(airquality, is.na(airquality), value = 0)) expect_equal(replace_na(airquality, 0, cols = is.numeric), `[<-`(airquality, is.na(airquality), value = 0)) expect_equal(replace_na(flag(EuStockMarkets), 0), `[<-`(flag(EuStockMarkets), is.na(flag(EuStockMarkets)), value = 0)) expect_equal(replace_inf(dapply(mtcars, log)), `[<-`(dapply(mtcars, log), sapply(dapply(mtcars, log), is.infinite), value = NA)) expect_equal(replace_inf(log(EuStockMarkets)), `[<-`(log(EuStockMarkets), is.infinite(log(EuStockMarkets)), value = NA)) expect_equal(replace_inf(dapply(mtcars, log), replace.nan = TRUE), `[<-`(dapply(mtcars, log), sapply(dapply(mtcars, log), is.infinite), value = NA)) expect_equal(replace_inf(log(EuStockMarkets), replace.nan = TRUE), `[<-`(log(EuStockMarkets), is.infinite(log(EuStockMarkets)), value = NA)) }) # scaling data using MAD mad_trans <- function(x) { if(inherits(x, c("pseries", "pdata.frame"))) { g <- GRP(x) tmp <- fmedian(x, g, TRA = "-") tmp %/=% fmedian(if(is.list(tmp)) lapply(tmp, abs) else abs(tmp), g, TRA = "fill", set = TRUE) return(tmp) } tmp <- fmedian(x, TRA = "-") tmp %/=% fmedian(if(is.list(tmp)) dapply(tmp, abs) else abs(tmp), TRA = "fill", set = TRUE) return(tmp) } test_that("replace_outliers works well.", { expect_equal(replace_outliers(mtcars, 2), replace(mtcars, fscale(mtcars) > 2, NA)) # expect_equal(replace_outliers(mtcars, 2, single.limit = "mad"), replace(mtcars, mad_trans(mtcars) > 2, NA)) expect_equal(replace_outliers(gmtc, 2, single.limit = "sd", ignore.groups = TRUE), replace(gmtc, dapply(mtcars, fscale) > 2, NA)) # expect_equal(replace_outliers(gmtc, 2, single.limit = "mad", ignore.groups = TRUE), replace(gmtc, dapply(mtcars, mad_trans) > 2, NA)) expect_equal(replace_outliers(mtcars, 2, single.limit = "min"), replace(mtcars, mtcars < 2, NA)) expect_equal(replace_outliers(mtcars, 2, single.limit = "max"), replace(mtcars, mtcars > 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2), replace(EuStockMarkets, fscale(EuStockMarkets) > 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2, single.limit = "sd", ignore.groups = TRUE), replace(EuStockMarkets, dapply(EuStockMarkets, fscale) > 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2, single.limit = "min"), replace(EuStockMarkets, EuStockMarkets < 2, NA)) expect_equal(replace_outliers(EuStockMarkets, 2, single.limit = "max"), replace(EuStockMarkets, EuStockMarkets > 2, NA)) }) set.seed(101) lmiss <- na_insert(letters) month.miss <- na_insert(month.name) char_dat <- na_insert(char_vars(GGDC10S)) char_nums <- c("-1", "1", "0", "2", "-2") options(warn = -1) test_that("recode_char works well", { expect_equal(recode_char(lmiss, a = "b"), replace(lmiss, lmiss == "a", "b")) expect_visible(recode_char(lmiss, a = "b", missing = "a")) # continue here to write proper tests!!.. expect_visible(recode_char(lmiss, a = "b", missing = "c")) expect_visible(recode_char(lmiss, a = "b", default = "n")) expect_visible(recode_char(lmiss, a = "b", default = "n", missing = "c")) expect_visible(recode_char(month.miss, ber = NA, regex = TRUE)) expect_visible(recode_char(month.miss, ber = NA, missing = "c", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, default = "n", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, default = "n", missing = "c", regex = TRUE)) expect_visible(recode_char(lmiss, a = "b", e = "f")) expect_visible(recode_char(lmiss, a = "b", e = "f", missing = "a")) expect_visible(recode_char(lmiss, a = "b", e = "f", missing = "c")) expect_visible(recode_char(lmiss, a = "b", e = "f", default = "n")) expect_visible(recode_char(lmiss, a = "b", e = "f", default = "n", missing = "c")) expect_visible(recode_char(month.miss, ber = NA, May = "a", regex = TRUE)) expect_visible(recode_char(month.miss, ber = NA, May = "a", missing = "c", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, May = "a", default = "n", regex = TRUE)) expect_visible(recode_char(lmiss, ber = NA, May = "a", default = "n", missing = "c", regex = TRUE)) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added")) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added", missing = "c")) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added", default = "n")) expect_visible(recode_char(char_dat, SGP = "SINGAPORE", VA = "Value Added", default = "n", missing = "c")) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE)) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE, missing = "c")) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE, default = "n")) expect_visible(recode_char(char_dat, saharan = "SSA", regex = TRUE, default = "n", missing = "c")) expect_equal(recode_char(char_nums, "-\\d+" = "negative", "0" = "zero", regex = T), c("negative", "1", "zero", "2", "negative")) expect_equal(recode_char(char_nums, "0" = "zero", "-\\d+" = "negative", default = "positive", regex = T), c("negative", "positive", "zero", "positive", "negative")) expect_equal(recode_char(char_nums, "-\\d+" = "negative", "0" = "zero", default = "positive", regex = T), c("negative", "positive", "zero", "positive", "negative")) }) set.seed(101) vmiss <- na_insert(mtcars$cyl) mtcNA <- na_insert(mtcars) test_that("recode_num works well", { expect_equal(recode_num(vmiss, `4` = 5), replace(vmiss, vmiss == 4, 5)) expect_visible(recode_num(vmiss, `4` = 5, missing = 4)) # continue here to write proper tests!!!.. expect_visible(recode_num(vmiss, `4` = 5, missing = 7)) expect_visible(recode_num(vmiss, `4` = 5, default = 8)) expect_visible(recode_num(vmiss, `4` = 5, default = 8, missing = 7)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, missing = 6)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, missing = 7)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, default = 8)) expect_visible(recode_num(vmiss, `4` = 5, `6` = 10, default = 8, missing = 7)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2, missing = 6)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2, default = 8)) expect_visible(recode_num(mtcNA, `4` = 5, `1` = 2, default = 8, missing = 7)) }) options(warn = 1) collapse/tests/testthat/test-fNobs-fNdistinct.R0000644000176200001440000002550315000542453021333 0ustar liggesuserscontext("fnobs and fndistinct") # rm(list = ls()) set.seed(101) x <- rnorm(100) xNA <- x xNA[sample.int(100,20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) data <- fsubset(wlddev, iso3c %in% c("BLZ","IND","USA","SRB","GRL")) g <- GRP.default(data$iso3c) # rev(), droplevels() dataNA <- na_insert(data) m <- as.matrix(data) mNA <- as.matrix(dataNA) data$LC <- as.list(data$PCGDP) dataNA$LC <- lapply(na_insert(data["LC"][[1]]), function(x) if(is.na(x)) NULL else x) bsum <- base::sum Nobs <- function(x) if(is.list(x)) bsum(lengths(x) > 0L) else bsum(!is.na(x)) Ndistinct <- function(x, na.rm = FALSE) { if(na.rm) return(length(unique(x[!is.na(x)]))) return(length(unique(x))) } # fnobs test_that("fnobs performs like Nobs (defined above)", { expect_equal(fnobs(NA), as.double(Nobs(NA))) expect_equal(fnobs(1), Nobs(1)) expect_equal(fnobs(1:3), Nobs(1:3)) expect_equal(fnobs(-1:1), Nobs(-1:1)) expect_equal(fnobs(x), Nobs(x)) expect_equal(fnobs(xNA), Nobs(xNA)) expect_equal(fnobs(data[-length(data)]), fnobs(m)) expect_equal(fnobs(m), dapply(m, Nobs)) expect_equal(fnobs(mNA), dapply(mNA, Nobs)) expect_equal(fnobs(x, f), BY(x, f, Nobs)) expect_equal(fnobs(xNA, f), BY(xNA, f, Nobs)) expect_equal(fnobs(m, g), BY(m, g, Nobs)) expect_equal(fnobs(mNA, g), BY(mNA, g, Nobs)) expect_equal(fnobs(data, g), BY(data, g, Nobs)) expect_equal(fnobs(dataNA, g), BY(dataNA, g, Nobs)) }) test_that("fnobs performs numerically stable", { expect_true(all_obj_equal(replicate(50, fnobs(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fnobs(dataNA, g), simplify = FALSE))) }) test_that("fnobs handles special values in the right way", { expect_equal(fnobs(NA), 0) expect_equal(fnobs(NaN), 0) expect_equal(fnobs(Inf), 1) expect_equal(fnobs(-Inf), 1) expect_equal(fnobs(TRUE), 1) expect_equal(fnobs(FALSE), 1) }) test_that("fnobs produces errors for wrong input", { expect_visible(fnobs("a")) expect_visible(fnobs(NA_character_)) expect_visible(fnobs(mNA)) expect_visible(fnobs(mNA, g)) expect_error(fnobs(1:2,1:3)) expect_error(fnobs(m,1:31)) expect_error(fnobs(m, 1)) expect_error(fnobs(data,1:31)) expect_visible(fnobs(wlddev)) expect_visible(fnobs(wlddev, wlddev$iso3c)) }) data$LC <- NULL dataNA$LC <- NULL # fndistinct for (nth in 1:2) { if(nth == 2L) { if(Sys.getenv("OMP") == "TRUE") { fndistinct <- function(x, ...) collapse::fndistinct(x, ..., nthreads = 2L) } else break } test_that("fndistinct performs like Ndistinct (defined above)", { expect_equal(fndistinct(NA), 0) expect_equal(fndistinct(NA, na.rm = FALSE), 1) expect_equal(fndistinct(1), Ndistinct(1, na.rm = TRUE)) expect_equal(fndistinct(1:3), Ndistinct(1:3, na.rm = TRUE)) expect_equal(fndistinct(-1:1), Ndistinct(-1:1, na.rm = TRUE)) expect_equal(fndistinct(1, na.rm = FALSE), Ndistinct(1)) expect_equal(fndistinct(1:3, na.rm = FALSE), Ndistinct(1:3)) expect_equal(fndistinct(-1:1, na.rm = FALSE), Ndistinct(-1:1)) expect_equal(fndistinct(x), Ndistinct(x, na.rm = TRUE)) expect_equal(fndistinct(x, na.rm = FALSE), Ndistinct(x)) expect_equal(fndistinct(xNA, na.rm = FALSE), Ndistinct(xNA)) expect_equal(fndistinct(xNA), Ndistinct(xNA, na.rm = TRUE)) expect_equal(fndistinct(data), fndistinct(m)) expect_equal(fndistinct(m), dapply(m, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(m, na.rm = FALSE), dapply(m, Ndistinct)) expect_equal(fndistinct(mNA, na.rm = FALSE), dapply(mNA, Ndistinct)) expect_equal(fndistinct(mNA), dapply(mNA, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(x, f), BY(x, f, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(x, f, na.rm = FALSE), BY(x, f, Ndistinct)) expect_equal(fndistinct(xNA, f, na.rm = FALSE), BY(xNA, f, Ndistinct)) expect_equal(fndistinct(xNA, f), BY(xNA, f, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(m, g), BY(m, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(m, g, na.rm = FALSE), BY(m, g, Ndistinct)) expect_equal(fndistinct(mNA, g, na.rm = FALSE), BY(mNA, g, Ndistinct)) expect_equal(fndistinct(mNA, g), BY(mNA, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(data, g), BY(data, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(data, g, na.rm = FALSE), BY(data, g, Ndistinct)) expect_equal(fndistinct(dataNA, g, na.rm = FALSE), BY(dataNA, g, Ndistinct)) expect_equal(fndistinct(dataNA, g), BY(dataNA, g, Ndistinct, na.rm = TRUE)) fg = as_factor_GRP(g) expect_equal(fndistinct(m, fg), BY(m, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(m, fg, na.rm = FALSE), BY(m, g, Ndistinct)) expect_equal(fndistinct(mNA, fg, na.rm = FALSE), BY(mNA, g, Ndistinct)) expect_equal(fndistinct(mNA, fg), BY(mNA, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(data, fg), BY(data, g, Ndistinct, na.rm = TRUE)) expect_equal(fndistinct(data, fg, na.rm = FALSE), BY(data, g, Ndistinct)) expect_equal(fndistinct(dataNA, fg, na.rm = FALSE), BY(dataNA, g, Ndistinct)) expect_equal(fndistinct(dataNA, fg), BY(dataNA, g, Ndistinct, na.rm = TRUE)) }) test_that("fndistinct performs numerically stable", { expect_true(all_obj_equal(replicate(50, fndistinct(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(data, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fndistinct(dataNA, g), simplify = FALSE))) }) test_that("fndistinct handles special values in the right way", { expect_equal(fndistinct(NA), 0) expect_equal(fndistinct(NaN), 0) expect_equal(fndistinct(Inf), 1) expect_equal(fndistinct(-Inf), 1) expect_equal(fndistinct(TRUE), 1) expect_equal(fndistinct(FALSE), 1) expect_equal(fndistinct(c(TRUE,TRUE)), 1) expect_equal(fndistinct(c(TRUE,FALSE)), 2) expect_equal(fndistinct(c(FALSE,TRUE)), 2) expect_equal(fndistinct(c(FALSE,FALSE)), 1) expect_equal(fndistinct(c(NA,TRUE,TRUE,NA)), 1) expect_equal(fndistinct(c(NA,TRUE,FALSE,NA)), 2) expect_equal(fndistinct(c(NA,FALSE,TRUE,NA)), 2) expect_equal(fndistinct(c(NA,FALSE,FALSE,NA)), 1) # expect_equal(max(fndistinct(mNA > 10)), 1) # These tests are insecure to random number generation # expect_equal(max(fndistinct(mNA > 10, g)), 1) expect_equal(fndistinct(NA, na.rm = FALSE), 1) expect_equal(fndistinct(NaN, na.rm = FALSE), 1) expect_equal(fndistinct(Inf, na.rm = FALSE), 1) expect_equal(fndistinct(-Inf, na.rm = FALSE), 1) expect_equal(fndistinct(TRUE, na.rm = FALSE), 1) expect_equal(fndistinct(FALSE, na.rm = FALSE), 1) expect_equal(fndistinct(c(TRUE,TRUE), na.rm = FALSE), 1) expect_equal(fndistinct(c(TRUE,FALSE), na.rm = FALSE), 2) expect_equal(fndistinct(c(FALSE,TRUE), na.rm = FALSE), 2) expect_equal(fndistinct(c(FALSE,FALSE), na.rm = FALSE), 1) expect_equal(fndistinct(c(NA,TRUE,TRUE,NA), na.rm = FALSE), 2) expect_equal(fndistinct(c(NA,TRUE,FALSE,NA), na.rm = FALSE), 3) expect_equal(fndistinct(c(NA,FALSE,TRUE,NA), na.rm = FALSE), 3) expect_equal(fndistinct(c(NA,FALSE,FALSE,NA), na.rm = FALSE), 2) # expect_equal(max(fndistinct(mNA > 10, na.rm = FALSE)), 2) # expect_equal(max(fndistinct(mNA > 10, g, na.rm = FALSE)), 2) }) test_that("fndistinct produces errors for wrong input", { expect_visible(fndistinct("a")) expect_visible(fndistinct(NA_character_)) expect_visible(fndistinct(mNA)) expect_visible(fndistinct(mNA, g)) expect_error(fndistinct(1:2,1:3)) expect_error(fndistinct(m,1:31)) expect_error(fndistinct(m, 1)) expect_error(fndistinct(data,1:31)) expect_visible(fndistinct(wlddev)) expect_visible(fndistinct(wlddev, wlddev$iso3c)) }) } test_that("Singleton group optimization works properly", { g <- GRP(as.character(seq_row(mtcars))) xNA <- na_insert(mtcars$mpg) expect_equal(unattrib(fndistinct(xNA, g)), as.integer(!is.na(xNA[g$order]))) g <- GRP(seq_row(mtcars)) xNA <- na_insert(mtcars$mpg) expect_equal(unattrib(fndistinct(xNA, g)), as.integer(!is.na(xNA[g$order]))) g <- GRP(sample.int(100, 32)) xNA <- na_insert(mtcars$mpg) expect_equal(unattrib(fndistinct(xNA, g)), as.integer(!is.na(xNA[g$order]))) }) collapse/tests/testthat/test-TRA.R0000644000176200001440000002221014777170131016611 0ustar liggesuserscontext("TRA") bmean <- base::mean # rm(list = ls()) set.seed(101) d <- na_insert(iris[1:4]) v <- d$Sepal.Length m <- as.matrix(d) f <- iris$Species # For sweep replace_NA <- function(x, y) `[<-`(x, is.na(x), value = y[is.na(x)]) replace <- function(x, y) `[<-`(y, is.na(x), value = NA) # `[<-`(x, !is.na(x), value = y) replace_fill <- function(x, y) y # rep(y, length(x)) "%" <- function(x, y) x * (100 / y) "-%%" <- function(x, y) x - (x %% y) # "-+" <- function(x, y) x - y + bmean(x, na.rm = TRUE) test_that("TRA performs like sweep", { ops <- c("replace_NA","replace_fill", "replace", "-", "+", "*", "/", "%", "%%", "-%%") for(i in ops) { expect_equal(drop(sweep(qM(v), 2L, bmean(v, na.rm = TRUE), i)), TRA(v, bmean(v, na.rm = TRUE), i)) expect_equal(`attributes<-`(sweep(qM(m), 2L, colMeans(m, na.rm = TRUE), i), attributes(m)), TRA(m, colMeans(m, na.rm = TRUE), i)) expect_equal(setNames(qDF(sweep(d, 2L, colMeans(qM(d), na.rm = TRUE), i)), names(d)), TRA(d, colMeans(qM(d), na.rm = TRUE), i)) } for(i in ops) { expect_equal(unlist(Map(function(x, y) drop(sweep(qM(x), 2L, y, i)), rsplit(v, f), as.list(fmean(v, f))), use.names = FALSE), TRA(v, fmean(v, f), i, f)) expect_equal(`attributes<-`(do.call(rbind, Map(function(x, y) sweep(qM(x), 2L, y, i), lapply(rsplit(qDF(m), f), qM), mrtl(fmean(m, f)))), attributes(m)), TRA(m, fmean(m, f), i, f)) expect_equal(`attributes<-`(unlist2d(Map(function(x, y) sweep(x, 2L, y, i), rsplit(d, f), mrtl(qM(fmean(d, f)))), idcols = FALSE), attributes(d)), TRA(d, fmean(d, f), i, f)) } }) test_that("TRA performs like built-in version", { for(i in c(0L, seq_len(10)[-4])) { expect_equal(TRA(v, fmean(v), i), fmean(v, TRA = i)) expect_equal(TRA(m, fmean(m), i), fmean(m, TRA = i)) expect_equal(TRA(d, fmean(d), i), fmean(d, TRA = i)) } for(i in c(0L, seq_len(10))) { expect_equal(TRA(v, fmean(v, f), i, f), fmean(v, f, TRA = i)) expect_equal(TRA(m, fmean(m, f), i, f), fmean(m, f, TRA = i)) expect_equal(TRA(d, fmean(d, f), i, f), fmean(d, f, TRA = i)) } }) test_that("TRA performs like fbetween and fwithin", { expect_equal(TRA(v, fmean(v), 1L), fbetween(v, fill = TRUE)) expect_equal(TRA(v, fmean(v), 2L), fbetween(v)) expect_equal(TRA(v, fmean(v), 3L), fwithin(v)) expect_equal(TRA(m, fmean(m), 1L), fbetween(m, fill = TRUE)) expect_equal(TRA(m, fmean(m), 2L), fbetween(m)) expect_equal(TRA(m, fmean(m), 3L), fwithin(m)) expect_equal(TRA(d, fmean(d), 1L), fbetween(d, fill = TRUE)) expect_equal(TRA(d, fmean(d), 2L), fbetween(d)) expect_equal(TRA(d, fmean(d), 3L), fwithin(d)) expect_equal(TRA(v, fmean(v, f), 1L, f), fbetween(v, f, fill = TRUE)) expect_equal(TRA(v, fmean(v, f), 2L, f), fbetween(v, f)) expect_equal(TRA(v, fmean(v, f), 3L, f), fwithin(v, f)) expect_equal(TRA(v, fmean(v, f), 4L, f), fwithin(v, f, mean = "overall.mean")) expect_equal(TRA(m, fmean(m, f), 1L, f), fbetween(m, f, fill = TRUE)) expect_equal(TRA(m, fmean(m, f), 2L, f), fbetween(m, f)) expect_equal(TRA(m, fmean(m, f), 3L, f), fwithin(m, f)) expect_equal(TRA(m, fmean(m, f), 4L, f), fwithin(m, f, mean = "overall.mean")) expect_equal(TRA(d, fmean(d, f), 1L, f), fbetween(d, f, fill = TRUE)) expect_equal(TRA(d, fmean(d, f), 2L, f), fbetween(d, f)) expect_equal(TRA(d, fmean(d, f), 3L, f), fwithin(d, f)) expect_equal(TRA(d, fmean(d, f), 4L, f), fwithin(d, f, mean = "overall.mean")) }) test_that("TRA gives errors for wrong input", { expect_warning(TRA(v, fmean(v), bla = 1)) expect_warning(TRA(m, fmean(m), bla = 1)) expect_warning(TRA(d, fmean(d), bla = 1)) expect_error(TRA(v, 1:2)) expect_error(TRA(m, 1:2)) expect_error(TRA(d, 1:2)) expect_error(TRA(v, as.character(fmean(v)))) expect_error(TRA(m, as.character(fmean(m)))) expect_error(TRA(d, as.character(fmean(d)))) expect_error(TRA(v, fmean(v, f), "-", f[-1])) expect_error(TRA(m, fmean(m, f), "-", f[-1])) expect_error(TRA(d, fmean(d, f), "-", f[-1])) expect_error(TRA(v, fmean(v), 19L)) expect_error(TRA(m, fmean(m), 19L)) expect_error(TRA(d, fmean(d), 19L)) expect_error(TRA(v, fmean(v), "bla")) expect_error(TRA(m, fmean(m), "bla")) expect_error(TRA(d, fmean(d), "bla")) }) test_that("TRA handles different data types as intended", { # Vector & Matrix: Simple expect_true(is.character(fnobs(na_insert(letters), TRA = "replace_NA"))) expect_true(is.integer(fnobs(letters, TRA = "replace_fill"))) expect_true(is.integer(fnobs(na_insert(letters), TRA = "replace"))) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(letters, TRA = i)) expect_true(is.double(fnobs(na_insert(AirPassengers), TRA = "replace_NA"))) expect_true(is.integer(fnobs(AirPassengers, TRA = "replace_fill"))) expect_true(is.integer(fnobs(AirPassengers, TRA = "replace"))) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(AirPassengers, TRA = i))) expect_true(is.double(fnobs(na_insert(EuStockMarkets), TRA = "replace_NA"))) expect_true(is.integer(fnobs(EuStockMarkets, TRA = "replace_fill"))) expect_true(is.integer(fnobs(EuStockMarkets, TRA = "replace"))) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(EuStockMarkets, TRA = i))) # Vector & Matrix: Grouped set.seed(101) expect_error(fnobs(letters, sample.int(3, length(letters), TRUE), TRA = "replace_NA")) expect_true(is.integer(fnobs(letters, sample.int(3, length(letters), TRUE), TRA = "replace_fill"))) expect_true(is.integer(fnobs(letters, sample.int(3, length(letters), TRUE), TRA = "replace"))) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(letters, sample.int(3, length(letters), TRUE), TRA = i)) expect_true(is.double(fnobs(AirPassengers, sample.int(3, length(AirPassengers), TRUE), TRA = "replace_NA"))) expect_true(is.integer(fnobs(AirPassengers, sample.int(3, length(AirPassengers), TRUE), TRA = "replace_fill"))) expect_true(is.integer(fnobs(AirPassengers, sample.int(3, length(AirPassengers), TRUE), TRA = "replace"))) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(AirPassengers, sample.int(3, length(AirPassengers), TRUE), TRA = i))) expect_true(is.double(fnobs(EuStockMarkets, sample.int(3, nrow(EuStockMarkets), TRUE), TRA = "replace_NA"))) expect_true(is.integer(fnobs(EuStockMarkets, sample.int(3, nrow(EuStockMarkets), TRUE), TRA = "replace_fill"))) expect_true(is.integer(fnobs(EuStockMarkets, sample.int(3, nrow(EuStockMarkets), TRUE), TRA = "replace"))) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_true(is.numeric(fnobs(EuStockMarkets, sample.int(3, nrow(EuStockMarkets), TRUE), TRA = i))) # Date Frame: Simple expect_equal(vtypes(fndistinct(wlddev, TRA = "replace_NA")), vtypes(wlddev)) expect_equal(unname(vtypes(fndistinct(wlddev, TRA = "replace"))), rep("integer", 13)) expect_equal(unname(vtypes(fndistinct(wlddev, TRA = "replace_fill"))), rep("integer", 13)) expect_equal(vtypes(fmode(wlddev, TRA = "replace_NA")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, TRA = "replace_NA")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, TRA = "replace_NA")), vtypes(wlddev)) expect_equal(vtypes(fmode(wlddev, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(fmode(wlddev, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, TRA = "replace")), vtypes(wlddev)) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_equal(unname(vtypes(fnobs(nv(wlddev), TRA = i))), rep("double", 7)) for(i in c("-", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(wlddev, TRA = i)) # Date Frame: Grouped expect_equal(unname(vtypes(fndistinct(wlddev, wlddev$iso3c, TRA = "replace"))), rep("integer", 13)) expect_equal(unname(vtypes(fndistinct(wlddev, wlddev$iso3c, TRA = "replace_fill"))), rep("integer", 13)) expect_error(fndistinct(wlddev, wlddev$iso3c, TRA = "replace_NA")) expect_equal(vtypes(fmode(wlddev, wlddev$iso3c, TRA = "replace_NA")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, wlddev$iso3c, TRA = "replace_NA")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, wlddev$iso3c, TRA = "replace_NA")), vtypes(wlddev)) expect_equal(vtypes(fmode(wlddev, wlddev$iso3c, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, wlddev$iso3c, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, wlddev$iso3c, TRA = "replace_fill")), vtypes(wlddev)) expect_equal(vtypes(fmode(wlddev, wlddev$iso3c, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(ffirst(wlddev, wlddev$iso3c, TRA = "replace")), vtypes(wlddev)) expect_equal(vtypes(flast(wlddev, wlddev$iso3c, TRA = "replace")), vtypes(wlddev)) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_equal(unname(vtypes(fnobs(nv(wlddev), wlddev$iso3c, TRA = i))), rep("double", 7)) for(i in c("-", "-+", "+", "*", "/", "%", "%%", "-%%")) expect_error(fnobs(wlddev, wlddev$iso3c, TRA = i)) }) collapse/tests/testthat/test-fmutate.R0000644000176200001440000007213715202513226017633 0ustar liggesuserscontext("fsummarise and fmutate") expect_equal(1, 1) if(requireNamespace("magrittr", quietly = TRUE) && requireNamespace("dplyr", quietly = TRUE)) { library(magrittr) bmean <- base::mean bsum <- base::sum bsd <- stats::sd bmin <- base::min bmax <- base::max # dplyr >= 1.1: summarise requires size-1 per group; reframe for multi-row (e.g. quantile) dply_multi <- function(.data, ...) { if(utils::packageVersion("dplyr") >= "1.1.0") dplyr::reframe(.data, ...) else dplyr::summarise(.data, ..., .groups = "drop") } NCRAN <- identical(Sys.getenv("NCRAN"), "TRUE") mtc <- dplyr::as_tibble(mtcars) gmtc <- dplyr::group_by(mtc, cyl, vs, am) expect_equal(gsplit(mtcars$mpg, GRP(gmtc), TRUE), split(mtcars$mpg, as_factor_GRP(GRP(gmtc)))) if(NCRAN) { test_that("fsummarise works like dplyr::summarise for tagged vector expressions", { # Simple computations expect_equal(smr(mtc, mu = bmean(mpg), sigma = bsd(mpg)), dplyr::summarise(mtc, mu = bmean(mpg), sigma = bsd(mpg))) # TODO: Could expand like this as well... but who needs this? # expect_false(all_obj_equal(smr(mtc, mu = bmean(mpg), sigma = bsd(mpg), q = quantile(mpg)), # dplyr::summarise(mtc, mu = bmean(mpg), sigma = bsd(mpg), q = quantile(mpg)))) expect_equal(smr(mtc, mu = bmean(mpg) + bsd(mpg)), dplyr::summarise(mtc, mu = bmean(mpg) + bsd(mpg))) expect_equal(smr(mtc, mu = bmean(mpg) + 3), dplyr::summarise(mtc, mu = bmean(mpg) + 3)) q <- 5 expect_equal(smr(mtc, mu = bmean(mpg) + q), dplyr::summarise(mtc, mu = bmean(mpg) + q)) v <- mtcars$disp expect_equal(smr(mtc, mu = bmean(mpg) + bmean(v)), dplyr::summarise(mtc, mu = bmean(mpg) + bmean(v))) # Grouped computations expect_equal(smr(gmtc, mpg = fmean(mpg)), dplyr::summarise(gmtc, mpg = bmean(mpg), .groups = "drop")) expect_equal(smr(gmtc, mpg = bmean(mpg)), dplyr::summarise(gmtc, mpg = bmean(mpg), .groups = "drop")) expect_equal(smr(gmtc, mpg = fmean(mpg), carb = fmax(carb)), dplyr::summarise(gmtc, mpg = bmean(mpg), carb = bmax(carb), .groups = "drop")) expect_equal(smr(gmtc, mpg = fmean(mpg), carb = bmax(carb)), dplyr::summarise(gmtc, mpg = bmean(mpg), carb = bmax(carb), .groups = "drop")) expect_equal(smr(gmtc, mpg = bmean(mpg), carb = bmax(carb)), dplyr::summarise(gmtc, mpg = bmean(mpg), carb = bmax(carb), .groups = "drop")) expect_equal(smr(gmtc, mpg = bmean(mpg), carb = fmax(carb)), dplyr::summarise(gmtc, mpg = bmean(mpg), carb = bmax(carb), .groups = "drop")) expect_equal(fsummarise(gmtc, mpg = bmean(mpg), carb = fmax(carb), keep.group_vars = FALSE), fsummarise(gmtc, mpg = bmean(mpg), carb = fmax(carb)) %>% slt(-cyl,-vs,-am)) # Multi-return values expect_equal(smr(gmtc, mpg = quantile(mpg)), dply_multi(gmtc, mpg = quantile(mpg)) %>% tfm(mpg = unname(mpg))) # More complex expressions expect_equal(smr(gmtc, mpg = bmean(mpg) + 1), dplyr::summarise(gmtc, mpg = bmean(mpg) + 1, .groups = "drop")) expect_equal(smr(gmtc, mpg = bmean(mpg) + q), dplyr::summarise(gmtc, mpg = bmean(mpg) + q, .groups = "drop")) expect_equal(smr(gmtc, mpg = quantile(mpg) + q), dply_multi(gmtc, mpg = quantile(mpg) + q) %>% tfm(mpg = unname(mpg))) expect_equal(smr(gmtc, mpg = bmean(mpg) + bmax(v)), dplyr::summarise(gmtc, mpg = bmean(mpg) + bmax(v), .groups = "drop")) expect_equal(smr(gmtc, mpg = quantile(mpg) + bmax(v)), dply_multi(gmtc, mpg = quantile(mpg) + bmax(v)) %>% tfm(mpg = unname(mpg))) expect_equal(smr(gmtc, mpg = bmean(log(mpg))), dplyr::summarise(gmtc, mpg = bmean(log(mpg)), .groups = "drop")) expect_equal(smr(gmtc, mpg = bmean(log(mpg)) + bmax(qsec)), dplyr::summarise(gmtc, mpg = bmean(log(mpg)) + bmax(qsec), .groups = "drop")) expect_equal(smr(gmtc, mpg = quantile(mpg) + bmax(qsec)), dply_multi(gmtc, mpg = quantile(mpg) + bmax(qsec)) %>% tfm(mpg = unname(mpg))) expect_equal(smr(gmtc, mpg = fmean(log(mpg)) + fmax(qsec)), dplyr::summarise(gmtc, mpg = bmean(log(mpg)) + bmax(qsec), .groups = "drop")) expect_false(all_obj_equal(smr(gmtc, mpg = fmean(log(mpg)) + bmax(qsec)), dplyr::summarise(gmtc, mpg = bmean(log(mpg)) + bmax(qsec), .groups = "drop"))) # Testing expressions turned into functions: mid_fun <- function(x) (bmin(x) + bmax(x)) / 2 expect_true(all_obj_equal(smr(gmtc, mid_mpg = (bmin(mpg) + bmax(mpg)) / 2), smr(gmtc, mid_mpg = (fmin(mpg) + fmax(mpg)) / 2), smr(gmtc, mid_mpg = mid_fun(mpg)), dplyr::summarise(gmtc, mid_mpg = (bmin(mpg) + bmax(mpg)) / 2, .groups = "drop"))) # Adding global variable: expect_true(all_obj_equal(smr(gmtc, mid_mpg = (bmin(mpg) + bmax(mpg)) / 2 + q), smr(gmtc, mid_mpg = (fmin(mpg) + fmax(mpg)) / 2 + q), smr(gmtc, mid_mpg = mid_fun(mpg) + q), dplyr::summarise(gmtc, mid_mpg = (bmin(mpg) + bmax(mpg)) / 2 + q, .groups = "drop"))) # Weighted computations expect_equal(smr(gmtc, mpg = weighted.mean(mpg, wt)), dplyr::summarise(gmtc, mpg = weighted.mean(mpg, wt), .groups = "drop")) expect_equal(smr(gmtc, mpg = fmean(mpg, wt)), dplyr::summarise(gmtc, mpg = fmean(mpg, w = wt), .groups = "drop")) expect_equal(smr(gmtc, mpg = weighted.mean(mpg, wt) + 5.5), dplyr::summarise(gmtc, mpg = weighted.mean(mpg, wt) + 5.5, .groups = "drop")) expect_equal(smr(gmtc, mpg = fmean(mpg, wt) + 5.5), dplyr::summarise(gmtc, mpg = fmean(mpg, w = wt) + 5.5, .groups = "drop")) expect_equal(smr(gmtc, mpg = weighted.mean(mpg, wt) + q), dplyr::summarise(gmtc, mpg = weighted.mean(mpg, wt) + q, .groups = "drop")) expect_equal(smr(gmtc, mpg = fmean(mpg, wt) + q), dplyr::summarise(gmtc, mpg = fmean(mpg, w = wt) + q, .groups = "drop")) expect_equal(smr(gmtc, mpg = weighted.mean(mpg, wt) + bmax(v)), dplyr::summarise(gmtc, mpg = weighted.mean(mpg, wt) + bmax(v), .groups = "drop")) expect_equal(smr(gmtc, mpg = fmean(mpg, wt) + bmax(v)), dplyr::summarise(gmtc, mpg = fmean(mpg, w = wt) + bmax(v), .groups = "drop")) expect_equal(smr(gmtc, mpg = weighted.mean(mpg, wt) + bmax(qsec)), dplyr::summarise(gmtc, mpg = weighted.mean(mpg, wt) + bmax(qsec), .groups = "drop")) expect_equal(smr(gmtc, mpg = fmean(mpg, wt) + fmax(qsec)), dplyr::summarise(gmtc, mpg = fmean(mpg, w = wt) + bmax(qsec), .groups = "drop")) expect_equal(smr(gmtc, mpg = quantile(mpg) + weighted.mean(mpg, wt)), dply_multi(gmtc, mpg = quantile(mpg) + weighted.mean(mpg, wt)) %>% tfm(mpg = unname(mpg))) expect_warning(smr(gmtc, mpg = quantile(mpg) + fmean(mpg, wt))) }) } wld <- dplyr::as_tibble(wlddev) gwld <- dplyr::group_by(wlddev, iso3c) if(NCRAN) { test_that("fsummarise works like dplyr::summarise with across and simple usage", { # Simple usage expect_true(all_obj_equal(fsummarise(mtc, across(cyl:drat, bsum)), fsummarise(mtc, across(cyl:drat, fsum)), dplyr::summarise(mtc, dplyr::across(cyl:drat, bsum)))) expect_true(all_obj_equal(fsummarise(mtc, across(5:8, bsum)), fsummarise(mtc, across(5:8, fsum)), dplyr::summarise(mtc, dplyr::across(5:8, bsum)))) expect_true(all_obj_equal(fsummarise(mtc, across(-(5:8), bsum)), fsummarise(mtc, across(-(5:8), fsum, .apply = FALSE)), dplyr::summarise(mtc, dplyr::across(-(5:8), bsum)))) expect_true(all_obj_equal(fsummarise(wld, across(is.numeric, bsum, na.rm = TRUE)), fsummarise(wld, across(is.numeric, fsum)) %>% dapply(unattrib, drop = FALSE), dplyr::summarise(wld, dplyr::across(where(is.numeric), \(x) bsum(x, na.rm = TRUE))))) expect_true(all_obj_equal(fsummarise(mtc, across(NULL, bsum, na.rm = TRUE)), fsummarise(mtc, across(NULL, fsum)), dplyr::summarise(mtc, dplyr::across(everything(), \(x) bsum(x, na.rm = TRUE))))) expect_equal(fsummarise(mtc, across(cyl:vs, bsum)), fsummarise(mtc, cyl = bsum(cyl), across(disp:qsec, fsum), vs = fsum(vs))) # Simple programming use vlist <- list(mtc %>% fselect(cyl:drat, return = "names"), 5:8, NULL) # -(5:8), is.numeric for(i in seq_along(vlist)) { expect_true(all_obj_equal(fsummarise(mtc, across(vlist[[i]], bsum)), fsummarise(mtc, across(vlist[[i]], fsum)), dplyr::summarise(mtc, dplyr::across(if(is.null(vlist[[i]])) everything() else vlist[[i]], bsum)))) v <- vlist[[i]] expect_true(all_obj_equal(fsummarise(mtc, across(v, bsum)), fsummarise(mtc, across(v, fsum)), dplyr::summarise(mtc, dplyr::across(if(is.null(v)) everything() else v, bsum)))) } # Simple usage and multiple functions expect_true(all_obj_equal(fsummarise(mtc, across(cyl:drat, list(bmean, bsum))), fsummarise(mtc, across(cyl:drat, list(bmean = fmean, bsum = fsum))), dplyr::summarise(mtc, dplyr::across(cyl:drat, list(bmean = bmean, bsum = bsum))))) expect_true(all_obj_equal(fsummarise(mtc, across(NULL, list(bmean, bsum))), fsummarise(mtc, across(NULL, list(bmean = fmean, bsum = fsum))), dplyr::summarise(mtc, dplyr::across(everything(), list(bmean = bmean, bsum = bsum))))) # Passing additional arguments expect_true(all_obj_equal(fsummarise(mtc, across(cyl:drat, bsum, na.rm = FALSE)), fsummarise(mtc, across(cyl:drat, fsum, na.rm = FALSE)), dplyr::summarise(mtc, dplyr::across(cyl:drat, \(x) bsum(x, na.rm = FALSE))))) expect_true(all_obj_equal(fsummarise(mtc, across(cyl:drat, weighted.mean, w = wt)), fsummarise(mtc, across(cyl:drat, fmean, w = wt)), dplyr::summarise(mtc, dplyr::across(cyl:drat, \(x) weighted.mean(x, w = wt))))) expect_true(all_obj_equal(fsummarise(mtc, across(cyl:drat, list(mean = weighted.mean, sum = fsum), w = wt)), fsummarise(mtc, across(cyl:drat, list(mean = fmean, sum = fsum), w = wt)), dplyr::summarise(mtc, dplyr::across(cyl:drat, list(mean = \(x) weighted.mean(x, w = wt), sum = \(x) fsum(x, w = wt)))))) # Simple programming use flist <- list(bsum, list(bmean = bmean, bsum = bsum), list(bmean, bsum)) # c("bmean", "bsum"), c(mean = "fmean", sum = "fsum") for(i in seq_along(flist)) { expect_equal(fsummarise(mtc, across(cyl:drat, flist[[i]])), dplyr::summarise(mtc, dplyr::across(cyl:drat, flist[[i]]))) f <- flist[[i]] expect_equal(fsummarise(mtc, across(cyl:drat, f)), dplyr::summarise(mtc, dplyr::across(cyl:drat, f))) } }) test_that("fsummarise works like dplyr::summarise with across and grouped usage", { # Simple usage expect_true(all_obj_equal(fsummarise(gmtc, across(hp:drat, bsum)), fsummarise(gmtc, across(hp:drat, fsum)), dplyr::summarise(gmtc, dplyr::across(hp:drat, bsum), .groups = "drop"))) expect_true(all_obj_equal(fsummarise(gmtc, across(5:7, bsum)), fsummarise(gmtc, across(5:7, fsum)), dplyr::summarise(gmtc, dplyr::across(4:6, bsum), .groups = "drop"))) expect_true(all_obj_equal(fsummarise(gwld, across(is.numeric, bsum, na.rm = TRUE)) %>% setLabels(NULL), fsummarise(gwld, across(is.numeric, fsum)) %>% replace_NA() %>% setLabels(NULL), dplyr::summarise(gwld, dplyr::across(where(is.numeric), \(x) bsum(x, na.rm = TRUE))))) expect_true(all_obj_equal(fsummarise(gmtc, across(NULL, bsum, na.rm = TRUE)) %>% setLabels(NULL), fsummarise(gmtc, across(NULL, fsum)) %>% setLabels(NULL), dplyr::summarise(gmtc, dplyr::across(everything(), \(x) bsum(x, na.rm = TRUE)), .groups = "drop"))) expect_equal(fsummarise(gmtc, across(NULL, bsum, na.rm = TRUE), keep.group_vars = FALSE), fsummarise(gmtc, across(NULL, bsum, na.rm = TRUE)) %>% slt(-cyl,-vs,-am)) expect_equal(fsummarise(gmtc, across(cyl:vs, bsum)), fsummarise(gmtc, cyl = bsum(cyl), across(disp:qsec, fsum), vs = fsum(vs))) # Simple programming use vlist <- list(mtc %>% fselect(hp:drat, return = "names"), NULL) # -(5:8), is.numeric for(i in seq_along(vlist)) { expect_true(all_obj_equal(fsummarise(gmtc, across(vlist[[i]], bsum)), fsummarise(gmtc, across(vlist[[i]], fsum)), dplyr::summarise(gmtc, dplyr::across(if(is.null(vlist[[i]])) everything() else vlist[[i]], bsum), .groups = "drop"))) v <- vlist[[i]] expect_true(all_obj_equal(fsummarise(gmtc, across(v, bsum)), fsummarise(gmtc, across(v, fsum)), dplyr::summarise(gmtc, dplyr::across(if(is.null(v)) everything() else v, bsum), .groups = "drop"))) } # Simple usage and multiple functions expect_true(all_obj_equal(fsummarise(gmtc, across(hp:drat, list(bmean, bsum))), fsummarise(gmtc, across(hp:drat, list(bmean = fmean, bsum = fsum))), dplyr::summarise(gmtc, dplyr::across(hp:drat, list(bmean = bmean, bsum = bsum)), .groups = "drop"))) expect_true(all_obj_equal(fsummarise(gmtc, across(NULL, list(bmean, bsum))), fsummarise(gmtc, across(NULL, list(bmean = fmean, bsum = fsum))), dplyr::summarise(gmtc, dplyr::across(everything(), list(bmean = bmean, bsum = bsum)), .groups = "drop"))) # Passing additional arguments expect_true(all_obj_equal(fsummarise(gwld, across(c("PCGDP", "LIFEEX"), bsum, na.rm = TRUE)) %>% setLabels(NULL), fsummarise(gwld, across(c("PCGDP", "LIFEEX"), fsum, na.rm = TRUE)) %>% setLabels(NULL) %>% replace_NA(), dplyr::summarise(gwld, dplyr::across(c("PCGDP", "LIFEEX"), \(x) bsum(x, na.rm = TRUE)), .groups = "drop"))) expect_true(all_obj_equal(fsummarise(gmtc, across(hp:drat, weighted.mean, w = wt)), fsummarise(gmtc, across(hp:drat, fmean, w = wt)), dplyr::summarise(gmtc, dplyr::across(hp:drat, \(x) weighted.mean(x, w = wt)), .groups = "drop"))) expect_equal(fsummarise(gmtc, across(cyl:vs, weighted.mean, w = wt)), fsummarise(gmtc, cyl = weighted.mean(cyl, wt), across(disp:qsec, fmean, w = wt), vs = fmean(vs, wt))) expect_true(all_obj_equal(fsummarise(gmtc, across(hp:drat, list(mean = weighted.mean, sum = fsum), w = wt)), fsummarise(gmtc, across(hp:drat, list(mean = fmean, sum = fsum), w = wt)), dplyr::summarise(gmtc, dplyr::across(hp:drat, list(mean = \(x) weighted.mean(x, w = wt), sum = \(x) fsum(x, w = wt))), .groups = "drop"))) # Simple programming use flist <- list(bsum, list(bmean = bmean, bsum = bsum), list(bmean, bsum)) # c("bmean", "bsum"), c(mean = "fmean", sum = "fsum") for(i in seq_along(flist)) { expect_equal(fsummarise(gmtc, across(hp:drat, flist[[i]])), dplyr::summarise(gmtc, dplyr::across(hp:drat, flist[[i]]), .groups = "drop")) f <- flist[[i]] expect_equal(fsummarise(gmtc, across(hp:drat, f)), dplyr::summarise(gmtc, dplyr::across(hp:drat, f), .groups = "drop")) } }) } test_that("fsummarise miscellaneous things", { expect_equal(smr(gmtc, acr(disp:hp, c("bmean", "bsd"))), fsummarise(gmtc, across(disp:hp, c("bmean", "bsd"), .transpose = FALSE)) %>% colorderv(c(4,6,5,7), pos = "exchange")) expect_identical(names(smr(gmtc, acr(disp:hp, fmean, .names = TRUE)))[4:5], c("disp_fmean", "hp_fmean")) expect_identical(names(smr(gmtc, acr(disp:hp, bmean, .names = TRUE)))[4:5], c("disp_bmean", "hp_bmean")) pwcorDF <- function(x, w = NULL) qDF(pwcor(x, w = w), "var") expect_equal( mtcars %>% gby(cyl) %>% smr(acr(disp:hp, pwcorDF, .apply = FALSE)), rsplit(mtcars, disp + hp ~ cyl) %>% lapply(pwcorDF) %>% unlist2d("cyl", "var") %>% tfm(cyl = as.numeric(cyl)) ) if(identical(Sys.getenv("LOCAL"), "TRUE")) { # No tests depending on suggested package (except for major ones). skip_if_not_installed("weights") expect_equal( mtcars %>% gby(cyl) %>% smr(acr(disp:hp, pwcorDF, w = wt, .apply = FALSE)), rsplit(mtcars, disp + hp + wt ~ cyl) %>% lapply(function(x) pwcorDF(gv(x, 1:2), w = x$wt)) %>% unlist2d("cyl", "var") %>% tfm(cyl = as.numeric(cyl)) ) } if(requireNamespace("data.table", quietly = TRUE)) { lmest <- function(x) list(Mods = list(lm(disp~., x))) expect_equal( qDT(mtcars) %>% gby(cyl) %>% smr(acr(disp:hp, lmest, .apply = FALSE)), qDT(mtcars) %>% rsplit(disp + hp ~ cyl) %>% lapply(lmest) %>% data.table::rbindlist(idcol = "cyl") %>% tfm(cyl = as.numeric(cyl)) ) } }) if(NCRAN) { test_that("fmutate works as intended for simple usage", { expect_equal(fmutate(mtc, bla = 1), dplyr::mutate(mtc, bla = 1)) expect_equal(fmutate(mtc, bla = list(1)), dplyr::mutate(mtc, bla = list(1))) expect_equal(fmutate(mtc, bla = as.list(mpg)), dplyr::mutate(mtc, bla = as.list(mpg))) expect_equal(fmutate(mtc, mu = bmean(mpg)), dplyr::mutate(mtc, mu = bmean(mpg))) expect_equal(fmutate(mtc, mu = bmean(mpg), mpg = NULL), dplyr::mutate(mtc, mu = bmean(mpg), mpg = NULL)) expect_equal(fmutate(mtc, mu = bmean(mpg), dmu = mpg - mu), dplyr::mutate(mtc, mu = bmean(mpg), dmu = mpg - mu)) expect_equal(fmutate(mtc, mu = log(mpg)), dplyr::mutate(mtc, mu = log(mpg))) expect_equal(fmutate(mtc, mu = log(mpg), dmu = mpg - mu), dplyr::mutate(mtc, mu = log(mpg), dmu = mpg - mu)) expect_true(all_obj_equal( dplyr::mutate(mtc, dmu = mpg - bmean(mpg)), fmutate(mtc, dmu = mpg - bmean(mpg)), fmutate(mtc, dmu = mpg - fmean(mpg)), fmutate(mtc, dmu = fmean(mpg, TRA = "-")), fmutate(mtc, dmu = fwithin(mpg)) )) # With groups: expect_equal(fmutate(gmtc, bla = 1), dplyr::mutate(gmtc, bla = 1)) expect_equal(fmutate(gmtc, mu = bmean(mpg)), dplyr::mutate(gmtc, mu = bmean(mpg))) expect_equal(fmutate(gmtc, mu = bmean(mpg), mpg = NULL), dplyr::mutate(gmtc, mu = bmean(mpg), mpg = NULL)) expect_equal(fmutate(gmtc, mu = bmean(mpg), dmu = mpg - mu), dplyr::mutate(gmtc, mu = bmean(mpg), dmu = mpg - mu)) expect_equal(fmutate(gmtc, mu = log(mpg)), dplyr::mutate(gmtc, mu = log(mpg))) expect_equal(fmutate(gmtc, mu = log(mpg), dmu = mpg - mu), dplyr::mutate(gmtc, mu = log(mpg), dmu = mpg - mu)) expect_true(all_obj_equal( dplyr::mutate(gmtc, dmu = mpg - bmean(mpg)), fmutate(gmtc, dmu = mpg - bmean(mpg)), fmutate(gmtc, dmu = mpg - fmean(mpg)), fmutate(gmtc, dmu = fmean(mpg, TRA = "-")), fmutate(gmtc, dmu = fwithin(mpg)) )) }) } test_that("fmutate with across works like ftransformv", { expect_true(all_obj_equal( ftransformv(mtcars, cyl:vs, fwithin, w = wt, apply = TRUE), ftransformv(mtcars, cyl:vs, fwithin, w = wt, apply = FALSE), fmutate(mtcars, across(cyl:vs, fwithin, w = wt, .apply = TRUE)), fmutate(mtcars, across(cyl:vs, fwithin, w = wt, .apply = FALSE)) # fmutate(mtcars, fwithin(.data, w = .data[["wt"]]), .cols = slt(., cyl:vs, return = "names")) )) expect_true(all_obj_equal( ftransformv(mtcars, cyl:vs, fwithin, g = list(cyl, vs, am), apply = TRUE) %>% setRownames(), ftransformv(mtcars, cyl:vs, fwithin, g = list(cyl, vs, am), apply = FALSE) %>% setRownames(), fmutate(gmtc, across(cyl:vs, fwithin, .apply = TRUE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, fwithin, .apply = FALSE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, fmean, TRA = "-", .apply = TRUE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, fmean, TRA = "-", .apply = FALSE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, function(x) x - bmean(x), .apply = TRUE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, function(x) lapply(x, function(y) y - bmean(y)), .apply = FALSE)) %>% qDF(), gmtc %>% fmutate(fwithin(.data), .cols = slt(., cyl:vs, return = "names")) %>% qDF() )) expect_true(all_obj_equal( ftransformv(mtcars, cyl:vs, fwithin, g = list(cyl, vs, am), w = wt, apply = TRUE) %>% setRownames(), ftransformv(mtcars, cyl:vs, fwithin, g = list(cyl, vs, am), w = wt, apply = FALSE) %>% setRownames(), fmutate(gmtc, across(cyl:vs, fwithin, w = wt, .apply = TRUE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, fwithin, w = wt, .apply = FALSE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, fmean, TRA = "-", w = wt, .apply = TRUE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, fmean, TRA = "-", w = wt, .apply = FALSE)) %>% qDF(), fmutate(gmtc, across(cyl:vs, function(x, w) x - weighted.mean(x, w), w = wt, .apply = TRUE)) %>% qDF() )) }) test_that("fmutate with across reorders correctly", { for(i in seq_col(wlddev)) { gdf <- fgroup_by(wlddev, i) expect_true(all_identical( wlddev, fungroup(fmutate(gdf, across(c(PCGDP, LIFEEX), identity))), fungroup(fmutate(gdf, across(.fns = identity))), fungroup(fmutate(gdf, list(PCGDP = PCGDP, LIFEEX = LIFEEX))), fungroup(fmutate(gdf, (.data), .cols = .c(PCGDP, LIFEEX))), fungroup(fmutate(gdf, (.data))) )) } }) test_that("fsummarise and fmutate with arbitrary expressions", { expect_true( all_obj_equal( fsummarise(gmtc, qDF(cor(cbind(mpg, wt, hp))), mpg_qs = quantile(mpg, c(0.25, 0.5, 0.75))), fsummarise(gmtc, acr(c(mpg, wt, hp), function(x) qDF(cor(x)), .apply = FALSE), mpg_qs = quantile(mpg, c(0.25, 0.5, 0.75))), fsummarise(gmtc, qDF(cor(.data)), .cols = .c(mpg, wt, hp), mpg_qs = quantile(mpg, c(0.25, 0.5, 0.75))), fsummarise(gmtc, qDF(cor(slt(.data, mpg, wt, hp))), mpg_qs = quantile(mpg, c(0.25, 0.5, 0.75)))) ) expect_true( all_obj_equal( fmutate(gmtc, fscale(list(mpg = mpg, wt = wt, hp = hp)), bla = 1, mu = fmean(mpg), su = sum(hp)), fmutate(gmtc, acr(c(mpg, wt, hp), fscale), bla = 1, mu = fmean(mpg), su = sum(hp)), fmutate(gmtc, acr(c(mpg, wt, hp), function(x) fscale(x), .apply = FALSE), bla = 1, mu = fmean(mpg), su = sum(hp)), fmutate(gmtc, fscale(.data), .cols = .c(mpg, wt, hp), bla = 1, mu = fmean(mpg), su = sum(hp)), fmutate(gmtc, fscale(slt(.data, mpg, wt, hp)), bla = 1, mu = fmean(mpg), su = sum(hp))) ) expect_equal(fmutate(gmtc, acr(NULL, fscale)), fmutate(gmtc, fscale(.data))) expect_equal(fmutate(gmtc, acr(mpg:carb, fscale)), fmutate(gmtc, fscale(.data), .cols = seq_col(gmtc))) }) if(NCRAN) { test_that("fmutate miscellaneous", { expect_true(length(fmutate(mtcars, across(cyl:vs, W, w = wt, .names = NULL))) > 15) expect_true(length(fmutate(mtcars, across(cyl:vs, list(D, W), .names = FALSE, .transpose = FALSE))) > 15) expect_equal( fmutate(mtcars, across(cyl:vs, L, stubs = FALSE)), fmutate(mtcars, across(cyl:vs, flag)) ) expect_true(length(fmutate(mtcars, across(cyl:vs, L))) > length(fmutate(mtcars, across(cyl:vs, flag)))) expect_equal( fmutate(mtcars, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "used"), dplyr::mutate(mtcars, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "used") ) expect_equal( fmutate(mtcars, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "unused"), dplyr::mutate(mtcars, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "unused") ) expect_equal( fmutate(mtcars, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "none"), dplyr::transmute(mtcars, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb) ) expect_identical(names(fmutate(mtcars, a = mpg, b = a, c = cyl, hp = wt, .keep = "unused")), c(setdiff(names(mtcars), .c(mpg, cyl, wt)), letters[1:3])) expect_identical(names(fmutate(mtcars, a = mpg, b = a, c = cyl, hp = wt, .keep = "none")), c("a", "b", "c", "hp")) expect_equal( fmutate(gmtc, a = fmax(mpg), b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "used"), dplyr::mutate(gmtc, a = bmax(mpg), b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "used") ) expect_equal( fmutate(gmtc, a = fmax(mpg), b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "unused"), dplyr::mutate(gmtc, a = bmax(mpg), b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "unused") ) expect_equal( fmutate(gmtc, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, .keep = "none"), dplyr::transmute(gmtc, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb) ) # Inconsistent with the above and also inefficient... # expect_equal( # fmutate(gmtc, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, cyl = cyl, .keep = "none"), # dplyr::mutate(gmtc, a = mpg, b = a + hp + disp, c = cyl, hp = wt + carb, cyl = cyl, .keep = "none") # ) expect_equal(flast(names(fmutate(mtcars, across(cyl:vs, function(x) list(ps = kit::psum(x)), .apply = FALSE)))), "ps") expect_equal( fmutate(mtcars, across(cyl:vs, data.table::shift, .apply = FALSE, .names = FALSE)), fmutate(mtcars, across(cyl:vs, data.table::shift)) ) # Testing expressions turned into functions: bcumsum = base::cumsum lorentz_fun <- function(x) bcumsum(x) / bsum(x) gmtc = mtc %>% roworder(mpg) %>% dplyr::group_by(cyl, vs, am) expect_true(all_obj_equal(mtt(gmtc, lorentz_mpg = bcumsum(mpg) / bsum(mpg)), mtt(gmtc, lorentz_mpg = lorentz_fun(mpg)), mtt(gmtc, lorentz_mpg = fcumsum(mpg) / fsum(mpg)), # doesn't work because of global sorting... dplyr::mutate(gmtc, lorentz_mpg = bcumsum(mpg) / bsum(mpg)))) # Adding global variable: q = 5 expect_true(all_obj_equal(mtt(gmtc, lorentz_mpg = bcumsum(mpg) / bsum(mpg) + q), mtt(gmtc, lorentz_mpg = lorentz_fun(mpg) + q), mtt(gmtc, lorentz_mpg = fcumsum(mpg) / fsum(mpg) + q), dplyr::mutate(gmtc, lorentz_mpg = bcumsum(mpg) / bsum(mpg) + q))) }) } test_that(".names works properly", { expect_equal( smr(gmtc, acr(c(hp, wt), list(sum, max, min))), smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = TRUE)) ) expect_equal( smr(gmtc, acr(c(hp, wt), list(sum, max, min))), smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = function(c, f) paste0(c, "_", f))) ) expect_equal( smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = "flip")), smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = function(c, f) paste0(f, "_", c))) ) expect_equal( smr(gmtc, acr(c(hp, wt), list(sum, max, min), .transpose = FALSE)), smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = function(c, f) paste0(c, "_", f), .transpose = FALSE)) ) expect_equal( smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = "flip", .transpose = FALSE)), smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = function(c, f) paste0(f, "_", c), .transpose = FALSE)) ) expect_equal( names(smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = FALSE))), .c(cyl, vs, am, hp, hp, hp, wt, wt, wt) ) expect_equal( names(smr(gmtc, acr(c(hp, wt), list(sum, max, min), .names = FALSE, .transpose = FALSE))), .c(cyl, vs, am, hp, wt, hp, wt, hp, wt) ) expect_equal( names(smr(gmtc, acr(c(hp, wt), sum, .names = FALSE))), .c(cyl, vs, am, hp, wt) ) expect_equal( names(smr(gmtc, acr(c(hp, wt), sum, .names = FALSE, .transpose = FALSE))), .c(cyl, vs, am, hp, wt) ) expect_equal( names(smr(gmtc, acr(c(hp, wt), sum, .names = TRUE))), .c(cyl, vs, am, hp_sum, wt_sum) ) expect_equal( names(smr(gmtc, acr(c(hp, wt), sum, .names = "flip"))), .c(cyl, vs, am, sum_hp, sum_wt) ) expect_equal( names(smr(gmtc, acr(c(hp, wt), sum, .names = "flip", .transpose = FALSE))), .c(cyl, vs, am, sum_hp, sum_wt) ) }) test_that("Warnings for unnamed scalar and vector-valued arguments passed", { tf <- function(x, ...) x expect_warning(mtt(gmtc, acr(hp:carb, tf, TRUE, wt))) expect_warning(mtt(gmtc, acr(hp:carb, tf, wt, TRUE))) expect_warning(mtt(gmtc, acr(hp:carb, tf, TRUE, wt, .apply = FALSE))) expect_warning(mtt(gmtc, acr(hp:carb, tf, wt, TRUE, .apply = FALSE))) }) if(FALSE) { fmutate(mtcars, across(cyl:vs, list(D, W), .names = TRUE, .transpose = FALSE)) %>% head(3) fmutate(mtcars, across(cyl:vs, list(D, W), .names = TRUE, .transpose = TRUE)) %>% head(3) fmutate(mtcars, across(cyl:vs, list(D, W), .names = TRUE, .apply = FALSE, .transpose = FALSE)) %>% head(3) fmutate(mtcars, across(cyl:vs, list(D, W), .names = FALSE, .apply = FALSE, .transpose = FALSE)) %>% head(3) fmutate(mtcars, across(cyl:vs, list(W, kit::psum), w = wt)) %>% head(3) fmutate(mtcars, across(cyl:vs, kit::psum)) %>% head(3) fmutate(mtcars, across(cyl:vs, identity, .apply = FALSE)) # 51 microesecond median on windows fmutate(mtcars, across(cyl:vs, identity)) # 62 microesecond median on windows fmutate(mtcars, across(cyl:vs, L)) # TODO: Test all potential issues with environemtns etc. See if there are smarter ways to # incorporate internal functions, data and objects in the global environment. } } collapse/tests/testthat/test-fquantile.R0000644000176200001440000002550514777170131020165 0ustar liggesuserscontext("fquantile, and quantiles with fnth") test_zero_weights <- FALSE probs1 <- c(0, 0.25, 0.5, 0.75, 1) probs2 <- c(0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1) for(x in mtcars) { for(o in list(NULL, radixorder(x))) { for(Qprobs in list(probs1, probs2)) { for(t in 5:9) { expect_true(all_obj_equal( fquantile(x, Qprobs, type = t, o = o), fquantile(x, Qprobs, type = t, o = o, na.rm = FALSE), quantile(x, Qprobs, type = t))) for(j in 1:3) { w = rep(j + rnorm(1, sd = 0.05), 32) expect_true(all_obj_equal( .quantile(x, Qprobs, type = t), .quantile(x, Qprobs, type = t, w = w, o = o, na.rm = FALSE), .quantile(x, Qprobs, type = t, w = w, o = o))) } } } } } expect_equal(.quantile(1:2), c(1.00, 1.25, 1.50, 1.75, 2.00)) expect_equal(.quantile(1:3), c(1.0, 1.5, 2.0, 2.5, 3.0)) expect_equal(.quantile(1:2, na.rm = FALSE), c(1.00, 1.25, 1.50, 1.75, 2.00)) expect_equal(.quantile(1:3, na.rm = FALSE), c(1.0, 1.5, 2.0, 2.5, 3.0)) for(na_rm in c(TRUE, FALSE)) { for(t in 5:9) { expect_equal(.quantile(0, type = t, na.rm = na_rm), c(0,0,0,0,0)) expect_equal(.quantile(c(0, 0), type = t, na.rm = na_rm), c(0,0,0,0,0)) expect_equal(.quantile(c(0, 0, 0), type = t, na.rm = na_rm), c(0,0,0,0,0)) expect_equal(.quantile(0L, type = t, na.rm = na_rm), rep.int(0L, 5)) expect_equal(.quantile(c(0L, 0L), type = t, na.rm = na_rm), rep.int(0L, 5)) expect_equal(.quantile(c(0L, 0L, 0L), type = t, na.rm = na_rm), rep.int(0L, 5)) expect_equal(.quantile(0, w = 1, type = t, na.rm = na_rm), c(0,0,0,0,0)) expect_equal(.quantile(c(0, 0), w = c(1, 1), type = t, na.rm = na_rm), c(0,0,0,0,0)) expect_equal(.quantile(c(0, 0, 0), w = c(1, 1, 1), type = t, na.rm = na_rm), c(0,0,0,0,0)) expect_equal(.quantile(0L, w = 1, type = t, na.rm = na_rm), rep.int(0L, 5)) expect_equal(.quantile(c(0L, 0L), w = c(1, 1), type = t, na.rm = na_rm), rep.int(0L, 5)) expect_equal(.quantile(c(0L, 0L, 0L), w = c(1, 1, 1), type = t, na.rm = na_rm), rep.int(0L, 5)) expect_equal(.quantile(numeric(0), type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_equal(.quantile(integer(0), type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_equal(.quantile(NA_real_, type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_equal(.quantile(NA_integer_, type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_equal(.quantile(NA_real_, w = NA_real_, type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_equal(.quantile(NA_integer_, w = NA_real_, type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_equal(.quantile(1, w = 0, type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_equal(.quantile(1L, w = 0, type = t, na.rm = na_rm), rep(NA_real_, 5)) expect_error(.quantile(1, w = NA_real_, type = t, na.rm = na_rm)) expect_error(.quantile(1L, w = NA_real_, type = t, na.rm = na_rm)) } } for(x in na_insert(airquality, 0.05)) { for(o in list(NULL, radixorder(x))) { for(Qprobs in list(probs1, probs2)) { for(t in 5:9) { expect_equal(fquantile(x, Qprobs, type = t, o = o), quantile(x, Qprobs, type = t, na.rm = TRUE)) for(j in 1:3) { w = rep(j + rnorm(1, sd = 0.05), 153) expect_equal(.quantile(x, Qprobs, type = t), .quantile(x, Qprobs, type = t, w = w, o = o)) } } } } } if(test_zero_weights) { # Testing behavior with zero weights for(x in mtcars) { for(o in list(NULL, radixorder(x))) { for(Qprobs in list(probs1, probs2)) { for(t in 5:9) { w = na_insert(abs(rnorm(32)), value = 0) wn0 = w[w > 0] xn0 = x[w > 0] on0 = if(length(o)) radixorder(xn0) else NULL expect_true(all_obj_equal( .quantile(x, Qprobs, type = t, w = w, o = o), .quantile(x, Qprobs, type = t, w = w, o = o, na.rm = FALSE), .quantile(xn0, Qprobs, type = t, w = wn0, o = on0), .quantile(xn0, Qprobs, type = t, w = wn0, o = on0, na.rm = FALSE) )) } } } } # Zero weights and NA's for(x in na_insert(mtcars)) { for(o in list(NULL, radixorder(x))) { for(Qprobs in list(probs1, probs2)) { for(t in 5:9) { w = na_insert(abs(rnorm(32)), value = 0) wn0 = w[w > 0] xn0 = x[w > 0] on0 = if(length(o)) radixorder(xn0) else NULL expect_equal(.quantile(x, Qprobs, type = t, w = w, o = o), .quantile(xn0, Qprobs, type = t, w = wn0, o = on0)) } } } } } # Testing with fnth: .nthquantile <- function(x, probs = c(0.25, 0.5, 0.75), w = NULL, o = NULL, na.rm = TRUE, type = 7L, check.o = is.null(attr(o, "sorted")), ...) { vapply(probs, fnth.default, 1.0, x = x, w = w, ties = type, o = o, na.rm = na.rm, check.o = check.o, USE.NAMES = FALSE, use.g.names = FALSE, ...) } probs <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99) gmtc = GRP(rep(1L, 32)) gmtcus = gmtc gmtcus$ordered %-=% 1L for(g in list(NULL, gmtc, gmtcus)) { for(x in mtcars) { for(o in list(NULL, radixorder(x))) { for(t in 5:9) { expect_true(all_obj_equal( .quantile(x, probs, type = t, o = o), .nthquantile(x, probs, type = t, o = o, g = g), .nthquantile(x, probs, type = t, o = o, na.rm = FALSE, g = g))) for(j in 1:2) { w = rep(j + rnorm(1, sd = 0.05), 32) expect_true(all_obj_equal( .quantile(x, probs, type = t), .nthquantile(x, probs, type = t, w = w, o = o, na.rm = FALSE, g = g), .nthquantile(x, probs, type = t, w = w, o = o, g = g))) } } } } } for(g in list(NULL, rep(1L, 3L))) { expect_equal(.nthquantile(1:3, na.rm = FALSE), c(1.5, 2.0, 2.5), g = g) expect_equal(.nthquantile(1:3), c(1.5, 2.0, 2.5), g = g) for(na_rm in c(TRUE, FALSE)) { for(t in 5:9) { expect_equal(.nthquantile(c(0, 0, 0), type = t, na.rm = na_rm, g = g), c(0,0,0)) expect_equal(.nthquantile(c(0L, 0L, 0L), type = t, na.rm = na_rm, g = g), rep.int(0L, 3)) expect_equal(.nthquantile(c(0, 0, 0), w = c(1, 1, 1), type = t, na.rm = na_rm, g = g), c(0,0,0)) expect_equal(.nthquantile(c(0L, 0L, 0L), w = c(1, 1, 1), type = t, na.rm = na_rm, g = g), rep.int(0L, 3)) } } } for(g in list(NULL, rep(1L, 2L))) { expect_equal(.nthquantile(1:2), c(1.25, 1.50, 1.75), g = g) expect_equal(.nthquantile(1:2, na.rm = FALSE), c(1.25, 1.50, 1.75), g = g) for(na_rm in c(TRUE, FALSE)) { for(t in 5:9) { expect_equal(.nthquantile(c(0, 0), type = t, na.rm = na_rm, g = g), c(0,0,0)) expect_equal(.nthquantile(c(0L, 0L), type = t, na.rm = na_rm, g = g), rep.int(0L, 3)) expect_equal(.nthquantile(c(0, 0), w = c(1, 1), type = t, na.rm = na_rm, g = g), c(0,0,0)) expect_equal(.nthquantile(c(0L, 0L), w = c(1, 1), type = t, na.rm = na_rm, g = g), rep.int(0L, 3)) } } } for(g in list(NULL, 1L)) { for(na_rm in c(TRUE, FALSE)) { for(t in 5:9) { expect_equal(.nthquantile(0, type = t, na.rm = na_rm, g = g), c(0,0,0)) expect_equal(.nthquantile(0L, type = t, na.rm = na_rm, g = g), rep.int(0L, 3)) expect_equal(.nthquantile(0, w = 1, type = t, na.rm = na_rm, g = g), c(0,0,0)) expect_equal(.nthquantile(0L, w = 1, type = t, na.rm = na_rm, g = g), rep.int(0L, 3)) expect_equal(.nthquantile(NA_real_, type = t, na.rm = na_rm, g = g), rep(NA_real_, 3)) expect_equal(.nthquantile(NA_integer_, type = t, na.rm = na_rm, g = g), rep(NA_real_, 3)) expect_equal(.nthquantile(NA_real_, w = NA_real_, type = t, na.rm = na_rm, g = g), rep(NA_real_, 3)) expect_equal(.nthquantile(NA_integer_, w = NA_real_, type = t, na.rm = na_rm, g = g), rep(NA_real_, 3)) # expect_equal(.nthquantile(1, w = 0, type = t, na.rm = na_rm, g = g), rep(NA_real_, 3)) # expect_equal(.nthquantile(1L, w = 0, type = t, na.rm = na_rm, g = g), rep(NA_real_, 3)) # expect_error(.nthquantile(1, w = NA_real_, type = t, na.rm = na_rm, g = g)) # expect_error(.nthquantile(1L, w = NA_real_, type = t, na.rm = na_rm, g = g)) } } } gaq = GRP(rep(1L, fnrow(airquality))) gaqus = gaq gaqus$ordered %-=% 1L for(g in list(NULL, gaq, gaqus)) { for(x in na_insert(airquality, 0.05)) { for(o in list(NULL, radixorder(x))) { for(t in 5:9) { expect_equal(.quantile(x, probs, type = t, o = o), .nthquantile(x, probs, type = t, o = o, g = g)) for(j in 1:3) { w = rep(j + rnorm(1, sd = 0.05), 153) expect_equal(.quantile(x, probs, type = t, o = o), .nthquantile(x, probs, type = t, w = w, o = o, g = g)) } } } } } if(test_zero_weights) { # Testing behavior with zero weights for(g in list(NULL, gmtc, gmtcus)) { for(x in mtcars) { for(o in list(NULL, radixorder(x))) { for(t in c(1:3, 5:9)) { w = fbetween(na_insert(abs(rnorm(32)), 0.15, value = 0), x) # averaging because R's quicksort is not stable wn0 = w[w > 0] xn0 = x[w > 0] on0 = if(length(o)) radixorder(xn0) else NULL if(t > 4L) { expect_true(all_obj_equal( .quantile(x, probs, type = t, w = w, o = o), .nthquantile(x, probs, type = t, w = w, o = o, g = g), .nthquantile(x, probs, type = t, w = w, o = o, na.rm = FALSE, g = g), .nthquantile(xn0, probs, type = t, w = wn0, o = on0), .nthquantile(xn0, probs, type = t, w = wn0, o = on0, na.rm = FALSE) )) } else { expect_true(all_obj_equal( .nthquantile(x, probs, type = t, w = w, o = o), .nthquantile(x, probs, type = t, w = w, o = o, g = g), .nthquantile(x, probs, type = t, w = w, o = o, na.rm = FALSE, g = g), .nthquantile(xn0, probs, type = t, w = wn0, o = on0), .nthquantile(xn0, probs, type = t, w = wn0, o = on0, na.rm = FALSE) )) } } } } } # Zero weights and NA's for(g in list(NULL, gmtc, gmtcus)) { for(x in na_insert(mtcars)) { for(o in list(NULL, radixorder(x))) { for(t in c(1:3, 5:9)) { w = fbetween(na_insert(abs(rnorm(32)), 0.15, value = 0), x) # averaging because R's quicksort is not stable wn0 = w[w > 0] xn0 = x[w > 0] on0 = if(length(o)) radixorder(xn0) else NULL if(t > 4L) { expect_true(all_obj_equal( .quantile(x, probs, type = t, w = w, o = o), .nthquantile(x, probs, type = t, w = w, o = o, g = g), .nthquantile(xn0, probs, type = t, w = wn0, o = on0))) } else { expect_true(all_obj_equal( .nthquantile(x, probs, type = t, w = w, o = o), .nthquantile(x, probs, type = t, w = w, o = o, g = g), .nthquantile(xn0, probs, type = t, w = wn0, o = on0))) } } } } } } collapse/tests/testthat/test-select-replace-vars.R0000644000176200001440000001735214777170131022037 0ustar liggesuserscontext("select, replace or add vars") # rm(list = ls()) test_that("selecting vars works well", { expect_identical(get_vars(wlddev, 4:8), wlddev[4:8]) expect_identical(get_vars(wlddev, -(4:8)), wlddev[-(4:8)]) expect_identical(get_vars(wlddev, sapply(wlddev, is.numeric)), wlddev[sapply(wlddev, is.numeric)]) expect_identical(get_vars(wlddev, c("iso3c","PCGDP","ODA")), wlddev[c("iso3c","PCGDP","ODA")]) expect_identical(get_vars(wlddev, "D", regex = TRUE), wlddev[c("OECD","PCGDP","ODA")]) expect_identical(get_vars(wlddev, c("D","L"), regex = TRUE), wlddev[c("OECD","PCGDP","LIFEEX","ODA")]) expect_identical(get_vars(wlddev, is.factor), wlddev[sapply(wlddev, is.factor)]) expect_identical(num_vars(wlddev), wlddev[sapply(wlddev, is.numeric)]) expect_identical(cat_vars(wlddev), wlddev[sapply(wlddev, is_categorical)]) expect_identical(char_vars(wlddev), wlddev[sapply(wlddev, is.character)]) expect_identical(fact_vars(wlddev), wlddev[sapply(wlddev, is.factor)]) expect_identical(date_vars(wlddev), wlddev[sapply(wlddev, is_date)]) }) test_that("replacing vars works well", { wlddevold <- wlddev get_vars(wlddev, 4:8) <- get_vars(wlddev, 4:8) expect_identical(wlddevold, wlddev) wlddevold <- wlddev fselect(wlddev, PCGDP:GINI) <- fselect(wlddev, PCGDP:GINI) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, -(4:8)) <- get_vars(wlddev, -(4:8)) expect_identical(wlddevold, wlddev) wlddevold <- wlddev fselect(wlddev, -(PCGDP:GINI)) <- fselect(wlddev, -(PCGDP:GINI)) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, -(4:8)) <- as.list(get_vars(wlddev, -(4:8))) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, c("iso3c","PCGDP","ODA")) <- get_vars(wlddev, c("iso3c","PCGDP","ODA")) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, "D", regex = TRUE) <- get_vars(wlddev, "D", regex = TRUE) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, c("D","L"), regex = TRUE) <- get_vars(wlddev, c("D","L"), regex = TRUE) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, sapply(wlddev, is.numeric)) <- get_vars(wlddev, sapply(wlddev, is.numeric)) expect_identical(wlddevold, wlddev) wlddevold <- wlddev get_vars(wlddev, is.factor) <- get_vars(wlddev, is.factor) expect_identical(wlddevold, wlddev) wlddevold <- wlddev num_vars(wlddev) <- num_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev cat_vars(wlddev) <- cat_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev char_vars(wlddev) <- char_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev fact_vars(wlddev) <- fact_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev logi_vars(wlddev) <- logi_vars(wlddev) expect_identical(wlddevold, wlddev) wlddevold <- wlddev date_vars(wlddev) <- date_vars(wlddev) expect_identical(wlddevold, wlddev) }) test_that("adding vars works well", { wlddev1 <- wlddev2 <- wlddev temp <- STD(get_vars(wlddev, 9:12)) add_vars(wlddev1) <- temp wlddev2[names(temp)] <- temp expect_identical(wlddev1, wlddev2) wlddev1 <- wlddev temp <- STD(get_vars(wlddev, 9:12)) add_vars(wlddev1, "front") <- temp expect_identical(wlddev1, add_vars(temp, wlddev)) wlddev1 <- wlddev temp <- STD(get_vars(wlddev, 9:13)) add_vars(wlddev1, c(10,12,14,16,18)) <- temp expect_true(all_identical(wlddev1, add_vars(wlddev, temp, pos = c(10,12,14,16,18)), add_vars(gv(wlddev, 1:9), gv(temp, 1), gv(wlddev, 10), gv(temp, 2), gv(wlddev, 11), gv(temp, 3), gv(wlddev, 12), gv(temp, 4), gv(wlddev, 13), gv(temp, 5)))) }) test_that("replacing with or adding atomic elements works well", { wlddev1 <- wlddev2 <- wlddev get_vars(wlddev1, 9) <- wlddev$PCGDP expect_identical(wlddev1, wlddev) get_vars(wlddev1, 9) <- qM(wlddev[9:12]) wlddev2[9] <- qM(wlddev[9:12]) expect_identical(wlddev1, wlddev2) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1) <- wlddev$PCGDP expect_identical(wlddev1, cbind(wlddev2, wlddev["PCGDP"])) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1) <- qM(wlddev[9:12]) wlddev2["wlddev[9:12]"] <- qM(wlddev[9:12]) # formerly wlddev2["qM(wlddev[9:12])"], but no longer using deparse.. expect_identical(wlddev1, wlddev2) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1, "front") <- wlddev$PCGDP expect_identical(wlddev1, add_vars(wlddev, wlddev$PCGDP, pos = 1)) wlddev1 <- wlddev2 <- wlddev add_vars(wlddev1, "front") <- qM(wlddev[9:12]) expect_identical(wlddev1, add_vars(wlddev, qM(wlddev[9:12]), pos = 1)) }) test_that("empty selections work well", { expect_equal(cat_vars(mtcars), mtcars[0L]) expect_equal(char_vars(mtcars), mtcars[0L]) expect_equal(fact_vars(mtcars), mtcars[0L]) expect_equal(logi_vars(mtcars), mtcars[0L]) expect_equal(get_vars(mtcars, is.character), mtcars[0L]) expect_equal(get_vars(mtcars, 0L), mtcars[0L]) expect_error(get_vars(mtcars, NULL)) }) test_that("select vars errors for wrong input", { expect_error(get_vars(wlddev, 14)) expect_error(get_vars(wlddev, 1:14)) expect_error(get_vars(wlddev, -14)) expect_error(get_vars(wlddev, c("PCGDP","ODA3"))) # expect_warning(get_vars(wlddev, "bla", regex = TRUE)) # Better give error expect_error(get_vars(wlddev, c(sapply(wlddev, is.numeric), TRUE))) expect_error(get_vars(wlddev, sapply(wlddev, is.numeric)[-1])) }) test_that("replace vars errors for wrong input", { expect_error(get_vars(wlddev, 14) <- wlddev[12]) expect_error(get_vars(wlddev, "ODA3") <- wlddev[12]) expect_error(get_vars(wlddev, "bla", regex = TRUE) <- wlddev[12]) expect_error(get_vars(wlddev, -14) <- wlddev[12]) expect_error(get_vars(wlddev, 11:12) <- wlddev[12]) expect_error(get_vars(wlddev, 9:12) <- wlddev[8:12]) expect_invisible(get_vars(wlddev, 12) <- wlddev$ODA) expect_error(get_vars(wlddev, 12) <- wlddev$ODA[-1]) expect_error(get_vars(wlddev, 12) <- qM(wlddev[9:12])[-1, ]) expect_error(get_vars(wlddev, c(sapply(wlddev, is.numeric), TRUE)) <- wlddev) expect_error(get_vars(wlddev, sapply(wlddev, is.numeric)[-1]) <- wlddev) }) test_that("add_vars errors for wrong input", { expect_error(add_vars(wlddev, 15) <- wlddev[12]) expect_error(add_vars(wlddev, "ODA3") <- wlddev[12]) expect_error(add_vars(wlddev) <- qM(wlddev[9:12])[-1, ]) expect_error(add_vars(wlddev, "front") <- qM(wlddev[9:12])[-1, ]) expect_error(add_vars(wlddev, 8) <- qM(wlddev[9:12])[-1, ]) expect_error(add_vars(wlddev) <- wlddev[-1, 9:12]) expect_error(add_vars(wlddev, "front") <- wlddev[-1, 9:12]) expect_error(add_vars(wlddev, 8) <- wlddev[-1, 9:12]) expect_error(add_vars(wlddev, 12) <- wlddev[9:12]) expect_error(add_vars(wlddev, 9:12) <- wlddev[9:10]) }) test_that("fselect errors for wrong input", { expect_visible(fselect(mtcars, 1)) expect_error(fselect(mtcars, "bla")) expect_visible(fselect(mtcars, "mpg")) expect_error(fselect(mtcars, mpg:bla)) expect_error(fselect(mtcars, mpg > cyl)) expect_error(fselect(mtcars, ~mpg)) }) test_that("fselect works properly", { expect_equal(fselect(mtcars, mpg, 2), mtcars[1:2]) expect_equal(fselect(mtcars, mpg:vs), mtcars[1:8]) expect_equal(names(fselect(mtcars, bla = mpg, cyl:vs)), c("bla", names(mtcars)[2:8])) expect_invisible(fselect(wlddev, -PCGDP) <- fselect(wlddev, -PCGDP)) }) test_that("no problems with numeric values", { expect_equal(fselect(mtcars, 1), mtcars[1]) expect_equal(get_vars(mtcars, 1), mtcars[1]) expect_equal(gv(mtcars, 1), mtcars[1]) expect_invisible(fselect(mtcars, 1) <- mtcars[1]) expect_invisible(get_vars(mtcars, 1) <- mtcars[1]) expect_invisible(gv(mtcars, 1) <- mtcars[1]) expect_invisible(av(mtcars, pos = 1) <- mtcars[1]) }) collapse/tests/testthat/test-fmatch.R0000644000176200001440000001373614777170131017442 0ustar liggesuserscontext("fmatch") test_that("fmatch works well", { expect_identical(wlddev$iso3c %iin% "DEU", which(wlddev$iso3c %in% "DEU")) expect_identical(fsubset(wlddev, iso3c %in% c("DEU", "ITA")), fsubset(wlddev, iso3c %iin% c("DEU", "ITA"))) expect_identical(qF(1:10+0.1) %iin% 1.1, 1L) # qF(1:10+0.1) %in% 1.1 works # what about integers? }) ########################### # Proper Systematic Testing ########################### fmatch_base <- function(x, table, nomatch = NA_integer_, count = FALSE) { if (count) skip_if_not_installed("kit") if(is.list(x)) { x <- do.call(paste0, x) table <- do.call(paste0, table) } res <- match(x, table, nomatch) if(count) { attr(res, "N.nomatch") <- kit::count(res, nomatch) attr(res, "N.groups") <- length(table) attr(res, "N.distinct") <- if(is.na(nomatch)) fndistinct.default(res) else fndistinct.default(res) - anyv(res, nomatch) oldClass(res) <- "qG" } res } random_vector_pair <- function(df, replace = FALSE, max.cols = 1) { d <- dim(df) cols <- sample.int(d[2L], if(is.na(max.cols)) as.integer(1 + d[2L] * runif(1)) else max.cols, replace) rows_x <- sample.int(d[1L], as.integer(1 + d[1L] * runif(1)), replace) rows_table <- sample.int(d[1L], as.integer(1 + d[1L] * runif(1)), replace) list(df[rows_x, cols], df[rows_table, cols]) } match_identcal <- function(df, replace = FALSE, max.cols = 1, nomatch = NA_integer_, count = FALSE) { data <- random_vector_pair(df, replace, max.cols) x <- data[[1]] table <- data[[2]] id <- identical(fmatch(x, table, nomatch, count, overid = 2L), fmatch_base(x, table, nomatch, count)) if(id) TRUE else data } wldna <- na_insert(wlddev) test_that("fmatch works well with atomic vectors", { for (r in c(FALSE, TRUE)) { # r = replace expect_true(all(replicate(100, match_identcal(wlddev, r)))) expect_true(all(replicate(100, match_identcal(wlddev, r, nomatch = 0L)))) expect_true(all(replicate(100, match_identcal(wlddev, r, count = TRUE)))) expect_true(all(replicate(100, match_identcal(wlddev, r, nomatch = 0L, count = TRUE)))) expect_true(all(replicate(100, match_identcal(wldna, r)))) expect_true(all(replicate(100, match_identcal(wldna, r, nomatch = 0L)))) expect_true(all(replicate(100, match_identcal(wldna, r, count = TRUE)))) expect_true(all(replicate(100, match_identcal(wldna, r, nomatch = 0L, count = TRUE)))) } }) test_that("fmatch works well with data frames / lists", { for (r in c(FALSE, TRUE)) { # r = replace expect_true(all(replicate(20, match_identcal(wlddev, r, max.cols = NA)))) expect_true(all(replicate(20, match_identcal(wlddev, r, max.cols = NA, nomatch = 0L)))) expect_true(all(replicate(20, match_identcal(wlddev, r, max.cols = NA, count = TRUE)))) expect_true(all(replicate(20, match_identcal(wlddev, r, max.cols = NA, nomatch = 0L, count = TRUE)))) expect_true(all(replicate(20, match_identcal(wldna, r, max.cols = NA)))) expect_true(all(replicate(20, match_identcal(wldna, r, max.cols = NA, nomatch = 0L)))) expect_true(all(replicate(20, match_identcal(wldna, r, max.cols = NA, count = TRUE)))) expect_true(all(replicate(20, match_identcal(wldna, r, max.cols = NA, nomatch = 0L, count = TRUE)))) } }) wld <- wlddev |> slt(iso3c, year = PCGDP) |> roworderv() wld <- na_insert(wld) x <- ss(wld, sample.int(10000, replace = TRUE)) table <- ss(wld, sample.int(1000, replace = TRUE)) expect_identical(fmatch(x$year, table$year), match(x$year, table$year)) expect_identical(fmatch(x, table), fmatch_base(x, table)) ######################## # AI Generated Tests ######################## test_that("fmatch returns expected results", { # Test with vector input x <- c("a", "b", "c") table <- c("a", "b", "d") expect_equal(fmatch(x, table), fmatch_base(x, table)) # Test with list input tab <- wlddev[sample.int(10000, 1000), ] expect_equal(fmatch(wlddev, tab, overid = 2L), fmatch_base(wlddev, tab)) # Test with nomatch argument expect_equal(fmatch(x, table, nomatch = 0), fmatch_base(x, table, nomatch = 0)) # Test with count argument expect_equal(fmatch(x, table, count = TRUE), fmatch_base(x, table, count = TRUE)) }) test_that("fmatch handles NA matching correctly", { x <- c("a", NA, "c") table <- c("a", "b") expect_equal(fmatch(x, table), fmatch_base(x, table)) expect_equal(fmatch(x, table, nomatch = 0), fmatch_base(x, table, nomatch = 0)) }) test_that("fmatch returns correct index positions", { x <- c("a", "b", "c", "d") expect_equal(fmatch("a", x), 1L) expect_equal(fmatch("d", x), 4L) expect_equal(fmatch(c("a", "c"), x), c(1L, 3L)) expect_equal(fmatch("e", x), NA_integer_) }) test_that("fmatch works with nomatch argument", { x <- c("a", "b", "c", "d") expect_equal(fmatch("a", x, nomatch = 0L), 1L) expect_equal(fmatch("e", x, nomatch = 0L), 0L) }) test_that("fmatch works with incomparables", { x <- c("a", NA, "c", "d") expect_equal(fmatch("a", x), 1L) expect_equal(fmatch(NA, x), 2L) expect_equal(fmatch("c", x), 3L) }) test_that("fmatch works with duplicates", { x <- c("a", "b", "c", "c", "d") expect_equal(fmatch("c", x), 3L) }) test_that("fmatch works with integer data", { x <- c(1L, 2L, 3L, 4L) expect_equal(fmatch(1L, x), 1L) expect_equal(fmatch(4L, x), 4L) expect_equal(fmatch(c(1L, 3L), x), c(1L, 3L)) expect_equal(fmatch(5L, x), NA_integer_) }) test_that("fmatch works with double data", { x <- c(1.1, 2.2, 3.3, 4.4) expect_equal(fmatch(1.1, x), 1L) expect_equal(fmatch(4.4, x), 4L) expect_equal(fmatch(c(1.1, 3.3), x), c(1L, 3L)) expect_equal(fmatch(5.5, x), NA_integer_) }) test_that("fmatch works with factor data", { x <- factor(c("a", "b", "c", "d")) expect_equal(fmatch("a", x), 1L) expect_equal(fmatch("d", x), 4L) expect_equal(fmatch(c("a", "c"), x), c(1L, 3L)) expect_equal(fmatch("e", x), NA_integer_) }) test_that("fmatch works with logical data", { x <- c(TRUE, FALSE, TRUE, FALSE) expect_equal(fmatch(TRUE, x), 1L) expect_equal(fmatch(FALSE, x), 2L) }) collapse/tests/testthat/test-unlist2d.R0000644000176200001440000000245015202504542017722 0ustar liggesuserscontext("unlist2d and rowbind") test_that("unlist2d returns non-list input unchanged", { expect_equal(unlist2d(1:3), 1:3) }) test_that("unlist2d stacks lists", { l <- list(a = mtcars[1:5, ], b = mtcars[6:10, ]) u <- unlist2d(l, idcols = "id") r <- rowbind(a = l$a, b = l$b, idcol = "id") expect_equal(nrow(u), nrow(r)) expect_equal(ncol(u), ncol(r)) expect_equal(length(unique(u[[1L]])), 2L) }) test_that("unlist2d id.factor options work", { l <- list(a = mtcars[1:3, ], b = mtcars[4:6, ]) r1 <- unlist2d(l, idcols = "id", id.factor = TRUE) r2 <- unlist2d(l, idcols = "id", id.factor = FALSE) expect_true(is.factor(r1[[1L]])) expect_false(is.factor(r2[[1L]])) }) test_that("unlist2d recursive option", { l <- list(x = list(y = 1:3, z = 4:6)) expect_true(is.list(unlist2d(l, recursive = FALSE))) r <- unlist2d(l, recursive = TRUE) expect_true(is.data.frame(r) || inherits(r, "data.frame")) expect_equal(nrow(r), 2L) }) test_that("unlist2d data.table output", { skip_if_not_installed("data.table") l <- list(mtcars[1:5, ], mtcars[6:10, ]) expect_true(data.table::is.data.table(unlist2d(l, DT = TRUE))) }) test_that("unlist2d without idcols matches rowbind", { l <- list(mtcars[1:5, ], mtcars[6:10, ]) expect_equal(unlist2d(l, idcols = FALSE), rowbind(l[[1]], l[[2]])) }) collapse/tests/testthat/test-fscale-STD.R0000644000176200001440000017330414777170131020063 0ustar liggesuserscontext("fscale / STD") bsum <- base::sum # TODO: Still a few uneccessary infinity values generated with weights when the sd is null. search replace_Inf to find them. # rm(list = ls()) set.seed(101) x <- rnorm(100) w <- abs(100*rnorm(100)) wdat <- abs(100*rnorm(32)) xNA <- x wNA <- w wdatNA <- wdat xNA[sample.int(100,20)] <- NA wNA[sample.int(100,20)] <- NA wdatNA[sample.int(32, 5)] <- NA f <- as.factor(rep(1:10, each = 10)) g <- as.factor(rep(c(1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,7,7,7,7,7,7,7,10,10,10,10,10,10,10,10,10,10))) mtcNA <- na_insert(mtcars) mtcNA[1,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" bscale <- function(x, na.rm = FALSE, mean = 0, sd = 1) { if(na.rm || !anyNA(x)) `attributes<-`(drop(base::scale(x)), NULL) * sd + mean else rep(NA_real_, length(x)) } # NOTE: This is what fscale currently does: If missing values, compute weighted mean and sd on available obs, and scale x using it. but don't insert additional missing values in x for missing weights .. wbscale <- function(x, w, na.rm = FALSE, mean = 0, sd = 1) { if(na.rm) { x2 <- x cc <- complete.cases(x, w) x <- x[cc] # if(length(x) < 2L) return(rep(NA_real_, length(x2))) # wbscale(NA, 1, na.rm = TRUE) gives length 0 if(length(x) < 2L || all(x[1L] == x[-1L])) return(rep(NA_real_, length(x2))) w <- w[cc] } else { if(length(x) < 2L) return(NA_real_) ck <- all(x[1L] == x[-1L]) if(is.na(ck) || all(ck)) return(rep(NA_real_, length(x))) } sw <- bsum(w) wm <- bsum(w * x) / sw xdm <- x - wm wsd <- sqrt(bsum(w * xdm^2) / (sw - 1)) / sd if(!na.rm) return(xdm / wsd + mean) return((x2 - wm) / wsd + mean) } test_that("fscale performs like bscale", { expect_equal(fscale(NA), as.double(bscale(NA))) expect_equal(fscale(NA, na.rm = FALSE), as.double(bscale(NA))) expect_equal(fscale(1), bscale(1, na.rm = TRUE)) expect_equal(fscale(1:3), bscale(1:3, na.rm = TRUE)) expect_equal(fscale(-1:1), bscale(-1:1, na.rm = TRUE)) expect_equal(fscale(1, na.rm = FALSE), bscale(1)) expect_equal(fscale(1:3, na.rm = FALSE), bscale(1:3)) expect_equal(fscale(-1:1, na.rm = FALSE), bscale(-1:1)) expect_equal(fscale(x), bscale(x, na.rm = TRUE)) expect_equal(fscale(x, na.rm = FALSE), bscale(x)) expect_equal(fscale(xNA, na.rm = FALSE), bscale(xNA)) expect_equal(fscale(xNA), bscale(xNA, na.rm = TRUE)) expect_equal(qM(fscale(mtcars)), fscale(m)) expect_equal(fscale(m), dapply(m, bscale, na.rm = TRUE)) expect_equal(fscale(m, na.rm = FALSE), dapply(m, bscale)) expect_equal(fscale(mNA, na.rm = FALSE), dapply(mNA, bscale)) expect_equal(fscale(mNA), dapply(mNA, bscale, na.rm = TRUE)) expect_equal(fscale(mtcars), dapply(mtcars, bscale, na.rm = TRUE)) expect_equal(fscale(mtcars, na.rm = FALSE), dapply(mtcars, bscale)) expect_equal(fscale(mtcNA, na.rm = FALSE), dapply(mtcNA, bscale)) expect_equal(fscale(mtcNA), dapply(mtcNA, bscale, na.rm = TRUE)) expect_equal(fscale(x, f), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(x, f, na.rm = FALSE), BY(x, f, bscale, use.g.names = FALSE)) expect_equal(fscale(xNA, f, na.rm = FALSE), BY(xNA, f, bscale, use.g.names = FALSE)) expect_equal(fscale(xNA, f), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(m, g), BY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(m, g, na.rm = FALSE), BY(m, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mNA, g, na.rm = FALSE), BY(mNA, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mNA, g), BY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(mtcars, g), BY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE)) expect_equal(fscale(mtcars, g, na.rm = FALSE), BY(mtcars, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bscale, use.g.names = FALSE)) expect_equal(fscale(mtcNA, g), BY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)) }) su <- function(x) if(is.null(dim(x))) `attributes<-`(qsu.default(x)[2:3], NULL) else `attributes<-`(qsu(x)[,2:3], NULL) suby <- function(x, f) if(is.null(dim(x))) `attributes<-`(qsu.default(x, f)[, 2:3], NULL) else `attributes<-`(qsu(x, f)[,2:3,], NULL) miss <- unname(rep(ifelse(dapply(mNA, anyNA), NA_real_, 0), 2)) test_that("Unweighted customized scaling works as intended", { expect_equal(su(fscale(x, mean = 5.1, sd = 3.9)), c(5.1, 3.9)) expect_equal(su(fscale(x, mean = 5.1, sd = 3.9, na.rm = FALSE)), c(5.1, 3.9)) expect_equal(su(fscale(xNA, mean = 5.1, sd = 3.9, na.rm = FALSE)), c(NaN, NA)) expect_equal(su(fscale(xNA, mean = 5.1, sd = 3.9)), c(5.1, 3.9)) expect_equal(qM(fscale(mtcars, mean = 5.1, sd = 3.9)), fscale(m, mean = 5.1, sd = 3.9)) expect_equal(su(fscale(m, mean = 5.1, sd = 3.9)), rep(c(5.1, 3.9), c(11, 11))) expect_equal(su(fscale(m, mean = 5.1, sd = 3.9, na.rm = FALSE)), rep(c(5.1, 3.9), c(11, 11))) expect_equal(su(fscale(mNA, mean = 5.1, sd = 3.9, na.rm = FALSE)), rep(c(5.1, 3.9), c(11, 11))+miss) expect_equal(su(fscale(mNA, mean = 5.1, sd = 3.9)), rep(c(5.1, 3.9), c(11, 11))) expect_equal(suby(fscale(x, f, mean = 5.1, sd = 3.9), f), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(suby(fscale(x, f, mean = 5.1, sd = 3.9, na.rm = FALSE), f), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(suby(fscale(xNA, f, mean = 5.1, sd = 3.9), f), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) }) test_that("Unweighted customized scaling works like bscale (defined above)", { expect_equal(fscale(x, mean = 5.1, sd = 3.9), bscale(x, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, mean = 5.1, sd = 3.9, na.rm = FALSE), bscale(x, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, mean = 5.1, sd = 3.9, na.rm = FALSE), bscale(xNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, mean = 5.1, sd = 3.9), bscale(xNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(qM(fscale(mtcars, mean = 5.1, sd = 3.9)), fscale(m, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, mean = 5.1, sd = 3.9), dapply(m, bscale, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, mean = 5.1, sd = 3.9, na.rm = FALSE), dapply(m, bscale, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, mean = 5.1, sd = 3.9, na.rm = FALSE), dapply(mNA, bscale, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, mean = 5.1, sd = 3.9), dapply(mNA, bscale, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, mean = 5.1, sd = 3.9), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(x, f, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(xNA, f, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, mean = 5.1, sd = 3.9), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, mean = 5.1, sd = 3.9), BY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(m, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(mNA, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, mean = 5.1, sd = 3.9), BY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, mean = 5.1, sd = 3.9), BY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(mtcars, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, mean = 5.1, sd = 3.9, na.rm = FALSE), BY(mtcNA, g, bscale, use.g.names = FALSE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, mean = 5.1, sd = 3.9), BY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE, mean = 5.1, sd = 3.9)) }) test_that("Unweighted customized scaling special cases perform as intended ", { # No mean / centering expect_equal(fscale(x, mean = FALSE, sd = 3.9), bscale(x, na.rm = TRUE, mean = fmean(x), sd = 3.9)) expect_equal(fscale(x, mean = FALSE, sd = 3.9, na.rm = FALSE), bscale(x, mean = fmean(x), sd = 3.9)) expect_equal(fscale(xNA, mean = FALSE, sd = 3.9, na.rm = FALSE), bscale(xNA, mean = fmean(xNA), sd = 3.9)) expect_equal(fscale(xNA, mean = FALSE, sd = 3.9), bscale(xNA, na.rm = TRUE, mean = fmean(xNA), sd = 3.9)) expect_equal(qM(fscale(mtcars, mean = FALSE, sd = 3.9)), fscale(m, mean = FALSE, sd = 3.9)) expect_equal(fmean(fscale(mtcars, mean = FALSE, sd = 3.9)), fmean(mtcars)) expect_equal(unname(fsd(fscale(mtcars, mean = FALSE, sd = 3.9))), rep(3.9, length(mtcars))) expect_equal(fscale(x, f, mean = FALSE), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) + B(x, f)) expect_equal(fscale(x, f, na.rm = FALSE, mean = FALSE), BY(x, f, bscale, use.g.names = FALSE) + B(x, f)) expect_equal(fscale(xNA, f, na.rm = FALSE, mean = FALSE), BY(xNA, f, bscale, use.g.names = FALSE) + B(xNA, f)) expect_equal(fscale(xNA, f, mean = FALSE), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) + B(xNA, f)) expect_equal(fscale(m, g, mean = FALSE), BY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(m, g)) expect_equal(fscale(m, g, na.rm = FALSE, mean = FALSE), BY(m, g, bscale, use.g.names = FALSE) + B(m, g)) expect_equal(fscale(mNA, g, na.rm = FALSE, mean = FALSE), BY(mNA, g, bscale, use.g.names = FALSE) + B(mNA, g)) expect_equal(fscale(mNA, g, mean = FALSE), BY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mNA, g)) expect_equal(fscale(mtcars, g, mean = FALSE), BY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mtcars, g)) expect_equal(fscale(mtcars, g, na.rm = FALSE, mean = FALSE), BY(mtcars, g, bscale, use.g.names = FALSE) + B(mtcars, g)) expect_equal(fscale(mtcNA, g, na.rm = FALSE, mean = FALSE), BY(mtcNA, g, bscale, use.g.names = FALSE) + B(mtcNA, g)) expect_equal(fscale(mtcNA, g, mean = FALSE), BY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)+ B(mtcNA, g)) # Centering on overall mean expect_equal(fscale(x, f, mean = "overall.mean"), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) + ave(x)) expect_equal(fscale(x, f, na.rm = FALSE, mean = "overall.mean"), BY(x, f, bscale, use.g.names = FALSE) + ave(x)) # expect_equal(fscale(xNA, f, na.rm = FALSE, mean = "overall.mean"), BY(xNA, f, bscale, use.g.names = FALSE) + B(xNA)) # Not the same !! expect_equal(fscale(xNA, f, mean = "overall.mean"), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) + B(xNA)) expect_equal(fscale(m, g, mean = "overall.mean"), BY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(m)) expect_equal(fscale(m, g, na.rm = FALSE, mean = "overall.mean"), BY(m, g, bscale, use.g.names = FALSE) + B(m)) # expect_equal(fscale(mNA, g, na.rm = FALSE, mean = "overall.mean"), BY(mNA, g, bscale, use.g.names = FALSE) + B(mNA)) expect_equal(fscale(mNA, g, mean = "overall.mean"), BY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mNA)) expect_equal(fscale(mtcars, g, mean = "overall.mean"), BY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) + B(mtcars)) expect_equal(fscale(mtcars, g, na.rm = FALSE, mean = "overall.mean"), BY(mtcars, g, bscale, use.g.names = FALSE) + B(mtcars)) # expect_equal(fscale(mtcNA, g, na.rm = FALSE, mean = "overall.mean"), BY(mtcNA, g, bscale, use.g.names = FALSE) + B(mtcNA)) expect_equal(fscale(mtcNA, g, mean = "overall.mean"), BY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE)+ B(mtcNA)) # Scaling by within-sd expect_equal(fscale(x, f, sd = "within.sd"), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(x, f))) expect_equal(fscale(x, f, na.rm = FALSE, sd = "within.sd"), BY(x, f, bscale, use.g.names = FALSE) * fsd(W(x, f))) # expect_equal(fscale(xNA, f, na.rm = FALSE, sd = "within.sd"), BY(xNA, f, bscale, use.g.names = FALSE) * fsd(W(xNA, f))) # Not the same !! expect_equal(fscale(xNA, f, sd = "within.sd"), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(xNA, f))) expect_equal(fscale(m, g, sd = "within.sd"), BY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L)) expect_equal(fscale(m, g, na.rm = FALSE, sd = "within.sd"), BY(m, g, bscale, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L)) # expect_equal(fscale(mNA, g, na.rm = FALSE, sd = "within.sd"), BY(mNA, g, bscale, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L)) expect_equal(fscale(mNA, g, sd = "within.sd"), BY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L)) expect_equal(fscale(mtcars, g, sd = "within.sd"), BY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L)) expect_equal(fscale(mtcars, g, na.rm = FALSE, sd = "within.sd"), BY(mtcars, g, bscale, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L)) # expect_equal(fscale(mtcNA, g, na.rm = FALSE, sd = "within.sd"), BY(mtcNA, g, bscale, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L)) expect_equal(fscale(mtcNA, g, sd = "within.sd"), BY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L)) # Centering on overall mean and scaling by within-sd expect_equal(fscale(x, f, mean = "overall.mean", sd = "within.sd"), BY(x, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(x, f)) + ave(x)) expect_equal(fscale(x, f, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(x, f, bscale, use.g.names = FALSE) * fsd(W(x, f)) + ave(x)) # expect_equal(fscale(xNA, f, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(xNA, f, bscale, use.g.names = FALSE) * fsd(W(xNA, f)) + B(xNA)) # Not the same !! expect_equal(fscale(xNA, f, mean = "overall.mean", sd = "within.sd"), BY(xNA, f, bscale, na.rm = TRUE, use.g.names = FALSE) * fsd(W(xNA, f)) + B(xNA)) expect_equal(fscale(m, g, mean = "overall.mean", sd = "within.sd"), BY(m, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L) + B(m)) expect_equal(fscale(m, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(m, g, bscale, use.g.names = FALSE) * TRA(m,fsd(W(m, g)),1L) + B(m)) # expect_equal(fscale(mNA, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(mNA, g, bscale, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L) + B(mNA)) expect_equal(fscale(mNA, g, mean = "overall.mean", sd = "within.sd"), BY(mNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mNA,fsd(W(mNA, g)),1L) + B(mNA)) expect_equal(fscale(mtcars, g, mean = "overall.mean", sd = "within.sd"), BY(mtcars, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L) + B(mtcars)) expect_equal(fscale(mtcars, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(mtcars, g, bscale, use.g.names = FALSE) * TRA(mtcars,fsd(W(mtcars, g)),1L) + B(mtcars)) # expect_equal(fscale(mtcNA, g, na.rm = FALSE, mean = "overall.mean", sd = "within.sd"), BY(mtcNA, g, bscale, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L) + B(mtcNA)) expect_equal(fscale(mtcNA, g, mean = "overall.mean", sd = "within.sd"), BY(mtcNA, g, bscale, na.rm = TRUE, use.g.names = FALSE) * TRA(mtcNA,fsd(W(mtcNA, g)),1L) + B(mtcNA)) }) # Still test weighted special cases ... test_that("fscale performs like fscale with unit weights", { expect_equal(fscale(NA), fscale(NA, w = 1)) expect_equal(fscale(NA, na.rm = FALSE), fscale(NA, w = 1, na.rm = FALSE)) expect_equal(fscale(1), fscale(1, w = 1)) expect_equal(fscale(1:3), fscale(1:3, w = rep(1,3))) expect_equal(fscale(-1:1), fscale(-1:1, w = rep(1,3))) expect_equal(fscale(1, na.rm = FALSE), fscale(1, w = 1, na.rm = FALSE)) expect_equal(fscale(1:3, na.rm = FALSE), fscale(1:3, w = rep(1, 3), na.rm = FALSE)) expect_equal(fscale(-1:1, na.rm = FALSE), fscale(-1:1, w = rep(1, 3), na.rm = FALSE)) expect_equal(fscale(x), fscale(x, w = rep(1,100))) expect_equal(fscale(x, na.rm = FALSE), fscale(x, w = rep(1, 100), na.rm = FALSE)) expect_equal(fscale(xNA, na.rm = FALSE), fscale(xNA, w = rep(1, 100), na.rm = FALSE)) expect_equal(fscale(xNA), fscale(xNA, w = rep(1, 100))) expect_equal(fscale(m), fscale(m, w = rep(1, 32))) expect_equal(fscale(m, na.rm = FALSE), fscale(m, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mNA, na.rm = FALSE), fscale(mNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mNA), fscale(mNA, w = rep(1, 32))) expect_equal(fscale(mtcars), fscale(mtcars, w = rep(1, 32))) expect_equal(fscale(mtcars, na.rm = FALSE), fscale(mtcars, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mtcNA, na.rm = FALSE), fscale(mtcNA, w = rep(1, 32), na.rm = FALSE)) expect_equal(fscale(mtcNA), fscale(mtcNA, w = rep(1, 32))) expect_equal(fscale(x, f), fscale(x, f, rep(1,100))) expect_equal(fscale(x, f, na.rm = FALSE), fscale(x, f, rep(1,100), na.rm = FALSE)) expect_equal(fscale(xNA, f, na.rm = FALSE), fscale(xNA, f, rep(1,100), na.rm = FALSE)) expect_equal(fscale(xNA, f), fscale(xNA, f, rep(1,100))) expect_equal(fscale(m, g), fscale(m, g, rep(1,32))) expect_equal(fscale(m, g, na.rm = FALSE), fscale(m, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mNA, g, na.rm = FALSE), fscale(mNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mNA, g), fscale(mNA, g, rep(1,32))) expect_equal(fscale(mtcars, g), fscale(mtcars, g, rep(1,32))) expect_equal(fscale(mtcars, g, na.rm = FALSE), fscale(mtcars, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mtcNA, g, na.rm = FALSE), fscale(mtcNA, g, rep(1,32), na.rm = FALSE)) expect_equal(fscale(mtcNA, g), fscale(mtcNA, g, rep(1,32))) }) test_that("fscale with weights performs like wbscale (defined above)", { # complete weights expect_equal(fscale(NA, w = 1), wbscale(NA, 1)) expect_equal(fscale(NA, w = 1, na.rm = FALSE), wbscale(NA, 1)) expect_equal(fscale(1, w = 1), wbscale(1, w = 1)) expect_equal(fscale(1:3, w = 1:3), wbscale(1:3, 1:3)) expect_equal(fscale(-1:1, w = 1:3), wbscale(-1:1, 1:3)) expect_equal(fscale(1, w = 1, na.rm = FALSE), wbscale(1, 1)) expect_equal(fscale(1:3, w = c(0.99,3454,1.111), na.rm = FALSE), wbscale(1:3, c(0.99,3454,1.111))) expect_equal(fscale(-1:1, w = 1:3, na.rm = FALSE), wbscale(-1:1, 1:3)) expect_equal(fscale(x, w = w), wbscale(x, w)) expect_equal(fscale(x, w = w, na.rm = FALSE), wbscale(x, w)) expect_equal(fscale(xNA, w = w, na.rm = FALSE), wbscale(xNA, w)) expect_equal(fscale(xNA, w = w), wbscale(xNA, w, na.rm = TRUE)) expect_equal(qM(fscale(mtcars, w = wdat)), fscale(m, w = wdat)) expect_equal(fscale(m, w = wdat), dapply(m, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(m, w = wdat, na.rm = FALSE), dapply(m, wbscale, wdat)) expect_equal(fscale(mNA, w = wdat, na.rm = FALSE), dapply(mNA, wbscale, wdat)) expect_equal(fscale(mNA, w = wdat), dapply(mNA, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdat), dapply(mtcars, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdat, na.rm = FALSE), dapply(mtcars, wbscale, wdat)) expect_equal(fscale(mtcNA, w = wdat, na.rm = FALSE), dapply(mtcNA, wbscale, wdat)) expect_equal(fscale(mtcNA, w = wdat), dapply(mtcNA, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(x, f, w), BY(x, f, wbscale, w)) expect_equal(fscale(x, f, w, na.rm = FALSE), BY(x, f, wbscale, w)) expect_equal(fscale(xNA, f, w, na.rm = FALSE), BY(xNA, f, wbscale, w)) expect_equal(fscale(xNA, f, w), BY(xNA, f, wbscale, w, na.rm = TRUE)) expect_equal(fscale(m, g, wdat), BY(m, g, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(m, g, wdat, na.rm = FALSE), BY(m, g, wbscale, wdat)) expect_equal(fscale(mNA, g, wdat, na.rm = FALSE), BY(mNA, g, wbscale, wdat)) expect_equal(fscale(mNA, g, wdat), BY(mNA, g, wbscale, wdat, na.rm = TRUE)) expect_equal(fscale(mtcars, g, wdat), BY(mtcars, g, wbscale, wdat)) expect_equal(fscale(mtcars, g, wdat, na.rm = FALSE), BY(mtcars, g, wbscale, wdat)) expect_equal(fscale(mtcNA, g, wdat, na.rm = FALSE), BY(mtcNA, g, wbscale, wdat)) expect_equal(fscale(mtcNA, g, wdat), BY(mtcNA, g, wbscale, wdat, na.rm = TRUE)) # missing weights expect_equal(fscale(NA, w = NA), wbscale(NA, NA)) expect_equal(fscale(NA, w = NA, na.rm = FALSE), wbscale(NA, NA)) expect_equal(fscale(1, w = NA), wbscale(1, w = NA)) expect_equal(fscale(1:3, w = c(NA,1:2)), wbscale(1:3, c(NA,1:2), na.rm = TRUE)) expect_equal(fscale(-1:1, w = c(NA,1:2)), wbscale(-1:1, c(NA,1:2), na.rm = TRUE)) expect_equal(fscale(1, w = NA, na.rm = FALSE), wbscale(1, NA)) expect_equal(fscale(1:3, w = c(NA,1:2), na.rm = FALSE), wbscale(1:3, c(NA,1:2))) expect_equal(fscale(-1:1, w = c(NA,1:2), na.rm = FALSE), wbscale(-1:1, c(NA,1:2))) expect_equal(fscale(x, w = wNA), wbscale(x, wNA, na.rm = TRUE)) expect_equal(fscale(x, w = wNA, na.rm = FALSE), wbscale(x, wNA)) expect_equal(fscale(xNA, w = wNA, na.rm = FALSE), wbscale(xNA, wNA)) expect_equal(fscale(xNA, w = wNA), wbscale(xNA, wNA, na.rm = TRUE)) expect_equal(qM(fscale(mtcars, w = wdatNA)), fscale(m, w = wdatNA)) expect_equal(fscale(m, w = wdatNA), dapply(m, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(m, w = wdatNA, na.rm = FALSE), dapply(m, wbscale, wdatNA)) expect_equal(fscale(mNA, w = wdatNA, na.rm = FALSE), dapply(mNA, wbscale, wdatNA)) expect_equal(fscale(mNA, w = wdatNA), dapply(mNA, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdatNA), dapply(mtcars, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(mtcars, w = wdatNA, na.rm = FALSE), dapply(mtcars, wbscale, wdatNA)) expect_equal(fscale(mtcNA, w = wdatNA, na.rm = FALSE), dapply(mtcNA, wbscale, wdatNA)) expect_equal(fscale(mtcNA, w = wdatNA), dapply(mtcNA, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(x, f, wNA), BY(x, f, wbscale, wNA, na.rm = TRUE)) expect_equal(fscale(x, f, wNA, na.rm = FALSE), BY(x, f, wbscale, wNA)) expect_equal(fscale(xNA, f, wNA, na.rm = FALSE), BY(xNA, f, wbscale, wNA)) # expect_equal(fscale(xNA, f, wNA), BY(xNA, f, wbscale, wNA, na.rm = TRUE)) # failed on release-windows-ix86+x86_64 expect_equal(replace_Inf(fscale(m, g, wdatNA), NA), BY(m, g, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(m, g, wdatNA, na.rm = FALSE), BY(m, g, wbscale, wdatNA)) expect_equal(fscale(mNA, g, wdatNA, na.rm = FALSE), BY(mNA, g, wbscale, wdatNA)) expect_equal(replace_Inf(fscale(mNA, g, wdatNA), NA), BY(mNA, g, wbscale, wdatNA, na.rm = TRUE)) expect_equal(replace_Inf(fscale(mtcars, g, wdatNA), NA), BY(mtcars, g, wbscale, wdatNA, na.rm = TRUE)) expect_equal(fscale(mtcars, g, wdatNA, na.rm = FALSE), BY(mtcars, g, wbscale, wdatNA)) expect_equal(fscale(mtcNA, g, wdatNA, na.rm = FALSE), BY(mtcNA, g, wbscale, wdatNA)) expect_equal(replace_Inf(fscale(mtcNA, g, wdatNA), NA), BY(mtcNA, g, wbscale, wdatNA, na.rm = TRUE)) }) wsu <- function(x, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, w = w)[3:4], NULL) else `attributes<-`(qsu(x, w = w)[,3:4], NULL) wsuby <- function(x, f, w) if(is.null(dim(x))) `attributes<-`(qsu.default(x, f, w = w)[, 3:4], NULL) else `attributes<-`(qsu(x, f, w = w)[,3:4,], NULL) test_that("Weighted customized scaling works as intended", { expect_equal(wsu(fscale(x, w = w, mean = 5.1, sd = 3.9), w = w), c(5.1, 3.9)) expect_equal(wsu(fscale(x, w = w, mean = 5.1, sd = 3.9, na.rm = FALSE), w = w), c(5.1, 3.9)) expect_equal(wsu(fscale(xNA, w = w, mean = 5.1, sd = 3.9, na.rm = FALSE), w = w), c(NaN, NA)) expect_equal(wsu(fscale(xNA, w = w, mean = 5.1, sd = 3.9), w = w), c(5.1, 3.9), w = w) expect_equal(qM(fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9)), fscale(m, w = wdat, mean = 5.1, sd = 3.9)) expect_equal(wsu(fscale(m, w = wdat, mean = 5.1, sd = 3.9), w = wdat), rep(c(5.1, 3.9), c(11, 11))) expect_equal(wsu(fscale(m, w = wdat, mean = 5.1, sd = 3.9, na.rm = FALSE), w = wdat), rep(c(5.1, 3.9), c(11, 11))) expect_equal(wsu(fscale(mNA, w = wdat, mean = 5.1, sd = 3.9, na.rm = FALSE), w = wdat), rep(c(5.1, 3.9), c(11, 11))+miss) expect_equal(wsu(fscale(mNA, w = wdat, mean = 5.1, sd = 3.9), w = wdat), rep(c(5.1, 3.9), c(11, 11))) expect_equal(wsuby(fscale(x, f, w = w, mean = 5.1, sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(wsuby(fscale(x, f, w = w, mean = 5.1, sd = 3.9, na.rm = FALSE), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(wsuby(fscale(xNA, f, w = w, mean = 5.1, sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) expect_equal(wsu(fscale(x, w = w, mean = FALSE, sd = 3.9), w = w), c(fmean(x, w = w), 3.9)) expect_equal(wsu(fscale(x, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), w = w), c(fmean(x, w = w), 3.9)) expect_equal(wsu(fscale(xNA, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), w = w), c(NaN, NA)) expect_equal(wsu(fscale(xNA, w = w, mean = FALSE, sd = 3.9), w = w), c(fmean(xNA, w = w), 3.9)) expect_equal(qM(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9)), fscale(m, w = wdat, mean = FALSE, sd = 3.9)) # ... # expect_equal(wsuby(fscale(x, f, w = w, mean = "overall.mean", sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) # expect_equal(wsuby(fscale(x, f, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) # expect_equal(wsuby(fscale(xNA, f, w = w, mean = FALSE, sd = 3.9), f, w = w), rep(c(5.1, 3.9), rep(fnlevels(f), 2))) }) test_that("Weighted customized scaling performs like wbscale (defined above)", { # complete weights expect_equal(fscale(NA, w = 1, mean = 5.1, sd = 3.9), wbscale(NA, 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(NA, w = 1, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(NA, 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = 1, mean = 5.1, sd = 3.9), wbscale(1, w = 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = 1:3, mean = 5.1, sd = 3.9), wbscale(1:3, 1:3, mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = 1:3, mean = 5.1, sd = 3.9), wbscale(-1:1, 1:3, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = 1, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1, 1, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = c(0.99,3454,1.111), na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1:3, c(0.99,3454,1.111), mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = 1:3, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(-1:1, 1:3, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = w, mean = 5.1, sd = 3.9), wbscale(x, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(x, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(xNA, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = w, mean = 5.1, sd = 3.9), wbscale(xNA, w, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(qM(fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9)), fscale(m, w = wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdat, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdat, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdat, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, w, mean = 5.1, sd = 3.9), BY(x, f, wbscale, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(x, f, wbscale, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(xNA, f, wbscale, w, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, w, mean = 5.1, sd = 3.9), BY(xNA, f, wbscale, w, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, wdat, mean = 5.1, sd = 3.9), BY(m, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(m, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(mNA, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, wdat, mean = 5.1, sd = 3.9), BY(mNA, g, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, wdat, mean = 5.1, sd = 3.9), BY(mtcars, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(mtcars, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(mtcNA, g, wbscale, wdat, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, wdat, mean = 5.1, sd = 3.9), BY(mtcNA, g, wbscale, wdat, na.rm = TRUE, mean = 5.1, sd = 3.9)) # missing weights expect_equal(fscale(NA, w = NA, mean = 5.1, sd = 3.9), wbscale(NA, NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(NA, w = NA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(NA, NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = NA, mean = 5.1, sd = 3.9), wbscale(1, w = NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = c(NA,1:2), mean = 5.1, sd = 3.9), wbscale(1:3, c(NA,1:2), na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = c(NA,1:2), mean = 5.1, sd = 3.9), wbscale(-1:1, c(NA,1:2), na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(1, w = NA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1, NA, mean = 5.1, sd = 3.9)) expect_equal(fscale(1:3, w = c(NA,1:2), na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(1:3, c(NA,1:2), mean = 5.1, sd = 3.9)) expect_equal(fscale(-1:1, w = c(NA,1:2), na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(-1:1, c(NA,1:2), mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = wNA, mean = 5.1, sd = 3.9), wbscale(x, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(x, wNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), wbscale(xNA, wNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, w = wNA, mean = 5.1, sd = 3.9), wbscale(xNA, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(qM(fscale(mtcars, w = wdatNA, mean = 5.1, sd = 3.9)), fscale(m, w = wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdatNA, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(m, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, w = wdatNA, mean = 5.1, sd = 3.9), dapply(mNA, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdatNA, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcars, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, w = wdatNA, mean = 5.1, sd = 3.9), dapply(mtcNA, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, wNA, mean = 5.1, sd = 3.9), BY(x, f, wbscale, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(x, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(x, f, wbscale, wNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(xNA, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(xNA, f, wbscale, wNA, mean = 5.1, sd = 3.9)) # expect_equal(fscale(xNA, f, wNA, mean = 5.1, sd = 3.9), BY(xNA, f, wbscale, wNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) # failed on release-windows-ix86+x86_64 expect_equal(replace_Inf(fscale(m, g, wdatNA, mean = 5.1, sd = 3.9), NA), BY(m, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(m, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(m, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(mNA, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(replace_Inf(fscale(mNA, g, wdatNA, mean = 5.1, sd = 3.9), NA), BY(mNA, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(replace_Inf(fscale(mtcars, g, wdatNA, mean = 5.1, sd = 3.9), NA), BY(mtcars, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcars, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(mtcars, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(fscale(mtcNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), BY(mtcNA, g, wbscale, wdatNA, mean = 5.1, sd = 3.9)) expect_equal(replace_Inf(fscale(mtcNA, g, wdatNA, mean = 5.1, sd = 3.9), NA), BY(mtcNA, g, wbscale, wdatNA, na.rm = TRUE, mean = 5.1, sd = 3.9)) }) test_that("Weighted customized scaling special cases perform as intended ", { # NOTE: These tests are currently only run with complete weights. STill implement them for missing weights ... # No mean / centering expect_equal(fscale(x, w = w, mean = FALSE, sd = 3.9), wbscale(x, na.rm = TRUE, w = w, mean = fmean(x, w = w), sd = 3.9)) expect_equal(fscale(x, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), wbscale(x, w = w, mean = fmean(x, w = w), sd = 3.9)) expect_equal(fscale(xNA, w = w, mean = FALSE, sd = 3.9, na.rm = FALSE), wbscale(xNA, w = w, mean = fmean(xNA, w = w), sd = 3.9)) expect_equal(fscale(xNA, w = w, mean = FALSE, sd = 3.9), wbscale(xNA, na.rm = TRUE, w = w, mean = fmean(xNA, w = w), sd = 3.9)) expect_equal(qM(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9)), fscale(m, w = wdat, mean = FALSE, sd = 3.9)) expect_equal(fmean(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9), w = wdat), fmean(mtcars, w = wdat)) expect_equal(unname(fsd(fscale(mtcars, w = wdat, mean = FALSE, sd = 3.9), w = wdat)), rep(3.9, length(mtcars))) expect_equal(fscale(x, f, w, mean = FALSE), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(x, f, w)) expect_equal(fscale(x, f, w, mean = FALSE, na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) + B(x, f, w)) expect_equal(fscale(xNA, f, w, mean = FALSE, na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) + B(xNA, f, w)) expect_equal(fscale(xNA, f, w, mean = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(xNA, f, w)) # Centering on overall mean expect_equal(fscale(x, f, w, mean = "overall.mean"), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(x, w = w)) expect_equal(fscale(x, f, w, mean = "overall.mean", na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) + B(x, w = w)) # expect_equal(fscale(xNA, f, w, mean = "overall.mean", na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) + B(xNA, w = w)) # Not the same !! expect_equal(fscale(xNA, f, w, mean = "overall.mean"), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) + B(xNA, w = w)) # Scaling by within-sd expect_equal(fscale(x, f, w, sd = "within.sd"), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(x, f, w), w = w)) expect_equal(fscale(x, f, w, sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) * fsd(W(x, f, w), w = w)) # expect_equal(fscale(xNA, f, w, sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) * fsd(W(xNA, f, w), w = w)) # Not the same !! expect_equal(fscale(xNA, f, w, sd = "within.sd"), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(xNA, f, w), w = w)) # Centering on overall mean and scaling by within-sd expect_equal(fscale(x, f, w, mean = "overall.mean", sd = "within.sd"), unlist(Map(wbscale, split(x, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(x, f, w), w = w) + B(x, w = w)) expect_equal(fscale(x, f, w, mean = "overall.mean", sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(x, f), split(w, f)), use.names = FALSE) * fsd(W(x, f, w), w = w) + B(x, w = w)) # expect_equal(fscale(xNA, f, w, mean = "overall.mean", sd = "within.sd", na.rm = FALSE), unlist(Map(wbscale, split(xNA, f), split(w, f)), use.names = FALSE) * fsd(W(xNA, f, w), w = w) + B(xNA, w = w)) # Not the same !! expect_equal(fscale(xNA, f, w, mean = "overall.mean", sd = "within.sd"), unlist(Map(wbscale, split(xNA, f), split(w, f), na.rm = TRUE), use.names = FALSE) * fsd(W(xNA, f, w), w = w) + B(xNA, w = w)) }) test_that("fscale performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g), simplify = FALSE))) }) test_that("fscale customized scaling performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, mean = 5.1, sd = 3.9), simplify = FALSE))) }) test_that("fscale with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat), simplify = FALSE))) }) test_that("fscale customized scaling with complete weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = 1, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = 1, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, w, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdat, mean = 5.1, sd = 3.9), simplify = FALSE))) }) test_that("fscale with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA), simplify = FALSE))) }) test_that("fscale customized scaling with missing weights performs numerically stable", { expect_true(all_obj_equal(replicate(50, fscale(1, w = NA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(NA, w = NA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, w = wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, w = wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(x, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(xNA, f, wNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(m, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mNA, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcars, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA, na.rm = FALSE, mean = 5.1, sd = 3.9), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fscale(mtcNA, g, wdatNA, mean = 5.1, sd = 3.9), simplify = FALSE))) }) # NOTE: fscale(c(a, a)) gives c(NaN, NaN) (sd is 0) !!! test_that("fscale handles special values in the right way", { expect_equal(fscale(NA), NA_real_) expect_equal(fscale(NaN), NA_real_) expect_equal(fscale(Inf), NA_real_) expect_equal(fscale(-Inf), NA_real_) expect_equal(fscale(TRUE), NA_real_) expect_equal(fscale(FALSE), NA_real_) expect_equal(fscale(NA, na.rm = FALSE), NA_real_) expect_equal(fscale(NaN, na.rm = FALSE), NA_real_) expect_equal(fscale(Inf, na.rm = FALSE), NA_real_) expect_equal(fscale(-Inf, na.rm = FALSE), NA_real_) expect_equal(fscale(TRUE, na.rm = FALSE), NA_real_) expect_equal(fscale(FALSE, na.rm = FALSE), NA_real_) expect_equal(fscale(c(1,NA)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,NaN)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,Inf)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,-Inf)), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fscale(c(1,-Inf), na.rm = FALSE), c(NA_real_,NA_real_)) expect_equal(fscale(c(FALSE,FALSE), na.rm = FALSE), c(NaN,NaN)) expect_equal(fscale(c(1,1), na.rm = FALSE), c(NaN,NaN)) }) test_that("fscale with weights handles special values in the right way", { expect_equal(fscale(NA, w = 1), NA_real_) expect_equal(fscale(NaN, w = 1), NA_real_) expect_equal(fscale(Inf, w = 1), NA_real_) expect_equal(fscale(-Inf, w = 1), NA_real_) expect_equal(fscale(TRUE, w = 1), NA_real_) expect_equal(fscale(FALSE, w = 1), NA_real_) expect_equal(fscale(NA, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(NaN, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(-Inf, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(TRUE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(FALSE, w = 1, na.rm = FALSE), NA_real_) expect_equal(fscale(NA, w = NA), NA_real_) expect_equal(fscale(NaN, w = NA), NA_real_) expect_equal(fscale(Inf, w = NA), NA_real_) expect_equal(fscale(-Inf, w = NA), NA_real_) expect_equal(fscale(TRUE, w = NA), NA_real_) expect_equal(fscale(FALSE, w = NA), NA_real_) expect_equal(fscale(NA, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(NaN, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(-Inf, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(TRUE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(FALSE, w = NA, na.rm = FALSE), NA_real_) expect_equal(fscale(1:3, w = c(1,Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fscale(1:3, w = c(1,-Inf,3)), c(NA_real_,NA_real_,NA_real_)) expect_equal(fscale(1:3, w = c(1,Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) expect_equal(fscale(1:3, w = c(1,-Inf,3), na.rm = FALSE), c(NA_real_,NA_real_,NA_real_)) }) test_that("fscale produces errors for wrong input", { expect_error(fscale("a")) expect_error(fscale(NA_character_)) expect_error(fscale(mNAc)) expect_error(fscale(mNAc, f)) expect_error(fscale(1:2,1:3)) expect_error(fscale(m,1:31)) expect_error(fscale(mtcars,1:31)) expect_error(fscale(mtcars, w = 1:31)) expect_error(fscale("a", w = 1)) expect_error(fscale(1:2, w = 1:3)) expect_error(fscale(NA_character_, w = 1)) expect_error(fscale(mNAc, w = wdat)) expect_error(fscale(mNAc, f, wdat)) expect_error(fscale(mNA, w = 1:33)) expect_error(fscale(1:2,1:2, 1:3)) expect_error(fscale(m,1:32,1:20)) expect_error(fscale(mtcars,1:32,1:10)) expect_error(fscale(1:2, w = c("a","b"))) expect_error(fscale(wlddev)) expect_error(fscale(wlddev, w = wlddev$year)) expect_error(fscale(wlddev, wlddev$iso3c)) expect_error(fscale(wlddev, wlddev$iso3c, wlddev$year)) }) test_that("fscale shoots errors for wrong input to mean and sd", { expect_error(fscale(x, sd = FALSE)) expect_error(fscale(m, sd = FALSE)) expect_error(fscale(mtcars, sd = FALSE)) expect_error(fscale(x, sd = "bla")) expect_error(fscale(x, mean = "bla")) expect_error(fscale(x, sd = "within.sd")) expect_error(fscale(m, sd = "within.sd")) expect_error(fscale(mtcars, sd = "within.sd")) expect_error(fscale(x, mean = "overall.mean")) expect_error(fscale(m, mean = "overall.mean")) expect_error(fscale(mtcars, mean = "overall.mean")) expect_error(fscale(m, mean = fmean(m))) expect_error(fscale(mtcars, mean = fmean(mtcars))) expect_error(fscale(m, sd = fsd(m))) expect_error(fscale(mtcars, sd = fsd(mtcars))) }) # Testing STD: Only testing wrong inputs, especially for data.frame method. Otherwise it is identical to fscale test_that("STD produces errors for wrong input", { expect_error(STD("a")) expect_error(STD(NA_character_)) expect_error(STD(mNAc)) expect_error(STD(mNAc, f)) expect_error(STD(1:2,1:3)) expect_error(STD(m,1:31)) expect_error(STD(mtcars,1:31)) expect_error(STD(mtcars, w = 1:31)) expect_error(STD("a", w = 1)) expect_error(STD(1:2, w = c("a","b"))) expect_error(STD(1:2, w = 1:3)) expect_error(STD(NA_character_, w = 1)) expect_error(STD(mNAc, w = wdat)) expect_error(STD(mNAc, f, wdat)) expect_error(STD(mNA, w = 1:33)) expect_error(STD(mtcNA, w = 1:33)) expect_error(STD(1:2,1:2, 1:3)) expect_error(STD(m,1:32,1:20)) expect_error(STD(mtcars,1:32,1:10)) expect_error(STD(1:2, 1:3, 1:2)) expect_error(STD(m,1:31,1:32)) expect_error(STD(mtcars,1:33,1:32)) }) test_that("STD.data.frame method is foolproof", { expect_visible(STD(wlddev)) expect_visible(STD(wlddev, w = wlddev$year)) expect_visible(STD(wlddev, w = ~year)) expect_visible(STD(wlddev, wlddev$iso3c)) expect_visible(STD(wlddev, ~iso3c)) expect_visible(STD(wlddev, ~iso3c + region)) expect_visible(STD(wlddev, wlddev$iso3c, wlddev$year)) expect_visible(STD(wlddev, ~iso3c, ~year)) expect_visible(STD(wlddev, cols = 9:12)) expect_visible(STD(wlddev, w = wlddev$year, cols = 9:12)) expect_visible(STD(wlddev, w = ~year, cols = 9:12)) expect_visible(STD(wlddev, wlddev$iso3c, cols = 9:12)) expect_visible(STD(wlddev, ~iso3c, cols = 9:12)) expect_visible(STD(wlddev, wlddev$iso3c, wlddev$year, cols = 9:12)) expect_visible(STD(wlddev, ~iso3c, ~year, cols = 9:12)) expect_visible(STD(wlddev, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, w = ~year, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX"))) expect_visible(STD(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX"))) expect_error(STD(wlddev, cols = NULL)) expect_error(STD(wlddev, w = wlddev$year, cols = NULL)) expect_error(STD(wlddev, w = ~year, cols = NULL)) expect_error(STD(wlddev, wlddev$iso3c, cols = NULL)) expect_error(STD(wlddev, ~iso3c, cols = NULL)) expect_error(STD(wlddev, wlddev$iso3c, wlddev$year, cols = NULL)) expect_error(STD(wlddev, ~iso3c, ~year, cols = NULL)) expect_error(STD(wlddev, cols = 9:14)) expect_error(STD(wlddev, w = wlddev$year, cols = 9:14)) expect_error(STD(wlddev, w = ~year, cols = 9:14)) expect_error(STD(wlddev, wlddev$iso3c, cols = 9:14)) expect_error(STD(wlddev, ~iso3c, cols = 9:14)) expect_error(STD(wlddev, wlddev$iso3c, wlddev$year, cols = 9:14)) expect_error(STD(wlddev, ~iso3c, ~year, cols = 9:14)) expect_error(STD(wlddev, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, w = wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, w = ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, wlddev$iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, ~iso3c, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, wlddev$iso3c, wlddev$year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, ~iso3c, ~year, cols = c("PCGDP","LIFEEX","bla"))) expect_error(STD(wlddev, w = mtcars)) expect_error(STD(wlddev, w = 4)) expect_error(STD(wlddev, w = "year")) expect_error(STD(wlddev, w = ~year2)) # suppressWarnings(expect_error(STD(wlddev, w = ~year + region))) expect_error(STD(wlddev, mtcars)) expect_error(STD(wlddev, 2)) expect_error(STD(wlddev, "iso3c")) expect_error(STD(wlddev, ~iso3c2)) expect_error(STD(wlddev, ~iso3c + bla)) expect_error(STD(wlddev, mtcars$mpg, mtcars$cyl)) expect_error(STD(wlddev, 2, 4)) expect_error(STD(wlddev, ~iso3c2, ~year2)) expect_error(STD(wlddev, cols = ~bla)) expect_error(STD(wlddev, w = ~bla, cols = 9:12)) expect_error(STD(wlddev, w = 4, cols = 9:12)) expect_error(STD(wlddev, w = "year", cols = 9:12)) expect_error(STD(wlddev, w = ~yewar, cols = 9:12)) expect_error(STD(wlddev, mtcars$mpg, cols = 9:12)) expect_error(STD(wlddev, ~iso3c + ss, cols = 9:12)) expect_error(STD(wlddev, 2, cols = 9:12)) expect_error(STD(wlddev, "iso3c", cols = 9:12)) expect_error(STD(wlddev, wlddev$iso3c, ~year + bla, cols = 9:12)) expect_error(STD(wlddev, ~iso3c3, ~year, cols = 9:12)) expect_error(STD(wlddev, cols = c("PC3GDP","LIFEEX"))) }) collapse/tests/testthat/test-varying.R0000644000176200001440000003010114777170131017640 0ustar liggesuserscontext("varying") # rm(list = ls()) if(identical(Sys.getenv("NCRAN"), "TRUE")) pwlddev <- eval(parse(text = paste0("plm", ":", ":", "pdata.frame(wlddev, index = c('iso3c', 'year'))"))) gwlddev <- fgroup_by(wlddev, iso3c) wdm <- qM(`cat_vars<-`(wlddev, dapply(cat_vars(wlddev), qG))) g <- GRP(wlddev, ~ region + year) test_that("vector, matrix and data.frame methods work as intended", { expect_true(all(dapply(wlddev, varying))) expect_true(all(varying(wlddev))) expect_true(all(varying(wdm))) expect_true(is.atomic(varying(wlddev, drop = TRUE))) expect_true(is.atomic(varying(wdm, drop = TRUE))) expect_true(is.data.frame(varying(wlddev, drop = FALSE))) expect_true(is.matrix(varying(wdm, drop = FALSE))) expect_true(all_identical(dapply(wlddev, varying), varying(wlddev), varying(wdm))) expect_true(all_identical(dapply(wlddev, varying, drop = FALSE), varying(wlddev, drop = FALSE), qDF(varying(wdm, drop = FALSE)))) if(identical(Sys.getenv("NCRAN"), "TRUE")) { expect_equal(dapply(unattrib(wlddev), varying, wlddev$iso3c), c(FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(all_identical(dapply(wlddev, varying, wlddev$iso3c), varying(wlddev, wlddev$iso3c), varying(wdm, wlddev$iso3c))) expect_true(all_identical(dapply(wlddev, varying, wlddev$iso3c, drop = FALSE), varying(wlddev, wlddev$iso3c, drop = FALSE), qDF(varying(wdm, wlddev$iso3c, drop = FALSE)))) } expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE, drop = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE, drop = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE, drop = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), qM(varying(wlddev, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), varying(wdm, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE))) # With grouping objects... if(identical(Sys.getenv("NCRAN"), "TRUE")) { expect_equal(dapply(unattrib(wlddev), varying, g), c(TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(all_identical(dapply(wlddev, varying, g), varying(wlddev, g), varying(wdm, g))) expect_true(all_identical(dapply(wlddev, varying, g, drop = FALSE), varying(wlddev, g, drop = FALSE), qDF(varying(wdm, g, drop = FALSE)))) } expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE)), qM(varying(wlddev, g, any_group = FALSE)), varying(wdm, g, any_group = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE, drop = FALSE)), qM(varying(wlddev, g, any_group = FALSE, drop = FALSE)), varying(wdm, g, any_group = FALSE, drop = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE, use.g.names = FALSE)), qM(varying(wlddev, g, any_group = FALSE, use.g.names = FALSE)), varying(wdm, g, any_group = FALSE, use.g.names = FALSE))) expect_true(all_identical(qM(dapply(wlddev, varying, g, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), qM(varying(wlddev, g, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), varying(wdm, g, any_group = FALSE, use.g.names = FALSE, drop = FALSE))) }) test_that("data.frame method formula and cols work as intended", { expect_equal(varying(wlddev, cols = 2:5), varying(get_vars(wlddev, 2:5))) expect_equal(varying(wlddev, cols = c("PCGDP","country")), varying(get_vars(wlddev, c("PCGDP","country")))) expect_equal(varying(wlddev, cols = is.numeric), varying(num_vars(wlddev))) expect_equal(varying(wlddev, ~iso3c), varying(fselect(wlddev, -iso3c), wlddev$iso3c)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c), varying(fselect(wlddev, PCGDP, country), wlddev$iso3c)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c), varying(wlddev, ~ iso3c, cols = c("PCGDP", "country"))) expect_equal(varying(wlddev, ~iso3c, any_group = FALSE), varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c, any_group = FALSE), varying(fselect(wlddev, PCGDP, country), wlddev$iso3c, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ iso3c, any_group = FALSE), varying(wlddev, ~ iso3c, cols = c("PCGDP", "country"), any_group = FALSE)) expect_equal(varying(wlddev, ~region + year), varying(fselect(wlddev, -region, -year), g)) expect_equal(varying(wlddev, PCGDP + country ~ region + year), varying(fselect(wlddev, PCGDP, country), g)) expect_equal(varying(wlddev, PCGDP + country ~ region + year), varying(wlddev, ~ region + year, cols = c("PCGDP", "country"))) expect_equal(varying(wlddev, ~region + year, any_group = FALSE), varying(fselect(wlddev, -region, -year),g, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ region + year, any_group = FALSE), varying(fselect(wlddev, PCGDP, country), g, any_group = FALSE)) expect_equal(varying(wlddev, PCGDP + country ~ region + year, any_group = FALSE), varying(wlddev, ~ region + year, cols = c("PCGDP", "country"), any_group = FALSE)) expect_error(varying(wlddev, ~ iso3c2)) expect_error(varying(wlddev, PCGDP + country ~ iso3c2)) expect_error(varying(wlddev, PCGDP + country2 ~ iso3c)) expect_error(varying(wlddev, ~ iso3c, cols = c("PCGDP", "country2"))) expect_error(varying(wlddev, ~ region2 + year)) expect_error(varying(wlddev, PCGDP + country ~ region2 + year)) expect_error(varying(wlddev, PCGDP + country2 ~ region3 + year)) expect_error(varying(wlddev, ~ region + year, cols = c("PCGDP", "country2"))) }) if(identical(Sys.getenv("NCRAN"), "TRUE")) { test_that("pseries and pdata.frame methods work as intended", { # pdata.frame expect_equal(unattrib(varying(pwlddev)), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unattrib(varying(pwlddev, effect = "iso3c")), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unattrib(varying(pwlddev, effect = 2L)), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unattrib(varying(pwlddev, effect = "year")), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(is.atomic(varying(pwlddev, drop = TRUE))) expect_true(is.data.frame(varying(pwlddev, drop = FALSE))) expect_true(is.data.frame(varying(pwlddev, any_group = FALSE))) atrapply <- function(X, FUN, ...) { res <- vector("list", fncol(X)) for(i in seq_col(X)) { res[[i]] <- FUN(X[[i]], ...) } res } # Making sure fselect and get_vars etc. work properly. expect_identical(attributes(fselect(pwlddev, country:POP)), attributes(pwlddev)) expect_identical(attributes(get_vars(pwlddev, seq_col(pwlddev))), attributes(pwlddev)) # pseries expect_equal(unlist(atrapply(fselect(pwlddev, -iso3c), varying)), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unlist(atrapply(fselect(pwlddev, -iso3c), varying, effect = "iso3c")), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unlist(atrapply(fselect(pwlddev, -year), varying, effect = 2L)), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(unlist(atrapply(fselect(pwlddev, -year), varying, effect = "year")), c(TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_equal(varying(pwlddev$PCGDP), varying(wlddev$PCGDP, wlddev$iso3c)) expect_equal(varying(pwlddev$PCGDP, any_group = FALSE), varying(wlddev$PCGDP, wlddev$iso3c, any_group = FALSE)) expect_equal(varying(pwlddev$PCGDP, any_group = FALSE, use.g.names = FALSE), varying(wlddev$PCGDP, wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)) expect_equal(lengths(varying(pwlddev, any_group = FALSE), FALSE), lengths(atrapply(fselect(pwlddev, -iso3c), varying, any_group = FALSE))) # pdata.frame works like data.frame expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE)), unattrib(varying(pwlddev, any_group = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, drop = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE, drop = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, effect = "year"))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, drop = FALSE, effect = "year"))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE, use.g.names = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE, effect = "year"))) expect_identical(unattrib(varying(fselect(wlddev, -year), wlddev$year, any_group = FALSE, use.g.names = FALSE, drop = FALSE)), unattrib(varying(pwlddev, any_group = FALSE, use.g.names = FALSE, drop = FALSE, effect = "year"))) }) } test_that("grouped_df method works as intended", { expect_equal(unattrib(varying(gwlddev)), c(FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)) expect_true(is.atomic(varying(gwlddev, drop = TRUE))) expect_true(is.data.frame(varying(gwlddev, drop = FALSE))) expect_true(is.data.frame(varying(gwlddev, any_group = FALSE))) expect_identical(names(varying(gwlddev)), names(wlddev)[-2L]) expect_identical(names(varying(get_vars(gwlddev, 9:12))), names(wlddev)[9:12]) expect_identical(names(varying(gwlddev, any_group = FALSE)), c("iso3c", names(wlddev)[-2L])) expect_identical(names(varying(gwlddev, any_group = FALSE, keep.group_vars = FALSE)), names(wlddev)[-2L]) expect_identical(names(varying(get_vars(gwlddev, 9:12), any_group = FALSE)), c("iso3c", names(wlddev)[9:12])) expect_identical(names(varying(get_vars(gwlddev, 9:12), any_group = FALSE, keep.group_vars = FALSE)), names(wlddev)[9:12]) # grouped_df works like data.frame expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE)), unattrib(varying(gwlddev, any_group = FALSE, keep.group_vars = FALSE))) expect_identical(unattrib(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, drop = FALSE)), unattrib(varying(gwlddev, any_group = FALSE, drop = FALSE, keep.group_vars = FALSE))) expect_identical(unclass(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, use.g.names = FALSE)), unclass(fungroup(varying(gwlddev, any_group = FALSE, use.g.names = FALSE, keep.group_vars = FALSE)))) expect_identical(unclass(varying(fselect(wlddev, -iso3c), wlddev$iso3c, any_group = FALSE, drop = FALSE)), unclass(fungroup(varying(gwlddev, any_group = FALSE, use.g.names = TRUE, drop = FALSE, keep.group_vars = FALSE)))) }) collapse/tests/testthat/test-fmin-fmax.R0000644000176200001440000004513514777170131020060 0ustar liggesuserscontext("fmin and fmax") bmin <- base::min bmax <- base::max # rm(list = ls()) set.seed(101) x <- rnorm(100) * 10000 xNA <- x xNA[sample.int(100,20)] <- NA f <- as.factor(sample.int(10, 100, TRUE)) g <- GRP(mtcars, ~ cyl + vs + am) mtcNA <- na_insert(mtcars) mtcNA[27,1] <- NA # single group NA !! m <- as.matrix(mtcars) mNA <- as.matrix(mtcNA) mNAc <- mNA storage.mode(mNAc) <- "character" inf2NA <- function(x) { if(is.atomic(x)) { x[is.infinite(x)] <- NA } else { x[do.call(cbind, lapply(x, is.infinite))] <- NA } x } options(warn = -1) # fmin double test_that("fmin performs like base::min", { expect_equal(fmin(NA), bmin(NA)) expect_equal(fmin(NA, na.rm = FALSE), bmin(NA)) expect_equal(fmin(1), bmin(1, na.rm = TRUE)) expect_equal(fmin(1:3), bmin(1:3, na.rm = TRUE)) expect_equal(fmin(-1:1), bmin(-1:1, na.rm = TRUE)) expect_equal(fmin(1, na.rm = FALSE), bmin(1)) expect_equal(fmin(1:3, na.rm = FALSE), bmin(1:3)) expect_equal(fmin(-1:1, na.rm = FALSE), bmin(-1:1)) expect_equal(fmin(x), bmin(x, na.rm = TRUE)) expect_equal(fmin(x, na.rm = FALSE), bmin(x)) expect_equal(fmin(xNA, na.rm = FALSE), bmin(xNA)) expect_equal(fmin(xNA), bmin(xNA, na.rm = TRUE)) expect_equal(fmin(mtcars), fmin(m)) expect_equal(fmin(m), dapply(m, bmin, na.rm = TRUE)) expect_equal(fmin(m, na.rm = FALSE), dapply(m, bmin)) expect_equal(fmin(mNA, na.rm = FALSE), dapply(mNA, bmin)) expect_equal(fmin(mNA), dapply(mNA, bmin, na.rm = TRUE)) expect_equal(fmin(mtcars), dapply(mtcars, bmin, na.rm = TRUE)) expect_equal(fmin(mtcars, na.rm = FALSE), dapply(mtcars, bmin)) expect_equal(fmin(mtcNA, na.rm = FALSE), dapply(mtcNA, bmin)) expect_equal(fmin(mtcNA), dapply(mtcNA, bmin, na.rm = TRUE)) expect_equal(fmin(x, f), BY(x, f, bmin, na.rm = TRUE)) expect_equal(fmin(x, f, na.rm = FALSE), BY(x, f, bmin)) expect_equal(fmin(xNA, f, na.rm = FALSE), BY(xNA, f, bmin)) expect_equal(fmin(xNA, f), inf2NA(BY(xNA, f, bmin, na.rm = TRUE))) expect_equal(fmin(m, g), BY(m, g, bmin, na.rm = TRUE)) expect_equal(fmin(m, g, na.rm = FALSE), BY(m, g, bmin)) expect_equal(fmin(mNA, g, na.rm = FALSE), BY(mNA, g, bmin)) expect_equal(fmin(mNA, g), inf2NA(BY(mNA, g, bmin, na.rm = TRUE))) # bmin(NA, na.rm = TRUE) gives Inf expect_equal(fmin(mtcars, g), BY(mtcars, g, bmin, na.rm = TRUE)) expect_equal(fmin(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmin)) expect_equal(fmin(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmin)) expect_equal(fmin(mtcNA, g), inf2NA(BY(mtcNA, g, bmin, na.rm = TRUE))) # bmin(NA, na.rm = TRUE) gives Inf }) test_that("fmin performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmin(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmin(mtcNA, g), simplify = FALSE))) }) test_that("fmin handles special values in the right way", { expect_equal(fmin(NA), NA_real_) expect_equal(fmin(NaN), NaN) expect_equal(fmin(Inf), Inf) expect_equal(fmin(-Inf), -Inf) expect_equal(fmin(TRUE), 1) expect_equal(fmin(FALSE), 0) expect_equal(fmin(NA, na.rm = FALSE), NA_real_) expect_equal(fmin(NaN, na.rm = FALSE), NaN) expect_equal(fmin(Inf, na.rm = FALSE), Inf) expect_equal(fmin(-Inf, na.rm = FALSE), -Inf) expect_equal(fmin(TRUE, na.rm = FALSE), 1) expect_equal(fmin(FALSE, na.rm = FALSE), 0) }) test_that("fmin produces errors for wrong input", { expect_error(fmin("a")) expect_error(fmin(NA_character_)) expect_error(fmin(mNAc)) expect_error(fmin(mNAc, f)) expect_error(fmin(1:2,1:3)) expect_error(fmin(m,1:31)) expect_error(fmin(mtcars,1:31)) expect_error(fmin(wlddev)) expect_error(fmin(wlddev, wlddev$iso3c)) }) # fmax double test_that("fmax performs like base::max", { expect_equal(fmax(NA), bmax(NA)) expect_equal(fmax(NA, na.rm = FALSE), bmax(NA)) expect_equal(fmax(1), bmax(1, na.rm = TRUE)) expect_equal(fmax(1:3), bmax(1:3, na.rm = TRUE)) expect_equal(fmax(-1:1), bmax(-1:1, na.rm = TRUE)) expect_equal(fmax(1, na.rm = FALSE), bmax(1)) expect_equal(fmax(1:3, na.rm = FALSE), bmax(1:3)) expect_equal(fmax(-1:1, na.rm = FALSE), bmax(-1:1)) expect_equal(fmax(x), bmax(x, na.rm = TRUE)) expect_equal(fmax(x, na.rm = FALSE), bmax(x)) expect_equal(fmax(xNA, na.rm = FALSE), bmax(xNA)) expect_equal(fmax(xNA), bmax(xNA, na.rm = TRUE)) expect_equal(fmax(mtcars), fmax(m)) expect_equal(fmax(m), dapply(m, bmax, na.rm = TRUE)) expect_equal(fmax(m, na.rm = FALSE), dapply(m, bmax)) expect_equal(fmax(mNA, na.rm = FALSE), dapply(mNA, bmax)) expect_equal(fmax(mNA), dapply(mNA, bmax, na.rm = TRUE)) expect_equal(fmax(mtcars), dapply(mtcars, bmax, na.rm = TRUE)) expect_equal(fmax(mtcars, na.rm = FALSE), dapply(mtcars, bmax)) expect_equal(fmax(mtcNA, na.rm = FALSE), dapply(mtcNA, bmax)) expect_equal(fmax(mtcNA), dapply(mtcNA, bmax, na.rm = TRUE)) expect_equal(fmax(x, f), BY(x, f, bmax, na.rm = TRUE)) expect_equal(fmax(x, f, na.rm = FALSE), BY(x, f, bmax)) expect_equal(fmax(xNA, f, na.rm = FALSE), BY(xNA, f, bmax)) expect_equal(fmax(xNA, f), inf2NA(BY(xNA, f, bmax, na.rm = TRUE))) expect_equal(fmax(m, g), BY(m, g, bmax, na.rm = TRUE)) expect_equal(fmax(m, g, na.rm = FALSE), BY(m, g, bmax)) expect_equal(fmax(mNA, g, na.rm = FALSE), BY(mNA, g, bmax)) expect_equal(fmax(mNA, g), inf2NA(BY(mNA, g, bmax, na.rm = TRUE))) # bmax(NA, na.rm = TRUE) gives -Inf expect_equal(fmax(mtcars, g), BY(mtcars, g, bmax, na.rm = TRUE)) expect_equal(fmax(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmax)) expect_equal(fmax(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmax)) expect_equal(fmax(mtcNA, g), inf2NA(BY(mtcNA, g, bmax, na.rm = TRUE))) # bmax(NA, na.rm = TRUE) gives -Inf }) test_that("fmax performs numerically stable", { expect_true(all_obj_equal(replicate(50, fmax(1), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(NA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(NA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(xNA, f), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mNA, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars, g), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_obj_equal(replicate(50, fmax(mtcNA, g), simplify = FALSE))) }) test_that("fmax handles special values in the right way", { expect_equal(fmax(NA), NA_real_) expect_equal(fmax(NaN), NaN) expect_equal(fmax(Inf), Inf) expect_equal(fmax(-Inf), -Inf) expect_equal(fmax(TRUE), 1) expect_equal(fmax(FALSE), 0) expect_equal(fmax(NA, na.rm = FALSE), NA_real_) expect_equal(fmax(NaN, na.rm = FALSE), NaN) expect_equal(fmax(Inf, na.rm = FALSE), Inf) expect_equal(fmax(-Inf, na.rm = FALSE), -Inf) expect_equal(fmax(TRUE, na.rm = FALSE), 1) expect_equal(fmax(FALSE, na.rm = FALSE), 0) }) test_that("fmax produces errors for wrong input", { expect_error(fmax("a")) expect_error(fmax(NA_character_)) expect_error(fmax(mNAc)) expect_error(fmax(mNAc, f)) expect_error(fmax(1:2,1:3)) expect_error(fmax(m,1:31)) expect_error(fmax(mtcars,1:31)) expect_error(fmax(wlddev)) expect_error(fmax(wlddev, wlddev$iso3c)) }) # fmin int x <- as.integer(x) xNA <- as.integer(xNA) mtcNA <- dapply(mtcNA, as.integer) mtcars <- dapply(mtcars, as.integer) storage.mode(m) <- "integer" storage.mode(mNA) <- "integer" toint <- function(x) { storage.mode(x) <- "integer" x } test_that("fmin with integers performs like base::min", { expect_identical(fmin(x), bmin(x, na.rm = TRUE)) expect_identical(fmin(x, na.rm = FALSE), bmin(x)) expect_identical(fmin(xNA, na.rm = FALSE), bmin(xNA)) expect_identical(fmin(xNA), bmin(xNA, na.rm = TRUE)) expect_identical(toint(fmin(mtcars)), fmin(m)) expect_identical(fmin(m), dapply(m, bmin, na.rm = TRUE)) expect_identical(fmin(m, na.rm = FALSE), dapply(m, bmin)) expect_identical(fmin(mNA, na.rm = FALSE), dapply(mNA, bmin)) expect_identical(fmin(mNA), dapply(mNA, bmin, na.rm = TRUE)) expect_identical(toint(fmin(mtcars)), dapply(mtcars, bmin, na.rm = TRUE)) expect_identical(toint(fmin(mtcars, na.rm = FALSE)), dapply(mtcars, bmin)) expect_identical(toint(fmin(mtcNA, na.rm = FALSE)), dapply(mtcNA, bmin)) expect_identical(toint(fmin(mtcNA)), dapply(mtcNA, bmin, na.rm = TRUE)) expect_identical(fmin(x, f), BY(x, f, bmin, na.rm = TRUE)) expect_identical(fmin(x, f, na.rm = FALSE), BY(x, f, bmin)) expect_identical(fmin(xNA, f, na.rm = FALSE), BY(xNA, f, bmin)) expect_identical(fmin(xNA, f), inf2NA(BY(xNA, f, bmin, na.rm = TRUE))) expect_identical(fmin(m, g), BY(m, g, bmin, na.rm = TRUE)) expect_identical(fmin(m, g, na.rm = FALSE), BY(m, g, bmin)) expect_identical(fmin(mNA, g, na.rm = FALSE), BY(mNA, g, bmin)) expect_identical(fmin(mNA, g), toint(inf2NA(BY(mNA, g, bmin, na.rm = TRUE)))) # bmin(NA, na.rm = TRUE) gives Inf expect_identical(fmin(mtcars, g), BY(mtcars, g, bmin, na.rm = TRUE)) expect_identical(fmin(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmin)) expect_identical(fmin(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmin)) expect_identical(fmin(mtcNA, g), dapply(inf2NA(BY(mtcNA, g, bmin, na.rm = TRUE)), toint)) # bmin(NA, na.rm = TRUE) gives Inf }) test_that("fmin with integers performs numerically stable", { expect_true(all_identical(replicate(50, fmin(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmin(mtcNA, g), simplify = FALSE))) }) test_that("fmin with integers produces errors for wrong input", { expect_error(fmin(m,1:31)) expect_error(fmin(mtcars,1:31)) }) # fmax int test_that("fmax with integers performs like base::max", { expect_identical(fmax(x), bmax(x, na.rm = TRUE)) expect_identical(fmax(x, na.rm = FALSE), bmax(x)) expect_identical(fmax(xNA, na.rm = FALSE), bmax(xNA)) expect_identical(fmax(xNA), bmax(xNA, na.rm = TRUE)) expect_identical(toint(fmax(mtcars)), fmax(m)) expect_identical(fmax(m), dapply(m, bmax, na.rm = TRUE)) expect_identical(fmax(m, na.rm = FALSE), dapply(m, bmax)) expect_identical(fmax(mNA, na.rm = FALSE), dapply(mNA, bmax)) expect_identical(fmax(mNA), dapply(mNA, bmax, na.rm = TRUE)) expect_identical(toint(fmax(mtcars)), dapply(mtcars, bmax, na.rm = TRUE)) expect_identical(toint(fmax(mtcars, na.rm = FALSE)), dapply(mtcars, bmax)) expect_identical(toint(fmax(mtcNA, na.rm = FALSE)), dapply(mtcNA, bmax)) expect_identical(toint(fmax(mtcNA)), dapply(mtcNA, bmax, na.rm = TRUE)) expect_identical(fmax(x, f), BY(x, f, bmax, na.rm = TRUE)) expect_identical(fmax(x, f, na.rm = FALSE), BY(x, f, bmax)) expect_identical(fmax(xNA, f, na.rm = FALSE), BY(xNA, f, bmax)) expect_identical(fmax(xNA, f), inf2NA(BY(xNA, f, bmax, na.rm = TRUE))) expect_identical(fmax(m, g), BY(m, g, bmax, na.rm = TRUE)) expect_identical(fmax(m, g, na.rm = FALSE), BY(m, g, bmax)) expect_identical(fmax(mNA, g, na.rm = FALSE), BY(mNA, g, bmax)) expect_identical(fmax(mNA, g), toint(inf2NA(BY(mNA, g, bmax, na.rm = TRUE)))) # bmax(NA, na.rm = TRUE) gives -Inf expect_identical(fmax(mtcars, g), BY(mtcars, g, bmax, na.rm = TRUE)) expect_identical(fmax(mtcars, g, na.rm = FALSE), BY(mtcars, g, bmax)) expect_identical(fmax(mtcNA, g, na.rm = FALSE), BY(mtcNA, g, bmax)) expect_identical(fmax(mtcNA, g), dapply(inf2NA(BY(mtcNA, g, bmax, na.rm = TRUE)), toint)) # bmax(NA, na.rm = TRUE) gives -Inf }) test_that("fmax with integers performs numerically stable", { expect_true(all_identical(replicate(50, fmax(x), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(x, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(x, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(x, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA, f, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(xNA, f), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(m, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mNA, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars, g), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcars, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA, g, na.rm = FALSE), simplify = FALSE))) expect_true(all_identical(replicate(50, fmax(mtcNA, g), simplify = FALSE))) }) test_that("fmax with integers produces errors for wrong input", { expect_error(fmax(m,1:31)) expect_error(fmax(mtcars,1:31)) }) options(warn = 1) collapse/tests/testthat/test-data.table.R0000644000176200001440000002673315056572047020203 0ustar liggesuserscontext("collapse and data.table integration") bmean <- base::mean # TODO: Check memory allocation, particularly where names<- and attr<- are used. # Also check attribute handling helpers with atomic and S4 objects !! expect_equal(1, 1) if(requireNamespace("data.table", quietly = TRUE) && requireNamespace("magrittr", quietly = TRUE)) { options(warn = -1L) library(data.table) library(magrittr) mtcDT <- qDT(roworderv(mtcars)) irisDT <- qDT(ss(iris, 1:100)) n <- 5L # copy <- identity options(warn = 1L) test_that("creating columns and printing works after passing a data.table through collapse functions", { expect_true(is.data.table(mtcDT)) expect_true(is.data.table(irisDT)) expect_output(print(mtcDT)) expect_identical(names(mtcDT), names(mtcars)) expect_silent(mtcDT[, col := 1]) expect_output(print(mtcDT)) expect_silent(mtcDT[, col := NULL]) expect_identical(names(mtcDT), names(mtcars)) expect_output(print(mtcDT)) expect_silent(irisDT[, col := 1]) expect_silent(irisDT[, col := NULL]) # Statistical functions give warning dt <- fscale(copy(mtcDT)) expect_warning(dt[, new := 1]) expect_output(print(dt)) dt <- fsum(copy(mtcDT), TRA = 1) expect_warning(dt[, new := 1]) expect_output(print(dt)) dt <- fsum(copy(mtcDT), drop = FALSE) expect_warning(dt[, new := 1]) expect_output(print(dt)) for(i in 1:n) { if(!identical(copy, identity)) mtcDT <- qDT(mtcDT) expect_silent(mtcDT[, col := 1]) expect_silent(mtcDT[, col := NULL]) expect_identical(names(mtcDT), names(mtcars)) expect_identical(length(mtcDT), length(mtcars)) } # Other functions should work: for(i in 1:n) { dt <- fgroup_by(mtcDT, cyl) expect_identical(names(dt), names(mtcars)) # print(ltl(dt)) expect_silent(dt[, new := 1]) expect_output(print(dt)) # print(ltl(dt)) } for(i in 1:n) { dt2 <- fgroup_vars(dt) expect_silent(dt2[, new := 1]) expect_output(print(dt2)) } for(i in 1:n) { dt <- fungroup(fgroup_by(mtcDT, c(2,8:9))) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- funique(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- funique(copy(mtcDT), cols = "cyl") expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fselect(copy(mtcDT), -mpg, -hp) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fselect(copy(mtcDT), col2 = disp, wt:carb) expect_silent(dt[, new := 1]) expect_output(print(dt)) fselect(dt, col2, new) <- NULL expect_silent(dt[, ncol := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fsubset(copy(mtcDT), cyl == 4) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fsubset(copy(mtcDT), cyl == 4, bla = mpg, vs:am) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% smr(mean_mpg = fmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% smr(mean_mpg = bmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% gby(cyl) %>% smr(mean_mpg = fmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% gby(cyl) %>% smr(mean_mpg = bmean(mpg)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- ftransform(copy(mtcDT), bla = 1) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { settransform(dt, bla2 = 1) expect_silent(dt[, new2 := 1]) expect_output(print(dt)) } for(i in 1:n) { ftransform(dt) <- list(sds = mtcDT$qsec) expect_silent(dt[, new3 := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fcompute(copy(mtcDT), bla = mpg + cyl, df = 1, keep = 7:10) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- roworderv(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- roworder(copy(mtcDT), cyl, -vs) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- roworderv(copy(mtcDT), cols = 1:2) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- colorderv(copy(mtcDT)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- colorder(copy(mtcDT), vs, cyl, am) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- frename(copy(mtcDT), carb = bla, mpg = x) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- frename(copy(mtcDT), toupper) expect_silent(dt[, new := 1]) expect_output(print(dt)) setrename(dt, MPG = ABC, new = NEW) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- get_vars(copy(irisDT), 1:3) expect_silent(dt[, new := 1]) expect_output(print(dt)) get_vars(dt, 1) <- irisDT$Species expect_silent(dt[, new2 := 1]) expect_output(print(dt)) } for(i in 1:n) { get_vars(dt, 1) <- NULL expect_silent(dt[, new3 := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- get_vars(irisDT, 1:3) %>% add_vars(gv(irisDT, 4)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { add_vars(dt) <- list(Sp = irisDT$Species) expect_silent(dt[, new2 := 1]) expect_output(print(dt)) } wldDT <- qDT(wlddev) for(i in .c(num_vars, nv, cat_vars, char_vars, fact_vars, logi_vars, date_vars)) { # print(i) # Iris data FUN <- match.fun(i) dt <- FUN(irisDT) expect_identical(names(dt), FUN(iris, "names")) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) dt <- irisDT eval(substitute(FUN(dt) <- NULL, list(FUN = as.name(i)))) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) # wlddev data dt <- FUN(wldDT) expect_identical(names(dt), FUN(wlddev, "names")) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) dt <- wldDT eval(substitute(FUN(dt) <- NULL, list(FUN = as.name(i)))) expect_silent(dt[, new := 1]) expect_output(print(dt)) rm(dt) } for(i in 1:n) { dt <- relabel(copy(wldDT), toupper) expect_silent(dt[, new := 1]) expect_output(print(dt)) setrelabel(dt, PCGDP = "GRP per cap", LIFEEX = "LE") expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- qDT(qTBL(qDF(qDT(GGDC10S)))) expect_identical(names(dt), names(GGDC10S)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- fdroplevels(copy(wldDT)) expect_identical(names(dt), names(wlddev)) expect_true(!anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { m <- qM(mtcars) dt <- qDT(m) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } expect_output(print(mtcDT[, qDT(pwcor(.SD)), by = cyl, .SDcols = c("mpg", "hp", "carb")])) expect_output(print(melt(qDT(GGDC10S)[, qDT(pwcor(.SD)), by = .(Variable, Country), .SDcols = 6:15], 1:2))) for(i in 1:n) { dt <- as_character_factor(wldDT) expect_identical(names(dt), names(wlddev)) expect_true(!anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- as_character_factor(wldDT, keep.attr = FALSE) expect_identical(names(dt), names(wlddev)) expect_true(anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } options(warn = -1L) for(i in 1:n) { dt <- as_numeric_factor(wldDT) expect_identical(names(dt), names(wlddev)) expect_true(!anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- as_numeric_factor(wldDT, keep.attr = FALSE) expect_identical(names(dt), names(wlddev)) expect_true(anyNA(vlabels(dt))) expect_silent(dt[, new := 1]) expect_output(print(dt)) } options(warn = 1L) for(i in 1:n) { dt <- collap(wldDT, ~ iso3c) expect_identical(names(dt), names(wlddev)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- collapv(wldDT, 1) expect_identical(names(dt), names(wlddev)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- collapg(gby(wldDT, 1)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- dapply(copy(mtcDT), log) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- dapply(copy(mtcDT), log, return = "data.frame") expect_identical(names(dt), names(mtcars)) expect_error(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { l <- rsplit(copy(mtcDT), ~cyl) expect_silent(for(i in seq_along(l)) l[[i]][, new := 1]) expect_output(print(l)) expect_output(print(l[[1]])) } for(i in 1:n) { dt <- unlist2d(l, DT = TRUE) expect_silent(dt[, new45 := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- na_omit(copy(mtcDT), cols = 1:2) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- na_omit(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- na_insert(copy(mtcDT)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(wldDT) vlabels(wldDT) <- NULL expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% add_stub("B") expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% add_stub("B") %>% rm_stub("B") expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% setRownames expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- copy(mtcDT) %>% frename(toupper) %>% setColnames(names(mtcars)) expect_identical(names(dt), names(mtcars)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_NA(copy(wldDT), cols = is.numeric) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_NA(copy(mtcDT), set = TRUE, cols = is.numeric) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_Inf(copy(wldDT)) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- replace_outliers(copy(wldDT), 3) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- recode_num(copy(wldDT), `1` = 2) expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- recode_char(copy(wldDT), Uganda = "UGA") expect_silent(dt[, new := 1]) expect_output(print(dt)) } for(i in 1:n) { dt <- pad(copy(mtcDT), 1:3) expect_silent(dt[, new := 1]) expect_output(print(dt)) } }) } collapse/tests/testthat.R0000644000176200001440000000021614777170131015210 0ustar liggesusers# rm(list = ls()) # Sys.setenv(R_TESTS = "") library(testthat) options(collapse_export_F = TRUE) # library(collapse) test_check("collapse") collapse/MD50000644000176200001440000003546415202770267012410 0ustar liggesusersfcd9ea85df1aca03335e6615a6b12b5c *DESCRIPTION 60fbeb63059a10588f6bec7e3af7e8ae *LICENSE daa51660a98168aa18baf87b1662ff55 *NAMESPACE 47baf6ec82fe48d1acaf7ff6e2903f7d *NEWS.md e8ab68a1a750d0758616400b94aaa37e *R/BY.R ecd27343066ff9ab942ede818cb6aa56 *R/GRP.R 69448808a072dcede57b344b3fff68ae *R/RcppExports.R 58df23a4463baefedd77d8b18d13679a *R/TRA.R 5223a4125380d4bb3a15dcdc2ea93859 *R/collap.R ea464cb5e4af7c105c92f586f702aad4 *R/dapply.R a9ff06bd3d8f91dcc5cf4960ea90348e *R/descr.R 2cb05b1574f3b2ea3090185bc847d350 *R/fFtest.R bed9fd141f81ccbf18c804d9d1eb2703 *R/fbetween_fwithin.R 6ec6a7da40be958f8832355c49b30185 *R/fcount.R c2c0d122fb4b8fb4cedcbfad7f895d17 *R/fcumsum.R c16154a56e4ea6f22078b4d84761eca3 *R/fdiff_fgrowth.R 4df22237c8c0c67e9579bceae495c881 *R/ffirst.R db4b260fcd9daa22b1d7755b45eb7099 *R/fhdbetween_fhdwithin.R 8810e8b5cd30e79faa9b08fe27077f18 *R/flag.R 719da2c06ddec8c9ed9f8bcef5e384d4 *R/flast.R 8a927e10defbb2ba950d0ce6aa7840ab *R/flm.R 94e612e520da257dcb226d840eaf19b0 *R/fmean.R 64635d4a141f35d1dc182a9ea01c9ca0 *R/fmin_fmax.R 5e4bb95e09c3a072a5699b4bfb58c137 *R/fmode.R 1680b8f99f39184ec89c3f1a124fb5ed *R/fndistinct.R 233cca7456233c955428da1ca5ae58a3 *R/fnobs.R 92e7986c1f1ae68e75b89d2d6d506831 *R/fnth_fmedian.R 264056311cdb6182fcf284fc75f561a4 *R/fprod.R eff942dbd0864ba8db84bc34d7347fba *R/fscale.R 77994d76e3c1734ee8d35ee6653d8c6d *R/fslice.R 1c151d9a46045430c7c35e714125687f *R/fsubset_ftransform_fmutate.R ea2e022b56ce4f88d99d9b9610b27506 *R/fsum.R a22ef288a9020616e331e510c0112a54 *R/fsummarise.R 42ceddfc7c9b2aa9f1ae58e49f60e68d *R/fvar_fsd.R d95e5993055250ae045e4abb9ce87e6b *R/global_macros.R 4ff4e560ef268edada57deba2ec93757 *R/indexing.R 806c69dec5b22d232cbb1a4825474bf2 *R/join.R c18a1c3219443a58e807cf4e36191608 *R/list_functions.R a08de9787b64518cfd8e07cac5c52a97 *R/my_RcppExports.R c4974daa697b63e547c8c7b22e734247 *R/pivot.R f495112d5fd3273899601156786b3309 *R/psacf.R 390b196f766af1fe350e3d3d375cb3b6 *R/psmat.R 2a60a0d3a392bf44f6025efddec3c3cd *R/pwcor_pwcov_pwnobs.R d47a4ca5b4e74542d83f20dd05e9628c *R/qsu.R 00cb2a406f8f45db904b84ba374e853d *R/qtab.R c303e3e981583caf923336f987a7725c *R/quick_conversion.R 2f9fde87f2ef354b3a0612325281ec57 *R/recode_replace.R 8e8cc8cc431ef13e1667c8c91be9fbd5 *R/roworder_colorder_rename.R 4bee0e3352ca3df29099470cb7f04e1f *R/rsplit.R 802abfa340a3b456e892cadddbbd3c86 *R/select_replace_add_vars.R 1431feb5eb7e1c79250e3b5b9733dce2 *R/small_helper.R 7e086d711d690e77e2b9b38ff61bb62d *R/unlist2d.R 1b01a2641ba4612fa7104a6638a48d3c *R/varying.R c59817b21d6a0f00d43584cce8a8916d *R/zzz.R 4454e632feb75ce4f508807d50b3b12f *build/partial.rdb 87bfd6e0b6e5f453e2d4792893514aeb *build/vignette.rds a5742d7c7117d4198a262b7079ef3b58 *data/GGDC10S.rda c0728676845d1671c4342e712cad7673 *data/wlddev.rda f3023dcac94cba5954b9e5f54f09e662 *inst/CITATION b4e0e0b857fce8072ff50c73cdb97a77 *inst/doc/collapse_and_data.table.Rmd df96a7f5f813c4deda39860cde997a95 *inst/doc/collapse_and_data.table.html 0c265a6951fdd8280312c6e8543516f8 *inst/doc/collapse_and_dplyr.R 03d86f684128d8b68cc44977b53dcf74 *inst/doc/collapse_and_dplyr.Rmd 03690d6746c8304f74476989525e8e53 *inst/doc/collapse_and_dplyr.html 3c67114e5bd444a411e7baa61e690cc2 *inst/doc/collapse_and_plm.Rmd ceb9d7916fa585986f5c745314525b4b *inst/doc/collapse_and_plm.html d1c0db4430cc0d2fab00a3ca8d2d52a9 *inst/doc/collapse_and_sf.Rmd 4a892b7ceb09a5260a6e491ef1198f54 *inst/doc/collapse_and_sf.html 94f5cfdea645b1f69f661160716bf82a *inst/doc/collapse_documentation.Rmd e7eb878ba8d9492fd3a8bd6589e8e307 *inst/doc/collapse_documentation.html b2daabfbb89ea0f64d8e711fb4c42a99 *inst/doc/collapse_for_tidyverse_users.R 4f570f15cf9a5391e5948af50c38699d *inst/doc/collapse_for_tidyverse_users.Rmd 56183587ada8394c54a941e7f9c5e957 *inst/doc/collapse_for_tidyverse_users.html de162f967be2cb7d4637955f07ed69bd *inst/doc/collapse_intro.Rmd 8cb762fd4ccba319d38a6dfd2d713b3e *inst/doc/collapse_intro.html 5e270ae6034b339c2a13d20a5f7d2dec *inst/doc/collapse_object_handling.R c60a036b7be5c036349746cfd54465df *inst/doc/collapse_object_handling.Rmd 75a1b3d290b00aab96ceaec9e12eaa63 *inst/doc/collapse_object_handling.html 8f40c7ec50eef3bacf35125d24d8bda5 *inst/doc/developing_with_collapse.Rmd 215bee730aabd678ee2bc76f53d3ece0 *inst/doc/developing_with_collapse.html acf81ac4f0945daf8364c9b5a79880b1 *man/BY.Rd 8e2c7aa7baddf388a3848bde031baf2a *man/GGDC10S.Rd cc8927b569d0908c4392c769d32c1867 *man/GRP.Rd b8fc57612ae6cfc7c729097ac51bfc2d *man/TRA.Rd af27d587d3f838a7ba54276d0a28531c *man/across.Rd b2a16e373110cc32fe3deebd39955568 *man/arithmetic.Rd f70659283c78325bd2f3af687bb0b9e6 *man/collap.Rd de1a59c97b3576f116b17f60d15d9369 *man/collapse-documentation.Rd 7ebd9d4851f995b9d4dfd23499372f1a *man/collapse-options.Rd d63e988ccfa3e67593c5c83eee907964 *man/collapse-package.Rd 9523b2f22be4c99fd625d62e9aaa2d75 *man/collapse-renamed.Rd 31574e49c1340528ada01c5fd664cda4 *man/colorder.Rd a2382c03217dbeda4229ca19aa1f7c38 *man/dapply.Rd 3d19204b026457c98ef2c90fed8267b6 *man/data-transformations.Rd ef47f6bcc30840bde958f1a1b0930e8f *man/descr.Rd 2a1efb6eae86bc07138d8147efd3cc6b *man/efficient-programming.Rd 4a912719ea744ab7cb2d054c99fea429 *man/extract_list.Rd 2c843437bce5b6f298c0bf96190b60fb *man/fFtest.Rd 3939d4f7770650baefc5225b1d4ca262 *man/fast-data-manipulation.Rd 01843b470a0beeae9d4e069a01dddfc6 *man/fast-grouping-ordering.Rd 4b962afc5d025cb6842c1fc269f0f6b1 *man/fast-statistical-functions.Rd d2ed463c8e57eb88998d2a8c8a3de92e *man/fbetween_fwithin.Rd c24d7b227e820273327db3cc0f942d53 *man/fcount.Rd de00d3c2f0ac16cdc3db066dee733b08 *man/fcumsum.Rd 9311d5af2a93ef52c7de3f928dc1b3ae *man/fdiff.Rd 70c4563a4eb759a81a3d8f92c4d99175 *man/fdist.Rd 426c28d397b81023c5476cd5cfbd8b21 *man/fdroplevels.Rd eff48b9a09e87ecc8f91ecc03c9301d0 *man/ffirst_flast.Rd c7a0e5d6e5636c69768ff4c86278695c *man/fgrowth.Rd 8f2cbf8656dcfd0ed6082e0bde7dfc22 *man/fhdbetween_fhdwithin.Rd 90def9c3f2e263d117229fe74999a27d *man/flag.Rd f81fb234e57a0b2e1995952a66225899 *man/flm.Rd 12343fa326bb0e75dbb40ff232de4661 *man/fmatch.Rd ac55d9aa76c8aa700d91d01f640e3fdd *man/fmean.Rd 9d1f8097870468d5103e9f0aed249fee *man/fmin_fmax.Rd 75b1e00505d6067ccee699ebad93c296 *man/fmode.Rd 62a4811804933c730f155933eb57ef75 *man/fndistinct.Rd be5bf320997f93c8e4c086c4bc1c9d39 *man/fnobs.Rd 1f878dfae751eca405c339d2541ed46b *man/fnth_fmedian.Rd b75e4e0dee9c397d3212e84c1161c791 *man/fprod.Rd f27f406d0c2abb28fdba0761e0688840 *man/fquantile.Rd afa6bfb71452393d8c192dd0ac0cd095 *man/frename.Rd e92dd05946695fbf03e9b4effbcca88f *man/fscale.Rd 6f5ec9e8d115fd3334223d4d998c541b *man/fslice.Rd b3be76882e79fbd71375cc7d3706ef42 *man/fsubset.Rd 5cc2750e5d74694fbb40a715e59ddd4d *man/fsum.Rd 29481c5a01c254c46e31aa3d451cc7d8 *man/fsummarise.Rd e4f60aa0a2be17e226aa788e62d38371 *man/ftransform.Rd 9c5062337409514d9a02442a77745d2e *man/funique.Rd 29fef2365e615ebdbf9a562af8f56c86 *man/fvar_fsd.Rd d35341c3fb594a7303dd4bd6c127e62d *man/group.Rd a480f95c0667754559ce10fc2845c081 *man/groupid.Rd 35e545c3cb372550e2cf557e579e802c *man/indexing.Rd 38650aceb48ea70033c4bc74193b9bf0 *man/is_unlistable.Rd ae29188cc3c9a927ac7ea85d542bbb9d *man/join.Rd 0942768ce29a30c3ba4c16c19de88dde *man/ldepth.Rd b70f1a0e09afc66cc3c47e8342eca18c *man/list-processing.Rd e9ce657dcd100ad82c0d9a8b29cb4675 *man/pad.Rd 70a2c2ddccb60f2ee859b4a2c2bcaf4d *man/pivot.Rd 8fd3e00589870241d43242556f28a5c0 *man/psacf.Rd 48e0ba6c690ec170f3abbe2d7dac0ef1 *man/psmat.Rd 9cef2ae2255f38bfb29a54fe3bb6acc5 *man/pwcor_pwcov_pwnobs.Rd fc17d7b6e556f259757b8b56ac27d5d3 *man/qF.Rd da457f6448c3edc31ddfc0a24e0436f4 *man/qsu.Rd 7f22b596d497a05ef29a333a5f704c90 *man/qtab.Rd b8b53ead85dadb6b6b37c83d41dbcb62 *man/quick-conversion.Rd 868fb78513c2327d4c75de7217b5d83a *man/radixorder.Rd 63b00f8d67b0b1c6f7dc0efb39c867cb *man/rapply2d.Rd d39141ab148ebe3b85f81741b5d04fa2 *man/recode-replace.Rd b79ad1ff2b3aeb16f828bbeda8ad5741 *man/rowbind.Rd d809c186587874a777d04bc8ec0211c6 *man/roworder.Rd 10c3cdcbf438b37dab48588616ae6999 *man/rsplit.Rd ed18bf9a70a326ad637efbc4dce41ea0 *man/select_replace_vars.Rd 9de934d57e8148ff3ec34756ace1d475 *man/seqid.Rd f4b4702978877aee0b6368a7b1bbf162 *man/small-helpers.Rd 45a15e6daf1ec43f81ae4ed0b800b83b *man/summary-statistics.Rd 34103e18afb280ba9e63058c48aeccd9 *man/t_list.Rd dd60b5f74e3a8c2ad3a77c3089c9ac5c *man/time-series-panel-series.Rd 4be38a86c4d0e247f12b01bb22038760 *man/timeid.Rd 27b75f4fee392c264821b88df290f1c5 *man/unlist2d.Rd a95ba1ecab43086644104d1def3062ad *man/varying.Rd 864bb6a79e40002b66abea34eca6df6a *man/wlddev.Rd 7412e2a619faaa6c9b04f32bf22cd70e *src/ExportSymbols.c 3c857b7dd351be03be55fbd7226b290c *src/Makevars 8cb0e34f0b881075c93085784efdc23b *src/Makevars.win 49aaa1a5f6ad066b32fbb1709ad45ab7 *src/RcppExports.cpp 761a7794a896246d87181002a51a9a0b *src/TRA.c f52e926758498582157e8aad391b9f38 *src/base_radixsort.c 275926e4b742923cd68041b6b6f14684 *src/base_radixsort.h 7d0ab1b988e9538e50e18d6ee9b4f605 *src/collapse_c.h d87c5bb9a0310afaaed42018f4e060d3 *src/collapse_cpp.h b10aabd10edf1fa753ea9e499b8eea66 *src/data.table.h 402f6ad45eb2ddad5c3aaa8a79a57a05 *src/data.table_init.c 9cd8fa0dd6000a47c40f395a19c8c425 *src/data.table_rbindlist.c 0ca5c1e844f43cefcffcc0b7fedcb6ee *src/data.table_subset.c ca26be73e3ea2ee0f4dd0224b8bcb48b *src/data.table_utils.c a6ab1c12167aa9daffce39c6399bd9f9 *src/extptr.c 3b2c27d4e1dddeda160a06a6812f3fef *src/fbetween_fwithin.cpp ffc46448d562d76a7d490f582bfb0916 *src/fbstats.cpp 129bc97a64722a5f50f67b7ddbd449c8 *src/fcumsum.c 43e5af9fc2552257632889b86b4ee5ad *src/fdiff_fgrowth.cpp 4609df4bb7218b86929ae5bbffe4951d *src/ffirst.c ecbc6e59a2ae27c3776d7366921d189d *src/flag.cpp cba3535a6b24d719680e33b3ec81b312 *src/flast.c 8dffd1674e3a90b6200d2b3e199ce98e *src/fmean.c 6a53d99c8a83f185c1752184c5bb6fbb *src/fmin_fmax.c ac65779eb24f739b8fe51acf54f482ff *src/fmode.c a03cc1f61f40035dd22d79a19675f03a *src/fndistinct.c a683bd16bea1e7b1670434b897559d28 *src/fnobs.c cdec81a7c2b27bdd378cdd13828e89c1 *src/fnth_fmedian_fquantile.c 70d5450f582e80b631d5bb1be7cea093 *src/fprod.c 917c3a3d86d03be60cb11ed006c15d71 *src/fscale.cpp 1a235af3d710a6d6c9edb19d66089fbe *src/fsum.c 9df1f49b1f0fe8b7a38303fbba6be359 *src/fvar_fsd.cpp e716db5227d88965bc58799eb08f6291 *src/gsplit.c 235188f2c1b5cf22b16391af790a7184 *src/handle_attributes.c 107ac029018582b30343222a4dcab862 *src/internal/R_defn.h 61fb9844597dde9e8ac47b06ebe4dece *src/join.c 62b7e50081bf813a6c51f71a74a6c013 *src/kit.h c150dc672d7aeb791c74cb494b2b0959 *src/kit_dup.c d9d3310c162964cd459b473bd5ca9fd1 *src/match.c df321f259afece2889c16f88287c186d *src/mrtl_mctl.cpp 7fed4128280afa8566638ebf10bba138 *src/pivot.c 13d6c06b54027ffd9e61de380caf976a *src/programming.c 1ab850ab477196cb27c6ad901feb50bb *src/psmat.cpp 92f8eb5ac20a4c131364764c2d145fa4 *src/pwnobs.cpp f188afe7ac62e139f635d63f4a8a7235 *src/qF_qG.cpp 97aa205788b8827298ffc624c1fd64e4 *src/seqid_groupid.cpp 83943feb47fcfc042259c8bdc617b381 *src/small_helper.c 2d56b31f6f138ca3c90bd08ff7ddb4fe *src/stats_mAR.c 1d321c8f41669b6a32a9263d06a369cd *src/stats_pacf.c e3db898d69f4c38e6bba71820d4f4e6e *src/varying.cpp 62c20662b6b516a224365fcd359df150 *tests/testthat.R cc9e8c82cfe5bcbeb1514d1f48846521 *tests/testthat/test-BY.R 8f7135ccb23f4a683f7fddcbcd6ff95b *tests/testthat/test-GRP.R 2e8d1ffe70f59998051f1bb625b40468 *tests/testthat/test-TRA.R 4274734d27393678c1c214db36a4eca6 *tests/testthat/test-attribute-handling.R 3aaeb9cf76b2be7a6f9a471b7f860989 *tests/testthat/test-collap.R aab7c974037edf8921c7f7d05263efe2 *tests/testthat/test-collapse-options.R ebe159dbfedd0c4186488f632dd37238 *tests/testthat/test-dapply.R 7109bcbd288d6b481693b5ed864789e5 *tests/testthat/test-data.table.R eeffa4bfbf005591e5a6fbe7df055388 *tests/testthat/test-descr.R e41bc72ccb4155f0428e0d05596ed6f9 *tests/testthat/test-fHDbetween-fHDwithin-HDB-HDW.R 4ed9dc6a4b94903e47c641918e5e818f *tests/testthat/test-fNobs-fNdistinct.R 17aca78f54b74a9ab098a26f29c1b9e9 *tests/testthat/test-fbetween-fwithin-B-W.R d6038acef4f69f5fb5d0857df6f8ff8e *tests/testthat/test-fcount.R 36abe3d73d2d0a27c5555b4f955caa1f *tests/testthat/test-fcumsum.R 8b88f6880c243bc45104f0ac4e3dd9b4 *tests/testthat/test-fdiff-fgrowth-D-G.R 090550982607b30f727516af050e592c *tests/testthat/test-ffirst-flast.R ca953a7de4e50a93b3182c1eb23b4421 *tests/testthat/test-flag-L-F.R 86ca0fd5cab6295122bc95ec4a907a00 *tests/testthat/test-flm-fFtest.R 67bdb7be8cce46b9e6ea5f058aa38892 *tests/testthat/test-fmatch.R 0bb69e873d9149ae10dbf982605ad55a *tests/testthat/test-fmean.R ba04d6ec69ecc0fb872d3e5d9c92b44e *tests/testthat/test-fmedian.R f689ea540af48cfbd1ab23d91051f237 *tests/testthat/test-fmin-fmax.R c077c1fe6234c1e538b00a507efa5b80 *tests/testthat/test-fmode.R 2a9c9fa954b73ff0e71f8174386dfeef *tests/testthat/test-fmutate.R 194b2fd7449026b6dcb997ac9d59afc3 *tests/testthat/test-fprod.R a64224329151a2804ff5545f61245caa *tests/testthat/test-fquantile.R 68fa2e71f3cb573b715a8e111e9dba00 *tests/testthat/test-fscale-STD.R bcddc28c895329641291593ab6e982f9 *tests/testthat/test-fslice.R 63d19d39211741a26d3f62aa79b84917 *tests/testthat/test-fsubset-ftransform.R 7d9869c4e3611104b5a03ac74246775c *tests/testthat/test-fsum.R 85596053c261a4b027f1447d94a22d55 *tests/testthat/test-fvar-fsd.R 5796cd9a1a3fb7d6a4fca5d9d23ad689 *tests/testthat/test-indexing.R 75253ddc8065e8c95a434f454cc05f36 *tests/testthat/test-join.R 18dd43a9a0006618a0e7de1f8cc4389d *tests/testthat/test-list-processing.R afb33178610f3c2cdcb8c748816d0a04 *tests/testthat/test-misc.R a51ef6421c1bbb35e688e1663c2fc1c4 *tests/testthat/test-miscellaneous-issues.R 701badf0f6f6188fb8ebfb7eebf9e711 *tests/testthat/test-pivot.R 2c0ea828ec510d4756e097692c762396 *tests/testthat/test-psmat-psacf.R dae6a19c49e8ad739ac367cf372c9ada *tests/testthat/test-qsu.R 64fd4996595e3e11136b238074ae80b9 *tests/testthat/test-qtab.R 802f2cd46efce71f1208f703200a2831 *tests/testthat/test-quick-conversion.R 7a8a4338954796da578c777e7e8ed996 *tests/testthat/test-recode-replace.R 315b8f770c650d1a51c70feb40994453 *tests/testthat/test-roworder-colorder-rename.R a30f6b84e05c6b35b8b6bfeb7df8d8bd *tests/testthat/test-select-replace-vars.R ca05f0bb9f4a83129e56e92ca7832170 *tests/testthat/test-seqid-groupid.R 081ea0187faffcf99b59cf25a7462a65 *tests/testthat/test-setop.R d79b3ab4369d9d6235161ebb39ed57e1 *tests/testthat/test-sf.R be03dc26e8797d1baf124472e5e0c51e *tests/testthat/test-small-helper.R 9354fb7738c53566fdaa24de9c0f3ab0 *tests/testthat/test-splitting.R 7469b83eb6dd0887543601825cd65187 *tests/testthat/test-unlist2d.R a9762aad5cdf7f33361d2b327e1ae013 *tests/testthat/test-varying.R bd0f8fdf7682c1785ef723888e36b7df *tests/testthat/test-whichv.R b4e0e0b857fce8072ff50c73cdb97a77 *vignettes/collapse_and_data.table.Rmd 03d86f684128d8b68cc44977b53dcf74 *vignettes/collapse_and_dplyr.Rmd 3c67114e5bd444a411e7baa61e690cc2 *vignettes/collapse_and_plm.Rmd d1c0db4430cc0d2fab00a3ca8d2d52a9 *vignettes/collapse_and_sf.Rmd 94f5cfdea645b1f69f661160716bf82a *vignettes/collapse_documentation.Rmd 4f570f15cf9a5391e5948af50c38699d *vignettes/collapse_for_tidyverse_users.Rmd de162f967be2cb7d4637955f07ed69bd *vignettes/collapse_intro.Rmd c60a036b7be5c036349746cfd54465df *vignettes/collapse_object_handling.Rmd 8f40c7ec50eef3bacf35125d24d8bda5 *vignettes/developing_with_collapse.Rmd collapse/R/0000755000176200001440000000000015202626455012264 5ustar liggesuserscollapse/R/fscale.R0000644000176200001440000002217614777170130013654 0ustar liggesusers # Make faster ? cm <- function(x) if(is.double(x)) x else if(is.character(x) && x == "overall.mean") -Inf else if(isFALSE(x)) Inf else stop("mean must be a number, 'overall.mean' or FALSE") csd <- function(x) if(is.double(x)) x else if(is.character(x) && x == "within.sd") -Inf else stop("sd must be a number or 'within.sd'") # TODO: w.type - Implement reliability weights? fscale <- function(x, ...) UseMethod("fscale") # , x fscale.default <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fscale.matrix(x, g, w, na.rm, mean, sd, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fscale,x,0L,0L,w,na.rm,cm(mean),csd(sd))) g <- G_guo(g) .Call(Cpp_fscale,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.pseries <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- group_effect(x, effect) res <- if(is.matrix(x)) .Call(Cpp_fscalem,x,fnlevels(g),g,w,na.rm,cm(mean),csd(sd)) else .Call(Cpp_fscale,x,fnlevels(g),g,w,na.rm,cm(mean),csd(sd)) if(is.double(x)) return(res) pseries_to_numeric(res) } fscale.matrix <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fscalem,x,0L,0L,w,na.rm,cm(mean),csd(sd))) g <- G_guo(g) .Call(Cpp_fscalem,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.zoo <- function(x, ...) if(is.matrix(x)) fscale.matrix(x, ...) else fscale.default(x, ...) fscale.units <- fscale.zoo fscale.grouped_df <- function(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- substitute(w) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn2 %in% wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2, wn) if(keep.w) gn <- c(gn, wn) } } if(length(gn2)) { # if(!length(gn)) return(.Call(Cpp_fscalel,x[-gn2],g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd))) ax <- attributes(x) ax[["names"]] <- c(nam[gn], nam[-gn2]) # first term is removed if !length(gn) res <- .Call(Cpp_fscalel, .subset(x, -gn2), g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } .Call(Cpp_fscalel,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.data.frame <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fscalel,x,0L,0L,w,na.rm,cm(mean),csd(sd))) g <- G_guo(g) .Call(Cpp_fscalel,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) } fscale.list <- function(x, ...) fscale.data.frame(x, ...) fscale.pdata.frame <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- group_effect(x, effect) .Call(Cpp_fscalel,x,fnlevels(g),g,w,na.rm,cm(mean),csd(sd)) } # Standardization Operator STD <- function(x, ...) UseMethod("STD") # , x STD.default <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(STD.matrix(x, g, w, na.rm, mean, sd, ...)) fscale.default(x, g, w, na.rm, mean, sd, ...) } STD.pseries <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, ...) fscale.pseries(x, effect, w, na.rm, mean, sd, ...) STD.matrix <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], ...) { res <- fscale.matrix(x, g, w, na.rm, mean, sd, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "STD.")) res } STD.zoo <- function(x, ...) if(is.matrix(x)) STD.matrix(x, ...) else STD.default(x, ...) STD.units <- STD.zoo STD.grouped_df <- function(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- substitute(w) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn2 %in% wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2, wn) if(keep.w) gn <- c(gn, wn) } } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], do_stub(stub, nam[-gn2], "STD.")) res <- .Call(Cpp_fscalel, .subset(x, -gn2), g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } res <- .Call(Cpp_fscalel,x,g[[1L]],g[[2L]],w,na.rm,cm(mean),csd(sd)) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "STD.")) res } # updated (best) version ! STD.pdata.frame <- function(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], keep.ids = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) nam <- ax[["names"]] g <- group_effect(x, effect) cols_fun <- is.function(cols) if(cols_fun && identical(cols, is.numeric)) cols <- which(.Call(C_vtypes, x, 1L)) else if(length(cols)) cols <- cols2int(cols, x, nam) oldClass(x) <- NULL if(cols_fun || keep.ids) { gn <- which(nam %in% attr(findex(x), "nam")) # Needed for 3+ index variables if(length(gn)) { if(cols_fun) cols <- fsetdiff(cols, gn) else if(is.null(cols)) cols <- seq_along(x)[-gn] } if(!keep.ids) gn <- NULL } else gn <- NULL if(is.call(w)) { wn <- ckmatch(all.vars(w), nam, "Unknown weight variable:") w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn) && length(cols)) { ax[["names"]] <- c(nam[gn], do_stub(stub, nam[cols], "STD.")) return(setAttributes(c(x[gn], .Call(Cpp_fscalel,x[cols],fnlevels(g),g,w,na.rm,cm(mean),csd(sd))), ax)) } if(!length(gn)) { ax[["names"]] <- do_stub(stub, nam[cols], "STD.") return(setAttributes(.Call(Cpp_fscalel,x[cols],fnlevels(g),g,w,na.rm,cm(mean),csd(sd)), ax)) } if(isTRUE(stub) || is.character(stub)) { ax[["names"]] <- do_stub(stub, nam, "STD.") return(setAttributes(.Call(Cpp_fscalel,x,fnlevels(g),g,w,na.rm,cm(mean),csd(sd)), ax)) } .Call(Cpp_fscalel,`oldClass<-`(x, ax[["class"]]),fnlevels(g),g,w,na.rm,cm(mean),csd(sd)) } # updated, fast and data.table proof version ! STD.data.frame <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], keep.by = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(w)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- cols2intrmgn(gn, cols, x) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.by) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L) else G_guo(by) } if(is.call(w)) { wn <- ckmatch(all.vars(w), nam, "Unknown weight variable:") w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn)) { ax[["names"]] <- c(nam[gn], do_stub(stub, nam[cols], "STD.")) return(setAttributes(c(x[gn], .Call(Cpp_fscalel,x[cols],by[[1L]],by[[2L]],w,na.rm,cm(mean),csd(sd))), ax)) } ax[["names"]] <- do_stub(stub, nam[cols], "STD.") return(setAttributes(.Call(Cpp_fscalel,x[cols],by[[1L]],by[[2L]],w,na.rm,cm(mean),csd(sd)), ax)) } else if(length(cols)) { # Needs to be like this, otherwise subsetting dropps the attributes !! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(isTRUE(stub) || is.character(stub)) attr(x, "names") <- do_stub(stub, attr(x, "names"), "STD.") if(is.null(by)) return(.Call(Cpp_fscalel,x,0L,0L,w,na.rm,cm(mean),csd(sd))) by <- G_guo(by) .Call(Cpp_fscalel,x,by[[1L]],by[[2L]],w,na.rm,cm(mean),csd(sd)) } STD.list <- function(x, ...) STD.data.frame(x, ...) collapse/R/unlist2d.R0000644000176200001440000001523414777170130014160 0ustar liggesusers rowbind <- function(..., idcol = NULL, row.names = FALSE, use.names = TRUE, fill = FALSE, id.factor = "auto", return = c("as.first", "data.frame", "data.table", "tibble", "list")) { l <- if(...length() == 1L && is.list(..1)) unclass(..1) else list(...) if(is.logical(idcol)) idcol <- if(isTRUE(idcol)) ".id" else NULL id_fact <- length(idcol) && switch(as.character(id.factor), `TRUE` = TRUE, `FALSE` = FALSE, auto = !is.null(names(l)), ordered = TRUE, stop("id.factor needs to be 'TRUE', 'FALSE', 'auto' or 'ordered'")) if(id_fact) { nam <- names(l) names(l) <- NULL } res <- .Call(C_rbindlist, l, use.names || fill, fill, idcol) if(id_fact) { attr(res[[1L]], "levels") <- if(length(nam)) nam else as.character(seq_along(l)) oldClass(res[[1L]]) <- switch(id.factor, `TRUE` = c("factor", "na.included"), # Cannot have empty alternative in numeric switch auto = c("factor", "na.included"), ordered = c("ordered", "factor", "na.included")) } if(!isFALSE(row.names)) { attributes(l) <- NULL rn <- list(.Call(C_pivot_long, lapply(l, attr, "row.names"), NULL, FALSE)) if(length(rn[[1L]]) != length(res[[1L]])) stop("length mismatch: not all objects in the list have 'row.names' attribute") names(rn) <- switch(row.names, `TRUE` = "row.names", row.names) res <- if(is.null(idcol)) c(rn, res) else c(res[1L], rn, res[-1L]) } switch(return[1L], as.first = { a1 <- attributes(l[[1L]]) if(is.null(a1)) return(res) if(any(a1$class == "data.frame")) a1$row.names <- .set_row_names(length(res[[1L]])) a1$names <- names(res) .Call(C_setattributes, res, a1) if(any(a1$class == "data.table")) return(alc(res)) res }, data.frame = qDF(res), data.table = qDT(res), tibble = qTBL(res), list = res, stop("Unknown return option: ", return[1L]) ) } unlist2d <- function(l, idcols = ".id", row.names = FALSE, recursive = TRUE, id.factor = FALSE, DT = FALSE) { if (!is.list(l)) return(l) # stop("l is not a list") makeids <- length(idcols) && !isFALSE(idcols) if(makeids) id.names <- if(isTRUE(idcols)) ".id" else idcols[1L] keeprn <- !isFALSE(row.names) if(keeprn) row.names <- switch(row.names, `TRUE` = "row.names", row.names) idfac <- !isFALSE(id.factor) if(idfac) fcclass <- switch(id.factor, `TRUE` = c("factor", "na.included"), ordered = c("ordered", "factor", "na.included"), stop('id.factor needs to be FALSE, TRUE or "ordered"')) DATAclass <- if(DT) c("data.table", "data.frame") else "data.frame" DFDTl <- function(l) { attr(l, "row.names") <- .set_row_names(.Call(C_fnrow, l)) `oldClass<-`(l, DATAclass) } # idf <- function(x) if(inherits(x, "data.frame")) 2L else if (!length(x)) 1L else 3L*is.atomic(x) # was if(is.null(x)) 1L -> disregards empty list, bug reported # faster way ? : This is not faster: 2L*inherits(x, "data.frame") + is.null(x) + 3L*is.atomic(x) addrn <- function(x) if(any(attr(x, "names") == row.names)) x else c(`names<-`(list(attr(x, "row.names")), row.names), x) # faster way ? attol <- function(x) { # class(x) <- NULL # tables are also arrays, although only 1D, not because of the class but because they have a dimension attribute. if (length(d <- dim(x)) > 1L) { # is.array(x) # length could also be 0... not NULL if (length(d) > 2L) { # breaking down HDA dn <- dimnames(x) dim(x) <- c(d[1L], bprod(d[-1L])) if (length(dn)) { for (i in 2L:length(d)) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(x) <- list(dn[[1L]], interact_names(dn[-1L])) # Good ? } } if(keeprn) { dn <- dimnames(x) x <- `names<-`(c(list(if(is.null(dn[[1L]])) seq_len(d[1L]) else dn[[1L]]), .Call(Cpp_mctl, x, FALSE, 0L)), c(row.names, dn[[2L]])) } else x <- .Call(Cpp_mctl, x, TRUE, 0L) } else x <- as.vector(x, "list") if (is.null(names(x))) names(x) <- paste0("V", seq_along(x)) # it seems this is not yet working for all (i.e. model objects..), also perhaps not start at V1, depending on what other columsn there are.. i.e. start at the right position ? return(x) } ul2d <- function(y) { if(inherits(y, "data.frame") || is.atomic(y)) return(y) if(is.object(y)) oldClass(y) <- NULL # perhaps unclassing y would put more safety ? -> yes ! ident <- .Call(C_vtypes, y, 6L) # vapply(`attributes<-`(y, NULL), idf, 1L) # removes names ? if(is.list(y) && all(ident > 0L)) { if(any(at <- ident == 3L)) y[at] <- lapply(y[at], attol) if(keeprn && any(df <- ident == 2L)) y[df] <- lapply(y[df], addrn) # better cbind for data.table ? or x[["row.names"]] =.. and the sort later ? if(makeids) { if(idfac) { y <- y[ident != 1L] # better way ? y[ident!=1L] = NULL ? nam <- names(y) if(length(nam)) names(y) <- NULL else nam <- as.character(seq_along(y)) y <- DFDTl(.Call(C_rbindlist, y, TRUE, TRUE, id.names)) setattributes(.subset2(y, 1L), pairlist(levels = nam, class = fcclass)) return(y) } else return(DFDTl(.Call(C_rbindlist, y[ident != 1L], TRUE, TRUE, id.names))) } else return(DFDTl(.Call(C_rbindlist, y[ident != 1L], TRUE, TRUE, NULL))) } else lapply(y, ul2d) } l <- ul2d(l) if(recursive) { while(!inherits(l, "data.frame")) l <- ul2d(l) if(makeids) { nams <- attr(l, "names") ids <- whichv(nams, id.names) nid <- length(ids) if(nid > 1L) { nids <- seq_len(nid) attr(l, "names")[ids] <- if(length(idcols) == nid) idcols else paste(id.names, nids, sep = ".") if(keeprn) { rn <- whichv(nams, row.names) # with more id's, row.names are automatically generated from the sub-data.frames.. if(!all(ids == nids) || rn != nid + 1L) .Call(C_setcolorder, l, c(ids, rn, seq_along(nams)[-c(ids, rn)])) } else if (!all(ids == nids)) .Call(C_setcolorder, l, c(ids, seq_along(nams)[-ids])) } else if(keeprn) { # makes sure row.names comes after ids, even if only one id! rn <- whichv(nams, row.names) # length(rn) needed when only vectors... no row names column... if(length(rn) && rn != 2L) .Call(C_setcolorder, l, c(ids, rn, seq_along(nams)[-c(ids, rn)])) } } else if (keeprn) { nams <- attr(l, "names") rn <- whichv(nams, row.names) if(length(rn) && rn != 1L) .Call(C_setcolorder, l, c(rn, seq_along(nams)[-rn])) } if(DT) return(alc(l)) } # attr(l, ".internal.selfref") <- NULL l } collapse/R/RcppExports.R0000644000176200001440000001212615202626455014702 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 BWCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(`_collapse_BWCpp`, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(`_collapse_BWmCpp`, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(`_collapse_BWlCpp`, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } fbstatsCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable_algo = TRUE, array = TRUE, setn = TRUE, gn = NULL) { .Call(`_collapse_fbstatsCpp`, x, ext, ng, g, npg, pg, w, stable_algo, array, setn, gn) } fbstatsmCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable_algo = TRUE, array = TRUE, gn = NULL) { .Call(`_collapse_fbstatsmCpp`, x, ext, ng, g, npg, pg, w, stable_algo, array, gn) } fbstatslCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable_algo = TRUE, array = TRUE, gn = NULL) { .Call(`_collapse_fbstatslCpp`, x, ext, ng, g, npg, pg, w, stable_algo, array, gn) } fdiffgrowthCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(`_collapse_fdiffgrowthCpp`, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthmCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(`_collapse_fdiffgrowthmCpp`, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthlCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(`_collapse_fdiffgrowthlCpp`, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } flagleadCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(`_collapse_flagleadCpp`, x, n, fill, ng, g, t, names) } flagleadmCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(`_collapse_flagleadmCpp`, x, n, fill, ng, g, t, names) } flagleadlCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(`_collapse_flagleadlCpp`, x, n, fill, ng, g, t, names) } fscaleCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(`_collapse_fscaleCpp`, x, ng, g, w, narm, set_mean, set_sd) } fscalemCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(`_collapse_fscalemCpp`, x, ng, g, w, narm, set_mean, set_sd) } fscalelCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(`_collapse_fscalelCpp`, x, ng, g, w, narm, set_mean, set_sd) } fvarsdCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE) { .Call(`_collapse_fvarsdCpp`, x, ng, g, gs, w, narm, stable_algo, sd) } fvarsdmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(`_collapse_fvarsdmCpp`, x, ng, g, gs, w, narm, stable_algo, sd, drop) } fvarsdlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(`_collapse_fvarsdlCpp`, x, ng, g, gs, w, narm, stable_algo, sd, drop) } mrtl <- function(X, names = FALSE, ret = 0L) { .Call(`_collapse_mrtl`, X, names, ret) } mctl <- function(X, names = FALSE, ret = 0L) { .Call(`_collapse_mctl`, X, names, ret) } psmatCpp <- function(x, g, t = NULL, transpose = FALSE, fill = NULL) { .Call(`_collapse_psmatCpp`, x, g, t, transpose, fill) } pwnobsmCpp <- function(x) { .Call(`_collapse_pwnobsmCpp`, x) } qFCpp <- function(x, ordered = TRUE, na_exclude = TRUE, keep_attr = TRUE, ret = 1L) { .Call(`_collapse_qFCpp`, x, ordered, na_exclude, keep_attr, ret) } sortuniqueCpp <- function(x) { .Call(`_collapse_sortuniqueCpp`, x) } fdroplevelsCpp <- function(x, check_NA = TRUE) { .Call(`_collapse_fdroplevelsCpp`, x, check_NA) } seqid <- function(x, o = NULL, del = 1L, start = 1L, na_skip = FALSE, skip_seq = FALSE, check_o = TRUE) { .Call(`_collapse_seqid`, x, o, del, start, na_skip, skip_seq, check_o) } groupid <- function(x, o = NULL, start = 1L, na_skip = FALSE, check_o = TRUE) { .Call(`_collapse_groupid`, x, o, start, na_skip, check_o) } varyingCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE) { .Call(`_collapse_varyingCpp`, x, ng, g, any_group) } varyingmCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(`_collapse_varyingmCpp`, x, ng, g, any_group, drop) } varyinglCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(`_collapse_varyinglCpp`, x, ng, g, any_group, drop) } collapse/R/fsubset_ftransform_fmutate.R0000644000176200001440000011220315056572047020053 0ustar liggesusers fsubset <- function(.x, ...) UseMethod("fsubset") sbt <- fsubset # Also not really faster than default for numeric (but a bit faster for factors ...) fsubset.default <- function(.x, subset, ...) { # if(is.matrix(.x) && !inherits(.x, "matrix")) return(fsubset.matrix(.x, subset, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.logical(subset)) return(.Call(C_subsetVector, .x, which(subset), FALSE)) .Call(C_subsetVector, .x, subset, TRUE) } fsubset.matrix <- function(.x, subset, ..., drop = FALSE) { if(missing(...)) return(.x[subset, , drop = drop]) # better row subsetting ? (like df, method? use mctl ?) nl <- `names<-`(as.vector(1L:ncol(.x), "list"), dimnames(.x)[[2L]]) vars <- eval(substitute(c(...)), nl, parent.frame()) if(missing(subset)) return(.x[, vars, drop = drop]) .x[subset, vars, drop = drop] } fsubset.zoo <- function(.x, ...) if(is.matrix(.x)) fsubset.matrix(.x, ...) else fsubset.default(.x, ...) fsubset.units <- fsubset.zoo # No lazy eval ss <- function(x, i, j, check = TRUE) { if(is.atomic(x)) if(is.matrix(x)) return(if(missing(j)) x[i, , drop = FALSE] else if(missing(i)) x[, j, drop = FALSE] else x[i, j, drop = FALSE]) else return(x[i]) mj <- missing(j) if(mj) j <- seq_along(unclass(x)) else if(is.integer(j)) { # if(missing(i)) stop("Need to supply either i or j or both") if(missing(i)) return(.Call(C_subsetCols, x, j, TRUE)) if(check && any(j < 0L)) j <- seq_along(unclass(x))[j] } else { if(is.character(j)) { j <- ckmatch(j, attr(x, "names")) } else if(is.logical(j)) { if(check && length(j) != length(unclass(x))) stop("If j is logical, it needs to be of length ncol(x)") j <- which(j) } else if(is.numeric(j)) { j <- if(check && any(j < 0)) seq_along(unclass(x))[j] else as.integer(j) } else stop("j needs to be supplied integer indices, character column names, or a suitable logical vector") if(missing(i)) return(.Call(C_subsetCols, x, j, TRUE)) } if(!is.integer(i)) { if(is.numeric(i)) i <- as.integer(i) else if(is.logical(i)) { nr <- fnrow(x) if(check && length(i) != nr) stop("i needs to be integer or logical(nrow(x))") # which(r & !is.na(r)) not needed ! i <- which(i) if(length(i) == nr) return(if(mj) x else .Call(C_subsetCols, x, j, TRUE)) check <- FALSE } else stop("i needs to be integer or logical(nrow(x))") } rn <- attr(x, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, x, i, j, check)) res <- .Call(C_subsetDT, x, i, j, check) attr(res, "row.names") <- .Call(C_subsetVector, rn, i, check) res } fsubset.data.frame <- function(.x, subset, ...) { r <- eval(substitute(subset), .x, parent.frame()) # Needs to be placed above any column renaming if(missing(...)) vars <- seq_along(unclass(.x)) else { ix <- seq_along(unclass(.x)) nl <- `names<-`(as.vector(ix, "list"), attr(.x, "names")) vars <- eval(substitute(c(...)), nl, parent.frame()) nam_vars <- names(vars) if(is.integer(vars)) { if(any(vars < 0L)) vars <- ix[vars] } else { if(is.character(vars)) vars <- ckmatch(vars, names(nl)) else if(is.numeric(vars)) { vars <- if(any(vars < 0)) ix[vars] else as.integer(vars) } else stop("... needs to be comma separated column names, or column indices") } if(length(nam_vars)) { nonmiss <- nzchar(nam_vars) attr(.x, "names")[vars[nonmiss]] <- nam_vars[nonmiss] } } checkrows <- TRUE if(is.logical(r)) { nr <- fnrow(.x) if(length(r) != nr) stop("subset needs to be an expression evaluating to logical(nrow(.x)) or integer") # which(r & !is.na(r)) not needed ! r <- which(r) if(length(r) == nr) if(missing(...)) return(.x) else return(.Call(C_subsetCols, .x, vars, TRUE)) checkrows <- FALSE } else if(is.numeric(r)) r <- as.integer(r) else stop("subset needs to be an expression evaluating to logical(nrow(.x)) or integer") rn <- attr(.x, "row.names") res <- .Call(C_subsetDT, .x, r, vars, checkrows) if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(res) attr(res, "row.names") <- .Call(C_subsetVector, rn, r, checkrows) res } fsubset.pseries <- function(.x, subset, ..., drop.index.levels = "id") { if(is.array(.x)) stop("fsubset does not support pseries matrices") if(!missing(...)) unused_arg_action(match.call(), ...) checkrows <- TRUE if(!is.integer(subset)) { if(is.numeric(subset)) subset <- as.integer(subset) else if(is.logical(subset)) { subset <- which(subset) if(length(subset) == length(.x)) return(.x) checkrows <- FALSE } else stop("subset needs to be integer or logical") } res <- .Call(C_subsetVector, .x, subset, checkrows) if(length(names(.x))) names(res) <- .Call(C_subsetVector, names(.x), subset, checkrows) index <- findex(.x) index_ss <- droplevels_index(.Call(C_subsetDT, index, subset, seq_along(unclass(index)), checkrows), drop.index.levels) attr(res, if(inherits(.x, "indexed_series")) "index_df" else "index") <- index_ss res } # Exact same code as .data.frame, just adding a block to deal with the index fsubset.pdata.frame <- function(.x, subset, ..., drop.index.levels = "id") { r <- eval(substitute(subset), .x, parent.frame()) # Needs to be placed above any column renaming if(missing(...)) vars <- seq_along(unclass(.x)) else { ix <- seq_along(unclass(.x)) nl <- `names<-`(as.vector(ix, "list"), attr(.x, "names")) vars <- eval(substitute(c(...)), nl, parent.frame()) nam_vars <- names(vars) if(is.integer(vars)) { if(any(vars < 0L)) vars <- ix[vars] } else { if(is.character(vars)) vars <- ckmatch(vars, names(nl)) else if(is.numeric(vars)) { vars <- if(any(vars < 0)) ix[vars] else as.integer(vars) } else stop("... needs to be comma separated column names, or column indices") } if(length(nam_vars)) { nonmiss <- nzchar(nam_vars) attr(.x, "names")[vars[nonmiss]] <- nam_vars[nonmiss] } } checkrows <- TRUE if(is.logical(r)) { nr <- fnrow(.x) if(length(r) != nr) stop("subset needs to be an expression evaluating to logical(nrow(.x)) or integer") # which(r & !is.na(r)) not needed ! r <- which(r) if(length(r) == nr) if(missing(...)) return(.x) else return(.Call(C_subsetCols, .x, vars, TRUE)) checkrows <- FALSE } else if(is.numeric(r)) r <- as.integer(r) else stop("subset needs to be an expression evaluating to logical(nrow(.x)) or integer") rn <- attr(.x, "row.names") res <- .Call(C_subsetDT, .x, r, vars, checkrows) if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) attr(res, "row.names") <- .Call(C_subsetVector, rn, r, checkrows) index <- findex(.x) index_ss <- droplevels_index(.Call(C_subsetDT, index, r, seq_along(unclass(index)), checkrows), drop.index.levels) if(inherits(.x, "indexed_frame")) return(reindex(res, index_ss)) attr(res, "index") <- index_ss res } fsubset.grouped_df <- function(.x, subset, ...) stop("fsubset() does not support grouped data: please subset your data before grouping it") # Example: # fsubset(GGDC10S, Variable == "VA" & Year > 1990, Country, Year, AGR:SUM) ftransform_core <- function(X, value) { # value is unclassed, X has all attributes ax <- attributes(X) # keep like this ? oldClass(X) <- NULL nam <- names(value) if(!length(nam) || fanyDuplicated(nam)) stop("All replacement expressions have to be uniquely named") namX <- names(X) # !length also detects character(0) if(!length(namX) || fanyDuplicated(namX)) stop("All columns of .data have to be uniquely named") le <- vlengths(value, FALSE) nr <- .Call(C_fnrow, X) rl <- le == nr # checking if computed values have the right length inx <- match(nam, namX) # calling names on a plain list is really fast -> no need to save objects.. matched <- !is.na(inx) if(all(rl)) { # All computed vectors have the right length if(any(matched)) X[inx[matched]] <- value[matched] } else { # Some do not if(any(1L < le & !rl)) stop("Lengths of replacements must be equal to nrow(.data) or 1, or NULL to delete columns") if(any(le1 <- le == 1L)) value[le1] <- lapply(value[le1], alloc, nr) # Length 1 arguments. can use TRA ?, or rep_len, but what about date variables ? if(any(le0 <- le == 0L)) { # best order -> yes, ftransform(mtcars, bla = NULL) just returns mtcars, but could also put this error message: if(any(le0 & !matched)) stop(paste("Can only delete existing columns, unknown columns:", paste(nam[le0 & !matched], collapse = ", "))) if(all(le0)) { X[inx[le0]] <- NULL return(`oldClass<-`(X, ax[["class"]])) } matched <- matched[!le0] value <- value[!le0] # value[le0] <- NULL if(any(matched)) X[inx[!le0][matched]] <- value[matched] # index is wrong after first deleting, thus we delete after ! X[inx[le0]] <- NULL } else if(any(matched)) X[inx[matched]] <- value[matched] # NULL assignment ... -> Nope ! } if(all(matched)) return(`oldClass<-`(X, ax[["class"]])) ax[["names"]] <- c(names(X), names(value)[!matched]) setAttributes(c(X, value[!matched]), ax) } ftransform <- function(.data, ...) { # `_data` ? if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- eval(substitute(list(...)), .data, parent.frame()) if(is.null(names(e)) && length(e) == 1L && is.list(e[[1L]])) e <- unclass(e[[1L]]) # support list input -> added in v1.3.0 return(condalc(ftransform_core(.data, e), inherits(.data, "data.table"))) } tfm <- ftransform `ftransform<-` <- function(.data, value) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") if(!is.list(value)) stop("value needs to be a named list") return(condalc(ftransform_core(.data, unclass(value)), inherits(.data, "data.table"))) } `tfm<-` <- `ftransform<-` # Example: # ftransform(mtcars, cyl = cyl + 10, vs2 = 1, mpg = NULL) eval_exp <- function(nam, exp, pe) { nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) eval(exp, nl, pe) } ftransformv <- function(.data, vars, FUN, ..., apply = TRUE) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") if(!is.function(FUN)) stop("FUN needs to be a function") clx <- oldClass(.data) vs <- tryCatch(vars, error = function(e) NULL) if(apply) { oldClass(.data) <- NULL if(is.null(vs)) vs <- eval_exp(names(.data), substitute(vars), parent.frame()) vars <- cols2int(vs, .data, names(.data), FALSE) value <- `names<-`(.data[vars], NULL) value <- if(missing(...)) lapply(value, FUN) else eval(substitute(lapply(value, FUN, ...)), .data, parent.frame()) } else { nam <- attr(.data, "names") if(is.null(vs)) vs <- eval_exp(nam, substitute(vars), parent.frame()) vars <- cols2int(vs, .data, nam, FALSE) value <- .Call(C_subsetCols, .data, vars, FALSE) value <- if(missing(...)) unclass(FUN(value)) else # unclass needed here ? -> yes for lengths... unclass(eval(substitute(FUN(value, ...)), .data, parent.frame())) if(!identical(names(value), nam[vars])) return(condalc(ftransform_core(.data, value), any(clx == "data.table"))) oldClass(.data) <- NULL } le <- vlengths(value, FALSE) nr <- .Call(C_fnrow, .data) if(allv(le, nr)) .data[vars] <- value else if(allv(le, 1L)) .data[vars] <- lapply(value, alloc, nr) else { if(apply) names(value) <- names(.data)[vars] .data <- ftransform_core(.data, value) } return(condalc(`oldClass<-`(.data, clx), any(clx == "data.table"))) } tfmv <- ftransformv settransform <- function(.data, ...) { name <- as.character(substitute(.data)) if(length(name) != 1L || name == ".") stop("Cannot assign to name: ", deparse(substitute(.data))) res <- ftransform(.data, ...) assign(name, res, envir = parent.frame()) invisible(res) } # eval.parent(substitute(.data <- get0("ftransform", envir = getNamespace("collapse"))(.data, ...))) # can use `<-`(.data, ftransform(.data,...)) but not faster .. settfm <- settransform settransformv <- function(.data, ...) { name <- as.character(substitute(.data)) if(length(name) != 1L || name == ".") stop("Cannot assign to name: ", deparse(substitute(.data))) res <- ftransformv(.data, ...) assign(name, res, envir = parent.frame()) invisible(res) } # eval.parent(substitute(.data <- get0("ftransformv", envir = getNamespace("collapse"))(.data, vars, FUN, ..., apply = apply))) settfmv <- settransformv fcompute_core <- function(.data, e, keep = NULL) { ax <- attributes(.data) nam <- ax[["names"]] if(!length(nam) || fanyDuplicated(nam)) stop("All columns of .data have to be uniquely named") if(length(keep)) { keep <- cols2int(keep, .data, nam, FALSE) if(any(m <- match(names(e), nam[keep], nomatch = 0L))) { temp <- .subset(.data, keep) pos <- m > 0L temp[m[pos]] <- e[pos] e <- c(temp, e[!pos]) } else e <- c(.subset(.data, keep), e) } if(inherits(.data, "sf") && !any(names(e) == attr(.data, "sf_column"))) e <- c(e, .subset(.data, attr(.data, "sf_column"))) ax[["names"]] <- names(e) le <- vlengths(e, FALSE) nr <- fnrow(.data) rl <- le == nr if(all(rl)) return(condalcSA(e, ax, inherits(.data, "data.table"))) # All computed vectors have the right length if(any(1L < le & !rl)) stop("Lengths of replacements must be equal to nrow(.data) or 1") e[!rl] <- lapply(e[!rl], alloc, nr) return(condalcSA(e, ax, inherits(.data, "data.table"))) } fcompute <- function(.data, ..., keep = NULL) { # within ? if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- eval(substitute(list(...)), .data, parent.frame()) if(is.null(names(e)) && length(e) == 1L && is.list(e[[1L]])) e <- unclass(e[[1L]]) # support list input -> added in v1.3.0 return(fcompute_core(.data, e, keep)) } fcomputev <- function(.data, vars, FUN, ..., apply = TRUE, keep = NULL) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") if(!is.function(FUN)) stop("FUN needs to be a function") vs <- tryCatch(vars, error = function(e) NULL) nam <- attr(.data, "names") if(is.null(vs)) vs <- eval_exp(nam, substitute(vars), parent.frame()) vars <- cols2int(vs, .data, nam, FALSE) if(apply) { value <- `names<-`(.subset(.data, vars), NULL) value <- if(missing(...)) lapply(value, FUN) else eval(substitute(lapply(value, FUN, ...)), .data, parent.frame()) names(value) <- nam[vars] } else { value <- .Call(C_subsetCols, .data, vars, FALSE) value <- if(missing(...)) unclass(FUN(value)) else # unclass needed here ? -> yes for lengths... unclass(eval(substitute(FUN(value, ...)), .data, parent.frame())) } return(fcompute_core(.data, value, keep)) # Note: Need to do this, value could be scalars or vectors } # fmutate fFUN_mutate_add_groups <- function(z) { if(!is.call(z)) return(z) cz <- as.character(z[[1L]]) if(length(cz) > 1L) cz <- if(any(cz == "collapse")) cz[length(cz)] else "" # needed if collapse::fmean etc.. if(any(cz == .FAST_FUN_MOPS)) { z$g <- quote(.g_) if(any(cz == .FAST_STAT_FUN_POLD) && is.null(z$TRA)) z$TRA <- 1L # if(is.null(z$TRA)) z$TRA <- 1L # z$use.g.names <- FALSE # Not necessary } # This works for nested calls (nothing more required, but need to put at the end..) if(length(z) > 2L || is.call(z[[2L]])) return(as.call(lapply(z, fFUN_mutate_add_groups))) # Need because: mpg - fmean(mpg) z } gsplit_single_apply <- function(x, g, ex, v, encl, unl = TRUE) { funexpr <- quote(function(.x_yz_) .x_yz_) funexpr[[3]] <- eval(call("substitute", ex, `names<-`(list(quote(.x_yz_)), v)), NULL, NULL) funexpr[[4]] <- NULL fun <- eval(funexpr, encl, baseenv()) res <- lapply(gsplit(x, g), fun) if(unl) copyMostAttributes(funlist(res), x) else res } # Old version: more expensive... # gsplit_single_apply <- function(x, g, ex, v, encl) # copyMostAttributes(funlist(lapply(gsplit(x, g), function(i) eval(ex, `names<-`(list(i), v), encl))), x) gsplit_multi_apply <- function(x, g, ex, encl, SD = FALSE) { sx <- seq_along(x) gs <- gsplit(NULL, g) if(!SD) return(lapply(gs, function(i) eval(ex, .Call(C_subsetDT, x, i, sx, FALSE), encl))) funexpr <- substitute(function(.data) expr, list(expr = ex)) funexpr[[4]] <- NULL fun <- eval(funexpr, encl, baseenv()) lapply(gs, function(i) fun(.Call(C_subsetDT, x, i, sx, FALSE))) } othFUN_compute <- function(x) { if(length(x) == 2L) # No additional function arguments return(substitute(lapply(.gsplit_(a, .g_), b), list(a = x[[2L]], b = x[[1L]]))) # With more arguments, things become more complex.. as.call(c(list(quote(lapply), substitute(.gsplit_(a, .g_), list(a = x[[2L]]))), as.list(x[-2L]))) } keep_v <- function(d, v) copyMostAttributes(null_rm(.subset(d, unique.default(v))), d) acr_get_cols <- function(.cols, d, nam, ce) { # Note: .cols is passed through substitute() before it enters here. Thus only an explicit NULL is NULL up front if(is.null(.cols)) return(if(is.null(d[[".g_"]])) seq_along(nam) else seq_along(nam)[nam %!in% c(".g_", ".gsplit_", d[[".g_"]]$group.vars)]) nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) cols <- eval(.cols, nl, ce) # Needed for programming usage, because you can pass a variable that is null if(is.null(cols)) return(if(is.null(d[[".g_"]])) seq_along(nam) else seq_along(nam)[nam %!in% c(".g_", ".gsplit_", d[[".g_"]]$group.vars)]) if(is.logical(cols)) return(which(cols)) # if .g_ etc. is added to data, length check for logical vectors will fail if(is.null(d[[".g_"]]) || is.character(cols) || (is.numeric(cols) && cols[1L] > 0)) return(cols2int(cols, d, nam)) cols2intrmgn(match(c(".g_", ".gsplit_", d[[".g_"]]$group.vars), nam), cols, d) } # Also used in collap() acr_get_funs <- function(.fnsexp, .fns, ...) { if(is.function(.fns)) { namfun <- l1orlst(as.character(.fnsexp)) .fns <- `names<-`(list(.fns), namfun) } else if(is.list(.fns)) { namfun <- names(.fns) # In programming usage, could simply pass a list of functions l, in which case this is not a call.. if(is.call(.fnsexp) && (.fnsexp[[1L]] == quote(list) || .fnsexp[[1L]] == quote(c))) { # or we could have funlist[[i]] which is also sorted out here.. nf <- all.vars(.fnsexp, unique = FALSE) if(length(nf) == length(.fns)) { names(.fns) <- nf if(is.null(namfun)) namfun <- nf } else { nf <- vapply(.fnsexp[-1L], function(x) l1orlst(all.vars(x)), "", USE.NAMES = FALSE) names(.fns) <- nf if(is.null(namfun)) namfun <- as.character(seq_along(.fns)) } } else if(is.null(namfun)) names(.fns) <- namfun <- as.character(seq_along(.fns)) } else if(is.character(.fns)) { namfun <- names(.fns) names(.fns) <- .fns .fns <- lapply(.fns, ...) # lapply(.fns, match.fun()) if(is.null(namfun)) namfun <- names(.fns) } else stop(".fns must be a function, list of functions or character vector of function names") return(list(namfun = namfun, funs = .fns)) } fungroup2 <- function(X, ocl) { attr(X, "groups") <- NULL oldClass(X) <- fsetdiff(ocl, c("GRP_df", "grouped_df")) X } setup_across <- function(.cols, .fnsexp, .fns, .names, .apply, .transpose, .FFUN) { pe <- parent.frame(n = 4L) d <- unclass(pe$.data) # Safer to unclass here also... ce <- parent.frame(n = 5L) # Caller environment # return(list(.cols, .fns, .names, d)) nam <- names(d) cols <- acr_get_cols(.cols, d, nam, ce) funs <- acr_get_funs(.fnsexp, .fns, get, mode = "function", envir = ce) namfun <- funs$namfun fun <- funs$funs if(length(.names) && !is.logical(.names)) { if(is.function(.names)) { names <- if(isFALSE(.transpose)) # .names(nam[cols], namfun) as.vector(outer(nam[cols], namfun, .names)) else as.vector(t(outer(nam[cols], namfun, .names))) } else { if(length(.names) == 1L && .names == "flip") { names <- if(isFALSE(.transpose)) as.vector(outer(nam[cols], namfun, function(z, f) paste(f, z, sep = "_"))) else as.vector(t(outer(nam[cols], namfun, function(z, f) paste(f, z, sep = "_")))) } else { if(length(.names) != length(namfun) * length(cols)) stop("length(.names) must match length(.fns) * length(.cols)") names <- .names } } } else { # Third version: .names = FALSE does nothing. Allows fmutate(mtcars, across(cyl:vs, list(L, D, G), n = 1:3)) # This makes sense, because if .transpose = "auto" and the lengths of generated columns are unequal, you cannot use generated names anyway because they would mismatch.. names <- if((is.null(.names) && length(namfun) == 1L) || (isFALSE(.names) && length(namfun) > 1L)) NULL else if(isFALSE(.names)) # this allows you to force names false for a single function... nam[cols] else if(isFALSE(.transpose)) as.vector(outer(nam[cols], namfun, paste, sep = "_")) else as.vector(t(outer(nam[cols], namfun, paste, sep = "_"))) # Second version: .names = TRUE auto generates names, .names = FALSE yields default names (no change to names by the function), # and .names = NULL (default) yields function names or auto names if multiple functions... # names <- if(is.null(.names) && length(namfun) == 1L) NULL else if(!isFALSE(.names)) # as.vector(t(outer(nam[cols], namfun, paste, sep = "_"))) else if(length(namfun) == 1L) # nam[cols] else stop("Computed columns need to be uniquely named. If .names = FALSE, can only use one function, or need to supply custom names!") # First version: requires .names = FALSE for renaming functions like L, W etc... # names <- if(isFALSE(.names)) NULL else # if(length(namfun) == 1L && !isTRUE(.names)) nam[cols] else # as.vector(t(outer(nam[cols], namfun, paste, sep = "_"))) } if(is.logical(.apply)) { aplvec <- if(.apply) rep_len(TRUE, length(fun)) else rep_len(FALSE, length(fun)) } else { .apply <- switch(.apply, auto = NA, stop(".apply must be 'auto', TRUE or FALSE")) aplvec <- names(fun) %!in% .FFUN } .data_ <- if(all(aplvec)) d[cols] else .Call(C_subsetCols, if(is.null(d[[".g_"]])) `oldClass<-`(d, pe$cld) else fungroup2(d, pe$cld), cols, FALSE) # Note: Keep the order and the names !!! list(data = d, .data_ = .data_, # cols = cols, funs = fun, aplvec = aplvec, ce = ce, names = names) } across <- function(.cols = NULL, .fns, ..., .names = NULL, .apply = "auto", .transpose = "auto") { stop("across() can only work inside fmutate() and fsummarise()") } do_across <- function(.cols = NULL, .fns, ..., .names = NULL, .apply = "auto", .transpose = "auto", .eval_funi, .summ = TRUE) { # nodots <- missing(...) # return(setup_across(substitute(.cols), substitute(.fns), .fns, .names, .apply, .FAST_FUN_MOPS)) setup <- setup_across(substitute(.cols), substitute(.fns), .fns, .names, .apply, .transpose, .FAST_FUN_MOPS) seqf <- seq_along(setup$funs) names <- setup$names # return(eval_funi(seqf, ...)) # return(lapply(seqf, eval_funi, ...)) if(length(seqf) == 1L) { res <- .eval_funi(seqf, setup[[1L]], setup[[2L]], setup[[3L]], setup[[4L]], setup[[5L]], ...) # eval_funi(seqf, aplvec, funs, nodots, .data_, data, ce, ...) # return(res) } else { # motivated by: fmutate(mtcars, across(cyl:vs, list(L, D, G), n = 1:3)) r <- lapply(seqf, .eval_funi, setup[[1L]], setup[[2L]], setup[[3L]], setup[[4L]], setup[[5L]], ...) # do.call(lapply, c(list(seqf, eval_funi), setup[1:5], list(...))) # lapply(seqf, eval_funi, aplvec, funs, nodots, .data_, data, ce, ...) # return(r) if(isFALSE(.transpose) || (is.character(.transpose) && !all_eq(vlengths(r, FALSE)))) { # stop("reached here") res <- unlist(r, recursive = FALSE, use.names = TRUE) # need use.names= TRUE here # return(list(res = res, r = r)) } else { res <- funlist(t_list2(r)) if(is.null(names(res)) && is.null(names)) names(res) <- funlist(t_list2(lapply(r, names))) } } if(.summ) return(if(is.null(names)) res else `names<-`(res, names)) return(`[<-`(setup$data, if(is.null(names)) names(res) else names, value = res)) } mutate_funi_simple <- function(i, data, .data_, funs, aplvec, ce, ...) { # g is unused here... .FUN_ <- funs[[i]] nami <- names(funs)[i] if(aplvec[i]) { value <- if(missing(...)) lapply(unattrib(.data_), .FUN_) else do.call(lapply, c(list(unattrib(.data_), .FUN_), eval(substitute(list(...)), data, ce)), envir = ce) # eval(substitute(lapply(unattrib(.data_), .FUN_, ...)), c(list(.data_ = .data_), data), ce) names(value) <- names(.data_) } else if(any(nami == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, TRA = 1L))) # Old way: Not necessary to construct call.. return(unclass(eval(as.call(list(as.name(nami), quote(.data_), TRA = 1L))))) # faster than substitute(.FUN_(.data_, TRA = 1L), list(.FUN_ = as.name(nami))) # if(any(...names() == "TRA")) # This down not work because it substitutes setup[[]] from mutate_across !!! # return(unclass(eval(substitute(.FUN_(.data_, ...)), c(list(.data_ = .data_), data), ce))) # return(unclass(eval(substitute(.FUN_(.data_, ..., TRA = 1L)), c(list(.data_ = .data_), data), ce))) fcal <- as.call(c(list(quote(.FUN_), quote(.data_)), as.list(substitute(list(...))[-1L]))) if(is.null(fcal$TRA)) fcal$TRA <- 1L return(unclass(eval(fcal, c(list(.data_ = .data_, .FUN_ = .FUN_), data), ce))) } else { value <- if(missing(...)) .FUN_(.data_) else do.call(.FUN_, c(list(.data_), eval(substitute(list(...)), data, ce)), envir = ce) # Object setup not found: eval(substitute(.FUN_(.data_, ...)), c(list(.data_ = .data_), data), ce) oldClass(value) <- NULL if(any(nami == .FAST_FUN_MOPS)) return(value) # small improvement for fast funs... } # return(unclass(r)) # fcal <- if(missing(...)) as.call(list(funs[[nami]], quote(.data_))) else # as.call(c(list(funs[[nami]], quote(.data_)), as.list(substitute(list(...))[-1L]))) # , parent.frame() # # substitute(list(...), parent.frame()) # # substitute(FUN(.data_, ...), list(FUN = funs[[nami]], ...)) # # as.call(substitute(list(funs[[nami]], quote(.data_), ...))) # # substitute(FUN(.data_, ...), list(FUN = funs[[nami]])) # # if(any(nami == .FAST_STAT_FUN_POLD) && is.null(fcal$TRA)) fcal$TRA <- 1L # fast functions have a data.frame method, thus can be applied simultaneously to all columns # return(fcal) # return(eval(fcal, c(list(.data_ = .data_), data), setup$ce)) lv <- vlengths(value, FALSE) nr <- .Call(C_fnrow, data) if(allv(lv, nr)) return(value) if(allv(lv, 1L)) return(lapply(value, alloc, nr)) stop("Without groups, NROW(value) must either be 1 or nrow(.data)") } dots_apply_grouped <- function(d, g, f, dots) { attributes(d) <- NULL n <- length(d[[1L]]) # Arguments same length as data if(length(ln <- whichv(vlengths(dots, FALSE), n))) { asl <- lapply(dots[ln], gsplit, g) if(length(dots) > length(ln)) { mord <- dots[-ln] if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.") FUN <- function(x) .mapply(f, c(list(gsplit(x, g)), asl), mord) # do.call(mapply, c(list(f, gsplit(x, g), SIMPLIFY = FALSE, USE.NAMES = FALSE, MoreArgs = mord), asl)) } else FUN <- function(x) .mapply(f, c(list(gsplit(x, g)), asl), NULL) # do.call(mapply, c(list(f, gsplit(x, g), SIMPLIFY = FALSE, USE.NAMES = FALSE), asl)) return(lapply(d, function(y) copyMostAttributes(funlist(FUN(y)), y))) } # No arguments to be split do.call(lapply, c(list(d, copysplaplfun, g, f), dots)) } dots_apply_grouped_bulk <- function(d, g, f, dots) { n <- fnrow(d) dsp <- rsplit.data.frame(d, g, simplify = FALSE, flatten = TRUE, use.names = FALSE) if(is.null(dots)) return(lapply(dsp, f)) # Arguments withs ame length as data if(length(ln <- whichv(vlengths(dots, FALSE), n))) { asl <- lapply(dots[ln], gsplit, g) if(length(dots) > length(ln)) { mord <- dots[-ln] if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.") } else mord <- NULL return(.mapply(f, c(list(dsp), asl), mord)) } # No arguments to be split do.call(lapply, c(list(dsp, f), dots)) } mutate_grouped_expand <- function(value, g) { lv <- vlengths(value, FALSE) nr <- length(g[[2L]]) if(allv(lv, nr)) { if(!isTRUE(g$ordered[2L])) { if(length(value) < 4L) { # optimal? value <- lapply(value, function(x, g) .Call(C_greorder, x, g), g) } else { ind <- .Call(C_greorder, seq_len(nr), g) value <- .Call(C_subsetDT, value, ind, seq_along(value), FALSE) } } return(value) } if(!allv(lv, g[[1L]])) stop("With groups, NROW(value) must either be ng or nrow(.data)") return(.Call(C_subsetDT, value, g[[2L]], seq_along(value), FALSE)) } mutate_funi_grouped <- function(i, data, .data_, funs, aplvec, ce, ...) { g <- data[[".g_"]] .FUN_ <- funs[[i]] nami <- names(funs)[i] apli <- aplvec[i] if(apli) { value <- if(missing(...)) lapply(unattrib(.data_), copysplaplfun, g, .FUN_) else dots_apply_grouped(.data_, g, .FUN_, eval(substitute(list(...)), data, ce)) # Before: do.call(lapply, c(list(unattrib(.data_), copysplaplfun, g, .FUN_), eval(substitute(list(...)), data, ce)), envir = ce) } else if(any(nami == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, g = g, TRA = 1L))) fcal <- as.call(c(list(quote(.FUN_), quote(.data_), g = quote(.g_)), as.list(substitute(list(...))[-1L]))) if(is.null(fcal$TRA)) fcal$TRA <- 1L return(unclass(eval(fcal, c(list(.data_ = .data_, .FUN_ = .FUN_), data), ce))) } else if(any(nami == .FAST_FUN_MOPS)) { if(any(nami == .OPERATOR_FUN)) { value <- if(missing(...)) .FUN_(.data_, by = g) else do.call(.FUN_, c(list(.data_, by = g), eval(substitute(list(...)), data, ce)), envir = ce) } else { value <- if(missing(...)) .FUN_(.data_, g = g) else do.call(.FUN_, c(list(.data_, g = g), eval(substitute(list(...)), data, ce)), envir = ce) } oldClass(value) <- NULL return(value) } else { # stop("In grouped computations, .apply = FALSE only works with .FAST_FUN and .OPERATOR_FUN") value <- dots_apply_grouped_bulk(.data_, g, .FUN_, if(missing(...)) NULL else eval(substitute(list(...)), data, ce)) value <- .Call(C_rbindlist, unclass(value), FALSE, FALSE, NULL) oldClass(value) <- NULL } if(apli) names(value) <- names(.data_) return(mutate_grouped_expand(value, g)) } do_grouped_expr <- function(ei, nfun, .data, g, pe) { v <- all.vars(ei) # unique = FALSE -> not needed anymore... can turn expressions into functions... if(length(v) > 1L) { # Could include global environmental variables e.g. fmutate(data, new = mean(var) + q) namd <- names(.data) if(length(wv <- na_rm(match(v, namd))) > 1L) return(funlist(gsplit_multi_apply(.data[wv], g, ei, pe))) return(gsplit_single_apply(.data[[wv]], g, ei, namd[wv], pe)) } if(nfun == 1L) { res <- eval(othFUN_compute(ei), .data, pe) return(copyMostAttributes(funlist(res), .data[[v]])) } gsplit_single_apply(.data[[v]], g, ei, v, pe) } # Same as above, without unlisting... do_grouped_expr_list <- function(ei, .data, g, pe, .cols, ax, mutate = FALSE) { v <- all.vars(ei) if(any(v == ".data")) { .data[names(.data) %in% c(".g_", ".gsplit_", if(is.null(.cols)) g$group.vars)] <- NULL if(is.character(ax)) { # for fmutate cld <- ax ax <- attributes(.data) ax[["groups"]] <- NULL # ax[["names"]] <- fsetdiff(ax[["names"]], c(".g_", ".gsplit_")) # Redundant, removed above... ax[["class"]] <- fsetdiff(cld, c("GRP_df", "grouped_df")) } if(length(.cols)) .data <- colsubset(.data, .cols) ax[["names"]] <- names(.data) setattributes(.data, ax) res <- gsplit_multi_apply(.data, g, ei, pe, TRUE) } else if(length(v) > 1L) { namd <- names(.data) res <- if(length(wv <- na_rm(match(v, namd))) > 1L) gsplit_multi_apply(.data[wv], g, ei, pe) else gsplit_single_apply(.data[[wv]], g, ei, namd[wv], pe, FALSE) } else { res <- if(length(all_funs(ei)) == 1L) eval(othFUN_compute(ei), .data, pe) else gsplit_single_apply(.data[[v]], g, ei, v, pe, FALSE) } res <- .Call(C_rbindlist, res, FALSE, FALSE, NULL) if(mutate) return(mutate_grouped_expand(res, g)) res } fmutate <- function(.data, ..., .keep = "all", .cols = NULL) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- substitute(list(...)) nam <- names(e) nullnam <- is.null(nam) # if(!length(nam)) stop("All replacement expressions have to be named") pe <- parent.frame() cld <- oldClass(.data) # This needs to be called cld, because across fetches it from here !! oldClass(.data) <- NULL nr <- .Call(C_fnrow, .data) namdata <- names(.data) if(is.null(namdata) || fanyDuplicated(namdata)) stop("All columns of .data have to be uniquely named") if(!is.character(.keep)) .keep <- cols2char(.keep, .data, namdata) # allowing .keep to be NULL gdfl <- any(cld == "grouped_df") if(gdfl) { g <- GRP.grouped_df(.data, return.groups = FALSE, call = FALSE) .data[c(".g_", ".gsplit_")] <- list(g, gsplit) for(i in 2:length(e)) { ei <- e[[i]] if(nullnam || nam[i] == "") { # Across if(ei[[1L]] == quote(across) || ei[[1L]] == quote(acr)) { ei[[1L]] <- quote(do_across) ei$.eval_funi <- quote(mutate_funi_grouped) ei$.summ <- FALSE # return(eval(ei, enclos = pe)) .data <- eval(ei, list(do_across = do_across, mutate_funi_grouped = mutate_funi_grouped), pe) # ftransform_core(.data, eval(ei, pe)) } else { r <- do_grouped_expr_list(ei, .data, g, pe, .cols, cld, TRUE) .data[names(r)] <- r } } else { # Tagged vector expressions if(is.null(ei)) { .data[[nam[i]]] <- NULL next } eif <- all_funs(ei) if(any(eif %in% .FAST_FUN_MOPS)) { .data[[nam[i]]] <- eval(fFUN_mutate_add_groups(ei), .data, pe) } else if(length(eif)) { r <- do_grouped_expr(ei, length(eif), .data, g, pe) .data[[nam[i]]] <- if(length(r) == g[[1L]]) .Call(C_subsetVector, r, g[[2L]], FALSE) else # .Call(C_TRA, .data[[v]], r, g[[2L]], 1L) # Faster than simple subset r[g[[2L]] ??] .Call(C_greorder, r, g) # r[forder.int(forder.int(g[[2L]]))] # Seems twice is necessary... } else { # something like bla = 1 or mpg = vs r <- eval(ei, .data, pe) if(length(r) == 1L) r <- alloc(r, nr) else if(length(r) != nr) stop("length mismatch") .data[[nam[i]]] <- r } } } .data[c(".g_", ".gsplit_")] <- NULL } else { # Without groups... for(i in 2:length(e)) { # This is good and very fast ei <- e[[i]] if(nullnam || nam[i] == "") { # Across if(ei[[1L]] == quote(across) || ei[[1L]] == quote(acr)) { # stop("expressions need to be named or start with across(), or its shorthand acr().") ei[[1L]] <- quote(do_across) ei$.eval_funi <- quote(mutate_funi_simple) ei$.summ <- FALSE # return(eval(ei, enclos = pe)) .data <- eval(ei, list(do_across = do_across, mutate_funi_simple = mutate_funi_simple), pe) # ftransform_core(.data, eval(ei, enclos = pe)) } else { r <- eval(ei, .data, pe) .data[names(r)] <- r } } else { # Tagged vector expressions r <- eval(ei, .data, pe) if(!is.null(r)) { # don't use length(), because only NULL removes list elements... if(length(r) == 1L) r <- alloc(r, nr) else if(length(r) != nr) stop("length mismatch") } .data[[nam[i]]] <- r } } } # Implementing .keep argument # TODO: Implement .keep with across... .data <- if(length(.keep) > 1L) keep_v(.data, c(.keep, nam[-1L])) else switch(.keep, all = .data, used = keep_v(.data, c(namdata[namdata %in% c(if(gdfl) g$group.vars, funlist(lapply(e[-1L], all.vars)), nam[-1L])], nam[-1L])), unused = keep_v(.data, c(namdata[namdata %in% c(if(gdfl) g$group.vars, fsetdiff(namdata, funlist(lapply(e[-1L], all.vars))), nam[-1L])], nam[-1L])), none = keep_v(.data, c(if(gdfl) g$group.vars, nam[-1L])), # g$group.vars[g$group.vars %!in% nam[-1L]] -> inconsistent and inefficient... keep_v(.data, c(.keep, nam[-1L]))) oldClass(.data) <- cld return(condalc(.data, any(cld == "data.table"))) } # or mut / mte? () If you need o choose a vowel, u is more distinctive, lut for consistency let's stock with consonants mtt <- fmutate # Note: see if function(.data, ...) fmutate(.data, ...) is possible (what about objects in global environment?) collapse/R/fvar_fsd.R0000644000176200001440000003420314777170130014203 0ustar liggesusers # TODO: w.type - Implement reliability weights? # Note: for principal innovations of this code see fsum.R fsd <- function(x, ...) UseMethod("fsd") # , x fsd.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, stable.algo = .op[["stable.algo"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fsd.matrix(x, g, w, TRA, na.rm, use.g.names, stable.algo = stable.algo, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fvarsd,x,length(lev),g,NULL,w,na.rm,stable.algo,TRUE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsd,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,TRUE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsd,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,TRUE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE), GRPnames(g))) return(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE)) } if(is.null(g)) return(TRAC(x,.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE),g[[2L]],TRA, ...) } fsd.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fvarsdm,x,length(lev),g,NULL,w,na.rm,stable.algo,TRUE,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdm,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdm,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TRA, ...) } fsd.zoo <- function(x, ...) if(is.matrix(x)) fsd.matrix(x, ...) else fsd.default(x, ...) fsd.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fsd.matrix(x, ...), x) else fsd.default(x, ...) fsd.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fvarsdl,x,length(lev),g,NULL,w,na.rm,stable.algo,TRUE,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdl,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdl,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,TRUE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE), groups)) return(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)) } if(is.null(g)) return(TRAlC(x,.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,TRUE,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TRA, ...) } fsd.list <- function(x, ...) fsd.data.frame(x, ...) fsd.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], stable.algo = .op[["stable.algo"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "sum.")) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,TRUE,FALSE),g[[2L]],TRA, ...)) } fvar <- function(x, ...) UseMethod("fvar") # , x fvar.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, stable.algo = .op[["stable.algo"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fvar.matrix(x, g, w, TRA, na.rm, use.g.names, stable.algo = stable.algo, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_fvarsd,x,length(lev),g,NULL,w,na.rm,stable.algo,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsd,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsd,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE), GRPnames(g))) return(.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE)) } if(is.null(g)) return(TRAC(x,.Call(Cpp_fvarsd,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(Cpp_fvarsd,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE),g[[2L]],TRA, ...) } fvar.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_fvarsdm,x,length(lev),g,NULL,w,na.rm,stable.algo,FALSE,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdm,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdm,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(Cpp_fvarsdm,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(Cpp_fvarsdm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TRA, ...) } fvar.zoo <- function(x, ...) if(is.matrix(x)) fvar.matrix(x, ...) else fvar.default(x, ...) fvar.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fvar.matrix(x, ...), x) else fvar.default(x, ...) fvar.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(Cpp_fvarsdl,x,length(lev),g,NULL,w,na.rm,stable.algo,FALSE,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_fvarsdl,x,fnlevels(g),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(Cpp_fvarsdl,x,attr(g,"N.groups"),g,NULL,w,na.rm,stable.algo,FALSE,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE), groups)) return(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)) } if(is.null(g)) return(TRAlC(x,.Call(Cpp_fvarsdl,x,0L,0L,NULL,w,na.rm,stable.algo,FALSE,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TRA, ...) } fvar.list <- function(x, ...) fvar.data.frame(x, ...) fvar.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], stable.algo = .op[["stable.algo"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "sum.")) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(Cpp_fvarsdl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(Cpp_fvarsdl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,stable.algo,FALSE,FALSE),g[[2L]],TRA, ...)) } collapse/R/indexing.R0000644000176200001440000005413314777170130014222 0ustar liggesusers# note: plyr has a function and class indexed_df... # getpix <- function(x) switch(typeof(x), externalptr = .Call(C_geteptr, x), x) findex <- function(x) { idx <- attr(x, "index_df") if(is.null(idx)) idx <- attr(x, "index") if(is.list(idx)) return(idx) .Call(C_geteptr, idx) } ix <- findex # TODO use attr(ids, "optim_time") ? -> think about what is the smartest way to implement this. Also think in the long-term # how further optimization (e.g. ordering vector) will take effect... # also what about sorted data ?? If regular panel should be able to optimize... i.e. compute without time index. to_plm <- function(x, row.names = FALSE) { index <- unclass(findex(x)) if(is.null(index)) stop("Missing index!") if(length(index) < 2L) stop("plm compatible index must have at least 2 factors") if(length(index) > 2L) index <- c(list(id = finteraction(index[-length(index)], sort = FALSE)), index[length(index)]) oldClass(index[[1L]]) <- "factor" oldClass(index[[2L]]) <- "factor" attr(index, "row.names") <- .set_row_names(length(index[[1L]])) oldClass(index) <- c("pindex", "data.frame") if(is.list(x) && inherits(x, "indexed_frame")) { res <- qDF(unindex(x), class = c("pdata.frame", "data.frame")) attr(res, "index") <- index if(row.names) attr(res, "row.names") <- do.call(paste, c(index, list(sep = "-"))) } else if(inherits(x, "indexed_series")) { res <- unindex(x) attr(res, "index") <- index oldClass(res) <- c("pseries", class(res)) if(row.names) names(res) <- do.call(paste, c(index, list(sep = "-"))) } else stop("x must be 'indexed_frame' or 'indexed_series'") return(res) } # # fixest: # time = unclass(wlddev$date) # time_full = fixest:::quickUnclassFactor(time, addItem = TRUE, sorted = TRUE) # time_unik = time_full$items # all_steps = unique(diff(time_unik)) # my_step = fixest:::cpp_pgcd(all_steps) # # we rescale time_unik # time_unik_new = (time_unik - min(time_unik)) / my_step # time = time_unik_new[time_full$x] # TODO: also think of fixest's quf, checking if double is integer, break out of loop if fail... timeid <- function(x, factor = FALSE, ordered = factor, extra = FALSE) { id <- .Call(C_group, x, TRUE, FALSE) # starts = TRUE, group.sizes = FALSE unik <- Csv(x, attr(id, "starts")) attributes(unik) <- NULL if(!is.numeric(unik)) stop("x needs to be numeric, otherwise use qF() or qG() instead of timeid()") is_dbl <- is.double(unik) o <- forder.int(unik, na.last = TRUE) ng <- length(o) unik_o <- if(attr(o, "sorted") && !extra) unik else Csv(unik, o) # !extra because of math by reference... if(is.na(unik_o[ng])) stop("Time variable may not contain missing values") r <- c(unik_o[1L], unik_o[ng]) steps <- unik_o[-1L] %-=% unik_o[-ng] # tsibble uses abs(diff(unik_o)), but here we sort the values, so not necessary # if(is_dbl) steps <- round(steps, digits = 6) # This is pretty costly for long POSIXct sequences. Better not do it.. gcd <- .Call(C_vecgcd, .Call(Cpp_sortunique, steps)) if(is_dbl) { if(r[1L] != 1 || gcd != 1) unik %-=% (r[1L] - 1.4*gcd) # * 1.4 to make sure the as.integer conversion does proper rounding if(gcd != 1) unik %/=% gcd unik <- as.integer(unik) } else { if(r[1L] != 1L || gcd != 1L) unik %-=% (r[1L] - gcd) if(gcd != 1L) unik %/=% gcd } tid <- if(length(id) == ng) unik else Csv(unik, id) if(factor) { levnum <- if(is_dbl) seq.default(r[1L], r[2L]+0.4*gcd, gcd) else if(gcd == 1L) r[1L]:r[2L] else seq.int(r[1L], r[2L], gcd) if(is.object(x)) levnum <- copyMostAttrib(levnum, x) attr(tid, "levels") <- if(is.object(x) && is_date(x)) strftime(levnum, format = if(inherits(x, "Date")) "%Y-%m-%d" else "%Y-%m-%d %H:%M:%S %Z") else as.character(levnum) oldClass(tid) <- c(if(ordered) "ordered", "factor", "na.included") } else { attr(tid, "N.groups") <- as.integer(if(is_dbl) (r[2L]+0.4*gcd-r[1L])/gcd else (r[2L]-r[1L])/gcd) + 1L oldClass(tid) <- c(if(ordered) "ordered", "qG", "na.included") } if(extra) { attr(tid, "unique_ints") <- unik attr(tid, "sort_unique_x") <- copyMostAttrib(unik_o, x) attr(tid, "range_x") <- copyMostAttrib(r, x) attr(tid, "step_x") <- gcd } tid } make_time_factor <- function(x) { if(inherits(x, c("factor", "qG"))) { # Make sure we handle irregularity correctly... if(is_qG(x)) return(as_factor_qG(x, na.exclude = FALSE)) if(inherits(x, "na.included")) return(x) if(anyNA(x)) stop("Time variable may not contain missing values") oldClass(x) <- c("factor", "na.included") return(x) } if(is.numeric(x) && !is.object(x)) { idbl <- is.double(x) if(idbl) { # message("Time variable is of type double, but not a date/time object. It is therefore coerced to integer and assumed to represent unitary timesteps. If this is not desired pass timeid(t). To silence this message pass as.integer(t).") x <- as.integer(x) } r <- .Call(C_frange, x, FALSE, FALSE) # na.rm = FALSE # Note that inside flag() and fgrowth() etc. we subtract the minimum within each group... if(anyNA(r)) stop("Time variable may not contain missing values") if(r[1L] != 1) { if(idbl) x %-=% (r[1L] - 1L) else x <- x - (r[1L] - 1L) # This is unfortunately quite a bit slower... } attr(x, "levels") <- as.character(r[1L]:r[2L]) oldClass(x) <- c("ordered", "factor", "na.included") return(x) } if(is.numeric(unclass(x))) return(timeid(x, factor = TRUE, ordered = FALSE)) qF(x, na.exclude = FALSE, sort = TRUE, method = "hash") } is_irregular <- function(x, any_id = TRUE) { if(is.object(x) && inherits(x, c("indexed_frame", "indexed_series"))) x <- findex(x) if(is.list(x) && inherits(x, "pindex")) { oldClass(x) <- NULL if(length(x) > 1L) { g <- if(length(x) <= 2L) x[[1L]] else if(any_id) groupv(x[-length(x)]) else finteraction(x[-length(x)], sort = FALSE) t <- x[[length(x)]] # if(!is.nmfactor(t)) stop("t must be a factor without any missing values") attributes(t) <- NULL rng_t <- fmax(t, g, use.g.names = !any_id) rng_t %-=% fmin(t, g, use.g.names = FALSE) rng_t %+=% 1L n_t <- fnobs(t, g, use.g.names = FALSE) if(any_id) return(!identical(rng_t, n_t)) res <- rng_t != n_t names(res) <- names(rng_t) return(res) } else if(length(x) == 1L) { if(!isFALSE(attr(x, "single.id"))) stop("Index does not contain a time variable") t <- x[[1L]] # if(!is.nmfactor(t)) stop("t must be a factor without any missing values") return(fnlevels(t) != fndistinct(t)) } else stop("Index has zero length") } if(!(is.atomic(x) && is.numeric(unclass(x)))) stop("x needs to be an 'indexed_frame', 'indexed_series' or 'pindex' object, or an atomic vector with storage type integer or double.") if(is.object(x)) { if(is.factor(x)) return(fnlevels(x) != fndistinct(x)) if(is_qG(x)) return(attr(x, "N.groups") != fndistinct(as_factor_qG(x))) } attributes(x) <- NULL tid <- timeid(x, factor = FALSE, extra = TRUE) return(attr(tid, "N.groups") != length(attr(tid, "unique_ints"))) } # Note: data returned as plain list with attributes ! index_series <- function(data, index, cl) { oldClass(data) <- NULL iptr <- .Call(C_createeptr, index) indexfun <- function(x) { attr(x, "index_df") <- iptr oldClass(x) <- unique.default(c("indexed_series", "pseries", class(x))) # Use OldClass?? # class is better for methods such as as.data.frame.numeric (used inside plm) to apply.. x } if(any(cl == "sf")) { geom <- whichv(names(data), attr(data, "sf_column")) data[-geom] <- lapply(data[-geom], indexfun) return(data) } data[] <- lapply(unattrib(data), indexfun) # dapply(data, indexfun) data } reindex <- function(x, index = findex(x), single = "auto") { n <- if(is.list(x)) fnrow(x) else NROW(x) if(is.atomic(index)) { if(length(index) != n) stop("index does not match data length") nam <- l1orlst(as.character(substitute(index))) idl <- switch(single, auto = anyDuplicated.default(index) > 0L, id = TRUE, time = FALSE, stop("'single' must be 'auto', 'id' or 'time'")) index <- list(if(idl) qF(index, sort = is.factor(index), na.exclude = FALSE) else make_time_factor(index)) names(index) <- nam attr(index, "row.names") <- .set_row_names(n) attr(index, "single.id") <- idl oldClass(index) <- c("index_df", "pindex", "data.frame") } else { if(fnrow(index) != n) stop("index does not match data length") if(!inherits(index, "pindex")) { if(!all(.Call(C_vtypes, index, 2L))) stop("All variables in a valid index must be factors. Please prepare you data accordingly.") index <- qDF(index) if(fncol(index) == 1L) attr(index, "single.id") <- switch(single, auto = anyDuplicated.default(.subset2(index, 1L)) > 0L, id = TRUE, time = FALSE, stop("'single' must be 'auto', 'id' or 'time'")) oldClass(index) <- c("index_df", "pindex", "data.frame") } } if(is.list(x)) { clx <- oldClass(x) x <- index_series(x, index, clx) # x is list afterwards, so need to set class again attr(x, "index_df") <- index m <- match(c("indexed_frame", "pdata.frame", "data.frame"), clx, nomatch = 0L) oldClass(x) <- c("indexed_frame", if (length(mp <- m[m != 0L])) clx[-mp] else clx, "pdata.frame", if (m[3L]) "data.frame") if(any(clx == "data.table")) return(alc(x)) } else { attr(x, "index_df") <- index oldClass(x) <- unique.default(c("indexed_series", "pseries", class(x))) } x } # TODO: group for integers use quf.. findex_by <- function(.X, ..., single = "auto", interact.ids = TRUE) { # pid = NULL, t clx <- oldClass(.X) oldClass(.X) <- NULL dots <- substitute(list(...)) ids <- eval(dots, .X, parent.frame()) nam <- names(ids) vars <- all.vars(dots, unique = FALSE) # If something else than NSE cols is supplied if(length(ids) == 1L && is.null(nam) && (length(vars) != 1L || !anyv(names(.X), vars))) { # !is.symbol(dots[[2L]]) || length(ids[[1L]]) != length(.X[[1L]]) || is.function(ids[[1L]]) # Fixes #320 ids <- .X[cols2int(ids[[1L]], .X, names(.X), FALSE)] } else { if(length(nam)) { nonmiss <- nzchar(nam) if(!all(nonmiss)) names(ids) <- `[<-`(as.character(dots[-1L]), nonmiss, value = nam[nonmiss]) } else names(ids) <- vars } # Single id if(length(ids) == 1L) { idl <- switch(single, auto = anyDuplicated.default(ids[[1L]]) > 0L, id = TRUE, time = FALSE, stop("'single' must be 'auto', 'id' or 'time'")) ids[[1L]] <- if(idl) qF(ids[[1L]], sort = is.factor(ids[[1L]]), na.exclude = FALSE) else make_time_factor(ids[[1L]]) attr(ids, "single.id") <- idl } else { lids <- length(ids) if(lids > 2L) { if(interact.ids) { nam <- names(ids) ids <- c(`names<-`(list(finteraction(ids[-lids], sort = FALSE)), paste(nam[-lids], collapse = ".")), ids[lids]) attr(ids, "nam") <- nam # This is a trick, fetched using attr(x, "nam"), before "names" attribute } else ids[-lids] <- lapply(ids[-lids], function(x) qF(x, sort = is.factor(x), na.exclude = FALSE)) } else ids[[1L]] <- qF(ids[[1L]], sort = is.factor(ids[[1L]]), na.exclude = FALSE) ids[[length(ids)]] <- make_time_factor(ids[[length(ids)]]) } attr(ids, "row.names") <- .set_row_names(length(ids[[1L]])) oldClass(ids) <- c("index_df", "pindex", "data.frame") m <- match(c("indexed_frame", "pdata.frame", "data.frame"), clx, nomatch = 0L) .X <- index_series(.X, ids, clx) attr(.X, "index_df") <- ids oldClass(.X) <- c("indexed_frame", if (length(mp <- m[m != 0L])) clx[-mp] else clx, "pdata.frame", if (m[3L]) "data.frame") if(any(clx == "data.table")) return(alc(.X)) .X } iby <- findex_by group_effect <- function(x, effect) { index <- findex(x) g <- if(length(effect) == 1L) .subset2(index, effect) else .subset(index, effect) if(is.factor(g)) return(g) g <- groupv(g) attr(g, "levels") <- seq_len(attr(g, "N.groups")) # This is just a trick for fnlevels.. g } uncl2pix <- function(x, interact = FALSE) { ix <- unclass(findex(x)) if(length(ix) == 2L) return(ix) if(length(ix) == 1L) { res <- if(isTRUE(attr(ix, "single.id"))) list(ix[[1L]], NULL) else list(0L, ix[[1L]]) } else if(length(ix) > 2L) { if(interact) { g <- finteraction(ix[-length(ix)]) } else { g <- groupv(ix[-length(ix)]) attr(g, "levels") <- seq_len(attr(g, "N.groups")) } res <- list(g, ix[[length(ix)]]) } else stop("invalid 'index' length") attr(res, "nam") <- names(ix) return(res) } plm_check_time <- function(x) { tlev <- attr(x, "levels") oldopts <- options(warn = -1L) on.exit(options(oldopts)) if(is.finite(as.integer(tlev[1L]))) return(as.integer(tlev)[x]) x } pseries_to_numeric <- function(x) { clx <- oldClass(x) m <- clx %in% c("integer", "logical", "complex", "raw") if(any(m)) oldClass(x) <- c(clx[!m], "numeric") x } unindex <- function(x) { attr(x, "index_df") <- NULL clx <- oldClass(x) if(is.list(x)) { oldClass(x) <- fsetdiff(clx, c("indexed_frame", "pdata.frame")) x <- fdapply(x, function(y) { attr(y, "index_df") <- NULL cly <- oldClass(y) oldClass(y) <- fsetdiff(cly, c("indexed_series", "pseries", if(length(cly) == 3L) class(unclass(y)))) y }) if(any(clx == "data.table")) return(alc(x)) } else { oldClass(x) <- fsetdiff(clx, c("indexed_series", "pseries", if(length(clx) == 3L) class(unclass(x)))) } x } unindex_light <- function(x) { clx <- oldClass(x) attr(x, "index_df") <- NULL oldClass(x) <- fsetdiff(clx, if(is.list(x)) c("indexed_frame", "pdata.frame") else c("indexed_series", "pseries", if(length(clx) == 3L) class(unclass(x)))) x } index_stats <- function(index) { oldClass(index) <- NULL lix <- length(index) nam <- names(index) if(lix > 1L || isFALSE(attr(index, "single.id"))) { t <- index[[lix]] ndt <- fndistinct(t) tstat <- if(ndt == fnlevels(t)) paste0(nam[lix], " [", fnlevels(t), "]") else paste0(nam[lix], " [", ndt, " (", fnlevels(t), ")]") } else tstat <- NULL if(lix > 1L || isTRUE(attr(index, "single.id"))) { if(lix <= 2L) { idstat <- paste0(nam[1L], " [", fnlevels(index[[1L]]), "]") } else { idstat <- paste(paste0(nam[-lix], " [", vapply(index[-lix], fnlevels, 0L), "]"), collapse = " ") } } else idstat <- NULL return(paste(c(idstat, tstat), collapse = " | ")) } print.indexed_series <- function(x, ...) { print(unindex_light(x), ...) # if(inherits(index, "pindex")) { cat("\nIndexed by: ", index_stats(findex(x)), "\n") # } } print.indexed_frame <- function(x, ...) { print(unindex(x), ...) # if(inherits(index, "pindex")) { cat("\nIndexed by: ", index_stats(findex(x)), "\n") # } } droplevels_index <- function(index, drop.index.levels = "id") { oi <- switch(drop.index.levels, none = 0L, id = 1L, time = 2L, all = 3L, stop("drop.index.levels must be one of 'all', 'id', 'time' or 'none'.") ) if(oi == 0L) return(index) clix <- oldClass(index) oldClass(index) <- NULL if(oi == 1L) { if(length(index) > 2L) index[-length(index)] <- fdroplevels.list(index[-length(index)]) else if(length(index) == 2L || isTRUE(attr(index, "single.id"))) index[[1L]] <- fdroplevels(index[[1L]]) } else if(oi == 2L) { index[[length(index)]] <- fdroplevels(index[[length(index)]]) } else { index <- fdroplevels.list(index) } oldClass(index) <- clix index } `[.index_df` <- function(x, i, j, drop = FALSE, drop.index.levels = "id") { res <- droplevels_index(ss(x, i, j), drop.index.levels) lr <- length(unclass(res)) if(drop && lr == 1L) return(.subset(res, 1L)) if(lr == 1L && length(unclass(x)) > 1L) { attr(res, "single.id") <- attr(res, "names") != l1orlst(attr(x, "names")) } res } print.index_df <- function(x, topn = 5, ...) { oldClass(x) <- "data.frame" if(fnrow(x) > 2*topn) { print(head(x, topn), ...) cat("---") print(`names<-`(tail(x, topn), NULL), ...) } else print(x, ...) cat("\n", index_stats(x), "\n", sep = "") } `[.indexed_frame` <- function(x, i, ..., drop.index.levels = "id") { clx <- oldClass(x) idDTl <- any(clx == "data.table") if(idDTl) { res <- unindex_light(x) # res <- NextMethod() # doesn't work with i ivsbl <- any(clx == "invisible") if(ivsbl) clx <- clx[clx != "invisible"] # for chaining... if(!missing(...)) { rem <- as.list(substitute(list(...))[-1L]) cal <- as.call(c(list(quote(`[`), quote(res), substitute(i)), rem)) rem <- as.character(rem) if(any(grepl(".SD", rem)) && !any(grepl("apply", rem))) warning("Found '.SD' in the call but no 'apply' function. Please note that .SD is not an indexed_frame but a plain data.table containing indexed_series. Thus indexed_frame / pdata.frame methods don't work on .SD! Consider using (m/l)apply(.SD, FUN) or reindex(.SD, ix(data)). If you are not performing indexed operations on .SD please ignore or suppress this warning.") if(any(grepl(":=", rem))) { res <- copyMostAttributes(eval(cal, list(res = alc(res)), parent.frame()), x) eval.parent(substitute(x <- res)) oldClass(res) <- c("invisible", clx) return(res) } } else cal <- as.call(list(quote(`[`), quote(res), substitute(i))) res <- eval(cal, list(res = res), parent.frame()) if(missing(i) && fnrow(res) != fnrow(x)) { if(ivsbl) oldClass(res) <- fsetdiff(oldClass(res), "invisible") return(unindex(res)) # data.table aggregation } else if(!missing(i)) i <- eval(substitute(i), x, parent.frame()) } else res <- unindex(x)[i, ...] # does not respect data.table properties, but better for sf data frame and others which might check validity of "index_df" attribute index <- attr(x, "index_df") if(!missing(i) && (is.atomic(res) || fnrow(res) != fnrow(x) || length(i) == fnrow(x))) { # Problem: mtcars[1:10] selects columns, not rows!! index <- droplevels_index(ss(index, i), drop.index.levels) if(is.list(res)) { if(fnrow(res) != fnrow(index)) return(unindex(res)) # could be with data.table using i and also aggregating in j res <- index_series(res, index, clx) } } else if(!idDTl && is.list(res)) res <- index_series(res, index, clx) attr(res, "index_df") <- index if(is.atomic(res)) { oldClass(res) <- unique.default(c("indexed_series", "pseries", class(res))) return(res) } m <- match(c("indexed_frame", "pdata.frame", "data.frame"), clx, nomatch = 0L) oldClass(res) <- c("indexed_frame", if (length(mp <- m[m != 0L])) clx[-mp] else clx, "pdata.frame", if (m[3L]) "data.frame") if(any(clx == "data.table")) return(alc(res)) res } `[.indexed_series` <- function(x, i, ..., drop.index.levels = "id") { res <- unindex_light(x)[i, ...] # NextMethod("[", x, ...) # plm has no [.pseries method yet, but the drop.index.levels argument causes problems... if(length(res) <= 1L) return(res) if(!missing(i)) { attr(res, "index_df") <- droplevels_index(ss(findex(x), i), drop.index.levels) } else if(is.null(attr(res, "index_df"))) { attr(res, "index_df") <- findex(x) } oldClass(res) <- c("indexed_series", "pseries", class(res)) res } `$.indexed_frame` <- function(x, name) { # res <- NextMethod() # don't use pdata.frame methods res <- .subset2(x, name, exact = FALSE) # as.character(substitute(name)) -> not necessary! if(is.null(res)) return(NULL) clr <- class(res) attr(res, "index_df") <- attr(x, "index_df") if(!any(clr == "indexed_series")) oldClass(res) <- c("indexed_series", "pseries", clr) res } `$<-.indexed_frame` <- function(x, name, value) { clx <- oldClass(x) oldClass(x) <- NULL if(is.null(value)) { x[[name]] <- NULL oldClass(x) <- clx if(any(clx == "data.table")) return(alc(x)) else return(x) } if(length(value) != .Call(C_fnrow, x)) { if(length(value) == 1L) value <- alloc(value, .Call(C_fnrow, x)) else stop("length(value) must match nrow(x)") } attr(value, "index_df") <- .Call(C_createeptr, attr(x, "index_df")) oldClass(value) <- unique.default(c("indexed_series", "pseries", class(value))) x[[name]] <- value oldClass(x) <- clx if(any(clx == "data.table")) return(alc(x)) else return(x) } # What about i and j for data.frame? `[[.indexed_frame` <- function(x, i, ...) { # res <- NextMethod() # don't use pdata.frame methods # oldClass(x) <- fsetdiff(oldClass(x), c("indexed_frame", "pdata.frame")) # res <- UseMethod("[[", x) res <- .subset2(x, i, ...) if(is.null(res)) return(NULL) clr <- class(res) attr(res, "index_df") <- attr(x, "index_df") if(!any(clr == "indexed_series")) oldClass(res) <- c("indexed_series", "pseries", clr) res } # No plm method... can use NextMethod? -> Yes, but this is faster, and I don't know of any other use cases (nobody uses df[[i, j]]) `[[<-.indexed_frame` <- function(x, i, value) { clx <- oldClass(x) oldClass(x) <- NULL if(is.null(value)) { x[[i]] <- NULL oldClass(x) <- clx if(any(clx == "data.table")) return(alc(x)) else return(x) } if(length(value) != .Call(C_fnrow, x)) { if(length(value) == 1L) value <- alloc(value, .Call(C_fnrow, x)) else stop("length(value) must match nrow(x)") } attr(value, "index_df") <- .Call(C_createeptr, attr(x, "index_df")) oldClass(value) <- unique.default(c("indexed_series", "pseries", class(value))) x[[i]] <- value oldClass(x) <- clx if(any(clx == "data.table")) return(alc(x)) else return(x) } # no plm method... can use NextMethod! `[<-.indexed_frame` <- function(x, i, j, value) { res <- NextMethod() if(missing(j)) return(res) if(!(missing(i) || missing(j)) && identical(attr(x, "names"), attr(res, "names"))) return(res) return(reindex(res)) } # These are primarily needed to overwrite pseries methods when plm is attached... # Note: could use reindex() instead of duplAttributes(), but the latter is more efficient, # and I can't think of a single example where it would be undesirable. Math.indexed_series <- function(x, ...) { duplAttributes(get(.Generic)(unindex_light(x), ...), x) } Ops.indexed_series <- function(e1, e2) { if(missing(e2)) { # unary operators (+, - and !) res <- get(.Generic)(unindex_light(e1)) if(.Generic == "!") return(res) return(duplAttributes(res, e1)) } res <- get(.Generic)(unindex_light(e1), unindex_light(e2)) if(!any(.Generic == c("+", "-", "*", "/", "^", "%%", "%/%"))) return(res) if(inherits(e1, "indexed_series")) return(duplAttributes(res, e1)) duplAttributes(res, e2) } collapse/R/GRP.R0000644000176200001440000013240715202504365013041 0ustar liggesusers# Cuniqlengths <- data.table:::Cuniqlengths # Cfrank <- data.table:::Cfrank # forderv <- data.table:::forderv radixorder <- function(..., na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) { z <- pairlist(...) decreasing <- rep_len(as.logical(decreasing), length(z)) .Call(C_radixsort, na.last, decreasing, starts, group.sizes, sort, z) } radixorderv <- function(x, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) { z <- if(is.atomic(x)) pairlist(x) else as.pairlist(unclass(x)) decreasing <- rep_len(as.logical(decreasing), length(z)) .Call(C_radixsort, na.last, decreasing, starts, group.sizes, sort, z) } switchGRP <- function(x, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE, use.group = FALSE) { if(use.group) return(.Call(C_group, x, starts, group.sizes)) z <- if(is.atomic(x)) pairlist(x) else as.pairlist(unclass(x)) decreasing <- rep_len(as.logical(decreasing), length(z)) .Call(C_radixsort, na.last, decreasing, starts, group.sizes, sort, z) } group <- function(..., starts = FALSE, group.sizes = FALSE) { x <- if(...length() == 1L) ..1 else list(...) g <- .Call(C_group, x, starts, group.sizes) oldClass(g) <- c("qG", "na.included") g } groupv <- function(x, starts = FALSE, group.sizes = FALSE) { g <- .Call(C_group, x, starts, group.sizes) oldClass(g) <- c("qG", "na.included") g } gsplit <- function(x = NULL, g, use.g.names = FALSE, ...) { if(!(is.list(g) && inherits(g, "GRP"))) g <- GRP(g, return.groups = use.g.names, call = FALSE, ...) res <- if(is.null(x)) .Call(C_gsplit, 1L, g, TRUE) else if(length(unclass(x)) == length(g[[2L]])) .Call(C_gsplit, x, g, FALSE) else if(is.object(x)) lapply(.Call(C_gsplit, 1L, g, TRUE), function(i) x[i]) else stop("length(x) must match length(g)") if(use.g.names) names(res) <- GRPnames(g, FALSE) res } greorder <- function(x, g, ...) { if(!(is.list(g) && inherits(g, "GRP"))) g <- GRP(g, return.groups = FALSE, call = FALSE, ...) .Call(C_greorder, x, g) } funlist <- function(x) .Call(C_funlist, x) G_guo <- function(g) { if(is.atomic(g)) { if(inherits(g, c("factor", "qG"))) { if(inherits(g, "na.included") || !anyNA(unclass(g))) return(list(if(is.factor(g)) fnlevels(g) else attr(g, "N.groups"), unattrib(g), NULL)) if(is.factor(g)) { ng <- if(anyNA(lev <- attr(g, "levels"))) length(lev) else length(lev) + 1L } else ng <- attr(g, "N.groups") + 1L return(list(ng, copyv(unattrib(g), NA_integer_, ng), NULL)) } g <- .Call(C_group, g, FALSE, FALSE) return(list(attr(g,"N.groups"), g, NULL)) } if(inherits(g, "GRP")) return(g) g <- .Call(C_group, g, FALSE, FALSE) return(list(attr(g,"N.groups"), g, NULL)) } G_t <- function(x) { if(is.null(x)) return(x) # If integer time variable contains NA, does not break C++ code.. if(is.atomic(x)) { if(is.object(x)) { if(inherits(x, c("factor", "qG"))) return(x) if(is.numeric(unclass(x))) return(timeid(x, factor = FALSE)) } else if(is.numeric(x)) { # if(is.double(x)) message("Time variable is of type double, but not a date/time object. It is therefore coerced to integer and assumed to represent unitary timesteps. If this is not desired pass timeid(t). To silence this message pass as.integer(t).") return(as.integer(x)) } return(qG(x, na.exclude = FALSE, sort = TRUE, method = "hash")) # make sure it is sorted ! } # if(is_GRP(x)) return(x[[2L]]) # Not necessary because GRP.default also returns it.. return(GRP.default(x, return.groups = FALSE, return.order = FALSE, sort = TRUE, call = FALSE)[[2L]]) } GRP <- function(X, ...) UseMethod("GRP") # , X # Added... could also do in GRP.default... but this is better, no match.call etc... match.call takes 4 microseconds. could do both ?? think about possible applications... GRP.GRP <- function(X, ...) X GRP.default <- function(X, by = NULL, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.groups = TRUE, return.order = sort, method = "auto", drop = TRUE, call = TRUE, ...) { use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) if(is.na(na.last)) stop("here na.last needs to be TRUE or FALSE, otherwise the GRP object does not match the data dimensions.") if(is.list(X)) { if(inherits(X, "GRP")) return(X) by_null <- is.null(by) if(by_null) { by <- seq_along(unclass(X)) # # This is so that fgroup_by(iris, Species = GRP(Species)) is possible. # if(length(by) == 1L && is.list(.subset2(X, 1L)) && inherits(.subset2(X, 1L), "GRP")) return(.subset2(X, 1L)) namby <- attr(X, "names") if(is.null(namby)) attr(X, "names") <- namby <- paste0("Group.", by) } else { if(is.call(by)) { namby <- all.vars(by, unique = FALSE) by <- ckmatch(namby, attr(X, "names")) } else if(is.character(by)) { namby <- by by <- ckmatch(by, attr(X, "names")) } else { by <- if(is.numeric(by)) as.integer(by) else if(is.logical(by)) which(by) else if(is.function(by)) which(vapply(unattrib(X), by, TRUE)) else stop("by needs to be either a one-sided formula, character column names, column indices, a logical vector or selector function!") namby <- attr(X, "names")[by] if(is.null(namby)) { namby <- paste0("Group.", seq_along(by)) attr(X, "names") <- paste0("Group.", seq_along(unclass(X))) # best ? } } } # drop = FALSE: full Cartesian product of (factor) levels, computed in C. # Falls through to the existing path when no grouping column is a factor (matches dplyr). if(!drop) { cols <- if(by_null) unclass(X) else .subset(unclass(X), by) if(any(.Call(C_vtypes, cols, 2L))) { res <- .Call(C_GRP_default_drop, X, cols, namby, return.groups) return(`oldClass<-`(list(N.groups = res[[1L]], group.id = res[[2L]], group.sizes = res[[3L]], groups = if(return.groups) res[[5L]] else NULL, group.vars = namby, ordered = c(ordered = NA, sorted = NA), order = NULL, group.starts = res[[4L]], call = if(call) match.call() else NULL), "GRP")) } } o <- switchGRP(if(by_null) X else .subset(X, by), na.last, decreasing, return.groups || !use.group, TRUE, sort, use.group) } else { if(length(by)) stop("by can only be used to subset list / data.frame columns") namby <- l1orlst(as.character(substitute(X))) # paste(all.vars(call), collapse = ".") # good in all circumstances ? o <- switchGRP(X, na.last, decreasing, return.groups || !use.group, TRUE, sort, use.group) } st <- attr(o, "starts") gs <- attr(o, "group.sizes") sorted <- if(use.group) NA else attr(o, "sorted") if(return.order && !use.group) ao <- attributes(o)[-2L] attributes(o) <- NULL if(return.groups) { # if unit groups, don't subset rows... if(length(gs) == length(o) && (use.group || sorted)) { ust <- st groups <- if(is.list(X)) .Call(C_subsetCols, X, by, FALSE) else `names<-`(list(X), namby) } else { ust <- if(use.group || sorted) st else if(length(gs) == length(o)) o else .Call(C_subsetVector, o, st, FALSE) # o[st] groups <- if(is.list(X)) .Call(C_subsetDT, X, ust, by, FALSE) else `names<-`(list(.Call(C_subsetVector, X, ust, FALSE)), namby) # subsetVector preserves attributes (such as "label") } } else { groups <- NULL ust <- NULL } return(`oldClass<-`(list(N.groups = length(gs), group.id = if(use.group) o else .Call(C_frankds, o, st, gs, sorted), group.sizes = gs, groups = groups, group.vars = namby, ordered = c(ordered = sort, sorted = sorted), order = if(return.order && !use.group) `attributes<-`(o, ao) else NULL, # `attributes<-`(o, attributes(o)[-2L]) This does a shallow copy on newer R versions # `attr<-`(o, "group.sizes", NULL): This deep-copies it.. group.starts = ust, # Does not need to be computed by group() call = if(call) match.call() else NULL), "GRP")) } is_GRP <- function(x) inherits(x, "GRP") # is.GRP <- function(x) { # .Deprecated(msg = "'is.GRP' was renamed to 'is_GRP'. It will be removed end of 2023, see help('collapse-renamed').") # inherits(x, "GRP") # } length.GRP <- function(x) length(x[[2L]]) GRPnames <- function(x, force.char = TRUE, sep = ".") { # , ... groups <- x[[4L]] if(is.null(groups)) return(NULL) if(length(unclass(groups)) > 1L) return(do.call(paste, c(groups, list(sep = sep)))) if(force.char) tochar(.subset2(groups, 1L)) else .subset2(groups, 1L) # paste0(groups[[1L]]) prints "NA" but is slow, if assign with rownames<-, cannot have duplicate row names. But, attr<- "row.names" is fine !! } GRPid <- function(x, sort = FALSE, ...) { if(!missing(...) && any(names(dots <- list(...)) == "g")) { g <- dots$g if(!inherits(g, "GRP")) stop("g must be a 'GRP' object") res <- g$group.id if(!missing(x) && is.list(x)) return(lapply(x, function(y) res)) return(res) } return(GRP(x, sort = sort, return.groups = FALSE, return.order = FALSE, call = FALSE, ...)$group.id) } GRPN <- function(x, expand = TRUE, ...) { if(!missing(...) && any(names(dots <- list(...)) == "g")) { g <- dots$g if(!inherits(g, "GRP")) stop("g must be a 'GRP' object") res <- if(any(names(dots) == "TRA")) .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE) else g$group.sizes if(!missing(x) && is.list(x)) return(lapply(x, function(y) res)) return(res) } g <- GRP(x, sort = FALSE, return.groups = FALSE, return.order = FALSE, call = FALSE, ...) if(expand) .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE) else g$group.sizes } # dplyr-style n(): only for masking if collapse_mask = "all" n_internal <- function(x, g, TRA, ...) { if(missing(g)) { if(missing(x)) stop("if data is not grouped need to call n() on a column") return(if(is.list(x)) fnrow(x) else length(x)) } if(!inherits(g, "GRP")) stop("g must be a 'GRP' object") if(missing(TRA)) return(g$group.sizes) .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE) } # group_names.GRP <- function(x, force.char = TRUE) { # .Deprecated("GRPnames") # GRPnames(x, force.char) # } print.GRP <- function(x, n = 6, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) ord <- x[[6L]] cat(paste0("collapse grouping object of length ", length(x[[2L]]), " with ", x[[1L]], if(isTRUE(any(ord))) " ordered" else if(anyNA(ord)) "" else " unordered", " groups"), fill = TRUE) cat("\nCall: ", paste0(deparse(x[["call"]]), if(is.na(ord[2L])) "" else if(ord[2L]) ", X is sorted" else ", X is unsorted"), "\n\n", sep = "") cat("Distribution of group sizes: ", fill = TRUE) print.summaryDefault(summary.default(x[[3L]]), ...) if(!is.null(x[[4L]])) { ug <- unattrib(x[[4L]]) cat("\nGroups with sizes: ", fill = TRUE) if(length(ug) == 1L) { ug <- ug[[1L]] if(length(ug) > 2L*n) { ind <- seq.int(x[[1L]]-n+1L, x[[1L]]) print.default(setNames(x[[3L]][1:n], ug[1:n]), ...) cat(" ---", fill = TRUE) print.default(setNames(x[[3L]][ind], ug[ind]), ...) } else print.default(setNames(x[[3L]], ug), ...) } else { if(length(ug[[1L]]) > 2L*n) { ind <- seq.int(x[[1L]]-n+1L, x[[1L]]) print.default(setNames(x[[3L]][1:n], do.call(paste, c(lapply(ug, function(x) x[1:n]), list(sep = ".")))), ...) cat(" ---", fill = TRUE) print.default(setNames(x[[3L]][ind], do.call(paste, c(lapply(ug, function(x) x[ind]), list(sep = ".")))), ...) } else print.default(setNames(x[[3L]], do.call(paste, c(ug, list(sep = ".")))), ...) } } } plot.GRP <- function(x, breaks = "auto", type = "l", horizontal = FALSE, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) if(x[[1L]] <= 1e4) { oldpar <- par(mfrow = if(horizontal) 1:2 else 2:1, mar = c(3.9,4.1,2.1,1), mgp = c(2.5,1,0)) on.exit(par(oldpar)) } if(breaks == "auto") { ugs <- fndistinct.default(x[[3L]]) breaks <- if(ugs > 80) 80 else ugs } if(x[[1L]] <= 1e4) plot(seq_len(x[[1L]]), x[[3L]], type = type, xlab = "Group id", ylab = "Group Size", xlim = c(1L, x[[1L]]), ylim = c(0L, bmax(x[[3L]])), main = paste0("Sizes of ", x[[1L]], if(isTRUE(any(x[[6L]]))) " Ordered" else if(anyNA(x[[6L]])) "" else " Unordered", " Groups"), frame.plot = FALSE, ...) # grid() if(breaks == 1L) plot(x[[3L]][1L], x[[1L]], type = "h", ylab = "Frequency", xlab = "Group Size", main = "Histogram of Group Sizes", frame.plot = FALSE, ...) else hist(x[[3L]], breaks, xlab = "Group Size", main = paste0("Histogram of Group Sizes", if(x[[1L]] > 1e4) paste0(" (N = ", x[[1L]], ")") else ""), ...) } as_factor_GRP <- function(x, ordered = FALSE, sep = ".") { # , ... # if(is.factor(x)) return(x) # if(!is_GRP(x)) stop("x must be a 'GRP' object") f <- x[[2L]] gr <- unclass(x[[4L]]) if(is.null(gr)) { attr(f, "levels") <- as.character(seq_len(x[[1L]])) } else { if(length(gr) == 1L) { attr(f, "levels") <- tochar(gr[[1L]]) # or formatC ? } else { attr(f, "levels") <- do.call(paste, c(gr, list(sep = sep))) } } oldClass(f) <- if(ordered) c("ordered","factor","na.included") else c("factor","na.included") # previously if any(x[[6L]]) f } # as.factor_GRP <- function(x, ordered = FALSE) { # .Deprecated(msg = "'as.factor_GRP' was renamed to 'as_factor_GRP'. It will be removed end of 2023, see help('collapse-renamed').") # as_factor_GRP(x, ordered) # } finteraction <- function(..., factor = TRUE, ordered = FALSE, sort = factor && .op[["sort"]], method = "auto", sep = ".") { # does it drop levels ? -> Yes ! X <- if(...length() == 1L && is.list(..1)) ..1 else list(...) if(factor) return(as_factor_GRP(GRP.default(X, sort = sort, return.order = FALSE, method = method, call = FALSE), ordered, sep)) if(sort || method == "radix") { g <- GRP.default(X, sort = sort, return.groups = FALSE, return.order = FALSE, method = method, call = FALSE) res <- g[[2L]] attr(res, "N.groups") <- g[[1L]] } else res <- .Call(C_group, X, FALSE, FALSE) oldClass(res) <- c(if(ordered) "ordered", "qG", "na.included") res } itn <- function(...) finteraction(...) GRP.qG <- function(X, ..., group.sizes = TRUE, return.groups = TRUE, call = TRUE) { # if(!missing(...)) unused_arg_action(match.call(), ...) gvars <- l1orlst(as.character(substitute(X))) # paste(all.vars(call), collapse = ".") # good in all circumstances ? ng <- attr(X, "N.groups") grl <- return.groups && length(groups <- attr(X, "groups")) if(!inherits(X, "na.included")) if(anyNA(unclass(X))) { ng <- ng + 1L X <- .Call(C_setcopyv, X, NA, ng, FALSE, FALSE, FALSE) # X[is.na(X)] <- ng if(grl) groups <- c(groups, NA) } st <- attr(X, "starts") ordered <- is.ordered(X) attributes(X) <- NULL return(`oldClass<-`(list(N.groups = ng, group.id = X, group.sizes = if(group.sizes) .Call(C_fwtabulate, X, NULL, ng, FALSE) else NULL, # tabulate(X, ng) # .Internal(tabulate(X, ng)) groups = if(grl) `names<-`(list(groups), gvars) else NULL, group.vars = gvars, ordered = c(ordered = if(ordered) TRUE else NA, sorted = issorted(X)), order = NULL, # starts = NULL, maxgrpn = NULL, group.starts = st, call = if(call) match.call() else NULL), "GRP")) } GRP.factor <- function(X, ..., group.sizes = TRUE, drop = FALSE, return.groups = TRUE, call = TRUE) { # if(!missing(...)) unused_arg_action(match.call(), ...) nam <- l1orlst(as.character(substitute(X))) # paste(all.vars(call), collapse = ".") # good in all circumstances ? if(!inherits(X, "na.included")) X <- addNA2(X) if(drop) X <- .Call(Cpp_fdroplevels, X, FALSE) lev <- attr(X, "levels") nl <- length(lev) ordered <- is.ordered(X) attributes(X) <- NULL return(`oldClass<-`(list(N.groups = nl, group.id = X, group.sizes = if(group.sizes) .Call(C_fwtabulate, X, NULL, nl, FALSE) else NULL, # tabulate(X, nl) # .Internal(tabulate(X, nl)) groups = if(return.groups) `names<-`(list(lev), nam) else NULL, group.vars = nam, ordered = c(ordered = if(ordered) TRUE else NA, sorted = issorted(X)), order = NULL, # starts = NULL, maxgrpn = NULL, group.starts = NULL, call = if(call) match.call() else NULL), "GRP")) } GRP.pseries <- function(X, effect = 1L, ..., group.sizes = TRUE, return.groups = TRUE, call = TRUE) { g <- unclass(findex(X)) # index cannot be atomic since plm always adds a time variable ! if(length(effect) > 1L) return(GRP.default(g[effect], ...)) # if(!missing(...)) unused_arg_action(match.call(), ...) # if(length(g) > 2L) { # mlg <- -length(g) # nam <- paste(names(g)[mlg], collapse = ".") # g <- interaction(g[mlg], drop = TRUE) # } else { nam <- if(is.character(effect)) effect else names(g)[effect] g <- g[[effect]] # Fastest way to do this ? # } lev <- attr(g, "levels") nl <- length(lev) ordered <- is.ordered(g) attributes(g) <- NULL return(`oldClass<-`(list(N.groups = nl, group.id = g, group.sizes = if(group.sizes) .Call(C_fwtabulate, g, NULL, nl, FALSE) else NULL, # tabulate(g, nl) # .Internal(tabulate(g, nl)) groups = if(return.groups) `names<-`(list(lev), nam) else NULL, group.vars = nam, ordered = c(ordered = if(ordered) TRUE else NA, sorted = issorted(g)), order = NULL, # starts = NULL, maxgrpn = NULL, group.starts = NULL, call = if(call) match.call() else NULL), "GRP")) } GRP.pdata.frame <- function(X, effect = 1L, ..., group.sizes = TRUE, return.groups = TRUE, call = TRUE) GRP.pseries(X, effect, ..., group.sizes = group.sizes, return.groups = return.groups, call = call) fgroup_by <- function(.X, ..., sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.groups = TRUE, return.order = sort, method = "auto", .drop = TRUE) { # e <- substitute(list(...)) # faster but does not preserve attributes of unique groups ! clx <- oldClass(.X) oldClass(.X) <- NULL m <- match(c("GRP_df", "grouped_df", "data.frame"), clx, nomatch = 0L) dots <- substitute(list(...)) # vars <- all.vars(dots, unique = FALSE) # In case sequences of columns are passed... Think: can enable fgroup_by(mtcars, 1:cyl) if(any(all_funs(dots) == ":")) { # length(vars)+1L != length(dots) && any(all.names(dots) == ":") # Note that fgroup_by(mtcars, bla = round(mpg / cyl), vs:am) only groups by vs, and am. fselect(mtcars, bla = round(mpg / cyl), vs:am) also does the wrong thing. nl <- `names<-`(as.vector(seq_along(.X), "list"), names(.X)) vars <- eval(substitute(c(...)), nl, parent.frame()) e <- .X[vars] # This allows renaming... if(length(nam_vars <- names(vars))) { nonmiss <- nzchar(nam_vars) names(e)[nonmiss] <- nam_vars[nonmiss] } # e <- fselect(if(m[2L]) fungroup(.X) else .X, ...) } else { e <- eval(dots, .X, parent.frame()) name <- names(e) vars <- all.vars(dots, unique = FALSE) # If something else than NSE cols is supplied, see https://github.com/fastverse/collapse/issues/320 # Note: doesn't support fgroup_by(mtcars, cyl / vs), but ok, this should be named... # fgroup_by(mtcars, c("cyl", "vs")) gives vars == character(0) if(length(e) == 1L && is.null(name) && (length(vars) != 1L || !anyv(names(.X), vars))) { # !is.symbol(dots[[2L]]) || length(e[[1L]]) != length(.X[[1L]]) || is.function(e[[1L]] # Fixes #320 e <- .X[cols2int(e[[1L]], .X, names(.X), FALSE)] } else { if(length(name)) { # fgroup_by(mtcars, bla = round(mpg / cyl), vs, am) nonmiss <- nzchar(name) # -> using as.character(dots[-1L]) instead of vars if(!all(nonmiss)) names(e) <- `[<-`(as.character(dots[-1L]), nonmiss, value = name[nonmiss]) } else names(e) <- vars } } attr(.X, "groups") <- GRP.default(e, NULL, sort, decreasing, na.last, return.groups, return.order, method, .drop, FALSE) # if(any(clx == "sf")) oldClass(.X) <- clx[clx != "sf"] # attr(.X, "groups") <- GRP.default(fselect(if(m[2L]) fungroup(.X) else .X, ...), NULL, sort, decreasing, na.last, TRUE, return.order, method, FALSE) # Needed: wlddev %>% fgroup_by(country) gives error if dplyr is loaded. Also sf objects etc.. # .rows needs to be list(), NULL won't work !! Note: attaching a data.frame class calls data frame methods, even if "list" in front! -> Need GRP.grouped_df to restore object ! # attr(.X, "groups") <- `oldClass<-`(c(g, list(.rows = list())), c("GRP", "data.frame")) # `names<-`(eval(e, .X, parent.frame()), all.vars(e)) oldClass(.X) <- c("GRP_df", if(length(mp <- m[m != 0L])) clx[-mp] else clx, "grouped_df", if(m[3L]) "data.frame") # clx[-m] doesn't work if clx is only "data.table" for example # simplest, but .X is coerced to data.frame. Through the above solution it can be a list and only receive the 'grouped_df' class # add_cl <- c("grouped_df", "data.frame") # oldClass(.X) <- c(fsetdiff(oldClass(.X), add_cl), add_cl) if(any(clx == "data.table")) return(alc(.X)) .X } gby <- fgroup_by group_by_vars <- function(X, by = NULL, ...) { clx <- oldClass(X) oldClass(X) <- NULL m <- match(c("GRP_df", "grouped_df", "data.frame"), clx, nomatch = 0L) if(length(by)) by <- cols2int(by, X, names(X), FALSE) attr(X, "groups") <- GRP.default(X[by], NULL, ..., call = FALSE) # Need to unclass because of sf and regrouping! (and some functions expect unclassed) oldClass(X) <- c("GRP_df", if(length(mp <- m[m != 0L])) clx[-mp] else clx, "grouped_df", if(m[3L]) "data.frame") if(any(clx == "data.table")) return(alc(X)) X } print.GRP_df <- function(x, ...) { print(fungroup(x), ...) # better !! (the method could still print groups attribute etc. ) And can also get rid of .rows() in fgroup_by and other fuzz.. # but better keep for now, other functions in dplyr might check this and only preserve attributes if they exist. -> Nah. select(UGA_sf, addr_cname) doesn't work anyway.. # NextMethod() g <- attr(x, "groups") if(is_GRP(g)) { # Issue Patrice flagged ! # oldClass(g) <- NULL # could get rid of this if get rid of "data.frame" class. if(length(g[[3L]])) { su <- unclass(qsu.default(g[[3L]], stable.algo = FALSE)) stats <- if(su[4L] == su[5L]) paste0(" [", g[[1L]], " | ", round(su[2L]), " (", round(su[3L], 1L), ")]") else paste0(" [", g[[1L]], " | ", round(su[2L]), " (", round(su[3L], 1L), ") ", su[4L], "-", su[5L], "]") } else stats <- paste0(" [", g[[1L]], " | ", round(length(g[[2L]]) / g[[1L]]), "]") # Groups: # if(any(g[[6L]])) "ordered groups" else "unordered groups", -> ordered 99% of times... cat("\nGrouped by: ", paste(g[[5L]], collapse = ", "), stats, "\n") if(inherits(x, "pdata.frame")) message("\nNote: 'pdata.frame' methods for flag, fdiff, fgrowth, fcumsum, fbetween, fwithin, fscale, qsu and varying\n take precedence over the 'grouped_df' methods for these functions.") } } print.invisible <- function(x, ...) cat("") # Still solve this properly for data.table... `[.GRP_df` <- function(x, ...) { clx <- oldClass(x) if(any(clx == "data.table")) { res <- NextMethod() if(any(clx == "invisible")) { # for chaining... clx <- clx[clx != "invisible"] oldClass(res) <- clx # in case of early return (reduced rows)... } if(any(grepl(":=", .c(...)))) { eval.parent(substitute(x <- res)) oldClass(res) <- c("invisible", clx) # return(invisible(res)) -> doesn't work here for some reason } else { if(!(is.list(res) && fnrow(res) == fnrow(x))) return(fungroup(res)) if(is.null(attr(res, "groups"))) attr(res, "groups") <- attr(x, "groups") oldClass(res) <- clx } } else { res <- `[`(fungroup(x), ...) # does not respect data.table properties, but better for sf data frame and others which check validity of "groups" attribute if(!(is.list(res) && fnrow(res) == fnrow(x))) return(res) attr(res, "groups") <- attr(x, "groups") oldClass(res) <- clx } res } # missing doesn't work, its invisible return... # `[.GRP_df` <- function(x, ...) { # tstop <- function(x) if(missing(x)) NULL else x # res <- tstop(NextMethod()) # better than above (problems with data.table method, but do further checks...) # if(is.null(res)) return(NULL) # if(!(is.list(res) && fnrow(res) == fnrow(x))) return(fungroup(res)) # if(is.null(g <- attr(res, "groups"))) attr(res, "groups") <- g # oldClass(res) <- oldClass(x) # return(res) # } # also needed to sort out errors with dplyr ... `[[.GRP_df` <- function(x, ...) UseMethod("[[", fungroup(x)) # function(x, ..., exact = TRUE) .subset2(x, ..., exact = exact) `[<-.GRP_df` <- function(x, ..., value) UseMethod("[<-", fungroup(x)) `[[<-.GRP_df` <- function(x, ..., value) UseMethod("[[<-", fungroup(x)) `names<-.GRP_df` <- function(x, value) `oldClass<-`(`names<-`(unclass(x), value), oldClass(x)) # Produce errors... # print_GRP_df_core <- function(x) { # g <- attr(x, "groups") # cat("\nGrouped by: ", paste(g[[5L]], collapse = ", "), # # if(any(g[[6L]])) "ordered groups" else "unordered groups", -> ordered 99% of times... # paste0(" [", g[[1L]], " | ", round(length(g[[2L]]) / g[[1L]]), " (", round(fsd.default(g[[3L]]), 1), ")]")) # if(inherits(x, "pdata.frame")) # message("\nNote: 'pdata.frame' methods for flag, fdiff, fgrowth, fbetween, fwithin and varying\n take precedence over the 'grouped_df' methods for these functions.") # } # # head.GRP_df <- function(x, ...) { # NextMethod() # print_GRP_df_core(x) # } # # tail.GRP_df <- function(x, ...) { # NextMethod() # print_GRP_df_core(x) # } fungroup <- function(X, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) clx <- oldClass(X) attr(X, "groups") <- NULL oldClass(X) <- fsetdiff(clx, c("GRP_df", "grouped_df")) # clx[clx != "grouped_df"] if(any(clx == "data.table")) return(alc(X)) X } condCopyAttrib <- function(x, d) { if(is.object(x)) return(x) cld <- oldClass(d) condalcSA(x, list(names = attr(x, "names"), row.names = .set_row_names(.Call(C_fnrow, x)), class = cld[cld %!in% c("GRP_df", "grouped_df", "sf", "pdata.frame", "indexed_frame")]), any(cld == "data.table")) # attr(d, "groups") <- NULL # attr(d, "row.names") <- NULL # x <- copyMostAttributes(x, d) # attr(x, "row.names") <- rn # oldClass(x) <- fsetdiff(cld, c("GRP_df", "grouped_df", "sf")) # if(any(cld == "data.table")) return(alc(x)) # x } fgroup_vars <- function(X, return = "data") { g <- attr(X, "groups") if(!is.list(g)) stop("attr(X, 'groups') is not a grouping object") vars <- if(is_GRP(g)) g[[5L]] else attr(g, "names")[-length(unclass(g))] switch(return, data = .Call(C_subsetCols, fungroup(X), ckmatch(vars, attr(X, "names")), TRUE), unique = if(is_GRP(g)) condCopyAttrib(g[[4L]], X) else .Call(C_subsetCols, g, -length(unclass(g)), FALSE), # what about attr(*, ".drop") ?? names = vars, indices = ckmatch(vars, attr(X, "names")), named_indices = `names<-`(ckmatch(vars, attr(X, "names")), vars), logical = `[<-`(logical(length(unclass(X))), ckmatch(vars, attr(X, "names")), TRUE), named_logical = { nam <- attr(X, "names") `names<-`(`[<-`(logical(length(nam)), ckmatch(vars, nam), TRUE), nam) }, stop("Unknown return option!")) } GRP.grouped_df <- function(X, ..., return.groups = TRUE, call = TRUE) { # if(!missing(...)) unused_arg_action(match.call(), ...) # g <- unclass(attr(X, "groups")) g <- attr(X, "groups") if(is_GRP(g)) return(g) # return(`oldClass<-`(.subset(g, 1:8), "GRP")) # To avoid data.frame methods being called if(!is.list(g)) stop("attr(X, 'groups') is not a grouping object") oldClass(g) <- NULL lg <- length(g) gr <- g[[lg]] ng <- length(gr) gs <- vlengths(gr, FALSE) id <- .Call(C_groups2GRP, gr, fnrow(X), gs) return(`oldClass<-`(list(N.groups = ng, # The C code here speeds up things a lot !! group.id = id, # Old: rep(seq_len(ng), gs)[order(funlist(gr))], # .Internal(radixsort(TRUE, FALSE, FALSE, TRUE, .Internal(unlist(gr, FALSE, FALSE)))) group.sizes = gs, groups = if(return.groups) g[-lg] else NULL, # better reclass afterwards ? -> Nope, this is only used in internal codes... group.vars = names(g)[-lg], ordered = c(ordered = TRUE, sorted = issorted(id)), # Important to have NA here, otherwise wrong result in gsplit (wrong optimization) order = NULL, # starts = NULL, maxgrpn = NULL, group.starts = NULL, call = if(call) match.call() else NULL), "GRP")) } is_qG <- function(x) is.integer(x) && inherits(x, "qG") # is.qG <- function(x) { # .Deprecated(msg = "'is.qG' was renamed to 'is_qG'. It will be removed end of 2023, see help('collapse-renamed').") # inherits(x, "qG") # } na_rm2 <- function(x, sort) { if(sort) return(if(is.na(x[length(x)])) x[-length(x)] else x) na_rm(x) # if(anyNA(x)) x[!is.na(x)] else x # use na_rm here when speed fixed.. } Csv <- function(x, i) .Call(C_subsetVector, x, i, FALSE) # What about NA last option to radixsort ? -> Nah, vector o becomes too short... radixfact <- function(x, sort, ord, fact, naincl, keep, retgrp = FALSE) { o <- .Call(C_radixsort, TRUE, FALSE, fact || naincl || retgrp, naincl, sort, pairlist(x)) st <- attr(o, "starts") sorted <- attr(o, "sorted") f <- if(naincl) .Call(C_frankds, o, st, attr(o, "group.sizes"), sorted) else # Fastest? -> Seems so.. .Call(Cpp_groupid, x, if(sorted) NULL else o, 1L, TRUE, FALSE) if(fact) { if(keep) duplAttributes(f, x) else attributes(f) <- NULL rawlev <- Csv(x, if(sorted) st else Csv(o, st)) attr(f, "levels") <- unattrib(tochar(if(naincl) rawlev else na_rm2(rawlev, sort))) oldClass(f) <- c(if(ord) "ordered", "factor", if(naincl) "na.included") } else { if(naincl) attr(f, "N.groups") <- length(st) # the order is important, this before retgrp !! if(retgrp) { rawlev <- Csv(x, if(sorted) st else Csv(o, st)) attr(f, "groups") <- if(naincl) rawlev else na_rm2(rawlev, sort) } oldClass(f) <- c(if(ord) "ordered", "qG", if(naincl) "na.included") } f } # TODO: Why is numeric to character conversion so slow?... groupfact <- function(x, ord, fact, naincl, keep, retgrp = FALSE) { g <- .Call(C_groupat, x, fact || retgrp, naincl) if(fact) { st <- attr(g, "starts") if(keep) duplAttributes(g, x) else attributes(g) <- NULL attr(g, "levels") <- unattrib(tochar(if(length(st) == length(g)) x else Csv(x, st))) oldClass(g) <- c(if(ord) "ordered", "factor", if(naincl) "na.included") } else { if(retgrp) { st <- attr(g, "starts") attributes(g) <- NULL attr(g, "N.groups") <- length(st) attr(g, "groups") <- if(length(st) == length(g)) x else Csv(x, st) } oldClass(g) <- c(if(ord) "ordered", "qG", if(naincl) "na.included") } g } # TODO: Why is numeric to character conversion so slow?... this really does away with the added speed... groupfact_sorted <- function(x, ord, fact, naincl, keep, retgrp = FALSE) { g <- .Call(C_groupat, x, TRUE, naincl) st <- attr(g, "starts") ng <- length(st) lev <- if(ng == length(x)) x else Csv(x, st) o <- forder.int(lev) # TODO: keep always add class na.included?? -> Could add anyNA attribute as output from groupat... also for groupfact... if(!attr(o, "sorted")) { if(fact || retgrp) lev <- Csv(lev, o) o <- forder.int(o) # This is necessary. Can optimize?? g <- if(naincl) Csv(unattrib(o), g) else o[g] # [ propagates NA's } if(fact) { if(keep) duplAttributes(g, x) else attributes(g) <- NULL attr(g, "levels") <- unattrib(tochar(lev)) oldClass(g) <- c(if(ord) "ordered", "factor", if(naincl) "na.included") } else { attributes(g) <- NULL attr(g, "N.groups") <- ng if(retgrp) attr(g, "groups") <- lev oldClass(g) <- c(if(ord) "ordered", "qG", if(naincl) "na.included") } g } hashfact <- function(x, sort, ord, fact, naincl, keep, retgrp = FALSE) { if(sort) return(groupfact_sorted(x, ord, fact, naincl, keep, retgrp)) # return(.Call(Cpp_qF, x, ord, !naincl, keep, if(fact) 1L else 2L+retgrp)) groupfact(x, ord, fact, naincl, keep, retgrp) } as_factor_qG <- function(x, ordered = FALSE, na.exclude = TRUE) { groups <- if(is.null(attr(x, "groups"))) as.character(seq_len(attr(x, "N.groups"))) else tochar(attr(x, "groups")) nainc <- inherits(x, "na.included") if(na.exclude || nainc) { clx <- c(if(ordered) "ordered", "factor", if(nainc) "na.included") # can set unordered ?? } else { if(anyNA(unclass(x))) { x <- .Call(C_setcopyv, x, NA, attr(x, "N.groups") + 1L, FALSE, FALSE, FALSE) # x[is.na(x)] <- attr(x, "N.groups") + 1L groups <- c(groups, NA_character_) # faster doing groups[length(groups)+1] <- NA? -> Nope, what you have is fastest ! } clx <- c(if(ordered) "ordered", "factor", "na.included") } return(`attributes<-`(x, list(levels = groups, class = clx))) } # as.factor_qG <- function(x, ordered = FALSE, na.exclude = TRUE) { # .Deprecated(msg = "'as.factor_qG' was renamed to 'as_factor_qG'. It will be removed end of 2023, see help('collapse-renamed').") # as_factor_qG(x, ordered, na.exclude) # } qF <- function(x, ordered = FALSE, na.exclude = TRUE, sort = .op[["sort"]], drop = FALSE, keep.attr = TRUE, method = "auto") { if(is.factor(x) && sort) { if(!keep.attr && !all(names(ax <- attributes(x)) %in% c("levels", "class"))) attributes(x) <- ax[c("levels", "class")] if(na.exclude || inherits(x, "na.included")) { clx <- oldClass(x) if(ordered && !any(clx == "ordered")) oldClass(x) <- c("ordered", clx) else if(!ordered && any(clx == "ordered")) oldClass(x) <- clx[clx != "ordered"] if(drop) return(.Call(Cpp_fdroplevels, x, !inherits(x, "na.included"))) else return(x) } x <- addNA2(x) oldClass(x) <- c(if(ordered) "ordered", "factor", "na.included") if(drop) return(.Call(Cpp_fdroplevels, x, FALSE)) else return(x) } if(is_qG(x)) return(as_factor_qG(x, ordered, na.exclude)) # && sort?? switch(method, # if((is.character(x) && !na.exclude) || (length(x) < 500 && !(is.character(x) && na.exclude))) auto = if(is.double(x) && sort) # is.character(x) || is.logical(x) || !sort || length(x) < 500L radixfact(x, sort, ordered, TRUE, !na.exclude, keep.attr) else if(sort && length(x) < 100000L && !is.object(x)) .Call(Cpp_qF, x, ordered, na.exclude, keep.attr, 1L) else hashfact(x, sort, ordered, TRUE, !na.exclude, keep.attr), radix = radixfact(x, sort, ordered, TRUE, !na.exclude, keep.attr), hash = hashfact(x, sort, ordered, TRUE, !na.exclude, keep.attr), # .Call(Cpp_qF, x, sort, ordered, na.exclude, keep.attr, 1L), rcpp_hash = .Call(Cpp_qF, x, ordered, na.exclude, keep.attr, 1L), stop("Unknown method:", method)) } qG <- function(x, ordered = FALSE, na.exclude = TRUE, sort = .op[["sort"]], return.groups = FALSE, method = "auto") { if(inherits(x, c("factor", "qG"))) { nainc <- inherits(x, "na.included") if(na.exclude || nainc || !anyNA(unclass(x))) { newclx <- c(if(ordered) "ordered", "qG", if(nainc || !na.exclude) "na.included") if(is.factor(x)) { ax <- if(return.groups) list(N.groups = fnlevels(x), groups = attr(x, "levels"), class = newclx) else list(N.groups = fnlevels(x), class = newclx) } else { ax <- if(return.groups) list(N.groups = attr(x, "N.groups"), groups = attr(x, "groups"), class = newclx) else list(N.groups = attr(x, "N.groups"), class = newclx) } if(identical(ax, attributes(x))) return(x) return(`attributes<-`(x, ax)) } newclx <- c(if(ordered) "ordered", "qG", "na.included") if(is.factor(x)) { lev <- attr(x, "levels") if(anyNA(lev)) ng <- length(lev) else { ng <- length(lev) + 1L if(return.groups) lev <- c(lev, NA_character_) } attributes(x) <- NULL # factor method seems faster, however cannot assign integer, must assign factor level... } else { if(return.groups && length(lev <- attr(x, "groups"))) lev <- c(lev, NA) ng <- attr(x, "N.groups") + 1L } ax <- if(return.groups) list(N.groups = ng, groups = lev, class = newclx) else list(N.groups = ng, class = newclx) # x[is.na(x)] <- ng return(`attributes<-`(.Call(C_setcopyv, x, NA, ng, FALSE, FALSE, FALSE), ax)) } switch(method, # if((is.character(x) && !na.exclude) || (length(x) < 500 && !(is.character(x) && na.exclude))) auto = if(is.double(x) && sort) # is.character(x) || is.logical(x) || !sort || length(x) < 500L radixfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups) else if(sort && length(x) < 100000L) .Call(Cpp_qF, x, ordered, na.exclude, FALSE, 2L+return.groups) else hashfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups), radix = radixfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups), hash = hashfact(x, sort, ordered, FALSE, !na.exclude, FALSE, return.groups), # .Call(Cpp_qF, x, sort, ordered, na.exclude, FALSE, 2L+return.groups), rcpp_hash = .Call(Cpp_qF, x, ordered, na.exclude, FALSE, 2L+return.groups), stop("Unknown method:", method)) } radixuniquevec <- function(x, sort, na.last = TRUE, decreasing = FALSE) { o <- .Call(C_radixsort, na.last, decreasing, TRUE, FALSE, sort, pairlist(x)) if(attr(o, "maxgrpn") <= 1L && (!sort || attr(o, "sorted"))) return(x) Csv(x, if(attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts"))) } funique <- function(x, ...) UseMethod("funique") funique.default <- function(x, sort = FALSE, method = "auto", ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) if(is.array(x)) stop("funique currently only supports atomic vectors and data.frames") switch(method, auto = if(sort && is.numeric(x) && length(x) > 500L) radixuniquevec(x, sort, ...) else if(sort) .Call(Cpp_sortunique, x) else .Call(C_funique, x), radix = radixuniquevec(x, sort, ...), hash = if(sort) .Call(Cpp_sortunique, x) else .Call(C_funique, x), stop("method needs to be 'auto', 'hash' or 'radix'.")) # , ... adding dots gives error message too strict, package default is warning.. } # could make faster still... not using colsubset but something more simple... no attributes needed... # Enable by formula use ?? by or cols ?? -> cols is clearer !! also with na_omit, by could imply by-group uniqueness check... funique.data.frame <- function(x, cols = NULL, sort = FALSE, method = "auto", ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) o <- switchGRP(if(is.null(cols)) x else colsubset(x, cols), starts = TRUE, sort = sort, use.group = use.group, ...) if((use.group && length(o) == attr(o, "N.groups")) || (!use.group && attr(o, "maxgrpn") <= 1L && (!sort || attr(o, "sorted")))) # return(x) return(if(inherits(x, "data.table")) alc(x) else x) st <- if(use.group || attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts")) rn <- attr(x, "row.names") res <- .Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE) if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(res) attr(res, "row.names") <- Csv(rn, st) res } ## Problem: could be confused to mean unique values within groups. Also can use ffirst() to achieve something similar # funique.grouped_df <- function(x, ...) { # g <- GRP.grouped_df(x, call = FALSE) # if(g[[1L]] == length(g[[2L]])) return(fungroup(x)) # st <- if(length(g$group.starts)) g$group.starts else .Call(C_ffirst, seq_along(g[[2L]]), g[[1L]], g[[2L]], NULL, FALSE) # rn <- attr(x, "row.names") # attr(x, "groups") <- NULL # oldClass(x) <- fsetdiff(oldClass(x), c("GRP_df", "grouped_df")) # res <- .Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE) # if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(res) # attr(res, "row.names") <- Csv(rn, st) # res # } funique.list <- function(x, cols = NULL, sort = FALSE, method = "auto", ...) funique.data.frame(x, cols, sort, method, ...) funique.sf <- function(x, cols = NULL, sort = FALSE, method = "auto", ...) { use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) cols <- if(is.null(cols)) whichv(attr(x, "names"), attr(x, "sf_column"), TRUE) else cols2int(cols, x, attr(x, "names"), FALSE) o <- switchGRP(.subset(x, cols), starts = TRUE, sort = sort, use.group = use.group, ...) if((use.group && length(o) == attr(o, "N.groups")) || (!use.group && attr(o, "maxgrpn") <= 1L && (!sort || attr(o, "sorted")))) return(x) st <- if(use.group || attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts")) rn <- attr(x, "row.names") res <- .Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE) if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(res) attr(res, "row.names") <- Csv(rn, st) res } funique.pseries <- function(x, sort = FALSE, method = "auto", drop.index.levels = "id", ...) { if(is.array(x)) stop("funique currently only supports atomic vectors and data.frames") use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) o <- switchGRP(x, starts = TRUE, sort = sort, use.group = use.group, ...) if((use.group && length(o) == attr(o, "N.groups")) || (!use.group && attr(o, "maxgrpn") <= 1L && (!sort || attr(o, "sorted")))) return(x) st <- if(use.group || attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts")) res <- Csv(x, st) if(length(names(x))) names(res) <- Csv(names(x), st) index <- findex(x) index_ss <- droplevels_index(.Call(C_subsetDT, index, st, seq_along(unclass(index)), FALSE), drop.index.levels) attr(res, if(inherits(x, "indexed_series")) "index_df" else "index") <- index_ss res } funique.pdata.frame <- function(x, cols = NULL, sort = FALSE, method = "auto", drop.index.levels = "id", ...) { use.group <- switch(method, auto = !sort, hash = TRUE, radix = FALSE, stop("method needs to be 'auto', 'hash' or 'radix'.")) o <- switchGRP(if(is.null(cols)) x else colsubset(x, cols), starts = TRUE, sort = sort, use.group = use.group, ...) if((use.group && length(o) == attr(o, "N.groups")) || (!use.group && attr(o, "maxgrpn") <= 1L && (!sort || attr(o, "sorted")))) # return(x) return(if(inherits(x, "data.table")) alc(x) else x) st <- if(use.group || attr(o, "sorted")) attr(o, "starts") else Csv(o, attr(o, "starts")) rn <- attr(x, "row.names") res <- .Call(C_subsetDT, x, st, seq_along(unclass(x)), FALSE) if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) attr(res, "row.names") <- Csv(rn, st) index <- findex(x) index_ss <- droplevels_index(.Call(C_subsetDT, index, st, seq_along(unclass(index)), FALSE), drop.index.levels) if(inherits(x, "indexed_frame")) return(reindex(res, index_ss)) attr(res, "index") <- index_ss res } fnunique <- function(x) { if(is.list(x) && length(unclass(x)) == 1L) x <- .subset2(x, 1L) if(is.atomic(x) && !is.complex(x)) .Call(C_fndistinct, x, NULL, FALSE, 1L) else attr(.Call(C_group, x, FALSE, FALSE), "N.groups") } any_duplicated <- function(x) fnunique(x) < (if(is.atomic(x)) length(x) else .Call(C_fnrow, x)) fduplicated <- function(x, all = FALSE) { if(all) { g <- .Call(C_group, x, FALSE, FALSE) ng <- attr(g, "N.groups") if(ng == length(g)) return(logical(length(g))) gs <- .Call(C_fwtabulate, g, NULL, ng, FALSE) return(.Call(C_subsetVector, gs != 1L, g, FALSE)) } g <- .Call(C_group, x, TRUE, FALSE) starts <- attr(g, "starts") if(length(starts) == length(g)) return(logical(length(g))) .Call(C_setcopyv, .Call(C_alloc, TRUE, length(g), TRUE), starts, FALSE, FALSE, TRUE, TRUE) } fdroplevels <- function(x, ...) UseMethod("fdroplevels") fdroplevels.default <- function(x, ...) { message("Trying to drop levels from an unsupported object: returning object") x } fdroplevels.factor <- function(x, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) clx <- class(x) if(!any(clx == "factor")) stop("x needs to be a factor") .Call(Cpp_fdroplevels, x, !any(clx == "na.included")) } fdroplevels.data.frame <- function(x, ...) { # if(!missing(...)) unused_arg_action(match.call(), ...) res <- duplAttributes(lapply(unattrib(x), function(y) if(is.factor(y)) .Call(Cpp_fdroplevels, y, !inherits(y, "na.included")) else y), x) if(inherits(x, "data.table")) return(alc(res)) res } fdroplevels.list <- function(x, ...) { duplAttributes(lapply(unattrib(x), function(y) if(is.factor(y)) .Call(Cpp_fdroplevels, y, !inherits(y, "na.included")) else y), x) } collapse/R/flm.R0000644000176200001440000001200314777170130013161 0ustar liggesusers # formatcoef <- function(r, X, y) { # if(!is.matrix(r)) dim(r) <- c(length(r), 1L) # `dimnames<-`(r, list(dimnames(X)[[2L]], if(is.matrix(y)) dimnames(y)[[2L]] else NULL)) # } # formatcoef <- function(r, y, X, drop) { # if(is.matrix(r)) return(`dimnames<-`(r, list(dimnames(X)[[2L]], if(is.matrix(y)) dimnames(y)[[2L]] else NULL))) # if(drop) return(name) # ..... # # # list(dim = c(dim(X)[2L], 1L), dimnames = list(dimnames(X)[[2L]], NULL)) # } flm <- function(...) if(is.atomic(..1)) flm.default(...) else flm.formula(...) flm.default <- function(y, X, w = NULL, add.icpt = FALSE, # sparse = FALSE, return.raw = FALSE, # only.coef method = c("lm", "solve", "qr", "arma", "chol", "eigen"), eigen.method = 3L, ...) { if(add.icpt) X <- cbind(`(Intercept)` = 1, X) n <- dim(X)[1L] if(n != NROW(y)) stop("NROW(y) must match nrow(X)") # if(sparse) X <- as(X, "dgCMatrix") # what about y ?? if(length(w)) { if(length(w) != n) stop("w must be numeric and length(w) == nrow(X)") wts <- sqrt(w) if(return.raw) return(switch(method[1L], lm = { z <- .lm.fit(X * wts, y * wts, ...) z$residuals <- z$residuals / wts # This is correct !!! z }, solve = (function(xw) solve(crossprod(xw), crossprod(xw, y * wts), ...))(X * wts), qr = qr.coef(qr(X * wts, ...), y * wts), arma = getenvFUN("RcppArmadillo_fastLmPure")(X * wts, y * wts), # .Call("_RcppArmadillo_fastLm_impl", X * wts, y * wts, PACKAGE = "RcppArmadillo"), chol = (function(xw) chol2inv(chol(crossprod(xw), ...)) %*% crossprod(xw, y * wts))(X * wts), eigen = { z <- getenvFUN("RcppEigen_fastLmPure")(X * wts, y * wts, eigen.method) # .Call("RcppEigen_fastLm_Impl", X * wts, y * wts, eigen.method, PACKAGE = "RcppEigen") z$residuals <- z$residuals / wts # This is correct !!! z$fitted.values <- y - z$residuals z }, stop("Unknown method!"))) ar <- if(is.matrix(y)) list(dim = c(dim(X)[2L], dim(y)[2L]), dimnames = list(dimnames(X)[[2L]], dimnames(y)[[2L]])) else list(dim = c(dim(X)[2L], 1L), dimnames = list(dimnames(X)[[2L]], NULL)) return(`attributes<-`(switch(method[1L], lm = .lm.fit(X * wts, y * wts, ...)[[2L]], solve = (function(xw) solve(crossprod(xw), crossprod(xw, y * wts), ...))(X * wts), qr = qr.coef(qr(`dimnames<-`(X, NULL) * wts, ...), y * wts), arma = getenvFUN("RcppArmadillo_fastLmPure")(X * wts, y * wts)[[1L]], # .Call("_RcppArmadillo_fastLm_impl", X * wts, y * wts, PACKAGE = "RcppArmadillo"), chol = (function(xw) chol2inv(chol(crossprod(xw), ...)) %*% crossprod(xw, y * wts))(X * wts), eigen = getenvFUN("RcppEigen_fastLmPure")(X * wts, y * wts, eigen.method)[[1L]], # .Call("RcppEigen_fastLm_Impl", X * wts, y * wts, eigen.method, PACKAGE = "RcppEigen") stop("Unknown method!")), ar)) } if(return.raw) return(switch(method[1L], lm = .lm.fit(X, y, ...), solve = solve(crossprod(X), crossprod(X, y), ...), qr = qr.coef(qr(X, ...), y), arma = getenvFUN("RcppArmadillo_fastLmPure")(X, y), chol = chol2inv(chol(crossprod(X), ...)) %*% crossprod(X, y), eigen = getenvFUN("RcppEigen_fastLmPure")(X, y, eigen.method), stop("Unknown method!"))) ar <- if(is.matrix(y)) list(dim = c(dim(X)[2L], dim(y)[2L]), dimnames = list(dimnames(X)[[2L]], dimnames(y)[[2L]])) else list(dim = c(dim(X)[2L], 1L), dimnames = list(dimnames(X)[[2L]], NULL)) `attributes<-`(switch(method[1L], lm = .lm.fit(X, y, ...)[[2L]], solve = solve(crossprod(X), crossprod(X, y), ...), qr = qr.coef(qr(`dimnames<-`(X, NULL), ...), y), arma = getenvFUN("RcppArmadillo_fastLmPure")(X, y)[[1L]], chol = chol2inv(chol(crossprod(X), ...)) %*% crossprod(X, y), eigen = getenvFUN("RcppEigen_fastLmPure")(X, y, eigen.method)[[1L]], stop("Unknown method!")), ar) # if(!return.raw) return(switch(method[1L], solve = formatcoef(res$coefficients, X, y), res$coefficients)) # res } flm.formula <- function(formula, data = NULL, weights = NULL, add.icpt = TRUE, ...) { w <- substitute(weights) tms <- attributes(terms.formula(formula, data = data)) pe <- tms[[".Environment"]] mf <- eval(tms$variables, data, pe) y <- mf[[1L]] X <- mf[-1L] if(length(w)) w <- eval(w, data, pe) names(X) <- tms$term.labels if(add.icpt) X <- c(list(`(Intercept)` = alloc(1, NROW(y))), X) # y could be matrix flm.default(y, do.call(cbind, X), w, FALSE, ...) } # Slower than using chol2inv (discarded) # lmchol2 <- function(X, y) { # ch <- chol(crossprod(X)) # backsolve(ch, forwardsolve(ch, crossprod(X, y), upper = TRUE, trans = TRUE)) # } collapse/R/fmode.R0000644000176200001440000001170314777170130013503 0ustar liggesusers # Note: for principal innovations of this code see fsum.R fmode <- function(x, ...) UseMethod("fmode") # , x fmode.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ties = "first", nthreads = .op[["nthreads"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fmode.matrix(x, g, w, TRA, na.rm, use.g.names, ties = ties, nthreads = nthreads, ...)) r <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fmode,x,g,w,na.rm,r,nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(res) if(use.g.names) names(res) <- GRPnames(g, FALSE) return(res) } TRAC(x,res,g[[2L]],TRA, ...) } fmode.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "first", nthreads = .op[["nthreads"]], ...) { r <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fmodem,x,g,w,na.rm,drop,r,nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(res) if(use.g.names) dimnames(res)[[1L]] <- GRPnames(g) return(res) } TRAmC(x,res,g[[2L]],TRA, ...) } fmode.zoo <- function(x, ...) if(is.matrix(x)) fmode.matrix(x, ...) else fmode.default(x, ...) fmode.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fmode.matrix(x, ...), x) else fmode.default(x, ...) fmode.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "first", nthreads = .op[["nthreads"]], ...) { r <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fmodel,x,g,w,na.rm,r,nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(if(drop) unlist(res) else res) if(use.g.names && !inherits(x, "data.table") && length(gn <- GRPnames(g))) attr(res, "row.names") <- gn return(res) } TRAlC(x,res,g[[2L]],TRA, ...) } fmode.list <- function(x, ...) fmode.data.frame(x, ...) fmode.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], ties = "first", nthreads = .op[["nthreads"]], ...) { r <- switch(ties, first = 0L, min = 1L, max = 2L, last = 3L, stop("Unknown ties option: ", ties)) g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "sum.")) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(C_fmodel,x[-gn],g,w,na.rm,r,nthreads)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(C_fmodel,x[-gn],g,w,na.rm,r,nthreads)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(C_fmodel,x,g,w,na.rm,r,nthreads)), ax)) } else return(setAttributes(.Call(C_fmodel,x,g,w,na.rm,r,nthreads), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(C_fmodel,x[-gn],g,w,na.rm,r,nthreads),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fmodel,x[-gn],g,w,na.rm,r,nthreads),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fmodel,x,g,w,na.rm,r,nthreads),g[[2L]],TRA, ...)) } collapse/R/quick_conversion.R0000644000176200001440000002320714777170130015774 0ustar liggesusers qDF <- function(X, row.names.col = FALSE, keep.attr = FALSE, class = "data.frame") { if(is.atomic(X)) { d <- dim(X) ld <- length(d) if(ld > 1L) { if(ld > 2L) { dn <- dimnames(X) dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } if(!isFALSE(row.names.col) && length(force(dn <- dimnames(X))[[1L]])) { res <- c(list(dn[[1L]]), .Call(Cpp_mctl, X, FALSE, 0L)) names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", dn[[2L]]) attr(res, "row.names") <- .set_row_names(length(dn[[1L]])) } else res <- .Call(Cpp_mctl, X, TRUE, 1L) oldClass(res) <- if(length(class)) class else "data.frame" if(!keep.attr) return(res) ax <- attributes(X) axoth <- names(ax) %!in% c("dim", "dimnames", "class") if(any(axoth)) return(addAttributes(res, ax[axoth])) else return(res) } nam <- names(X) if(is.null(nam) || isFALSE(row.names.col)) { if(is.null(nam)) { res <- `names<-`(list(X), l1orlst(as.character(substitute(X)))) attr(res, "row.names") <- .set_row_names(length(X)) } else { res <- `names<-`(list(`names<-`(X, NULL)), l1orlst(as.character(substitute(X)))) attr(res, "row.names") <- nam } } else { res <- list(nam, `names<-`(X, NULL)) names(res) <- if(length(row.names.col) == 2L) row.names.col else c( if(is.character(row.names.col)) row.names.col[1L] else "row.names", l1orlst(as.character(substitute(X)))) attr(res, "row.names") <- .set_row_names(length(X)) } return(`oldClass<-`(res, if(length(class)) class else "data.frame")) } if(keep.attr) { # if(all(class(X) == class)) return(X) # better adjust rows ? -> yes, row.names.col should always work ! if(is.null(attr(X, "names"))) attr(X, "names") <- paste0("V", seq_along(unclass(X))) if(is.null(attr(X, "row.names"))) { attr(X, "row.names") <- .set_row_names(fnrow(X)) } else if(!isFALSE(row.names.col)) { ax <- attributes(X) X <- c(list(ax[["row.names"]]), X) ax[["row.names"]] <- .set_row_names(.Call(C_fnrow, X)) # this is ok, X is a list ... ax[["names"]] <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", ax[["names"]]) setattributes(X, ax) } if(length(class)) return(`oldClass<-`(X, class)) if(inherits(X, "data.frame")) return(X) return(`oldClass<-`(X, "data.frame")) } nam <- attr(X, "names") rn <- attr(X, "row.names") attributes(X) <- NULL if(is.null(nam)) nam <- paste0("V", seq_along(X)) if(is.null(rn) || is.numeric(rn)) { rn <- .set_row_names(.Call(C_fnrow, X)) } else if(!isFALSE(row.names.col)) { X <- c(list(rn), X) rn <- .set_row_names(.Call(C_fnrow, X)) nam <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", nam) } # slower: !! # setAttributes(X, pairlist(names = nam, row.names = rn, class = if(length(class)) class else "data.frame")) names(X) <- nam attr(X, "row.names") <- rn # This can be inefficient for large data.frames if character rn !! oldClass(X) <- if(length(class)) class else "data.frame" X } qDT_raw <- function(X, row.names.col, keep.attr, DT_class, X_nam) { if(is.atomic(X)) { d <- dim(X) ld <- length(d) if(ld > 1L) { if(ld > 2L) { dn <- dimnames(X) dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } if(!isFALSE(row.names.col) && length(force(dn <- dimnames(X))[[1L]])) { res <- c(list(dn[[1L]]), .Call(Cpp_mctl, X, FALSE, 0L)) names(res) <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", dn[[2L]]) attr(res, "row.names") <- .set_row_names(length(dn[[1L]])) } else res <- .Call(Cpp_mctl, X, TRUE, 2L) oldClass(res) <- DT_class if(!keep.attr) return(res) ax <- attributes(X) axoth <- names(ax) %!in% c("dim", "dimnames", "class") return(if(any(axoth)) addAttributes(res, ax[axoth]) else res) } if(isFALSE(row.names.col) || is.null(nam <- names(X))) { res <- `names<-`(list(X), X_nam) } else { res <- list(nam, `names<-`(X, NULL)) names(res) <- if(length(row.names.col) == 2L) row.names.col else c( if(is.character(row.names.col)) row.names.col[1L] else "row.names", X_nam) } attr(res, "row.names") <- .set_row_names(length(X)) return(`oldClass<-`(res, DT_class)) } if(keep.attr) { # if(all(class(X) == DT_class)) return(X) # better adjust rows ? -> yes, row.names.col should always work ! if(is.null(attr(X, "names"))) attr(X, "names") <- paste0("V", seq_along(unclass(X))) if(!isFALSE(row.names.col) && length(rn <- attr(X, "row.names"))) { ax <- attributes(X) X <- c(list(rn), X) ax[["names"]] <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", ax[["names"]]) setattributes(X, ax) } if(!length(DT_class) && inherits(X, c("data.table", "data.frame"))) return(X) attr(X, "row.names") <- .set_row_names(fnrow(X)) } else { nam <- attr(X, "names") rncol <- !isFALSE(row.names.col) && length(rn <- attr(X, "row.names")) attributes(X) <- NULL if(is.null(nam)) nam <- paste0("V", seq_along(X)) if(rncol) { X <- c(list(rn), X) nam <- c(if(is.character(row.names.col)) row.names.col[1L] else "row.names", nam) } names(X) <- nam attr(X, "row.names") <- .set_row_names(.Call(C_fnrow, X)) } return(`oldClass<-`(X, DT_class)) } qDT <- function(X, row.names.col = FALSE, keep.attr = FALSE, class = c("data.table", "data.frame")) { alc(qDT_raw(X, row.names.col, keep.attr, if(length(class) || keep.attr) class else c("data.table", "data.frame"), if(is.atomic(X) && !is.matrix(X)) l1orlst(as.character(substitute(X))) else NULL)) } qTBL <- function(X, row.names.col = FALSE, keep.attr = FALSE, class = c("tbl_df", "tbl", "data.frame")) { qDT_raw(X, row.names.col, keep.attr, if(length(class) || keep.attr) class else c("tbl_df", "tbl", "data.frame"), if(is.atomic(X) && !is.matrix(X)) l1orlst(as.character(substitute(X))) else NULL) } qM <- function(X, row.names.col = NULL, keep.attr = FALSE, class = NULL, sep = ".") { if(keep.attr) { if(is.atomic(X)) { if(length(class)) oldClass(X) <- class if(is.matrix(X)) return(X) if(is.array(X)) { d <- dim(X) dn <- dimnames(X) dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:length(d)) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } else { nam <- l1orlst(as.character(substitute(X))) # needed before X is changed !! dim(X) <- c(length(X), 1L) dimnames(X) <- list(names(X), nam) names(X) <- NULL # if(is.object(X)) oldClass(X) <- NULL Necessary ? Can also have factor or date matrices. Check this ! # -> qM(wlddev$date, TRUE) is a vector !! } return(X) } ax <- attributes(X) if(length(row.names.col)) { rnc <- cols2int(row.names.col, X, ax[["names"]]) res <- do.call(cbind, .subset(X, -rnc)) dimnames(res)[[1L]] <- if(length(rnc) == 1L) .subset2(X, rnc) else do.call(paste, c(.subset(X, rnc), list(sep = sep))) } else { res <- do.call(cbind, X) rn <- ax[["row.names"]] if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) dimnames(res) <- list(rn, ax[["names"]]) } if(length(class)) oldClass(res) <- class axoth <- names(ax) %!in% c("names", "row.names", "class") if(any(axoth)) return(addAttributes(res, ax[axoth])) return(res) } if(is.atomic(X)) { if(!is.array(X)) { r <- matrix(X, ncol = 1, dimnames = list(names(X), l1orlst(as.character(substitute(X))))) if(is.null(class)) return(r) else return(`oldClass<-`(r, class)) } d <- dim(X) dn <- dimnames(X) attributes(X) <- NULL ld <- length(d) if(ld == 2L) { # setattributes(X, pairlist(dim = d, dimnames = dn)) # Not faster ! dim(X) <- d dimnames(X) <- dn } else { dim(X) <- c(d[1L], bprod(d[-1L])) if(length(dn)) { for (i in 2L:ld) if(is.null(dn[[i]])) dn[[i]] <- seq_len(d[i]) dimnames(X) <- list(dn[[1L]], interact_names(dn[-1L])) # Good? } } if(length(class)) oldClass(X) <- class return(X) } if(length(row.names.col)) { rnc <- cols2int(row.names.col, X, attr(X, "names")) res <- do.call(cbind, .subset(X, -rnc)) if(is.object(res)) attributes(res) <- attributes(res)[c("dim", "dimnames")] dimnames(res)[[1L]] <- if(length(rnc) == 1L) .subset2(X, rnc) else do.call(paste, c(.subset(X, rnc), list(sep = sep))) } else { rn <- attr(X, "row.names") res <- do.call(cbind, X) if(is.object(res)) attributes(res) <- attributes(res)[c("dim", "dimnames")] # if X is list of time-series, do.call(cbind, X) creates ts-matrix. if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) dimnames(res) <- list(rn, attr(X, "names")) } if(length(class)) oldClass(res) <- class res } # Same speed # tf1 <- function(res) { # res <- do.call(cbind, res) # if(is.object(res)) attributes(res) <- attributes(res)[c("dim", "dimnames")] # res # } # # tf2 <- function(res) { # res <- do.call(cbind, res) # if(is.object(res)) setAttributes(res, attributes(res)[c("dim", "dimnames")]) # } collapse/R/BY.R0000644000176200001440000004450415056572047012734 0ustar liggesusers BY <- function(x, ...) UseMethod("BY") BY.default <- function(x, g, FUN, ..., use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "vector", "list")) { # If matrix, dispatch to matrix method # if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("BY", unclass(x))) if(!is.function(FUN)) FUN <- match.fun(FUN) aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply simplify <- switch(return[1L], same = 1L, vector = 2L, list = 3L, stop("BY.default only supports same, vector and list output!")) g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE) # Computing result: unsimplified if(!missing(...) && g[[1L]] > 1L && length(ln <- whichv(vlengths(dots <- list(...), FALSE), length(x)))) { asl <- lapply(dots[ln], gsplit, g) if(length(dots) > length(ln)) { mord <- dots[-ln] if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.") } else mord <- NULL res <- .mapply(FUN, c(list(gsplit(x, g)), asl), mord) } else res <- aplyfun(gsplit(x, g), FUN, ...) # Returning raw or wide result if(simplify == 3L || expand.wide) { if(use.g.names) names(res) <- GRPnames(g, FALSE) if(simplify == 3L) return(res) return(do.call(rbind, res)) } # If using names and function also assigns names e.g. quantile() if(use.g.names && length(names(res[[1L]]))) { names(res) <- GRPnames(g, FALSE) res <- unlist(res, recursive = FALSE, use.names = TRUE) if(reorder && length(res) == length(g[[2L]]) && !isTRUE(g$ordered[2L])) warning("result is same length as x but the grouping is not sorted and the function used added names. Thus BY cannot decisively distinguish whether you are using a transformation function like scale() or a summary function like quantile() that computes a vector of statistics. The latter is assumed and the result is not reordered. To receive reordered output without constructed names set use.g.names = FALSE") } else { # Function does not assign names... or not using group names... res <- funlist(res) if(length(res) == g[[1L]]) { if(use.g.names) names(res) <- GRPnames(g, FALSE) } else if(length(res) == length(g[[2L]])) { if(reorder) res <- .Call(C_greorder, res, g) if(length(names(x)) && (reorder || isTRUE(g$ordered[2L]))) # Making sure we don't assign wrong names.. names(res) <- names(x) } } if(simplify == 1L) return(copyMostAttributes(res, x)) # here needs to be copyMostAttributes... otherwise overwrites names res } # Experimental: But not really faster and also risky because vapply checks types and types may differ... # copysplaplfun <- function(x, g, FUN, ...) { # sx <- gsplit(x, g) # if(length(sx) > 100000L && length(r1 <- match.fun(FUN)(sx[[1L]], ...)) == 1L) # return(copyMostAttributes(vapply(sx, FUN, r1, ..., USE.NAMES = FALSE), x)) # copyMostAttributes(funlist(lapply(sx, FUN, ...)), x) # } copysplaplfun <- function(x, g, FUN, ...) copyMostAttributes(funlist(lapply(gsplit(x, g), FUN, ...)), x) copysplmaplfun <- function(x, g, FUN, asl, mord) copyMostAttributes(funlist(.mapply(FUN, c(list(gsplit(x, g)), asl), mord)), x) splaplfun <- function(x, g, FUN, ...) funlist(lapply(gsplit(x, g), FUN, ...)) splmaplfun <- function(x, g, FUN, asl, mord) funlist(.mapply(FUN, c(list(gsplit(x, g)), asl), mord)) BY.data.frame <- function(x, g, FUN, ..., use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) { if(!is.list(x)) stop("x needs to be a list") if(!is.function(FUN)) FUN <- match.fun(FUN) aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply return <- switch(return[1L], same = 1L, matrix = 3L, data.frame = 2L, list = 0L, stop("Unknown return option!")) g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE) n <- length(g[[2L]]) if(!missing(...) && g[[1L]] > 1L && length(ln <- whichv(vlengths(dots <- list(...), FALSE), n))) { asl <- lapply(dots[ln], gsplit, g) if(length(dots) > length(ln)) { mord <- dots[-ln] if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.") } else mord <- NULL multi <- TRUE } else multi <- FALSE # Just plain list output if(return == 0L) { if(multi) { if(expand.wide) return(aplyfun(x, function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord)))) return(aplyfun(x, function(y) .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord))) } if(expand.wide) return(aplyfun(x, function(y) do.call(rbind, lapply(gsplit(y, g, use.g.names), FUN, ...)))) return(aplyfun(x, function(y) lapply(gsplit(y, g, use.g.names), FUN, ...))) } ax <- attributes(x) # Wider output (for multiple summary statistics like quantile()) if(expand.wide) { if(return < 3L) { # Return a data.frame splitfun <- if(multi) function(y) .Call(Cpp_mctl, do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)), TRUE, 0L) else function(y) .Call(Cpp_mctl, do.call(rbind, lapply(gsplit(y, g), FUN, ...)), TRUE, 0L) res <- unlist(aplyfun(x, splitfun), recursive = FALSE, use.names = TRUE) if(return == 1L) { isDTl <- inherits(x, "data.table") ax[["names"]] <- names(res) ax[["row.names"]] <- if(use.g.names && !isDTl && length(gn <- GRPnames(g))) gn else .set_row_names(.Call(C_fnrow, res)) } else { isDTl <- FALSE ax <- list(names = names(res), row.names = if(use.g.names && length(gn <- GRPnames(g))) gn else .set_row_names(.Call(C_fnrow, res)), class = "data.frame") } return(condalcSA(res, ax, isDTl)) } else { # Return a matrix attributes(x) <- NULL splitfun <- if(multi) function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)) else function(y) do.call(rbind, lapply(gsplit(y, g), FUN, ...)) res <- do.call(cbind, aplyfun(x, splitfun)) cn <- dimnames(res)[[2L]] namr <- rep(ax[["names"]], each = ncol(res)/length(x)) dimnames(res) <- list(if(use.g.names) GRPnames(g) else NULL, if(length(cn)) paste(namr, cn, sep = ".") else namr) return(res) } } # No expand wide (classical result) matl <- return == 3L isDTl <- !matl && return != 2L && inherits(x, "data.table") # is data table and return data.table rownam <- ax[["row.names"]] attributes(x) <- NULL # Returning plain data frame if(return == 2L) ax <- list(names = ax[["names"]], row.names = rownam, class = "data.frame") # Using group names... if(use.g.names && !isDTl && !is.null(g$groups)) { res <- vector("list", length(x)) res1 <- if(multi) .mapply(FUN, c(list(gsplit(x[[1L]], g)), asl), mord) else lapply(gsplit(x[[1L]], g), FUN, ...) if(length(names(res1[[1L]]))) { # We apply a function that assigns names (e.g. quantile()) names(res1) <- GRPnames(g) res1 <- unlist(res1, recursive = FALSE, use.names = TRUE) rn <- names(res1) names(res1) <- NULL if(reorder && length(res1) == n && !isTRUE(g$ordered[2L])) { warning("nrow(result) is same as nrow(x) but the grouping is not sorted and the function used added names. Thus BY cannot decisively distinguish whether you are using a transformation function like scale() or a summary function like quantile() that computes a vector of statistics. The latter is assumed and the result is not reordered. To receive reordered output without constructed names set use.g.names = FALSE") reorder <- FALSE } } else { # function doesn't assign names, different options. res1 <- funlist(res1) if(length(res1) == g[[1L]]) rn <- GRPnames(g) else if(matl) { rn <- if(length(res1) == n && is.character(rownam) && rownam[1L] != "1" && (reorder || isTRUE(g$ordered[2L]))) rownam else NULL } else { # Important to check length(rn) below (simply keeps ax[["row.names"]]) rn <- if(length(res1) != n || !(reorder || isTRUE(g$ordered[2L]))) .set_row_names(length(res1)) else NULL } } # Finish computing results... if(matl) { res[[1L]] <- res1 if(length(res) > 1L) res[-1L] <- if(multi) aplyfun(x[-1L], splmaplfun, g, FUN, asl, mord) else aplyfun(x[-1L], splaplfun, g, FUN, ...) res <- do.call(cbind, res) } else { res[[1L]] <- copyMostAttributes(res1, x[[1L]]) if(length(res) > 1L) res[-1L] <- if(multi) aplyfun(x[-1L], copysplmaplfun, g, FUN, asl, mord) else aplyfun(x[-1L], copysplaplfun, g, FUN, ...) } # Not using group names... } else { if(matl) { res <- if(multi) do.call(cbind, aplyfun(x, splmaplfun, g, FUN, asl, mord)) else do.call(cbind, aplyfun(x, splaplfun, g, FUN, ...)) rn <- if(nrow(res) == n && is.character(rownam) && rownam[1L] != "1" && (reorder || isTRUE(g$ordered[2L]))) rownam else NULL } else { res <- if(multi) aplyfun(x, copysplmaplfun, g, FUN, asl, mord) else aplyfun(x, copysplaplfun, g, FUN, ...) # isDTL ? -> Not needed as data.tables cannot have character row-names anyway. rn <- if(.Call(C_fnrow, res) != n || !(reorder || isTRUE(g$ordered[2L]))) .set_row_names(.Call(C_fnrow, res)) else NULL } } # reorder result if necessary, without dimnames... if(reorder && fnrow(res) == n && !isTRUE(g$ordered[2L])) { ind <- .Call(C_greorder, seq_len(n), g) res <- if(matl) res[ind, , drop = FALSE] else .Call(C_subsetDT, res, ind, seq_along(res), FALSE) } if(matl) { dimnames(res) <- list(rn, ax[["names"]]) return(res) } if(length(rn)) ax[["row.names"]] <- rn return(condalcSA(res, ax, isDTl)) } BY.list <- function(x, ...) BY.data.frame(x, ...) BY.matrix <- function(x, g, FUN, ..., use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) { if(!is.matrix(x)) stop("x needs to be a matrix") if(!is.function(FUN)) FUN <- match.fun(FUN) aplyfun <- if(parallel) function(...) parallel::mclapply(..., mc.cores = mc.cores) else lapply return <- switch(return[1L], same = 3L, matrix = 2L, data.frame = 1L, list = 0L, stop("Unknown return option!")) g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE) n <- nrow(x) if(!missing(...) && g[[1L]] > 1L && length(ln <- whichv(vlengths(dots <- list(...), FALSE), n))) { asl <- lapply(dots[ln], gsplit, g) if(length(dots) > length(ln)) { mord <- dots[-ln] if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.") } else mord <- NULL multi <- TRUE } else multi <- FALSE # Just plain list output if(return == 0L) { xln <- .Call(Cpp_mctl, x, TRUE, 0L) # Named list from matrix if(multi) { if(expand.wide) return(aplyfun(xln, function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord)))) return(aplyfun(xln, function(y) .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord))) } if(expand.wide) return(aplyfun(xln, function(y) do.call(rbind, lapply(gsplit(y, g, use.g.names), FUN, ...)))) return(aplyfun(xln, function(y) lapply(gsplit(y, g, use.g.names), FUN, ...))) } # Wider output (for multiple summary statistics like quantile()) if(expand.wide) { if(return == 1L) { # Return data frame splitfun <- if(multi) function(y) .Call(Cpp_mctl, do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)), TRUE, 0L) else function(y) .Call(Cpp_mctl, do.call(rbind, lapply(gsplit(y, g), FUN, ...)), TRUE, 0L) res <- unlist(aplyfun(.Call(Cpp_mctl, x, TRUE, 0L), splitfun), recursive = FALSE, use.names = TRUE) ax <- list(names = names(res), row.names = if(use.g.names && length(gn <- GRPnames(g))) gn else .set_row_names(.Call(C_fnrow, res)), class = "data.frame") } else { # Return a matrix splitfun2 <- if(multi) function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)) else function(y) do.call(rbind, lapply(gsplit(y, g), FUN, ...)) res <- do.call(cbind, aplyfun(.Call(Cpp_mctl, x, FALSE, 0L), splitfun2)) cn <- dimnames(res)[[2L]] namr <- rep(dimnames(x)[[2L]], each = ncol(res)/ncol(x)) dn <- list(if(use.g.names) GRPnames(g) else NULL, if(length(cn)) paste(namr, cn, sep = ".") else namr) if(return == 2L) return(`dimnames<-`(res, dn)) ax <- attributes(x) ax[["dim"]] <- dim(res) ax[["dimnames"]] <- dn } return(setAttributes(res, ax)) } dn <- dimnames(x) matl <- return > 1L xl <- .Call(Cpp_mctl, x, FALSE, 0L) # Plain list from matrix columns # No expand wide (classical result) if(use.g.names && !is.null(g$groups)) { res <- vector("list", length(xl)) res1 <- if(multi) .mapply(FUN, c(list(gsplit(xl[[1L]], g)), asl), mord) else lapply(gsplit(xl[[1L]], g), FUN, ...) if(length(names(res1[[1L]]))) { # We apply a function that assigns names (e.g. quantile()) names(res1) <- GRPnames(g) res1 <- unlist(res1, recursive = FALSE, use.names = TRUE) rn <- names(res1) names(res1) <- NULL if(reorder && length(res1) == n && !isTRUE(g$ordered[2L])) { warning("nrow(result) is same as nrow(x) but the grouping is not sorted and the function used added names. Thus BY cannot decisively distinguish whether you are using a transformation function like scale() or a summary function like quantile() that computes a vector of statistics. The latter is assumed and the result is not reordered. To receive reordered output without constructed names set use.g.names = FALSE") reorder <- FALSE } } else { # function doesn't assign names, different options. res1 <- funlist(res1) rn <- if(length(res1) == g[[1L]]) GRPnames(g) else if(length(res1) == n && (reorder || isTRUE(g$ordered[2L]))) dn[[1L]] else NULL } # Finish computing results... res[[1L]] <- res1 if(length(res) > 1L) res[-1L] <- if(multi) aplyfun(xl[-1L], splmaplfun, g, FUN, asl, mord) else aplyfun(xl[-1L], splaplfun, g, FUN, ...) if(matl) { # Return a matrix res <- do.call(cbind, res) dn <- list(rn, dn[[2L]]) } } else { # Not using group names res <- if(multi) aplyfun(xl, splmaplfun, g, FUN, asl, mord) else aplyfun(xl, splaplfun, g, FUN, ...) if(matl) { # Return matrix res <- do.call(cbind, res) if(nrow(res) != n || !(reorder || isTRUE(g$ordered[2L]))) dn <- list(NULL, dn[[2L]]) } else { # Return data frame rn <- if(.Call(C_fnrow, res) == n && (reorder || isTRUE(g$ordered[2L]))) dn[[1L]] else NULL } } # reorder result if necessary, without dimnames... if(reorder && fnrow(res) == n && !isTRUE(g$ordered[2L])) { ind <- .Call(C_greorder, seq_len(n), g) res <- if(matl) res[ind, , drop = FALSE] else .Call(C_subsetDT, res, ind, seq_along(res), FALSE) } if(matl) { if(return == 2L) return(`dimnames<-`(res, dn)) ax <- attributes(x) ax[["dim"]] <- dim(res) ax[["dimnames"]] <- dn } else { # Returning a data.frame ax <- list(names = dn[[2L]], row.names = if(length(rn)) rn else .set_row_names(.Call(C_fnrow, res)), class = "data.frame") } return(setAttributes(res, ax)) } BY.grouped_df <- function(x, FUN, ..., reorder = TRUE, keep.group_vars = TRUE, use.g.names = FALSE) { g <- GRP.grouped_df(x, call = FALSE) gn <- which(attr(x, "names") %in% g[[5L]]) res <- BY.data.frame(if(length(gn)) fcolsubset(x, -gn) else x, g, FUN, ..., reorder = reorder, use.g.names = use.g.names) # Other return options if(!is.data.frame(res)) return(res) n <- fnrow(res) same_size <- n == fnrow(x) if(!same_size && is.null(g[[4L]])) keep.group_vars <- FALSE # Not preserving grouping variable or same size and no grouping variables: return appropriate object if(!keep.group_vars || (same_size && length(gn) == 0L)) return(if(same_size && (reorder || isTRUE(g$ordered[2L]))) res else fungroup(res)) # If same size, with grouping variables... if(same_size) { if(!(reorder || isTRUE(g$ordered[2L]))) return(fungroup(res)) ar <- attributes(res) ar[["names"]] <- c(g[[5L]], ar[["names"]]) return(condalcSA(c(.subset(x, gn), res), ar, any(ar$class == "data.table"))) } # If other size or no groups if(n != g[[1L]]) { if(is.null(g[[4L]])) return(fungroup(res)) len <- n / g[[1L]] if(len != as.integer(len)) stop("length of output (", n, ") is not a multiple of the number of groups: ", g[[1L]]) g[[4L]] <- .Call(C_subsetDT, g[[4L]], rep(seq_len(g[[1L]]), each = len), seq_along(g[[5L]]), FALSE) } # Aggregation ar <- attributes(fungroup2(res, oldClass(res))) ar[["names"]] <- c(g[[5L]], ar[["names"]]) condalcSA(c(g[[4L]], res), ar, any(ar$class == "data.table")) } BY.zoo <- function(x, ...) if(is.matrix(x)) BY.matrix(x, ...) else BY.default(x, ...) BY.units <- BY.zoo # return = "same" preserves attributes by default collapse/R/varying.R0000644000176200001440000001541714777170130014076 0ustar liggesusers varying <- function(x, ...) UseMethod("varying") # , x varying.default <- function(x, g = NULL, any_group = TRUE, use.g.names = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(varying.matrix(x, g, any_group, use.g.names, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_varying,x,0L,0L,any_group)) if(is.atomic(g)) { if(use.g.names && !any_group) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE, sort = !any_group && .op[["sort"]]) lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_varying,x,length(lev),g,any_group), lev)) } if(is.nmfactor(g)) return(.Call(Cpp_varying,x,fnlevels(g),g,any_group)) g <- qG(g, na.exclude = FALSE, sort = !any_group && .op[["sort"]]) return(.Call(Cpp_varying,x,attr(g,"N.groups"),g,any_group)) } if(!is_GRP(g)) g <- GRP.default(g, sort = !any_group && .op[["sort"]], return.groups = use.g.names && !any_group, call = FALSE) if(use.g.names && !any_group) return(`names<-`(.Call(Cpp_varying,x,g[[1L]],g[[2L]],any_group), GRPnames(g))) .Call(Cpp_varying,x,g[[1L]],g[[2L]],any_group) } varying.pseries <- function(x, effect = 1L, any_group = TRUE, use.g.names = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- if(length(effect) == 1L) .subset2(findex(x), effect) else finteraction(.subset(findex(x), effect), sort = !any_group && .op[["sort"]]) if(!any_group && use.g.names) { lev <- attr(g, "levels") return(`names<-`(.Call(Cpp_varying,x,length(lev),g,any_group), lev)) } .Call(Cpp_varying,x,fnlevels(g),g,any_group) } varying.matrix <- function(x, g = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_varyingm,x,0L,0L,any_group,drop)) if(is.atomic(g)) { if(use.g.names && !any_group) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE, sort = !any_group && .op[["sort"]]) lev <- attr(g, "levels") return(`dimnames<-`(.Call(Cpp_varyingm,x,length(lev),g,any_group,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(Cpp_varyingm,x,fnlevels(g),g,any_group,drop)) g <- qG(g, na.exclude = FALSE, sort = !any_group && .op[["sort"]]) return(.Call(Cpp_varyingm,x,attr(g,"N.groups"),g,any_group,drop)) } if(!is_GRP(g)) g <- GRP.default(g, sort = !any_group && .op[["sort"]], return.groups = use.g.names && !any_group, call = FALSE) if(use.g.names && !any_group) return(`dimnames<-`(.Call(Cpp_varyingm,x,g[[1L]],g[[2L]],any_group,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) .Call(Cpp_varyingm,x,g[[1L]],g[[2L]],any_group,drop) } varying.zoo <- function(x, ...) if(is.matrix(x)) varying.matrix(x, ...) else varying.default(x, ...) varying.units <- varying.zoo varying.data.frame <- function(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by)) { nam <- attr(x, "names") if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- if(is.null(cols)) seq_along(unclass(x))[-gn] else cols2int(cols, x, nam, FALSE) } by <- if(length(gn) == 1L) .subset2(x, gn) else GRP.default(x, gn, sort = !any_group && .op[["sort"]], return.groups = use.g.names && !any_group, call = FALSE) x <- fcolsubset(x, cols) } else if(length(cols)) x <- colsubset(x, cols) if(is.null(by)) return(.Call(Cpp_varyingl,x,0L,0L,any_group,drop)) if(is.atomic(by)) { if(use.g.names && !any_group && !inherits(x, "data.table")) { if(!is.nmfactor(by)) by <- qF(by, na.exclude = FALSE, sort = !any_group && .op[["sort"]]) lev <- attr(by, "levels") return(setRnDF(.Call(Cpp_varyingl,x,length(lev),by,any_group,FALSE), lev)) } if(is.nmfactor(by)) return(.Call(Cpp_varyingl,x,fnlevels(by),by,any_group,drop)) by <- qG(by, na.exclude = FALSE, sort = !any_group && .op[["sort"]]) return(.Call(Cpp_varyingl,x,attr(by,"N.groups"),by,any_group,drop)) } if(!is_GRP(by)) by <- GRP.default(by, sort = !any_group && .op[["sort"]], return.groups = use.g.names && !any_group, call = FALSE) if(use.g.names && !any_group && !inherits(x, "data.table") && length(groups <- GRPnames(by))) return(setRnDF(.Call(Cpp_varyingl,x,by[[1L]],by[[2L]],any_group,FALSE), groups)) .Call(Cpp_varyingl,x,by[[1L]],by[[2L]],any_group,drop) } varying.list <- function(x, ...) varying.data.frame(x, ...) varying.pdata.frame <- function(x, effect = 1L, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- unclass(findex(x)) g <- if(length(effect) == 1L) index[[effect]] else finteraction(index[effect], sort = !any_group && .op[["sort"]]) x <- if(is.null(cols)) fcolsubset(x, attr(x, "names") %!in% names(index[effect])) else colsubset(x, cols) res <- if(!any_group && use.g.names) { lev <- attr(g, "levels") setRnDF(.Call(Cpp_varyingl,x,length(lev),g,any_group,FALSE), lev) } else .Call(Cpp_varyingl,x,fnlevels(g),g,any_group,drop) return(if(any_group) res else unindex_light(res)) } varying.grouped_df <- function(x, any_group = TRUE, use.g.names = FALSE, drop = TRUE, keep.group_vars = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) nam <- attr(x, "names") ngn <- nam %!in% g[[5L]] if(any_group) { if(!all(ngn)) x <- if(drop) .subset(x, ngn) else fcolsubset(x, ngn) return(.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],any_group,drop)) } if(is.null(g[[4L]])) keep.group_vars <- FALSE ax <- attributes(x) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(!all(ngn)) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[ngn]) return(setAttributes(c(g[[4L]],.Call(Cpp_varyingl,.subset(x, ngn),g[[1L]],g[[2L]],FALSE,FALSE)), ax)) } ax[["names"]] <- nam[ngn] return(setAttributes(.Call(Cpp_varyingl,.subset(x, ngn),g[[1L]],g[[2L]],FALSE,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],FALSE,FALSE)), ax)) } else return(setAttributes(.Call(Cpp_varyingl,x,g[[1L]],g[[2L]],FALSE,FALSE), ax)) } varying.sf <- function(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) { clx <- oldClass(x) oldClass(x) <- NULL x[[attr(x, "sf_column")]] <- NULL oldClass(x) <- clx[clx != "sf"] if(any(clx == "grouped_df")) return(varying.grouped_df(x, any_group, use.g.names, drop, ...)) varying.data.frame(x, by, cols, any_group, use.g.names, drop, ...) } collapse/R/ffirst.R0000644000176200001440000001312614777170130013707 0ustar liggesusers # Note: for foundational changes to this code see fsum.R ffirst <- function(x, ...) UseMethod("ffirst") # , x ffirst.default <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(ffirst.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_ffirst,x,0L,0L,NULL,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_ffirst,x,length(lev),g,NULL,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_ffirst,x,fnlevels(g),g,NULL,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_ffirst,x,attr(g,"N.groups"),g,NULL,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_ffirst,x,g[[1L]],g[[2L]],g[[8L]],na.rm), GRPnames(g))) return(.Call(C_ffirst,x,g[[1L]],g[[2L]],g[[8L]],na.rm)) } if(is.null(g)) return(TRAC(x,.Call(C_ffirst,x,0L,0L,NULL,na.rm),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_ffirst,x,g[[1L]],g[[2L]],g$group.starts,na.rm),g[[2L]],TRA, ...) } ffirst.matrix <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_ffirstm,x,0L,0L,NULL,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_ffirstm,x,length(lev),g,NULL,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_ffirstm,x,fnlevels(g),g,NULL,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_ffirstm,x,attr(g,"N.groups"),g,NULL,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_ffirstm,x,g[[1L]],g[[2L]],g[[8L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_ffirstm,x,g[[1L]],g[[2L]],g[[8L]],na.rm,FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(C_ffirstm,x,0L,0L,NULL,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_ffirstm,x,g[[1L]],g[[2L]],g$group.starts,na.rm,FALSE),g[[2L]],TRA, ...) } ffirst.zoo <- function(x, ...) if(is.matrix(x)) ffirst.matrix(x, ...) else ffirst.default(x, ...) ffirst.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(ffirst.matrix(x, ...), x) else ffirst.default(x, ...) ffirst.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) if(drop) return(unlist(.Call(C_ffirstl,x,0L,0L,NULL,na.rm))) else return(.Call(C_ffirstl,x,0L,0L,NULL,na.rm)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_ffirstl,x,length(lev),g,NULL,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_ffirstl,x,fnlevels(g),g,NULL,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_ffirstl,x,attr(g,"N.groups"),g,NULL,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_ffirstl,x,g[[1L]],g[[2L]],g[[8L]],na.rm), groups)) return(.Call(C_ffirstl,x,g[[1L]],g[[2L]],g[[8L]],na.rm)) } if(is.null(g)) return(TRAlC(x,.Call(C_ffirstl,x,0L,0L,NULL,na.rm),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_ffirstl,x,g[[1L]],g[[2L]],g$group.starts,na.rm),g[[2L]],TRA, ...) } ffirst.list <- function(x, ...) ffirst.data.frame(x, ...) ffirst.grouped_df <- function(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],g[[8L]],na.rm)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],g[[8L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_ffirstl,x,g[[1L]],g[[2L]],g[[8L]],na.rm)), ax)) } else return(setAttributes(.Call(C_ffirstl,x,g[[1L]],g[[2L]],g[[8L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],TRAlC(x[-gn],.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],g[[8L]],na.rm),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_ffirstl,x[-gn],g[[1L]],g[[2L]],g[[8L]],na.rm),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_ffirstl,x,g[[1L]],g[[2L]],g[[8L]],na.rm),g[[2L]],TRA, ...)) } collapse/R/pwcor_pwcov_pwnobs.R0000644000176200001440000003005014777170130016345 0ustar liggesusers# sumcc <- function(x, y) bsum(complete.cases(x,y)) # pwnobs <- function(x) qM(dapply(x, function(y) dapply(x, sumcc, y))) pwnobs <- function(X) { if(is.atomic(X) && is.matrix(X)) return(.Call(Cpp_pwnobsm, X)) # cn <- dimnames(X)[[2L]] # X <- mctl(X) if(!is.list(X)) stop("X must be a matrix or data.frame!") # -> if unequal length will warn below !! dg <- fnobs.data.frame(X) oldClass(X) <- NULL n <- length(X) nr <- .Call(C_fnrow, X) N.mat <- diag(dg) for (i in 1:(n - 1L)) { miss <- is.na(X[[i]]) # faster than complete.cases, also for large data ! // subsetting X[[j]] faster ?? -> NOPE ! for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - bsum(miss | is.na(X[[j]])) # bsum(complete.cases(X[[i]], X[[j]])) } dimnames(N.mat) <- list(names(dg), names(dg)) N.mat } # pwNobs <- function(X) { # .Deprecated(msg = "'pwNobs' was renamed to 'pwnobs'. It will be removed end of 2023, see help('collapse-renamed').") # pwnobs(X) # } # corr.p <- function(r, n) { # if (n < 3L) return(1) # df <- n - 2L # t <- sqrt(df) * r/sqrt(1 - r^2) # return(2 * bmin(pt(t, df), pt(t, df, lower.tail = FALSE))) # taken from corr.test # } corr.pmat <- function(cm, nm) { df <- nm - 2L acm <- abs(cm) diag(acm) <- NA_real_ # tiny bit faster here vs below.. `attributes<-`(2 * pt(sqrt(df) * acm/sqrt(1 - acm^2), df, lower.tail = FALSE), attributes(cm)) # n <- ncol(cm) # p.mat <- matrix(NA, n, n, dimnames = dimnames(cm)) # for (i in 1:(n - 1)) { # for (j in (i + 1):n) { # p.mat[i, j] <- p.mat[j, i] <- corr.p(cm[i, j], nm[i, j]) # } # } # p.mat } complpwnobs <- function(X) { # if(is.list(X)) { # Not needed anymore because now always coercing to matrix... # n <- length(unclass(X)) # coln <- attr(X, "names") # } else { n <- ncol(X) coln <- dimnames(X)[[2L]] # } matrix(bsum(complete.cases(X)), n, n, dimnames = list(coln, coln)) } # Test: # all.equal(Hmisc::rcorr(qM(mtcars))$P, corr.pmat(r, n)) namat <- function(X) { nc <- dim(X)[2L] cn <- dimnames(X)[[2L]] mat <- rep(NA_real_, nc * nc) dim(mat) <- c(nc, nc) diag(mat) <- 1 dimnames(mat) <- list(cn, cn) mat } nmat <- function(n, X) { nc <- dim(X)[2L] cn <- dimnames(X)[[2L]] mat <- rep(n, nc * nc) dim(mat) <- c(nc, nc) dimnames(mat) <- list(cn, cn) mat } # Check speed of it ... # Also check weighted cor p-value against lm() with weights -> Good !! # -> This is good # all.equal(unattrib(cov.wt(mtcars, w, cor = TRUE)$cor), unattrib(pwcor(mtcars, w = w))) # all.equal(unattrib(cov.wt(mtcars, w, cor = TRUE)$cor), unattrib(pwcor(mtcars, w = w, use = "complete.obs"))) # all.equal(pwcor(mtcars, w = w), pwcor(mtcars, w = w, use = "complete.obs")) pwcor <- function(X, ..., w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") { if(is.list(X)) X <- do.call(cbind, X) lcc <- FALSE if(is.null(w)) r <- cor(X, ..., use = use) else if(use == "pairwise.complete.obs") r <- getenvFUN("weights_wtd.cors")(X, ..., weight = w) else { if(!missing(...)) stop("y is currently not supported with weighted correlations and use != 'pairwise.complete.obs'") cc <- which(complete.cases(X, w)) lcc <- length(cc) if(use == "all.obs" && lcc != length(w)) stop("missing observations in cov/cor") if(lcc) { if(lcc != length(w)) { X <- X[cc, , drop = FALSE] w <- w[cc] } r <- cov2cor(crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE))) # all.equal(cov2cor(crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE))), weights::wtd.cors(X, weight = w)) } else r <- switch(use, complete.obs = stop("no complete element pairs"), namat(X)) } if(!(N || P)) return(`oldClass<-`(r, c("pwcor", "matrix"))) n <- if(lcc) nmat(lcc, X) else switch(use, pairwise.complete.obs = pwnobs(X), complpwnobs(X)) # TODO: what about weights paiwrise ? # what if using ... to supply y ??? if(N) { res <- if(P) list(r = r, N = n, P = corr.pmat(r, n)) else list(r = r, N = n) } else res <- list(r = r, P = corr.pmat(r, n)) if(array) { res <- fsimplify2array(res) oldClass(res) <- c("pwcor","array","table") } else oldClass(res) <- "pwcor" res } # Not all equal... # all.equal(unattrib(cov.wt(mtcars, w)$cov), unattrib(pwcov(mtcars, w = w))) # all.equal(unattrib(cov.wt(mtcars, w)$cov), unattrib(pwcov(mtcars, w = w, use = "complete.obs"))) # all.equal(pwcov(mtcars, w = w), pwcov(mtcars, w = w, use = "complete.obs")) -> Yes ! pwcov <- function(X, ..., w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") { if(is.list(X)) X <- do.call(cbind, X) lcc <- FALSE if(is.null(w)) r <- cov(X, ..., use = use) else if(use == "pairwise.complete.obs") { r <- getenvFUN("weights_wtd.cors")(X, ..., weight = w) # sw <- bsum(w, na.rm = TRUE) Xsd <- fsd(X, w = w) # * (sw-1) / (1 - bsum((w/sw)^2)) # cov.wt, method = "unbiased" ??? r <- if(missing(...)) r * outer(Xsd, Xsd) else r * outer(Xsd, fsd(..., w = w)) } else { if(!missing(...)) stop("y is currently not supported with weighted correlations and use != 'pairwise.complete.obs'") cc <- which(complete.cases(X, w)) lcc <- length(cc) if(use == "all.obs" && lcc != length(w)) stop("missing observations in cov/cor") if(lcc) { if(lcc != length(w)) { X <- X[cc, , drop = FALSE] w <- w[cc] } r <- crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE)) / (bsum(w) - 1) # Check numeric accuracy ! # w <- w/bsum(w) # same method as cov.wt, method = "unbiased" # r <- crossprod(sqrt(w) * BWmCpp(X, w = w, narm = FALSE)) / (1 - bsum(w^2)) } else r <- switch(use, complete.obs = stop("no complete element pairs"), namat(X)) # namat correct ?? } if(!(N || P)) return(`oldClass<-`(r, c("pwcov", "matrix"))) n <- if(lcc) nmat(lcc, X) else switch(use, pairwise.complete.obs = pwnobs(X), complpwnobs(X)) # TODO: what about weights paiwrise ? if(N) { # good ??? // cov(X) / outer(fsd(X), fsd(X)) res <- if(P) list(cov = r, N = n, P = corr.pmat(cov2cor(r), n)) else list(cov = r, N = n) # what about x and y here ?? } else res <- list(cov = r, P = corr.pmat(cov2cor(r), n)) if(array) { res <- fsimplify2array(res) oldClass(res) <- c("pwcov","array","table") } else oldClass(res) <- "pwcov" res } print.pwcor <- function(x, digits = .op[["digits"]], sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, ...) { formfun <- function(x, dg1 = FALSE) { xx <- format(round(x, digits)) # , digits = digits-1 xx <- sub("(-?)0\\.", "\\1.", xx) if(dg1) { dgx <- diag(xx) new1 <- paste0(c(" 1", rep(" ",digits-1)), collapse = "") if(!all(st <- startsWith(dgx, " 1") | startsWith(dgx, "1"))) { # can have positive or negative values... dgx[st] <- new1 diag(xx) <- dgx } else diag(xx) <- new1 } else { xna <- is.na(x) xx[xna] <- "" xpos <- x >= 1 & !xna xx[xpos] <- sub(paste0(c(".", rep("0",digits)), collapse = ""), "", xx[xpos]) # Problem: Deletes .00 also.. } return(xx) } show <- switch(show[1L], all = 1L, lower.tri = 2L, upper.tri = 3L, stop("Unknown 'show' option")) se <- "Allowed spacing options are 0, 1 and 2!" if(is.array(x)) { sc <- TRUE d <- dim(x) ld <- length(d) if(ld > 2L) { dn <- dimnames(x) d3 <- dn[[3L]] if(all(d3 %in% c("r","N","P"))) { if(length(d3) == 3L) { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 3L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s(%i)", "%s%s (%i)", " %s%s (%i)", stop(se)), formfun(x[,, 1L], TRUE), sig, x[,, 2L]) # paste0(formfun(x[,, 1L]),sig,"(",x[,, 2L],")") } else if(d3[2L] == "P") { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 2L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s", " %s%s", " %s %s", stop(se)), formfun(x[,, 1L], TRUE), sig) } else res <- sprintf(switch(spacing+1L, "%s(%i)", "%s (%i)", " %s (%i)", stop(se)), formfun(x[,, 1L], TRUE), x[,, 2L]) } else { sc <- FALSE res <- duplAttributes(switch(spacing+1L, formfun(x), sprintf(" %s",formfun(x)), sprintf(" %s",formfun(x)), stop(se)), x) # remove this before publishing !!! } if(sc) attributes(res) <- list(dim = d[1:2], dimnames = dn[1:2]) } else res <- if(spacing == 0L) formfun(x, TRUE) else duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)), formfun(x, TRUE)), x) if(sc && show != 1L) if(show == 2L) res[upper.tri(res)] <- "" else res[lower.tri(res)] <- "" } else if(is.list(x)) { if(spacing == 0L) res <- lapply(x, formfun) else { ff <- function(i) duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)),formfun(i)), i) res <- lapply(x, ff) } if(show != 1L) res <- if(show == 2L) lapply(res, function(i){i[upper.tri(i)] <- ""; i}) else lapply(res, function(i){i[lower.tri(i)] <- ""; i}) } else res <- formfun(x) if(return) return(unclass(res)) print.default(unclass(res), quote = FALSE, right = TRUE, ...) invisible(x) } #print.table(dapply(round(x, digits), function(j) sub("^(-?)0.", "\\1.", j)), right = TRUE, ...) # print.table(, right = TRUE) print.pwcov <- function(x, digits = .op[["digits"]], sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, ...) { formfun <- function(x, adj = FALSE) { xx <- format(round(x, digits), digits = 9, big.mark = "'", big.interval = 6) # xx <- sub("(-?)0\\.", "\\1.", xx) # Not needed here... if(adj) { xna <- is.na(x) xx[xna] <- "" xpos <- x >= 1 & !xna xx[xpos] <- sub(paste0(c(".", rep("0",digits)), collapse = ""), "", xx[xpos]) # Problem: Deletes .00 also.. } return(xx) } show <- switch(show[1L], all = 1L, lower.tri = 2L, upper.tri = 3L, stop("Unknown 'show' option")) se <- "Allowed spacing options are 0, 1 and 2!" if(is.array(x)) { sc <- TRUE d <- dim(x) ld <- length(d) if(ld > 2L) { dn <- dimnames(x) d3 <- dn[[3L]] if(all(d3 %in% c("cov","N","P"))) { if(length(d3) == 3L) { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 3L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s(%i)", "%s%s (%i)", " %s%s (%i)", stop(se)), formfun(x[,, 1L]), sig, x[,, 2L]) # paste0(formfun(x[,, 1L]),sig,"(",x[,, 2L],")") } else if(d3[2L] == "P") { sig <- matrix(" ", d[1L], d[2L]) sig[x[,, 2L] <= sig.level] <- "*" res <- sprintf(switch(spacing+1L, "%s%s", " %s%s", " %s %s", stop(se)), formfun(x[,, 1L]), sig) } else res <- sprintf(switch(spacing+1L, "%s(%i)", "%s (%i)", " %s (%i)", stop(se)), formfun(x[,, 1L]), x[,, 2L]) } else { sc <- FALSE res <- duplAttributes(switch(spacing+1L, formfun(x, TRUE), sprintf(" %s",formfun(x, TRUE)), sprintf(" %s",formfun(x, TRUE)), stop(se)), x) # remove this before publishing !!! } if(sc) attributes(res) <- list(dim = d[1:2], dimnames = dn[1:2]) } else res <- if(spacing == 0L) formfun(x) else duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)), formfun(x)), x) if(sc && show != 1L) if(show == 2L) res[upper.tri(res)] <- "" else res[lower.tri(res)] <- "" } else if(is.list(x)) { if(spacing == 0L) res <- lapply(x, formfun, TRUE) else { ff <- function(i) duplAttributes(sprintf(switch(spacing," %s"," %s",stop(se)),formfun(i, TRUE)), i) res <- lapply(x, ff) } if(show != 1L) res <- if(show == 2L) lapply(res, function(i){i[upper.tri(i)] <- ""; i}) else lapply(res, function(i){i[lower.tri(i)] <- ""; i}) } else res <- formfun(x) if(return) return(unclass(res)) print.default(unclass(res), quote = FALSE, right = TRUE, ...) invisible(x) } #print.table(dapply(round(x, digits), function(j) sub("^(-?)0.", "\\1.", j)), right = TRUE, ...) # print.table(, right = TRUE) # print.pwcov <- function(x, digits = 2, ...) print.default(formatC(round(x, digits), format = "g", # digits = 9, big.mark = "'", big.interval = 6), quote = FALSE, right = TRUE, ...) `[.pwcor` <- `[.pwcov` <- function(x, i, j, ..., drop = TRUE) `oldClass<-`(NextMethod(), oldClass(x)) collapse/R/fndistinct.R0000644000176200001440000001122714777170130014557 0ustar liggesusers fndistinct <- function(x, ...) UseMethod("fndistinct") # , x fndistinct.default <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, nthreads = .op[["nthreads"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fndistinct.matrix(x, g, TRA, na.rm, use.g.names, nthreads = nthreads, ...)) if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fndistinct,x,g,na.rm,nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(res) if(use.g.names) names(res) <- GRPnames(g, FALSE) return(res) } TRAC(x,res,g[[2L]],TRA, ...) } fndistinct.matrix <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], ...) { if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fndistinctm,x,g,na.rm,drop,nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(res) if(use.g.names) dimnames(res)[[1L]] <- GRPnames(g) return(res) } TRAmC(x,res,g[[2L]],TRA, ...) } fndistinct.zoo <- function(x, ...) if(is.matrix(x)) fndistinct.matrix(x, ...) else fndistinct.default(x, ...) fndistinct.units <- fndistinct.zoo fndistinct.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], ...) { if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fndistinctl,x,g,na.rm,drop,nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(res) if(use.g.names && !inherits(x, "data.table") && length(gn <- GRPnames(g))) attr(res, "row.names") <- gn return(res) } TRAlC(x,res,g[[2L]],TRA, ...) } fndistinct.list <- function(x, ...) fndistinct.data.frame(x, ...) fndistinct.grouped_df <- function(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, nthreads = .op[["nthreads"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_fndistinctl,x,g,na.rm,FALSE,nthreads)), ax)) } else return(setAttributes(.Call(C_fndistinctl,x,g,na.rm,FALSE,nthreads), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],TRAlC(x[-gn],.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fndistinctl,x,g,na.rm,FALSE,nthreads),g[[2L]],TRA, ...)) } fNdistinct <- function(x, ...) { message("Note that 'fNdistinct' was renamed to 'fndistinct'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fndistinct") } fNdistinct.default <- function(x, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fndistinct.matrix(x, ...)) # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fndistinct.default(x, ...) } fNdistinct.matrix <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fndistinct.matrix(x, ...) } fNdistinct.data.frame <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fndistinct.data.frame(x, ...) } collapse/R/qtab.R0000644000176200001440000000562514777170130013346 0ustar liggesusers qtab <- function(..., w = NULL, wFUN = NULL, wFUN.args = NULL, dnn = "auto", sort = .op[["sort"]], na.exclude = TRUE, drop = FALSE, method = "auto") { ll <- ...length() == 1L && is.list(..1) l <- if(ll) unclass(..1) else list(...) n <- length(l) dn <- vector("list", n) dm <- integer(n) names(dn) <- if(is.character(dnn)) { if(length(dnn) > 1L) dnn else { nam <- names(l) nam <- switch(dnn, auto =, namlab = if(ll) nam else if(is.null(nam)) .c(...) else if(all(has_nam <- nzchar(nam))) nam else `[<-`(nam, !has_nam, value = .c(...)[!has_nam]), dnn) if(dnn != "namlab") nam else paste(nam, setv(vlabels(l, use.names = FALSE), NA, ""), sep = ": ") } } else if(is.function(dnn)) dnn(l) else unlist(dnn, use.names = FALSE) # tofact <- function(g) { # if(is.factor(g)) { # if(!na.exclude && !inherits(g, "na.included")) return(addNA2(g)) # return(g) # } # groupfact(g, ord = FALSE, fact = TRUE, naincl = !na.exclude, keep = FALSE) # } g <- qF(l[[1L]], sort = sort, na.exclude = na.exclude, drop = drop, method = method) lev <- attr(g, "levels") dn[[1L]] <- lev dm[1L] <- ngp <- length(lev) attributes(g) <- NULL if(n > 1L) for (i in 2:n) { gi <- qF(l[[i]], sort = sort, na.exclude = na.exclude, drop = drop, method = method) lev <- attr(gi, "levels") dn[[i]] <- lev dm[i] <- length(lev) # attributes(gi) <- NULL # unattrib(x) + (unattrib(y) - 1L) * fnlevels(x) # NA values cause integer overflows... # gi %-=% 1L # gi %*=% ngp # g %+=% gi # TODO: what if g is not a deep copy?? -> seems to work so far. I guess qF() or attributes(g) <- NULL creates a deep copy? .Call(C_fcrosscolon, g, ngp, gi, na.exclude) ngp <- ngp * length(lev) } if(is.null(w) || is.null(wFUN)) tab <- .Call(C_fwtabulate, g, w, ngp, na.exclude) # tabulate(g, nbins = ngp) else { if(is.function(wFUN)) { wf <- l1orlst(as.character(substitute(wFUN))) } else if (is.character(wFUN)) { wf <- wFUN wFUN <- match.fun(wFUN) } else stop("wFUN needs to be a function or function-string") if(na.exclude && anyNA(g)) { nna <- whichNA(g, invert = TRUE) w <- Csv(w, nna) g <- Csv(g, nna) } attr(g, "N.groups") <- ngp oldClass(g) <- c("qG", "na.included") if(is.null(wFUN.args)) { tab <- if(any(wf == .FAST_STAT_FUN)) wFUN(w, g = g, use.g.names = FALSE) else splaplfun(w, g, wFUN) } else { tab <- if(any(wf == .FAST_STAT_FUN)) do.call(wFUN, c(list(x = w, g = g, use.g.names = FALSE), wFUN.args)) else do.call(splaplfun, c(list(x = w, g = g, FUN = wFUN), wFUN.args)) } } dim(tab) <- dm dimnames(tab) <- dn oldClass(tab) <- c("qtab", "table") attr(tab, "sorted") <- sort attr(tab, "weighted") <- !is.null(w) tab } qtable <- function(...) qtab(...) collapse/R/fbetween_fwithin.R0000644000176200001440000004303614777170130015744 0ustar liggesusers ckm <- function(x) if(is.double(x)) x else if(is.character(x) && x == "overall.mean") -Inf else stop("mean must be a number or 'overall.mean'") # better than switch !! # Note: for principal innovations of this code see fsum.R and fscale.R fwithin <- function(x, ...) UseMethod("fwithin") # , x fwithin.default <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fwithin.matrix(x, g, w, na.rm, mean, theta, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BW,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) g <- G_guo(g) .Call(Cpp_BW,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.pseries <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- group_effect(x, effect) res <- if(is.matrix(x)) .Call(Cpp_BWm,x,fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE) else .Call(Cpp_BW,x,fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE) if(is.double(x)) return(res) pseries_to_numeric(res) } fwithin.matrix <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWm,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) g <- G_guo(g) .Call(Cpp_BWm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.zoo <- function(x, ...) if(is.matrix(x)) fwithin.matrix(x, ...) else fwithin.default(x, ...) fwithin.units <- fwithin.zoo fwithin.data.frame <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) g <- G_guo(g) .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.list <- function(x, ...) fwithin.data.frame(x, ...) fwithin.pdata.frame <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- group_effect(x, effect) .Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE) } fwithin.grouped_df <- function(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- substitute(w) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn2 %in% wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2, wn) if(keep.w) gn <- c(gn, wn) } } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], nam[-gn2]) # first term is removed if !length(gn) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } # Within Operator W <- function(x, ...) UseMethod("W") # , x W.default <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(W.matrix(x, g, w, na.rm, mean, theta, ...)) fwithin.default(x, g, w, na.rm, mean, theta, ...) } W.pseries <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, ...) fwithin.pseries(x, effect, w, na.rm, mean, theta, ...) W.matrix <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], ...) { res <- fwithin.matrix(x, g, w, na.rm, mean, theta, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "W.")) res } W.zoo <- function(x, ...) if(is.matrix(x)) W.matrix(x, ...) else W.default(x, ...) W.units <- W.zoo W.grouped_df <- function(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- substitute(w) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn2 %in% wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2, wn) if(keep.w) gn <- c(gn, wn) } } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], do_stub(stub, nam[-gn2], "W.")) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } res <- .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "W.")) res } W.pdata.frame <- function(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], keep.ids = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) nam <- ax[["names"]] g <- group_effect(x, effect) cols_fun <- is.function(cols) if(cols_fun && identical(cols, is.numeric)) cols <- which(.Call(C_vtypes, x, 1L)) else if(length(cols)) cols <- cols2int(cols, x, nam) oldClass(x) <- NULL if(cols_fun || keep.ids) { gn <- which(nam %in% attr(findex(x), "nam")) # Needed for 3+ index variables if(length(gn)) { if(cols_fun) cols <- fsetdiff(cols, gn) else if(is.null(cols)) cols <- seq_along(x)[-gn] } if(!keep.ids) gn <- NULL } else gn <- NULL if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn) && length(cols)) { ax[["names"]] <- c(nam[gn], do_stub(stub, nam[cols], "W.")) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)), ax)) } else if(!length(gn)) { ax[["names"]] <- do_stub(stub, nam[cols], "W.") return(setAttributes(.Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE), ax)) } else if(isTRUE(stub) || is.character(stub)) { ax[["names"]] <- do_stub(stub, nam, "W.") return(setAttributes(.Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE), ax)) } else return(.Call(Cpp_BWl,`oldClass<-`(x, ax[["class"]]),fnlevels(g),g,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) } W.data.frame <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], keep.by = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(w)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- cols2intrmgn(gn, cols, x) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.by) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L, NULL) else G_guo(by) } if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn)) { ax[["names"]] <- c(nam[gn], do_stub(stub, nam[cols], "W.")) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE)), ax)) } ax[["names"]] <- do_stub(stub, nam[cols], "W.") return(setAttributes(.Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE), ax)) } else if(length(cols)) { # Need to do like this, otherwise list-subsetting drops attributes ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(isTRUE(stub) || is.character(stub)) attr(x, "names") <- do_stub(stub, attr(x, "names"), "W.") if(is.null(by)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,theta,ckm(mean),FALSE,FALSE)) by <- G_guo(by) .Call(Cpp_BWl,x,by[[1L]],by[[2L]],by[[3L]],w,na.rm,theta,ckm(mean),FALSE,FALSE) } W.list <- function(x, ...) W.data.frame(x, ...) fbetween <- function(x, ...) UseMethod("fbetween") # , x fbetween.default <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fbetween.matrix(x, g, w, na.rm, fill, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BW,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) g <- G_guo(g) .Call(Cpp_BW,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } fbetween.pseries <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- group_effect(x, effect) res <- if(is.matrix(x)) .Call(Cpp_BWm,x,fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill) else .Call(Cpp_BW,x,fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill) if(is.double(x)) return(res) pseries_to_numeric(res) } fbetween.matrix <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWm,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) g <- G_guo(g) .Call(Cpp_BWm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } fbetween.zoo <- function(x, ...) if(is.matrix(x)) fbetween.matrix(x, ...) else fbetween.default(x, ...) fbetween.units <- fbetween.zoo fbetween.data.frame <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) g <- G_guo(g) .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } fbetween.list <- function(x, ...) fbetween.data.frame(x, ...) fbetween.pdata.frame <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- group_effect(x, effect) .Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill) } fbetween.grouped_df <- function(x, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- substitute(w) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn2 %in% wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2, wn) if(keep.w) gn <- c(gn, wn) } } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], nam[-gn2]) # first term is removed if !length(gn) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) } # Between Operator B <- function(x, ...) UseMethod("B") # , x B.default <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(B.matrix(x, g, w, na.rm, fill, ...)) fbetween.default(x, g, w, na.rm, fill, ...) } B.pseries <- function(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, ...) fbetween.pseries(x, effect, w, na.rm, fill, ...) B.matrix <- function(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], ...) { res <- fbetween.matrix(x, g, w, na.rm, fill, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "B.")) res } B.zoo <- function(x, ...) if(is.matrix(x)) B.matrix(x, ...) else B.default(x, ...) B.units <- B.zoo B.grouped_df <- function(x, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], keep.group_vars = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) wsym <- substitute(w) nam <- attr(x, "names") gn2 <- which(nam %in% g[[5L]]) gn <- if(keep.group_vars) gn2 else NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn2 %in% wn)) stop("Weights coincide with grouping variables!") gn2 <- c(gn2, wn) if(keep.w) gn <- c(gn, wn) } } if(length(gn2)) { ax <- attributes(x) ax[["names"]] <- c(nam[gn], do_stub(stub, nam[-gn2], "B.")) res <- .Call(Cpp_BWl, .subset(x, -gn2), g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) if(length(gn)) return(setAttributes(c(.subset(x, gn), res), ax)) else return(setAttributes(res, ax)) } res <- .Call(Cpp_BWl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,1,0,TRUE,fill) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "B.")) res } B.pdata.frame <- function(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], keep.ids = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) nam <- ax[["names"]] g <- group_effect(x, effect) cols_fun <- is.function(cols) if(cols_fun && identical(cols, is.numeric)) cols <- which(.Call(C_vtypes, x, 1L)) else if(length(cols)) cols <- cols2int(cols, x, nam) oldClass(x) <- NULL if(cols_fun || keep.ids) { gn <- which(nam %in% attr(findex(x), "nam")) # Needed for 3+ index variables if(length(gn)) { if(cols_fun) cols <- fsetdiff(cols, gn) else if(is.null(cols)) cols <- seq_along(x)[-gn] } if(!keep.ids) gn <- NULL } else gn <- NULL if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn) && length(cols)) { ax[["names"]] <- c(nam[gn], do_stub(stub, nam[cols], "B.")) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill)), ax)) } else if(!length(gn)) { ax[["names"]] <- do_stub(stub, nam[cols], "B.") return(setAttributes(.Call(Cpp_BWl,x[cols],fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill), ax)) } else if(isTRUE(stub) || is.character(stub)) { ax[["names"]] <- do_stub(stub, nam, "B.") return(setAttributes(.Call(Cpp_BWl,x,fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill), ax)) } else return(.Call(Cpp_BWl,`oldClass<-`(x, ax[["class"]]),fnlevels(g),g,NULL,w,na.rm,1,0,TRUE,fill)) } B.data.frame <- function(x, by = NULL, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], keep.by = TRUE, keep.w = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(w)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- cols2intrmgn(gn, cols, x) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.by) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L, NULL) else G_guo(by) } if(is.call(w)) { wn <- ckmatch(all.vars(w), nam) w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[wn]] cols <- if(is.null(cols)) seq_along(x)[-wn] else cols[cols != wn] if(keep.w) gn <- c(gn, wn) } if(length(gn)) { ax[["names"]] <- c(nam[gn], do_stub(stub, nam[cols], "B.")) return(setAttributes(c(x[gn], .Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,1,0,TRUE,fill)), ax)) } ax[["names"]] <- do_stub(stub, nam[cols], "B.") return(setAttributes(.Call(Cpp_BWl,x[cols],by[[1L]],by[[2L]],by[[3L]],w,na.rm,1,0,TRUE,fill), ax)) } else if(length(cols)) { # Necessary, else attributes are dropped by list-subsetting ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(isTRUE(stub) || is.character(stub)) attr(x, "names") <- do_stub(stub, attr(x, "names"), "B.") if(is.null(by)) return(.Call(Cpp_BWl,x,0L,0L,NULL,w,na.rm,1,0,TRUE,fill)) by <- G_guo(by) .Call(Cpp_BWl,x,by[[1L]],by[[2L]],by[[3L]],w,na.rm,1,0,TRUE,fill) } B.list <- function(x, ...) B.data.frame(x, ...) collapse/R/fcumsum.R0000644000176200001440000000660514777170130014075 0ustar liggesusers ford <- function(x, g = NULL) { if(is.null(x)) return(NULL) if(!is.null(g)) { x <- c(if(is.atomic(g)) list(g) else if(is_GRP(g)) g[2L] else g, if(is.atomic(x)) list(x) else x, list(method = "radix")) return(do.call(order, x)) } if(is.list(x)) return(do.call(order, c(x, list(method = "radix")))) if(length(x) < 1000L) .Call(C_radixsort, TRUE, FALSE, FALSE, FALSE, TRUE, pairlist(x)) else order(x, method = "radix") } fcumsum <- function(x, ...) UseMethod("fcumsum") # , x fcumsum.default <- function(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("fcumsum", unclass(x))) if(!missing(...)) unused_arg_action(match.call(), ...) if(length(o) && check.o) o <- ford(o, g) if(is.null(g)) return(.Call(C_fcumsum,x,0L,0L,o,na.rm,fill)) g <- G_guo(g) .Call(C_fcumsum,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.pseries <- function(x, na.rm = .op[["na.rm"]], fill = FALSE, shift = "time", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- uncl2pix(x) g <- index[[1L]] o <- switch(shift, time = ford(index[[2L]], g), row = NULL, stop("'shift' must be either 'time' or 'row'")) if(is.matrix(x)) .Call(C_fcumsumm,x,fnlevels(g),g,o,na.rm,fill) else .Call(C_fcumsum,x,fnlevels(g),g,o,na.rm,fill) } fcumsum.matrix <- function(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(length(o) && check.o) o <- ford(o, g) if(is.null(g)) return(.Call(C_fcumsumm,x,0L,0L,o,na.rm,fill)) g <- G_guo(g) .Call(C_fcumsumm,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.zoo <- function(x, ...) if(is.matrix(x)) fcumsum.matrix(x, ...) else fcumsum.default(x, ...) fcumsum.units <- fcumsum.zoo fcumsum.grouped_df <- function(x, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) osym <- substitute(o) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(!is.null(osym)) { o <- eval(osym, x, parent.frame()) if(!anyNA(on <- match(all.vars(osym), nam))) { gn <- c(gn, on) if(anyDuplicated.default(gn)) stop("timevar coincides with grouping variables!") } if(check.o) o <- ford(o, g) } if(length(gn)) { ax <- attributes(x) res <- .Call(C_fcumsuml,.subset(x,-gn),g[[1L]],g[[2L]],o,na.rm,fill) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } .Call(C_fcumsuml,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.data.frame <- function(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(length(o) && check.o) o <- ford(o, g) if(is.null(g)) return(.Call(C_fcumsuml,x,0L,0L,o,na.rm,fill)) g <- G_guo(g) .Call(C_fcumsuml,x,g[[1L]],g[[2L]],o,na.rm,fill) } fcumsum.list <- function(x, ...) fcumsum.data.frame(x, ...) fcumsum.pdata.frame <- function(x, na.rm = .op[["na.rm"]], fill = FALSE, shift = "time", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- uncl2pix(x) g <- index[[1L]] o <- switch(shift, time = ford(index[[2L]], g), row = NULL, stop("'shift' must be either 'time' or 'row'")) .Call(C_fcumsuml,x,fnlevels(g),g,o,na.rm,fill) } collapse/R/zzz.R0000644000176200001440000002555615056572047013265 0ustar liggesusers.datatable.aware <- TRUE do_collapse_mask <- function(clpns, mask) { if(!is.character(mask)) stop("Option collapse_mask needs to be character typed") # This ensures that you can pass functions with or without f- prefix to the option mask_ffunl <- mask %!in% c("all", "helper", "manip", "special", "fast-fun", "fast-stat-fun", "fast-trfm-fun", "n", "qtab", "qtable", "table", "%in%") if(any(mask_ffunl)) { mask_ffun <- mask[mask_ffunl] has_f_prefix <- startsWith(mask_ffun, "f") if(!all(has_f_prefix)) { mask_ffun[!has_f_prefix] <- paste0("f", mask_ffun[!has_f_prefix]) mask[mask_ffunl] <- mask_ffun } } # This now does the preprocessing (interpreting keywords and changing internal optimization flags as required) if(any(mask == "all")) mask <- c("helper", "manip", "special", "fast-fun", if(length(mask) > 1L) mask[mask != "all"] else NULL) manipfun <- c("fsubset", "fslice", "fslicev", "ftransform", "ftransform<-", "ftransformv", "fcompute", "fcomputev", "fselect", "fselect<-", "fgroup_by", "fgroup_vars", "fungroup", "fsummarise", "fsummarize", "fmutate", "frename", "findex_by", "findex") helperfun <- c("fdroplevels", "finteraction", "fnlevels", "fmatch", "funique", "fnunique", "fduplicated", "fcount", "fcountv", "fquantile", "frange", "fdist", "fnrow", "fncol") # , "fdim": Problem of infinite recursion... specialfun <- c("n", "table", "%in%") if(any(mask == "helper")) mask <- unique.default(c(helperfun, mask[mask != "helper"])) if(any(mask == "manip")) mask <- unique.default(c(manipfun, mask[mask != "manip"])) if(any(mask == "special")) mask <- unique.default(c(specialfun, mask[mask != "special"])) if(any(mask == "fast-fun")) { mask <- unique.default(c(.FAST_FUN, mask[mask != "fast-fun"])) FSF_mask <- substr(.FAST_STAT_FUN, 2L, 100L) assign(".FAST_STAT_FUN_EXT", c(.FAST_STAT_FUN_EXT, FSF_mask, paste0(FSF_mask, "_uw")), envir = clpns) assign(".FAST_STAT_FUN_POLD", c(.FAST_STAT_FUN_POLD, FSF_mask), envir = clpns) ffnops <- fsetdiff(.FAST_FUN_MOPS, c(.OPERATOR_FUN, "fNobs", "fNdistinct", "GRPN", "GRPid", "n")) assign(".FAST_FUN_MOPS", c(.FAST_FUN_MOPS, substr(ffnops, 2L, 100L)), envir = clpns) } else { if(any(mask == "fast-stat-fun")) { mask <- unique.default(c(.FAST_STAT_FUN, mask[mask != "fast-stat-fun"])) FSF_mask <- substr(.FAST_STAT_FUN, 2L, 100L) assign(".FAST_STAT_FUN_EXT", c(.FAST_STAT_FUN_EXT, FSF_mask, paste0(FSF_mask, "_uw")), envir = clpns) assign(".FAST_STAT_FUN_POLD", c(.FAST_STAT_FUN_POLD, FSF_mask), envir = clpns) assign(".FAST_FUN_MOPS", c(.FAST_FUN_MOPS, FSF_mask), envir = clpns) } if(any(mask == "fast-trfm-fun")) { ftf <- fsetdiff(.FAST_FUN, .FAST_STAT_FUN) mask <- unique.default(c(ftf, mask[mask != "fast-trfm-fun"])) assign(".FAST_FUN_MOPS", c(.FAST_FUN_MOPS, substr(fsetdiff(ftf, c("fhdbetween", "fhdwithin")), 2L, 100L)), envir = clpns) } } unmask_special <- NULL # Special Cases / Functions if(any(mask == "n")) { unmask_special <- "n" mask <- mask[mask != "n"] if(is.null(clpns[["n"]])) assign("n", clpns[["n_internal"]], envir = clpns) assign(".FAST_STAT_FUN_EXT", c(.FAST_STAT_FUN_EXT, "n"), envir = clpns) assign(".FAST_STAT_FUN_POLD", c(.FAST_STAT_FUN_POLD, "n"), envir = clpns) assign(".FAST_FUN_MOPS", c(.FAST_FUN_MOPS, "n"), envir = clpns) } if(any(mask %in% c("qtab", "qtable", "table"))) { if(is.null(clpns[["table"]])) assign("table", clpns[["qtab"]], envir = clpns) unmask_special <- c(unmask_special, "table") mask <- mask[!mask %in% c("qtab", "qtable", "table")] } if(any(mask == "%in%")) { if(is.null(clpns[["%in%"]])) assign("%in%", clpns[["%fin%"]], envir = clpns) unmask_special <- c(unmask_special, "%in%") mask <- mask[mask != "%in%"] } if(!all(m <- mask %in% names(clpns))) stop("Unsupported functions supplied to option 'collapse_mask': ", paste(mask[!m], collapse = ", ")) if(!all(m <- startsWith(mask, "f"))) stop("All functions to me masked must start with 'f', except for 'n' and 'qtab'/'table'. You supplied: ", paste(mask[!m], collapse = ", ")) # This now creates the additional functions (does the masking) unmask <- substr(mask, 2L, 100L) unmask_ind <- unmask %!iin% names(clpns) # Important: cannot change locked bindings in namespace! for(i in unmask_ind) assign(unmask[i], clpns[[mask[i]]], envir = clpns) # Internals of namespaceExport(clpns, c(unmask, unmask_special)): export_names <- c(unmask, unmask_special) names(export_names) <- export_names list2env(as.list(export_names), .getNamespaceInfo(clpns, "exports")) } do_collapse_remove_core <- function(clpns, rmfun, exports = TRUE, namespace = TRUE) { # exports = FALSE in .onLoad, because exports not defined yet if(exports) { clpns_exports <- .getNamespaceInfo(clpns, "exports") rmfun <- rmfun[rmfun %in% names(clpns_exports)] # ckmatch(rmfun, names(clpns_exports), e = "Unknown functions to be removed:") } if(any(tmp <- .FAST_STAT_FUN_EXT %in% rmfun)) assign(".FAST_STAT_FUN_EXT", .FAST_STAT_FUN_EXT[!tmp], envir = clpns) if(any(tmp <- .FAST_STAT_FUN_POLD %in% rmfun)) assign(".FAST_STAT_FUN_POLD", .FAST_STAT_FUN_POLD[!tmp], envir = clpns) if(any(tmp <- .FAST_FUN_MOPS %in% rmfun)) assign(".FAST_FUN_MOPS", .FAST_FUN_MOPS[!tmp], envir = clpns) if(exports) remove(list = rmfun, envir = clpns_exports) if(namespace) { assign(".COLLAPSE_ALL_EXPORTS", .COLLAPSE_ALL_EXPORTS[match(.COLLAPSE_ALL_EXPORTS, rmfun, 0L) == 0L], envir = clpns) remove(list = rmfun, envir = clpns) } } do_collapse_remove <- function(clpns, rmfun, ...) { kwd <- c("shorthand", "operator", "infix", "old") %in% rmfun if(kwd[1L]) rmfun <- c(rmfun[rmfun != "shorthand"], .SHORTHANDS) if(kwd[2L]) rmfun <- c(rmfun[rmfun != "operator"], .OPERATOR_FUN) if(kwd[3L]) rmfun <- c(rmfun[rmfun != "infix"], c(.COLLAPSE_ALL[startsWith(.COLLAPSE_ALL, "%")], if(any(c("%in%", "special") %in% .op[["mask"]])) "%in%")) if(kwd[4L]) rmfun <- c(rmfun[rmfun != "old"], .COLLAPSE_OLD) do_collapse_remove_core(clpns, unique.default(rmfun), ...) } # Used in set_collapse(), defined in global_macros.R do_collapse_unmask <- function(clpns) { nam <- getNamespaceExports(clpns) ffuns <- nam[startsWith(nam, "f")] rmfun <- nam[nam %in% substr(ffuns, 2L, 100L)] if(any(ntab <- c("n", "table", "%in%") %in% nam)) rmfun <- c(rmfun, c("n", "table", "%in%")[ntab]) do_collapse_remove_core(clpns, rmfun) } do_collapse_restore_exports <- function(clpns) { clpns_exports <- .getNamespaceInfo(clpns, "exports") missing <- fsetdiff(.COLLAPSE_ALL_EXPORTS, names(clpns_exports)) if(length(missing)) { names(missing) <- missing list2env(as.list(missing), clpns_exports) # = namespaceExport(clpns, missing) } } .onLoad <- function(libname, pkgname) { res <- .Call(C_collapse_init, "init.success") if(!is.character(res) || res != "init.success") stop("collapse not successfully loaded!") # https://stackoverflow.com/questions/12598242/global-variables-in-packages-in-r # https://stackoverflow.com/questions/49056642/r-how-to-make-variable-available-to-namespace-at-loading-time?noredirect=1&lq=1 clpns <- parent.env(environment()) assign(".collapse_env", new.env(), envir = clpns) .op <- new.env() .op$nthreads <- if(is.null(getOption("collapse_nthreads"))) 1L else as.integer(getOption("collapse_nthreads")) .op$na.rm <- if(is.null(getOption("collapse_na_rm")) && is.null(getOption("collapse_na.rm"))) TRUE else if(length(getOption("collapse_na_rm"))) as.logical(getOption("collapse_na_rm")) else as.logical(getOption("collapse_na.rm")) .op$sort <- if(is.null(getOption("collapse_sort"))) TRUE else as.logical(getOption("collapse_sort")) .op$stable.algo <- if(is.null(getOption("collapse_stable_algo"))) TRUE else as.logical(getOption("collapse_stable_algo")) .op$mask <- if(is.null(getOption("collapse_mask"))) NULL else getOption("collapse_mask") .op$remove <- if(is.null(getOption("collapse_remove"))) NULL else getOption("collapse_remove") .op$stub <- if(is.null(getOption("collapse_stub"))) TRUE else as.logical(getOption("collapse_stub")) .op$verbose <- if(is.null(getOption("collapse_verbose"))) 1L else as.integer(getOption("collapse_verbose")) .op$digits <- if(is.null(getOption("collapse_digits"))) 2L else as.integer(getOption("collapse_digits")) assign(".op", .op, envir = clpns) # TODO: option to save .collapse config file in install directory?? -> Nah, .RProfile is better... mask <- .op$mask # This checks if a .fastverse config file is there: to make sure collapse cannot be loaded without masking in project if(!(length(mask) && is.character(mask))) { if(file.exists(".fastverse")) { fileConn <- file(".fastverse") contents <- readLines(fileConn, warn = FALSE, skipNul = TRUE) close(fileConn) contents <- trimws(contents[nzchar(contents)]) mask <- which(startsWith(contents, "_opt_collapse_mask")) # Also works with if-clause below if(length(mask)) { if(length(mask) > 1L) stop("Multiple collapse_mask options set in .fastverse config file") mask <- paste0("options(", substr(contents[mask], 6L, 100000L), ")") eval(str2lang(mask)) .op$mask <- mask <- getOption("collapse_mask") } } } if(length(mask) && is.character(mask)) do_collapse_mask(clpns, mask) if(length(.op$remove) && is.character(.op$remove)) do_collapse_remove(clpns, .op$remove, exports = FALSE) if(isTRUE(getOption("collapse_export_F"))) namespaceExport(clpns, "F") if(is.null(getOption("collapse_unused_arg_action"))) options(collapse_unused_arg_action = "warning") # error, warning, message or none # if(is.null(getOption("collapse_DT_alloccol"))) options(collapse_DT_alloccol = 100L) invisible(res) } .onAttach <- function(libname, pkgname) { packageStartupMessage(paste0("collapse ",packageVersion("collapse"),", see ?`collapse-package` or ?`collapse-documentation`")) # \nNote: stats::D -> D.expression, D.call, D.name } .onUnload <- function (libpath) { library.dynam.unload("collapse", libpath) } # Note: To create local dev version of package change package name in DESCRIPTION, NAMESPACE, this file (including C_collapse_init), # replace all instances of `_collapse_` in source files (except for _collapse_DT_alloccol`), and also rename `R_init_collapse` in ExportSymbols.cpp. # and in vignettes / Rd files replace library(collapse) release_questions <- function() { c( "Have you updated the version number in DESCRIPTION, NEWS.md, NEWS.Rd, cran.comments and .onAttach?", "Updated Readme?", "Spell check ?", "built vignettes properly with Sys.setenv(RUNBENCH = TRUE)?", "Have you updated all help files with code changes, even if it's only documenting arguments or links?", "updated collapse-package.Rd and collapse-documentation.Rd?", "All functions in global_macros.R?", "checked all depreciated functions and arguments?", "any changes to arguments or order of arguments in key functions (GRP etc.). Does everything work?" ) } collapse/R/collap.R0000644000176200001440000005754015202504365013667 0ustar liggesusers# Need generic version for column-parallel apply and aggregating weights.. fsum_uw <- function(x, g, w, ...) fsum(x, g, ...) fprod_uw <- function(x, g, w, ...) fprod(x, g, ...) fmean_uw <- function(x, g, w, ...) fmean(x, g, ...) fmedian_uw <- function(x, g, w, ...) fmedian(x, g, ...) fvar_uw <- function(x, g, w, ...) fvar(x, g, ...) fsd_uw <- function(x, g, w, ...) fsd(x, g, ...) fmode_uw <- function(x, g, w, ...) fmode(x, g, ...) fnth_uw <- function(x, n, g, w, ...) fnth(x, n, g, ...) fmin_uw <- function(x, g, w, ...) fmin(x, g, ...) fmax_uw <- function(x, g, w, ...) fmax(x, g, ...) ffirst_uw <- function(x, g, w, ...) ffirst(x, g, ...) flast_uw <- function(x, g, w, ...) flast(x, g, ...) fnobs_uw <- function(x, g, w, ...) fnobs(x, g, ...) fndistinct_uw <- function(x, g, w, ...) fndistinct(x, g, ...) fNobs_uw <- function(x, g, w, ...) fnobs(x, g, ...) fNdistinct_uw <- function(x, g, w, ...) fndistinct(x, g, ...) mymatchfun <- function(FUN) { if(is.function(FUN)) return(FUN) switch(tochar(FUN), # cat(paste0(FSF, " = ", FSF, ",\n")) fmean = fmean, fmedian = fmedian, fmode = fmode, fsum = fsum, fprod = fprod, fsd = fsd, fvar = fvar, fmin = fmin, fmax = fmax, fnth = fnth, ffirst = ffirst, flast = flast, fnobs = fnobs, fndistinct = fndistinct, fNobs = fnobs, fNdistinct = fndistinct, # cat(paste0(paste0(FSF, "_uw"), " = ", paste0(FSF, "_uw"), ",\n")) fmean_uw = fmean_uw, fmedian_uw = fmedian_uw, fmode_uw = fmode_uw, fsum_uw = fsum_uw, fprod_uw = fprod_uw, fsd_uw = fsd_uw, fvar_uw = fvar_uw, fmin_uw = fmin_uw, fmax_uw = fmax_uw, fnth_uw = fnth_uw, ffirst_uw = ffirst_uw, flast_uw = flast_uw, fnobs_uw = fnobs_uw, fndistinct_uw = fndistinct_uw, fNobs_uw = fnobs_uw, fNdistinct_uw = fndistinct_uw, match.fun(FUN)) # get(FUN, mode = "function", envir = parent.frame(2)) -> no error message } # Column-level parallel implementation applyfuns_internal <- function(data, by, FUN, fFUN, parallel, cores, ...) { oldClass(data) <- "data.frame" # Needed for correct method dispatch for fast functions... if(length(FUN) > 1L) { if(parallel) return(lapply(seq_along(FUN), function(i) if(fFUN[i]) mclapply(data, FUN[[i]], g = by, ..., use.g.names = FALSE, mc.cores = cores) else BY.data.frame(data, by, FUN[[i]], ..., use.g.names = FALSE, reorder = FALSE, return = "data.frame", parallel = parallel, mc.cores = cores))) # mclapply(data, copysplaplfun, by, FUN[[i]], ..., mc.cores = cores) return(lapply(seq_along(FUN), function(i) if(fFUN[i]) FUN[[i]](data, g = by, ..., use.g.names = FALSE) else BY.data.frame(data, by, FUN[[i]], ..., use.g.names = FALSE, reorder = FALSE, return = "data.frame"))) # lapply(data, copysplaplfun, by, FUN[[i]], ...) } if(is.list(FUN)) FUN <- FUN[[1L]] if(parallel) if(fFUN) return(list(mclapply(data, FUN, g = by, ..., use.g.names = FALSE, mc.cores = cores))) else return(list(BY.data.frame(data, by, FUN, ..., use.g.names = FALSE, reorder = FALSE, return = "data.frame", parallel = parallel, mc.cores = cores))) # return(list(mclapply(data, copysplaplfun, by, FUN, ..., mc.cores = cores))) if(fFUN) return(list(FUN(data, g = by, ..., use.g.names = FALSE))) return(list(BY.data.frame(data, by, FUN, ..., use.g.names = FALSE, reorder = FALSE, return = "data.frame"))) # return(list(lapply(data, copysplaplfun, by, FUN, ...))) } rbindlist_factor <- function(l, idcol = "Function") { nam <- names(l) names(l) <- NULL res <- .Call(C_rbindlist, l, TRUE, TRUE, idcol) attr(res[[1L]], "levels") <- if (length(nam)) nam else as.character(seq_along(l)) oldClass(res[[1L]]) <- "factor" res } # NOTE: CUSTOM SEPARATOR doesn't work because of unlist() ! # keep.w toggle w being kept even if passed externally ? -> Also not done with W, B , etc !! -> but they also don't keep by .. collap <- function(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, ..., keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", drop = TRUE, parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto") { return <- switch(return[1L], wide = 1L, list = 2L, long = 3L, long_dupl = 4L, stop("Unknown return output option")) widel <- return == 1L ncustoml <- is.null(custom) autorn <- is.character(give.names) && give.names == "auto" nwl <- is.null(w) if(inherits(X, "data.frame")) DTl <- inherits(X, "data.table") else { X <- qDF(X) DTl <- FALSE } ax <- attributes(X) oldClass(X) <- NULL if(.Call(C_fnrow, X) == 0L) stop("data passed to collap() has 0 rows.") #160, 0 rows can cause segfault... nam <- names(X) # attributes(X) <- NULL # attr(X, "class") <- "data.frame" # class needed for method dispatch of fast functions, not for BY ! # cl <- if(parallel) makeCluster(mc.cores) else NULL # aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply # identifying by and cols vl <- TRUE if(is.call(by)) { if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) numby <- ckmatch(all.vars(by[[3L]]), nam) } else { numby <- ckmatch(all.vars(by), nam) if(ncustoml) v <- if(is.null(cols)) seq_along(X)[-numby] else cols2int(cols, X, nam) } by <- GRP.default(X, numby, sort, decreasing, na.last, keep.by, return.order, method, drop = drop, call = FALSE) } else if(is.atomic(by)) { numby <- 0L if(ncustoml) if(is.null(cols)) vl <- FALSE else v <- cols2int(cols, X, nam) by <- GRP.default(`names<-`(list(by), l1orlst(as.character(substitute(by)))), NULL, sort, decreasing, na.last, keep.by, return.order, method, drop = drop, call = FALSE) } else { if(ncustoml) if(is.null(cols)) vl <- FALSE else v <- cols2int(cols, X, nam) if(!is_GRP(by)) by <- GRP.default(by, NULL, sort, decreasing, na.last, keep.by, return.order, method, drop = drop, call = FALSE) numby <- rep(0L, length(by[[5L]])) if(keep.by && !vl && any(m <- nam %in% by[[5L]])) { v <- whichv(m, FALSE) vl <- TRUE } } if(!nwl) { if(is.call(w)) { namw <- all.vars(w) numw <- ckmatch(namw, nam) if(ncustoml) if(vl) v <- fsetdiff(v, numw) else { # v[v != numw] v <- nam %!iin% namw; vl <- TRUE } w <- eval(w[[2L]], X, attr(w, ".Environment")) # w <- X[[numw]] } else if(keep.w) { numw <- 0L # length(X) + 1L namw <- l1orlst(as.character(substitute(w))) } if(keep.w) { # what about function name for give.names ? What about give.names and custom ? wFUN <- acr_get_funs(substitute(wFUN), wFUN, mymatchfun) namwFUN <- wFUN$namfun wFUN <- wFUN$funs if(!all(names(wFUN) %in% .FAST_STAT_FUN_EXT)) stop("wFUN needs to be fast statistical functions, see print(.FAST_STAT_FUN)") if(length(wFUN) > 1L) { namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(lapply(wFUN, function(f) f(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, rep_len(numw, length(wFUN))) } else { wFUN <- wFUN[[1L]] if(isTRUE(give.names)) namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(list(wFUN(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, numw) # need to accommodate any option of keep.by, keep.w and keep.col.order } keep.by <- TRUE } } if(ncustoml) { # Identifying data nu <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) if(vl) { temp <- nu[v] nnu <- v[!temp] # which(!nu & v) # faster way ? nu <- v[temp] # which(nu & v) rm(temp, v) } else { nnu <- whichv(nu, FALSE) nu <- which(nu) } nul <- length(nu) > 0L nnul <- length(nnu) > 0L # Identifying FUN and catFUN: if(nul) { FUN <- acr_get_funs(substitute(FUN), FUN, mymatchfun) namFUN <- FUN$namfun FUN <- FUN$funs } if(nnul) { catFUN <- acr_get_funs(substitute(catFUN), catFUN, mymatchfun) namcatFUN <- catFUN$namfun catFUN <- catFUN$funs } if(autorn) give.names <- !widel || length(FUN) > 1L || length(catFUN) > 1L # Aggregator function # drop level of nesting i.e. make rest length(by)+length(FUN)+length(catFUN) ? agg <- function(xnu, xnnu, ...) { # by, FUN, namFUN, catFUN, namcatFUN, drop.by lr <- nul + nnul + keep.by res <- vector("list", lr) if(keep.by) { res[[1L]] <- list(by[[4L]]) # could add later using "c" ? ind <- 2L } else ind <- 1L if(nul) res[[ind]] <- condsetn(applyfuns_internal(xnu, by, FUN, names(FUN) %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namFUN, give.names) if(nnul) res[[lr]] <- condsetn(applyfuns_internal(xnnu, by, catFUN, names(catFUN) %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namcatFUN, give.names) return(res) } # fastest using res list ?? or better combine at the end ?? # Fixes https://github.com/fastverse/collapse/issues/185 if(widel && !give.names && ((length(nu) == 1L && !nnul && length(FUN) > 1L) || (length(nnu) == 1L && !nul && length(catFUN) > 1L))) { names(X) <- NULL give.names <- TRUE } if(nwl) { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, ...) } else { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, w = w, ...) } if(keep.col.order && widel) o <- forder.int(c(if(keep.by) numby else NULL, if(nul) rep(nu,length(FUN)) else NULL, if(nnul) rep(nnu,length(catFUN)) else NULL)) } else { # custom aggregation: namFUN <- names(custom) if(!is.list(custom) || is.null(namFUN)) stop("custom needs to be a named list, see ?collap") fFUN <- namFUN %in% .FAST_STAT_FUN_EXT if(!keep.by) { res <- vector("list", 1L) ind <- 1L } else { res <- vector("list", 2L) res[[1L]] <- list(by[[4L]]) # could add later using "c" ? ind <- 2L } custom_names <- lapply(custom, names) custom <- lapply(custom, cols2int, X, nam) # could integrate below, but then reorder doesn't work ! # if(autorn) give.names <- fanyDuplicated(funlist(custom)) #lx <- length(X) # custom <- lapply(custom, function(x) if(is.numeric(x) && bmax(abs(x)) <= lx) # x else if(is.character(x)) ckmatch(x, nam) else # stop("custom list content must be variable names or suitable column indices")) if(nwl) { res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, ...)[[1L]]) } else { if(!all(fFUN)) warning("collap can only perform weighted aggregations with the fast statistical functions (see .FAST_STAT_FUN): Ignoring weights argument to other functions") res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, w = w, ...)[[1L]]) } # Better to do this check afterwards, because custom names may make column names unique... if(autorn && widel) give.names <- fanyDuplicated(funlist(lapply(res[[ind]], attr, "names"))) if(!widel || give.names) names(res[[ind]]) <- namFUN if(keep.col.order && return != 2L) { # && widel o <- unlist(custom, use.names = FALSE) o <- forder.int(c(if(keep.by) numby else NULL, if(widel) o else unique.default(o))) } } # if(parallel) stopCluster(cl) if(widel) res <- unlist(unlist(res, FALSE), FALSE) else { # if(length(FUN) > 1L || length(catFUN) > 1L || length(custom) > 1L) { res <- unlist(res, FALSE) if(return == 2L) { ax[["row.names"]] <- .set_row_names(by[[1L]]) if(!keep.by) return(lapply(res, function(e) { ax[["names"]] <- names(e) condalcSA(e, ax, DTl) })) namby <- attr(res[[1L]], "names") # always works ?? return(lapply(res[-1L], function(e) { ax[["names"]] <- c(namby, names(e)) condalcSA(c(res[[1L]], e), ax, DTl) })) } else { if(return != 4L) { if(keep.by) res <- lapply(res[-1L], function(e) c(res[[1L]], e)) } else { if(!ncustoml || !(nul && nnul)) stop("long_dupl is only meaningful for aggregations with both numeric and categorical data, and multiple functions used for only one of the two data types!") mFUN <- length(FUN) > 1L nid <- if(mFUN) length(res) else 2L-!keep.by if(!keep.by) { res <- if(mFUN) lapply(res[-nid], function(e) c(e, res[[nid]])) else lapply(res[-nid], function(e) c(res[[nid]], e)) } else res <- if(mFUN) lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], e, res[[nid]])) else lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], res[[nid]], e)) } res <- rbindlist_factor(res) if(keep.col.order) o <- if(ncustoml) forder.int(c(0L, if(keep.by) numby else NULL, nu, nnu)) else c(1L, o + 1L) } # } else message("return options other than 'wide' are only meaningful if multiple functions are used!") } if(keep.col.order) .Call(C_setcolorder, res, o) # data.table:::Csetcolorder ax[["names"]] <- names(res) ax[["row.names"]] <- .set_row_names(.Call(C_fnrow, res)) return(condalcSA(res, ax, DTl)) } # collapv: allows vector input to by and w collapv <- function(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, ..., keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", drop = TRUE, parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto") { return <- switch(return[1L], wide = 1L, list = 2L, long = 3L, long_dupl = 4L, stop("Unknown return output option")) widel <- return == 1L ncustoml <- is.null(custom) autorn <- is.character(give.names) && give.names == "auto" nwl <- is.null(w) if(inherits(X, "data.frame")) DTl <- inherits(X, "data.table") else { X <- qDF(X) DTl <- FALSE } ax <- attributes(X) oldClass(X) <- NULL if(.Call(C_fnrow, X) == 0L) stop("data passed to collapv() has 0 rows.") #160, 0 rows can cause segfault... nam <- names(X) aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply # identifying by numby <- cols2int(by, X, nam) by <- GRP.default(X, numby, sort, decreasing, na.last, keep.by, return.order, method, drop = drop, call = FALSE) if(ncustoml) v <- if(is.null(cols)) seq_along(X)[-numby] else cols2int(cols, X, nam) if(!nwl) { if(length(w) == 1L) { numw <- cols2int(w, X, nam) namw <- nam[numw] if(ncustoml) v <- v[v != numw] w <- X[[numw]] } else if(keep.w) { numw <- 0L namw <- l1orlst(as.character(substitute(w))) } if(keep.w) { wFUN <- acr_get_funs(substitute(wFUN), wFUN, mymatchfun) namwFUN <- wFUN$namfun wFUN <- wFUN$funs if(!all(names(wFUN) %in% .FAST_STAT_FUN_EXT)) stop("wFUN needs to be fast statistical functions, see print(.FAST_STAT_FUN)") if(length(wFUN) > 1L) { namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(lapply(wFUN, function(f) f(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, rep_len(numw, length(wFUN))) } else { wFUN <- wFUN[[1L]] if(isTRUE(give.names)) namw <- paste(namwFUN, namw, sep = ".") by[[4L]] <- c(if(keep.by) by[[4L]], `names<-`(list(wFUN(w, g = by, ..., use.g.names = FALSE)), namw)) if(keep.col.order) numby <- c(if(keep.by) numby, numw) # need to accommodate any option of keep.by, keep.w and keep.col.order } keep.by <- TRUE } } if(ncustoml) { # Identifying data nu <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) temp <- nu[v] nnu <- v[!temp] # which(!nu & v) # faster way ? nu <- v[temp] # which(nu & v) rm(temp, v) nul <- length(nu) > 0L nnul <- length(nnu) > 0L # Identifying FUN and catFUN: if(nul) { FUN <- acr_get_funs(substitute(FUN), FUN, mymatchfun) namFUN <- FUN$namfun FUN <- FUN$funs } if(nnul) { catFUN <- acr_get_funs(substitute(catFUN), catFUN, mymatchfun) namcatFUN <- catFUN$namfun catFUN <- catFUN$funs } if(autorn) give.names <- !widel || length(FUN) > 1L || length(catFUN) > 1L # Aggregator function agg <- function(xnu, xnnu, ...) { lr <- nul + nnul + keep.by res <- vector("list", lr) if(keep.by) { res[[1L]] <- list(by[[4L]]) ind <- 2L } else ind <- 1L if(nul) res[[ind]] <- condsetn(applyfuns_internal(xnu, by, FUN, names(FUN) %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namFUN, give.names) if(nnul) res[[lr]] <- condsetn(applyfuns_internal(xnnu, by, catFUN, names(catFUN) %in% .FAST_STAT_FUN_EXT, parallel, mc.cores, ...), namcatFUN, give.names) return(res) } # Fixes https://github.com/fastverse/collapse/issues/185 if(widel && !give.names && ((length(nu) == 1L && !nnul && length(FUN) > 1L) || (length(nnu) == 1L && !nul && length(catFUN) > 1L))) { names(X) <- NULL give.names <- TRUE } if(nwl) { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, ...) } else { res <- agg(if(nul) X[nu] else NULL, if(nnul) X[nnu] else NULL, w = w, ...) } if(keep.col.order && widel) o <- forder.int(c(if(keep.by) numby else NULL, if(nul) rep(nu,length(FUN)) else NULL, if(nnul) rep(nnu,length(catFUN)) else NULL)) } else { # custom aggregation: namFUN <- names(custom) if(!is.list(custom) || is.null(namFUN)) stop("custom needs to be a named list, see ?collap") fFUN <- namFUN %in% .FAST_STAT_FUN_EXT if(!keep.by) { res <- vector("list", 1L) ind <- 1L } else { res <- vector("list", 2L) res[[1L]] <- list(by[[4L]]) ind <- 2L } custom_names <- lapply(custom, names) custom <- lapply(custom, cols2int, X, nam) if(nwl) { res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, ...)[[1L]]) } else { if(!all(fFUN)) warning("collap can only perform weighted aggregations with the fast statistical functions (see .FAST_STAT_FUN): Ignoring weights argument to other functions") res[[ind]] <- lapply(seq_along(namFUN), function(i) applyfuns_internal(setnck(X[custom[[i]]], custom_names[[i]]), by, mymatchfun(namFUN[i]), fFUN[i], parallel, mc.cores, w = w, ...)[[1L]]) } # Better to do this check afterwards, because custom names may make column names unique... if(autorn && widel) give.names <- fanyDuplicated(funlist(lapply(res[[ind]], attr, "names"))) if(!widel || give.names) names(res[[ind]]) <- namFUN if(keep.col.order && return != 2L) { o <- unlist(custom, use.names = FALSE) o <- forder.int(c(if(keep.by) numby else NULL, if(widel) o else unique.default(o))) } } if(widel) res <- unlist(unlist(res, FALSE), FALSE) else { # if(length(FUN) > 1L || length(catFUN) > 1L || length(custom) > 1L) { res <- unlist(res, FALSE) if(return == 2L) { ax[["row.names"]] <- .set_row_names(by[[1L]]) if(!keep.by) return(lapply(res, function(e) { ax[["names"]] <- names(e) condalcSA(e, ax, DTl) })) namby <- attr(res[[1L]], "names") # always works ?? return(lapply(res[-1L], function(e) { ax[["names"]] <- c(namby, names(e)) condalcSA(c(res[[1L]], e), ax, DTl) })) } else { if(return != 4L) { if(keep.by) res <- lapply(res[-1L], function(e) c(res[[1L]], e)) } else { if(!ncustoml || !(nul && nnul)) stop("long_dupl is only meaningful for aggregations with both numeric and categorical data, and multiple functions used for only one of the two data types!") mFUN <- length(FUN) > 1L nid <- if(mFUN) length(res) else 2L-!keep.by if(!keep.by) { res <- if(mFUN) lapply(res[-nid], function(e) c(e, res[[nid]])) else lapply(res[-nid], function(e) c(res[[nid]], e)) } else res <- if(mFUN) lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], e, res[[nid]])) else lapply(res[-c(nid, 1L)], function(e) c(res[[1L]], res[[nid]], e)) } res <- rbindlist_factor(res) if(keep.col.order) o <- if(ncustoml) forder.int(c(0L, if(keep.by) numby else NULL, nu, nnu)) else c(1L, o + 1L) } # } else message("return options other than 'wide' are only meaningful if multiple functions are used!") } if(keep.col.order) .Call(C_setcolorder, res, o) # data.table:::Csetcolorder ax[["names"]] <- names(res) ax[["row.names"]] <- .set_row_names(.Call(C_fnrow, res)) return(condalcSA(res, ax, DTl)) } # For dplyr integration: takes grouped_df as input collapg <- function(X, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.group_vars = TRUE, ...) { by <- GRP.grouped_df(X, return.groups = keep.group_vars, call = FALSE) if(is.null(by[[4L]])) keep.group_vars <- FALSE if(is.null(custom)) ngn <- attr(X, "names") %!in% by[[5L]] # Note: this always leaves grouping columns on the left still ! # clx <- oldClass(X) attr(X, "groups") <- NULL oldClass(X) <- fsetdiff(oldClass(X), c("GRP_df", "grouped_df")) # clx[clx != "grouped_df"] wsym <- substitute(w) if(!is.null(wsym)) { # Non-standard evaluation of w argument if(any(windl <- attr(X, "names") %in% all.vars(wsym))) { wchar <- if(length(wsym) == 1L) as.character(wsym) else deparse(wsym) assign(wchar, eval(wsym, X, parent.frame())) # needs to be here !! (before subsetting!!) if(is.null(custom)) X <- fcolsubset(X, ngn & !windl) # else X <- X # Needed ?? -> nope !! expr <- substitute(collap(X, by, FUN, catFUN, cols, w, wFUN, custom, ..., keep.by = keep.group_vars, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = TRUE, method = "auto")) expr[[7L]] <- as.symbol(wchar) # best solution !! return(eval(expr)) } } if(is.null(custom)) X <- fcolsubset(X, ngn) # else X <- X # because of non-standard eval.. X is "." return(eval(substitute(collap(X, by, FUN, catFUN, cols, w, wFUN, custom, ..., keep.by = keep.group_vars, sort = TRUE, decreasing = FALSE, na.last = TRUE, return.order = TRUE, method = "auto")))) } collapse/R/fslice.R0000644000176200001440000001045714777170130013663 0ustar liggesusers fslice <- function(x, ..., n = 1, how = "first", order.by = NULL, na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE) { # handle grouping if(!missing(...)) { g <- GRP.default(if(is.list(x)) fselect(unclass(x), ...) else list(...), sort = sort, return.groups = FALSE, return.order = sort, call = FALSE) } else if(is.list(x) && inherits(x, "grouped_df")) { g <- GRP.grouped_df(x, return.groups = FALSE, call = FALSE) x <- fungroup2(x, oldClass(x)) } else g <- NULL # resolve values to order by if(switch(how, min = TRUE, max = TRUE, FALSE)) { if(is.list(x)) order.by <- eval(substitute(order.by), x, parent.frame()) if(is.character(order.by) && length(order.by) == 1L && anyv(attr(x, "names"), order.by)) order.by <- .subset2(x, order.by) if(length(order.by) != fnrow(x)) stop("order.by must be a numeric vector of the same length as the number of rows in x, or the name of a column in x.") } fslice_core(x, g, n, how, order.by, na.rm, with.ties, sort) } fslicev <- function(x, cols = NULL, n = 1, how = "first", order.by = NULL, na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE, ...) { # handle grouping if(!is.null(cols)) { cond <- is.list(cols) || is.atomic(x) g <- GRP.default(if(cond) cols else x, by = if(cond) NULL else cols, sort = sort, return.groups = FALSE, return.order = sort, call = FALSE, ...) } else if(is.list(x) && inherits(x, "grouped_df")) { g <- GRP.grouped_df(x, return.groups = FALSE, call = FALSE) x <- fungroup2(x, oldClass(x)) } else g <- NULL # resolve values to order by if(switch(how, min = TRUE, max = TRUE, FALSE)) { if(is.character(order.by) && length(order.by) == 1L && anyv(attr(x, "names"), order.by)) order.by <- .subset2(x, order.by) if(length(order.by) != fnrow(x)) stop("order.by must be a numeric vector of the same length as the number of rows in x, or the name of a column in x.") } fslice_core(x, g, n, how, order.by, na.rm, with.ties, sort) } fslice_core <- function(x, g, n, how, order.by, na.rm, with.ties, sort) { # convert a proportion to a number if applicable if(n < 1) n <- if(is.null(g)) max(1L, as.integer(round(n * fnrow(x)))) else max(1L, as.integer(round(n * fnrow(x)/g[[1L]]))) if(n > 1 && with.ties) stop("with.ties = TRUE is only supported for n = 1") if(is.null(g)) { ind <- switch(how, first = 1:n, last = (fnrow(x)-n+1L):fnrow(x), min = if(n > 1) radixorderv(order.by, decreasing = FALSE, na.last = na.rm)[1:n] else if(with.ties) order.by %==% fmin.default(order.by, na.rm = na.rm) else which.min(order.by), max = if(n > 1) radixorderv(order.by, decreasing = TRUE, na.last = na.rm)[1:n] else if(with.ties) order.by %==% fmax.default(order.by, na.rm = na.rm) else which.max(order.by), stop("Unknown 'how' option: ", how) ) return(ss(x, ind, check = FALSE)) } if(n == 1) { if(with.ties && sort) warning("sorting with ties is currently not supported") return(switch(how, first = condalc(ffirst(x, g, na.rm = FALSE), inherits(x, "data.table")), last = condalc(flast(x, g, na.rm = FALSE), inherits(x, "data.table")), # TODO: sort with ties? min = if(with.ties) ss(x, order.by %==% fmin(order.by, g, TRA = "fill", na.rm = na.rm, use.g.names = FALSE), check = FALSE) else ss(x, .Call(C_gwhich_first, order.by, g, fmin.default(order.by, g, na.rm = na.rm, use.g.names = FALSE)), check = FALSE), max = if(with.ties) ss(x, order.by %==% fmax(order.by, g, TRA = "fill", na.rm = na.rm, use.g.names = FALSE), check = FALSE) else ss(x, .Call(C_gwhich_first, order.by, g, fmax.default(order.by, g, na.rm = na.rm, use.g.names = FALSE)), check = FALSE), stop("Unknown 'how' option: ", how) )) } ind <- switch(how, first = .Call(C_gslice_multi, g, g$order, n, TRUE), # g$order is NULL if sort = FALSE last = .Call(C_gslice_multi, g, g$order, n, FALSE), # g$order is NULL if sort = FALSE min = .Call(C_gslice_multi, g, radixorder(g$group.id, order.by, decreasing = FALSE, na.last = na.rm), n, TRUE), max = .Call(C_gslice_multi, g, radixorder(g$group.id, order.by, decreasing = c(FALSE, TRUE), na.last = na.rm), n, TRUE), stop("Unknown 'how' option: ", how) ) return(ss(x, ind, check = FALSE)) } collapse/R/flast.R0000644000176200001440000001250214777170130013520 0ustar liggesusers # Note: for foundational changes to this code see fsum.R flast <- function(x, ...) UseMethod("flast") # , x flast.default <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(flast.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_flast,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_flast,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_flast,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_flast,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_flast,x,g[[1L]],g[[2L]],na.rm), GRPnames(g))) return(.Call(C_flast,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(TRAC(x,.Call(C_flast,x,0L,0L,na.rm),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_flast,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TRA, ...) } flast.matrix <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_flastm,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_flastm,x,length(lev),g,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_flastm,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_flastm,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_flastm,x,g[[1L]],g[[2L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_flastm,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(C_flastm,x,0L,0L,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_flastm,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...) } flast.zoo <- function(x, ...) if(is.matrix(x)) flast.matrix(x, ...) else flast.default(x, ...) flast.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(flast.matrix(x, ...), x) else flast.default(x, ...) flast.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) if(drop) return(unlist(.Call(C_flastl,x,0L,0L,na.rm))) else return(.Call(C_flastl,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_flastl,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_flastl,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_flastl,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm), groups)) return(.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(TRAlC(x,.Call(C_flastl,x,0L,0L,na.rm),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TRA, ...) } flast.list <- function(x, ...) flast.data.frame(x, ...) flast.grouped_df <- function(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm)), ax)) } else return(setAttributes(.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],TRAlC(x[-gn],.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_flastl,x[-gn],g[[1L]],g[[2L]],na.rm),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_flastl,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TRA, ...)) } collapse/R/fdiff_fgrowth.R0000644000176200001440000004410514777170130015231 0ustar liggesusers # For principle innovations of this code see flag.R and flag.cpp # Helper functions checkld <- function(...) { if(any(names(list(...)) == "logdiff")) { warning("argument 'logdiff' was renamed to 'log'") TRUE } else FALSE } baselog <- base::log fdiff <- function(x, n = 1, diff = 1, ...) UseMethod("fdiff") # , x fdiff.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("fdiff", unclass(x))) if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- baselog(x) if(is.null(g)) return(.Call(Cpp_fdiffgrowth,x,n,diff,fill,0L,0L,NULL,G_t(t),1L+log,rho,stubs,1)) g <- G_guo(g) .Call(Cpp_fdiffgrowth,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.pseries <- function(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, shift = "time", ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) index <- uncl2pix(x) if(log) x <- baselog(x) g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !inherits(x, "indexed_series")) t <- plm_check_time(t) res <- if(is.matrix(x)) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,fnlevels(g),g,NULL,t,1L+log,rho,stubs,1) else .Call(Cpp_fdiffgrowth,x,n,diff,fill,fnlevels(g),g,NULL,t,1L+log,rho,stubs,1) if(is.double(x)) return(res) pseries_to_numeric(res) } fdiff.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- baselog(x) if(is.null(g)) return(.Call(Cpp_fdiffgrowthm,x,n,diff,fill,0L,0L,NULL,G_t(t),1L+log,rho,stubs,1)) g <- G_guo(g) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.zoo <- function(x, ...) if(is.matrix(x)) fdiff.matrix(x, ...) else fdiff.default(x, ...) fdiff.units <- fdiff.zoo fdiff.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) tsym <- substitute(t) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(!is.null(tsym)) { t <- eval(tsym, x, parent.frame()) if(!anyNA(tn <- match(all.vars(tsym), nam))) { gn <- c(gn, tn) if(anyDuplicated.default(gn)) stop("timevar coincides with grouping variables!") } } cld <- function(x) if(log) fdapply(x, baselog) else x if(length(gn)) { ax <- attributes(x) res <- .Call(Cpp_fdiffgrowthl,cld(.subset(x, -gn)),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) # Works for multiple lags / differences ! return(setAttributes(res, ax)) } .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.data.frame <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) if(log) x <- fdapply(x, baselog) if(is.null(g)) return(.Call(Cpp_fdiffgrowthl,x,n,diff,fill,0L,0L,NULL,G_t(t),1L+log,rho,stubs,1)) g <- G_guo(g) .Call(Cpp_fdiffgrowthl,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),1L+log,rho,stubs,1) } fdiff.list <- function(x, ...) fdiff.data.frame(x, ...) fdiff.pdata.frame <- function(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, shift = "time", ...) { if(!missing(...)) if(checkld(...)) log <- list(...)[["logdiff"]] else unused_arg_action(match.call(), ...) index <- uncl2pix(x) if(log) x <- fdapply(x, baselog) g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !inherits(x, "indexed_frame")) t <- plm_check_time(t) .Call(Cpp_fdiffgrowthl,x,n,diff,fill,fnlevels(g),g,NULL,t,1L+log,rho,stubs,1) } fgrowth <- function(x, n = 1, diff = 1, ...) UseMethod("fgrowth") # , x fgrowth.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("fgrowth", unclass(x))) if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) baselog(x) else baselog(x) %*=% scale if(is.null(g)) return(.Call(Cpp_fdiffgrowth,x,n,diff,fill,0L,0L,NULL,G_t(t),4L-logdiff,scale,stubs,power)) g <- G_guo(g) .Call(Cpp_fdiffgrowth,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.pseries <- function(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, shift = "time", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- uncl2pix(x) if(logdiff) x <- if(scale == 1) baselog(x) else baselog(x) %*=% scale g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !inherits(x, "indexed_series")) t <- plm_check_time(t) res <- if(is.matrix(x)) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,fnlevels(g),g,NULL,t,4L-logdiff,scale,stubs,power) else .Call(Cpp_fdiffgrowth,x,n,diff,fill,fnlevels(g),g,NULL,t,4L-logdiff,scale,stubs,power) if(is.double(x)) return(res) pseries_to_numeric(res) } fgrowth.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) baselog(x) else baselog(x) %*=% scale if(is.null(g)) return(.Call(Cpp_fdiffgrowthm,x,n,diff,fill,0L,0L,NULL,G_t(t),4L-logdiff,scale,stubs,power)) g <- G_guo(g) .Call(Cpp_fdiffgrowthm,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.zoo <- function(x, ...) if(is.matrix(x)) fgrowth.matrix(x, ...) else fgrowth.default(x, ...) fgrowth.units <- fgrowth.zoo fgrowth.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) tsym <- substitute(t) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(!is.null(tsym)) { t <- eval(tsym, x, parent.frame()) if(!anyNA(tn <- match(all.vars(tsym), nam))) { gn <- c(gn, tn) if(anyDuplicated.default(gn)) stop("timevar coincides with grouping variables!") } } cld <- function(x) if(!logdiff) x else if(scale == 1) fdapply(x, baselog) else fdapply(x, baselog) %*=% scale if(length(gn)) { ax <- attributes(x) res <- .Call(Cpp_fdiffgrowthl,cld(.subset(x, -gn)),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) # Works for multiple lags / differences ! return(setAttributes(res, ax)) } .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.data.frame <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(logdiff) x <- if(scale == 1) fdapply(x, baselog) else fdapply(x, baselog) %*=% scale if(is.null(g)) return(.Call(Cpp_fdiffgrowthl,x,n,diff,fill,0L,0L,NULL,G_t(t),4L-logdiff,scale,stubs,power)) g <- G_guo(g) .Call(Cpp_fdiffgrowthl,x,n,diff,fill,g[[1L]],g[[2L]],g[[3L]],G_t(t),4L-logdiff,scale,stubs,power) } fgrowth.list <- function(x, ...) fgrowth.data.frame(x, ...) fgrowth.pdata.frame <- function(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, shift = "time", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- uncl2pix(x) if(logdiff) x <- if(scale == 1) fdapply(x, baselog) else fdapply(x, baselog) %*=% scale g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !inherits(x, "indexed_frame")) t <- plm_check_time(t) .Call(Cpp_fdiffgrowthl,x,n,diff,fill,fnlevels(g),g,NULL,t,4L-logdiff,scale,stubs,power) } # Operator data frame methods templates DG_data_frame_template <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, return = 1L, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, power = 1, ...) { # , message = 2L, power = 1 if(!missing(...)) unused_arg_action(match.call(), ...) cld <- function(y) switch(return, y, fdapply(y, baselog), if(rho == 1) fdapply(y, baselog) else fdapply(y, baselog) %*=% rho, y) if(is.call(by) || is.call(t)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam) gn <- ckmatch(all.vars(by[[3L]]), nam) } else { gn <- ckmatch(all.vars(by), nam) cols <- cols2intrmgn(gn, cols, x) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.ids) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L, NULL) else G_guo(by) } if(is.call(t)) { tn <- ckmatch(all.vars(t), nam) t1 <- length(tn) == 1L t <- eval(if(t1) t[[2L]] else attr(terms.formula(t), "variables"), x, attr(t, ".Environment")) # if(t1) x[[tn]] else x[tn] cols <- if(is.null(cols)) seq_along(x)[-tn] else if(t1) cols[cols != tn] else fsetdiff(cols, tn) if(keep.ids) gn <- c(gn, tn) } res <- if(length(gn)) c(x[gn], .Call(Cpp_fdiffgrowthl,cld(x[cols]),n,diff,fill,by[[1L]],by[[2L]],by[[3L]],G_t(t),return,rho,stubs,power)) else .Call(Cpp_fdiffgrowthl,cld(x[cols]),n,diff,fill,by[[1L]],by[[2L]],by[[3L]],G_t(t),return,rho,stubs,power) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(length(cols)) { # Needs to be done like this, otherwise list-subsetting drops attributes ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(is.null(by)) return(.Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,0L,0L,NULL,G_t(t),return,rho,stubs,power)) by <- G_guo(by) .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,by[[1L]],by[[2L]],by[[3L]],G_t(t),return,rho,stubs,power) } DG_pdata_frame_template <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, return = 1L, rho = 1, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, power = 1, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) nam <- ax[["names"]] index <- uncl2pix(x) cols_fun <- is.function(cols) if(cols_fun && identical(cols, is.numeric)) cols <- which(.Call(C_vtypes, x, 1L)) else if(length(cols)) cols <- cols2int(cols, x, nam, FALSE) if(cols_fun || keep.ids) { gn <- which(nam %in% attr(index, "nam")) # Needed for 1 or 3+ index variables if(length(gn)) { if(cols_fun) cols <- fsetdiff(cols, gn) else if(is.null(cols)) cols <- seq_along(unclass(x))[-gn] } if(!keep.ids) gn <- NULL } else gn <- NULL g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !any(ax$class == "indexed_frame")) t <- plm_check_time(t) cld <- function(y) switch(return, y, fdapply(y, baselog), if(rho == 1) fdapply(y, baselog) else fdapply(y, baselog) %*=% rho, y) if(length(gn) && length(cols)) { class(x) <- NULL # Works for multiple lags ! res <- c(x[gn], .Call(Cpp_fdiffgrowthl,cld(x[cols]),n,diff,fill,fnlevels(g),g,NULL,t,return,rho,stubs,power)) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(!length(gn)) # could speed up ? return(.Call(Cpp_fdiffgrowthl,cld(fcolsubset(x, cols)),n,diff,fill,fnlevels(g),g,NULL,t,return,rho,stubs,power)) .Call(Cpp_fdiffgrowthl,cld(x),n,diff,fill,fnlevels(g),g,NULL,t,return,rho,stubs,power) } # Difference Operator (masks stats::D) # use xt instead of by ? # setGeneric("D") D <- function(x, n = 1, diff = 1, ...) UseMethod("D") # , x D.expression <- function(x, ...) if(missing(x)) stats::D(...) else stats::D(x, ...) D.call <- function(x, ...) if(missing(x)) stats::D(...) else stats::D(x, ...) D.name <- function(x, ...) if(missing(x)) stats::D(...) else stats::D(x, ...) D.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fdiff.matrix(x, n, diff, g, t, fill, FALSE, rho, stubs, ...)) fdiff.default(x, n, diff, g, t, fill, FALSE, rho, stubs, ...) } D.pseries <- function(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", ...) fdiff.pseries(x, n, diff, fill, FALSE, rho, stubs, shift, ...) # setOldClass("pseries") # setMethod("D", signature(expr = "pseries"), D.pseries) D.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], ...) fdiff.matrix(x, n, diff, g, t, fill, FALSE, rho, stubs, ...) # setMethod("D", "matrix") D.zoo <- function(x, ...) if(is.matrix(x)) D.matrix(x, ...) else D.default(x, ...) D.units <- D.zoo D.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, ...) { x <- x # because of piped calls -> "." is not in global environment ... eval(substitute(fdiff.grouped_df(x, n, diff, t, fill, FALSE, rho, stubs, keep.ids, ...))) } D.data.frame <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, ...) DG_data_frame_template(x, n, diff, by, t, cols, fill, 1L, rho, stubs, keep.ids, ...) D.list <- function(x, ...) D.data.frame(x, ...) D.pdata.frame <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, ...) DG_pdata_frame_template(x, n, diff, cols, fill, 1L, rho, stubs, shift, keep.ids, ...) # Log-Difference Operator Dlog <- function(x, n = 1, diff = 1, ...) UseMethod("Dlog") # , x Dlog.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fdiff.matrix(x, n, diff, g, t, fill, TRUE, rho, stubs, ...)) fdiff.default(x, n, diff, g, t, fill, TRUE, rho, stubs, ...) } Dlog.pseries <- function(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", ...) fdiff.pseries(x, n, diff, fill, TRUE, rho, stubs, shift, ...) Dlog.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], ...) fdiff.matrix(x, n, diff, g, t, fill, TRUE, rho, stubs, ...) Dlog.zoo <- function(x, ...) if(is.matrix(x)) Dlog.matrix(x, ...) else Dlog.default(x, ...) Dlog.units <- Dlog.zoo Dlog.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, ...) { x <- x eval(substitute(fdiff.grouped_df(x, n, diff, t, fill, TRUE, rho, stubs, keep.ids, ...))) } Dlog.data.frame <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, ...) DG_data_frame_template(x, n, diff, by, t, cols, fill, 2L, rho, stubs, keep.ids, ...) Dlog.list <- function(x, ...) Dlog.data.frame(x, ...) Dlog.pdata.frame <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, ...) DG_pdata_frame_template(x, n, diff, cols, fill, 2L, rho, stubs, shift, keep.ids, ...) # Growth Operator G <- function(x, n = 1, diff = 1, ...) UseMethod("G") # , x G.default <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fgrowth.matrix(x, n, diff, g, t, fill, logdiff, scale, power, stubs, ...)) fgrowth.default(x, n, diff, g, t, fill, logdiff, scale, power, stubs, ...) } G.pseries <- function(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], shift = "time", ...) fgrowth.pseries(x, n, diff, fill, logdiff, scale, power, stubs, shift, ...) G.matrix <- function(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], ...) fgrowth.matrix(x, n, diff, g, t, fill, logdiff, scale, power, stubs, ...) G.zoo <- function(x, ...) if(is.matrix(x)) G.matrix(x, ...) else G.default(x, ...) G.units <- G.zoo G.grouped_df <- function(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], keep.ids = TRUE, ...) { x <- x eval(substitute(fgrowth.grouped_df(x, n, diff, t, fill, logdiff, scale, power, stubs, keep.ids, ...))) } G.data.frame <- function(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], keep.ids = TRUE, ...) DG_data_frame_template(x, n, diff, by, t, cols, fill, 4L-logdiff, scale, stubs, keep.ids, power, ...) G.list <- function(x, ...) G.data.frame(x, ...) G.pdata.frame <- function(x, n = 1, diff = 1, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, ...) DG_pdata_frame_template(x, n, diff, cols, fill, 4L-logdiff, scale, stubs, shift, keep.ids, power, ...) collapse/R/fnobs.R0000644000176200001440000001352314777170130013522 0ustar liggesusers # For foundational changes to this code see fsum.R fnobs <- function(x, ...) UseMethod("fnobs") # , x fnobs.default <- function(x, g = NULL, TRA = NULL, use.g.names = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fnobs.matrix(x, g, TRA, use.g.names, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fnobs,x,0L,0L)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fnobs,x,length(lev),g), lev)) } if(is.nmfactor(g)) return(.Call(C_fnobs,x,fnlevels(g),g)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fnobs,x,attr(g,"N.groups"),g)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fnobs,x,g[[1L]],g[[2L]]), GRPnames(g))) return(.Call(C_fnobs,x,g[[1L]],g[[2L]])) } if(is.null(g)) return(TRAC(x,.Call(C_fnobs,x,0L,0L),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_fnobs,x,g[[1L]],g[[2L]]),g[[2L]],TRA, ...) } fnobs.matrix <- function(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fnobsm,x,0L,0L,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fnobsm,x,length(lev),g,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fnobsm,x,fnlevels(g),g,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fnobsm,x,attr(g,"N.groups"),g,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fnobsm,x,g[[1L]],g[[2L]],FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fnobsm,x,g[[1L]],g[[2L]],FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(C_fnobsm,x,0L,0L,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_fnobsm,x,g[[1L]],g[[2L]],FALSE),g[[2L]],TRA, ...) } fnobs.zoo <- function(x, ...) if(is.matrix(x)) fnobs.matrix(x, ...) else fnobs.default(x, ...) fnobs.units <- fnobs.zoo fnobs.data.frame <- function(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fnobsl,x,0L,0L,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fnobsl,x,length(lev),g,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fnobsl,x,fnlevels(g),g,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fnobsl,x,attr(g,"N.groups"),g,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE), groups)) return(.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE)) } if(is.null(g)) return(TRAlC(x,.Call(C_fnobsl,x,0L,0L,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE),g[[2L]],TRA, ...) } fnobs.list <- function(x, ...) fnobs.data.frame(x, ...) fnobs.grouped_df <- function(x, TRA = NULL, use.g.names = FALSE, keep.group_vars = TRUE, ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE)), ax)) } else return(setAttributes(.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],TRAlC(x[-gn],.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fnobsl,x[-gn],g[[1L]],g[[2L]],FALSE),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fnobsl,x,g[[1L]],g[[2L]],FALSE),g[[2L]],TRA, ...)) } fNobs <- function(x, ...) { message("Note that 'fNobs' was renamed to 'fnobs'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fnobs") } fNobs.default <- function(x, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fnobs.matrix(x, ...)) # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fnobs.default(x, ...) } fNobs.matrix <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fnobs.matrix(x, ...) } fNobs.data.frame <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fnobs.data.frame(x, ...) } collapse/R/fsummarise.R0000644000176200001440000001643615056572047014600 0ustar liggesusers# Old, simple version: # fFUN_add_groups <- function(x) { # x$g <- quote(.g_) # Faster than [["g"]] # x$use.g.names <- FALSE # x # } fFUN_smr_add_groups <- function(z) { if(!is.call(z)) return(z) cz <- as.character(z[[1L]]) if(length(cz) > 1L) cz <- if(any(cz == "collapse")) cz[length(cz)] else "" # needed if collapse::fmean etc.. if(any(cz == .FAST_FUN_MOPS)) { z$g <- quote(.g_) if(any(cz == .FAST_STAT_FUN_POLD)) z$use.g.names <- FALSE } # This works for nested calls (nothing more required, but need to put at the end..) if(length(z) > 2L || is.call(z[[2L]])) return(as.call(lapply(z, fFUN_smr_add_groups))) z } # Works: fFUN_smr_add_groups(quote(mean(fmax(min(fmode(mpg))))/fmean(mpg) + e + f + 1 + fsd(hp) + sum(bla) / 20)) # Also: quote(sum(x) + fmean(x) + e - 1 / fmedian(z)) # Also: quote(sum(z)/2+4+e+g+h+(p/sum(u))+(q-y)) # Also: quote(b-c/i(u)) # Also: quote(i(u)-b/p(z-u/log(a))) # Also: q/p # Note: Need unclass here because of t_list() in do_across(), which only works if also the interior of the list is a list! smr_funi_simple <- function(i, data, .data_, funs, aplvec, ce, ...) { # return(list(i = i, data = data, .data_ = .data_, funs = funs, aplvec = aplvec, ce = ce)) .FUN_ <- funs[[i]] nami <- names(funs)[i] if(aplvec[i]) { value <- if(missing(...)) lapply(unattrib(.data_), .FUN_) else do.call(lapply, c(list(unattrib(.data_), .FUN_), eval(substitute(list(...)), data, ce)), envir = ce) names(value) <- names(.data_) } else if(any(nami == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, drop = FALSE))) fcal <- as.call(c(list(quote(.FUN_), quote(.data_)), as.list(substitute(list(...))[-1L]))) fcal$drop <- FALSE return(unclass(eval(fcal, c(list(.data_ = .data_, .FUN_ = .FUN_), data), ce))) } else { value <- if(missing(...)) .FUN_(.data_) else do.call(.FUN_, c(list(.data_), eval(substitute(list(...)), data, ce)), envir = ce) oldClass(value) <- NULL } return(value) # Check is already done at the end... # if(all_eq(vlengths(value, FALSE))) stop("All computations must result in data values of equal length") } smr_funi_grouped <- function(i, data, .data_, funs, aplvec, ce, ...) { g <- data[[".g_"]] .FUN_ <- funs[[i]] nami <- names(funs)[i] if(aplvec[i]) { value <- if(missing(...)) lapply(unattrib(.data_), copysplaplfun, g, .FUN_) else dots_apply_grouped(.data_, g, .FUN_, eval(substitute(list(...)), data, ce)) names(value) <- names(.data_) } else if(any(nami == .FAST_STAT_FUN_POLD)) { if(missing(...)) return(unclass(.FUN_(.data_, g = g, use.g.names = FALSE))) fcal <- as.call(c(list(quote(.FUN_), quote(.data_), g = quote(.g_)), as.list(substitute(list(...))[-1L]))) fcal$use.g.names <- FALSE return(unclass(eval(fcal, c(list(.data_ = .data_, .FUN_ = .FUN_), data), ce))) } else { value <- dots_apply_grouped_bulk(.data_, g, .FUN_, if(missing(...)) NULL else eval(substitute(list(...)), data, ce)) value <- .Call(C_rbindlist, unclass(value), FALSE, FALSE, NULL) oldClass(value) <- NULL } return(value) # Again checks are done below } fsummarise <- function(.data, ..., keep.group_vars = TRUE, .cols = NULL) { if(!is.list(.data)) stop(".data needs to be a list of equal length columns or a data.frame") e <- substitute(list(...)) nam <- names(e) nullnam <- is.null(nam) pe <- parent.frame() cld <- oldClass(.data) # This needs to be called cld, because across fetches it from here !! if(any(cld == "grouped_df")) { oldClass(.data) <- NULL g <- GRP.grouped_df(.data, call = FALSE) attr(.data, "groups") <- NULL ax <- attributes(.data) ax[["class"]] <- fsetdiff(cld, c("GRP_df", "grouped_df")) .data[c(".g_", ".gsplit_")] <- list(g, gsplit) res <- vector("list", length(e)) for(i in 2:length(e)) { # This is good and very fast ei <- e[[i]] if(nullnam || nam[i] == "") { # Across if(ei[[1L]] == quote(across) || ei[[1L]] == quote(acr)) { ei[[1L]] <- quote(do_across) ei$.eval_funi <- quote(smr_funi_grouped) # return(eval(ei, list(do_across = do_across, smr_funi_grouped = smr_funi_grouped), pe)) res[[i]] <- eval(ei, list(do_across = do_across, smr_funi_grouped = smr_funi_grouped), pe) } else res[[i]] <- do_grouped_expr_list(ei, .data, g, pe, .cols, ax) } else { # Tagged vector expressions eif <- all_funs(ei) res[[i]] <- list(if(any(eif %in% .FAST_STAT_FUN_POLD)) # startsWith(eif, .FAST_STAT_FUN_POLD) Note: startsWith does not reliably capture expressions e.g. e <- quote(list(b = fmean(log(mpg)) + max(qsec))) does not work !! eval(fFUN_smr_add_groups(ei), .data, pe) else do_grouped_expr(ei, length(eif), .data, g, pe)) } } names(res) <- nam res[[1L]] <- if(keep.group_vars) g$groups else NULL res <- unlist(res, recursive = FALSE, use.names = TRUE) # replicating groups if more rows per computation... if(!all_eq(lr <- vlengths(res, FALSE))) { # if(!keep.group_vars) stop("all computations need to result in vectors of equal length") # gi <- seq_along(g$group.vars) # ef <- lr[length(gi)+1L] / g[[1L]] rnglr <- .range(lr) ef <- rnglr / g[[1L]] if(ef[1L] < 1) stop("An expression did not return a value for some groups. Please ensure that a value is returned for each group") ef <- ef[2L] # if(!all_eq(lr[-gi]) || ef %% 1 > 0) stop("all computations need to result in vectors of equal length") gi <- whichv(lr, rnglr[2L], invert = TRUE) if(ef != as.integer(ef) || !all_eq(lr[gi])) stop("all computations need to result in vectors of length 1 or the maximum length of any expression") res[gi] <- .Call(C_subsetDT, res, rep(seq_len(g[[1L]]), each = ef), gi, FALSE) # Using C_subsetvector is not really faster... (1-2 microseconds gain) } } else { # Without groups... ax <- attributes(.data) oldClass(.data) <- NULL # Not strictrly needed but just to make sure execution is efficient in across etc.. if(nullnam || bsum(!nzchar(nam)) > 1L) { # Likely Across statement... for(i in 2:length(e)) { ei <- e[[i]] if(nullnam || nam[i] == "") { if(ei[[1L]] == quote(across) || ei[[1L]] == quote(acr)) { # stop("expressions need to be named or start with across(), or its shorthand acr().") ei[[1L]] <- quote(.do_across) ei$.eval_funi <- quote(.smr_funi_simple) } e[[i]] <- ei } else e[[i]] <- as.call(list(quote(list), ei)) } # return(eval(e, c(.data, list(.do_across = do_across, .smr_funi_simple = smr_funi_simple)), pe)) res <- unlist(eval(e, c(.data, list(.do_across = do_across, .smr_funi_simple = smr_funi_simple)), pe), recursive = FALSE, use.names = TRUE) } else res <- eval(e, .data, pe) # return(res) if(!all_eq(lr <- vlengths(res, FALSE))) { maxlr <- bmax(lr) gi <- whichv(lr, maxlr, invert = TRUE) if(!allv(lr[gi], 1L)) stop("all computations need to result in vectors of length 1 or the maximum length of any expression") res[gi] <- .Call(C_subsetDT, res, rep.int(1L, maxlr), gi, FALSE) } } ax[c("names", "row.names")] <- list(names(res), .set_row_names(.Call(C_fnrow, res))) return(condalcSA(res, ax, any(cld == "data.table"))) } fsummarize <- fsummarise smr <- fsummarise collapse/R/rsplit.R0000644000176200001440000001152414777170130013727 0ustar liggesusers # fsplit <- function(x, f, drop, ...) if(drop && is.factor(f)) # split(x, .Call(Cpp_fdroplevels, f, !inherits(f, "na.included")), drop = FALSE, ...) else # split(x, qF(f), drop = FALSE, ...) t_list2 <- function(x) .Call(Cpp_mctl, do.call(rbind, x), TRUE, 0L) # This is for export t_list <- function(l) { lmat <- do.call(rbind, l) dn <- dimnames(lmat) res <- .Call(Cpp_mctl, lmat, !is.null(dn[[2L]]), 0L) if(length(rn <- dn[[1L]])) res <- lapply(res, `names<-`, rn) .Call(C_copyMostAttrib, res, l) } rsplit <- function(x, ...) UseMethod("rsplit") rsplit.default <- function(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, ...) { # , check = TRUE # if(is.matrix(x) && !inherits(x, "matrix")) return(rsplit.matrix(x, fl, drop, flatten, use.names, ...)) if(is.atomic(fl) || flatten || is_GRP(fl)) return(gsplit(x, fl, use.names, drop = drop, ...)) attributes(fl) <- NULL # if(check) fl <- lapply(fl, qF) # necessary ? -> split.default is actually faster on non-factor variables ! rspl <- function(y, fly) { if(length(fly) == 1L) return(gsplit(y, fly[[1L]], use.names, drop = drop, ...)) mapply(rspl, y = gsplit(y, fly[[1L]], use.names, drop = drop, ...), fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) # Possibility to avoid transpose ? C_subsetDT ?? } rspl(x, fl) } # Matrix method: requested in https://github.com/ycroissant/plm/issues/33 split_mat <- function(x, fl, dd, ...) { ssfun <- if(dd) function(i) x[i, , drop = TRUE] else function(i) x[i, , drop = FALSE] lapply(gsplit(NULL, fl, ...), ssfun) } rsplit.matrix <- function(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, drop.dim = FALSE, ...) { if(is.atomic(fl) || flatten || is_GRP(fl)) return(split_mat(x, fl, drop.dim, use.names, drop = drop, ...)) attributes(fl) <- NULL rspl <- function(y, fly) { if(length(fly) == 1L) return(split_mat(y, fly[[1L]], drop.dim, use.names, drop = drop, ...)) mapply(rspl, y = split_mat(y, fly[[1L]], drop.dim, use.names, drop = drop, ...), fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) } rspl(x, fl) } rsplit.zoo <- function(x, ...) if(is.matrix(x)) rsplit.matrix(x, ...) else rsplit.default(x, ...) rsplit.units <- rsplit.zoo # From stackoverflow package: # rsplit <- function (x, by, drop = FALSE) # { # if (is.atomic(by)) # return(split(x, by, drop = drop)) # attributes(by) <- NULL # if (length(by) == 1L) # return(split(x, by[[1L]], drop = drop)) # mapply(rsplit, x = split(x, by[[1L]], drop = drop), by = t(lapply(by[-1L], split, by[[1L]], drop = drop)), drop = drop, # SIMPLIFY = FALSE) # } rsplit.data.frame <- function(x, by, drop = TRUE, flatten = FALSE, # check = TRUE, cols = NULL, keep.by = FALSE, simplify = TRUE, use.names = TRUE, ...) { if(is.call(by)) { nam <- attr(x, "names") if(length(by) == 3L) { byn <- ckmatch(all.vars(by[[3L]]), nam) cols <- ckmatch(all.vars(by[[2L]]), nam) } else { # keep.by always added: Same behavior as L or W !! byn <- ckmatch(all.vars(by), nam) if(!(is.null(cols) && keep.by)) cols <- if(is.null(cols)) -byn else cols2int(cols, x, nam, FALSE) } by <- .subset(x, byn) if(length(cols)) x <- fcolsubset(x, if(keep.by) c(byn, cols) else cols, TRUE) } else if(length(cols)) x <- fcolsubset(x, cols2int(cols, x, attr(x, "names"), FALSE), TRUE) if(simplify && length(unclass(x)) == 1L) return(rsplit.default(.subset2(x, 1L), by, drop, flatten, use.names, ...)) # , check # Note there is a data.table method: split.data.table, which can also do recursive splitting.. j <- seq_along(unclass(x)) rn <- attr(x, "row.names") if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") { gsplit_DF <- function(x, f, ...) lapply(gsplit(NULL, f, use.names, drop = drop, ...), function(i) .Call(C_subsetDT, x, i, j, FALSE)) # .Call, .NAME = C_subsetDT, j, FALSE) -> doesn't work! } else { gsplit_DF <- function(x, f, ...) { rown <- attr(x, "row.names") # Need to do this, handing down from the function body doesn't work lapply(gsplit(NULL, f, use.names, drop = drop, ...), function(i) `attr<-`(.Call(C_subsetDT, x, i, j, FALSE), "row.names", rown[i])) } } if(is.atomic(by) || flatten || is_GRP(by)) return(gsplit_DF(x, by, ...)) attributes(by) <- NULL # if(check) by <- lapply(by, qF) # necessary ? rspl_DF <- function(y, fly) { if(length(fly) == 1L) return(gsplit_DF(y, fly[[1L]], ...)) mapply(rspl_DF, y = gsplit_DF(y, fly[[1L]], ...), fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) # Possibility to avoid transpose ? } # use C_subsetDT here as well ??? what is faster ??? rspl_DF(x, by) } collapse/R/recode_replace.R0000644000176200001440000004652314777170130015355 0ustar liggesusers# Note: don't change the order of these arguments !!! scv <- function(x, v, r, set = FALSE, inv = FALSE, vind1 = FALSE) .Call(C_setcopyv, x, v, r, inv, set, vind1) # inspired by ?dplyr::recode # Think about adopting this code for as_numeric_factor and as_character_factor recode_num <- function(X, ..., default = NULL, missing = NULL, set = FALSE) { if(missing(...)) stop("recode_num requires arguments of the form: value = replacement") args <- list(...) nam <- as.numeric(names(args)) # nzchar(names(args)) ... check non-empty names ? -> nah, this package is not for dummies if(anyNA(nam)) stop(paste("Non-numeric arguments:", paste(names(args)[is.na(nam)], collapse = ", "))) arglen <- length(args) missingl <- !is.null(missing) if(missingl && any(nam == missing)) warning(paste0("To improve performance missing values are replaced prior to recode, so this replaces all missing values with ", missing, " and those are then again replaced with ", args[[which(nam == missing)]], ". If this is not desired, call replace_na after recode with missing = NULL.")) if(arglen == 1L) { args <- args[[1L]] if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.numeric(y)) { z <- scv(y, NA, missing, set) # y[is.na(y)] <- missing scv(z, nam, args, TRUE) # `[<-`(y, y == nam, value = args) } else y } else { repfun <- function(y) if(is.numeric(y)) scv(y, nam, args, set) else y # `[<-`(y, y == nam, value = args) } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow(X) if(missingl) { repfun <- function(y) if(is.numeric(y)) { nas <- is.na(y) z <- scv(y, nas, missing, set, vind1 = TRUE) ind <- whichv(z, nam) scv(z, nas, default, TRUE, TRUE, vind1 = TRUE) # duplAttributes(alloc(default, nr), y) scv(z, ind, args, TRUE, vind1 = TRUE) # y == nam } else y } else { repfun <- function(y) if(is.numeric(y)) scv(scv(y, nam, default, set, TRUE), nam, args, TRUE) else y # `[<-`(duplAttributes(alloc(default, nr), y), y == nam, value = args) } } } else { seqarg <- seq_len(arglen) if(is.null(default)) { repfun <- function(y) if(is.numeric(y)) { if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing else if(!set) y <- scv(y, 1L, y[1L], vind1 = TRUE) # copy z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE) y } else y # repfun <- function(y) if(is.numeric(y)) { # if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing # if(set) { # Note: not strictly the way this should work... # for(i in seqarg) scv(y, nam[i], args[[i]], TRUE) # return(y) # } # z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy # for(i in seqarg) scv(z, whichv(y, nam[i]), args[[i]], TRUE, vind1 = TRUE) # z # } else y } else { nr <- if(is.atomic(X)) NROW(X) else fnrow(X) if(missingl) { repfun <- function(y) if(is.numeric(y)) { nas <- is.na(y) y <- scv(y, nas, missing, set, vind1 = TRUE) z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy scv(y, nas, default, TRUE, TRUE, vind1 = TRUE) for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE) y } else y } else { repfun <- function(y) if(is.numeric(y)) { z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy y <- scv(y, nam[1L], default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(y, nam[1L], args[[1L]], TRUE) for(i in seqarg[-1L]) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE) y } else y } } } if(is.list(X)) { if(set) { lapply(unattrib(X), repfun) return(invisible(X)) } res <- duplAttributes(lapply(unattrib(X), repfun), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.numeric(X)) stop("X needs to be numeric or a list") res <- repfun(X) return(if(set) invisible(res) else res) } recode_char <- function(X, ..., default = NULL, missing = NULL, regex = FALSE, ignore.case = FALSE, fixed = FALSE, set = FALSE) { if(missing(...)) stop("recode_char requires arguments of the form: value = replacement") args <- list(...) nam <- names(args) arglen <- length(args) missingl <- !is.null(missing) if(missingl && any(nam == missing)) warning(paste0("To improve performance missing values are replaced prior to recode, so this replaces all missing values with ", missing, " and those are then again replaced with ", args[[which(nam == missing)]], ". If this is not desired, call replace_na after recode with missing = NULL.")) if(regex) { if(arglen == 1L) { args <- args[[1L]] if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.character(y)) { y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing scv(y, grepl(nam, y, ignore.case, FALSE, fixed), args, TRUE, vind1 = TRUE) } else y } else { repfun <- function(y) if(is.character(y)) scv(y, grepl(nam, y, ignore.case, FALSE, fixed), args, set, vind1 = TRUE) else y } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow(X) if(missingl) { repfun <- function(y) if(is.character(y)) { nas <- is.na(y) z <- scv(y, nas, missing, set, vind1 = TRUE) ind <- grepl(nam, z, ignore.case, FALSE, fixed) scv(z, nas, default, TRUE, TRUE, vind1 = TRUE) # duplAttributes(alloc(default, nr), y) scv(z, ind, args, TRUE, vind1 = TRUE) } else y } else { repfun <- function(y) if(is.character(y)) { ind <- grepl(nam, y, ignore.case, FALSE, fixed) scv(scv(y, ind, default, set, TRUE, vind1 = TRUE), ind, args, TRUE, vind1 = TRUE) } else y } } } else { seqarg <- seq_len(arglen) if(is.null(default)) { repfun <- function(y) if(is.character(y)) { if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing else if(!set) y <- scv(y, 1L, y[1L], vind1 = TRUE) # copy z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy for(i in seqarg) scv(y, grepl(nam[i], z, ignore.case, FALSE, fixed), args[[i]], TRUE, vind1 = TRUE) y } else y } else { nr <- if(is.atomic(X)) NROW(X) else fnrow(X) if(missingl) { repfun <- function(y) if(is.character(y)) { nas <- is.na(y) y <- scv(y, nas, missing, set, vind1 = TRUE) z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy scv(y, nas, default, TRUE, TRUE, vind1 = TRUE) for(i in seqarg) scv(y, grepl(nam[i], z, ignore.case, FALSE, fixed), args[[i]], TRUE, vind1 = TRUE) y } else y } else { repfun <- function(y) if(is.character(y)) { z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy y <- scv(y, seq_along(y), default, set, vind1 = TRUE) # Initialize all to default for(i in seqarg) scv(y, grepl(nam[i], z, ignore.case, FALSE, fixed), args[[i]], TRUE, vind1 = TRUE) y } else y } } } } else { if(arglen == 1L) { args <- args[[1L]] if(is.null(default)) { if(missingl) { repfun <- function(y) if(is.character(y)) { z <- scv(y, NA, missing, set) # y[is.na(y)] <- missing scv(z, nam, args, TRUE) # `[<-`(y, y == nam, value = args) } else y } else { repfun <- function(y) if(is.character(y)) scv(y, nam, args, set) else y # `[<-`(y, y == nam, value = args) } } else { nr <- if(is.atomic(X)) NROW(X) else fnrow(X) if(missingl) { repfun <- function(y) if(is.character(y)) { nas <- is.na(y) z <- scv(y, nas, missing, set, vind1 = TRUE) ind <- whichv(z, nam) scv(z, nas, default, TRUE, TRUE, vind1 = TRUE) # duplAttributes(alloc(default, nr), y) scv(z, ind, args, TRUE, vind1 = TRUE) # y == nam } else y } else { repfun <- function(y) if(is.character(y)) scv(scv(y, nam, default, set, TRUE), nam, args, TRUE) else y # `[<-`(duplAttributes(alloc(default, nr), y), y == nam, value = args) } } } else { seqarg <- seq_len(arglen) if(is.null(default)) { repfun <- function(y) if(is.character(y)) { if(missingl) y <- scv(y, NA, missing, set) # y[is.na(y)] <- missing else if(!set) y <- scv(y, 1L, y[1L], vind1 = TRUE) # copy z <- scv(y, 1L, y[1L], vind1 = TRUE) # copy for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE) y } else y } else { nr <- if(is.atomic(X)) NROW(X) else fnrow(X) if(missingl) { repfun <- function(y) if(is.character(y)) { nas <- is.na(y) y <- scv(y, nas, missing, set, vind1 = TRUE) z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy scv(y, nas, default, TRUE, TRUE, vind1 = TRUE) for(i in seqarg) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE) y } else y } else { repfun <- function(y) if(is.character(y)) { z <- scv(y, 1L, y[1L], vind1 = TRUE) # Copy y <- scv(y, nam[1L], default, set, TRUE) # duplAttributes(alloc(default, nr), y) scv(y, nam[1L], args[[1L]], TRUE) for(i in seqarg[-1L]) scv(y, whichv(z, nam[i]), args[[i]], TRUE, vind1 = TRUE) y } else y } } } } if(is.list(X)) { if(set) { lapply(unattrib(X), repfun) return(invisible(X)) } res <- duplAttributes(lapply(unattrib(X), repfun), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.character(X)) stop("X needs to be character or a list") res <- repfun(X) return(if(set) invisible(res) else res) } na_locf <- function(x, set = FALSE) .Call(C_na_locf, x, set) na_focb <- function(x, set = FALSE) .Call(C_na_focb, x, set) na_locf_ph <- function(x, ph1, ph2, set = FALSE) .Call(C_na_locf, x, set) na_focb_ph <- function(x, ph1, ph2, set = FALSE) .Call(C_na_focb, x, set) replace_na <- function(X, value = 0L, cols = NULL, set = FALSE, type = "const") { FUN <- switch(type, const =, value = scv, locf = na_locf_ph, focb = na_focb_ph, stop("Unknown type:", type)) if(set) { if(is.list(X)) { if(is.null(cols)) { lapply(unattrib(X), FUN, NA, value, TRUE) } else if(is.function(cols)) { lapply(unattrib(X), function(y) if(cols(y)) FUN(y, NA, value, TRUE) else y) } else { cols <- cols2int(cols, X, attr(X, "names"), FALSE) lapply(unattrib(X)[cols], FUN, NA, value, TRUE) } } else FUN(X, NA, value, TRUE) # `[<-`(X, is.na(X), value = value) return(invisible(X)) } if(is.list(X)) { if(is.null(cols)) return(condalc(duplAttributes(lapply(unattrib(X), FUN, NA, value), X), inherits(X, "data.table"))) # function(y) `[<-`(y, is.na(y), value = value) if(is.function(cols)) return(condalc(duplAttributes(lapply(unattrib(X), function(y) if(cols(y)) FUN(y, NA, value) else y), X), inherits(X, "data.table"))) clx <- oldClass(X) oldClass(X) <- NULL cols <- cols2int(cols, X, names(X), FALSE) X[cols] <- lapply(unattrib(X[cols]), FUN, NA, value) # function(y) `[<-`(y, is.na(y), value = value) return(condalc(`oldClass<-`(X, clx), any(clx == "data.table"))) } FUN(X, NA, value) # `[<-`(X, is.na(X), value = value) } replace_NA <- replace_na # Remove Inf (Infinity) and NaN (Not a number) from vectors or data frames: replace_inf <- function(X, value = NA, replace.nan = FALSE, set = FALSE) { if(set) { if(is.list(X)) { lapply(unattrib(X), if(replace.nan) (function(y) if(is.numeric(y)) scv(y, is.infinite(y) | is.nan(y), value, TRUE, vind1 = TRUE) else y) else (function(y) if(is.numeric(y)) scv(y, is.infinite(y), value, TRUE, vind1 = TRUE) else y)) } if(!is.numeric(X)) stop("Infinite values can only be replaced in numeric objects!") if(replace.nan) scv(X, is.infinite(X) | is.nan(X), value, TRUE, vind1 = TRUE) else scv(X, is.infinite(X), value, TRUE, vind1 = TRUE) return(invisible(X)) } if(is.list(X)) { # if(!inherits(X, "data.frame")) stop("replace_non_finite only works with atomic objects or data.frames") res <- duplAttributes(lapply(unattrib(X), if(replace.nan) (function(y) if(is.numeric(y)) scv(y, is.infinite(y) | is.nan(y), value, vind1 = TRUE) else y) else (function(y) if(is.numeric(y)) scv(y, is.infinite(y), value, vind1 = TRUE) else y)), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.numeric(X)) stop("Infinite values can only be replaced in numeric objects!") if(replace.nan) return(scv(X, is.infinite(X) | is.nan(X), value, vind1 = TRUE)) # !is.finite(X) also replaces NA scv(X, is.infinite(X), value, vind1 = TRUE) } replace_Inf <- replace_inf # replace_non_finite <- function(X, value = NA, replace.nan = TRUE) { # .Deprecated("replace_Inf") # replace_Inf(X, value, replace.nan) # } Crepoutl <- function(x, limits, value, single_limit, set = FALSE) .Call(C_replace_outliers, x, limits, value, single_limit, set) sd_limits <- function(x, limits) { st <- fbstatsCpp(x, stable.algo = FALSE, setn = FALSE) st[2L] + st[3L] * c(-limits, limits) } mad_limits <- function(x, limits) { med <- fmedian.default(x) mad <- fmedian.default(abs(x - med)) med + mad * c(-limits, limits) } # scaling data using MAD mad_trans <- function(x) { if(inherits(x, c("pseries", "pdata.frame"))) { g <- GRP(x) tmp <- fmedian(x, g, TRA = "-") tmp %/=% fmedian(if(is.list(tmp)) lapply(tmp, abs) else abs(tmp), g, TRA = "fill", set = TRUE) return(tmp) } tmp <- fmedian(x, TRA = "-") tmp %/=% fmedian(if(is.list(tmp)) dapply(tmp, abs) else abs(tmp), TRA = "fill", set = TRUE) return(tmp) } replace_outliers <- function(X, limits, value = NA, single.limit = c("sd", "mad", "min", "max"), ignore.groups = FALSE, set = FALSE) { if(length(limits) == 1L) { # "overall_" arguments are legacy, now accommodated via the ignore.groups argument sl <- switch(single.limit[1L], SDs = 4L, min = 2L, max = 3L, overall_SDs = 5L, sd = 4L, mad = 6L, MADs = 6L, overall_MADs = 7L, # Just in case stop("Unknown single.limit option: ", single.limit[1L])) if(sl == 5L || sl == 7L) ignore.groups <- TRUE } else sl <- 0L if(sl > 3L) { # Outliers according to standard deviation or MAD threshold if(is.list(X)) { if(!ignore.groups && inherits(X, c("grouped_df", "pdata.frame"))) { if(is.character(value)) stop("clipping is not yet supported with grouped/panel data and SDs/MADs thresholds.") num <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) num <- if(inherits(X, "grouped_df")) num & !fgroup_vars(X, "logical") else num & attr(findex(X), "names") %!in% attr(X, "names") clx <- oldClass(X) STDXnum <- if(sl > 5L) mad_trans(fcolsubset(X, num)) else fscale(fcolsubset(X, num)) oldClass(X) <- NULL res <- .mapply(function(z, y) scv(z, abs(y) > limits, value, set, vind1 = TRUE), list(unattrib(X[num]), unattrib(STDXnum)), NULL) if(set) return(invisible(X)) X[num] <- res res <- `oldClass<-`(X, clx) } else { limit_fun <- if(sl > 5L) mad_limits else sd_limits res <- lapply(unattrib(X), function(y) if(is.numeric(y)) Crepoutl(y, limit_fun(y, limits), value, sl, set) else y) if(set) return(invisible(X)) res <- duplAttributes(res, X) } return(if(inherits(res, "data.table")) alc(res) else res) } if(is.matrix(X)) { if(is.character(value)) stop("clipping is not yet supported with matrices and SDs/MADs thresholds.") res <- scv(X, abs(if(sl > 5L) mad_trans(X) else fscale(X)) > limits, value, set, vind1 = TRUE) } else { res <- Crepoutl(X, if(sl > 5L) mad_limits(X, limits) else sd_limits(X, limits), value, sl, set) } return(if(set) invisible(res) else res) } # Standard cases if(set) { if(is.list(X)) lapply(unattrib(X), function(y) if(is.numeric(y)) Crepoutl(y, limits, value, sl, set) else y) else Crepoutl(X, limits, value, sl, set) return(invisible(X)) } if(is.list(X)) { res <- duplAttributes(lapply(unattrib(X), function(y) if(is.numeric(y)) Crepoutl(y, limits, value, sl, set) else y), X) return(if(inherits(res, "data.table")) alc(res) else res) } Crepoutl(X, limits, value, sl, set) } # pad or fpad? x is vector, matrix or data.frame pad_atomic <- function(x, i, n, value) { ax <- attributes(x) tx <- typeof(x) if(typeof(value) != tx) value <- as.vector(value, tx) if(is.matrix(x)) { k <- dim(x)[2L] m <- .Call(C_alloc, value, n * k, TRUE) # matrix(value, n, k) dim(m) <- c(n, k) m[i, ] <- x if(length(ax) == 1L) return(m) ax[["dim"]] <- c(n, k) # Could also pad row-names? perhaps with names of i ?? if(length(ax[["dimnames"]][[1L]])) ax[["dimnames"]] <- list(NULL, ax[["dimnames"]][[2L]]) if(is.object(x)) ax[["class"]] <- NULL return(`attributes<-`(m, ax)) # fastest ?? } r <- .Call(C_alloc, value, n, TRUE) # matrix(value, n) # matrix is faster than rep_len !!!! r[i] <- x if(is.null(ax)) return(r) if(length(names(x))) { if(length(ax) == 1L) return(r) ax[["names"]] <- NULL } return(`attributes<-`(r, ax)) } # microbenchmark::microbenchmark(x[-i] <- ri, x[i2] <- ri) # Unit: milliseconds # expr min lq mean median uq max neval cld # x[-i] <- ri 255.16654 420.7083 491.7369 446.0340 476.3324 1290.7396 100 b # x[i2] <- ri 80.18755 136.8012 157.0027 146.8156 166.7158 311.5526 100 a # microbenchmark::microbenchmark(seq_along(x)[-i]) # Unit: milliseconds # expr min lq mean median uq max neval # seq_along(x)[-i] 506.0745 541.7975 605.0245 567.8115 585.8384 1341.035 100 pad <- function(X, i, value = NA, method = c("auto", "xpos", "vpos")) { # 1 - i is same length as X, fill missing, 2 - i is positive: insert missing values in positions ilog <- is.logical(i) ineg <- i[1L] < 0L n <- if(is.list(X) || is.matrix(X)) fnrow(X) else length(X) xpos <- switch(method[1L], auto = if(ilog) bsum(i) == n else if(ineg) FALSE else length(i) == n, xpos = TRUE, vpos = FALSE, stop("Unknown method: ", method[1L])) n <- if(ilog) length(i) else if(xpos && !ineg) bmax(i) else n + length(i) if(is.atomic(X)) return(pad_atomic(X, if(xpos || ineg) i else if(ilog) !i else -i, n, value)) if(!is.list(X)) stop("X must be atomic or a list") if(ilog) { i <- if(xpos) which(i) else whichv(i, FALSE) } else if(!xpos) { i <- seq_len(n)[if(ineg) i else -i] } ax <- attributes(X) attributes(X) <- NULL res <- lapply(X, pad_atomic, i, n, value) if(length(ax[["row.names"]])) ax[["row.names"]] <- .set_row_names(n) return(condalcSA(res, ax, any(ax[["class"]] == "data.table"))) } # Something like this already exists?? -> should work with lists as well... collapse/R/fmean.R0000644000176200001440000001535414777170130013505 0ustar liggesusers # Note: for principal innovations of this code see fsum.R fmean <- function(x, ...) UseMethod("fmean") # , x fmean.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, nthreads = .op[["nthreads"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fmean.matrix(x, g, w, TRA, na.rm, use.g.names, nthreads = nthreads, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fmean,x,0L,0L,NULL,w,na.rm,nthreads)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fmean,x,length(lev),g,NULL,w,na.rm,nthreads), lev)) } if(is.nmfactor(g)) return(.Call(C_fmean,x,fnlevels(g),g,NULL,w,na.rm,nthreads)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmean,x,attr(g,"N.groups"),g,NULL,w,na.rm,nthreads)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fmean,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,nthreads), GRPnames(g))) return(.Call(C_fmean,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,nthreads)) } if(is.null(g)) return(TRAC(x,.Call(C_fmean,x,0L,0L,NULL,w,na.rm,nthreads),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_fmean,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,nthreads),g[[2L]],TRA, ...) } fmean.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fmeanm,x,0L,0L,NULL,w,na.rm,drop,nthreads)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fmeanm,x,length(lev),g,NULL,w,na.rm,drop,nthreads), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fmeanm,x,fnlevels(g),g,NULL,w,na.rm,drop,nthreads)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmeanm,x,attr(g,"N.groups"),g,NULL,w,na.rm,drop,nthreads)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fmeanm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop,nthreads), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fmeanm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop,nthreads)) } if(is.null(g)) return(TRAmC(x,.Call(C_fmeanm,x,0L,0L,NULL,w,na.rm,TRUE,nthreads),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_fmeanm,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop,nthreads),g[[2L]],TRA, ...) } fmean.zoo <- function(x, ...) if(is.matrix(x)) fmean.matrix(x, ...) else fmean.default(x, ...) fmean.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fmean.matrix(x, ...), x) else fmean.default(x, ...) fmean.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fmeanl,x,0L,0L,NULL,w,na.rm,drop,nthreads)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fmeanl,x,length(lev),g,NULL,w,na.rm,drop,nthreads), lev)) } if(is.nmfactor(g)) return(.Call(C_fmeanl,x,fnlevels(g),g,NULL,w,na.rm,drop,nthreads)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmeanl,x,attr(g,"N.groups"),g,NULL,w,na.rm,drop,nthreads)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop,nthreads), groups)) return(.Call(C_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop,nthreads)) } if(is.null(g)) return(TRAlC(x,.Call(C_fmeanl,x,0L,0L,NULL,w,na.rm,TRUE,nthreads),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,drop,nthreads),g[[2L]],TRA, ...) } fmean.list <- function(x, ...) fmean.data.frame(x, ...) fmean.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], nthreads = .op[["nthreads"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "sum.")) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(C_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,nthreads)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(C_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,nthreads)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(C_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,nthreads)), ax)) } else return(setAttributes(.Call(C_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,nthreads), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(C_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,nthreads),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fmeanl,x[-gn],g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,nthreads),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fmeanl,x,g[[1L]],g[[2L]],g[[3L]],w,na.rm,FALSE,nthreads),g[[2L]],TRA, ...)) } collapse/R/pivot.R0000644000176200001440000006060515056572047013563 0ustar liggesusers# TODO: 'to' in wider and recast pivot should be optional (if only one variable is pivoted -> should be able to set to = NULL). proc_names_longer <- function(x) { if(is.null(x)) return(list("variable", "value")) if(is.list(x)) { # is.character(x) : list is not necessary but clearer (also regarding multiple casts etc.) !!! if(is.null(names(x))) { if(length(x) != 2L) stop("If how = 'longer', 'names' needs to be a length-2 list or a named list. You specified a list length: ", length(x)) return(x) } if(length(x) > 2L) stop("If how = 'longer', 'names' needs to be a length-2 list or a named list length-1 or -2. You specified a list length: ", length(x)) res <- list(variable = "variable", value = "value") ind <- ckmatch(names(x), names(res), e = "Unknown keywords (must be 'variable' and/or 'value'):") res[ind] <- x return(res) } stop("If how = 'longer', 'names' needs to be a (named) list. You supplied a vector of type: ", typeof(x)) } proc_names_recast <- function(x, data) { if(is.null(x)) { ind <- whichv(names(data), "variable") if(!length(ind)) stop("Need to provide 'names'. The default name 'variable' was not found in the data.") return(list(ind, "variable")) } if(is.list(x)) { if(is.null(names(x))) { if(length(x) != 2L) stop("If how = 'recast', 'names' needs to be a length-2 list or a named list. You specified a list length: ", length(x)) } else { if(length(x) > 2L) stop("If how = 'recast', 'names' needs to be a length-2 list or a named list length-1 or -2. You specified a list length: ", length(x)) res <- list(from = "variable", to = "variable") ind <- ckmatch(names(x), names(res), e = "Unknown keywords (must be 'from' and/or 'to'):") res[ind] <- x x <- res } ind <- cols2int(x[[1L]], data, names(data)) # nam_col <- if(length(ind) == 1L) data[[ind]] else finteraction(data[ind], sort = sort, sep = "_") return(list(ind, x[[2L]])) } stop("If how = 'recast', 'names' needs to be a (named) list. You supplied a vector of type: ", typeof(x)) } proc_labels_recast <- function(x, data) { if(is.list(x)) { if(is.null(names(x))) { if(length(x) != 2L && length(x) != 3L) stop("If how = 'recast', 'labels' needs to be a length-2 list or a named list. You specified a list length: ", length(x)) } else { if(length(x) > 3L) stop("If how = 'recast', 'labels' needs to be a length-2 list or a named list length-1 or -2. You specified a list length: ", length(x)) res <- list(from = NULL, to = NULL, new = NULL) ind <- ckmatch(names(x), names(res), e = "Unknown keywords (must be 'from', 'to' or 'new'):") res[ind] <- x x <- res } ind <- if(length(x[[1L]])) cols2int(x[[1L]], data, names(data)) else NULL return(list(ind, x[[2L]], x[[3L]])) } stop("If how = 'recast', 'labels' needs to be a (named) list. You supplied a vector of type: ", typeof(x)) } # Crbindlist <- function(x) .Call(C_rbindlist, x, FALSE, FALSE, NULL) # Faster than do.call(c, unattrib(data[values])): # c_to_vec <- function(l) .Call(C_rbindlist, lapply(unattrib(l), list), FALSE, FALSE, NULL)[[1L]] # Same thing (also same speed), a bit less cumbersome... # c_to_vec2 <- function(l) .Call(C_pivot_long, l, NULL, FALSE) # Special case: no ids supplied melt_all <- function(vd, names, factor, na.rm, labels, check.dups) { if(check.dups && fnrow(vd) > 1L) warning("duplicates detected: you have supplied no ids and the data has ", fnrow(vd), " rows. Consider supplying ids so that that records in the long format data frame are identified.") if(length(labels)) labs <- vlabels(vd, use.names = FALSE) # 6 cases: label or not, factor or not (either id or label) if(length(labels) || factor[1L]) { # if labels: generate id to expand vectors: faster than rep... nam <- names(vd) attributes(vd) <- NULL } if(na.rm) vd <- lapply(vd, na_rm) # Note: beforehand is faster, I tested it... res <- .Call(C_pivot_long, vd, NULL, TRUE) # rbindlist gives factor value: .Call(C_rbindlist, lapply(unattrib(vd), list), FALSE, FALSE, "id") names(res) <- names if(length(labels)) { if(is.list(labels)) stop("Since no ids are specified, please just use setLabels() or relabel() following pivot to assign new variable labels") if(factor[2L]) { label_col <- res[[1L]] attr(label_col, "levels") <- labs oldClass(label_col) <- "factor" # c("factor", "na.included") } else label_col <- Csv(labs, res[[1L]]) label_col <- list(label_col) names(label_col) <- if(is.character(labels)) labels else "label" res <- c(res[1L], label_col, res[2L]) } if(factor[1L]) { attr(res[[1L]], "levels") <- nam oldClass(res[[1L]]) <- "factor" # c("factor", "na.included") } else if(length(labels)) res[[1L]] <- Csv(nam, res[[1L]]) res } # Retain labels in wider reshaping add_labels <- function(l, labs) { ll <- .Call(C_vlabels, l, "label", FALSE) if(!allNA(ll)) labs <- paste(ll, labs, sep = " - ") .Call(C_setvlabels, l, "label", labs, NULL) } apply_external_FUN <- function(data, g, FUN, args, name) { FUN <- match.fun(FUN) if(is.null(args)) { if(any(name == .FAST_STAT_FUN)) return(FUN(data, g = g, TRA = "fill")) return(TRA(data, BY(data, g, FUN, use.g.names = FALSE, reorder = FALSE), "fill", g)) } if(any(name == .FAST_STAT_FUN)) return(do.call(FUN, c(list(x = data, g = g, TRA = "fill"), args))) TRA(data, do.call(BY, c(list(x = data, g = g, FUN = FUN, use.g.names = FALSE, reorder = FALSE), args)), "fill", g) } # TODO: Think about: values could be list input, names only atomic. that would make more sense... # Or: allow for both options... needs to be consistent with "labels" though... # Transposition Example: # pivot(BWA, names = list(from = c("Variable", "Year"), to = "Sectorcode"), how = "r") # data = BWA # ids = NULL # names = list(from = c("Variable", "Year"), to = "Sectorcode") # labels = NULL # values = NULL # how = "r" # na.rm = FALSE # factor = c("names", "labels") # check.dups = FALSE # fill = NULL # drop = TRUE # sort = FALSE # nthreads = 1L # transpose = FALSE # Check labels and attributes.. pivot <- function(data, ids = NULL, values = NULL, names = NULL, # list is better labels = NULL, how = "longer", # Better to only have one?, because the other arguments use multiple?? na.rm = FALSE, factor = c("names", "labels"), check.dups = FALSE, FUN = "last", FUN.args = NULL, nthreads = .op[["nthreads"]], fill = NULL, # Fill is for pivot_wider drop = TRUE, # Same as with dcast() sort = FALSE, # c("ids", "names") transpose = FALSE) # c(columns = FALSE, names = FALSE)) { if(!is.list(data)) stop("pivot only supports data.frame-like objects") ad <- attributes(data) oldClass(data) <- NULL nam <- names(data) if(length(ids)) ids <- cols2int(ids, data, nam) if(length(values)) values <- cols2int(values, data, nam) factor <- c("names", "labels") %in% factor how <- switch(how, l = , longer = 1L, w = , wider = 2L, r = , recast = 3L, stop("Unknown pivoting method: ", how)) if(how == 1L) { # TODO: multiple output columns names <- proc_names_longer(names) if(is.null(ids) && is.null(values)) res <- melt_all(if(is.null(values)) data else data[values], names, factor, na.rm, labels, check.dups) else { if(is.null(values)) values <- seq_along(data)[-ids] else if(is.null(ids)) ids <- seq_along(data)[-values] vd <- data[values] if(length(labels) || factor[1L]) attributes(vd) <- NULL if(check.dups && force(ng <- fnunique(data[ids])) < fnrow(data)) warning("duplicated id values detected: there are ", ng, " unique id-combinations, but the data has ", fnrow(data), " rows. This means you have on average ", round(fnrow(data)/ng, 1), " duplicates per id-combination. ", "Consider adding additional ids or aggregating your data (e.g. using collap()) before applying pivot().") if(length(vd)) { if(na.rm) { cc <- lapply(vd, whichNA, invert = TRUE) # TODO: could do this all internally using a single vector # cc_vec <- c_to_vec(cc) # id_cols <- .Call(C_subsetDT, data, cc_vec, ids, FALSE) id_cols <- lapply(data[ids], function(x) .Call(C_pivot_long, alloc(x, length(cc), FALSE), cc, FALSE)) value_cols <- .Call(C_pivot_long, vd, cc, TRUE) # value_col <- .Call(C_pivot_long, vd, cc, FALSE) # Csv(c_to_vec(data[values]), cc_vec) # variable_col <- rep(if(factor[1L]) seq_along(values) else nam[values], vlengths(cc)) } else { id_cols <- .Call(C_rbindlist, alloc(data[ids], length(values), FALSE), FALSE, FALSE, NULL) # .Call(C_subsetDT, data, rep.int(seq_len(n), length(values)), ids, FALSE) # This is faster than .Call(C_pivot_long, vd, NULL) because rep() is slow... value_cols <- .Call(C_pivot_long, vd, NULL, TRUE) # .Call(C_rbindlist, lapply(vd, list), FALSE, FALSE, "id") # value_col <- .Call(C_pivot_long, vd, NULL) # c_to_vec(data[values]) # variable_col <- rep(if(factor[1L]) seq_along(values) else nam[values], each = fnrow(data)) } if(length(values) > 1L) vlabels(value_cols) <- NULL # Could solve at C-level with additional argument... names(value_cols) <- names # TODO: multiple pivots this does not work... if(length(labels)) { labs <- vlabels(vd, use.names = FALSE) if(factor[2L]) { label_col <- value_cols[[1L]] attr(label_col, "levels") <- labs oldClass(label_col) <- "factor" # c("factor", "na.included") } else label_col <- Csv(labs, value_cols[[1L]]) label_col <- list(label_col) if(is.list(labels)) { # Setting new labels... if(is.null(names(labels))) { new_labels <- labels[[2L]] label <- labels[[1L]] } else { new_labels <- labels[["new"]] label <- labels[["name"]] if(is.null(label)) label <- "label" } if(!is.character(label)) stop("label column name supplied in a list needs to be character typed, you passed an object of type: ", typeof(labels)) if(!is.character(new_labels)) stop("new labels need to be specified as a character vector, you passed an object of type: ", typeof(new_labels)) names(label_col) <- label value_cols <- c(value_cols[1L], label_col, value_cols[2L]) if(is.null(names(new_labels))) { if(length(new_labels) != length(value_cols)) stop("Number of new labels supplied must match number of new columns in long format frame. There are ", length(value_cols), " new columns in the molten frame, and you supplied ", length(new_labels), " new labels") vlabels(value_cols) <- new_labels } else vlabels(value_cols)[names(new_labels)] <- new_labels } else { names(label_col) <- if(is.character(labels)) labels else "label" value_cols <- c(value_cols[1L], label_col, value_cols[2L]) } } if(factor[1L]) { attr(value_cols[[1L]], "levels") <- nam[values] oldClass(value_cols[[1L]]) <- "factor" # c("factor", "na.included") } else if(length(labels)) value_cols[[1L]] <- duplAttributes(Csv(nam[values], value_cols[[1L]]), value_cols[[1L]]) res <- c(id_cols, value_cols) } else res <- data[ids] } } else { sort <- if(is.logical(sort)) rep(sort, length.out = 2L) else c("ids", "names") %in% sort transpose <- if(is.logical(transpose)) rep(transpose, length.out = 2L) else c("columns", "names") %in% transpose if (how == 2L) { # Wide Pivot # Note: No Complete Pivoting (no ids and values) supported! This does not make a lot of sense! # In general: names specifies where variable names are coming from. If multiple then interact them using "_" # Same for labels. drop specifies that factor levels should be dropped if a single factor column is passed to names # (1) Preprocessing Arguments if(is.null(names)) { names <- whichv(nam, "variable") if(!length(names)) stop("Need to provide 'names' if how = 'wider'. The default name 'variable' was not found in the data.") } else names <- cols2int(names, data, nam) if(length(labels)) labels <- cols2int(labels, data, nam) if(is.null(values)) { if(is.null(ids)) { values <- whichv(nam, "value") if(!length(values)) stop("Need to provide values if how = 'wider' and is.null(ids). The default name 'value' was not found in the data.") } else values <- seq_along(data)[-c(ids, names, labels)] } if(is.null(ids)) ids <- seq_along(data)[-c(names, labels, values)] # (2) Missing Value Removal if(na.rm) { # TODO: better way? data <- data[c(ids, names, values, labels)] ids <- seq_along(ids) names <- seq_along(names) + length(ids) values <- seq_along(values) + length(ids) + length(names) if(length(labels)) labels <- seq_along(labels) + length(ids) + length(names) + length(values) data <- na_omit(data, cols = values, prop = 1) } # (3) Compute ID Columns if(sort[1L]) { g <- GRP.default(data[ids], sort = TRUE, return.order = FALSE, call = FALSE) id_cols <- g[[4L]] ng <- g[[1L]] g <- g[[2L]] attr(g, "N.groups") <- ng } else { # Could also use GRP(), but this avoids computing a potentially large and redundant group sizes vector g <- groupv(data[ids], starts = TRUE) id_cols <- .Call(C_subsetDT, data, attr(g, "starts"), ids, FALSE) } # (4) Compute Names and Labels Columns names_g <- GRP(if(length(names) == 1L && is.null(labels)) data[[names]] else data[names], sort = sort[2L], group.sizes = check.dups, drop = drop, call = FALSE) names <- GRPnames(names_g, sep = "_") if(length(labels)) { if(check.dups && any(vary <- varying(data[labels], names_g))) # See if there are duplicate labels stop("The following 'labels' columns vary by 'names': ", paste(names(vary)[vary], collapse = ", ")) labels <- if(length(labels) == 1L) tochar(Csv(data[[labels]], names_g$group.starts)) else do.call(paste, c(.Call(C_subsetDT, data, names_g$group.starts, labels, FALSE), list(sep = " - "))) } g_v <- names_g[[2L]] attr(g_v, "N.groups") <- names_g[[1L]] # (5) Optional duplicates check if(check.dups) { # Old way of doing it: # if(force(ng <- fnunique(list(g, g_v))) < fnrow(data)) # warning("duplicates detected: there are ", ng, " unique combinations of id- and name-columns, but the data has ", fnrow(data), # " rows. This means you have on average ", round(fnrow(data)/ng, 1), " duplicates per id-name-combination. If how = 'wider', pivot() will take the last of those duplicates in first-appearance-order. Consider aggregating your data e.g. using collap() before applying pivot().") # With 10 million obs, 1 million id groups (g), and 100 names groups, this is 2x faster than the fnunique() option + could multithread ndg <- fndistinct.default(g, names_g, use.g.names = FALSE, na.rm = FALSE, nthreads = nthreads) attributes(ndg) <- NULL if(!identical(ndg, names_g[[3L]])) { ng <- fsumC(ndg, narm = FALSE) warning("duplicates detected: there are ", ng, " unique combinations of id- and name-columns, but the data has ", fnrow(data), " rows. This means you have on average ", round(fnrow(data)/ng, 1), " duplicates per id-name-combination. If how = 'wider', pivot() will take the last of those duplicates in first-appearance-order. Consider aggregating your data e.g. using collap() before applying pivot().") } } # (6) Compute Reshaped Values if(length(values) > 1L) { # Multiple columns, as in dcast... TODO: check pivot_wider namv <- names(data)[values] attributes(data) <- NULL if(!is.character(FUN)) { data[values] <- apply_external_FUN(data[values], group(g, g_v), FUN, FUN.args, l1orlst(as.character(substitute(FUN)))) FUN <- "last" } value_cols <- lapply(data[values], function(x) .Call(C_pivot_wide, g, g_v, x, fill, nthreads, FUN, na.rm)) if(length(labels)) value_cols <- lapply(value_cols, add_labels, labels) value_cols <- funlist(if(transpose[1L]) t_list2(value_cols) else value_cols) namv_res <- if(transpose[2L]) t(outer(names, namv, paste, sep = "_")) else outer(namv, names, paste, sep = "_") names(value_cols) <- if(transpose[1L]) namv_res else t(namv_res) } else { if(!is.character(FUN)) { data[[values]] <- apply_external_FUN(data[[values]], group(g, g_v), FUN, FUN.args, l1orlst(as.character(substitute(FUN)))) FUN <- "last" } value_cols <- .Call(C_pivot_wide, g, g_v, data[[values]], fill, nthreads, FUN, na.rm) names(value_cols) <- names if(length(labels)) vlabels(value_cols) <- labels } res <- c(id_cols, value_cols) } else { # Recast Pivot # The optimization applied here is to avoid materialization of the "long" id-columns # There are two ways to do it, first the long value cast and then wide cast, or many wide casts and row-biding. # The complication is that the long cast requires construction of an id-column, which probably can only be efficiently # done by creating yet another C-function. Thus I try the wide option first. # -> initial benchmarks show that this is also definitely faster than recast from long frame... # but presumably because grouping is much faster. If an id is constructed we don't need to group a long frame though... # TODO: multiple recast?? -> I think in such cases it would be justifyable to call pivot() 2 times, # the syntax with recast could become very complicated # (1) Preprocessing Arguments names <- proc_names_recast(names, data) # List of 2 elements... names1 <- names[[1L]] if(length(labels)) { labels <- proc_labels_recast(labels, data) labels1 <- labels[[1L]] } else labels1 <- NULL if(is.null(values)) values <- seq_along(data)[-c(ids, names1, labels1)] else if(is.null(ids)) ids <- seq_along(data)[-c(names1, labels1, values)] # (2) Compute ID Columns if(length(ids)) { if(sort[1L]) { g <- GRP.default(data[ids], sort = TRUE, return.order = FALSE, call = FALSE) id_cols <- g[[4L]] ng <- g[[1L]] g <- g[[2L]] attr(g, "N.groups") <- ng } else { # Could also use GRP(), but this avoids computing a potentially large and redundant group sizes vector g <- groupv(data[ids], starts = TRUE) id_cols <- .Call(C_subsetDT, data, attr(g, "starts"), ids, FALSE) } } else { g <- alloc(1L, fnrow(data)) # TODO: Better create a C-level exemption?? but this is inefficient anyway (row-binding single rows...) attr(g, "N.groups") <- 1L id_cols <- NULL } # (3) Compute Names and Labels Columns names_g <- GRP(if(length(names1) == 1L && is.null(labels1)) data[[names1]] else data[names1], sort = sort[2L], group.sizes = check.dups, drop = drop, call = FALSE) if(length(labels1)) { if(check.dups && any(vary <- varying(data[labels1], names_g))) # See if there are duplicate labels stop("The following 'labels' columns vary by 'names': ", paste(names(vary)[vary], collapse = ", ")) labels1 <- if(length(labels1) == 1L) tochar(Csv(data[[labels1]], names_g$group.starts)) else do.call(paste, c(.Call(C_subsetDT, data, names_g$group.starts, labels1, FALSE), list(sep = " - "))) } g_v <- names_g[[2L]] attr(g_v, "N.groups") <- names_g[[1L]] names1 <- GRPnames(names_g, sep = "_") # (4) Optional duplicates check... if(check.dups) { ndg <- fndistinct.default(g, names_g, use.g.names = FALSE, na.rm = FALSE, nthreads = nthreads) attributes(ndg) <- NULL if(!identical(ndg, names_g[[3L]])) { ng <- fsumC(ndg, narm = FALSE) warning("duplicates detected: there are ", ng, " unique combinations of id- and name-columns, but the data has ", fnrow(data), " rows. This means you have on average ", round(fnrow(data)/ng, 1), " duplicates per id-name-combination. If how = 'recast', pivot() will take the last of those duplicates in first-appearance-order. Consider aggregating your data e.g. using collap() before applying pivot().") } } # (5) Compute Reshaped Values save_labels <- !is.null(labels[[2L]]) vd <- data[values] if(save_labels || factor[1L]) { namv <- names(vd) attributes(vd) <- NULL } if(!is.character(FUN)) { vd <- apply_external_FUN(vd, group(g, g_v), FUN, FUN.args, l1orlst(as.character(substitute(FUN)))) FUN <- "last" } value_cols <- lapply(vd, function(x) .Call(C_pivot_wide, g, g_v, x, fill, nthreads, FUN, na.rm)) if(length(id_cols)) id_cols <- .Call(C_rbindlist, alloc(id_cols, length(value_cols), FALSE), FALSE, FALSE, NULL) value_cols <- .Call(C_rbindlist, value_cols, FALSE, FALSE, names[[2L]]) # Final column is "variable" name names(value_cols) <- c(names[[2L]], names1) if(length(labels1)) vlabels(value_cols)[-1L] <- labels1 else if(length(vd) > 1L) vlabels(value_cols) <- NULL # (6) Missing Value Removal if(na.rm) { # TODO: better way??? cc <- whichv(missing_cases(value_cols, prop = 1), FALSE) if(length(cc) != fnrow(value_cols)) { value_cols <- .Call(C_subsetDT, value_cols, cc, seq_along(value_cols), FALSE) id_cols <- .Call(C_subsetDT, id_cols, cc, seq_along(id_cols), FALSE) } } # (7) Properly deal with variable names and labels if(save_labels) { if(!is.character(labels[[2L]])) stop("label column name supplied in a list needs to be character typed, you passed an object of type: ", typeof(labels[[2L]])) labs <- vlabels(vd, use.names = FALSE) if(factor[2L]) { label_col <- value_cols[[1L]] attr(label_col, "levels") <- labs oldClass(label_col) <- "factor" # c("factor", "na.included") } else label_col <- Csv(labs, value_cols[[1L]]) label_col <- list(label_col) names(label_col) <- labels[[2L]] value_cols <- c(value_cols[1L], label_col, value_cols[-1L]) } if(factor[1L]) { attr(value_cols[[1L]], "levels") <- namv oldClass(value_cols[[1L]]) <- "factor" # c("factor", "na.included") } else if(save_labels) value_cols[[1L]] <- Csv(namv, value_cols[[1L]]) if(length(new_labels <- labels[[3L]])) { if(is.null(names(new_labels))) { if(length(new_labels) == length(value_cols)) vlabels(value_cols) <- new_labels else if(length(new_labels) == 1L+save_labels) vlabels(value_cols)[seq_len(1L+save_labels)] <- new_labels else stop("Number of new labels supplied must match either number of new ids (names/label-columns) or total number of new columns in recasted frame. There are ", length(value_cols), " new columns in the frame, of which ", 1L+save_labels, " are ids, and you supplied ", length(new_labels), " new labels. Alternatively, please provide a named vector matching labels to columns.") } else vlabels(value_cols)[names(new_labels)] <- new_labels } res <- if(length(id_cols)) c(id_cols, value_cols) else value_cols } } if(is.null(ad)) return(res) # Redundant ?? if(any(ad$class == "data.frame")) ad$row.names <- .set_row_names(fnrow(res)) ad$names <- names(res) .Call(C_setattributes, res, ad) if(any(ad$class == "data.table")) return(alc(res)) return(res) } collapse/R/select_replace_add_vars.R0000644000176200001440000003001714777170130017225 0ustar liggesusers # ind must be integer (not numeric) !!! get_vars_ind <- function(x, ind, return = "data") switch(return, data = .Call(C_subsetCols, x, ind, TRUE), names = attr(x, "names")[ind], indices = ind, named_indices = `names<-`(ind, attr(x, "names")[ind]), logical = `[<-`(logical(length(unclass(x))), ind, value = TRUE), named_logical = `names<-`(`[<-`(logical(length(unclass(x))), ind, value = TRUE), attr(x, "names")), stop("Unknown return option!")) # ind must be logical !!! (this used to be get_vars_FUN) get_vars_indl <- function(x, indl, return = "data") switch(return, data = .Call(C_subsetCols, x, which(indl), TRUE), names = attr(x, "names")[indl], indices = which(indl), named_indices = which(`names<-`(indl, attr(x, "names"))), logical = indl, named_logical = `names<-`(indl, attr(x, "names")), stop("Unknown return option!")) # ind can be integer or logical "get_vars_ind<-" <- function(x, ind, value) { ind <- if(is.logical(ind)) which(ind) else as.integer(ind) if(is.null(value)) { if(!length(ind)) return(condalc(x, inherits(x, "data.table"))) return(.Call(C_subsetCols, x, -ind, TRUE)) } clx <- oldClass(x) oldClass(x) <- NULL if(is.list(value)) { oldClass(value) <- NULL # fastest ?? if(is.object(value)) oldClass(value) <- NULL ?? if(.Call(C_fnrow, value) != .Call(C_fnrow, x)) stop("NROW(value) must match nrow(x)") if(length(value) != length(ind)) stop("NCOL(value) must match selected variables") # length(num_vars(x)) x[ind] <- value if(length(nam <- names(value))) names(x)[ind] <- nam # == length(ind) } else { if(NROW(unclass(value)) != .Call(C_fnrow, x)) stop("NROW(value) must match nrow(x)") if(length(ind) != 1L) stop("NCOL(value) must match selected variables") # length(num_vars(x)) x[[ind]] <- value } return(condalc(`oldClass<-`(x, clx), any(clx == "data.table"))) } fselect <- function(.x, ..., return = "data") { # This also takes names and indices .... # ax <- attributes(.x) # oldClass(.x) <- NULL # attributes ? nam <- attr(.x, "names") # if(inherits(.x, "data.table")) nam <- nam[seq_col(.x)] # required because of overallocation... -> Should be solved now, always take shallow copy... nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) vars <- eval(substitute(c(...)), nl, parent.frame()) # if(!is.integer(vars)) stop(paste0("Unknown columns: ", .c(...))) # if(!is.integer(vars) || bmax(vars) > length(nam)) # nah, a bit redundant.. if(!is.atomic(vars) || is.logical(vars)) stop("... needs to be expressions evaluating to integer or character") nam_vars <- names(vars) vars <- if(is.character(vars)) ckmatch(vars, nam) else as.integer(vars) # needed, otherwise selecting with doubles gives an error if(length(nam_vars)) { # Allow renaming during selection nonmiss <- nzchar(nam_vars) nam[vars[nonmiss]] <- nam_vars[nonmiss] } # if(!is.numeric(vars)) stop("... needs to be column names, or character / integer / logical vectors") switch(return, # need this for sf data.frame data = .Call(C_subsetCols, if(length(nam_vars)) `attr<-`(.x, "names", nam) else .x, vars, TRUE), # setAttributes(.x[vars], `[[<-`(ax, "names", nam[vars])), # Also Improvements in code below ? names = nam[vars], indices = vars, named_indices = `names<-`(vars, nam[vars]), logical = `[<-`(logical(length(nam)), vars, TRUE), named_logical = `names<-`(`[<-`(logical(length(nam)), vars, TRUE), nam), stop("Unknown return option")) } # or slt sel, selt, sct -> shortcut ? slt <- fselect # good, consistent # fselect(GGDC10S, Country, AGR:SUM) # fselect(GGDC10S, Variable == "VA" & Year > 1990, Country, Year, AGR:SUM) -> why no error ?? first argument is just ignored ... ?? "fselect<-" <- function(x, ..., value) { nam <- attr(x, "names") # if(inherits(x, "data.table")) nam <- nam[seq_col(x)] # required because of overallocation... Should be solved now -> always make shallow copy nl <- `names<-`(as.vector(seq_along(nam), "list"), nam) vars <- eval(substitute(c(...)), nl, parent.frame()) if(!is.atomic(vars) || is.logical(vars)) stop("... needs to be expressions evaluating to integer or character") if(is.character(vars)) vars <- ckmatch(vars, nam) if(vars[1L] < 0L) vars <- seq_along(nam)[vars] # if(!is.numeric(vars)) stop("... needs to be column names, or character / integer / logical vectors") # if(!is.integer(vars)) stop(paste0("Unknown columns: ", .c(...))) `get_vars_ind<-`(x, vars, value) } "slt<-" <- `fselect<-` # STD(fselect(GGDC10S, Country, Variable, Year, AGR:SUM)) # Idea: also do this for replacement functions, replacing characters renames, replacong number reorders, replacing 3 does renaming and reordering? num_vars <- function(x, return = "data") get_vars_indl(x, .Call(C_vtypes, x, 1L), return) # vapply(`attributes<-`(x, NULL), is.numeric, TRUE) nv <- num_vars "num_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 1L), value) "nv<-" <- `num_vars<-` char_vars <- function(x, return = "data") get_vars_ind(x, .Call(C_vtypes, x, 0L) %==% 17L, return) # vapply(`attributes<-`(x, NULL), is.character, TRUE) "char_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 0L) %==% 17L, value) fact_vars <- function(x, return = "data") get_vars_indl(x, .Call(C_vtypes, x, 2L), return) # vapply(`attributes<-`(x, NULL), is.factor, TRUE) "fact_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 2L), value) logi_vars <- function(x, return = "data") get_vars_ind(x, .Call(C_vtypes, x, 0L) %==% 11L, return) # vapply(`attributes<-`(x, NULL), is.logical, TRUE) "logi_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 0L) %==% 11L, value) date_vars <- function(x, return = "data") get_vars_indl(x, vapply(`attributes<-`(x, NULL), is_date, TRUE), return) "date_vars<-" <- function(x, value) `get_vars_ind<-`(x, vapply(`attributes<-`(x, NULL), is_date, TRUE), value) # Date_vars <- function(x, return = "data") { # .Deprecated(msg = "'Date_vars' was renamed to 'date_vars'. It will be removed end of 2023, see help('collapse-renamed').") # date_vars(x, return) # } # "Date_vars<-" <- function(x, value) { # .Deprecated(msg = "'Date_vars' was renamed to 'date_vars'. It will be removed end of 2023, see help('collapse-renamed').") # `date_vars<-`(x, value) # } cat_vars <- function(x, return = "data") get_vars_ind(x, .Call(C_vtypes, x, 1L) %!=% TRUE, return) "cat_vars<-" <- function(x, value) `get_vars_ind<-`(x, .Call(C_vtypes, x, 1L) %!=% TRUE, value) get_vars <- function(x, vars, return = "data", regex = FALSE, rename = FALSE, ...) { if(regex) { if(!is.character(vars)) stop("If regex = TRUE, vars must be character") ind <- rgrep(vars, attr(x, "names"), ...) } else { if(!missing(...)) unused_arg_action(match.call(), ...) ind <- cols2int(vars, x, attr(x, "names")) if(rename && length(nam_vars <- names(vars)) == length(ind)) { # Allow renaming during selection nonmiss <- nzchar(nam_vars) attr(x, "names")[ind[nonmiss]] <- nam_vars[nonmiss] } } get_vars_ind(x, ind, return) } gv <- function(x, vars, return = "data", ...) { if(!missing(...)) return(get_vars(x, vars, return, ...)) ind <- cols2int(vars, x, attr(x, "names")) get_vars_ind(x, ind, return) } gvr <- function(x, vars, return = "data", ...) { if(!is.character(vars)) stop("If regex = TRUE, vars must be character") ind <- rgrep(vars, attr(x, "names"), ...) get_vars_ind(x, ind, return) } "get_vars<-" <- function(x, vars, regex = FALSE, ..., value) { if(regex) { if(!is.character(vars)) stop("If regex = TRUE, vars must be character") ind <- rgrep(vars, attr(x, "names"), ...) } else { if(!missing(...)) unused_arg_action(match.call(), ...) ind <- cols2int(vars, x, attr(x, "names")) } `get_vars_ind<-`(x, ind, value) } "gv<-" <- function(x, vars, ..., value) { if(!missing(...)) { warning("Please use the new shortcut 'gvr<-' for regex column replacement.") return(`get_vars<-`(x, vars, ..., value = value)) } ind <- cols2int(vars, x, attr(x, "names")) `get_vars_ind<-`(x, ind, value) } "gvr<-" <- function(x, vars, ..., value) { ind <- rgrep(vars, attr(x, "names"), ...) `get_vars_ind<-`(x, ind, value) } "add_vars<-" <- function(x, pos = "end", value) { ax <- attributes(x) attributes(x) <- NULL lx <- length(x) if(is.list(value)) { oldClass(value) <- NULL # fastest ? if(.Call(C_fnrow, value) != .Call(C_fnrow, x)) stop("NROW(value) must match nrow(x)") # res <- c(x, value) # FASTER than commented out below if(is.character(pos)) switch(pos, end = { ax[["names"]] <- if(length(nam <- names(value))) c(ax[["names"]], nam) else c(ax[["names"]], paste0("V", seq(lx+1L, lx+length(value)))) return(condalcSA(c(x, value), ax, any(ax[["class"]] == "data.table"))) }, front = { ax[["names"]] <- if(length(nam <- names(value))) c(nam, ax[["names"]]) else c(paste0("V", seq_along(value)), ax[["names"]]) return(condalcSA(c(value, x), ax, any(ax[["class"]] == "data.table"))) }, stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") ) lv <- length(value) tl <- lv+lx if(!is.numeric(pos) || length(pos) != lv || bmax(pos) > tl) stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") o <- forder.int(c(seq_len(tl)[-pos], pos)) ax[["names"]] <- if(length(nam <- names(value))) c(ax[["names"]], nam)[o] else c(ax[["names"]], paste0("V", pos))[o] # FASTER THIS WAY? -> It seems so... return(condalcSA(c(x, value)[o], ax, any(ax[["class"]] == "data.table"))) # fastest ?? use setcolorder ? (probably not ) # ind <- seq(lx+1L, lx+length(value)) # x[ind] <- value # FASTER than simply using x[names(value)] <- value ? -> Yes ! # ax[["names"]] <- if(length(nam <- names(value))) c(ax[["names"]], nam) else # c(ax[["names"]], paste0("V", ind)) } else { if(NROW(value) != .Call(C_fnrow, x)) stop("NROW(value) must match nrow(x)") # res <- c(x, list(value)) # FASTER than below ? -> Nope # ax[["names"]] <- c(ax[["names"]], paste0("V", lx+1L)) nam <- l1orlst(as.character(substitute(value))) if(is.character(pos)) switch(pos, end = { x[[lx+1L]] <- value ax[["names"]] <- c(ax[["names"]], nam) # paste0("V", lx+1L) return(condalcSA(x, ax, any(ax[["class"]] == "data.table"))) }, front = { ax[["names"]] <- c(nam, ax[["names"]]) return(condalcSA(c(list(value), x), ax, any(ax[["class"]] == "data.table"))) }, stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") ) if(!is.numeric(pos) || length(pos) > 1L || pos > lx+1L) stop("pos needs to be 'end', 'front' or a suitable numeric / integer vector of positions!") o <- forder.int(c(seq_len(lx), pos-1L)) ax[["names"]] <- c(ax[["names"]], nam)[o] return(condalcSA(c(x, list(value))[o], ax, any(ax[["class"]] == "data.table"))) } } "av<-" <- `add_vars<-` add_vars <- function(x, ..., pos = "end") { if(...length() == 1L) { if(is.list(..1) || is.null(names(l <- list(...)))) return(`add_vars<-`(x, pos, ...)) return(`add_vars<-`(x, pos, l)) } l <- list(...) # Old: c(...), did not allow atomic inputs... l <- if(all(.Call(C_vtypes, l, 3L))) c(...) else # Checks if all is list... unlist(lapply(l, function(z) if(is.list(z)) z else list(z)), recursive = FALSE) if(!allv(vlengths(l, FALSE), fnrow(x))) stop("if multiple arguments are passed to '...', for all arguments NROW(arg) must match nrow(x)") return(`add_vars<-`(x, pos, l)) } av <- add_vars # Exercises: # repl <- function(x)x # `repl<-` <- function(x, value) { # x <- value # x # } # repl(x)[2] <- 4 # Works!! # http://adv-r.had.co.nz/Functions.html#special-calls # This works because the expression names(x)[2] <- "two" is evaluated as if you had written: #`*tmp*` <- names(x) #`*tmp*`[2] <- "two" #names(x) <- `*tmp*` collapse/R/fnth_fmedian.R0000644000176200001440000001735114777170130015040 0ustar liggesusers# Note: Adapted from fmode.R fnth <- function(x, n = 0.5, ...) UseMethod("fnth") # , x fnth.default <- function(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ties = "q7", nthreads = .op[["nthreads"]], o = NULL, check.o = is.null(attr(o, "sorted")), ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fnth.matrix(x, n, g, w, TRA, na.rm, use.g.names, ties = ties, nthreads = nthreads, ...)) if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fnth, x, n, g, w, na.rm, ties, nthreads, o, check.o) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(res) if(use.g.names) names(res) <- GRPnames(g, FALSE) return(res) } TRAC(x,res,g[[2L]],TRA, ...) } fnth.matrix <- function(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "q7", nthreads = .op[["nthreads"]], ...) { if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fnthm, x, n, g, w, na.rm, drop, ties, nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(res) if(use.g.names) dimnames(res)[[1L]] <- GRPnames(g) return(res) } TRAmC(x,res,g[[2L]],TRA, ...) } fnth.zoo <- function(x, ...) if(is.matrix(x)) fnth.matrix(x, ...) else fnth.default(x, ...) fnth.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fnth.matrix(x, ...), x) else fnth.default(x, ...) fnth.data.frame <- function(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "q7", nthreads = .op[["nthreads"]], ...) { if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # sort = FALSE for TRA: not faster here... res <- .Call(C_fnthl, x, n, g, w, na.rm, drop, ties, nthreads) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(if(drop) unlist(res) else res) if(use.g.names && !inherits(x, "data.table") && length(gn <- GRPnames(g))) attr(res, "row.names") <- gn return(res) } TRAlC(x,res,g[[2L]],TRA, ...) } fnth.list <- function(x, ...) fnth.data.frame(x, ...) fnth.grouped_df <- function(x, n = 0.5, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], ties = "q7", nthreads = .op[["nthreads"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "sum.")) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(C_fnthl,x[-gn],n,g,w,na.rm,FALSE,ties,nthreads)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(C_fnthl,x[-gn],n,g,w,na.rm,FALSE,ties,nthreads)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(C_fnthl,x,n,g,w,na.rm,FALSE,ties,nthreads)), ax)) } else return(setAttributes(.Call(C_fnthl,x,n,g,w,na.rm,FALSE,ties,nthreads), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(C_fnthl,x[-gn],n,g,w,na.rm,FALSE,ties,nthreads),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fnthl,x[-gn],n,g,w,na.rm,FALSE,ties,nthreads),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fnthl,x,n,g,w,na.rm,FALSE,ties,nthreads),g[[2L]],TRA, ...)) } fmedian <- function(x, ...) UseMethod("fmedian") # , x fmedian.default <- function(x, ..., ties = "mean") fnth.default(x, 0.5, ..., ties = ties) fmedian.matrix <- function(x, ..., ties = "mean") fnth.matrix(x, 0.5, ..., ties = ties) fmedian.zoo <- function(x, ...) if(is.matrix(x)) fmedian.matrix(x, ...) else fmedian.default(x, ...) fmedian.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fmedian.matrix(x, ...), x) else fmedian.default(x, ...) fmedian.data.frame <- function(x, ..., ties = "mean") fnth.data.frame(x, 0.5, ..., ties = ties) fmedian.list <- fmedian.data.frame fmedian.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], ties = "mean", nthreads = .op[["nthreads"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "sum.")) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(C_fnthl,x[-gn],0.5,g,w,na.rm,FALSE,ties,nthreads)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(C_fnthl,x[-gn],0.5,g,w,na.rm,FALSE,ties,nthreads)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(C_fnthl,x,0.5,g,w,na.rm,FALSE,ties,nthreads)), ax)) } else return(setAttributes(.Call(C_fnthl,x,0.5,g,w,na.rm,FALSE,ties,nthreads), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(C_fnthl,x[-gn],0.5,g,w,na.rm,FALSE,ties,nthreads),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fnthl,x[-gn],0.5,g,w,na.rm,FALSE,ties,nthreads),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fnthl,x,0.5,g,w,na.rm,FALSE,1L,nthreads),g[[2L]],TRA, ...)) } collapse/R/psmat.R0000644000176200001440000001617414777170130013544 0ustar liggesusers psmat <- function(x, ...) UseMethod("psmat") # , x psmat.default <- function(x, g, t = NULL, transpose = FALSE, fill = NULL, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.matrix(x)) stop("x is already a matrix") if(is.atomic(g) && length(g) == 1L) { if(transpose) matrix(x, ncol = round(g), dimnames = list(seq_len(length(x)/round(g)), paste0("GRP.",seq_len(g)))) else matrix(x, nrow = round(g), byrow = TRUE, dimnames = list(paste0("GRP.",seq_len(g)), seq_len(length(x)/round(g)))) } else { if(!is.nmfactor(g)) if(is.atomic(g)) g <- qF(g, na.exclude = FALSE) else if(is_GRP(g)) g <- as_factor_GRP(g) else g <- as_factor_GRP(GRP.default(g, return.order = FALSE, call = FALSE)) if(is.null(t)) { # message("No timevar provided: Assuming Balanced Panel") return(.Call(Cpp_psmat,x, g, NULL, transpose, fill)) } else { if(!is.nmfactor(t)) if(is.atomic(t)) t <- qF(t, sort = TRUE, na.exclude = FALSE) else if(is_GRP(t)) t <- as_factor_GRP(t) else t <- as_factor_GRP(GRP.default(t, sort = TRUE, return.order = FALSE, call = FALSE)) return(.Call(Cpp_psmat,x, g, t, transpose, fill)) } } } psmat.data.frame <- function(x, by, t = NULL, cols = NULL, transpose = FALSE, fill = NULL, array = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) oldClass(x) <- NULL # Setting globally ! if(is.atomic(by) && length(by) == 1L) { nr <- .Call(C_fnrow, x) n <- round(by) if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] if(transpose) { dn <- list(seq_len(nr/n), paste0("GRP.",seq_len(by))) res <- lapply(x, matrix, ncol = n, dimnames = dn) } else { dn <- list(paste0("GRP.",seq_len(by)), seq_len(nr/n)) res <- lapply(x, matrix, nrow = n, byrow = TRUE, dimnames = dn) } } else { if(is.call(by)) { nam <- names(x) if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) by <- ckmatch(all.vars(by[[3L]]), nam) } else { by <- ckmatch(all.vars(by), nam) v <- if(is.null(cols)) seq_along(x)[-by] else fsetdiff(cols2int(cols, x, nam), by) } by <- if(length(by) == 1L) x[[by]] else GRP.default(x, by, return.order = FALSE, call = FALSE) if(is.call(t)) { # If time-variable supplied ! tv <- ckmatch(all.vars(t), nam, "Unknown time variable:") v <- fsetdiff(v, tv) t <- eval(if(length(tv) == 1L) t[[2L]] else attr(terms.formula(t), "variables"), x, attr(t, ".Environment")) # if(length(t) == 1L) x[[t]] else GRP.default(x, t, sort = TRUE, call = FALSE) } x <- x[v] } else if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] if(!is.nmfactor(by)) if(is.atomic(by)) by <- qF(by, na.exclude = FALSE) else if(is_GRP(by)) by <- as_factor_GRP(by) else by <- as_factor_GRP(GRP.default(by, return.order = FALSE, call = FALSE)) if(is.null(t)) { # message("No timevar provided: Assuming Balanced Panel") res <- lapply(x, psmatCpp, by, NULL, transpose, fill) } else { if(!is.nmfactor(t)) if(is.atomic(t)) t <- qF(t, sort = TRUE, na.exclude = FALSE) else if(is_GRP(t)) t <- as_factor_GRP(t) else t <- as_factor_GRP(GRP.default(t, sort = TRUE, return.order = FALSE, call = FALSE)) res <- lapply(x, psmatCpp, by, t, transpose, fill) } } if(array) { if(length(res) == 1L) return(res[[1L]]) else return(addAttributes(fsimplify2array(res), list(transpose = transpose, class = c("psmat","array")))) } else return(res) } psmat.pseries <- function(x, transpose = FALSE, fill = NULL, drop.index.levels = "none", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- droplevels_index(uncl2pix(x, interact = TRUE), drop.index.levels) if(is.matrix(x)) stop("x is already a matrix") .Call(Cpp_psmat, x, index[[1L]], index[[2L]], transpose, fill) } psmat.pdata.frame <- function(x, cols = NULL, transpose = FALSE, fill = NULL, array = TRUE, drop.index.levels = "none", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- droplevels_index(uncl2pix(x, interact = TRUE), drop.index.levels) oldClass(x) <- NULL res <- lapply(if(is.null(cols)) x else x[cols2int(cols, x, names(x), FALSE)], psmatCpp, index[[1L]], index[[2L]], transpose, fill) if(array) { if(length(res) == 1L) return(res[[1L]]) else return(addAttributes(fsimplify2array(res), list(transpose = transpose, class = c("psmat","array")))) } else return(res) } plot.psmat <- function(x, legend = FALSE, colours = legend, labs = NULL, grid = FALSE, ...) { d <- dim(x) arl <- length(d) == 3L if(isFALSE(attr(x, "transpose"))) { x <- if(arl) aperm(x, c(2L, 1L, 3L)) else t.default(x) d <- dim(x) } dn <- dimnames(x) colours <- if(isTRUE(colours)) rainbow(d[2L]) else if(isFALSE(colours)) TRUE else colours t <- as.numeric(dn[[1L]]) if(!is.na(t[1L])) { mint <- bmin(t) maxt <- bmax(t) } else { mint <- 1L maxt <- length(t) } ns <- d[2L] dots <- list(...) if(arl) { vars <- if(is.null(labs)) dn[[3L]] else labs nv <- d[3L] if(nv == 2L) mfr <- c(1L, 2L + legend) else if(nv + legend <= 4L) mfr <- c(2L, 2L) else { sqnv <- sqrt(nv) fsqnv <- floor(sqnv) mfr <- if(sqnv == fsqnv) c(fsqnv+legend,fsqnv) else c(fsqnv + 1L, fsqnv) } oldpar <- par(mfrow = mfr, mar = c(2.5, 2.5, 2.1, 1.5), mgp = c(2.5, 1, 0)) on.exit(par(oldpar)) for(i in seq_along(vars)) { ts.plot(ts(x[, , i], mint, maxt), main = vars[i], col = colours, xlab = NULL, ...) if(grid) grid() } if(legend) { plot(1:10, type = "n", axes = FALSE, xlab = NA, ylab = NA) legend(x = 0, y = if(nv == 2L) 10.5 else 10.75, # 'topleft', dn[[2L]], col = colours, lty = if(any(names(dots) == "lty")) dots[["lty"]] else 1L, cex= if(ns > 80L) 1-sqrt(ns)/sqrt(1150) else 1, bty = "n", xpd = TRUE, # y.intersp = 0.5, x.intersp = 0.5, ncol = if(ns <= 10L) 1L else if(nv == 2L) floor(ns^.32) else floor(ns^.39)) # .37 } } else { ts.plot(ts(x, mint, maxt), col = colours, ...) if(grid) grid() if(legend) legend('topleft', dn[[2L]], col = colours, lty = if(any(names(dots) == "lty")) dots[["lty"]] else 1L, cex= if(ns > 80L) 1-sqrt(ns)/sqrt(1150) else 1, bty = "n", xpd = TRUE, # y.intersp = 0.5, x.intersp = 0.5, ncol = if(d[2L] <= 10L) 1L else floor(d[2L]^.39)) #.37 } } # print.psmat <- print.qsu # nah, too expensive print.psmat <- function(x, digits = .op[["digits"]] + 1L, ...) { print.default(`attr<-`(unclass(x), "transpose", NULL), digits = digits, ...) } `[.psmat` <- function(x, i, j, ..., drop = TRUE) { ret <- NextMethod() if(length(dim(ret)) > 1L) { attr(ret, "transpose") <- attr(x, "transpose") oldClass(ret) <- oldClass(x) } ret } aperm.psmat <- function(a, perm = NULL, resize = TRUE, keep.class = TRUE, ...) { r <- aperm.default(a, perm, resize = resize) if(keep.class) { attr(r, "transpose") <- attr(a, "transpose") oldClass(r) <- oldClass(a) } r } collapse/R/my_RcppExports.R0000644000176200001440000001753714777170130015422 0ustar liggesusers BWCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(Cpp_BW, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(Cpp_BWm, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } BWlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, theta = 1, set_mean = 0, B = FALSE, fill = FALSE) { .Call(Cpp_BWl, x, ng, g, gs, w, narm, theta, set_mean, B, fill) } TRAC <- function(x, xAG, g = 0L, ret = 1L, set = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(set) return(invisible(.Call(C_TRA, x, xAG, g, ret, set))) .Call(C_TRA, x, xAG, g, ret, set) } TRAmC <- function(x, xAG, g = 0L, ret = 1L, set = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(set) return(invisible(.Call(C_TRAm, x, xAG, g, ret, set))) .Call(C_TRAm, x, xAG, g, ret, set) } TRAlC <- function(x, xAG, g = 0L, ret = 1L, set = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(set) return(invisible(.Call(C_TRAl, x, xAG, g, ret, set))) .Call(C_TRAl, x, xAG, g, ret, set) } fndistinctC <- function(x, g = NULL, narm = TRUE, nthreads = 1L) { .Call(C_fndistinct, x, g, narm, nthreads) } pwnobsmCpp <- function(x) { .Call(Cpp_pwnobsm, x) } fnobsC <- function(x, ng = 0L, g = 0L) { .Call(C_fnobs, x, ng, g) } varyingCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE) { .Call(Cpp_varying, x, ng, g, any_group) } varyingmCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(Cpp_varyingm, x, ng, g, any_group, drop) } varyinglCpp <- function(x, ng = 0L, g = 0L, any_group = TRUE, drop = TRUE) { .Call(Cpp_varyingl, x, ng, g, any_group, drop) } fbstatsCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable.algo = TRUE, array = TRUE, setn = TRUE, gn = NULL) { .Call(Cpp_fbstats, x, ext, ng, g, npg, pg, w, stable.algo, array, setn, gn) } fbstatsmCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable.algo = TRUE, array = TRUE, gn = NULL) { .Call(Cpp_fbstatsm, x, ext, ng, g, npg, pg, w, stable.algo, array, gn) } fbstatslCpp <- function(x, ext = FALSE, ng = 0L, g = 0L, npg = 0L, pg = 0L, w = NULL, stable.algo = TRUE, array = TRUE, gn = NULL) { .Call(Cpp_fbstatsl, x, ext, ng, g, npg, pg, w, stable.algo, array, gn) } fdiffgrowthCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(Cpp_fdiffgrowth, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthmCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(Cpp_fdiffgrowthm, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } fdiffgrowthlCpp <- function(x, n = 1L, diff = 1L, fill = NA_real_, ng = 0L, g = 0L, gs = NULL, t = NULL, ret = 1L, rho = 1, names = TRUE, power = 1) { .Call(Cpp_fdiffgrowthl, x, n, diff, fill, ng, g, gs, t, ret, rho, names, power) } flagleadCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(Cpp_flaglead, x, n, fill, ng, g, t, names) } flagleadmCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(Cpp_flagleadm, x, n, fill, ng, g, t, names) } flagleadlCpp <- function(x, n = 1L, fill = NULL, ng = 0L, g = 0L, t = NULL, names = TRUE) { .Call(Cpp_flagleadl, x, n, fill, ng, g, t, names) } # fnthC <- function(x, n = 0.5, g = NULL, w = NULL, narm = TRUE, ret = 1L, nthreads = 1L, o = NULL, check.o = FALSE) { # .Call(C_fnth, x, n, g, w, narm, ret, nthreads, o, check.o) # } # # fnthmC <- function(x, n = 0.5, g = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 1L, nthreads = 1L) { # .Call(C_fnthm, x, n, g, w, narm, drop, ret, nthreads) # } # # fnthlC <- function(x, n = 0.5, g = NULL, w = NULL, narm = TRUE, drop = TRUE, ret = 1L, nthreads = 1L) { # .Call(C_fnthl, x, n, g, w, narm, drop, ret, nthreads) # } fquantile <- function(x, probs = c(0, 0.25, 0.5, 0.75, 1), w = NULL, o = if(length(x) > 1e5L && length(probs) > log(length(x))) radixorder(x) else NULL, na.rm = .op[["na.rm"]], type = 7L, names = TRUE, check.o = is.null(attr(o, "sorted"))) .Call(C_fquantile, x, probs, w, o, na.rm, type, names, check.o) .quantile <- function(x, probs = c(0, 0.25, 0.5, 0.75, 1), w = NULL, o = NULL, na.rm = TRUE, type = 7L, names = FALSE, check.o = FALSE) .Call(C_fquantile, x, probs, w, o, na.rm, type, names, check.o) fscaleCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(Cpp_fscale, x, ng, g, w, narm, set_mean, set_sd) } fscalemCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(Cpp_fscalem, x, ng, g, w, narm, set_mean, set_sd) } fscalelCpp <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, set_mean = 0, set_sd = 1) { .Call(Cpp_fscalel, x, ng, g, w, narm, set_mean, set_sd) } fsumC <- function(x, ng = 0L, g = 0L, w = NULL, narm = TRUE, fill = FALSE, nthreads = 1L) { .Call(C_fsum, x, ng, g, w, narm, fill, nthreads) } fsummCcc <- function(x, w = NULL, drop = TRUE) { .Call(C_fsumm, x, 0L, 0L, w, FALSE, FALSE, drop, 1L) } fvarsdCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE) { .Call(Cpp_fvarsd, x, ng, g, gs, w, narm, stable_algo, sd) } fvarsdmCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(Cpp_fvarsdm, x, ng, g, gs, w, narm, stable_algo, sd, drop) } fvarsdlCpp <- function(x, ng = 0L, g = 0L, gs = NULL, w = NULL, narm = TRUE, stable_algo = TRUE, sd = TRUE, drop = TRUE) { .Call(Cpp_fvarsdl, x, ng, g, gs, w, narm, stable_algo, sd, drop) } mrtl <- function(X, names = FALSE, return = "list") { switch(return, list = .Call(Cpp_mrtl, X, names, 0L), data.frame = .Call(Cpp_mrtl, X, names, 1L), data.table = alc(.Call(Cpp_mrtl, X, names, 2L)), stop("Unknown return option!")) } mctl <- function(X, names = FALSE, return = "list") { switch(return, list = .Call(Cpp_mctl, X, names, 0L), data.frame = .Call(Cpp_mctl, X, names, 1L), data.table = alc(.Call(Cpp_mctl, X, names, 2L)), stop("Unknown return option!")) } psmatCpp <- function(x, g, t = NULL, transpose = FALSE, fill = NULL) { .Call(Cpp_psmat, x, g, t, transpose, fill) } qFCpp <- function(x, ordered = TRUE, na_exclude = TRUE, keep_attr = TRUE, ret = 1L) { .Call(Cpp_qF, x, ordered, na_exclude, keep_attr, ret) } sortuniqueCpp <- function(x) { .Call(Cpp_sortunique, x) } fdroplevelsCpp <- function(x, check_NA = TRUE) { .Call(Cpp_fdroplevels, x, check_NA) } setAttributes <- function(x, a) .Call(C_setAttributes, x, a) copyMostAttributes <- function(to, from) .Call(C_copyMostAttributes, to, from) setattributes <- function(x, a) .Call(C_setattributes, x, a) # invisible() duplAttributes <- function(x, y) .Call(C_duplAttributes, x, y) # No longer needed... # setattr <- function(x, a, v) { # invisible(.Call(C_setattr, x, a, v)) # } # duplattributes <- function(x, y) { # invisible(.Call(C_duplattributes, x, y)) # } # cond_duplAttributes <- function(x, y) { # .Call(C_cond_duplAttributes, x, y) # } # cond_duplattributes <- function(x, y) { # invisible(.Call(C_cond_duplattributes, x, y)) # } seqid <- function(x, o = NULL, del = 1L, start = 1L, na.skip = FALSE, skip.seq = FALSE, check.o = TRUE) { .Call(Cpp_seqid, x, o, del, start, na.skip, skip.seq, check.o) } groupid <- function(x, o = NULL, start = 1L, na.skip = FALSE, check.o = TRUE) { .Call(Cpp_groupid, x, o, start, na.skip, check.o) } collapse/R/fhdbetween_fhdwithin.R0000644000176200001440000013525114777170130016575 0ustar liggesusers # TODO: More tests for attribute handling + Optimize linear fitting... demean <- function(x, fl, weights, ..., means = FALSE) { if(length(fl) == 1L && is.null(attr(fl, "slope.flag"))) { clx <- oldClass(x) # Need to do this because could call fbetween.grouped_df of fbetween.pseries / pdata.frame if(means) return(`oldClass<-`(fbetween(unclass(x), fl[[1L]], weights, na.rm = FALSE), clx)) else return(`oldClass<-`(fwithin(unclass(x), fl[[1L]], weights, na.rm = FALSE), clx)) } msg <- "For higher-dimensional centering and projecting out interactions need to install.packages('%s'), then unload [detach('package:collapse', unload = TRUE)] and reload [library(collapse)]." res <- getenvFUN("fixest_demean", msg)(x, fl, attr(fl, "slope.vars"), attr(fl, "slope.flag"), weights = weights, ..., notes = FALSE, im_confident = TRUE) if(!means) return(duplAttributes(res, x)) # if(!is.matrix(x)) dim(res) <- NULL # also need for flmres... e.g. with weights... intercept is no longer always added, so res needs to be a matrix... # Need matrix dimensions... for subset in variable.wise... do.call(cbind, fl[!fc]) needs to be preserved... # return(if(means) x - drop(res) else drop(res)) if(is.atomic(res)) return(duplAttributes(x - res, x)) duplAttributes(.mapply(`-`, list(unattrib(x), unattrib(res)), NULL), x) } myModFrame <- function(f, data) { t <- terms.formula(f) v <- attr(t, "variables") res <- eval(v, data, parent.frame()) # faster than res <- eval(substitute(with(data, e), list(e = v))) attributes(res) <- list(names = as.character(v[-1L]), row.names = .set_row_names(fnrow(data)), class = "data.frame", terms = t) res } # Example: # mf <- myModFrame( ~ factor(cyl)*poly(carb, 2) + factor(cyl):factor(vs) + factor(cyl):factor(vs):wt + factor(cyl):mpg + factor(am) + factor(hp > 146):qsec + vs + carb:am, data = mtcars) # mf <- myModFrame( ~ factor(cyl)*poly(carb, 2) + factor(cyl):factor(vs) + factor(cyl):mpg + factor(am) + factor(hp > 146):qsec + vs + carb:am, data = mtcars) finteract <- function(x, facts, mf) { # x and facts are logical f <- which(x & facts) if(length(f) == 1L) mf[[f]] else if(length(f) == 2L) do.call(`:`, mf[f]) else as_factor_GRP(GRP.default(mf[f], call = FALSE)) } slinteract <- function(sl, facts, mf) { # sl and facts are logical sl <- which(sl & !facts) res <- if(length(sl) == 1L) mf[[sl]] else do.call(`*`, mf[sl]) if(is.matrix(res)) mctl(res) else list(res) } # This is probably the craziest piece of code in the whole package: # It takes a model.frame as input and computes from it the inputs for both fixest::demean() # and linear model fitting getfl <- function(mf) { facts <- .Call(C_vtypes, mf, 2L) # vapply(unattrib(mf), is.factor, TRUE) # Any factors if(any(facts)) { terms <- attributes(attr(mf, "terms")) clmf <- oldClass(mf) oldClass(mf) <- NULL # good ?? tl <- terms[["term.labels"]] factors <- terms[[2L]] fctterms <- fcolSums(factors[facts, , drop = FALSE]) > 0 fctinteract <- fctterms & fcolSums(factors) > 1 # best ?? # Any interactions involving factors if(any(fctinteract)) { modelterms <- tl[!fctterms & tl %in% names(which(rowSums(factors) <= 1))] single <- rowSums(factors[facts, , drop = FALSE] > 0L) == 1 # These are either single factors or factors only appearing inside an interaction... factors <- factors[, fctinteract, drop = FALSE] nointeract <- frowSums(factors[facts, , drop = FALSE]) == 0 # These are factors not appearing in interactions singlefct <- names(which(single & nointeract)) # better way ?? # tl[fctterms & !fctinteract] intterms <- mctl(factors > 0L, TRUE) # Need names here fctfct <- colSums(factors[!facts, , drop = FALSE]) == 0 # These are factor-factor interactions... need names fctdat <- NULL # best way to do this ?? or as before with pre-allocation ?? lsf <- length(singlefct) lff <- bsum(fctfct) if(lsf) fctdat <- mf[singlefct] # unattrib() -> wrap around at the end... Nah, better with names... if(lff) fctdat <- c(fctdat, lapply(intterms[fctfct], finteract, TRUE, mf)) # Any heterogeneous slopes if(lff != length(intterms)) { intslope <- intterms[!fctfct] slflag <- integer(lsf) factors <- factors[facts, !fctfct, drop = FALSE] dimnames(factors) <- NULL # Could have imp:exp and imp:exp:year, so we need to partial match imp:exp in all slope terms... imc <- im <- pmatch(names(which(fctfct)), names(intslope), nomatch = 0L) # need names to match here !! if(any(im)) { # first the fact:fact in order (only add slopes), then the other ones if(!all(im)) im <- im[im > 0L] # Check for duplicate factors in interactions (largely independent of the other stuff) dupchk <- factors[, -im, drop = FALSE] > 0L # same as intslopes... if(any(dupfct <- frowSums(dupchk) > 1)) { # Check for factors with multiple slopes... if(bsum(dupfct) > 1L) stop("Cannot currently support multiple factors with multiple slopes...") dupfct <- which(dupchk[dupfct, ]) # This accounts for im fctdat <- c(fctdat, lapply(c(intslope[-im][dupfct[1L]], intslope[-im][-dupfct]), finteract, facts, mf)) } else fctdat <- c(fctdat, lapply(intslope[-im], finteract, facts, mf)) # only get factors not already in fctfct... slopes <- lapply(c(intslope[im], intslope[-im]), slinteract, facts, mf) lsl <- lengths(slopes, FALSE) # No names here lim <- seq_along(im) imc[imc > 0L] <- lsl[lim] # This is ok, these are also included elsewhere slflag <- c(slflag, imc) if(length(lsl) != length(lim)) { # The other cases... if exist othmc <- lsl[-lim] if(any(alone <- single & !nointeract)) { alone <- fcolSums(factors[alone, -im, drop = FALSE]) > 0 # This finds the terms corresponding to a factor appearing in an interaction but nowhere else.. othmc[alone] <- -othmc[alone] } if(any(dupfct)) { # reordering if dupfct... putting it in front.. slopes[-lim] <- c(slopes[-lim][dupfct], slopes[-lim][-dupfct]) othmc <- c(bsum(othmc[dupfct]), othmc[-dupfct]) } slflag <- c(slflag, othmc) } # this shows single factors not interacted... set slflag to negative... # what about double interactions only with slope ??? i.e. only imp:exp:year -> also negative flag... } else { # No double factor interactions with slopes.. Only simple slopes interactions.. (what about dupfact of two different double interactions with slope, but no factfact?) dupchk <- factors > 0L # same as intslopes... if(any(dupfct <- frowSums(dupchk) > 1)) { # Check for factors with multiple slopes... if(bsum(dupfct) > 1L) stop("Cannot currently support multiple factors with multiple slopes...") dupfct <- which(dupchk[dupfct, ]) fctdat <- c(fctdat, lapply(c(intslope[dupfct[1L]], intslope[-dupfct]), finteract, facts, mf)) } else fctdat <- c(fctdat, lapply(intslope, finteract, facts, mf)) slopes <- lapply(intslope, slinteract, facts, mf) # getting slopes, independent of dupfct... lsl <- lengths(slopes, FALSE) if(any(alone <- single & !nointeract)) { # Any factor occurring only inside an interaction... This is independent of dupfact and thre associated reordering... alone <- fcolSums(factors[alone, , drop = FALSE]) > 0 lsl[alone] <- -lsl[alone] } if(any(dupfct)) { # reordering if dupfct... putting it in front.. slopes <- c(slopes[dupfct], slopes[-dupfct]) lsl <- c(bsum(lsl[dupfct]), lsl[-dupfct]) } slflag <- c(slflag, integer(lff), lsl) } attr(fctdat, "slope.vars") <- unlist(slopes, recursive = FALSE) # , FALSE, FALSE) attr(fctdat, "slope.flag") <- slflag # c(integer(length(fctdat)-length(intslope)), lengths(slopes)) # what about other slopes (not poly??) } # drop unused factor levels ?? } else { modelterms <- tl[!fctterms] fctdat <- mf[facts] } slflag <- attr(fctdat, "slope.flag") if(length(modelterms)) { # Intercept only needed if facts with only negative slope flag... form <- paste0(if(is.null(slflag) || any(slflag > 0L)) "~ -1 + " else "~ ", paste(modelterms, collapse = " + ")) moddat <- model.matrix.default(as.formula(form), data = `oldClass<-`(mf, clmf)) } else { moddat <- if(is.null(slflag) || any(slflag > 0L)) NULL else alloc(1, length(mf[[1L]])) } } else { fctdat <- NULL moddat <- model.matrix.default(attr(mf, "terms"), data = mf) # .External2(stats:::C_modelmatrix, attr(mf, "terms"), mf) } list(fl = fctdat, xmat = moddat) } # Keeps attributes ? -> Yes ! # fastest way ? or better use vectors ? -> this is faster than lapply(fl, `[`, cc) ! subsetfl <- function(fl, cc) { slopes <- attr(fl, "slope.vars") # fl could be a data.frame, slope vars not (getfl() unclasses) if(is.null(names(fl))) names(fl) <- seq_along(unclass(fl)) if(is.null(slopes)) return(.Call(C_subsetDT, fl, cc, seq_along(unclass(fl)), FALSE)) attr(fl, "slope.vars") <- NULL if(is.null(names(slopes))) names(slopes) <- seq_along(slopes) res <- .Call(C_subsetDT, fl, cc, seq_along(fl), FALSE) attr(res, "slope.vars") <- .Call(C_subsetDT, slopes, cc, seq_along(slopes), FALSE) # fdroplevels ?? res } # Old version: # subsetfl <- function(fl, cc) { # lapply(fl, function(f) { # use CsubsetDT or CsubsetVector ?? also check NA in regressors ?? # x <- attr(f, "x") # if(is.null(x)) return(.Call(C_subsetVector, f, cc, FALSE)) else # return(`attr<-`(.Call(C_subsetVector, f, cc, FALSE), "x", # if(is.matrix(x)) x[cc, , drop = FALSE] else # .Call(C_subsetVector, x, cc, FALSE))) # }) # } # Examples: # str(getfl(myModFrame( ~ cyl + carb, data = mtcars))) # str(getfl(myModFrame( ~ factor(cyl)*carb, data = mtcars))) # str(getfl(myModFrame( ~ factor(cyl) + factor(am), data = mtcars))) # str(getfl(myModFrame( ~ factor(cyl):factor(am), data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl)*carb, data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl) + factor(am), data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am), data = mtcars))) # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am):vs, data = mtcars))) # wow !! # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am)*vs, data = mtcars))) # wow !! # str(getfl(myModFrame( ~ mpg + factor(cyl):factor(am) + factor(cyl):factor(am):vs, data = mtcars))) # wow !! # str(getfl(myModFrame( ~ mpg + factor(cyl):mpg + factor(am):mpg + factor(cyl):factor(am), data = mtcars))) # str(getfl(model.frame( ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb, data = mtcars))) # (Weighted) linear model fitting for vectors and lists... # Neded to sort out some insufficiencies of base R default functions when dealing with dimensions `%**%` <- function(x, y) if(length(y) > 1L) x %*% y else x * y tcrossprod2 <- function(x, y) if(length(x) > 1L) tcrossprod(x, y) else `dim<-`(x * y, c(1L, length(y))) # y = x; X = xmat; w = w; meth = lm.method flmres <- function(y, X, w = NULL, meth = "qr", resi = TRUE, ...) { # n <- dim(X)[1L] # if(n != NROW(y)) stop("NROW(y) must match nrow(X)") dimnames(X) <- NULL # faster ?? if(length(w)) { # if(length(w) != n) stop("w must be numeric and length(w) == nrow(X)") wts <- sqrt(w) if(is.atomic(y)) { dimnames(y) <- NULL return(drop(switch(meth, qr = { fit <- X %**% qr.coef(qr(X * wts, ...), y * wts) # same as lm... if(resi) y - fit else fit }, chol = { fit <- X * wts fit <- X %*% chol2inv(chol(crossprod(fit), ...)) %*% crossprod(fit, y * wts) if(resi) y - fit else fit }, stop("Only methods 'qr' and 'chol' are supported")))) } attributes(y) <- NULL return(switch(meth, qr = { calc <- qr(X * wts, ...) if(resi) lapply(y, function(z) drop(z - X %**% qr.coef(calc, z * wts))) else lapply(y, function(z) drop(X %**% qr.coef(calc, z * wts))) }, chol = { calc <- X * wts calc <- X %*% tcrossprod2(chol2inv(chol(crossprod(calc), ...)), calc) if(resi) lapply(y, function(z) drop(z - calc %*% (z * wts))) else lapply(y, function(z) drop(calc %*% (z * wts))) }, stop("Only methods 'qr' and 'chol' are supported"))) } if(is.atomic(y)) { dimnames(y) <- NULL return(drop(switch(meth, qr = if(resi) qr.resid(qr(X, ...), y) else qr.fitted(qr(X, ...), y), chol = { fit <- X %*% chol2inv(chol(crossprod(X), ...)) %*% crossprod(X, y) if(resi) y - fit else fit }, stop("Only methods 'qr' and 'chol' are supported")))) } attributes(y) <- NULL return(switch(meth, qr = { calc <- qr(X, ...) if(resi) lapply(y, function(z) drop(qr.resid(calc, z))) else lapply(y, function(z) drop(qr.fitted(calc, z))) }, chol = { calc <- X %*% tcrossprod2(chol2inv(chol(crossprod(X), ...)), X) if(resi) lapply(y, function(z) drop(z - calc %*% z)) else lapply(y, function(z) drop(calc %*% z)) }, stop("Only methods 'qr' and 'chol' are supported"))) } fhdwithin <- function(x, ...) UseMethod("fhdwithin") # , x fhdwithin.default <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fhdwithin.matrix(x, fl, w, na.rm, fill, ...)) ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] # Note this here !! if(!fill) { if(length(names(x))) ax[["names"]] <- Csv(names(x), cc) # best ?? x <- Csv(x, cc) } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(Csv(fl, cc)) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl = Csv(fl, cc)) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } if(nallfc || !fcl) { if(na.rm && fill) { x[-cc] <- NA x[cc] <- flmres(if(nallfc) demean(Csv(x, cc), fl, w, ...) else Csv(x, cc), xmat, w, lm.method, ...) return(setAttributes(x, ax)) } else return(setAttributes(flmres(if(nallfc) demean(x, fl, w, ...) else x, xmat, w, lm.method, ...), ax)) } else if(na.rm && fill) { x[-cc] <- NA x[cc] <- demean(Csv(x, cc), fl, w, ...) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ...), ax)) } fhdwithin.pseries <- function(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, ...) { if(is.matrix(x)) stop("higher-dimensional centering of matrix pseries is currently not supported. You can use fhdwithin.matrix(x, ix(x), fill = TRUE)") ix <- findex(x) namix <- attr(ix, "names") if(is.character(effect) && length(effect) == 1L && effect == "all") { effect <- seq_along(namix) } else effect <- cols2int(effect, ix, namix) g <- .subset(ix, effect) if(na.rm && length(cc <- whichv(x, NA, TRUE)) != length(x)) { g <- .Call(C_subsetDT, g, cc, seq_along(g), FALSE) # lapply(g, `[`, cc) -> slower ! if(fill) { x[cc] <- demean(Csv(unattrib(x), cc), g, w[cc], ...) # keeps attributes ?? -> Yes !! return(x) } ax <- attributes(x) attributes(x) <- NULL xcc <- Csv(x, cc) nix <- length(unclass(ix)) if(nix != length(g)) { toss <- seq_len(nix)[-effect] reix <- copyMostAttributes(c(.Call(C_subsetDT, ix, cc, toss, FALSE), g)[namix], ix) } else reix <- copyMostAttributes(g, ix) attr(reix, "row.names") <- .set_row_names(length(cc)) ax[[if(any(ax$class == "indexed_series")) "index_df" else "index"]] <- reix ax$na.rm <- seq_along(x)[-cc] if(length(ax$names)) ax$names <- Csv(ax$names, cc) res <- setAttributes(demean(xcc, g, w[cc], ...), ax) } else res <- demean(x, g, w, ...) # keeps attributes ?? -> Yes !! if(is.double(x)) return(res) pseries_to_numeric(res) } # x = mNA; fl = m; lm.method = "qr" fhdwithin.matrix <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!fill) { if(length(dimnames(x)[[1L]])) ax[["dimnames"]][[1L]] <- Csv(dimnames(x)[[1L]], cc) # best ?? ax[["dim"]][1L] <- length(cc) x <- x[cc, , drop = FALSE] } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(Csv(fl, cc)) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl = Csv(fl, cc)) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } if(nallfc || !fcl) { if(na.rm && fill) { x[-cc, ] <- NA # What about weights cc ????? x[cc, ] <- flmres(if(nallfc) demean(x[cc, ], fl, w, ...) else x[cc, ], xmat, w, lm.method, ...) return(setAttributes(x, ax)) } else return(setAttributes(flmres(if(nallfc) demean(x, fl, w, ...) else x, xmat, w, lm.method, ...), ax)) } else if(na.rm && fill) { x[-cc, ] <- NA x[cc, ] <- demean(x[cc, ], fl, w, ...) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ...), ax)) } fhdwithin.zoo <- function(x, ...) if(is.matrix(x)) fhdwithin.matrix(x, ...) else fhdwithin.default(x, ...) fhdwithin.units <- fhdwithin.zoo # x = collapse:::colsubset(pwlddev, is.numeric) fhdwithin.pdata.frame <- function(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, ...) { ix <- findex(x) namix <- attr(ix, "names") if(is.character(effect) && length(effect) == 1L && effect == "all") { effect <- seq_along(namix) } else effect <- cols2int(effect, ix, namix) g <- .subset(ix, effect) if(na.rm && fill && variable.wise) { ax <- attributes(x) attributes(x) <- NULL varwisecomp <- function(x, fl, w, ...) lapply(x, function(y) { ycc <- whichv(y, NA, TRUE) y[ycc] <- demean(Csv(unattrib(y), ycc), subsetfl(fl, ycc), w[ycc], ...) return(y) }) return(setAttributes(varwisecomp(x, g, w, ...), ax)) } else if(na.rm && any(miss <- missDF(x))) { cc <- whichv(miss, FALSE) gcc <- .Call(C_subsetDT, g, cc, seq_along(g), FALSE) Y <- demean(.Call(C_subsetDT, x, cc, seq_along(unclass(x)), FALSE), gcc, w[cc], ...) if(fill) { ax <- attributes(x) ax[["na.rm"]] <- which(miss) return(setAttributes(.Call(C_lassign, Y, fnrow(x), cc, NA_real_), ax)) } attr(Y, "row.names") <- attr(x, "row.names")[cc] # row.names of pdata.frame are special. nix <- length(unclass(ix)) if(nix != length(g)) { toss <- seq_len(nix)[-effect] reix <- copyMostAttributes(c(.Call(C_subsetDT, ix, cc, toss, FALSE), gcc)[namix], ix) } else reix <- copyMostAttributes(gcc, ix) attr(reix, "row.names") <- .set_row_names(length(cc)) attr(Y, if(inherits(x, "indexed_frame")) "index_df" else "index") <- reix attr(Y, "na.rm") <- which(miss) return(Y) } else return(demean(x, g, w, ...)) # setAttributes(, ax) -> Not needed anymore (included in demean()) } # x = data[5:6]; fl = data[-(5:6)]; variable.wise = TRUE fhdwithin.data.frame <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- if(variable.wise) complete.cases(fl, w) else complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!variable.wise) { if(fill) nrx <- fnrow(x) else if(is.character(ax[["row.names"]])) ax[["row.names"]] <- ax[["row.names"]][cc] else ax[["row.names"]] <- .set_row_names(length(cc)) # best ?? x <- .Call(C_subsetDT, x, cc, seq_along(unclass(x)), FALSE) } } else na.rm <- FALSE } if(is.list(fl)) { # fl is a list !! fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(Csv(fl, cc)) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl = Csv(fl, cc)) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } if(variable.wise) { if(na.rm) { # this means there were mising values in fl, which were already removed! return(setAttributes(lapply(unattrib(x), function(y) { y[-cc] <- NA # which is not faster !! ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, cc) YC <- whichv(y_cc, NA, TRUE) y_cc <- Csv(y_cc, YC) wc <- w[YC] y[ycc] <- if(nallfc) flmres(demean(y_cc, subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, YC), wc, ...) else flmres(y_cc, xmat[YC, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) } return(setAttributes(lapply(unattrib(x), function(y) { ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, ycc) wc <- w[ycc] y[ycc] <- if(nallfc) flmres(demean(y_cc, subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, ycc), wc, ...) else flmres(y_cc, xmat[ycc, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) # Rfast fastlm?? } else { # at this point missing values are already removed from x and fl !! Y <- if(nallfc || !fcl) flmres(if(nallfc) demean(x, fl, w, ...) else x, xmat, w, lm.method, ...) else demean(x, fl, w, ...) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } fhdwithin.list <- function(x, ...) fhdwithin.data.frame(x, ...) # Note: could also do Mudlack and add means to second regression -> better than two-times centering ?? HDW <- function(x, ...) UseMethod("HDW") # , x HDW.default <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(HDW.matrix(x, fl, w, na.rm, fill, lm.method, ...)) fhdwithin.default(x, fl, w, na.rm, fill, lm.method, ...) } HDW.pseries <- function(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, ...) fhdwithin.pseries(x, effect, w, na.rm, fill, ...) HDW.matrix <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], lm.method = "qr", ...) { res <- fhdwithin.matrix(x, fl, w, na.rm, fill, lm.method, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "HDW.")) res } HDW.zoo <- function(x, ...) if(is.matrix(x)) HDW.matrix(x, ...) else HDW.default(x, ...) HDW.units <- HDW.zoo # x = mtcars; fl = ~ qF(cyl):carb; w = wdat; stub = FALSE HDW.data.frame <- function(x, fl, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, stub = .op[["stub"]], lm.method = "qr", ...) { if(is.call(fl)) { ax <- attributes(x) nam <- ax[["names"]] if(length(fl) == 3L) { fvars <- ckmatch(all.vars(fl[[3L]]), nam) Xvars <- ckmatch(all.vars(fl[[2L]]), nam) fl[[2L]] <- NULL } else { fvars <- ckmatch(all.vars(fl), nam) Xvars <- cols2intrmgn(fvars, cols, x) # if(length(cols)) fsetdiff(cols2int(cols, x, nam), fvars) else seq_along(unclass(x))[-fvars] } ax[["names"]] <- do_stub(stub, nam[Xvars], "HDW.") if(na.rm) { miss <- missDF(x, if(variable.wise) fvars else c(Xvars, fvars)) if(missw <- length(w) && anyNA(w)) miss <- miss | is.na(w) if(missw || any(miss)) { ax[["na.rm"]] <- which(miss) cc <- whichv(miss, FALSE) w <- w[cc] if(!variable.wise) if(fill) nrx <- fnrow(x) else if(is.character(ax[["row.names"]])) ax[["row.names"]] <- ax[["row.names"]][cc] else ax[["row.names"]] <- .set_row_names(length(cc)) # best ?? } else na.rm <- FALSE } xmat <- NULL list2env(getfl(myModFrame(fl, if(na.rm) .Call(C_subsetDT, x, cc, fvars, FALSE) else .subset(x, fvars))), envir = environment()) fcl <- !is.null(fl) nallfc <- fcl && !is.null(xmat) if(nallfc) xmat <- demean(xmat, fl, w, ...) if(variable.wise) { if(na.rm) { return(setAttributes(lapply(.subset(x, Xvars), function(y) { y[-cc] <- NA ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, cc) YC <- whichv(y_cc, NA, TRUE) y_cc <- Csv(y_cc, YC) wc <- w[YC] y[ycc] <- if(nallfc) flmres(demean(y_cc, subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, YC), wc, ...) else flmres(y_cc, xmat[YC, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) } return(setAttributes(lapply(.subset(x, Xvars), function(y) { ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, ycc) wc <- w[ycc] y[ycc] <- if(nallfc) flmres(demean(y_cc, subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, ycc), wc, ...) else flmres(y_cc, xmat[ycc, , drop = FALSE], wc, lm.method, ...) return(y) }), ax)) } else { # at this point missing values are already removed from fl !! Y <- if(na.rm) .Call(C_subsetDT, x, cc, Xvars, FALSE) else .subset(x, Xvars) Y <- if(nallfc || !fcl) flmres(if(nallfc) demean(Y, fl, w, ...) else Y, xmat, w, lm.method, ...) else demean(Y, fl, w, ...) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } # fl is not a formula !! res <- fhdwithin.data.frame(if(is.null(cols)) x else colsubset(x, cols), fl, w, na.rm, fill, variable.wise, lm.method, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "HDW.")) res } HDW.pdata.frame <- function(x, effect = "all", w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, stub = .op[["stub"]], ...) { res <- fhdwithin.pdata.frame(fcolsubset(x, cols2intrmgn(which(attr(x, "names") %in% attr(findex(x), "nam")), cols, x)), effect, w, na.rm, fill, variable.wise, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "HDW.")) res } HDW.list <- function(x, ...) HDW.data.frame(x, ...) # Theory: y = ?1 x1 + ?2 x2 + e # FWT: M2 y = ?1 M2 x1 + e so residuals: e = M2 y - ?1 M2 x1 and fitted: # Now M = I - x(x'x)-1x' = I - P. # So (I-P2) y = ?1 (I-P2) x1 + e or y - P2 y = ?1 x1 - ?1 P2 x1 + e # I want y - e = y^ = ?1 x1 + ?2 x2 # so # P2 y = ?1 P2 x1 + ?2 x2 # Haven't quite figgured it out, but my solution is to just subtract the demeaned data !! # Note: Only changes to fhdwithin is in the computation part: Perhaps you can combine the code in some better way to reduce code duplication ?? fhdbetween <- function(x, ...) UseMethod("fhdbetween") # , x fhdbetween.default <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fhdwithin.matrix(x, fl, w, na.rm, fill, lm.method, ...)) ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] # Note this here !! if(!fill) { if(length(names(x))) ax[["names"]] <- Csv(names(x), cc) # best ?? x <- Csv(x, cc) } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(Csv(fl, cc)) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl = Csv(fl, cc)) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } # Only this part of the code is different from fhdwithin... if(nallfc || !fcl) { if(na.rm && fill) { x[-cc] <- NA xcc <- Csv(x, cc) x[cc] <- if(nallfc) xcc - flmres(demean(xcc, fl, w, ...), xmat, w, lm.method, ...) else flmres(xcc, xmat, w, lm.method, FALSE, ...) return(setAttributes(x, ax)) } else return(setAttributes(if(nallfc) x - flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...), ax)) } else if(na.rm && fill) { x[-cc] <- NA x[cc] <- demean(Csv(x, cc), fl, w, ..., means = TRUE) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ..., means = TRUE), ax)) } fhdbetween.pseries <- function(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, ...) fhdwithin.pseries(x, effect, w, na.rm, fill, ..., means = TRUE) fhdbetween.matrix <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!fill) { if(length(dimnames(x)[[1L]])) ax[["dimnames"]][[1L]] <- dimnames(x)[[1L]][cc] # best ?? ax[["dim"]][1L] <- length(cc) x <- x[cc, , drop = FALSE] } } else na.rm <- FALSE } if(is.list(fl)) { fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(Csv(fl, cc)) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl = Csv(fl, cc)) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } # Only this part of the code is different from fhdwithin... if(nallfc || !fcl) { if(na.rm && fill) { x[-cc, ] <- NA xcc <- x[cc, ] # What about weights cc ? -> done above... x[cc, ] <- if(nallfc) xcc - flmres(demean(xcc, fl, w, ...), xmat, w, lm.method, ...) else flmres(xcc, xmat, w, lm.method, FALSE, ...) return(setAttributes(x, ax)) } else return(setAttributes(if(nallfc) x - flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...), ax)) } else if(na.rm && fill) { x[-cc, ] <- NA x[cc, ] <- demean(x[cc, ], fl, w, ..., means = TRUE) return(setAttributes(x, ax)) } else return(setAttributes(demean(x, fl, w, ..., means = TRUE), ax)) } fhdbetween.zoo <- function(x, ...) if(is.matrix(x)) fhdbetween.matrix(x, ...) else fhdbetween.default(x, ...) fhdbetween.units <- fhdbetween.zoo fhdbetween.pdata.frame <- function(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, ...) fhdwithin.pdata.frame(x, effect, w, na.rm, fill, variable.wise, ..., means = TRUE) fhdbetween.data.frame <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, lm.method = "qr", ...) { ax <- attributes(x) if(na.rm) { cc <- if(variable.wise) complete.cases(fl, w) else complete.cases(x, fl, w) # gives error if lengths don't match, otherwise demeanlist and qr.resid give errors !! if(!all(cc)) { ax[["na.rm"]] <- whichv(cc, FALSE) cc <- which(cc) w <- w[cc] if(!variable.wise) { if(fill) nrx <- fnrow(x) else if(is.character(ax[["row.names"]])) ax[["row.names"]] <- ax[["row.names"]][cc] else ax[["row.names"]] <- .set_row_names(length(cc)) # best ?? x <- .Call(C_subsetDT, x, cc, seq_along(unclass(x)), FALSE) } } else na.rm <- FALSE } if(is.list(fl)) { # fl is a list !! fc <- .Call(C_vtypes, fl, 2L) # vapply(unattrib(fl), is.factor, TRUE) fcl <- any(fc) # if(!fcl && !missing(...)) unused_arg_action(match.call(), ...) nallfc <- fcl && !all(fc) if(na.rm) fl <- subsetfl(fl, cc) attributes(fl) <- NULL # good here ?? if(nallfc) { xmat <- demean(do.call(cbind, fl[!fc]), fl[fc], w, ...) fl <- fl[fc] } else if(!fcl) xmat <- do.call(cbind, c(list(alloc(1L, length(fl[[1L]]))), fl)) } else if(is.matrix(fl)) { # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl[cc, , drop = FALSE]) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } else if(is.factor(fl)) { fl <- if(na.rm) list(Csv(fl, cc)) else list(fl) fcl <- TRUE nallfc <- FALSE } else { if(!is.numeric(fl)) stop("fl must be a list of vectors / factors, a numeric matrix or a numeric vector") # if(!missing(...)) unused_arg_action(match.call(), ...) xmat <- if(na.rm) cbind(Intercept = 1L, fl = Csv(fl, cc)) else cbind(Intercept = 1L, fl) nallfc <- fcl <- FALSE } # Only this part of the code is different from fhdwithin !! if(variable.wise) { if(na.rm) { # this means there were mising values in fl, which were already removed! return(setAttributes(lapply(unattrib(x), function(y) { y[-cc] <- NA # which is not faster !! ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, cc) YC <- whichv(y_cc, NA, TRUE) y_cc <- Csv(y_cc, YC) wc <- w[YC] y[ycc] <- if(nallfc) y_cc %-=% flmres(demean(y_cc, subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, YC), wc, ..., means = TRUE) else flmres(y_cc, xmat[YC, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) } return(setAttributes(lapply(unattrib(x), function(y) { ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, ycc) wc <- w[ycc] y[ycc] <- if(nallfc) y_cc %-=% flmres(demean(y_cc, subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, ycc), wc, ..., means = TRUE) else flmres(y_cc, xmat[ycc, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) # Rfast fastlm?? } else { # at this point missing values are already removed from x and fl !! if(nallfc || !fcl) { Y <- if(nallfc) x %c-% flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...) } else Y <- demean(x, fl, w, ..., means = TRUE) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } fhdbetween.list <- function(x, ...) fhdbetween.data.frame(x, ...) HDB <- function(x, ...) UseMethod("HDB") # , x HDB.default <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(HDB.matrix(x, fl, w, na.rm, fill, lm.method, ...)) fhdbetween.default(x, fl, w, na.rm, fill, lm.method, ...) } HDB.pseries <- function(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, ...) fhdwithin.pseries(x, effect, w, na.rm, fill, ..., means = TRUE) HDB.matrix <- function(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], lm.method = "qr", ...) { res <- fhdbetween.matrix(x, fl, w, na.rm, fill, lm.method, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "HDB.")) res } HDB.zoo <- function(x, ...) if(is.matrix(x)) HDB.matrix(x, ...) else HDB.default(x, ...) HDB.units <- HDB.zoo HDB.data.frame <- function(x, fl, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, stub = .op[["stub"]], lm.method = "qr", ...) { if(is.call(fl)) { ax <- attributes(x) nam <- ax[["names"]] if(length(fl) == 3L) { fvars <- ckmatch(all.vars(fl[[3L]]), nam) Xvars <- ckmatch(all.vars(fl[[2L]]), nam) fl[[2L]] <- NULL } else { fvars <- ckmatch(all.vars(fl), nam) Xvars <- cols2intrmgn(fvars, cols, x) # if(length(cols)) fsetdiff(cols2int(cols, x, nam), fvars) else seq_along(unclass(x))[-fvars] } ax[["names"]] <- do_stub(stub, nam[Xvars], "HDB.") if(na.rm) { miss <- missDF(x, if(variable.wise) fvars else c(Xvars, fvars)) if(missw <- length(w) && anyNA(w)) miss <- miss | is.na(w) if(missw || any(miss)) { ax[["na.rm"]] <- which(miss) cc <- whichv(miss, FALSE) w <- w[cc] if(!variable.wise) if(fill) nrx <- fnrow(x) else if(is.character(ax[["row.names"]])) ax[["row.names"]] <- ax[["row.names"]][cc] else ax[["row.names"]] <- .set_row_names(length(cc)) # best ?? } else na.rm <- FALSE } xmat <- NULL list2env(getfl(myModFrame(fl, if(na.rm) .Call(C_subsetDT, x, cc, fvars, FALSE) else .subset(x, fvars))), envir = environment()) fcl <- !is.null(fl) nallfc <- fcl && !is.null(xmat) if(nallfc) xmat <- demean(xmat, fl, w, ...) # Only this part of the code is different from fhdwithin !! if(variable.wise) { if(na.rm) { # this means there were mising values in fl, which were already removed! return(setAttributes(lapply(.subset(x, Xvars), function(y) { y[-cc] <- NA # which is not faster !! ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, cc) YC <- whichv(y_cc, NA, TRUE) y_cc <- Csv(y_cc, YC) wc <- w[YC] y[ycc] <- if(nallfc) y_cc %-=% flmres(demean(y_cc, subsetfl(fl, YC), wc, ...), xmat[YC, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, YC), wc, ..., means = TRUE) else flmres(y_cc, xmat[YC, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) } return(setAttributes(lapply(.subset(x, Xvars), function(y) { ycc <- whichv(y, NA, TRUE) y_cc <- Csv(y, ycc) wc <- w[ycc] y[ycc] <- if(nallfc) y_cc %-=% flmres(demean(y_cc, subsetfl(fl, ycc), wc, ...), xmat[ycc, , drop = FALSE], wc, lm.method, ...) else if(fcl) demean(y_cc, subsetfl(fl, ycc), wc, ..., means = TRUE) else flmres(y_cc, xmat[ycc, , drop = FALSE], wc, lm.method, FALSE, ...) return(y) }), ax)) } else { # at this point missing values are already removed from fl !! x <- if(na.rm) .Call(C_subsetDT, x, cc, Xvars, FALSE) else .subset(x, Xvars) if(nallfc || !fcl) { Y <- if(nallfc) x %c-% flmres(demean(x, fl, w, ...), xmat, w, lm.method, ...) else flmres(x, xmat, w, lm.method, FALSE, ...) } else Y <- demean(x, fl, w, ..., means = TRUE) if(na.rm && fill) # x[cc, ] <- Y; x[-cc, ] <- NA return(setAttributes(.Call(C_lassign, Y, nrx, cc, NA_real_), ax)) return(setAttributes(Y, ax)) } } # fl is not a formula !! res <- fhdbetween.data.frame(if(is.null(cols)) x else colsubset(x, cols), fl, w, na.rm, fill, variable.wise, lm.method, ...) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "HDB.")) res } HDB.pdata.frame <- function(x, effect = "all", w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, stub = .op[["stub"]], ...) { res <- fhdwithin.pdata.frame(fcolsubset(x, cols2intrmgn(which(attr(x, "names") %in% attr(findex(x), "nam")), cols, x)), effect, w, na.rm, fill, variable.wise, ..., means = TRUE) if(isTRUE(stub) || is.character(stub)) return(add_stub(res, if(is.character(stub)) stub else "HDB.")) res } HDB.list <- function(x, ...) HDB.data.frame(x, ...) fHDbetween <- function(x, ...) { message("Note that 'fHDbetween' was renamed to 'fhdbetween'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fhdbetween") } fHDbetween.default <- function(x, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fhdbetween.matrix(x, ...)) # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdbetween.default(x, ...) } fHDbetween.matrix <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdbetween.matrix(x, ...) } fHDbetween.data.frame <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdbetween.data.frame(x, ...) } fHDwithin <- function(x, ...) { message("Note that 'fHDwithin' was renamed to 'fhdwithin'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')") UseMethod("fhdwithin") } fHDwithin.default <- function(x, ...) { if(is.matrix(x) && !inherits(x, "matrix")) return(fhdwithin.matrix(x, ...)) # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdwithin.default(x, ...) } fHDwithin.matrix <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdwithin.matrix(x, ...) } fHDwithin.data.frame <- function(x, ...) { # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')") fhdwithin.data.frame(x, ...) } # # HDW(x = mtcars, fl = ~ factor(cyl)*carb) # # HDW(x = mtcars, fl = ~ factor(cyl):vs) # # lm(mpg ~ factor(cyl):factor(vs), data = mtcars) # # HDW(x = mtcars, fl = ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb) # # # Works!! although there is a further interaction with carb!! # lm(mpg ~ hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb)) # lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear + wt:gear:carb, data = mtcars) # # lm(mpg ~ hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear)) # lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear, data = mtcars) # # lm(mpg ~ hp, data = HDW(mtcars, ~ cyl*carb + vs + wt:gear)) # lm(mpg ~ hp + cyl*carb + vs + wt:gear, data = mtcars) # # # lm(mpg ~ hp, data = HDW(mtcars, mpg + hp ~ cyl*carb + factor(cyl)*poly(drat,2))) # lm(mpg ~ hp + cyl*carb + factor(cyl)*poly(drat,2), data = mtcars) # collapse/R/small_helper.R0000644000176200001440000007754615056572047015105 0ustar liggesusers# Functions needed for internal use because of option(collapse_mask = "fast-stat-fun") bsum <- base::sum bprod <- base::prod bmin <- base::min bmax <- base::max # Row-operations (documented under data transformations...) ... "%rr%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "replace_fill") else # outer(rep.int(1L, dim(X)[2L]), v) duplAttributes(.mapply(function(x, y) TRA(x, y, "replace_fill"), list(unattrib(X), unattrib(v)), NULL), X) "%r+%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "+") else duplAttributes(.mapply(function(x, y) TRA(x, y, "+"), list(unattrib(X), unattrib(v)), NULL), X) "%r-%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "-") else duplAttributes(.mapply(function(x, y) TRA(x, y, "-"), list(unattrib(X), unattrib(v)), NULL), X) "%r*%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "*") else duplAttributes(.mapply(function(x, y) TRA(x, y, "*"), list(unattrib(X), unattrib(v)), NULL), X) "%r/%" <- function(X, v) if(is.atomic(X) || is.atomic(v) || inherits(X, "data.frame")) TRA(X, v, "/") else duplAttributes(.mapply(function(x, y) TRA(x, y, "/"), list(unattrib(X), unattrib(v)), NULL), X) "%cr%" <- function(X, V) if(is.atomic(X)) return(duplAttributes(rep(V, NCOL(X)), X)) else # outer(rep.int(1L, dim(X)[2L]), V) if(is.atomic(V)) return(duplAttributes(lapply(vector("list", length(unclass(X))), function(z) V), X)) else copyAttrib(V, X) # copyAttrib first makes a shallow copy of V "%c+%" <- function(X, V) if(is.atomic(X)) return(X + V) else duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `+`, V) else .mapply(`+`, list(unattrib(X), unattrib(V)), NULL), X) "%c-%" <- function(X, V) if(is.atomic(X)) return(X - V) else duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `-`, V) else .mapply(`-`, list(unattrib(X), unattrib(V)), NULL), X) "%c*%" <- function(X, V) if(is.atomic(X)) return(X * V) else duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `*`, V) else .mapply(`*`, list(unattrib(X), unattrib(V)), NULL), X) "%c/%" <- function(X, V) if(is.atomic(X)) return(X / V) else # or * 1L/V ?? duplAttributes(if(is.atomic(V)) lapply(unattrib(X), `/`, V) else .mapply(`/`, list(unattrib(X), unattrib(V)), NULL), X) # Multiple-assignment "%=%" <- function(nam, values) invisible(.Call(C_multiassign, nam, values, parent.frame())) massign <- function(nam, values, envir = parent.frame()) invisible(.Call(C_multiassign, nam, values, envir)) # R implementation: # "%=%" <- function(lhs, rhs) { # if(!is.character(lhs)) stop("lhs needs to be character") # if(!is.list(rhs)) rhs <- as.vector(rhs, "list") # if(length(lhs) != length(rhs)) stop("length(lhs) not equal to length(rhs)") # list2env(`names<-`(rhs, lhs), envir = parent.frame(), # parent = NULL, hash = FALSE, size = 0L) # invisible() # } getenvFUN <- function(nam, efmt1 = "For this method need to install.packages('%s'), then unload [detach('package:collapse', unload = TRUE)] and reload [library(collapse)].") { if(is.null(FUN <- .collapse_env[[nam]])) { v <- strsplit(nam, "_", fixed = TRUE)[[1L]] .collapse_env[[nam]] <- FUN <- if(requireNamespace(v[1L], quietly = TRUE)) get0(v[2L], envir = getNamespace(v[1L])) else NULL if(is.null(FUN)) stop(sprintf(efmt1, v[1L])) } FUN } # qM2 <- function(x) if(is.list(x)) do.call(cbind, x) else x null2NA <- function(x) if(is.null(x)) NA_character_ else x # flapply <- function(x, FUN, ...) lapply(unattrib(x), FUN, ...) # not really needed ... vlabels <- function(X, attrn = "label", use.names = TRUE) .Call(C_vlabels, X, attrn, use.names) # { # if(is.atomic(X)) return(null2NA(attr(X, attrn))) # res <- lapply(X, attr, attrn) # unattrib(X): no names # res[vapply(res, is.null, TRUE)] <- NA_character_ # unlist(res) # } "vlabels<-" <- function(X, attrn = "label", value) { if(is.atomic(X)) return(`attr<-`(X, attrn, value)) .Call(C_setvlabels, X, attrn, value, NULL) } # "vlabels<-" <- function(X, attrn = "label", value) { # names(value) <- NULL # if(is.atomic(X)) return(`attr<-`(X, attrn, value)) # clx <- oldClass(X) # oldClass(X) <- NULL # if(is.null(value)) { # for (i in seq_along(X)) attr(X[[i]], attrn) <- NULL # } else { # if(length(X) != length(value)) stop("length(X) must match length(value)") # for (i in seq_along(value)) attr(X[[i]], attrn) <- value[[i]] # } # if(any(clx == "data.table")) return(alc(`oldClass<-`(X, clx))) # `oldClass<-`(X, clx) # } # Note: Shallow copy does not work as it only copies the list, but the attribute is a feature of the atomic elements inside... setLabels <- function(X, value = NULL, attrn = "label", cols = NULL) { # , sc = TRUE if(is.atomic(X)) return(`attr<-`(X, attrn, value)) .Call(C_setvlabels, X, attrn, value, as.integer(cols)) } # Also slower on WDI !! # "vlabels2<-" <- function(X, attrn = "label", value) { # names(value) <- NULL # if(is.atomic(X)) return(`attr<-`(X, attrn, value)) # duplAttributes(mapply(function(x, y) `attr<-`(x, attrn, y), `attributes<-`(X, NULL), as.vector(value, "list"), # SIMPLIFY = FALSE, USE.NAMES = FALSE), X) # } .c <- function(...) as.character(substitute(c(...))[-1L]) strclp <- function(x) if(length(x) > 1L) paste(x, collapse = " ") else x pasteclass <- function(x) if(length(cx <- class(x)) > 1L) paste(cx, collapse = " ") else cx vclasses <- function(X, use.names = TRUE) { if(is.atomic(X)) return(pasteclass(X)) vapply(X, pasteclass, "", USE.NAMES = use.names) # unattrib(X): no names } # https://github.com/wch/r-source/blob/4a409a1a244d842a3098d2783c5b63c9661fc6be/src/main/util.c R_types <- c("NULL", # NILSXP "symbol", # SYMSXP "pairlist", # LISTSXP "closure", # CLOSXP "environment", # ENVSXP "promise", # PROMSXP "language", # LANGSXP "special", # SPECIALSXP "builtin", # BUILTINSXP "char", # CHARSXP "logical", # LGLSXP "", "", "integer", # INTSXP "double", # REALSXP "complex", # CPLXSXP "character", # STRSXP "...", # DOTSXP "any", # ANYSXP "list", # VECSXP "expression", # EXPRSXP "bytecode", # BCODESXP "externalptr", # EXTPTRSXP "weakref", # WEAKREFSXP "raw", # RAWSXP "S4") # S4SXP # /* aliases : */ # { "numeric", REALSXP }, # { "name", SYMSXP }, vtypes <- function(X, use.names = TRUE) { if(is.atomic(X)) return(typeof(X)) res <- R_types[.Call(C_vtypes, X, 0L)] if(use.names) names(res) <- attr(X, "names") res # vapply(X, typeof, "") # unattrib(X): no names } vlengths <- function(X, use.names = TRUE) .Call(C_vlengths, X, use.names) namlab <- function(X, class = FALSE, attrn = "label", N = FALSE, Ndistinct = FALSE) { if(!is.list(X)) stop("namlab only works with lists") res <- list(Variable = attr(X, "names")) attributes(X) <- NULL if(class) res$Class <- vapply(X, pasteclass, "", USE.NAMES = FALSE) if(N) res$N <- fnobs.data.frame(X) if(Ndistinct) res$Ndist <- fndistinct.data.frame(X, na.rm = TRUE) res$Label <- vlabels(X, attrn, FALSE) attr(res, "row.names") <- c(NA_integer_, -length(X)) oldClass(res) <- "data.frame" res } add_stub <- function(X, stub, pre = TRUE, cols = NULL) { if(!is.character(stub)) return(X) if(is.atomic(X) && is.array(X)) { if(length(dim(X)) > 2L) stop("Can't stub higher dimensional arrays!") dn <- dimnames(X) cn <- dn[[2L]] if(length(cn)) { if(length(cols)) cn[cols] <- if(pre) paste0(stub, cn[cols]) else paste0(cn[cols], stub) else cn <- if(pre) paste0(stub, cn) else paste0(cn, stub) dimnames(X) <- list(dn[[1L]], cn) } } else { nam <- attr(X, "names") if(length(nam)) { if(length(cols)) attr(X, "names")[cols] <- if(pre) paste0(stub, nam[cols]) else paste0(nam[cols], stub) else attr(X, "names") <- if(pre) paste0(stub, nam) else paste0(nam, stub) if(inherits(X, "data.table")) X <- alc(X) } } X } rm_stub <- function(X, stub, pre = TRUE, regex = FALSE, cols = NULL, ...) { if(!is.character(stub)) return(X) if(regex) rmstubFUN <- function(x) { gsub(stub, "", x, ...) } else if(pre) rmstubFUN <- function(x) { # much faster than using sub! v <- startsWith(x, stub) x[v] <- substr(x[v], nchar(stub)+1L, 1000000L) x } else rmstubFUN <- function(x) { # much faster than using sub! v <- endsWith(x, stub) xv <- x[v] # faster .. x[v] <- substr(xv, 0L, nchar(xv)-nchar(stub)) x } if(is.atomic(X)) { d <- dim(X) if(is.null(d)) if(is.character(X)) return(if(length(cols)) replace(X, cols, rmstubFUN(X[cols])) else rmstubFUN(X)) else stop("Cannot modify a vector that is not character") if(length(d) > 2L) stop("Can't remove stub from higher dimensional arrays!") dn <- dimnames(X) cn <- dn[[2L]] dimnames(X) <- list(dn[[1L]], if(length(cols)) replace(cn, cols, rmstubFUN(cn[cols])) else rmstubFUN(cn)) } else { nam <- attr(X, "names") attr(X, "names") <- if(length(cols)) replace(nam, cols, rmstubFUN(nam[cols])) else rmstubFUN(nam) if(inherits(X, "data.table")) X <- alc(X) } X } setRownames <- function(object, nm = if(is.atomic(object)) seq_row(object) else NULL) { if(is.list(object)) { l <- .Call(C_fnrow, object) if(is.null(nm)) nm <- .set_row_names(l) else if(length(nm) != l) stop("supplied row-names must match list extent") attr(object, "row.names") <- nm if(inherits(object, "data.table")) return(alc(object)) return(object) } if(!is.array(object)) stop("Setting row-names only supported on arrays and lists") dn <- dimnames(object) `dimnames<-`(object, c(list(nm), dn[-1L])) } setColnames <- function(object, nm) { if(is.atomic(object) && is.array(object)) dimnames(object)[[2L]] <- nm else { attr(object, "names") <- nm if(inherits(object, "data.table")) return(alc(object)) } object } setDimnames <- function(object, dn, which = NULL) { if(is.null(which)) return(`dimnames<-`(object, dn)) if(is.atomic(dn)) dimnames(object)[[which]] <- dn else dimnames(object)[which] <- dn object } all_identical <- function(...) { if(...length() == 1L && is.list(...)) return(all(vapply(unattrib(...)[-1L], identical, TRUE, .subset2(..., 1L)))) l <- list(...) all(vapply(l[-1L], identical, TRUE, l[[1L]])) } all_obj_equal <- function(...) { if(...length() == 1L && is.list(...)) r <- unlist(lapply(unattrib(...)[-1L], all.equal, .subset2(..., 1L)), use.names = FALSE) else { l <- list(...) r <- unlist(lapply(l[-1L], all.equal, l[[1L]]), use.names = FALSE) } is.logical(r) } all_funs <- function(expr) .Call(C_all_funs, expr) cinv <- function(x) chol2inv(chol(x)) vec <- function(X) { if(is.atomic(X)) return(`attributes<-`(X, NULL)) .Call(C_pivot_long, X, NULL, FALSE) } interact_names <- function(l) { oldClass(l) <- NULL if(length(l) == 2L) return(`dim<-`(outer(l[[1L]], l[[2L]], paste, sep = "."), NULL)) do.call(paste, c(expand.grid(l, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE), list(sep = "."))) } # set over-allocation for data.table's alc <- function(x) .Call(C_alloccol, x) condalc <- function(x, DT) if(DT) .Call(C_alloccol, x) else x alcSA <- function(x, a) .Call(C_alloccol, .Call(C_setAttributes, x, a)) condalcSA <- function(x, a, DT) if(DT) .Call(C_alloccol, .Call(C_setAttributes, x, a)) else .Call(C_setAttributes, x, a) unattrib <- function(object) `attributes<-`(object, NULL) # Both equally efficient and therefore redundant ! # setAttr <- function(object, a, v) .Call(C_setAttr, object, a, v) # setAttrR <- function(object, a, v) `attr<-`(object, a, v) setAttrib <- function(object, a) .Call(C_setAttrib, object, a) setattrib <- function(object, a) { .Call(C_setattributes, object, a) return(invisible(object)) } # setAttribR <- function(object, a) `attributes<-`(object, x) copyAttrib <- function(to, from) .Call(C_copyAttrib, to, from) # copyAttribR <- function(to, from) `attributes<-`(to, attributes(from)) copyMostAttrib <- function(to, from) .Call(C_copyMostAttrib, to, from) # copyMostAttribR <- function(to, from) `mostattributes<-`(to, attributes(from)) addAttributes <- function(x, a) .Call(C_setAttributes, x, c(attributes(x), a)) is_categorical <- function(x) !is.numeric(x) # is.categorical <- function(x) { # .Deprecated(msg = "'is.categorical' was renamed to 'is_categorical'. It will be removed end of 2023, see help('collapse-renamed').") # !is.numeric(x) # } is_date <- function(x) inherits(x, c("Date","POSIXlt","POSIXct")) # is.Date <- function(x) { # .Deprecated(msg = "'is.Date' was renamed to 'is_date'. It will be removed end of 2023, see help('collapse-renamed').") # inherits(x, c("Date","POSIXlt","POSIXct")) # } # more consistent with base than na_rm # na.rm <- function(x) { # cpp version available, but not faster ! # if(length(attr(x, "names"))) { # gives corruped time-series ! # ax <- attributes(x) # r <- x[!is.na(x)] # ax[["names"]] <- names(r) # setAttributes(r, ax) # } else duplAttributes(x[!is.na(x)], x) # } whichv <- function(x, value, invert = FALSE) .Call(C_whichv, x, value, invert) "%==%" <- function(x, value) .Call(C_whichv, x, value, FALSE) "%!=%" <- function(x, value) .Call(C_whichv, x, value, TRUE) whichNA <- function(x, invert = FALSE) .Call(C_whichv, x, NA, invert) frange <- function(x, na.rm = .op[["na.rm"]], finite = FALSE) .Call(C_frange, x, na.rm, finite) .range <- function(x, na.rm = TRUE, finite = FALSE) .Call(C_frange, x, na.rm, finite) alloc <- function(value, n, simplify = TRUE) .Call(C_alloc, value, n, simplify) vgcd <- function(x) .Call(C_vecgcd, x) fdist <- function(x, v = NULL, ..., method = "euclidean", nthreads = .op[["nthreads"]]) .Call(C_fdist, if(is.atomic(x)) x else qM(x), v, method, nthreads) allNA <- function(x) .Call(C_allNA, x, TRUE) # True means give error for unsupported vector types, not FALSE. anyv <- function(x, value) .Call(C_anyallv, x, value, FALSE) allv <- function(x, value) .Call(C_anyallv, x, value, TRUE) copyv <- function(X, v, R, ..., invert = FALSE, vind1 = FALSE, xlist = FALSE) { if(is.list(X, ...) && !xlist) { # Making sure some error is produced if dots are used if(is.list(R)) { res <- .mapply(function(x, r) .Call(C_setcopyv, x, v, r, invert, FALSE, vind1), list(unattrib(X), unattrib(R)), NULL) } else { res <- lapply(unattrib(X), function(x) .Call(C_setcopyv, x, v, R, invert, FALSE, vind1)) } return(condalc(duplAttributes(res, X), inherits(X, "data.table"))) } .Call(C_setcopyv, X, v, R, invert, FALSE, vind1) } setv <- function(X, v, R, ..., invert = FALSE, vind1 = FALSE, xlist = FALSE) { if(is.list(X, ...) && !xlist) { # Making sure some error is produced if dots are used if(is.list(R)) { .mapply(function(x, r) .Call(C_setcopyv, x, v, r, invert, TRUE, vind1), list(unattrib(X), unattrib(R)), NULL) } else { lapply(unattrib(X), function(x) .Call(C_setcopyv, x, v, R, invert, TRUE, vind1)) } return(invisible(X)) } invisible(.Call(C_setcopyv, X, v, R, invert, TRUE, vind1)) } setop <- function(X, op, V, ..., rowwise = FALSE) # Making sure some error is produced if dots are used invisible(.Call(C_setop, X, V, switch(op, "+" = 1L, "-" = 2L, "*" = 3L, "/" = 4L, stop("Unsupported operation:", op)), rowwise), ...) "%+=%" <- function(X, V) invisible(.Call(C_setop, X, V, 1L, FALSE)) "%-=%" <- function(X, V) invisible(.Call(C_setop, X, V, 2L, FALSE)) "%*=%" <- function(X, V) invisible(.Call(C_setop, X, V, 3L, FALSE)) "%/=%" <- function(X, V) invisible(.Call(C_setop, X, V, 4L, FALSE)) # Internal functions missDF <- function(x, cols = seq_along(unclass(x))) .Call(C_dt_na, x, cols, 0, FALSE) frowSums <- function(x) { nr <- dim(x)[1L] .rowSums(x, nr, length(x)/nr) } fcolSums <- function(x) { nr <- dim(x)[1L] .colSums(x, nr, length(x)/nr) } missing_cases <- function(X, cols = NULL, prop = 0, count = FALSE) { if(is.list(X)) return(.Call(C_dt_na, X, if(is.null(cols)) seq_along(unclass(X)) else cols2int(cols, X, attr(X, "names")), prop, count)) if(is.matrix(X)) { if(length(cols)) X <- X[, cols] if(is.matrix(X)) return(if(count) as.integer(frowSums(is.na(X))) else if(prop > 0) # as.integer() needed to establish consistency (integer output) frowSums(is.na(X)) >= bmax(as.integer(prop * NCOL(X)), 1L) else !complete.cases(X)) } if(count) as.integer(is.na(X)) else is.na(X) # Note: as.integer() here is inefficient, but storage.mode() <- "integer" is also. Would have to export a R wrapper to C function SET_TYPEOF()... but this is probably never invoked anyway. } na_rm <- function(x) .Call(C_na_rm, x) # x[!is.na(x)] # Also takes names along, whereas na_rm does not preserve names of list null_rm <- function(l) if(!all(ind <- vlengths(l, FALSE) > 0L)) .subset(l, ind) else l all_eq <- function(x) .Call(C_anyallv, x, x[1L], TRUE) na_omit <- function(X, cols = NULL, na.attr = FALSE, prop = 0, ...) { if(is.list(X)) { iX <- seq_along(unclass(X)) rl <- .Call(C_dt_na, X, if(is.null(cols)) iX else cols2int(cols, X, attr(X, "names")), prop, FALSE) rkeep <- whichv(rl, FALSE) if(length(rkeep) == fnrow(X)) return(condalc(X, inherits(X, "data.table"))) res <- .Call(C_subsetDT, X, rkeep, iX, FALSE) # This allocates data.tables... rn <- attr(X, "row.names") if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) attr(res, "row.names") <- Csv(rn, rkeep) if(na.attr) { attr(res, "na.action") <- `oldClass<-`(which(rl), "omit") if(inherits(res, "data.table") && !inherits(X, "pdata.frame")) return(alc(res)) } if(inherits(X, "pdata.frame")) { index <- findex(X) index_omit <- droplevels_index(.Call(C_subsetDT, index, rkeep, seq_along(unclass(index)), FALSE), ...) if(inherits(X, "indexed_frame")) return(reindex(res, index_omit)) # data.table handled here attr(res, "index") <- index_omit } } else { Xcols <- if(is.null(cols)) X else X[, cols] rl <- if(prop > 0 && is.matrix(Xcols)) frowSums(is.na(Xcols)) < bmax(as.integer(prop * ncol(Xcols)), 1L) else complete.cases(Xcols) rkeep <- which(rl) if(length(rkeep) == NROW(X)) return(X) res <- if(is.matrix(X)) X[rkeep, , drop = FALSE, ...] else X[rkeep, ...] if(na.attr) attr(res, "na.action") <- `oldClass<-`(whichv(rl, FALSE), "omit") } res } na_insert <- function(X, prop = 0.1, value = NA, set = FALSE) { if(is.list(X)) { n <- fnrow(X) nmiss <- floor(n * prop) if(set) { lapply(unattrib(X), function(y) scv(y, sample.int(n, nmiss), value, TRUE)) return(invisible(X)) } res <- duplAttributes(lapply(unattrib(X), function(y) scv(y, sample.int(n, nmiss), value)), X) return(if(inherits(X, "data.table")) alc(res) else res) } if(!is.atomic(X)) stop("X must be an atomic vector, array or data.frame") l <- length(X) if(set) { scv(X, sample.int(l, floor(l * prop)), value, TRUE) return(invisible(X)) } return(scv(X, sample.int(l, floor(l * prop)), value)) } fdapply <- function(X, FUN, ...) duplAttributes(lapply(`attributes<-`(X, NULL), FUN, ...), X) fnlevels <- function(x) length(attr(x, "levels")) # flevels <- function(x) attr(x, "levels") fnrow <- function(X) .Call(C_fnrow, X) # if(is.list(X)) length(.subset2(X, 1L)) else dim(X)[1L] fncol <- function(X) if(is.list(X)) length(unclass(X)) else dim(X)[2L] fNCOL <- function(X) if(is.list(X)) length(unclass(X)) else NCOL(X) fdim <- function(X) { if(is.atomic(X)) return(dim(X)) # or if !is.list ? c(.Call(C_fnrow, X), length(unclass(X))) } seq_row <- function(X) seq_len(.Call(C_fnrow, X)) seq_col <- function(X) if(is.list(X)) seq_along(unclass(X)) else seq_len(dim(X)[2L]) # na.last = TRUE, same default as order(): forder.int <- function(x, na.last = TRUE, decreasing = FALSE) .Call(C_radixsort, na.last, decreasing, FALSE, FALSE, TRUE, pairlist(x)) # if(is.unsorted(x)) .Call(C_forder, x, NULL, FALSE, TRUE, 1L, TRUE) else seq_along(x) # since forder gives integer(0) if sorted ! fsetdiff <- function(x, y) x[match(x, y, 0L) == 0L] # not unique ! ffka <- function(x, f) { ax <- attributes(x) `attributes<-`(f(ax[["levels"]])[x], ax[names(ax) %!in% c("levels", "class")]) } as_numeric_factor <- function(X, keep.attr = TRUE) { if(is.atomic(X)) if(keep.attr) return(ffka(X, as.numeric)) else return(as.numeric(attr(X, "levels"))[X]) res <- duplAttributes(lapply(unattrib(X), if(keep.attr) (function(y) if(is.factor(y)) ffka(y, as.numeric) else y) else (function(y) if(is.factor(y)) as.numeric(attr(y, "levels"))[y] else y)), X) if(inherits(X, "data.table")) return(alc(res)) res } as_integer_factor <- function(X, keep.attr = TRUE) { if(is.atomic(X)) if(keep.attr) return(ffka(X, as.integer)) else return(as.integer(attr(X, "levels"))[X]) res <- duplAttributes(lapply(unattrib(X), if(keep.attr) (function(y) if(is.factor(y)) ffka(y, as.integer) else y) else (function(y) if(is.factor(y)) as.integer(attr(y, "levels"))[y] else y)), X) if(inherits(X, "data.table")) return(alc(res)) res } as_character_factor <- function(X, keep.attr = TRUE) { if(is.atomic(X)) if(keep.attr) return(ffka(X, tochar)) else return(as.character.factor(X)) res <- duplAttributes(lapply(unattrib(X), if(keep.attr) (function(y) if(is.factor(y)) ffka(y, tochar) else y) else (function(y) if(is.factor(y)) as.character.factor(y) else y)), X) if(inherits(X, "data.table")) return(alc(res)) res } # as.numeric_factor <- function(X, keep.attr = TRUE) { # .Deprecated(msg = "'as.numeric_factor' was renamed to 'as_numeric_factor'. It will be removed end of 2023, see help('collapse-renamed').") # as_numeric_factor(X, keep.attr) # } # # as.character_factor <- function(X, keep.attr = TRUE) { # .Deprecated(msg = "'as.character_factor' was renamed to 'as_character_factor'. It will be removed end of 2023, see help('collapse-renamed').") # as_character_factor(X, keep.attr) # } setRnDF <- function(df, nm) `attr<-`(df, "row.names", nm) # TtI <- function(x) # switch(x, replace_fill = 1L, replace = 2L, `-` = 3L, `-+` = 4L, `/` = 5L, `%` = 6L, `+` = 7L, `*` = 8L, `%%` = 9L, `-%%` = 10L, # stop("Unknown transformation!")) condsetn <- function(x, value, cond) { if(cond) attr(x, "names") <- value x } setnck <- function(x, value) { if(is.null(value)) return(x) ren <- nzchar(value) if(all(ren)) names(x) <- value else names(x)[ren] <- value[ren] x } do_stub <- function(stub, nam, default) { if(is.character(stub)) return(paste0(stub, nam)) if(isTRUE(stub)) paste0(default, nam) else nam } # give_nam <- function(x, gn, stub) { # if(!gn) return(x) # attr(x, "names") <- paste0(stub, attr(x, "names")) # x # } fmatch <- function(x, table, nomatch = NA_integer_, count = FALSE, overid = 1L) .Call(C_fmatch, x, table, nomatch, count, overid) ckmatch <- function(x, table, e = "Unknown columns:", ...) if(anyNA(m <- fmatch(x, table, NA_integer_, ...))) stop(paste(e, if(is.list(x)) paste(c("\n", capture.output(ss(x, is.na(m)))), collapse = "\n") else paste(x[is.na(m)], collapse = ", "))) else m "%fin%" <- function(x, table) as.logical(fmatch(x, table, 0L, overid = 2L)) # export through set_collapse(mask = "%in%") "%!in%" <- function(x, table) is.na(fmatch(x, table, overid = 2L)) "%!iin%" <- function(x, table) whichNA(fmatch(x, table, overid = 2L)) "%iin%" <- function(x, table) whichNA(fmatch(x, table, overid = 2L), invert = TRUE) # anyNAerror <- function(x, e) if(anyNA(x)) stop(e) else x cols2int <- function(cols, x, nam, topos = TRUE) { if(is.numeric(cols)) { if(length(cols) == 0L) return(integer(0L)) l <- length(unclass(x)) # length(nam) ? if(cols[1L] < 0L) { # This is sufficient to check negative indices: No R function allows subsetting mixing positive and negative indices. if(-bmin(cols) > l) stop("Index out of range abs(1:length(x))") if(topos) return(seq_len(l)[cols]) # cols <- seq_len(l)[cols] # if(!length(cols) || anyNA(cols)) stop("Index out of range abs(1:length(x))") -> used to put earlier check after if(topos) and use this one instead. But turns out that doesn't always work well. # return(cols) } else if(bmax(cols) > l) stop("Index out of range abs(1:length(x))") # if(bmax(abs(cols)) > length(unclass(x))) stop("Index out of range abs(1:length(x))") # Before collapse 1.4.0 ! return(as.integer(cols)) # as.integer is necessary (for C_subsetCols), and at very little cost.. } if(is.character(cols)) return(ckmatch(cols, nam)) if(is.function(cols)) return(which(vapply(unattrib(x), cols, TRUE))) if(is.logical(cols)) { if(length(cols) != length(unclass(x))) stop("Logical subsetting vector must match columns!") # length(nam) ? return(which(cols)) } stop("cols must be a function, character vector, numeric indices or logical vector!") } # Needed for fmutate cols2char <- function(cols, x, nam) { if(is.character(cols)) return(cols) if(!length(cols)) return("") # Needed if NULL is passed if(is.numeric(cols)) { l <- length(nam) if(cols[1L] < 0L) { if(-bmin(cols) > l) stop("Index out of range abs(1:length(x))") } else if(bmax(cols) > l) stop("Index out of range abs(1:length(x))") return(nam[cols]) } if(is.function(cols)) return(nam[vapply(unattrib(x), cols, TRUE)]) if(is.logical(cols)) { if(length(cols) != length(nam)) stop("Logical subsetting vector must match columns!") return(nam[cols]) } stop("cols must be a function, character vector, numeric indices or logical vector!") } # Not needed anymore !! # cols2log <- function(cols, x, nam) { # lx <- length(unclass(x)) # if(is.logical(cols)) if(length(cols) == lx) return(cols) else stop("Logical subsetting vector must match columns!") # if(is.function(cols)) return(vapply(unattrib(x), cols, TRUE)) # r <- logical(lx) # if(is.character(cols)) { # r[ckmatch(cols, nam)] <- TRUE # } else if(is.numeric(cols)) { # if(bmax(abs(cols)) > lx) stop("Index out of range abs(1:length(x))") # r[cols] <- TRUE # } else stop("cols must be a function, character vector, numeric indices or logical vector!") # r # } # Helper for operator functions... cols2intrmgn <- function(gn, cols, x) { if(is.function(cols)) { cols <- if(identical(cols, is.numeric)) .Call(C_vtypes, x, 1L) else vapply(unattrib(x), cols, TRUE) cols[gn] <- FALSE return(which(cols)) } if(is.null(cols)) return(seq_along(unclass(x))[-gn]) if(is.numeric(cols) && length(cols) && cols[1L] < 0) { res <- logical(length(unclass(x))) res[cols] <- TRUE res[gn] <- FALSE return(which(res)) } cols2int(cols, x, attr(x, "names"), FALSE) } colsubset <- function(x, ind, checksf = FALSE) { if(is.numeric(ind)) return(.Call(C_subsetCols, x, as.integer(ind), checksf)) if(is.logical(ind)) { nc <- length(unclass(x)) if(length(ind) != nc) stop("Logical subsetting vector must match length(x)") ind <- which(ind) if(length(ind) == nc) return(x) return(.Call(C_subsetCols, x, ind, checksf)) } ind <- if(is.character(ind)) ckmatch(ind, attr(x, "names")) else which(vapply(`attributes<-`(x, NULL), ind, TRUE)) return(.Call(C_subsetCols, x, ind, checksf)) } # Previously Fastest! even though it involves code duplication.. # colsubset <- function(x, ind) { # ax <- attributes(x) # if(is.numeric(ind)) { # attributes(x) <- NULL # note: attributes(x) <- NULL is very slightly faster than class(x) <- NULL # if(bmax(abs(ind)) > length(x)) stop("Index out of range abs(1:length(x))") # ax[["names"]] <- ax[["names"]][ind] # return(.Call(C_setAttributes, x[ind], ax)) # } # if(is.logical(ind)) { # attributes(x) <- NULL # if(length(ind) != length(x)) stop("Logical subsetting vector must match length(x)") # ax[["names"]] <- ax[["names"]][ind] # return(.Call(C_setAttributes, x[ind], ax)) # } # ind <- if(is.character(ind)) ckmatch(ind, ax[["names"]]) else vapply(`attributes<-`(x, NULL), ind, TRUE) # ax[["names"]] <- ax[["names"]][ind] # .Call(C_setAttributes, .subset(x, ind), ax) # } fcolsubset <- function(x, ind, checksf = FALSE) { # fastest ! .Call(C_subsetCols, x, if(is.logical(ind)) which(ind) else as.integer(ind), checksf) # Fastet! becore C version: # ax <- attributes(x) # ax[["names"]] <- ax[["names"]][ind] # .Call(C_setAttributes, .subset(x, ind), ax) } # Sorted out 1.5.3 -> 1.6.0: # Fastest because vapply runs faster on a list without any attributes ! # colsubsetFUN <- function(x, FUN) { # .Call(C_subsetCols, x, which(vapply(`attributes<-`(x, NULL), FUN, TRUE))) # # Fastet! becore C version: # # ax <- attributes(x) # # attributes(x) <- NULL # # ind <- vapply(x, FUN, TRUE) # # ax[["names"]] <- ax[["names"]][ind] # # .Call(C_setAttributes, x[ind], ax) # } rgrep <- function(exp, nam, ..., sort = TRUE) if(length(exp) == 1L) grep(exp, nam, ...) else funique.default(unlist(lapply(exp, grep, nam, ...), use.names = FALSE), sort) rgrepl <- function(exp, nam, ...) if(length(exp) == 1L) grepl(exp, nam, ...) else Reduce(`|`, lapply(exp, grepl, nam, ...)) fanyDuplicated <- function(x) if(length(x) < 100L) anyDuplicated.default(x) > 0L else .Call(C_fndistinct,x,NULL,FALSE,1L) != length(x) # NROW2 <- function(x, d) if(length(d)) d[1L] else length(x) # NCOL2 <- function(d, ilv) if(ilv) d[2L] else 1L issorted <- function(x, strictly = FALSE) .Call(C_issorted, x, strictly) charorNULL <- function(x) if(is.character(x)) x else NULL tochar <- function(x) if(is.character(x)) x else as.character(x) # if(is.object(x)) as.character(x) else .Call(C_aschar, x) # dotstostr <- function(...) { # args <- deparse(substitute(c(...))) # nc <- nchar(args) # substr(args, 2, nc) # 3, nc-1 for no brackets ! # } switch_msg <- function(msg, which = NULL) { if(is.null(which)) stop(msg, call. = FALSE) switch(which, error = stop(msg, call. = FALSE), message = message(msg), warning = warning(msg, call. = FALSE)) } unused_arg_action <- function(call, ...) { wo <- switch(getOption("collapse_unused_arg_action"), none = 0L, message = 1L, warning = 2L, error = 3L, stop("Unused argument encountered. Please instruct collapse what to do about unused arguments by setting options(collapse_unused_arg_action = 'warning'), or 'error', or 'message' or 'none'.")) if(wo != 0L) { args <- deparse(substitute(c(...))) nc <- nchar(args) args <- substr(args, 2, nc) # 3, nc-1 for no brackets ! msg <- paste("Unused argument", args, "passed to", as.character(call[[1L]])) switch(wo, message(msg), warning(msg), stop(msg)) } } is.nmfactor <- function(x) inherits(x, "factor") && (inherits(x, "na.included") || !anyNA(unclass(x))) addNA2 <- function(x) { if(!anyNA(unclass(x))) return(x) clx <- oldClass(x) oldClass(x) <- NULL if(!anyNA(lev <- attr(x, "levels"))) { attr(x, "levels") <- c(lev, NA_character_) .Call(C_setcopyv, x, NA_integer_, length(lev) + 1L, FALSE, TRUE, FALSE) # x[is.na(x)] <- length(lev) + 1L } else .Call(C_setcopyv, x, NA_integer_, length(lev), FALSE, TRUE, FALSE) # x[is.na(x)] <- length(lev) oldClass(x) <- clx x } # addNA2 <- function(x) { # clx <- c(class(x), "na.included") # if(!anyNA(unclass(x))) return(`oldClass<-`(x, clx)) # ll <- attr(x, "levels") # if(!anyNA(ll)) ll <- c(ll, NA) # return(`oldClass<-`(factor(x, levels = ll, exclude = NULL), clx)) # } l1orn <- function(x, nam) if(length(x) == 1L) x else nam l1orlst <- function(x) if(length(x) == 1L) x else x[length(x)] fsimplify2array <- function(l) { res <- do.call(cbind, l) # lapply(l, `dimnames<-`, NULL) # also faster than unlist.. dim(res) <- c(dim(l[[1L]]), length(l)) dimnames(res) <- c(if(length(dn <- dimnames(l[[1L]]))) dn else list(NULL, NULL), list(names(l))) res } # fss <- function(x, i, j) { # rn <- attr(x, "row.names") # if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") return(.Call(C_subsetDT, x, i, j)) # return(`attr<-`(.Call(C_subsetDT, x, i, j), "row.names", rn[r])) # } collapse/R/qsu.R0000644000176200001440000003253014777170130013222 0ustar liggesusers qsu <- function(x, ...) UseMethod("qsu") # , x qsu.default <- function(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = .op[["stable.algo"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(qsu.matrix(x, g, pid, w, higher, array, stable.algo, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) { if(is.null(pid)) return(fbstatsCpp(x,higher, w = w, stable.algo = stable.algo)) pid <- G_guo(pid) return(fbstatsCpp(x,higher,0L,0L,pid[[1L]],pid[[2L]],w,stable.algo)) } if(is.atomic(g)) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") if(is.null(pid)) return(fbstatsCpp(x,higher,length(lev),g,0L,0L,w,stable.algo,TRUE,TRUE,lev)) pid <- G_guo(pid) return(fbstatsCpp(x,higher,length(lev),g,pid[[1L]],pid[[2L]],w,stable.algo,array,TRUE,lev)) } if(!is_GRP(g)) g <- GRP.default(g, call = FALSE) if(is.null(pid)) return(fbstatsCpp(x,higher,g[[1L]],g[[2L]],0L,0L,w,stable.algo,TRUE,TRUE,GRPnames(g))) pid <- G_guo(pid) fbstatsCpp(x,higher,g[[1L]],g[[2L]],pid[[1L]],pid[[2L]],w,stable.algo,array,TRUE,GRPnames(g)) } qsu.pseries <- function(x, g = NULL, w = NULL, effect = 1L, higher = FALSE, array = TRUE, stable.algo = .op[["stable.algo"]], ...) { if(!missing(...)) unused_arg_action(match.call(), ...) pid <- group_effect(x, effect) if(is.null(g)) return(fbstatsCpp(x,higher,0L,0L,fnlevels(pid),pid,w,stable.algo)) if(is.atomic(g)) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(fbstatsCpp(x,higher,length(lev),g,fnlevels(pid),pid,w,stable.algo,array,TRUE,lev)) } if(!is_GRP(g)) g <- GRP.default(g, call = FALSE) fbstatsCpp(x,higher,g[[1L]],g[[2L]],fnlevels(pid),pid,w,stable.algo,array,TRUE,GRPnames(g)) } qsu.matrix <- function(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = .op[["stable.algo"]], ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) { if(is.null(pid)) return(fbstatsmCpp(x,higher, w = w, stable.algo = stable.algo)) pid <- G_guo(pid) return(fbstatsmCpp(x,higher,0L,0L,pid[[1L]],pid[[2L]],w,stable.algo,array)) } if(is.atomic(g)) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") if(is.null(pid)) return(fbstatsmCpp(x,higher,length(lev),g,0L,0L,w,stable.algo,array,lev)) pid <- G_guo(pid) return(fbstatsmCpp(x,higher,length(lev),g,pid[[1L]],pid[[2L]],w,stable.algo,array,lev)) } if(!is_GRP(g)) g <- GRP.default(g, call = FALSE) if(is.null(pid)) return(fbstatsmCpp(x,higher,g[[1L]],g[[2L]],0L,0L,w,stable.algo,array,GRPnames(g))) pid <- G_guo(pid) fbstatsmCpp(x,higher,g[[1L]],g[[2L]],pid[[1L]],pid[[2L]],w,stable.algo,array,GRPnames(g)) } qsu.zoo <- function(x, ...) if(is.matrix(x)) qsu.matrix(x, ...) else qsu.default(x, ...) qsu.units <- qsu.zoo qsu.data.frame <- function(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], ...) { if(!missing(...)) { dots <- list(...) if(length(dots$vlabels)) labels <- dots$vlabels if(length(dots) > 1L || !length(dots$vlabels)) unused_arg_action(match.call(), ...) } formby <- is.call(by) formpid <- is.call(pid) formw <- is.call(w) # fastest solution!! (see checks below !!) if(formby || formpid || formw) { v <- NULL class(x) <- NULL nam <- names(x) if(formby) { if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) byn <- ckmatch(all.vars(by[[3L]]), nam) } else byn <- ckmatch(all.vars(by), nam) by <- if(length(byn) == 1L) x[[byn]] else GRP.default(x[byn], call = FALSE) } else byn <- NULL if(formpid) { if(length(pid) == 3L) { v <- ckmatch(all.vars(pid[[2L]]), nam) pidn <- ckmatch(all.vars(pid[[3L]]), nam) } else pidn <- ckmatch(all.vars(pid), nam) pid <- if(length(pidn) == 1L) x[[pidn]] else GRP.default(x[pidn], return.groups = FALSE, call = FALSE) } else pidn <- NULL if(formw) { widn <- ckmatch(all.vars(w), nam) w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[widn]] } else widn <- NULL if(is.null(v)) { x <- if(is.null(cols)) x[-c(byn, pidn, widn)] else x[cols2int(cols, x, nam, FALSE)] } else x <- x[v] } else if(length(cols)) x <- .subset(x, cols2int(cols, x, attr(x, "names"), FALSE)) # Get labels if(is.function(labels) || labels) attr(x, "names") <- if(is.function(labels)) labels(x) else paste(attr(x, "names"), setv(vlabels(x, use.names = FALSE), NA, ""), sep = ": ") # original code: if(is.null(by)) { if(is.null(pid)) return(fbstatslCpp(x,higher, w = w, stable.algo = stable.algo)) pid <- G_guo(pid) return(drop(fbstatslCpp(x,higher,0L,0L,pid[[1L]],pid[[2L]],w,stable.algo,array))) } if(is.atomic(by)) { if(!is.nmfactor(by)) by <- qF(by, na.exclude = FALSE) lev <- attr(by, "levels") if(is.null(pid)) return(drop(fbstatslCpp(x,higher,length(lev),by,0L,0L,w,stable.algo,array,lev))) pid <- G_guo(pid) return(drop(fbstatslCpp(x,higher,length(lev),by,pid[[1L]],pid[[2L]],w,stable.algo,array,lev))) } if(!is_GRP(by)) by <- GRP.default(by, call = FALSE) if(is.null(pid)) return(drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],0L,0L,w,stable.algo,array,GRPnames(by)))) pid <- G_guo(pid) drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],pid[[1L]],pid[[2L]],w,stable.algo,array,GRPnames(by))) } qsu.list <- function(x, ...) qsu.data.frame(x, ...) qsu.sf <- function(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], ...) { oldClass(x) <- NULL x[[attr(x, "sf_column")]] <- NULL qsu.data.frame(x, by, pid, w, cols, higher, array, labels, stable.algo, ...) } qsu.grouped_df <- function(x, pid = NULL, w = NULL, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], ...) { if(!missing(...)) { dots <- list(...) if(length(dots$vlabels)) labels <- dots$vlabels if(length(dots) > 1L || !length(dots$vlabels)) unused_arg_action(match.call(), ...) } wsym <- substitute(w) pidsym <- substitute(pid) by <- GRP.grouped_df(x, call = FALSE) is_sf <- inherits(x, "sf") class(x) <- NULL if(is_sf) x[[attr(x, "sf_column")]] <- NULL # Getting group indices byn <- which(names(x) %in% by[[5L]]) if(!is.null(pidsym)) { pid <- eval(pidsym, x, parent.frame()) # This allows pid to be a function of multiple variables if(length(pidn <- which(names(x) %in% all.vars(pidsym)))) { if(any(byn %in% pidn)) stop("Panel-ids coincide with grouping variables!") byn <- c(byn, pidn) } } # Processing weights and combining indices with group indices if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) # This allows w to be a function of multiple variables if(length(wn <- which(names(x) %in% all.vars(wsym)))) { if(any(byn %in% wn)) stop("Weights coincide with grouping variables!") byn <- c(byn, wn) } } if(length(byn)) x <- x[-byn] # Subsetting x # Get labels if(is.function(labels) || labels) names(x) <- if(is.function(labels)) labels(x) else paste(names(x), setv(vlabels(x, use.names = FALSE), NA, ""), sep = ": ") if(is.null(pid)) return(drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],0L,0L,w,stable.algo,array,GRPnames(by)))) pid <- G_guo(pid) drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],pid[[1L]],pid[[2L]],w,stable.algo,array,GRPnames(by))) } qsu.pdata.frame <- function(x, by = NULL, w = NULL, cols = NULL, effect = 1L, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], ...) { if(!missing(...)) { dots <- list(...) if(length(dots$vlabels)) labels <- dots$vlabels if(length(dots) > 1L || !length(dots$vlabels)) unused_arg_action(match.call(), ...) } pid <- group_effect(x, effect) x <- unindex(x) formby <- is.call(by) formw <- is.call(w) # fastest solution if(formby || formw) { v <- NULL class(x) <- NULL nam <- names(x) if(formby) { if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) byn <- ckmatch(all.vars(by[[3L]]), nam) } else byn <- ckmatch(all.vars(by), nam) by <- if(length(byn) == 1L) x[[byn]] else GRP.default(x[byn]) } else byn <- NULL if(formw) { widn <- ckmatch(all.vars(w), nam) w <- eval(w[[2L]], x, attr(w, ".Environment")) # w <- x[[widn]] } else widn <- NULL if(is.null(v)) { x <- if(is.null(cols)) x[-c(byn, widn)] else x[cols2int(cols, x, nam, FALSE)] } else x <- x[v] } else if(length(cols)) x <- .subset(x, cols2int(cols, x, attr(x, "names"), FALSE)) if(is.function(labels) || labels) attr(x, "names") <- if(is.function(labels)) labels(x) else paste(attr(x, "names"), setv(vlabels(x, use.names = FALSE), NA, ""), sep = ": ") if(is.null(by)) return(drop(fbstatslCpp(x,higher,0L,0L,fnlevels(pid),pid,w,stable.algo,array))) if(is.atomic(by)) { if(!is.nmfactor(by)) by <- qF(by, na.exclude = FALSE) lev <- attr(by, "levels") return(drop(fbstatslCpp(x,higher,length(lev),by,fnlevels(pid),pid,w,stable.algo,array,lev))) } if(!is_GRP(by)) by <- GRP.default(by, call = FALSE) drop(fbstatslCpp(x,higher,by[[1L]],by[[2L]],fnlevels(pid),pid,w,stable.algo,array,GRPnames(by))) } # Try to speed up ! Printing Takes 100 milliseconds on WDI ! print.qsu <- function(x, digits = .op[["digits"]] + 2L, nonsci.digits = 9, na.print = "-", return = FALSE, print.gap = 2, ...) { vec2mat <- function(x) if(is.array(x)) x else # outer(1, x) # for variable spacing in vector printing... `attributes<-`(x, list(dim = c(1L, length(x)), dimnames = list("", names(x)))) # faster and better !! formatfun <- function(x) { # , drop0trailing = FALSE redundat ?? class(x) <- NULL xx <- formatC(vec2mat(round(x, digits)), format = "g", flag = "#", digits = nonsci.digits, big.mark = "'", big.interval = 6, # "\u2009": https://stackoverflow.com/questions/30555232/using-a-half-space-as-a-big-mark-for-knitr-output drop0trailing = TRUE, preserve.width = "individual") # format(unclass(round(x,2)), digits = digits, drop0trailing = TRUE, big.mark = ",", big.interval = 6, scientific = FALSE) if(any(ina <- is.na(x))) xx[ina] <- na.print xx <- gsub(" ", "", xx, fixed = TRUE) # remove some weird white space (qsu(GGDS10S)) return(xx) } xx <- if(is.atomic(x)) formatfun(x) else rapply(x, formatfun, how = "list") # No longer necessary, but keep, maybe you want to print lists using print.qsu. if(return) return(xx) else print.default(xx, quote = FALSE, right = TRUE, print.gap = print.gap, ...) invisible(x) } # View.qsu <- function(x) View(unclass(x)) aperm.qsu <- function(a, perm = NULL, resize = TRUE, keep.class = TRUE, ...) { r <- aperm.default(a, perm, resize = resize) if(keep.class) oldClass(r) <- oldClass(a) r } `[.qsu` <- function(x, i, j, ..., drop = TRUE) `oldClass<-`(NextMethod(), oldClass(x)) as.data.frame.qsu <- function(x, ..., gid = "Group", stringsAsFactors = TRUE) { d <- dim(x) dn <- dimnames(x) stnam <- dn[[2L]] if(is.null(d)) { res <- as.vector(x, "list") attr(res, "row.names") <- 1L # res <- list(Statistic = names(x), Value = unattrib(x)) # attr(res, "row.names") <- .set_row_names(length(x)) } else if(length(d) == 2L) { varl <- if(stringsAsFactors) list(`attributes<-`(seq_len(d[1L]), list(levels = dn[[1L]], class = c("factor", "na.included")))) else dn[1L] res <- c(varl, mctl(x)) names(res) <- c(if(stnam[1L] == "N") "Variable" else "Trans", stnam) attr(res, "row.names") <- .set_row_names(d[1L]) } else if(length(d) == 3L) { dimnames(x) <- NULL # Special case: qsu(wlddev, PCGDP ~ region, ~ iso3c) if(d[3L] == 3L && dn[[3L]][1L] == "Overall") { res <- aperm.default(x, c(3L,1L,2L)) d[c(1L, 3L)] <- d[c(3L, 1L)] dn[c(1L, 3L)] <- dn[c(3L, 1L)] vn <- gid } else { vn <- "Variable" res <- aperm.default(x, c(1L,3L,2L)) } attributes(res) <- NULL dim(res) <- c(d[1L]*d[3L], d[2L]) varsl <- if(stringsAsFactors) list(`attributes<-`(rep(seq_len(d[3L]), each = d[1L]), list(levels = dn[[3L]], class = c("factor", "na.included"))), `attributes<-`(rep(seq_len(d[1L]), d[3L]), list(levels = dn[[1L]], class = c("factor", "na.included")))) else list(rep(dn[[3L]], each = d[1L]), rep(dn[[1L]], d[3L])) res <- c(varsl, mctl(res)) names(res) <- c(vn, if(stnam[1L] == "N") gid else "Trans", stnam) attr(res, "row.names") <- .set_row_names(d[1L]*d[3L]) } else { dimnames(x) <- NULL res <- aperm.default(x, c(3L,1L,4L,2L)) attributes(res) <- NULL nr <- d[1L]*3L*d[4L] dim(res) <- c(nr, d[2L]) varsl <- if(stringsAsFactors) list(`attributes<-`(rep(seq_len(d[4L]), each = 3L*d[1L]), list(levels = dn[[4L]], class = c("factor", "na.included"))), `attributes<-`(rep(seq_len(d[1L]), d[4L], each = 3L), list(levels = dn[[1L]], class = c("factor", "na.included"))), `attributes<-`(rep(seq_len(d[3L]), d[1L]*d[4L]), list(levels = dn[[3L]], class = c("factor", "na.included")))) else list(rep(dn[[4L]], each = 3L*d[1L]), rep(dn[[1L]], d[4L], each = 3L), rep(dn[[3L]], d[1L]*d[4L])) res <- c(varsl, mctl(res)) names(res) <- c("Variable", gid, "Trans", stnam) attr(res, "row.names") <- .set_row_names(nr) } class(res) <- "data.frame" res } collapse/R/fcount.R0000644000176200001440000000717615202504365013713 0ustar liggesusers # TODO: keep argument? -> not needed, can use fselect beforehand... fcount_core <- function(x, g, w = NULL, name = "N", add = FALSE) { # TODO: don't need integer group sizes if this is the case.... if(length(w)) g$group.sizes <- .Call(C_fwtabulate, g$group.id, w, g$N.groups, FALSE) # na.rm in g is not needed (FALSE) # if(is.atomic(x)) { # what about factors and sort argument?? and dropping levels?? # if(add) { # res <- list(x, .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE)) # names(res) <- c(g$group.vars, name[1L]) # } else { # res <- g$groups # res[[name[1L]]] <- g$group.sizes # } # attr(res, "row.names") <- .set_row_names(.Call(C_fnrow, res)) # oldClass(res) <- "data.frame" # return(res) # } if(add) { gs <- .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE) # return(`add_vars<-`(x, "end", `names<-`(list(gs), name[1L]))) if(add == 2L) { x <- # if(inherits(x, "grouped_df")) fgroup_vars(x) else # Better keep groups, does no harm... can use fungroup() .Call(C_subsetCols, x, ckmatch(g$group.vars, attr(x, "names")), TRUE) } res <- c(x, `names<-`(list(gs), name[1L])) return(condalc(copyMostAttributes(res, x), inherits(x, "data.table"))) } res <- g$groups if(!is.object(res) && is.object(x)) { # inherits(x, c("grouped_df", "indexed_frame")) res[[name[1L]]] <- g$group.sizes return(condCopyAttrib(res, x)) } condalc(copyMostAttributes(c(res, `names<-`(list(g$group.sizes), name[1L])), res), inherits(x, "data.table")) } fcount <- function(x, ..., w = NULL, name = "N", add = FALSE, sort = FALSE, decreasing = FALSE, drop = TRUE) { if(is.list(x)) w <- eval(substitute(w), x, parent.frame()) else x <- qDF(x) if(is.character(add)) add <- switch(add, gv =, group_vars = 2L, stop("add must be TRUE, FALSE or group_vars (gv)")) # add = "g", "groups" or "group_vars" # Note: this code duplication with GRP() is needed for GRP() to capture x (using substitute) if x is atomic. # if(is.atomic(x)) `names<-`(list(x), l1orlst(as.character(substitute(x)))) else g <- if(missing(...)) { if(inherits(x, "grouped_df")) GRP(x, sort = sort, decreasing = decreasing, return.groups = !add, return.order = FALSE, call = FALSE) else GRP.default(x, sort = sort, decreasing = decreasing, return.groups = !add, return.order = FALSE, drop = drop, call = FALSE) } else GRP.default(fselect(x, ...), sort = sort, decreasing = decreasing, return.groups = !add, return.order = FALSE, drop = drop, call = FALSE) fcount_core(x, g, w, name, add) } fcountv <- function(x, cols = NULL, w = NULL, name = "N", add = FALSE, sort = FALSE, drop = TRUE, ...) { # Safe enough ? or only allow character ? what about collapv() ?, extra option ? # if(length(w) == 1L && is.list(x) && length(unclass(x)) > 1L && (is.character(w) || is.integer(w) || (is.numeric(w) && w %% 1 < 1e-6))) if(is.atomic(x)) x <- qDF(x) if(length(w) == 1L && is.character(w)) { w <- .subset2(x, w) # Problem: if w is wrong character: NULL if(is.null(w)) stop("Unknown column: ", w) } if(is.character(add)) add <- switch(add, gv =, group_vars = 2L, stop("add must be TRUE, FALSE or group_vars (gv)")) # add = "g", "groups" or "group_vars" g <- if(is.null(cols)) { if(inherits(x, "grouped_df")) GRP(x, sort = sort, return.groups = !add, return.order = FALSE, call = FALSE, ...) else GRP.default(x, sort = sort, return.groups = !add, return.order = FALSE, drop = drop, call = FALSE, ...) } else GRP.default(colsubset(x, cols), sort = sort, return.groups = !add, return.order = FALSE, drop = drop, call = FALSE, ...) fcount_core(x, g, w, name, add) } collapse/R/TRA.R0000644000176200001440000000653314777170130013044 0ustar liggesusers TRA <- function(x, STATS, FUN = "-", ...) UseMethod("TRA") # , x setTRA <- function(x, STATS, FUN = "-", ...) invisible(TRA(x, STATS, FUN, ..., set = TRUE)) TRA.default <- function(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(TRA.matrix(x, STATS, FUN, g, set, ...)) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_TRA,x,STATS,0L,FUN,set)) if(is.atomic(g)) { if(is.nmfactor(g)) { if(fnlevels(g) != length(STATS)) stop("number of groups must match length(STATS)") } else { g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !! if(attr(g, "N.groups") != length(STATS)) stop("number of groups must match length(STATS)") } return(.Call(C_TRA,x,STATS,g,FUN,set)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) if(g[[1L]] != length(STATS)) stop("number of groups must match length(STATS)") .Call(C_TRA,x,STATS,g[[2L]],FUN,set) } TRA.matrix <- function(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_TRAm,x,STATS,0L,FUN,set)) if(is.atomic(g)) { if(is.nmfactor(g)) { if(fnlevels(g) != nrow(STATS)) stop("number of groups must match nrow(STATS)") } else { g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !! if(attr(g, "N.groups") != nrow(STATS)) stop("number of groups must match nrow(STATS)") } return(.Call(C_TRAm,x,STATS,g,FUN,set)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) if(g[[1L]] != nrow(STATS)) stop("number of groups must match nrow(STATS)") .Call(C_TRAm,x,STATS,g[[2L]],FUN,set) } TRA.data.frame <- function(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_TRAl,x,STATS,0L,FUN,set)) if(is.atomic(g)) { if(is.nmfactor(g)) { if(fnlevels(g) != fnrow(STATS)) stop("number of groups must match nrow(STATS)") } else { g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !! if(attr(g, "N.groups") != fnrow(STATS)) stop("number of groups must match nrow(STATS)") } return(.Call(C_TRAl,x,STATS,g,FUN,set)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = FALSE, call = FALSE) if(g[[1L]] != fnrow(STATS)) stop("number of groups must match nrow(STATS)") .Call(C_TRAl,x,STATS,g[[2L]],FUN,set) } TRA.list <- function(x, ...) TRA.data.frame(x, ...) TRA.grouped_df <- function(x, STATS, FUN = "-", keep.group_vars = TRUE, set = FALSE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) clx <- oldClass(x) oldClass(x) <- NULL oldClass(STATS) <- NULL if(g[[1L]] != length(STATS[[1L]])) stop("number of groups must match nrow(STATS)") nognst <- names(STATS) %!in% g[[5L]] mt <- ckmatch(names(STATS), names(x), "Variables in STATS not found in x:") mt <- mt[nognst] x[mt] <- .Call(C_TRAl,x[mt],STATS[nognst],g[[2L]],FUN,set) if(!keep.group_vars) x[names(x) %in% g[[5L]]] <- NULL oldClass(x) <- clx x } TRA.zoo <- function(x, STATS, FUN = "-", ...) if(is.matrix(x)) TRA.matrix(x, STATS, FUN, ...) else TRA.default(x, STATS, FUN, ...) TRA.units <- TRA.zoo collapse/R/join.R0000644000176200001440000003403415056572047013356 0ustar liggesusers################################ # Implementation of Table Joins ################################ sort_merge_join <- function(x_sorted, table, count = FALSE) { ot <- radixorderv(table, decreasing = FALSE, na.last = TRUE) .Call(C_sort_merge_join, x_sorted, table, ot, count) } multi_match <- function(m, g) .Call(C_multi_match, m, g) # Modeled after Pandas/Polars: # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.join.html # https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.join.html join <- function(x, y, on = NULL, # union(names(x), names(y)), how = "left", suffix = NULL, # c("_x", "_y") validate = "m:m", # NULL, multiple = FALSE, sort = FALSE, keep.col.order = TRUE, drop.dup.cols = FALSE, verbose = .op[["verbose"]], require = NULL, # E.g. require = list(x = 0.9, y = 0.8, on.fail = "error") column = NULL, attr = NULL, ...) { # method = c("hash", "radix") -> implicit to sort... # Initial checks if(!is.list(x)) stop("x must be a list") if(!is.list(y)) stop("y must be a list") # Get names and attributes ax <- attributes(x) x_name <- as.character(substitute(x)) if(length(x_name) != 1L || x_name == ".") x_name <- "x" # Piped use y_name <- as.character(substitute(y)) if(length(y_name) != 1L || y_name == ".") y_name <- "y" # Piped use oldClass(x) <- NULL oldClass(y) <- NULL xnam <- names(x) ynam <- names(y) how <- switch(how, l = "left", r = "right", i = "inner", f = "full", s = "semi", a = "anti", how) # Get join columns if(is.null(on)) { xon <- on <- xnam[xnam %in% ynam] if(length(on) == 0L) stop("No matching column names between x and y, please specify columns to join 'on'.") if(anyDuplicated.default(on) > 0L) stop("Duplicated join columns: ", paste(on[fduplicated(on)], collapse = ", "), ". Please supply 'on' columns and ensure that each data frame has unique column names.") ixon <- match(on, xnam) iyon <- match(on, ynam) } else { if(!is.character(on)) stop("need to provide character 'on'") xon <- names(on) if(is.null(xon)) xon <- on else if(any(miss <- !nzchar(xon))) xon[miss] <- on[miss] ixon <- ckmatch(xon, xnam, "Unknown x columns:") iyon <- ckmatch(on, ynam, "Unknown y columns:") } # Matching step rjoin <- switch(how, right = TRUE, FALSE) count <- verbose || validate != "m:m" || length(attr) || length(require) if(sort) { if(rjoin) { y <- roworderv(y, cols = iyon, decreasing = FALSE, na.last = TRUE) m <- sort_merge_join(y[iyon], x[ixon], count = count) } else { x <- roworderv(x, cols = ixon, decreasing = FALSE, na.last = TRUE) m <- sort_merge_join(x[ixon], y[iyon], count = count) if(how == "left" && length(ax[["row.names"]])) ax[["row.names"]] <- attr(x, "row.names") } } else { m <- if(rjoin) fmatch(y[iyon], x[ixon], nomatch = NA_integer_, count = count, ...) else fmatch(x[ixon], y[iyon], nomatch = NA_integer_, count = count, ...) } # TODO: validate full join... switch(validate, "m:m" = TRUE, "1:1" = { c1 <- attr(m, "N.distinct") != length(m) - attr(m, "N.nomatch") c2 <- attr(m, "N.groups") != attr(m, "N.distinct") && any_duplicated(if(rjoin) x[ixon] else y[iyon]) if(rjoin) { tmp <- c2 c2 <- c1 c1 <- tmp } if(c1 || c2) stop("Join is not 1:1: ", x_name, " (x) is ", if(c1) "not " else "", "unique on the join columns; ", y_name, " (y) is ", if(c2) "not " else "", "unique on the join columns") }, "1:m" = { cond <- if(rjoin) attr(m, "N.groups") != attr(m, "N.distinct") && any_duplicated(x[ixon]) else attr(m, "N.distinct") != length(m) - attr(m, "N.nomatch") if(cond) stop("Join is not 1:m: ", x_name, " (x) is not unique on the join columns") }, "m:1" = { cond <- if(rjoin) attr(m, "N.distinct") != length(m) - attr(m, "N.nomatch") else attr(m, "N.groups") != attr(m, "N.distinct") && any_duplicated(y[iyon]) if(cond) stop("Join is not m:1: ", y_name, " (y) is not unique on the join columns") }, stop("validate must be one of '1:1', '1:m', 'm:1' or 'm:m'") ) if(multiple) { g <- groupv(if(rjoin) x[ixon] else y[iyon], group.sizes = TRUE) mi <- m m <- multi_match(m, g) if(is.list(m)) { multiple <- 2L # TODO: Optimize if drop.dup.cols if(rjoin) y <- .Call(C_subsetDT, y, m[[1L]], seq_along(y), FALSE) else x <- .Call(C_subsetDT, x, m[[1L]], seq_along(x), FALSE) m <- m[[2L]] if(how == "left" && length(ax[["row.names"]])) ax[["row.names"]] <- .set_row_names(length(m)) } } if(verbose || length(require)) { Nx <- if(rjoin) attr(m, "N.groups") else length(if(multiple) mi else m) Ny <- if(rjoin) length(if(multiple) mi else m) else attr(m, "N.groups") nx <- if(rjoin) attr(m, "N.distinct") else Nx - attr(m, "N.nomatch") ny <- if(rjoin) Ny - attr(m, "N.nomatch") else attr(m, "N.distinct") if(length(require)) { if(length(require$x) && require$x > nx/Nx) { msg <- sprintf("Matched %#.1f%% of records in table %s (x), but %#.1f%% is required", nx/Nx*100, x_name, require$x*100) switch_msg(msg, require$fail) } if(length(require$y) && require$y > ny/Ny) { msg <- sprintf("Matched %#.1f%% of records in table %s (y), but %#.1f%% is required", ny/Ny*100, y_name, require$y*100) switch_msg(msg, require$fail) } } if(verbose) { cin_x <- if(verbose == 2L) paste0(xon, ":", vclasses(x[ixon], FALSE)) else xon cin_y <- if(verbose == 2L) paste0(on, ":", vclasses(y[iyon], FALSE)) else on xstat <- paste0(nx, "/", Nx, " (", signif(nx/Nx*100, 3), "%)") ystat <- paste0(ny, "/", Ny, " (", signif(ny/Ny*100, 3), "%)") if(multiple) { validate <- switch(validate, "1:1" = "1:1", "1:m" = paste0("1:", round(ny / attr(mi, "N.distinct"), 2)), "m:1" = paste0(round(nx / attr(mi, "N.distinct"), 2), ":1"), "m:m" = paste(round(c(nx, ny) / attr(mi, "N.distinct"), 2), collapse = ":")) } else { validate <- switch(validate, "1:1" = "1:1", "1:m" = paste0("1:", if(rjoin) round(ny / nx, 2) else "1st"), "m:1" = paste0(if(rjoin) "1st" else round(nx / ny, 2), ":1"), "m:m" = if(rjoin) paste0("1st:", round(ny / nx, 2)) else paste0(round(nx / ny, 2), ":1st")) } cat(how, " join: ", x_name, "[", paste(cin_x, collapse = ", "), "] ", xstat, " <", validate , "> ", y_name, "[", paste(cin_y, collapse = ", "), "] ", ystat, "\n", sep = "") } } # Check for duplicate columns and suffix as needed if(any(nm <- match(ynam[-iyon], xnam, nomatch = 0L)) && switch(how, semi = FALSE, anti = FALSE, TRUE)) { nnm <- nm != 0L nam <- xnam[nm[nnm]] if(is.character(drop.dup.cols) || drop.dup.cols) { switch(drop.dup.cols, y = { rmyi <- logical(length(ynam)) rmyi[-iyon][nnm] <- TRUE y[rmyi] <- NULL ynam <- names(y) tmp <- rmyi tmp[iyon] <- TRUE iyon <- which(tmp[!rmyi]) if(verbose) cat("duplicate columns: ", paste(nam, collapse = ", "), " => dropped from y\n", sep = "") }, x = { x[nm[nnm]] <- NULL tmp <- logical(length(xnam)) xnam <- names(x) tmp[ixon] <- TRUE ixon <- which(tmp[-nm[nnm]]) if(verbose) cat("duplicate columns: ", paste(nam, collapse = ", "), " => dropped from x\n", sep = "") }, stop("drop.dup.cols needs to be 'y', 'x', or TRUE") ) } else { if(length(suffix) <= 1L) { # Only appends y with name if(is.null(suffix)) suffix <- paste0("_", y_name) names(y)[-iyon][nnm] <- paste0(nam, suffix) } else { names(x)[nm[nnm]] <- paste0(nam, suffix[[1L]]) # if(suffix[[1L]] != "") ?? names(y)[-iyon][nnm] <- paste0(nam, suffix[[2L]]) } if(verbose) cat("duplicate columns: ", paste(nam, collapse = ", "), " => renamed using suffix ", if(length(suffix) == 1L) paste0("'", suffix, "' for y") else paste0("'", suffix[[1L]], "' for x and '", suffix[[2L]], "' for y"), "\n", sep = "") } } # Core: do the joins res <- switch(how, left = { y_res <- if(identical(unattrib(m), seq_row(y))) y[-iyon] else .Call(C_subsetDT, y, m, seq_along(y)[-iyon], if(count) attr(m, "N.nomatch") else TRUE) c(x, y_res) }, inner = { anyna <- if(count) attr(m, "N.nomatch") > 0L else anyNA(m) if(anyna) { x_ind <- whichNA(m, invert = TRUE) x <- .Call(C_subsetDT, x, x_ind, seq_along(x), FALSE) m <- na_rm(m) # rn <- ax[["row.names"]] # TODO: Works inside switch?? # if(length(rn)) ax[["row.names"]] <- if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") # .set_row_names(length(x_ind)) else Csv(rn, x_ind) } y_res <- if(identical(unattrib(m), seq_row(y))) y[-iyon] else .Call(C_subsetDT, y, m, seq_along(y)[-iyon], FALSE) c(x, y_res) }, full = { cond <- !count || attr(m, "N.distinct") != attr(m, "N.groups") if(cond) { um <- if(!count || length(m)-attr(m, "N.distinct")-attr(m, "N.nomatch") != 0L) .Call(C_funique, m) else m # This gets the rows of table matched if(!count || attr(m, "N.nomatch")) um <- na_rm(um) if(count) tsize <- attr(m, "N.groups") else { tsize <- fnrow(y) cond <- length(um) != tsize } } if(cond) { # TODO: special case ? 1 distinct value etc.?? tind <- if(length(um)) seq_len(tsize)[-um] else seq_len(tsize) # TODO: Table may not be unique. res_nrow <- length(m) + length(tind) x_res <- .Call(C_subsetDT, x, seq_len(res_nrow), seq_along(x)[-ixon], TRUE) # Need check here because oversize indices !! y_res <- .Call(C_subsetDT, y, vec(list(m, tind)), seq_along(y)[-iyon], TRUE) # Need check here because oversize indices !! on_res <- .Call(C_rbindlist, list(x[ixon], .Call(C_subsetDT, y, tind, iyon, FALSE)), FALSE, FALSE, NULL) # if(length(ax[["row.names"]])) ax[["row.names"]] <- .set_row_names(res_nrow) if(keep.col.order) { if(length(x_res)) add_vars(x_res, pos = ixon) <- on_res else x_res <- on_res c(x_res, y_res) } else { keep.col.order <- 2L # has global effects !! c(on_res, x_res, y_res) } } else { # If all elements of table are matched, this is simply a left join how <- if(multiple == 2L) "left_setrn" else "left" y_res <- if(identical(unattrib(m), seq_row(y))) y[-iyon] else .Call(C_subsetDT, y, m, seq_along(y)[-iyon], if(count) attr(m, "N.nomatch") else TRUE) # anyNA(um) ?? c(x, y_res) } }, right = { x_res <- if(identical(unattrib(m), seq_row(x))) x[-ixon] else .Call(C_subsetDT, x, m, seq_along(x)[-ixon], if(count) attr(m, "N.nomatch") else TRUE) # if(length(ax[["row.names"]])) ax[["row.names"]] <- .set_row_names(length(m)) y_on <- y[iyon] names(y_on) <- xon if(keep.col.order) { if(length(x_res)) add_vars(x_res, pos = ixon) <- y_on else x_res <- y_on c(x_res, y[-iyon]) } else { keep.col.order <- 2L # has global effects !! c(y_on, x_res, y[-iyon]) } }, semi = { # = return rows in x that have matching values in y anyna <- if(count) attr(m, "N.nomatch") > 0L else anyNA(m) if(anyna) { x_ind <- whichNA(m, invert = TRUE) # rn <- ax[["row.names"]] # TODO: Works inside switch?? # if(length(rn)) ax[["row.names"]] <- if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") # .set_row_names(x_ind) else Csv(rn, x_ind) .Call(C_subsetDT, x, x_ind, seq_along(x), FALSE) } else x }, # = return rows in x that have no matching values in y anti = .Call(C_subsetDT, x, whichNA(m), seq_along(x), FALSE), stop("Unknown join method: ", how) ) # Join column and reordering if(length(column)) { if(is.list(column)) { lev <- column[[2L]] column <- column[[1L]] x_name <- lev[[1L]] y_name <- lev[[2L]] matched <- lev[[3L]] } else matched <- "matched" # TODO: better? # matched <- paste0(y_name, "_", y_name) mc <- switch(how, left_setrn =, left = structure(is.na(m) + 1L, levels = c(matched, x_name), class = c("factor", "na.included")), right = structure(is.na(m) + 1L, levels = c(matched, y_name), class = c("factor", "na.included")), full = structure(vec(list(is.na(m) + 1L, alloc(3L, fnrow(res)-length(m)))), levels = c(matched, x_name, y_name), class = c("factor", "na.included")), inner =, semi = structure(alloc(1L, fnrow(res)), levels = matched, class = c("factor", "na.included")), anti = structure(alloc(1L, fnrow(res)), levels = x_name, class = c("factor", "na.included"))) attr(mc, "on.cols") <- `names<-`(list(xon, `names<-`(on, NULL)), c(x_name, y_name)) mc_name <- if(is.character(column)) column else ".join" if(keep.col.order == 1L) res[[mc_name]] <- mc else { if(keep.col.order == 2L) ixon <- seq_along(ixon) res <- c(res[ixon], `names<-`(list(mc), mc_name), res[-ixon]) } } else if(!keep.col.order) res <- c(res[ixon], res[-ixon]) # Final steps if(length(attr)) ax[[if(is.character(attr)) attr else "join.match"]] <- list(call = match.call(), on.cols = list(x = xon, y = `names<-`(on, NULL)), match = m) # TODO: sort merge join also report o? if(sort && how == "full") res <- roworderv(res, cols = xon) if(how != "left" && length(ax[["row.names"]])) ax[["row.names"]] <- .set_row_names(fnrow(res)) ax[["names"]] <- names(res) .Call(C_setattributes, res, ax) if(any(ax$class == "data.table")) return(alc(res)) return(res) } collapse/R/list_functions.R0000644000176200001440000004606414777170130015464 0ustar liggesusersrapply2d <- function(l, FUN, ..., classes = "data.frame") { aply2d <- function(y) if(is.list(y) && !inherits(y, classes)) lapply(y, aply2d) else FUN(y, ...) # is.null(dim(y)) # qsu output shows list of DF can have dim attr. aply2d(l) # lapply(x,aply2d) # if this is enabled, rapply2d takes apart data.frame if passed } get_elem_indl <- function(x, indl, return = "sublist", keep_class = FALSE) switch(return, sublist = if(keep_class) fcolsubset(x, indl) else .subset(x, indl), names = attr(x, "names")[indl], indices = which(indl), named_indices = which(`names<-`(indl, attr(x, "names"))), logical = indl, named_logical = `names<-`(indl, attr(x, "names")), stop("Unknown return option!")) list_elem <- function(l, return = "sublist", keep.class = FALSE) { if(!is.list(l)) stop("l needs to be a list") get_elem_indl(l, .Call(C_vtypes, l, 3L), return, keep.class) } atomic_elem <- function(l, return = "sublist", keep.class = FALSE) { if(!is.list(l)) stop("l needs to be a list") get_elem_indl(l, .Call(C_vtypes, l, 7L), return, keep.class) } "list_elem<-" <- function(l, value) { if(!is.list(l)) stop("l needs to be a list") al <- attributes(l) ilv <- is.list(value) len <- if(ilv) length(value) else 1L attributes(l) <- NULL # vapply without attributes is faster ! ind <- which(.Call(C_vtypes, l, 3L)) if(len != length(ind)) stop("length(value) must match length(list_elem(l))") if(ilv) l[ind] <- value else l[[ind]] <- value if(ilv && length(nam <- names(value))) al[["names"]][ind] <- nam setAttributes(l, al) } "atomic_elem<-" <- function(l, value) { if(!is.list(l)) stop("l needs to be a list") al <- attributes(l) ilv <- is.list(value) len <- if(ilv) length(value) else 1L attributes(l) <- NULL ind <- which(.Call(C_vtypes, l, 7L)) if(len != length(ind)) stop("length(value) must match length(list_elem(l))") if(ilv) l[ind] <- value else l[[ind]] <- value if(ilv && length(nam <- names(value))) al[["names"]][ind] <- nam setAttributes(l, al) } is_unlistable <- function(l, DF.as.list = FALSE) { if(!is.list(l)) return(TRUE) if(DF.as.list) return(all(unlist(rapply(l, is.atomic, how = "list"), use.names = FALSE))) checkisul <- function(x) if(is.atomic(x) || inherits(x, "data.frame")) TRUE else if(is.list(x)) lapply(x, checkisul) else FALSE all(unlist(checkisul(l), use.names = FALSE)) # fastest way? } # is.unlistable <- function(l, DF.as.list = FALSE) { # .Deprecated(msg = "'is.unlistable' was renamed to 'is_unlistable'. It will be removed end of 2023, see help('collapse-renamed').") # is_unlistable(l, DF.as.list) # } # If data.frame, search all, otherwise, make optional counting df or not, but don't search them. ldepth <- function(l, DF.as.list = FALSE) { if(!is.list(l)) return(0L) if(inherits(l, "data.frame")) { # fast defining different functions in if-clause ? ld <- function(y,i) if(is.list(y)) lapply(y,ld,i+1L) else i } else if(DF.as.list) { ld <- function(y,i) { df <- inherits(y, "data.frame") if(is.list(y) && !df) lapply(y,ld,i+1L) else i+df } } else { ld <- function(y,i) if(is.list(y) && !inherits(y, "data.frame")) lapply(y,ld,i+1L) else i } base::max(unlist(ld(l, 0L), use.names = FALSE)) } has_elem <- function(l, elem, recursive = TRUE, DF.as.list = FALSE, regex = FALSE, ...) { if(!is.list(l)) stop("l needs to be a list") if(is.function(elem)) { if(recursive) { if(DF.as.list) { raply2 <- function(y) if(elem(y, ...)) TRUE else if(is.list(y)) lapply(y, raply2) else FALSE return(any(unlist(raply2(l), use.names = FALSE))) } aply2de <- function(y) if(elem(y, ...)) TRUE else if(is.list(y) && !inherits(y, "data.frame")) lapply(y, aply2de) else FALSE return(any(unlist(aply2de(l), use.names = FALSE))) } return(any(vapply(l, elem, TRUE, ..., USE.NAMES = FALSE))) } else if(is.character(elem)) { if(!regex && !missing(...)) unused_arg_action(match.call(), ...) if(recursive) { oldClass(l) <- NULL # in case [ behaves weird ret <- 4L - as.logical(DF.as.list) # is.subl <- if(DF.as.list) is.list else function(x) is.list(x) && !inherits(x, "data.frame") # could do without, but it seems to remove data.frame attributes, and more speed! namply <- function(y) if(any(subl <- .Call(C_vtypes, y, ret))) # vapply(y, is.subl, TRUE) c(names(y), unlist(lapply(.subset(y, subl), namply), use.names = FALSE)) else names(y) # also overall subl names are important, and .subset for DT subsetting ! # names(which(!subl)) # names(y)[!subl] # which is faster? if(regex) return(length(rgrep(elem, namply(l), ...)) > 0L) else return(any(namply(l) %in% elem)) } else if(regex) return(length(rgrep(elem, names(l), ...)) > 0L) else return(any(names(l) %in% elem)) } else stop("elem must be a function or character vector of element names or regular expressions") } # Experimental: # elem_names <- function(l, how = c("list", "unlist"), DF.as.list = TRUE) { # need right order for method how = list !! # namply <- function(y) if(any(subl <- vapply(y, is.subl, TRUE))) c(names(subl), lapply(.subset(y, subl), namply)) else names(subl) # switch(how[1L], # unlist = names(rapply(l, function(x) NA)), # list = # ) rapply(l, function(x) NULL) # # } list_extract_FUN <- function(l, FUN, ret, keep.tree = FALSE, nkeep_class = TRUE, invert = FALSE, ...) { if(invert) { regsearch <- function(x) { if(nkeep_class && is.object(x)) oldClass(x) <- NULL if(any(subl <- .Call(C_vtypes, x, ret))) { matches <- !vapply(x, FUN, TRUE, ..., USE.NAMES = FALSE) wsubl <- which(matches & subl) if(length(wsubl)) { wres <- which(matches & !subl) a <- lapply(x[wsubl], regsearch) wa <- vlengths(a, FALSE) > 0L x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wsubl[wa]))]) else return(x[[1L]]) } else { wres <- which(matches) if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } } else { matches <- whichv(vapply(x, FUN, TRUE, ..., USE.NAMES = FALSE), FALSE) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) } } } else { regsearch <- function(x) { if(nkeep_class && is.object(x)) oldClass(x) <- NULL if(any(subl <- .Call(C_vtypes, x, ret))) { matches <- vapply(x, FUN, TRUE, ..., USE.NAMES = FALSE) wres <- which(matches) wnressubl <- which(if(length(wres)) subl & !matches else subl) if(length(wnressubl)) { a <- lapply(x[wnressubl], regsearch) wa <- vlengths(a, FALSE) > 0L x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wnressubl[wa]))]) else return(x[[1L]]) } else if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } else { matches <- which(vapply(x, FUN, TRUE, ..., USE.NAMES = FALSE)) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) } } ## Previous Version: Does not check the sublists, so cannot find objects through inherits() # if(invert) { # # This is rather simple, just negate the vapply calls. could also simple invert the function.. but this is faster... # regsearch <- function(x) { # if(nkeep_class && is.object(x)) oldClass(x) <- NULL # if(any(subl <- .Call(C_vtypes, x, ret))) { # wsubl <- which(subl) # wnsubl <- whichv(subl, FALSE) # matches <- !vapply(x[wnsubl], FUN, TRUE, USE.NAMES = FALSE) # a <- lapply(x[wsubl], regsearch) # wa <- vlengths(a, FALSE) > 0L # x <- c(x[wnsubl][matches], a[wa]) # if(keep.tree || length(x) != 1L) # return(x[forder.int(c(wnsubl[matches], wsubl[wa]))]) else return(x[[1L]]) # } else if(length(x)) { # matches <- whichv(vapply(x, FUN, TRUE, USE.NAMES = FALSE), FALSE) # if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) # } # } # } else { # regsearch <- function(x) { # if(nkeep_class && is.object(x)) oldClass(x) <- NULL # if(any(subl <- .Call(C_vtypes, x, ret))) { # vapply(x, is.subl, TRUE, USE.NAMES = FALSE) # is.list(x) && a # wsubl <- which(subl) # wnsubl <- whichv(subl, FALSE) # matches <- vapply(x[wnsubl], FUN, TRUE, USE.NAMES = FALSE) # a <- lapply(x[wsubl], regsearch) # wa <- vlengths(a, FALSE) > 0L # note that this also gets rid of null elements! could make it length or is.null! # vapply(a, length, 1L, USE.NAMES = FALSE) # x <- c(x[wnsubl][matches], a[wa]) # The problem here: If all elements in a sublist are atomic, it still retains the sublist itself with NULL inside! -> but c() removes it!! # if(keep.tree || length(x) != 1L) # return(x[forder.int(c(wnsubl[matches], wsubl[wa]))]) else return(x[[1L]]) # fastest way? # } else if(length(x)) { # This ensures correct behavior in the final nodes: if (length(x)) because problem encountered in get.elem(V, is.matrix) -> empty xlevels list, the lapply below does not execute # matches <- which(vapply(x, FUN, TRUE, USE.NAMES = FALSE)) # if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) # needs to be != # } # } # } } regsearch(l) } list_extract_regex <- function(l, exp, ret, keep.tree = FALSE, nkeep_class = TRUE, invert = FALSE, ...) { if(invert) { regsearch <- function(x) { if(nkeep_class && is.object(x)) oldClass(x) <- NULL if(any(subl <- .Call(C_vtypes, x, ret))) { matches <- if(is.null(names(x))) rep(TRUE, length(x)) else !rgrepl(exp, names(x), ...) # rgrep with invert?? wsubl <- which(matches & subl) if(length(wsubl)) { wres <- which(matches & !subl) a <- lapply(x[wsubl], regsearch) wa <- vlengths(a, FALSE) > 0L x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wsubl[wa]))]) else return(x[[1L]]) } else { wres <- which(matches) if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } } else { matches <- !rgrepl(exp, names(x), ...) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) } } } else { regsearch <- function(x) { if(nkeep_class && is.object(x)) oldClass(x) <- NULL if(any(subl <- .Call(C_vtypes, x, ret))) { matches <- rgrepl(exp, names(x), ...) wres <- which(matches) # wres <- rgrep(exp, names(x), ...) wnressubl <- which(if(length(wres)) subl & !matches else subl) # wnressubl <- if(length(wres)) fsetdiff(which(subl), wres) else which(subl) if(length(wnressubl)) { # faster way? a <- lapply(x[wnressubl], regsearch) # is this part still necessary?, or only for keep.tree wa <- vlengths(a, FALSE) > 0L # note that this also gets rid of null elements!! could make it length or is.null!, length is better for length 0 lists !! # vapply(a, length, 1L) x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wnressubl[wa]))]) else return(x[[1L]]) } else if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } else { # This ensures correct behavior in the final nodes: matches <- rgrep(exp, names(x), ...) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) # needs to be != } } } regsearch(l) } list_extract_names <- function(l, nam, ret, keep.tree = FALSE, nkeep_class = TRUE, invert = FALSE) { if(invert) { regsearch <- function(x) { if(nkeep_class && is.object(x)) oldClass(x) <- NULL if(any(subl <- .Call(C_vtypes, x, ret))) { matches <- if(is.null(names(x))) rep(TRUE, length(x)) else names(x) %!in% nam wsubl <- which(matches & subl) if(length(wsubl)) { wres <- which(matches & !subl) a <- lapply(x[wsubl], regsearch) wa <- vlengths(a, FALSE) > 0L x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wsubl[wa]))]) else return(x[[1L]]) } else { wres <- which(matches) if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } } else { matches <- which(names(x) %!in% nam) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) } } } else { regsearch <- function(x) { if(nkeep_class && is.object(x)) oldClass(x) <- NULL if(any(subl <- .Call(C_vtypes, x, ret))) { matches <- names(x) %in% nam wres <- which(matches) # match(nam, names(x), 0L) # better because gives integer(0) -> necessary as cannot do l[[0L]] wnressubl <- which(if(length(wres)) subl & !matches else subl) # fsetdiff(which(subl), wres) # old solution: faster but does not work well if parent list is unnamed ! (i.e. l = list(lm1, lm1)) if(length(wnressubl)) { a <- lapply(x[wnressubl], regsearch) wa <- vlengths(a, FALSE) > 0L # vapply(a, length, 1L) x <- c(x[wres], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wres, wnressubl[wa]))]) else return(x[[1L]]) } else if(keep.tree || length(wres) != 1L) return(x[wres]) else return(x[[wres]]) } else { matches <- which(names(x) %in% nam) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) # needs to be !=, because integer(0) goes in first.. } } } regsearch(l) } # Idea: Also use indices and logical vectors ? i.e. get first two columns of alist of data.frames ? # This behaves a bit differently (not find elements everywhere, but also subset inside the list) list_extract_ind <- function(l, ind, is.subl, keep.tree = FALSE, nkeep_class = TRUE) { if(is.logical(ind)) ind <- which(ind) if(length(ind) > 1L || keep.tree) { regsearch <- function(x) if(is.subl(x)) lapply(x, regsearch) else if(nkeep_class) .subset(x, ind) else x[ind] } else { # if(ind[1L] < 1L) stop("Cannot subset with single negative indices") # .subset2 throws error... regsearch <- function(x) if(is.subl(x)) lapply(x, regsearch) else .subset2(x, ind) } regsearch(l) } # Note: all functions currently remove empty list elements ! # keep.tree argument still issues with xlevels get_elem <- function(l, elem, recursive = TRUE, DF.as.list = FALSE, keep.tree = FALSE, keep.class = FALSE, regex = FALSE, invert = FALSE, ...) { if(!is.list(l)) stop("l needs to be a list") if(recursive) { ret <- 4L - as.logical(DF.as.list) if(keep.class) al <- attributes(l) if(is.function(elem)) { l <- list_extract_FUN(l, elem, ret, keep.tree, !keep.class, invert, ...) } else if(is.character(elem)) { if(regex) { l <- list_extract_regex(l, elem, ret, keep.tree, !keep.class, invert, ...) } else { if(!missing(...)) unused_arg_action(match.call(), ...) l <- list_extract_names(l, elem, ret, keep.tree, !keep.class, invert) } } else { if(!missing(...)) unused_arg_action(match.call(), ...) if(invert) { if(is.numeric(elem)) stop("Cannot use option invert = TRUE if elem is indices") elem <- !elem } is.subl <- if(DF.as.list) is.list else function(x) is.list(x) && !inherits(x, "data.frame") l <- list_extract_ind(l, elem, is.subl, keep.tree, !keep.class) } if(keep.class && is.list(l)) { al[["names"]] <- names(l) return(setAttributes(l, al)) # class(l) <- cll # when drop.tree is proper, l might not be a list } else return(l) } else { if(is.function(elem)) { elem <- whichv(vapply(l, elem, TRUE, ..., USE.NAMES = FALSE), TRUE, invert) } else if(is.character(elem)) { if(regex) elem <- rgrep(elem, names(l), invert = invert, ...) else { if(!missing(...)) unused_arg_action(match.call(), ...) elem <- which(if(invert) names(l) %!in% elem else names(l) %in% elem) } } else if(is.logical(elem)) { if(!missing(...)) unused_arg_action(match.call(), ...) elem <- whichv(elem, TRUE, invert) # else stop("elem must be a function, character vector or vector of regular expressions!") } if(keep.tree || length(elem) != 1L) { if(keep.class) return(fcolsubset(l, elem)) else return(.subset(l, elem)) } else return(.subset2(l, elem)) } } # there is base::getElement # 'regular' (is.atomic(x) || is.list(x)) elements, the check now implements in C_vtypes with option 5L. is_regular_vec <- function(x) .Call(C_vtypes, x, 5L) is_irregular_vec <- function(x) !.Call(C_vtypes, x, 5L) # A variant of list_extract_FUN for FUN that can take a list as input and check the elements list_extract_FUN_vec <- function(l, FUN, ret, keep.tree = FALSE, nkeep_class = TRUE) { regsearch <- function(x) { if(nkeep_class && is.object(x)) oldClass(x) <- NULL if(any(subl <- .Call(C_vtypes, x, ret))) { wsubl <- which(subl) wnsubl <- whichv(subl, FALSE) matches <- FUN(x[wnsubl]) a <- lapply(x[wsubl], regsearch) wa <- vlengths(a, FALSE) > 0L x <- c(x[wnsubl][matches], a[wa]) if(keep.tree || length(x) != 1L) return(x[forder.int(c(wnsubl[matches], wsubl[wa]))]) else return(x[[1L]]) } else if(length(x)) { matches <- which(FUN(x)) if(keep.tree || length(matches) != 1L) return(x[matches]) else return(x[[matches]]) } } regsearch(l) } reg_elem <- function(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) { if(!is.list(l)) stop("l needs to be a list") if(keep.class) al <- attributes(l) # if(inherits(l, "data.frame")) if(keep.class) return(l) else return(unattrib(l)) if(recursive) { l <- list_extract_FUN_vec(l, is_regular_vec, 4L, keep.tree, !keep.class) if(keep.class && is.list(l)) { al[["names"]] <- names(l) return(setAttributes(l, al)) } else return(l) } else { matches <- which(is_regular_vec(l)) if(keep.tree || length(matches) != 1L) { if(keep.class) return(fcolsubset(l, matches)) else return(.subset(l, matches)) } else return(.subset2(l, matches)) } } irreg_elem <- function(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) { if(!is.list(l)) stop("l needs to be a list") if(keep.class) al <- attributes(l) if(recursive) { l <- list_extract_FUN_vec(l, is_irregular_vec, 4L, keep.tree, !keep.class) if(keep.class && is.list(l)) { al[["names"]] <- names(l) return(setAttributes(l, al)) } else return(l) } else { matches <- which(is_irregular_vec(l)) if(keep.tree || length(matches) != 1L) { if(keep.class) return(fcolsubset(l, matches)) else return(.subset(l, matches)) } else return(.subset2(l, matches)) } } # TODO: See about big objects! #microbenchmark(all(rapply(lm,is.atomic)),!is.list(unlist(lm, use.names = FALSE)),all(unlist(rapply2d(lm,is.std), use.names = FALSE))) #microbenchmark(all(rapply(GGDC,is.atomic)),!is.list(unlist(GGDC, use.names = FALSE)),all(unlist(rapply2d(GGDC,is.std), use.names = FALSE))) collapse/R/flag.R0000644000176200001440000001617414777170130013331 0ustar liggesusers flag <- function(x, n = 1, ...) UseMethod("flag") # , x flag.default <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("flag", unclass(x))) if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_flaglead,x,n,fill,0L,0L,G_t(t),stubs)) g <- G_guo(g) .Call(Cpp_flaglead,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.pseries <- function(x, n = 1, fill = NA, stubs = length(n) > 1L, shift = "time", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- uncl2pix(x) g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !inherits(x, "indexed_series")) t <- plm_check_time(t) if(is.matrix(x)) .Call(Cpp_flagleadm,x,n,fill,fnlevels(g),g,t,stubs) else .Call(Cpp_flaglead,x,n,fill,fnlevels(g),g,t,stubs) } flag.matrix <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_flagleadm,x,n,fill,0L,0L,G_t(t),stubs)) g <- G_guo(g) .Call(Cpp_flagleadm,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.zoo <- function(x, ...) if(is.matrix(x)) flag.matrix(x, ...) else flag.default(x, ...) flag.units <- flag.zoo flag.grouped_df <- function(x, n = 1, t = NULL, fill = NA, stubs = length(n) > 1L, keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) g <- GRP.grouped_df(x, call = FALSE) tsym <- substitute(t) nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) if(!is.null(tsym)) { t <- eval(tsym, x, parent.frame()) if(!anyNA(tn <- match(all.vars(tsym), nam))) { gn <- c(gn, tn) if(anyDuplicated.default(gn)) stop("timevar coincides with grouping variables!") } } if(length(gn)) { ax <- attributes(x) res <- .Call(Cpp_flagleadl, .subset(x, -gn), n,fill,g[[1L]],g[[2L]],G_t(t),stubs) if(keep.ids) res <- c(.subset(x, gn), res) ax[["names"]] <- names(res) # Works for multiple lags ! return(setAttributes(res, ax)) } .Call(Cpp_flagleadl,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.data.frame <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(Cpp_flagleadl,x,n,fill,0L,0L,G_t(t),stubs)) g <- G_guo(g) .Call(Cpp_flagleadl,x,n,fill,g[[1L]],g[[2L]],G_t(t),stubs) } flag.list <- function(x, ...) flag.data.frame(x, ...) flag.pdata.frame <- function(x, n = 1, fill = NA, stubs = length(n) > 1L, shift = "time", ...) { if(!missing(...)) unused_arg_action(match.call(), ...) index <- uncl2pix(x) g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !inherits(x, "indexed_frame")) t <- plm_check_time(t) .Call(Cpp_flagleadl,x,n,fill,fnlevels(g),g,t,stubs) } # Lag Operator # use xt instead of by ? L <- function(x, n = 1, ...) UseMethod("L") # , x L.default <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = .op[["stub"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(flag.matrix(x, n, g, t, fill, stubs, ...)) flag.default(x, n, g, t, fill, stubs, ...) } L.pseries <- function(x, n = 1, fill = NA, stubs = .op[["stub"]], shift = "time", ...) flag.pseries(x, n, fill, stubs, shift, ...) L.matrix <- function(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = .op[["stub"]], ...) flag.matrix(x, n, g, t, fill, stubs, ...) L.zoo <- function(x, ...) if(is.matrix(x)) L.matrix(x, ...) else L.default(x, ...) L.units <- L.zoo L.grouped_df <- function(x, n = 1, t = NULL, fill = NA, stubs = .op[["stub"]], keep.ids = TRUE, ...) { x <- x eval(substitute(flag.grouped_df(x, n, t, fill, stubs, keep.ids, ...))) } L.data.frame <- function(x, n = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, stubs = .op[["stub"]], keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.call(by) || is.call(t)) { ax <- attributes(x) class(x) <- NULL nam <- names(x) if(is.call(by)) { if(length(by) == 3L) { cols <- ckmatch(all.vars(by[[2L]]), nam, "Unknown variables:") gn <- ckmatch(all.vars(by[[3L]]), nam, "Unknown variables:") } else { gn <- ckmatch(all.vars(by), nam, "Unknown variables:") cols <- cols2intrmgn(gn, cols, x) } by <- G_guo(if(length(gn) == 1L) x[[gn]] else x[gn]) if(!keep.ids) gn <- NULL } else { gn <- NULL if(length(cols)) cols <- cols2int(cols, x, nam) by <- if(is.null(by)) list(0L, 0L) else G_guo(by) } if(is.call(t)) { tn <- ckmatch(all.vars(t), nam, "Unknown variables:") t1 <- length(tn) == 1L t <- eval(if(t1) t[[2L]] else attr(terms.formula(t), "variables"), x, attr(t, ".Environment")) # if(t1) x[[tn]] else x[tn] cols <- if(is.null(cols)) seq_along(x)[-tn] else if(t1) cols[cols != tn] else fsetdiff(cols, tn) if(keep.ids) gn <- c(gn, tn) } res <- if(length(gn)) c(x[gn], .Call(Cpp_flagleadl,x[cols],n,fill,by[[1L]],by[[2L]],G_t(t),stubs)) else .Call(Cpp_flagleadl,x[cols],n,fill,by[[1L]],by[[2L]],G_t(t),stubs) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(length(cols)) { # Needs to be like this, otherwise subsetting dropps the attributes ! ax <- attributes(x) class(x) <- NULL x <- x[cols2int(cols, x, names(x), FALSE)] ax[["names"]] <- names(x) setattributes(x, ax) } if(is.null(by)) return(.Call(Cpp_flagleadl,x,n,fill,0L,0L,G_t(t),stubs)) by <- G_guo(by) .Call(Cpp_flagleadl,x,n,fill,by[[1L]],by[[2L]],G_t(t),stubs) } L.list <- function(x, ...) L.data.frame(x, ...) L.pdata.frame <- function(x, n = 1, cols = is.numeric, fill = NA, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, ...) { if(!missing(...)) unused_arg_action(match.call(), ...) ax <- attributes(x) nam <- ax[["names"]] index <- uncl2pix(x) cols_fun <- is.function(cols) if(cols_fun && identical(cols, is.numeric)) cols <- which(.Call(C_vtypes, x, 1L)) else if(length(cols)) cols <- cols2int(cols, x, nam, FALSE) if(cols_fun || keep.ids) { gn <- which(nam %in% attr(index, "nam")) # Needed for 1 or 3+ index variables if(length(gn)) { if(cols_fun) cols <- fsetdiff(cols, gn) else if(is.null(cols)) cols <- seq_along(unclass(x))[-gn] } if(!keep.ids) gn <- NULL } else gn <- NULL g <- index[[1L]] t <- switch(shift, time = index[[2L]], row = NULL, stop("'shift' must be either 'time' or 'row'")) if(length(t) && !any(ax$class == "indexed_frame")) t <- plm_check_time(t) if(length(gn) && length(cols)) { class(x) <- NULL # Works for multiple lags ! res <- c(x[gn], .Call(Cpp_flagleadl,x[cols],n,fill,fnlevels(g),g,t,stubs)) ax[["names"]] <- names(res) return(setAttributes(res, ax)) } else if(!length(gn)) # could speed up ? return(.Call(Cpp_flagleadl,fcolsubset(x, cols),n,fill,fnlevels(g),g,t,stubs)) .Call(Cpp_flagleadl,x,n,fill,fnlevels(g),g,t,stubs) } # Lead Operator F <- function(x, n = 1, ...) eval.parent(substitute(L(x, -n, ...))) collapse/R/psacf.R0000644000176200001440000003530614777170130013512 0ustar liggesusers # TODO: could use source code of C_acf and adjust for panel: https://github.com/SurajGupta/r-source/blob/a28e609e72ed7c47f6ddfbb86c85279a0750f0b7/src/library/stats/src/filter.c psacf <- function(x, ...) UseMethod("psacf") # , x psacf.default <- function(x, g, t = NULL, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric vector") typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) g <- G_guo(g) if(is.null(lag.max)) lag.max <- round(2*sqrt(length(x)/g[[1L]])) if(gscale) x <- fscaleCpp(x,g[[1L]],g[[2L]]) acf <- if(typei == 2L) cov(x, .Call(Cpp_flaglead,x,0:lag.max,NA,g[[1L]],g[[2L]],G_t(t),FALSE), use = "pairwise.complete.obs") else c(1, cov(x, .Call(Cpp_flaglead,x,seq_len(lag.max),NA,g[[1L]],g[[2L]],G_t(t),FALSE), use = "pairwise.complete.obs")/fvar.default(x)) # or complete obs ? d <- c(lag.max+1,1,1) if(typei == 3L) { acf <- .Call(C_pacf1, array(acf, d), lag.max) lag <- array(seq_len(d[1]), c(lag.max,1,1)) } else { dim(acf) <- d lag <- array(0:lag.max, d) } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = length(x), lag = lag, series = series, snames = NULL), "acf") if(plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psacf.data.frame <- function(x, by, t = NULL, cols = is.numeric, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) oldClass(x) <- NULL if(is.call(by)) { # best way ? nam <- names(x) if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), nam) by <- ckmatch(all.vars(by[[3L]]), nam) } else { by <- ckmatch(all.vars(by), nam) v <- if(is.null(cols)) seq_along(x)[-by] else fsetdiff(cols2int(cols, x, nam), by) } by <- if(length(by) == 1L) x[[by]] else x[by] if(is.call(t)) { # If time-variable supplied tv <- ckmatch(all.vars(t), nam, "Unknown time variable:") v <- fsetdiff(v, tv) t <- eval(if(length(tv) == 1L) t[[2L]] else attr(terms.formula(t), "variables"), x, attr(t, ".Environment")) # if(length(t) == 1L) x[[t]] else x[t] } x <- x[v] } else if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] lx <- length(x) nrx <- .Call(C_fnrow, x) snames <- names(x) attributes(x) <- NULL # already class is 0... Necessary ? getacf <- function(ng, g) { if(length(t)) t <- G_t(t) if(gscale) x <- fscalelCpp(x,ng,g) acf <- array(numeric(0), c(lag.max+1, lx, lx)) fun <- if(typei == 2L) cov else function(x, y, ...) cov(x, y, ...)/fvar.default(x) # cor for(i in seq_len(lx)) { xim <- .Call(Cpp_flaglead,x[[i]],0:lag.max,NA,ng,g,t,FALSE) for(j in seq_len(lx)) acf[ , j, i] <- fun(x[[j]], xim, use = "pairwise.complete.obs") # correct ! } acf } by <- G_guo(by) if(is.null(lag.max)) lag.max <- round(2*sqrt(nrx/by[[1L]])) acf <- getacf(by[[1L]], by[[2L]]) lag <- matrix(1, lx, lx) lag[lower.tri(lag)] <- -1 if(typei == 3L) { zvec <- double((1L+lag.max)*lx*lx) z <- .C(C_multi_yw, aperm(acf, 3:1), as.integer(nrx), as.integer(lag.max), as.integer(lx), coefs = zvec, pacf = zvec, var = zvec, aic = double(1L+lag.max), order = 0L, 1L) acf <- aperm(array(z$pacf, dim = c(lx, lx, lag.max + 1L)), 3:1)[-1L, , , drop = FALSE] } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = nrx, lag = if(typei == 3L) outer(1L:lag.max, lag) else outer(0L:lag.max, lag), series = series, snames = snames), "acf") if(plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", mar = if(lx > 2) c(3, 2.4, 2, 0.8) else par("mar"), ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psacf.pseries <- function(x, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric pseries ") index <- uncl2pix(x) g <- index[[1L]] t <- index[[2L]] if(length(t) && !inherits(x, "indexed_series")) t <- plm_check_time(t) ng <- fnlevels(g) typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) # faster ? if(is.null(lag.max)) lag.max <- round(2*sqrt(length(x)/ng)) if(gscale) x <- fscaleCpp(x,ng,g) acf <- if(typei == 2L) cov(x, .Call(Cpp_flaglead,x,0:lag.max,NA,ng,g,t,FALSE), use = "pairwise.complete.obs") else c(1, cov(x, .Call(Cpp_flaglead,x,seq_len(lag.max),NA,ng,g,t,FALSE), use = "pairwise.complete.obs")/fvar.default(x)) # or complete obs ? d <- c(lag.max+1,1,1) if(typei == 3L) { acf <- .Call(C_pacf1, array(acf, d), lag.max) lag <- array(seq_len(d[1]), c(lag.max,1,1)) } else { dim(acf) <- d lag <- array(0:lag.max, d) } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = length(x), lag = lag, series = series, snames = NULL), "acf") if (plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psacf.pdata.frame <- function(x, cols = is.numeric, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, ...) { typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) series <- l1orlst(as.character(substitute(x))) # faster solution ? index <- uncl2pix(x) clx <- oldClass(x) oldClass(x) <- NULL nrx <- .Call(C_fnrow, x) if(length(cols)) x <- x[cols2int(cols, x, names(x), FALSE)] lx <- length(x) snames <- names(x) g <- index[[1L]] t <- index[[2L]] if(length(t) && !any(clx == "indexed_frame")) t <- plm_check_time(t) ng <- fnlevels(g) attributes(x) <- NULL # necessary after unclass above ? if(is.null(lag.max)) lag.max <- round(2*sqrt(nrx/ng)) if(gscale) x <- fscalelCpp(x,ng,g) acf <- array(numeric(0), c(lag.max+1, lx, lx)) fun <- if(typei == 2L) cov else function(x, y, ...) cov(x, y, ...)/fvar.default(x) # cor for(i in seq_len(lx)) { xim <- .Call(Cpp_flaglead,x[[i]],0:lag.max,NA,ng,g,t,FALSE) for(j in seq_len(lx)) acf[ , j, i] <- fun(x[[j]], xim, use = "pairwise.complete.obs") # correct ! } lag <- matrix(1, lx, lx) lag[lower.tri(lag)] <- -1 if(typei == 3L) { zvec <- double((1L+lag.max)*lx*lx) z <- .C(C_multi_yw, aperm(acf, 3:1), as.integer(nrx), as.integer(lag.max), as.integer(lx), coefs = zvec, pacf = zvec, var = zvec, aic = double(1L+lag.max), order = 0L, 1L) acf <- aperm(array(z$pacf, dim = c(lx, lx, lag.max + 1L)), 3:1)[-1L, , , drop = FALSE] } acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = nrx, lag = if(typei == 3L) outer(1L:lag.max, lag) else outer(0L:lag.max, lag), series = series, snames = snames), "acf") if(plot) { plot(acf.out, ylab = if(typei == 3L) "Panel Series Partial ACF" else "Panel Series ACF", mar = if(lx > 2) c(3, 2.4, 2, 0.8) else par("mar"), ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } pspacf <- function(x, ...) UseMethod("pspacf") # , x pspacf.default <- function(x, g, t = NULL, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { if(plot) psacf.default(x, g, t, lag.max, "partial", plot, gscale, main = paste0("Series ",l1orlst(as.character(substitute(x)))), ...) else psacf.default(x, g, t, lag.max, "partial", plot, gscale, ...) } pspacf.pseries <- function(x, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { if(plot) psacf.pseries(x, lag.max, "partial", plot, gscale, main = paste0("Series ",l1orlst(as.character(substitute(x)))), ...) else psacf.pseries(x, lag.max, "partial", plot, gscale, ...) } pspacf.data.frame <- function(x, by, t = NULL, cols = is.numeric, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { psacf.data.frame(x, by, t, cols, lag.max, "partial", plot, gscale, ...) } pspacf.pdata.frame <- function(x, cols = is.numeric, lag.max = NULL, plot = TRUE, gscale = TRUE, ...) { psacf.pdata.frame(x, cols, lag.max, "partial", plot, gscale, ...) } psccf <- function(x, y, ...) UseMethod("psccf") # , x psccf.default <- function(x, y, g, t = NULL, lag.max = NULL, type = c("correlation", "covariance"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric vector") if(!is.numeric(y)) stop("'y' must be a numeric vector") lx <- length(x) if(lx != length(y)) stop("length(x) must be equal to length(y)") typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) snames <- paste(c(l1orlst(as.character(substitute(x))), l1orlst(as.character(substitute(x)))), collapse = " & ") getccf <- function(ng, g) { if(length(t)) t <- G_t(t) if(gscale) { x <- fscaleCpp(x,ng,g) y <- fscaleCpp(y,ng,g) } if(typei == 2L) drop(cov(x, .Call(Cpp_flaglead,y,-lag.max:lag.max,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")) else drop(cov(x, .Call(Cpp_flaglead,y,-lag.max:lag.max,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")/(fsd.default(x)*fsd.default(y))) # or complete obs ? } g <- G_guo(g) if(is.null(lag.max)) lag.max <- round(2*sqrt(lx/g[[1L]])) acf <- getccf(g[[1L]], g[[2L]]) d <- c(2*lag.max+1,1,1) dim(acf) <- d acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = lx, lag = array(-lag.max:lag.max, d), series = snames, snames = snames), "acf") if (plot) { plot(acf.out, ylab = "Panel Series CCF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } psccf.pseries <- function(x, y, lag.max = NULL, type = c("correlation", "covariance"), plot = TRUE, gscale = TRUE, ...) { if(!is.numeric(x)) stop("'x' must be a numeric pseries") if(!is.numeric(y) || !inherits(y, "pseries")) stop("'y' must be a numeric pseries") lx <- length(x) if(lx != length(y)) stop("length(x) must be equal to length(y)") if(!identical(findex(x), findex(y))) stop("index of x and y differs") index <- uncl2pix(x) g <- index[[1L]] t <- index[[2L]] if(length(t) && !inherits(x, "indexed_series")) t <- plm_check_time(t) ng <- fnlevels(g) typei <- switch(type[1L], correlation = 1L, covariance = 2L, partial = 3L, stop("Unknown type!")) snames <- paste(c(l1orlst(as.character(substitute(x))), l1orlst(as.character(substitute(x)))), collapse = " & ") if (gscale) { x <- fscaleCpp(x,ng,g) y <- fscaleCpp(y,ng,g) } if (is.null(lag.max)) lag.max <- round(2*sqrt(length(x)/ng)) l_seq <- -lag.max:lag.max acf <- if(typei == 2L) drop(cov(x, .Call(Cpp_flaglead,y,l_seq,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")) else drop(cov(x, .Call(Cpp_flaglead,y,l_seq,NA,ng,g,t,FALSE), use = "pairwise.complete.obs")/(fsd.default(x)*fsd.default(y))) # or complete obs ? d <- c(2*lag.max+1,1,1) dim(acf) <- d acf.out <- `oldClass<-`(list(acf = acf, type = type[1L], n.used = lx, lag = array(l_seq, d), series = snames, snames = snames), "acf") if (plot) { plot(acf.out, ylab = "Panel Series CCF", ...) invisible(acf.out) } else { if(!missing(...)) unused_arg_action(match.call(), ...) return(acf.out) } } # could do AR models also : # psar.data.frame <- function (x, aic = TRUE, order.max = lag.max, na.action = na.fail, # demean = TRUE, series = NULL, var.method = 1L, ...) # { # if (is.null(series)) # series <- l1orlst(as.character(substitute(x))) # if (ists <- is.ts(x)) # xtsp <- tsp(x) # x <- na.action(as.ts(x)) # if (anyNA(x)) # stop("NAs in 'x'") # if (ists) # xtsp <- tsp(x) # xfreq <- frequency(x) # x <- as.matrix(x) # nser <- ncol(x) # n.used <- nrow(x) # if (demean) { # x.mean <- colMeans(x) # x <- sweep(x, 2L, x.mean, check.margin = FALSE) # } # else x.mean <- rep(0, nser) # order.max <- if (is.null(order.max)) # floor(10 * log10(n.used)) # else floor(order.max) # if (order.max < 1L) # stop("'order.max' must be >= 1") # xacf <- acf(x, type = "cov", plot = FALSE, lag.max = order.max)$acf # z <- .C(stats:::C_"multi_yw", # aperm(xacf, 3:1), # as.integer(n.used), # as.integer(order.max), # as.integer(nser), # coefs = double((1L +order.max) * nser * nser), # pacf = double((1L + order.max) * nser * nser), # var = double((1L + order.max) * nser * nser), # aic = double(1L + order.max), # order = integer(1L), # as.integer(aic)) # partialacf <- aperm(array(z$pacf, dim = c(nser, nser, order.max + # 1L)), 3:1)[-1L, , , drop = FALSE] # var.pred <- aperm(array(z$var, dim = c(nser, nser, order.max + # 1L)), 3:1) # xaic <- setNames(z$aic - bmin(z$aic), 0:order.max) # order <- z$order # resid <- x # if (order > 0) { # ar <- -aperm(array(z$coefs, dim = c(nser, nser, order.max + # 1L)), 3:1)[2L:(order + 1L), , , drop = FALSE] # for (i in 1L:order) resid[-(1L:order), ] <- resid[-(1L:order), # ] - x[(order - i + 1L):(n.used - i), ] %*% t(ar[i, # , ]) # resid[1L:order, ] <- NA # } # else ar <- array(dim = c(0, nser, nser)) # var.pred <- var.pred[order + 1L, , , drop = TRUE] * n.used/(n.used - # nser * (demean + order)) # if (ists) { # attr(resid, "tsp") <- xtsp # attr(resid, "class") <- c("mts", "ts") # } # snames <- colnames(x) # colnames(resid) <- snames # dimnames(ar) <- list(seq_len(order), snames, snames) # dimnames(var.pred) <- list(snames, snames) # dimnames(partialacf) <- list(1L:order.max, snames, snames) # res <- list(order = order, ar = ar, var.pred = var.pred, # x.mean = x.mean, aic = xaic, n.used = n.used, order.max = order.max, # partialacf = partialacf, resid = resid, method = "Yule-Walker", # series = series, frequency = xfreq, call = match.call()) # oldClass(res) <- "ar" # return(res) # } collapse/R/roworder_colorder_rename.R0000644000176200001440000002346015121637602017473 0ustar liggesusers roworder <- function(X, ..., na.last = TRUE, verbose = .op[["verbose"]]) { ovars <- .c(...) if(!length(ovars)) stop("... needs to be comma-separated column names, optionally with a '-' prefix for descending order.") dec <- startsWith(ovars, "-") if(any(dec)) ovars[dec] <- substr(ovars[dec], 2L, 1000000L) z <- as.pairlist(.subset(X, ckmatch(ovars, attr(X, "names")))) o <- .Call(C_radixsort, na.last, dec, FALSE, FALSE, TRUE, z) if(!is.na(na.last) && attr(o, "sorted")) { if(verbose == 2L) message("Data is already sorted, returning data.") return(condalc(X, inherits(X, "data.table"))) } rn <- attr(X, "row.names") res <- .Call(C_subsetDT, X, o, seq_along(unclass(X)), FALSE) if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) attr(res, "row.names") <- Csv(rn, o) clx <- oldClass(X) if(any(clx == "pdata.frame")) { if(verbose) message("Sorting an indexed frame / pdata.frame may not be the most efficient option. Consider sorting the frame before indexing it, or set verbose = FALSE to silence this message.") index <- findex(X) index_o <- .Call(C_subsetDT, index, o, seq_along(unclass(index)), FALSE) if(inherits(X, "indexed_frame")) return(reindex(res, index_o)) attr(res, "index") <- index_o } else if(any(clx == "grouped_df")) { if(verbose) message("Sorting a grouped data frame may not be the most efficient option. Consider sorting the frame before grouping it, or set verbose = FALSE to silence this message.") g <- GRP.grouped_df(X, call = FALSE) g[[2L]] <- Csv(g[[2L]], o) if(is.null(g[["group.starts"]])) warning("Cannot reorder a grouped data frame created with dplyr::group_by. Converting the grouping object to collapse 'GRP' object and reordering.") else if(length(g[[7L]])) g[[7L]] <- Csv(g[[7L]], o) # correct ?? -> seems so! attr(res, "groups") <- g } res } posord <- function(sq, o, pos) switch(pos, front = c(o, sq[-o]), end = c(sq[-o], o), exchange = `[<-`(sq, o[forder.int(o)], value = o), after = { if(length(o) == 1L) stop('Need o supply at least 2 columns if pos = "after"') om1 <- o[-1L] smo <- sq[-om1] w1 <- whichv(smo, o[1L]) c(smo[1L:w1], om1, smo[(w1+1L):length(smo)]) }, stop("pos must be 'front', 'end', 'exchange' or 'after'.")) roworderv <- function(X, cols = NULL, neworder = NULL, decreasing = FALSE, na.last = TRUE, pos = "front", verbose = .op[["verbose"]]) { ncheck <- is.null(neworder) if(ncheck) { check <- FALSE if(is.null(cols)) { if(inherits(X, "sf")) { Xo <- X oldClass(Xo) <- NULL Xo[[attr(Xo, "sf_column")]] <- NULL neworder <- radixorderv(Xo, na.last, decreasing) } else neworder <- radixorderv(X, na.last, decreasing) } else neworder <- radixorderv(colsubset(X, cols), na.last, decreasing) if(!is.na(na.last) && attr(neworder, "sorted")) { if(verbose == 2L) message("Data is already sorted, returning data.") return(condalc(X, inherits(X, "data.table"))) } } else { if(!is.integer(neworder)) neworder <- if(is.numeric(neworder)) as.integer(neworder) else if(is.logical(neworder)) which(neworder) else stop("neworder should be integer or logical.") if(length(neworder) != fnrow(X)) neworder <- posord(seq_row(X), neworder, pos) } rn <- attr(X, "row.names") res <- .Call(C_subsetDT, X, neworder, seq_along(unclass(X)), !ncheck) if(!(is.numeric(rn) || is.null(rn) || rn[1L] == "1")) attr(res, "row.names") <- Csv(rn, neworder) clx <- oldClass(X) if(any(clx == "pdata.frame")) { if(verbose) message("Sorting an indexed frame / pdata.frame may not be the most efficient option. Consider sorting the frame before indexing it, or set verbose = FALSE to silence this message.") index <- findex(X) index_neworder <- .Call(C_subsetDT, index, neworder, seq_along(unclass(index)), FALSE) if(inherits(X, "indexed_frame")) return(reindex(res, index_neworder)) # pdata.frame cannot be data.table... attr(res, "index") <- index_neworder } else if(any(clx == "grouped_df")) { if(verbose) message("Sorting a grouped data frame may not be the most efficient option. Consider sorting the frame before grouping it, or set verbose = FALSE to silence this message.") g <- GRP.grouped_df(X, call = FALSE) g[[2L]] <- Csv(g[[2L]], neworder) if(verbose && is.null(g[["group.starts"]])) warning("Cannot reorder a grouped data frame created with dplyr::group_by. Converting the grouping object to collapse 'GRP' object and reordering.") else if(length(g[[7L]])) g[[7L]] <- Csv(g[[7L]], neworder) # correct ?? -> seems so! attr(res, "groups") <- g } res } colorder <- function(.X, ..., pos = "front") { # This also takes names and indices .... ax <- attributes(.X) oldClass(.X) <- NULL # attributes ? nam <- names(.X) iX <- seq_along(.X) nl <- `names<-`(as.vector(iX, "list"), nam) vars <- eval(substitute(c(...)), nl, parent.frame()) if(!is.integer(vars)) stop(paste0("Unknown columns: ", .c(...))) if(length(names(vars))) { # Allow renaming during selection nam_vars <- names(vars) nonmiss <- nzchar(nam_vars) nam[vars[nonmiss]] <- nam_vars[nonmiss] } if(length(vars) != length(iX)) vars <- posord(iX, vars, pos) return(condalcSA(.X[vars], `[[<-`(ax, "names", nam[vars]), any(ax[["class"]] == "data.table"))) } colorderv <- function(X, neworder = radixorder(names(X)), pos = "front", regex = FALSE, ...) { # This also takes names and indices .... ax <- attributes(X) oldClass(X) <- NULL # attributes ? nam <- names(X) if(regex) vars <- rgrep(neworder, nam, ..., sort = FALSE) else { if(!missing(...)) unused_arg_action(match.call(), ...) vars <- cols2int(neworder, X, nam) } if(length(vars) != length(X)) vars <- posord(seq_along(X), vars, pos) return(condalcSA(X[vars], `[[<-`(ax, "names", nam[vars]), any(ax[["class"]] == "data.table"))) } # Internal helper for frename: allows both pandas and dplyr style rename repl_nam_arg <- function(namarg, args, nam) { m <- match(namarg, nam) if(anyNA(m)) { if(allNA(m)) { m <- ckmatch(as.character(args), nam) nam[m] <- namarg } else stop(paste("Unknown columns:", paste(namarg[is.na(m)], collapse = ", "))) } else nam[m] <- as.character(args) nam } frename_core <- function(.x, cols, .nse, ...) { args <- if(.nse) substitute(c(...))[-1L] else c(...) nam <- attr(.x, "names") namarg <- names(args) if(length(namarg) && all(nzchar(namarg))) return(repl_nam_arg(namarg, args, nam)) # The second condition is needed for a function with additional arguments to be passed. arg1 <- ..1 if(length(cols)) ind <- cols2int(cols, .x, nam) if(is.function(arg1)) { FUN <- if(...length() == 1L) arg1 else # could do special case if ...length() == 2L function(x) do.call(arg1, c(list(x), list(...)[-1L])) if(is.null(cols)) return(FUN(nam)) nam[ind] <- FUN(nam[ind]) } else if(is.character(arg1)) { if(is.null(cols)) { if(length(namarg <- names(arg1))) return(repl_nam_arg(namarg, arg1, nam)) if(length(arg1) != length(nam)) stop(sprintf("If cols = NULL, the vector or names length = %i must match the object names length = %i.", length(arg1), length(nam))) return(arg1) } if(length(arg1) != length(ind)) stop(sprintf("The vector of names length = %s does not match the number of columns selected = %s.", length(arg1), length(ind))) nam[ind] <- arg1 } else stop("... needs to be expressions colname = newname, a function to apply to the names of columns in cols, or a suitable character vector of names.") return(nam) } frename <- function(.x, ..., cols = NULL, .nse = TRUE) { attr(.x, "names") <- frename_core(.x, cols, .nse, ...) condalc(.x, inherits(.x, "data.table")) } rnm <- frename # rnm clashes with 2 packages.., rme would work but is inconsistent setrename <- function(.x, ..., cols = NULL, .nse = TRUE) { nam <- frename_core(.x, cols, .nse, ...) # No longer needed, as also calling setselfref() in C now. # if(inherits(.x, "data.table")) { # # Need to allocate here, because the named are captured in ".internal.selfref", so modification be reference still produces an error. # res <- alc(`attr<-`(.x, "names", nam)) # assign(as.character(substitute(.x)), res, envir = parent.frame()) # return(invisible(res)) # } invisible(.Call(C_setnames, .x, nam)) } # setrnm <- setrename relabel <- function(.x, ..., cols = NULL, attrn = "label") { # , sc = TRUE args <- list(...) nam <- attr(.x, "names") namarg <- names(args) if(is.null(namarg) || !all(nzchar(namarg))) { # The second condition is needed for a function with additional arguments to be passed. arg1 <- args[[1L]] if(length(cols)) ind <- cols2int(cols, .x, nam) if(is.function(arg1)) { lab <- vlabels(.x, attrn, FALSE) FUN <- if(length(args) == 1L) arg1 else function(x) do.call(arg1, c(list(x), args[-1L])) if(is.null(cols)) return(.Call(C_setvlabels, .x, attrn, FUN(lab), NULL)) args <- FUN(lab[ind]) } else if(is.character(arg1)) { if(is.null(cols)) ind <- if(length(names(arg1))) ckmatch(names(arg1), nam) else NULL args <- arg1 } else stop("... needs to be expressions colname = 'New Label', a function to apply to the names of columns in cols, or a suitable character vector of labels.") } else ind <- ckmatch(namarg, nam) .Call(C_setvlabels, .x, attrn, args, ind) } setrelabel <- function(.x, ..., cols = NULL, attrn = "label") invisible(relabel(.x, ..., cols = cols, attrn = attrn)) collapse/R/fprod.R0000644000176200001440000001416514777170130013530 0ustar liggesusers # For foundational changes to this code see fsum.R fprod <- function(x, ...) UseMethod("fprod") # , x fprod.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fprod.matrix(x, g, w, TRA, na.rm, use.g.names, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fprod,x,0L,0L,w,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fprod,x,length(lev),g,w,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_fprod,x,fnlevels(g),g,w,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fprod,x,attr(g,"N.groups"),g,w,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fprod,x,g[[1L]],g[[2L]],w,na.rm), GRPnames(g))) return(.Call(C_fprod,x,g[[1L]],g[[2L]],w,na.rm)) } if(is.null(g)) return(TRAC(x,.Call(C_fprod,x,0L,0L,w,na.rm),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_fprod,x,g[[1L]],g[[2L]],w,na.rm),g[[2L]],TRA, ...) } fprod.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fprodm,x,0L,0L,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fprodm,x,length(lev),g,w,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fprodm,x,fnlevels(g),g,w,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fprodm,x,attr(g,"N.groups"),g,w,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fprodm,x,g[[1L]],g[[2L]],w,na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fprodm,x,g[[1L]],g[[2L]],w,na.rm,FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(C_fprodm,x,0L,0L,w,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_fprodm,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TRA, ...) } fprod.zoo <- function(x, ...) if(is.matrix(x)) fprod.matrix(x, ...) else fprod.default(x, ...) fprod.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fprod.matrix(x, ...), x) else fprod.default(x, ...) fprod.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fprodl,x,0L,0L,w,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fprodl,x,length(lev),g,w,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fprodl,x,fnlevels(g),g,w,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fprodl,x,attr(g,"N.groups"),g,w,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE), groups)) return(.Call(C_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE)) } if(is.null(g)) return(TRAlC(x,.Call(C_fprodl,x,0L,0L,w,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TRA, ...) } fprod.list <- function(x, ...) fprod.data.frame(x, ...) fprod.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) prodw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) prodw <- `names<-`(list(.Call(C_fprod,w,g[[1L]],g[[2L]],NULL,na.rm)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "prod.")) else if(keep.group_vars) gn2 <- gn else prodw <- gn2 <- wn } } } gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(prodw), nam[-gn]) return(setAttributes(c(g[[4L]], prodw, .Call(C_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } ax[["names"]] <- c(names(prodw), nam[-gn]) return(setAttributes(c(prodw, .Call(C_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(C_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE)), ax)) } else return(setAttributes(.Call(C_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE), ax)) } else if(keep.group_vars || (keep.w && length(prodw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(C_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fprodl,x[-gn],g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fprodl,x,g[[1L]],g[[2L]],w,na.rm,FALSE),g[[2L]],TRA, ...)) } collapse/R/fFtest.R0000644000176200001440000001211014777170130013635 0ustar liggesusers getdf <- function(x) { if(is.atomic(x)) if(is.factor(x)) return(fnlevels(x)-1L) else return(1L) bsum(vapply(unattrib(x), function(i) if(is.factor(i)) fnlevels(i)-1L else 1L, 1L)) } fFtest <- function(...) if(is.call(..1) || is.call(..2)) fFtest.formula(...) else fFtest.default(...) fFtest.default <- function(y, exc, X = NULL, w = NULL, full.df = TRUE, ...) { if(!is.numeric(y)) stop("y needs to be a numeric vector") if(!is.null(X)) { Xn <- fNCOL(X) atl <- is.atomic(X) && is.numeric(X) && is.atomic(exc) && is.numeric(exc) if(length(w)) { if(atl) { cc <- which(complete.cases(w, y, X, exc)) if(length(cc) < length(w)) { data <- cbind(y, X, exc)[cc, , drop = FALSE] w <- w[cc] } } else { data <- na_omit(qDF(c(list(w = w), list(y = y), qDF(X), qDF(exc)))) w <- .subset2(data, 1L) data[[1L]] <- NULL } } else { data <- if(atl) na_omit(cbind(y, X, exc)) else na_omit(qDF(c(list(y = y), qDF(X), qDF(exc)))) } if(full.df && !atl && any(fc <- .Call(C_vtypes, data, 2L))) { # vapply(unattrib(data), is.factor, TRUE) cld <- oldClass(data) oldClass(data) <- NULL data[fc] <- lapply(data[fc], fdroplevels.factor) df <- vapply(unattrib(data), function(i) if(is.factor(i)) fnlevels(i)-1L else 1L, 1L) # getdf(data) k <- bsum(df) # 1 for intercept added with y p <- bsum(df[(Xn+2L):length(df)]) y <- data[[1L]] oldClass(data) <- cld } else { p <- fNCOL(exc) if(atl) { k <- ncol(data) # 1 for intercept added with y y <- data[, 1L] } else { k <- length(unclass(data)) # 1 for intercept added with y y <- .subset2(data, 1L) } } kr <- k-p-1L vy <- fvar.default(y, w = w) if(atl) { n <- nrow(data) r2f <- 1 - fvar.default(fhdwithin.default(y, data[, -1L], w, na.rm = FALSE, ...), w = w)/vy r2r <- 1 - fvar.default(fhdwithin.default(y, data[, 2:(Xn+1L)], w, na.rm = FALSE, ...), w = w)/vy } else { n <- fnrow(data) r2f <- 1 - fvar.default(fhdwithin.default(y, fcolsubset(data, -1L), w, na.rm = FALSE, ...), w = w)/vy r2r <- 1 - fvar.default(fhdwithin.default(y, fcolsubset(data, 2:(Xn+1L)), w, na.rm = FALSE, ...), w = w)/vy } ndff <- k-1L ddff <- n-k Fstatf <- r2f/ndff * ddff/(1-r2f) pf <- pf(Fstatf, ndff, ddff, lower.tail = FALSE) ddfr <- n-kr-1L Fstatr <- r2r/kr * ddfr/(1-r2r) pr <- pf(Fstatr, kr, ddfr, lower.tail = FALSE) Fstate <- (r2f - r2r)/p * ddff/(1-r2f) pe <- pf(Fstate, p, ddff, lower.tail = FALSE) res <- matrix(c(r2f, ndff, ddff, Fstatf, pf, r2r, kr, ddfr, Fstatr, pr, r2f-r2r, p, ddff, Fstate, pe), nrow = 3L, ncol = 5L, byrow = TRUE, dimnames = list(c("Full Model","Restricted Model","Exclusion Rest."), c("R-Sq.","DF1","DF2","F-Stat.","P-Value"))) oldClass(res) <- c("fFtest","matrix") } else { u <- fhdwithin.default(y, exc, w, na.rm = .op[["na.rm"]], ...) # Residuals miss <- attr(u, "na.rm") if(!is.null(miss)) w <- w[-miss] if(full.df && length(miss) && !is.atomic(exc) && !is.numeric(exc)) { p <- if(is.factor(exc)) fnlevels(exc[-miss, drop = TRUE])-1L else if(any(.Call(C_vtypes, exc, 2L))) # vapply(unattrib(exc), is.factor, TRUE) getdf(fdroplevels.data.frame(ss(exc, -miss))) else length(unclass(exc)) } else if(full.df) { p <- if(is.factor(exc) || (is.list(exc) && any(.Call(C_vtypes, exc, 2L)))) getdf(fdroplevels(exc)) else fNCOL(exc) # vapply(unattrib(exc), is.factor, TRUE) } else p <- fNCOL(exc) n <- length(u) r2 <- 1 - fvar.default(u, w = w)/fvar.default(if(is.null(miss)) y else y[-miss], w = w) # R-Squared ddf <- n-p-1L Fstat <- r2/p * ddf/(1-r2) # F statistic for the model (the constant goes unrestricted) Pv <- pf(Fstat, p, ddf, lower.tail = FALSE) # P-value corresponding to the F statistic res <- c(`R-Sq.` = r2, `DF1` = p, `DF2` = ddf, `F-Stat.` = Fstat, `P-value` = Pv) oldClass(res) <- "fFtest" } res } fFtest.formula <- function(formula, data = NULL, weights = NULL, ...) { w <- substitute(weights) pe <- parent.frame() if(length(w)) w <- eval(w, data, pe) if(!any(all.names(formula) == "|")) { # Standard formula (no X term) tms <- attributes(terms.formula(formula, data = data)) mf <- eval(tms$variables, data, pe) exc <- mf[-1L] names(exc) <- tms$term.labels return(fFtest.default(mf[[1L]], exc, NULL, w, ...)) } y <- eval(formula[[2L]], data, pe) fml <- formula[[3L]] exc <- attributes(terms.formula(call("~", fml[[2L]]), data = data)) exc <- eval(exc$variables, data, pe) X <- attributes(terms.formula(call("~", fml[[3L]]), data = data)) X <- eval(X$variables, data, pe) fFtest.default(y, exc, X, w, ...) } print.fFtest <- function(x, digits = .op[["digits"]] + 1L, ...) { xx <- unclass(format(round(x, digits))) xpos <- x >= 1 xx[xpos] <- sub(paste0(c(".", rep("0",digits)), collapse = ""), "", xx[xpos]) # Problem: Deletes .00 also.. print.default(xx, quote = FALSE, right = TRUE, ...) } collapse/R/global_macros.R0000644000176200001440000004067715016652026015225 0ustar liggesusers # Global Options set_collapse <- function(...) { opts <- if(...length() == 1L && is.list(..1)) ..1 else list(...) op_old <- as.list(.op) nam <- names(opts) ckmatch(nam, c("nthreads", "na.rm", "sort", "stable.algo", "mask", "remove", "stub", "verbose", "digits"), e = "Unknown option:") if(length(opts$nthreads)) { nthreads <- as.integer(opts$nthreads) if(is.na(nthreads) || nthreads <= 0L) stop("nthreads needs to be a positive integer") .op$nthreads <- nthreads } if(length(opts$na.rm)) { na.rm <- as.logical(opts$na.rm) if(is.na(na.rm)) stop("na.rm needs to be TRUE or FALSE") .op$na.rm <- na.rm } if(length(opts$sort)) { sort <- as.logical(opts$sort) if(is.na(sort)) stop("sort needs to be TRUE or FALSE") .op$sort <- sort } if(length(opts$stable.algo)) { stable.algo <- as.logical(opts$stable.algo) if(is.na(stable.algo)) stop("stable.algo needs to be TRUE or FALSE") .op$stable.algo <- stable.algo } if(length(opts$stub)) { stub <- as.logical(opts$stub) if(is.na(stub)) stop("stub needs to be TRUE or FALSE") .op$stub <- stub } if(length(opts$verbose)) { verbose <- as.integer(opts$verbose) if(is.na(verbose) || verbose < 0L) stop("verbose needs to be a non-negative integer") .op$verbose <- verbose } if(length(opts$digits)) { digits <- as.integer(opts$digits) if(is.na(digits) || digits < 0L) stop("digits needs to be a non-negative integer") .op$digits <- digits } if(any(mrl <- c("mask", "remove") %in% nam)) { # either can be NULL maskl <- mrl[1L] && !identical(op_old$mask, opts$mask) removel <- mrl[2L] && !identical(op_old$remove, opts$remove) if(maskl || removel) { clpns <- getNamespace("collapse") .Call(C_unlock_collapse_namespace, clpns) if(!maskl) opts$mask <- op_old$mask # problem: option remove does not restore masked exports, e.g. when moving from remove = "between" to remove = NULL when mask = "all" (and not changing) if(maskl && length(op_old$mask)) do_collapse_unmask(clpns) # Fixed in do_collapse_mask(): not overriding already masked function in namespace anymore if(length(opts$mask)) do_collapse_mask(clpns, opts$mask) .op$mask <- opts$mask if(removel || (maskl && length(op_old$remove))) { # When changing mask setting also need to change remove again if specified if(!removel) opts$remove <- op_old$remove if(removel && length(op_old$remove)) do_collapse_restore_exports(clpns) # Also adjusted do_collapse_remove() to only remove existing funs if(length(opts$remove)) do_collapse_remove(clpns, opts$remove, namespace = FALSE) .op$remove <- opts$remove } lockEnvironment(clpns, bindings = TRUE) if(anyv(search(), "package:collapse")) { detach("package:collapse") suppressPackageStartupMessages(attachNamespace(clpns)) } } } invisible(op_old) } get_collapse <- function(opts = NULL) if(is.null(opts)) as.list(.op) else if(length(opts) == 1L) .op[[opts]] else `names<-`(lapply(opts, function(x) .op[[x]]), opts) # Global Macros .COLLAPSE_TOPICS <- c("collapse-documentation","fast-statistical-functions","fast-grouping-ordering", "fast-data-manipulation","quick-conversion","advanced-aggregation", "data-transformations","time-series-panel-series","list-processing", "summary-statistics","recode-replace","efficient-programming","small-helpers","collapse-options") # .COLLAPSE_TOPICS <- c("collapse-documentation","A1-fast-statistical-functions","A2-fast-grouping-ordering", # "A3-fast-data-manipulation","A4-quick-conversion","A5-advanced-aggregation", # "A6-data-transformations","A7-time-series-panel-series","A8-list-processing", # "A9-summary-statistics","AA1-recode-replace","AA2-efficient-programming","AA3-small-helpers") # rd <- tools::Rd_db("collapse") # .COLLAPSE_HELP <- unlist(lapply(rd, tools:::.Rd_get_metadata, "name"), use.names = FALSE) # grep("^A|depreciated", unlist(lapply(rd, tools:::.Rd_get_metadata, "name"), use.names = FALSE), invert = TRUE, value = TRUE) # # Get updated .COLLAPSE_ALL: # # ".default$|.matrix$|.data.frame$" # v <- grep("\\.|N|HD", objects("package:collapse"), invert = TRUE, value = TRUE) # getNamespaceExports("collapse") # # grep("N", objects("package:collapse"), value = TRUE) # v <- c(v, "GRPN", "GRPid", "HDB", "HDW", "allNA", "whichNA", "replace_NA") # # TODO: also remove Date_vars... # cat(unique(sort(v)), sep = '", "') # all package objects.. # allobj <- ls(getNamespace("collapse"), all.names=TRUE) # dput(setdiff(objects("package:collapse"), .COLLAPSE_DATA)) .COLLAPSE_ALL_EXPORTS <- c("%-=%", "%!=%", "%!iin%", "%!in%", "%*=%", "%/=%", "%+=%", "%=%", "%==%", "%c-%", "%c*%", "%c/%", "%c+%", "%cr%", "%iin%", "%r-%", "%r*%", "%r/%", "%r+%", "%rr%", "add_stub", "add_vars", "add_vars<-", "all_funs", "all_identical", "all_obj_equal", "allNA", "alloc", "allv", "any_duplicated", "anyv", "as_character_factor", "as_factor_GRP", "as_factor_qG", "as_numeric_factor", "as_integer_factor", "atomic_elem", "atomic_elem<-", "av", "av<-", "B", "BY", "BY.data.frame", "BY.default", "BY.matrix", "cat_vars", "cat_vars<-", "char_vars", "char_vars<-", "cinv", "ckmatch", "collap", "collapg", "collapv", "colorder", "colorderv", "copyAttrib", "copyMostAttrib", "copyv", "D", "dapply", "date_vars", "date_vars<-", "descr", "descr.default", "Dlog", "fact_vars", "fact_vars<-", "fbetween", "fbetween.data.frame", "fbetween.default", "fbetween.matrix", "fcompute", "fcomputev", "fcount", "fcountv", "fcumsum", "fcumsum.data.frame", "fcumsum.default", "fcumsum.matrix", "fdiff", "fdiff.data.frame", "fdiff.default", "fdiff.matrix", "fdim", "fdist", "fdroplevels", "fdroplevels.data.frame", "fdroplevels.factor", "fduplicated", "ffirst", "ffirst.data.frame", "ffirst.default", "ffirst.matrix", "fFtest", "fFtest.default", "fgroup_by", "group_by_vars", "fgroup_vars", "fgrowth", "fgrowth.data.frame", "fgrowth.default", "fgrowth.matrix", "fhdbetween", "fHDbetween", "fhdbetween.data.frame", "fhdbetween.default", "fhdbetween.matrix", "fhdwithin", "fHDwithin", "fhdwithin.data.frame", "fhdwithin.default", "fhdwithin.matrix", "findex", "findex_by", "finteraction", "flag", "flag.data.frame", "flag.default", "flag.matrix", "flast", "flast.data.frame", "flast.default", "flast.matrix", "flm", "flm.default", "fmatch", "fmax", "fmax.data.frame", "fmax.default", "fmax.matrix", "fmean", "fmean.data.frame", "fmean.default", "fmean.matrix", "fmedian", "fmedian.data.frame", "fmedian.default", "fmedian.matrix", "fmin", "fmin.data.frame", "fmin.default", "fmin.matrix", "fmode", "fmode.data.frame", "fmode.default", "fmode.matrix", "fmutate", "fncol", "fndistinct", "fNdistinct", "fndistinct.data.frame", "fndistinct.default", "fndistinct.matrix", "fnlevels", "fnobs", "fNobs", "fnobs.data.frame", "fnobs.default", "fnobs.matrix", "fnrow", "fnth", "fnth.data.frame", "fnth.default", "fnth.matrix", "fnunique", "fprod", "fprod.data.frame", "fprod.default", "fprod.matrix", "fquantile", "frange", "frename", "fscale", "fscale.data.frame", "fscale.default", "fscale.matrix", "fsd", "fsd.data.frame", "fsd.default", "fsd.matrix", "fselect", "fselect<-", "fsubset", "fsubset.data.frame", "fsubset.default", "fsubset.matrix", "fslice", "fslicev", "fsum", "fsum.data.frame", "fsum.default", "fsum.matrix", "fsummarise", "fsummarize", "ftransform", "ftransform<-", "ftransformv", "fungroup", "funique", "funique.data.frame", "funique.default", "fvar", "fvar.data.frame", "fvar.default", "fvar.matrix", "fwithin", "fwithin.data.frame", "fwithin.default", "fwithin.matrix", "G", "gby", "get_collapse", "get_elem", "get_vars", "get_vars<-", "greorder", "group", "groupv", "groupid", "GRP", "GRP.default", "GRPid", "GRPN", "GRPnames", "gsplit", "gv", "gv<-", "gvr", "gvr<-", "has_elem", "HDB", "HDW", "iby", "irreg_elem", "is_categorical", "is_date", "is_GRP", "is_irregular", "is_qG", "is_unlistable", "itn", "ix", "join", "L", "ldepth", "list_elem", "list_elem<-", "logi_vars", "logi_vars<-", "massign", "mctl", "missing_cases", "mrtl", "mtt", "na_insert", "na_omit", "na_rm", "na_locf", "na_focb", "namlab", "num_vars", "num_vars<-", "nv", "nv<-", "pad", "pivot", "plot.psmat", "print.pwcor", "print.pwcov", "print.qsu", "psacf", "psacf.data.frame", "psacf.default", "psccf", "psccf.default", "psmat", "psmat.data.frame", "psmat.default", "pspacf", "pspacf.data.frame", "pspacf.default", "pwcor", "pwcov", "pwnobs", "qDF", "qDT", "qF", "qG", "qM", "qsu", "qsu.data.frame", "qsu.default", "qsu.matrix", "qtab", "qtable", "qTBL", "radixorder", "radixorderv", "rapply2d", "recode_char", "recode_num", "reg_elem", "reindex", "relabel", "replace_inf", "replace_Inf", "replace_na", "replace_NA", "replace_outliers", "rm_stub", "rnm", "rowbind", "roworder", "roworderv", "rsplit", "rsplit.data.frame", "rsplit.default", "rsplit.matrix", "sbt", "seq_col", "seq_row", "seqid", "set_collapse", "setattrib", "setAttrib", "setColnames", "setDimnames", "setLabels", "setop", "setrelabel", "setrename", "setRownames", "settfm", "settfmv", "setTRA", "settransform", "settransformv", "setv", "slt", "slt<-", "smr", "ss", "STD", "t_list", "tfm", "tfm<-", "tfmv", "timeid", "to_plm", "TRA", "TRA.data.frame", "TRA.default", "TRA.matrix", "unattrib", "unindex", "unlist2d", "varying", "varying.data.frame", "varying.default", "varying.matrix", "vclasses", "vec", "vgcd", "vlabels", "vlabels<-", "vlengths", "vtypes", "W", "whichNA", "whichv") .COLLAPSE_ALL <- sort(unique(c("%-=%", "%!=%", "%!iin%", "%!in%", "%*=%", "%/=%", "%+=%", "%=%", "%==%", "%c-%", "%c*%", "%c/%", "%c+%", "%cr%", "%iin%", "%r-%", "%r*%", "%r/%", "%r+%", "%rr%", "add_stub", "add_vars", "add_vars<-", "all_funs", "all_identical", "all_obj_equal", "allNA", "alloc", "allv", "any_duplicated", "anyv", "as_character_factor", "as_factor_GRP", "as_factor_qG", "as_numeric_factor", "as_integer_factor", "atomic_elem", "atomic_elem<-", "av", "av<-", "B", "BY", "cat_vars", "cat_vars<-", "char_vars", "char_vars<-", "cinv", "ckmatch", "collap", "collapg", "collapv", "colorder", "colorderv", "copyAttrib", "copyMostAttrib", "copyv", "D", "dapply", "date_vars", "date_vars<-", "descr", "Dlog", "fact_vars", "fact_vars<-", "fbetween", "fcompute", "fcomputev", "fcount", "fcountv", "fcumsum", "fdiff", "fdim", "fdist", "fdroplevels", "fduplicated", "ffirst", "fFtest", "fgroup_by", "group_by_vars", "fgroup_vars", "fgrowth", "fhdbetween", "fhdwithin", "findex", "findex_by", "finteraction", "flag", "flast", "flm", "fmatch", "fmax", "fmean", "fmedian", "fmin", "fmode", "fmutate", "fncol", "fndistinct", "fnlevels", "fnobs", "fnrow", "fnth", "fnunique", "fprod", "fquantile", "frange", "frename", "fscale", "fsd", "fselect", "fselect<-", "fsubset", "fslice", "fslicev", "fsum", "fsummarise", "fsummarize", "ftransform", "ftransform<-", "ftransformv", "fungroup", "funique", "fvar", "fwithin", "G", "gby", "get_collapse", "get_elem", "get_vars", "get_vars<-", "GGDC10S", "greorder", "group", "groupv", "groupid", "GRP", "GRPid", "GRPN", "GRPnames", "gsplit", "gv", "gv<-", "gvr", "gvr<-", "has_elem", "HDB", "HDW", "iby", "irreg_elem", "is_categorical", "is_date", "is_GRP", "is_irregular", "is_qG", "is_unlistable", "itn", "ix", "join", "L", "ldepth", "list_elem", "list_elem<-", "logi_vars", "logi_vars<-", "massign", "mctl", "missing_cases", "mrtl", "mtt", "na_insert", "na_omit", "na_rm", "na_locf", "na_focb", "namlab", "num_vars", "num_vars<-", "nv", "nv<-", "pad", "pivot", "psacf", "psccf", "psmat", "pspacf", "pwcor", "pwcov", "pwnobs", "qDF", "qDT", "qF", "qG", "qM", "qsu", "qtab", "qtable", "qTBL", "radixorder", "radixorderv", "rapply2d", "recode_char", "recode_num", "reg_elem", "reindex", "relabel", "replace_inf", "replace_na", "replace_outliers", "rm_stub", "rnm", "rowbind", "roworder", "roworderv", "rsplit", "sbt", "seq_col", "seq_row", "seqid", "set_collapse", "setattrib", "setAttrib", "setColnames", "setDimnames", "setLabels", "setop", "setrelabel", "setrename", "setRownames", "settfm", "settfmv", "setTRA", "settransform", "settransformv", "setv", "slt", "slt<-", "smr", "ss", "STD", "t_list", "tfm", "tfm<-", "tfmv", "timeid", "to_plm", "TRA", "unattrib", "unindex", "unlist2d", "varying", "vclasses", "vec", "vgcd", "vlabels", "vlabels<-", "vlengths", "vtypes", "W", "whichNA", "whichv", "wlddev"))) .COLLAPSE_GENERIC <- sort(unique(c("B","BY","D","Dlog","fsubset","fbetween","fdiff","ffirst","fgrowth","fhdbetween", "fhdwithin","flag","flast","fmax","fmean","fmedian","fnth","fmin","fmode","varying", "fndistinct","fnobs","fprod","fscale","fsd","fsum","fcumsum","fvar","fwithin","funique", "G","GRP","HDB","HDW","L","psacf","psccf","psmat","pspacf","qsu", "rsplit","fdroplevels", "STD","TRA","W", "descr"))) .COLLAPSE_DATA <- c("GGDC10S", "wlddev") .FAST_FUN <- c("fmean","fmedian","fmode","fsum","fprod","fsd","fvar", "fmin","fmax","fnth","ffirst","flast","fnobs","fndistinct", "fcumsum","fscale","fbetween","fwithin","fhdbetween","fhdwithin", "flag","fdiff","fgrowth") .FAST_STAT_FUN <- c("fmean","fmedian","fmode","fsum","fprod","fsd","fvar", "fmin","fmax","fnth","ffirst","flast","fnobs","fndistinct") .OPERATOR_FUN <- c("STD","B","W","HDB","HDW","L","F","D","Dlog","G") .SHORTHANDS <- c("gv", "gv<-", "av", "av<-", "nv", "nv<-", "gvr", "gvr<-", "itn", "ix", "slt", "slt<-", "sbt", "gby", "iby", "mtt", "smr", "tfm", "tfmv", "tfm<-", "settfm", "settfmv", "rnm") .COLLAPSE_OLD <- c("fNobs", "fNdistinct", "fHDwithin", "fHDbetween", "replace_NA", "replace_Inf") .FAST_STAT_FUN_POLD <- c(.FAST_STAT_FUN, "fNobs","fNdistinct", "GRPN", "GRPid") # "n" .FAST_FUN_MOPS <- c(.FAST_STAT_FUN_POLD, "fcumsum","fscale","fbetween","fwithin", "flag","fdiff","fgrowth","STD","B","W","L","F","D","Dlog","G") .FAST_STAT_FUN_EXT <- c(.FAST_STAT_FUN_POLD, paste0(setdiff(.FAST_STAT_FUN_POLD, c("GRPN", "GRPid")), "_uw")) # "n" collapse/R/fsum.R0000644000176200001440000001530614777170130013366 0ustar liggesusers fsum <- function(x, ...) UseMethod("fsum") # , x fsum.default <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, fill = FALSE, nthreads = .op[["nthreads"]], ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fsum.matrix(x, g, w, TRA, na.rm, use.g.names, fill = fill, nthreads = nthreads, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fsum,x,0L,0L,w,na.rm,fill,nthreads)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fsum,x,length(lev),g,w,na.rm,fill,nthreads), lev)) } if(is.nmfactor(g)) return(.Call(C_fsum,x,fnlevels(g),g,w,na.rm,fill,nthreads)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fsum,x,attr(g,"N.groups"),g,w,na.rm,fill,nthreads)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fsum,x,g[[1L]],g[[2L]],w,na.rm,fill,nthreads), GRPnames(g))) return(.Call(C_fsum,x,g[[1L]],g[[2L]],w,na.rm,fill,nthreads)) } if(is.null(g)) return(TRAC(x,.Call(C_fsum,x,0L,0L,w,na.rm,fill,nthreads),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_fsum,x,g[[1L]],g[[2L]],w,na.rm,fill,nthreads),g[[2L]],TRA, ...) } fsum.matrix <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, fill = FALSE, nthreads = .op[["nthreads"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fsumm,x,0L,0L,w,na.rm,fill,drop,nthreads)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fsumm,x,length(lev),g,w,na.rm,fill,FALSE,nthreads), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fsumm,x,fnlevels(g),g,w,na.rm,fill,FALSE,nthreads)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fsumm,x,attr(g,"N.groups"),g,w,na.rm,fill,FALSE,nthreads)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fsumm,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fsumm,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads)) } if(is.null(g)) return(TRAmC(x,.Call(C_fsumm,x,0L,0L,w,na.rm,fill,TRUE,nthreads),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_fsumm,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads),g[[2L]],TRA, ...) } fsum.zoo <- function(x, ...) if(is.matrix(x)) fsum.matrix(x, ...) else fsum.default(x, ...) fsum.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fsum.matrix(x, ...), x) else fsum.default(x, ...) fsum.data.frame <- function(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, fill = FALSE, nthreads = .op[["nthreads"]], ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fsuml,x,0L,0L,w,na.rm,fill,drop,nthreads)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fsuml,x,length(lev),g,w,na.rm,fill,FALSE,nthreads), lev)) } if(is.nmfactor(g)) return(.Call(C_fsuml,x,fnlevels(g),g,w,na.rm,fill,FALSE,nthreads)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fsuml,x,attr(g,"N.groups"),g,w,na.rm,fill,FALSE,nthreads)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads), groups)) return(.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads)) } if(is.null(g)) return(TRAlC(x,.Call(C_fsuml,x,0L,0L,w,na.rm,fill,TRUE,nthreads),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads),g[[2L]],TRA, ...) } fsum.list <- function(x, ...) fsum.data.frame(x, ...) fsum.grouped_df <- function(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], fill = FALSE, nthreads = .op[["nthreads"]], ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE wsym <- substitute(w) nam <- attr(x, "names") gn2 <- gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) sumw <- NULL if(!is.null(wsym)) { w <- eval(wsym, x, parent.frame()) if(length(wn <- which(nam %in% all.vars(wsym)))) { if(any(gn %in% wn)) stop("Weights coincide with grouping variables!") gn <- c(gn, wn) if(keep.w) { if(nTRAl) sumw <- `names<-`(list(fsumC(w,g[[1L]],g[[2L]],NULL,na.rm,fill)), do_stub(stub, if(length(wsym) == 1L) as.character(wsym) else deparse(wsym), "sum.")) else if(keep.group_vars) gn2 <- gn else sumw <- gn2 <- wn } } } gl <- length(gn) > 0L # necessary here, not before ! if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], names(sumw), nam[-gn]) return(setAttributes(c(g[[4L]], sumw, .Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads)), ax)) } ax[["names"]] <- c(names(sumw), nam[-gn]) return(setAttributes(c(sumw, .Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads)), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]], .Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads)), ax)) } else return(setAttributes(.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads), ax)) } else if(keep.group_vars || (keep.w && length(sumw))) { ax[["names"]] <- c(nam[gn2], nam[-gn]) return(setAttributes(c(x[gn2],TRAlC(x[-gn],.Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fsuml,x[-gn],g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fsuml,x,g[[1L]],g[[2L]],w,na.rm,fill,FALSE,nthreads),g[[2L]],TRA, ...)) } collapse/R/fmin_fmax.R0000644000176200001440000002504514777170130014361 0ustar liggesusers # For foundational changes to this code see fsum.R !! fmin <- function(x, ...) UseMethod("fmin") # , x fmin.default <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fmin.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fmin,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fmin,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_fmin,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmin,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fmin,x,g[[1L]],g[[2L]],na.rm), GRPnames(g))) return(.Call(C_fmin,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(TRAC(x,.Call(C_fmin,x,0L,0L,na.rm),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_fmin,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TRA, ...) } fmin.matrix <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fminm,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fminm,x,length(lev),g,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fminm,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fminm,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fminm,x,g[[1L]],g[[2L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fminm,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(C_fminm,x,0L,0L,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_fminm,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...) } fmin.zoo <- function(x, ...) if(is.matrix(x)) fmin.matrix(x, ...) else fmin.default(x, ...) fmin.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fmin.matrix(x, ...), x) else fmin.default(x, ...) fmin.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fminl,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fminl,x,length(lev),g,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fminl,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fminl,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE), groups)) return(.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(TRAlC(x,.Call(C_fminl,x,0L,0L,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...) } fmin.list <- function(x, ...) fmin.data.frame(x, ...) fmin.grouped_df <- function(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } else return(setAttributes(.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],TRAlC(x[-gn],.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fminl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fminl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...)) } fmax <- function(x, ...) UseMethod("fmax") # , x fmax.default <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ...) { # if(is.matrix(x) && !inherits(x, "matrix")) return(fmax.matrix(x, g, TRA, na.rm, use.g.names, ...)) if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fmax,x,0L,0L,na.rm)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`names<-`(.Call(C_fmax,x,length(lev),g,na.rm), lev)) } if(is.nmfactor(g)) return(.Call(C_fmax,x,fnlevels(g),g,na.rm)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmax,x,attr(g,"N.groups"),g,na.rm)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`names<-`(.Call(C_fmax,x,g[[1L]],g[[2L]],na.rm), GRPnames(g))) return(.Call(C_fmax,x,g[[1L]],g[[2L]],na.rm)) } if(is.null(g)) return(TRAC(x,.Call(C_fmax,x,0L,0L,na.rm),0L,TRA, ...)) g <- G_guo(g) TRAC(x,.Call(C_fmax,x,g[[1L]],g[[2L]],na.rm),g[[2L]],TRA, ...) } fmax.matrix <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fmaxm,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(`dimnames<-`(.Call(C_fmaxm,x,length(lev),g,na.rm,FALSE), list(lev, dimnames(x)[[2L]]))) } if(is.nmfactor(g)) return(.Call(C_fmaxm,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmaxm,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names) return(`dimnames<-`(.Call(C_fmaxm,x,g[[1L]],g[[2L]],na.rm,FALSE), list(GRPnames(g), dimnames(x)[[2L]]))) return(.Call(C_fmaxm,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(TRAmC(x,.Call(C_fmaxm,x,0L,0L,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAmC(x,.Call(C_fmaxm,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...) } fmax.zoo <- function(x, ...) if(is.matrix(x)) fmax.matrix(x, ...) else fmax.default(x, ...) fmax.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(fmax.matrix(x, ...), x) else fmax.default(x, ...) fmax.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ...) { if(is.null(TRA)) { if(!missing(...)) unused_arg_action(match.call(), ...) if(is.null(g)) return(.Call(C_fmaxl,x,0L,0L,na.rm,drop)) if(is.atomic(g)) { if(use.g.names && !inherits(x, "data.table")) { if(!is.nmfactor(g)) g <- qF(g, na.exclude = FALSE) lev <- attr(g, "levels") return(setRnDF(.Call(C_fmaxl,x,length(lev),g,na.rm,FALSE), lev)) } if(is.nmfactor(g)) return(.Call(C_fmaxl,x,fnlevels(g),g,na.rm,FALSE)) g <- qG(g, na.exclude = FALSE) return(.Call(C_fmaxl,x,attr(g,"N.groups"),g,na.rm,FALSE)) } if(!is_GRP(g)) g <- GRP.default(g, return.groups = use.g.names, call = FALSE) if(use.g.names && !inherits(x, "data.table") && length(groups <- GRPnames(g))) return(setRnDF(.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE), groups)) return(.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE)) } if(is.null(g)) return(TRAlC(x,.Call(C_fmaxl,x,0L,0L,na.rm,TRUE),0L,TRA, ...)) g <- G_guo(g) TRAlC(x,.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...) } fmax.list <- function(x, ...) fmax.data.frame(x, ...) fmax.grouped_df <- function(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, ...) { g <- GRP.grouped_df(x, call = FALSE) if(is.null(g[[4L]])) keep.group_vars <- FALSE nam <- attr(x, "names") gn <- which(nam %in% g[[5L]]) nTRAl <- is.null(TRA) gl <- length(gn) > 0L if(gl || nTRAl) { ax <- attributes(x) attributes(x) <- NULL if(nTRAl) { if(!missing(...)) unused_arg_action(match.call(), ...) ax[["groups"]] <- NULL ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) if(gl) { if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam[-gn]) return(setAttributes(c(g[[4L]],.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(g[[5L]], nam) return(setAttributes(c(g[[4L]],.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE)), ax)) } else return(setAttributes(.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE), ax)) } else if(keep.group_vars) { ax[["names"]] <- c(nam[gn], nam[-gn]) return(setAttributes(c(x[gn],TRAlC(x[-gn],.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...)), ax)) } ax[["names"]] <- nam[-gn] return(setAttributes(TRAlC(x[-gn],.Call(C_fmaxl,x[-gn],g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...), ax)) } else return(TRAlC(x,.Call(C_fmaxl,x,g[[1L]],g[[2L]],na.rm,FALSE),g[[2L]],TRA, ...)) } collapse/R/dapply.R0000644000176200001440000000661314777170130013706 0ustar liggesusers dapply <- function(X, FUN, ..., MARGIN = 2, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame"), drop = TRUE) { rowwl <- switch(MARGIN, `1` = TRUE, `2` = FALSE, stop("MARGIN only supports 2 - columns or 1 - rows")) aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply if(is.atomic(X)) { dX <- dim(X) if(length(dX) != 2L) stop("dapply cannot handle vectors or higher-dimensional arrays") res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, X, FALSE, 0L), FUN, ...) else aplyfun(.Call(Cpp_mctl, X, FALSE, 0L), FUN, ...) lx1 <- .Call(C_fnrow, res) if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), dimnames(X)[[if(rowwl) 1L else 2L]])) switch(return[1L], same = { ax <- attributes(X) retmatl <- TRUE }, matrix = { ax <- list(dim = dX, dimnames = dimnames(X)) retmatl <- TRUE }, data.frame = { dn <- dimnames(X) ax <- list(names = dn[[2L]], row.names = if(is.null(dn[[1L]])) .set_row_names(dX[1L]) else dn[[1L]], class = "data.frame") retmatl <- FALSE }, stop("Unknown return option!")) } else { ax <- attributes(X) attributes(X) <- NULL res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, do.call(cbind, X), FALSE, 0L), FUN, ...) else aplyfun(X, FUN, ...) lx1 <- .Call(C_fnrow, res) if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), if(rowwl) charorNULL(ax[["row.names"]]) else ax[["names"]])) dX <- c(.Call(C_fnrow, X), length(X)) switch(return[1L], same = retmatl <- FALSE, matrix = { ax <- list(dim = dX, dimnames = list(charorNULL(ax[["row.names"]]), ax[["names"]])) retmatl <- TRUE }, data.frame = { ax <- list(names = ax[["names"]], row.names = if(is.null(ax[["row.names"]])) .set_row_names(dX[1L]) else ax[["row.names"]], class = "data.frame") retmatl <- FALSE }, stop("Unknown return option!")) } if(retmatl) { if(rowwl) { if(lx1 != dX[2L]) { ax[["dim"]][2L] <- lx1 ax[["dimnames"]] <- list(ax[["dimnames"]][[1L]], if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1))) } res <- matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE) } else { if(lx1 != dX[1L]) { ax[["dim"]][1L] <- lx1 ax[["dimnames"]] <- list(if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)), ax[["dimnames"]][[2L]]) } res <- do.call(cbind, res) } } else { if(rowwl) { if(lx1 != dX[2L]) ax[["names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L) deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)) res <- .Call(Cpp_mctl, matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE), FALSE, 0L) # definitely faster than do.call(rbind, X) } else if(lx1 != dX[1L]) ax[["row.names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else .set_row_names(lx1) # could also make deparse(substitute(FUN)), but that is not so typical for data.frames ! if(any(ax[["class"]] == "data.table")) return(alcSA(res, ax)) } setAttributes(res, ax) } collapse/R/descr.R0000644000176200001440000005260115015446753013516 0ustar liggesusers # Super fast tabulation of a single atomic vector, with various sorting options fsorttable <- function(x, srt, w = NULL) { if(is.factor(x)) { lev <- attr(x, "levels") t <- .Call(C_fwtabulate, x, w, length(lev), !inherits(x, "na.included")) # tabulate(x, nbins = length(lev)) # skips missing values !! names(t) <- lev sorted <- TRUE } else { sorted <- FALSE g <- .Call(C_groupat, x, TRUE, FALSE) # FALSE = keeps NA t <- .Call(C_fwtabulate, g, w, attr(g, "N.groups"), TRUE) # TRUE = check for NA's and skip them names(t) <- Csv(x, attr(g, "starts")) # This seems is slightly faster with not too many distinct values, but less straightforward # g <- .Call(C_group, x, TRUE, is.null(w)) # t <- if(is.null(w)) attr(g, "group.sizes") else # .Call(C_fwtabulate, g, w, attr(g, "N.groups"), FALSE) # nam <- Csv(x, attr(g, "starts")) # names(t) <- nam # if(anyNA(nam)) t <- t[-whichNA(nam)] } switch(srt, value = if(sorted || attr(o <- forder.int(names(t)), "sorted")) t else t[o], # "quick" sort seems best, based on multiple datasets, but "radix" (second best) keeps ties in order... # sort.int(t, method = "radix", decreasing = TRUE, na.last = TRUE) freq = if(attr(o <- forder.int(t, decreasing = TRUE), "sorted")) t else t[o], none = t, stop("sort.table must be one of 'value', 'freq' or 'none'")) } # Same for grouped data, building on qtab() sorttable2D <- function(x, f, srt, w = NULL) { if(is.factor(x)) sorted <- TRUE else { sorted <- switch(srt, value = TRUE, FALSE) x <- qF(x, sort = sorted) } t <- qtab(x, f, w = w, dnn = NULL) switch(srt, value = if(sorted || attr(o <- forder.int(dimnames(t)[[1L]]), "sorted")) t else t[o, , drop = FALSE], freq = if(attr(o <- forder.int(frowSums(t), decreasing = TRUE), "sorted")) t else t[o, , drop = FALSE], none = t, stop("sort.table must be one of 'value', 'freq' or 'none'")) } # Extended version including totals and transpose option: better do that in print! # sorttable2D <- function(x, f, srt, w = NULL, transpose = FALSE) { # if(is.factor(x)) sorted <- TRUE # else { # sorted <- switch(srt, value = TRUE, FALSE) # x <- qF(x, sort = sorted) # } # if(transpose) { # t <- qtab(f, x, w = w, dnn = NULL) # tot <- unattrib(fsummCcc(t)) # t <- rbind(t, Total = tot) # } else { # t <- qtab(x, f, w = w, dnn = NULL) # tot <- if(is.double(w)) frowSums(t) else as.integer(frowSums(t)) # t <- cbind(t, Total = tot) # } # switch(srt, # value = if(sorted || attr(o <- forder.int(dimnames(t)[[1L+transpose]]), "sorted")) t else if(transpose) t[, o, drop = FALSE] else t[o, , drop = FALSE], # freq = if(attr(o <- forder.int(tot, decreasing = TRUE), "sorted")) t else if(transpose) t[, o, drop = FALSE] else t[o, , drop = FALSE], # none = t, # stop("sort.table must be one of 'value', 'freq' or 'none'")) # } # X = wlddev; by = ~ income; w = ~ replace_NA(POP); # cols = NULL; Ndistinct = TRUE; higher = TRUE; table = TRUE; sort.table = "freq" # Qprobs = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99); Qtype = 7L # label.attr = 'label'; stepwise = FALSE; nam = "wlddev"; dotsok = TRUE # fndistinctC = collapse:::fndistinctC; fsumC = collapse:::fsumC; # fsorttable = collapse:::fsorttable; frowSums = collapse:::frowSums # Expects X to be a plain list and nam the name of the dataset descr_core <- function(X, nam, by = NULL, w = NULL, Ndistinct = TRUE, higher = TRUE, table = TRUE, sort.table = "freq", Qprobs = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), Qtype = 7L, label.attr = "label", stepwise = FALSE, ...) { dotsok <- if(missing(...)) TRUE else names(substitute(c(...))[-1L]) %!in% c("pid", "g") # Checking for numeric data num <- .Call(C_vtypes, X, 1L) # vapply(unattrib(X), is.numeric, TRUE) Nnum <- bsum(num) # Define functions to process numeric data if(Nnum > 0L) { if(Ndistinct && dotsok) { armat <- if(is.null(by)) function(x, y) c(x[1L], Ndist = y, x[-1L]) else function(x, y) cbind(x[, 1L, drop = FALSE], Ndist = y, x[, -1L, drop = FALSE]) numstats <- function(x, ...) armat(qsu.default(x, by, w = w, higher = higher, ...), fndistinctC(x, by)) } else numstats <- function(x, ...) qsu.default(x, by, w = w, higher = higher, ...) quantiles <- if(is.null(by)) function(x) .quantile(x, Qprobs, w, type = Qtype, names = TRUE) else function(x) BY.default(x, by, .quantile, probs = Qprobs, w = w, type = Qtype, names = TRUE, expand.wide = TRUE) # This function will be applied to different columns. descrnum <- if(is.numeric(Qprobs)) function(x, ...) list(Class = class(x), Label = attr(x, label.attr), Stats = numstats(x, ...), Quant = quantiles(x)) else function(x, ...) list(Class = class(x), Label = attr(x, label.attr), Stats = numstats(x, ...)) } # Non-numeric data, assumed to have at least some categorical variables (could also be date) if(Nnum != length(num)) { if(table && !is.null(by)) { f <- as_factor_GRP(by) tabstats <- if(Ndistinct && is.null(w)) function(tab) cbind(N = fsummCcc(tab), Ndist = fsummCcc(tab > 0L)) else if(Ndistinct) function(tab) cbind(WeightSum = fsummCcc(tab), Ndist = fsummCcc(tab > 0L)) else if(is.null(w)) function(tab) cbind(N = fsummCcc(tab)) else function(tab) cbind(WeightSum = fsummCcc(tab)) descrcat <- function(x) { tab <- sorttable2D(x, f, sort.table, w) list(Class = class(x), Label = attr(x, label.attr), Stats = tabstats(tab), Table = tab) } } else if(table) { tabstats <- if(Ndistinct && is.null(w)) function(tab) c(N = fsumC(tab), Ndist = length(tab)) else if(Ndistinct) function(tab) c(WeightSum = fsumC(tab), Ndist = length(tab)) else if(is.null(w)) function(tab) `names<-`(fsumC(tab), "N") else function(tab) `names<-`(fsumC(tab), "WeightSum") descrcat <- function(x) { tab <- fsorttable(x, sort.table, w) list(Class = class(x), Label = attr(x, label.attr), Stats = tabstats(tab), Table = tab) } } else { descrcat <- function(x) list(Class = class(x), Label = attr(x, label.attr), Stats = if(Ndistinct) c(N = fnobsC(x), Ndist = fndistinctC(x)) else `names<-`(fnobsC(x), "N")) } } descrdate <- if(is.null(by)) function(x) list(Class = class(x), Label = attr(x, label.attr), Stats = `attr<-`(c(if(Ndistinct) c(N = fnobsC(x), Ndist = fndistinctC(x)) else `names<-`(fnobsC(x), "N"), `names<-`(.range(x), c("Min", "Max"))), "attrib", attributes(x))) else function(x) list(Class = class(x), Label = attr(x, label.attr), Stats = `attr<-`(cbind(N = fnobs.default(x, by), Ndist = if(Ndistinct) fndistinctC(x, by) else NULL, Min = fmin.default(x, by, na.rm = TRUE, use.g.names = FALSE), Max = fmax.default(x, by, na.rm = TRUE, use.g.names = FALSE)), "attrib", attributes(x))) # Result vector and attributes res <- vector('list', length(X)) ares <- list(names = names(X), name = nam, N = fnrow(X), arstat = !dotsok, table = table, groups = by, weights = w, class = "descr") # Computation if(stepwise) { # This means we compute one by one, mainly for printing... attributes(res) <- ares print(res, header = 2L) # Only header for(i in seq_along(X)) { invisible(readline(prompt = sprintf("Press [enter] for variable %s/%s or [esc] to exit", i, length(res)))) xi <- X[[i]] res[[i]] <- if(is.numeric(xi)) descrnum(xi, ...) else if(is_date(xi)) descrdate(xi) else descrcat(xi) print(res[i], header = FALSE) } } else { if(Nnum) res[num] <- lapply(X[num], descrnum, ...) if(Nnum != length(num)) { date <- vapply(unattrib(X), is_date, TRUE) if(any(date)) { res[date] <- lapply(X[date], descrdate) cat <- !(num | date) } else cat <- !num res[cat] <- lapply(X[cat], descrcat) } attributes(res) <- ares } return(if(stepwise) invisible(res) else res) } # Since v1.9.0, descr() is generic, with a grouped_df method descr <- function(X, ...) UseMethod("descr") descr.default <- function(X, by = NULL, w = NULL, cols = NULL, Ndistinct = TRUE, higher = TRUE, table = TRUE, sort.table = "freq", Qprobs = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), Qtype = 7L, label.attr = "label", stepwise = FALSE, ...) { # Getting input information nam <- l1orlst(as.character(substitute(X))) # Unclassing and (if necessary) transforming X if(is.list(X)) { is_sf <- inherits(X, "sf") # if(inherits(X, "POSIXlt")) X <- list(X = as.POSIXct(X)) if(inherits(X, "pdata.frame")) X <- unindex(X) class(X) <- NULL if(is_sf) X[[attr(X, "sf_column")]] <- NULL } else { if(inherits(X, "pseries")) X <- unindex(X) is_1D <- is.null(dim(X)) X <- unclass(qDF(X)) if(is_1D) names(X) <- nam } # Processing by and w arguments: inspired by qsu() if(is.call(by) || is.call(w)) { v <- NULL if(is.call(by)) { if(length(by) == 3L) { v <- ckmatch(all.vars(by[[2L]]), names(X)) byn <- ckmatch(all.vars(by[[3L]]), names(X)) } else byn <- ckmatch(all.vars(by), names(X)) by <- GRP.default(X, byn, call = FALSE) # , ... } else { if(!is.null(by)) by <- GRP.default(by, call = FALSE) # , ... byn <- NULL } if(is.call(w)) { widn <- ckmatch(all.vars(w), names(X)) w <- eval(w[[2L]], X, attr(w, ".Environment")) } else widn <- NULL X <- X[if(length(v)) v else if(is.null(cols)) -c(byn, widn) else cols2int(cols, X, names(X), FALSE)] } else { if(!is.null(by)) by <- GRP.default(by, call = FALSE) # , ... if(length(cols)) X <- X[cols2int(cols, X, names(X), FALSE)] } descr_core(X, nam, by, w, Ndistinct, higher, table, sort.table, Qprobs, Qtype, label.attr, stepwise, ...) } # Benefit of grouped_df method: better control on how data is grouped with fgroup_by(), selection with fselect() etc. descr.grouped_df <- function(X, w = NULL, Ndistinct = TRUE, higher = TRUE, table = TRUE, sort.table = "freq", Qprobs = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), Qtype = 7L, label.attr = "label", stepwise = FALSE, ...) { # Getting input information nam <- l1orlst(as.character(substitute(X))) wsym <- substitute(w) by <- GRP.grouped_df(X, call = FALSE) # Unclassing and (if necessary) transforming X is_sf <- inherits(X, "sf") if(inherits(X, "pdata.frame")) X <- unindex(X) class(X) <- NULL if(is_sf) X[[attr(X, "sf_column")]] <- NULL # Getting group indices byn <- which(names(X) %in% by[[5L]]) # Processing weights and combining indices with group indices if(!is.null(wsym)) { w <- eval(wsym, X, parent.frame()) # This allows w to be a function of multiple variables if(length(wn <- which(names(X) %in% all.vars(wsym)))) { if(any(byn %in% wn)) stop("Weights coincide with grouping variables!") byn <- c(byn, wn) } } if(length(byn)) X <- X[-byn] # Subsetting X descr_core(X, nam, by, w, Ndistinct, higher, table, sort.table, Qprobs, Qtype, label.attr, stepwise, ...) } # Methods ---------------------------------------------------------- `[.descr` <- function(x, ...) copyMostAttributes(.subset(x, ...), x) print_descr_default <- function(x, n = 14, perc = TRUE, digits = 2, t.table = TRUE, summary = TRUE, reverse = FALSE, stepwise = FALSE, header = TRUE, wsum = NULL) { w <- paste(rep("-", .Options$width), collapse = "") arstat <- attr(x, "arstat") DSname <- attr(x, "name") DSN <- attr(x, "N") wsuml <- !is.null(wsum) if(wsuml) { cb <- function(a, b) if(t.table) cbind(WeightSum = a, Perc = b) else formatC(rbind(WeightSum = a, Perc = b), drop0trailing = TRUE) ct <- function(z) if(t.table) cbind(WeightSum = z) else z } else { cb <- function(a, b) if(t.table) cbind(Freq = a, Perc = b) else formatC(rbind(Freq = a, Perc = b), drop0trailing = TRUE) ct <- function(z) if(t.table) cbind(Freq = z) else z } if(reverse) x <- rev.default(x) else if(header) { cat('Dataset: ', DSname,', ',length(x), ' Variables, N = ', DSN, if(wsuml) paste0(", WeightSum = ", wsum) else "", "\n", sep = "") cat(w, "\n", sep = "") } nam <- names(x) # Needs to be here if(header < 2L) for(i in seq_along(x)) { if(stepwise) invisible(readline(prompt = sprintf("Press [enter] for variable %s/%s or [esc] to exit", i, length(x)))) xi <- x[[i]] cat(nam[i], " (", strclp(xi[[1L]]), "): ", xi[[2L]], "\n", sep = "") stat <- xi[[3L]] TN <- if(wsuml && names(stat)[1L] == "WeightSum") wsum else DSN if(stat[[1L]] < TN) cat("Statistics (", round((1-stat[[1L]]/TN)*100, digits), "% NAs)\n", sep = "") else cat("Statistics\n") if(any(xi[[1L]] %in% c("Date", "POSIXct"))) print.default(c(stat[1:2], setNames(as.character(setAttributes(stat[3:4], attr(stat, "attrib"))), c("Min", "Max"))), quote = FALSE, right = TRUE, print.gap = 2) else print.qsu(stat, digits) if(length(xi) > 3L) { if(arstat) cat("\n") if(names(xi)[4L] == "Table") { cat("Table\n") t <- unclass(xi[[4L]]) if(length(t) <= n) { if(perc) print.default(cb(t, round(t/bsum(t)*100, digits)), right = TRUE, print.gap = 2, quote = FALSE) else print.table(ct(t), digits = digits) } else { t1 <- t[seq_len(n)] st <- bsum(t) rem <- `names<-`(st-bsum(t1), sprintf("... %s Others", length(t)-n)) if(perc) { pct <- unattrib(t1)/st*100 print.default(cb(c(t1, rem), round(c(pct, 100-bsum(pct)), digits)), right = TRUE, print.gap = 2, quote = FALSE) # cat("...\n") } else { print.table(ct(c(t1, rem)), digits = digits) # cat("...\n") } if(summary) { cat("\nSummary of Table", if(wsuml) "WeightSums\n" else "Frequencies\n") print.summaryDefault(summary.default(t), digits) } } } else { cat("Quantiles\n") print.qsu(xi[[4L]], digits) } } cat(w, "\n", sep = "") # More compressed -> better ! # cat("\n", w, "\n", sep = "") } if(reverse && header) cat('Dataset: ', DSname,', ',length(x), ' Variables, N = ', DSN, if(wsuml) paste0(", WeightSum = ", wsum) else "", "\n", sep = "") invisible(x) } print_descr_grouped <- function(x, n = 14, perc = TRUE, digits = 2, t.table = TRUE, summary = TRUE, total = TRUE, reverse = FALSE, stepwise = FALSE, header = TRUE, wsum = NULL) { w <- paste(rep("-", .Options$width), collapse = "") arstat <- attr(x, "arstat") DSname <- attr(x, "name") DSN <- attr(x, "N") g <- attr(x, "groups") wsuml <- !is.null(wsum) if(header) { gs <- g$group.sizes dim(gs) <- c(length(gs), 1L) dimnames(gs) <- list(GRPnames(g), "N") if(wsuml) gs <- cbind(gs, WeightSum = fsum(attr(x, "weights"), g, use.g.names = FALSE, fill = TRUE)) if(perc) { gs <- if(wsuml) cbind(gs, setColnames(round(fsum(gs, TRA = "%"), digits), c("Perc", "Perc")))[, c(1L, 3L, 2L, 4L)] else cbind(gs, Perc = round(fsum(drop(gs), TRA = "%"), digits)) } } if(reverse) x <- rev.default(x) else if(header) { cat('Dataset: ', DSname, ', ', length(x), ' Variables, N = ', DSN, if(wsuml) paste0(", WeightSum = ", wsum) else "", "\nGrouped by: ", paste(g$group.vars, collapse = ", "), " [", g$N.groups, "]\n", sep = "") print.qsu(gs, digits) cat(w, "\n", sep = "") } nam <- names(x) # Needs to be here if(header < 2L) for(i in seq_along(x)) { if(stepwise) invisible(readline(prompt = sprintf("Press [enter] for variable %s/%s or [esc] to exit", i, length(x)))) xi <- x[[i]] cat(nam[i], " (", strclp(xi[[1L]]),"): ", xi[[2L]], "\n", sep = "") stat <- xi[[3L]] Ni <- fsummCcc(stat[, 1L, drop = FALSE]) # to get the name TN <- if(wsuml && names(Ni) == "WeightSum") wsum else DSN if(Ni < TN) cat("Statistics (", names(Ni), " = ", Ni, ", ", round((1-Ni/TN)*100, digits), "% NAs)\n", sep = "") else cat("Statistics (", names(Ni), " = ", Ni, ")\n", sep = "") if(any(xi[[1L]] %in% c("Date", "POSIXct"))) { stat12 <- stat[, 1:2, drop = FALSE] if(perc) stat12 <- cbind(stat12[, 1L, drop = FALSE], Perc = round(stat12[, 1L]/bsum(stat12[, 1L])*100, digits), stat12[, 2L, drop = FALSE]) print.default(cbind(stat12, matrix(as.character(setAttributes(stat[, 3:4], attr(stat, "attrib"))), ncol = 2, dimnames = list(NULL, c("Min", "Max")))), quote = FALSE, right = TRUE, print.gap = 2) } else { if(perc) { if(wsuml && ncol(stat) > 4L) { # If weights and non-character ncolf <- 1:(2L + (dimnames(stat)[[2L]][2L] == "Ndist")) stat <- if(wsuml) cbind(stat[, ncolf, drop = FALSE], Perc = stat[, "WeightSum"]/bsum(stat[, "WeightSum"])*100, stat[, -ncolf, drop = FALSE]) } else { stat <- cbind(stat[, 1L, drop = FALSE], Perc = stat[, 1L]/bsum(stat[, 1L])*100, stat[, -1L, drop = FALSE]) } } print.qsu(stat, digits) } if(length(xi) > 3L) { # Table or quantiles if(names(xi)[4L] == "Table") { if(perc) cat("\nTable (", if(wsuml) "WeightSum" else "Freq", " Perc)\n", sep = "") else cat("\nTable\n") t <- qM(xi[[4L]]) if(total) t <- cbind(t, Total = if(is.integer(t)) as.integer(frowSums(t)) else frowSums(t)) if(nrow(t) <= n) { # TODO: revisit ! tab <- t if(perc) pct <- fsum.matrix(tab, TRA = "%", na.rm = FALSE, nthreads = 1L) } else { t1 <- t[seq_len(n), , drop = FALSE] st <- fsummCcc(t, drop = FALSE) rem <- st - fsummCcc(t1) dimnames(rem)[[1L]] <- sprintf("... %s Others", nrow(t)-n) tab <- rbind(t1, rem) if(perc) pct <- tab %r/% st * 100 # dimnames(tab)[[2L]] <- paste0(dimnames(tab)[[2L]], "\nFreq Perc") } if(perc) { tab <- duplAttributes(paste(format(tab, digits = digits, justify = "right"), format(pct, digits = digits, justify = "right")), tab) print.default(if(t.table) tab else t(tab), right = TRUE, print.gap = 2, quote = FALSE) } else print.table(if(t.table) tab else t(tab), digits = digits) if(summary && nrow(t) > n) { cat("\nSummary of Table", if(wsuml) "WeightSums\n" else "Frequencies\n") print.summaryDefault(summary.default(t), digits) } } else { cat("\nQuantiles\n") print.qsu(xi[[4L]], digits) } } cat(w, "\n", sep = "") } if(reverse && header) { cat("Grouped by: ", paste(g$group.vars, collapse = ", "), " [", g$N.groups, "]\n", sep = "") print.qsu(gs, digits) cat('\nDataset: ', DSname, ', ', length(x), ' Variables, N = ', DSN, if(wsuml) paste0(", WeightSum = ", wsum) else "", "\n", sep = "") } invisible(x) } print.descr <- function(x, n = 14, perc = TRUE, digits = .op[["digits"]], t.table = TRUE, total = TRUE, compact = FALSE, summary = !compact, reverse = FALSE, stepwise = FALSE, ...) { if(missing(...) || is.null(header <- list(...)$header)) header <- TRUE oldClass(x) <- NULL wsum <- if(is.null(weights <- attr(x, "weights"))) NULL else fsumC(weights) if(is.null(attr(x, "groups"))) { if(compact) x <- fdapply(x, function(z) if(is.null(z[["Quant"]])) z else c(z[1:2], list(Stats = c(z[[3L]], z[[4L]])))) return(print_descr_default(x, n, perc, digits, t.table, summary, reverse, stepwise, header, wsum)) } if(compact) x <- fdapply(x, function(z) if(is.null(z[["Quant"]])) z else c(z[1:2], list(Stats = cbind(z[[3L]], z[[4L]])))) print_descr_grouped(x, n, perc, digits, t.table, summary, total, reverse, stepwise, header, wsum) } # Note: This does not work for array stats (using g or pid.. ) as.data.frame.descr <- function(x, ..., gid = "Group") { if(attr(x, "arstat")) stop("Cannot handle arrays of statistics created by passing the pid or g arguments to qsu.default()!") g <- attr(x, "groups") # w <- attr(x, "weights") nam <- attr(x, "names") attributes(x) <- NULL # faster lapply if(is.null(g)) { r <- lapply(x, function(z) c(list(Class = strclp(z[[1L]]), Label = null2NA(z[[2L]])), as.vector(z[[3L]], "list"), if(is.null(quant <- z[["Quant"]])) NULL else as.vector(quant, "list"))) } else { gnam <- GRPnames(g) r <- lapply(x, function(z) c(list(Class = strclp(z[[1L]]), Label = null2NA(z[[2L]]), Group = gnam), .Call(Cpp_mctl, z[[3L]], TRUE, 0L), if(is.null(quant <- z[["Quant"]])) NULL else .Call(Cpp_mctl, quant, TRUE, 0L))) } names(r) <- nam r <- .Call(C_rbindlist, r, TRUE, TRUE, "Variable") if(!is.null(g) && gid[1L] != "Group") names(r)[4L] <- gid[1L] if(allNA(r[["Label"]])) r[["Label"]] <- NULL # if(length(w) && length(r[["WeightSum"]]) && length(r[["N"]])) { # Too complex... # nam <- c("WeightSum", "N", "Ndist") # ind <- match(nam, names(r)) # r[sort.int() ind] <- r[ind] # names(r)[ind] <- # } attr(r, "row.names") <- .set_row_names(.Call(C_fnrow, r)) class(r) <- "data.frame" r } collapse/vignettes/0000755000176200001440000000000015202627536014074 5ustar liggesuserscollapse/vignettes/collapse_documentation.Rmd0000644000176200001440000001477315202504365021301 0ustar liggesusers--- title: "collapse Documentation and Resources" author: "Sebastian Krantz" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: vignette: > %\VignetteIndexEntry{collapse Documentation and Resources} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- *collapse* is a C/C++ based package for data transformation and statistical computing in R. It's aims are: 1. To facilitate complex data transformation, exploration and computing tasks in R. 2. To help make R code fast, flexible, parsimonious and programmer friendly. Documentation comes in 7 different forms: ## Built-In Structured Documentation After installing *collapse*, you can call `help("collapse-documentation")` which will produce a central help page providing a broad overview of the entire functionality of the package, including direct links to all function documentation pages and links to 13 further topical documentation pages (names in `.COLLAPSE_TOPICS`) describing how clusters of related functions work together. Thus *collapse* comes with a fully structured hierarchical documentation which you can browse within R - and that provides everything necessary to fully understand the package. The Documentation is also available [online](). The package page under `help("collapse-package")` provides some general information about the package and its design philosophy, as well as a compact set of examples covering important functionality. Reading `help("collapse-package")` and `help("collapse-documentation")` is the most comprehensive way to get acquainted with the package. `help("collapse-documentation")` is always the most up-to-date resource. ## DeepWiki [DeepWiki](https://deepwiki.com/) is an AI-powered platform designed to automatically generate structured, interactive documentation for software repositories, primarily on GitHub. Developed by [Cognition AI](https://cognition.ai/)—the same laboratory behind the autonomous AI engineer [Devin](https://devin.ai/)—it serves as a dynamic, "Wikipedia-like" encyclopedia for codebases. While not more comprehensive or accurate than the structured documentation, it is great to learn more about the internal structure of *collapse* and use a chatbot (Devin) to ask questions about or write code using *collapse*. You can access the *collapse* DeepWiki [here](https://deepwiki.com/fastverse/collapse). ## JSS Article The [collapse article](https://doi.org/10.18637/jss.v116.i01) is published in the [Journal of Statistical Software](https://www.jstatsoft.org/) (volume 116, issue 1). If you want to 'read something concise' about *collapse*, this is the best place to start. ## Cheatsheet A fairly up-to-date (v2.0) [cheatsheet]() compactly summarizes the package. ## Vignettes Updated vignettes are * [***collapse* for *tidyverse* Users**](): A quick introduction to *collapse* for *tidyverse* users * [***collapse* and *sf***](): Shows how collapse can be used to efficiently manipulate *sf* data frames * [***collapse*'s Handling of R Objects**](): A quick view behind the scenes of class-agnostic R programming * [**Developing with *collapse***](): How to write efficient statistical packages using R and *collapse* The other vignettes (only available [online]()) do not cover major features introduced in versions >= 1.7, but contain much useful information and examples: * [**Introduction to *collapse* **](): Introduces key features in a structured way * [***collapse* and *dplyr* **](): Demonstrates the integration of collapse with *dplyr* / *tidyverse* workflows and associated performance improvements * [***collapse* and *plm***](): Demonstrates the integration of collapse with *plm* and shows examples of efficient programming with panel data * [***collapse* and *data.table***](): Shows how collapse and *data.table* may be used together in a harmonious way ## Blog I maintain a [blog]() linked to [Rbloggers.com]() where I introduced *collapse* with some compact posts covering central functionality. Among these, the post about [programming with *collapse*]() is useful for developers. ## Presentations and Slides - I have presented *collapse* (v1.8) in some level of detail at useR 2022. A 2h video recording that provides a quite comprehensive introduction is available [here](). The corresponding slides are available [here](). - I have recently presented *collapse* (v2.1) and the *fastverse* at a workshop on "[Speeding Up Empirical Research: Tools and Techniques for Fast Computing](https://www.bportugal.pt/en/evento/workshop-speeding-empirical-research-tools-and-techniques-fast-computing-bplim)" organized by the Bank of Portugal in December 2025. My 45-minute talk focused on two advanced applications in international trade and spatial network analysis/package development. You can find the materials (slides and recording) [here](https://github.com/BPLIM/Workshops/tree/master/BPLIM2025). collapse/vignettes/collapse_intro.Rmd0000644000176200001440000077445715121640575017603 0ustar liggesusers--- title: "Introduction to collapse" subtitle: "Advanced and Fast Data Transformation in R" author: "Sebastian Krantz" date: "2021-06-27" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{Introduction to collapse} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- *collapse* is a C/C++ based package for data transformation and statistical computing in R. It's aims are: 1. To facilitate complex data transformation, exploration and computing tasks in R. 2. To help make R code fast, flexible, parsimonious and programmer friendly. This vignette demonstrates these two points and introduces all main features of the package in a structured way. The chapters are pretty self-contained, however the first chapters introduce the data and faster data manipulation functions which are used throughout the rest of this vignette. *** **Notes:** - Apart from this vignette, *collapse* comes with a built-in structured documentation available under `help("collapse-documentation")` after installing the package, and `help("collapse-package")` provides a compact set of examples for quick-start. A cheat sheet is available at [Rstudio](). - The two other vignettes focus on the integration of *collapse* with *dplyr* workflows (recommended for *dplyr* / *tidyverse* users), and on the integration of *collapse* with the *plm* package (+ some advanced programming with panel data). - Documentation and vignettes can also be viewed [online](). *** ## Why *collapse*? *collapse* is a high-performance package that extends and enhances the data-manipulation capabilities of R and existing popular packages (such as *dplyr*, *data.table*, and matrix packages). It's main focus is on grouped and weighted statistical programming, complex aggregations and transformations, time series and panel data operations, and programming with lists of data objects. The lead author is an applied economist and created the package mainly to facilitate advanced computations on varied and complex data, in particular surveys, (multivariate) time series, multilevel / panel data, and lists / model objects. A secondary aspect to applied work is that data is often imported into R from richer data structures (such as STATA, SPSS or SAS files imported with *haven*). This called for an intelligent suite of data manipulation functions that can both utilize aspects of the richer data structure (such as variable labels), and preserve the data structure / attributes in computations. Sometimes specialized classes like *xts*, *pdata.frame* and *grouped_df* can also become very useful to manipulate certain types of data. Thus *collapse* was built to explicitly supports these classes, while preserving most other classes / data structures in R. Another objective was to radically improve the speed of R code by extensively relying on efficient algorithms in C/C++ and the faster components of base R. *collapse* ranks among the fastest R packages, and performs many grouped and/or weighted computations noticeably faster than *dplyr* or *data.table*. A final development objective was to channel this performance through a stable and well conceived user API providing extensive and optimized programming capabilities (in standard evaluation) while also facilitating quick use and easy integration with existing data manipulation frameworks (in particular *dplyr* / *tidyverse* and *data.table*, both relying on non-standard evaluation). ## 1. Data and Summary Tools We begin by introducing some powerful summary tools along with the 2 panel datasets *collapse* provides which are used throughout this vignette. If you are just interested in programming you can skip this section. Apart from the 2 datasets that come with *collapse* (`wlddev` and `GGDC10S`), this vignette uses a few well known datasets from base R: `mtcars`, `iris`, `airquality`, and the time series `Airpassengers` and `EuStockMarkets`. ### 1.1 `wlddev` - World Bank Development Data This dataset contains 5 key World Bank Development Indicators covering 216 countries for up to 61 years (1960-2020). It is a balanced balanced panel with $216 \times 61 = 13176$ observations. --> ```r library(collapse) head(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # 3 Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA 112839996 # 4 Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA 237720001 # 5 Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA 295920013 # 6 Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA 341839996 # POP # 1 8996973 # 2 9169410 # 3 9351441 # 4 9543205 # 5 9744781 # 6 9956320 # The variables have "label" attributes. Use vlabels() to get and set labels namlab(wlddev, class = TRUE) # Variable Class # 1 country character # 2 iso3c factor # 3 date Date # 4 year integer # 5 decade integer # 6 region factor # 7 income factor # 8 OECD logical # 9 PCGDP numeric # 10 LIFEEX numeric # 11 GINI numeric # 12 ODA numeric # 13 POP numeric # Label # 1 Country Name # 2 Country Code # 3 Date Recorded (Fictitious) # 4 Year # 5 Decade # 6 Region # 7 Income Level # 8 Is OECD Member Country? # 9 GDP per capita (constant 2010 US$) # 10 Life expectancy at birth, total (years) # 11 Gini index (World Bank estimate) # 12 Net official development assistance and official aid received (constant 2018 US$) # 13 Population, total ``` Of the categorical identifiers, the date variable was artificially generated to have an example dataset that contains all common data types frequently encountered in R. A detailed statistical description of this data is computed by `descr`: ```r # A fast and detailed statistical description descr(wlddev) # Dataset: wlddev, 13 Variables, N = 13176 # ---------------------------------------------------------------------------------------------------- # country (character): Country Name # Statistics # N Ndist # 13176 216 # Table # Freq Perc # Afghanistan 61 0.46 # Albania 61 0.46 # Algeria 61 0.46 # American Samoa 61 0.46 # Andorra 61 0.46 # Angola 61 0.46 # Antigua and Barbuda 61 0.46 # Argentina 61 0.46 # Armenia 61 0.46 # Aruba 61 0.46 # Australia 61 0.46 # Austria 61 0.46 # Azerbaijan 61 0.46 # Bahamas, The 61 0.46 # ... 202 Others 12322 93.52 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 61 61 61 61 61 61 # ---------------------------------------------------------------------------------------------------- # iso3c (factor): Country Code # Statistics # N Ndist # 13176 216 # Table # Freq Perc # ABW 61 0.46 # AFG 61 0.46 # AGO 61 0.46 # ALB 61 0.46 # AND 61 0.46 # ARE 61 0.46 # ARG 61 0.46 # ARM 61 0.46 # ASM 61 0.46 # ATG 61 0.46 # AUS 61 0.46 # AUT 61 0.46 # AZE 61 0.46 # BDI 61 0.46 # ... 202 Others 12322 93.52 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 61 61 61 61 61 61 # ---------------------------------------------------------------------------------------------------- # date (Date): Date Recorded (Fictitious) # Statistics # N Ndist Min Max # 13176 61 1961-01-01 2021-01-01 # ---------------------------------------------------------------------------------------------------- # year (integer): Year # Statistics # N Ndist Mean SD Min Max Skew Kurt # 13176 61 1990 17.61 1960 2020 -0 1.8 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1960 1963 1966 1975 1990 2005 2014 2017 2020 # ---------------------------------------------------------------------------------------------------- # decade (integer): Decade # Statistics # N Ndist Mean SD Min Max Skew Kurt # 13176 7 1985.57 17.51 1960 2020 0.03 1.79 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1960 1960 1960 1970 1990 2000 2010 2010 2020 # ---------------------------------------------------------------------------------------------------- # region (factor): Region # Statistics # N Ndist # 13176 7 # Table # Freq Perc # Europe & Central Asia 3538 26.85 # Sub-Saharan Africa 2928 22.22 # Latin America & Caribbean 2562 19.44 # East Asia & Pacific 2196 16.67 # Middle East & North Africa 1281 9.72 # South Asia 488 3.70 # North America 183 1.39 # ---------------------------------------------------------------------------------------------------- # income (factor): Income Level # Statistics # N Ndist # 13176 4 # Table # Freq Perc # High income 4819 36.57 # Upper middle income 3660 27.78 # Lower middle income 2867 21.76 # Low income 1830 13.89 # ---------------------------------------------------------------------------------------------------- # OECD (logical): Is OECD Member Country? # Statistics # N Ndist # 13176 2 # Table # Freq Perc # FALSE 10980 83.33 # TRUE 2196 16.67 # ---------------------------------------------------------------------------------------------------- # PCGDP (numeric): GDP per capita (constant 2010 US$) # Statistics (28.13% NAs) # N Ndist Mean SD Min Max Skew Kurt # 9470 9470 12048.78 19077.64 132.08 196061.42 3.13 17.12 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 227.71 399.62 555.55 1303.19 3767.16 14787.03 35646.02 48507.84 92340.28 # ---------------------------------------------------------------------------------------------------- # LIFEEX (numeric): Life expectancy at birth, total (years) # Statistics (11.43% NAs) # N Ndist Mean SD Min Max Skew Kurt # 11670 10548 64.3 11.48 18.91 85.42 -0.67 2.67 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 35.83 42.77 46.83 56.36 67.44 72.95 77.08 79.34 82.36 # ---------------------------------------------------------------------------------------------------- # GINI (numeric): Gini index (World Bank estimate) # Statistics (86.76% NAs) # N Ndist Mean SD Min Max Skew Kurt # 1744 368 38.53 9.2 20.7 65.8 0.6 2.53 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 24.6 26.3 27.6 31.5 36.4 45 52.6 55.98 60.5 # ---------------------------------------------------------------------------------------------------- # ODA (numeric): Net official development assistance and official aid received (constant 2018 US$) # Statistics (34.67% NAs) # N Ndist Mean SD Min Max Skew Kurt # 8608 7832 454'720131 868'712654 -997'679993 2.56715605e+10 6.98 114.89 # Quantiles # 1% 5% 10% 25% 50% 75% 90% # -12'593999.7 1'363500.01 8'347000.31 44'887499.8 165'970001 495'042503 1.18400697e+09 # 95% 99% # 1.93281696e+09 3.73380782e+09 # ---------------------------------------------------------------------------------------------------- # POP (numeric): Population, total # Statistics (1.95% NAs) # N Ndist Mean SD Min Max Skew Kurt # 12919 12877 24'245971.6 102'120674 2833 1.39771500e+09 9.75 108.91 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 8698.84 31083.3 62268.4 443791 4'072517 12'816178 46'637331.4 81'177252.5 308'862641 # ---------------------------------------------------------------------------------------------------- ``` The output of `descr` can be converted into a tidy data frame using: ```r head(as.data.frame(descr(wlddev))) # Variable Class Label N Ndist Min Max Mean SD # 1 country character Country Name 13176 216 NA NA NA NA # 2 iso3c factor Country Code 13176 216 NA NA NA NA # 3 date Date Date Recorded (Fictitious) 13176 61 -3287 18628 NA NA # 4 year integer Year 13176 61 1960 2020 1990.000 17.60749 # 5 decade integer Decade 13176 7 1960 2020 1985.574 17.51175 # 6 region factor Region 13176 7 NA NA NA NA # Skew Kurt 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1 NA NA NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA NA NA NA NA # 4 -5.812381e-16 1.799355 1960 1963 1966 1975 1990 2005 2014 2017 2020 # 5 3.256512e-02 1.791726 1960 1960 1960 1970 1990 2000 2010 2010 2020 # 6 NA NA NA NA NA NA NA NA NA NA NA ``` Note that `descr` does not require data to be labeled. Since `wlddev` is a panel data set tracking countries over time, we might be interested in checking which variables are time-varying, with the function `varying`: ```r varying(wlddev, wlddev$iso3c) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE # POP # TRUE ``` `varying` tells us that all 5 variables `PCGDP`, `LIFEEX`, `GINI`, `ODA` and `POP` vary over time. However the `OECD` variable does not, so this data does not track when countries entered the OECD. We can also have a more detailed look letting `varying` check the variation in each country: ```r head(varying(wlddev, wlddev$iso3c, any_group = FALSE)) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # ABW FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE NA TRUE TRUE # AFG FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE NA TRUE TRUE # AGO FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE # ALB FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE # AND FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE NA NA NA TRUE # ARE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE ``` `NA` indicates that there are no data for this country. In general data is varying if it has two or more distinct non-missing values. We could also take a closer look at observation counts and distinct values using: ```r head(fnobs(wlddev, wlddev$iso3c)) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # ABW 61 61 61 61 61 61 61 61 32 60 0 20 60 # AFG 61 61 61 61 61 61 61 61 18 60 0 60 60 # AGO 61 61 61 61 61 61 61 61 40 60 3 58 60 # ALB 61 61 61 61 61 61 61 61 40 60 9 32 60 # AND 61 61 61 61 61 61 61 61 50 0 0 0 60 # ARE 61 61 61 61 61 61 61 61 45 60 2 45 60 head(fndistinct(wlddev, wlddev$iso3c)) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # ABW 1 1 61 61 7 1 1 1 32 60 0 20 60 # AFG 1 1 61 61 7 1 1 1 18 60 0 60 60 # AGO 1 1 61 61 7 1 1 1 40 59 3 58 60 # ALB 1 1 61 61 7 1 1 1 40 59 9 32 60 # AND 1 1 61 61 7 1 1 1 50 0 0 0 60 # ARE 1 1 61 61 7 1 1 1 45 60 2 45 60 ``` Note that `varying` is more efficient than `fndistinct`, although both functions are very fast. Even more powerful summary methods for multilevel / panel data are provided by `qsu` (shorthand for *quick-summary*). It is modeled after *STATA*'s *summarize* and *xtsummarize* commands. Calling `qsu` on the data gives a concise summary. We can subset columns internally using the `cols` argument: ```r qsu(wlddev, cols = 9:12, higher = TRUE) # higher adds skewness and kurtosis # N Mean SD Min Max Skew Kurt # PCGDP 9470 12048.778 19077.6416 132.0776 196061.417 3.1276 17.1154 # LIFEEX 11670 64.2963 11.4764 18.907 85.4171 -0.6748 2.6718 # GINI 1744 38.5341 9.2006 20.7 65.8 0.596 2.5329 # ODA 8608 454'720131 868'712654 -997'679993 2.56715605e+10 6.9832 114.889 ``` We could easily compute these statistics by region: ```r qsu(wlddev, by = ~region, cols = 9:12, vlabels = TRUE, higher = TRUE) # , , PCGDP: GDP per capita (constant 2010 US$) # # N Mean SD Min Max Skew Kurt # East Asia & Pacific 1467 10513.2441 14383.5507 132.0776 71992.1517 1.6392 4.7419 # Europe & Central Asia 2243 25992.9618 26435.1316 366.9354 196061.417 2.2022 10.1977 # Latin America & Caribbean 1976 7628.4477 8818.5055 1005.4085 88391.3331 4.1702 29.3739 # Middle East & North Africa 842 13878.4213 18419.7912 578.5996 116232.753 2.4178 9.7669 # North America 180 48699.76 24196.2855 16405.9053 113236.091 0.938 2.9688 # South Asia 382 1235.9256 1611.2232 265.9625 8476.564 2.7874 10.3402 # Sub-Saharan Africa 2380 1840.0259 2596.0104 164.3366 20532.9523 3.1161 14.4175 # # , , LIFEEX: Life expectancy at birth, total (years) # # N Mean SD Min Max Skew Kurt # East Asia & Pacific 1807 65.9445 10.1633 18.907 85.078 -0.856 4.3125 # Europe & Central Asia 3046 72.1625 5.7602 45.369 85.4171 -0.5594 4.0434 # Latin America & Caribbean 2107 68.3486 7.3768 41.762 82.1902 -1.0357 3.9379 # Middle East & North Africa 1226 66.2508 9.8306 29.919 82.8049 -0.8782 3.3054 # North America 144 76.2867 3.5734 68.8978 82.0488 -0.1963 1.976 # South Asia 480 57.5585 11.3004 32.446 78.921 -0.2623 2.1147 # Sub-Saharan Africa 2860 51.581 8.6876 26.172 74.5146 0.1452 2.7245 # # , , GINI: Gini index (World Bank estimate) # # N Mean SD Min Max Skew Kurt # East Asia & Pacific 154 37.7571 5.0318 27.8 49.1 0.3631 2.3047 # Europe & Central Asia 798 31.9114 4.5809 20.7 48.4 0.2989 2.5254 # Latin America & Caribbean 413 49.9557 5.4821 34.4 63.3 -0.0386 2.3631 # Middle East & North Africa 91 36.0143 5.2073 26 47.4 0.0241 1.9209 # North America 49 37.4816 3.6972 31 41.5 -0.4282 1.4577 # South Asia 46 33.8804 3.9898 25.9 43.8 0.4205 2.7748 # Sub-Saharan Africa 193 44.6606 8.2003 29.8 65.8 0.6598 2.8451 # # , , ODA: Net official development assistance and official aid received (constant 2018 US$) # # N Mean SD Min Max # East Asia & Pacific 1537 352'017964 622'847624 -997'679993 4.04487988e+09 # Europe & Central Asia 787 402'455286 568'237036 -322'070007 4.34612988e+09 # Latin America & Caribbean 1972 172'880081 260'781049 -444'040009 2.99568994e+09 # Middle East & North Africa 1105 732'380009 1.52108993e+09 -141'789993 2.56715605e+10 # North America 39 468717.916 10'653560.8 -15'869999.9 61'509998.3 # South Asia 466 1.27049955e+09 1.61492889e+09 -247'369995 8.75425977e+09 # Sub-Saharan Africa 2702 486'371750 656'336230 -18'409999.8 1.18790801e+10 # Skew Kurt # East Asia & Pacific 2.722 11.5221 # Europe & Central Asia 3.1305 15.2525 # Latin America & Caribbean 3.3259 22.4569 # Middle East & North Africa 6.6304 79.2238 # North America 4.8602 29.3092 # South Asia 1.7923 6.501 # Sub-Saharan Africa 4.5456 48.8447 ``` Computing summary statistics by country is of course also possible but would be too much information. Fortunately `qsu` lets us do something much more powerful: ```r qsu(wlddev, pid = ~ iso3c, cols = c(1,4,9:12), vlabels = TRUE, higher = TRUE) # , , country: Country Name # # N/T Mean SD Min Max Skew Kurt # Overall 13176 - - - - - - # Between 216 - - - - - - # Within 61 - - - - - - # # , , year: Year # # N/T Mean SD Min Max Skew Kurt # Overall 13176 1990 17.6075 1960 2020 -0 1.7994 # Between 216 1990 0 1990 1990 - - # Within 61 1990 17.6075 1960 2020 -0 1.7994 # # , , PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew Kurt # Overall 9470 12048.778 19077.6416 132.0776 196061.417 3.1276 17.1154 # Between 206 12962.6054 20189.9007 253.1886 141200.38 3.1263 16.2299 # Within 45.9709 12048.778 6723.6808 -33504.8721 76767.5254 0.6576 17.2003 # # , , LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # Overall 11670 64.2963 11.4764 18.907 85.4171 -0.6748 2.6718 # Between 207 64.9537 9.8936 40.9663 85.4171 -0.5012 2.1693 # Within 56.3768 64.2963 6.0842 32.9068 84.4198 -0.2643 3.7027 # # , , GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # Overall 1744 38.5341 9.2006 20.7 65.8 0.596 2.5329 # Between 167 39.4233 8.1356 24.8667 61.7143 0.5832 2.8256 # Within 10.4431 38.5341 2.9277 25.3917 55.3591 0.3263 5.3389 # # , , ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max Skew Kurt # Overall 8608 454'720131 868'712654 -997'679993 2.56715605e+10 6.9832 114.889 # Between 178 439'168412 569'049959 468717.916 3.62337432e+09 2.355 9.9487 # Within 48.3596 454'720131 650'709624 -2.44379420e+09 2.45610972e+10 9.6047 263.3716 ``` The above output reports 3 sets of summary statistics for each variable: Statistics computed on the *Overall* (raw) data, and on the *Between*-country (i.e. country averaged) and *Within*-country (i.e. country-demeaned) data^[in the *Within* data, the overall mean was added back after subtracting out country means, to preserve the level of the data, see also section 6.5.]. This is a powerful way to summarize panel data because aggregating the data by country gives us a cross-section of countries with no variation over time, whereas subtracting country specific means from the data eliminates all cross-sectional variation. So what can these statistics tell us about our data? The `N/T` columns shows that for `PCGDP` we have 8995 total observations, that we observe GDP data for 203 countries and that we have on average 44.3 observations (time-periods) per country. In contrast the GINI Index is only available for 161 countries with 8.4 observations on average. The *Overall* and *Within* mean of the data are identical by definition, and the *Between* mean would also be the same in a balanced panel with no missing observations. In practice we have unequal amounts of observations for different countries, thus countries have different weights in the *Overall* mean and the difference between *Overall* and *Between*-country mean reflects this discrepancy. The most interesting statistic in this summary arguably is the standard deviation, and in particular the comparison of the *Between*-SD reflecting the variation between countries and the *Within*-SD reflecting average variation over time. This comparison shows that PCGDP, LIFEEX and GINI vary more between countries, but ODA received varies more within countries over time. The 0 *Between*-SD for the year variable and the fact that the *Overall* and *Within*-SD are equal shows that year is individual invariant. Thus `qsu` also provides the same information as `varying`, but with additional details on the relative magnitudes of cross-sectional and time series variation. It is also a common pattern that the *kurtosis* increases in within-transformed data, while the *skewness* decreases in most cases. We could also do all of that by regions to have a look at the between and within country variations inside and across different World regions: ```r qsu(wlddev, by = ~ region, pid = ~ iso3c, cols = 9:12, vlabels = TRUE, higher = TRUE) # , , Overall, PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 1467 10513.2441 14383.5507 132.0776 71992.1517 1.6392 4.7419 # Europe & Central Asia 2243 25992.9618 26435.1316 366.9354 196061.417 2.2022 10.1977 # Latin America & Caribbean 1976 7628.4477 8818.5055 1005.4085 88391.3331 4.1702 29.3739 # Middle East & North Africa 842 13878.4213 18419.7912 578.5996 116232.753 2.4178 9.7669 # North America 180 48699.76 24196.2855 16405.9053 113236.091 0.938 2.9688 # South Asia 382 1235.9256 1611.2232 265.9625 8476.564 2.7874 10.3402 # Sub-Saharan Africa 2380 1840.0259 2596.0104 164.3366 20532.9523 3.1161 14.4175 # # , , Between, PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 34 10513.2441 12771.742 444.2899 39722.0077 1.1488 2.7089 # Europe & Central Asia 56 25992.9618 24051.035 809.4753 141200.38 2.0026 9.0733 # Latin America & Caribbean 38 7628.4477 8470.9708 1357.3326 77403.7443 4.4548 32.4956 # Middle East & North Africa 20 13878.4213 17251.6962 1069.6596 64878.4021 1.9508 6.0796 # North America 3 48699.76 18604.4369 35260.4708 74934.5874 0.7065 1.5 # South Asia 8 1235.9256 1488.3669 413.68 6621.5002 3.0546 11.3083 # Sub-Saharan Africa 47 1840.0259 2234.3254 253.1886 9922.0052 2.1442 6.8259 # # , , Within, PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew # East Asia & Pacific 43.1471 12048.778 6615.8248 -11964.6472 49541.463 0.824 # Europe & Central Asia 40.0536 12048.778 10971.0483 -33504.8721 76767.5254 0.4307 # Latin America & Caribbean 52 12048.778 2451.2636 -354.1639 23036.3668 0.1259 # Middle East & North Africa 42.1 12048.778 6455.0512 -18674.4049 63665.0446 1.8525 # North America 60 12048.778 15470.4609 -29523.1017 50350.2816 -0.2451 # South Asia 47.75 12048.778 617.0934 10026.9155 14455.865 0.9846 # Sub-Saharan Africa 50.6383 12048.778 1321.764 4846.3834 24883.1246 1.3879 # Kurt # East Asia & Pacific 8.9418 # Europe & Central Asia 7.4139 # Latin America & Caribbean 7.1939 # Middle East & North Africa 23.0457 # North America 3.2075 # South Asia 5.6366 # Sub-Saharan Africa 28.0186 # # , , Overall, LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 1807 65.9445 10.1633 18.907 85.078 -0.856 4.3125 # Europe & Central Asia 3046 72.1625 5.7602 45.369 85.4171 -0.5594 4.0434 # Latin America & Caribbean 2107 68.3486 7.3768 41.762 82.1902 -1.0357 3.9379 # Middle East & North Africa 1226 66.2508 9.8306 29.919 82.8049 -0.8782 3.3054 # North America 144 76.2867 3.5734 68.8978 82.0488 -0.1963 1.976 # South Asia 480 57.5585 11.3004 32.446 78.921 -0.2623 2.1147 # Sub-Saharan Africa 2860 51.581 8.6876 26.172 74.5146 0.1452 2.7245 # # , , Between, LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 32 65.9445 7.6833 49.7995 77.9008 -0.3832 2.4322 # Europe & Central Asia 55 72.1625 4.4378 60.1129 85.4171 -0.6584 2.8874 # Latin America & Caribbean 40 68.3486 4.9199 53.4918 82.1902 -0.9947 4.1617 # Middle East & North Africa 21 66.2508 5.922 52.5371 76.7395 -0.3181 3.0331 # North America 3 76.2867 1.3589 74.8065 78.4175 0.1467 1.6356 # South Asia 8 57.5585 5.6158 49.1972 69.3429 0.6643 3.1288 # Sub-Saharan Africa 48 51.581 5.657 40.9663 71.5749 1.1333 4.974 # # , , Within, LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 56.4688 64.2963 6.6528 32.9068 83.9918 -0.3949 3.9528 # Europe & Central Asia 55.3818 64.2963 3.6723 46.3045 78.6265 -0.0307 3.7576 # Latin America & Caribbean 52.675 64.2963 5.4965 46.7831 79.5026 -0.3827 2.9936 # Middle East & North Africa 58.381 64.2963 7.8467 41.6187 78.8872 -0.6216 2.808 # North America 48 64.2963 3.3049 54.7766 69.4306 -0.4327 2.3027 # South Asia 60 64.2963 9.8062 41.4342 83.0122 -0.0946 2.1035 # Sub-Saharan Africa 59.5833 64.2963 6.5933 41.5678 84.4198 0.0811 2.7821 # # , , Overall, GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 154 37.7571 5.0318 27.8 49.1 0.3631 2.3047 # Europe & Central Asia 798 31.9114 4.5809 20.7 48.4 0.2989 2.5254 # Latin America & Caribbean 413 49.9557 5.4821 34.4 63.3 -0.0386 2.3631 # Middle East & North Africa 91 36.0143 5.2073 26 47.4 0.0241 1.9209 # North America 49 37.4816 3.6972 31 41.5 -0.4282 1.4577 # South Asia 46 33.8804 3.9898 25.9 43.8 0.4205 2.7748 # Sub-Saharan Africa 193 44.6606 8.2003 29.8 65.8 0.6598 2.8451 # # , , Between, GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 23 37.7571 4.3005 30.8 45.8857 0.4912 2.213 # Europe & Central Asia 49 31.9114 4.0611 24.8667 40.935 0.3323 2.291 # Latin America & Caribbean 25 49.9557 4.0492 41.1 57.9 0.03 2.2573 # Middle East & North Africa 15 36.0143 4.7002 29.05 42.7 -0.2035 1.6815 # North America 2 37.4816 3.3563 33.1222 40.0129 -0.5503 1.3029 # South Asia 7 33.8804 3.0052 30.3556 38.8 0.2786 1.4817 # Sub-Saharan Africa 46 44.6606 6.8844 34.52 61.7143 0.9464 3.2302 # # , , Within, GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 6.6957 38.5341 2.6125 31.0187 45.8901 -0.0585 3.0933 # Europe & Central Asia 16.2857 38.5341 2.1195 31.2841 50.1387 0.6622 6.1763 # Latin America & Caribbean 16.52 38.5341 3.6955 25.3917 48.8341 -0.0506 2.7603 # Middle East & North Africa 6.0667 38.5341 2.2415 31.7675 45.777 0.0408 4.7415 # North America 24.5 38.5341 1.5507 33.0212 42.7119 -1.3213 6.8321 # South Asia 6.5714 38.5341 2.6244 32.8341 45.0675 -0.1055 2.6885 # Sub-Saharan Africa 4.1957 38.5341 4.4553 27.9452 55.3591 0.6338 4.4174 # # , , Overall, ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max # East Asia & Pacific 1537 352'017964 622'847624 -997'679993 4.04487988e+09 # Europe & Central Asia 787 402'455286 568'237036 -322'070007 4.34612988e+09 # Latin America & Caribbean 1972 172'880081 260'781049 -444'040009 2.99568994e+09 # Middle East & North Africa 1105 732'380009 1.52108993e+09 -141'789993 2.56715605e+10 # North America 39 468717.916 10'653560.8 -15'869999.9 61'509998.3 # South Asia 466 1.27049955e+09 1.61492889e+09 -247'369995 8.75425977e+09 # Sub-Saharan Africa 2702 486'371750 656'336230 -18'409999.8 1.18790801e+10 # Skew Kurt # East Asia & Pacific 2.722 11.5221 # Europe & Central Asia 3.1305 15.2525 # Latin America & Caribbean 3.3259 22.4569 # Middle East & North Africa 6.6304 79.2238 # North America 4.8602 29.3092 # South Asia 1.7923 6.501 # Sub-Saharan Africa 4.5456 48.8447 # # , , Between, ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max # East Asia & Pacific 31 352'017964 457'183279 1'654615.38 1.63585532e+09 # Europe & Central Asia 32 402'455286 438'074771 12'516000.1 2.05456932e+09 # Latin America & Caribbean 37 172'880081 167'160838 2'225483.88 538'386665 # Middle East & North Africa 21 732'380009 775'418887 3'112820.5 2.86174883e+09 # North America 1 468717.916 0 468717.916 468717.916 # South Asia 8 1.27049955e+09 1.18347893e+09 27'152499.9 3.62337432e+09 # Sub-Saharan Africa 48 486'371750 397'995105 28'801206.9 1.55049113e+09 # Skew Kurt # East Asia & Pacific 1.7771 5.1361 # Europe & Central Asia 2.0449 7.2489 # Latin America & Caribbean 0.8981 2.4954 # Middle East & North Africa 1.1363 3.6377 # North America - - # South Asia 0.7229 2.4072 # Sub-Saharan Africa 0.9871 3.1513 # # , , Within, ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max # East Asia & Pacific 49.5806 454'720131 422'992450 -2.04042108e+09 3.59673152e+09 # Europe & Central Asia 24.5938 454'720131 361'916875 -1.08796786e+09 3.30549004e+09 # Latin America & Caribbean 53.2973 454'720131 200'159960 -527'706542 3.28976141e+09 # Middle East & North Africa 52.619 454'720131 1.30860235e+09 -2.34610870e+09 2.45610972e+10 # North America 39 454'720131 10'653560.8 438'381413 515'761411 # South Asia 58.25 454'720131 1.09880524e+09 -2.44379420e+09 5.58560558e+09 # Sub-Saharan Africa 56.2917 454'720131 521'897637 -952'168698 1.12814455e+10 # Skew Kurt # East Asia & Pacific 0.2908 14.4428 # Europe & Central Asia 2.3283 18.6937 # Latin America & Caribbean 3.7015 41.7506 # Middle East & North Africa 7.8663 117.987 # North America 4.8602 29.3092 # South Asia 1.8418 9.4588 # Sub-Saharan Africa 5.2349 86.1042 ``` Notice that the output here is a 4D array of summary statistics, which we could also subset (`[`) or permute (`aperm`) to view these statistics in any convenient way. If we don't like the array, we can also output as a nested list of statistics matrices: ```r l <- qsu(wlddev, by = ~ region, pid = ~ iso3c, cols = 9:12, vlabels = TRUE, higher = TRUE, array = FALSE) str(l, give.attr = FALSE) # List of 4 # $ PCGDP: GDP per capita (constant 2010 US$) :List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 1467 2243 1976 842 180 ... # ..$ Between: 'qsu' num [1:7, 1:7] 34 56 38 20 3 ... # ..$ Within : 'qsu' num [1:7, 1:7] 43.1 40.1 52 42.1 60 ... # $ LIFEEX: Life expectancy at birth, total (years) :List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 1807 3046 2107 1226 144 ... # ..$ Between: 'qsu' num [1:7, 1:7] 32 55 40 21 3 ... # ..$ Within : 'qsu' num [1:7, 1:7] 56.5 55.4 52.7 58.4 48 ... # $ GINI: Gini index (World Bank estimate) :List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 154 798 413 91 49 ... # ..$ Between: 'qsu' num [1:7, 1:7] 23 49 25 15 2 ... # ..$ Within : 'qsu' num [1:7, 1:7] 6.7 16.29 16.52 6.07 24.5 ... # $ ODA: Net official development assistance and official aid received (constant 2018 US$):List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 1537 787 1972 1105 39 ... # ..$ Between: 'qsu' num [1:7, 1:7] 31 32 37 21 1 ... # ..$ Within : 'qsu' num [1:7, 1:7] 49.6 24.6 53.3 52.6 39 ... ``` Such a list of statistics matrices could, for example, be converted into a tidy data frame using `unlist2d` (more about this in the section on list-processing): ```r head(unlist2d(l, idcols = c("Variable", "Trans"), row.names = "Region")) # Variable Trans Region N Mean # 1 PCGDP: GDP per capita (constant 2010 US$) Overall East Asia & Pacific 1467 10513.244 # 2 PCGDP: GDP per capita (constant 2010 US$) Overall Europe & Central Asia 2243 25992.962 # 3 PCGDP: GDP per capita (constant 2010 US$) Overall Latin America & Caribbean 1976 7628.448 # 4 PCGDP: GDP per capita (constant 2010 US$) Overall Middle East & North Africa 842 13878.421 # 5 PCGDP: GDP per capita (constant 2010 US$) Overall North America 180 48699.760 # 6 PCGDP: GDP per capita (constant 2010 US$) Overall South Asia 382 1235.926 # SD Min Max Skew Kurt # 1 14383.551 132.0776 71992.152 1.6392248 4.741856 # 2 26435.132 366.9354 196061.417 2.2022472 10.197685 # 3 8818.505 1005.4085 88391.333 4.1701769 29.373869 # 4 18419.791 578.5996 116232.753 2.4177586 9.766883 # 5 24196.285 16405.9053 113236.091 0.9380056 2.968769 # 6 1611.223 265.9625 8476.564 2.7873830 10.340176 ``` This is not yet end of `qsu`'s functionality, as we can also do all of the above on panel-surveys utilizing weights (`w` argument). Finally, we can look at (weighted) pairwise correlations in this data: ```r pwcor(wlddev[9:12], N = TRUE, P = TRUE) # PCGDP LIFEEX GINI ODA # PCGDP 1 (9470) .57* (9022) -.44* (1735) -.16* (7128) # LIFEEX .57* (9022) 1 (11670) -.35* (1742) -.02 (8142) # GINI -.44* (1735) -.35* (1742) 1 (1744) -.20* (1109) # ODA -.16* (7128) -.02 (8142) -.20* (1109) 1 (8608) ``` which can of course also be computed on averaged and within-transformed data: ```r print(pwcor(fmean(wlddev[9:12], wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri") # PCGDP LIFEEX GINI ODA # PCGDP 1 (206) # LIFEEX .60* (199) 1 (207) # GINI -.42* (165) -.40* (165) 1 (167) # ODA -.25* (172) -.21* (172) -.19* (145) 1 (178) # N is same as overall N shown above... print(pwcor(fwithin(wlddev[9:12], wlddev$iso3c), P = TRUE), show = "lower.tri") # PCGDP LIFEEX GINI ODA # PCGDP 1 # LIFEEX .31* 1 # GINI -.01 -.16* 1 # ODA -.01 .17* -.08* 1 ``` A useful function called by `pwcor` is `pwnobs`, which is very handy to explore the joint observation structure when selecting variables to include in a statistical model: ```r pwnobs(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # country 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # iso3c 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # date 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # year 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # decade 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # region 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # income 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # OECD 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # PCGDP 9470 9470 9470 9470 9470 9470 9470 9470 9470 9022 1735 7128 9470 # LIFEEX 11670 11670 11670 11670 11670 11670 11670 11670 9022 11670 1742 8142 11659 # GINI 1744 1744 1744 1744 1744 1744 1744 1744 1735 1742 1744 1109 1744 # ODA 8608 8608 8608 8608 8608 8608 8608 8608 7128 8142 1109 8608 8597 # POP 12919 12919 12919 12919 12919 12919 12919 12919 9470 11659 1744 8597 12919 ``` Note that both `pwcor/pwcov` and `pwnobs` are faster on matrices. ### 1.2 `GGDC10S` - GGDC 10-Sector Database The Groningen Growth and Development Centre 10-Sector Database provides long-run data on sectoral productivity performance in Africa, Asia, and Latin America. Variables covered in the data set are annual series of value added (VA, in local currency), and persons employed (EMP) for 10 broad sectors. ```r head(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA # 3 BWA SSA Sub-saharan Africa VA 1962 NA NA NA NA # 4 BWA SSA Sub-saharan Africa VA 1963 NA NA NA NA # 5 BWA SSA Sub-saharan Africa VA 1964 16.30154 3.494075 0.7365696 0.1043936 # 6 BWA SSA Sub-saharan Africa VA 1965 15.72700 2.495768 1.0181992 0.1350976 # CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA # 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229 # 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710 namlab(GGDC10S, class = TRUE) # Variable Class Label # 1 Country character Country # 2 Regioncode character Region code # 3 Region character Region # 4 Variable character Variable # 5 Year numeric Year # 6 AGR numeric Agriculture # 7 MIN numeric Mining # 8 MAN numeric Manufacturing # 9 PU numeric Utilities # 10 CON numeric Construction # 11 WRT numeric Trade, restaurants and hotels # 12 TRA numeric Transport, storage and communication # 13 FIRE numeric Finance, insurance, real estate and business services # 14 GOV numeric Government services # 15 OTH numeric Community, social and personal services # 16 SUM numeric Summation of sector GDP fnobs(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 5027 5027 5027 5027 5027 4364 4355 4355 4354 # CON WRT TRA FIRE GOV OTH SUM # 4355 4355 4355 4355 3482 4248 4364 fndistinct(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 43 6 6 2 67 4353 4224 4353 4237 # CON WRT TRA FIRE GOV OTH SUM # 4339 4344 4334 4349 3470 4238 4364 # The countries included: cat(funique(GGDC10S$Country, sort = TRUE)) # ARG BOL BRA BWA CHL CHN COL CRI DEW DNK EGY ESP ETH FRA GBR GHA HKG IDN IND ITA JPN KEN KOR MEX MOR MUS MWI MYS NGA NGA(alt) NLD PER PHL SEN SGP SWE THA TWN TZA USA VEN ZAF ZMB ``` The first problem in summarizing this data is that value added (VA) is in local currency, the second that it contains 2 different Variables (VA and EMP) stacked in the same column. One way of solving the first problem could be converting the data to percentages through dividing by the overall VA and EMP contained in the last column. A different solution involving grouped-scaling is introduced in section 6.4. The second problem is again nicely handled by `qsu`, which can also compute panel-statistics by groups. ```r # Converting data to percentages of overall VA / EMP, dapply keeps the attributes, see section 6.1 pGGDC10S <- ftransformv(GGDC10S, 6:15, `*`, 100 / SUM) # Summarizing the sectoral data by variable, overall, between and within countries su <- qsu(pGGDC10S, by = ~ Variable, pid = ~ Variable + Country, cols = 6:16, higher = TRUE) # This gives a 4D array of summary statistics str(su) # 'qsu' num [1:2, 1:7, 1:3, 1:11] 2225 2139 35.1 17.3 26.7 ... # - attr(*, "dimnames")=List of 4 # ..$ : chr [1:2] "EMP" "VA" # ..$ : chr [1:7] "N/T" "Mean" "SD" "Min" ... # ..$ : chr [1:3] "Overall" "Between" "Within" # ..$ : chr [1:11] "AGR" "MIN" "MAN" "PU" ... # Permuting this array to a more readible format aperm(su, c(4L, 2L, 3L, 1L)) # , , Overall, EMP # # N/T Mean SD Min Max Skew Kurt # AGR 2225 35.0949 26.7235 0.156 100 0.4856 2.0951 # MIN 2216 1.0349 1.4247 0.0043 9.4097 3.1281 15.0429 # MAN 2216 14.9768 8.0392 0.5822 45.2974 0.4272 2.8455 # PU 2215 0.5782 0.3601 0.0154 2.4786 1.2588 5.5822 # CON 2216 5.6583 2.9252 0.1417 15.9887 -0.0631 2.2725 # WRT 2216 14.9155 6.5573 0.809 32.8046 -0.1814 2.3226 # TRA 2216 4.8193 2.652 0.1506 15.0454 0.9477 4.4695 # FIRE 2216 4.6501 4.3518 0.0799 21.7717 1.2345 4.0831 # GOV 1780 13.1263 8.0844 0 34.8897 0.6301 2.5338 # OTH 2109 8.3977 6.6409 0.421 34.8942 1.4028 4.3191 # SUM 2225 36846.8741 96318.6544 173.8829 764200 5.0229 30.9814 # # , , Between, EMP # # N/T Mean SD Min Max Skew Kurt # AGR 42 35.0949 24.1204 0.9997 88.3263 0.5202 2.2437 # MIN 42 1.0349 1.2304 0.0296 6.8532 2.7313 12.331 # MAN 42 14.9768 7.0375 1.718 32.3439 -0.0164 2.4321 # PU 42 0.5782 0.3041 0.0671 1.3226 0.5459 2.6905 # CON 42 5.6583 2.4748 0.5037 10.3691 -0.4442 2.3251 # WRT 42 14.9155 5.264 4.0003 26.7699 -0.5478 2.7294 # TRA 42 4.8193 2.4712 0.374 12.3887 0.9782 4.7857 # FIRE 42 4.6501 3.4468 0.1505 12.4402 0.6052 2.5883 # GOV 34 13.1263 7.2832 2.0086 29.1577 0.3858 2.1068 # OTH 40 8.3977 6.266 1.3508 26.4036 1.4349 4.3185 # SUM 42 36846.8741 89205.503 369.2353 485820.474 4.0761 19.3159 # # , , Within, EMP # # N/T Mean SD Min Max Skew Kurt # AGR 52.9762 26.3768 11.5044 -5.3234 107.4891 1.6002 11.9683 # MIN 52.7619 3.4006 0.7182 -1.4068 7.509 -0.1988 15.0343 # MAN 52.7619 17.476 3.8861 -1.1061 40.3964 -0.082 7.3994 # PU 52.7381 1.3896 0.1929 0.6346 2.5461 0.5731 7.8523 # CON 52.7619 5.7633 1.5596 0.8964 12.9663 0.3077 4.1248 # WRT 52.7619 15.7581 3.91 3.7356 29.7615 0.3339 3.3386 # TRA 52.7619 6.3486 0.9623 2.3501 11.1064 0.2671 5.7162 # FIRE 52.7619 5.8228 2.6567 -2.9836 15.9974 0.5486 4.0288 # GOV 52.3529 13.263 3.5088 -2.1983 23.611 -0.5647 4.7286 # OTH 52.725 7.3941 2.1999 -2.3286 17.4413 0.2929 6.4631 # SUM 52.9762 21'566436.8 36327.1443 21'287906.3 21'844816.3 0.6649 34.2495 # # , , Overall, VA # # N/T Mean SD Min Max Skew Kurt # AGR 2139 17.3082 15.5066 0.0318 95.222 1.3274 4.8827 # MIN 2139 5.8514 9.0975 0 59.0602 2.7193 10.9184 # MAN 2139 20.0651 8.0033 0.979 41.6281 -0.0348 2.6831 # PU 2139 2.2298 1.1088 0 9.1888 0.8899 6.2385 # CON 2139 5.8721 2.5113 0.5951 25.8575 1.5002 8.9578 # WRT 2139 16.631 5.1374 4.5187 39.7594 0.3455 3.2655 # TRA 2139 7.9329 3.1057 0.7957 25.9625 1.0122 5.7137 # FIRE 2139 7.0377 12.7077 -151.065 39.1705 -6.2254 59.8739 # GOV 1702 13.406 6.3521 0.7607 32.5107 0.4888 2.9043 # OTH 2139 6.4046 5.8416 0.2327 31.4474 1.4978 4.2051 # SUM 2139 43'961639.1 358'350627 0 8.06794210e+09 15.7682 289.4632 # # , , Between, VA # # N/T Mean SD Min Max Skew Kurt # AGR 43 17.3082 13.1901 0.6058 63.8364 1.1328 4.7111 # MIN 43 5.8514 7.5705 0.0475 27.9214 1.7113 4.807 # MAN 43 20.0651 6.6423 4.1869 32.1138 -0.3591 2.619 # PU 43 2.2298 0.7457 0.4462 4.307 0.6196 3.8724 # CON 43 5.8721 1.8455 2.9405 12.9279 1.3285 6.505 # WRT 43 16.631 4.3779 8.4188 26.3876 0.292 2.4553 # TRA 43 7.9329 2.7222 2.037 14.8892 0.6362 3.6686 # FIRE 43 7.0377 9.0284 -35.6144 23.8658 -2.674 15.0975 # GOV 35 13.406 5.875 1.9757 27.7714 0.5198 3.0416 # OTH 43 6.4046 5.6137 1.1184 19.5299 1.3274 3.2043 # SUM 43 43'961639.1 185'785836 5077.7231 1.23317892e+09 5.8098 36.9778 # # , , Within, VA # # N/T Mean SD Min Max Skew Kurt # AGR 49.7442 26.3768 8.1532 5.245 94.3499 1.234 9.5269 # MIN 49.7442 3.4006 5.0451 -20.051 35.7053 0.341 13.102 # MAN 49.7442 17.476 4.4647 1.1188 36.3501 -0.1928 3.9339 # PU 49.7442 1.3896 0.8206 -1.0904 6.2714 0.5258 5.3462 # CON 49.7442 5.7633 1.7031 -0.3464 18.6929 0.7493 6.3751 # WRT 49.7442 15.7581 2.6884 4.6513 32.6691 0.2338 4.4953 # TRA 49.7442 6.3486 1.4951 0.9187 18.5977 0.6995 10.1129 # FIRE 49.7442 5.8228 8.9428 -109.6278 54.1241 -2.7728 54.5971 # GOV 48.6286 13.263 2.4153 5.1249 22.8497 0.1663 3.3083 # OTH 49.7442 7.3941 1.6159 -0.9151 19.3116 0.7301 9.6613 # SUM 49.7442 21'566436.8 306'429102 -1.21124805e+09 6.85632962e+09 12.6639 253.1145 ``` The statistics show that the dataset is very consistent: Employment data cover 42 countries and 53 time-periods in almost all sectors. Agriculture is the largest sector in terms of employment, amounting to a 35% share of employment across countries and time, with a standard deviation (SD) of around 27%. The between-country SD in agricultural employment share is 24% and the within SD is 12%, indicating that processes of structural change are very gradual and most of the variation in structure is between countries. The next largest sectors after agriculture are manufacturing, wholesale and retail trade and government, each claiming an approx. 15% share of the economy. In these sectors the between-country SD is also about twice as large as the within-country SD. In terms of value added, the data covers 43 countries in 50 time-periods. Agriculture, manufacturing, wholesale and retail trade and government are also the largest sectors in terms of VA, but with a diminished agricultural share (around 17%) and a greater share for manufacturing (around 20%). The variation between countries is again greater than the variation within countries, but it seems that at least in terms of agricultural VA share there is also a considerable within-country SD of 8%. This is also true for the finance and real estate sector with a within SD of 9%, suggesting (using a bit of common sense) that a diminishing VA share in agriculture and increased VA share in finance and real estate was a pattern characterizing most of the countries in this sample. As a final step we consider a plot function which can be used to plot the structural transformation of any supported country. Below for Botswana: ```r library(data.table) library(ggplot2) library(magrittr) plotGGDC <- function(ctry) { # Select and subset fsubset(GGDC10S, Country == ctry, Variable, Year, AGR:SUM) %>% # Convert to shares and replace negative values with NA ftransform(fselect(., AGR:OTH) %>% lapply(`*`, 1 / SUM) %>% replace_outliers(0, NA, "min")) %>% # Remove totals column and make proper variable labels ftransform(Variable = recode_char(Variable, VA = "Value Added Share", EMP = "Employment Share"), SUM = NULL) %>% # Fast conversion to data.table qDT %>% # data.table's melt function melt(1:2, variable.name = "Sector", na.rm = TRUE) %>% # ggplot with some scales provided by the 'scales' package ggplot(aes(x = Year, y = value, fill = Sector)) + geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) + theme_linedraw(base_size = 14L) + facet_wrap( ~ Variable) + scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10L))) + scale_x_continuous(breaks = scales::pretty_breaks(n = 7L), expand = c(0, 0)) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10L), expand = c(0, 0), labels = scales::percent) + theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)), strip.background = element_rect(colour = "grey20", fill = "grey20"), strip.text = element_text(face = "bold")) } # Plotting the structural transformation of Botswana plotGGDC("BWA") ```
plot of chunk scplot_BWA

plot of chunk scplot_BWA

## 2. Fast Data Manipulation A lot of R code is not concerned with statistical computations but with preliminary data wrangling. For various reasons R development has focused on data frames as the main medium to contain data, although matrices / arrays provide significantly faster methods for common manipulations. A first essential step towards optimizing R code is thus to speed up very frequent manipulations on data frames. *collapse* introduces a set of highly optimized functions to efficiently manipulate (mostly) data frames. Most manipulations can be conducted in non-standard evaluation or standard evaluation (utilizing different functions), and all functions preserve the data structure (i.e. they can be used with data.table, tbl_df, grouped_df, pdata.frame etc.). ### 2.1 Selecting and Replacing Columns `fselect` is an analogue to `dplyr::select`, but executes about 100x faster. It can be used to select variables using expressions involving variable names: ```r library(magrittr) # Pipe operators fselect(wlddev, country, year, PCGDP:ODA) %>% head(2) # country year PCGDP LIFEEX GINI ODA # 1 Afghanistan 1960 NA 32.446 NA 116769997 # 2 Afghanistan 1961 NA 32.962 NA 232080002 fselect(wlddev, -country, -year, -(PCGDP:ODA)) %>% head(2) # iso3c date decade region income OECD POP # 1 AFG 1961-01-01 1960 South Asia Low income FALSE 8996973 # 2 AFG 1962-01-01 1960 South Asia Low income FALSE 9169410 library(microbenchmark) microbenchmark(fselect = collapse::fselect(wlddev, country, year, PCGDP:ODA), select = dplyr::select(wlddev, country, year, PCGDP:ODA)) # Unit: microseconds # expr min lq mean median uq max neval # fselect 2.911 3.4645 4.76297 4.3665 5.3710 20.459 100 # select 382.284 393.0055 442.70734 410.3075 441.4265 2951.262 100 ``` in contrast to `dplyr::select`, `fselect` has a replacement method ```r # Computing the log of columns fselect(wlddev, PCGDP:POP) <- lapply(fselect(wlddev, PCGDP:POP), log) head(wlddev, 2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 3.479577 NA 18.57572 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 3.495355 NA 19.26259 # POP # 1 16.01240 # 2 16.03138 # Efficient deleting fselect(wlddev, country, year, PCGDP:POP) <- NULL head(wlddev, 2) # iso3c date decade region income OECD # 1 AFG 1961-01-01 1960 South Asia Low income FALSE # 2 AFG 1962-01-01 1960 South Asia Low income FALSE rm(wlddev) ``` and it can also return information about the selected columns other than the data itself. ```r fselect(wlddev, PCGDP:POP, return = "names") # [1] "PCGDP" "LIFEEX" "GINI" "ODA" "POP" fselect(wlddev, PCGDP:POP, return = "indices") # [1] 9 10 11 12 13 fselect(wlddev, PCGDP:POP, return = "named_indices") # PCGDP LIFEEX GINI ODA POP # 9 10 11 12 13 fselect(wlddev, PCGDP:POP, return = "logical") # [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE fselect(wlddev, PCGDP:POP, return = "named_logical") # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE # POP # TRUE ``` While `fselect` is faster than `dplyr::select`, it is also simpler and does not offer special methods for grouped tibbles (e.g. where grouping columns are always selected) and some other *dplyr*-specific features of `select`. We will see that this is not a problem at all when working with statistical functions in *collapse* that have a grouped_df method, but users should be careful replacing `dplyr::select` with `fselect` in *dplyr* scripts. From *collapse* 1.6.0, `fselect` has explicit support for *sf* data frames. The standard-evaluation analogue to `fselect` is the function `get_vars`. `get_vars` can be used to select variables using names, indices, logical vectors, functions or regular expressions evaluated against column names: ```r get_vars(wlddev, 9:13) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 get_vars(wlddev, c("PCGDP","LIFEEX","GINI","ODA","POP")) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 get_vars(wlddev, "[[:upper:]]", regex = TRUE) %>% head(1) # OECD PCGDP LIFEEX GINI ODA POP # 1 FALSE NA 32.446 NA 116769997 8996973 get_vars(wlddev, "PC|LI|GI|OD|PO", regex = TRUE) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 # Same as above, vectors of regular expressions are sequentially passed to grep get_vars(wlddev, c("PC","LI","GI","OD","PO"), regex = TRUE) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 get_vars(wlddev, is.numeric) %>% head(1) # year decade PCGDP LIFEEX GINI ODA POP # 1 1960 1960 NA 32.446 NA 116769997 8996973 # Returning other information get_vars(wlddev, is.numeric, return = "names") # [1] "year" "decade" "PCGDP" "LIFEEX" "GINI" "ODA" "POP" get_vars(wlddev, "[[:upper:]]", regex = TRUE, return = "named_indices") # OECD PCGDP LIFEEX GINI ODA POP # 8 9 10 11 12 13 ``` Replacing operations are conducted analogous: ```r get_vars(wlddev, 9:13) <- lapply(get_vars(wlddev, 9:13), log) get_vars(wlddev, 9:13) <- NULL head(wlddev, 2) # country iso3c date year decade region income OECD # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE rm(wlddev) ``` `get_vars` is about 2x faster than `[.data.frame`, and `get_vars<-` is about 6-8x faster than `[<-.data.frame`. In addition to `get_vars`, *collapse* offers a set of functions to efficiently select and replace data by data type: `num_vars`, `cat_vars` (for categorical = non-numeric columns), `char_vars`, `fact_vars`, `logi_vars` and `date_vars` (for date and date-time columns). ```r head(num_vars(wlddev), 2) # year decade PCGDP LIFEEX GINI ODA POP # 1 1960 1960 NA 32.446 NA 116769997 8996973 # 2 1961 1960 NA 32.962 NA 232080002 9169410 head(cat_vars(wlddev), 2) # country iso3c date region income OECD # 1 Afghanistan AFG 1961-01-01 South Asia Low income FALSE # 2 Afghanistan AFG 1962-01-01 South Asia Low income FALSE head(fact_vars(wlddev), 2) # iso3c region income # 1 AFG South Asia Low income # 2 AFG South Asia Low income # Replacing fact_vars(wlddev) <- fact_vars(wlddev) ``` ### 2.2 Subsetting `fsubset` is an enhanced version of `base::subset` using C functions from the *data.table* package for fast and subsetting operations. In contrast to `base::subset`, `fsubset` allows multiple comma-separated select arguments after the subset argument, and it also preserves all attributes of subsetted columns: ```r # Returning only value-added data after 1990 fsubset(GGDC10S, Variable == "VA" & Year > 1990, Country, Year, AGR:GOV) %>% head(2) # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # 1 BWA 1991 303.1157 2646.950 472.6488 160.6079 580.0876 806.7509 232.7884 432.6965 1073.263 # 2 BWA 1992 333.4364 2690.939 537.4274 178.4532 678.7320 725.2577 285.1403 517.2141 1234.012 # Same thing fsubset(GGDC10S, Variable == "VA" & Year > 1990, -(Regioncode:Variable), -(OTH:SUM)) %>% head(2) # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # 1 BWA 1991 303.1157 2646.950 472.6488 160.6079 580.0876 806.7509 232.7884 432.6965 1073.263 # 2 BWA 1992 333.4364 2690.939 537.4274 178.4532 678.7320 725.2577 285.1403 517.2141 1234.012 ``` It is also possible to use standard evaluation with `fsubset`, but for these purposes the function `ss` exists as a fast and more secure alternative to `[.data.frame`: ```r ss(GGDC10S, 1:2, 6:16) # or fsubset(GGDC10S, 1:2, 6:16), but not recommended. # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA NA NA ss(GGDC10S, -(1:2), c("AGR","MIN")) %>% head(2) # AGR MIN # 1 NA NA # 2 NA NA ``` Thanks to the *data.table* C code and optimized R code, `fsubset` is very fast. ```r microbenchmark(base = subset(GGDC10S, Variable == "VA" & Year > 1990, AGR:SUM), collapse = fsubset(GGDC10S, Variable == "VA" & Year > 1990, AGR:SUM)) # Unit: microseconds # expr min lq mean median uq max neval # base 150.839 156.5585 199.63105 160.3510 166.993 3778.191 100 # collapse 45.715 49.0975 51.55545 50.9015 52.357 82.861 100 microbenchmark(GGDC10S[1:10, 1:10], ss(GGDC10S, 1:10, 1:10)) # Unit: microseconds # expr min lq mean median uq max neval # GGDC10S[1:10, 1:10] 36.367 36.982 38.14599 37.515 38.294 76.219 100 # ss(GGDC10S, 1:10, 1:10) 1.886 2.050 2.30666 2.214 2.419 8.405 100 ``` like `base::subset`, `fsubset` is S3 generic with methods for vectors, matrices and data frames. For certain classes such as factors, `fsubset.default` also improves upon `[`, but the largest improvements are with the data frame method. ### 2.3 Reordering Rows and Columns `roworder` is a fast analogue to `dplyr::arrange`. The syntax is inspired by `data.table::setorder`, so that negative variable names indicate descending sort. ```r roworder(GGDC10S, -Variable, Country) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ARG LAM Latin America VA 1950 5.887857e-07 0 3.53443e-06 # 2 ARG LAM Latin America VA 1951 9.165327e-07 0 4.77277e-06 microbenchmark(collapse = collapse::roworder(GGDC10S, -Variable, Country), dplyr = dplyr::arrange(GGDC10S, desc(Variable), Country)) # Unit: microseconds # expr min lq mean median uq max neval # collapse 113.406 152.151 176.7567 165.722 183.0855 538.330 100 # dplyr 1240.168 1299.372 1618.5869 1384.755 1507.8160 8350.552 100 ``` In contrast to `data.table::setorder`, `roworder` creates a copy of the data frame (unless data are already sorted). If this copy is not required, `data.table::setorder` is faster. The function `roworderv` is a standard evaluation analogue to `roworder`: ```r # Same as above roworderv(GGDC10S, c("Variable", "Country"), decreasing = c(TRUE, FALSE)) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ARG LAM Latin America VA 1950 5.887857e-07 0 3.53443e-06 # 2 ARG LAM Latin America VA 1951 9.165327e-07 0 4.77277e-06 ``` With `roworderv`, it is also possible to move or exchange rows in a data frame: ```r # If length(neworder) < fnrow(data), the default (pos = "front") brings rows to the front roworderv(GGDC10S, neworder = which(GGDC10S$Country == "GHA")) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 GHA SSA Sub-saharan Africa VA 1960 0.03576160 0.005103683 0.01744687 # 2 GHA SSA Sub-saharan Africa VA 1961 0.03823049 0.005456030 0.01865136 # pos = "end" brings rows to the end roworderv(GGDC10S, neworder = which(GGDC10S$Country == "BWA"), pos = "end") %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ETH SSA Sub-saharan Africa VA 1960 NA NA NA # 2 ETH SSA Sub-saharan Africa VA 1961 4495.614 11.86979 109.616 # pos = "exchange" arranges selected rows in the order they are passed, without affecting other rows roworderv(GGDC10S, neworder = with(GGDC10S, c(which(Country == "GHA"), which(Country == "BWA"))), pos = "exchange") %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 GHA SSA Sub-saharan Africa VA 1960 0.03576160 0.005103683 0.01744687 # 2 GHA SSA Sub-saharan Africa VA 1961 0.03823049 0.005456030 0.01865136 ``` Similarly, the pair `colorder` / `colorderv` facilitates efficient reordering of columns in a data frame. These functions not require a deep copy of the data and are very fast. To reorder columns by reference, see also `data.table::setcolorder`. ```r # The default is again pos = "front" which brings selected columns to the front / left colorder(GGDC10S, Variable, Country, Year) %>% head(2) # Variable Country Year Regioncode Region AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # 1 VA BWA 1960 SSA Sub-saharan Africa NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA 1961 SSA Sub-saharan Africa NA NA NA NA NA NA NA NA NA NA NA ``` ### 2.4 Transforming and Computing New Columns `ftransform` is an improved version of `base::transform` for data frames and lists. `ftransform` can be used to compute new columns or modify and delete existing columns, and always returns the entire data frame. ```r ftransform(GGDC10S, AGR_perc = AGR / SUM * 100, # Computing Agricultural percentage Year = as.integer(Year), # Coercing Year to integer AGR = NULL) %>% tail(2) # Deleting column AGR # Country Regioncode Region Variable Year MIN MAN PU # 5026 EGY MENA Middle East and North Africa EMP 2011 27.56394 2373.814 317.9979 # 5027 EGY MENA Middle East and North Africa EMP 2012 24.78083 2348.434 324.9332 # CON WRT TRA FIRE GOV OTH SUM AGR_perc # 5026 2795.264 3020.236 2048.335 814.7403 5635.522 NA 22219.39 23.33961 # 5027 2931.196 3109.522 2065.004 832.4770 5735.623 NA 22532.56 22.90281 # Computing scalar results replicates them ftransform(GGDC10S, MIN_mean = fmean(MIN), Intercept = 1) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 5185.919 27.56394 2373.814 # 5027 EGY MENA Middle East and North Africa EMP 2012 5160.590 24.78083 2348.434 # PU CON WRT TRA FIRE GOV OTH SUM MIN_mean Intercept # 5026 317.9979 2795.264 3020.236 2048.335 814.7403 5635.522 NA 22219.39 1867909 1 # 5027 324.9332 2931.196 3109.522 2065.004 832.4770 5735.623 NA 22532.56 1867909 1 ``` The modification `ftransformv` exists to transform specific columns using a function: ```r # Apply the log to columns 6-16 GGDC10S %>% ftransformv(6:16, log) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 8.553702 3.316508 7.772253 # 5027 EGY MENA Middle East and North Africa EMP 2012 8.548806 3.210070 7.761504 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845 NA 10.00872 # 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452 NA 10.02272 # Convert data to percentage terms GGDC10S %>% ftransformv(6:16, `*`, 100/SUM) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 23.33961 0.1240535 10.68352 # 5027 EGY MENA Middle East and North Africa EMP 2012 22.90281 0.1099779 10.42240 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 1.431173 12.58029 13.59279 9.218680 3.666798 25.36308 NA 100 # 5027 1.442061 13.00871 13.80013 9.164534 3.694551 25.45482 NA 100 # Apply log to numeric columns GGDC10S %>% ftransformv(is.numeric, log) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 7.606387 8.553702 3.316508 7.772253 # 5027 EGY MENA Middle East and North Africa EMP 7.606885 8.548806 3.210070 7.761504 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845 NA 10.00872 # 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452 NA 10.02272 ``` Instead of passing comma-separated `column = value` expressions, it is also possible to bulk-process data with `fransform` by passing a single list of expressions (such as a data frame). This is useful for more complex transformations involving multiple steps: ```r # Same as above, but also replacing any generated infinite values with NA GGDC10S %>% ftransform(num_vars(.) %>% lapply(log) %>% replace_Inf) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 7.606387 8.553702 3.316508 7.772253 # 5027 EGY MENA Middle East and North Africa EMP 7.606885 8.548806 3.210070 7.761504 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845 NA 10.00872 # 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452 NA 10.02272 ``` This mode of usage toggles automatic column matching and replacement. Non-matching columns are added to the data frame. Apart from to `ftransform`, the function `settransform(v)` can be used to change the input data frame by reference: ```r # Computing a new column and deleting some others by reference settransform(GGDC10S, FIRE_MAN = FIRE / MAN, Regioncode = NULL, Region = NULL) tail(GGDC10S, 2) # Country Variable Year AGR MIN MAN PU CON WRT TRA FIRE # 5026 EGY EMP 2011 5185.919 27.56394 2373.814 317.9979 2795.264 3020.236 2048.335 814.7403 # 5027 EGY EMP 2012 5160.590 24.78083 2348.434 324.9332 2931.196 3109.522 2065.004 832.4770 # GOV OTH SUM FIRE_MAN # 5026 5635.522 NA 22219.39 0.3432200 # 5027 5735.623 NA 22532.56 0.3544817 rm(GGDC10S) # Bulk-processing the data into percentage terms settransformv(GGDC10S, 6:16, `*`, 100/SUM) tail(GGDC10S, 2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 23.33961 0.1240535 10.68352 # 5027 EGY MENA Middle East and North Africa EMP 2012 22.90281 0.1099779 10.42240 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 1.431173 12.58029 13.59279 9.218680 3.666798 25.36308 NA 100 # 5027 1.442061 13.00871 13.80013 9.164534 3.694551 25.45482 NA 100 # Same thing via replacement ftransform(GGDC10S) <- fselect(GGDC10S, AGR:SUM) %>% lapply(`*`, 100/.$SUM) # Or using double pipes GGDC10S %<>% ftransformv(6:16, `*`, 100/SUM) rm(GGDC10S) ``` Another convenient addition is provided by the function `fcompute`, which can be used to compute new columns in a data frame environment and returns the computed columns in a new data frame: ```r fcompute(GGDC10S, AGR_perc = AGR / SUM * 100, FIRE_MAN = FIRE / MAN) %>% tail(2) # AGR_perc FIRE_MAN # 5026 23.33961 0.3432200 # 5027 22.90281 0.3544817 ``` For more complex tasks see `?ftransform`. ### 2.5 Adding and Binding Columns For cases where multiple columns are computed and need to be added to a data frame (regardless of whether names are duplicated or not), *collapse* introduces the predicate `add_vars`. Together with `add_vars`, the function `add_stub` is useful to add a prefix (default) or postfix to computed variables keeping the variable names unique: ```r # Efficient adding logged versions of some variables add_vars(wlddev) <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.") head(wlddev, 2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # POP log10.PCGDP log10.LIFEEX log10.GINI log10.ODA log10.POP # 1 8996973 NA 1.511161 NA 8.067331 6.954096 # 2 9169410 NA 1.518014 NA 8.365638 6.962341 rm(wlddev) ``` By default `add_vars` appends a data frame towards the (right) end, but it can also replace columns in front or at other positions in the data frame: ```r add_vars(wlddev, "front") <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.") head(wlddev, 2) # log10.PCGDP log10.LIFEEX log10.GINI log10.ODA log10.POP country iso3c date year decade # 1 NA 1.511161 NA 8.067331 6.954096 Afghanistan AFG 1961-01-01 1960 1960 # 2 NA 1.518014 NA 8.365638 6.962341 Afghanistan AFG 1962-01-01 1961 1960 # region income OECD PCGDP LIFEEX GINI ODA POP # 1 South Asia Low income FALSE NA 32.446 NA 116769997 8996973 # 2 South Asia Low income FALSE NA 32.962 NA 232080002 9169410 rm(wlddev) add_vars(wlddev, c(10L,12L,14L,16L,18L)) <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.") head(wlddev, 2) # country iso3c date year decade region income OECD PCGDP log10.PCGDP LIFEEX # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA NA 32.446 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA NA 32.962 # log10.LIFEEX GINI log10.GINI ODA log10.ODA POP log10.POP # 1 1.511161 NA NA 116769997 8.067331 8996973 6.954096 # 2 1.518014 NA NA 232080002 8.365638 9169410 6.962341 rm(wlddev) ``` `add_vars` can also be used without replacement, where it serves as a more efficient version of `cbind.data.frame`, with the difference that the data structure and attributes of the first argument are preserved: ```r add_vars(wlddev, get_vars(wlddev, 9:13) %>% lapply(log) %>% add_stub("log."), get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.")) %>% head(2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # POP log.PCGDP log.LIFEEX log.GINI log.ODA log.POP log10.PCGDP log10.LIFEEX log10.GINI # 1 8996973 NA 3.479577 NA 18.57572 16.01240 NA 1.511161 NA # 2 9169410 NA 3.495355 NA 19.26259 16.03138 NA 1.518014 NA # log10.ODA log10.POP # 1 8.067331 6.954096 # 2 8.365638 6.962341 add_vars(wlddev, get_vars(wlddev, 9:13) %>% lapply(log) %>% add_stub("log."), get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10."), pos = c(10L,13L,16L,19L,22L,11L,14L,17L,20L,23L)) %>% head(2) # country iso3c date year decade region income OECD PCGDP log.PCGDP log10.PCGDP # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA NA NA # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA NA NA # LIFEEX log.LIFEEX log10.LIFEEX GINI log.GINI log10.GINI ODA log.ODA log10.ODA POP # 1 32.446 3.479577 1.511161 NA NA NA 116769997 18.57572 8.067331 8996973 # 2 32.962 3.495355 1.518014 NA NA NA 232080002 19.26259 8.365638 9169410 # log.POP log10.POP # 1 16.01240 6.954096 # 2 16.03138 6.962341 identical(cbind(wlddev, wlddev), add_vars(wlddev, wlddev)) # [1] TRUE microbenchmark(cbind(wlddev, wlddev), add_vars(wlddev, wlddev)) # Unit: microseconds # expr min lq mean median uq max neval # cbind(wlddev, wlddev) 13.694 14.1040 15.72760 14.391 14.7600 57.072 100 # add_vars(wlddev, wlddev) 3.280 3.6285 4.13567 3.813 4.0385 19.352 100 ``` ### 2.6 Renaming Columns `frename` is a fast substitute for `dplyr::rename`: ```r frename(GGDC10S, AGR = Agriculture, MIN = Mining) %>% head(2) # Country Regioncode Region Variable Year Agriculture Mining MAN PU CON WRT TRA FIRE # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA # GOV OTH SUM # 1 NA NA NA # 2 NA NA NA frename(GGDC10S, tolower) %>% head(2) # country regioncode region variable year agr min man pu con wrt tra fire gov oth sum # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA NA NA NA frename(GGDC10S, tolower, cols = .c(AGR, MIN)) %>% head(2) # Country Regioncode Region Variable Year agr min MAN PU CON WRT TRA FIRE GOV OTH SUM # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA NA NA NA ``` The function `setrename` does this by reference: ```r setrename(GGDC10S, AGR = Agriculture, MIN = Mining) head(GGDC10S, 2) # Country Regioncode Region Variable Year Agriculture Mining MAN PU CON WRT TRA FIRE # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA # GOV OTH SUM # 1 NA NA NA # 2 NA NA NA setrename(GGDC10S, Agriculture = AGR, Mining = MIN) rm(GGDC10S) ``` Both functions are not limited to data frames but can be applied to any R object with a 'names' attribute. ### 2.7 Using Shortcuts The most frequently required among the functions introduced above can be abbreviated as follows: `fselect -> slt`, `fsubset -> sbt`, `ftransform(v) -> tfm(v)`, `settransform(v) -> settfm(v)`, `get_vars -> gv`, `num_vars -> nv`, `add_vars -> av`. This was done to make it possible to write faster and more parsimonious code, but is recommended only for personally kept scripts. A lazy person may also decide to code everything using shortcuts and then do ctrl+F replacement with the long names on the finished script. ### 2.8 Missing Values / Rows The function `na_omit` is a much faster alternative to `stats::na.omit` for vectors, matrices and data frames. By default the 'na.action' attribute containing the removed cases is omitted, but it can be added with the option `na.attr = TRUE`. Like `fsubset`, `na_omit` preserves all column attributes as well as attributes of the data frame itself. ```r microbenchmark(na_omit(wlddev, na.attr = TRUE), na.omit(wlddev)) # Unit: microseconds # expr min lq mean median uq max neval # na_omit(wlddev, na.attr = TRUE) 60.393 69.208 84.8126 79.9910 88.683 419.881 100 # na.omit(wlddev) 745.790 856.449 1721.5457 940.6015 1005.177 56344.414 100 ``` Another added feature is the removal of cases missing on certain columns only: ```r na_omit(wlddev, cols = .c(PCGDP, LIFEEX)) %>% head(2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI # 1 Afghanistan AFG 2003-01-01 2002 2000 South Asia Low income FALSE 330.3036 56.784 NA # 2 Afghanistan AFG 2004-01-01 2003 2000 South Asia Low income FALSE 343.0809 57.271 NA # ODA POP # 1 1790479980 22600770 # 2 1972890015 23680871 # only removing missing data from numeric columns -> same and slightly faster than na_omit(wlddev) na_omit(wlddev, cols = is.numeric) %>% head(2) # country iso3c date year decade region income OECD PCGDP # 1 Albania ALB 1997-01-01 1996 1990 Europe & Central Asia Upper middle income FALSE 1869.866 # 2 Albania ALB 2003-01-01 2002 2000 Europe & Central Asia Upper middle income FALSE 2572.721 # LIFEEX GINI ODA POP # 1 72.495 27.0 294089996 3168033 # 2 74.579 31.7 453309998 3051010 ``` For atomic vectors the function `na_rm` also exists which is 2x faster than `x[!is.na(x)]`. Both `na_omit` and `na_rm` return their argument if no missing cases were found. The existence of missing cases can be checked using `missing_cases`, which is also considerably faster than `complete.cases` for data frames. There is also a function `na_insert` to randomly insert missing values into vectors, matrices and data frames. The default is `na_insert(X, prop = 0.1)` so that 10% of values are randomly set to missing. Finally, a function `allNA` provides the much needed opposite of `anyNA` for atomic vectors. ### 2.9 Unique Values / Rows Similar to `na_omit`, the function `funique` is a much faster alternative to `base::unique` for atomic vectors and data frames. Like most *collapse* functions it also seeks to preserve attributes. ```r funique(GGDC10S$Variable) # Unique values in order of appearance # [1] "VA" "EMP" # attr(,"label") # [1] "Variable" # attr(,"format.stata") # [1] "%9s" funique(GGDC10S$Variable, sort = TRUE) # Sorted unique values # [1] "EMP" "VA" # attr(,"label") # [1] "Variable" # attr(,"format.stata") # [1] "%9s" # If all values/rows are unique, the original data is returned (no copy) identical(funique(GGDC10S), GGDC10S) # [1] TRUE # Can remove duplicate rows by a subset of columns funique(GGDC10S, cols = .c(Country, Variable)) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA # 2 BWA SSA Sub-saharan Africa EMP 1960 NA NA NA funique(GGDC10S, cols = .c(Country, Variable), sort = TRUE) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ARG LAM Latin America EMP 1950 1.799565e+03 32.71936 1.603249e+03 # 2 ARG LAM Latin America VA 1950 5.887857e-07 0.00000 3.534430e-06 ``` ### 2.10 Recoding and Replacing Values With `recode_num`, `recode_char`, `replace_NA`, `replace_Inf` and `replace_outliers`, *collapse* also introduces a set of functions to efficiently recode and replace numeric and character values in matrix-like objects (vectors, matrices, arrays, data frames, lists of atomic objects). When called on a data frame, `recode_num`, `replace_Inf` and `replace_outliers` will skip non-numeric columns, and `recode_char` skips non-character columns, whereas `replace_NA` replaces missing values in all columns. ```r # Efficient replacing missing values with 0 microbenchmark(replace_NA(GGDC10S, 0)) # Unit: microseconds # expr min lq mean median uq max neval # replace_NA(GGDC10S, 0) 109.757 141.163 203.4982 151.0235 163.0775 4579.085 100 # Adding log-transformed sectoral data: Some NaN and Inf values generated add_vars(GGDC10S, 6:16*2-5) <- fselect(GGDC10S, AGR:SUM) %>% lapply(log) %>% replace_Inf %>% add_stub("log.") head(GGDC10S, 2) # Country Regioncode Region Variable Year AGR log.AGR MIN log.MIN MAN log.MAN PU log.PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA # CON log.CON WRT log.WRT TRA log.TRA FIRE log.FIRE GOV log.GOV OTH log.OTH SUM log.SUM # 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA rm(GGDC10S) ``` `recode_num` and `recode_char` follow the syntax of `dplyr::recode` and provide more or less the same functionality except that they can efficiently be applied to matrices and data frames, and that `recode_char` allows for regular expression matching implemented via `base::grepl`: ```r month.name # [1] "January" "February" "March" "April" "May" "June" "July" "August" # [9] "September" "October" "November" "December" recode_char(month.name, ber = "C", "^J" = "A", default = "B", regex = TRUE) # [1] "A" "B" "B" "B" "B" "A" "A" "B" "B" "B" "B" "B" ``` The perhaps most interesting function in this ensemble is `replace_outliers`, which replaces values falling outside a 1- or 2-sided numeric threshold or outside a certain number of column- standard deviations with a value (default is `NA`). ```r # replace all values below 2 and above 100 with NA replace_outliers(mtcars, c(2, 100)) %>% head(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 21.0 6 NA NA 3.90 2.620 16.46 NA NA 4 4 # Mazda RX4 Wag 21.0 6 NA NA 3.90 2.875 17.02 NA NA 4 4 # Datsun 710 22.8 4 NA 93 3.85 2.320 18.61 NA NA 4 NA # replace all value smaller than 2 with NA replace_outliers(mtcars, 2, single.limit = "min") %>% head(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 NA NA 4 4 # Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 NA NA 4 4 # Datsun 710 22.8 4 108 93 3.85 2.320 18.61 NA NA 4 NA # replace all value larger than 100 with NA replace_outliers(mtcars, 100, single.limit = "max") %>% head(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 21.0 6 NA NA 3.90 2.620 16.46 0 1 4 4 # Mazda RX4 Wag 21.0 6 NA NA 3.90 2.875 17.02 0 1 4 4 # Datsun 710 22.8 4 NA 93 3.85 2.320 18.61 1 1 4 1 # replace all values above or below 3 column-standard-deviations from the column-mean with NA replace_outliers(mtcars, 3) %>% tail(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Ferrari Dino 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6 # Maserati Bora 15.0 8 301 335 3.54 3.57 14.6 0 1 5 NA # Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 ``` ## 3. Quick Data Object Conversions Apart from code employed for manipulation of data and the actual statistical computations performed, frequently used data object conversions with base functions like `as.data.frame`, `as.matrix` or `as.factor` have a significant share in slowing down R code. Optimally code would be written without such conversions, but sometimes they are necessary and thus *collapse* provides a set of functions (`qDF`, `qDT`, `qTBL`, `qM`, `qF`, `mrtl` and `mctl`) to speed these conversions up quite a bit. These functions are fast because they are non-generic and dispatch different objects internally, perform critical steps in C++, and, when passed lists of objects, they only check the length of the first column. `qDF`, `qDT` and `qTBL` efficiently convert vectors, matrices, higher-dimensional arrays and suitable lists to data.frame, *data.table* and *tibble* respectively. ```r str(EuStockMarkets) # Time-Series [1:1860, 1:4] from 1991 to 1999: 1629 1614 1607 1621 1618 ... # - attr(*, "dimnames")=List of 2 # ..$ : NULL # ..$ : chr [1:4] "DAX" "SMI" "CAC" "FTSE" # Efficient Conversion of data frames and matrices to data.table microbenchmark(qDT(wlddev), qDT(EuStockMarkets), as.data.table(wlddev), as.data.frame(EuStockMarkets)) # Unit: microseconds # expr min lq mean median uq max neval # qDT(wlddev) 3.075 3.608 4.21439 3.8950 4.2640 12.546 100 # qDT(EuStockMarkets) 6.765 8.733 12.09254 12.5050 14.1040 30.217 100 # as.data.table(wlddev) 64.206 122.180 253.75023 143.2745 173.1635 3653.346 100 # as.data.frame(EuStockMarkets) 64.247 70.971 82.25174 79.6835 84.8700 339.849 100 # Converting a time series to data.frame head(qDF(AirPassengers)) # AirPassengers # 1 112 # 2 118 # 3 132 # 4 129 # 5 121 # 6 135 ``` By default these functions drop all unnecessary attributes from matrices or lists / data frames in the conversion, but this can be changed using the `keep.attr = TRUE` argument. A useful additional feature of `qDF` and `qDT` is the `row.names.col` argument, enabling the saving of names / row-names in a column when converting from vector, matrix, array or data frame: ```r # This saves the row-names in a column named 'car' head(qDT(mtcars, "car")) # car mpg cyl disp hp drat wt qsec vs am gear carb # # 1: Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 # 2: Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 # 3: Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 # 4: Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 # 5: Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 # 6: Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 N_distinct <- fndistinct(GGDC10S) N_distinct # Country Regioncode Region Variable Year AGR MIN MAN PU # 43 6 6 2 67 4353 4224 4353 4237 # CON WRT TRA FIRE GOV OTH SUM # 4339 4344 4334 4349 3470 4238 4364 # Converting a vector to data.frame, saving names head(qDF(N_distinct, "variable")) # variable N_distinct # 1 Country 43 # 2 Regioncode 6 # 3 Region 6 # 4 Variable 2 # 5 Year 67 # 6 AGR 4353 ``` For the conversion of matrices to list there are also the programmers functions `mrtl` and `mctl`, which row- or column- wise convert a matrix into a plain list, data.frame or *data.table*. ```r # This converts the matrix to a list of 1860 row-vectors of length 4. microbenchmark(mrtl(EuStockMarkets)) # Unit: microseconds # expr min lq mean median uq max neval # mrtl(EuStockMarkets) 139.728 151.4335 168.5522 155.841 164.6355 399.791 100 ``` For the reverse operation, `qM` converts vectors, higher-dimensional arrays, data frames and suitable lists to matrix. ```r # Note: kit::psum is the most efficient way to do this microbenchmark(rowSums(qM(mtcars)), rowSums(mtcars), kit::psum(mtcars)) # Unit: nanoseconds # expr min lq mean median uq max neval # rowSums(qM(mtcars)) 5699 7933.5 12702.62 9122.5 11131.5 316315 100 # rowSums(mtcars) 38868 41697.0 48003.21 44157.0 51496.0 95981 100 # kit::psum(mtcars) 574 820.0 510905.51 943.0 1107.0 50967797 100 ``` At last, `qF` converts vectors to factor and is quite a bit faster than `as.factor`: ```r # Converting from character str(wlddev$country) # chr [1:13176] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ... # - attr(*, "label")= chr "Country Name" fndistinct(wlddev$country) # [1] 216 microbenchmark(qF(wlddev$country), as.factor(wlddev$country)) # Unit: microseconds # expr min lq mean median uq max neval # qF(wlddev$country) 70.192 71.1965 73.77376 72.160 74.784 107.256 100 # as.factor(wlddev$country) 263.794 275.7660 282.21530 278.841 283.761 360.431 100 # Converting from numeric str(wlddev$PCGDP) # num [1:13176] NA NA NA NA NA NA NA NA NA NA ... # - attr(*, "label")= chr "GDP per capita (constant 2010 US$)" fndistinct(wlddev$PCGDP) # [1] 9470 microbenchmark(qF(wlddev$PCGDP), as.factor(wlddev$PCGDP)) # Unit: microseconds # expr min lq mean median uq max neval # qF(wlddev$PCGDP) 445.096 474.944 531.221 488.146 509.0765 3930.342 100 # as.factor(wlddev$PCGDP) 9374.240 9546.132 9823.477 9633.196 9727.5165 13732.499 100 ``` ## 4. Advanced Statistical Programming Having introduced some of the more basic *collapse* data manipulation infrastructure in the preceding chapters, this chapter introduces some of the packages core functionality for programming with data. ### 4.1 Fast (Grouped, Weighted) Statistical Functions A key feature of *collapse* is it's broad set of *Fast Statistical Functions* (`fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct`), which are able to tangibly speed-up column-wise, grouped and weighted statistical computations on vectors, matrices or data frames. The basic syntax common to all of these functions is: ```r FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, drop = TRUE) ``` where `x` is a vector, matrix or data frame, `g` takes groups supplied as vector, factor, list of vectors or *GRP* object, and `w` takes a weight vector (supported by `fsum, fprod, fmean, fmedian, fmode, fnth, fvar` and `fsd`). `TRA` can be used to transform `x` using the computed statistics and one of 10 available transformations (`"replace_fill", "replace", "-", "-+", "/", "%", "+", "*", "%%, "-%%"`, discussed in section 6.3). `na.rm` efficiently skips missing values during the computation and is `TRUE` by default. `use.g.names = TRUE` generates new row-names from the unique groups supplied to `g`, and `drop = TRUE` returns a vector when performing simple (non-grouped) computations on matrix or data frame columns. With that in mind, let's start with some simple examples. To calculate simple column-wise means, it is sufficient to type: ```r fmean(mtcars$mpg) # Vector # [1] 20.09062 fmean(mtcars) # mpg cyl disp hp drat wt qsec vs am # 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750 0.437500 0.406250 # gear carb # 3.687500 2.812500 fmean(mtcars, drop = FALSE) # This returns a 1-row data-frame # mpg cyl disp hp drat wt qsec vs am gear carb # 1 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875 2.8125 m <- qM(mtcars) # Generate matrix fmean(m) # mpg cyl disp hp drat wt qsec vs am # 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750 0.437500 0.406250 # gear carb # 3.687500 2.812500 fmean(m, drop = FALSE) # This returns a 1-row matrix # mpg cyl disp hp drat wt qsec vs am gear carb # [1,] 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875 2.8125 ``` Note that separate methods for vectors, matrices and data frames are written in C++, thus no conversions are needed and computations on matrices and data frames are equally efficient. If we had a weight vector, weighted statistics are easily computed: ```r weights <- abs(rnorm(fnrow(mtcars))) # fnrow is a bit faster for data frames fmean(mtcars, w = weights) # Weighted mean # mpg cyl disp hp drat wt qsec vs # 20.8090714 5.8876772 214.9587303 142.8931066 3.7558442 3.0941361 17.8201120 0.5025300 # am gear carb # 0.4918237 3.8375831 2.7771280 fmedian(mtcars, w = weights) # Weighted median # mpg cyl disp hp drat wt qsec vs am gear carb # 21.00 6.00 160.00 113.00 3.77 3.17 18.00 1.00 0.00 4.00 2.00 fsd(mtcars, w = weights) # Frequency-weighted standard deviation # mpg cyl disp hp drat wt qsec vs # 5.8799568 1.8416865 122.4274353 74.9459089 0.5413624 0.9689836 1.8516418 0.5089768 # am gear carb # 0.5089152 0.7557877 1.6744062 fmode(mtcars, w = weights) # Weighted statistical mode (i.e. the value with the largest sum of weights) # mpg cyl disp hp drat wt qsec vs am gear carb # 21.40 4.00 121.00 109.00 3.92 2.78 18.60 1.00 0.00 4.00 2.00 ``` Fast grouped statistics can be calculated by simply passing grouping vectors or lists of grouping vectors to the fast functions: ```r fmean(mtcars, mtcars$cyl) # mpg cyl disp hp drat wt qsec vs am gear carb # 4 26.66364 4 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909 0.7272727 4.090909 1.545455 # 6 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286 0.4285714 3.857143 3.428571 # 8 15.10000 8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000 0.1428571 3.285714 3.500000 fmean(mtcars, fselect(mtcars, cyl, vs, am)) # mpg cyl disp hp drat wt qsec vs am gear carb # 4.0.1 26.00000 4 120.3000 91.00000 4.430000 2.140000 16.70000 0 1 5.000000 2.000000 # 4.1.0 22.90000 4 135.8667 84.66667 3.770000 2.935000 20.97000 1 0 3.666667 1.666667 # 4.1.1 28.37143 4 89.8000 80.57143 4.148571 2.028286 18.70000 1 1 4.142857 1.428571 # 6.0.1 20.56667 6 155.0000 131.66667 3.806667 2.755000 16.32667 0 1 4.333333 4.666667 # 6.1.0 19.12500 6 204.5500 115.25000 3.420000 3.388750 19.21500 1 0 3.500000 2.500000 # 8.0.0 15.05000 8 357.6167 194.16667 3.120833 4.104083 17.14250 0 0 3.000000 3.083333 # 8.0.1 15.40000 8 326.0000 299.50000 3.880000 3.370000 14.55000 0 1 5.000000 6.000000 # Getting column indices ind <- fselect(mtcars, cyl, vs, am, return = "indices") fmean(get_vars(mtcars, -ind), get_vars(mtcars, ind)) # mpg disp hp drat wt qsec gear carb # 4.0.1 26.00000 120.3000 91.00000 4.430000 2.140000 16.70000 5.000000 2.000000 # 4.1.0 22.90000 135.8667 84.66667 3.770000 2.935000 20.97000 3.666667 1.666667 # 4.1.1 28.37143 89.8000 80.57143 4.148571 2.028286 18.70000 4.142857 1.428571 # 6.0.1 20.56667 155.0000 131.66667 3.806667 2.755000 16.32667 4.333333 4.666667 # 6.1.0 19.12500 204.5500 115.25000 3.420000 3.388750 19.21500 3.500000 2.500000 # 8.0.0 15.05000 357.6167 194.16667 3.120833 4.104083 17.14250 3.000000 3.083333 # 8.0.1 15.40000 326.0000 299.50000 3.880000 3.370000 14.55000 5.000000 6.000000 ``` ### 4.2 Factors, Grouping Objects and Grouped Data Frames This programming can becomes more efficient when passing *factors* or *grouping objects* to the `g` argument, as otherwise vectors and lists of vectors are grouped internally. ```r # This creates a factor, na.exclude = FALSE attaches a class 'na.included' f <- qF(mtcars$cyl, na.exclude = FALSE) # The 'na.included' attribute skips a missing value check on this factor attributes(f) # $levels # [1] "4" "6" "8" # # $class # [1] "factor" "na.included" # Saving data without grouping columns dat <- get_vars(mtcars, -ind) # Grouped standard-deviation fsd(dat, f) # mpg disp hp drat wt qsec gear carb # 4 4.509828 26.87159 20.93453 0.3654711 0.5695637 1.682445 0.5393599 0.522233 # 6 1.453567 41.56246 24.26049 0.4760552 0.3563455 1.706866 0.6900656 1.812654 # 8 2.560048 67.77132 50.97689 0.3723618 0.7594047 1.196014 0.7262730 1.556624 # Without option na.exclude = FALSE, anyNA needs to be called on the factor (noticeable on larger data). f2 <- qF(mtcars$cyl) microbenchmark(fsd(dat, f), fsd(dat, f2)) # Unit: microseconds # expr min lq mean median uq max neval # fsd(dat, f) 6.027 6.232 6.51613 6.4165 6.601 11.152 100 # fsd(dat, f2) 6.150 6.396 6.77771 6.5190 6.683 25.830 100 ``` For programming purposes *GRP* objects are preferable over factors because they never require further checks and they provide additional information about the grouping (such as group sizes and the original unique values in each group). The `GRP` function creates grouping objects (of class *GRP*) from vectors or lists of columns. Grouping is done very efficiently via radix ordering in C (using the `radixorder` function): ```r # This creates a 'GRP' object. g <- GRP(mtcars, ~ cyl + vs + am) # Using the formula interface, could also use c("cyl","vs","am") or c(2,8:9) str(g) # Class 'GRP' hidden list of 9 # $ N.groups : int 7 # $ group.id : int [1:32] 4 4 3 5 6 5 6 2 2 5 ... # $ group.sizes : int [1:7] 1 3 7 3 4 12 2 # $ groups :'data.frame': 7 obs. of 3 variables: # ..$ cyl: num [1:7] 4 4 4 6 6 8 8 # ..$ vs : num [1:7] 0 1 1 0 1 0 0 # ..$ am : num [1:7] 1 0 1 1 0 0 1 # $ group.vars : chr [1:3] "cyl" "vs" "am" # $ ordered : Named logi [1:2] TRUE FALSE # ..- attr(*, "names")= chr [1:2] "ordered" "sorted" # $ order : int [1:32] 27 8 9 21 3 18 19 20 26 28 ... # ..- attr(*, "starts")= int [1:7] 1 2 5 12 15 19 31 # ..- attr(*, "maxgrpn")= int 12 # ..- attr(*, "sorted")= logi FALSE # $ group.starts: int [1:7] 27 8 3 1 4 5 29 # $ call : language GRP.default(X = mtcars, by = ~cyl + vs + am) ``` The first three elements of this object provide information about the number of groups, the group to which each row belongs, and the size of each group. A print and a plot method provide further information about the grouping: ```r print(g) # collapse grouping object of length 32 with 7 ordered groups # # Call: GRP.default(X = mtcars, by = ~cyl + vs + am), X is unsorted # # Distribution of group sizes: # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1.000 2.500 3.000 4.571 5.500 12.000 # # Groups with sizes: # 4.0.1 4.1.0 4.1.1 6.0.1 6.1.0 8.0.0 8.0.1 # 1 3 7 3 4 12 2 plot(g) ```
plot of chunk GRPplot

plot of chunk GRPplot

The important elements of the *GRP* object are directly handed down to the compiled C++ code of the statistical functions, making repeated computations over the same groups very efficient. ```r fsd(dat, g) # mpg disp hp drat wt qsec gear carb # 4.0.1 NA NA NA NA NA NA NA NA # 4.1.0 1.4525839 13.969371 19.65536 0.1300000 0.4075230 1.67143651 0.5773503 0.5773503 # 4.1.1 4.7577005 18.802128 24.14441 0.3783926 0.4400840 0.94546285 0.3779645 0.5345225 # 6.0.1 0.7505553 8.660254 37.52777 0.1616581 0.1281601 0.76872188 0.5773503 1.1547005 # 6.1.0 1.6317169 44.742634 9.17878 0.5919459 0.1162164 0.81590441 0.5773503 1.7320508 # 8.0.0 2.7743959 71.823494 33.35984 0.2302749 0.7683069 0.80164745 0.0000000 0.9003366 # 8.0.1 0.5656854 35.355339 50.20458 0.4808326 0.2828427 0.07071068 0.0000000 2.8284271 # Grouped computation with and without prior grouping microbenchmark(fsd(dat, g), fsd(dat, get_vars(mtcars, ind))) # Unit: microseconds # expr min lq mean median uq max neval # fsd(dat, g) 19.065 21.1765 23.68447 22.9600 24.9690 38.909 100 # fsd(dat, get_vars(mtcars, ind)) 31.611 35.2600 44.56823 37.3715 41.1845 327.877 100 ``` Yet another possibility is creating a grouped data frame (class *grouped_df*). This can either be done using `dplyr::group_by`, which creates a grouped tibble and requires a conversion of the grouping object using `GRP.grouped_df`, or using the more efficient `fgroup_by` provided in *collapse*: ```r gmtcars <- fgroup_by(mtcars, cyl, vs, am) # fgroup_by() can also be abbreviated as gby() fmedian(gmtcars) # cyl vs am mpg disp hp drat wt qsec gear carb # 1 4 0 1 26.00 120.3 91.0 4.430 2.140 16.70 5.0 2.0 # 2 4 1 0 22.80 140.8 95.0 3.700 3.150 20.01 4.0 2.0 # 3 4 1 1 30.40 79.0 66.0 4.080 1.935 18.61 4.0 1.0 # 4 6 0 1 21.00 160.0 110.0 3.900 2.770 16.46 4.0 4.0 # 5 6 1 0 18.65 196.3 116.5 3.500 3.440 19.17 3.5 2.5 # 6 8 0 0 15.20 355.0 180.0 3.075 3.810 17.35 3.0 3.0 # 7 8 0 1 15.40 326.0 299.5 3.880 3.370 14.55 5.0 6.0 head(fgroup_vars(gmtcars)) # cyl vs am # Mazda RX4 6 0 1 # Mazda RX4 Wag 6 0 1 # Datsun 710 4 1 1 # Hornet 4 Drive 6 1 0 # Hornet Sportabout 8 0 0 # Valiant 6 1 0 fmedian(gmtcars, keep.group_vars = FALSE) # mpg disp hp drat wt qsec gear carb # 1 26.00 120.3 91.0 4.430 2.140 16.70 5.0 2.0 # 2 22.80 140.8 95.0 3.700 3.150 20.01 4.0 2.0 # 3 30.40 79.0 66.0 4.080 1.935 18.61 4.0 1.0 # 4 21.00 160.0 110.0 3.900 2.770 16.46 4.0 4.0 # 5 18.65 196.3 116.5 3.500 3.440 19.17 3.5 2.5 # 6 15.20 355.0 180.0 3.075 3.810 17.35 3.0 3.0 # 7 15.40 326.0 299.5 3.880 3.370 14.55 5.0 6.0 ``` Now suppose we wanted to create a new dataset which contains the *mean*, *sd*, *min* and *max* of the variables *mpg* and *disp* grouped by *cyl*, *vs* and *am*: ```r # Standard evaluation dat <- get_vars(mtcars, c("mpg", "disp")) add_vars(g[["groups"]], add_stub(fmean(dat, g, use.g.names = FALSE), "mean_"), add_stub(fsd(dat, g, use.g.names = FALSE), "sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_")) # cyl vs am mean_mpg mean_disp sd_mpg sd_disp min_mpg min_disp max_mpg max_disp # 1 4 0 1 26.00000 120.3000 NA NA 26.0 120.3 26.0 120.3 # 2 4 1 0 22.90000 135.8667 1.4525839 13.969371 21.5 120.1 24.4 146.7 # 3 4 1 1 28.37143 89.8000 4.7577005 18.802128 21.4 71.1 33.9 121.0 # 4 6 0 1 20.56667 155.0000 0.7505553 8.660254 19.7 145.0 21.0 160.0 # 5 6 1 0 19.12500 204.5500 1.6317169 44.742634 17.8 167.6 21.4 258.0 # 6 8 0 0 15.05000 357.6167 2.7743959 71.823494 10.4 275.8 19.2 472.0 # 7 8 0 1 15.40000 326.0000 0.5656854 35.355339 15.0 301.0 15.8 351.0 # Non-Standard evaluation fgroup_by(mtcars, cyl, vs, am) %>% fselect(mpg, disp) %>% { add_vars(fgroup_vars(., "unique"), fmean(., keep.group_vars = FALSE) %>% add_stub("mean_"), fsd(., keep.group_vars = FALSE) %>% add_stub("sd_"), fmin(., keep.group_vars = FALSE) %>% add_stub("min_"), fmax(., keep.group_vars = FALSE) %>% add_stub("max_")) } # cyl vs am mean_mpg mean_disp sd_mpg sd_disp min_mpg min_disp max_mpg max_disp # 1 4 0 1 26.00000 120.3000 NA NA 26.0 120.3 26.0 120.3 # 2 4 1 0 22.90000 135.8667 1.4525839 13.969371 21.5 120.1 24.4 146.7 # 3 4 1 1 28.37143 89.8000 4.7577005 18.802128 21.4 71.1 33.9 121.0 # 4 6 0 1 20.56667 155.0000 0.7505553 8.660254 19.7 145.0 21.0 160.0 # 5 6 1 0 19.12500 204.5500 1.6317169 44.742634 17.8 167.6 21.4 258.0 # 6 8 0 0 15.05000 357.6167 2.7743959 71.823494 10.4 275.8 19.2 472.0 # 7 8 0 1 15.40000 326.0000 0.5656854 35.355339 15.0 301.0 15.8 351.0 ``` ### 4.3 Grouped and Weighted Computations We could also calculate groupwise-frequency weighted means and standard-deviations using a weight vector^[You may wonder why with weights the standard-deviations in the group '4.0.1' are `0` while they were `NA` without weights. This stirs from the fact that group '4.0.1' only has one observation, and in the Bessel-corrected estimate of the variance there is a `n - 1` in the denominator which becomes `0` if `n = 1` and division by `0` becomes `NA` in this case (`fvar` was designed that way to match the behavior or `stats::var`). In the weighted version the denominator is `sum(w) - 1`, and if `sum(w)` is not 1, then the denominator is not `0`. The standard-deviation however is still `0` because the sum of squares in the numerator is `0`. In other words this means that in a weighted aggregation singleton-groups are not treated like singleton groups unless the corresponding weight is `1`.]. ```r # Grouped and weighted mean and sd and grouped min and max add_vars(g[["groups"]], add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"), add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_")) # cyl vs am w_mean_mpg w_mean_disp w_sd_mpg w_sd_disp min_mpg min_disp max_mpg max_disp # 1 4 0 1 26.00000 120.30000 0.0000000 0.00000 26.0 120.3 26.0 120.3 # 2 4 1 0 23.08757 136.62639 1.5306081 14.19412 21.5 120.1 24.4 146.7 # 3 4 1 1 27.34688 92.65353 4.8723476 21.44005 21.4 71.1 33.9 121.0 # 4 6 0 1 20.22046 151.00525 0.9349875 10.78832 19.7 145.0 21.0 160.0 # 5 6 1 0 19.52725 204.86661 1.7612203 50.80083 17.8 167.6 21.4 258.0 # 6 8 0 0 15.12267 359.56902 2.2886672 70.60949 10.4 275.8 19.2 472.0 # 7 8 0 1 15.51023 332.88960 0.4758366 29.73979 15.0 301.0 15.8 351.0 # Binding and reordering columns in a single step: Add columns in specific positions add_vars(g[["groups"]], add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"), add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_"), pos = c(4,8,5,9,6,10,7,11)) # cyl vs am w_mean_mpg w_sd_mpg min_mpg max_mpg w_mean_disp w_sd_disp min_disp max_disp # 1 4 0 1 26.00000 0.0000000 26.0 26.0 120.30000 0.00000 120.3 120.3 # 2 4 1 0 23.08757 1.5306081 21.5 24.4 136.62639 14.19412 120.1 146.7 # 3 4 1 1 27.34688 4.8723476 21.4 33.9 92.65353 21.44005 71.1 121.0 # 4 6 0 1 20.22046 0.9349875 19.7 21.0 151.00525 10.78832 145.0 160.0 # 5 6 1 0 19.52725 1.7612203 17.8 21.4 204.86661 50.80083 167.6 258.0 # 6 8 0 0 15.12267 2.2886672 10.4 19.2 359.56902 70.60949 275.8 472.0 # 7 8 0 1 15.51023 0.4758366 15.0 15.8 332.88960 29.73979 301.0 351.0 ``` The R overhead of this kind of programming in standard-evaluation is very low: ```r microbenchmark(call = add_vars(g[["groups"]], add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"), add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_"))) # Unit: microseconds # expr min lq mean median uq max neval # call 27.388 28.1875 29.56428 28.823 29.356 97.58 100 ``` ### 4.4 Transformations Using the `TRA` Argument As a final layer of added complexity, we could utilize the `TRA` argument to generate groupwise-weighted demeaned, and scaled data, with additional columns giving the group-minimum and maximum values: ```r head(add_vars(get_vars(mtcars, ind), add_stub(fmean(dat, g, weights, "-"), "w_demean_"), # This calculates weighted group means and uses them to demean the data add_stub(fsd(dat, g, weights, "/"), "w_scale_"), # This calculates weighted group sd's and uses them to scale the data add_stub(fmin(dat, g, "replace"), "min_"), # This replaces all observations by their group-minimum add_stub(fmax(dat, g, "replace"), "max_"))) # This replaces all observations by their group-maximum # cyl vs am w_demean_mpg w_demean_disp w_scale_mpg w_scale_disp min_mpg min_disp # Mazda RX4 6 0 1 0.7795446 8.9947455 22.460194 14.830858 19.7 145.0 # Mazda RX4 Wag 6 0 1 0.7795446 8.9947455 22.460194 14.830858 19.7 145.0 # Datsun 710 4 1 1 -4.5468786 15.3464694 4.679469 5.037303 21.4 71.1 # Hornet 4 Drive 6 1 0 1.8727485 53.1333901 12.150666 5.078657 17.8 167.6 # Hornet Sportabout 8 0 0 3.5773335 0.4309751 8.170694 5.098465 10.4 275.8 # Valiant 6 1 0 -1.4272515 20.1333901 10.276966 4.429062 17.8 167.6 # max_mpg max_disp # Mazda RX4 21.0 160 # Mazda RX4 Wag 21.0 160 # Datsun 710 33.9 121 # Hornet 4 Drive 21.4 258 # Hornet Sportabout 19.2 472 # Valiant 21.4 258 ``` It is also possible to `add_vars<-` to `mtcars` itself. The default option would add these columns at the end, but we could also specify positions: ```r # This defines the positions where we want to add these columns pos <- as.integer(c(2,8,3,9,4,10,5,11)) add_vars(mtcars, pos) <- c(add_stub(fmean(dat, g, weights, "-"), "w_demean_"), add_stub(fsd(dat, g, weights, "/"), "w_scale_"), add_stub(fmin(dat, g, "replace"), "min_"), add_stub(fmax(dat, g, "replace"), "max_")) head(mtcars) # mpg w_demean_mpg w_scale_mpg min_mpg max_mpg cyl disp w_demean_disp w_scale_disp # Mazda RX4 21.0 0.7795446 22.460194 19.7 21.0 6 160 8.9947455 14.830858 # Mazda RX4 Wag 21.0 0.7795446 22.460194 19.7 21.0 6 160 8.9947455 14.830858 # Datsun 710 22.8 -4.5468786 4.679469 21.4 33.9 4 108 15.3464694 5.037303 # Hornet 4 Drive 21.4 1.8727485 12.150666 17.8 21.4 6 258 53.1333901 5.078657 # Hornet Sportabout 18.7 3.5773335 8.170694 10.4 19.2 8 360 0.4309751 5.098465 # Valiant 18.1 -1.4272515 10.276966 17.8 21.4 6 225 20.1333901 4.429062 # min_disp max_disp hp drat wt qsec vs am gear carb # Mazda RX4 145.0 160 110 3.90 2.620 16.46 0 1 4 4 # Mazda RX4 Wag 145.0 160 110 3.90 2.875 17.02 0 1 4 4 # Datsun 710 71.1 121 93 3.85 2.320 18.61 1 1 4 1 # Hornet 4 Drive 167.6 258 110 3.08 3.215 19.44 1 0 3 1 # Hornet Sportabout 275.8 472 175 3.15 3.440 17.02 0 0 3 2 # Valiant 167.6 258 105 2.76 3.460 20.22 1 0 3 1 rm(mtcars) ``` Together with `ftransform`, things can become arbitrarily more complex: ```r # 2 different grouped and weighted computations (mutate operations) performed in one call settransform(mtcars, carb_dwmed_cyl = fmedian(carb, cyl, weights, "-"), carb_wsd_vs_am = fsd(carb, list(vs, am), weights, "replace")) # Multivariate settransform(mtcars, c(fmedian(list(carb_dwmed_cyl = carb, mpg_dwmed_cyl = mpg), cyl, weights, "-"), fsd(list(carb_wsd_vs_am = carb, mpg_wsd_vs_am = mpg), list(vs, am), weights, "replace"))) # Nested (Computing the weighted 3rd quartile of mpg, grouped by cyl and carb being greater than it's weighted median, grouped by vs) settransform(mtcars, mpg_gwQ3_cyl = fnth(mpg, 0.75, list(cyl, carb > fmedian(carb, vs, weights, 1L)), weights, 1L)) head(mtcars) # mpg cyl disp hp drat wt qsec vs am gear carb carb_dwmed_cyl carb_wsd_vs_am # Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 0 2.1897386 # Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 0 2.1897386 # Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 -1 0.5286617 # Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 -3 1.3161442 # Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 -2 0.9674070 # Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 -3 1.3161442 # mpg_dwmed_cyl mpg_wsd_vs_am mpg_gwQ3_cyl # Mazda RX4 1.3 4.567045 21.40000 # Mazda RX4 Wag 1.3 4.567045 21.40000 # Datsun 710 -3.2 4.872348 27.95146 # Hornet 4 Drive 1.7 2.444036 21.40000 # Hornet Sportabout 3.5 2.288667 16.21512 # Valiant -1.6 2.444036 21.40000 rm(mtcars) ``` With the full set of 14 *Fast Statistical Functions*, and additional vector- valued functions and operators (`fscale/STD, fbetween/B, fwithin/W, fhdbetween/HDB, fhdwithin/HDW, flag/L/F, fdiff/D, fgrowth/G`) discussed later, *collapse* provides extraordinary new possibilities for highly complex and efficient statistical programming in R. Computation speeds generally exceed those of packages like *dplyr* or *data.table*, sometimes by orders of magnitude. Column-wise matrix computations are also highly efficient and comparable to packages like `matrixStats` and base R functions like `colSums`. In particular the ability to perform grouped and weighted computations on matrices is new to R and very useful for complex computations (such as aggregating input-output tables etc.). Note that the above examples provide merely suggestions for use of these features and are focused on programming with data frames (as the predicates `get_vars`, `add_vars` etc. are made for data frames). Equivalently efficient code could be written using vectors or matrices. ## 5. Advanced Data Aggregation The grouped statistical programming introduced in the previous section is the fastest and most customizable way of dealing with many data transformation problems. Some tasks such as multivariate aggregations on a single data frame are however so common that this demanded for a more compact solution which efficiently integrates multiple computational steps. For such purposes `collap` was created as a fast multi-purpose aggregation command designed to solve complex aggregation problems efficiently and with a minimum of coding. `collap` performs optimally together with the *Fast Statistical Functions*, but will also work with other functions. To perform the above aggregation with `collap`, one would simply need to type: ```r collap(mtcars, mpg + disp ~ cyl + vs + am, list(fmean, fsd, fmin, fmax), w = weights, keep.col.order = FALSE) # cyl vs am weights fmean.mpg fmean.disp fsd.mpg fsd.disp fmin.mpg fmin.disp fmax.mpg fmax.disp # 1 4 0 1 1.416054 26.00000 120.30000 0.0000000 0.00000 26.0 120.3 26.0 120.3 # 2 4 1 0 3.232217 23.08757 136.62639 1.5306081 14.19412 21.5 120.1 24.4 146.7 # 3 4 1 1 7.893395 27.34688 92.65353 4.8723476 21.44005 21.4 71.1 33.9 121.0 # 4 6 0 1 1.866025 20.22046 151.00525 0.9349875 10.78832 19.7 145.0 21.0 160.0 # 5 6 1 0 3.237565 19.52725 204.86661 1.7612203 50.80083 17.8 167.6 21.4 258.0 # 6 8 0 0 8.054777 15.12267 359.56902 2.2886672 70.60949 10.4 275.8 19.2 472.0 # 7 8 0 1 2.881698 15.51023 332.88960 0.4758366 29.73979 15.0 301.0 15.8 351.0 ``` `collap` here also saves the sum of the weights in a column. The original idea behind `collap` is however better demonstrated with a different dataset. Consider the *World Development Dataset* `wlddev` introduced in section 1: ```r head(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # 3 Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA 112839996 # 4 Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA 237720001 # 5 Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA 295920013 # 6 Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA 341839996 # POP # 1 8996973 # 2 9169410 # 3 9351441 # 4 9543205 # 5 9744781 # 6 9956320 ``` Suppose we would like to aggregate this data by country and decade, but keep all that categorical information. With `collap` this is extremely simple: ```r collap(wlddev, ~ iso3c + decade) %>% head # country iso3c date year decade region income OECD PCGDP # 1 Aruba ABW 1961-01-01 1964.5 1960 Latin America & Caribbean High income FALSE NA # 2 Aruba ABW 1971-01-01 1974.5 1970 Latin America & Caribbean High income FALSE NA # 3 Aruba ABW 1981-01-01 1984.5 1980 Latin America & Caribbean High income FALSE 20267.30 # 4 Aruba ABW 1991-01-01 1994.5 1990 Latin America & Caribbean High income FALSE 26611.44 # 5 Aruba ABW 2001-01-01 2004.5 2000 Latin America & Caribbean High income FALSE 26664.99 # 6 Aruba ABW 2011-01-01 2014.5 2010 Latin America & Caribbean High income FALSE 24926.17 # LIFEEX GINI ODA POP # 1 67.2592 NA NA 56984.3 # 2 70.6372 NA NA 60080.6 # 3 73.0153 NA 49745999 61665.9 # 4 73.6069 NA 29971000 76946.7 # 5 74.2660 NA 23292000 97939.7 # 6 75.6546 NA NA 103994.6 ``` Note that the columns of the data are in the original order and also retain all their attributes. To understand this result let us briefly examine the syntax of `collap`: ```r collap(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort.row = TRUE, parallel = FALSE, mc.cores = 1L, return = c("wide","list","long","long_dupl"), give.names = "auto") # , ... ``` It is clear that `X` is the data and `by` supplies the grouping information, which can be a one- or two-sided formula or alternatively grouping vectors, factors, lists and `GRP` objects (like the *Fast Statistical Functions*). Then `FUN` provides the function(s) applied only to numeric variables in `X` and defaults to `fmean`, while `catFUN` provides the function(s) applied only to categorical variables in `X` and defaults to `fmode`^[I.e. the most frequent value. By default a first-mode is computed.]. `keep.col.order = TRUE` specifies that the data is to be returned with the original column-order. Thus in the above example it was sufficient to supply `X` and `by` and `collap` did the rest for us. Suppose we only want to aggregate 4 series in this dataset. ```r # Same as collap(wlddev, ~ iso3c + decade, cols = 9:12) collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + decade) %>% head # iso3c decade PCGDP LIFEEX GINI ODA # 1 ABW 1960 NA 67.2592 NA NA # 2 ABW 1970 NA 70.6372 NA NA # 3 ABW 1980 20267.30 73.0153 NA 49745999 # 4 ABW 1990 26611.44 73.6069 NA 29971000 # 5 ABW 2000 26664.99 74.2660 NA 23292000 # 6 ABW 2010 24926.17 75.6546 NA NA ``` As before we could use multiple functions by putting them in a named or unnamed list^[If the list is unnamed, `collap` uses `all.vars(substitute(list(FUN1, FUN2, ...)))` to get the function names. Alternatively it is also possible to pass a character vector of function names.]: ```r collap(wlddev, ~ iso3c + decade, list(fmean, fmedian, fsd), cols = 9:12) %>% head # iso3c decade fmean.PCGDP fmedian.PCGDP fsd.PCGDP fmean.LIFEEX fmedian.LIFEEX fsd.LIFEEX # 1 ABW 1960 NA NA NA 67.2592 67.2740 1.03046880 # 2 ABW 1970 NA NA NA 70.6372 70.6760 0.96813702 # 3 ABW 1980 20267.30 20280.81 4037.2695 73.0153 73.1260 0.38203753 # 4 ABW 1990 26611.44 26684.19 592.7919 73.6069 73.6100 0.08549392 # 5 ABW 2000 26664.99 26992.71 1164.6741 74.2660 74.2215 0.37614448 # 6 ABW 2010 24926.17 24599.50 1159.7344 75.6546 75.6540 0.42974339 # fmean.GINI fmedian.GINI fsd.GINI fmean.ODA fmedian.ODA fsd.ODA # 1 NA NA NA NA NA NA # 2 NA NA NA NA NA NA # 3 NA NA NA 49745999 39259998 23573651 # 4 NA NA NA 29971000 35155001 17270808 # 5 NA NA NA 23292000 16219999 42969712 # 6 NA NA NA NA NA NA ``` With multiple functions, we could also request `collap` to return a long-format of the data: ```r collap(wlddev, ~ iso3c + decade, list(fmean, fmedian, fsd), cols = 9:12, return = "long") %>% head # Function iso3c decade PCGDP LIFEEX GINI ODA # 1 fmean ABW 1960 NA 67.2592 NA NA # 2 fmean ABW 1970 NA 70.6372 NA NA # 3 fmean ABW 1980 20267.30 73.0153 NA 49745999 # 4 fmean ABW 1990 26611.44 73.6069 NA 29971000 # 5 fmean ABW 2000 26664.99 74.2660 NA 23292000 # 6 fmean ABW 2010 24926.17 75.6546 NA NA ``` A very important feature of `collap` to highlight at this point is the `custom` argument, which allows the user to circumvent the broad distinction into numeric and categorical data (and the associated `FUN` and `catFUN` arguments) and specify exactly which columns to aggregate using which functions: ```r collap(wlddev, ~ iso3c + decade, custom = list(fmean = 9:10, fmedian = 11:12, ffirst = c("country","region","income"), flast = c("year","date"), fmode = "OECD")) %>% head # country iso3c date year decade region income OECD PCGDP LIFEEX # 1 Aruba ABW 1970-01-01 1969 1960 Latin America & Caribbean High income FALSE NA 67.2592 # 2 Aruba ABW 1980-01-01 1979 1970 Latin America & Caribbean High income FALSE NA 70.6372 # 3 Aruba ABW 1990-01-01 1989 1980 Latin America & Caribbean High income FALSE 20267.30 73.0153 # 4 Aruba ABW 2000-01-01 1999 1990 Latin America & Caribbean High income FALSE 26611.44 73.6069 # 5 Aruba ABW 2010-01-01 2009 2000 Latin America & Caribbean High income FALSE 26664.99 74.2660 # 6 Aruba ABW 2020-01-01 2019 2010 Latin America & Caribbean High income FALSE 24926.17 75.6546 # GINI ODA # 1 NA NA # 2 NA NA # 3 NA 39259998 # 4 NA 35155001 # 5 NA 16219999 # 6 NA NA ``` Since *collapse* 1.5.0, it is also possible to perform weighted aggregations and append functions with `_uw` to yield an unweighted computation: ```r # This aggregates using weighted mean and mode, and unweighted median, first and last value collap(wlddev, ~ region + year, w = ~ POP, custom = list(fmean = 9:10, fmedian_uw = 11:12, ffirst_uw = c("country","region","income"), flast_uw = c("year","date"), fmode = "OECD"), keep.w = FALSE) %>% head # country date year year region region income # 1 American Samoa 1961-01-01 1960 1960 East Asia & Pacific East Asia & Pacific Upper middle income # 2 American Samoa 1962-01-01 1961 1961 East Asia & Pacific East Asia & Pacific Upper middle income # 3 American Samoa 1963-01-01 1962 1962 East Asia & Pacific East Asia & Pacific Upper middle income # 4 American Samoa 1964-01-01 1963 1963 East Asia & Pacific East Asia & Pacific Upper middle income # 5 American Samoa 1965-01-01 1964 1964 East Asia & Pacific East Asia & Pacific Upper middle income # 6 American Samoa 1966-01-01 1965 1965 East Asia & Pacific East Asia & Pacific Upper middle income # OECD PCGDP LIFEEX GINI ODA # 1 FALSE 1313.760 48.20996 NA 37295000 # 2 FALSE 1395.228 48.73451 NA 26630001 # 3 FALSE 1463.441 49.39960 NA 100040001 # 4 FALSE 1540.621 50.37529 NA 40389999 # 5 FALSE 1665.385 51.57330 NA 70059998 # 6 FALSE 1733.757 52.94426 NA 91545002 ``` Next to `collap`, the functions `collapv` provides a programmers alternative allowing grouping and weighting columns to be passed using column names or indices, and the function `collapg` operates on grouped data frames. ## 6. Data Transformations While `ftransform` and the `TRA` argument to the *Fast Statistical Functions* introduced earlier already provide a significant scope for transforming data, this section introduces some further specialized functions covering some advanced and common use cases, sometimes with greater efficiency. ### 6.1 Row and Column Arithmetic When dealing with matrices or matrix-like datasets, we often have to perform operations applying a vector to the rows or columns of the data object in question. The mathematical operations of base R (`+`, `-`, `*`, `/`, `%%`, ...) operate column-wise and are quite inefficient when used with data frames. Even in matrix code it is challenging to efficiently apply a vector `v` to the rows of a matrix `X`. For this reason *collapse* introduces a set of efficient row- and column-wise arithmetic operators for matrix-like objects: `%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`, `%cr%`, `%c+%`, `%c-%`, `%c*%`, `%c/%`. ```r X <- qM(fselect(GGDC10S, AGR:SUM)) v <- fsum(X) v # AGR MIN MAN PU CON WRT TRA FIRE # 11026503529 8134743462 24120129864 1461548426 7845957666 14776120961 6416089614 7216735147 # GOV OTH SUM # 5962229565 7155872037 94115930269 # This divides the rows of X by v all_obj_equal(t(t(X) / v), X / outer(rep(1, nrow(X)), v), X %r/% v) # [1] TRUE # Base R vs. efficient base R vs. collapse microbenchmark(t(t(X) / v), X / outer(rep(1, nrow(X)), v), X %r/% v) # Unit: microseconds # expr min lq mean median uq max neval # t(t(X)/v) 194.873 234.3560 358.13500 284.6425 298.0905 3244.043 100 # X/outer(rep(1, nrow(X)), v) 55.555 83.5580 101.45696 108.5885 113.5495 137.637 100 # X %r/% v 11.685 37.2075 83.87657 63.2630 72.7135 2744.663 100 # Data frame row operations dat <- fselect(GGDC10S, AGR:SUM) microbenchmark(dat %r/% v, # Same thing using mapply and collapse::copyAttrib copyAttrib(mapply(`/`, dat, v, SIMPLIFY = FALSE), dat)) # Unit: microseconds # expr min lq mean median uq # dat %r/% v 15.129 37.187 143.03998 40.139 46.5555 # copyAttrib(mapply(`/`, dat, v, SIMPLIFY = FALSE), dat) 59.204 64.124 71.98944 66.379 76.7315 # max neval # 5089.289 100 # 110.003 100 # Data frame column arithmetic is very slow microbenchmark(dat / dat$SUM, dat / 5, dat / dat, dat %c/% dat$SUM, dat %c/% 5, dat %c/% dat) # Unit: microseconds # expr min lq mean median uq max neval # dat/dat$SUM 1275.264 1385.2260 1636.95411 1434.2825 1551.1940 5150.092 100 # dat/5 276.012 295.4870 1181.83361 306.2905 327.4260 83176.208 100 # dat/dat 295.323 320.1075 417.10858 330.5010 361.7020 3807.711 100 # dat %c/% dat$SUM 20.295 45.4075 120.01479 48.5235 55.1245 3520.096 100 # dat %c/% 5 17.179 44.5260 87.22996 48.7285 64.1035 3489.223 100 # dat %c/% dat 20.459 46.2685 93.95601 51.0040 67.5065 3795.903 100 ``` ### 6.1 Row and Column Data Apply `dapply` is an efficient apply command for matrices and data frames. It can be used to apply functions to rows or (by default) columns of matrices or data frames and by default returns objects of the same type and with the same attributes unless the result of each computation is a scalar. ```r dapply(mtcars, median) # mpg cyl disp hp drat wt qsec vs am gear carb # 19.200 6.000 196.300 123.000 3.695 3.325 17.710 0.000 0.000 4.000 2.000 dapply(mtcars, median, MARGIN = 1) # Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout # 4.000 4.000 4.000 3.215 3.440 # Valiant Duster 360 Merc 240D Merc 230 Merc 280 # 3.460 4.000 4.000 4.000 4.000 # Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood # 4.000 4.070 3.730 3.780 5.250 # Lincoln Continental Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla # 5.424 5.345 4.000 4.000 4.000 # Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird # 3.700 3.520 3.435 4.000 3.845 # Fiat X1-9 Porsche 914-2 Lotus Europa Ford Pantera L Ferrari Dino # 4.000 4.430 4.000 5.000 6.000 # Maserati Bora Volvo 142E # 8.000 4.000 dapply(mtcars, quantile) # mpg cyl disp hp drat wt qsec vs am gear carb # 0% 10.400 4 71.100 52.0 2.760 1.51300 14.5000 0 0 3 1 # 25% 15.425 4 120.825 96.5 3.080 2.58125 16.8925 0 0 3 2 # 50% 19.200 6 196.300 123.0 3.695 3.32500 17.7100 0 0 4 2 # 75% 22.800 8 326.000 180.0 3.920 3.61000 18.9000 1 1 4 4 # 100% 33.900 8 472.000 335.0 4.930 5.42400 22.9000 1 1 5 8 dapply(mtcars, quantile, MARGIN = 1) %>% head # 0% 25% 50% 75% 100% # Mazda RX4 0 3.2600 4.000 18.730 160 # Mazda RX4 Wag 0 3.3875 4.000 19.010 160 # Datsun 710 1 1.6600 4.000 20.705 108 # Hornet 4 Drive 0 2.0000 3.215 20.420 258 # Hornet Sportabout 0 2.5000 3.440 17.860 360 # Valiant 0 1.8800 3.460 19.160 225 # This is considerably more efficient than log(mtcars): dapply(mtcars, log) %>% head # mpg cyl disp hp drat wt qsec vs am # Mazda RX4 3.044522 1.791759 5.075174 4.700480 1.360977 0.9631743 2.800933 -Inf 0 # Mazda RX4 Wag 3.044522 1.791759 5.075174 4.700480 1.360977 1.0560527 2.834389 -Inf 0 # Datsun 710 3.126761 1.386294 4.682131 4.532599 1.348073 0.8415672 2.923699 0 0 # Hornet 4 Drive 3.063391 1.791759 5.552960 4.700480 1.124930 1.1678274 2.967333 0 -Inf # Hornet Sportabout 2.928524 2.079442 5.886104 5.164786 1.147402 1.2354715 2.834389 -Inf -Inf # Valiant 2.895912 1.791759 5.416100 4.653960 1.015231 1.2412686 3.006672 0 -Inf # gear carb # Mazda RX4 1.386294 1.3862944 # Mazda RX4 Wag 1.386294 1.3862944 # Datsun 710 1.386294 0.0000000 # Hornet 4 Drive 1.098612 0.0000000 # Hornet Sportabout 1.098612 0.6931472 # Valiant 1.098612 0.0000000 ``` `dapply` preserves the data structure: ```r is.data.frame(dapply(mtcars, log)) # [1] TRUE is.matrix(dapply(m, log)) # [1] TRUE ``` It also delivers seamless conversions, i.e. you can apply functions to data frame rows or columns and return a matrix and vice-versa: ```r identical(log(m), dapply(mtcars, log, return = "matrix")) # [1] TRUE identical(dapply(mtcars, log), dapply(m, log, return = "data.frame")) # [1] TRUE ``` On data frames, the performance is comparable to `lapply`, and `dapply` is about 2x faster than `apply` for row- or column-wise operations on matrices. The most important feature is that it does not change the structure of the data at all: all attributes are preserved unless the result is a scalar and `drop = TRUE` (the default). ### 6.2 Split-Apply-Combine Computing `BY` is a generalization of `dapply` for grouped computations using functions that are not part of the *Fast Statistical Functions* introduced above. It fundamentally is a re-implementation of the `lapply(split(x, g), FUN, ...)` computing paradigm in base R, but substantially faster and more versatile than functions like `tapply`, `by` or `aggregate`. It is however not faster than *dplyr* or *data.table* for larger grouped computations on data frames requiring split-apply-combine computing. `BY` is S3 generic with methods for vector, matrix, data.frame and grouped_df^[`BY.grouped_df` is probably only useful together with the `expand.wide = TRUE` argument which *dplyr* does not have, because otherwise *dplyr*'s `summarise` and `mutate` are substantially faster on larger data.]. It also supports the same grouping (`g`) inputs as the *Fast Statistical Functions* (grouping vectors, factors, lists or *GRP* objects). Below the use of `BY` is demonstrated on vectors matrices and data frames. ```r v <- iris$Sepal.Length # A numeric vector f <- iris$Species # A factor ## default vector method BY(v, f, sum) # Sum by species, about 2x faster than tapply(v, f, sum) # setosa versicolor virginica # 250.3 296.8 329.4 BY(v, f, quantile) # Species quantiles: by default stacked # setosa.0% setosa.25% setosa.50% setosa.75% setosa.100% versicolor.0% # 4.300 4.800 5.000 5.200 5.800 4.900 # versicolor.25% versicolor.50% versicolor.75% versicolor.100% virginica.0% virginica.25% # 5.600 5.900 6.300 7.000 4.900 6.225 # virginica.50% virginica.75% virginica.100% # 6.500 6.900 7.900 BY(v, f, quantile, expand.wide = TRUE) # Wide format # 0% 25% 50% 75% 100% # setosa 4.3 4.800 5.0 5.2 5.8 # versicolor 4.9 5.600 5.9 6.3 7.0 # virginica 4.9 6.225 6.5 6.9 7.9 ## matrix method miris <- qM(num_vars(iris)) BY(miris, f, sum) # Also returns as matrix # Sepal.Length Sepal.Width Petal.Length Petal.Width # setosa 250.3 171.4 73.1 12.3 # versicolor 296.8 138.5 213.0 66.3 # virginica 329.4 148.7 277.6 101.3 BY(miris, f, quantile) %>% head # Sepal.Length Sepal.Width Petal.Length Petal.Width # setosa.0% 4.3 2.300 1.000 0.1 # setosa.25% 4.8 3.200 1.400 0.2 # setosa.50% 5.0 3.400 1.500 0.2 # setosa.75% 5.2 3.675 1.575 0.3 # setosa.100% 5.8 4.400 1.900 0.6 # versicolor.0% 4.9 2.000 3.000 1.0 BY(miris, f, quantile, expand.wide = TRUE)[, 1:5] # Sepal.Length.0% Sepal.Length.25% Sepal.Length.50% Sepal.Length.75% Sepal.Length.100% # setosa 4.3 4.800 5.0 5.2 5.8 # versicolor 4.9 5.600 5.9 6.3 7.0 # virginica 4.9 6.225 6.5 6.9 7.9 BY(miris, f, quantile, expand.wide = TRUE, return = "list")[1:2] # list of matrices # $Sepal.Length # 0% 25% 50% 75% 100% # setosa 4.3 4.800 5.0 5.2 5.8 # versicolor 4.9 5.600 5.9 6.3 7.0 # virginica 4.9 6.225 6.5 6.9 7.9 # # $Sepal.Width # 0% 25% 50% 75% 100% # setosa 2.3 3.200 3.4 3.675 4.4 # versicolor 2.0 2.525 2.8 3.000 3.4 # virginica 2.2 2.800 3.0 3.175 3.8 ## data.frame method BY(num_vars(iris), f, sum) # Also returns a data.frame etc... # Sepal.Length Sepal.Width Petal.Length Petal.Width # setosa 250.3 171.4 73.1 12.3 # versicolor 296.8 138.5 213.0 66.3 # virginica 329.4 148.7 277.6 101.3 ## Conversions identical(BY(num_vars(iris), f, sum), BY(miris, f, sum, return = "data.frame")) # [1] TRUE identical(BY(miris, f, sum), BY(num_vars(iris), f, sum, return = "matrix")) # [1] TRUE ``` ### 6.3 Fast (Grouped) Replacing and Sweeping-out Statistics `TRA` is an S3 generic that efficiently transforms data by either replacing data values with supplied statistics or sweeping the statistics out of the data. It is the workhorse function behind the row-wise arithmetic operators introduced above (`%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`), and generalizes those to grouped operations. The 10 operations supported by `TRA` are: * 1 - "replace_fill" : replace and overwrite missing values (same as dplyr::mutate) * 2 - "replace" : replace but preserve missing values * 3 - "-" : subtract (center) * 4 - "-+" : subtract group-statistics but add average of group statistics * 5 - "/" : divide (scale) * 6 - "%" : compute percentages (divide and multiply by 100) * 7 - "+" : add * 8 - "*" : multiply * 9 - "%%" : modulus * 10 - "-%%" : subtract modulus `TRA` is also incorporated as an argument to all *Fast Statistical Functions*. Therefore it is only really necessary and advisable to use the `TRA` function if both aggregate statistics and transformed data are required, or to sweep out statistics otherwise obtained (e.g. regression or correlation coefficients etc.). The code below computes the column means of the iris-matrix obtained above, and uses them to demean that matrix. ```r # Note: All examples below generalize to vectors or data frames stats <- fmean(miris) # Saving stats # 6 identical ways of centering a matrix microbenchmark(sweep(miris, 2, stats, "-"), # base R miris - outer(rep(1, nrow(iris)), stats), TRA(miris, fmean(miris), "-"), miris %r-% fmean(miris), # The operator is actually a wrapper around TRA fmean(miris, TRA = "-"), # better for any operation if the stats are not needed fwithin(miris)) # fastest, fwithin is discussed in section 6.5 # Unit: microseconds # expr min lq mean median uq max neval # sweep(miris, 2, stats, "-") 15.457 16.2975 17.57711 17.0355 17.6915 53.505 100 # miris - outer(rep(1, nrow(iris)), stats) 4.715 5.6375 6.36812 6.0270 6.6010 21.402 100 # TRA(miris, fmean(miris), "-") 3.075 3.3210 3.98930 3.6080 4.4895 14.678 100 # miris %r-% fmean(miris) 3.362 3.8130 4.68425 4.0590 4.5305 42.066 100 # fmean(miris, TRA = "-") 2.583 2.8085 3.79496 2.9930 4.2640 29.848 100 # fwithin(miris) 3.321 3.6080 5.26768 3.8130 4.9815 78.474 100 # Simple replacing [same as fmean(miris, TRA = "replace") or fbetween(miris)] TRA(miris, fmean(miris), "replace") %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 5.843333 3.057333 3.758 1.199333 # [2,] 5.843333 3.057333 3.758 1.199333 # [3,] 5.843333 3.057333 3.758 1.199333 # Simple scaling [same as fsd(miris, TRA = "/")] TRA(miris, fsd(miris), "/") %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 6.158928 8.029986 0.7930671 0.2623854 # [2,] 5.917402 6.882845 0.7930671 0.2623854 # [3,] 5.675875 7.341701 0.7364195 0.2623854 ``` All of the above is functionality also offered by `base::sweep`, but `TRA` is significantly faster. The big advantage of `TRA` is that it also supports grouped operations: ```r # Grouped centering [same as fmean(miris, f, TRA = "-") or fwithin(m, f)] TRA(miris, fmean(miris, f), "-", f) %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 0.094 0.072 -0.062 -0.046 # [2,] -0.106 -0.428 -0.062 -0.046 # [3,] -0.306 -0.228 -0.162 -0.046 # Grouped replacing [same as fmean(m, f, TRA = "replace") or fbetween(m, f)] TRA(miris, fmean(miris, f), "replace", f) %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 5.006 3.428 1.462 0.246 # [2,] 5.006 3.428 1.462 0.246 # [3,] 5.006 3.428 1.462 0.246 # Groupwise percentages [same as fsum(m, f, TRA = "%")] TRA(miris, fsum(miris, f), "%", f) %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 2.037555 2.042007 1.915185 1.626016 # [2,] 1.957651 1.750292 1.915185 1.626016 # [3,] 1.877747 1.866978 1.778386 1.626016 ``` As mentioned, calling the `TRA()` function does not make much sense if the same task can be performed using the *Fast Statistical Functions* or the arithmetic operators. It is however a very useful function to call for complex transformations involving grouped sweeping operations with precomputed quantities. ### 6.4 Fast Standardizing The function `fscale` can be used to efficiently standardize (i.e. scale and center) data using a numerically stable online algorithm. It's structure is the same as the *Fast Statistical Functions*. The standardization-operator `STD` also exists as a wrapper around `fscale`. The difference is that by default `STD` adds a prefix to standardized variables and also provides an enhanced method for data frames (more about operators in the next section). ```r # fscale doesn't rename columns fscale(mtcars) %>% head(2) # mpg cyl disp hp drat wt qsec vs # Mazda RX4 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.6103996 -0.7771651 -0.8680278 # Mazda RX4 Wag 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.3497853 -0.4637808 -0.8680278 # am gear carb # Mazda RX4 1.189901 0.4235542 0.7352031 # Mazda RX4 Wag 1.189901 0.4235542 0.7352031 # By default adds a prefix STD(mtcars) %>% head(2) # STD.mpg STD.cyl STD.disp STD.hp STD.drat STD.wt STD.qsec STD.vs # Mazda RX4 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.6103996 -0.7771651 -0.8680278 # Mazda RX4 Wag 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.3497853 -0.4637808 -0.8680278 # STD.am STD.gear STD.carb # Mazda RX4 1.189901 0.4235542 0.7352031 # Mazda RX4 Wag 1.189901 0.4235542 0.7352031 # See that is works STD(mtcars) %>% qsu # N Mean SD Min Max # STD.mpg 32 0 1 -1.6079 2.2913 # STD.cyl 32 0 1 -1.2249 1.0149 # STD.disp 32 -0 1 -1.2879 1.9468 # STD.hp 32 0 1 -1.381 2.7466 # STD.drat 32 -0 1 -1.5646 2.4939 # STD.wt 32 -0 1 -1.7418 2.2553 # STD.qsec 32 -0 1 -1.874 2.8268 # STD.vs 32 0 1 -0.868 1.116 # STD.am 32 -0 1 -0.8141 1.1899 # STD.gear 32 -0 1 -0.9318 1.7789 # STD.carb 32 -0 1 -1.1222 3.2117 # We can also scale and center to a different mean and standard deviation: qsu(fscale(mtcars, mean = 5, sd = 3))[, .c(Mean, SD)] %>% t # mpg cyl disp hp drat wt qsec vs am gear carb # Mean 5 5 5 5 5 5 5 5 5 5 5 # SD 3 3 3 3 3 3 3 3 3 3 3 # Or not center at all. In that case scaling is mean-preserving, in contrast to fsd(mtcars, TRA = "/") qsu(fscale(mtcars, mean = FALSE, sd = 3))[, .c(Mean, SD)] %>% t # mpg cyl disp hp drat wt qsec vs am gear carb # Mean 20.0906 6.1875 230.7219 146.6875 3.5966 3.2172 17.8487 0.4375 0.4062 3.6875 2.8125 # SD 3 3 3 3 3 3 3 3 3 3 3 ``` Scaling with `fscale / STD` can also be done groupwise and / or weighted. For example the Groningen Growth and Development Center 10-Sector Database provides annual series of value added in local currency and persons employed for 10 broad sectors in several African, Asian, and Latin American countries. ```r head(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA # 3 BWA SSA Sub-saharan Africa VA 1962 NA NA NA NA # 4 BWA SSA Sub-saharan Africa VA 1963 NA NA NA NA # 5 BWA SSA Sub-saharan Africa VA 1964 16.30154 3.494075 0.7365696 0.1043936 # 6 BWA SSA Sub-saharan Africa VA 1965 15.72700 2.495768 1.0181992 0.1350976 # CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA # 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229 # 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710 ``` If we wanted to correlate this data across countries and sectors, it needs to be standardized: ```r # Standardizing Sectors by Variable and Country STD_GGDC10S <- STD(GGDC10S, ~ Variable + Country, cols = 6:16) head(STD_GGDC10S) # Variable Country STD.AGR STD.MIN STD.MAN STD.PU STD.CON STD.WRT STD.TRA # 1 VA BWA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA # 5 VA BWA -0.7382911 -0.7165772 -0.6682536 -0.8051315 -0.6922839 -0.6032762 -0.5889923 # 6 VA BWA -0.7392424 -0.7167359 -0.6680535 -0.8050172 -0.6917529 -0.6030211 -0.5887320 # STD.FIRE STD.GOV STD.OTH STD.SUM # 1 NA NA NA NA # 2 NA NA NA NA # 3 NA NA NA NA # 4 NA NA NA NA # 5 -0.6349956 -0.6561054 -0.5959744 -0.6758663 # 6 -0.6349359 -0.6558634 -0.5957137 -0.6757768 # Correlating Standardized Value-Added across countries fsubset(STD_GGDC10S, Variable == "VA", STD.AGR:STD.SUM) %>% pwcor # STD.AGR STD.MIN STD.MAN STD.PU STD.CON STD.WRT STD.TRA STD.FIRE STD.GOV STD.OTH STD.SUM # STD.AGR 1 .88 .93 .88 .89 .90 .90 .86 .93 .88 .90 # STD.MIN .88 1 .86 .84 .85 .85 .84 .83 .88 .84 .86 # STD.MAN .93 .86 1 .95 .96 .97 .98 .95 .98 .97 .98 # STD.PU .88 .84 .95 1 .95 .96 .96 .95 .96 .96 .97 # STD.CON .89 .85 .96 .95 1 .98 .98 .97 .98 .97 .98 # STD.WRT .90 .85 .97 .96 .98 1 .99 .98 .99 .99 1.00 # STD.TRA .90 .84 .98 .96 .98 .99 1 .98 .99 .99 .99 # STD.FIRE .86 .83 .95 .95 .97 .98 .98 1 .98 .98 .98 # STD.GOV .93 .88 .98 .96 .98 .99 .99 .98 1 .99 1.00 # STD.OTH .88 .84 .97 .96 .97 .99 .99 .98 .99 1 .99 # STD.SUM .90 .86 .98 .97 .98 1.00 .99 .98 1.00 .99 1 ``` ### 6.5 Fast Centering and Averaging As a slightly faster alternative to `fmean(x, g, w, TRA = "-"/"-+")` or `fmean(x, g, w, TRA = "replace"/"replace_fill")`, `fwithin` and `fbetween` can be used to perform common (grouped, weighted) centering and averaging tasks (also known as *between*- and *within*- transformations in the language of panel data econometrics). `fbetween` / `fwithin` are faster than `fmean(..., TRA = ...)` because they don't materialize the full set of computed averages. The operators `W` and `B` also exist. ```r ## Simple centering and averaging fbetween(mtcars$mpg) %>% head # [1] 20.09062 20.09062 20.09062 20.09062 20.09062 20.09062 fwithin(mtcars$mpg) %>% head # [1] 0.909375 0.909375 2.709375 1.309375 -1.390625 -1.990625 all.equal(fbetween(mtcars) + fwithin(mtcars), mtcars) # [1] TRUE ## Groupwise centering and averaging fbetween(mtcars$mpg, mtcars$cyl) %>% head # [1] 19.74286 19.74286 26.66364 19.74286 15.10000 19.74286 fwithin(mtcars$mpg, mtcars$cyl) %>% head # [1] 1.257143 1.257143 -3.863636 1.657143 3.600000 -1.642857 all.equal(fbetween(mtcars, mtcars$cyl) + fwithin(mtcars, mtcars$cyl), mtcars) # [1] TRUE ``` To demonstrate more clearly the utility of the operators which exists for all fast transformation and time series functions, the code below implements the task of demeaning 4 series by country and saving the country-id using the within-operator `W` as opposed to `fwithin` which requires all input to be passed externally like the *Fast Statistical Functions*. ```r # Center 4 series in this dataset by country W(wlddev, ~ iso3c, cols = 9:12) %>% head # iso3c W.PCGDP W.LIFEEX W.GINI W.ODA # 1 AFG NA -16.75117 NA -1370778502 # 2 AFG NA -16.23517 NA -1255468497 # 3 AFG NA -15.72617 NA -1374708502 # 4 AFG NA -15.22617 NA -1249828497 # 5 AFG NA -14.73417 NA -1191628485 # 6 AFG NA -14.24917 NA -1145708502 # Same thing done manually using fwithin... add_vars(get_vars(wlddev, "iso3c"), get_vars(wlddev, 9:12) %>% fwithin(wlddev$iso3c) %>% add_stub("W.")) %>% head # iso3c W.PCGDP W.LIFEEX W.GINI W.ODA # 1 AFG NA -16.75117 NA -1370778502 # 2 AFG NA -16.23517 NA -1255468497 # 3 AFG NA -15.72617 NA -1374708502 # 4 AFG NA -15.22617 NA -1249828497 # 5 AFG NA -14.73417 NA -1191628485 # 6 AFG NA -14.24917 NA -1145708502 ``` It is also possible to drop the id's in `W` using the argument `keep.by = FALSE`. `fbetween / B` and `fwithin / W` each have one additional computational option: ```r # This replaces missing values with the group-mean: Same as fmean(x, g, TRA = "replace_fill") B(wlddev, ~ iso3c, cols = 9:12, fill = TRUE) %>% head # iso3c B.PCGDP B.LIFEEX B.GINI B.ODA # 1 AFG 483.8351 49.19717 NA 1487548499 # 2 AFG 483.8351 49.19717 NA 1487548499 # 3 AFG 483.8351 49.19717 NA 1487548499 # 4 AFG 483.8351 49.19717 NA 1487548499 # 5 AFG 483.8351 49.19717 NA 1487548499 # 6 AFG 483.8351 49.19717 NA 1487548499 # This adds back the overall mean after subtracting out group means: Same as fmean(x, g, TRA = "-+") W(wlddev, ~ iso3c, cols = 9:12, mean = "overall.mean") %>% head # iso3c W.PCGDP W.LIFEEX W.GINI W.ODA # 1 AFG NA 47.54514 NA -916058371 # 2 AFG NA 48.06114 NA -800748366 # 3 AFG NA 48.57014 NA -919988371 # 4 AFG NA 49.07014 NA -795108366 # 5 AFG NA 49.56214 NA -736908354 # 6 AFG NA 50.04714 NA -690988371 # Visual demonstration of centering on the overall mean vs. simple centering oldpar <- par(mfrow = c(1, 3)) plot(iris[1:2], col = iris$Species, main = "Raw Data") # Raw data plot(W(iris, ~ Species)[2:3], col = iris$Species, main = "Simple Centering") # Simple centering plot(W(iris, ~ Species, mean = "overall.mean")[2:3], col = iris$Species, # Centering on overall mean: Preserves level of data main = "Added Overall Mean") ```
plot of chunk BWplot

plot of chunk BWplot

```r par(oldpar) ``` Another great utility of operators is that they can be employed in regression formulas in a manor that is both very efficient and pleasing to the eyes. The code below demonstrates the use of `W` and `B` to efficiently run fixed-effects regressions with `lm`. ```r # When using operators in formulas, we need to remove missing values beforehand to obtain the same results as a Fixed-Effects package data <- wlddev %>% fselect(iso3c, year, PCGDP, LIFEEX) %>% na_omit # classical lm() -> iso3c is a factor, creates a matrix of 200+ country dummies. coef(lm(PCGDP ~ LIFEEX + iso3c, data))[1:2] # (Intercept) LIFEEX # -2837.039 380.448 # Centering each variable individually coef(lm(W(PCGDP, iso3c) ~ W(LIFEEX, iso3c), data)) # (Intercept) W(LIFEEX, iso3c) # 5.596034e-13 3.804480e+02 # Centering the data coef(lm(W.PCGDP ~ W.LIFEEX, W(data, PCGDP + LIFEEX ~ iso3c))) # (Intercept) W.LIFEEX # 5.596034e-13 3.804480e+02 # Adding the overall mean back to the data only changes the intercept coef(lm(W.PCGDP ~ W.LIFEEX, W(data, PCGDP + LIFEEX ~ iso3c, mean = "overall.mean"))) # (Intercept) W.LIFEEX # -14020.142 380.448 # Procedure suggested by Mundlak (1978) - controlling for group averages instead of demeaning coef(lm(PCGDP ~ LIFEEX + B(LIFEEX, iso3c), data)) # (Intercept) LIFEEX B(LIFEEX, iso3c) # -52254.7421 380.4480 585.8386 ``` In general it is recommended calling the long names (i.e. `fwithin` or `fscale` etc.) for programming since they are a bit more efficient on the R-side of things and require all input in terms of data. For all other purposes the operators are more convenient. It is important to note that the operators can do everything the functions can do (i.e. you can also pass grouping vectors or *GRP* objects to them). They are just simple wrappers that in the data frame method add 4 additional features: * The possibility of formula input to `by` i.e. `W(mtcars, ~ cyl)` or `W(mtcars, mpg ~ cyl)` * They preserve grouping columns (`cyl` in the above example) when passed in a formula (default `keep.by = TRUE`) * The ability to subset many columns using the `cols` argument (i.e. `W(mtcars, ~ cyl, cols = 4:7)` is the same as `W(mtcars, hp + drat + wt + qsec ~ cyl)`) * They rename transformed columns by adding a prefix (default `stub = "W."`) ### 6.6 HD Centering and Linear Prediction Sometimes simple centering is not enough, for example if a linear model with multiple levels of fixed-effects needs to be estimated, potentially involving interactions with continuous covariates. For these purposes `fhdwithin / HDW` and `fhdbetween / HDB` were created as efficient multi-purpose functions for linear prediction and partialling out. They operate by splitting complex regression problems in 2 parts: Factors and factor-interactions are projected out using `fixest::demean`, an efficient `C++` routine for centering vectors on multiple factors, whereas continuous variables are dealt with using a standard `chol` or `qr` decomposition in base R. The examples below show the use of the `HDW` operator in manually solving a regression problem with country and time fixed effects. ```r data$year <- qF(data$year, na.exclude = FALSE) # the country code (iso3c) is already a factor # classical lm() -> creates a matrix of 196 country dummies and 56 year dummies coef(lm(PCGDP ~ LIFEEX + iso3c + year, data))[1:2] # (Intercept) LIFEEX # 37388.0493 -333.0115 # Centering each variable individually coef(lm(HDW(PCGDP, list(iso3c, year)) ~ HDW(LIFEEX, list(iso3c, year)), data)) # (Intercept) HDW(LIFEEX, list(iso3c, year)) # -2.450245e-13 -3.330115e+02 # Centering the entire data coef(lm(HDW.PCGDP ~ HDW.LIFEEX, HDW(data, PCGDP + LIFEEX ~ iso3c + year))) # (Intercept) HDW.LIFEEX # -2.450245e-13 -3.330115e+02 # Procedure suggested by Mundlak (1978) - controlling for averages instead of demeaning coef(lm(PCGDP ~ LIFEEX + HDB(LIFEEX, list(iso3c, year)), data)) # (Intercept) LIFEEX HDB(LIFEEX, list(iso3c, year)) # -48141.1094 -333.0115 1236.2681 ``` We may wish to test whether including time fixed-effects in the above regression actually impacts the fit. This can be done with the fast F-test: ```r # The syntax is fFtest(y, exc, X, ...). 'exc' are exclusion restrictions. data %$% fFtest(PCGDP, year, list(LIFEEX, iso3c)) # R-Sq. DF1 DF2 F-Stat. P-Value # Full Model 0.894 258 8763 286.130 0.000 # Restricted Model 0.873 199 8822 304.661 0.000 # Exclusion Rest. 0.021 59 8763 29.280 0.000 ``` The test shows that the time fixed-effects (accounted for like year dummies) are jointly significant. One can also use `fhdbetween / HDB` and `fhdwithin / HDW` to project out interactions and continuous covariates. ```r wlddev$year <- as.numeric(wlddev$year) # classical lm() -> full country-year interaction, -> 200+ country dummies, 200+ trends, year and ODA coef(lm(PCGDP ~ LIFEEX + iso3c * year + ODA, wlddev))[1:2] # (Intercept) LIFEEX # -7.257955e+05 8.938626e+00 # Same using HDW coef(lm(HDW.PCGDP ~ HDW.LIFEEX, HDW(wlddev, PCGDP + LIFEEX ~ iso3c * year + ODA))) # (Intercept) HDW.LIFEEX # 3.403288e-12 8.938626e+00 # example of a simple continuous problem HDW(iris[1:2], iris[3:4]) %>% head # HDW.Sepal.Length HDW.Sepal.Width # 1 0.21483967 0.2001352 # 2 0.01483967 -0.2998648 # 3 -0.13098262 -0.1255786 # 4 -0.33933805 -0.1741510 # 5 0.11483967 0.3001352 # 6 0.41621663 0.6044681 # May include factors.. HDW(iris[1:2], iris[3:5]) %>% head # HDW.Sepal.Length HDW.Sepal.Width # 1 0.14989286 0.1102684 # 2 -0.05010714 -0.3897316 # 3 -0.15951256 -0.1742640 # 4 -0.44070173 -0.3051992 # 5 0.04989286 0.2102684 # 6 0.17930818 0.3391766 ``` ## 7. Time Series and Panel Series *collapse* also presents some essential contributions in the time series domain, particularly in the area of (irregular) time series, panel data and efficient and secure computations on (potentially unordered) time-dependent vectors and (unbalanced) panels. ### 7.1 Panel Series to Array Conversions To facilitate the exploration and access of panel data, `psmat` was created as an S3 generic to efficiently obtain matrices or 3D-arrays from panel data. ```r mts <- psmat(wlddev, PCGDP ~ iso3c, ~ year) str(mts) # 'psmat' num [1:216, 1:61] NA NA NA NA NA ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # - attr(*, "transpose")= logi FALSE plot(log10(mts), main = paste("Log10", vlabels(wlddev$PCGDP)), xlab = "Year") ```
plot of chunk psmatplot

plot of chunk psmatplot

Passing a data frame of panel series to `psmat` generates a 3D array: ```r # Get panel series array psar <- psmat(wlddev, ~ iso3c, ~ year, cols = 9:12) str(psar) # 'psmat' num [1:216, 1:61, 1:4] NA NA NA NA NA ... # - attr(*, "dimnames")=List of 3 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA" # - attr(*, "transpose")= logi FALSE plot(psar) ```
plot of chunk psarplot

plot of chunk psarplot

```r # Plot array of Panel Series aggregated by region: collap(wlddev, ~ region + year, cols = 9:12) %>% psmat( ~ region, ~ year) %>% plot(legend = TRUE, labs = vlabels(wlddev)[9:12]) ```
plot of chunk psarplot2

plot of chunk psarplot2

`psmat` can also output a list of panel series matrices, which can be used among other things to reshape the data with `unlist2d` (discussed in more detail in List-Processing section). ```r # This gives list of ps-matrices psml <- psmat(wlddev, ~ iso3c, ~ year, 9:12, array = FALSE) str(psml, give.attr = FALSE) # List of 4 # $ PCGDP : 'psmat' num [1:216, 1:61] NA NA NA NA NA ... # $ LIFEEX: 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ... # $ GINI : 'psmat' num [1:216, 1:61] NA NA NA NA NA NA NA NA NA NA ... # $ ODA : 'psmat' num [1:216, 1:61] NA 116769997 -390000 NA NA ... # Using unlist2d, can generate a data.frame unlist2d(psml, idcols = "Variable", row.names = "Country") %>% gv(1:10) %>% head # Variable Country 1960 1961 1962 1963 1964 1965 1966 1967 # 1 PCGDP ABW NA NA NA NA NA NA NA NA # 2 PCGDP AFG NA NA NA NA NA NA NA NA # 3 PCGDP AGO NA NA NA NA NA NA NA NA # 4 PCGDP ALB NA NA NA NA NA NA NA NA # 5 PCGDP AND NA NA NA NA NA NA NA NA # 6 PCGDP ARE NA NA NA NA NA NA NA NA ``` ### 7.2 Panel Series ACF, PACF and CCF The correlation structure of panel data can also be explored with `psacf`, `pspacf` and `psccf`. These functions are exact analogues to `stats::acf`, `stats::pacf` and `stats::ccf`. They use `fscale` to group-scale panel data by the panel-id provided, and then compute the covariance of a sequence of panel-lags (generated with `flag` discussed below) with the group-scaled level-series, dividing by the variance of the group-scaled level series. The Partial-ACF is generated from the ACF using a Yule-Walker decomposition (as in `stats::pacf`). ```r # Panel-ACF of GDP per Capita psacf(wlddev, PCGDP ~ iso3c, ~year) ```
plot of chunk PSACF

plot of chunk PSACF

```r # Panel-Partial-ACF of GDP per Capia pspacf(wlddev, PCGDP ~ iso3c, ~year) ```
plot of chunk PSACF

plot of chunk PSACF

```r # Panel- Cross-Correlation function of GDP per Capia and Life-Expectancy wlddev %$% psccf(PCGDP, LIFEEX, iso3c, year) ```
plot of chunk PSACF

plot of chunk PSACF

```r # Multivariate Panel-auto and cross-correlation function of 3 variables: psacf(wlddev, PCGDP + LIFEEX + ODA ~ iso3c, ~year) ```
plot of chunk PSACF

plot of chunk PSACF

### 7.3 Fast Lags and Leads `flag` and the corresponding lag- and lead- operators `L` and `F` are S3 generics to efficiently compute lags and leads on time series and panel data. The code below shows how to compute simple lags and leads on the classic Box & Jenkins airline data that comes with R. ```r # 1 lag L(AirPassengers) # Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec # 1949 NA 112 118 132 129 121 135 148 148 136 119 104 # 1950 118 115 126 141 135 125 149 170 170 158 133 114 # 1951 140 145 150 178 163 172 178 199 199 184 162 146 # 1952 166 171 180 193 181 183 218 230 242 209 191 172 # 1953 194 196 196 236 235 229 243 264 272 237 211 180 # 1954 201 204 188 235 227 234 264 302 293 259 229 203 # 1955 229 242 233 267 269 270 315 364 347 312 274 237 # 1956 278 284 277 317 313 318 374 413 405 355 306 271 # 1957 306 315 301 356 348 355 422 465 467 404 347 305 # 1958 336 340 318 362 348 363 435 491 505 404 359 310 # 1959 337 360 342 406 396 420 472 548 559 463 407 362 # 1960 405 417 391 419 461 472 535 622 606 508 461 390 # 3 identical ways of computing 1 lag all_identical(flag(AirPassengers), L(AirPassengers), F(AirPassengers,-1)) # [1] TRUE # 1 lead and 3 lags - output as matrix L(AirPassengers, -1:3) %>% head # F1 -- L1 L2 L3 # [1,] 118 112 NA NA NA # [2,] 132 118 112 NA NA # [3,] 129 132 118 112 NA # [4,] 121 129 132 118 112 # [5,] 135 121 129 132 118 # [6,] 148 135 121 129 132 # ... this is still a time series object: attributes(L(AirPassengers, -1:3)) # $tsp # [1] 1949.000 1960.917 12.000 # # $class # [1] "ts" "matrix" # # $dim # [1] 144 5 # # $dimnames # $dimnames[[1]] # NULL # # $dimnames[[2]] # [1] "F1" "--" "L1" "L2" "L3" ``` `flag / L / F` also work well on (time series) matrices. Below a regression with daily closing prices of major European stock indices is run: Germany DAX (Ibis), Switzerland SMI, France CAC, and UK FTSE. The data are sampled in business time, i.e. weekends and holidays are omitted. ```r str(EuStockMarkets) # Time-Series [1:1860, 1:4] from 1991 to 1999: 1629 1614 1607 1621 1618 ... # - attr(*, "dimnames")=List of 2 # ..$ : NULL # ..$ : chr [1:4] "DAX" "SMI" "CAC" "FTSE" # Data is recorded on 260 days per year, 1991-1999 tsp(EuStockMarkets) # [1] 1991.496 1998.646 260.000 freq <- frequency(EuStockMarkets) # There is some obvious seasonality stl(EuStockMarkets[, "DAX"], freq) %>% plot ```
plot of chunk mts

plot of chunk mts

```r # 1 annual lead and 1 annual lag L(EuStockMarkets, -1:1*freq) %>% head # F260.DAX DAX L260.DAX F260.SMI SMI L260.SMI F260.CAC CAC L260.CAC F260.FTSE FTSE # [1,] 1755.98 1628.75 NA 1846.6 1678.1 NA 1907.3 1772.8 NA 2515.8 2443.6 # [2,] 1754.95 1613.63 NA 1854.8 1688.5 NA 1900.6 1750.5 NA 2521.2 2460.2 # [3,] 1759.90 1606.51 NA 1845.3 1678.6 NA 1880.9 1718.0 NA 2493.9 2448.2 # [4,] 1759.84 1621.04 NA 1854.5 1684.1 NA 1873.5 1708.1 NA 2476.1 2470.4 # [5,] 1776.50 1618.16 NA 1870.5 1686.6 NA 1883.6 1723.1 NA 2497.1 2484.7 # [6,] 1769.98 1610.61 NA 1862.6 1671.6 NA 1868.5 1714.3 NA 2469.0 2466.8 # L260.FTSE # [1,] NA # [2,] NA # [3,] NA # [4,] NA # [5,] NA # [6,] NA # DAX regressed on it's own 2 annual lags and the lags of the other indicators lm(DAX ~., data = L(EuStockMarkets, 0:2*freq)) %>% summary # # Call: # lm(formula = DAX ~ ., data = L(EuStockMarkets, 0:2 * freq)) # # Residuals: # Min 1Q Median 3Q Max # -240.46 -51.28 -12.01 45.19 358.02 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -564.02041 93.94903 -6.003 2.49e-09 *** # L260.DAX -0.12577 0.03002 -4.189 2.99e-05 *** # L520.DAX -0.12528 0.04103 -3.053 0.00231 ** # SMI 0.32601 0.01726 18.890 < 2e-16 *** # L260.SMI 0.27499 0.02517 10.926 < 2e-16 *** # L520.SMI 0.04602 0.02602 1.769 0.07721 . # CAC 0.59637 0.02349 25.389 < 2e-16 *** # L260.CAC -0.14283 0.02763 -5.169 2.72e-07 *** # L520.CAC 0.05196 0.03657 1.421 0.15557 # FTSE 0.01002 0.02403 0.417 0.67675 # L260.FTSE 0.04509 0.02807 1.606 0.10843 # L520.FTSE 0.10601 0.02717 3.902 0.00010 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 83.06 on 1328 degrees of freedom # (520 observations deleted due to missingness) # Multiple R-squared: 0.9943, Adjusted R-squared: 0.9942 # F-statistic: 2.092e+04 on 11 and 1328 DF, p-value: < 2.2e-16 ``` Since v1.5.0, irregular time series are supported: ```r t <- seq_row(EuStockMarkets)[-4L] flag(EuStockMarkets[-4L, ], -1:1, t = t) %>% head # F1.DAX DAX L1.DAX F1.SMI SMI L1.SMI F1.CAC CAC L1.CAC F1.FTSE FTSE L1.FTSE # [1,] 1613.63 1628.75 NA 1688.5 1678.1 NA 1750.5 1772.8 NA 2460.2 2443.6 NA # [2,] 1606.51 1613.63 1628.75 1678.6 1688.5 1678.1 1718.0 1750.5 1772.8 2448.2 2460.2 2443.6 # [3,] NA 1606.51 1613.63 NA 1678.6 1688.5 NA 1718.0 1750.5 NA 2448.2 2460.2 # [4,] 1610.61 1618.16 NA 1671.6 1686.6 NA 1714.3 1723.1 NA 2466.8 2484.7 NA # [5,] 1630.75 1610.61 1618.16 1682.9 1671.6 1686.6 1734.5 1714.3 1723.1 2487.9 2466.8 2484.7 # [6,] 1640.17 1630.75 1610.61 1703.6 1682.9 1671.6 1757.4 1734.5 1714.3 2508.4 2487.9 2466.8 ``` The main innovation of `flag / L / F` is the ability to very efficiently compute sequences of lags and leads on panel data, and that this panel data need not be ordered or balanced: ```r # This lags all 4 series L(wlddev, 1L, ~ iso3c, ~ year, cols = 9:12) %>% head # iso3c year L1.PCGDP L1.LIFEEX L1.GINI L1.ODA # 1 AFG 1960 NA NA NA NA # 2 AFG 1961 NA 32.446 NA 116769997 # 3 AFG 1962 NA 32.962 NA 232080002 # 4 AFG 1963 NA 33.471 NA 112839996 # 5 AFG 1964 NA 33.971 NA 237720001 # 6 AFG 1965 NA 34.463 NA 295920013 # Without t: Works here because data is ordered, but gives a message L(wlddev, 1L, ~ iso3c, cols = 9:12) %>% head # iso3c L1.PCGDP L1.LIFEEX L1.GINI L1.ODA # 1 AFG NA NA NA NA # 2 AFG NA 32.446 NA 116769997 # 3 AFG NA 32.962 NA 232080002 # 4 AFG NA 33.471 NA 112839996 # 5 AFG NA 33.971 NA 237720001 # 6 AFG NA 34.463 NA 295920013 # 1 lead and 2 lags of Life Expectancy # after removing the 4th row, thus creating an unbalanced panel wlddev %>% ss(-4L) %>% L(-1:2, LIFEEX ~ iso3c, ~year) %>% head # iso3c year F1.LIFEEX LIFEEX L1.LIFEEX L2.LIFEEX # 1 AFG 1960 32.962 32.446 NA NA # 2 AFG 1961 33.471 32.962 32.446 NA # 3 AFG 1962 NA 33.471 32.962 32.446 # 4 AFG 1964 34.948 34.463 NA 33.471 # 5 AFG 1965 35.430 34.948 34.463 NA # 6 AFG 1966 35.914 35.430 34.948 34.463 ``` Optimal performance is obtained if the panel-id is a factor, and the time variable also a factor or an integer variable. In that case an ordering vector of the data is computed directly without any prior sorting or grouping, and the data is accessed through this vector. Thus the data need not be sorted to compute a fully-identified panel-lag, which is a key advantage to, say, the `shift` function in `data.table`. One intended area of use, especially for the operators `L` and `F`, is to substantially facilitate the implementation of dynamic models in various contexts (independent of the estimation package). Below different ways `L` can be used to estimate a dynamic panel-model using `lm` are shown: ```r # Different ways of regressing GDP on it's lags and life-Expectancy and it's lags # 1 - Precomputing lags lm(PCGDP ~ ., L(wlddev, 0:2, PCGDP + LIFEEX ~ iso3c, ~ year, keep.ids = FALSE)) %>% summary # # Call: # lm(formula = PCGDP ~ ., data = L(wlddev, 0:2, PCGDP + LIFEEX ~ # iso3c, ~year, keep.ids = FALSE)) # # Residuals: # Min 1Q Median 3Q Max # -16776.5 -102.2 -17.2 91.5 12277.1 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -333.93994 61.04617 -5.470 4.62e-08 *** # L1.PCGDP 1.31959 0.01021 129.270 < 2e-16 *** # L2.PCGDP -0.31707 0.01029 -30.815 < 2e-16 *** # LIFEEX -17.77368 35.47772 -0.501 0.616 # L1.LIFEEX 45.76286 65.87124 0.695 0.487 # L2.LIFEEX -21.43005 34.98964 -0.612 0.540 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 787.3 on 8609 degrees of freedom # (4561 observations deleted due to missingness) # Multiple R-squared: 0.9976, Adjusted R-squared: 0.9976 # F-statistic: 7.26e+05 on 5 and 8609 DF, p-value: < 2.2e-16 # 2 - Ad-hoc computation in lm formula lm(PCGDP ~ L(PCGDP, 1:2, iso3c, year) + L(LIFEEX, 0:2, iso3c, year), wlddev) %>% summary # # Call: # lm(formula = PCGDP ~ L(PCGDP, 1:2, iso3c, year) + L(LIFEEX, 0:2, # iso3c, year), data = wlddev) # # Residuals: # Min 1Q Median 3Q Max # -16776.5 -102.2 -17.2 91.5 12277.1 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -333.93994 61.04617 -5.470 4.62e-08 *** # L(PCGDP, 1:2, iso3c, year)L1 1.31959 0.01021 129.270 < 2e-16 *** # L(PCGDP, 1:2, iso3c, year)L2 -0.31707 0.01029 -30.815 < 2e-16 *** # L(LIFEEX, 0:2, iso3c, year)-- -17.77368 35.47772 -0.501 0.616 # L(LIFEEX, 0:2, iso3c, year)L1 45.76286 65.87124 0.695 0.487 # L(LIFEEX, 0:2, iso3c, year)L2 -21.43005 34.98964 -0.612 0.540 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 787.3 on 8609 degrees of freedom # (4561 observations deleted due to missingness) # Multiple R-squared: 0.9976, Adjusted R-squared: 0.9976 # F-statistic: 7.26e+05 on 5 and 8609 DF, p-value: < 2.2e-16 # 3 - Precomputing panel-identifiers g = qF(wlddev$iso3c, na.exclude = FALSE) t = qF(wlddev$year, na.exclude = FALSE) lm(PCGDP ~ L(PCGDP, 1:2, g, t) + L(LIFEEX, 0:2, g, t), wlddev) %>% summary # # Call: # lm(formula = PCGDP ~ L(PCGDP, 1:2, g, t) + L(LIFEEX, 0:2, g, # t), data = wlddev) # # Residuals: # Min 1Q Median 3Q Max # -16776.5 -102.2 -17.2 91.5 12277.1 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -333.93994 61.04617 -5.470 4.62e-08 *** # L(PCGDP, 1:2, g, t)L1 1.31959 0.01021 129.270 < 2e-16 *** # L(PCGDP, 1:2, g, t)L2 -0.31707 0.01029 -30.815 < 2e-16 *** # L(LIFEEX, 0:2, g, t)-- -17.77368 35.47772 -0.501 0.616 # L(LIFEEX, 0:2, g, t)L1 45.76286 65.87124 0.695 0.487 # L(LIFEEX, 0:2, g, t)L2 -21.43005 34.98964 -0.612 0.540 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 787.3 on 8609 degrees of freedom # (4561 observations deleted due to missingness) # Multiple R-squared: 0.9976, Adjusted R-squared: 0.9976 # F-statistic: 7.26e+05 on 5 and 8609 DF, p-value: < 2.2e-16 ``` ### 7.4 Fast Differences and Growth Rates Similarly to `flag / L / F`, `fdiff / D / Dlog` computes sequences of suitably lagged / leaded and iterated differences, quasi-differences or (quasi-)log-differences on time series and panel data, and `fgrowth / G` computes growth rates. Using again the `Airpassengers` data, the seasonal decomposition shows significant seasonality: ```r stl(AirPassengers, "periodic") %>% plot ```
plot of chunk stl

plot of chunk stl

We can test the statistical significance of this seasonality by jointly testing a set of monthly dummies regressed on the differenced series. Given that the seasonal fluctuations are increasing in magnitude, using growth rates for the test seems more appropriate: ```r f <- qF(cycle(AirPassengers)) fFtest(fgrowth(AirPassengers), f) # R-Sq. DF1 DF2 F-Stat. P-value # 0.874 11 131 82.238 0.000 ``` The test shows significant seasonality, accounting for 87% of the variation in the growth rate of the series. We can plot the series together with the ordinary, seasonal (12-month) and deseasonalized monthly growth rate using: ```r G(AirPassengers, c(0, 1, 12)) %>% cbind(W.G1 = W(G(AirPassengers), f)) %>% plot(main = "Growth Rate of Airpassengers") ```
plot of chunk Gplot

plot of chunk Gplot

It is evident that taking the annualized growth rate also removes the periodic behavior. We can also compute second differences or growth rates of growth rates. Below a plot of the ordinary and annual first and second differences of the data: ```r D(AirPassengers, c(1,12), 1:2) %>% plot ```
plot of chunk Dplot

plot of chunk Dplot

In general, both `fdiff / D` and `fgrowth / G` can compute sequences of lagged / leaded and iterated differences / growth rates. ```r # sequence of leaded/lagged and iterated differences y = 1:10 D(y, -2:2, 1:3) # F2D1 F2D2 F2D3 FD1 FD2 FD3 -- D1 D2 D3 L2D1 L2D2 L2D3 # [1,] -2 0 0 -1 0 0 1 NA NA NA NA NA NA # [2,] -2 0 0 -1 0 0 2 1 NA NA NA NA NA # [3,] -2 0 0 -1 0 0 3 1 0 NA 2 NA NA # [4,] -2 0 0 -1 0 0 4 1 0 0 2 NA NA # [5,] -2 0 NA -1 0 0 5 1 0 0 2 0 NA # [6,] -2 0 NA -1 0 0 6 1 0 0 2 0 NA # [7,] -2 NA NA -1 0 0 7 1 0 0 2 0 0 # [8,] -2 NA NA -1 0 NA 8 1 0 0 2 0 0 # [9,] NA NA NA -1 NA NA 9 1 0 0 2 0 0 # [10,] NA NA NA NA NA NA 10 1 0 0 2 0 0 ``` All of this also works for panel data. The code below gives an example: ```r g = rep(1:2, each = 5) t = rep(1:5, 2) D(y, -2:2, 1:2, g, t) # F2D1 F2D2 FD1 FD2 -- D1 D2 L2D1 L2D2 # [1,] -2 0 -1 0 1 NA NA NA NA # [2,] -2 NA -1 0 2 1 NA NA NA # [3,] -2 NA -1 0 3 1 0 2 NA # [4,] NA NA -1 NA 4 1 0 2 NA # [5,] NA NA NA NA 5 1 0 2 0 # [6,] -2 0 -1 0 6 NA NA NA NA # [7,] -2 NA -1 0 7 1 NA NA NA # [8,] -2 NA -1 0 8 1 0 2 NA # [9,] NA NA -1 NA 9 1 0 2 NA # [10,] NA NA NA NA 10 1 0 2 0 ``` Calls to `flag / L / F`, `fdiff / D` and `fgrowth / G` can be nested. In the example below, `L.matrix` is called on the right-half ob the above sequence: ```r L(D(y, 0:2, 1:2, g, t), 0:1, g, t) # -- L1.-- D1 L1.D1 D2 L1.D2 L2D1 L1.L2D1 L2D2 L1.L2D2 # [1,] 1 NA NA NA NA NA NA NA NA NA # [2,] 2 1 1 NA NA NA NA NA NA NA # [3,] 3 2 1 1 0 NA 2 NA NA NA # [4,] 4 3 1 1 0 0 2 2 NA NA # [5,] 5 4 1 1 0 0 2 2 0 NA # [6,] 6 NA NA NA NA NA NA NA NA NA # [7,] 7 6 1 NA NA NA NA NA NA NA # [8,] 8 7 1 1 0 NA 2 NA NA NA # [9,] 9 8 1 1 0 0 2 2 NA NA # [10,] 10 9 1 1 0 0 2 2 0 NA ``` `fdiff / D` and `fgrowth / G` also come with a data frame method, making the computation of growth-variables on datasets very easy: ```r G(GGDC10S, 1L, 1L, ~ Variable + Country, ~ Year, cols = 6:10) %>% head # Variable Country Year G1.AGR G1.MIN G1.MAN G1.PU G1.CON # 1 VA BWA 1960 NA NA NA NA NA # 2 VA BWA 1961 NA NA NA NA NA # 3 VA BWA 1962 NA NA NA NA NA # 4 VA BWA 1963 NA NA NA NA NA # 5 VA BWA 1964 NA NA NA NA NA # 6 VA BWA 1965 -3.524492 -28.57143 38.23529 29.41176 103.9604 ``` The code below estimates a dynamic panel model regressing the 10-year growth rate of GDP per capita on it's 10-year lagged level and the 10-year growth rate of life-expectancy: ```r summary(lm(G(PCGDP,10,1,iso3c,year) ~ L(PCGDP,10,iso3c,year) + G(LIFEEX,10,1,iso3c,year), data = wlddev)) # # Call: # lm(formula = G(PCGDP, 10, 1, iso3c, year) ~ L(PCGDP, 10, iso3c, # year) + G(LIFEEX, 10, 1, iso3c, year), data = wlddev) # # Residuals: # Min 1Q Median 3Q Max # -104.32 -21.97 -3.96 13.26 1714.58 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 2.740e+01 1.089e+00 25.168 < 2e-16 *** # L(PCGDP, 10, iso3c, year) -3.337e-04 4.756e-05 -7.016 2.49e-12 *** # G(LIFEEX, 10, 1, iso3c, year) 4.617e-01 1.124e-01 4.107 4.05e-05 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 58.43 on 7113 degrees of freedom # (6060 observations deleted due to missingness) # Multiple R-squared: 0.01132, Adjusted R-squared: 0.01104 # F-statistic: 40.73 on 2 and 7113 DF, p-value: < 2.2e-16 ``` To go a step further, the code below regresses the 10-year growth rate of GDP on the 10-year lagged levels and 10-year growth rates of GDP and life expectancy, with country and time-fixed effects projected out using `HDW`. The standard errors are unreliable without bootstrapping, but this example nicely demonstrates the potential for complex estimations brought by *collapse*. ```r moddat <- HDW(L(G(wlddev, c(0, 10), 1, ~iso3c, ~year, 9:10), c(0, 10), ~iso3c, ~year), ~iso3c + qF(year))[-c(1,5)] summary(lm(HDW.L10G1.PCGDP ~. , moddat)) # # Call: # lm(formula = HDW.L10G1.PCGDP ~ ., data = moddat) # # Residuals: # Min 1Q Median 3Q Max # -807.68 -10.80 -0.64 10.23 779.99 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 1.907e-15 4.930e-01 0.000 1.000000 # HDW.L10.PCGDP -2.500e-03 1.292e-04 -19.347 < 2e-16 *** # HDW.L10.L10G1.PCGDP -5.885e-01 1.082e-02 -54.412 < 2e-16 *** # HDW.L10.LIFEEX 1.056e+00 2.885e-01 3.661 0.000254 *** # HDW.L10G1.LIFEEX 6.927e-01 1.154e-01 6.002 2.08e-09 *** # HDW.L10.L10G1.LIFEEX 8.749e-01 1.108e-01 7.899 3.39e-15 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 35.69 on 5235 degrees of freedom # Multiple R-squared: 0.4029, Adjusted R-squared: 0.4023 # F-statistic: 706.4 on 5 and 5235 DF, p-value: < 2.2e-16 ``` One of the inconveniences of the above computations is that it requires declaring the panel-identifiers `iso3c` and `year` again and again for each function. A great remedy here are the *plm* classes *pseries* and *pdata.frame* which *collapse* was built to support. This shows how one could run the same regression with plm: ```r pwlddev <- plm::pdata.frame(wlddev, index = c("iso3c", "year")) moddat <- HDW(L(G(pwlddev, c(0, 10), 1, 9:10), c(0, 10)))[-c(1,5)] summary(lm(HDW.L10G1.PCGDP ~. , moddat)) # # Call: # lm(formula = HDW.L10G1.PCGDP ~ ., data = moddat) # # Residuals: # Min 1Q Median 3Q Max # -677.61 -12.45 -1.02 10.86 913.22 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 0.1456192 0.5187976 0.281 0.778962 # HDW.L10.PCGDP -0.0022910 0.0001253 -18.291 < 2e-16 *** # HDW.L10.L10G1.PCGDP -0.5859896 0.0113538 -51.612 < 2e-16 *** # HDW.L10.LIFEEX 0.8701877 0.2456255 3.543 0.000399 *** # HDW.L10G1.LIFEEX 0.6910533 0.1132028 6.105 1.11e-09 *** # HDW.L10.L10G1.LIFEEX 0.8990853 0.1068241 8.417 < 2e-16 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 37.51 on 5235 degrees of freedom # (7935 observations deleted due to missingness) # Multiple R-squared: 0.3784, Adjusted R-squared: 0.3778 # F-statistic: 637.4 on 5 and 5235 DF, p-value: < 2.2e-16 ``` To learn more about the integration of *collapse* and *plm*, consult the corresponding vignette. ## 8. List Processing and a Panel-VAR Example *collapse* also provides an ensemble of list-processing functions that grew out of a necessity of working with complex nested lists of data objects. The example provided in this section is also somewhat complex, but it demonstrates the utility of these functions while also providing a nice data-transformation task. When summarizing the `GGDC10S` data in section 1, it was evident that certain sectors have a high share of economic activity in almost all countries in the sample. This prompts the question of whether there exist common patterns in the interaction of these important sectors across countries. One way to empirically study this could be through a (Structural) Panel-Vector-Autoregression (PSVAR) in value added with the 6 most important sectors (excluding government): Agriculture, manufacturing, wholesale and retail trade, construction, transport and storage and finance and real estate. For this we will use the *vars* package^[I noticed there is a *panelvar* package, but I am more familiar with *vars* and *panelvar* can be pretty slow in my experience. We also have about 50 years of data here, so dynamic panel bias is not a big issue.]. Since *vars* natively does not support panel-VAR, we need to create the central *varest* object manually and then run the `SVAR` function to impose identification restrictions. We start with exploring and harmonizing the data: ```r library(vars) # The 6 most important non-government sectors (see section 1) sec <- c("AGR", "MAN", "WRT", "CON", "TRA", "FIRE") # This creates a data.frame containing the value added of the 6 most important non-government sectors data <- fsubset(GGDC10S, Variable == "VA", c("Country", "Year", sec)) %>% na_omit(cols = sec) # Let's look at the log VA in agriculture across countries: AGRmat <- psmat(data, AGR ~ Country, ~ Year, transpose = TRUE) %>% log # Converting to panel series matrix plot(AGRmat) ```
plot of chunk AGRmat

plot of chunk AGRmat

The plot shows quite some heterogeneity both in the levels (VA is in local currency) and in trend growth rates. In the panel-VAR estimation we are only really interested in the sectoral relationships within countries. Thus we need to harmonize this sectoral data further. One way would be taking growth rates or log-differences of the data, but VAR's are usually estimated in levels unless the data are cointegrated (and value added series do not, in general, exhibit unit-root behavior). Thus to harmonize the data further we opt for subtracting a country-sector specific cubic trend from the data in logs: ```r # Subtracting a country specific cubic growth trend AGRmat <- dapply(AGRmat, fhdwithin, poly(seq_row(AGRmat), 3), fill = TRUE) plot(AGRmat) ```
plot of chunk AGRmatplot

plot of chunk AGRmatplot

This seems to have done a decent job in curbing most of the heterogeneity. Some series however have a high variance around that cubic trend. Therefore a final step is to standardize the data to bring the variances in line: ```r # Standardizing the cubic log-detrended data AGRmat <- fscale(AGRmat) plot(AGRmat) ```
plot of chunk AGRmatplot2

plot of chunk AGRmatplot2

Now this looks pretty good, and is about the most we can do in terms of harmonization without differencing the data. The code below applies these transformations to all sectors: ```r # Taking logs settransformv(data, 3:8, log) # Projecting out country FE and cubic trends from complete cases gv(data, 3:8) <- HDW(data, ~ qF(Country)*poly(Year, 3), fill = TRUE) # Scaling gv(data, 3:8) <- STD(data, ~ Country, cols = 3:8, keep.by = FALSE) # Check the plot psmat(data, ~ Country, ~ Year) %>% plot ```
plot of chunk psmatplot2

plot of chunk psmatplot2

Since the data is annual, let us estimate the Panel-VAR with one lag: ```r # This adds one lag of all series to the data add_vars(data) <- L(data, 1, ~ Country, ~ Year, keep.ids = FALSE) # This removes missing values from all but the first row and drops identifier columns (vars is made for time series without gaps) data <- rbind(ss(data, 1, -(1:2)), na_omit(ss(data, -1, -(1:2)))) head(data) # STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE L1.STD.HDW.AGR # 1 0.65713943 2.2350584 1.946383 -0.03574399 1.0877811 1.0476507 NA # 2 -0.14377115 1.8693570 1.905081 1.23225734 1.0542315 0.9105622 0.65713943 # 3 -0.09209878 -0.8212004 1.997253 -0.01783824 0.6718465 0.6134260 -0.14377115 # 4 -0.25213869 -1.7830320 -1.970855 -2.68332505 -1.8475551 0.4382902 -0.09209878 # 5 -0.31623401 -4.2931567 -1.822211 -2.75551916 -0.7066491 -2.1982640 -0.25213869 # 6 -0.72691916 -1.3219387 -2.079333 -0.12148295 -1.1398220 -2.2230474 -0.31623401 # L1.STD.HDW.MAN L1.STD.HDW.WRT L1.STD.HDW.CON L1.STD.HDW.TRA L1.STD.HDW.FIRE # 1 NA NA NA NA NA # 2 2.2350584 1.946383 -0.03574399 1.0877811 1.0476507 # 3 1.8693570 1.905081 1.23225734 1.0542315 0.9105622 # 4 -0.8212004 1.997253 -0.01783824 0.6718465 0.6134260 # 5 -1.7830320 -1.970855 -2.68332505 -1.8475551 0.4382902 # 6 -4.2931567 -1.822211 -2.75551916 -0.7066491 -2.1982640 ``` Having prepared the data, the code below estimates the panel-VAR using `lm` and creates the *varest* object: ```r # saving the names of the 6 sectors nam <- names(data)[1:6] pVAR <- list(varresult = setNames(lapply(seq_len(6), function(i) # list of 6 lm's each regressing lm(as.formula(paste0(nam[i], "~ -1 + . ")), # the sector on all lags of get_vars(data, c(i, 7:fncol(data))))), nam), # itself and other sectors, removing the missing first row datamat = ss(data, -1), # The full data containing levels and lags of the sectors, removing the missing first row y = do.call(cbind, get_vars(data, 1:6)), # Only the levels data as matrix type = "none", # No constant or tend term: We harmonized the data already p = 1, # The lag-order K = 6, # The number of variables obs = fnrow(data)-1, # The number of non-missing obs totobs = fnrow(data), # The total number of obs restrictions = NULL, call = quote(VAR(y = data))) class(pVAR) <- "varest" ``` The significant serial-correlation test below suggests that the panel-VAR with one lag is ill-identified, but the sample size is also quite large so the test is prone to reject, and the test is likely also still picking up remaining cross-sectional heterogeneity. For the purposes of this vignette this shall not bother us. ```r serial.test(pVAR) # # Portmanteau Test (asymptotic) # # data: Residuals of VAR object pVAR # Chi-squared = 1680.8, df = 540, p-value < 2.2e-16 ``` By default the VAR is identified using a Choleski ordering of the direct impact matrix in which the first variable (here Agriculture) is assumed to not be directly impacted by any other sector in the current period, and this descends down to the last variable (Finance and Real Estate), which is assumed to be impacted by all other sectors in the current period. For structural identification it is usually necessary to impose restrictions on the direct impact matrix in line with economic theory. It is difficult to conceive theories on the average worldwide interaction of broad economic sectors, but to aid identification we will compute the correlation matrix in growth rates and restrict the lowest coefficients to be 0, which should be better than just imposing a random Choleski ordering. ```r # This computes the pairwise correlations between standardized sectoral growth rates across countries corr <- fsubset(GGDC10S, Variable == "VA") %>% # Subset rows: Only VA fgroup_by(Country) %>% # Group by country get_vars(sec) %>% # Select the 6 sectors fgrowth %>% # Compute Sectoral growth rates (a time-variable can be passed, but not necessary here as the data is ordered) fscale %>% # Scale and center (i.e. standardize) pwcor # Compute Pairwise correlations corr # AGR MAN WRT CON TRA FIRE # AGR 1 .55 .59 .39 .52 .41 # MAN .55 1 .67 .54 .65 .48 # WRT .59 .67 1 .56 .66 .52 # CON .39 .54 .56 1 .53 .46 # TRA .52 .65 .66 .53 1 .51 # FIRE .41 .48 .52 .46 .51 1 # We need to impose K*(K-1)/2 = 15 (with K = 6 variables) restrictions for identification corr[corr <= sort(corr)[15]] <- 0 corr # AGR MAN WRT CON TRA FIRE # AGR 1 .55 .59 .00 .00 .00 # MAN .55 1 .67 .54 .65 .00 # WRT .59 .67 1 .56 .66 .00 # CON .00 .54 .56 1 .00 .00 # TRA .00 .65 .66 .00 1 .00 # FIRE .00 .00 .00 .00 .00 1 # The rest is unknown (i.e. will be estimated) corr[corr > 0 & corr < 1] <- NA # Using a diagonal shock vcov matrix (standard assumption for SVAR) Bmat <- diag(6) diag(Bmat) <- NA # This estimates the Panel-SVAR using Maximum Likelihood: pSVAR <- SVAR(pVAR, Amat = unclass(corr), Bmat = Bmat, estmethod = "direct") pSVAR # # SVAR Estimation Results: # ======================== # # # Estimated A matrix: # STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # STD.HDW.AGR 1.0000 -0.59223 0.51301 0.0000 0.00000 0 # STD.HDW.MAN -0.2547 1.00000 -0.07819 -0.1711 0.14207 0 # STD.HDW.WRT -0.3924 -0.56875 1.00000 -0.0135 -0.01391 0 # STD.HDW.CON 0.0000 0.02595 -0.18541 1.0000 0.00000 0 # STD.HDW.TRA 0.0000 -0.03321 -0.05370 0.0000 1.00000 0 # STD.HDW.FIRE 0.0000 0.00000 0.00000 0.0000 0.00000 1 # # Estimated B matrix: # STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # STD.HDW.AGR 0.678 0.0000 0.0000 0.0000 0.0000 0.0000 # STD.HDW.MAN 0.000 0.6248 0.0000 0.0000 0.0000 0.0000 # STD.HDW.WRT 0.000 0.0000 0.4155 0.0000 0.0000 0.0000 # STD.HDW.CON 0.000 0.0000 0.0000 0.5028 0.0000 0.0000 # STD.HDW.TRA 0.000 0.0000 0.0000 0.0000 0.5593 0.0000 # STD.HDW.FIRE 0.000 0.0000 0.0000 0.0000 0.0000 0.6475 ``` Now this object is quite involved, which brings us to the actual subject of this section: ```r # psVAR$var$varresult is a list containing the 6 linear models fitted above, it is not displayed in full here. str(pSVAR, give.attr = FALSE, max.level = 3) # List of 13 # $ A : num [1:6, 1:6] 1 -0.255 -0.392 0 0 ... # $ Ase : num [1:6, 1:6] 0 0 0 0 0 0 0 0 0 0 ... # $ B : num [1:6, 1:6] 0.678 0 0 0 0 ... # $ Bse : num [1:6, 1:6] 0 0 0 0 0 0 0 0 0 0 ... # $ LRIM : NULL # $ Sigma.U: num [1:6, 1:6] 43.898 24.88 23.941 4.873 0.661 ... # $ LR :List of 5 # ..$ statistic: Named num 1130 # ..$ parameter: Named num 1 # ..$ p.value : Named num 0 # ..$ method : chr "LR overidentification" # ..$ data.name: symbol data # $ opt :List of 5 # ..$ par : num [1:20] -0.2547 -0.3924 -0.5922 -0.5688 0.0259 ... # ..$ value : num 10924 # ..$ counts : Named int [1:2] 501 NA # ..$ convergence: int 1 # ..$ message : NULL # $ start : num [1:20] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ... # $ type : chr "AB-model" # $ var :List of 10 # ..$ varresult :List of 6 # .. ..$ STD.HDW.AGR :List of 13 # .. ..$ STD.HDW.MAN :List of 13 # .. ..$ STD.HDW.WRT :List of 13 # .. ..$ STD.HDW.CON :List of 13 # .. ..$ STD.HDW.TRA :List of 13 # .. ..$ STD.HDW.FIRE:List of 13 # ..$ datamat :'data.frame': 2060 obs. of 12 variables: # .. ..$ STD.HDW.AGR : num [1:2060] -0.1438 -0.0921 -0.2521 -0.3162 -0.7269 ... # .. ..$ STD.HDW.MAN : num [1:2060] 1.869 -0.821 -1.783 -4.293 -1.322 ... # .. ..$ STD.HDW.WRT : num [1:2060] 1.91 2 -1.97 -1.82 -2.08 ... # .. ..$ STD.HDW.CON : num [1:2060] 1.2323 -0.0178 -2.6833 -2.7555 -0.1215 ... # .. ..$ STD.HDW.TRA : num [1:2060] 1.054 0.672 -1.848 -0.707 -1.14 ... # .. ..$ STD.HDW.FIRE : num [1:2060] 0.911 0.613 0.438 -2.198 -2.223 ... # .. ..$ L1.STD.HDW.AGR : num [1:2060] 0.6571 -0.1438 -0.0921 -0.2521 -0.3162 ... # .. ..$ L1.STD.HDW.MAN : num [1:2060] 2.235 1.869 -0.821 -1.783 -4.293 ... # .. ..$ L1.STD.HDW.WRT : num [1:2060] 1.95 1.91 2 -1.97 -1.82 ... # .. ..$ L1.STD.HDW.CON : num [1:2060] -0.0357 1.2323 -0.0178 -2.6833 -2.7555 ... # .. ..$ L1.STD.HDW.TRA : num [1:2060] 1.088 1.054 0.672 -1.848 -0.707 ... # .. ..$ L1.STD.HDW.FIRE: num [1:2060] 1.048 0.911 0.613 0.438 -2.198 ... # ..$ y : num [1:2061, 1:6] 0.6571 -0.1438 -0.0921 -0.2521 -0.3162 ... # ..$ type : chr "none" # ..$ p : num 1 # ..$ K : num 6 # ..$ obs : num 2060 # ..$ totobs : int 2061 # ..$ restrictions: NULL # ..$ call : language VAR(y = data) # $ iter : Named int 501 # $ call : language SVAR(x = pVAR, estmethod = "direct", Amat = unclass(corr), Bmat = Bmat) ``` ### 8.1 List Search and Identification When dealing with such a list-like object, we might be interested in its complexity by measuring the level of nesting. This can be done with `ldepth`: ```r # The list-tree of this object has 5 levels of nesting ldepth(pSVAR) # [1] 5 # This data has a depth of 1, thus this dataset does not contain list-columns ldepth(data) # [1] 1 ``` Further we might be interested in knowing whether this list-object contains non-atomic elements like call, terms or formulas. The function `is.regular` in the *collapse* package checks if an object is atomic or list-like, and the recursive version `is_unlistable` checks whether all objects in a nested structure are atomic or list-like: ```r # Is this object composed only of atomic elements e.g. can it be unlisted? is_unlistable(pSVAR) # [1] FALSE ``` Evidently this object is not unlistable, from viewing its structure we know that it contains several call and terms objects. We might also want to know if this object saves some kind of residuals or fitted values. This can be done using `has_elem`, which also supports regular expression search of element names: ```r # Does this object contain an element with "fitted" in its name? has_elem(pSVAR, "fitted", regex = TRUE) # [1] TRUE # Does this object contain an element with "residuals" in its name? has_elem(pSVAR, "residuals", regex = TRUE) # [1] TRUE ``` We might also want to know whether the object contains some kind of data-matrix. This can be checked by calling: ```r # Is there a matrix stored in this object? has_elem(pSVAR, is.matrix) # [1] TRUE ``` These functions can sometimes be helpful in exploring objects. A much greater advantage of having functions to search and check lists is the ability to write more complex programs with them (which will not be demonstrated here). ### 8.2 List Subsetting Having gathered some information about the `pSVAR` object, this section introduces several extractor functions to pull out elements from such lists: `get_elem` can be used to pull out elements from lists in a simplified format^[The *vars* package also provides convenient extractor functions for some quantities, but `get_elem` of course works in a much broader range of contexts.]. ```r # This is the path to the residuals from a single equation str(pSVAR$var$varresult$STD.HDW.AGR$residuals) # Named num [1:2060] -0.7234 -0.1962 -0.1993 0.0739 -0.1418 ... # - attr(*, "names")= chr [1:2060] "2" "3" "4" "5" ... # get_elem gets the residuals from all 6 equations and puts them in a top-level list resid <- get_elem(pSVAR, "residuals") str(resid, give.attr = FALSE) # List of 6 # $ STD.HDW.AGR : Named num [1:2060] -0.7234 -0.1962 -0.1993 0.0739 -0.1418 ... # $ STD.HDW.MAN : Named num [1:2060] 0.363 -1.989 -1.167 -3.082 1.474 ... # $ STD.HDW.WRT : Named num [1:2060] 0.37 0.628 -3.054 -0.406 -0.384 ... # $ STD.HDW.CON : Named num [1:2060] 1.035 -1.093 -2.62 -0.611 2.307 ... # $ STD.HDW.TRA : Named num [1:2060] 0.1481 -0.2599 -2.2361 0.8619 -0.0915 ... # $ STD.HDW.FIRE: Named num [1:2060] -0.11396 -0.33092 0.11754 -2.10521 -0.00968 ... # Quick conversion to matrix and plotting qM(resid) %>% plot.ts(main = "Panel-VAR Residuals") ```
plot of chunk PVARplot

plot of chunk PVARplot

Similarly, we could pull out and plot the fitted values: ```r # Regular expression search and retrieval of fitted values get_elem(pSVAR, "^fi", regex = TRUE) %>% qM %>% plot.ts(main = "Panel-VAR Fitted Values") ```
plot of chunk PVARfittedplot

plot of chunk PVARfittedplot

Below the main quantities of interest in SVAR analysis are computed: The impulse response functions (IRF's) and forecast error variance decompositions (FEVD's): ```r # This computes orthogonalized impulse response functions pIRF <- irf(pSVAR) # This computes the forecast error variance decompositions pFEVD <- fevd(pSVAR) ``` The `pIRF` object contains the IRF's with lower and upper confidence bounds and some atomic elements providing information about the object: ```r # See the structure of a vars IRF object: str(pIRF, give.attr = FALSE) # List of 11 # $ irf :List of 6 # ..$ STD.HDW.AGR : num [1:11, 1:6] 0.611 0.399 0.268 0.185 0.132 ... # ..$ STD.HDW.MAN : num [1:11, 1:6] 0.1774 0.1549 0.134 0.1142 0.0959 ... # ..$ STD.HDW.WRT : num [1:11, 1:6] -0.1807 -0.1071 -0.0647 -0.0402 -0.0259 ... # ..$ STD.HDW.CON : num [1:11, 1:6] 0.0215 0.0383 0.0442 0.0438 0.0403 ... # ..$ STD.HDW.TRA : num [1:11, 1:6] -0.02595 -0.01257 -0.00721 -0.00511 -0.00421 ... # ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 0.0122 0.0147 0.0132 0.0104 ... # $ Lower :List of 6 # ..$ STD.HDW.AGR : num [1:11, 1:6] 0.1137 -0.0144 -0.0393 -0.0446 -0.0439 ... # ..$ STD.HDW.MAN : num [1:11, 1:6] -0.6474 -0.3434 -0.2069 -0.125 -0.0734 ... # ..$ STD.HDW.WRT : num [1:11, 1:6] -0.659 -0.427 -0.311 -0.236 -0.189 ... # ..$ STD.HDW.CON : num [1:11, 1:6] -0.721 -0.417 -0.258 -0.183 -0.123 ... # ..$ STD.HDW.TRA : num [1:11, 1:6] -0.4161 -0.2568 -0.169 -0.1231 -0.0894 ... # ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 -0.0157 -0.022 -0.0227 -0.0211 ... # $ Upper :List of 6 # ..$ STD.HDW.AGR : num [1:11, 1:6] 1.218 0.801 0.565 0.389 0.275 ... # ..$ STD.HDW.MAN : num [1:11, 1:6] 0.906 0.601 0.439 0.328 0.239 ... # ..$ STD.HDW.WRT : num [1:11, 1:6] 0.846 0.601 0.428 0.319 0.239 ... # ..$ STD.HDW.CON : num [1:11, 1:6] 0.716 0.514 0.4 0.305 0.234 ... # ..$ STD.HDW.TRA : num [1:11, 1:6] 0.2866 0.21 0.1591 0.1207 0.0899 ... # ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 0.0363 0.0471 0.0461 0.0405 ... # $ response : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ impulse : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ ortho : logi TRUE # $ cumulative: logi FALSE # $ runs : num 100 # $ ci : num 0.05 # $ boot : logi TRUE # $ model : chr "svarest" ``` We could separately access the top-level atomic or list elements using `atomic_elem` or `list_elem`: ```r # Pool-out top-level atomic elements in the list str(atomic_elem(pIRF)) # List of 8 # $ response : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ impulse : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ ortho : logi TRUE # $ cumulative: logi FALSE # $ runs : num 100 # $ ci : num 0.05 # $ boot : logi TRUE # $ model : chr "svarest" ``` There are also recursive versions of `atomic_elem` and `list_elem` named `reg_elem` and `irreg_elem` which can be used to split nested lists into the atomic and non-atomic parts. These are not covered in this vignette. ### 8.3 Recursive Apply and Unlisting in 2D *vars* supplies simple `plot` methods for IRF and FEVD objects using base graphics. In this section we however want to generate nicer and more compact plots using `ggplot2`, and also compute some statistics on the IRF data. Starting with the latter, the code below sums the 10-period impulse response coefficients of each sector in response to each sectoral impulse and stores them in a data frame: ```r # Computing the cumulative impact after 10 periods list_elem(pIRF) %>% # Pull out the sublist elements containing the IRF coefficients + CI's rapply2d(function(x) round(fsum(x), 2)) %>% # Recursively apply the column-sums to coefficient matrices (could also use colSums) unlist2d(c("Type", "Impulse")) # Recursively row-bind the result to a data.frame and add identifier columns # Type Impulse STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # 1 irf STD.HDW.AGR 1.92 1.08 1.68 0.83 0.72 0.54 # 2 irf STD.HDW.MAN 0.98 2.22 2.12 1.09 0.97 1.05 # 3 irf STD.HDW.WRT -0.47 -0.27 0.65 0.17 0.03 -0.02 # 4 irf STD.HDW.CON 0.33 0.39 0.34 2.00 0.55 0.38 # 5 irf STD.HDW.TRA -0.07 -0.11 -0.24 -0.30 1.31 -0.20 # 6 irf STD.HDW.FIRE 0.07 -0.07 0.02 -0.09 -0.06 1.84 # 7 Lower STD.HDW.AGR -0.18 -2.08 -3.14 -0.68 -2.46 -0.68 # 8 Lower STD.HDW.MAN -1.52 0.38 -1.30 -0.86 -1.82 0.12 # 9 Lower STD.HDW.WRT -2.38 -2.65 -0.22 -2.68 -2.01 -1.20 # 10 Lower STD.HDW.CON -2.01 -2.47 -2.16 0.53 -1.68 -0.80 # 11 Lower STD.HDW.TRA -1.32 -1.34 -1.17 -1.64 0.31 -0.69 # 12 Lower STD.HDW.FIRE -0.16 -0.26 -0.16 -0.27 -0.20 0.96 # 13 Upper STD.HDW.AGR 3.97 3.18 3.21 3.69 2.61 1.58 # 14 Upper STD.HDW.MAN 3.19 3.85 3.00 3.60 3.05 1.78 # 15 Upper STD.HDW.WRT 3.06 2.66 4.41 2.49 3.31 1.47 # 16 Upper STD.HDW.CON 2.85 3.30 3.20 3.88 2.59 1.76 # 17 Upper STD.HDW.TRA 1.08 1.93 1.76 0.72 2.82 0.63 # 18 Upper STD.HDW.FIRE 0.30 0.15 0.30 0.12 0.18 2.21 ``` The function `rapply2d` used here is very similar to `base::rapply`, with the difference that the result is not simplified / unlisted by default and that `rapply2d` will treat data frames like atomic objects and apply functions to them. `unlist2d` is an efficient generalization of `base::unlist` to 2-dimensions, or one could also think of it as a recursive generalization of `do.call(rbind, ...)`. It efficiently unlists nested lists of data objects and creates a data frame with identifier columns for each level of nesting on the left, and the content of the list in columns on the right. The above cumulative coefficients suggest that Agriculture responds mostly to it's own shock, and a bit to shocks in Manufacturing and Wholesale and Retail Trade. Similar patters can be observed for Manufacturing and Wholesale and Retail Trade. Thus these three sectors seem to be interlinked in most countries. The remaining three sectors are mostly affected by their own dynamics, but also by Agriculture and Manufacturing. Let us use `ggplot2` to create nice compact plots of the IRF's and FEVD's. For this task `unlist2d` will again be extremely helpful in creating the data frame representation required. Starting with the IRF's, we will discard the upper and lower bounds and just use the impulses: ```r # This binds the matrices after adding integer row-names to them to a data.table data <- pIRF$irf %>% # Get only the coefficient matrices, discard the confidence bounds unlist2d(idcols = "Impulse", # Recursive unlisting to data.table creating a factor id-column row.names = "Time", # and saving generated rownames in a variable called 'Time' id.factor = TRUE, # -> Create Id column ('Impulse') as factor DT = TRUE) # -> Output as data.table (default is data.frame) head(data, 3) # Impulse Time STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # # 1: STD.HDW.AGR 1 0.6113132 0.1896711 0.3488940 0.05976606 0.02503336 0.00000000 # 2: STD.HDW.AGR 2 0.3986337 0.1892803 0.3014961 0.09430567 0.07263670 0.03669857 # 3: STD.HDW.AGR 3 0.2676944 0.1654161 0.2491999 0.10769335 0.09330830 0.06042380 data <- melt(data, 1:2) # Using data.table's melt head(data, 3) # Impulse Time variable value # # 1: STD.HDW.AGR 1 STD.HDW.AGR 0.6113132 # 2: STD.HDW.AGR 2 STD.HDW.AGR 0.3986337 # 3: STD.HDW.AGR 3 STD.HDW.AGR 0.2676944 # Here comes the plot: ggplot(data, aes(x = Time, y = value, color = Impulse)) + geom_line(size = I(1)) + geom_hline(yintercept = 0) + labs(y = NULL, title = "Orthogonal Impulse Response Functions") + scale_color_manual(values = rainbow(6)) + facet_wrap(~ variable) + theme_light(base_size = 14) + scale_x_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ scale_y_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ theme(axis.text = element_text(colour = "black"), plot.title = element_text(hjust = 0.5), strip.background = element_rect(fill = "white", colour = NA), strip.text = element_text(face = "bold", colour = "grey30"), axis.ticks = element_line(colour = "black"), panel.border = element_rect(colour = "black")) ```
plot of chunk IRFplot

plot of chunk IRFplot

To round things off, below we do the same thing for the FEVD's: ```r data <- unlist2d(pFEVD, idcols = "variable", row.names = "Time", id.factor = TRUE, DT = TRUE) %>% melt(c("variable", "Time"), variable.name = "Sector") head(data, 3) # variable Time Sector value # # 1: STD.HDW.AGR 1 STD.HDW.AGR 0.8513029 # 2: STD.HDW.AGR 2 STD.HDW.AGR 0.8385913 # 3: STD.HDW.AGR 3 STD.HDW.AGR 0.8264789 # Here comes the plot: ggplot(data, aes(x = Time, y = value, fill = Sector)) + geom_area(position = "fill", alpha = 0.8) + labs(y = NULL, title = "Forecast Error Variance Decompositions") + scale_fill_manual(values = rainbow(6)) + facet_wrap(~ set_class(variable, "factor")) + theme_linedraw(base_size = 14) + scale_x_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ scale_y_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ theme(plot.title = element_text(hjust = 0.5), strip.background = element_rect(fill = "white", colour = NA), strip.text = element_text(face = "bold", colour = "grey30")) ```
plot of chunk FEVDplot

plot of chunk FEVDplot

Both the IRF's and the FEVD's show that Agriculture, Manufacturing and Wholesale and Retail Trade are broadly interlinked, even in the short-run, and that Agriculture and Manufacturing explain some of the variation in Construction, Transport and Finance at longer horizons. Of course the identification strategy used for this example was not really structural or theory based. A better strategy could be to aggregate the World Input-Output Database and use those shares for identification (which would be another very nice *collapse* exercise, but not for this vignette). ## Going Further To learn more about *collapse*, just examine the documentation `help("collapse-documentation")` which is organized, extensive and contains lots of examples. ## References Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). "Patterns of Structural Change in Developing Countries." . In J. Weiss, & M. Tribe (Eds.), *Routledge Handbook of Industry and Development.* (pp. 65-83). Routledge. Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†*Econometrica* 46 (1): 69–85. collapse/vignettes/collapse_and_plm.Rmd0000644000176200001440000031147415121640575020044 0ustar liggesusers--- title: "collapse and plm" subtitle: "Fast Transformation and Exploration of Panel Data" # utilizing *plm* classes" Advanced and fast author: "Sebastian Krantz" date: "2021-06-27" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and plm} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- This vignette focuses on the integration of *collapse* and the popular *plm* ('Linear Models for Panel Data') package by Yves Croissant, Giovanni Millo and Kevin Tappe. It will demonstrate the utility of the *pseries* and *pdata.frame* classes introduced in *plm* together with the corresponding methods for fast *collapse* functions (implemented in C or C++), to extend and facilitate extremely fast computations on panel-vectors and panel data frames (20-100 times faster than native *plm*). The *collapse* package should enable R programmers to - with very little effort - write high-performance code in the domain of panel data exploration and panel data econometrics. *** **Notes:** - To learn more about *collapse*, see the 'Introduction to *collapse*' vignette or the built-in structured documentation available under `help("collapse-documentation")` after installing the package. In addition `help("collapse-package")` provides a compact set of examples for quick-start. - Documentation and vignettes can also be viewed [online](). *** The vignette is structured as follows: * **Part 1** introduces *collapse*'s fast functions and associated *transformation operators* to compute various transformations on panel data, and delivers some benchmarks. * **Part 2** uses these functions to explore panel data a bit and introduce additional functions for summary statistics, panel-autocorrelations and testing fixed effects. * **Part 3** finally provides an example programming application by coding a slightly extended and very efficient Hausman and Taylor (1981) estimator. For this vignette we will use a dataset (`wlddev`) supplied with *collapse* containing a panel of 5 key development indicators taken from the World Bank Development Indicators Database: ```r library(collapse) head(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # 3 Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA 112839996 # 4 Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA 237720001 # 5 Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA 295920013 # 6 Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA 341839996 # POP # 1 8996973 # 2 9169410 # 3 9351441 # 4 9543205 # 5 9744781 # 6 9956320 fnobs(wlddev) # This column-wise counts the number of observations # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 # POP # 12919 fndistinct(wlddev) # This counts the number of distinct values # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 216 216 61 61 7 7 4 2 9470 10548 368 7832 # POP # 12877 ``` ## Part 1: Fast Transformation of Panel Data First let us convert this data to a *plm* panel data.frame (class *pdata.frame*): ```r library(plm) # This creates a panel data frame pwlddev <- pdata.frame(wlddev, index = c("iso3c", "year")) str(pwlddev, give.attr = FALSE) # Classes 'pdata.frame' and 'data.frame': 13176 obs. of 13 variables: # $ country: 'pseries' Named chr "Aruba" "Aruba" "Aruba" "Aruba" ... # $ iso3c : Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ... # $ date : pseries, format: "1961-01-01" "1962-01-01" "1963-01-01" ... # $ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... # $ decade : 'pseries' Named int 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ... # $ region : Factor w/ 7 levels "East Asia & Pacific",..: 3 3 3 3 3 3 3 3 3 3 ... # $ income : Factor w/ 4 levels "High income",..: 1 1 1 1 1 1 1 1 1 1 ... # $ OECD : 'pseries' Named logi FALSE FALSE FALSE FALSE FALSE FALSE ... # $ PCGDP : 'pseries' Named num NA NA NA NA NA NA NA NA NA NA ... # $ LIFEEX : 'pseries' Named num 65.7 66.1 66.4 66.8 67.1 ... # $ GINI : 'pseries' Named num NA NA NA NA NA NA NA NA NA NA ... # $ ODA : 'pseries' Named num NA NA NA NA NA NA NA NA NA NA ... # $ POP : 'pseries' Named num 54211 55438 56225 56695 57032 ... # A pdata.frame has an index attribute attached [retrieved using index(pwlddev) or attr(pwlddev, "index")] str(index(pwlddev)) # Classes 'pindex' and 'data.frame': 13176 obs. of 2 variables: # $ iso3c: Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ... # $ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... # This shows the individual and time dimensions pdim(pwlddev) # Balanced Panel: n = 216, T = 61, N = 13176 ``` A `plm::pdata.frame` is a data.frame with panel identifiers attached as a list of factors in an *index* attribute (non-factor index variables are converted to factor). Each column in that data.frame is a Panel Series (`plm::pseries`), which also has the panel identifiers attached: ```r # Panel Series of GDP per Capita and Life-Expectancy at Birth PCGDP <- pwlddev$PCGDP LIFEEX <- pwlddev$LIFEEX str(LIFEEX) # 'pseries' Named num [1:13176] 65.7 66.1 66.4 66.8 67.1 ... # - attr(*, "names")= chr [1:13176] "ABW-1960" "ABW-1961" "ABW-1962" "ABW-1963" ... # - attr(*, "index")=Classes 'pindex' and 'data.frame': 13176 obs. of 2 variables: # ..$ iso3c: Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ... # ..$ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... ``` Now that we have explored the basic data structures provided in the *plm* package, let's compute some transformations on them: ### 1.1 Between and Within Transformations The functions `fbetween` and `fbetween` can be used to compute efficient between and within transformations on panel vectors and panel data.frames: ```r # Between-Transformations head(fbetween(LIFEEX)) # Between individual (default) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 72.40653 72.40653 72.40653 72.40653 72.40653 72.40653 head(fbetween(LIFEEX, effect = "year")) # Between time # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 53.91206 54.47441 54.85718 55.20272 55.66802 56.12963 # Within-Transformations head(fwithin(LIFEEX)) # Within individuals (default) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # -6.744533 -6.332533 -5.962533 -5.619533 -5.293533 -4.971533 head(fwithin(LIFEEX, effect = "year")) # Within time # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 11.74994 11.59959 11.58682 11.58428 11.44498 11.30537 ``` by default `na.rm = TRUE` thus both functions skip (preserve) missing values in the data (which is the default for all *collapse* functions). For `fbetween` the output behavior can be altered with the option `fill`: Setting `fill = TRUE` will compute the group-means on the complete cases in each group (as long as `na.rm = TRUE`), but replace all values in each group with the group mean (hence overwriting or 'filling up' missing values): ```r # This preserves missing values in the output head(fbetween(PCGDP), 30) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 # NA NA NA NA 25413.84 25413.84 25413.84 25413.84 # This replaces all individuals with the group mean head(fbetween(PCGDP, fill = TRUE), 30) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 # 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 # ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 # 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 # ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 # 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 ``` In `fwithin` the `mean` argument allows to set an arbitrary data mean (different from 0) after the data is centered. In grouped centering task, as sensible choice for such an added mean would be the overall mean of the data series, enabled by the option `mean = "overall.mean"`. This will add the overall mean of the series back to the data after subtracting out group means, and thus preserve the level of the data (and will only change the intercept when employed in a regression): ```r # This performed standard grouped centering head(fwithin(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # -6.744533 -6.332533 -5.962533 -5.619533 -5.293533 -4.971533 # This adds the overall average Life-Expectancy (across countries) to the country-demeaned series head(fwithin(LIFEEX, mean = "overall.mean")) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 57.55177 57.96377 58.33377 58.67677 59.00277 59.32477 ``` `fbetween` and `fwithin` can also be applied to *pdata.frame*'s where they will perform these computations variable by variable: ```r head(fbetween(num_vars(pwlddev)), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1961 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1962 1985.574 NA 72.40653 NA NA 76268.63 head(fbetween(num_vars(pwlddev), fill = TRUE), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 1985.574 25413.84 72.40653 NA 33245000 76268.63 # ABW-1961 1985.574 25413.84 72.40653 NA 33245000 76268.63 # ABW-1962 1985.574 25413.84 72.40653 NA 33245000 76268.63 head(fwithin(num_vars(pwlddev)), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 -25.57377 NA -6.744533 NA NA -22057.63 # ABW-1961 -25.57377 NA -6.332533 NA NA -20830.63 # ABW-1962 -25.57377 NA -5.962533 NA NA -20043.63 head(fwithin(num_vars(pwlddev), mean = "overall.mean"), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 1960 NA 57.55177 NA NA 24223914 # ABW-1961 1960 NA 57.96377 NA NA 24225141 # ABW-1962 1960 NA 58.33377 NA NA 24225928 ``` Now next to `fbetween` and `fwithin` there also exist short versions `B` and `W`, which are referred to as *transformation operators*. These are essentially wrappers around `fbetween` and `fwithin` and provide the same functionality, but are more parsimonious to employ in regression formulas and also offer additional features when applied to panel data.frames. For panel series, `B` and `W` are exact analogues to `fbetween` and `fwithin`, just under a shorter name: ```r identical(fbetween(PCGDP), B(PCGDP)) # [1] TRUE identical(fbetween(PCGDP, fill = TRUE), B(PCGDP, fill = TRUE)) # [1] TRUE identical(fwithin(PCGDP), W(PCGDP)) # [1] TRUE identical(fwithin(PCGDP, mean = "overall.mean"), W(PCGDP, mean = "overall.mean")) # [1] TRUE ``` When applied to panel data.frames, `B` and `W` offer some additional utility by (a) allowing you to select columns to transform using the `cols` argument (default is `cols = is.numeric`, so by default all numeric columns will be selected for transformation), (b) allowing you to add a prefix to the transformed columns with the `stub` argument (default is `stub = "B."` for `B` and `stub = "W."` for `W`) and (c) preserving the panel-id's with the `keep.ids` argument (default `keep.ids = TRUE`): ```r head(B(pwlddev), 3) # iso3c year B.decade B.PCGDP B.LIFEEX B.GINI B.ODA B.POP # ABW-1960 ABW 1960 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1961 ABW 1961 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1962 ABW 1962 1985.574 NA 72.40653 NA NA 76268.63 head(W(pwlddev, cols = 9:12), 3) # Here using the cols argument # iso3c year W.PCGDP W.LIFEEX W.GINI W.ODA # ABW-1960 ABW 1960 NA -6.744533 NA NA # ABW-1961 ABW 1961 NA -6.332533 NA NA # ABW-1962 ABW 1962 NA -5.962533 NA NA ``` `fbetween` / `B` and `fwithin` / `W` also support weighted computations. This of course applies more to panel-survey settings, but for the sake of illustration suppose we wanted to weight our between and within transformations by the population of these countries: ```r # This replaces values by the POP-weighted group mean and also preserves the weight variable (POP, argument keep.w = TRUE) head(B(pwlddev, w = ~ POP), 3) # iso3c year POP B.decade B.PCGDP B.LIFEEX B.GINI B.ODA # ABW-1960 ABW 1960 54211 1988.976 NA 72.96257 NA NA # ABW-1961 ABW 1961 55438 1988.976 NA 72.96257 NA NA # ABW-1962 ABW 1962 56225 1988.976 NA 72.96257 NA NA # This centers values on the POP-weighted group mean head(W(pwlddev, w = ~ POP, cols = c("PCGDP","LIFEEX","GINI")), 3) # iso3c year POP W.PCGDP W.LIFEEX W.GINI # ABW-1960 ABW 1960 54211 NA -7.300566 NA # ABW-1961 ABW 1961 55438 NA -6.888566 NA # ABW-1962 ABW 1962 56225 NA -6.518566 NA # This centers values on the POP-weighted group mean and also adds the overall POP-weighted mean of the data head(W(pwlddev, w = ~ POP, cols = c("PCGDP","LIFEEX","GINI"), mean = "overall.mean"), 3) # iso3c year POP W.PCGDP W.LIFEEX W.GINI # ABW-1960 ABW 1960 54211 NA 58.58012 NA # ABW-1961 ABW 1961 55438 NA 58.99212 NA # ABW-1962 ABW 1962 56225 NA 59.36212 NA ``` As shown above, with `B` and `W` the weight column can also be passed as a formula or character string, whereas `fbetween` and `fwithin` require the all inputs to be passed directly in terms of data (i.e. `fbetween(get_vars(pwlddev, 9:11), w = pwlddev$POP)`), and the weight vector or id columns are never preserved in the output. Therefore in most applications `B` and `W` are probably more convenient for quick use, whereas `fbetween` and `fwithin` are the preferred programmers choice, also because they have a little less R-overhead which makes them a tiny bit faster. ### 1.2 Higher-Dimensional Between and Within Transformations Analogous to `fbetween` / `B` and `fwithin` / `W`, *collapse* provides a duo of functions and operators `fhdbetween` / `HDB` and `fhdwithin` / `HDW` to efficiently average and center data on multiple groups. The credit herefore goes to Laurent Berge, the author of the *fixest* package who wrote an efficient C-implementation of the alternating-projections algorithm to perform this task. `fhdbetween` / `HDB` and `fhdwithin` / `HDW` enrich this implementation (available in the function `fixest::demean`) by providing more options regarding missing values, and also allowing continuous covariates and (full) interactions to be projected out alongside factors. The methods for *pseries* and *pdata.frame*'s are however rather simple, as they simply simultaneously center panel-vectors on various panel-identifiers in the index (which can be more than 2, the default is to center on all identifiers): ```r # This simultaneously averages Life-Expectancy across countries and years head(HDB(LIFEEX)) # (same as running a regression on country and year dummies and taking the fitted values) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 62.36179 62.85981 63.24258 63.65245 64.11774 64.52503 # This simultaneously centers Life-Expectenacy on countries and years head(HDW(LIFEEX)) # (same as running a regression on country and year dummies and taking the residuals) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 3.300210 3.214193 3.201424 3.134554 2.995255 2.909975 ``` The architecture of `fhdbetween` / `HDB` and `fhdwithin` / `HDW` differs a bit from `fbetween` / `B` and `fwithin` / `W`. This is essentially a consequence of the underlying C++-implementation (accessed through `fixest::demean`), which was not built to accommodate missing values. `fhdbetween` / `HDB` and `fhdwithin` / `HDW` therefore both have an argument `fill = TRUE` (the default), which stipulates that missing values in the data are preserved in the output. The *collapse* default `na.rm = TRUE` again ensures that only complete cases are used for the computation: ```r # Missing values are preserved in the output when fill = TRUE (the default) head(HDB(PCGDP), 30) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 # NA NA NA NA 21833.32 22132.25 22479.20 22772.31 # When fill = FALSE, only the complete cases are returned nofill <- HDB(PCGDP, fill = FALSE) head(nofill, 30) # ABW-1986 ABW-1987 ABW-1988 ABW-1989 ABW-1990 ABW-1991 ABW-1992 ABW-1993 ABW-1994 ABW-1995 ABW-1996 # 21833.32 22132.25 22479.20 22772.31 23064.29 23060.00 23089.75 23115.36 23343.25 23595.16 23823.11 # ABW-1997 ABW-1998 ABW-1999 ABW-2000 ABW-2001 ABW-2002 ABW-2003 ABW-2004 ABW-2005 ABW-2006 ABW-2007 # 24149.44 24424.69 24727.46 25205.98 25399.16 25603.11 25851.29 26349.64 26665.54 27224.58 27772.82 # ABW-2008 ABW-2009 ABW-2010 ABW-2011 ABW-2012 ABW-2013 ABW-2014 ABW-2015 # 27769.52 27002.95 27218.84 27424.18 27471.49 27660.92 27889.34 28107.78 # This results in a shorter panel-vector length(nofill) # [1] 9470 length(PCGDP) # [1] 13176 # The cases that were missing and removed from the output are available as an attribute head(attr(nofill, "na.rm"), 30) # [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 59 60 61 62 ``` In the *pdata.frame* methods there are 3 different choices how to deal with missing values. The default for the *plm* classes in `variable.wise = TRUE`, which will essentially sequentially apply `fhdbetween.pseries` and `fhdwithin.pseries` (with the default `fill = TRUE`) to all columns. This is the same behavior as in `fbetween` / `B` and `fwithin` / `W`, which also consider the column-wise complete obs: ```r # This column-wise centers the data on countries and years tail(HDW(pwlddev), 10) # HDW.decade HDW.PCGDP HDW.LIFEEX HDW.GINI HDW.ODA HDW.POP # ZWE-2011 0 -4632.971 -8.080748 -3.663217 118306300 -4547122 # ZWE-2012 0 -4523.505 -6.271385 NA 385526419 -4749368 # ZWE-2013 0 -4710.576 -4.753056 NA 149910333 -4903132 # ZWE-2014 0 -4931.693 -3.568136 NA 93295114 -5059317 # ZWE-2015 0 -5148.895 -2.685053 NA 150833589 -5224484 # ZWE-2016 0 -5433.809 -2.203219 NA -27844184 -5404667 # ZWE-2017 0 -5645.022 -1.920365 -1.964138 10266318 -5591762 # ZWE-2018 0 -5938.794 -1.759333 NA 59646823 -5774326 # ZWE-2019 0 -5710.646 -1.669415 5.627356 223473855 -5946725 # ZWE-2020 0 NA NA NA NA NA ``` If `variable.wise = FALSE`, `fhdbetween` / `HDB` and `fhdwithin` / `HDW` will only consider the complete cases in the dataset, but still return a dataset of the same dimensions (as long as `fill = TRUE`), resulting in some rows all-missing: ```r # This centers the complete cases of the data data on countries and years and keeps missing cases tail(HDW(pwlddev, variable.wise = FALSE), 10) # HDW.decade HDW.PCGDP HDW.LIFEEX HDW.GINI HDW.ODA HDW.POP # ZWE-2011 0 517.6924 -4.379840 -3.839653 -176176494 -3042247 # ZWE-2012 NA NA NA NA NA NA # ZWE-2013 NA NA NA NA NA NA # ZWE-2014 NA NA NA NA NA NA # ZWE-2015 NA NA NA NA NA NA # ZWE-2016 NA NA NA NA NA NA # ZWE-2017 0 -128.5240 1.971143 -1.314869 -67497466 1936716 # ZWE-2018 NA NA NA NA NA NA # ZWE-2019 0 -389.1684 2.408697 5.154522 243673961 1105530 # ZWE-2020 NA NA NA NA NA NA ``` Finally, if also `fill = FALSE`, the behavior is the same as in the *pseries* method: Missing cases are removed from the data: ```r # This centers the complete cases of the data data on countries and years, and removes missing cases res <- HDW(pwlddev, fill = FALSE) tail(res, 10) # HDW.decade HDW.PCGDP HDW.LIFEEX HDW.GINI HDW.ODA HDW.POP # ZMB-1996 0 534.39373 -3.6445256 -4.744748 -174237036 4911230.7 # ZMB-1998 0 201.58094 -4.1708951 -5.085621 -492258601 644947.7 # ZMB-2002 0 250.78234 -2.9085522 -10.912265 81848768 -1027712.3 # ZMB-2004 0 -72.94954 -1.9629513 1.494340 396830282 -3774596.6 # ZMB-2006 0 -308.55937 -0.4975872 2.407226 485998870 -2255101.6 # ZMB-2010 0 -428.16949 3.9600416 4.497547 -148714637 -4174306.2 # ZMB-2015 0 -1106.52213 8.4099983 7.553052 -335529320 -4962997.8 # ZWE-2011 0 517.69244 -4.3798401 -3.839653 -176176494 -3042246.9 # ZWE-2017 0 -128.52402 1.9711431 -1.314869 -67497466 1936716.5 # ZWE-2019 0 -389.16842 2.4086971 5.154522 243673961 1105530.5 tail(attr(res, "na.rm")) # [1] 13169 13170 13171 13172 13174 13176 ``` *Notes: * (1) Because of the different missing case options and associated challenges, panel-identifiers are not preserved in `HDB` and `HDW`. (2) The default `variable.wise = TRUE` and `fill = TRUE` was only set for the *pseries* and *pdata.frame* methods, to harmonize the default implementations with `fbetween` / `B` and `fwithin` / `W` for these classes. In the standard *default*, *matrix* and *data.frame* methods, the defaults are `variable.wise = FALSE` and `fill = FALSE` (i.e. missing cases are removed beforehand), which is generally more efficient. ### 1.3 Scaling and Centering Next to the above functions for grouped centering and averaging, the function / operator pair `fscale` / `STD` can be used to efficiently standardize (i.e. scale and center) panel data along an arbitrary dimension. The architecture is identical to that of `fwithin` / `W` or `fbetween` / `B`. ```r # This standardizes GDP per capita in each country STD_PCGDP <- STD(PCGDP) # Checks: head(fmean(STD_PCGDP, index(STD_PCGDP, 1))) # ABW AFG AGO ALB AND ARE # -1.422473e-15 2.528841e-16 -6.189493e-16 -2.275957e-16 -9.281464e-16 -6.661338e-17 head(fsd(STD_PCGDP, index(STD_PCGDP, 1))) # ABW AFG AGO ALB AND ARE # 1 1 1 1 1 1 # This standardizes GDP per capita in each year STD_PCGDP_T <- STD(PCGDP, effect = "year") # Checks: head(fmean(STD_PCGDP_T, index(STD_PCGDP_T, 2))) # 1960 1961 1962 1963 1964 1965 # 9.882205e-17 3.496021e-16 1.889741e-17 -2.185013e-16 -1.724389e-16 2.616954e-16 head(fsd(STD_PCGDP_T, index(STD_PCGDP_T, 2))) # 1960 1961 1962 1963 1964 1965 # 1 1 1 1 1 1 ``` And similarly for *pdata.frame*'s: ```r head(STD(pwlddev, cols = 9:12)) # iso3c year STD.PCGDP STD.LIFEEX STD.GINI STD.ODA # ABW-1960 ABW 1960 NA -2.372636 NA NA # ABW-1961 ABW 1961 NA -2.227700 NA NA # ABW-1962 ABW 1962 NA -2.097539 NA NA # ABW-1963 ABW 1963 NA -1.976876 NA NA # ABW-1964 ABW 1964 NA -1.862193 NA NA # ABW-1965 ABW 1965 NA -1.748918 NA NA head(STD(pwlddev, cols = 9:12, effect = "year")) # iso3c year STD.PCGDP STD.LIFEEX STD.GINI STD.ODA # ABW-1960 ABW 1960 NA 0.9609854 NA NA # ABW-1961 ABW 1961 NA 0.9485730 NA NA # ABW-1962 ABW 1962 NA 0.9585105 NA NA # ABW-1963 ABW 1963 NA 0.9669638 NA NA # ABW-1964 ABW 1964 NA 0.9579477 NA NA # ABW-1965 ABW 1965 NA 0.9556529 NA NA ``` More customized scaling can be done with the help of the `mean` and `sd` arguments to `fscale` / `STD`. By default `mean = 0` and `sd = 1`, but these could be assigned any numeric values: ```r # This will scale the data such that mean mean within each country is 5 and the standard deviation is 3 qsu(fscale(pwlddev$PCGDP, mean = 5, sd = 3)) # N/T Mean SD Min Max # Overall 9466 5 2.968 -6.1908 16.257 # Between 202 5 0 5 5 # Within 46.8614 5 2.968 -6.1908 16.257 ``` Even further customization (i.e. setting means and standard deviations for each group and / or each column) can of course be achieved by calling `collapse::TRA` on the result of `fscale` to sweep out an appropriate set of means and standard deviations. Scaling without centering can be done with the option `mean = FALSE`. This will also preserve the mean of the data overall and within each group: ```r # Scaling without centering: Mean preserving with fscale / STD qsu(fscale(pwlddev$PCGDP, mean = FALSE, sd = 3)) # N/T Mean SD Min Max # Overall 9466 12031.4627 17803.3537 247.7598 131349.27 # Between 202 12169.2793 18055.6626 253.1886 131342.669 # Within 46.8614 12031.4627 2.968 12020.2718 12042.7196 # Scaling without centering can also be done using fsd, but this does not preserve the mean qsu(fsd(pwlddev$PCGDP, index(pwlddev, 1), TRA = "/")) # N/T Mean SD Min Max # Overall 9466 4.247 3.192 0.0579 26.647 # Between 202 4.6036 3.5846 0.8207 24.8111 # Within 46.8614 4.247 0.9893 0.5167 7.9993 ``` Finally a special kind of data harmonization in the first two moments can be done by setting `mean = "overall.mean"` and `sd = "within.sd"` in a grouped scaling task. This will harmonize the data across groups such that the mean of each group is equal to the overall data mean and the standard deviation equal to the within standard deviation (= the standard deviation calculated on the group-centered series): ```r fmean(pwlddev$PCGDP) # Overall mean # [1] 12048.78 fsd(W(pwlddev$PCGDP)) # Within sd # [1] 6723.681 # Scaling and centerin such that the mean of each country is the overall mean, and the sd of each country is the within sd qsu(fscale(pwlddev$PCGDP, mean = "overall.mean", sd = "within.sd")) # N/T Mean SD Min Max # Overall 9466 12048.778 6651.9052 -13032.4333 37278.2175 # Between 202 12048.778 0 12048.778 12048.778 # Within 46.8614 12048.778 6651.9052 -13032.4333 37278.2175 ``` All of this seamlessly generalizes to weighted scaling an centering, using the `w` argument to add a weight vector. ### 1.4 Panel Lags / Leads, Differences and Growth Rates With `flag` / `L` / `F`, `fdiff` / `D` and `fgrowth` / `G`, *collapse* provides a fast and comprehensive C++ based solution to the computation of (sequences of) lags / leads and (sequences of) lagged / leaded and suitably iterated (quasi-, log-) differences and growth rates on panel data. The *pseries* and *pdata.frame* methods to these functions and associated *transformation operators* use the panel-identifiers in the 'index' attached to these objects (where the last variable in the 'index' is taken as the time-variable and the variables before that are taken as individual identifiers) to perform fast fully-identified time-dependent operations on panel data, without the need of sorting the data. With `flag` / `L` / `F`, it is easy to lag or lead *pseries*: ```r # A panel-lag head(flag(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 65.662 66.074 66.444 66.787 67.113 # A panel-lead head(flag(LIFEEX, -1)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 66.074 66.444 66.787 67.113 67.435 67.762 # The lag and lead operators are even more parsimonious to employ: all_identical(L(LIFEEX), flag(LIFEEX), lag(LIFEEX)) # [1] TRUE all_identical(F(LIFEEX), flag(LIFEEX, -1), lead(LIFEEX)) # [1] TRUE ``` It is also possible to compute a sequence of lags / leads using `flag` or one of the operators: ```r # sequence of panel- lags and leads head(flag(LIFEEX, -1:3)) # F1 -- L1 L2 L3 # ABW-1960 66.074 65.662 NA NA NA # ABW-1961 66.444 66.074 65.662 NA NA # ABW-1962 66.787 66.444 66.074 65.662 NA # ABW-1963 67.113 66.787 66.444 66.074 65.662 # ABW-1964 67.435 67.113 66.787 66.444 66.074 # ABW-1965 67.762 67.435 67.113 66.787 66.444 all_identical(L(LIFEEX, -1:3), F(LIFEEX, 1:-3), flag(LIFEEX, -1:3)) # [1] TRUE # The native plm implementation also returns a matrix of lags but with different column names head(lag(LIFEEX, -1:3), 4) # -1 0 1 2 3 # ABW-1960 66.074 65.662 NA NA NA # ABW-1961 66.444 66.074 65.662 NA NA # ABW-1962 66.787 66.444 66.074 65.662 NA # ABW-1963 67.113 66.787 66.444 66.074 65.662 ``` Of course the lag orders may be unevenly spaced, i.e. `L(x, -1:3*12)` would compute seasonal lags on monthly data. On *pdata.frame*'s, the effects of `flag` and `L` / `F` differ insofar that `flag` will just lag the entire dataset without preserving identifiers (although the index attribute is always preserved), whereas `L` / `F` by default (`cols = is.numeric`) select the numeric variables and add the panel-id's on the left (default `keep.ids = TRUE`): ```r # This lags the entire data head(flag(pwlddev)) # country iso3c date year decade region income OECD PCGDP # ABW-1960 NA NA NA # ABW-1961 Aruba ABW 1961-01-01 1960 1960 Latin America & Caribbean High income FALSE NA # ABW-1962 Aruba ABW 1962-01-01 1961 1960 Latin America & Caribbean High income FALSE NA # ABW-1963 Aruba ABW 1963-01-01 1962 1960 Latin America & Caribbean High income FALSE NA # ABW-1964 Aruba ABW 1964-01-01 1963 1960 Latin America & Caribbean High income FALSE NA # ABW-1965 Aruba ABW 1965-01-01 1964 1960 Latin America & Caribbean High income FALSE NA # LIFEEX GINI ODA POP # ABW-1960 NA NA NA NA # ABW-1961 65.662 NA NA 54211 # ABW-1962 66.074 NA NA 55438 # ABW-1963 66.444 NA NA 56225 # ABW-1964 66.787 NA NA 56695 # ABW-1965 67.113 NA NA 57032 # This lags only numeric columns and preserves panel-id's head(L(pwlddev)) # iso3c year L1.decade L1.PCGDP L1.LIFEEX L1.GINI L1.ODA L1.POP # ABW-1960 ABW 1960 NA NA NA NA NA NA # ABW-1961 ABW 1961 1960 NA 65.662 NA NA 54211 # ABW-1962 ABW 1962 1960 NA 66.074 NA NA 55438 # ABW-1963 ABW 1963 1960 NA 66.444 NA NA 56225 # ABW-1964 ABW 1964 1960 NA 66.787 NA NA 56695 # ABW-1965 ABW 1965 1960 NA 67.113 NA NA 57032 # This lags only columns 9 through 12 and preserves panel-id's head(L(pwlddev, cols = 9:12)) # iso3c year L1.PCGDP L1.LIFEEX L1.GINI L1.ODA # ABW-1960 ABW 1960 NA NA NA NA # ABW-1961 ABW 1961 NA 65.662 NA NA # ABW-1962 ABW 1962 NA 66.074 NA NA # ABW-1963 ABW 1963 NA 66.444 NA NA # ABW-1964 ABW 1964 NA 66.787 NA NA # ABW-1965 ABW 1965 NA 67.113 NA NA ``` We can also easily compute a sequence of lags / leads on a panel data.frame: ```r # This lags only columns 9 through 12 and preserves panel-id's head(L(pwlddev, -1:3, cols = 9:12)) # iso3c year F1.PCGDP PCGDP L1.PCGDP L2.PCGDP L3.PCGDP F1.LIFEEX LIFEEX L1.LIFEEX L2.LIFEEX # ABW-1960 ABW 1960 NA NA NA NA NA 66.074 65.662 NA NA # ABW-1961 ABW 1961 NA NA NA NA NA 66.444 66.074 65.662 NA # ABW-1962 ABW 1962 NA NA NA NA NA 66.787 66.444 66.074 65.662 # ABW-1963 ABW 1963 NA NA NA NA NA 67.113 66.787 66.444 66.074 # ABW-1964 ABW 1964 NA NA NA NA NA 67.435 67.113 66.787 66.444 # ABW-1965 ABW 1965 NA NA NA NA NA 67.762 67.435 67.113 66.787 # L3.LIFEEX F1.GINI GINI L1.GINI L2.GINI L3.GINI F1.ODA ODA L1.ODA L2.ODA L3.ODA # ABW-1960 NA NA NA NA NA NA NA NA NA NA NA # ABW-1961 NA NA NA NA NA NA NA NA NA NA NA # ABW-1962 NA NA NA NA NA NA NA NA NA NA NA # ABW-1963 65.662 NA NA NA NA NA NA NA NA NA NA # ABW-1964 66.074 NA NA NA NA NA NA NA NA NA NA # ABW-1965 66.444 NA NA NA NA NA NA NA NA NA NA ``` Essentially the same functionality applies to `fdiff` / `D` and `fgrowth` / `G`, with the main differences that these functions also have a `diff` argument to determine the number of iterations: ```r # Panel-difference of Life Expectancy head(fdiff(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.412 0.370 0.343 0.326 0.322 # Second panel-difference head(fdiff(LIFEEX, diff = 2)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA NA -0.042 -0.027 -0.017 -0.004 # Panel-growth rate of Life Expectancy head(fgrowth(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.6274558 0.5599782 0.5162242 0.4881189 0.4797878 # Growth rate of growth rate of Life Expectancy head(fgrowth(LIFEEX, diff = 2)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA NA -10.754153 -7.813521 -5.444387 -1.706782 identical(D(LIFEEX), fdiff(LIFEEX)) # [1] TRUE identical(G(LIFEEX), fgrowth(LIFEEX)) # [1] TRUE identical(fdiff(LIFEEX), diff(LIFEEX)) # Same as plm::diff.pseries (which does not compute iterated panel-differences) # [1] TRUE ``` By default, growth rates are calculated in percentage terms which is set by the default argument `scale = 100`. It is also possible to compute log-differences with `fdiff(.., log = TRUE)` or the `Dlog` operator, and growth rates in percentage terms based on log-differences using `fgrowth(.., logdiff = TRUE)`. ```r # Panel log-difference of Life Expectancy head(Dlog(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.006254955 0.005584162 0.005148963 0.004869315 0.004786405 # Panel log-difference growth rate (in percentage terms) of Life Expectancy head(G(LIFEEX, logdiff = TRUE)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.6254955 0.5584162 0.5148963 0.4869315 0.4786405 ``` It is also possible to compute sequences of lagged / leaded and iterated differences, log-differences and growth rates: ```r # first and second forward-difference and first and second difference of lags 1-3 of Life-Expectancy head(D(LIFEEX, -1:3, 1:2)) # FD1 FD2 -- D1 D2 L2D1 L2D2 L3D1 L3D2 # ABW-1960 -0.412 -0.042 65.662 NA NA NA NA NA NA # ABW-1961 -0.370 -0.027 66.074 0.412 NA NA NA NA NA # ABW-1962 -0.343 -0.017 66.444 0.370 -0.042 0.782 NA NA NA # ABW-1963 -0.326 -0.004 66.787 0.343 -0.027 0.713 NA 1.125 NA # ABW-1964 -0.322 0.005 67.113 0.326 -0.017 0.669 -0.113 1.039 NA # ABW-1965 -0.327 0.006 67.435 0.322 -0.004 0.648 -0.065 0.991 NA # Same with Log-differences head(Dlog(LIFEEX, -1:3, 1:2)) # FDlog1 FDlog2 -- Dlog1 Dlog2 L2Dlog1 L2Dlog2 # ABW-1960 -0.006254955 -6.707929e-04 4.184520 NA NA NA NA # ABW-1961 -0.005584162 -4.351984e-04 4.190775 0.006254955 NA NA NA # ABW-1962 -0.005148963 -2.796481e-04 4.196359 0.005584162 -0.0006707929 0.01183912 NA # ABW-1963 -0.004869315 -8.291000e-05 4.201508 0.005148963 -0.0004351984 0.01073312 NA # ABW-1964 -0.004786405 5.098981e-05 4.206378 0.004869315 -0.0002796481 0.01001828 -0.001820838 # ABW-1965 -0.004837395 6.482830e-05 4.211164 0.004786405 -0.0000829100 0.00965572 -0.001077405 # L3Dlog1 L3Dlog2 # ABW-1960 NA NA # ABW-1961 NA NA # ABW-1962 NA NA # ABW-1963 0.01698808 NA # ABW-1964 0.01560244 NA # ABW-1965 0.01480468 NA # Same with (exact) growth rates head(G(LIFEEX, -1:3, 1:2)) # FG1 FG2 -- G1 G2 L2G1 L2G2 L3G1 L3G2 # ABW-1960 -0.6235433 11.974895 65.662 NA NA NA NA NA NA # ABW-1961 -0.5568599 8.428580 66.074 0.6274558 NA NA NA NA NA # ABW-1962 -0.5135730 5.728297 66.444 0.5599782 -10.754153 1.1909476 NA NA NA # ABW-1963 -0.4857479 1.727984 66.787 0.5162242 -7.813521 1.0790931 NA 1.713320 NA # ABW-1964 -0.4774968 -1.051555 67.113 0.4881189 -5.444387 1.0068629 -15.45699 1.572479 NA # ABW-1965 -0.4825714 -1.319230 67.435 0.4797878 -1.706782 0.9702487 -10.08666 1.491482 NA ``` A further possibility is to compute quasi-differences and quasi-log-differences of the form $x_t - \rho x_{t-s}$ or $log(x_t) - \rho log(x_{t-s})$. These are useful for panel-regressions suffering from serial-correlation, following Cochrane & Orcutt (1949), and can be specified with the `rho` argument to `fdiff`, `D` and `Dlog`. ```r # Regression of GDP on Life Expectance with country and time FE mod <- lm(PCGDP ~ LIFEEX, data = fhdwithin(fselect(pwlddev, PCGDP, LIFEEX), fill = FALSE)) mod # # Call: # lm(formula = PCGDP ~ LIFEEX, data = fhdwithin(fselect(pwlddev, # PCGDP, LIFEEX), fill = FALSE)) # # Coefficients: # (Intercept) LIFEEX # -2.442e-12 -3.330e+02 # Computing autocorrelation of residuals r <- residuals(mod) r <- pwcor(r, L(r, 1, substr(names(r), 1, 3))) # Need this to compute a panel-lag r # [1] .98 # Running the regression again quasi-differencing the transformed data modCO <- lm(PCGDP ~ LIFEEX, data = fdiff(fhdwithin(fselect(pwlddev, PCGDP, LIFEEX), variable.wise = FALSE), rho = r, stubs = FALSE)) modCO # # Call: # lm(formula = PCGDP ~ LIFEEX, data = fdiff(fhdwithin(fselect(pwlddev, # PCGDP, LIFEEX), variable.wise = FALSE), rho = r, stubs = FALSE)) # # Coefficients: # (Intercept) LIFEEX # -12.93 -91.97 # In this case rho is almost 1, so we might as well just difference the untransformed data and go with that # We also need to bootstrap this for proper standard errors. ``` A final important advantage of the *collapse* functions is that the panel-identifiers are preserved, even if a matrix of lags / leads / differences or growth rates is returned. This allows for nested panel-computations, for example we can compute shifted sequences of lagged / leaded and iterated panel differences: ```r # Sequence of differneces (same as above), adding one extra lag of the whole sequence head(L(D(LIFEEX, -1:3, 1:2), 0:1)) # FD1 L1.FD1 FD2 L1.FD2 -- L1.-- D1 L1.D1 D2 L1.D2 L2D1 L1.L2D1 L2D2 # ABW-1960 -0.412 NA -0.042 NA 65.662 NA NA NA NA NA NA NA NA # ABW-1961 -0.370 -0.412 -0.027 -0.042 66.074 65.662 0.412 NA NA NA NA NA NA # ABW-1962 -0.343 -0.370 -0.017 -0.027 66.444 66.074 0.370 0.412 -0.042 NA 0.782 NA NA # ABW-1963 -0.326 -0.343 -0.004 -0.017 66.787 66.444 0.343 0.370 -0.027 -0.042 0.713 0.782 NA # ABW-1964 -0.322 -0.326 0.005 -0.004 67.113 66.787 0.326 0.343 -0.017 -0.027 0.669 0.713 -0.113 # ABW-1965 -0.327 -0.322 0.006 0.005 67.435 67.113 0.322 0.326 -0.004 -0.017 0.648 0.669 -0.065 # L1.L2D2 L3D1 L1.L3D1 L3D2 L1.L3D2 # ABW-1960 NA NA NA NA NA # ABW-1961 NA NA NA NA NA # ABW-1962 NA NA NA NA NA # ABW-1963 NA 1.125 NA NA NA # ABW-1964 NA 1.039 1.125 NA NA # ABW-1965 -0.113 0.991 1.039 NA NA ``` All of this naturally generalized to computations on *pdata.frames*: ```r head(D(pwlddev, -1:3, 1:2, cols = 9:10), 3) # iso3c year FD1.PCGDP FD2.PCGDP PCGDP D1.PCGDP D2.PCGDP L2D1.PCGDP L2D2.PCGDP L3D1.PCGDP # ABW-1960 ABW 1960 NA NA NA NA NA NA NA NA # ABW-1961 ABW 1961 NA NA NA NA NA NA NA NA # ABW-1962 ABW 1962 NA NA NA NA NA NA NA NA # L3D2.PCGDP FD1.LIFEEX FD2.LIFEEX LIFEEX D1.LIFEEX D2.LIFEEX L2D1.LIFEEX L2D2.LIFEEX # ABW-1960 NA -0.412 -0.042 65.662 NA NA NA NA # ABW-1961 NA -0.370 -0.027 66.074 0.412 NA NA NA # ABW-1962 NA -0.343 -0.017 66.444 0.370 -0.042 0.782 NA # L3D1.LIFEEX L3D2.LIFEEX # ABW-1960 NA NA # ABW-1961 NA NA # ABW-1962 NA NA head(L(D(pwlddev, -1:3, 1:2, cols = 9:10), 0:1), 3) # iso3c year FD1.PCGDP L1.FD1.PCGDP FD2.PCGDP L1.FD2.PCGDP PCGDP L1.PCGDP D1.PCGDP # ABW-1960 ABW 1960 NA NA NA NA NA NA NA # ABW-1961 ABW 1961 NA NA NA NA NA NA NA # ABW-1962 ABW 1962 NA NA NA NA NA NA NA # L1.D1.PCGDP D2.PCGDP L1.D2.PCGDP L2D1.PCGDP L1.L2D1.PCGDP L2D2.PCGDP L1.L2D2.PCGDP # ABW-1960 NA NA NA NA NA NA NA # ABW-1961 NA NA NA NA NA NA NA # ABW-1962 NA NA NA NA NA NA NA # L3D1.PCGDP L1.L3D1.PCGDP L3D2.PCGDP L1.L3D2.PCGDP FD1.LIFEEX L1.FD1.LIFEEX FD2.LIFEEX # ABW-1960 NA NA NA NA -0.412 NA -0.042 # ABW-1961 NA NA NA NA -0.370 -0.412 -0.027 # ABW-1962 NA NA NA NA -0.343 -0.370 -0.017 # L1.FD2.LIFEEX LIFEEX L1.LIFEEX D1.LIFEEX L1.D1.LIFEEX D2.LIFEEX L1.D2.LIFEEX L2D1.LIFEEX # ABW-1960 NA 65.662 NA NA NA NA NA NA # ABW-1961 -0.042 66.074 65.662 0.412 NA NA NA NA # ABW-1962 -0.027 66.444 66.074 0.370 0.412 -0.042 NA 0.782 # L1.L2D1.LIFEEX L2D2.LIFEEX L1.L2D2.LIFEEX L3D1.LIFEEX L1.L3D1.LIFEEX L3D2.LIFEEX # ABW-1960 NA NA NA NA NA NA # ABW-1961 NA NA NA NA NA NA # ABW-1962 NA NA NA NA NA NA # L1.L3D2.LIFEEX # ABW-1960 NA # ABW-1961 NA # ABW-1962 NA ``` ### 1.5 Panel Data to Array Conversions Viewing and transforming panel data stored in an array can be a powerful strategy, especially as it provides much more direct access to the different dimensions of the data. The function `psmat` can be used to efficiently transform *pseries* to a 2D matrix, and *pdata.frame*'s to a 3D array: ```r # Converting the panel series to array, individual rows (default) str(psmat(LIFEEX)) # 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # - attr(*, "transpose")= logi FALSE # Converting the panel series to array, individual columns str(psmat(LIFEEX, transpose = TRUE)) # 'psmat' num [1:61, 1:216] 65.7 66.1 66.4 66.8 67.1 ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # - attr(*, "transpose")= logi TRUE # Same as plm::as.matrix.pseries, apart from attributes identical(unattrib(psmat(LIFEEX)), unattrib(as.matrix(LIFEEX))) # [1] TRUE identical(unattrib(psmat(LIFEEX, transpose = TRUE)), unattrib(as.matrix(LIFEEX, idbyrow = FALSE))) # [1] TRUE ``` Applying `psmat` to a *pdata.frame* yields a 3D array: ```r psar <- psmat(pwlddev, cols = 9:12) str(psar) # 'psmat' num [1:216, 1:61, 1:4] NA NA NA NA NA ... # - attr(*, "dimnames")=List of 3 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA" # - attr(*, "transpose")= logi FALSE str(psmat(pwlddev, cols = 9:12, transpose = TRUE)) # 'psmat' num [1:61, 1:216, 1:4] NA NA NA NA NA NA NA NA NA NA ... # - attr(*, "dimnames")=List of 3 # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA" # - attr(*, "transpose")= logi TRUE ``` This format can be very convenient to quickly and freely access data for different countries, variables and time-periods: ```r # Looking at wealth, health and inequality in Brazil and Argentinia, 1990-1999 aperm(psar[c("BRA","ARG"), as.character(1990:1999), c("PCGDP", "LIFEEX", "GINI")]) # , , BRA # # 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 # PCGDP 7983.7 7963.1 7791.8 8020.6 8311.6 8540.1 8591.0 8744.8 8641.3 8554.1 # LIFEEX 66.3 66.7 67.1 67.5 67.9 68.3 68.7 69.1 69.4 69.8 # GINI 60.5 NA 53.2 60.1 NA 59.6 59.9 59.8 59.6 59.0 # # , , ARG # # 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 # PCGDP 6245.7 6721.3 7157.3 7644.2 7988.6 7666.5 7994.2 8543.0 8772.1 8381.3 # LIFEEX 71.6 71.8 72.0 72.2 72.5 72.7 72.8 73.0 73.2 73.4 # GINI NA 46.8 45.5 44.9 45.9 48.9 49.5 49.1 50.7 49.8 ``` `psmat` can also return the output as a list of panel series matrices: ```r pslist <- psmat(pwlddev, cols = 9:12, array = FALSE) str(pslist) # List of 4 # $ PCGDP : 'psmat' num [1:216, 1:61] NA NA NA NA NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE # $ LIFEEX: 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE # $ GINI : 'psmat' num [1:216, 1:61] NA NA NA NA NA NA NA NA NA NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE # $ ODA : 'psmat' num [1:216, 1:61] NA 116769997 -390000 NA NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE ``` This list can then be unlisted using the function `unlist2d` (for unlisting in 2-dimensions), to yield a reshaped data.frame: ```r head(unlist2d(pslist, idcols = "Variable", row.names = "Country Code"), 3) # Variable Country Code 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 # 1 PCGDP ABW NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 2 PCGDP AFG NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 3 PCGDP AGO NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 # 1 NA NA NA NA NA NA NA NA NA NA NA 15669.616 # 2 NA NA NA NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA 3193.404 2947.194 2844.322 2859.919 2925.367 2922.217 2902.618 # 1987 1988 1989 1990 1991 1992 1993 1994 1995 # 1 18427.612 22134.017 24837.951 25357.787 26329.313 26401.969 26663.208 27272.310 26705.18 # 2 NA NA NA NA NA NA NA NA NA # 3 2916.794 2989.617 2889.886 2697.491 2635.156 2401.234 1767.025 1733.844 1930.80 # 1996 1997 1998 1999 2000 2001 2002 2003 2004 # 1 26087.776 27190.501 27151.92 26954.40 28417.384 26966.055 25508.3027 25469.2876 27005.5294 # 2 NA NA NA NA NA NA 330.3036 343.0809 333.2167 # 3 2122.968 2205.294 2235.39 2211.13 2205.205 2223.335 2444.4178 2433.8616 2608.7840 # 2005 2006 2007 2008 2009 2010 2011 2012 2013 # 1 26979.8854 27046.2242 27427.579 27365.9312 24463.6922 23512.603 24233.0011 23781.2573 24635.7649 # 2 357.2347 365.2845 405.549 412.0143 488.3003 543.303 528.7366 576.1901 587.5651 # 3 2896.5547 3116.1810 3424.372 3668.0799 3565.0569 3587.884 3579.9599 3748.4507 3796.8822 # 2014 2015 2016 2017 2018 2019 2020 # 1 24563.2343 25822.2514 26231.0267 26630.2053 NA NA NA # 2 583.6562 574.1841 571.0738 571.4407 564.610 573.2876 NA # 3 3843.1979 3748.3201 3530.3107 3409.9303 3233.906 3111.1577 NA ``` Of course we could also have applied some transformation (like computing pairwise correlations) to each matrix before unlisting. In any case this kind of programming provides lots of possibilities to explore and manipulate panel data (as we will see in Part 2). ### Benchmarks Below benchmarks are provided of the *collapse* implementation against native *plm*. To do this the dataset used so far is extended to have approx 1 million observations: ```r wlddevsmall <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA")) wlddevsmall$iso3c <- as.character(wlddevsmall$iso3c) data <- replicate(100, wlddevsmall, simplify = FALSE) rm(wlddevsmall) uniquify <- function(x, i) { x$iso3c <- paste0(x$iso3c, i) x } data <- unlist2d(Map(uniquify, data, as.list(1:100)), idcols = FALSE) data <- pdata.frame(data, index = c("iso3c", "year")) pdim(data) # Balanced Panel: n = 21600, T = 61, N = 1317600 ``` The data has 21600 individuals (countries) observed for up to 61 years (1960-2020), the total number of rows is 1317600. We can pull out a series of life expectancy and run some benchmarks. The Windows laptop on which these benchmarks were run has a 2x 2.2 GHZ Intel i5 processor, 8GB DDR3 RAM and a Samsung SSD hard drive. ```r library(microbenchmark) # Creating the extended panel series for Life Expectancy (l for large) LIFEEX_l <- data$LIFEEX str(LIFEEX_l) # 'pseries' Named num [1:1317600] 65.7 66.1 66.4 66.8 67.1 ... # - attr(*, "names")= chr [1:1317600] "ABW1-1960" "ABW1-1961" "ABW1-1962" "ABW1-1963" ... # - attr(*, "index")=Classes 'pindex' and 'data.frame': 1317600 obs. of 2 variables: # ..$ iso3c: Factor w/ 21600 levels "ABW1","ABW10",..: 1 1 1 1 1 1 1 1 1 1 ... # ..$ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... # Between Transformations microbenchmark(Between(LIFEEX_l, na.rm = TRUE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # Between(LIFEEX_l, na.rm = TRUE) 17.73594 18.71248 21.84342 20.13574 22.35853 37.94689 10 microbenchmark(fbetween(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fbetween(LIFEEX_l) 4.408771 4.639519 4.705529 4.718424 4.771498 4.908684 10 # Within Transformations microbenchmark(Within(LIFEEX_l, na.rm = TRUE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # Within(LIFEEX_l, na.rm = TRUE) 10.17887 10.74663 10.91092 10.8766 11.24224 11.37664 10 microbenchmark(fwithin(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(LIFEEX_l) 4.522218 4.550303 4.735344 4.644296 4.696017 5.297036 10 # Higher-Dimenional Between and Within Transformations microbenchmark(fhdbetween(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fhdbetween(LIFEEX_l) 56.916 57.29971 66.0179 58.13864 76.50108 84.10625 10 microbenchmark(fhdwithin(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fhdwithin(LIFEEX_l) 55.55906 56.2372 62.31852 56.56555 75.78784 77.20657 10 # Single Lag microbenchmark(lag(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # lag(LIFEEX_l) 7.967776 8.144896 8.542879 8.632468 8.840092 8.949357 10 microbenchmark(flag(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(LIFEEX_l) 7.994057 8.038747 8.337862 8.180484 8.603481 9.12086 10 # Sequence of Lags / Leads microbenchmark(lag(LIFEEX_l, -1:3), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # lag(LIFEEX_l, -1:3) 18.7525 19.29476 28.61876 27.95813 38.11081 39.5329 10 microbenchmark(flag(LIFEEX_l, -1:3), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(LIFEEX_l, -1:3) 15.5415 15.64335 21.10042 15.83998 33.37699 34.10265 10 # Single difference microbenchmark(diff(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # diff(LIFEEX_l) 8.00525 8.16884 8.370421 8.368776 8.554404 8.733697 10 microbenchmark(fdiff(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l) 7.937805 8.020502 8.3458 8.2451 8.426238 9.34923 10 # Iterated Difference microbenchmark(fdiff(LIFEEX_l, diff = 2), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l, diff = 2) 10.20129 10.62786 10.72184 10.77488 10.82326 11.21805 10 # Sequence of Lagged / Leaded and iterated differences microbenchmark(fdiff(LIFEEX_l, -1:3, 1:2), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l, -1:3, 1:2) 45.90159 52.22494 66.83236 53.21347 57.53222 187.8582 10 # Single Growth Rate microbenchmark(fgrowth(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fgrowth(LIFEEX_l) 8.222304 8.357153 8.69059 8.727158 8.884167 9.436683 10 # Single Log-Difference microbenchmark(fdiff(LIFEEX_l, log = TRUE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l, log = TRUE) 12.41394 12.8583 15.06961 13.17156 13.61659 32.51989 10 # Panel Series to Matrix Conversion # system.time(as.matrix(LIFEEX_l)) This takes about 3 minutes to compute microbenchmark(psmat(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # psmat(LIFEEX_l) 1.482478 1.500149 1.628028 1.520813 1.553941 2.438639 10 ``` This shows a comparison between flag and *data.table*'s shift: ```r microbenchmark(L(data, cols = 3:6), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # L(data, cols = 3:6) 14.13692 14.43877 20.88276 18.865 19.73141 37.06244 10 library(data.table) setDT(data) # 'Improper' panel-lag microbenchmark(data[, shift(.SD), by = iso3c, .SDcols = 3:6], times = 10) # Unit: milliseconds # expr min lq mean median uq max # data[, shift(.SD), by = iso3c, .SDcols = 3:6] 176.5308 199.9415 215.6897 204.0719 230.089 268.9992 # neval # 10 # This does what L is actually doing (without sorting the data) microbenchmark(data[order(year), shift(.SD), by = iso3c, .SDcols = 3:6], times = 10) # Unit: milliseconds # expr min lq mean median # data[order(year), shift(.SD), by = iso3c, .SDcols = 3:6] 193.9684 210.7025 213.7664 213.0727 # uq max neval # 221.9783 226.3685 10 ``` The above dataset has 1 million obs in 20 thousand groups, but what about 10 million obs and 1 million groups? Do *collapse* functions scale efficiently as data and the number of groups grows large? Here is a simple benchmark: ```r x <- rnorm(1e7) # 10 million obs g <- qF(rep(1:1e6, each = 10), na.exclude = FALSE) # 1 million individuals t <- qF(rep(1:10, 1e6), na.exclude = FALSE) # 10 time-periods per individual microbenchmark(fbetween(x, g), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fbetween(x, g) 51.66189 53.60693 91.00168 62.54655 73.87835 233.3696 10 microbenchmark(fwithin(x, g), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(x, g) 43.46291 44.03954 77.0216 45.33919 58.65132 196.7248 10 microbenchmark(flag(x, 1, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(x, 1, g, t) 42.65382 55.05332 87.72527 59.55935 80.86143 210.8074 10 microbenchmark(flag(x, -1:1, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(x, -1:1, g, t) 92.19842 92.5559 162.8994 166.736 228.6354 239.6953 10 microbenchmark(fdiff(x, 1, 1, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(x, 1, 1, g, t) 42.51778 46.29306 82.27838 53.85735 67.54295 205.0114 10 microbenchmark(fdiff(x, 1, 2, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(x, 1, 2, g, t) 59.9363 62.11689 84.42818 69.85072 75.38506 217.1431 10 microbenchmark(fdiff(x, -1:1, 1:2, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(x, -1:1, 1:2, g, t) 163.5046 182.9127 246.2855 250.664 301.4046 339.1415 10 ``` The results show that *collapse* functions perform very well even as the number of groups grows large. The conclusion of this benchmark analysis is that *collapse*'s fast functions, with or without the help of *plm* classes, allow for very fast transformations of panel data, and should enable R programmers and econometricians to implement high-performance panel data estimators without having to dive into C/C++ themselves or resorting to *data.table* metaprogramming. ## Part 2: Fast Exploration of Panel Data *collapse* also provides some essential functions to summarize and explore panel data, such as a fast check of variation over different dimensions, fast summary-statistics for panel data, panel-auto, partial-auto and cross-correlation functions, and a fast F-test to test fixed effects and other exclusion restrictions on (large) panel data models. Panel data to matrix conversion further allows the application of some correlational and unsupervised learning tools such as PCA, clustering or dynamic factor analysis. ### 2.1 Variation Check for Panel Data The function `varying` can be used to check over which panel-dimensions different variable have variation. When passed a *pdata.frame*, `varying` by default takes the first identifier and checks for variation *within* that dimension. ```r # This checks for any variation within "iso3c", the first index variable: TRUE means data vary within country i.e. over time. varying(pwlddev) # country date year decade region income OECD PCGDP LIFEEX GINI ODA POP # FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE ``` Alternatively any index variable or combination of index variables can be specified: ```r # This checks any variation within time variable, i.e. cross-sectional variation. varying(pwlddev, effect = "year") # country iso3c date decade region income OECD PCGDP LIFEEX GINI ODA POP # TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE ``` Another possibility is checking for variation within each group: ```r # This checks cross-sectional variation within each year for 4 indicators. head(varying(pwlddev, effect = "year", cols = 9:12, any_group = FALSE)) # PCGDP LIFEEX GINI ODA # 1960 TRUE TRUE NA TRUE # 1961 TRUE TRUE NA TRUE # 1962 TRUE TRUE NA TRUE # 1963 TRUE TRUE NA TRUE # 1964 TRUE TRUE NA TRUE # 1965 TRUE TRUE NA TRUE ``` `varying` also has a pseries method. The code below checks for time-variation of the GINI index within each country. A `NA` is returned when there are no observations within a particular country. ```r head(varying(pwlddev$GINI, any_group = FALSE), 20) # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR # NA NA TRUE TRUE NA TRUE TRUE TRUE NA NA TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA ``` If we would like to gave more information about this variation, we could also invoke the functions `fndistinct` and `fsd`, which do not have *pseries* methods: ```r head(fndistinct(pwlddev$GINI, index(pwlddev, "iso3c")), 20) # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR # 0 0 3 9 0 2 29 20 0 0 9 16 5 4 16 3 5 9 12 0 head(round(fsd(pwlddev$GINI, index(pwlddev, "iso3c")), 2), 20) # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR # NA NA 5.18 2.47 NA 4.60 3.84 2.76 NA NA 1.19 1.76 4.85 4.37 1.71 4.60 5.98 3.02 2.58 NA ``` ### 2.2 Summary Statistics for Panel Data Efficient summary statistics for panel data have long been implemented in other statistical softwares. The command `qsu`, shorthand for 'quick-summary', is a very efficient summary statistics command inspired by the *xtsummarize* command in the Stata statistical software. It computes a default set of 5 statistics (N, mean, sd, min and max) and can also computed higher moments (skewness and kurtosis) in a single pass through the data (using a numerically stable online algorithm generalized from Welford's Algorithm for variance computations). With panel data, `qsu` computes these statistics not just on the raw data, but also on the between-transformed and within-transformed data: ```r qsu(pwlddev, cols = 9:12, higher = TRUE) # , , PCGDP # # N/T Mean SD Min Max Skew Kurt # Overall 9470 12048.778 19077.6416 132.0776 196061.417 3.1276 17.1154 # Between 206 12962.6054 20189.9007 253.1886 141200.38 3.1263 16.2299 # Within 45.9709 12048.778 6723.6808 -33504.8721 76767.5254 0.6576 17.2003 # # , , LIFEEX # # N/T Mean SD Min Max Skew Kurt # Overall 11670 64.2963 11.4764 18.907 85.4171 -0.6748 2.6718 # Between 207 64.9537 9.8936 40.9663 85.4171 -0.5012 2.1693 # Within 56.3768 64.2963 6.0842 32.9068 84.4198 -0.2643 3.7027 # # , , GINI # # N/T Mean SD Min Max Skew Kurt # Overall 1744 38.5341 9.2006 20.7 65.8 0.596 2.5329 # Between 167 39.4233 8.1356 24.8667 61.7143 0.5832 2.8256 # Within 10.4431 38.5341 2.9277 25.3917 55.3591 0.3263 5.3389 # # , , ODA # # N/T Mean SD Min Max Skew Kurt # Overall 8608 454'720131 868'712654 -997'679993 2.56715605e+10 6.9832 114.889 # Between 178 439'168412 569'049959 468717.916 3.62337432e+09 2.355 9.9487 # Within 48.3596 454'720131 650'709624 -2.44379420e+09 2.45610972e+10 9.6047 263.3716 ``` Key statistics to look at in this summary are the sample size and the standard-deviation decomposed into the between-individuals and the within-individuals standard-deviation: For GDP per Capita we have 8995 observations in the panel series for 203 countries, with on average 44.31 observations (time-periods T) per country. The between-country standard deviation is 19600 USD, around 3-times larger than the within-country (over-time) standard deviation of 6300 USD. Regarding the mean, the between-mean, computed as a cross-sectional average of country averages, usually differs slightly from the overall average taken across all data points. The within-transformed data is computed and summarized with the overall mean added back (i.e. as in `fwithin(PCGDP, mean = "overall.mean")`). We can also do groupwise panel-statistics and `qsu` also supports weights (not shown): ```r qsu(pwlddev, ~ income, cols = 9:12, higher = TRUE) # , , Overall, PCGDP # # N/T Mean SD Min Max Skew Kurt # High income 3179 30280.7283 23847.0483 932.0417 196061.417 2.1702 10.3425 # Low income 1311 597.4053 288.4392 164.3366 1864.7925 1.2385 4.7115 # Lower middle income 2246 1574.2535 858.7183 144.9863 4818.1922 0.9093 3.7153 # Upper middle income 2734 4945.3258 2979.5609 132.0776 20532.9523 1.2286 4.9391 # # , , Between, PCGDP # # N/T Mean SD Min Max Skew Kurt # High income 71 30280.7283 20908.5323 5413.4495 141200.38 2.1347 9.9673 # Low income 28 597.4053 243.8219 253.1886 1357.3326 1.4171 5.3137 # Lower middle income 47 1574.2535 676.3157 444.2899 2896.8682 0.3562 2.2358 # Upper middle income 60 4945.3258 2327.3834 1604.595 13344.5423 1.24 4.7803 # # , , Within, PCGDP # # N/T Mean SD Min Max Skew Kurt # High income 44.7746 12048.778 11467.9987 -33504.8721 76767.5254 0.3924 6.0523 # Low income 46.8214 12048.778 154.1039 11606.2382 12698.296 0.5098 4.0676 # Lower middle income 47.7872 12048.778 529.1449 10377.7234 14603.1055 0.7658 5.4272 # Upper middle income 45.5667 12048.778 1860.395 4846.3834 24883.1246 0.6858 7.8469 # # , , Overall, LIFEEX # # N/T Mean SD Min Max Skew Kurt # High income 3831 73.6246 5.6693 42.672 85.4171 -1.0067 5.5553 # Low income 1800 49.7301 9.0944 26.172 74.43 0.2748 2.6721 # Lower middle income 2790 58.1481 9.3115 18.907 76.699 -0.3406 2.6845 # Upper middle income 3249 66.6466 7.537 36.535 80.279 -1.0988 4.2262 # # , , Between, LIFEEX # # N/T Mean SD Min Max Skew Kurt # High income 73 73.6246 3.3499 64.0302 85.4171 -0.6537 2.9946 # Low income 30 49.7301 4.8321 40.9663 66.945 1.5195 6.6802 # Lower middle income 47 58.1481 5.9945 45.7687 71.6078 0.0352 2.2126 # Upper middle income 57 66.6466 4.9955 48.057 74.0504 -1.3647 5.303 # # , , Within, LIFEEX # # N/T Mean SD Min Max Skew Kurt # High income 52.4795 64.2963 4.5738 42.9381 78.1271 -0.4838 3.8923 # Low income 60 64.2963 7.7045 41.5678 84.4198 0.0402 2.6086 # Lower middle income 59.3617 64.2963 7.1253 32.9068 83.9918 -0.2522 3.181 # Upper middle income 57 64.2963 5.6437 41.4342 83.0122 -0.507 4.0355 # # , , Overall, GINI # # N/T Mean SD Min Max Skew Kurt # High income 680 33.3037 6.7885 20.7 58.9 1.4864 5.6772 # Low income 107 41.1327 6.5767 29.5 65.8 0.7523 4.236 # Lower middle income 369 40.0504 9.3032 24 63.2 0.4388 2.2218 # Upper middle income 588 43.1585 8.9549 25.2 64.8 0.0814 2.3517 # # , , Between, GINI # # N/T Mean SD Min Max Skew Kurt # High income 41 33.3037 6.5238 24.8667 53.6296 1.5091 5.3913 # Low income 28 41.1327 5.1706 32.1333 58.75 0.6042 4.0473 # Lower middle income 46 40.0504 8.4622 27.6955 54.925 0.334 1.797 # Upper middle income 52 43.1585 8.4359 27.9545 61.7143 0.0336 2.2441 # # , , Within, GINI # # N/T Mean SD Min Max Skew Kurt # High income 16.5854 38.5341 1.8771 31.1841 45.8841 -0.0818 4.902 # Low income 3.8214 38.5341 4.0643 29.4591 55.3591 0.6766 5.1025 # Lower middle income 8.0217 38.5341 3.8654 27.9452 55.1008 0.4093 4.0058 # Upper middle income 11.3077 38.5341 3.0043 25.3917 48.0131 0.0728 3.5115 # # , , Overall, ODA # # N/T Mean SD Min Max Skew # High income 1575 153'663194 425'918409 -464'709991 4.34612988e+09 5.2505 # Low income 1692 631'660165 941'498380 -500000 1.04032100e+10 4.4628 # Lower middle income 2544 692'072692 1.02452490e+09 -605'969971 1.18790801e+10 3.7913 # Upper middle income 2797 301'326218 765'116131 -997'679993 2.56715605e+10 16.3123 # Kurt # High income 36.2748 # Low income 32.1305 # Lower middle income 25.2442 # Upper middle income 464.8625 # # , , Between, ODA # # N/T Mean SD Min Max Skew Kurt # High income 42 153'663194 339'972909 468717.916 2.05456932e+09 3.9522 19.0792 # Low income 30 631'660165 466'265486 91'536334 1.67220583e+09 0.9769 2.6602 # Lower middle income 47 692'072692 765'003585 28'919000.2 3.62337432e+09 2.0429 7.2664 # Upper middle income 59 301'326218 382'148153 13'160000 1.91297800e+09 2.1072 7.0291 # # , , Within, ODA # # N/T Mean SD Min Max Skew # High income 37.5 454'720131 256'563661 -920'977647 2.87632242e+09 2.2074 # Low income 56.4 454'720131 817'933797 -1.19519570e+09 9.18572426e+09 3.8872 # Lower middle income 54.1277 454'720131 681'484247 -2.44379420e+09 1.12814455e+10 3.8965 # Upper middle income 47.4068 454'720131 662'846500 -2.04042108e+09 2.45610972e+10 19.6351 # Kurt # High income 28.8682 # Low income 33.5194 # Lower middle income 47.7246 # Upper middle income 657.3041 ``` Here it should be noted that any grouping is applied independently from the data-transformation, i.e. the data is first transformed, and then grouped statistics are calculated on the transformed data. The computation of statistics is very efficient: ```r qsu(LIFEEX_l) # N/T Mean SD Min Max # Overall 1'167000 64.2963 11.4759 18.907 85.4171 # Between 20700 64.9537 9.87 40.9663 85.4171 # Within 56.3768 64.2963 6.0839 32.9068 84.4198 microbenchmark(qsu(LIFEEX_l)) # Unit: milliseconds # expr min lq mean median uq max neval # qsu(LIFEEX_l) 9.49355 10.25679 11.07317 10.37214 10.78839 50.22574 100 ``` Using the transformation functions and the functions `pwcor` and `pwcov`, we can also easily explore the correlation structure of the data: ```r # Overall pairwise correlations with pairwise observation count and significance testing (* = significant at 5% level) pwcor(get_vars(pwlddev, 9:12), N = TRUE, P = TRUE) # PCGDP LIFEEX GINI ODA # PCGDP 1 (9470) .57* (9022) -.44* (1735) -.16* (7128) # LIFEEX .57* (9022) 1 (11670) -.35* (1742) -.02 (8142) # GINI -.44* (1735) -.35* (1742) 1 (1744) -.20* (1109) # ODA -.16* (7128) -.02 (8142) -.20* (1109) 1 (8608) # Between correlations pwcor(fmean(get_vars(pwlddev, 9:12), pwlddev$iso3c), N = TRUE, P = TRUE) # PCGDP LIFEEX GINI ODA # PCGDP 1 (206) .60* (199) -.42* (165) -.25* (172) # LIFEEX .60* (199) 1 (207) -.40* (165) -.21* (172) # GINI -.42* (165) -.40* (165) 1 (167) -.19* (145) # ODA -.25* (172) -.21* (172) -.19* (145) 1 (178) # Within correlations pwcor(W(pwlddev, cols = 9:12, keep.ids = FALSE), N = TRUE, P = TRUE) # W.PCGDP W.LIFEEX W.GINI W.ODA # W.PCGDP 1 (9470) .31* (9022) -.01 (1735) -.01 (7128) # W.LIFEEX .31* (9022) 1 (11670) -.16* (1742) .17* (8142) # W.GINI -.01 (1735) -.16* (1742) 1 (1744) -.08* (1109) # W.ODA -.01 (7128) .17* (8142) -.08* (1109) 1 (8608) ``` The correlations show that the between (cross-country) relationships of these macro-variables are quite strong, but within countries the relationships are much weaker, for example there seems to be no significant relationship between GDP per Capita and either inequality or ODA received within countries over time. ### 2.3 Exploring Panel Data in Matrix / Array Form We can take a single panel series such as GDP per Capita and explore it further: ```r # Generating a (transposed) matrix of country GDPs per capita tGDPmat <- psmat(PCGDP, transpose = TRUE) tGDPmat[1:10, 1:10] # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG # 1960 NA NA NA NA NA NA 5643 NA NA NA # 1961 NA NA NA NA NA NA 5853 NA NA NA # 1962 NA NA NA NA NA NA 5711 NA NA NA # 1963 NA NA NA NA NA NA 5323 NA NA NA # 1964 NA NA NA NA NA NA 5773 NA NA NA # 1965 NA NA NA NA NA NA 6286 NA NA NA # 1966 NA NA NA NA NA NA 6152 NA NA NA # 1967 NA NA NA NA NA NA 6255 NA NA NA # 1968 NA NA NA NA NA NA 6461 NA NA NA # 1969 NA NA NA NA NA NA 6981 NA NA NA # plot the matrix (it will plot correctly no matter how the matrix is transposed) plot(tGDPmat, main = "GDP per Capita") ```
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat

```r # Taking series with more than 20 observation suffsamp <- tGDPmat[, fnobs(tGDPmat) > 20] # Minimum pairwise observations between any two series: min(pwnobs(suffsamp)) # [1] 16 # We can use the pairwise-correlations of the annual growth rates to hierarchically cluster the economies: plot(hclust(as.dist(1-pwcor(G(suffsamp))))) ```
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat

```r # Finally we could do PCA on the growth rates: eig <- eigen(pwcor(G(suffsamp))) plot(seq_col(suffsamp), eig$values/sum(eig$values)*100, xlab = "Number of Principal Components", ylab = "% Variance Explained", main = "Screeplot") ```
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat

There is also a nice plot-method applied to panel series arrays returned when `psmat` is applied to a panel data.frame: ```r plot(psmat(pwlddev, cols = 9:12), legend = TRUE) ```
plot of chunk pwlddev_plot

plot of chunk pwlddev_plot

Above we have explored the cross-sectional relationship between the different national GDP series. Now we explore the time-dependence of the panel-vectors as a whole: ### 2.4 Panel- Auto-, Partial-Auto and Cross-Correlation Functions The functions `psacf`, `pspacf` and `psccf` mimic `stats::acf`, `stats::pacf` and `stats::ccf` for panel-vectors and panel data.frames. Below we compute the panel series autocorrelation function of the data: ```r psacf(pwlddev, cols = 9:12) ```
plot of chunk plm_psacf

plot of chunk plm_psacf

The computation is conducted by first scaling and centering (i.e. standardizing) the panel-vectors by groups (using `fscale`, default argument `gscale = TRUE`), and then taking the covariance of each series with a matrix of properly computed panel-lags of itself (using `flag`), and dividing that by the variance of the overall series (using `fvar`). In a similar way we can compute the Partial-ACF (using a multivariate Yule-Walker decomposition on the ACF, as in `stats::pacf`), ```r pspacf(pwlddev, cols = 9:12) ```
plot of chunk plm_pspacf

plot of chunk plm_pspacf

and the panel-cross-correlation function between GDP per capita and life expectancy (which is already contained in the ACF plot above): ```r psccf(PCGDP, LIFEEX) ```
plot of chunk plm_psccf

plot of chunk plm_psccf

### 2.5 Testing for Individual Specific and Time-Effects As a final step of exploration, we could analyze our series and simple models for the significance and explanatory power of individual or time-fixed effects, without going all the way to running a Hausman Test of fixed vs. random effects on a fully specified model. The main function here is `fFtest` which efficiently computes a fast R-Squared based F-test of exclusion restrictions on models potentially involving many factors. By default (argument `full.df = TRUE`) the degrees of freedom of the test are adjusted to make it identical to the F-statistic from regressing the series on a set of country and time dummies^[In fact factors are projected out using `fixest::demean` and no regression is run at all]. ```r # Testing GDP per Capita fFtest(PCGDP, index(PCGDP)) # Testing individual and time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.905 264 9205 330.349 0.000 fFtest(PCGDP, index(PCGDP, 1)) # Testing individual effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.876 215 9254 303.476 0.000 fFtest(PCGDP, index(PCGDP, 2)) # Testing time effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.027 60 9409 4.276 0.000 # Same for Life-Expectancy fFtest(LIFEEX, index(LIFEEX)) # Testing individual and time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.924 265 11404 519.762 0.000 fFtest(LIFEEX, index(LIFEEX, 1)) # Testing individual effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.719 215 11454 136.276 0.000 fFtest(LIFEEX, index(LIFEEX, 2)) # Testing time effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.218 60 11609 54.075 0.000 ``` Below we test the correlation between the country and time-means of GDP and Life-Expectancy: ```r cor.test(B(PCGDP), B(LIFEEX)) # Testing correlation of country means # # Pearson's product-moment correlation # # data: B(PCGDP) and B(LIFEEX) # t = 78.752, df = 9020, p-value < 2.2e-16 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # 0.6259141 0.6503737 # sample estimates: # cor # 0.638305 cor.test(B(PCGDP, effect = 2), B(LIFEEX, effect = 2)) # Same for time-means # # Pearson's product-moment correlation # # data: B(PCGDP, effect = 2) and B(LIFEEX, effect = 2) # t = 325.6, df = 9020, p-value < 2.2e-16 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # 0.9583431 0.9615804 # sample estimates: # cor # 0.9599938 ``` We can also test for the significance of individual and time-fixed effects (or both) in the regression of GDP on life expectancy and ODA received: ```r fFtest(PCGDP, index(PCGDP), get_vars(pwlddev, c("LIFEEX","ODA"))) # Testing individual and time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-Value # Full Model 0.915 227 6682 316.551 0.000 # Restricted Model 0.162 2 6907 668.816 0.000 # Exclusion Rest. 0.753 225 6682 262.732 0.000 fFtest(PCGDP, index(PCGDP, 2), get_vars(pwlddev, c("iso3c","LIFEEX","ODA"))) # Testing time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-Value # Full Model 0.915 227 6682 316.551 0.000 # Restricted Model 0.909 168 6741 403.168 0.000 # Exclusion Rest. 0.005 59 6682 7.238 0.000 ``` As can be expected in this cross-country data, individual and time-fixed effects play a large role in explaining the data, and these effects are correlated across series, suggesting that a fixed-effects model with both types of fixed-effects would be appropriate. To round things off, below we compute the Hausman test of Fixed vs. Random effects, which confirms this conclusion: ```r phtest(PCGDP ~ LIFEEX, data = pwlddev) # # Hausman Test # # data: PCGDP ~ LIFEEX # chisq = 397.04, df = 1, p-value < 2.2e-16 # alternative hypothesis: one model is inconsistent ``` ## Part 3: Programming Panel Data Estimators A central goal of the *collapse* package is to facilitate advanced and fast programming with data. A primary field of application for the fast functions introduced above is to program efficient panel data estimators. In this section we walk through a short example of how this can be done. The application will be an implementation of the Hausman and Taylor (1981) estimator, considering a more general case than currently implemented in the *plm* package. In Hausman and Taylor (1981), in a more general scenario, we have a linear panel-model of the form $$y_{it} = \beta_1X_{1it} + \beta_2X_{2it} + \beta_3Z_{1i} + \beta_4Z_{2i} + \alpha_i + \gamma_t + \epsilon$$ where $\alpha_i$ denotes unobserved individual specific effects and $\gamma_t$ denotes unobserved global events. This model has up to 4 kinds of covariates: * Time-Varying covariates $X_{1it}$ that are uncorrelated with the individual specific effect $\alpha_i$, such that $E[X_{1it}\alpha_i] = 0$. It may be the case that $E[X_{1it}\gamma_t] \neq 0$ * Time-Varying covariates $X_{2it}$ with $E[X_{2it}\alpha_i] \neq 0$ and possibly $E[X_{2it}\gamma_t] \neq 0$ * Time-Invariant covariates $Z_{1i}$ with $E[Z_{1i}\alpha_i] = 0$ * Time-Invariant covariates $Z_{2i}$ with $E[Z_{2i}\alpha_i] \neq 0$ The main estimation problem arises from $E[Z_{2i}\alpha_i] \neq 0$, which would usually prevent us from estimating $\beta_4$ since taking a within-transformation (fixed effects) would remove $Z_{2i}$ from the equation. Hausman and Taylor (1981) stipulated that since $E[X_{1it}\alpha_i] = 0$, once could use $X_{1i.}$ i.e. the between-transformed $X_{1it}$ to instrument for $Z_{2i}$. They propose an IV/2SLS estimation of the whole equation where the within-transformed covariates $\tilde{X}_{1it}$ and $\tilde{X}_{2it}$ are used to instrument $X_{1it}$ and $X_{2it}$, and $X_{1i.}$ instruments $Z_{2i}$. Assuming that missing values have been removed beforehand, and also taking into account the possibility that $E[X_{1it}\gamma_t] \neq 0$ and $E[X_{2it}\gamma_t] \neq 0$ (i.e. accounting for time fixed-effects), this estimator can be coded as follows: ```r HT_est <- function(y, X1, Z2, X2 = NULL, Z1 = NULL, time.FE = FALSE) { # Create matrix of independent variables X <- cbind(Intercept = 1, do.call(cbind, c(X1, X2, Z1, Z2))) # Create instrument matrix: if time.FE, higher-order demean X1 and X2, else normal demeaning IVS <- cbind(Intercept = 1, do.call(cbind, c(if(time.FE) fhdwithin(X1, na.rm = FALSE) else fwithin(X1, na.rm = FALSE), if(is.null(X2)) X2 else if(time.FE) fhdwithin(X2, na.rm = FALSE) else fwithin(X2, na.rm = FALSE), Z1, fbetween(X1, na.rm = FALSE)))) if(length(IVS) == length(X)) { # The IV estimator case return(drop(solve(crossprod(IVS, X), crossprod(IVS, y)))) } else { # The 2SLS case Xhat <- qr.fitted(qr(IVS), X) # First stage return(drop(qr.coef(qr(Xhat), y))) # Second stage } } ``` The estimator is written in such a way that variables of the type $X_{2it}$ and $Z_{1i}$ are optional, and it also includes an option to also project out time-FE or not. The expected inputs for $X_{1it}$ (`X1`), and $X_{2it}$ (`X2`) are column-subsets of a *pdata.frame*. Having coded the estimator, it would be good to have an example to run it on. I have tried to squeeze an example out of the `wlddev` data used so far in this vignette. It is quite crappy and suffers from a weak-IV problem, but for there sake of illustration lets do it: We want to estimate the panel-regression of life-expectancy on GDP per Capita, ODA received, the GINI index and a time-invariant dummy indicating whether the country is an OECD member. All variables except the dummy enter in logs, so this is an elasticity regression. < ```r dat <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA")) get_vars(dat, 4:7) <- lapply(get_vars(dat, 4:7), log) # Taking logs of the data dat$OECD <- as.numeric(dat$OECD) # Creating OECD dummy dat <- pdata.frame(fdroplevels(na_omit(dat)), # Creating Panel data.frame, after removing missing values index = c("iso3c", "year")) # and dropping unused factor levels pdim(dat) # Unbalanced Panel: n = 134, T = 1-34, N = 1068 varying(dat) # year OECD PCGDP LIFEEX GINI ODA # TRUE FALSE TRUE TRUE TRUE TRUE ``` Using the GINI index cost a lot of observations and brought the sample size down to 918, but the GINI index will be a key variable in what follows. Clearly the OECD dummy is time-invariant. Below we run Hausman-tests of fixed vs. random effects to determine which covariates might be correlated with the unobserved individual effects, and which model would be most appropriate. ```r # This tests whether each of the covariates is correlated with alpha_i phtest(LIFEEX ~ PCGDP, dat) # Likely correlated # # Hausman Test # # data: LIFEEX ~ PCGDP # chisq = 17.495, df = 1, p-value = 2.881e-05 # alternative hypothesis: one model is inconsistent phtest(LIFEEX ~ ODA, dat) # Likely correlated # # Hausman Test # # data: LIFEEX ~ ODA # chisq = 43.925, df = 1, p-value = 3.413e-11 # alternative hypothesis: one model is inconsistent phtest(LIFEEX ~ GINI, dat) # Likely not correlated ! # # Hausman Test # # data: LIFEEX ~ GINI # chisq = 0.56851, df = 1, p-value = 0.4509 # alternative hypothesis: one model is inconsistent phtest(LIFEEX ~ PCGDP + ODA + GINI, dat) # Fixed Effects is the appropriate model for this regression # # Hausman Test # # data: LIFEEX ~ PCGDP + ODA + GINI # chisq = 24.198, df = 3, p-value = 2.272e-05 # alternative hypothesis: one model is inconsistent ``` The tests suggest that both GDP per Capita and ODA are correlated with country-specific unobservables affecting life-expectancy, and overall a fixed-effects model would be appropriate. However, the Hausman test on the GINI index fails to reject: Country specific unobservables affecting average life-expectancy are not necessarily correlated with the level of inequality across countries. Now if we want to include the OECD dummy in the regression, we cannot use fixed-effects as this would wipe-out the dummy as well. If the dummy is uncorrelated with the country-specific unobservables affecting life-expectancy (the $\alpha_i$), then we could use a solution suggested by Mundlak (1978) and simply add between-transformed versions of PCGDP and ODA in the regression (in addition to PCGDP and ODA in levels), and so 'control' for the part of PCGDP and ODA correlated with the $\alpha_i$ (in the IV literature this is known as the control-function approach). If however the OECD dummy is correlated with the $\alpha_i$, then we need to use the Hausman and Taylor (1981) estimator. Below I suggest 2 methods of testing this correlation: ```r # Testing the correlation between OECD dummy and the Between-transformed Life-Expectancy (i.e. not accounting for other covariates) cor.test(dat$OECD, B(dat$LIFEEX)) # -> Significant correlation of 0.21 # # Pearson's product-moment correlation # # data: dat$OECD and B(dat$LIFEEX) # t = 6.797, df = 1066, p-value = 1.774e-11 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # 0.1456048 0.2606109 # sample estimates: # cor # 0.2038109 # Getting the fixed-effects (estimates of alpha_i) from the model (i.e. accounting for the other covariates) fe <- fixef(plm(LIFEEX ~ PCGDP + ODA + GINI, dat, model = "within")) mODA <- fmean(dat$ODA, dat$iso3c) # Again testing the correlation cor.test(fe, mODA[match(names(fe), names(mODA))]) # -> Not Significant.. but probably due to small sample size, the correlation is still 0.13 # # Pearson's product-moment correlation # # data: fe and mODA[match(names(fe), names(mODA))] # t = 1.1218, df = 132, p-value = 0.264 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # -0.07362567 0.26243949 # sample estimates: # cor # 0.09717608 ``` I interpret the test results as rejecting the hypothesis that the dummy is uncorrelated with $\alpha_i$, thus we do have a case for Hausman and Taylor (1981) here: the OECD dummy is a $Z_{2i}$ with $E[Z_{2i}\alpha_i]\neq 0$. The Hausman tests above suggested that the GINI index is the only variable uncorrelated with $\alpha_i$, thus GINI is $X_{1it}$ with $E[X_{1it}\alpha_i] = 0$. Finally PCGDP and ODA jointly constitute $X_{2it}$, where the Hausman tests strongly suggested that $E[X_{2it}\alpha_i] \neq 0$. We do not have a $Z_{1i}$ in this setup, i.e. a time-invariant variable uncorrelated with the $\alpha_i$. The Hausman and Taylor (1981) estimator stipulates that we should instrument the OECD dummy with $X_{1i.}$, the between-transformed GINI index. Let us therefore test the regression of the dummy on this instrument to see of it would be a good (i.e. relevant) instrument: ```r # This computes the regression of OECD on the GINI instrument: Weak IV problem !! fFtest(dat$OECD, B(dat$GINI)) # R-Sq. DF1 DF2 F-Stat. P-value # 0.000 1 1066 0.153 0.695 ``` The 0 R-Squared and the F-Statistic of 0.21 suggest that the instrument is very weak indeed, rubbish to be precise, thus the implementation of the HT estimator below is also a rubbish example, but it is still good for illustration purposes: ```r HT_est(y = dat$LIFEEX, X1 = get_vars(dat, "GINI"), Z2 = get_vars(dat, "OECD"), X2 = get_vars(dat, c("PCGDP","ODA"))) # Intercept GINI PCGDP ODA OECD # 3.638486969 -0.035596160 0.120981946 0.005744747 -5.862368476 ``` Now a central questions is of course: How computationally efficient is this estimator? Let us try to re-run it on the data generated for the benchmark in Part 1: ```r dat <- get_vars(data, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA")) get_vars(dat, 4:7) <- lapply(get_vars(dat, 4:7), log) # Taking logs of the data dat$OECD <- as.numeric(dat$OECD) # Creating OECD dummy dat <- pdata.frame(fdroplevels(na_omit(dat)), # Creating Panel data.frame, after removing missing values index = c("iso3c", "year")) # and dropping unused factor levels pdim(dat) # Unbalanced Panel: n = 13400, T = 1-34, N = 106800 varying(dat) # year OECD PCGDP LIFEEX GINI ODA # TRUE FALSE TRUE TRUE TRUE TRUE library(microbenchmark) microbenchmark(HT_est = HT_est(y = dat$LIFEEX, # The estimator as before X1 = get_vars(dat, "GINI"), Z2 = get_vars(dat, "OECD"), X2 = get_vars(dat, c("PCGDP","ODA"))), HT_est_TFE = HT_est(y = dat$LIFEEX, # Also Projecting out Time-FE X1 = get_vars(dat, "GINI"), Z2 = get_vars(dat, "OECD"), X2 = get_vars(dat, c("PCGDP","ODA")), time.FE = TRUE)) # Unit: milliseconds # expr min lq mean median uq max neval # HT_est 7.919437 8.46937 9.761301 8.869612 9.508597 45.08717 100 # HT_est_TFE 22.501128 23.18640 25.387041 23.469835 24.490612 85.96462 100 ``` At around 100,000 obs and 13,000 groups in an unbalanced panel, the computation involving 3 grouped centering and 1 grouped averaging task as well as 2 list-to matrix conversions and an IV-procedure took about 10 milliseconds with only individual effects, and about 40 - 45 milliseconds with individual and time-fixed effects (projected out iteratively). This should leave some room for running this on much larger data. ## References Hausman J, Taylor W (1981). “Panel Data and Unobservable Individual Effects.†*Econometrica*, 49, 1377–1398. Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†*Econometrica* 46 (1): 69–85. Cochrane, D. & Orcutt, G. H. (1949). "Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Terms". *Journal of the American Statistical Association.* 44 (245): 32–61. Prais, S. J. & Winsten, C. B. (1954). "Trend Estimators and Serial Correlation". *Cowles Commission Discussion Paper No. 383.* Chicago. collapse/vignettes/developing_with_collapse.Rmd0000644000176200001440000010020015121640707021575 0ustar liggesusers--- title: "Developing with collapse" subtitle: "Or: How to Code Efficiently in R" author: "Sebastian Krantz" date: "2024-12-30" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{developing with collapse} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Introduction *collapse* offers an integrated suite of C/C++-based statistical and data manipulation functions, many low-level tools for memory efficient programming, and a [class-agnostic architecture](https://fastverse.org/collapse/articles/collapse_object_handling.html) that seamlessly supports vectors, matrices, and data frame-like objects. These features make it an ideal backend for high-performance statistical packages. This vignette is meant to provide some recommendations for developing with *collapse*. It is complementary to the earlier [blog post on programming with *collapse*](https://sebkrantz.github.io/Rblog/2020/09/13/programming-with-collapse/) which readers are also encouraged to consult. The vignette adds 3 important points for writing efficient R/*collapse* code. ## Point 1: Be Minimalistic in Computations *collapse* supports different types of R objects (vectors, matrices, data frames + variants) and it can perform grouped operations on them using different types of grouping information (plain vectors, 'qG'^[Alias for quick-group.] objects, factors, 'GRP' objects, grouped or indexed data frames). Grouping can be sorted or unsorted. A key for very efficient code is to use the minimal required operations/objects to get the job done. Suppose you want to sum an object `x` by groups using a grouping vector `g`. If the grouping is only needed once, this should be done using the internal grouping of `fsum()` without creating external grouping objects - `fsum(x, g)` for aggregation and `fsum(x, g, TRA = "fill")` for expansion: ```r fmean(mtcars$mpg, mtcars$cyl) # 4 6 8 # 26.66364 19.74286 15.10000 fmean(mtcars$mpg, mtcars$cyl, TRA = "fill") # [1] 19.74286 19.74286 26.66364 19.74286 15.10000 19.74286 15.10000 26.66364 26.66364 19.74286 # [11] 19.74286 15.10000 15.10000 15.10000 15.10000 15.10000 15.10000 26.66364 26.66364 26.66364 # [21] 26.66364 15.10000 15.10000 15.10000 15.10000 26.66364 26.66364 26.66364 15.10000 19.74286 # [31] 15.10000 26.66364 ``` The expansion case is very efficient because it internally uses unsorted grouping. Apart from the default sorted aggregation, these functions efficiently convert your input `g` into the minimally required information. In the aggregation case, we can improve performance by also using unsorted grouping, e.g., `fsum(x, qF(g, sort = FALSE))` or `fsum(x, qG(g, sort = FALSE), use.g.names = FALSE)` if the group-names are not needed. It is advisable to also set argument `na.exclude = FALSE` in `qF()`/`qG()` to add a class 'na.included' which precludes internal missing value checks in `fsum()` and friends. If `g` is a plain vector or the first-appearance order of groups should be kept even if `g` is a factor, use `group(g)` instead of `qG(g, sort = FALSE, na.exclude = FALSE)`.^[`group()` directly calls a C-based hashing algorithm which works for all types of vectors and lists of vectors/data frames. Missing values are treated as distinct elements.] Set `use.g.names = FALSE` if not needed (can abbreviate as `use = FALSE`), and, if your data has no missing values, set `na.rm = FALSE` for maximum performance. ```r x <- rnorm(1e7) # 10 million random obs g <- sample.int(1e6, 1e7, TRUE) # 1 Million random groups oldopts <- set_collapse(na.rm = FALSE) # No missing values: maximum performance microbenchmark::microbenchmark( internal = fsum(x, g), internal_expand = fsum(x, g, TRA = "fill"), qF1 = fsum(x, qF(g, sort = FALSE)), qF2 = fsum(x, qF(g, sort = FALSE, na.exclude = FALSE)), qG1 = fsum(x, qG(g, sort = FALSE), use = FALSE), qG2 = fsum(x, qG(g, sort = FALSE, na.exclude = FALSE), use = FALSE), group = fsum(x, group(g), use = FALSE), # Same as above basically GRP1 = fsum(x, GRP(g)), GRP2 = fsum(x, GRP(g, sort = FALSE)), GRP3 = fsum(x, GRP(g, sort = FALSE, return.groups = FALSE), use = FALSE) ) # Unit: milliseconds # expr min lq mean median uq max neval # internal 119.62078 124.61575 133.51499 129.24721 136.84295 187.9376 100 # internal_expand 87.45751 93.53473 101.63398 97.34573 105.04102 195.5121 100 # qF1 98.40816 101.62102 110.80120 105.03839 112.72224 265.5931 100 # qF2 86.75518 89.82823 100.47122 93.89814 103.04776 194.9115 100 # qG1 88.38563 92.44846 103.28242 97.29579 105.35159 202.8058 100 # qG2 72.94851 76.86912 87.05558 79.43137 86.15307 262.4734 100 # group 74.08335 77.19435 87.62058 82.58726 90.61506 162.0318 100 # GRP1 145.13799 149.54178 163.89938 154.71379 164.11361 297.5056 100 # GRP2 95.83557 99.05297 109.58577 103.34950 112.50322 266.9996 100 # GRP3 82.56629 86.15699 97.54058 90.40781 98.05956 328.7744 100 ``` Factors and 'qG' objects are efficient inputs to all statistical/transformation functions except for `fmedian()`, `fnth()`, `fmode()`, `fndistinct()`, and split-apply-combine operations using `BY()`/`gsplit()`. For repeated grouped operations involving those, it makes sense to create 'GRP' objects using `GRP()`. These objects are more expensive to create but provide more complete information.^[See `?GRP`, in particular the 'Value' section.] If sorting is not needed, set `sort = FALSE`, and if aggregation or the unique groups/names are not needed set `return.groups = FALSE`. ```r f <- qF(g); f2 <- qF(g, na.exclude = FALSE) gg <- group(g) # Same as qG(g, sort = FALSE, na.exclude = FALSE) grp <- GRP(g) # Simple functions: factors are efficient inputs microbenchmark::microbenchmark( factor = fsum(x, f), factor_nona = fsum(x, f2), qG_nona = fsum(x, gg), qG_nona_nonam = fsum(x, gg, use = FALSE), GRP = fsum(x, grp), GRP_nonam = fsum(x, grp, use = FALSE) ) # Unit: milliseconds # expr min lq mean median uq max neval # factor 16.02514 16.49498 17.50705 17.11619 18.16497 21.72975 100 # factor_nona 12.72911 13.15124 14.41943 13.87850 15.03540 23.27144 100 # qG_nona 14.30178 14.95450 20.48179 15.67930 17.34989 57.15597 100 # qG_nona_nonam 11.57118 12.00423 13.12157 12.49071 13.61801 23.31219 100 # GRP 12.83345 13.08907 14.45512 13.95154 15.21594 21.46473 100 # GRP_nonam 12.67589 13.22139 14.15271 13.76600 14.84057 20.36359 100 # Complex functions: more information helps microbenchmark::microbenchmark( qG = fmedian(x, gg, use = FALSE), GRP = fmedian(x, grp, use = FALSE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # qG 258.4450 261.9357 267.2520 264.2608 267.4161 297.1552 10 # GRP 191.8623 193.0631 196.0935 193.4358 194.6245 210.3685 10 set_collapse(oldopts) ``` Why not always use `group()` for unsorted grouping with simple functions? You can do that, but `qF()`/`qG()` are a bit smarter when it comes to handling input factors/'qG' objects whereas `group()` hashes every vector: ```r microbenchmark::microbenchmark( factor_factor = qF(f), # This checks NA's and adds 'na.included' class -> full deep copy factor_factor2 = qF(f, na.exclude = FALSE), # NA checking costs.. incurred in fsum() and friends check_na = collapse:::is.nmfactor(f), check_na2 = collapse:::is.nmfactor(f2), factor_qG = qF(gg), qG_factor = qG(f), qG_qG = qG(gg), group_factor = group(f), group_qG = group(gg) ) # Unit: nanoseconds # expr min lq mean median uq max neval # factor_factor 1107 2562.5 6925.31 7298.0 9676.0 19270 100 # factor_factor2 5926960 6147663.0 6898849.83 6235136.5 6421686.5 15325349 100 # check_na 3440474 3503880.5 3525056.59 3513597.5 3524770.0 3927185 100 # check_na2 287 1496.5 3325.10 3341.5 4243.5 9922 100 # factor_qG 2583 11644.0 15105.63 15887.5 18614.0 31898 100 # qG_factor 1927 4284.5 10171.28 9614.5 13796.5 50799 100 # qG_qG 1476 2583.0 6674.39 6498.5 8897.0 23124 100 # group_factor 16066629 16300165.0 17378151.76 16489011.0 16858872.0 54181582 100 # group_qG 13824175 14194917.5 15083957.81 14347396.5 14700345.0 22289117 100 ``` Only in rare cases are grouped/indexed data frames created with `fgroup_by()`/`findex_by()` needed in package code. Likewise, functions like `fsummarise()`/`fmutate()` are essentially wrappers. For example ```r mtcars |> fgroup_by(cyl, vs, am) |> fsummarise(mpg = fsum(mpg), across(c(carb, hp, qsec), fmean)) # cyl vs am mpg carb hp qsec # 1 4 0 1 26.0 2.000000 91.00000 16.70000 # 2 4 1 0 68.7 1.666667 84.66667 20.97000 # 3 4 1 1 198.6 1.428571 80.57143 18.70000 # 4 6 0 1 61.7 4.666667 131.66667 16.32667 # 5 6 1 0 76.5 2.500000 115.25000 19.21500 # 6 8 0 0 180.6 3.083333 194.16667 17.14250 # 7 8 0 1 30.8 6.000000 299.50000 14.55000 ``` is the same as (again `use = FALSE` abbreviates `use.g.names = FALSE`) ```r g <- GRP(mtcars, c("cyl", "vs", "am")) add_vars(g$groups, get_vars(mtcars, "mpg") |> fsum(g, use = FALSE), get_vars(mtcars, c("carb", "hp", "qsec")) |> fmean(g, use = FALSE) ) # cyl vs am mpg carb hp qsec # 1 4 0 1 26.0 2.000000 91.00000 16.70000 # 2 4 1 0 68.7 1.666667 84.66667 20.97000 # 3 4 1 1 198.6 1.428571 80.57143 18.70000 # 4 6 0 1 61.7 4.666667 131.66667 16.32667 # 5 6 1 0 76.5 2.500000 115.25000 19.21500 # 6 8 0 0 180.6 3.083333 194.16667 17.14250 # 7 8 0 1 30.8 6.000000 299.50000 14.55000 ``` To be clear: nothing prevents you from using these wrappers - they are quite efficient - but if you want to change all inputs programmatically it makes sense to go down one level - your code will also become safer.^[If you do use `fgroup_by()` in a package use it with non-standard evaluation, e.g., `fgroup_by(cyl, vs, am)`. Don't do `ind <- c("cyl", "vs", "am")` and then `fgroup_by(ind)` as the data may contain a column called `ind`. For such cases use `group_by_vars(ind)`.] In general, think carefully about how to vectorize in a minimalistic and memory efficient way. You will find that you can craft very parsimonious and efficient code to solve complicated problems. For example, after merging multiple spatial datasets, I had some of the same map features (businesses) from multiple sources, and, unwilling to match features individually across data sources, I decided to keep the richest source covering each feature type and location. After creating a feature `importance` indicator comparable across sources, the deduplication expression ended up being a single line of the form: `fsubset(data, source == fmode(source, list(location, type), importance, "fill"))` - keep features from the importance-weighted most frequent source by location and type. If an effective *collapse* solution is not apparent, other packages may offer efficient solutions. Check out the [*fastverse*](https://fastverse.org/fastverse/) and its [suggested packages list](https://fastverse.org/fastverse/#suggested-extensions). For example if you want to efficiently replace multiple items in a vector, `kit::vswitch()/nswitch()` can be pretty magical. Also functions like `data.table::set()/rowid()` etc. are great. ## Point 2: Think About Memory and Optimize R programs are inefficient for 2 principal reasons: (1) operations are not vectorized; (2) too many intermediate objects/copies are created. *collapse*'s vectorized statistical functions help with (1), but it also provides many [efficient programming functions](https://fastverse.org/collapse/reference/efficient-programming.html) to deal with (2). One source of inefficiency in R code is the widespread use of logical vectors. For example ```r x <- abs(round(rnorm(1e6))) x[x == 0] <- NA ``` where `x == 0` creates a logical vector of 1 million elements just to indicate to R which elements of `x` are `0`. In *collapse*, `setv(x, 0, NA)` is the efficient equivalent. This also works if we don't want to replace with `NA` but with another vector `y`: ```r y <- rnorm(1e6) setv(x, NA, y) # Replaces missing x with y ``` is much better than ```r x[is.na(x)] <- y[is.na(x)] ``` `setv()` is quite versatile and also works with indices and logical vectors instead of elements to search for. You can also invert the query by setting `invert = TRUE`. In more complex workflows, we may wish to save the logical vector, e.g., `xmiss <- is.na(x)`, and use it repeatedly. One aspect to note here is that logical vectors are inefficient for subsetting compared to indices: ```r xNA <- na_insert(x, prop = 0.4) xmiss <- is.na(xNA) ind <- which(xmiss) bench::mark(x[xmiss], x[ind]) # # A tibble: 2 × 6 # expression min median `itr/sec` mem_alloc `gc/sec` # # 1 x[xmiss] 3.34ms 3.58ms 269. 8.39MB 4.21 # 2 x[ind] 771.74µs 972.11µs 1025. 3.05MB 6.61 ``` Thus, indices are always preferable. With *collapse*, they can be created directly using `whichNA(xNA)` in this case, or `whichv(x, 0)` for `which(x == 0)` or any other number. Also here there exist an `invert = TRUE` argument covering the `!=` case. For convenience, infix operators `x %==% 0` and `x %!=% 0` wrap `whichv(x, 0)` and `whichv(x, 0, invert = TRUE)`, respectively. Similarly, `fmatch()` supports faster matching with associated operators `%iin%` and `%!iin%` which also return indices, e.g., `letters %iin% c("a", "b")` returns `1:2`. This can also be used in subsetting: ```r bench::mark( `%in%` = fsubset(wlddev, iso3c %in% c("USA", "DEU", "ITA", "GBR")), `%iin%` = fsubset(wlddev, iso3c %iin% c("USA", "DEU", "ITA", "GBR")) ) # # A tibble: 2 × 6 # expression min median `itr/sec` mem_alloc `gc/sec` # # 1 %in% 146.8µs 165.7µs 6008. 3.8MB 2.12 # 2 %iin% 17.3µs 23.6µs 39878. 130.4KB 23.9 ``` Likewise, `anyNA(), allNA(), anyv()` and `allv()` help avoid expressions like `any(x == 0)` in favor of `anyv(x, 0)`. Other convenience functions exist such as `na_rm(x)` for the common `x[!is.na(x)]` expression which is extremely inefficient. Another hint here particularly for data frame subsetting is the `ss()` function, which has an argument `check = FALSE` to avoid checks on indices (small effect with this data size): ```r ind <- wlddev$iso3c %!iin% c("USA", "DEU", "ITA", "GBR") microbenchmark::microbenchmark( withcheck = ss(wlddev, ind), nocheck = ss(wlddev, ind, check = FALSE) ) # Unit: microseconds # expr min lq mean median uq max neval # withcheck 48.749 106.6615 124.4366 122.1595 143.8895 256.619 100 # nocheck 47.355 105.5750 126.9225 119.6380 150.8595 344.113 100 ``` Another common source of inefficiencies is copies produced in statistical operations. For example ```r x <- rnorm(100); y <- rnorm(100); z <- rnorm(100) res <- x + y + z # Creates 2 copies ``` For this particular case `res <- kit::psum(x, y, z)` offers an efficient solution^[In general, also see other packages, in particular *kit* and *data.table* for useful programming functions.]. A more general solution is ```r res <- x + y res %+=% z ``` *collapse*'s `%+=%`, `%-=%`, `%*=%` and `%/=%` operators are wrappers around the `setop()` function which also works with matrices and data frames.^[*Note* that infix operators do not obey the rules of arithmetic but are always evaluated from left to right.] This function also has a `rowwise` argument for operations between vectors and matrix/data.frame rows: ```r m <- qM(mtcars) setop(m, "*", seq_col(m), rowwise = TRUE) head(m / qM(mtcars)) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 1 2 3 4 5 6 7 NaN 9 10 11 # Mazda RX4 Wag 1 2 3 4 5 6 7 NaN 9 10 11 # Datsun 710 1 2 3 4 5 6 7 8 9 10 11 # Hornet 4 Drive 1 2 3 4 5 6 7 8 NaN 10 11 # Hornet Sportabout 1 2 3 4 5 6 7 NaN NaN 10 11 # Valiant 1 2 3 4 5 6 7 8 NaN 10 11 ``` Some functions like `na_locf()`/`na_focb()` also have `set = TRUE` arguments to perform operations by reference.^[Note that `na_locf()`/`na_focb()` are not vectorized across groups, thus, if using them in a grouped `fmutate()` call, adding `set = TRUE` will save some memory on intermediate objects.] There is also `setTRA()` for (grouped) transformations by reference, wrapping `TRA(..., set = TRUE)`. Since `TRA` is added as an argument to all [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), `set = TRUE` can be passed down to modify by reference. For example: ```r fmedian(iris$Sepal.Length, iris$Species, TRA = "fill", set = TRUE) ``` Is the same as `setTRA(iris$Sepal.Length, fmedian(iris$Sepal.Length, iris$Species), "fill", iris$Species)`, replacing the values of the `Sepal.Length` vector with its species median by reference: ```r head(iris) # Sepal.Length Sepal.Width Petal.Length Petal.Width Species # 1 5 3.5 1.4 0.2 setosa # 2 5 3.0 1.4 0.2 setosa # 3 5 3.2 1.3 0.2 setosa # 4 5 3.1 1.5 0.2 setosa # 5 5 3.6 1.4 0.2 setosa # 6 5 3.9 1.7 0.4 setosa ``` This `set` argument can be invoked anywhere, also inside `fmutate()` calls with/without groups. This can also be done in combination with other transformations (sweeping operations). For example, the following turns the columns of the matrix into proportions. ```r fsum(m, TRA = "/", set = TRUE) fsum(m) # Check # mpg cyl disp hp drat wt qsec vs am gear carb # 1 1 1 1 1 1 1 1 1 1 1 ``` In summary, think what is really needed to complete a task and keep things to a minimum in terms of both computations and memory. Let's do a final exercise in this regard and create a hyper-efficient function for univariate linear regression by groups: ```r greg <- function(y, x, g) { g <- group(g) dmx <- fmean(x, g, TRA = "-", na.rm = FALSE) (fsum(y, g, dmx, use = FALSE, na.rm = FALSE) %/=% fsum(dmx, g, dmx, use = FALSE, na.rm = FALSE)) } # Test y <- rnorm(1e7) x <- rnorm(1e7) g <- sample.int(1e6, 1e7, TRUE) microbenchmark::microbenchmark(greg(y, x, g), group(g)) # Unit: milliseconds # expr min lq mean median uq max neval # greg(y, x, g) 131.39639 138.68961 153.1586 145.78243 161.48137 305.5862 100 # group(g) 62.41733 64.80468 72.2558 68.87266 73.21657 153.1643 100 ``` The expression computed by `greg()` amounts to `sum(y * (x - mean(x)))/sum((x - mean(x))^2)` for each group, which is equivalent to `cov(x, y)/var(x)`, but very efficient, requiring exactly one full copy of `x` to create a group-demeaned vector, `dmx`, and then using the `w` (weights) argument to `fsum()` to sum the products (`y * dmx` and `dmx * dmx`) on the fly, including a division by reference avoiding an additional copy. One cannot do much better coding a grouped regression directly in C. ## Point 3: Internally Favor Primitive R Objects and Functions This partly reiterates Point 1 but now with a focus on internal data representation rather than grouping and computing. The point could also be bluntly stated as: 'vectors, matrices and lists are good, data frames and complex objects are bad'. Many frameworks seem to imply the opposite - the *tidyverse* encourages you to cast your data as a tidy tibble, and *data.table* offers you a more efficient data frame. But these objects are internally complex, and, in the case of *data.table*, only efficient because of the internal C-level algorithms for large-data manipulation. You should always take a step back to ask yourself: for the statistical software I am writing, do I need this complexity? Complex objects require complex methods to manipulate them, thus, when using them, you incur the cost of everything that goes on in these methods. Vectors, matrices, and lists are much more efficient in R and *collapse* provides you with many options to manipulate them directly. It may surprise you to hear that, internally, *collapse* does not use data frame-like objects at all. Instead, such objects are cast to lists using `unclass(data)`, `class(data) <- NULL`, or `attributes(data) <- NULL`. This is advisable if you want to write fast package code for data frame-like objects. The benchmark below illustrates that basically everything you do on a *data.frame* is more expensive than on the equivalent list. ```r l <- unclass(mtcars) nam <- names(mtcars) microbenchmark::microbenchmark(names(mtcars), attr(mtcars, "names"), names(l), names(mtcars) <- nam, attr(mtcars, "names") <- nam, names(l) <- nam, mtcars[["mpg"]], .subset2(mtcars, "mpg"), l[["mpg"]], mtcars[3:8], .subset(mtcars, 3:8), l[3:8], ncol(mtcars), length(mtcars), length(unclass(mtcars)), length(l), nrow(mtcars), length(.subset2(mtcars, 1L)), length(l[[1L]])) # Unit: nanoseconds # expr min lq mean median uq max neval # names(mtcars) 164 205 240.26 246 246.0 410 100 # attr(mtcars, "names") 41 82 109.88 82 123.0 1476 100 # names(l) 0 0 24.60 41 41.0 82 100 # names(mtcars) <- nam 451 492 651.90 656 697.0 3321 100 # attr(mtcars, "names") <- nam 287 369 480.52 451 492.0 4346 100 # names(l) <- nam 164 246 276.34 246 287.0 533 100 # mtcars[["mpg"]] 2009 2091 2363.65 2173 2296.0 15539 100 # .subset2(mtcars, "mpg") 41 41 68.88 82 82.0 164 100 # l[["mpg"]] 41 82 78.31 82 82.0 205 100 # mtcars[3:8] 5166 5371 5607.98 5453 5576.0 15908 100 # .subset(mtcars, 3:8) 246 246 321.03 287 328.0 2788 100 # l[3:8] 246 287 305.45 287 328.0 492 100 # ncol(mtcars) 1025 1107 1200.07 1189 1230.0 2255 100 # length(mtcars) 164 205 249.28 246 266.5 492 100 # length(unclass(mtcars)) 123 164 176.71 164 164.0 861 100 # length(l) 0 0 18.86 0 41.0 287 100 # nrow(mtcars) 1025 1107 1239.84 1148 1230.0 6642 100 # length(.subset2(mtcars, 1L)) 41 82 113.57 82 123.0 1845 100 # length(l[[1L]]) 41 82 100.45 82 123.0 492 100 ``` By means of further illustration, let's recreate the `pwnobs()` function in *collapse* which counts pairwise missing values. The list method is written in R. A basic implementation is:^[By Point 2 this implementation is not ideal because I am creating two logical vectors for each iteration of the inner loop, but I currently don't see any way to write this more efficiently.] ```r pwnobs_list <- function(X) { dg <- fnobs(X) n <- ncol(X) nr <- nrow(X) N.mat <- diag(dg) for (i in 1:(n - 1L)) { miss <- is.na(X[[i]]) for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - sum(miss | is.na(X[[j]])) } rownames(N.mat) <- names(dg) colnames(N.mat) <- names(dg) N.mat } mtcNA <- na_insert(mtcars, prop = 0.2) pwnobs_list(mtcNA) # mpg cyl disp hp drat wt qsec vs am gear carb # mpg 26 20 20 20 20 20 21 22 21 21 22 # cyl 20 26 21 20 22 21 22 22 22 23 20 # disp 20 21 26 22 22 23 22 22 21 21 22 # hp 20 20 22 26 21 23 22 20 20 21 21 # drat 20 22 22 21 26 23 21 21 20 21 21 # wt 20 21 23 23 23 26 22 21 21 20 20 # qsec 21 22 22 22 21 22 26 22 20 22 20 # vs 22 22 22 20 21 21 22 26 20 23 21 # am 21 22 21 20 20 21 20 20 26 20 21 # gear 21 23 21 21 21 20 22 23 20 26 20 # carb 22 20 22 21 21 20 20 21 21 20 26 ``` Now with the above tips we can optimize this as follows: ```r pwnobs_list_opt <- function(X) { dg <- fnobs.data.frame(X) class(X) <- NULL n <- length(X) nr <- length(X[[1L]]) N.mat <- diag(dg) for (i in 1:(n - 1L)) { miss <- is.na(X[[i]]) for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - sum(miss | is.na(X[[j]])) } dimnames(N.mat) <- list(names(dg), names(dg)) N.mat } identical(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA)) # [1] TRUE microbenchmark::microbenchmark(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA)) # Unit: microseconds # expr min lq mean median uq max neval # pwnobs_list(mtcNA) 153.217 160.1255 185.09696 179.744 215.004 241.654 100 # pwnobs_list_opt(mtcNA) 27.429 31.1600 33.38507 32.964 35.137 45.387 100 ``` Evidently, the optimized function is 6x faster on this (small) dataset and we have changed nothing to the loops doing the computation. With larger data the difference is less stark, but you never know what's going on in methods you have not written and how they scale. My advice is: try to avoid them, use simple objects and take full control over your code. This also makes your code more robust and you can create class-agnostic code. If the latter is your intent the [vignette on *collapse*'s object handling](https://fastverse.org/collapse/articles/collapse_object_handling.html) will also be helpful. If you only use *collapse* functions this discussion is void - all *collapse* functions designed for data frames, including `join()`, `pivot()`, `fsubset()`, etc., internally handle your data as a list and are equally efficient on data frames and lists. However, if you want to use base R semantics (`[`, etc.) alongside *collapse* and other functions, it makes sense to unclass incoming data frame-like objects and reclass them at the end. If you don't want to internally convert data frames to lists, at least use functions `.subset()`, `.subset2()`, or `collapse::get_vars()` to efficiently extract columns and `attr()` to extract/set attributes. With matrices, use `dimnames()` directly instead of `rownames()` and `colnames()` which wrap it. Also avoid `as.data.frame()` and friends to coerce/recreate data frame-like objects. It is quite easy to construct a *data.frame* from a list: ```r attr(l, "row.names") <- .set_row_names(length(l[[1L]])) class(l) <- "data.frame" head(l, 2) # mpg cyl disp hp drat wt qsec vs am gear carb # 1 21 6 160 110 3.9 2.620 16.46 0 1 4 4 # 2 21 6 160 110 3.9 2.875 17.02 0 1 4 4 ``` You can also use *collapse* functions `qDF()`, `qDT()` and `qTBL()` to efficiently convert/create *data.frame*'s, *data.table*'s, and *tibble*'s: ```r library(data.table) library(tibble) microbenchmark::microbenchmark(qDT(mtcars), as.data.table(mtcars), qTBL(mtcars), as_tibble(mtcars)) # Unit: microseconds # expr min lq mean median uq max neval # qDT(mtcars) 2.952 3.280 6.35705 3.5670 3.8130 269.534 100 # as.data.table(mtcars) 34.194 36.572 44.93641 37.4535 39.2985 697.410 100 # qTBL(mtcars) 2.419 2.583 3.19267 2.8700 2.9930 38.704 100 # as_tibble(mtcars) 48.257 49.569 71.56304 50.4095 52.5005 2050.533 100 l <- unclass(mtcars) microbenchmark::microbenchmark(qDF(l), as.data.frame(l), as.data.table(l), as_tibble(l)) # Unit: microseconds # expr min lq mean median uq max neval # qDF(l) 1.722 2.2140 4.51779 2.4600 2.747 199.424 100 # as.data.frame(l) 210.412 225.1515 242.65973 248.3370 254.569 301.186 100 # as.data.table(l) 70.889 77.2030 90.30086 83.0045 88.683 798.393 100 # as_tibble(l) 55.350 61.8690 68.20924 67.0760 72.898 139.769 100 ``` *collapse* also provides functions like `setattrib()`, `copyMostAttrib()`, etc., to efficiently attach attributes again. So another efficient workflow for general data frame-like objects is to save the attributes `ax <- attributes(data)`, manipulate it as a list `attributes(data) <- NULL`, modify `ax$names` and `ax$row.names` as needed and then use `setattrib(data, ax)` before returning. ## Some Notes on Global Options *collapse* has its own set of global options which can be set using `set_collapse()` and retrieved using `get_collapse()`.^[This is done mainly for efficiency reasons, but also do implement advanced options such as namespace masking (options `mask` and `remove`). The options are stored in an internal environment called `.op` visible in the documentation of some functions such as `fmean()` when used to set argument defaults.] This confers responsibilities upon package developers as setting these options inside a package also affects how *collapse* behaves outside of your package. In general, the same rules apply as for setting other R options through `options()` or `par()`: they need to be reset using `on.exit()` so that the user choices are unaffected even if your package function breaks. For example, if you want a block of code multithreaded and without missing value skipping for maximum performance: ```r fast_function <- function(x, ...) { # Your code... oldopts <- set_collapse(nthreads = 4, na.rm = FALSE) on.exit(set_collapse(oldopts)) # Multithreaded code... } ``` Namespace masking (options `mask` and `remove`) should not be set inside packages because it may have unintended side-effects for the user (e.g., *collapse* appears at the top of the `search()` path afterwards). Conversely, user choices in `set_collapse()` also affect your package code, except for namespace masking as you should specify explicitly which *collapse* functions you are using (e.g., via `importFrom("collapse", "fmean")` in NAMESPACE or `collapse::fmean()` in your code). Particularly options `na.rm`, `nthreads`, and `sort`, if set by the user, will impact your code, unless you explicitly set the targeted arguments (e.g., `nthreads` and `na.rm` in statistical functions like `fmean()`, and `sort` arguments in grouping functions like `GRP()`/`qF()`/`qG()`/`fgroup_by()`). My general view is that this is not necessary - if the user sets `set_collapse(na.rm = FALSE)` because data has no missing values, then it is good if that also speeds up your package functions. However, if your package code generates missing values and expects *collapse* functions to skip them you should take care of this using either `set_collapse()` + `on.exit()` or explicitly setting `na.rm = TRUE` in all relevant functions. Also watch out for internally-grouped aggregations using [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), which are affected by global defaults: ```r fmean(mtcars$mpg, mtcars$cyl) # 4 6 8 # 26.66364 19.74286 15.10000 oldopts <- set_collapse(sort = FALSE) fmean(mtcars$mpg, mtcars$cyl) # 6 4 8 # 19.74286 26.66364 15.10000 ``` Statistical functions do not have `sort` arguments, thus, if it is crucial that the output remains sorted, ensure that a sorted factor, 'qG', or 'GRP' object is passed: ```r fmean(mtcars$mpg, qF(mtcars$cyl, sort = TRUE)) # 4 6 8 # 26.66364 19.74286 15.10000 set_collapse(oldopts) ``` Of course, you can also check which options the user has set and adjust your code, e.g. ```r # Your code ... if(!get_collapse("sort")) { oldopts <- set_collapse(sort = TRUE) on.exit(set_collapse(oldopts)) } # Critical code ... ``` ## Conclusion *collapse* can become a game-changer for your statistical software development in R, enabling you to write programs that effectively run like C while accomplishing complex statistical/data tasks with few lines of code. This however requires taking a closer look at the package, in particular the [documentation](https://fastverse.org/collapse/reference/collapse-documentation.html), and following the advice given in this vignette. collapse/vignettes/collapse_and_data.table.Rmd0000644000176200001440000021621215121640575021245 0ustar liggesusers--- title: "collapse and data.table" subtitle: "Harmony and High Performance" author: "Sebastian Krantz" date: "2021-06-27" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and data.table} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- This vignette focuses on using *collapse* with the popular *data.table* package by Matt Dowle and Arun Srinivasan. In contrast to *dplyr* and *plm* whose methods ('grouped_df', 'pseries', 'pdata.frame') *collapse* supports, the integration between *collapse* and *data.table* is hidden in the 'data.frame' methods and *collapse*'s C code. From version 1.6.0 *collapse* seamlessly handles *data.tables*, permitting reference operations (`set*`, `:=`) on data tables created with collapse (`qDT`) or returned from *collapse*'s data manipulation functions (= all functions except `.FAST_FUN`, `.OPERATOR_FUN`, `BY` and `TRA`, see the [NEWS]() for details on the low-level integration). Apart from *data.table* reference semantics, both packages work similarly on the C/C++ side of things, and nicely complement each other in functionality. ## Overview of Both Packages Both *data.table* and *collapse* are high-performance packages that work well together. For effective co-use it is helpful to understand where each has its strengths, what one can do what the other cannot, and where they overlap. Therefore this small comparison: * *data.table* offers an enhanced data frame based class to contain data (including list columns). For this class it provides a concise data manipulation syntax which also includes fast aggregation / slit-apply-combine computing, (rolling, non-equi) joins, keying, reshaping, some time-series functionality like lagging and rolling statistics, set operations on tables and a number of very useful other functions like the fast csv reader, fast switches, list-transpose etc.. *data.table* makes data management, and computations on data very easy and scalable, supporting huge datasets in a very memory efficient way. The package caters well to the end user by compressing an enormous amount of functionality into two square brackets `[]`. Some of the exported functions are great for programming and also support other classes, but a lot of the functionality and optimization of *data.table* happens under the hood and can only be accessed through the non-standard evaluation table `[i, j, by]` syntax. This syntax has a cost of about 1-3 milliseconds for each call. Memory efficiency and thread-parallelization make *data.table* the star performer on huge data. * *collapse* is class-agnostic in nature, supporting vectors, matrices, data frames and non-destructively handling most R classes and objects. It focuses on advanced statistical computing, proving fast column-wise grouped and weighted statistical functions, fast and complex data aggregation and transformations, linear fitting, time series and panel data computations, advanced summary statistics, and recursive processing of lists of data objects. It also includes powerful functions for data manipulation, grouping / factor generation, recoding, handling outliers and missing values. The package default for missing values is `na.rm = TRUE`, which is implemented efficiently in C/C++ in all functions. *collapse* supports both *tidyverse* (piped) and base R / standard evaluation programming. It makes accessible most of it's internal C/C++ based functionality (like grouping objects). *collapse*'s R functions are simple and strongly optimized, i.e. they access the serial C/C++ code quickly, resulting in baseline execution speeds of 10-50 microseconds. All of this makes *collapse* ideal for advanced statistical computing on matrices and larger datasets, and tasks requiring fast programs with repeated function executions. ## Interoperating and some Do's and Dont's Applying *collapse* functions to a data.table always gives a data.table back e.g. ```r library(collapse) library(magrittr) library(data.table) DT <- qDT(wlddev) # collapse::qDT converts objects to data.table using a shallow copy DT %>% gby(country) %>% gv(9:13) %>% fmean # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 # Same thing, but notice that fmean give's NA's for missing countries DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13] # Key: # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NaN 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NaN NaN NaN 43115.10 # 5: Andorra 40083.0911 NaN NaN NaN 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NaN NaN 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 # This also works without magrittr pipes with the collap() function collap(DT, ~ country, fmean, cols = 9:13) # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` By default, *collapse* orders groups in aggregations, which is equivalent to using `keyby` with *data.table*. `gby / fgroup_by` has an argument `sort = FALSE` to yield an unordered grouping equivalent to *data.table*'s `by` on character data^[Grouping on numeric variables in *collapse* is always ordered.]. At this data size *collapse* outperforms *data.table* (which might reverse as data size grows, depending in your computer, the number of *data.table* threads used, and the function in question): ```r library(microbenchmark) microbenchmark(collapse = DT %>% gby(country) %>% get_vars(9:13) %>% fmean, data.table = DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 203.073 212.7285 223.4156 217.1565 225.6230 475.559 100 # data.table 758.623 777.4010 929.5450 793.1655 854.4605 2292.515 100 ``` It is critical to never do something like this: ```r DT[, lapply(.SD, fmean), keyby = country, .SDcols = 9:13] # Key: # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` The reason is that *collapse* functions are S3 generic with methods for vectors, matrices and data frames among others. So you incur a method-dispatch for every column and every group the function is applied to. ```r fmean # function (x, ...) # UseMethod("fmean") # # methods(fmean) # [1] fmean.data.frame* fmean.default* fmean.grouped_df* fmean.list* fmean.matrix* # [6] fmean.units* fmean.zoo* # see '?methods' for accessing help and source code ``` You may now contend that `base::mean` is also S3 generic, but in this `DT[, lapply(.SD, mean, na.rm = TRUE), by = country, .SDcols = 9:13]` code *data.table* does not use `base::mean`, but `data.table:::gmean`, an internal optimized mean function which is efficiently applied over those groups (see `?data.table::GForce`). `fmean` works similar, and includes this functionality explicitly. ```r args(fmean.data.frame) # function (x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], # use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], # ...) # NULL ``` Here we can see the `x` argument for the data, the `g` argument for grouping vectors, a weight vector `w`, different options `TRA` to transform the original data using the computed means, and some functionality regarding missing values (default: removed / skipped), group names (which are added as row-names to a data frame, but not to a *data.table*) etc. So we can also do ```r fmean(gv(DT, 9:13), DT$country) # PCGDP LIFEEX GINI ODA POP # # 1: 483.8351 49.19717 NA 1487548499 18362258.22 # 2: 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: 10071.0659 NA NA NA 43115.10 # 5: 40083.0911 NA NA NA 51547.35 # --- # 212: 35629.7336 73.71292 NA NA 92238.53 # 213: 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: 1219.4360 54.53360 45.93333 397104997 9402160.33 # Or g <- GRP(DT, "country") add_vars(g[["groups"]], fmean(gv(DT, 9:13), g)) # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` To give us the same result obtained through the high-level functions `gby / fgroup_by` or `collap`. This is however not what *data.table* is doing in `DT[, lapply(.SD, fmean), by = country, .SDcols = 9:13]`. Since `fmean` is not a function it recognizes and is able to optimize, it does something like this, ```r BY(gv(DT, 9:13), g, fmean) # using collapse::BY # PCGDP LIFEEX GINI ODA POP # # 1: 483.8351 49.19717 NA 1487548499 18362258.22 # 2: 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: 10071.0659 NA NA NA 43115.10 # 5: 40083.0911 NA NA NA 51547.35 # --- # 212: 35629.7336 73.71292 NA NA 92238.53 # 213: 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` which applies `fmean` to every group in every column of the data. More generally, it is very important to understand that *collapse* is not based around applying functions to data by groups using some universal mechanism: The *dplyr* `data %>% group_by(...) %>% summarize(...) / mutate(...)` and *data.table* `[i, j, by]` syntax are essentially universal mechanisms to apply any function to data by groups. *data.table* additionally internally optimizes some functions (`min, max, mean, median, var, sd, sum, prod, first, last, head, tail`) which they called GForce, `?data.table::GForce`. *collapse* instead provides grouped statistical and transformation functions where all grouped computation is done efficiently in C++, and some supporting mechanisms (`fgroup_by`, `collap`) to operate them. In *data.table* words, everything^[Apart from `collapse::BY` which is only an auxiliary function written in base R to perform flexible split-apply combine computing on vectors, matrices and data frames.] in *collapse*, the *Fast Statistical Functions*, data transformations, time series etc. is GForce optimized. The full set of optimized grouped statistical and transformation functions in *collapse* is: ```r .FAST_FUN # [1] "fmean" "fmedian" "fmode" "fsum" "fprod" "fsd" "fvar" # [8] "fmin" "fmax" "fnth" "ffirst" "flast" "fnobs" "fndistinct" # [15] "fcumsum" "fscale" "fbetween" "fwithin" "fhdbetween" "fhdwithin" "flag" # [22] "fdiff" "fgrowth" ``` Additional optimized grouped functions include `TRA`, `qsu`, `varying`, `fFtest`, `psmat`, `psacf`, `pspacf`, `psccf`. The nice thing about those GForce (fast) functions provided by *collapse* is that they can be accessed explicitly and programmatically without any overhead as incurred through *data.table*, they cover a broader range of statistical operations (such as mode, distinct values, order statistics), support sampling weights, operate in a class-agnostic way on vectors, matrices, data.frame's and many related classes, and cover transformations (replacing and sweeping, scaling, (higher order) centering, linear fitting) and time series functionality (lags, differences and growth rates, including irregular time series and unbalanced panels). So if we would want to use `fmean` inside the *data.table*, we should do something like this: ```r # This does not save the grouping columns, we are simply passing a grouping vector to g # and aggregating the subset of the data table (.SD). DT[, fmean(.SD, country), .SDcols = 9:13] # PCGDP LIFEEX GINI ODA POP # # 1: 483.8351 49.19717 NA 1487548499 18362258.22 # 2: 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: 10071.0659 NA NA NA 43115.10 # 5: 40083.0911 NA NA NA 51547.35 # --- # 212: 35629.7336 73.71292 NA NA 92238.53 # 213: 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: 1219.4360 54.53360 45.93333 397104997 9402160.33 # If we want to keep the grouping columns, we need to group .SD first. DT[, fmean(gby(.SD, country)), .SDcols = c(1L, 9:13)] # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` Needless to say this kind of programming seems a bit arcane, so there is actually not that great of a scope to use collapse's *Fast Statistical Functions* for aggregations inside *data.table*. I drive this point home with a benchmark: ```r microbenchmark(collapse = DT %>% gby(country) %>% get_vars(9:13) %>% fmean, data.table = DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13], data.table_base = DT[, lapply(.SD, base::mean, na.rm = TRUE), keyby = country, .SDcols = 9:13], hybrid_bad = DT[, lapply(.SD, fmean), keyby = country, .SDcols = 9:13], hybrid_ok = DT[, fmean(gby(.SD, country)), .SDcols = c(1L, 9:13)]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 207.419 234.9915 322.3994 255.6760 283.6790 1685.305 100 # data.table 755.630 845.7685 1029.9024 904.6650 962.1060 2409.529 100 # data.table_base 2795.257 3148.4310 4034.2081 3349.8025 3561.9570 37919.916 100 # hybrid_bad 2198.994 2481.3815 3737.1102 2650.5680 2909.4215 62158.747 100 # hybrid_ok 374.699 451.1025 564.6873 484.9275 542.8605 2082.554 100 ``` It is evident that *data.table* has some overhead, so there is absolutely no need to do this kind of syntax manipulation. There is more scope to use *collapse* transformation functions inside *data.table*. Below some basic examples: ```r # Computing a column containing the sum of ODA received by country DT[, sum_ODA := sum(ODA, na.rm = TRUE), by = country] # Same using fsum; "replace_fill" overwrites missing values, "replace" keeps the DT[, sum_ODA := fsum(ODA, country, TRA = "replace_fill")] # Same: A native collapse solution using settransform (or its shortcut form) settfm(DT, sum_ODA = fsum(ODA, country, TRA = "replace_fill")) # settfm may be more convenient than `:=` for multiple column modifications, # each involving a different grouping: # This computes the percentage of total ODA distributed received by # each country both over time and within a given year settfm(DT, perc_c_ODA = fsum(ODA, country, TRA = "%"), perc_y_ODA = fsum(ODA, year, TRA = "%")) ``` The `TRA` argument is available to all *Fast Statistical Functions* (see the macro `.FAST_STAT_FUN`) and offers 10 different replacing and sweeping operations. Note that `TRA()` can also be called directly to replace or sweep with a previously aggregated *data.table*. A set of operators `%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`, `%cr%`, `%c+%`, `%c-%`, `%c*%`, `%c/%` additionally facilitate row- or column-wise replacing or sweeping out vectors of statistics or other *data.table*'s. Similarly, we can use the following vector valued functions ```r setdiff(.FAST_FUN, .FAST_STAT_FUN) # [1] "fcumsum" "fscale" "fbetween" "fwithin" "fhdbetween" "fhdwithin" "flag" # [8] "fdiff" "fgrowth" ``` for very efficient data transformations: ```r # Centering GDP DT[, demean_PCGDP := PCGDP - mean(PCGDP, na.rm = TRUE), by = country] DT[, demean_PCGDP := fwithin(PCGDP, country)] # Lagging GDP DT[order(year), lag_PCGDP := shift(PCGDP, 1L), by = country] DT[, lag_PCGDP := flag(PCGDP, 1L, country, year)] # Computing a growth rate DT[order(year), growth_PCGDP := (PCGDP / shift(PCGDP, 1L) - 1) * 100, by = country] DT[, lag_PCGDP := fgrowth(PCGDP, 1L, 1L, country, year)] # 1 lag, 1 iteration # Several Growth rates DT[order(year), paste0("growth_", .c(PCGDP, LIFEEX, GINI, ODA)) := (.SD / shift(.SD, 1L) - 1) * 100, by = country, .SDcols = 9:13] # Same thing using collapse DT %<>% tfm(gv(., 9:13) %>% fgrowth(1L, 1L, country, year) %>% add_stub("growth_")) # Or even simpler using settransform and the Growth operator settfmv(DT, 9:13, G, 1L, 1L, country, year, apply = FALSE) head(DT) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI # # 1: Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA # 2: Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA # 3: Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA # 4: Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA # 5: Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA # 6: Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA # ODA POP sum_ODA perc_c_ODA perc_y_ODA demean_PCGDP lag_PCGDP growth_PCGDP # # 1: 116769997 8996973 89252909923 0.1308305 0.4441407 NA NA NA # 2: 232080002 9169410 89252909923 0.2600251 0.7356654 NA NA NA # 3: 112839996 9351441 89252909923 0.1264272 0.3494956 NA NA NA # 4: 237720001 9543205 89252909923 0.2663443 0.7003399 NA NA NA # 5: 295920013 9744781 89252909923 0.3315522 0.8570540 NA NA NA # 6: 341839996 9956320 89252909923 0.3830015 0.8992630 NA NA NA # growth_LIFEEX growth_GINI growth_ODA growth_POP G1.PCGDP G1.LIFEEX G1.GINI G1.ODA G1.POP # # 1: NA NA NA NA NA NA NA NA NA # 2: 1.590335 NA 98.74969 1.916611 NA 1.590335 NA 98.74969 1.916611 # 3: 1.544202 NA -51.37884 1.985199 NA 1.544202 NA -51.37884 1.985199 # 4: 1.493830 NA 110.66998 2.050636 NA 1.493830 NA 110.66998 2.050636 # 5: 1.448294 NA 24.48259 2.112246 NA 1.448294 NA 24.48259 2.112246 # 6: 1.407306 NA 15.51770 2.170793 NA 1.407306 NA 15.51770 2.170793 ``` Since transformations (`:=` operations) are not highly optimized in *data.table*, *collapse* will be faster in most circumstances. Also time series functionality in *collapse* is significantly faster as it does not require data to be ordered or balanced to compute. For example `flag` computes an ordered lag without sorting the entire data first. ```r # Lets generate a large dataset and benchmark this stuff DT_large <- replicate(1000, qDT(wlddev), simplify = FALSE) %>% lapply(tfm, country = paste(country, rnorm(1))) %>% rbindlist # 12.7 million Obs fdim(DT_large) # [1] 13176000 13 microbenchmark( S1 = DT_large[, sum_ODA := sum(ODA, na.rm = TRUE), by = country], S2 = DT_large[, sum_ODA := fsum(ODA, country, TRA = "replace_fill")], S3 = settfm(DT_large, sum_ODA = fsum(ODA, country, TRA = "replace_fill")), W1 = DT_large[, demean_PCGDP := PCGDP - mean(PCGDP, na.rm = TRUE), by = country], W2 = DT_large[, demean_PCGDP := fwithin(PCGDP, country)], L1 = DT_large[order(year), lag_PCGDP := shift(PCGDP, 1L), by = country], L2 = DT_large[, lag_PCGDP := flag(PCGDP, 1L, country, year)], L3 = DT_large[, lag_PCGDP := shift(PCGDP, 1L), by = country], # Not ordered L4 = DT_large[, lag_PCGDP := flag(PCGDP, 1L, country)], # Not ordered times = 5 ) # Unit: milliseconds # expr min lq mean median uq max neval # S1 343.03396 347.18443 391.7494 364.51431 379.7866 524.2279 5 # S2 100.52544 101.72645 165.8369 128.76042 153.6818 344.4906 5 # S3 98.48249 104.80830 120.3499 114.20591 127.0192 157.2335 5 # W1 913.00883 1009.29930 1071.0633 1035.74446 1104.7680 1292.4957 5 # W2 99.48199 99.69654 110.0907 113.95884 118.5229 118.7931 5 # L1 1812.59987 1822.58026 1896.8809 1905.67377 1942.9434 2000.6074 5 # L2 110.36056 128.45845 135.0995 133.80219 139.1405 163.7357 5 # L3 611.28392 665.22123 768.0616 718.38679 803.7170 1041.6991 5 # L4 64.26369 66.99006 105.7952 84.26537 106.1809 207.2758 5 rm(DT_large) gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3113072 166.3 8413097 449.4 NA 8413097 449.4 # Vcells 7897587 60.3 324289364 2474.2 16384 405361681 3092.7 ``` ## Further *collapse* features supporting *data.table*'s As mentioned, `qDT` is a flexible and very fast function to create / column-wise convert R objects to *data.table*'s. You can also row-wise convert a matrix to data.table using `mrtl`: ```r # Creating a matrix from mtcars m <- qM(mtcars) str(m) # num [1:32, 1:11] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ... # ..$ : chr [1:11] "mpg" "cyl" "disp" "hp" ... # Demonstrating another nice feature of qDT qDT(m, row.names.col = "car") %>% head # car mpg cyl disp hp drat wt qsec vs am gear carb # # 1: Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 # 2: Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 # 3: Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 # 4: Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 # 5: Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 # 6: Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 # Row-wise conversion to data.table mrtl(m, names = TRUE, return = "data.table") %>% head(2) # Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout Valiant Duster 360 Merc 240D # # 1: 21 21 22.8 21.4 18.7 18.1 14.3 24.4 # 2: 6 6 4.0 6.0 8.0 6.0 8.0 4.0 # Merc 230 Merc 280 Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood # # 1: 22.8 19.2 17.8 16.4 17.3 15.2 10.4 # 2: 4.0 6.0 6.0 8.0 8.0 8.0 8.0 # Lincoln Continental Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla Toyota Corona # # 1: 10.4 14.7 32.4 30.4 33.9 21.5 # 2: 8.0 8.0 4.0 4.0 4.0 4.0 # Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa # # 1: 15.5 15.2 13.3 19.2 27.3 26 30.4 # 2: 8.0 8.0 8.0 8.0 4.0 4 4.0 # Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E # # 1: 15.8 19.7 15 21.4 # 2: 8.0 6.0 8 4.0 ``` The computational efficiency of these functions makes them very useful to use in *data.table* based workflows. ```r # Benchmark microbenchmark(qDT(m, "car"), mrtl(m, TRUE, "data.table")) # Unit: microseconds # expr min lq mean median uq max neval # qDT(m, "car") 4.838 5.043 6.16230 5.3300 6.437 20.049 100 # mrtl(m, TRUE, "data.table") 3.608 3.854 4.23981 3.9975 4.182 15.908 100 ``` For example we could regress the growth rate of GDP per capita on the Growth rate of life expectancy in each country and save results in a *data.table*: ```r library(lmtest) wlddev %>% fselect(country, PCGDP, LIFEEX) %>% # This counts missing values on PCGDP and LIFEEX only na_omit(cols = -1L) %>% # This removes countries with less than 20 observations fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% # Run estimations by country using data.table .[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX))), "Coef"), keyby = country] %>% head # Key: # country Coef Estimate Std. Error t value Pr(>|t|) # # 1: Albania (Intercept) -3.6146411 2.371885 -1.5239527 0.136023086 # 2: Albania G(LIFEEX) 22.1596308 7.288971 3.0401591 0.004325856 # 3: Algeria (Intercept) 0.5973329 1.740619 0.3431726 0.732731107 # 4: Algeria G(LIFEEX) 0.8412547 1.689221 0.4980134 0.620390703 # 5: Angola (Intercept) -3.3793976 1.540330 -2.1939445 0.034597175 # 6: Angola G(LIFEEX) 4.2362895 1.402380 3.0207852 0.004553260 ``` If we only need the coefficients, not the standard errors, we can also use `collapse::flm` together with `mrtl`: ```r wlddev %>% fselect(country, PCGDP, LIFEEX) %>% na_omit(cols = -1L) %>% fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% .[, mrtl(flm(fgrowth(PCGDP)[-1L], cbind(Intercept = 1, LIFEEX = fgrowth(LIFEEX)[-1L])), TRUE), keyby = country] %>% head # Key: # country Intercept LIFEEX # # 1: Albania -3.61464113 22.1596308 # 2: Algeria 0.59733291 0.8412547 # 3: Angola -3.37939760 4.2362895 # 4: Antigua and Barbuda -3.11880717 18.8700870 # 5: Argentina 1.14613567 -0.2896305 # 6: Armenia 0.08178344 11.5523992 ``` ... which provides a significant speed gain here: ```r microbenchmark( A = wlddev %>% fselect(country, PCGDP, LIFEEX) %>% na_omit(cols = -1L) %>% fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% .[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX))), "Coef"), keyby = country], B = wlddev %>% fselect(country, PCGDP, LIFEEX) %>% na_omit(cols = -1L) %>% fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% .[, mrtl(flm(fgrowth(PCGDP)[-1L], cbind(Intercept = 1, LIFEEX = fgrowth(LIFEEX)[-1L])), TRUE), keyby = country] ) # Unit: milliseconds # expr min lq mean median uq max neval # A 58.914253 60.063381 68.770933 60.865217 73.507813 241.594509 100 # B 3.145766 3.293715 3.463643 3.377006 3.503983 5.378995 100 ``` Another feature to highlight at this point are *collapse*'s list processing functions, in particular `rsplit`, `rapply2d`, `get_elem` and `unlist2d`. `rsplit` is an efficient recursive generalization of `split`: ```r DT_list <- rsplit(DT, country + year + PCGDP + LIFEEX ~ region + income) # Note: rsplit(DT, year + PCGDP + LIFEEX ~ region + income, flatten = TRUE) # would yield a simple list with interacted categories (like split) str(DT_list, give.attr = FALSE) # List of 7 # $ East Asia & Pacific :List of 3 # ..$ High income :Classes 'data.table' and 'data.frame': 793 obs. of 4 variables: # .. ..$ country: chr [1:793] "Australia" "Australia" "Australia" "Australia" ... # .. ..$ year : int [1:793] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:793] 19378 19469 19246 20053 21036 ... # .. ..$ LIFEEX : num [1:793] 70.8 71 70.9 70.9 70.9 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 793 obs. of 4 variables: # .. ..$ country: chr [1:793] "Cambodia" "Cambodia" "Cambodia" "Cambodia" ... # .. ..$ year : int [1:793] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:793] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:793] 41.2 41.4 41.5 41.7 41.9 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 610 obs. of 4 variables: # .. ..$ country: chr [1:610] "American Samoa" "American Samoa" "American Samoa" "American Samoa" ... # .. ..$ year : int [1:610] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:610] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:610] NA NA NA NA NA NA NA NA NA NA ... # $ Europe & Central Asia :List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 2257 obs. of 4 variables: # .. ..$ country: chr [1:2257] "Andorra" "Andorra" "Andorra" "Andorra" ... # .. ..$ year : int [1:2257] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:2257] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:2257] NA NA NA NA NA NA NA NA NA NA ... # ..$ Low income :Classes 'data.table' and 'data.frame': 61 obs. of 4 variables: # .. ..$ country: chr [1:61] "Tajikistan" "Tajikistan" "Tajikistan" "Tajikistan" ... # .. ..$ year : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:61] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:61] 50.6 50.9 51.2 51.5 51.9 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 244 obs. of 4 variables: # .. ..$ country: chr [1:244] "Kyrgyz Republic" "Kyrgyz Republic" "Kyrgyz Republic" "Kyrgyz Republic" ... # .. ..$ year : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:244] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:244] 56.1 56.6 57 57.4 57.9 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 976 obs. of 4 variables: # .. ..$ country: chr [1:976] "Albania" "Albania" "Albania" "Albania" ... # .. ..$ year : int [1:976] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:976] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:976] 62.3 63.3 64.2 64.9 65.5 ... # $ Latin America & Caribbean :List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 1037 obs. of 4 variables: # .. ..$ country: chr [1:1037] "Antigua and Barbuda" "Antigua and Barbuda" "Antigua and Barbuda" "Antigua and Barbuda" ... # .. ..$ year : int [1:1037] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1037] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:1037] 62 62.5 63 63.5 64 ... # ..$ Low income :Classes 'data.table' and 'data.frame': 61 obs. of 4 variables: # .. ..$ country: chr [1:61] "Haiti" "Haiti" "Haiti" "Haiti" ... # .. ..$ year : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:61] 1512 1439 1523 1466 1414 ... # .. ..$ LIFEEX : num [1:61] 41.8 42.2 42.6 43 43.4 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 244 obs. of 4 variables: # .. ..$ country: chr [1:244] "Bolivia" "Bolivia" "Bolivia" "Bolivia" ... # .. ..$ year : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:244] 1005 1007 1042 1091 1112 ... # .. ..$ LIFEEX : num [1:244] 41.8 42.1 42.5 42.8 43.2 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 1220 obs. of 4 variables: # .. ..$ country: chr [1:1220] "Argentina" "Argentina" "Argentina" "Argentina" ... # .. ..$ year : int [1:1220] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1220] 5643 5853 5711 5323 5773 ... # .. ..$ LIFEEX : num [1:1220] 65.1 65.2 65.3 65.3 65.4 ... # $ Middle East & North Africa:List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 488 obs. of 4 variables: # .. ..$ country: chr [1:488] "Bahrain" "Bahrain" "Bahrain" "Bahrain" ... # .. ..$ year : int [1:488] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:488] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:488] 51.9 53.2 54.6 55.9 57.2 ... # ..$ Low income :Classes 'data.table' and 'data.frame': 122 obs. of 4 variables: # .. ..$ country: chr [1:122] "Syrian Arab Republic" "Syrian Arab Republic" "Syrian Arab Republic" "Syrian Arab Republic" ... # .. ..$ year : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:122] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:122] 52 52.6 53.2 53.8 54.4 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 305 obs. of 4 variables: # .. ..$ country: chr [1:305] "Djibouti" "Djibouti" "Djibouti" "Djibouti" ... # .. ..$ year : int [1:305] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:305] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:305] 44 44.5 44.9 45.3 45.7 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 366 obs. of 4 variables: # .. ..$ country: chr [1:366] "Algeria" "Algeria" "Algeria" "Algeria" ... # .. ..$ year : int [1:366] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:366] 2481 2091 1638 2146 2214 ... # .. ..$ LIFEEX : num [1:366] 46.1 46.6 47.1 47.5 48 ... # $ North America :List of 1 # ..$ High income:Classes 'data.table' and 'data.frame': 183 obs. of 4 variables: # .. ..$ country: chr [1:183] "Bermuda" "Bermuda" "Bermuda" "Bermuda" ... # .. ..$ year : int [1:183] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:183] 33363 34080 34763 34324 37202 ... # .. ..$ LIFEEX : num [1:183] NA NA NA NA NA ... # $ South Asia :List of 3 # ..$ Low income :Classes 'data.table' and 'data.frame': 122 obs. of 4 variables: # .. ..$ country: chr [1:122] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ... # .. ..$ year : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:122] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:122] 32.4 33 33.5 34 34.5 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 244 obs. of 4 variables: # .. ..$ country: chr [1:244] "Bangladesh" "Bangladesh" "Bangladesh" "Bangladesh" ... # .. ..$ year : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:244] 372 384 394 381 411 ... # .. ..$ LIFEEX : num [1:244] 45.4 46 46.6 47.1 47.6 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 122 obs. of 4 variables: # .. ..$ country: chr [1:122] "Maldives" "Maldives" "Maldives" "Maldives" ... # .. ..$ year : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:122] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:122] 37.3 37.9 38.6 39.2 39.9 ... # $ Sub-Saharan Africa :List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 61 obs. of 4 variables: # .. ..$ country: chr [1:61] "Seychelles" "Seychelles" "Seychelles" "Seychelles" ... # .. ..$ year : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:61] 2830 2617 2763 2966 3064 ... # .. ..$ LIFEEX : num [1:61] NA NA NA NA NA NA NA NA NA NA ... # ..$ Low income :Classes 'data.table' and 'data.frame': 1464 obs. of 4 variables: # .. ..$ country: chr [1:1464] "Benin" "Benin" "Benin" "Benin" ... # .. ..$ year : int [1:1464] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1464] 712 724 689 710 745 ... # .. ..$ LIFEEX : num [1:1464] 37.3 37.7 38.2 38.7 39.1 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 1037 obs. of 4 variables: # .. ..$ country: chr [1:1037] "Angola" "Angola" "Angola" "Angola" ... # .. ..$ year : int [1:1037] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1037] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:1037] 37.5 37.8 38.1 38.4 38.8 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 366 obs. of 4 variables: # .. ..$ country: chr [1:366] "Botswana" "Botswana" "Botswana" "Botswana" ... # .. ..$ year : int [1:366] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:366] 408 425 444 460 480 ... # .. ..$ LIFEEX : num [1:366] 49.2 49.7 50.2 50.6 51.1 ... ``` We can use `rapply2d` to apply a function to each data frame / data.table in an arbitrary nested structure: ```r # This runs region-income level regressions, with country fixed effects # following Mundlak (1978) lm_summary_list <- DT_list %>% rapply2d(lm, formula = G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)) %>% # Summarizing the results rapply2d(summary, classes = "lm") # This is a nested list of linear model summaries str(lm_summary_list, give.attr = FALSE) # List of 7 # $ East Asia & Pacific :List of 3 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:441] -1.64 -2.59 2.75 3.45 2.48 ... # .. ..$ coefficients : num [1:3, 1:4] 0.531 2.494 3.83 0.706 0.759 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.59 # .. ..$ df : int [1:3] 3 438 3 # .. ..$ r.squared : num 0.0525 # .. ..$ adj.r.squared: num 0.0481 # .. ..$ fstatistic : Named num [1:3] 12.1 2 438 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.02361 -0.00158 -0.04895 -0.00158 0.02728 ... # .. ..$ na.action : 'omit' Named int [1:352] 1 61 62 63 64 65 66 67 68 69 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:549] -39.6968 3.6618 -0.0944 -1.8261 -1.0491 ... # .. ..$ coefficients : num [1:3, 1:4] 1.348 0.524 0.949 0.701 0.757 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 5.4 # .. ..$ df : int [1:3] 3 546 3 # .. ..$ r.squared : num 0.00471 # .. ..$ adj.r.squared: num 0.00106 # .. ..$ fstatistic : Named num [1:3] 1.29 2 546 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.016821 0.000511 -0.022767 0.000511 0.01965 ... # .. ..$ na.action : 'omit' Named int [1:244] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:312] -32.29 -11.61 2.91 11.23 10.28 ... # .. ..$ coefficients : num [1:3, 1:4] 1.507 -0.547 4.816 0.428 0.478 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.39 # .. ..$ df : int [1:3] 3 309 3 # .. ..$ r.squared : num 0.103 # .. ..$ adj.r.squared: num 0.0976 # .. ..$ fstatistic : Named num [1:3] 17.8 2 309 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.009471 0.000492 -0.013551 0.000492 0.011842 ... # .. ..$ na.action : 'omit' Named int [1:298] 1 2 3 4 5 6 7 8 9 10 ... # $ Europe & Central Asia :List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:1355] 2.706 -0.548 1.001 3.034 0.257 ... # .. ..$ coefficients : num [1:3, 1:4] 3.254 -0.172 -2.506 0.407 0.227 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.3 # .. ..$ df : int [1:3] 3 1352 3 # .. ..$ r.squared : num 0.00257 # .. ..$ adj.r.squared: num 0.00109 # .. ..$ fstatistic : Named num [1:3] 1.74 2 1352 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.015254 -0.000863 -0.05461 -0.000863 0.004722 ... # .. ..$ na.action : 'omit' Named int [1:902] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:34] 0.166 -1.804 15.949 -0.778 7.165 ... # .. ..$ coefficients : num [1:2, 1:4] -5.31 9.36 2.03 2.56 -2.61 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 8.43 # .. ..$ df : int [1:3] 2 32 3 # .. ..$ r.squared : num 0.295 # .. ..$ adj.r.squared: num 0.273 # .. ..$ fstatistic : Named num [1:3] 13.4 1 32 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.0582 -0.0514 -0.0514 0.092 # .. ..$ na.action : 'omit' Named int [1:27] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:121] -1.626 8.745 -14.47 0.298 -11.886 ... # .. ..$ coefficients : num [1:3, 1:4] 0.106 4.631 1.499 1.315 0.938 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 6.02 # .. ..$ df : int [1:3] 3 118 3 # .. ..$ r.squared : num 0.178 # .. ..$ adj.r.squared: num 0.164 # .. ..$ fstatistic : Named num [1:3] 12.7 2 118 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.047775 -0.000927 -0.142782 -0.000927 0.024298 ... # .. ..$ na.action : 'omit' Named int [1:123] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:511] 0.761 -2.153 -4.091 -6.476 -3.43 ... # .. ..$ coefficients : num [1:3, 1:4] 2.983 4.147 -3.351 0.698 0.779 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 8.28 # .. ..$ df : int [1:3] 3 508 3 # .. ..$ r.squared : num 0.0531 # .. ..$ adj.r.squared: num 0.0493 # .. ..$ fstatistic : Named num [1:3] 14.2 2 508 # .. ..$ cov.unscaled : num [1:3, 1:3] 7.11e-03 4.52e-05 -1.45e-02 4.52e-05 8.85e-03 ... # .. ..$ na.action : 'omit' Named int [1:465] 1 2 3 4 5 6 7 8 9 10 ... # $ Latin America & Caribbean :List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:487] 2.39 6.02 6.1 1.71 -2.27 ... # .. ..$ coefficients : num [1:3, 1:4] 1.015 0.483 2.613 0.677 0.952 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.71 # .. ..$ df : int [1:3] 3 484 3 # .. ..$ r.squared : num 0.00592 # .. ..$ adj.r.squared: num 0.00181 # .. ..$ fstatistic : Named num [1:3] 1.44 2 484 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.02062 0.00155 -0.05714 0.00155 0.04082 ... # .. ..$ na.action : 'omit' Named int [1:550] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:59] -5.667 5.091 -4.46 -4.224 -0.526 ... # .. ..$ coefficients : num [1:2, 1:4] -3.18 4.02 1.73 2.28 -1.83 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 3.79 # .. ..$ df : int [1:3] 2 57 3 # .. ..$ r.squared : num 0.0516 # .. ..$ adj.r.squared: num 0.0349 # .. ..$ fstatistic : Named num [1:3] 3.1 1 57 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.209 -0.265 -0.265 0.364 # .. ..$ na.action : 'omit' Named int [1:2] 1 61 # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:231] -1.386 2.029 3.213 0.413 1.334 ... # .. ..$ coefficients : num [1:3, 1:4] -1.678 -0.479 3.896 2.26 0.709 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.96 # .. ..$ df : int [1:3] 3 228 3 # .. ..$ r.squared : num 0.0081 # .. ..$ adj.r.squared: num -0.000602 # .. ..$ fstatistic : Named num [1:3] 0.931 2 228 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.3264 0.005 -0.4084 0.005 0.0321 ... # .. ..$ na.action : 'omit' Named int [1:13] 1 61 62 63 64 65 66 67 122 123 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:1065] 1.97 -4.16 -8.5 6.72 7.17 ... # .. ..$ coefficients : num [1:3, 1:4] 1.681 0.583 -0.124 0.353 0.512 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.22 # .. ..$ df : int [1:3] 3 1062 3 # .. ..$ r.squared : num 0.0016 # .. ..$ adj.r.squared: num -0.000283 # .. ..$ fstatistic : Named num [1:3] 0.85 2 1062 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.006982 0.000348 -0.013936 0.000348 0.014734 ... # .. ..$ na.action : 'omit' Named int [1:155] 1 61 62 122 123 183 184 244 245 305 ... # $ Middle East & North Africa:List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:334] -10.728 -11.988 2.151 0.985 -8.618 ... # .. ..$ coefficients : num [1:3, 1:4] 1.929 3.963 -3.533 1.102 0.996 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 8.36 # .. ..$ df : int [1:3] 3 331 3 # .. ..$ r.squared : num 0.0456 # .. ..$ adj.r.squared: num 0.0399 # .. ..$ fstatistic : Named num [1:3] 7.91 2 331 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.01738 0.00101 -0.02441 0.00101 0.01419 ... # .. ..$ na.action : 'omit' Named int [1:154] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:29] 0.468 3.424 0.415 3.842 3.342 ... # .. ..$ coefficients : num [1:2, 1:4] -6.91 11.38 2.11 3.64 -3.27 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 6.05 # .. ..$ df : int [1:3] 2 27 3 # .. ..$ r.squared : num 0.266 # .. ..$ adj.r.squared: num 0.239 # .. ..$ fstatistic : Named num [1:3] 9.81 1 27 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.122 -0.178 -0.178 0.361 # .. ..$ na.action : 'omit' Named int [1:93] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:191] -0.95 -2.047 4.541 5.594 -0.723 ... # .. ..$ coefficients : num [1:3, 1:4] 2.238 1.271 -0.647 1.002 0.599 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.94 # .. ..$ df : int [1:3] 3 188 3 # .. ..$ r.squared : num 0.0244 # .. ..$ adj.r.squared: num 0.014 # .. ..$ fstatistic : Named num [1:3] 2.35 2 188 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.06471 -0.00043 -0.07801 -0.00043 0.02309 ... # .. ..$ na.action : 'omit' Named int [1:114] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:263] -18.068 -23.976 28.692 0.858 1.141 ... # .. ..$ coefficients : num [1:3, 1:4] 2.663 0.718 -1.19 3.538 1.318 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 13.8 # .. ..$ df : int [1:3] 3 260 3 # .. ..$ r.squared : num 0.00119 # .. ..$ adj.r.squared: num -0.00649 # .. ..$ fstatistic : Named num [1:3] 0.155 2 260 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.065741 0.000795 -0.084456 0.000795 0.009122 ... # .. ..$ na.action : 'omit' Named int [1:103] 1 61 62 122 123 124 125 126 127 128 ... # $ North America :List of 1 # ..$ High income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:137] 4.6986 -3.1098 1.8243 0.5643 0.0176 ... # .. ..$ coefficients : num [1:3, 1:4] 6.542 -1.461 -19.53 2.272 0.662 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 2.49 # .. ..$ df : int [1:3] 3 134 3 # .. ..$ r.squared : num 0.0657 # .. ..$ adj.r.squared: num 0.0518 # .. ..$ fstatistic : Named num [1:3] 4.71 2 134 # .. ..$ cov.unscaled : num [1:3, 1:3] 8.36e-01 1.59e-17 -3.60 1.59e-17 7.10e-02 ... # .. ..$ na.action : 'omit' Named int [1:46] 1 2 3 4 5 6 7 8 9 10 ... # $ South Asia :List of 3 # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:76] 0.544 -6.17 3.951 -0.964 7.829 ... # .. ..$ coefficients : num [1:3, 1:4] -108.62 -1.72 96.06 174.19 1.25 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.7 # .. ..$ df : int [1:3] 3 73 3 # .. ..$ r.squared : num 0.0494 # .. ..$ adj.r.squared: num 0.0233 # .. ..$ fstatistic : Named num [1:3] 1.9 2 73 # .. ..$ cov.unscaled : num [1:3, 1:3] 2210.639 -6.979 -1875.261 -6.979 0.114 ... # .. ..$ na.action : 'omit' Named int [1:46] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:216] 0.294 -0.293 -6.067 4.954 -4.164 ... # .. ..$ coefficients : num [1:3, 1:4] -2.232 0.238 5.972 1.074 0.493 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.44 # .. ..$ df : int [1:3] 3 213 3 # .. ..$ r.squared : num 0.111 # .. ..$ adj.r.squared: num 0.103 # .. ..$ fstatistic : Named num [1:3] 13.3 2 213 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.09757 -0.00201 -0.10483 -0.00201 0.02054 ... # .. ..$ na.action : 'omit' Named int [1:28] 1 61 62 63 64 65 66 67 68 69 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:82] 3.262 3.976 3.128 1.67 -0.901 ... # .. ..$ coefficients : num [1:3, 1:4] 3.859 -0.577 -0.476 1.036 1.365 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.25 # .. ..$ df : int [1:3] 3 79 3 # .. ..$ r.squared : num 0.00622 # .. ..$ adj.r.squared: num -0.0189 # .. ..$ fstatistic : Named num [1:3] 0.247 2 79 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.0595 -0.028 -0.0473 -0.028 0.1034 ... # .. ..$ na.action : 'omit' Named int [1:40] 1 2 3 4 5 6 7 8 9 10 ... # $ Sub-Saharan Africa :List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:39] -11.33 -5.041 -3.158 0.585 7.81 ... # .. ..$ coefficients : num [1:2, 1:4] 2.551 -0.644 0.775 0.55 3.293 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 4.8 # .. ..$ df : int [1:3] 2 37 3 # .. ..$ r.squared : num 0.0357 # .. ..$ adj.r.squared: num 0.00959 # .. ..$ fstatistic : Named num [1:3] 1.37 1 37 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.026 -0.00217 -0.00217 0.01312 # .. ..$ na.action : 'omit' Named int [1:22] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:1085] 0.694 -5.869 2.069 3.855 2.415 ... # .. ..$ coefficients : num [1:3, 1:4] -0.0756 0.5308 0.5124 0.8887 0.137 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 5.88 # .. ..$ df : int [1:3] 3 1082 3 # .. ..$ r.squared : num 0.0146 # .. ..$ adj.r.squared: num 0.0128 # .. ..$ fstatistic : Named num [1:3] 8.01 2 1082 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.022858 -0.000025 -0.025534 -0.000025 0.000543 ... # .. ..$ na.action : 'omit' Named int [1:379] 1 61 62 122 123 183 184 244 245 305 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:891] -8.2839 -4.0289 0.0449 1.8231 -0.5267 ... # .. ..$ coefficients : num [1:3, 1:4] 2.352 0.782 -2.616 0.608 0.169 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 5.27 # .. ..$ df : int [1:3] 3 888 3 # .. ..$ r.squared : num 0.0277 # .. ..$ adj.r.squared: num 0.0255 # .. ..$ fstatistic : Named num [1:3] 12.7 2 888 # .. ..$ cov.unscaled : num [1:3, 1:3] 1.33e-02 -1.13e-05 -2.00e-02 -1.13e-05 1.02e-03 ... # .. ..$ na.action : 'omit' Named int [1:146] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:298] 0.7659 0.9133 0.0921 0.996 0.0765 ... # .. ..$ coefficients : num [1:3, 1:4] 0.584 0.456 4.112 2.472 0.652 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 11.4 # .. ..$ df : int [1:3] 3 295 3 # .. ..$ r.squared : num 0.00658 # .. ..$ adj.r.squared: num -0.000152 # .. ..$ fstatistic : Named num [1:3] 0.977 2 295 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.047213 0.000438 -0.070778 0.000438 0.003285 ... # .. ..$ na.action : 'omit' Named int [1:68] 1 61 62 63 64 65 66 67 68 69 ... ``` We can turn this list into a *data.table* again by calling first `get_elem` to recursively extract the coefficient matrices and then `unlist2d` to recursively bind them to a new *data.table*: ```r lm_summary_list %>% get_elem("coefficients") %>% unlist2d(idcols = .c(Region, Income), row.names = "Coef", DT = TRUE) %>% head # Region Income Coef Estimate Std. Error t value # # 1: East Asia & Pacific High income (Intercept) 0.5313479 0.7058550 0.7527720 # 2: East Asia & Pacific High income G(LIFEEX) 2.4935584 0.7586943 3.2866443 # 3: East Asia & Pacific High income B(G(LIFEEX), country) 3.8297123 1.6916770 2.2638554 # 4: East Asia & Pacific Lower middle income (Intercept) 1.3476602 0.7008556 1.9228785 # 5: East Asia & Pacific Lower middle income G(LIFEEX) 0.5238856 0.7574904 0.6916069 # 6: East Asia & Pacific Lower middle income B(G(LIFEEX), country) 0.9494439 1.2031228 0.7891496 # Pr(>|t|) # # 1: 0.451991327 # 2: 0.001095466 # 3: 0.024071386 # 4: 0.055015131 # 5: 0.489478164 # 6: 0.430367103 ``` The fact that this is a nested list of matrices, and that we can save both the names of the lists at each level of nesting and the row- and column- names of the matrices make `unlist2d` a significant generalization of `rbindlist`^[`unlist2d` can similarly bind nested lists of arrays, data frames or *data.table*'s]. But why do all this fuzz if we could have simply done:? ```r DT[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country))), "Coef"), keyby = .(region, income)] %>% head # Key: # region income Coef Estimate Std. Error t value # # 1: East Asia & Pacific High income (Intercept) 0.5313479 0.7058550 0.7527720 # 2: East Asia & Pacific High income G(LIFEEX) 2.4935584 0.7586943 3.2866443 # 3: East Asia & Pacific High income B(G(LIFEEX), country) 3.8297123 1.6916770 2.2638554 # 4: East Asia & Pacific Lower middle income (Intercept) 1.3476602 0.7008556 1.9228785 # 5: East Asia & Pacific Lower middle income G(LIFEEX) 0.5238856 0.7574904 0.6916069 # 6: East Asia & Pacific Lower middle income B(G(LIFEEX), country) 0.9494439 1.2031228 0.7891496 # Pr(>|t|) # # 1: 0.451991327 # 2: 0.001095466 # 3: 0.024071386 # 4: 0.055015131 # 5: 0.489478164 # 6: 0.430367103 ``` Well we might want to do more things with that list of linear models first before tidying it, so this is a more general workflow. We might also be interested in additional statistics like the R-squared or the F-statistic: ```r DT_sum <- lm_summary_list %>% get_elem("coef|r.sq|fstat", regex = TRUE) %>% unlist2d(idcols = .c(Region, Income, Statistic), row.names = "Coef", DT = TRUE) head(DT_sum) # Region Income Statistic Coef Estimate Std. Error # # 1: East Asia & Pacific High income coefficients (Intercept) 0.5313479 0.7058550 # 2: East Asia & Pacific High income coefficients G(LIFEEX) 2.4935584 0.7586943 # 3: East Asia & Pacific High income coefficients B(G(LIFEEX), country) 3.8297123 1.6916770 # 4: East Asia & Pacific High income r.squared NA NA # 5: East Asia & Pacific High income adj.r.squared NA NA # 6: East Asia & Pacific High income fstatistic NA NA # t value Pr(>|t|) V1 value numdf dendf # # 1: 0.752772 0.451991327 NA NA NA NA # 2: 3.286644 0.001095466 NA NA NA NA # 3: 2.263855 0.024071386 NA NA NA NA # 4: NA NA 0.05245359 NA NA NA # 5: NA NA 0.04812690 NA NA NA # 6: NA NA NA 12.12325 2 438 # Reshaping to long form: DT_sum %>% melt(1:4, na.rm = TRUE) %>% roworderv(1:2) %>% head(20) # Region Income Statistic Coef variable # # 1: East Asia & Pacific High income coefficients (Intercept) Estimate # 2: East Asia & Pacific High income coefficients G(LIFEEX) Estimate # 3: East Asia & Pacific High income coefficients B(G(LIFEEX), country) Estimate # 4: East Asia & Pacific High income coefficients (Intercept) Std. Error # 5: East Asia & Pacific High income coefficients G(LIFEEX) Std. Error # 6: East Asia & Pacific High income coefficients B(G(LIFEEX), country) Std. Error # 7: East Asia & Pacific High income coefficients (Intercept) t value # 8: East Asia & Pacific High income coefficients G(LIFEEX) t value # 9: East Asia & Pacific High income coefficients B(G(LIFEEX), country) t value # 10: East Asia & Pacific High income coefficients (Intercept) Pr(>|t|) # 11: East Asia & Pacific High income coefficients G(LIFEEX) Pr(>|t|) # 12: East Asia & Pacific High income coefficients B(G(LIFEEX), country) Pr(>|t|) # 13: East Asia & Pacific High income r.squared V1 # 14: East Asia & Pacific High income adj.r.squared V1 # 15: East Asia & Pacific High income fstatistic value # 16: East Asia & Pacific High income fstatistic numdf # 17: East Asia & Pacific High income fstatistic dendf # 18: East Asia & Pacific Lower middle income coefficients (Intercept) Estimate # 19: East Asia & Pacific Lower middle income coefficients G(LIFEEX) Estimate # 20: East Asia & Pacific Lower middle income coefficients B(G(LIFEEX), country) Estimate # Region Income Statistic Coef variable # value # # 1: 5.313479e-01 # 2: 2.493558e+00 # 3: 3.829712e+00 # 4: 7.058550e-01 # 5: 7.586943e-01 # 6: 1.691677e+00 # 7: 7.527720e-01 # 8: 3.286644e+00 # 9: 2.263855e+00 # 10: 4.519913e-01 # 11: 1.095466e-03 # 12: 2.407139e-02 # 13: 5.245359e-02 # 14: 4.812690e-02 # 15: 1.212325e+01 # 16: 2.000000e+00 # 17: 4.380000e+02 # 18: 1.347660e+00 # 19: 5.238856e-01 # 20: 9.494439e-01 # value ``` As a final example of this kind, lets suppose we are interested in the within-country correlations of all these variables by region and income group: ```r DT[, qDT(pwcor(W(.SD, country)), "Variable"), keyby = .(region, income), .SDcols = PCGDP:ODA] %>% head # Key: # region income Variable W.PCGDP W.LIFEEX W.GINI W.ODA # # 1: East Asia & Pacific High income W.PCGDP 1.0000000 0.7562668 0.6253844 -0.25258496 # 2: East Asia & Pacific High income W.LIFEEX 0.7562668 1.0000000 0.3191255 -0.33611662 # 3: East Asia & Pacific High income W.GINI 0.6253844 0.3191255 1.0000000 NA # 4: East Asia & Pacific High income W.ODA -0.2525850 -0.3361166 NA 1.00000000 # 5: East Asia & Pacific Lower middle income W.PCGDP 1.0000000 0.4685618 0.4428879 -0.02508852 # 6: East Asia & Pacific Lower middle income W.LIFEEX 0.4685618 1.0000000 0.3231520 0.09356733 ``` In summary: The list processing features, statistical capabilities and efficient converters of *collapse* and the flexibility of *data.table* work well together, facilitating more complex workflows. ## Additional Benchmarks See [here]() or [here](). These are all run on a 2 core laptop, so I honestly don't know how *collapse* scales on powerful multi-core machines. My own limited computational resources are part of the reason I did not opt for a thread-parallel package from the start. But a multi-core version of *collapse* will eventually be released, maybe by end of 2021. ## References Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†*Econometrica* 46 (1): 69–85. collapse/vignettes/collapse_and_dplyr.Rmd0000644000176200001440000035757715121640575020424 0ustar liggesusers--- title: "collapse and dplyr" subtitle: "Fast (Weighted) Aggregations and Transformations in a Piped Workflow" author: "Sebastian Krantz" date: "2021-01-04" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and dplyr} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} params: cache: true --- This vignette focuses on the integration of *collapse* and the popular *dplyr* package by Hadley Wickham. In particular it will demonstrate how using *collapse*'s fast functions and some fast alternatives for *dplyr* verbs can substantially facilitate and speed up basic data manipulation, grouped and weighted aggregations and transformations, and panel data computations (i.e. between- and within-transformations, panel-lags, differences and growth rates) in a *dplyr* (piped) workflow. *** **Notes:** - This vignette is targeted at *dplyr* / *tidyverse* users. *collapse* is a standalone package and can be programmed efficiently without pipes or *dplyr* verbs. - The 'Introduction to *collapse*' vignette provides a thorough introduction to the package and a built-in structured documentation is available under `help("collapse-documentation")` after installing the package. In addition `help("collapse-package")` provides a compact set of examples for quick-start. - Documentation and vignettes can also be viewed [online](). *** ## 1. Fast Aggregations A key feature of *collapse* is it's broad set of *Fast Statistical Functions* (`fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct`) which are able to substantially speed-up column-wise, grouped and weighted computations on vectors, matrices or data frames. The functions are S3 generic, with a default (vector), matrix and data frame method, as well as a grouped_df method for grouped tibbles used by *dplyr*. The grouped tibble method has the following arguments: ```r FUN.grouped_df(x, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = FALSE, keep.group_vars = TRUE, [keep.w = TRUE,] ...) ``` where `w` is a weight variable, and `TRA` and can be used to transform `x` using the computed statistics and one of 10 available transformations (`"replace_fill", "replace", "-", "-+", "/", "%", "+", "*", "%%", "-%%"`, discussed in section 2). `na.rm` efficiently removes missing values and is `TRUE` by default. `use.g.names` generates new row-names from the unique combinations of groups (default: disabled), whereas `keep.group_vars` (default: enabled) will keep the grouping columns as is custom in the native `data %>% group_by(...) %>% summarize(...)` workflow in *dplyr*. Finally, `keep.w` regulates whether a weighting variable used is also aggregated and saved in a column. For `fsum, fmean, fmedian, fnth, fvar, fsd` and `fmode` this will compute the sum of the weights in each group, whereas `fprod` returns the product of the weights. With that in mind, let's consider some straightforward applications. ### 1.1 Simple Aggregations Consider the Groningen Growth and Development Center 10-Sector Database included in *collapse* and introduced in the main vignette: ```r library(collapse) head(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA # 3 BWA SSA Sub-saharan Africa VA 1962 NA NA NA NA # 4 BWA SSA Sub-saharan Africa VA 1963 NA NA NA NA # 5 BWA SSA Sub-saharan Africa VA 1964 16.30154 3.494075 0.7365696 0.1043936 # 6 BWA SSA Sub-saharan Africa VA 1965 15.72700 2.495768 1.0181992 0.1350976 # CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA # 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229 # 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710 # Summarize the Data: # descr(GGDC10S, cols = is_categorical) # aperm(qsu(GGDC10S, ~Variable, cols = is.numeric)) # Efficiently converting to tibble (no deep copy) GGDC10S <- qTBL(GGDC10S) ``` Simple column-wise computations using the fast functions and pipe operators are performed as follows: ```r library(dplyr) GGDC10S %>% fnobs # Number of Observations # Country Regioncode Region Variable Year AGR MIN MAN PU # 5027 5027 5027 5027 5027 4364 4355 4355 4354 # CON WRT TRA FIRE GOV OTH SUM # 4355 4355 4355 4355 3482 4248 4364 GGDC10S %>% fndistinct # Number of distinct values # Country Regioncode Region Variable Year AGR MIN MAN PU # 43 6 6 2 67 4353 4224 4353 4237 # CON WRT TRA FIRE GOV OTH SUM # 4339 4344 4334 4349 3470 4238 4364 GGDC10S %>% select_at(6:16) %>% fmedian # Median # AGR MIN MAN PU CON WRT TRA FIRE GOV # 4394.5194 173.2234 3718.0981 167.9500 1473.4470 3773.6430 1174.8000 960.1251 3928.5127 # OTH SUM # 1433.1722 23186.1936 GGDC10S %>% select_at(6:16) %>% fmean # Mean # AGR MIN MAN PU CON WRT TRA FIRE GOV # 2526696.5 1867908.9 5538491.4 335679.5 1801597.6 3392909.5 1473269.7 1657114.8 1712300.3 # OTH SUM # 1684527.3 21566436.8 GGDC10S %>% fmode # Mode # Country Regioncode Region Variable Year # "USA" "ASI" "Asia" "EMP" "2010" # AGR MIN MAN PU CON # "171.315882316326" "0" "4645.12507642586" "0" "1.34623115930777" # WRT TRA FIRE GOV OTH # "21.8380052682527" "8.97743416914571" "40.0701608636442" "0" "3626.84423577048" # SUM # "37.4822945751317" GGDC10S %>% fmode(drop = FALSE) # Keep data structure intact # # A tibble: 1 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # * # 1 USA ASI Asia EMP 2010 171. 0 4645. 0 1.35 21.8 8.98 40.1 0 # # ℹ 2 more variables: OTH , SUM ``` Moving on to grouped statistics, we can compute the average value added and employment by sector and country using: ```r GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmean # # A tibble: 85 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1420. 52.1 1932. 1.02e2 7.42e2 1.98e3 6.49e2 628. 2043. 9.92e2 1.05e4 # 2 EMP BOL 964. 56.0 235. 5.35e0 1.23e2 2.82e2 1.15e2 44.6 NA 3.96e2 2.22e3 # 3 EMP BRA 17191. 206. 6991. 3.65e2 3.52e3 8.51e3 2.05e3 4414. 5307. 5.71e3 5.43e4 # 4 EMP BWA 188. 10.5 18.1 3.09e0 2.53e1 3.63e1 8.36e0 15.3 61.1 2.76e1 3.94e2 # 5 EMP CHL 702. 101. 625. 2.94e1 2.96e2 6.95e2 2.58e2 272. NA 1.00e3 3.98e3 # 6 EMP CHN 287744. 7050. 67144. 1.61e3 2.09e4 2.89e4 1.39e4 4929. 22669. 3.10e4 4.86e5 # 7 EMP COL 3091. 145. 1175. 3.39e1 5.24e2 2.07e3 4.70e2 649. NA 1.73e3 9.89e3 # 8 EMP CRI 231. 1.70 136. 1.43e1 5.76e1 1.57e2 4.24e1 54.9 128. 6.51e1 8.87e2 # 9 EMP DEW 2490. 407. 8473. 2.26e2 2.09e3 4.44e3 1.48e3 1689. 3945. 9.99e2 2.62e4 # 10 EMP DNK 236. 8.03 507. 1.38e1 1.71e2 4.55e2 1.61e2 181. 549. 1.11e2 2.39e3 # # ℹ 75 more rows ``` Similarly we can aggregate using any other of the above functions. It is important to not use *dplyr*'s `summarize` together with these functions since that would eliminate their speed gain. These functions are fast because they are executed only once and carry out the grouped computations in C++, whereas `summarize` will apply the function to each group in the grouped tibble. *** #### Excursus: What is Happening Behind the Scenes? To better explain this point it is perhaps good to shed some light on what is happening behind the scenes of *dplyr* and *collapse*. Fundamentally both packages follow different computing paradigms: *dplyr* is an efficient implementation of the Split-Apply-Combine computing paradigm. Data is split into groups, these data-chunks are then passed to a function carrying out the computation, and finally recombined to produce the aggregated data.frame. This modus operandi is evident in the grouping mechanism of *dplyr*. When a data.frame is passed through *group_by*, a 'groups' attribute is attached: ```r GGDC10S %>% group_by(Variable, Country) %>% attr("groups") # # A tibble: 85 × 3 # Variable Country .rows # > # 1 EMP ARG [62] # 2 EMP BOL [61] # 3 EMP BRA [62] # 4 EMP BWA [52] # 5 EMP CHL [63] # 6 EMP CHN [62] # 7 EMP COL [61] # 8 EMP CRI [62] # 9 EMP DEW [61] # 10 EMP DNK [64] # # ℹ 75 more rows ``` This object is a data.frame giving the unique groups and in the third (last) column vectors containing the indices of the rows belonging to that group. A command like `summarize` uses this information to split the data.frame into groups which are then passed sequentially to the function used and later recombined. These steps are also done in C++ which makes *dplyr* quite efficient. Now *collapse* is based around one-pass grouped computations at the C++ level using its own grouped statistical functions. In other words the data is not split and recombined at all but the entire computation is performed in a single C++ loop running through that data and completing the computations for each group simultaneously. This modus operandi is also evident in *collapse* grouping objects. The method `GRP.grouped_df` takes a *dplyr* grouping object from a grouped tibble and efficiently converts it to a *collapse* grouping object: ```r GGDC10S %>% group_by(Variable, Country) %>% GRP %>% str # Class 'GRP' hidden list of 9 # $ N.groups : int 85 # $ group.id : int [1:5027] 46 46 46 46 46 46 46 46 46 46 ... # $ group.sizes : int [1:85] 62 61 62 52 63 62 61 62 61 64 ... # $ groups :List of 2 # ..$ Variable: chr [1:85] "EMP" "EMP" "EMP" "EMP" ... # .. ..- attr(*, "label")= chr "Variable" # .. ..- attr(*, "format.stata")= chr "%9s" # ..$ Country : chr [1:85] "ARG" "BOL" "BRA" "BWA" ... # .. ..- attr(*, "label")= chr "Country" # .. ..- attr(*, "format.stata")= chr "%9s" # $ group.vars : chr [1:2] "Variable" "Country" # $ ordered : Named logi [1:2] TRUE FALSE # ..- attr(*, "names")= chr [1:2] "ordered" "sorted" # $ order : NULL # $ group.starts: NULL # $ call : language GRP.grouped_df(X = .) ``` This object is a list where the first three elements give the number of groups, the group-id to which each row belongs and a vector of group-sizes. A function like `fsum` uses this information to (for each column) create a result vector of size 'N.groups' and the run through the column using the 'group.id' vector to add the i'th data point to the 'group.id[i]'th element of the result vector. When the loop is finished, the grouped computation is also finished. It is obvious that *collapse* is faster than *dplyr* since it's method of computing involves less steps, and it does not need to call statistical functions multiple times. See the benchmark section. *** ### 1.2 More Speed using *collapse* Verbs *collapse* fast functions do not develop their maximal performance on a grouped tibble created with `group_by` because of the additional conversion cost of the grouping object incurred by `GRP.grouped_df`. This cost is already minimized through the use of C++, but we can do even better replacing `group_by` with `collapse::fgroup_by`. `fgroup_by` works like `group_by` but does the grouping with `collapse::GRP` (up to 10x faster than `group_by`) and simply attaches a *collapse* grouping object to the grouped_df. Thus the speed gain is 2-fold: Faster grouping and no conversion cost when calling *collapse* functions. Another improvement comes from replacing the *dplyr* verb `select` with `collapse::fselect`, and, for selection using column names, indices or functions use `collapse::get_vars` instead of `select_at` or `select_if`. Next to `get_vars`, *collapse* also introduces the predicates `num_vars`, `cat_vars`, `char_vars`, `fact_vars`, `logi_vars` and `date_vars` to efficiently select columns by type. ```r GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fmedian # # A tibble: 85 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1325. 47.4 1988. 1.05e2 7.82e2 1.85e3 5.80e2 464. 1739. 866. 9.74e3 # 2 EMP BOL 943. 53.5 167. 4.46e0 6.60e1 1.32e2 9.70e1 15.3 NA 384. 1.84e3 # 3 EMP BRA 17481. 225. 7208. 3.76e2 4.05e3 6.45e3 1.58e3 4355. 4450. 4479. 5.19e4 # 4 EMP BWA 175. 12.2 13.1 3.71e0 1.90e1 2.11e1 6.75e0 10.4 53.8 31.2 3.61e2 # 5 EMP CHL 690. 93.9 607. 2.58e1 2.30e2 4.84e2 2.05e2 106. NA 900. 3.31e3 # 6 EMP CHN 293915 8150. 61761. 1.14e3 1.06e4 1.70e4 9.56e3 4328. 19468. 9954. 4.45e5 # 7 EMP COL 3006. 84.0 1033. 3.71e1 4.19e2 1.55e3 3.91e2 655. NA 1430. 8.63e3 # 8 EMP CRI 216. 1.49 114. 7.92e0 5.50e1 8.98e1 2.55e1 19.6 122. 60.6 7.19e2 # 9 EMP DEW 2178 320. 8459. 2.47e2 2.10e3 4.45e3 1.53e3 1656 3700 900 2.65e4 # 10 EMP DNK 187. 3.75 508. 1.36e1 1.65e2 4.61e2 1.61e2 169. 642. 104. 2.42e3 # # ℹ 75 more rows microbenchmark(collapse = GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fmedian, hybrid = GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmedian, dplyr = GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% summarise_all(median, na.rm = TRUE)) # Unit: microseconds # expr min lq mean median uq max neval # collapse 236.406 263.6095 303.309 295.9175 337.061 419.635 100 # hybrid 2699.317 2894.9690 3573.611 2998.3505 3119.772 56249.212 100 # dplyr 15923.908 16297.8280 18810.943 16742.5140 18578.105 71125.939 100 ``` Benchmarks on the different components of this code and with larger data are provided under 'Benchmarks'. Note that a grouped tibble created with `fgroup_by` can no longer be used for grouped computations with *dplyr* verbs like `mutate` or `summarize`. `fgroup_by` first assigns the class *GDP_df* which is for printing grouping information and subsetting, then the object classes (*tbl_df*, *data.table* or whatever else), followed by classes *grouped_df* and *data.frame*, and adds the grouping object in a 'groups' attribute. Since *tbl_df* is assigned before *grouped_df*, the object is treated by the *dplyr* ecosystem like a normal tibble. ```r class(group_by(GGDC10S, Variable, Country)) # [1] "grouped_df" "tbl_df" "tbl" "data.frame" class(fgroup_by(GGDC10S, Variable, Country)) # [1] "GRP_df" "tbl_df" "tbl" "grouped_df" "data.frame" ``` The function `fungroup` removes classes 'GDP_df' and 'grouped_df' and the 'groups' attribute (and can thus also be used for grouped tibbles created with `dplyr::group_by`). Note that any kind of data frame based class can be grouped with `fgroup_by`, and still retain full responsiveness to all methods defined for that class. Functions performing aggregation on the grouped data frame remove the grouping object and classes afterwards, yielding an object with the same class and attributes as the input. The print method shown below reports the grouping variables, and then in square brackets the information `[number of groups | average group size (standard-deviation of group sizes)]`: ```r fgroup_by(GGDC10S, Variable, Country) # # A tibble: 5,027 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA SSA Sub-s… VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-s… VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA SSA Sub-s… VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA SSA Sub-s… VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA SSA Sub-s… VA 1964 16.3 3.49 0.737 0.104 0.660 6.24 1.66 1.12 4.82 # 6 BWA SSA Sub-s… VA 1965 15.7 2.50 1.02 0.135 1.35 7.06 1.94 1.25 5.70 # 7 BWA SSA Sub-s… VA 1966 17.7 1.97 0.804 0.203 1.35 8.27 2.15 1.36 6.37 # 8 BWA SSA Sub-s… VA 1967 19.1 2.30 0.938 0.203 0.897 4.31 1.72 1.54 7.04 # 9 BWA SSA Sub-s… VA 1968 21.1 1.84 0.750 0.203 1.22 5.17 2.44 1.03 5.03 # 10 BWA SSA Sub-s… VA 1969 21.9 5.24 2.14 0.578 3.47 5.75 2.72 1.23 5.59 # # ℹ 5,017 more rows # # ℹ 2 more variables: OTH , SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Note further that `fselect` and `get_vars` are not full drop-in replacements for `select` because they do not have a grouped_df method: ```r GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% tail(3) # # A tibble: 3 × 13 # # Groups: Variable, Country [1] # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP EGY 5206. 29.0 2436. 307. 2733. 2977. 1992. 801. 5539. NA 22020. # 2 EMP EGY 5186. 27.6 2374. 318. 2795. 3020. 2048. 815. 5636. NA 22219. # 3 EMP EGY 5161. 24.8 2348. 325. 2931. 3110. 2065. 832. 5736. NA 22533. GGDC10S %>% group_by(Variable, Country) %>% get_vars(6:16) %>% tail(3) # # A tibble: 3 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 5206. 29.0 2436. 307. 2733. 2977. 1992. 801. 5539. NA 22020. # 2 5186. 27.6 2374. 318. 2795. 3020. 2048. 815. 5636. NA 22219. # 3 5161. 24.8 2348. 325. 2931. 3110. 2065. 832. 5736. NA 22533. ``` Since by default `keep.group_vars = TRUE` in the *Fast Statistical Functions*, the end result is nevertheless the same: ```r GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmean %>% tail(3) # # A tibble: 3 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 VA VEN 6860. 35478. 1.96e4 1.06e3 1.17e4 1.93e4 8.03e3 5.60e3 NA 19986. 1.28e5 # 2 VA ZAF 16419. 42928. 8.76e4 1.38e4 1.64e4 6.83e4 4.53e4 6.64e4 7.58e4 30167. 4.63e5 # 3 VA ZMB 1268849. 1006099. 9.00e5 2.19e5 8.66e5 2.10e6 7.05e5 9.10e5 1.10e6 81871. 9.16e6 GGDC10S %>% group_by(Variable, Country) %>% get_vars(6:16) %>% fmean %>% tail(3) # # A tibble: 3 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 VA VEN 6860. 35478. 1.96e4 1.06e3 1.17e4 1.93e4 8.03e3 5.60e3 NA 19986. 1.28e5 # 2 VA ZAF 16419. 42928. 8.76e4 1.38e4 1.64e4 6.83e4 4.53e4 6.64e4 7.58e4 30167. 4.63e5 # 3 VA ZMB 1268849. 1006099. 9.00e5 2.19e5 8.66e5 2.10e6 7.05e5 9.10e5 1.10e6 81871. 9.16e6 ``` Another useful verb introduced by *collapse* is `fgroup_vars`, which can be used to efficiently obtain the grouping columns or grouping variables from a grouped tibble: ```r # fgroup_by fully supports grouped tibbles created with group_by or fgroup_by: GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars %>% head(3) # # A tibble: 3 × 2 # Variable Country # # 1 VA BWA # 2 VA BWA # 3 VA BWA GGDC10S %>% fgroup_by(Variable, Country) %>% fgroup_vars %>% head(3) # # A tibble: 3 × 2 # Variable Country # # 1 VA BWA # 2 VA BWA # 3 VA BWA # The other possibilities: GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("unique") %>% head(3) # # A tibble: 3 × 2 # Variable Country # # 1 EMP ARG # 2 EMP BOL # 3 EMP BRA GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("names") # [1] "Variable" "Country" GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("indices") # [1] 4 1 GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("named_indices") # Variable Country # 4 1 GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("logical") # [1] TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("named_logical") # Country Regioncode Region Variable Year AGR MIN MAN PU # TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE # CON WRT TRA FIRE GOV OTH SUM # FALSE FALSE FALSE FALSE FALSE FALSE FALSE ``` Another *collapse* verb to mention here is `fsubset`, a faster alternative to `dplyr::filter` which also provides an option to flexibly subset columns after the select argument: ```r # Two equivalent calls, the first is substantially faster GGDC10S %>% fsubset(Variable == "VA" & Year > 1990, Country, Year, AGR:GOV) %>% head(3) # # A tibble: 3 × 11 # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA 1991 303. 2647. 473. 161. 580. 807. 233. 433. 1073. # 2 BWA 1992 333. 2691. 537. 178. 679. 725. 285. 517. 1234. # 3 BWA 1993 405. 2625. 567. 219. 634. 772. 350. 673. 1487. GGDC10S %>% filter(Variable == "VA" & Year > 1990) %>% select(Country, Year, AGR:GOV) %>% head(3) # # A tibble: 3 × 11 # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA 1991 303. 2647. 473. 161. 580. 807. 233. 433. 1073. # 2 BWA 1992 333. 2691. 537. 178. 679. 725. 285. 517. 1234. # 3 BWA 1993 405. 2625. 567. 219. 634. 772. 350. 673. 1487. ``` *collapse* also offers `roworder`, `frename`, `colorder` and `ftransform`/`TRA` as fast replacements for `dplyr::arrange`, `dplyr::rename`, `dplyr::relocate` and `dplyr::mutate`. ### 1.3 Multi-Function Aggregations One can also aggregate with multiple functions at the same time. For such operations it is often necessary to use curly braces `{` to prevent first argument injection so that `%>% cbind(FUN1(.), FUN2(.))` does not evaluate as `%>% cbind(., FUN1(.), FUN2(.))`: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% { cbind(fmedian(.), add_stub(fmean(., keep.group_vars = FALSE), "mean_")) } %>% head(3) # Variable Country AGR MIN MAN PU CON WRT TRA # 1 EMP ARG 1324.5255 47.35255 1987.5912 104.738825 782.40283 1854.612 579.93982 # 2 EMP BOL 943.1612 53.53538 167.1502 4.457895 65.97904 132.225 96.96828 # 3 EMP BRA 17480.9810 225.43693 7207.7915 375.851832 4054.66103 6454.523 1580.81120 # FIRE GOV OTH SUM mean_AGR mean_MIN mean_MAN mean_PU mean_CON # 1 464.39920 1738.836 866.1119 9743.223 1419.8013 52.08903 1931.7602 101.720936 742.4044 # 2 15.34259 NA 384.0678 1842.055 964.2103 56.03295 235.0332 5.346433 122.7827 # 3 4354.86210 4449.942 4478.6927 51881.110 17191.3529 206.02389 6991.3710 364.573404 3524.7384 # mean_WRT mean_TRA mean_FIRE mean_GOV mean_OTH mean_SUM # 1 1982.1775 648.5119 627.79291 2043.471 992.4475 10542.177 # 2 281.5164 115.4728 44.56442 NA 395.5650 2220.524 # 3 8509.4612 2054.3731 4413.54448 5307.280 5710.2665 54272.985 ``` The function `add_stub` used above is a *collapse* function adding a prefix (default) or suffix to variables names. The *collapse* predicate `add_vars` provides a more efficient alternative to `cbind.data.frame`. The idea here is 'adding' variables to the data.frame in the first argument i.e. the attributes of the first argument are preserved, so the expression below still gives a tibble instead of a data.frame: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% { add_vars(get_vars(., "Reg", regex = TRUE) %>% ffirst, # Regular expression matching column names num_vars(.) %>% fmean(keep.group_vars = FALSE) %>% add_stub("mean_"), # num_vars selects all numeric variables fselect(., PU:TRA) %>% fmedian(keep.group_vars = FALSE) %>% add_stub("median_"), fselect(., PU:CON) %>% fmin(keep.group_vars = FALSE) %>% add_stub("min_")) } %>% head(3) # # A tibble: 3 × 22 # Variable Country Regioncode Region mean_Year mean_AGR mean_MIN mean_MAN mean_PU mean_CON mean_WRT # # 1 EMP ARG LAM Latin … 1980. 1420. 52.1 1932. 102. 742. 1982. # 2 EMP BOL LAM Latin … 1980 964. 56.0 235. 5.35 123. 282. # 3 EMP BRA LAM Latin … 1980. 17191. 206. 6991. 365. 3525. 8509. # # ℹ 11 more variables: mean_TRA , mean_FIRE , mean_GOV , mean_OTH , # # mean_SUM , median_PU , median_CON , median_WRT , median_TRA , # # min_PU , min_CON ``` Another nice feature of `add_vars` is that it can also very efficiently reorder columns i.e. bind columns in a different order than they are passed. This can be done by simply specifying the positions the added columns should have in the final data frame, and then `add_vars` shifts the first argument columns to the right to fill in the gaps. ```r GGDC10S %>% fsubset(Variable == "VA", Country, AGR, SUM) %>% fgroup_by(Country) %>% { add_vars(fgroup_vars(.,"unique"), fmean(., keep.group_vars = FALSE) %>% add_stub("mean_"), fsd(., keep.group_vars = FALSE) %>% add_stub("sd_"), pos = c(2,4,3,5)) } %>% head(3) # # A tibble: 3 × 5 # Country mean_AGR sd_AGR mean_SUM sd_SUM # # 1 ARG 14951. 33061. 152534. 301316. # 2 BOL 3300. 4456. 22619. 33173. # 3 BRA 76870. 59442. 1200563. 976963. ``` A much more compact solution to multi-function and multi-type aggregation is offered by the function *collapg*: ```r # This aggregates numeric colums using the mean (fmean) and categorical columns with the mode (fmode) GGDC10S %>% fgroup_by(Variable, Country) %>% collapg %>% head(3) # # A tibble: 3 × 16 # Variable Country Regioncode Region Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 EMP ARG LAM Latin … 1980. 1420. 52.1 1932. 102. 742. 1982. 649. 628. 2043. # 2 EMP BOL LAM Latin … 1980 964. 56.0 235. 5.35 123. 282. 115. 44.6 NA # 3 EMP BRA LAM Latin … 1980. 17191. 206. 6991. 365. 3525. 8509. 2054. 4414. 5307. # # ℹ 2 more variables: OTH , SUM ``` By default it aggregates numeric columns using the `fmean` and categorical columns using `fmode`, and preserves the order of all columns. Changing these defaults is very easy: ```r # This aggregates numeric colums using the median and categorical columns using the first value GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(fmedian, flast) %>% head(3) # # A tibble: 3 × 16 # Variable Country Regioncode Region Year AGR MIN MAN PU CON WRT TRA FIRE # # 1 EMP ARG LAM Latin Amer… 1980. 1325. 47.4 1988. 105. 782. 1855. 580. 464. # 2 EMP BOL LAM Latin Amer… 1980 943. 53.5 167. 4.46 66.0 132. 97.0 15.3 # 3 EMP BRA LAM Latin Amer… 1980. 17481. 225. 7208. 376. 4055. 6455. 1581. 4355. # # ℹ 3 more variables: GOV , OTH , SUM ``` One can apply multiple functions to both numeric and/or categorical data: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(list(fmean, fmedian), list(first, fmode, flast)) %>% head(3) # # A tibble: 3 × 32 # Variable Country first.Regioncode fmode.Regioncode flast.Regioncode first.Region fmode.Region # # 1 EMP ARG LAM LAM LAM Latin America Latin America # 2 EMP BOL LAM LAM LAM Latin America Latin America # 3 EMP BRA LAM LAM LAM Latin America Latin America # # ℹ 25 more variables: flast.Region , fmean.Year , fmedian.Year , fmean.AGR , # # fmedian.AGR , fmean.MIN , fmedian.MIN , fmean.MAN , fmedian.MAN , # # fmean.PU , fmedian.PU , fmean.CON , fmedian.CON , fmean.WRT , # # fmedian.WRT , fmean.TRA , fmedian.TRA , fmean.FIRE , fmedian.FIRE , # # fmean.GOV , fmedian.GOV , fmean.OTH , fmedian.OTH , fmean.SUM , # # fmedian.SUM ``` Applying multiple functions to only numeric (or only categorical) data allows return in a long format: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(list(fmean, fmedian), cols = is.numeric, return = "long") %>% head(3) # # A tibble: 3 × 15 # Function Variable Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 fmean EMP ARG 1980. 1420. 52.1 1932. 102. 742. 1982. 649. 628. 2043. 992. # 2 fmean EMP BOL 1980 964. 56.0 235. 5.35 123. 282. 115. 44.6 NA 396. # 3 fmean EMP BRA 1980. 17191. 206. 6991. 365. 3525. 8509. 2054. 4414. 5307. 5710. # # ℹ 1 more variable: SUM ``` Finally, `collapg` also makes it very easy to apply aggregator functions to certain columns only: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(custom = list(fmean = 6:8, fmedian = 10:12)) %>% head(3) # # A tibble: 3 × 8 # Variable Country AGR MIN MAN CON WRT TRA # # 1 EMP ARG 1420. 52.1 1932. 782. 1855. 580. # 2 EMP BOL 964. 56.0 235. 66.0 132. 97.0 # 3 EMP BRA 17191. 206. 6991. 4055. 6455. 1581. ``` To understand more about `collapg`, look it up in the documentation (`?collapg`). ### 1.4 Weighted Aggregations Weighted aggregations are possible with the functions `fsum, fprod, fmean, fmedian, fnth, fmode, fvar` and `fsd`. The implementation is such that by default (option `keep.w = TRUE`) these functions also aggregate the weights, so that further weighted computations can be performed on the aggregated data. `fprod` saves the product of the weights, whereas the other functions save the sum of the weights in a column next to the grouping variables. If `na.rm = TRUE` (the default), rows with missing weights are omitted from the computation. ```r # This computes a frequency-weighted grouped standard-deviation, taking the total EMP / VA as weight GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(AGR:SUM) %>% fsd(SUM) %>% head(3) # # A tibble: 3 × 13 # Variable Country sum.SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 EMP ARG 653615. 225. 22.2 176. 20.5 285. 856. 195. 493. 1123. 506. # 2 EMP BOL 135452. 99.7 17.1 168. 4.87 123. 324. 98.1 69.8 NA 258. # 3 EMP BRA 3364925. 1587. 73.8 2952. 93.8 1861. 6285. 1306. 3003. 3621. 4257. # This computes a weighted grouped mode, taking the total EMP / VA as weight GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(AGR:SUM) %>% fmode(SUM) %>% head(3) # # A tibble: 3 × 13 # Variable Country sum.SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 EMP ARG 653615. 1162. 127. 2164. 152. 1415. 3768. 1060. 1748. 4336. 1999. # 2 EMP BOL 135452. 819. 37.6 604. 10.8 433. 893. 333. 321. NA 1057. # 3 EMP BRA 3364925. 16451. 313. 11841. 388. 8154. 21860. 5169. 12011. 12149. 14235. ``` The weighted variance / standard deviation is currently only implemented with frequency weights. Weighted aggregations may also be performed with `collapg`. By default `fsum` is used to compute a sum of the weights, but it is also possible here to aggregate the weights with other functions: ```r # This aggregates numeric colums using the weighted mean (the default) and categorical columns using the weighted mode (the default). # Weights (column SUM) are aggregated using both the sum and the maximum. GGDC10S %>% group_by(Variable, Country) %>% collapg(w = SUM, wFUN = list(fsum, fmax)) %>% head(3) # # A tibble: 3 × 17 # Variable Country fsum.SUM fmax.SUM Regioncode Region Year AGR MIN MAN PU CON WRT # # 1 EMP ARG 653615. 17929. LAM Latin … 1985. 1361. 56.5 1935. 105. 811. 2217. # 2 EMP BOL 135452. 4508. LAM Latin … 1987. 977. 57.9 296. 7.07 167. 400. # 3 EMP BRA 3364925. 102572. LAM Latin … 1989. 17746. 238. 8466. 389. 4436. 11376. # # ℹ 4 more variables: TRA , FIRE , GOV , OTH ``` ## 2. Fast Transformations *collapse* also provides some fast transformations that significantly extend the scope and speed of manipulations that can be performed with `dplyr::mutate`. ### 2.1 Fast Transform and Compute Variables The function `ftransform` can be used to manipulate columns in the same ways as `mutate`: ```r GGDC10S %>% fsubset(Variable == "VA", Country, Year, AGR, SUM) %>% ftransform(AGR_perc = AGR / SUM * 100, # Computing % of VA in Agriculture AGR_mean = fmean(AGR), # Average Agricultural VA AGR = NULL, SUM = NULL) %>% # Deleting columns AGR and SUM head # # A tibble: 6 × 4 # Country Year AGR_perc AGR_mean # # 1 BWA 1960 NA 5137561. # 2 BWA 1961 NA 5137561. # 3 BWA 1962 NA 5137561. # 4 BWA 1963 NA 5137561. # 5 BWA 1964 43.5 5137561. # 6 BWA 1965 40.0 5137561. ``` The modification brought by `ftransformv` enables transformations of groups of columns like `dplyr::mutate_at` and `dplyr::mutate_if`: ```r # This replaces variables mpg, carb and wt by their log (.c turns expressions into character vectors) mtcars %>% ftransformv(.c(mpg, carb, wt), log) %>% head # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 3.044522 6 160 110 3.90 0.9631743 16.46 0 1 4 1.3862944 # Mazda RX4 Wag 3.044522 6 160 110 3.90 1.0560527 17.02 0 1 4 1.3862944 # Datsun 710 3.126761 4 108 93 3.85 0.8415672 18.61 1 1 4 0.0000000 # Hornet 4 Drive 3.063391 6 258 110 3.08 1.1678274 19.44 1 0 3 0.0000000 # Hornet Sportabout 2.928524 8 360 175 3.15 1.2354715 17.02 0 0 3 0.6931472 # Valiant 2.895912 6 225 105 2.76 1.2412686 20.22 1 0 3 0.0000000 # Logging numeric variables iris %>% ftransformv(is.numeric, log) %>% head # Sepal.Length Sepal.Width Petal.Length Petal.Width Species # 1 1.629241 1.252763 0.3364722 -1.6094379 setosa # 2 1.589235 1.098612 0.3364722 -1.6094379 setosa # 3 1.547563 1.163151 0.2623643 -1.6094379 setosa # 4 1.526056 1.131402 0.4054651 -1.6094379 setosa # 5 1.609438 1.280934 0.3364722 -1.6094379 setosa # 6 1.686399 1.360977 0.5306283 -0.9162907 setosa ``` Instead of `column = value` type arguments, it is also possible to pass a single list of transformed variables to `ftransform`, which will be regarded in the same way as an evaluated list of `column = value` arguments. It can be used for more complex transformations: ```r # Logging values and replacing generated Inf values mtcars %>% ftransform(fselect(., mpg, cyl, vs:gear) %>% lapply(log) %>% replace_Inf) %>% head # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 3.044522 1.791759 160 110 3.90 2.620 16.46 NA 0 1.386294 4 # Mazda RX4 Wag 3.044522 1.791759 160 110 3.90 2.875 17.02 NA 0 1.386294 4 # Datsun 710 3.126761 1.386294 108 93 3.85 2.320 18.61 0 0 1.386294 1 # Hornet 4 Drive 3.063391 1.791759 258 110 3.08 3.215 19.44 0 NA 1.098612 1 # Hornet Sportabout 2.928524 2.079442 360 175 3.15 3.440 17.02 NA NA 1.098612 2 # Valiant 2.895912 1.791759 225 105 2.76 3.460 20.22 0 NA 1.098612 1 ``` If only the computed columns need to be returned, `fcompute` provides an efficient alternative: ```r GGDC10S %>% fsubset(Variable == "VA", Country, Year, AGR, SUM) %>% fcompute(AGR_perc = AGR / SUM * 100, AGR_mean = fmean(AGR)) %>% head # # A tibble: 6 × 2 # AGR_perc AGR_mean # # 1 NA 5137561. # 2 NA 5137561. # 3 NA 5137561. # 4 NA 5137561. # 5 43.5 5137561. # 6 40.0 5137561. ``` `ftransform` and `fcompute` are an order of magnitude faster than `mutate`, but they do not support grouped computations using arbitrary functions. We will see that this is hardly a limitation as *collapse* provides very efficient and elegant alternative programming mechanisms... ### 2.2 Replacing and Sweeping out Statistics All statistical (scalar-valued) functions in the collapse package (`fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct`) have a `TRA` argument which can be used to efficiently transform data by either (column-wise) replacing data values with computed statistics or sweeping the statistics out of the data. Operations can be specified using either an integer or quoted operator / string. The 10 operations supported by `TRA` are: * 1 - "replace_fill" : replace and overwrite missing values (same as `mutate`) * 2 - "replace" : replace but preserve missing values * 3 - "-" : subtract (center) * 4 - "-+" : subtract group-statistics but add average of group statistics * 5 - "/" : divide (scale) * 6 - "%" : compute percentages (divide and multiply by 100) * 7 - "+" : add * 8 - "*" : multiply * 9 - "%%" : modulus * 10 - "-%%" : subtract modulus Simple transformations are again straightforward to specify: ```r # This subtracts the median value from all data points i.e. centers on the median GGDC10S %>% num_vars %>% fmedian(TRA = "-") %>% head # # A tibble: 6 × 12 # Year AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 -22 NA NA NA NA NA NA NA NA NA NA NA # 2 -21 NA NA NA NA NA NA NA NA NA NA NA # 3 -20 NA NA NA NA NA NA NA NA NA NA NA # 4 -19 NA NA NA NA NA NA NA NA NA NA NA # 5 -18 -4378. -170. -3717. -168. -1473. -3767. -1173. -959. -3924. -1431. -23149. # 6 -17 -4379. -171. -3717. -168. -1472. -3767. -1173. -959. -3923. -1430. -23147. # This replaces all data points with the mode GGDC10S %>% char_vars %>% fmode(TRA = "replace") %>% head # # A tibble: 6 × 4 # Country Regioncode Region Variable # # 1 USA ASI Asia EMP # 2 USA ASI Asia EMP # 3 USA ASI Asia EMP # 4 USA ASI Asia EMP # 5 USA ASI Asia EMP # 6 USA ASI Asia EMP ``` Similarly for grouped transformations: ```r # Replacing data with the 2nd quartile (25%) GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fnth(0.25, TRA = "replace_fill") %>% head(3) # # A tibble: 3 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 VA BWA 63.5 33.1 27.3 7.36 26.8 31.1 13.2 12.0 33.6 11.5 262. # 2 VA BWA 63.5 33.1 27.3 7.36 26.8 31.1 13.2 12.0 33.6 11.5 262. # 3 VA BWA 63.5 33.1 27.3 7.36 26.8 31.1 13.2 12.0 33.6 11.5 262. # Scaling sectoral data by Variable and Country GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fsd(TRA = "/") %>% head # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 VA BWA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA # 5 VA BWA 0.0270 0.000556 0.000523 3.88e-4 5.11e-4 0.00194 0.00154 5.23e-4 0.00134 # 6 VA BWA 0.0260 0.000397 0.000723 5.03e-4 1.04e-3 0.00220 0.00180 5.83e-4 0.00158 # # ℹ 2 more variables: OTH , SUM ``` The benchmarks below will demonstrate that these internal sweeping and replacement operations fully performed in C++ compute significantly faster than using `dplyr::mutate`, especially as the number of groups grows large. The S3 generic nature of the *Fast Statistical Functions* further allows us to perform grouped mutations on the fly (together with `ftransform` or `fcompute`), without the need of first creating a grouped tibble: ```r # AGR_gmed = TRUE if AGR is greater than it's median value, grouped by Variable and Country # Note: This calls fmedian.default settransform(GGDC10S, AGR_gmed = AGR > fmedian(AGR, list(Variable, Country), TRA = "replace")) tail(GGDC10S, 3) # # A tibble: 3 × 17 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 EGY MENA Middle Ea… EMP 2010 5206. 29.0 2436. 307. 2733. 2977. 1992. 801. 5539. # 2 EGY MENA Middle Ea… EMP 2011 5186. 27.6 2374. 318. 2795. 3020. 2048. 815. 5636. # 3 EGY MENA Middle Ea… EMP 2012 5161. 24.8 2348. 325. 2931. 3110. 2065. 832. 5736. # # ℹ 3 more variables: OTH , SUM , AGR_gmed # Dividing (scaling) the sectoral data (columns 6 through 16) by their grouped standard deviation settransformv(GGDC10S, 6:16, fsd, list(Variable, Country), TRA = "/", apply = FALSE) tail(GGDC10S, 3) # # A tibble: 3 × 17 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 EGY MENA Middle Ea… EMP 2010 8.41 2.28 4.32 3.56 3.62 3.75 3.75 3.14 3.80 # 2 EGY MENA Middle Ea… EMP 2011 8.38 2.17 4.21 3.68 3.70 3.81 3.86 3.19 3.86 # 3 EGY MENA Middle Ea… EMP 2012 8.34 1.95 4.17 3.76 3.88 3.92 3.89 3.26 3.93 # # ℹ 3 more variables: OTH , SUM , AGR_gmed rm(GGDC10S) ``` Weights are easily added to any grouped transformation: ```r # This subtracts weighted group means from the data, using SUM column as weights.. GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fmean(SUM, "-") %>% head # # A tibble: 6 × 13 # Variable Country SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 5 VA BWA 37.5 -1301. -13317. -2965. -529. -2746. -6540. -2157. -4431. -7551. -2613. # 6 VA BWA 39.3 -1302. -13318. -2964. -529. -2745. -6540. -2156. -4431. -7550. -2613. ``` Sequential operations are also easily performed: ```r # This scales and then subtracts the median GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fsd(TRA = "/") %>% fmedian(TRA = "-") # # A tibble: 5,027 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # * # 1 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 5 VA BWA -0.182 -0.235 -0.183 -0.245 -0.118 -0.0820 -0.0724 -0.0661 -0.108 -0.0848 -0.146 # 6 VA BWA -0.183 -0.235 -0.183 -0.245 -0.117 -0.0817 -0.0722 -0.0660 -0.108 -0.0846 -0.146 # 7 VA BWA -0.180 -0.235 -0.183 -0.245 -0.117 -0.0813 -0.0720 -0.0659 -0.107 -0.0843 -0.145 # 8 VA BWA -0.177 -0.235 -0.183 -0.245 -0.117 -0.0826 -0.0724 -0.0659 -0.107 -0.0841 -0.146 # 9 VA BWA -0.174 -0.235 -0.183 -0.245 -0.117 -0.0823 -0.0717 -0.0661 -0.108 -0.0848 -0.146 # 10 VA BWA -0.173 -0.234 -0.182 -0.243 -0.115 -0.0821 -0.0715 -0.0660 -0.108 -0.0846 -0.145 # # ℹ 5,017 more rows # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Of course it is also possible to combine multiple functions as in the aggregation section, or to add variables to existing data: ```r # This adds a groupwise observation count next to each column add_vars(GGDC10S, seq(7,27,2)) <- GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(AGR:SUM) %>% fnobs("replace_fill") %>% add_stub("N_") head(GGDC10S) # # A tibble: 6 × 27 # Country Regioncode Region Variable Year AGR N_AGR MIN N_MIN MAN N_MAN PU N_PU CON # # 1 BWA SSA Sub-sa… VA 1960 NA 47 NA 47 NA 47 NA 47 NA # 2 BWA SSA Sub-sa… VA 1961 NA 47 NA 47 NA 47 NA 47 NA # 3 BWA SSA Sub-sa… VA 1962 NA 47 NA 47 NA 47 NA 47 NA # 4 BWA SSA Sub-sa… VA 1963 NA 47 NA 47 NA 47 NA 47 NA # 5 BWA SSA Sub-sa… VA 1964 16.3 47 3.49 47 0.737 47 0.104 47 0.660 # 6 BWA SSA Sub-sa… VA 1965 15.7 47 2.50 47 1.02 47 0.135 47 1.35 # # ℹ 13 more variables: N_CON , WRT , N_WRT , TRA , N_TRA , FIRE , # # N_FIRE , GOV , N_GOV , OTH , N_OTH , SUM , N_SUM rm(GGDC10S) ``` There are lots of other examples one could construct using the 10 operations and 14 functions listed above, the examples provided just outline the suggested programming basics. Performance considerations make it very much worthwhile to spend some time and think how complex operations can be implemented in this programming framework, before defining some function in R and applying it to data using `dplyr::mutate`. ### 2.3 More Control using the `TRA` Function Towards this end, calling `TRA()` directly also facilitates more complex and customized operations. Behind the scenes of the `TRA = ...` argument, the *Fast Statistical Functions* first compute the grouped statistics on all columns of the data, and these statistics are then directly fed into a C++ function that uses them to replace or sweep them out of data points in one of the 10 ways described above. This function can also be called directly by the name of `TRA`. Fundamentally, `TRA` is a generalization of `base::sweep` for column-wise grouped operations^[Row-wise operations are not supported by TRA.]. Direct calls to `TRA` enable more control over inputs and outputs. The two operations below are equivalent, although the first is slightly more efficient as it only requires one method dispatch and one check of the inputs: ```r # This divides by the product GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fprod(TRA = "/") %>% head # # A tibble: 6 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA NA NA # 5 1.29e-105 2.81e-127 1.40e-101 4.44e-74 4.19e-102 3.97e-113 6.91e-92 1.01e-97 2.51e-117 # 6 1.24e-105 2.00e-127 1.94e-101 5.75e-74 8.55e-102 4.49e-113 8.08e-92 1.13e-97 2.96e-117 # # ℹ 2 more variables: OTH , SUM # Same thing GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% TRA(fprod(., keep.group_vars = FALSE), "/") %>% head # [same as TRA(.,fprod(., keep.group_vars = FALSE),"/")] # # A tibble: 6 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA NA NA # 5 1.29e-105 2.81e-127 1.40e-101 4.44e-74 4.19e-102 3.97e-113 6.91e-92 1.01e-97 2.51e-117 # 6 1.24e-105 2.00e-127 1.94e-101 5.75e-74 8.55e-102 4.49e-113 8.08e-92 1.13e-97 2.96e-117 # # ℹ 2 more variables: OTH , SUM ``` `TRA.grouped_df` was designed such that it matches the columns of the statistics (aggregated columns) to those of the original data, and only transforms matching columns while returning the whole data frame. Thus it is easily possible to only apply a transformation to the first two sectors: ```r # This only demeans Agriculture (AGR) and Mining (MIN) GGDC10S %>% fgroup_by(Variable, Country) %>% TRA(fselect(., AGR, MIN) %>% fmean(keep.group_vars = FALSE), "-") %>% head # # A tibble: 6 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA SSA Sub-s… VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-s… VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA SSA Sub-s… VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA SSA Sub-s… VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA SSA Sub-s… VA 1964 -446. -4505. 0.737 0.104 0.660 6.24 1.66 1.12 4.82 # 6 BWA SSA Sub-s… VA 1965 -446. -4506. 1.02 0.135 1.35 7.06 1.94 1.25 5.70 # # ℹ 2 more variables: OTH , SUM ``` Since `TRA` is already built into all *Fast Statistical Functions* as an argument, it is best used in computations where grouped statistics are computed using some other function. ```r # Same as above, with one line of code using fmean.data.frame and ftransform... GGDC10S %>% ftransform(fmean(list(AGR = AGR, MIN = MIN), list(Variable, Country), TRA = "-")) %>% head # # A tibble: 6 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA SSA Sub-s… VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-s… VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA SSA Sub-s… VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA SSA Sub-s… VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA SSA Sub-s… VA 1964 -446. -4505. 0.737 0.104 0.660 6.24 1.66 1.12 4.82 # 6 BWA SSA Sub-s… VA 1965 -446. -4506. 1.02 0.135 1.35 7.06 1.94 1.25 5.70 # # ℹ 2 more variables: OTH , SUM ``` Another potential use of `TRA` is to do computations in two- or more steps, for example if both aggregated and transformed data are needed, or if computations are more complex and involve other manipulations in-between the aggregating and sweeping part: ```r # Get grouped tibble gGGDC <- GGDC10S %>% fgroup_by(Variable, Country) # Get aggregated data gsumGGDC <- gGGDC %>% fselect(AGR:SUM) %>% fsum head(gsumGGDC) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 88028. 3230. 1.20e5 6307. 4.60e4 1.23e5 4.02e4 3.89e4 1.27e5 6.15e4 6.54e5 # 2 EMP BOL 58817. 3418. 1.43e4 326. 7.49e3 1.72e4 7.04e3 2.72e3 NA 2.41e4 1.35e5 # 3 EMP BRA 1065864. 12773. 4.33e5 22604. 2.19e5 5.28e5 1.27e5 2.74e5 3.29e5 3.54e5 3.36e6 # 4 EMP BWA 8839. 493. 8.49e2 145. 1.19e3 1.71e3 3.93e2 7.21e2 2.87e3 1.30e3 1.85e4 # 5 EMP CHL 44220. 6389. 3.94e4 1850. 1.86e4 4.38e4 1.63e4 1.72e4 NA 6.32e4 2.51e5 # 6 EMP CHN 17264654. 422972. 4.03e6 96364. 1.25e6 1.73e6 8.36e5 2.96e5 1.36e6 1.86e6 2.91e7 # Get transformed (scaled) data head(TRA(gGGDC, gsumGGDC, "/")) # # A tibble: 6 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT # # 1 BWA SSA Sub-sahar… VA 1960 NA NA NA NA NA NA # 2 BWA SSA Sub-sahar… VA 1961 NA NA NA NA NA NA # 3 BWA SSA Sub-sahar… VA 1962 NA NA NA NA NA NA # 4 BWA SSA Sub-sahar… VA 1963 NA NA NA NA NA NA # 5 BWA SSA Sub-sahar… VA 1964 7.50e-4 1.65e-5 1.66e-5 1.03e-5 1.57e-5 6.82e-5 # 6 BWA SSA Sub-sahar… VA 1965 7.24e-4 1.18e-5 2.30e-5 1.33e-5 3.20e-5 7.72e-5 # # ℹ 5 more variables: TRA , FIRE , GOV , OTH , SUM ``` As discussed, whether using the argument to fast statistical functions or `TRA` directly, these data transformations are essentially a two-step process: Statistics are first computed and then used to transform the original data. Although both steps are efficiently done in C++, it would be even more efficient to do them in a single step without materializing all the statistics before transforming the data. Such slightly more efficient functions are provided for the very commonly applied tasks of centering and averaging data by groups (widely known as 'between'-group and 'within'-group transformations), and scaling and centering data by groups (also known as 'standardizing' data). ### 2.4 Faster Centering, Averaging and Standardizing The functions `fbetween` and `fwithin` are slightly more memory efficient implementations of `fmean` invoked with different `TRA` options: ```r GGDC10S %>% # Same as ... %>% fmean(TRA = "replace") fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fbetween %>% tail(2) # # A tibble: 2 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. # 2 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. GGDC10S %>% # Same as ... %>% fmean(TRA = "replace_fill") fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fbetween(fill = TRUE) %>% tail(2) # # A tibble: 2 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. # 2 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. GGDC10S %>% # Same as ... %>% fmean(TRA = "-") fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fwithin %>% tail(2) # # A tibble: 2 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 742. -7.35 760. 187. 1798. 1713. 1249. 495. 2678. NA 9614. # 2 717. -10.1 734. 194. 1934. 1803. 1266. 512. 2778. NA 9928. ``` Apart from higher speed, `fwithin` has a `mean` argument to assign an arbitrary mean to centered data, the default being `mean = 0`. A very common choice for such an added mean is just the overall mean of the data, which can be added in by invoking `mean = "overall.mean"`: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) %>% fwithin(mean = "overall.mean") %>% tail(3) # # A tibble: 3 × 13 # Country Variable AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EGY EMP 2527458. 1867903. 5539313. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.71e6 NA 2.16e7 # 2 EGY EMP 2527439. 1867902. 5539251. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.71e6 NA 2.16e7 # 3 EGY EMP 2527413. 1867899. 5539226. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.72e6 NA 2.16e7 ``` This can also be done using weights. The code below uses the `SUM` column as weights, and then for each variable and each group subtracts out the weighted mean, and then adds the overall weighted column mean back to the centered columns. The `SUM` column is just kept as it is and added after the grouping columns. ```r GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) %>% fwithin(SUM, mean = "overall.mean") %>% tail(3) # # A tibble: 3 × 13 # Country Variable SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 EGY EMP 22020. 429066006. 3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8 NA # 2 EGY EMP 22219. 429065986. 3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8 NA # 3 EGY EMP 22533. 429065961. 3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8 NA ``` Another argument to `fwithin` is the `theta` parameter, allowing partial- or quasi-demeaning operations, e.g. `fwithin(gdata, theta = theta)` is equal to `gdata - theta * fbetween(gdata)`. This is particularly useful to prepare data for variance components (also known as 'random-effects') estimation. Apart from `fbetween` and `fwithin`, the function `fscale` exists to efficiently scale and center data, to avoid sequential calls such as `... %>% fsd(TRA = "/") %>% fmean(TRA = "-")`. ```r # This efficiently scales and centers (i.e. standardizes) the data GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) %>% fscale # # A tibble: 5,027 × 13 # Country Variable AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # * # 1 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 2 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 3 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 4 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 5 BWA VA -0.738 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.596 -0.676 # 6 BWA VA -0.739 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.596 -0.676 # 7 BWA VA -0.736 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.595 -0.676 # 8 BWA VA -0.734 -0.717 -0.668 -0.805 -0.692 -0.604 -0.589 -0.635 -0.655 -0.595 -0.676 # 9 BWA VA -0.730 -0.717 -0.668 -0.805 -0.692 -0.604 -0.588 -0.635 -0.656 -0.596 -0.676 # 10 BWA VA -0.729 -0.716 -0.667 -0.803 -0.690 -0.603 -0.588 -0.635 -0.656 -0.596 -0.675 # # ℹ 5,017 more rows # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` `fscale` also has additional `mean` and `sd` arguments allowing the user to (group-) scale data to an arbitrary mean and standard deviation. Setting `mean = FALSE` just scales the data but preserves the means, and is thus different from `fsd(..., TRA = "/")` which simply divides all values by the standard deviation: ```r # Saving grouped tibble gGGDC <- GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) # Original means head(fmean(gGGDC)) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1420. 52.1 1932. 102. 742. 1.98e3 6.49e2 628. 2043. 9.92e2 1.05e4 # 2 EMP BOL 964. 56.0 235. 5.35 123. 2.82e2 1.15e2 44.6 NA 3.96e2 2.22e3 # 3 EMP BRA 17191. 206. 6991. 365. 3525. 8.51e3 2.05e3 4414. 5307. 5.71e3 5.43e4 # 4 EMP BWA 188. 10.5 18.1 3.09 25.3 3.63e1 8.36e0 15.3 61.1 2.76e1 3.94e2 # 5 EMP CHL 702. 101. 625. 29.4 296. 6.95e2 2.58e2 272. NA 1.00e3 3.98e3 # 6 EMP CHN 287744. 7050. 67144. 1606. 20852. 2.89e4 1.39e4 4929. 22669. 3.10e4 4.86e5 # Mean Preserving Scaling head(fmean(fscale(gGGDC, mean = FALSE))) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1420. 52.1 1932. 102. 742. 1.98e3 6.49e2 628. 2043. 9.92e2 1.05e4 # 2 EMP BOL 964. 56.0 235. 5.35 123. 2.82e2 1.15e2 44.6 NA 3.96e2 2.22e3 # 3 EMP BRA 17191. 206. 6991. 365. 3525. 8.51e3 2.05e3 4414. 5307. 5.71e3 5.43e4 # 4 EMP BWA 188. 10.5 18.1 3.09 25.3 3.63e1 8.36e0 15.3 61.1 2.76e1 3.94e2 # 5 EMP CHL 702. 101. 625. 29.4 296. 6.95e2 2.58e2 272. NA 1.00e3 3.98e3 # 6 EMP CHN 287744. 7050. 67144. 1606. 20852. 2.89e4 1.39e4 4929. 22669. 3.10e4 4.86e5 head(fsd(fscale(gGGDC, mean = FALSE))) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 # 2 EMP BOL 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 NA 1.00 1.00 # 3 EMP BRA 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 # 4 EMP BWA 1.00 1.00 1.00 1 1.00 1.00 1.00 1 1.00 1.00 1.00 # 5 EMP CHL 1.00 1 1.00 1.00 1.00 1.00 1.00 1.00 NA 1.00 1.00 # 6 EMP CHN 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 ``` One can also set `mean = "overall.mean"`, which group-centers columns on the overall mean as illustrated with `fwithin`. Another interesting option is setting `sd = "within.sd"`. This group-scales data such that every group has a standard deviation equal to the within-standard deviation of the data: ```r # Just using VA data for this example gGGDC <- GGDC10S %>% fsubset(Variable == "VA", Country, AGR:SUM) %>% fgroup_by(Country) # This calculates the within- standard deviation for all columns fsd(num_vars(ungroup(fwithin(gGGDC)))) # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # 45046972 40122220 75608708 3062688 30811572 44125207 20676901 16030868 20358973 18780869 # SUM # 306429102 # This scales all groups to take on the within- standard deviation while preserving group means fsd(fscale(gGGDC, mean = FALSE, sd = "within.sd")) # # A tibble: 43 × 12 # Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 ARG 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 2 BOL 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 NA 1.88e7 3.06e8 # 3 BRA 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 4 BWA 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 5 CHL 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 NA 1.88e7 3.06e8 # 6 CHN 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 7 COL 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 NA 1.88e7 3.06e8 # 8 CRI 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 9 DEW 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 10 DNK 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # # ℹ 33 more rows ``` A grouped scaling operation with both `mean = "overall.mean"` and `sd = "within.sd"` thus efficiently achieves a harmonization of all groups in the first two moments without changing the fundamental properties (in terms of level and scale) of the data. ### 2.5 Lags / Leads, Differences and Growth Rates This section introduces 3 further powerful *collapse* functions: `flag`, `fdiff` and `fgrowth`. The first function, `flag`, efficiently computes sequences of fully identified lags and leads on time series and panel data. The following code computes 1 fully-identified panel-lag and 1 fully identified panel-lead of each variable in the data: ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% flag(-1:1, Year) # # A tibble: 5,027 × 36 # Country Variable Year F1.AGR AGR L1.AGR F1.MIN MIN L1.MIN F1.MAN MAN L1.MAN F1.PU PU # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 16.3 NA NA 3.49 NA NA 0.737 NA NA 0.104 NA # 5 BWA VA 1964 15.7 16.3 NA 2.50 3.49 NA 1.02 0.737 NA 0.135 0.104 # 6 BWA VA 1965 17.7 15.7 16.3 1.97 2.50 3.49 0.804 1.02 0.737 0.203 0.135 # 7 BWA VA 1966 19.1 17.7 15.7 2.30 1.97 2.50 0.938 0.804 1.02 0.203 0.203 # 8 BWA VA 1967 21.1 19.1 17.7 1.84 2.30 1.97 0.750 0.938 0.804 0.203 0.203 # 9 BWA VA 1968 21.9 21.1 19.1 5.24 1.84 2.30 2.14 0.750 0.938 0.578 0.203 # 10 BWA VA 1969 23.1 21.9 21.1 10.2 5.24 1.84 4.15 2.14 0.750 1.12 0.578 # # ℹ 5,017 more rows # # ℹ 22 more variables: L1.PU , F1.CON , CON , L1.CON , F1.WRT , WRT , # # L1.WRT , F1.TRA , TRA , L1.TRA , F1.FIRE , FIRE , L1.FIRE , # # F1.GOV , GOV , L1.GOV , F1.OTH , OTH , L1.OTH , F1.SUM , # # SUM , L1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` If the time-variable passed does not exactly identify the data (i.e. because of repeated values in each group), all 3 functions will issue appropriate error messages. `flag`, `fdiff` and `fgrowth` support irregular time series and unbalanced panels. It is also possible to omit the time-variable if one is certain that the data is sorted: ```r GGDC10S %>% fselect(Variable, Country,AGR:SUM) %>% fgroup_by(Variable, Country) %>% flag # # A tibble: 5,027 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # * # 1 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 5 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 6 VA BWA 16.3 3.49 0.737 0.104 0.660 6.24 1.66 1.12 4.82 2.34 37.5 # 7 VA BWA 15.7 2.50 1.02 0.135 1.35 7.06 1.94 1.25 5.70 2.68 39.3 # 8 VA BWA 17.7 1.97 0.804 0.203 1.35 8.27 2.15 1.36 6.37 2.99 43.1 # 9 VA BWA 19.1 2.30 0.938 0.203 0.897 4.31 1.72 1.54 7.04 3.31 41.4 # 10 VA BWA 21.1 1.84 0.750 0.203 1.22 5.17 2.44 1.03 5.03 2.36 41.1 # # ℹ 5,017 more rows # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` `fdiff` computes sequences of lagged-leaded and iterated differences as well as quasi-differences and log-differences on time series and panel data. The code below computes the 1 and 10 year first and second differences of each variable in the data: ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fdiff(c(1, 10), 1:2, Year) # # A tibble: 5,027 × 47 # Country Variable Year D1.AGR D2.AGR L10D1.AGR L10D2.AGR D1.MIN D2.MIN L10D1.MIN L10D2.MIN D1.MAN # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA NA NA # 6 BWA VA 1965 -0.575 NA NA NA -0.998 NA NA NA 0.282 # 7 BWA VA 1966 1.95 2.53 NA NA -0.525 0.473 NA NA -0.214 # 8 BWA VA 1967 1.47 -0.488 NA NA 0.328 0.854 NA NA 0.134 # 9 BWA VA 1968 1.95 0.488 NA NA -0.460 -0.788 NA NA -0.188 # 10 BWA VA 1969 0.763 -1.19 NA NA 3.41 3.87 NA NA 1.39 # # ℹ 5,017 more rows # # ℹ 35 more variables: D2.MAN , L10D1.MAN , L10D2.MAN , D1.PU , D2.PU , # # L10D1.PU , L10D2.PU , D1.CON , D2.CON , L10D1.CON , L10D2.CON , # # D1.WRT , D2.WRT , L10D1.WRT , L10D2.WRT , D1.TRA , D2.TRA , # # L10D1.TRA , L10D2.TRA , D1.FIRE , D2.FIRE , L10D1.FIRE , # # L10D2.FIRE , D1.GOV , D2.GOV , L10D1.GOV , L10D2.GOV , D1.OTH , # # D2.OTH , L10D1.OTH , L10D2.OTH , D1.SUM , D2.SUM , L10D1.SUM , … # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Log-differences of the form $log(x_t) - log(x_{t-s})$ are also easily computed. ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fdiff(c(1, 10), 1, Year, log = TRUE) # # A tibble: 5,027 × 25 # Country Variable Year Dlog1.AGR L10Dlog1.AGR Dlog1.MIN L10Dlog1.MIN Dlog1.MAN L10Dlog1.MAN # * # 1 BWA VA 1960 NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA # 6 BWA VA 1965 -0.0359 NA -0.336 NA 0.324 NA # 7 BWA VA 1966 0.117 NA -0.236 NA -0.236 NA # 8 BWA VA 1967 0.0796 NA 0.154 NA 0.154 NA # 9 BWA VA 1968 0.0972 NA -0.223 NA -0.223 NA # 10 BWA VA 1969 0.0355 NA 1.05 NA 1.05 NA # # ℹ 5,017 more rows # # ℹ 16 more variables: Dlog1.PU , L10Dlog1.PU , Dlog1.CON , L10Dlog1.CON , # # Dlog1.WRT , L10Dlog1.WRT , Dlog1.TRA , L10Dlog1.TRA , Dlog1.FIRE , # # L10Dlog1.FIRE , Dlog1.GOV , L10Dlog1.GOV , Dlog1.OTH , L10Dlog1.OTH , # # Dlog1.SUM , L10Dlog1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Finally, it is also possible to compute quasi-differences and quasi-log-differences of the form $x_t - \rho x_{t-s}$ or $log(x_t) - \rho log(x_{t-s})$: ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fdiff(t = Year, rho = 0.95) # # A tibble: 5,027 × 14 # Country Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA NA NA NA # 6 BWA VA 1965 0.241 -0.824 0.318 0.0359 0.719 1.13 0.363 0.184 1.11 0.454 # 7 BWA VA 1966 2.74 -0.401 -0.163 0.0743 0.0673 1.56 0.312 0.174 0.955 0.449 # 8 BWA VA 1967 2.35 0.427 0.174 0.0101 -0.381 -3.55 -0.323 0.246 0.988 0.465 # 9 BWA VA 1968 2.91 -0.345 -0.141 0.0101 0.365 1.08 0.804 -0.427 -1.66 -0.780 # 10 BWA VA 1969 1.82 3.50 1.43 0.385 2.32 0.841 0.397 0.252 0.818 0.385 # # ℹ 5,017 more rows # # ℹ 1 more variable: SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` The quasi-differencing feature was added to `fdiff` to facilitate the preparation of time series and panel data for least-squares estimations suffering from serial correlation following Cochrane & Orcutt (1949). Finally, `fgrowth` computes growth rates in the same way. By default exact growth rates are computed in percentage terms using $(x_t-x_{t-s}) / x_{t-s} \times 100$ (the default argument is `scale = 100`). The user can also request growth rates obtained by log-differencing using $log(x_t/ x_{t-s}) \times 100$. ```r # Exact growth rates, computed as: (x/lag(x) - 1) * 100 GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year) # # A tibble: 5,027 × 25 # Country Variable Year G1.AGR L10G1.AGR G1.MIN L10G1.MIN G1.MAN L10G1.MAN G1.PU L10G1.PU G1.CON # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA NA NA # 6 BWA VA 1965 -3.52 NA -28.6 NA 38.2 NA 29.4 NA 104. # 7 BWA VA 1966 12.4 NA -21.1 NA -21.1 NA 50 NA 0 # 8 BWA VA 1967 8.29 NA 16.7 NA 16.7 NA 0 NA -33.3 # 9 BWA VA 1968 10.2 NA -20 NA -20 NA 0 NA 35.7 # 10 BWA VA 1969 3.61 NA 185. NA 185. NA 185. NA 185. # # ℹ 5,017 more rows # # ℹ 13 more variables: L10G1.CON , G1.WRT , L10G1.WRT , G1.TRA , # # L10G1.TRA , G1.FIRE , L10G1.FIRE , G1.GOV , L10G1.GOV , G1.OTH , # # L10G1.OTH , G1.SUM , L10G1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] # Log-difference growth rates, computed as: log(x / lag(x)) * 100 GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year, logdiff = TRUE) # # A tibble: 5,027 × 25 # Country Variable Year Dlog1.AGR L10Dlog1.AGR Dlog1.MIN L10Dlog1.MIN Dlog1.MAN L10Dlog1.MAN # * # 1 BWA VA 1960 NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA # 6 BWA VA 1965 -3.59 NA -33.6 NA 32.4 NA # 7 BWA VA 1966 11.7 NA -23.6 NA -23.6 NA # 8 BWA VA 1967 7.96 NA 15.4 NA 15.4 NA # 9 BWA VA 1968 9.72 NA -22.3 NA -22.3 NA # 10 BWA VA 1969 3.55 NA 105. NA 105. NA # # ℹ 5,017 more rows # # ℹ 16 more variables: Dlog1.PU , L10Dlog1.PU , Dlog1.CON , L10Dlog1.CON , # # Dlog1.WRT , L10Dlog1.WRT , Dlog1.TRA , L10Dlog1.TRA , Dlog1.FIRE , # # L10Dlog1.FIRE , Dlog1.GOV , L10Dlog1.GOV , Dlog1.OTH , L10Dlog1.OTH , # # Dlog1.SUM , L10Dlog1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` `fdiff` and `fgrowth` can also perform leaded (forward) differences and growth rates (i.e. `... %>% fgrowth(-c(1, 10), 1:2, Year)` would compute one and 10-year leaded first and second differences). Again it is possible to perform sequential operations: ```r # This computes the 1 and 10-year growth rates, for the current period and lagged by one period GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year) %>% flag(0:1, Year) # # A tibble: 5,027 × 47 # Country Variable Year G1.AGR L1.G1.AGR L10G1.AGR L1.L10G1.AGR G1.MIN L1.G1.MIN L10G1.MIN # * # 1 BWA VA 1960 NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA # 6 BWA VA 1965 -3.52 NA NA NA -28.6 NA NA # 7 BWA VA 1966 12.4 -3.52 NA NA -21.1 -28.6 NA # 8 BWA VA 1967 8.29 12.4 NA NA 16.7 -21.1 NA # 9 BWA VA 1968 10.2 8.29 NA NA -20 16.7 NA # 10 BWA VA 1969 3.61 10.2 NA NA 185. -20 NA # # ℹ 5,017 more rows # # ℹ 37 more variables: L1.L10G1.MIN , G1.MAN , L1.G1.MAN , L10G1.MAN , # # L1.L10G1.MAN , G1.PU , L1.G1.PU , L10G1.PU , L1.L10G1.PU , # # G1.CON , L1.G1.CON , L10G1.CON , L1.L10G1.CON , G1.WRT , # # L1.G1.WRT , L10G1.WRT , L1.L10G1.WRT , G1.TRA , L1.G1.TRA , # # L10G1.TRA , L1.L10G1.TRA , G1.FIRE , L1.G1.FIRE , L10G1.FIRE , # # L1.L10G1.FIRE , G1.GOV , L1.G1.GOV , L10G1.GOV , L1.L10G1.GOV , … # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` ## 3. Benchmarks This section seeks to demonstrate that the functionality introduced in the preceding 2 sections indeed produces code that evaluates substantially faster than native *dplyr*. To do this properly, the different components of a typical piped call (selecting / subsetting, ordering, grouping, and performing some computation) are benchmarked separately on 2 different data sizes. All benchmarks are run on a Windows 8.1 laptop with a 2x 2.2 GHZ Intel i5 processor, 8GB DDR3 RAM and a Samsung 850 EVO SSD hard drive. ### 3.1 Data Benchmarks are run on the original `GGDC10S` data used throughout this vignette and a larger dataset with approx. 1 million observations, obtained by replicating and row-binding `GGDC10S` 200 times while maintaining unique groups. ```r # This shows the groups in GGDC10S GRP(GGDC10S, ~ Variable + Country) # collapse grouping object of length 5027 with 85 ordered groups # # Call: GRP.default(X = GGDC10S, by = ~Variable + Country), X is unsorted # # Distribution of group sizes: # Min. 1st Qu. Median Mean 3rd Qu. Max. # 4.00 53.00 62.00 59.14 63.00 65.00 # # Groups with sizes: # EMP.ARG EMP.BOL EMP.BRA EMP.BWA EMP.CHL EMP.CHN # 62 61 62 52 63 62 # --- # VA.TWN VA.TZA VA.USA VA.VEN VA.ZAF VA.ZMB # 63 52 65 63 52 52 # This replicates the data 200 times data <- replicate(200, GGDC10S, simplify = FALSE) # This function adds a number i to the country and variable columns of each dataset uniquify <- function(x, i) ftransform(x, lapply(unclass(x)[c(1,4)], paste0, i)) # Making datasets unique and row-binding them data <- unlist2d(Map(uniquify, data, as.list(1:200)), idcols = FALSE) fdim(data) # [1] 1005400 16 # This shows the groups in the replicated data GRP(data, ~ Variable + Country) # collapse grouping object of length 1005400 with 17000 ordered groups # # Call: GRP.default(X = data, by = ~Variable + Country), X is unsorted # # Distribution of group sizes: # Min. 1st Qu. Median Mean 3rd Qu. Max. # 4.00 53.00 62.00 59.14 63.00 65.00 # # Groups with sizes: # EMP1.ARG1 EMP1.BOL1 EMP1.BRA1 EMP1.BWA1 EMP1.CHL1 EMP1.CHN1 # 62 61 62 52 63 62 # --- # VA99.TWN99 VA99.TZA99 VA99.USA99 VA99.VEN99 VA99.ZAF99 VA99.ZMB99 # 63 52 65 63 52 52 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3184710 170.1 8862174 473.3 NA 8862174 473.3 # Vcells 23965820 182.9 147787078 1127.6 16384 445825141 3401.4 ``` ### 3.1 Selecting, Subsetting, Ordering and Grouping ```r ## Selecting columns # Small microbenchmark(dplyr = select(GGDC10S, Country, Variable, AGR:SUM), collapse = fselect(GGDC10S, Country, Variable, AGR:SUM)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 400.775 410.7585 425.43117 416.396 424.637 820.041 100 # collapse 2.911 3.4645 4.59856 4.469 5.412 15.293 100 # Large microbenchmark(dplyr = select(data, Country, Variable, AGR:SUM), collapse = fselect(data, Country, Variable, AGR:SUM)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 388.926 396.429 412.67730 402.9890 411.0455 728.734 100 # collapse 2.870 3.280 4.44686 3.8335 5.3300 12.669 100 ## Subsetting columns # Small microbenchmark(dplyr = filter(GGDC10S, Variable == "VA"), collapse = fsubset(GGDC10S, Variable == "VA")) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 374.084 394.4405 409.23986 401.0005 414.3050 716.475 100 # collapse 39.278 48.2775 55.85307 55.5550 60.4545 103.320 100 # Large microbenchmark(dplyr = filter(data, Variable == "VA"), collapse = fsubset(data, Variable == "VA")) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 4.487409 5.242752 8.352270 5.653223 6.434048 159.13658 100 # collapse 2.840808 3.082359 3.469128 3.163478 3.302714 16.56047 100 ## Ordering rows # Small microbenchmark(dplyr = arrange(GGDC10S, desc(Country), Variable, Year), collapse = roworder(GGDC10S, -Country, Variable, Year)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 1715.112 1867.4270 1983.4726 2015.109 2080.7500 2367.791 100 # collapse 192.495 232.4085 256.3878 247.968 258.7715 1055.381 100 # Large microbenchmark(dplyr = arrange(data, desc(Country), Variable, Year), collapse = roworder(data, -Country, Variable, Year), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 89.37512 89.37512 101.05180 101.05180 112.72848 112.72848 2 # collapse 66.46703 66.46703 67.45254 67.45254 68.43806 68.43806 2 ## Grouping # Small microbenchmark(dplyr = group_by(GGDC10S, Country, Variable), collapse = fgroup_by(GGDC10S, Country, Variable)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 778.713 815.1825 911.3484 874.2225 960.3840 1529.874 100 # collapse 146.534 157.6245 198.5921 165.0660 177.3455 1484.241 100 # Large microbenchmark(dplyr = group_by(data, Country, Variable), collapse = fgroup_by(data, Country, Variable), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 34.20294 34.62839 34.88041 34.88432 35.07821 35.48279 10 # collapse 27.89972 28.03211 28.55175 28.36954 29.32283 29.54206 10 ## Computing a new column # Small microbenchmark(dplyr = mutate(GGDC10S, NEW = AGR+1), collapse = ftransform(GGDC10S, NEW = AGR+1)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 317.463 321.7270 333.38822 324.9660 333.7810 631.564 100 # collapse 8.897 11.0495 12.95354 12.4435 14.2065 38.991 100 # Large microbenchmark(dplyr = mutate(data, NEW = AGR+1), collapse = ftransform(data, NEW = AGR+1)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 637.878 1084.225 1330.006 1164.6665 1291.2335 15869.05 100 # collapse 210.740 657.025 1021.434 698.3735 781.7675 16725.09 100 ## All combined with pipes # Small microbenchmark(dplyr = filter(GGDC10S, Variable == "VA") %>% select(Country, Year, AGR:SUM) %>% arrange(desc(Country), Year) %>% mutate(NEW = AGR+1) %>% group_by(Country), collapse = fsubset(GGDC10S, Variable == "VA", Country, Year, AGR:SUM) %>% roworder(-Country, Year) %>% ftransform(NEW = AGR+1) %>% fgroup_by(Country)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 2982.340 3416.325 3525.7983 3538.464 3668.516 5034.021 100 # collapse 136.858 186.632 214.4681 211.683 243.130 314.470 100 # Large microbenchmark(dplyr = filter(data, Variable == "VA") %>% select(Country, Year, AGR:SUM) %>% arrange(desc(Country), Year) %>% mutate(NEW = AGR+1) %>% group_by(Country), collapse = fsubset(data, Variable == "VA", Country, Year, AGR:SUM) %>% roworder(-Country, Year) %>% ftransform(NEW = AGR+1) %>% fgroup_by(Country), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 7.917182 7.997378 8.142653 8.109943 8.292291 8.423163 10 # collapse 3.080289 3.104028 3.150153 3.140969 3.188365 3.251259 10 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3184728 170.1 8862174 473.3 NA 8862174 473.3 # Vcells 23970594 182.9 75772825 578.2 16384 445825141 3401.4 ``` ### 3.1 Aggregation ```r ## Grouping the data cgGGDC10S <- fgroup_by(GGDC10S, Variable, Country) %>% fselect(-Region, -Regioncode) gGGDC10S <- group_by(GGDC10S, Variable, Country) %>% fselect(-Region, -Regioncode) cgdata <- fgroup_by(data, Variable, Country) %>% fselect(-Region, -Regioncode) gdata <- group_by(data, Variable, Country) %>% fselect(-Region, -Regioncode) rm(data, GGDC10S) gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3201723 171 8862174 473.3 NA 8862174 473.3 # Vcells 23589381 180 75772825 578.2 16384 445825141 3401.4 ## Conversion of Grouping object: This time would be required extra in all hybrid calls ## i.e. when calling collapse functions on data grouped with dplyr::group_by # Small microbenchmark(GRP(gGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # GRP(gGGDC10S) 8.692 9.2455 10.16021 9.4915 10.086 39.196 100 # Large microbenchmark(GRP(gdata)) # Unit: microseconds # expr min lq mean median uq max neval # GRP(gdata) 885.641 1160.915 1248.258 1237.236 1323.234 1651.398 100 ## Sum # Small microbenchmark(dplyr = summarise_all(gGGDC10S, sum, na.rm = TRUE), collapse = fsum(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 3017.723 3354.1895 3733.4739 3620.9560 3738.441 22135.736 100 # collapse 218.120 227.3655 236.7693 235.1965 244.852 270.805 100 # Large microbenchmark(dplyr = summarise_all(gdata, sum, na.rm = TRUE), collapse = fsum(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 272.9737 279.91024 305.02067 283.59737 303.57122 448.07629 10 # collapse 41.5330 41.63214 41.88717 41.77062 41.96059 42.78662 10 ## Mean # Small microbenchmark(dplyr = summarise_all(gGGDC10S, mean.default, na.rm = TRUE), collapse = fmean(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 4360.104 4596.6740 5125.4194 4754.791 5005.710 37144.852 100 # collapse 169.084 174.3935 185.4594 183.434 194.832 221.933 100 # Large microbenchmark(dplyr = summarise_all(gdata, mean.default, na.rm = TRUE), collapse = fmean(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 623.5123 642.83748 704.39836 681.32260 786.82731 829.74435 10 # collapse 31.7636 31.88037 32.00222 31.99445 32.08209 32.43875 10 ## Median # Small microbenchmark(dplyr = summarise_all(gGGDC10S, median, na.rm = TRUE), collapse = fmedian(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 14399.118 14849.933 16170.3500 14982.5685 15145.892 33613.235 100 # collapse 137.596 164.902 189.2056 178.1245 214.676 248.624 100 # Large microbenchmark(dplyr = summarise_all(gdata, median, na.rm = TRUE), collapse = fmedian(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 2826.83036 2826.83036 2828.12912 2828.12912 2829.42788 2829.42788 2 # collapse 19.95564 19.95564 19.98524 19.98524 20.01485 20.01485 2 ## Standard Deviation # Small microbenchmark(dplyr = summarise_all(gGGDC10S, sd, na.rm = TRUE), collapse = fsd(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 8332.635 8612.5215 9365.1216 8712.766 8989.086 25087.982 100 # collapse 242.228 251.0225 269.7849 273.552 282.326 321.891 100 # Large microbenchmark(dplyr = summarise_all(gdata, sd, na.rm = TRUE), collapse = fsd(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 1375.80363 1375.80363 1409.60358 1409.60358 1443.40352 1443.40352 2 # collapse 46.21713 46.21713 56.88205 56.88205 67.54697 67.54697 2 ## Maximum # Small microbenchmark(dplyr = summarise_all(gGGDC10S, max, na.rm = TRUE), collapse = fmax(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 39964.504 41008.8560 43577.92707 41448.273 44195.1095 58816.550 100 # collapse 68.798 74.7225 87.83389 77.572 100.9215 129.519 100 # Large microbenchmark(dplyr = summarise_all(gdata, max, na.rm = TRUE), collapse = fmax(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 480.83804 490.9982 540.7374 517.86136 533.85723 687.14713 10 # collapse 11.40116 11.7745 11.9366 11.85156 11.94908 13.18318 10 ## First Value # Small microbenchmark(dplyr = summarise_all(gGGDC10S, first), collapse = ffirst(cgGGDC10S, na.rm = FALSE)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 4147.888 4242.249 4801.88966 4383.248 4701.532 19254.215 100 # collapse 11.685 14.227 26.25476 24.764 35.301 137.514 100 # Large microbenchmark(dplyr = summarise_all(gdata, first), collapse = ffirst(cgdata, na.rm = FALSE), times = 10) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 530327.66 558767.393 637499.226 596503.08 672801.103 969373.660 10 # collapse 872.89 999.088 1087.845 1068.87 1204.416 1289.327 10 ## Number of Distinct Values # Small microbenchmark(dplyr = summarise_all(gGGDC10S, n_distinct, na.rm = TRUE), collapse = fndistinct(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 11316.574 11600.847 12573.1010 11759.435 11939.487 31659.667 100 # collapse 189.051 205.164 226.0933 235.422 239.604 443.661 100 # Large microbenchmark(dplyr = summarise_all(gdata, n_distinct, na.rm = TRUE), collapse = fndistinct(cgdata), times = 5) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 2044.13376 2110.16926 2133.91960 2138.07456 2154.39797 2222.82246 5 # collapse 30.65443 30.94582 31.51081 31.17123 31.17972 33.60286 5 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3972309 212.2 8862174 473.3 NA 8862174 473.3 # Vcells 24857303 189.7 75772825 578.2 16384 445825141 3401.4 ``` Below are some additional benchmarks for weighted aggregations and aggregations using the statistical mode, which cannot easily or efficiently be performed with *dplyr*. ```r ## Weighted Mean # Small microbenchmark(fmean(cgGGDC10S, SUM)) # Unit: microseconds # expr min lq mean median uq max neval # fmean(cgGGDC10S, SUM) 195.488 200.4285 218.2836 211.1295 218.8375 444.276 100 # Large microbenchmark(fmean(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fmean(cgdata, SUM) 34.73516 35.28276 35.66689 35.32257 36.44802 36.80722 10 ## Weighted Standard-Deviation # Small microbenchmark(fsd(cgGGDC10S, SUM)) # Unit: microseconds # expr min lq mean median uq max neval # fsd(cgGGDC10S, SUM) 243.048 244.606 249.2181 246.9635 249.444 323.9 100 # Large microbenchmark(fsd(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fsd(cgdata, SUM) 44.905 44.93116 45.15391 45.01095 45.22677 46.14689 10 ## Statistical Mode # Small microbenchmark(fmode(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # fmode(cgGGDC10S) 245.098 248.3575 253.4809 250.6945 253.9335 420.619 100 # Large microbenchmark(fmode(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fmode(cgdata) 40.26151 41.82082 41.63019 41.88382 42.0232 42.0587 10 ## Weighted Statistical Mode # Small microbenchmark(fmode(cgGGDC10S, SUM)) # Unit: microseconds # expr min lq mean median uq max neval # fmode(cgGGDC10S, SUM) 330.993 333.535 337.7744 334.5395 337.3685 447.187 100 # Large microbenchmark(fmode(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fmode(cgdata, SUM) 57.69815 57.78466 57.98187 57.84567 58.09942 58.81835 10 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3971768 212.2 8862174 473.3 NA 8862174 473.3 # Vcells 24853915 189.7 75772825 578.2 16384 445825141 3401.4 ``` ### 3.2 Transformation ```r ## Replacing with group sum # Small microbenchmark(dplyr = mutate_all(gGGDC10S, sum, na.rm = TRUE), collapse = fsum(cgGGDC10S, TRA = "replace_fill")) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 13088.102 13223.340 14388.9000 13359.7680 14380.05 29060.554 100 # collapse 238.456 273.757 292.1693 293.9905 312.01 388.106 100 # Large microbenchmark(dplyr = mutate_all(gdata, sum, na.rm = TRUE), collapse = fsum(cgdata, TRA = "replace_fill"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 391.63618 679.62609 662.91807 716.40975 729.7527 749.4973 10 # collapse 49.63788 50.24189 61.77658 55.18416 63.4596 111.6039 10 ## Dividing by group sum # Small microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) x/sum(x, na.rm = TRUE)), collapse = fsum(cgGGDC10S, TRA = "/")) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 13058.992 13203.8450 14294.3733 13321.41 13880.796 42300.028 100 # collapse 242.884 268.5295 278.8541 274.29 294.585 330.255 100 # Large microbenchmark(dplyr = mutate_all(gdata, function(x) x/sum(x, na.rm = TRUE)), collapse = fsum(cgdata, TRA = "/"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 474.9046 654.6199 796.14248 907.32863 942.32567 999.2501 10 # collapse 49.3542 50.9056 84.66647 52.05635 74.51705 325.4319 10 ## Centering # Small microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) x-mean.default(x, na.rm = TRUE)), collapse = fwithin(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 14460.04 14769.4095 15977.4942 14859.815 15013.421 37113.077 100 # collapse 203.77 229.7845 246.5043 242.638 266.664 293.191 100 # Large microbenchmark(dplyr = mutate_all(gdata, function(x) x-mean.default(x, na.rm = TRUE)), collapse = fwithin(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 893.06503 925.50231 1217.2225 1259.34620 1445.254 1545.5490 10 # collapse 43.90731 56.97093 143.4797 73.39498 152.872 429.3341 10 ## Centering and Scaling (Standardizing) # Small microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) (x-mean.default(x, na.rm = TRUE))/sd(x, na.rm = TRUE)), collapse = fscale(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 20275.033 21145.524 24976.1242 22214.190 25194.0285 79869.435 100 # collapse 277.775 304.958 323.3613 314.388 338.2705 437.388 100 # Large microbenchmark(dplyr = mutate_all(gdata, function(x) (x-mean.default(x, na.rm = TRUE))/sd(x, na.rm = TRUE)), collapse = fscale(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 2118.97696 2118.97696 2315.9282 2315.9282 2512.87938 2512.87938 2 # collapse 60.17144 60.17144 60.6284 60.6284 61.08537 61.08537 2 ## Lag # Small microbenchmark(dplyr_unordered = mutate(gGGDC10S, across(everything(), dplyr::lag)), collapse_unordered = flag(cgGGDC10S), dplyr_ordered = mutate(gGGDC10S, across(everything(), \(x) dplyr::lag(x, order_by = Year))), collapse_ordered = flag(cgGGDC10S, t = Year)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr_unordered 14495.386 14796.101 17579.85413 15265.3250 15889.7550 49137.721 100 # collapse_unordered 48.544 75.071 90.29225 86.6330 109.6545 225.377 100 # dplyr_ordered 24893.437 25327.607 27521.59809 25904.9275 27136.2190 51312.074 100 # collapse_ordered 80.196 107.953 120.85160 117.5675 131.6715 189.051 100 # Large microbenchmark(dplyr_unordered = mutate(gdata, across(everything(), dplyr::lag)), collapse_unordered = flag(cgdata), dplyr_ordered = mutate(gdata, across(everything(), \(x) dplyr::lag(x, order_by = Year))), collapse_ordered = flag(cgdata, t = Year), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr_unordered 3461.11500 3461.11500 3471.95821 3471.95821 3482.80142 3482.80142 2 # collapse_unordered 13.71897 13.71897 211.59809 211.59809 409.47721 409.47721 2 # dplyr_ordered 5786.57522 5786.57522 6291.90389 6291.90389 6797.23256 6797.23256 2 # collapse_ordered 25.14399 25.14399 35.36102 35.36102 45.57806 45.57806 2 ## First-Difference (unordered) # Small microbenchmark(dplyr_unordered = mutate_all(gGGDC10S, function(x) x - dplyr::lag(x)), collapse_unordered = fdiff(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr_unordered 25613.274 25878.0725 27951.41954 26257.3225 27226.808 43048.893 100 # collapse_unordered 56.539 72.3035 95.72147 91.6965 102.664 254.077 100 # Large microbenchmark(dplyr_unordered = mutate_all(gdata, function(x) x - dplyr::lag(x)), collapse_unordered = fdiff(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr_unordered 3287.88487 3287.88487 3425.69703 3425.69703 3563.509 3563.509 2 # collapse_unordered 16.58971 16.58971 23.36885 23.36885 30.148 30.148 2 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3978800 212.5 8862175 473.3 NA 8862175 473.3 # Vcells 24870572 189.8 72805912 555.5 16384 445825141 3401.4 ``` Below again some benchmarks for transformations not easily of efficiently performed with *dplyr*, such as centering on the overall mean, mean-preserving scaling, weighted scaling and centering, sequences of lags / leads, (iterated) panel-differences and growth rates. ```r # Centering on overall mean microbenchmark(fwithin(cgdata, mean = "overall.mean"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(cgdata, mean = "overall.mean") 44.66782 48.03445 52.04073 50.07953 53.67134 71.13221 10 # Weighted Centering microbenchmark(fwithin(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(cgdata, SUM) 40.45204 42.86833 46.55326 46.18277 47.28202 57.82673 10 microbenchmark(fwithin(cgdata, SUM, mean = "overall.mean"), times = 10) # Unit: milliseconds # expr min lq mean median uq max # fwithin(cgdata, SUM, mean = "overall.mean") 39.99279 40.32256 43.0638 40.60269 41.34366 54.45542 # neval # 10 # Weighted Scaling and Standardizing microbenchmark(fsd(cgdata, SUM, TRA = "/"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fsd(cgdata, SUM, TRA = "/") 50.19536 50.9145 55.12553 53.23862 56.27094 67.46816 10 microbenchmark(fscale(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fscale(cgdata, SUM) 54.14792 57.64584 60.83251 59.88025 61.16425 72.31928 10 # Sequence of lags and leads microbenchmark(flag(cgdata, -1:1), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(cgdata, -1:1) 26.03902 48.02695 194.8518 257.0652 264.5479 276.5348 10 # Iterated difference microbenchmark(fdiff(cgdata, 1, 2), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(cgdata, 1, 2) 38.76001 39.83896 44.93731 41.08887 48.98348 63.42528 10 # Growth Rate microbenchmark(fgrowth(cgdata,1), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fgrowth(cgdata, 1) 11.58627 13.81528 18.05776 14.03489 22.34279 31.15811 10 ``` ## References Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). "Patterns of Structural Change in Developing Countries." . In J. Weiss, & M. Tribe (Eds.), *Routledge Handbook of Industry and Development.* (pp. 65-83). Routledge. Cochrane, D. & Orcutt, G. H. (1949). "Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Terms". *Journal of the American Statistical Association.* 44 (245): 32–61. Prais, S. J. & Winsten, C. B. (1954). "Trend Estimators and Serial Correlation". *Cowles Commission Discussion Paper No. 383.* Chicago. collapse/vignettes/collapse_object_handling.Rmd0000644000176200001440000005544615121640575021550 0ustar liggesusers--- title: "collapse's Handling of R Objects" subtitle: "A Quick View Behind the Scenes of Class-Agnostic R Programming" author: "Sebastian Krantz" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: vignette: > %\VignetteIndexEntry{collapse's Handling of R Objects} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This much-requested vignette provides some details about how *collapse* deals with various R objects. It is principally a digest of cumulative details provided in the [NEWS](https://fastverse.org/collapse/news/index.html) for various releases since v1.4.0. ## Overview *collapse* provides a class-agnostic architecture permitting computations on a very broad range of R objects. It provides explicit support for base R classes and data types (*logical*, *integer*, *double*, *character*, *list*, *data.frame*, *matrix*, *factor*, *Date*, *POSIXct*, *ts*) and their popular extensions, including *integer64*, *data.table*, *tibble*, *grouped_df*, *xts*/*zoo*, *pseries*, *pdata.frame*, *units*, and *sf* (no geometric operations). It also introduces [*GRP_df*](https://fastverse.org/collapse/reference/GRP.html) as a more performant and class-agnostic grouped data frame, and [*indexed_series* and *indexed_frame*](https://fastverse.org/collapse/reference/indexing.html) classes as modern class-agnostic successors of *pseries*, *pdata.frame*. These objects inherit the classes they succeed and are handled through `.pseries`, `.pdata.frame`, and `.grouped_df` methods, which also support the original (*plm* / *dplyr*) implementations (details below). All other objects are handled internally at the C or R level using general principles extended by specific considerations for some of the above classes. I start with summarizing the general principles, which enable the usage of *collapse* with further classes it does not explicitly support. ## General Principles In general, *collapse* preserves attributes and classes of R objects in statistical and data manipulation operations unless their preservation involves a **high-risk** of yielding something wrong/useless. Risky operations change the dimensions or internal data type (`typeof()`) of an R object. To *collapse*'s R and C code, there exist 3 principal types of R objects: atomic vectors, matrices, and lists - which are often assumed to be data frames. Most data manipulation functions in *collapse*, like `fmutate()`, only support lists, whereas statistical functions - like the S3 generic [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html) like `fmean()` - generally support all 3 types of objects. S3 generic functions initially dispatch to `.default`, `.matrix`, `.data.frame`, and (hidden) `.list` methods. The `.list` method generally dispatches to the `.data.frame` method. These basic methods, and other non-generic functions in *collapse*, then decide how exactly to handle the object based on the statistical operation performed and attribute handling principles mostly implemented in C. The simplest case arises when an operation preserves the dimensions of the object, such as `fscale(x)` or `fmutate(data, across(a:c, log))`. In this case, all attributes of `x / data` are fully preserved^[Preservation implies a shallow copy of the attribute lists from the original object to the result object. A shallow copy is memory-efficient and means we are copying the list containing the attributes in memory, but not the attributes themselves. Whenever I talk about copying attributes, I mean a shallow copy, not a deep copy. You can perform shallow copies with [helper functions](https://fastverse.org/collapse/reference/small-helpers.html) `copyAttrib()` or `copyMostAttrib()`, and directly set attribute lists using `setAttrib()` or `setattrib()`.]. Another simple case for matrices and lists arises when a statistical operation reduces them to a single dimension such as `fmean(x)`, where, under the `drop = TRUE` default of [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), all attributes apart from (column-)names are dropped and a (named) vector of means is returned. For atomic vectors, a statistical operation like `fmean(x)` will preserve the attributes (except for *ts* objects), as the object could have useful properties such as labels or units. More complex cases involve changing the dimensions of an object. If the number of rows is preserved e.g. `fmutate(data, a_b = a / b)` or `flag(x, -1:1)`, only the (column-)names attribute of the object is modified. If the number of rows is reduced e.g. `fmean(x, g)`, all attributes are also retained under suitable modifications of the (row-)names attribute. However, if `x` is a matrix, other attributes than row- or column-names are only retained if `!is.object(x)`, that is, if the matrix does not have a 'class' attribute. For atomic vectors, attributes are retained if `!inherits(x, "ts")`, as aggregating a time series will break the class. This also applies to columns in a data frame being aggregated. When data is transformed using statistics as provided by the [`TRA()` function](https://fastverse.org/collapse/reference/TRA.html) e.g. `TRA(x, STATS, operation, groups)` and the like-named argument to the [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), operations that simply modify the input (`x`) in a statistical sense (`"replace_na"`, `"-"`, `"-+"`, `"/"`, `"+"`, `"*"`, `"%%"`, `"-%%"`) just copy the attributes to the transformed object. Operations `"fill"` and `"replace"` are more tricky, since here `x` is replaced with `STATS`, which could be of a different class or data type. The following rules apply: (1) the result has the same data type as `STATS`; (2) if `is.object(STATS)`, the attributes of `STATS` are preserved; (3) otherwise the attributes of `x` are preserved unless `is.object(x) && typeof(x) != typeof(STATS)`; (4) an exemption to this rule is made if `x` is a factor and an integer replacement is offered to STATS e.g. `fnobs(factor, group, TRA = "fill")`. In that case, the attributes of `x` are copied except for the 'class' and 'levels' attributes. These rules were devised considering the possibility that `x` may have important information attached to it which should be preserved in data transformations, such as a `"label"` attribute. So to summarize the general principles: *collapse* just tries to preserve attributes in all cases except where it is likely to break something, beholding the way most commonly used R classes and objects behave. The most likely operations that break something are when aggregating matrices which have a class (such as *mts*/*xts*) or univariate time series (*ts*), or when data is to be replaced by another object. In the latter case, particular attention is paid to integer vectors and factors, as we often count something generating integers, and malformed factors need to be avoided. The following section provides some further details for some *collapse* functions and supported classes. ## Specific Functions and Classes #### Object Conversions [Quick conversion functions](https://fastverse.org/collapse/reference/quick-conversion.html) `qDF`, `qDT`, `qTBL()` and `qM` (to create data.frame's, *data.table*'s, *tibble*'s and matrices from arbitrary R objects) by default (`keep.attr = FALSE`) perform very strict conversions, where all attributes non-essential to the class are dropped from the input object. This is to ensure that, following conversion, objects behave exactly the way users expect. This is different from the behavior of functions like `as.data.frame()`, `as.data.table()`, `as_tibble()` or `as.matrix()` e.g. `as.matrix(EuStockMarkets)` just returns `EuStockMarkets` whereas `qM(EuStockMarkets)` returns a plain matrix without time series attributes. This behavior can be changed by setting `keep.attr = TRUE`, i.e. `qM(EuStockMarkets, keep.attr = TRUE)`. #### Selecting Columns by Data Type Functions [`num_vars()`, `cat_vars()` (the opposite of `num_vars()`), `char_vars()` etc.](https://fastverse.org/collapse/reference/select_replace_vars.html) are implemented in C to avoid the need to check data frame columns by applying an R function such as `is.numeric()`. For `is.numeric`, the C implementation is equivalent to `is_numeric_C <- function(x) typeof(x) %in% c("integer", "double") && !inherits(x, c("factor", "Date", "POSIXct", "yearmon", "yearqtr"))`. This of course does not respect the behavior of other classes that define methods for `is.numeric` e.g. `is.numeric.foo <- function(x) FALSE`, then for `y = structure(rnorm(100), class = "foo")`, `is.numeric(y)` is `FALSE` but `num_vars(data.frame(y))` still returns it. Correct behavior in this case requires `get_vars(data.frame(y), is.numeric)`. A particular case to be aware of is when using `collap()` with the `FUN` and `catFUN` arguments, where the C code (`is_numeric_C`) is used internally to decide whether a column is numeric or categorical. *collapse* does not support statistical operations on complex data. #### Parsing of Time-IDs [*Time Series Functions*](https://fastverse.org/collapse/reference/time-series-panel-series.html) `flag`, `fdiff`, `fgrowth` and `psacf/pspacf/psccf` (and the operators `L/F/D/Dlog/G`) have a `t` argument to pass time-ids for fully identified temporal operations on time series and panel data. If `t` is a plain numeric vector or a factor, it is coerced to integer using `as.integer()`, and the integer steps are used as time steps. This is premised on the observation that the most common form of temporal identifier is a numeric variable denoting calendar years. If on the other hand `t` is a numeric time object such that `is.object(t) && is.numeric(unclass(t))` (e.g. Date, POSIXct, etc.), then it is passed through `timeid()` which computes the greatest common divisor of the vector and generates an integer time-id in that way. Users are therefore advised to use appropriate classes to represent time steps e.g. for monthly data `zoo::yearmon` would be appropriate. It is also possible to pass non-numeric `t`, such as character or list/data.frame. In such cases ordered grouping is applied to generate an integer time-id, but this should rather be avoided. #### *xts*/*zoo* Time Series *xts*/*zoo* time series are handled through `.zoo` methods to all relevant functions. These methods are simple and all follow this pattern: `FUN.zoo <- function(x, ...) if(is.matrix(x)) FUN.matrix(x, ...) else FUN.default(x, ....)`. Thus the general principles apply. Time-Series function do not automatically use the index for indexed computations, partly for consistency with native methods where this is also not the case (e.g. `lag.xts` does not perform an indexed lag), and partly because, as outlined above, the index does not necessarily accurately reflect the time structure. Thus the user must exercise discretion to perform an indexed lag on *xts*/*zoo*. For example: `flag(xts_daily, 1:3, t = index(xts_daily))` or `flag(xts_monthly, 1:3, t = zoo::as.yearmon(index(xts_monthly)))`. #### Support for *sf* and *units* *collapse* internally supports *sf* data frames by seeking to avoid their undue destruction through removal of the 'geometry' column in data manipulation operations. This is simply implemented through an additional check in the C programs used to subset columns of data: if the object is an *sf* data frame, the 'geometry' column is added to the column selection. Other functions like `funique()` or `roworder()` have internal facilities to avoid sorting or grouping on the 'geometry' column. Again other functions like `descr()` and `qsu()` simply omit the geometry column in their statistical calculations. A short [vignette](https://fastverse.org/collapse/articles/collapse_and_sf.html) describes the integration of *collapse* and *sf* in a bit more detail. In summary: *collapse* supports *sf* by seeking to appropriately deal with the 'geometry' column. It cannot perform geometrical operations. For example, after subsetting with `fsubset()`, the bounding box attribute of the geometry is unaltered and likely too large. To preserve *units* objects used in the *sf* ecosystem, all relevant functions also have simple methods of the form `FUN.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(FUN.matrix(x, ...), x) else FUN.default(x, ....)`. According to the general principles, the default method preserves the units class, whereas the matrix method does not if `FUN` aggregates the data. The use of `copyMostAttrib()`, which copies all attributes apart from `"dim"`, `"dimnames"`, and `"names"`, ensures that the returned objects are still *units*. #### Support for *data.table* *collapse* provides quite thorough support for *data.table*. The simplest level of support is that it avoids assigning descriptive (character) row names to *data.table*'s e.g. `fmean(mtcars, mtcars$cyl)` has row-names corresponding to the groups but `fmean(qDT(mtcars), mtcars$cyl)` does not. *collapse* further supports *data.table*'s reference semantics (`set*`, `:=`). To be able to add columns by reference (e.g. `DT[, new := 1]`), *data.table*'s are implemented as overallocated lists^[Notably, additional (hidden) column pointers are allocated to be able to add columns without taking a shallow copy of the *data.table*, and an `".internal.selfref"` attribute containing an external pointer is used to check if any shallow copy was made using base R commands like `<-`.]. *collapse* copied some C code from *data.table* to do the overallocation and generate the `".internal.selfref"` attribute, so that `qDT()` creates a valid and fully functional *data.table*. To enable seamless data manipulation combining *collapse* and *data.table*, all data manipulation functions in *collapse* call this C code at the end and return a valid (overallocated) *data.table*. However, because this overallocation comes at a computational cost of 2-3 microseconds, I have opted against also adding it to the `.data.frame` methods of statistical functions. Concretely, this means that `res <- DT |> fgroup_by(id) |> fsummarise(mu_a = fmean(a))` gives a fully functional *data.table* i.e. `res[, new := 1]` works, but `res2 <- DT |> fgroup_by(id) |> fmean()` gives a non-overallocated *data.table* such that `res2[, new := 1]` will still work but issue a warning. In this case, `res2 <- DT |> fgroup_by(id) |> fmean() |> qDT()` can be used to avoid the warning. This, to me, seems a reasonable trade-off between flexibility and performance. More details and examples are provided in the [*collapse* and *data.table* vignette](https://fastverse.org/collapse/articles/collapse_and_data.table.html). #### Class-Agnostic Grouped and Indexed Data Frames As indicated in the introductory remarks, *collapse* provides a fast [class-agnostic grouped data frame](https://fastverse.org/collapse/reference/GRP.html) created with `fgroup_by()`, and fast [class-agnostic indexed time series and panel data](https://fastverse.org/collapse/reference/indexing.html), created with `findex_by()`/`reindex()`. Class-agnostic means that the object that is grouped/indexed continues to behave as before except in *collapse* operations utilizing the 'groups'/'index_df' attributes. The grouped data frame is implemented as follows: `fgroup_by()` saves the class of the input data, calls `GRP()` on the columns being grouped, and attaches the resulting 'GRP' object in a `"groups"` attribute. It then assigns a class attribute as follows ```r clx <- class(.X) # .X is the data frame being grouped, clx is its class m <- match(c("GRP_df", "grouped_df", "data.frame"), clx, nomatch = 0L) class(.X) <- c("GRP_df", if(length(mp <- m[m != 0L])) clx[-mp] else clx, "grouped_df", if(m[3L]) "data.frame") ``` In words: a class `"GRP_df"` is added in front, followed by the classes of the original object^[Removing `c("GRP_df", "grouped_df", "data.frame")` if present to avoid duplicate classes and allowing grouped data to be re-grouped.], followed by `"grouped_df"` and finally `"data.frame"`, if present. The `"GRP_df"` class is for dealing appropriately with the object through methods for `print()` and subsetting (`[`, `[[`), e.g. `print.GRP_df` fetches the grouping object, prints `fungroup(.X)`^[Which reverses the changes of `fgroup_by()` so that the print method for the original object `.X` is called.], and then prints a summary of the grouping. `[.GRP_df` works similarly: it saves the groups, calls `[` on `fungroup(.X)`, and attaches the groups again if the result is a list with the same number of rows. So *collapse* has no issues printing and handling grouped *data.table*'s, *tibbles*, *sf* data frames, etc. - they continue to behave as usual. Now *collapse* has various functions with a `.grouped_df` method to deal with grouped data frames. For example `fmean.grouped_df`, in a nutshell, fetches the attached 'GRP' object using `GRP.grouped_df`, and calls `fmean.data.frame` on `fungroup(data)`, passing the 'GRP' object to the `g` argument for grouped computation. Here the general principles outlined above apply so that the resulting object has the same attributes as the input. This architecture has an additional advantage: it allows `GRP.grouped_df` to examine the grouping object and check if it was created by *collapse* (class 'GRP') or by *dplyr*. If the latter is the case, an efficient C routine is called to convert the *dplyr* grouping object to a 'GRP' object so that all `.grouped_df` methods in *collapse* apply to data frames created with either `dplyr::group_by()` or `fgroup_by()`. The *indexed_frame* works similarly. It inherits from *pdata.frame* so that `.pdata.frame` methods in *collapse* deal with both *indexed_frame*'s of arbitrary classes and *pdata.frame*'s created with *plm*. A notable difference to both *grouped_df* and *pdata.frame* is that *indexed_frame* is a deeply indexed data structure: each variable inside an *indexed_frame* is an *indexed_series* which contains in its *index_df* attribute an external pointer to the *index_df* attribute of the frame. Functions with *pseries* methods operating on *indexed_series* stored inside the frame (such as `with(data, flag(column))`) can fetch the index from this pointer. This allows worry-free application inside arbitrary data masking environments (`with`, `%$%`, `attach`, etc..) and estimation commands (`glm`, `feols`, `lmrob` etc..) without duplication of the index in memory. As you may have guessed, *indexed_series* are also class-agnostic and inherit from *pseries*. Any vector or matrix of any class can become an *indexed_series*. Further levels of generality are that indexed series and frames allow one, two or more variables in the index to support both time series and complex panels, natively deal with irregularity in time^[This is done through the creation of a time-factor in the *index_df* attribute whose levels represent time steps, i.e., the factor will have unused levels for gaps in time.], and provide a rich set of methods for subsetting and manipulation which also subset the *index_df* attribute, including internal methods for `fsubset()`, `funique()`, `roworder(v)` and `na_omit()`. So *indexed_frame* and *indexed_series* is a rich and general structure permitting fully time-aware computations on nearly any R object. See [`?indexing`](https://fastverse.org/collapse/reference/indexing.html) for more information. ## Conclusion *collapse* handles R objects in a preserving and fairly intelligent manner, allowing seamless compatibility with many common data classes in R, and statistical workflows that preserve attributes (labels, units, etc.) of the data. This is implemented through general principles and some specific considerations/exemptions mostly implemented in C - as detailed in this vignette. The main benefits of this design are generality and execution speed: *collapse* has much fewer R-level method dispatches and function calls than other frameworks used to perform statistical or data manipulation operations, it behaves predictably, and may also work well with your simple new class. The main disadvantage is that the general principles and exemptions are hard-coded in C and thus may not work with specific classes. A prominent example where *collapse* simply fails is *lubridate*'s *interval* class ([#186](https://github.com/fastverse/collapse/issues/186), [#418](https://github.com/fastverse/collapse/issues/418)), which has a `"starts"` attribute of the same length as the data that is preserved but not subset in *collapse* operations. collapse/vignettes/collapse_for_tidyverse_users.Rmd0000644000176200001440000003762015121640707022532 0ustar liggesusers--- title: "collapse for tidyverse Users" author: "Sebastian Krantz" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse for tidyverse Users} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{css, echo=FALSE} pre { max-height: 500px; overflow-y: auto; } pre[class] { max-height: 500px; } ``` ```{r, echo=FALSE} oldopts <- options(width = 100L) ``` ```{r, echo = FALSE, message = FALSE, warning=FALSE} knitr::opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE, comment = "#", tidy = FALSE, cache = TRUE, collapse = TRUE, fig.width = 8, fig.height = 5, out.width = '100%') ``` *collapse* is a C/C++ based package for data transformation and statistical computing in R that aims to enable greater performance and statistical complexity in data manipulation tasks and offers a stable, class-agnostic, and lightweight API. It is part of the core [*fastverse*](https://fastverse.org/fastverse/), a suite of lightweight packages with similar objectives. The [*tidyverse*](https://tidyverse.org/) set of packages provides a rich, expressive, and consistent syntax for data manipulation in R centering on the *tibble* object and tidy data principles (each observation is a row, each variable is a column). *collapse* fully supports the *tibble* object and provides many *tidyverse*-like functions for data manipulation. It can thus be used to write *tidyverse*-like data manipulation code that, thanks to low-level vectorization of many statistical operations and optimized R code, typically runs much faster than native *tidyverse* code, in addition to being much more lightweight in dependencies. Its aim is not to create a faster *tidyverse*, i.e., it does not implements all aspects of the rich *tidyverse* grammar or changes to it^[Notably, tidyselect, lambda expressions, and many of the smaller helper functions are left out.], and also takes inspiration from other leading data manipulation libraries to serve broad aims of performance, parsimony, complexity, and robustness in data manipulation for R. ## Namespace and Global Options *collapse* data manipulation functions familiar to *tidyverse* users include `fselect`, `fgroup_by`, `fsummarise`, `fmutate`, `across`, `frename`, `fslice`, and `fcount`. Other functions like `fsubset`, `ftransform`, and `get_vars` are inspired by base R, while again other functions like `join`, `pivot`, `roworder`, `colorder`, `rowbind`, etc. are inspired by other data manipulation libraries such as *data.table* and *polars*. By virtue of the f- prefixes, the *collapse* namespace has no conflicts with the *tidyverse*, and these functions can easily be substituted in a *tidyverse* workflow. R users willing to replace the *tidyverse* have the additional option to mask functions and eliminate the prefixes with `set_collapse`. For example ```{r} library(collapse) set_collapse(mask = "manip") # version >= 2.0.0 ``` makes available functions `select`, `group_by`, `summarise`, `mutate`, `rename`, `count`, `subset`, `slice`, and `transform` in the *collapse* namespace and detaches and re-attaches the package, such that the following code is executed by *collapse*: ```{r} mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), mean), qsec_wt = weighted.mean(qsec, wt)) ``` *Note* that the correct documentation still needs to be called with prefixes, i.e., `?fsubset`. See `?set_collapse` for further options to the package, which also includes optimization options such as `nthreads`, `na.rm`, `sort`, and `stable.algo`. *Note* also that if you use *collapse*'s namespace masking, you can use `fastverse::fastverse_conflicts()` to check for namespace conflicts with other packages. ## Using the *Fast Statistical Functions* A key feature of *collapse* is that it not only provides functions for data manipulation, but also a full set of statistical functions and algorithms to speed up statistical calculations and perform more complex statistical operations (e.g. involving weights or time series data). Notably among these, the [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html) is a consistent set of S3-generic statistical functions providing fully vectorized statistical operations in R. Specifically, operations such as calculating the mean via the S3 generic `fmean()` function are vectorized across columns and groups and may also involve weights or transformations of the original data: ```{r} fmean(mtcars$mpg) # Vector fmean(EuStockMarkets) # Matrix fmean(mtcars) # Data Frame fmean(mtcars$mpg, w = mtcars$wt) # Weighted mean fmean(mtcars$mpg, g = mtcars$cyl) # Grouped mean fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt) # Weighted group mean fmean(mtcars[5:10], g = mtcars$cyl, w = mtcars$wt) # Of data frame fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt, TRA = "fill") # Replace data by weighted group mean # etc... ``` The data manipulation functions of *collapse* are integrated with these *Fast Statistical Functions* to enable vectorized statistical operations. For example, the following code ```{r} mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), fmean), qsec_wt = fmean(qsec, wt)) ``` gives exactly the same result as above, but the execution is much faster (especially on larger data), because with *Fast Statistical Functions*, the data does not need to be split by groups, and there is no need to call `lapply()` inside the `across()` statement: `fmean.data.frame()` is simply applied to a subset of the data containing columns `mpg`, `carb` and `hp`. The *Fast Statistical Functions* also have a method for grouped data, so if we did not want to calculate the weighted mean of `qsec`, the code would simplify as follows: ```{r} mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> select(mpg, carb, hp) |> fmean() ``` Note that all functions in *collapse*, including the *Fast Statistical Functions*, have the default `na.rm = TRUE`, i.e., missing values are skipped in calculations. This can be changed using `set_collapse(na.rm = FALSE)` to give behavior more consistent with base R. Another thing to be aware of when using *Fast Statistical Functions* inside data manipulation functions is that they toggle vectorized execution wherever they are used. E.g. ```{r} mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + min(qsec)) # Vectorized ``` calculates a grouped mean of `mpg` but adds the overall minimum of `qsec` to the result, i.e., it is equivalent to `fmean(mpg, g = cyl) + min(qsec)`. On the other hand ```{r} mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + fmin(qsec)) # Vectorized mtcars |> group_by(cyl) |> summarise(mpg = mean(mpg) + min(qsec)) # Not vectorized ``` both give the mean + the minimum within each group, but calculated in different ways: the former is equivalent to `fmean(mpg, g = cyl) + fmin(qsec, g = cyl)`, whereas the latter is equal to `sapply(gsplit(mpg, cyl), function(x) mean(x) + min(x))`. See `?fsummarise` and `?fmutate` for more detailed examples. This *eager vectorization* approach is intentional as it allows users to vectorize complex expressions and fall back to base R if this is not desired. [This blog post](https://andrewghazi.github.io/posts/collapse_is_sick/sick.html) by Andrew Ghazi provides an excellent example of computing a p-value test statistic by groups. *Note* that only expressions typed out can be vectorized; expressions inside functions such as `mean_plus_min <- function(x) fmean(x) + fmin(x)` are not vectorized.^[*collapse* can only read what you type, e.g. `exp <- substitute(fmean(mpg) + min(mpg))`, then `all_funs(exp)` gives `c("+", "fmean", "min")`, and `any(all_funs(exp) %in% .FAST_STAT_FUN)` returns `TRUE`, signifying to `fsummarise()` that the expression should be executed only once with the grouping object passed to the `g` argument of `fmean()`, instead of it being executed once for every group.] To take full advantage of *collapse*, it is thus highly recommended to use the *Fast Statistical Functions* as much as possible. ## Writing Efficient Code It is also performance-critical to correctly sequence operations and limit excess computations. *tidyverse* code is often inefficient simply because the *tidyverse* allows you to do everything. For example, `mtcars |> group_by(cyl) |> filter(mpg > 13) |> arrange(mpg)` is permissible but inefficient code as it filters and reorders grouped data, requiring modifications to both the data frame and the attached grouping object. *collapse* does not allow calls to `fsubset()` on grouped data, and messages about it in `roworder()`, encouraging you to write more efficient code. The above example can also be optimized because we are subsetting the whole frame and then doing computations on a subset of columns. It would be more efficient to select all required columns during the subset operation: ```{r} mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp, qsec, wt) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), fmean), qsec_wt = fmean(qsec, wt)) ``` Without the weighted mean of `qsec`, this would simplify to ```{r} mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |> group_by(cyl, vs, am) |> fmean() ``` Finally, we could set the following options to toggle unsorted grouping, no missing value skipping, and multithreading across the three columns for more efficient execution. ```{r} mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |> group_by(cyl, vs, am, sort = FALSE) |> fmean(nthreads = 3, na.rm = FALSE) ``` Setting these options globally using `set_collapse(sort = FALSE, nthreads = 3, na.rm = FALSE)` avoids the need to set them repeatedly. ### Using Internal Grouping Another key to writing efficient code with *collapse* is to avoid `fgroup_by()` where possible, especially for mutate operations. *collapse* does not implement `.by` arguments to manipulation functions like *dplyr*, but instead allows ad-hoc grouped transformations through its statistical functions. For example, the easiest and fastest way to computed the median of `mpg` by `cyl`, `vs`, and `am` is ```{r} mtcars |> mutate(mpg_median = fmedian(mpg, list(cyl, vs, am), TRA = "fill")) |> head(3) ``` For the common case of averaging and centering data, *collapse* also provides functions `fbetween()` for averaging and `fwithin()` for centering, i.e., `fbetween(mpg, list(cyl, vs, am))` is the same as `fmean(mpg, list(cyl, vs, am), TRA = "fill")`. There is also `fscale()` for (grouped) scaling and centering. This also applies to multiple columns, where we can use `fmutate(across(...))` or `ftransformv()`, i.e. ```{r} mtcars |> mutate(across(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill")) |> head(2) # Or mtcars |> transformv(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill") |> head(2) ``` Of course, if we want to apply different functions using the same grouping, `fgroup_by()` is sensible, but for mutate operations it also has the argument `return.groups = FALSE`, which avoids materializing the unique grouping columns, saving some memory. ```{r} mtcars |> group_by(cyl, vs, am, return.groups = FALSE) |> mutate(mpg_median = fmedian(mpg), mpg_mean = fmean(mpg), # Or fbetween(mpg) mpg_demean = fwithin(mpg), # Or fmean(mpg, TRA = "-") mpg_scale = fscale(mpg), .keep = "used") |> ungroup() |> head(3) ``` The `TRA` argument supports a whole array of operations, see `?TRA`. For example `fsum(mtcars, TRA = "/")` turns the column vectors into proportions. As an application of this, consider a generated dataset of sector-level exports. ```{r, include = FALSE} set.seed(101) ``` ```{r} # c = country, s = sector, y = year, v = value exports <- expand.grid(c = paste0("c", 1:8), s = paste0("s", 1:8), y = 1:15) |> mutate(v = round(abs(rnorm(length(c), mean = 5)), 2)) |> subset(-sample.int(length(v), 360)) # Making it unbalanced and irregular head(exports) nrow(exports) ``` It is very easy then to compute Balassa's (1965) Revealed Comparative Advantage (RCA) index, which is the share of a sector in country exports divided by the share of the sector in world exports. An index above 1 indicates that a RCA of country c in sector s. ```{r} # Computing Balassa's (1965) RCA index: fast and memory efficient # settfm() modifies exports and assigns it back to the global environment settfm(exports, RCA = fsum(v, list(c, y), TRA = "/") %/=% fsum(fsum(v, y, TRA = "/"), list(s, y), TRA = "fill", set = TRUE)) ``` Note that this involved a single expression with two different grouped operations, which is only possible by incorporating grouping into statistical functions themselves. Let's summarise this dataset using `pivot()` to aggregate the RCA index across years. Here `"mean"` calls a highly efficient internal mean function. ```{r} pivot(exports, ids = "c", values = "RCA", names = "s", how = "wider", FUN = "mean", sort = TRUE) ``` We may also wish to investigate the growth rate of RCA. This can be done using `fgrowth()`. Since the panel is irregular, i.e., not every sector is observed in every year, it is critical to also supply the time variable. ```{r} exports |> mutate(RCA_growth = fgrowth(RCA, g = list(c, s), t = y)) |> pivot(ids = "c", values = "RCA_growth", names = "s", how = "wider", FUN = fmedian, sort = TRUE) ``` Lastly, since the panel is unbalanced, we may wish to create an RCA index for only the last year, but balance the dataset a bit more by taking the last available trade within the last three years. This can be done using a single subset call ```{r} # Taking the latest observation within the last 3 years exports_latest <- subset(exports, y > 12 & y == fmax(y, list(c, s), "fill"), -y) # How many sectors do we observe for each country in the last 3 years? with(exports_latest, fndistinct(s, c)) ``` We can then compute the RCA index on this data ```{r} exports_latest |> mutate(RCA = fsum(v, c, TRA = "/") %/=% fsum(proportions(v), s, TRA = "fill")) |> pivot("c", "RCA", "s", how = "wider", sort = TRUE) ``` To summarise, *collapse* provides many options for ad-hoc or limited grouping, which are faster than a full `fgroup_by()`, and also syntactically efficient. Further efficiency gains are possible using operations by reference, e.g., `%/=%` instead of `/` to avoid an intermediate copy. It is also possible to transform by reference using fast statistical functions by passing the `set = TRUE` argument, e.g., `with(mtcars, fmean(mpg, cyl, TRA = "fill", set = TRUE))` replaces `mpg` by its group-averaged version (the transformed vector is returned invisibly). ## Conclusion *collapse* enhances R both statistically and computationally and is a good option for *tidyverse* users searching for more efficient and lightweight solutions to data manipulation and statistical computing problems in R. For more information, I recommend starting with the short vignette on [*Documentation Resources*](https://fastverse.org/collapse/articles/collapse_documentation.html). R users willing to write efficient/lightweight code and completely replace the *tidyverse* in their workflow are also encouraged to closely examine the [*fastverse*](https://fastverse.org/fastverse/) suite of packages. *collapse* alone may not always suffice, but 99% of *tidyverse* code can be replaced with an efficient and lightweight *fastverse* solution. ```{r, echo=FALSE} options(oldopts) ``` collapse/vignettes/collapse_and_sf.Rmd0000644000176200001440000010210315121640575017647 0ustar liggesusers--- title: "collapse and sf" subtitle: "Fast Manipulation of Simple Features Data Frames" author: "Sebastian Krantz and Grant McDermott" date: "2024-04-19" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and sf} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- This short vignette focuses on using *collapse* with the popular *sf* package by Edzer Pebesma. It shows that *collapse* supports easy manipulation of *sf* data frames, at computation speeds far above *dplyr*. *collapse* v1.6.0 added internal support for *sf* data frames by having most essential functions (e.g., `fselect/gv`, `fsubset/ss`, `fgroup_by`, `findex_by`, `qsu`, `descr`, `varying`, `funique`, `roworder`, `rsplit`, `fcompute`, ...) internally handle the geometry column. To demonstrate this, we can load a test dataset provided by *sf*: ```r library(collapse) library(sf) nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) options(sf_max_print = 3) nc # Simple feature collection with 100 features and 14 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry # 1 19 MULTIPOLYGON (((-81.47276 3... # 2 12 MULTIPOLYGON (((-81.23989 3... # 3 260 MULTIPOLYGON (((-80.45634 3... ``` ## Summarising sf Data Frames Computing summary statistics on *sf* data frames automatically excludes the 'geometry' column: ```r # Which columns have at least 2 non-missing distinct values varying(nc) # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 # TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE # NWBIR74 BIR79 SID79 NWBIR79 # TRUE TRUE TRUE TRUE # Quick summary stats qsu(nc) # N Mean SD Min Max # AREA 100 0.1263 0.0492 0.042 0.241 # PERIMETER 100 1.673 0.4823 0.999 3.64 # CNTY_ 100 1985.96 106.5166 1825 2241 # CNTY_ID 100 1985.96 106.5166 1825 2241 # NAME 100 - - - - # FIPS 100 - - - - # FIPSNO 100 37100 58.023 37001 37199 # CRESS_ID 100 50.5 29.0115 1 100 # BIR74 100 3299.62 3848.1651 248 21588 # SID74 100 6.67 7.7812 0 44 # NWBIR74 100 1050.81 1432.9117 1 8027 # BIR79 100 4223.92 5179.4582 319 30757 # SID79 100 8.36 9.4319 0 57 # NWBIR79 100 1352.81 1975.9988 3 11631 # Detailed statistics description of each column descr(nc) # Dataset: nc, 14 Variables, N = 100 # ---------------------------------------------------------------------------------------------------- # AREA (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 77 0.13 0.05 0.04 0.24 0.48 2.5 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 0.04 0.06 0.06 0.09 0.12 0.15 0.2 0.21 0.24 # ---------------------------------------------------------------------------------------------------- # PERIMETER (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 96 1.67 0.48 1 3.64 1.48 5.95 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1 1.09 1.19 1.32 1.61 1.86 2.2 2.72 3.2 # ---------------------------------------------------------------------------------------------------- # CNTY_ (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 1985.96 106.52 1825 2241 0.26 2.32 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1826.98 1832.95 1837.9 1902.25 1982 2067.25 2110 2156.3 2238.03 # ---------------------------------------------------------------------------------------------------- # CNTY_ID (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 1985.96 106.52 1825 2241 0.26 2.32 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1826.98 1832.95 1837.9 1902.25 1982 2067.25 2110 2156.3 2238.03 # ---------------------------------------------------------------------------------------------------- # NAME (character): # Statistics # N Ndist # 100 100 # Table # Freq Perc # Ashe 1 1 # Alleghany 1 1 # Surry 1 1 # Currituck 1 1 # Northampton 1 1 # Hertford 1 1 # Camden 1 1 # Gates 1 1 # Warren 1 1 # Stokes 1 1 # Caswell 1 1 # Rockingham 1 1 # Granville 1 1 # Person 1 1 # ... 86 Others 86 86 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1 1 1 1 1 1 # ---------------------------------------------------------------------------------------------------- # FIPS (character): # Statistics # N Ndist # 100 100 # Table # Freq Perc # 37009 1 1 # 37005 1 1 # 37171 1 1 # 37053 1 1 # 37131 1 1 # 37091 1 1 # 37029 1 1 # 37073 1 1 # 37185 1 1 # 37169 1 1 # 37033 1 1 # 37157 1 1 # 37077 1 1 # 37145 1 1 # ... 86 Others 86 86 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1 1 1 1 1 1 # ---------------------------------------------------------------------------------------------------- # FIPSNO (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 37100 58.02 37001 37199 -0 1.8 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 37002.98 37010.9 37020.8 37050.5 37100 37149.5 37179.2 37189.1 37197.02 # ---------------------------------------------------------------------------------------------------- # CRESS_ID (integer): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 50.5 29.01 1 100 0 1.8 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1.99 5.95 10.9 25.75 50.5 75.25 90.1 95.05 99.01 # ---------------------------------------------------------------------------------------------------- # BIR74 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 3299.62 3848.17 248 21588 2.79 11.79 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 283.64 419.75 531.8 1077 2180.5 3936 6725.7 11193 20378.22 # ---------------------------------------------------------------------------------------------------- # SID74 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 23 6.67 7.78 0 44 2.44 10.28 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 0 0 0 2 4 8.25 15.1 18.25 38.06 # ---------------------------------------------------------------------------------------------------- # NWBIR74 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 93 1050.81 1432.91 1 8027 2.83 11.84 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1 9.95 39.2 190 697.5 1168.5 2231.8 3942.9 7052.84 # ---------------------------------------------------------------------------------------------------- # BIR79 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 4223.92 5179.46 319 30757 2.99 13.1 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 349.69 539.3 675.7 1336.25 2636 4889 8313 14707.45 26413.87 # ---------------------------------------------------------------------------------------------------- # SID79 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 28 8.36 9.43 0 57 2.28 9.88 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 0 0 1 2 5 10.25 21 26 38.19 # ---------------------------------------------------------------------------------------------------- # NWBIR79 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 98 1352.81 1976 3 11631 3.18 14.45 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 3.99 11.9 44.7 250.5 874.5 1406.75 2987.9 5090.5 10624.17 # ---------------------------------------------------------------------------------------------------- ``` ## Selecting Columns and Subsetting We can select columns from the *sf* data frame without having to worry about taking along 'geometry': ```r # Selecting a sequence of columns fselect(nc, AREA, NAME:FIPSNO) # Simple feature collection with 100 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.114 Ashe 37009 37009 MULTIPOLYGON (((-81.47276 3... # 2 0.061 Alleghany 37005 37005 MULTIPOLYGON (((-81.23989 3... # 3 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... # Same using standard evaluation (gv is a shorthand for get_vars()) gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO")) # Simple feature collection with 100 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.114 Ashe 37009 37009 MULTIPOLYGON (((-81.47276 3... # 2 0.061 Alleghany 37005 37005 MULTIPOLYGON (((-81.23989 3... # 3 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... ``` The same applies to subsetting rows (and columns): ```r # A fast and enhanced version of base::subset fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO) # Simple feature collection with 44 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... # 2 0.153 Northampton 37131 37131 MULTIPOLYGON (((-77.21767 3... # 3 0.153 Rockingham 37157 37157 MULTIPOLYGON (((-79.53051 3... # A fast version of `[` (where i is used and optionally j) ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO")) # Simple feature collection with 10 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.114 Ashe 37009 37009 MULTIPOLYGON (((-81.47276 3... # 2 0.061 Alleghany 37005 37005 MULTIPOLYGON (((-81.23989 3... # 3 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... ``` This is significantly faster than using `[`, `base::subset()`, `dplyr::select()` or `dplyr::filter()`: ```r library(microbenchmark) library(dplyr) # Selecting columns microbenchmark(collapse = fselect(nc, AREA, NAME:FIPSNO), dplyr = select(nc, AREA, NAME:FIPSNO), collapse2 = gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO")), sf = nc[c("AREA", "NAME", "FIPS", "FIPSNO")]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 3.034 3.9565 5.19429 5.1865 5.6990 22.878 100 # dplyr 431.279 452.2915 505.29015 466.3750 493.8450 3356.342 100 # collapse2 2.665 3.4850 4.59610 4.4075 5.0635 14.391 100 # sf 105.165 114.1235 120.39732 118.0390 124.9270 156.497 100 # Subsetting microbenchmark(collapse = fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO), dplyr = select(nc, AREA, NAME:FIPSNO) |> filter(AREA > fmean(AREA)), collapse2 = ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO")), sf = nc[1:10, c("AREA", "NAME", "FIPS", "FIPSNO")]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 9.676 11.5825 15.01707 14.4730 16.8920 30.463 100 # dplyr 890.643 917.6415 1055.40970 941.7085 1009.7890 5546.685 100 # collapse2 2.829 3.5465 5.40585 4.8995 6.4165 20.541 100 # sf 176.997 187.6160 202.72286 200.7565 210.8220 340.464 100 ``` However, *collapse* functions don't subset the 'agr' attribute on selecting columns, which (if specified) relates columns (attributes) to the geometry, and also don't modify the 'bbox' attribute giving the overall boundaries of a set of geometries when subsetting the *sf* data frame. Keeping the full 'agr' attribute is not problematic for all practical purposes, but not changing 'bbox' upon subsetting may lead to too large margins when plotting the geometries of a subset *sf* data frame. One way to to change this is calling `st_make_valid()` on the subset frame; but `st_make_valid()` is very expensive, thus unless the subset frame is very small, it is better to use `[`, `base::subset()` or `dplyr::filter()` in cases where the bounding box size matters. ## Aggregation and Grouping The flexibility and speed of `collap()` for aggregation can be used on *sf* data frames. A separate method for *sf* objects was not considered necessary as one can simply aggregate the geometry column using `st_union()`: ```r # Aggregating by variable SID74 using the median for numeric and the mode for categorical columns collap(nc, ~ SID74, custom = list(fmedian = is.numeric, fmode = is.character, st_union = "geometry")) # or use is.list to fetch the geometry # Simple feature collection with 23 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 SID74 NWBIR74 BIR79 # 1 0.0780 1.3070 1950.0 1950.0 Alleghany 37005 37073 37.0 487 0 0 40.0 594.0 # 2 0.0810 1.2880 1887.0 1887.0 Ashe 37009 37137 69.0 751 1 1 148.0 899.0 # 3 0.1225 1.6435 1959.5 1959.5 Caswell 37033 37078 39.5 1271 2 2 382.5 1676.5 # SID79 NWBIR79 geometry # 1 1 45 MULTIPOLYGON (((-83.69563 3... # 2 1 176 MULTIPOLYGON (((-80.02406 3... # 3 2 452 MULTIPOLYGON (((-77.16129 3... ``` *sf* data frames can also be grouped and then aggregated using `fsummarise()`: ```r nc |> fgroup_by(SID74) # Simple feature collection with 100 features and 14 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry # 1 19 MULTIPOLYGON (((-81.47276 3... # 2 12 MULTIPOLYGON (((-81.23989 3... # 3 260 MULTIPOLYGON (((-80.45634 3... # # Grouped by: SID74 [23 | 4 (4) 1-13] nc |> fgroup_by(SID74) |> fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = st_union(geometry)) # Simple feature collection with 23 features and 3 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # SID74 AREA_Ag Perimeter_Ag geometry # 1 0 1.103 1.3070 MULTIPOLYGON (((-83.69563 3... # 2 1 0.914 1.2880 MULTIPOLYGON (((-80.02406 3... # 3 2 1.047 1.6435 MULTIPOLYGON (((-77.16129 3... ``` Typically most of the time in aggregation is consumed by `st_union()` so that the speed of *collapse* does not really become visible on most datasets. A faster alternative is to use *geos* (*sf* backend for planar geometries) or *s2* (*sf* backend for spherical geometries) directly: ```r # Using s2 backend: sensible for larger tasks nc |> fmutate(geometry = s2::as_s2_geography(geometry)) |> fgroup_by(SID74) |> fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = s2::s2_union_agg(geometry)) |> fmutate(geometry = st_as_sfc(geometry)) # Simple feature collection with 23 features and 3 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: WGS 84 # First 3 features: # SID74 AREA_Ag Perimeter_Ag geometry # 1 0 1.103 1.3070 MULTIPOLYGON (((-83.69563 3... # 2 1 0.914 1.2880 MULTIPOLYGON (((-80.02406 3... # 3 2 1.047 1.6435 MULTIPOLYGON (((-77.16129 3... ``` In general, also upon aggregation with *collapse*, functions `st_as_sfc()`, `st_as_sf()`, or, in the worst case, `st_make_valid()`, may need to be invoked to ensure valid *sf* object output. Functions `collap()` and `fsummarise()` are attribute preserving but do not give special regard to geometry columns. One exception that both avoids the high cost of spatial functions in aggregation and any need for ex-post conversion/validation is aggregating spatial panel data over the time-dimension. Such panels can quickly be aggregated using `ffirst()` or `flast()` to aggregate the geometry: ```r # Creating a panel-dataset by simply duplicating nc for 2 different years pnc <- rowbind(`2000` = nc, `2001` = nc, idcol = "Year") |> as_integer_factor() pnc # Simple feature collection with 200 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # Year AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 # 1 2000 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 # 2 2000 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 # 3 2000 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 # SID79 NWBIR79 geometry # 1 0 19 MULTIPOLYGON (((-81.47276 3... # 2 3 12 MULTIPOLYGON (((-81.23989 3... # 3 6 260 MULTIPOLYGON (((-80.45634 3... # Aggregating by NAME, using the last value for all categorical data collap(pnc, ~ NAME, fmedian, catFUN = flast, cols = -1L) # Simple feature collection with 100 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 # 1 0.111 1.392 1904 1904 Alamance Alamance 37001 37001 1 4672 13 1243 5767 # 2 0.066 1.070 1950 1950 Alexander Alexander 37003 37003 2 1333 0 128 1683 # 3 0.061 1.231 1827 1827 Alleghany Alleghany 37005 37005 3 487 0 10 542 # SID79 NWBIR79 geometry # 1 11 1397 MULTIPOLYGON (((-79.24619 3... # 2 2 150 MULTIPOLYGON (((-81.10889 3... # 3 3 12 MULTIPOLYGON (((-81.23989 3... # Using fsummarise to aggregate just two variables and the geometry pnc_ag <- pnc |> fgroup_by(NAME) |> fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = flast(geometry)) # The geometry is still valid... (slt = shorthand for fselect) plot(slt(pnc_ag, AREA_Ag)) ```
plot of chunk AREA_Ag
## Indexing *sf* data frames can also become [*indexed frames*](https://fastverse.org/collapse/reference/indexing.html) (spatio-temporal panels): ```r pnc <- pnc |> findex_by(CNTY_ID, Year) pnc # Simple feature collection with 200 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # Year AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 # 1 2000 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 # 2 2000 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 # 3 2000 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 # SID79 NWBIR79 geometry # 1 0 19 MULTIPOLYGON (((-81.47276 3... # 2 3 12 MULTIPOLYGON (((-81.23989 3... # 3 6 260 MULTIPOLYGON (((-80.45634 3... # # Indexed by: CNTY_ID [100] | Year [2] qsu(pnc$AREA) # N/T Mean SD Min Max # Overall 200 0.1263 0.0491 0.042 0.241 # Between 100 0.1263 0.0492 0.042 0.241 # Within 2 0.1263 0 0.1263 0.1263 settransform(pnc, AREA_diff = fdiff(AREA)) psmat(pnc$AREA_diff) |> head() # 2000 2001 # 1825 NA 0 # 1827 NA 0 # 1828 NA 0 # 1831 NA 0 # 1832 NA 0 # 1833 NA 0 pnc <- unindex(pnc) ``` ## Unique Values, Ordering, Splitting, Binding Functions `funique()` and `roworder[v]()` ignore the 'geometry' column in determining the unique values / order of rows when applied to *sf* data frames. `rsplit()` can be used to (recursively) split an *sf* data frame into multiple chunks. ```r # Splitting by SID74 rsplit(nc, ~ SID74) |> head(2) # $`0` # Simple feature collection with 13 features and 13 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 NWBIR74 BIR79 SID79 NWBIR79 # 1 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 10 542 3 12 # 2 0.062 1.547 1834 1834 Camden 37029 37029 15 286 115 350 2 139 # 3 0.091 1.284 1835 1835 Gates 37073 37073 37 420 254 594 2 371 # geometry # 1 MULTIPOLYGON (((-81.23989 3... # 2 MULTIPOLYGON (((-76.00897 3... # 3 MULTIPOLYGON (((-76.56251 3... # # $`1` # Simple feature collection with 11 features and 13 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 NWBIR74 BIR79 SID79 NWBIR79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 10 1364 0 19 # 2 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 123 830 2 145 # 3 0.124 1.428 1837 1837 Stokes 37169 37169 85 1612 160 2038 5 176 # geometry # 1 MULTIPOLYGON (((-81.47276 3... # 2 MULTIPOLYGON (((-76.00897 3... # 3 MULTIPOLYGON (((-80.02567 3... ``` The default in `rsplit()` for data frames is `simplify = TRUE`, which, for a single LHS variable, would just split the column-vector. This does not apply to *sf* data frames as the 'geometry' column is always selected as well. ```r # Only splitting Area rsplit(nc, AREA ~ SID74) |> head(1) # $`0` # Simple feature collection with 13 features and 1 field # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA geometry # 1 0.061 MULTIPOLYGON (((-81.23989 3... # 2 0.062 MULTIPOLYGON (((-76.00897 3... # 3 0.091 MULTIPOLYGON (((-76.56251 3... # For data frames the default simplify = TRUE drops the data frame structure rsplit(qDF(nc), AREA ~ SID74) |> head(1) # $`0` # [1] 0.061 0.062 0.091 0.064 0.059 0.080 0.066 0.099 0.094 0.078 0.131 0.167 0.051 ``` *sf* data frames can be combined using `rowbind()`, which, by default, preserves the attributes of the first object. ```r # Splitting by each row and recombining nc_combined <- nc %>% rsplit(seq_row(.)) %>% rowbind() identical(nc, nc_combined) # [1] TRUE ``` ## Transformations For transforming and computing columns, `fmutate()` and `ftransform[v]()` apply as to any other data frame. ```r fmutate(nc, gsum_AREA = fsum(AREA, SID74, TRA = "fill")) |> head() # Simple feature collection with 6 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry gsum_AREA # 1 19 MULTIPOLYGON (((-81.47276 3... 0.914 # 2 12 MULTIPOLYGON (((-81.23989 3... 1.103 # 3 260 MULTIPOLYGON (((-80.45634 3... 1.380 # Same thing, more expensive nc |> fgroup_by(SID74) |> fmutate(gsum_AREA = fsum(AREA)) |> fungroup() |> head() # Simple feature collection with 6 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry gsum_AREA # 1 19 MULTIPOLYGON (((-81.47276 3... 0.914 # 2 12 MULTIPOLYGON (((-81.23989 3... 1.103 # 3 260 MULTIPOLYGON (((-80.45634 3... 1.380 ``` Special attention to *sf* data frames is afforded by `fcompute()`, which can be used to compute new columns dropping existing ones - except for the geometry column and any columns selected through the `keep` argument. ```r fcompute(nc, scaled_AREA = fscale(AREA), gsum_AREA = fsum(AREA, SID74, TRA = "fill"), keep = .c(AREA, SID74)) # Simple feature collection with 100 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA SID74 scaled_AREA gsum_AREA geometry # 1 0.114 1 -0.2491860 0.914 MULTIPOLYGON (((-81.47276 3... # 2 0.061 0 -1.3264176 1.103 MULTIPOLYGON (((-81.23989 3... # 3 0.143 5 0.3402426 1.380 MULTIPOLYGON (((-80.45634 3... ``` ## Conversion to and from *sf* The quick converters `qDF()`, `qDT()`, and `qTBL()` can be used to efficiently convert *sf* data frames to standard data frames, *data.table*'s or *tibbles*, and the result can be converted back to the original *sf* data frame using `setAttrib()`, `copyAttrib()` or `copyMostAttrib()`. ```r library(data.table) # Create a data.table on the fly to do an fast grouped rolling mean and back to sf qDT(nc)[, list(roll_AREA = frollmean(AREA, 2), geometry), by = SID74] |> copyMostAttrib(nc) # Simple feature collection with 100 features and 2 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # SID74 roll_AREA geometry # 1 1 NA MULTIPOLYGON (((-81.47276 3... # 2 1 0.092 MULTIPOLYGON (((-76.00897 3... # 3 1 0.097 MULTIPOLYGON (((-80.02567 3... ``` The easiest way to strip a geometry column off an *sf* data frame is via the function `atomic_elem()`, which removes list-like columns and, by default, also the class attribute. For example, we can create a *data.table* without list column using ```r qDT(atomic_elem(nc)) |> head() # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # # 1: 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2: 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3: 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # 4: 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 1 123 830 2 # 5: 0.153 2.206 1832 1832 Northampton 37131 37131 66 1421 9 1066 1606 3 # 6: 0.097 1.670 1833 1833 Hertford 37091 37091 46 1452 7 954 1838 5 # NWBIR79 # # 1: 19 # 2: 12 # 3: 260 # 4: 145 # 5: 1197 # 6: 1237 ``` This is also handy for other functions such as `join()` and `pivot()`, which are class agnostic like all of *collapse*, but do not have any built-in logic to deal with the *sf* column. ```r # Use atomic_elem() to strip geometry off y in left join identical(nc, join(nc, atomic_elem(nc), overid = 2)) # left join: nc[AREA, PERIMETER, CNTY_, CNTY_ID, NAME, FIPS, FIPSNO, CRESS_ID, BIR74, SID74, NWBIR74, BIR79, SID79, NWBIR79] 100/100 (100%) y[AREA, PERIMETER, CNTY_, CNTY_ID, NAME, FIPS, FIPSNO, CRESS_ID, BIR74, SID74, NWBIR74, BIR79, SID79, NWBIR79] 100/100 (100%) # [1] TRUE # In pivot: presently need to specify what to do with geometry column pivot(nc, c("CNTY_ID", "geometry")) |> head() # Simple feature collection with 6 features and 3 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # CNTY_ID geometry variable value # 1 1825 MULTIPOLYGON (((-81.47276 3... AREA 0.114 # 2 1827 MULTIPOLYGON (((-81.23989 3... AREA 0.061 # 3 1828 MULTIPOLYGON (((-80.45634 3... AREA 0.143 # Or use pivot(qDT(atomic_elem(nc)), "CNTY_ID") |> head() # CNTY_ID variable value # # 1: 1825 AREA 0.114 # 2: 1827 AREA 0.061 # 3: 1828 AREA 0.143 # 4: 1831 AREA 0.07 # 5: 1832 AREA 0.153 # 6: 1833 AREA 0.097 ``` ## Support for *units* Since v2.0.13, *collapse* explicitly supports/preserves *units* objects through dedicated methods that preserve the 'units' class wherever sensible. ```r nc_dist <- st_centroid(nc) |> st_distance() nc_dist[1:3, 1:3] # Units: [m] # [,1] [,2] [,3] # [1,] 0.00 34020.35 72728.02 # [2,] 34020.35 0.00 40259.55 # [3,] 72728.02 40259.55 0.00 fmean(nc_dist) |> head() # Units: [m] # [1] 250543.9 237040.0 217941.5 337016.5 250380.2 269604.6 fndistinct(nc_dist) |> head() # [1] 100 100 100 100 100 100 ``` ## Conclusion *collapse* provides no deep integration with the *sf* ecosystem and cannot perform spatial operations, but offers sufficient features and flexibility to painlessly manipulate *sf* data frames at much greater speeds than *dplyr*. This requires a bit of care by the user though to ensure that the returned *sf* objects are valid, especially following aggregation and subsetting. collapse/data/0000755000176200001440000000000015202427630012766 5ustar liggesuserscollapse/data/GGDC10S.rda0000644000176200001440000125730314777170130014467 0ustar liggesusersBZh91AY&SY)i! vtÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿã‹/€à5÷Êù®ù Ξ·ÖÕ| ò­iíÜ÷±}ÛÞ=±Ö}ÞÏ÷_^ï_{ƒ¾Ö{ïƒbvß.÷¦×{Íôùo¾÷·¹½î6nmzÚúá»Sží¾ç½í’®Þùµöe­Zõ÷uÝßnûÛÛï=mïW¾{»|ï¹Óotö뷺ɚö绽Ton×¼ÑÛÜ>Üöoiög¶öß}Ý{¾»‹³Ý¶îïz÷{Û½}7¾úvï½¾÷Û»zîÍåÞÃÛ‡¼îJ{{ïWtÛݹßh^{×Üï‹3e}Ü÷:ûj•ìúìí·µÌ÷¶]ÛÏ}}æo»îwn÷[.óÙßw;îv²÷}ïÞîìk»¸Þ½ëÙºîŽ}½åõ|ïw»®—¼ðï]íÕ÷Û黹ëow|ëž½SyS³[Ý»»îúûÎé×zNûß]öúøwSë}Û¾×­ÜöíÜ;»Ëjæ=ïo§½o¾îÖ½7­êÞí£uÝήîöÞU/mݽ½íe§/.ñõõöÏ®›læŸeï{Þëºî««ç{½²íö®éo[ua¯=˼¾ŒGÓîïœv×½Ï{ÜövÇ>Í]÷wßo§½“gÝ»·»Ý׺óÎ÷ªñN›w}÷¨ûÖÛíÞãw¶×Ýîúßg[MíÝÛÞãÏ{^{=³ëÖ¥Ûí}îÚ—t½œûsî½åvî¹÷½ï¾wÞ›wÊ:Ï]{¹ÛÝêö^îëÓß{v^ßIöõõîݵõ{rìÞ^Ûï±y}¶}³Ýîá­Ü¸]ܬíÂß;è}_nop;°î÷Þ{Ö}kïívw» âd¼û½ÛzØÛîfšæ­÷œ¢ŸZNùÜžµÕYëÏ==™âŽ·g½Ç·»·n¶îëw9Ó·;tY;·kBÛm{Þ÷³u»Þï)kA^ö®)سmÚOv¼˽³Íïfôz‡¼Ðk\ö¶ÞÇ»Öî*¯ZTƒ´Â÷w@mswYFÛÛ¹ÛzñÛ»œ¬ÚzÖδ»=y»»{{Þ÷®íÞö<ï^övÜ-íÜ.ÞíëT»o€÷o­ö.òzóÓÞò«Ýåok²vËÑ^Õ·zïvk½×kÓV{Öïy7kµ]ÛÞµ©Ë×½\ÛnzèîÚÞeÚ{ÛšVî·ní[©ÝÞÆ½¯دuî=íÍwMÜÞïwžÛU¯5ì÷ººõîí—o76.îïhíëÛ»íô;Ò÷^÷H·»½ï7Ÿx½ö÷·Ý¾\]MÖ½h÷Ù­wÎÞíêZ¥“{ÛN¯½ôöõVßnç¾^övûHõÞ½ÝwsÞÙ÷²åÛÈò+¸Í½·o\õxº¯]·}÷»×Ëw¼ö»Û{½÷»ïWÞêwÖ;ï±÷£‚»½ßy×¾ûïw·Ù÷Þﯾ÷ÞÛ·/gÞ/žûËËÞõ÷½š÷×}öûu®ûæz¯·#«w×¾v>½½ö÷Ó}×n÷Ýï»ï_7ß}m{¾7iÛ;Ë»½ßg»ï# ¹{—¯wfûï½|Kjw·Îž¶÷Ëï}òçÞôݾû«ï›æï½{|'¶ÊîïzûçßmÍwíÞ.ùöõÏ·Ýìw\>í÷¾ö÷}o{}÷½·ß¦÷Þzvï}Þ·wŸZÞë޹ϾíôúW·ÝÝcß[ÕÙW›Í}ïŸ^ÚØöúsÒípú=íÏ{ºõçg¯¾÷[]îÚï<èõï;ß}ÙÝï£o³vµ,˜åïwu½¾÷kŸ[¾Õ^/pçozÝ»uööú»¾ó×ßníÞ·'ÒÞö÷Cì{ï{}ßvxûÝòáî}{޹=¯]¾7¾O¼ûÛÏ>ïœá×Ù{z{}^§·l½»Ýw}_}Ío—Ývú¼û™¶íÕuë«×kïµ½ï¾ö½§wkë¼ß×gÕn¾÷Onó¾öNúngÛ¯{­·ÞÞ÷}÷ÇO§½ó·¾º½}^íy_A§½¾ï®_{{îûÔï>÷o<òßo|úrû¾û·ÞMÓ¯¤ûmß=_nûé¾ÉÛÜovçÜ·B÷Üw¾Ýë{CÖ5õÝ{[ï;w—´Z{{îòï¼÷¾kË×¶¾šéõÕ¯»Ý÷=ç¶Ï½ÎݹnžóÇtÞû¹].Úûïž¾÷wÍn¾ûŸu½ïºã¼ò檯÷{­u»wYöÝõ»èÝëÎõ»ÓîÓæ—wÍíë«ÛoWz¶Üûu7ÝöÞ»KêôÞõð: âõÞ÷®Ý}x»ç¯Ÿz¹ïn|Þ÷ÁòÞ÷Þ=×m=w¼b•Ý{ÞS¬÷°â¾õõ¯^/7lÈW}÷ÞûníÕÞûßw}Ùn´ëyï»Ýƒ½{µÊU}gßs÷®×Þ÷×ß/xiÎëÃÜms»»ÖõïwWy¾nï½^î¯[uÞù·ÞûsÝ»ß>öõ»ã·wo{Íåöï{¾ß\Ûg½Ú¾ó®îíÎÕÓݵz<óto±ô{­Ö—×»Ð}§Ÿ[½÷Ýå4o»Ý§»»Ñæt÷½»ž¾yÕÏTï¹m½inÏ×ß{¾æë=ë׳¼õ»G¼Qï·½õ­¯½=åßO}÷½÷s_{MŽ+ïwßno»³·}Ü»@÷|+ï›ÕõÇ>Þ÷{Þô›5îí´Uí{‹Þygš6ï^÷^Ý®ª<ÛÀw¬½¾÷w}ﯾ¤“Ø}=×]ZSqÝ–šï{¨í×^¾]öê«­Ç»}š{ëõß_cîï5§-ž¦ö÷¾»¦}=õ÷Þ÷Æ÷#w½îúßjÞÏ´íÝïy¨çvöåvöôïo}ó﮽÷··¾[â—Þë¾íÝÞ«œïuï^t´û§n÷ׯ½ÞŽÝó{Ÿ{ÎöJÍÖwÎõÛß[ï¾=ìµ[½³}Ï7ß.[ÇÁµ ÓM×\½»Ù¯G½Üv½Þ½aÞûç¾Þž÷ÞÞŸI½Ôï³{ï·Þï{^›Ûï|ß\ï½gµïuï:ûݾ÷¾×Þµõ÷m»íÞ^º½÷yëz³í×¹|ûÞûíukÛ»×ÝܽîÞû½Ûï²ì·Ý}¾‡¾öòïŸOW½ëÛç·S}®ZÚ3²zã¶Íç_nðÖÒÛ{x4ûß\îöõï]{·”×Ñíîûoqí§ÞÛîíë|¼ùæk£ï¥æw•¹ñß]öú¾ºÝ÷w}¾û}Ó^o½ôõ•;ÞõöÏ}î¬ö»îûßN]¾òYë^囫JÛ/]ï›ëëîÝô»œ×Ö«ß{¾û\ã··Ó|¶wW[ÝÉÝÙÕ¼÷»Þã>Úúú”}}wÚÛ}[Mìwšlùõo]÷·ÞÓ¾öî¬ßZõékµÞ÷¹¼Ûï»nù£ë+[ºõ±¾y»ßw»Ôu†¾÷µôûÌz¾÷=í»W½¯a÷_s³=Û¬ÇYﯾ>¯¶rìzï1w®gjù=zúk[]ÙY{zm=>ûï·ÖëêÌG-êë1®õ{ÕÆ}yÝ÷;ÝñîÞëwW¾û§Œî²÷w¾÷žSÝršÕí×oÚ}ݼ­Ã«¯Gݺiå¶ÙîÞ¥ó½Ûo¸î¾öÝé÷ß}âîï¯NíO·¾·».³U*êm÷3Ͷ÷½›³Û½î÷wo[ÞÁïyç[½Ýï9÷×ß^û^û§Ûï¼õu»Ù½¹Ù¡åÝ+iìÓÞ¹óï>{Û»Úº·ßÆSÄÈÄi“dÄÀ0FL&bi“IädÓ#&š`LLÀPÊx@ ŒLL&†a0Œ2` €‰„Äɉ€1™1'£F&ÓF ¢Od`˜`™ ÁTö zhѦ&šb2ŒM0&LhÄÈÓ1PªžÀL ¦!¡¦&0`§¦˜di‰‰“4É `&&†ª¦Ó ÄÓM4ÄÅOɪ†SÁ 4Ó ˆÉ‘‘£L©í24i¡¦†™ 4ÓM &†L zdщ“FLLŒŒ& 4Ó&ŒÓ#A¡“CLŒ@biA¢„ИLM4Á†€2h €4  @@ h 4‚y¼<² ùS˜‹ ø&Î>G/žä‹x"ûŒ6b‡®ÒS±  ×qÝÃú-»”=üÿfӆȈ@€g³7 »CËÜÒ£$¢Y'ÃI÷8OÒA0dq&Ü‚A™“`0A& ˆ’`Κm¦J1L0)d„  ‹$$Ðh24 ˜0`ÌÐf š Á˜4 !`Í0aFfk˜A„šh0 !#0ƒA¡B !c 0”šJAƒ @34 4 …! ÌBA 4ƒAšƒ0` ЄC’VS#XФ2B¬ºRºu,j5ŒØ]”¡*42RÉAª,%J%.”®j@@Y B”²‚„¬B°\–RË .…¬ºMax5,–R¹¬•M!f ]u,•(Ò”®…®…HoB©L.‚RÊ$¬…!…]dšË¬ …”²É\Òk.ff …—R@J”k˜B— HYt„,f”!eŒ)&¥,¥ŒR°]$£%,¹¨²”ºP•Ð¥$,•’£BË©u–Bë$!*J’”¥Â#–Bì,”’ `Ðfk¤Ö5W]ud.atøY4$¬¦„! )z 3Ha)&haHKEڮà D¬a“ BÉCëc6fØàÂŒÔÃ@f•™·5fRíÈQ²]¹±,!,Ð¥0Ñ‘›jíªY¹-¬¹“Ch•†Ð³VŒÙ¡,ÛÑnlaµv6©–a£$¶0Õ ŠAÝTÑcjÕ†³]½¹±½-‰SD¶.ѱ ŒÍM¬Ú†LÜ0À6¡!„87775 XÛ›T»5d555dÚÂÍWd¦C%ÒØÚØ¦mj³ Ñv­Í¡£ ¬„c †A±¹,šµnmdÑ“0„®³PÃE›j“jÚ62K c—fÃVÖÕ4ln ¨Øa±±“1‹’.Pˆ¤Ç$`‹ "_œ!0 &`0B ¨¤Ãã LÀ ,HtB€0I nCQrÅ… ˆ²ýò#€Ha‰‘.2DÂ0@ †XR¦‡,h‰Pƒ)AÁ8tÙ`A$DC€AÞ\a.t 6Œ4D &ˆ‘âÄÎ%øá!,X! œ<‘fÏ›,@˜Œ€ãHL±’ˆO@„¦*€@d覂—Ù[DLœ”#U#²¢#‡Ãª²$…K$Dš¡€ULŠ)"&à |eUd!$*˜FÉ•ФÕS)òƒŸS.X¡1Å,x! •Je*$V Š6Y*ÄJ’&TêaŽ˜éC¢$*A4ˆŽ†‰!ÒHD™ñú3¥F‘CÀ‚‘ Tá8…*¦”.xP2hA£FPSC4lù²HDH€#¥”²ã/Xi\kf si×V—“)†X¹²J⫌BÁ$¢5Œ˜¾yU‰e:¹o¿-?w†0óEл¥,ÕgI{‹z*£Ðû­«Ü´(Ë\’ÍÃA•T[:LÃ=/=2=f<¨Å!;·WB¬}%’ejCÈ[Ùc¸kZµ*°má6 ‡GEv¬Rb50h¦ 몰™Ç§NPÅ&†ð5ЫS-ƒàG”µ©­¡¸ štMÙ å‚¡´ñÁITÕÙݬCŒÅæ!W •õV‘ÃZcê¥"· °à’3tEó´õhPRÚ\ÜYÚzC¡ÑX:h\íÇÞfÆ|´žCµFåDÝ6Åóä÷ÿŒ #.¨zÄœ¢¶jÐ ,én"U ÉÞöÁ X/³Ÿ*Gv˜,—UËý¸}îÍ·á‘`Fwn?¡¬~6Õ²æ¢-:£5ÿ?‘ G«z,rP£} .¶>þ(bøæþH­¦ND3ÒHXu  p'{˜ÙË[jƒ^ZfüúN4{F7_åñÆBïú}|æ6)¹„W0¢FÞÊ,¢hMÁ|KgÆW2‰Þø§ Ê…£¡ëd•öZWʯ cj:‡bº¦¤ô÷³’YÍtZde…Òÿ²-iê­fKñ 0âÚÍ!X¦"hß|„ö(µ ìÈÙ–7ò¦ãË…ÀÛɦ$bVw¤:4×9õ7¹ó¦ªX÷¹oj3n*ð¾YÙQ²]Õ¼º–˜õþ×é$ì|¨©«‰µ^DZ'{'W$6A¢¢ÇWÜùB¡@-N£®÷{µ 0£Ô@ÒnD”ÁØ/ Ì0ƒ¿!°dl ñAI­û® P D ÁïM´ uMfBè@BäÒØ¤(XГ3mp]š7i¨9B¿:×”ŒƒÊ[”ØÖ¹™¬£op 2¡Û@»zÎnX26I"Ä@"DAsFZcdD ©P£Ž„Yô‰’BŠ; ŃCJ•6|$°Ã,‘‡Ú´—092(@ „h" —&ddB„@" Äѯ0Á…éb¨ˆ!H@“0¦*@:¤¹ó'Y X•"`¸¦Ô͆N4v¨ “" Ý3ICj ™qŽ˜Fˆâ5R,ÀZ6¬:‰§LteR IEb*ž*iÈd',„‘cæÀ2ˆÐ§V #ž"˜6³)F,œêS¤N’B]0¡™*€&Ö,$?`!äŽ §å4§,˜‚ҪĂ³€ºµb„—m–È®¿8&í§Z­ábD ¸ªa‘pdñ†™!%ý&ˆSz6txÆ™“-´'b`Ð/[$TL"ʳӹ+اdÁtYD´Êf ¨mëÍc¯¤)ª4™¥—Fîi©#×=†‘sÍŒÍ_…ˆ@ܼ}- ¶°‚²)sN'þj‡´ü™í›œ…S(ŽIÝ/´Íp Ýh›×\C£fÕ1fáÝ3¯8¹œhN€z­Yµ“)µ„¼UzÉΙ.ÝJIËHäªéžÒ¨^Tgg8¡NÉG‰ ÃÔ!k ŽT+õJB‹$ØõY²·R`š ¨Ì¢¨JI¥$H ‚@HB !($™˜#0`Ò„%ˆ¬€ˆŠ˜ˆ¡Aˆ=ÒD>¾Q€""£‘6¸ ˆàd`!ÁI1+N€!]0©€@„ T€ B<#£ B  b€ B,+!†€Šp„ð¦€@…8¡€@…5È€ BD"@ AòŽÀ ïG°©€@Ø€ BŠÚà!R€@…`#À B’!O XV Bœ  T@ B@ˆ§J¤D B=Ц֯aYZÃUdccÜV ¨í)·Éõ¬$[Þ_”2g×7*õ¡l›Òî¯2°ôÐ(Íë’³PŒÎä@‡sHLÂ…©; ]h±¡AÉO±CÖ@A\#.TZ/Ag¸2©iµHÿàCïô$!C—, ÷á£@D¦©$.ÙÃ…"i¥”c´Ú&½PóHp²AgC,–ÜUþhý&Èòµ…wä‹FDæ.™Ü‹hÇ)&³hú tt6óSÁÞVC2·åv’…ÁóuöÝ×:$GDš‹´²|T d‡òtÊâÂ.”èk‹ððQVêJrK¬~àHÊë²T|Ž8ÎM(9óÙ„ƒòü£Ê8–º˜üóaM©Ï–xpn ¯+P$…{Ô,>ŠÂÖâ(þ%ÖÔö§~ƒ¬<¯ÃßÔÊY¤£»üˆDG›ÿ[â⦣[’I¼ˆŽÎ÷[CÉcp] aX»@Æßñ„×Ôýaý`ßç²2ŒMùRâ)©™"G"ϹMÕ´¦¨ (=\MRðÚíݢˆë›0ñø³vW=i-û-ÏeÙxÖë/™ºFÞ«¹kšYÐÄXhÿ:^ŽÄçlW|ç;fb¥…¯#€Ž+|ü,iÔ’Ëz]qò¶®ÿ-wɸ{¡ú³o”x¡[°x:t! &tž£ÔCû’ ]×Y–‰BŒ®¾ß¹wµ±òi&ßçv«râòùe=ß]w:Ÿ®‰bÌþ–­îeÔ¼üƒêËä’ †‡Äø}+ë¿gçm*•èäþ{í–"€à8: ý ]3"×ÔÚô“3Ùzf£!¼;ê5hD! @4!c>·])ÚzVÇcXˆU¿¾¼`ûÌÖFXÌV­íœ±óÿ­s¼„á9›—Þõ¥çUd‡;cýMXï—O?ÇËrÿ@‡zÌ_ñ74ßî„•OxüÝcXˆ @,æD@"ˆÍ‘ у0 È”@!»2lÁ™#30@‡|ÀK™ŒB¸` Œˆ¬õdÄD 0d*FP Z‘‚Á0€ÚdJs 'ëGxçzÿ"œ›M’"" ànΦƒwBëÁM§»%WVKÎ" ôh&Fˆ€WÓHjˆA!DŽÂˆÏŠIg}Hêb,ü_ðȈˆúƒ³y½æ@† =Ãr³´¸ºv-¸""Dš.NîaOðÞ§ž¡¬@€BO¦‘†ÓB`BD, » ‚k©…ÖYd(ÖI¨0¥.ºJ²—0½“ž‰YÁ±­¹`ÙHC~úóí_ĺé{s†Ý;5Y‡ÌÝÒffmX0ª;‚Y.É ›j™„šÂ=[ÑMГ <¹sé-ÐFMLœÐÊ1Õí¨7äs»“¢ÁÕ 6®kQ˜yBé€B - Ù/K¹›Šî¦Au ™³@ÑÅÜÁRb8êFQÆZÚ²1Žì’˜§ª0d dE"`.d"""n0Ì3d``ÁƒfdFdf 0Ì’  Ń0dF„33i3 JIÂ’j0£R‚ ! 2yJÉz0êe²]¹†JKË6XÙ„…%¡’ + 0Â’3’B –JRÉ’’Ée¡èa” 6-¬Û m]MP“KVi S6ä)µ&²™¡½uаm6Lš7½°»rƒ5Xƒjä”·¥X&õÛL,Þüp~¤@†MÍdÞÃGd,äà‘ 1ÐM€(ã2²  ’‰UÑ‘4pÂé’b¤:2" UK„X$gŒ•4 Bd„$Dñ8:Èãÿ FiåõS(M„]NÜ™S¥¿—ôgLš.(Lˆåë—ˆ’©zØz.ë¡#0k‹Z… ÒÇL½(u°‹’\ þ™eS]ø²é·dGHºÂxÑ63éR²4!(—jŒð¹¹äâì¢0^µfGP×<ÖÛ:w iAn¡æ^VVýâ¹7 fD3c ÚýD@Èýl,D?É™ DÜ`¨†D"DP‹C ¤-FEAë´ºòMRbtPR4Ú³a¥ØI˜I£ á·Qå@"¢DGd2Dˆ92Qj7í‹c6.WH›ÞNCå“Öðv9en0n##MÆŒ‚]ˆ=¿ ˆ‡2€‹2"!à0`X$ èȰ330# ƒ28Óƒ“£›Aƒ@‘2#0Ì€3"¢+ÆUãÃ`¸õ0q ˆÜ" C fF ˆÃa¸$ (¦Tˆd"+ot‚ JÓPA´À³˜"ø™ôÁ" ZY€ ½†ø5Œ`ÌÈÌŒÌÈÌŒ‘ D ÀÊôh20DDD†F؈ðH Ab DI"DÁ€""Ó3ˆ‰@ˆ¹fÁ"%´@pÈTÀrnˆÆDf@fÌ2̘3 ÌÌÁƒ Ì)ˆ‘t)ˆ"%()DMµo"EwA"#"D…Ö DD¢DF`€@ˆš³ˆŠDJ$ÖȈYI"D¹À3 DE¥f¹"%œr À3"D@FöâDMTD‰‰s DDÄÚˆˆ$""ji" ݈ˆU¬@]5"% ˆ""@3¥›.ŠqŠÁ`€DDDŠ 7â eýDÒ½°sP.^4ˆˆ·ø³ôâ@jd@"Ú€<Ñ"2t{vé¹DËÙ‘§íÒC“–G½€I /½Û÷¹A¾Ì0X÷»Gþ\ªb6qú䢱2Šü+ x×\}À¡"„`€šBwh«‘Y?t ïøì¼êûØšm›Zœwæ×ˆcm‚ŠÍB{×Ý™J÷ŽƒrZj‘‰½ÐN¤áUpû}Å~ë&¼N./ü£±~ùæõi(÷ež„…ùN½}Ũ:4*̹ûÎMÓy2µuò§'ÙS¼OçxÓz:…´\Vh,f éEmHQ€+?´jÜ)<((ê`EeE6´jéë펅%Î×ÐñÝ/†\&Q+ îp‹Ì }…b?Soe{Ó˜Ö(?£ŠQG@cç]Øe&fA§Ä 燌µaàóëïõ3ÕþÞbUr ˜Í-»'"À ÜÈE=÷Á$0„"ÚwNlªÂO˜¾¿¤%u·žg[+,pÄ'Þë‚'Îuëd",r dnkOò›>B‹˜¼IÝ*ÙÓa\5WRôóXÏní•âߤb`·í>Wá\ ·þ²Ò×ÇÙ)S´<ÙêÜ»>6ÀfÛ¢ŠŽËî$r1 ˜GQ?¸ž:kÀ `½Üg¢šÁc-åLœ#®Õi´e©Ó¥ÔÐ.Pqïä4&%$k橊P@ —2O~çeÐXº­_Š1ïuì¶Ýu™´ûŸwüÞ¦ûj¾U-‡›ì¿ÞËìþ5‚Öìë(Ç,þw#fr€5;ÿMìBE±€±[by×ñnNÞ•sé:ìüÄ}xõ]C~ºôešå¦ä ¸Fÿ:Ü[‘`gÌÆ¡˜a©øÆŒ»_#Ç“`-óo2Ðd WãËl¯`šSïp»ßûm+IØÿ3­¹z.bŒ¡E·S¯„§$‹/@§Â Ž•èKÏû\˜•þê"mÿR»Ÿ½çÖP€LȼæEô0BÀp]z³Ý’¿Äÿ°¬µÖ¬*´òäD÷Ù®ñE¸’r,WÚ—%‘õXïx†c³ëÙ'çßôìàµÂ§Q»l|øÖ'ê×uHÈgeÿBRXöüBq‡îvbùFEA»që ¢ì2’XÕ§¦Â÷+ñ+kÜ›áa†ç·ÛÉùÉ!Þ蜗 ‰¼ý$ÑÞ*o÷Å`«³ÏŸ†Ðø! £¿¡Uý7Lÿª‚JS€?}g6pM¾"êd V ¾ÎËLï¬yÔð­!Îu°ñ˜ì<¡䮹qØY€j7IŸ¶ã@¡èœîÛb®?°?þÄö…Žé ÜÑTvw>l§ä$qÓÏ㸗îðý] ~9Fvy-[ÅAµY‚^ëJ Ö’#MÁEwz}À»ð2SƒYý;‰’(P“4¨þ)Z cêa’°LÍs$Гǣüj Œ !IVu@³,ä2éo&A 5«v£ü¬äåÒ@ ˜-êèÌÌ$ÞÒ 3­¢’feÓB ö``…„f@d” š  $õ8FED0K™—ìõÆA¹’ ƒ9äÀÔ}c …Ÿ’30“2!H43è!£5Ì„ªéI³@‹0“%9 ,`Ì ñ‘,@Q‚³[2"XðæK˜¥˜$dÁ À‘½I¶!H@2Zþ€RÆhBÆ &@ÍFIRà…$ 8 Æ@ XÂL2\È ÂŒªXÁ(ÖBèÇ(‰jA0fJ0²R’ 0fd“& € £A!HR¤¡>$²PFffaÍ ¤`ÌÁ„•iF²$˜4 Ò0*‰@f`330;&dKéDa&h2ÌŒÐ`™ È«¦As(È(ȈI€ˆ’`ˆ‰‘@$DDFd@ @Ì?$%Œ^L€ 2 5Œ2$ƒ& c"3ì˜FA&„ ™.`òè”0±…‚ A‚¹™ ™ d¥)23%¨Òzc"Q‚J£$™˜3%’Fw)B€f2h4™¡)HJÌÁ€” ‚€ð`(ƒ#24  ÈÁ‘9˜I¬d gÄ2ïšæDf–B e*dH0fEV26Ä‘ƒ3êR ¡–LÈŒƒ‰i2n6¦Øl’€ #Ä’&DfŒ€A€ f(È”o&`f ÈlÈ€M†¹†\ÀÚ`ˆ È !×ÍäǤÁjŒš̹*e¬2'•DÍ™ðd6ÆdL™;nL3"ÝR àÀ¬˜ðe <˜,¬Š Ùl@‹û¢£„»&‹ÿ[’E¿1i1h2"ÁÝc£|´þZ|·ûêuòÙôTûŽ¿«Žm­©Úÿ ¯á‰7ýQT¿WoM¢‘èáU®V|ßÒ,|€LØ'4}U±Åq·.Ÿ#„òÿÑå¶Aàϯì„\¨¬¼i¡è1é¹;'Cµð±;ÆCiÉ´/twqTàMïäL*BƒrêdJw'2sŒÛöªŠÿµþ¶æX‡VoJ+V²1Zê‚4˜×Lâ±ç)ECÇ3›6ªþq—äV߸IÖbÑ—Ù£ÄêG<± ü|Ân®pþ2ÙÎÎ1û7š”Þy| I€YÿRfd ™¹7-Î*øØØ/þ¹Î&Ã1pûÆïgÞüõ?¿èrÔ9Ñ®¹ª=R=}êçÄånDfãN5ûÀ÷áiѤ¾‘ “;ñ~¦…È]\qÌ ™8ïÔ÷HšLuæøë"3l:S|íVƒÍ£Ôt¹Ý+kÒøÄJ*}è{ÒÖÆá“òE?jøÊ¹mõ­ðoSèÈŒ[;ûnüêÚ4‚æàZ(„¹#èT*¯„ñ0Ý÷zæÆ ¾1Îaçoµ~?ÉE€4>í® ÅÙ£â²x¾ö§Â6b‘ƨÍ.„Ô‹Ô0@ó“Å  g#Ešð´'mfÍó¦šFù¸”hJ*FB½„6ï1$jæg0Ð1‰m/.Ü‹ìxnRbü˜kŸ«‘QÓ¹ aê³’¼bEº$n^SÒlÙ3 ²§í, ÍOª<¹â&Eo‰d†ÝpÁ þ¡Ox] yòXæ_Q»BB‚ï@‘ž¾ØË`WY½Ðâ5o¡ŸöާqÏîjyí ŽÙÀ ô O¬ñYøœ8pñôñÝ‘æ¯96wÊ‹ Ñt ¦“Ú„‘ê:‹7¢ßžœ¾Ü¨,“ëõð±áFuEì©ß"õYeÐö«Bd'*ºâ´¥Öþt2Nõ»ë–ª¥¥õªjåùH( #~+>6¹¯-Ã!íý… €D]V°NÖ¬ób¾Ô$q¯¯Þû}»‚oжE`¹vþs• †§ÃÊ¢³¹ÿºJÿy8QÑ}`K˜ð&öI"MaoÅ1êMîoŸ˜ºµ­¨dÒ -|¨và:uWÆËã1¹¶­»ªy^ùt|MÐà!Óx»ú@8~¼# &ªá\0S ÇLƒõ?š@6ìÔº*ùŸEo+§÷­¼¥o“óíaó ë2#×XÆW¯Hó_Éx¨iR<šà³dàýŽ¡ Ÿ1¤dã×|â÷v™ˆîc¤T~bñ¿Z²u¼¹õDš»àÉ´3󨇙 IG"Ãc;4GM ùUÓ”:Ë…¤ÕqªœF•ù§i²rÁvÒwª9? q×]ƒ÷˜ª2>Ѐdƒ,¡ùÌ8˜\Ãx`6à ”–â'‚‘Á2ÆÝ<ÉXï*0«¦}Ue‡Hĉ“†SÇ>;®­ç[;BTSÄ;%gú¿Úƒ ƱkʸÂ{‘=,Å{Ik‘}ãÔÙpýŠÄ¨#ÕrÂk43!IÍ‘Z$l8¡Ýåù P]JIhŠ¡<58C*5• mÝ ðŽ wùÛVr5õ;c" BÉlwPÖÁõNXNöKJ˜éÞ‘ºuʽûÕMüÞ•Xy¨<ø† Ju1=ùþÐ’ÁD¼ü @(ºPUÚ -¨oy¶¼Læ·`ÜD&ýî÷>šßô.OûIŒ¶° <”)uaL~/)ÖŸpô›ßí¹XóÑó`hY5þ3mRXü6ºÂŸ¨kmæÔï„°÷2`*Á@´ª.«¥·±íÉŒFÁüaj.4¦~sdêDDŽ 7»øáÓ’Êý×ÔÝøëë&ÿG7 ùžÏ^EFÒYùþiàÇ5ª ØÕÁ-k­,pï¤,zô|çŸùç·´ouk‡.ŽXÈ?Ð~.^¿úSˆ‰‰DEÊò »Ó⠂ɤÑP.§þª1<˜d"šù“Ä:L¿ûcõúûÔ‰D0.ÒÅ’õÇ£/ÜþŽŽ›+(((èº]¥]¬€{ 2y î_¿šã†q»?ŒZ´è^ Ãß4˜"ò*ø%vŒp#]w?ÙÅ"­ÉPñ!Qò¨Øw`eß…‹åàãD¨Ç~E™ÞT&êSQÿþn|ºŒ€Š:¸#ý‘ò¬ÆÚKö7oiP]ÛøÛß§ëöâF¿üŸä/©rH`É…ÍA³Ñÿ«å öpF_/‘B`ô£1uåUh†"Û{°Ÿ«¹è¶}䎾bwZ~ ÌÝ©¥Ó^%Næó‡ìº7`0jz‘6p| »Íf|NSƒœ&8Œ™y@þùFûûâi ô’´ ]JŠŸîðéϻѾ»åa/60β[^7À¦ëböã ±=gô9ö5bè(ޤÏ.›£$1»´ÛˇIZÃZ£ý8Z?ŸÆ'-ãÈ«­×‡xd,i4ý´Š½ízN±~vØ}öæÂä6õf9ÿËòƒ(ÉËÜ'Á&?´ NgcדʷBçA´5wŠîµ[ð¶”¶€Ì‡û:÷ŒrýÇè»Ãq!#¼È¬Í~,@§¤>;÷ºbLIÒ#1ƒ„TLi–?ÀÔßÁ ®4’æ­aÒë%üÀà sŽ‘Œ|Š9[¯Æñ­”jÉcQ4n›—tÓ¯ØZh—|ïƒÏËs’¥»z7öhO2Dv–2ž>ìŒn–×õ¿÷1šR¹Í:É`ߨyˆÈy5—÷½š8þYîtÇÀO‘<Ô;‚`vŒ)øê-šÅ3=#Fu–dŸ‰}`Àøßcˆø%ˆ‘0 À¾WŒl¬BŸ¿hˆðLï2‰:Y>â‚‚‡ ½” âw½ýeÍÂ0-n¯\\Œ4ž  ¸Š#Á"ëwD>u²¾^VÑ`]ã DDAS0$b?|ˆ‡“‘eƵûñšY”³cÏ•WJ_ÖË´h?àϦlRêwZëz±u݃ î~ûnoþ6” {†ðŒÛWÝMο;aW¦@ÖÁUïê$™m­)ÃUØTT$̳mø§œ´Å£GÓÖÔ˜¤Moý8}¬'¦Ëì³AÒœ1[m’U{Ó¸?môZ$<÷„n-õG+Wf„˜Ðjb<¬Ôw2±„'ͨÿÉX‹cä’LiµÓ"¡Ú>œÍWLeDü¼Ãf*à>G¸0ÿT‡q8ãQh‹Æ"æ.B»šÔ 姸%/EèKœÂ7ríî?‹XX[9ÒË%SfX˜Îá;C!ðW}ÜÏ$šãBdJåˆy°Ë|Ì(Ô…B$#Ð"èZ"ý»ê -¥ÿ1Ö̵”|-7Šö-^îÞ¦™‹6hØq ÊÊÇ>P$\¨X\ÑUNZYuÕ:¥ôWÐj ¯üæå½œ>fü·S¶¡ËRÎs÷™„~}ÔÑÞÿÛÙÝÚ³ ÙºA{,ŸJG1¶ ³ªyÇP¾ÿN­sÓü^7 «ØÎ·z„(]DD@î…ÑáK®×çÿ]4í•¶¶jJj:üó]†¤¥{ÒÐzÎ ûÿ86i¡!ÐaûV‹*QÄ?ô1älã©K ³4'#¡g@H«œÐ [ˆ €8+J·eÁxü f%1vÇ_ ~ f‹±³í𙯠U´Ó£åŸGóŒ8 ·Qk‡¢n¸ÅfñQo}ˆú.˜ I‚%hN…ßüÁ¿+uŸVRÿÄ›žZáà\ú¡´¼Ìÿ~௡qæ¾ÊΣÆä×/÷}™G@há¹e^½ÿcf¹}fö„•Ü_g°ØbÓÔBŒ.³Pߥ妗Ѥ IUǦÎÖˆ~4zxó>a¾žYÁ‘NÒ–ãâÔï%4´/5:H÷µ*yçû5¾ÊÜ–‹ÎÎ:¤%—Ö@¹øÝ»í^ÁŽ,OßÖ  y;'¢Òñ΃ÞȰëê¦!þ_KF‘v gaÜçQç¥÷I;]b,ÃtÇ [Š®oÍy”ˆÓ‘W²¸ª¾c#@ݘüóžÙ¡P*÷‡û“½†4Ûø†*ògàºL+Û}àNçgàûäÞ«é%EÙRÿ„¥ÀljQdÇE¿4=¸+'ÐÙžXàƒéi‰0Ü€µo1ÔÆEý/FúQBåhïUÈiÇ×ÄgENb¤\Ô¡ÅœœÆ"ñ äW†Â7éó$>Ôrç>6ç•ùµ†ç¶ˆc]ìâbMŒ›ià=Ü/by jZ«—ž5-²+ºYkZÝ2Wî\Cš#nÞ`£ú]ß<È­ÞÎd%A{|ÿ½¤p¶¶#¯ ËÔ£Vøêë¾sëߌªßð¿£m Þñ°dWy»ÈXzR>)ˆH¯…¹u(@$Çy§“¢pÊм½Ií5œ1½33•Pƒ£OŽÁúÔå‘›îs“Ä ò‹æ«?æpüRN÷úâ2—KšàYvÅ; "«é#}¬‘s)Ø–×õ !I $Í!Ø[CÄɆó€ñŸïɼ$äü¨ê„yÜÒ{— ëóéîÚÓËÖ€TÔÝ(Üpp½Þ1tEB„Ô¦0‰*àp¡89Ÿ½·  `Ù8©´f”*}œ У~uRˆ~ñÓ+·ü•)Ó†³ïJiù󮶪bÁt‘Cê2þÆçÊÌ^b—W±(Ý·S±¼âö„>ýy=wÝû3”e„Ko9úçÉÕz­C|sû=\»¬ÉYUNB´Ãùâþè1ÉKàæœ4óDGøa¬¾êƒ$Án²%îųù3ðLýØŠ¹¥&u*é H„˜Û_—‡*(cÄæÏú™•X±ºŒÞ^¸Ã¦4‰7e¦¯`îáD¿ Ø×œ­€=68Íj d+ù„®JÏ’yE׉—'F‰Ô~½tóÍØˆ.FãOpzF ¥Ó+ÿ%.(.y^±´\Фf«“ çgè'o@xÙ%©þD#ü "‰Ñ„‚EfÍû3µ <¬# E÷c. ¼T*5-ñ“ix]iÑ™X-¹Cò=hpö­{]Õ ã¶kTQ™‹—ͽ“‹—aÀ²$ı>¯üäE €¯å$k¨›*eßAŠKôE4iH¸€cGÑ~T}c`~‚$D}I}Œ/Ü¿Š1<`ÀR—_åt¯"Ãm¢þE3vúf‰JTEÓx»^ŠÁã{\³hµB`>« <™Ÿ2‹Jô\GËD—}§Á`ˆùÌÖÿ6¡ÈL¦gmF‡ ò¦hSXʤ€ýÄÄ(àŸ…·¶'ÉW觘[ª Æ=F}Ó%0Ռę©¥0:”Äk»$÷v…% éga¯=) 8Àèš R~âÉÊ`›t3)ÏgØ5Ù’‹yfI@œ9üiÔÑ´Ê 6í2u ™SG+EmΜ„DeÎ' ¡÷ @Œ¨‰3w¾×·¸@ÆŸöa3a^œ¦¦2ÓíBíáF ìQ.´¼š%OQÖ šäP§j .€ç.Û™œ²ËÚ·¦z\ö¢-]3äëOC°ÿáCãÐÛ•wÙ7i‚À¨›ÔÔLßÅqNú,·Ó&®žÏßÓÜÐÏ/º@ËîÛžsˆÉºõk±ájöò³4ÊóV9L!ŠŽÆúk(m#Iê}ÈláˆùölfyÀA¹SŒöuõøs÷ÐŒ¤Èub-{µ¡_-Re^¬î!p‹‡ú‘âj®%àP •ܾí¾ÿa†<ƒ"PÐ/ K{Ý/P;hõTûéÁ¹ûjHïÚó½ãè¦Hm¿GÍ`ºð#íá¾ršv7GJ¦’|ìÍ6þEèJöl×Ìç¬á«ˆ¹ÿ B,‹²[¹nÌá’~—HY ‘¿’³²˜`;Kž~nX‚EDMå‹krýÛ$VÚSo¹ÖÓÅÚŒLãÔ­FY6¸ÿAvYŽËÊs¦Låêr÷ÛhbV¿àŸ÷‰tKÍMcÉãàðoÊ”èžá î<ÔPÍQr~A:ñ¬ðŒ–7Wñ„‡r‹Í>"IÔ­É W ñÂOøBëŸT›/Óddø…Paó#•A!㌲7ÅÔH -XÄôVX—x¿%ß±  L=&¤k¯T5w9Ð@ú  ­¶Ï²ü#,߉ÆÚ§f6Ž•tͬ‰zjÀíÐ^ŒÚá’#”¼âÆA¾mÕºúdŒ‘R¾5ÐßuBÆ¡’CŸds ²Òx9ˆ}Ù&ä“ûiy\È5#A%ocn¼B,ñ>¬èLÚrÍgEÈqä«Ôg§8âS"Ac‚/:ßñëQ8–äj›jC­ÍîRÏÇÇ›¬gmkÉ*¸òüÎfâ&K° ‹#:FkxõZVfwT£x‹Ñ› ”áöžºÉ˜ž~&lia:·ÝšR SŠ€‹æhÃȲp-ƒçuÅæöøR$šÐ_ý%]UR’ÚÕ€¿ƒµ 6« Ï€ð£©[}ŸOص9…v•YEÍqNJˆO ¶EbF­~‡¢)«k±Ü.éë¦UÔweKšµJR¬Õ'§ËoS§á5o"ÒîKefS敳å¢ðs¾OÛXŸN5OÓêo^™¦Pꤌ 9Žbs–ÒQtÉúÛ"½kÈÓÙóç³}÷¦_T/‰¿eW¹+¥»]MAÒp Êë+7ø úõÞfT£)ôî.¸ ñü4…ƒ®ªæ …g´„Ø­ÓmWl÷wÒññã¢CI|#­£”U®û-$Ãoj~^pNª@"ž‡ˆqš:pPŸ[ù¯±±†ù/7é5*%1L>'`^R1JÔ5‹p$Ñr†Ù 𭈆3´ðc•7Z*dè<–1’|‰Ø7e´I²·Lªo=“z “ØW‚2‚KÇ$ñÚïÅI|£½ë°ÈÀ­À* éLa¶qçƒt[¼÷`ý h[¹^­Ú²§µÞæ ϽÎÒ¤²—/ÙÀúº'`Ù5íu¼ù|xéì‹¡2L „ýæÅjÝ´‰ìŽݼçœäô–(*âõéÖ¶N$5ЉõMAퟱ+ `‡ñȲcØ@a ”œ¦aHIÐÊd7!ÿEÂ7½:\vv«–N|1¹„Qÿ®=jÑ¿°v>q:@€Ët>šjµEÐÉ{v-xM,öPIvë8ŶéãíïT@Dºuó˜ hËy™ÞGƒ–uØ gh&v°«›w÷1¾ôŠVC8 Â\fÏŒ©Á P=M½§j+-Â)çP˜xçÚ´Ó$‹X˜È –ݿ˛ÖRç÷;ofÁÂrL&Œˆ"ÇwM¿é1Äïà¨Üú Þn;>v/ƒÇwïÊÒYt»w}íÕŽÔJ™µò±J“áäl1±PDV¡±$Û*£ üÝ ÎòðpÞ’Ÿ<Óìk?pS—qÖéÌ'¸#ªÜd:%O¤‘T‹Û 25SZÑÿnWía Ü;’î=äˆòÀDí e‘î"NrŽ¡Æ»Üþ|òå—²cÚÈ~¨j Œ¦^á/ÐZS?°–ˆà¥}olíÀèäŠèel²`‡Ñÿå~±c‡“ý¬æD¦å8 åξB—ý"bü<Ž”¯äëœÈÁùNwP©–~;­Ùý±ºóT…žoéš5Ì3@wÚ:4ƒ5 k‹¥ÝùgX±uæÔOY¶ÝPáï$ó .'6rÈÃ_9/vû0@hX’qÏoô œÌV°;.ˆ°$ZÂüeâŸ/‹!~Éïsüñ|õ,Þ~êþúu2亸@âZeÅë¿^ùm‰Wjyø‚j/|A‹k µW _pxnVÎÓ á†WÙäü²œ0†Õ³=M:~î$µt¶²/½ ÓˆíÏj—_ ªöÁõù_ÃD3êè/ª0Æ_cUlªwÓáøåÙž£½a›º\5ô™/w%êb“úèSÿ‘¶’‚v¿iK*ÄÞZU´FȬô—GUÐåDkÄFÆLë‹`.ôw‰~ÅÞŸöèQ¹‘ZMèr>ÿ4R‚‚€·½êÇôžPË AóÛð¼Ë±ÉaŽQ*u ç§(üÅD8?bŒŒ>š2_´ÙZsQþ?r+íÃ(•uPÕ:~Åö³gÑw‘›®ë÷g¤¬¤déXjã P¶¼y ‡zÈ¥õcãÏ´ñ>Èö]5åó«#»Èõ®\`q‰‹f`­"?¦;Q¨îê¤9`ç"PùlÖžfœ&lIæqm‰¦S³€ï  #ÿüQþ}Q¨ÞžIâö[ëKTÇŠ%±f±aXë];ÈßZ›.RŸvá`†¯6€˜÷n»ÌÔÆƒ‰‹@¬8­Fó=M+’º4Ãw9N/鞣dˆšÀÊ…#ÞÐ-¢®ÿ+ôV™:¬ ¹2kªm\«µ±ŽZA‡8èìhnÆ€Ýk×/׹ڡþ5Y÷¯õ± 3/†ÀœÇ©Ñ";˜5Œ÷l­òò§¹t~á$ÇðuðW<â@%JG ]Œn=èKbÀ›L÷ºÌ XïÓEÀTPŸbrË€@ ‰æEdŸ ·ÝÕê¡¢§ë#…ÏÈ^pØþ1\;èFGÄò=µ~L¹F;H òºG‘¼wãšm ¨“=CK!æÏ÷•0‘1%ÅlÉy…ÀýNßù.–B§‘Yõ5&ä¿WeÂ;Ý16ÓLnÎ̓î”ôtÔ7ú2aÎÔzŸV­ä.Ý£ìÒk0'AMÓ\¾¼© ö¹€€Ñ`¶Ác*‹”v¥Ëy2,G}¶]ƒKâRlnSýÊ𤮎)êšR¸çõ”Æ€¿¹Ôåœ0è[üÃ6*w@'Ê֚͟ázIC³Ôà ùt–­<`ÖÛ‚¯#y¶ê 'Ó>_Sv‚õKK„ø–{ĘU÷z,—%}ÄçöÍ ²Å¯ƺ‚uNûœßó>˜Ð3jN ïZ ölžH'‚u¢®á³iöN0¦ˆ|OšÛüéò:+k7Œ­˜¬›À]²[T&ÇrøÜD‰rgHHt †qؘ؟É"€3Á?¡( =ŸÅøÍ˜?;ÄBø²ª1Æ©ˆšqB€¬^t$tøB§~AJ‚,¨òÎ~M%C£sqÓ 9Ü •·-ûŸ ÑØä¡9‚öí¤ït ù±mk…J`2ůñFæ¼fëš'ïR5þaa§Âë¾ s‡ZÙH€ˆ‘™?¦é‘uêñ¿°YUšBÆ÷jYV:Õ=Ë3n¶ð«‡øe5×P~Óˆ ´£Ỉ"íz›NåcNŽe^’?­ùˆt=ßÕ¿ƒ­NRýë'”.MtÎÿCBêõN,]Ë{)øWbÓo±°ä’Z·d·ý¶šÍ©„+ÿnyñ+®•¢µ¹s«®lªWdd’g8óB=\û”TÔ(€àåœaé½q¹:H8F7ƒD›7\aœ±LšD È)dªnp*q`yƒ&°avë3M@þש‡‚?èfÚµEëé~4N.Ê”‹ÙͼDÙp$˜X¸^"zÜ+‹ä¹Ÿ-®³b+UòòÙ¸=ÈúÅé!n \üd¨Âõ4ï|UòÒbÙù÷¢„ Ÿó#qº¡¦”ë>Üà8‘¢ÄÝ}þvö=„ˆS˜æ±•ÊÊ)@ý†X—0Šwª·”WÖ`1pA#Ú±€Ï—g 6!J=o`Hó#ö‘"Ø"´Rô±óÚN“X§õ7FÏýÔ$¼Â怓f7DÓtÍ­Y»§ÓÇZf—Ñ/‰ãÁ+&yuÂ.o4ÂníÁq¿3€tä¼Ð·+Ïž8gIÈ£¹tó—QTeQ°C°y©sYk¡xGÑw'IKw>cfŸSÞ„úƒ+z—Õ*Zd]‘?Æ¥Atê+ÝÄšû|u£Ï–àÝ/4àV¢ä—A³PQG_VI«.ûûÖÓY>Q‚Æ7ÇMÐb#xç,L§†Ÿ[øï@ rúúòjó±Ž5¨æÞd/<ÍÂè kÁØÚ5QgQW¯i%|6ÁÇsŒ)‹{Ê…ö¢Èæ ?Ì0uW{­<_tå¡Þ“ýöùȉ45LÕYù]O[s"1#Ñåkï^jÏcJ±äçAà`ƒ÷ &”ÅŽ¦écš•ŽÓ¥Žœ'¿ÝÐM‰¹kÙLèk×õMÀJ‹óMÇ¿Ñb›(ÊHl?X`O„ÇEMOÀA—v“ #òr{×Ú(‡Q6n®{®þþ+‡ó±âU`´ÑGðíVkz}èèÁnI‹+|QÞ[Kì´ÊËø¢F,’gk(k­v%ê9= +êTŒ«X…ÿNOùF7ý·øùÿgƒt*n³,TÒª—#*\—¥±b¶›•Õâ›(EçL6ÿŠ¥åVJ É÷ۧ½ë>dµ@¹&<ÄN$lìºû> |ï ãmRýD>J{é‚=ð…H[*/usHWÍ]½BUÿ\¼ÁK$ŠbËUb£Z6¦¨? ¡Ï…W^géÜiâ=ƒñÑ ¶Šk¬T”‹Ê‡ýûéÞs–ÔÉdönÿÍÒÐßšx†f7¬õdy°]ì"¢øô¦ B&çÖ(:ø4ð%ußµ¸|`%mø“öjøíS7¬•<Žð1¿rfHg«šÜéÎÝ®þn2¬3}qP…G6 ™Zu/iVÓ*$ýóí*ò—0KÙ;ø‰ˆÒ¿ï$ôIÅ /Ó!Ó(Nð9ž"S²é«ìŸY= bXT>RºlãÚA·Ýƒ\ö³Å˜wÙE§Úü¦Ÿôv×W°ÉhRÛ‡í.]¼^IÞ)C+§Ê£ôFÖ$ˆá´í°>K¯Ê§Lr Üܸþýx-/œ«òÂ>¿Ž—ü騧o²~ÊRYZ:b˜Qq*VPHÆs!5Ré%»Ëð¢Á 9s©2éÁí$¢€£›æ¨‡•-*—p]êÊk´„/Ôp’–Ðý>U^»ci|G­’l*ë>±W4a(³ûÊÜ|ÖŒC3b•Ê07þÓ Íª~C#«ø¥R7cìýØŽ=ÿ:7¶âï¤Þ»R{nD]‰£ò.ù´ÆØcq-,\‹}§oÂö4’\÷»Ä¬½Dø»o*LjØ£|_‰|º#¿cS›¿€ Bù 3øÂ!Üé#âÐÂ$q†à¥¥8ï“ãD—ù‰±޳x8¾NVðà4¯)Ÿi¾D×£> ®óëß t(/•˜é²Î§ƒEÖa/¦` ¦ïw ¡–¤¬Aö¹ðI"Ë/ ÛÊ¥"›ºÂý¢±„|+†½œß~ _alÛ,¨ŸröZ¯C¦qùÌËMZv‰½íHÃ{¸ø’<ðy­#ÙSKQYô©5Ôõ¿éƶN Vw˜Òv)|³ø¥kes§½ž™žkÈþ›kƒ+–$¬í×6ríýfÝ]îm‹º¸ôR³,¸ûOºU5ƤG=b’æþC¼w«ãž³SùHïú+\÷-þkÌ©—LU‡WF”‘™0Ê3%²~O[&·c”6” ’xG0Ü®ZØí×8ÒPXjÌFâœÌ‡©FltÒiBŒ™Ð.פ©O´ ¼ÕI“7qqXgíó)õšfج®{G¾¡z (×âå›5†ö Vø6k¼(ãIiØÐpc€?ž¨rÈÀ¦vøÞÕªÜ{ÝÂÛÏ >xsñX êŽîh Ë5®•çÐÅPc''éÕ‰§A×Á‡ë¡¯ô”DŸõðñ­ÿ|]ÓˆBÞJ*´kƒˆ{ô‹³8Gõ»ÉèÂ¯æØ¤1jƒü‹w+9öÙ¹µ pkíSÐíÅËÚ^ÓâÆ_} ¸¹rUý\›Dy?²IRGöh,¡½ž<©\êÞVu?øGv –nž÷¶×£ovɼþø|O/â¾`Í¢>›l™ ·,‘.ôôé:-ˆO€•šöP>ƉáuàYpR?›Sš[SÉ*’‰n~PåÆËaĨ)§æFh[Љ[¿bšÅN™®ÕaÒ%m²(ª6úvaôõ[{gœŒBV™¬7FêV(NfU‹ÂÅæô)’þЂÇõ kýDÌnr`¯Õ.÷÷Q×ÒÆmrðƒKÑuÓŒj6ÃÞÈVDÊ‹šèWÌ'&à&_õbmÇ”n-5?ÅrgöJo¾/„¯y¾’îMtu_Öì×’DÄÏ\±ÊÒ´«Úð”<‚wï9lzº;û”ŽÏÜ\¾ÚKä È/훑Q2ô\à ¨ö÷÷¿ ™×>¢üŽ¤ê ´Púa¬,*çŠA ;ÌW½‡$"Ì.ߔְ Zl›äñܹ­³l(üK/0$Èï{Ô÷é³µkt¶„ÅŠ %)B>3£Z0+EþeÝXó›Ì™.F(oQß,ü³;»dß»q”—ƒS ¡òAÅ}ìÆ’·”ŒÖ–=‹f ß+èsrU·Ov[“Ì{™åìOn½=ùI[Éè$4Gðbc žXG>}Óå¼Ã~ëH7Àzïß#z0Êã±ò(E/‚×2®yMq6§Ûùzâ~%ÈÉ/gïц®7ùº–73Ž.1Rm1ÆÛˆ£gd;ÎödÊ#2îÀÉÆÁÐM~YqÛxl4Nòã€@¦àS3mv—\št¬Ö;ÞÕ€êLƒ²˜Jy@ár¿ÁÁ¶—¹—M@«m^¿ì„HŒŠŸå*ÝŸÞLŠiÞ-NÏˉ3ÓÏßÈÄ%/–hß°WW†œÇÓT¯Â>öËÅK°-¦^þxß0ªñè±½ÔhaÿÝqS"ª’mèãOË<À‚4{³fwéêV‚X‡ù9C¤MY¸ôŠäp-j©§vE²Ýit :mr'’Ñæ´Œ@ЇdÏi\¬ÒÑi×Üu`øašï eƒä6/qÙFªøH¤¿Ý?cZ†ØçwTT ¤„}Ä(ÜÀ³ÐÁ"ŽŠXê«‹•°SªeO¸nI=¶7»Üû˜ÑP”íX,xºQ>‰…^¥æ‡5Èd ¨’Œ`s|À°6Š”n{âßô_Í¢Þ™/ZS•5Dnâà^ø÷AóÑ‘éÛ¸¾ZÎæY÷ÚàÎ%ª¦êØJñò°L|d¡¸cŠ{%ëä¥èÅñh‘²L˜I!ÞÙ ^S§Ïѽïuç ºË§rÖŽÌÿfÿ¬FVîÔ8 œB+ôŠH.üç-å¬y4·ï—¯á+6L`zÖ×ü5 Ó-ôÈ””ÂmÞ¿E&·ù6"Ÿ#÷ËÌ?Ábá-Œ€„'tÏ;# s—©…í7¢8ý!-8GhÇÙúÂb´˜ŽQ½g†f’1{Y‡l]z^,W6åèî-ÈÊiú?6 Ø ~IìÖ´elJ÷|¬“9ûEùÄÛügI¼êOIöõ¦ Ÿ¡Ï™\Ïj wÁÌ•örÆéQ|È“‚‚Áˆ8ZñwfŠè†°J3ì(âEv~V•• ö‡ÈãÃõü>mÖBD—±& ¸½¿¹S]ÓýäO«Cj%+>.p-ø§} „#=f”óþ…€ÅœÞÓ´¹<ÕÙ†÷8Õ-Qò,¶ +¨áH½ÜM>NrF¹6HÆ… ÞY³3ú_IAµýMú/ÍðŸ&œK÷QIQføœª ÷¶*ÇòHuiÃ-üº¡–HïqÛ×aÇh§»ßÐ×0õð¤Çû{ÌCõ`ó–‰já$èòÎÂ÷u/SµÈg”Óö®Ä-´YÒZúkUX‡ M‚뛎æϤÒΘh ¬5{¼Ñ/§!ò~xÈ(dûÃòÉSÁÅ©Üÿ:Ò’ó½-<©L9±ûìªÖRÅd~Ò™ÕÏÜ<)wÛ”ž\k-sgй҈eùóªØÁNW©¾`…½–õY £8/­î´õÐ? ùOÝfŽåùµ–5çq.g¤k³Ê„S¯9Áìv X¦T,r”Ia8Ù¼°ö]>Q´÷™¦ÿÑ‹køšáV¶`R²zaT'×qt:Ò·FÙS6ÐÚöoeCÅåt¥ßOò 4:.è Ít°,á7\'–c ©¥Û¢ŸÖÝ2R#S¥Â`9Ð$ëG»ýüp±‚" @ Z=vÛ¢Ò_m7û ¹þ¾}ë먄|÷yÝÓ$óÊàŠ ‰²¯ßf!X‡Wx…ÿˆQø+ëðþ®b ›‹˜+,ìÌa¹.ÍóŽL´8[ƒÞáÛma>Vëãv¡°„¾fq97_´ƒ¶°Ø‘åµèÆS§¸ÝL+8j“sô ‡|CŸNø©“(‚ÔΓiï£Rum`iªÿémŒ<³j2´”«q¥Ã…÷Á¤1Ía °2—HÑ%c%ñ†´ÏŽ˜Ì06}y[R,ÙCø‹*ç…®¹Nʘò¬ô᱑‡w½‘±(ÊÀ*?£`=âæ¡"kX½Ø1DUSÞÀD-üºTîÉÿ×((¢  *ýy W_È>Í—fZ:ôþ×ánï-È÷O ĺ¡5<Õ\.†N ÛÇÄHú¿’+Ö‡ý$uX™Z üœà Z¶¡¿ÛTØÝÑÑ;7ôñ¬þ1ë'OÍÑ××ñÝ‹|ÀQå}n…¨á‡-X½ŠHóìÞ·ã»–zmç{–{¹_¾Çþ:B‰,˜%8þZc7v΋ò–){3 Â2…~ï#Š8cù aOѵk"Ù‰ã‚ÏÎInƒ-¹Üñ02ký0Ë,$ /¶Chƒf]%Kïá}a²w kƨ=è¦Ø»]&Pöc€Glo‰ UßoøsgÑïÇF4«RþùËñ«‰éáš»¯.ç87øŠ¼´Fbã¨Ú¼Bööä»3¼ÏËb[§³.<uH ñU*Ü âL48¿mÖ1PC·ˆ,kæŠåúΔp%ÂyØÈÕl×Ëôfü5“¿›ú%9Z~—糟/qH1Œ„µ¹TH&½®(®B×r²HJÑûÀ@‹ Lb7ñ€€š>PœzáN¿eå¶G3Ñvœof|õ¸D§ñ— ¶óÓ–Ç_Û&ôWsðܸ2l4´H¬Úú^3!V¨Ó;ŸU%y¦Éo!G]Dê!Œ«I¾Ë!›¡n‰+kZ›!"(4*aìl:éÚèÅÈØRGà’û¤è9ÀDÛèv(ŠÔöz&VÂHOç?¢só;©ZH¼ŒõÕ\&­Õª4´X/y ¼ßîÓÔ‹_7È…1dlÖµdŽmÄD2¿)…èaÔäÔ·ñ¿úÕ½+’0”ñŸ¥®‚É\ëß2‡bm~ÂÖ ×íÔh”°,ÅêST.óéØJtz½‹–•æDâ‹‘nØ“to0XNQÛWÚ »¶@oº"îë\P²ÆL¿ Ñh½,öTüj‡ãëá;z­qx»ŠÞíϨü§¦puJf6={¶Âÿ´¦ãŒ mãææ /~ìcË# 8“«Üêýx•®F€˜€ïÛ5¬˜Æ/H³Éмönfõñ4E@º‘gb‡âE*ø–pV¾‚f RYÇjH ²£Ý'QÖÒŒíÏÛµ ®IYá–è5žÖâÂø*Ø‘7…õ¼1ïÎÈü<ÅkLiþÅ]2Ì»ÕïÕ.iÕÆz;ù§$ùŽ+§ö4Bº‚zy?ÍÈùFÑ!0žÂ3š@nYêX :s8LØcÂÍ*xaºÿþ‡©°Âºû6‡åG–ã®hü"Úø' `в¡ŸI ¯Èê#•º+#æL fUW ’¾Ú1—2¢4s[ܾÖÊMé\™—Ón«é­ÓýY#Tçw !}«à%”ÁU¨¥kiâ­·)‰[r¾<’°ò}K³¼C¶ð¶ÄJ ,$ÿßKpò 6žWÎÁW&u‹0ôðàá²¹›àCÝùsíéþG®„¶e¿•þ,Â5îQ~û÷åÀ…ÙÞLÜyÈœ?ÓµåW&Œ‹0óm­p_iu©t:.È5@}Ø£ÏëoòŒ|Ñ }š \cy×™ÁdVúPÚöÀЪ £4!Æ×Yƒ’?dyÏp¸ÜI¸’ªX¹U§²é—¾zSNª\òzwž8³þô…þ}ÈÍ ÒMbÓTRCéZ‰óLΘ/.- ìjt92·ÈFDaaô¶`Ì·÷ +¯b?ˆQ>Ò‚?Æ>––7ôNbüéšdÀŸIâç8Yw dr™~GÙ®nd ÷§'ôyå7ùÑ…/™û&» êÉËþx5rQÞ§üŸŸt¨¡yÓ»,ÈOFU“Xèh-^dí I(Œ«mÚåIâú~›Ø¥¾3ëGˆM/è?šs}{O©eÑ^÷!S÷q}/8¾ ÔKK§; æ#É&'Ëï矦Vç…˜û£võ³O:Rv1B^À»ôZÃ2ù¬”4Ä€ZÙéÔ¯ÁÑ©s´ºÜiç˜Z&HaÎv„2¡ßC “ä,…6ÇÇEéÉ—=WôòúHKܺ› HrÎ9¢• É]HÏž“¯¦Ž™*ÎLßün {èÛßÐÑ*gá澬ö$x¨« ®l™¸Ëaü´˜Ô6Ãæot—ô­½{nÚÏ{¹Q7ÌI½ûŒƒU4µÄÆŸoEVᣊWÄÐdö„¬ ©ûôö§Ëe•<^FÂ?cügÁ=û³˜‘¾ÿÎàž×cr·zbi IJWóâµbd»óT9®hlÚPwÓäªò~¶¨Q_< Æ>ë Ëu˜=Û³Ö”Œ¼îHÓÐi MÇzµÝŽN}U.÷Œâûµ­= ¥Eìeâ”tœ§Ácì.zÎ`uÈÑRIJRÍçƒò6ü§É£•[ ŠÏœÉø†¿.™‹Àåy`k&0`vŠù´Ä¢÷[L‹÷¦“{‰íöÛ6…“ß©I¸m„îâD_öp%‰7ßW8³Ð§ñ O„> •?ümÃ/÷‰_‚4m{×2®ò'+´Ô¬ÞßaC¨ANì°.´Ý àÏõã%ï¥ 1s@ý({]97×~Xâ×¥ÙåCU$Ÿ<Ž½Ë¢d‡ÅÁÖ ÄXÀŒÔúeö–ƒ¯ì‹~[E9– V½:‹±©­ã^wЗж3®­1àÊÌ/Ç\Dr¨Þl2ãÛPJÑÀqJX‘­Ê·ã¾Q.¼+Ú}·…o»jQjðÔœé¦íÉ ³ÉHRä[9J&>«ÚøÑ?‘+zèÌ~ŒìÛÉDiˆ<{”JÅŒV¤AçiÔ¾ÞhOúïJm4sÏ65{¾rþõ‡x )fÞ}'@1ý¤D/€¬•`]âi»º§0c÷Ñ™P¢–$y=rK›š°¾2[n$Išæps€ðY·i[`QÙLQ ÷¢˜n}ÔÝ”¼BµØ½oÇYaÂ>‰Øïþ”ÚM<.EH—ljøê/ Ùb'ùÌ'.¸ÏõÒ¶˵!£.ƒ7Üs¹Nmh’$Ò+³¥…}ø½ ®MðiäÄ蔟ÌðœÄÇkI¸˜X\0ǘô4Oa'œœÁÞ—å¡TÚÉ+ì‚ý®÷_·«Ä)ߡɶ‹æ•&©¼vç{[:Ó…u«‚×!5–ù戃õ ™ß½¿º { áyª`Aïænü^yåM3ÉÎ>ÍJóÞ?aYüõ;äR-ÞWf‡+'›žôްã=xšî-K':ø{j’ ª© ×W¡mÎ0Ó`=Þ‰®Ú,W´Sú |’‹QÓè®4úÚÁ„«4ÈU¶Îv×c…¦ÓèP8Ë¦Š±ß“çZ,<ÉKýp%uQ$ò@ƒÈW)å…R¯1ã9œ¶2<$î®ËÖÎ0Í+sÜ1µ¥®@æ­oĘþÉH:ýîÉzÀw}Læÿ'þ™L&gŸ à²ß›ÍðFûæDÇWß7 K,‹ÛЭ±¼âžôíóùÃ2‡„êGªk_AfD¦wu9qøèáÞ¨¸  s- îAùú›Ó?"ÿv2^ÉvmøMj›>ù®Òñ®NþæH^èU ˜À +ká·¾.¦és‰@Kì8Èw/㪠w—Û%þòÌ|D‡jŸs æq¤—L'‚‰’›\1\Ü(€pw»ä¸8‘ýÑ£â?@Üá‘¥ |NNÃ9E¤_âµþ ­¦çͲöÉbÁÎx @J5ô/ãQ©ÇxL†‚¼Øñ‘ÄP=»A ú¢¾ºÑRµÔ7•B”Ð ¾})aõ‡ÀLÂÊAW¾°çƒ)t[ÜËÚÿèøü—×\k…éƒ÷7¿˜åé\úa0}šq>GU¬ŸðÛ @íë®»¥[™ml\Ê΢<3°Q}ßr©ÄBˆ+ŒØ¥UgámxWø–{Z‚—X)…‘YIaŠ,Mü‰Ž±jB?Û6"òÙMZ°"s½yæbß÷$Ñ=¿1„'C]6âAH9ï]Ó™c ñqÝ@:÷¦^#ìŸoüy $Ô]Úýë²1JS²ýR:تƯ媄nv–â›pxáoÖ¯ˆ¦,×£Á™S>ÖU“÷– ä@€Õu2†¸S¢úè²Â-fºD\(îhƒ¬ Ù§âüNo3ªhÍí­ß‚ZX³Ó«(×T9C ~Ø3¿LúÇD‡NßlÅdíELÕÓçɹï|é»K!_.W·à{ÎÄÀ·²ÜŒ.7jÝ•{Ÿ Öÿò9€=š/ˆr]%ê¸+ Ãz?årøi­l·™¾˜½6m#(-_ÓÔl‚ðOeˆ•`Í©ÒÃÝMþg=·¬‘õæg:(ÂÛg­\qÔßm$õàûC‹ÖWO;)q€šÁ<7̉e?I”žMÅÉ‹úèHì¤F”pÊrŽ^Þž"ÅŒp¢¯ú|båˆ@V LëÉ]Ö¤˜rY@<73Ò[§›']¹éèr U<e@¶å\i˜Ã=ò'k4ÐEÓ—hPGA1®Dj²Sxye 02É^­êxL.éITÖ6 ð88Z6ž 8‘²ãÄÓ§ŸX²ž|@'yIÞp˜JîåÛKƒ‹–¶ß®G”Oëâj«œN]/©è•Ïa4F—úçü˜rð}td¦‰û†Ó OÀkå?ê*æÈÆbd`‘ °00ô9»©ºZJ×y|µ:\R¿´Š·Yöþº £˜””¾’™{G(ÜÔÅ'd‚C®±rN¸9t4}ïæÛ\bÆÛtƒpUNiî/œÂŽ‹<kö«jV\:MÀÍw‡ýçƒO‰¼‹eÓŸù*†=4ø›_Gí²pðËÑÆØo«[Öö„©É]6U ÏçôW°–§STÌc%PI¶kà‡’ËF³¿´1Á-Âaæ‚ûì\bÎ>/á©lø›Ö£|]ÔI‡ú¢Î‘Æø]н˜Î7Œ7ÕsS×SäC[¬¡–h®õžæä®Ü곋£“OÏñ¿oÓÒfˆÑ0b‘·¥±Þ9`ü‚ÿ‘Ä}™Ý›ÉÁµ=È'Q|˜¸Ø 6ÙdÏzm¨»,¹áî××}P„àŸÞZ^ XFÈ‘ÅÇôS”ˆ_$=¸Ña1h&‘+ÆŠæNRr‰7ŒO³a(Ⱦ¯ ¨+DŠû‚걉À­q%.>V• #ÿmP™ü©½nµRÙ³ŸÊö»§zAõƒÝÇñðFЧI=]¨eÇíÝ…Nþ$†ÓZB~ïû¥1R‰'34´ªx2!AÎïÉ¿Ïg™´8Í.b', _?= EÈÏA¡×—nŪÆ>¢Б0xBk ¢6Àé,Í£¡xëd—²,ôSäÖë¿Kë%*É7ÞiºãeIš™ÏcRîLYÏ?¦Ì|ªúTÁKi_c-d¯@¬»¤»»cž¾ËÇ¡çÓ°äË1{p­>óÀ³pX tsD®ÓÖGz»Ð?±,±b\4@ ÷uó±müƒG#Š»@ '••rÉñv¸Ru%£0¦ÆóÃßkh*Á¤*©Ã%}ä$Ažÿ#¨q³Xí'e‹Å*lâ‡ÄŸÂ†vIq [xŽùŠdæˆó«=a€ëóÙŠôÍÅ0ŽDÞà€ùºÕŸŸÅÝóÕŽœƒMîϰÏñ;Û)ùŸf$½ÞÂq–Ú¬ |@˜¼EXßnY}tì-ÜÒS ›eq~¨N@ö°ÿÑ1N~h\"Ÿ/?9ï1[_¿Ó9 cÍPܾðÐ,b‘êWëL#¸/’÷`#~7Rg0QW¹6‰hPn‹·  yÐ(½Ï;RX;ñ–;UÅöÿÅ;2¹ù+Ó÷ßl:ªµçŸçŠB‹÷OåPËòN¡zï3fc2kÓYîU nÊâçó©z'Hkî‘øÛ!¨ÄÝ,{Ó ¥ëë!öX3DË´ |†¯ Ï3OĆK:ŒáÚâÇM*’…50›Spz!gòŽ#pÌÌPÌ¡óT>e…nSgI~ðý†·ãŒâß Täãê­Ó#£´ú’Q„Mɦ²Œk,4œLŒ‰ Z^äïò‘çD˜+íbÿ;‡=ŒÎ++&oÈÙ°]ò|Ž}zžØ=G‘þ%”ÁrÒ~ðèÀ‡ÝÄaÑeF]ÍÝ–ë¡“ÝÕÈ'mDy‡[{£|ÅÖ& ) Ï j£”ž‰îøimkŸh{uuBŒd"…¨üþã \qL†;h‚“.ç‰ÌN·Íù„GFNðÈɳa:„† [ý&<÷æ égüø€Ëlç(©>|T@0/;P;š-mÄ?ð$vY/4¾wû4°ÍJ膬°Å2¬P£@c ªGmð LV* ÏYîÌüÞßÃmkÿmKj“¨Qº!¹ÖKér,”Ã+󔯣)¹K—^O“Ž(¢+éœð¥Z!ˆˆëµžZ¦ï•ï‘ãßsPþgùŽh»ÂçS, p•[_f¦±°‹Ü¦áyñõ({tôŒ•‘g)Ç×kàRÿïµNjåúÙ_J‘?&B,x—ƒ\k´¯D5kf-Lc¶ NNÖ{î­eÆî¬ºœkv=ÁíÚ|~'ýúÄôq4äæJÕl— ¯R˜Iø€ù“Ùµéñ#9Ñ›(ƒ ü?7÷Ÿ¹Æzþ$ta©\à•Šî³© Å ßõ¥öåQÔÈUgð·>’B=a à¥[ôѪgM»“ËÔYäZÕfÖd^}=ä9:€äþ¯}e[|k»„6‘,bW€}FˆÜ£¹Ùøq ŸFºF“ŒqWXžhŸ–êÉΘ*†tÜz;|±y{ë¨xJÊ9ÞžÏÒÄS‰¨[xør3F48êB’$·8ÛKQ¬tT¡Ü°ÁÊý¢Ìórôc¹ÚvUxÁv2×q¸™ñ¦ðxd‰MëÅð6ãº,¥°J|Œ³ÐI”î§g×&Ï'¶¡Y‰ÎwFMß^PÌ ï?1™­±ŠáD•6ŠæUù›J:{R?é·ICnºèÈÈë:ä<|NùLiÛBä¦9§#J32R**÷µ©‡Û›OµuòMЋlv :’Ƴ¤(â¢ZùÚ~¸ª]INf³eõ€ðnÂS”Ì›é)m Øá<]oŠ…Ý9a,e¸#%" j‚¶wûÉéÜÀ÷ú·xš4<6-¾t:û¤q š×lÏxîX[”×6ûÿÃ…k—ºbkßþj7“ZÙňT´å}äÍË¡Öuš^Ð*wô~>ÄÜüoâáΤl±S¦¬d¢„dgÖÕu 'ÀU•¢,÷iš ¾'CtîñÚ8±äª6Îã¤Vª­! F7ùGfÏ©Ovm¢1¤‹—'¬›­ÙkvUò¨äÆLd= „Ýô$1tÀå>T_N 'ìJÎÌþѵŸíaÎ/œ“o›°f àèªPœÙG[3PwzßGm?yê(©‹&wŠ•‘@ÆñŸPãldžn¶%ü~À…¦¾êøó/»7êbóc¹R.ñÄGŽÂ½æ˜o$ìDùp¼§*]Ï<ÉÉŽö_Üõ™­.\¬ŸGw¥Ç\ÖþÿÓwW_éòÀ·2vBuéÅ»®oÇ+Þè\~­.€È»@TÕL´ï¶"–}+ª‹!1ŽìácQòw#ÑšB¯#î!9™é¼=9¿Ø"Ø8>~Šl‰ïãOf{ÕR;gŠŸ2êõÂа懼² :Ÿ‚aĵÏ`6çž%GîlÈ­%çαSsâäÊÐj²Òk3îÚâW=µGÑVóa#.íK²Ý–£Òj ¿ÎÇ€lWã­r²÷¸ß¶Y 9§˜2!YGÉov+ï—‹ÉGR™gÕ'üçAiç~ÿgbl÷‰öã¨?µ«Ú _a)Бƒ²Ý-òn6"xxQrDðÓ·æs¦%ËV3µf/J Z!„a åÖíuúW“ŒÄ û}¢RbövM÷m¡«b‘±Ay—íMH*ïU88gÆ^5}[hb½óQþDÞ…µJ~O¼§™5¦øÛ™_g .FZü^ˆ0SÄ¡½Á¦§{r¯5"j,f¾JÀC,޳8¸ÿjS5ȸ¶Û"÷Ö¥2#Sq[¦þŸ\µùý¼­…{×Íq5 J=[ù™z[ºªžÃåW›w]3 šÀ\™6¯ƒ Ú!=ò¬É«n¦ùcLÔñ)f×k+ß?? Î;$vÞe\÷Œ³r¦e¥ÎXFÍßëÐ-¹§K „xUjyêÌë>Yµ5½/± Šä×ã4Ÿ‡ï!"úÌpP€NÏ¡½ôÓžúWª4u¨¡üXÛ¶r+~á…ŒPü @•N—ƒS:ìŠäxf6™y­® ¹ú¨kã"¯¡ðÇ–ÁVÍ–Ó°8Ex0–"œ–çƒe’EóX¬B0©÷Ûx­Ûe¡³LÔgŽnÓõÏ_ÿà 2? T÷ûÂâ‚7IRÉŠW¢5‚}›¤£1Wvøn¥‡‹„Áô§€l0-×:ᣠ|A ÜTWí:Îsr_C‚c‹16–Ñ©Uot$=º+ÕQš6µ¾ÛZ ¤MuÉSŒ-º€”8ÆÂ=á_ =¹í¬ö/w&ØŒCÛω¼HòiÕ¡—!Ú‘j 1OZçvàfú ø¨ÓoD§Rªh.Š·w>>üY,×9¾B´%UŒ¥ªü$­\ßýRÖv%hßbòW*X*’q’1§ùºDEñTHt…*:C™Väeå 'ÒˆLeüz9õéörA‘¸ØŒÇˆ6ÀAÝ&щ•`…1¨Ü”‚ZVáeb±RwOäOæ,#VJ7¶@Ö@´9€­*@s#7+1+Ó=é{œ¥È×{>ëÈ ^¡½7ïydXÕP\Dû›ÂX!ŸC#uÞjºÆ®ÅzLø¬™äÛ‰Å(½Öá‚€š¡o åPÀª½"Nü¬—nbdþ ãµØ&ÙµõE'ÁR=~§æL-›öÉ4 äl <1å/šðrj€<ˆ¤"28Â÷ÑøbFÀ& ¸ºe©Ñ²óÄËãî`˜$$ÚÊÎZã;—cùŦÍ2#¸¦›·÷;Iµ¬Ü^yØz2åßÁ€ ¹Ê‚ªÿ‰Âû)ž„«àF(:áïL÷ÛîÍy¶Ú»ž­õ À‹ÇWèË÷ì×Z®•˜c,䘚1ͳ ˆï Þu´S¯c€$5i½ÿx"6XmKUZ ¾“JJåïÉ…Ö?SW.åÃvú1(ur +MRÁ‰˜kZùy>÷ØÒN?z;‡»ÀXŒØ™ä2­þ‹>ÕÔ-«C¡â¯­oZ?zN;iÍÇÎaþʺ-PCˆÒvi,Ò÷凘—Ã>ª|^Wö0{ ªxvûé}cMâÜe ‘Ì>2+~³ú?ŒrÑ4ŽŽ\ œÉ>¿h•×M2n]ʬú":¢Ng“0yB(áç[)ϳx¸ú+•Íñ#K#Ìz9Ýþ hi+XUöß…ÝãÖÍT5ºÎ&™&rúDöw 6IÓozÁRµy9¼™ãÖmßùäeØè½±ñ›ûƒùTõPÜO‘¾±v‹¬@öÃ~nê ‹-’«;Õe•&*Ï0º®¤÷Àe…&9d‹Ž(s`¾>—Ö­—ˆª—ÚúЄŒþ«»tYêv{OÁ²ÂEwzTm}3]„¢u0½žò…ð,ì‚9ˆ¨<òé’\"haK­r&‡©þéœþw€°Ã°!N˜YáÕ¶*Ö¿®ììbWô¯»…ìÛ[ˆót’g’„BÒ¡î¦7öÙ7KêµÈõ1'.Õ4~? BÑ­‰[UÝÒ#çá䤈nk]&95UM¶ÌÖ+ù/ïlp͆_!h­úxGØ@Máý°‹ñ•ƒ½íÈšûªúÄì%ýÆÞl´”¯÷X =h&²Žjáü¹¼Ô“äeÓ21­NOÄÃ&Pee§f%šŠ»{Jäübk³N<µJâƒeTZOÕ¦XÈ”³M%ªh0ÌÝìA¯73†*É·g0Wªù©|ŽlËd¤*CU:Qÿ){doú=æiN·1½­bÏù“3¬‰MŸZgÊyämÌ@Ù§ ïDK—Pâ¿bdöù· €÷–Ýîbkïyr‘Üòƒô)tTµ¨]¯ÌóÞK²¯AOœ¦Íoׄ´µAØãeUgOýs©ï:‘Z8sV¦]3ŠnþSü‹´/½«VžòùrHŠñ*š®‚ô®èŸ‹ ¬Ø^o,äÐá E8y.©œ×½”Xf*üea ^ÎIVh3ËöÙß!Âì0¸äP®=£a¶¶+Ò2§»–ÆWvJ™5Ÿm%ç6n†×{BU÷vk 1:ÉïÅ|œT›PìÆÓœVÊëgÙyZ‰¬ôþ;|ØáÒˆÿžtÀR‘Û¤",ÑIÞnº®-ºË¿´·cP8Æ¥MhÝs·V/·fœæ.³·éå.RˆÌS9‹îÌ•çÃÏ»„F§ä—#SÀºj$’¼ ‘8Ñš³šúò{SúLßûÇ([k‘ÈɵÑÚúΦÝê®ÐTz«¶!Ÿ¬"ŸŠn.zÕ„TD­3ñÃ|å&*xõ.QIè‰Ñ‡ÊñŠï³lGLJr°ÁÜ$By:ªމÛé»~Ï€•+¡ÜÉÒù>4À™oû0œÜ3 PáMu‰é¼#Žy>îÕà<ã‘ô‚õ'øÌlt¯¸úlVæœÜ⣴Èè á#*o í¬QؾûTiœOPÒ#–܉ÿg'Ôåþ ’OY§¹~Þ#Ç›ÑÞòÞœZe0œ}ûY;¼Ï¬Óö‡dw‘Øl$ HŽkJÐÕä·Ö=àŨThë§ 5ž2Р‡Í©†ÒoZ$¡ høýÉO[¨­ñ×büZQ­U©ä MTª&bjZõsÓ#Â9fþ4OûJ«šKù+î q˸^GUÛÆÙdt#Žf97‰›bc#\[gJCªPw¥þç\±—fÇÅÝê‘=æ~õÇx°qðèw\í h´G+I}ë©ä ’ÿ{¾iÿCÞøc‹ñ#ø ¤¯†íä@VógËëød8,/¨ùªE0IZÖ¤H%<ìÅ‹î+vþS7Jµ„†g–\Úú-é³ô2 :#”§Æ3¯më¼t÷DÏ£%\”r@LNI|ðU‘Êü¹ñ»/‚ô Ì7`ÇÉÿž °&3|'†° ±Åü »aé(Ãk;òv_ÃØ‹¸‡ó¥=a‚Æ úè€'Ùs·[’6Å·áp%‘éÚžZ«¹]q)î€@[[6Îg¡*~} 1(=ïÐ7ôÁ}…êB÷Vi]øº2­LDså> KÁ~ ½„$ÔПؾ­Øhˆ% Mó/^ùCÓƒè¡W¨g1e¨ï½:6³a”#ÕOÛ4 Åáž¼›‡DÄÓ0±›ÒJrgˆÞȲFÜ8[û^·u“àýì\87KTv~Vc41äÀNac,ÿ}÷°á´þ ˆƒj®O ŸbÂR£ÇUŸ¥³ OÇO;\þCU¡0ö©—b;Ѧ]9ŽKÞ˜ƒqß$øR±na‰ú¬×Dª(XXtƒBz»e[”{…µCW·mKÐÍ2@%3èñ³ruk©¨íçõá³úB ;©”’VЧò_\p#^0 \¬ÁÐÔ ¥)" orp)¥þœ‚œ±.””sétí ™‚yH#t5D<ŒôˆëQTX¯V®!kÅð„è¾Å²AùÛ°µ…ÓTáÀ?*”ÓæÿmPf©h\ÿ¹ýx‚wÖarÛ¿0k2–[‡ ÈØü2Rì=˜??Nz.zl”Þ9ߘî>„.ÆÀòn‚Sôæt«ê¡FkÅ«·¥Â0öGsuõ\lœÅ 5Q=…ôàwU:æðgÍ—¤¶Ë§Üë˜8Öp£÷/?¹~ÈULQ†)Ù¼½©RÒcï»q}—ˆ¹5‚p‡X$mjXqp=¿I5Çdðo”ºhZw5“&+ÊþïáyœÛºçVZÔÓ¿)ñ%F–×’PuÄjÑ“d’((úxoÅ;vƒdJü¿o¬ÃoûcRÂGïü/¼íVËm9a4¢µÞ~LÐÍÛ2µ\:ÿ0(Òú9“ךí–þæë4ÁƒdJa—ˆÃÍÕ ‡÷Ãà„4$êîÌIè: ¦„ÆuÞý÷‚Ó6󄤸Áñësö8¡@é—ͦÜÛ£X$”Òœ­¿5™~­?äEð%ª'Ìuü þÆüN-‚0÷âº0½·HJ 3kcëýÆ[“°¨"Ÿíâ QéûüŸKÖËÉÒw$Ÿ (7[šÖª‘ä¡f qME¥×úEŠ8x&`fZ8Ë®æ’ùŠýúmš}œ1wÿí/¼uiÛ±ñkx1¾(õÃNÞäù>3Ëú8d\ÿ·tÄûüò¡Éª4H™ÉÈÿ{;+)-ö¸2ríè.³¢!i±px¹ÚU–\ ¿°Iº½nQäý=”ÇÍö÷;dZx=‰P}t±ÉÀn|5!× –ó§P¢¶ïiѹ‰9‹k—)þ!®ëŽÈ%¤œÀjýÇizKl Î_êá0úä^,o•Êj¬ê«YùnVácKUF„ÂÁ]ˆ  <3eÿf’?(ØÈÖ‡ûõè¡ýL’ƒï¸ ›Y¸=“ÿí®8À¢[t¹”YqŒÛK¤W ²÷×to¤|«~¥0_5YÅp¥hØ.ß|4†oM‘ZäÈLÄÑÖMÉ”·ÌúsõhzJîÙ›8Ú”» ãð Š%‹à¹E…¯F¡Yÿ˜[ØÇºJ²pµá³hó”OËþsŒ3l6Õº£ù?7}Z®594êõb­ƒ¼“G§êFÙ‘¾]ڈˣ˜QÔª¿WUƒ ç•vh:ŽŠe³BÎü†4ÏâjÝc¼]è—œý·ÚpÇ11¿¬]¯&Á ­A€î•Úåà¯4KVOÃrõhR”Xè§ †›/”qWý· E¥Qcƒx—ÔÑ5¬s{Ï#d7,FkÖŠµ#Eo†ÑCErOp ³þ†Å3f¤c øí¨ÉfÏJÇ,1Oé"’ñoY]£yÇãC]]û`Ø* âûÑÈ×i#]GìÚÓ“}êm÷iÃÂ$kÜOl¿»šü?÷Œô"·GŒïðsÔæpævÌ7ÚfI•oµb|l§1õ¥”)_·ŸÔæ¶Ëñ‹bˆF èõG%µ3ŽêDü1ðÜ"êÇå%Ôñdº9Ì•¨¡8û(Iù’5ÁÆ6b¯v…W®îÂîJâ45½Î_y5LüÛL¼¡k¶/P3+ênz,× iB׿¢Ç‡Œ,fà„ü¯¯ñ¶{äÜ Hdû;!ž½îÛYsa¥Lž?“TãÉ ía£ùŸwàHD¯¹p,øõ+Þ Va ß*¥hÝwZ¤GÊ“º= °bÉ=~®ŒÌ{qFRx ˜ìYfe ñË'U¹¦\‘ÎæBf¶ýKÙVã#£WøO:X1D´µú¼ëmD@6}&SÿÔÅÔØT»J+ñ·‡nFeLÀV `ýœ»ž6_«ŒKßšª+ tçûò›Çÿ:§Ò¤e3’t.¥Ú@¹P Ûh›™^©ˆâ„4Uì­L1Ë…õóûÙ“} Ò¸I¡fÄ|à˜Õ“ÕKLük˜´Ž´R%$Ì+¿ÔZe•ÒHbýW‘;ó 6"WH„cm»1éš#+Ȣ籀¥í8˜94 ¸FWt±'"VÕikfQªôÈ‘l.€ï§d¸Îcó’n '‚; LÚÍ׎¾úDZ»?œû¸NýJÅhòÿÛBµsÖ³+ Âa8ÊôýÙ¦M³‰e ‰xÛ÷–iÄœn#²:ˆÑ^€¶¦nŒKÇy£“ð˜!vÐy©:²º”iØíÂ`‚tuÜÆ'&åÓýÞʃÏâ9ÿ^g‘à’ÔÝwÚYè]]ìýž¿Ï³ÈÇÒáyæJ0™h˜P4e^mt}6Ië ¼òt„ uýPül¶³œìBù.Ò'TÚB};&ÏKúƒIÄâÁ¸øaœOçœfPB²òáá‡?Óíö^Bñ«'–8<—5ÿŸNxVi+ûÀjŽ‘b><«É˜W–|ÍfТ ÃÃ9Î ¾Çé:Do~§îõOT· ëó8þ­0Ó\µ¤‰a£X"ÔLqŠ }Gažoƒn ]©#Ê’Ÿ²é'Æe¨›a öDÛ¹QÑ~ndÚN¾³y¯¨—&±o‹áHr(JôއV{{Ëd »˜™FùÀ·h¢²0¶uaíâÉšI¿·WúžÙAñCÑ»§3oØ Æ¨R÷$"˜ãÔfì&§LˆŒÌÛà°êBŽ+7Ùz.½Ç÷²-R~sÎévâqÉĆÄ9,y™5/$:'o Å”ˆßç=ΈCàŽ«®ÇŽ Â½\®ŒGˆ•vÒ™–ù䟘s² èø;Íñ˜wŸô¼úzî"våÖ—cÁ)*µìã§d·aúÍ¢xºÌ1¥öa`ø”.'¶¶Zxúé:aF¨èÕ[rØ;R¹ù‹öÛ¦Hæ“dÊBú¶ö¨-ð~ÚJ‹¹ óF¨Î]^ϾF“>Üʋ™;åKóçHHž±÷D~[ Ñ­¬†ÿ·7èÓ"\ ®‚˜UÝÕjÇQXy•œ3éCðh6~èE2u¥ëÐʵ‹¹ˆ»¹]œµjâø}jÕt2·=7)BlÌ+hÏa"< »¶%ƒŸÊŠ­,­ÿÿw§_n<+œ2ÄvD¡«BxT5¼«ª‚Ÿøª[>¡ ÊÛ¾ °é‡Ü³¼z©{' ÷Ì÷zrP·úsc¡;ýe!‹T$ÿµª2^¢ #̬Qþܺ?²QxŸúA/Í=² •%_ Räy‡•l7Eáé o®?Zh,§[dÓw˜uó2Ò¶¦Äd¬t2ºúˆM|ùhîóN²Íðhjb•c«´ÿŸE;ÍâM¦&Üþ烨6ON0!þŘè–EmÞ<׸´YJuD}úrt1T/ÿ±çR™qå'kŸƒ™IÛy³~.m?ñÇç 7õ±{Ðj™1‚@ÎÜz+3µîšg9©æk:ç2ÓtÔîmJ×Id~Ýp¬.äçT|l´`ï»Yüv¥º¤uç˜64ŒFÜ<·¸>³Þ›»É)Òv%1YKÛ„Ô¨™Íœnò¬¢…ªçºygë¦ÀRJßuÚÊÆ‹ÔyDºJg2HM«=˜j"üjº)a=;a®¯#4\±.@ô@U­¤Y=cÌ>×…ðÔ12 ‚¬°U,G¿ïbœŠ¦7qãeõ8r>yº”«±=Òm,¸šñš'È Þ½2}cü„Óozê‚îZ’†>a=¶ÿ7g[Ç[iV Š9 P Xëå$6­–‘ÊœÀ¨…,ÿöwvÚßLÆak}L’üd‰Â„u³T’ÓËíK8Kfªœ…×f»×½}@­ŠmÂUÚ¾¢m¶ Çì›x¸p`çöï-TºÖ§‡f+qÁ‡õ ÝÕyRÿ¢WÛ\í¹TtTÇdù­h„+æBfœ÷M/úˆÖs7ñÄÂê€1{r-ê%r½FÆû¸òj愤ÚZ`{ׂc¨²‘¸&0¶®ã.ÄÜÙÜ‚ ò9xhLãßφ9ýN”³v¯ Èÿ¸4"jzs¹Í«×È¢µv–`SæãNÔò±s1بqZ÷#x[/×—“qk3.nZjý2Òø“”¾ Ça¬Y–$²ë_#½Jßëì×ì„B¦Ç,FÔ~Õó¡@|?·ÒD¨k_tmž.ïõ‹Žr®AýzKq¯¹GвÉg6ÑCçäÉ̃JtgO°"ÞC/¹a7a2.òpºð>ž³¿vaÛ· —(fžÇôzh»U÷2¨('x”׊÷Ìx[ªe·ŠÉuº¦£àið(týÅ–Aªxž]µ+$å‘QרŠáÑ2_ƒ­LdéÏÚdê@üÎÛÞäêé÷µ³Oýº›ÞK@çL°TkLä2[‹ç.¼Ò¤éfÒò\7,Èx2>º­öߣ¦Mäµ$Úƒ)I…ÑÍ~gÿé„ÄJžò[ƒT—ºhñÓÖsÑsbuÔThþ¢€‰œæÈWË‹¿wEAŽ©¼€ÂNÈ0ðña‚?.ZÒÜE!mâ!ý—md>SŒUGX+â=s!´,axþ›xëÕ-µo:ˆ@nœͿy‘×k“žËýío8ç•G'Ù<¦_~ÀÆ™ »â +Á¼d [ÓXMé>`º ôOeâ¹=­‘õ²’¨ŒÔ,[\½}Ï8ÉHAª÷]:g¶é…RS»ÕW<@ñ0Á÷½^èŸ%–†¨ÎSN¿Ø«eôNžg+ùã²r?û¿vâ¼ÕZü(„½§© ÜŒï{Žƒ’PÛÆ¼¥£³åH ¼À˜Äþ &²ÿ.ÿ_á¡Ü/kÖÑrúéWª¦LÀfÛ5—D&×7ÏÖdvž&Uý'òßÚ¤C=”)AÛÅ¥ F;„@à½G_WuF|o”y 7R×õV6L-ìYšÛ{:Ë… uË„†×˜_‚×WÍ yÓŸÓ–ÖIñØÔlL$m?- …A÷›4UåYÜýKúœi„Þ,ªƒrÙ9R§} hU•(Xe–½_ÓÙVV©_§¿3l’|cíÏx8ÊYuë5)ê £žS.£µÛ„3©¸Û´ÎnÉF±lʳñ’4lë]Dö³û­¸¥jüþ‘U ¥zÑ·úHµžóô"I’o}•hÀ} 2cüª<üñ0>µ÷š&N¥xeÿÙ¢áX«¹óei‰q®ÛI©dê¶¡ÚY=·¤Uß‘¬¡[£´¬gï«e^MSµL­ 3ã«ã¿P96V޵øÕ­£ªž)Tý¯Íö®¨ÄóGUNm¶VÒ^O»¼VReÎDñ-pèö·Ÿš··Þí?–†à'rõnŒ…/ÎÈ츶¾þÔÂxM^­™Îç LÅÓq|hîº'$Á1"Ârþ±µV‡îò7ýþò!|åçq‹6òÐi”Fíd`N~ÿhh÷×Ff;ñ:ýó@ªž2’yŠ u@¶2öê.·YsiXu·P½ç³$¡®Ðc:³­*µ _ G^ý–`§×îËz–“Áx9È3wápvŒ0¾…h“.,•?Bú‡…¾÷ûÜtÛ£µ+;Ù¨:X’g¤ì=Ê&çœ4:#µ´V®>–ël¨ãÀs÷ú´–²Ñ§X˜oBd¥unUYâf°}¿ô%_‘N”©š­g¯¿E¯^k8ÈþG˜„Ú?)|Ê­bοH«§ÑÃû_b×ë¬ì9[H`N_ßú|iÒ¹hdlTñy'1~4zßU«¼ÙVëùÑ.–çT§®OàV&V+A‡Ó´©=“m“ÕyïN»%üêît£«Ê<»ÆüXÌ]¿O8Ôßä#žü±«‰_È…GNªëÚgê€fÜÌ5“aˆyàÅ¿ùÚ¬§”šû_19 Ïé :4U¨Þ,åLquîJ¤ÜÞ¾elÞµ€Ÿ‡J5¹[È` H+R¨ Ë;¨ª~2õºCî¥?ÿG$‰µ‘ó~‡çÕƒ€tŸ÷zLSºØÞ^ùáÅR( ·Öz¨ƒnš›2úÎ9mÀV©˜-ܕʌ‘ªTQ vpßë录¿7muDäH!þ­[ü*m¸¤ù÷ÖÙ:\BÇõ­aïÿ00ü¹†ÀAn§WÉ p‹ñÆIo{ì$KÄßâ]T(¯´;zö±*éyŸ°/ÖIˉ¯¥6˜ßâ J«;Þr·ËLÚ#âYզؿýáÞÑýƸب4qµr ÿ[n9ôˆ $ÂðÄ_¬É͸O<¢Ûo©pÈÚKÉ‘Qã÷«T0Þ«üÅc¡Z,†óºUžŽÖ¾—ËRþ«À¿KÏ“P`§{Pz0È .Ëöh£jÖÄÑä-í¨zÛê©0¶<¬œ«f¼2%¶¾JB÷˜T¨Yqcß(pï®ê+ë~s\WeQÙ¡ÙÖnÈl+‚5Ÿÿ+Þ5§ÎtéLc×øª‘¥”2• ¿ˆÌëYâ¾WBÍ0ŽXaéné&9,–/)Û°ÏB+¾~ñõÏvNêeØê,DQ'^ÿ޾ڵܷ•Ç·†¢†MÝ5{Wò¾ ÖmÇÿ[Ýx·kÇý^ƒ£Y~ÒÝLåô9Äý½Z2W†¥¢÷û‡9Ê稂ÑÁ­o>–þŠº6Gל½alWÈ›9JU¼"˜9õ /¡Iføeâ–Ù½‰Ök ‰og³xßûŸ8ΦÃLwÍ®*@ßݹÍõ6kø³[m«KÕv‹öè~c¿©ÆŽÑ(ÜÈÓ¼Nü…öxÄiÌ—Å‚ÑÂ}I[1à3Ŝό’cé˜CÔb{ ž —ª4Oh¦_ßk>[7·GgâDn`–Á£ù¹­{ÒÔÎB+zñö`çK½‹Á»‰ÐÍöàFnH¸mÆÒJ®©¤{ñ>N”ÚÐBèi3ž=X ä¸?f¤#?öy±Šˆ´¢«ãÐÂ:êÉÝ6…KÓ+«ŒÚí*)fŽm¯ÞFÄ¡ôâëæîøí}ƒFþÓcâžÝÛK¥W-+@Ð+“1’á$7ÁÿáˆSjUGÞNùûÊ.Æó’[fLͪ-…­=é(õˆê«K|WYݯý>H¦oùFr"$óÝd²H@ ùúÎnŒ~s¢™ã¥2êQm¾ >@À°>üÔ©2Õ þÈ_Åa 1NܘklœØ&’ñ¿zŸÆpB“U«ÃYЉñNwñÝZdÒ:…}¹ëÏÀæâÚÂ-5ü–³éÆ`V Íwª™ËöZÃTÔÿ¦´±W¯‹<²´ÔŠ+ömWšo’Iù&Ó‡»]_£É±8úÖ¥R®£îÑ‘œ6Yà€þÛù±·Hõw:a7.ŒôÓÿ·eõôuÿÁ~»·¸%[§Ù‘—“X©eÈ#ÇØÓXãq$B¦ávŶ…—« ”8((ú>-–áPÒ¶êtwOöz¨ìͰˆzõ ¡ÕWnnšÈ ÇÊÞýׇÝ÷õÇø±ÐjbÑ2ÖÓx¥@R_éâ@…H^t›BpÔøŠ„nƒ›ØI(žÓ—öñ)útOT-~Õ#¦—faÓõåñzÀÿÇ8=†+T÷»g4~ ®ã:úèWÈNLç¼lƒRE僔¿žKÅ;›Õk޶É&……5<ý9¹ ‘áfª·u4Œ>ÔA’j0Ï?V¤£w #^©î/úþ›Yâ‹ÅZÐË‘ð/…OAl'϶¼¸ íŽÓ´ùŸMÂÊŒé›pINgù:¶§­ˆº†#ïæñ¬ö‡F™²¤„'–›ö4¦LŸ¨®~êÚeã:oß«XÀdtof.G{òm±ß§Pp\:Â…ƒ¾Ò_†MÒ,oC×Íâ?gÅRï$êQBÛ+îXRCATæ´ÔdÅÁH1‰%:í ‰²6Ä&‹¶#ežÌ!žÙxn¢ˆeâ|™oòUÿÂDCÉ2#k¯ÆË¿y²»°ÄiÖ|*+5Å-jZu:›ð„Ο`Ìæ6|ôÍ¥U±yÕ)Lmßѳ FOLÜÝ €i¨Œ¦/1A[ð·=ËŒ‹/m¨¶ð±.ào:Þg—²·*òÉÞãfó° +2ÀN@ÊôŸÈl¨ÌÛû¨Á®ËTxÎ)œ”&ØÅOoë“ì:ªMÅÈKøØ=Ü4]ú†š9œ¡P ‹”þÜ0#2:³ù†OÆOÏõoUï@¦£á:@í‚z¼1 +ŠsC´ÅÀ³}æ Pö…HŽvíO‘p0 ‰ºöé˜m³‘-× â»tñ‚Ì$¾ í3ïOÑËõЄéå$É2#䇾LÆN5®Ç™[1,¥:àc–÷ta0µ’ß+ÛàÎ8±†žauÓ²³GîsyâQ$À™´y1p L‚ßÕ ¿%ÿÜÆ, æO8ðu7Uñä«É¿Žöƒ—úËhÅÆã¡íÒ™:Ê9²÷B–x×+¬ò¶cYG‰k+íŸ,®ª,DÀ¼¼Lÿ«Un™.5æáI.¿ºÃyM‚üvE4ty`/óòzRç07òP0ÕL]ýø”[Ó©°¤úú ƒfóƒ>Ü7¼Yžçû¯ßOÄìzˆ;Ýu¹Ü(k ¥Ì{—Šx"E™Ú¤JßžjBo#ª†}óÆ€ž¦ÿ# ¹9gÖú?Å'éÒI¡cÆUÈ¢8ɯwз}aË?›e‘B±°Çî#0ˆ—Û|ôHuõX_õJªî2·sK[$êSçzh•¨Ôþ3#©´éøßaêÓÒKÿçÚ‹?€ÀáÞºAäÏß(¥àச`Á0`éu=QÇÞõ\Ù/xyo&²2Š˜ÿ«†·Ø«n»†€Ü~óâÃW.MÒu»­”f—Ã-“Cœ9%S ö”¬°œôËââìSbsÓ†ÇkR·BÉçqeTðuó9—îUÃ~¬²gQ1D°6+P1`;+Ç'»ý¦bZ ¼i¨.eáÿ ÷f‘\äjkÔV‡²ÂŽ•Ë4*ñ…Õ*ÄTá¼/äìgãšJRG ;ÆÓoB®h“÷¡¿NƒOo†ˬwÙÆÝÑk‡#:]&€ðs×:Í{ÑO€¾¹:y÷’!ÆžF~´[È£•F6)Ê÷k)oz+ëà¶0|AvoÁºp¿¹AU×N9ˆ“ŒÙ,8‡šSÌþ¾8ôÜMFÞÁÓ‹&ãJ+Åô¥Vò-ή¹ã_ÃãöMkÔñçJ)¾|ýð&6>v yH½äÏk„ÀñI²aSãi¥H÷nJ(>y&ߥÜßy¾¥ÄgAÎâ ôÏ$²²šW3˜úèk=Eñ«€R­h•ÁØjÇOU,òcé%zúð°µi¬o´7‹ææ&ñRÏF~øV· ¤'Íßšò¡9¿orÕ!Û ÓûlŸ"DÙŒf„r4µ#äpš#T@…OгÁÏmç^˜Ò€›¡Oóh‚Ü/·è<ñ£ KºÙQ‡c´å³ß3X=¹³¼Cê©êÞTZÖÕMê''¦eokÂBk0´¶€.ÕÚ•ÝôıΤ [3bó+fm`|[›¢Œ3Å„ûo/ƒ?áé“Ýáp ÝüžÞΦHUŸæŠ©.ìžój|S_ˆ)×-VaI4Ù(ÞÿJ]Ý´âÃèWÜ ¿%¹† öÔ3›š¼%4qŽ^˜]Ýd™*ûxˆ¢e÷·}C).¥.@’coÏÔ3Êý`mIœ@ºlŸ¥Ç†0-T5{„ÝÎ*PÞǵ=‚_º D ¹½^°äă¦³GľJõŸ7¨6(*OïºÚaawÎÑé‹™»$F˜Õ¼î؆Âþ u®Û@öe²Ô –ÌAaÃ2? ¿ïÉ¿ž÷ñkì,Ö?ïÁ®kWÓ:Z&hô¬¶Žbïo`I„cÚɶ^3ã¦ÌØÓ1ÿ&Ÿ¡¸ëTƒ¦r3»“3z®C sY«åd˜møXßt¬³âJ'f³ÆIvµHûEÀ<%~»)Ð@éÁ<îBÃ3áž>ó Zû& f‹^û󘼤®+2÷•)Ä@g÷J¨½iKØAA;’XaÑóÉ$¨•=£utü¼ø¹éJÛ‹’©ä~ãÖ/އCy?ú –ã„‚Ì~=Ý6èÅ{Ö ŒPL1˜uÕ`K[:y n’ øBG%I¢ý½½áËÍã:ëѪâàÞÁªÛY´öO3>o«–CJ/?íž–*pâ$&›(¥Z¢Aúüêd|^è:âE–Öü_ÒåçUâµ$âðòÆìüj¯tæ/%ó ¢öO½Hp_c]ßéô†/:V‘.¨Ñµv*`ð{™WD*u^€7!´ ™jU‰&Éô÷aÊme~†Näà Ü0׺ €( »7Z¤Áå÷xéB2‘5ꂜú$6 ¦8þp~ÁF˜výˆ¥–;ŪÔ+ÀÒr|Sòv!¨Ø*YŸEt¥‘¼¾£©ý¨ãDÃ]¥7^1v0Âÿ) Xî õy¼ÁÖß?sÂqµ]G}@‚¬J÷ý‘››¤ SŒÿ]4ë2^oÒ4óDïWÁ§¸âëãË66ï‚!Ñ.ïc«gêÂzæò-}³´xy_Ú×-d[ßD”µÍnV ºØÃYVéÑqäwbþ+îbw•ãÚ»Ó͈ëã¶e·Ò<ÖËíüNSåÕTÑ­YÞZ7Êû$©ÍšrÌFº©Ù:°2êáÜ©­ô\س4TØ:Œ½²³W²ì÷@ êQªª ׋ø –íŒgÓÏ|!úÿ÷ÓotjOƒ>ˆ=ñuêüèÿ鵕­¼œô š’´¯€6òÖËTš¢{ˆ ñþ}-o î²:ÿNôú‹c_ûa¥Ÿ¨ÁFL9ÒSéýx³‰ô%WkœoX´¼Kï §zFÉ™xvã?ÏTmÝWˆÅIA1»5‹K5,G[4qh'²¶ÓCÌ;UŠn|"š¿ñw~fOv%f|}%"߯iý¦Y‚‘g¼½zè«[üˆ—tíu†uÿ¦L `R ÚÁûƒCj‚Ö5›ÝvÂńЄKˆaòØ·{¼å’U6øOE`ŽŽŽ‰ˆu&=è5M%¨ãZã°&¨Mó~ð‹0_"i‹’Ö†3È[Å|äÉ!Æîãâ¾õ1CDãU4»ÕRýU!I¸þ°; ‚ó!ü¸£´—ÂL€H°?îw‚䌱»³ÉÉ"fû0»6°_ zæ×}‘ÃÝò¿=Óyb*êò5µBôgZ’ò¦@,€–OQ}ò{g–¡/ >Æ@íûŸÎTG§ ˆ¦üHi ×Öµ9ïÓÿ{$Сï ü3Ä Ë¬‚'¯˜þŠ×0Ƀt[êŠæ¾¾L­ŒjšŽÂB³Ì üÌëÉRŠÒIÃHÒ »¶Á(ÎöpΜrÚ²ŠˆÓ.  'r}9œ´ dV“’Cjë»9Óïþï‡Ô)ÈzýŠVO¯fìKý—ûîþc&P#—ý£ ZÈLÜ[0bCÛ_¿oô¹k¬ªõÀq™Ó/‡à^¾†£I#mt[Ïh‰‰Ï¼&/ÙŠ7â•sÚwtõ8•:¬Ì®kò-ÏÍ;›…žǯ›gçîÎNÕ»sùŽþä/È¡mORh¼ÁtvK5ç°ÖˆÕÍ ¤™9Æ}»È0;¤Laô™Ù4·¶•}¿^uú7N¿r¬¥v©žü-qñëäÝÄSV›MäeQ)¯·oÏÐåÝ=uûÆÃ…}ò’dA¥$ØŠºÀâùf/yF§YG¶•²B°6šw…ïâ…æ”k¬~Ró +ð¿2w'V¥ª½ Ê¥€Æ©r‘Kû†Tï—[Í׋л°šûíÓú}SØuºÎnuN0 il.v<ùh¾ýéÕõ]ÅÅvÕÓ±ÌÐ;ø“ë%+9%G ¶S‚Š%KK‹Ìx¬êÔâX ÌŽþÌŽ Gÿt o³<ÎŒþÌö†EÛç v‹}2f­×e——ƒúùÌ8-Ï2©H÷jFñö³Á}Aß>¨ƒœ;íŸXÂkÑJç ùâû%éx”¥X[äY÷û¢ è­Ug;µ§´dµÌK¿ò¹šh±¶Ñ–GkSnÜKÖ:D3Ä47 $% wóï8¤ô×ç~Éæˆ/óä~ê "Ãq½ð@.Åd¤Fym ªQW, bó756+‹fÌlú~Ÿ¤¤u$W/‡¦;¯Mä> $Â[:ì3Þ. ¹P$ÈÃÀêkua•\Œœça…ÙÇäÙýY±Küuú †Åª¢G‹`¿@ø€¸ÈÅÿ'³ºùdÀ4_b&±M´WnT~¹Ê~KÛW&nBë‹M¸tðÔ¥w¨16º 2oŽ…ÜHNyÇ3qð™}  Á­8T MP¾5­Àóû[þ¥î°På;9,¦þ1 z«Có×üJãC…íü•‚Ô_ªÚ€Éákë.ܲÁ6ÐS`ðÝ%þd0S “³QÙdïþ“s>ŒŸ«+.¬É0Ëh±or×S¼¯²ß“†_6ðjW1&ÆdÇ]µék§ýRÆù[Ãù}â©ø!"yO¤Ì96ä#$PŸñÞ!—Ó Ð.ì±]-K?!$׿?oœ`°2_Y`÷!ˆíü2u ‰d”6ÊýKÁhÃ㘅Ŧ¾…I¬‡öIãIÛídU›B‘nò¿Oy ˆsS¥—Mæ´¹¬•M‹Ö¹4JßWò- éŠëö6"ÌaÛÙ ôÒY·«ÔåÂòÑ{53ŽL6{%¼B‰l^iÆyVXV…Ý|Gå½ë¯N(ýEñH’öìΠ S“å|gÔ«bCý{æ&uáˆm͆ãJXpJ}:Ó9ÀÁ¨ÓJ|sþ4µ ˜uG5]>©àÜ 14ýô¢Å{Ý>hÅt0Ÿ›v1&黢EEº]l“ë³eQ‰é"¸Ïü„ööª‹ûÛIbÌI­ÚcQ§¸•o™ß˜ãåÁ‡ˆ¬SšÓ>a 4Ìûß¾‰^ùS/ C3(.VZ…‡ñ æ=íªk¥¿6ÒŽŸä$–cYÝV³MÔd¤,¹JÁäOð!îãõE8Mq£>Αáî\|ôŠlɬü£q¸j¹¯IÍ-$w»u&[äåõȈsr®awŽø3)x•1‚ƒh´ ` añÅ…¾ò~×· *áO ˆæÃ“{_êvÍž³½Wû  QÔ:®Ò³tPPê3ѳ˜ðÃôµnµñÚ ñ³rG6ùÙ#>.¸¼­b“¹÷c®­ÿbáYÕRâpZAÜ.*ú£,Ü«ôÞ܂Š­2,;+ NÌ›€¬x+.Û5_˜Ô C¿žÙõÇÔ*ßBúWy •±Mº¼†'ñ&‰óP©¿yGÚG(•›˜e‹ 1Ñ€33Ñi\˜žjo‘yÍ-6D"£ÊÉÞÅ÷m"à”›ÎáÞÀÅ=ÖÈ'b’•rQÐþ>9àrC»Cð cÕÄàÜ–ÀÔצ…ä›tTÇEæ:Ùë7‚ ­|CeJ¼Á0åoÊ0ÇvF*i‡ðC2l†´–»‡Ù_—ÁÄŒVYÆÓë-ÊCA1›þ+o½BS~Â\“â|¬:V|`{`äÝ…¦¹T*MŠZ®+Ò’F®ÿ’6&pº†ÐRH4ê‹Ã~‰ «äãSÅ­»_ôê}ìÃËxŽòïé4¼ËtÔßBɄĉ§ƒ@©”{‹Û´&²Æ“D'“|ˆö©—LJšu¤Qð6Ì9Åù^5üË ÔÀí( Q^p X~ÈqÖ¥¾Jéæ|r2O"©B_LZå—ýhQ^ò= å6£Ã—öºkûø‹F­wÅ'ì4Ær=µ¯p¬ 3AŽ!<ÌŒ÷»âë½ÿêKbÕa[ƒÃÆþ¶ùÿa·§î‡[FHí’[H‘ò7 &Çv.·ôYdz”›èê.`—wm˜-â4c^6ç0Ò0:É&‘È£³“&Ê=wðÝ¢¥Î£XVåoDùWRÎôZšÎ,§™’z±ó½:k÷ž /†Í˜ð‘„ü?™oÊù‰îü×yYr_õ,\5O¶#q:¸ãê¯Ê;87kL ÈyºæcÊß´sÿ= uä ô`%š–©ŒŸà~J8Ñ>GÖgÇ›xVîÝ‘öµzó›UT z§$,Ëfvçó>Â~mo¤ƒX.Ë×Öù&´Iç¤Ö\’æ‹“5=$Èxé£Q(—sóýJñ‘8Ñý(€{­2±JFuâ(” YjP¢‹Ø„{/טؚãä‹Äƒ|ª_Oï €ª™¬?°BÞ¡]ŸåKÿ wÖ–5zžeÍÈ=xV`i?…•e,œ”UJiÑÞîò€LoŒŽõžóØàgoÂPRx ÌzNÁ_=aÈë§>cC:–W>|ò(ïA<›ë”=Û£"v{pëôÛÝ(\¬Si­ðÔœc)!=ù@‚H TnÑ}yšµï3ýÝaˆ0Gˆ†âpu­RL5ÊRb5 qu ÀYCtUA’ýí.ØÑ‚¿û’»reòÁìƒÀ»‰/ciíSTPZ½‡O úÀ‘ ±Ž¯ ô¹z.e'ŽYy9^f9‡aPã×P”[;¼Òò¡ÈèØcGÚúl÷£yc™Æ/Nñr Å«x“?iüj¬ŠôÈÅA|Ð0e ˆˆ›v¨cûY ´¥]¾òêËjʪùŒˆ-ç}B¹œh–—H—­êHO)ºÖžGÕÑ[‘KT’‚i>ó1+‹uÈrƒ½¼:›±îŒ‹·…Qÿ¾Ó&9šÊÄ [ûâgÿ…×f«N¤uôt©%Ùb¤ZüÝ£äÇë1¤„?9Œq0qâÄPß\ÄÒ>D ÇåÊ}R€eÀœFˆ¬AH¸Ró»>x1jž£øzÁæ$LJÀ$.§½Nä6Ôqð £ÿ§Öþa€øüÃÏÌäˆé2ç¨ËiS¢2-®Yîûƒƒšé×~!¼´’˜Bö­zuG¿\áäQW9îªlUWªÀ[“i“rÞ†Ž¸Ùu£×!Ó| –eÏ"@˜ÿQà'£„ÚHd›d+BDì?cÑ7kËoÙ.,Ï‘’£,d˜ 5çiSó][²aæ»$aâi™óY=¬ëóMo=ƒÑ«˜È~øãf}Ôj~|†¢©•’ø¬òs÷œ-H¬+H"<êE‰Ðų¥1{([Ûçá>ÁÜS|+Â~·Fkq,Ùª'´¥û~õ…|p8Èü†ujÎTEÄÃó0¢7]B"öý™ÛßïIP€ø¡àྩé,lê0>ÉÕý!®6sÚ@MôÆN÷Í›ïºqÖ•²‹[—›1šÓQ"›Œ[-±úGômíJކÞÊvtmN{â ôï÷SuSåw*•^П7›„ö7ÉŸî—Á¥>ÀœÛéó$(¸F§5[~T¸ób;­¾Ó"÷’„ÿ4‚ذ¡p[ú–~|Ow0­Ý›3:ôÿ\š|1êv'æy1¤4§NÄß  3¤™ß*œç<æTBMpê' o* ÛØuŽªê‚d…¿Y‹‚§¦3@ñ3ÂnF{OÐS¶À÷eg÷ ê°ÒúÙݹDYì®ç#ëÜRG¡LgØ;–Ëót ½ÐQ¡ö« [s´ÌpGhêôùÓ@™dRæç"i=ïÃÃiÌ!O£Xr?öÊ6†%éàó[ˆâXE†ójz\€yØ~a›FþúÓ!¾R%ìõè‘ 2¥'¤Á”àÜ4èkëæÎ|Ù«•cn@ ¨Hä§Œëëþªú¼ÿáÍéÚëÚÚSfS“zièr±„ñzdÙmN c»Y‡àÕлøƒË&’€ZñTÞO‹õûí<ëFžºåñòe§OÉý˜Ç6M¯À}@lœúÁ¼éЊ´ˆFÕ¬…§?Ÿ¥C‚åîyÌÜñ|Ö¹®/ƒÕ°}PB-‚RÃÀPg$+çÉV)Ÿ¹ëÆ_npeý <-œQ<ö{ÝVmOèy/Š©è]­×ªÙ=™íŸÈežìÕÇwþœ©N»€<·ô¾úâ°PpôxÜu¿¸ {ý”O(îv‘N{©Ù^–>qö‰¶¼®LºXíxß•¤Ïbâ“sâœT»9“îë$ZªìÃR}¯í5¤NdŸIû~©‘3Xò–Œ¸ïZá±jÔ›î„O Ue¢ñ]{I,æþ,hŸéýdÿ©{í«Æ+ÆÍ²/ò±!’ƒ\‘pÍ®Rº“ˆz­EähsS8Âí hŸØXÌh0œ~ŸH’H?c}"§¦Œ4IÒ€Â-‚ Ràøf«ò‡êoœLéIóqb³ÚVÀMe¸{°£|æ.<ÛSÀDQ|‡&†)Ê«AsÈì6÷ÜÚtï E>ëk:5²ê“ÚìOlž¯wªr³VŸe‡#Ö°O%ZÒF°î%p·½ˆŒ@E7dýq½ë/–clìÞXëfŒ õÞt½¥c´44ÄÂA6%”¦ Ây¥a‹½JÈ=DFB¥"w€À`_Ùºê¤ro¬]òWI׺<³mýÌ?½‡gÕÃfCŽ£îù0xñKª§À»Ç«NŒ?;[e€0§U÷w¤)¾d=åówØ(Œr “jYSef!›Ïe¸YÝѵ{„Í‹sÒÅhTÅÌ×÷0Ô3õˆíE_ï„ ^ÅØÕQYt1x˜€*ï?«±G•,XE%E4â£ú<ë– ~ÿõɉ7T\¡‹ÜC#™çöŽºxìW‡"X@îÚn°2ÝQR¾ˆƒþÑáCM<wlvȇõô ›yŸ’¾ÄS€½•¬»0+ØìÈ÷Äë8ò8æ—ØBß­›>Ж;[Kú· ¡13ø¹tû‘¬ìjÔEò›ô44mFzZý}MZ¶1QÅGk*µoÑXÅ-_¹PׯGš2”MÆÐV_EK1 'lÜñ) Áey?^Û„Œdáà‡¥Ð+VZÒˆ¼Øô Õ©þ^$,"}$ Ê;智¿Ð-ͨcÒ7“ÜПWÄå1*Ù¯d¤î‰.„ž¾P±Ý?ûñ]i¹3Š6hE0i(Á\P¨(c¶oók9lÿ¼»L¼ÌûI6®(ƒvªá†—ƒú¯7ê;¡R™œ(NWUbeÍ]ái7dc$v^:«(KŠ‚ª Þ 6Í¥0æeÂ0EƒdîPF|­Æê\åµÀ¤ÿ­&Xûj Ð5™Ž4oÕ`D R˜‚ìø³OX1é¹…±¼rÛ+]kõjO­ÃôŽe472»®~ùÜõ>7ý¦»•x¦ÇK}}®à?«bÿ_Ã+»ÓÖCn6}ÃëØõ1å‡xŠy¡ ˜gáZ=j•À[ïÛŸ7)ÞW–ú[Ž<¥Xš$ž[Hßk%¯79"QnòD²ãï<$XÏÅÎJoaKª8˜ìÿt.aÉgMg#á'v÷Ö_,<ØœI/èá2sÜ"6‰~·qÚcN‹•6Tå ¯ö§3‡ÓuÌÊg.U~Èì"Š"âvUŒ+™wE‚@cê!ÑðFa#ªzmÿåEë–šôªoÆÙAÎ?ÑpûEr!¡[UDoÍ>¡oA캈 €m]ªúò~l¿CÚœÇ×su0'|ÜÏ×מÓÞ ¾0‡T+WZg1ZF<Çïá ’Çs#5Ó´’úùÊ5Ùœ!¯ÑƒË€*Ä7372eÜ¿­õ¼W(ØE¥æ#'8Ëã‘RÆlêú,qF{`¨ƒú Ñ"³ ‘ŒÐÉæÁL‘ *ê‘P|X³=Qh¿‡“‘–еúŒñG²qDN˜%ñ¡Ó™ h”úaÈcßk§q·VTŽa fà¨i(_Ì÷ˆº‰½€ˆ+â¼ÿ­ uÆ‹=дPŸ¯8þ×P89[–I&¼<ëLS²Ö~ŒXuФ4¨åbz˜;Äö³x×`QœïH©`>nbbœGæø‡¥öh×V¢4OšGŸ‹ë®©5þ›vR¬„ß÷™TSŸ}OÒ._…gyúa;ô·%f.$ c‹£üŽM¸æxWc¿ö’k5=Ù~ùü%´?~á¸×M»ÒÉ—‡jK’mï­ÄÎÛï 4€m†ÌìÆù§#->N¨=¬É‚´ðmü‘Hž¬†]Á‡\áÝq+1Ÿ""Á¦ *$IÃõ¶´‘|æ¯M¾·âÉ|ë‘ê#ÿÜùÉûë/ôGŸŠgbG¢Bí «å«âO|f#SúZH'}Óý‹Nú^ÌÅüá„Òø5º\QY¿X±n§¸¤g`Ý3ÊIÔÌVÑ€.¹…LðãLgß.GO²õÌË_OùYñªícƒûaû.¦,«xÍ Bƒã|ÈNŒÄ*3Ò:¨-4ŠÅÕ>q…Wª4Ùå´ÑG¤•vþÝ”øÓ‰x‹­?ÓaüL7Ÿ M_Ñ1“õÁETF^-ÑkXP€a{rQ¸\Ð@œ/îÄ L{¥¤‘í 7«Ÿ¤ÜMSÒˆ¶£¿¹å˜ŸHmüKlB¹f¯²ãNèK´[ßî²í#È_½·|˜„ÇfP)HŘ1bƒ{öŒ¨&ÂÌŠáê&ExÛÁ±!çÎv…‹¦ªãä2>ZÛsúÞjëþþ-þ:§Ç•q#*{éNÔ´Ó…¥»a Ny¢QðéœiŽÏ¢Ôf“óüÝT8Æ_ül*±ê?'íÐÎå3=³ÃÈMÒðè%Þoä@?¡TíoVU@µÅŸv%ÊL²ºþ{©Œ!«úÉ#%•ù÷b4¦µ~…å˜)sË;¼ÑláòA´P¹åÛŸJ}SÚ@Ö4Å uý΀øÊ,BäxÖ…Ý“ng ·»MŒ¹™ÄiÁ¤J¯j#2Z?¿Ö¢dq "EÛƒ¦ÅS‘¢ò<Ÿº;ޢɉ ôBv{èZ¬õ²3†$IëzÇ ï÷ û}ƒzîÓ¬[V[®×³m˜¹”ïä"I¶úw2¼qAܦ[BÝîáY\TqjþÍ÷ðïºÈPxìÙ]xp'¼gÚXÄvùtâç夫ƠªÊáR šŒ5ÌD•ÑM„UYo%”TOœB9]eõw¹Šsó0ä°i›ô.|V颯¬oï婯eÇ„W„lîk¬{ˆÌ4x£«¶ –Xþ¾T5ðà¿r.€ESB»Ÿ *8 ·ß!¨47ÖÇX¦·$T+)æÕj@ñV‘àƒª—3`Ó›ùÀo¢a3éa"e&1°RWØB ùkä_¿ãÑ=ÐýÔ–Gûþ‡i„àÂææ¼¹¬¸•êv©Y׬•(Æepú‹‚µ"¾oR,âKoÀëõi˜¦6ªJ…Ò ‚{°Y?M>`]Õ>H„¸Ì²½è2´.žØíüs^Ã×è]褴nVÅUɸDù‚œ½n¯t|ë÷ËñøâÉÞ€Ÿ´ÐÆùò¶)~%¿®þˆ ütŠÂêñMÿ6m§®–N‚›Ü2[ÒwÒF¸Ê•®õaOJÃä2Å8Áÿ>´Ð¼Éд (^Œ`6FŸ ŠóOl}µ}ÊŸß+´`û !5ÂÓr§šç•8óðÉ'nŸ#ýÿcK6yûBð‡>un­u½|•€ÿA™ºäjkƒI«•>ãŸL#dt)ÞNwF½)9žHŠàÍ4›uª¼Ë¾nRm^ç¼ÖïºZ |Ô–¦E§Áp4Û:âÌÿR,ìm~ ¢é‘UhôSu¡VÇ1ò‚51t¹ÂKÒl{'}2ëÑ©¢n5û!G‰A÷-‹ÿovFPJ‰óÜž™2lN”xH«¼ ÂÌI@þE;âæ)‹]‹é§p&Hö¼AýckåPyûõ~ôZ—Ká̵àyë=WMMÿT/§0C`£Ìü2!"š‡€Â=¬×Ú@pú¼ë¦á¿$  ÀÀ¢˜ )4gMÊöömì²RNžj\R½L¥¬yU÷Öri2PQ'ò§Ñ>ev*—í>ôõÚfPÊx;JÌOî%èÆ$¡ÐP0SÿÔ’þáéäÉEæ<)b²¥RC»m*\Y¡ø‹¯Ýæm ŸÅ”o{gT¶™GúɲvÐ^Çõà?ü“О¶s_мýzåǹmË2dˆ{C>0§bÏlVÆÏ4z±*/`<¢`«Ó À ¯¡»yÕeí}(y=ú«n¨ˆñE ù”Ü›FLºQÆzWñåõÀÊ0Çÿ|¤0¹Ì¢'áPéW#Q†_Óc@8àd wç g3á4gr+®~.¸àáõ#4Σ$H @¢ c^€ b½¡ J{Õ5[†ÿZ³q3R(œŸ^×ÉußcŠ\„PÑ/LaæÀžåxÉå*\ÿæü7ø‚õ 9òf—nù°e@Âßá;Ź˜3hÇ)ÒË6qß̤V}“#3ª. VÙùX=’ÿÖR¤"Ê{¥Œó§ öè~D9hãFÛƒuW0Ûe³+–&£¸s˜×8!Íù×oÔJ=ý¤±lFӜ鶸;È®÷<¦ ^.ÁäÌ“ŒàËoI¹rgÅáÆ\ª”óuw ®(z…,ÅÞ×㯳Ó'U™DºT€òmö)†ÒŠQÇ)âÏ+.KýVý OŸß”íM'¢R¥®§®Ï¡ÓÊ%ô=OŽR\4÷Éšˆ½Ç÷ûf¶ù®—9Zjk6zõüœÚ)åÍÖLx³Íi‰Ö…K·ã¯X>4¶ÃcšI]Ð6 ™›aPÈíŠTuZAGÄ9ñc*'ÝD=@Ó€aÇüþiŽHH.Œ‹jz‹;%XŸRw‡_¦FçZ÷pÙU-ðB¥}ÀÄÉqë;ýÌ’†?jPö‚Ãâ?o¬O> 6/Ç.É -JšÊÕùÜ‹—Ä ‹nû䋟/é᪜ìŒ4;»à}Ý¥ì"…ª.˜1È¡†,S—^(«»¬çiÂ,ðCãÔT&Æš‚ÒtŒ1! ”ЉèÒÀwi§±ªù¯Ðžý äâý]ø¬3[¹lh S܇‚¾M©–^k¿ù˜Œ ¯NÂÅ5ÏfœzÓ77w 1Õ t—õÓn‘H:Z²ð¯™¨Ÿ‹þ‚£5É„¬)§á Ým°ü¦ê\³piÏ0óÍ„ns@8¹ .l ë#³GžÄúžt¦ÆÀÍ\Ì÷´ÏLîÄ×$.D¶Œyõ^¤ÎrüÅë%n³ÐF÷ס+ßSV”‹:(ZÎÎÇ‘ó§ÓYô.u вD¶‡ÏÄ.×?î|ܵ6 XÍ|OÌ9¿(ò²¿F%_—?f©Õ¦Ȩ BˆL,Ô ¸½óü8XÂÌ,×GÕ{þÛÛÇÃÒ¿%]¦nÚnûïhËó¿ÜEŠ¡´NÍFj#·I ÞïÁ'h· º3‰ ü¨ªý+R)}NÝú†žñ‘ÞîY¥ âoéEHž Œ³L ¦¢‚þUB äEÅ¢ee½'èÁ›Òp1÷Xr¼z21]¹nÍøù‘xú‰™%«Ép6s±=R×Ë'¦œr"ðv4¾½ØÌ¿zG_ØOR 1KýXYÀÛ[„1¾æPPŸ³^F3v~n²b2‚ýe<}­§Fâ„ Œf7L1ˆ^ÿÂåÄ•‚*°§ù»/b›ÂUér§s„ïg_zpWG1ÖÝdñÐ_D£Jº¦4¨/ú‘]ŸíûvVUéê/çdÎ.f'ˆ #¬X„@’re5sO/ñ‡ƒô\)vŠìöÄþZa—‹Èà—_v}7·%XöRž(¢ÝÙ;uÎ×/å¹°º@œ/T&ñ#þu˜À Êȼ¬™›!ʪ Å£RÜ·3tŠ@;…Ýšž„ÂU-•t¨êÎn7qÊÂ(Ì1Q+*$Á<ÂþP“´[¨RÌXußL Á'A¿lVSÃã\8bàjêÖ…h—ªKãÁø¦í=Ù„ñ³n°Ž;=îL@ÚB„máv*Ÿ¼Î“ùt÷«Üa<Ü9ž™nž#–¹ à@ÓÑÝ×r«2^ë¶oÇØ;>º?ê-dzᯑJßšw<m·ßÊ͉X÷3q«õ$‹Q½ú)á±qK{MM»©çѲʘ/˜äñ”Š¡ ˜ÓCéKÈà—© ýZþhHè¾úÛÉqÚé=#0”[oÿ2 Mrs‚Ãbʼnծ´L£5Þò{gÆ¿DôLÁqŸúžb»oýžñw½&ÌK¹Ñavb­ ÑjÛ,KÕ Á˦††y¾ç˪v&îWÏÑ|5@eënó)¾‚6’¥¯ iiåÔ¼?ÚßA'l‰wOÄYà3_ŠÚ©÷¥«U…³«i*¦MOÉHßœOj¨áƒ”dCº cö··yäö}éR¯—Yÿ('‰x +JÇUÂm4‘uïô¿dMÄGÂ?îVãÛH#?Ý?×i‡v‚gþ<-!ûô¥oajì¯ø-±ç†á`w}?QBácɤµFa’˜ Rz~‡9Gä‡]Â/s¾ªéˆÉáMÒI§‰âDÌtè[0ª±œ°è#ê!²Tî¤B*ïái­ ½Neé‡{þɾÙS)Å—»œñlÔHXÛ l ®îî-3îÄ3ly@õ‚¿[ËZ0z'ø¼2·Þ¿G•OèЖa‡ŠŸF&QæiðMC”L#8*YÖÕ±=¶R^’]ý‘}<¾Ñá÷e.–õ“ŸÚ×Ôž9íñÝ´ì.mÐ*¨É+œ±èÀ( ¶í+›¨½”È¢·+« ‡•!¤£Añz0Ä»Ã*‘˜ƒçúÄ”MPvgBíçCCAm*}»7&JG,ÅÉ9µìµãõÍw{—<½Ä\qöi•Þm¶€El¸Ìйê1ß½^+>rÙìÎážTSmêwAèUjtY£”ªê»6©?UÇXHd.„LqkÕúcl-A¾0F¥Q…uÈØ7¼Ø6›Ä.K*Í«qdGk|8ì&S†xJáÏ;ÊæìÑí9wn¥ôäL‹—»L}WFí±NöÅöŸºHH©Ÿ‚²ê¾ÃWNp,ybËZ§?Ú^Ê.¤ŸG*¯ä2fkÚ¥¢¡ZEà4Ú±Kâi~Ò¶›+.ûð°AÆå»·Ì Ÿ7! ­Ð«¸¥î¯’U8<³Kß®öþöxž õ'øÕ8äº Ç·0â»^˜|Ñ‘ f(\vì{‰jk÷×–|Ȥ޺ÊY²î¤•\ZÝ fÁ¥¿`› HPØï}8ÁlÔQx=™œ±øÞ©Ã@Û€Äq>K½A“ºTײ¬îßH„üpª \®©!_]`Uô»¯vm–{“)]=_I»©x¼—þØ`‚ËÐ]„~e47Î’­´2|ï´©lµ@ÍóµÍê>Íàω˜½|q&?רvášÖ6Ýzý§O½®¹äëh}ú¯{Å֢⤙ç|Œ­I±»äŒÝ¡!Î2Àö“„¥n|±ê6ªù’¾åeðعè7 $4³¦}¶‡iQïârrƒÑŸ§´ÇæýØ´£D¾Ò.¿ެºÊCª0¾œ=ÝŒ„ÜÐý¾,3§Ô>λ #k_¼vÝù§û(‚h¹Ÿ3õÕ'(?J5Tu$µ’§±¤éÞÊ­FÙ[êéâ2ÌõiãøGÔì'ÄûEûã»eÉ'Ôæ×aâÑÊ™,¡'ç̾0Ïhþ¼†ædnìþ©T°4k ¾º»dÄAɾà»ÀUÕ¾„QÔß°–+n~¼Ó@Ü£%˜®Çå8…Ƶ¬n2é®ÇÅ´—ƒ£êZéüùM ò‰ÄÉÆñ¹ž’½[ϱþ¾ièZb³w¬)“P‹ë L.+†¡áƒ„‹<ª²UèÑÛÓ þÚ–!tKé_Œdã€ÑC´&Çü÷I]¼ó²ÛKe¨ðn:ip1˜™0žxå†ÝÏèõåsë7TóÕR±_¡4)rkwÑSXEÙ’XΣícœ¢ÎÒód:f¾Ñ‹’·:Â*§”*›ùiˆ¹µPrÚêP%^DÌ×ËSÝWã…÷³±X~4þQSVLtnƒM›êQÿ7!àq~f’ø3P•¾¿0ÏÊOS‘ÔÉÄÂÕ5^ÝÑWo±Ðk‰ò\¾G K9Zöh_›wß|ÙTLð}2F§d{@㣿ÕÖõÞÌO{>ÓéœPÇóYI©õ¨÷ïs%[uºÃPÕm'3»”³ZÊB©+uò‡Þ?ÉÑ>Þƒ;ªÄ†-è<ölÈòÿGLãˆä²;qT2i¢{&ÔeÝR˜O§uð£á‡r¾*xaK|Ì? ;R~·~×éG,édecæªî[z,$Þ¢µvíÉΨÉú0Yv.‰Ø»ë÷W͸z¥ÂcWhzžóœÖ!³Ö¯À͹'›Ã«nx¤É܆Pk™0ç¿Õ±] ©7ÂEi§b9˜—u÷œ …î¡\@Ký”ÄÜÝÄ ö丞u7j!؉xA'ã­;ötzw »ˆô¼tN g>cgZ#{JÏ’¤­:å/¦™³ÄÞHÁ‚ÊÐCéxøÃ:‘]4<['¢õ¿•ô¢w‘Í×°ðÃ@<›•”&¼sNU9[× Id‡¹”^´û¶iVÁ\RÄò¼zB'c¶Ø†vã9ìOÈKºø:¸° d›ùàGêh­ÕOk¡Ù?Òïhš‡šÛ ‰™—µa·!6× ˆ…;ßBš»Ïé@½Õ0<ßD3Òÿ?FÛŒÈÓ×w‘žZÒ:Ö§ ö‘&AmÓðZ r½ÿwcŠ24ÔÛ3"§áW+®x÷Fö×N#;•æ:±.¾ðV>3â76Rá6WU!ô\ûUg+F¡§í3yúÄP­r¶ð ªÿuÅB¢qg8x^ö U^´­ìKè#¸*àwzø­Z¿t®+ÝèÖoõ(”Õ™$㺌¶ã¢aS?Èš’$šs¬{Ø †ÂÕ,ÀöKa×­s g´L¨f¼Q¸kÓ'ÞzXsŸªzŽzàfñÏç(>A/±«0©ëjÁ•°øµÆŒš)åºl<¡P㪴g¬O™ù®°ëï+'~Ì’K“5õªç7ɊѳüÿßÒÅN Ò?®qã%G¶ºu ®€~ðM©¸pP©GŒ²g³#5þÝ—¦ÿJ7S:ž5ÄUïfw‹¿Aò!úQð’nr‘œL?ž¹?ª,Ô‰šó*æk:fÞâRÉæ+Õ,ÂÀM¯½-‚pÚø›‡KœëéIÐa@#Ù[o<ƒœî4YŠ)OAý›%áï[‡pë6N³Ürb=øz‰ ä-È–iüA”36;®ÿ(Ìn“*Üžéš[®=¬QŽ>üì=DåwÉ“/|GRNïÑó°ö'›®ïQSvY7»,ß¡3,M}·8òÕ׆Éוe¹êÄh»Õœ´úŠn„ã€VÜ‚éDÏO¨Î´hº}ÿ Dt¿¿“¡…î­$ß ™d¸Ã Ìo¿ö³&µ«›o‰!Õê´¦ùñCaYäc4@*’IJ++—²í¡vÄq°ˆ]ò3ú§±jEº—=«í©ð*›šr¨í BðÕÆ¡Â<»‘L'sfÓg{$N²çØ6Žû!ýeÔ`hóÆ|]W1ä'ó\¯<âC?ÑFW{ýN¤™Ko®8Rýœèó\/U¾ÆÜõvdpV`¼JÖ&àip9j±\ãoïCÍÿqÝ|'}ÊšµŽxÃs¹Rº•Üð::`Kn Õ ½¢ZrQ>>™FÑÕòê‰sV>4¤E,ŸèØ-Á/Ác ´>ßxn Hý{;ÿgâÞ?V9‡7/ð˜êcæuõV9Òã•Üþž¢¾½LgŠ ë?K’C wïºwÝ¥¦äŸM¾ßàìÞ§òk߻엡ÛúŽšó„„Ñ6þX¾ÙJUOàgÑk1 ‚&ב٘\çÇëýäMËi‹bôàTàÏUÒ²üøzF1öôöHÔív\š·H û˜7‚ÿæ>†‰Id¨YƒEžÛDzÁYÛ,á‘pÍhKw£;0@$hª:Õ$Ë¡ p b`Í\¤ÿ’ueÇüÏÊB¤ cò8ô_œÖA‘ß. ©ïŸ™íqhVÒ~]¦¤[)èQéûM–·ÉéL«+Ù¬d–þ1Gipïš_pr&ÜivGJê¬ÅÏ7ç;ÎVÒ{#B{Â1©íçUD¸ÛXbÞ_}W3oô˜|´Z©ÞÂ'ô•Õ „c•Ü;Èô%ë\”HšRg¤ãò?i2x²¼x^°"‘“4Ê„·Å0Hä^ÊÒ”_!2ÐjP¿xÄ’“Q–”Ú…<µáö“ª$jû@V¯÷§æ’SP«°¤Y’\üÁj&£›XRú°3£7ûXÕ&9圽®ämm³AK§mä·áxI…±©ù4ÕN¢+bø|Ê+´û[í7¾:i´õXÅŽÀ¯hLSVûwÚ l[#Ò”à `[$X‹àdý0O.M»qëµ_ùÝkzAýa"ægþuT´ÌÞW¼»}ƒG H?Ï.…Q]÷»ã†<)@Qàs»ÛÛÿ,L£].yˆn‹ÎULÀ'ã¶Ú;îT‡ZK¾â"5ñô "e4™@ƒû‡òŠŒsÛUl<Ÿ³ðrZLɉS¥ê1VEó•ÓÈ¡¡À§æíV·ßxDËÀÞ‰boÂv˜Îdkwäû&þ‚šäÞÁÉÒïñ…覒‹ œ  òL3¸³ŠogŒ7i>”ªgüÃãZÚX^@«žåéfÅmYaetëxúúƒïhö {ûé.Äɱ¬@h×ÌB\öDÓ!“Ÿ¾É$Fº[kÏ)g®,äÅ;ƒSàR¤AïpO_jOA†/éŠ_Å,öU‚§xß(:ZŸÇï4bÊÈýbBX½N04dC,¦›v4_9Ît¾y^ž²®Ú¡Qô[Øb?AN*ãSä*€l€ñs(Û™¾zÄì _)Ùüßk×_BZ¹Ý^2`óñQü_«”â £Ÿ ˆÿƒ´òòï”tØüÈQ—³>ÖçhÊïgâi×hª\×Q£E€¯Ë’Z1¹Eï|)zü¬ZHF1è·Y{0r”ÕÑú%¯N•Â-*š¤”ÂyÙ8zú¨èä\çàî’5×ãVDÓ:(?Y¦ñ†Ý‰¹¢ PƒpþûÅ8q塸SYu±jï@›ËSg$ï‚åKPT²>ÊN÷ºlVß…Ùˆ®’Qj窦ƒódïðÈì¿ÀÝ£¶ØvIúÝÐCÞ¡H€·¼ý…w’ùºãxRù¦åTä<%¿°@iª ìNòŠïo&ßóE­/Ë{þ&vLÖI(¾ÓÓú(22×þ8¸_<þ¿™OB2Ï4Ö›BYȆm?1.æc£™•SYxÏÃ"’×c—)xsw©¦‹ó8MVeÊÒªÚ×ø›¶IüO­‰wl±á5XŽðÂ-¸•-Ç-ßðH‡_“'ÿ ²»aݪfó¯˜1™ ÜŸ•u·÷£ÃtgˆðÆrIII˜L≩ˆPT€Ï%áý :UÚ»9³ïš4¿Sužæ+Ök_¿N¥î²R]œ!³O—9°ºøø~!_6½3žJÇ*•’Ç6òû´Œ…Ѥ"®9USÓ*Ò4Ü$ÈÖ Ë‘zÉžòkwD_›¹ãmYv²¿9„—%½b_±íå8Ñga9dŸñ€ÕCà[Ùoq[¤yÚÐ?|úúô±Û·õI¼jîø$’{¾ è^ñ,¡èEk:‰Î²ƒ{®,]Æ‘†‘ݬ¯8ÏÕ,}‹ÒXþõ]”'ƒoì„x/Hqžtv’Õ 'LПW‹+â|o×ùiÚ(\NZ¤òæá/>î?K·ÇÏ+ÈwWÝ\9iË8 R¿?˜nêÀZYnûô«äFðÀ'-¹¶ÕuÔ­POð5þÝȧ$¤Ê‡¿‰’©c†­]8âŽ]è| ßÏà«à3o—õ)óô0 %6aìûÆM,YÂ"x¢Ã?™ª.•ŸjžŽ«À+«à›Ékº¡Ð1e”(úëåÚãfEOÅÓó P›Í6B”Gvä1µj¿Ý¿-·}ëÐ'±·h¶6ª`ßÝÝ´1‡xæ5:ÞW·÷hÀX(›æäÛ×k°ÿ‘sBáhå ÆúŠ»eý­dýOER ‘ofGÕÓ¸¼5ðœÆþ°Š© Z¡æ±å*XÜœöCŸQ‰²HÍ›½’ðKu°|)ºŒ4Æ ÀÀ[Í p©ÛFMÓTÆð×½aò~;Ž3öÍG¯¿½EjT¾ ŧªQóÍ+ä97˜‹µ®äŒËhyÚ KJ#ðÆ”ä¨®ä  ¨—›×…ƒ‡ö¹Ñ–]èî×€[w»ÄÀJÍW6EF¬Ä³É¨§-¼ò.òkÇ>¦T„ôh ½Žæ}ö^4ròË3Bs€Ã7]þ¤r>8éûôkÄ} ³„U¿A6”üÅS´íçé"Èàïšñd±tÒ/Âõ*ÖÛÅÛúèÈnÈ!r"d™ïùt–×G÷y˜]¢nSÞÒí¶‡#Óz]w(" hUƒ]Øc#éÝV2™1óÙ³ýåhÊúÄ·ÐU[sNÑ÷ëz@j},M…øJ*›ŽÆKð¤ƒD*¦_w¾Å—zhCí“”Íí%GUC”©j`¸_|T9>˜yàz2l:÷÷:°Õˆv„kþ •)mØê½½4F^Qz>>ýfôÅB­;¯Ëb3§&0MÅ&>ó64°‘p ª'ofutLu¶iC}‡kÿ0èžpi•ÃE)5êª ê“87vDÐØ¯PÎ Õ îAr:²5ä[Šƒ/°ðm;Lƒg#Ö=;J‡¢Uþ5«æ(Ÿ(Æ“ `’“Þ³™b+¸`¯Ë}/±tó})ŒðäH‚ûÍðº;å×4&Ä-×5%è0ÊÓu TôÄj÷ºïM uµ¨c ÜO— o€h^è燊kþVG{ú,æt…;›Ý¡’¥¹"Ü&yLý •G”ôÛk«*ù£Ž·½þàêÔ¨Ò'‡¡ ÷T_²8©îØ Ì2šaÖ/ªeý§~9­6F³FÍí¡„g1FµvF³³Ö.JŒ–ñS4†§Ýnä^;²oSðí’ØéEò0>f†Ïk–:#& ‘2Øì8~2aXר‹,³våX$DS]ÿn"àÐß=ðJ¯­3±o®xó‚è̸:˜œþ#ëMy;ø£—†S³£úÙ…n„ÍôÛz%¾¼öL²"4Mp«~6›Õ,°¿b¬ÏAsÁþƒ„ÚVœÏ1òçn%ˆ7M½ÁÌ0í¿h:2¬b~ 'qÆ*Dm Zùoó¼2ó&lŠX”—¾nhÛ‘PéÖü1±äÕ†¢‡Å½¾?lk÷Óqž5ÔXøt›õî¨ÒqFï¯0¹–*†¨ ’siUÔénÆÉ·=Ò ÌZ‹ASúO.S? jDŒÜ£_i:›Úœ“üÙ"?é»66W’™œNg"íìd§¢FŠõÚB½Nù7u; €¾aiA—Ó°8 ^~k˜÷ÒðV©¸m–mÞI›—_ºÆäÉuLa423!'%™¹ŒÊw˜:ú—áú~¹µ•¶ZXrbòQo~qÊØOìî¥KP½Ü(XÄvnæ"3UßÉþ£‹+¥kÐ|”ò=I®?ðÖ¦÷,b6&Ø ÷é{9x<Êܱ5V v%$’Š«tâ‡)m{|§Óë>ˆô;"ì(¤ö;TJ:¡'+‰xÚžÆ(³ÎŸ-]‚7ÓKÅ$¸?S,*¡Ø¢$…?÷Kõ”ðKE‡âéNYfvÉBÕ¿ð¯ÏÇ*sš\®IM£mUÐaþ»WWMRfÏ‹ÎJ‡Mû)Ñ$ÿs[.‹j†”ÑC„)”qÐ=BN®ÕcU³¢6S(‚ÒsûÇØ‘¢ÎòR¥?¬Hk‚À éMq“x\{Ã0ø h Íëò|¾ô&Æh ©‘k^Ï!=ø$¹ „äV¸‹Øöñ¬ñ19º¯~£kñ󖵑 ÉOøk·¦:¨_Ï›ê9$ù’Óèp¹â˜å&Çå¼G° $›6 èÏåÅéh ŸQzq¿[Ñ:`œ¸=÷n¯ì×Ïw’˜çáª@Û§›q10? FûpsØRRÍF.>ÎYrE±ù… Ðö®ŽCÏ¿áÐpò«;FÎ/wü«­v&û–pãÛN,ø*Àë7ˆ}A–sÆ „í­Zx9œ'{F>aZøxsâN0PÇROÞSÕY¾Tˆ!º9Û`EªâïC¼PeÛ²¡ÐúzS–øÖÇN-¬Ôìé[kf‰X´Ë_n~à†ˆ-Cq»‹Ü&«¤è?ÌD\"&Fð}8·™KWP¼˜òû6ƒWekL•SÚ³/yÜuæ ó ‚%ðÐÅó‘B„ƒ083ïŸÈ UR1]Ô]…­!ú¹=Wï¥?–…Äá±\Úa¡ªxÎaÂMúõ"öM†´>܈kæçÏej8x Ã5oD pöYóA«ö®`t°œSœ(×sJ‰¦wŒB?©0’Ýr»ûžæËÃ’oÃj}]Œ¦jáçdîõ4Œ8\æecDÍÎ÷‡'û>ƒ¨eº’OqÝÛ²2ˇ7;Z!ª÷33Ó5ægæB«va0®8þ'Uaôèý†]ö6@Ál­ÎÕÚþ4ÓXg #£t5D±vÍú<¯ÅFàCÿŒAUÝC»*°âi¥»‰ÝŽ­Î;¼â ÙØ)æ0ŠlQC,)´hôlõ ®ñ÷”ÅKÅÌý¶Ž_bVÓ‘×5‹˜FÊE$& ˆˆˆ·F""Ìq€Öìf$ ¹­€Ú{#ÌË/~Ò—ËPÊU‰Ä/+ûW ðb3’³/s3òCL3Ä^d 1E!v„¦l9ðÔfÍwä!€ÿ‚h«Ál,ëkIçÕÄ$[›Š‰£6À—Ór º,nlÃrVfèq]‘´C—eÕ“'eãUîßγÕríìz­¤;Xo›ú CT 0ÍÞ ºÛsÖËæWS¦ÊâQJ@ÕÐ8u–¨G¤;þì6Wº ‘ÀdèZ±ýx(EºM7æEïMš|G`û­3T!ʆ8ƒô' ïiÿ”9÷´UAs¨=(œ]¬Ät…–• T­öÁ™äµúæÏ þÌ¡õ6±J½ð’Ù«ˆ—©,†Âkà_>àäˆä£ùšǰåÄ”zúõéËzJSqÞç^,èAøÌŸ½w}E Äî ¡x=QªºIZ«ŽVwèW—|'ã™šÊÆjŸËó‘÷?#†AÏó½å mM´¯ÓépØÌ6·"§áq6Œ6Ng³®28ÿuâ)¬Ë³ÊÚ¢ò·ÑtN°´·P;¬7Áeæ@’'Jœ@¦ | §´MGË1žÝà_/oýe>Ç££¶BL•òS›i,_ûLˆ®07Ö‘jjàÂU«¨£™ÝL¤¿ªß%Ç…‚úÔmš£Ú3[ÅŒ&…õrÞ£±À×¾Lta/?ëŠòŒnªÃÛ¼„pð6*Í?j%†•Êéã\”åõî;LL~O¨÷‡ÚÝ,F”¬‡éÚ٥ͮ8ùmµHÙc|¡ìÿ‹q嗾޵›0“‘ˆ-Ø·o¡kȸRÊ­ˆ»Á*NyõÇ }î‘o8¡ÎP+ä[-;\7Ç\‰ÊÖ:¨³p1Ëk•\Jiªç« ”…”4Ï-CDÇÛ¾»ü¿_áU&þŒÜö~ø¬³,9~€žêêeûü#–„ãhÊJÔ ‡ìr„ÚÁ»‰t¢µ‘jl^Ò¢÷KZÒrhõ ê uÏÄ©$&ãÙnîõ¹ú8b³~È44xS.o–3ô…Ú —wüÞÐÌ`3“r‹7~Y˜ â ¥2¨ëŽ‘b¼l¢O¦·´Î+ÙT—[bº+Oéã†sq&¡¸Žöó¯;f¾ã‹³ L3ã %1/Û-;H÷Y|¿Ö^åãÃôŠúÛéÐ…²&@®\q ]Éß’ìe£xWŸ?´ôI eãÓ²¬HÊ~mnñ0bËV»×‚ÓüJ¡= Ž­¬Ìœ‰,^ÉjÚ 8à’ª}YØËþ&:‹l£{×Oæ¬ß¿Cgcds“óöd´ÜúFÊëÙàzpP®ðö‰pÝdæ;n…Ñ]\íæ<;±Ý›øµäÞA´3;ÒãÛ´3¯ÖJ¶Ð¿J(oÛ@õç°eíÌÔ¢àšÞP/a•|“ô³O|EBƒ¥§â?Õç%M ¯W¨+{TP‹[v€°¿›ˆó²oeï̶äÝY²œàâj\Û‘AÙÁÒ~ràü~QæŸ]HƯ÷ÆVv>.Ó~\z`5Ìg‡«šÒVί¼ÓR‹ KÂ>q†Ou<£\‚ÌÐï–“¶Z4l³»íËKbnÁ\o IIèäKºÚ9r4Òš ÉÓD{ß™|¬X¸‚L>fŠ1R¢ÀçUlIªiäßDítì¶¥“™IôÔ=±0„s ªõÂó½†%E$ó®Y•ùG×@¨QšXy¿œ`êÍ„´ya…‡Ž3x×äçn5¯¬ShÇ·•º(yN(Ç·ýŽHÉ}:»n§‡4áç{ð6gèh‚:¡rc†‡sLÅs;lÎ0ÅÝ÷e‹[gåy½× ÚþØZÐù(ßû¶³|¾ÆÞBš`Om9¿ÌHN8éInø /¨¡ áŒnö¿Xâ“Söú$ˆRã·Ó?¦ù˜|%§ÝI_=4agRtI†~¯lÙݱ¢tüjí7—âý‚3V€™œÀžbšq ´':¹}.‚Dn\AŸÆŠÁkü‡™q4º—CÚ®Ý˜ŠŸ®¨ñÎ{úP^X*®´ÃŠ¿à@ò;8sÎ4D=’Ò+*øÝ¦¹,±sªžY\L ¿àXgʤ{ WÎeú†/ð€ n%=*«Â§9¨ÚêµÆ–Á*VÏp8Ý«MÔúŽülMþ¬î˜¯³QŒœƒ†9¼™–á¾ KÜú¥üÑ(-Ho0J³º¼•ÂQùÔ_ùEñ]æLa-òá¿S®çÉ(,dü‹À’õî^n^Ù3ß""Çïï`þÚÓPgžäCάé8QêÙ å—Ø¥« ÌÍåòeéãJÛnJã @H Í››à1$äc Ûím¦ ¬¨ î×6PIô­>WN§æ½&5‰š£7ðûYPV=®Ûô-DE’üBä»{û²âÁÏ’õùñ›5VKÊ©c“üKŠm,ÌâHG¢<Ö-c¥PsßÛ¦ôq&~¦IÐkѳÄqyXƒü»€P-ŽÝ$jÔGmú ågE Ëpÿ»T x],ª³­Ö€¼ÃÌŽ¸Ðâ î’$ôÆáýȵ"öQ:}aWÖ²±††.ÊV4§ÆhºYÿ»€:€ä2Fpø:Uó¦6—­ô3gj®âœU!-U#*«É¯t¯òãYH°ê>øƒ$ÝE½¾+†ÒÞ®î /b~ef°¢…BŽ.%œ¨ üí)#FmÍ«ñœG[Çäþ>Jò¾ƒ†²`³0öDýÃdxK{ôúi‰9Žœ¾HW1(ª#M~¹¡Åï‡%¥2‹¹‚®XËeì6:ÏÉú¢è\?U¼’»šãt¼g“üý:·CUiŸ1fg¶’U¿p]Þeôy­ªâ˯ݫǖ”?"ËNŽý»EíˆÉ‚†¢èf¦8³~¢Îr¶.Lèéb°‡‡8ˆÕÌ­1âS³Â*#Òôª®™gÞí%R"Ø?¥ê8U[ûçûoÏÄtñ%åØ}q³c¡¯~QUV%’(¯£|UÂÅMd_üÚ{ÇžµÂñT§tlÔktn=óÁéÌjKÁ$. (æÔý~ÖÖ›<Ûu, ›Òz×´,ëw¸ZjöȦÿ˫ËïÀùWFç®Ô$½Ó&WA5ëVÇýèiÜ>òò¿ê#¯Ñä×öDåL Ü>ˆQ÷ŠYúl$ß ^jv¡4ÉôL £påBV«²C¹^#~W=‘›Æ’{ãøt!‡¤ÄSõ÷}`‚’‚7˜ˆþQ:3ÎÕqûÎeÕøõNÖ( ‚Il)†h#8ôßá–ÅžÑlVÙVºI>J•´Zhä†Â?<¢Õ[ªŠ(¨XŽW“åCrÁh¼'ÛÊŒˆ é s·lvÜR¡Ÿ ý°y_Ä> ÕE?\‘çý$ÏÎz)æ<,Åð´_æ^ áógk~jÚ”ø xu¸ìH‡ËÒ îë÷i ÊHF”è‹ê[!>÷N”7SWf¯ìè7µ‰m3œIÆu¤`pså¨J\£~þ›p³«fÈ¯à€ »å_O¥ £-0I³$õM.˜;$ÿY‹¼ä{:áBnê‰ö§“l /X´áür(±_¼nï{Z¢åQ†@M<[<ïЕÀmî&Ü>EI#«+¾ÖÚ¿•J‡Î’ÏXH#2mò*Ð|`‹sÕý¨Éëc±ø!sž~M¸V"°¿¨‚áeŠÜO´û[ Ü=ÝÈfedº~C”ÄXZu?ó“A ;ýú¤ù¶ÒýÂ2}Ü‘“¯n\á(îUI°ßöSOõÿÓ v#$̪§ï~"%DJ“–‘ü D¤8_;0Ê–{w¯Œ´}³#o1Ò{RE ñQÁ¨l-¬s ¡”ûU˜/ìå¾ny¨8Køφ~á™X™çž­{·Ñ«ÎÅàB± B~…$œYöõUb¨D’ w#ÒøÇY™ÃŽBÜ.ô`÷ÀGÌÃ-°\8(z²¶SáᎱì¿Vo„ouv³[‘‹Í'8úxƒI¶œg G`Àx[F´Rï‘ø?.vÅá³v?çÕ÷ŸÂäeݶ_w5SÖ‡ŠUb‘=ÅèÖÌÐSÍ5?žTØMYü²ÝÅ ÕQ‰þ?…Ä¥/¨†Kð~xƒVâ'ÇsPeÿzJIþËI¥w­¤«G)ì¹.7g¼C=Ö¸6652AÌ¿–á¦ðÐú‰O[–²t>pœ—D{íåj3QDfîX4Ô#‰Çƒêwˆ:ðarc”nö ;¥>\)K¦Xõð¹~Û|Ojaæ—|¦Ùúzå§–élL³ c@B º–²UUsn{ ËŸ¨.ËX BŠ›UËp#ÊÔ`ÄëåúPùd{óéöä ðBó4ìíeŽW| 0;´e¤ëJFa©lìð_ó+‡$ûî2g}X*ð‚[aè:_"£c‚Lt£y6å^³K!á#Þ$ÎÙg6I‚ë<¬a(ç<…ß"sJà=¡×`ó9BѼ¼T+djœ€üº8#ªÏÍýñ|Zé¿“‡â〯rÑRbú7S–v v¨b–¶üìÌðw«GÛ™WvÚ„ÏR¢Ø;áç|^Ö¨Ø}ß{ó}Y¹BÄ—[ Só×¥´²¦ˆÌä§ÕÙ·›÷¿ÎÚD#–¼´ˆ^èOAÖ¼@ÇS r&Ä,ño>nÃ’Vñ)$AeçFZ(I+ýQÈœÍÕ©4ÛñtoÙŸ,Y=žfIG™…ÓÔG«÷ó½ZÅR‹\W;`D9×¦Ò $xÿlD:EC “¡~f8ˆ$o«î”2Â^šwv÷nË5B– íY]ʇ6²í¶FȉfI¢dVKŸ}ú„e³²…&KÛ>Â^õHìiÊôE‚±¤®¢yöfÆl ²ŒwIùwwõã§4RŠ/¶K}Óµ©ÙFåcì?×ÎÁi¸ê¬ÆÐMßtYôGä¨ò±ò¡ä[»L(ÂQgx=h†vz~(®{\;.Ö4Ù3áüᛘ}ÉP,+¨”Eœ*zø[ëEncNÙu íyKŠ3-ÿ~!˜…38Jû¢E€áðg¦Vz‹ƒ€Ñ“™Nòj«šC·DLÔ[â‹ÃëÄ¢e†_ú¨l0ת¼©-E”Þ¢ kßêð¾3ç…:œ“à.{oXÏóß@½Ï1Þ<¼Lã6yæ)…ÛãÍ*=óÚ¤ˆüÿÌ2V۳ʩ©e¹ öï“ß/Ñ;rcŒÂ¾«!µZ5tTj†¨[XnŠ3, û ò_Vœ ¼¼gœ‘TÃÐ@u”¯‘úÅc“‡>>òô ²tŽk—ß„RÃàR†~¢µfæ4Џ7$2Ï{C¯®Èˆ¾ÈEáKVÌ›“T|l}Þ¼ز¶aö‘Çaß¶„ÆÎEÌî¬#‰ò0)Óý…ç÷˜zýw–êÀSëJR4¿Ñé"˜<ÔdS9™Í"Xpë,¦“ÅpÑ•œG}Ÿy !þ$+zX²À[Šûƒ ½%{€öOói“Ñ÷b´¬FôºêžÙ+3–þ³gS£¶MDW`Taéeë6è2¦ôÆPó_ÚŸW·Oþ{f<«¹yÞRµq‰V½íØÄdáÈcšäl¯2-Ê|²q™5é™öùR’¸Éû¥Î­w¾)«Çz–ô®Ç0~Ûä®#-©Ñ—›?öv©»Y™& ¢zVÊV/ïe{›2¾AiBs"y·†4û"4 Þ©}ÖÉ…žûÍ¢Ô:{6 HÚïëF‘†ö1ª¥?2á}†ê¯ûÐåVq+qô/*QÄ9”ôÿ[-òK8j¸‚±¸“›SLH7ã·zuel7ûšzê×9»L5ò( §¸½|Àå1îQéÅÄ Íîy3^”<ÎÏÙ×§Dþ"„ã½nعŒÌ„'AQ¨Ûÿ7F{FrõAÍIb¿ÍoR r7@ðS-Ed ;ºòÛö7™Yí9±«'î*¥ÇÓNw2·¿·_Ê¥ëµb/;§÷Kß´ºe$X ëöØZŒ-v^H#ÏNÛ'4ØSî `¯Í¹•Ľ¹ìÖÐ&{R‡ûص,â*ôÒµÞvw?°ý^Œ9²c8ãrØÅ«ŸoY-ì»”68¤òu¹r\÷¦Ø¡¹è¼úEReµKBpÌ ª”íÜo'#mëupAj!ì¹£…š¶nÎÉñ”ÐÛÐrBÃ÷í±¼ÇŠeE€Tô˜*¬ ]âc.ÿÒs¤ŸB¢¹Œ‘KB(¼©âNý»Q»‡7Î8WŸTо{–»cêÎþ¹#ýé¨@…êô¸UqïtÕ·vS¦wÜÑhùš}±·ûÃOþµSvÌ/{Áå-gw;[“ÚD%híC©íèCœæä¯Ý#s—>x­§`¾ÜNo†?p¶"Þ 1­(ÇŠ”bÓ²L¢U]Ë iÌ}9/ÑÉ(ߎKY)¤j°H€’ïèÂ,Èk0=iÜ;šLï]ƒôT$¢£-øÑ²½…=®û9oÊÂ5d¾œ™ˆ“`ËeBÙ:*~§Ó[ÿìB@£Õms•x½Ÿ¿£ZEE YbòhÇ®ùæ™53g ôæßRtc½ðÎï\Ýnø\*‰ËlnÒ£kýVÍXVx6iÿaá$E…9 boe®ºÊ1VÁÃ7Á fÊTe„}ó`:§`ƒ77ftæ‹Ú¸üWÊR|â<.LnöXbz Š1ÿ5è³æ×™Ãþk S© Q;¨ƒea{èvÖuj)Zåº!<}lNźNãõó‰oú3Ë}=Dc»×&°)l¼f—t½–®Õíè3®Ô(Ø2aŒIq 1>–ø¯»åê–¤w¦Œ™Åç¶<­£{B5èàEZ4çË!@¡Rzsˆ½v¤¬%ÿ0%M* ¶l<;+û\Íi¯áaC%¶U±mí"ᦕfÿñÎ4£±òø¹"‚pÒ$ _¥rOü¦éÄU_˜§CÖQ¬ÛÕ*î5G{ršTNÀrl¯Ãâ÷TuÎSø?è`ÑÝαÑ’/XWè ¡5ñ@“:À§b[tzP36?NÐ µMÊM» žÁ¤ )ñ!ñ9ÕlÆ6’د7°qiyDx[3´Ê—½Ëü×Ô÷Ú¿J£.Éñ$å Ä í}9~… Œa= Vƒ2¿ÊÞ‡þZÛoÌmbðb‚5_eÚþkxï`µuåŠw$Á5N4Î’a¢æG(f!×e“ÍüÜÊ. 2’­[ðÍKÝðzî½ï¯xÌÏïŠÞÎjôHëúÓATIjËEl\J¬ß‘9|ˆ?€Y0è;`~Hͼ—tÅÎÚgª(à§ìjä?–§øî2É+<™ÿ ›JÜïú?±Üº >#Aƒ+ ͉dMìür=¢†Tᤂ”9‚ÿûE÷ê óÈQÃfr†ú>dŽ«˜Óp}ãg;tŒ[B/˜Ä–íc QN7˜ný0‰aç ¬É5è ?…²Cœ«êÀDЇøÅ —¾ÑÿeŽ8¼/_Ò€÷Œ»l±óÜyPšLÞT›hxNÕL²TBîÅŸÔ4V”ê›Gv˜çæèž³J#N[go"lÃÙªc):Wc³ÿ€!öÛÔÞ]‹€c–ÏýÊ$×uÂüßIùœÉ7gŒãrEŽ;ù¡ 1çœç,~ßn0XtJ%Ç`€ÃÆÃŸìGQðL"Òzôáñ.tdL> ‘œ¥Ü{åʧ±ë×l’ EB^Wu+íÂtÜ´!—ÆŸŸb,NA9ý‡¸×îuïOÛu*÷Šd®:|ãSô›—Ò*2 ÆSÀAÒ#3–Ìÿè¢Àñ(SN \z¦î@“·ÙZÖWZžÜФ³áÒq%üûqn`ªÈ°0ÏÑT8ž™FêcÃgÍVÁ^½$ö¦›\øq™^¾R•:8½ÀÎèÀ+ iŸò" K$¬”nß_Ì&L,®JG’ÖX]È÷¦… 5 Â&&žå!t2üÈ ÇÕmtíñGüù[“mÚÍ*Ö/kÃäS’@ÓÀi–5Ó%kÙ3]øôÞ‘JÁuÀu`Ùð;#$[\â?ƒõˆ5ÿéœà7ÊÇù{cÈzrÑáª"œè¢¥¬ÇK›WÒóg û<²Àþþ´t+Ü¿ws4ĵú‹ð·ÙU0Ý9Ó%<2OõF»Š/CÊAˆ,úNäìROù;É Œ-,¶ÊÇ‘tûäWeX‚qÞfçŒ>˹Þ3¾-wEŸƒÇ:C)#ålp*¢¥’¿)ÍàH¦!ƒò‚– ¿bØ9˜\½¢w¹ÉЙÞ–Åó±“ÎÜe·ôú¤´»x·QFÝõ¹ôŸx_+s ÌØ*@xËÙº¯ÇTîîɈ·Dj20Z<¾]L†Oé‚hņ³(.K,ö·”­îÍ'éVÌG?oR>Lø>yÕKðª¢:Œ`ÅD”1í'|Áõ¬- ~@‚PÇüÂííÈ™C—-'Ë1ö'˜5‘7ÈýÝ×õ†n6`™°LAöŒïÿ[@úuë"Rn¿„ùö©ÓÃU‘´VòÃø]#ìdÔzûƒ SVÄ^ ¿•g”!=Âz:k’ž¨ŸvN?ÄNÙoÞ="WµÄQl B †¿NTFF[þüãã™Ø" Þ–Ê@®££ªfñéÊ"3ŸVÏAùµKû¨÷Øùäòx{®šØ@v\‰Óö©½Ë¨œÕVPËY¦ÒI0rž#!¸Ð§£ú¦×ea¯Aè©,Lû9¦ŽêJHáY0¹0Ö9mDe°áAõëÝ€ûÔðTÒ<ýyVÔ”Ú”CBЏŸÂhþãv“Fœ›Ôøf­HHžû,×Ôûú^—¨ó*‹Š5_þX7›T«?£‚V6ù½ ñÇÛÕüÇæ!qιçy3pCK±›Ïå‹l•|®tø$0ßÚq I|ñ´ÌÁq—9°¾už #q‘h{wâÅuè»2…±‹¥ÂP ù²k«D.9áõÄÛ­²u³$ŸjRíñL°™S²Ûÿ)ÝòÕqï‹–NVß÷‡å[¨! ›Û‰â°A¹ç3T*rèöÚZ <¬ì‘jµ¿´%q© C­”"'ûátžç1ŸÚÞ˜1Ô®ø'SÃtaÔg¡‡{Ǫ•ôäJ)¦w] ìÏ“˜ëûÄ/"Ø„s{í ½£{¼dq»ùû× ¤çúæ°ä_ €æá½Ì"oJ4ßcë!Á—œÿfe ×fin!¹¤ëOkÍ”gw 8ê‡lÓ=JÁÑ›¸.>Ý“Ùú`TPtóÎ’:.Ìääî)­6Р\e‚ÚæTCIËáÕ ødO®óvºÈìŽÏWDÞ¦W~äÊœG‚ß­€—xyQåð-š)ÕNGº‰Ê€Á“Jì~Úúw„½¹‰Ï ­¹Ñ¥}'ð„}Ò8É#*dmÁÓê‡[6B˜}w:G0ðÓZwUÊü¥¡Ì9Jóˆ…c$WvmºIr›»Þ›ük*j—"Ê êT:äVê@pĉNOÙ&Ÿ¿˜âc½[¨- ©¯=hÚ=Øk+'è#»M±ïÑBé»äæ"ì¸m ýo£†ƒoÊ£ôëpÛ,ß,ýN›lÅæ<Ò‡ŒCw˜[g"‚-ô‘ ˜3Þ}¿²"K—4q笷w@£§O®”¾9Ÿ0.Û%ª<å<߸ÂÓ«~gú,vÍf½Ìt4%rá;Hê((ÖŸò]Á”8™ú[6ýåUMC‹ü å‚ëÆ'QY5ª¥4¶ÁD[Ôžšt ìMC*b«à®­0Oiò¬ÖZë.L£gÂu]Ûµkzó®v×ϯÛ2Ôí–la#94ì9‡[GôúÓõ«`úТ5×/à£ô¿9rÇY$€ç¬¡¿SÌÆH³û­?9H??q!¢‘Y“í¾”÷öF~Ò*,¯äoM§]›âõTd8Þ–©ÂbþTæÇJC<3˹4,¾—Û™–Ô»ÏíãÐFë*¾¢â`í±^xQòŒ¯ÀWgMâ×%hÈ€`Ÿ ÆLtŸ–Ç!·C˜¯HWà·^×\z‚ ²ðøb”ÿS0xÆ„6-Èß%Þ>-í+D>2^Ô6øm‚6%Ô2äìy¨€dOC„ŒE¹’„Y¥©UÇôèîk:‘ã~áu ³uÉoõó]AQPGØ1ýÏ\ðû¾áֶɌźœxFýåÐxóÜ$(¨}3h~…w‡ºaúˆ[Þ~jOW}¾y(ø›Ôiõ E}U«j†#A÷@Ãv±œ6D“ä§ŸÐ3’úwø\šKöÛˆN_iŠR?àËPﯜh4Yƒ¬`¡•}¢ºÒÚ•µíYxÁ,hÆœcÒ+ú烺ë›.ÜÃcë²8}®Ê5,g)ô„jͲp³ * É×Çm9†Þ…¡^‡† áõ®¿G"ž8?ÕSY¥0èüñ Ú§Þ% Ö™/Ñ'”‹Äx’ta„ Bäq®ç¤p”¢òÑÀ< m¾Gãr–\}¸÷g³rFó=I+GQYÌ”sNUÒmùßÇO¾]‰óÉiÞDÉHþg @VØ%Ü‚y×¢c0ÅÏÏwc?m;äEií80qOö6#DP· õɶ´³'ÄüXí.Èg Þ껄ÃX6!Jyö5{Ž¥Q€ÍáärŽ07QuÂØÅ=㺠†=¯|5êûÀ¼™NþÚ\?9HæãµŽèÆ]åöoÌkc³ö‚Z}OúVôŒ‡¹ÅHÍÆ,| åEý¾üÑ'¶°bˆ¼0ähÞ6Äô~@zÙ—´é²Ê~n3¶ªÕ7dú¿=)žÇ|¥Kê—j1ÏVÍ÷õT¼ Yº8¢OlIöë7~ŠôVVQW¡J!è(4e†”ÜŽZCyB•=1÷õ‘©Õ¥ÍÜÀ÷sùN ¨fô6â…ð; [ú«ŒXTÆîœb$Þì¤];0Îå’}‘jÏq}é5ŒŒÑüMÈ®±É÷˜H¹í¢×†škªèÕíqZcß¹ ^KŽm¡Å_(ÉãH¸1×ßY6YsH]RܵŸa\Eãजͱ£€ÉÏ!ð‹»ü–'B™€áÐï×ªÆØž_6ƒfM‹ªTîzD{ `ŒGthnÀw¶Ž¿ŠöžÑÕb›]Â<Æ^IŠv*¾Aw Ù̹¥%V³aSx’,–.>ÏÃ?%ÛW·p ó„H±^ÖJÕ]“özìŠ#¸YR¿;ª¾¬ÃSøRmúß!þ–^¾j˜x´t $Úeìt<#«¬wMGѬ#öy¾FÊ »Ú:£wÞ\%I)"7ã©j„Z¸Ìo‹ó9ÃŽ’Â.ø5duÐÖ-2Èî‘1Ì}”ŠuìÜpJLN'%å·wTõÕcMªb {A ²º·¬§wÐ’+uß÷5åŸ;½Måç•ãF·D»y¿<¼û±K˜–‡ÚÕr7ÝÍ¥î®ò÷gé>îË>ÚËü¿éÀ+ÇO^qÚü‚û>3ç"FeèíÍý+ác»]MÞ×mÜ©Ët|rÂ8Nl­†eÏtwï3…š›=©7ZáW˜Æš#@Á‹$»ÌXòºÿ9‹JÍR?èYõ!åÎ5î~=¼ì„¦ün?·åÍï; å’çrÚõ·Ýîë²:/:šÃA nzjNãÚœ6ýæ`ÂGïèã˜ærÃÈžXé¨zwk°°e&õ;>wi†­§Áúúh¥ü#Ó¥èÿŒ—.…½jty.k¸S"Úž5* "¦ S£{P¿¨µ0¿>‚Óé‘Ðúœåðòc'jÌ]?õ”a$·æ~Z¶ô%9ç]Nä˜×pëGÞ[…1ãTm*Kj.èÛ[“ék'¨›†l¦í‡NˆûÞ æ£ ’Ì!—BjˆŽïÓ}~›ìŒØ@+»ºâ ½É_H@+¶«š)8ÂÊð`“î djÔ°ŠÍA¡9%3‘€iÿ¬ñCTÝ Œìø‡óâU½íÁެzèÕB@ÉmµgSp“ük§5Vü—ÝD˜×ïì°I{›êZÇÆªŠè7/’| ¥æÔEt±x7­£i¶MkIXŒÝ’Ìf*‚Ût²Jxµ)R!ŸÖT,¹ Ü\*q‚lô¢EÕ,çÙ×D±R”.:¦>\ú…dü¡´ëÙÃkm~ÜÊC3ÙþoÏu\!×­â¿ÄcÉm¯*„VŽx"I²îË?³ÁÖ~€ë^M˜ÓXˆêôó@õMJ¸ zøwõ“¢.©§Ö@m줒[®ø´ÉÓÎws ޽ZNWo²GpæÏÊ¥”˜ªÁÀlÆáÇ*¿§Yñ—û5ìüáî¢Óÿf±ñ<ÅØ>…ÈiwVŠB ~t߬ ‰ûoÆ‚Ñ/Ò-Ÿþµ2Ò5©1Ì™-’QžœóãwR*®£¾_Ô×~Qƒ²èüóíAë÷l¦J).¡Z]ñóݾÅ}N¸‘ ëÜ)'Œð;föŒYzx2”I‡"È º/ïçž´o_÷6. S#ë§ïaÇ/í;»QQý/¤ÿý.ù¹Àµ)5Iy×ôƒ'•ô¼ Ú©>jHxÓžg†=<ëÄŽß³ÏÜøüÌ9ÛW<ÙøÓ¥§tœ°é6!v h(=ÍÊu]®_%)cbÍt­IQ,[ƒöv¾¹y“Gdˆ}÷#Ÿ™žz ‰Ȫ#øA€&m­ß ß²ãúOã³ º»¡‘î:“ ëN¶HK-Þά’c]ö}ežB/9™r^q¥Œðä „ã7ÌFðt—(ã¹°ÀG…ÓèѪŒŠèµy8š·ºs³))úµ¡C¸ÝΧ}í§rËnÓÂO@Z¨¼ýÜ/‘¥¿ˆSc­WfY”œ{^ î p«Î u§¡aèEF^aH 3²>ðjú"LÏøX|?Ù•_ñ¶m¿¡ÒŽl¹ ¥bK­õÁúÐZóä@¯7¸ËÅ`ì GØ6nÌÞÆbjϘêCuõÌu<8üJ›J2åÿ`™èaè,è=2é”ù‘ÔgÊÆN.(™/ˆejü×7|@¼®?ðI{?èiöEÝgÎÏvêGÙHþ¡­¹Ç“ýã•KÆ+ãvÖ2bPZ([ð/W^u×ñJýŒªÄ*åÒ}ÅÅt»«éžÍFðÞx1Û)¾C Ü /a³œcRÒI¦°d Úï'å”9¥ â{Ô÷C.vÃB²,û§ÑÎâ÷þ23:e Tœ}Ò·Rv²›ˆ7{ñÙ(lÏ`¸I­S"DZöÅtÄ:^-žØëŽC?bYìX@ ±ûÁ÷ŽÞäI¡SÏåÞ.¹ç{ðVÛ¥×ûÓ:ë¥Äî\T ŒjÎ4»¤Ù€…‚UJ–S{à}¨0…™\þÄ¿¢‚=’è·PõoavüçAp‘k$··k´Áü³Ë°QÛôF®×k~¾ó£Iãe‘~çôÓyMá_¹ø°»ÙsÅ™´—I&:À=*¤×bØÏáÓj=å.„¸ò ƒÃϘ¤ 0*j”ÐN›Ä åvôß}Îí8ÓÆ'þãvïõ:³Ý™ ¶mßEcžÑFËR„›$.™.‘ÞÚí¥Ú:'¼®^ZýÅ ï!Áã+§÷pËÙèn½qñgÑîϳL–²Ët¬:\Êý{Ï¢Õ?‰V|Éúé Æ·^[ßHÿË-{æ»×°óÀ4 ®úÎeà`6c;úÂ\2ÃÚ"pºº#ãÅÄd™Å½w„íƒé¥FÛµÏØ)^b…À0EÄžÙ …]/ÓÝK ­ b¯‹eg&6éáÖÉÔ]Á¼ô«„³X¿é^›Œ§¹ªœhã²>³FÞZÁL‰ãÞòÃ'´^/0£"“Ë­¯¤´§œTñ+‘:¢¨éऎmö´–¹‹Àõ*½¼Y¦õ§<Ìðq¾4²SeãígŸØsW*}Âifúª#XƒWO~öæÑAÃn“Ùç@%À¢Áüœ+ÓŸ–% ñHjI¤µñÞûc ÷ãO­2zÃÑ\–Ñóµ;jy%IɆ£Ùv›%’xöö¤Ùú)Éå_çW#—'z‰=M…ËI«N’}-ÂGˆºS9÷cQ{äÅ7ß¼üHÃç™’Ü]…ˆÚ…2o指=@Q…–6?{ãăÄVÙ ÷wù~̉â]¨Ê.B­ÅÓò»[~°®Ïý&ˆ<ˆFèB³VžÒñ‹6é=,ûª/f£Îx6ƒ/kœ«WB®jÙY¬±(5a¡Ûò”;@·Ÿk—q´ ÙdÄèy;ú<.êÓõÐz&n;p)ó½G•sÎ3dIKúS5Iþ`h~­çf"¶õœÙëtm»Yãš'Ú=oÉìHO/¯:ÉtG:zâuË-'!=ß^‹y;Ýïiõž¶ÚŠ‚‘º €âžIÔM4»·3t3úfu*Ê[Ï•$þzKÏO P®ZmÐ?Ðð—p4\\T(•ˆ;’†Ðìõcšö[È!¡g9ö·›YÉWU¦¶ –è ÝQ¢æ-6ú¥ãkèûüR³‰žNËc®¸bünK^í¾{(ÐçBßW“lÏÀ_øàØ:ÁΧ¥IŽ”¤ˆiƒÓf&wâUv ŠÞ€¢0º7 «Ê+ª ÷ã¦|eÖ)[IëɾÅÓÆÌo¼² "¹Ù®øuâ$½¹ÚS9·Ç݆-àƒûA¸C¡qÔ謴l࡟žÐø< =Ê—ñµeõø kOŠíËâæ‘¼Ÿº9›‹d+$hR½oÁÌmôÊo—_žwc)ì ŸÛeYúÉ –g‡Q’¯á½‹ä9¨›eŠzâÝéàëØ"z Méðp Æ Aè9sšˆ.í7uõבA,÷?‘Ãnµ¶t4LäwnNlÃÊ«Ôç Ûû}œŠúÇá}s—™ÙÊtl4ÀH7gVÄMÌ”0Ädô’ Ã÷ˆ«Ñƒ©ÈϨËâÛ4ça¶*,ÈuÓf§÷K}7ÌíÀ2,q’¨¿È礀©ÉŒ„ŠÁ?+±W£ïMwMÕx‹qí§åÑ!{÷¨"¾‡ÁS.›~H9¸?ª† 1é¡Ôè¹E=Í_úC†WpåøÚA’ Ù=kÆêÖ'{6×@Sä¬d{¬H·Ò“‡C.qÛd™rá6BâÚdÚ®Oø…nwêÇ.Ë#k+(AãôjTqÚÕ¢³Ÿø>×1ùr d6=Rÿ”ÛúaüEo÷#&ÞÁ;ËÉ, D`„á+Ÿ²ü´ø¬:#²^K±/ð;Qxmô„gfJån7ÙŒGÜïW´ªƒŽ=agð¾lÜ3ïY œjØÞ±ý–Ðßèu!RéÎ-9,óÍ#â¢3™*²¹§‚ù<ŠL\%'èÜêIï—Åcõ¸CVpäŒ[0>z;F¦>ÿý>ct)u:ƒWµÏ¿PUöÖCM¢•©²{”È*p¼ˆwjÛ¦íR@ ©6»WÙ£º´ =‚Žâ?”×[ÇGy×]K°ïÝv¢m¾Î˦rcÜŸÔôL}£i–>.ÝÛ㢆©]w<®×©«þÃÕ(gÝ€«€~ƒ¯ ¬v2³Ý8@9œo@Ú)ß2û–‹Ñ…Ë…#ªK¥¬?˜«×*¯âÕqÜ6¼M¬ÊÒ§Ý•À’qþÙò¦„íßy;¢wuJ2è*2E¨b cýW½JŸ¢¢ÛX݉g7‰.F!úbÂû/^#ã}ך,™v»‡z„|eªóSï,5NfÙŽ1ß«¸¨¥£Yøà<‘Ú¡.í GQ~*Î… ?Z•âƒ^¦ó`9ÆHäù<Øòn>Ê®l(é<’> ˆži»œŒ œ.òyÔ7·¾q§BÃÜ'ŒcÐË Ì¢D­±cŸ'1£Ö±'ŽÎá[_dº‡ÒÈÇæŠs[?ƒi¬ ³›/^بyš»Ÿ‚NWv$”ù”—Í…ý“‚­_¡rl¡r®¼›ãÇê^QêI•Ø<èºoYìX14t¨³žîʸ6.›»þwûÙí„Äf6>WYB»”)F€cEÆÅŠ?\â¸èÜ*Uýÿ~À!kÂd4G¡;⊷þÀÒÜ>i-Ï[g“{»Ô9íž7W|—*UÑï9šòTIÓ%bÜ÷½Õ´è,pA€ËYáE-Å,F7LÝå¶ýnmû²2¤Lñ\4hÚJ-~Ÿ3Ÿnbû<œúÞ¹L[y7Êœ±J‹ëöÚeý¦òPW+HCžH{ZOZ¹áÌ HЧž`Áçs×S¬âtwû¨„lŠ¥=º`æOÔ¶9 _\¹.n¸Ö€±8óÉLŸÇÍÕðëb%@ÍIåÀp"(dÉ2ªbcœ‘ ÄuäTdJëIë:¾G3è¹ RûH)¾Ž³{ŽÞK^Ňª9$±ó{Õ93ÜeR,ž…?W¬*‹%ýEŸô¨e‚Š—át»u7iÚÜ­Aêùèá«rZ9¬ˆ¬š"Áã^ÒÿVuæÎaðnÅ–RÞôwæËz/†‚­“þ©¦ª=•Èõ^Á¸Æ~U6í&Yæî&*8Rv¼†"—)ga5áô^ï|ÚéGTèžIuóúWºÓÝq ¬|áGŽ—ê·¿™ñ‘hïœïÙžFÛÇ<´çÌávus+;þ~uBêîåx{!¸v9,Eðþéu:ßó<úœûÙ@¿Ú¹/3#Ú\‡_þeæIÑ2"s™)’>ÞÈEâ¥M«aš·ãå}è®ùoŠŸdö©¤û}{æÙ|Õ8szåVEÜ^B»¥4™YSOÁÌmFuÎÞæ"渮CÖc‹ q“ò¦qŸPŸ¹hIÙŽã_íŸÃ­køQî‚õ›'W–~úåÑ5ñ¿çáªÅ7&—O1±g ÉðÉÞLe•ӌƵüßBÔëóZ²™ä€ë¦ Ö7;_uÿ·;‡Ú”ÅãŽm¨¸ÿVi¼P].f ëé¦×¶)lœÒëÏR2‘v½òÏÍ)Wo|ŽøÖG»_ÔúJÂ.F–ÿ4`ÂñÕp`HaZìuÙ9æOª'™)MEìgÌQ¾ÛU7ÍË+_ì>ù”Fâ‘Vóº°‹ž×ÎPåRjlrzԶçq·$<ªîé"ú­Ëô`ù¸RÄHúœxJmkÁ||£LÚíCø¡ŠÛ/fô̧­wë·¡’|Ë6ê&õ. ôæuûó…?ÈmÔK¿A"©m‹Ó,ÂêcSární­":ñg|ÚóÇÒ¼i¥Î¼(»T÷þ¾,ïý`!l½jZ {¶Î}®^EÆó dVýÓXá±oê>Û±¶¡ûÙŸíÙ-ïOá,ÙDweÖH<îÄ5!á£ùf„XåÊTH%zúc»:¢¶®W7¬œsÛõëWè3o·W¨P_Ëðäè’ªëhÉ,Ò5¯vʽTõɶY³ž.¿wöª?(ãþ—¿2ãïˆÕ'_ZIOc]¹Õôbß% Ï–êZö¶¯˜,!î· Q)Âv=¹ò%?t\ñnHÙƒ,»úž>Ì¥?Abùùx§èZ &ÇŽÌT1‰Y;Øã±{Ât'Ë¿óGZC#áö®"N{PBL˜~Y¤7`B˜{¡Ž[?uTŒÅ1‘™¤`sƒÚÍ•De.Í“„CúÓÉXWG²UÚ›lÆ_í˜ÌåB ”Â88Vgãs…›—Ú½6M3I k?®Ìœx™ìÀªø?ÎÝ}ÎuŽÖ>yeàCLß~±~%ˆ²6Ef½,lMž[Þ¢”wR’Õî\ê-‰Óó:®£æû­Ñ}/8³y v4ì*Iñßt»h¬1w,ÊoÞ-Ntq€2%kõø½*Cx1q ›7{LÊ1šé0Ú/·$1ñsºú ¯ønmÚ.™C97&Ä^A*lÖ”5ðiV»]KÔ>Uº<8È—C|ûu9ÅCoÕác9cf8ßü$U¤ò2úÄt­…Èx­þ ®«¸›ô¯ u':•“´™2YB©h%£âYy…•èê“ݰõ='뉶‡‘¿û­àÖ´ã°g™xˆx÷Û2¦R„{J8ap]Æi”P½XŸM‚Óëü@;{|·TG6ÈôˆüVvømT ùÓiÜO”MüÆëa6l—fºLÁÔÚŸ…¾ªF̡ƔÜÝ4£]_~!‰ÌÄÖRJÕåÎ%Cù‘ÝgþÐ@+.ˆß´o¢vKQN';½3¨9U‚¥aËô Á·;yfö÷§Ñ½xe˜Ù˜0K·,L>å»{V¹9?‡“'§Ë'ó5µ¹} |a[§ØžÆE9ör»]òš|&æ†ì®MÚO^N»MWß% Ò;û~IjN…´ÏÅü¯û?O RÁ„Ý6ëÄZreh> ô¾¸|öŒÛ´"8w¸¼ä!ö¬à /?öKKdÍ íoÂã'ë=>»›Aåž÷'f»ÌB*2ÅÏGnà—0¨g³ãY#ep8¢x‹¡M>¿®Êç!Úrñ#<ÿqt“úê1¬™Åar÷ {ÐqA×^‘F4þ}¯–¨îšÂ;«ä”ÃOåFpîYëÈÇX¬aíÀè(Íkmü¡‡ªs‚«ÇF¼‡xnþÕʼnæ/È繘„çµ=xéJ…© •í-=Q3d-É­òòá|Ííx·=(¯âŸ1q'3du9U–ÅMã!cïbßSˆÚŽ{ñ¯A:¬¯Ú6‚÷ìAæ¬óOÒ¨™w/?¸ÝXT?Fþ±XŸO5í価œÆ(š¤/ >ìÜ­¥°êß÷A“šŒ@_&ùBë3¼Í¹ÎlUìéÉÆ¼ÓÈL“´¸ËýTºA°í‘KjVÑÕbwHC_$þ³Žn>Û¶éa1voÏ›ìiËTþ*Å¢t€‚õD÷n_ì^ÆU&« vçõûÚûºÑ$:/2Ù·GgÜõrÏ[âvÍæ,QGü nÂá`Êåí¸¸ _(µ]6 ljЦ³>°`›%ö6¸™­/›¢$ɽ˜{$jõBœ|nŠC‘a±˜Þµ‚œÞ€E¶ï‰b;ŽI_‚"O4%ùÛ¤œ[oÅC¡A¥Õ£ÙçíHÙ@sÏy%ÿ4JRÛ×z9¢“©Âö«\ÑM×0e ¬$ѸðMaøß{'›Qðéc5‹×d€/›ºJÓ½¼uíV~Ìä_ P+~NóúH{¶Ô¬ÔY»R»®/Z¶BlÞAåVb>2jºþ?](9Ýõ|FFÎbÑqÅû7Âë Õâ3}|šdê ³ó3%¾&^ÀEFüù0Ëî¸\:Åߤü]×­Uá")±JGȦò]Ê*W°Ëî6¸ÑÏ{§#~kÕ¤[¼X^²ÃŸÜX·‡öpÂí&r1 °ýc÷ôzØYDúÀ¨j2àaKkxÓ+/¢;-lÐuxßÑþRÅ{O“>aغ{—azûhË¡ªã­3uè fá1“a¹kà‰l+ŸÐ€k,Ç‹'qƒ€¿Ñ,“;Fg2yWµãíÿb ì_'ΈöPÿ{йn±/=¦Hgw„fÇm®­‘—y†/S‰²X7’>5÷û4ÊšÈËÅ{§ª‚&Kw*d y¯Êiȃ佯ݹÄb®,íQêÍZäß×ûÉKµÛ¸A/¿âø0yknuU´'ãU3Nt}°Ñi•·§±±N?ðH ÅI“FÊž[ÃUì®ï*†]rùЋç™H–©Éx?Gg¸&£Cúh(ÁÂ=)î¿~y?E˜±ØY>,¨¦ó}Jõ®òM”R×*=ï<;“öI5Sî%tJÿžL~!sFí¥zÆã=Ïæ æyø H §_2ê´«£z®áµgÆ^‘•‚”häë…Ãþ¨±‚C£`@%íÐËeÚ0çÔà¨î¸ª÷XÖµZEeÙ÷_0ÖkÆæ=ï–Vá=‘²H©¿XÎ*(¯e±)‚ޝë=ê¤O~zø\”7-=öÆmã?o…°$Tþ®½õcB±š³ÍXëxo›»z7–êÕÒ$áˆh'U9E%…vóŒ¤sÙË›©—ŽØE$Ñàºïý‘kÚõhùj5ΞÉ)Ýïeåoà{“®¡õàµÆlc-Á÷í­áá?ŸDÞΨ’Ø©™V— dA¡bu·øçðã¾kŠ‹ŸÍÌ·éÓOPtß³vG¼²lç£Ö’†C6î8Ï­C§Ð33”±2Þx–°•ØwfCéÌ,ÄÔqQ¨:çä½?ï+ÁFŸ/¯™æfæ|ø˜Òéts˦·í®3JI;áÙöy}5µ_;ÝzóW&iÈäK˜›d¤95Ö@4د—ÅB×ýÿNŸ÷ÓŸ´SJâÃüÇVpÒ¼[>ÉÞI¼†Æ [šäU¼>ðmµCÞ5±n"¨v¬îEKXæòýÂuQºŠuA;¨kBÝ[yLœ ë‰ö†žüý-Oë_t²G’u¦Å¼O÷]¯ÄWßß=,_ÀxøŸY¡|è]»ü¢h+’C:\~ |Ne¢òôÖS§x©l˜]Q«–s³‰Agâ)êùå<%Èþ(´š"ɼnS›´‹®vÂ7Ï…H©ÃS="ÈTÃMðË¥X@ÍÂÂôÈM}ÜXÔáä¦ß aÌúY‹ápåA=zcv]\Æ]ÿL@†W¬èb×ý’FgIˆÕ–ç!|“<•¹ðÁ©\-xL¯á‹®Íœf•Ùcš3Z`ãH&eÑS-ÀÒ=B6ª¡%'±=oëž<÷¶zû°o¸µ)ÝÛOe¥_–7ߦ×ïÍ­ÒÙ‡ûOìt'úgê•ÝMY2ò¿Âïyæ{’P°»]^p‘®JƲQM6tçÎ9³5¸6ñ³%ôKö—Ûe™ÿïðÀ´“c´v;#;óÜ>½7'¢C]\f*ŒÈÔÇ 8Ü%I‚–’Uî©·”ù¥ê±÷&ôû1š=¿ìÄtáݤÅFUê𥌓úß=*ÓÃÒªÕZcá!]ц£;é³mtZ;¬Œ†APEúQ”úW›ÚŸ-"' s¶•ÔL³T­éwµ$oí¯;uýßÜ9‘Ä.'¨´ãÙ0©!¶§Ø»¥D²èç¤Ã£iy“cËJ†“ïm©w޾ÿ|ú}0AÀ—ˆáÍD„b„-Ùs¡T1‰C„y\“_Fb¸éZž•gÑŸ4yw¾Ù7«íY¼ÆßéÁoëo"?WüÓ¢ÑÖJÎ| '¨@ DéåÞ”¤p9/¥ï“Bª[Mì÷pùA‚Cù@Ï˨œ\‘hÍUÝdÕÆ½ *““¸àm u½K¯[מkq.~‰:˜O¨ÎV,KW½Ú=5ÄüöYû˜=êKe}…OáVil²~UZÝ N—y¥]eEn_.#Gª`³üÏ›Áþ&YHõh>W½¬ˆÕhßkR–ãd(þ/ûswËPÇÚ+Z@jX!êì‘ÞÇíÕ|NÄ–q §·E¯#ž-¯×¡ÚÉšv_û/1HÊ‘œì6ýVƒ=)? ‘‰b¢¨Óé*æê5¼}SH2$œŠQïm£=hNéìà ÎqŠo·:¨´â­¦Óo1Eî?“ícò]¾ó뢙r……qï+$>2v•o¹{æ·-C;>ý{Ëâ2· Éw4‘åxŸ&8©é|¬6R¿©Ó}ð÷B’ ´†tOßUœÉåÑ×¥í²Q}²ÚHáQ+Z#b)ûÈvj)‚æà“ƒƒN¢ÍV£¤½/ÏÆj¡o¯Áò„â-–è,èÙ]».X÷˜µù·>fИ֥¶”%GÄ>k ùF°ôDévë´1³òz”d#]®KAXk´hÏY?Î{ÂPá á¦uyÔž²§ÕôÕù†%—Ì3üË?ç#Lgk»®—Ìv™wñm)ä<“ÁÔ[¦®£8ÏØL !=~C[ê`<óÐå‰å(s}þ¾øêªÑ(˜“›IBÝ´yÔó«BmëX*¶FQÚT }2­L¹úwj‰N®N¼ãE2Z^ÏbLÊJÎ4zR³C}›ÜkõGlÂ]>¿¿oy‰ß3[‰J^WìÕKíì«&ŠÛÆ ç³91 5ùm¿òu®4}/ز2ƒ_>ÚÜJ3'g˜£çÈ“á?ƒ2µ¢H§  ÷I'";Å 6–ÐKuòJR‘M5Ü:Zâ7. Û»®~%oB+>°YÒÒ;9“‚~‡R5êÿhƒÌ⃙Qµò4–Ͼ°MW³s:2¹Øó~næ² ïÚU­mq‚‰Éõ“#W¤E‚ˆ#}ÞÓÀô†²‰Æ<Åûš$=Ï÷'½§>&¥ ﵌kci’ZfôÊÊ¥r¤)Y¥@¼â­¡¶Hj(xÛÿvwm÷x›Aí+5?[¦#öá ÍæùBY;Ýõ¥Tùp·µT:ÊÒ^«Aű!`!ØU^”àDï9 ºªV㾡)w~o¨x¿‘Û‘ãy÷ŸÃº€OâV¸‹Úù>Îj>“-}øÊï¾Üú 3RazmrÍÃ~åQ-ôƒƒ\õZb[gøÊÏfeµDB“ÂõÃ/ÞÀÅœÏæSÀÇùô‹±J9ý/‡«ÕWEößs†*S‘åcX†n ä–‰¡¼©Dhð½ë÷·pÑÆ§FG/ò•ž†Quá¥é]Nð*n’{³îºþKÕw2¿]‹ÜÀm‚./Dˆ F›¥cIƒHm /Фd†Ëךí<¨v&`¼½r2‘¦I¯·+4rRNîzÑÜ#ö¾Ó~¡I_€9î"ïØ—%ŒÛÊó|¤[ìUa?Vd¾ÜдXJÑÒûfä76¼:ÉiÄëä%|)ãã–Mr>L“T±(ºp =]ïåWæ»IŒ±çõ÷+¿jŽlùh°sSÀŸy–‚šõ^ œp…#byà51žÂš—::(¿òrß?oØ*/?Ÿ--†Dd‹Ÿ²­åp?`ê)éZ¤Ñ}ÏùŒuL!ï8ûz'Œ‚wægòPï¼ofåʶmc}Ñê̸é—døŠî¾ïŠzøïø±Úü–‡qs·ÐôpÆl í~ܱÌ—(D²¥‡Vå©~þùŒž—Þ-”œx¡Vʾ”ž*}m•&x×~ÿÞ·^··rw-`‰åìwÊ`S6 Iux*M qiÖT¤Ì%ô|Ñço{Ç/ÇáÌÛv:ïQsMË:ª Æò‘µ~{×ûÑ&$nÜ—×8¼SVd¨P¹™ÖÖn"é’ÊÍÇCx7¢-+²/­?-«ž:’>²´¥(D”\îÇ¥2KófÍ—^æâêÊ«&ürR{yó^ÓiËàî(66n”ø>X««kŽŠæáXJŸ|/L)æiWÜ=x·ÃWTMl¼7Ü´NX)Ö£vdº×£ÖÔâô^r?r`Ù_ßÒò€I·pÐ%«…?£”"¦ ûé:-£ù(¶ £7t¾ß•¸Û\Ê´Ë©ëÓ¯íëœas»ü°ì-ÖÈoËZŸ«¢/©üD]zCq¬ž<æ–˜ª@¯sMÛ© ·G\·1ª–uƒÐ;ä£ñ Çd:]«D½Fjˆ‡…ˆq†Â¦çô'¥SCeè4¢¶þ,¤÷O…Â"“ïÇ® h:ÏìÑˬ[Líg ]õãް te+\ñOZšdÜXò,:“ŸN& "Ü#EìÙû~ת~ÿ47±›ì¿²Ðš~‡%2Q¬s Ëyù1jå„Ëã?èxIÄ6y/Ý{CŸj=`Š×òý”E¢\Z·ÎlíÁ´Ñ_ò0{êIvŸ:qk2Ì¿TÉåSe$…µüΗciqníGÇ“íÞ›g7±‡ì~ª¥°­ ¼"‘\)÷‡÷BÁ’2áÚá¤×áúäç’x‰l©Ÿ,Oj33>Ùõ0'm>º‚‹=„ Oœl2ncû&|”~@ÛB÷ß-Bqí)”k=£_q V_Ÿù·q±Ó„ÉåÞÆk¢¯ ÞÔÊö‚Ö¥ê iûõVñ!DÔx2t:wèiìâ×<©Þ1î¾yœ~9£Í(;Ì hF Ÿn8ŽXMå´éÛÏ…êž6΂²ÎT27»œ5†âøï â,Fë)¾>"ý¨(„1bë½ËŸÁì‹ýÏKÖYg‹ n y¡6Ö[7»•^Hñ®ÂÁ±L[kK‡'ctI‡ï…qì­NýÎ "k¾IëÇhˆínu‚d«öÎe®¥qÜtGânŽw½ë,_[Ÿæ!e¬#oƒz=£˜¶0麖MKåj÷hBÐJ“JíC4¾eؙéÃùbÔ¢¼Šëfª"Â\gɈ1‹¿âÛÇß/O«ZѺ³Oc„y'TÕšÊ&¢ºÿŽ/òðöïùb÷”ÂTÏÌÿ"èE™@vfœ°ØwXA¥.ƒ°GŒ}Iº¼p¸ÓÅÎI¯æ&CbŠ·r,‘QÜ-?³ kQ ñ¶o½–¯ºç6•IÓ'«0χÏ}Þݹtj¹eySj±‰@[Êkö(á}—÷¼ôrˆ%’MlØkžer#]è/SŒ†ëN…ĤÃ&á^mëc‚›/èuŒiãêæ»$ê ¾Sw¨VÙrBëf Œ)j‚4®¥â«Y¦Õ'›z?¹½»W“ÓJW!ÍJ\ü1¾a>ç˜Í6š 9ø^vã{C“D«z­³„¹Œ¨+ò‰¨çn2{5;Ù*4*ºŠW1ñÑ 6 ÿ¯åtçŒÍTÔšó«8ÄÄwú¿¸"qçË’R EJ~›JDÓ(µ`dí󋨤w@=*6I‹üâ,ç‡IÕ&z‹Ý^µphœ¥ßPãä,CB¿9•.[ö¹˜-ï¨÷ß,–,Ñ;]#Î'|ôÿ~ ¦^Û®næL§¶¤‹Tö+sýu‰ßÐ8I"¶½…wu„o¹Ñî”ÅÎJVq†6à ?[=½tÝQ&Y̧ÿå6µ…!)Nq>´0MÜæØÜžf“×»ºŠ_XåP³ÇîiVºNGXìS¢:nŽü¡cSÊ­–9æDÓŒ—ŸÖ/;â÷ÞdD»¨{åôbÚ”?K’ÂècÿK7¿tàëŒð R0V¦tK(‘Þ/èÆ7dNÖ3*JJ_8¤®ªþÝÑÉ{öˆÂº;Fx§\Tï/…"‡+:éÈmã]"Im !(Ïeóýò?ò›s½@âeg³+ùš£­8$ÀeÞÕÇF›®\ƒ°¶ŸîƒÃÙ@>~÷¸Éž3F­‰ H´ƒíðJx“÷£ç8ÅYÏqmvmٵƄSíëöò¯ß­²)ÑuWrÿs¿Q·[¥ž¤Çq`ÉqO%§)3‹-̼=‰õ G>W¾BéÃ÷RûÞ¤þ.¼}Z¤ÉßÍkM×)á/ŒM1¸f1J…Ü×#ò4ú<ámb¸E`ídi3.¡;UÛܹ8MXäa‹üãeÝŸ¹YpK]éFs· »ÿ:YH£ì¯’]d›—Uš?K õ5be8Ö9/ƒOzî5ÍZŽþ,§MÇ÷׋û¨Ž:yÚXÙñš’|„f$á÷ÝD~ý¸­•ßÅÕô5M~¸‰!~ §V¹ªË¯Ô…o¹sR+%‹E߬x²Å צ¥a©²’íD°&N‰þ8.ƒ‹´Õ…ì~nîÖÿ§F׿Œ¼G¦lâgKö>H—cž9°¾­ÿá+º ¯/žŒ¿†7T)|Ï.9-|s6†þ;½(È/w>–4†Šûû!Ç{¤QêwéÜï3ógsÊÚŸÍIÛ=²ûHÕ4Jª@;õ×wI¾ý3ÕÚ~@`¹&¼ FåY›œÛ¥ùAAŠÌ²„‚A„—‹ºœbZÔ'Ÿ×/[óPoÜlÖ–vîßæØWžj&Ãx!–MÚrâ )¬™ÃÛq/{§‡9D3¹æ)N“‡æI~„+X‘ýY>§øCøßÑŒ|¤HÄR‹ à<ôô‚ÐÍwºèŒ*©­Öæj‘Ùä36 õÙž¸Õ£IÓ”š´Ö})òiJ‘‚ºå;÷®ù0óöLsPèÜ ›øë¥¡ÈýEÙujlþjÄ ó«²êÝK$Tîû¾&ßé÷ôbóÅ"ö7eÃõÙžÆQè<ÿÙŒŽ:Ñ©G—‰UTgÊbÏŽ ”ÕpïÑ (NßYÏ)÷†‰;Õž#0&R Cypó<ÓÜCïëAìÒysN—·W{&0í2;Ø’š«c¡¸Õ™|ô2oòMç{vE¹‹bì `]Šr¢}—X?ÏÏi:”ŒlŸbØE‹±'·E[à­j^¹ M^’B Ü×ì5Æ*­Çðøšš‘ ˜n~°Öꑎ?ÈŠŠ‚J–­Ê€¯iüß×cj_¾¨™”Ôc„q¤'ü£}¢WÄ^ÚíO`ç•56@x¨vi¨$ùðÌ,ì?¤íu6tÝeÌ4UìçÑ´r´lg“2ö~Ãé%;l6¯ò‘hïø >ü½]¦þÍ%u´ø‹Ê²l5™ˆ)Îêî¼íÇc<æÉyÚMo®a•1~Ô/uQË6?Uz`t”U“?§TA}ž­ï­¥‘Cj³ªò÷µ‘¢&FùKÆøü2(—é¥óê»ôôú0ÉøUpÙxQQ¼ÿœ(‡Ku§"v^ „x¬ÉÁ =NÉGóòPÿÛ«OȪ† zì]P òŠêÞ-ÿP‡ô¥L[§Åsõ(–Wºôáó+óGˆuuQUçzA»©ÑzSô™Ýû„VnŸÕš,O‘óÜBð˜åþÙÅÝ.u¼øS[±ÚŸ'þPï²·ª|ù¦Xµ)ZóY ‰"½r´"Æ¿+‘ÿ<«2ÑÿÞZ çüEö?ä"NFŒRÝ3ý¡¶ªv-g3ö®£Ë ªv¶sé[çª/1)±õz¹.R·XòŽoµä—V˜=(?äÈ|2Î tÞ ÚYÊ"u’•5ý$5ï¶J<ÝîR£[*⵸#]\¸J¦9=ÎyT-¼ݼÓ31ô¹+¶Õ§¶6:4Úþ$©y{£±ëÏUJý³†X7oß»¾u±Eí7D—NÍÛ`ã[äüÆ]„ˆîJ :tó~¿ï9t=BåíÍ\¬ºA~ºwÊš_iÐm0çßÿ< žÒ}Øuˆ8©ˆ±¼•™ÿª-Æó8¢ê‚ÎYîÙâIþlMÍS%ÏŽ´±Šø×'IÓwÅÉð;l9§—ú= rJêªHæÚ}T2ýº¥ïG¡ ®˜RÃF' ÓƒŽ Ó’+üms_Ž[`¾cÊûîön®§:¿³bmìNUH’÷wiô7Í: ؼÒ1%ÌÚ.‹Ìr­×<¬tfUw:ª•Êušò£8¸…-}¸³ÒÌG²_7Îí?…~Q; ëû?š§âqÔÅ8ªè# åÿuö¤™éY+=©BjÆ)‘¢#¡Æ^Ê! ƒƒㆠøfŠ´¹¾_ܪ&ìçÂM•,VFfæTÂ+iºaì)•?ÆG+Ìòã=g|©²¶ÊÀ¾_—þ{Ê2ü›1ýb¤ÞUÛ¨l{œ!‰˜…wßx µf"ãxµh Ñ=H ¾íüÖxMU˜´ÓF­¾o*Ö›´B~{û ó §ËÄKoxsÐHHriûS—\òé1¿)úÆdëN¥ñ?ú,¡±Ë›K?gTr¿Uÿ b£ãûý?Þ(;G‰Ì f?±‹Ž®#”$[Ïnš ÁþÒF¿+;K¾ð}êQ¬]˜`[‘ñ´MÔ­¢Ufí>­¨ÚK+p¿t›NSìë²°ý–~~FÕ€‡Ò"?—S¯šÚ–½_{Fwû—^Ûæ›gÒÍM vË<–ù4^ÅYrtGõ&wíNrŒÙGpÇëÏeä-$L\-SÄ#úÜô”+{³‘PqþÊàšâm)ýEw¾ùùN6å[%€“I{MÌë6îxMâ(—±Ú›Kc·VütcÌ/_œf¦*‹OC iv¨Kò»˜ÜùÜÞK²Í7êÍ}Ò×Ë®hðˆ…‚(#Ó^ˆyi£VŠøó3ÅdDþ\ÁÓet¤ƒ2·¢ê)ý¹”ðЩ 'æ,çóÒþÑ‚ ÖÅcë|š˜Û„b÷¿±~5éP–¿Nµ6!þ[ <,H©î„¿»&´ÛVyY$¦².¿¢|•Ë \vV·jj²û·»áº“žÿ7°¾&V¹|^Ú7ÔdºÉíW[+ÙŠ§wˆD óŸœ Ÿ¦]Ÿœ¡üf ñ³n+ïõI¤pèíŒûÊ>sO˜~Ò tÕ×,Þe/qfû/uJÆù­™¦ãÛ­¹U¹,gû~RgWkÿm´9èdÛçz»’ä>Fùø{ù¼ÐÂßEz×YV)3~ûd|ÔW³úG}øvK{>ΙéH.O,a¬Ý[Ì‚M¬ø‹ì;ç#— Âi¤/Gв‡(Ç'$¡¥\<½CÕFò±î—¿àÑâíìèP‡Á¡Wäž™cÇ"pÿ¬=‡Ì–TÝ ’9oßu¥õPÓ—|•hìÄØÌr¥½âT9U,Õ÷ÀÙ”ÝÁ¥§ÆšÈýþ»È(¦æµˆ<û+‰o{¾nÓc%Iƒ%Œ[SFë··,K«¨î×1]jçú1c5­¿]õ檧-2f§äÃç€úi#ñؑ޹}5Ç›¨»ou fª¡ÑP‹iýƒnù×Õ,yø´WK?†&kBZ6‹ ƒ5¼Â#kadýqæ?!øIÎÊŠ9Ë5’°RvÏ^ß½GÇj7ô}GrR¤cÝÕcö|©oTh<Ös½™V¬ËÝx£¶ ú“?%üIx’ùú¹b‹?çÌ™D/…»Y"’›%ß/—›!’b’‘¼Ý Pê¯\¿0XtWç^ܽË1¬ÝhͯìøÂ~ ½f®“úâ!Mfqötéå=Yv¨ß— ºb£—a^VzÀ3æÓYFn¶ÓaJÖׯ“•A½Å‹Ût¥'¥Åê+Á<ßá½Ç)‡?ù©y-ø¯g…ôk´¬E"'h£xtE­ò´õ?7 Ri™ÒAVtòß‚Õk®8•'úÁ' º½Û|Ù}Ùx_N’á’Ǫ Þ¯Ò9÷ŒŸ!‰íýUw† 󟓺ý?+{ÃÆþsFyCóqš±‹ß^`Ê`LA¸(¶X„³ìaÓ8tUízYQ69¼+$¤y¹©•:‰EÎ?+†Î¹CÙ-}siÜ÷ƒoÏ’…›}šõ›c)q7#Ýà}:]aBVô«ï€CÌåë¡@/HSÊulä_¡Z¤ðÈœOŸÃÄG8?ÀËŠ«™Ü¦ßÿÛ R%ßô[BÎjtþ®@D¬îïŽ-ã¡beËŠÏ-pjΡÓq8™á³g·<(©þqvÑ Z|05eOËA9; ¹¶(Ëþ·„šü†²t37“¥YþM³JÒ—Äx 9ü)2 âÓ¨yéüâŒé *ê;u‡÷…!vH +£Xê:‘Ó¢MX|ÝQµ¥UnÉÜŠ%ñzRMwŸÅ½‡›}`àéi‚¯¦@ÆŸè2`v ÏlùÚü8ýÚ+òˆCAs# æú‹ûl» Â}ùîÊ9õ™>™ct_¦ýëúL¹é­|³¨Žo-¡Þmg¾øàûÙœƒDhRFÔ¹ßDëJè^ŸWÜGSçÍZêp§¨W#Eª+u9Ï~l½;Åñ¯½ýJd“ HÃÛ¤ é]V"n‘ƒ9‰.>˜£®$úÃtø8Ì›<Ýi=™;$OL—É`¡õðÊÔ8TrϹÕR|ÆfW¨ß…ï‘5š«J¦{úDˆ9ö0èï_Â*þ8R62My•—"h¶HSHN±°Bõ!«ãdŹ„€jaÚaþçϤû€\ÞH—’í@ÇÆŠf_M$?·ÿðÍ(†U¹ªìÏOþ$û§ôSðŸ+;Ž>®´ù¬^²ŒÔ§khq2ô¿‡Lv]Pð)a›Ë…–›-7–!ûðM¼ŽÙtØ^LôìBƒ=ÉêïÐØ½¡ bõZè8v2ýù1J}}±_î­Žü¥ø8UKÏõVv°Ï•¾ý^*cM‡ÕªùØ~ÜÅQ…ñ(™ã“U‰[‰ºÅísq€¢#EèÑ…vöýæË:ÌZÝí9Íɇ{ËÔ„ÏM꥖ž¦l3³2ã‘´ß"áEXwJ¥¬Í ¿Õý\¦ËÎ<ì7ËÀ}—\<]Qɉ=cÚè—Uíæ¥ú® ,ªTyuUN4ûEW_í«³IÏO.5ƒsŸñ+ñÇ­Ah÷ô¸\¡Þ*ü\æ Ï&àçuÞÛK®«ÅyÚkHZZX©DªÍóyv2ì5§öÞMH \ê³· õÈæ‰F;ˆS«Ë“Âß3­bÿZXÆò…O¢)Yá›íµåj¦v,ÒÀ¶¯T7ä~X9¿TdfoÞE›°]óésßó!ÈÜ;zn·tó}\v(Èm¼AUa™ú{ìmNŠù:=zùKú9¡ü}1cLíä½ m4”ýë*m~X!i2Ûo­÷3dÙ#ÎYˆP}…¡tΟ¾vêëÂÒZ8¹ö‚dähàu¦y§ÍQ€¦<¼#þy7‹ô ßAêÀXžL”÷æîö²°Ç1xûÙLô[,© ñ¤‹>2ÿF ýa%Œ˜б?…fy¬Ž¤ÒøÊð·áùåÙxp:NºAaÑÈ¥Y!.×ýhs‹®Fq¿1ë/ÓYɳíúoÙ«zÞ[]Æ€…гªÔÓ$™òn›q—#p½˜X¬ò…^ˆÏ·â JBá-7®«öÏöÈÎàãï *Fn~'©êÓ¼äêíEÙOŸ~»Óž„h¿)*ÏWÎ4®d_<‘GVüÜå‚ê‚V.öm,iÕSÇ“m3É®½JQÕûüý¾&ªiJ¨*îõ'¥Ý­žBL'Ò×yÚÓ¤Ðm#°ƒ>04õ¹ £óÅ»ã,Ψl’¸sxoIŒŠeLû" .Ʋ4ë^>‡›OKŸª¿ˆ¿ÕÁÿÜms'ý¹øýö× §†jþ+§¡ã™±ÙJú_S' ô¸Ìº¯®E¨×W«xDõâ“.»ìc\m€Œ•–Á’Òñ8è#Ìmáí’R{ef’hì󞮚ÂöR¥´ªôÆ øän¤òw4fÇHs™aœüšÝÄ tVW þµeû]ÒdÂÙd;¦«×A‰=¨Õg&1åÍt¼ôdŸvnKÒ÷ÕCët*‹d˜Q2>º^éŒóÞÚj?oÇ ~Ř­æÉí«×•ÞFêQg=ê¬[¥–G7¡q—yÙÿ& 'Kñm _ÈzÕ8Ð'×Dù°¦:š ÆüÙpp±µ «Æx“ŒµÛæàº*è/ËÆNí÷4Î13Ì–kî?†§61?ç쥅"¹D‡\ ¬¢Å…%6G9dÜ®¾v(jÎÁÇ×àSVäZ!sÒãV·t¿“ÓÖ¼k]·äGĪÀÞîÝÌ[1¿ íÅû›É$ºŸŠÆËaSêy‹ez¦o墸ñ"xœ¤7?wü…Çç‡kn…ÑÞtÊl‰ÉŒÞ¢›{ O%WCºŸgä$iÛü˜Ó±G÷ïîue=Li9ÄÕ)A#ÝyÒØ‘Èá_¥S…µd˘fxb¼…T¸qùÚE·ž/¨ËR‘!‡º³,…» ²üCŽg¥æ,'0(!-®7¬šn¹þ|œ–fxz00Ð~¯ë^¯õ±Qf|lNúþÃ$ ŽÃ¢}Ò?y¯‡7¬Lž¼Â‡âÇDýK©äVAšût»eñÔa¬À“г-¦òl)Ëi¾=‹?‰òè"ÃÛØ«UüŠy´FRtŸ†µ6+9ÌUÄ3MLô-ûð%ïá²+ø:ÈÖÇH¡÷¾íï–>ãyV<—Äûÿû@Õ]ƒ¤s}ãʪb)¦ß±ØD7:r³åNḺuÂ}ýÜýü‰Nò›äWô˶”NU½à$Óý0 D1'rÚ.Ý»G¶ ú)Fƒµt|Äpïסx/ÌFøB~c¶®ººÅ]Æß9äíßû–Ö DÎíÍS%–ÿWòÕ£Ùʯ$£ýÈcìS¼°›!‘käÇûÛx\¦ ŽÇ báXç[{MBÞÆ~à»à†ãúÿzx|¾lqt†žVœ~™lDšD¿<ì[æPKÞE,f¥µSu|-–L*g·BR¸qÿχ-¶Jv,¢™cÐéùï<9“–Íqè)®›ÁüÇGuÙ&xš‡ÍÜ}0|…'‡º™ÅÁ‰C¼m ™ÂÜM?úG•;ä±÷Ó³;ùäÜbeö¯T=ÄH®Ì¼¸¶Ðˆ¼WP1Ê1ºïË2úÎ^n¬ª<@ qÍ–¶—›£B¿M$ÜÙ{ëÿY 4I:t¸ŠxºM(t¿êÂ…ò¯6„í´áê™úWÏ}Ÿ„×±™<DL¶•t§j~ïþí$CùŒdü6’ï™+GÛy®Øn¬uõšâöPÜÃîIº›,îw®Ýí°Y1½ã-"Ô4 ½RÍ ¤5frù?¯b’ Š~ÖÖI÷ ««ŸÄ6ùÒi@ÈOY·2‡£Ìôæƒö›G¡å¬ÔKª<ð_&3?KWD›q+\¦êÇ,ñu`w>ÝVfÉ=¦ìó§(¬neÜqÛiµò|^{¼Õ¢+]»o©-åpo4CÜÞuz{Þž‚]77?KW ]0Æ{X©lÞ=2y2SÝy£ä‡ñŒí}ŽÆO€ÉÓßúkEãã›O…ýSýûŸ×Ý¿$ù}É ’¶Žÿ9nÉ•vš»-aA>ÖmC9¾‡–ˆòKÝT¢‡_7áÿ/[”À½’ .µš0‚Ï‚‹Fß¿Ùm—9|7\{o; ºSó\kƒÝú{lÍÏeÑñÜA@S—Ú†/õ4±’ðýt9þ·³˜àcÀôÛÔð\x õzËœÌV˜ƒ 'hV6Ó$ö›:ç|Óé]j›øÙÊø»èÈÄâÉbŸ<–`K¤± ìÊn¿éù”&8ÈiÏ?ýIÍo€4»õGÑ÷^Ç[R)šè} ¼\,¹r_fgœµ8*_'*GRÓ)o›Ç‘¤V -¼ã†dÎÿ«ÎJ§T…$Œ¨˜>™PÛÙ3þC2UlÖ{ÖÌÀ]¹[K Ážî€«°ì_é¥ù§lÒ½§{6ž‰u>‰I% vŽô’kŸFÜ %A÷cºß)¢Ÿý„w¨jº–º8Ó‡¼kÃVœJ™ þGóO“v¦ ›”²"ñT‡Ì™Úû«Ÿž:d•¥‘,8GN ëKf»Ðp98šù¦8¡g!ÓQèò–¦–Eѯ²ŠË}ž|µ€OÏüD±1Ñ<Î}áÍry—¶‹Z!dM)Íúôñ*úëÂæ’ø·ž·]Á·˜Î¶è+É*nK¦c™\ÙŠŽÑÓœ%þgA(ªœ!Ö´‡tú²SÙpd@›œVá¦ÙÐý´öîjïøû:_µ®‚Øœ†° ¶ÞÔçI~~Xʼn·þÖnœmÙ™§J­¡½ÂôMáº8¼Ù탴¯n^WR’y0æÖµ½tÔ¬¤ô¯žÍ0À&LmP/*Ùߪ+¤&æwÓXœmøýÓŒE…K=É£S?›h׬õˆŠ¶= Ê5V£ Q¯HÅ-&ndÿ£fõß v>d—Î_ñ3l¡Äa9ST.)â&ï‹0+oY2sÊ-£mËìãäm|&ºËŒÿðk¸ò‚tâR´9 EFü:8͸ šlñT–ޝª_É' á»ûŸìÑ’2û8?ßO ëÅæ=H©Â#Áô>Tõ5BRôÎ8¶{wJÌ—›Æûß!ê_¬Éx™ÕúäÎEÊŒ,¤wÍ“¿ªþ–I¹Ü&-).)~I=ËmVçdêE…Géû5»üìûîè¥QÀ–»}èê©¿Ðg‹G×HY)/>’îfmféŸ?„‡¸ÚœÕë7Ôns›‰¯ê[±ièôâV,˜Ó{Ó•´GúûO¢šcòúÔ•V;‡ñþ;òïÎL÷môà’¸ìÿ5JÁ˜×áœc£1Žr‰§Š¾®s÷|ý²ý¦n_¼KL´Û&YàþÆô+:²Î8J(tÅ£ú “FuXÆ–x¤\,ŒÏYˆìæz4eÕɺNÌà 27B‘Z—gÁ¡RÔêk¨Ñ·n´%Qñ8“dâ^ý—‚#2'ûÞKO! F­{U¤¾'Äšà°S¥kŽx÷}#’¯@ãÅ, ”È~;;!ÿ{𰆛×ob‹G0lbO¤ÇÊß!ôk(?Ìç€G¦*G]ùpDuÔ£ {KuO¿ož·þéÜiù"ã”ÜÔÈöþ+Î(®ÉgʈÚ?Û#~š6œ1(LPÖ6{íò­{…¦_r*]€ rê:å|‹92Á|viê8ŽÍÌ­d€ê¡åêªÿ”,²š{^çÈIÌ Ó.ª#1z—‹K˜þÔˆ ´¤VÛuhÚU«ƒˆÓÄr¾Ã×#bÇl¯:°"›p³ˆš.±6ì {"m{~ˆY|+ù;s!<>Züöö’òÛ[¥Ä'8ó˶ê,Œ“‹¨Ç¬ýC  ®â=·–ýý“Üóø‡tožFÓ´  þÝ}ß~¤¸o7!\Á‘3)iB»>OÏÈ›+º’Z+K†j¹YTí´—Ö|†¡CJ†i;±1ò’È{Ø'Q¢ °¸\Ê,ì¯ÃRŸyêv-÷d؃z)+­ÞŽ5|S°çXß5Kjõ5ªGJŽH•åŸ5›oÝ£½ôôE¬¾în_Qa..W†[Îg×ðß'qeñsLZPè·âD´c\•„î ûLªãRO¦à÷¹ZÚsqþâå^{y^c;Ëc^ÑŽiß ÚPž«ÐdÈqr±¡€ÖéYð}vì|q*Øtºž›§7™#Œóñ&VZñôcº•¿ž­-}ôs”i[ÚÁ²gˆÃ>ÌKwæóTe”fØa†”³ïPrzÌnÊ+bæ²³Eï¼æ*ºa×£¯yÝÆÏ"ï>ì“«K^r|Iš}%?ˆÍ8z¦Úã±a »`yëæ‚·"¦jìtîBZª  UT^Ôÿ˜3±ÐJ{}/åâwÂUƒŸCèºõå^`ŠWïý¯ õ®Þº4œ´E³½;D}?MÉtS7 )–n¶Àyy£„Å»¼Ü÷ݸà9Wgia¾.ŸGÿ¸.L"îMt5rSE|’êw(cõÅn|ÍT†Vw¢"c ®J ʵ²_õ­žó˧9–ØFÏÒŒ€™?¶%(%öL¸P²papΫ)žóYx»V¥@KÜØ»Ëøz^zJAMNÒ ¨½žduù15sÎ\TçÙÿ^ ëŠ\]»Å©êBÔ6k—ƒ´"G”I*1?ð§Cì²P~''»“rEûÅÛ^?ó _<7qÜ×#DÜ@²rˆô¯§9Iô©å CC9Ga&±2£C¶hjáî@ÕøëX–YhXÜ ‹c–ÿ|’‰Kä'×Ü;aÞS]wJˆû½`4ACí°Mƒšç¸új¸Q'É›*q¦•Yk-~¡‹‰Œ,!ç瘡#ª¾÷Ÿ-oˆ)Sì«ü9µJU¸Æ_½¤Us%˜ãC*o¡A÷=õxnËÕ¾Kï-ÄlDJHqE©E["_¡y<#†4R°;ÓøL¥pÕ3{/n±\òH¡[Ç*¾ c1©IÚ‚á½(g¡ÝO[´‰ÜëtÏÐvõ×/d©o*·°.ô©7×fmTädéé¤vµÿæyƒ•Ù¢|SÀB¥¿ËI„Fº—?%‡E3µñØ´†Rnïü¿¯¬:¨"¢˜daWO“gå'Òë™¶¹¹Ý·c!É $¯¹-TÞ­ ÎqôóÚõ_F6ÎLRè«`¼ó,_‚_ö‰äÄëÎ|-;üÙéî%’ «P…va ÁÞhLÎJúPoIKÉÆBÏÒM/&"¸ Ì q5ÓD­ˆÁ¢àL½‡‘;’üî‘s¦,(ÙX.;ªÝNÍé?;$~-Ô^õ+ÏýÖ”(q¯.3pXÍjca=*â:J(ô¹ÎgRPùqÁë…ˆMwRd¥øy÷ÞrfT®ÕMËܺj1»_ad™>¤‹é¦ñ¬ÞöÚJ99Hí;£¾o‡ ·Ìg湇­NÁE#£ÞŸ¨ÈóîHë –C8®,qkE'=N<„M/ÉR†uª;騒·ÛvúꢠY>çlNpwÊó•Újñ²Y¾ñ~œ_Qe>4ú&•£“–lÕòfÔERÜV2@„2°[PC›Fô½Â¬žznI¦üÝÞ¸¨}#3 òÀ–™¢@Ÿáøy F,â%ÂL¬vÜ;E–}Ï'æ§µí’N¦‘ÅíÌ“ÂØÁÁœÍú¶­¾Øµª7“9…}®~(£Z8ñЇru_áŒÎšŠl7¤æãŠX(]I×qGÑ"e[u2ëÁÇJ¢É4ôWhˆªAcw7eðÝ%6ˆß¿òè#~Éq|!fâ°Ž0eZ F@•V2 ùg×'·E-ЉÏÌipÛP1¬ˆÐò.M_VøT8òF“~Ltë#Û<|°>Ó:c¹þ½ ļ>rvS£ ·±àþáÖ›:ÔòáÞ'c›E@¨Ÿ¸ÎÚ†„–éÓT4§ UV ú—zw˜$ÞŠžæéFþ‡þß³ÅÐ’nÙŽézq$®×º¯N¯tϨÈð¿EZIé1uÞL¿Xhò† Yrx—¢Ú Êòr§;6Ù¼ºý ‡ë¦åÅú÷2££F´é!ÍÞ› ¤žMòÅ£ú{猲VTð§¦p”+Å6[¯Ô1LTž·Åž;G/o-æÖ"mŒ“é”åãCΖåMa Zø¹´±~ þI‹qÓÆ 9”‘œHÐ6Ó·*(ì zßaå±TäËœ/—Q× V«€­8Ñô|¸E–ö.`Ù¢–ºwÙs"øãáÐã `¨vÍ©혱?_KSd¥Sqq8JtX' ª;§¬°9]ÒŒN ©,EZ‡‡ÇöÃÇ«ð˜áIvÁ"÷âÎàžnùo”qkC…&Ëû—6´u!ŒóÈ–íg’} :`ˆ€®~„"¬E´ýAÓ»lé½Dvœ‰/ç9[ ‡Ò½6hÎM)ÍÕÚÜt K{D¨¤œÖ…Ú°’QC .ØÇó¼ÀÝo¹Fäóáæâ ž:øÀ­fßw‡EÏ ˆÖ^¦âÚZ8ú- • @ZwÇ%(çß!¨fqÆÐ|x2?pør>E—Ë•ŸÏ±5Æîè#¿òN™ª? elÔÁ½º—¼ÞB]ØÒªS¯q3èú’{;ûÕg}*/uGýÃN?Q¦œí!) ¼Dü‘¦™RÔi±çV —À+t²`nëbÒéÿ0‰(˜.æ°å9(U6?ë¡&nת¥×·—à*Ä¿C=ÃëXaf¡R›qßê”Ö²Â6ØSÉÑŠéS%§{ÆWXʱs+ lòf±‰wÏÔÒ‚¸‚N⛸=t¢îj§Û×:+0¶île’'G-Îqÿl~]OÚÓx]†»²Î‰DXõ ٞ܌¼aQ«åÑ ÙÝàçã­Žÿâ>o\ƒ–ŽkG³k1DWŸ@žC,&óéü]fþŠg²‰°º";Ïï[ÇË#²¤"²7>ÍüÜ„—-†šüU¨ÅR+`Þ'MÐÄ2;sE½óŠxWù±OŽ´ô»2'Q°!б܂(¯±ñË{=ºÑãVuõr[&½GÜd‰0¸ ž÷%\o¦¹ügË í$å' ®år~ýkSwùøUœS“èëÒí÷™õ‡ÃÌtKJ¾Û‘ÿË{åY¯6Ew^*¿EóÎN^N™ÛïWxY`E±Ä£OYNÿ#g?ÞÝÄC䉋Ž¥~ÞTày*¬B>2éöO6üƒ¼y>ã´·?¼l])ó EÌÒÔM‡/Dù¯×íÖ_¼hú¢X:Æì²£ý+7].=—,†Ú¿» d¨äù æÆB{ÿ¤¼ßÇ¢ë¬hq¸D8ðyRi%ò :ý©XÕùŸÀ®3ÑÓS0²ZèsC†¢¼³â;$­žjX#Çÿß–ŒÇ¤[JnŽÎìÐëÔFá\øxW¥X$Š×„rás+‡{[ÁèWg_ë¹ %Q»Á› °Ñ_ ÇHEeõê ŸÇ:êy!¤Ýe½Õž,hò]h;Ñ\:Ö Î/s÷Æuü÷áŒÞªæ´ºº—a b—O·ð­¼ªªËÅ$ìtu:±¶Ê^úµËÛ¥‹ëvQõþªÖ®?5[Þ¿‡ˆ\|Ñ¿¢Qÿ:-{ao d'xÏA”­UX&6øŠgÿB!‹ü{DÿÒ­¬¤¸¼ä¶ŠÝ]åzlý%Ù/6ë‘Z¥ŒFÎ`ï ËÛùŒN„K°ï4´æ¬Ã.òÁÚQLbÜqtóIÒ;=aÏüKvÔNÐ €4. ¬¹¢ZË ýqÐÂ[ݼcmð­ŠÝ,os4˜œ€•WœW–õâ4"ƳxÀ鉅•¥~ uaÆÖ„‡7ã‰{m‹ø—¹ÍxÓiª¿d‚ú©qÕÏ/ Š(³˜^­õ*–aê}Lw«æÿ©ô55`H`äL»)k‹úZùlÄt–>v°L†í›º”¿™G¤Ý  ži—nŸ¨ìæ1¨ø@—|+˰ʚªï$f„‰¡0_ "^¢ÓÝ¿¯Øù™2' r_F¢¿É ÃF.îd të„ÔLU÷jK±µ;„p}ø¤O 0ï.!)b-z¢J5U0zæ+çmh‹5Ï3)-ºì\4Ÿ™‡æÃfýûixzXDåⵋkq«•õì;ïÇ9 U%Ú¨ÁòoïÜm!joV_ð—RšŠ“Žx9q©¦ÚgÊ9½˜íõ´ÈZ„ÇÂ,'Òž¶¸ðà¥õœÎb½”c@=çMøeV{î>™ fE sº•apíÕØórcjÄu‰É\‡G¶©>4!òB2;éߥЬ#Ïûný¤zªÖ¢ëÆ÷,óI£ê¢p%*ªIN?7©âýv}¿G_cJ$Âõ/·xó‡¢ÁuXú]ÞãE@h+CƒO’ _‡[àÜå•LÂÂá²ý–)Ù!ÿtÑ嘞ü柸È4-ùÛEϲ3Zr›ÝGoÈ;Œ%õ,C µ†Z<,]…è¾3”¼ûÌÈœQýDeãÝ©É95»"íçNE©kË‘¨€‚r’O³ÝZòÞjq¢OZzÓkCû:â¬Ö6ˆB…ŸÍ (´ï;œ’-ÞkéåärŽœŸyš!Æ år½¼GÆ5žsÌryf-Í®´yY\áÓ6}öïuÐzûš'³jóhòR=Á!Ä2ºnúqâEÆY’"á>뇇µ°~³ÍwƒBµ¿4JøµI~ß}ÖÔúŸðÖT´Wnoš9pPàêó×±Ö]%lÓ#¢Xaø÷í¸„1ð¤Ï³$!Ñ»· )Uc“»q~Ð`A’›i’¬ŠgwGÅ?x+c¿˜séÁÂ&˜pÜŽJˆ‰¸ðpC%ûuÎXbÏÕñ5¸›¢hÄéñý,ŠlWþJL%dåɇo‡'¬%ê†Ð-I>^Àͳc­>>‰C]Ŷ„ûÏ&€&›àa=¥;¹–³¢lã²TˆŸøë(enÙ»¿lЯyœëŸÞ*¦ø®Ý☌\°¢ÓO¬™­‡xèSе¡Dƒßò|Í©/MåÁ¸˜ã5^ê¾5%ÒY”í¿¹úºþ ËÝàƒö`!`Á~ÈèjÐh¾üe}Bâñ)è0t¸ÊÓNzñÇ_r“NßñÞ­ß›"vÓ‚–PqWí°ž¹+•:g¶/Ä‹4ÉCëÞ)I§ 4,ïÒgzŠÆ«Q?µÝÇp¹¸þ|æ¡ÍÜs½u'CKÈüÎö˱™4º˜§± —ÜG&sœ+­hè¤éÝRð³{}Q’Þ¼ÝÆ±…ˆ½%&×Élü½®\—ŠoEvƒ´g‘Cƒ~%dðÓõOÛA…q×µ™À´0š,YÓvUÿ… ,¥a°à5ù#²H|<ûðù¯ÁWþ'˜ôÔ;‰7è`ËÁ§Ÿ‹ b=+ùç2:ì8l “7Ôį{nÜô%€o"mÕZ û—%®Š´ê Ô›7ú= Ò8kú‘¿o‘I»({QHKÊ5d(ï7†9kZ=í®§pÄrêÎx®KÔK›SrПåÛx" 9°!£Ïlºo+ŸŸM»}Ö&ÀÂw*«ÑXÇL¯"5Ú–óÞÍõ3D¤¿îô·ÅÎ"Jy&À ênszá'ªmebÓ÷£†³A´‹)ƒN¹ß2¯-‚­}¼ðý Pgq)?£¤…¦OgøÐÚVúA%Šûêv3%œñTÀϵ=Ì%÷œ0654¡íßòÛW‡Š\Sý”êÜÖ~–]a od$ËpI52Lp’ˆ#zthY|ð)×;Q;r¢Ôíã‡;,¹þߥ-gº+tú¢“Û OPßs×Õ¬^3„Áñ(ÕàD¨(ÉÑ5…àõÓ ¦²í1èŸ1nkV¶š!ÙS=Z‰o¬„€-…V¼D†W3NÁ Àò)ÍĽ¿é©Õó=aswéB/`—g1&“sФêֵ˿ Õœ?W/Ó±5ð±ÿy ð÷Ñà½ý7«ÊìÚåéïúÏ?¡®¤ö¡|¤û[:Ö:ã6a½0ÖpïË…U}S­ÙvSe«ÑL’t$% «Yq|+•¼ÚÄj–L„û{ªÕ À’4ò›þ‡u¦È^ç7ûzuy -kÚ¯òxOlj4öUÞ©º´ Æz@Ì[Ë'U>i¢ïÔ2¶³ff#cÎ÷ù™|ëq³Áõÿ ‚눛ўŠ)Ij!£­ý~¡¼¡àóàJ ÂÅÀÔWjºRíyD´ÅƒBDÏEmWökfþÅ Û<žLeR.‡€’â“yž|¿ ƒ5˜ÍwW‚‚ÑYtO˜]FX×>ñŒëæÃë´âÜ%K·&7 WG€R”æu>È_´sB!nkÉê¯Ö?²¢„´è¦í½÷›¼ØÒ]®OL ¹7øEÅJ~mU|=ˆh“<@ìeè 7>zƒȬÉfPÍ.ìšõ3bCrP¹¯eП8t­ÆZÏHÝz§ï-_àŠŠšÄÂüW?NOòtëb©×q;pœÓÝn—ݳö>~‹Ï/×ý(Oö&þgÜ(¢ùºŸòa¦éÃõˆ\ª ¤Ž TqŠœ‡ýÛžjdþ[írS”çge“_$¹Ì—o19¸„e:æ¾—yˆÀÛ?^,°-rCí! ë!ã‹õf`Õ-²SDüœ¦ À ÕT/Ï.{9œï½ï8ˆD—²ã‹T/îSµ¹4Üuf€VÁ*¤˜—óÜO¤‡u·@ú)Äw|‰ì´Î:¹šÚˆßÓ¯ÿŠ¢?q °=B÷«ÎC]5®[ó“Ù!¢­xm@ Šm³™{¶¼ ¦kÒj_¨¯o;áøsQû°úÙØ[F€î(ãpoSlT ° r饸ÆJ~îÎ ­k˜¡ ÄIBâ[WaÑ"òŽ3¨E³ß½3tĶý£:왣2Ý£vR™w‡]t>Âîr$–á·CÂT÷ý¤æ6‘ŽÁÍ8 ægãÊ÷zÔã/¨eˆ—_4Ljÿå7oên•y´N$‹„;ôÒÕÏyo}juzð¤z7o¡"Ú2)°8?.çX›ô2J…Žxϸ}VÏÅwºm!+.MI¸Ðfñc¨f{ÿY?¨œ ì–› 8àñgIê\Z–_ð¼î”.È7«§k.X^%ÇÑÞ–96RÕN³¿WߥË1ú»Oøˆ\¤ñͽ=Ùq­“ØËP§{›Œî É#a^àœ¾Ð?Ž£¸F«ß+ó§¾µSÚQrc€MéNx+Üu“$j#C­ü’_í±æ÷f†t×Â[†(Aƒ«ÚqÓÕç$»&þ„©¼Ó îÁp?f¡ÈìSÂ3sŠQhsFq_ˆ¯Ëk<šåìx¦^Fˆ° LT§jê›ó ö9»·éêÛ°´£´9û¶eýÊó¬…Óš~¼‹¢T cý2؃L§˜F,ÓY‹òWã¤|úz+V¦aþƒ²Ë0iq6‹—(Ž+ðüúéôJÞÔø…1E©t¹3¡‹Ø”ϧ³Å•z»â¥Jzˆû– ÛkÊæ˜ËX*¥4_wóHñ½¡tÊ»ñšG¸‘N˜–ä²D–\åX2–ëUâdUÎSó~£Ú8ò—5ŸoDgç‰a´R¨ì ý¦˜i*9ÒyØOàdÖ›áþªf³‚Qš”y¾Ì„´]i‚BšéO/Hës.ÙâÖèE™ÏOf”ón< çÝ=-"ïØÒ3ÒX"¸/ÞB‚‰Í¢FåI›O/tóƒò Is¬ /`ÿûµ4 š_‹€n"S*SÉ_½0ûŒ½ëOñá/pR­¢$Ëþd\¢ìcôyqþdW=Í‘6£ªìÒˆ¯Õ~OM ¶i×ÑÐØ·¿{jTò;øè<­gNWQ“,‡=zžÌ®Æ)i0Ã}Û‚ô/?÷‰òtA`yxBíª óˆãe‰œ*IeqCk|aãœÙÃêoÖ¡•ú'ÐaHÊužC,È¥޽ߪc ZLR{ø:åâ&Qæ°;¹êqcºûÙˆ½÷Ý•¤ñè®aÐȃ„ebþ±ÚCpœ”DD_'MHˆ‹å@""¾Pˆ«]ŽDDDéuAxŒÝDE„0DE…ç ˆwŒ€,^E!}“ÄÒðÒK€ÔÁ ‹ñí›æ¥ !g¶ ?XBA!¡0ׯ ,W‰¿ ëÞ¯ÂÎFÊ€„ˆHV2 Ž¢ÑuP8ÿ6/J šš›° 1´§Žà°xü /+áª.ÆæªËÚH!Œ0ó{Qm ŒšïþRÔÝp4½È°Í‚¤±@Ù}–šQv×¶­p,yUtÔ] bwcAðu)×YíéÁ¤Š_¤ÙÏäF>lß-“ö@âóÉ#3¾9ï#¯b©6×…fr½Æi|¹ Ä†¬ky¼hJ­²Á0ª—©ÄAo6Ù‹Cûízócð+3»ØÇdЙ]…çÔB/RËœPÛ¦ bx[,,Þ-– ×LÊäϺ[ªÍòC“Š÷õä¯.%²‹ "‰¤«jÇJg_ I¦nÇ òúÎfl¶L˜ùÇèkßýÒ%wB·=zd)©¢‚õGF}²3àç–ôœæKþc–ÙÙ{}¶JRÅGø¦—ìå»›¶OÙë1qš¸íâˆ6« …•ž«Íˆt”B¡Ò~y'À„¥gçÐ’d ë(Žï쎙ïóY¨†V:ÞËí’âG0âBÉgàÄ9:i`VX=¬„*åþû öÐ(Äsö%8Æ\*Ù¯ádr9yû«N­7JBûìi?é(–ëá¢JðøxÛs³†ñ ñ¸8'n b­Ì×€jæ×§÷ì€ò.zºMA bÓÒ¸¤ûêVU8#ûÈï=úêì]y6üÏGÀ_~ª`*]yC®Š!,o›4É£|û2ÖÔ§¿œ*'Ÿ¸Â.Díµ ‹ãäü'ƒh½… 3ÌÒ!až£J÷,6Ktsõ3z±e8®ü¦ÖÛéÉóÔü;>n•V×EŠýy]gÅúØI?*&uiå­`~vIŒÂí’ŒZ´3’ÂÚYU[o“ônh¥—n^1ËfŽÏmÈÚ…™n*¿9o3§/Z„ÛîDïž+mäǽIM./åÛ`_½ºbÆþHäº@q#ìÓ¢h&)=üN ŠÀ¸“Ák`(ßæLûËwÝœú^O*DBM½×ixãý™J»]9=MVd0—2Ø|¨‡‹ÞÞLªÔ’ÃÓâI¶ zºCFûžЬùWËAWx·Â“‚>ûóu 1ô «­òÁ`úïµaT)N¢>FÖ5RE+Þï¹íøZ‡ÑãØLÙ¹ûvËùPïw4×_ŒÆŽNV¼u±™i±±®!ùœ%q ïb“-‚3Y"ý‡b­í—Ýìôwq0¼ºÙ6ÞbÐÁò c»|ëÿíAðóÁíØ¸]x¹ª¸R»T·ë£”{í¹y&¦Ø \1ÂS•ÒÌ÷ôoÆ1ŒÙÇU\ÉHÓ-c0V;Ö£5dÿ¯¯UÑ2.my[ï ûf²2á¬N6õþ{#?Iè9Sd{)8ø‘¤Û?°™Y[¶ê¯ŸÛbUÒ^5|Ä£;´í›R´ªŒÄl ¤ 3®±ûäe͂ڂ 6#›£¹çÙÛnº–ã}[gVøûðf‡èž¢`pðÖ{ÿY×»XŠ+¹œ}›€º35ji”¦ ËCêêR¸ªÙd¾Üõž6í8Æ”búÚSŽé_oŒ‘4œÓ2?{Óp;G®þz/ELŠ%fæO°ûŠá,›DiàÇ·êIßBè ï­AV2eöj^FJÞ¬~)%¥gæ~r]'³ÌWÜÛ< Ù›Ú“BÍÏ Šã–±¶w&šôÊâ,sÌü­ñc“GYi4¦ƒ&ö×Yi&tiåe¹|A,Zóý;X|ÿ ŠRLeŸæô0®úõj’ßçfhCbÇÛAÖé2¡QºûÃÍùeK{RÑÝÃ$ fa-–‰µ]PDíÖã.×.œ/¸OÂÍ-!glq}ò2öèÿ3S”<­YNKF‡n¾w‘¬$g­ƒŽr>«P¿2™Í¡hHñr ÆÂѰtØP#ª “Ušá;Ek³ÛâÍ-.'î-n(Œ:ñ´±8ìþ£ÈÁÃÊP`LåG`Têç}.Û>`oúí\}cÙ‰Ö’øÎÕ]^à‰±_Up+Œ€Äëܣ˭e‚9sÖ‰:h¹9A¢_ââ'5ðãµb8ÍÿÝ•¹òÓ‡ªæ©¦¯Èð#…I ¦½ä‚±Âšú,ê®æXÕ[|¹I«æÂòž[P^Sûû¤ç¹»æUö†x{BOó_N²9ɧÎ÷ªñíXÝÁÛÙ¬`ðé^ Ý×’Ž—ú­ñï·}}Äi,lƒh~ÎÍßÌÁˆý€­ŠA÷ÏfÒL6,ÜÁ BÑf䵨1)6ßë¿×+xÈžGÃîâÁ–™ç°þCÞâåüЙ$ "|1jÖãHÃ}J’5î+ƒ;‘þ$W¹õ~¤oùœbw\y%ž‹><Û€&o©²Ž7¹J“   Ô3¯Pû¾wröj ù‘Û²<ª¬¬ÛÀFL§“òßL½HÎô”ظu-É¥ DhÞ57PDlt§Û{‘c9 üˆL%`Ö²5بík¡X÷ðôø›YºÃgÿ[ýCÜËF>§˜"1¬+kÉ5k¸±¥E¾´Ð•ÿ2½E]‚ÔÞjí1Z©ÂÚÅϤõÅþ±HM3Œr)jVÜPÝ3àž•]2“v¤6I ÿÏ" ";róL½±…¿Z*_Í·<9䬾ò~ž]Ãò«“ló×H´¦¾Á wØ}ðH½_Ÿ œ “o/ð¡aÔ­¥KŸI¬Õ‹Í÷¦,Ó–¢÷œÌƒkdç!úÛºôÀ?òdZ­ÉËàεt®ïíT‘éÜ2MĘýT4»ÝîŽÅ;.gÈL&¹Xlìèû¢³ìoJšÙUp_Áuñ Š-‚jíéÍIçÜ}±’M/jWÛ½à[ÜWl„@ôòë~ý…Îñfÿ/S%ÜùØýX“LRœ†6 ³Bm Øâcó(mÕN•Ë«î_®°Ûe{Æ‹ ^knÒ£«Þ=€«ÇûÙ•÷²öéZ®É¹ý\Ž!V«Ò»;4X“ʳ¾ ø±g¬×ïªÜ<_»ß mÔÁêÏFÜ1¨uû RûB—nLœ«f¥Óó*Ú¯ý°Ôëp<¦ŽÄÊԨ䢹ï RßíÖ,¤?'z6 ÕýÙ|D7?3È67ÝzrSÉñU8›ghNá(Ñ(Üä™Ué<˜¦À¬ù»–ƒbá›ém:-â´Ns¼^é¸ëøï7{¶dàË?»‹“@V]©šo¡6A3 Àu^â­»ð©(e«4Ì…Àéy3\ÚvK³IÛÉMùÃöl7•¬ŒÙ à#èêŸïÕ$l)‘ƒ~.ÏW0Àœ…ª(šO¬-§Ì¬_¹ ßüq4.? X¿F¯­R‚ßéïüò·™ÉêÁHƒÞ‚»hô¿ªžu§ï&\NÔñÚ•µPM+ŽvpÐcT–U ]GòýJÛÔJ£ôJò­K¢¸$(ô~;¶Æó|ò‡ïn–;KÎ>;Z»=NòÞò nÏ6ôåF‡ÛŒËv$boò˜Úqr³ŸŸØ /õIä%Qê;,@ÕÂYþš›2­Ð"ëA5šþƒñÿW#¶è»•ìå-²•wwcq/„Ï1ìLâÒOdƒúÊV m+öuváa…Z-Ïu_νì2“2 ºIòÑÖX™°£oÚ&îWÓI~èe] P&üŠâ— å¢Õõ{¦B_ëg0…ô'jAM×z%k¨¸¦³f.áVr òÒVè_[*ذ[Y=j¨ãû,qÞ4Ëá²af×4”Ò¿wš¿Ÿ–°w‘¶%<ågËÏÇ;ÛÝä2C½¡hÏF4ÆÆß@Ž€,%¡µåhýã_C ðÖaÒÝvz·%¨Oö|Q³õ`Ú«QdSëo`2’§bã<'“Sù±õ§F4t8”¤Mä´8×vc‰º“´Vï„4Š$1õð·”Õ_˜SÌKV®@Ã8t›Û‡Å-„¬÷¨sÙi²«¾©ŒÖ5&¤Lµ‹ËïÎå‹73Åf3Œ8ìúÄ–gï­Ò£UÕ$~8+®YU‰øÇRyS—Î¥S‘ºÅ(¶©þr÷ÐÁ¢Uª?ïûŽ3zâì ÷n¡AA~‘½¦ `g¸•‘øÁdS”7‘ä¶$"¦ (9î¤ê§ W0_%•ØŒ¦òùU«ûÿãÌhI£²µõP çιñÞZÒÅ6–3o4ÝÓÌx"†+â ¾Ém'hì‚LJ)䮚¼‘mþøzI®m0Œµš\9 øsWuá!+žø¢Èþ‚åß?}äÂË ºàφuœ¬…)?•X¶˜þ¨í1Éõa.7¥{2vì°Ü{™xz‡º—é {Kg™­†˜ÜUì†ÁXw\Çš½²ñ¾'³Y¿@­+¨ è6öl'ˆÀbze°³ýy.Í|¹IÑX0¸}È›’9Fìιãà€d`Iy¶o, T,<æ½G>¢QÝ®¥n4hé• ·­;$ÍÊj’ÑžA§ª¼˜ѮГÕ¹—Ø.¹HmXÚ¼ºæZG{Êip« ¼Jb’êõQÀ î,‚”hº¿yÅtLXçœG’¼«wÂYlcÎÚ™B]r›Þ~"ñ»õvbqϯE);_uÃô°«•¤ðÄoÚ¨õàš‡â`®â&½GGçn]Âê}ˆÁ†.=Ï ‚²$PÅkʤÆ< Èu›£¡EH™ãŒÎžÖU"?…Wá®Éø%ð-ë)s”¨aÜå[2ø+uz­õ`o#Ô*Á\x‚.™ûÁz-T…(«¢®M"& Jóö…jÄ^¬h™¤WŸLÝ ™¼l~XËN þªLNñÂ$R JqV¶O”H‚Tâv™[Ï¥=•ãòµh±“ÈZxaPÉ1ܦe渓ÈHyºPÖ–FYå^Z)އ"X45Fì¢Ûú­IIž‰é G[‚Bä­i/[·˜Š[ý¤RFÅQ;{¬Ç{?‰‘Œèþ൮åSpSÅrW¥&´¨‘ˆ×¶Àd@ ï…BÝ£p ˆŸxQ N!;ç;;ø%Õ™ nyõi9¼ÖEZ~ž¦Iµ½_ÜàÖ#¬#4J2Ž; X–ÿ$׸ÈÞé7[!y­ŽL[òñ¯ŽèˆU¥Ñ\áøëô0ÑØÌ:L<äþÃTzýÝ_ɪ !ô5ãh¡hQÚ wýfwØË½÷gÇ…H ²¯»žâée ´úÔ°ÈR*“L(³Ç-ƒðú~¸ÐELžm¸ºVÄ™ jOú´Åf‡w’¨3EA¨6Æí˜c¢»½ïdM/& B6¦¸ÖfÖ[WÖ|Da´ìZ˜ÀQÎlòàå:\Èj¿_Žl1µíÁµdñ(æ×¶ZRNC.ðK8Êëï›|¤7à®çáù1 <ÇöUyÌIçB3T­PXFŒÕU;#z˜Î Ü×à"osÿv°Wxÿ޾±¢C–Ö5ž¡*f-obºÞ®š”f”êéëyfãEg<)—šÙ̳iÀÁVM¤=Zå¦Þ'ŒéO¼ýrúgÛ™àŒGˆ¤"hÅç[Å”–šz_ö'‘ì‚ï{»¯t¡MÆZùëmKiÌô<ïP!F@ňj^«®g5mn-‚÷Ú6{iÚY¡'¾×‘ù¹ïWë©5?ØG ÉË^ëôIoœ§Ý¤(L)ж ŸBoOB ¹÷bÕ)ãê›»¥W.Ä›3ãcÌôº?*mrä¥_sYQ¹èŸÞ"†Èç ªrt>½Í¦«)‹‚.I–lBÏSÒóÄ„_úí"º¿9Q,ÓäXNµÝ(ý{h9S Ò7œ1;4šž±Åhîí´Þ.©ö‡»ù&ù??–ì{|¬gÃßž'§û4Ôò‹8U;Q©g¥†­?µÐi­88]³É?R–¦ÐÙ×– ¸¥âÆö]DÏZû[T9´Â¿ù<Ã@ÅŽ~#67üŽõÁˆñ‡×Ò\yϧËæ ¨ñ„ÇÐüòÎÁØ%#‚,wyRÒË“¯xin†jŽèã98Œ]MmŠ„}M‹V#?W•Lëß`ü£gﻟÄvóuQ>ÔÂŽ­!·@›½0Í“Íg—·x^r)Ó¦^çÛ‹¼N{iÍ“óñ6•KC< ]I t¶¸ü‹Äдe¹ Ñbß] údë|·šÊ5—•Öa¡ª[wªÉç’˜W~¼Ÿ8`hˆ«4ï&ó ôYGHBËD ·?$ð#¥g´ï™ðeY HN;ø#E;¹ÕÉ帼K´Âì’~èWclJð´IøikJc¾ÿÜ…®C^v˜í]1¸HZ&E“iQ”f#ލT9g– “¥Ã3giÍáoÍüŒÅÿ}ƒZ(œòämŠüemcá2 ÄÈŽ76ã*§«@^”.—å  «ô»²Öù“æÕLÜ;?y‚§(/TK– À•mÅ ÷z_~‰¢Ñ£Rž^[x~š29œJ¼XûBsËŽé¡È—-cлҦš©G6ŽÖ LÂÙv8@dª…n ÚÎ^Ga+>“oOÊFʲõ\ž} <ÁQ¿¹!‘³{uàŽºñJì~d棷m]õ!Jo;ìÓÙ—Fß±{oÖ«?¹‰´CÃïcú³·mé[kÌLL_>Íé_YÈɼìIÉ¡e–d¶¿c³¤Ö(¿È†Ôl¯æ%PÅtµÎqZÇŠ?¨Æ²½öÂþñÔ )EB‘’{bÆ ü’–A.Mj°~²Ó;ÍÝ&¶ ‰xB•§kï³¹ÎìgõéÊ¿Þò^¶é\¨T…âùü·ïc¶X™O4ÿˆÑâs¯Ææ­]¾»ŒsÔ·×5âö#ìlwÔ œ›ê&f­çÊM¹õÀSÅvß~ÕöÔ²dñìÔ‘=ŸMª‰ N…@ŸŒûG1UäZý¸ÇöHßÑBîlxH'¡F“Ô -±|GiÉãÓ”ö-OÑ=o«ÎJ+¶)ãë=Õ¥ólíÛ lqìåw2­ÿ& iÖ³æù²IÇŸõº¶s ½fOKäcþøPCDF:SõiøM‰Éë¿‹ Ú_D“–d¨$Éc⸕@ #‰ÇýSª+†?Å;9,°Ž1äD7"úhÖ¿9 „Ðõ*pÛ—§Î3hdÃûù žžÈ²_ÏqüÅÉ1»'ÈŸŠs«{ ¾Â¼õ¶ihN9¼¯ÞDÎYDœt’;=²ÑWI}³i‘+V‹D]7~{ÝR¬»WHßjÌUû$˜Øõ Ú鼂l_õô«ŸÃ ©ßQƒ…ûÛ-lÐù«Tñ€½@Ë`zbI~òUDrµ£ãɲë–ÒѴܶ{¯¯Cë1ËYåªÆ]‡‹Ú9WõcØ,¦“hÌÂ÷—y\#ìIG× ´ckbÛ¼ñ%ÉÏ—?í~Y GåV­–Ô«QUÊ•*6£òÝåãüfé¬ÉìIýlfO›Mßo•KÌ-Õ5›²aöEžû÷´Æ¦ÎŽ'Öl»8Íx æó?:gXªdºGµó/®@º0éâ‚kPñô•Ck¹I•ޝb{oy38cšVþcIgý®Ja½ug»»p.@ü!/\)jÄŒ®š½”¦›@/NºÛÅt,ÌWVÐ<ßf‚¨œÊ¦½ ?ÍqPC™ïBù­™s1ô7§úèJ°b*X52þƒHYcœHïÁ&ýmì‘PH’´gͰÏhê+ -¥ w+ËÝ!aþœôÎÝÐv¡«úÀ ÿÊ“½®»;à÷¬úû½g:S튮Z•F¹!›º¹l’Ãv'Ÿ¼-ïĻ̆áȦ+æí<Û•ˆõÞŸEiŒ£Õ/8„Km•t+&oZøýGíò3´&<ÛŽ‘Ô„î¥íðS0ÀÉV‘ûg²¬ü¿s±7—‰<¶x¬{ø›4¢zéÏâøI! {òÈ£^7AÏÓ]`‹ÑUm˜z2ƒc”ÿmKnªUK¥•Œä2Bu³ã•]釶«úYÿƒÏ¨¹å·A¼3{Ø¡?ÐÑJš›`ëÍwùäödqÿf*üÖ^Ì}‚³Î<ù’·—TsŽj |¸X¬ý^†a^fÙ¢.¥VÌ&’îItÉqj2¿ÏiS"§>³g\ßjåMÝôCOªÞDìß3[¶:¢» ÿ8ðÈEœØ«rÓ¡)KŸníÌm¦ƒòT‹æ§ƒ°]a_¥Ñz(c“31˜kvŽEa’»l«î÷éS£ä?Î^.gœûÇyF°ãCYž•FÀ\koE|ªVe ì[îKŒþïZØÞÏùñÒí±yÄ̉@S ¿ñ¹È¸¡ÊzÓÿŸd>6QçØz4“ÙSËM#¯“.Òà9]f‹@ëíS†šî£·¶‰-ªÿ;'Îál†ˆømî×+fMºìFhïHVâa§£àüîŒç(‘v%ˆ¡+/›Í‡êƒÉ¹¸÷‡²?/¢‚b+`ï–¼Ç„è ¬ÈÚg²’ÎìíÇ–<ì†E‰Ôß["où±‹¶n}䦪7Í\ª>øä{l(£¹îÆ=¿’¤x¾D+ØœJRÅ1ò3Wø+Þ4ÿn.Á#y4lF « o(1ü´Bù…)dÅÙj¶ mßè›<üí‡JÀ™~‰ÅG5ÎóTåÉX_ùQØI©ú³CØ@£jeTmhs9Ð¥ð1wég{œÔ±-M×PÆ=ˆ.§Ítìª5(²×6diõe<¼±—sÞ÷©.°"ѱe©deæE™êMâȽ⦢ˆÃ ©!í»½66¾Ù+9Ü÷¦žÃt¦Ð¦…`­àÙ@ycHþ{«)%)">²šèx–òŒús´ eáú4Ö7Ø£z7ÛI9”Mnml“a˜°<¤ä´‹£RRË ºšœé¥*W¨Ñ~„çiÃL¤ò;^8F]ïê±Ýû¸ê‘‡?Ü,Gg˪Y­¼†•z·@e°¿3DµR‰ºümƒ4&N϶·±o‘¡ì5*Uo|5„—=÷îPvXnö ¢ý0‡`2/j(_ºSÍtÖ¿" †ŽOæ' ›ù ×PÇ€Gïb’ѰN]£ÊÃãðZý0«Ž+[‰@gIö™ö—f]7]^6"¶b¦Žj7øÏôŸœ§žÎÄOpú5'ÛF«~$«ÂmÐM9;·çÊYC•fH«[Áj!Õ(Zi±)·ºÎ˜Ä̇ý=øjIóŠ3â|ÐYÂ`å64@€ô¥Sƒ|‹¼Us Bï¥^‚¢68>ôó%享‡|t}µ!Ó;]+/îÉf¯ÒÚ{öŒB]abÖ£ ½FEçÙ—¡ysäŠF”rtC°kݺÿ"fϾÔWeãËÃ#Pëî`óœ‡?bÃ×î‹yZ¦g|ŒDC‡Óh—ãëGÕIžOhf„¿§ ÿ'É¥Õh¾â¡±¶Ýtý?$«‹‡“)ë0ò»oï°nÙ·[W¿äÍ`)©¤zÇ2•>YNbû6ª`:¸vÚ_ß›^h4UYLöüÞ³vT•¦:X?é-¨"~ 9\»Dú×­C¯¨Æ3äpã±~YT³üë/}'ŠÞߥÕz®úœÉ™d¡Úšmo,u¤:·‡Å‘°×ìʱn\mjšGUówÑ]%¿÷¡6<7oð7>½Vœ¤äÑûŒ÷JÔ¥¼b5¤ØŸ^Ä3EiÛ¥$×7ž›ƒ{Öñ½å“ü¤ 1Îûídïrò/¾x”ÕŠ9CÐ{Y[‹Ê?¨áˆ ì§lT1êKÐÆeÀÝœÊ! €í×µ&ËdJü5oåÝs<á‹t$"DCàï×ðVê|;fZ9-¼Ôšv„ÇǪùPxEކ˨Fjk]·óè\[L÷ÿµG¿b_2AWÞÕ/ÁÍ‚ýáEcVÍ=Ö{Á1-å¶œ2«˜À¼c·ÃÀLï)¯xÜù@ÅNUª Ì!HûÎõðà]èÜ·–¯?øö[+î¸Woø¥öšYÑ z겂&e-Z+P0†Ò‡¬g³ü›îvµ#«ÿ;éâ'!†‘2òácá"ÔÞ—>qkÍaM¹QóeWù·Üó!–zG…–ùU_ߺ6ÍXmº·•7Õ*[ç~ Ñ{ÙXÌÈZ¨òF†»`%}QŠd£Ú)Eâ“ކؚÍÎ’8C—³8Û œeÒ’·FꛨÝ×9’F/’X[´Àdœ£V¡ƒŸnéj%»'ؽ‡LÕ5vƒlÊMœ~ú]'?£cZ‘‡äÆÍE1òUjx4^fœŠô<ÿ`ß\ŸÃ&`sè¼®Qdi@+Qµáà¥LÞ‰·}³­ûözúGƒ]vb™±Mêy€ɯOâ"=þX1å´Š0ꃠ2WýG‹wR9Ùœ®}_§è†ò=îÚgØCÄ[îzçý CðvsW9ouzŠ]\•V’Î kô¹½•.Þ ›"‰~9ý¬&Â0z•N±jü§ïaê‡|W#oüÞ„ø>IÇ 4‰Žß“÷þL‚NÀ$EÎ*‰8gÿ1jþßÔP}—lp$»T¥²Öd*A÷ ¤"}hcòRXzIÓ¸t«JªL÷¥séÌp{?c6Æ\µÀð 5:Þ—„º`F.2@#ò¢:Œ3Œ×n¼þ·‹’°»Õ—šÈUЯ^Ih[ž”3Ëòg³‘â«sEQÖ¥A;!X)qÂ¥'ºx“„:.iÎçñ¹¨B0 „ìÛ>c?Šjû‡©È‰ÊýsO%3ç?1ÜÂ(úüá=N%ÍÑiÅð+ Þ3|Ù9?À‰«$Òÿ¿<Øœ n£YXlÔ7…jÜ­;ñí—vÞLJMÕë§×úÊ5£ÿ”&¾bØ÷üÖõz±ã'êµÕ Ör†é}­üœ"ˆ<#éV7k@ß´‰¼Ã›U‡ž|þþ¿R_c™âMuëÚ©²·‰ák“6™ òmŒânX_H!û–Ä’pŸ U@з¿ê:šþàöIYã>è{§:÷ñ‹ Ç;g9„@nôæûewÁ4ÜÛOÍ'#ÉÙ`Ò´‚Ó|ÜQÄ€éâ$¢¹™‰ÒV N±xPE„¶ Kæoõ[¬ÒÛ¿q–‹‘kçÞ-ßøq/¦³¹×h¹UGsÕr©[ì_·#¯´ïg.¡ÇzÇCž·ç¡·À.–ßi¯*¶Ëž ßšÜ÷öøMÈ<úP(íh›/çqÊéy½üLžLΆ™i„;þZb<ﯩ–>hŒÙÙÏ«æ˜þGЯӑï¾Øì:î]¤Xê5œñÙmÓS /½WøÅ¹?SÄñ Þk×wÎ-´g§5KËg¹V¬=Œ©êD+¦Î:;¿"ë÷öaT˃;ÖÙ±—WÃÁ±¬ÃêćE¾}gc«Öý” ñâÂüªŒÑ5´Ëö?µ\Z¶3³_ÜÁÁ‹™M$¬%¢ئªx¤:°;akß ´ŠmŸ$³„ÑñÛøT¡]ë}ÜÇ‚’ÄÛ·.k;е«M¶Ë¤Þb‹s …—²èøëcOˆâ»Ý»z­PŸüœ‘¶¬cäp¤cY.tõyMþ'zeôïSZîÖã Þ‚hpX‘ÄY*ña¸÷3…l/Ñ_ö„ô4»²‰«¯u‘5‹ú9ÅîïÙ,$ìòVtÓ q©¦f«I)60*×HÊSŽÃå •áÉÔHšÌïw³ Ñs-Ëë»dGÆßÑnRÐeY¯3 ™Ûv1Ì^o2XÇYÆ%_Á}æû¿Ñ̨½¿÷qN7†ŽØ£MTÕÌTl±ÇG`èïšq#Ò+·ß dÍ óCÖ%åŠo­Ó†tûõA¸^ 5ö™õ,ؘ±}³W>è÷uã eºôJ±E¿z ù*—ïΞä†Î;©âDn«W»ÆžZŸ"ìÙ_SþFvbµÔ. dÉá¢ûQ³ÌÏ<ºG­ƒ¬w‹ÀzÕž =ÏkX™2ö¦«þ‰äiÊu,Œü•ÁU(¸Ô¥Ã~d/(¤…ÕVTø=Wºðn°$v¾§ì…•æ€>ç늊Dfä²Å’øNxBñ½K¾,taàúA[Oy‹ŠpÒ0KØôF{ïŽØ,ágh±û“t%]¸)¥±*Ø’HÓÝãè w‚3 äaö…ÿJø–Ù2›ž–Ε'®íÃI7Tƒâa1/²¸3¤îÑ}ª€‹ä±3R×ÈqÁªf¹-z”SÆ9QvÀK‹,›- ªÎ“·ðô•˜dè›vs⎹ü;l²•nª{mÄ-)”G^'å¨7H—ijØCÊæÐ_¸m¡Xþ˜@Iº/êA)}ÇáÕÕIÙ£sµê¼˜ÈËDý›Ç²Loñ·5¬ÍíÙ,Êé¿<ƒO¡Ï”ÖD©U.ˆ\CgŠ$")Æû>DAlD+VÛÖñL3PU|èùe¹í¯¢aœzç…KbøÁÃTðBXhE‹ÚåÅ2œ&!CÕ 1Ù®‹¥þùR^}ÑöS ®'‡³ Žè0‡ÛükS «A°+VFo”®#´£Ã|;ÛVl&ènÂO«ºþoÞÛsñROpá13Y­Ã!A,?O¤K„ (•c4£Ž‘%s—C·Â +tQÀD öÓ(JW: uÅþ %·CvŒt©ùâcñ@P3” ±zeª”#¦ì"“J¿…tT$b¬·€<‚qlÝ!qUçùÚ!~ LÉÆ‰éõoX—H´ÆÉ+>d§¤Zz(–î×À~!éV¹ŽFymÜϳÔr]j p‰üSDÜRâûÍ+ÙÇ¿ín»Y fkP\Þ¾ƒÃb „ “èÁÊ‹F¶ù¦<–h$.Ú‘+üÞ`"÷|gÇ@hµ&~7žä89ÿՌⅳ½h¶©J4 c`ZE0íÁj̃-½Ù0¦‹×|!Úç*`|; T@ö‚Ö(%vÕœ;õ®Su Ñ&âìõ~˜Ç¢÷Á‰©¨ß&Á·ì4õ±ýN‘åAÔ€¡…{Š©pPâÕÕ“õÚQ§=kýÜ*^c ye®~ÓÀz¸ÏÉÐÐÔ…ÈíÓ w> ¸úÈ<"|È•¡òDcY ?ªµ­{öv7¸ó¯„àz-ã’2•ù+e%ŸçèXì¶ÅÌž»(ëÄ=Á{Žýˆ¥}/™§†˜·z™­×Ü?ôL<¹ =fòõR‚oOx¨èÛœ\A@ ^"N9×J’Ž&!ˆ“ì_~k‚,š}ç?4³ ­ó¯îYŸy?ZÅ×µ1ß’e¦ál’‘Œ™}ÎN1lð@Ë¥]õá÷N«%xÞ´Xý~D•%ó ü=± >ˆ¸~ZÅ«bpúº™2B™ßf’+šýWÒ³8œÈð@WJK²Ë6Ô¥%ƒ§îÜŒè:gÔn·»h§&CaBð»|²DÅ ;u[”=À`@å9æD©P[Ø&Ò‘ÿcl‘Q;ÒD¥àóÛäEìó;E‚ôXÁà-YJ}†€E€C.˯ùDÂp:”e›ŠÐ÷Öæ‡N¶Ù 8ݪ½m¾¾Õ[èùt†Êgács‚|G»˜¡. Â¢Šô³^‘Œùe|kDÖ#Û0ÄhØO“ž'¡õ.õF‚üâT[`¼Û¼°L‹D´a . Ëê1‚€ë¾qÉŠí²ú®oŽ7K¼V‚p(‚¿¢ýŠ™Ftq—aã] c—ÚÁ$q˜zU¶2§ D¾ £³0 5‚5=mÞoN<»™é`¡±uæšÕ°Ù@è°,Ÿ•÷‚ža¸èî‰JŒî‹¥è’%v¥§8›Áy|Ü‚C8ÙÀ °ÿeðE_äY,4`ôuè¯>@½^ìV{—ΰ±wâ$ƒ| ö¯¤5P–¿¿]ýA~V µLR‰iΧÍÂ%dG €Oƒz$È.…xOÛýÐb+g>¶íÕyk¼¦[‹¨Ð.]Ü Àã³Ø<6Á±s{¼åÙáøÊ|_–OXÿBM{:z”µ,pΪµÞù/Å3ÄšzZ)?Ï‚½Ûf$ŒJØà@”÷ËÈ&ÖµP‹÷§‰¢Ø¼÷V£Æ2ôðÒUÎ+4ÊÓqw5“|uŒ‚þ¥ǘî"ÎOèãîÝPºÜÔ hø ‰àÀ4û³š¬SN2ÑšTžèU÷÷ÉFKÒÚ*f½±\$nóâŠhMý½åïVÇckq—v[Ì{šR·å°ŠÅ™á$ðwϾùWýgúÃÜ[‡µpV°6sêòe‘¥rûdyáÍl¡IÀXaVI.YS{lË4”­…™¡6ViXföÞ5#¡¯Ú/Ow i_Q}"¹£a2 ”¡ç[ ¾~ÚH‚Ùc8ôÄÒŽÿ„Z¨}Ú¡joó¼˜9¬ëÿ4Ö»Ná`<€Îr²[”{î’Æ%|X(xøGÁÂì…ù i¦ÀMGèçUÍÚg‚ðýQ³ .>¶D3ø°ïëÃ#ûm2&šþ…›ØI Œñ1à©2H“5Ôžé™2Ø.‘‰î˜Y-­1²vHi™ï¦6FKÓ2E"3}š ˆHÆ­ºJ="po@W"ׇŸ °îDé.󘈎k©ì¬²»âe"=sL½©·­D 7tBb&•žÊPIÀ’Ó‚†÷´éšZûílr:‚ÂŒ´>Ô (ݪ*Q? a; 3oª ëPH0QêÎ[ˆ6ÇU’çâ(º€*À}+ç†âY):-‹=t‰.¼Y1˜êO÷.døíS¼µ­­ gö$7‰2æá¯¢²³”qé›È"ô/-¶‘ùJËF–Wçî‘d¼þ4+gNí¢âøZ«ûÏw4+kÜ]'¼.•—Ù(°‰ ÛytG¯C+ó9õkÇê|.kÎlÄP9N¬Ýì"ñ³á7.*")’¯UXmÞÇôŠæGŠ‘o8Œ¤ñ7ÓÖ>·”)RÐtR-Ç üÈPUl–Ýè$û_\Ýó™dÎÁ7,9‡ßPX0™T&Ø|ê\I,’Üï3PÞIŸ8‡PÕ>Žà›ºÆ$wÂøýîñTGl/ù,ôú—áO6ù_—Ï?uSÓÁ_ðŠ×é«é?kŽÒ­SÜYŠéà)|*BŠúk†Òühëªç´ÂIHïEmgâÈ«õç {¸:\vgqGwãM{…σ!Oec©ÑJ/,ñ–—>¹®ªáÒ'6¨^TÔ‚¯l)Dà/-«Ÿ8ІÇ\„–XôŠã’:—+ðrõ™Ðv)eøtÒâu¼á]I´tªS˜ŽÐžÿõùS‰Ûh k¾ ¿›ƒÅŸnàÀ’8Åóü"‘—cì5æ/RG $W`?‘î³s MsÀ.fÈ<_‰#ýù3FsìÔäÊ÷‰òÃñFØ|VC9‡ê㎗pm7ƒ=ôP°~ !JÛ¹[sý­þÄŠðÔ`*SjP)éÞGËMìÍkzñòaðp+$ÖÿFh×rø©£õëmÞc‘YÐÅ6Ô]Öid“Ák‚AŠS$ÑYðø[²É±úÎØ/:f ‘1ÞÀX9˜‡…ͤæ6Z…’Ûä¶Ö,¯'~@€ D3‹q"EŒ" Р@‚Rˆ‰DDZ8ˆ€$.Dˆ€ "EŠ" 5›žûè¼;˜b¸lgÅög…€7Ѩ7¼gýÃ\6ˆ¾R ÛzxY9„=ÉDø-ך¢Ê-!7¯E©w‰iµ"¹©¼ñ¬¥Lõ–ÜtË]ù €¿#ÝøEu Ž›ˆø+¯5®û±ÙPø(j‰kžéh¡n^<ø†. ”Éu‰¨ ]F½‚µ¢ŸÔTW ,Œ ”gÆ¢ßþ²¯™~dõ":Ö.t^M=wš·Í;†N59+(ªmŠ&ÌX;-Ñ5l_„§ÔPSå:Â[uU ®™Ï C°  L9DPß{–‘’c!„ ¸ß=G³m‹lÖ^Ü;^{@o†ùZ¸ª?cÒ¤G×ÏYVäGŒ_Îz— .£,åuŸ´(ᦘNÅwd>«0Il‡–qÕtÑÛzfÃÓ¸àÓŠ¿Ò]wQ1¢‚Ø9‡f«.b p‡¶Îó'ÙAQ­Ìk2Ë\ª¨ ªWZ*¹XhËTÿåÊ^õ}ÚÏ÷Æ‚<ƒP/£È`;2©1R;®q©°YÝMÖ÷ÚE«Œ f†£ˆU¯ÐÞÉÀ÷‚*‡{Ié-Ùä"<ÃÛ'ØÚ*â¤jå;Ýlšû§¥æÚܵRËÍJnÓÆóÜ1ª,0¢Ÿ ÕÛىÚ«ŸPüÞ=±™¯C|{žè0QÒ¨=šámöUZ»pÔm¢±×Ô•VЋ9ó|ÈÌ臯©çrêéòn¼(T¬Qžà#ë½rN¯Õ‹xM]ˆDû§6^¬ã°žBÍGtÇx`!±³–¡Š-ˆW«ÍBÒï¡~“Ö}ù^€Õ]—’†ðœlÏqŠFâo<œnrss ±ryHÜ[m¬bÃpnÈ×@7Ðø‹ ááŸ2Ø‹ž±“<íE:Ú}—¢Ùé¿]ÙCz Y£SzË,ù¯M-¯ÿ*ÄÙìK‘àð®?%SúЖÍåŒË«=Pðò§W¼Òh.)4yÛà%O©@÷opéèI4Áí Â&¥½8óô±ÎÆ6ªÂl \‚ʼn¦` ;5GDùŮޕ‰ \iaS¶\D Ú­a6PoDÿC³yö~±µécÀìÛú×*ŽðÀ<Náìåè†D–¸B®ùÅÏÛ´½=‹,÷áé+¾à¨QxH˜}‡Ê¦·|óÇò=IÍ6óìzç Š[ÖG8ÈNRyصQå>÷•OØ/\Õ ;Ô¯BÅ@ÝâuTfÏ.A‡;â>•‹¾3Ü‚Ñâû\ów7´i°j˜J$Íã+„ër u A.b£ù­5¤,‚ Ö òrõóÝcá¶\ýKf¬±ááQIÜÞ´^kÞÛþÂ48„cªÙÄ1é>ï‰{?ÕLJ|½!öûvÏpPä½Ó`D@7fǸ d-ãz èðk¦6`Óž¢èqÓIèÚÅz¶>_Ó[lqψ ‡tß `ó3ÖÞÍg€þ˵“´F܃¯È?ø©kfŠOYÔx5?‹a‰T9OÂán VÑöÁ'ruB4›¬ctb«9ù @,!Á›È—’öÈu`BE ÖK8Veî”–¹,Ÿ ¬§pÊëœ.ê®oa·„E¬ëOâ®âw¾ƒá I»ýc$yX{ô© >´¼¹]ˆ^Ïc˜3 1LNgRÊ{މÍêø¦ ¢ý|‘3 Ù|Oõž_Øàè«%Äeô…B:ò±kÔ2E˾ /g½ûÔ,1iJ¨±ÙâžwòÕ©ùñ§0‹’È)®e€Î¤ ?²ì)xkä8ì¤óôˆÒÀþºƒ…0ÃGY\àEdÀRµ‡,ì´2B ¥ì«À·ΜTêâX¬]Ù’5äA¬ë/bÀ„/>.U;.Ð6ËE6ÌÒ¡À3›”†ð‘ŽýÉû}­°9¹kí“æšAOð#“ˆ¨æŠè‰­^¥˜=º­Ã¥qîD·xÀU—ÉVVõ_àÓyÐ 3>hæ‹Ý‘ÏÇ}Î!yÁN¶¼E ĆPùú-¾yÆþý_=Ý>¤DfÓ§8üf‚ OÝL‰·\ZBp¼W jÂHÆ[Éñ´>Õ€Ð!þ†‚»«K˜0ÿš¿d‘‹åDÅ Z¼x•Ê BÝÍ7 *¡¨N&Öȼçä`5ÃcyX;ÓkÍ'§üþÚµ"IŠäùb¬¤Ò‚ÈI»˜dMu£na‘~;HJÚ89û_P€iƒÏTp1ꋼ¯’«¹éh„vzÀ½I=Ä7¯†È+¢wÓ¨l…îv¹BJ\`És†R[2kfö›$§hßslÛŠJj‘ÃW ®ÌÃR<¢ë_Ò‰°5¤n-/@Â_¼BL-&„±&ü÷-üË¢Ù$h¥á•ÁÆümôÏȱÖòŒßˆ?Ú T“`D@bÔñì$ ;E£únX.ÓWbôEèi†¼²ðM ¯¥»0ˆ_£­ü‡*ý T°>€oJ_Ï÷.׃åtù ($¿ÁÞàæ/Q7³ M >ÚzB8pÖNlEÎØC¡ÕKoz"€§ýº6Õ ¼Î ÉòˆPœçßùHˆ †¼(5U·ý·»ÝnjÇO\3À9€Ód nÇÆÕ™jº§ißQÖß‚.~·/Ü/³¯‰ÎWN(2P@3{øÑÝz-¥ö«Öîô»a¢BœÚÒà ž_ ¦'s'ÛZ΀¯a¾÷í„»{=’ü7JóßÊÿµäÐ#hLdzv»¥çÔ¹`³ˆ‡à¥ Z¡˜è4±¬Ÿå94c›t”1F©ç‹r°Ú{é7 ©= !T±|ñs1,°h*¼sI‰gÆä1p¸~_uÐæ¨YöÁõR ;{†¿Žx[x‚_E0¬p–;"‘AâÛö¯@²Ò›öÁÓ•ÀË= ¢=„ %`ãçòýƒ-Nbµ®_LË’‡ëN¡ö+ÃŽjŸ09ÏÚ´!û³çØhþþdó¯n•@ð˜ÄÜÐô~k£”¶‚ñË…árS­Å|½[¢ f«Éóü°NÂ!4-öË•kµ`DAm ŒŠÞ~ï÷Ã@œéç7‰þn6¦8GÛ*Ü\ó…FEƨ$cN‚h}F‰4-w!³Ç~a‘\äþuuY³’^Îü o¯û Ò¬Üâø÷Á…£YUþ»7þä 7Âà®B°X#M4£Z=-”òyNˆ/åeá²DÅe Æ=0§w>naõ£…x>}ÙPôïôº‡pÊÚcÇÓªàw·H¶0>ÔijΉ½¶?Iw3ÝÐÜ"cX|àÓ÷öȉ\íIÙ.ŸÐ=.µTúÒ‰Y TÁäÂúcPÊäPQ¸UÛ_ˆ5¿N?¿æ3ÅŒLþqž¾q¡yÖ߈«÷S{@Z ¯f°%ìä ¾P‘íÄŒà ¤Äv­»µVï{fm9ÀšîíxžÌÑ*9Xi„{Ì%L˜¥ A.Ú,ˆ9XÅ/€Ð<ÊñúæÃ¿÷¥!š@õñ‚ɧV¢H†o®Øû|¾}-íŽ%&Ç¥$93ÄËï!lfdzG^(Û¡•'(n‹— ‚#ݬ¾†^˜ø¯­œ·ÀSö½*•[}d‚ÑÄ–¼,°v#OÏ/"¬ž†û-gPÂTå•£¿&Pïò¦õnÈË1^6qa¥öH9¥k”eì:m^êúH±y]R~t…Ò#Ö¢ 4ËDŸ˜t¹²Œ¿DÒ·ýŠxÍÔóTʆkܦ¸oÚ _Áþƒõ·©³>˜…jp–ÁœïãÕ‡z/v÷h<@J1üšGäu%» TIÕ„$’ÑdØ¿$‰Qt»±ÂÄn`Ž3F © s6jî+í¬2ᘹCtrƒ†Òц=l.åÀl[Ÿs.Ìî¶íꯨ¸Q¬˜80Apò®ÁpžAl?/ïšÞ®6‚h#Œò;Hôß—RR¼²PÒÐÙô¿Â©w#$硚Ô¦LJå†×=ë®§r¤Û8üËYìöI„vzE·ÒÝ”SÝÓ™&§×qé-’¶{›š›ŒvŒjP)ö¯"l&ßÄ4y¾o:ö¡ñØ>†Ÿ+^ÁU"5mZ$©â¡ð^šUñ"j@®ºùýx€:ä%óâuu¹\×Óü ÉýÄ,ÜÜA <¡Bʆ‚}#EØÜrWðayëzÉ÷# ¦­þþpv ªâÕáîQs­ –1)Ì«te4³üFPÍçý—XEÍY²Þ¼§Œ[š’lp[ï]útL]!¥z*˜®lù2f}N\L VkiC*È›^iÆèÈö¹l®4ÇŒ4Èa=0AÃpùÑa¡CŒjè8Ž _èfþ2ÑRò¸cú ÌôõMF¹×½ƒÑþÚ)ÔûÛ•»¯]®3>ÝtFÃSKlZ:oˆ°»ü—fÿ{ý_ãe¼Ÿ§µêu»-0mBaÌnÌHêx ÓÙbðî*‘¾eS‚mÕ[ü«ûóm7ör-ÄØÑ!U †'ZG êBDZLˆl¬JÁ®Ãâ€`4ë¼Ô¢'ctl]ŒÛ‚8ßꔪG¬ %NA‚¤ûŒ×ÒoeÁÞ ð03öÀ 2›T=ôcvÈ_ÖuR'Ç£K@F;45@€S·íâˆó0  =Fo`¹=Œ‚™ùøÄ£-ÑÑ‹¦6ó½þ ;z1À$¦Ëæ2Àœzy™±ØwEQ’ò¢òíîS '¹ø,©Xç òv fwwñ4~}­/J^‰_ ¥±±=òBͺ”é¶·ÉöO–Q©ˆåœÂä=0è¹ýQ1«;üÀf‚7ßeñ ¶Á “M©ÐÜaÓ]lC+[¼öV½Ÿº>6Ô±“t ^‡¤gvŠÆ Ö™K§6^‡}Û~j )7ìnúT\Rl™’Ú¦½Gb/èVM¹P«ùÉ™UF¾8þQ­,Ód Õ÷Ç'ÌÞ¼ãË>6-}êü–M¥°øKKãpƒ|@îý¦HÊÔÃ@I·ÛœéзÐÂL]ˆ`}IK bxî@Jm÷–}Y»ÁÀ?÷Öa]E3´LU‰bGÅÝt*¯·×ùòH¦”ì}ŸÖþówï ÑØãö}Æëîe¶¸-DÎ `ßHs°&G((ósùÔ70»Ð¿Z|%q«?a6ÜCÚiñ@Æûr”ôˆÖGÎl !kë03Á^ôN“‰¯õ3ä[Å6öµ#_{ÖÙÞÖæ\oÝéÝ|þº#Gø‰$üzõ×»c'wŒ} ûehIµ]§”ùIèx)´‹èa\¾\M‚—ì7Ç—¬Ñ‘§z´°qÈ6û"\ M¸ÐÐXô‡‚kåÓRŽTü "›õþeì\´a5ëlâÔ>f Úk6ûÒ—5ÝôúŠìb9Ã<ž3LôÉŠšçe3f¾Dyo9Dü ¢¬± ëfœ /¯Ÿä™5QWW<nåÌ`ij‚ñrº¢+_Hñ^T#!}8¸‹ÂЯET.Ö™®çô¶ "0sÎëF·d #Œ®ÙÚ¡ üšÖ!ÂÜ hvE¥}.#ºXïfÁ÷ w{Ç¢ó)pýGý‹r>dj¤“XÒÞ}äSTÉkŠ ¼”ª ‘‰+é%(ÐY‹ëþ5$™—ïbáƒfË Zõ¤ˆ®¬Ë?zEóÓyÅ4tU»´yÅRg¨¢˜Zbp(Fl¨£vn€“y‰˜Ó õjÎçë_E ©ˆ‹¾ª{ýbr”Åã/VÞÏSˆäü– ô æ£-oµv®/@ AÊå#¢¡Õu?ö3ŽÍŸfã¿cÝð S7þúã]£Ñvíîš[E_ጼAþëѧ;íÕÅŠ¦¬öbkäøYÓÆ<á|ö…ìó¥À":èåþà 8RJãíƒÐ?èœ<[MöÝCÕÇëÍJœð ÜÓŠãßéž„ Îýûª—HE~,•Ø!>W'Z(œëÛ qìäž[?ÝuCÊØ“•üW2Š˜8ÓÅðì¸0¡é0öcóÆÿ*¬—°!µ_Þ@g"0ÚMÊ'Qˆ©ëßð¾õÏL{ÊüñEÍèA[Ãeð$ käƒÈDb/’=­ ñÅ3ù¦§eGÿÃÎn|ÇÀVuæ gyWìÈ(êà>v™'h¢FؖȨë1ÜÀ‘#.ªL¤¾>ʺ?‰þÇñM6§73 ™ŒŽvˆêì= ²ÃAÝ£àãBÛýG*ž« 'p¹¨LÚ0˜?°HL…dÙÐTK™]]šðüK'hãücµv©pû[=¨x®VÞóÈh…Fxâ8¶²Ix‘ýeößÎ =¸35OV#Þ6”¬zuøæê3j±i_‰D[?7¥þ_õ©CS`ÎsN`°}wîëQš«Ûã¦9Ÿ~-­;x:ª&g\ô–?8å°{L†¼×j«¿ë‹äó͆žDë wPi¦O¬{aÖB‘”»")IJ&&«xC÷ß×Ö^¤SΆ´ïõSÝùu¼ÑœMG¦Å‘p‹º!Cþ¥CÅÃiû( DD@„dS9øº•Ï2¶¶%=š›.ëy‹–h×5yBkÈí¥A¤z{;ûv_1=ðñ6%Í †‹8¾û”7RŽˆwÕh¯‡ë‹@¬R“¶Ö?½°2éä„`ã¦F3‰Ðxà^h0®³î¬‹“Ô¡ÀIóß7!ˆ¤¹Í-/™“åßsÃ1ï)inðo¹Ó,PIÞü·ô€DD€ÇUŽ»»Ûgjž‚ö~Ï|Ÿù×¥N§Ïù±ôð79[µ•Ÿ¦X.8".dü‚yfO  è¶È§ÙžÔóè^rqA‚Œ »cSùLiågë_ûHÏ&ß„T¯·¬–Ò)¢Ø.Å) ›utÔmøÀnÁŽV ?@,§(0óÛ'ˆ Þý±³)ÓpöÊ£$t«„v²¥ÇÖÓjûàWšøöçqš‡‰e‡;Ÿk7eCý/ëQìNéì7¡c¤oÐB(î'ª¯\¯o8ÅÄ;…•#Y×FÄ~:£ëþ'ضÝ’:&;ö¹Ë}^®½(Ë>ôÎÅ3‡MYÊ„«¥«¨™u‘¤Ë[ˆSNœ0 °`4þ³—´”Wc@ßJÿLàÍ–ðFÉÛ¶ÿù઎·I<Tÿ‘4îEЦ›ìx=Èᮩ>ðÀ8Ä&Qê_(tš6á ä<\†D’€ŒÁòd„.¦©âŒ9NÕ•¹«þo©²™Ý.+|t— €Ë)”ÒlwÔ€J­å°iÚi—oPY}†ŠÀùEøÙ¶VÓ̘›a‰”fÍž½B~ÍìÏíeÎtÊ¢èÅ#¥4žÆRÙ´×­Ù:Ô Oexót5¤cQ¡‡–¥¨<êz›Œòª~8|w2’qkíOùEÐ%Ìø±³]5®'möt &ÇKkïkñW¸Öb•ÄEÅ õ„v6vÜ5Æ }…ÒÔ¥WAùwzoZ¸µ½>Æ£%Ä"lÄ ³mI ¬oíÃ\xÖÐs\b)G~#t ê»o`™\WÜà,ß]Œ¢WüÜ2INóŸFÍ mQB0-V=n6 hÖH»6ñ¾jÀôX£ñz0éúŠ9–Ûƒ¼7³ÓC³³*aÄ®ŒŽVê–˜&ÛÒ‰¯ñ±ÌB*º.>ÍjéaÀbìi¦œ™ŒÖ:Ò“êUøjrõþÒL:÷±êÈÝ>EBXè_vî>ûl•÷Ç„ï¢åVO¶  úAc¡SÈ*ó 0ÑÔùLšeB´Eʬ4û¾gò=jy긽äÜžQ-pW,aæ+dŒ¼ Å „οLzùÝöØnƒ.g]üb®m E1ÞyÂ&%¾¯Œõ6üÿ/·ïËz¹wwq›Ü!þÁ‰€ø~Þ¦ò@>.Œ„tSÀJVRqyE,L1X~†Ìp¦¢2<Òg‘X÷F'³ˆŽÂ£§ÍÕ÷|?ÛX÷öÙøá)Ú=.Aw¾ªµ•†2ÇR«çlªµÈÖèjîjöÓ6'Wf`0 3…7‰‡Ìço ð²Æ1Só<’»­“¦™¼` ¢“ìÓ£ X,Éàƒ!…ÞEA,À„ÞJ%ˆ’ûeåHIƘ¶0{q¡ S#“xÚM|!%°ÌCæËrßœ”Gb„?~ù,À?ÈìÀ´Á T>ªì#Ϙ£§³Ò°"ñ²ÿAÍo“Êbƒ–¸¹µ^pÿÀ­ à»®Î9-C%D“ûÈ~¨÷jpºç§¥I<¼Æ‚NiLÃðØl´Ò+ÆÝØG´î-ÕÆûºÓ(g¡2æt®i’×ä/_+ cÓ†æ‘}ÊüuMìA?¡?eÙMNâ¼R§hmãFüþsŽt^Þ³’ì¡|Ê5ÑútìæŸÃ†ÓêwÑ%à æ­×¦QФ#W¶÷ °T’ RIUötäZ¡Á8{B#¾8`ù¬­'¨Í@CçŽÐÊe¸wêIh;·}n´ym'42±çÚŽ<Ÿ“!oCµ)Äbú•…šŠ½ÎSÕ@ê×s¾ÚÙ°Ýè„„ƒ1èíVíìN0ÑcÆFËõ¹ l§t·Ý RÎBÌN"´Ò*\T¯/ÿ’~/›,ƒBf<˜Ã0 l»,Óðûo›¿= ²gByÖïNé~áéûÇQ!þêÞ\µ¼R5ðÐ$ŸŽ0é™sòˆIj®ò ¼ÿ4È›„¹C=in—ä ¯6_æËÇý·Aap»­qx1?æÊ-ž¯Ks¹×wm…ÚÐÌIù&ø³«¯bÌä<°daË„<^ ÷ŠžÏCFë±Å6V ã ·©˜È’èf°/€> ÷-˜Ðuü|ןe&ÇãÊïy•×L0É›æü¨ø¨kÊ^„Xh]Â…·H„#™Z6ž @’¦)ÃùÕ×ø?Ï|Œ] ’íÀß[ÃYc¯*èíhdn©ò²ì“ï’ƒ&q÷Êö£¾øÊ¢ËÒµÅÚØ¨kxø\û¡†Iëž/MÅò;õàŸ)ÕðX…?¯å E Ôèëm^á"šùˆfÅÖÝ4å T¥A{¸r—yÿÔÅZ‡â ÷âIµY|½‹í¹Ñ¹G5«“"Ewñ­‘¡a¾U{<""(¶Àˆ™m“5Þå>/ÖÕ£“wCžNå¾P¿e@ É"Ýž‹KvÕv°T/ËoíëQ7Z÷L->žß•{C‡¿™â¼¬‚¯G6sz›ÃK`BªfH÷´'uß³ŸáµJ±í¿Á.Ãý\ËdóK:ƨ)œ íl—¨Þ¯‡®)i™Ý @ꊉ®Ó™ΈZ—Õÿ]z s·YE¼ãfÜ%à’ÎS .‰Ì•TY–U »rä2h!JRTÖ‹€:#Š]m½¿Þk´Î7Þc«ì Þš­e5•²Á†[܄׻ó)?DŠøvÅTgSw½[‰1AçifI©×Pa†œéí©­”Ÿû}æ«à#Ò„yb8·žƒhÈ@cSK›ÿ=”ç_ÑjXhkK6¦®¸šÇ¹z€r ¢ˆ#‰‡‡’¬fÐÄÓúá ºè‚ÛoÄÔíIPXúâÍE>çxuD•=ÏÙßìÍKmµŽHzŸÎ€Ñ<å`š5æÿ!˜—ßÛÑ,4v¤ÓtÄ´» §Î.ƒSÄ\úF+wÛ˧ú5mÔl.¾©_f —²pâž³;‡ý(ž«Ü„]„ù‰+²¹Ð+î™é¥ÎŹ¿ä(Jæû¼sª\{ÐñêmB›W1î…¥æ o.¨Eù¡"Àx;:·XQ3ÑÁÒÞ¼s±Ümiƒ­¾ÌHG/&j9± X=òoŠ?‚{H͇à˜O_-CdOïläPß:q´%ÉZ"=îý”I«ÚÑŒÛ8¼Õg1ÑŒBªÌ!Ãì³ÈÏÌwzE»vÿ\"¦8òh˜__r4Ènê#Xâg•’þ½h‘% Ž]Рs»}Þ•£m-SçÚ²Û,ƒTŽL Ù$*Ó_ùç´ZëZ—síZÖJx˜Š‹Vƒód.xA–‚ÑË3r#ZÞÆ°¸!DçmÌ´Y*I䪽šI2~*€Œ7´’Lh…{Ú|fdÄäñÌN?žpzõ¡š€ÀÐhÞžwå!FC‡¾!ü0€pi°ÚwZíü‘Þ³ð—4LœPx ½“þ×Ǿëé¥îÅð¦y˜…½¿N†\jÚÏ‚Hò›}0 œ-Õp¤¹èDÕ”øún> ûÇ÷’ß‹§  ='4<å?ß|K*Bk'nê×^°AœÎ‡ K†€¨‚Æràïe•Ø?P‹€ü Æ|ÍÓ¤ö$iN“jë½Ê'Ÿ©Î±Î½öûmŽ‹Ô®:¦¬Š¿Ç }TòjE o"c¥:x§õÛ:ë¤âs‘x¡öŒÈôk)Ó›6“¤®»5-{ÚÄz8š”®Zéã¾k_®x4ÓyE¬º_NäK67jëªpí‹ÀŒcs©{Ô‹Ú2b¡=gªV(úì?|].qž"d-z1<&{s´]þøžFâÓ ET•žNšÁâvÖQÍÈŠHíæ³9mìì¿— ‹ÖÞåGˆªî?¼R,Øîdµ’¥Ãk¦ç³ÊÁ±´ìZµxîªõîl¾ZºçÖþ?DE߇lÓüÏP˜ˆ‰šÍ »´®~~M}MוêÃi›C æãÚÅNH¾øxmÚÈÖ-VâFwXË2>¤bü©ð´\ÜD›-³ð·™šw.ÂÅ:~ˆÉ$²ñ26 Îâäǰp«–»iŽÉ\]héŠ °1wJ…®ð9ˆéÎ[•Í–!"¬Þs-õ5Ã0 †ò³uh¼@óÔSƒÏVиA«ÄÉìÓä Y¢O ƒËë= ŒŠù”ª'‡‚ÕüÑ#µz'koÁÉ·9ÒPÃðÍŒ „i¢¦Eðæ¨•ν7"Ý,ÙÁÎ}²^ˆ5ýúµ&çÿ׆\zN”ª¶?Ÿžµ[“"ü^ £‡ž-KnŸ¤gy±Oö,ÓµúÊ-úí–7'þ‡âý2ÏNV9&áZ~ƒç’”ŽÃ+ò«f1rô?>δ÷-ã1I@ü¿'Ô¯õz’¿<<ƒòÑÏ‘»ueç̉¦ôž©8 *›ãÔ X¼‘«ô~1»]é ïe]µÝ¨m…Ÿþö+Y¯¸ –óy;¸ºÌn "¢+î冄ՇŠÕõí¿÷Œ“UV[;™c—zX ÞP¶íÄÆkæàçx‚XÏ×; /è MxÙêß!… ƒzd42¬ÒºPæÑÛ@Ý[ …µÀ.Âp@ㄟ¹^—ŒDhÃBº‚Æ6204pâè_65ÕÙóv{ÜòÑöŽäñ?€ kü`Äa¢‹wü¹2²ÍæO´HW#ÇõA"JT¦ÌlŒ W}†Ìµೡ„¬T]ñˆ³cÊ<9Çù—ßùO×PZ¼¶ú9)]^ÑÎyR‰ÀIõz5¢_ifwJ](ƒÀ&³Ñ ÷A„¯g§¡lw‰ˆŠ( )ï]çp¿Ýc5¸OÊótk7ëÑopOvU>û ÷„¤ãŸz"MÞw¯ìNCŨÔBANp˜?’‰›¡FáT.•"úÄJC@£‹T¨i#¿°gµÿ·³„©1M1 ÝØdÓŸy çv›2\Àei±îL$ý ó¸Ó)‰‘‰öܶÄ´2]–»‘Ú®ÊÜè¢`ñ$áWøG#ÁIè\ƒ4³v0ÔZloˆ£¤Ð·én•ÒO´3á+0W–7¿ég <ë×ôzŽú&ÆÅâaÖ¶ ¿y:¨Bø™©|EEí†; úxk“¹! pÄò¬+>„t  q>X´´]l ¥©%kŽ4ÉJÊ“²ï¹½¶°^¦Ðš ;h_GW±\š_M5.zn œ¯ãâæ/€7WÝÙ÷8裮’¦ZñYEní+³ÉãQ]ö¿šÇûr×9 ™†G*4žs‚S"ÉÏbÙkìòÊ<`áççŠxNÿ§s™‚®Aß~ÿtuåæDˆÚ žˆ Oì .$Â0¼Q»CwÐ}û‘ÄÃDwŒ#¾Ézhy@lÁ²§À…^Cˆ¡òdÿw/’Ò¶—µÚåÖÈ‘b¯ï¹Ìà‰¦[ ²ë—8³æêóÅA~;‰ÅÏÞŒg­Ñ>žï/vÇ^-:šê­ŠWÒ@”ƒÃ Çs %y»±tÉ ‚PóäsÈ‘< 2¢kSɰ6±ÎÚ+„ý{¡P~*CC­G­B h°nX\un|+¿^O®ñ*SÄ«jµ:1m$™¬¦)@¿’ÃÎHjôZíß¿G| ƒ$UªA×É9¨Öâ|o°q ÊØËL¤ÁŒ6ºDÕJBˆfHÐ=bp´ÝÂòG×±½Düà‰þ•Ï4EÃGNÒ²iæ´ÔùS bô—Xžš—Yqfùˆ‹›SA‡CC¦ʷ⨔§†8ËÈ8¨ 4F'_Ÿh_shÁ e@?Œó‘´â¹ªœà¢+/dw8ÄÄ£§½+AJ4ŠÓ§VˆZs­XDÁTb.÷m‡7vnØKìsïæ<¤m¦K~Òh§Ë7©d¨¼‚ŽÛH¹U›à¢þaïù¯­/0á8Ð07“ß•KÅ/`„iÜ7L˜Ó¸Ç‰aà}ªõ•êŽš× éº×¶÷æ ýä «;“„ä"W$l昷Šò‚ (ìƒõ£§î¬¹$#mú†°±ó…;Eõí9PDéÑ4<™¶Pùñ¸®Ê-š’½Á<óªÅU¸¸"ØZÈTkošéVÒJõsôÅ*_'4Рã®êŠ˜!oHýŸãóŸmèBÕº#´awˆúç¥2¿óyw/™ê6@½eìr¡JXYôYámL÷²J\>e5*;wNš}_¯Ã?´‡­i9‡©*‰[·Ã2€‚#×÷ù&@åÖÎN[í¤ ”ù‚Ò(ží/Ù*üD#¹Zœˆ›¢àk,Ÿ¯.  D@^+•ÙºØYˆ¥ DDw¶Çâíö‡ji­ùk{œº¯A»^ˆ‹ý·ëW— ø ð""-Ïu²{ù0¼†ÛÛ"&ï—‚¯X—ûñýx]P DL þ  ï×´Â2ã©Vî1äu±§†ˆ‚ð6{˜ÍAÏX$ô^ÊËeh€G—œë;¸YÜšB"?eUÁ1îƒE$ñ´m³ž:ëˆpÃïîܼ=‹:É´ ÜÛC°í «`$Û冯 Æ©n/{oâÈj7Ò<ÖÎQ:ÎÝA‹æ÷±Ë+m DEC½fõ]ÕøüT9ì½´Ex a2{TöžïÞŽ¯‰~?†k€V›Bì,7º)K—ËÀ_bÐκÛ.í9ï§^ðæ[å·¢YH¦·l:¤&¿û’?š”`7»/„2ü5ª\Jnš£s,-„C‹ÏKnP¯ìgÖú´T° 9Äúƒî9ܰð4 ¨\}®J¢?…t<üÅ®D©R[…:Ï·öùüP£èýÖ&>IZG6fò(‚[ã8"GX]‡Ü]3ÌkÏ ®D^VÞ¬B¥9Ä÷_2Ô§¯¾/«ÎUéé٨磳” òõ3Ë̦²x6gq¡¶i‡Òzj½±mñðž¼ }Ï夃€3 ÕEnÒ5 q©¹ùPŠúuYív`Îù<`yyn¡!çØÖ&äù Ige€Pdµû\uTn)˜ È!8R®-‰îY–Õ,ú5ì A ÕpCm䥼(»4¥I¦­X°¥î¤NÀ–¶6¶7û’㱡2þþF×€[‰2ÍbÔà'"ü"¾Ðî3Ȳö7ô3Ôì 9-6-I¶q - ¬ð¯KΆɳ֚'™{h o8‡ª¦Ž!*Ú‘–?&VÐéåÞ°7Ðe’ÍïǬ¤ ¹)Šš»+¼£>ê>GuüœÀÉ"Oxü‹ _÷We¸E!&Ë?à{× ,€³¦S]hœ­A>OsdsÀvDwGý¯·:ö“ø<¸ÿ‡ü‰¨iÞhHg?K¬\7lV9xc‰¡(>ömwÞUëÅ¿¸·­‚¿„ÃþÝ¢Ç,¦Ž›¿„÷ËúùÄf ¡”ÒoÅ)ÃÑ`ÙbI_SV0r¬EÛ cùÙs£¡— š…ºM=±Dü±Ý=1Û뾯¥[ÔÒÐFƱZÏ‚læb‘|ŽÉÍœY'6ßÔ¯æŽ,úS˜=ÖÞ.hSâDéO!.߯a– PK¬Ý|‡©}¸B…¾„Àˆ’³Ÿì³oãUIl~,ô¾¯ ,˜¨g,~cCªnþ¢£›”gŽ&ZT¥"ëêC. ÌïS6Ø3‚íïïå_Âeþ½!à­æ/¾&Š ÝÑã7yïlk9oPᇬ×,]iÀö.!¶ò Ÿ‚K¹Y~œué½&f‰•qí–þ«(ÐÂF[‚žïW±Ùl³RTËë«zË]Ó€r…çá×UwâÝÀDU@¾k0Ÿ©³8¤ª¿ÐõþP°1B™À*|s£n†³@­¨Ìÿ°NÉQSYAò¶ëGÀ-D­’ Âô`a\\ÉŠÆòƒEØÕº WtH ¾x@+è‹ÿ"y…ÉÝe’Ñ1Ö §ª,N^Þ÷ø.ÇožÜv /íæëÔÀÑ·ˆ E~™ °½û=41J’·lRZ1”G£¹Ñ¹ó½’ž±ù¯m6ƒ—ü½lÆÝf×25z-è&¹­Ìàn%H ÷ ¸=½ÃÇ›klÉbȳ)WO=¼QC±Öú',—m‰`¡Ì' ãêÅ+¿ |uüDEüˆ‹3ÄÇQ™u]ýË^û Y8¿Š¶J7ðÑ‹ï„!¶•1_AÍnkÅv?¾Zß­„ži¶k_ÝðÔÚOÔˆS]?Xè)¯ð§s—6*füI‡‹!mŽ1ãx¾“‹æ/éB8ž¾Vªv;oŒjÉÙ+½×2½oØ"qG”~¦N‘=w’X× ‘Ù¹"ž¿…¡ Õ-0›k÷2Ó“ˆG:8_yHc;cv¼¯³#½]±gëÄRÏ·CZ4¡`[½îIñˆ±r•È8ôOJ@Ÿ_»²ýšœxÐ'ˆ23—Sݺn}#oÿÕ1*åXèilÅdæÂ%"áXøÿö“úéZ\Tûº‡¸³­5ú×xå¢:Žž²2³VûôØy/þãúyN? Yx¼òÎ}Kr+òªô´ª )ïe}÷FïáúÄ“°÷ÜÁ°8ù÷ eÈq_ÉÚüÅ™ÉÑëòD«§ü†¹¾þÎäZ¡JOùsÜÖày¸eÖí>¦$¦Ý¿‘´z.HäP‚íµ¡è|º Å*©•òÞ±X¼¢^·ð8iŠª¢3¤Ôo,èOöeãÁCè¶×_ï›Ûl÷=¹ü[Å*ß'œt-·ëko½T[,þëŽ"‰brÆq¸ô7AÛ.]~ãóÅ&ü@ˆ‰¾ÙF®ËíwÙý⺻Éz4Žý¥·iOtùÌ¢F›eT÷M•¢›OšÝRèÚ!<™ö&uß*ŸrÕf”´çëê¹ü…ô1À:† Á~; ¸š€ÖOå&pÙ¥â!"ÞYTcq{Úy£ÜŽšo¶²^ÈøÉBôäÙëí½Â #ÆçGµÛXÃ,-QK¯ù¢æËøëÚªJ`Y©e0<—²H`ÊKêåg†»cȱ}¡ëybíú `à|7JØ ŽcTŠX >‘¬0&$”*Íøä/`X@¸[Ü„ý7‚0’‚°ê¯°Õ7\’ûŠkΆñŠ_Qõšþ)*•µâ;úš·rôòÔ¾ö½`o?/$7bØcs©ìë¿–ïûä‘°¾ñ<ïxÅ ªÕ ²Â:ÍàVôà$p‰-u{øŠ©õwÇv//•¾Bñ°qýsRÉâAð2ÍF‡a€Ž!Ð=]›€ûªá8Uæ4ö¶ëS  Íß›!ÊQ§Èzþ»XbÀo8I»‡‡Rƒãô1‹ñTÛ/­]ôŽ˜‰C¼—}•湋M˃¾ Š¥¹D_Ûï5ÿ{+œ=HIžkdƧi)âD·ÆŸ[X…÷ïFಹ´f*F88RN&[•}I«¢è2ržÿ™Wžß!Ä£U"A`µŸ-J©‘;FÔ’Òz¶¦,ÐÎEþ<¼ž.ºHdeÙÇ JCy²[âSÑóFÜŽ¬GÄùy¿8‹rC€¢3Ë)™¯qh±ckì\x0œJ±Å9âiÌäùæÄjõ̇4¦‡ŒcÕžjÓZþÚ0>˜ ÞŽþ³:­+ ü°ð©»{™òê¹HLlpþ,tÖº¡cÞ)J$²•A®K¤fš°gìÏÖúmd}zrqEÚ“áKŒC>9 u;¨k¹ä<æ"Í2×ê ”¸3Ëh²áW”Vªä:k£’ó÷þ=Ò@Av¸ç*4ü ¹oÜ5íŽ&>ÖraÔÌ€ïâõ»Nµ‘F=IÑû—lP—Ü[LjÐtÑž¹>ÔÚc>O¡‰à- åô¾vü#Sâ{ñb(;úˆƒ2~ÌÕ—´PÖ ñŒ«óVY$¸Ó‚^ 2,þ'_ãýØ· õuìPâÜÁK¢û=O;½Á¦ŽñŽm@¹ÕTn+Y[z!aÌ©]3û1!0Ñ;¢®r¥ÜX "tšq/•A”16Jð:Mÿ¯Æµ¤‰/O¡Ö3g%ÑÔ@>í»^’&f©–í"1I­¢*Æ>\ù““¼“_–$·61bØ$[R{¦˜ÓĆ«H¦CójÿfLV ·£òY2Œòµ9õSß±ÿ”Òi¤0ö=ñ£r­+*±ÏÔ£#d~Å«Duš¢ÉÃâVÀÈz¶JÇÙ¶U/<þ£‹òÞù¤ö·TlK—‡’™ ÌD£Ý±ó4·mhQœ—FŸ³úüh:E¿öX½<ÎÏ`AAAE "-¨""/Fodž¼W""šêõ¶tF—UõÊCàæiÉ¥ŸR±†¨7ÁÈõfc÷t¯r)l¡ÿgÐÔÄsEß³¨]d̯’"oâÅ*–­ÁÛ…FÏŠµ*†c7lÒw¬“.åÒ—mÚö¡ßÓ#ýrþQ Áîq-:ɵŠ(à°bº˜®(n¤0™wnT¯h¼úö¦šû ¢¹ûxŸ4®\]=|îüînLí‡×Q­Ò[œoÎú\‡+¦øca8QŸ†„ªß“Z½ºA àSóF Y¬ú&aûªë7\ý!)«­ðš]ÉÛ& ø‰}†lˆ‹£¡ùdw¼ùpDt‘Ï/WP9n Hð,3¡Ë‹Ú;;s¥•”åóSa‘;.{BûûX7Û³uÖWÍÑ q)ó ’›1 þ²ùqSçÂÜè`z÷vá«û®Íâ.ÎþN\T„îÂgPò8þ‡±] G0ÖÂëU®+«”}¿ü*o8ǨCÏœ·N̰í§Bõæñ éâ4‘¨?°ÂÀg,Ú=\NS%³1MDdecIꮪNÈžÀ† VGrk$Ö«ô {ÉUÇæm:u|7øvÒÆ™Š6ûþôµž]Y@’v÷¨³ê €ß h<)xë‹-˜nÈ e)TnŠwÿZÝ%uNñÒ¿cé…–ñ¤m4¢t(rÿªó+z$füŽú©Ë,=Á ^-«û\¼û}aP³êIñÝ:Ÿ#`7»t!΃p`._ѰˆÁAÔ q»L—\G¡3/°¨Ï¾¼G\Û•O›½,¡g‰vÖ1Ñh=yy/©ÿH=ÜSL½ý!¤áûXî:# KéÙf×pôü8Ú‡(Ï#ØâÙ¶¥T¨÷Ũ‰ÉnH B°DˆÍ¨l²_{uW•mœÌ"ûrwè'ÕOy¦ö­ª(Èßi8M!űX æá=‚@¿ö21½‚í·²ƒåíKr®ººŽ%½ç‚ Ð@SóùØpK¯å:Ërç°–¿€‡sR‘Íjivåøò.]Ý%TÉ©"²;°sK9Gïæ§Í´W°ipˆ¨ ¤×cG}Ž[vÂIŠQh%Õö–A¼žŠÉ\ûÍœ{R¢'ËÐ*U¡üŽªÃup«‚éú*v‡XŸþ$Óºo R£²˜îöÔ§l“‘˨ß#d¿)tks½Pó“Rï ›?ÕŽ· &29_ÐöEÃ1žÌÞµþP¥„EŽª¦âkž¨M|TiîÒ=Ÿgôßø¹dÚTSÓøã¨›“૵Cë4-Ýõb‡c¥‘G{Þ77ùÜYJ“€aE0_o­í ÞxÑW<Îàœ;?;±”ÊÜö»-þ6Ù¼•²YŸ@""iXØæ* -B€÷óVÚɱ?ÌlÖåWgzßÍ?º°Î›Èé}º+ ãÍÝ«omœ›-oÑ·®«„›ÚbO™Ë‹ÝWúÛ©^‘Õ8)úøiøú…U¾1K7 FPð L'i‘K_Ê?žÆxÇ¿¹ /C´çßalÕS³šF?0 2Mˆ ¨ofX!-ƒø$¥ ĪÌ?½Fö™æÎD˜Ö½à„ úwÿbšWW/¹q>±Š?lŸ‡³‘1œ¶8t‚ Μòˆ‡ûyŠ—Õ0ˆ m™¢ê{SŒ£lI˜ÃÈÝ -Íh?è6<´J â—<târ–Ú‘<üçþÑ&ì`‘ \øÌBe°r´ã”´þêÛZ5þmÄ*º9|RQ7Ñ·ßjÓ‚·ãqhnv=SJÃqP!|Â#$J—üÑJ4;¿€@UaØD`}8­@cñ½ÅꆄÐL ‘_PzÜ-¦cF`6/U×+gM[¤¿Zá™lÆp+ÌŽTU2Ï—ÊßÙó4HÚnÄ>Á½ÊºµR¨ž›ð`QÚ <`aXý0iÅOƒ\Të Á.YÂD…7À¢DËÒë“ÌY í\OÛCà¯øœCÄø·6ÝLp)–ÓqDaf›™0Ž€©6宾x`0CùvÌÆ]i¹oJg¤õ+«¾ä2"•rñË¡ø„«FVnÔfŠ+“‘ #¿Õ|q¬ôsó ¼ÄªFí?íéé-]ò’ý>ذRçÇ ð¹ye±ÉWÞšP+h¢ã“ Ñ&XYémufñð]tXÿzS߇W_U+¦Õ Uü!G»LìCVç}«LK _G†Ñ?¼n˜ðÜÈ`Ãû¶HŸ|Ê"w…¾¶ôÊi"Ï×Â!Z7øz±ÿƒªìÿkÉÆv¸RGÛá*V9 “Æ›X^²¼ZóžñI†¡LSLúñ¢20h©™$*ÁÉ›Úìèû=®Þ’§ï'¾€Å¸s)ÀbO$¢_²Jr"ÄMc§UœpÊLïÝã7ô_,‹ ζZ÷¯P‡mh)Mò±>7ŸAㆢ£‡}C1BnC{n¶íKþ%œúå¼'ùy‘GÈPK}Ø=oAÖl¸@ "’"ò Œàj…:¬²ÿüy<ï¹ÀnˆÓ)HÅÈýWvöPØhê²v…àŽ5ßyÇÅD^óP"fT|¯ø4`z*¥Ñôë.ü¤G¯—ŽÞªè§?­—ûðf\\…Ñ]B¦{šçXs§óSQ£å¦4Íï,hããnê9ëNaz>»¼™õY·åp÷mÒ ò; „µ:ÅV;ÙþRÔ¤¶ ‚]0H šÛ *^šÜµß×Â}{÷_¤Z]ÏÊK#ë0 ?Š}ÄÏhŽýÌ1¥iáM%ÔáúXZ^ãLœPJC }µu‹–ÕD8¨߯û.RÓNÜžr+ç+÷»½Í$4L¦«fróç7~oYõFÂrþËÓm© ¼óaXîV¥8YÕ‰ ‰¡ýküFÔëOÎí¯¿RDT¸ïg^‘jèg5¶qÇvÚW8Ñœœ*ú°uCÓ)k¢¬týv˜Àè« ™05UŒûzäí[þHÞ2@ t‚ü]N‘¾áþDo”(½³Vh5(q:¡sh [o ô³A;ÆI[X´Ï9c×R±QIب¶¤")šlc¹¥F'»Xú¦@®Ò6¸˜=…wP³ N§þ©j¨*—ùd[¼s2:Ži|-åw3}"6:»1§e¡¦ª¿ˆ)rKÙu¾(ÕÆ1=Š+Kqg'%òÉbͰ´¶È¼"€;ùí1™t« J4 r‘~ÈÙ#¡çÑrgÒ|jß´Áj_Ð{­™Ã÷¦_Ië{ÉWªf%åЛ€ç‹ºõa†Ž0aø‰‘4Âí˜ÈJÒ;Ê©­ù$ãÓòiG3š;VyuŒþ`ÕúËŒ·ìñ5ˆ.½† 'ž¢óÎÚIÊk8̸²ùUY+7±–©ýOÈ…Q·1ÿÍÜFÚË1Ý“ÀyÆk™Qôó]§âF_Êäç[%6'\º¢yç\S3·ûCm ³Dn«bÉ8Ïc‹Õz?Ú ‚‹h`VE“pÈÍ™UÔGr…@¨ÁLCˋޓm\w9’x¾¿ÍÍŽ~&ǯëùZׂÇ?Ý@°)Ê ÀÊÖcÊifWM>gúx;²ÅÞ.»Tª¥!`˜ùúÎ R)@™à,†&£5PÕ²içD}moÚ5£o… §Ú^]å·êüˆ‹ÆYÖ~/LU5çþ/ºJÉEM4èškLguVøÆY X_óÔ“ÅUN$®>„¹\Þ#Ýzä ãN${Ô ·/ E¸ }XœßBêί 60{Œ4ÙöŸ«…¨ÝnÚ¤^(¿ 4û:'༾KJ(íÂ$&J9£Êb[£a­€°i¶×lž•a2íÄ–çñˆ±®üÏ*ÿÄs&9[nl>¦J,¦u ¨Ÿ£¥s÷þ6Jü=Xàt¢gH,Ìáo.¶} –ñ4´4ËÝ’æ'Dƒƒ›K ÎÒ ¶:†ñƒ?(ð§×ððÀQö¨÷?=6Òׯ-¿p‡’Ë_çì®|•õ(Òo®òX&Ç”oºÐ¾7°›/FšÏ®†¹Ç#|N|ÇDAß‘Û#T”¹:ÿ`âÜu|¬• óæíáZÒyï±bV’É?‚5¿E› ›E —à[ ’ÚJ×”LAÍo½ª¼ÃÕ2÷(Ýà¨)ÖXÝRf"Ùa½\ë8‡)î\å¼TMË¿âTUÓùawýú¿ôÒò/×®‘†…jýæ1-)°SþÚ—ÈjTCžb¯aÇkx—…Ó¬ N8ìÎp"- äwa~>¾ÓàZ¼Ñűnó=F©ØˆɲÀ¿ŠJÎ Œ+?^Gõ'Ÿ¾÷oËñ™èÚÛû»àÏ Ó÷>‹:„zýÉÑÉ:êŠñðà¯p¨€û¬2®Üáfum#õAÐó²fZ(GëçŸc&ée ÕS mŒh;ø¬–à_³þS[ƒû¨¾üëÆ HŒ¾À´ï”ŒX>ßôÄqQZÅϼš˜F#» =‚5дÞVò©eUwS Œ]þB!~,®aê\ü«µgY÷J†ŽPµL·Wm]#*¢@–o”¤ !1Ü{à_ΣÏIêhôÆî·;ïôÞ~¥¶êÖózØíQùuJ%àx¦šƒji j²[X\>Ûúkf!…ÚoL]âØR[uXžäi.š{Ð亘„WØ×à7WU×i` pÎÓ…LF/Ä ‘èÏ8qd¢+\#¼ÏPÍD@¼ý~%µ Òé·[!ûçúŒÕOдÇb‹÷ sØ`…3$ÁÄÓhƒô4áÈŠ\²=Û¼ªÿ=}z~œ!Þ @ñ6€]à+K–Áp-­þXÒìËÔÏõÈv²ØŸõn~(ƒwZØŸvV¹)UX†Ï>Ü C¢Þ€\¯Båf·jî òh§Q@XѰn´Šµ?ø_kLCv¦R@ð‚BÜO±[Å|-Er8sUŒ}G?%QÂév”ƒUè¢ðleÆ3´FÛ®ö®Nìv#NÕ ÇßsšéW`‚J*Èdc¾Ð}Ô< 3 úd! M²} ß/êÒ_Ç- ÃÊÍF2>J ëÞöÄ¢`ÊE¬†|§‰›«$ÈzÜŽt"ôJ»Šù§Ïhõ•¶Œns&*ž÷J÷êÓ¯rœú—è¿L§õÅ¿ 0 @ÙÑlÕ'¹R D^rDFDˆÁMa«.ÁÈþõÚí9ÛÈ›îEK„2¼£e€Ê–¯Ã½ Ü,©’7¤C’s_x1¤‘[uÑNJd 0Ÿ5×uÃP€y+ô£ÓK†}%ÃìøND+¼Qñuþ¾æG^…d§^3ùb`Oý*iÀ`„KÚÄÝæ[ñž ÕDÇ¢!ðáºDcϺÁVþI¼3µ5Ü1 9(ðg=zÞ ààpä"nÙ6ärJe?*LJîó£_h³^xó·# …p71݇l!L3( þU¿óq—…‚õÂûÍÙêo"r()Q6… ÂþVp@w^ù2‡O ' HMµ¯žÞf•s½îz]A½q—¤q°¢òRkÊë &¿¯B‹®ý˜Ü_Õb6Ó’t &>â]ÇZ&•¤1· ä<*sZæìÉgÞk¬ÀµIl‹¿w$Ž}t´¸šú´a2‚õ§ð‡$²÷£*’>Õ¾«\g2£B²ïÝ×?‚rŽª)ìÑZmŠþ„ëé?óf}Š­Œ›ØÄ4þ{üp¤+‘ãÖf£á0{Ënö’@ˆ%mÍü·,E©Ûwº>¶{Q‚%Ï ËEèxiÙˆ2@Ô_÷pÈ6æ'4vÌyQVÅtµÎl¸ªQÞ›úâ})Ú^jgÍ…uÈó»1+ê‡!gÿž]™a"ï°Lã;‘*5»L~¿¹²c©(¬=Ä+ê>y©›ÌDý¡Ù?[þ*‘@ ¯ÒÑh2keñ'†ci%奶þú’^mÓ·ûËДpìzìµéx6~v¬ð5 ÀÀ4*¼.âÂkÅøi#ØT€–Ãѱ 5 ‘Éa›¸Mà‡è~—Nê» 'ÙÒü!uu$ï¹—uKbÈlµ Uß›É>‹¹Ë.÷@µÂŽ?TeGÉ;üq[äÃpCwt4mbÐëê0.–lHMÍÆ×‡ç¢”<£äð\9m1*‡‡£ !8—W¹¦H§{<ÎWŸƒDž8+&•c÷ƒqÔq45]¯H 1W[ÅKâ~sVÄ#3@ó#D„ :5¨Þ›òʯðýj&ÒÍj< ~»\gó º¦™[$ák»Þì¬àµzJ.v{âG£ì¯ŸûQJrõåÌ‘±›1@±,gbýÄn⃠d4?§ÚÐý!üê³Z$ä¶¶RÛNE7š XëI•œ+' —|Zsüºxµ±ç~)Ú»A&L¸´~"±ñ~Т{Üo ÑêeÆbÏ1m˜sÔ ÷ö èü)\Þ“•¬ò!Ñ…ý¦KMCÒª}–èšêc 0`¯ЕŸl$ÃãåG‘ðÔ¾t PÔÎø@šš6JÍ ïëO°‡–& dé6Þ-ŒkJ<ÆB<:ãsöÑ@&–YÐÞy\¾ã›ušÜ& Ê)–l™\ë]Ö éï™\;OÔ“€ë"kYwÄð^ð›* ""@W^Zù©¨÷~4e ¬ú#ÍQGjÝ­^ì_+•]kj9¢C~J£Èà€mê;ÿN¢ £~û„9íÉäS¢~¿ùœ“›{á=sAˉ¼¡ /‰3¾™-‘@Æ|˜Rù¹Ÿ {ÚëÔ/ö¥qÈuA=@âÕ—¡Ó`üÉvB¡‹†„‹ ƒxP¼š,19Vܽs)X™(%bJî«Ú{…Åã {ñî5vˆß¬–1§(«3W?’GÊw:lTgÀúòta,Š/ÓòÈf‰ñQt<ñKãBÝŸn÷j/“»&„¬ñ0_½~ò•¦ Xm;\þ©‚»÷ºýG®“‹‹˜ƒÓùn›óde»ô›y})9>¯†­USD²»¤|Ç5'±¨oÞ¿õá½zͤl¥`¦J©Y.?<ƒ%0Mɲ·HÇtèŽM«Ò«Í  4q¡lÇjí_žŽi÷‡}ûeŸ)$ŒåkÞ‡gzð²áîCéQЊÛ,ß7BñõŸù’е£:Ûרá‘X=›Òp¨°Ù§oÇý½ÃË÷ù¼ Í4Çí²Í›,#–Dnâ+Ÿ÷âš¿ì€î@ˆ‹»§6û>#[Ó^clKÁÀmot½ü¿ÎÝmúÊßšË}ü åi|€ D@Læ_,]=ûnR¦ŒÛð}ùJ»èy–Êü”–{k­¡> ÝFÑl±5Wm{ ÀМht=…±€ˆŸÜÌe ¼;-ž¹énOJçJ<ûó ˜kµºtgƒë¸ÌGn(é•óª:²—^Ù-‹W§û²èúõŒp´¯K¸ïõR»Gý¿Ù@œ4³í$0a5‚‚†ÛÊЯÎÒì[ÝyöˆðS‰{ÌJž×†ÈãÊKª"®ªb8O„Ç~™<]2È^½ËIuõ*WüʼnԊo¢õzs ›S `R–4f+ñ[!×@Íè¹põÔ×Cö\îþ¸Æ•.7èÓ$îücéÄÛ´-é¤u¹šÜó¹t7{GÜ'ÍYZƵ £>“ö°í(>fÒE’™•`bèÿ*"Ψæé *bíçO3òMhÊ÷y–*:ê;$´ïŽ_mÞ@íìÎᔦ‡èÛ;­Ã%M*ï—õËãÿXõl°%jÕÌVú—2ÂÊP½÷/{Q†.åæªsFË“ýŠÙÐ ÒøkôM :§c>ÂZù_›ÎT!‚p`Èf E½R£Å4÷VG2ت­ö A€C”·n×AF±ÌÅBþkïsÀ_ÁÙ*ÀoœˆG¦ïƒYÚÛ®†È¯xž¿ZÈTH×µ»Ù§pßÁÄë¼Böío÷WhoJÀöCX=·ÁÉnƒÜ,æå|ïéÕ`~öEênƒØ²šcPdÐÀÃöwé=ÃÚÈ%ccÎÅ´˜¡“g9¦Ñ ®‰p6%°´l¯˜&%»ÜhwGú›äåKéïÁˆœÁJI42€g&غ­³Õ+\î>†®JqY—¼ŒÁ1T. ÓÚg…©­{u½ jI$m¥ÈìûÒ߀nrÐîN—Ì_ïZ ÆË¹ìúowÝaí’¥ä<íD6IVÅ ç¦e*iX$Å-Ƕ@ˆŠÚˆŠ¤)Fö€DrÁ"""* {Ï$ßH‡ì¯K„É"úR66'v¶@Ò\ªŒ„3|•Î ø¡vÄñ9ún¾¹n(³sS ¸ÞtŒºgKV"^ÑSh›ˆ†(Ý¡`N8Oå>®–¡Ìµm&Õäæã“ q«Ê4¾Qßi&´R£}CJhªNÿz«ºü×Ct`Eʦ;©o™©<ЙóÏÃ9RœÐ7 ©©ÍSÙÞ‰ËW"á\EZ݃|úȲšþ1•ZÐöþû±Õ,iUÙîæÓžîÕIÿÁí‰Ü™ó{¿øÇi&Ó|e‹GŠþ$ÜòJf\êõq`7ŽA$75Õô“{¢¶ìù>E>AȦàðeÏò¡ä(8¹xÌU6yÏ3•a{¥¿O^ƒ’3¸)³Ó/£®ñÅÐãZ,³Û³=2±”§i—*¥jÃ?KX(lÀÛáÅ>!“<â¯òhy*Ú›yCŠŽ( Å ““²úæœ=Þ?šü,î«ñ_÷Q!»7†ª­0µ>¦·»Xµ}d‘lçw^HäU|?8)XiÒÖŽ!ól?Šv]¼¹-wº1ym¹„§)¾òMû¸¾x¡£B2»üõ‡ c[‚t³‡ÑÿC†‚ð$ùÁßäÞEÚ‡®¦D”µ’»¹ßÞˆ¡iµ,zdãÄ2ã;"²6"8/Ø: ÛŽ 4·? ™“ëüüö{…N~Ûż h‰eNh{ÇO­_Ô¿àËuÓ];ËÜ_|¬”.‘ƒ„|èÊ}Ç¡€Ðf­û¸]Ùó§‘K>}M'Àõ>Çé´õ5Ó꽬Ç:‚SFZ=U9Tz´^‹[Ë3€O]ž ‹ˆmÎó™ß§ô1$ø‹ mak¥R‹3LBçðÓ#t¨m ÷=¼<¥’ûöß³6¢>r×Ã|“žs†çi °d­P¶…5¶^Ò»Ð6@IÑGü,jy[ß &zÿBÚŽ´L×§ï†ÉèøºHª>ü-¾*Œ½ÏÉx å.ú%ƒ$ƒ„bÊàïQÂJ¬‰þJ£_k_+·@ß_“@…àJ¤aö§ÝA¢)þ £FèÖjÅ'äV÷àl”@8™¤‰¯zåñæp­Ýå ®5UÍseMÌPˆ÷]„(€kÁÎAðâÛCÊíÖßß|ˆñŒZßi,ÌžÑHå‘"Ï"¬BQŸœ2UVkZÑs÷~²šû³ÿÌ DFDDDk öûKîÌr«]¬÷u§©Hý[§§íU¾‚Í×é̽Ö0ž zf©c¬TArB&_”Dܤ–]8Ο·°åãGãZczò“òIžGóD«÷šÔwcý»BÐæZ5^g}¤Ü>×>â:dz) êf±¸­¼.óãˆÍä´Fæ7'?w¦¶6Á¹c-XQ…D`þiáƒ!bÁÛ{R—ë(¤¶ j M»¸N·*? ²í~z3á‚ÍŒö¾º°ÞÃÏ•º“Üuo¦gH&‰3³Ò,k¥¤"øk»WÀiW1Yæ-žXIŽô3Z]ã¢x¥3æ‘…±QIä¾YxžÔ½ðÐTjó?ç+b9š\¼}2_‹èlÙ}Ðv3d,혱|D;Ä“:3'BÞü—#˜tE¡)„’XôË–¶þ0€0gٌ½Ñ2ÚÝ’uÐPpFnÈöÉüDÏoj°°¡^$¬=¬@™#´Ä掮+̦32QüŽ=ñî±çh"$ÅŒ§ ™"OŠŽ#æé;Á÷vÂÆ=»lØ»UÛÜyQðž@¬Ìzj(¾©¥{Ãgf:h€Uº=½ª™HkÞ×d—¾ ¯Ö$áèq0’Gá{ï$]b¹Iõ<œÆ€‚Wð~xµ+Ö¸üòl: îÑ…WOI=¹ì¯×¦ö3ª–±üô*"=ü¾åWâS‘(ˆËÁ()ž¸GL‹«ÏO¨ý›Cµyé·¢KënN9ì‚ÕW™«ã4r•(Ÿ6uùI ÔüNÂ:ä¾8ßµÞ ¶‡.š™sŠ]%C‘Wþäj/ÞÝlj$ŒÏd…ÅŽ·¢‘(Ïi×­89Ó&]Ù±˜ÊÕ†Ÿ8_(ßJrÃÃîDZ›Aýù1aŠT ó±<=s~›Þôjxº•5>¶¨è,Kªh4öš¯ŒN Îøó&üOÚ`0€ôë"ç@儌ÜýÁ’×^ºTéýN²£B¿çoù&zŒ½PÈ€ D@^N""ìÐ1!GÈ‹ü‹¾ Ì¦ x8,ˆì`—t(Ö/ûc×í>,’L‡ÝÄà@…36ïÇ<„´wâ±3-²~õa%y ¶. )š%BѲUʾ¼>%à÷ãi¾<½…±É¿&x™O¼ Á” (Ý[{ìÆ»“§>EAm°ð&AÿÀá ÅÛ¯§Xò,œT[&ÑëH,ævy·‡×SNKáˤ1ýEy¥±Ból›ÌZÝó—³­ÔG£ ®È@ñ„Œɵ88œÚÉÌHÕ£«YFJ^;›†@K¡†´—KXü>ØHê¿Z#ú ÔƒÑòfo‡¾˜R ^½G¡ö… ve‰ÇHô>C…F³¢Ä¬OR¶«S%™Ñ-ˆÌÚBÖ»y–ž1¿»Ý>ÓÄ­ÆN?ùMäŸü µ^ü–•Þ!×i©rI …û'̵hÿYÁ-œ·ºiטj®–-C'º‡ßzûœŸÛpšÎ…ýo}B9ªììn Ã²{õO¤»Ytƒ ´¼ӥÈÒìûe½v Ærûøèúiv/ô¦o!Þû®hD[\¤c ·-—ä6""¾Õn°) ¹âÅ’ß„'}p_}´¬Y0‹ mXËo‹Œß=·vV–âöøJf”…ÑÉ~†ºjï4þy=µÇóŠ©aÏÊ™5_Åý÷â‡0 9zèÝúq€3Ìì³eÍêi3<Ï$ }“!úxçm,ï›Ç72ìGFVŸ¹ExÌ‹nH!QE¿Â fJ+ýàcÖœ;2¬ Râ"€ñ©Ÿ`»¤Õ.œMÐ$­’®b=¬Òe[Wÿä³H´ít—V¤Lë¨äÌ´qd¤ô}Tåí]N!)5ä /aQ˜ê¤¬ƒßF»¹®B·YX4ÁÞË’Q%{œNÎà……° $ZZE¿5·þ`®p¢±|ùÔ6¾Æ§Ì¶7w<3}&Ó>ÜD°«®kŸÒSKïU@I5<:°ŠÖ†×ÓVuµ´" ë–?j 21D$ËÔç¿É‚3P¬4êHʃ«> ŸÁ³­f=c#®¹–tNjRåûÛ ÷÷г hѯàÃ(¶ÞÖc9rdgª÷Ÿ?F² ±ªù] ©@h ž±ó†¨ÈFv½ˆ²ÔA”Wý+¡Á æ¼´âJmÅñðõ4» ‚cÑjåM±ƒm++8*¯ˆ‘çðAJÈ‘ëºõô͉ ÜäÝ;ýa“ß’MªGöÕ„jBÄM@UÍÑ£rÇ!ùˆ,5ù‡zå±hšöÉh±á@°¿Ê©»Ÿj•%¯èþMœyæs“O†[ ­·ß“ÀY—ÝFfòÄÖјöˆÍ)Y“SwL[ /€Þªh­`:|y®¾=j¦Å ›@­ÎGQ6œVu½;ÖÔÂ÷ñå_ «Èî]:;ûª/WÊ?,¨ìEVªú¢— ¢I†•u{aü6¿/j]ÃB÷cUɽ¶ fSÁ=õФäþº¸€1ع% S‹_/žDNõ&¡Ï£*-ówÂ\‰Øzz„gR235½_$?>uŸ‰Z•î±~½uØÃºžŸóVËU H˜c‹™$† ~ã(f‚ùÿFß÷ÅÓ²'nÆÓ`¥ÆyäN¼þ>Áõåá¾g§9Y]\ ¦@1@d½²êªÊá/ÌÍØ*2JóB-Si£Ûä¶Ó'/¢ý¶jeVý †,™ï²aw=žúx8ù«DFó^H»Ã “ÊÒlóšç7}—%:Öri ÙÓèDwÁG() ü¤†zž[³8¯qd&‡@ Á,ÛIÁyýÑ´R—lu„@n¶ˆ".p""$Ej0KDD¯ÀÓ?V, ¶»vl’»¢àÌ(_Dj¹ йˆôâ©\K¾þľÛžÁO×Na6¹˜ðo;žü5œ†ãäVL”ÏfAß)«šþÓ¬Uƒ>ZNrqÐY;›&æuÞ‰¼T¥<®˜äœ4wü²XcMÑ[Ÿ'«¿1±‰(ûº–ËâM¼øÊc©ÇûH©`YñHŠçy:ä;tH  ëVIÊjs†zúƒYÇc,Fbñ,§ÓÀ(¤ˆÃFÖcú2шº/° \Ä1†k{ñVX°Y'*õü£Xã†5ì‡ÚW#Ô…vÌ 0Ê&ƒø¼(W•``çI*HåÚwl¾@^%} ^=©oGË!;Ö¬mþMʵ^­¼~‰ºKŸ ÛàOd”Ü'é`~ÉŸ0¤Ìöv  4|]܇sl»p‡îzqQ/“úAM¿K-H=_¸ÛàsT)]9( ‹>[ýgå´]àý Pc/TÚ_â œgíœÓ¦Ó™pñ¿9C„þs¬Èbì¹3PYÕf<×íïil·%ÒžvÙ_hZj+¿¼mq¿E2ÐWRLÈ gÏžîÐõu ˆœt~ßGÅÜ<×ß)_…ÂT=^ç›}ö×ídtéγf;fUž¾A&+¡j_j½¿þ Þè¿ôG Ž’èi}˜÷¤aj âvØÈT}ò¸¦*eài’+ì—ïoˆom¯Ó˜Œ1Ùz>A+âU¦„I¦L毇áiAB?xO¼à`AûrDÇ¡eö|yrêvFæe‹ï¦1÷ªØƒ¾“::ž™O8â¬gµ~$06 z,?r ¸Ä:‘»q>||8ÅL€€Ù&m$1ì-T9~–Cæ)̬Y†fõ&L[:J[{æÿ¯“»7K#2}Åšh)ÝF—ùta€JhÙx;ܹC»¤}¯?È@ILj܅@rýG6¬Ö÷Ž ö #+ï ºgÞ˱™Ù·á»aˆœæ"i„UL.¦¦Öb<2ñw×”XV¸,ÒtÐ\R¬:޲qõg Ô|{p³ÆüÃŽ*Ð<=Æ›ç]|>rÏX÷ßëk¯´éçŽ[*PÐ;sgˆ~wëbð¸ï˜é«L>Û¹~³~94),(‘çdz¥"§øÑ]Û¨®•k#ªP!ŠuUî=ÖÏUŽdäÎkÀ\¤î ÑƒEëV”5÷Ï6Ü~É8Ê')ô:Í¿¤ãP)c—^H U ?d7a;rR¿,ÂÁÒkl±±ø"ìê‚2û›k‰óË%Œ¼*K¦¯ˆš¾Þäyx v1ï·60§úˆD½P"fGó&íîÃ)ÄçH`ò¨“}‹?¶Y«ªoQ»[³Lªÿôî«24›ä¯ÒK£ôäÛzùðBÉH— ¡ÉK…íÿ¿¦¸¹aiÕDV64#N,¼1,Y%~7! ¥B£‹äÓ¹yµ„aÖX=µ†c]ñàd¦™m½H¥úáè·cÓÖ`6ä9åU€ÒÝèÀ´Jý.'¬f¯ž´†§Îk»sr*2¦%ÿ[›ù°­äuå¾¶D»1ŠóØÒù§𦩢o¢Dî®,=PŸ¡LÍ{Š;çc»Á¯œ ôX&$ÿE¹¥O®5 û,ÀœA›X¬(=^tb uYN@¾·¡T¾N ‚9° ß\5ºñÒŸ‡õ"tº¨½  ãÃ{ýé*ZfÿúÌ»U‡uádÛ<Ù%¦±Nô¦½v ŒËjHKr³oûÖѨT”ÄÆãUÝçŒÒÉhŠák¦£ÖÔâÇÞ9Y…$ð–ìI;Iû›Ì||˜€D€&¡}ãÂêP¦«xº‡5êè ÿ³©Fµž»ãõ`¦¼ÂU$½1O±hj !‡˜›õ÷CO²~ßñ¤±ä›´Ž™æŸ™štÝá©êJK/ÇÖü¼\íÇ×oA曳Ë\0Ü“fá¶0-n¹nó˜ò£…?¸º¾óô‚kô•ùPƒî½V²`ùGX ‘à‡¼Ámu*=}Qùñ¢vÒ9éR^Ä«{ö-´L÷<úDÁóö;×ÌoüƒûoÒj±W^½]” Çù¢ŽÒ†Ü°Ë„M¢d¶­”RŽÙ’ ¡-Éíé þw¢ô¡¬7Â{C»b#§{•?Ûg‡ÛÔAwËÁÍf7¼ùãȾu0“T\ç=?cänT8§eàY[­åIœ=ÚÍòÄNí᳊OO¯&`ªJ-ÒM±g³£YÄM³R2@Ôj‡…:³».b‰ÃÀÌ•; ù&ÀøþÍÅ‘%¹rÉY6уÑ~5¤l6G‹ûÓ|—ꢊ'Êÿζ –µv•õú / ¼õÝ^h2EÕš ÔÁ¥DÞÏñáâ~oîÕÔÞNo:!iðI ¡,ëhu¾?ãpúÎ?_ŸM³=þêmÖs¨^ ~³6ö¬Y­yÕO¶Æ¬¯1o—„Ëhœ¨Ê›J9QŸ ý¹¨""-~¹Î¡ïl„]ý8&`!VËL髚íL™ã¦£q’¾'Ø#DÔµÿÅQ“éöóª >Ü®¦'<¶mc%òì%›ûEí>î´Å‘CÌ“-)Øç³”ºíÈöÜ„ÊRÕÞ¬ˆ+ºÁ HQQè݇Z¶œÄl‰~WãC\ç8µÛ8Ë/B öB.`R(4«ÙÑO-˜Ö°Kœš£T ëOßµi¡ªP‰Þ½5Ó2<ôÙ›E$yþps™CÖOè8 q»ßCgç]H6.wK£6ÞÀ¡p°PCd«C¸˜†À€ÇœõCÁËÀ‘fâÿ?°LÕ{uÁõ¬ÊDÍ(™{e{‘ Q¤úô´þ;d‰•Øõeãø¾ômVŸy ŸlcÙrÇU­[ÿOP[Òli«Ýßsï‹AL¬ a^Y»jÝôÇvÝIZ6cž ýNðyã +YhPòËëQKÍœ—ñôÕ>~#ï´çr@YQ€ÿ<—›˜½f.âšYç:gÎD¾ (8[AûËS˜ktÕjîåñ†6è_;À·|îêÌO ÐÁþÇP±+YŸ½¿³?Øõ% ËÁ½Ú˜3Þ¥pð º5×ßsð(~\®ÕøëU+ù|ÎÜéSã^?zTÞÀ1å æ† –Ô$ èLYà+Ú£ Ä}â#ú4Ð)IÞp¤]S{“hgoYG°¤Þ--ÙüÑVµžÂáΡc³#¦Ùsõú5ãÀ„°HÛkî #ÎØ1®]í°²üÑ]5àñ}gB*+oÿ02™Hufqб¤`»O9•¿³«Ð] åni ‰žTÈùWÝ°õâ׳={ÛòCàa¸îØŽ¿Ó½çP¦ô¯Ô÷gÞ0•ÝФRøúÐ?p4Î'³äS,ïmV¿% “í‚H#6Ôtó¯ýƒ¿¾ÕZ–§h!”)œŽáV¨Èß,»´QvsáŠ\CEú†A{ût?ðäs•-âïe°ÛŠˆŒÓä¸+ì$#S+‚ é0Úë¼ÆËH[ó¡s…Q¸(+‡.(˜Óðç(ÊìpVõÐ3±oÚ#å ¦»# dª}ã=ŠGªš¯½žâÃöÿfÃfMÍß_Zi?fÍEê>·Vé×îÞ´2 ÷â/m‚6Û#ǘ¹sWCçàåùÕ5ÃwHèHÄŒƒ_üÛnPöšW®ÇnéaKÕÉŠ{ItygõþPã½.a'ðfT-ü¦†‰¶ØV–ƒ‚›¾éyØ2Ä5Åe…åN‡ðbZX€Î«_Ê‚;åmЄGàˆ(Ú87·«*Xè®;û$nBÌÐý†GnvÆÝ,d÷Ë ?г÷1% %‰ §Ø#7KícçCݰ7ÿU脟ò®o‘õóä7Ÿ¦Î.ëÊ DEŠà?@¹I@ÊA>+”ä#—Él@(ž^´ë„Qø#Åp=!2¼, =nºùf÷Ä 2…Œ‘9£ Ý mÑùÃ&œÜoÒ˜÷BËõVTK^ÏW}ÝìuדšG"™òIfîÆŒØ:|6pÅ6¬@öD5]ˆâÙ5ÑWå¼ ûÎÂÒNXÉ‘í_‡‡y¡€i«’VÈ ˆ$¨S\Þn^ÈödDÚA(»uÿÍ" \±¶ôÅsŽ®óN^°¨*ý-ƒoâ¦ðN¸òZóix›ò(×Oh°(RŽùˬžVu·7[ lHªR…ÑÞÃÅM6¬\Œgu¼n™~¼h àŽYLÜ,¾ök^p€(d`G{dx¸–W*üMI“¬³ž1W­ØE™¾èMÿDzDDäa‘T V2Î~²±ía®X 9yžgúh¥²ÃŒ&R>ãniÀ¾÷”÷QŸ“2xÒœËc?Þåò—èO0¤ÜŸ‰ƒ8%ù®pˆ *‰þˆYîÝby2w‡WQ ŽÎÕFŠԦϾüŽSÞÃÄøfFC4æLûýƒXÞã¢23xÆXh 9éÑ17ç-ëv…“¸ÅÄ$2[ ÜQŽ5vú}3§ã×KÈÉ7„^ EÚyá¡ëöx"fScD*ý—æ\8ÇïÒÖδ³¼(H¡Ï^·ðË.“»Q”¯G›­± m™eùÅú<Ã:ú&äðÖÕó×f<>d¥G»žò¹›âÀßáVÉôc{7,iÞ¶ÎÏÆªµo§u^˜†Æžs7Ásn“1Ïùæ¬XÅÔqFÿ—9£Ëiêµv‘^Ͳu#Ì‘‘øoÉL¾b1aâØÖ¾âbO0š_.Ñ~AË›»çÑfÏýg›é‡\âwóEÁÎÒ/6ìÎ_-Zþö|î5O‡c‹""ˆ€µmßx–ø9Ök \‹{Íü2Œ9ì¬|Å7¤Ÿd'[-¿Ù}ÖI"+ËÝ>ý fù²µü`*fû¶ºRØ6lùqì£a`gStÀ'~ó5èeû•„%úi/m+ú ‹Ñ2oî½Qñx3X›]Æ,ŒæzúŸ{þž…áBÖ&¦{6Î(tJžG5È| ¯ėJ„ÜÞÆ¼Áò±¡UêU n“ç•%«ÜîŠÞ?ý¯ß˽u[3ñ•·]w†´–¬\â7Å03`¨ö0ÉýÌhK)Ó•ruD*û6ÿ|cf¦VJŠ$g®È¹t €¦.lR™yZŸ4 2;hƒÚÛ§ÎößWGÑÿuG!Õ¶>K´¨,™vsúü»W:¯¢X®Ë%K‚_gèsæ ^,U@œÞévB‡Y9“„• ‹y³p·0b@S˜è¢ˆwW1ˆíaçbÅ…ªcÒD!²¬6¤â,ð Q…j1X ®”OÖeIê=넾]™ äÔ<Ùü_¦8<§Mωs†ôš¼Ct«í»ÂQ$ùßõֻɛbÞÔ€¾ÿ”¼j7ëÖÞô9Ï ª/²íØÆ½82;[£ªú0)F©¾ö–?ayÄúñv¼^sö(—@ðY÷Äæ¹‹;°ä{D/¦|äÑ\±dŽCGƒCLIá~,¡U‘n Uö K ,YÍ(0QˆÐ¬º²ÑÈ€³µQ(¯ÒxZkvSZD36N ÏÄxàÈ"VQ ‚’öõ¸’~vÝÀè±_!˜uÐ ö$7l)šæ ›ü9E_Súï»Áhk¯uÖš3Ðæ(ÐûkœBûÚâÇØôˆQÀAŠrþü”aÈÜéð–z€ëâO4¾ßö«¿SÐL‹ÚýÞ´DW ȼLÎZÓ0î ÓkzakåíaIS9ÔmËŽç©/l‡±Õ§p;ož…©\–`$©Ž R¾X=Õ' úç“iìüý:Y;Êé=ΫYõq ê×.3HŠ!%ó}š¶æÿài½€û>9á ”Â#à !É‹*í˜YúKwQEWaû鉼 .7F(1ÎŽØ/€Wèu‹õUÐIB,ãâr¸ÏÏ´ËÉwÚ—Ä:#0Ë7# +é”Ǡ딦‹Vé+f• –iA_'ì«2Ädª†€Ôž.2œD?D¿´gT²`x[è¿¿}ê?˜ e7ïpKA€ fÉÂX‰m\é,SúŠ0A`A Ÿ 9€œÍãáä™á9F¸V„›KZJƒ’á±È\9 !1.{ðxhSÊ¢w ¹òº®8â õ/ò‚åú“íÎ÷4¶Í¤Õ“ñ¤÷NŠ"FÔoè[RPîvðóú;­ú¿“¤“bŸ2Õý¡ÅåÓ§ÓßáHßgž+ ËIUšáû¾_ñëÑŸûlbVu" 1ñÐ|çܘ7.>6m°J¨ÝmBŒ uø–¥e4€ ¹p¹|·Ùº¥;f{k’ƒ~÷ÂØ FúýxwÎÉæm8=®ÛìDDÑ5zMEÇ g®J׬þìÔ×X@´5C<æ÷ˆÓ7³×çµbÔᚇ©•C›šI‚z0;ÒêµH`Þÿ­&&ÅŒýÚ`! \»~ö)ûãšB30ó ©²¿ÞW ŒEq:ËD“PóÆbp*†oÇ[´“ çd§Ê×'À©Ûj猬 ]$þ ÓA•@úq|“¾Ú»âAÛ™xo«*ö¤ì^’¤m¡KÐ(Kq©BTØÿº:dåËO—ЄA¿n`42VÅ/Öüb;÷À:PRð×½$ À™ˆ Ru¢)áÂ)×í²_˜—͇5®Á2³¹õLY°–ÜFÿJЙ< ‡;;l;eÃ&p]ñÍF¢Ïjź×Ý)ûª¹f~&ö5æßöÙ:ôZÏcâó¥Ùv^RÚÈàÛNV™¨K¾}51ne¨TÃÜ=q׊…äÅVã œ#8páh4]¯4ÐÒN:’¡ÖŸ¢qß·Tƒ¬±$ÅþUîþٸǞÖÅ‹RÝT Ÿé·ö¯toQåÐÊf§iîyîõljJXuä4È•TêÃ[w"¶X2@»¯Ê‘0OVãr¼#>0zqmÎw“ƒäò>©q°SP+©m•‡fèeUß!¶†¨(‚ʈÀ•f¦*ÃôjjJšƒÑLFpãgëãÇˉèú@ulèý8dö¨¦*F(‚d*žr‚ZÔzÖ…2ý(¯:šE¯Æi,‘¸UP­–ÂŽÄ_ê Kè!aF¿MHã;í“_¯Qg¥Ãj6©÷Qmñ2ÿ~D|çO?±ùsï•®#»ýUj˜¬¼ F[ìSDÍÜ,¦ü}..o1bÑ—€‡~[Žaï›Ç¥d»üK]Ÿ?¾9Ç®ÍæˆßÕï#!©®§¸ÿgž¨ò¹×ö]¸ï?i¾2s"bÓzF!ŒtÄ`%ñÈŽ©0#ÆC‹À³ÝVŸ %>§£Âëfen¡ÌšI—ru ˆ³_¨FðhåqÔ)žœÀs< ‘b1äwþ8Ó¥¢>™]•];çÀvï>Ù…š;ÁLF!i k ø "9‘ ¾ÜU$¿NÄLœðg•þÉâ÷%%O˜]ÐFJ‚Šä­î¢ÅË©"Œ­·°äÆn¹XInNð{Óç§ü÷âU±ßzã+oZi¿¥ØWyó³Ðá¤ÑvA‹»%ÍŸprÃS¬*¹bØrÅ(ú½Àøá2ü˜ÃápEÚfx°}xGdž9ËJ³ÖÑ|.6[W¥£lK¿6•8£yÃËîG©ÍcºQ<Á…ÓáZ&¶783HiÎY}¹É¡Õ¾ÝXí²WÛ¾â¿À6šæ½ß±{XëØj†IÏ>»Ÿpä$Üj¥AN á€t ˆ7¥UWtÔø£9P¾ò+>s •BHéÇì0á(¿WŸnÿÉÇô­N#Ä9 Vy9bnšãxe©ÅßM:v$Œ÷÷˜"Ñ)âU[¿›[Aã` ³cLÜZ2ýÏô“jÖ à~D© ռЬnpà#7ø¶PkUQ"—ëá'Ò4#©àz0¦Mële—VÅ¿ˆÂ¦°7‹èˆÍQkÛ,”Ü;õ«Ø”æÁÒ)MÖûXd^iû©BfŠ~ü£1› °±'5=Ý[Ì×/{Öã‡H×ÈT·˜uJU$-“Óø¤ë)[Ì]}Ý*A$¡ • \ß°T3Cæð’¡/ïB‰mÁýûK Ô”Þ”®TÙîÀ¶‰•ÞH£VmK yZ€]XáNõäÞM2¡[×Ð×{Iíâ¢EïÙ®þekÑ—nD°É§]‹d¿bÙ]~8'ÅC“ ùÇŒ)^Zêó$äìóJ6f¡†¦3[Lö«Úœ ,–áÊ“°Ñ*’L±‘.kßc©Z‘Q5XU""xpݼÅ@Q”BpŠ;˜Q¬hzÎ1%m²¯XšÅ²†aÿ¸É€ ƒ´VÈT¹Ôáv2áZ4…¾=Ÿ•«‘íž¿­QŸ bg[áD¼5ë·Wé¯4a¶-E §6e‚ü`ÀË@maØž’Á»ÅŽ÷ÉB$°FÂbŸÓ¬ñºA>Y _ÓëËè%¼mñþ,:~õhÊ·õéЂ£¬#Óµ{%«t\¨YËÎñ è€ Ûá@rk½¼"±0ì®péÌ”<ŸÆÌ€KÜfƒþæ;îöTXÎÓf‹%çÆáù kþuoç>ÅHÛrút×âÐÏŠŠ`ý|˜X—$³% ŽuÚíϰ/5<#³¤ã‡3Ñ`?=‡þ3-´è‚ìZtJÐ3ÅY1šT•VŠz°OŒÜá‰Ó‚½†õZÛÎÿ^9ÒGMT}Uǰ7nÎ"ˆ‚ªÌÒÈE~P™ü? $Æ~^ ð6r}Ì>†-ÎÖ}…‰ö™t^qù°¡Ò=F ,TÌŽOíuóC 7I ÓŽÖ L–ÞýAòóê+HjÝÑÑß(uc¥ÚÅ÷e‰B4|ÿ“ªì±­ŸùübP0?røÆCaÙW›B$ Ñ]/Räyr³–R¿FÙ¤Jºò¹œö¯Ø˜²@ËÃ,Šw^N¤tú¨õœÄ“€³Aö.¨û‚5÷}Y¨,_‡gŸE©Á¼Ð;¿Á¨^¢ÛtúSþ&z÷}œ •¼‰Õ˜•+0&Žl}.-` oÅÚÿ4¥fè¹Îoµ‡SÓàwàm²‡å¶J.ÚŸ8&\çÙÚüUÎKßúãÎÙÀ¨•²0xf¹¨ßÚ «€hr ·âÐüJ5@<((À…ÔËø¶ëßN§Æ‘LÓÚ;<ªštÔÔ Ÿr>Xz½s9¯cu^‰®8v£˜H˜<Ùö7á»¶5„ŽÂ2 þ¸ÏîRœcå2ç¼(Mï^ˆñ˜íòãÅ ‘}վɞè.QÈ@R‡ÔùQvs®Ë.T%Ü/°ô¹ÿ9r$Š’vÒ9=J žÅ¸™«g>°`ÞÇа Jˆþ‡¶ôƒ%Ï K‡xÑä tlËÆj×Ùuü ÷‹º—Ìq ôD´ÅÖ —G76_é5=ÿk´IϧN–0˜m·wcW¨=ñ'ê‚ ì‰úçŸ\5}²{=ƒ‹¯19]å. 4k÷óú"h&T‡c" (zUMö„ðFä§õär½Ð»^³b A¡ºÇ&È›Éor>hÔYˆ q§¡1‰I­›x‹·ìúýRš‡A}pj»š1ëzú]k¦03‰;íù"½÷"8ÿˆÕ00n¦gxaì™M¤.rç&ˆ˜rtt"ÌûoæKbŒkZTÍ•è5ñn² 9Çijÿ0-\îÒ[üþ§3Ê‹ª‹P¨ìÌóñ ñ‚šS°×B¯LÎ9Ç—³qì¥+41%ƒìaS}2sqò„Df´ÿ$;/!™ Τ’ðVbffçq׊ӞWa"ܪæKÀSj•â¶Ä ÃCPÊI©vs{Š9^ûS üÝ£‚©rF©‡ ÎîFq¡{Y ñô†ðüBH¾Þ{D0Æ–ØÔSþ^ w+ãI¿>‘à¾õ@œü‚ûÉúUåÎU™Qû¶éUä¡’,Œ´‘Nº0þ"ÕÜf×9|[Ÿ´§4nÂǵŸµ×™¥…‚ÔÉôR¹KÔ–ÇϜ޵kY|áÅ@1µà5üZËYûi]‡MÔ?µAÕ| yIгòÍÒ;ãSXà$í¾>IM^Rï>Cþ³ÃöwWúêfRägu§ÒÊ—<šdUqÖâî¡o襊|'%†xGGMŽ«å—ó ¤IIG’ü+(<練Ûñµs…2ã‰yÎ8Y¡’šõÏÍ'iÒ©îj7,ú£›rψ6ÖSvCyèl{™‚‡@ü8ð—B¿.?-äI;D± z=!M2ðcãíÖ N¯‚Qy¹ÖÁ]½Þh(¢l‡ï‰ê” dØíøÆAþ‚sLJVïÞx® ­ ŸðŸF¡ìF¹ö<À㣤e§Þ™‰—®ä{ŒÛÁGg]4|qΠ½žÍDj~¯ƒ³ïd¥°“âY+óÁHÍ« ÂЈEsA4bý=ï™bÎ×ÂÒ§óÚÿúÝ{1Î!huÍ'/NqÿéÖgN­ ’û³xHî)¿ËÑà ½ÔYv@Rôg¼Ô_•Û„«Hܘƚå1Æ/€ `¥OÄ7‹²<ÌÎB2=G,OYìtêdK åÜ⇬6^÷W´1Ü×ªŽØ¨~U»³š…xáWç%ò>&Òîû·ŽA£ ŸËbeÓ½è+þSgÃ>»v{x­Ì‚ k (WqISïaºÂU õèœ}£æ™õïídP>MA“#åt}ÒíÆTÄŽ4/O¸!MF¿º\Œ¯ÖÈ”±MškR÷Gì÷2œ:,¢Ç+±«Rª1–9žh¦)|¹]I¬kLþ¦wTÖ“ìÔþ~6]3V~u!ƒ2îEú¾ÜüÐOa—zÑV¾èÛI´6»ëþÖÙØpÅ€ÿŒˆƒÅõÓª"›Šë¤]ð•:‘²ý¾ØÀ"ƒã3ü$P?ŒP—B¨àÙ  @êÍš×ûá÷ž´ß7Ù’Ëë:ê!%§‰L¬³´×‘¾–ýö´¾»‡¿oüg_€ûÖtíŸÚóLò[¶sÃs·!:ä>^ û©ÕþøÒŽÕ„!?}åJ.[ªþ³†Š {›¯‘ƒBbûê˜(% ·¤Ñ£€6&BúŒ…/§Î©~ÔôÝ]`•Bƒ~+º×óÅsëBrlѱc¾h2Ú Étû+S½ù™ßûª5̘°@[³èP‘r~cdêâϺq;Åãð‘Þ¹ï:ŠIçD/}ÂÕò×i&ê㯟Éf¡Ü]CË,dXu ¿Ì„ÝöÓø²€’Ä_ž‡ ÷õ#µä ùÛ!âxºKбueEö}Ãð”Ù缽ȺkÒ0}¸`•·Ê¥eÇ\phŒÖœ4 «p£ëè(²ÿíaC2à]™Dk­–&h)O/ÅÒ4§sóåâsÛMk¾)¨³ž)ƒOþÀ?PƬƒÔ')ÑêÕïë£hŒd;*p/x ,)\Mæ Ž+J zd1u@ˆ‚¾˜…ô»VG=й[àC1Lò—©7áteì&Íó ÿé5ñL-ˆlŠL`!©?ô(ªV¾GѵG;¡°›Ÿ:xÑŠ~Šü¤à0 „À€-ªD!€¨¢®ÕÙí ùsó,æ°Ç²u£r•6 d¿ø~¤$$£†5è®$¯!R®}¶°ŠÐ[^]/ögûÀ@½¦îFßYëùF³BÏÖãúYV¤—¹«;–½]Ôl[G„bìŒp †5ªÓp}?Ãr˜S%ü2|Óg ]µ%¯7½ú.X§ki¶Ú½Òªf$L°[]ê(y…G_fy'Ф)Æv‚^co(®x7®AÑÎÃ-?ש“éšDýZb[ê•9QþHÖ³†“ÎÉp?ú³s ù/Ï¿ÇÐ"×}ØB2XGú?™cQ°c;ýS°3Öm©bˆ“h†pÖóˆäÛ¢@ƒ=ó‡\@(ÙƒœabÕ-1ôa—âjŸ[Ïð(é~ˆJ^߉”üT•o}rL[‰ø~Ô¦öA?ZÎŒk»¦O²RÚ ”Ó$ÌÉÑÜ[ò5ÇþD±n«E‹mŒ2Pþ¸yW_ÒF ?å„yU; ,µK\G©ù§‘ñ/ö6OÕÚ ‰lÇõCÞªØÛkVš”¦FÓE¼_6vÌrÀµxmå‡ôL‡=aQb!”|D¢H²¡Ñ2I­ÅU^”·š¬/Õûf\ƧÖè]Íý=ÑòW¦el©·áÌ[”ê+ëG>•q?§XyøM‚hÂþòXö1'¡ßÍ~¯2-Ua¥Û’u‹…zõ“Šäp2%Õ0·ÇËyÎê(j”A10´ã¯åï`&Ôw½2T^Ûˆ¼xDBW#²|ɬ0&Jéaú´-,öãµ%ðaU4Ή„⢱—¦GÊ¡ÝÅ>3°}’Qt ¿Ø53†BIÈ;˜V¼ˆ<”½rí¿YŽbðгc¼:"Áèчq8â{ݯØÕz_ãàŽª*£¸;óQ ß™XrÅLxHäpÿè(& y£œï¨ÆÙ—Er—ŒfO° ¹¯sM̾‚’f:¼6ý᥃qû&MWØZš?ë¢öFŠ¿ói*|.:2õxSÏh¸ó@JÎ?³c'¡{‚Rš_|˜Œ·³í5:Áq@ïh-ãôŠä4…—¡šÁÇäöÏvHëŒÊ^ }ë5ÎÛ%6rU¤[ÙY¶‡£š ÏkþŽ’KúA”˜´ø”}Šjº-‹_=µ<<§Á 5.ŸRrÓÆ»Xò®7«iÞÊ«ï#ž¦6†q¦G؈UŽˆœv‘“Œå¹µ ‘[¯~UJA3Æè¥É†?Z£~¼ñ'[ÆZÝMk• 4HkQ½%TÀ¹ƒäëdw[Õ.bIÿkèJröZ¤(DéݺæŠ1µ½ uocÌô‹Mz£‚dDÂøùž`f*QªTT¾ü€Òp^Ë ê…3ŒùŠØz 2Ü%´,ôæËÅíQt$ÏüâÀ×»4ìzàÃð϶ӵV= ÐvAï®Âwß{DTøS׸kG£ˆ´Pÿa (rBk1е"ˆ(ô é§f·ýæB®S2»„T×VðÌ••y4{ (Å hÕ¤Zòšui>œ`@hX¿~9D€¹{Œ‘>“òmö>½°ðeð<’E¿Ûòu@ ˜/ aþ(ÛVì•}ËŸ(ÿ&jl+Ÿ”}®Éß ‘(ÆàáH l¿MÅrõ£or/ànH›cµ›’‚%^̲ÕH僌XjÜ.–7>Œ{ß&ÌZ0Õñäè˜28Š>³nuÁ®„~5ÖÃÄT± 7ÂXSô–~îó–p›ÅV/º§†BØo§Í6GBáãê~æ*xäýi—áØµ,êñïeöÖúôŸŸ÷k²•Sœ•âü«ëb×F|a³sëR*sÊ:ÅHQ2á„^odâ ÄûTr»‡Äª>×àéJ–77Û0EqÍ •ÿ€¾X\÷V:su“¶5€"çØ±èÿ泬zÅaØXŠ'ŸÖ¾*(ü•›kÕ¯ªê&-³|66<†uÊœ¿ÓŠÄþ÷泌«Òœ@Ïnºgª^1%)(ìùÝ Ž’÷™…ܶÉ[-!¸N&ˆÚ6K–—i@äDqÄ]$݉ù£0í˜-ÊB:İS¹n\Ǭ£¯¦És_ü¬û+»sù3¹´Ül<—_ó²+N ” µ)èÜ$'Ñ’ýŒ1®×ˆ˜Í‚úK¯žSygîÇ`ß“‡Ø/`aUèsÎahHª@»Rsã2]‘,>±¶ÏjðÈmAƒ›ÞêC)€÷%Žñ¬geަ0-ˆˆ³û¥“Ú#PëúŒ9¤í¾ƒ”gº½ý¤þnÏS7@R¢¤þ ’sÆ‘  ÑÔÇØñcöÔæ¹‰zz®í*®s¬e®é¡p»FŒè@Sv_{¸ €õ¨°š÷,ó+~íGÀ Ì€‹G*C}Ûp±ÊÐùpX\ó1-Ò•”ç º{GÄi¯†‚Ú7n‘*‹Ó²WìÀ¤ º¬öè :nD \$ FÍù§í ªÜ[¹-”fnß 7e ¥ÐPk.òÃ'õP0×¼Be›T»~{Æ£ëÕC==å9=>èÒ狟=!D>Þÿ¸`o£”Éòts%g@xI ·ˆÕi­: J³‹ú²‰¤ Õ[Z H§ìoÌVø£üŒ¢† c´^•U€:š7Ñ«*ÄÄMõ‰,6ĦJþ̤C%lÎÛ2ÓdØ[ݧ-¤þ±ýÏ`è!àÖŽ+û‰Úu~b.aøP,57­£…7(åþÍ1VÌЇ©ýÈUóÒl½4ü««ð”¤tDßÏL”eøéUû>ä%V<00\€ÔýÄ‹ƒ/5ÝiŸŽÎUJ6@&Þ™½H~.,œµM¿ÏƒGâ…åÞª‡‚Ù©À¸E6´ß.\÷K/1šŠPœÈ?¼þ;v1Zõhº_aH;£ ú±ÙEþáõÃÖ¦Wf™n«DÅÿ&¯¡½;­€N¢êôÐËRç 3╆§ý‚çÞ k­v…–DYÇ |x%PGR/¢ª?Û•cT×Þ~šøÓX¢±ÚZ9ËwèQòC¶ëýöq¡½4'¦ùä 0 ㄃\4 ¶cÜ–×>Øê—ý@`"Ó¸´Iy¶×‘§9vX??Ÿ÷ Q'jfQ°*¯µà@ıjߢÜhK kP0@µà˜§£:0¤ N´/ß³Qlvé,ÄÁO町;wàCÅ“ŠAn~S³Ë!Qo¢¤­„ÑQY~00¯l3EÚF®;¶7ŒHÉöéQB¿æjl:Túh"M«O”¨6Æy®iOÀwˆGZêÅŸxl̲úܰ¢Uè (ºªþšæ¤v¢!8ô¾¡˜i!´‡õ½ŠsTd|=±Z mûõú—1æyìkÍ/õj>évw\þøéš<æÂfiº-#OqÚÔwßnç²5»o«fà~Fïïë·PåË>€\uÞW„ïB¤¬så¢Dó>Ü9ûÌ,;­Të%v³"vp›°ƒœ’Ê™|êàpêqÙV9†OÓÄñ'U²æˆg®ÌŒûåVu ú q×8Ë}d›RÛBQê‰~]!òµùÀyjïóýöÞ’å›"Hè?ùûÛ¿†bÅšï‡g~ñAÆ‚&Ø€,P× @¨U/$¬L̶Jn†âýY¥îÌ×£è¥ù#‘hE‰ÑO¿€zw+¹E¨Ðîÿ–ÐÁ:ŸH˜³“3Á*HëhJÎ%sžx÷ž·mp/ãgL-ƒÌÐÌoDáßõH¾J±T€îAT°ìÅä®-Ð _DŒ°a+jMÍ`÷КD ·Ïp´n¿•çWbùØï~Xzî±,ŽÖj/¹ËЮ5|î=.b©SІ±˜D _OB3S‹iøºkÁÔäù$/ƒô)ŠÎ¹hí\IùæïÒk—ý¥?]çÙx⃬7ªb7¦ÀÅ›ÉÇÑõøË‹þþø¢æQ´=I_òô fÔ•*ŒŒ5Ú1<Ú-!=PCãQK40@ó‚ú.$ZkÅÅ“ðÚŸ²¿ÍE`Ã( Œ{8x%´qÀ2î©G¿Ø=¯9,<äØ3á’¬å Ä-ú6¥‘¥ôJ%ay9èíL1j¨<^¦¨?_ ÊÿÚéyçÅàYXRœó–çÚx‹N*.ºÒ•˜mkvYüž#ÖjÀ/bâÂiùTÿTjÑ,Õ ÏË ªvã0^±ëõác:”ƪ¬R·'}&‡YÝ1ðáHz©s…G”šH8•y4>׋íþÊ/sÙb[ÿsv?ä{Ld}^K¤\¼¼×gÕ-€Ãrý97©9ô´£?7Ù´iOÛÌ줅ùÅÁøär¬ûW×S–¦J¥°7/8·0†;6®¼V‹R8YÏá‘LG~î¨Z¶ÅBg ^šÿˆQ¤ðïÖD }šÔÊÝ÷I⟗¨€ø0nɻܻ üÑ\.߹Ņª‡›‡£åSwÏ)QHmݱ¢G‹¿¾ úÅ„?!^OçÜ(Ó¾?l«aWõô|kÿtßlñµÄ`a®*«Nwæ[nG=ƒÉeéj±€%æ–òi"1ö¡ÑG3×û¶¤ƒï£)ðô„,{œ ƒ”›çÉ|–·.úQ1éþâ\ÖGvÐÐÿlm²~ý7æz-¼Y×Ïéœf09'¢¡ªæ÷tBcPrª‚Øù/´ùýnˆC_Mbß–òŸ†¨êN[€Ö›B}€ÛD»BâRŠ˜äOVwò{ s§«Z¶½GÁ¨ðP „/Q¬uè`ôéÛ1/ø† MΚó>Í#û5å€È`A”Sp¿Å ?/„Pœ™CÇÚéíÕû’¿—´÷—U óZæì=†ûÑdNñ´DÕ÷”T“ˆ»_@²Ãœ¼Ø]wô'd˜µ*˜Ô?\É-ÎË÷-FB%IS•çùBÑo´ïm+ŸÒ(°J@ˆ[ðaº/„ôcå3nÊVh vzá7@‡‚{Ç~•Ö“ÅiÚ¡÷ºáû_ù£ÁA*ÙVÂfªkZØÚ&;zÂ}³µúô @™€C?®¶7Heq]¶Ó@çpiZ5x?OÉÚë}6X¥p~hœ—%®[gShA&Ô&… rieŸìßFX ‹Çz­˜R¿†(ÒT‘¡t];©é‹Qƒ}:ÖøèŠt ·9Œ"~¢÷!ƒ÷k´6çŒä ò ð¸šJLè'Ð1²¢FSØZ'à%öÚ¦+_Ù_à\¨ïœ"· ‹ÎK]ö ß¿è§A€Ë<ôê»/äw »†³= xlö*ß!ïÊGÁr×ëCÛ³n¬ô £ú;ÑâÁ“H‘¨´\TxqD@#ºJ`'vb@ùÙî¦(Sø¶ô¸¹jt,êûz~ãEß²!NϯáPŽCåèŸÄóÓU—Ž©wxNºLàûwlQöû~ÔóZeE.Qéj [ÓG­'™'¯TxÝšêÛÜtuW º{wXIõút>ô­C#®…ç‡û´‚ÞÊV矕¹ÌƒT¶ožxê ÊDdø·qFœP™å¥“ì*öýçX'ïXN’²¼3sQ8EË|-í¶v_Ò³õ¦¢^ çqÌ1 ‘¦P”Ÿî½æŸŠÁn?> ¬Òãí÷æV)YL–Û 7ÉÊÚóüúf]“­¡v-UžÅÁ±ð˜Éø€ìa„Ä–„†ëxÞH±8CŒ`P3Õb·ï{¨õY]¶Z«“v„ß§Iñ!^ÝüW¶Íìr~Õ,¦SBÏ»®út1¹¬ÿu¯¥ÇINòX@"" BólÂú_ß\3iÃH4àä¸B"" åÒœ‘b‡x_Cm»'ó…p^›hDD‘n¤•'›ÙôÈ …eÆÍg¿ò_ëÕÓ>¶;a-OÑë¾ì5šÑ6ºçù¬Æ­=Ea¬¯ðÞ ¾óºp@ê /|ô~PŽg AƒåÚʃQ23Ø€ž!u°§q>Ô#€¤º ¡é~É8Ïš{ƒÔ-Fs>°'ãZq1´`oÚ -’n¦žmûì¯ÓbZYD/yrÀKIéOB@œ0VöýGû`>@¯ç"ý&ãq…”ÜŽ›x1CÞ1-Jœ{ œë‚M2)ø—ÝWŒâfp¤È.½ÍÃ4æVX2ŒQjVú7GÍ Œ= pÁÙ`±¤‚ÎpÊ•c‚;C‡Ú{ÿ?„ìg殑3ψ+Ksû Õí¨%l[.òãi„’7Áÿ’ðM à¹%¨¾¯±Õ1¹ØiÿË,íŽÔ?ü|‘uXm|RÇw`_wºQ*8´ß›†Ý†ÁÉuÞGKdzqèÍÈáÌ”5ð¡õZ¾b/wçÙNrÜóðÄÿhò|.“Z\Bh±Q@™>g×ÝÖ2rÕÛµ JçŠdó¬.XuUïk¯˜,âß±‰øI$T¦=RVö+Ù´a¢H·&O‡¾@ße0A£0# Ô‚íB¯^ /ñ¿I:‡c¹…uPÏ»½’ùÿA·`n«‚ëšÝYóK·Œþ 3ņ/Ø @sîœþŠÙ8„SýØWAªçÇТÉ$(#!%BÏ*×zÈÚÚ-?ièm*»,%(Y'´Ö£ªè´J)gÇù ¬ŠG¹£ãBtr—\Ÿ6UÓgÁ–#S*ïÝÕè`{sV„:¯Œ›ûe /K¶„°œß„ÔÁc…ýHBö"r2L¶Žì;Ѐ”¡Sî §Šÿ¸"^¡‘ð ;[Rƒ/|Â]wôüJ‡ËÜáné]¦Ž.[·û}§¯â2‚éÈþr<«Ó[/é#“`ˆˆ€ì•;Éí‘ýr-Íjší¾ÏÚœ¶%«.v‘¹ç£u·F>×ï˜ïúo¸i-î´‰Š À$å0Þ8˜¸¯ Í‘ý]ÞÑh™|Ï  ¿sP6§¸\¹A.ñ–‰ôº¯ÿ¿²Xc¯„ñª‘KœFîk™–Í{æžêøE„|~TÞæ/Ÿ>Á¸ÌSØùÒ ±mÿè2°ë¯ Æ» ÌpIÄäƒkýΛ-ö0^”¸bÚ9ç‡{ûnÛ~\*ÙÁ® ,¹2‘‚-ÈÆêØvÃÈK”ºp—ßU÷sèYœ~ùdNÞ»Fé,Œ±±9N£zíž–kVµanUA*ÑÏÒQTí’™q}e­ÏšpŒ=ò€3ý‹Ò´¢E@¬úÔÙ€§éO›¢Ë`>gÓ×¶0"à‘ŠÅÏq,4{ë.ßZ'…ÄáQg‡Û¸³@ÀÖÀ@4Ö˜Ü;ÛûŒ”-J©O‰Çî R[Í'õnâµë B3þJÌÛ ]—×s×Å& `%l»ä– X$•Ðð£Ð<åßlK¡H_  TIã9Ä÷Ås@D#ph¢ùw5ËK¯1Ž•~Wʬåyá5v½Ú¦áeNqp·¹Eõqۜ¥ֱ¤zyT>1.>¿¦AH­%øüqÅyß¹œÓ=Ú—z…ö>RP+&Ÿ-O„®E:zsct¶aŸEés è½_o°´|-x'njwÚEWìY(B (Áõê·">‚(½M0Ah™ ä±Ù©‘q±ðaoŸ(7CÎ|hÿ_ Óh<áÃXÀ¥R ÀõŸ¿½YìßÏÑÉø…JT…T^08z¿=MU‚ã®k»(üÃ+ÓŒ%%.4@ IDà $Ù(µŠTŸrÍþxï÷rÅì?%òÐ„Ü ¥þ;̸8B‡:Ò nöÂi/ ðÞÂÒx‡&0+šdP‘C¸æ)»œL(Ú™¡øB`œ9¶r!+n1’™»­Oð„\ åe¤8Â@k"¥ÖdùPô½sŠÅl4#`¡×ZëUþ1—ëyÿÝ3ÏE\ñâ?(@qÆÑ0 XB5aÃt¸," j®ËJÛò>¨çÉ÷[ª6"x#1|íkH©ÿÆ_#ÆÐF d‹K²Ð*‰€¦y ‹=l¿q\X;`ö‹YC*ÿQÇ^ô žãÊþëî_²nä´Zó¡ÓsFXí¦Z,Ûmqo6An.Å!·gsx¤­v9üšu›[†çúc .e·Â­«!Vê…Í—bÊ"{H!6ØÍ¤V+›CM&úÅTäp ÿ¨B‹º2 œ½_[žß…À~î*ÖöŒýÒ˜•“ÚX§ÔùñåôE¾›¦)¢ŠZ¢ù5ž¯Y—éçÛ0²¤.t°€ö]Œ%h$™š}‘+}‚nþMÁÒ¹l>b¼ Q2‡d3âþñµCm¾×ô…Reã…é¡45x½¬úƒ¼óf­°p'aPP’v%*ù&ÿeT¾¼ ˆè°O‹Ë&TÓ=­*"¤€£~jþE´C¿ ]-ÅÀú§&¡òl,+ ÖPaì,–žè* ê‰Ú™º®b÷šÔezù‹?uýc³Ë¯ºV×, ¬ª°žW{´âeðªÏ·rN¥Þè}¦*‹ªi"ŠeƒêÙw±‘<­×Õo¼ÕЇtc½ë;þ3•DÂfÐ:Wy#ãn«§åÐak°|]Ô´‡(ëà R~îs“'ÕrÕŸäήbõ ·1 ñ‚}Ü*ó·iê²—iÒ|XkuŽèå¾ {Ìj÷ÿ=4k#²ïFÅnŽe´…‡tq%ñâ7[ƒ£»‡Ä„ÄKÜ”Z;ÃöçÎÃ+_…öå×V WÿZ yÂ_¡þHâE +½+ ÙÙ\ßë†b»ä#jw ƒÂ7söã]1¨^}Èf³8Þ$uS `BA$M‰(7MîBÖî ÊÄÔ/yäcPlXtYq¥‘W)ÇQ¡ÕÒùì•—orßz ¶@ "ßq’{ÙOf¿}òG2–ð‘ @¸lœ§•ébGn‹<á g,²¢Î¦Âñàîš"X»YŒˆ·€Ë ½” kÂr´!˜'|€¾Ñ3=W=h|2<7Ä„ˆà|?ío7,øçU› À(Â+;2ÚÎ#xéjz>з ½z¬R§K感½BOc¼+¼(¹ÔûòâxíòȳôiÛ:[ ;“Û‡ÅöS–îf6á)Æös»ìé‹Öó¤þ׌&d(`.Pè‹*qÍg{ÖþýuÂûveý¯Œ’ŸUþ`~FÛû>…ùëš¶¦è?E×̶‰öþ÷ JÄeãAyN…bì÷%üûcÊlqMþáðœ©ó‹V¸r`_º›‡—>tlUbíJ8iðÁ‚˜uB5Ý#8ŽýŸhø¦F(°0pIàbŽÏæ_LT>Ñ“ûá£T¢T @ïžîgo –Î<¶FjŸÍvÎG5¡êÿÆáIíÈ㓃ïé"!ڜշ“^`ÕDX\”!tСcã÷-p³e ¶‚îÔ"ì'#å›ûVÍÉ-“ž±it,€R&òDñ¹¨ˆ èÞÀ‰a¨`@H å¡Éëyi½ÚUóÖ‰ks]áþ¨sÛ%-=¶(Â\^¼mP®« Ê÷9E“–Ô^iê« Î&½çPb±_”ºY£ÛQÒ“iFóUÁÖ”#§¯—3Ûê,ó—¤P%Ý4Ÿþ­+‹tP!ÜûØ¢ªÿxÑ ;Æ6ƒ†D=>Ây”‘QÀŠEAÉ¡ˆkáïá^Ò Khë+‹jÓÎÇôüw2%Ôc|úºÇó,µ[—E±Îôý¼R–þ–AúÓ»;)­õ|nÛÞ­µRAo^Ћ^„«áKÇ©Á /jú)DÙ•ßO@@ÑÅþ£¤èñA‘¶SüåLÞPúõdÄÏ'æe‹8Q2X\ÁȲ1‡¹anhïCájŽW½˜;ൽçCò ÀQLìÙ(­Ýâ}utu†õh8ÏñýÁ$yø’Ù> йê‘ñ½¼¾¤+ö«pÙ)í„Oâ‚!LpRÿ„n“»øl»ÛŒÇÕ¹Ÿú®ÜÓ ‰ º7Ç¥…D‡wÿåãçZܱ%lAOc9zýË&þLK¯Y½Ûg,’|rÅ:²x¥ÃïpÛFPˆ2<±Ì‚yã1uŠ<ñ¨œ>ŠºWÒ×%šÌùê&˜½îl"½‘ÛÈF¢‹k«Ãí[„óp@w™2¹{WºNÆÓ¨¿¦¡º<²þË)zçÝ·SD\¯ßëŽ#:€€Áçsˆ»¯Ìá“Ùt ˜: 6˜?:ØÎ?<áüî`Zeo(Ä»ù:(;Ébs„Ï>CŸ[ù^l`Æ9Hòåª}X¦Z„ƒv›«îΑpÐ3TÛ/{3©Ãè-ÃïsQȈ`¸$ýwn æpd¦:ÆË!FEž–$¨¾ÀÝíÉÞ·Ñìl0$ÉàçB"42@´Ë”´1¡\úÑíß~b+ÇðÝõ †þ°®œôèWxuÇP*£T‡†”Z`¶Âèþó^0ûï°¹Vr¹¦ïÏeG³@G‘È)À78–çäª~éb/ô Re_ÂlÂa5©+ðCiaHp¤ž)[RTI t êâÂâ{}ÏÖ‘wkÁK™ïµN+)n)%Còþ"çã§džq–°qÀdi/x r^2 Á³ê¤Ú?PfbTbN›/&˜( à>_IÅ }¾=sò1ª­ÀüqNxÖiÛÏ0iLÆ{@öRœòÎfl½I÷8ìˆ ­Ú™RüFh'H)`Ãæš: `°'%‚ñ…™dhP?ÕÒ¯ ºéµ{ð×Áo1¾‹ˆ_aeõÓ’/Fdäþ ”ÞM S°t" @ˆ‰‰^ØÍz.U6¨ì¸Ýñì\OÙ|ßÞÅŽÙâµ²·Åáß,ë‹xw¶ Ëÿô鬑%õmlì(Ø rYB2ˆ x0!ÎÁs;Úw¤+B¸êF¾ÍÅl°00 5Õ+=e@þÒ×­£À¢G*6€0:Ìõ|ÔÙ¼î<þ·m[¼Cx""@z Çꎅ£fÐuÚ1Û{Xù+€öÔJrÅá (-aR•66½Ä6¾ÓQgöíƒ ÁFTCœXWf¸¼Fè&læ(Ø-;F8L2ðeko!Ùh“øæY£¸ÕÁÚ œ ~¿×›èœVh=sO¾qCÇ>Ú«u¨G»×ËÈLi :ÌF÷§¢KBsW  ”•#à¹þ ¬7É»wÞlw”8ÓJ—“ww_ý½8KðûZXìœnøIsQ°Y0cHdÑÍ’j‹Ì1 wÒ¸ O‡ÅæubWÔׄuê‘„f¶žo鈲0XÚ®î>¢Àù£a«QJwx ™¢|§ãð)úK\iþ#Æ€|–Ì2†+7æáŠæ‹DïoeÛ‹ËÅ/•ңؠÊ÷CŽZå†È%S7ãƒÌ /X”Aå{9žßkʉAŸ|PEN·G{ä¡Cçü)ø­°A•‹Á/·›Õ9Q’2t¾V˜Øœ&¨¨00S?Ñ‚]ƒríõpêÝùõ<–è[TøF~~€Ef/‹ÖÇ‚\aD×½ ë.<Àbu´»,‡Rªƒ9‹Ô·$“fª\'Þ^3ߨn°\µýÜGßß´¨ÒüMúˆŠ;~-¾†i=ìýÅOZ¥à¯R{RÀdžs?‰%ÁžÄQòÒô\x'ï/|çâ°ä×íyÅHA^…äZúˆ¦±Gƒ7Áu¬>é+; 9»]ù”²¯xœaPЈk¿©I†$ká-„""£›i¢ÛŽ-yÞ&Ù^’ÆŽ?Ýt³á+^i"ü‡må3ÿ¢}1>ÏÏuê¸wZÅï;õ OR²C¶ÜÜÕ£HC´QxôE«lÎàyPÀ8é“oЇw “9xƒ½˜$à@]‹5¡¢Æ3D`.ÁƒÔÓ¦`ïuCËìõåÄt¬@wÙ`¨¤ÀÆ‹+b÷à„ç1t³Í»ü6l‹°¶ÖòOJù3¨‡•[dz­06hßv«^.ì[ ,U¡õ‡'WÞåÉ84¹O}Œ•Ù˜¼KNLÞJ/ÇÎ2ÿôÈÎÏ(Ô„qC¬LÚª—à5¬R¸v®"¸ø=ª#… |Á‚ C¶ìåAS¤ÃáðäúX¥ÀÓL#¾ez‘„¾Ê>å'<£Ÿ6›Gé}ÆÏôDç¹lÐxÄ´ð_{—HÞ–*Ë; Ú £ôõGM²ªø››òÀ¢^eÁÕ M z˜tü˜'4º¬†kXÑŸ:TxŒï¼oKŸTïf®`¶R·¬÷‚=90ÈØ~:4,u혂ªƒnüÈ/3RÈd¸ ßÓÅ,'ÌŽ=M•™ÄL’+=«¯ï?î<)tÕ'ÙY>N\žžÄäÖÁU`Vyuº‚¬~ uõ®{ÚÒt‹Á#8…qÝ !J+ŒDÜcdDWw¶°¹Ì2J°©£BË5T #õè›ñfgƒ(ÐoåU‚>[iåUuÄàsò%7Ä«à×Ȧ'£ðmj&°.(T?ƒ¢„L(êà/Óµcÿpkv›rÊ‚4?|ßRÙ ·. ìdCf‚+³0 3w‹wÿ@Úer1¤w@I¨Áf„T+rž‚^ÔºÂ[–šcî,SZ™:)~¼)RÙ1 î}~ê¡«;)õ¢ãb,Ð?,>Ï?-Gík ˜8â«A0P„v üþºlƒ õ=—.Zç¥dŽz8’á± „¤ e4mØ Ì"˜È™F ‘x.Q6cg;º*‡WN;`H_}êk8RI«¾ù÷sAê-†„|pþÍu[ㆇ£´¾Wü¡ÉøÖŠ3*ìÝI>ŸŠ DUuOîû@of†öVmA<¦\ûyžgC%ÝJÔÎZ勜ØV>-¿L¦=XÅâ£+Y«*­ËKöÖ1£)€@T6zîe‹¹ú”\¬5Cpæïãò}GD…'!«÷iØ~Õ_pèþ•Ï2 ð^wš Àªù¡k6 0ÄÄD6ÕU­vAHZYZÒ«ÌY,MTó¬ˆ]l´gÑõC]ÉŸF1]üÔ-§jÛÉÁ£ˆ¡¸‰§¾<8¹ÖÙ/Ø !y½œs:î)T¥Ðu –ÓÅ}¡NçD½>Œˆ_0°Aà¾Î Éæ0^'lo8 =Q¸‡ÛA-…ãÔ)†OòTÃ}•T¦íí·ËI¦ItN~Õ»¨4c¨ìRd„‰\ a.ÛQ‰Ÿ¹šLSrõ×(°!A Šg=0ÌÐûpb(*@“̇«é0¶€ºÄÈdQC-qå |XÅÌ(žþ-…î÷œ^0ÎkC´„Z˜[,s¢ÊÞ{±zdà lj¥¸¼K#'à3,µÍ}*0s„ûšk‚—´ ãnxżå’møp†”ë³Üýø8GµÑ©ÔåŒç@ݪ¥JàPoìfC‹ã°Tº†%õ ôÙ™¨’¿ÿ¶|7íJ@t×ZóšàRUÜÏ™ z€À:ב¢'R¦ !¯®¨ +ÐV£4W"N‡L)/£«Ë¶ƒ^0mìn8”Ë 3yï:ðÔ0›ú=ÎahZ§3tb³Aç–hE<™ÿQ7…6~ã™ÈN€vÙ‡&a޵Iã f>ëø)3±XF%;µûÏÊËù´âÒëdñÆ¿i¬žëAx¡PÆÉùI9©Ú$¥~Òò¶~JíÈ„…ú°Í*MÚB§Ê•/Ø¡!ò†f;TUé$xuÙ‰náU;¡A)pˆðˆ«Üñ–½z G1Æn a[¿`´±9™ä8öQàÒ¿Ý—&xµ_ºœPx˜­ÄX|å¶ñÞü*ÃÁt á*òßöv@*J|äëÅÛÚ<ÀôÌÖ̇Ž/RgôvX.‹Ü3””,hÀ¡ðaþÇb{Õß°ÐézC£@¸8è’±¹ÚÌézËkŒ¿xóa5Vmæ)Ì“¡¯uCÖ+Eõ-ÿÿÑØ-å$B½¡#ƒ—I‡(lŠôì¬ßvÌÇö2ÄUáÔTÅÐd=Ÿ°ÓA•ŠÚ1@é³ɧå•^ñíkÔ¢÷š|€·¦±â=q¿ÎˆÇ*èÿ®ówްÿÝÄ×Ƚ,;èbœ7ŸüYnBñJR-Á€p¦ºCrm±>soâÑÝ«Fh½ËÙ fq³÷Æ•ôl8gáðN§U7ˆû ,¼`[¶÷'Ë' '“~ÐofJÃÞ#ù0—]½®Çâ¡´4ö:„æÂÞ)!’y4&ä}Y(Xkïüóg©CÑ á¤&Ôkõ`†vѵÊSÉíGüEy)ÐGE4ì8ˆQÇö…X%Ÿ ñkÉôÀr ýœ-5dïxÃW'é>s"š²ñ »C`Fqï,ãŒÀœæaŽëƒz"à4w‚Ü=äÀûKU3 7³áŠô»êN6Œ#@`%&Jj$"Ì=ÐÿÈ5DhGBZ43?M6‹á&LƒÒaÌh>àLß}´ìe4—2tr9oŽð経Óz¢¯%ÂóÆ%؉OfûEP]9*]š<š´0_£SëÊx´‡šÎ*ßÅ^@f„œOþâÂ|ª¶T½Tcþ À “&?8 8¯7k$ÔçC:•Ð}€~>Ê@Rà:À“y”hÔ{-£}¢®GðŸ·ÉØ bîÅZÔÁÞOy’ëY0ë>ÌŠ64*W¼A€Ö|(€et±ÉÈS|Îÿêègïæ’ß¡ÌÛ×Ú6â…q"uä媽+ñ”ùQ³zNÍÜÎy)„ÁÖ¦qG NºóûbJ“`@NƒšPšË¸´¹¨Ý(±™ÈŽ£y¨xájpâ(V¶h^+QS‘ïÇ›Ðåq"—JBJfîŸpó·-ŒteC =…ï”Îù÷Xɪ< ?y’5iùË ÄŒÂT"¿‘=“,R+Úñ9áSÿX/7ÆÏÓß/„ 7[¹VCóö¯†¹šútût.¾©i™ë­š2WÓZRŸ0ŽÖápaðL‡ç,®Šš³¤Y­ã‡,p°ø+Qû¡SYŽÃöl_w|§Û>P_Æ(`% Ü/fÏwÌ˰¬µ‚4¢÷ü³Äí¬x2=#Ik Íݨ܊ˡ±ozM-¡ñô^8ª¶lµ îoÑ5wO›)y{WÛž²±7³ÌÀ¬þ•gí­Kž¾º÷5‰JÔûrˆó øŽt'1{ôùïÂCæÉ)í¹ïõWYé« Kê"LÀG¹èOÒPq˜h^N.Ñ5Û³ ¦›púbY§ ë*bß­r^']‹B˜WÐ{¢ð*”c«´Åv†(Õ@o éÊ× ^€…ñµs½5ø¦ç² .]J$ñwÌ~NõTHw;FTÜ…@[ù=?”z|Š )×Â2 LLLLP®ÔEðžÍ|i^Ïœ;ãtì:wAh±w¾Úª*ߨÇ:>¸ö#Ó¿Jz Nc.Æ3œ ºÕ\¯*ñj¼Éþó¹4•r:x¨…P«R®­_†ðËK'¿÷(g,ß%ÍÁL‚Dn<Æ.örˆl-—ÃÞ…ryÉæñX¯=ÚESçÿÃ…­ë?›lÌ&NÂírè‘ÐgyO­…qjD8¥Þµâ£Ôxâ _6"¶…7@íü1…f}WæHV`,†ÜÈduû-#ÇöÂm¡õÙ¢Ž¯]ïà JÁöûüåöFç%#ãõ%s Xäw0bÖV¼Á·UÎ;ucs1j@³DA1c¡ý.ôN¾w‡rŠv§êõVi@m¥tüºB31kÜ%uhJ`Ÿ‚Ý ûqäm+Î#ª ¬Ä‹g¢jøÖá¸Y&6/ZÀ×+Å÷½› µ±{ç.1çèJOƒNv.©ê„Œè¼UŠÖõ’„Ò˜ ÍWœµ¥.:Û¨W™ÌŒ×Ïÿ‰á~ÔéܲxV,£¢·½†ÆÇdê`§«/¡c«&œϯ©†qÔýkJ …Ôǃ ÖŠöóáâ:Ŧ…WÕ·iW3—C*Î~ÓBßözôE£’Õ)Neõ Ãt–¡¤–ÂV¨LÖ&n?'àÉæw‘Ùè¥2ðn…|*·_®²ë&ÝñêÇm!Ã*=›®"±•·yÎÚƒ51ŽŒSTíàØŠûÊ\[ÐBaÜûªvÓ`àåšžv¶¹I¶3•lÿ#"‡DPáT5ŠNœ ®Nk‡-0c¯çL6¨ºTþþ’Nkâ?\mïà ë†í‰bµïþå®Ú™Píðü†O»‚(P<"b<óÏ=ÛúnÏôÓ‹’æKRŸsÃÒ·ïcs6Œi’ü9þ?OÅŽ%ËWQÉŸÉýOöºï-¨Ÿ‹C>³Šñ5·XÝ䱆ÑVI.éÔ~8 Ýêk¼—j~¾E«»Íóõ×B$ÿRj–+'Wç­Þ/ì›E¦œvò0é·ì2Gä$®Qÿ¿çk/œþäß/ºe"^`Äš:ôeû»g¼åo21‹å‰oø3©¦ËEïÛ÷ŸÁ«ÊóCÑ"Öûr^V› Á8åÃ5p qLíLÒƒÅRʵΠ$«HyDFª 9ëÔÚEÚLÂ;-$’Œ—b03)‘Æâ©»¼WnO{"®íÞ×{Õ­ûDÜv "j48¾hÎpñ/Þ½bä@ˆ *dŽcu§‹ý¹4ÎÔ¾•/g;´²}7zV–µ\"/rì!–Ý6-m§ä‘[p×K}þUT@ˆ61Ð4 ½õÒý°3õÁ­XC¨“n³&çÛT±åŸå‘tén/3xúƒ•œiÇ£ïóÒõ¾ŸNMºÿï£Hö_ÂÍÈôŸˆŽüŠäUj œÅèkjÒ[³7:O«2dâØà7]†~ä{Ot\:qü€å±i©‡ þjo•ÛA5¹¯4l‘ûñ¾ÒŽ Ã›/1Ö Qãl·4EëVRµ†mŸ½¬cpÞ{Éq¢uO×à„‚ÜûÅ}:&œrPE£ˆ§%‰/BR}Ÿ.B{p+ 2˜h‚éGâB¥Oqá$_gî¬]ˆëÅŠ$‘ãUÌ'‰4®z2e™£NT„ºq„Fê£p×t9#±¾ªüB ¢Úòõk…€ê ús^«Ì*¦BŠ·Çžœ•Ú‘<²\±oùÌ)fKòâ1`wâ‰Øà^iÀÒ`Šã—Oé™Å}‚FÁš3è<Ÿö×hÞà4‚ë6Úú« ßÃû|Œ«"ŒÔdì’»? ªâpÍ$Õð7mÿM}“Wz¼4^;¹ÝôÅŽûÂ6¹‡qæ ™£ÌÛt*wFö¶j‡ à6} 볘(ˆcHJôr±):Å)aس,©E—¿eCã"íUœQñšT þ­€HUÛ ë§ñêÊ£_¶S…)‹ Áë-æèfÉö=ÔX‰Ü”kË/ë¥M]“qÓîëŒçèÒí¶U¬@:òa¡³fÐlëýa|'2Å.ŽÁ£Ôñ: mëN—úS‹ ðüÁoaU›/3‹>î„Ó,@òB&#;zÝ*x/S0ú—ÂùQ4—„ÈO z :Ý¢Œˆ§ð\ á¹+”y~êq]c'ÃÛêÏô@OЛ艮¨gzç•ÙÃÝ̓Sg ;D:•ý÷$Cß­fI¹P1p0™…ø)ÿ•©˜´›™ÊÔÜ7O¶Ïn@ˆ^+tPAhˆRƒ„k9Ðql½ «ýý‘w½åޤ7kz±ÄÈÞ!r —›;¥HÃéA±‹®à¢n;Ô†&•³`=ªñ¨“‰Ãô—õ¸&k•?¸æ˜"Èv›RÐK~5Á?–+ä‚ñ͆å.¯ànûs¿?>q$œ%»­Vcu¹ ©'…b^„Ëm8›oñeñó^g\q„54f2R]ÿWûhödøòg{‡ož³¡³ÑyŸÀ”µ‰bÙøŠòšq(ÙIÝôÛak~"» Óç‚”ůd-¿îµMP•öá™ø»><ØêæŸm<“ ÖFRóòÊw&©«z¨k¥_wÿ›rØ®ö9õÝh‡aŒ :=–EJ>+³^©Ü·\3´–ŒJ×H„)gýKºTRôò 5“9¼+fcXÞëiýgr,à×CÌ7fÀV~ ƒTBËZnæ«tEw´šd0wv¾"C]ú÷eª·ÄÁó§kó«ö`üª˜#r‚u ¢½7ëñš5A2Ãâc¿à«ÑØ”²¡€¦r¥¢± frY;ö~ örsÊIX>=é1Œu|sýHäWŖ䓾Âltq›×Φ6*®0누ª0KËÛa¡¢„‹6„¼#\¶cwu›ü3P¦» WçÕ0쯇 òÐ;[»—ÅBÊtN wvùRÕ°(gí0ÄÈKlõÉÈÔÝ|?¿$X\8+^]~³#[ÎHP޽4JÏFc_\”5X.|J@%Àûé„Áå]bwÀ½Ž¾Þf˜À)ß% èúº° غ L}Oæäýèü•º^ Ra'¼ø >Z(‚vÚŸx(dJñ2*øÃ[ê03‰CnÑeÆTH97Ð>’Ï€˜Zè+\!BÚõÄ´žÝovn€È]IÿÍ"‰ *Rpœ(´0“ˆwë©5ágÉDæœýù ˜‰\¬ÀŽE·b+¯Mˆ0QCU¬@Ÿ\н7~mag6çâ}²]¾:¥MàL8rËê-3Š©ì¬wšq8y뀽P-/ümut¸È%øóÊÿ_ýZ>…òѪ~K!¿¥ØŠÀirn±¸@Ê~€}rò'çós”r-Nlùïõˆäæjw0ÛÉÈeNØÅIA©ÎEÈU!8mVGˆÆàx+$j}6ã R0zt¿CÁôNnÔ¡i4;¹kÏæÀÿ«o*%`[Ò8tj&ì첟°Cbž`ýÙNŽžH§Ú´"€]ÄùŽ'gc|í!ÈŠAm4Jí o¹1µ¤Õ<¿R4Ã?q¡ ؽÌMk ö…|á7BÂ[®VÁéô.{ÚŽ°ò¡ÇÉ€ùÙ®‹TÍ£KpÀ›A©òè® <Í6—ÐQ°< *>q^C&3ä$u$ˆ­¥ÔØÒ0õD018½žê]&.’ÇŠX¦Hµoºhœ»$T|ùˆ»w-á|§ü`þ ¦µm(“´s“ϳ tù©–¬]tdOÏ ²óÀ<¨;` Ë.ÅE~ï>7±-îþçÑûæ3 EeyKÓ!Ìu¢Z„›éçg§;ûeî´óðM­cX'PÈ·€P=tCÙvh½?r!×[‰Xt$‘›Ô8&êbÙ*kÑeks =Ôñ•Vfï½: Áßå Ÿ:gsâÑ…!ù…æEk†˜›v@æÁÊ©vŠ&L;ŠrÓ{k 8öüÖÎ~T“–ä$Œ}ÀúÀ€BŠõôDXŽ¦ÙˆÀˆ0daƒ[-6n¯î‹‡i°Vì$"h€|VØ=[3zo-àW”@èæ¯ïiÙ·;þ%Qé8 T3Úóö:ÜÅâ–Ïó^Þž)„Òõöþâ@©¼h“öÇ‘'¾<1–}ÔT,2™pnMwäî©9ƒ]ìÞ¸ñòò"Æ•æù¾ÞO¼Ó4(ø\P™7éjˆòK<{Ùò¬8]Š?®Á*ýžúŠ¡åœe"X#P9U+vŽ ‰KÔ1`⦓ˆ„%.ð­Õ€XâþpÕ_Ñ î è*û¾Ög^pºv¦„‹Ðºf0ó f›—AŽÅþâÍø%òWû<$GKE¢Þežä;àÈûÀ¶áeœ8\¾[Þå a*= Z·êáA{5`  ¡ßÿ<·œÀ²Ž¾ñp76><;«‚ef2PW,D'¯èIk—]®ù¬|™o…š0f&^>H‚$xǃð>ê=În­-›¿b°#¨DíÎÜ¥¯¶Êц9b8©Ï`90rp‘@“~€ËÒë&›ih§Xl™oƒ^a s›}h35i®d¤ä]Ûƒ°nŠQ¥Émöœ½¿‡bÁ7e¸ûºý¶u3sÂgN°¹/ìIÀŸ¼"dbÖ9l%µ/”¡°W*Ì÷{æo__´@x@Ò®ž®L6Ëÿ8ÙôÜ©UÄÒŠ!tÕƒjBXºE4€×:¼Ž`ƒ¶Ym`4óY¥Å;kÞ®öõÅàoW¤}pùX/ûf캷|Â”ßæ€¨¦È×FÏÌ—–=2ÚÁ|yx¡wË](Ÿ¼‡¦dA’ñÊ>üÖž/‘ú—î!{E[ÞšWټуlƒequDüƸȓV–ß`oõFM¶Î5êãÎ/À_’×ü™À"©f®ò}ŸË"çÊYR¶{´ò¨‰W®¸;A^–ÑÜîŠb¬ª ‰ûÔÞôáÆGÓJ½0¬ÀÜÈ/R=/a4Ã]M„Þ¥@_@Vh›1c¸-;¸1DZËMŸšoÖ y Þ6Ýj ñŒä¾Ñyxõ¨l¼ñ1W6Ç'B§¡ZÒ1dççMö¬Í©3Qˆ½Os ›¾£‡ºwŠ;‹­\¼zÒ©ÔÑ‚ÇGC+ -—µŠqMÛõ 2û²¾4¡ãAÀ\€fØx[œ%Óqv+Àðu, IÀŒIl á7Ì݃Lw'Ñ-¥O@`;”\Ìg§" ÏÐݳnóúÎXÛè ­ÌFn…ÀtVÞ“Ÿ°yÀlñ¯žHQfÚɒßsY°}›ÿó+òõÄítGò’ÒìpÔ*ƒE…Ó¡­çù[ݱE\û‰ëÁú›_“—%ÎÎØqNìšµ]„»ÀBðL×á ܽ@Ë"¹¿á£$*ëF—a{±ÉìÖ#œVËÛV L´ºZh%í£×ù +M¯>ë4T}_ëôgR NÏMv±üñàâ+ ê¿×‘q¨"‹$,5Ìp¬Búÿk<ãJ?¢æ'UÕÜaø{J¶„ fϘÊ2³‡ò©§ CÚ*“‚CAIºôˆš±œËšý¶SÄ;^‹Dèõ6„Õ‹ æ†7ê±öÙnã4ÌÖ“.–§œÕ+ƒdVûÓ¨ñ·ê1û³Ï>Pû‹.XŠ*|÷}‚M a—³&߬`©S«öÇ»ØI)UÕ•¶‚ÚxúŒK¢ûɰ‡Uo’’û<¶ô°§\~}KÈH©6Œ›jõóxëxY_&i3ë Þ0hiŠiF4­“%.—¨J×.ëÉ&x/öRÂí™Þ±&Ó/‘ÔFVÆoðžå”üd`¨]hu¤€oË(#î‘ÕÆ ç™ÏØØ`@Ƀð 8—€úÒ$O4Í1Æ@-¿†Ã@¢fö¨ 8½«éàmö6“o؆,=Kí$†òÇ!‰Û b×\“Q}8óAØ;+Øèp^ÃB,8"Û4æåÍ}âº3ŽÈ~§oO·5µG¯˜Ým©‘àƒåüÃô“¢û`RJP¥­$ñÄS^8ùÒÔ@ýL'ÎñSj} Š8'ˆeÈ ƒ•§HÒô W^^ú˜ëªŸqOÍöw‘ÊLqˆËÁ“áe”U•sp“̶’mùûUD“?ù‚¡Ø’êwÙ.3³QæA€ÍºÀ'*‰wïXÖ­ û×OHŽ’'EìÞ ´ûpË–ªŸ•Ž*„é%kîÅë?ªÅŒ[ùu„<“¹¾ägzý ðuî`fp©ŸTv*ÞM”&ŸwõtµÜïŒ""@(;ê¹»o}ñ‰ž¹¾Z|Û "¼]ß • M?àÒš¦Y2°êp„}™âÅhð?½¼@í­ô··êú±aœÙrˆ’qgñÄUùv –À ŒÑœÅ ¯HËÜ»pË\ÁœÆ-:CçŨâÎÀT@ eûžô<„â¤ôBXKÌ#/òß ‡yÔï(T`§RO¤~¦,Ù” w7jI×.÷3˾âØYg¸P‚˜8ÀäÁÿC•˜ÌF0–pX÷³nÒf ˆ´ñ¿-¸Âõ:Äy‰ûS‹©Û‹}stú.¯×‰ZŒxö†D­”e¤('Š¿Óé%Îò´xýÉOƒÉ€ìŠ=ÇzéNŒéî#Êþ`Ä`&R”Z-j0g–úZB’ì£î¦y¿‹´:W)]ÄÈ}üì&ŸH<èSý]ç¦fø‹û‰âYÄy]iŽMÎB­¾î_6[ßPZ ˆzH©þˆÆ±•x3pBSv}jš¨›þ´)ŠÜÄxoL,Ѭ¤G·^1±Ûa|šƒ’Û?]æ¼Ñ°ìFë÷SXΟÎ/äˆzøwªÎØ4Ù}†UËù×WÄÍÀ_ž1ò;“wàüFZ™[ÿ¡‘å’ŒV‹µÑ+w¿²µXy6%†ÝE¢P¾ lg¬¾u5…û‹ý›”šÅ'ìWOîéV ÿ7tW̳ô_ €0^ær=;Õ6rCux 8[ìϘú˜ˆÿ»åµúÜÊÍÊ¢zð1º9S¿®›l"F×TÅÒkì:VyÖÚ8ì}ßKôŒŸìKg—>ÝŸ&f»ýJç§ÿ~Ü&n‚_ÓAý+4³âéû8{+.Ø'Ç®Aú^ÉUrS˦ËWåí>[±Þ>s.¿$j÷’Ü÷ÑìóïoPÞk\})9÷¡-Sä8Púm´Cý1·›jÝ&ìî¤V†½êÿVéFŽtM=Û9ôü Ã¬ÞžøÃ;Py Í>øÒ…pY½"z0íòláf¯EYrt†D*Zk¥µªkèmL›0€&( dÑŠ˜šÛi+UÊ5LºÃµ[›Û¼2/ÙùK¹›>¿€`®ÜY€YØhxA~ïõNG{Þ• tˆ @>Ù&Üo‹i÷ãq·yúþA³¥ý5>XÿëƒæÕ½°á÷|ÞS—úxÃǯ€:P±WdpûÌ•YÔ“ŒlK¤”³Äþâæñè .vÚ„rÊK'*|iJÍ ½„¦é^'4²î_’SÓ( Þ ?9ÔElV,îj‚™rm‹Îðà+n¤4P@ÌXªtµü?ÚTÙ#è „/w²ïÆ(:ù¡ºŽ8V½Ï¬r·=K¶A  öLrúS‰›[šƒ\·y”²«kõà,úQš£<&øtÒ˘¦Èâð!)÷ Š€ýUVkŽ,§(–srÁ¿jC·ÒHŒ4ò™¾8¦«€¦‚ûËóLGô#7ìQ-‹ô‚I&Ú«¢yÊJÕ]:šgZhíwuNJÛª'YãJäç¸ œBäí;Ο€h/uÈ„†¬ùò±âBá´ÂÚ꣢œššk‡CvÍ‚ºßáý)-@,3ì¦m×¹ &3C½©—5§aé.ù­Û-IÜýøõävø'MÒ h?C1e‘¬<ƒ @ˆ‰¶×D£Ql^‹¸’ØüA.|Îj™+KªEXãìzjûð{êe¬ŠZ62¾¡™Z…û;ùŽÈÿÜ•YÆà  êK –QÁ ûþËäh„IZúúiòvTÔw:º^®]½~B·ˆ•ã?¸sgvø<ó Då‹h.,mh w®³aùÌj¾HÔ°9ùÅ®þfC:QÎ+õôhD뎖€vÔ¼Ìø¹v,wLZ UÏ×ó _Ôßžš«åŸ‰áÞ<ÆLHÅÍ™üKXs¹7*ìzÐ¥ï¹joI?ÄŽyPj —çß8Î’[a“Kaôßáÿ†"¡=â'ØSBãfÈ öe+}®«ÎR5F$ ×|q•ȯÕNcT„ò7ùÎ}¿ H?òŸÅõ¶¯p)ÅüO¸Ý¬¹“Y‰â`§Óý[)Óz¬jôræT[sW|%ËzÕ]~"Yúç®{ê÷z’Ì{Àá¢=éún˜Ï›8çÂ\Þ †µÒž³Á}Év¸V¢v–ó«´– ÔëÏÉëðC’EÀá¥íj2Í×}O§¨XPŒ›ž âÕ¿&9^D¼NáTÞhD:N´R¹-ún÷°Ô+ u˜A×ú ð§—[+I³­¬°ÔŠ À·ŒôD¿g ÁŠtrM’ð‡XðS¶ÃiÒJD" Ä3ôÍI)’¥OF°à/:Â,w­ŠÉ&KÚù}ÊeZwDÃ×#«NQZ«+›Êûðiéf´F’=/Ûº)1Ö^é­¯9²ã¡P+µ¬™àÆ©ööòµ_·h^—uʘÃãž:¡$’§Ù¾BÓyuÑ10 „ŸåJ¥Ü«Ï¼Õˆ_î˜Ý–¤]}û-ñ1:)Û¦¯·g½¨sJ-+•ê·©%š[rP Ñ‚ëéZ²hÞI„>ä„Hw«[.rÊü@3ç×ç:Ä^Ñq»Æ"iÎZ >Ò^»Ê'$ ìÍ…#J&±„€Æb…ŸODh÷h)ä«ðÁwÚ©j.i»ï«#öwZ†tŽ:ŸÍÛ÷KÄ÷Zͱ;€«µuϯÑ1{sæ'–};â0Üæd“Ú9¨¼ OŠÞ#zJ2ØŠ€÷%Bð}Â+€@@&Jj._ Ùmý0 Ôh@^@„LZ ©¶„’…Tv¡×aŽËuJ-Ï:Pê/S²}q›þü÷*õ[Ëùm©úP?çlVG½JµÿŠ}„+FåªÉ®ÿûìÆ»0ÑÏ“"ìçGø²¥MÑ©~ýl´lS{kÂPÍì~ò²Œòzw]ÎîHK¨ÐéŸTs†ôx{×cæ94¯­ a^ñK-a„€l…CÝ]3Q¨FŒsÍ,¬±t |x@‚ö< ÿ‡ùîGV§Ï?µŒ,œ l%ûAøwªÆ,'L8Ÿ×¥µeÑ[‘`˜â>-š§CW¾‹DÅïÁƒíÕ–¢óvš¦äó*X·€_*ªkHMôýÑì¿„ö3#x"ÀJé%f‰•e³Ú< àœöÖÁMˆ+lº%¯è‡ú=4&*b޶>~Q/'sú#týZNscµÃåWÌp‹á¦<Å3W¹Áx׊@ÎYuÞ<%á²x "×~7ç,­?¢ýÓyúï#j´×ù–D1@)ƒ¾gæXšô ® Àîqc D1Ï?h–†ÇêgmôLü¶7Чƒ3ƒNW¦’¯‡.‹G Š×ÙÞf7ΖČèà nϘÉvî6o‡ï€nÖ¾MäÏ  ¿éý»—\ÃØRAYº~¹@¢°Mt؉^¹—­²™jÓÉÍ!j‹®÷‰pÛã:,€÷<£ÜaÀgQ_¼"Ëš\¬ Û§f…οõ³×|CRfxm«_æÃTËtȈƒ#'mª ¹Š´’* Šm€§3õW´4Œ²‡êÓG毦&¬ƒâY’íâáGHOÀ†+¿çÛ:)ø@‘fÔ ~ÑËü%Y #þQØ»"ôë¥lÆ*N³#pÀ¢a óAÀ-Õ"÷!²süBÅØPŒý¾­#Ú:À&¾éØxÀª’Cü’ØƼ² 'ûÊXþÈQ€ý’ĪE‡ÙY•­‡¨£hEM…`m~­kE!¥^rµ[L€² y\¢Ø#UO¶à•äÀY8-xD03#>‚Ây.¹©íL_âpª<¤£Øp„Òä¹ûýÁoºÉ–3V‡ùDiž¬‹AÂ5ýp¥>xQ͇Aƒ~l ?JÂy-¾”¸4•$ŒòÙÓýzê( ÒmI¾e3—ꦰ@NU·IÔÇ.‡ ‹¬ôÔ(ÆéÂIèèT)ÄÜ#Æ€´b¡ç‘5§èÙç3Èãã„€ÆÄ(DF.ÉÈÉ5®u®r$)Ú& ‚šoˆ&h.@dÿ9,e",´£aNø¿æd–ãqïÿÏ¢ ¦Iô Ç›ÇÎ¥¥YÈL3 ž±EUÐ˶. €7„~¿BȬ¢]_³…!ö¡#ºZœ«ÌÂ%Ÿj¸e j’ó tLS[0UÛ2Ö+Óªuù̆ãÍ[\|ölºÛö(¬>{ò¨¢@ “MçúéG›”Éx™„y`˜¡>€–ˆK™b¨ˆ.dˆÄæ³1ÝýÐz:ÃëÌå"ÁBùË“O=›žv8ëþ û¨·Vë_‡œ[æçZN¥“ØwÄa†@º¢k¬*Í}år÷ê9kýøé•[0EaWhK<òˆêßdnÀÝðÕ«°ãæ~ÿkÆÝþKê‡Í¤áš§.Y”[-SŠ3ÙDÕz¼áJ…6šÜÆ2s5^âdþEPiåTŒàÜÝ_—æçœ“,+Va?Q£lG).‚ÈóõèŒ>a¯ô½Ao!:XÞ.–š_ëÑ»@2ˆ)ù7ÑÎù¤ §ŽÏÔ•ÎãOè³eAXVíÁZM¸Ý`I„\ÁÂF—!ÿÔx˜½í%ܬgá¿•×ë“Ó Œ5;Ѐ6MnuP¾ÊÔe`d˜2#¨PņdÛD 7d¶¼ìiH”GQ¥¿àšý&¯\0+–ß਽ðúÿá耭´Êž†ì(ÅÌÔ‡BÜA˜ˆ!\9À A<Šç÷½ÇK¦³‹$ª¬dšÅ,sú}Ÿ·} Ä5¶ˆäˆ€LÜw >ÿ±s€àÀþÃ&2uøˆÝ4s)"zqK7kT}òj¹IùPÚÑYÑ«žüÒ‹m¿A[O½€FÎÙYL©‹þÏ}KPÝì„êGâ–, œšãÜBxtÝÞükŽ´Ù“Êz f=lªôJ$0wóºmwÌ™‹€¼ Yï¿ñär`n%åP¥JÚÜFš‰Š¢\À€¢×c~Žé„õq¸ýÎñ½Í>Â9ê5(ó—F«¢Þ¼¤´×ô\r[”È{¦¬9å´'§u§¶ù÷óÒ0aê·ÇêØÃ7Æð-hl@§5‡Ñ‰+©f÷XÛ"`gÏÿ•ä7·?ð$IÈ*0@À•~„³ ÌI^)ñ|š¯Üýùº×9Œï2Wg‹\»Ðwœ~¸óÈâáÐFR¥ÆísÈWÁàìè«cb@ó )nmóš×]wk@Q§){}ý)ŠÅçz\ÅA´æ+þÍâ·5ÇÚÑ—ÍJ+Õ«BÜЉ£rýÿ0ûçg\+³9Ó~eŽÿw ºk>[n~ rºÃ•Õç×Z}‰SôûýÝϓљ\¨V–nº""®ê8Xzš# ²’%{ŒþЬŠÉ'nÚ^*vv®æ $;Ž!T'„›²…ܾìé°¦=mà.+4qW!Ýæ‹ÞlÎ…4áL¾Âyè^ì ð$\Ù¨{øâ¶U„Õ¸*ê±¼´r-(íï?ÉWpŽñ§Ž%› ðï­xdȘûC²þJîÖëH˺—w9»¥B’«ôÑÎÒèü¯ÔØ¿ì=FLÂ-æU|êèôØ¿U¿1+ˆF7‹øöú£?­øãÂ]/üÌÃtj-˜™ï¡îæÐxÛ|ýî"pØ„‘a¿fïIʲn“aŠÎìåAX9Ñ’Y)ù‡ ƒŽÉTž¯mÏŒXnç:‡˜Þ_üʳp1©¶×Y&©6ˆƒ´=dúîûµÄÿé…Û{+ŒÇwJFÙì†àPÒÈ*(ã€rD0n„¶¯q3ÑæóDz¶ FÑ€~ké×Ã}Xctži¸ÿÏD):-«[†±ŸH®Ib`@Uc@ªNý}Wió³ï3[2vÉsUŸðžççRŽi³ÕÝ”71êOŽÇ—[˜¶ÌÞ»s<õ 4²#¾ª†RξËÁIkÞ!’‘&I4ûÉßݹòqI†¦™ìM<êsBLnL.¨>à¾üjëSQ jh²œtÈ[ ;ɾ²!L8û‘”8YRåv¬\‡2jñ_åÜ6/ ýSÚyó㱪RÔÇËú—Ç: óTTbJâùÃH~‘^z@ÕîŒÃsàí tѺ3”òNçfÛPùâ4w9uÒàfUÊæR¢êH¶â7§±õK†ýÉ`¶ù2ïúÈ¢ÿ²v~áÿ!‘ÃKr&wâ"w#+úõÿ«è·°|ûÈÒÍÅ&ã@í‚£š%ñß»|fÍ‘¹H›€KÑê‚]‡tEÀUÁè\Q ëÆN›ÆØ ˜ îóY®`Þc* à¢ô‹¹ä7jÞQIü/á±S©È1E8­¹`›?Œ]i˜xÆî€DDe²ž¾ºÔðê®ÿÕT•9ºð&v9L÷ƒ™i‡†±ÿBžÀ" @ƒ‡ ß4>Ð V÷OÕ½oÌú?+˜߯bÑ|iZ°B½{î+Œ ²NËïæÐ˜œX€yX9·XûÝøõTw–Øî]îÍÀÛ=m¥Ji®-ñ‚9ÑÜ3up¤mTQì=ÙǸô_•A!ˆ‚ÁIJ;~øSšÇR€A¯lîÔÊÒ݃á´z„‰HËýÖâ{¸ ý’a‡D I}8{?œ<”;¿m˜v6ºˆŠÞ×õ¨f§Ì$jîJýLÖ8<¡Ø=‰ƒ‘Μø¨`d@~àSqʆé@€(ôö¾Hf«ÜŒs½q‰G$O3º¤Oh{ÖÇàùðkº Wë!%¿Üû9ûÌ#ª?Áì pK`ù b¿‚S$o\1ƒ· Oå“N~G¸PÎd"1¼,0r¶@ûQgu´ŒlíÚs¯Ð 0b yàÙpîHyH@÷Æ©PØhÂt¡(ÍBóNibîÒÝxˆ~ä{JNØ7™šõÅÅöy¢ÔPÒ7³›»aAƒ¬ónÔlµÅÑX- ,ÓZP§ã‘€ã¾×±DGòÿ=µŽ€ï\èD>†Lb´Rt¡‰{W¤ºHÒ/*~Ÿ,m,£BzAp>PuS0úYwbȉ*­¾V ïsð¾R½$¤Lb1€¾c†;¡£ÓT—_™pì#:½µ˜+öxoŽ¡Å$TbB1ræ€7`±;¤ŸÞ“¤µ´¨Gá!†B“8 5Ã%¶2ÄÛÐh>ÚAxžùë͉ubÖrÉ[¸ ï r’»lîðóÁqÃÉu}D œî>™÷;„¯ËnÛ„þÄ{jM™ =&e|Ï8¯Åû‡<óW)—ù7çËœYÒ<G£r½_\¾UC‹ÅóWjàó£¼èø»ÂI oA4£fòËI]t븼3Cµ N¤ã°;¦æ.Ò.¨{r¢µgZ^5û:[±ëômSÏôË&t{~A9ðÕÖlO¤±]~6Ut›Æ?Kš©#rûð¦åîöTÓf&ÚÔ\Œ¦Fe\ßÒ^½íŒlZ&låY^Pø|KÜô&F©ë¡n¬Ä5RmA§> (ö*Û´ÁazDvŽ © ‘·•ܾ¯BÏIA· ü¹Ë¡ ø„.ðr €ãÝÑ»´ñâ ]A  OÛ¡n_ÓNoºŠ<½§ïãÀ£îÙýÊñxßx ²ôЋ„]M fiv ¦×Éß¹×¶“VÑ“e‡à(»§X:iÑõÕ$Œ„OZ¢.–Ü/¿Yh³ƒ£ž^‹!çïäÈÙ=ñ¨¬cŠˆ<‡¤ líeäT»˜S#B6±EÊ;Q6gtãgµ4+ƒ`h„YdßR\H¢ËúëQàGÛ1±M ³´ðq§Î[Íh• ‰Þ7ÒµÊ+ 5ÅÌÅ@>ôˆ1¯u¼Ö8¶ì&¦ûßÔ:•iñÏèòÛEéEÌŸƒpÀÔþo2-€I€¯‚ßÇB½N(â+ç±b0pÐ>™H¢†‚—ñ /úœ€ÏSéAÑ–WØå|YNÓE/kªVjŒx럕]ú3/ ʪ›RïÁuÄkÑW‹cË—ë0Á¡ÒÈ(à*Ú´G-l,x6hå&Q¦%?·M§¡+QNçèmRDWE–’ÞÜ/ ì"3$0î‚®e«tw+¿˜.³¿üÙ»æ}vûîSÜ)%dÈ*ʬÂSû7|)©Ñ}º©øÅbB2¨Ä(¹#;ÖB8€«#(†ŨW³1g-ñx·é²Œv—?(O_ñ„h½WàÜ¹ÐØìÿY¿/I9|Õºt¶Ô”N"ØÀ¦œTˆüãp9¥À/\TèÙDäT»æwàÆG=*áÿU?‚^ nÑkÞN¿Ìž†CVŸwÏ'½A 4‘í†!4è£7@O ‚ó§ƒ}g~îglØQÄX¹§ADT•W·b#¶«Ó°ƒB|À6 Ê×8®/‹d8`šº3:©ÒηÏt _Éb¹™¶nÀÄsG6²0AªÞºJ¥VÀ~0ÜnÕ¥Ü|>»(/—çUÖfs$ª©‹Í(Û0Š0cÈïó Þkò¼xsß9†c»Ð™çb%×gÕEÔùˆL¦ bÙ¸ÙXFr–ÓOqfTã„§!c 57Î/ËÕO{Äš@P@i”ÅBŠýGÐ( lE Úí¡sZº GŸ'!žèÿéã#UhÍh Áô&ŠR$ 8¯šrÄúY…÷ti§™xGÁ&À|a–Üf¢uVÊÊe82ˆŒ™]ÇÂwwvV©ãÎ;8/žño5–Y:þ^ÒßvÿÝÉG:ÀW,m»ò¾‚ûßÞŽ›J ÆÖ~“a×üÓIL¡¬øöÅ®ÊsO•¥‘õí~©èè&eRYÔ¶ü Œ}øhÍSVwõ«ÿâO¸Îý—'¢:X¢ˆ¨ÒíîpÈ[.רîÍçi!µY!SÎþKá/q%=1ç>úˆ§ª4¬8ƒ*Ñü÷]©V}Ž —Œ>¶+¿GŒ°3 ߤËL¶%W€ˆïB¹-)©Ùh=nãú×PWæûóîØ1.¤A¤Y=÷àíïœÃ8$°URB£Æ?˜«°HZö{a#w1Ò}D´µŸ?ðŽ…]ßZÌ„˜´eM«5qHp ‡jþáe­Œ7*,9Qîcò€ˆˆ€讟Qùv¤j9–ý Šé£´ÒÚÞ!eÙrûùôÉhï=¡ônÌ'ù*‹ûÖ­^: zÈ';ß„¾5ë\/š¾ p§€Dˆ™ßY"žïÚ»ýy„‘@@<½þC]ˆUôš²¼TÉN]ômmùÙøIõ;r½Þ’oP r3†@XO«ðÅOª\TÒ˜Ÿžø˜4 o€¥ &Ø 0 fB}`Á¾ˆü`ʰŸæîùY$C[=·×Ø¿J.Õq‚æ¨Þ¢ÍüG“Á”CŒñÊéÝÏ|F <Œ^÷ÒÇ=ƒ*Ñ6c3GüøVt½ÏG†{’tjð®!CIò”‰ç¨ «àþOÚ/0¼Šø¯Ñ%þwÐ'^'ªD踎tŸ©Î,·SÑõ‘óï W®¦û ѿȕGÜiïAqŠ $ˆx#þ"Ä<ÃaœXÓn~:m‡X‡‚nzh+ “Ê%rlßõz¹È´!…P¥"óX©ÅH­?±Ø f ¹;íç뻚P#„SÙ‹^ܳ™—œ^J€€3²alðÞ."EqK*Ú'ö^"_A@´y íõ°–»SʯID_0»àòí­Ã( à.+Ø®B/0m† z‚_®\oñ­#(†ø ?¢K•‰SbK8Bk+à€ó#„:‰ì2•×t_¹¼ÿ¤l±bu‘š .ï½¢·CäP0{àw/wù¸ Ë}˜Æ.85,é ’²°lk¸¢J*ùÚmDNÁ‚ ÆG¸5äÃr/w˜]‚‰£°ˆª‡ÇI·­³84íØ*[œ–K™—aYà®L¸Ô[}ÔJXxÍ8áƒ;±dPïëËŽxÆgÖ#5šqIÖH²ˆá“PÊ"^6Œÿ¯pÖÝåd´Þ¹šÛÀbÊ€Óù3aÿÒ±DvyNsÀ°4¢/|¼3WâþûA€¯’¼À(¾R@€:z®ÆŠl-b ÉBâ¬Ä^ÉÆ6½/k698„d ÏÛÖ*ÏbœÑ%&Àe Jµï¸´–ß €¼×¼ ìD0ÐAÚOÀ 8@ñÛé†K„wvéô[±<›Ò¡ÙÁÛ-!€x 9&ÓAlÍæôÍ’O)¤õ»fdU«ª}C¡ÿzNY…vêeª ¯®æž=@-Íkš„yfjÍžº¦THFG›<~¥÷ßLüîÖì®]ûo„^aÜÜ‹WÛñ¤¯³sÞ¤x« ¯¡ÝLØD²h›2]-¶œCöcygܾþZ}êÚ'0Þ»%ôÐ~iªÌ9Žk•-ó4,h•5LrÚC¡ÂœUd¹õ¡4=r3{hàÚ‚T½;£$À|ILgèþ‹ssKÏ»Fè¯Z<¶†åñSì‚?ÀüÁînÞü„— [uƒx†ÃfŒ$Ôn,?áO؆ÒЮØ+KèzfØß9-$p`ze¹\H…E…x÷Ê>=ÀÙTeô'%ß/Æv ØôþßQ@Cnb¬úÚ£¦ÍsèÆ‹M©q‚—\w&iº"ñá[œpþ7Ò¶Ú"Ìtò¤äúm»Ú•V¬É@ÿãù}ã±R-‚ˆ^)›þ H¬yPâSë! ]joÎuƒÉ»»×ŒAjÍ]ÀÖ/$ü¨$Ï2  á¶;:?ð`Q3óÚ.ÞpïãÈ(LrŽbsúaóó5E'‚ ýCºÚn뱺qWb'$UZczEo.•<Ñò®6é‹©ßUùï¿dBøn,S½Äq`—ƒO;q]ß!å{!££ú×É`.aıÅMo7‘,Þ{9R”Š Æ±€¬øT§‹8V§#¬~y÷­ÜŸ·ƒyðc ׌#õH1N•è–éŠw·ÌNÔ̪Ý#¸Æþ»ÙÜØ¨(aóº×^e! S¯•ÞVMº 1OM®*OZxn×½©ÌNFªŠ–6”¢ïñ÷ΪæéÞƒƒ:ÉãÀMÈñ¬BqîNk›ma·ç Þß_¬k«¶ËÂòô‚‚‰›Án[“7þ§„€e[Áòb4ˆƒZfá+[Þ9ÒfðM,ºî˜Ó9ãÍýýÉä.ÃÎEeÙ\dÄÿ­3?`¿Ô æ‘K_ o‰Ž'8öŧä²Ã¦€¦õl¢lœã±Ú4Õà€ éí4¡3[ž= BÝŒú³47éãÒk@æþó5¨\‰ûüF%B­~ÑJ?½z£SŒsÕþžÙ ¿4Tm =”XÎW—‚OkQCæt3 Ž›¨©ä×öËÝÓî¼äÀ¡ƒÜD’Òe²Â!LjM:W£¢{æÁêí¢‚ö(Ç„P[ñš’Üý@2%9¼4§ ï‚F¬ô’i"Éu;ð¢¿!·Ïd6Åà£1ôª{Ï÷ÂÎÐÒ)¿?ºßÝúSü ý[`€õ;ný(F ôJîì¦^LýXsì¬ÿÔrŠ¡0è€glêù˜Ÿž×%MaÏÉ;öX[Æ~ÁiùºR"í"‡0 Ð8ÎÐB­:Ã^ŸÇªÕ<¢¶®(F`赈àX%åEBo»ÑXÓþèÚ!σTÿ‹¾ÁE0‰F¤M<’PÀ9ZübÀØx€ãÙ2‰ïïý;N´/ÎÅpOLÕ£[&"ÆJl0"àùâꈪUs>ï;ÝUH˦òK_É þAhý¯¢Gê—ò¯^ŒµÔïê³±©L¿­i·>܃Mø»Yø%—8¢ÈX¿v°ÏÏûš;®¼Α·ù¿ê®ö#ÃE§`}½–£¬cšmå?–«M.øCŽÖ™SHIÃÙ–gzœ0f66UžYü(“…>kÜü×,ñ»\+ïiŤâ×Å\\‘È ›uú‰3¡Ç:nþ­ïS(Ñõ©ÎùIó­A|òÃ` çVdÕðI® ý‡ 2ÉWƒ*§AQxþ %vó¹¶Ïê‰òy*nçl_sÊé›'D*_àR4TÒ¼Me¨Å–#]„NìXþü8€s ;¢Ë6e£†h–PQ5õù}æGI‚Jíh¾ìyIlN’ñì³þ½§æ •¨‡vÆ!*h(¼€[ŸÓ—¥ºÍ/â:„g?t¥Áê%h*>º¤>Í€¼h.Aý'm¦æH§€³ËK¼™cî•@ž#<ÑØ½ìS•1MB44.Ëľ¸ °CJx£šj˜8ÿéù1|yò'\Ÿ©¿Gd% ) ‘ èìľA5x¤eÕzò¤Š¤ïÅ“Ûmû‹ËuAE@líavóÝAU”ä!ù¬Sç:¹³Ý çÄ Uk1?À­wS‰èw_¡5ëäVºÍmÚ¡ž¾í-Œƒ~¢Âiº CÀ}nЯ¯Hü47ï´ÜTÔhÝ`D9á-—¼ >Æ áOiF$–yýFdƒIè~pÉ;“ŸÈ^NÕµŽ1Ò‘¾ëxÛÏa ½¬þyÑ}„HÊÝ!¼lâI°ò2¹|³t/ÁûC¥i(j+®âk[‡õ Âõì`.—ckzÎÖ3ozOXYrM øìǾ(‘ ~Á`ô—khaQ¼¥œWaYÎÿ‘O™_ìÔvïœAIj »K§âR|³¢BÀ_(`SC¯g·>EðÚ$ ‘€ËssŠ"Õ©¾$úïH•¡`ÉcÖJ` 2 q{Â\à¼É÷˜3ªøéwõÓ´‰’Œ íËã1Xž|0Šw—$mI—cÒ$¸Cåiìúp¢zaÓ‚±=E-D8qW8eú9ƒò-·F ‚Æòx`2ÃÝüZÅÔm’󴀿/\›5Ì `éš*ñ1*M§j˜ç 7CdÌzæ-9”K\¯|Y!|Xˆˆº=»UqÉxƒn¢c-÷N1“îe_ÚðÙ„~giI‹¢á» µTvýö’ÇËLë+ã×?â aLœ Zy M>wÛäphjˆÔ»þÈ |»ÏH*ßç£äl;·Œ­é Ô(­úåv؇Á°ºá¶™]xM~ªÁ¢Ý*/ô]°bó\3¼Ïå›(”–ÖC+èz«šêÈÏC ¼«À¢ˆ2ΧoLÁ(¾¹$Hu5™º™ ¯\«ícŸ,~[!¬IëZØ>,tŽƒ¨¿ˆûzoÜíg\Ôg¼ÔN±M—y³¦IaŒÚ1šð@Ú~c.µâÊä…ÑŸ¿ _"!¡²£áñ¶5ÐÑê‚ì…ØúuÐ}©îwVk¾òŒ[]õ‚é«Krµ}Uî«æŽÝ$ûæ¤n:"òµjë­T”ëÆý64"‹UqÜCÂmU?¿&‰‚ùã±L÷òü·(!>ÚÏkê?$bÎÜþz×Ñe•—‚ºßaxÊ£ËY ÁˆUÔw½ÏG Æ6Óû0¶ñ7ÞƹÃF¹&G€Æ8›(ï¶LüÜ¿ˆÝ`Ò*¼!•Ÿmí}Ûc¥ÖHR3 VÛèyí¼š8iÌŽ1Çœ‚ö7 ~6Ê€3Õ%³ LY`üÂ%D"Äš¦Žf ñzZ‚©I3.Š(j¶¼>dnĹûêA9ƒÉyýSRÑeDMx¯£¼~‰µÇŒ¤r J¨I¨ˆká q©³[Ê,ûüºBWâÿha18¿'é¼n²LØ=îH0âc¬ÅUÜkBVãeë9°Åi̳[ªèY_#ùȸKD€R "¾ bÛLe}dxobú ´÷†ÀˆøOc’´¼Àl| 5½ì¡ß‚Z³F @¸ ¨Òbl«mcÙ…ÅŸŸù+336'mÎÚ …ÔÑ)ÔT—ù ò¦yhO¢‹™«ÿÅ‹ò™s‡¨[_PÇàüvPŸ'䀎²ðšiò5ô&õT¬´5ã êÝ@)ì¾]‘´C\7›®Þ–‚{Ú*åà¨+6+ñh¾C+‰Ür^}ïÙ>$ÑÀ-˜‰˜MS×2~ÉäÇn‹Ô(ý_iÞ±ÿç;6U/³XÜ—ÑÆ•-È×=J)Ôƒu"WÀ£ò õ›øtwêÏÔÅДœœÌfG´w¿MµêÞòåf­ýì3m—v¬áp…¶—O‚˜[]m°ƒ%¼€=yëbWÜ"ʹb%›ù3B¥ÒÍ d/´5Ö%ÿk‡‚•fÓïJJÒg•uF:¦½~ùÖõCuÖ@«ûpµú.‚ßèèñÑ¡] íê B(\´$^% §[õƒ¼I; ®”-p§Cä[3•‘ 3ä6-ƒ¢@1 šcË8Av_Ôˆ1U…‡DêCâçß dÒ¨DÒ!€Ê k”ö¸¨ ×m±9 Ù¡µ€¶²ÙSåÓ¶i¼ù¶®ÀdT•Oöñ&„‹`ßuºf¡…éÈ8‰"LÅ|‚½Ÿ»­*~…HøÐá¨JMíñ‚l´ ˘V¦6wa1îˆ ©a{Á;zéï#‡RøŽu>˜5køËЦi¨WQpvÀhƒ öÐǧ¥Hݾ¯EÇKxgmx#?g¬hµ$Ó¼F}Û Ò9qà@ü¼qóÄ>d@æÙÃ`=ØÁ/ö™òl'ZK° ¶ðK®cö‰%/…jIuŠÌ…<œùc²‰%2ÃÅÍ y~ò¶ôOÕN'©)ñc,&¼¡*Ãï Dö¤¬ÕŸK‰ ÂÃmÙMõ“´}q㡞¶ÉíÐXùÜMމb«ñÜæ€-Wh×A–y74ª7žÒÐÊ3º/ƒ».Šá{¼Æ$ÖÿS¿sõUÍ#*ôÉú‘hy’eškUª I·¼N@4ð+éf,exŒ=¸Ö‘¿FÒëÂ#~UtïÈ¿/m®ªž«Ü Â=]UUñ.¤/Òö#ù°ÎÛùÆóXƒQü—TÂ(Bí®ï°Äéñ/Ù*…# 8¥ Ÿ¿ŽÖY2Û.Ùæ#üxß ÿš_‘Ib8dºhËŠP¹4¦hëÛœ>^m,¸lM(ùó˜ßôñ𵛦ÑÎ5QYÊï'mzpÃtÓv§¿oŠê¼òEjçOq‹Å &|GÚ6ÏÍÊH¾;(‘FäÜÉ>Ã*!Xl¯¡Ëìy­ùÁ‹ž~ ŸWò+éÃ`DŸûwì 5~ ¿”TS µròÔ]GIqO‹Éõó%ÅÊḟKgÛMWTAŒJÛ™/u=re?X©I˜µÄ)܈߄ŸcœÞüõ°ã´e5<ÃwZÊÁô÷³<¼Å¤ BæÛùP‡ÞT­GÛÈ8 ñšZ HØ,øfw=0y”›DAYQýõ Þ£ä“n……?ñð²Ú) =Ç.Cìö¬a9Õ™Ctžu. n‰áκb‰›eµýóqúö"b`„]‘E°ð­±Nv´ôŽŒjÂ7áüÞ„R-X—néÛˆ8^s«&¨£‘¹©Ë•]S¹Ô©›™ìØŠ—©ˆ& Fa"!¼EÏ!Õ‹;bê.KòÅ”£•NF@0ý%ðégÊÕø¶J›¹~ÞžKÿ¾£æçèÀ¹w½§_>rÞ¬™V/Ö«ô¥ü-Þ ’š“hŸ?$M0QnœßsG:uÂ;™C–víìõ™$t°ÉBI‚_x08!ê×€¢¶=¾z @:º4Á×=G½(ì -sl ¡H‘Ï.‰„óÏ÷#KDÂ_÷ŠŸ}'“‡—³Ö!øƒÜ1¿#œ_ÂÿøhþýKÕ„y‚„?¥´­z²à6±ß YØyI†ÌŽޑBò>îæ2e4FL«HPzì 9eÖ8ËÅç¬9ÜŽ3þ£õK2Î…D»¡ÿ‘fÄõ`f»‰RÏ›mª£Ÿ=Ý*áÒ[–t‡Òk$]õøw0›#N~ñ÷åuÕ„Vf)kÜIc721Zƒ›ÕŽ4K6ƒ=“þ¢Ä/£fÊ™üº”g.õ.²~¶b’ºWE·äòï…¥ã)ÝÊ.µ‡‰N¼¨7¼ôá~š3† Õ%‹ÃQ¥Ö“¯‰vµ×ì Ðæª×7Ë]žÜ½hÝèÄÐö½ÌØÂ”Jb/épºÏeAœ-dJù8’Üö‹Àz26pf>@ô!›f½z,? »¡6„¬á|¥š8ª_k$DšaƒéͦcDŽÊÌc¦ü׸–t/$öÃc/˜;šeÌ6m¤ˆžû\Òº3òPCN÷íÿ™&Žp.ñµ¨ðº‰‹¿f£g|nEry•a¸TšQÔûà®øǾAÒ Ä*b X€€-³ª»`V! ÿÒBa+?ØA’ÏÈkÉ21.¾ÚÛÇåÇk"Êð¿©»7FJ4 U-ƒ@à4qœÄÅϧ äœÖžËRcеŽUSÄ·À0C>Ò¹"ÆR1e²{9^J‘wÓ굓"×QÙþTÊÜ,4Sĸ„dT€¹<&®p¡1~š‹idW: 6ÛÝŒ0}Ts9t]_‚—·8•ñ3½¨ótü±€wx²ÕÚ‰ÒY'Vˆ£ô(…o²zL2"ƒ^‰È×X!Ù“Ùd£D¤-@p¦ß¸«’þ ’AI³gSQ§µÉî(£˜bßù^…í`6;­2¡±Úaq:šïÂV õ®¹VÞÜ­»šÏýå‚ÃËÝf£ÂkÊx½ŸµC0ñGzjû°LÍúÝ @äÇ gœ(°ZºªZ§2˜uª2ó.Dø “„'“ˆ |@ûÁäÌ•+ª"Ãë|Þq)—ÈŠ9dÓߢ$È]™ìŠ…µguØÍâ±'¢ Ü.ªnÀlC“YBó^œwoݧo;¦ò›9J_¸ç@©*“y3ú¢Ó C”4À0þø2Â^“E‘§‘@9¦+u 1µ0¿#Ø–m(¥¢³0n0dÀCsÄwNÊ÷˜AZ{ÎX«ç|–c°[Ðø¦Š:*?Ô àÅð„‹š³Š‰†oöô,ôÇ%Îý§ÆÊhmzNÄ®z«yÂ)1 +j݉GáÅëhhë«ãÞBÐEaZÎÓýQØò:’dÉÎê5ä¯Ù©ysÉÝ5¢,ª·¯.~ ÷ (ŒY;>ޝI2ãøi}#%ÌÓô#,Ï„fÑ–*¯.øõmVZ¯ÐAöøŸøa «Í‹‘zrÉÍ8©Š·ïBF$—ù¼8Ofl=ÔJ=ýÑŽ3<ɘ?à®`¯efÖ†hD[”øP ’[Üü<ˆ×ÒC™uTŸÚ ›`˜Pœþi¾H%pWâµß {»ð= »T/ˆK§ã¿B½‰\‚Ô¦TÝÿ_ÆM@h$0Xz¹c„äø(‹r!Äà´äõ[†:ô‚v¸¦,£ƒÎ%s{ñ|H¾œHO.?žåš8¾'!ÿq¸Šž,üC…м’ìùâ‹j°¯yÅx¢6/½B<ö~Õ ‡)$ú*Q¦©ãmö¡]³*­ó_ O’œû¹QöH°/™ey£†#àIŒudþIš[AÞðtVlP,.#AÒzný\8áxRüÄjƒFçËF‰Hú´ |ú©UŠ*’ÙäϯüNQfûöûÄ2>v^wÙnA3À–Nq8¥u*\}F]zÜ礮>ËÚb(@Nž`VÆ÷Åô äj†.š1ÄÄkÓŸ-›ÜŒEuÍõ5»<ø€‡a¯öDU U«ÛêÈí‡Ý]$ýÇ_cÞã˜dåÐC-pøy{Èã°’ÃŽ«8b»dÑŠ*{ü9ß9¡ÔU²Lpo¥ãöìQ[.+µYö}ÂÈÀýšbz Igmè• ì‹«%º{)èùU_Ó)8;ph`¯œcLKAeu½¬Hín£áÍOü΃ «zG©žñr©9S"1CÛ«"^g±ýJ>ˆä 6bï~|eŒ$õÞúàÄ­…a~‡Î†üò)CdpŽmÝkgײ†øñÓ 7Ç’c&0ÎÀ;/Wo+ñÙ*E‡¥8ð’Á0xxd7ü2¯X8jÓèûÚ×QGÕg_ èƒvqœ®îŠÝvH çáÐ-†CŒ09l›‹‚Dô*Ç!Úß³£hƹìJ§—(ïÓSŸs)ÙRðìb?Ý1 ;Þfsë-*\L ÷9,rÀv¿aM¾Ñy¿o;g9F2Ì„… $wþr2®*>Õ•©x¤.ÚÚý0Ý`¶VFZšG‚³GØUo:yU=W‰YN݇“ý²Ð‰0NØÛºÅÚ)´¡ÊÔ•C|~ 7é äPöU¨9–x–$1•ƒQÞDZˆ“ÙNC8¼¤ TáwA4;î|wYx~”Ö÷ DdSìá!lWžqa F²öRʯÀã$°y쌚Ã)}Åüú¸Eüg²² ÄW…a¶GÓ†ú³åò^îð»Ð;ÀÝ‚ÉÝçö ž$= cj¸iöîkôõÅ""÷{Cé,°# DØ+wËPÍÛ2Å‘ïAÍw˰ô¨}$´ÒãÖØ=×rtœÉ6šOŸÅž‰Ø¦,(9Âu´¹ÐÀTµÆAÑO˜)¯bCã‘ìÉJ‡©%6Ñ+JøG?NÙ{ó‰¿Ôa/öuk[q3t±ëXÍ)2kå}üÅí6/oìý c›\•»þWAd°VâÂÜÑûË)~b]rÉó²Ò5Ês†­/Ý7NÔη›¿l¥^ž°âmî¶¢ä«Ûò)\ëíx6Š”}Rç5QzŽsòùp;¯ã)w²»0  ºÐ¬•åÆù ¡ß)ÌwÇs xE@=Ÿ4B@‚öºl?u*Ÿ;cÏøŠ VÁ´C¹Œ2‹i4=ÑC­~ù[£Ò¥Æ\jå¯Ã·E ÿwûæJ#rujñw¬C[´iTL-ƒÓ…­:4™— æ!Ý{7ФaÙoHwûïÒÎHÔáÀ ”íx:«ÍT_ƒâ2ÌçÞ…Þ×—ÕÔêêˆ"ö9™âÚÅC*›+ŠÀÿÙÝõÅl&•eZÎñ¤¦=8¬J8ƒ Ú=É´4Ü`èšüWJÊn ¿ù¤,QŽ L¿uEKaÿòSò‰þŒPfºº‚ž§=ÒpuoGvokj9-f¥Cœ].‘™°;# ÍßS{% $üˆOLÞMA O‘ “3F¼üWl°+>´˜Öç. "[QË%µª4{6#aù6þµ&þòtîž¹áxÙ€ôã° 0Þ&—Ô6@Óéž›6&wˆ0I¸8Þ8â i÷r¦Ò;Å×ÝÜélƒ/=:F:¡â –œØXRÈ¥» ˆc)íL~-›”¨ùJ¶ýcw”ÑOþ%³þêj>¨Ǻ¶¤òôÙ¼‚ÏÑ j8Ž‹”S¬}7 ó²q¶—Õaú!¶::^Xº<ªçãlu1Uàº(IL~¢ 6·y&åqÍîn‹~ e…—Æ?‰~„çkk|cDú•™rÓψ´Sš2FŽèܯP¤µ‘·³Yž¼Óš ïØÑ»¾X1¸¶Êð(ý8úœÝÌ[Î>æb=öÍ÷–»fh$¨#X‹p²óX)ÄM%áæ&ââIŒ:.>ç¥ý¼ë¾7^î- d›:x„“è¼óá¬C+Ë™]ãGŸ|åûºˆœ+<óÁ“ÇûKht h\åŸVáž-Qh4]27VqwÙ¡Mê-rˆûð€^Ö âšÚIBB†‡òGv= Êr|™ôü*žZÜVõv†j]/™¸4xOaÀ”iØ¡”†ë€€±ôÅE¬ ®'Kó{`í~ÅB¯ã™ ¥"·F9¤£E“(XÅyZjbtîeÉó™<u¤Ë¦ß¼ë,«¡ݪ ³\¿É’&SA‰«ßR´- Jyè¹C±YBή?¹ái´²œ^ƘÙdú…èÄŠ(¨ 'Dìm±†H„Øw )Ãâ—Ñ­’|¹žûȱ„Å ¹l *¢]Ä6ûÜ¿[8o.ÒõçpÀqèN!]˜R0`þ™ÜŒàñɪFªs+ê´Gs"À6’ÂÁóîši/¯$ÿ/€»3Ó\2ºL±»õß ö@°T&Ûíùæ+«þžõ†LÁüŽÁ6jdšx(‚@\Ú7­÷ %5¢1ˆtáï\:ÄöÖ¶{ $‚‡‘ì<ƒ/ºs`æúýÖt:T}KâJŠ‹û’ɽ´_…Êèð1.w˜ôÖ9 šìæ©\•0ÑÄê÷ÝÔ°s²WaÔ ò¦fÎ톓¡ƒ™¡}?¤i‹ýFI5§fô9y™)9´Ji€Â@½å:»¢~ŠÎÁ¼äÆš‡ü‚WY½r±â'„¶ Öÿ2æÛÑF*8÷…@ÁUï¥ß x{‘Ød )—ÞÞ}ºo˜ý´Î?‡‹ÎYé*²NX7 çß=zŠï×2Œë?Z®{×”+Á¢e§±ƒ•ãÕÍŽ^;ç p¬üBAå.ù¢XÀžúÓÍ‹ðîöëzéÂ練Š…£ò ¸ ºOèù/¸²,¹Gé_ ­)·¨NË©¬yïKÄYméê‹dŠÿ‹Ñ¸Iz‘ú¢J]©,Ûø1Ë×mråùWElcþ“‚+ZÔõ!Â=;†‰Ê*-\Vm&§ßñe(l!ê rÇ­±EJ¶“Ü?³a±Þ3eqQÿ}ZÏkækS3H ÅÏ¢q¥% -â |žs^©ûì8TÜIÉ&ÀÁù·®‚ãkæpxC¶}ÖŸ×cÏ,&˜`À{¡;_9@‹¸@O:ÀF)ª¹¢è4YQ°#(–ÝYŒÎÇ÷¾trµwúÛ´øv¼ÒrZq æ„Yk®ÑË‚ÇH žrÂÓ A|Ñz뾩ßâþïów¡Ž«7½T=]Ïæ96â™èôɸýó=îÛãÄTñ¼3â-³ø¸â€–Çâ*~Ì­²¢àß^ç¢Á³õŒŠ“üvq£ó4!\Ãò""éd¶Ê<±¢žÒlÁéô+V/ò‹1¦}òô½s<Ø÷Í_pe³'kCÇö†¸'Ê—½ˆVù†Q`¼nñ}•tG×X„`@9Ú6± 0v ƒœLÔæ¨Ž"ÑôˆÒ­òU6),ýbk1º «ƒV‘=¥Þ`YØŒAP~ ,ÇÛ<"âê^R ö&ÞÐOÚŒ´^ƒµdþ—€!L„%@0ÿCó ³ûæK½¥c™í³ÛH£" {VÉ̉#Iù¾2:n̽ù’Š M«¢ÕxÞ½žâùÙŒ2Õ£Oخµ/½°TB‹r±V‚–Gß}! šà+7N?e}·ÊúµåNÿ|š— æ`OãÍì82J#Ih–êiÅu‘x¹¨™šûÔâ&3ûv–0‹XÓ‰5¨en”󼙆˜By¸:H"B€]1†è»x6âœiëúB)?ËÜ”Á*,ù 0dv‰—ò"iÝÏôGÏ—*XzRG¼ƒk¶ýV^3?#תnýCþ?2½, @•\îZÚ“-Y¶Ãx|h‡á¡QÚ²"z†ÇŽÿ€ÀŽHg•`õ„ñ©¿yìygìÑ“‡YÉz(ç¶u›axUòž |øÇ”ö£>bTÇËÙ–®ÂÄûÅîêNù(ZäY³Ûç­Z”ß§GË=Ýûœ¿ˆãAÀw‚šêð |ÄæÚÛ†Üô ÀîóæÉè°Ãú†ïƒ\…dâFܧ M‹{èÞ«g£>Aø#TÍ(ÁORìÄ ¤2ü È aÛ¹þê l;K²4h³!,×ô½ßp#YFbGˆ+b«Ý(àZ‚Þ;Éô Ç7lL"Ý‚t9Ç}è·Îk{*—>´‡¹*?@qœÈEìg‚ ¼XPËy–‘¼ñùϱ'Qïƒ#\Y UCL?¿Â@dØÏSMyTöÔR°ÿ:¨¨){$þÃt&³5(èQ³;u‘‡îã¾Ä™õš‰)„4¸{týª*øŒ–µê–)ƒ,¢ m·ìUÎë[t|sÍö(»«w£eÙÖ:Ò·†âcÕ^€É…Ä|Á«q3Ë1[ÿ—¿$ÅöºF'Hµƒžð÷1H·[N¥Lwü•†Õ‰±â±\§;b ]6d”*{à¦@¤tTx\|çf|ÝçQß5cÔ4 ÿY1}veË—g­ ›Ž–L¾¡;¨Á9H…lGT™`ùñ |*ýP‡­P¼Ê5}zo:JudT/uƒOb¢©Eþ<î3†vš ¹ ÒÉ„åLSæ7ûh‘ÓÖd ¸d@†PÁtûè!$dEo0XÉÈÀΟŒÀÖ2!H2&óÂdLVÀXéfD6†E¯0E4dSf¤ÁJ¦ “¶€»c>&c E 0K™,¦^£"w2B ¤dÌã` Àd@Ì‘W@ŒÈ#Ì€ƒ,™×š TLˆsL„WE똜1ô1«2ã€@"ˆ:™" ù‚Rûp6ó«ßζws"-¹ˆÃˆÈ;Í b<ˆ°æXÃ!º0`À.]’H†”Ö0_³RÌWŒMgC’`S¤b®d˜0˜2a@,d:fG5D=I€`™`€Ô{ØqRO÷Tò·’Û9C®O¦# ÕÁ·ñ¤ ^‘Í ðaFËBŒ ‚ÚšÓ ƒƒ8d“Ö!4¤7™\L]Ì£ X̘2›:AÕ˜£!=áøÃ#~dX³"Õ™w ìȆäÀÝØcwhd@ •ÌɸÈ~ 9³˜Ô`€ëŸ(À½†Š0B€d@†ÐÀæẄ4FPFDE aÀÌÁ2## ™™ƒ0 À"3"d3Fd_L.–Ô–07Øh€ »Êufž0âd2¦BpÈ|Áb ˆ‡À"¥˜!¦0‹2"DVS-Á€<Àzd3æiL€+AÖÌgL3A‘ B”döœÉó5ÇÐ2»ÙI Yñ(fD@ÌPЀ À#2"3 fDA D@Ì# R±× —02† Òe ¯Tnµ-,Ù~¹#Š™†R¨é˜ëø–HéòQþëΧùô¢§ˆc!¿§äùŽO>¦Ø”׸)Ýš ´Þ¢qùø.uèàMF†ì´竚ä€Þ9Nû1£Ð9R2ˆ%Ûzá|cüh'hPúË "+ÀsÒ.{Ô/¾DRÌxLšFFGYj„Üj(Iƒ1&¿é(1vBÉ#d”€“""í2I¤í 3#0fPFFa$" Á›4BOg¶ãkdo¿ZÜ«83ˆÊ2€j`ÌXL€ Öd@k0@Šü` › JÕÁ½éacñŸf}˜6´ÚäDR³4ÌV]^òbœ'ak…›‚Œ@&‹bЏßE}c º—ÌE¥0#&§±õÃÞ{]dÿëMï|É„ªO¢‹—¯ó/¨}~yí,¹‹Œ[ÈÍÿåjôVqsaUê[ÔjQªVÛy yï‹´÷ ŸZS†X'6Àê¬ ›;¬ÔÇZ^§×ÅÓ —=gIã ï8ö'æ(t‘÷^¯³õñG˜/Gý:²såKR’^ :WŽ%ÖÔüÀ>kžìNMÏÃüdZésa!x)ßcÈŒ¸N®Í}ÿºWÀŽMþèܰßz¹Í'ÈVJ;ÃfH騱YAÚ<×o®à¤Wüð¾]—ÿ=èßnÔ&‰ÆGf™Ÿß/ä«U²‹VÐh`‘U‘µpßð#Ø·óØD/²[šàïÙÕç¢ã˜ø,ix Çô´ûÙ²TUé Z®›ÏyОàöÔ‰n¥ˆ½®Àšê„I•ù‘>ƒÌÎË‘<Ãà_E?qñNú’¿µe%…Õ[¡½Ï³rÔÓÇ3Çz«FËã©NRÙø¬U\s×°ÍFÃê¦#šXàj»Ö† ‰°¡Ó`C<ªf7Æi^«­ýßWÇ"ƒ÷â×Ûö]{QZ”Bç]íÀ›²:ÆýÞ¦½}Uh^=zòy¯_#”:ÆÕ]=gæàÑØ(óÙ| ½Zª‰•‚:èþâ{=šÒ<Ö.C Ä;…É^·¸ž¯c-®~ Û“&´œi—­áÔáÐ`<ÓÃ0˜TW_âUÛ;ÜwŽ\s†îœî¾:õ·kNBáµ)eÑÄ¢û`a/ÐÚ"~ºË¿.À£)ït»ƒ‰ˆRñ!8Ã…«°Uƒ!M_>Ò(êît9ðmÏ$¨ÊØT^§½_ïëàÈmÍc-3/Kü-Õ~…³ßÊdæXnMY#QoèÓ7÷ŽJ‡ÔãzNÉ)íFkÏ\_0ÛJ½ÛÔ¼ñÓàá¼d=øÞד¶rôŶØòÀË#-"‚;(Ô®Ÿ1e ´Í ë0W‹˜µÈ±jo¼«OâÓÛßçïØAö[Ø5_.+8@i©«¯¢€ò—5öëæé6jv,E_N·Wç,êá¶Ý§y*œæ ߀„ ½ŠÃRbù°ôã+ñ‹W’`VâLP—ÿJ£zêÓºË‡ÏÆX矽Ïßùëz5À›¹ ýMçs/MË<<¦™ŸlÓ{ƒ¿²Älìý…ª| ø ék § ½¼Jº1Bõ‚¨"/ÚÄhfÊT o÷ZØLy"O RP+àYùLD@iÙ‚¼úp².‘*1_Ì—ý˜>Ä”÷®\æ–% Wߟêìôu]cš¨„JxASýBÌ6 bádZ):n;GÍ«û¶@žÀÙ6׃crNò“xv™Z³ÈM²œ‹U)¿Ž…dTÓPÃÃÐ&"}LY숨ë‘ïHÄ6@¦î÷ÂïIç aNê`wŽ*ªŠ'‹l—÷$ üŠcõOiÜc¢~Tô8 "yÀçô¨'7Ø–ýÂC·²ð丛uwO•bÒß•@0²%^±uN]}–.ÀˆœaÁî¬ð¼ " ÝgRÆ9—ÞÅd}ô–þìS0†ËFHúNŸ¿°ð¿±†$@^¦%˜‰Â.š%QSCÉš–Œà@ „H=!)j¯—̈­ùƨ·âIlKã\;yP¦b*¸DåÃ÷ÖÉj¤¤Ø¸Ëà}Iòƒ@¦êÂ÷I÷×°×Gq,Û Q._£ï¸¨ØðËñ jˆP_͌ˌxʰC àßéòó MñK Ô{i ¥¢5jDÕFèx1wY¬¥gžö7‚7ƒý×盀ä O‚Zy…©,égÓÐÏ'c¡KH¯Qà/­@’ÜöÉ'%qÒðûbô°º‚¥Øå=|Çç„ eÅqqUdÏö C/1ò«hÌ)å‹FcŒº  Ps^¡ï2ÈÛkñŽŸ¤\[?XŸ˜¦[[Òƒ%!zÀ¶Ï³¿ÿ”it½”5}vÝb6Oü|¢Å½Qam—’ˆ0šŠ[©7ö.z[7‘kŒÑD2ÔgüwZ ãTó@OoJÐæl)-´$ô?¸-ñÈéhI÷oP*¤3|‹"¾±ÒSÃ" æŸqˆfYnª{‚º/ªaO°Äø/;“¥ìƒ&¢ ACr2 Ø™¦5 8ÏÑ#ÍèF¬öÙTj?ZOÓzÞŽ¿ëGr¼u˜Á¸„§AnÕà׆s½<0¿ÞÊß$Íð$†Ð@"wÈtU V)LŽøŠRU³‰<`Ú6Ù»Š‹lBý¸r‚L"ÿrø_+ëGå7ëâ]¦SÖ QŒµuø¼hr ÎŒ_­ˆŽ8^þ.í-Њ¸!ƒ$#íy+ºfAÓ¡xyÁþ¤ÒÝj:T„3C–¡3üwGæ‚ÕhÂ;5¼ºüû–Ê™$iÂüÌ:ðjSß_¡ñ¹ANö9b'¹Í:#M°]šŠê„j2È7B¼Çi2Ö ¬&.s]38Ëçh«œ Š#£U²ä”6 veg³{€iq%¾a3—a"æXÔú»?OawË®^êrvë‡ïqVØŽ,ûŒ’‡Ö¡¹7™|(ÆÈo—ÌÞÔäèb«%Ön=ØÐøo¸^÷æV}y#/ã¦÷YN:¬®¼ÚwÎ>BSw¬}ð²–²QýmÝ{äG’ÿƒÜ¥¶^ì4Œ ·ƒ@øüªœÚŠcnöÀ…žFÐÁùL›Çûϵ¬÷» <Ê6Ž ¼…iªí@ vì àPªAÏ]£bs€Œ ÏJ™ˆÈ üóšã¹Â"ˆ)'ôªâ(ŠìðxgZ(#S§a^Œõâì=‹×™:=ƒ¦*i£âë¹¼:€ŠÊРçyÁÊ!c+͘ ­v?©ÿnmøÍˆŸÎ† 'ÌÌÙ,|FÎÞ¿Úsh¨éɘ@$Ÿ Iƒ«`Og:&ì@ ü¢¶n#ã™ ¢@@“Z™”_Ç)Æ2@MpFá xp9Ayj„&o¼ð¼¡ B ML3 R«}üaøc¦“ ò1CØ$v·P/€Ä5 4ÿ:8Oȯ˜Û‚Y§¨°3±ÞÍox/¾~?îY°·ˆÃ‡[òì€ûѸö8^™Æìø[–‹+– Ž}: ‘p΢ âsÓICÙŸTðçÓiOÒ+A–¢Wý\Úg;ÊÎójß¹@”Ý£™‚@Í‘Ø3ûÛ$tûQØÍlS#Ò¥Î-m0÷€Ýç±w í½é~ÿÑa.wƒñžKÇYNÛ÷„LA÷~Í'U°ƒ•ì­ãéJ=©"fÊ‹|×}:k¥¥:¤öSˆ&WÅz·ØE·5ó¡œûÃ1õÙan1Ð7¸–Bº¾ûLºú¨/Çcï €PLgÞ4k_»ó×vÚ$%GAw0—˜ýi€äÄ ˜P‹L ",?1þÍöc ÆCŒor9ܰƒM»~ÝÕÜ»zjÖ}rewV¸¹px×Cq1g›ô-—x¼æ'û‰êߣ@¸ðõ'\øË:?}ïŠÚšs -Æw:¯ƒ=6B„&—øÑÄGïm» "4¸Ë ¤—ZåéoRtm¸ ;ÒëÖˆ¼à^}½•\R‡eÂhLV—Ü®Úáb±VŸ@ÈCÓ` 5=Oä­Úºï9ep¿ÜØÆ,ãíhà]hîÑÿL°Ëßìèé‰<§å J8êéàè"Ý´ý–€ß.ÂBˆ{½WÉ&.  5#¥Á? â@`#Ýô,Ü5çlÑ@¬™b©¨XE"¡ÍØ97£Ò{ñúœÚñN~æ>>nh öçš;w,ÖÕÛº óýEÝéìám ñm‘1ÖG•4káËY$HÔiš ÷\åupôÊÐÆÛã i]HôÍ•ÏÏ·´ŸÏAøF½´ Ñÿ=³Ê\˜úæªSBì{}„5(Á¹‘®Úåivõ½2qÑ“ÝųZ[ëÝ»éû7¹¡«ÔÝ QÕ»5¾˜†æYŽTfçW¶ÆdÖs1®ôWÅèCé'ø%‡èËË›oj‚W˜ ?. ]L:Nã9µëmŒæS&Kz§Ñ†«v“Y‘”ç#Tù éö =«®%O×®Âch%0hí¢£ øÇw„ùɧªQ}ðó¦ì üÖ }O`ÄO8O¼XPûé–êý—¸*°ùŒHàÓï…PÇPÑÖ[{j²Ãø¿j¼M ¯'˜‡äã/ڥƟ'wþÄŠQ[‰£‘Î~PsŸßôŸÑ›Æx@i °*ûràÓ7òiwóÒÆ&„/ú$u? €–#¿@HÅðÀÔܶó˜¡" 0Ju×g2¼×s¸*". ‡çÁãD*vhA% â uæt€&káècÈAÄ%tæm†Žé@Gf›]ác/uЗP‚° Y'÷'Ci —[ïu*ô ÆÁXvÞÝcö3 2V"˜[†ÞÓ(ÛaCô¿IÎóûœÔÉÜéÿ|fù­³+·rF1Êå+ó®rLöý jç{S1ú: ÷ÜD µ‘ÿm1¦ã—µõ°Íò‹wü ÞíeW¸z³)³E­)3¯ˆÕœ†g éÛx·ºlŸƒ'ײµÜ: ­ðȪaôH-©C’¯i4×-»?9Ь•Ô’X@üáYÈŠÀ®R®³rÍDÄg¥ZÄ@°ÿÖ™%žA¸‡ãàÖ/îÏ‹ü=× GÓ1g”ú-™{Çî'é‹–-îJ_j_TbÄ÷¾Âæ°1J>ׯ¿ÒžZLÙ1]j ”ý«Œ'OÊ­ôs–ݷų}S@ö{Ò¦vÉpý«säªv@ž÷†tqÒÌ;ïWŠ2GÎ=ËAšá9SAÚÝ7ŸTÊ¢X à€ Žð@FÇCýÌûVxkñNŒ: ¬3’ /Y“aΑ¶Ú÷>ƒ ºÏãvRøÆìŸ—žïÔº˜HÐso²wDቸß’Oúäüdꮄo‚ $T= ­Á@â-øXšÝE£€”­Å@äAù‚ ûÂCò¾º!iþ÷%,&Ì"ðO°ºª» aôö/`,­8{›9Wª¤ÙÎ+z¢öo±“S°¨C= »E¥ãTý`Ïü8ìGÓ$srxÿ?ÚcŸ÷épyFˆgUu÷ͤZøçÁC¥“=ož.½áYÇ£þ’~9A}s’‡ä6G9s½Îä¦~Ï;[WAU¥KíµAÍöÌDlÞ¡X dÒæ[+–‹£g’N{ÙÕò/¬õ# |>†·á¬X<êÑñ=VI¾,Yoz·0’öè‘õw¹' ~º<†ª¾³¼Z3çø2Íh?©I²8?mGµZ>9fï’É÷÷aA¤3‚ lѬK0Œ•DÒ>pÇb É ñ¼ø@K…±v Ž»/wÀ<#ƒÐŒe^æœç u0óˆÞÀ búˆÿz6†åÄßô”6@áN¸wèî~åC¢c³Çöÿöæ¾¶S€œ“û×z§DcÞªm‰CVo;Œ_„…éaºúdSçÅ?üå­Û¦54ïÝ%¼¿à,¸’zš"è1€U‚T @.*@ÞA©$šõ"³:È«‡¥£“l1|DÀâ¢Ë€FFGë%‹©*éʲ³˜ÔÌ´×n´Wþ‰Nå±8Ý :xÄd v"Se=!¢TmWˆ)©¬²5¶˜äƒpàˆ÷„«Ì`ÌÅÝ}îXÀcŸ„9°]¨ŸÀYcv!Q[“€(]†·ª &±å¡Ûø ñK{Š÷²8¿ð›cãÜÎ"gË @&àŠ 0vÕ ¡;ø³Þìdg¢È€‹/ú{Ù:¡Ø$·‡¦ƒpÌÚ­0d®á0#ìÝ-î¹B½ JqÀ/`Š_ÓÑ"€h"á@Æ9´íFã†_h ‹cQ_,) EÝÀ!õÃjý½VbèàyB#ƒÆìVŸð"¡]—AÌí¼xß\¶qýIl8„e·ø'Šª–‘+1“®ƒôX? dÀË 6…™2µR1R¶ÿÖb¥hPñw^Öš::KÎÅTå³bå6ç‹øÕèùv÷/Ðý±Z1;e„åWÖ_øs+ß;ę̀`&á¨P¥nªn1[8B=¥6têWåub_d¡T€x< ÊŠx‰€¾ä<˜ 'žOY»–¦À{Á%¢a¼y§š‘f‡õ¶)çíqmä»)­t €¹¹Ü=U°² àyÌýZ˜Ã%÷¢ €Í‘vĉùÅ­”fß[k•>”ÅfŒLß±‚ƒ2"osÀÕ`á§f¶zc†¦×/|¨~Ñ2¡Ro'®SŠD)‹´bì#ç.#0v<èo?N fO‡mÆŽ«02_…9˜‚¬oËéæ´•¶ûB~=u‘ÀæW.úî‚Ú]¿qô^²ç쯲&RaTåH»4ýNËbtÍr@J$Ó6Ÿû¾¯cåÒ¾HOß_*u#B^Åal ç«qïPùOLß+uÅŠ$RÝèd˜‚;]_ŸöŠN©-Œú ‰\« 6À¬Î‚õ+¿ˆQ=Ö °Ì;†UQt ~N)&ºHÈb¼sYqàYAšLÓS­CÁˆ^à «)¬qéE˜Bc¬È5`““è aC‰º0¿E¥€w9· z»Æà½N¹œó|!Âñ‡@ÕÇé§×ñ¼Kéþü„”~!c¼¾"Ã×F“QiNLǕӚN;u­¹¬½Ý n¦l”χƒ1+=ð·áÚñNþcŸ¸q„P„= õbt«¹Œ¢g(bí¸uNÀúf)w %£õªp»7û9¯mû]7OPìÕ]Ae!õËG4'ÛÙùêÊá,õíüÛ$ÿ6^l¡@UºtnD­¨ÆÕ(¬¾°0y2Vvؽï‚uôC`I>ŒÐj4\ˆ0 ‰·E¸`,„f u(Öß^^]¡ ·íáËû8üvB»=î†.MÊ‚kÒác².ò‚ufþÈêð¨\èq„*0˜¶©ÆsüëRµÝZO©ÍØáÓlß!æGˆ•[LLzPê~xì’vPÛÎ2›á²Ûjñ¹®†adÈI‘‹b›ög{×Ùœ*D—âȶCP~Ñ7°U ‹Gí¸Fˆ –dK9wŒFä÷ùƒ"Ð$³+Šùˆr}²ÆY]2m³|¨#“Ùm±¦qFë7"äñ1ÆA`nbꈯÛì–µõ$ŸûFêÚ‘SЦ”ÙC‰ãžÑ.ewN=®í”_1¨œ®î;Û¤06\k[’¢Fíú‚Œïäowíæõ#ú`wOu†Èª áÍ÷ÿ´Ù¿Ã,P’\|õ¨rN†ˆˆ·\ìêˆHFosyz@$Éïvƒ`F”–Y~ÀÄ WìàH'f¦™ã„×)çÎOaG€2;mhå#"ùÏP•vA¯IÂ'M¶Ѩ »m³>õdÝÓ¥*Ìôsú*×V› ¡eú8ë?5.}¯–záu\C1ÒVÀé´”S4ÈÕ›‘×Üþ¬s“\ãÝ :Éx~/’²Ñý¾& €¿±I–n^½-7…V0ãO¦}\ê§ŒŠ¦dD=Ž’éàä7 „Сce Ä&o çpQ#2jÝ3må6ú'¹,Úñf4'¸œS@ÚÓáoýàí;g& Z5ˆÉ9q¯û¬sWº^~(¦wr‘™’ùñZê/O±üWÿ€Ø[<¶]³õßsï[U~ÀðsPŒÊP¹*eÈ` 5m©ïrnîÊ/îIC¨Ò™¤Pkˆ“ç¡æ'%râj×iG zŽ1˜6À^hº&ÖÙGc¾ãfxsâŸaA##?ôíãÉÆ´VEl1ŽÏeíâÿ¼ý¶^LÁcqtÚ¬® ùñBE?·”Ôi¯uÍ<†)ÄÓ‰6ÂÅ!k–ôÃI +tu3Âqt׈²ˆ‹kNh‡R e¸P=p0(šµ0®„÷(ò¶|Ì”l¯y’”¬¿ñ= Šš¼yÄCQe?IºÁ>ð€ßðÙ0EW¡™5ðñ§Ts*É×5]´yR—@‘ ÏXW.5Û 6•Å;FG˜¹¯"Ç{¥ëc²Ò¿äè`~Í\²¯‘Ç(%Š^u[Ýš.Þˆ äÏÌ×!zpO.‘ÃF`0vœ’ˆSAo_ ê˜jÜK§1TÓo¨73Âpø‹Nw„…Þñ§`œj[ß6ã-È’€•|´õösÆîÛBK‰ó‹wu;;µI\UðPQë¡s8žåx¼ –•ÇX ÁÃÏMZ3²ZÆFÌËñûð.2}ÈûÒ1v\¢op~ZÙàÎhU0#0¬eÀ³ ¸èxM‡GˆgxñË`×÷B$1Ð ÞÆn—@ ùœßDÜm´ìÈ;Ùä([ÚÕç—&Iª=‡¦ŸUIå9ñ‘à†¡M33ÊžóíÀ+3Ä€½d–ƒ§ï“ˆ×ªÆxC`À’]M+¬In¿oÆáµÏC„&Añvù¼Â bÏMOÁó_Æ ðM6"½£IQYònW[f âÄ^µŽ"¤ŒWM°Ëƒ“ÎÜ•ÑÊD%ßÇyÐó—*Î~%C¤ö˜èí¶aàq c ö Æ³Š:½+Q™øÕ”Ùåø¯<¸ÐáOB"väzP5~Zà ß»¨KàÿpÃó‚@ŸY¯Î! sÇã(š¤c–JæM<Ó/ªÏˆ ' ¶ß!}¥B‹;åµ,`ÐÆ¡bƒ«—xkŠâò·—+>'â (°@ˆºùyÎ{éôªþóÐ@£)ÀI»HÒ1ÂEÇ„×lÌn w.³…lû÷È峉«øg&´q³‰ñ™|7߈ÍjÁÄðL¥,{5©g4t¨Cyø“Qðì a²½ì“”§õ¾.ÊOòãeSÃÉaŸWJ (˜>}¶¦Q‘wîD; ˆ~œºœ©:piBü¡ßi÷È- êA"¹Àv¯ÈbñA'ZÖ3wz"§ƒŠF~h*wqÓÊÞ‰ÖÏæ¿ÄЇm?†<Ÿ!2Ù½Œ¨/÷E±[ùHBhžTôL ƒtaC·™ªÅßëaᜑþÈ>ÚOßWŒXNù_¬_¼¼t9ß Š°9ãåòB®Z‹v×Õ% £æÞˆ"ÕÀV_1‰Ñ~]~Qµlÿ-!±{]±Ïê«b ´0#!11Çæ%ï3jjš’´Ó Z€Hêâ9ïíW%Š&–¥ºøM”ùÐæÓéœ'°ÅªO³7ìEëÚ(ø‡²kã,©Š¡J`ÆB®Lþû€|Cð)•&¾?Ï×kÎu ›Q·Àû`Ë“©²ÇGc+j‡WÉ‹%åõeõôf]ª!îmÇô‡IÓвÒf§Q£ï¶Ièø°GzûµM5ÊR_-%w$îò¬¼Ä¹¦É‹‘kEs§QÒÁ&g ë¥ÈÞŽw¢Ælþár0–àW_ÁÅ=móÜn|‰ÿÄU1\« Æî¢Lö¯@éëõv ±8ýjf¡%o‹ßà8Ëï¼+[Ec˜Ÿ—:ÃÄ*÷ÅÎÝç̬yïWÂjŸýÃ8:Èw#–µ×á^£w¨Ð¢ë×'ËnSQ§fëG6zV]Qðˆ KÚÞ=¸]Qz[ä@J\± B“Œ-%Ä»?¿ªÃ(;¹;-‡^–õø¹¿|M²Ó*ý¿Aä%4ˆ*å¹ ·`„ã¹yßÂñãµìź¿ØÎ™XÚ¶ LëAÁ |42â<%…éPœÄšnáL)„Ø 8ЧâBˆ·îIãfYTßrý9}ÙHö´¶4—ÒàL…A,ófýº!]…Q"s1‹ØF¹Ð–Þ*0Ç¥û-¦YãTã^Vz{Úó˜æ¨ó ¥ŸãŸø¤4^DéW.æ•XÛÜ‚Kö~íŽ7­Ý•ÇiÉ‹õ1‹©mþ²®ðÕ3Ã.€©qŸíE]Ö»oý‘d‡ÜC‡À B @‚§sš#;Ë,rð;–â7Ê™–¾z²“¨uP0Bà7ˆ ‘5‘tcCÛ žßÛÈüþ(0Ï$ò’|kÝá¬ë8 ²1Fë´.©Àœ‘}Žœ,ò´Éä op{àL!\;ç»ÉŽ‚^ ˆ¬‚€Ôûè!)þèçé–)Dù‘9—Ť۳„±æ.({uzÒ7ÚÇ'¸G.Ÿ* `²ü‡© ëÞ•ñæ‰z: Üc€ó¸f²¸`§˜zGXëó†{ ÛÛÉìŠpé /×Aë%á#]†æ­ç¨®@…ñ3þ>oyÊÿŸÝ%×…”«Äõmp,TO_qî°ª1ßcû5bNæ‚ï½è¤ÍË=ßѦæ[.íx×i€YS [jS/ÕcK!±ÿÛL1y_Îâ‚+ðQú!LJ“%ØÌÛ=\*µ0šßY#àðd°t-‡ìüô2¤§›•W£-Øúõâùfo$d$=[„Æ–1œÚuYõ-Qå®zzüeÇO]t†;IÝÏé>¸Ð‡wúœ°§¸º”}—ƒŽ¢rJ¦2œ£»Ãáf÷]’*ÌAªŽHp õÌÍ0ý¼Í€úøµƒ &Áøk!¹Í"uêÆ¥ü0N7‡jsÀ²¯õ2Ÿ î0`-†žëÚ OÔ >I‰ú2«ÐLÞj­«aŸ°oi=kì·qÕj©Ô‘ÒCæ–ùò±åëij8c§Í¤i¦3‰ãI uÚ ZîïþÌb±P¼8kZ–¼ž÷Ú‚i$å7†'9|;ôMW¬-¶§YÞ.àr€O´Áó •èÅ$¤”"]0i6r¶ ±n¬ŠvÐè‘×¢rbú±õï«[-vI¨úÞÑÃŒƒðh4Æce5ŒK*Wå:c)ižOÙBÞ|‰Žªh€Ñ8øæ!oί ùO6z>U?ÙbòÑ=ƒÅêÄ¿ R[oÕ‡dbƒY2]…€u¦ùF,uOIPzX<îØßj²$¡C²ó‡½Ývdnñ2^` ±Eb¶Í=SW¤ þ?6¬¥‘In´s|Xn¬Oi¦ŸÉUZ²†ÈÄþï¥Ð[Ö7ÊMPY]AnFdù çžT@w‚žü‡b¤°Ú„0‚PŽ Ï–ˆ«ƒö³¦!!€B͸‹`ªSߘ®)10ô¿mJL×Î\›98Uµ¦4ù¿¢y7¤ .÷±0‰„¶GÂZÞ0€H Ó$#è8ªLoM‚À¦w [ú Pò át³éŒ7’:¹v `1½H|æžöåu,Çá÷J¿¾oêl¥{|4Tq½ôŽ_Š="½‹ïWMgGA—‘ëOÙ‘’@¡$Ô1þ …Gtާˆ„ûŸÖ.ŸÞÀúj¬€ËY!ã@ª‡»x0°ÁçÒ8|…"®“ÁujÔ[N_»¥Ñ%½‘+ ¢8r×ûX™d@´-¢¦ÛqøåÝþ"2ÉߨC³×´#ˆ k¢ƒ°k ZèKfîkºuN®É7„Ûcе¶\jñ>2äÀÈŽÌ¿¡aG·ø›Â ùODÝkéÊä÷Õ=÷Ï_HýxÎàγ*ï4Ê”ÿ›ê¼ÎIòû›»Îˆ""l Ó´ß‹½Go°üií•Í­1ô+R€ŠùØR2Z¹MJhòkñ¿(ÕÔzIÎ{ú^U\}ÈT¦y¶9- å”f>ø¤B˜ÈK#zh‰Aµ¹fóPo_÷¥‡H6£¤çîÏç+‘ò‚p#Ɔ‰&¾« ßn@¾»©yÛðÀ÷·ÏKhr½dm$â/Ä@Á91¡–!NÇÐñS‹¾Êb†nSõÀ‚ƒ]µç§Ûì]UŠðC„1|’KD¤÷¦EæN«®8½„-¦xÀÛ&9>H¯ðìÃEHZÛà 1õ…‚ û(2]ñ\¡k„›oúŠéŠß»5i‚Uî!5*dÛáySžÐ ÿ¿-¿ïÿ4y]—‰&ë$¢c]ðˆÛµ^Lé´ñ4Ò¹š'XÀz]ÅGöF“g~-–ý¬9͵úɯàù.V½:e7(•š’Èd‰ Ükg ?q¤ª5О¼úkwÈ…zô›- \ÿUÎ>^›%Œÿ§î>êý-Ý&X.r;§î‹ÙX>HÝüCJt0³žÆYšûŠ`Uö÷©çfÀåm8ÞÝØábH’ÞšLżR¤\?oèd>áÁ¶A‡Mœ²æ'jÈ'J8xøìN¤i†(ÇQ>(™ä/ýy¾rûåvo=IX:)T5˜Ù‹…¦, @* €›É; /ÕÑ«nÒ‚)Ðw.öa—h ˆÊ&Pp€Ÿ‹ RÐÂÑL ­î}r&-qE t-»x@qFÎï Ëø³ûlÇ uP]¿Y/k;øOÅLß,h²a­«„~/¡æ²ì—>Ñk'ˆ#ô¥ mïpky·,Pƒæk ?DøG›à€[Z>ɸ\Õvº®ÈÑÔpçb˜èC y‹ûù‹Eý€&[R$kÜ 1(]>.¾à†ðEèMfµ¶©Ô»#‹¹T6Ím°50_}¡¢9d²Aj5áôÿjL¢÷èÕy= e¯ztV¢bþ­æ¸#îaÂiè'Ò®·«Â$§+iŸ™“ªøF~^ VœDÁƒz^ŽDmÛI×ð‡¤šòNÞzGç¹~½´é¡æÌ-™Uvk30ß P ˆ€E ÁNC>ý–ýLlú‰955v€Ì{6­×©¡ä½»?ð¥¤÷¹ ]xbÂŒm–º×}º²ƒnÈŽ·# É ïs¥4^JÐ)@@ÁO(TÁK%ž “EŽÖ,YPŽ~(/ŠµÌ‰$Ü/hžÔŒÿÇ!—œ>Ì…íTJ…F T'êû>˜7ñX¬+”~Ûìþ$b -‚NWBû¶ Z¶|öø«ŠóahîHö´}â‰znjT¼ú]!­@üÁîÙd+:!›¸×ް 2®Ð€zÞ *Çñm-Á½eXœ'ñ!Ÿél¢†´Ät#E領ë´µuÓÈÇ5N¸Ë‡xŧ!¼ë:8›¯KÓ!õE¯ÞˆÓ=R6uöKOÖëæ“1É"g­êÒ•´÷û‡rÑØ8Åáúe ›öÅà ü’eœañTöä½:Þ í«.û 6%a•xçŒbÌtçåàþ•Äî«r+Âl«®²›î|õ$ü38›?VÄ,“ç–jë:hâïú ø,ZÕׄ"—Œ jþü[3Õ-vë¹·ºp©ÇßiœRæw“V!ÈÝWÃzzµÃGDÅÆ½ÛâÇþ©ýŒUª~ãß÷_#˜e`]³b¢êÊ2˜æa¬Bôý zÝØ)l%—£¿gü2sa°3B†ü…Íxñ8­ÉâK&Ø8C‘F%ó¦r’*o&Ú˜e'Wv¸B%²¥Ò:JéQ‡ îy¯"€`ã± ®à#¨FZ¹ª¶‰7ËÄûkòNdpì$ 0vz‚òÃØ»#dè î`ÞwÿÓÙçÛ€t^x{ÜÆ$­G€8þ¦Jd þŸ‰Î!ž‰jz—±E@ôr¶Fjï£ÃŒßðìó>·hSœ´el_|Zà,<ºe@1&õj ´Wà?œ€°ýÍ ÚœìÑx~ÂÏÍsâÒÜÅ8ÆA½÷ ‘{iaÊš¶¿´Ç”¬1àÏdíØ²r9~‡|5©=ZêéÆ#?- á»Q:¶x%Sœ¹dù˜ª½pYNp¨î|Òœ\íáâïÄóe~h``VÖ Èï '„"‰Ð”.D^Ûc°³žŽ8Öƒ‹•9WÙ‹øÕ)(S¦FL6U !–íÉÈyˆ~4èÜy)ïðð|pã‚5õÒ–‚ì(èçH§Í,«<³ÈÍàa@Ï`qÐù`K/W}t}aÍž W2" †ŒzîÒùJ‘ÚÞ?KgI ,¨áޝÇþ7a¹‹è—‡ö‚¼Ü0ÍF]'P½»Û¾qzh?n:¼ÀÜátUE6Ù:<–íîÒ ïÆÍõ[ù»èñ›X'^ì§[ìÔàm¼¯^A‹û¥P‘$‹¦¦u¡ :Z(‹T)U~i>ý9ŠTºM›˜÷±/ ¨:’³ôIÀ@°/Ê.r_¾bÀh’Ê:$¥Ï™º02+šÝ̵þ™7#Xíš•‹BALIø%yÎ2ð’§»aë]…YãN“Û , 툊j‚uñÀœJVÕ®”ðð½= )ÑÅEõ¸ÀÒ¥Ì\˱ã Î‰*ý„ Ÿhn§fú>Ô®zá¼!†zŸ°ÎãÚȱ4ä_4¨qµêʸý4 íÝ8‰A8I‰.÷¢¶¦[ò›)ºál©¾DÇ]KÅJFD¼tBÂܽóâ©óo2³œ±ÉUº!§HkØ÷Y{Å€}¡ùÊÊIµ$Y4£¾Aí‡ X­Æ"oYÛ&lÞø¯4ú„ìÏõ‡îÇÛ‡wFz—é*¥q'AÍê ­·z \ÙàòöÈÚDÂ× j_hxü¸ÊQ5—=­¼ý­ ߦ šo \UÑŒ\7a"õi4Í{ú}©Äp4ÛyV}Ö†Þš= TTüLŽótŒ&æ›ÍiwD®tY婸2o‡ÎÍžÚqs. SÆ…ã_O§ŸWÞ°ƒbü›Úнª+ÑP|ùÝxöö7,’Mð:«6y6óRà߬ÝóŸLùNÈñcû“žŽñôáP/9Ö+^¯[KXy Yh‹nfÐ/JËüžs÷“ÏÙZAæ´ã°ÜçyÑRùXPÏ þ†@ª.5€“F¾qëY‰7ûû˜=²GÐü‡žä pŒ½¹ìZsÙn©‘žnUoê¶jaýn¹HŽžw/°†iŠåÿPSs=„¤ ß¹åŒE)§Ê=«K%Ï擊¦Dü­@g¦Þ]ÕËNŠÝøt{å@Y¯Ä(¼oºÇåˆÀTmRá©d>˜,x]!6†dbÒÑÛ@&~Júì¿A@Blq*~?"€5é £g5 ¢ˆ8ô0xLÅ]‹æ»7g©b¼"ݰ GaCêß{_yºèN•à§úÂä.zxi·Ö‚ÏóÊ‘À| ºDØ{h¼fFñA;šm›Éž+|'Äêá?š´1–é~|Æ¥Ìýý±êãÀºï£‡S-`Ó1›ù³GßÉR¥)'’Ë­rGÀè| 2`þ8w×\·±”OñýÞÜMë/]R†<õWÚë–zñíÔ3ižÝHj!˜sëÿœ}9‰’|€sXýpþ;¸>Œ" ŠãšžBÔ”åÛü$U0{ñ!;õ¢õœè…¾ @nD\=I”µ¦Mþï¶2Ž'Ê.L½¯9¹ÜBC|[Ù1¹äÍhž”l ÀF¹p9–µóÜ8–xWe’ªë ¤,8€{®µâ9Ö>M”¹–V'¬h£_Î\çOɘB.P "‘xUÁA]jcbÀïÝ ‚ëJ¡5 {íö3Ù…oN÷¾&¶ª¢r4üÒh’Éb´ò¥~]ñ`&ÁNë><Äôøaó& )<Öyú„E-©±‚¨ŠWÿeê †GrÙjm-íˆ)"Î8xwA³„È;ð%{{M.Š^1ùàÏKôRSçp5çÙ«vΨ³öµ®)‘y+ŽA¦è˜–vsDe5 „=â‰.£‰x`ÀÁºŽSXIc4φôVÉŸiÏ“ÿ9\}"µ¹ÉÖCIö›ºÉqÁËg²‡ ×£Ê³ßä“KNÄN:@übíûi áÃdöÜOR¡ †‘hhu秬nפlµ{¯>%OÛÓÁ˜æÝ¿ÝÎ!¨™ï oÎV±ÕHÈ >𕍂Býæãçç §~öG‡°Ç¨)²H|mZ=¼y³‡4J?Âë,"‘f)•Ó²GºË;©¤?‹Œ!Ú>!«U¤¡ˆFqŸ´ØÑU‹½=j+9jl9퀀Ó#Ò>ž¢l0_3F)©vb!_fƾ¶Ê|( Êcû* UhœHôB'A‚Ì ÀZD,þ¿„¨Rˆû…%öÁÀë§#=Y´þ5’Rr $´~Ùûèê{hÔ‰S_Â)AyÂirë#¬MŽº Œ5±ƒt‹kpºaêCsíhÿ¥»uêÈÑŒîó‹'ÛU~÷ ö§Úð/ÓØ,ñ ;’_¨î±¥jn¼‹?FU“c9Ä)¤s|›IЭn:”ò8`@/m2‰¨¡vžè ›P@7Ì… –ªȰ(0d„@ÝÈ;º2~’P¢h¸@rOÚ Í9ˆ¸RZH³Áð´fû¬FGÉKë›Ôôn`è›î o©&œ}< ²9þ ¼. -ƒ|(§gÞZÜàB‚0:àg.º–?gufrLnçðÏã›êEÀ1ÂaO]Nå·¿ccuíßm<•EäÌ:?ŧ•+ôz U}«ÁQÓÄ:U`×3B.éë´qP^×ó}ÏuË|ð]f‡ÈÍŠô¯­Ù´VÍÃÒÜ£Õ« Î:h#-[ví:!IPŠY«=\þ‡°õ«µüÚ†ØU1ÖןEÀÇ7óÏÜÏq£”7T~ mýßoåGì™'£a¤Ø¾.œ”¶òêÚ½/~”&´³ºÝR×–É¢µåJËç°Ó_5RŠi±¬—p‰ïiûáºre-‰à ÀеÚüÏþ¤-ïù8¬0°Õ¸¶ ”¤/¿¿‹ÍmÆg@©íZ0ŠEõSÿ*¦±Šièü(ßÕG'­ÛNEx#(Ó Ž”˜ÜD·ö˜cÄÿi¸È#ÊZx¨ ÷·?­„“ì–!Ž Š_º£x¶0uÁ𱝠ƒAÛ¤ ¿´eU˜˜%(E)»ù¯ ØÇã^ºÉÔó¨oo¹HÈésÙ5Ï`>MkÔ{Kÿ@ßîC1sø€Hñ.Ÿ mÿ2T°<+"ÎãðFoö‘âÉ{'Î>ÞêþI–tÿšeµ0¹¡T> Ý?ÅKØ" ‹Ó=âKî‡áxíAÈÝ 8À ¡'®ÞÎÀ^½GçóÝ-ÎÓ-,ò¿åÂg5Ý=Š@Š"y‚v5Ã" `ØRf!÷Ô„"ú8Êd>‹bÄ8QÐ}À-Úùàr‹¡7÷1å_$,b]„®n-çí‹J3ÒÛT·$äø á2Ê·vÑŒV ¼½ óƒ ñ ž^-DO777àJ‡"êXw—í¹ñ²x•LÊj²´C5PÿîÛ°ËÁOó=âÌ Ëû2!EÚ¥„¼!Bž/©Cðè.ë[®oƒþ/ªÜÄ$ï\cÄ&“WEèÏAe3Ö¬?i}†ôY‚ý)+ÃæÐnwÈâùìøÍdqF•Ý¥w¥a‰žG¡Æ ¡J/‘¯…ÅW|cٷ݃{Û©þÓ·"DIÖ©9n)œ™=ÈήY!ëÕÖ†Ø p&VÅØò‚0E0W„:ÂTÚ$œ§&õœ‡*ÝÜQ ¶53{–!Jøê…ìå\Ç50pœiZ^$\´Úðàtídzy©>ÈD3Ž>w¡ëâ"¾|ªïU¹÷¨vdR¯ýÍå¯ýîb/©'|¹,USÝ‹õ…Dü^ëúf¡|Iv± 5ß!³gªyÇÀrØÞ­P©Ÿ £oĶ$«ÞÑ;ÁÃ|äñÕŒ³³`‡øoS®6•gìïV6˜ÁGž?m Ÿ^µ¼:CkJSUt‰¡ÏQððŒ5r´}¤kSü[2Û›üÅP@º¶ ¡wƒÍøóK¥æ×鉔Ÿƒ­€åþ%Ô` ×÷³ž‚Í›JáIDp5Îâ8ƒËûÖ¾‡ï' „p¯K¸VëC¤EY—û_5zÈÀཔÌëDÞAhYH጖Y…”ÿUؼŒ€‘4RgÔgóå |w ª€Þ_ðI&{ûjŒ´QØ>è¶t\ð=­}n€üÌ2:«‹ŸÄBëÐ/Ô +,°è»oCLK9üaK*ÑÏsþðüÎFŽ•vðÃÔÌ-ç@s¿ŒZf ´8ßÁÙT«e‚nµq3dSþ£žH–4 :âÖ+iQϱÝÔŽ_‰/bð“ <^Õr±Ž›$Mí~¼¾^ñ×[Oßs¨)‚—0êÓrt¦7D1ñÌÑc8Jº´àG ¼!l*Ùh8ö›V?Ã~dD8íÈnQmeqZ5¹ugV‘->·896™ë›0ÇëM&¡Z„F4*¸[ž«Êÿw";§ÙîÈr;ÑéCoÇö4Å(29†Dúý^>N€ŸÌl±ÙŒº¹dE…ÿ5Ø6_ÉÊ@"’#žÀ‘œ…\è[äðƒ«<÷Š~8®îÊPr¼N6WG{ÐFÞ,Xµ€ÂN?¨Io¹r,gçx=J7‘È)!³„¹·€¥¤;°ª{8»žÆ5¡¼šaÿ~Ý4'™1ÀoØ­Ü®)¿bŒÂ}VÙÅFF󣳃FíWèâÓJ©!þŸ~š'«ÿ,DE²]ä­ñáÃ]˜ìó{Hb Ïšþ[£‹«×Š›}UãKÂSuüú8­8CøúzFµõà .ªW6(>-{a$nì43¼ð©ŠÁîÿpnj‡ IÜõ ¼üä§A€-§ï×f·–ËMšÅ(Ï,º.Wä{Nøw£õxõuœJØ ÌÂJ­ó@ÚY76C÷ó@®8ŠïQã!©¯vôo͈}¦p 2¬›’ÎåŒe©ð`'ƒ†¡ÖyYi5 èÔŽ/~¯Ÿ}H=€g 2}¶Pß§{_%Ö\ÛYCßæsˆX[@y>Ðz}h2Jió†ç ýóÝ‚Ð'?Tˆ ¡‚‡ïõE~ˆ;¹žuö€Í½¨!­ÞmÄÿÝÁå6FíÎ}¶Mʵý†vÆ'ÛT"1r‰Æï)Ç>";¶YBå¦ü†ßLúí7ôªRè‚x^OfÒ tlçó/Ò¼˜Ÿß&c\DŒ¨ ;s ɉ?We›Áíþ"Úb\JüÛÁ§î¨÷ ÔÆC^!´m{ñ–Ϥ Š ß´,áp2PE³ÛX½ÑhÙ”zF"ÄÛûζ êßš0¾þˆlÝïì6 Ö~ÔÜvÇüFúá èŸ[B™•œÖ6>}â«R æ±ö)hEŽVƒÏ,öd7çÕ´2=IàÍ‚~`|ô ŠM«¾½Xª:câºã45£0>³¨e´¸LuLó€™ Ø¼‘ :–¿”}@‹r¡žËiLÔ­oUâ5¦•Fð3Ùl¹þˆ€Îd3–ܤƒÃ^À‡÷ Å>|š»òˆÉ?t·à€Q×.©ˆâR­-µ­#ê~Ð4b<½wÑ«çõ„Î äY¾?œ`Éã*—x£ˆ±Ë8DS®ðÖâv[…*¡éŸÄ†ª—Ðvú à+Ê)¹ù½«#h<¥Ü´vØ%ðuà$Y)ü/ŒXJƒ ;Õ©mÊ‘¼X€'>–n9ÂÂØO @ *4æMÞŒ¢½Ÿß1áÓ¥bZ©àË&zÇDÓI•Ï<Žd 9òó÷Š“s†#|>óV†ßûfÏÍßבÅT·|@çZª0óy[é¹7ŽÅsÑá–î¿J¾,‚’™~Gý>Ì…GýLEjöEôõHcÄWuÅÖ³n¥³´Ô“8+Ù+"êR#8h#¸Õ[¹Æ áÛ7[8¿’ÀÞà.Òd¿»Ùwäðü]÷`Áb7u)°Rñ!ùLˆe/°ªÄ1ã’[ˆ,€ª”A”xý^ÌHˆ£F YOc熶;ªŒ8h• qîÙÌPS¡ çï†rªO„oW?hàzðUOÚP0s$ŽV™ÁŸ°^|íGdªè«ÈµÓY'isÎ'=ÎD4T-’ qw,vo’Ï)"¦ù=½×Äþ9‹\+B 8‰tŒæk|‘ +¾ØÍ¿Ó«wþì†Ðµ­²í¬{]ð®$îÏö_…ϼðõ#–4UœÅœÀÿV9û]Yxf¿µ?Èf\ÀPÈÞ;ïKCžpÆ ¹&`뎄Á²³ŒÌ"ùT¤ïaé*ý¡Úö•iEåÈÌ.QÌvSjZÿŽ·•èeßÇ$ÌòpH`¾ìàæ‰ŽºÝÆF%IÕ‚êôxƒ°™ò›( ×Y†,õ@1ë¡Ê—Œý~?Ïô²§Ù7·Žæ„3ƒ&‘«3/²¤U“›ËEPùž-ƒ£#çaMŠz•z¹¹c¥”¶SÎò+ŸFq°×R:²7Ú8°ãLBpo Qƒ€5f}°kâ@ðv~ÛÏðø'_uÙ Š !j8¤RD­A'KÚ΋³"²)`˜ÀÈðË7Üê|3¾)È\"ï]çU†€6î[# çtˆþPôùK`ÁŠ<¥èÁÄ>ó“ÑY®Åeo¼Üœ›âCcEÅaq¸g…Y°€Šî3‚ÊfýÄ׃ o ˆ›Yˆ1Kø…©[í¿¥Ÿ8îÙÇÞM@¼]6D¢\^‰‹Ÿ»ð8ˆ‘úŒ¬Í.ã–lê|°‚Õ:"Z0¨pïØ >S×ò´†ï˜±;1ÇX™ U¿¯éó¹€ûó& /h3ƒ³už¢ ³¿dªÚúÝ’Ž2µg9ØçÜäGÆU+l² ]Ìî³¢M`« ‰8/®9G¶`Î é\Ý“rj ?ø+;ƒ{uªUS1 ÃôÙz´%vÔ}+×õhÌqfD§øO€4ž(˜íÚ}*™Oe‡û[ô­ ü[6ØÌÄ/dWöëæû±0–»\Ë\ÿR7c^ŒÉOìùs<žàñ;yàb÷ à;ôØ:‹ù42²¹*½,?‰R£±j'½Äç705ÚQ·ÀÞNþ·[¥,\ÆähNc«–­îù- ȽÚm‰0£stùÓí·§ÙQ ¯¹ Œæ½ku)ßZk⃇ 'ð:^¢‹X½luLÏÇÀpj<ÝñU-©¡pŸ.G”lnAi´ â* ­6ý>Ð×Û¡{õ•c½ÞMsˆ÷´^N›Ì@ÍyÿÚ¤'œx)-7 ü¾ŸŠý2›;${ûÎ6Áy—ºvªw-l[xóÈÊ_¾ ýž¶Ë#0©4LY¨Ìé™6.ÆUC›:Í÷RU›Ž^´R4vÙN¥å• ˆS³%n-6… ?áØŸã_̶ïUrgü©'Ècë×ãY°Û«åã²YDvœìKUiW¸cÑØÇ– Îñ×ü÷ŸÜý{ôé¿åU·RNñßËYj:‹õõ) Õè­ñl±þo\R¼°‰H°ªÌ-?ËD3Tl“ DÅþ#0eǤ;—ûbµ…‡ÅÈŒ×Ì¢ÎGþ¡Ê1*T€ Ì¿Ž¥:„0ö?'ñû^§xâZSs”ã£F0ÚK£z÷Œ²Þph&©|é.-ꤡíÓ“²Ÿà¤©Š‡PgÆnMò=cªvDv´aÎåBsTçÃëßò-Ïä ½™¤5¨É¥ìàð(¼ð6Uª ¹©¼­–¬Ívç‡Õ1V›Édئ5TßïÕÏš+Yòy[ÊÆMÙKÒ ÜödoxƒŸ’ Ü\Ü=i“«íU”PxSP²ÞIÍEÊÙo=e~Su—çj˜Gv)ÝÿØÈ¥LQ²{ïÎ…tbN€½1Ï9Œ¶ò“oµYÜôTœÍ™Nl)²+ Å»a¬½,'wYÚ®3íÄô2™ÄW0è̲Ì!_/ÝWgùû¸Â@¯Òm@íþr¡ñžD ÈPYÜ‚¥ ·wŒÿs>Ño¨rö¯Y[÷~ Û‚~Ÿ Y#˜ò#8Qöóq{ÂM@ýÊ·¹û繓äâȱ_Šg–·H,Ð3TÁ)0Gdg/‹€ßÚ„60C†õÚ%Wz1„ÙRY¾Ûîl*×nOskVeÏÌLG2_Úî×qï,4®Ut“ü+曆ù\S•‡ÆüVØøX›V(ÿFTS\ ñ–Ã*¹ „Ë ²|Yý(ij@¢ÑþÍÊø˜é}Ñú6ÈÇtöÝPs­Ék̤®ú ´LRªvùDó‹Ê{™(^ðU‡x£³¯Ø±Y§ÚéþqgÙ—DÏd¢Ë¢Oޤ,üÿŠËùgC&éënˆš Áþ¹.PŠñÙ÷Z¿n÷EèÐ[ÚA%7½’Hx-–@p9žÂ-§¡Ò nßÍ·'ƳÜ|,‹5÷©$^¡;öÌ•]EaÑsnÞi5êøªU+p¿=Õƒ¼TÂô5´]”~ÞÆGUAÉ™Ôñ;V©˜Fo†^ãÅNƒª_=€®h¼{ër²ØUüÝîÒif(2C 4ç4ë©ê±ðP ËÎ~v(ª6æT¯?#Õ<;;I¥Š/ÇÄAÿö¬ÕüܽÆh¿Í"D}á,¿ õžŸ e¹`³^¡ß4æHñ„ézê Øàþ‘ JT‰ÈSê `P‰%@ðß]s¾L‘²€m°` åoÄ 3ßd.´*à1ªážìý¥0:ÜW‘ÄÁšOþNé"Nß•‰ÖðC³ÊÿjuVs©.ÒÊÍî`­j+ 2·fË? —ŠÜ*JrðäX§Ã£€Y¼I|Ð2ÂÀ:@Ø;ýZªJóža¯;Õ1Ú‡Zoû‰êÏèÜÜs‚eNôaæ]ÈôfL±óI§ý¤êléíÃ9jâ¼ã› "‰ò”*bjd©Ôƒ¢´í»¥=žk_è±øuœ X ÄžÇvO?Úÿ„ä`u»>·‚ÞÓmö„þ¼zað üÒšUƒ7Ÿc‘Arßuµ Ù3çõ1>cã+8¡×2¬Î 9?ñ¥߯àìbŸ%š²uo0=TY n½ Ö›ÑÐï4ØKqØð^Wja͉Œü“[¸‹áÅrš%žÕ+Œ±wyY]òÇ‚É'R·…÷¡ϲo¡­Þ?ä’ß”~ž½µ?ímSÎõs ­Úî&ž3Çøïv Ý3ŸÎÑ9L¹Y¤´[³þÆlÓS}ïÃ×g®zÉÅL>‡ßÝÞTm>¶èöh=€|'K^xc"§ÅžòpÈo·—~©%#û8Vå9rIu×Òe7²x€zœq!>ÒÄKÊ+kÛÏþæXˆ€eáqo¨©»OÊøè½C²™»ŸÃÊØ¨ êþôÞ¸hÈz¬“êþ›Ùà"´ß¿f¤R br:qµt×J^öL©Š?.L8L;Úá ·’ ìyŠù4߯¶ªf@¥`¼jyF =Š#Z'¼Ê+$K÷iÞ÷Z½Ê³ÕÛ6É¥}~ol«—Sޱ_¼qüFáAwÁnõÒƒ×rù‚€]¢û°ØÁoô„40oÎÊoV+UEØ‚üç;/¾®Œü2_‡ì<,TÜ\<ìL/_1YÜÍ'ÉQv¯ð*úEÛY×uJŽTÝ;{Ú´Z—þï» 7Qmu!—®}û ·¸ t>¯ ƒ(· ÔZ/¬ ŸF“»—$Ô(vV{ã$ð:,å5UîûnÛ~°¸æ%uSÁñÓí}[3¼yüo¦ë,™f{ŸÄßu“¡Ö:Ö‡A‡s·˽xLÉùwx-wùùuOæ°5U«Áî°ùÖÞä&[âœiòãTWÿµ»Þ=t 7@êÑò±²D04ÉrZ°A>ü×F'üŠ1dÇÿº¼¡ªGyEZ×w˜"è¤Ø•ñ´Ÿ>½÷Pà+Ÿ*–­27_gœ&$Ç0õŒnR~ôí‰xðd%­ÏWCG*ʈŸ„UmÝlf9‡ÓV÷ªýeæ‡Îé]—¸›ž‚ô룫=Ú ‡¾²iö ¨ WpXà?±×ú$öy^(€#ƒ² íZKcvl¹¨sU«Ûµþyæg›g[˜ð¥Éî›Öþfpü˜Ï5†¨-âÝ…^šÐÉ»*˜øéùÇ:©ÙíOÓµ¯lÜsOû‰wW“²õØFßÉfóò¹íIkÉZR,¬g;Q¤$_ É|ˆ˜„]2ÉgíÊxÉAeŒþ•çÁ c/‰¿j*“ ü½ñ5ßí*©­gžàðÃõ‡·3×¾˜ró­Zà6u{ì5aKöe¬ï¸i'÷_>ÿ[ß™'Ó­ }=F€#â H¥d€½,·¨Âú-ˆ±¨óf¾›ž6g˜Ë-Ôùo*½Þ„dPOg?ôHq×Ïô-lÞQuêWw-ˉCCÛŸ®¨G{ÛZ¤­Ÿ=kþ%ª¬'>d-‡Ï‰}éEx—JÊ;®PÀŸ×S›^·2ôD%“Õ­}r‘ ~h^Ë™ ·†ú.!)ï&ùÚ j§ÑJ½8¼ßä"O¯¶ì_[·ãóËJ¢@HÙ•Ñb %÷c{ÓËs(,.mÙ/G™TîÉ—£_Ôw¾›Þm%’@GbsH™8Ö&s¿&½–Äî-ËÉQbž‰Ô­×K>ëßøf)<ûÿv‹ñ^øsš{BpÒ2IÐî…“’1á:IÇùåSWÛép}­E=ð°0‘Šï­¨Nø-—Í<ÛÀ/ñ²B|‘VÓ¯ÃheÀ c²å‘AèÁXŽÐM 0^œ"IŠhÖȪ^›Ã ‚L²R@žÃUà׋«f Py©‚ðHd²t8L¡•„®ÁÏŽôøj]'7‘(Móx@¾ ]`ãuBÍV_]ÿ™Z’ÆÒÝîÎ'ÃÈü ZJˆþ2`“ËíŠÓzvà¿âP³1ŽARTGˆs%Ex¤(´¶IŽá½Æ°8²Á°bs¤àNlkGJ$ÞŽY¤8¤îÓ&‹r*ù ,±“zø—N9˦ïª'(Î3«ÀÔ¡{‘7Þ=ei)°s€¹âlæ3{¤&Q‰˜»½‘¨H˜h¿ˆ¡Õ&|fP—¦@g¦rÁrw±èÆ“|gÂÎI! w !úœöÝlnrõ—‡ˆÀM²I‹ÃE“x@Õœr>GxÏÅtùliáݵ#ù.Šžf—Eä1/E4Iô'ü0epK,0î <Ö1®%tËH±½+«ÊßÀJ¨]>f%kêi‰³÷žýKYõ²~=¥õîŽö7ćÇ9³˜ú$ºF'FUûUÊWïÒ‘,zSüÈq³'¾þœ2ô3ˆ¿7ˆå~»FyH÷6DŽ|q~7Ñ[ýÓº‚¥™BhÁ¼XL”€7˜ÓS½­&ýÙ›®e¹ÒF¥„ÜD¾ÑÒb/Õš–‡í5sú‹ñÆä¢ ’n W“{ ZFÙÚϘed{ÚØXÌÝã–JY"»ƒÈòSlÉ›Ÿpj¯éöÑŒéRÃ-¶ÿ¥¡|q—Žƒ,‰xLÈ«ùø›¥ÿz¢‘æ=Þ·ˆó­¸ôRîŠ|×úËieÝ•úMõ™áäÈ…†·n‚+Ú6³t¥p¾óaQå„…fð8 1•‰oŽ¶Üˆ>J±2JȶÖì7TnW¹ õÿÛ¼Y6Î*nÇê=Iþºh©RQ“™BLUµÑ>ZØã€…´GÆâùLnmÚþÕ’ôózTKÑ‘4΂’e_@v¤d{‹×µ<ß{;}òu•säÑÝÆýp$B¯?œ±ß}a«j€BBÕ¨ÈÓŸ˜a¹hÓ‡gx[ËN@‹ÞVÛU@„@ÃR^jÌA”³à}<©ˆ4¶SLåyb£.Bþµ]©<'ðñ S5nP«ø'\x%ã¥AÒÆ¸Ð#W0€Çß Ç3è6M/º÷RhK,'ûv×uüZ}E·Œ P²A[TÕ‡¥”[ͱ;É Áµ\ç/|Ç—’y3c úOi Rä ü! W1Rè!¨\Yoß?eà}x[¹Ïù¢¶/AD È=F²™Çga)¼w÷Ù¸*pÚBº‚Ý·Çn#¨4•x  ·Û5B –}’J~¥÷Êlú4ìŒ QÑmn·¯GY{(4ËCŒ0Ô[£Ùu¯¤]=ž|«24ÁcޱlÆV%BO«Lì"ï m¾FƒË{±?·çd†Ü¿ý]Õâz6S³&„Ðq®^M7ñ}¡ß‡{¹0!Fv*aÞVD¼]n/YÚ_j…šµ¸q28=y©ÝŒj‹¯ÁZÚ M0­(iQc¤ñöUM5à‘u¿C‚s”˜qU“E‰©aÖ•Ëe«ªÊÐg$~ì!vÉ„vL›·§Ã'Ó ?®Ð>µÐ—Éqíh#º§Æd'§[± îØñ-܃P qÌáØŠ•H™Ø+ª1¸ZÛ÷’¼_Öˆ53e¿GºùüÅq•¼‘ãHKèïNøÈiÁlæ°ÒRUsë l½çÔñE()PPÞÒ+Ÿ®ô«[Œ"9Ê9]ˆFã˜þ4ß''’9zÛFÊwÏ7÷ºoéˆ/cOsþñhí®ýì¶SÍ/W”ý¿Äü¸ï¼•²ÛªqG÷8çH¾¦-¢êŸEF!Á!óuMaÇ‚µú¶ÿʃsh^ºÍ!ñ\éäO™ÙR;^L¿ú¼Yˆ ò¨*ãÂ"ù–Ãþ(°…Ýd,½ÆôN”•tYYÒ¼%V”˦´xzÔ6ÖÉ#={YŸ²R—êþTò¶tñ—}*„&Ú£ïW«”Šg=¬lÿ:$$K¹ßô¼ÓóºÞø ,ÅS["ܽ \0B¯œº¦ÆjYV•уõ…X\/?›‡éÌîHJ|Hm%¶ÓAÁebñ¥…’³‡%‚i9qµß™q €:üEBPtuj@³xç•å–nç€kè–Øqh’å…Iq»i½j‡µYó6̮ȀÏÚÏÐl¸ÒIdlô#²fžòÉkM'3A}ð]¸Nqê¬O»U>õx˜ìúŒS¼J#*â>@â= ®˜ž/æy;Òk5õÝĉz´1»·9AÒ›€9ÌœÒʪvó÷vYMAlE5ÇÈõ;‘l"ì…ͽ¤T~‡«ÛQ{[pÍú"~¡9 BVm®C|o¹ P±+GÞ/º (_ÄæÙjøj"T@«ƒßlÁƒ€5Ö›ˆÌHd¨ÈWiŒNd®ÑDk0#щ݃[þ[À½ Œ‰‡gŒju9ºÇ-îžßîˆH]9v´æÎ©ÐFØìTü–£Ëׯ¬{•ï’M–£‚¢Ô/jc£ûÝ´°«dþ4}:a|Ÿí0·aQdㆎižŸÕ¹”mv*c ^4ÌBŠÂùj1kB30FÔ ö¶ñb–äZ…ü@ØÌTæÕÖƒB€~ˆÏ²(º)[#¤žŠˆMRq¾¯ìX>>…ͺ-a€C¢È©@´L •Håª{ߢ˜eüÀ¼¶¡Æ3ý¯f·Ú‰–º;‡Ébê¬\`;–Ðóä?½¦ß 9§K0uî”àÞÅG‚àܦýŽ"mXs$ãNîX“µ ÞãÀò”(ýbë=øÎmö†! lƒ]²d@|}›—üÁÞÕo7Ë” ÷vR÷»7¯K3þc¯ÈY¤ûJ^ I D"ðˆ1>A½„äZßuûv¤?#E’„ÔcÇ޹JXzÀëG3„^PÆ¥b³äì§Y߈…ÃùCçT&Ù@·At+‹ÇBçÏqµQ}Ñ @Z±”Ÿ¡¬;¿æF<Éâ´SAxO °¤m?²×z¸ùíƒä¤A}KR*•ï‹Ìø£îµ#Í@í¢“ó¸õùÅ€UÉ­¹ôˆa¯@ð(/4DçßÚ 9Tý…_Ë Ã)B`N´s]*ůbusÄ‹E2JEøö…µ’÷öX-Ô­að‘@ ŸŽ–]àx£qu1Jï}Ìá¤G®áJ,š³ø¾wö8IÍëœ|f‡3ì~¡é8E@õ©lÃÐçý'߬r¼J#z0ƒÏª¼ƒ‰ü²-˜Ó½ÓåļE6t˜>ò,7?Š»^1n!8‚’Û Òûfdb6dšæùaácbÉgØ‚ñwü%è[áv‹r`YZî]Ñí.F‰ÍCŽã,ÿ]›h£ãhÛl/¾Qh&P+¶¿tÑz:¹`@ré¡]%mª…®?hå°Ùø—4D9žé¡Ä\mÐgäÒ¥}ÿ3wv2[OMe))H~®ç¶ðÞ‹W.Wœ€ìY>'¾ŠûÌìÔÝD%^ÊUÜ÷kFÔ|µéaWˆùÿkö,Ögë fוÐP‚‹õÔ#"þÊ©ø÷™¨ñ®Q扗áHšÁÅV ›€`•8 |…4‚ä¼dôçÃ/‚ªÇÍK”L AG1’Ó¿¦¾Cë/¾ç%üþ t7kàùX1|̓‡ˆm¼·C¹XÓð#aHrÐ`vN ©±/¦ûW¯›[S³býl¿y[3D! 3 L%Ì)Æ$%磕üLë5ÞrÀ¶.™Zû‰ï=¯p³(} þý E`·{øå÷«aŠadǾ'Ãsâ¿(h·ÛÄŒ÷ã™%Ñ©=¡—/oOxW÷Æ_©MÞ"¶Œë=<ëô"nUÕ¿gyŸÉ5Àºné•n{L~÷[Þ=E°½FêÀbGÏ`PÚd[DžýËùÄäägçìJ3Të1‘Sìª ¥À²' l¥„£ËØð6„ÏWëÁc£Â!·ë÷S¦¶ÛºŠn˜7͵A°÷*ݱtzx6 ït9ƒ¿–šÐ5Ciê ¨#¯ìѳ½’¢YIkÝ»žT’.¨@önK§°»úˆ¹Ñ5ʰ Ì0%&³-N½ÍÕ†"‚9«0U¾IÜh!Z*ˆâ<«†ðe±¥vúŒˆCWȰCàŠA4ê\hf@„[¿{´rß54~É–"G¯Oï0ˆ8äyƒÏ0IƤÐjšÀK€Ö-°SЍ' ^{”OØ8Y9ÔØ¯4tµÒ[7Š€«V3ÊXõüšb§? ®Z3Sj2 ó–Žä1¶\ƒÅ¿!žÏE¥tô3•ïìJºî*#,ŨS*ƒ"wm5ñ7T¨ØÁÛ*\äȯŽ(:ô(ïâÛÂÔÃ@V€ü• èߓ`…C`Àzbr’ÃâɃ®äÒ!wÁcL)¯í^gMQõ®ÃÓò·À#Ìÿ§ÔÛ4|Å%«` 0ØàPœ¤žû+6DÌ –y êåG´<Ïe¶kXÓuÛ‹Nͧr(cð °è컥 4WVÒé¬<Ê”†”PôU\d¥:ÕOZK2ë—ó:ŸÑŠ™>;¨ÌL£hø›«…_èOéwVn¶ahœbØ»ŸÅ±#’œéÑvÕ½nPL³Õ`Ó‘‹@Å™PüQRô :ôã9r õÍ“ŽÐð0ÐR ä)9 SYJP;€uËœc¥9QˆÅ[/Ø)ñ*žŠñ 9l¬#Ÿˆ^6ùúq)åºÁ¾'…☠ýÌ(úËÍ;e|Êh±}æxÅ0T¹ž+»ÊR…P_V1P2k‹øÉö{¥ Ö„³?W€éAŒõ±7©Ò‹ràÂÙsSÓ-'C:]ª»Y…¶eÑ}ûZþ´õø{.VȬÇe³6s{\ïŽÊj{÷ò3ùÓ* ÝÔ~¼‚LLîwÀÑÈt#&®VÑɈ\ܰ=}6Z½¨›0''šžªÞïäEãú ^]ÉÿÁèK÷;c)-»gÉãˆDßs’þ¹M»þõ.K‘®o­@Á12Pfþ?¥–'\`¿Ú9ÆX“rïжÛWÇnfÓÊ<ç…§dZL²/Ç^ï¾åwŸÖOî™ìα×iïHbü˜P³[ñ9P‘a^ç’2»›ÉuE­A6íÁ‘[¶YÏJ ªÕ“‚¿ÜR@®e´b¼ïõþ¢/Eƒ ïž~tß)”E²àÁG¾ [ÎÎÝo\8ç{WÜîèC“a%cñ¾hgÖ¥Ë q8YÅ?µ`j·æ@ïç;œÌ¥½>3Õ‡›‘’ì¹IU§è­ ‚|~R.ñ颃ž²ìm: “,B¥äí|A”pÄ¿ßHe¹Y±în")Ó¯¹Lªú³ìÞ´§K'pMp(¾UïdX®ºûúÓۣ촅Mì~æçFÇÝ~ªÖ?<0ЂºšËÔJ!Ð8¿Óº“w‘lµ­ý]ÎEeKKéµTPÖ³”›¾£f'?„O¨d²SÖBw?ܽËAoò2p1¼Ð'v‡c¾ú窈yÔØ½&6M£âF +`ÚÇuøû9¯ßm põ™s‡3ÚfÓÇÃÓ" ÇÄʘÓ†D¶GÕÛRžãTëÎÐÀ9©€‹aþŒ‡1ä8¨Zc(Þ: À•…XœÎO‹àPþj%|&·ô~ E*ê+åCœñ>ßä1Y[“wʽ_Ëz¶Y!cÙŠ¾».·#„‚¼ÃDçtv?ëù/ËŠ©ÇzÅ{/üÝÕUéó ÅÿÁðz»ËP jáÐ=W2ÏNZàxÈíÂ1]ŠŠZ¸õØ@ ¬ûúL— 3¯˜ñ€™žÎY Çôìív·ZzýCž!Pv©¨ú§¯Ò3$IhÞ©. .v² ñyüãtÔ-ÛäÛ8ÝW¾«óY»H¾b!çÕŸ1²l•õUV䵫^Ù\™Ì;lWwSyŽôt!V±Vú:Îή(ù@ ¶/È¿™}fò±rªÏËhŒ»¦oëÒkóò_V§cföÜ U*½yOîÍFrÁ_…‹ãGbò4Ë8Òu¨ x|ø"]T‹~/ɨ¯äiS0<Ëæ©º“XMÏ{r ¥x˜:{µ÷ùN4²=cÜÀ²Xk®”o¸ÓJew^¥fÄÔv]ûI˜¥øèG-„ò Ö9?¯Ÿ çPç»ÜØk…gEíÂhaà(þ à PsÁðƒøç ¹†ü¡L«äDê W‰·O¬$ýqEŸÊû^‘d9žÄ! °‚Q¿ho’¨*üÉ;àšks• Žƒ6¤”˜_°Ëþ¡ZºÁ(-šîô¨Fc2flb8ŽW¾Æu.òŽ7@Ph˜—,ŒGXâ¡@>Éc Îç€*ç%Óõ¾+*%ŠcUOê?I‡ÀjÇÃMÙ½„ác„¾ÖLæñTwéÓq—5õ͘rÆý“=ݺuTQ¦pÔH×Ò3X[Bz您·èÅUë§rj¬Ö£Dµÿ·Y_èªa1»þóñI9Ú¸Ó³é …_ÂÏl~‡=7¤ª}Y) b× 9ç…†ƒ¿V{^¾æãÕM7&Š÷ãP½F¯þ7ÓÁÙó©º]v)ô˜ø˜œ½§4ß0&ë-€û„w`,AÍ](—+¡àxŸÝ²„):5ýcëf;?ïìÆ´!FT>t§c³[·ëtX<ÊÅø­¡ïé7ØË ¬ 8+ŒÙ;Öƒf]¾®—yFTIánÕS©L³i¨ÕÙæ=£.7ìÓqµÜp&>ΊAdq[-`×ÝôÎÃæÆ_uƒ7CàA›ÖÜ¡ÁŸïx©XÆñ<~åŸ'JB?g¨-zh^ l¨'„âža1Þî»Û}_suÿ•›‡wç ˜ •¢Ažb(c§™ò¼èÕdzåÌSxç¶‹Š T_{C®0ƒš÷Ò I°_júAù¾é £ŒÒ^VWKX…E¢æVdªT ¤av60~kˆäË+ì›)ª“$–MhiavgϺò(ÕUÖ|ä‰t‚RÉ…Óe(PZ=ü…à ¼*4‡ÔÌ%bÆa@IGß aB°Ö:¼ØZ جL¤±A$:%¡:‚5>ó×™{§g¯¹K–¦­æÛɵn¯~¡ VOD.ñB•Þp«°¿ö/ïŒ÷*P]¹t'ãÍúH‹)gêž Æl:vüÿXÄÙøz / '|:ë9y9ã@¬¾Ù Ž"€¢HÐh0Ú¸[CGUq††Ì(evD𰽇\8¾Ý>à|%%ÛÑœ“Ñ¥Õ,HC~]ËYÖwÙFLÜ—{²HfE½Ÿx}Öíâ~UÆÛÞà5uøJtº¦]çK4“óû9òò0í¦²xn¨¤ðˆ?hbÚ–š’¸]ÃK£¤³üÎŽœçRpf;S‘:G“ÈQ–•zm¥µ?o.â-²š­n;‘U{% (&µ…Y£ÉþxC2(e >;y¨DDðï›ÿ¯hyÁfƒµaþ`V™Ý»é"\N‰šý€ßùrxôQ¾-ì"£¾Ñ”ïîìaUG+ÚO EïPMf?&€4cp:ð€—¡ Éo”÷ŽøYð_öPT‡×›½dX«Ìf¹_ª¿žô`Í»iŠpÿ½ìíEoʧ-zL{0PÉù>“:Î “ ‰ëAMÛQ¸ç0@5öˆ¦)õ¹¨d¾Q$ý Ñ±ªÅ*†Ûiá´Ôg!ÙQT,"ã-ÆÁ#·‰TˆùÉb˜9)žsöO?²fm¼ /!|ó¬åÉ…S¡}ô …‰Ü#j$³vWè ι±¡m(½Paÿ3ÿpŒl+ mŸÊç¨Wºh%ÆÂìÛ6>¨öíŠúT˜ÇËLÊxSs¸¢KÀë$Ý€}(ô ¶Xøb©µ‡ýØ=´4]¾Ôs³Þï•Õ»OÓû¦ù~›þ\è.ø6^Ù» ¦ ¡7Ї" @±0Ù”‚ @°|}}CF•žÚÇå› `ÅFŒfh&ä·QŒGÎÉ“*Œ+~rœE¥YT¦Î1G¸œÈEÆ0&Ä›|4'æwbð¢ÿä3…ÈêCÆwrÂ?g =HUsuËÐ ûºˆuÆ™ÔÈiDiüDmˆ(×ûNí_RÓ<4û¨ ° Ì3sÕøz½Ï1&Êx·×KƒÝí·©í 9ÌŽÞàØùÐ+Nǯê~¼f¤×B®ÏtJ¬½¹¬Í4Sˆ‡ª¡#2äLÕ‡¸4ÚÐ †ß¦cœ©½^$h€ÑŠ!‘‹œ)¨Ô¥«"Y0ЧJ4‹õù+©B†gu¢ß–ySÇ©{WTŠts2¾/@k‚ }$ñ«EL÷W"hÒt ”Ó’uqño1õÏk²®³uuÜÜý­»ž¬ƒµž‡g{»·ÙÇ7ýdxõ Ã…S:%_èAŸ4@zÌ®p–qFMÊqw¨þBøAÝúðUŠ]b8…À²…’‡5‚(¼¦¬oXT®I÷bô^Ú à'Ì,J¢9« S½ðOy¿,3NÙ{O+&Køð\Ë\ÃB$‡ï\Èu}üßb0 $>î#$ªrºKŽC`"àúé²"pƒeq "3›#ûÚ¶Hrv'Œ/cs ìtÌØ; É¡‡}l†àÛÛŽáQ¦²^<“ãŒÙz2ì/œ‘à&|¿•F"ƒ=˜Q·úÿg§7ˆa»bŠ)†Ê†V²¡{@Ó…4ùA±Q·Ã,›þK®åޜܰÏþ™ƒ%²  °IÜv$\­Oh+ÄBW¶ Ð@ô£,„ÌA¨®£«¸Åeü~s¢ÞÈÕY)˜È….•5×ü?2‹váÖ…U'/˜ÜZ{´ˆ£´ô—†MÙ­ 1ZÁlNqY/,œ! $oÿbô>L¡‹À èŇò¾ƒ‰ln3Et -˜€>B…µ÷´’—ö™"Õƒ 3®ˆ•n®°€L=D0udÅðt  /œÁGKãVPc*\ÄÃÓƒÆ/$t—PlÉÝô°ì­áÀH=Óü½îX .±J¾ ¾ìm•·ö4†ð¤àHÁÒˆ dfiÌ*·ñèÊò7ÜU Gû‘»ø£‡Î›÷?‡K˰®Þ5YävN(òv<ÖÛµŒ_pGð±·…ý Ø%¢˜/nÐÂÌ+Øpı¼à×JÙå7ÖVx =Ì Ø~HµüMŠàG!äûJ@Ë(ÅŠ¢S·K.tû Û¶ªaŒqÜ7H.ÂÒöiÖp5)!‰14#íQöµ/ÿ*¢uÌz5ï~™©Å…cŠ€“@n¼Ô\Îäs&ÔÀ,Q·õjW“BØbª`²^c§ÈÂGxr ‘?áQ6yN2m…@ðÐÁ«p@<Ñ ÌŒüÈA˜>ȘN’IÁ>ÈF1mœ§-rVnÅ{¸J wðH 3Æ¡´ÅJ]<ấ1¾ÅD~šŠ ½ˆdôe~Ô¹åUZ° ²§ÎÞ_ÒøŠë.ú‹ñx^8‚iPï e`·Ü‘´0êNÐ k×¢@ؾ,K âdc×@91ëã0 ñòLL¦†Q‚ù÷G{O(8Ðo†sÓX°fmEþ(|ŸŸï ±“kýð¦a Iœ+¼ql€¯øNS¢¶B¶YDIS‚ ®røÒLK—LÊ2Àm9+r<@Só8eo9n- ¾Ü ;ÿÎPì‘nø<ýï‡çû¸ü3„Ì„ÃaûÈŸšC$±º=ZfB ‚êïBÍ_7ä ÉTq«ü¡Lƒöœ‚‡÷ LžÞˆ÷ ^oÁvD ´²•…³W½aÈ¢'|ú×`r€TSdž¤NÕæÑå1*Aª30:ùèÜ‚Ê0«Ä;þ÷ÖQÕ§¾q°R'q&JEÁÞ»`V+Ì Ê×Ìí*¬]¢/Û× "9†ÄuºêsÖ÷Î,dP +úç¬ïgxí°cdìHÈ=sôóÏ…€}b†È³ÏxÏ ^ßùdQ0ŠVvÄ&© !~5£æã<<ó¢dè„êÑ´~´ºËÊ„ H„öªÞÚw²âÃ3½6¸S‚Y(){@í9ÌŒP½4Á=«{[ØK3úŒ5&Á&ýnI£Á1",ÀNµ_PÀ­»™×§+´~bCŠá§,žèzz} å5Œ´0ð<;AÅÖ¿|®÷}öد1 +²vÍ4†ˆû`y#N%¹ˆy†!ïTnµó·CE!Û¦“±†¾Ô±ä¼.ÒÈÂv0m¬Ç ¤âÿ ç_—ª3åÁÑè5Çõïù?ÁÖ`~IŽ7üº Û·´ ‰í†—:ï¥RŠyT}á_ðDp3°° Œ¡N„Np^f1jáóõ)ðe©#†w4x}Éb–APk·ÔUý€_”DÇ„…ïÎ[ûc¤‰Fáy|^EyVŒÁ Öy4t ®VÒÅáƒ0ïõ­”-J?p`ä?aE¹]¸ÛŠ< o»ÌB>V\)T|PÁG¸g•Ž]CŸŽTkÃäÄöè± B¤¯Ã¯yá‹Ø @Õ µÁ¶§j/Ìö9þOt™O[õ’%ÆH@ 0%¾Ú^5“HiN~ûk/Äž}Ö{hâ4®íCºN—[“gëÙÿo„GŽñõ•¤c¼Ѧ=ñÔÃôÕPï0µí ²Ñ‰^61–oª–:‘<žZŽÛìr—)˜Smýì<tºÍX¯C_Jcº;Rö •÷…^R`:Wçb ³YÙöt˰“æ;Ù´ó¼ê³t­«» F ÉìPß/Mím™|y¹R —Mr‡#è®ÏÝv ú+a Ç0ë†?«òDõÑ3gêé£rb„tcß_l OɨG ÝN<µOôÛ„ò¬ÂÔF«èæž…ñÚLŒWMZw%‚Ù´­g¹¿I§%À®vÏÞ²3p`ÎxyÛ¦g|ú_ŠpX0 ÆomÐL-¯Ìºö‰åÔaïS¥‡ä¥¼|V©ˆr_2ǤqB JŒïZ¥™&ÎJŠ#ÀÏ—µF—+4CWO%§‚y¾V«OšQUÓ‡'oIÕSˆøÔ>™fC8›KCéçêó^W>ÔRÑíyE4•>u+¿¥íå¦6ñ­–¿¬(xÖNåmÜÞ<<*§i¨¼sP"Œtíת†då[ÐUcÔüQøÄÕtÏÒSï.Ì)¦]òäï„Ò³‘¡ZœáÈ£ P©ðm už)úòôÃüO b!ác,›«‹ÙïÛY*­/ðâãXÁ¾Zótû‡ LÒæEx‡×æÔ)WtŒô\’HxgÛl²­òçþ÷¬orÑ%ŒêÚPÔý|]OîSà`]Hýz{âKã:Þ!§'j×ë¹ÎLR<á€M2>&ɲY5ç<{ü!Sª?ìÚä`5…Âð]¥e *<<:Ç7}ŸÎíÈ•.Š·¶B£.ë´Ö × ,[!BÿUC¬^\±þ†¼`ö#´œEÅìÖNÚŠoø@$D¦1¡€‘n¿ ëOu£Ù±PäÂ/É'³¤NÑ+è˜ÀW7_½À¬#Œ»¾:ïË~³Æ®F{ Õ]¶A–W~K×Ú2óÖfßÀ~ƨ|¼È/5Á>Úå~³ÍXm5î‰Ô™fÏ9·oK5n¯ª­*À²ówTŒóã,/ˆ§ÃÆaJGT»§ü6¯?ÖT}b¾µØnæ{!Y Á»˜¸&ÐàÀB}ssdf8 ýÁ•ÜšËýhFV˜ø4ŒüÚ/• ý¨ñó•óeÆx便ïYv‚€áÀí?Àˆ‹åÍx¨Ùþ C]U`CS€JLù^ƒz·Å¥Bd¤2Üz ô‰Q0×ËÈK|PÏXrüXù¼b˜B#Ûõ†­ÂÝÄ”¬wdËÁ– «a?†×ù¬I¯%x}àe6ÿÙSK9È^¾üI_=ˆA‹¿å °€R©’”ø­Æ&ZëyÝd"@{Ž˜Pï‰1ÝÀ¶ÞRÈ8=k40Ó>0Vweoцû#>ñ]¢<…9”q=2~êžK»rA¾B`»ÙÁ Èæm,Ñ´ïV¹½’ ]ÏâüR²HA)ÿ¹dÞ™v}Z¶‡/ßðR’u°¿˜Ë‘€:šÀ´<ænè¦éGTMK€×)zpœdϾÈ@rÑóV/“è¹·YÛ¤Â/O2;£€|ºÌïsQ?ü›¨aCû@~wu¢]¥Ò¹Y+~¦ËûÚ’üºõÉ_7t _H…‚÷' 1å°Ô1C ŠŽ{½—Õµi#¨Bgú.šÊü ±è0\¦F&¦CФXîàz®éÙâPRDðoÐ2ÈÕ·'  `Uªš¼I%‘>öÚ²ÿO ‘åØwœ4W‹wöÿ`gL^.¯3TºŸæ,Ô¯P¼/ˆ‹%ÿ)ý—̹›ûƒNqºË~I)U%…ùÇ2=|·óŽ, .o·0Z¨21&]?ŠÄVÄ' ‚…i—ýØ fŒDº?*QŽB…UTï› 1\ñ–.áÿ†… TŠ 6„ëš*ȳNÅŽzÙ•Ð*cÇÅ`¨»¤ÒÒôÿƒ¦‰I.Èýí‹E-6ZYf•Á`h?œTì¸` ³OººÇ¹OÙE|Ê"´4.a…Ý„h:ÊË"ÿ‘A‚ºn4S¿æ÷ì%Ä—…IѰ¥3׎b¬Vk"GqDRˆÒ¤ŒŸü(î »#, °Ýd ‹!Eè ¥¡:¿hÃ×ðì²øÐê]·›\LܧSj½ ­’èÔ±»Hr1g§ z®jG&­S­»@ºs(Ïûö8¸ _óQ¤bÖµÈ'Ÿ†JR¹Éò7GÿšÉl@”xSÍE¥H’YÄhÂÂûUÁHÀupà/Ó]öѶ‹žÒ¶1DÿfˆÌšãƒà ¿Æð‰æ'ÙVºHÅâT¡diç˜W§à V„FÄ)ë•ßQ‘‹t^ºfØÃØ‚>t^ÿý`[ #öÏ5ºõö÷øü*Øn–[‹ž®/Á0ô<èF9ÇÕ=‹u+¯™Ðvttº1B¥B]Ùí;P8¯ˆ-b{$ 2DYWÆ ˆèo8~Þ{cb馮rgפZÔÔÕ–•.åÒÁ®ö¤AÄ¡ÛÑ@Ú:Øg°ÔŽÍU‡L9^8(øÜ®¨”@ÄÄ÷ô3©`Ò¤­ÔCðdÜ÷1æHïZÊD­jƒ˜‡:øqë ûPZÑéáÔƒç•{ža@¦Áâl-Y )ÿH;4°‰§ö‚ëÏ’t_T3júk‡æ*¯¢¤B2“?då`½äw 9æ<–BQe܄ϲ|7ñF¼®î/I' þ~ï«)ä‹ifK©i'íŪž–öÃMCÈm¸±e±hºýV¸bëÇ™ùJB¾ñxýe丶æ,ÌWÈðPÅöင *Ù·‰ÖYÕ«“]"¡ã!ÏûÏ‘ää°1l÷†”Âd~!LOq-²öns¸&ÖÆÊý¬û‚bÌ„:ãâC5:mÔRцë™ù*(é·îçä±EíŠ<-NP†œ…¦{`ãß”CSc@¯]û­+Jc$èà@èCÍÿÚC²$¿)Œ.ƒûY½ÎLG{õ±Dä£WR©ùMiîà|ñb)ûãÁÍ+BXÆ–+.ÙÉQÖßš›c~`»’¨íéÛ¬–ÚôÍ:‘{»Ÿñç(brö)q\ß5w—ñ¥9`µ)&œ“§ž(û󆘂ùýäï¦"T "T¹ÅF¸Zè“‘Ð9huLÀH&~/JуB(És„‘l†:¶‚o1/Ù`Šì;p 7õpÔJÅïÒ2ב¾‘·<è]Î5™â®rÐÇæ»é¿€4 ¶²¶ `âw“ü<ñL±MÈO—^ön* é˜`,“¯[Ûb;îRU¬3™ó3í/ÊI2.†´{P@–¡ÏG áŸÞØUGð’åK9jà ,9Q†\€–à&²ï¡Õ^"¼Xù/ÿKƒÄ0óÙÃRvúDš"‹Ìç¹ö†®ÕÁ0¼ûo4‹h¥@ÎîÒ­€ëX7O +@Yx'¤Åi A£·iÊÍ4d–.7KÉÇocèÐY ‹8)¥ž`Ø<A AÄÊ<0-׳ŸÔ¶ùŽe‚~±yìV›á\Ð÷Q´[Ô2$EždóÔÑÃ’UÝã®—~±^sÈÍ 4ëx>B{ô `.½€`ƒ,"Ÿç,Å >ÌÅjY¿,¤JXÖß®€ÒÜ‚ÎйØáŸfU  ‘?™±‚Ã\ëuâ;ã€S7ú>&¨«–¼‰Mç­Ùù–z\ókãfJÀƒZzáû¥†Bq}úæ:U´Dƒg°ž‹1Xwa‘ h}Î÷¤æÆN÷ÌÿYÔf‘1ÆToÓ¿p팵ES´º¶Ï¢Ö½SÔj‡ë”±Dí5ìêa-„2ïÏÚ Š·‹ÑÄx)¾¬*9.v»¬ZKòÏoÉBüÊ¥ò!,D­ ÞŠ¸`Šú©,J™‚œf-Ðì3£Í°–¼ 4!d‡ÁG÷Ðñ"Wñ@˜Æóǰou5“Ãöy#‘Ó@Çœ¾>)*Ð;îõ¾ÅºÖÂ$pOFs¼#•5k4¼&¦_}{(_P€ ‹ª<^.0ÿ ,4ZKÇhø¤zš×ÆBÒ}“D}^tãå›°§¯³|û–¢ê¿K¯13Ã,ªã2jtêýü8_Àþ PžÁ"øç! ÅÄ |®ío–ðÉ’žFA¾*ʆøó“”Û?"š°jhî·C¹x:РÌ—c›ŠÊ(ëjübÎ:Df–{1©ƒoÞØ `€ö©P¥Tä‹ÜTôóÕ½S…˜½ÿhH„×AÝ‚hìBçG>÷Gu¡ob‘2€xA©ŒŸáõb$§03&„ ¸üuo_­Èteì¯J9ËÇ5EÉÏ…*#òÏïWŽÖá¿uåD™²C¹£e¢Çðú˜¦Î0#ú2ò:§%¹ »{‹ß îùbxôEJŸšŠNùUFéð€¡›Ö¢,¿ °3~§R…Ž›e›nwYMG½EÁzSÞàÏ£·¤+¹—Èc·Åº£µênV&= Séõÿ4BžlØ y>>udˆæò©0`ê‡Ôçx’Ð酖ׯdo ½±Ý}j)ÒòT:€$ ³ó¡ÜËBB/J .`AN!W7k)ÆòÌðüïUƒ)‘[foŸhHàÛÀMˆwèÌÍfüüX¤LøÆö½+*VÒZg°u€/Ñà3H×ý‘¨;ŸÝQží_Óyà{Óùyµ4õÙ!²l‚æDDY¾>²¾•u`=;4þùÅ”DD\«Ã:Ž“8¾Ë¸ïc©®yî ".£—«P®ó¿;mñr""ñ×»=[×á´Ý3 Y{xµf÷Ž›!ì¯(èXy”uDæø˜Z󡻺Ãè\ÛFɶÕÿw´¹vDDEÃͤQ:ëYΟ¦²¥©ÞH¶1~hDDM´IÎ_ø"(?J7[Ìú¨w+òH!JèÔèœu¬›lŸ#Æ´Ñ C(ëý ªÍ(‚ïé€@ƒ–«{Ò–ÁÃêÝÐÌ6')[Ü'1xqáVp]ÝžI¡! P=ôGÆýL­3 @…Ðî{”—³™¦ÇÔ«tæÁ¢÷zèe€ CëÏÇÙ"}ó,Sº¾#µ”»Ýá[;:¤@‡š×ÄKÏ/Å)ÒYþN/Jâ°"'?Ù>ÎWíÖØ²r:mi÷o½lïpìÀ B©bEýå Önס@ úøŽÁ€ø`8; d n¾.¹ÿh,±>.>kµ‘à ­‚c!æsÞ²¸:ktJÃß]þJƒÍJ1'Êq¯ßަñ2YʤoCF¿"£ÅûÕW$>hTÅË Löó*\ÌlÔåïÛD£ïÿª¸,À C zäCî¯O8‚ Aï‚ïo¶¬9;´dΊdäœÃ¥zµ (¹`N6·Ä°(!ø½O=mÚ‰áŽÒ®l~Í8ìoù–¤½ð’Ì9¼ÚœO™–c¯`F\Ȉ§ñ¿¸ ßµ²ùjºo®wJÎÿu™s ÚÞš„Õ³yÅÿ¯§ï¹Ô >KôÆ»ÁõÌj@‚0s^ªÍ|Ÿé!™xF<ÝOvèøÙr\'¶Ô0Ÿ¶=?Æc-ðYïÃòöÚùSRlh&1S=¯g½‹–µ“½¢Šb³™EØHßTm,nƒ.?&”ßÒ³úYk~/Š‹ dϘ‰1N?GB¥†Á|—׫Ç+g¡3bã›±'ézGz%‹È ;bûû«6Çãfë¢@"3TJŠvÔ›‡žH"5XH¤Ø£y@Ûo²µˆÿWÊEÇÊÐ5ORV{ÕŠ¢ÀáîQÔxŤ"M®=³[ÜêzØÌ8¢¥Ý@äP¡%ïë@†>Ç@ê­‚©V/hYí–Á èRÀu|Þk}án¬Çp67‰¿ƒýª;?7˜¨|¹)+áû¬¡? „£ûzïÞuŽ4©®[Âç¨h_qÂ\‘K÷g_eÖ¶6/´¡1ößLáúÁÝþ·“Ë4˜núNÚŒ&jt¦êÑ| ó*QÄu•͘ûmiW_ñIÄˈîÖXÊãE‘²So¾2Ĭ–\F.©¯²ÿ>|Ó-Úûг6Œn{)sëHž¾VÚóÈ ÷º*^׃Ø4uÇ`k+€@†G˜7æùÌ >õA º4 @„£ÇB난Žò™Œ0ŽçIþ÷ñ Wth @‡€ÅóA›{Úxo¸*½3ñ!7âV0-Ön B÷Ïý÷?ËŽ>§†ï¤â¤]Š~+‰È6ÙeƒýªtÁt™ C$u“©:b›–e¯ÔaþÌÇÀ C{W÷[eºÏìdñ]ÔŸ ¢tß. Ò5¡FÚÇÕ¸5Þïï?®e‘0D@|ÏŸCD"×Jöªò¹=*=„ ñö@©Y}ÙEkxžùQ!‚R¹Kœ¶C^)K÷Ýz®ï 2p}Àij…U©v€Á/ÕÆŽ–×ÚH“DÇ8±J¹á>Ù:^Õ‹b•öèôÈŒ£û.験@±à~%ÍP²…|‘‰÷8DÈur£_Læ7>3ù†þGsù³wÈóR !9>ü½Ú¦·ý,– Çi“¢åXÎ×!ã¡|È@»’o˜PÍØ[hæí×Wõ¦ãÕ£K? ¡p¹J_÷«®7OK–)«$F¿¤‹vg\]$rÙBÿ”,óâëìÂ& VÞq³Ö²-mL–´á—ÏÏxøÚÍd’[ƒ¾SjpmÒº^~äjƒËr[§Ø©7éÏ!ÐýzCù\`î˜Až`ßO½[ßÌàV`æ•–m„X‹þWåíYXÈn¯±x>SªUï97E!Ç%¤ÎKYGCì+¯A ⵂ¾t¨ÑA1݃;®Ï-îíQˆ×Ð:è®ñÜÝ5ÄÜaqSÖ—èžó€¥r舃{¡‘N+ùp)gÅ¡å/è>9\ !N¬¸Å ±§:í/<úÚC¤ñ¸N*ªóáOú˜|x<ßkÁ /N‰ÅZò ûÎè5U½òäþâ Äe„ÎÔ$®Ë´¼:ý§Þ*tU&ióxä M‚±/rß—®Uup’éÏNsÓÏ<³E³S5â¾T¶×… ôv†ÜSœÎ3êê~«}Þ¸ˆ¶™hZ~YìÞÂÂ˪šv‘ÑA¹.2ýd +üÞ‰mçxïìVQ¢v.¾)lED[ŠzÍÉþ±¨<ùÌçš«ºÄÉ_ê?(ß(©¾kñâŸÐÝŸ ÝuGåŽsÝVÉLÞ…«güÿˆ `â5´þ«†lXK¨†,@‚@Óì{ÅE´ ;N7ƒl½Ûdk] å犽‹˜HÇ@"CIS¼av:ÝeÛØÐœ¡+VHtdsÐW®F:ɱ\?ð± jµî¬ ‰ÖhþµCñÍœM-OS0é–Fðã[]¹œ¶9Rf#Œ®×M´2®ÞéXŸ*EVÆ@ZÁê™X!}ð”÷`ׇÝd€o×ü8vœ#8‹.çÖÀ͸WàùA©°‘E´cUo?ô€ãØnž"d8Ô¡.‰~vFÑ­YÛQt‚Gé•F™³cRéBBº5ºíQyþÆ4d-Ç‹Á#×@ûéÜ~K B;÷¯eAiT{N“Ž@¼cû?L4MÙÁô¿Ó;R9Ý8wµ Ùí£Bo׺êh¶%=ù´»=h{GVãÕ™Œ7Â$°-àyˆ*JlSIYÚ²~|:xÀž¯âU¾¤âe: ¦õ‹S-ŠQœÁ#èIeÎæ®wð¯ö×z©$4ªÎaã»p§3®¼/Ý.ë=“ˬߘ厬¶´ÆúQ¢AOUÓÖ!ò:¥Ž:¡™/·Ϲ¹¹™P—”¡?1´þÁhK­=/ gÎÀÐfÛémº £2~…B¤ÇÓ÷ÓDeÀ­#6}ã¼7[÷ØZÌïcÂÓô[™Z“AÕ«t¿]¬–i‹µwVœéÄPø˜>g¹ ¨‰©+ŽÅj©åoÕtÄ·y“›0û |j¡‘2¤ˆé9ìÒa‘Ó¾f™±1ÐC8Ý1…Í hò´ë.Án±Ù}NSÑܱÙã/§@è ÿ“D…*ŠóíºM?7ŽIEOÕù:ÉI;ã"”ÕPÈ9 n y !”0“Šao¶3çÍJ¿øN,ªÚ÷†§ª Y×tU7ŽwJ˜m«‚0°ißŪ1ÏÏÁ¥¿'bU;nŠ^ƒûàðstÆBl —m#˜tF}AZÎb]:èÅì:ß•²{ÍùqƒUÏoˆ“ìñQÊÕì*$ ïUÊòÞs˹aKõ#wÔ…P^bEÖ “¾Þî¸ÈsÚ»àNÎ]‹T¬uíãÊ ä¾Í_z×—ÄcRCs”µ1#ÏÊ ?¬ ÁO®Ò,S6ÉÛÁ²Dî—°lÉRÖ ¬³ÆŽÅœ0Ĩ…tòT0¾Tçwl‚}y˼Á°S™ƒ²È)UOÇÓÅßšódˆúPÐÿ/ÜáñÁÓùtSKJ8ÝÍïXÔsfzvúçíî NäÑØ¤›-F¢‹Üýjý–‰C«\ŽÊx–òÆ‚·, &¢î'Å4š Gz)Æ)ÜûÁñ*÷Á¢«»¯ÚGwv·\àôF_ÜõŽäª—¿QÄ“[Œ`c7È5 ½¶€ ´v°xy-~!j›Ú=/ÎcYq>É‚z޹Ô3“)cjµÜaIÉœ§B*kr\…RjÕ¼W”úDz†@ÊL ^MÀèÆó@-ò(Èîë¨è–§óôÌRq,>V'hâÂÀÜØùbª?^T§eè!Š>Zx|‡eþÍDã:)_ã¬Â|ëFõÖEJ¹iÖÑÖÓÙ`Ò57RyÔ~Êÿ³!ÌÚÑ~ Ò~1ZÔi³ýV9bµY³½zÃÁüêm–z²ml«YLƒˆŽéÚmŒ&=ê×~UëFÅ=J_<óZï¬ÏZ3ƒÒ¶0®þ»(«Çí· ^+÷}6~i¤6rïï-‚£ÐáéÉ«buÜ\ºk—Äcýè—Ò>(:;K§¸p_Ü÷êaP’ÝÛU”¸ ‡’œÙý ¿î‹§Oøò±›Òý“5vÃ~`ÖÖš¦‹IÑæ¯{oeq‹ËÂw”€Þ^$\ÙèÝ‹Ä9±„"@@ kËísöæx#Z¢H>؆£2ÑdÝk)ÚhLv`‹Ç©I÷⴦ûä®È ÐOò´ ` }WátvGƒMoRe«n«H@%å4iiuÂ|a{àÿuö"Høò£ 6ޏ©áœ¿jG®Éfݺ=v,ÚÅgæ§#+ï„å^Î=äU!fÝíwõERò³¾K©ÔâY:Ißø(ð_!вã°&ZòŸ3µÒb™—c<-÷V³éO¸8áßHÄy¦Sô^û Gìýj¾´}Ó¾¢7ÌÖF¥–-æÖŠÑ?®ÍáÞ¾ou–bp²‹‘¹á"óª˜5Þy–ç¬f?½ÛJzGû€]<ˆ§w—‚¾7ß÷HŒF‡†ˆ¥(¦?x®ë)¼*¸¹§àÙÒ7ÕÒI Æ«¿8Ìmþ£xa#¦ w⤱p€Ù1K-3ê­Œä{Hª5žbÙ® ÙΠÙ$¸ý;¡Oƒ7pŠ€(·ZÙÔ4 0²¢Ÿ˜Ç÷^€ ±âH““tHù]âÛ««AÈàvUºØƒ·Õ2 !¼¿S¨ ­WAêEÇŸé§cÞ&l0nKŽ;þU͈#VÖ~ýÓAoÖ ½ä™àŒ\y}>[š¡ÿ¤§W)ÈÇ}}بiöƒÅð@~²ø.ÿ…û&T£»ßFS§˜É0ëÆÎ¤=³Å¥øb¸kþuý_ía2«ŽËùï`a!Pÿ½8·eË~/9”—£.ãÇŒ Jàd( záðN¨‚DqMA_Y Ü4øëˆòhSSêë’:&œbê÷Ôà3͉`}ÜúZ 4Cøžû¾˜C»l­+Pb—v¹Ibz=ä]›‡ŽÉ1'4¸ ¢B©9ùîÅpó0·ðFP*hÙ1¡V9ðŒ˜»LÙ‡óB?_Rúoñæh¯=”g1® D/ø7`_ר¢D`( —¼%JB½S–7ô¶RLŸÈŒ4]äzy„ÂI1ïµÕm¸òd w(¢ñléPã’z¨ŒÍâ#=azd ŒÝa*!xØ„‡¼qÚ ßðªà ´ ø'°»Xæm6cx‚nÿœTÔø .nØúàeæþ·êßµÓ±g,BEþ;ý×5°¤U·U–Y³íÇ Q³—d‚ b®z««ªãkrÚ¦xçütôÀßtTj<•¿ä¬û]¹Ûî(ôç~ÉÝLÊöîŒê³üëÀ¢<‰i0stÔ§…LÇ:(¤ù„—÷µ€Mš½â¬*M³¢=Hª fsóiÐæ<ÉήžV/Áÿ|à—ƒÏ|ÔÃÃÀ Æk7>j£´WýDá Ø´±N•1OÛ°Ò«¤¤kf7Õ)&½ÄƒY?¯ØfÙxhñA¡:Ïß‹àÊ,5¯¥;\0#å±á™EÕÞÁâƒt@X) ÓtàvÓ“¶×—SÈ$ /WÅ„P€Ä¾6ÐÂbIJÃUª"pî‚ãHHàš»G‡Á|JäšÅ©¦Ö«ÀOÜGCІÒÐø±Ù?átB…üW‰c Ó±=/ s3‰d$†µÛºzžÄ^.µÄÕìˆO6:ÿá÷Û—ù:•û»\Xàx-•PO0bXÊØq§ùÆpáUJÞѵ +«Åk­O®YoÌŠf—­Þ{vt5ýS ŠË(d ˆdR}d¬Èl2ñ³ÿŒ‰&mÌ õ‘}ŒrLIÊmèÉd0î³R·z*@Ú„2FDL̰G‚2)t` ‚2£/iÐÌ€¼™8ó!Oö„ Ñà ¹†QFxÈ›ŽçŠx‚Qp2/µí` d+æDØÀÝÉö °FO˜±ÈC#š>•|ÀŠ2ƒ _oŠ úÈ.1‚ò›ß Ñ€&LŠÖeåpAÛ°€@R ÉŠi‚ʘ!®1`2.!Š×½í$1Å2÷™ÌÀ# ¼€éZª¨]´è½†‘ýCw©oOyU-ótÞ‰æÖÇÀÀ«%ÜûpHåkëRTÌüBýçn h@.Ey±û«•d0:š(Tb™yùr•Q Òsx>š›âÑ$r7ÏAËELÒʹGòÃÍJ½›d÷û™×ócÿn· zF€RÈÍ…ôZñì‘ TQhª6¬XŽ€!L|&XSÁ”7ʆáíÚ¬4ö­ÌP렅 Fïk×ÅÏ'¸— ËÚlu­;íù´(e¤?§‹;•cði2»—ñªb‡8Þ &Ýßéÿ7¼X©ÂëÚ8Ð(]F²ãîÕ:/8ˆ?q¶nBø¸ñã"*^ñ»¢d P½¯NOcEI樆B!åöÕL¬ôYXŽì`7g#;+tóÔmZ¯o9¤®ß]$‰ DUœä—€Ø?®Uh‘}°¬»-G Óâçðþ¨ò°ÕvÛÔÐøàí±&FÒœÄïe‹ ļ6ŸJhgÁ¿y³*(m¯Å"5û–›LÌþu[¶8O·›%Ùb¯y¼‚Ç ÒWNVëØOx°Î¶à•m†èåÌ—‹‘Ž#PYhUAL§û]ëa¹FzËÃ;Wˆ2·|©Û±ˆä‘vÝÀ|•ûÓ n Óiû¹Ok9(cë¶ë•å‡zT§ŸHPìød±*ó‚½á Ј f€±xÂx[2ó(b AôCâO+A¯è Ì ¼‚Lœ²ð’ ¾=­**ö{ei)_ ÿ÷R•ê£0o<¸`_ŸªÒj…ކɿwèÒZ”Ÿ‹ êû H± ÄfÊÙ §Û/öæãíyå’vãã¶=Ò¼X;#ÖV(DA.GQ±"o܆‡ø|ëiãx+ü);‰ ºG|êF_cìý¡"•y½ Vü³.à4#ÛÆÁÈ"†Ú ‚w6\ÐÖwÂØA/ ãÒwôÄ,,‡Ž oz¨3"3Ð &‘B;—–œÔ"<æ,TDmì´„íß+vŸúžª ^y¹P]ô:Yzؾ Òf÷v$ßL~OLaÐõò¨äPlŒ®Ey«b*Ÿlm>!søôú-®m¸ü*–ÝHðuJØóŽIyzüXr<2ù'?0 \9 ëóˆÊ—!Íž ÿˆÙS² †8O0âkœ/3‰öAA²ö-r’¾³šà4ÄÁïïMÛŠÅæÊ°ËT¡.R¸î„'~³áÂðI÷uw$Áݘ°EJ n} Øˆ „ã|"#²/°ÀÌlgàÆ¸Ó@,ý%\ì°×–ñðXV¦Æm‘d¸jQÔuaý$Âý-É~„=_ˆ%jÑ,ZŸlS0"2^¿R›v3®D†Ñ;<·ñL¡Ê\5|}ƾrÞ!ÁíšAõ’ôbäˆ4„zió€›yáÊ×ÑÞ¨H€Y«ÃsÀ“ÓÍÙ¢K| ^ \*A4‚¸ +þ|ÆG+gWµÈùí“å:ð§+qG†éoðµßUÖÏ úa:7@äÝ>ŸöÆpÃÛëO½GQ0±ü/ŨN(3Á©œ#oÄ›âFÆJ4ö¸€2œA›Ûv—l`Õt,±}ÓãéLAðæÝ¾ßn#öÜ,#ns{:±žñgPˆþςŵÂöÉ.ÆV8c‰YaþºÐªÂ{ƶLB¤¡ °¢’ìA‘àS|Ziõ½xGÉwÈLÀþÁ€Ú@÷©Ñ`ùŨƒ¨_¹ @'ýƒðdÇä Ü5Þ<¤{Û2 XE̦DíêÃo€«Àp»!®8 - Mç /i` àÅ ª•YupÑ ôÕ=!3³®Ï®«ÆÂÆzBƒø ™üKÍÊEY–*ޠߪE‘i·5Ö~¥ ÛÙUØ1%kbÉb‹•d(šNІЮMoü‰.àSvÛ•fb›ŽBܼ ÉôÎ1&×Bëz_ŠÍ1ÆÙƒH ÎõmÀw“Ó•Ö\ËÆ{Û«úÄ0zŽ2ƒYØ8âB-ÞH ìœÈl9XŸ ³µŽæV¦Ø½à († KîžÒg˜×Ì@¡òu+ì5…Ç8@~ûT™wžÿ¸Žª)¶·ÿ)-  {ô—RZ<HÕÁÌ•ÜÎãý¡N´0‚¼´„/„¯Ü@P*9檌6±˜•ÁCì„]±¨J½ãê›gô’s¾Ù @¼Á©.‰y,P1² Ö}"ÝO\age3÷ÊÐ6 Ô=Óae¬ó­ŸmÄ[p’#C(­¹ŸÍ4$å0nÊe>AR CŒ`³Á¯ ¥Ÿ5¢å1ŠdvÈÚlwyea4w’)r弎ÏX@N`µÜNçÿ}yT„‚Ô )8¦2kD0ÇwÔ5ð†ÀœÎ­tŽd2TÕâÌsÄŠ…Àþ(FoǰH¯Øl7^.ÐýXvÄY«\3ÐVlE=DZqÄ×D¾¢€.$ÙáQæ:wZ~YälpVZdtñÉk«MîwJ’Ì÷o*‰“¬TÀ»ÎI_TÞ`SÚCñUÚô ~깟¿£õ©˜[£’øÛÕi?#®Ó͝P;áüí~÷E]úmSöÆA»ŸüA„|ê¯Ø™Åÿ€I¨DâÖú².¢9?rÞÜmý€ê‹:N&Q;^²W/ïK^SúK*Òä\$ cIÏž` \ŒVy„ÂSÈ[Ìdñ]L×BQJ‘úàî{Õ[Ì[ñм¬ºþ3mé s0-ìÜ*¡Óð`«múßëÎ]ÿÝ“ r3ÍÙ›PÔûü„ eÚ¥z¢­ìúmþYe©}øî_ÑeÍF\r²>Ë~´î®]=×nð€{—ö“‚ùqÏ,­]ëY~ÅQæž“‘j‚̨~¦( ö]âçdÅ(‘ Š°89\¥“éŠââeY½òL'ÃDØo§7V}# Ÿç¾ˆzJ^(áÝ*ι%…d®’7¹‚Ûbž‹~F'3ÈüKPQquËóƒ=ÉÙù¾Önñ£EÍåf%+m_c’ ÔœµŸ‚úg<Ы–ÏI¿3ÖUY ¸h{ÖÚbÈ+¡®ãiÏåúbà$Ú£ãOs}u»—œJöËUd޾]láw½ßy•«ÊwyÊí‘·FÃ0j+®óÛk·Šz ›c^6Úä¦ì­M‚f3mnGöqØgº²Í<­ª„i3»G+áÅÆo-þRJ‰œFCn\>¥»ÝWà^gûö)Óš Äm~–ÿzáËU"–쌥æ9‘ “k.èzöì<éF¡ZÌÿêâÛÚ¤'‹Æíöd„…Î TÉç™IœÉ¥·ÌX ¼L»,ðh‡é. ¹Qû”0>Fíö* ªÓ[¶l# ê‹_!f\¿R¯û¤jëîß×1ƒ@øÒÐé$Ó@ö>tæûe-rGÚ ûPŒ¨LÌ&kÂü?æû‰8ŠësbИ”0³²‘+ý¢?A¿0h«ÔFºa”*Ñ”yú­÷¾ƒäê??Þ›x}GGÇ|wZ;·˜Ô·¿´Ж¡Kæ¶9DÕ17=ª,—б$}åƽÏûM¢¼:m«—â·ái˜¡ÅwÓÅíþ¡>“ê¼K¶O§üêcÙ·_XÀõÂ6’@MÈ[¶7æwx°S¶ul aÙ³NAÄÒ„DSZÛ# Å¿È;îâ²à_vh™Ñ BªY1ìêTÀÊ‘5嵘ˆ–ÉP…F ø3Žm7ª¯3~Øúõ¼ò>1_0z)ÂÚÉu Î5²°ƒÖ†Î¢®¥ËéX&3mãõÒ==z-a’o³÷PÜ1HŠÚQ çiþU.³ Žº¨).u F·¢]™ÆÝ…’Õ+ªäô9v/; ä7C»RÇ×µõ?Ma(E¢šW¡òµèðŸÐ=¹$râZFO‘–È|`€ô ÄP5ýÎ`ÛAXœqZ·}P”GRY‘xL hãºX¾­á;Pýô¯&Ë•œwPî^›)JãÒi.¯tåç ˆ¯Šsû­ºqû”~r _ç˜sSÍ9ß#êò»fÓ‘d¾_“žÙ×ÜðÅ(ù4öâµNâ«i¿êkoD³(0”4™ëÄÁ_ö "£ýlpS쎡æÕ0[‰_no¸b5Ñðk¤Bß@évÔ‰,àî(0ƒ$ü'š ¬ôl+LÏ×µaâƒÀ`:ÝÞ74H-÷à!ÈOc诠,”™’Ù}ËÍ"~õ³ª–ÆÞRDÓ!ò“¿0„îDÜù´'›kø/†ñ…Þ8 ›OQú$ »DšÂ䘢bÊþÄMYš€£ñ!™­ØïÑ™§ÊM”3XQôö…7Ñ@4VAôAdŽ×±ñkQ` w°sÃYç ÐïÅãJj-™–ïËÞO:ýz”kSœß2l&98SÓèäâ{¼<$}o”gÓÍgR7Õ[ "ˆÍOƒV‘DÊ÷‹öSHÛOØ“J4eÅkä4&' wÄýÈÇË[Pê.úÇ/Kê#¬¼}³ 4 ´{L_ðÃ_õ©§ʱédx¦‡õ*iì1­úº]^Æ –çά¾kuje£Ôúí«¤ö¡‚‡¿-¹XxÉYh¢Z_ívsE¸!Š=|Ù=Â5˜œó!©ò_BLŠYY'»~¥RY˜ñêÎî>þ[{8”¶õ’t*u£Ö/;\øo¿¦6"­™ÿº_J{öX„Áôs¨´àQÓ¢#+ŸàF3Å-nðßp­wù{P‚<–FÑD‹Ëêýÿ ÄÜÇá^¿¬\”…ÕCyÞT+ªÓ= øØ{ñÐgõ%g£6YÓXÿ¼Ü^Ædwß3I<Ù¨OŽì‹ÙðÆC ø£ÇäOí¨ÁÄ“§á%]h€Ø,™l°~,zà¦éçŒ{ ¬êø¿B o„*°€ñ·´c=Ïvƒ Õ”ún=U ,;j=Ð2÷“§ÿB…mbM;nÃrþQRd/ÊztQËFsý•Ù’È6Lr®ôñ§ŸnÀï×;ÉÆU<Ý´ò‰xÖgžÝQ¢BW<“ÚÀFûPíµÝúœÑþaùRJ£¨Ý虬° ûÔшñÓ3HÖ îõ fŒÊ£/MÛ,››Ä%0¨µÃÖdN#5”Óºh=&B‚tóžËTöêl fTTy]éX7=íÛ ôÍ´ÕD,‘Mp$¾úâ6zÝÆ¼Úáú›…&zÿ=]mÃ/ª8œ Z‹´DiÖ[|[Úéµh£ÛŽp߯ ÷k÷$#ÈóÀèò\pD³Íf¸Éh¿ÈìH4Ðà $gP‹í¡ýb‹•œZ¯¼|FB›<èWܬuäñÁÔ<';þkñÁÑLQà@=xBn™¬¯º!ñ¾wKÖ8ã:Ѫd€ŽÈSayøâ¼Ð{*cõ!Ñy5á-1-ûG(ûÖ¹tö$òíùÁ<¾vÑ/ù©»Ç’D/ÀÛ~°y%PkÌ2•@ò‚Í2Œ.kF¼‘žé­¨oP—cú‘‰Sç¹Èx–Škè‚ÑÿÚßwk赕›}ãÁ²±õ¬nªn>Y,žFû"Ä4ýÞZò¡Ì9œŠU9Û«¬ýè´ÔN§1‡ ¨z¿]ì_R–l0<úZÍ"ó]&Ï8š¿»©¡Öß©ˆi_Ÿzd‰*ÎËùVÈ0×áê!© ñóUzEÑÁ´0R 9“•l³#­ÓkžRcûñGÎá_÷tVÎULùÉy®þ>~ÒIæz~ù•CGͰùzOòú|èVó+“ÈF5TM0# Öñ·­þŽÓÄøÒ^ú¯nÚk^ÝÂM²³ùxXʯúëçÞcÇ%ˆŸÐ“zdÊOÍ6“¶c æ"´eôšø½È‰(ßi¢ gQfkb\¨;€ånñß¹Ã&ArSvkîî»Á’:°¤Ï½y¸±Ú³ç É€Zw¡AåeÌ1K¯ ‚h^Ћ™“JÃÒ<ªõ”Ýâyä |ŒÓúx„“‚¾6úL2Ãe³ø{S|< Ò|%£ ,³•u…<Ë„€ªÛœY/OÒ 8 2<„ö;CgÛªaT–H¯ R€Sº^ÐÚA©ÅBÚML\Üèq 2»ºX÷jÀU¦ðšóŠ`§I%MQ‹€´qâg’üVh3ßÃÐáÁ­È`Þ8,:ͺ”¿Ó‚3mÄã…fÕ¯¹ò¶§P"ÒÊ8 z?ô§¡ ÚÐǘœ†Mt tÌ~p,ⵌ^Iük+,~å5j¼Äå^¸ €‰²t5™¤»hHD ªÁvJ7ŒH CŒƒff\%b°ØÊlÑEXø¶;;-š…hÐì¥Éºõĸà¯MÚÖRØÓ+4È„é£ûôÈßX$Fü;âHÌ‘dï©ïø¼:o8©RÔêŒjƒ`(÷îåajvôñZt—÷¸£/çªûc/}>M…Z¢Øa”ÎÚ«žòêüïÈ_Êb‰ô GÀsK©¢ª³[®p¨%®DŒ³¤I09„1]öäÐÏ¿¯R^ݰíþ´ôRÅp@G¹Aï‰ÑðÆI§‹tì6ìKaLØý?ãøî1›ocÏ„)Fmçê!N*Q‰ì[6ÀýüŒücp™û"eÆû݃¥øŽw‘˜ºÄÁKÈx®ö†a‘]$RFDûÕ¿Ð[‰óEáÜf¾+œ{Ê?•M‚]¸ÄaU'WG¶´ˆ™À€Hyйâ4&T}èôÙl­.QöÖ¡Q)MÉ£`Jžo3¢ÈÈ£S[ºü9B÷7(?øØUµ°9àÄovù t6r‹¡r–ܼNm¶ƒ#týg»Ë¶O =Msó \‹„Ú#:¯ü 'wcÄžÖ}þê+Nq;dš•‹oÜaŽ…ZUBE#¦žãªŽÖœùW¶ÏãYO³¢þ测Òäx•þfCмñŽJ2Û©hÎQÀœ—jpIðQR7¤Æè:¹(æ“Hϵ—ÉÝQÓ¢Xõœ2:÷E•éEkí8Qìúo’>&¹yÒ|ƒ€†[+fªÈEu‘™þŽ!ð”§R°ógæ9ÁûüÜàÉoÎ%`W}²É¡kBé+•ÛqÚZÞ&¶¥÷ݺ£V¯¶æc¸ÊU®VŒÃQZt ‰–Ë*[¶íj…ݾjûùqø?Kíåžµmûô7¾In!kë÷\ÓëâÙ|}Ñ›ÀæLêgªþ©VÖ»-]ŠÉ×ã()¢È«(Â0Ï“½vÑ|3¢Ö ¿Ê)TY¼pCé2ê2Ðó\&Õ¿òw^]ÆZaÂFªî#îTùÌS6ªÉw=j…!„.·Ðñ àã2ÇýQQe_Nv×ñ™Ë¹.j˜ 8¹ð— á2z_ÜÞÑ—CISØ›†Ü‹àWXØS_Æ\k×è8'xGd€ðˆ¿¼cÂ-Råh€ü³D-¹4ïöXÞ¤ Ê+i ³ î§N<Ô¡z4¹ëÚ§|¯Ö‘LNµ€_ü½ý…ì¿À†os*(t­`¢|Ï´HþK½OÖþl¾¤¦q'T¼(~šõyh>¹É}ŸTÕ Ç¢Y-"ÒdFg—±œ‚ˆÄ°N¼pƒ¢¢°{$/7â|€Z¤5óñ?…ã"æ$‚NJà H0iäƒ:¬½E¨‘©nïÔ[²ìR#/ ‚hÂO!»ê“xqÆ8Grð©Ak·x³R»¬#‰t|í•É"õ~°Áº’O¼þІœ_Ü!Ìt¶²'ôü[áÈ~ô q’œíÑÐ$&AÛ—®õeÙ*a°c®œfÂô˜Çá7)®ÔÐ6²ÚŸ hz¼Ùþvázõg_„‚ûTmlù âô$Í›Š2T·Œ©âñ&gTÎÔˆ<ñP'`JµñƒU/zyû=ú—mg ÍHYX>¸fÁU1c5æþ§­“ÊAúSÜÃ-iº{õ9 Ñõì¡~wšu3Šì‘s8ÁœõBÄgu´¹·d¦ú±u΀nÙAî“X„דá·$Ö/ï@ü•åKxE€´P8A[9Ãü“Ì<_fû¹Á<¯ÓþÏÏ('ÎÜ~êŠ`û@öÝØ„n°ÿh ±¶sÏ?­áðc*øçýëóÍð®ƒÆ˜›VŸJ €pR/zbgNcÀ >ûò½0À«gü™×§v 1 Óò‹V߀º¿rCä7„Í+ó>÷8©ìçýòT¥>M7clÙòîQHÙ%Khxÿß¾bJ!t‘úF-¢©sòÃ=ˆOYAWƒ ÓÀ·å„+"<€àg£—V!¶YPš¯YNw†!‹>Ñ£æÊˆÃ¿,ù;ã*G»H!GØ,D»Ç¡±n[ÄÖuÛD´1 'S×Í6H,æ>ŽK^&¿ŸîV¶L»â˜a±ë~Rva²5ôº-'KøÀúîÞ•À:éìú8Û+ {•Újkþ†ËgiÛ´ðÁcÿ3Ÿߡʮ»!Þæ=[¼æ…¿¦ZçýùÈ>Ã`9”³_¤•ˆõÔUth%@¢Õ܇……"–Áo‰^B·¸‰ËzèôFÅÞ°‹±H$ç¿v™¾Â7ü?ûhWnK°×ÝÿdÀÆŠ[É}1xÞcIÄߺŽCŸ½1ùRe‡”,Äÿ÷óÄO]"õÑ\xéìÈ"GˆŸÊtzþtG·?ÄN¯Â{.ÊŠÇcF O2—Ç ÜEý¶W[P–º€!Y ]tSüÐùxPhÝöÐ_½*>EZ?{áp!PÆ å¶ˆ@Ëš}Ô’ÚPWò^]„Ʊ…>±X¥övp6Þà¤çœ{„ •¥m:y_C/ ãˆò !ß“+ÑX ìÆõ~àS/k¸|ÚÚwG·¹2 -@2‰›©âŽÀá?TCßãOÁÿ Œ]ºqWŸðJ h='¦cªk=Ï|¢€)d²õÈlð0}M½ˆãåÞ_º }û‚ùY¤p ¦<Ž>ek]$TE"uò|j‚‚¨Ü9‰¯phýñ²åí36бƒî«âH¬®Þ£`åtÃ4×'=JÃWÄèXÜåý‡÷dMÚÄÂ’@Mî7ŸÄïvKŠpñìNÙŽÿtÏçWàVŸ®)zõ¡óåÙÄO„¦H¸NÀÚJQ°²_|9C‰©mÜ6þý*˜ŠÕÿ>³-9@ <å7"3ÑS}Àñÿ²¯;s~åF‚b»ÙžY îz±IÙP§ÁÊ üªšó õ´Öå£?^zŽƒ›AF”8sÎýçö ˜Ö[Ò|¸äUjz»¢Ü伕-¸±‚£Ïðº»ô…LžiÄ>X5N¨T¡n9æ+} 9xKjÒýÂ>-#^óa²²¼Ô¬âƦ<¢ë¯¨Soð²¢jvSóå$F¡o…MÃù ‘àœ·´cðå ª Ü€`nÓ9dn»¿Übä³ ƒ4ÁßíÇDJC¡A¨$mam;©¿“‹ÄR`÷⦷™Ù¼¼W™2ª¶‡—§%Xæl9,áFzhçõvâ%y+9°>„ÑdK‘ÿƒÓrcUê_…é”ì̈[FAPQ™É|éW|[^‚­³ž ó ¬ñ °u‰R¢Dö\…D ŸyN:a‚K¾ŸÑðZX–걟$ù“l"ÿO>˜Àøš¤Ér[Ós2÷ñJª÷ª’>O¿žª~]×™Ÿ*_¡>.W" »EN»]<äõivÊÍã+í{p7§¸ž:Ÿwí!¿"*¹…ÕSJ–©^¼©&tÞë2‹Qn ô-w.MÚt·±j»«G!xqõôÉÆöVö’rÞ3µ{W^’O¿¤®g°ú:ÒÒÉ1‹ýLÓ`Îf¤›E¨„úrÌ wÒ™msF©B¨²K“é:¶úN4ÛyGÎûé4·×šràãFRˆÝJ;šùC„ÙDæDŠ:þÆC#ÝÚ£c¯Ô¨õD?h¡0eª¼A+œ6øóÈݹxsóи¦ŸK žäè¶™ÝwÙ|$•z è›À‘-ÄÝÍ•%”esft—¤þ\òzXÕ\®Ðcqèpq¨¾ºÛÚªC^‚Øô'KÅÈŽ@Ìß6@‰‘brñŽ+ @0Þ ß…ð=Ðhô¡Cúþ«_jj-Ÿ+›+W¤¸Þ°iØÏ¿æ:Q‹À ÚMñ4i€Â/¾07÷Î|ŠÍbÆÑníÂèD·l±€ÆhžðŠ"(”.xaå9BÛÁ?E{.ñÝÙ ¶€›¾™ ¨Á‹)v ƨb³ï›îθæÈϪcTÐÀ0OˆR'LC ;£U{–O™!¢E¢¦â¯ÉO/2)`ènàì‹å¹í6úrSâYñ>J‹lšX&YÏÆÙÇçðn—2ƒüb¬Wv}Gì킦jä‚©ÖÒçtK„¾¶þo)½ÅÑh ”I¡P•Q‚Z´ù¼ºkà„æ‘“ÛQ-a‹Ópÿâ\øCñÚ>§q^;1?„K×p“Ê‚,š-ŠÊ‚% b”m¹ !³D¦ŸÁøYå¹°‚çj P4?¿7žÄk þ¬‹lbÂaf†rNµhä•çJÓ³zZžb˜g3ùòVfî•ãølø‘øYïA·ÉŸ*|! ý`ÀåoÃxë¾/Ý^Î[W¿Œ•6!â¡ðï![ˆ„Y`ùÂè™õ;ä ¥í6Pj‹î¬DGêWٸæá F¨”p4` S:TìAUM7Í2·ß$#[Â+b«7lß  ¾ÃóJ숧ÀÑ™°ÿãšRûJ–=_ƒãù¯Õ'AúÇS=o¸õ Ÿ‹z÷\šìïé¢È“×þW?¹ 8/Xh¸‡Ù Î¥UWoÑS/e ¬´Y“â+˶8\dkyjñÊÍZ’*i;Äz”k8wø/õ=áÂÞå¯jß>9¾æOeèâ.ŠûGæìòü™x2%>ñ§ƒað3`Ò$x^÷šò4 È™BîP5®Ñ•<­û#ÿPk¯3ößÅŸØËy£˜¥dz©l¶%ýÑ ¦ÌÐÃ#" À±¦ó-B|¢øöYoêsqÑCá wÎ £”Æ5 ¨îj‹Q!‚gø£Mx レ ,áeo|ÎXëð~È <‚¬Ÿ6°&)v[ó(J󿮥 r?f¨ð(ˆfNEÅ”úD‚rh‚j\Öz˜v³¥Ûhj“j±# #Ê3úÔH`úÁ˜/]"§\¼KhþØQÁzœ5ùôÜÔaâó‡ö`ÃR™–ÐàöËÉ€¹BóEÌóÓgûØ+ï²*_ú*@ÃòضòDd&Q§¡Ê®õ%SäÜsâ'Ì%=Í?ã|Æ«c%ÿižåŸÐ§¥CqgûÛ2D"cG¼Ç?¿z´oçÑbhàƒR =ê²&*Ô(‚ÁáyÔïl#èDÛŽ¤ï)´ZSõGGéÓTOÏ!zHCÇ™ -н*$ 5ÅdR²–š2˜> > aÜh{Äù` ÛO(™Ky~çh †¥m…†h¥ ’„]v+¹ê¯všE"Ål Mc‡2~M'ÂÙQ>fŸìp…¹Bh·wŽ#ü·ßÃy(8V˜,*ùñ·w”.æU7ü–Q»åõ¼ÉçŽ2KGÓwŸÚï‹$õ;ï /JÖx…”×KkÓÛW;ÓƒZ¬$dsÕ+Ø\gô\#5*¢ÏɽŸ­•쌒hnå·öyž|ä¡jû:\ù(H¡›ý€íeMe¿?€Ç"}ÆÔ¥1Pðƒª8ä‹;/H+b*ÅY””퀨%ÎiNÿlIù& ~ÈóŽi®$wÇxHŽäd +#~4ñ¤÷ð³¶ëÙ {þ÷Ïãt(|]ÊÑ"ÂwœLçù–“ójI8°Ø4 ðäqŒƒ|²åÝ 6ï¶1-–2$hzÈ%ÃðÎ…î±R1Um7^,A®Ð0$´GÑ{á½ÛX«ŠÄˆ%²->D­³¬—F‹®À+Õw±«°z4HѪ\h‹†dÛÙp¦g€½tGß#=F+Mri_ÃŒ*|Ê¢ØÇq]B\½õ–Iàr†ÓNòɼ=·)©ÀÑÞøj×,˜b*üþ7ëÐòíMWŒ c–*¸q€jSµ  øúžú~¾vMh¹¤(Õ(¤’¶:”C35eO£‚¢ œ‹Q‘LyËÓ5aÙ¼ã??;‚ùW·@Þ1…ß @3ÁÆL0o'=;Ö Œ^9}N—lb”=`~_›S‘¥´kŒ0ÒÁd<–P‹ý‹ß±¦’|&iðØäKXà]j%ÂÆ+­„é™îÞ(Ïasöý¢¤c7)^k–c¢©Ð±§(#o%¸0ÊBc´jH¬ é)ßm†°Sî&ÔßÅ~ˆ“jîŠ?¿kó jÅÑ%‘Ýå§Ø·NFÌôÅýEóz×kh<¿q]”f–Äjâÿi†;­3>Mñò‹´»SD´ï ½f‘öÆSÞÃqJÕ^‚×(Šà¿ß¾ÔãŸ2Jà݅зvÂw›¡=^‰„ôºÜ5 \I 2öˆù¡Tû™ÂÝdÃÜâèLJ'‰©Ëa»âO¿Db¸Ú¡ ÜÆY‘otíT0ê¹ÕªÛœc®å±ßeè 103, tsîùC?7°qYë¥,¯œ ÉÀÁaº«ç ;©%BÃ3šŒõ³¥]z¤IÒqõïÀYŒý–û6’£°#:ŸÖÓËÙø2¸sÛPÊ|ø³¯qS9«D™Ï®ëoà]V'S4C €UÁ+m‹Eâm– EÔLó'ÊNYã(“†ýÒ¤M­WÔä@`ÀÝÀlI7qåè†ûÔ¼Ÿ¡Ð@´\F¸`œßZ´Z Ö]ãr¦©G?´Tç¢ÜñPÝi«o¢ãyTYÆcÅÅ£ÌoëÌ*À£Åvj¿J¯^^þ?z²›ÙR­±ˆ¶5bªŽUú>°MÖøQ @Îòå‡ñLîRìÞ3lμûsˆ»h[0=Ký¿?¨Ò%»7ìÙÉ4c^‹¹ g =o®è0ùEÌÔQr_¦T>s¯Uæ€ >Úô1s"ûüCaüÞNrÞ{öc?yaß8û0«D÷DPL©.Â¥_•Ä-Ú1Y>»„Z+÷²ýv}÷xþª’ÓabS'(pp‘ãiæD>@uU»ê¸‚–ì 4DÀOA€…ÏñãÖúe/û¯Œ—\‘&|B€ÓcGçÑÖCSnu¿žtŠãgPBçRÝe«¯=µÖÆï?Šƒ‚ó ¾Ô{l¼bݨO!^~é>÷üAoš ¿ [› ËuO¹6Ýj5Ç U…^DPíŒþâ¡¢øWñ1í¨¦:†ª6Àf4}Q¬ÕÜPOŸ-œæãèë׊{.¦¯ªˆ`TBim>¢Žþ×0cöÕææHت†Q¥joÙ±‹s)"tý``ĵDSÐ ˜d7« çÇSßñº•ðZzèÞ\Ž}!'<ͼ-‚É€­¸“,¯…œìLá(újŒ‰)”gØiÒzÒ—âRž‡Þ Ä¥îþVJÔÎ<«-«?w±C8¢„Ó 1"ªeÃ3Pö©¶C´ Æ$¦i+Ã…Õ¨ f <ÿÜù»ô6ÄßäÈ0Šx46Ë4¼ýóp$làD*qêäÿŽ?7d)t<ÈbcDÚãÛêztEK¹ñÃþ„d‹òÕÉT&nlÈ w왈<$K>€'r™GFÀËEÜnRÃrHjÖ7;žlÝá¤}夷R…x‰f³a´HÆ8ëýBᎂD< ¾ñ#£ÜKÎÅõ.þI‚§†ñs¿p¦1Ž;¶¿ˆJÝV%~ÑCݸLÑPÐ@ e>æT~ók0é…)ßñU]Ƴ8)žÈ×§ÇØ‡iï¾á¸Zg«6è==Y‘,HÊß"8‘šáï)Ê^' Ô›x“KAt įn£,÷Ž0eFŸuÝš ¤.Ërl4²é®†6Õ5í^ûú±“ötvÊ!Šq^G¹>æA £[fÀøjBðºLí‘–3Û%&Ä«J!"6,©¿ŠïFâï8™À€t ê<½/¦Æ0 xõ@á!odqy‡Cøž÷æh!šÃkPž°ú8))˜µðÞÏçSrbFG ¦‚W„©ó+\BÐg.\tD èà¬5Ð;;w4уªÞQ(]ÆÔæðH¤5da˜c«‚)K—ô¤.)﨑šÚ s-»èø=w•‚öž{{c_]UG¦c7t…XjcÝ®w/„Õª‰¡átðYç¼ñ¤s»˜P—E]Oê£â |šôUS¾i¶³V¿·?+#J4&Ã&Û¾¿©9È5as>i „z¢†=ò›¶ {¥Øù©Rº²åF‹Ï°æt"<^°Ì´Ê³ÉÇâyÙÍl"=f5ÞC+½Y•˜v阘?+!‰#ÛñºÎrº Ã×ëUy ª“˶þ¸îM7â‰æé²­}G›àï²*»ÿu;^²Ã¨‡&á9Õ\ZbPû„¶&Àc\cÀ s‰þÀ€ ™‹d%žs%Àšm¸cE…HgmR¦(ã“Dçs½Øyú²q5¥ŽB¥H†R™’b ÉjÙ¾T†`ô8€SÛ!Ö•†míúʸ ÏÞºxט %é.ÄyêùJ±Æ±õ¿¢}&~ÚüH»\䏨âÂÆÝJn9.ïèOE 8i;¢Íê¡Dål±\LãÙ_žH¸ÀHñnƒµ4Ï–0»¢ žg¥ëÖâ$}º“ÍÈQ\xíŒkZÛàœóo£®&Ðïír´&×éÜ<ªÊæêÚ_íÞ¬±?]´""½9¸;þ­à’ž¿Û®5‡¸0 ÇS1î§âxCsĨíosž'F÷ÜÚXhJdU/·'Dóï{(7Æx+ÐaN Òêš){·«ƒ3¯¾íEsÍE„ÚËÑ~á<cdi=…öO{bvàk´æCyöŽì¼_¸Man¢ò¹Ü¯ÄŒÉ7Τº¤wR+AHþ"lwOãÑÔŠ]O Ç¢ƒ¥ìK\ã‹5hÉ# ÈÍJ–4‹©'÷¡„Õù ”Ù ¤ôÉ^*¦DµKþÃc· ÕDgð—‚¶[E~©»ùÄ'br] `€à‘²ˆØ›É25DÄ3.(¤«2¹jË¿‹ ‚šú˪6È·ÿlh×LJ7W71žž Ÿ^À0™êO»WÓ¥_·Cž'#{òp!Šî$M|®²g†èLK…r“p“Ëßæ%³a%SGÂ÷”kTÅßôø*UÙÆe2M¾QUEälŠ#ÃÖ?þ¡’øµ)^A'Þè§ Ê°š–Ï·Àv(›&ó88Uðæ /ʽ,bªõRCÛW³å0ÛèËY!ÇmJL;)oqòRdm›T¦fÃÄ>&*Z-¥̱ùgà³s z~êÒ®×\x-tf:¯Óèµe p(¤þÊóÕ5ßðÚ´ÒÅŠ~öÕð…ã|\X„˜û¿Ã¥ÜÊCnSÑyP>À \-;ŸžéÂhà*à Ô:#»`2?>Ç·‘áƒ4Û‹$¤8^p²Â`y< ')âOßZ í~'¤Ç맸±>qóØ$¶S£Sá<‹½9ýÒ¸vDàç;ݬööjéÎ\ò•)ÕYTwË k8vµÔJÒGÕ¢–!_“‚HOáVó¿K°¢xGçŸî÷¯ƒ2ðŸÃ@Ư͊‡eâç4¯˜ÃWz#6 Áˆ¯SúIÿrQ/Žÿ¨j¯ç‘žá [(-—ª>ë7úë\åš°u|Z6çq‡„Õ+——ñ¤ ìÆ>ëÁZ©¸¨%$\ sÚ¹§‰o¼u÷:æšÇVÚ(¨±½ÄŒÙvãÏgPßz.4Œq..r¨3Ï^¤i·’î-° Ù™aÖ¡´ð©jà®H÷yÖ:ˆßm„âñ/âVkde=¦O©D¨9Gш;üei2îü÷|y§ö9ÌyF*dXùß=-²“Ô™W±ù$‡¨ôJÛÄUºÊÐu¢>Îû]Yb}D{EÞ?ÁY©%ësG‡½b޳ÊJ6Ž¢¹q fÓtq@|JªFïeb'ìòåøÜÝYãC„™á$*ýÊöU–ÀŸÖàÝÛT–‡›‘Àh>Ž=ö¹1vžð–mõ–«eUhGˆýìdUíIŒÌÜ`ŠÈùˆ¼‰‚RNvÖV ÖjlæOj@œéêýµ…°gÏ¿Ø* §yˆ ÒNøh´Onƒ•~ùô7-¾mÊ ‚’ksO~á½oWíÁ‡;›“¤Å±…½KF(ujžDÓ|S‰™Ú›õ«Ï½@ìb÷3^Î^³ý¥f$ðGÝ.Ÿ¿³óW%.«Å‰Å6wüfÅU‡Þ碵¬âμÕCØãÑ{à j~g¡>û#I?Só4ˆvû²EK¡eH¿ÑÐ ñ…8W},Ü?¾34ä*=5—„Ž$:ðÞ·Wü•|Ê…ƒ…!îbÅõV˜òWÛ7{OÂßð?=ÞA»’b˜rqò©ÒzCS“)íjK˜cðÚýþatËŒzD»h*å;Ù²î—Âøð‰Éál ÆU‘±2úhùé*Ê‹6ÌýÚn/ûT˜?Š‘Oò¸£¬ªÞЊ»~fÁ_ºÖºg#e{ƒQvÐ.½tÃ=ê\‡”ªu=·W@å­_äÕIu\ÝòîSpžYî?™Àš!âì”—Þ¯*¡˜~D7pP»ÚÞ“ÛÖXèjs-í×çV–¿.0‡ÞíeÛò‹³t½£6cx&õÛ€À^ïG[æ°j€¦hgê‰ØÕêÆœñ[`ÝÊg{'¤½þPìeU2ö͵HÓb2­èg%ýG*ØÑ2û\õ×è1Q{Û¤í÷ÃyVþ[€nÛ÷`W¥ER^ˆjF´6葊žÍÖިˉÊÝä¬ÁŠïwŸWôæÐÉÐ3Š&q×Cá‹’@¼ó‡góýê!Ñ×{ W¡äÿ¹WÚ…D˜ø“o |–“*vW«›ô¬¸5Úe¯euÕc,¦?j¬.M V¯¨Í\»T</"zW“¼n9/’!1ÓÜÆ©:òlÞ3 ß´9˜î®mó‘Ç΂^ÖŠŸ®ž: @ãñÕ•’Œ–úÝ¡¾ñ’ÿ$zq­å;N6·7òjn<{òØ¿¼%?~תÉÀ­‹Ë6ÿl§“ÒuR1D]V§ôP/ÿ£í†Ò¡ 'ãTüÅ)ï¼@séƒo ˆ t~Û¶Ž”pd£©4”0+ª†…:³x®á,1žƒåäë3,µ¯¿ªÍjƒ·»ùXGu6ÙÕ·â¦tõ?ÕÿXvên–¾.‰ß οoµ$WwZ‡Õu¿ˆÇ:ô¾ê۔Іô:.íë“'Öd@(%1¬Š eçÑD{~šµ@#»\)bØöÃâѧ1õEìX3事nq`-¦eÚbÕî|éu-Z?ÚN³âƒãcv]+TÈK¡*™IÝ„ÖM oŸ¤Ù¨r:¨)Î]2¡sòíR~8¿Î+é6Çãá}ªÔn|'¸ž{@ëÕýØvXEÓÑ—>FÒB?vœuˆ½%ðŠÏð~Ÿˆ—–*t¼FÌäE$#ôžc Ÿàs¥Ø+œ½âq$b­œN4±DŽ£: =TYÉÔ|ûˆïQ8øLàíyMê@Ï0#u{áúSŠîoýÐWü5†ûÕYôæÖvŸåˤъԪ?xy~‘eëiQŠÇ#–bó¼IuÎíFÏ`:ø…Ǫ-éếõ#·³š†cÍÌÁäz«èÏŸjËþuZîÌí*ÁKÑõÀ—•ÉÄ p1yá7:}fU0ZÍ÷ž‘¤ïRÝä™!J_W0›iuâw°{åã½)ލzªxõ²¹OÊ{'Úët%° xÇxÔ|¿‚-‡Gçù/ ÎjPîÿúúE]Û s°Eý&ÙbØ¿, ± 7—]õ{Ÿ/ÓìÔK¤gï(AÔÄ.™‡Úç±ß„öiüwh¤×î¾%ß_*é»ú¢š^¨úžO¤V{ïž«­RæÞ{F6ôà?zU¹U¼”Öµ½o™é=öd¾-nÊ(xÎàqR5TŒ=f5â_ITò0+‡I1tëJjE]kô½ ¢µ¶Š³o}jòLU¢2rxªnQ„XyœEQ==Í PŒ€²ÞÆÇæÏÈXº,‹Ù­ýˆƒÍòŒá2ZÜLpxþ±í`Ï•þjO/‰bªœ¿*åoà@m‚»µJ!MEàák$qû¸ÚO€J@å¼Íÿ–ü^!|øÁHÙîwàP5ˆœ¶˜p¾<ñ€Ýÿ˜ð“º{ûˆF—Ò°2É•WMd,ò¼ÊæïïÄÅUõŠNá¯ÿ;bYoÌY/œµ?YÄÛ1ꦘÔ‰qÙ#‹Gý²¥áWzÊk#Zý/âÛlñ±õÊì-·h’/!í~}J—[*¹18æ–fDO!EOn{°²ùYühÒŸ´é.Hƒ¿y‘—ý¬ÝN’ÑL¨ßtq]|­V’àˆsñ†}¬Ce:sŒo$óƒŒV_«¾*ô çßCŒ¼´Ò1²£¾|ÒX%îåþ MÒhò™èÃM`ç¦ü^ ñ Î!Qm*ZCÓ£w.ð“½Å¦Y4µK«*ŒÏø£)>Ò‚<„\¹·qj §|ɽ8nïÏaŒŒó{ê[S¹BwAÖ+”õ¤Â[f²}µ òjgNˆÉw<›ƒÛFðƒ°ÖE÷8^ny.=Hk–ëL€Šfy:=ðætræw@¬NÈús–o²»;`ؘ‡0èäIïŽïF›Ó­FËe8–$r<,Ù íJbòÝÇ•ü=åö K’2ªû3ž™ŒÂᤘáwØèÅ èÝÎBÒ$ ºÀ>ÌêMô·íA `1ø=Óe§GžYW#­×³iåþõ=y®?yÁ¯6ÄŠ£T±ï ®~Í|ÀÝKÕ‡ü³Œ1éˆñfRµb±„Y„Í·ÏèlïöRî"'†0…Â,èŠ!“üÍ^É´Z“)ïÇõÖÃÏøÞ å¿Õ×P•æ#%eäFj³‹ÈUñO "óŒ‰êVóFS{w~µTl)ÞV);/}Ö•–¤×˜–; áÛñõÒp+:]ì[´Ó%ÍÛVÎê– 1ðÚxrû5†lÅîIá†3°Rªºõ2ÎHm~×µÄ÷ªÿ̉8þ¸‰Ÿ‘N-1i%Å‚aÐJñTûªv…ÈÿÈLƒ7ù°«Ö…‚>Kã`€0QÌtbƒ„ ÜU£¬± ‘)Má3Ï ÔPù”Dnbó… ݦhɧ/Ûâx8zî¾íõ— ñ_t÷·•Udäî‹$z7HüñìË8}› ©Õ¨±GbÐn½j€;Ùˆ¸ä L&a"êa& }'­?žƒõOµÏ|i¿V”¶hØÂʤWrœ¸?¬½»¾Ïhb–Í΢4ç—·âR)‘†‘”î΋Ï"Ø1g{ÿû)E·õÿ SÏNtsLý¡!þS'pèìÄd‘åRpC®´7V­(` 7†¨2=Y¢vÁ%ÊCIÜZ¹Ñpv‡ùeí¨xa Û˸>>,AL©8U`Dø,ÁÝÌ q¹#mk]Ô!ÌÂsæ÷, Ü‚ÞE<–†¢Å|œ/û׆ÖÌYȨ€§‚œw»2çt£ïfƱkæßjÈZë•(ø\ˆîéÓcI~Í$™‡¡–,ZÂ…ϸߛ¸Nà<‚tØ,¿adqpcT)ze}ãé|eˆí‰a¨±6l Yò,×›½è¾^oyÃT1À8.æëY°ˆ\³ oÚ4ÕßïõͱÜ•zñÚ2|P^Íë¨8!‹B%d!«pÖ¡|6}g¶ÏûËDŸ&Ž•¿žÅ?öø¹’y/š¶ÝRW?ÎøÍhÃY¾ùzä^»8vH‚©ékûˆi€J3œ“p›–Yt^†Oà¸Èë¼~3q‰àöå)­„Ôeú 'œB%@olïe$0:\,BO@'ûqV/ÚÂÙÌ£ÕŠòI²Â´ûpZŒþ ¤`Q»‘Ú×mjÀ§Z¼X(Š<~–T´_“AgJó ®ò:ìíyšhS=ˆ\™m±ós–C‡½Œ‘‡XÍ*qÒˆÝ7Tdó¡ˆè$„Ü6Wê–ø[á=q>ˆ§qγ7cŒ€’`ä6øÓ^^u[”®ß£vy³ÓpQ0m8{tù¸È†À½^•ožºzWê"tã×Û'ZqT.¢@Ôú=iÒ@]RÞyP`¶§œÄ`5] Ö¶S¬×ƒOgQ­Vÿ• l¤íÒã¼ µ2674l*ýð_ó/õt°H·ð':M¥¨½s—YÖ+ŒXóh£(‰^EUò+ûWiål¯|-N{uÒÌ.ÐìÝL2JóJ…äã\p8Èå8óUÏí®Äþ¥Ê 6ò«Ë·Zµ­Ýœ·•Á´l2äìg_ìxéi¶"ŽùnÜbÕ,xŠ¿·8° ²CEñU '@Áð‹T0'„¯¡ÔX°”‚Î~o;—Óó‹ûµ ‘Uº’›˜ÙþíÿƤí0;i*8ŸG•ºbyz ¨?=`ú£Áè_£Þº <é1òÖɱ»dI¹«xóÏ{È%AÈP„§V*LyÀ«SîÓx²oL°Ÿm¾7Ò®’½DõüŸ_&®ú.8¯PÑ(káÛrJ1pFqˆÖhý؆ ΔâHJ=îû㇠‚07Ì!€Yù¦HÎnË{@evØâ|׊Ňk<Ó\ðFÝ%¥X¯ó—ËIp\“2‹]§´”ö"m"Qî®4§š*ŒÊŠ“ ü2OåŒ$¶ëÿpPqõõOæø]Z§ü¸§‡ßÛàjŠrÿE¨ÚäôÔ}t~þÄOŒÍ?gŸ¿ºPè5ç…SÌ®Wok‚¿ÛÆä‹ŽÞl^p«iÑW1““F3XŒÅÅßÃsKÑÖŽBÆŠ‰‹S¢2tJÚ¯U}\øö0½ú!¤/¢\/äVE7±òs}ÂúQZó›´2-ùŠÄ—oä °¶ÁxôE ¬üÛ8k:à©LÑ¡¸5),ÈÞ1éƒ8YðW:=5  !OeÏÖº ¬Í["b·åîfzÙ£2šáx8øˆh†.Ø4B­Ò|ü0¶³g§ÄÆ·±²¹p’.;f=ꈧye Ͱ阂gšz¬èùõDî—dš½dw*Rv] E6Ñ\PFlüòn(=¯ë\ª§ WU^†`ŒB ¨ô®¬(Ê¡ƒåòqÅctR7¬1a‡!³¡å_   ¦ø5?¬ÐÅ}€"&kîæ™cT/K2Þ¶¶\s‰˜QMc »Cuö|…"ƒÁìîpÚú1G©Gii÷Ʀ¦L ZËk~÷F(8LnœP¢Ù£€¯è2þ/W Ûq|¢6ÈÕ—Hyœä'»ÙG”úí؆@-Lý'À ¶‡8ÉÆaq|,ñq§¶¢ÿs]G¨™ªÂõï±jþœRÂÕÎ1ƒue“Ž Ùú ïx£Ô-nf‹I2÷izDêDá2ˆˆ4ÿ%؞͸ei=DùèÂ1䂯È:â®U!1o0q[wúu:.ÏÃ1Ï“/RÂE¨ZþÃÓ›ZýŽe6æn²°pPUQÐT°É¸lÂiwŒjXã)SjuÙj³¢?¬ JpÛß’ ðÂbløª/(€%ÂoyÊßU]µEcËÑVôÉ uûUf$)ÜL›˜:cÜF!@`?ØZž?TÙÔf>uße\/¼×édžæì‚…#nd?á?­FG¨°@F€éˆ K|‘([ƒ_pÖ 638†HZa|£ËÄñž’ž‘ï=IÕí¥ÎAg«U{˜%w¬ZDýï“• ÷—V>š¸ÂuQÈ;‘ƒ{,í°ž.)뤡_•`±aK Ï#ÊMí °Þñ@_±ø)Šä=È®G–"ÓȃÄòÖ|æðD7Ê|gü¡NÈ"Ì›këxª:‹»Á5X–VÀõÐ8m ¯ºfPL˜ÔSP\§N§W\f=W”íh}Äñ‘ÅOók¸¦vŒ2pèÉíUw}Pû S‘rá¬c–uWÀ„ÉÂÜ@ aÝlB©˜Z'š3d£ÿÜÉ PÓ+f œì)†Ðóz¥‰.©H+Ÿ ­ø{»¹ ãRJÎm^ß'Ï= v@ ânÔW5ÛgÑl=½ê³íÚp¼Þ¨ßãÕñ¦ü3ˆPz墱QL§-ZmÚ¯ÂÿÐBPàbø¤ÕÓxÛ×›G]¢ý†®{—¬ÅÛ@¿”†ŠéK‹(ÅC`u'ñòڄÂZ»Ámæ„´´ï¹G’ÇÏÎ:R;껺Bƒžô}uš"·×ÃZÅ×3Ê}æÜè?±ÃÑFžGì.¦Ië½Þ›MÂ{†E)Q”ì!‘¯<†äÃâÛCßSµ’Ÿ‘^ð}Ü×¶£°Ô‘¤épûÿZk–£^#9*×­çË÷&:#rJU }ÞHe,¨ÄwY¿ûÝ ͦÔv€}‹ûDäÓ.Ü&#BrîëÎ÷†;«³§:g@ïYà”êiþdýˆ|ÒmŒMµÃÆb?oØò‘ƒ û ,HÌ»¾1SBí"­aŠWÀ»üøHõ?/q”¥¶Q¤äÃ5Œ0P®Á=Ònf`\xëË7­PFIzÚ×-x¿#ñ~+„•gþßDO,Ä’žØ\<=͘¥ß`Rü#˜WØú¢Ü XûÿY“ð®ÚÀŸ„"ô–ejÜOðÐ ,,DŒ¥¨ ü¾õÇ:fû±˜êR%9«ú¹ÁdÈ‘°+.Ybù(:(ÿ5"ÛÖõC~=çÆÔ#¸`Pà ¢¦ÜÜW&w5}5ý¥>ÍWäè*ó²0IúkO6¥‰‚ókóã¨DôæiÕïíù‹„š¹¢OÛC0g90C"= He÷ø1}|ëŒxÒH›é(=M}80›¸iû\Qé˜ÖüÊç/’µ„qœRPE· ÏN@cvâ±5³Wm2-%¯Óg¼y(yP)?xÜ0kFq ÞsüøÉ×Ū8£"%Ÿ@fº]ÊŠqSÉ é8D4bÈhZ çî¶A¹¸ZrÉ÷TBôaâåæ¡‰h å5æ*g{Òzï3ЕG'®SNþUõ-/—Åï*Í\@®˜§ýmëWü‘;\&YrHpï÷08ݘÚÈ?ÅÛB2ôÕñ©¼É¡l>3€¹ÁQç9p†àÆtapžú/Z_ »Ä†ç¢TG‡Òüœnïœà¨£²6!Ã)çMËpÂ{êç4VcWtí«uÍD$ל©‘vÕÂÉË ƒê²j‡±ô£;uÿZrR€9›Û—íròW¿!«wüyÌêVÛƒÚ¬²´—æ0¶;»J €Âº ¨oо@_Õ³ÐbJZZÏrSy: ‹4E"P*ôP >5”„œQ%z¨‚²^  P¬âì9B;Øq[C­ bL¥‚õ3áËÅFoúT]f?H‡Á¿'ÓåA[³’¶?×Q¹GŸU[eQÏeŠ?Éêƒäú£Ö–°Æh5YõM»®lUñ?]‹µYz­, Gý<¡*€D7€®"üî߯á¡ík̆E¦Ns³þpÀ´ñ$Ùw‡)HhŠê;Œ žËÁ,U!Ex‡Þv«§Y§"|x"ÂýÑ™¢Úö¼oåÚØ>ú Á ¾4Yg!ž.¼9¬3;ÿTÝ=WY€lß­œIÛšæ}×§¦AJܦ}AˆÇ¸5‰ö`òKYÏkÚÊÉâcêþMQ§Ûc’ÆQV>¾„ñ%pƒ;RDRŽÊzÛé?УÙ¾a“u×nn,BæüÊwQ¾m-´EûؾË/¦ük‡ØsÜ€-$UM¢;m¶Ý–o%±Ü;±¶G‡U‰O.òèù0ä  ³Š‘4,9B|ÞÈåã?F·cáæ¨ûß•[~©ƒì±0ñyáwnLp„ {UÛÒx|H”ÚD¢<ðÍt†®\‘¶Ìk;aOÄÁÖUJB%Åô®aw<ÇmQX¥ý!=oð@ŽúÁž'Ù|uÒú¶1äªB›B^ÝO±v:2MÎ\ªîæWµœ‰{ïÚ"Ý+Ož×oOuzÕ£IkXfR?UcíF!Ýà/ƒónIކ‚8Ïõ¨AÿÄoÒ 7ŸŠ¥ŽÒv­Îô×èJk3í¶Û›èëÇRf$FÍÆg==>3O ¢ô»™ífhnt.n£â‘SÔf«ÔAy­/v† Ú*Ó)pFü$=Ú"Ô-‹Ð›]C•Í.µ‘e§Mèþ·c³,ùÛ+ã}Äsû™U` öè½CHeñÁXtt®àW{6 ò´öٴ쌨Šâ`c´¿é?B ‰Ù\|éAiŠÎúæüõBâøŽP—.œœb­”ŸÒéúûÕ\©Ðc»R OÒwâíÈj¡>h'”¬ÁÊ 8£×—PZâ3Stb Ô‘)PƒÐ(ÉHæÞàŠ9f¿xÐ$Ì*Cq®JÃVb·h »sí†\ëbN B会&Ï*_¢wSꑞhd´èžM†ÊJØ{QÄ;ÃL'‚"*/° ¡ìü$ùx]ƒð€Ç’ mà& O”é(â{ä=ÜKɵ¹ÀzZ{©²äRY„Ùßo¶™—/ÂÉt%è,>*PBÿ°/{º(|©º»¢=d‘´¥…쬣¤SÈ}’Ø#—æ/Gè´f1wèë3豓VßÔö_ ðq[‚D-Ãצ)`ÍÚL=\êï{£¶©í,§.‰JRDúŽ‚3Ѹ=srIOÕ·ß£Ô sÌ$ò¸ß£ls,o?µê}áš$Ùþ»¨Ál7öh­¦ Õ°ÑöþGäní=UÙc·ô{{­SïæºdÁ(„P¬ë‰ú˜ñ™Žòc†  ±08î‡sNÚxó¥­¶)"ôóö”ÓW{¾ûêËÐOµEùÎÄZa¾cí‘ANÈ+– æŽs¹X^@rŠþšP&£Ô§ÚE^£ú7ÀвÒajjd0bs( úÿ‹çÅX^Ùè<‹Brföþ›æ·ZÍÈð]ªG…Ê-7S{ã]¢†÷»™H¨œ­udÖPY‘ëeKšª=ãàØË ½ HÈ[]ù þÿbbXQ)Ëy]RøÖ‡ð$\}µ@Ce8 0|> rÙŽÕÓ ª|zq»æAZð4†ûŒZEøö·jœï©ÁÑŒì,¿&s=h0=«ˆ3´¡£±Åÿ)åvk£_ÞJ’œCᮾ8ÆÚmBûð¦ÇÝñ.ÙŒ¸<]$@ë¸ÙNmª!MÉI-ægld²#%â}’x)ôb“Ã!¥Ù)ߌÉÖ&›Üׄ½ æ 1h ™X,pÖFÎY)'s¿ìÜËÅÅ*«§P¹M=äÕ;y…u#ö$I.ëò¯&ĵnC·)“WrÅ<`A˧ô€Ô@—)3ûÝ—çñª)?IæàØ1X¸rZp0-vA)2/ÇÙ{²ŸGÉ=ôÎ&ýõ¸tdø¬i…£›[By®_²(²M^]PýQÝ×ô‡ÍŃíí%·Ò 'Ù oL—¿ H©l°›ö ƒ´Þ¯‡.¾pÙØwºDC·kSZ.øü«ÎEèÎn·Ê¼¢É9ÌŸ§y¶®*╚õG¡C®6Ö):DЬD½¨‚\@WhúçÒß|ãFˈú]#ö0nvU˜bØ‚eU[[šG¦ƒ¨³Ï°©ï±%þRy}¢ÁÚJšªtþ‘à2b–°Ÿ‚²üöZßÓÔ½Z‚¢¹¯µZ uŒãKŽ B›I¾èm1’Q6e€ú/¶sÁ|=¼ ÛˆN¡MBà—àö´#ˆƒµÖX–qÿ¥ˆs6ó^ò¡Êß@w¿Ú£ÊÍÁ âêÅg3ß$ Ã@ÝÐŸŠ·ì%1ãÿƒóÔ*2ÖÞ“ts P’‚Â'Øû) §Ì1±ü'¢Ž95!H¹Mú¸Ë½÷sמu 3*cuE‚îv "F7e£*SR¼/L,c‡³ª ©þ„j¦t0É}a8÷sç²A½¹UòZ¤ôÅ9“ýþÝKì ”tó9ÏŽ|´qñ[´&ùO‚˜¯Ê7ZE¾â}4­|¿ßâÑlÉ_óÉé̆ƒ¾7àâ`®>ý¨Ž{°Mª‚NòÀ9nh ?[BÑšõ!xBB}’“+Ýß¹VÏü’låO%×}ÂO@Å?OÎö©çGZNP®+ñ3Ó ÀH 9' Fx^›®ÚÙ.m}·ïž–Ò¯qäíÝ_n¨EÐOv§l„ËÄ ·Nš[2Aøp …â @±›R_ø4ØÈ”ºv›ž#ÊÜ%7ƒ?úK†¢¿ÖÆ×b×Å{¤®ÝiÎB*ÖL… ÅFvøjC‰Ó4¸;ʆú¦[+þnñ!0¾A”UX½ë|£R ·wMT2ý€¾´\>˜7 KLGù‡¹´Öé3oXY÷¤TñØó«vªû?¿Xàa‡òž}9uÄÛu:† Þò¬pW¤À=3‘u»¢Pž C@CA„žÛÎcCøÖáÖÜüóïç=-Ïüó¾õ" ³3¸ô©ÐË“šýw»e>ZÛ‹÷뵇_ù¹"*SMw¥­—¶Íð9972µƒi9‚-ŸÜÁ^¯/„,\ýÎΦQHx±! eki&D.rl̯wôغy¬¡ßî;0%ò]ÑgoßÁ5ª]™º¸w¿ç o3uÎoö"Œ`å9¾³ècBv-êá%ªçyæh ƒï]ûJÍEy9®«¤G‹îp" æóNÚxÝ¥;ÈË‘‚!p0A©¿Ÿ¤Àä1UùÝÓ¤…“zØx®`ÕÖBk +^áÇÖ4P€½ôïš¿|Èt—ëoôûm|ÍŸ«™•jð± tøL¨éߟu(õÀ¬ªzžš¯§4LºýeÊdmƒ·+†J“ˈ:ä²îw‰›åZ ã¡M“ìï—x‡ÿ¨:é /H+°þzÄèÞ_ÀØpžÿá¦LŸœáaÔ7ôiºý¹œ‰äMÆŠÕß‘¹ïïMþˆu¼ Ç®È;[±*P„Á~g2.`²ùCšî„òÃÄæÛÞiœScå‚^jð(Ó<¦ìàFäz%2•{&œ·”™ .˜{Ä ˆ´Rp©ÝÊADÙ^wÍ¿Ü÷° qBñ5æÍ]KúU’è«Êq>‹Æ*wä!ãœ8ʱW½8_Ÿ)—'Ê”xaØñ`ò .§÷ŶÅBP¸1 Í,J\&"½ú„<AÔ)yŽé=ƒÙEGÎ`ƒsAYÉ_öf8Ë´Éû¢§$„­ÎOh'{²ÝÙ˜?Ò:i}p—ÍãîIø.é#|mB‹V(úƯbv”ušüæ¤)ÊòI«2‰füaÊŽ NX/p¡U¶Ñ+\[—ëQ-²GÜÌñQÒÅ+-uø‡7Dbx.nä²|ݬVëŸ^sƒÃ‡nç=œ@Ý‹¦ÒXª•Õ8[´êjf+=è<¿ñœyÔæ¶èâœü«­Þoî@ÌüÉl¶¬lPo!´ÒD«aƒöÝÑX©?”ûÅ•îQ ùò£âÎr[Åœ}}uµ-•+†Ò¡ÿJÉP3sm•P‰v$0 èð@:|ë¬*Ñ®ƒÃLôZ¬8ÇÖ*îM1rßµF‡AeÀü=2IœÈÉÕ„Àîxù1@¾ &¯Ò a +Ÿm±ÜGü)¨ì¤Ò<ÞsƒbÅã+Q½°™ê•‡·’CD,sÕ»ß(ã¥(ð'ÑÄÓ#ñJŸY@rŒ þ¯Ä§õ¬d¾Ö¤^8µ{ôËrvúj~Í¢¿ÂõF4«•ƒ£l)Té…ÀÌ“ÃÛZ׎¸!·jýÊh?Ðq²¢Æº/¬{aù£ô`»HI3×qÖ° ñ"lGÈóG'[âê/ñéê Qĺdp™öèÔ±ÍÁ5UĈÜÈ£z·C÷oŸÏ•FGcqö3yXYsÊ¿#Kɱ”¼GÜX^zË Ÿ¢±_'–…‡Ý­&1ßs ¿¹•ñ?÷àên–ùW2U5i‰öº–^ìèWµU-`=›3Žð ÂÄÁíH‰–å±urdi1댻S딆¼žÃšXö(÷Ð.ž8ÙmæñNëSr;ªþ?¥Ì®÷߇qWõ¡ü¢# Óõ)”ï)ÕȰÅ>ìÔ9Ãz·ÉìWv!øÅ6<+ùÅÒYg¿ãQÓ ùN´ H%?J»eûÍoJäˆw"Ù&8:¥ö ¾cc+ü…äQá<Ã{„cƒ Ô'Ì,­˜ -ý‰ö\XóU`5ïïé"/(&wÛªknxÔad°³U(Åt4[üPÀ1zýHð˜ZyË­q{½›˜BÀ¾ÛXD)Ž½Ñ ß®N%ØØ"ãæ1ऽƒZP)L޾·4bô,þh¾m B,C£f íp˜hóS…ZªDœÀ@AÈNñ§‰4p­°P”f<:V3á`D±ý¨ãÙ³ýc4çEUS•RŸ‡îÄFÖÇ\YdP›¢±yü׺h–]¬¢`§™þ%‹¬Z…âöõWtäz8 sÞj80I”¤§4ËU“åvIv{æî[Ûp†?É͘ë\׈üyTE»±#d"ü±¹2fÅ0ÇÅÅPæKðÀYò T@V P¢k2;»;Â¡Ž è‹¶×Nêâ‘_*hµ‹OöEQbÝ,ý©H/‚A ; #O¸¤R JÈÿyü€}s!÷ohs¯ĈÀäk4%ß§;Œâ<«CÖ +˜¢¥ó:Äl=z+'µpÒÞQ‡ŸRqÄhK" ][.¿ZÔ„ïém=z;Ñü/ÆU³ŠY‚è°Þ@2+a½0CLæ £‚{0³¦çpý¾À¸ vî&ÎñoôžñH 4ÙL©ÒýN˜Õ¡ˆuTÎas’.¡j ‚è ?¼5¤ Ñ–4—Ç{ Ì`ž÷Œ9Άp"A aÍ5;zº,ëññzX›¼_˜æbÎ Ž÷¶€kކçß—ÿlД—„èga•&Xdå»x@{ÂN ‹€ Ç•O.Œ¬Ë·šŒäŠÕ6#_7~#QÒÓÄu@Ýû`Õvh°›¦ÇFrƒY Ç-‚‚ŽHæ›éNDb~y*D?ý÷€‰V)z­Þqy…ŠòéÛÌ“ÛÀ\¹W¥ÃÉE*œÞR¨Öí{ç^*Ù0•æ88Žlyà«•aÛÔÄì;׉zÁ«¼µ„ÔÄÌ.4´€@>¤(_-k«Óg§*÷ÛõMÆÙЇô·YpÞë)A£ñKÍO¬*ö‹¡L†·—$Vw‘Àæ*¦–®Gµý>½¡/0%@ʨâWI«\Dy'¤^]LÖÓW¢*ÎNhdÁW€€È ½_@vhÎ9ƒ,j´ƒàíÑ-ˆ:C ÚþÏ®À>uØnæ^ÙAÕ]»OÁànbˆ%zàx §ådá+%£¥=V6Š6”ú·Ïj»Þ¶3ÃpY·\³6Ð/Tt±Ì~¸¨ÔÈ»Õ^Óp)N-Ík³Ò(GÁÖ‡4ßì+WtARg4HoÎ lì%ÞGÁéU¤–䳺Ԩ§º%,(<Á‡™F'L¢G3ÛIjƒd‰ùë´w/Èñl²Ê÷¹àæºOm„ÆîöÄ·—‡³±è…Ràq³¡TÂößÑ:âZŽŒ® R^Ëpžp@×§ÉÙ96àÉgëÅ~gèíkþýÃõ{ªM… MCó·ÓIFC£Rúœ1 lªR"øS ýáÏ•= 5ãg‚S `t{·â·E¦>{¹ëלõ’)#ëWÀ’l3Nvÿó/¦˜n#¾¾$þr¹'D;ÌdxaÂÑ$6 ­¹¾áô3|ÒyàRõ~`™vÄ1æðM]3tëMñÛFm'àûT¤8O jÿ˜)…‹©éh½RA…®¿ÞËük@• ½Ô|¸á»üÀ ±R©·—…'òq¾i fáfი‹>MëGN‡ˆŽ´@뜱@x•oçã²$íûc­9é’¿Ë‘U~'Ïè`!:pâ:ìgÙáÀÙÄ{%vR;A° &µ¿î.—ÊMaÙ× ‡°zO_@ØË;[¤Gæï ¾ý ÄÚ2„dqƒ,\¦Û\Ú‘€'¹ĨGùHM9-ë—’|È jÿ³5§ð² «œXkDÕîéÝ!oÐh›ÐÙgzµÈ6 ™ó,á ¥@O'l<Â8F²Bu ž§œ wãÄåðü)+”IH%ŠÿJ¸©8¦šìñ|ªPJ‡vøþëÊÓg­¯[}Mß@x)»ÿ§ ½å²ãý×5fç<¦dyîGÜc6±˜,ÜÜî¹ØL¿Ä™þ¸¤8žÇWæ6íJ¨:Ò›Kàóxëט6ŸÄÊtÖÌ  Jš¼%(ßO?—ms~wïÆ†%µP¹ÛÝ–í¦_~޲}—›G¨Ç ôlQLeºšíuÅV3|Òg«„Â,¸ë> ùÃâ±(¨C´ä—ðÇm‹lnËœèÖ h ø?·'7W®¨t\Ý|üb½6É_§–ßàp9šx/³œ„D´wÞI}D@9û|R”YðC?VŽk‡¨YÆg1½«Çù ÿsÙA:[¤rî=Ô!è!&ƒPs'Zì(iTÒÎÿôÝÝÍ&ž¸Ûˆy± ­-C+9§™î¹pÙ¶§ðœYÓªQ뻈g€7qëp}«¿lDÑàl£€L¾ WZtÂ3‘·o¼TÇHô ;HIɧü"“Æê^éæ×Zx{èwÓsÔXäàð„Œ²ïüô‹]ºÍÎýR}LêZ;° ’=ɶHÙŒÎlqÑeïdS“§£êÇ÷tW!ªÃ ‘Ì1š‘õaÕÿmÓ¤Žo!,š -˜ñîÑ#Óó]Õ†1-„ìHEq;ÙŒ[=À/ºÑdj!ìW¤9¾y?¿­þE· ¤õUã:5§–©ëÁ”R4XNŸL¥ºá)Ò¢ÔÈ>Ã|å™Î4‚:þ8À%½8"KزY>/xõ|"(/~}•l޵Ä?¡FV[J-N †€‰ˆ£PõÈä8^ÔiíåÊÃ|Ëüs"tÔùh³þøÏ7 ²oëã Jé’Y.°¥?ç·B–%0Ô|g@sûÈ7é|* nâåc'¤ÔÃý±@hÏuV¯#KRðœhãK‹*2giݲÅK–“HØsØ%Pr A”¢‚}«7¨±-Ë€tK×zEúˆoH‚Q4fWmh¾«÷þÄ_ !=MFKk–ŒL:³eßó)± û㉥js4Ãd?I¬jϾš®x"©#M£“mC,½1X™Sè¦Üº‹¿KtwXgŒÝ~o¨Ä¿, ~û|´òš­ŽGoë4U~o^ýv—*º1Þèãl ½¬•Õ®TþÅW€Ûô‹]H°ßò€»†%]j•…eÿ¦­|ró0OjD ôEF{¾ß@#ñÌÞ:C¹oöû”缜 z…¤Ü)ÜfŠ–)ÛwXw'S¿}’f:ž•®ëPvÿB¨žØŸ>{/þ·´CMEèz¯¬m„04¤ _å@òo©a.B_ÍO“ò JïÖUaÑãÉeVÌî2öäíŽ%¥(P'»I%¹²´«zÂúëø93¨êH­"îÑ%à>ÀH¼Z/åvé<Ø3¹¢¯°XÿUÌã;µÓí¼7¤èav9ê'Cv±› ïÏE:Ï xè&þ.÷Ö¸¾Zÿ8gµ U…tŽÿ.§‚7Õ_5|ûf;Îê*VûX°qe©¶úþ ¿ŠÙóÒgrq‘±AöYc­ ä¼Ô…]ó³rGôñÚÇ,‡–úbö1ËúrŽIµaïC- ®’u…ã“­oƒøoÞkÌü‚'vbÂô9Š|áÔS¼¢Ò¢3f(r «@~WÝpNH˜ŸÎ3ü(¿½–Ǿ Áw¯Ä,¡X€ ¨†¸}F]¨µ íb+QO ["0¥x››µw²²Ò¹…lÁµóŽnÈÞ¥cõ6D)؇?ØÙ½ÉqfYÑ)¾ãº³,äÀ¦° =M}ÀâÆÜpqÔ8ÐåÐ1Ú\m=æ rlŒFh z?ÃDnoØ#Z’Ðð$Âç.X@5J È1äƒKÞ5!íÛlëdБýHpެ ¬fYâÑß©­H_â:xñiGÝ„ØJO¨w`Ø×Xº\ F³{9DLûnâŽpwxVV,âK"`€À±á–û6 7 €½owɨÈtC™Ü$¢¬xg+J~ ÜssiJ¼³H×íâj€Uÿ-ÑÁ/ÆÆ iLü€Fà÷¯îv¿i™dºãð´Å·ú˜O̳ (;ÎhÀµh[SB·éÁb±y—›„é¿—CîçûfŒþ„ƒ‘,)DNÑi˜i Šïƒs )2ÖæîÿZ%f¤¦šäÉdÀ2œå‚^¾½'£ç®Xì%vß3йMy¥º‘ö~­û?ešè{âGp“Ã÷ŠêÃ88/½„ Äíø'–HìZ>±×ë޿Qü_+,ðg84ŠŠßk°?9zþ’ÃBRÃ*! ôþ¼ÆK­®©&[®ê;NÛè€!|œbYš§°Ô]@3€Žsº,8ˆÃä ES ÞD¸œ 2º´ðCÃHN™LDëÆ£}¬fI “k’m®ÙUGÆ4YAèqç° ²šTÿŸ!¢TåŸîÓïöSÒUáz~kG(zj…Ö¦¡TILZL†¥Óy4ÕS{C¹7(tñ’~ÍÚ÷û|LÜrÎÕ"jÏ NòÞÑœ‡ý‡þ¢MÖ;7J]IºcÕ^ãšzs]­A·Û]&kßß(”¹:Wèr÷®|¿Ü´*Fã¶´œ Q»ÍäÝÛ^ƒ¿Yøæç¶mCF÷±ÃÞ¨«¥?…ŸÂKÌ}™$#¾žK°ÈƒƆ·(LÜY.(æÔg>QÝ×n4¹ÿ65v/œ®'?uߤÜûý°Ñ&`’æz;÷Šžó$[>wW(K© µØØ[qºrYcþ™y÷J]ÃõÙyûÍq¢ S´ = ‟ƒz%GtúT¦UE'â ¨$¼E¾KÅhÜM&Èy'r*Ž¤è¿£ƒ}­JÙ†` ðm`“‡O?³ßãƒÕmŒ§ó½ï—äk%?5œAüHÖØ18”ùYÀ:¤0¢evY¢¤'í û½»4“Ç›ÇìÒÿ?ZßÞé¶ý¼Ezkä/F³ÁáZdu‡ž”/ßAàkS9o\”%8{¼<æÔ6;HýÊú©•ˆ“§4Â`„;êÌz¦DŠãò)u¬,”þ‰ôе6Nž¨ŒìÁúPcÃ0¿ïÖô"` 'EbgÃ+PÓ¼‹Ó÷è™åÝÊà_68}o\ðúó<‚º`/C¼Ýz«…ª€£çØ Ø_dø%ZUÑŸEØ­ïß¾cÀg/¶˜2®”y ¯l‘_j³Æî£ÀëŠnáª%÷•Ç"H z KsDgú #@T0±ý 8™õýÊk0~°ƒäMJ»áWö‘oýÕ¬»¦ŒEÃAˆ…}DJ‚f¹0kÙý½!@k‹‰<2àÐU#ΤiýFÄ~ƒh¨F%Å› ESZ@ü ÚGdUV0>]ÐßCž'ÁVÃ]ø e1Ú.ÂÂc& J1¼]çtEWŽÅ[@®_Ô¬ â5åPŠg*v´‘‰ðûý¸Ô,¹(úSóL‚ÑûÊ€—ŒùK È8Àäî›j#Íj v{ w]g¸mvXCG&3 aÀýº¶BD·Ç:Yw;Ñ\:˜72ä«/iXÅTÕŠÀ $vhÑfù€K]XxáA×ê i_·q0+NR)hag¦‹Pè¡ß¦L×Úl @»í,x‰œªýÞ÷ H¦( Çëé€Í­¼eï@y9®¤ 3ò‡cú½ÝBVðu2ÚµRU¦£|øº82"’¼‹IdTrú™5‡ã&úIüÊ­z´JFˆ€‚dý~ÉF×Èt™X…lXHê)À@z¤s`­b `l&Éþ7zíLüˆo ÃoQ]jzœ¤ ¾K7‘SªYì0Ü,±Öå^zæâÀ»<7Ø0‹NEÕ¿É«p0¶M ü™}óH§3ñ äµkÙˆZLùð d{g¢ŠˆÝÿ‡ß¾ì$8‰b䦻ä:ÒåÄ®}Û‘ÖÀ¾¡nÊ£Ÿ»”N¾âѦ#«+Œ'²Ë½/õËGQõ¹àØÎÂ6|Rü¹Ô?|D1h¡OÕmUˆÃ“³ý&çõ¼BEàãºì‹æKæ‰ê­gþwT%YS¶Y¢ÌŒxv½ž:J(šD&ô9~ŸŽdÓ3Ñý–׎yó]ëfI­3uæ¹²vc#ÜïƒúOÜ­‹hΆŠ*íhnÉ´£Ý-‚a¬qša8Q%ÞõMêë%˘Þ&<ކ-©vy޹âÕ«²ÙÂRG{Å7DER½ J ?¦:dã û8õ¦q.ñÍ*Taey8gU´††®ø‹—-ú£÷=«<àp FŒÁ,0eöÅ®ÔÛ‡>,Äm×óáNiTy€ANÙð“.½;Ù•?´‹ xU ÒÜmß²ö³¥»»B@/ÁêÐÞÉjІAÏþà"Ìé)œ"R½0ºS@ 0ˆ:ð‘êX+önìÀËcZ$I}_úÓS}_B¸M—KHøìÓKÿ½ÒçÌgËGÉÅÒC¥Œo“|WÇ¢9_øU*àaaþ9ÆòmxGqÞ#e®µ–$kgO05Ü„ý àx-—ÝkÉoGî¶A©Kï±ø“µvê¥ݽÑzK£ç“ …³ã‹³£‡1¹×HZ®s`¡ ®BòDÁÖŽüÓX¨×Îl¤o&åîÊÖúØ}ž„L'GÑ–cý:ɯ€I4‹¶¢4Šâè[òp8§í0‚]RÐà@#w[—Äë¼ÃÍÛOØš‡~UÍȪ¾óhQŒ׃ÔzLÏÞ§ËkÆ+¹ «h&£‡¼0Uê6ÆŸØ+¢wyE[¾ciæçmà;ÉÖ3¿rR°¥"nøô@Ñj“ñdÎ$J„#ýóôÕÍu }„ïØ€Iïqó Xé †ƒ¤¦0 X¶íwWdz2”—(ßì@¯!—R·kÝlÊ0­HvIeý1oá&zŽ«íw¬XìÁFؘGã<˵³n8ü`ˆ£ÁG¼ 4™QëÔUpí>îP€ù„Qz›ŠSPUÈ?ƒ¤z®QçÜWCºè¡¥¥“éð`×›–;»êÈ–¶é,èTxgcûeé`ÚO ”Y¦ÞcŠz"t‘ûn±¨kÓǃk°¸Cæ×ÙKšXÙþ4æU`í½{—5–¦¾Æ ’ÚûÚ0`ÃÇ” Þ“Dðóp$2³ìsfD\¦Eç/œˆÕ@H£Îò3ùq˜§èŒYpÓÕìîF+˜.ä`âÑ×›ì Ž/Ëò°u!nAˆVÝx®Ð€/}ÒÄšˆÊëËTGŠ—aC;Žùác0;Rî¿ÞšgŒ ÍMò|}ÛA]3›ç@ºà3 ^¹o_·pâ©b5œ‘ÙµP3 êr±½îÉ"€X¤±ä-©~‡é#‹IèP ÷û¥œ2™ý LŸS×3!,xòþ\GYÙƒKðîøðì©d´WaÎÆŸžÜyì8kl/6ä TE>Ç÷##ôdü{›gˆœéídW4×CÑ•÷Ûèšþ±}} z}•*ݬ6hÈr­kârùZÎѽ=›ª†×èŽËÈ€–±Ü,×ð¨і¾ŽØ¿,¿Cc c,–`M?”T\Tg ÍÐj}40Dói¯\™f¨ú¹lÔS2<-•¶ÐŽC“Â*9r¬¥pïËC_)E8¥=zÆ|×ã }oC¬ûg»;-ò Y÷:NòÚÊYiU=Ý$ËÐæùQŒ€6‚•€0Ufdÿ&ÿ+¯÷÷d˜ îÛ½eÞÔáåP‚±bµQûÕjÔvIS $D\¨ ”§òöHøU0Z™xl ÛÄFÝ1¨ýgGÚ_… ¼QuÁ!ÔÄ_GÐX÷+ài¶ÀÒ•8¯TP²È®¦tëÍpÛAIk9¬Á-8Zì3f[9u¤}\0f¥Œ`¦ÃÜN³5Y–cúPß.÷[~–¬îdgΖÉþ!PQ 4IT9 õ$‰yú(¾ôÒCe7qªó·Ó Šúáþ[ÞX GÊÄi¯—®–*èü½fMå–t¾¿þåKµãáË9¥ìc“Hv/Þ'.Å÷%üÿZ.wúÀ\9éÝ7Û:, šgÌ0ÄY±æüñߌÐ èz5ñåŽÄwŽi$5@í<î­êP±Pº¸bȨ•pRs„›”Ž»¦øVæ@\ ·†¢ ¹M'4„Hœ›·š#ÃêüìQ÷`³ªË¬0s~»>©èHá”YóÁÎ3`Y&&|ÜTš¡S1ZK4§ y°ì‘0ìЀu†™’pÞ˜5¦ Q ošÃŸúû²j{Ÿ Ä:Iwû›Ã9« Ô9°¬giÀ¿§°óð`o`ä&©ë|m(ÈF}¶ªoù!ÄðØ‹ÚrštûdâZ RvºØçDƒ~œ…®A.«r‰.@°§·;÷ö&b–]Z}´fd½šˆ9¤«±ÂŒ}{{`š®élã„ñÙ’Ò8„š8å“?‹É«·Æ´_0BñMìóÂ,ÝóÐÞBpe÷š¨Óß$ovа:ʺcÜf†¤xU ^Ú°¬™uDÚk>B½xïóöó;ê,5ñòwá­Nù;‘Ñ­Ëœ~o%"ìuöC!$ó»—­sû÷Ó²Ãv¯ãs üŸª»5Ù•‚Íc‡Û5OÙj­ `¯ Ý“´™€DŒWŽÁŠCC£}DQ7X:67VÚüâ3z¬Ýlq—²¦ ö{5/.èÕyOmû›Ú—²âpË#¸‚Ïò+mO†C9¶‚ÏXƒéã%0RX\ÊEî)» ùTýÅlŠÝëòeÏ,ï—÷bÄ ÉŠÁïÄh”¬#ƒïRü°¬zY†Z‰d8Ž£Z‡ƒH¶3=.ì™cï¶ŠÆ\¢yYš>ÖÅ&Æ1eä ú¯Œ.¨Vk[:.¿XøVŽží»ce½[ß:ŒžUÙôqNn1&۱䧒a&Š…ã3\N1oa'Õö34«x|ŠúËW£¸êBmåT8*r xàÈ…ÿÐ^cMÆÖ…ŸÐäÖ èµ³žÖÁã0"¨^n­“‹Í¯E3Íõ|ûu{Î8À»ãü–îÏ{Ÿz3žÕ±Xìd3ò‹f7DQ[¿àˆ€OÑí¢ÉôòP2%¶ BWj0B7lsÏ z©vN¹ê˜tùÏ%ÕxTÇ>®*Ô?†pg àE ÏkzMºëø’4iF¹™`»ª«Zí Îi•®=V³Ç–ÎMÿÒG+#‚—{’/w´)=V8â'@M5a|(sæoám¸÷`véQÒÍÉÌ?Lb’ÄacÄÔÐÄGØ”—‚è˜èˆåöÊOžžt‘–¬vø¬yÙ5Ûy[ͦOixDTê<5aù¢“³t0¹¸;jin©ò»sžÚ£òœŒúÊw:ò_ûzYÚr&{Ì÷Lï'ž0õ3ôˆÆ0ð€Baز$;%͉Bê‹eô‰ëÁ&‚+âÃÿ6ÒÞfQ 7ÙÇ“é[,ZÈq`¨ƒ¤ P±´š~+Ø;c'¶¨(¼á¥G³[Ò"¨&:¡ÀóÝ4?¶7Á_&l–$t8LøOÃÕ¾2µ˜t=ùÚŒn]f>|Š&?ŒÊ+4Y»œâÁÒÍçoèþêÍqªÖºcœe™: ò/h/÷MÆ9W =sºçÅŽ·2 G¡óì´5•âr:1:ä¤ÊF ÑlSþñ-}nkwHâôî¦f žÓbÚ¬É&¨]ôiû¹abÆŠåMo^žõ¶ hÔƒö|òéè.µ&ŸÁÐpüJ<ÚóßùRÚîÍò±/`0 ÉÜì‰ý\ò¦Áµæ«}m—d ûç¾ûÌ\khXΞŸÙ\ôÁ\Úf äú¦÷Çñ¦±ECΪîCšVGÁ³U¼‰b p\ @>S’©@U‚æy25AÛg¢ØxêðtÇý>Ä&KìÀôf œâ#دÌÿ=-"¼©¿QPüÞ¢9ºR=13i¶|°Ùü_ my²{”ϤRÔÆ¥¶9ËuÛ ¦|(ìÑ8Ǿa :ÉãÁLܵ <¢D¾Q;'g‹Eõn¦6gt€£¥Û?L$×%<³¹¬@úIÞ mô^\Jßô"ì™ø~âùj.–Ò¡þ9.“ÕÒovùÞâô$6踻dÓX>9Âø};Ejìuª|Bx¦$ê>/r»®8D—‘¬NSšíaÉ´&Óp°€´o³“4H±÷ £ ;óÎQµ(uÿÍ8šà@óá{3 Àzº®º'B’/]Ϙ2P"ÞPÉ €K.8€wXÂì”U¨æ|!¹eK©¢ð°V•ð3Ò ”;`o@æDò°™(£¦PÅê,›_–4à_u?Ùš´› 1¦!b¹¯*–7ì(EÂ7™“àì°ô9žºãÑÄDfµÆW‚Õõqå×­çŽu[¯B'¾âA‰ÉØÅ¶LUŽÈO6èSº^ø|ÇA Ÿe”nÒÇÛm…Aòyx±ø’ÁéÍyý1]øp€(ss\Ä@zÁ=ÑèÖØ GŽì 6³Xà­-PDg1}5GŸB*—^ß.,uôÃ¥vø¨v7ªúuSßÞåÐÛþÉÆ«àcóè`' < ¡6…ÑqœŒÎBÓÞy¶“¾’*î=öÄߺ¨tÞ=Iÿñ0Q°öø´Ô d9W‚Á,¬¾!§­RÀ®PZœ§ÍŠü–iM#ÃÀ¡lBoO§{vÏ1\ ·ê_nGkBAj>Ï×ú» M|t¿›…ŠØ”÷¢µ]+ù‘n:]¦EŸÒùN0¥¸MŒÆ”K/YljŽþ¶ãëÍòª[E¬[¾„\«ÂøV§Ô{ή2ÔiÅ¥Mí| „Õ'ú^Ĩo”Rˆ‰¿Ú“ ¯éÄŒ¦åß|¡Õð˜ "R™/³óÞ|Íýb­®s¦„xR×{fT$ˆÇBɘq8Hñ­ ªœ3 ¾mxž·Qà±Oš~>•--çqž×cð¾+g·ãV†@àtòi½VPUë¢ù‡²$>I˜ÔïÇæ|v»s°¹§²Cê;s¡‚å²´H3Ѱݠ½›`×»­êŽ£y׿–ò`x)#Ýo ±õ^uá0.„$ìl+¨ ‡i˜“‡}G \ èà$©»Žb.«2Ч¿,â !Á¿Ü±ïÿŠutœÍ– `¶½¶DÄgpâÜ +;pÀä”×ó"<›…Ø8…S±MýB]¤Z “>ñø³°S‘ƒïL+ $ý<“P°3æ °ße– 4òi1ì-…¥xr…å='°ÁdC‡°6Þ »aKʈжP!¶Ê¹Ë#𺵢ςÏ뽬D¢`*,ÎÀåï)gJ*e?‹!‚¥ÿzÐçÁŒ®: ]ÄÜ8ØEWûCŠÌ¼ÁÕ_Ý‚e¢Z^[âÊ ŒÌÁ!Ánõôû+W;Î9¿BÇœ¯8g¥á]¶uÚ¬þÅh™·Å*˵âgwñh¯lü†pÁ”è2€gýC¿ @.ÁÓŠÇÈjAÛWµ÷õîÕñ½ †š„·c,Å"+”‰Tn›Ñ;g¼q*†ë KNàã¿§|ÑímUM"Èc³ñf=ïöÎBÚ¦Ñ!Ìî‡ÿ=ù¸û¡ç0Fl€^F©'CŠ•å —x2‰^l˜G!È`jP{55ç“FCf[À«áåÚ³4Rq*níkTpøÙ§ W¦úœ¦¼æŒ}4´ÍHÁ+’æ’L÷öï!²¾´³ aIhëUP2ã”§¶½ÿºsEÝnRKbdg2¶Ê;‚Éi¦ç=Å¥“}‹ºé•ÕÕ)ì Tˆ3tj u-2ƒf岟'£þ µ5¼Nƒ¡ø9ù¼zîkú¼¶™7Dæq…¢a,µ¼éúPé$.3Õèà iv¼-ºü¾¦Û^KÅ)&-‡]Ò£G~ç_}}eÇ0ð>DJ]Ç_›á²Ûs*_î“oúÇͨ÷ôJ¸×Ÿ'ÏÌeçžÁJ*×þ¹¥©†óµPï¶ [¹3Â{Ùy®¡@MEû‹ñ|–õEm¢›œÿdˆT9[ªè¾lлV6ÙªtÜ…™!ù¶vzú±kã0ÓjÀFG¤SŒ¿xÕéšÕsuôñ?º¤,*lôUivÐØ¦Cñ›â °í,'JÚ 0ÛKq‡@›†O·g£´ 8 È!”™›&PÙ×Ýû|¾úÜ>}Þ°­å?„{¶»°U,å~ÅÀ§sykc÷ŒY <>‰D¤í“jw¹äÇò¬Àû\‘¸ù­&ª#Åö>±”e:Ëà’ [›{““á^ÜíýíïJDã7&ñæ>«gMgKúY¿¬Ãã“^ï½`°ÏžÈ5㱸˜nƒjâØãí SÈV Ó”çê¶ð^„oRŸØ¢± !Ù!5ÍÒ'ˆ·™É²ŠLpR3‹Á€ÈT&PpñFž ðf»ü‹Ûé¢ °ñ‚àþå,7"¶ƒX¬“RUx‡Z/€mº"t¿aÜJcî? Ì‚» s4¸0?Ô!,œ„Aa¿(ÓX¤(“Á1)ÉHä.Á0/H!gw¡âÄZ2¼.ˆÏ¯Vl€Atgò@º-Æü+L´ dQ;L;¡á€˜ŠÁ€p'Á¿¤GWÜgM†Ú<-)&ÀævC=ÜÚ8@ìzË¡æÅJEÚ/™6ôPÈÈÁØAÅYŠ_ \0@uë¤0õOòÆ3q*[™QLḚ€ æj‹*ˆ¶*ù=ÈØN¡æÿˆÛ:wš)A|z¹Gu'‹Þú#rÝ Ùø¯æ@Y)÷n¢´öW AºÂ¨XYo[¡ÄºüÞæôé ¿[ ÌVC7‚N(¬1Ç¿}¬A2|ƒ¡ï>X=~ljÕ¾zfqPOàÉ®i• \蓹5(þñÞDJ±%;u;Ó«\Z±mÜÝÔŠý¸4kpœoRÂ[}ŸØ¥t—jÝŽ¬ÊìY°RÔä QòÛ8éöû÷ÆEbí”>°ûÒ'õž-Š¿£±!ªÀ¡ÉlpV’é„Å1Ž<½½¨(;aYtF'ÇcÇá¶âbÄÓ.Ù9{μÞI“ò“$/S²£Ãœ“R/ÚDÜ©\R/ÇÙ:ï%Öv# ;¿}Óòjž¹í]7~ç8Ÿ3÷l‘_?‚3gâ¶ižl3Ç4L-C?Oc}²Ò'°ÖFt©À"„Ëw•‹[LXïÍs¬vßÐôyLjIˆ+6—T_6Á™HÒùžPÚ¢ÍV·ŽQ|©ÅlØjÉm¶ÙYÓÀÏàÞJ—¶8¹oH Rãw±0 ¼B¥‹.¸zPUŒ½ƒGTpquÇ£{H+ ¬Éaº®‹\‘2ÐÆ§a³•S«þ^%½YîÐ÷ —MìáfÕ¢*IAaI½³þÂ`#Ùª&1¼Áq°õ¢^À ‰Õ ÃRÎç§+ÛqiJAÁä*àH2Âýü-s9F ž- ÿ’,Ü+›B'`’Y·»">U¨xXÑ@€†BH?8 sEjOöry¶ìŸâ´¦ÍÙÈŸc|‡¼¥Ù6CÈþ‘uägöRœßÞ%–-â %÷¢  ½í$02lV~cn©ãbçXµ|æY˜¶Ð ù/º¿Q’C"(tˆA‚4â‚Ýë‚–Ç_¡[‚“vJ =÷"n‡k*ù‘³’MHw¸#7­‚#båX  ˆ,Ôœk«.†w0ñ¯l%R8ÒPBÍù¦À¯ô§¢ƒp%Þª Š%±ø}¡Ý Òâ°k‘ÆìÛrâ5ÚK«Wób…ã‘%ÇÞþE*-„ÊîùM:Sк'LZmu³1“œ^pÒý=ðïódi#=ÎTzÛx÷¾8#ûͨ«è–x9ý·ÜRUû€(2SÌãÆY+ˆ[z´  q_.ïi“mNRG|‡I'¾[ÓÈ®µg$äþØš›|»–¾þý7y-Ò^–jÌ06¶ ƒ–Hüá6«j›Rº`ô.’Ø´úŸør+Ǹ*úØ[×'Iw²vŠ:ί§bêÏ.(ðå´@8¬Œy©œ\¥Ë+„îG–¼ÓhíßêÑLJxH& Ì!Y€Qâ;‰¿üUꊄŠÇ`ØŸ‡ÍÝ%ïÜKk^UYdø;DX1òAZ˜_XÂþÏ ˆì@ï;G9ŒQ)·ý!Ô08Â\uJƒ)¯:ò…0 Ô‰pùÊÑ8ð)d‰Œ8¿Øy´SÕ¬DȬ2o´ —qí¯“è¥‡«}wØ:FŽ+6¦hP+.U•¹àË:&´õï+}ßKÚÍLNy ¿h“€Y~ÅQèã&)I¦ay—M–“QN»À #°;[‹ÿu¸Èà(²³½÷ö’¼gœžÝ§D Áäü¢s %/îüb2^¸’ÓYÏ{š©_çG§˜&ÔKÞ²á cØÕfµŸ“õûq„ÚÔMŽFõMÒÿ ÇkgõW<û ˃ e}֔첵r73v¤·¬¬”Ç;€c§/èñÌ:¢ÇA ÌlÉD'¦\ZÒñŸÑpy¨ ³NÉ|7ŒŽ;ðqŽP¥M¢@è t 3/R»â”Uv[12ÞÈÔØ†¦þßðn¶®£¼ˆÀ½ˆ¸4ÝñˆhŒgdnöã6D ‹ÀõüݶÄ]ŠÍ·ÄkËT{¸ Ó¿`6Ç;Z§L¨› eaáõ2:-þË3Ñç4¡d7õyú²ã0p¾Lwl7µ«%1ÁúH[ç,"Î ¦cp n:–‚\ÎõoÔP´)´ ûÒÛ‚ 9Ûyn¥~B/¾!íxýf n!Ÿ„¦5Q@'…¯œ«!ζ+aD.1‡òbƒÑT÷B›oØP¿SÖ›ªÝ^Cè7ÂáÍ—¨on"U§Â›ù:Üv&è`O +Îõ³¢ˆ4‡R+Ïh%³ØÜhx=´/ tv4éü<ë°ÀéUúZ'¶MÇá‚ì$X0õû/Ýk±–g¢õóÊ÷—ÞBÚe‹è6;tÉ““ùx´<POÆvfìœRˆ§šþ³J!²0.} ›„ÜÙð¿úKîÃ1 XJ³a¸#<·ðU³x &ÞÓŸ½‰‚ èPŠû†g1ö†LBø‘Šÿ^xÊ`ÄžÎR“ù]q"åo‹›Zc¤ ˆfï '£]áQ'rÕzör½“4ãxЯ!÷™\Y4oy-!ˆP†¾…%²ëoíÓj”›­Z|ÄRqþ Z=€£¬úOíQ|“·YÕ1A(áÝA·Ÿ9rXŸ¢&ô"äKØW@Þê«+Xð#¾ð—QsüžqÀëõà€VÇ*¸2ͺÔ(±]äÍhŠÓµôkµØ) òÉ—ÄP ] í)ŸâHËØÔÆø—**Þ9myüÒv#¢¹¨¡K>8'ŒU÷ÊÑʉ!IƒÒ@is¢ô.p=D›2¼i.ýVýËíŒä‰A©ªÇ@æ[¦›'Ó â@ˆ4ŸUõJj£ÝÔ¶!Øú¹XnÇyN¤?-mîüîä°èP@U빟FX•)ïÁnlY ÐycCZžKïuBfGAm5íPàýÉõº®ëäÒé߉ò4þʾGÓ$4Á4Ó a-¨œ:7ŸšÂon:í²™a¹Zè•,é(¿§Žy¯¾’˜2ÑÅ¥‚“G½{À|X¸y}7¯i÷–{0ˆ9ηbÁ>©Rƒ,váoŒ¸Ö^úÉÒ+D8[š²eÓ!`à>]¹ÿ5~ocø¤°2|¤§j9i=ÛK UDî“ñ¤õÚ?즣óçMJŸÂ"_ȉ“{Û5l·2@·wb‹‹5- ÆRâ{s½Á¶Uºo4 …‹µ?„¸5æ‰Qì~¾ß•åÍêÆW^[v1çÔ*ñB=hëX6# Ç{÷:ˆö©ÅØ›.c‘j|n¹°"Û!‰lö¹a×7¦ýbpPïóЦD¨¸åýy×a7}HXXB5ɯ= ðÀ Á-ÛAåñÐ ïfbïú»'д#¨þ\úýx}uªQÈ3ófÂm†žî&]ä%<%î ÞÆ<›éÞþ b…«Éz€ˆL}tÀl©„híTh“’/¿ÿ*1x¶l–@‰ú½oÙäö ï1b1ø=¯‹ŠºÊ¸"^G6…u4"]J‡–k­Çl©Þ0à*(ö}fp†“\t÷å’’ áhÅÿ8 F2¨¢ÿ‰~Ö¸äeœ½ûïÙg]˜¡)â®ÕLa« ~‡›ˆIupEˆôZBib àó]!{záÆ)Âb?ìÒ¬Fæ°`Âeõ7@ž#ý¥T!Ù£÷z¦IxB*­2åêƒWÐhzD °?Á…L¿åDßí±géF¨ÔôƦxç»C{8¡`§Â‡4 éW@q[ÁZ,q{ˆt_hyÕD8°Rë:æ€pÛûà˜‹¬¥}p¿ žxà*Ì™èrÛkqf mQ~ Uœ€gÉhØê—-„ç´þ É^"¡2¼£àµa‹Ìëo”L ÚÀʃÙ$¦ÁãñgX©£vô·äövCü^nådvŸIz½5'D_‚íÈ~I~Kð4[Ÿ¥ô¤xAîÊ8vòŠŠáÇ•‡PVvZA©‰BŠ'»†3´Pް¬•÷k·5¡:XfˆÎ÷§t1²yƤ]§ÝGJíû- éäå âƒ7^ú>oj©’ Ýu$ þ­Œ€QèP#]gLleͧÜh€øU‚y¯5h¦½Oq @©! £8šû:ö4J ]>¿†‚ü”&Ç]Ž ÔôtBdðOcàjJð>~#á”Ø5ùzËñ¡¶(E¶ /d# Ë·¬ËkÉ}êى9ö{É"ìêøö%MSª.w-ÀŸØÞì÷ B¨{cå1ÐöI±†\övBã|*`E{¶ÎŒÐÖýK®£•å\ Y¬ašµ‰^Ò¨Šå½H÷¨.ˆ‘A"fDzÜc5ÔÂ";µ¥<¤4„ÀnøÝ’©Ø–-¸]ÅA¾Œ á¿2¶ÊOÛöÞ‡œE.vžÎ=Ù+cIB'â{e~pa¿_л†–ô$™†$¸E â±¬Ê”0Í« zÀ*'?b'pô¬©’hWáZrjX§>ŸdB»V«Kÿ-lhË[×ô™k?Ú¬»RýíÁF5A ͹^Ƈ®"Ö[¼ÈÿŸc¸ÙVÑN7ô–=Þ®mø«ã]éÚ!@yKf‰ö…¬ÎW–ûÅX§3zÍØVÚFj™›¶¸“âNHí[[‡˜Y^T9ϼ…îa™Œ"_Ð !æí¨èVäf_ʳJÔài÷-—T¿¥²åš™1í’¨Uá||[eƒ<ªÂeh²N1ڣ͑lÆ8JŒÒsaÂ8×ÖýˆFAלDC¾u•PÖìÖ (‡ ´Íï‘]¸8ž,ü9} ™€0D÷¹bu™ÃZ‰ÏÚ¶ŽƒLìÔ@=|„s±bÆ„Š“ìpKξÍý òÿ¹2.ë3ŒÎÉ9,ýè;bžd pœjD‘åÙXÌ®ªHÀfsj4s7+œuó³å0wйtNñ²>гL$¶ÁÙï÷á¤x¼=±7ƒ Xà§]Þ‘™âe­>4‰, ¸4ånBA «çób¡ ám ޼%@[»ÌrhëêÝV¦C©\H‚ŽÎœõ{­¹ËÈš+Ô °Ž”¨…2Bt .…rôɳâ3}ÈÙlY@óý ¸gøv$Ün—€ Ƕµå³òd@èõõ=oaXF^yé³ûë(ã݃$âañO®Ÿ jÂéÕŒØ|Ò-HWGÁØnÝiz5ã%HŠƒÉ`¸ŠT1ì$@çÞvÇòWóaûórR¿] ¼*ÝíUvÍùm1¢Å×mYsíûY­œ3ºp‰KÌ>™ÁÚº$ÍLtV.¾åþÔÖy1Ü$!Tcn¨üÖÒ9Ï ¼*šÒÞ?¬µGÃ2-½²by1nn¹a¨d¢"#Äž'jµË6% hþÚÃ¥­”žYÃfڽѲð`Ÿ¹’Ì=îjïh圡}¯Å=ï(«K°ýc3ŒúÐv•žTã|cðÙuª„8©Õ¡îƒ÷l —U³Ä!íÀLŠ?Ø Œ\ÇF_‘†x¬­ÙÐÅx[IÁ¿OXÈý K‰ÕÝbiŽcíRqˆÒèb²oZ)ú/¢6EðYë‡ñ³0“¼l*üZÕ6¼X¹–O}p[Qâñ+õ48Àn@I_A;Ïà ýp˜Ð¯£“Êö:Väd€´³ºµ« }²¦0‹*¥­Ql·%ßퟮÃíÒÆy¹ vP w·Aì\|»îŒ2LûÂÈïVÞOg¶(_.ÿ ÓœW[®@ˆ—³H€€:Jf ¢ì×Gsâ2Ã,è;»bE›ÜVh²ÑäK·þ©y”h«k¾¼—-åØ¿r´?`K7Ãý¹ÝBÌU…E´¢¢*éÕ?vyE6\ëÌdn('Ûü3d ªaÉôàµ:AÍŸÒût"mÛx¼ò”Í FbÖââÏ Eâ_?°`€ž$žä-º{[qî˧ $vï.踢*Š ’ &4³Âõ¾äQЇYí ß»Â5¨_ÝiÇ_‘¬®v¾¶(Œ²÷XÌIñY߀`iáŽM UM"€ !ô¥eË@%^±n”´½à᥾Š{G‚ô‹(çbí 2BÅ0Õ¨ÞØdoRhµ¹)wYìа kæóMA±v¤—g‹‹"ÝÈ#B¿L¦B®õŠ·U°@F  Ac…6,ˆR3ñAÅó+û(×bMÁÂ7§ïaxÇ'Ü7¡Ân]Q´¦üuw'“ÞK½nþlæþ·X{udÞnÛÏíTüdž¨ _(€×e@™Dz‚ßûüïöùEžÔ=Nª•õsŠ0—¬ŽÉ*;¸ºígú™y—=6HÜf¨'ÁHv¼>©‚¼Ð5³Å;} ç¶ §‹½tS mN߬´¶C}ŽåèvïݰøKÑ`2 ùV¿ÊÍK»ž‚ÃŒ$&x ¶ò×#ËóÒyŒ n“Ïoü‘›aPßÜÔ[8~÷.#ˆƒ¦{jÖƒqÅh¢'ț땟O ÑÝ`Wª[„°H×v< Ó~=)®€ß.؆[w~_wÇÔ²Ó¶8ÞT*oƒÙ†&'+}5Á7§¬Ù\zørPÑFG¸Qƒ9xàŠÊø«E­s+ñ ùÙp!ȇ°ÞŒö½eÌÒòßôßЀ(kxÁLù"N•Ò†2Ž&Ö›ÌN÷ŠækHL‘gm¨ŠÞ¼š­ÀnA#f¼F€_”ÖJ3 ÆØô„—¿Jè@6›Œh7‹å‘ij_ÖbàÊ5¡íx.?Ï-o/>èG c…ç%nt@èÏÌËìE4P¤Î鹄÷pr«$“å¤ë[4èí¬‚<_:'Óqoà™”ªO¨À}ᶪ¹Ÿ YZ;á„Äx`;V Ǔǻº¡èºÑ6%2ƒ¯p/Ç KWLCˆÜ°RøŸC»bÅôÖÐzâ/‰äŸ5Dsñ¢Iµ5J±Ëâuú¬íÀq4$Rÿ°@8 kÿÕŸe±¦Ñd4_åVƒž>ey'¬ÅzµÉÜç/ûzÜÞß}xÍÈ9Ü:·ô·¤‹xrë÷æV2oXÕŠÉ/> Øÿ¦÷­¢!º?ó‹Ñ2¬Þ6q˜°ƒÏ}éóÚ#h3¼†¶¿€JŸ”§m„Êœ·^`ö7Ø7ô­D 5 èu½+~™êûßnƒ.9ñé‰2òy±½x]°¸þÁØèú'FkŽý¾õH\d¥¸Õµñ”µî|8É3ГÓÏšÅ|­¢ŸzòKÍIê`¡t|Źºõ@¼Â¨ó‰Ì¬/±Š :X9+~ðÇ—ŒàÏÒ™ýh¿1€lé<°»®º˜‘N"MÜF†³ŒX¢ùþ¦åávp&‚(ÒËSáÆó÷·ºÙrL‡õFSIxi–—ÉO›Ã>ë9×£¿-(®wv¹â錰~Ñ©«y¯ÓÜþ%¯‘Ü6ñ¸d]¼È(ŽC Ñ€&ò•¤‰$Î`ÐZ0ëät4Ø…Õ}¼g§\Ûíc@D«j²Y€ý aЛs‰Ÿ7‹˜-¶­®5å6Òï@XAL>çFê ±-vá{kX>3]1RÛtKØi4fTèàM{:B•)94õdÃCŠê?9 œ™ ‘–0ŠˆDD‘ï•TNàQÑhiñ„¤&ŠîEªAƒ—–Îòo‡W÷Í,bqzÎ"–kо> 0ó‘½¢=ÏÂÖ1L¼•Ï|´e`#Ù£´ h S ›_™K¾SpêT¸ØTamÜÊçL ·éU]“BZã¬2ÖÂáŒ}œa÷°Ù—Tòub‡Ú‡”«Ëo@¥BÊdH@­ÙÅøª½¶‘ç 6ñ ĥǸ>»gM)žýOÐæÎºPliòú5Ò™XýÀiT¹"Ÿüå(‰ƒD¼g’Üîè þ‚—$zÖ%ǵŒ]ðÌú¹ ¡ó43{¾,J¶AaAZw'y¸ghÇfWË/a\>{¾ý,“Ÿµ§9¢÷±å¬ØO›*?Ÿ´üdƒ t3QÂRìÖwÄà0÷Jµo¯$µ¶9G›œÆ¢n–ÆôPEPûÇ ú§ 8 ¬]¤à-FZíb‰Ä[¦¥_û;ñü½„Ê@qqS'Zì½±M¨gÞ®oÑü°.ƒi,:}ð†éáY½œh·-•ÉLN{¿ÑʬV@ù×ÒÙÒÑøt\pØæÕŒ¸¥v ’->7!M™Å×DuÖvAä}X|‚@.ù[™Š)6w~¦ÐaëE¿á¬q¨Í_fH!sóqœ».Mx »Yæ·RÚ(e†ðMµßð€0QöÓ×âL:rx'år`÷Jæd–²‡×áÿ-ddê þÁ lìG<ša×n°§cÌ4©:˦Ç%DÌILŸÑ5ˆc’È7(<¿|5¬!~¬aüéTÀ?ŽÆO×Ë óÍêçqæzJÀþ;ê¶$nK=×Å‚šè[ÄÉ#ž@`Ö¸êS€»(sÎàHî®^xQai¿VD,>¤@™óì‰Ì±ÆÆdZlâ;¿ÉkðÛ°rœÔð†ý.”;‡7£}T±^u“ƒÍ=ì>Ë.gà$¿L•gO;xbûjÀ~Ò êcÑÌE¼ð'+åð4î ‚ÇzN¹Ò«‹-ÊŸi3¯“Ø_;#í›:Õóñ=oÑŸ[:’øY9ž97èý¢v^oCÖ·*·Kß•öžÚÆÒf³ÈnT©ª£Œ;‹¶ˆ M0‘qï ÎfØà\ç“äB§ïèäcUç1¿OÚB+}}½ ½u6³§JÔ²¯'™Æ7»E; ' ñêìUåQ"Æ‹çL‰Ú34³a#.=ÂõåZz¨FV²å–S‘%µÓU6}9‰èb-xÛøÛÁµám:åÐi~¾iĤx‡<݇œ1àéÿbõÑt–Å€îôÝê»{ÎR3âb©¼É/¯K51!»^Êqj3ç!"…, «4ð•Ù&µ ˆƒ.˜ ¦ —+ñ;Â']ž „T‚wµ¬  @s¥è|f±YÏ®Aì«—5­|ð© Êo 2såÓ|™?5(Ê¡ŽDìk†Õ´¦àNýšx.3l´cUÁ ÝÐNU½À98,œÚâ£àyzÒeÄÍ`FÑ­W~R¯0ø ÿyÐn~á‚eHO?])lU¶ôu`7$%í²M—æSËÿHnL¶GÊF\v“f„ö•Ætð¦ç³Þ’øáFTØ+‰¨OqÜ…@w…2a\‰x©êüià‘ð<ûÍIÝ«gßù 1Ç,?á­ ;ò¼™j_ºUÌ9 ›ò;ø,[”›6ˆj$,/¦¯l«ŠÙ/l,n‚^öÂÈḢÉ,zuÛ¬ÿ¥aaYéÊvçáZ?eÚñ¸ÑV4ñ†q<‘8íPò¿%Âq2©æÊfÄ ³ˆoh=¿qëÖçšJÁ}u’»:í¯Ç€úÂï¸Í¤_“ÆÇòËס/ó©·)@ƒ#c¿#P#•)ÀÂryƒ¯d{”9.Ãza.á.FÎ?GáƼèZ„^„b0»1Ñ -®‚ ‹B¸D›}œU5(ÉUÎh}Å5þp”¶mU!ãõclÝ/‘e¸®'½‘_°+ܧÃýVêRÃáZ6½o–07Bbð†Ù¬û•$S <G¡€ºD]1¯æûGÒåW!)lµû%láØn2ÞËÏ 8³fþÊ—ƼÎ:6㲡(x\,5«Ž­Éñ|Àx»àñ°6¦CÈzUù‰µ§Žc›z颭c[{ ÅÈÍÜ`RŽÌR›/‘×.œýŽÍ?ËÛxóè([P î\.@ZŸÖ,Å2 ø]¶EXJåO´¬:‘yÇ ½ƒ έú,\éÛ F÷VsKÏ^0£3‚øBµÀ“êåD=1-Ô@\ð$e»ËÛ v^ˆù‰¡Tb*oïš µ.‘ oH01@‘E¿ŸŠe#Ö‹êø,×¢ÕnAüû¸ŒÄoËڜ֡ª¢ÑX Ž“`ÈÏfCñÄd€éÃ%!+£)£ºï¾Z‚#ûå¾RXs”„ò¿è…僠ó‰S_ç€ö¯,~>ŽpÞ‚ݪoÁî°”)žUשi»ùô†œV¬ç´Þ±pÿdû‚·Å– p¾8L¿Ü²ÔFE~̬…±&’ûÇ‰É K‚^&7Yüœz¦‰$ièÇ@9lÃÊA¯ð­]ß //Çî`·Æ…‚ôJ/õÛ‘ëlMTÜ‘îÊo>ýØ,u“®½¯7-±bq‘¶iWÏØLˆ;_°¢•bS„~xÂwB¾pŒª?8%4Àì / ̃žÈ£•–ÍÎ¥Q|ç¨ÝŒ¦¤‚* ”aã{Cƒ ÍhMT±þlå^Fõe}íE·,a¬­Ñ¹¬¾s-ÖÜÞ@µ ÅP‡ðÍ‹4•:jw´aœìŒb±@'KW`àç$"Ë…懼©µÝöÿu+D;k‹š’æÐ7z ßjq.d!Z† mŸâßLVf[÷eÄJíž´/ì7µZšÀÙAÛ)ÁKY=“I]‹ý=`Ù3ÕžYçN"Žh²‚°T‰–Šb~'}›ÊšÿVÇù ¥·FGx¶òÀ3#Bi%þ¡¢än-d‘5èp6†ñÑÝ<ãï«Ù¾#ÅÖ\ì.ÌÐsƒˆ}P‘ê ‚|KìøñFL-'ýÔ!Kã*,‚£–I" ®Ò‚¼<°¾¤»2Ë®VXèÛ@¤+¯¨u3‰dL¯Í3Ì>ó48kúûþ—iˆä—ñ°¥PªŸšMž.oþ*æ§ï¦+7º®´œg|l™y(jòHª8 =S‘©Ðó%Òn?¡ä«’â~)>„‹ÁÄ«tןH¿… ìY”ÏÈ!U?!ñ¦xîóš\Ãäƒr2¥mÚœÈlèã(ѪcNù·Û©˜ JŽgl¼±õNßyõÑ@C.îýÅ9 Ü¢®x£ƒ#nsyö†Úê¼ì1¦ckWøß–.1RXŽq.ñqëÁZ¤×O <„‘;ÊĶE}ÑÈ»¡î¾®TðÖ'•äpâYõ`Jö+p û¡Ÿ¹¬Õ=Fý Z¡“âÿóô•^¶dãÙÉ= Céw{)o4>àÕ;'ÈVÞtæΤŸŒ°éM@ío¼!ñ mB1œß\ ´Î‰û¬d­ˆ‘Rân§S-y Ü¢º{å'ö#J¶Æœ_æïæí˜µÍÛß¼0/”ö\Ž£g½7,Uça’ãsPQ% .jIÓ½›jÃM˜ÎÕ–“޽YcÆZ£—-m6ßÛó…ï‹sðœ°B1X¯âUÔõjè|gÔÉ}P2(VG–; jèCPsbwüð¾YŸøè©q®J«÷GÙ8¾üÛÆ9«NËäçÛžϱÒFø\CG‚ï¿—Ùb:U·´#9mŽŸ&) ò‚’þ݈ [Yp3yúa†×kÔqFè3ÊŠÇÆË¸ŒÖt)C9b`òEH§"$w™ù‘;v¡ c3E»RΈt  }M$y8jŵY*•`=8trÁÇúƒ->ÉPýÓÁèxÈQ©óE´P,?»D$·‡Œ™òlJRŽ ;êשá1÷.aÀ®êZ.κ^zì{yúaïõ– ¬%+ë? uQ\|&oWðI}tÙÅsg‘‚Í7È|jxZ³tÕb¹ÇL?îk«wGÌò dokŸà0–#^0v} Td«a¶ 0¼ÐBÃLDð—Jô]/ÀVNõÙßÿUŽº‰ü8mé6ªêèz^oÝ*Û{@32 cë•Fï÷3|Qà¿  @Xc1‘þóÌ‹UüÏüìýoJ†¹1|K**Еd°D¤rÖÇ•1Þ]‘×ó¿ºÝI0É©Í_ íj<$S‹6©LJiæî¨“[.;”Ä€]f|:Oòb|CIRøÏø;¿=¢ÍPAA‚ÊxÏÉ(^Òd qŸ±4ðWˆ ZÔûá 7Î0¸¾ñ>¤}šW³IùøüÕöUH.ö$"*àÀD?xŠ [z—øà£ýØçxØÛ]çôopö€pÈžˆÐ4…¬ "0þñÝ–Ó¯ø)pKÒ*ÊË€l)Ìÿ!j#à]LáàH®l©‚O?Wú)HPw:{M!Ùò4J 0xšSþúkj¦ý%“·žYÈï$½lò-ß”žÅœûµ;ò#š&]4 ®`c¥ÐÇm.†VöI;€[¶þuˆ¬Þ­P~êh§8ÜÒΗF×" “A¡9GýµMx! ,͓͎ÒÑå?7´ô3CXJºa[Ñ€…Û:˹_˜Ÿº­îè ØÙuQ.`€. €Kz:HIX=äÆäþþÏѶŒ†+Ý=ŠþÈb%â 9‰¨2Ô¤Zžˆ¿ï°p ´çÓ†÷²<5øG“Äy1dÒl@zÒD£'žç­[9î gN9ÈÒD¾¿ÈFšèFÑÚvkêZ8JÈVc ƒîÁíl”Á—ÈݼzA©)OsÀýø«‹þ$ÂÇ!ÙFhz£ @+b3ljuvj|ºò-²£(Mþ€Ö†=`àešƒÇüí~¶“‚Ýl8RÅ@ʺ`Ëró(nd!øñÿ=_×>•ñ êÛúƒ `{¦ }¹%EìXË%8Z¢kÿc„åaëÂMÒÀ„&b7q…S?V¤¨Sr›£I”_€öNø#øUkÇS$ÀÞ¨Ê%f@[|¾\á°¸] Ž‘’+ í ´âº¼ña¬LDñ[܉յRÑx:†Êƒ‘d`1°Zá¸äãæÌGœˆ{-—š B0ã‚Ôl÷‹Þл„¿ ™íK¬ÍgõËû.¤è˜|‡†ÿP©·ú-,(§û¦”áG·Hz’ÏN©Æå\û¹ˆßÚ¸^¤—jú·ËkŸžÇCÑ8ØïÔ¯³6öŽO£®ÆÙA|]~׋lüb:Œüy„Eþìiñ8cãÑeÐ8f[ {^˜o– /ÜÇÖÚˆ M†D'L³B€ì’ùøÀÂÀÈÊP–€öegÓ Vä)ÉWÌ0d;ç)®@m5ˆì¨Aƒ-ÙŠ?åI¶ aN΢°ÃÕÒ^ܲª)AQ,¶í_r­ebB‚†§SB8Gg0³²c.aäŒw tÌD8"ÈHîVˆ1èÆ,œÕJ¸›ä ŠpÙ4÷†¥‚÷Mð ¨ùÖÑßÃwøIé2ÐJ3"êÈJ†Ûáð8”¸›¢ίß}ˆ”·e#é´‚Ô+BV‡JŒ’¶Æ|öf¼~KÎÙÆJ'´ñiûª”æ‹ê5«øKù›Qr¢÷mr [ Ò´Ý6ã©‘ …@L&_­=Ô°Ç;>E—á²ngõ`rÑ¥{ÓÀZ@äj”{Äü¾¯õ9žÖô†‹l¿¯Aß—I¶_|{<?¼ëq<ï”0q˜rm9>޶yÆlWN1™{žEWqnÇAjcÐ^Eu!#XëvZûd¼ÜøTA"ç;€3rDÀ^8PæV B*†É3&„–^ØUQRÖêÆ”ƒÛ‘M`a÷"o©BOÁ+‘{×áÄCžâr9;&¦îcÚÐ5êe¡#‹;ŸìBÀ²Á“õð:3 à ¼ÁlÏ÷@µ×þ€Nó5ŠM™¯÷ê‚ÝFñeÀªì#.W^«äñy.O“JÌxÉB LPt½X­Ø!Ü@fmœ¸ûÂ@÷ÔrÈŸbc·ºV?~µÀÒ‡/F(bu?²›Ò$¬NL3ëþÜþ_浿#ÎáÉðÔ/b)é4¨ºb8ãT1‚Tfìú9¹“N0éìð×Ý6pX=Q3#5ötR)Ð&ÊÉÕˆ'½SÔªó-XSô*þ}Ý1{„wDë’â7ˆm×ü «óâ:Ú—XEØÍ¢Ÿ÷3Eª\QkƒÛWO”Pt»å »GÎxYµÓÙ*K‹CJë@ù¾`æP¹N9^Ñø¿Uά]Ö ‡(î#e iy¶«zÇŠNI\ã€Bîe‘[!bWï €ê\ÏbíHÔ};WqOö5Xðy6y‘,äY6ö%Ô‰#îáž@ãÇ{¶!½-T;MíúËÚæOüœ‘ÿ¼PFºòüõûŠžqò]r‰÷ÿk.Zl´dJôYi²2˜sÊå±Þž_ÐL$èUÑ¿´‡¡Qˆ%ß\6Ø­ÖpŠi×›Oá[ÛˆlC±l[¶ÿ™|ˆß. IÌòö£t™tFê‹ F¨I«BÈÝ~‡rCˆø¶+Åø<1O@PŒÇfœ¥Õôxþ²äˆlüehKˆsfƒýZvóŠeƒSf޶âÿãÛ’¥åâV&:l=7îÉ<)8€!™å›—›í#Ü¿ùtÇ—³­Oºªã­z<¼qã~{ âÀ³x¸s «yÈïUäCU6¦¾Q7%ïÂä®Âv`”òmUðLyºµŠí¬¦%‚o×vñü•*u O[ óñfßÜ’°€½J…ŠÒF$¯éþJºæì·P[ùdÊ‹NÃ…fÌz¯ºìÈ^Œ‡Ê‚ݾq©2‰6GW$°ÿgƒ·æn¶¿û/ª&åfû­wQþ‰ÚÆ!º¯[CžúÔ90›ì‡];9~X­üØÔŒâaå¾x”MêEšA:e HÐ M¬…í®v©ÔÌrØ“ÖkŒ%b.¶ÑøKÇà‰·¼áyìÐT2ŸÅY‚A®ûìH‰f/ûÀ[X(ÇØŸº#>0ÅbZÐŒœ¿;ôçµ>_3ý¡á [‹¶üûžñ‰Šèšÿ¢ªï)*ñ‚ßN/:bë å VïÎØäÆ+¡YA3ûÑ…„IÑ‹*ã@#âng®ä;dke ²¢(t\Ûÿ‚‚3!McæO‚m‡ŒÇª sËò%:õÚVjús Ä©µèh,?‹úíü71:ŒÜ%M":øíy£æj˜¦ðbÒ/°W'¾‡©­øîŸ.è–2óqÕÅK¹‘NΠˆ_Q„œqßHI9¡>ƒñ…bÈösŽAÿáñ¶ÿOiÖ¡Ý0­}<6kÝz'*DY€o ¶™>Ø€9Hî.*È€À ÚÅiÿÖ °^«ƒTL¯”œ×ûî ÛÊlU„ÂÙ5³ï‹¨VÛNšöÒtŽ¥t*Ùz½j.4HëgÕy.¯IËíJ¸ÍùœåÖþ72õ¿ÇÈ;Ôâ„©Ä[n”5#µËÖü^T¹˜­yÆÑ9¡×hÉ÷Yüc*Éý6SÊ×´Î3›½q`öÍ’Aã[ß[(›ƒ7¹Æ9·lÊáj,(G@Ÿk‚R]µbÏ4K;á踕Зön¾$3yöÖ2ü”3âmž yŽðÖf׿Gœ“Xn½¨…¢,°ÌT”Ãódß¹6„( À\0B¶òâŽM¤á[WÑs“ª@ d9Õgçmj˜†Xå÷m7dްŒH{½a `c•¦påU:?ÚŒ@5•å%Þ}¤ÜJÍ©^E¤ tL‰€òµîåî¹/5g“ø"öµžPlêh?6”‹//™ŒÈgÄÙ‰h‡iQ+F¼ÔxýõI{ïØ²GUk!â——–úˆDßGÇ „m—H7Þk†¨ö`¾áAû­bŠ¿l‚m°þvÑQ¡yàüÈ©ËRþëUñ·o<˜¯PpÆcÒ%­å|NsmÄåtJT},*‹g;R‚ !?5µýQ7NTé0ºçõå <—…´ È!IW2€5`ÿs(rþÆï~nIuåÎ=øŸëã›/«èÜÃjl¤»zÜc %¨ôx4yÉËoøhO§’.¢Š©ñù‡Ùö~ŠY³ät°› °­ËÕß?6Ìl´ßDóR2{½¼o TojrßÒ|²¤SCp¾³ü÷ïñ‚xO"ൕÁøYÜ#ý2Ü÷&ÞêëœY‰€2øí¿ÜrÑzðÓ³ÀÀPQb§!ÜÓJý½˜§Ã;Ñ.ÔÇ õ1kMÐu{íxxŸ Üò|¼óÌžçE:W¦™Ì¿úÚµàx>uç²|MÑ 4 PÆz§dõ±R™`³;«)™ŠivjÉöÁ"À dÏpn §$æØ-cÉ⬦ãR${¨±$?u—þÔeqá$󟪭3ÈK/;œ QIª¬"·è€Ðhøõÿ‘šú¤5¼BoÀñåûFà>}ÊQËŽ%ƒ¾þðþžÒ¤ËWϰÀú»ˆl®Ê#S8Œ,¢ƒm[ò0Åá ûuÈÁ¨Ò] œò+,Z@L¾½„‚r}?ÒÔ¿OcHãò.¤,ÜNØø ,²·|ZIáºÃÆ>H@¬$‚öŒÒ É‹ªÃähÝvçOL{èaÖØœÜtl+½áLûƒ#õêŒN"#øG_øëÐ1Â'ÉŽ!ǃ”¼F¬Cu!ý:¨g\øåš¶2¦l5°äE ~ǼbQîšâ_†—ÙzùcgpŸ˜©ËÍ¢æ=½îœÍá'÷?¹ Ê|-Þå#ô¸uêr&öÆC½¦ˆe·o÷×l ¨Iuɤ?í\9•÷ÞýM™½¹ ZT®Cuvý‡‚þ²ÍÇï”TzãS&á:f¯‘°ïÐØûVNç³ýR̬tCË1žÙ$¶ôpy¨”Üc€}vècÃuîyMÞáqpÚøÞøüeyœTù ˆyÜqÊ«^*MD ãÌ@ãDc®°Á ÿ4'Rãõ6ß#„©ÝöÄxÿQù…¶ªºœ;cËï°p£*ñ9êO±••UÖ(ê­Sá£OÀI†‹óõ”þ,g¯:Exƒ¼„nÛþàQÆó÷%_È€AÐ ª=ÏpÔLp:ÿWPËmØýPs¶c7QƒIâ£EÄM`÷ü‹{ÉQlÙçúAí6©Ûa"»R3ãžP}Ü~ ‚¼vÿl˜ Ê´`¸î‰sÃù>^Šûgƒ·g|ÏÄŠ¬Ú‰w‘"ïÖÔ¦ÛkŸHàÎmÓ~`úI"˜ÃL«$’>˜—ÇFOyÝ„Þ`¬"sú”È`9˜m@ –Ôeg¬Wãxú hÿŸHúöhœ¬:°|Y[×iTóèÓß¶-.ÔJ,C—otÔX¤ë¾™å…©Ð'oÀáßÒP_%gv.p<ܳhoú3Ü9OXš­Ñ@Zw§[¼q4y¦ž+/¼½¿³ƒê*ÊPyäLŒc[ï¿nfÜKIúeBº;KFI÷"=Œ Ÿ ÛTs}?!Ù :òõ°·Ã ù;1ýƒr˜éòµÓ×§c‚N~CÑÌÐJᲂÞÙZp ºvG¬Í¡€$7†øÐÒ³[M¤vüH±é9MãšiBl%ù èÎÃ7Èx/ÄÈüBoJc¬:Ä®t’ûŸccHèh(ÃlaSQ¹Õìé˜ìW ÆÞA`ÞÂýé-¡ZCßÇä„y&El¦\ñ¢ Ñ÷›]©S4}½…¹àÏÔ¨CÕàu<æ¦LãcÔ¸:f¹ôãG;G†á<’ûû«ð{™™S´à5o@ÆÛføoÊtpyÖˆ`Ô„_«‘®¿ä˜ø UI ”•æÜPIkXˆ‚ñæØ¾è_&Ö÷]©Õ>AÏ®õsýPþr_Ì\h ÀZƒŽ3ƒÔÄf¿C#Êt¹¡)Õ¥!Ýr˜¯ºO%¯,Øïè.xr×aÁ˘h#Û"3Úå|] l5*Ma9üh ­Å®¼øbá8Ð/ÌþôiÁdYÕã–Ú”2O,u€”nÖ/þ-̦mÈ4fD˜/0â½ãY„Y8`4[<ÛXöÔضœ¾ø›]¦ÞÄ¢–†`ì KÌÂÚTšñŠ/Ù’!1W)mÑe¡å·sÎ?Ì4|³¸‡ÒC….°vŠëþ$¬_çxEcðSd”ÓÓxÕ¶ëqŽÓ)Öå+ebµì0 1ÀGy‰üõm Ü›rÓ~‡sa§:Îi´Ò,“âXÈw½yôù¿ípAÏÜîLóç:%ÞhƒêétYb¼lüýÏTæ53Ék¬ÓgXš©°‡À_ëµlùggí>J2?º”œMH«€ìk;Å+†cZ%¶€WEKí^v™ô«“…Hüª½Z…:/1LæX„w¥[ÉÕX Û"F/Ã_]¬HYð+ÔN®býÜ@Ýy‡óõ›ñÿ&ƒA…5†£o–1Ð_Z á>u\`¹?Œ‹É D¢Ñ X.ŒA¡ ¢³¤íPóœ ² þzËcÃ<ËcA¤@´ï~˜šHébJF\QìµÈ úñÙñ>‹æŽ^k‘¦?ÁÕw)u—ñ%²æ»`“‘c Ýõ&jFþàCçÍÖÓZøitJaë|ƒ3˜¸¢¸M“À¥3^1–Æ('‘MªÀ0šXÓfÆê*K$Vá×`€]ؽ¦Zƒ)ÀX~3M+˜ø·#)ÖŸ_KÚúœµ¼9$‹ï%›ªÍ9Ù>KÜ5+áÒÌ\]))ô¾,Ÿ©ä6ˆñžÇò—D8O"nnžšÂv¡õX"Ò#íÓÑ*ia&¡duAçÇghB€)õ€ÔAbd<¶¦QÕÓ¯Îâ&-ú”õ=ɯÝÃi3§¬AÃ/á2³A×ïÊÖ¤ü.ŽôNWCÑÚdV£-Î%á;ЙwãÎüc–¥¶G¦ßH0 Vû<‡ZgèVÆ;âÜ ;šM߈ã¾¹X7—­ù7Y&‚”x9T}ït„ÙïhÕVÝXqVÏÕ’ILÏY’Ëòð‘Á>~º*‘˜XxæÐ)ž—¯êŠ*ÔÒ"_Áê‘ÚÇÜN;¶r'\¤n˧Y¢eéw¤yœhî\¹ÕÀc¶h"³Fü¤€nlZÒãÎtGà¡ Ìö“ªdÏqæÝjñšûËÿ jÛéÃâ{‰KärcÊ€.ïoò 6¯Ÿ›© )õqßåRÛý+_!Öû5Pª¤53‘-üW·ªÎ ÝÒÚpð¬¨îPzØäìlÿçÍ2=°¡;ËÂ2Gwµ8 ޹  BgtØåÖ…™Ïá .-71ê“>®dÕÈÄ+&,áš €–»ðÈA¼/ yi)3~ãÅšÜÓÛ{'¾0{”k鼓ÿ’æ’åi•Dò­¬1ಣékÌÌùVºŸ$æ7¹)Zଋ6´ðÁ‹àÎ*Kzšº—8¦íÁUŽŽYž»×ðßeŸÏcÁôLm‰º’B†Ýü…øµ-æ8lþ§µAjr¤û½à,ß‹ísõšÜe?Ƈ5©x¸p*í[±¼þøð«ýƒS´Ø3 NäsÕï±Á/ü.hˆrþ›¿q,‘6«îÚæ™$Ån2M¬¯h{tŸS¶*Sþ¤Ç ?6ÉÃ]òÆŸ§ã Vкƒ'YÚ?z-öÇĻÍD c_Ãsy­Lžçz›ýw_@ãÔ_'±¶¼6pÓ„}¼Ã0¨° Ø,#ãÐ:U•ØLZŠ‹p­8¿# ïêÅžÿ/OœG³8u`ˆ âßRê=Ë{@$Y„ áÁøwœÄQ¨ ãÍä.ÿ ½\û¨ü6È@Ä (´> ?Í·Jóe®U¿¨N‘!ˆEt…èÒ è°úA6^St.eb~¨ó׌žýéÍâÝ¥ ÒBÐNÏ:„Æ7ew8¶•®Än$³‰ñìPútôyxúŠ*9{ÙaèZ2Û‹Áã­Üâ1AfñQìcHÐÇ\ÿ¶´µdÕGGÿcWkLO¬Hr¨ i'#‘xŽ ~V C¿¬žézŒ‡A|á¸+à…Ån¿Y<8<0²æÛ{×fS‹µÁv1:Ôí: ^×hH}ON⻫ç 5sÚ¬±`á»MT3Мäó«h¥ÎÙŽÙãzÊ~_m#3|⦺؋‰ØåáxP«=ÛS&¥B$r¾Îk‚ß嬤°GËš“é>Í/ó,7Àw¿XÖüæûIÀ­‘ tßI ãŒ[Í2XPq/küËs¬„aÁ#gO.t UK‡`Œ0m÷\SØ^{-@Àÿ_‡ ÑkóÇør3Þ‰¶d.qüH˜ ‡Ý•.ËøÕYã€Òé_² ;Pþްe<õ‚ÀO¬ ãOÜŽM>ãæV"c @at&·x!KUNÇtM*Í'OZèfëÅCãµ›õÞÜX¯œ|"·sj ;êþà¨Æ4*-P¹ õ­p?€FCl4‹Ù™ªË!^H:ã %Ÿ~4›ú¯ yiÏþs –•¹Žrc´«‹‚&ΩI­V[$±6Zøðî›ôÀ"üëºÉš_ÈÞça•š>ƒˆÏCL"!» bW.ûœl þûIð-˜ìBT¸íhZó¬XV1ÄÈÁ<Û>û¿‚ÚëñQ¼1ÿ}5.R¢2+[ƒ€h@ZF8E’_¤ ùÙÄ׎®Œá¡§ T6n¸ŠNÍ¶Âø‚ÿD .òWcYI·¨€õ!DL‘b„€ÌzaEGuº—Ë:0 k&:”KE^x;w‚¤Áä©ÿ/Õç¿´ôšù þ„g->™'Ák”¿³¿е<Ѐ‚Dð9ó³‹:ã ¯ø2àJÈ–þimH ¤BIÄz$ûÒmˆÆÆÑ¦§Ã!XWôì¦Xlä?¼ê`{\p®Çæ˜aOwõ–}´‹1öÙU=ëI¼ÈÚ¢ÃfôTíãÁ• £u^Zt¼¾6U¤ô•½²‹¢¤†³‹÷mÖîÉ<;Î@%^Ÿë¡~©uü:¼‡ÝC”¡p¢ŠPêßáù›+6Ý÷ ”¨ ÆÜ§Á…xίª”ñèNg¦g4Hd¨õ%äɳŽÌTÖÌßXªìéd-z¬Ô;¿w/¥4x˜Cò¢(€JÎ(+h4Ô<ttR«(ÐŒ¯nÃÑ~\i ˲LÞÑ_<œbFátÛ7/”î}é˜õ¼ ˜ ’ç^ˆÿw["°Qö‡å—1pt@˜²ì°žµ] i0x»ÿ&ŒT3u²Œ×Êb¶çë™ÐTMC@.Eì99ýèøÐ˜…“¸% žÚ× üVGºÀ—ÑíC›^ E¥âjº,X[†£ÊÂÅõA#+´øÙ÷^˜«Ê«!ˆôæçrÐ8¹$€WŽË½)ËeÓÐÕÂá’ È(£^±S|Õ'¾œw²1ž0Jß=þ³Ü;u©‰ñóR*,Ú,¾á¤ëxò®Ò'·" á ,LÝ}ßg™Ñ÷á°uj¯Ö@s,_n¥àûu¿b­½­¨ðn«5Z{†YA2d´×%89$…û´’êÂP‘7£óèd)«H¼~Þ)’/ø·%ë|].&•¬ï ˆ)œ¬êù&ò:¹uÏ~q‰‹S_“±Òô!…ækc…å³”Ú€*àÅ~6‹±RÞž6+ä ñãè T#¯€é€@"ÂRÓ 2O˜ædño~<þ!tœ·¿ñÕ ÿBîóaÄ›ü¾Ì$/Ùˆ`; œLˆ!d¶_(Þ¸múT‡õ_`_ÂÍ?¼,9ùŸNÕDm Q¬Jõ3ûÁ@çÞïÿ»è-Ý"2YÀ€Þò³§ýå“™ÿ `n—Ÿ´³ŠëÿBÑ!¡xwà2=U[\…ŽU*aþÚ› ÎX¦§÷O²ßé¦J¥Ñ[¸ËGC™¹Íû, ‡L>Gaò㱓¦Û]¸ ʲ`¾Ö¶V¡9J5«)P®˜Ïá&BAЗõÊ ø¸êö¸wË´A‚ÆÓ¡àd·€Ò‹8TQÂñP°N9I` ·ì,r8b™HËñb¢uå|%©ž39,Æz#Úrí&ýf4ѾZûaÝW¤}£8JBV}ìxà{j°wkÊgäh ³€Scè)­Öÿ¢/¤{À‰•p°I ^X&÷a&ëj‘¢‹³ùd€Çª<Ì㾟úÎÂó]Ì;Zœfù¡F`ÜÌù!>#éɃ&¸q8ÈDØ'€Ë쳄ÒëßÿÂ.i…BA½Ê¨/»gâ/}sß… Í\EÍa&8h<À‰úd™l˜– ÎDíSسÀÒ‘™§û?‚)ªA¢©]A¤(uËXQã´’4o£úª®žU®lv'B¯ÄXø’k)IÝt£BtÂ#)q…C­ym޵,¯­ýUÞáxæ¬b4Iª µ`JÚ™>ca¬²é“í,×Rãfø†÷™Ü´‰³–:!l£"öáu±ü(« Ÿ›r©{G‚§œÉ~JÿÇ/ ùˆ²{ã< ta’½±éGAýòPZ#…-Ê úSÕµb©¸°Ã„à(@nsr·”½þØ‘Q«dœ«Å¯E¼$òÇ‹‡ Çê­ÎÜìôtéÖ.Rh±¨ÒÙ,Ú·vøþ$‚ÂÁ–¦§W(‰CÿItÆÔ˶Á¤¦—z1Kl ëD;AH¢3ŒTw›àšÒ‰N­éLXp©"ôëF:u¤7YÞOƒ½uE¹ óYª)Î1Z Çv0ŠDuœ½ . -Ï:pb:÷Â<c0¹«B}ø~*ÄÜ*ª æä"%0¼à&Ö×x6ºÐþäÍßÔöÎêǽœ‹sþ²+ó†Ϻ(¶c!{Ÿs`1è g1û#±[¨*àg4ö2«øí¨sFr샞@‘¹û¡œÖêXb1â9~ÆuT;­õ7¹+ËDƭј>ntÛ q€ML®e"þ D ·×ÀØS —¬•8 m¹ÑU‰òîRäÉ@Öç8¸¦ -ÐãìÌgt_‘¥– #%ªÿÈE$úûƒ" äÓ‚Qkÿ¤1ªøoï#@±”À?›+—‡Ðº<€ ¤°°˜œ‹nEz”Ý¿ª;ªö‰²nÒÚ/„èž»NIŠÍ„o8ªxìyFÔËC·²œ;8ÿ÷§5w£`M2=þ«;ŸÐ¨iÐ< *”st¨–ŸN»ˆîý6$ørÜÜÈŒìƒdÞ¹¨we£Âä_{÷ è¥Ç½¤&½ƒØx‘T~ AãËÊý!ô…Mû„±´UßdÝCÚ>ûôÆÄ¹Aàd.Y€É‘CË?èHŽ@I¦ŸCÔ,Ö¡NB,DfÐÍ&‹hþý(³|å„6gµÍ ù1¹r8L¥à¼Câñ4$ª}"iƒ,ä3¥å»ÅÔÀ ¸mPŽõ>†“‰èë("´Iµ`(Ðm#,Bl"]¼8Äöî"ñ"cªœ„~ßò( ŒÌç<ÜÐdo`ObNOW·¥4ØM‘“ !yÁxôh¿±*O©ìäè>“ì÷o7©¹æP)! ùW¹XHª¦ÃïPuuA§’¢#ÖpëŸB£o‚>a4Å‘©kßäìå<Û^±§ÓlOÀ;€ˆ «yžG'A"0Ô3˜¼¥9+| w³¥sK‡~ˆ>¶<Ñïx7>Û­WîÞïß¿3¦¥Bk-à<¬|ÔÅÖ\ÌÚ;…WW[ÜRýSQ“Û“²ã¨P"<_-®m†ÃGw©ÕE_™÷êëåÏ–^éÆ×—ÌÏWPvšçÊ>%]щ¸ð}Õ2œ¦Z®^þZ¨åOÎàÛtÓ-•Ó "ij6¯»;ýçÔ AhS²ÓŸ­– òæR„bîÓ|?’@Þù¾+ïðNVšP¹À{šK^ß^U´8kÀôùÒºšÈVÈòó%Þ{6Ïݶ‡“ô飱dHüÌš†6Só³‚0ÖZ 7÷dÁOO­&Ï@ðRŒÃâÔ³–Y)·‘³[3aÐn-:ËÆC¬…5÷¯Åy½îá:ÖŸ÷G÷™sGJÜâ¨+¿㌾©k‚™™<:b[?X6iõmïµ.qÅ]’ËÃ@ j¹N$ Š8‰V{õñY‰‘fß ©°€õÙdOzVgr!Ãß2×+·x~Ãa7`Š }½ÿV$áíç×¼À/Q@“æ±)C ?õ!¹xšºFÐÍ©e¨ ð]¹*Ã\íôKx+`~}ŠL»"× 3_Ä]Œ„Uºë+õ< h. Ãê>}¤b™÷Ç~š– wú_h¾ªÚk&f,Ïf¸j~æ0FëÁ¸jHÁ‚2“Uï¢3`ìYyn7«µ£UuÚŠSòTô­(Ênké]¡¶qžðó‚lråÎøS]@PÐá=$úþâ§®X&Qk»i)êÞvc&Xlä}\. “³¾!6bÀkÊÝÇù#¯÷רFðÏù°hƒg{˜:Ÿ”-³¨’×2‘Ú‘ö×½å~µYŠ×g,@<ŒÁ6¸…°«>6¬^ 7~³nköŸ‡q‚°;ažÃ¨eŒC¤‡ï£’©ZN¬+âcá³.%Ä[ŸÖh¹ø½íß4IÛ‚Fý ôîqŒ6žYú@ÚùÃÜÂ" ³]¼ÙC¬ÂxÔ2RÎT«èX²`ûÊ”‡†‘W36\*_îZá¸j2¬t™ ,ƒÓS½F¦u rœèóÜ.dÿh@‰t•™(_î›Æn]DUÍË5Tè ‘lô ^Í`Y^Ï»åÕû/L`̺†E²2v­Gw¯‹·çÅBrÛR]ƒ&Rù·çªCËiÓ{õa©Ëغq{N§¦uºúÌ‚ÊÐ*ýÀ^ ¨pЎ°õNZowc—³{ÍQŒ(rž:ôÀTZçâ{¬di€zÇ#ÑÝî‹’¤³ ã‹›ZRY»6Â@:mÈM`ètŒ-í¾ùojqùý¬sbïa™.0}M²½8ªàÚrÀ õ-O“Æ/xö¦úŠ,_hÑàï …t6“wmô8çóÍ0´V6Î?ŸsÕkéàÛG&X*š®´#F WŽÅ•èaÁ·NÜ<í—ΞP½»²W¹F]¯Áܪ?ÁŠæÑ„²’áxjR,’£òU á–“ždHRœíì㨆¯òn’Æñž\‡ §Ö!ÓKqM©ž v5OxÞ- âð 2¨‚†ëx„Ê2<Ð(!ñ4‰/"Þ#ÈŠ-žAÑó€ë[M;1A/û]€=@ã‹„¨•ĉësyÑ/’w–õĽq¡€©à ð¤'PW½Í-`ÅV¤‚=ÕÑcª;>¼M´ò¥)a­¸ÁbÃÿ¡ìºeA>a9ÅØN–ª¯°IX‘Þ¿™ôAimKÅû?Ò7EÒ(fæÈTLû!‡-a¼˜ãmÙ(ÎÖô…Ø&«´§ñ€]ºbÿDÅÚ³j. Ç…¥ ?yt!òœ÷§^AT+[×Äâ°ùâìUˆ˜Â1:† æn…œÔ&ÊÚV>¸Àþo0ÁÜûj!/dó)|€7Ë2éB Y÷:eÔQRLÿ¾àý®–åÍ­vVC¸K‡Ák¼'PWäpê“ ÁŸ‚L£æjŒ©²ÛæBŸI¯–ÆØ‚¡ ëÉÈffY~~Q¢±rk[⨅h ™^€tiE7ôô¬˜?—Ä?Òsâý4vo€ì]Ž=óÈóþà7EõÖÓ†??dyÅ? ‡ö°¿¯`f'¿-?Éw³xô𵺫‚¿W竎° 䪂 <ò×à0’~tVU8|ºíô6¹àÉðh`îšü¯Œ–Ë¿ƒ­YÏ’€ö¯d9«ÈŸ¶¢“,„}VÜ"c—àU@úÖëO·‹=ŠdR¿0îÂ5M¶iÂsfVAíMÓ(…û-1 †ùºÛæ ƒîkô>5à1®bOÕkTw½ú yŸKÓ"ŘÖ/°Uþž§Š–þk¬ÑêNÁKÕr±„Ï‚oÖ‘NÁ"GQ®,Tè.£õ¼0Bé~2Œ‹›` ”Kƒÿ9E­2[#߉_æzz'µÁ¥rË6c9z"úmNf3µì£î~Oα”Æ®qÊÎÁO§¨|™Êõð4 ~¸òÚO« k\<Áïópý.«úVN‡ÀpA÷‡…²Y\¾ôà;Šè¸<Â)xì–}c‡{ŽsµrFgŠ‚^d9|$v“jÝá¹ë«¾ŒKá}³ ¥É7Ï©”¦ipgfñõÅn‘ã¼}úù-¶6s!]ÿœ§«Ú¹bÓ‘w4tå>¤¯‰Üìk!¹FË«’±Îí´ÌèHøÐÞ"¤üO%_æeíû=êš4 ÉúîÜÒºUÖ|bºþf‡š¹ßåžcË»¸«;>є "Åëßß,ʵŽl¼¼µþ¸¨©ñ4bGDÆÄàó1];e1*QÕGƒ[W®Šá×Òñ¾Ã•½™¸õŶnÕ\T"/@$U›ÇÁ¸5WǹOX0qQR!z‚l?ªZlW×Á8Ax„"öµÀøß·Íf·bò©²½.YÜ‘ËâeT”(/¨@K¸]Šˆ4Ítì_Îôî6 hŸDtõºÞ“Ò¿ÖüôíÍux“¶yvÓ eÝ7,J#/åÿ_ØêQ—crË ‰cŠç ƒ'SÑRxˆ¨G—é¢ß¿M0»w“Æ2f¥ò˜Qð&&~x«.$—O@øòa‡"Yƒ4¤/:N¬„ ÝËW²¬æ^-Ó¬4BOÙ6œB\ uõ”–ÝÐÓZ,‘äú±‘²«îK9×RŒ/ºªÛª‚âSà€^¢gº_ÇU¾‹AŒ ÝÁÄ=„ðÁA`üèT·ù¢üî§X¢–EÃ÷ƒøãS‚÷¿»œgÛž–bŸHzý4e(žŒ‰#CÀnÞñ}ÙB€t›¤E$͡뚆ŽBOKe%»KÚÆ1$ôŽ7q”fëÓŸ¤êi¸«oT*¢‚A ý›zoñ‘2Ô¸0±èë;ŸEìõG¦–t€KO(û“(c5;åÐré¹Ä=­R w”Wwg®C«’T­hH€r]YƒzÛ#Ìô@^ÞLµÑýô8ø@úª8À”6µÃºý%Jµ»—ûÒäU¾¥ãø$Ë ›t)BÑšƒF æYC˜~4zÉ.}&K”³àîâ†d¹ ›ìÇ—N“S!À?éüŒ×ûÝÞù6²§Ø-•ý1ªz§Îù`N á FÙIó‹TÒw€¡;ÌÁ[›¡F“7„5ÀJÅ1YÒ‹%‚ªÅÑïIh²ê ³Ï·´ö?q¥Í›¤ÊÿzŸ2Ñ.R¨›’ªKQ˜ÆùМÿuœC·X ¥˜ë¯÷·<xQ9,—w&‹~blru>=vé’Ó§03yEºø*Ëc¨yAgoµ×9,XÇíô[öJAEæå¥|Û¥ot#%I€üæèI¥i‘J'Q ¥l=`±gORÀŒÆŸ›!r•tØ0E"÷£u·òYUªOÜv÷èí.ƹ×ÙùyŽH­Cúé.п¿„"gáùáåë0I¿Åñ“èæ’èöK•±g¶z/™¿;ã:ñPp‰< F@òÁS•Yäbm›Ô W£™}†‰³¼:“ƾ@­’Äh ’Áûôðžn_xY?·í¯L6tœ‡o›n3m¡Ž‘ì L²Æ¹¹N0tlÑ4OÐLZ²¬é»( !¦sŒ¬›q;3Hs<Þ§)Œ&"Lš{7ÿ*Úi ¼ˆèPö&¾ V ŸæÌ¿å˜ý7 òÅRM:D3±ÒÎ:²øàîsuµK~×bßn–²º6þ·n>á'âÛþáÊÆ/æ„q­í’¦@nâ9¥sjœ@Å[û‹’Dû-²3üF¡AÛN‡Ìµî(@±#.•· èy-èìë¶…—=À5 "Qn”’]U¥Gýn»!j¥ótºïlw»]9f‹©3»S×üb­/Få1í §‚”"àϽMû6yë¦ö²€DNA!“õ†v»¤,|nÅSMþÇuïmÖ2ŠÜLH)Øøâtb„E ¶!xz~» ýíúúÏ:꽡7ÊÞø˜ï)ã÷ò^>€R|0±&ZÆQœlq±RXHÊPË›“@½qsih¿ö1¾vÚ–Dýþƒ¢²Ù!ÛcjÙW´¸m¼—žAù«@8°k‹Æ‘L£?ûÀG4‡….º¹™bãò¦X}Ç)r"ˆ' ]'Q®¼-C~?ŒÉ¥„œ±ÃV´ÄaîÝÒ?g?3è«—Z»éBJ¿¯˜áý°¬Ø6¿~ȳ¿—Ðb)‚ÊÐÉïý¡‘µE]öMH£*±´<@uމêp4åÃÈÑ(Ú \;c†Ê8ø˜ƒ¶Šƒ-ñg¸"±HñXæÊáςµZ&ÎD-mįÒ‡•š\þ‹"Ïþ=»ÏŠ÷ŒTV–E–~¼^¢¸µ¤p§Ý# 2»3${ª ,ñÅG¨WÌt®F$BûФ @%¥Ø'ðïŒ^ýæÐHïO±!—r q6ò—/œA¸E•L‚ƒ·W°5{EŒú:' 傚ST²ä@ç÷õiÐà¸c ®T2„_&‡½×Þ†ÞfÅ5ù^ðýˆÕ Y€ù~ …JS7X¦+ŸÏBñŸj¼#„Á¯=0æ=!‚Fì‹õ yÁéqmëË;”Ð…9³¡Â=ô¿uï†ò.$ŽX>àˆ½JAWËŽù–”˜8<¿UN9wÿó|óø÷§ß¶OØX]*÷¹áV–q¡¡¤ë ö÷‹ÀÈ'ºÌA´£ÿ(Ó³ht^Ney9éXÕP`ç‚[e„¥]×nd»û*ü}W\±sm_KI u‰w5º„e3°Ð•m;¨Oøô³ãO„?ïGxŒôZ§»ÁPˆÄ¦—8ÉÑÚ™!(H€ó³»ö„Ý_ >ÛÙ*‰k#¨l8+8gâY<‘Ⱦ'ÿ  ·yHo¥q±ØJm@†ÃQ5fÚ7ÕþYJæb7™V.ëOcFÕÂáÒxü@‹ÐƒL† ×陉õéÄ2ÀKgZ•ÒSàšAÜü¡rTþþuTÙjÆaûeÄ’)þÞ+»£Ã˜J)…bôN@FËÎË|ji¹´99rí._ÃsŠÆøðÇ“éÒvÏ¢ÃûSxñ¯˜µƒþõ êý·—Àå™ÛÍ-îÀ.eœÃÏ~ßXk™ ôg+'Û\Ã'ýÓ©@m Í ˜-¼$í­‚µ'‚‹ùÏóØ–Îô uJTÁ‘ x!ªãûa‰Øc•t~¯É1 æÓWýNÞØ]a31¢ð~<U¦á*ç½­^Šë¹Cj+=—øÙ´ßh¬ººÐuï^ów‹_Á l‚ ÜýŸçd:´³×tÁƒæýi箥Ø+ý xÙ?eÿl¹½çÞËÀ‡ûçx¥¶úCÍ$ÐágÆŠE#Ô¨Ô `@Ô4 °š—×`FÐHêKgD¨ìzöÝXêâv9„Àôù~ðî¦_¸ÌU­èåœl: H®/sû³Õ6TYMQ”9}òxfçç†øªû¸žó¢§Ä–Iº6½hËè¡â¬•XvFúoâjVž’é/õp½U|y­^à(¬ j)_pÍ \>} ØÖ#}!HÄŒ>‚ƒî «—MHGNF­HñP`ù§›µÇ°U0sP÷¶9…S¤7*¯@`1Ì’E=lÕyÚ[ÆçÍν;ø ›÷Á¢ ±><6«Á•å_x=Iºÿ.ŽïŽ˜à\p”"OÊ‹ ¯Bd¡‡=1ªÁݾsHœ‹%ºpHÒubx«§A¤ûóxéè]¬œÖ,â"Wk+ù0{²]͘Ë?ñUùb†‡kõ'Í—¢"ƒ‚ÌãæúlÐDe6/-í蘙NNhYÂÁ€} X¦´L ”ýý±eÚ>kþNj=FÂúWMN×y¼n’e9ºk°bÔ^V+ã/ ÚÝÖ}°ôû<'Ãýtüyx>ñE('&͵mÓÄH‡"m{ÄV—!ëMX:ØÆ¾åp‘ßã4ª®<µ–i)¨¡•ísü î/çòo—-éieœïÝ#ö•òczKüùÐ J:‡éÇѨœ­` i°AíUù…¨!\/îô?Òº­¢¬ßýà HZ#«®½q—ÆÈŸ¿i£¿6k­&JàƒË×L–Q©‚„pR€þØöóòbèøÑÃÕ2^_Sµ>ÐÉœŸuü[ÆÄ˜Ü›ÐÎíhƒ¡ÞpàXeÙÜa‘S›T¹EAù5ÆéÀ…û: ÍÀ 5‹ÌÄÓ£Ãy¹bלÎ`csõ‹j:[˾»—ýÂ@<8\®§ŒâÀù-ùRY’Ééa¿­ÎÛü©6¿š©{ë,(—ƒ¤Ü!^Éí"Ó^G(VÐ|ºíìS‘æRïô½~‚Á^âVM1µ°i\„ûÁÐÑß´}™ÇY†½\"uGÙhqÀº!9á±§¬×¥sPZWÀOºTP䕉+zyüNæ< _§ì{:nœœbïŠ) ñ\‹óÙ±‰ÀÆñ~Ø-f.•μzŒbAêŸß«—‡×^›ùB .ùÞÞ¾FÊÚBÌ\N)€ +i2èý­/äÕõÛ_Ò-ðN4‹ ʗǢ͑e=ÉO{–ƒWÞ™ù†ÌòDC§È˜Ï˜[ ó[}b/»>íéAšBJJû:v¯øC€ªe×J™< ¨ ŠÐnE&yMÊ2>Áرa:ºÝF=ëHžÇE¡Ú´UÇÏGUSÃOÜYcˆŠìcçÑ…Å([à¶îâïÖ£Ò•n5àÐ’ H‰t$¼Ÿg(b³·—wPZÑï]hH²ph3ŠR+L+!¹o’Lƒk{("Q ;<¼Hw*ªmÙ-òOòÜñR>}!v¯¿‹ˆqsâQ\Ü0 ¯™¬6·jUÿ›Ï¸H“ÙÄù^ Ø&\$qòvDK~„ZwW~ù»u³å'ÔÁ¨’ lLÇž;¨øƒÞž¾þÜc7Þ(IÃ7Pa_¦]Æè5‰F’AÔK@TûwO=ÝdìâN¾þu)ƦºFÙ«ê‹;³ð¸RzC}É:¤õÖ-A¸!;žSžc9”›ìÔ¡ß:êüdeÉBYDŽkïÍ• æFó†~ ­Ü‰€¦¸‰“ž ›oßî¯üÍ“¼·41]k½M_­—ü¹[J vŠÆOµ„ç&ã̈S)‹BÄ“$ ÇΓ nÀØFÐfzêNN3 FºA;OÎ6ö9þµ7ôkæäÙ]~·RNPÜi‚Ž\YëÑa:¡cÈ=ýÊSZJ=–Å9¹Ó© OfAÇ‚ð$ ŽœüáíY”}ÝoT£¹dd} ÛGJƱ™£ºh˜õ‚Ìû’”Ÿ g3Ú8Ù'ÛR³Ë2¤|‹óõâÞ#£&"šÎþ¦äØ„Q«Ôªp”E|$Õ÷¼ÃØ¢‹€åŠÀ3°yLuûϺ¡ mõþ¦ÈMØòU_tJ†ƒv*̘¢¥ÑLÏQô'¿ˆ@Ìmñ dýå+’Ô—O[´…?ަ†FJ±Žèÿáä¸eºé¦Èâak Ö†¾ÒáåøÌçð >”5Š…Ÿ VUÑ{°¸}r@«¬Aº{|)­ƒlQ 4ì:¹ê,?¡åaUYãa$~\rYÙ×?ê ‚Súœ¶‹Ÿsî+ÞßãiIsä¡Óø KÆ¶ÖØ¼!÷‚™’ú@ ÏãÑÙµ‹5ÎÝ¿÷´¥1}g(4OH7+ˆ ´œr|óU"¶äýŸè!f¢KÜÙ6 Á½®Ô4 M‰¼]_ëêäcÈxSÅ`äÂAؽɲ0JA;"UQ^üÍÌë Lq.Šâ£zíàŒÍ¤znc–wÊ»Iâ¢x>âÄ(Ç/Uð`:WzN™«2’0>§ò6Ã"e7ßÅ?5\} üKT[·‰tž¢SB€zÿŠ¢B•! (Ÿñb]Ó13Ú‚"Â,9>QŠÒw*ºñèbŸ÷}{^ÖNÓ ‚gÞy€*~¿ÉMí–ÄÑÊßÌí,Ý¿¿÷ V””†¸:áãvËE2LA²Ð*|/•°®»ª1 9)”"=òltãó­Êú+m3ø»½¨nfê´ø®×{Œ•Ôò·¬n|IGÓé^C´(ëulˆšCÛoÚ‚V×vIF_gËp”¯¤,ƒtÊþËŽDfsr|*Ö&HOŒ½lœãF|–*ïÜZì–2­¿‚" T^¦/¼ßôDv—n»è竨±5uÜE‘ÿSÍû¡IHt÷RH¢ˆ¯%ÎÏôðZh}f]û;0í†Ãý¤ÜάåÙ±-®4¡zøUÎ>1D4E`2J(E ·1…ÎÐ(8oôu[(Ã$§ ¥áðzbÑÙ‹ÅVÊ9™ ÿ( *·jC‘Qˆ V®©u뚨Ã0Kl%YîAfèÍø€ûkBâ ( '×kª=Ïqmò¼(`Ÿ¨ð9^ùô¡CJ*ÆkO‰ x8sÏÐf•€•=ðR©¹»œQŒ·Œ«áN›½IY:€5ú\ÁÀ>mp‰üFì•dc0K~¨›à”Œ%è$*­ŠÀªe¯U¨*8ûÝ·ôdö²ˆC~ס›®_‰zÅßÌ äw<¯ToPt‚çY›Ç o%F8méV­"œ¶Üðèød zðÄîG0ëô¶) ë:+GŸV*}UÛŸ~êŸA•DÅæ¸ß|¦´_H¡”Ê®¦Â$ˆ;£E-€s –êÄÖ*a¤³ ï6þÁùSRÑ*= Òb<*; ˆ¾Û3ª¼zW·¾(ñVÜ©9%hÆs«uÉZ=B–>lÄî-#©­ÈÚCë §×­ /SñôŽì,…û‹|+NÔæBå¯(iÊ”n›;Ï®†Ø®ïecx;è&†FŸ~”\ÿêç¥`Àø=Ž4x­2Ϋ)¨º9÷ütØ‘ïÏ?lBÌ7Ó¨¦Är‘{¡¯cM=e!uÞX…§­™Í}§^k8.«jÖäpÓ–Ê÷\(¸#gñx°‡$+¦SÝLëG×ÚïÔÝ÷nW…y úÖ‚'‹¸×`¹œL,SÛÀß0B2ñ¥Xf­Ú‡“ú¬H¤“òJ^=áà€¢4 b>JR9CŠA¾“àšˆZÆ |"ìƒY@¼5£ 7@Â#B .â#÷ìSªæ’„Ô¯Làß¼[A彯b…ÆŸ˜å–ël(YÐ!}“Àß—ÿµ¸áæmaüåQ(!s DŽñ§B³ƒŸ.›O0(ažÅ‘¸³¹©‹\nIÏ=¬Ñ¢•®ñ ¥ð%å)ïAÞÏÜV$,’g±Ø±‚(’$ëjó]%öÎ'í©²KÂÖf½æ¶4„¨'YT<,‘wöB¨¿V›„éO&[©Ó"Ç¡óÐÅF¿ ¾šlãˆu]4ÙåèÇ#ùŽUcò_¦±nÂ8]‰oCŽªTö6¢9Ú§ÌK8ðü¹ký饳ƒò¦yåÌëGæ¡8yÿžìšëhcø»#Ã5,<&c¼Œ-®æN¶îËŸ›Uà|FD#Ò\ÀãþÐv7(,’¢=ߥäPÉ;y¸ìçæÀ{­ÚäÅ6|ؼrÜ‹éz}þá¤z2HËwaØÅésY[‹»ró‡Ä ¯Ÿ€d¬ÖÃÕ[:ˆÂ7…‹|¢O‚˜9¾b,Îz¬a3ä®/ˆUߨ¶Ð1£×jø—ä`–~ì5æ¼À){iÍþGÐ JFÚ„U5ò´"h¶®ÀjölíZî…â]êMÁ@VØ·‹•X“Ì)ö„™”‘ )ÀÀÙäÍŠ£eJéâÜö¥:»qQ¡$F Iæ/àÅ’{Ã> «kîFŸºâô¨{B½š"¸*‘ˆÈ/F#"}ÃísZ»"É_*à§NM†¬„”bûi“»\Í*o埫ῼ0<)--0ùðJ|y=›9^ôx2±g0'ûÄÙ ½P‹cþ-JȬnpVeœ)Ç)†•7iÆ™g ëGaÀš©ô¸“Ðìg˜k.K}휱]†bMPréè"¯Ä6޹Æ“W0u2÷ø5lüÀôèλ·í9y š nÑ?ËX*íúol3Άò"F݉ßÏÍLÜš<Ö—‹Ö£oîðbN¾¿sBBaåKYE†2„‡ŠƒËÄmçV2Rid9lÍ5Ìõ¯sÖF§¬(qMVB¥»´dû<—õhPL¤Wë›hDÈ.£±RtâÚæE-'Ù„b£?¬yqy]ÿY3Ú›)‹kÁ5¸ Ô7x‘P¬eÉЃØ^4lýTѬXþ enΕÒ?Ce¦®ƒ*z öè&vÊ$¼Ï ££­¯¾GsP¨T9ŒÉÇïb«,»TÊléYXJ%Á»o³d7§Ý™lì*ã%çOk%;Iû¬% ›NÚa)‰<ò8•´ {ž4JÐÜ]^ìûLgÚ 9\ãîÌú¾™–YËyÔ7éGv‰)K7Y™ cd»ÙnÌyÇÄóù eÁ¿<8/\Û“nì±}óBKÛPT±^­’¾LõN³‰›Tùž€â’ õjÎ%ÌAÈh:Åg‰!•÷…*P¦Ý¾8êÓ-\öçE5i~muþô©2uÙ‹C²®æ—+gHiÜ"P]Öl=˜B€«ÉÈð¨m*åÿXÿ!¨ÀšHLHÜq‹;Ü“q4zè_ Ïį.½T!ð%6y¥Þ]Žæøf‹|²v*\ß Eµd¡Äú 3žÙÆ6î. ¯x«ÜºÇ³1ß¿|[ÿV_ÚÒTª5pÕ¾ðr­7ã%Lrh¢æWÊê!¾gÑ‹76$€;Uø4®¶Zà>BÈbû9.WÃÖp]ŠNM‚*’½¶t&¡š†£ñrB„9Ð*åo‰+)βlL×=>ÒŒÖR £|ù_öšMÝמ,ׇp‚"ùÝ”ËÇ¥DnŽlÚ‘ÁLüÔÄ%¢– ÝGA ` usp*d‹ð¿|OÌ=Q^{óþ— xÌð[è2 Ç‘1iPúCu~%…1å:"Ö(Ñ-;¿œË%ÕÉ¿Ë5'C<Æ\PPÉu'Ǚ게Týéqq7–XHéÑ_íË£}• )Ø ÝPJNq3•tBìÖ˜ÃödÕ2ù+Š‘^½´Eçówså$ªn×}O†"ɼu ŸXã8¾­dfϳ‰•fv §“.AiðØ‚_õvÇmÃá\ÉáSÊ$·Šy„üoÚ{ÏgTR½Õ–ø¡ºêÖP>Y“°&oOcI<+¨†¥W™óØnÜ`ȧÝ×+6Û‹èŒL"‰¨5³Vøð•O¸.îò)ÛVSŸ`Œ^{¥ÑwZD½â÷þz×gÍî¿ß Þ#Äæ§„yZÉ`žÒaÁ¥¾@‘V<š!û;­ðèý‡‚jÙÊ~„p;À±¸›ÐÃß8–Õ#e:üD«Ðe&7"ÏÑ8?mnVuÝ5‹ì#g1²yÁÜKc³z×5,=!Ù[éAKUa–SÓÊ-¨e¯!$´Ì×4˜g¿s}Ë㉅µÁÉ Øÿ°£¾½ó9áY#|5ˆ­Tõ¿X<-¶ØÍ6š2Ç¿Á¦œÊ0¨;­ Æz”ªûá½C Æ9™ù¦IŽÑ.”ÓŸ»è$gp$=nðyÍ‹¡¼ù§ãECllÖ{ˆ„G»›E¹™á‹h**àd|m·^7Ãcõ纾'ÕÃ4Ã:¶tÛPû®œiàYnº?|õDEÀz›S 6° ­äñÀÔ¨»ÿvþd¿þÂ` S®¶S¦ÿ©7»˜CЬwÈ[+óPÊ€fT¼>:´4 +‚FW;«5°®`(»¸ybS.OoðM¥Ë3ô¬–n_hÉAeõOµU3FRòü]h5b·¨­6¥;ï->O5޶0ÕÈNv «ã<×~ñzÄ]µi¥Jv÷+Ù¿~ó?é›ýÌŽû Ùö„D>ªò~YÞ¬Áš–{ò|ƒ&âÝgU}²mWtKëÚmùg²"{ïDàõl±{ÚMIºÕÂAàÖë÷u×g™n‹¬’ØvÕ/ÅvZ ‰yƒTê©¶×\ ÈP/;I¾¾ž$Žû# _ô¥Î’£¨!„=¼¶r H>L¼@jµÓ\!Ày¸2~=áп»^4à11Žed¼D(áp„¿£ øèt±ÁfÁ²Ëè&ühaR'e3ð¥É{!Çé…A°Ø/^×!«-#:`÷fdf'FŠÚ©é âMòRxö³Q¹·¶CÀ =ív¡w ÿ~Ë¢.ìõp„§ß8†B Yþ6nÕíÅØašzõãÈ9·˜ÿ¤…G—ô†Ô A>×/¶+ý/Âáô`¯ºÖúqé~·ØÃ šaa¹¤ÜñÍÂ'?FM<î¶½ wÒ2ٟϽGXèèŽ(¥$íl¾2šë séEcÑýÀšl¥»ª m£z„bS„=Lî kÌKÌÊóɤ¸ ˆŸémJ}<Àú:,ùßSjQ÷—×+þÁ9Š<ÜñO¶èGc¾¡]¹óÑÈ£ïQ-~äúEÚ±Ý{%4\ÑÜl*.¹,8¸³Õ3„DhÚ+¦œÃØþ›Ï(¥ìì­¡èØúi¢Eþxæ•DŽ^Ø×­Í¬Øö¼Ð€ª±ïç XÖ/•C7–Xþ†€(¼ Öòè›õí³!ˆWÇØtROû5m[N?¸=…Q_›Þœì€¬Úà•ú%+?®¹¹3¿á/k‚Îã”™ ýü`_Š WîPÐŒìAL°Înµ_{Æ$8(/°š¡X-`€øS<·Ÿ¤æç6i“pHímó$Pœ–*/w’Œ¶A‹ÅòlB!«:1òB9ôG 1!’ü_\4 ÎtÆz2˜ÀpAøûLË»yR‘¼2z<å±[âlj¨ >W˜7‹×£S¤ynÀÐ+±ÂÊŒ¤í ú„ `Tèd^ʶzÿV„£ƒÃä„ä F+nA0ëîNŠzlΣ@À?ë/!vHz>wbÔZrðd¢f(н¨ qLªv8HÝhkØÄ1Òc•J( ,Jëb\PÜnZ…¹F¶ÖÚ¶ìÝïÝÁoJÿÐ'k·Av(›Όh¡î'02hb¹úä( ,ãéCÈwHýPW;Øw‚M¢]Ê%¾oû« êªi9Lõ˜ÀE,ŸSŠã©·¼þ®ÖÐfÐI×LCîzv3¥X·Õ»húH ±~N¹ÔnPñôªËýØ÷›L DÚÏQ ?nìŠzÒÒ·Üh ’!>2 ËãF´ eAºJ ` ‚éòÿ’šÖÓБQCõØb§÷eKf`ˆèŽÔ{5"¿ž›ëÂÕg‹í‚ïúŸB˜öø†U=•öÑÀHò@CØ‹ž´ ñi|=„ù¾4KË=Íòô_xåP™f§ùâëMF"5—ºð¤X1ê)aîƒúÙkMËJyŒÇP‡‡Ê"– 0892ŠI ‡É-ä%:5R$ûµ“SnUÌŒaƒªŠ.ÀÌÞµCÁ»Ý¶3p°H1O’ v$ -¨’Ä]«<xŸ@À캺ì F ¢4P ªû—ïfÉ@¸‚ç~• së¤Fô ƒ‚·¡\yLz+¾€ñ[gƒ ö¿¤£ƒö‚»v( p¹èbC²¢NjÑËpS@8ýƒ¤òçEüQ`ñ[„°$wA¬2ÏÞѪ'‰´ )]B¿HdyäÝENEšÔ—òV4rSÈ¿Öcm²Wµ™vÙŠe‰'q¶ÊW:nÈžÓ,<99…O–AÏ€t×8;Å:¦UFðoóZ´ö‘¦ƒ»G¾‹®—{û½-zë\ñ½åö~R˜”$0ÖËfzIz#%¨MOšŸ^†±o²„Ç+…XÌò¦¾ž–œX}0®¨k–±e§Õý¦SîÁ'!HF.§{3߃’¾€ðná¸ênôI:#ÛçnzãihÓ8(mØÚ€{B 5‹Ãsªð™$n8|õg± ˆ2r̉ÄbP{(DiaôqÏYFbœwvpÔ´s5®tè‹8Ø fû.lÉý¥û+ƒXxÂûS¡í<õô0Ýée󹼨aJh]»ž¯hÄÏ=jD¥3Μî:¶díð(s޲ ³ŒZ¤3•ÿ©º-Ki¨NìÆ¦µ%© €r4ñ.(ë…Òª¹¨¤Ü=?¿n7ìl¹MÐw@§å³¸!Ñ»ÕóÜ@ð!^Dû(Fr«ÜØÂï¾§"; #Íì‘g¼Åî¢[O×EC¢ŠtÙr@‘©,'bÁÌ”¤@€_1¢ÃL^éx‰)° ¸¡øD ®"¿¬ûÖº ÐNó¤ÿD•p^=|«Æ€ü”(¬œu&븰:‰ø±Œ‚¼ÚÁ’ú{œõ!÷ª»SpmËŽáZó®Õ›·D<Öíï§Lì2%ø¨Òsê|}ŠÕ ‚[‚.Q::f¹oÍV?ýø{”›N81°oã 3¹;bU7|)kÁæ'"Xa©qellëäc$Å˸­9 T£%%/¾DŠÕ¹ X¦h^tÛ&”Í×,ëœY)Ãí¾Eˆ•žÅGèÑuf# ü°7wÏGÐ_–]ðÇû”…\¢r+ aÛ¼¹;MÇÇfcª|LÄW÷Ç•§ÙÊ`Õ„´Ý«{&n¾¢´pø?SôáCššûp×ZƽîÚ]/0¢ªc»™ä¹I®’·CÃÐÔmóh»+w›¢IOK•r„øÙ06ˆƒ¶0Mн Á1•{……Þ1Ï÷èÜ9×üê';J¾w:åðÖy2ðùûÿ­‡‡¥€¿½ÿc¿ q±ªîìœÖ ðI=mÑ@$ƒŠÝP˜Ä( V3p÷>¸²ôîýy·‰Ù92gcn)‚Æ]­êæ\U%ãáÊ‘APSßyâz,§McÈ,½hÕ~¥!Icõ‚ÞÍGn€0ÂQé¦_NïúûtuÂ`¯Æ)ÅÌŽ:õ¦ˆmOóoÏa»Ý˜Ê ¸²\üà–ó¥ânË…7”I ø>p8‹Éò†à»èóòQ.6·òÊ…ñtס;.ïj§¡íÇþS\–«ì¡R½ïíuÒBµ2¹qB÷3blë@€`'Kc\h*‹±­bø¿j äéà·û®¡CY(èvúDZNWR,{,ì¿ Ö -` Þ(Aûá Ô;óGÏZ\Ï­-ˆð–¯|çmØN´¼Z&ÙöSÖ6l•ºe‹ì;f¾²#]ÇÙ¥Ûµ 2«.²wÝË!£m¶YÜrc‹-*l¬£6³·~ï¶µ×¢$R°.«·Òsù…R¢t§õºQ†uñt¿û¯üFЮíæÍ]ºÞÐŽA£+­ì´9¨&8B¼ì'=]dÉ*s­-Pþz+8QL%µû½}äü£([S Ü“´tTA ¶‚›’0´ÞÍ‹›Ž¸­³;IÊ0´b4íôîÍ ô´C4&´0ú‰Á —l:Ç"bo[Ð츻›‚û ¬nlèó͸¹Dâo>¢NãœøEæ;_Œ î.·úŸ×GªžD©¯âXå ŠÎçò  Øô35‡üB kMÍ/b»zBvY¼üÚzaÏ¿) D1`"¶Q…hûh7tI¸+NåÑò…Œ×aß ¥l›ÉÏö) Ø…vÎ$a Ðl¤‚œsˆ„•ö><ßà/aާV›­ÀÓý´ÍÖ€2 RîÒ¾KÆ‹¦§Î‹Qt²œ™Æœ5™•ü‘IëÁ}ë–ÈK\â)ä’gJû·X-Ó¹®fò[;%hL°ØkvhYÅ’ÓR›ÖZkó¡yÜI”пYžÓ\8 Ó]*Ý¿BµG…xØ6Ÿ|ÄÉ;„p ¶!M¤G±¨ÝßzKïÉ!?ç Ã…ù.<âáæ>8¡ÚÐNË-GНLÊy’µÆųïFd9ÙÙS®„»u•뙚–­`µ%FÆÇ•žp‘izjÌìÝÔ Áwª U€E~ÿ0ª&_Rºë .Ò/ÙÖ¬_Š<#”| ÿ€é&* J×8’äŽYÚ1È%M@B¢í üæ × ‘ÍäÊ; I*ÇAÚ\†ŠòÌGû²ö£@VSêDÏ> ®Kʘˆßƒ U‹ãÞ´’çNÃñm_®§Y ŒÈå㙵˜#>°ó)uM]Žèƒ€<.»0`#½ýêV2v‡GLaVy÷Ú³Çåo–BŸatê÷WQ¶ëû®nK.é€ànš¹õ%p0 ðKcíÎÄòî²] %6¶¶ás»Ó²éß!º w¨6VTÀç*§Ÿ¶Ô¼Æ÷¦›cúÂb»9¶™ {™‚ô~Èh+!f{†{]˜Ž+Óq4!︟1«<¿ÊାL³Ló+¢xH4L|ªÑ ïwÂÕý·/¬1Û†Ä&®w^ß±ê¹/B6¯Û¼2Âæ|fa_‚«©/„ Öξ2¥¯Âœ°{'ñÙ¤D†ÛáþVlsÁ½þÆhs_`·2Ð æzvÿ W«×|yÁÉc±,?F!s†ÉQ’ÈÊ÷ 555e ŠŸ[½Ûù”š˜‰ÞdŽÙ]k5ÇÇ.s”µ›[–6Wöe VJø/ûâÜEÓÇï©þû6³òhV ÕZ›VÞCÍZß †Âmz_¸«`÷"“Úã`Í€ÚIBâÂññêþm”¢Ñç¡.$ Ñ–¿B‚£…Ä¿„ÄÅ(SE •c·Ñt›êºæ=’õó ²à°pâ^A·É*î'GÃKÂŒVSçáüþw™ .{pÔsãÛ^hÚ qaRÀ-Bh×Ê£KÕdÉä;‰ É#I•ûQ}Œ‹,µv˜qIïaAŽk¯~BàqÐ\ùB\0CY þš·Æ é…ŽÄËrz¯XkärÂì{ÓÄåµZßž¾Ö½vùÝ]¦§\á¥ê"¿1WW!¦~¨~%ÁkÁ""a»v†c£ïZ8•H€m™hŸ¿¶9íö^`ÅœuÖ`k"“°je^PƒÓrL S¿¤KCgkë!G&÷þ6>Ã@0€Å)qj!¦ ™ÅÛ8Jh3Ò%b 5Ð V+ÄMÌ„ ( ׌ÂÊf¨ÊU0FŠR6þeQ[ƒØÔ0Õe×a“â#ÓºÌ|ÿ]Qîù= D[oùŒâÊPäY“P¥ê ÊÁ·=5VéªÎôgÀQ»iéôÀ½àMɨ1 ²g¬Ðª, .Y!6j°æ «ó‡Äqùt]ó 'ÁÿnuK½QJ€Ýë»?) KѤë’p۸ݛÜ!­™ÎàÐ ê ~Æœ…î“M8>'ôl—ˆ‰œdô<˜ÿæ"Çî>këzŠFU†fßÍÝõ™­-æàÏÌÜXq÷Ïyßö™tÐz³“*7b®ªœÏ"`ƒRîdñá»*.ÏãËÖéÿ7uµÀ߯Tæ’pý¡šg(NHl©f ïÎ1ÙÙ¸ýå$ТN‚pÀFÒ›Ï(i@Œ¹þÖ¤ï:L*xn¯ì/Xî%–Õ×!¬Ì¿éB®kÙp™,RÎDQÔ”L”þ„3ܽ2¯–¶_•`²XaÐq‰%/XÑl]ßrc¡5:ÊGùq<v)aT³‹)*—§¤¯„O•äV,Þ_¢Rü†P5Q~`ØŽ î@l›z¬=ò9F¢G•>ô·Y®j™ù{׎5~W2Ýi`Ê Þ8õ®óG'òòREÕ<ÂÈ^Öߣý­*ž~œNVûÅ´R?]gó€@„æ›k7qÆÛ¬n·9&w¼ï{šç¶A’¯l0Á_Á"·äóTJÏôT¡¨£HM¶€åAƒ6$†fôÁ)á(=æà³ýI‹pW¼¼p+ÅûÙò½˜gUÕðìN &ê !k³ÐF,µä™CØÖ áêQõʸ²c@iȸ·³èטF+@yæ¹Ovu’oœ@u^§A¨øU_6>½¸öÎ%ùž©Jչ⦠õ<¦> еs¥ ÷—öµ—#¤Õr$ŒwÛ°0š›¼y¸þ(ž¿hëvƒÒçtx<Òÿ"iù¯Ìnúp’%Qò”–Œ*ï´hêSVŸ‘q»ç<«Ü>ý7‘é[Ç#ÞáÞ W™Úëaû¶½üµðö¥+µ['XûoäÌfÌÜ­† †õ<Ž=¿üßF¢Z]7ÈbP› ¹a×ú¹Bòð¬š^!Ûùά^@SM^†Ãpó<>ÐãÄé"Úéü <ç‰ÑÛ§cçâ­šó±M&Ö|ÿ¿¿ìÊ0DŒ,èmÌ;?}Ò¿juBCj S ^Ó  žæõrñIúK05ô0H· „ˆ)2@"öа)1ÉfPãÊwž¼£J%îä+Cª/ð€RÉ7u™Ëû[ßöõˆüg» $(ìÇOÐ4µ‰vÆúá`y¸YÞ!‹nüŽ«žð;:‡pù‘ø_¿²šy­ÀIŸúÚ]«pôó¢Ã#ƒxŠCí‘Oàjöì;ÎŽª7ÖòÍ -­½d=1aÅ ãαX]|ŸQ ë&¹53aÓÝ:~\4ÏïH‡ÒÛdP2 >M¤ì<‘E-:®ÁÀÍK¤ÂíI"ǹ1/Q𵑍ÁOçâ ç;ßÀÇ€!K«í?™8GÉÂfBAdðÇØm :ÃÝ:[þÚ3Y%%(±ÏÛì…€{þÔÌÕ]²±ÏB?ß:E“ý€x>¶ádP*z¶¨˜LÎõF:ˆ &;ý@ènÆ`½hîU õ$ @,ûá2yP•¼°þ ùîxD QüŽ™÷Èò¾X.öòf1hC¬Ù°×:3IU{é¼ÑŸÀuÚBAÜ(”jç^ ôï66"×yO¶Ô´@ ¹Ù÷ídóŠSÞó$¢]!,|@§öïÛ9tÛÌ)¥ª˜8 Pj&¯nDÛª¹aÕ¢4Fò<*â!MîÅ0â ðÂå®™`d©´¿f÷\}Hù1;Ì_½ñ­ÕÁ]‚7Ê! ŠIƒiaV…äqJ~º42ž[þ*Èy†bnÌäÓÊ‚T`ß©!FµÞø žº”[’,\ó7h¸x 5yy†äzµ‹§ xÓˆ Õ·Ð_D‘üW‡Þ.‚]8Ä™(¹,––Y}Ay¥Ù_!±ð*‰+˜¹á‡”Ö+Poìö|OñŸ"X½†O£C7>]¯I4¬®®­•<ÓŒîx;£á Y]‚&mBÿ´ÔÖ;Öª˜YÐékßpEÓž@„pÔ#ÁL’Á)©gý‚V’ª’œgB^ê¾Å´}E2àK?nwã°ýrB©pË8¥ÁÂ{ì§Gr‡³M+úCH¼u+4¿*x:-/ ‘ø=/N:†Òû"9 ×'Á=ôHXâf©e¬ê„Î7~£Ü5\ÒÄ<Êè/…2ÜN%Ló|”õôˆ®³žwž¼Ö_¶]A+®Ù¶íº»BäÁ7÷%tbZ­€Oî=«·×ݵçÁ„„ãâõÖ#M»×f“e)ùn{¯¬(Ð7Úßb"¥z.ê þNÄ¡ÙPÛïô §éw¡¯óðvÖlÝ\¯—•gDÒ©ŸîJìÔM¹§­n¥èØŠO4ƒ:{°Ö0!uWx› GÆmBJàç•%A³Ák-F1À ju¹OGŒIƒó2cë¼ã4ûƒÛ’²ñf ð¥€áŽËÎE'›¤Ež‚°i”EÕ­­3¡àA¶ÄðÇAý„ÒÉr®Ç8•bGQ8îÃ,n€5e7‚{lÂ`Þ@ Áfþâ!EÈO€ç»™îŽý U*f@˜ê\áOÙ›²Q€½Û\—ìêM–íY£ûi5†Möp\cæ1HÞ’ø¢Tùb2¯FwžBˆQ 'AܶŠ)ßëÚH­CÀäcf̯„Ô^~ˆ„ò°ØüL‹C‘+ö)&¸!¨»hçדáÙ%÷,æ[~³hÎPƒüšSéÀ_ÿ ë.ØùÔl>ùq%B³¢b ÞÈ[|´zGæÔ~ó›l×Ú_êžkÓ*z$­»T½¨ÙÉýv.¾C‡nží¼éÎTŠ·¬‘â¿ÉüX®ßj³™/õ!ެ—À³ÄXŽC¨0)EC Iû,üe·óÅya!9P]pÍRøYcÌ©àn êu MøFuh@q³¶‘vú„úÃÂ\Ò¥úFÒd,Câ’k¬ºíHªþº—q§”°°ç±6ƒ>$X2}ÑR/À•)Ò(ïî÷F–Ð7Œæ§Ù+ÓÄŸ__¢O9â|âTt5'Ñå줹ɅØ|*§{ƒç23·ÌÊß"Vì9änrê0ò ŽשòÄE)Ðß­Úv–ûÁSkÖÂ/!Ô¡‘¹dâÐÃõ`/5߃őNª~æÐ°.ä­]”âj÷±¤P @å–í‘|˜Š fõô®*2lt¨3‚8åJ î{¹yŸÒÜFT :æb8KÀS ÄØ`oI°§bþJÞGʾK-~[à3ªž×¯)ùÞ<‰En¨ˆÔ@¬^X<1~¯£E¶ŠNBIkõÉ‚ 0²Á  ™0ÍHâ*¹é*¦rødÄW™QkdÆYÕ5TJøøZÔåfUÖó_™’ü?’Þs¸¨‚öGMß$Ü\ñ—69"ïÙàfÓ%’kÚò†Þ™Å‰—댞“‹ vuD‘2ÏÓêç窽†Êü~Ìú£ØØœuT"Êð~±<¹Ÿó†e÷Y÷ÐôOø‹úÎàòI“5Gm2hì“Òå*õ°/|hÙ“këØ­¥ºEx.3JŒÿ>ß&ƒï¿ÉŠhvl,•C©/)]mµ¶ìévØY ÞÈÎAˆQUôÆLd¼ ï÷]lCûåïäjŠ3³¢û"( „$2¤.á6´Þ£'òH•¸©,>xA¾+~ áÇ¢Œί:¸ÌïÚBŒ>†0Ñ@IP¥2öÆÌcºˆÝÈâEîK>5A¶jŠß¬¦}EȺOÈùöhâÑB•=l ˜þªþ óíËYN˜Èw|É_HÁ`¬…+~ŒBÖÁ?`c½p¸ á“ Õ½ƒÇ$v²óœ²ZSbÿ=xö=­IïB+tï%µÊ|ñ5b–9„´S4 ‘{³GæÈŠ¿»4“ìe™E’9úp*ÀM0òV^W>ãmþþû“¼L‡A³äßݵ›¹>ÒÖÔa N£ÊŸÚ™—ywu .¾*@!– _Í BùÊ7SMé±Ì÷N+ |ç·ØÊÿ§ëbØ?\c²]S`»n)‰Ëÿ·ÒÓ·5Y(+{yËžeº!ñ`¾§+*÷z}œ©ö‘’h»{Wå]çÑÛ}‡YÙñ» ¼ h J¸H †ÂðÿÙ‘ÿ!ñ±wOŸC¶Üˆ_ìúK‰{æ€ë!5[õÄúÞ"4œõš•lœjž`‰ÀyòëC~…ôQªùx%šNêøÌþä?YÙß g®RÊõ×SˆºqŠ¢¯¯a–¥ðñr.÷â%þä¥| ó³dSÆÎh´EYQ=S¦XeÂ.ý¹£<õáaèQeö®61Ùý˜Í† ¹Œ?="¾ÎeY­ù/¤b4‹WšÆŒ¥' &§ˆãÕ2aÐ3VUÞ “s¤–FºM>^’Êã`Ýô¥ïoµ&@’ÝWEŠãKP«³§Œ¢2KõÒ¥ªWAãÔG) ^Í$Æ.|6âs<ÀÞ nÇfPErN€È›þ8S}X ½ñÊÉõ¥M"§•Æ÷YÓ¨ÊÆÜ &¹Òý¢ÏÝ@f௮_›e#æèô-~u>l&9®ÿx}#üÿ(N²ŸGýWŸð÷„íÜbþV?uD&$Qß^໫Émï-âƒt_tؘ Æ1:š±Wšü8 ÓO#uâó…’3G1¯É7XÕ¡h)s ŒÉnb}ªµ]P¿KoÇÑq6tS±:ÂõM—n¬±tÓ7’µÖØzÔ-®È€@ u˜UʰÁØÇ´zB”<çtŸ…»Óöܲ¿ÌÊ’eàÞèž”‹twÏìcйê!™ƒV8—ï£É¶3 ‰øö@Êø—ÙË(Š#r¯=¢+Ñ¿](æ(‹‰³67ú•oS7!ó¨IIz<õøŒŒÎd‡RÈìÈS ›Íô¦?å[Çm%ã~B|ʓĊ`šucæXtÆÇnØ‘˜HYg9GvÖÏÓDOaVŸ.ËÏK7³Ó¡ð¨¸ðMZÇ(GâfCAQV×Çö*´mjF׋jV/¾µÇàÞ\éM“0,äÐõóIpiE6—íÆ]¥®ÍQétևʱf·yúoÍÎ])R²C=»á±š5°é©K (»"û¾ó9ç@äò¹â·ÓWì[¯NzÒJs.G…Z½F'ï'+Šö\˜àYs±C– ,%:àèG.WJÓÕèõ³Æ„õ4¸î3–õÁ×#Dd‹ød‹fà°ˆ.Ž.¬'GIB ܇Ö$Õv!ŠÙÜ}R"Æ´"ҽǿr}é=Œº¿zŽ,[À·t‡7ÇÊã.ú¿7$Óðš _¼H%`ÇÇMnn‰OqÂõ~lêE©‚Cê´Õò %œ+¢–;äW³ƒò/ÕO{>× ¾.|ÒzèYthºD}絔ĮÎI-‘ò‹¡RóE­®¾Mà{ÔÓ ðY¯ÔqÛaî¸ot5˜öi?©ÆÀ.µ=˜N› 5·Û‚¦k³0»0'}ùí°&½Xõõ¼®€kZ ©Ø:f®þ;Æ\ v9"߇d0h+ Ð¥¨ÿ"N÷iÄZlä@xô¿HÛ h5{òœÑÞÔí}Anš¡mt3Õ´O¶­¢ædmŸdØ_íÀº® °Â]Ö p8¤5šË!~ï%¸¿5Œ1[|÷ûoAØgâæ/}=e›ýË¥i3 iA›ãÐ!  ¯ú€ÀðÁ!Í·<Ǿ/ª µ8G“¡XèsÕÑÖ&þJ)7’f{øã*ê‰À[-¾þB‘È®×X!#ØÓ¬)v–*Œôú˜¥„û‰‡ X‘‘íÌio™äòèH_UeýlËɃÎÙM&Ôµ“O—p Á¯Y¹C…Õª@iþÃÆe] ÷ìïîÅ€7 óºš9 H8\¿SÿvØüÿR‚äš3qJÏh,Ø‚+ïµ×b§§>‰ rÃKìKÖÎyHò]x5cz½f£%TÃpD| Ù]‚^· µBÖry«U¨gêðŸŠUþñu_ÃiU¯ šØDú§‘ñÍõê²ÚUÃY‘ìh¾˜ä¾Gè¼ÖSšñ9K±ÿL}:D°k »ªyŸˆ]OÁã•Í}Ž8ÖÖí}ŸØ<À/€õȬéà¾'Z‚.Ã+ß €©‹q= Ê^×&’ò˜?{7øì*Ê[â¶yï›9{¦YñoÒä\-à Ý]Æ%/=Þêre꘨]έ…ß¾(s±Ïè™t°´¸¨´~bü˜O¥^íùCïï$äžz<¦¿ÙÉ FùpKÒzÏbñ5ƒ¬žù.}£)S:žYÖr-(ä™2Õë›*Ie€ŸMêüÃUgê•@³4·äØí}°‘uékÚŸœ©“ ¹¼ÇÿªTïdÍÆ_]C‹]‘Âcò|eú“E¥o•ÜÓb½õ°®BɆ‘{ÊÞqp'¶Ÿwšé ú®*ˆM5ãÁ ¼ ¹ÿŽÀ·üö×ËÎ’ñð›݉¯¢0梧¬[*ª·üˆý—% ­òÐR5×Â麼1_õ˜ XJ€hZ¬ b"´(`‘Ø€a±³”7í’™žŽ<¿ÿõ ì0ÁS·Ý7©"ˆB8øž&n?¼<™|Ï»Á ñ–n\D²zJhä\ìÜ8¢â¬öVª9~B Ë3™¥’_œbÑm(‹E=Hò„( CË@g½Á%f.*Úà`Û§´Ä!ã±ZîÍs¿U4-zÊ\ˆtŽȤÌHëæž ×VÝ'äÁÏôªqu^ÃýgÞAÚbÂz óâ-\„N "½M¢cSu)º)/¹…ëëãÆÈüQÎ$+f*KHhÜþh‘ÖÏ¡`ÃÏ”[®®ºt¶ÿ1•è£`é.¸ä¦½$݆ÿZã§òXÝ™oG²0#çìdHmº(;ú²'N-íÓ¦?®þŽÔr/¬ÃF»õÃr"Khþ»ûÿÀFõŠÍ¸ßÞ#ÉÝÓ@ª’”±=À$rþø-q5Ïß®t…\/œ"Å»m çtaÛ„š#M)€óÿƒ&î©oìz²À¸–æ¶È½+ nô1÷kUŠö#(°iø$Ì„ÿ_Ö³»¥F·Q©”ú<ÅiÃâ®uh…®f]¸U=Ñ@Cá[à6õóMßõÐV…5»I€®þ<}¦€>E+±×*-Dx.`)KàV~Ò7õæ3WØßP§| $ò~3ªÍœ ]MEæ4ý¹VÅÕóðfòà}g߉÷à œóCÍk’ÞÊ@K+¶dOÆúvØïòœMb©ÅÙjÅM²«ÌWÞm¸ÀžægÝ24VhŠxÞ)S‚:^L¤=wAÆi5i÷pXµÐTÊÿ°ZE‹ØëyÊkQWgFìïdè]:/®^Øâ9ol½óõIg~¹ŸüRS @Záê”§‰¹Ý0Ír4d|º™í™ ªÉm1n 6K±ŒÝÝ(s‘\žaV~í)— rˆk/›‰)Æ‘9+£¹ü³p®ãV<@iζù›ËFýrý3Cj"ÜíâáêñÌ‘…rçÎ.ñ &—!h“`Sù\ùÒ@}$vŸ#ýÿNóßðãÛU#Å­÷í¿ÿ¨ kN:âÇúû²|:þ ìÒߺgÔ5»´Òka‰hŒ¦2uÈ`T›3_«ÂÚÔÖ-o9!äÇ]šôãFÂ"S»Y»;Ö¢Púg ÚOϕ̀´&É‹±c6ÙÏžÍZ• Ñ©s½ ú±È¸õDN}~¦ÔD†ågGe-Řnãç¶)¡9­‘cXýÿv!d}¬¦väùdN±‰,ëXP³—³ió‹/ž«/ûçZäõ¹èÌ(0þs«¸]RéÔd×8˜Ie0’lÑcÁòuzé+‰ÁÏ»tRïÊhJ$Ølô¨Uƒ!¹oÅÍPÃò—oŠgvò—ß }ƒ©:!æ-m¸€d»Ôʉñ+¾_Ùoú¦»ž» h&:}¥O”ZexÖƒŸŠ¸¹á‰ð@aÎK‹?ìävF ½¨Ç$‚§Â¯š\„[Éê%3w+1d "‹%;®å†R»/ÓÁç½$´FÏzk';m2ûÝœâ<ô¢Þ•)Vî¹€Ìs2Ú5çFØ÷K˜¨¾ß÷A[­r.áà”³ZóÑ4ùOï7•{)z¥7ÄDé¥,’È‚ *üŸW 1QxÁû¿˜Ùÿžq³€›Ü°†Vé¯„jGõ»¢} $änšÕPsäš×@2Hã¬óÆ­Òëë ·PA@¤ûN UªeÐSËQДFÖÉÏó}×᪴sš£^_ù÷ÇÆœÿJÓ×½7IµÔÞ8€GÈåûX¤š- ÍX²TcÝÇ÷Í% 0¨Õd§šïu’rÉ邸õC†mâ£#ß•JÃìŸÙ7K½ä‘WŸ·æ¤]Cx:ɳiÓKX¨€I@rPØIÉÓ{G5w®Ã–½ýèJjà ‚áÍqªAdUš‡#Ú)õyçÈèùg¸ ­3þTòwiI©þ.«}ndŽi/ÍŠî‘HMƒ£gç;“®!Tæ¦ÚN7.EÏö„„ EGZ2ÊJ'‰}ð,]ß‹FBBžáÜ… ‰o;ï**Ò'±Á#ÅΪ|læ•—\Gó˜—êh/µ¹]”Yïö$ä+:BÓT›u{i‡sþ,Ð)ÁÔ€I\á/ ò©C¹H£'i#%OFruŒÈ鸓E„rv`g ÕT×±ªi:ö)&ÃëöˆUooD¬6«zNB¦À)¡{¢º "NÙ(K`h`&+:â{æÐåIa‹0ÏÇó&xq ÙÇWš½½ËŒ7ö+dl¿3ÓïgÍ‹¨‹š<šèPŠmY¬óÜÔi3ž¿Çx2ß c·5^"enùIÖH5j@+0ùÛ0ó'ñOñ¯1ëå3b[)G½<Ø5}{RÛ³CIäþÌ~Ž·z<"´]å1îúд # â8yá‰B¼ÀLßî z¼`5P¿ˆÿsUš |r%U=0}àE |oL52?¸Æÿvß“á‚m_¢ŠòO'^d^ïºò]•Nà€ÖW?/ ”P?ã/€ƒÈó¿ý¬Vv—J 75½\Nn¢ÊßVMäÈyL­>$ >öA ]nk1EÚ2ŽÓ²ZÝ«W+úúC"q0ƒØ¿$4Ó ÿKŒŠÉ¯±å9™~·ê¦¶öëÛÏäitp_ì(°Å¢„lg¥s²‰÷»o1­«Ç,g)»¯X‡tr]ÿ&ÛûU‹3ËA#å]’b0œ‰,koéà¥,õ3X‘!eëîá{ÅhƒMU_Tê(²)jþA0ôöàìŠ)xX©ŒïR;àúg°xIX–_^øµRàKÃå« -%G;ç/ad©ˆ­kÁ¥â& }DLÆŽÀ].—¼ô_v»Þû‘ÖÚ¼EÏõá'VqÏEÉ~HzÈOà;“Ãè„‚éZ ?yúÉùfõ2Ú`º Ðçåv¿2 ±7ÚG=Ÿø¾ª®¿4•ãÎ×Ê JoéÞÈø-*1¿ó)‘»®/•è˜/â„É)u1ãõsúβñPÿGSõxçß„ìxæÏÞϤäj¤ŸýªöñUù†Mö0Ïsõ¦5{”’ÞVÔïz~ iÐ5p½3'–è{•èaÖxš5·ùö˜ äUŠ!*·áƒ_è ‚úhjà¶«lî(”ýú»#€’Ý0K»?#˜ÖÆp:I±Úº„i¢³£“ix$fŽÝ@Þ÷™ÿøTºÊQQ¯T¨ðÖÊv’B)ÓA4R?Ø_=ŽBã1˜¢I@€¢8•%Akº^ãAðP¦OJ““#iµjüp¯(åd݈aüznG»Nœ$”#õ¸Lè·»hþVq¨páu_ŸÙ°»·øf©ü$ÏòP½EöÊô¡=û¤Ž!ÅHûÍqÄŠúñ¢Äîõj¢€5ŠÚhÛÄ.Ñöó‰pRà1w.Ýä|–LGc®¹ P{|æAØ|ð+s…AíwÜ#}v ô9_”ƒ„‚  $ÃËB|…æ[^ÝM!7Ïð­;h­æ]“õ(>D¶¦´Ðy°ÏÿwškDø)·¨n’2.Æ%K“q%@Ö ý¯JPC+êUîíK™;Yr¡ÛvàqÇQË+O•jàû^‘°“Ø”7­ŸÉkôTÛÀÖO8è ³ozl`Éæmjý›_¬»D¤à„UÛ`+¶n-ÿ—gtÞÌâí#Ü)üo«âài çùÓŽþÛJ2MâÉuž?nŒÚzs‘­Æy×ÒyÖdý£˜qoá£K:9p4]D&MG6 «Q)ÂÃA(ô¿í1Òíy€fæÙª{6í*«·Ó’ˆÒ•VwAŽÈÕ)\™„Û»ÒÉ•yâÊ„7OªQô•ÖÚ‡(Ë+55Ú±|Ñã{žîNHR4ÙÃ[þ¬ìÿ|óô®µSÿÂ8Ïß»=#Ô˜Þ$Ü]åÚÖCTL/gf_8’=+}f³º~âX^´³n]¡:@x_>b›±2ø‰^foìO×Ícü”ÇwnOhi[Â…æ°<0í€ÎÞm|¬Lu [¯ üë5ÊÈO(4›j¹Ã©n”“$›«wÉ$šz±¶V‰þ_¡n‹sîç§þ¾?è5ІCZüEñ?®÷âoBŸ“—h¢ÆLÉô“÷ÅBçá©Ó†ò-3v~£Ï Õ2°zë8:—ðrS?½Œ§ð#v°×BŒ6ÙvÊËš†/„¦¨•ØõLÆ*Eu&‚6)C„®î æ6Ôp)ÌeüO×âu£½;OŠ9ý«u£fNæ¯ hÝ΃ŸÓ@÷3r°hjæu)®è“œ&À E â¸Ãϧ.ºi¥ùÿ#bšÚ,,”AÒõ*päz?`))¥®6R{ò^ú0èÐw6ÃF4•Ê®-•NÚŸæͽÑm)Í£~yŠP‡ãÀïÄy™Zïû¢ÎǵïÅ«9VP›3Y«©V&¿èv$Èrµ ­£~kôíGF»ÚÛB±ìàÍ'øÊÛ¢5!™¬œá6Q+æäþií4þmºvÿfŸÉÐËa«ƒ*ÌN6X—9ÈîTj¿‹«„Ž òŸ'µbfJÙ&ŽÚë.ÛGžÕXÌ–ö­0»)7«È[?ïg£é´îÀ¹Æì]Ê6תŸÅo„â”Å‘ÿ> –”{éfØúSN7¶7?AdÉz2¸”ñ`õú–‰¹Fs•c͸…ÅØü“hœÒÓ!z¹ÂT0•¯Q¹ÂLy’ö;™Iäp¸\AáÐZˆá©¾1ã08˜²ÎB 1¨…GÔ.û9ô ……b§VaTDVôfV…Ýÿuû@0µ1A[á„Ã=Ö‘ï)4ûJã[ÏŽ5Ê\¯O®ÕŸ(¼Α3p­"V ^Ì©¨ßÊŠ¸Õ„Vî7?'÷?$w§‹SÙý$t°KÓ‡Ÿ¦Ùµ ¿æèÒ§iä;ÿ»Eà^ÍÇ0å ‡…àqU“|êD‘r’ªÇyÜëô„û_läC2¿‚YY‚2.)Éç§’\‰Dv7p’ªLºáeþ¾kï]sÀDTÃ¥ö ¾W?‘†o‚  ª¿pEOm¨7š ú®EÈÒpœ)„&«ÿÀOeÒ Á´ôüÚëÜÆàx.Tt{·¦æ›IFxcí16è<é—žŽØC¼–žƒ*…‹¤àüÅ*øád&NªÁ3Õ×±K?¶í©1ßy›HÌ~ò]_ø êEÛ§äØ6F°jÜáä .‚ñJ[ÙSŒ@DI‡so~^üÆhân +ûñ!ÑÕ¨ìDÈu¸Ï gdEÓäOΕÏ5¼_WèGŒ l¯ÿ’ÙŠJô«&þjÚÀ}“‚#tÎì=H]ÄàÍ…7!Gùúy‹ÿ®l†`å­_æéîvDñ½ðJ`0_Òì¢ yS‡Æ^¼Pô|Ò [³Ž%§¯´ÖÉ>%¼´ÀíÞ'ü[§3_6¶¿Úfi÷‹ãV½/áXëòw4R ;%"wèçÉ©ÂøIò^U¶P@0/n—ƒg+»#F0ä¶:fœ‹Õ÷šw]ÖbxÂsêÉìšîn‹[ôo<ÉÕ夦†mòÕÛRfJèÖ˜ÇÊpM¯5f«ª_íB£‰}b],åF¤Ç‚²’ó6-zôS7vÚ_­J`üÛNò—7ùns|Žíw!e8¿ä˜c3ˆWK ï_—ŽÇâaùå#L_óN2 ôõ‹Åù{iî b‚Ñÿ/Ûö*½Ø‘e8œµP’äù.¥:ۛ §Q± 8Ùî Zãíi@ [(`Ðy‡ï°n$1WFDµwõ·tëùGøL}þÊ?²ƒ Qå!ËÞÜÐg©i×Û¨;4ûDûTÍ£7þ ¼ SUÅ0 á'Qÿ/_×þÿ¾¨—áAÃV"ŒŒÿ›Š²TÞ&wïÑW¤¨XJä'&i€ú¶µ;¼'޾Ó쮺ÐÙ t/êë) T±Š,IÉ)uz®2‰#X™Ù}dÛ)¦#¿çÇ1­ÏÞ²êzÑ^™×‹‰m,™Á»€€‘“ÍQ—©©xZP< CÅE©GCÈ„ÙÒÒÑßñ{Ž‘õP¯ RßššBµžBÎ —ÙÛòE“ ë‚Zå‡4…6e!‡lÚ?³äøâQaON“ÿ†lmu~n(“ ´,ûDdPœ8•¯ ³ƒ×³Ä·”?R¨4.©¸9ÙÍùqQÙnôhCIº¯,¾fA½ÈÅ–_=©ò=Muþéð4Ó3¸ N…Ñ1í}*LìëÕ"ë…ÓžX¶“8OÜU4u‡ ‡0‘Ÿ‡+þbMÁñÞ¤q¾éGrøÛã°JT †PÊêú¬ otç “kùÇQ+1‡} #OÀP9t¼êŒ.pÕûÊð6Ø}ˆ $b˜ª¨Ð°j[8F†Œ¤…ú|]:]ɯ@c€ ?ܵ¿A7Èã*¯èú@¥P8æÕ>טRò³vŸ}¤®Y~þù>eQDÔU’>XËdAÑBfcý¤ä¨*¸‡!p—Äjb_ùwÄï‹Åp>“«vTô3ÍAJ–¢æ0V¥r¹Ñ«.|àc¿Õ(ŸaÉØ\6£ÿ™,zÎñˆ½juÞ¶âcLü¾cÔÁ;zŸŒû…GìCƒ7”ð«^Zå« ³:¾Á2\¼–8pdE"fù+qü“Q™³Oééw®ªâè®vº?S»ªÓôx¬VsÅ·5¸“ xŸÛ¡Ø5gPh.cNºŽlaØý§˜¹êϰü†Uøž|qgSuÙ+¹´ÍžL!§=^ÿ'8­À‰—Á’–©R)',6œÔ8ÛºñSõhÑŠ£!`wµb؈[ü22j…DðK·âsm\£03Ö†»*QN…^vÖü ™·éLêyù7ùÛcÿîY}î5Ä›^õ²ÇÜj)†_ö¨ëß?kY,Ìá0ž&%­ÆÇPþ}°À¤WÄx}< §{|§rôÂ[Ô›¬Tõ¡ÍÀFìK¸ƒ¼*v¸¬º‹gYvï9¾:&Ö+„¯|«ý_ R!hÏ";[³ †CŠÁ |¹@EBßÑW¡ü[dÖí‚ù7ˆ®ÜüC\ìòÊטæ5ƒOÆëá,ÿ˜†P縣í~SM‡|5H†øç«%ì†)DYà!£g }ä.à€ ‚š†Ö31Æ>DÉfïQ¨£U”!ñeÄ1Ið+ÝACòW§-m^Ϲ¯‰–’¥ÎFÝ¡»ÓÚÞzRÉÜ~:Ý­¹íAǰN•|{÷c=ÚSdó8ÙóðÅÈÂÕÔ\@€ÆÑÈ+ªÚµÀAõÞ^Ü ÿdºï<+%ýŸNŸ0zîîÜõ»çT38‹»Fã´ÜÂørR¥ï”C™Z¼æ´H/HØä¯¶oi UüùJòÞƒï^‚\ ²5tä–þË೜wÀ]Tý¿¤÷V©l€më]Eb ­Á³/ ŠAì¿WÑoÕ lõf04dYô5ôwyô”Ó²l颩žÁ²Ö§š${Ù5ŸXX#M÷síËhKÆ>ª¶b™èðfbnì›l³}Vf‡:÷Áë ßÅÇp4áuƒ/Öó倃.¯V§,àŠŠ›³ŠúÑÿBþo€élw×óaZ5ÈMD-´\á¤JUY‘4N¥TÛ d^–Ú›~;efŠ¥¹SBq%‰»:DÙñÖ—ÈÒº ¤/ä'ëò ^[£¡~ žÿ¤fû±‚;Y©P±7*è8ÚÉ“ íã(>ͪûe ¶•¯Í£S(‚+ Û6w}<¡°ì:¦?‹[`-þ-3¢U‡±µX×ÿð¥þ§ÀÂÚš­?»„ïYúɇ°´ # ª~÷ÐÇvçÁÃîüž¡±ÂHq¡™½Á¥3¡ÇWœVE™–¤,I‹UÚ:‡ã²]ÇÖ\²O`ÿtÚë…›fSAŽ7I±ˆ›ê§spÚ,äí8Í\ãff§òõƒ™Ú£?ÞÔ×DþˆÏpŠÐ8°t«®úg˜öÔ¬ÄUËÃÌ € ø'¬–Ü(ñ(®±³ìuú¦÷Ð{³%N¤YîMÇv:ø‘@F´)…-ÆnB{òð€F®WP$0)A×f„ Yý~«ç¦\U|‡ôºcJ¿êŒúû±ÿ\Pæ ;éh{:b6ÐkWÝ?‡ ™æ½èv°¾ãÂÑ-ÏŽãæ)ßôV‘ø(¦Î(L¿orÐK1f®TÊ̪°Êä núÜÎ%Íî§.ïA…D ¿è'Ù ph UÊÄäi#*œ u ŸÂKVƯ'Ë\úVJ =`Ï—½&[”‹r­îpK…ÞÝ{DÛ5ÒgVWiNóš “j̆ijÜЀ"«µH¤Q‚æ/™‰ÚF¼Ò—­6ç}ÉÏo,§CÉD©Ñ#îN= ‡ã—[¨pfRQ5‰©Øà¢Hµ¥xözû~²'ï¹’þs`QÜÛº˜"׫Ÿ:»\}…ÑäÊ,}jArc‰ëøúy¸‰uXŸK êG¶ŸâØË”2ýŸÒRû #òS¨ß.„ˆÿà@r3šÁDõÁ¿À #ÈRÛ‰‡Ýå£òÀ!¨‘_Ñ·còÆwTò»n¤–ØPn\äK£±Ð^Nm/yM+¶ §¸îη-/àuä„¢ÖéÁ'¸Î¶¿ Ššƒ–ÌwJõí+{ÞØ£±¡É„ U:ÍŽÎØ™›’8‹ˆ¿úâÐÏÓ,EϤ#7KP?ÆÀ×Ô󺡬}‡`f‹×ÜL½‘…¢—\‹ Ä„}‰ºY«MÇ\ÙFˆÄêzÄxÛ-{6þ±Æ6—P‚”0ë‡Ñad~à96*ÏïÐ`*±NoÇôßip¡ÍóóÄ€€½€*‘êœ%áK/ZáKŸH)‹.KÆ!YeÇÔ²­A•ü㌜m.Jpýô$Ûñ-#+[Þ–šûØ—¶¹¶Ýö2Ý-½j$<¨¿Z*Ñ;âwsX¸±‹5ïnV±Ö!ÙU¤_ÆmâNªŸ3{k¥>—u|CÒPºWZìC´XÉç?àÊt¹eì¤!¤=ÁýTHf8ºXŸ“úÈ“þ§p Â%wáÎ÷ÞW‰5^«RY<`Ðåx´þ‚¢[³AŠþǘRðP]û—áÑ9dN‰Nµ1UãBö4ó˜šæWŠ‘]DÖ¿²È€ë$¥kßâ_ÌŠo’»üñ¿×Xí Ó±FÅ"ÄÅR!@ò‰µú’`¹úŽôS±×#¢^•ŒzîêRziå´„‹•K¶yIR4¬ûŒ–ef†Ô’Ý0H~çx|ÇsF'ÉÎÃÊ^ŸüùŠ'ò¥ñ,™?‘kr¥ÇsÙ tÒc†Ç©ç÷Ýádm!q¡¦/“‹èOHöœÀÌ#vÆ3 M`˜2è_ã)üOÀ@Õ)ñd¢&'zhSÕa.^)3%}'r¤ôXqÄÁÍ¡x#Ù\Ç]äDŒL3,"Ä]Híž!›MzQ_ÑvBTÙôè5ɽŽR=Ø%­Fq“3}´*í±îÉÆâl–PëdÙik»GÒñ-)“?G€óg×ç¿þçzNXˆ9¬Ÿ ™,)7Üû)†ò¿l^-ïÎØ5¨Aij8 ZœOùÈ']Ü ´÷ëgÂÅ9ù>îX@ƺgJŸ9:.މè6ϸ>šÂ¤·sE¦÷®Zæ—÷¢Æ,ñfª“Ã4l®‰ËnÉSÝî‹ù/gá>lˆ)rd£tú Èlйõ¹`¿Ž®F¨ç†7¬ÊŠUr‚µF]àHVx$I÷(3#G˜>þ‡í[7ÞzY‡D±š3WŸšDžËÒÝÃúÔ ­–‡o”â‹ 7]‹ßæ}€Ä7 À9VÓiþÜÚ“ß0Ûi݈79‚¼@žÀÏ”ÀZxtÝ þ™Êë‡û‡Å˜Ðމ䂿£ Øoo3DñFñYýîré{‰â Oaû{æB7ÍòÛ¬HsñãB} >Ç«Ää´8?_¶3—Z`Ôˆ;·—B)ô„Äèea7]è p[Ÿ€Ô†Û+¹àYFä`V«?;|E%E ÛÁ¢‚,Ê©»íîQË”eÊ#lê>Rj^WsQélÎ|L ® Ð82‹áŠè¸`buq6ì—@Ø:—ü¹')=;­uÕr—3µ3ë™ùíá¾/ÿìþ_«Ûü¾—YÍ-ÍÃCƒÊ‚ÖÀ)Íà”Ï?,)d* «rƒ›(­#ã”_<´ ³ê–7ô¿|©9–Í~[Í÷ˆ] =gqø@C./œ£J¥•wŒÉb7IZZkÐH_I´ET}hqâ‘æ¼!+ð)(ŠlftPJ@“4äp$Ž!v¦A‚pŠŸQ}!5"þŸð´_€WvšúcBjõc¥†Ó :еuù_ÿÄ”ñׂ€p@”ã.²çäw®Ë{»úœ%]µ‡PäM†x -ªÃsÙ‚ö=;ØMãO[¥¡b°tæài0 „Ÿ‚ûÅ{¨€3ê5ˆ-ÞÀ¦¹ª>÷%*¦tò57dê‹*ë ‘ÿ<>½Uݯ#^©h- þñ‹ŽåÜyø0HÄ“ë¨&MV°œr®ãv࢓¿f¹ùSέ83ËGUtpúH¾°xûNú2pŽ>®œorDPh°y×Ä8}.?™-ÝÓz˜®’~Í+h•#¸EgîþØ.â ëù–$ßëI³·ð«r–}k·nS (SVĬ‹zÜü”›GBF3fA†X6¨¹µ쾀—N‰ò~kWðó%¦iyÂ{ž.©@ݬ¡¸Ø5FS36>ßttOfèD³Cï5nÐ’Áz–¡u6'tPJ [6\” Ï5*š¹­ñ'yÈpâóA®Ë ¼ Áj«S’R”¹?.b¨ ¯Ý ¸ÊðOx¹"° ÝA ÒЀ°“ ¥ŠÔ;@@$õÙÎ(*gÄ<¤|<ü/Õ%'à~ QFc¬í¢þ¥—h€=X^Ø›ó íèÂnÑ¿í¡”óßßôº¸üÂMàGa3¹eɈahÔãö|[Òdbì˜8çÉñßBø¦÷çàìdç"ÌêîwÛ9õ.8¬)=›ÿdr2 ïQþ3‚Ò¬àgzðÑhÊS¸V/Ý·öþŒTÏÓ#†ÒÄ}Š“¶£äÒîú:7›Dà 6=PÖN¸IýZbôñD a¯ùóN)‘ ו¨šË«¨Ê`Ant ºmño€Z¬T¦!ƒ·úÕ íÔ` h!zÉm7Øx"ŒñÂóêT9 ÍÌ4ÉçK$é!®Qç}g÷(%rÁÐÿÖÂÀχ:¥˜P6õµß´y*óÅ~S6¸ëb”€ XNœÿ«47 /)AÜ› 3 M.§×`ìTœ<©@úlNpq"ð¸·É{$w|¢e½`D'W†>`[A^dû”Þ l¶-w Û}Ñ(Á^ÁÖ¨ªÍ$/YY» U7joLh½J‰×eÅÜÆ¿o¥'-¬|w1*¯T¨ŠÙI<¶‰† õW%˜_ÓÈVÀϘÏiÝÀÃÏŸ¨c[~"™Üvb`5%ïqõ\¨fd:¢dB£{ÞÇ ]áöÑö¹™†q5ä- *x¯ .ùÇÂ&ã7º‚RhG6Yîë=‘‹[?ÒÙÂo×ýú˜ÑX„ÔÊd`N*Òþt0-ánˆl¿ÄÉ!–PN[ƒÏüö6Ë};ò?*ñm*·ìä–’kBm¿lS~ÙUi2ÏŽiû­¨élãjØróf üT¾£ ü×{)ÌÕ¼. (à æÒRX·Ç£šv.}Gu_w¯ÐJàåôõ{÷G‘"·7ÏÓNô>CÔ`ái6N‹b4ì}ømða (Ô^>›‘Ð%âÛ²,ßd&"ñçSe—LnÂ×Î6–sŽo¨p4$ HýþïqH{0KéXWìþU±ô¿OV=82øÏ 'Aã I¥áïzò÷Y[°x£FØÅf^ …g~u^iùÙƒ¿ÖMê|ìAú¹X<ùH´þЋc.&«ÀÞO4ç…ç¨!ý§‰®ëð¬ß«ñ˜we"JüT©¿…)ûë{’|õI7^#£ìoþ÷?o~8ß×UðvèÈ1'„$37|n¥ ÷ÝÝŽg—¦B¥å]ˆ¦BGDíÚAüŸÞ&ÍÑ Å4}Á<ÎÞJF{?#ö·þœ3çi~EV»3}\ñäw+ùTèÍ÷Ó—™–ÃFKVã<}Ùî¯_¹vÙåæh[ Pâ…ŽrþÝ;?Éðp9ÐA™QÌõ¦×Á*–4/‚c@™LIS;`^Ïÿ§Wþ÷ì$ÕbɈÛ1}y’óenǯ„b­ ç)c*y*X¢Ú¦"[ñ¬ù2Ž_Ù!YzoÛw„Ðö6¦Õ³ž¬E-dÖ稊ËOHuT+›¤k2h½ÿß/ µ‚c&ü9–¼ÊR{ÁÊçÇ?è7SÑñùcÖu}T¬ËP¬v¿mþ÷U‹}7ùè/Æ.åE¸IÁr—ƒÔ>p°'WûbƒÛ“ø,ýþÑŽ~}„$e-Ë'=´BÖ€à³w‹2iׂ§û=16¾€ÖB»žDÆ¿›j@k.‹Eø DzB¤Rë¯Ivåç»fT­A,¦Ì7`(z›õÔ$Þñ[°ñsâVDjv9c«Ó[Œ/ÚÚ|Ú„\ʼeÁ.‰LÖÜš”u©q®É¤µä}ðzYmƒû¾ÌôΩ\XirBÁ•qÕÐ+ÈöÄ/ R]ÒØ€'¾û±¡Výýݽ²yÈö¢"*Fm²1|•‰EõŽÝ¥¿ƒáEbGéEß\Õ¸¦L4ú›m}{ †HùÝ6xfų²RƒNP­Òû^`9׎µb ï&иDø ç `)ðƒT⾓ó©¥Ö- î=þÜ*™L/s •PTÐ'Ÿ°’=Þ¿¥ pÇDJÄ!v{'˜Yµ%'ÔƒÍì*®S«‹I™Ë½^×¹Z¶s›rÃ\>>>ûǧÿ›Â>oïû>(¬žq Ç>á¨$ÆkŒ2Lýk"uúÛƒ¢!ZÐH™1 YÛB«ý{,KFÎ݈$3NcÆ×}e`ŒQ>=h)¼šÌ[1PõÂÂë’È“ÀölüP|Ê  }ûþf޵’Ô‚; 8\h¾Û5LsÌxŽûä~ñÛÏ/‚c›SβÚÂ8º½î±ì­ÓŒ¾ù×”Ô\®©\÷$kýEqLÑå`Ñ –ÚßÙ¡íˆLo›ë#¢/¥1ja))°÷¢êmÀ¸A9’°—Á@·Œ&ð‚„ LÈH Šá$=×N"Ýk½6©Â¶63¥ü<*˜v5Tö²>EOgX¦ÆÀ?cºÀq@ïMBš›Á‹Ðv?VÄšÛîã»÷º/{ôÑÖÌ1|q|º/*•#7lÅ{I”q/>»1ˆîo«àý!¦ œ¥<ë½ycV Aö›… „ô$ö ü£÷ðœë²u¼V<ï @Áåo¡©Óž6 OrùÚµ¿¼{î‚3+ñ@ÖFTf( óys$L\ýqGÛâ²§ÇÜ4ž#cJݽ9ûê.äAUè[`ABå‹›R|s/ÝËPÅÀ@Kh'×A`–´»é²àOt´,·ª/¯áøÏäÉ5 ºŽA™_"ä%ÎgÙ™u¹êýD®F›VÒÙ÷8ÖÈ$^¾Ž©7zm¼¯¬ûyIGY±ûÔš»¹£êXâ jeFèœÅ“`–ê¹0¯ãxÉ›ÚpÂ?ˆRavµ¿\HªÌ Fð UäÑŸý™åß%•Ì‹tŸíÙ^Á]ô÷AÕ¨_£ò(Bw1.Ř ܌܌c(w}?®l‡í Ø\…†ÒGô<¤úió¹>|½ü!ñ`j‘«ç½¶y)µu ‘³¡B½\ãˆ'yÉ>$–°±Â7ïEhß3úÇL7õí3eSa‘8'¸ÚŸ9 cÚ”€RýÓ=³øª5@˜«E:ˆñE²W.ùß]Ϲ»¾É#šR_ƒRÓV(•àqîCý¯D$@,lÚ²_‡uŒU¹zlÓeóô3mR(숢Jã„/B43òè|Û“á¾.„›ÌØ M*Ð4bôV˜yjË|›ø´éOg©^$&!]æU„[ê1í¼¶õ `QÿO¤6ßûß©]j)ÁlÝE’“m%w‹»À½ŸzrýÎb3÷ŒC”êögöüS&ø ÚéCå“NEÑ瞢v»ºPU¶“‡/P|>E¢§Z.nØ‚Ñi¥šß³¼ïÔý§¦TšƒÜèú9›(I8#rªýnuÉŸÃ9õ%0»Þ23ô wUÜß;¡ÖÀüÁ΢wTÁNro#Kµ¨GÄH–>=#Fÿ]Þq᪺úk¨¤†´ÄÄßAa ”¾}ÉNkÀ—‹Ý¥¼âJðÈêüÆ“š+å"ùY¶þG8v_Ò„Ò_ˆ”ÚÕÇP†Õ}õ½†óäz/óµ˜0ÕYÀÄÀùM‚އõﲎÈ'©6ÒezBÒ’2îÍQÍa`Ý$ø:képÏ/|”ý4FT²rˆŠuY²µý¼ƒgèêM¯‚‘Ó)Zu“¼—»Fëwh†Mhíµ+v¶öäcÜO­›Ôõ‹ErÇ@úiØîÁw^d·kWµÅ«µ“å\•ÌÍõ…€‚ x b×ÒÄ ÝëŽ-‡ á§d]Nå­óhy"ÀÒõÔÚˆM`”`tß{ÄfO4Nï+™x! —¿ÌBWˆ”ãÖùB¨æ¬ܸ?›€(µÞËÔ•9 ˜FzÓ!oåöEH²¡Y£÷ÇßÑ®„Ý&«˜u$·WkKý¼ó9F$cÐì0è²¾å\²ÑèÀ€í$á:OQv¥õ] ²µ[VŽSNèµQ[¿ ‚ËØ2™Ñèê“|q>R¢e!ároWctG1ÌÃí–OVC0¹EPÞý´ÐË‚H0ÿIì¾;ºâ=oÐS„ùO6å‡&Ò¶èb•\k¿œìÄNÓû©(ôá—Òiùýá Sx5 g‹˜ï!œñ¼í„¸D},÷èfê7zì¶>Pò£€›ÃÜUýiÉñèðw䕽@¼÷½¬¥wY¯%ýE×¢RÑšUdDò–ypÑ%}B|Ì å=#O·CÃéË'óùSƒÖ("­C)¾1óõTçÐoU½÷7AèYÍP@A,¿Šû¾ëèÕŽó²ÑíÔìBbõOˆQã¥Gä¥OÊHÝÇ@€Ÿfݔܬ,‡ÚFì“=m—¹úãHû.MA8™¶´gr‹‡bÓÎÛ_"À¬æ_Ã>‡¶Ñ¢Æq5äRüº«¢i±ü¾‡žóv_™NâÙ“%¼¡:«;ù‹ÈjœûG4‡rµm×I˜¾=,(ß޾ ä¬&¤q™^¿n»¯8ßÁ¾Ž?‰˜oׯÑ%¸“y£rGUÝ©¼Û\ã¨(¾¼“E·à“·ªÜ<\§uRÊÇÝÏ©ˆœù&õ{„þ¼`ã±/÷BÒ[>3ŸËÔ§|øñbLÂuä6jÝüñÛ¯‡lo›Å¶Ã@Åcø»åµ¡4ùæêÈöL¦c$ŽÄ6³v*>âÚ|­zoØ-Ná6¬[û’¦ÛSj7Þ~ ·!Å]ò}ÆÜé7ÓýšFþZ(¥² ž Ó_Îç/åKOp¤tŽ^(€S âÔ»©ã›N£ëœ¥,•lTâ‹BKÙ¼Í×Ô$šrµ``N-«¸tàÓ¢ïŒºïø¾¯¾þù«P¯ÁÑd—\÷¸›¹Nî¼ªèÆ€`b×;úYH%©œ«+oÓô±Îl:ÌxCp%2:’4Õë!'è3DŸ-Ií”`|í8W.Æáˆ¤t Y GôÿEk±üqÐ-&‚hÖ¢‚ó¦äŸz3„“•ƒLÒ„ßédº¿Áâ9½ð¨¥‘üK°q!y4Xco3=ؤ- óœ!Ò0QîrZUFG±¤b°(`äÿ4Ò8؇)R¸.ùéÿî,ç3eêüÙ²æK7­#XahŒ› YÅLcÆòÉjÇs¶ÞIºGî‚l ƒ‡ºÖÅ˹ҬÑ窶÷üW‰í”0CìbËúAèX¿”YC&_8kwÂ`ªµØkDfɉ-c¤Þ¬¬Ë8I» Y՚蓘U—äW¨íÇ+“‘H·º5|Ö¯„ï#’nLòØ®’g*»Æg›Û¼©óÙ,[½o{ÿÔ«‹ó‡fŽÛöNkùaJØ$NÓ Ü\G¡d=?ðè fñ™ç€›ù‡¬ép³ˆüÐW§4‚Fõù¹G6áŒóê®Ò0_”»«¶<·¿Êc‹6È`÷ØÞˆ;8pâ‰ÝxžU¨ô=iIûWÙGq ÆZù;Åä«9 /ˆÍËÊ]— ë! ÜÖ„²‹ˆ/Ç é|¾oH`bä¦>iI–QŽ:sÄd&Z¯nÔ’ozkšžÌ¢L‹à€¹Õ‚fº]d€í° $ÖÈñ©'°I$¦œËwß›¾ô‡ûåX%–¥•ù¥ºƒW"¦þíÂ¥ÚB£Á2Y0 ”Ñ{ahˆ`»ÁOâÄ¿$”ï"¸žËyD¥Eßžæ±·4€ýgo¢‚P'M'BCÆwÛÓ¾¿«ÑOµì•'ã¯6ꢦRÝÛm‘r­eANàŒßo¯=Xþãt¨8=Kt‘'fm˜$OÔ0ö:.¯´›&’£1첆›µëà&*Ûþ©pÜiæ4;ÿ0ƒpãç–õ}Aæ/IŽ ‘ÆÎß¡cÅ`ºá冻‘j±Ó‡kÒpæ‹q‚ÝŒ§rHÅ—›ÛFÜ&–ËÿhÍ .?rßWéEçlSICþMó•OßLÍ`}îë°1ôŸ—§Üf·Ó¾úi`×´n?Ïyë E[2Ý`ÿšž–L˵âY}þ !þ¬S\llßÓ—-õÒ€Eq=,^§Éþñõßò°rí:®cǨ¬,4³@>ÜìÁ­Ö2˜[2+<tgÍ©— Ç¥]‹?£ÖHÙÁ ¿* $S\©Þm3K{tY×@ÂD,è¬rªâ¸Yíî*´;@ÿwþnÞXi`î·‘Ë<"QXßÙÎ %ÝmèfjÔPýa,Z.1ƒ<‰ç€ã‡"$-ü)˜3нf¾áÁ-ÔŽÊìÇ»hI‚œõ˜?o³9}³5E±VaÖ)Ó¥Î×Nxk½¥%Äý;ß<( ( èEâ’‹PÃOv$² -Øn›©Ëi>{-ð7FÓsLO…½Yøì¦83¹}¡à–Ø«Ž%LÚh‡^ÐR nú/š ãtÞOŽ­¾ fßjî·¤m'î¥9þŒÜdÁ<ÚÊ{ž¾ôîã[~½–ùL­C8yë]¤èªS[*<ÞQ/oyñ4]Ɔþ?4Ìá¦rH[ŸÐ¯”‘tJOŒæ¥1þ®²OnHš¢|WõRç÷©÷ÌÅÂpi˜ã—Ò®t&ÿ×’òÇSºƒføÀBШ÷3ÓÝêÌ^¯”îÒ³Óµ¿ZÜQH:û”zšg( ûEƒ{O«ÔLÎ3Ï$Ëw¬šÇòšÑX4Ã:Ûڥߞ®bešÙä¢V÷÷>£ßE¯ÜY-Ç+@¢"ôï+Ø>Œ×rÇ蔥è©xOKPýÓ^Êï8[9Ýí±®cßàPµhî8…ÐEcq£­|#o?Ëî,¼D~&J«%¥ÃFÈW¤‡kŦîvŒïÞìÇÔžÛc˜ì%ÌEX¤ÙÁJ²(/vÉÃ%-ƒ‡ ¡ÂîN^ `àèkÅxôÏzÌ˜Ñ ²ƒ¶`tàõŸ*¼˜Ã£ä]£8oŸ*ý1»ÍÂBŠB¾RG2ÎÕõ¯ù©ç4Q}H`uÙ >(_È«€TίOc³xÔJ3¸uß·dMrhÞè±–£ÿ =ã?)9êã ‹]û;ø·P¹Fñ4HËÛ7 ÞPf$Ÿ‡hevÍiV:€î¡´õÃpÁ—Z&e>RA.tÊ|lGÿY‡üõ"¤­/Öy!NyfB¢õÚ+•½ YØ·)Çd/wBØrü~¶X‡~¦öeÅ¥±Ö7±Lb· Ö‰d–úBÞ¹{\„Y¯‡¢}~l•æi¡"É FE¯Ïu–ÅݜÛ¨Ér”•Ú9º(Z T²-Ų…Å—Ü㸲—̦°Ô.H=þ›‡Ö1–¹ïåµ2Û)?ã:Q¢oÊÒ4jT+œ«H®X59uêL43ñœßŠÑê‚×6ïß$ Ö?ëÎn˜%gCŸ‡ÚŸ­¶™ÝñWtº9oB^¼Ö£2ÇQd+¹–'ê3ú¤ê]?¯üÙ4ÿ”NtUCíleRn“gZu8ó±y»‘y•pª³!jÒËøÉìß¼o»6ÅäÕ#Jé7Íhÿ$®óÆ;N8±5|Î$ì^7¥)Gž#}aÁi>ÏsŸéø‰¡S:%V\¢ïóTàfqÙP¼\S»þ0/5‘Œ*»+24áB}Ó²üœ}Õ¡¥§ž(2ñOð!?Æ—†æ\»©âXW@ͰÈ-+OåS…(³{”Øð_2üVµ-2L•Æu-£)Ø–·övÅrºæžg§ÿÃ#L榼و Ï45ê°ñz»\¸dqÀŽ'ÕX=Õ‘1 ²e9žûxÓÞ äfA¶«ý.–ÅâzÜe{µ^¸ÑäFve¯døÇácSþ2.²!UMàq-/ò(^£”—(if5_“Ѱ ;-’ÿylW·£[²Œàô}•Ó“Ýûÿ¨ÞÑÌ |'ORŠ8tÓÇÆý# 3— ´£var×eµY°ŸÚ¨ïåÝ‘ÄZ¾‘!n:äkí´Í|ÎÛÉ4Ñyˆ·…Çõ^øaŒ}¨…ptÁÒ+‡NÑaŠS•l<¡2dð‰5˜ÛSNŸ½Ó]m%ãECtäHêtFÊÎ!Ð}FG5Œõˆ…¹ÞþâAXò1yE;<0Öt£Ê5ºã_ó@ÀT¤{+2õb™Gïg™}¼Db„%×i=ÈAñç¤ß/ÎYˆ“¬TÚñÕíÛ–Ñ€>ÁÒ‰]áÀéFƉ ë(‘ í&ôaŒ¼]Ñ1hï0Í}˜M§ßO¡²¢·¨L”"õ zAÔõòmq¨+¨Û€8ûJº5s2ü¾Ãh®«5¹L>Øamí¤Žó¬½¨²ÄÎ:Û‹Àlä)~ÈceZvž½0—Žï@»–PŽOÄä§÷Y\ˆžÍÍ©XïÑéó/|nl¥RÄ+£ ±XÝ=cºÛp¥_Àê¶]j6£“jÊ­ú‹%£;—Ì2B¼ù¼ÎR÷Ÿ‘UïdDá§~Ñ´1í ñÆ/æ d#ïYSìÚunQøµ÷ß*Iž†L’‘¿j5ÓSˆ=Žo rá’`ÈÑZDݤŌn™‰ ô½¯ïO ŸaL½mC­Ètxºþqy¡~~ù#e[y@”9Ç_-pMëÀ,{Ÿ)Üõ+ØÉ­á” qWr†~n™ É¥¦@üLÏR™ÙÁV®½aÍñ¥ÈGÚ2óçgcz†Púçî®Ùôá£ùÿtÐLPµ¶Ît Ʊ2ÛP ö}V{.Ê·šÐ|®„5IwŸ-g:½Šiû¸“t6âÜ$ƒ"„*÷¤¶Ok³®JÒ–Úh;SM·­@|h#+ÌáoõZ³%’qn¶ù,mµ ÆñÏ•~0ïæ½¡¶V–Ë{i-S*fGpÛšgû°°iÐûÆÌ™3‚Â÷©‘åpิ\áª)A‘7YQ>_ä俚›¶%»”3Í"sDåšÏe®«$–Ë!à@G"Eê’¦§Ó¥t,…$=Ö»ˆëLŠ¿^Û_W¶nÿ¾!b 𶸰ñ¯g +uÚMè'ÿW#T{M1g¬XQ"ØTáQ%q…ëÜœÓu‘>wXÒ`ïá~÷ò·ª¼ƒÂÛ z¦Ñ`íBJóUÒX¦•ƒ 'GxšŠãë­HwÄ>,ùæ‹ó bô£´®ÏçXk¶ç.÷¦zéÅMAÕ'”ÿ°eXÆÇ.ñÔ˜¥ £ØkÜ.ƒ¥×­Ë‡ ñÛÒ¯3 2U› žûì·¹=aß>Iú¶±*6KMšö¯:P·¼àŽh˜vYÒÏßc†2þ›Ÿ~¬º s‹×вvm·ë áIU¯%@».³¥ _vî–£žJRsü9F9¸D ±›†ŸÄ1_ÙšÈ ” W2ÀK?„˜mv®*õ„MP†%WyÑ®ÂBü¬;ÞÝ ®¬`ê`õÞFmÏ”Àö=‘§ægE|ãio>Ýç|±LæFº½Äoè¯ðÂåÖ–æ9ÿ"v;”G$Dïjxr”Sœ©è÷™kñ¹5—Æ»S=æÊäïv¿¾Íá‰èK°]KB\» 6$o\Ëé PMYsÚH¹Ä+¢Ë.QPôê ÿì²¾ÐÜÝ5²ð7 J×ý;næÝ›‡*„È`rqw\)EMêtçñÝØ&çJ·&ògª×õ[; ªiÆÒïd¾¹ígEŠøjÀÔ{1Õ9•5ƒHø•ƒöiÊ`æ@ŠóàmNYž|¢ÏuGŸæeÒq&ä1Äjf˜` Bá^ó•ÛºÅéf¹Ýª­¶ ‰¶ìfl7ÓœÃy©Óˆ¯Ä2²E[”¿L«ÿ­õC:ƒµY¦v%ÕäAf”À4íý ôÿ5ì ¨ó¿ åa†{¦!á®…´“±âGdQ€RVË>kK®q¸Å›õÔ%á§ß»·öÓmþ¿ÄŸ.'¸Bªëåê,žä›™v#9Ìî¹ÑÇÞß™,YOúë¤øm%Θó“îÙKø{µ?…Xõ*3¬¹=!Í:ù?<ïÑÄyÁt¬´„^F'«ñOÑi¤jC-ܵ£ß¢V,4üôöëØ›@9øVd ÷þg6úº+N>géJ¸#LWLR1â³K’‰ºm´v£õúvóí ÑÝÂpÎË ¬dpäÂÆ“[§T릓Ñðxë㤠ÊášÂl¥ Žy 6VĸdŠ£Î/¶–[ƒê¬ÓkËqÀÂÓNœ³9­jõ^¯69¹Z½å¢ÑQØù›[~†ÜM†ù¬nÒ…ÒÂÁÖbwSÑ‘ å:a”%,”ˆÌ°¶qhF¡ä”êƒÁ’L›æ$,–mixZ§ä(Û¸°DâÞ\lE°o•ñš vJÏ“©’/ «P¬ø.Ô]"uO^*\´Íæ!Ì%6ÞÙo­p?™> žÛíJ†!q;GªÐÿ†¥z=×Vû¼ÒMDúÀÎéø7‰òšç£x™¿ZndϳþBÊ‹FöÊñG8Íðj±¾^xN¸þZ_rÃ^YËY±a‹”½AñÌC20 Nëéµ@l]5?ßIìÁ$fˆú2ÖÎÐxæ^Ü8ùG'Ÿsi/×I#sUè(𘀖R÷‚ã§ÏS{xj¨ÏBZo~jfªÎ+cb3w¢¡§w•HÁôV®+»â‹4ú(·xÚªdÀu!Yìï+m©¤$ÿƒ=ŒHVÏÿ_bº)7б d–ªœÖ×ÇMŽÁm}Üë;F\•lËNâ/ þÈÓiõ [ ŠQYÙXD©íýB 1€U7&“S:g4ïXɉ9´µNÉx V(RïoÓ]»s W%ý?ØO3eÒq|w—¿ôµ¶0 „ÕŠ“ó#cæ•«ŠZN$:ù[wotªºî˜š)›]Vð<Çj­³UýÛK/«…ß¾]fý&Û AÛîS¿•‰q¼R²ÕÞ¯,0VT¶F•ŽCo§Úý‹t2A3‡ W³×ÿ*êñ–É˜Ž¢¹®"‹è-PßÕJÐÞºT~ÏYýÂö C?ÐðÏ™+ÕãÂб2 D©ÂÐ;•øGªÄ“ÑÔlÝ›$ø'©ÏšÉ%êµ±A-rÇ:dÁ FÙ 7›K̯ê$êĨæP'w§«÷í0èçzYÞ¨¾|]q2ž{ö¸ éùª<›Ã燑@Öf¦?ï¬ÿ‚A'ŽÙΰnùöcʽLD;âüCL@Õ”R’ÞR0ßœ­› ÿ.¤…õIjmòÓ/zùÈ–ðþFŠáŽÎWÖÙCzä·­¤4­j%൫:Îé¥Pû–A”HÕ‡½T³/ííB¼Õ>SÙLjiL‡ô½ÑÊHôËXÒ‡*°òÌH>vªŠuÌsjgãE8@á’õ†%{¨ÏSºBD¨e ]‰Z²Œ„¬§ÕϨÌÚÕ‚Š™à_ÆÚ¿(7E¨_oåôr÷¶eüªÇ‹ŸµÖà6[>¢Ï÷Á Î)ËDé7/-4¶F5aÐ×Y—Ž+Ô¿×òÚF¬$Ž˜ ô fK¸?…öÊwšW¸¾O¡›q3š7 10œ… ¿–-eÔ0ë묞ñ ~]§¹‰Ìž¢÷R P ÈH›<´ú&RT;HªåJ‘Cú4£Ö¤ SŒ­í AŒ˜†-°Fñpéé¤8²{3OkÎCÁü°S%zÏkEù冖 g"Žóc[—S’¾´ÑGJðŸò…]'?õq:…ü§îb¶„dá£ÓR«?ŒÁ›-šMD¶í攃¯Ž䘘ÿñ£¹o:úÙèX +lÁ¯×àü›ö›²—æúøËQ‰Ì'Ä<ìëçä?L+JÓpB¿ÐíàÔ4O’ó½egÝuÍŸÙ¾mgS"æšQÁC³2Ø‹½ Xü$‡N›kû>ÿœ_Z¸ç¨÷7úY S¥”o{½‰³_÷‹Ò;“­É%?*{ð›A •åý"l—pÐ"³»\[üê3t¡¨>>FGéÕZPû¡Ÿ³ž&J˜F‡mØÊ^[׉¾Ih‹3³ îÎ N¶Lµa,ÉéèâfùªÜ Ý?ùÚgŸI8´¬Ö±7äÛý÷ä~^Õ)ÿ¤ & §e”„‹ÖV킎rUnkh+nªÍØí7–r=úv˜/ã™ùNøÍ/Má~uÿKÍe?%ÿàqn¼B ¯ZÂþšÿhUñå—wKŠéfÂOƒ”0(™À˜뤿bm ˆ‡š§)ÓôÒUUè‚ÁÙ3r(|hÁ–÷}º©`ïb`U‰vÕ-µŽ˜0€Ö êÞ©êWºT£í·d?Ñ\^y1¾æ™ƒvLëQ0† ES“3ïü&·Ìs&¿:C’Ñ•z?ÒQR^Çî/=’srÚ½CtŠi$K„[uµÖ¡Ëc/ÌF~Ã_àC(ð›yh7¥6ýÀ|уdàQ{Pgh[“5–ÉmŸÄYŠÙw͸kÇsB’l›îýoÕê|–Yi&ŠÙƒˆù¶)uˆQ§ñ69£—M*ºáv8Ù#ÌtýÛüîÓ¢‚§‡q‰—Zƒš4—BQ´Ë“–Ï}XkÙÑWÖ_%Iç“Þ1œb)œ1¥];óÃþRY,ËÚ{i䯄šq:~-#«@km’\3øRYýÌã $òH9ÖÐ6±&•#·¦âUŠÒ*Í-zÜìOS×@¬‘À«ß3Tœ³¬}üÒ‡àºþqDÙHZMwòÐð,v« ¢·šÔd?Çu‡‹¿0 û¾<0:dê¥dw¤m¬¶‘v7^àOPߪ–žKöÌñN.%àÁG¨´‰ïäu€w-ØóQx{9q²  ÏbøÖ’híHé\«Ó“ZX¡•„—ZæáFºIPÚRØÔá—ÐN#ñÌ~} ‹{´8"5# Ý) ½ÅЊ2Ȥ(bê5Íx?ÓrUŠ®ãFi‚äãmÏŠ£”ÓJpÎqµ·…˜™ úɱMätŒ×*‹ªUßpÍÖe+,]”§ƒ¹ª**¸ œ.ôÛ¼‘z4’·ŠÆéaz?ôJ¬†½[ZÇÅÆÜ‚}Ú‰]ÙýëË_]8=惔çÊ冚Z8½x½©‘ê —!ÕRRÎÏvš>©¢1ò‘ÜR®a““ B…ؘÑÁT;1¿¼:â`¾”ʼn/)tÒýø:&™¸fî_ üo=ïïÄüxÑO»Þ>[æ¯I" z—Xˆn`µ;˜›Þäˆ ?•Suidó˜¦Ý›"QÌ<~)“ãšÿr·£pì¼×þÇ+áò~Î2ØlTÞ ÝIYåvÖ¾Mž¬)6!SKÔÑ|\r† æ~“­jyfãÕù¸³¾5Ë^Ëë­ÈË¿ÚËÞc¢:à“¥¤{×á|3æ« ’\V ©ˆi1¸7îv‡µææäÔV•îéÇù2%‹q[ëƒE¦°Òn«h¾„Âm€ÔvD™E:™"¥ }´@މX™¹lþšÝº"')ù[•}n„“ÿ|ËÕ12Rtæ†kaæéš}#ˆ'½xÿã_{â#°¶º8ÏCº'æ„ :hz)/¼… c)f5<\B²D÷%4çQ8Ýfö¸Ë…$IZLî¿ÒžSK×Å ÃX]!B’“ÙRv“@¿ˆCÕ²1tšçû¯ÎPß!örh2ámD;(+%eæHafõò&²Å~Ä¿Ëc™ð19·ºGž²ý¶§zEQ;8ÿû­Ð|LýÄAõe¦Y„*ß¹q¸ÞÝM°veÖÎ\Ø“›0Àb­@–ÒÌ!€÷RÓ'uÓs¦#‘Ê?B$%¡]©`KÔ*Zç<))z ~oæ]ªImœK;Káô4ÿ©°öîéi‹'Kèä¼^à—;Wv:³ácS°yÜÁã…õKµÖæO—é¨b|¾Ü%*ê“kŒºÑò`8þvïv©ÚO[®×$¾ñº*NsÊŒ[¦p·æ"·u.sįõHˆ0R¦€ÖŠîɤ‚bÑ!?šìÃr&s’}½~UÛ<ëýôb»îìð‚]z),Ô6yž˜ŽÌaÏzù{K£f2ç@0ï5xõ"!Ê` ¶]hã÷äÙéâf¹ßòä†îD¸H©½åâ¸P¶½ûc2 B˜@. ½ÍÔWðNPiPZÐÞ§ÔAËY^³ÏÄ.k cñc.Ä2ïVÿzÒkœÇmÈŽĽGð†yú$ñnáWçÛÛ&yA¶lÖ8Ñ!ÏÈ+Õ²•AìLŒ±±áý$ÞÞ0òõYÙ-zKN<ÂL.UŸc¡o[ÿ‹ µ$âàÒ‘^&<Ð?ä,ß„¹sm%ûAYiz{úZA¹g›¾©‡‰ûW“¸ëLŽë[ߢ‰ê«ûª7’×)Î×÷RhµÛXf(÷T7¸êLCàB]™} ž2,¢·J‹œcòਹÜÿr›# ãê|¢ÿ¾7]k<IIþ+º¿>VŸó±¹‚ȸk?ÍÏ“ptM›™Á=`=DóÜGõZ”ÜóùçLb¤QKvqihëîÝmí¨ 9íçfÍí£Î÷{ï¹÷™s¾w’’w½÷Šß2÷ÛÈëÐ\Í×ݪû½÷¾QÖC—½ã»wÜ®>ƽ»š¾·h'¹¾Á‰×u"éݧÑîlšUï·ÞùìóÓî5öÈ÷Ï»ÊßnW߇ß[éé Ïsª=ožù/<_f.öéÛ6õÏF“ÙîÊCÛÅv¢vâtšÇmßw«¶’‘¾ÕP("¥ERE*ŠET¢¤û1E X 4¥ Òµ¥ªfj hiªI¦©A*H ¢€¡PM6ÚÑe” kM>¾÷Þrô¼;¨¬/E=ï8=:åȹa9ôÈä\”£Ó#ÈkÖ é§L*rÚΘFšlt°€i€ »2ìÁâíõóçß/%Û<ætªï¹¯{¡»';=ãm¥Þûï¢SÉoÞkÌ7›l6±÷yÃò Ñç¥{`¾µõŸ,_36=»Í÷ڼݛ`ûï`ûçuîäöÝjìÝÜo{'/CvÕí÷}¾Ó—>u;{Þõî³C;½»Ù{ŽçÓïoNíBöΞæzÝy÷w¯wqNÓ²ç]¸6YN°ÊÜÝѧ]l®Ûm¥sÑ{۽뾗«M»²ûÎíºç¶>û¹my÷½k{z9‹{eZË›v{væ½·Øol³¶Öµ®Ís}Ú[ÝÏf‹—»Ý¨h‰À&&˜ 4`&AÓ@ !¦CF4ÐA¦¦h0`ÓÀM00¡ 0ša2`M14i‰£0h6€M xPÊž €M4LA£L&˜Œ™††š@M4Ó&š4hL “&À©äÀš A0É¡¢žh& Ô@ !4€‰‚4À` €˜Œ$̘&L&Ɉi‚a ɦŒŒC£‰£!“ihdÐÈdhF4`˜À €LžM0H"’š ‚0ŒÈÉ¡‚Ú1€1 @€Ð ˜4 ŠxLL>äAS‰C$†Œ¢ @„¢ URdä+ PªTARª ¢‰ê±kŒËô{ìy3-/Ö!(ÂHÚÁ~* Ðí¾ÀüòÇŠ=Î<(Z׿­Ï«ç1á­+ŸÄ¹Arån É€!%@úà(ó@(â€QýÀQ˜ìÀQà@QÖ@QèQýÀQá@Q¦ŽÔ@u yðð <( 5@QªŽÜì (ÕG÷GFPj€£ß£‹F´ˆY+1aJ•ICX*’Tª… ªB%"À€£rFÀ@(È`fFÔÊ¢;fݶÆÜ-¿qžsqžÈ¢d( äÙJ[íúþJžL ¸ZÁ1DW0²#ÈTˆÇ dËR#Eo®µ­±¶È Š š¸ 0"ð’aR!P…`€`…"‚ˆqr$BÀ* þ½¯È‰E%4‰¢iJP€Š*­<©…¥Bµ@Dˆ Zª *ŒkU´õqŠ®8ÐZ¡¬ikB’UR¬$T+Ɔ¸´ÐOí 2bZ¨¢¨„UY¨©52&©A%Q ‰¤ Í<óΈ£C5€ "(j±Š¥€Aq¡\-@UŠ&žDŠ('"`E2õB€D:\auXCZ°1ŒbªÔ1óªÀè¬@ˆ€Q<äÔ¦œe*&h„Š%Q‚14ÓDÒ‰H‰DÓHš‰¥5*$JtPQ§—F€D@:ê¸UJ«X¥‡ŠÆµXÖ´1U ±TÊb"i¦"$M F%"DÓˆ‰‘”ÐH”¢xš F¶zJ‚@E(¥4L È+iÑR¶jSÏéÒZj #Phb‚)Ã4“Êx&œ‘J%Dó€3M)M4M‰QŠO<óÎ ×B ÖÔUP¹PT¯g™QE2Ó€£ TR+R@(f ÉQˆ’¤bA\ÄÀ¢;/úd®k0 ÉCüNR>9og TR°«!Ú6€VHŠÌ çd"¤à®rJŠ@+c 6ͬŠÙ»¬[9+c¶²j¹‰Lè+X ÙÙ¾Õ0V²¢+P°Ú2`®ç·‚¾>â Ôÿ›’+¹¢» + ÝZ ®ÙS?¼"»È+S> Î Æ@ Ö‚´´[Ò+½‚·VÖZÚ+¾¹ôà7ôW€d Ñ-„ÇºÜ²È®Ô ì; +âí`¬¶WÉÙÁ]Ÿ6 À+™Ê€ˆƒ6l˜£¢Ì‚½îlÙs`­ÔÚ.ÙÄWmvÍ´ ¬–Ø Ü]ÒžˆáwVã VjÐW†Eh¬ÜA^‡`â5pWZ+EH+Æq`®“H ÙÎ ñó‚¼}.<+‚µ*r¯Šò ¯É° Ê\R+£ynPØ\¤üˆ+ÍrÀ¬´ ­@W†¬Ð‚¼ï0 ï\Ê+͵¼À+.l°uÚ`­IùÄWkAZésÀ­o ôý+Ñt@¬¹*}+Öi\˜+Ítý0+¨æÁZ]@+@+gT Ù‚½Z+g‹+:ÀV˜+o¯çÁ[V-:ÐVÜ Å‡J õÚ€W­ë[ˆ+s½”š@¯ciØ‚²{$V¶˜+[GfŠö€¯qÛ½nŠ÷¬à­4»VŽèï;ÀVYUhëûÐWuwŠò+ߢ¾+à‚¶&§Â}Ÿö ËÄu>0+åxà¬À¯— xsÀiy@­_ã_zÒЩgÿ_'ÌmXêYøàW¤ Ü-A[£Í|8êÁ^²Üz>x+ëú ­g¤ Õ}j`­ýDWÕE}`V+¸+\ Öד{¯ÿ±í{@¬¼ì¿è+„vÿwÜ|WÃÇ‚¾ê+=­KÕÁ\Vþ ð9xÖ\L½ôWÚ|ŠÔT4<puq¢áÚJŠ©éƒæÌªÂ ¨‰0prFb@T/Tì&úÊö}õ½ŽJzj‚V¢¢7Ýxd¨Œ¤£•G[G,ŽÂŽÂŽX¸ :à 5QË€ŽÀŒ(å@QË€£°£°£°£—G.ŽP€(å€G"Œ(ëà(äÀQ×QÊ€£•G*ŽT  5QÉ€ŽTQÊ€£”G*ŽP¨ :à 9Puàj£“¨ 0£”G(ŽP˜ :è :ð :ð :È 5QøÀGãF~@~P~`~p~€~p~~ ~ºÀaQl@DPø²éþ¼°œ ´ý]–»m²´£^ ¨Ö£e²×à›­ Ùà×±p4hgžQõî? bJÝŽ›°* ’Œ+“"2)¯Ç:¢žšL†ˆ$¢Kîë3LûÎ2ÊSŠôÉ*dlOaŠDxë†4xZû%GLÊË!Æ¿ÇDbȘ*3 ž~TiGv(Lð0ÉPÈDu";%’Tóe™ªÄ–}2Vr°²¥*+'‹„ZQÙv ®ÜŠî^œ›¬|®´¢€W Ð±Ö»¶ ­­Ef¤ Ô’+2+0+NH­@Vº@®û8+*XAZ!ªŠÐ ËB Ñ8+[0+:+:+VYækHEkA[ P Å`+)´©‚´ëªµ ÒAZµ€­`«aUÓ‚· `­e€+_Hž[VjÙÁYW‚¶µÕòl&eP§Q˜°€V¢R…9´‘[V'f ­®e8+­)ÁYYõà¬X‚³­ˆ+RœÀ­€+¬ñ V@¬ZH˜šÌ©5r+Z ȈldŠÖV‚³‚°@+ Ù‚±a+DP ÍT©f Û¬ÁZ^¢+4µ ¦ À«ihŠÉ„VX¦ ÊH¬²b¶ZZÐVVuà­”À­PV‘$V€V`VJ+b ÄÀ¬Òk¡¢Z ¬"´PŠÏ0+<"²kÁXœ®² ž]+T’+RÀ•¬¥¬L Å Vº[tEZÀV°ŸÂ€V€V¾Ê¸œ®+n°h¨ ÑZ ¬V³Ì ÏHšàÀ+:+Y$VÂ+8+4à¬UbpVY¢[Kx+Dâ¬à­˜+°Š×L ÜhEd ð4³X²€VÒ`Vº[}¬…Z ¬¤ ÑP•0V+ÁXk!ßjÔf³bhf¨ ×ÈžjÀV”À¬Tkj"´Á[KJÀVdVr+ VºiZÊSÐ Ï1¾áÑEë¡Pó€Ö€QAü¡Ý!EQ.Šª¤B(ŠŒ°€ ‰ ¯9(Ž‡Ù€"x0"+mŠôPŠý0*)¾@*Ž£$é`L7í´Lˆ¯í“(ŽÇ ¯#+ô_ôWIÑ]¯Yš’+Û{E|­ƒ/¯ÎŠððˆ¯¯K®—ÈÌ"¾&ÁûÌŠò¨¦[fŠúP¨§s¯ñþIM¦ØÿÜ‘^ °¾@Šé=©"¿OwÛz³¢³ê¸ž»ª›ûÛïò&œd<ÕÌýïc²øõ:ÿxœÔnKõ—ggu¹x&ô€càòàG·€qÐ`ú«¿ÓÝá1˜ë¯ÛÃÖ¢º-ÿ‚ÈiuÚB+îB+™ÒÉÂÀŠúÕ$Šï°Šâaô¡Ö¿i¯QÉÈUºB+}ݳ³"¸XE{[Ü‘[´¯¯+Î~²S{²„W%6:dWŠ÷Њñ» òéõ{øÊh¯‹+ä@"µðŠì|ä‘ZÈE~HE~]æçnfë2+Ô¢šÆ~H®ኙìáÖ Ezäó»ã´ŒÌ謋ì‘_ù+q„WsE{¸E~xEz^û!$Vñ¯¡‘ÃÌ¢¦÷+w„W=ÇÉã EqPŠö°ŠòЊÜÕ„EopŠÚ¢d"»¥ìË`i_i"º¤"½Ô*¿T"·8_Æ]Ž^ž]"»nH­Âþa¯„W¿B+…ÎɽòÒTS ¯®š_*]üÖà’+ÜB+‚„Wt„W ®«çÉÖ>I­Ú÷$WhÂIQLîÿñLŠöŠáðáó!í»É¨ƒBÈBòÖ%†ÓRg×Püi 埳 ðúÈ\HZÍ««gOKj|ú~íïàX ¡ôÛHKçøÉ}¿‘ÌJUþ„óϺÊ^4?¶šy³pˆSž|«së+kTz¸õ{nƒzð¥æå7ä–—ùŠ›¨”E-÷ǧøùäj?\·b¯U‹Á´hÜuNü+ï…{o"Ýá-M°óV ‰a¥cÄ«.ÃJQ"AÙ©RªŠ¤BTH$¨A…$($UJ…U(UAJR¥Jª’*ª…E%UQH(AT•U%J•Rª U R¤)R¨U„©$ª…EP¨*@UBI*ªA J¢©E H B©RT’ )* )PT¥T ¡U …UT‚T@$‚ªI%APª¤ªª¤J‚HT(ª¤UPz”0Äèœ0€Z¡IEQ¨UaBJ*…%BJºÆŒP°Ê!ˆ”‰D¥Ôa³-(üÜü€Qß..špÔ(ìú9 9¸7(w¨G€ÞzXbT®€}¸GQ ?/ú¶ï{ßGØì¸¿Üíýýšû3z¶µ•Åbðÿ¶¶˜Ë·¢¿œ†´›ÇïAÚ|ÜvC•>CCÓýjÑåä†fGžsÅàâŒNŒƒ¨Æ2A*R¤‚„€U¨% …RJ© …**¡A@B…EHP¥ •UE%ARP *©P…RTŠPTôeÿW³!•©¦~í¡Ø+Öm£~늇v[E±BÔ²ìæ§Ùruná47­ j]O £×¹VƒÀSŸ…s…uÿu÷³²°B˜ù7Úž¿ôÏbUH "0zD΀@³4Œ»3_¨oO¥g6uønô82ªå®0¸³s¦*pP«“P&öÙ®ž²ñ¡»ù0DbÒÇx›°û*ŸU¿è¥±V*¼ü·z…†FFa"ó¹7Éd(É;º\£¬xËë×y»ªÃת)N‡î“¿<Ì—‘YïÝHzNÕQ؉ “"(b¥Éö!Õ ÌùF'tºŸ›?ÖâeéY-%{¹Nßwyù]^š3q@åã_m¨’;ÝUi~Øß<ý§ç/ÿ“ ( 3I$©U*¥H ¢©R¥Tª’„)%HRŠ …U*¥Bª*T…RªfåT+¨XªŒ%ŠQC *CQX¨¥ b…cÂJ(BTPeÊ%™J¢¸ 4£¥ÀA(qŒa T¥ ZqÈýÞþ¸ñ]É5ϲ‘í3zõ¶ïÛïÉ=ÊgMã¹upßÿKÖ“yÉN:ËZ6zÛ:š0îØÓÄ[|Hê½`BaaútÝæf±Ð9EÝRørç|”yãr…±çO‰Q¨þ½è/®|äCóý—áÚÅýµ±ØáIR¡T©±¨a !I@ª %¥„‚C ª(PI%QJ…!B•§nìÛüïf{“ãÀtP«Í®î„|Î^¢‡šOI:ž?¯E¡ò¶õI\/ø!xÄNÓ%ã} ±eÝc‰”Çtòe8F_}ÎšÖÆY;­|}lJbaDÇ_%v¢ÿãÕã IöÔ×™ßN”ð¸ïŸòl~µ V·ÀÞ6„"E4p'ûÛë3_ûr¥»jÚ]®•ÞÓf¨.Ðó‘4›—^ ¢v—X:²Þú²ëÍÙó)ÌXŽ´zųŸ üñû†¹…VQãËS}ß¡„bô€(Y¿· 7òR—GD¯1G¨éþoí?U~þ]ˆÝs®V³?V«ÓÝ6ü‘A”@'ÈD<¬¼5âÝþé'·aá§‹’'Í ƒb@²V¨UðÄ PgˆHB?ÎS;ʼò.8K7Fsþi=UøÃ•{Þ®ädJBÿ% Ïžrºþçùåÿoh2ŽUŽ/‰Ï­ÒB3 ¡Ï}ƹŽm4&&“Fu Âfª¤n~°ÒÖúÔò«•y8Ú×M¾ Ã7¨W®ÿ…n\!L ;¡üÃ<«‚Oˆ &D1 }‹}YJz·:û„‹WOšÖéK“ÂÊl ×Þáo1˜oùÕû]'w½Ö%-[[®Ây=ÝiŠÂiîúD ªª: ©A>"%Áòòxo/Ò¨¾–‡u4ÓHÁLú¹[‘ÑÛ^Z Ë6 ºÔò{™ê³ôñåì[žiÂRéÊn‡¶<” [x_±ç¬kƒuxa[þ•ݖȦv"Ï©CarIxÖç„T¥1@ ìT8ÕÄñf¦ ‹Îk;~/Ý2}ëñoÕ_6ƒµkVtlþSWËÇѽ´H€]Ðà!\Œƒ@F]Œ†ÜsZDWHžûÈm8\ã¦Ü|Ϋ€úsý×Ùøo *`¯’]zÝ«¿†Èr²@B€â^L$’ OMF¾‘Y¨‹úVÑ Ç’%6—“g¹M8üsú€S);µóÏ%ý'bôÓ÷«Ì¿ºæ:Ÿ /¬× }‰ì¡œg`+Ýö•X¸ÍãN¶3aÝË|¬®Ûù7{|UåöL¤•#ÓRŸv°Þ¨¢á/:(Ÿ±Ð‡ëïª?s•OÖ¯^ã˱ݼä+´•&Ìù.ž‚º’Õ‰?ýÊ(¡QB€BÛôÉ@ýcïÑøúŠQžŽc¿TÄŒ¾'Éüõ$/%]ÎÔ¡½¼E—¡M;ðOåZ½2>ÉÉêÏ¥FH:ãŽ!†€Oiäµ*f·FÓ;©¹Ì<ýôj}qqŸ÷¦™ÛX°©¸Õ1%ããg®ó\VS¿Ùé1ö½¦ý©*)Æë2TSñ„Wž„ îå¦Ey¨@Pó~.ËÀè½½Ç'™é¸¾¾÷_Îþþ­éòhíjxÛÄ´¾[GË7o—î¢Q󥔪H8aæxêÊ$ôÏ‘Q<ìù‹3Ï»¼ì#+°] [úÛi¬Úë¾TÿW Aò‚Éc‡ÀÆÅóÅï;"°4£ˆsDááŒIäG™èC[=õfÔ ñÃ14lÝ:F¾›¯×g%`î÷ب©!â¯A<7›ÉBîÝҼΜsZlZ¹&–JÁÓÿŒ €SNk-_x ’²æ>–Tî2üVöô wçÉh(Mfy‘é ^ºµ5ëHØÞqÛÌ9)»fš›èÓ´¬^ÙÏ©pà§Ù¬o2õý…3Iè•ÇÅ^8kZœÔ»<ˆ™)èÄ„ ÓQ>+vCÜáâ¬ÿž'sϸÙç²YÿU« dÊ„¢–u©}%õ2šè ªŠD@?‚iÀ ÝÇþ×\çw} æª4ŽÂã"ר%xOâÓ¹ú±³ÚóX$Þ¤ˆ€BGC¹eñhEvèTSÊ„W´ØäŠå E|8@Pè:ÎZað UýàEvèTS\çù¾§SíÒEv3Žó} Åï^n|¼Tˆ(¾`ìpÇše,àT—d/1;ðÑs‹¡»·9£rAÈnƒ«’dÄ—·{¢¡ª³ÜÂÁúW«w«.…˜øÛó-ÛŠ5ðKãTûÊn‹þŸ×°ûÈf~K¸$Ð16w|ÕNl] ä¢eÚê¾¶ºQømŽ#çKØ7giõ0¾ê/ɉ³FgÞ ìÚ…S³Çu”zyT2ýtÌØÒ}|»R”äïup€B!kP-m<°ù•HïìÞ.—©5Ö?÷”÷öv™§¢‚U3Ö…¯v1`ØpGò"%ýŠæiÓnÎÚvz½\•CMv#ðÆ0ÜR9óLòŒ§<ç¥ÿzrÐ^ð#ý“J/Ö±®UqzG¨xüoëóôXoËÈØ²×%ªØùöõTK Š|ßý$Wá…E>EwhTS•„WþÀŠïŠü×>‹ÚËûš`Š÷¨¥H_û¯¿Æìó*)m‚ŠèàB|.ô6³<{NþlÃØöðÌö/GÛÏÏ:Ô¸x ! Â?¡¿7'À‹W#$’þ°Ýäâ³üb6ù‹ éÆA¦ ‡?»½ƒîw]û> }¿Ã?uæÀîŸÆlÓ#¶× Í—F^ë°Ø>¸s¥Þ?•ÎÊÃ8óZöËþ÷%RšÓ}}Qú' ØpÚðÑÍms嬪/ØðæÇLû &%‹””ô2ŒÉI\ø‡Õã\$Wõû5zä cRE¾´Êo (=·êòz$œôo>šË¡„ÐÊXS¤Å}, ¦2©ÃRÏ¢¶n;;Ïv<—E†ùðå©ÞnÙ96 ìñU0¢6–± ²Cåá^ûûÙ?ŽÚ‚„ ýr„ X€_ˆ ,ÚRî«öýY¼óé0¨Ö‹!’fwŽØëC‰vKw¼;t˜>·âùûñÄmü6U9 êèŠë…[ŽŽZ½uƶ•…Ù+®H]`ò!èá"X†1~:JZmÇV¤ ØÝt‰ŸÕä$ªcÂbîtDýüVK»&•j¢êzÀø@!=,"¹8^äàxÞâûÙ\rÕŠ+ùÀ"ºóÁ÷|¤àBÇ„!r«î÷ÚŒ©Ê¢PÝ×LËÿöwÇùö*!´"cQÛer½ÓYP)Œ´;r€<È WÖÖœòòée ­œ®sâ>7> o}Ïm[§‰JfAŠ^Æ¡Úy1†• ž‰™ðB){¶èƒ‰ÊmRæÄÐvxÒ4 ~ª]NÌ0kzD¸ÿß¹\¤ô7CLÞ3bÎ-<ëæ·SË–Q­Þ–nï9G_ÈÅlÑ—¿ç"Znêt«hM‡Ç=föe#ÿ0ƒFôÎzÛ\ƒÀ€BŠÛ iRÅÛã^Åùàñ©rpzXq›Wòµ ;Î1€¯ï Þâ¶|$Í×´všÊ¶ãì|/½ÌM¹-U|+„wÜ´˜5ˆU¬­ù‹Ç4¯ðB±A†™rfµ;Ì Rògw°ÒÍ^J-Õonv.èÅg;¦Ë?¬¥ÕBgÇ‘û›åëŽrà€@! ŒÖfEfWÔÛvÏ>tW/ÙæEzÌ„„Wßß}y„WŠÝµù„V×#$Wò·ß{_þi"ºmÃTÉmyÛÍÃڶŒeË»õœd³kT_†Úˆ/ *´oÓ纟­jøL%dÈäp¦øÍ(¾j¦ŒcðÝCÅ61(XTp¶ºD}sÀ¨Ħ¦’×||]?5¾Ã¾©’Öï.{þØ ™Í0ð¨XÖ˜df1Yö!q‡Ú|Äb€£@;þóÛ~Çá~^Ïoþ»~ƒ¤Ì¢¶^,„W9õæ¦E{l¾˜UÏq¾¹Ô" ]âùÛ²á„"C!`2Üñ1ï°13ÿ.¬Øž:r¼]þä×+§½›õEãÝ8Dà=ËovбGѲ4üLä&Ý3>$Ú[b;sÎRLßScCÖ66ØßÝ–ÿ+ô¾X7Àš0«¼ÑÜa¼ ‚`Ô¿å|H3Ï÷¡zeËjùÇxžxõ¤Qr,û&“‡+Xã df° 59[(*KmÌ*þ‘ƒ¹dfÃ…TãS`Yþ°?Àÿ‘Dºbäpô8æpp=V¨Æ·¯"ie4‡ìã§DM>È(£  Ý’¾"\ Û‡-˘û³³V^¿÷.ã?Ôß<“í'•±7N|̃âÍCš‰ƒp])t*H(éFŒUPÆa¯—Ï®¯ýN8|)µS©×Çqkþ;rË]ÊÓvغôÃ'm¡p¼¦[üé,3¨®¿+ÑÞ n×Ì}éЊö}oURi·Øê~]®;å2Zvpåck1¡OspTžd• ™>—¯ä­@€&”¿EºmQksŒÇ…ïñ{ýµÌ{Ë€­ìlk+Å⸴=ï‡J *ÒÂò¶zÌn®Y˜Í¥¶ŠX©çu3Y$o=}FÔçCù}—Ýh²ÍA¥Dû ’†–›Ï »æt\<ªÕþƒ)løB}é >üOüD€£˜ÃQúùµ¬RìÎÉG<Ö¬ï¨ßhµ>O,Ã3wÙxf¸ î"k’†©Ì„ Ö°ê'Ÿ †} ùe4U•Øc &«P!oË b@Àòw3þ©v \Î:\êÄö"Ò|0»ª»²ãrÝÂçÕ~!ÐÁf~´iIó L4ô7Ÿžõ=‡÷J.jú'镽ÞV/!ÓiOÕ‰>hv~­cž?ÑÊ¢¾Ž±¶ÌŠÚÂ+Çÿ2_ÀâþæY>×TÀó]ækl¶Î壻®L¿2u¿?®^Í!U3–ÍÂZÁ‰ÊS)¦‚ »žñš ?wצyìijÈ–ëÖ,JÂ$»ý0¿åM™½‰«ˆºù­¬Ec¢ çaªžÊ’;õì¼~AÒ ý×£í»…{Yû¥!x@òi}ý }=¶ß…ò-Û Ì ˆ4Þ§šäºcÄòÝÜØžê«ìióZ3ÐL²Ö¾ÊÛ|ãÛÄ™GÑR‘íÝ©rË –I\û–€'ãu¸=ä)NÛ¯_ÊkÜL\wÆÝ7 v¦Ü–—˜>òr®±R /S¹è´q1gÝô·&wfV³ÀDçðª¢U ˆ@ ›9¿Þ%ÀP¤RÐ@R?f ôµ¬¥ÀAc>-RÂ¥…„“ÇÍTt> ÍW (¨Þ+4Êsê¶n22»m²Zè§?!H­ DLOI$W!‚’+sÍÈEvhE|;âàù»]LF&Ûá÷4¼ù•øz(øó2~¨ !3×éš¾LÀþ6<|愬A‹ÌUòX]û<§†RÊ–œ¦`l0Ow†–Éé¦6>GÚ‚Ot­ö$‘Q=@†~ íÓõ˜_sÐÊä&“Z¿ª,™Ýú’fýö^ÒuZ1'!,;Ÿnc¾®zõ¼§7å¼–oêÁq%=ßîÁpŸ¡Z®yÕ;(ÿÂË©q <›+5mcºxåvøî;ã[£ uÆ‘„%ÈâQŒ/Üõ®š ?†ßkÁ¾Ã¯'ÓÜ­•*&{C ¯$íí*j™kP¥u+´éÖ÷-~¢áÎÚÖ…hûÞ"AY 0€U×ã7]ôØîi{èÔ: ¹ }úXcgSM¾ê«ëèDåE5¤[˜ ׬t\;®ï¶µænîêööŠF~AwÈékËàZÌ“y‚/o)è³ìç  »Ny°ÝIùe/tý×mŸ•ô^Ë©Š„˜Â=WMÜÊ¥¥9´¹òëñî^?–oŸËÚTçö2ÁPQ`qïdvŠ`\)æ®4ÞA9tNV×Zc-Ly9¿?w+Isvý±Éº¤_N×cB“ãþë¹ xÉIF»ÖŽíº~;˯œ…2 QóL~•¤Bþìxhu8+}?ŠÙÞÉÒÿ=Èó›‹¶øw´7©àeÐK±èw½î¨yÉI-ìO«8WÎ|£ºp?ùÝ5hw弩OŸ«ž!®oÏ„ÏÀÝÆ0 `0 "N?ØLk<ê2¦”áuÖs-k¹Ó­µ _2õ|Î]ñýO«øŠì@B—™HWß+:ŒÃÑÑêQO>]š òH!1Œ7ÎÊòÖ‘3ÈÛËÈ/ìFµ|àTŒÐ‘{¹RßkÜBÙ—eyÕÑ#FKc¾érjˆR/”|?#‚YšŠÏµšˤSœð˜D-ù\¬2ÐÚN7L=K.A„ƒ %UÓËÌÏ\èþ›±IŽî8C×êÔF"Ǥѫ¾ÆõÒtOm Df9b””‘åã˜âû“ù’§ÿÖ%aU7#r¸¿“I]¸ú×ÒBó¯…„&AˆÒ|€€BIfijRe›S¹î¾i[›æÒq[ãWúŒX(‡_;ɤåG–ó;þV‰GGÎÂiá<÷‘˜_Óî­@ŒK€ ý9D`Èè|QoºävlpXÉnGŸ³øæf’Ý~r¢‚»avç¢Õ±µ Uï½›Têñ1–á?›-‰Ìï«àòüfÉMþ½i"¹\ÅÖ—ËVÝÎ4fcÒ¨{ºŠ‘kl#UR«î21àþð"}ÈM-=󶆀´‚Æõu8g=õJކo­/ú(yo%R0/1-SL°ì3¥yi}…oZ7šóÉÄt_‘5hÊ‚µÓ(`ÕQºè´}Ý;†xmS«KMo#=Ïñdk{—5ü0¹pr9µÓgаœØ$Åêymh!ð÷¹ŒßT šå5U(ÎV9>;5y‡h…ZiÒ1°i~¬3*H—µDøò|ªæùlíôàÉÖ?<„ÿ(«ÝèF6}mq=`Ÿ$“'F@B{>ïKïÀÓh½ÿNªd³Øí¢ùèL‚§Ò*;AßEßøÏ)-î D/EžÀƒ£ÇĸàËˈáÿÆ6Þ·-×±e©Ô%×d„ Éñ6>5ãî{†¨Hùý{&hJžö»*ðþžj⮩A5õ¤í˜Ô|´R^÷»íÏ'Ø·º{ÄÝ ©º¥K3U@†mÚRÍèÁ*k¾q!Ž„b‘9ôì-sE´gµ&SDˆ («Z¤è„¯šgœc_»Ïô+žì˜çT%â¡¢Ìý÷‡i†‰YòÍ8 ëxw½Dh*?ãÁÇý§ç쬌£ÁLýƳ‘±_Q'¼Ì•wæ°PzIb†(U€Ä Ô<ãœþõþñèÇ’;ERpÁÞ–œ^LDû=Q¡þPáž mT¹q”„½*_‹\ŸÏ°ÝùšmÆÍü^KMu¾ú-¦Ê¥²'D¸ÞNtj=ÜMh*‘h}’í·}÷Ž1££¿g¢›Äz±‘)ç±ÈEâsØšòmT4µ9¬}õuïQâÄw³¿LbÓ ÒéѧÕÈblP Q8¸ätQ £‰c-Ÿø÷›Ýkûhûg˜Dv½8û·õÙaüò|¹F;µ‚û;~k-*ïÕ_"9 ‚ÿVºü×¼£sò×Öõ „¢Ÿ!Ñ`½<46Äí^acqjË€ÌÌðö.Ü3¶ßžžZPé•èãNn&û·+žh?ÖvOAû´h‹7ô×è(„døÀçýÑ;2kÛ'iã/‘: ~¤Ñ×–ÊêâÂ@÷–8Wï¡ì¸›.Ͻ¹ÑèܼΟ£»ìè¬ß'Áq÷ýêH®õæq\6 ½ciˆ¯Y9-zÍΊ~FfŸÜäþ~OWp§ÿ‡õ·ý_{K®p¦ö„ªÒb·:rü£|“´YU™®·Êÿ8½É_:s­úDIÁu»{蹿K79‰KâßÖzÿ`žÀÀ@)lbÝqã8Ô>ž:®ùVO¯=ö»:8û÷2øðãZF>×öBq©ùlm®ybá"úwDwUûGã´Ù±·CP2Hjðü‰ elƒÈz ÐMq_¨ Ú2KI kjOíÃÅgʬ{eó&†ö#ê ^ø3'¡3ÚàEak·ªUÉÔ¤<&Ь°OÊ#m\î\<ßÎ+ ï[<òž çè€C3,@ƒaó®/¥ÀBõÂDW':q„%ˆÛTã»=Åû¤è±šm>‘M£!ê|=ªÞBqßÊN¼©*0ïÁÝé¨=£‹}‡G¾,û…;M.  ða舄ã°óÏ)-žâ—k?n[n_7ÑkK@o›É†ÎNùü¾~ï';tˆf£.…ûPÄñ«¿^C_†h¡„&¼YCæ¬'¾ŽúÎÙ³ŸC¡/ÿ#¾-:ùPiÎ_´h‡{ b÷ßîìw×Y`D-Lÿ^nT»ï3re4ÀÅÐßþ{½/ÇaÄÚ2òúx1ÚU·†+/Ö¿Úž,ƒvÅ˃àö˜¯Ï\7«ù‚¹D üìðÉ-j z=9­š³ÖGŽ7çtu„Xï‡@•€¾>*:Bz ¹·ú=»Cm,É|gŽ´ß'&w¯ªm×2¿lO> ÊêŸVâÉ òÈ{×§ Š{ƒHTaŸXè M¼ÅŠ&޹÷q„% ÒFšYÎCY'ÙÁ7%:ÞÁX–|z¡ /XÖT žå‡õ!{’içtZÆGâD.)ÂEш€1lºÎŽOŸ±Yk2D1Œ1öÍ–ðtþ'“½nÆ¿L÷fÐ]óé©ÎA`//Å’üt¾J Ú“7þ¿Ò[—¸—%c‰Ÿ€±Ù*«[ãƒk­Äzm ¿ßOÒÁ·ÝÕëÑúÒì8~?“ÃÌ24wìð®Æ¢I,©)^ü!bSÖ¥ha·²lâ€@±yý¾%1`·ÔŸÊÁj¯¼vãS§úžq¸ó‰xõã–ã`|òd0?º±åP‘€Ä<î‡<Šs¾}Ô׎£(ôZOÛxKÛf?ÒÁù¢ÖKéõýÈÿ?6Í%›Y#\‹í¤¾¬ƒxjÒNвuÖ#g„Í_¿hp½-ƳKø£áf=J´‘ÜÕb?Ö{^»}—*â‘4õp9ª°®HE­Ýk{5+˜õÿ¾ÆÔ= µÄû/úóì9ïUãW,I{‚YÊ$›)λyÌuÎÖ´“ßyO£*2ãç߈h±Q¤ï ïڄĉS1qL1æ´D _ÁîöU¿G¦þAJ7×!Âo¥mAƒ®Å@‚•©Ö?—l–|ƒ™,~n{;õé¡€2á#5©Ÿ™ŠŒÆƒv1œªòÏ/A¿žvrsib¸x|ÏM~÷škè»dNÌÇíüyáÁ“€BC ¸ßêÌŸ7êÿ J“k$x_î?ó¨R7#MøìqÐ8Kö7À!n—½Ža|—YЬR©€J%>·ªUãÆàý×ðgh_MÕæ7¦Éó{×-·õÙ‰3ýžL—/iÒ׃½¶^°èp½¾›Æèødâ=’=ïÔqÇ´úîýx<Íç¤aÿu;ªähà€Zä‘4/è]NJjy÷ ¯ÞÍPÆÎe«£wÖGÿÑŠWÇr8×>ÖþÕ2/Î>“ë_nK”õBïxˆá¥“q{ †‡šªàǶkY}0¸ÕSwž’ÓóëžÆ&# ƒ×ƒ¿u]_«½öÛÉÍ©³, }Ä7á»ÌÜÇ_â*í{bN>%ÊÚŸÞçÙ”Éò2Wð4w à€£«¢!x¸ÝÎqÓ[QïÎòS¸³êïT0×­¶™rSd^&l2›Š®ïî~¬ÀcWÖhÕOF/€GÍn[Z\x‚DâDÅÃúäu$¼éïÇñ’ìV‹ö®@ˆÄ`*ƒsÝþ„®þ¼µ>Þ̳bÏͰï$oþ6ú¦ ~¿§ã%}Æ…#²ï;öcµõýM4sýáépG<’biÆÐ–RþbÈXÓ0­ˆ Z T@! [8~9ÄYå۷ÈÂ@äÏŸQ‡§×We"òÕ«vy ˜Þ ¤©gøw,:ì+r„W¡EiíVáí.ß­–sœëú_†ãoñ»pý5ÎëºíþÞmYðµèÕ=/3Ë_eÊ1ü»”ïÎÅZ½:àÕºéÒbÎã2lÀB˜Ä†C0^©’+Ï"³Ñ‰îó[޳:+UÝ!éÑ_X¯_W|ÈØ¢³¢¶™^EÏó¾gW–Ej"¶È­f¢Í­Em­XfE}»þËU§J·b®špV@¬X`VpVpVH­b+ñ¢¾‚+oEi"µ'E~¢+yEtè­Q¡öw[†_x²El[ͦzŸ-°ý­+4V±Ú(©rEn¼•[uö@+ V@¬À­­X½ltN Ì Ì Î Î À+hŠØ¢¾w1w™€VðŠÒEmZôV¹½Øˆ¬è­DVïÚ{ëʤŠÖ¢µ¨®«ªw8í[¹Á¢ºÎÈþwdWŠÞQY"²Eoˆ­É¹"¸.ï6.ûMý¹ yÇ1·Ü}JW{”·/÷Kø±<ϣרŠÕ\^ɱx×DW àý |ð¦_ŽÞû½µb-‹ æ@,ý%Ùîªðë*«z (Î+éi§ìsZÈÇš¿ºeŸvó÷êûÿ“¥Kòt/ËèÐRW×^ž[E;ü$è“¶žS©;ú„†ÉW¶ÚŸ+z¡ÐD'ü³aU QVTFۮͤ½-¥?›áÔ£½ÊpGǧÜIåùqÚ¢åíáÐ@¾iÏSÞÖo½p…œ¾Ö*¥PïÅaýþ&68㵤 ô%äÈ:óÕŽê_§ÏÆÖ±ÀEÍmºö'ýk~“€Bõ?=pº!Ye•#ûI§ˆJ)®¥íGfÉšÊu’§LÍu…ÓÔÌ»4/ÃÚ>Ãy²* Ø»Xòž86_Üõƒ­!(ÂéÀ_ b hþCùÒJ_&íPé*PI LÈs'pŽæö;}~agƒ¹/ÒÅ·¹rkÑŠƒü;á>µtK†+=xþïݵ,pg÷PX. Û{ˆ:ý¬}(gÕEݶ)ß>ÏóaîMå9~×¼s¯“škekþõ!ǽ<°«6ù µý½À” –)WÂ¥3¾äÑ<Œœí™YW7›î Ø7Hä¿§pŸá®Y=&´i馷¿. P€"²|ü‚¡b‘ûÍavÔè¨!ô,¸Æ«Ræ^¨ÐÜôèâ«­ÁªgÑÞÉèZeœ;±S)·²šStL¬’ß_î‘QÞrsñëÖÄ«Ÿ“qU97뇪øÒ*U‹üʶþõ ÇóiGÒóÀjñO&âÖ®kŸ¤ï’"uÝD•Ù÷2 Ègq‰Ò§Æ‘å;&z‰¢ëshªyø?ûߊédÞ’¯­Ÿ¡°v~uÊÓˆ/n‰óR\…Š÷P7?8vÚ–ÉzökaÞo¸²¿6KŸ+÷) 4 ÷›MMoî¶ Î¨m¿ðšÇ= €d/`øs´Y@ÈïQ©§üýD@·;£Í Áí=yßÏ®ð›Y¸RTJ œH$m3liìŒzp â\¦¥s€ùLâŽórqrÄr^ßÅ¿¨`ï÷)ÑëOóÜGøÝÌ)!/VK<.ódš¦p½oyx ÆG%hXTëX¦÷Ùõ˜!Ü»”Å<ÿ7|"ü=|Õ‹±8FE¨×02àr ¿¸óWWë5»@ù«îc‰(|•WŒ!:ɶžN§CB›¢é§œôqå*tq€°qÆŸé²w‰$7“°ü¶xÿ¯Bû:—DvºÉ$µ9¬ü’ÜJBnká‘a­µ½‰8ÌêBqÞK–s : ÿ8Ø€±€j’À¬$!½¨ XJ›ül,«ÏïÛûi1™úöfûѯø»ØMÖ¢:ZóòÅ£ýA®~²º>]ÔtƒûÏI–y#ü~sçaX˜Ä™A’Ò3¶Žâ/J÷1ën;L´ÔÜš®5sýûÔÍ…8zN¥¥z9G„½ãÿ:…Q1^ ×Ó¹é¬zaù¯@·+™iׯg;\[êÛã2^mtŽþ(ë ‰UŽá„U]¢¹(U,St{¦ß28DCuVd¾ð+­—MÁ·cxåGäÚÌ[üop€Bó~yàó¯|üH†“ kŠ œn’â-c·:]{-„êX~Øräs°Å‹’¼OzÏ`䤴ŽP¯ ¤ˆg ¦ìýÊ€GäïK‚Z dvìeçd6â9Éô]iQrÒY}«¬Mjr,œGéûoöˆ´ŠuD×V,'ãøõ«Úõ¿êjØVÖÜö~•-n¾NN-JŽµßºãeäö3¿®ùúæNßk¸÷¯L~ZÓ­_ …Ù{ ß$¯5<Ð&Áéoú?ç»áÕíÓÓ ¬“~¿e}µz)¬T Õ³ÿ7¼>Yçýýëø*^‡Öeµcc‚ÿô÷Â|=àÇtâ o}uEeñF‡®°w=†Z¦`ãJÊG{)ÇA·èF ¶¥±›IfI]ß„ôþªEÿ¹Ës¶úÐÿ]“è„!IÙ‰‚+7öoKa8Q[âè/ZëŠ.Ä«ÏúÊ0U xª˜€B®^E>¾ÝíæöÙ¡ÕÝ+(OµÓÿû:óÛäkùN@¹mGVÞVhŸ››éá¢B]*SÏYrˆ}òpkV¥UÞ?ýŒGø‚tŒ¤Šæó¹–çLák–',€,¤ºp H d `#MÇŸô#]ÞßIÎÐxÕÕoã½=n¼²žl¯û²uû—lù×k«‡Þª«o¡rO—Õ=ð×ÄL°°¼‡Ú\7Y¥ÎÊû,ñÒ×îk— Áö9¡òïþ˜È.æý$ˆ¹ì皥Ùu‰íϱŽÔûÐNcî·47IçþÞ} )mkò?w˳Ì3A|§!»¾Oðb×Ìx`!)g¿@L?ˆåRÑ&Ôq9•'oȬüÖY(õKÇ£¥éyaí„£ƒ„0€ÿOÓy»úœGw;¢ÕÉ|ß{ükwûW‡Æ¢“ûÚ]iö«üxMÕ¾ÀÑRT÷²UćBMQ¶lY ìÂÎh©ý㛫§jx©ãw¼‚J÷ïŸwàÈJ;t£Ú(ÝyœJ‡&ñòwÉ]ov(Óý‰1IÔj¶=å%‚7±·û[k¶Ê ,÷Aõ¡Ö_|žV<µÙ¾1é—lÎ×<òí'ßß½Õ¡²î­¸/¦øX¶L»ö¾µ™¿6ëz¡;ˆóºþ,b×x€ Qi¸åOÆ.Žá襖cÈ}õ;qêî­<ãóVî¡»»ùÌwÇÙÞÕ9äŸ ººüÛÂî]¢#è_²®¼11WÏ¢Äî]Lœ9«³dqU/Ûsßþ¥‘ÊZ‹¦ˆID3gbÕ·œƒcS ™áƱ\,YW€˜?;áÄYõ½¶g–æ}*4;uVKP ¯Øk‚½ÍRå¹4i²S\Ù…h{ËCp.Ô *v6+ø°œß÷×ûŸ­^f±¥òAÐy)"Òã°,8U»òæÃ‡2†ô6g^òov) ¦­ø¡[H§z’7²%Hy!Bïêÿ+únݦSé>.~ëNÝÛáQ!O}tS?:¼cöD®ŒÜ×¹)Uµ7ÏœÉ^~uçÅ‚T›gÕËG~4 ¦›b„>{J¯öÍñHÀ” N½;Ã39ÌíNÇæÔò1~“ßïóœÒý—™ÝMcI˜ï.ËC_Åü¢kBßú °N8DåÕû¦e–C饔s'Ÿ[·[‰vßê©ú¢øtý= ŽÔsXƒª:zÔ×µ•õJ=OŽzlM½Om”º" 6öy›0ß%àŽ™ËA=ÀÛIlhžPu›`žžž¾ÂÝÖ ÇÙ´BR:kLâÆ6aòh´2¸È½º•€ö÷5K7>aýâãÍ7q1ß9ò ÐÉ yÁß#B…µ’Ms¡×ÐÛü¹•WéGBMÓR*Þ/¹SçäÃpx°JNŸCž¦TrU„= ,=‹Ù¾gó55½ÓSKQËöÍz΂ZŠÒ=’:”á3½Ã;|Ø–S½àV7Ì ö&©îåv¯¶Wc:’£öµ,Õ»PÍÓ8Ó3ΙVp eʶÊâIŸé™iÕè[ek0S·i?ñY0u±¬,’x«MÏrŒÜã?÷m¾OÖ¯Ìç¶ý½Õ‹€5Œ@)Q€ ŒroG?Åþ’ÇJ#ã±¶õ\yÈ0ÿ•äKZ£â4´Æê,yRÚ§.B‚§ï£'yHe½„©„ÓŽPÂû‡r¸±é|íݨ·™Rì-²·?s;ÉÿÇ{³ 2zh€èÈ6G¢bù¼8˜™:¦H€¦-æŽ×¥—FÎl´î ÙÁ‹Ö¹û‡»ôd脆zK:xñºIá·²nÏ;KÔ™ï9ä¿£„9$¦9ZòÌÉÒ›i ÅÿÞ¡¶$µp ;=:®ÏPœõè­yæSc‡ó‰M³?¶?Au°ìÅàynzAÕ”øz À#R¦äÜ>f€|ßÛïªùwªüß=GÓäpÝ[Šõt›¦ßØÒ±ûønCÏç­ÿ‡œŠÔ_}é‘[¢+¬I+ Vå$V>˜Ç “¬µIô—ýÖJ{a´_åX®{dV›n¦/ÒcªÐm>P_uæˆk°( <_o·W3×N«uƶZØ™0Üj‡Ï¹ÇŠê}*OzˆMÏ­ÝâôÌí¯Ùaë//ÎÜÓ}‡m©Šg8ŽßQø¨àÌ™;eó¨úø1Q®ô<žCf­Öx7_“T4{e+/µ§?©OE¦HFUò^>ÚŸ-#ðwbS!ʤ-øü&q[>} psÝž-įÜÚ‘Ã*êK<7bÄÁúj¯Õ+/Í"YoáØWv¨ûì^³2ˆâxøìôÿõÑŸQ\7ƒß/ã4¾ ÞWÐå2n Ï k­’é©ù±›µú¤ ~ŸÙ(Åg#Þ³kŽà\¹bèÊ€@ %©"RÎg€ÈÑí°žä@RPø[¸¦hfšÖ«ÅV¹ïÂâ«“ÅRŠVÄ,hyÒ§‚T!.Ú].5:Îu8þI6Ýr2£ˆæŽVÏûh…„!B[ä[Ñ(€BB ž&$êˉy)_ÜujÔ~;ú˜ á©í\/‰Ø}0b¨7.mî,èÎÞ‘º” <{:‹#\oÓìXñI®¦ä 4¿•8hÓA‘é†ûu"ß@ÝtÄÀÉ[aÕŒ2`«üæäЭœ­ù¬Õ“f¼MæLoW™,¹dÉÁw+Ct惕k±ûûüÑ „ ÷G¡ª²!ƒô "3Ô]¼šËy­äͯãÝ÷ ßÉðªg˽ŸQ²c2&'}¥èvyäVîûP¢)¬ü]ßðäõìM“µàu·k†½t¾þºþÏÏo™Í"¢œb+Ø¢²Ep6þãÌÀ`>.[šÜçÂî}kéíYï“jÒžÉq7ëÞþÛ¬èå>› æBi.rˆÒø•›%-¼˜±ßß“#Ï+}m¶¥þ嘘ÿ2â¿WQHé^¤Æ¼V–ëÂg…q׌m†¦¤Aõ¦ m<âKšé©S»’@ò¡M?Ü©Y×颼¾É [ùr÷;>Dq–B¥['ŒLå/D0í¦Ø¹IJó¿úº–±Z)FkDÍC„Чž³‡£óú÷*»qxL?ÔqÛr[*HÒGÝnX²îÀRZq9·?è­ÓjE‘ï_!öØÜygÛ ›…×2:ßCW¿€Ùþ~Ú8ô§[GëÙJ<€„È÷“æ¥J ³!`c@! 3q¡{÷–Ïp‘Õ|cÃ2¬z‘ ¯k „£Û%.®U°RY½2À÷U1ð}¡ëÌ ´MeÈËZý¹æC’Ç5™¦«+ËŠ<ü*<œöìé !2¸ ü¶q¬øZÖqjðUóÕn2æt²V*<{¼z,…*uáE¢/„Iø’Ãô*HÀd@ öw0ÞNÔ£áâßÖx2ïGLÞgF_u¿N‰¡­âòëžãôji}Ëzsµ:ÎFjüÝÇ:ŸÛ ¼ˆ á˜'ÁÌ iE,ß}7¸äûV\9‘¶cøÏ™°A[’!† ª9Ê/}so<|}O[à³ßIôÓëjLõï"Ú0N­b'«Á±X8¶ŸK;Ôœ\OYrÒ @ñù¨kîœö]‘¤o¥ñ+µ®î9ÎÔ ì{ÓÊgÇ^g»÷Í2wr&<”ê¶ãK&aß=E»éÏÙ×eÁñ‰.b[›o‹÷}ùiçÞŸ×Àb­WÜ5ñ×é„·³ÙöJžÆ6~²ùÌl×éûÖžŒ…4¬îꆢ‹ØÐ»US.hYôJoXDχ3%°øÙ·j¿¬hõìï¯iõ¡Í{‹Ä× Q^â"ö|½…*‹ *Zôô[åÔ8«”QØü–ÈÐߨö¶4«+ÂÓ—lîL_JÓê¾Inû¨Ï? ¦Žÿ2Mãyñ­Ÿ/ÁÙâ>·¡p¾' k÷ —ësòr”·uϧKèy¥õ¡~ðµIªËËr=\EŒ56¨m)ùJ+cþåu4¯½TÙ"E$0¥3RH†Öi.K—Ùì[S(rbÇþc=&k’~ä”ë<,}Ûÿ®œÃ„ä€9¥&Pøt_ˆ¯X›‡RÒ|˜Ò=–ýS{ƒ:ü @! JâÜLrñÖñu7;±ûr‹¼ =6Ëj"º´»]•=¶ÞP?yÛË22¦; ø¬xŒôìÿÉ|1Zuxì¯3X+)¿wÔÕœï=B-À« ºÿ9·ïŒw†=çÉÌp„q¾:$ÿ›²xÓ•S¥¶A¯)uðgºðȨ’0¼B*®D… ”hQf)’ê8À}¿YŽMÕß=ˆ]ÂÛ†YËv·¿ˆ¹O-ÁpB¼´çfM²7QO<²«Žž÷Ço®;V³›÷ª[0i<ÉÑ´àZŸAý"›¿C^Ñ<^JAû‘'ˆAü™•xÏœƒýa\ð!!ñ5ïC>†fÀéëÇü®<¤ãUTK®¿Iž1É,u¸µ\¼ ˜îò#m„ÛH>— “_YæDNün¨ÙJÑ÷U’ ýÆÑz@Ü4OŒ¶Eþu4¥š´”§ ÚÇfýÙÆ3ùš^uÓ£¼pJô¾WÐWÉt}wšœ›F­îGö©¯ÈÇ A†‹ ‘ŒB.vmPûWÅ&„œX¶÷Æ4]{Ѝ|«`ÿ•Hf~…¾kê?±÷”ÃÞBýÜ9Ö‚u@ß<èÂ×!s¡ä½Uí+oqOõ>BÓ/\ZYê\ª›”L©_‹ „ñæ‘ANîý?›å±øúΜó­ïÏû¥Y–oÒA™Äüíáz;K®Ýº_,•“:B"¶¿³‰mgkX|WTŸ^ýÀÑX­%:»$ù@ xp”¸üÛHùöCn µ  ¸ Üy9{ñ;ëK{˜­5œhIx²Aµæ`/o0ƒÙ5/-J®ðªú¯²ÑE%v³"š`a)ÒçQnÉ‚âÝÌQÍ_œ|i*ƒ¬teþç¥Ùí™æöµújg÷ÐH´a/OíìålÉóÁÛ6¾Pˆ$P8®AóÈŠB©ù…EÚ~œDÀ,ð z–/Ï´w{=Ç\quöÙþ3Ieí÷»§_çôõq‘ùÝwî3^ÀduÜG[È×ýVôO"A6À [”‰H%J¡ÍYV^7þ¶5^”I“(Õ8"ÇU¼¢¤¡ÞX°R„()fK§;ˆj´Š}ô±ª3Y÷¿KƒÕ'6«ç-b)/I£ÃW²`) ŽØ:_„ÂI ÚÁ pª ‚XqH()X(RC IB *AUUPªUIP ¯Žá!T­4ƒ¤$‘37Õç”PH,Z-Sh—‘Ô¹PƳªÍO }šHŽ¨ÅŒÈ+ñÛ™zR"$È*‘ §#iUVÝØ ø}8†/8r+ß)ª˜L”Þ\të7Ë¿-ãŒ¨ÈØ@^ð~‘x›qœ_Žƒ«¥<å ‡±[Í#È9éO휫8ËÐ*»I”ýwMÒª™_îø»+ë0ÌźHÀÿ¡QçKù)\S©¿'ž# >±-æO‚Ò;•dþš™¿­™LCØ0)'|j'=Ð.6“ëc®oñçÙ¤µž{’\®Ó„[öä‰ñdovŒV2iDÅÊOSˆ!l·.t“©¦ßO³ ̵ŸÉe)MÅh¨O|©÷ü$f‘ÕÜxUÝéeÙ9Ø…·êí€q^úÏí7G Síôø,4;'·i¾‹cøÜG5½¬}? 0ÌןÕüˆ.wôZ6BËKôÊqc^ó%ÿÒ6ø¾9¦éR믳«‰ì[ŽSƹayoGðÿ²p„!Þo^…7ð©B3ÉÄv¾ ø1@tï÷¬éð „a¢_}ÊÇÍÕ‰7ºŽ3“D¹åG®ÛƒH“i±æÁÞBYëê_êûjX|¨ÄÍÃØM¨¤YÓ…k½í½µ·ëÉZ•ˆÂ]L¨ï¨Uœsódæ»I)yîTy›:ë…ĉ§ëÀê7„´·'¯c‘ôÛ‚òºÔìn6,ÎÀR–\ŽžõpJˆ¢ïñ÷®ËrsûþSm£ƒSwÄ@! Åo#ͽÓà7ñ:ð]åtºxšo,/è¦L®FÁN½ë4ÍœÕÿâoäñª,·ˆšãëJ°@­LØpC¹U+#/RA%¢ƒÍLkTˆRñjÛ§®Vw¸·¯ìz]&ǵø«†L2P“HŠ´*Ò•"?t­*”ˆ4,kÿ‰ŽÆ«¢ÛLe;÷éezý±Ï\²áèÉZuаßô4r4Ÿ5ò“œx䮆ëÇÖù)]~ïîNÝ1õ°n½™ű™‚Ñé‘Ï1kNã—‡i1Z¸Ÿ^Sß}®6¶)ζ)d%«†Õéœæ%½¹Sοô¤I÷Ý”…OE³+ZଃG@9·» 瀉 “’`¹‘ºØS+þþóøÜät1üÓÐLÈÓ™úŠFœ½Ô£KÚQ:íû >¿5cd˪þÄØ*BøO[cU"1ĵ3Íl=‘†ˆsrŸÕ//*ç„YÔ4†›ë¦úåÿ‹ÍI/©Ø­ «JÏ0f’Æ£ñéÔø ÙYñM±»p8@0¹ëá>Ÿ_¦yÕÿ«Ý ݬ™æ—íFhµÊ®Á!®¦g«XÔM‡_þë~ÿìèž ô±rß·Ä~´ªžc§wXÆtSíõª¥€“ ‚öpþ=•×Ìpålq¦Ž3›¬!¥¨¾ÛG |Ö‡­ybÝÑsHB¤¤’¿;ÚX~½/Rñk訰o›ÄpiRp 7>ZGS|ÎS§Ï"ä/?›Ö×¾ºjX:éÿ>Z±ºyùú]ü+ºys:/[«¬¯¯XŸÑ\Ÿ3ùÃw±6°8oÇÂp¶Ê³ƒŠn9 ÁÒÙø8 Ì×}„¥Ñ<¾ÖÙû6Uóׯ&4•î[)G‰ä!ZÀù#‘È»[¨¹ûеGÿ7J…ÇÖU‘ú=Îgp¿d+ðºÜé#ÐbÀ¥@³ÍRCÃRÜs†ÇÒÔ›¬¬S|^Éný£³Ø‚ªžýæf¢cÈióíçZ×êÌî<ð`ÜŸìãYuHœÅ‘|ÿµ8µ¡MBVňr•Šyϲgƒö?ªa´…WеiS±%TRGþÄ©ù£ „/Îø„¾ß¦Áý!\9Í_su:ª­Wɺ ÞG~ÔîÉ@aö~&iƒZÕ¿fá—¦ñ m×õ™3é-¢qvç²6DžôDrý_“¥èï×*ÜOÍ-›™V PcHÌ?¬Fv°ÿqÔˆþaob* :=Zõù%Ø—Îü}Â|´’m/…íÅP$¸¨W<É&.Ö÷%‘ù*®ݱI mœñpjЊ xz™hRT°eœ!„*̶ýÜ­‚Š3 ‘&¹¡x᳿ÜëœùߦðÞ$p¿AØݡ͑_xíQ Qy&HßÀ z jhz(=Hq½Ígqÿ¼IP¤Ižª—Û.ÂM’ºíá`³ÖëgIñ“»©V ™ý½lÎîG]¡!ß/sįAùøúylª²ÒYo>BLŒ @]žHöj©’þobId``SÍiãnàÐvݦk÷1|ütË w¨–Wl3c3 yéO+÷Ÿ#›ý–TjP26T.1Ô“ÚåBêªdî<õ ÌõâƒÕAáMYü¹6Ù|~@â0X컛Εv'Ržª¡¾7/GLt,XnÙl*øëð3×]} §ª&½9L þ¿‘ó¸-Mrï²îA’aÔ EVö£äè—nh½ÜŸ‡°@! ^ŒÎs¨üz¤šgŸÃêù̽9%—&^"KhÏ=Ñîõ(ŸŠ`ã3šw­ÚÁJyí˜Ñ7%1×[@@x0 €¡H B Áˆå»éˆh”.½iyâ}3íHŠýÉ¿]¥*¯7›ëXýê[Ï‘Ñ}wMšÈž›=íBë峂;ˆ ‹o›U1WÉkú²#IÜŠ3N²é4<ΰ›ŸÜ×£R…­â¼³œì:Œx ’²$´];) ÔÔ*"c ‰Vd J&@ëÏ>l h=>·7÷RwäÁ=Ù¨Äçfä¤Uñ¨D%}1y—Ãú@ œ` ¹ÆÕŸzóך7BQMQ7ŒÂÅN‰aµ#°«@ljxŠÒé]ìÏSáþ»œ}iͤÜuóÿ¡È¶>2ì¾ï*“ÎøÓÛùxÎLK¿Â½~ìŒøá<á×Õ¢)e쪯BZ›çÆúò'Ýi˜ë’غ o'½‘Üœ¤âþR à˜»ùºì¤Ä½þù´^Ô»þ¢”øÿ‰õ&<åD¶ö÷½Ì¿wûÁýâÊ=$³Œ¯¼8ÛFàÿŽéÅÍ©É6uú# ƒÃõk oN‰€BfÀSb ½ô <`Àc™GÔ›;-«-o¿¯Õº¾>å¤ô¼y ½ýü¶Á{Å×wŒ·›1¼K{i~Ë¢ÿÍL ‘{×Ëì8)ö!] àhÿÙ†ÙmýG&¥sF´poã÷”«åFðêW&à`kÇ`ÿ`:c£Ç<ñªWåØLKÏ(zZÖÚ Ÿžf çÁ-/CWÅH8îòïåo[ý³¼L£å¥½JþwJ¥t|v¤pI¤ÖZBKQ_0@¤"h&¯µm\)ùú¥2²Õ¦ÜûC9B£fÈJ¹·`åÔŸªÇqì¯é søT¹4!Z¦ÃM¥5¦Qf÷· w-¾>£Ú  @! •DõýgŽ“n0Žj’úx,ÿ78Š×Þ½6’ì³ÚÄ 2‰`±ºJw»Äíćž‘¯m90‡ÿ/ðÂ-ß;£ÐŒP¥øŒš ‚Ïf‚ @ô;ò’I@3c Êžý}r?¬kÊ'u6•~×ß "×Խ߾è^e÷Ó´x°‹øž7¼:#2[§Çm£Pjê{8iªŸœî‹¿¯xƒ`¶áçÁ&Àz û3ADIØ}òþ•$窘ê[å!Ñ»‹l>Dã5» ½¨m]’ü[¼òñ ”‚€H'VùÃ=šçØàQ=k¸vçÿ¼]Ö íÊ9¯—æßðU'üõw;é‡LøÍ‘´Ýë“mÅe#Dïg'¸Ûrø›Ü—Ì4è 0–oWKM§É´v---‰IÝÔ»Õ<ý¼ž¾ªr(Ô+Sô¥‹¹ ùG"/S÷ýJ2cS=}ßÜø¬V&Òbržà€–ìU«Û¦qÁOcK…Îuш JQH bòü_7['ßcIZ”ÊB;¨gE/tš¦x¬ãr:¼LN®I¯%‰ꙆÀ6ï–ø·OÏ^;„ùì¨n£N{ÓO¿£t8ˆ€¯òÏVpR÷4N6Ö¶¸E©Žq›4å(r÷Û´¸H÷‡xÙxøš_’âÿUc¦°çЩ¨ûs÷òXÿ¶!›Y˽#„å‘«uU/ù£àFg‰8ZH¼ñèlO¥ ÷£y—!kèßèÖQX›ƒìJFidŸ÷œàbê©ÙÙßÃë€ZV'Ù2õ\²J¦ñmb§ÚzÎõ³ –ºÉ{$EŽ€ Šø´9ç…Nh‹ yû†Ÿíer@)1)ßιðäªf#ð|* 9p&ìÓÞM‡0¾!¯…ñ TØÛuM·;åI­¹;Õt¼ÒÜe$€E!0í”~¿éÏF$v·üS~u.6‚ÖœºXÃ/Øö™%ˆÑÑáØq)eöNžT£•90ªR¼~ _¯H¸Ãg?áÛs°1z€ƒLªM½T2EI#ÂQÂ$0ˆq…Á @ÊÒ§©æãÇ¿²eÖ¥Ç[x¼’š4nl°*<¡–t. Ã3›$¥ BV29®ۜ㆚üt˜.?eæª*-Çú¼—yÆÌZÑ¡¼RSvÓd§ÀOô«®-ÛíâƒA`çÅJ°^ dôþ\3~3™†)­¥°½5µ\íÊ}NòÓÜ*D ±_ô|(ò¤±ù,Ržv@@ä €km܃pvTŽuìÄzÉ9°ÙNOîuk.~×ôwº’s[Ú‰˜p³Í?ÇÖP„ÜàÖã>lôw²{ôή·ŸÃˆåDU3±?ÐÁg'ù;K]¶c6ØË8h%ldÈsmíå/—…¡;ä 1ìjÙ˜¥Ê±hšÕx¥?¤’ W›ºÂþ‹URbdãl‡B_3»û'K6âHÜ¢ÌM7ü$vYzâ0nS”ÉuJë.æ®T€q6R5we!ûú8òªô£d?Oþû¾÷òÞ]°ÿåèߢÀ9)UꦡèìWMÐ¥·¾¹³Î»ígpÄÌ©Å!2[2ùP°½õZmÇ¿ªñ¿‚¿žà K3ë¶é‰é¿õ™a 6 Üm6™æðuŽ 3»;ïg|l[Mw2Uã±ï;øÔ-jYYʧ%¶_÷»’‚ ´Ó×Q»FOjŽ)3Ï·kÙ –_æÕ0·‘ײ Äùâ¸ðݺ„¨kó¿ÞìÎ…{òV]@ÏîõÝàWò·¶—kZ>4ç8 >_ƒ-ë ¢tˆ AÈ·ÐåËÜ€\jó,&Y@!SnŸ”Âïn²ßçg/”áG…6õ“BLnä¤2¸—A0ôÓ_¦¡á¦#màß(ýžJ/ì é»hTÞ¸a¾]:Ð_Å™‰ûr½Öëí¼ˆ˜D  r»-9çÏ/Ôc£`¥§FATV@ú P3éQ B ! =ùO‘KaˆKÕîÐ|§“cü'~IC"OÌÜ.ÑØÏZ7hG"=|†Ï[2Cë½NMr±£Øâ&˜Ò<~Žp·E¬£YÙæmo,˜ª6B»h³,Qfá3žÍ_ÇgxzõùA|C˜ëÞ! Þ€]“ÊrG\xMM`g‚Xò•a`TãÒ†°žcÆ»ˆÀÿÚ ´DŽú0Á›íYàg€ß+Nà+]–Iª1F”¦š)Ž'÷°ÍKŸe¶” -î±ò!ò€¾³öˆÕ¬“?1¢éïà1ýBÅ”S¼´Ì"¸±O!à75¯„š C Øit…ðƬñÇÑ‚k&Ôš Á«»(ü¼e&D¸XSG4|ò„ü,Ï&ª:žüKãdÞ]ÚÁ ³ ëé"IU¥³ú”Z;RÙ¾´dàg準‡{ŠÏzUˆ.L:¬ R#5Hß}4hÍlž7KŸ¤øähÿvCÅ«N.ÙÖÊâ(²¥×ãcnKJÖIìÇ÷b—/Å¡´Ra½=F‘\›®\ÜX­Z 2_ž+-=ÌŽ 7…÷Ð_-;oiM»*˜ZÈ£r(4¦ƒÂò¤¥R£Á0J|’ÏË¥ÁlgòMèG+ÅÄæÙ$ªMLãëø.a¿O$jI+©áœP×ëj8zÈŸƒ˜g9>HHv²j^q< Ö9ö­Ž¦&xh=¦]W#¤V‰YÇŸÈ1‹ææPiР¤™XÔ¦n¡EÅÒ! =„Ýaq€¶«k1†åq5©n-Б*$ñ}ÂéD¡ç/+Ö PqxÑ­K*ÝdÄʉÀ×8Õf@HÍq<0š²pÚÄúíEÍŃ¡NÜ}Tàx¨êíY§I/c?V&fFä­è^‡Fúi ´‚iäMUç“•pLÛò„Ù´¥VxØö«™–>„4èf‰ªJD^&ÖQËWËÙÀTE9#®ÉÛ`> p=“Th£>%?p è[úêY«†É () \9ðÂ_XÛ¥YÃ" ¬xº©é¬ P*n0íG4 A5&¾I£W6 .é ýrxµ²& ¡‘¹$æ2R†¤ƒ P>"x7ÖcmJJ vv²¢¬|؆oØb tgØgƒ˜+\êø5ØlA‰Å¿¸L¨p’puaÄÏe¯Y8«iM‹ òTËm Y:.hÐ9ãï]<(r¹Ë¸sTsõò Ž™SDî*ê8t¦׫!u^|°'·u0„‹¡TØ_]S‰T7§ÞL> Ù‘WÔRV™Úã(‚\UÙ3wS‡oF -0âk„L@ s„â$;iWí· V2kìjˆ/˜ü4’jjLý¦zV†í7‘æ€CÔdÍœHMx¶.päpèf­F7+Y*x¬x2rÁð‹¥V\‰f+íÎV3 0=;suøI)™LßIŒTn ²\<5qttí¦Å±«¶U^·ùi¶‡oÞ¢^ìŒãו¥IÖRÛ «·CcqNk‡žW,|‘«ÖDÍ@¾{ ¢)¤Þ• L÷†…«[#`BžÅbÈT‘{«u¤TÉvÒh ËPWIÏ«üÍ;1Å`Ø8VRáj<ãdæÈ†@ž˜(DËn%hŽ6$( j¡ “\Ș؄$Ü-‡y1U™VôåP†‡ à€]ÊP¿9|ðÂà í]e‰Ê(äE¦1²àòHDNVWÒáSÕIÞļ5P¸yƒÕòešTÐIÉÁ‚†¨TA24À,“'€íØ9Ät¶+(/׃·µŽîJèmI™ S²j¨Cfôu-I‡&È€œúxG‰Ò£j0ØÛ¤amPS1ø•‘bRàƒ”´¤›Âp9H&ÅiÃËŽ&êÑÜÁpõ¢),Z]Í‹¤›, À!£¶>ÆåµAÆlÓ⾉`% ùBF‰¤“"‰Ì!uîÒG{t¼@·ÁaÊ?AmQ¬”ä"!À0!\¹fÛΞ¶Y0ÚýͺHf¤XÕÚ2TKA8æõ8ôiËB3Jšòä‚3kè•.zƒã4c›…pí¡rx§Ç™YbâØsº2¸Š°Ì_E4AR•f%eŒÇ¸6­QÁ8X™ l ªÜT•ubR¹Ô É/^æ0Óf«dN.BòÁ§bÕÐÚ,lRL$¡GÂ%@³)~©ô9±³« Y?Äx`TÈs@¢•#ÂåAV'Áf šdA‚¿‘•*®Ýœáb!”…\Bf7©D(àÏ•x®Rúl'‘g¨…ßÌP’ä­Q¡†ÖƧ-«˜ÍG/Gy‹jeQŠÈ/®Hö– ÊÂÄ׎8† xØ)qm^ø½å„‡+]<ÔµÙV8u‰ ßFa705!UPàÑÛ$…lñ³”#"\R8… ®1«¹I§Ô„ß « º[â\%¨]e/:z~½6‘àUßPÚ×1*ŒÍžº6á—HÝv×ç5&ÖqÙõrG¡ÑÊ€m9SñîJ› 7ñj‚1uª)ÄI­ŠÁ,2\Û0b·±ºd0E 7%"²”h„ywÏéÏÙ‚uP]HeÄCP­VfªjªjÂBZM´Ù‡¢.Ç«ÁFèÄhLá¨Z%[ƒÈk‰Ô ñÌÌ­ppnè”aW¥ñ“¤b­@Š¢Q  ¤ª KSõ˜CÄ3#?à}XpÐqßѺ}}@†ÊxÙ7UÛ–2G8K»e½DÓ· )Ž­ d®*\ɳtô2Øw2RäV¨µdå êÂnT7NA÷Ó¬dnÕ€*+µ7†Î‡óƒ+lGfmŒbŠnAÅMé3×ÈÁ@NŽ‘ E)*ê%ë&„T…±db…úƒ$­ÁC1p¡m’ÙCŠG-Ã9z<±Aˆ ”*K¦-Ô TÔ¾®,R4tj×·\h;ÒîB‰y:$LseBƒŸz#f†tHº| ¨ßP®?q4‘Àk¦eÃsƒ6ËI£Ñã²Ã¾ z4–€:zHkÇ´UÑLlwtOÉÏ¢wzeËê“sÒmÄR(|"ŒÃBtÒ“oéE±²¥X, ŠvUn§ „hšk ¢!u Úe¬Û ©2ºíȵŒË’Ow2„LÄ»*s«T±l|=Àé9`Çë ™x®¡p*{å'}.ÂÿÑ6Pk%‘Õ©:#uYKÄåMˆ¼3~<œÍ½À| 0Šh•³ŒÑIKTÔ²µàͤÖŽÀ¤D“âí®ª,CP^šØ$g¨Á’¦,´¼«ùÃ4ªÀ¦š8øHbT/ÞaÜÄåA–”¬Ï›f ôóºù6ØÍÉ&º«dºP5râiQ]6^qBÖ†ìh¥°-‹— Óq·Àáa„“(xBç*¢’4Â8CJ‡x©“ö°Sœš ¬Å™Úª »´îlç¨‚à „Ö^Ä¡àI_5ƒ1FU‹6SF:º.MJ˜RÕ ±½´:‘€UíÈQ³ˆôLÚ°Cs"ûJ{ٱ毃^¢ ðZd!Í›ŽMŒÕ[v¥,"ƒ”£´ÄYyÅÕ¥£_™^w£&1¯ˆ©í¼«ê+U Óß2°^<åQÚZº‚ñ¤MÔª<©l:郕=(ü9ƒ…’ÈryË¿E%‰L ÉŠÇÌ0àA$T¤…u:L+#0ST5i9j™Üi;ù(f…[Ÿ´:¢Ža¨÷ÐÄ3E)It9[W5æ°À–wFd!HOztÂ1lW^4Óú£jf’ð°Ø¯)H-#!‚袥=A•b¨Q! Ì*˜jN«‡‚ż"é‰pqÔû×øs8’GäÜHHØ%I;„ì4:âMæiF—rÕ£úÊKˆ§ã ½@¶öˆ‘Ò»>¨Ã´s¬*بJ‚²¡ à/f U$›4jFZÁ‚É) ²ijŒƒ²¶ ApP³˜nñt@h”­fŠÊ*hó5ð—·(žVý…YËKËê±ï…QXžì5ét·ªU!ÅÁ¸Azøº¶éÏ Ÿ¶§E‰uo¶x­ïà7¬- KjCPá»”€˜eÜ ¯*Á²Œ¤hÈ8“l¤*ê¥,á"¿:ª=lí3òKP¼2•Íý]ê©ñë hd[+¦°-Vú)¤ô ™=sYE™±DgA(4˜(+˃Jªje-“ÛNɷîº Šr„¡¬’`1‹Ki%ÔûD˜!ž°Â:À«ºûX­®œJ™a3^5M9%bÉ GlhBz„@L?IBÒ||E…ÓhBº{&œ­2÷.b´À41™ãQß­[znÙƒãNß;HRúka º¹:âÖÕ8ªSV*œQJdÖæŒ)]/:"Œ£ÉÈ¢:*+ ÏØv•$pAf%ɸ@ö©$4W³”±v6îœ-|‡ÛT–*ð¢áŠªCF¢¼ÉȈtËÙ*R”³óô˜bÐèY2¥¨ÃC!*6Äü”^ÕF0Zª@ÌGr ,+«©©`žº†)¡Kʤ9dVh{…ìähÌÛJ{ËÅžÑ;,­f˜Gì ÀŽ#qŸ®#‚¿ ú¤NSžDê%K‡êmÚ™C0SÌœ¤á àO%b{›g ‹ŠþÖ»YÃkJ7³e$d¨Ú\X-˜o} CwÚ2U´c™Z:L¨‰öâ}†¶a´O4k7A €H€ˆ!¸×˜p¼Q#gaa¨ ™Ë&A€mU½KÃgC†ŠA±|[K¢xb ¤‘Ykà…½âIPÍØ{JŒ{*c3ƒpçFélȼ”¥#\ZKk©Æ0ÿµ%abŠ6ˆ(„H©’6XFÍqš(‰QqPé=Òª Fî³ Ó.v°õQê?PssH«°É µtÔ𪼉ê×êß^W^é!.mDÊß±ˆ¨}HÐS8 íð!ù'‘Q­D¢â‹ªD´í VjùË BèG/ÓÍÑž½•EtP$× F¾Þ®Ê{I<#!o½‹”HB¨‰+*‰Û:kÑtVñ•Ÿ “ñ™Nж&IZ‰tv7÷+Jé— ¶½\JÀ¹q/rtÑå](‚ÆS5;êA–™nå«ãÓøòË1Á:ÓÕJxøêŸ‚¡þ÷*~P¦)Uˆf¤îær܆N.“ØŒªÕúUrCÉ+œñ"«¨C̈ŒYLù¤’m/b6EŠ^‚Òcµ—BÉã::Õ!b©ä¦”8⢘ü5f™3ŽÂ:µË1…˜VØ5‰  $kxö@å@­‚úœ™ìZ*FÜCŠI‘4sQÇ/%%²bƒ¥‡Öøæ"Ëåeä©PWEEB*|õÄÍ™3n]_8J[D耸âÃ=–¿T³j€ï½éö·–(ôªˆe0„{Zri$èìX5W*tြUyY‹N'O*Û¦%– !ZžD6¥«d MúÄ4”‚ØPÜAÄ—¤2Êo0j9Uk˜>&‰ãɪ,ôš-|픃TôKÂ|€«´û4Q.….Ô›ÄÆ°k/gŒÂÉiú–Ðb¹²R’¼öj#/šÄfJÍ ›§Äe/ž˜© ïIœš™ôHX‰_.\ìrËÔ«p­M´xtrWú[š.Ä{E‹ S4s^Š…»,»êÔ8bå „âÄuþ.¸½*d%äo.(4ìA׬”A,û‚^Ö ‘xÆq‡ ¦§³y-K¦´zœ+Ô¯Â$R>ÕöÌä¯Ó‰50©£ËLŠýN^ç,Ø¢™c>ÓÇ®ªÚ*Bº÷áºåNÐU(nuÍÇ/”hÒEùê›?×*~QªËÁú©YOÆ °šjûFzü ±¨o\¦9”t»°0 ׬A71PÂÌ0'¨jEúyŒ´Äõìý•ácïùÛJ¼&÷p«å8-Tò¦Í—·˜ÒSnÖªÎÔ‚†²³Oˆ(l³‡ê.å‹lÉL¸K?L/Ãt.³‰W¦ƒNM]¿û£¶Õd»Jšæ’¾‹Wp&ºà@µ!±Ê"!J`%’ Lð˜‡¢…ì‹ÀLèÐr6RZ<­ÔW0$Ë~×ë~z=ceJ磈¸Ï7@EâS‚ 0.ŠáäÍ¥¥À›‘sT®Š.,¼¤o²îm&ˆ$€ŠtýYú úZ|Í!Ý}ì‚jÉÂH¸qœ·þßbfÇ …}XU[Oê͘@Äd›{Éêck2¾ü>îcŽ´ÒcFï–´_è0Û>š'­@Öeûw‡ô[POy‘+†Ò«Ò4Ç ¨äO4ÇKáœ//ù¼¶YÆ,„KÿË(!³¿“³4ß0Ef®ÜCë}S±u&sÑÂþv†ƒD‘׬7j ”O‹À>=Í!éCzKwUÙÑò‡U$º!ø3ÇRÉqiêQæ{³ôMC,w1•ùSVÛì9ÊçŠJR3ÒñòçÆì;AMª²TKbrG¤'®YV]¥PL‚*îÓß­s…!=ˆY¶ã1Â_N¢°»hý’wzDd]N+–1ùí;I+ |äù ¦ð<)ðgµaÆ8pufŠ«éfw…£+h_%^ [0!Qúšíƒ¾@ñ†GÒ¾#®œ®Õáý—ìf £.ºÅ“ªD^”çݪD7,e8âêi_RñÊf°Uœ½b]­9¼˜½|Q¿õ£?ZG±×Åíñý^3WªhIf*€M¼~Ô½#|~³”,“ð§ÃRLN¨N–ï_˜Årë¬BNÞ ÓmGbE„[œŸ‡Ã£™¹å²«~•*QX eKqdŠ ôæ¸È3‚âþ¶ãc²…ÜòZ?J·H®±:½§)Éÿ})«±‘笹Êi9bF‚+'Q1ÊÄÑG)!~×(£ùÍFºÞÕ"‡e© Â9hêyNl´Ë¤}8K¯5‡Ki…4–¢Øƒ¬õRòúGž‡ vïièpªë‘Œz³gZbì[‰ÙpcZ£•¹¼ÏÒhý´…ßâPéÕ׫Q«÷Bê ©'° ¡ÍôéO nâ4<~±5od\álçÆDï«–ŠÚAttšKܺ]‰Ñ«–΄H)4äF¾7ñhgU²¦2ä´å´÷”ÿBâ›¶Ö¡73CÜ¿n³9½9¹°[éÏJçMÕlÆìêØz 6G[OÊqelŠðW5úóÁ½&np¾¦ÿT"äZ$^Óõ•y¡!ÀE“÷A_‘·ºU5DNðÕüˆ:i¤çƒ²œ³”v–-囿¸BbK1ëQâ‰ršµåA¯á\%ËHÑb*jÒôVänTd™é龑¾ÙT‹Øž‰ÂŽ=¸ýuz›¤Ûm9™½«=¨QW˜¶¼¹mpW—¥ôó ý{òþÕÕæŽ_Gu¶Ñº"pøØT‡ SÎèh Û ߬Ò׉< ‡ʹѷ>†ÐW#7‡áï”úøOƒŸ='ߘ»\ž‚>XÞXQvj02õm5t¼ +±\<2ØF_žU!55Ùø¸¥D_9æ˜ùsŽZÔõþD*}ªQ›óç)Õž¡~FAN'»×u¹£ÃW+îÜíf}±/„ìOç¦êKì·ð>úæXÙ™Ð6ûË çöw›*mñ2ªÜúÓj)24ÔSÚæ]gqy_Ÿ¾ñïÐ×j©@3ýD^ƒWm/…ZÂrýËGAMâ–Ë öÜÂÛû÷ø® _ê/Å1Ã>Õ†}5êû0Ë/m{{#Þ“`áp0åЗýY@´œÍà:ÆUXÀc!TpÝU™â”tžy0¾É®‰Û?¼l8Krrj‘¾Šš…:»ùôúX:hÔƒìë·¢þ”Ú ×^é­ÇxUûô•ýMÅ£9¶0#ì°ÑOüsrKò'‚ûä3~äP­['•Èw%µ5d¤›Áw¼C-Ë¡-ðš£Ù‹øÿðºC~nûª¾åU†ñîi’+qõ«€×ïoËnlä(~8ÝÕóä? Dô7ó¶ÓµÐ•ö™û6öX¯tÝ­8NSއ](ÿX2ýÏ[pÎåS¹öZî‰,‘õ/my|¯¿y¢«¿. Ußw“ÁÍ4ããeËÓ<Ów?$œM³ŠyÚEEõ:9ûG^°y>Ú~¨{t·0pt{àÝ™·œÃ`õŠòx†¡º©¹5.ùx%ZÎ5SfÃÑ6cÁˆâcá0Ru˜•~쫹 ¹Ðµ¸wl^ªMcöïç²¼ÈNO*3÷£;‡Lv°>©ÛT£“ëZ¬îü7ÇÇ›·`>tØž°õH\;›ô²Èë" ¯å‡œ2Žo‹ÅLÖ~™Äî´Y;íøêLšÑyÉŠCž.ã’¸Š•t¢.ŠÅ 6Òôõ³ñ‘$‹Ï%4FxÛ³~ŽýEõ.Ы;M7Z•=‰ì!nQÜ}¦d·«yà60aN×Í~yÙíOÝrÒÓéól³c‰“0K—´ÜàЗœaj°E6.ñÇœdxÊnnÄHS8UL8ÛNɹY¯èÃoRgbéNß ¿ªŸÌìX!µØ`EaßU#éfcÆp¥piHz—ª,öˆþóàØ Íå_=þÌë1 "l®ëÕµäŽçöh¶Apx•w\>oK«ã²•îu%—ΑS 'i?á*ðv–û*. ·Ïž©ößÎsÄx„3üÏF˜Â¨ï]=Š‹æ³7°Õ¶˜Oÿg²7¹mmÑ›k…jf¹vko êÿÛtp¿Jä ³ÜJÔsvW£¾RnÓU,>Ã}géõ&iz’Ÿ†s߯r¹Y ž¹vcˆo5”M\ŽXÚ­>R—~“ S‹»ìÇ?b îVâª_*‹÷Øt€À“™Qq“Í\>¾bN|ñŒí«³æëJ«$[ôñwcídp){:MÕ†yÉÞ¬.ç‹© s}ÿµý*d,t×~b݉šn_6RM­d§¥Jý«¿cdR·ËâÑ­¸5>Ë#H8çÓ½^¾S"÷/3 8Ò;½ËT¼R¯Äö[e¦ÕZ÷¿¢#Êêù°Åç%Ö&´çáç姨é…;<ÊA1 ü!3ü3äJ͆쉎#ÛÚ&m®öH¦wšÌ»e[ÉY¿ó1‡g79FÓ·ÞEϱívIÍ”Õ÷/cZôlYgü FKN¥á|¯Çþ¡ðGÅìK~0¸Z××=X©=VŠ^æÿÞL`ÂÝH¢¨î×tÞtC¯NZÏß”ÇUjˆÿÜwÆ“+Z}†Êײž†Þª|p¶=8ÙVÇêȰݾ ?)ìúÇüøëå9šŽR qQŠšÖo]`ã1ÉÁâ6n¯Âçcâé:»_Ï,2®{^XwÁ*€ò )6'ÐÞ›}º±§±µÙñÏ_7•—‡kÑÔ¦93Ïô.ü÷úÃåào hŽì´kXäi_ŒéŸ•âà[íó³<Ð3w¸´[^B+Âz#V\qŒèÚ›¬Òr;–&óœõÀ0iC_€÷å&¿‘î+y¡TŒ1 ¤Çb ovuQG|§»d’¼»¹Œ/C¡­û÷ì×A6×T^“m=p6Ùøêo1Å®*1}éô]Õ‡´/¨v£Ü·Æ#i›Ú—³™?—§j#ÉS¥’‡‡ÜdÂz-ÃT³GF™W~ësvïŠ?C3¢¥| Â2/a1Û›­­··(>¸<«¯Ãh—§¹ÖêÐaeȨ•;÷î¹â1Jýƒ²T‰"tñ‰ä\eí¦ðâñ ðTÖ9|#‰DÍš«4Ä•IÄ„À|ƒ5ÂyÓV¹j|£Ò ¿ë–«v›VÄ*ÜþKô„«Iþ+Ž-u5ÑP6·u0[X5;ÜZ„u}ãAþÉø\Üû·(æ5Ô99~ŒïÏ+¶e1QUËÙ…¨Ús/åÚDÓzåSåœ;)Iƒ~¼ºÖ–×ÛqA2&XWzÅ+5ˆìYD¡à&“ØT^¼çp¦x÷³TvÛ°–ÙóP¾à6nðnÊð´K~ÎVÓg”–u+ÒÜô|*üÒÚñ¦íN}ûý žŠõrŽÚZÁ£ÒÍ3G5’†w’au[XÅ·PÕVB‘É”Ack˜ì½3•Öç|~šݶò–všÃ ÖÑuýýÕ¯Hhz|,l(£æ{Y_eüfëÁeæ×jò7 6ÓŸòùƒ?lÇoYKrÏœË`€ïG%”“ÍcVEš»ÔäkꪂVë 7äÛæ:/dìš}ZúìK¨œÉ z¤Èª ®ÈÔPíàíÙ%Xj5^£ÂS|*Èq©?JÚÄ|gÙ°YéðR¨ïiߦ^!VY^óŽ—BÏÔÄlõr•BŠ)EöÐÇgê6+8jo}>Í éÔ´®1Þí—¯LæÒZÖ ¥9T6dǛҮ]Çc"S¶¦açí‰=GÞk±íª«o{„5žDšI¯0;z[b…á2@ w…—ŸÅHoëE‡m ש‰mGuÇ—Æ_hi¶½&,ê4Ã&bv %‘Wyø\lØê/в¢Î¯ÁÇÂIRÑÙé7ªê@sêou »î;<]6.¦ë:·Ø°ËÎ k™=Õœ‚r}6ËÑéXÊäÕnñÍéÒJÞfD°ÇhÃ<ßG¡³mìå}p¤¹ûŠ[;Tõ´Áz9s}¿…`ë&=ô79ä›ËJ  I£„r»ŸzßE7ÓÃ*K•·€2ܰD=M{!òœ¡ç-©àÿz‹;®×̲Ÿ(®­&îéÓ}ܽç.²¿¦ßlýм`‹Ólº[–œ6ú„ÙÓÛT†Ev·’Ž\¿ã½¦R_ŽŸÉ7‹¿ÇWDWZ€Tk!9ðMCc‘ ‘¬öªçàãï&NŸ*&*‹?£U–@zPàðšh íßy.®ính8Å1Ú#«Z³‡ù[ !Ð[OµîÌ4ö$í›QPøvV™ç»ÖýAÞsËe›c“6;ýŽk…LëGBþ4ñ?~îömÈXø³ +¢^V ls‹ah«ºÝžIê5´¼¬BcÑÉ5JŽeM§ZÐ{Žuñ ñ ;€^ü¬a“ÚÓ”õ…ßñ³ä Öá5mŒ(!tÞݬ¬Dõ½Ë Špyµnw›N!=ô<¦´à¯H€½RÀ¤2-'Òܳ-q«Ýúdòäô IÉÇWg½V@ÿ›n3£ü36Úý>½OaŸi8§;4ÒÓ³“sÑ[-_õŸq1Žì?¨»öŒŒ3Eþ½:¤Zse _,f’£÷„ÇiA)QWT󯱻ã>ùçëÉUɹº R…ŸbEýe»_´¦ÜÄ÷õ¢œ7§ÌéW:¬Êð휚fçm×§U×Þ>Aê´_ÇÐñîÝñ®¥~ ƒ^‡+MØpif.§YòpmHopÜ‹5¨@Éù£þ+’y2K5w¯O¯¸Ô‰¨ÜŘQù©}쎟¹“wé$šjÉ»Ž9͆›X¯?S#e7Ï0”lˆ7ž/•׳ø§»uW"@»—µ ~i#a×ë";àóWnУ'ËPXl­VBG›ß‡iצ⸣¼U.c<³\ÉÓ*-$ÓV|\Qp(ç—M›¨x{tÒ>r¼µm¢É Qì%»L‹}¼<óZ'èH+pÆlIãR†¼ä¬GFxBD»Zì;>˜Kí)«íV›Û7+Øó“™ÙáÆfêËÍk.Qo³¯‚¶¤­M$`œw!á”jÙ&©ÒvÀ’]›²Fªx!asÇNßåp¶Žùsú”U»R-)é5¹Gr[O7Nx^;ûA(ÞZöb覫¦™Çùw]ù³,¹Ð·Y¦*T£©S"«[ÉP¬×S> ¦Ã6,Ø>颎ÔË=aE—ê6|Eî*†¸KênSc¥Yh—£©ã+gÊ›†ŠºÂUªéÓíÖØ¨ªR›X°h0› ƒ‡¨?Ô2wžcS™ ²‚Û}+*| #N”7§ÿ”v"LÉÍ‘+#¤áÐCÑV¨ßÈlŸI-¯E»ó­/䄱l=Ö¤t§à"²LQð¯ÎðGâžxyñ+³-LòºîèŸ[ ›˜_I²õ7&2nͱª[†9S*£O•mD‰ò¦î6“Êé)æ­¹¡P÷K5×ø¯ î bXdÉË.É—3s¶šÃÔIɶâwtZû&-º ‹dÞ;#¡”®bQáŸô3³r¼Nª×!©)øY&Î.tŒLÚý²FÉ=8Ó够!¨!*¶&6kV„ÑǾ7[³.\”É5q,÷N+dùìBiÞ®°ûe–"=„êyœ³ð”ÏAPyë¨õb@×Í®ªW/U4.€@Ð?ð÷}<.Š»Wïóˆ,ŠŠx¼r*4Ǭ:6ÀR‹"Ú )”~³a[6®¶8ñ ªÆ› ÇsE}gjìT‹)×z¥ ÆèšÉ âÚµúa!zG·ýæP ¶ )pÉ­šEe, `IdQ›QÎaMòO¶¡ô7i$ ½´Åƒ/¤©ø‚)Ð-ìZfyÛÜD;c¼‡WW~Jz¼Ä–ñGðåb[53¶hbˆ #ÿø‘j!ì8<;²çìï<†³uÙˆãµ?“QéÀý{¤³÷¦4ùx;­äjH¿»®"Í<©¶lÓWÕKÕØItnöø¯Ûä%Ö²ì"2¸¹=úôv|ü ;«kÙc㟺ÏãÃà»(•²ŠW»t^ߔ¥çÓÞ¿gÜÊúÙeþ]2s®Ï=WZóµÊ¹AL†Ï¡ìëË?Ûï(3¤Ì|¼¾Ê¬–@ëòIç™þ¼Qˆø;N‘æwÝiý\ëÁÍòó×ò-a< ’[&(Ùy^ëÛíZ4ˆ3>ïw#WÆ}æQGø»Fèò›Û°èíêõbÕ +90Ö¨U‰Ù¹¼oxÕžCÀy“ùˆ~8¸üô*8)“}ÁLeDæ/7†»B‰Êîø÷ü—îïn]²(°~dç¯ÌWRŒŸþþ Çþ\ãeÅ9¾Ÿ”÷¶c†èZÊs1Ì¡’a¨lUñÔçƒw9QaÄ3ýÐí8Q¹n:©?N4L4˜Tµg?*ÝvÅ›Úà…Çrâ¿ÑÛá4ê~n¾=lnD›èojÌÄÿ5®ïì%‚g–ñþBTÿ”Çg›!ÛÛù$\óJ¹/°õŠYaXEép}½,dLvÉ:Ã6øô¿;b†leDùó ì.&µ‰Inü{wóžâÝ6Ëê­ŒóŠØîR©¾Ü×Ï(†yõÇœóË÷²Jô\rù%w¹¸1ÄøS3}“jvYd8Ĺ^›gãéÛ0Néc@U]köÖ–öœ_O xòóȽ.'”8´³ f­í1ˆT,y}Û†É5ªû3q¡óï_o$wlèF—6krnWü?íáÕ;ÕÛ"rŠçKœ¹…ƒóúùZoª÷ÿtþUŸ9<éÞwÓ°ï<É,aw–ýÓ¼öOb•œ8rþ›ƒ–¾”¬ÜeÕ·Ý¿‰Ÿý(¥Jïí.„€Ü> uÝì&´&­ñL*­ií1T·—\ˆK^¦• ¯ï±Ûãÿ¢ ?ÛT7;)•¾Û{|¿{áüü0{wô$ë¾= &¢ ¹tcüýßþ­“@å}ŽUJ—)ÿµ}Ÿè͘$Œö‚4ª<ß¾ƒèa‡O­ÓÄ¡ðæFH¼ÄVh CßïéçBú×þ†’Ô×=4åiMj¯dï9HÂÜÃùmâX÷¬¢( yzK(Ä+äf‰÷~þv/例ÑÁèë.=eVF-¶À¯/Pº¿È­:9&o|?2BDÃ!½nYW ­Ž˜q1Ù.3øHE‹;]¥çËþô#UªGC)rðLx~7X:fÑ\7Õ· 4óì+ÈüżJ*:&lž#?Ï7ik?×a½Üƒ]›E:Þtl¢dxKêñIQçæ˜žÝ[³6Jfòf¸å)ÒîõBÒ7jÌ㾌Nf§ž·£,¯øÝ÷jfq"«bå•‚OÛö;•õý6\¼áé¯hª Œþhžr9&ßÕ+.G‘qÞýËá/ÿµj'ÎíFÓש¹Õ k8H…†¿t²o—>‚žuø_üíÐ^\;ýîãI¨û·žÕ—iúÎ4ÝáôÉqcCö ¿ÝßÙ ù1 Ûs?y9SÉ1 Æ‹]ã_ÆØ5ûÐ4`üÏCÁ^¸§ŸÛ}þN£ì{žÞ&W… –ñEíUâÂ+òpXŠ_Lzüø9íë~G8¦„Ã>äUÑÛß˼ÿŠGÉRÊ‹ºÚ¹÷Gݺƒ2æG¾ÊÌ|Zý¼nïý¸õ>/‹]ÑGW¿—Õðgt›hçë§hšÇ‰ü[>¿ HØ‹|”_Qg%1ímöZŒPÌ‚~û›ƒŠM–ŸR3©`|²°aSíø£Êùø_¬šDtYs/* >1²œÆï÷þwÇr‚s¶öj²›W·œØ-Ø@,KÇd]U••¨ã÷c%ó?[%òVYŒ N¸Q–H‡ö9)KlŠ;}O2®É\çÕ‘þ+ÞÑçµ8q{6:g«x‰›RÌžnŠûK¦²W Ý\ï$¾­xö·…®iò%‚ôZ§¦›ñÏü8â9HisûÖ_\Íg‡&׈ê=¥RÌÌ&¿¼©ø±º`è)í—ê·n™Zÿ27e±'JöÁïÝy[C/ f\£ýZ=¢m£æ Ž/¬Ôņ¶ç7Öö}) œ?“ïœ4CjN/«s¬”u™ï3ÉÄ1Nϵé"-íš±ÌõEú¿Z>/ýàÆãø)¦Í2óÃÓðøxÑ41|“>Í›¥?Å¥êédú’ö©œ”k¿Ãr2 ËÙà]ó•oèàU–T,œ0Ðnþu¯Ù9î÷O›ÎñÚ CÔüýÓ~¸»®c *µ"ÎÛ’bDnS_Y¶Þõ Þu"SNk‡…}­Ÿd³‘õœû ¼õõz)8#À×W«ñÜ rî[gM:S}•:N‡î-û¥Ýmçsâÿ}FXf­m ²—Pÿ!ä»EçÉ›ŽÁ¿K¤PÞû¤VÃrC/wµÜùGÇq•iÝS4 QyܽÆêE޾£™·ŒzàŸ+×òYËZª–ñ9¶@¸~õ|¸N *ºÌûc?§j·Xr—èÙ\Û]+iÂ\7|õü‡0Z„hXYgŽü(Óðöÿ'2¦þ=ÆÒ“º.µ©2™I‡µ¾¢ÜùZ÷ûNMrUNW-“Õð–­[ðø"ô0ý/ :Z*ìÓ?Çåû¯-覂CË›ô¢b…FÎiz†_¾ø)¦\‹®–çŠzì¢íW°ýüÂK¦¶ÏZŠe\óeŠîÿ‰C”Ž¥Î·ÁPðegÕ×å"óüWïI÷˜ÜÌiX]T ‡]·gMDïL ^/òþ ½¿^ŽáÇqçÝT[Ö6£UR¯ïµd$!=ÇÑ”F)ÏÄb¦%²XyM7\DîI­e—"=›æj’£ís›ê9AÄtž9äû%&áŸóJD&xYn˜ì'VRvküýÙξÎcU3î°î¯dÇkÏàNc yºF'¾­-\³ïwñ8ÆCý-ïvŠcùúáiä¡àö¢L|;P‹qØo£Èܻ֙ú²GjúÄ•«¶ú°«Ôr3#(V -–Wô)Òú=ÒÅ>´†—‚"ú&%r›ÕÝçD1>BU¹òpIĤģ™À¸hãæŸ¡­™‘K‘9j™¯ÞË +ëëšú1æ'?ÑÓ­2¾X^LœÁܰ‚û9mÀ®?¼Šk»ATjsÀÜÞl·±ý£ }JÄM.xºñ’H)°(tJ§©]nɾöÁM „º97Ãä|¬D6¨™Ù¢­Íݺóýæž~õO•á!vñæ!¤K6jÒÎcDé/¯Øð×ÖûŸÙ! Õè9òº¼§0Ïé_'ï JcóUÀîDøí,òWµÞÝ8YÊß§òëæô¼Ätdh›BâÕbg~ªv%3&92$‚¿È·ë©ï]xõë óþZV¬<þJØÏãû©ÏkçÞš¡Ì¹Ý¨HÀTI'·ë>ìA¸A@Äì9ü®ž…GI†#“6ô¹w÷Fö’§¸«¥`cg´îÍíNÙ°±êùÙFã3μpr?m•R N•BëZ<í=¬ ÿßjÆGçÛ}5oì¶T€í±Ð[Ä;«ØÇ§¸Ï}9*ö'ìïuƶ¸Q;U•»ÞéÛ&·AÏ2kyô޹ÊÉOà'Öô9©Óµö whÄÒ&Ûr#ô 1DE…¦Š¢µ?¢óÛBõM à%Y{é‡ÜßwÆ=W”ünw’íyd¶sõ÷å¤Þ£жš–h¶-¡ñÊñj{üœâW5_)ë?Ú/Ê£K GcRþ˜Æ¾ÁZæ_±¾æ1õ.ËãY[_n5¤DJñâ:{úƨˆß]ÇSÑG_"yžgìíEeÒÝÌÐxþý î'¿èƒÕVßÚq¾tYù×תM[;¸\¤>\giBÿ‘$F1x‚½JÚj¤¦—§ì•^‰EÆíj>É,:IK`²aÿ¿{?OÆïªðä”îºCÇo1=ÀÉdd¤ré<È>™Çþ~”óV†%úþÊè/²Œz2ç7§3o[S{˜U›Ôg4ÉjŸÝ:c‘oôõ5Ëoªà](@²¶ÕÔÞ»w޳ØÝc’àÕ2Hù0蹕ÊÎ<³,µ¼ T{®- …_ÏK¼Lân ËZ©–ñÂQ:u ï–O‡éÖ+"ÏG}¯+¶ˆiøÄ”¶§$7Ž&«¥&ÆKþ@Év"™?JþMü–Σü¿(Ѫ±CÝ\™å‚—D=|ó8ýŽ~?…²U³k;µ—6è›Ðáïbæ"÷œ§SG?¿iܳ'’Çó´êÑŸ›cç™?®ÍÇâþhfkZ’È --Ãýe Ÿü×’ƒö²;¾™úÓö©ƒü3?VÊœ™;¤.|b‘ÄŽÔŽý®Ak…d·Ì¹Íl÷ eþ{³IÎ;MÕ¸yÙ>´*ïs"˯=)þiwg·Z[%u®ïÁ•p’‹ÌËEógû×|19Þõ5Ú[g£)§Kú” IÓ–tûJ¥Z iæ_tëoßú•φLÕ¯ev'n³ªç:Wò1EgWŽQzâN¿wÔöïðÙÑŒhVèLžT}•yLñòŗ΋oz‰ZîÒkdÖ‚R¶‹*Ù¹™º½ƒø³-<-Ó ¶Yý¼l¥—Ï–S×-1Çf4Ûšÿ–úrÅ)gzͰ RØìWe0#å1š=4P|ƒe(ºµÛÏÌéý9vöŽßcP¨t%ƒ7y|Ç0å‹#Ùòí«V¢Øe-2fÄïSd‘»8ÿ›ã·™£Šú™ïîÿ?K+Ð’¾{Ø1d€p®;.-øª\t6ÐÝûô;÷jÖ9}´àŽ}QH?IAÚ©yiHt¨uS—+1YNÊm€Áº¾ùüìfâm¹íiø•iÛ%ÿ#ÖÔß©›ÏŠã ÀØ™—¯Í»iòžüŽéW!ùº×ùþÞÔ ÓœUñ#þf5.¾vbŽ“ºµªã4ÏÝ«DÆ/·Ér>söã&7÷¾çù3Ï_öË )‡|†…é´šhÔ»K¾;Ýâ3Š¡IÉ`¥}SF{1`>ÿ¿iS^ËÝŒGþ§õðÂÃÒ›à²æZwMR¼¥ëûÌñ¿~ÅcÜR_ïŒErõýG>A‚Ç£±›%uI:ip¥wë7i[9ÐyÝpye^ ã¬ù_ „ýDû‹!‚±¥òØ^èüÏMlù4WóVi¯ù>‹Çó6dBÿV± î>Ø ¿,_2›±Þcg·vFú;ÓGnºQ·/Ýøþ mOy Ñ›êÅÿäÕ]ôU:É”U‡KnpÀîˆÓm°¾Hylü¿­ ʳÄïb¥O Í™¯m– »ŒÌr·ÜΨ5÷úN“°òqû©6Ò7“PàÖÆ+7¬[^d;½Þ§Zë•„«­ágÃ!:¦CîVŸP†½e7Ì㙦ú¶õ;ïébzriàìUS©YÔ÷ðnh;o>üò½bO«)lç8ÆÏù¤ÞÑáaèï—<÷âžq‹wyY4/ÝV“_"›Å²vcì7….¦üKDü’]T~žÃ§?ë#ž8l¤™CZõ†Ù_C‘fWã1É;Íd»ÔToo•ñï¯ß æ|ØèµÖUŸ.o“³2«ok­¥¾\g›Õ¹oÙÖœt½Ÿ÷ÊÄr_€ñt‰Ô°ú(±Èbb€„s,²IüœØÁMM+”øw#+/°8€Êû©c©KÉldgòm¹}3­K½&OÑ¢ز¾w¼mÒàz§o¾l˜·'‰4hŽ]íªþA¨X·7›3pUƒŠù§FUÍt¯S¾’võ›ÐS†©ôq«9G¼,V;ÝÌV¸ÒïúöŠ‹0É*Õå ,Ìše]e9üjäÖ/u“’Âo¡‚Æ^Üꉾ±Ü@Î;¤öÅ €„ÛE«ôùŸ÷ûf²(ÖLÑ׼Υó¤ÿ©yôï!ªWþN­]ðù2ÙS¹¸sf½Ð^›®sRÞ¸ÎužÉ™n?"ë<ËÄ©ÉAQø}ñŰÇ%Üû.ÖK6 3ç³G+VÅší0òw¾8í,³øuåN½émSW¿—=»‚ø‰™‡Øß?–#ÔÄ—7Rq‹ÜTòunµ¹ñº²WjM\«_+ŠÇ²Tç6u¾êUÆØ}(oŽ‘ñF‡X~Ð@,u¯‡mù_€„e7Ý7ÄÖqE.tæªKwÓ9t\n­ý> ùZµÈh‹7™ö:nW_•èM-îõlk;°&•‡§°—™uo%L‘¹ŸdŒcÚ ~”âf i ÄHT˜úըܝ«Z.8D'&íC°ÌûøôŸ÷Ï ÛÐÍë6t=µ&¾Z´]Cö‰Ã¿«ƒýéþf{‘ ËÞ«ŸÇ‘~?`µèΜEÒrôFZ»Wγ)f©§ÉÊ }*“å~Ϥ|­‹…ƒ Ôàdf c6ê¶ º„I(Š…ÜÖ¥±8/\3^å+a£ ¤ ¾¹é#7¶dÿÞMÁ4™kŽû”g‚C=? ñù‰÷q(Î8? ø˜l È7­»[]¶µ7õo kþ—ì·jÙnöË|ØJG^Õ ‹c6òxns.ÌÚŸ’ö&EÞµÒ_ê!:àٞŶ1ƒºÃoJ·Wq~pzóâgwµfu’Çbßùœ™5}'ÿç´Q™ê,nÿ¿ÈØîö'qb«;…AøMu¥k•³ËÅûÙeûäa<0ök´K“KŸ˜®|˜æû¬1ÝeK03•Û×bê›ùà!·Ý9„–¼‡½ùWÜN@ÝBžcM‚ªW:‹²OµlV·<ËǙƚ’iWv˜£¯ü÷ G©{èÄmâ󨎾‘Û=o©×9ŸeDþÔ糇‡ÎæöÿXÝ…ÚÖû¾LùÕþ¯½»H̆ýð2 q(},¾—€_'öºF«^ÑÅß{úyeÿK…[³Á>?¬ÄZÑØö*&aI;0þ÷𼻨çM¥K׃GK_¥Œó5gÆÚÆžû=m¼{ Ÿ¢§Uû¶a &Ø•‹Ùêü$?kƒšŠ†—#‚ù«þP¯’‹ ›_’’|˜~lõÂhi±¶vl2Ó”?«ËZ çéG¼ŒìpZð…G ‘U™…?¾J´‰˜Âÿµ:IÕòJüsQYçêþÔCŠó†Œfg3Ù–ãõ;PCíÂý±û˜Ž¾Aº—>jÚ4¯žþõØ:àåP7Bg7Aäæò_".øÔüµ®¶Slµâ^J /wÍý$^ý䜓IlAòûã–©1x{Ǭ¹\6û™üs^ù³”ŽOÂ[÷%Q—ÉC/2¡ô2å_0„$›à¿âÌúùèT²E!cd®s lFòÕ2II~–õX·ÚªRHï|Õë†]‰Ë§ TKê*Q¥w'Ì´>EÒ¼lÝ\¬?GWU9êaÉ ÒƒlÒѱòi©Ø ]5³mͪ֕º©ªLˆ‘ÝÓî\,ÊܰÏM×¥¼³MHbÁ¦YrSËh‘ºŽ©À;pžŸ]õe6Ú · bªOÚ¡Ãäg9q"¸UÞääãÝÒå‹~Òço~.Œ3 €@/oâÿ2a>„ ³l5'öª ~lòQ2y^ Ý: N-ô’ÎaÌkkqŸcO@Ò,øíÙëêÏý6:„á(€„ñ °á_\všeÝ‹&íMЉ¦R.÷mtQ:ø|wãýUT®Y¤Î¢rŸ z¬¨¦]• .¦«ã¡%Éár@B6\w“Õ0rG¾ŒN”ì2XxIršóËù1Ò[:Ö67NW¼Ý¿$ß&Mhæ© Ãbƒ¿áëR¤¹äƒP@&žÆyÝmV7‡õ=ùXqžMZ88¼MØÌ\žöɬު=(b¼š^°o¤\ 8òÑëÒ† Û¾6¦Ý5›¶KèR|ª¨@/oâ3œëd]nOW‰w)îVé\i¬å+vå^>8ý»ˆ&ª/3^œL£Ž±µmFß]¨ëÃç›A›TPæo@B.Ìnš¥˜ºË÷“¡èïBV*]ú_|1RüŸq9Ÿ³^¶[rìd†ÖžAÒ%¶K|"Ø@,æK/ …yT‡³Ü¹*jF*ªuׇïþ-ïž•Z¾|³ÿa£3ñÛ Þ×KÒè J~Ú¦LåFAžä¤Ò4Œñâ#ò£Ôƒõ’|ëj_e{ßiÒ»¦G“Qfš§üb­´ÿ_Æþ#:<‹„ýH<´Iô÷ëôjùÈ.ùíGòuxÃìY3_5KÑAj©\žÉæØ-åí5®3·nlOǹ¬æKå‚2Åí®EþHÓø|¬^â@@/ù ^_Îõ©|îæÕ»wëä3xÌê ÙŽ¯3á}´çêŠNè·Uø”KÀ!¯ÌqMa—*ùÜ:ÄwF”“Oš €[j°¬> #u²åÒ!ê 7×Kfµ´Üç:,N$Åû @,oeD—¢2–mMû²sxÛ#95C¼/¹éž5þ9*]š’CË{Í®Aù  ħ†Zf»Pß¿¼&iÌ^lƒS»^ü•q­Eöš–ÆËÕKÝ+y+¾WhÓãéo òÝþ,ÝaÿÉ“Âf¢0‘Ÿ8³ ƒ_«ÇÔú!ënüw]F;ÚÔ=Yæm“Å‹õsú_îÿõÕàVÈ`¶Ìxô÷0Ó°ÌýÑÿsî§$Ñ·”¦˜¥©“éž'[$×/êâž+Œ…!¥w¯¿ò}‹À±»fç"˜xœ{2:hµÅZø©‡Wzß7¡ô±„™ýë×Û €Sß& Z4¯€épp›Ll‚›~!Úª%Gj{la†‚ô_ó…ëéíBۇɽÖð?š°óÁ*;ÊZ wvzüZ•t»Tl“ Ñ„#Ξówo¼à«¢#-a¸¨îU Oÿ¹þ‚ÛÆÄÍáÎ5= ±q@€ZE¥`)½žÕÛÈtŠÃºzgf*Oã¯rÙ‚ïÕñê¯Å>Ò!„ÞÜ÷l¸’np ^—8Üú‰&&ùÖœÆeÑûÀB/“¾~=ÀùxO›nߣ{ovwvªØ- ÷õ]É>ª¤wÝå_%{éƒZ¶®ˆ e޷Ѥ{è’Уgb#·/øó©Ü“$wHp w>‚I.#Ï][¥ ‹¡‘k¬ü ;J^Rü²üx‘tÆ“®e«ÿÍçg !¶7˜QF,´åtÔ]µ†œ»ïhøÈË0+œªÈö7`€Ppä0\a[b £½í|e¬P¨OÓ“žöÅ;ädîã²66ý4Òíl ÿØÕôãñðkpùòH[åk£HbüN@BjÛØý~Ì¿Sç«Féí9âVL6öæEׂW T—Úc!¼ þjÛFÕJò¼öä;®p~ úfÝÖO}Žå),ïLJš‰­]ð-ù­üòÊN‹Pí–rÃÃ-ª³OCp÷ßû䣌'‰É®¶ùg°»xn’yºcÛ}¹ª=Uv·Ÿ}<éÔ^%J¹Û½Ã:aO›ò·V¥ZtnÄåÛb=Ú¢¿Z7QÝÌ2u{Û”E¤u¥X¥zוxfô¨r~dÿÕÊ"tÕ– Òf®<‚Ÿo¹Œß‡'¾fÙã™ÊD®^/š¡ò®kjðZ"ÞK©rù'ŠW²=úŽ.X½•Zñ•ÿÉÌÄÈó«JC]ÏÚÊÓšö}]o Së¿öe2þQtõ+Û¸¡‰³Ùòwߦ?>óÏŸ>ò㻼’ uÏúBGó‡½E äHB¥?váhâ®üiK}{cMt®:,cFm¦¦¬Þ$âƒb®ß"ûÕy rƒˆÓ¥ õ®ÿ{E5¶½ÙDbîcv®¿{Û€š™ß¤Ìøú~BÊk©¤¡çßý¶ªç;“H{Ín‹Ÿ^†ÿq¦#ÅÓkß¶8w9*õ5Êh E¥GÇE_ŸÁݰc°w~$ôä˜ù4Ëw*Mg/—‘w&56„îþuøö‡ñhÝ]’Qi^' þ÷ÅÏÄ¿rü_ªAR½/÷z½Þ4«$m"Ô±y›câ>v«›Î’ûÙÛ«Y£œõoäô ;g¸³3óò)-³”ñ¾ÎÂï¾w) í¿:èÍðzFpnÄx\.&>#À粡AÀú]!*xŸ\¾5•å?·7of<+J^”õÎG!!:N·ÉK³ YÚ³bP~žŸêÿ~_U±á‹umX¦(”8ÎÈð>(Ê,ïÎ/Í4æ¯ÏÀ"ʳ½Üþ10½™²[ÕÇò°8šŒÝUN÷£ÔÝ¥±ÛÖý}Þçwî$¬€Ò\LâüµóJ¨íáw=oz&ßÓ¬¤[©Ý áù#|’ZèXÙÞû%n>ýpì^Iùi)7±;°¦«8êyDç&5ßCþys>×_"‚~Šò^_'q¦Ýz³Ü=¯oy Ù¯Hµžç øØvûô|y̺ſF"2üöqjæ7¹ý•‚×;ØÒ]îm0«õS°¦dé¹͹9JÚG=D;ޱk#£Ö¬8Ž­ÿAd`ø0õ9¯Z¼ú(?q²Þ‰fÊÊ2Js·ùâeœÓxs9º£2Û6,–8Šu@¥²iéñqku(äŒLÇIݾœ6iø»lTfÿ¦”H'I”üS&~ù‡“h†ïàé†ʄèTf[¾we«^kæ<“Ú+T/,ªó½E¯zQÅ·Ïü–޶Ý9⬂Ïçw‘¯ÔSÍøÜÖsŽƒÕ[œËó÷™A³‚hœ)Þ^>ªmè¨Õþ*bÝ™I¦˜g}ìS4ù–Úfú³äް¸ïP=ÿ¾gœòÙä÷ÁöûÐÖÒÖ챡mÑ4~¸¯É©*"ù½2žÃFü÷æl¸É2N©%MmÒ>ùcðZU”žêõ'‹“è=ç§õY¹¿:j¾{Ãä°Môm=,÷˜=Q•~ÒJ÷zÚsןsƾ%Ô¼ÅM2?Ïú5nˆÑRæyç#±`áXÕÛdLýÀÖ÷äMüuZGa Ÿ‡z¹©¾Ä®QÄÚø›5ñ/Í&®á|ó°{éäÚR9lê—/‹á×Ζ/2Er÷c§eo®{çÆ-ó»tœsм…½õAoÎ?G³ºd¿î,»Ë¼Ç!ï´¨±~ØhÚR(ˆy•>52ÞÌ6ÅÇë­WI/›ÚNdá»ÃÅcûui¶vG½fkÏóÆ»’ãÖ`}Úhæ;/Ää¿úÑöå›'áì`zfP±ÿf¦Û\Žçá>9g7ÐÅ1{ª`_èòA±Ñû]ïå?ë½´0oÓùq‰þÝôkGŠœ½A‚våËmDç¶mÛ×zº{;‘1¬l<‡‘œÐî¿¶³à‚÷®}ù’ß•¼WÙ´l[Ïú´B²‹Çê»ùÐí™?ÿì'Ý:ˆ®ï¶<ÔjߣՎVy‘çeEC꿾ˆ€hå¿1ñ¤%Øôì1ä/mh(€ˆâ_0!a(8…ÓGêå(sg6¥2kgný%L­0«i™dðü5~¯ä'ˆés‡yÐÍ\cL¡E7EoØôƶ~+&exË,¨mO¥.‹±­ – „tŠ·C¡!®¡ê§¼½‘ §7{z´ ²àEù™åVÉV@ÓÒ8Ð3ày’úæ¯ýߦÏûâ¨ô&MH«\ƒëîilèZx¹Ð§â_{[ ]¸’ðÒÛ½|>ö¥#š²6Î ö,¼¤…[S w¿«ØS^1>G4÷˜dÐ/¥\‰Z ØŒ—•ˆMj‘,­1ϧi2ã9~{kô9 š}~õ­ÇËñwàwÐlž9í9Ï·„õ&ÒïÜÆ“¢Þ²Z Ú*UŸÖ¶òÿ©};“×<®>e²r¢‰‡¬Ù¾ë²r“!®YÖ Èu„ÌŠ_´Á.ðÓÓÿãpR7ÍÞ±ßøúyöoƇœ¬«Q>B .¡im£iÌØ”–¶&,Ž ³ó)i=D¡"ösöä°b \Ï.OM›G»I³úf½ÄåJ¹×NKïDû¼‘óçUiò¹ý[¤›mïó¤roHñÔùYÄe¸«'* Ä’nÿÅ…X¨îdа ! MÖ¢Ãj—YDŽÂ’kõhíºŠ:ˆí|꘾C3ËÓÐï?DÃÑ¢¼X*ÞnEæÖõÇòg¨"»ÉðVŸ²+¶PŠåÑ_aÄ^ýyX®I¨Šô(­Æ€¡¶Ö¯“ÔEj"¶»üõ:WS–ã²Zßéò{¾Åk©Äi?9ʛ߬‚ëYüv:;Ζ³Å»rp|³þ†ÛžS* ¥1ÈXc‡ t{*IÏÆçäPå[0™¥ÕQyzD?»!×4ŽÄ6@#S}OTJeØjÔƒQ5çy)§°a3=}Fòlî¿v¹‚%-=ðÿ÷¿¥ñ% $:”FÖÍØwC²öz_r€ µŽ¡mù‡·ðQIš=ó¸wa«_½=t†3lcQ…/Ä‘ÕUÚ·Æw4\êz@¸€›ˆÃZ)ô·0²éÖrœ¾¬tàwëNÆÉ©¬›*§¬¤¦ÿ(b#ä‰Ïë'¸Àºî6vá„+UBõ¤Ñõ«Õy@ŠHÈn4<¿>;)'›ql0 Ãň8™P‡Š¢?þõÌÊçÆüÌd:D…vjË.”"{# ˆô5ZºWüímüP‹Ië÷¡w–êat^JþL9DDß”>’…'Äì{ï8´[Ï‹«góh^ïúßã~ÏG£˜o†»B§ÿw2ëßUŒ|äÌÃ8` Àqa,üêØ„H¥ƒñ-–†ŠØìæ•ûœ‘»ÏÄì9{·ר~^ÿ”–ý»Õ›ù¾í计ä‘]2+Ð"°ŠÌ"½¢+Ù"²Ek9½)5$V:ôV.?~ífUE”VÁ§µ_kQZ^e ía2Ѷ×UEn­r+CÞr6•žU_Ãøhˆr•@óÃàñeû&ÖhÖ¥ Mki_ù·²i+á_,dš]§l¥{£0 q{§QôÜí˜)CƼV{B@\¾`YKf»¢cÓ7o>45'¥ Ýæ' I:¹ª†ì¦I¾M'T{‹ÔNÈt{«Ã4àS+*3°•×àË!Pn¥^®3Ò–õËþ³k1¿"¼xŠóâ+Т²ì_ÏiÙ¼äVŠÄÝÿTW4Šõ+DB Ñî[­f|¯›¶0&K­«jŠä^‘÷‘[’+í¢½YO cŸ­ª+3Èm6ìesEi¢¿Ÿ¹«à{¿ÝýÚð®ÿOvÓHÿ¸#cˆêïZúÍœ„­‰Ñ¥¨ý.\F;SŽ#þ‚“éú,ÝJnh÷‘ö¬ß‚[ß|Íq-À•× v²Ø_µÔ}>ʉ`s¥˜•2½C]yü¿eHˆ1.3hÄ«'å¡MŽ}ˆ/<µô6Û\óA÷pfåFÛ­Ä¥EF|¹î&Â[Ô`„ýk6*­Òâ4DB›f÷Ë¡xf$]éW©Wıs/ÈÈ2ì}ÿ^Œ¹ [ n,L~¶}Ñh㇇yÊýÜ{VôAµ-±tûH˜,åÒXñ6ô…æéÆ À„k—xN.™k§‚Ý¡FÀµN¡¦ºÒøhª §nÖ¡ëìÛ˜Œßze,wX6 Hî_˜äIA3ݱúPî2ñÙÛ‡!z À ™¼ÌTj®) Z+„?ãž)©þˆ&8-Ä8 ÛõIh/˜ï*x–ç?‰+Úì0•8 ±ñÉ]kdÏm· ðöŒ—zÝQdg#Æ{ZP@/˜$^¡‡%ôÅÌÌZé+¿”:-jZ~¦Æê™¸@!ÑŸŒõÇÚ.4^;yYê,ëèÊAao¥Ô^·¤W¨üênåB+Ë¢·Mã̦å#Aõݦ½ÁÖ¢¶ý³^òz²„W®Es•?((xÞó°õ"dhM É¹Ðr»ŒíY“g& ¸,™*ðÛÕ"+Q;aÜQu.Ë,]¾ß¹­;Ä0µnTVní}Á |¦üd¢Ô|ô–nˆð2å ‘š(LBpìôM…TËYp§€»cz“µ ñK¥ÂÿæÛ­Vå¶ÏDϘõòY €‰w |xµ‚;£Œ M|ŠŸÉJ­f}žy¼½m¹’Ê4/»¶æ:²¢²•äÏu;÷®wOv fO‡üZ„w1‚ÅqÄ'›XàbB¨ø,Ӧ̵Ó?ÖäâþœsMÔ¬ oÐ'ä!, ùµ…ÆÛݧ-¤&og;Ö{ŸŠœeòBWaü¯´Žbš¾‚ï:J‚„À.馲N¼/B>—kíÍ…"Lwup‰é¡T &ÕJ!D­Ÿ·ÙF,ÃK—³à&2 kïFWð÷˽8™ÞE‹´l~1hØoµ JýwúŸ9ÎtKÕ›çïªóC¤©¬å𿥠„€ÿä_ÿãù6¦ó¡QcÙµclØÆ¥ÖÃM”¶dT›BÑ‹¸ŠÉÈ™Á‘¿¨—y;}ËùµÜp4´šÇa7Šöòn0é[¶ù1¾Á·¢±£ŠäWÀÞæç¼ÿ?™³JãÊ¢¶ôVhEzáÛ=nmë´Õ5ê$?Â!—ÉÆ]Ÿ¥~C…><÷–<í¸…bÓÄK Øa>Œ·û㓳š¿¡ÃÀÑD#¨Þ™cH¼Yj}ævÌ1…½/¥û¸&ÐNˉ@\ŠPêVøÀÂ$œùÊ`Æ0(dW·ù½ëT†(?³#î0Ösn·_Ocívj;äC[vºßÉg·µÅäÿ°—zîø4»ä¡éÒè—9‡µ0EWPµÃ!®h€9í‹ÉÙ‰|ð®¶)?K‹£x„5xú ,£Í„÷u¬è¬ÇåñÊa"ïËÙ–{‡)H8_‘ëÑ\tÿdoR—9œtÿ$Šä‘Zä"#Q(šåR‚Ȧ®•öú¬¦³Ý`¢Ÿ àð A£U)A"MÀ)$xÌ^¦@Úwø·x [­Aíú|X—=˜"XDÈzÔ © œ"©˜[9nÌÕ)C JO1qf‘=Qwµ½`L:ÇzgݥѦÔêˆáI¬õB‘Kq‰5ú)¶OBýoÕîcxE'… ©k©\Æqh ÷i0Tö¤Ø‰\Ù@ó¿Rùk•2ºg‚r  69sDuŒÊÑwTaµI± µ3ÃýÅâÞÏL‚ˆ‚4Å>«):Ò‰×"¼–Õ‘ï°÷+C¿åêìù)>œh!Ùà,àâàØ÷YZ`,é–⌖>CâEX ¡)Do ÐîÈÇ“P#ˆK@ƒó2˜kÓ¿É‚,gغN=¿ríùRkWšÂ´[M(‹ÈDáZA®üïÞ£ W’%Ï?@—+w™ Q¾*7œMæËÌô~[ÛËÁ_î¨ãÚñ¶VýÈûí&kd å¬ÂÛy]ßÞtÔ®“%ƒ•íî\ÞËt·ɹ>jAž#ŒNÕ˜†=I }Š6)gòá°bø”(ü2žn¤YêŒô‘;¡RÛw"L]CÄ PÈaB@ö¹Ù/÷ö.“Ý0ŸÏŸkäðôÙ\‘YzH®ËM§„Ùa=ÍÀEdÊÄWd˜ÕÑX~ŒNÓ½çwªGá°ŒŽŠÁ­ElQ[z+;wî¤l¼B+M¨ Ó"W³EdŠÒ5ÞîA¼Ãùþ>äÜ–Ïäm§¢X©xÓÁÇGÿBHS¯ls¼ 4 Ü¥ ·ªŽFPXh3{UEÅ)e ÛOØ:Õ*¯À­¦`%ŒNfFb¨›"1„8ˆ mocTÔÚVðýº±½u LwwL…"ZØÞ:ª¼wQ‡3¨B,Ö¡{ÏðÚ#6ýì.LLò¤n£Tâ …¶I‰NLY´¢éJB>hi‚n½³W D‡0e#J %1SO›™3-o\ÉDtŽÑšSçLÿ…zbP¼Ÿ¼x…Žh¿vÆhJ× zûÒ ™ª€"ÈÑïúŽEý^KU Hë†=Q¼¤–"Êü¡¼ü¥]SA LA…Š lÁ„ý­¦´Ep5³Ô¼‡ÉÜÞNÚÉm2>8½kr¼Cy³ÈLÜ;Ò9E,&®xç”cf*.änHÿHã„P|xŠHRäI)`€º0N ×?hŽhªœÁÇ”I#H¹(J(ˆ‰­£ªH‚PmPªCoÛÏžûé¯WeÙ›‡C{C_Eqëèa4¼ÛÖ;;°©Ä›y‰ŸKS0iTýYig:.)È™³P]%qäÝ›R”ö¯¾×†XÜ›àì¥ìÍg ÇÀUàeèAÔ¢½Ç)66 vºOJŠÀ|0›n¢Yóèú'$°BÛAüp±´}aéOH ›ž[ôü\ê+AYˆ0}ü€èQX7]GÁq™+Ïý‰˜õ㊎‡KöÎIÓ›!¯ç” 5½jd ™Á÷Ñ'ÌKP„Ÿ°Z [ˆôÒ¯þF¦fº³?(ªbíë15zò„Rÿ*Ľԡˆ!d[!Li@,zIû3UAp!* ,û ù/EÌêˆxE{ tÌl‰ñÏbVµðQÈɨ&PPçÓ<6Á}0$¶Yj¥åk_™DI~<4'ù½.<¤˜—'÷ÿ­©sÄ=)Š­ÒYý„íÖØ BIáMÉ5þƒäj_M<‰±Âá)ú È£Y£“CKZö³Kƒ`OÜÙ<4ŸN‚úêT¦ªƒ¯„[™çÖ”INJΡáàÔØJ iLœÕ@©ûšt]gf8§ìG˜§¶Årb YA· Tè2©Q¸³æJŒåJJ~côD¡éìBhö>è¸cƺ·ËÚŽîrûÖ?Ås‰¸º«=~Ξac¤Æ]±ÈÊ­™^D(Ðo& ¢R>f£JBm I?šSþZ ËûUŽË HöÔ:†Ú¡A"›?]QH EìÚ9dáå™ä Àã©IÔý²þ£kÔĵlº+¤EfIá2(¯´Š×¢³rñ´ ¢+Efö#ÅŠäVaàÊV½Ðìç™#¿"úDמ©ÂbŒ(>¤8@~ i _J~G^yDd½š ÿPSóùŽ¥7Ui’qhÓH'[ÿ*lÊVs³Ö€…(•ûjýIíC>à-šå%jÀóÈÂDËX¡R€'±¶ôt|³tt‚Ôf`‡K®³8s¶§uJÝyKS…GŒˆ¡Ç´Ë¼UÁÅdr¹þÛHÜ ºêל¡Ç ~5¥½6B@R isÅ"{vʃ©2ø1õÍR0§x¥#MœÁÀ8Ã@. û‘ž7ü®Ñ‰´2raŠZ"•WD¼],< ·lŸäj® ©<šó*¢¤BtÊÑļ\¡ ž)ØŒã0gHøåÆãd¦­eEdÕMÉ="e)ôÈQ9žÅ:õ-SRñ²TžyÂöÈD½Iè¹Xe»€©ÓÕ* %@Ñqór ™·ëU$Óƒþᣛ9‚-d&xÙ€•üªy܃¢ÞRȈˆ.…1* Cßi $€Æ1Â9>y|ù?];œ¶ ÉÏÖîÿÄ»“eªTþ¬*¬Î*k”œ5Aç )\sâl¥ÿd¹C'Ú¨ÿM¥uª‰M"ë'dýT ½YCjµ²W\®9.!úàÞ`/[!i3^Šê\„ÏúùdûP¬|•/V9Èw-D¹›9æ%g$‡ë„8(óbº”žšSz‘‹€³Ðàþhм„Ù¢ÉëQÒ:[ö~U|˜º›)Uaˆ™Š¤¤abØäˆF(§yTLË‚Ùí 0rÑ3zÖ¼€&rìêw~=‰åäíÙH=h8hô'iÜÁx€á w±¿N<<.ZítØêPc hHKœÅ2b€†‘g"ö~Eñž|tŒÔ7€T÷Ú•!ˆ„ÝêœÂ (JKµukÓ­Nt#Î<t%ÞH;ƒ'Ûßäg£ÓŒßßÌa.\”Þ”4<+§ôq𣂕*,cc¾G)^;m\³Q4¬zžU)ò U,k•éjX+dXH×ÚÁà†0 f-@º÷ö½"Àf&pE<ÁœXîŒIá@1—õn3L;EWl³û²ÏÿÅŸK‚¥ô§Ä {ÈTH“%e[käaí¤°öüÇíØâ<°@äµk¾Ëü}^KDc®ÍI>Ïk¤³)EeáõÈ1i·AV8•gÇÙÔ亮cé ú¸¸X¦aà”#Ò:÷«šŠ_B] oL_£+ø_¾mM`õ`”O9‰Ê‰¹"’»ª§ÒP"%ñFR™>¹þè¡Ü¿‰È>Vª+¬§¥.„ï`:ï^š+À"½Z+GÇÉkS#¯Fš’+/û ˜EdnøY<$ ¸(&åH¤¤G [ÉüÃÆ9^ql[}§2Nf<£¼wÝúZWr7îcñ^~^ hö"ñI ÕpðNàÚ!ëPLBÜUÎ]õR ¥èwß*x 9¢xnÇú;B)"P ñˆnƒM]­Ž4åD9A]"ÌF˜ú£A­ùHXENÕˆ,¸ègÊ›P[P"%Fwb/ÁÖr"qLwsÓš”½b•-g›4P™X5ØS£4T"ÓD¼nrÿÇ´BY¯ ™™* Ž…PVŒâ9s R5MW‹¾_±—Z…ˆõ1!?×÷²ÍÃlU8ë R°yi2Rw?`È©i+ÓãU¥b ÀDA(”Pv¥Ê}ØÚ¥£Nmx æÁËNO/#m€ë?Ùy8ŠçÒäßïοò. Ã.™Šø°‹ÆÙAoIùçxÇ…ôžîšƒ¶½C¶ƒO^Òñ½hÊšt%í’r¬’bØâ€RÍ]¿îkœöfRkÜ7øsZô‹Ä>#37RŠã²¯×Ã"´<¼XЊÓEaüÑYN~žÄ"½r+󢿗ŠÑn‹­u-,ú),‡{íI‚ˆ ?ƒ!í‹z*<@ˆ’M¶¿ð°ä&H¦S XJ¹DËÏ”jê‹x"îSÎw¤uŠJ7êÙ`1¤ ‰šYàa /´«"î¡÷µF´À;°O©TP{J@ý”aMJ™ûÒ `]ÅÈÐ 1ÌQGò¸ÃC™( )D¯‘±l#.Ć`F€¤“#ËëàUWÖNsÞ¦¸—¤ÞA&g %'`Á tØeôµƒÀo<1l©F€<¸£& KÇ ‘)Õ¦2ØB Amü 1“å¶™OAL¼r ñì•+%9E8ÄéJº¡(25)%ÏÔÅ uàÎqç6_§ô©;ßb´-I±”× DJ•gG‹+Ì¡J 9ݦ±‘°¸Ày† |:(ŠÖëígâ`9øÜ ž¢Øa˜)a«…OE…yzôo¨¯Š  šyD¸¤QïûÖÃF‚ ©‘ µÍ0RnGÔ„£IÏeBÁ‹<"ž­]‰ðÀ4ÕÔ( ‘ŒaE×]® ‚¾ˆŸ´ÙÀQ]æaF`m¥Ò›~ÒPxí¿~’Ö+{ôöÇeè§¥ p-û%f<¤ªÂÞ“bõó Ë]òâ…6;5¹ 7Ú]v 8‰üáXÂãå ¤è°ÔŠ C%š ƒ)Øö(‰zÙÂ2¾s¨+ü"°ŠØ"½r+𯰊õè­ ŠREa¤Šõè­Lÿ›!¢ª+ÐÍåô’à`í üÑ_ž@[ã°C¤ƒ9a¿Ì îäëQ‹Ù‘[r+tEm‘Y‘XEpªƒ„¼xóà!w^/G0û«ˆŒÖ±ð‘É>sܹªfÈ ™Ñâ›}‚í«Ç3¤>ã)ÊCÑÅ)¤à,.ó.ZŒþd<Öãk†²²Iß@uQ… QT}`ÆÍâ™íÊ­Ü ðž±ˆÚØ?’MAH3¹(/ÕUœÊޱAøz£–Bšâоùƒ±7–°{ @Ì$Ĩ)9%'P¸¬¹ºýv^†gy"¨»Óµ¶7Uþ¶w2Ô„:ë µÜß(ñ1±Wá+÷¸Ò fÍ´7Á÷H«×ž,€ü¤kŒ'nð°'°‘œ¨2$pIé×”Od&ó*Ðþg¸FB);þÓ¡‚fCˆ‡eë´û' oY¯¢¢â ¾íºÍâÆ–Î¢°Ö[´‹Ñ6:¾¸Í®!ªF&KO+ð÷N5Á„&¼£˜R[J(e7oÂ/uß|’Zܯw Üü#žþ¤ÙBm0¼ô2±•¿G‘™ß OŸ´ÁèÑЙüêÕ" #'‰` *)óvÉ' É´žiˆ5ý)´­­¤×ˆÓæ8¬¾LP ™@Ÿr?Ò¼E w¯3‹…š“Ç6ELy—~éã»’©ûãaÙkÀùÄ8Qþïn#Úö<Ý+ٌƣ‚@”6(lŽ­LYS]ã‹T]Ýv =-zÑ"›Æ¨ÜA«+ë) 3⩇"œ@ü-•H=ñÀ=i·_'bJBNçÛ™k®ªF‘ô ¨Ö6W»Ef£Y•ò?B+碵^„VH¬ëö÷ˆ­$Vµœ}è*ð’äy)rÑíri?z+wE{´V‘Þ@(|]§uÖÍv‡ë€¹Å–ÏÓmh‚6„ ‚`½sy™×T w÷rrpþèÆ”ªç•®s[¥"€?e0DL YOýe§°y 8£Ò+/)dÄèŒ1Tr%÷›ðË9ë„IM2´i#šÀâèBMɇ·ö™U:²9=ý¡¨Eáªs)Ì#%£Šgä ›”³–âLHOà¢nÎ`׈™cü+aukÇd”òŠæŒÜöj—r’XlF­À›r6¥+;‰*u Èyî¡“2™`ù• &£L¸’‰ 1óeeoÊ+™±¿këKFËPà ?ˆ9¨{Ø o= :hl#@î)ºÖ.8ÚŠSe*‰N!,˜Óx(˜¢K"TC–³G;²1qSÐ,“U¢âh¬¼ ÏÖ2ÌÄì”zSOT˜sÖ[ÏP/„zLȃ–ÁÚflA½Vã§{ºËÏ8™ÿŸÕ«äÕa6qq¶¿}b|,ËÈP=ίq—ï‰cF&úfB±³Ðܤ‹h>HÀ5ñZ°¢ £=jèXá_Þ±û¸“â€~„V§Øï#®­ ¡6Ä]{š¹š à27:ÕJìkÊâ*b4ÊDì ¬Õ¬¬Ò”B‚ÃÌĆGE™Bx?,»xëã­’ßàN‡ü“kc$ò 4| F÷GYlBOMP§Ü¥Ü¢x*ò,Úõ}Êdå5T·”ª”Í!ê,3T5Vn=ä&Z{MŠ·CxQ)¶*¤ J!@*5Ë”ýtê2Û±ydÀò' 'UDçþý(lÛ:º[3.¨ØÓ-Ì ¯7BX+Ñ£°ÔþAM J¯º"EÒU¸o–^–¾˜à¢¨"ëî4pÛ½`¸ÂÛàÕã9 + >jC…ÑåVV}¯™+u«ÿΛÐC’y¬ŸŸã¥Ÿ¨ù\[Í zùÏ·ôŸ< H‰psçêþüü!až/vOL…C?+IáDï)÷"úvøÀ²TŽÂÒS.Uñnø¤å`é 9Œ.®€vˆà MÚ5|}æ¿”£‰9Xé9þú˜6ØNA!å`äØr0'»–ÐH{6{Xò¡^n²ù ?ÎbBXcdЩ IžB,ðñ Í‘z(Ù'_"¶€¸s—yM¨—/&PVÙ¸"³<|!2+s]š+$uØ1™ƒ¤ùüByP†Ç„’¶Û¾ï0|ñÉÄB''ÐP~0§ @~0&BÏžt+¬ÂhÔÀ8S‘€ç!8çaä7©’ÃÉéc¢˜¿Âlð¬/; ËÂâ£}±„ÝfFç d –ƒ°„ë¡}è@Å@vV@;4)…Ž3õå&DÏBaéÝn¦Áv¤‹ô@¥:¬O-¦¶ŒŒl{Pb¡Zx€†š Ú}„â Ú΀.¸²C|ÂíS:¬+ÿ£±„2Ycu“¤™E¸ÂÚB‘i¾ßfÔB… ×éˆí!|ÏZ°ÀX”„-a ªóÙ̯S ·Â˜(á„è æ0R WÀtÑïÁ®Bi 0‰ac‘€Ö t°'M ù@f¡=ÿx­ÐÐÙ{RSÄ‚=¤)íW)¹Ùff’@A ј~¸@Êü’TÚáÿÐ ÚˆGñ$¹ýQéÆ?ªÑYÇ)ÙŽ$p ¥Ó—]‡ëo:vC˜T%æ\|<*#[‹Ü;‰:ßÑ€rïê¦œŠ¢-½ÃܨÁöÝf{ÌëB·²B§ëoldÜdRa…Oª áÿDyÛô“£…[´¸ï‡1‡…ã Rçâl²EÕ!_¶pM …ùcÄHs0YY$·ÂpZ:öß8pq‡§ é SÛ€ý` (” ~€ ýž¿¥‹ @yÔØð8óôðãçJ6 lÔxÎOð>Ìy{]ôµF]ߟ.}7Wá°ÓZW"¬‚.ɇ‡…?ì&28( ,;¬.á6²J…Ö98D·@=Ôô(Þ‘[[Æ#…®ÊÜâo&‹Iâ\¿%UN±Câ„ ÚþÀ‡íÌôtöµK íJýÁïBœd!t„¾B!³À¢/¿‡U' âÀ/K„’¤*g ²‚·Ä_V/þÖ¿mÅB?4 ˆîäìA€.pä+i ` C›€Çjò:¸ Þ!5°FæŒ#á€Á»f‚“Æ}Ùœ×Ùa0ÒGu7ý’躲×I?L•EL•µY0ìiO<}î¦ZBKUÚÑY”ËÁ¯A£šY¨S!ómótÐ?\ §…œÙfyHV·f’Ó€pYN†ßJ-`«Œw&P¥R´€*ÃÝEd|(?ˆOަWâ™2V?Ô?ö tNùm0‡ÉÛI—Û ”ÊàuxE&]È7!ê „Z@T.¹­Fë m×’ôdKr‘ÿЉøêÒM¿mÛk¬è=Èña_Ö#;$ }PñPYY pþ¯ÏàN¡]­: XmæÚ`CÉ‹ü'û„[ä ¡ÆßæOšŽ„âµ™× ½Â»ì:ˆuHSêŒÜ'Å Þ`<‚.°¦¨FÕ vzØßpöNÞ¡5vco€÷#ˆ„2ñË÷òy |‡pNžÔÀž|DW‡¿Æ¢-Ô{PòqœüaLä îøZ —hƒ®ÅÈtÑu„êâÒ7èk£Ü„ËnÕéç³ÙÆ j Ám²|øN¯U’|Vr:è}H^ÆÇ@ì°CåAÏ@o‰üLô p„ÁÀè99 zv±ÐHC¹€æ‘X-¡"Þ ¿f%×ê`qÃ(ƒP4¢_Í” LAñ”ùß»1|ëéý gGc÷-ó?ìCdFšªåüŸ·|ü"ŽQå»VÜ¿u¤ô‡fÒ~¾¸f @„%‘À*F ™£H(p†ÜX€qŠ”Ji§ýT&<„ŠQÔ›®¾Á…c„VJ ‰a2‚¢Õø¸@‡.:e%®º7ª¨A÷’D,@CÇÚÉ ø XŸ[‘zËÉåo(¯Æ Ä@+ë"¼ª+·¢´–оIå^ •4V×u™ÚÂòUQY}È® “´žuüï'<ºÅ"¾¢+–˜tVÜŠÍ^ŠÚtȬÍÉ„ÊÅš+œ\b+:+ꢴ"³ÝÄW›èwîb…áéW¢° Ú¢³øp°—´VÑ’g d²+m^D (\ \™ÜÌšhR¶ôn0ºÔî—èý£v‹M'I ÒÀ„]£+NOE¸È _ø•”f¡ÌÂÿÖKÞB™1"¤åÞÄ[ÿ3p˜à©ªÔÊ3ðœ„M•‘€iÁ„ŽÊ³Ð7SY69¿]§ øBøp½¬k*û´á&h£6~D]ò¦ð„E[)Љ˜è•Á>©Á¼p_ ð}D‚ëxplb31‘‡IøºsÒ±²’–Ùä£~¨cd»Rއá`Z¦$€0Uy~—4“Ýã+Ÿ,íÿDï±Üc3ÀèlÝä½u©MC;%k ¶<óÍÕ8s4½ óþûð©b+k·3ëŒ ´¥þWOVÒ¾º’"7hÃHg‹HxîQ^^Ó»a‹Ý,–+i­¸ „ ê÷ÂÉÛ"å ˜èäŸ ?C¯ôô³¹[or`)äxˆÌˆÙ;Ÿ‘ö9㪕ªixÒ–mjŽÄ÷>(*NP ™AÓ2¢ ?ôbÒÈ@ôÓ¿åA()9Pwçÿt”YìðJ¦2“¡n5´xeÙA¬@C(6iv’‘@€ÂC Ñ—øû7‡/‡4W¼û‡âH÷+pðíå|ªÎ9r?ò¯Ýó½öå@æÁAJ"þÜÎåësXÐ?úå]Cò?áD¸— Pþÿ6¶‰´«ߺ… šANUž€ÍAݼ«°@Þ•iŠœfO'ÒÆQ%0ÒC¸+dWá=k¼“á§Š“µÂ÷¯,ŠËô€Ùa½A–áäREw±—™FŠÀÓå ´MäŤp0ÓƒGTVÙ„W¸EfŠÃ]}‘ªCÞ"¹tW(Š÷h¬‘]jƒÒ‚ÂÊÂâoó—Øÿ;ìMˆê?êt<èzÈN³t˜8H=ÈbÛîda`;¸^"4}ºG>XÉ&§ gWuO!Ýž™Ki@>¤Kôj7÷Tð°KHIyA:¯•Ò •5|å[ý^íü:ÚÊ©—°i“ÕÍYÜËEEˆ/ÅHMׂ`pøe™5.êŽö5FÒ¥z¸|³nÑŒÔñǃ¿r'Ü€'&Èâ@a„diÀBæ2’3)e¢)G]¡Hƒ<—ñ0%Zñ„'©Æfí›LÇßkrÐÎeH~o†Ýä‹7”Ô‡6°zÔñÔ‰ÞBˆÝu2X²yi€Ø—Š6û²yå}ˆGGW‡»ììq„$rÎ8Ë\Rïnìµ6þ”ÔéB“6§g²#3ñnlóaY{Jž]¯´³Ÿ¿ iþÍ{H·]ºwÖkAØU;IJÞCíÇ•®#FÇÑs=y:û¢-zhT(¯¹íá©tkƒÎÁêÑ.8zE§ßAÜ­Š …C± ù¢ žzŸ¥E„WÞEy-Š¢+nÓŒ¸Þ06q¤èñšW?»å4¹lÓ•ñ6E/JüôÚµ”¼ÍîšýÅ]6ûŸdÔâfëm׸„ ¸õ>.˜®L4šíd¯Zp}XLî¢s8õ=Zþ àÃÇœÒêÌtm‡ú¿­vï˜7bG˜³¡ø‹G§è_ý¶»ÇQý6la¶FÊÿ,o&ۈůDƒùà|eeDXJp¹½”`€@$ëMjZÌÓ¸ŸæÂu—|{RŸv×=÷WSQXÈ‚Š^Yw/Yì'¬÷ŸÉèÃso‰»ÊL¦ ¹SpÁ4%açíS'Ä!¶PB›(ßMÒ”|óÃÜUïµZL µ›ÛKµ<¥Ç21ˆ€XÿÅí¿»+Gi®0×Ç])Úžþ£½L‰Å7v ÇàyÉÐ@‚Þ{dKJQJQx!ŠP´©?¥ªÔçUîéÐ#Eø•µ¸,š¯ òllVq‰5Fö‡ÒÞb$:ߊ\ñê"C?Ù>ùšÑêûfžj)ù]ÿÏçÞ(²å½¿Mo¹ý=–÷×¢¼TŠõè¯w䬯¾­b+LE~dW–ÛœŸQ¼ïÜ­¯m©¾û:ÕÓ”•mòÏúÿ.9“âa4#Ô¨,Î éçÿTÕÎoÝòH˧»éä6§S•b¥î“etIHÌÚ‡g&¯YDz´Cµ“¢|–p g¥ŠÇÚy¶B¨ÆW.•…’æß´¢ÚÒT÷ܸŠ#SÈâJ‡o)K[Ü•ÆÌåóÑmHé$÷íÿ±äKs^Ù5q}lâNpÆq¿Ž- ÏàŽÿ§{tß½2ŽñÈŸk[‹d’NkžCZG9­yûнÑ%/¯À.J;Ñö¾‹³ˆ²ñ-BW?¯S2‰‰7ÙvŠ â¦UêÏXm›¹O ?ªò0Õiûz~—V£{ £ò|¡£ È‰©èùNo°ê(!lXŽ9‡Uõßïί·Ú^?Ì`¾ÍÜý;>´®£PµÈÖwÍa³ žÍÿDDаßjW?xK¬8xÀªO©ˆ’Ï~´ME¥ö…â°ºÚ¤± ¦yd²|5ˆ3ÃÄtö”Î^$šÊìûwq}½¹|<üšô%MDRLMü¤þ#Ýá¤d½Í¾?Òµpè`u“NÎæëÕ.MõÊ•«T”ÕÄsZ›”'×yl£z§†4éÿŽtsˆn·¹ã–~…œðNÙè5ó[¥ò±½Êëµ.‡¬©F”ÊòhÑÓËü›[L+ö]pùßu”«m@@¸ð oÆÿÎN¦8éBED°ÉÕnó:ãÆy/^O¦W§oJ\â –5UDÇ’ÿ«Æv*³Ò®H_øwýï¹úSØùÇß[òDÒÍÆé)®ÂÍeüî:¨ÿkùPéAî{m”Åá){öfˆ†SP„y™F %` V«ABÅRróViu~ósU¬ÿëÏñ’–³Ñüå9ÿ$•»¿l¢ò—ÑY*Uå]õä•è¸ÿ§'_Åoï*äkHtGÉa|Ìa&Óï¾lò–¼ýh\a[»Óþ¾ãLkLÖÏâo d½U÷tP÷´h{ó¦ÿ”ÏQk®õˆ­/ªbÏøw}q{Mž(Ü.>*Ⱥ°X’} †—˜{¢»†D qˆOÒǾyY¥ÄGéÎFþ‰ˆ÷GC¥×QLÜgBlíwÙNÛº_—¶+“¬~pÚ¨±ºfäA¨§Íä/½Íúm\Jï0}X½w3üA÷ïk<Ÿ¢5o÷ o{³¹œ¼ºBÅ×::å{øS„ŽÓ¢–¢M#8žoøâçþq z[}¡#›J_‰?lðœ†ië¢(íÇŒYß-§¯GíýB%”_<V5­ë_»–Ïïõ¾Âçðf°$C £Àº ¥÷rå=Z«XÃa­‘‘â)óCR]OÐsRç]ÒÌúÕ¶èAó.Ÿ Õ™Ö7ñQ·ñNóà ä¿ 69¥I`ˆA1 ]î†r&èïZµ¯IžVˆa„$ýj³ŠzµîŽnÙ­DÇ`V_5Rßå[Å?¬‹ã¬õ•*x[®\É„r—­¤3éuÅ8‚±6÷ô.@v‚´¯«úÎ M·äèªM0m¤w(\<ë ’Ö ¡^‚*29¤`§"W0 ] “t~¢Ôà‘ìMêõÙ¸l¼|Ûi«Wò†bñK}üb‚®BVHIB@RƒAóˆóν=,Æ_5pôÇÞëµçfxU2óÝ*{ønÕ4g2½)ëñø‡¬þ?ªsœ¦kÕ÷¥ìï<Š ³"þÚçc3µ+30ÙŽTöÒüúØþZ䲟žS_,€„'ëjö~z‘G¹ß½<f~ƒ"CH%ŠŠ%œw+{°ûöøjß§½n?‹úªîs?zIز':©YW™9Ê–úJ›apà–>/¥“zY™#í¼Õ»«ßž1ÊØÜ¥w­CüóIùc§ehÏ ¤H}»¤‘=?“+òJã€@î~b/|Îrª›Ä™;ýßæ«˜o‹çƒéÜte¦ã­=¦´vÒzl2¦Ý[Wø3B¤™.PóÇ¿%3Úrôu«qRBT¿lT “‹˜Òîëæ}ý(š¾ŠÇgÓ5ÎßH›¦kÝV¿³Ýö E©úŸ)׋ôå'ùt)u©îh n¨µ~º÷o—Ph¹ Hg¬s½ Df{ø"‡GŠ@ °ïµßf§³xs±Ì6ZÙHÒsz²ýv^³qçÔ`aÐ""”!œŒA@H½+Á÷ÿ©¯å'úïã>Ç»N?à5xTüI.Û^ÜF6êœáoe÷{ö”*Éõ'‘“:úÌ ¶³¨ÈÊÏ•‰g¢ì‰|ŽU11hiUX½ÞN€…d‚|F1=¬Š2 2·ÀNfÊýüß…]S~º®"C‚]Gg›õ+Ÿ£…-3Á;îeým¯¿K|ÏÑ´²¬¨>ó÷üþ–' î×ùPS!•lWÒmº”=ýr‡®&¾¿Ô*ö¢­s?îÂ&QÏ‹÷©¡÷^Ôr}«;—¬ú@Š‹Þ©Kx!Þû^>šl©ü#as^Xÿ†Ó‹'ùÝ’`ž¾ÊºÔ£ýóu'ë\Né¬É̦×û{¾ (¿ê+o,@! ¢º‹úó¦c!7®­4X¬! {r+lÝcu·ÓÜeþk6}ÄÕ‘I!4š½ïpK÷ÕIíº%&««ÇÎþ,ŸÈ\pÃÆ° ÈÜL…׃ˆ$[‰˜³°zòK}Æ{¥KÒlŒnxy“ÌK9öbçàpÆ™åe%›ßµ;5ëÛ:þá|Kpøp^ô—ó³ËèN¼Ê­Í1Á :ô €ü}½—€ ˵:׉þKýFéÏšõzøß[µ™ô£v„<¢ÝfYÖþ¤‚1d-ò¿Å³ušI›™sN¤bÓ6«Ñob3e‹J×]ŒÂ½>~'Ñìók¾÷N"aÆ÷iêÿV²GVIÞ–j-±Ôt+òŸ‚ηð½äm*™×jÿÍ}3ÚåRlaVHsŽè…–S’Àû¼ú>n-òkÞ?]âûN g2íìñ Wí¤ù?ê2Óò%ž¬ÒÿÜMáñ–ω©¾$¬Ÿ£@>êMœp1w€ÎËxÖE'uÃÐò—¹{D–×nü–›ÿ÷g0Êå ùiC†QV%gŸ¯ÂHýÚÓð7W_j°ÅQuƒŽ ý;ÁûPÀ€+ª7𛽠IFmû"Ëk,tÇ#Åj÷Ç±Ï 'ñ‡ÿO¸ëU·ð¢þkªü¼>6*ÃÒÉÞp´ë^oŸÞ·`è$„Ò}kÖ¹v‰¹ÓŽúFE_{Ü4ùáËÐu Û“Ò3¥Sm¯rJZº¼ß—V¾ªã2F2(ꈱ–ˆ(@G ³ÕŸ‡‘Øîs9úqxçQr~øç…^µ’÷ª‘c’ÿ!L¿2ßÚÛ÷¡Ð)inb)>:³<˜–ŒÈ×¼Ï+«!Ó¬õ;νpx±nêVÝXîÜÐüy_¿ì€HiǾ‰85Îv%‰ÝÕ%‰˜è™]¼è uº^VÑSeÒUOû62ÓéÃÇZ£îî·GwèCÁý?}×é6®>ñ]³ˆÇiØ¿‹>æ»ý»×"ˆ*ã€E$µ„’Yòpc–g&§®|Ä ýqà§e WR ÊXgðÁY@+peÆSþÁ[ +ªv ¬õ`–|˜¨ ï{+H™€V¦ ÖH¥¬à¬ ­…AVH+0+=€+2+X Â(„ó¢²DR'dŠØB Òœ¢@­9«AY„V¼÷Y"µ€¬Q­lMîÉ+kd Í@+*ØZ`­•0VÆZt5€­0Vº¶Ê¨ Ò MÇœØ6½)zìú“|Ôù’ûq½†·÷|^΋±Åô÷‹•îè"¸VüŠÌŠÂ+:+…ô6YÐân5/ܪv½.KÿQdŠêÈ®’+æÉþq¾CîÞóö;Uµ¿YËâf×¾_ï¹Áù–ØOûîs=7¬ÑëO–ÊÔÃG4¤…„éú­®‹Ù©4£0vKü=öW-w êx<ÿ<*Q{µÆù[´ƒ»GÔVý­ÓüÏîšÞßôVýº·Ó’Öõ‹m§ý~9;š+«íjŠf!¼¢µˆ­ÅÐΊÓEr0ŠíÄWÜ8´W>ŠÎ"´‘]ºÁ™×]舰Ev~fÅÔç´Uè­4V¾€ý×|¯Ñûj˜®J‚„ǺÖï¼¢³B+{Äã7ÜoŠà[‚+|E{àV؈{tWEq} +|Enª¢¿‹TÑ ¬Fý\*çÑ[4WìEt­TW ¨¥½„V VX·"³"³"¶(­Ñ­EtX@W ᥬ…\ª+UÀ"·”VâŠØ¢·Šh¬‘Y"µZá²n­«"° ÚM0+2«<¥æ‚° Ê +4È­­¾â"´ÑZÄW¨¤è­ +­Þ`€VTÑ[$VäŠÒEkl€Es`"µ[ª+0€¡~Eoˆ­ˆ+qEn€­d@+5‚+I¿"¶H¬È­DV—òŠÓEoˆ­>ÞŠÉ„VÕñ>{Ò+=¢+pEn¯}s±EiX"²Ed‚´‘[;pŠÖ"·dVç­DWŠÕ™¼¯,ŠÉº"½r+XŠó–h­TVdVª+K†3÷¬2+T¼¢¶¨­j+xEtˆ­ÚøŠÎŠÒY‘ZàW å/(­¼Ep(®¯ÐSEÝú¾ºXA\mݤŠåþÍ]Ú[r+üëŸn÷¢éj]¶ç-÷ý8 Sè~׻ݶ£ÁÙ=.Æ»—¬(F>ï®Y'ÛÐ#àæêðè¢G»ë¯.ÐÎ-á7›¤ÈGÇËSGE©X z©Ê”1‡çÑÐí‹÷‰gÿ5| ¶²‘q¨´Yîûu¾_ˆ;½E€BàñÀ³)Â;>8$wxßµ!ø¶É¡a¤BAå€PIÙEàÒÔ²Ë0Iâº媰ñgƒI—½ê¸-äÒÂèÈÏÂ=„½…ÊÅjßgùéföNÃè½_ ]Vz^Íòn]eŽàá ŸÆÛ/ª÷]Ã<Õü\í>–¥éw~Íóf%–Gß5ÜU¥6ž¬Šã¢¬úIïZ©%LÆÖÜZ T׊­xp0æl–»2Pë½¼JÆ6!I„"D0Ã5LR3ÐàŠå ŒÏÊÂ@BO3ÝÃÎâ5¼9‰œ•»çºr¾†ñÎÚâW®Ñ"¹4W¡EaæQ\Z+XŠùá|ì¦á±gQ] …b+žç»|žÌ¤ŠÈ§$Vi²\>.à*)Ñb 4Vš+¬"µˆ¯ûñ$ŠôW:ôW+ÝÖ"´‘\O‰úY'/*¤p›7í6{e:O£&òÊ íÖ×^ÑSgLg<ÐÂpC[€^pãPû˃ä]*—I4t½íäT¾YNRÙa8Ðq6OÌ¡ÎFªÊâÈB^4Á4?éϤ¡ñÖÕ8ÿ†Ò’|;ƒS×iB¸_²ìÔ~êhëNw0êó•ÐôR‘2£Î']¸JuHÞ(@‡ŠìJ¢æÉ¬ÿÞBëêúþV¿ÞÿèöMljÎ÷ÝÁò¹›ßM㈷÷zZ>ãõ©o)ùÄ{›7– ­¡Ô!ÓÃ\ë¼ÿoFËž¶]GvoH˜þA‡ê&»Ää<ù_¯æêUTÁhЖѹõ7/ Bâ–õ@ÜSe`ÿ\S´^£ØNß¾oû ÞÙüa?{1”æys2¯a ŠdÄWªÍ"³"¿Y¿úºê+pØüpV¹ŠáVë¾"³"»§U$V’+¢+¬fY‘YùÔV²„WšáíȬஈ’+‡¤ž YÑ[(E|)¥Qʪ—a™BúACîU$ðBa?N$kÉ«ƒaOß×Îq»\ºIvŠÓÙŸÿÜ¿²¿–ž 3Ø! ¶G‚Ú¹öÃg sO®¼ûóeÝš»_$ÝSú>—ôãqs·ž…õ²|hÔïݱPçyæ¤Ù`Ä‚’[µÆl3å"äëzÿß%ÃëèÕþðgEuÐW¨+îèpö·âp=—Q²àÿ«öû„åp|OC_®å/u.ù樫sVXXEaº"°ŠüýÎX•êðŠÛ´›Ÿ÷pý,[¯¯ö÷ì×Gµnzî©xÕöZzÞs: ê’Q(`)@¦˜¡o•{„Ú·üø¾V˜¹‡ëni±Ø¾É¯íñ·UN9è­:³{΋›SE£ÇWkYa¥®0k²ß‚Ó Šÿò×Ûg~âÂÚà‡ »œ»º!J´mOiBòå<ùÊûN‚™?ÀáÍC¦ƒ°ëšu¼F»ee´à¤ˆ¥ŠñçFKŸ –@+:›Ööìæ÷  ²Y²e)@+»"²EdŠò ¬D¾îñGWIÌF`EgE*¹¸êð;ØElXkQX¨Šëö(­aùÜîïÇ´¢µQ[!­Y"°ŠÕEt5™ºå …ä‚ncîi{î9Ï/. ”HQyÇšÚê\Öÿeõ#‚½¸,#iÍöϬ©×ì>¯8ñR&†‹iŸT¤„«ë ½Zù:/Ç^Ÿ}þ«7îÊ@žžï•üªíÍóåC©‡²²wVÂS°ÍŠ£·¬F š£ª{@„üÞ÷Ûà’nÓ¦>6}ÄLÍ=ã ¶îì@{$W+‡<>vhÇnH¬¼ ÏšÐF~è Õ ª Óž˜+@+E V˜+T€Wo¦ ÓjS&œžïsÌû9//‹½VØìŸå÷oâ<Œžƒ½qÞïÇòk˜ dÀõú§y”½`5DWƒEn—øEaö¨Uïï½ßýŽÝÄ,Ü_!7Î Ê辌j³Ç™9s5¦t·¢w5OÃQSÝÐõÕ?÷<³¯I•¾1' ígžéÅîºGn§'üIxr~ÊŠ¦¨/6¯É¸˜ðèp'x½Öç<‹l™{[,}—mߦérÚ ¯ËŠ·~|¾µÄã7ÔVª++¬"½º+¿~´ø çŠü½B+º¢¶¨­®éß]ªîjŠyH¯þá‘Y"¸„WjEn+µÅ¯±EdŠØì²Ek‘^û¦«óÜÑ\æÍ.‡ŠÕEv}+XŠÚ"¿Ê¢•QZ—dVÅß|´VøŠÝQYÑ]ñª®™ñYÑZ¨­r+$VøŠ×"´‘[WoEqB®…úð®ÈŠÞ…E0ˆ¬ ŠqQºê¦àŠêˆ­Á¸"´©‚° À«oUhEnˆ­z+pEdŠÎŠêˆ­ÙåQ^]º^‹œ\+0+ ¬[4V¸¢¿J+­²+ZŠý§©B+[käj“ª)©ã²˜W€ó;M+ÚW"¼ó„$pŒúL¬ }Rò2¹pÕï_›S,(2îc×Ñ(ÐôDÝ©S  žôS{qN½w¼ø”·JRÚkùÑ·~QÄEŠâÏ‚ÛA1êïò4Üœí¸)U³Õ‘»¾–çNh¶ AeÔã¡sŒéˆÄâÁg²FJ÷4BTõì¾|Ûj/% Sh‡!þÏå­Âmð·ø"_–äOˆ€ AIf…¯yjK_Åå2c¿ªÙÝp$¨Ú|½øÙÁ„/É{™w)x7Ñ'J•gTLi»µó#šÝ¯-g-Ï{²¢Á âû(9ÊIDfŸ¾h•ùÐ*ÆöVI^úIKÉÎˆÇ A†¨ cr¬Øø[žæÿ®@ë¿gK -^ÿÔ´WÔ%ü0„mÉ(@’øE5¡¥J)*ª¤j’Ã3q\eÏ:ª¼Å‚cA%’ãŠø½~C­T~ §5X÷ â{ò 8Õ„4 7ã§Ýõ_? >éMcÿÆuÓ·‡JHd¹kT rKJ’B¿1å%À €¨ ¥P ¨©J|è/ô‹RP” š©¦)aL‚Ch.ø‰ÎSA„‰uØ6?6 jÕ±LeÏ„·ÛÕQä=ÍíVî˜?Ã\ø! 9—¼{ýÿ.ÍfvÃbèLÚx?€@.]wÐÎË¥d¢Z¡Eü7 n(JOg‰ê©%)êçæ~4/æaV¹æŒá˜ýa'òK#î &$tR¦¤YꥥÏ(ù1å²¶{_•ŠÃ0¤e Úuùºëª6ywÇoYýh¼~¹5 Íô=÷sJKK,£p 熤û-À¤®(‚ÏZ~•G¾ˆX‡ìÿ[?‡÷Qï_eÆæZñ†Å}}]ßGw¸sÒûÖü—ŸÐú¼oC—Ìq§€™fQOÚa¨¨åÀ§,vÖ€’l$^éÕnÐÿh'g‘1«OŠ€÷*³…ŸúqX(ß%€Ë/,®ïæ"Ùڳ¡üÔ°¿Këë*{óA¬ÜDNÒxÙ ½«eœO’§>>=J14¤û´E7éØÅ2ÏP€BA„-Ѽv ÷;Ùg;ü†›žºYo›¶Ý¹{׋æS[¥Ærè«Ðˆ¬"·DWט@BÝþáŒ*+èÉ ÓÄk“b Ká¯ÐË´(ûŸXÕ8SYÑP¬`!¤A‚œ’€\ˆJ! ƒU\ýõÿ”«F®G^1Ú›ïÇív€öäõCe»y­2ÇèJ{®,#« /ox¶É”ÚÚ‘Ã û)«6wŸN´ÀÞ_ºëØÛ!\‘]±Í"¼ò+EhEføJb+šEx2ËoEf^–ãàV¢·dV¡îy¿矼ÞðѲN„ý| DÉOB*f«îËu»V–B·gŠ«6½”<ÚÁð/aØÖúî±UïWò|BØö[‹…à8+ñ¿91”0 ë+¨Â"·´W$ŠÛ«³ÿW ôc¿Ï8‹æsœáÑæºMçy Ñ›wßWàAc˜ºnÏ’š¾KžŸº‹–Ž›ÀNfGRNK%wX¬ÒEásoãó©úóàÅi|ÚÏÖW‚Î ¥Üí˜ÐÓ£DPvO¥ÀXXJXÐsú_ ªækôâ eà$8[Fƒ€ƒ|è/ì‘Ɉ§Û¥ú½L©zW Ã4g€¢ \_?‹Ãk¯ç{ÔW õ®Ì Ƕ ãÁYçE}4D!ˆ!QV¤Q×ÍÁS2jÖgÒ´OŒº.¹÷½ˆ[3Мá(ÛEs“¯„¹‚G'õz¡oÛx™]1îL¨þTdzúkcÕ”í¢|Âvÿ2•+v…–šygBð‡ŽcŒ —T|ùëÁ]“…Þü1¨QÔñÌ\ÿ¼Å+³®¥Œb¿H8ë¶G'Ý ßƒÖÌ’Ëç»N_ƒÍèßU1Eª²$ƒr²X5Ëù~|ò^:Ñ…ŒñWs§ ®£Ö6 !ý­ñûz‰Õ22ÂEÈÞÓŒÖÿ±êÌË}ýêÐñ¯—{z=-Å—Vãg¿Ý7ÚŒ¼Ãb!‚ú@ˆê“…ˆ‹o‚fH3f­Û9÷ñGë—vß=‘Є@=ˆ $Ѐ$ Ü„Uè?øöÚ.ÇÊ™àÓS)Îï.½æñ0ð`¥,oˆóF"B³©Î3F›wªs‚ˆ©+ o®‰‡±Ãßj? ÖÙèYkY+àè{3Õb°È®Càð)m]…ÿÔÌÖ{˜­E|ÔWdEpÞ/íÚó×émÝÍÒ»÷µ”Gü›£ÿs3cp¿ó¤Év3V“ºÆ]›q8bu­é@á†ê«EXŠ!ò·ýŒõáÊÌhœ”ÅÄÓ„Prçáö2n׃ÕÉâì¸gUâþåRœjº)²1ŠG3(ýÞ€çj½OÊݶ¹t~"Þ”Ë2ÈÏ)`T…MOØšÚü~ËÕ©ÿØ5ͽ$)ª¬-'#û¸e¤m_Äúp0ÿ+}´¾?P²L}~EG½WßÝÌL€ð˜Â’¡ Gí¹íÅcD¿1% rÌéHSGx …ÏB†@?3ä$ØõÏ:ò:zb‘’beõÃõN!u„—£ÐÖŽ³Ü/#Ñ$½~ㄎ”5-fFÿ½ÖžÃÖ­¥é3á¾úmxÈü¡¾e@rŽ!†`åÏÃ1"´D×Å"Åí¾ÞÙ½^|—÷?íž1îJÕ2Ö †¢õ:¥å.‹J㯛gé§Ñ^C['`é^9 ‡V …¼cN:ï ­ímv>ó«òþIV›•)bfErH¬Á$V\T¼+ Wj§Ë"²E|ûŧÅWS•ér9)½Ò©w_wƒ½Òºœñ›@D·1:9wc6@ÈžíÊCã÷"Žî@¬¤cäÓæÿ)‹ÿ,cÜu5÷ETIi0 õQšor–'YÛdáoôÔå’M¨Öa9ÎCªùÁŒøçb}î÷æg±¾µ¾·ý} ßZ´E`Ë»P+ VJ+-Ï?|»úÑmÖô—쿪þ¸«\Q¯ãÿ_¥p[ÖXsˆ«%,/"4ó5”Å­Ô JC$«öU9ß*N2¤¼à'`Ja¬|ov$îÝPqÏùphª=FÉ—ÍjNÿV_à6¿Ñ+Þ]ÿy½ŒØml¤óxãÔ$à1{ýûnì5Âlñ´ÞqßvYm÷—ÀÜs_òÚW-·Ä¹”V1(¯—‹EgE|Vz”XEd²ÜAYZ'€W}gŠC¬± ³¢¿] ŠVhQ]ß–ÔЊ×"µ¨¯ Ö¢¶"»j+ñãZäVÅï6Êb+\ À+óŠŠW¢³"´‘^'m?¼}DV„W”Ú÷{åQ͈¯•§ÑÖUTSU„Wmæ4Ó"µQ_ñ·ÜsÕîø¬Vãåm\Õ¶|EsH®àŠè‘_ñ’+Æ¢ºèŠÒEyËs·¢µhj"°¤‘]ð”ЊåVtVª+¼¢º¢«IÉû2\©PEo¨­»äùêb+I²ACBe1W˜ÇÄ7Z‚ …kâ^—×ô”‰4ö¥qùžwV‘ÕÀf펠äeð´¾ék|ø)uO§¼ž6ü¿‚¼j´¼Üg™gïÖîž´Êþ´%þ‚ì¡ÒprËlÝÇõµý1·’Gù3VÍývúªóÊÑšV‰€`ýkCòAÑhˬ5´­î÷{çG-»„ìr›5øÓúßž‡ÞÁ\;Þuÿîo•EwmàÞY²Ud À ÊXÞYLŠÄȬ@+ ¬ˆb”¬ÐŠÌ Ì Èõóvï—­ÀÍŸ› Èú>ÏÖÍê»áï]qWëÍ–^äŠÜ‘[h¹@+¬‘YB+yEd®gä­äý>UÖêõ@p „v …¾³4ÆÂ¤o €À_‘×­˜MõÙ{·kØN)€ó;WÓËz­˜Üj(Â_{ÝMÃíÁˆªÙcNâÞ8ïå7ÍÊþfïâsnKïy[}èLô._ëï³éûXãµ½jÏű·Y뻾÷rÙ\°ŠÁ¬ ± Â+¹l¨®Á0+¬B+Ö"²EhEyù"²EgX@ tߤ‚ùAÇ¢´®Q @P­E{ÊÀVH¬"+TEi¢¸äVaÕ(`V„WY­lçTR’+Q„Vª+0ŠÖ¢°ŠÌŠÔ€V`V@¬H’¢’AYJZ„VuE'Ei"´"²Et ŠHEu%$V„WîÖ)"³"¹ÚS"ºjh®«2+˜ò¬kYé"²EfEl‘Y(­‹™í©¢¼GXoû-Çh¹SEkQ^öHˆ^Ê’-hõ)»)ƒ45]„G-?o>§l@œX4™¼¢ò.Ç`µ¼’ÁZÖ¤8Æ·´àg6«$~£_vÀ¶7N¥úA&OF”ÐÖ†?ýJ`&‡B„!hhT™ëð|V+ éº­nSƒm¤¹ˆ“ßäïT»]¶CÕK‹rfRæÑðž gæºÇŒëYšÈç`àòM'” ¯ÿB ¸XQjÿšºæ³å¡C£,s÷];ðÊö‘¹Ž§£÷ùí:+l þ1]¥Ef^`WHŠí“‚´¢@¬k ¬´YÁYMïå°>G«_âxžŸéýýÎo#’ø4\ŸqÄ{þïEàÛkY\…íæX]ݼÎ?%äÆâqlt" í¶w½MÂD„™åZ§ÙÈ|6m{˜éh¨¹P)ò¾ß×oŽü‘]Q€@q€C ÎÞ3UzÏnóÅûma›À£0tÿ¬¨°¢~ë|k¥§†Þ*Õr=åéríu*†@òèƒòL¦ßV?ƒ&Ÿ4³ƒÓ Þ}ûÜ.ÙSªpBå!sD ñsÌŠæöÄVH®)¤ŠÛx½V­¿¢¸[Æm³Ep謑^±El‘Z_5Ù¦EmQ\‚+·¥¢+Ƭȭb+Í\A^Ú[’+ Vi[¢+fŠæÑ^i²En¨¬È®E°Ek„Vš+¶¢´ÄVÊdV¢+mÈ­&Š×"·”V§ÂŠ×"¶­4Vµ¦ŠÔEs«lŠÂ+*àVXªŠÜ‘Z¨­Š+mg]2+ Ei"²mè­¥5’+~Ú¾›Væ"·tVµèöˆÝ°ÖȬúÅØŠû?ZùmÙXÓÀúzÅnCmEÆa§ø„ ÿ6íÍ%#Š–ˆ{Qù©ã“/¢½ô¡#Z Ü]IÅM.Ü@! <@! S|»×§qµŠïú{¨&ݾâ!zdd÷I_ÍýÞ¹p²Œé­p(·Ìe9Õ'(B-hÏêcú*¦'̰°'›húdUs̲p -Ý!RˆA`!°(F9•,ŒÿQF‡ÎÒ¡'èo÷(!^tbFôÒ¢¯[‡pzº@nãßõx0 é<>=ŠlühN%á3‹æÈ…^!—×µ¼4Mícá½cèÀ7@)n¯N~¬Q‚F"]3Ù’øL ÞÕETŽÁhmXnç¤&Sy†êuê/m*h˱…ñ n­:?Ôw63^JtùýëÙÊÜ€2 gð ‚‘Qf(NzYÜ‘Ý×Ôøtþò·´`ÃÐ~ê–¥ßOd|Ê}iãÜY >B)«>½õ¹¸]»ªÓæsÙ`„,™:‡3ÅxŸÇܵ2i Ðè "ý°«G}·ÞãªÖé3}¦W©Exï%îY ²a” ¬B3IžèÿÔŽ_×±]k—û|Çå­A º}—Ê©ç$¶É'§:ššæ‹v¥PÀaþed«÷–MÚÑšdöõÙcá¬rT×D¥SW ?ãÖpN™û“«×5µUJlÃw#YåµA@@*B”äv´¿ï3ïáÅË.´Öâ¸ú îŸÜÑPµq™}ÎëekíîBU,ÿ"x0¶~‚“ Ã{<0mÇÜÿ³滊9F"‰ÿJ—‰Ívë÷íÌ[Ør¿?Â}â+¢W²E~4W_Ì"¿zŠúȬ÷„W ŠÂ+QëQ]ÙòÑ[.΢+8ŠÂ+Ï|tÞÞŠÒEb²Xc–±EdŠà„VÁ±[VފЊÔ^eõúDW«¯EkX[@VQ"¢+M™¨ŠÇ‰D ±¬"µ¨®êŠâÑZY‘[Š+¯bŠøÞ5­*÷+V²ÚH­¹„We㤊ùXÏ„á]ŶElÑ_½ªŠÙÎ"²ElhDVeVÚY"³¢²Et²i"±fŠûH¬È­ÉïAZˆ¬B+¯ßºÚÔWÒ¡½:Ýz+ma°b¶Ÿ²×£Ó ‚Þ©1¶{ƒ†®`åïþ>ñð<ûó‹+·]ö™Ø/šù\´¢ÎshMâvÏ’ë!9¢~¤]’5‹Ì˜Î1òX[hb³çyã±*K¯S+™ð~!tH奮:@é]&G–‰gݘÓ)@í\ŸË¹“¾¤bî#ʬë~¥)ìŽqûÐp[í¶«f‘~ªê¸S»}{5r„ž~µ¶@O¡@0e‹ËLXÂ-?sÉͶì|N—lÖwþªÜXÕÌl»&}'ÇžÁÒÄRŽkŠÛnqÔVRÚdVYM Vü ÈœÐÑ0+¬"°ŠÀ+¬Ò Ѭୂ³")®>¯ñÍÖat6ܧèð}'ÑK·½÷½Ç©íÜ¿/¾q¹¢·dWrå]+«Üþ÷ŒøwŠÝ,™w|íý®DD§NûHJ˜8ãÐp€Ý’Õï ÇSBѱïÏ++ÓüñrKEѨÚO}òZÍ_÷¾Ô`.Oˆ9Å$ÔŒùÕR@‡=în.ƒµÂìÿ7ø¨$ýxeŸß¨×yvQT/÷‡Vø™®gBiyÝ XâÒ ‡Wc-5ÇBrrrÏä{;ÄÞNÿûà=ž#ÊÖ7ááz߀6¤V¿ÝÇä0\ÍÿEx$WoE?ÚyÑ^E’ çEm[¥ÕTþóè­½ïëY‘_/¯Ö"¼'ÝY¼¢µè¬"²*¢µQ\Ŭ +¶NŠÂ+·¢´ÄWo­Üû wwå?¬–­Eo_G±…ü=4õÏ.E&’ä&Ø?¾Ð2¸#Ss[¶ø{ŸçÇ“U¶àlu„ütÄyB¶ÏÚsùq-Û½zI¿Ãß¼ZÙSõÿuŽƒ–S•õãû«Ø¸˜EÕýãþ€ÀźÖù>Um2Ë`œ¦qq,Õf™Í@¢y‡ïw'îÙc?Söµôv|‡p•ixH©™ýSFtŸ÷‚\Ñ„-/å1úOn‘ACªW”IuTêãâZ¬$¸YõeÏÓ•ÕÎ*ˢᆠa®„VÛÈGQ<‚³fÒ~ä°šµ•§'|¸ŠîšUÇ÷yû`Eq׺ÓïÛ;ö÷íßô^W’ñüóÐõdJÕB®jŸÔÿºááòä+®.µÑ”?¿Çý1êа;x¦U0Td#§5òOï­´²mãž Ätœ¯åê_ú;NëÑEwLÊ+±Ø"²4@¬D(ŠdhEe™iÄ·DW䦊Ý(¤ŠÅTV¢¢“"³"»+›ErH¬Â+æÕªŠÒ"XEcÅEzäVH®¡®Ek‘_ÚZh¬UE{(EkÑZˆ¬¤ŠÀŠÎ*ÚB+àãî?•âŪŠÓáíQZ­j+i^*ΊÒEa“¢Em‘[r+z¤ŠÔEkQXE~|V„WŽãº×pµÙ¼ïçwºÊÏÞ×öÝ]«c WŽоpu¿d>’)ñì •éü³4 B †”I&Žñ,B´ÏùµêO2ûKÁ=– ÿ×q[úxÛÃ#ÂEÍa¸¸¥¤hØT>Å<…ºó©ôžã>™Ç"yç±Aím¤ÃÈÓ…Ëø/žR"ŽB€Mk3Š'{%¦«Gäz<`r¢\†_K©>[’*Õ0Ýw¶º\‰Ž3_ó’Ψa<~z$Q*fàœ (ŠrÕ¦³+ÙØç9æÆŸ%õýû‡GÕØÜ·œOdŠîÈ+½Š¼B+¿! !.^ž¬†$ÿ,Ùo&¼>ÿuÓÛNNž«0^›†öv¹ÉÃ~a^’㌈ ¿>›Tùlý·‡Òç,)Tý&··9g騢àf=N£n†8ŠªÃŽW~8€ %ªX¢Ÿœ’Yå¶ëÛÖTG>±ö=”ßQßW—T W{ÆY,^yóðg4'hùT¨›Fó“ÅŸ#®/mÝ€8]fJÅšL¦}üiilÒY2(1[¯ñ £„Íð“à:ý^bÎ.½Ì²X+M+œEaݹtW„E{¤V_¥Î"´„Wy®±x0V"´TSR‡»ÖÖç÷ŸÝ+_£#N¤ôŠ) {EyMJ+×¢¶‚+®¢·Vßî"»B+ó÷×Êõþ£•ãÑ[¢+Ëâ®W El5¬b+€ï‘\5Çquâ+¥Ex´WÀk±Û\„ø4WäEi¢·¤Vô"µQ[DVWÅ™™à÷K V߬±aVH­‚+Þˆ®¿ŠEy´V š‹u’+V抭!%!$”~ÊÑóŒi„ ä ‘së’ï¤QrZ%?›¥ŽNŽ~ñêuª†Z/¨NHãøðVyÿc}›Â©Q48Ð?2öwØÇÿ7ÒêÓ¡¬ÙQ–¯ùN*{Ù2Ø·ž7BêfYö¶}e²À2.0!>j¿ä³&‰²¤/µŠ—Ün÷–†z o” Óð@]‹\´I¢L˜S‚Qßw~‹ Sʿ͌ 4C 1DÒO gÑ=|z y0ÞfŒUì³#T ÷bîÙû¡á,Ôí«¦¾­˜ñÕ£¬T ñ>š­Þ€q¹¤zäøRïŠÐW Ý6 >¶m;_ŒÉ¿ë~Ýô!Úd¦ÖàÕjõÙ–âzqc‰âùÉ÷éKù¥y>ì-ˆº‚zm¥qÙ K¬O<éTúÝ1oï4½¬Û›\^<÷&ÉÃ|‹ÚÈUí3] _ÿ69¸÷Þ.†>á6ÖÕ”Ê$×I™ýÔ̬-ÊŸ˜%î5«cØ/Oó#«³E-¿‰›³#µÛ¶ë:+Ùõ<—©‹Eu9 ÃïÃÑ¥ß.ü†c¸è¾ŽúŸŒ‹Ó/ÔUS‹`Û4HP¬ZeÂ1ühõ¼;ˆ¬J-Ý<â-™§a‰£-YË$]h‡¡tÈÍÅgÎj-ì~ Ò k–¿Ýì÷w¼% ‘Ï$`{{×.3(…¯Ž¤v]¡rö­-»ø.Ýg[ Ñó õ!FǾR¡­sXßGÞ‚Ä6‚ ,8G£/….óºò†“!öorÏ‹žÆébÉJ€R)¯ÕöþeC6\€³˜Â¤khÔQ¿ÄÇõ/.ÿknòq”"|_QÓPh:dVøYfY¹pV`WiEggéòߦ“Sª^îyÌŒÞ5§ äk“do!>9´™ôÀY^óì>Öno‹³ÙÆŽ‹xá_¢’-iDõ½²Øèt«\ÁÇî7oAÖ„ßÎÒ‰H¨SË—)üäßCO‚ƒ$QP à9r(EZ¨Öª),•д)(AB#U?­Ókþ›%Ï(ö¡÷¯Wq—$ÚŒ¾­ïŽßOb.:-n6h·™~¥…µT)jQ·wrW³Òfꣷ“Ã2qº+ÇQ¾¶u3³ìî9™Ì×ÛáL^xWþ‚±ϲEn¨¯ˆT#XEvY´:®oxè2h®"ž‰ñö|OóËæ_½Ð`ÏodEx¤Wc²EloU²¥­Ef78ÊÞ53`úÝÖ{œ~ÁÊá Ë„¬þŽÛ½ü­:*`ŠØ[Cý»3mŠð[ æ'±"#‘ªî„­íXIˆõ#}ïÙt\+âõ½rƤÆÍ§{–ÌF®¿‘O³À²¢wU¯x;jØê3Ôß-#]~>!0MÚ:ŠZ¨&qRD¶2 -Ï çÜ`ŠNI `ê‰O—ø§a^l¨„HlLõ¬um'¼8GùSÒÖ7tkÝo €Eßfvn£é’€ÕôÞM 9ŒV{á„$½Ì8•ªý”Åòâ—œ"­SXBÑ™ù·T«Ò[PEvBnu~›ö—Kú8ãÜî>GLé'ëÖ4|t„•ýEÑÍï®Q±<ì­¾y½üæìÈŒDiït.)‰*Ãö¼nV‹”j/·Ï ²EzŽñOc#ámJ&Ö&ž¬E~tWkEf¥Eb¿ƒÒ¢¹L +ŠEx¤WÕEvœôqö(¯ŸüÕø5Zf?ç%µeQ_Öj»ã½nÃØùµJž tÇñöðˆí'òONguû³¿'‘pÅ÷}$™^“¦°£´"U†äÖ#T£¾Ôò¤)¢’$ò•öÞ½©ð;þ„)¬çäºóÐûœT;H…¥"€é¼›nWéE'•í·ãl3ó«×¹¼Ð> ›FâOcuû ÷ŒÓ1ª•›¤â”ƒ"²ŽÙáS®«âeöù×á¤B¡ã™¸Òt#Yyia“0='dèœÛFZ¯iqÐB© v¤kì b¥jÓàýWxöœÓóÔ0B医sÆá•î/¡Äëc&{Y{´ÔžB€HjTª( 8µaA×µX¥H wvš«ÇöY{GÈ| ©+ÕŽzC~tk¯Qv«ÿeü6….øÊAØXäã4?´CÊåòI÷ˆµo¸·È|¤\›ÿ$Kv:’øáž2Éé}&?ÜŽèÉÅšðÆ[N2 } léƒÙÂl›ê]a&–ÚýD½vþrEìS¢:ÁqçÄ9Ì#í8›s{”è(%½õšýó0¤‰–eŠ·;ÕB9Â"±Xá¹–j´±„&l°ó;¿ùœæ¼xņD‚ÚJ4ÌmS©SVøg^_‘ϯ蚓OFi‰:·]ÙüW—PÚ{˜9OK”ï®äƒ}ŸÒ’€7ühWj~ž{<Ô(J¬Åë‹¥U›)ЦðëÕû¾©à² Pú ôšWÅD7'xL†œéÓ^ŸŠ6lÜ22 ?)¼pÝ8Å=Û…ò‚¦‹-=ðxó¿]¾Ÿ?®Q¯x¦q8Æ÷KOÿ<,bãAEü2tÀ´õÂFÑó?Î%¨ÆøÁ¼èƒdÒ~h.µBS!`†ôÕýº5HÖ‡ a@*G‡ÿ_R“s¥~¸éÅ›¹Óͱ°rqˆ¥S~·bêª`e]ÍÇͤÓM?w¦äWX.Ö3ï(·c~î¼£3IB s¡¾Šó˜ÌçÒ7áîWX-w<ö†õ9÷8ÑzrA£Fˆ„ƒ¸R]jÓêËg‹Ò„OæÁéo®檮§Í×Ìè–f ’fSKÎWDéoŒû„0ï"65m£²*cÕĪ¼Ô¿kRå¬çß-‹Ðjkj˳ˆ ½…Å„?Ééÿ¯[Ÿï­¿`xÊ^¯£^‰ùmõíMN¶è%–&F Þ¥¡ múrIfyþõCÓ¶Ò Üx=pËU㨋Âi9…áÉP—zí9ý…»Ç:PÌ8Nš²¤sø½ö6H±„£˜nJŠŒÌ¤ Ü—$Ýg¿LOuÃÚ7¾™I2Ó9¡Ó0ÇdêuÌ+÷ËØôDwÔZ=˧ëÿvn¡"Úü€lBéé”]ÌÌžãvˆr-ÌÇñtÏüœç·OݘG¹¤“Òñp±<ï%÷¦ó÷²Û¸¾~ŽHB_ 2Â’øÕ<÷²ŸÚXèl)÷–w‹2Ü7Š-K½\W8À[wTRž»– QU{^órkWÈ_/V­æ“’_F·è Çlô»š¹RΕUÿkêUBJ—ÒÀø}qP¡Ã¥ÃÚ¥5kTGTupãø²Õ²À+vŽSÑáz”W_Û¾ƒ1ÉUÓs2ä¿eã[ÀþoÝÒ¾F¯¾f)©TŠ˜1.wƒhÖ~†ˆÀ ¨B2tƒŸ2ű‘öZqaq'D_Ú>ߟ>«ü¡l24 q°€dƒ°ÝìûU®þ !¹ù6}~c Ïæù¾Xê N+Âc—ÛÅò™—èÖ³üSþ¦ø¼=ëÂêPÜ›§c>P8Aö3"0äÇbwy‡‹ë.—KͽÀ­!6CÃJÁ±‰~°ê1Tß!œÕVn=Ëoÿkg‘l&–„Ù'J]/µ½Ê ¹ùSi VŒ€¢½@ÂöQSH7èYš»)c*Pî<’à€úXÚ†¿¿ÝíƒßëI4Ž·P®£*ÉICJŒ%öTªbà×9¤žŽñ4 ›3UnTg½KŠ>ó­_ uîDÌ_Á@éŽíñ ÎM­Þ`ª4s¿0rå¼T2ш$5íðs`­»]N×»¨n¹ 5Йžnç h iADÒQ(âå6@RƒržA1  S€•9M ÇÀw‘§xñƒìSHÅG¤@R%(ÅçæÕ’-BiL‹…32Ÿé9êÁHrQ¯±Pxr‰7”8Ãt”¨`–¨†¯®N¾²´@‹ää´-D ”kz€É’FÅT¤üŒ %\.äv­¢6V쥜6\•5`& šòPD” ´ÎáÐêª[àrìœòŽY6Q)ÄA¼Î±>M}yźҒ³F)?mãÕx♣þ#˜ãX+}5•O?ñteu÷‰Å2´ì9Ûì®—êWìêÖ”(†XiÇV2•݉m=ΆbšÖoÐlûBý?¦©³WÆ(ùæÚ}ð³Â[=~ÿ5n|Ìë´¶i<;Þ‘}®ÿ¼×RŒv4êôŒ¢)ÔäDÜ–Qž¨]=âHb—%)@€°VÀ×ÀJø‰ä3JA ©€¤JÅÒ€)FÈ%J% ”!$“À¹B€Þ`¥QQ J"ˆˆšR” ÐÊ P<‘šeÍîP¢|¯oÍGN È"²²iå e (†xˆ'„M”yq €¥…©¯  ¤(c xÈTºž B5ÐyPc!E¥¦÷ d*è`å¡ ÈjBRè _Š„{XQü ð¡ñ MD…ºÓ@(…”:Ut"$Bì€7¨·„” Ž·©3ð'Ï!±ÂnÐ:ä(¦„>˜nP•°€o°"›|*!ÇÀ7¸<¸æÞ¤¿\'íúæâÚ:o0 <<+Ü@ó‚Dü0Žåå@ÐB‡ùŽnº@†"H€ ¤¡ˆQ4p¬ ÊÇûÄ(%(§Ä!ãÀüP(b k¡ 8Q" ¤*< ag€I@Ê/°¯ó ÿ 6}ZSÂô=ö:d9n¾H•Ð*Ú@6Ј\`h,¤Q Šÿô“*Ì@Z·f€B " ¡¥ èurMžR”1 J3ÐÜ è!0°¿”¡Vœ(W7×$ðO»|+~…Rh à÷ᢠ ti ÷Þ|“Ö„:Hc„+`ÿp§G î@>¤ø¯Ja¾KE,a­…?ì´*QJ„!C¼€vx¢ˆæ¤ÖB‘ (Tâc¿…8žšBÖ@!•„%‘g¡B²ZèQ Bˆ)ÇÂò0Ð÷ùùc:ßgÑå(E+ab<(la>¨zØZ ¸ÂJ!xøEe(M*F© ÊHB” B16øEš&„%«Àc¤*­¨Q DCM$¶¬Žáø`ó>oE¨ùp{Ç¥__ÛðÀ(ìà(ü€(ÕFÌh >X :ð 9 q€(õ@(Ö£Ä6¤C~1?M9D1¢E™òþ‡"Sã½OÈs Ä[‹ñÂÖ‡\ íΡ-€ª„@Búyœ“[ñ3;¢³þõæ"¿©PÅÅ1 Nt!¹(r9Žš$x­‚6C«Ø+>M€œs_2ÉQó2ÛjŽï™|­ô?½wdwýuLkËfRxûT'óuYW­Þ0Ës¸/*%zç½;-B7dvÍmE}ë3pŒóùÊ|ÜÈÀK ÿ©|±;^ék³ JzJ¦ù|}Ç%RO؃Ðp2otÍÓ¨´¼EáwÍ„M ÂqÍh¨e‡ðåÖÑ‚œ T3CJ„¤_M­Î™NÁ ÓÞhD›#\ëì}p,»ÿˆý%옷^·›~¬~[wßÊ»+üç,úç êÄDÁUü'gEì~õM·‘¾LêÊü‹©ŸÐ õ –îˆæwËðÇÿ¬ƒ TËU¢èͰ÷XnÆ&\ÙÈ“‘%SwGg¤:;§¶’Bðã°s c<Æa‰D¬Ñ1Ù>\&Š6GDü0øE» BÛñ»‰ñ&ª¹{·Ï6jš„\l>Ìò¬ÉJ‹áöPâš¶c-ŽÆC¿m[œÿÇ¥&ÜwQp…çÀaJL¼ç„\ýºõõ“úïj·bØÓ–.g1byQ›LJÎ=‹o›­ŽCÐι {—[ìF'rOGSç ݯïƒfPã=ëÎ1ÞŸq¯Ç+?™v$œÆÙh(œ ›ƒ3áTYµ´Ñ³ºXî/¿¦ÿ§îI¯rø'¦.‘ÿIßÇö?_k &f«¶{ÐÞ\^AÕ'w(ÛªËL¥Õýv¸æØöm8fyÁ0Ò\:¢ÄŠœYøk¼¥“ʆÈÉ×n "æœG¢UÞÖé½e“QÖÌÔçú–~Ãìá¶4BGïä³{G¾Suå"H¿²tm—ÇMéÉ<ÿ©—êBƒSW=9#D[òžY«é-y‹8œ¢gQ?ÿfOSyždy¯«ãæ±ÐpãÛnõõ/=½(“ýæžkÛ—’‘õÛ24 AÄ0"ŒH<бóu=Zb/C?ÞëÔA Àô¦®'Zøl9÷†Rõ]æ3g‹Zä"‹Æ?§Ç[Ýñf$p»¢Ìw±„‰ÞcÿÑu…0„Màr¯·HVóŽ ÅMfîcTwú®¬`…@úŽQ_/|àbÒî°Õâ÷ ™9B6©Ô¦*A:ÑKð÷°ÛVAP¿L"|ZpD=ø~eaŒ’>$fmÞ£\é5nî'ت#}ä¿)kÇW ¡®þž1±3ëò½ÙÞžñ¨‹K½JŸcŠ¬æ¯ºM*ñ`ÊKØþ×¼š‰o,†8~G-d½º,¥¸ÏÃ}#üTÄq4ðÔfgÊóÒkl0Ï÷^ –|ø¦qäN¿Cé£%%ð“nތÎàkƒ @Ò´:¤€|VÒ»÷J†Fc‹séʘ)ï–8Î1F %$g#å›Ù$’à› jP‚,ôŒã¹TB£yžz✛p{¥k{½ŸUÎ|HÔ>+;—¾è_誷øÖ)Õ. ùýïcâÆÙS­óYì÷>?£Ò¿WVàtF‰£>ÇÓû DGyØgÂí‚ãén H7.àyæ['Ï,êòº\v<.9š ->xâ?iób©d±c!Tg¶T SɉoÍHUÝÏFh€*WD™Ç0RÌߊ¢BÁôIÈ»m°J)ÄvŸ7uús˜Cü‘ÃgÓb¹øEµÃÅÚ·¸zg#.~ß„myº:î΋5ˆ–‚&¶/źkàA¥æQZÛõÎc·ÝTšÚ —ê´þÛ×xWû2©NMoÃÅ!B¢Œ»`ÁV¿=å‘“-2ÀÛ7húîÞ`û´mSB”ÏIB'¡mÿ/4•{vÃN–ß ØhÆË}–›¥-ÊùfE®m[w6ºG`>O;G¥P4ñ:”¶zó¢„fÊŽìGÛºóð;EŠS³|ƒÿG€Y/€´ñ¿8ù—×,M± ÚÂ=öõd—q7‘xX5ËåjNs÷ZÝ÷GXåPŠ R÷çÊ?;ˆÒ=‡a}Xß¼“)æé¤åf"&S¥vP ™q[•øÍæ`ð ¡1÷H6·¼Å:rn{|€d!¤v¤C\eSÌãçþ´ìGvZ\[8ÐüpùÕ¾ÆÂýÌ"Ý$ø àûJsøC˜ö_Mê³j3éi~T&%Î pWìÉ2U!ñg!0#÷üÞÞ[Å>×ÚúÔg3¦ÇÃGf¢qGßôgü[ §ùš?Ά-ò©Í¼vr46 l»ëæ^äK½‰Ä?ïXsLø w«†æÿ‘k¨%D%/w*¶Êv™›2m»ä'°yo p®€÷öÊ)º÷ÍiÃ_–^G}]w‚f<ÈHî÷–ÏÌ+ù˜áž'R&ìïy`(òk‘RœF›Œ`„Ð`#ÇÈÖêL·¹R#t ?̈£K9ˆîÆRZçšc†˜ M±¼t*ˆ‚ɺÖló °6GãØ@'Šð…që}®R—°]–ꙉñbq–zäÉsz_ ÒÀ~Õ!ùê Å2¢l§!†ÂÞ¿„ k(ÁJh¥b""'1ÚôÆŸ^¦»z<Õûã¦ëþ…,×€ ¬´S øÃ¨-Ÿ] ϺØQKTDô&,5ºUmX× 5ßÒ$x~ËŒråäðŠz3Î7ðsÝŠzC/¥rPŒYgx€„¥zýM“[¯ôºÓú³^§ÃS¥dým§ ôª@6?}‰@6¤ÇJ…(J•AÖ¬øéš¼…rkY•Ðh7]-‚Â/ñž©ŒxÐø~T»xÆN:WàjiŸ²iá3Æ‘Þi˜±?aÛ¯Ôn1^o—L/¯KXÅÕïd·¸>Z·Puè³ñøQ,A®b-®¦zµGÒÒwÚge^£a"þd¢òkÒÖ«›í°²˜‹¡ñÞW_bñ¸àE´wùˆ1Œ@AÞ¶¿Ð« >U ´O'#½Pñÿôj@1ØæF8ÄCTpÚÏïÕK „»§pƒtÿHÐ/bÃ;ÆÜöcR2º:²H1"]Ë–‚}Ý:$:(…Ào_6çEô1—¡gµï‡­'¬w5r±¿>ÁB¬kŸ¬@BcäÍÄÿFÍë§ñ½£×±0_£¤¨?¶bóåÀMÂ’í†NyA¾UÀ×eÂÜqØ*+×üîV )écÔgGÏ,΂‚¡Gu¥=ó¸ÚSü`y¬ì“lÕû7¬p—gÇçL¢ï|7Ro›À•çnáfåžÈ×}É¿@+$@7c xߪ'öógµ77§¬Y•ŠÊé£käPÜî¨Vq sGó<ëvë®—äqÄ¢i«ñÜØ ƒUa/©¡V‰Ûq#?Ån÷ºüM°ØaÎ]ÎÞþÛô>„$ú³A¿ï;Í–ï9x4¾Ñy¯VÜ1*á3L‹V©L÷Ð#€6ñ"á!ÚV&Ó"@uÌŒ—ÜR×þ:B0x+>S75U€ÔŽÄÊê¬Ê(Bmw„cH’‰§`Q QpÊPy,¤’ㄌ‘`ŸÉjˆùúœ8n,¿Ä·ÏÎuû¸ŒMÓ-.6e¢-çÃ^á_F=¶D°(ø`àÊc²¿öT­ÂeÇç˜Xp9XPä£W–÷ ®^T©~aÒSŸ¡ ,·©"ÖùÇ…“ÃXãû@®”dꇜã—S0çÈ$k&ï…,pH/ÒI.ú‚¢-S¼Û3*öt†§ üâ…ds‘él:Ø©\Ÿ,áŒdVÅ\MB,27]†Lld÷ä°1óIëWý0NK:Gn¶Ñ›áEi'Ïóq/¿3R¢cÙgªóŸÚ)²…ù±‚+ó]îë D ®(ߤZŠà<ÌÏI‘›Oér~‡a~ÿ+üºÎ¦ßC•Y²r@hwÛT´ûÀUÌlˆ¿.D™> ̰ ”EDo‚ÚùhÕÆl1$yž×Ü™Ê]ÓU ûP6øfs¡ùçºËΈìÙõFÝdÿ«½äïϳˆ/wô©’¯£ã 1äc„Œ+ak¬¦g±§8ã´} ¯¨|›ç‘#Ã;'ž`‡¥è·$/©²,µÙÔÆQΧ­’ß7F=ü¢#ÇhõÎÌ‹=J¬žp $RqŸ Ι‚@r8œƒiè¾³ã$`²€uÅC'ö!Ó,,k‚óI"ù×úU€àGþŠwŠwƒ”¯3p:´ ºŸjx>[ÎÕ)ÕÛ›{{%ŠÁwÑÛqHФ)9C 푵Ém0ªdL­r7ZN'“¼ÖdïÖ.Kf¸ÓæGïGFŠú4«¦žm[Óâü.ý§;J qN¯V”Á“ õ(‚FÍQü%ÆEò¡¿|AcóºÐŸ?-®¾['ç•øÔa‘? 'Zßd¯/àÁ×£þËÐÇásüw Æf—æá7ïÈQàǎľ”¼ [Š:½pï…÷;÷Qÿ‹Ú§>wvñË|rd±¦Ø7›ÇàzC§ "±çû[ÆwÅjÖëܸJÑS§‡“ƒ Dw 4‰oSO‹žbH‹ ‘Š?[ Âuö>ëg ËýZšÕuÝ·Koi»o¼¡uB'µs] õ¨,¬í~ 3”BôÆFîáãp áb ÄG÷qÿ›5’edsÜãcø9â–v …¿Qr;oÕ{ø³;Æ«## ßaĆ6Üþ˜B<:<·~¬*΄‚áñõ¿ê‚Úƒòû’¿ò èò\lšÜ|yÔÌ¡,)ĵ(ÄGFS3%_ÒèÞ¼³Ô†³[2P­!ÏCè"˜=®½Ñ0¨Š¼Ò¥Æ­O4ó®˜/sÒà2“@‚ض×m\æš+£á¿ékUÍzèÜŒ‰ÓÀKæ-µÁþM®hjÜþ÷Šlò#‚ '-—XÖ¼8*Ãr¾ý†‰ º©Ÿeš‡îã'¬Äõ3´}4KÎ!ÝØ—_ÖP5/&Ì}‰ZO†^Çóªù­ÆÜ³ÚŽ×G•™öëú#õ'à·¬Þ±*į¹û5z –òT³U›Qð®ÌGí«]œÉYv¦—ÊqŽ&5§ð¿êMÉ"]Y%Û_ib+­‡†Pâ¹#"Vþ+¦¿T‚Stä *Ž­Üä I³æA`7rë%Y¼­¯5®ŽÇg Œ]±œü±~×l®ZùCà 3 pšZ§<ŠbŨdX`¹ÐçM¾ì|íàB?ç_6ß&8ˆÍ¹{–ho·¿’.]_“ÿs$þ© וÉ áª>kê¹ÆOƒ˜æ„‰ÔM¿ºÜdÓ3ÇäŸi¢ð½Ÿ/o»J©Et­*Y‘w”Vwùv üº Û_R[ù{;‘=4}ó7YHÝTnq¹˜*J±O_ܱ¤è‘cº ˆ·½iÚ›AŠm.rŸ UJsÞŒn?+ Ud¸b3"—"\ü¢-Œóó¼Æ8v}†Ò–¦PIÉ;aãwO“‰ÔIéMÝúÎx¾®¦)óHÀ±LnEù°<üŽÔ‹F O…E–^¶‡¶iÈçøù>wíïûŸ{3ÓÀ ÝØ„#øÊÏÈ” $¤QE#o+¸Ó¾ÌåÎCºøOgŸ… ÉñZ‰ß÷fùIüWÍå›]x^ï•ZÛùÖÊej›Q¢YÍ£Åmw=t;ørw(JVÀì³+ôrÚ-çS~§d&ø”ýß³aÁƨÅW–±àÌsUPF! ‚‚¥­·Ú©‚ºù8¼ `¹;xÈÊPÂVSSO%ƒ–]»¡iP²/hyNC×ß»ç^ýù#ñÚõ`èrì/áRV7K«Fò 7°ÿ4£;M¶ŸËÓZ`GfË¿\ÓG®ânµ18Wg†¿r®ˆýHÍäcF-û³¦ÇGN-ν®Üéÿ.K®È-øÔþ^Vdå]©KXFW`z^ Éÿ袪d7­D-äÇ;‘¼‚‚ºñc.DˆB];/õ)¯šÝCÌ›ìÙ¼•D` ï}ÀhÞ+zŠýçà¬/–×"j޳꛻ºØtÑû^€é:ª¬zÆe .r½©¹bʘ~_ïÌ@¹Q1¿¾¦ƒs'Yb£d97Šƒµ,Ö§üÓ¹Ò;¼"ÚcD%!²ijŒÎìtèãã€j0¡BAüeø¹=&c xùÎ_)Þâð&eszw| V'nÕ&“MO«ySÀýÓbÐ0þ£VÁþp€™6EC ["î GnÆÂ«ÆoKRAδD|ΕJÝâ®ÖÊ=üÒ,ÐðVÝ"ŒðËsMa’;)¬seäYñôÐR³¼Ý¼3ÀÜì¬T»ªTrŸòÝ^ŒÊU`à¬ÿûwóÍã¬wù$KA8T¯Ý?Æœ·›Ù¶|,Þñí$ê#Õxp‡é5T ”6–FC;ëØÚ<‚L€#5è æ¦0 1…™«ã… 8Û¢¿¼¯Áø¼N ìú¿Æ™óʦK-.–zú=ùsp|å§|Ú¿—óŸùL‹yI¤>æüGœÅ$D£‘HJjÓŽÃ(€B\Ó¢I‚ŒèéL#¶*÷HHQ9Ù½}Ö e´RÖ|Gö(Ââ¯íŠ—T°aÿF $TT€Ÿéxä¬Ý”ÜºðŠ…¼æPÚ)°£7µÚ¤¦Í÷úv3Æ4a’z Çmd·¶1Ô'‚!­üÒhÄðQ6E)„"‚‚´L¬Úsö¶=Óò6W <ÆS‘…çRã*Y „ú“çà%¼Þ³f[d§c°¤··ŸM.AM|3|cœÕ¤C*:¨„ k_ìÐä‡A©ŒD áÞš‡¿3 6S¯¦g\+_q»FÉ7¸¢šo7»½¸Ç$¨ÏMSŸ`9ú=Šy óO„‘„#Ì«õ鱊¼<³{uXWÄÎ:w3˜²Œš{™ t™A2_6ŠÁ×-÷‰Î£§Æ9fZ¤'p˜´w¿óú6ïìÎaãÃÚ'Ÿ³ôm‹Ÿaí$²ŽM–öcêÔßÑ•‰@ÌLÏ2ô|â|@kÙˆw-£®¢"¡úÒKwÀÏ®5¾š²u)”üþp­gýé-¨-ï(Ûž›s4ˆÃ]z¸ræü®"àw[óïî² –Ù܉óEO-¶w̽!ÊRƒ‘š5ãÌ Z'RQç嘤uò‚^¼þNŸ(ÒØ;Fýê«?KÔÑÞœãå]׺'ªø½›u—õ¯÷³qÊýž¶ýÇ©uc¢âw—YÏ¡ÏHïÇÛà›üW5×ìöig9r7q=GBñ.ר*ò$6B2†"G•Óý6U¿Ÿ}¾ü¹ûqœß)ó©FÆ€™Òæ‘’Vt±>DÓõG~ƒÔ l„‹"2ÚܳtâôhVIÈ'*áÈsó¬v)üç1è›[ÙŠ·æ*LªÄéšö´|3¿ Q ˜.Äõ°o¾»š7UÈCˆ¹Z:°XûŒJE30½–×ö’ççÌä|9®¾Ùy£·…ý0W*à ¡T䑦p%ØDéçlç*®­¢oŸ‚zð­žmL¶­ ¹"ýÈ­âÿn˧ Ÿ½N<†EL%6È ?φ£¦Ø(’ÐÁë'/ÁrþZ¤`ajÍïYÞ/ãyBd)ìíóÔ„ „ôë›ÝÅ^C׸u\>ø²òqØ¡v=OEU¶þ¬|ìe͘”Ôî­ZNþ9‡™GéÕ׬NòÝý¡Ö6ÇŠN.q+½P‘uÜI¦(Ä%…¸Â¢7P—ÄàQûtSHTŸ2Â#“Õæþ›Ç<‘F·ÄY—ŸMŸ7IèË&ÆIbÖj(SÌ$k™îâ§›¡Š`×÷‹š™Á{ÅI3d¼ÿ™ÔÿÖ¼xk(òNÆåÌâY<«‡qëe.+¨ŒÇÚ_,­ÅüŸµ,täJ>ØH ÆLKãä£/¡]ñçX} ^oxÜ”Á ]YÁyæÕ¬” LîÙ 2Š«þÌo¥i•—Ø´åóe’›TòqÀÙhó%CÅy²«ÖN— ð˜³Zû'?µC¸$ÃW*ïŽÚM!Ñ59ñ²ˆy¥‡°qȬjL(‚¿Së?¥aë„øÐ@]PôÅ–·2×l'slc‹A•wíÃÛ}ËDªÌm>*˜s\;©ÆÂÆýuÓÇwqDjâ‰põgûJô޲‡Ì·ýbCC^Ü DH›ÊÔ¢õ/Zi³˜î6ó™r¡‡“añwFèÑ¢$”ÖU¥#©”P&˜¯´>8u0ê?Áã[…o$½[ÄÍË)`îµzéçœ)Þ)¨žHÞ†¿ø€`¨ÉCŽBÂ9€'4ö Xo¿³ž;CÜóÂIoPšw¥õ™°‹£=Qm´±Í›!Jló¿O3³äd@)_r—Ë ¬|ŸP-ŒÜÀuH¶ž"£‹D»#ëÔZÌX§|(SO„SÖo÷ ‰‘â–óN._Tì\0°ßßuõ×ùˆª5îÜÁx0¨ÿ–*Ê¢Í 2£’‰[¸‘z)LŽ™Áͦå#d^éuóï\BqpôF¿§£" n– Ÿ~}êRNHpj k1¿£fBéÇŠ?Y@Îv„A/¦,¬5-ÒÖEø‰¥+4Ágœþõ"#ÝöÉ‘Y¤TRŸÐDôª6Õüù§¯vÒô^Ýî ÁšÎ+[¢4I'»¸hçÖ)ã!FöN,|ð …ˆÁða·ß/.¤ñÿâàwΨ K€'iyùŒŒïÛ@ÅGißO^Ó§¢ké¨Ç8û%,èáZ ³î·ÃA;nÙ€q¥5“@T ü¹F•:^NkÕ[ ǩةùNò¯0ünuÔžf÷—ÁQZYÛµ°yÇò/ղ<7w$Ý ?ÍUDÒ vfR« |#ÐF|•Òη•_×öLÇ(h®.ì»ý²·NË«÷ÿ›ºÚ•7²µúIq ?JúìZcšçhSûuמ*µÖû ÂUDKc1ËUR´Ge„‰ Hˆ_üÒú½?s†Oz’ð¶ò¡QÖ®Üô"]ÌAz^´þ¨´Æì‹ÿmÈÊ4e$ñ19½–q<œ‡áxìLÙÎÒ¬gÎý:bhûÍI‚iϬoy2‡{óLC;r $ž¬Mg˜Áç⮓³,AÁ=SùôØØ°þ IÎÑ íZ@¦xKãõúР˜,KùŒ˜Š¢«CÚWµùJë2y@¤7ÎúÚ–Ë5PÂÐþƒFV †ÍvÊô@!±tÞÝß"úá{Û›îW^;N{ǤoNãrî¢9F³UŒgÄQìxë8ªE·?Á2b ®†f‡òËU,uÅ3‰<ƒr)¶_ $PLIº(a ³*¤4±€’ ¬,OqŸ½Üçý–}fø« h&µjÛ¯©öõ7ñ›<ŧ)¸c/u¨cÞ3d4LÒˆVì/C̿ɭÑóß­ùŒ£BÑfOà`÷¨4×væ.Sý¦=‚ñN†|8TÇB]M^ëóOekNu æ”=5äU‘d Ôè*ö.ŽDÔCúVû¯q €Ïn¥Ðtçû¥²¾¤¸V¸ÅY …½W¤=™Sqw¥vAdò™+Ñd"ÂY!:`äv^+cÒ¿ VSUYÝ2¢^~Ë[äàáåþ÷–ö2¾e»›0oòîÏÎ~ì˜Ô›|Üõ|QÓjS.%ÏÛÏ]]šR½_íYß—V[ì€ÊQÌÕÈÊ#gÆ|hö%¨D>eÜâŒIK%axúyã ¦ð¿ÃŠXw_[<9¿Ý¹{Kþõ£-Î[´îƒô2zÜ“Ëþck¶àö8u(çzBšñÚáÜc—#® K²¦íËLìÖŸuÁ"]Ù«ÌNªQ—a0*í·ÂÁa5£ ]„e4îõñƒÔèH]§aDy‹J)‡qp96¸@i.'„{u%óÇÐ>%ŔΔVxŽûpX¥J….ÌË%EG»d“å«ïÛë^C¿9kV%¯74…{ßš, x-朅h1rç [Ó)ÌàÌÂÊýIÙè¤I“ùˆ`‘%‰@D%v…1Ç>¢jÖfäÀÔaéÍnxKªUt÷F[\Sá)]š( ì—Ôv'¬D\É{~U¡ÅÙ˜6q»FcòÍR÷bT¨±ž~ᥠa–£Ê€…¶¹Ìho•Ƶ4 ¹R=7/®æ2.ð&T;¤#g¿ MU¸B%†³#LË6ë±ëè*9…Àè{çŒö"×ÉI,åÏAÚ2oÛ¥­J¦CÚÊ'ÞÛ*ùÛø}?ŸÃjï¨D]Ý0I&˜¯¹,Ä7zeä¦Ç¼ïÞö­fúÑSÕkó˜S¢o¦¹ßÓC~Úg¤ý0wpÜ .·~ ¥(ô (ù¼Ö=õFùZžÁ¸}™–Èe犜ïuÿ~ß­¥åÂóQ£MbwÂSÆ]iAøý#Ø9¦`rÉt8bJíT´H0k7œD "Љúly€×ðÛžz¶×+ô–«j™Î#‘P8v„Ш±‚6 s]R÷/¶Ôjž$:–FK¦µ—[ÊÆ¾µbjm÷ü‰ñØO …¼·AîÀq€`çŽ8 Ùkï¬Xúñµu"ž0ù¸Çü®5Re^â \‡:+vDê7¶üÎÛäæVBpC¶~’> Áp”~ «¤¯TÐ}¿™Ê„ˆÌu_nQ„>üpøhé³eÀpné‹Jv-bwê•.ƒÄ{Sãè·&Ûx?[TîÎfîLÄÇ9cü‘á”dØ—JÄKñ=„ëô¹-»†«é¹z¾?wî3×çóI zDz]ÚcžgbÈûwÍdvÜ¿sW)Jëq·¥Rž12@OB$l6¥•W]hwf&;ƒýÙ5$pŒš¶ôq³v'ÌÍHŠðB²„ü¢ç‘‰‹‹ÛmpœÃ(‹Z>cåüØ7† ËwýÞ´jö‹çî©D±Ît6h¡(yF?R’à>4W;Ðü‚\$ÝÕÊi­XˆsÚ‚R7Ž^´&"€÷‹¥poåjýóa-9ôâI‚ƒ¢@ˆ‹D”+óÎÍö©™?¿W^Éd²c~l÷ã\áU8Ç‚XxEó›09Ñ“ ±Žžü°Äv{¿„"JãHé£tšÎ ÕYŸÈ*˜;\J™—Ûøqy@!0Æ!À=V˜’ü9Èã°íáGf!ú{®‡©ið“gÏ/‰èOìsR& ºôfÿzR¾Ï^±gq0]ßS0¼YB[ܱ´CГq.ñ»™]æœ30‘Ϭ°®¸¸¡ z0@l¨ˆýµ¸-ÓèŠý6Exp_[HºwF[7ﳕòîßvBÏoržÂqϨ ´>€ƒ¼H1}~ŽTf&]œªA€XޏÓõ Ád|ò' K µxCÀâ;®èŠcå÷詳ÓÙ½²«‰loCæš‚†±ÜçU±Á1p%ŸµŠMvÚèŸâÌQ Uƒ^ID?^_>j[8Ræ sò‹c»FL¢á€Å¬¦IoI¼r)žT¥¢šXl•É|¿ÀåˆßhO™|¬ì13Y˺ÎZdØJÂ^—÷¶¢ˆU`(оseÊÜ;ª{ž«#=³ãsœ–zŒná&ÕL0à”»räk9Í)oÜ途§"ÛØû¹Ë›‰´ø:/„?Ƭ5_A1¶~Pϱ·¯t¥ $I 1A‘ÀCCBD D @#¿ÀY½ÍÄ ˆeßx¾W³´æ~—ñjý´Ñ:åz¼'•{ØÓùs8¥kŽ¥#ýêvY¼œŽëaÕ¨Ö{Æ?€÷òÅóäNƬ91‡y.Ìa0,v]D\AŽØÞî uð7žØÒ|ûÞ5 æÇ„êJÎÇHDÎO\|à¿QŸ¸g*µ`AIM¥ˆÄ „$I BH( /ož¯;¦á&°ˆ5ãy— i´ÿ‹ûñÚ¤ŸÕëÆ4CáY1dWÿ(ÆÊüH(ž_ÖWÃQ€«¯xêJm¤ê _²úvýë˙ӬŒ÷+¢Þð3î}{o¡W–ÏÒÌÍŸTÍÑ~§(•…»o ß¡¢H”fLÄYïäP=ýËÌ7•Š‹Vذcé•ê²ÜVp-ìcƒ úk¥PÕÑäâê·åŠ|;ò"ÿçX›:DµÔؽCSyÿ• ì øúÊfŒ”“­L b$ ˆóµN`?¡¡ ŒõAÛ•O½gcM-ú: 5 :xh`GT‚oŠžd¿—¨ON€†Žú†ÐŒ«R:®pðNÝ#Sד{Ò”Äóm±¬b“3äUúÝÕi×®RÓ1YîÑr=F¹Ú£;ê1ϻЙ=mõ?Æ”`— ýƒú"׋Q•¹V%—Š–– õfr:JûJElzƒ‡'hV*§ ‘0ÊbXσQ!•XP€H()€‚hŸVòÁ4ÿ4H\ÕùÈM2˜5øµÞKƒbù8ýÑZ˜lú½–ˆBp#sñíiŽýuÁ–0À*ô’­ŠctÂx>—Ði‡ßSßjp…ãš°Cƒ½Æ¦6ÿt/¼„Ã%é™ôÔ£æ”@+4jÕTçŸøÿÁêûšyÇÍ;T¬a\â>'ã‘Î2|F'´û-&ü:˜Æ¹º'€yªxýStk®÷¸ísŸÆž•h¤!¿ŒTƒ“£ ç!25½>óo1¤o’~ǯúk¶N¦yåö}ò²z6äßd$‰fŸõXJU[ XмÙ,þZˆÕÁ{iºûÆuæ!?Ý” ¤lxß61ŠÚµx¹ED¤ä£_ÐÛvÊ‘ßõ·iä¡­˜êjˆcÌ Ü_ïáoòõ#[ך³YìÄGeÁK¬rçKUQ@h!J’¨H¤i®rºsôëÆë$ÕÚËîu\·ëÓ¥*°™›K!­îÞž²ÍŠ |ánoÑw—ø0£†¨ùóõ=|ËíØÂýk©Ÿ:é…[`r ƒÒAnë/ÏSàþð+ F›íý þ{Z36=ì¯Ìövu“*®Ö}ëáá¤ï÷È,®BÑg«Ê_ 2D A"ìæÏ'Nœd+RºêÆÌþMò‰µº÷Ÿ$m>[ô <=¢è1Æ+^Ôîójþ‰Î Å>”kDØÚœF•ã5²‘<‰»üý߯Fwè«[œ¡”dG»üy´çd×Â7éëò¬t/«œr8æ÷—˜ØZ%2^ VFšÁ#ÅÑÌ[¦@ˆ‘êþr=EžÇýÔ4^ªhìÛß¹{ûCNrAµFö½i©òéGâ*wãØ ˜QP„ÍL?KƒÉÁ0+ŒŸ´vD8˜h´œ¨ÈSÿIZ¢ðC/¯pR?séó \ªVøÜX¾¿ßÈöm§ôÜØ¸—x²QŽèÌrw¹9+÷^a=&¹sï `<«HÜq¥aôÚLèV4׿ˆ®ÿæu¬%Ë«LkŽîZ}¬º˜³úBO¢ÇFPŸÑ|w^Ï%H²iR`ÕãYÙp]]Ç%#®•s Îi›óîÌ› áðÊ%óþ ÌóÍ~͹˜­’„ÙT=’%!PœÍ°Ç/ßzD ÔÛíü?k}ähYdâ-I_A#»sx®¶±K4FÊ…¹h*”¶ ŠØà¡Ïv¶è‰Nç˜ÿ±'s€„%° T ù¾‚æŠ^n—‚G ó.G·lAw^ŽžùŽê‡¶iŽÁDvh¿¨c¶«è±‡ÿŒޏ&ø|C»ÞsDÎ&,µc¯UÝ;X¬”4wŠöösf>‚C617ÕK­dbšR/ÛøuÛå'йñÏoÙ¬nl©6g Ø’ñJªŒwˆ‹í{!‹•}f0E:âÚ•ä="»ˆH!¡|m¹z•ìuùnD…&ò (BBJTURBƒp’ÏæýWHNþ¾ÛŽ—,âƒü©<™• ë´.Å¢[ZÝ£Z MÆ!e[¡Ë ’¬ã ™É¶ô<¼òÜûâ`÷[y%ÿçÞ(‹S[ª†_ÛÑÖî¶ÚUL€4†£@lgwÇŽ™¸¹½ô·¿ ÅÞNʰö+é>ë‡fì®ò„ü¬Ä†@ý¦ì® ´%¡wü%ûO2þ·ر!´n^•‹äx~OdmIøîø¶JÝZÖŽë]©^4G+<ð°QÕ£Ñg°pUù?ßñml&KQëH4(º‡˜Øc«Ì9?´Ïz»Â¥Ã\ 3j¶233;µêž«0Ÿ'þ¢ø7•˜ ! øó~„G[Ð@ïÓãê]íQ¼š®CyIŸöë¦Õ­ä½ûôŠÎã¹*³³§˜CÏOŠ‘‚ŸÖñúÀÙú³>úBbrõYÉÊãê¯Úñ!Ò®$Üžïâ$>÷¾t” Ÿ~\OVǬæÈýGÖ£h£í)Üø3µÖÆuÄ™'ÁIYàr×@Ò/Û¼Tç1ù}IÖ¯û:”‘“·;–4„nòqFýÉëepA‡Qú†£Q[ºÄ¢õwoðÝ¥Åévóµ%õâto8Ï‚ï3I(¥&±Ø;÷¥h• gе/Ÿ²@%FìP4 ‘"bb1Óà{FL p-TkOÈ—ÆÅî-þ!‚F•¶<ŸGÆB1&/Å=7È_ƒ]Bth›û,:X:š]ó&ÚýçùeÏ^#¡Ê$¶µ“0¶fcPä¦!hvh‘Üìæ¶U“wŽ¥òqÝÝÊ5î’ÉËe쀕ŠDE™„Ó¬È0 ²~ï\Õ¬Q}6zõàþöÊœüýãÞï&–³ŽË]öÒϵ!ôTÆè̦*Òô‰öò¼ÃȬ)ƉB#ƒ,±Â6vãlmÁâý& ˆ0ö‘$T¸¹å²–:Xɽaî\GìºÙa?B§"ŸÈô-ÒÝÂÀ KÿWÑ%†ƒn)$Zs»Îí÷÷[x{å/µüfÍ­Ñ?ˆ‡ÅðЙ­þg‹bÍŽÔ«×0]Ô° ÍnŒL@ÉÊæ’gßTè† ´¶¨2Á;yÏPÀ\ašÍ/ð‹ õ 9§îÔ…­ÝÅ>³àqÏ ”òë'îÆÿ®²ƒ˜[äœCGuÊKûZÃQÍ;\ò§¥©hóAÛ„yð( €^KÎ=З¥PŠŠqiJœjã·”cPj<³1–Ö B0Ww¨@SeÇV2ìã“Ý%Žˆúùq•¹[;+?NÌ‘n82‚i«(›ì©Zë@AªÎ×—ÇÏ¥:? ½~ƒ•2 :šŒç^Y Ü|rÌø*8=^ZÃÍÓ DKSÛõòÒòU žev…å¾þ­Y¶;Ÿî­S#Ž˜6ÏšD6ûñª^Å"ºz¹I”Þ Ò° D ”"å;en^6µ(¯¹GÒ( n¢™´¤[Çã@¼AÓ<‚ÕwãÔAÑdzk<º¯#[³ewxî‹3Jß¿â÷þÕšV9Ie‘˜“2ê+±Ã?ÛïZÚç¥o¥N@]ÁÐ}!÷ð¥’ÅC¸e,r¬¼K€÷ßèMEykd¸á\ë´TJ$Üٺ{£o¾ZÓÇî näP?+bD7øþJ…ÏwnæñÙ¨ jƒÜò’ ƒ>ªÞáé&m7n¯ÞÅ:·ìêóI³õ?ö¢¶ÄÚy #µ?Ù£½³¨òÆÃd$•ÉkxÑw¥(n¬ã›uöYËÏ73_³¥¸;òŠcÄ¢A~ý' Æ®(ÓPòkö¼.‰CCŠ^%RÔî[ãŸv½ø8óµ(aË9œ ìÁ`—U! „!&:•Ët›Æcûœ¤ñ~gÙÝ>Іo°TfŒlFGªÔ¨œy¤Líþñ{Œõ·knõßYì4O­ˆ5èàÌiª:V†ƒT†ÞÉø½¡ƒ“)o¼ˆ*(f¯!fq‚ØÏY5ê-½X&”žB§(~r¿-MOu+Þ°^¯VTVorã ø{94w™>¥æ–F ˆ H„ bˆR B ˆAˆ "†! ˆ ‚Béˆ!7Ù¦™çà;ØI¢ b!ˆH†!ˆ" k!-ЧÙ†´$.Zµ€ô"¾3Е —„ç`ZÎ@ìyÐ7ˆ^+àˆ~øÊÀø;ô·ÿ£;7Iç×ä:´nÏÈˆŠ¦$8¿² _V‚ ýÒ|š)~Ö{4ò;üõ «jv@'èòñ:ã, ƒjó—¨}qƒW,}ÅþçË¿_¶%££¾Ø²çÊ€¬Aþ•;Ÿ÷ ŒvFzÉÁþÕ©7—S?œTŠ–æðúYãÁãØUØ5ž‰ÔÅY~«•:Z"È$ðªü&äíÈ™.ò $ò.‘¶däC©³¡¡›@,,ß™OªÁïëӧݱtë‰/éCDj°€ÀPójöÍŽ)Êð~Îüÿ}Gâºý˜ñZñ²vŸSýœ>~N£e†RŸ‰ÓÅà@´øøW”üªF.ª„Þ[¶|D¬‘ ˆ DüÛÉgÓ’68º,×Û*†@·Gå!Ì7 7`@$4m @/iQ“-/o!ßæ9=5„/ÄAe¶*›ÞžI¨Ÿ±ðINëéàëè?6¹ú±U½²ÂÊÍéÙ‘ÍŒJ´]ïû~ƒ‚e¿C?ýÑS°1ã×0 #|Ö¦ý…‡RÎbן¶ePò.ËR­pÙˆu•YÉ7¦àËIç\î8WStòYE_ùy ãC~mOcS>i*Êtˆå 9ªb'2§Í€¬tÈqlåÆ*uNS<í² ƒÑPª<úÙ¹º Th¢lµ)1;X‰ã4À„Œ‰¸ÞÓú×-9R‰€¡Ï‹™ÉbüM”èj'÷Ôw×mrဟO=T'£>@'`@…-óü©£ì,ïY7@ˆœˆwc+š5mŸiÀg èÈß’oø0Or‹E¿"¡‚6Ô£²×C5š5ãNyնֻ˧=8Z…!£ËiÐHg\ŸôX äñ¦+J½l›æ?£@š„ð¢'d ¡)Mù@Œg’‘™·èsô¢M$ót)Ú×sõÃÅ«œØßCp5LZ ðräNvˆœú£zËì¼3&¿XÞ% ò…õs¸ûÛ$ÏNÅIAOV®Á„ÑÈÊ9GÃgX”›ã@ãvml¨JÔj÷]ff‘ü³û"Äwš‰”£ƒW×¼3ºö¨ÃðÔ¡O-E£y4²ŽÎ6ñë°9 üJæMÝ–2º¤·C—#e[þñÍþrù¨{ÄùÑ÷ߨ«”ŽûÀßÚ’ºŠÒ¥_'â=‰ˆ£é_dƒ@1%º]Ìžzë¸mPâ>¨\äÝ´2)$$¯Y›f×,K+~ Ó‘L>ÛÚÛ¸‹öî,´¥/gÜêÛ#NqHeñ³@G±®J²q:}Ó¼"8†ÁaQÕ÷-¿z<êd&1äš³º`œø°òýsq ô­yv–©×ù ä¯8¶Èeó<§ãš&лØ},¶‰H§H%  0môZ$Kö¼mK€xñY´[üÂâÀÛ•Úí«!mU1f7–õt ±q¯ ÍÆjO½O³vVv?•ì‚ÐÍ!¿>ýaq`ÂŽtŽÀ8xIñ¶&Ù-\Í?®Z2hÕ„Ž]äÎáÃÒí,¶×ìõêð® Ù%Á·Fz*Êño˜D¿)²¼išäík¥iЗ9äžõã#ŽG3:\ÿØàÙF35ÓÒIxå€]Aã´0¿rÛ®E,… !Ý+Þt žÖ “äŒF_Ap¾^&Á0¨1óˆöTºº×1„5oÁÛ#]àå¹ÿº‚„†8Ũ8¦N(÷#ë¬\ß Î6ÇV¸\%ÇDp@íØ~[DöËûßçòÐ<|¸¢ÕpKhàÝÉÈT †Ž·ëæÂI?–ˆÄ`@w9Øt„ªºµÇ ¬QÁ9åLÝzöîdsƒ’RkUzÈB"çÂ-ô’‰²ÈÏÁç,|^K(Ãðãfì-~ÚSÞ£\¸Vã¼j<½{^üÀ(éÀQÀÈ΢à婯¥r½û¤âFµ(>wßãà;ÁŠ,ÿ³ˆ ðÿ¡nqÇx ¤2 eKþé;Á”e‹Ø£"&±ä "-;3:¶-¼+ÇæÊ® æ@tµaÄñRdîãøøGƒUag™€µ(âbˆÂ;4~qtú & ÿ¿$ ½“ªãjyûòéY…¶‹ì0Q(Ÿèê:ž•>&…ɧs%6mÖ±QÀìèOÚš$ "3ÀðkpôýÂMã—¯;¶3ßàÁéYMé’„2¾y ¦l1ŸÇ¾ _,¯Ð$ •£‹üô?O³Ôgpñ#9è‘ïÝNx=‚K Få0†è²4R+Ì7ÚËWë 3 ÚRî!€©ÁŧbX9ñ‰ÅÌ9„'Þ×Þ•–=!‡+^@f~ôX¤„îWõ±m‰õåíµÀêIÓß¾_Êî@Õ¸ø’ÓdŽÆhŽÀðÍôìÊ Âå®/M¶lktOÝ0òy¬ïMÄÎS«)ôXÿ¯³’¡†/c>ôßq§›¢²ýáþ'­ÿÂa8˜+obÙ²žÜè ' ¼©~š1¿šû¡Çœ3—MÑ ,ÎN¾ðøÛ‰þœ‘QפA !r!"6ŒÇÅ?_Ôýý×Q³qzæ:{þ™ä@Afƒ=›Ã6c©ŸÍoëK‡Öfåý… »SÕNX»Z©‰€^=&? ÌkôuL‡Åÿñ–Î2¢sçä[oºÉÔmŽ{Í—ô¸xô@‰Å41¥ü’$´†ª R¡A ¢ùb«¿å²ãËÛq}/d×ï[§†˜|m>/7Û¡”Ï÷oz¸ÆW'Yv¤ °ë’‡Á¦#;Ñe©rÛA«Ž43ìVòˆh–:½³¶÷øÚÛSŒýüß׆fùysÛâÎ&^ÊY—Íб4Ñq7b6Þ i2+<3KžÍ‰ç£¬äàêõqèÔKGéJˆ“ú¼Z“‹›ã«Âþ‚ÍÎí8¹K¨,\=mw->?À6Å,€Ä´>oô¹jRÒ\Xze™w7{1ú©_Bá—,©^ô}—<:Wجÿy$¼M埀AVõ8‡ˆnÁ¡¬‡yY†0|³ÍD2®=4ç9~idÔ ÄϱoHÓÇü7dHn7BÂDÕXvÉ:ãœ`=‘NÏŠ DM@$¦IÆð©wØxåaÑQßÃ(|ÅÒl²yíx­‹Ë-B7÷’øº&öOWO¦ç~[»…nàûxàÊiiÖÎÝuͤín½¼#%c~Z DéýuQ5 ,ºUv“s…²°r¯ãn#í·÷Ô¼©à+r¾=õ8'Æ8nKÝ_ubªP¥1¤H€À½úBz Ÿ¹ëNoZ¼˜o>U-ªAÀ^^, Dþ…wh°>£(çöï‘È™¹ñ&=0Ïý#/–+¹îE“B-1 ãÚ€`{DÃéñK9§0)]rI…!¯ T(ƒãÞ:À$0ÐùÌÉæF®ºÝ¿C7*ôº&ºG¾,ØòQ½XéD»Ð@€•@ £€û»Ä×KA›áí¼efòÈÕS>œ·ZÏÕHÿCïî%tkt­fÙüAËë’Ê›”"!Æ?(¿ÝG¤(oÏþT+Ùã$w~9Éï,®}²–Ìè/jj—r>>eçÖæÑÄû{ãvα X}zŽeÁÜz n2Ü×yë×9QrXr(£¥­2U÷Ÿ¬òJV¸·TèGo½yyñú¥ƒÌ”Ç!FØ¡¢n/Œ™DXõüsikU„¾pZ?y ¢G‰¿m ó×eèÛçptFÊl3)[rR!'˜¤Z…JRÈc*… I  ($(ü¿í§WߥðPæZ~6§åŒîèâ¡4êÆ£/›RÁ¼`¬?Õü¡!ñw ¢±ÇïsƘÂ{öû7ß÷ëÞÚ¼sw÷„}%¿³s©Ý¨%æ4‹G³ÉùÜ€+Ñx.‰®n„ƒÅ_u¼©'6dhiTõ!œ•X>õØVþº%±ÏØõÅ{‰˜#ŽÈ^˜#ެ‘‡bÊb1‹ê¡ á;·G˜àÃôȬ®î0”-ª¥ÇÄ ÍŠð“ûú#—Ee$/ÇýSN'ú™–LÓ›¹aoáµ³ßþÌ)Õ»½:Xá+HVw-Ïuù*E$¸±/@¬ÕüÏ×{gƒcœ—ã¶:Gœ<¶="="¢GÒ>o_øËø¢wY±ÿ#íMŒÝ®¤Ìu}M=l„ú|qçû‡£Ñü,X |ž¦s‘O°àñ ͇Pñ[~%×~jð¥êƒ|rãä¤ÓÓj´†PÇ—â×1h§úòa9%óõñI?®ò»ÐÌq8Ì›ÚÈûUÔl?Ýë'w]¼Ð×Sp°Þß6}ŠŸ „µøc Nr—»èâ¡>D) ]gohR9Da°:ú,sžå”{©Îd ÖÛc|×j˧í6en•cÜaˆˆ°7éôXöÊðŸkÖÉý/ÒÖ½yÈõU¼Â<Û?vG¤DIKnfpËžåb,ëEb¥“’¢ ÈÿgÊËBªÐ~oòŽ&]R¹‡‚'†²]áŒó(s¶mlMƒº]zàO:n£Ò4yã+ò™kË/aࣙwËSëÄ=´žTºÁ<Öîµoþ‡¯¯Ð:ªŽoÊ9ãj'ÂixtþÆìP0 V(PQ‡XÕ(Çá0ºH*< V´ÅÓß_¾ssv—©Îé÷+lsŒ-ïê]R‡¹µÊ21ß’@äfé¯ >YäʚĸÕ|‘/T}³Bêÿ×áÔS]—8šì¦l@9×815¡Ò-ÕÇ0…¶}'gfæzB{r§–€ï½o»X Hú´`=#2<|?g…àÔôM鬅Þá:´z]ÉT:gR7±æ³k{ï®ù Þ oê¨ IDzÿmžCºýí¦ó«òý=ðŽ³Ð¤ £¾Ïº&å ‰—’$1ûäaïÒ *Ì­~’Ö–…D}\([õ«æÍõAp0hQkUº2b)ÙR0WŒC>U#¶jTz˜lM'`ÛwŠq 40¬~ó˜±Ü‚í¶ló[{3’«•–ÞÕÕ³Ô¨Ë|®UëË"Vhhô¿ßD ¢@ˆ›  O ÄXhVx.J™ˆå¦ŸBšÔá:™·ÀbÎÒ³B ½5g:¾îüßeÁ0~;oüJ•wÝ´À¾Ä‹>I°¡f±½DŸŸñ¥Y”rcúY ÷Ó]ñœ^ú2䫬~h­ pšèÐ7®å • ]ö/ßÕ;J¢P$ñAud>%ðq#Z¶5ïñŸ¬Ç9oe¹CK 'ñÿî†'LÃów¯8 {Â#Ž0ÌÞèÎ¥–°º ÒÖ´…—G€:ô‡HH>‰vŽ\=Ï$¸‚$iƒ&‚.šR(D+£ë"YÚÀÁ ñ<¶Fh SWg½\j$o¦rNìÏÒf¨2šEþÍÒOG˜Ð%ü|[Ûñ¸@gZíw¿Rzò~ʪe½aç´ˆAÙnÄH6׿¬·Î{i²+UUº»é †¥M('÷µ[|Ù<¯ûÆÖáÿ‚ m¸[¾o¼Ø`êy¼·î²_vÊÜŒ£~EzP~šXnn]LqŸ/1Òërâ!ô¼ýÜU Ç‹A OÓÉé±L1KþXgxìÌÿÍ©1Œb¢+~ùÚWÊÖ¥ò©ŽN<˜‚Q^v+ÏZ` þžvW >·ý iZá}lÿ–Nb<ÛÏ«õ¨áCš9uˆösþ3ÿ†îÿÍt¬Z/dù£ ¦2…Ü!ct4œ.жï`\É€.áe=s39¹ª"üª~(‚bD wGÙÖä>õ^gBáq?©…ì¾ìbs6W—°º|”Σ=t‘…ÎèêAc‘ôEï5u”%¤ƒ¿2 â¹N2BìcËùdÂy¹œÄîê÷‹«8óžF|¼jüÛ‡7ºolµ¼—ɺÕï=[ÌÚló`ØÎÞ V/³§lø?(Ü”ºnçÔÇ—‹¤ÇIihµ@ý±øH‰"[Ç…ø;4ض+ µS/1‹™~_V³æÍµ"ÂÏaJ«2(á¢4øâh‘ôícj¶­+ìçÂ&9¯Õ¾EE²B‚»B9¤%û-èg¿ÓÀÛÝc Æíãki݇‰­ÀPݳBkcÚ%ÆoHW‡þ}ká4I©“×öŠyÝXP"&„ˆ”âï…j‹#ïû;°·Ô©Wûn¿T„;T¦õñó´uI=8)s$`WVjšú„C|‘ÕS’²¶ÄB:·IÒTŠÛÁ1ö“ÛÐ8F±øŒEÆ0™úò>lê¨PQ÷ãéê‘Ê`QaÒÃt9^&÷/ú½2ìl±¦ÂÑñqÄ‘TââqºŽ‹r–<»PÞ•Q…RúÔ°W#ðÐþX  rd&oH:é³ã¸ m>Û¤sD35ÈvÓoQ[¤ Âo&ŸÍ–gU¾­¾žêÆnÌdHý³Ç£ýö]jœÛè‰Ù+hâá¶_†LÅCþyŠ-veÀy,¥pÌwÇ0m–&Až:H/´/Ì5©n<¸%µLmŽéÇéj'ªäɤz÷ºpš^Qôž<5fŠTýlÂ=Z­¤™’Ž#m_^eT]ôÜÛåÖ€óÀz$W¢F¯‘¢ñ+ýìÝ//Æ•tjc¸›´ÃÎxóþûÀ¾‘élòüTë”ă$RJ½ó}+Rj×Z%OÂ|~Z»g;¨cVì[•ÕÍû°éáRTW jBÁƒÎ1šå¥7Wð.ÁÏqì^|GÖ@ðÙ;í |/ábdx¢¹YÍo¸ˆ]!zäxIML—%ŠÙh² ¾O1Xýో´vxm‘†ç)o½å¯ç>ý½\,}è7˜SàÅÇ5•×ts¥¬6ä€ ŸiäTÓÄ?$KyR'߸‘ȉè†Îbçòçm9@Ï©bõÉrzë¯ü·iEGâb¬Ö•r>Ÿ4ŽÎ !B¢ØŽáó'IŽÙ¥ÇLÞIQ lïnÄ´¡3«3M´d³WŒK1cHBæ1Û6c~ñ ã pv7ñ„èÞã“- æòHC|ãOT¯MwÑO’ž¥¢dÕ0ȘM˜ûˆ®©i¤þÉ)ÒÑÜ¢›ô>Øâeñ‰wÝ‘m»½jj,ßÑ"µA<µ3çþå¡®ZÎ ýä“2j ‡¿Д¶Â;ƒÁ…øv) ãgóð|×c¥ÎÝf˜úx{æÍ§ò²+³ª)b®e){[/}£}:?.Æ÷½Lc\˜y ©€Ý‡<ÛÂK7ãl‚*[@†±Û¬Ä0AÂ$üÇÓÇ!·vG—z!Y€Q˜%ÍË Çe ¤æ›kæû­Ö‹÷õ#Çû¤lÐi ±Ñ{RŸ`e¸-ó&Úy¿½ @•Þ«ó?ìŠoH¬Í¥ÞšT¥–6¶È芓4&æpoëÔ" ê¤ÿ` çÝ7vÆ^ýÇL]s(XUIÁ¾Ëß;›«õËúé~¹Ý¾°ÃarS™¢|îä´[_\}"Tj>÷N]uAº ®øhA©)!ˆtcÇúˆƒ1‚¯ Á¸€;GôÚ£a†Qw›×xX.ã¢éZÂ՛ɥã0R¹:qˆRAý$r‘‰éuÈ" >‘„ßãûΊ쎧•‚H²G‘U«é‡PM0Ò‰P Ìæ†¸”H,ü‡¡K‰×&1·VZó‹°/ïH6…¼‚Cƒº)’+Z¢ÒðÍo []·DiˬǕæâÌh/@ œ±æ™¶¶Ä”ÆÈ±3 •1&d:caDëÝã»du9 ÌÓ+Ñß«‚‡ìe‚†ßɽíâ¾ì|™‹:#õ­Ô%Ë´3¨Ýv)bû¬Ç ²QÁFBÝ_6ëúÍÓ¢º D7YµÈÇÆ'ŸÚ¾ïgƒv©-†›C9à?”|´† Ô ØKkjŸŠ2ëêP#ø‡tp mj«ë*ˆ¨Íß77¹Ø]8yV£r§E)–ê\¨8翾[çé5?3ÕÙ•Nök4Þ£š\ѵ¥Ç]’@Bš*t¶›Ù‡6å¹<ámª5áòZ|Æ#aËÅ9yS`|&wæN¿Èè´œ~{éV °•ˆ›š=¯±÷!ká›fžÉÁ—}Ø–£§÷.&Õ° Œ@Õm°ž)ª7…Çw2[á¡{cŒ VÂöHYٽʑ† pþ%\ðgÊx¡æR ø¦*(˜{T”Cøõ<—MT~p®ÍùÏ l^}“OÊ ºØžé™²9LJÔXZ; ñím@jÍ2]7û4F‘OA2üµr€ÐrJO7ö`Ó”#¾{F©ÄXšRåðš¶`bjƒ{Ýu%’KzÚç,ÎÉ:,d}ŒE=Ö+‡NÙÞï•Mä6“ßxLðB0l}dåû2/G¦emÆàŒ§4‰Í'¿×™6Ê©:>*ã’91˜îJgà×¶îHpz9Zê­ L˜­ÄÈOmÎãGvóYe€ŠÁHÈ DIH&Åi†åÂן´zÙèþ"o4ª“¾òÏ7*÷Ø”Y®÷™‘ýƒŒ£í›\ÙÞòÜÿ…öÌ¿ÙÔâ¾-šOú8&Þ±Ûœý`aU]¸“€^ì˜æTöŽ/¸’ø†¥[…¢’)“^„P…ýg†ºðz) h<2ÒÙRW)>O½[ ÿǤl;mMoÆ‹g¢»%wÊ8à1Q %ÈJ޲u¹ò[ñÙ8]cj¤,Y°ƒÙ]ºá§ÜmýÔãjW8¥þTÍ®ÇÞ’Øg½ö~–]C2¡:QrJ_£!é@ ˜OìˆqšG©<+aÅ$L×büßœ«2¶ÈC¢ …ùüïªÑ|üsì%Eb¶Ëå®pY¯Ïì1 ’2·N«š¨2ÒºÃR+ľLêÒ\‡ðŽá³ÝîšP®˜˜ Qwksf:~'©ô Š>H”y‚ÀHp¦'é¼£X¸ášD>/~0)€ÂÊ«Îv&÷ªÏ­¯+7a³X”é1_‡™˜ú¹Øk;䯦ÝwéÔ¤Ž"5¢³ ¿m°h¯XŒ°_±¾¢ÍÐ*žÜC²ô¹¿ëzMñV—<ÏÇÃÌ%ÿ×±˜è'UâÝêðŽ>]x¢÷$OÙuü©ŒÄž§YGûõ좀TAº™çUan'¦8ï1ë^»1c‡x~0T˫팜1Å^4Xª½ÆÅN´W)¶j!Å·gþ(€]Á€%r†w†Å·SR¡‚YƒOGÜï¥Gæþ‡ö }Kï˜*==#¨öÛVûvzsY¡íÈ-ÏÅg GÅíZîÃáÕ›9Ô®–×ãJlo•lmBì˜å¯üÿd\_ãzf*‡M$Rg“äc Ã.¥R“t¥Ý‘Þ~±_3¯NUñf¾¨Oó#Çë” ¿Ý†ó¿Rø€5 l¤aºËh(3í­‚œwœ™+¨h»±ÄÊ™Bìľlë½xj]çÿßV½ù¦ÝEGÁO«™×‰“…¸[@›àð›Ë#Ýí¹æKn;U’`½J!TUÃõš•Aè˜C¢%ºï?ûH”f?oA+^l± ¤±i1ÂàÔÏûjèCh`u˜¦¼e©Ô2Ü »xÔE©„1@.šgŸ‘9›¼M¸d£6ÛLv²¯Æ«æ Ñ¥íIEÖ¸±TeIÉØAãĉ°"€Ï¶£RQ¸Þëêêõ³à30ƒÆÐîEgZ™ãÞ6èWÅÀ©jƒHnG<ønq6£{z4Nç;êPq“©RQ¹OäV´°>%c¾ ¿!vÇâüÙ¹çÇO™6³"!(‰ßôÏU—)/ ™áúÈ2Ôväâ\Ôç4µy³žÃRë-ì{;_vóîÚKuµóà‘ù.Ôl‘wä´é`Õ£³úÎXŸ Ö–e€ªQ/ÅY¤¡«9l¯G¸³íí(ìŠ^¯û‡%”•U±¶?]ùª7üå¢S¬¿ûQÏS_€C&9~ÿÆ§ÃÆúžU§vKõ9ö`í%KΪ¾Ž×%‰KíÙ]‚Gƒp­´SlñHjMô]ÈÌ­~ˆ%¡úEçLÁŠ÷TÐÉkgô†mvé”eà‹.Ûq_ÂûM¦Íru¬ÏWÍäï³~lü>ŒcاEÐõMË7ÉË»:/|ù¢}læŸYûÔDÞ»Gö;s—P"&°ˆ¨çVSIeÛ3ãº"s÷¦B›H%’ ÁªCÐ8òwÏÓ«ÄTRËgy-Û{4SƒÜFÌûáæý‹·)}_=*y4æöúÛì£c¥·â³ÜÓ]nù9W¶ˆ" ñ1ЍH*¤«J`¡„”-‰ þ¿aÓ­ÿ±®Í£&ÓhÚ…ú}¬¿ÒŒÂ?«3ü÷±ÿ#ˆB6«›‚{‹¬C LÚñ, &U~öËFÁ5b.¾â„$öz»1¬X(JRpo¾Rnß%Ò¯3÷!â»åÈPŠ3aÚÉh nEÛž¤ÚÝI—Û S=éü”íPÂAü(­²afñÃcúC‚"qÆEÙçhëø×ª¥ŒÁ¸²4tßû,nk%Èèh󦑱äA¸í)ÎÜaÂÏ»øÂ(ð`?ò AÈ!5˜¤‘¸WN ë4Bðä÷Èë‹e”™_ÐO÷rÍ 3(³ —¹”zØí“°ôº g€îAÑAl’,Vˆ{!¸@@&%5×.Ò4Î…voÁ`s2¢ÎS·9T¦|ëlÁtaóºÝ °–^Ø›¶¤ft)ma¿-Råý_Íÿfq4@€B˜@%b#dÿ «-?Ì¥ Aæ«Æfÿ¤¯9æk«ç¢.qœž° Åý=åû(W»SkÁs>Œb‡b€@Ø{KsÁ|œ@vsàâ®Hï ã}¡Q"L¦æ–Ô;מˆììLÄéü;«=“¸ò”¶Yò@zš©M´ŠZ‡lóÛ›yGÖ&¢mY¸%¢™Ô¥#daŒâ<£¶lŠ霯ÇÊD³ƒÉ/-ŸzT3 w FL{rL x#N T´èÊ$k^r¨W?s.ip«zI#¶käR¿…²ÌºŸçÎïü½ãÚæv}”†%º» öeVyøÍÅßs®rïq̱@G嚀"•÷ó|øl2â! ŒøSÓ¢ËÐ:ŒóL<@±¢4®bñnWm79og÷½Ÿ‰çrùæ@‰Ô×1eyœûáîìÒ,ºe*×'‚{Ï}@`Ξ¤ïBΡçK±Ñ DM$µœàÃùlO‰VÈä^E¹Z£Ã—Åq‰ŸÃÑ`@~?Ä"’v&ýYÄžëïò3w09¥~ÊoV/‘f‹yo:¬¤ÀVè¿úNý…ŽÓðê,Äd <)0!…%ÕT…*¤ æï÷=Ö§E8ÀÍø®»Ëößóæ|ñ“xš†oåPN¶/Ü…0È1º¡ƒÓ|€%øóS²CŠÚT>nš„úüž¨Áã@.rȳqŽ´…ˆò. ¤à>âѦ tƒ¥XvÃð8ëd¯[ÀA´Ž8SŽÕñ-=‹ÊÉP–¼:ôÇúòŒ£`_d¡Mmüæ¶#λ—JZDŒÇJ’“³ÝWWfŽo5„#Oî4­å/4ªåiMf¶Ÿá­~}ô#4}çàzUäEËñM”{c:ÊчËX™}Ñúî‘nkP"%ˆ=zùJÑQí·- ‰!Öê¿Ñ¤AE;q0q/3”7<845Lc€„æ™Úrˆäûs 1£æÜ|ÓwÒ½ËEúÇ­Òí‚™ŸèøÌ€KÊÑò1È'ïXF«l:ýþmŽ•uO†7Ž?.£ò›¬W¨@ÝØÝCt]iµ ן2x˜ ©I#ÖÝFGDÕc}auçγ1UÛ)lÆ{Éfô0—B>ÉQ·ûX._Zojðçý#ÄÈ ½*±ü¹Ãõ]¥ôj0œ%;§.~è_CºpÌwÃïãRçÒ³þeíéñ¦üGÔafýEû¿v7à­Dý¥ƒæ:_î ‹øˆØmM-: µUsÓ¨¨ ƒáÁgNââÂݳi‰‚R1¡ùÿŽ<´Š¹ƒÝî¸ïrh9üËuªÒØ ÒRúÀ-EªŽ…9`P&qöæ.…ñÑB§÷H¬×mQáð¯‡S¥¥·u‚•οï#ÊÞBÍñª DJ@@€rTYû/×û>ÖõòG ¹[#QÑaÂn‹US[ÛÞ«ÅW•ùo«Wl:…w¿O<ÌÅž>[ ¯`²2³ÎŽÙȃ¡; 4oRŸÔ—gg™K”H‰È@  ü ì?b¯Ê«te^µULêKÆ¢¾ïûfÇÏà“Ìs;òi"Ùß1ôûˆŒh85éÛÕÙî_“×SM›6 Éò}.¹Ÿlo\cþhüõüA…õçšû…’ì/°¸Ï(yZÇÂ[ Döåj¡Ô²ÝÄ-PʘrZφú †(/>/ï¾ðÌ5•ô&,_•êó!Î?+Èp¦¤–I?èR}Ÿ§KÙÃÞpzé:Žý^Š!†Æ0¦S›àóílA½Œ§]”ßJ€h±Sˆ:ÜGEiî× n oéèb½ ˆõ½ð×”¥Áæ\ ÌTM­¼ß•æÝû<Í‘ã7Ñ€[ §Û~´©—5ÛáMt–Ú"äöÛ¢ßåu›kLl;¬ržJ§Hk —oø¬a¾jìŒÃô¥ìDDäµý îÊsaÉwm¼hÍ"~ @žNçõ|,fæs[iîŒϦ^6ñÜ:=&ÇIœ8”ƲòsI\ã*}¥rq0låº%«Øu6GV,?„‡Ž‰È’J ôÉl¡¿ûÌ`ôv–[nO`2ˆ²ÇyFÈñ¤ä¡‡ÏÞk[p¿,Ý=DMT«;>î“Áè|«”4pÊЮLe ÃÁÁB+7¾|ÿ®WyÙÿ+‹Æ¦8/b^©Œ! 7þjóþéçgð3¨t±zs-Þǰœåú*SbÝäè τՆ÷ìYí‡vtÍH˜Yzg…ò¦ø1”’wžZhka0H¨An¾ä†n¡ól‹ä{Y4Ko¬i« !Xbg'ëK¡Ú¡zdZ}÷ÈBH_S(è!Æ “~ÀH‘³ÿŠö¢ý ÇÜ Ð§*„ ᙞ´çk¶X¦2Èø~N7Zy%JR:«ü’ºløûí /y˜XÒ[n9ºÜ­×jç]CŸß(ùËn/.NÑýQû¬4Üüç 0§kü2vˆÖ×oOfÉüü©Ú£ÊþâRófçñY¬ÄÆksÈJJ©Rî³¶¥L½Ü¥BÄwºÆmà¸Ý9싇µ˜µ!­8B|dï÷éß¡‚ll‰.÷ù­ÅxGÁ|鯖‘+¢®R›žQ,6‘õ* if€@!µäuêùd÷ш?æF™<º‡lɹ„ÙHà4•÷áèûùÿd¯©gúÄó,½ƒÜNÃÕ¸Qkóôš 3¥LK¤‰£Ï³Æ'Ë닸[yˆ®­­’MKîw J[€.+»¯”Œ±n¯[´@ìfÿ I¬™+yq8i¨¶õg<ÎöúÕY銾ù y'ËDÆI°G:Úº›€c``ŸþV?Øoî v' xA¾ÿcèw¿%o=âÁèyG)ñ§†¤@BT!-OVËüS;:ìOÜ^[qºòR,Uí Ó1SAŠ0 ù³¨ïɧ›éñÆ…7\‚Ýn©yÊE~!7ÚÆrÞ/Í$ÆÒöÄ/¹‚~éOŽ–øŸÝOM 0-½ÎVèG²L^oWËï;ïñ<\[k'JØ÷ž·®:×u\š< p×J·qêU«¼µ…ACšù»jæ7 CXî"€”€„Z®´À½î{—'Äý;e;ÅZ (á§èb``>¿Uœ'ðÊSÈ\¦Ü連ÿ· ×`Û¨F«(ðòã8)Õ™¯~kU‚ ‘?©ñÎL.f²}~bq24•úæû½Yg¹GõaU‡ßØ÷Êì vþ T¡ãBQ[aŽœc§(³ Ø‹ÈSÖ%Á²?üí¬èØps ª$ÇËžMr^e“èsŸïûp§¯Y‰ây$!(@5ŒÅdxôÛ¾Qyú5ûh׫` pÏ2e“ÇZÈò>æÊdΑ!°ÿ5$‰ö„Ï̬!„Ò|"|ÐBÞâiÔ¡è®WùŸŒz—I—¥«¦|8p5€¼¥øüFƒ–㣠’P T¢…Ù¿íE‹“å½Åi³ê°IO¿§ëò”³mÂq>™¡l› “­·a®Û$¸/VÝÈÑ–ºÚà¾rŠ%oÒ÷i6\ñ—ñó‘oð<§^‘_ÌWX=ÄYõ+2Q‚Ú÷íg%œi•ãqê`ñ«ˆ¬Õ­öª´SÍ™DŽ•É-øTP&ð»ÅñöÇ­î´k ÆdpŽðœÃð¨®l›þúÎÖ²s´÷1}W¥uÙöœÇVOÉr}U«ÕwÄ,¤õîÍIÜklͽ9d"€ïóšæiÔ¦RuA_ÛB£¾1ReÃS8™oï‘*ëÝA¥f|M•9]«s/âö¤nŠçV7ñeàë1%±ï8‘m]ÜÇâ 3°]¶(ßÄ)Þå‚é¾HJT£§¬T±hÖš?¤ïƒu^f|‰Žw¸Ëzü晟ê’ât·ëTÛOc‘­›ŸO‡i<ÛY‰¹•¸õ~÷&OGÂù¹»ÇM¶­.÷=PÐ@9 £Â¶»XÖÝÊO†öÇ †G\s„;¹¹„bÕi]:£N£Tò"‡ê¿ÜáÄ„¨À“jÑ¿ Ѓa³âXÏÍÐ÷YεõÔ”ù›s¥Ñµ¬AXxâ1»žDn^îÌûq«â¡ŽsMúbžg"¼ÔÌC‚½’¬³@5`@Ša>¿íT&º0Q]®¾èæ,ÒTxHGR s¶w÷šÿÈvŸng@ÊXëÃ&G: ~·¥¢V ÷îÌ=´+«`hõ½$>š‹1®¾¦œÙ½‘˜8º7ýÆÂ  6(‰fß*’A5"¥ SI`j PIF(Ä|±³JôöÍËãAtZ÷*Vþ ÌÑèÃ1j4¿C·EyÖÎúSw¸ÒÌoÚœ{W¢ÿa@@ ¡»ÂyŽV«‚’d_÷|а -Ûwl¾<=„R¡`ëûˆ˜;¸A,¨d°6{è‹i¸rÓ<'›:f¯ÙB©d½ÌËcH/ßINµnÐñ½ „ïPI¡)ÜÏNº‡”`¶j ¥0£ã±§é§[—0÷øäÚ+™ä¿¡"n>øñ*ùÜSÃSKî—I3¬S°MßäHãxù1€„!^j;åt!…ÌŒ…„Éå=»¦Håqj”E3¼Ã¡àš™œÀûÈ{ Ш‰­¶¤šÝ?ùSW~ìby5tô 1ÆÜV¤@¯Xއæ9êTýÿzmŸÊÒ͹W™:ù¹yy.fF*$MŠ)ÑãñHEZéSH׊¦cÌy|ÄÄëàœÔ¸/C×m÷æ¶\,ƒÖ½3ùk]ÑÊOr7d„÷϶B¬=£ÕùL0ÆÄ ª€YË–û8ÑQö‡}蛿îë/ã°Ô¦—vs[67ÃGøÉÈrXãÁM܎ï4çdë ¿OBäaicéÎ`ñ;ú¶ÆÞÝ©tGÚ•BÖàKe§º5ó.ZõÔJæí‰ÓB_'û@t˜VàB Ý«ÍUØñ–oCP=B†_m)ÐÎ4®iÔÔ•süñÎÐeéBÞ­¶„¢¾lU¯…øA‡!dPÐkiòTü’2)>¤ÐäÑë×öQ‹2âµB1D |fòu—%ÊM&‚¥=slS%}´Ð¤ºÒŸÓ)¼î p9ª²Cš˜Eš8ÃKrY˜rÔ m¯RÅýºäìé‡Î’¹•f ÒD R„@¯Ø%B-b¤!„ª¨¥±4v_<“aô2ÞIjœ‚s§sÏ,›Œýi݉¿^˜é* ˜’¨Ã­æÂ;ºµAàSH79/ùO­Nv†ƒà*ºc¶6œ&üb/b§BXÂ!€jvЀs  ÷^-\{ÑšPï÷E4s*\ú7{Úß„oKoìeú¯×æPý3r}ðVx²Ëþ Ã>ã*¯ÁŽ1ÄDŸ<ìœí+a*§e¿gƒûï30NDЦñ&ávj[…:ñÀ0¯`  ŽQ» Œ”S¶¡*B$oØœ*kÄ<_ŽËÇ¢Ú…cri…;øžN÷žOí¹IéyïÔæ`ÛÝ"SÈ:¿ô«J › }æÜ~¶+?xÀû¿ƒœ{‰ŒØ~:Û﯊hþ›RÓ•Ü_0ó›QCò²©fÀUh¡°uUu¿Ì×+èZ“?#‡¬áI€_²!ǧçoùèг®w˜zÞÒ;à„ºH¾ IñøxêuŸÃb¾ŒŸMðß‚»­ùo½¾…L&11§œì;«îcK€©Üj¨®öŒÙê5ÌŽ–÷Ús›Ã…­Ûþߦß8Çó·ÿÅž†”ÙÁxœÍ€1©w G,Ä -¦N LŽiºó YA¾Š}Ó,­ÃŽ¡œóÚb’or¥5IõN'¦À€P·Ž]²ä]ôÐ?‘5bè!ÔD< çKÇ‘‹ ¨î_w¬‹$%QX^B[?D ‰0n>Íù!¡]²IqíP»z_BQ¨éWõò•×뢦ú†€B!í d/õ¹MÑ ‚RÀ I¼ –A]ô9B/‰Þ·Ï†â|^l˜.¿›ÿzåqº7•’‰Žé®r—ËÝééŸgúL“ï=Š•L·Wz¼þl%qf@ÿˆD½nrŠ”O¡@Ä´nËì—/ô×?™jO;ÑY¸§¿Ã¬Þô¹çØ7(tðLÜ›êüàóœ4TÍ5›nbÖ0ÖÔqŽðdû®ÖrªÆ¨+Ø?w{Æ‚á±ð(WúJÜÖæPOŽÑ+ÓÑt]0Cÿ8 í$ií¥„ý4P!–ôôô*”Œ*|sóWzð]Ýmte“ÿ–h Üi$¼$&àé?t©öÛ˜š-€¨Ñ |ýyMWÉÖ"¼ü@3%êd¢_›{ÊÒ<ZM¾Ã~ÄC¿ß±+DD,Ð6F ƒŽs‡Ði†›ö¶"\G»ܨ"MóÞ½¸Ž®µeݵ²góJ&ÎÐq`x:™“™‰™±ðvH·e>!Žö´+Um¤€„&¤BuØ*ß¾¾B^÷§;YïÈ.°ZFk„õNÓùµT[õUÆÕ\›I‰Z³Ü8*ÝSøo6R¿ÂˆåÙ}všô)Ô1ê÷Èúшõ(™ ®—g‚‘þ¤òM@Âxöœ;Yت4óÏý=Lj»‹äBx?öÆÏ¦3€. å͘G~ ~)7.G€«vj§ÿNc7}ÛÕÎ{Á¼@®Úm1ª|ÈúD@Orðj6ÕQD¥¤Ùn‡Ð1|døí´Æ/­T¹QJY¤¨Ò´ÛÎú/aÔz¢Êó›+ÏÕÝ%Å®ÜÞ£|;J§Dõ·ZøüoÞ•jÅ#%ß!Ã^ÉÐ@C ÑÒ°‚ì\æ&)€ö 1š'6^4縦Órµ`£se-º¾?Ïúg\87œ£SЇ§ZóÉq)iR齓t®Em"OŽ~Cûl”æÿdÄ}Õ©L±K„¨P@ö½Œå ¿üôOO´†Dñ¯þLÍyè$ÈÓ¿½^¬óªÚùý¦ÝFcNò›FA‘[qÿkèˆ"G8€¤5ʨ„“-*IE#ˆJÏœU|^ú¹„ü’Ï¡´µÂ[F»}nQû½§äùÍÙÁ6?Z>w5ä³ÇßP-Cé¬X܃#ôî$ÍÃÙ…þ,~€ô 1s`ŽSÂŽvž@<¼T#ÔíçÇ$ýìçGµûk,tç[§J?š •ôäãùVåCdºF€ÃÛÜ3n;+n{ý‰xFË1°„ªdL¬ÞÁØ»Ùï'ÿ÷nKï§4u©»#+¬•–ã½?2ÌïØFE/…˜·¡÷&¯yoðX—è B@`JVÅïî1“~iãaÁTǫ̀•ØhàHí„€BtËM(›õ˜z cXqý~ëÈÏ—o4©©˜dé{fþÜ®žÊO2ãVsˆö­6ì¦ÑW¾÷®Ùl…ë/ìäÑ]8 7]sAÛy Ü E9SãÑ—šÏXòZ,ƒCc—̽íáod ýhÊ—ÕþÀ0zÙÃf$ÍÃ.ÇzYasú„k.[Çñ mUxüzÂŒ|¢]ùCèÿmºwy·™òv¨. \]Žg²å©:ªŽüï “QM¼òßIqn4@€²¬`:¨>œyþ®É]¯¨uëLU'Bå]U¥ëx.™wàb0‹Õëd§_Wç ¸Â/*ã]»Øùö׿Ÿ"@–¢ÜúÖ‡ íÃi¢SZm¡Y{ÿBÑX¸äqsÁCâ‘2Z‡5Rî[ªÿOÏP]. •º0êcôyÚ36w>gë6à"¡Ö@І=ˆ€ƒx•”ö°*ª…*ŠA ¬U4ƒõgµ¶ñ¥„‚}ïmìi'ŠX b=b½ukû¿ùÅïG†lõÀT«ñ{žrj…{Ñ%øtŸ Gn0_³q/Ï¡§ÏÒj¢ÃóäÙ)4B0‚ꢪT‚ €¥UIE!…„PTï ¯*€¨N~”¬oÕª F:¢†Å1UVѱ)kÄRô†L´“Yd)AþöÂO QâéG<¹…Ì>Ž,ìÎÏøÔXx ê×0Ï7üa —?ê^þsÖÌ^ýF¬«¤ùÑ ¡4ýüØAØ£œÐѺþÕi† ´ïåMh“õ9ñ®M¬!Dú`ë6Éü¿¥»•-æÉH*5eÙ›ëD2݈¢ ³nöõÞ=]Þ³†éîs £†è †!""!ˆ"" ’H €IBI€A¡ P”CD BŒA0@D1 C DBˆÀBãê =OwÐk’WKó/Ý6Mr‚½U­¿ïsa&æ§ñ1z¨ŽÉzôs6•Ê…¿ÅOþÕ¹ûìù|Ñ ´D v=]OE×L¿­Ú…·dŒv= üÁÇÏål¢ix«6º+ïX·ç]—Þ¤â_³ÓÆOÿ <Õ‘ZH(I•¤€H$ˆ$­(RP”BàËšüg™„[˯¸X²½±½©IFýÄt¿©KÈn ½ÅýÎîþãÂÞùiNQ[ã§Òpf×wдí=S!0•?€¨’zl0€ÕrmTÀšÌÜ¢@*ŸzÐ{IG¹ØÇ@½«“UJ&»âÅÿx:&íÕºx:ÇäÏéò¨“ó@NgºEGi ! é€Ksú~鈂XP"9¨Ñ»¦Pâİìl'†¿¡àk_«Œî{ïÄŠd7[eÍ$%ûH¥¶ÙͰôæß’Ó‚Y’Ì8tÇ,ÏŽêSþ¾øžÉë]ÁÁ4»8±ØßOçÁ9ú¬ßÿ[Þ3¼¨ðTä…®A‡tØKð:õá\æeUy"`‰ì­8öy‹ž¥{¾] |NÂ!¬+*oÑÏýǤàÂÚ+â)ÌàMIìTмÜý”éX*ÉÑÚ¥ Ãë©q•Z»¾»Aþj¢ÂÅM”‡É‚\ä¢ÇÀt wÖóG§R]¦Ó–®y›%P@,÷‚>4t îµHèš–#‘sçW¨¥“bÎÜ!7ºV…€ëêfÛVײra^ S×’½®£w<‘8öd° ux\V+Êó׿núJÇìªØ`àBΈÿÎVïQ¶”øÝ¦³fR„ ŠF~ù"@ „ÊN_(†"X¹ÆLç&Åré·BBÛ$Ž5/jxž{ÎÙ&ptó¢L!’íXÜf›“‘ì*8%ÄZ`ƒÛo¶Ñ³ÓW£ã˜Y,ý·‘‹ï‚F0Ãápá^-FWŸnPLpr Q~ª*~ÊZ¦@D3%€€UUH$IÈÚû×gèÃ[£B©áˆŸ…0xx㺩×Ý¥4âíPõËq`ÿ½“þžë®Ž#-Ø),'ªŸîáý&<ƒ¥ ·c<&+øË¥TlÅý½xªö288P ºc•“–ûHÌeà ÎH‡%š‹|Œæ· ªfëQä&è6wªK^4ÿ6!~!Ï®Æyôe{<Æ>|eßéT»Ïk) ~– ¬ZÝŽEi'MZ.§glo›¢Å‹t Þ);Ä’”5Rwëd/$?í6†xs"n§æXê‹¥®ŽoNñßŪ±™Ô=H„›«¤JQs‹Ò:¬Y³¨+D_eO¼#¡ž¯æ±æ<p°Wø)¼³(2ÆSuéÀ€V`$`QæÀA½wxéåË‘›GºçÍÛ‘ç;¯g¥Ò¿`¡ók­ý2üÌ»F`ËåkýÂÐô»¡øçu™6vÌÛ7Áž´WGèÌ3¬¼ó’Ù«ºå3òó„Ê5ç5G”å‰Û ¿Èá eLî/¹¥Ù͈ËÍ ‡@é’ÇŸ¾/’ðËüo8=ªÙÂßWàã'÷3’‹cheæí½nÿ½®qe±ÞÒG +1Âvƒßÿ(ù@ÉG.ûÔ¨ÿŽÁB2—½•ÙÁó×W{•¡7âåV°^’ˆß}önâu¸|ÈÄßÙ°â=Ó®û—çŒtˆß½‹R*¾»ëå=!Éa×·í¨J§YD®øÖϨfvÔqi~ôv£À€OÏü²A|Ôž´©²ßцÜ9ª+’RmZ´ÊI™ž?[(3#”%´rú˜vjÍñãæ½«,ù¯Ð¦½*±Å­ã×_w¿rì}ØwÂ`£Kdvà$özµR•3z)¤Ê]¶2̼³ÀÄd?iPý¥WNëò»FQ·‰©ô+EÜ›Èü½tGìn*¤{½ö$‡«)iX»ôúÁ`Øëö3‡ç…ž™¦h²9JB–™Öøô ƒÑA4Y²ù(.:©?¸¿ñ‡»äÀˆz t­@‹Î´ß؈ˆ€_<ò:ÎÄ ì¸QåcþsÉëþ$¢è|&Aú™…sbïèdXå¯[;ˆyµD"ÀaŠó (Âø¿!ÿ(Úö÷Æ»UâÔÖ(Ü59_·­ól<8ê5K/¿A¬¤¼·ðåÚVî† =¸ÃyÝ£Ðí” EÙæµ@6&±G[iiN!ÿ?Žþò ÆŸ@ µû ÒðœŽÏYñ½ö­6¶t<Ö4+òh|†úEãTú³à„÷Y“‚™½š\€ °ÕY³¸]A© Z˜˜•1Ŷ¡¦,ðŽ,J‚A3mdU´£+¯5ºÞÛâ8üöv\vç®ÝœÓHˆjD! ʕƟ’zÅÙ0Vçv]»„Îì°Fg†7%º’¦¤ºXÅʇÓÜF–¶J¡¡2(½ìjc“ ÒF|f}$ëÎØKáaàØÁ LšB®¢ã*v …ŸœkW¹’jžf4‹ÿ±Žöéàm5ÞÖ[É׸KçíO±×íÓWö=^¨|lŒÀ(ÌVŠõ`(ÚÖãàó¬p³ñ‹ð:ެ¨Î4j%‚fJ÷ŽªàºR‡’¾‚Úþ¨ >œ®êÁ4{kÁ;X”Òd¥žÞ\F>lýòËxh¿W;V,£Ýv/ö°îpv6.×D’A Ž®‡í/«O§x{;Ÿ1­SÙµº'8§ïÔ=T~ꛊðò¿‡×uuI•bG2_õ¿\p'W?CËðOQ©ØÖ,€æë­lÓ–ß·n¡?؇6Ügºq1Þ|eÔI´CÑL‰È™"[¶uoÖ³áR¾pÏÖÕÉ2«ÖðÏÐtƒÂ@‰|²YQÈ%ƒ­s Á7ËÐÿÌñæÑ(tR¸žÞbà›BNjA¤ìP{¯DçüUÉ}WO Tº0ù¡›f°Wx`~Ä]çjïV 6ªŒ§õ$ÛÀ‡©T+ï«'[½.B‘/öÍ‘@?0–N/d¹x:/ßûö{]v³íK÷¼‘úú­0ªÈ‡ÍÝ)Ì|g¿ÌîŽÃÚ¦ñGÿ ø ˆÞ•rçT°Ž±Ò[À£Ø«¸í"ÀX ¡€ÆHxiv{êuiv=L|g¹–S úíeϾÑ-™çÙï°CæsýÔc±&‹;™ìøÔW­F|mïOý`}þ~ë® N£Çi/SM'£XšnV˜2°HÌü´'÷ïöÁ˜j|dß’í2ÿÚ]ׯ¶»9à¾ÁÁß\q½‰@Ÿb Öi@áSP0 84¤Äì<Ϥ]T '×»‰wô¡fÆÄiP¤¯kRâW8£l„ýF[ÔÃ*™…ß°ÿ‹jÚkA‡¼Ö6Â1U4@ fÝo{)Ù¸Õß肟‹ž‘ -èÞŽÑ6?€¢Î}ЉGQþ•e‘@¤åk©€kBúWOQà\ËT>„ F|~£"Þu.pìŠ4ÅOïuj}"Ù^SÖC†(²’ÔÚC K¶¬ÉsG×*åqWR4}Ĺ}_Ú¬Ÿ™ŽÇµ÷uWOew¥Éf²V|þ—ÕߪµûH 7äVúDe¸Ïáä¢óºc¹ 2•aV/§Êõª…€ Kþ܈c8¾4e˜åpë„2ÉÍ·/µž½Íš×k˳+w¶ó2§Ôrö5ù{½’ÆŸGº®Úî-|7hÒ{[\{/>µÒâ¡(R`ýñëí±Z{Äæ—¸‚õûìßf’“L:F]{?Ñm{Ó9{f2éÝ0ýZ¤éLiÔ½áAþcÁp¤RÊ$±ë0H×EÔT¯)H\²òí[ÝÙALÖcÄT‡{e“Myëi©8‚èéÇÆWnÏÐÉêƒü™CWç$ÖöE[Ú&Ë¢­-¸dž+ß'jØRò ÖdÇ0 ‡U¸Š/AŒèX7Á 6; m³1_¶Ýµöhkð]q½pžØp”¢%^>§¢²°ˆ“¸2¦@ ËUáæýå›))× îPF{Š;@A'Üä¤JhÄ‹»å£Mí˲ tp^vð;ø» û€€[ƒ}6‚;ÐK6Ìò\.Y~î3…•^¢äŸ€óË´ )}Ðô¼Þ"$?F>œÃ““’‹SËjôÙÊëˆmU¼¬W!ÝÞËòÓ.(ÒfcBûþj¢yIB%Òíåß>…_”Ñé¨ÿoÖ`»_³=ßA‡eŸ/dœ9Øá7™)"^„!|F `ßcñ^Gÿ¤c}/SI¼{ ;ÈwÌ?sÅÙ)‚¯ýâ‘eŸÖÉwÊäNúwòž­¬§\ÞTÖŒj›#ev®‚ä®$ …®f8›UhÈà–ÅÝñ-Þâ©·äN’Þ[égÇû4ò÷È5LŽ8жg©d$ÆÒ:¢R1×qŸ I*êO©ÌyrüÈ`lÔ;£ðb|~|z«)°æœ‹~©i^xëƒík|ŒKÆêÑ¡ÓöÒ@®h +¦.@0»"H‚R¡ Ê|Q?¤h¼E[wض¹öÀøì°–çž ÖeÆü“J§ŒŒ—*T{ëpŸ¬Ö£âfÀˆø/Ä@ Äõ£Ã^.² S§pr«&dê’΋§wXèÎï÷j;ÙçxÓ5µ×©§Ò5íx@€>äá~ÜM;•XEží§CHÙ™`.~–O÷<˜XT#8v”q©KZ5$6óz>Ö3"ð.t1>?t†dŠY 'l¬ÀšãFóìa±SK'ƒ¾À7œ–§iá©{´´P%ƒÃ–É Œ‡NÔ›ëèbÕtíøÔ,KL_3Åz—74ÿuN¯ËÚÒ»–A6d>`•¶E~³_ñÏô“©0ÃPÕ-¾$$³#Ž^ò*!¹Å–&t}_µ,w$tan( özïT—7G)&…ªj~s¦ãc˜ ƒÚB ˆâo¢ç¡u]s,|?㨖ö8s¸uüóu'­£Q9cm¤IhlNOŒse«RPæ4ý Èn h9—6ÛhjIK‰Qæ=b Ÿ½ØFÔñp´?÷j7úϼ4Jî a`Ps–Ô¯&í´#•E3ÂÄJ 3Õ逖!„·k% [º12êå1·?CÉÙ²ÜÌõ«ìò1óìð%{5ÆÞöµÜЉú¦(®ÞnȾLŸŽÝ¤ZÅÿåß¹™¨ETDDá|x‡ÓdÝŒ“›Ÿ+ŽXÞqƒÇ®Ðn.î3]à¥Ô6;…Ëøüó\7÷F'E¯>ìáTsNS”æLI!x2w(tKÙ½õ=L©Ó[ †PÀ#Nó«=¤÷úóû»jTŒ éÍÿšËZšSÏdÐkÑç й V¥;&¦•ˆ8±²LžB‡Ž3ðZeþÄ’‘¢MìU™¤4S˜Y ç5¦a>k®›Ÿ×êkúx©^ïcùÙû®‡ AÔ‚û}½&MMugi áÓòþ¥øjÊ_>o*U>ʸò‘ÀV8ѬaK@xÛ@oÆš4U¶|s–ã`u«уŽlsí|lyÉc‘c|½1¤Œ³Oî¸%j·”s€L^0P‡ê/9ñ 57Bø‘}fú›~žpM‡ïó„?‰¬ +à_~@R!-whŒj¢”6åhŒ ÎC»ª‘÷ðùÿÅ[ä9·çÄB[5a hiXh†°EÈ?£¥\ÊUŽ£ù¦àÛÙ½2 ­ sV¶å-G–&ÁZL\0Ø`$ vOÛg¢KÞò½ð_ó~×ûÿ³7iÂþ¶íÃðÕF¯øpà@“퉼ãÓ"ƒáð8kÿ æÙjk7qæI4GÝÊ~ÄU×>)ªkö¯‚ÛòÐ4¾á†½@Cœ`g+ˇ |v5ø\¹|=ÂiÚÂþ•‰ñgè2n×\v^„¬1É󔃵`éÒu>U®;† YVó©è\Jç—é~J³Ý<ÚB¼!÷]K•$;£­«s¾•QDË|÷ƒ]ÿ’)zòlÍ`èéH Î0J"RB#J™"ªÅ­æe¥UX]$ïŸýò³Pž©}â½1¾|›ÂÉBûU.UÛLÄš^jSfSbŠq*§bŒgÚ¼­%Þ3´ldC WÌ$ξUgß±=UÙ½Š²FÞ^à à_ÓþîVŒD ˆ$R™ù¬UMqšÑ¶©¥¨<¼ž¦wÌb«\|_0¼¤¥rÍ:yã.BR‚0˜Áøä©7‹eœ…í›;¦.`F€3ÅñêÌ”Rônå`ĺ+Œl¦"_jüÚo®§ÉæÍ{vø¹Ž&œn¹cèÿÍ?Àï%}r8£iýÔï/ùßó3%èg[§öM°“üòu|÷à鲑“‘‡"ß3–=Xþ ÑÄöà‹À#:ÿoUôÜ¡n4‘yúŒöÈÀý4¢°ùý–·çs›O÷Fñ«XǪ¦†£Sþ"ÞÏoninɲrL3¢Y¹¿hšÇŸ1—ÅÝ‘èùo9,HŒ¦Gûä4w×´öîŽÁH¿ß9VReY+}Êé6Xx¯æbNÝè^¿½@úI@hMQƒÄÙ«&x|Ó^1€í¸5_Ú·¡N mv.b‹A+Ï÷…uZ  ð­'UÜé_TE/.gPEÃ6´•‚z@ ¶D¹"éïÙƒª+âŒF·œDºS Aò¯[zÓZÈê1cÖ@@5N™pê„¥áwˆÜpÝN‘:q¢á Ú » å/NA³øì]Þs@JÁŒ¶æ0ÝaS-{ˆÆ_~Ša Ù±À7«¸v?’F)KGqÙœ›,dΆ ƒ`ήdEûþ1˜2ýuÚkžùFZ›ò_ò´º °ÈM$ Â÷p=1zúòç.~T‡­CÇù[æFÀf|}É;íi@ŸS®WƯú˜Û ýȆñü¸6LH§se„K‚æ,hzß?Ûv9>>RÊ?]âÖbî™/±j{X–(_–¶¡KiÜ®rŸfyö³gß=ù.&L[b”I‚J¦@Q‘V#Ï£¨€ú(!.„x•|Ä0ƒ$ª~ JPò©¸ŠS§×Œ}öíb@?¸¹ÄÀîgv¼ÆtˆN«÷÷&g|c[ª¡íºϳl>0XÞgéÈN躺©–”¤ó=lo“™`ÊÃZ`ÐíwKÀPi Õj!‡¦ÆÒC B8ÚñÆOD¢YJÒUhž¬ƒæ%ŽYà³?Ï,[w»ÊB¾°QymöK”Gf¼/»‰âêÈh¨ïÛkIJÎÒX†Áa )î© NÓßpQ—RÜDwš} sHœ]ÂÆÞîÞQFù˜ˆô†©`ÕûŽ“¿eÞdz`‡FåH†uo×#JsÓOÜ0&7é{P<žëTh@/Æ—J¡œˆJ¸Þé×±EË(ÊLo{T˜Už.¤Q?¹ýÙA\+…ý߬KeD~aÓÝ1)³’„\QÎ(˜^I†¶ÈôtsÖ SiŒh­ï›Eõhÿ1µ–Ig,npã¨é,é6_|Ÿ~¾;ØÃG)ªÍâ"^ôHÄB¾8Þý:¢UuJ\þŸi:ïí«Æz£{®B‹ÁB¿ Ç/ëÜi«ÛŸªÊè öލ‹žžþ{q×Ý*5nþGÜ^þ2Ëü‹šJ©œ_L™çÖ´Ò ì[†QüUM/¾Q6‚ékéwØ„A…?-È]6°01é%Ë™ÿÇ;‡^YÄ”šký7¹šà9O¶»kŠà?õ\ªq¯ù¡ù‹¯iç›WemР¾ät@pˆý¬³éƒ_yÀ_/_<Õ{ij^¾ÙO%RÔt–Ù•æêææÆ©F’®³!ÜZFwÀÑÔñ9œsà>&ÄSÓY‡¬´vž¬qßÄne  `b `! ¤b¹²×þ\Ó•F©Qq|ËÞPÕ·)Ø~å «±C¶£Ùsz6jÕºßi>ê'Ïïöªáó´ò»iÁ!=ìAí¦nóiøVs1éô(&͚͡/º×#ü3–¹ƒÈñ2¢gVÛ]Ö? £­åÓ³DÃû_ê•‚ïÍsáÿg'­-82ˆÎI p„†¼âýbçŠ^”bú‡vd•¸­®½!t>§Ÿ°”à bŸ}¾+ ˜KîꉕÉQøÈü8Œíî"Ú/ôU7ž*U†Er¬L0 C¿ÂØç±Ž$Ö£­–ÉU€±5í§8Јö‘ÑóÏç^,šìø^@ý®J"ŠM kà¾6íöŒ„ÅÐóžÊ˜R µÐq¼î+‚§áB-õòÿ¬pvU%Ô¯æá,¥T‰55FÖþ2f“}P’H îf|”Ã}²ùþÝÿ:‹yy c±MùŸ{ªF:†s}%7ÈÐAy ‡`léäEƒ†´Áav¡‡b±J’ci‘G:ií—žë·’Þ—)ˆP)£Å`D'CÆ];e@Ø’Zò“änÖ´Ý×1€ÆyÕy5mB]øL¿¬€¿ãEg{L}\ÿ›zXs<¡–>NoL +[Ž$[Ã1H'ò$K `ÄŽßä}X_74¤òdŽÉ$Þ@8…¾*.Ï\ñnL˲ß|Qœòv-•~fP ¨´\Çë­¹*’¡Ùs Ù^81ÏÚ§rå†Fq$Ž˜a¯LAWgU_88=º¯"=v|Æ[F"¨È„ ñ–Æ] lj iÇåê.ƒEÛ6>ÜÚaIËu©ïs£”—¶àdØ ¶ç\ÔMæ§YVY¹ï݈«ôéœÖ»bk—G½>‚¥¥€¢Y­øöÄ›®ñmE‡¥Ö}½J-=Xj—»Ýk[KÐ¥å1-Ct®·~׈òƒªIjn•8s÷½¤ÀôÊÇÑ!Ø6iÿæÉâþI]IC9óÚdÌϧÎG\–âÂŒoµ"B/µB¡Š;SqëP«5#*=”> êÉl„M!âÞ󸄦8íö»û}ý)š¡ÍsHD˜Œã§>qaV®‘*Ã€š³†•Ü¹5ÏÈÝÇ=K<ö™ÁgÚY6»bB‘&³ÎX?©ò™ÓÉûiòÅøv•ƒ× fSý®ƒ°¨Ì§Ð x)ÚÉμ҃öe銄î¼y_$ô¡XÉÛHº05™IÈÕ¹œ^áíΡ³}0zص°xÈG—£DÎ@:Ïjt*Ù‹:щµi°çËANNåâø·Ù.ÛëÁæÞþ 23^OOºCÄ«/ÿÜ.+Éïd¡Yc£®‘Ø9úö]µN=k×ù¢ÂIH¾QÆKœ×—[.ŽýÝ:·ŒÒ…_ÏeS#µë­âÃ}øÇÁUÎá"8dé3"2Ɔ„òãõÉ@§‚ß»(ÂÁûš[ôçÏÃM /‡#¯^( &ää5@V{Â÷2JÌÀHS“5´s%î볩(L­ ö…½*kRtZVkÙy|;R?í±x'·†0Ú|["ÔsâZˆÇw¨Ú|HàS J†.¥éQ“ÆŠáz¡RíË—¡àæßï‡èñxù½Âà’cŽ[Yé<à’À@T*æ;—>/éi !w$Oº¨r$¦ÆŸC÷˜h¸}xXQ»Í»ü婿<^ЉyjñoŠ?›4/-»Šf»âœ`+N7fé¶€–·tílµs-zey²KD‚Ðцn/=ô¼ûpWÃþ’+¿k×Ý>ø®]/sJÃælu ü‚àW PÁ™ŽÉ î{çHC3H¸ëÛ €„ y€'ðî‰ ˆcâ… Š­eï¡Â9^Øy·§‘÷ý´Ž×eÔ¿@ó‹éÿ·ÕßÊÚ‚^¶çAÊϩ̩ÎUCzÒ¿S@ŸÞ¦¦xZ(ý_óðm/Ú–¥ 1é2Tª×YŠpÅA^)Óê\S£Úø¼”*(zœhTä‚¿°€í”=Ëò©|ÚÅ}Ïž×j¤d26NÝàGÚ—œYöàÄ­ËÕ×l6j5Ú윬sF(êGHzéHŸ0³©¢&GIY¯“³}Tàw:?ÅTªsC‚=8Ys-›á’ºÄ4¯>÷¶^§™êóFHÍ´3®ba¥˜ãU:F§MdB ß½š]»ˆˆý&·f.ó›_“5)ÿé·j†cnZ?ÎC½KœôolwÚÄØî.¯“¢[Õµ!! ’Tû¤¥µ‘³ï3}Xiiš¹¸ï×ãBš*TpPÑÍþ¡ò–ÿuæ!þZøDúéöúL]Ç€Éö9jf÷ÊX;$?ûp,K.AˆK0ÃLTòPihþѯ“ Wfüÿœe+åTmîl\wŸRø<µqžNõ’ÊÚ×ÞuHL å—É\v»-ö9h‘òjNâ?}8y–h–d$|n'©Ypü¿S„€VÕr‚ÜR8X±» ‚_ÕUmäè«s¬Ø²Nȉ+}žE¬ žMLŠ”©œ!P88#¶NÄgŠU÷ñÝØÉ¿œ77žoJŒÃŽëP\=ÿã¦ÍÀBý™Ðô;7 ·7.l&›ú³º(wy8Üf-ÙÊlIMûu³ƒ[%·r»ëIÊЛ}{*8ɽÙ!œ‚1‰ä™ÔdwtSÆ:ï0ׄyÕZÕc^|ô/n¶*ÿ&™3%åé2&!q’…=6ãh¤ÅXýO¢0k‘•Ï; à–¯’•ÎéÜž§Ÿ}(n®–ny ¯Àj|EÚZª:…TõH÷?¾j캮÷° ,r¿ÑžU›U?¤m§`ÑSã«Ýo[\Þ¶cU5l³÷_±GŸ@ftÄöG“]zäB%åÍ5*½uéÐ5:6£uÉàêCþ¥|—ðU>ôÈ®¾ÅÎ?ãÌ£<”•ñêpRF6N\Ö.•>¾Ù±phÿDÉ0zÌu6¾çó‰uÌ’ŽvF^x®AR QZ“lS³5)m;?­ Ì,ìØ¡‡½¢ÆãŠt*….¤(2¼‘v¼B ú£ |R¼ÜPqÌOkã Þ®T!áK·õ«5{÷Ó\3YÉi®‚ñª§aýU,Ék…+íê7€[¬0¤ÄIÙÍ6—8*&2nó,QJëœAV:cV_ôúv_>—·à˜¿€†½E†P_Y]·•¶Nt%œa)ê +iˆíVAž,ƼE·Jtb‡È×n~‰§EøæS—}¨B»ÞXß§,×\w‹2ÛÏ3„Jáa)œôìÃÙp·^LÛäTèŰ:ÇÝéÂɽòžÍ¼h#•†…Ø”¼4õHÐ4HXÀ¥ô»Pþߘ>›cŽYƒi/ ›´Ð% ÈS³c섳ú+Só¸Ún2$ÚVí¿¼:ªÍß®vmRÒÖ_£ï²·;éÀxµTnÜÈú²–¾ Ö&¸ëÙ«~êˆHååJÝÄ5 ¨Ä > …°rÄ5Ýèù~BáÌt,±À&ì^hðz¯Ò^45·b:Ø{Ør\ e-ÝŽ,É+²l[:Ý“aù÷Á‚V!á[@•ˆþ–_Y2I6½éváç÷Ãzµú-ª<ý½¿¦¿n>­OvÜ-­ ¤€ŠÉèP>Aˆ>H„ I ÉLd ¾Ž{;²?Æ?ÛZo™tß R’¢ `ÀòÿbMŽ.U¦[ƒzUÆÀ”"ÆÛO}™øññ8à¸g ìn€‡ &Š®[N¨6zÛÞ J í„΂9½jÅù®ƒ®¨êÐÝ ˆF-yÞ+…o×°´°5[Ô¬¶ —aè®þî³eŽê,”â!½ødèI æª• ¨IV*)`¹j.Ïñ}952mnÖ¼ØôöÏqþ-Œ}Ò]èÎòR\Ú|A¥ºK>}‘N.¦ŠV Ž7‚†ú¥øŸÃ1 ïiRLÝènŸB^ ¨UûúåRR÷8ö2ÐȉÀ]fxZr~·géþ2ù-9½ûÄÜòY:áP»:qÁ‹ˆ¹ÃÈÌ'+Q€ç{Ø 9Ê÷<¥oÓÞÈ©/8gó?km¬U'ôQÙ·â“.O¨ûÕlºk:¶•½åj|Ô”¯¸p|ækA½Öp`9d|’â @E!»ʑlWÙ™âì<»_ûÄádÝ«ZzXÛ>´»'dÐ}«µ"£yuE$NÉŒÛ=˜S‡±»Õ^Iå¥á~PèT37{„~_¸¡ŠøAügÕ&érºjk'sZ>ìÚíµx¯»yQ¡ý÷möç¿U|©í`Éd¤#]ó‚1Td¾ûâ9¹–¦3m'ëSx‚öe Âhh*j}¥¢—½œË]éDSá=ªq-²ÅÊÃ(MûØ…³Y÷W,h—måË’HËÔ øƒãxÛ’›W"^-Øà6P쾤2êÖœ%žé¬z>É6¿›½VÏ‹~ø£Ý‘ëc 7Ÿ“ž1£k^Cߺ|ìCt£Çk€‹‡»‡X.¼T†¯û/}R¿|ör„NMTµ”\{TxE.“]lNvÛ;¥=Û9û‰{§C’µõu䀤c]­4žjòÇ3•£eÔx3W €‹ŸÊºó†ù<ã/Szü™Ë¿h‡ÍAô¦“Ñ7Ëk5””™€8G§eºÄhé‘”1O÷Lg!·È'|:ÞOJ»<ó0ÀذíkL€sÇÉðí`ªçRªWûR=ÎJ/Ht§åTÎÔ œñŠ€_ ip-¿†—­±ê|IÒ ¬¦,<²6O#´^Ž5<µ˜±PêÍ6iœž6Wf„°ˆXJKÌ?xÏÑÐå@b Ñ!Œ3Þž÷ð TÊ)ë5 êGgkGÒ]¹]æëî%cSGFf¡V˦ŽÁËD¼I ·Í¿¬'?^Ã<×Ãj3—ÌÏþ…Ôî(Nm¤‡áj3––WCIzP–cÓ1¦TTùX öÐVâØpæ>ܲd@"Pb¨bùG6dõÿcxKü@¹ìˆž‚±sÚ¸êU¡Rdm½ËÅRòŒ!! z€Ò¤ªyìÎa™Ë©Nç~ëTÿUKjý{e%[­q‡œàîgb.ù—8z\A..üðÐ"ˆ{ÔUdõw Ýï}6§˜yÞGì‚d­]r1ÅA„Ù -&¯znŸ2Ìv”JÞjéða¿©t #·9Ÿ¶¶Ôg6¾B2ðÞ¾C ¾Ýƒo*ä¿´V²Íž~#•ŠùÚvTwÔ  ©@'Û@ °³ØQ,%´Ú‡¶ck®Ÿ+©a¥òÔj#ÙvëŸóä0©º*qvm¨§û‡õ«ûè Mvæ—œP¿‘›€ãJT½˜|?cù÷ˆzEeQ•vìŠ3áÆ~ùŸüƬtö?cÆÉ^ Š&äìós#­ª5õ Çèyã‹ÃJzÇ}xçîRú ý}Ôâ©ëâˆÏîä{d’-¢}–eî>QÚÝEÜjþ-’Y°˜©‰ÛîËH|wgX:ºYî±ÂmÑÄ\¹# +£÷Î<§&§íа)¹ƒh”øTíº¤À÷|î|¶¡‡Jû~’hwyë‘Nó7a¿§ëæÔ––ê}oïiø‰ÅIbLNÎþøDô?·2fÕà’÷þÕÚÎOÇh^üŽ2Ý)˜ï9Іö9ÅƒÎøÿŽÝåoñtîÖ“_ܦ}àúSŒ•sTèpk0ñ²SÛz€¹…¼)$‚P¬ Fqz)»Ù\Ÿ:gÏšvÛÕi®·Anþw>Š(Ó n½º”ÚSÄîºF’$Õ…‡¬Bg¥@˜ …È|E…™Ñ~=XI¸e&´è&U¸%߀ä–lüceˆi29ÒD, äðEÞ]×@¯§·GYm]õô€šÁ›š˜a¦[3¿Ì˜gxÂñc„! ¶ø€ƒ;øñMUóü]Ù—µ1\S4Í·%®‡”•qSë–Vŵ?V?Ÿ;¬UZÖ¼‘‘CP/Üø“&joLØ@! Òå“yÉæ]¤üv…÷$´·Ò“cœÉ°Þ£s^¹°+woÇF(r\¢ý*71áìÞ‰dbð ›o ®ží²½­ …m"Ýu.¶»o¤éàû:ÖÍ¥ÌÄ÷~¿òÊ’·hI¬~ÇEØ›ï¢ZúFŒ:$å¹9= ´/zaYÂ5—Wx¤ÈöÞÑ“Å"Ëî…çR¤.E%à”AÐW”·¥C¬…:˜V€‚óÌíŸÉ ÖnŸÚ§áiöž³åù¼?sÏZ–ŒçÙËâÚ0íû?¨Iîöùx¨tå§×û%Õ©1ÎP(q ÔÕâ•ÁE²6„¡`.PZ'ºK°ê\ofÊ ¥¢ÜR J"I²{©8þÂB9CŒý½"×WBÚ÷¿ÿ­´¯P”»[ó´ä7çË£Md)u²q·iRFâäþþÊèÓ£vÙx>$¿ xo!¶âÌ6E1æ ï@ì̦ ¦v4W£ šz‹hM¿4ä=ý‰‘m¸„­½py|›Â‘2ÃÜô0¹Kl8„õÊð÷˜á1â~AUIG¶.iÌð@’ÓÔŠR/WùÇõ2Þ.ãžj¹YÓyü»¾*6¦°å3ÅŒwåuí1ð9_"3§ÞûxŸœJã#¹“ïî¸ èër0Ud܉‹Ý÷g'îÙ.½NÎå¾€¹ûRIøuB¤RÿÊ% íë29ÊŸ±U#Sµ' ³¼{³Üø±™déçyáXYy=‡\ši(•cm‚»¬8bwÍŽ@ÿÿ+r ½ÐMi¹=­ëKöǨÔu ¿/,Àõtdn·j޼—1Ïüv:éX”ì-ûÕ”¼_xs ó\î”CIƒ„'q½°Äx.ÍM‚€y?_‹SLíàTs©Öš'ûÁù^_oýŒgí;¢Ã½_ˆ²zV¦Þæ9£8ƒ?!bb$r‹\¾ùù’è±Ë"gøŠuÍÝj5Ÿëd`òys†&…yØX|t² áÉÁ€B„Vç}=úÏÐíÀXÅÔ*’0êÝ')O#Q2}M§³Ê#Ú)£L¡ÖTÊoÏ)÷äÓ\õ÷×|‘«V|j Ê‹Ñ?_í½jäÆéøPlX2Ô5†@øBú¤Ø¢Œ*…Uëlwwc™‘˜Õ¥ÞS­ŽºË¢ú¼,'nÿÒ÷~ðò9]Wc„CX³ÞÖCÉý&öý+OÑÍ¿6.lÓ!Ÿ›ÙÓt1qF:N£¡:–ÒF \Ý¥IPÃú[¯lÇ YEÔ5öhñ0XòâA d£!@^4¾™dôßó‹´Ã t?(¢!a©£h^æó_±ÒÈ*« #ýíSëˆ;ý¿ Â{·e ˜ð/¤fBb?0F0¯ßìLË/ç øìŸ[úIáõWº¾Þà¸!8!¡¡¡–.se¨L»eígût®÷×ùæ#Q~~Nƒéç¼U/ô1âÃc´Àd¤æoÛÈðÿ)Þ+6$ Ž+¡trŒÜñ6Kw›V’8³,½Lr…Ÿ¢5V#øŸ·œ¾¸! •Ï[!Øê¥ìâÂôØÇ]XÝõ¾XŽ[étEhpöɆÔO¿púvsŸñlšÑö/¶6Ã$Tð*1`aÑõ¤‰4þ†Þ“Èïòq¬ë‚ý§¹þ(=Í•ÇÒVéÖÛé’ÏÅ\¶ â·tÙ¼÷„UjŒµ%Ú÷±FLÙê‡öíoZ=üòU¼ýöã¼lF\ª=Ôß4AÈX¿Úɧ2Ð×Õˆøòê#±ô ¼•Ø©)«-BcgŸ:ØãßúîÀ‰æt{±™ýÇýD§)9>„Xi‰µ4ïÆ7‚?…õ€^ë-9AMj¥JÉlj Ö+‰Âz›¸®«¹’Öôü,… .ë<'¡ž°íñÍ4ïA½¿op¿¦ÿÑðo”G÷¬ ÒÈǧêRVž4± ö²ýü½»]ƒD¢ ¾U×EPŸìq˹þAKA>/> ß¡œ(ûê[rüǹ*«èÉãgÖ¦Ná¶dÚ¾p×KœTÐ}NæëŽ…fFÑæÄw){Ö¤œ¡#÷!HiFIb€ ‚ÂÀÀ¤*© ‚ ¿…¾o½rW{ë‘îm1Ñ&suäòúJ U­+-?úí—0ï¦ð+)êp±„q ºÃóIŸGfÝòƒç¹ûÓ"û9ÒÄ¥5_†ý+×:}7Æ­ë5ÇNVFp¢_Ü•ïÿ–Š$ÎSP¸…6ðÊû—„E“ü>O”sz|®t[úôá£ï7§2ÁÍç¸È® sˆ´îÚ~îÎ¥L» HJÞˆ¸eIs¼Ÿ¶j c† Ý9)}å^û6­wUÚ+¥À¨X}gŒT†^DéŽ@­A-s¥,žKœúÎÑjÉäU'u7œ¼TíLFtÊx^ÍG»Øê­xõ߃M¸HD£ý!òª¹øn>Wö’âºSYÂúsßèq#²¢þ ßäà¢Å[R²Û8Ù¨#u'S.« *³ükÉø~,†¬ ©þ½Ù«[üËþnch>4á)ÖŠSèãqFó6‰ÂíðYÛýª0–‘Óú©õÄ€°6‘]º¹÷Oê‰Lbºn³°âä{>H++§p à½ED°“Ï%]lÖÁÖR,2G»-L^‡þ‘Ð.Ê[¿£1{+Ä×À%ð¿æI¬hªu@CŒ ]‘Ý1ý¹ icT''·ÃOj¡î_NÓ"ËzRçoÕI\eñá_ULÒ¹v³ºñjý|4Ñ€ A£ßT°ooÿÃ%oÇöžÉÑ’ÆJêZ—Ðà«À8ë~¸\}‰qq÷“¡Q©ç9 rÑ#†k&‡ z‡ÖüÔP?Ïr™E§eº5Uò™Ýïð-bž_köÚ0sšCÚ°þLÞÙt=ÌÖQ-ÏÑñ˜} é bZéIÏX;DR—嘮d7‚Áj-®á“rÏœ{þ@Ô}ªâ©B© ÏÏKoÞ»Aðpé$"'•qºṀ¿Ð·‰Ôžw ˜pñíÓP!ˆþe‹³gð@R‘'>vDDÈR¨O‚©$A$–ŽÇªµÎ¯žt'.b§ê7Ç«¿þÞ$”ó¯­Ë‹6ö_­|î®Ï¸¥õb1ÁG‘²ÆcÊÏM 7~ó/6þÓfåŒßÝn°Ÿ­‡+‚üå{~Üî_Ã×à ÿ«˜>ÆKÝSûãÄõ^[JÅû*EúìÇ`©ŠþIÿ4¸ß>øà"ân4ØàFùPÞ€  ß]„=ŸÄE¨Égè€Ø^›"Ž<Ι:£è ­è€^<¡@>œÓ´Êz7g&ˆº÷E‚Ç2~c¸šñîpÐ/y9¾|ùÕe?á]J4ýÏו•ÌG=^2LÓv Źº–Ì̽=.9é¾*1*ûj"õ?¬CÑ}‹v÷Á.ŒéÈÇ@âHql;–}kšýô‰Û7 »ÔåÅy´æq櫹fýKËÈÈ¡žÉ<3ñ„¶Ú»û<úTù&)œÙqç7o¯  oY'Ïó¯¿#óâþ”uëæŠ=Çl×µ”…ƒîbüR羚úI­Ÿáç[þí<}ïÛÚB÷%›^ß•ˆÖÛõ×÷Ó¶¢"VÔk±í`à{RÆœü:»d(ݲs ÍO $G•¨ñ¯Dé ¦é„:ž*©ÛZí’b´Ö[g}.îcÍ5üÊÀ|È¿ØXÆT@ ªDTžAqfÊ;H‘·èByÕ.ÿ;*Ôüq9E¸x  ê¬®OÎ_³Ü8ßÛÆ“-“³Ê”,Å{r€:™Ò£äC…£ð@aCO’âZoá⯪XãÇ3ú.-fÿ÷_êü¡'†Èzà <¼3à»[Z¬“h€\,àxäºæ‹ŠvŒ¡V^’4UË´Ð@.¸(÷ŸûéÍ£wq¦xå#/kúŸ$怨zûÖõƒÇ6ØŽËο{;_Å ¿˜=Nž·q¶æZ\/{©º…ÍÆrÉý*{‡ðÿØq5¤%Ôøâè3aDlðÄã€caìÃæhRBi/¨yqêšrýy)V28«>÷ÓCÃîê–_ÓŸOŽb~Üì~ÿKœÊ«xwŠøéþU?q‰½àx/O~.JOEêyú€5û@>9 õQ±$ä4yÍl*}wð ê°é§«ú§\pªY3〔¿XÞW\¢aø~ºwÖø—¼ï 2Ü;ÆãƪýDÍY–2ðOΖJõ¤ o˜XNB@oݰ1¯'u¹Æó§ÓJ˜ÇÂgxG³þ.•[Vm׿ 3®o.$ ^«Ö”ÛÝHãEÝx4xP˜_lèi›>«½‹#·Q›Õõw±ÅüÏ‚§VUÃäÞ8ú4Â]L!´zÙ™[>ÿ§ié~oñ–þwOužVq^Ë:Ñ ß^…ä¬Í‘jÏiøÞÎP‚ĉ¦,ºý„}r[ƽ*ù c±ñûqħ6)ïðCPêÿºtÒa@¤!@hÅtã«|Çh/HG«.~A«®9hýB½<–5@ŠTU+ÎÕ”¤º¤¯C¯-{¸'¢ú‡‰É’Èý2÷Ǽ·¤uÞŒþÄÄ!€\tǤx'üŒÿW ä>‘KUZiéô“óŠŠÌªÖ6¦!¼úÞåŽé~´žÊ:©Ì—à©_ëÇ+¾ÌÛ¤«tVú¹ËnW^Æ+Ê_§½–Žß<÷ü  PÀB£›_¢nŠE’ûùÏåçtžwÒ©Û«4J\Áì§ÁnM¤ãñ5êÐì‘R´ß¯éÓ ¨þP?VFþ”" 4æ_N†z!"~j^Ö`ž›/í5 ÇÎíÿ2ÊHò²>͒щ&j'h…rÒ7'¬‡þ¬z"¼LBn5<ÕõÙçG)siþfH ñÛʽ]€:~žï4‹$´ÚÑg'*}ÏPÒ~ý·-7 ¦±ü÷c„D6ž”ç?ÅX¬zÌ`ïGñvÇxó-Jl Ɖ†è¥o—îÒ…æ#[côoö.µ“Ÿê›Ž‹Z!ä‚ çêÌ„¼Üó(Ó€BPrE'/eš]“c©±C+>¶OùU«™:îH°¾EZZN*ªÙë[Ïð»O©õ2ÈR`ð!`é=÷µvýw»§ßW6‘'Š/ØC+V`€ã Cyk*NoO™ÞÞ>ÑÄ Ó÷äÝÿdKS°cŽ©éx•àÏþd¤Èqx˜ÊÈ8|•ã .:~‡Ï´ÌÜ G}¢%mÃ5¿Õ:¾²©–ŠßÉçt2‰Gíï8Ôû§Ly¯î€! ­e§…¬ÔNz_Ë]®hvðü‰;ÄÈ»>î´Öú©Ç›¥w-ã(í³6¬a±„zB³í‡…A,¢«Õ%½CÂ<ù"Ô›˜·LÉ¡wæµdÐ_ñ+£2ÀÉ çXšÞa¡æ9›E….©\ާáÚýVŸ‡¡Ëh&×Ê`‘“B^ê?æUa£’Lœ3ŽxއS:Ô?«ë¿#á«+«~!‰­øªûA!\ "ŒÁõÜæ7lÁg&=>a,@Þþa®?Ù~ϦÛu"coŠäLBLŽçÕFxûÈË]ûu/f©¨ ›p}Û•dõ SØ i‘Œ<]’_”Ùƒç» ÿ÷ö¨àøø³™çfƒœ 'g3Í@„é @c@ÂÚÛjÁ¡\í%ë†:ÀõØ's/»kß‹o«¼“(Ih4jÿ]»™ªP|àï ‰¬énc-o@pÆ/p!ø&[)L·æk`~Œ¿c ‡wÑZ{UöýÙð~|€ð ¢ÏûšY”´¦«9ï߀‰ÚD— øêtJÓÃ+ðú rp•q¤iÈ_ýÑœ9Øô¯¶ìòôi}u^Ö‰‹»Ù¿¿ö| g¿!tSI§FêÉ{ùdM»GáÌÏËþªøûÁêøòPHJ¶)20žï[vv½Õ}"Ûö£NstUª¼¾ì.vÁíáUàŸ¢8˜¿å4WÔ c`?Û´›ÛŸóEƒ\šf郭˜ˆ\óÃüìÚO†Î Ñg‘àJ6mýöB€âH‡k¢ú†ØÕëÎÿiY‚7-L±ÕB‡åWkì7ð"„ç!j½¹éÃ\,£"ô'w?m” ν·†0d‹i‚*pk*84ô÷Ù¤¶2‡žg$®Û6“X2ÁÏÞ?Íúù=›^ä¢e#pæO“°Ó”Ã4qÚ'ÊWŸùÍËÞýÒË:¥¥ÑŽçKS´¶_h…ÌitpOÙÓáÖ/òä}TÞwÝ mutKÄcv¶/WÎù--ÑbIR¦}ÿûÆi^Û㛑Ed}LÄ HàŽO)æêÛºRÊëKë3ÃâyÕcçÏ¥¯ŸÜp›Æ4c@‹{œ1\/íò“ [Úbmæîkiñå©ržÜË~TÄ’Æ'ñ±T¨š/Ú£`þ›_÷¨NþYýFF?·–wÅt¡¥œm(]…)ºÆg.–NeU\!¥=1×Èœ]2Ç|½c6TÿwÜ9kò«´B3Œc½GŸ>¯ù´Óø¨k}Xq>Þ“ÍD~¿¯?O´OÚ/&ê’Ó:.à 8gÌ«!1 K]ÒÄýý£iöwf×TÀõ6IÛ¦(‰(I3»Üþéoi¶Ä€UŸÓüÞiÏüs› «Ü|Š%~ó÷@ðü2Qúyœ{LY)¥6Í,œô²?±þz©õSNh'­6Øv³ð ¸Ç*Ù™×$rG­­·´tKHÄzÕ@·xÙù|7Ðû·Êý-§‰aš?k‚Þ­Œ`‡ßÎù.“b´xÞ)ÇvÆŸãqw™G‘²‰}½›·"úУ©´ ©t’Wˆ"nSš®ÕìÉkÇüíÍ÷(É=jvä\:n‹Ív°_"c,çžPÝL[¤T†[Š-^õ;¾lÒ²v„>ßB;óó!΂Îu¹.`Ä64YÍpA}aƒƒ@HX]¶VRêÐÖöy_+¸=|¥J98S£þ,lc›ï}ݙѣTªÑ ‚¡¼™Tnà!Ó‡¨öœÍzzH|…701‡–ʬé7ŠCÒÄdò¼0s–â`çnùMbóû©ÅÔuÔ1{bÓ/ê¥Í&¢q&Ù¢12otΙïYë²9µ›hÛÍÏAêÅgÃamEçW¾û~GGM«Rk¼âŸÕ)J¤)|”ðy¹k»Íñï5ôIJ\R¢y*H éq‹ØÞr.ú]fÛó¤\Á¶>²„ÎøUåzÕ’âWéYªoÔ¶æÛEÀy‡k¡lMdj‹ pna,™,s’˜VUv~–ÁEÝöàö䆰@‰e• ˜¾ê°Å¥ŒP¥‚kút4ÓKçïxàÕ™Yý®W¯äÿ“B¦Â·ÇÕá[£³úRš7¨ýl†¸5,7Mµx s©pR`\„IÐ$Æ}[oÀùõš«¶ÿtÝìswò¨ýs(µÿD dzöŸ2¥ÿENS5šWJo:Æ- ¹[úl<ó¶3qºì«~$ÆÖn^ûØ€DžºQÿ(LìÙ¦ñ› NPUЭ ’¦9 •Ã*×)ië!¯ï¦ÛìJµq¯™õy+gŽãÔHÌÚj¬±4Å,¬”"íôoÃ=h-9)î¸Û´+Æd­˜†àLyÚ•X8hû§¹MÌØT~ÇÀîSùNë±Ç~¯ùR"G-D·[S!^dub\üÑñ¢±ýˆ]^ôðÌÁ³ARrûYçSbèUd¸Ëú^æsÿ°g ù˜î˸‡#‹ôÖnb¨ëúÔséð†Ÿð4¯õ¢Þ|÷X—èÔ—â;‚áÒúMns¥w‚ÑFG¾ÃŸóôö(©ŸM͆‘¢Æ÷|vlŸ²°†¡émL‰ÎŠÐ}R3…N¯‚tÏ?:¶¯wX_ë:ÊËèœôY삺{boåÍ{øm¦²·}˜1’æukÑRûè­6çøsà AR6ÏôNèd-¹…?úuùû®ÿT8óë—»UÿÌPVI”Ö\Ö 5@h%ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿøO׃À<à=B"’•Q()T¯¬Xøò¨ª€ð}ówÔù,†ª,›Q±V*Àï/·€éîãžó=âïŸ{Y÷ÙÞmæjë—/^æŠÓ(ïn^™7±Òªnš ªŠ•IABR…RGwÛž¢J®ï7!DJUP”PR%ª!"DIT¨*)^ûºP¾ïµáꪩ,÷ê*©žó¼H;’Š)Ntº|B€¢„•U/¶V°ªË[FŠAU¥>ùJR[dªÓU¢”¤¨QJ¤©Û-Më|Mãjšn«º›bŠm½ÞnòTöÕIUTJŠÌ-lËmk6Jµ¦úΔ({¾÷”QUR]ïwÙïžÖžï^¢!JU@ªD•*Vøƒä5)(Å‚Fw H ªUyãØ06 yêEŠ¥"RVØÖ¤•Ö=çÎôB¥{ïŸ|ú"ôÕm©}óîõ} B AQUTû»§uÜ) mDP JÛ[0)#çØåŠnõÀEWuìóÈB‰*•E.Ãzhª©(JRUT$RŠ¥@HQ-5;Ch$ss{ÀT¦wžòJRTó{Ï*ûÜÖ‡QTEU(P©TP%TŠ¥J"ET>ÛÝÝ*PKÜ}ï%‚UR T¥é…@¦Ýt ±Z>à[ÙÊ cݳ§zÞNv÷Ÿ¹æÝ7ÙïnñÜç)|¶šV"hˆ ™2dÑ ÓF†šh h4Ðh4hF€ba2i€LM114Ñ‚ba 2„M"   €š`˜0C ‰€#&b22a  @ ƒ@€©ú  @Ð4H&4É©¦Êi†š$ü€ÓJlSò)íƒ@ SÔðF˜SÊoIµOG¨gª4õ̆šLh™¡4ÉŠzm …ÌÌ‹ÿ)Š‘ðšˆþÿVžJLZ¤¡.þÇ´Ï× (䯭#¦¢Ò*9¨¤ð}C›Dr¹’Õ1‹uöÙrÊ…Ë4•ø»èU”º”˜FX¡5Õªd4 U‰~ô)ÅÂÒÛã]·Ìû¢`S[1[$‹Äð,ŽMG–i±9îXL¦ €Œ. å¢3±ÕÒrÕYn§ ïa*¶«¤‚ed‹Æñµë¶ªÒwÙeQÀ/E ªTÏV¦©¢ŠÙg–'œi[M=.°óÔ›+œe|´Y-ÒFJ¬/Æ®+ïDhÛ)ããcÍLJæØŠ %Fv6ÊÁ¶«K¯œm²HŸ@¢¬q}¯m¶iåA¶ä\ö`»y[jŽ9plIjlÎExH³Ó|¥ ‰9Âj)†ºö»§£ ç%(l™µÁa:@§$×>”N¨Wd%(šèâÅKBóTöËAïj [j•÷GjcpÅZå\µŒFxü$=4{éÝ„p-0[K–›–G>¥:ø©E8Yêš=:ús Èb}¯[ÔGïGÔëkú;ôe×Ï©—ÖÇFɺoéíu|KrÉèušº£òZÉ³Ùøßµ¿Öù´ð?[m‰ßºÍOÉ»òá±Â¼8\ä%цÏÓmið´p¶ÿ×GÑð8çFŠ4o­Í­&—š[†ßÜ®9H@ãIyW–-.ü+Ë]ôð¼êM²zmÏ÷ÔˆWoô°‘ Ó>mw»õ4•¢ÜuaÀñ âpséíqtSWÀ‹gJømîlð1w#|IwtE'må0àQ¥šLîÚÝäŒõih·[}/„2npwnÞKÇå;‹}N#y5»‹§f÷—&ÞVZeÏA¸z¸ØÍíûî)ø››Ù5)¯5xrÝ©a :ÛÝì†Í®$ i#‹‹ÜŒ¥¿‡$ûVMÝ#3sÉßîoµŽmæß S{®ÒáÁN¼›ѧ»°7‡GŸFï+dš/íË—ƒÄÑVÏ«õ6øí ø6ºnñõï7µr¹p8ÆÍ¼ÜÍÄ_¥Dûý¢‚ë³ äÑ~:·ÛýúÓ`k{¸†»ñÏÆáçàzáÂßÅ ÝÜ7®5±Ê:x˜ü›ñ’¼x{é³Õ¿ÏyyrÞ«f§ ‘yñì¤9ݨn¡“§~Un ÎŽ^G wäò÷·òJóhß½åžý¼Û¶pùsòêãMWûFf»wàiòïÑ·}˜~c»rýñw›ºÿÓÀÛÜóà{"éokåj[™–Ðü;GÅcEí]œQÖC¶Q±±Îb¡•YâsŒ¿¾ayèU¡qq!“"ב'SàªßèPŠæÐ±´ô#¶·Ì­g>Ùö·Áµºeñµ­%ƒî ½ý‹´šŒ21©Hz³…²ÓÊÊjzþ1ÉŠ×ZâÔõÙ$("&âVöl¹–8윢ÖÙ+Ç$ºcWž[# “”˜ßÆËÍäã¥òî’QXCT²Ÿåþl¬Ât+Ê»«QæF+ó«ÈKÑèɧ#]&aýé©ì)hÔù,’hÕ%T˜õË ûâLûËæfÞ"¿ÓS¹› újøe×îdÚ¢=ÉöÎGàµ;?Dÿ*}q|òÊ»"ü+Ì夒ƒòªä³$ÝSúе*¾Eç2ŽºäŸM ²ÒàB)=QÁ|ÅÌG´T47R¿1ÐS²©7áeÙºýåå|ˆ)Æ7ÇPm™Ú˜”Wg2ªê±¹uWù”5øXv…S­sS‡¡_Ó%œ¢“3h‘• ŒOËÊô¨¹©öª&ÇÆ¸Ç¤ÅE!˜ÉId£•e_×rð¬ hQ§jf ™¡äÛæŸ–)昤éå˜[£3U¤ò­m¨'ÓÞc!jª#JÌJ61›ÉFÔÆ qOŽxËQ´°ž‰ˆ§·å‡ (U‡Ô)Õu¹ÄÑM(h¹yÚDv!…OŽQXÈI>²Â#.¤Ô¶©GŸiE̤§Ë/O22£ÂƱ&Åe'Þ#QÙÒgÝRQcÔ#زÍô¯M(è‰Ç³)+F Y8ú‡šŒ™œŽsOœÎL®že_Í4ÒNäò9<”äs$ªæåN¨Ë<¡£#! ™PS„f^W„f\;R ­³b8“Aáxe§Í{y~hÆø·&%%ÓÁŸJu¸ ÍûÏ!UÊë|÷ Š6š+ª³îpÿk³2‹©öŒ ·foåØšŸèyÙ¼(=©ñÒÚÅ<;‡žÞÊf_¯4­ê¥®jÑÿ6^2>Çc“óÝösþ­gÒFJ-’¥XèèXüžª=þ«Ç¡žË͵¤J)*²r·ÙçÌ¢(4ц¿Að¢ÆcWúѸLýjd´é-µœŒ/A©s¼‡ÿ ‹Ä-wOÇôؾxA^RÖyFÒo‡}ÑVµå×åô©¾«„Å·Q:…e5CûQ„™¼ºnTžEèTà®?Ÿ/·âæÐ¶%gS=sï{|Þßc­Öb}z”9ÞZtU3”þs ÔYÝÓ_Cì¥W- ”C‹Œqéû1°0í“ùöÍ£'û¡N½Ñ°ÜÿÓyWøwK†O'†Ž'Ôwgo¼æx½¨å|dw vÞÛáè‚Ò]¼ö5‡x‰kÔº½ƒ4>2êç Džؤc½œ{UÞF^æ² ™o%­êéNú\ç˜/yµR3Nͺ.~TØkÆ’UòãÄ–ïSò³ýå9ý_{ßéÕ ééj«ª7¿Š¦ZsvÕ¿¾ëª§3R6çü,ÉýqV=øœ~„ص£r§·ª¶Ë\Ä8o ßS%YÀÐ6)Mbævü˜½‚Ž•ë|¾½†ÂYXŒ1Ï¢—§Úpi%é4Oœ›µì}&OÛéý¿ÀÐÛ(f®}çü·ZÊ»Äg­M‚ò*_NiêÙEóÁ¾vÿpyq×ÊÔðJ,ÎçËÒï`cþy©ëO1ñŸêcTy³«mž£®[·xÆòÿÝmúKm]¶yÙç<úLÆEù[ºõ)Y]þ•ô{nÎ WwEÏçKj¦í¾^LÅ?¿“uÖ½Õ_oï½ ¶¬|Æ—Å™›OîU«í)¬ïÆL&;äw9ol=fÿÌÆáh¸pÿSnV ètß™"ݯº÷•2^ÿŽÿìvÖ›BÙ¶wÓ¨Qä–OÌJ§õ·ª,½VæI*Øû¿Wži3ŸOÁÛÓ^cr•Mý&í\dfz4ü·4äìÇœáx¾Öè¶Eöß'—¯[ƒ÷Yµu?4”›GLÍ*ú>†ê™>üß{A•ÜÕÞ™‹·9F·%»y‘Ùé;lŒLZ„møÜžûTjÖ" n>Në%É™ºÎÁÕr´ø¶Fþæ&äuJÍ,ÙÈZiŸ®×û~­)Þ¤ù̇þMo®{U¢ìRžÞ:'ô*~ ïFóW²ÚÍ=÷óz´ÎG¶íó÷äÉq¡;.ÚLÒ>†5âÅ–”wêyÜÅÿåjwö{¨¸¤ëÔíç¼9Õ§hýrî¡Q–e¯m¦® ”µ\>öP´¯¼{~ë®¶w¹ÆëÜ·ð?”š¤oŠêuñÕǪ“ÀÏ#Z¡ª5¼FQîW8w¸½%<¤$ÞŸ]•¨¤›{Qõ°²iߨwðvIpô_ Ÿõ´=®?ö—+ájÔò'!Ñéÿ5PÂ?Oñ÷w]¨à©âÛÆ»cýÕæ¾-Ìù¬esºV“ßçÔ¬Ž‰5êfw»ÂýÄîÿ;/Ë÷_ ·C]ÖÆF¶ÞÃß‚:áÓÒÃ+åm3ŽÉ Ýê¼Al`ñçOÅ,ì«q¹º¾rƒÄûÌÂ9P~§.몯É}в@Ëw³k‰¬öo3vóŸ:­ª1kVÊ»?¥ìí¿Äd‹J­ŠbPÞ›¯…¨|Ž·?¶/^GçëÎB6{••üº×œ6…–ŽZ5CNV~Ië WçeZrtÁÊò|ÙTnÕn‡™ÝínQ*Ž9§˜ÕUNO~˜Ý}M÷½Ú7.è¡ñ^WŠHÄÈØí:)œê#}_—-§´ Š¼±Š´à+Yõµ–ß[¸²Àn£xPâpyWoª“é³Í¿z<¦ž3Uôмÿ ÒqòЛ ÷vÍö™Žz¢G#¡;‹ô®ÆÎÿ3 œL\ôÿ³[.+GóOâÓ¬mñ`¢üÓþîïQ»Ö„õ›•AïëbK§STßZÍfuÿíÆóiGëÅÚ.ëe·§²Rë©›Ú(¥cû?µ»Ö}úÇÉ#šûÐkœ¾ùœùt·mÝ5Þ4üÍ#>§Eëoyí´ý´¾Ï®-·¾ý´°ç U÷~ÝÞôýw›¾ßñçó‘Ü=êz‘ÝžIûA@ûµneF¡¦ôÕQ±¹æ0ïLñÖ¾lêÌe¼/\+÷(ÿþ{&®õT~Ý;ËËýOÉ2ö’Hq÷K8SU²”Ì\»g6›¶;ú›â±¸ÖÃÆèÝéwwÙÎÎfWZÙ\´Ñ2öt5˜º¿Ë‘£ßFk3Ë:ÆÍÄêÏÓÉ-ET³T‹*Ÿštemoü¶Ú»óú3 ˜ºXXšh¶~ ÂmF*æ‚}¦¡ÃO#®î,3}­ 6-~“£L~†"INÚÛî×ô˜Þô-&›P­ÖúX¾NÍ»«ÅGöï¯k&Ê{á1¶Ó;ý÷SÚTïíú?NWÉÒs¼»ê(Ró}Ê9ŸT{Ö‡ÿ¬Æ­Óâ‹ð1B0칌ºÃõ¿ÞiÚCY»ÿJëþè(é¯òhOöꬖÿrgp5þw{fÍñòsoh·¿_ÃçÞ­ûµ²êfÁE3­–¤ÛôƒÍ²qÛØQÌ%–Ó8ôŸÓúý;¯tìÕ˜àUa¾1Òz-¶2¯>Kkʉ'Ü…Ó{íM÷ Ý =0³IË|—ÀÉu¶_£jnŸ*Ëm|™ôä\6ÜËUa¤À)ÄâVVûo;þæ4Ù¼ïâÚÞ¡ÇIýùî*®7κîTôM¯g¾j}o~V÷µáß~<û¤Ž3γ²­ÐÀ9âÒ8-ÃOSI"ìè™ZûÓ¶ö·§/WëO÷zÇ´Rÿ.j²Sqäý'¸ãU v_ÝÈÃéÍk.çbÿ SrÛ8˜-ö#èÉï· ô¾ Ž¡Ý¬]mŽ÷’Zé ß®Ósãä^;PüúZÜÌÓ e¿YWƒýa(–úýÙ·íÊ~xñqßÍÓOj£Kƒ§ïÏY3)¢cE¤¬žDËè0·Qžîî‹jšùý`ï«7ÖMßéÏC9Ο¦—VùgÐC·ÉxS¼lp7žOe+ŠÍƒfX¹.Œ×Æ]ÝävœÃÛÿÏ‹R».1Þhm.7À“gÜ:%OÑe>…¥{cf•½(ýÖ{ð™ŸOuïóQ-Ê|R޾ã²d­wÝG%>ñ6-C€”†¯~ü«åöói×Þ[R.šÒô§*t›Ü ^ûüÚÝÃ6÷‡åµý ·†nóRsb¸ŠZ¤ªJT¨…(”„¥ˆµVöÉvðq¼TÚVÕrÞrgQõsâu{ ¼‹hŽÂ±®sÍ7HY®x# i_‰¨[ú½bpªuØï+¿¡óg#ûIGîí\Ûåi?ÏSÿ÷ŸÈtt­ë+ܶÒJ”ë\c4£>ˆ’¥ÔzŸ»ð·qx³œÜzï+€™)¹ŽRâäj_kž|–ýÝŽ£øêîß®­üÖÊ/±Íß:ç¯;Gîým»L>7=²+„¿4ý”|ïNÄ73Íc×3—YæÏÍβnžïÙéôR>¦k³0+X1B{_“or=Ž«tOÃw¨ã(ìô[NgÖ²G1ptñub@âÒûåçïzÔ{šD·¶Úλ.Vñ¹Ãâ÷ºuxÒ44eî]ãCSLò?¯¿¯Wø*"ö©ž :ý®ýÊŸ÷´O—àÉuW³²šªïêMÏo+hùRÒ¸©PcäUÖí£:áéÞ´ÙÀ¤!Jb£ûÜä|{^y \Û_Cß„}ÚðldýÏ,¹» Û¤gƒ2î¿Gfej܈Mõ¶;û³ÇØ =Õ¡•«±ñ>.`iS·™&Ò´7ä2.dn¶}=xm¿™É­{ôækïôyËâöv Õßn•©™§z3sk++ª”­ž£T¢–ôã¿)K o*¦_Íu~ûŸ˜ð¥°Ö\§Ì°=Tqó|„rNÉƋÔÔô¥©w>C7¿óô˜_xflUávµ9úx¦»ò÷¿ܯÄâA ©UM ¥|/+î:ø¼w˜Of2{çÉá~i§Yr[§ùùL¸alÿùÿ°$I&ÙýáP!|¯ÆIA ! ÿ·uÁV.$ âÖ5b-ŠÅ%“1‚Ì„š@&jFLD2‚4ÆÆ#fTbƉ‘"dƒAŒ ¢!a$ÐC#C$DR†0BD‘¡„ Б±ŠšA,@„f(ÌFdÁ †J@ÅHbDИŒ˜JfC L2HÊDFDŒ°’“L$e$–m1¤ˆ4&0JR(CH! ÈTÌŠ„J`£lÌ B†‚È„¥¢(C4’D›H‰±13 I€E4ÃBb2d mfJ`Ø’Ã4 L¤f¦F@$,ÀM€‚`Ò%(˜£2š‚i˜R4¥5$A¦0¤ˆÌXÈ—Ìzº1(=»tR ñÞq;£ÅÑ7ÐÚwB¹}¡ÇN÷yÏ]÷!k¯ÍïuÐÛÆøÕë¼ôúÏ:ç;PÐ'¥žÙÊÛ£š=÷^&Äð³B»ï~ÇZÛmînxhβ\IŒYç­i÷úÎ”Ùæ­¶£Þû+{ÉëÂg®xñ²Øny:㜥’¶ÒÖeg;qÏwÚëšã¿Ç~Þ|W8sÅô‡kƺ<ÎwØsuÖûó¿:ÎÞ+­ åiQÚÌ]jÍx§Ìç©ÆÜ‚ñhÞ¯#¡6ð¸Û½²|wÎ5]o8×=qÞøÏs1x[u¡¿KÁÒœ>¶¯½“ŽïyB¹â×UÇ:è<Ì‘c½WqÓ•½hŸŽ÷ç¦ûß¾FÕ®µž^¹çc¿Ž,óÝm%ð ï­n­øß~5â;ãiÁÚëd…qº×<.Ýñ·kÄß„Þõ׎·äã…ÁÛ·ªÜøs•¿68‡ªÐb¸\xã‚ês•¾XàoŽim<ó´®®÷Ö{²|MwÁ>;¯g‡ÜÜo[…+·]žëZïWß{Mû;ô%¬kjBVRë–¹:Ý1¯m§"¨ÖùÈÛ®éU7ßOk¨E¹fAO„1Ð"O¡(jZtÄU¥b0øÿ\Æ?v¾gBOê[ÅÀOäM2¶æž(ã‚£¶¡f–*Yú‰"´«D¨‹}<÷,‡>ð·¬ó{é¤{Tª*l#¾Ü(½÷ÒÉæßšš«Å¬s©¶¶Åd®Jè¨íSM†0M9™Sm‘Ïs éÈGß-fN™[i$¸õ ®’U,q±!T‘40ÆòâVLc²,³Ç4sW Ù*´ÝøÓ%´…E]©°òFÄ`ÆVtP_ß8šHÙj*Žªª j¢*é)í¾E`ðÆ)«¡ï¢–G!¨ÀˆvÉ–©Ï"‰dHD²‰ì¦œ…qZeãMÑW…x¡8ß /žérwÎúqŽË¥À×F¾{0UI¨ eà j|çÁøÛN„ „s— ŸA¬V71²ÅŽXì¸m®•ÈU2I±®úh¼%¸·F”b“¡‰#U C ‘è¨XðÊ!ÕWm•É$Ô)ul‹+0¦F s^Ùf=U ¸Mmç²ÒÍE…o=¸©1G!z/¤ó×1d(Uiß:g¡7T6yY‚©L2šq-V4Ó®êȪ= ­Í•uå™xBy£+]#«Œ¸â¨Ù–OdM×–3 ŽaBÂDÒEHä#*\%Ié€ÐŒÉAB÷Μ-ñ)öÔK*«Šy2Ôåc}5‰R™áä%§„Õ¨ËprkÁõªbKpÆIžÜ\ªgÂì0M@³_^™§²KÓ„Î…àkn„‘ÁÆiï¦ãNbžwÓ;±"“}Ž©èeˆB”ùHJHX±•îg *ÂxFj$Ã*Â@”©mj’Ê0£ª3§éŠx_I™$÷:9,°ƒIÏeíi)–úi4uQ…ÒH¹©‰’‹© ¨*è$T²kˆ›—g²ÅmNì|y&¿³§Úå¯..ÑÚû®šO ç£Í­â§ÕµãÒXŸÿšxÿÇ.óéÚ´iI½çvýýŽÂZ¢8lhÛO}_á‹Iß’«_Ãf_.Nò*÷t÷]¯k&Î'³ËÄÓØsG‡·[þ&YhÕ1ñ†)âϾáo‹­†(ÚÞju?<6å࣠,ÙÙÜØßímíï>æ‡oxe’ây®"©æ.þ~Céžóƒð8ͼ¼c_´ú¬<5(ر•émýÜØrkÙžç÷—8œ)œ<ªV%ݹ9¡ ݹ®4§­üŠÂ²?’+õ§ÜÕÃ|2åæ´éØâIÅ×^–çX¸û– uxroøÒHó\h]ÊÛNm«{–}‚5œn-’ik<4e‹S``F<­MÁ9kÞFrï¹E.ÓµÉ$ ¥9\³ÒÙnòCk&ØãåWÍqäŠÆfÕÄÛéWfüT¡X¤é?&ShÞ¨´(ªŠBq@A6ê)ŒQ ¬¢®hciG•7!CŒ§¼fn¹J’RãŽ\NÉ(DrʉÒêȬ1š‹ì¹•i™P³¨+3dÚ^q ²[­B²VçœíZ² Éì|û ‚Hã·ùy=ÓZ“±!ݸéz*šÆƒ;‹*KO΋M#XKàP䡽j•اäYÓœYšžÒu}΢"òvXãZœPáÓ” aZÖ]Üy¶nféªãŸ¿óü¯žnä‹w“êº:.×eݯ8ÎsŒŠöª‹ËÛñcÝêø„2¯!ÔëIѶÜä9ÃŵÆÓ¬Ù¾q6ìYÿ„Ñ=ªRG—¥Ûå<Öšoòt.ð„i™2NŠ'ݳ†'óŠ"_®%«îûO?êU±_wÍcÛx=«ŸŸñIóbêß]—¸GÙóz5ƒ0‘j+²êb)NaÌ(è5NŸ÷=|?Þ4+½›ÌŸ¦Þ˱Ë!›æüÜȆ_?Eç:ãyãº{pß&ñÜ5• z|"Ì»úÂU*É©«¶ ®‹uÒíɈÊ鵕ê‚Z$Ûr3¦cžÃ9žzl=ÏGáZ5RHòšáÑQOPóXóô¹¤Üí,¤»[â™è¥6Tv“ð&Ã!3·20½».³­dнÇ~±õMyÇè£mÛàÈ!e“0ÈáXדÌVpp4¡ D¥?DGåöžü$_ÑÇø~…õÓ–ÌK}ÖHôü´>YùèÇÏxmË\:¥l%£´žýÛwÚÛ| {³jK­š:ÝTk¤ñþÒüÀ˜únqü—K[®6å²ÍÔ¸‡µßœý-Jöqé¶/Jš/î—lÎê7²ß{æŸµÔæÂI$Cžb%hýyµp^É$ m $$$"IÖüÚÖÕʵW-­¶å[hÚÛbª­\¶µ®U­¶øæ«Uñ h µ $,€Â”Éç2T%ÈKÙLÄáuËœURä,Â̽.f`—²˜ôsé/EìwG§ä{“Înôóž8]›ØTˆ & %-ˆX † À$!2³¨5@+°­*™ÔØÐ¤Œl3”"U$t-B& áÈ©j^iÙD?°Ø•¨dC‘6!;Üë6É"ÊV²qT4ÕSQ— và˜B7†SˆO+–`¨è«SwÖ““R Ý‚)mÒ™PFx)ä‹wèV®ªÉ»¿»•VéFXX6ÞU ¬Ã‚‰Wû(&Jû ”£½Šf›'ï#Ù!+‡£kMÚg28g2¤#Zlf­ƒz¶¢•fg,: å§pU)DÛ’”µ2¾…Ã&µAÓz(öY~%)/h.üm,ÙMº“U3ª:£ƒ[>[ÛεSD# -œ&•¿ƒqcm"EilA‡!“ ÉÎW‘)þ“ýMž•<Ì¿÷÷¬ª{çÑšÍåhË:z §7Ú¤¹ô¯TÜÉÔ3;ëÑå½çO~7Þ+O#p½M·ï†s¥Þ«¾ºæÞÙåeïζ½kY\kª{dW«a»œv–t»<.{ìw;ßyžû»ç“É=qP?é:ëS,oǪ¤øæü†,y551]›']îûµúštHM 40, Lm²ëôXDmET6q›¸g¿ZúÇêê]j óä„„/B‰%$"‘B)z:Vöw߯…Ö(0i›Vµ‹öZ1T q%ÌqQ(H¢¢H’4¢L“×·©Vö^teèb¶$Çt€c$Å…E)”ål²ÑE@1wtNîîí$“YNã½f²!ET× ]ÍJ­õŠFÍUTX"w¿÷/Ní‘‚wnItéAÍÆyÉÛ»•ÝÒAÜtÝÛ‡y爗‡N:çqÜŽsœ¼òR¶0:ÌW^u9º>GþÞ‹²í9/WÞ0êo†¡3œ æÛÔ[q”>I@¥…I± |¯qØ!¥ZŸ O²)•Ö `Ú„Î8Á¤D$ Œ¸Æà4Š ’%‚ŽÄ.¦1¤ ÄLB¡DÐabnœI¦N :j½Úš¤-hƒD$˜Mã2¤„˜^dbu‡jœ‘fnž3rÂÍÛtÒ8¦ª›**Õ[&ž-7f"Ia°ƒV-Ü»Œ¡DZjHJuUŠ4 6ª[R¹FURšpiJ̸U*fÌ5@¥ E FZeš¦ØaE‡%3I'È +YœnkZö>>ÆÁÀîÉÃQNR/ ŒÁ©ÆCô„$„ e9­Åµ¡™B RšeSH±®»´îs³ÓÓ¼,ÏZïTë €ˆ’DL‰@‰Á„#†‘'$nè;¼ï<ë¼\òãÏÞsŒD’Š-2EB1¢eÁ È)¤k¦PAKEa[Ã(‚Ñ)4Ð%Ò P%¶HI„!'rE•j©º‹£{yjÃÄÈgž™]-Å=çuìõ¯/—ë^¥_¿rH.îÒ åà dL†‰7w1\ÜåÓœçdÆ7vè"míÙ’kb†‰§¥¤Ab¨Š',ë¼îPI²d$¤’‘%ŒQµ4SM IQDµõ-›OH&É­$¸ã¾Ã~…$ÇÄÍÜÖjï.dg«öÄ)›Ê”© Û-ƒ•t nRŽ©L”ÑT¨d¦Žâ0â!*yÜK»”¤’5BªÒ­k­[º¸ˆo¨±AGFìè2ÈL'=é0Úó;9ÒI'ïú÷½°}¿&ˆ!E ~A¶ðOðkÐuÿ:üÉòûOÓÅ$ Ð’pa$€H¡ $VÕÚ5¢ÛlmTUhÕ¶ÔjµѪÑmµFµEVÆÕ´m¶±ZÛ-mVŠÕ¨«Tj±­ccV[QXµE[QlU¨ÛjŵbÖ,mŠÚ+Qh¶Ñm‹lTk¶Åm‹¨µ¨µª,mE‹AU±j*,UEµ¶ŠÛÔV5QQ‹F6ÑXÅQmŒj£ŵFØÚ-Œm±£Fض6ŶÆQm±lj5Ek5£Eh+h±ª‹Z1TQ¶ ÄU‹`#lQ‹F-¢ÑT¬[2™b "’Ñ´h±hØÅ¢-£bØÑl…©5¤´QQÈ–¢„ª ‰+ˆÆÆ",jƲmXTTj÷K\ÔkE°jÁ&-ƒE¬mж-cZ¢Ø­cmV5UóE­rª-[lmT[Ej"*¢Š$PF#²ïKúý>>»gýñ˜îÜfýïœ>¿œ¸ýÿ΂ï7ú½·‡»é=VÏ¢å²äù2~’,Xï꣉ͥ†èŽœoÃÒ±¢r(]Œá@ݶg®{W¨>”;ÄNY}Z•­†Ê†ò‹5Ò¶?‘±!D‚ !ÞF ¨ ø€eßbœÂh†¢j”¦xµˆ¨—yb¥X8ó \i™N¾Ú]MCòŒ>ŽºV¦$±—UJYÝ y€à}Ïx=˘M_»ð€’üþ!dÏÑ­õ’Í“Úúo~¶âE!»_½[ÀªóyààÌ_j5d¡dÈÀ¯íG«v|"¥#Òz«¯í¬šjxó5ðÆü„þ2êZe}Â* Û¬¢8·OŒóÛx@¼„«ÉÝ(ÖLÁ‚Ç÷ 5`(¢ %ë®nç.Ü»§&ŽîéÜîç.Ôt[ºtîã»»•×ÍÇ#¤9Ó»‹³Ž®µm¯¸«TY66Ú £Dj1›&Q’&˜Ì$¾#·$!”c&H¡¢ Ð(•,I¤1²_Äüï³÷)So šyôôO[#Ñ↻û’o¸•H q>D9=šK‰Ñm.·þÿ_• ‡ðÿJø[è@ßAuÄ»ÝÓ¯…å–4Ÿã‹àÆ"ø=ßeª?“~/YáDš8ÙùH›¹®m m0ÆTR€wu¥Ñ€Gó^qrW¬¸F˜go¬™ß†Ñ¥qçÅØÝUn*†Hšâ’ hµ—½¼–<èºú”ÈPµiEÄqûÏGÄßGø_ÏëÚS#f`B!E‚Q¬m`¨É­&°A”M±±@@‚ Ư+ðø‚Цƒ¾pe=×Jº³óJ¥ún“LÀ)‚¹ðܘ)<¿vßÜvž²“±ô¤š8cDŸ#ƒ¥è»—ÀøøÀ;â«`Oß €X+ìXh~å·L”‰ÇÓÄ•øÄL¹fVQ¶ÿUTÅ£QýSZû Ȫ@<ì•mW_¶Ë±(!»›ÖÙAþ—~Mqø\×ü;óq|½ïõÝÙ\›ëíËÚøŠXÀzÆóŸݽŠg/Ì|¤ƒ^îýQ}6Voq'% b¢–’ï,ÊÆ©n²;!I6&½~Üï½î¦¦7Oñ-Ǽêx›þ³ãtŸ7¦þvì€}5÷;Üóq Èn~¯7À¢\ˆèˆ4攋féúw.ý9Säò©¨37^¾ò÷¬¤±ÆÑøë¦êiÍð÷i%žõŽÊgÓ©àœ’˜‘“¸¸Ñ"Jà ²‹Ÿ½]kŒ®–¹GŸÌÏœv%_M㈛ï«ÎgÊœèºD*kçMÓ¢ÿI§˜ìF†×› ËªÛ˜9ˆ—½}©t8ö¡-iÊlô¡|ÇñÒ"&û;9_:ZyÃÞÝ彇/“ jr[ziHn俇ÂoÕ*k¼‡ö`q;zªð×ÂeoÊ©aégµ §‰_\þ¼·Ât‘ȵŒ€·`ÀGó$2E'³^ó–jÿf¸gàpTU¨(ü ±ç@BÎÕñC5*s¸=ä-ƒI@•#BEQ„Š (°`I ÏYXÿÕ¯BýÆý3t7|ôâƒYbj;!|.´¬IÎ^;ÈÔjà,ÒjÌÔãÀ­Ìê6q|~&òSé.³­Óð.åØÔ5<;•™Šô~#*-¨úR ÖÍŒn°ìdyM;ãÀ ƒ‰c$šýx¹Ò¬ß)ÍTª@D7$b5ý©é½ ªDѲší.2p™ÒÕÖö'G©샵÷}ŸÁþ®¸ôj7ÑþNèçO|„úÕ7¶é}â1ýqBôuÿ#³ž9ù¿ïJYÍû/Çü?¹:Aä½WÔGs£Ç×Ð}Ï`íyõµnÔ³[X•í2E|ƒæ_'I횣6 çÀ3ý#•€¡¾¶Á£›‹µ?½õÈ&9ÞΔÒ%MÃ3¾n ì—ò¶‰†yPnDÀù´Œ™×%!Ñ ]HXdÚõÛè!]áDH$e@† FRR1LÒÌ, 4‰€‚$È™J¬÷Û4ñ¥Ó.¥lŸŽGµ?Kín¨³§Á¨ýÁ8ponŠ%kƒ{Y‹•¤BüÙOÌ€ß ‹÷j#ü¶ˆ×3Pò£ÉìÄ=š ž‚Åq¥LǶà˜1ÏødÀSˆñ“‘• ?]tÝ·íªô3/Èɳt÷Ëœ Pj‘Æ©0âÂäíŒÏPêÃg.pŽòÄ¿‚—S~Á÷ãE¿?à^Ý[¥×Íï€9îÛˆ›? ó8óó†êÅhñµ·Ý|9õü¥Ù4•¸d‘>Càë‚F0À”ƒMD‚æí0XÃûin± Æoì§ç`{¹˜šž'Õw”œÏQšÀÆ‚`@ '¾¹u¢ À>ýpoØýNv†ë?ñMuY¤Ãòïm Ùaþì%R[ÝüÇ}ˆaGŸâDÕ¡\ iþnÜ#=º¸D© ؈£AX¶Ø¶­wÈ~{í>z•ñžóZârÝ?‡¸ä‘ÕCóÊ/=½¬>b¹”°DÓiÔ>A;büóTßßÞ³ þî÷%sò]³²ÛW39òе|}Nï+ƒ‹#q„•L‘ È9«F8Ý“-ËÃ7Yœ–YvÍêr¯ßiÇ\¿B<ô—š½¶O专ù#3ÄpiœûR³JVÆ@GóIwÓu·½ôw6Úf„°« ˆ}`"Cƒ¤A9C¸îm“•üEl-[Føè‡ÍkÑîýrìoÔ]×sX÷ûwø>gŒûœÏ删*Á`°DEðzGÞç>ÈO߆—¥“´ï <“¾ê/êì)c>ÎSötVÔÌ^¤ô΂mÅ¥XÈâäKŽ6tt¶¥^!ús,Âèed9w{‹¿çOÁÕ‡³ww«N~å~CôË—>¥ çt°íª£«]bj ÖÔ³%ÃÒÙÀhÆBŒ\ß¡ÒÓ¥×3s žòÒ&˜©+ïý2Ú»¨†ê#ݽR#"Üçr:cžáØòÊ–@!i¼Ö&Ïñy:Š…ÕtÛ µ¾ËUíËjzRÌu P$kXNÙž /üÑ飊çÏHzHû$Ÿ’k«åŠñ¤ª×H,¦µü¶7ÕO o[ë>Cy‡uGöÙh.ÔòÒ<Û3cïèÿ¼ó{Ím´Ò@8x¸ŒïJ6«}jßF£« ÂÖ+Å;j霵Дã’û õkõi@œ“‹Â´åÜÀ¤Óì“RdiãìhZ辯yùÕw"ri€@"…Hª´j-€å÷¯EÛ7À!§‚¸m’$ƸÁ¨ÒÞ7ÖŠŽ:~†2åƒJ½ßÜçž™š‹Q寢ˆJ\¯Rt×ë"ÈJ¡•®Qèbòòv«aé0'S·"…>!h£è@S‡$ûÿF«>ÿçéjÓµ>¾~j?9õwù|Ë«Æ8ûÊ'Ø¿ð~Si»^O#ã·€>çÿR¶":î« jB& 3=¹~9lퟬÛçeò}ÖU1ˆœpO ­OÂ[¤˜­Lžu„V°Š´²²f¶Ù%Æ8êç×Û´¨BGé'˜p¥QÍL, óŠÚvó]×GmÄÞ=æ’É…í®¯Þ´É¥@¶x!ëê9ªäÔƒ§RF=!³Åqã‘ñÜ×^FØ$íÙ6wbãT²jÖQ¾o±V¿Ý˜Õ~,,¤^ÅXÔ;Vþøö7.tðä|V0;SZFöZ ãNz»L7¨“¦².ò± Éè¤?8(úcn~(äèÿ7 Š6Q‘´B¦e3dòœ[3zìñûd(“S1óŸÎŽšÞ-"Ôh9êi(ÎQS¾ù"S^Uzb½TXĦüUX‘ã›÷qÀÊŸöÿa^fRÿÜ ]Ê)ë×Ö¡øÔA—š¸´EÂê`sü9[I¯%«ôkïïפ°w´k@ R;8ÞRº:\¬’å+,F{·Mù¶ÿ_aã¼ÏŽéyýO}“,<Ê=MPŒEb*ŒQ# R l˜¨Õ£`e/6ˆO`rMUÔî 5¹™7 ÒeاÖÏúC ùí{ÎÑV X®B& ÎÄÉl ”ÿjM swU)R; 6ç°EógˇÓoG5 mù߇7ŽÜ‰=[¹Ú8§¨®Á"ó»Or>õ ÁÂWd•‹ÒAˈ5—Šðôx!ÊóOʽ™% Â‹Éøïí¦IewÙ#'yÜÚ(kwÉŸ¾RJ&^Þf&}1\«‰€,™;ùÍœP@,ˆIÚ‡W§žs^1ûNà fu½·¼çº_Gñ7}t÷8Ñp ±<ýl¸‰öm,ÜžŒe'ýžÄ¾è—¸¹=Ðå«möâ $FHý©~_ǃ¡³Þeæd¾’*Îé+mµ=ç‰Q2åÏL°èò­î 3§Mî±Y Ͳ’‡{÷ÎìÝ6«Ì¯~æ…{튋T¤gƒ«^„e“gÊ2s¥ãBQ‡ZÖQ>§›¶ý>ľ4K†y¢  I”-èoŽR€O›ÓcÞïö¿åÖ÷Žù‘ôŒqö¿é âFJÊÝ”îO'©=ÑŽ"^E~Ђ@rìÊÈø žæœÔ²7Ñ0Q‡¬¡¶Õë%NÚ÷r[éùðÞaµá©IE˜„øæe6Jü„„çÍi4¯t–ãß]ªJZT·QÃ,«„Ì-ùêÒø¼|ÍÒ×.jœ”ëÂï›÷¥ë/ØÞE©˜7›gKoètuSð½æŸ” „TA"¨‚‚ÄynÙô|§í]øö=7Õó~?£ìûŸ—ü/ÕŸûèÇ‹Ÿ#ºß-Ó©þDÀâ¨#ZöóÙì¶½MU@´ wJR.mŽøõ?YHWÏ®*±,¯RRª½á¡Òô ùœÜ7TõTºo7dð.ü":D® ?,ôg¢¨• $Ž‹ÝVU•MÙˆL Ú_™ÏÖÿuîF¨9!ÅE_f@éÛÃÝÁëY4„+¼~#Št¦r¢á£f,Ào&ÁcÈ›Ë`ÑU'Áz0´œ#àŠ!|?Àá¬æƒÉ_‰—´N ŠŽ¥$'V0¯i€¼B€‰‚-G]mR†Yƒ)_j—b™1à>™i¯,vwh Û²Ðó?²vÆÂýs{@r/Ìñï²t¹˜¿Ò5Hže2k®áLàÌù­dæ“7ûþj¿Zñ‘sî>«g`З2ª²i‹©WHE<‚ ‚^="‚îÖ½îÒ{yÜ×­¹1p=¯ùåž×hšîòZ{6îJ²/Z_?8zƒ“Àؼÿ¡û$õ²Ìˆœ„ ùV˜)½?ú¼y\^\§c£Ì+ƒ·Yó5!œœ°ò@°$`k@"òÀŸkžÒ÷’; ®TóÛ´OÈÏYAÎÈò©ÿŽÑáóVˆè`BÈ‹¯®ñâ,ílÖ¡1©'#S¬j9SɸšaáªDÝɼNüPú<ìè"u[´‡Â¤môf WÜÒuBŽQŒà16Ô–øÀC|!#è,λ\h„íÊc[¦q^i¤n†Ùd‰LÀã¶ú§&¹{“÷kám_s Ÿ¡³žl™Cä÷ð³·Œ¬*h§T•6n‰ªgŽ*ÖÇ*#ɧy½ã׽ǺõÝ÷ãýÿÁü´ëR®è{ QÚw)1 ý‘¼ŒþÈÝÖwýH®¦QµYÏf‡„4¢°Ì^þ>u7Zûi¾™ýW‡*ùÁÿ<)>M’PPÛ‚" ölýÚʯéU|¯lÿïÉr8hgw5AÖ¶Õâ!¦…=è ¹mߌÈá ÑJX,ï»™¾¶ØÿãÓÛ“ý^‡Ñ~Y™V"ŠÅ‚ŠNïˆÝ{ÿ€÷>›1ö:‹Š`âaOyP¨KdükyŽç7BÒ¤ @‡÷`gÝfž¹ržÛNžÒ{º:ÄÇñ~¿·Y§9›nîl°èË£iõ!³:éÚÔZsÑè'b=ÓÛärcR·ÆÊOj1»ÚàÃ.›ÂÙ’ÒÏdùÃÊ‹ÉüÉ–*"EB¿+áÑBeô™^Žþ#ËÅF×}zÆ}˜1Ød›%¸hÌXZôG ›~OæA°Íj¯¹‡KÏ]5­ {Qžüæ¾·Žÿôg¹'5ÄC àù¼´þÎ’æÂñÃÖéäÚ}³¤Ú/z?½ö&Â;ýÄ¿É:ù¾ÌÜ5+„÷©ïf»J™ì¯ã¿¼ü x}¹gÄj¾'r™!m6÷Z{Ç’õ¨Õw½I¹`üç¿I»Éˆßαýáú®Q1Ç<ò&Ü?ßšª b¿g£pËUŒ³vý¶>=±}´Þe(>¥?ÅyX€¹ÄËå8Þ,j2z)Cƒ(*Þ’˜Zu_~ÉËÇ›@3ùȧ“õ‚ùinb훹AÔDàœu!º£ ¢3® Û·Jx÷±ï'”9+Á7¥+@ppÿÄ2á*ÜŽO÷ȦD8t×"°ûÇÛ‘wx­<(Ô»†çðeLÝÜóÔžX`r¦Ö $PËVÑÎÙb±_ôx ¼›Ÿµ,æ±'ov`åñ‰ÕËîÏnÝ”ÝRÒìhsïÁ†…Öh 1´AÄQI{‡ú#÷ß(’¨dß6UÀ9Ž7åל0wÞ`æ ëÝ–0ÅMo;ûE ÆDMÕ|‡MQÁD ƒ3œF&¹×O,3fci 9! á ©€´›Ñœ÷Üé ´EvM®ÍaØ·>²gîK“ôGæc_O `0}´×c韯:O_"p™¦{/ J âO_’uͤãdêN¹ý:¿ ã/7¦;xÃÍÛ>ËoØzc Ó¦3˜"eƸ{ ÔøŒ»>käÜ 'øý6–ËLƒÿV”FÓ›â…ï"ûj‹aÀîïtIŒ.ˆ¯X[Tµ9–±P²[-{|ªÜý¥m^ãÝæ?`.BÖáÁš63¯°Œ[ñR^¹ù×HÑH4ÛÈÍ ŒBæø8yd?2^/£A&F¶13VxMm A Ô¾rcž²å|_Á·pdiokM÷t—MC„ôBœæn7›lShG9ÐUôÅTS/P_/‘_[\îÈõÒCÃpðÁceo‹céÇó°ºOùßQi[ƒÔOØÝB³tfN©YšúÒ9ÌÞ‡¤Ž2yغ´‘ÄYø]{Ômœ~$… "S¬øjô>†ëø<ì¯ /¿‹+ßòÓ·ø=·Íï¼Ðºÿ®M~g[ÝNª¦8]Â÷Õñô£çm} h6]—*!-µ m¼?¥VYq{&ªõ™êúÌV^ém}ô|à-)žmDôÚ¥-W7¦a"ò2,ÏVûã…øv~ˆ6‚é«× ¼;8\NÝUÇeSýp‰j¼^½J4ì¾[‡NÛ ºØÙ}s@•b!žÅ¨}=&Ÿoן¼†B늞Kt[fHC³råVxªiö$~êžPÒÕ¸\™»¼ÇBwWÝÊá´Už\æ ˜ð‹{ŽïoE%¦»gã1Kµ3°à/4€xšûFÄÇßùM4~‡ž3=Þ0Ù†ƒEoˇÀàw=ÏŠ2† ­çÊ×'B%».²–ÿ:å/ÉßZ¡w¯¹ÞýJ Lü˦pc3îEÔ’fÚ ˆTâ¡yìɈoÊJŽš‘ŒÓZl ~¯J¥÷n19ªæ…qiõñÝ»†žaªêf®½GqAL•¥Y5±.’à# <åV÷®ÊìÖŸ]E‚gõn.ûmù㦸ñb.&8 ‰™fÿbêT³Ñ2»[Ï7`ù6cÍñ*IøÎK-4àvÁÍó¡îè«B΀@E„Œ¾4Çjšƒa÷EψðJú^ŸØ"õ+ÜϾÓ[êì°{.°}Ú'_†½&¡Å]!ã)ê¶©QCñsô ÉÀÔ$ŸZx¡C8HAÝÚøÏJ‰ßU ¢™¡{Ãú™„<š,¯CÍ™]= vªštc± ¿ˆ(nd° ŸçÝmhâ –¦Å€Tl`@4éê[nñǵ!öz¨ÎÖ/!ÚpøÃ$AêãædÆGzàg ûñ aÑ 89@8q©Æ ÐÒ -SCt…¨q…|ÕÖVûè‘dŽÊò À6ÃR ö!´oÒ奸‡³I¢_.x®ü#rf‚ps=õjÇÐs ³Û;Uþ=RUØÍýÞ'2K\ÕIêÆQÕLÔiyÜ®cì 2‚Ï;AÛþÇ*ð‚#Ób_{FYdÓ„agŽ4vÝ%EØ2]îB·Øk@E;è¦3 í*´xº ¶_yë–Þ$Ä¢áÜK†F┥|Òä@eôù›<Ö‰B/ —ɺP7Ýx¤qÈk=g?h¾­£e+鳞²¹ø¤Š¸nÔr%C GIÓ‡økõ½p#ó´°™X_ò¼œµçÜOÏóö¾ÎŒo^÷™pŽ]R9lõKª§»3»tÑò8µþ…Øì³2×¾¾7×þ7WIs,F¹í’Å\ûå1Ûz†ü~6j)‰ÃCPPÄñ¯½AŠ±æº‚"s\Œ™ÁðišBëÖáÅ%–ž6ÏVƒCìŽWÿ6º_Ž'Êæ_¸DD/uÿom:ªóvéLã0Wü¸Z/ ]¾¾ó/»§<„$âP¬n¯Ïµ*–Ý! ÉokÂÿ ‹ÀKNˆNZÓ–oHŽt,8 ~_ÏÜ÷~ ÖÄ¥)D‚P(¤(…bfÕH¨Äb QãlKÙk¬Zç»NûY«ÂSŠäôÞsoà73¶tòt–ÃØ^M2­½ç´»<­2b˜%RASŠƒ=$0¼ï&@ÉÿÖ{ùã™ß~¶÷«§á ïÊ`f:Gd†ga¾_ûxîA Õ—‘˜éÝOù-•Àn¢óvÁôô®AY1;d­Òý¸Úm¼ˆ?h˜{½´Žöyy)ÍÎÞ¨Ùו',ṖLal²”Äa‹µéì,¼¹J÷ŠíDé‹ü«•Á²pQ4z¡_O6ÝäÈØÉFžÕUr†®#±mýˆõ~5ÌŸMò2¶×꘣çð©;¨•²ÐrñºœŠjLq˜³ ™"9=‰q_÷O´yDy‚?_m]³H8Óh^Ì'>Ü©b„/Îü"÷çjÏåyjcÞÛG¦ï†l8MiŽáucseˆ;åŸåË £.ýMîJï,ä ˆ^‰Ê1³ÆèdOm£«èD:±˜°to~Î^ùIJú:ѯû1yd8ˆ©Ó™àÒÀ1÷su˜QÿT˜—*mC—ï¦ÙÀÞÞKs8)Mqj<}sÕdŠ\/«Ï{ž˜Ši‘ô#±õjû½öf`à”›hVÓ\ùúúßgÝJÝL5¿Vy$´?«¬m=477Ÿã%-×o6;#á’Ó¿g±nJÖ8Úéý t· ×%®AêÄ´Z„ŸìwñkÐF hÃdãs¶}æ–?ÇöÖ寥]„‘PìérÀþ[÷ùŒÒµS§ùOízaÀécÿøVéûÐÜž9ÿ4ßR‰:ºÍÊ{9ÃÆh‰Ê’ýz-{ÜòßQ'°2­¸9^…~b±öD«ˆ›äAߨ9‰‘]føåÌæ`B:ä{ÎY)MoMßbõŒ®¿òü× ÇÍÉá\p¸Þ;ï¯_WÏJóq¬‡T •-è@”2,o2ñ\ö„.‚á#Ëά¿*÷œoq'Í,cÄrLÚ|ãüæÿchlâ“X±¤ :6ŒlHHMˆÿ'ki×bŠl-ö?Žùïg˜÷k4ýŸòF"SÚ´ÄbºªðŽå¦]nQ³`«ÚwNï}™;3áûvêJwØO71ªZÛʳ¸ŽN÷'­þ}\:v¿õísWÕ,Ç»éÕÕy^Z*/“vUˆ#y¼—œÆIš»"ÇT,L¥0L(L(À0€¦ !†=R|R„¢Ë‡‘6ùÍþMfëZ |.…ç7¡­=®}QÚˆÛh’Û;º ¤×ÌÖß[+kÚñÀ¥;ò9Fോ:ÕKâ4CÎB¥ð8°[ÓÌl‚¶óÞzsÜC30•‹0^ãºf«Ø~Gí륺‹á¸:¢e4˜¶3Âï)Ú¨Úåy-Ksá¢]4Í8ï”·´¥¨J¦ÇaW"ö.¨aú,¸þ&Fî£MÐC7yäÓYœx-œ®¦m»ˆT]øÉv=ÜÃ{G£S8Ðt‹ÙQN•øÏó¢¤:¡³¦—¥¿Rw«Ã¼F¨µO2oU÷äl¿Ïç4)³!ÖänÎfZâ$«óLLiðûG¢‘n*·I)I#©\ÏöÏlo52O¯š+µ óG ×§ẄQs8ªüSÉë[~ÃUÑuiàSR³ÅTË[¼lêãóö“àDð˜ñÙ×îj,—|2QÒ®:ä¢y|ážÜKL%GyRj…¼¹A‰ª()Œëv/Ö®Jï® J‹jÎÝÕ$ˆ‡BžYïSq­£Ÿ³òxæØa^œö¡whmù{ç q‘Då½Ä­ôÉ»ïÅ8ž‚@•¢³»këGð;ËlB€iq3MàÊ:¸Cà»ÖßwÙÒÄÊÌK¯¢Óâÿ%ÍÚÓêÓœâLºykÿíB¿K%윰p¹èáSy*·“3"dYåO’¦Ú˜û4“Õš³½ˆ¥®†Ž‚üå|r†Öö¯ì,ê{›_wá¥}&±K¬Ù“èt¥ZŽß0ÉBå*P¥jðÖ˜³»TY¾Ì×YÆÌí—QÚʆJKKyHX‹¬^ÿGþ3'c1èãbÌ‹šÛì8ß—¥Š9 ¦.L~tü -壙®i‚-Õò~¹¶«©Æ9TæÉîQð(÷it—‰.ç³M¹›—Ž~Ó……·Èò¤¡ïùMO–z㽞öu­ùE•&»f¬V ÷ú´FTO¢=ûó”yzßCæw®ñÿqŸjy ²›æ/#ë1Ú±Mø6ÌÛ¢§8d4ŽVžÝö«ÚŽÄíµœÎ}fâË+ºÇ³¾Œ‰z÷ǶêÙ¾µ!¿»ÀÜ,UÙl˜ÐNÔl¾ »?/Xígƒ#£èÕñã¾»'*Ún 7ûHÄÃó²·]ó²D+äìoAÍÝ[ØýH)nΪ#°¸ÓиMØGT{•x«î0Þ =#²æÇÇ“sˆÈ/*Ë0ê•3ŒûYž¾ž"§ éI¶ì_ëß1ô;4I„/ë¶y&AYCQ¿tDD‡-?±¦q‡ã²[å§îáö稰k:Iye$êœoò³Êùd:ù"WÇ(Cý J_¶4(5^þ>»Ú\Y¿ÚwXÇYtn}T9py¾¦îùÄÜŽœ|0¥ûf‹;'çjP(ðÖ\NGÓÒ_ Å ˜I(}‘¿¦Ø |qC0=‡- Îß j)î~Us?>w8°Š`×[Ll·æ±5õ <™Æt"$(=™Ä¹t3Ä<1 M/jÇä½ëûähK2 Ö:]îŒ-\j#Û£µ¤ KÉ|ÍÐæþˆj¶%‰œ2H‡Û›Š7DBï%«øPu¥0³º-M³›sßP"Ês©Î{ä’LÅ‹ð¥ßàЪ[k˜çôñVTQù²Ÿ¶S„¦‘’wT‚~¹žÄêÞ"ìdŠ^ƒhò‡³€Üxµ*!ËUè!GŒej^?ݲ¿¿‡?AêprI ü¹C¨/Keû²h5‘ÖD - Â/Tæ²=e©cƽ• sßøÎõH»@¢²iÂÕç'˜åËä6éÔ™VPwðÄÐ$úe >îÌÀÈ–XP'¼¡ÖAoÛ€}dñÎNÄ èh6†g~¶nFÉm¡-¾ÍîZO[Ïûwd›ÝÛk¨NûæLÁúmAg̱Ö÷³#åÔáþ0Ôd¯ÐÇÔ/C—›ä­ü“N˨B;-?‹ƒp*âß¶g‚ÜA‹Õæz ÷YаcQZ"& ~욡Ѧ2ÏnJòùš±_–K#Ó¾©ºqÊèÉñ }µb;ñÅŽ ¡Šèæ¥5þî‚›Oj†GÌtŠ&ˆP’jûx$¶9$-¾öÇ;B‰+#½b@A[9yµ" %t 5¿€Ýü~·ˆÊSÉô1¾ücîçO÷Ò# ó"5£{yŽñŽIµ4¸B<y'€Æ!Ë1ûq¸«ÉMjï–®wXät1ÊÚžYañSùœ $†„¹_vH0ž:—’‡óß§9ÍI^)œýÑYv¥•(ÇUeÌ þ\ÒØ¬…c¯5RG?w¨VfLõ'z{ærÞpÁ¸â2 Ážõô=¾x«”>(¬•ý·î7}ÉUnøÜ¿ç3Ikèd ”áÚ}L©Hè’`¸—d}ïáH¸ªPelñšP Ä ŠZƒ™¹†œtÒ"‚`°g°ÙÜ»ö·:¨š$œ]©ÜÚvß!ÐY„ oú%kùÒˆŸÈµ·7=›oµTÖ2•qCt)± KÇY{Ê榛Ž!2Po;ã­) Ý÷m±O=k"ýyÇÀMM 2Ú'ì³ß—7R˜Ë“¶2|( ·Ç{‹^;¦‰TÕú(#0Uú9ÇË^–ÖŒÖéetŒŠ[íqÁÚrÚ,Vž }K­ÁÌ`÷Oï8?‚O3ýûyÝ~¼ ê¥ÎŸñ’‘×'K†ãÈOØ Nö$¸¢÷M°¹,A?<¼,&ÔstÄ'úçH·Ä@ŠXˆÀwÜ´½Ž³QʸAÑ”½’ÀIw•…9“Î_”¡Yw“xJÑÄäúÐá`|L…ˆGaA5‰¨kRgD ÀI¡{˜†y< Ãr`:ÕwÕ š¸^2=²%:+°‡–@¶“ŒKô]>‰I¶–Á'÷ƒUñÌR¡né®®þ½…:gü¦Ïä. cGÅÜÄë+ÕÃÝ•h:'WnifݰâY"„ZÁ| §þþläÓ÷¿Š§62—±ÁXãþPì³Ú/ûYÞ‰û·n[{²ñki#l%B«§(*zfµ|Ì]XL¯!òÓ\¤MÑÒhÜó†áŠÓ=þ…qöåϯÉ.}ËèaHºíôdÎT‰ØíÍûBP )ˆ Ü­¹Ï( òúì¤ß"ØCW9z­^ðýÊÊËË´ÐÁá>ýï­’qÂæŒã¸+?V!–)¹ÜɈq‡fÈÄèˆèè³$uÊŠï™ú@àþ #— èöIƒAmz9Þ—À•Š?#å*l¬ÔC?ºÇô„@îØ7­é¯máàèH™×¿:`{Ù~³Y{]…³s‚®0ŒÅu˜­ß½óP8¶3ñŠ0Na-/}±OF”m—£Þ¥\H;¦‚™ëÕžIi¡‡L$‚iM©zµtBMl6ìÄv-⟮@û¬Ék?~ŒT‚ñ"æ‹·>wè]¼Ex,g«ÆÍ7ýqG~!Ž’,óï rhl1ö'a« FM0¿†@Àç°ßj@lVö=ßž!eòc}ŽŠPågd_.;^™ëµGÊš¨éÅÊù¢ë³Üßkù2ÆóþÝ‹éÞ$^›µEçrÔ3xüÑòLѹ÷Z¹«O6óý®Ž±ìæ;–!"E*8R`èòžE¼lNd&öAÍÛvÏÓCÙ€ÐTÝúi·˜ŸD±ä€yìaÄúÇýnû2‹¨!碻„‡ ãħ(šÁúFZ~–vç¼XðÓá×%Ó'‰ =:ö´PÎvA€övGþkÏoS¯nøa¶ŸºvÑк ‡…èÒw¸x|›Û‡¤ŸpÆ¡Õç‰tÍT…í¼¡ÙöêµÛþ|y W„`wl Ô ¹˜y¬²S, ¤ëAY+Ah0T¸ÏÞÞdÜ’1$@#¿Íç—0VL³-eïßÓò’ôZ}âT:qÑÕ]ðN™]„‰Ü ']Ž,êAÈZ„ßI„äo[;A©óͧŽÄ0Yd4–wùÔW<×ýov‚FâÆ;‚`3F÷rõ1­úd'[NæÆ4æ­¼)ã¡¿3¿c(ó5-ã⸬þ¿¾Ü³k'õ‘&}ØçbÕ¨ëæ¿][]¶Å¦Žeb)î§àåØÏòŒgá³f‚8Ì5ßîVoà ŒŸføœÌÉøœç‹›ÛÞoÔ4‰=šCæ]¸ºTÞõ”nã‹7³È¡f<'eßaó>ïZ®ÝþúQI){Á¾'õ"GÞ¸ˆ÷ÁõP7± bøõp³öy?W÷Ÿ—ò±£p|íæuéfZ ¯/Êt 5ƒ(H€P(ç#g”ÂhjÉUØò‡bÙI:ü‚—?ÉËèœ×¤ˆd€Zb²ýC› #<²–ƒY¸\}¿AŸJÙ ÍÚ'3.\u6üÜ~½­eTðßFåÖ£²®w8UE²ÑÒᣉ3óuƒ÷Žñè.Á+À±ÊRºç)ï4í"ÂTïev2³ñ½ª…“^Íñ%jôóáÍ|}Žòæ }/ÞˆäÒF'¡Ç]u„_;¸Ì\>ÍÊO’(Y Ÿ;yuÈÞS0ÿ”«€ÿ6«ý¦nJnwÝú{5-€óµ½8_æ„GnTóî%> xcÇ5ÃI(Uk”§S…ûY=¿ß œùdXw´uKInëyšD¥¿£ßä<l/2…+ì yMÀ—¬æ¨Áܸ²*øÀ±—ê.ÞR5¸àþ}¯ï³òœ Ÿ\XVƒç6–‹ä1j‘üŸý¦âO1¿¹ªÍÎÎè‘9¯2#.Ôú˜+Oœ—‰KD\ecQL`Ád5Ú4)“­üÓóyÕ ;+ÊcPÄ¿wøÈš5"ÑiÓNn¦ìÌ;¾‚æ2£GÐm䲯 0pkå öûf¨jx‡"»hŸ…îZ«|m0oíÁO}@ˆ¼8#JðÔ)Çr‚ë†|ÒŠÍ­§þ!öúFxÎ'çþÉûP|GZVé|-äU·”?¶4|ô7ß/ϵÞÕEzõˆ“/~Gw@‘´ iH§¢÷aò|$„¨«vÓÓ+¢ü°ui-ÿBE4š(æcUˆ<}Â(Ù/õsÏ;‚TsÒpßõùÛY¾âs°Ä3si!a‘lHRWC1´FëΞüç Wìa¿¬<ç§CïL£ÿºohó”3(ôWîÎá2m&ïÊ!<-¨èÈÜž{{®‰¯Å6/õ¡`EOÎÖÿÚM&U'd²íB1Ô$l·H.Б:œW^LëOL¼ÉnŠÉÁé÷w²Å 3^ʇQÓëÖÄúÒSLÏÇËêÏô#lÓ¹ˆÑ ëÆ3Ü&S@å°®‹eSµký›×ŸÊé§Ú‡áÃû®W,ÃaUûUpýîiõ42“]ï/«EÈÂù 1äMÄ‚ô'þÔ?ì[ª¯n*!¨\JKê“$»(´zÀ5K[÷"'Úåµ8«Nü–mkôVŠW…´JÛãü—$ܹ šï¬•gšS|‚¾Ó“ÄšÛ^\¨¡B^ÚTè¶)ŽaÝ ×áïeÀª2ß)WãvWƯK‰ðöìz½>¯/ùº<²á)Qö˼L#‚m„RZ·¶3£›ð„C\åNWÒjn _'§ÎŸöàò³»ý<Åoaò‚Û ÑÍqXt­Ë?kl<±¶»mæ]}ÍavîËu³eÞ5qÔÞµØuãše¬l½Wùéî‹wÌÊíVuEñ¯g¼ñÖp§·¥ú­uHD• ,Àæ×püpvA–ùhëNìDî— ÛŽ‚þ˜¡=qóïÑ  mù%ÉòÃ!þÖ#f‘ †b‚sqÍ^ïD„1¨‹–Š/*OÚ‚“ºƒ¡ÐìIEРûÏÅ,äµ¥¶ÝqøÙ–÷]¸ï`3‚<x!5ƒL*­k(²Š¥¢ªÕCH~Ïô~×ÙÛ¿òþËáô>›ÏuÕúîÆ\™ ý÷Ý73F~îµÛÐæ>›ŠÓh^<n‡|®ƒÐPâÀà|Å¡ôR¢¸¾Z̳M‘›¶I4ðU„ DÄ2Pa0$Å: qû'e+‡ý½݇³ð÷Gë|,çð;9üN_=ÿ¹¢øs™£¢î÷{?FÓ.t÷Ÿ1X;ye Xñ|¯u~TvΠÈ\Qx«1A/î—V@ƒû `†à”:W]êH74¹sŸlÙDI¹mÇÚ4DÞÇ‘ÕCìn,øþ¹? àûÿ`¸ÐÜìO?ùìlAöU@„OOVeö+ Rì —d?×6ïp—®E!SúNÔóBAIðä½Ðª˜LuŠU9š_¬é‘:æÇïArO°¡FÜåE¤*@œBŽ×Í¥ñ?sØ A2PUúÖQ¡µ–ÕbÔP¸øÚ~Û öÛ?{ÛO§ÿsÜè`wá`}\Åß´ê|£$™™ªË 3,E/-˜Êƒ-Ú6íC^Cþ¿òËï>¥|?i?º#óò·_sn[ÿ?vwñ~.½û9èI>G™Åɰ[ÒEÖî%üŽ='hDï*ØIÇé˜à¥ü½Öu}‚õoöƒsÔE§Ëö{gƒX9j]d ‚Ì¡Ìïz¼rv%Bµ…óhö“ 5¢)%DÉj2ª¡‰iK _GrÌÜ}9?è3~ùÅ0áÄn–3‡ûª„~5ûáž?-¢Öíê=¤Ë|4W‰$:§“,­kOmœá:éHë ¯P) 'õŠõÊ ëåÜj×Éû%#ëå{g'Ç!½`£7,)Ãîy(% Øz\Ì>*_³ÄXpÚ#"9GÌ$Û•‡÷’A‚\šN8ú+s]Ûœ•ÕÀ0räV¢é9åØlüUß|R7SÝRsó/M_9*E¬b5î cD<½ŒTQшAþ:”)nVóÈõüÞ²í+g𑲢Ø/©€6jS‰vÂ^ŸTļGAo!ÓâÔp@„Á.÷™F=©îcšd €ßwØIZ‚Hq­:Ò¨ÞíûëøZë>G"$5¶Ûž´’Ì_|zɾ :øðP”ôë.’ÁŇD}ÞWpáÈëƒ]6GãQÁmdnUT³ø¶, É#NÜ‹X„öþ™`©ü>¶ùÐ*VðÔ*qÞ9üŸGî (bó­gä×Sš>D#&ÚëŒöÀСI››;S´H¢s•¥¶»˜¾Šâ¶JϪ€:%Õ7},RìN¯B²˜œ%ãÈ]š–ÿ¾ú¥ÔxYûp'zÇvÏîþLJïdRµ˜¸P³ˆN+Z½†œØ#ŸñÒÔA X7 pQùj¬ZýH$&~ ïŠt`1YãP&¹ÍGM8Kúg(9YÍ.¤,¨±´$q"8¦ŠqN,{RÈS¨{8XÞ<‰ï²¨ƒ6;•òü }ã?ˆò\[Ó|Àx{Ït`KÝç#ÖJ™Ä(‘,U( ÜÑï Ô~|Þé‚O@¡‰í•bȬÜL|q̤|QÈpÕ!G”òï/‡éÑ]k•ÛPåÓùð5m,ûf¼ì?z÷Ý÷_úY_tÜå8 JF D³ù®H’%4·q=âïªÏ&Â"({£dD~~߱턮¡­vuVÅ»jGeå*Ž2Kd¸zîùB,ûÙ3[Š3P©u—ôêô¸Ü{n»=[«ŸÏwµwþcìéü{¹Ë9-J¡`¢Êw±kQIV`¨•„Òd’Â(åˆT ø`Tý̬ՕvŠR±FIIVNhK’¡i† ³€}4,)¬e ‹µŸGxP ÃFP€i—å󻬌 TB½ +ô°/…1ÔÝ{qÒ)ÛÖ,¬ó9çƒbãfÈjB ß\¡UCÝ ,‹bDèž¡Í$Ô=ë[úy~Kd3‘8]8¦kª{ðÇ£öDöGASÛhȽ‹ÖÚH™°¢l0¡JÁ†&W°`qeÌD‚DñKJU¬¡nY Y\M9¡¨0¹HŠð‡‡Î]ì6}¬³h*XIw2Çlñû·Já%ÏÕã ž?ÓÜ2=ýCS>B/ýb\%ãg3ØÓxºE-Û7ëÈ Ç†TlóŸSû±s}þ“‰ÑœYÞž‰¹XÌŽ‡®ðÜš}·LnàßgV©ÔÐt‘R@Å‹…¥»¸ðÌpG_'­t6/U›Ë³}}ŸPÝ‚hvÑå”^üÄÄn·W\€ á-2wß!Mîú—q¶êi]°Â—~Ì{&j TÃz…ÊŒ*¨d±L‡øËjÓH„Þ«™´›²¹;Ý”¯c¿YPG!ɼR…sÕßæÄâq‚Ì ðÂ2ò … )-Õù¼Š|M̽›.[ofá;ü["×=\ò0c­·ü¾ŸÀù€ZD?&#ÐlÊ€ŸÿQC¥{¡à ã—69ëÎxøQÆØ G÷8?±úèy&”Ì£Ç{:8/µgpaÔ$þäôö:-ïñöü÷™WkŠ×XeLÉMüïwhÃŽä¡ ãÉò¡cÿSìt¦eJàæN°y@º¨ ÐplÍØØRÓT(S>~]98ìãO7 øìþiôŠÚºÌè>ò’}ŠÞJ–Ã0åàšêD8ˆ—úÇÁeNR”‚˜ ÷a‘ëmXd§r½¯î何•íÆ‡IÝNT! $Ûx®SØ%çÃÇš0^]KÜ…¬ÁÚ\ÁA(H h¹Ãêâý¿ìÎ%Kª”1Vîb¬¼Z<°“b/‚LU!C\1²ü½ï²ài²*… ð’é0 ë4ŸŽ-§”€‹Œ?¥Î' †õü¥Î * /†X®ºçP‰Õ@ÇF„Àïš²©IM¢¾84‰÷!¦ØhÑñ6>{²q)ÝÄZ4Ò€}*/@€mÏ%NßäW‹ “•FÑtÒc¤¾¶Ö—Œ9øÝgj{qÍn™h—Ü-¸Ò«n®2OI>“E~‚A¹›H”›>ùiɘ¹ÓNL¾Ûæ­+?ÍŒU©Ù3Õè~+ÿô~ÊIÜÏ*‹=Fr‡ûnÂÛqt¹óÔÎ+ÀEåbÞ*Ûüù &1ßÅ!U77áH¼ôä0µÙ®RmÊÿ'£ üº»–mÎÁu–”§-_\U™”óÕ}GQ*Pçäå¡))µ¸´^Ž”Z,ÿ†9öFV]“,Ãß}ÏïV¶šŠçÆyUNÁ³=É´m_ë`¦dsœÇî ò$]û‡'+©ñ¢Ü5üãè¡4÷;(]ûWVe'çÜ>ϧf· øø[9ÅÛ_ë±< +h&éîÝ<<ž JD) '';Ÿñ]ò$àýÅž÷|ñcRu(wù•iߘMŸsCµ{WáÁÃl2¿Nä·']]÷ ¹ÈOÌvyr¿F‚îŸÝÛeÚºßüåÕ\q~ž†çÓ³S²åB±æ#³œ7ï¾ÒÙ3«lÝ~{Ÿ9WB…,¬Ü?Þÿ_øž¤âóù>ÍÅû£·CÝpû^®î*÷|d‚ý?ï£íöùºŽ›QAF³/kr·×úçêúî-77Ìœ}—eÊÅç8Ä÷a~;³ÍâôüúÜ€¿qú5EEx h¯=â¦5«×·æ´õ½ådmèŽoµ ØéÊ{ÃIþèè½Ìì}œÆ©GË…ƒéÈNÃÿÙˆ'i‡¹*oÅÑpÊQ%Jü0Mg°É}3| ädœ Çõ´üxâalôó³rI}ë6²Û x³û?z·É¯L;=[ý€Èÿ~ûu’¥/›š”Ç×l–òi£Ùù¹ ÕTÏŽvm¹ùû+ygfÓþDíÆ¡µ°àòœžWv›zi$4ÞygšÏïF}}Vm÷Þ1o³6Þ½‹û„ïñæKÎ}bª®÷™€ƒåò¯z´­¬3/;å\|¹pã:æ{úëošÏ ¿©b…Òâ_;«óÔ ö^Œû“/c|voÆÚlÝ5«>ÆãiW 1¶à&1q¿²™/F^ߋ۬š®Ú·+ Ûùâm á¾K˜Û*Œ_Ê7©KÒ€ÐeŸNýµÚš‹™$yÔ{~Dòe½ö™~½+ÏmǰþòunØ=ŸË+Kv¯eÏ¢r§ý);O–êú=)s¿×¿‘Ý7[EÅå¿N¸Ñ>Þ 7 ¯™aåö!~CW5ëÙDJÙÅDZ¯øP~1ff½³±L?¼}äo¯ƒ¼ƒãëá¼Ñ^î–Ñ—¹ÅkªÔþÚ瘛_ÜnòìÒ[ité|¥ Uÿš»Ý}K#»wiÄ=¢á6nè­¼h—3š–xþ¼ŸZÒ´ÑŸ‘Ìi²5zÞ3>ñ{WÞ´õEÛîÿ,Þ{]šìm~ÌfäwLü"û>]ŽëmÇ_ÎbFV|€˜r¸Ž„ïJ‰Ó‰=´×ßšSž”2ÜŽm')c÷Ö>Ÿõ«#Ôù_Cïm‘¦¡(’DHÂDD$Är'öi¸KAª ÿ³-›We¸ÄAÿ—x”¯õ÷AѢ #" H —#@Jh3ú¾pý×áý¦¦É–$ˆmŠAlXLBË 0‘Œ0IÃE´£µ„É*ÀPj©ÂÛp„ Á b aµ†ŒxA!Ä D" ‡’6A,$)C„A"P4 !“DH‹Â l9>ÅB x)Ø"Öa`ÆmH„,4š •i p„X- I$c.naa½n*s÷P`† e6a}бP«#à‘‰U#ÁDQGîÈ…º É $•€Ù X@„Ó n âSd†Ó-KlåL,ï¨o?‘ýÒðïþ“Ñ[ phûŒ´xî%†x¹l‘PH¾$àõœ(úqè€ßi"_Ro‡×ßuÜǪxS‡ÜHQïÉöì{ÝÊì9³ËÞ§Èüº-ÿìU?–ïebëT2Y$÷¬ ’—«½-ÒUІïö¯#têÄX±A¯[’îêMNîö;š9î{åg¦ôõvéžç®oöuý·™%™*zœÒîí1ÝvIÏÛ»ÏaäHCºïkï¼ò„ôt”Ovñâ…$õ÷Kšéÿå\Ѧ¼Væ=?¡ý’þ·ªõ+Øjö+—„Y!Ý®•¶ß¦óÉ ˆíg&ýgW,LÉ&b0X/S¥¢BˆÁC×»¦1DÇt8FH»ºB‰]_ñßçêšõ±ÆŠ‹Ö×,š 1Q±`¨§ßºåí¾ezµæÞëý§oF6Þ‹EÈÝ,I·¶ÍsA‹x×f" k›]ýÕØ„Óãý/7`+ºâ+ÖÕØH /9»*Hɶ65ÖÞ+–1µr¢ÜføOOõKÜ=5êX>ÇÜUâæ"ƒØî±Y5ãrÉ«»±­ @)žÞ§èV[V€S$P"’ ª€5˜Jd½ 8…$¶KŽâ™,ÉMz* dQe’SX¥_Ùå®ímä½$®çR™Y RÚö„.d"‚© ƨ4lj-cÆ¢)ì¼ð‡ú!L"„R]U 6ün¯¾ ‚RO¬”È,ÖIÞ=fÇóû·Šý—»&]¦vC:L“ÖÖÞûZù!s O®”ŠKp6·÷¤a•ÓœÆZªk´"Ù%"€RC‡Üô]t™"’Üir¡/µóÉ.`a¾«ËU4E$æ­AÛ¦n`ÏÁ§~DP™.agá%6BæZÅ!^‚€7ã!5’h©’/?R~ëe@½)Ñ„©éÙ$/Cô™ L7Ü ¡‘’Uæ•¡/@ö™.@7ømØs"ÁL•¢Ãdæh ˆBüùvn€cH`Àý¤¦H¤1áRBÜ& Ìœ¦{Ž’èCÙjj^\¹6j0ª fUTbÂÝ5Bs;–´† Õéj’j¡µj$¶ÎÇè‚C,2¤ÖÓ¨`¤äv2jç¾Lü<:!ªÉg=E& 5LµAr)%³PzBoa…‹™á0³ _k4b© m_»·qxLøQQÕÏ–ÒL‡ï!¸ÀÓd›Ð`eàÑ$šZšv’LÜ*î.d3ámNÉ„³"¬šl¤²T5îÈÒ^ŸÆ’bd3¡IÛS†êLŽÓ$Æçï Ãn´ 2N!¤ÉÛÛ²ÑÌ Ëª4 ¬¶´æM„)„ËŠˆÕD6u7Ö’˜j¡±’¤'ÿ„½½Ö`o±_b4%06Y6±QªC”`˜bCm…ì!a)%˜m¤ÐË•¬Ó ÆLœ77(—iÑäíä´Òª¬ßôœ§Òú½îõݧAÞ½2÷{{ð¼÷­ý-lØý‹SÎÌyº“¬BÉ"ÎkƒRqÈo¹j%ì‡"’“ HJ`r ‡-™SŸe.`5î(™˜sl˜2qé%"™™ Èu$¹9FHÐølÊÀÉ…R › œâI;Sä']»Í†Ÿ~ì0‡jf6’ä þ-[Izà²è@Á…Èû•!5{¯÷Ø0`v¤“aÌü–v»®´†TÁnoQóœT“'äÔ…É5Xq5’S%Ì!À×Ö°;c&ÂC¹Ò™9ÖB|”!›¸ÑH•BRr6 7ÕD*ê °X)4rI¡’a“"0…¸Þ‚»ÚÖâÈk¦Tæ@ÝIf¥RÂhdá¤Á! .áPœ$œLû7I”ɼ…’ûT±…Iʲä'Z€›L“Y$/`Mf !ŸN°I (C<’l°ÛáTðèf¬l†ó$Þ`Fê¤u¨“I9FQ%ÝÛjÄÒI'%’ˆk0â¡ É3îÔ˜&†CI†Ë}¨½„8Iä’Ù7Ì ]ýÖÙf†M&ûR¤‡½Lï‹E‰s†wQ ‘%ÚU˜0p¢K2²ÐæXõ*@Ë…šl|†V6øú léjX—¤Ä†$œ ' ‡$4úÚ’LI_bÖÖf¡í¨¹†LTAâ0¢y|Ú-íZг!L'FÈLN6K¶è“u†ù ³Š¦¦MD0ÍDè’d˜±PØ\ÀÆ„ÝaHK™—5f¤ÀÝI h© ¨NHm%ì Ä ÌOZ‚ôá²Rmz„mR­­RRY$6™!¦†ª^À5КÚ*Í´ª…0¬µ?ZŽ9 $†–¥ÒCI†šÒ£#ЉL ±¹ÆÉ™!¡“Q™ÐÐÉ{õè4˜° „æ6ð´ÚH Øø( P40Ï–¤4Ò䋲S8œ‰(†f,%̆>V‰¿O9ß/ÇÊ^i ¡22lúZ `i±@4!C$½†„ÕHŒ%ÌœÊIHq®¨ =ÛøWO›ô"†NÕ¤š2T lÕæYÆÉ¤’~!2²H¤ÕÅS|0Iý èkìPq¯…˜â´’ga L• ª‘C”NU58h!1°¹¦(¸˜l¡fØè˳PŸ”¨j Ù1ý¯;`œŽtü…‚šl3Mõ p´ôìJßZÒ ™‰†¢ÜÒk3Kn€áÝP®ëD4™Î!9”5Ó]¬ÀÐÀ)7´èß ‡F”Áa{ „Ôæ(5™Á`dË…ƒ2ÝPÜH¹ªC]12bazN57[’j機.@,ÀíÈ0ÄÀá$ÖÚ  ˜l3µñô¢Þ¾‰ BíK­"†Ã Ù&,t^šlKÅÔ%ì“[j‰fk¡¦É—0 U@ˆKÐ.NE R ޲Bj0˜&Óî2K˜N !Çg -TpªÝdÞg5i.CÕE–K¶*4Q&«¶ÍÔ˜9X\Ȳi磒{]Ô2q7åC‰Šƒ“ËFnCg˜Ú±6½CI7Pr¡Áq´Í7úÜ{54!•˜Ù1°ÌÉ?"3€é¥ÌÕaÄC"eÉÀ´šL59nNØ23ŒdÈåÇ@µƒ1$ð6m¡ÅwÌ™År¦mêÆÍ6KøKAŽâT¹Êñ©°ãÖ ÒÑPäPâ&mŠ“m˜¸Ì–ãY7x5¸šŽÛ4™Ç R&’aš®ã9`a{µ­@dÅF\Õƒ!¡²Mfª‘áÑ52&›—îÚL©©uïêRÃ=µIª˜ph.I8n$Õ­úL²š®&üY-‹H¢eCUšI¼à™Y«ž†dfË Ó4õª”kºi¦Éz¹T WI17$ÈÙœ‹2â© –sŒàì×Vœ?þlÛˆÍÇŽMÄ×ÏRÞ%ŒßìÔ1oêdfû5E’ôãv±ðîÄÍ-*˜ðÓ³«FšbNc=cM¤Æê¡—^°Lù(¿M7.þçM)4&óË772b5*S¤ÞÃ2Lùêgg ÖbÌ7ú-¤äù"=ds¤¾y8ò‹ݨB8ãíBdß/l„ÝfÛuÕ‘4f£+7Sq›b¡‰™¸Ê×ѳ£=æÕWÍä5XpÓ€I™Øª&Vëêýmš5h¤®;fØ’ý:–u›2|¦]¦a«Ä´5·/±™äÐÛeÎÛO˜²ú˜ø†¶F1¿ Ù"½~nf¾q~|Ì€ûZ»r,.fÛ Mj‹{f.’ºbendܺ¶µ(΃‰!£=.ë5ð£C;— ~zÕm–·.¨hoGb´8‰g™nwx56XlÕö5õ+6öÆ[¤ßZµÙ®Íkè×Ý«5¥RË»AÄÏmõÕ·PÖMgY—°à°Ñ}ê#ÊoêôJùQΈDQsÌ:¤>W]í<Nˆãœ|>_—›<ñCÒQzAð…Ÿ{}íòh{Ç&znTö»;äùßêïžS.êipªCO‰AÅv—‰µ¨í]|Å’¦W…»Å°dL®kë2ÔÑB&¥Ä­7&"¡8 ~ÞA"yXt‡¹ÊD‘ñýO¨ç;z½ˆÑ ã¢0ÏF£20ÐÙœ\Ù¬º»6Ë¢ àÍ Åô¶~+p†<úY,ãÔ~6Pø6á ~ì¾}+à˜èé&šR`ÖÍZ¹-hKÐÈɸܘ&|u¡Saè‡Æ˜ä㓌8·<áO¤âŽäùŽüצ9›?_FÆåê˜ÜI£nƒa†WÃE±Û=¦¾>:²k^¤¬çÚLÊö‡Öî†HÛÕÃû>LyˆƒëˆCéKÆ[èíyX%Î×èñ\g†NõœYcdzާ™òë>^óC¯«ŽÚî&dMQœëÉ·£¿£[ó*[EM­6>+,„ äæ}†ƒ„M&6žÂä Ä@äĬZîGØGÈãÚø‹3Öø eO‚lD8Æäc·ðü^1Ò“;YëI±_àäqo«jÔaÛöô´ùëá§­›fè ©£š×Ï|6-Z…&³ l;jl[ ká«n×EI$ãÌGf–çª@ùü̵›óx˜˜³íûô=}¬HescÉc]ÕfbfÙ–¥ü•’öìÕ™˜pÐ Qf‰—,P?Ûˆ‹Q#’b*3hŽK Pp¤inv_ÆQ6'>EiÝÑŶ³¡1º‰‚M—@‡ ŽÏ¬ë×äÏ•³G¦ämʳÇrF<ç®–=þø,Gmœ#à„A#ò‹_3ì—'ƒÙGo±ú_ú9ç=l†Ç“€ÞïÙ0ª× ýtö9@}^þ†#Ÿ/³®R hë/×å[‘êõðHþ ù¶X8öúÝà ò‡ƒ¢Æ É™5Sa&4åRdpfaÒÉ©bç.µ“‹6}²t†Ô¬ãcòÊ8Ç Ål{¼Ï\·¯ôëõ²ùG}Å«„Sq޽­¸ËmQ5¹*›‰¶Ý©A´›ç;µ‹±g6±y@Ÿ©ê!3ÓC׺ü§µçcÚ nßž ÔqÏž<|¶6>¯L?Gåxß—Œpùlߦc•Ò’’"“D©ÎSK1°‘'•Øk­&‰ÙMhœä u¥ÆF–s`gMƤ† ì,Ìô"å±Çˆq(Ì…åT©Ð;Ñ!,¹Zy°Œ¦!2O14.@ô-΀'|  L•ŒFÈè%‚xìMŽÑÂb[$¿gB6l+Ò|äi¤”Ö “–Eß´`Ž vTé•2G­rÀás ©Ëéˆ)9å Ì5š£­MP@p,#5$œ¶gœ«Zús<È(#!O½TR›PB²u/X‡(Çßd¼°ìæ*S“Ÿ +(•„¯Ìegަ½ÄA˜kÉlZ0™Y)¬ë å$ú‰+ͲC$ù_9Ü‚f¹&ùÓHŸH“flj8(Fs14 *mØR,Ø(R\Å´eõFÈf ÷¢Á #¸¦ $ŒH*°êaËâdœAVH„)Ñ#‹ Œc‚à\† „X‚&"G( ËQe95D!`f<4¥KuI¦æ’ã_œ” ï”´\B †¨Pdg¥ PðnCƒdªX"!Ç6rZÓ !ywŽßÌá—WZÕ!XéˆI]–fr .hª(†$y‹9uP©Äűέɬ2ëdèmà|¥aC)"1'Ë<•b!žu"¹µbÕ‚iFù,ol0¹ÓÆa0Âu ÀvMQ`†µd ’¡ÔâŒp3E‚ÆÜs9fJS€T¤¥QÈØF€˜¦-“tÀñ‚”é«@XÅKe†¦‰H«³ÊÖ³&B`ÓR';%ՠϢ&‹C¬1˜HžD2ùi€“¦ÝV¹Sʃê ƒß6±g@²ˆ•)ÀŠJK¢xԓϤ©À˜‘ Óqâ¸Ì£NˆEHé ë!$Q âdq¸èŽÍŽU‘ã+bð‰$Xž ж ´ˆÌ%ÅqŽs¬À˜ŒB­jx@BX$Ò”Ç"`¦-Èœ³JèXT×l_›Ë­Ý!ó>¤ôD øp<§ûç%øÂ«¾sÏtz2Φ(àQ6¹a6œU½a©YÜÄâ¤T&Ø£2„€(¡Gž«ÓçÏn=u§Âé﹜—Ç•ñ\ë°s¾]ç γ.# £ÜuÄH D dŽLLŠ8Òf!mÉxÆ£R¤Èƒ´Ð(<„Kï”Èp®B)ðp¯ä*צ, ŠB€È"S At% ˜ˆ¥¤0ª+f]ˆ4ÍŠ£Ä¨I9ÄX5%¥‘ɤg•Ô ¶½î*^ÅðY Gœ«9ŠU¤åƒ+ùP†‘Ÿ± ¡õåñùÏc~{œNøÎŸ^}4ú“­=·Ú–·ÐÙóåìqëvÂH”1‚P,f"…Ez×%úíÈ™ˆ„–V·Mª-Š­äâü?,ߣÓÕo^Šy-Æû¬Ë~¬çÊóã©ÀßY}s¯ñl"sÝJ XeJW\µ­(„…Dg™b¥Ì-e)ß$u£¬ëÔ¶ÞêevæÕ6[²kFïlæ®õ·žfvg˜ˆ:]q^6ÛVõïmÏ­ÑÛ³tþLÙö¼õ^rk;ë"l:‘ùOO´ÈÜ=ü׿YÑh…<¾ÒïD—µ¡œŸgYý&µÀÚÏs}ÎÚã-Á5WzŠë}VÒöØxßWä‰^‹ïš</W>®½í§KmâñOŠaéùØòã]Mó·¯5c4DÛœêü^ûP«¶TïzÓµ9oÒv¼×'Žot¹:}eÝuú‹ÞõÇpó¿~“畗׎2¹y£Æ¶º§4Fìø‹+wÆ×6ÔÎN÷AÄ2¼ÜìÂσ´öwç[ñë¤&xgs¼mw¯Šú|í¾48ÏmL2zÏVÏknÖäö—·\»ÁÔjéïUåÅúLóo¸ÖïøÍ»0-•¯C~ ®<ðquYœ]°Q­FšY½žnËŒ§Öu¤&QÖ‡#g6÷!í6óU ë/ŠÏeOa^e¢ÚcRrŠôí·9ÎvÞµ«·½m¬Íüµ~:½+ %5éâô(*w¤U©SmƒÙí4)±:ÝíºzÌ™OSϵ§,ç`V·s]f¤:d9ár©õZ%[ rìÄrn+¿=ñŸ“¼|l³å¶úÊegSŸÓ,jï‹uÖwdÓ©2xãh3zÓrÌ5K)†Gs/Ì­\àÛ°x9IVôÓ’_ÊIÅÊ¿gÉì|-i)çCUYPm7òªX˜^5WºT³¡œB¸¬ê©+Ö]]ÑšÍåÞÂ(kMëO7L…“E `Õ/… Íêó«¶2®ŽògU^GV³ipu´{ŸZÜAç¦t+Fk…[ÄL3l<=h”"ò¿+,5\1¶Ø·¾´jÊ*Äiºóí^žµ.jñQÕçÜÊ7­DüÑ7«eùàDC¹Â;–<–ùÑ­2»0×N·û»Y×¶çš6v’ ž-hf†Ù Êu†3)‚h<§µæãzÉÊñEMd§µ­M[ªyyÙ Õ©yNiĘ$$³"Ý5)V_µÞ5qåIç…+™¿0UïBlv@ÕDR»’ÝEÝñzbÙÒ «”UÜpª2Õ–d´®Iz‚³a“°o[ùAb·/g»º›ÈÍ:Ž+x›á梦ň2Ëd Ô*ÒÒvTþ×çýwº?ô<~?ø>:ëÅ®‚—¹n¼ŽHºÈ%™‚°©S\PB®–½t¬+¥A­ Eà´ýÑ€Í<ç5d£Si"\¦åOÊ#9‘èûÝ ‚Ëh¤’[l‹,#X,Tü¡Âpû //¾ 9fʼò•S8;'ðjÅÖøÜiб3R÷-ºw¨7æÓ ¨[B­HòzAFFïÏÄþïCOQ=MÜ7îú$‹Úä“4F®ÆmŠÅЙˆ’NÙ‹„Eꘀ‚(–Ã,šHÍ’g”µyj’°•4H-nÑÁjo#SxÙ$ê°â¤Ùn:†ýrðð8\Z"«0€ ÄÈ$Vï CEÞõJ½é ²™ Ê›m¥wvÖSæ8x®*F÷BÄGƒedzo5³Gao g׫‡{%I”Ö8>’PIU*,CQGÚÍ¡ WU!4[` Bó6 Œ-ÌF.Ò•ÙrßeÏ!\*bjH„冷°*¨à‚Hul$/"…bÎêóC¦µ3†+YÉÅ„¶±Î$T6˜Õ¶)Ú~üÜÕh™¨3+ú-AGÍzsbTÙeZ¨ˆ)°i 4¾Î؃ʳPÊ!±ció]ؤÞWƒïŒ-žH«[è7d2õçÕãé«Gs¯^ìXV¸;‘𰿛ʼn´*]ÆM^\'Áhä½/I,žogÉÆI2Ð\®N”y0Pœ,´U¶”÷¹ªÂ"ŽÐ‚2ö´7:,¥NÎÉïe”‘9%d‚xJ Ï1úþKÉ]ôÀšÝÔL¯ ““Ó‰a<éëÅ;"Î3ÖPíÌ™Ë,2 Ç&h³‚«a1Tvc%™ÒÆ„ZʲrªÐ…±#ô0dˆ¤OOF°sp°HÑÌ¡`ƒ0¡ÁDJÝ6‚ Û5#"$H‰² $(–Z7„ < ¶a8[D6 ¦ÚOg-BM›ÂÝ#¤ˆrų„Dxòà±0‡°°‰3ÊD^ÆÿgiŠe³Qˆ€úpGºY¾à Í*(›9Œ\¡ˆæž("ÌXÂa‰éyT;DÒ´$’Ú` ¸,AéAAî2GíÈ !GëJùñ0 ¤ËWü½M_|+I¡RÊÑ©X®«–ÈRR´‰HºIyÛ‡Ûœ¹»¼óÇ·ë½Nªª]TĽÂÅ0Tuª «} Šª WPŠÛ•¢Ê+ö›œ¶(r5´ê½MËÙ{<‰È^.DLM~­PªÌõLúôPˆ°¿É•‰³3'õ¦Ó1"Å/gNÊLúaxiÝ/lÓñRV\TMšP¥).ŠPÓÖ»ûízq¼YºÈ.´—wy—Év½FÔk•€‰ÄO”÷㜠Ü¡$ÞÿP’z"$µ.E¥ʹAWxøyxug~žÏâ8"Ÿü»t°¿ðe‘tñÛjË3ÁÁt<“º,( °£™„,À‚ B̈4ƒF—Ðô‹öŸ­AèÍ+ø=’]<¶w˜î[Esç"r>…Ôõ`²ŠV'¿¼Þ!„yBEãÅL}ÏxÝ"0nn¼§$àëŠ`Å·æ¯:Ú›/Ò0_I }9SPHD‰ñŠ20J™+@Ɖäm ÉÙín‘gJŒENëéÐ Ašã¡íáÀSÈßÊ…\œ¬ÃæÂk»øgÔª‘/ÌÀ ›^ >Ph52ï´Å ¡ÐUgëDRQ#ÖrÚpWÜàÎ 5Âà€ü§Hƒ¾ µü@ÞJØ™{H80‚oúìÁK¤4ÑöÌG¤€ ä™Yÿš8ûsç[ñÍ?Ξ‰ÕÙ†ó̇À¯½0†Œ¯x NJ!qÄÒqY$$<Šƒßø£À9“ 3’ŒCûw‹ñÚtùÿV¤^€ËM-&²E§D‹eÛ¿]`çòÏ./‡ƒÂQDvé´EQbŸŽíË¥mYf‹¾ B¿xJWP¤}„ñzl fhUN‡ÜÕk•^i±j]Ps!ugÂä²b¯N®‚~Ù[µgå¢Öòu¦*œyî<Ø¿¡žoëñ|l>¤›Ü%:M8hy%€ϼÛ0Å&·XuŠ>GCCä!öÀƒðqÑšî=q(‘˜·ž‰àçÈ);Úq1›µqÀO—|>—Öí?“û(ƒîÉàLüZ {̧ÀKÍZÑáw\N€!0JÐ¥4lBYœ¤ÔqÒCïg5j5Ü0î&@hCþGru¸¦VÛil7§þ Ìl/DN “™ëü˜à'%gYâPB–û’t9È,<šûÓ¯|¶(Iª{!íï¤ûÇqÅgødl¥ÎPZÐA×'Ñvž1qKxùßS9‘s£¦w×D¤ÐÅ©§_÷œûç8C¤æËÄB©éØõh¶,˜æÀGoÃ~žåÜÍ¥’ |œF2W_-Ê tw<¹v–`0'dÅQlò7h„ ö©hÃv ž ˆ?\ŸéÐÁ÷ ¤ ømvË ®Ûž@, Ü 0uEˆ€>vé ²²("C¬=íŽbÎ(àðö…¸o-qÔÉ„aW"Ÿ<áÎF¢âdBt@G~”Bÿ±îqç´BäP‚³)ƒ;¥”2 L©'¦NÇ)Xa6CF8(@]hÐY”5 † RŸ©N´ð¸‘ßöª\ŸgÁTÿÖ\òžn§æe÷ß^àæ±ÎÇÈ\ã5ñ+üŒê9…’VwAUPóÓÕZîÝßfô‰¹d°ðæöÀ`8íP³³eñ s¡fÞíî¶ð±9ãU#ýï'¤;ËÖF\ýבç¨}BSDìHàí“:CwÓZã@25…ÝêJ¡H§îbe°aÓÊ Ÿlù¯D‡å8VgRó W qB»(8r t$@>Öh8F)?áµ>EÉdM$ôsèÀyC691í puO ?üR ;Ȳp°Tœ`Áj,Ѐjs×"À!ʸ^ €Ø!æ>MzSšªz3̉Òû¬˜Œ€¡s`PXŒoÆV¤—Ö?…b„úQWÄE©…N}tôcoN„¤ÒDÓë›|Ñt3rË‹;8ߢjá׋7߃ ˆ'tÌ%ê"’¼Pý+Ÿ0pj£‡æ¾Áãbr i „hɘ€aðŒ°ÆlËTPmE0­`€7"þœ.ùM"kÈÏû½òÜ>èß:ŠU=­âB/\‘Û>¿J…§×ñ'ýª ,%H¬ÂÂÑÈáúˆÅÁRó ÷¼…[Sí Pú›Vbâ…êùT8Ð?FÄ!ï†0úîMà ñVp´&  ª€cœd㺠í 0ƒÔA@#ùE´'ƒgëì»Ïâ¡  Q€’´1;1œ ñ°5 ÆPÅ¡À›Í›þm@Pú|@Ò šˆõ žº}ãFÞ¥üÃt—lði©¾œt•™ÛÍ”néÚ_­ýü§•½qŠÑ{<¥å§l£òAƒ¤ãí_Ó¸OŸç[”;‡?=zfÔ!›]H­—&4S§8ãf èôð Ö ‡Ä]¬lÔòOþ, '…•q‡ÒË ³¶–t¨ÐR7'’À®b<C¥¢wÐ-2ˆGÈ@K¼Ùº:sŸ‘"À„ñ°Àyvõ÷àÏüOócwsÏǺҳ2X__guî³µ‚¥œvY!é Hè9‘åJbq œi~c˜L›r¹àná´² b@÷ 8À©¢6Ø@‘¸CŒf$éŽ×MT®?vÎüÕžå±DIÑíŽðo $2ÿ#H÷Ó‡Ô_Ö{U»Ð¿5Jå&ºÕOsšxx¶‰Õ‚ðšPYçyôXàÆT‘@¡ªqo[B’¼H„8Pð'8 .zˆ<ç-ˆ$$ÙO)½Cö¥GÝH<™! u R(P3]·j:}f4XÑ@ Á0ÂÌpå„…ÜÇÆœó—($‰!ÊKë†1Mä¼ P¦„PJïfR8ÿV ?¶¾:yî¡`žó"jºæ¾ˆtÅI6GªphDŽóÆ82ö‡›€;f"„ØÅP![Yˆ@GÍ êO‹ˆ¢ƒÇøúÄ|÷³XucilÓÔÔÁYˆ¤Ø ¢ ¢Eä’èÄŸ×Y¹uÔžºVT#ÿÞ]ó¼@N-¤Úàjͨ¥H 1cÞ=¶ìþ=˜X üÙ$ŽÒD¥zªo´™" ÝŠS~r‚MÓ왤¡ç:úxù0¾„÷Ÿ Äüö%¡Àæ¾¼¹ßE$_Å!‡v AÒpà™"‚üÜLîš Á 5ÇL4]¥e ÷õÛ¤6 “?tdI²ôq­‡F=V‚Ã=Šð¼å$‡§­xhüoX–LÀã}`ᑦƒmAA÷cî9å‘b«QÁ÷p_y0É ¼ZÑ…,Z¹â ãé¢ £P(í¿pÀ¤=á›Ê€†ÕRöº²wÒřկhá3òaÐBƺ½ÞœPö¾'–¿ªõwwùbңąw`ß"8!6^4À—ix`cáuœ 1/cDyƒì W+ˆ†[‹ÕÛÉД5‚è –ƒì7Ücº ä` ÎBñ»‚šgñYôà*Lá}BpŠ~ÍØô Úœø¼'&ØÕ»n’±®„À QZ“FúH4Büß•‹õ à]#÷ÛizÇoõÅÉt!¥ ÜÐt{Fƒ5V·ð_6ñ(°ÂCÖ­Éݾ-Ÿºã”!EdCEPí­Ñ!îC0̲|-AKß«¯ƒç0 \i]´.j©᩠ֱc]æ«ú›¶ûKPCÖ/è(÷šuu€Û:M­¤-ð€‰'Á‡O"Pdn‚—Åã&Žúb% ¨ÿ àŠFL9@5H zv”s $l:EVdI u¨Ã^S’(fCõC(}™³ªÑ È#3ö(’¢‚ $ø£@Ä vȤkðÄ;÷賿GkÃÀ(#'”dÌC ¿#ƒáЊJÏ¢C‰¹ZõÁÊžIqÌXXýØñ!ñ¾ŸÕ¡êNaùæÖœ²€õ€h± xÞo_r§¦6,ç.V» Zÿš¢³ÜéÈýð±*ùl‰§G -¢šŽŸ3Vtë† k¹ ÀÜ£Û™E€]èMløRp•ªÝDP•ä÷ÝÍÀÉ=Ó®7àŽáfë¹v΀3? ƒ`/‹%ùÓ=UÉ|P‘ÛFbU ”ùæúcO;Å¡IHmí"f<¿ 0õvN'p@4}G<ÇÓ½÷áÄ{yp•­  ÑQ–e+Ò"39eÆe<ë§#‘©‡çjUa´óáIñY¿+@Ï¿e46g‡9î‰;š(‡Ež”0;˴:Ä æ5®Ûów!³ëY†+Ô«ÚÙÙû~‘Ü‹8$¢Ø÷äÞdç ®BœÀ)‚”„ŽúçÅæ |ü¬ü|Ò(ªÎ§±I¶ÁN¥4 Qá‰9¢Á‰Úä »!sÓÚÿUõáv³<JY.m2&'@vBæ”]ð“f×ýfbõ¾![Ò×F¡¾¼1§·ïV/?¿_zw„}øÞW­áæçߥ(ˆ5ŒàsŠH…B‡‚›è[6ÃâpIôCô!žtk&‰ƒÃé$íhîø”Þ6´¥8ISj„'75‡4)þ'Ž“#Û§Hÿuá€ÖkÍô‘.XIö¥‰I4J^2>¬Î»pUÏ~¸|@ +øè<#ÃN61Ø×²…8¿Fb¡ 0‚Jß烢q&—ÊíëÂjiÞ }ˆƒBøl÷„ý«½<±ë"M6ËðkºÓfÄ#¸ô#;s"ABžR8µûWîÿÈD”ŽB„wyñ¡€‚zÐÛ—ñ]ï9Øðyš¿GsùÝKЊæ‚mGÕ™´ÏEÚåôP”ï‹«™§ =—™› ø7REÞž…í­Ð[ã3?†$ægå “Ô£Ä¸Oûå8ΦF˜Wë Âó‡Yqmðo•û÷éÛ­ª^þ~N¶î¿û9]3©u3{ð‚Þ'ßm£²šM‘¦ë™1ZþŽÐŒéYÌ”¿‡9ïÿ±ê¸\dôÙHÝßIê`¶õ´[õ¹\†Œ·ÆÙïË À“ÆA ñ,Q@ äLÖX QFò+X[ãö~òëüC$ˆ âý¯(:k3wùÔ Ê$Û‰<·þn’=AÒ°†øZ!Š¿e‘49"ÝÖò­ÝèÐ>>LHczÝ;Æ-øíŸ•óvÐ?ó&€k«UŒ&_´úÖhà½à”š!0ŸŠ¡iå0,xEs?1ê÷~ã„$º¦À¨?ßëHå T²ÚÖšQž“I„WàÅÁ$À$Èü&Ê#K‡ºÍ¶QòÐ#I­Ìb"@,Ÿ(J| ^rÔ…GhÀ™X$„ãý—òÿ3R8¬ÜËß þ¸¤ªa&ñ#;¬4,n± Mó©ëª˜ý/å5.á§:ÉÔ{Ê=“›¢ðÛ¾ßëþïày|°ù›] iùÉä[Ð7~%Aù¬ùŸ[­·ú'Ýóu7ÝÿÒ^s‚x%"Æœ·™â€iF*qOý¼ÏH¿ß¹õ<šzGä '¶ßàÚrzH@ÂÔ'¨·B¢ð\ôqB pâBfÄžAqáá"·§7¢N脨€jÄ1í'}®2Æ"yŠBNMÚXÝÒÁBN”L$`Š˜‡DZÇÜRCŸíÝŽ??Ïc‹<03±ýyä 6òã„€âN5æ›óèykˆ‰°ž(TÿÏÒ%z™˜‰’1±èéV£Îîf7¢N¡‹–­¯·Â8Þ å Á]uÏ©hv)Ø siýžº¤9äã“ì']þ¾.Ç»ògyê»Kçjþ*îÖñð>ßä©ûß©cê"wôÒ˜yÎI9̧v•eÝStZì0µ^‹ª*±X*x?Så´¿oÆoœú9Æè®U¼¾fžQë!uÞäå.=× øPJ)G !€¦”Â"É’@-@øÚƒÒyþä§ä™§kH1'"/!b36ygl|h* T°Œ¦úù=·R‡9>—¾ÿ3ý×ðÐ9Æ£I‹9Ê"­€@€" R QtÊcQy7|n|¸MûyØÍýs¿ãèëv6Bžõ“ÚwÁË¿ösn´ì&´ð·ÛÂɨÿ—Þ:2xúù,ŠJ1ù+Ždkð  Ófá9ÿÌL½"ßkx‡„ ’}ò„Ë‚ aRbÑB,¡¼óà›`=o\Wr}ª:‡»?Úÿ:pì~öjî~ßoÅ'ýˆyÎ}œ~‘A¤C`¹ñ ïê-[,Æû‹çù¹ÛvŠãhÆPÄÁöý¥t…ª‰lvBÎ?¤”†œ¤öü':†‘¼ J qA#¿0h@ ‹‘…¨¼”<Ì5‘" " 9DDDy?Æ[éÝzttb«Q–ø’˜xÆõ_KÃùˆ,÷ÓÿZ™ñpuu¤Ÿ3_üü‚ú5ðûNñê?ãØ~äÓ8Ä¥æ­eVÍ­bˆB”¥ î§ÜÕb¾ÒŸWªlÏRC μҽ͉Ïj$#6k’ñÃÒÿ¾„óÜç5ãþïÈÍ ½QˆêËUZÖª,Päå—c .p<âFQb¼<<[YÀT)IWÊPÈ¢”‡(I ÝÕyA¯—ˆ¡<U$PɯV˽h“7êPp„ò›B Mt—t΢9Bî"¨»fÑŠj9N¿È|ýêªZæÊ +Do×p ½6bx…ýIH‘ðˆGç%°_Ïx˜Å-B Ë’®¿µ›þg;‹¥‘+ûžÏ ÃÊ‘ExÑk'!ÞobÄ2LŸ5bðàk®ÂB˜œlj3µi²,s§8=¡6P£Í¦°ò}÷¿ÿÏÒúŸÇïô8ÌxÄÏÓõÅÓÅ!Ì>s¸mo®)È~éþH€¨4ãâæ¿˜`ªyÉ¢³ HÙ¶cÉc’@8µ†Z)ÕRÕd¢ÕTJQiüíïá;Ç{û_õÝ/ìu¿»Ç}ãü“û“ýÛ¤À02_ü[›/+8¨2åÿ-}¡f7œÞr|å%ý7UÓóÞ~ê¿¢Eú·ÏùßÐpFÖÇÁ/KOk³®Éõ;γëñ5{PÉÆ16 ©'±Û.fI¦T¥k|oëß= ×OætøZã¡êgÁæºædñÏX`RH˜@¢V48¬Oµ"LŸ‹ò™ðÕ MÁó …¿q.…d¶CÍ(a% B`ÀFñÂ$XI]_o`êpŸS[Œ÷~á˜BW_G¨ÒlÖŠM C $š},øGŒ@‡ìî×û2xÐBrš Ä‘÷®–|BLBäBšÌ«Q8ðõO¨€&޽ÿæxOëPFOZ,¬.ÜI¸ ÕÔŸùc¬Œ?®Âs¹=5Å$õ–Ãøylh/ 2‡/Õhcˆ Ó L¼äà]D’‹ ‡ tC8$ÕFfÚ5ÑŠ«'I*ÈäÊ%;Æá—á÷Ãq zÁå Úßê@a%/IÔŸ9àô”!z9½™EL š=&®áUnH Kð)»Oæ%¿ÑÀ E¸ V@0 cÙ,¨¤‰º É™u€þJ½í>óQ ü€€È0¬•ئ‘yp¤"¸§Ñež?ï ØÜYÃÉÊÉ!·èŸ XÂk¯üªn{ãí,:bƒæ3?Ø©×&g¿¾W¯(ò éy¾köÎ’ã´{‡îŠ™Ÿô>º¸ðªÙj·í-³(YÐç¤!öªÊúü—ªv¾e{WºÜaY!‹ A Å­1a@5!VvNd(ú¢ƒÆ ¡Â?Ñ3Éñ(p„!„H«ë&Yø§^ Ÿr ‘\ 1vØ…xÑö€kßá§ýM?wB`ÑøÄ€ƒ8‡N °#–~­APUÿ«}]VŸƒÒÑÝÇß"…‡€x:¢‡ ^æ #BRxÄt"O~<ÒìÅÔ“ž %"XLƒðuÔƒBs3ws•ÒèRê„2âÜ:1ÏQI"©¾1Q,jD²$‡½.ë ™è8~~?E È_”<ÅÏd{æ -­í©Þ}QlDŽ´±_WN-#ô«Û©™ùuØbÓÃL¢_5ßFQ0ï`üBÀb¿=ŸïÔêxßoƒåBЩ—< ޢߋãIx×%f… F3äì€7’åo‹d &<@6|“Fž[NÎoÖè›/«‘,ŒÂÙ‰ôÒÈ P½úœ¾}i½zÇ,û.Q6Gü&WÆŽÚžO;‹¸÷ìR—EŒå¥iá)q¿cáŸ:1›¶c°†·[²x8;FíЯŸ¯½ˆz8—{p’Œ}çÈýÊÒw®žf8´M•F<¿î¡A¼mfCy„l(º˜ÒÅŽõÆ&’f„ðåSrÊG*G1»ìæ¬ú’9ÞÛÇËžëíõ«ËÏúlÿlý)ƒŠ,mÝ3!ìèâMHxâäD›Î\OÚ|ð„7<3°4ÆÈ Å HÈ$]AAdCb$íŒV/+›½Í  ÷ÿ/ç4‰5Â×÷)=Ã!5ƒ yuÑK³ôÕRI§ZþŠcHqm΃äÄ™ý7ð>ñGªð\aÏM”2z½°¤ßíNçò”2DzFŠ,çOXG?™âÓœòˆjqe¤Éÿjp2ÛÄX˜‹ñTv5f€¹ñR—j÷œ@i¹UoFmÍ C€BAMø SË~:˜­ÛÊÀÜ»”^eœoŠKŒöÇ%y‹-ñ‚ŒrAê¶¿…ÞQâKßö.1/(ÝÁ<Ã}åÑíkU‘Ünã—ûúŒg÷ èMqò÷ìSØ_”6?\e÷Ó,â­ØäS0r»ž½×!.Ë(Áñ‘N¹í2ʵ÷¥äõ?)Ñ2¸7±ð,Š^ ~ÓÖ[|vk k{w/3’Ü®ñÚ¹GärEñ:éÁÏYÈWÝ÷¬T,b;¸™-væ>£ûóTe'Ã>nFšjƒ8Û¬Ï?Ÿž£_sJhè9øxÉO ]Í\æ‡OäýfóztøÓŸîôO´òŠœ›¥ñI-âw<á7AŽÙ£h¢±‘Ÿ­îS¯í×R×R˜,°®ª¹)®Ù„Eq‹¬ßßÂ; =ê·„\‘ŠRªhÏ'~ìVñÚÉw~å>Yº?$å¸Ñi&|<ÏF>‡Q¤œOÖ¶4v­©1zÈÍöF#Ùâ×j¸ ,è5zV{®/§lÑ“ŠÙ0ÑÀμ¶Ô¸+3u•à} ='{Í¡¾¨-—÷ŠEœñ³º«çõŠ ž”»¦&‹'M.µ°;Ýeáé¬û4ׯÞôyù^%Æ÷S{÷\Þ>ûùî×Qun!hÛ®6jÌj6|åXµ|†×hê‚IÍ#$·8á&¬­³Å«ßã?l2xÝóÅNI{“¨—ÉL%E[hŽþ§BÐy Zêwoû,œÐs„ˆ^P|ê{H‡Wú>fÝ˽yoþ—y!éÞEeºXÄarÃȾGêþÅѯ¢ƒ•Õ×v»Ô3ú]”ÛP÷¦u¢N`CÕ½·ÏÑïÓݧý~Ëç\qïÒ@7™Ýân+³¡B©ñë;ÚFSjÓ†kq(–G'r’9ü¤„4 %ÓJQk‘"À蔋Åt¤O¢<˜>B€d*ÊBÃú †å¼P C§¨¡ñŠ(qzp¦d„8¼8ç^= ÐÆ²-ø¦ˆ¸IšòˆZ¶B7ž9. úåÝÖ·åZ}àCâ!=ó+HÏP¡ùô”tN9ýôd¦ãõÅéÀÛÎæq>ßôñɔд««@R(Å‘ÌÀ âæýA~Ñ€¸¤RŽð ®$ƒvc{o¥®e;6uìýéûξÔÀö/½CƒÄ¯~~Ç?ôî9‘Õ×ÜUŽSÚ}¹ûô!hÒPN‘,´cU¿ƒÔ2¬I¾2#‹XÝĘ‚ßNˆ[<€TÂÕÝIëgÑ@D5# ;‘ w¸¹¤óHB/‘C[²(¾~L_ÛÐõܽýNm£›èj{ÇGYý?j㡉Š{”ýÏG[ž ¹õ}]ýoOÛlz”ï‰Î3Ð yd7žu_sáÚdðõ>Ó õ÷pÆŸþú™ISx^ç´Í¡ ¥õ"Óå†oSªü$‡$Hü!ûŸ¹Y- â ‡A~æ Ð‚%)2O“¡¿À2!##‡­a%…{Ò? $þD=§§-ü5&ïúu¼ÏÅô¿CÍ}og–ôN yÔ÷£;h "K¡™Ø”2·Í-$ijÕЫr…&õÔ¹ †ìÓ@HkÁ€ŸÎÁIë)+ŽWõ çC.þ„ùñ µ):|"òÃÂPô‰c ã&T}BJƒ×ÆÑç,€î„ö´¤„ìfxHÿ£¡B,.½š³KÍ‚ØKœÈ|)ÇH`p\ CȘË4jCŠ  ‘€;øR†£Rú ýo†T8OÚ6öƒÑyj‡OÚ¸–<êxǼ >‚o!ó}…uü|vJ‡¸N«Üÿw…»^8R€I ’€@ø²Aî ‚D8]™lm£Ê ¶PàÕÓ­‚xZe4—}]»SG)ß?Ɉݫ1oîb2¸ÉYmTc=ó«nõVbê¡D @¾+c°ˆ)<>‚çØaÛeq{(.e«R¯ñŒKFP Á zòð0¢YB¯¾PÙh=Fƒ­gvÁH]Xœ¬3–t6?Â’h@;[“sÍ1’1üeF‡ò8à>[@ @€y(xg°¦¥QM4ÑÉ7t]Ó¸¹Ýº9Ý.å•R2—;×ûÊäõ¨gÖàM°á…üŽû/£ü9K®Ò1cÉÓâï!L® ‚ÊHð‡Ù4V”³;¬6÷ДÉÇøet§5/á‰~p/r)VXû¸¥@.ñ{ÓÅÏ!À…ň€Seà;G3W9nvWÛ/`ˆ–â“2$ Ñ À“‰t\Lªy’¢½Ý«ãgÊC­Ù”þ<ß7Ä|0¼\Ó1  K1'"÷‘éd<wÙÌñŒ1~ÕÈx¾{Ö-B5Ýb@¥á~î­™¼òÛý`0›jÎbnÊòWŽ”<éëÌNd·Å™ÁÅ´i±µLh4!`Zùìk$4aæqÎoŠN/pÀÄ©Ð%]“B â…aËŠ¶°çI-RuV¨å@µßÜÀ9kºMóõ ë@YŠ üΦhа3HQq™)öìçÁ,B j¢‹–…‡ BnyôÖÀ‚,ó 3èÔ@-ÖÇôj“rÃ^¿”yU±g‘]<¬ðŸʴUyò>Ù\ä¢÷jÓ«Ó(þi*4ígóÍ¾ÈæƒÉðF‘ëq ô˜ú½¾¯{³{ø²;¢h«!W=!…(ˆó;ç©&2YÈá›X€r¦8“y€XÜ C ;(v»E ÂûˆAã*Z¡ ¨û³YZ1 1ŽÁ*ÜWŘÉ¥“ÙÄBà@E@4Â;ð‹Î¶ßµú>jª7ü 5%_Å@ñ÷3ª’|xx̽1R=ˆØnSà°YœƒKdpaö哸êjyÑ߯iЃb°I/š#Õ+(Ó3 ,E°·°†4<}-ÿ|èI€sº«ý¹…B"¸ô†crä‘k§O<+êQÞË>+‚1é*¯²VŒv»¹Ç>¦†©:ÈH·hT®‰nÛsŸI>Z$J ¤çåÌà µéú£ö ;h¼­–¿dêý4gÀæ.óTÑóÙ€(NßYÆ4¸ª 4¡ð’:Ó\›4‹¤¸¿ÑÛïæ›ïµ3³^læÃ^|3¯=¸”㎭Y£ ‰mÑÔÝ'Þ-y®8è•@Œ!ÛÚ W~Ùqš¥ÈêNÙC¾#PqÐβ*ƒÔ¸þ˜3=OoÅèÿN5ÇË=@ƒ™ž.´cÃjݾՈÁ%§Ñ?–‹~3ø†¸CW‡.~ÚМaŠpWˆ i0±yå%Ò¶!åw3È1¯ÊÐßXeîpÖùí<üÖÅlš,ñDJ "QÇY€<‘ `ð?ŠwÊ”‚È’`fñ̤³q?_‰RËzbXñnÔéûK ëõ‡'þù-:¤z œw"«®ZÍñ@*w¸G'&OåN¦)QaJ¾iI_òZ0X>}´»§ƒÖ!ùqƒ›¹ËÏA¬¯¥“¬Mоâ*™*œPƒ0‡‹¼WPï]E±›Ž®vâ ¤#P“ª= Œ­"PëY6æN|¬¹€eðÞ`£¥;¿;R¯‡ôí+`^1ÿy5¤®å /£§£PG⤒JÇ…ÉQÉì¡ì[¶4õ AcD_;Á{Ã6Ÿ‘ÿ\èý”“I)‘ÔZÅ@ÄÇöêËcW‚n—e{a˜¢âr«ÛVRI\ÞPa¿ímŽ­Ê)pôg®¾~ ¾ ÛÄAƒ›¿–0b†ò¦“!÷<'·ààÙåß“ñ׃×y‹iÏ*õ)¥¯³ŠBK2¬:>þ‡!ü¡ÂŒ™APa„ßWWP…Ñ †p6¦ ’âj‹ÊÁŽfÞ•›žÛ¢¼OìíˆDluK5'¬ °Øƒfšá‘ªYg¶*Ýžf+ŸøîuR“*äüôÍø>É"')\Uk´@t÷ê@Ây%;p^ b²´Æ9êIÂ( Û㻾ë4ûy‚±eÑ zf0é¶òüÚÜ á\/*H¿õ'Lq"9…'?;*äp;èRXF5ËæDÙzXz ³µf ; ±„ê@ÿVòÚž²@ØÃqÊA_p.8_™4ÞX<¡ÐØèá7гêg·¢â¥_a¸2[ˆYˆlÄ,ćB[Ý~j˜¡¡?R3vUï»ÓM­Gé0Ñ„_·J–x2ÛG…ÀD¥àöpÇô¦D\‘ǰåQ!γ–¨ôßÉ®4g£ý»oÌë.¡¾®?rjÙh5ׯûÉ÷¨ˆHÔéÊ4~=3›\!ËÔS™£ðÕ†û{/oúø ’eD^Š%ÃבmX‘da aJ"Q”M’úG ,b=TòÎñ}·Ž¡±K7È¡:¬\.{¿·>I戆ΎÝÊ“bwìCz*»]Rÿ;nö¥}DÝwÆÐ>ÆÌ,z"P‘ê=üÀà¤ìŒ,Y*í4’±ê&뜋”6§0é°ó¿1_\cÂ¦Ö 6:QÑáu44êoÝðCtw1.ñH]×(¯¢tÂám,½>¶ü EGÀqæZЄóùBxC€!èƒXÑ4ݳ?ç(€iÊδàêŽÕ’0ÕÂ÷µ.Ÿ’/¥–ezͯØ&="÷û¸ ‰‚I5(SŽîcC0°‰©¶^Œà ÙJÛ#ßÖ`d÷t Åsx¼7@ÖRŽ#Zk–ƒ+nصmÜ&OËBöÚi$[ýfI¡Èl¨LÇ6ªs쯨çiÝö» ª— Éôá[¨‘ltð× l”©‚My{³ñà_DF¶°Ib̼xîLÔ¹‰µBmиS­*Æ£MZp6Q}.3mÉÔš^¯Úa.ˆCd2uÙ3¼Tݩ̈́žmìA_˜ÀÞžóûÝ8ZQ·OŠ`xÌb2.âMÊ+ZJ³B,I° (5q¨!øg¤YÄ„n 2ȵ°¯°<¬ ‘Vfo7./lI(WÇ!ïX±{=[²þ¹gUr˜˜@›‚Ï+œ ×ÅŠí—3.ó”1?ï'qÈ@ÙǘµÞ,s{}b;"µçUî {Ö±¢{kÓÍNé™#f‡åá pBCv+²XíO?7Íx›åÉßÔ#ºÃY;Âð¦füËM4š—Œ(±-n”þl±Îþv W#ô¼G›¯Ç] ¤ ¾ÃqŽ˜.—.VÝo‚kèþç«îu71R:Ö+]mR†{îºø¢GdPõhT Ž[~¯#k¤jŸ?O¹œ´äiÚZÙiš°¸šU/ëÖÆ³+Þ¤¿ôü×/à0ÚiU1¨QQ|]‹ÿIäà4OUTæ÷÷#5ŸJ§É‰:bQBØd—BPT”pâáI»‡œBuh¤¬Þ•çοøh& 2;£ ú÷9Ÿ™˜qˆXa€p(žP °!.ùbßÅ”+Äè|o/AÃÿZž¥ôÙyëCÉ¿”‡Vá`Äèý¨ø'$¹Ü˜˜U/%våé"» ÖÂvÈIÝôÛvÞ1úAè¾$øÅ»¦ñ@ÆžÌ^WZEÂŒ6ÑÒÂ{µRÉ£…èe‚¢Y~¡[z·í] Ió‹$Ž[‰ÀØðÑËÁµîBôˆŒ…o¸$ñj ™°¸Š8§òè %|ar‚‡U/tñlÄ4£Í íiž¢…O[%ûÏa'´I$v-J©ÖGaxüÇü=Um,i«Ü‚†(ç€ Ä<Ú»ªf¶ vGšï»tåò_Ýøz ¡Ëá=÷ÉN'†¹5ˆÈq¤`Ù·M™¹ˆ] ‚`»Ä•£çê…×_ Uå$"Òé´ð;“F"(R{¨Ìá÷°vÂNÏ4¿‘&‡¿qpPœ@j~H€>œAUë½{Œ<—ÃiÍ^uð×óýVHà 4¢Ï)N›¹gÿ-pRo±iÇuÃo¨ZeòÎç7ämöL÷ì!Ö´_IÞ¿ ,&ÊÁÏ0`Ï8þ"ƪì6F#pådJ@ûŒP“M÷Úª<”GÖ(ltN¸p;²‡2ý7óiýÒyg€ís¥yÏÑ ë:OøÓ»Üï«]1ëÑ?”,"q}ùùÚ«Ü9ÀÉF¸ENy…þBnÐ8I’’Æ›;™)‡—à²DÌt–9ò‚õ“$… @!Žà»S:H@G¡ÕI£„³ßŽØéõ–¾d2 Þ†]È× Ë“ Éä•åÃ…ù}Î,2Á"ëFº…”à|³ PdíûÆ^,E˜H³Jc ïøÒQ @Ðî´©*ÞXèñð“B-(„ÚŠ`ƒ(ù³²‚eHÄï©Ö0c ø9is—ĉ)Î ™B,@x!×0—ÃÆG³Ä6&ø=­3Ím–HÁ,6WÇ`èòUŒ¸äh%6 Ó™¼ý>~ܨå7ç~þš9ÎJWQV’T}O§!TTs©‘O§ÏËgí¬‘r·l!H 9"×IC~ gÓ¸³Ð8¼—‚†±«n`\o@2&“|ÿ÷¿¼JeÀ‚¸£ÃYÓ¤ÅsçºËXíÜæÞ+ÖêrÐÞi€ükKKÏ`¶>Pà<ôxkÿ×{ßYøŸÀ^ŸN7ûl/ò>àÀm5$²à ‚’üT) ÍÊÌË´$yLšt}¥®ûÝks°š¨õpü¥6â4d\ÛŠB£;¼`–¶/€‚­8uPœÜ£öþ¦xpdê¦Gãx=ATǰRo¨Ô­7_÷ýY[’mÂF!‹""@  )DYÁ½Íÿ/˜ëúàß—1ÆN-Œ)ExÌfnâÆ­¾2Ÿ†PIê%F÷ÚV1 X!*!æ\™ÁÜ QšG#d†—P P‚°Ôa<çõb–º€ÚŽ&1gR$ÕÄ{Ûc ŠMÌä6‘\––û]Éí°ï2/×'Ó8îH™yÌ~JìÈçyhQUÜ‘Ž?u[Ï㠽̧¨, U= ‡zkŠ$sªíÍœO§Ø­¢ Ëc—O Ñ­ /¡›jWÎ ä‹'ðàÚƒVP±X =¡À#¯(>c aéAÁþ}›´¶®æ$œ)|#ÕždFwttC¹@&ÀuÀ!¢b$GîÐÿ)UØÀ¯G%rÊ:=”Õ9æEg<åjbùކ€$‰1ÅÕ`ªâÎ2bÞ*£ ¾_/¶ž’7§ÏËä\Þ7/çIñvž3¿Æo0Œì£ë-ްÿt™ð¤«‘Ñ œîaT ÈE­\Ü †òÛcëŒçý#ygbïiÍ OÂ’^ŽÀá„l8"+ÜÜÁÓ @ºD}¹Œ]¬~‰?@©Ã—Ý3` öÿÙ‰šA áBò¯#û†éÆæzŸ·ß]äNKkxsµÉYIªùÔzƯM½VJEö=k^¿·ÍyëÔzè:¢I(èºÍÇ4ÁC+Á£ñÚj6ph‚ǘRdD9ˆúo‡ Cá‹, †–€¿Ø×§fCûqýóh²¥€Sâdª—ŸÉSØW_YBC«Ø8ž¡cÖ¦D9b©Le¯§%áyÃ:OûÁíäI%/VŸÑ–J»Tl® …þdž`‡Eþßz;´?¡Î _2iì>|]Ö® ñÂêÇT“²)˲¨<Ä4I¥i(øeŸ²I“UŽfòt x´¦f_˜=Ég_‚û‡3à3W… ¸ <]ùƒÒª—Œ1úÁ a,QYˆðXQ±CÇ”ôËv8jrúÍÛÓ‡TFëYÖºÌXŠázÐÔhDÇa Y¤UñDß W3daœmx›SìÙZœTŨޝ»¡ûÍÝŸ(*ˆ\ë¸ÿÓ¹ ¿%BÔZf”jà%ýõ½3õ;žó…7øî<ñ ­à)­$>7gY¤J0¡ž/BlОMÑ2ZÑã9\>~©@2¼) Þ¥‚’N> iÔ‘w¸…xí—Hû}V…T1v#Ò†ΑÈH×e~ÂÙx„2Ã;ônׯwûHŠå^Ò’S}à17Lý»7HLgZÚÔœÌ »¶ì«Bø¥pÍZÍÃÙò,®l0’ûgBȤވOò b\y¹Çö¡¹‰mÍ^Wdç½»'N:lîš·˜7Ø€¨ú·W¿§LÞÉ*mÍûHK@«:òµûWê®å¸e̺@mïN6yçÀ2¯s±ûï9€ßpþ³Ë%ü4rôÚ.Ç öAŠÑ済Àl!bçÊy8•Bä6¼RªŠi½k…!D€ü- ét¯?§°ÏEé§û˜8V…'0CݱØwÿGÁqäœN¦¶ù—r_µ©‹t %"¨O¥l?\úE"ÄHÛ( ÕÇ ¾)9cK)‹D¤ëm›RÑöÑËÖtµÂEpÝñwOÊ Eô‰~Šx€¤…)p*G‡Ój|®ÂLØ’ç¸]Ø@(QšØPš!\ÐòÆ4<¯@ãmÚx<ô,rýfJ'Ë.òÐïëF†YHv³ev4«cãýÉu¬¡ãqº‹v­s3JASŠ(FjýÆ ÜÞE½ÿ‹öú®£Ú¤p(À¹Ä~B<eŽ‹Â;ÖZÿÙ N¨Å‰>ºRÌ5yãåV^;9¦$ÕYúFÛ»oú E…Ôªrü@cãðñ`¡ÿŽ[§ ê?øóÃS¡ùªU0«ŒÜ4ÚP4 £l IŽ6"CV!ŒéhËô °VéêVש:±5T©÷'éßmGÄ»7 —Y©Ñyã‘Ê—WͶ•þDƒÿMÎY)ÞžF»—ÕÁÂEóï‘6)AjŠ‚»2ö“û‘B´I¯P ìÚ2Ra¦<>~*– K fa2bOà’‚ô¼„‰3X•§@t*ßÓÌÏù Š- 31¥ö-ϘFNuî0&q`¡âµ#¤aE¨B²Ð¶´¿=Q¡—–Y”ÞêÁÝl®âÌ dÍβe6ø@?è"€KJ¼Ð!ÌÊÓ|UŒ÷hÒ5[†ƒª.ÏVûâ*& þœÐÕ7ñM!ÑÎ ™“¥Ã<£¨ÒçÃ1Jäé3&ì4õϼC„cΗ|¼ì=‘€DÕ2îÛj$|ô&¤›.2…%­>¬nø¡£þ¹pú‡àæjü„—ÖR„…vßvµImà¬lÔ7!€TÛ†ûÙzuRsšbx<¬‰cCw.¯S!¨ËnuYeWº×K£ÒóŠk(/ýâ-'5V¼ºe MMRVËÿ”CvøP%Âç6”Õüz|bâð £ñÍ™àH ªQÌ(¸qv§ƒòÎó‘qøµ&D'äàl®Ãð=ùBæ_¨`s¤Š*Ö~JÝÞ¾7qò^Øs 0—_îÝ`rƒý-ÀY8KTÒúöE4ò  /ÂïY³n4#Øòé$ùåc’ù->Ï»¹EÏíäÊüaà!èŽ?äÇç^>HB;ŠU6foxùLÖ9úiÍ7ýÌÿ€f#Ö©’_xç,à  Ö9—6s®+4á@+ @ä$·\@SÓª5þoìâ{Ÿ$E!+óûí¢Ù½Ãz9UÌkƼ‡-è¯ñ­w§m¹¬s¼í¹IcÉoEºQ].½'o;­áמUÃQêÔº¥3òpµ¢.*% ¯—ìm :>×k¢\"żŽ+‹”úÕ2øZ¸s&6Ì_ÖªÕJ‹IYÛÛG …d«­§ñ.ÒÿJ>‚*‘]j) °õï~erÔ_“ð!r &Z‡¯Ku”icÔ´%êÇ=R°uª¦J¥ŽæÝm]J"ÅŒEC A^ŒÈ`HÓ¤§É¾%ã„ÈÚV”ÇÈHM‘É|xl{^Çîü¦|²ÙSŽˆV’Ay¹ø­À k㩆ýx RÝAMÉ,¨E„æ@¹ -j ®¨\Ó ­P²%ª‚ä³j©pÕT—2Í!:ÆS¬Ìè‘óì †CBI´ÀÆÈAT+"™˜S‚ؽøcoc^=9µÞ+ÒIfLMÈåôb°°”¶¡kšïjï:óÇo›råÍ®[ÆÅ´sx×4^Âño[ß•¹óf´_jƒ}`¨ÈæK0« Y…$§^+ł܋^Š·¸mo¨¯Fôo\·¦¢àRms›„–Hdd'H“HE ƒ0d"–ad…™/I32¹rUP I0BId$Á6’˜ E$+ ‰„!•ô4Ø6H@ÙÓ¨R0 HIìôQ•ÚI0`$ÌÃmÞÕH@†Û!$¦0a! ba%$‘ f%Ì$¹ ) vöB™„¹ BæI12K˜Yª„…é!.góÝPš¨d’ÉÆ’Ó¢Ø^˜ØI2²Iƒ$œ&@–dŠs$/aj-ZñµíU­ìÕÍljÕãkZ‚,Y q¬̲H,—°% I{6¤¹”’^ÉÉ/BÌ%$œr¤ÐÉ’ê’ ²\ÉçØBk°dI ƒ12B’@ÄÈ,!)„ºÔHÈI ‰s N &,Ôj&ó1á[ˆ2°&³™^’@›h‘’KÜi8I“@M›(¹’ö@) I!rLL)„„6D!3²#$ 4$ &`À‚Ã|äBÙè I3§&éo¶_sËv†‘$€¼²ä߆+s #Ìò,HuìŒÁ¸8³„­"Sd?žÅ6Š4ZUBT.A0‹j£ Æ$¤˜N“Œ̪§R©ˆÛUUMÑp‘)B¨pJ:©E'F¨Þ^Sl.O7¯!q'ü•"€&l×ÂLL¤Œ©–J#h±šîê\î‡ ”ÇNbîéÝÝ;»®wbDÝtÃvë¥Ç;\¸ww.w]“u×]Òî¹sœºéÓœú­¯iôÊ l`jé‰`XHé˜Ñu”¡%-fšWÓHèµIN:)Õ„,‚± X’2ôî´2d°&ôêèÈÌ—ÃÝw.îå·ILsÏ.‹»Érçµ€°‚Hà‚Ah6t™(D¤”A‚Y)–ÛD´BD€JA¢`Ș@” ,’Ë. QF Dª(È©!ª¨ÕÕTÃeH¡2 ¢*©–¤é u Ц*¥ dª•ˆ¤Y¨À™iRª’:STÊŽš‚š¢Ua’‘§R„1Ç `’ÑBHÊ!@È8%•(Z‚¤µ÷ÚYª´P,\–±ç8Þ:¼æ^Îáx¯7ò>GÌùýŸÂ+ú2|ªþ}e/à÷¢B”¸þº¤%·²íù¿z¾†í õîNÅ ×L–ÃAüsSCgûaO{˜0Ë;¢g÷&©q2Æóªø°g™rí›…º¶(uþƒªŽ GxÕ•íkT/þquJ” ›}Gû5üá(uíúU/?«}6æñãê||?‡5ïš-™X]2ïÝyŽëä¥*v.ùé‹Qgy§Úxè™üÿ™tÊ®’£™ÛkÁ¹QGi‰ÖÑšÝ+ùè¿fÏG8-Yh/µ¾nÆêfR¿#˜»ŠøÙ÷}ÿ¹^B,÷Ó/-ðôáì÷q ýªìttoò—%ßfszßûV—J°þß7åùF»óxw~Wã'”jN™³åòiØÙ‚á5ip?WknvFB;ݯøp™Åjf(4ü½¦Á¶w‹öÍJO§ZÔS½þT¤›ùüú(å='PÃìúêã6>/Eï"¿Œí:^¹º84ÿÞýn›Èí÷3½ð®Pé¢~š»Â;˜ÖžkNÆ“µy噿Ÿ Þ()“lt–&›< ÷ÊE‹ Ûà‡’Ï"¢"Ð#BHæË¼Td3ù“ÃÃÙŤÒá¹5'ú:Üæ«kÔ y ‰˜ŸÓØ3e£nïzú¿ÒŽîzús…KkœÉð:IÛé=‚ÅÇË J‡øÝÚñ´:}ëO³¾É@…KÜÞJÞʧz›Õªâhý]Í$‡ŒµÑrÜrÓs¡Ë@Õ1(¯ÊëÀ·È¢ýN­³jˆÝùîT;¨©ê«ÇÉ_®{ÍÍj~ÍPœšn¿‚cöëc^ðÁüëüuØ»çn'ÕºâËåüãõ®•³7‹w"—€‘™«:°uʧi»÷\… 6 &ø3P)ë\“2šÔNvÓ†bÝ,Ž̳ZüeÜ«¼xÿK¶Ú@Ftý>¤s÷ôë¿ß, ÞΫû,‡wíÆÛszN*= êå¾ÍÇÓÞãå0u2¶ßŽëÇ–<¼¨ìZòæÝýZŽ";q5K»mÉ«—'õÖÇÚ!2ònl+8l):ÜmË%t¯/í²ÚMµú³"¼·mhŒ?ÄnÑÎZ(·©8ëÐJm7]ÿkYŠ«O3$´·ƒÚÇç\_…©mŸÔëiwÌ;ë^eÿ"½*'{¼•ÿfã^çZRÌÞé²±ÿ ªÎÅK½]–¸^øvj¡ºÞ=ž¡ß•L/X¿|t““¤õM~ÚmÎä—›j´ÓšßR:.!ë&`ÙEt…Gýä÷ãQÆ'v$–óøÝÒÆÃ׋Ìe.<ÖË(™Zšh»ŠkKÎ÷wEyQ¾êo¢0žö²8UÞ¾¶uÛAàRÚÝcÁkÝ&÷n~γ·>…Gñ¶ŠRÜL'¼žèܼ·r'!ι[jðü9þæå§ó´i˜lE¼Î|ÿswK]žwÕÁáëWöÍý¥o>SÞY¬le+üyµØGN/f&™>ŸƒÈܺÞ1æzÍ÷ÝǧÚÌÅ+Ëþcñ«ýdØÝ?ìúcÅÙö]ˆžz÷=w(¥Y´‚‰ûú0ž´Þ½4Z'õ#Ië? !ü§ËÅÿ9Ý CŸ•c¨xð·ºWüŸjt}½í§2ë`¶—õ4Öo*e7æÕ ¾°R˜Ôêv:þ$”‹ÌùtÚWðÛþðú—uÒ|ÜDvÖ‹¹ïžz?™Õ¹XmÆÖk/woJe„Or¾ºÞÇeÏËg!âí<½ÿÒ–¾[Ò•ºæüž¥Îö9K4ûwŒÁÚQ̯ DüÆ)N.^ÛÜý¼×PqÚ&–YjÃô¼óf·é¾âPgo½û§;…$M¼òÆÒ+cÇÍn4˜×Çå×ËMÛñUå5#=¹ss½ý#¯²ø~¨«QŒÙ$+oO+iÌݾ½1¿pc*X7{:££vC?ÀÜìûÛJóþïª*;uöXõ²ù.´ê>ÓrúhÏ®Šâ{±5´üK>Û³¦UVEm³þŒ ­ÍSJc‰~„–¿Îápêv<øìô Ó}™Ýf;•ïk¢¸Ø0µ½«o?Ép«3?ƒvÖ ƒ'ÅSpµÝ>Ÿ¹¸]ÞLût/\gîÿ’¿¶ó¢•¦î¨<+2VË~ï•vÊ8Ì\'ûÝÑ_ØS#¾ ü;þ0b?n‘™ýzJ"¿#±;aîé4j~£ú·Ü½ýw&ó½mwÓsþJ ”Üx¶Š½ÃÜ5¶½]FÂU·7qõ-WÇ1Eã×Ì{¸:U-E›¬•‚ Y$¢´o»YŸm?"´påç>¤ÿÝQE¶çe2~ Z»-g 2•æ'b‚6eq—µcÐý$¶û$W™\}_!£jÐè¿È._¤:w,r^¾wýÞ]¯óýÊìVŒìÞ: ÿ£]íjªéÕlÄ¥¡¾Çÿ’i½§Ü2ˆ7ÎU2tµŒŒ>¿ÄVáÈÕxwâ£üï¼òö«þÞÍ&-exûŽÕ …•ÛŠ{‡ûªë­xÛç»Þ[Ý´#GÉQI3%\»Î×ËÝXûž¹¼ů!ºÓ7Q­ T€ˆ@H!ÄIÝv"+ä\ €˜Ž]¤]ÞyÐ ž<ñ°IHá–Ò!6ðÂi4dàà‡€Ùa‚JHàƒL&i”œÛÃi-¡ûÚûÑV$óúýø·íŽx¯Èß@pH<âŽ1ºt’S %)‚Èdd…!.«Pb¹j¢ÍñÊå±b½;±!Qˆ£cb ž§ 1C% KÎéçs»´wD§7Gtî;¸ +ºåÎsŽîîtéݺæ.Îç/a¼ó\ÝÝêxK )Â`§"” ¡Qº!Ó-Ä A$Ò…Šªi@„F¡ÑLÄ%P‰#¨ÚUÅ (JL…)ÕEÔ’AXª&©Õ:µA Ë Hñ) Ù,™Rª$Û$È)àˆ(àa¬(":P´ ('R¤0‹V Á*˜¶Û€Ç$úp0Š "I< †BeYHS 1U#¢Y"Ú×½ßË}´¶-’Æ¢(ƒbcKÙ»)Ùsas¥Ý\w;ww]8»Žætîç\uuww9tîîtèwus˜ã¹ÎÝ9×;®§]˃¸\:¸EPkÞé骜ë?d€åÅ ¬Šà‘àã#Œmæ>ÓNB’¤(?1H$È” 8šB… ;]U&Z³ƒh´(JET™‚ëɪ7…™3–ô[³äCÚ7­pv½MÆž“6©Í·ÝäVïŒÑÝmÂÔqÇ Œç1ãGgKn26Ö³\^¤Aq¾ÃU[ž7³ <<í¶kR²rwÖÖ¸ªÏOŠfå;¹u¿3[Ún­î'žQŠÜä0„(É$ôr}ûäÓ„û6´ ¹SRC×5¦@z«AùùÂhe–zS¦}Z)õ{½Ó7Û÷™Œm¡ L*ˆwrÇ«R_¾Ä ŠÇÆoP¼ˆ¯ŠÚÃn`Г@r‡¿lRp/Jƒ PÊIìÌÆfdÞü‰5ا1˜qbÑóî ¡k"^ȾûÊ@í¹twª©†3Wéb’XBË£ÇÎ9ïþ>("Pš>Áƒt! {B7ãODRXIÞštr<ˆzÿ×3¢¯þÐà¡K¬/ÁT c‘“Ékÿ}^Ìg„û/H@¬î—Þ!œ3õg˜òä½M!ÌåÛ QÝM!+ŸÂÎ1S©Š¸0]è_è`N_‰ñdÒÅf5Ýéj£­nŸ÷Ð #vº_¼†Rð%€sÆ+¾P{4â‚â_Sp6ËöHõ4›áQïL¼µÖf©lÉv÷öóöüTé-†½‘fQ-cgÒûŠ}e¦YµK%þ¹(òrDÓ#«ù”7|–Ì¡ÁóX‡ÐIÏSˆ>{{ý³—ÆoãGã˜<#ÏÑÈqôx7€ò¥ÌV“¾‚´ è“ͳçþ öð 5?#Û>oݬ8,zfÓý¡ 1TÏ1ðjÎaöò@±Ãô]?Ô8ꡃnEaÄ3IKÔʘ "è8±²»5Æx9‹¥ÙrùæîÏéXp6ŽÝåHÞ Q áz&€¯ß`ݽýö—éY»ÖYÉþì¼»ò×ÉÖŽ 8ê|·/N+ÜÓR¡KÚ'õ¿?4*~ÄYÑñõ>È‹ðB ã. ˜°R' ÀæÔbÌ%Wçz×£Dt¿“Ö Pˆl­‰ÙÏù ð¨€ð@žèw³&å¤Rn gåS5b@SUNej8Ó½§â²R$€ƒ „ÕŸ‡²n8ñFN+‚z&:qù.v‰¯ vçŸÕDØ|¸7¬˜œ“ŸT ÌR‡l@&Äš'2‘]×gÙÉNlæ‚QA ±t<^÷dÒ>+hÆDy(J©+˜Iáê>€†tÊ ÷sjlü¨ %Üñ_€C¥z©½„ïGÜ™7²”]€‡¿SV§†,?Så ‚QAH^‡>ùët<ýÆø¡Å:,…žñ™_*{îóÎjÝ6ZŸìç¹sFQòˆvðHZá‰ìÆÙ_;«8ÍbTÝÉH á†cn–ÐÕ‰"ãŽèCž!úÑ´.Ý~½W•&Â¥ÿ$¤ö\ÎÒuã2€ž<1HMŠ"«ÀÞ#öž—õà8ü±¹éXû¿m™ðÇyÞpbyj_Jº¤ƒ=Ø“:º0Ø)GY á*§ºSiózdã_ôJ„ùŠˆërÚÓƒGcÔ·­–£Ò:pAMf3—¨8žbß„ ¸à°¨˜ -óΕS ÷B—ü?ë¯ðkr MOg oö÷Ñ(%-,o”Ó,tOj® ‰W* ž!ß{ˆÂî×P^C—ÇÙV0¨v‚0à#±|‘ïµ6•B_¥¯Ì¶F8eÛT Žùt·ø¦êÀÀì‡Fßê TV‚MˆÈU~8cß-5[o˜ÐýŠN| ×¸…£:À'³5„áóL>fPñœÚ$“2,(¾ê¤«™ìÀ-ø+iǹ íDC'Poôóíöžu”äø;;´oÉ âj›¦õ6dzÐI>–ô¤O …ߤ9é}'’`)K•¦ìEbûkÈ©šÊÝüç˜ç>çÊPð°ÚÚ@~nå!'´>·#AÓ:PÝ@m¿UsÛ›ÍVBÅkì©í¡–¾sô´ýSƒñ6„£CÒlÂ?³sXdžn¤¡#ʔդMg ø9ú—Bar[n0?Žh¿˜Ü]»ûéû7D´×Î@hi›ÿˆÉ2p9|ín¢¿ªƒX"æQ7&h…\B@O "ßÝ úuA°dÅ«ùܤg§öe>ôfßþUÏ ßz.%ܾã®Ñxÿ}C÷+ÿ¯ÊÀM·P®•çÞªãäƒÒ!õâ4×¾˜ÁÇ-Œ» ¹¬;FŠEJÎ!Ùñòl¤n¿¨+ßnú¨râH±eC('ˆ(ýË1|ÌŽhi«ùÆñÄUÇXQ)J^yÐîë¼¼óh«&yÕÝ×X’êZ UJD¦Ö°ØD Ïõ½?ùZ¥ç¼ï£üî{Ë~÷çÿøß¡ø¿Ê~Ãýßg|O‹üÙöúˆÅ ¯ÁF×ïêéU¼Ôþ:ëMH?R*æˆÒ¨3ÿYØæï&ë9TŸúÒ~{ýžuÆýoԜؠ„›¨ù‡çÑáÑôd< O17¿õý”ÓË\`‹°S—-,‰TŠ0SJJ2K¦O3—SŸYJÁ•ˆÃqðêjV©V(>Ó*ÄcÕò°Â=ð]˜)J@sŇÓXˆÈ¥_©Ìs€zt\ãGý‹×º-P…ýaaÄ ß‡mKé9Rƒ¨ºy’± 8Ûï,Â5SÓ˜éxÓž3 ½ –ξpó'€˜c’ÑÖr½Æ :.ñ¾µp0~ô(&Æ“?’ÄAWî~+›ž—~E”U.œIþÅx²2Gž©Þ›;> Ž5Ù³‰êääÎ r<ž¼’Øð Y«ÄþÃ÷6>¹øx=½D,…ú³¦g;·ônïêwœó{vÿÈl¢Õ2¼›£×gÏ“fnÍü^ã”'¾yIw[ Üêô\$gör†“ †F¦ãJærã>ÏÝŽNž¿Áì_àdæè¸<˜nä—wûz—íñk2jÊßZÿË+VçÐÕ­9Ï~;WåÏÚìæt´P<ª¹æo®ó%{cïõy2¿WEž³wëª|É8:·hÊÓg ¦/œŸûÉn Ü‚‰÷KR oE{Üç÷ló ¡Î®&ñðîÅõPwz‚Ïrbu9òË5,+Ôvƒ·Å™Ifa ù£ôsíf¶Wz샜Øòbj)$ÄšIÜì4žáž©ž}=«ó^múáÛOúÿ_ÖݶÏ,žÕ<§QS %º¥"µ7ó_Ê<="G¨)ç÷{y” š¢†Ò©h M„iA\IN?öetÁË|„&ÂÖn`¾f15¢”!YÌù5mÏ Qg[-úIˆá”, Æu¥^t€Ü9ñ½áC¤ë/çÞœ¯¥çfM ‘ÑLª°ÂJ¤w&Rߪ‘N$î =o¯¸^ÇGíöD™¬9²ƒA=ЊL¨¢Bg„ÔИ!ÍYR™néh$ä¾}q Ô´ ×gÎ:Äu¢NeH¢°“¼»’|ª¸ÀÀÛ)Ëè"oÄû|¦±>‚x´1<ŽÄåêyõψ~ÔNäžu§ÄyЉïtô,m (mƒó¸¨8¹¼w%JÏA¯|_MCðDÒ2[·sozëš8ðéôʧ¯£eqà ñ°¦Ô :¢¥rÀˆPŽw ^µ¹I”þY$ÙgŒ€€Ò)cý·¾áQÛéJIŒ>'Ý(x~îü*6”T_¸âß$!6\ÏwŒô°vÙlâl˜©Í¡†(Lð $lÏíMW8òL(.m)h¤?Ü{K¯×jt žÕªtû泟Ú×ä2õã¸Z5žú ¢;z&HÀpÙ€t £¦W ÎÙÀÞ8 bölËöÌÚ¦zúœ žP–•àkÞOry¾Fäð À†´A¾uzŸ•(wáÁ0 0c-Ûß12G€$=ÆP\)ÅÎRl¤Kªð”ýêHµ “/Á$OˆfÓ@%€Ð4‘@8 â ¥¶ña°Â‘vL¦IlÙåâ舨PØhi Ó +0ÊÿpPZã”%Y Š’¦O<¶’Úºp?úÒ"åz@ÞxÊÇj“þ4^ŸÝoåA»úw3O‘€ˆ/m’EÍ\e¸¢'ƒÞ;j>(.é¦f@TL1@7€!z2Kå @“„ ÞÛüüpHÌ00ÿV€&ïÁc¶†–Áüã$‚k/Í•BM’Ë}ò58ø<ø£ÜC­Pq†t(U ¤®Hx#p$õeU3­X}CNõÂÉwÚ%ZH•¨li[gïÐ!mÄ BÁ½lÀÁ¦)h·}ä+ Ô¼Qfv=Z$Î?8´Sßñ¨„”på;éC=BXzX=£¨ó¼¹éC¨Äá…¸ó °vOo¦†²™µ¹Ké(@ý¶{”¡Aî™Î²ntóÖwØé0DA&À$H ûbñ!ËôïxETu’úŠ™wÓ=w, ë|4P¿5#ñœG®ŽbÏVOkRž3?Švȳ© "ƒ ›ß iƒÈçÕ1?0¿x=ÕaÐaeçØöØì¸Cb1 V^Ƭî,Ù·ZC 1UDb4ì„—‡ÞiÞÊ×"a€±á‹Q¹8ÅÕ/}̶b[’ÀO–xI$›j—ËÌs¹‡×Ä FýßÒãÕž›Ñxc–´ÁBU|°C{½0EðCÂ!ý_(|¼e'¹;PɧuY<¿= ¬4X²ºæx8^ÏÓ”Ù¥}FŽåEþŽBè*ˆ)gv[¿ÀÂy<búˆhÓƒ[³(1{ÛŒ»œÑñrK‡i‡|$<ØP:!ŸÀräVo/‡º‚[kü  Íˆ*‰k¥KF«ñ@ m¸x“´\õêofŽ€sƒØq㎃ÁIn,)eJU,DD›ós±Cý7m>‡É#µ>Ktæå~ àb×Á«­+…!{ùsƒŒ’a'ŒI'ÍʾÔ÷!¼„H-%#~ã `n|I ýS…>;­ÂÌ>ð3¥e E•Ù^×/ñö©„Ö5³u®«éi;G–«ÊùúzB±ÂÜlOI“›õÇdü±Kýßœ¿'ß÷Ä»Cþk!õcÿкºŒj8?Ìþ2¿ý|¥Eëà>EïCŒÊ¯cÑYæ?¼6·–iî±a Ø 2(û©„Tú!§'‹[ñ¤ü_"üVÊån]P ñS}µq ˆBäcò¥$ „5L1½ãË7¼ÀÓãP™¤ÄçxÍ4] M°¿ÊQÙfs§I‘æ”+25{£k­úŠIkCòi>ƒ©ý•|d°>ÝwŒCªuG7)’—Ræàr.RRIíR)sVPÚ*j ²$«Ú”žÏ“FÚ­·u±“í|ÒéÞ'o¯+>KûäÙÁ Óˆ\«÷/JmÕøøýw»Np°²\v¿ñÄxçã[ÎX-´äiꯊ V‹9ô8)ÀAÐwùyŒ9ü8çÛ{{x¨ ža@Ý[@"ÒP$ñÙ¢óÕå÷šdXÞêGÛè8=b¯-~b(=TyMZ8Ý:é‚®w8v(I•µ÷ã_ÒW›á7ÅÄŽ´SD.º« !Üt\êYPöíîT›ZbõñGx‡Á¾‘0y¨^ ¡²„«ô­¶«×kâŠø·\ÂNé®GHJZ¡J¦QQi•Qjªª¿ú]o½ÿ“]öÖ|M©Ê Œ !εȉӫFâ1§ÚÂЧŒ)Ä„$I‰öÞ? ôù—Ù’ä9¥‘ÃõŒÝó(Mw·ö> cš›äõ‘CðÈYO yZâܷĹî85;ò½©ÿ˜ò°(ï3ôòKÈÁÁטÑ;ƒ°ÝøËE¹V©åSJ2û½ì)Ú+‡×móQx“ fü%͈,mº¼ƒ %å­555jŽf/”¡ ;6èÐø?}ï}×ÈWÁ¦‰Є£Mˆ«IýX»ãð‡õŸZ;Õ'Kó›ùî>ŸW±huwåü‹¼Žúzoq¯ý_tÌM(„›I ×-®m,Ú¹L6-ܤÜ蓘!G¯Óg“Kˆà3°ãó=ëÔD™WqÛ¹Ã! 1|"ðßÓ8}\윢îÌõ©8ÂÃWÓé¥ÿBË·é>¡#,1kÄ"D(uÄLö£¦h=K$'¥:ÌòÚmÌ:Ì>ò¨·˜È_DZägšüù„¯íiC¨Ä\öµ#?.ß%ÂÎêé¨+ ¾ôÚd–‰ò½ÔG@,â·áÎÃCôp»?ùKÈqÆt¡G¾A—øó€l·¿Ó$„/†§~ag8EßwÊö0we‡&È$ºÑnœu’vaüº™€ Š¡jP¾¢+>øºïqvC Š4 óC9+Ǧ»ð\¹\ÊL¢3½ùí á:Þ#éRÐmùC›Öª]9Å„eëî@ Dvw|»I—`A–ÛMì­Vø0#{ŽqϪµ}uó©–íÝz=J«>]p˜l¬Æu÷V2ºQGî$ke¨ïB(äy³{9lêPFó¿í{ʽü ©j~ ÷¸êaŸÀ©8/Yq>Éžwÿeô\ø÷$O,Ú2«b°l4 ͉ B™×?‘øKºŽpœæ PÅQJÖì,é»Ã&-C´Ä–daPv›Ÿ‘²Cyœ0NiÊè>}¦nÀòt‚"#.°¤â‹“Ù\CIdXÌ`ŸâP ™36(foDZ—nãuì?ìv艹>òœq(ÂMSúžÃÆ„µ¿P”åT¤/¢ÂôÉaåŠPË ˆù^JJ¥þ@YüÄ÷4~%+·_q1ÁBâ=ƒ%ñqÊ.ÔAtI]Êú}¬Òø†.èB‚®'þ@@Jý«t\ç(_ö̾± ÏW<üâêhµ¬µ‹PÒ>´à3éA“SïsF" ê øGBùä»MšöÞà Œ¡)‹(m¤ýS€3=R¾eôÆo/àx¢ Bíeñ«Â}ÿZ¸^ÓÁˆþSízšèM4Æt7”qʹûº” ØþÎÝÏŸ`6ûÒÚ)•Àuñäˆy„3ñà{'8mçn·'>–ÓJP;«90¶55p ®ÌËr?Å]J…>ŸÚ©Üuòï:?qærݤöå(üiäGØyÿdw‚†íª:n=ýú.–”Ô±rq‘  Ó=Z›<þ1ð0f~“ÿ›uÓ€êñœ×¾ž ]§£òi¡N+·)IÖÚ6?VˆD.âm2¿B‚¸NŠðÑúv?4O)±Ô'Heš·:v—åqÄB¢[ÔþÀÙ; .¥·#Iá‚aû6´g\·³ùÝ"_w¾ó†añÛ²aÑâj»^ë©ù}{¾íh¶}}Ýݯg¶¥¢YüÀ±êÎw?‰RŽÙç&÷›»6—ä`§q«•Ñí|›ß4uîWù£ÌLqöð‰™*©­;‰çþÑì-Óø>:û¤i|ž³ß·õæð£s²¡ž¡Ç¦H¬pÅ¢Îrö‹ŸFÑáOðªè¼YÕ,šûow˜¹µq›€ôS ¯OìyGå̵־Hºö+QÕtôËûë\¬Ç¿œµwœ¦ð¯Àð˜mßd¼>•eŠEö¶zÕ±öp[ï¥û´'A™ÅŸ÷¹‡ñGSÛ[ä¿G­ •+.OÃIw‘ËÝìܹ¹¾oC,Þí²}Þpvô\>#„4ŵÖßG”[bóÿ2·mn‰¹…ÒC„»O mS‘Ú"Ü÷²2úKv¾ÚD—¬ýšäÒ°ô?oß>M6§ç:œXl»’ÂnŸû“‘Íðœ}¿ Rçëµy©•²—Ñ‚Ã犣u‰@tW×™ÚÌEò×¼KWÿì+Ì/Ô/­=÷š–_œº%ì.•ýðÔ?özÇÅ ˜ ¨jùñ=´–ß]ýœ l‹Áä7†’å…ÒÓw<\ |åŸÃGl÷ã¾óé´]\—ÊC™=0²·M-›|œ‚þÇöLALu€'?ŽRaˆSZ9 }ã¡i(mÄ 3åˆ0Ž4šÖf²¬ÔÞimä Ø¥á À¯‹è„ÃùlƒFRp²Ï¿n¿V'4t!“C{0|¾1&ï<>V¶LgXHHû[“š™NÆŸ/ùƒ_*Eó7ŽET}…Dx#ÍC~‚ò¡Ö=®wß Ç%Æ‹BÚ¡emh5xGZæi(bŶåpgúÛÄ ¦ÇoI¼[ľ’^`òU b!í·7p­.Âmz³éz‹¾O¬ éÞ‡³ï!vÙÄ=Û†z-9ƒ ›ýüx›šLµâÿ)’ƒÂäDø ÈŽûPÞ‚éHª²^¥ñA.ßÀÏs%ÍŒ¯YbÿËØó?~¿?±`Ûþ¾—}ñ²Þwͯwë:ž{:Bƒøžßóöm3?±Ã(wCº$’šÈ·íû½eÌ bÝ9@ŠÃz<©~Â(ÿKÓøbÒm†ÄLRƒ¯5|B€@ÿŒ#CnÝ+“êáã˜f”>JýÇøŸNñ1q§÷˜æÇœdj¡;»s‚¸ôlŽtô¬„! eÄ ÑÑÑŠ(OlêÁëHøk_©©ß—[€…SÎø»^ºkÕ=;ì`r…Œ@ü3»Î—P¶{ÁÁÎOTÚºKR?JŽqÖNMëNk 0›1…Û›­‘Í`ñ#Dri„¦ÞŸ ÿ™–Gºï§(˜ Jzò׋ IÖ딸5»Ã@åˆIa¹º«éÞÕˆRp ìOɉApexÚŒŸ €ÀÚMå #ýh+LE0™·²‘¥0¡Ë:ãC´—ra/œK„$Î%ÁZVc‡ñ±â÷Y¶²J(=Ç.‘ü¯=£$¿ï{Nð{ùG†œ8HX2„ ½ j€H¹Þ(9”¨Â Ü7:<ÀÃ.çú1LRͺUÎxNKY¥Ãõ•·›:Ûçóž:«þÿ·ËúaÎ DMW‡Í©Ý¾eüœfõ  PîTiÎÔï8d3è $ ¿wcyå‘Ñžcï§–BßtRdD_ͨØ)úß霑õI7%CûŽFN¨iÊNgCð¶SÅ|aT À›b[:Ч&ÙH½%ˆ06 ~ í8´ ×?Ðb9ˆ†.Ǻ`ªÃ?8Ä j,4¸(ä‰È?# †6Hg¾ºË‰ ôJ$„'ªŽW†ÈÈW}j)=¼F×úÿ<\>W=±_~ù[Z¡º%%8sÕò ¹ß©$ÏG0éOyz§‘J’bVDÓÈ¿ïbÙ4=Šê½/«JÆ‘¤®ðƒ®¯b˜T¾\”flö[o“_ô|]Hœ+e!HB!"‘ °Žd ÈÀµù T?£M¿Ð§¥ DDíK¯€+ÛÙ;m3ðf·±¿ü œî3¸µ¤îr¶–l›Ó(élQ*#œüª;ŸF4"HöEC&K|Ò—”žÀ-F+Ií!¡$¶2…ÆŸaFÅS'°Øzivsü^l*GßIqó5ºø¡f1M‡| ÛHP˜JšÓÉoˆÕÙ«%5¯ŒÔ’I=9"ù€„TÆ”¹ØÒépå'´HáP¾|·†Òþ–LË´9:~IäRà”7}Û 2îÐÕ!1/©Ï<Œq€”Å rû™{’~SÁHPÊ<ïÙá6(º>=ž+íë¶âÓÇF@•I)9‘*ö‡AFÿZc¦xe÷s¸ÙN}mÛ %Å“¾³ö§ÏKoK%7 `áO ‡Á1•†[ˆ<|ªäpƒO q*œÊ¥•# ÑžÔ²‚¢½¦‰%¸H±ž³°{Ã+%”2°X5d…è…‘4W$35-fٲʹHæ5­iÍ{·†Eo°ÄÍe‹ômhXXþk9Z›]¤‹¯âsýÑ€ùK]Ão“zì<ƒ`ÇÅ¥ÜõK¯ÂÂë?M&ûqÚ½ÏZå›p~ª²ž}}rÊ'±£ü»éùSÙÁ‡@œ9¿·0†„ Ÿž­ÕŽŸìk¤úã´æþµ5¨¼ÑyucÇåÀWD“ õ¨môBå×ÑSÅ Ç­6´AÔw¼²Î²s ⽘—MMFPõ.þ&½Õ°n&mò“Ð qjfÙ@~1ŠÖ)Sû%ïᯚڜFÂQ•|A8z ! È‹!kaæiåg¼éßž¨Á…Â.Ï£òÑi*¤²È€ðI÷éñ Ô÷‡³Äucrc/òð×<æ> ŸcØ“p9FÙŠÈò„Š,¨>îZjíbÄŸ[šÛ6ˆ?‰|8¿µ/«_æ_75ÒêxMÉŠPÙcmZûw´ÈÉ =ya>†‰kx6YD¾1«hâøîO!.ÅóVTØžù‚,A²‡¼`1 B îî¼ùÚO ö7°2Ü¥ò4a, %Œ1Œ2d«žw-2NU|¢”áüKBûû:…3†‡‚L¯~ö©írHtŒï•.¸â‚FRå¶Û Icó?qþï¾ýN±€jfÇ'×ô>³gzÿÑßnbpÏ—×ø\Y¿£{G?¬o" ŒU"à’N l‘÷ÐöýÍ"FVTid%b•Jg HS€§žqÅ,ZŠ’&?Z4I”‹è4ŒuTÝõÔ£Ä/_–—®Q¿‚N ‚’Ô®ö¯è‘d7ýˆ|HÀRAëJ¸úOÉáÖ‘¦-°ü»e»‡e÷*xÚ¥JÎÆj½=x©ƒ`|öžÌÚhA P « _¿{99SóbkÍó‘ê4à^†ãÖeàŠIm¯ð)Ig“üÎŒ–TZ§>oSãèÉGJxÉ@´®ÒW§$bpûÖìZ-ð6Í¥ÖzÄ>Sëzó­óà4Xêè° ŒC_PY¡î“ú$\f+0†tCVü¼ ×|öàº1ËzJéS[ãuü£ŠS cœ4[¹÷¦ ¾kÇW,{LþRQ~{ƒ"â6¤˜%voPIž dð2âsh 0$æ© ”íg`hç{»Dk³ùí1Ïkº.¹DóJtèöÅ“SGca@QØ F× buô¥e¤k1Í’yñ•#ú¹Å†L¢½—3ú£õI«_Û3ï%7YK„dL¬ã–3¼ȵœN‘È(<4C™R€# À§ 8óý¾A·Â¹fÁ¡*]ÆŠV„µBî;0œ/[J_@å;Ë=ƒhÆ%± çZp GΩCÅ€cÞz8×_­lC 'ÖÔ‘ˆº£(L%— Nþbs·E—¿¿.kÏœnXà×±•g ;mY€1±Eø ¾dͰ2æºâFÛ8n[& €¹(“ƒüsç êsåÒÿþS¬‡mŠÑê»cîØo{U¼*ßâT²]ñb »’æPèˆ;gþF Á h„Žc X¬Í£À^Ì9®¿¹T‚®#ê×¹’Á/€•¬Pr® ]Ϋué:éM3Õ>޹J¾³O—z+‚@RÍ>ÖÞ-¿E¨í8úÈä¼/II –c ¹äæÌœÑª/W V1H”e_»ö â±$ªëÝXœðŽìW(•{6ÁÛfï@ïoÀ$ĤД?Ò«}®±ˆ3¢¯é@$ð¦’_.XNÕ6}NÀɈ tAåkTh¥¢ÕV«_ÅRà=¯Íôý³É{LŸ"üšYxÍõ¯KEÒ‰æ6SÛéç—ÖyÿŸ_ø~)©}Æ›5W—¥4dKš¤†ùñÉŽ{å¶l@ø=¨B¨µcî› í/_XPÙÝ”5“œïœê&ö¢vç\„=¢¯WÈæ/cÅK~(H §‚bOõôÌÓ‡ûVyàðiD<úÏióÏ-k—’<7ÄQS;F+ˆ£ÐãéÚw?{^>¾Ù\Cç‹Þ˜å@{ü¨ Ÿuî Xf3®˜¡ìuÑä<1íÇÂMª·î‰o(wBµˆ$é}K$ö!¯]±k³7 C”“Sû†gMàsÒtþø •ÿµÖÓ6÷Œ”»ÖÆÂó¯¨8kÆ:5VÃFrfÒ_žhI›‘X®’pïäùG˜.Ã~.:™*†½a3wB–í¸[\¤ß£¢úÄD€Ûõ(i;Þófž{>,S¶üb’@[˜E1ÍN~š8ýÜv5.FÞœ3Wÿ¼_I±P· ågº†LÏ4™(ÃõI”ö¶ý‘ÿujÄú”ß"Çäô’ó©Ö}¹yËìb þ+w¦–©@O¨’eõÓíä!Nó[Ø ¥À (  ä¥?œ–ÎRF¯s“]RƒÓ­xÍ7žƒ4è@ɨÓá~磒³3êYC©—=àT¬Áœ¦=L¬^èÇ´$È6üzn­MïÎǪÙ$p‘¸ü)“ŸJ”³`Ö6”œøPrLßâã7u¡_ÙdÑüäurÇ$³çiã Í©l„‘û›À1xñ£¶L;Í1¬Yiî ¶,3$hÊ_t“åWíñhYî·æ˜¹!]5`B…­z±?©i¤â®yk&ÓÓðls;rTô|NzJq¦ç¶™Æ|Ìcž3þh<œÍ¿Šj~'¡•ý»¦öýåoú6¯£}3Ã<êÓÂC÷U‹õ‘hÌW»zdz¬§›€«a‹_±ìkm‡õu=Û^“Á™{`yFÜ6quøñ»–êo£hÖðo>še¬Xí\ã+mú•éÞïmùöªDëå!Ù¯1)ò8{f3±Ÿ{ßÄ?gLÚ8 :MÞŠÛ²yuòÅþT«,R8)Z-µ]à ƛ&q”! $ð&±‚j/‰ß“#÷ØâqýPæð:4ºÁI! ‰weÁb9"[6_šÉ‰œËXã¦Ã–0&odìèpCK^8EB˜ðÿ[ɱ¨¡ó©00ÄŽ‚vëj}ªþþ%hÂoïM )]€šqJótÖ—OãˆeDŸ-ÖÊpìS‰I²¼†µ1ÕA™(cÇjÆ-L²¢@ü‹`Ëó% °jYdíð xô€ÞáekÑëàÑo<ÙµlZi [tùxní¡ ‘gÊÃÁ”±85â E¦_G¡3ñ:‚‚½sa‰u¨»5@“¼8pzÓìýv`#78»ö{;œUï´>þ&çóí0ñ>ž·ùdºÁ!wâÜ %Á-ЄHµ¥¸C¥ !Žžn2{ÃËÓHüa ( b€O™Ÿ8`Lþä½ ‡hæÌ‘á!&>aò ;vuÆ))L,_Æzb$›²Ð‡¿? &s}ª!$Äg(N²8Ò’èXdz×èç³è)=t…Ð÷—?í3õ•4áÓÞhj%_y7RPýÑO#a>ï¥É¶qÌIѹóÀíÑœ Pü‰?‚zª“Ïß.hlšßñô'ØÊŽhÑm^*°„h‡8A+¾Uáœéƒ)‚ÔHÝ@W]„MéÝM1kGÈ;‘zþ1ý^ðàx^ЙXAš>V¸à º=ù"ø°Sr_Púo¤1—% ž‹¦¼p ÞkÿeÌØ—'N\7⃔”ogžó)Ùƒƒt^ͺ«jMÊ’ÙíޝÓöBéŸéÝö_%Ñmq4êÉø>M))t;R—Ö4Ç%¸1Bа‚%’‡l|A xÎ0Ú ˆÞ‹Ô¾äÀ áðçO˜BÅ 5pK0)D3£ÚŽ.5 Hý·-0Òk@ ¡íÒ]ÈW9ÈQËêZùÏj`oıc"c¼cœÚŠè…X†ìRÇ’ †+Ó [çÈŒt(^A@=¢MÈþh[Ì€iû/ùÜ-{ѧ½oÙ’³Øµs缡&$Ô€Ò(á ¢v<åC=¡ ð$Öt¯8 x‡Í'šU.Ä/«Å3:-x—™.0ƒ˜æ»™ÏR±Àƒ¢ l¹­Bà?ð€u„8êwÞš6ž_áÞ,›Æh×|üD¬ «7YI«öçÅãfV®yk‹—½Âç/Áv½0€¶;<¾¡ „¸Y_‘Mk,x¹ãÀCE#÷N‰ÌÊ×L­¨‚V¨}O<ï®Ë^Œ+"¤é|ø0IÑNÈÑ6Õ} —PÞçŽíJz!VÌ“@òõ œPÃäO ÑBºÒùÎ,ñJ-Œ¦Z)ïÊq8ÜÃöaÞBxü< –ï"õ½’ûL^G¿èC„–{?aŠH¡`FæŠÝd 0å²Ç¡H’ÆLQe¶´<>{xñA¥:BHÃJm@ÀÝŒGX49L÷ÑË”{’=êÆoY¢‰³}´îܪP¹“3x©Ñâ$c:Û ˆ"¹º¦ ¿6ÁØ"wkTl•9CÚú[ý휴3ä‚§¸ç7ʆoÕXw¯9Ô(+«3F.ôe'Ïve%L{Ú¢Iý!SÄœî¾1´BL!ÛºQ¢Âb "ø>•½7ž¶ËYùØÐü*Qäöû…ZOÌbGýS8º¡HS¿]K‚å¬ÏQµ(èû¨NW[ßG¾fˆxØt¬i¿|> 7ë÷=JäãêV^ÑZ¹›ÜòêTáçÞïØ=ÒžùݯàîWAþ~e®xÊ~®ãNòÊäAtÜ5exxsˆ<±¦Ð½œ`õº"=¿úô£ ùÈxÊ–²œÃ4²¿î8Õ[?À‹ï%yýëM ‰ò“ C{X±-þ.âNó½nvŸ²Þ`ÇSŒ UÅ&rÑMvƒÅ*ôü× óŒµäzÝ‚Q¤I^îŠã~—Úµ©hŸësФ\Eá|·)£ƒW¯-³4¸ŽG´QrD¥F]„¶òæ¬ØvÍuR ©(dƇ½„cok–Íä)ªO‘…~ÍÕHxŸ FÁ^“Ûü±ªêÔû¡‹@‰ÃŠE<‡€ȱÊ3Ž;^|Äk&uZ} k±Í A…Ó¨Ã>•“Ñ#M¦ö…4àò4 ‰ÅÏä çÌ ¶“ƒUÀ¯‘Ô(òà1 íÒÿŠ^Š}J÷¦¬BxÆÍè ¥¬Çšßp²dÀ‡îÇþ‡êÍ"ž  ?>¬Èxý6‚ŒFOS²PÙ5ÿO}ÒùYSsÒ7,}Ýb…øæÅõp¹±¿u™7ÖVÔÁÅåðË¥5~ð¶FIÎ~%"µ: Vô»Y–30G´=¿2+»fH²_~¢!0ý–ìõ'#ÄŸ÷\²É' K1Ð ¥mJÂpWßžŒ¶güC3©ðzÎ /ÙýPêòýŒfßµ›Â€ª!ºD²»£â“ z{ßá«ÝÓAî˜È8ýÖ|߯3@›ÌºÂÔ$¤d„™Eæ"/)ì(CˆN­}ᕾ…ÿ•Ñ1ûùWz ©v^l:䔃›¶í«-ú…^ˆ17.›Ñ‹g…9È}#WÔáQœâL—ñÌCáæk0$ic od˜p*ˆŒ–WÆ`p¦çi…ú«o¿»:Œ “…$¢‚U€¸tºûWãÃr ÕK¾~qê–­ëvG ×ÿY{ÒÂcÿrª}ØîFàH/|Ò…E‡}£ „‰Î›P ‚Ü õÅ´1‚ ¬¡)€˜tˆ[rÜ8»j:umÍo=øÜèݱlÎf1½Ð‹Üd¹«uêjU+Ý3æ{Þt øa0˜ zÓ.„œ”Íš[þÌÒþ õ•çÛg–´¢?áRV.öÙüÿ¼ŠSä¾ÊíDcªÇK&"yiyoàÕèòU¢i)ëNFÄi˯ƒ­U'úÚÌÁD[ìîö¿2yãïú˜Ì‹Åu,·o}Ë›Ùk;’“i!-:hPóhÌˇåÞûûp–f ±$kÞB”àãÚÆj޹™.üC®;qcíÚŒî>öò–IÛìjbt÷Ý*çT–^áøÕç'¿0sŽûl þ³’1·X‰ 7n¶rµ«ÖÉ«rùþ­ú™:_|š§äs_bQÒé>l€„˜v® ~䔑×ìÒšEžaaÃÔP¤š“¼”8Ýf€Ë‰ótfÐÛÌAwÛ‘Au}üÀQÇyÊ~±¬Ä@yKW7º5©8…Ä­"ê¡-#üåWc°<Š8:ëô ÏŽ\«ÊéFSÊPø 9¿2i5}Ž]¹« os^Ùa¥©ý¨Uø¹@ÌLîhyÙ5\cƒ+˜ÂÖÜdãNI«ÃÁ/ñ(H„ÍÍý¤€O6õ H°‘ÛÉ®ÛÃ4¦eܸŒüååf2iËmÿr ÙØkÛd_–Ú_á;/îiûOyðgg¤7}¶ç9ŸëxlÞ±žï} ߢèÜ·¼ÕK(wôþ'²³ƒ`¡¤ik„k›ÀÑnìt[XI®ïžÓQ”Ö>×Ue毒ÜrK;èÅÞÂÉe±Zó½%ÖóAÖ–¾¨ŽÊl%ѽSOÝð1–¨AAS›²òϼ<ש¶>Çg9a2zv5_ï¨þÔ–E<¼ý³>aJWÔ1Lé0¿èzL÷k6¢q-m?ôÀ›rrI[@î¿zÙÖÕé¾Â>K5=Ù·æw7W÷© Ad ^ÛÀÃòùÒ}[5¿Š–¥µÃÅZ|ÿ§û3âüʉ±§á3ðŠŠ®þÏ5©°ï+ò™‡úüŽ{ôëÉz‹œ„ú"$öûäp'2(¾UWŠõƒ€ÝÓØV~ûxÝyÃò¬XøÿYæ"Ôø™IATô ~rOÐ4¸‰äñæ„<ãé»_ç!ãd1ô_kð9IG¸«ÁQ‹ P‡ôRÇx%à€³×ÿ+ºÌ³{VDsycéÙF9$óúÿÏÍ%§ý‘MW)9€!>­§4xE—¦þ~ÇkÑÜ¢#ÿˆ©(õF$Í@ „õ’Á>%çw.h´Gᕼs)ÿÓe¡@$Ä#X”CÞ$óojÞgzvÎJjœ“TÅSß5=0¢ŸE ^½)~̲¾ÒðJ…íü¼íö9TÈ®µ©˜Ä¸{f-°XÜzg[iÑ· ´=T‘Çfq†Á8‘Dø¦éòž 6ú´ý!ñÕ#xD“¹t—|IûÇb»ß!f;ÑöDÕ~xgZ\°“m¦(%%ö¥?0—i3õAEAþLÞ´”$Àu‘eKt(x#Ú–ñ­Â­Ôsk9d¤«n 1 )| †‚§×–»&‰v>CãÖRŽù¢VhÞcLþ B×'˜ ú‚‘ìù¤n¥;|]Î^#–‘8$IÁ(jD #¯´Å˜§H¸Ó—ÄÁœ*iƒpbô…|(å«w-^vo½¯ˆ”9I=ÉI!À\9üÔtÐEu¿·;Ç áA«H‡šË„­1ÀÔmkßù_kÛÒª1‡iÓ „×Ìxõ÷€lüOòsnK«ÃƒØ[¬ùþ·ÌüH×ê½{OgkM­fÔж…‘DEDËþ¾Oú¾6÷üâô=?ßñ¿ñðkœîžóß~µïÃïÊ`õZê}Ý­3 ܽ?¸ÆaÔô*a²‡QDµ+([»‰ª)“¼Œ©oR¸6(…7%ƒdô,Vf>.¹œô¨mÎÍħÈé”I+†ÿi¾Ãß5î:è£/™’oðöó~É:ÜêÛôßår:/¾ßô~çþ¼ÇÆ9]~Hä,Œ«ZÍ*”ZÄ!Ãäùkr–q0êÂn›™£K1˪¨åÿÅõŸ;ÜÇÑãØî½ŸÎþ8û•cÊ“&·¥QêEL¸}ÉÀak(ug÷" P”$)­›¸'í2/ÆÜäÒWšOa¥ º ”B4CŒ¤Û DFóÅ`vLœ œìÜ¢¡¨ì¡SèyÊF’rÙ Ã~$¿Uí ˜Ï=º9%¬J"L.Ntè€ôuÁ£†â Â5"ÚÖ%5s]’Bx¼ ŒD¡€ PVÇÛ‡Ø1í¼D|ËlîvËïg-[7‘ …”¬ürÁ{î[±}—î¢89+e ¡;Ø|ç¹éÊ€‘]¢É¸¤Çƒ×3]q’A.:»ôºuŸPáëF ⌓$”‰á·@JžS1>¹†HTG¼”YÓºÛ8út¹RýžJO·]5àèA:ÌãT[ÆRˆ»ùŽÕ§zdz³çñ¬½¦ÔÚ9(Hå›I¨»Ô b}F9 ŸÐe¤ÏB Â\¡AgAobwä~Š#h ÂÍIñøï;UÞ¤I{ 3áÈ篱 r¦e·'ž §1CÆ2„·Ã”‰,‹ô;V¥>Û‹ý.z¥kçzGˆÂmUé%šÕ–ú¥kí-Ƚ~alÆñøxJóg†ë»ôQ‡<„ѵWþ`/noÃÆ²àe1…ñiiÍÉü †·ÄÏUÇ-Šf}GVf‡v憤\†I]ÄÁ®’/°qž„¦sxŒ¹iì6”‘§˜Ï‡À¬¦ŠD¹õåZ™X$š"|vYs:¢“.˜V„!¡JÚ9A#O»·åZAÀL[0SÙþçŽ)2„à’;nSû+ØGÇ$¾’|]5‚bXkI–/ÄC?Ü(ßvv§δÚiNó¬VͪšgòA$àiŠ»Ø- 2¾ÀÚDê)µÖN=ú5Õû:ÂISr@f”|ùãh@¢ŒUîÏœ=° ßäŠS¡ž‹(X œ[̯ôù ä:w3(íÓ0u·w­Ë×óZˆýÕèë_-²qzN0Šàu|fY:I¦ „u[{i­¤Tëò^#¿á “j¸peJ U¶¼'²Ä@Å*ýú†’'í_ÛÓu?Þ‚R ù`І³J¥ I™»²`b°p@$’II`Áš¦Ö[ZªªÉEKFY°4ÑOèw«?Êû© ‹WTî⟊?éý TÉzÓ#½–„Be½Ž;ˆxÀ²Å¾ õ ûhX¿Ó?ý(7hï|TÝ¢yäÞ¢¦xBžk¶ Œäùm›E¨¥¦µ°ØLO´+¥E&ÒèNÖ#”ãÂhæ?YýR¦ûêª@ Z;ïœ&F@l~ &¸¼»ô õymâù×tTx08€Dº9!g‘aá7£ÉçS8ˆYHVþ®Í¦üX‚űÙ‘ÌYc'¦“z™Œà|ßn‰/©#È_AaO¤©òØÞ– ;ÕÉnÇâ­èd#¯~T4—2;î “ÅŸA•Uû¶“rNa ógt¤8>Ô#R”“Ä.ª?cÐòÄþù¬q£¿‚ÁJØÈlÔì°%AÕ*½=ôª ´bÉÂ~r@òØv+Š»Œ¼ Ä×çŸÍ#0\ ƒ«A†ƒhéYmÐoÓŠ%Iè¶ëjþ‹¹ËK]¼S꾑@'<TÆ9ä¤îç õC×bl­‚Ά™I&…kÇÃÕÁ×¼Vá î sß-£Š5|Ž=ûÂd¾¢«Ï;q•û¶¥fz ö£’ã±-¿>±Ðõl­28³Ñk>—~cس5´–{d 0 DLøÐ9Øy³œÇhSè=)¸ÆóÙªŠË!#¹2 ã~!¬' bi ÛF[AU½q ذÃûRq©=UIne­*[°oÍ1 ‚,^³C÷¥»|®Ja[t…û(xÎÇ7;ÞV£çæ(oû¿¾rW?Ú˜!×;’¦]DÙCÚ+Œžè¤{oâ ¥¿Šô>3ƒï¤güiõ”z$˜¼€%E§ëŒ ‚x4Æ×îß%N›à×ÿÅ þßlÀìØe~âZhu¬ùú”x½œ·c߬ìgy–?§n´Þ¡ëæwЦœ0¨ì&U¬§F(I+7²"Ñßß½3¤çDÐG ¹,³e¬÷ Çk¿gœiFšŽEÎÊ~Ò¶ ’fÁÉóz¬¤\¸Ã‚.gÛ„£j3°‡dièñ&îª÷Í’7+ÍcÚïÇ¥KuÑ)¥Ü{.¬¦ö[;‹.‰¸MÖpDήá}9N^g¼ºÇwÃÒÒШ­íü3L?æžœþcBůwüß³ë¥ÓëÑò£æÐj[²éJÖ‹¿Í†¢°ó`Ú¢)<äÑÎsU— "UÍȱŒ~é?÷ý‡ËýÔ~ƒì¼üwFE €èé©ã|º[µþ/úáúZZ¢D…@R@H"#*ÍZ ETµB²)Ç»×aÐ8°ä-uøÈÄÉO¬é‹Ô­©}Âl}¸àòúØB/mý¯™n<$4ícW Å@òÄ>­A¢KaØ*¬Æ Z!\lªSKÖ£Ð+îÜ\†@Wó³¦<¶œs@­'¤­0¨VÞ'þæX0q¥ìV± Ü“!^¬ès1ØlƒǃÜM>o³•奦ˆ qëѱ.·’˜A©Ã*e¡¶Å—vù ír70›©0=]"­µÏ"Ôy¦cêFäÜ»W`«“èvßԑ倡šW• ]ÜÇ?ÃßfƒÛ¶G¦óçŠQ㬤®WÝ)g™@±Â¶óWHW#þʹãœ-4i‘Šˆ3y¼WÌôcŽºI¾¯Ã|(‘t±“ fÔßá'lUrYÆðv=32C$‰¢dn*cÿ TxNçKã§ÈÆ+y:PŸJ dKé’‡¡•E>/{ý`‘C D„Fp#º‹³‚ #Ó§ÜôMtˆ„ÏeÂN¦Å˜š1”¶Fefvu=›ÊŸ2óÛùè“Ó»ðΊbˆBêúÔS¹Z½F£vYФlÜ€êwƒWî¼O£â6—(yˇqôö5¼toP´£“WË9>y1~­:òfÛšP7á©pAná:ÃØ›½«4™Qläk 9Q®s6$òÒÈFœrT\&Ó¼ª wö¾¥/ÂÆn‹ˆá&ÿQÑãB™@è `uÜvç.S7;¥tç75Jª¡T:ÉÃ{#+¬iÙ}Á½<Ôð]"}¾c ›‰Þ|uõët]ãäOެΠ+Zr•¤|cõÏ.{ù h¯üÆÚó°aÌ·ð+ÖdüÇÖä SçsìåOdýÏÏ't±’ƒá°{24Y¼ÿÅàœDD@1&¡"Å&"½~Ïóú3X;ŽÕ4C­®Áþ:ÇúøÝq¬z_Êe¤ìºv’oÙ˜ûH‚û[ûÅcD÷_Žì|›ï:~ào]á:œ9“š‰dD`ydë¶Ouà† e_*Ôé²ÜÙX(¤œÿnŒÖîŸZû½ÀHÚ_¸ ô9 ¹ó9cëô—æÅÔä#Ìú6Kóˆ…:´B’~=lß  ìÿ=„êÎb§:¿ó–¹Qk–¼#v¹Mðõ{ª¤ƒvye +4Ôy½„°>h;XJ¦U0á.4†í*Kù?o zx=芩k‡ ²eà ƒ«Œ™ôåê¯Èå%NÖríQwüаB¤Cïô©uï®NS •·Z}øìÚ™Zø»Šª5K=…ÄzÞÂ9-lC™Æ(÷;«ÔÇÝs§3šÇaùfæàpü'WüúIŠj”U~*2î ß´³óÄ!má´L™íöN‰V_1¦¸Îq²Ü8EË2µ6ÖE|¯–ðÇ9ÖóœV‚¥ì™È( ?®ìµû#µBšoËÂÙ7ºZ¸T:ª‡†OLóbxdhxJ9ƒÃˆâŽ ÉbÜÊAþ@åۢ累 LJ‰ {YÓ¡Û†¥V1Ü·¡:va>‰!º5¡/›G]~=ã=àçÜªŽ™Çïð¶FN‘xK»qE ¤qoÓøÃ=žÃo«9ø‚ÿÀÃö ²Õ ¨’D‘žS›Ýbn¥ùöŠM©Ý£üo‹ôÄܱu.¡1)6âM,ì'¨à6ð€zBl îÔÏɱÕån¡Q"°°öÙH®-E›•Ñê ?Ã…¢å¢-âô¿{§§LôØâzËy?Σ½a)…Ü4Gñ""çz)%ž¯YÛOÿw?3Wz?æýok%¹BþRudÊó,‰UЮS IÈiÛÇhîS% Éà[j(#ƒ¦éu‘\îhW*dCAdwí"LÒb˜]ri’&¿‡—Ògó}‹æ¿Æîiפ¤HÓÀ¢ágF#­háÁ}jh¬£LÜ,À!ÀÒc¥‚aðD—4›€”Ú}·F{T”‡6$ ¡øÇ‹¶cÎc„|›þÚs¥ç-}ˆÒ)‡{Ü(qF`÷S6 DAêî¿õ4p0€Ÿât³5C ²ß˜Î¾«cÕLêÕ "~|°R&˜¼!ØÊBfî› Ýÿ/ wvmå³M RõyÒÿ¢oNEëe¯ø; @1opG„Ê >‘aè½™Yw$ ’((8æñ<@D=¾æä’ "øx8˜õ¡øXœ¨>¨F2u›ÆbDïCeäüK`*»ÏÐøT„7)á N)@==Ã>ÐEÏÓü—ÜSܦ/ ï“ÙÀp x®0ÍŸÁ“ö†vBû€“u{}pwRËÓäBþ”,Vl–øsh“vž#ãèÂ$²0M>Ó©£ýR’ùitQ¢@¾‡»ÞP8nÇur_9ý#£/²hKRc@¢@4XR½ CžØÑBáä­¢Õ¡KB‹U A’]îùíž¿ØúˆœOê‚€è«ÎUü»ßí9·°é%ƒ€3fpçµQ ž¿Cõäè5Y-+Z»üF_ÝP±†üßÙŸé¿{Ѱ/pá5½Zå9eÎéqÉ(iÔ„}`’?¾’Bh9(л´¹x Bá%†ÊÒ)z$)tr¥×ê fòN~ÔÂcììØÎ’{{6.âÔÄHkÇÄ+ãߨr ¿û°:™·VEÐ÷áQ½ÜO %wØ¡âÆR¥ƒùðÂ#. Ù»"Ÿ•Ô0A©,¢*â RëÎÉËlOTÈ*u˜›0_mù‘•o§óSgfùiçbÒãCb¯ä" ¥sË3ƒ¯zá¹eÒóLû§lŃ—ÝSï½즻: ÿm)1Æà¯`|ãP÷+ß)#÷‚vU+?ÁŽ_Ti>„zد#iEÉ‘£ð$//t’M³,¸èò«‹¸ê” sEz”h‘·ýÓ+ç‘câRf•c”ø žnömå%6®½…Èô;KZ\ww(7ñeð1~xªüËÝ'… Jß\§7ÂÕá_ÜïàЮŒ(¯M_sTßœ™isÔVÖTÈêrô‰£ÔÞvy2ïûÜyX/äžVý¹“âñÉ“’Z .S^Þn¶msEÅáuÔl?}Š~Ëj–é W yº®R³ ‘ð­Ý•DpÊÙt0sþͪ /½ØrÙ”!˸™; ¹’’þv¥"§ð4ϨtHsƒ¦•S”@†DœÓê >¥­Ÿ»×®HKÒX¬}&¦±›†“VR]r%tI«,:&pÙfÝ£¢ž =q½5ߌ/-‚ÝKO¸ÙÊŸU»Œí?L» ²®9uc.*’¥aÞþ±<êÍî‰ !eYoûû :+Î/¦y‰ðò„s«÷šhŽú¯\–•tðŽOÕ’1Ü–åšßFôÓÝÝRBFg¹Yø­gT£ëÅ'‹õÍcëcºæÄRâ(C ÄÒçT/?G|Ã=-qÀ]H§P$âÛŒŒQ®˯ÑãóèrˆJõ!þ‘Š¥`éfm’¯¨Ó(¥‰?"ðŸpÿûýYÇ#ú•ŠéÞX¨ÞFíLÑÞ­! +èÔ©§Å â<×:z>¼ÕšÔ=-gOX¨, ¢aõÛA› ¸7:²«0_›ï‰‡Ïz^¡fç;Ìëíp«¨$þÛ\Æ%h1ƒ€£Lb½$n.餸TÐ#UÇYÚ|3pH¡ aNȾÉïÁÜÔbž·lVD4D>ÅýšC©«‹ðF€|ÒR.‚²òpRõk+ý ûNèá}l¾}Õ%$íÇÜÌÜø°ðñµÈ©s­sÝgÄH?,¡ðH¶û#Í ¯A`Rœæë›€ÿÐïlÉJ—• „.£çz|­#ói§i’?ʤ[÷µÂUEÝ}>Þßµú¡þy1[r0¿Êeׇ‚»‡£AtòGÔ½B3þNØðj¢¦ÐÏñ‹~b‰‹ã¼4®6Q›n´jËDkZ—#¬¹ºGkòš\ß­ã: -‰^>îŽqðo¹9—“¨~BÝ>r÷n~ÄGÙkq냎ÿ-±$òH©o£cÊþá¾3öšîZ²g¬I+|Î[·±M~ñ\:ÇF"sÔKùL¥ÎéM[ð áïLjAÄZR¼üòö­èžºÛ„rÂÇÊ‘x¦~éÇIBùŠ“y𺃸ècv…!ôÈ¢ßz þ×Lþ…¬WàŸ¯K±Û4žî ïÐü7¯ºðǧïÅݾÉ,‹® ­è¡Ér]¨±¹¤DôÛ0ŸÇÖ7È,ûñ>VW‰uœbNÁÔ"Å'8??òªH-Fsø~=:‚áÒgæ7\žzq<æ’Ý£æW¼ù³^ÙÄÄCGF: 3–hŒXDzÇsøEŒ–¥.A—±¹‹?’EÝ0å°Íb=>³ÜäÃÞ×ÝOP—¦ÕÔ›Ûm–Œõ Ú-¤¿Æ|h‘Sót}6·[ ›tŸ'ùÚ¶ü£;¬"Jõq5»vT ³ntNJ Ex*V:™Oò/½,ðÈWJ5œ©[5£ãÏͯKýaoÖCˆçƒÍF¡Ú9;W˜\ÒÐÊ-ÁtùZø´…ï­ê•biivJóv¯1ÅE'ió3Z;A:ï·]UÂÒ!%|êü±Ig@©¥ÃÏé’ð†p튽UFô€µ¸ £‘ì!.y0¬wºô>=r7㪟ù`êÕ—<žËœbøm]{ÙÙ3t'ß1ÙâA¸`¶ÿ³ŸÍ{³g:“z‡i‰c°’°3£šÃVY`¢ÒIN™âÎÊQcxú—st¨øNyŒYeÎ%›â¬Û&¶%ïêæŽóÈj$`¾{\~[öô–ÐÛ?.÷ŠÀ¹fçYe-¦;¯oÉ¿ø)®;=ÕžÉvGŠå¢èûéYdÓº/\æÒ ¯3 >¿©6¼æ‰µåãú_§—½ÿGý/ÛR¿ßî_x°ê¾–U)zZÝÎÝDOz7è5 4YTgêû~‹Ïövõjç£N£’Bô廓°l}9™:–{8)wæ–Í=öÙ@e+x×Nߟ_U¥Ý¯¡fÿßðʦ\kwfÔS­"¸°¾¹‚ÿ¢¯ãì!t{Ü_§¹ŠÓÛáÅÅ¿Gn€ífû–ó=Þí¿2•7)æçÝkØͪº~ò3ë]ojÍW_…Ÿ¤Üé§;ÇrŸÏ'%ÝeTò$A²i.óŸôO…¶Ä¦¢žšúßÄX nrÖv~fya;¶ýB¶ÞÛËàfqñ啾̋,1½œ?ŸMr׊§šÖHñ`»>¼=¿Âcsjé°¹pïg´pÜŒ}öž†³OEöòbrüvç)·þVÓï ‘t:·›ºÕÂÌéÿ ½F&Òfèö¬Pºº÷" ñçíøµ*7$nz$á@Ð:ë“4Щ~ºo´ ㌅(õ볕0GNаÁûM4…ù¸õÍ4D-´@4ßÙeL¯*; Ùša%p“À$¦?77cÛÐ áõùÒTè,לÓEK•àëÎIh·2M‡RìÁÎn$ÒHг(ÊooN„ì‰j’Žž\?U…/´;åBÝöϹU¬÷ìûÉ¡ÿ ÜAAîãk²n:t|T7pý"áãú2%zÕ9@ˆã†jí =ºŠ/rP+­,ñ·iœt.&3“¸ÐUåa>ÚV¤9ðYS»*°Ä˼×g¿YCAõ/„&—Nçs¼_ñó€ø1Cž5„C'´Úe=q;˜EDöoË2åÜת€„nÊQ¦ èo¯ k"† y»Ò¡-K¼¢@=9™®"Ïa-y;?3ñe†øï‘«¥¼ahº¾ÝdXn—R•4…ÛdѱEÜ „D„fF«¥û NDW$@mw()¶y¤ óoÉ{óÅWŠæÓÑ„1wc†Ãó"óák§Mªþ_š EócÇYÆøahèºñËJò`)Ž:å¹ 2)Å@PeÓŽÑ‚ƒFêÀ#$¢ ‚aJa‚`¥Ô¶—<'ÈN›jÒu¿jÉm1ªY4›µ+®ôùÚy“í‡Qª^= ùÉŽÆIûŠx÷ ³KÊwQ©Ç+3ïzÉØ‘SixGEO¥ÃcuvÏÿ+ñÿõï—}æ¶Ø¡#4L1 œcøŸWòöOÃú /E?Õ-þ¤Ò­s^¶#T§ŠSÇ …’•è##2 ø1+‚+Ç™Ú_$]ì¸CÝ«!ÑåÛÀ@V{Cûü|ò€oÏÂeO,©^ˆ›g¬“D >EjÁˆm Õ™ñC“¸ð‡~9# ú_@›¿{¸‰úv~9,ý½¾Í}º‘­V.=ÿ–;;1”ÑÆkªÚöÊb8D¬‰hÚÊ£§ç‡)±GÆ4êN¾ê0˜˜»hJxò¡‚°Kõ}zùÒ­¼ÚWðN<".×xð1€Où¤#˜ô›4ƒ¸ê–ô½¥¼ZToI1åmw‹ ÓEößj›SuIÓä¬*g·ˆ^îÆÖ–|Ñí£‡%‘‚ÖõüÂ0йË"„6Ÿê(7ë£ý°Ø É“õ‡ÓÖœk_ÔžƒÓcB&«Íì÷+²Çð}óÊjÙ´èÂóõ¯õ˜ðç¤CÈ©vŠ.ßÙ?Oª>H÷m@¢Òâ²ÞÛ­» êͱ<›Bö€;ojÍGYãÎ\ ž_"Ê“¥ ³ÄÞBE€[O×PŽþ˜ {Sj[‰â°ó!›•k6Z¿—u£~¨¤‰éŒ¼:zøÝ8t¢.ž(HW÷Ò–²Qª> Fƒ»³UûÌy`Ó½›?*ìVï1µå3´ac¹ÍZÃMÞBb±æ¯4™Š0$P9ç·ƒe‘© ›)=Û<û:ðtE­Š­±Ã—ø‘Q¬ZìÌK Îy¬3·Ú°UóY$„ž¶¶“þǤ¡oO1ÚQâû<‡ÏÝü®Žª¿HÑhù zN¿k»Ù™ÿ'»)&&÷Òï?§…«Qj+Êýz¯£åç×¢Þ!ÙÿP³oZc&À(-âøþW3ÿ¼ÚúG{ÞPC‰ÆFzä¦cêr‹yñ€v<¹¼=ÅZºz‡à½1è'â0(,AJ‰ä°w÷ªæˆ†a«®µÞ(Eø’‘e1•ë¤é8 PÙ ¨|¢È©ˆioΜ져”¦&ü x÷Z¬Æ£• e ¼Ó,Ú?{4@AÍJi®CîͪÈÐ|hÅ{¨ÿÎ(¥èä È{(}ï÷Ú9I æCø|Ð¥"Ѝ¨¢ »ûn]FI´1¯Ó6âðÜÁn›/èA“@ªÍ³"’æF¦Èv¥HãÎF³ÿ7ÞˆiïÇsŸç§4++ƒ·ÿ˜ô÷[þ6ÜEš—7í½ß­þÇ­#äCÃà ’IúI“ p½¹é|Ùkõ=Ñ@Þ~IrÝž–c¶””æ­Ù¤‹þ§éÒÓ‘Me¶þ%þ¶‹£'ï0ž–¬ì]ÖW°Œyß×I§×íœÛD£yx¦ïúÏ€fY&;Çx¦|[^É{B1Å oATÀ@EÛå°ÂÛ2ª! ž»7>U§BhõPÇëRɲ¸NAH·0Y'ÏzæRý¤ö3×6#nôã™OCnXd‹ „öŠìûœ= m”|J qZòsسA¥:ÎÙVå9p;¿0¶™èF~‚4Ó³Z0¡×Ž£&fu™Ç*„L¥Aã5>ãLñ HJ#e ‰1‘àÇž<Ñ–pF8hïÂFrA¶CöÃ¥™¼­u·¼[Ob-ó÷˜Ù•œ³Âd‚Žå甂óùpc„ñ…Ùñ$Mv$=Ì!¦9.R§4›Æ<–6påÑš8üÜÉàÝhh±ãˆîoV$ÏKЮFÂþlCWXñ´L[®B÷——dóëöWÒêܼTäeZ\WÃ`©Ä$NеTY¾¾qBß”5ÂLûÔjë¶Í `Jˆo²0HÆèèF…ió¼ÊëíÛ†èE´³B%=Pªw5ËuFeä 8‚0’ t9Î)©U wKËýåçu¨Ô¨D ó®¼sòvȱ£ !ïÔí)­f%ƒ˜Ø’ܤ_¨O“÷ï9„p6ý¡Àñº%™MÜN¯J†ŽC†$Aí. ïKê@x÷®»ò˜€† bôŠ÷,î|²ÌQ»µžƒÓç{qMO¿/}mOoÒòõŸ„£Ôæ2þ¶¶ÒÅô ›jv©ÅÔu …,æÃYòÝÁÝUgR©ìc“yßèüÓ/ŽÜîQÃui¦á/è}›¢ÌÁ\v>(~ïøâè‡ÃÄ­7åÔÏ5šõvÙ“Ù دãÛé&À„¶œBÁkVßÁþÒ~.þôŸê$¨±|z“pûÿnOœàܤ%»3¡Ðÿ…#î+N¾X@Ÿâ_s! F_‡’°ÓÖ±%õ¹Ÿ`›2)ç`ólŒ½Wû3tž{˜[à‹ìe¥î¡ƒp! cÅÎ=ïZ•"–ŽèiÃêPÍ`üs;Þ혎"kxž?FyV‹Ñ‡aÖ¨ìú”€[_ñu˜j}ÁΫ© ø~“&s\öO(‡ýÄôµ,,¤û!7-{ê—×Òôç\€€ñÔÉ¡¼±Ì™»\Æa¡ÀR¼?Ï€@¤¨p\Cng¯lsk¯+èÍI[U«6G ³ýR²šÛL<˜ì8E0Wº Ë¿¼¸ÑÌé—Ò’°å~ùªp¶^oÔv§QE¡ŠÌ,Ù+Tà1a-Nf ¿3#Ñð<ÊõÚââ˜ÉáÊn–\ðú^I@ÐâHPòd¯¹xm´vòþ|¹RŒ¸OËZ‚óÜŽ¿ÝÂ$‚]Ý 4ÐŒœºîJ¼– cµ¡Üx¿Ù?Qèæ K¤NÙŸ1C½mp|5yê?1¶ëˆ`‹­Lª7y%æÅª…·Ÿ˜GÌЫsÆYMM›þ"±´…uqä`ÆµÈæé¾0þû÷GùÙ”½”±|»À6î‘Y‚gÍH†ÉŒÐ”çlÎí ¢MÜï ðo#´}CòëÀhÊþ†â÷Œ˜ÞànAã艑¤:±Üý¡÷Ö™ËËýL _9Vøö±oùQ¾óÖèÌñþ‹ü®V •*fUhñt®ìäk¯!ÜêS$Î+ “qVä´IýTí?<톺Y/‡Q30`R§D¡ ªPNô½]´áÚ~9€ FyD0%Ê÷ʃ*„‡om5³7þÿ‚Ô¥(‰$Ä-F¨M‰)Áò'ù's‡¡Ø®7Ó’íÜò>¬MJŽÑS&ˉ{w»SqÜ#Œ -¯;HaF ‘C>€ÇòLˆ8ýÐÀ™Ý‘Auq…4ß}a|–%ÍÚwKp¿YƒØ@)øÅݸS™¹àgÓ6à˜±³œcºX™­ƒwjÅŸÄÞZ¨NYè©ûÚ¹Ê[Ÿa`—¨‘•e\›ça\Ö2#Ø¿R;ñaÍ/S¯:oÏB£`#¥¯#’Ðß!C Ekeç#±õ¹U-o(ÐÍÇ !#„ñ¨”óE ¸¿ø´?m$¾·¤ÿðî¥&ÌÇ¥¿Â@ç:úž{bŸ“þ°A@zQ.Ý:t%Š $Û']¡¤æ6hL´=TPZ£ÿ6­šGJÊ6ž~›äx; [îÁ¬úäÝ 7>zƒØ!ŠÑçþ›Ø—æw…¤¡Ûë!À±PÒ“pEÕ'Ù/gl™C_|ÖC_={O\Gºãc¹ê°î·Ù–(üJ¦÷þ¸Â˜H[Vª"©TÇè>7vòÏŠ{Ây„Õ?\ê=]§çy}¹wÅý ËínÐúÏö¦$ÒôÝàNÁê7\¢Ì<Ðå·=#w¢N0Ö3ÃHT/£èfZÐ,1Soùx€oI+Bz=Œz@]lÿ~¤êG ÁÔ7vÞ¹ò·ËÁ,^i0ÍP÷÷$¾‰xžšãÜôG~”ŽUoJ߯y" ˆÂ×’äó>êza¥húOÈýNTçœøÎs}c°G;ÓKW (¥zœ~KYÈú>ï†Z†Å8%)¿ ;Ñ,PâW=°?=±ÁÞçÁÑy-P ƒÏ; ‚s¤Î-ÆøŠ¢B" {-qØ‘œå¶6 0-2fØýúå H€pE]lÁáW³æA¿ä5R¿.÷ ÙF=Ñ]+Çud­9¾Û£…BåUX@ð˜Ï^·f½¡Òtâ…EÜcÝéÐÎ)–X¤þçs£æ ˜6̱ߞÐ|ðI‘#4èˆ—Ë „6|gTæøˆ,þß`‹pñÏž8¦óK=[~Éøg6“AÝb¡Újá7Þþâ« w÷Üà><Ëwf´1ª¾—Û?ûؾçæÖwr'a:̘,{{Î_Z 7¼._Pí¥§sŠÍç!aE?IånUà×i/Ö®Q¯ó¹œ÷™‡)ÛÞw¿°MË©t•›¤éî »+Ì]ä_ÓÐÅ¿{—iÄÝt¹›}Ÿ>#˜Ðfõô¸7½ ^y÷C{Ɖ; îlòØÝLÛcfÅsõ¯í÷Wi¡_¯/Õáž3/\wvú äj‰TÚoãï:gßa™x§„¨+œ² šš%þxöæv(•3°žývW……¿!"Üi˜Áé»G©Óþ–öIÒè1Y þ°eL… W´Z9]‹þ®„®[Lú¶çÓE2¶ƒùSpF:á‹^“Ç]x¶ZRAîf(Au.› ¯•ðEpüÉtÔxgcïO¯k½‰É!A"Αµ´/Ó̦…ùoU¥-N’ðó 1mÅ3M…B×1͘°Y@)ªæJ ß’Ö´Ãxsò™oL¥}*0*I#7YEšÓ‘‘€¥×£Ï85¾+é­Ÿ°èqë z9”²°cûÜÞ½YýñþŸùuŸ[íá<óÞßõ=<ËUVD‘`B :‚¹"{xcêÝ•ß[ûð}Peý«¨2p;/­ÿ~É9;Ù^a€4ø"9ºÑõÐFzžËÁg³¾t^£œû¦ÓMOoÕEHÿž(®¨`4~ñû4õCòƃú+èÕWn˜â‡@¾Äº¤“ 1—Í2w´zwµ lh‚ÃŽÃã³=±8ËͦǸ`.L_?e毊Â0ÿÀA¤sQì0­9üq”Dzʰÿ øTJÆž‘(o9ÇGu…?*ó|ŽçÄúéÌM-OæPA¢Éh2Üil:ˆO½#½zwšÆ^$¯)ŽvÐÈ,fö¹¼ÿÉ/1I­”¬¡Çô£`àíw;½K§cØ™#àêM¯[LÛÖá!6êGïìP?|`2ÿÔÓlšûåПŽ!UEª ¡Búá&™Éï}e$)Z½idÛãM¸7pwújý¢CfÆòh&°ÉÊÏ«ôFëºf²}ËT”Ï÷‡‚²žðàÔøÅéõ©+SUš§·}>Š@Õr¢åzfT¤‰÷æ’=Ø{JJÛ ª}Õôöx¼¦XVBÝøªÝ«†)Å!s e»ŽÃ©}xEOàº{­²¯”vFZðÓ5¯ÛLf±¥«D\ÿs_½Wâõæöë<Z$¬—Ÿê~±».–ŒÉHYÖžZÙ¤ãM¾Á &—%”ÿJoèÉDÅ\3REÕWØíV‹óìµbç-»Z)ûŽ–?6?¤2‰»FÒ0<þRö=±ÜóÜXËíud7›ÔXÁ 8dÀ P^)œ%@xâ*¡:\²ÌÖ[o˜BR,p¿Ëd}ll7xlúû”ƒƒ´ê)¸œ[Ô~Ý#Ô® ñ{i8Ûlfûe¸_¼tÆ÷úX•# ¾ò¶U1`Ã+Mýt(çC¨?ë»xÓ›¬VÕÜëœüh’óÞ¡Ý»Ë>@ù(¿ã’2ð¿ì°'§{ GÒ¶çro7|eÊÃeÎåV Ý…rƒZ/‡;c¬JNäÝòFֱء¤æ5¡€÷~.õÁøÿÁÌëÉT‘Å©Þë¯þ®âPJ2þàæÄ՘Ɩ¥•„ÆF90ù)ßµÁPRá$mXÅ–XTœIïçÄéuKÈ T´m'š[ zÝÔÉùTývÆSÓGË·‡Ë+N¤#ã[Ø×éâ•,ú\3œîª–§ÖUÔ£}—Äíÿ7êÅ÷ú‚;þ÷î’÷™D¥ß¢qjÎ; ‘“¯£Á¶>ž< Ñ-¬ó¶æ40oÁÄ‚h&1ì|3Ö¿³îû|—½–b7- ò¶uìªOkÉÎìÐò•«,Pñ…÷\hXNeÖYáyo÷½3†‰ç!ÉR…˜ÊÁ†!&¹z¯ÆÄq0CN×ÞrÐïÞÜÄroçfó˜Ó.» %Þ˜²Áa€H˜_4ºr³{í §k@¢èƒR×je.´¡Ý£~1×ý ø&ÝÚ½:õÚ}å'ÂbM±6«Z;ôaç›ðýÕ·zv…A`–SÆç 4á¤DÐ@øíöPrKG7ÑGéýðîgÔU€—ÕA¢DóX)*§æUóõé*SÜÏ‚aÅu•YõÊlÃÒëýTÕ`L0õ™dàž-´( ž®&ô4aøWG¡+P”˶t1òÞ3¡k÷Ò]M±D‚ å¯* [VË ÞÏJGðp×’ïä÷QçÖgâšÏ9Ô`e¶º­#¿ûtèOˆ&ƒ¦è“[#9¤Cuk.ó/¯÷›[žrRôþâµI –ábçújý $9v(Ç%f¾ÁÚ´P~( ÆÎdrr ‘H–ÈYvü.ªÁºhxñç=ŠmHQ[Ë–¨H¹@@’¹ýÓw"{îT3·Yý:|LJ6i:­¡½°ëåTOnÌÒ­¬»óš)`9g÷(J¸!ÐëÃê±™kéF`­1¾¨¸øüƒc lò–Žv_)Ê—¿Ðe-ªÎ U‹E$³¿ŒÊZ,uzáÈÎ!,‘>q.ú ºQMCÈ£øPEOG0T-±¿Hªóù®å¶7Ieü˜B‡¼ö]Ýä¥ÂI$2bÂ,H ö_±:^WÝ:Æt'º±øÞ§_ÉÙu ÔÖa+m@Ý7P¹ò)€Æ æyÁ¤>|ô¯âhAV“Xr_ë·>E¡þŽB-¡âË'íºˆCžN63¡­»™1_  ÷ç@@!c¯ 〩‰qŠëe#²à<Á©˜7b“µqªxZXÄ.·ÊY®Â¥Ez^ø8+T‡„ØîQÁ@äÁº6ôI´ÊÂ݇ú4 ‘Û/Rf^cÏ=°©õ!ôcˆLÈ ¸{ü8Z¦aÞÝ™`ú õˆsC -ŸR„xÓQ¹RfÌÅ=iœ_”ó. ~E/Ãl†]ŸªÐ6'Ä SíYÄ©´8w©È-ÓE -OéÖie$¿Vmïu´¾¿i²`P7%1UWï#†š^G‡Î% ?¿B–£,±‰?ÃKJÓž¯ÊÖÃÂ|½´â“9oœXš¤A×&j½ñ]" Ì‹G]ÔIáô]ÊMò¥±êíËû„„­p‡æH²¢ÖÜPì:à¹J2ßWñ^¸ å‚†¢Ü²¤qßmºŒNŸGß܃tÂýðÅ-Çò—,!^%¬w<Òçã°ðÇýJK¹_we|°±Pн^J·»ƒŸ}ÉñôùI¦s/åºû7Äà_ÆËy¨óz€_h¥ÒÚ#pÞ™¯ƒ½B Ñγ_G´àÈz³±ß:ø—¡Bõo~t7Ê(‰CI€HúwÀwZø¾j»ßªí½³ú»Æúáøø‡[¿êò†Å=¸@*‡ù‚À`Û€ºqó†ólÊëj]óuñ˹¾ßâ¾^”®èƒ¶˜¡Œ&³kðp³‹œ ý9 ñvY¨dGelNÝeGXsæÉ¥%u(;èS€ŽTÇ%œI7м/dé£ “ò¨ ä„E<ç³Ì#92å/²nH†@=NÚXO×óºz=[,ésåÍ'ÛBî [ðÖ¼¡Ü ; ’Üg~jf(¿•Eä¬CL¨Wpþ¦öÔ”…¥úƒGõ8xöÍb‡g¬uÊzQkT1¡]Ü”@†„Ú®Á»C¡>e2åö+‡ }ª„ŸU"­é,ŽaSË! E1vVßì-YŽ"`ÞŠ¤GMMàðèˆ|I\ªŒ„/=òÐÁ±ÌM5&p@ÜDH)&=ÿ»ä8˧;Øï~·Ö¼üÏ£÷Þ«˜ðÞ ø6ËŸé@O÷Ë[+7œmú©Šþ?ƒ¿×±Í®}Dï¶Fž{†ê$rD2œŠ|½…áíšaYÊ-_B‡uÆøÁ§rÁ“ìf¨ºöoZàv¸6þm×sö´’¥ œþ!uà+@§žÿ÷Oü!‡ØÔiBÛ|hÈüðç­þÏa·ßæDzî<$†D×é”Itüÿ¥é§ý‡”rþ ¾—‘š’]¦í0¿Îõˆz«Bé;ÖyŸu•MÎCèÝÄ=kä¾Â ÜHæ !U¥Õ“Ï\ZñãÒˆèp}Çß ¿§éþ0¹~vÏóŽ^ν{°6Ë6ŒêŸ¡åˆz-³«”Ê:µÒç£J'¾ÿŠ ¦1Èû÷gõõ9ÿmGÆu»åo¿}ž!ƒQU‚ª¸¤ ¦0Q4mE Xîi>!DýÿÞövÀô}_ª·üñw9ŸÛ¿ñ'á{ŠwÏÓáÖÓ‹)7Œá€;#¥æŒð×B:FÚϵ³é²×%~„‹BDZ–£/W”™yî{Ž™¤8…šeo¾[0Þ¬B¾]+¤©ÀÄŒ™0”¯¾#_¯ñ0>¬V³kt&nsŽ>#uNm‹Í´I-NŸëºŒ]©ä…§¹ ­3b EرÍìMgÝm B+Öû>þÉšAb¨“èÑüÙ&A"B[‘”NêY„އa35¤ß=Û® ]MúNaVÖP]àášV”˜«ƒUѵéË|†•EnZS"+œ\÷,ž#IêèõîŸüôŠ?žÚ¾)hic40„Ã1Kå:݉FQ "¥˜+EX¨Ï¹þý]Ìo½áÿoö~¦éïÓ{{'Ãè¸üŸoÌ}Ë»­æÿ(œF.h9E5å%\ù¢YsRÐÈÔºñ™Ý¶œÄ­<¿Öë—íß¶#uzÛ….RGÏš)©IÇú Ø?ˆSîZ9¸‘ûežÝ`5œ—ìW}½çðÇ‹Ÿ3Áªˆô¶L2(³·š*ëÆ·®é!s´í£­|¯ŒÒÞТbGÏ è4²çïY¡; ÷"ø;[ÆPk^^Ãô§K{ ´\Pz|+=¦ó¾²‡ §×îëI®Ö66•Š"ÈX°HB€N:}ªj!ç«f#Î[FÏ+{6YBç@w=e¾ÖÀËis;Äi)BÉuÞô…×Õ¨ï(ä›ÉxJ1!Š&õ>ö~ÒnPOâüŽ59¹ñD¿õ{e^Â"2¾WÀ±h¤Šž™xŸ‚ iæ-íÒB¾%‚¿lnæòG9±|½v´éÀ@„“‰ZÎ ÖîZ±»ŽÜß{ªÇ­»i)î=E( N1„)†ýl‘/Ù˜ÂõU?ý*aÚD†ÿéLĺe¤%æ Oò Uy7”?k¶®©Ÿþ3·ð8üÜ›Ǽ¡WùçóKwÞÓžµ¿ëéÕòs¿×Ç“`øöø~ý®–y|xK";>G¤àô·¢¡Í,1œ4L”¨JÒäÉÒt_Éåïè»DäȤþ;À¤ë*B¨áí}©@ém´qõ ²=¯Ä¢—´“cÉþqÞÕu™™ÕTÐ;r£¨Šü$1œØ|G¨ø¦A^â–@DÀô¡{!õ&_Åß7Oîå¡NÛ$bœ¾Í¹Í Š«‘Þ © Þý¾O»órŒ^¸ãó¯žpCñjô ƒPüŒ3©CJÃÜÑ7õ@ïëƒ;“úhÂD¤B´hÛcFÑLM ÌÄEƒßù/¯´$†öÍ#dæÒ‚ÔXòÏ"¦tÝèSUU€@Yk}ÉÎЛÐùÓŠ‹—‚à„ªÞ2j—m“ž¢>8Ž@øôqZâ{2T˜5ȇΟ/ãá f™ÖuŸDþ&Dƒ¥UO´ßW´=”ñàžR:ϧЧð?þcÿ‘ü¹¿.¶}_¡îîM·Ý³OÉwmì„i9X“9^  îÍ=¹é%Á0l{tíÊÕö b•ÊÀ÷ýíµqk"Œ`¢bd\9Ý®³õhf5n§yúÆok›ú"ÁUi%ÍNNoh!ÃðóŠú~õq«G»æ3ÒÑAYX`€dº\NY‡Õ%ð¯Çkc°¡Y3ÜUhí]ê V²{lš` £?1†v¤5 çúÿøÏ}ò?'ð_Ë÷¾ò÷‹ C&FSQ,…J 6$’!Ä ÊHÄ)$*Iˆ5D*ˆˆÔFÉ¢ 6ŒDd„"À$XH¡<7¶Ñíûþî÷œÍã.ëüÉ{ å}Ü+rH-3hVe"ñÕ@„‚".©PJ¿n¡&4“]eKk¾ôãá³\"ôœ‚m¦“¿3¨?³|¿õ±/á„H"AcxÞCp¤ë«ôá@_ãVs÷ý«êx1‹P #à"*ïÌíýú~ üoô‰}зwóÿ¸>„^–÷—óþÀì* ÒB7ÜÔÅr¡ýÖ¿ÈOêë¹htô|Ä‚9ö¿úóöf¢…7ÜoÓ"º$ë÷×ÕÞKDbË×Ö&3™F%»üÔË•¯ËÈ>òyÞþ-ÅSI9ù`L¾q£ðÕÐxºæ=¿M+~»Ô?Õä¾?~ü}û•÷ž?Äž$UH‚ˆ¨Ž&ÈÆhL6`1‰‰”eB DÃE&‰)$Df‚#€ˆ DH  » -À#þ¾ewl7 9»Ÿ]·<çû—xšž ã}+}u9m¯VͤÙ%ßdªÂÀù Êá\ø Rí­õÏ®¹Î’†—¤êIÂÍ­zàWq‰ €ˆG£õQß3:a7mLµð¸+)„ HpóÞ4¨Z,¹’|¿iwöÆ¥Wcf‡O½Âz_  ‡aéáVE&9ôÿM½ìòð6e› &“Húšì•Îi)"dLŠ&2!$2$gvîh`×3ªï‰´žPßž¯©M‚wy×3B$$”¥ÅÝvw§—&,ΫWŸ0¯¥¥«ý50F „R ò5ûíf™ ©«Vµu†ÐGA>ê¢ /v7ì`(cœQdp!ÁÀ‡¤ciCQ5‚!D¾©Kêû«ö,JcÔU5BЭTòï&çu˹:àâäîóGG QD I@‚VÃÑÄŠ£A using namespace Rcpp; template LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { int l = x.size(); if(l < 2) return Rf_ScalarLogical(false); // Prevents seqfault for numeric(0) #101 typedef typename Rcpp::traits::storage_type::type storage_t; auto isnanT = (RTYPE == REALSXP) ? [](storage_t x) { return x != x; } : [](storage_t x) { return x == Vector::get_na(); }; if(ng == 0) { // Note: Does not return NA if all NA... can be checked with fnobs ... int j = l-1; storage_t vi = x[j]; while(isnanT(vi) && j!=0) vi = x[--j]; if(j != 0) for(int i = j; i--; ) if(!isnanT(x[i]) && x[i] != vi) return Rf_ScalarLogical(true); return Rf_ScalarLogical(false); } else { // with groups if(g.size() != l) stop("length(g) must match length(x)"); Vector valg(ng, Vector::get_na()); if(any_group) { for(int i = 0; i != l; ++i) { if(isnanT(x[i])) continue; if(isnanT(valg[g[i]-1])) { valg[g[i]-1] = x[i]; } else { if(x[i] != valg[g[i]-1]) return Rf_ScalarLogical(true); } } return Rf_ScalarLogical(false); } else { LogicalVector varyg(ng, NA_LOGICAL); int *pvaryg = LOGICAL(varyg), gi; // seems to bring a tiny gain.. for(int i = 0; i != l; ++i) { if(isnanT(x[i])) continue; gi = g[i]-1; // slightly faster if(isnanT(valg[gi])) { valg[gi] = x[i]; pvaryg[gi] = false; } else { if(!pvaryg[gi] && x[i] != valg[gi]) { pvaryg[gi] = true; // ++ngs; // Omitting this is faster for most datasets -> most are ordered ! (i.e. PRIO Grid 1.27 vs. 1.14 seconds) // if(ngs == ng) break; } } } // Rf_setAttrib(varyg, R_NamesSymbol, R_NilValue); return varyg; } } } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } template <> LogicalVector varyingCppImpl(Vector x, int ng, IntegerVector g, bool any_group) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] LogicalVector varyingCpp(const SEXP& x, int ng = 0, const IntegerVector& g = 0, bool any_group = true){ RCPP_RETURN_VECTOR(varyingCppImpl, x, ng, g, any_group); } template SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { int col = x.ncol(); LogicalMatrix out = (ng == 0 || any_group) ? no_init_matrix(1, col) : no_init_matrix(ng, col); for(int j = col; j--; ) out(_, j) = varyingCppImpl(x(_, j), ng, g, any_group); if(drop && any_group) { Rf_setAttrib(out, R_DimSymbol, R_NilValue); // Rf_dimgets(out, R_NilValue); -> Doesn't work ! // Rf_setAttrib(out, R_NamesSymbol, colnames(x)); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); } else { colnames(out) = colnames(x); } return out; } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } template <> SEXP varyingmCppImpl(Matrix x, int ng, IntegerVector g, bool any_group, bool drop) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP varyingmCpp(const SEXP& x, int ng = 0, const IntegerVector& g = 0, bool any_group = true, bool drop = true){ RCPP_RETURN_MATRIX(varyingmCppImpl, x, ng, g, any_group, drop); } // [[Rcpp::export]] SEXP varyinglCpp(const List& x, int ng = 0, const IntegerVector& g = 0, bool any_group = true, bool drop = true) { int l = x.size(); List out(l); for(int j = l; j--; ) { switch(TYPEOF(x[j])) { case REALSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; case INTSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; case STRSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; case LGLSXP: out[j] = varyingCppImpl(x[j], ng, g, any_group); break; default: stop("Not supported SEXP type !"); } } if(drop && any_group) { LogicalVector outl = no_init_vector(l); for(int i = l; i--; ) outl[i] = out[i]; Rf_setAttrib(outl, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return outl; } else { SHALLOW_DUPLICATE_ATTRIB(out, x); if(ng == 0 || any_group) Rf_setAttrib(out, R_RowNamesSymbol, Rf_ScalarInteger(1)); else Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } collapse/src/ExportSymbols.c0000644000176200001440000002744115202504365015653 0ustar liggesusers#include "collapse_c.h" #include "collapse_cpp.h" static const R_CMethodDef CEntries[] = { {"C_multi_yw", (DL_FUNC) &multi_yw, 10}, {NULL, NULL, 0} }; static const R_CallMethodDef CallEntries[] = { {"Cpp_BW", (DL_FUNC) &_collapse_BWCpp, 10}, {"Cpp_BWm", (DL_FUNC) &_collapse_BWmCpp, 10}, {"Cpp_BWl", (DL_FUNC) &_collapse_BWlCpp, 10}, {"C_TRA", (DL_FUNC) &TRAC, 5}, {"C_TRAm", (DL_FUNC) &TRAmC, 5}, {"C_TRAl", (DL_FUNC) &TRAlC, 5}, {"C_fndistinct", (DL_FUNC) &fndistinctC, 4}, {"C_fndistinctl", (DL_FUNC) &fndistinctlC, 5}, {"C_fndistinctm", (DL_FUNC) &fndistinctmC, 5}, {"Cpp_pwnobsm", (DL_FUNC) &_collapse_pwnobsmCpp, 1}, {"C_fnobs", (DL_FUNC) &fnobsC, 3}, {"C_fnobsm", (DL_FUNC) &fnobsmC, 4}, {"C_fnobsl", (DL_FUNC) &fnobslC, 4}, {"Cpp_varying", (DL_FUNC) &_collapse_varyingCpp, 4}, {"Cpp_varyingm", (DL_FUNC) &_collapse_varyingmCpp, 5}, {"Cpp_varyingl", (DL_FUNC) &_collapse_varyinglCpp, 5}, {"Cpp_fbstats", (DL_FUNC) &_collapse_fbstatsCpp, 11}, {"Cpp_fbstatsm", (DL_FUNC) &_collapse_fbstatsmCpp, 10}, {"Cpp_fbstatsl", (DL_FUNC) &_collapse_fbstatslCpp, 10}, {"C_ffirst", (DL_FUNC) &ffirstC, 5}, {"C_ffirstm", (DL_FUNC) &ffirstmC, 6}, {"C_ffirstl", (DL_FUNC) &ffirstlC, 5}, {"Cpp_fdiffgrowth", (DL_FUNC) &_collapse_fdiffgrowthCpp, 12}, {"Cpp_fdiffgrowthm", (DL_FUNC) &_collapse_fdiffgrowthmCpp, 12}, {"Cpp_fdiffgrowthl", (DL_FUNC) &_collapse_fdiffgrowthlCpp, 12}, {"Cpp_flaglead", (DL_FUNC) &_collapse_flagleadCpp, 7}, {"Cpp_flagleadm", (DL_FUNC) &_collapse_flagleadmCpp, 7}, {"Cpp_flagleadl", (DL_FUNC) &_collapse_flagleadlCpp, 7}, {"C_flast", (DL_FUNC) &flastC, 4}, {"C_flastm", (DL_FUNC) &flastmC, 5}, {"C_flastl", (DL_FUNC) &flastlC, 4}, {"C_fmin", (DL_FUNC) &fminC, 4}, {"C_fminm", (DL_FUNC) &fminmC, 5}, {"C_fminl", (DL_FUNC) &fminlC, 5}, {"C_fmax", (DL_FUNC) &fmaxC, 4}, {"C_fmaxm", (DL_FUNC) &fmaxmC, 5}, {"C_fmaxl", (DL_FUNC) &fmaxlC, 5}, {"C_fmean", (DL_FUNC) &fmeanC, 7}, {"C_fmeanm", (DL_FUNC) &fmeanmC, 8}, {"C_fmeanl", (DL_FUNC) &fmeanlC, 8}, {"C_fmode", (DL_FUNC) &fmodeC, 6}, {"C_fmodem", (DL_FUNC) &fmodemC, 7}, {"C_fmodel", (DL_FUNC) &fmodelC, 6}, {"C_fnth", (DL_FUNC) &fnthC, 9}, {"C_fnthm", (DL_FUNC) &fnthmC, 8}, {"C_fnthl", (DL_FUNC) &fnthlC, 8}, {"C_fquantile", (DL_FUNC) &fquantileC, 8}, {"C_fprod", (DL_FUNC) &fprodC, 5}, {"C_fprodm", (DL_FUNC) &fprodmC, 6}, {"C_fprodl", (DL_FUNC) &fprodlC, 6}, {"Cpp_fscale", (DL_FUNC) &_collapse_fscaleCpp, 7}, {"Cpp_fscalem", (DL_FUNC) &_collapse_fscalemCpp, 7}, {"Cpp_fscalel", (DL_FUNC) &_collapse_fscalelCpp, 7}, {"C_fsum", (DL_FUNC) &fsumC, 7}, {"C_fsumm", (DL_FUNC) &fsummC, 8}, {"C_fsuml", (DL_FUNC) &fsumlC, 8}, {"Cpp_fvarsd", (DL_FUNC) &_collapse_fvarsdCpp, 8}, {"Cpp_fvarsdm", (DL_FUNC) &_collapse_fvarsdmCpp, 9}, {"Cpp_fvarsdl", (DL_FUNC) &_collapse_fvarsdlCpp, 9}, {"Cpp_mrtl", (DL_FUNC) &_collapse_mrtl, 3}, {"Cpp_mctl", (DL_FUNC) &_collapse_mctl, 3}, {"Cpp_psmat", (DL_FUNC) &_collapse_psmatCpp, 5}, {"Cpp_qF", (DL_FUNC) &_collapse_qFCpp, 5}, {"Cpp_sortunique", (DL_FUNC) &_collapse_sortuniqueCpp, 1}, {"Cpp_fdroplevels", (DL_FUNC) &_collapse_fdroplevelsCpp, 2}, {"C_setAttributes", (DL_FUNC) &setAttributes, 2}, {"C_setattributes", (DL_FUNC) &setattributes, 2}, // {"C_setAttr", (DL_FUNC) &CsetAttr, 3}, // {"C_setattr", (DL_FUNC) &setattr, 3}, {"C_duplAttributes", (DL_FUNC) &duplAttributes, 2}, // {"C_duplattributes", (DL_FUNC) &duplattributes, 2}, // {"C_cond_duplAttributes", (DL_FUNC) &cond_duplAttributes, 2}, {"C_copyMostAttributes", (DL_FUNC) ©MostAttributes, 2}, // {"C_cond_duplattributes", (DL_FUNC) &cond_duplattributes, 2}, {"C_setAttrib", (DL_FUNC) &CsetAttrib, 2}, {"C_copyAttrib", (DL_FUNC) &CcopyAttrib, 2}, {"C_copyMostAttrib", (DL_FUNC) &CcopyMostAttrib, 2}, {"C_groups2GRP", (DL_FUNC) &groups2GRP, 3}, {"C_gsplit", (DL_FUNC) &gsplit, 3}, {"C_greorder", (DL_FUNC) &greorder, 2}, {"C_lassign", (DL_FUNC) &lassign, 4}, {"C_gwhich_first", (DL_FUNC) &gwhich_first, 3}, {"C_gslice_multi", (DL_FUNC) &gslice_multi, 4}, {"C_funlist", (DL_FUNC) &funlist, 1}, {"Cpp_seqid", (DL_FUNC) &_collapse_seqid, 7}, {"Cpp_groupid", (DL_FUNC) &_collapse_groupid, 5}, {"C_collapse_init", (DL_FUNC) &collapse_init, 1}, {"C_dt_na", (DL_FUNC) &dt_na, 4}, {"C_allNA", (DL_FUNC) &allNAv, 2}, {"C_na_rm", (DL_FUNC) &Cna_rm, 1}, {"C_whichv", (DL_FUNC) &whichv, 3}, {"C_anyallv", (DL_FUNC) &anyallv, 3}, {"C_setcopyv", (DL_FUNC) &setcopyv, 6}, {"C_setop", (DL_FUNC) &setop, 4}, {"C_vtypes", (DL_FUNC) &vtypes, 2}, {"C_vlengths", (DL_FUNC) &vlengths, 2}, {"C_multiassign", (DL_FUNC) &multiassign, 3}, {"C_vlabels", (DL_FUNC) &vlabels, 3}, {"C_setvlabels", (DL_FUNC) &setvlabels, 4}, {"C_setnames", (DL_FUNC) &setnames, 2}, {"C_group", (DL_FUNC) &groupVec, 3}, {"C_groupat", (DL_FUNC) &groupAtVec, 3}, {"C_funique", (DL_FUNC) &funiqueC, 1}, {"C_fmatch", (DL_FUNC) &fmatchC, 5}, {"C_multi_match", (DL_FUNC) &multi_match, 2}, {"C_radixsort", (DL_FUNC) &Cradixsort, 6}, {"C_frankds", (DL_FUNC) &frankds, 4}, {"C_pacf1", (DL_FUNC) &pacf1, 2}, {"C_rbindlist", (DL_FUNC) &rbindlist, 4}, {"C_setcolorder", (DL_FUNC) &setcolorder, 2}, {"C_subsetCols", (DL_FUNC) &subsetCols, 3}, {"C_alloc", (DL_FUNC) &falloc, 3}, {"C_frange", (DL_FUNC) &frange, 3}, {"C_fdist", (DL_FUNC) &fdist, 4}, {"C_fnrow", (DL_FUNC) &fnrowC, 1}, {"C_createeptr", (DL_FUNC) &createeptr, 1}, {"C_geteptr", (DL_FUNC) &geteptr, 1}, {"C_fcrosscolon", (DL_FUNC) &fcrosscolon, 4}, {"C_fwtabulate", (DL_FUNC) &fwtabulate, 4}, {"C_GRP_default_drop", (DL_FUNC) &GRP_default_drop_C, 4}, {"C_vecgcd", (DL_FUNC) &vecgcd, 1}, {"C_issorted", (DL_FUNC) &Cissorted, 2}, {"C_all_funs", (DL_FUNC) &all_funs, 1}, {"C_unlock_collapse_namespace", (DL_FUNC) &unlock_collapse_namespace, 1}, {"C_pivot_long", (DL_FUNC) &pivot_long, 3}, {"C_pivot_wide", (DL_FUNC) &pivot_wide, 7}, {"C_sort_merge_join", (DL_FUNC) &sort_merge_join, 4}, {"C_replace_outliers", (DL_FUNC) &replace_outliers, 5}, {"C_na_locf", (DL_FUNC) &na_locf, 2}, {"C_na_focb", (DL_FUNC) &na_focb, 2}, // {"C_aschar", (DL_FUNC) &CasChar, 1}, {"C_subsetDT", (DL_FUNC) &subsetDT, 4}, {"C_subsetVector", (DL_FUNC) &subsetVector, 3}, {"C_alloccol", (DL_FUNC) &Calloccol, 1}, {"C_fcumsum", (DL_FUNC) &fcumsumC, 6}, {"C_fcumsumm", (DL_FUNC) &fcumsummC, 6}, {"C_fcumsuml", (DL_FUNC) &fcumsumlC, 6}, {NULL, NULL, 0} }; void R_init_collapse(DllInfo *dll) { R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); /* C API Functions start with cp_, and can be imported from C using e.g. cp_dist = R_GetCCallable("collapse", "cp_dist"), after declaring cp_dist with the arguments of the function (fdist). See section 5.4.3 of Writing R Extensions: https://cran.r-project.org/doc/manuals/R-exts.html#Registering-native-routines The C API is not documented, but I have indicated corresponding R functions for C functions callable from R. For confident use, look up functions in the C code under src/, and where/how it is used under R/. Feel free to request export of additional C/C++ functions. I do not a priori guarantee C API stability, so I recommend you contact me if you want to use a C function in a package. I am always happy to answer questions regarding the arguments and use of certain C functions. */ // Functions that fully operate on R vectors (SEXP) // Corresponding R function(s) R_RegisterCCallable("collapse", "cp_TRA", (DL_FUNC) &TRAC); // TRA.default() R_RegisterCCallable("collapse", "cp_setop", (DL_FUNC) &setop); // setop() R_RegisterCCallable("collapse", "cp_range", (DL_FUNC) &frange); // frange() R_RegisterCCallable("collapse", "cp_dist", (DL_FUNC) &fdist); // fdist() R_RegisterCCallable("collapse", "cp_quantile", (DL_FUNC) &fquantileC); // .quantile() R_RegisterCCallable("collapse", "cp_match", (DL_FUNC) &fmatchC); // fmatch() R_RegisterCCallable("collapse", "cp_group", (DL_FUNC) &groupVec); // group(): main hash-based grouping function: for atomic vectors and data frames R_RegisterCCallable("collapse", "cp_group_at", (DL_FUNC) &groupAtVec); // qG(.., sort = FALSE): same but only works with atomic vectors and has option to keep missing values R_RegisterCCallable("collapse", "cp_unique", (DL_FUNC) &funiqueC); // funique.default() R_RegisterCCallable("collapse", "cp_radixorder", (DL_FUNC) &Cradixsort); // radixorderv(): radix ordering from pairlists (LISTSXP) of R vectors R_RegisterCCallable("collapse", "cp_rbindlist", (DL_FUNC) &rbindlist); // data.table::rbindlist(), underlying collapse::unlist2d() R_RegisterCCallable("collapse", "cp_alloc", (DL_FUNC) &falloc); // falloc() R_RegisterCCallable("collapse", "cp_na_rm", (DL_FUNC) &Cna_rm); // na_rm() R_RegisterCCallable("collapse", "cp_missing_cases", (DL_FUNC) &dt_na); // missing_cases() R_RegisterCCallable("collapse", "cp_whichv", (DL_FUNC) &whichv); // whichv(), whichNA() R_RegisterCCallable("collapse", "cp_anyallv", (DL_FUNC) &anyallv); // anyv(), allv() R_RegisterCCallable("collapse", "cp_allNA", (DL_FUNC) &allNAv); // allNA() R_RegisterCCallable("collapse", "cp_setcopyv", (DL_FUNC) &setcopyv); // setv(), copyv() R_RegisterCCallable("collapse", "cp_multiassign", (DL_FUNC) &multiassign); // massign() R_RegisterCCallable("collapse", "cp_vecgcd", (DL_FUNC) &vecgcd); // vgcd() R_RegisterCCallable("collapse", "cp_all_funs", (DL_FUNC) &all_funs); // all_funs() R_RegisterCCallable("collapse", "cp_subsetVector", (DL_FUNC) &subsetVector); // fsubset.default() R_RegisterCCallable("collapse", "cp_subsetCols", (DL_FUNC) &subsetCols); // get_vars(), fselect() R_RegisterCCallable("collapse", "cp_subsetDataFrame", (DL_FUNC) &subsetDT); // fsubset.data.frame() // Functions that (partially or fully) operate on C arrays (pointers) // These functions provide the ordering (1 indexed) of a single numeric R vector, or integer or double C arrays R_RegisterCCallable("collapse", "cp_num1radixorder", (DL_FUNC) &num1radixsort); // See bottom of base_radixsort.c R_RegisterCCallable("collapse", "cp_dradixorder", (DL_FUNC) &dradixsort); R_RegisterCCallable("collapse", "cp_iradixorder", (DL_FUNC) &iradixsort); // These functions are all quantile / nth'element related, see fnth_fmedian_fquantile.c R_RegisterCCallable("collapse", "cp_dquickselect_elem", (DL_FUNC) &dquickselect_elem); // These functions permute the input array R_RegisterCCallable("collapse", "cp_iquickselect_elem", (DL_FUNC) &iquickselect_elem); R_RegisterCCallable("collapse", "cp_dquickselect", (DL_FUNC) &dquickselect); R_RegisterCCallable("collapse", "cp_iquickselect", (DL_FUNC) &iquickselect); R_RegisterCCallable("collapse", "cp_nth_int", (DL_FUNC) &nth_int); // These functions don't permute the input array, and can remove NA's R_RegisterCCallable("collapse", "cp_nth_double", (DL_FUNC) &nth_double); R_RegisterCCallable("collapse", "cp_nth_int_ord", (DL_FUNC) &nth_int_ord); R_RegisterCCallable("collapse", "cp_nth_double_ord", (DL_FUNC) &nth_double_ord); R_RegisterCCallable("collapse", "cp_w_nth_int_ord", (DL_FUNC) &w_nth_int_ord); // Weighted quantiles R_RegisterCCallable("collapse", "cp_w_nth_double_ord", (DL_FUNC) &w_nth_double_ord); R_RegisterCCallable("collapse", "cp_w_nth_int_qsort", (DL_FUNC) &w_nth_int_qsort); R_RegisterCCallable("collapse", "cp_w_nth_double_qsort", (DL_FUNC) &w_nth_double_qsort); R_RegisterCCallable("collapse", "cp_nth_impl", (DL_FUNC) &nth_impl); // Estimate a (weighted) quantile on an R vector R_RegisterCCallable("collapse", "cp_nth_ord_impl", (DL_FUNC) &nth_ord_impl); R_RegisterCCallable("collapse", "cp_w_nth_ord_impl", (DL_FUNC) &w_nth_ord_impl); } collapse/src/fmean.c0000644000176200001440000005550015202427630014103 0ustar liggesusers#include "collapse_c.h" // #include // Adapted from fsum.c #define FMEAN_N_ACC 4 double fmean_double_impl(const double *restrict px, const int narm, const int l) { if(narm) { int j = 1, n = 1; double mean = px[0]; while(ISNAN(mean) && j!=l) mean = px[j++]; if(j != l) { double acc[FMEAN_N_ACC] = {0, 0, 0, 0}; int nacc[FMEAN_N_ACC] = {0, 0, 0, 0}; int rem = j + (l - j) % FMEAN_N_ACC; for(int i = j; i < rem; ++i) { int tmp = NISNAN(px[i]); acc[0] += tmp ? px[i] : 0.0; nacc[0] += tmp; } for(int i = rem; i < l; i += FMEAN_N_ACC) { for(int k = 0; k < FMEAN_N_ACC; ++k) { int tmp = NISNAN(px[i + k]); acc[k] += tmp ? px[i + k] : 0.0; nacc[k] += tmp; } } for(int k = 0; k < FMEAN_N_ACC; ++k) { mean += acc[k]; n += nacc[k]; } } return mean / n; } double acc[FMEAN_N_ACC] = {0, 0, 0, 0}; int rem = l % FMEAN_N_ACC; for(int i = 0; i < rem; ++i) acc[0] += px[i]; for(int i = rem; i < l; i += FMEAN_N_ACC) { for(int k = 0; k < FMEAN_N_ACC; ++k) acc[k] += px[i + k]; } return (acc[0] + acc[1] + acc[2] + acc[3]) / l; } double fmean_double_omp_impl(const double *restrict px, const int narm, const int l, const int nthreads) { if(narm) { double acc[FMEAN_N_ACC] = {0, 0, 0, 0}; int nacc[FMEAN_N_ACC] = {0, 0, 0, 0}; int rem = l % FMEAN_N_ACC; for(int i = 0; i < rem; ++i) { int tmp = NISNAN(px[i]); acc[0] += tmp ? px[i] : 0.0; nacc[0] += tmp; } #pragma omp parallel for simd num_threads(nthreads) reduction(+:acc[:FMEAN_N_ACC],nacc[:FMEAN_N_ACC]) for(int i = rem; i < l; i += FMEAN_N_ACC) { for(int k = 0; k < FMEAN_N_ACC; ++k) { int tmp = NISNAN(px[i + k]); acc[k] += tmp ? px[i + k] : 0.0; nacc[k] += tmp; } } double mean = acc[0] + acc[1] + acc[2] + acc[3]; int n = nacc[0] + nacc[1] + nacc[2] + nacc[3]; return n == 0 ? NA_REAL : mean / n; } double acc[FMEAN_N_ACC] = {0, 0, 0, 0}; int rem = l % FMEAN_N_ACC; for(int i = 0; i < rem; ++i) acc[0] += px[i]; #pragma omp parallel for simd num_threads(nthreads) reduction(+:acc[:FMEAN_N_ACC]) for(int i = rem; i < l; i += FMEAN_N_ACC) { for(int k = 0; k < FMEAN_N_ACC; ++k) acc[k] += px[i + k]; } return (acc[0] + acc[1] + acc[2] + acc[3]) / l; } void fmean_double_g_impl(double *restrict pout, const double *restrict px, const int ng, const int *restrict pg, const int *restrict pgs, const int narm, const int l) { memset(pout, 0, sizeof(double) * ng); if(narm) { int *restrict n = (int*)R_Calloc(ng, int); for(int i = 0, gi; i != l; ++i) { if(ISNAN(px[i])) continue; gi = pg[i]-1; pout[gi] += px[i]; ++n[gi]; } for(int i = ng; i--; ) { if(n[i] == 0) pout[i] = NA_REAL; else pout[i] /= n[i]; } R_Free(n); } else { --pout; for(int i = l; i--; ) pout[pg[i]] += px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. ++pout; for(int i = ng; i--; ) pout[i] /= pgs[i]; } } double fmean_weights_impl(const double *restrict px, const double *restrict pw, const int narm, const int l) { double mean, sumw; if(narm) { int j = 0, end = l-1; while((ISNAN(px[j]) || ISNAN(pw[j])) && j!=end) ++j; sumw = pw[j]; mean = px[j] * sumw; if(j != end) { #pragma omp simd reduction(+:mean,sumw) for(int i = j+1; i < l; ++i) { int tmp = NISNAN(px[i]) && NISNAN(pw[i]); mean += tmp ? px[i] * pw[i] : 0.0; sumw += tmp ? pw[i] : 0.0; } } } else { mean = 0, sumw = 0; #pragma omp simd reduction(+:mean,sumw) for(int i = 0; i < l; ++i) { // if(ISNAN(px[i]) || ISNAN(pw[i])) { // mean = px[i] + pw[i]; // break; // } mean += px[i] * pw[i]; sumw += pw[i]; } } return mean / sumw; } double fmean_weights_omp_impl(const double *restrict px, const double *restrict pw, const int narm, const int l, const int nthreads) { double mean = 0, sumw = 0; if(narm) { #pragma omp parallel for simd num_threads(nthreads) reduction(+:mean,sumw) for(int i = 0; i < l; ++i) { int tmp = NISNAN(px[i]) + NISNAN(pw[i]) == 2; // && doesn't vectorize for some reason mean += tmp ? px[i] * pw[i] : 0.0; sumw += tmp ? pw[i] : 0.0; } if(mean == 0 && sumw == 0) sumw = NA_REAL; } else { #pragma omp parallel for simd num_threads(nthreads) reduction(+:mean,sumw) for(int i = 0; i < l; ++i) { mean += px[i] * pw[i]; sumw += pw[i]; } } return mean / sumw; } void fmean_weights_g_impl(double *restrict pout, const double *restrict px, const int ng, const int *restrict pg, const double *restrict pw, const int narm, const int l) { double *restrict sumw = (double*)R_Calloc(ng, double); memset(pout, 0, sizeof(double) * ng); if(narm) { for(int i = 0, gi; i != l; ++i) { if(ISNAN(px[i]) || ISNAN(pw[i])) continue; gi = pg[i]-1; pout[gi] += px[i] * pw[i]; sumw[gi] += pw[i]; } for(int i = ng; i--; ) { if(sumw[i] == 0) pout[i] = NA_REAL; else pout[i] /= sumw[i]; } } else { for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; pout[gi] += px[i] * pw[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. sumw[gi] += pw[i]; } for(int i = ng; i--; ) pout[i] /= sumw[i]; } R_Free(sumw); } double fmean_int_impl(const int *restrict px, const int narm, const int l) { long long mean; double dmean; if(narm) { int j = l-1, k = 1; while(px[j] == NA_INTEGER && j!=0) --j; mean = px[j]; if(j == 0 && px[j] == NA_INTEGER) return NA_REAL; for(int i = j; i--; ) { if(px[i] == NA_INTEGER) continue; mean += px[i]; ++k; } dmean = (double)mean / k; } else { mean = 0; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) return NA_REAL; mean += px[i]; } dmean = (double)mean / l; } return dmean; } double fmean_int_omp_impl(const int *restrict px, const int narm, const int l, const int nthreads) { long long mean = 0; double dmean; if(narm) { int n = 0; #pragma omp parallel for simd num_threads(nthreads) reduction(+:mean,n) for(int i = 0; i < l; ++i) { int tmp = px[i] != NA_INTEGER; mean += tmp ? px[i] : 0; n += tmp ? 1 : 0; } dmean = n == 0 ? NA_REAL : (double)mean / n; } else { if(px[0] == NA_INTEGER || px[l-1] == NA_INTEGER) return NA_REAL; #pragma omp parallel for simd num_threads(nthreads) reduction(+:mean) for(int i = 0; i < l; ++i) mean += px[i]; dmean = (double)mean / l; } return dmean; } void fmean_int_g_impl(double *restrict pout, const int *restrict px, const int ng, const int *restrict pg, const int *restrict pgs, const int narm, const int l) { memset(pout, 0, sizeof(double) * ng); if(narm) { int *restrict n = (int*)R_Calloc(ng, int); for(int i = 0, gi; i != l; ++i) { if(px[i] == NA_INTEGER) continue; gi = pg[i]-1; pout[gi] += px[i]; ++n[gi]; } for(int i = ng; i--; ) { if(n[i] == 0) pout[i] = NA_REAL; else pout[i] /= n[i]; } R_Free(n); } else { --pout; for(int i = l; i--; ) { pout[pg[i]] += px[i] == NA_INTEGER ? NA_REAL : px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } ++pout; for(int i = ng; i--; ) pout[i] /= pgs[i]; } } SEXP fmeanC(SEXP x, SEXP Rng, SEXP g, SEXP gs, SEXP w, SEXP Rnarm, SEXP Rnthreads) { const int l = length(x), ng = asInteger(Rng), narm = asLogical(Rnarm), nwl = isNull(w); int tx = TYPEOF(x), nthreads = asInteger(Rnthreads), nprotect = 1, *restrict pgs = &nprotect; // ALTREP methods for compact sequences: not safe yet and not part of the API. // if(ALTREP(x) && ng == 0 && nwl) { // switch(tx) { // case INTSXP: return ALTINTEGER_SUM(x, (Rboolean)narm); // case LGLSXP: return ALTLOGICAL_SUM(x, (Rboolean)narm); // case REALSXP: return ALTREAL_SUM(x, (Rboolean)narm); // default: error("ALTREP object must be integer or real typed"); // } // } if(l < 1) return tx == REALSXP ? x : allocVector(REALSXP, 0); // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(nthreads > max_threads) nthreads = max_threads; if(l < 100000) nthreads = 1; // No improvements from multithreading on small data. if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(REALSXP, ng == 0 ? 1 : ng)); if(nwl) { if(ng && !narm) { if(length(gs) == ng) pgs = INTEGER(gs); else { // TODO: this is probably slower than narm, which requires only one loop... SEXP gs_ = PROTECT(allocVector(INTSXP, ng)); ++nprotect; pgs = INTEGER(gs_); memset(pgs, 0, sizeof(int) * ng); for(int i = 0, *restrict pg = INTEGER(g); i != l; ++i) ++pgs[pg[i]-1]; } } switch(tx) { case REALSXP: { if(ng > 0) fmean_double_g_impl(REAL(out), REAL(x), ng, INTEGER(g), pgs, narm, l); else REAL(out)[0] = (nthreads <= 1) ? fmean_double_impl(REAL(x), narm, l) : fmean_double_omp_impl(REAL(x), narm, l, nthreads); break; } case INTSXP: { if(ng > 0) fmean_int_g_impl(REAL(out), INTEGER(x), ng, INTEGER(g), pgs, narm, l); else REAL(out)[0] = nthreads <= 1 ? fmean_int_impl(INTEGER(x), narm, l) : fmean_int_omp_impl(INTEGER(x), narm, l, nthreads); break; } default: error("Unsupported SEXP type: '%s'", type2char(tx)); } } else { if(l != length(w)) error("length(w) must match length(x)"); if(TYPEOF(w) != REALSXP) { if(TYPEOF(w) != INTSXP && TYPEOF(w) != LGLSXP) error("weights must be double or integer"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } if(tx != REALSXP) { if(tx != INTSXP) error("Unsupported SEXP type: '%s'", type2char(tx)); x = PROTECT(coerceVector(x, REALSXP)); ++nprotect; } double *restrict px = REAL(x), *restrict pw = REAL(w); if(ng == 0) { REAL(out)[0] = (nthreads <= 1) ? fmean_weights_impl(px, pw, narm, l) : fmean_weights_omp_impl(px, pw, narm, l, nthreads); } else fmean_weights_g_impl(REAL(out), px, ng, INTEGER(g), pw, narm, l); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); // For example "Units" objects... UNPROTECT(nprotect); return out; } SEXP fmeanmC(SEXP x, SEXP Rng, SEXP g, SEXP gs, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); const int l = INTEGER(dim)[0], col = INTEGER(dim)[1], *restrict pg = INTEGER(g), ng = asInteger(Rng), narm = asLogical(Rnarm); int tx = TYPEOF(x), nthreads = asInteger(Rnthreads), nprotect = 1, *restrict pgs = &nprotect; if(l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(l*col < 100000) nthreads = 1; // No gains from multithreading on small data if(nthreads > max_threads) nthreads = max_threads; if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(REALSXP, ng == 0 ? col : col * ng)); double *restrict pout = REAL(out); if(isNull(w)) { if(ng && !narm) { if(length(gs) == ng) pgs = INTEGER(gs); else { SEXP gs_ = PROTECT(allocVector(INTSXP, ng)); ++nprotect; pgs = INTEGER(gs_); memset(pgs, 0, sizeof(int) * ng); for(int i = 0, *restrict pg = INTEGER(g); i != l; ++i) ++pgs[pg[i]-1]; } } switch(tx) { case REALSXP: { const double *px = REAL(x); if(ng == 0) { if(nthreads <= 1) { for(int j = 0; j != col; ++j) pout[j] = fmean_double_impl(px + j*l, narm, l); } else if(col >= nthreads) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pout[j] = fmean_double_impl(px + j*l, narm, l); } else { for(int j = 0; j != col; ++j) pout[j] = fmean_double_omp_impl(px + j*l, narm, l, nthreads); } } else { if(nthreads <= 1 || col == 1) { for(int j = 0; j != col; ++j) fmean_double_g_impl(pout + j*ng, px + j*l, ng, pg, pgs, narm, l); } else { if(nthreads > col) nthreads = col; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) fmean_double_g_impl(pout + j*ng, px + j*l, ng, pg, pgs, narm, l); } } break; } case INTSXP: { const int *px = INTEGER(x); if(ng > 0) { if(nthreads <= 1 || col == 1) { for(int j = 0; j != col; ++j) fmean_int_g_impl(pout + j*ng, px + j*l, ng, pg, pgs, narm, l); } else { if(nthreads > col) nthreads = col; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) fmean_int_g_impl(pout + j*ng, px + j*l, ng, pg, pgs, narm, l); } } else { if(nthreads <= 1) { for(int j = 0; j != col; ++j) pout[j] = fmean_int_impl(px + j*l, narm, l); } else if(col >= nthreads) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pout[j] = fmean_int_impl(px + j*l, narm, l); } else { for(int j = 0; j != col; ++j) pout[j] = fmean_int_omp_impl(px + j*l, narm, l, nthreads); } } break; } default: error("Unsupported SEXP type: '%s'", type2char(tx)); } } else { if(l != length(w)) error("length(w) must match nrow(x)"); if(TYPEOF(w) != REALSXP) { if(TYPEOF(w) != INTSXP && TYPEOF(w) != LGLSXP) error("weights must be double or integer"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } if(tx != REALSXP) { if(tx != INTSXP) error("Unsupported SEXP type: '%s'", type2char(tx)); x = PROTECT(coerceVector(x, REALSXP)); ++nprotect; } double *px = REAL(x), *restrict pw = REAL(w), *pout = REAL(out); if(ng == 0) { if(nthreads <= 1) { for(int j = 0; j != col; ++j) pout[j] = fmean_weights_impl(px + j*l, pw, narm, l); } else if(col >= nthreads) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pout[j] = fmean_weights_impl(px + j*l, pw, narm, l); } else { for(int j = 0; j != col; ++j) pout[j] = fmean_weights_omp_impl(px + j*l, pw, narm, l, nthreads); } } else { if(nthreads <= 1 || col == 1) { for(int j = 0; j != col; ++j) fmean_weights_g_impl(pout + j*ng, px + j*l, ng, pg, pw, narm, l); } else { if(nthreads > col) nthreads = col; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) fmean_weights_g_impl(pout + j*ng, px + j*l, ng, pg, pw, narm, l); } } } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(nprotect); return out; } // For safe multithreading across data frame columns double fmean_impl_dbl(SEXP x, int narm, int nthreads) { int l = length(x); if(l < 1) return NA_REAL; if(nthreads <= 1) switch(TYPEOF(x)) { case REALSXP: return fmean_double_impl(REAL(x), narm, l); case LGLSXP: case INTSXP: return fmean_int_impl(INTEGER(x), narm, l); default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } switch(TYPEOF(x)) { case REALSXP: return fmean_double_omp_impl(REAL(x), narm, l, nthreads); case LGLSXP: case INTSXP: return fmean_int_omp_impl(INTEGER(x), narm, l, nthreads); default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } } SEXP fmean_impl_SEXP(SEXP x, int narm, int nthreads) { return ScalarReal(fmean_impl_dbl(x, narm, nthreads)); } double fmean_w_impl_dbl(SEXP x, double *pw, int narm, int nthreads) { int l = length(x); if(l < 1) return NA_REAL; if(TYPEOF(x) != REALSXP) { if(TYPEOF(x) != INTSXP && TYPEOF(x) != LGLSXP) error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); x = PROTECT(coerceVector(x, REALSXP)); double res = (nthreads <= 1) ? fmean_weights_impl(REAL(x), pw, narm, l) : fmean_weights_omp_impl(REAL(x), pw, narm, l, nthreads); UNPROTECT(1); return res; } return (nthreads <= 1) ? fmean_weights_impl(REAL(x), pw, narm, l) : fmean_weights_omp_impl(REAL(x), pw, narm, l, nthreads); } SEXP fmean_w_impl_SEXP(SEXP x, double *pw, int narm, int nthreads) { return ScalarReal(fmean_w_impl_dbl(x, pw, narm, nthreads)); } SEXP fmean_g_impl(SEXP x, const int ng, const int *pg, const int *pgs, int narm) { int l = length(x); if(l < 1) return ScalarReal(NA_REAL); SEXP res = PROTECT(allocVector(REALSXP, ng)); switch(TYPEOF(x)) { case REALSXP: fmean_double_g_impl(REAL(res), REAL(x), ng, pg, pgs, narm, l); break; case LGLSXP: case INTSXP: fmean_int_g_impl(REAL(res), INTEGER(x), ng, pg, pgs, narm, l); break; default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(1); return res; } void fmean_g_omp_impl(SEXP x, void *pres, const int ng, const int *pg, const int *pgs, int narm) { switch(TYPEOF(x)) { case REALSXP: fmean_double_g_impl(pres, REAL(x), ng, pg, pgs, narm, length(x)); break; case LGLSXP: case INTSXP: fmean_int_g_impl(pres, INTEGER(x), ng, pg, pgs, narm, length(x)); break; default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } } SEXP fmean_wg_impl(SEXP x, const int ng, const int *pg, double *pw, int narm) { int l = length(x), nprotect = 1; if(l < 1) return ScalarReal(NA_REAL); if(TYPEOF(x) != REALSXP) { if(TYPEOF(x) != INTSXP && TYPEOF(x) != LGLSXP) error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); x = PROTECT(coerceVector(x, REALSXP)); ++nprotect; } SEXP res = PROTECT(allocVector(REALSXP, ng)); fmean_weights_g_impl(REAL(res), REAL(x), ng, pg, pw, narm, l); if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(nprotect); return res; } #undef COLWISE_FMEAN_LIST #define COLWISE_FMEAN_LIST(FUN, WFUN) \ if(nwl) { \ if(nthreads > 1 && l >= nthreads) { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < l; ++j) pout[j] = FUN(px[j], narm, 1); \ } else { \ for(int j = 0; j != l; ++j) pout[j] = FUN(px[j], narm, nthreads); \ } \ } else { \ double *restrict pw = REAL(w); \ if(nthreads > 1 && l >= nthreads) { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < l; ++j) pout[j] = WFUN(px[j], pw, narm, 1); \ } else { \ for(int j = 0; j != l; ++j) pout[j] = WFUN(px[j], pw, narm, nthreads); \ } \ } SEXP fmeanlC(SEXP x, SEXP Rng, SEXP g, SEXP gs, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads) { int l = length(x), ng = asInteger(Rng), nthreads = asInteger(Rnthreads), nwl = isNull(w), narm = asLogical(Rnarm), nprotect = 1; // TODO: Disable multithreading if overall data size is small? if(l < 1) return x; // needed ?? if(nthreads > max_threads) nthreads = max_threads; if(!nwl) { if(length(VECTOR_ELT(x, 0)) != length(w)) error("length(w) must match nrow(x)"); if(TYPEOF(w) != REALSXP) { if(TYPEOF(w) != INTSXP && TYPEOF(w) != LGLSXP) error("weights must be double or integer"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } } if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)); const SEXP *restrict px = SEXPPTR_RO(x); double *restrict pout = REAL(out); COLWISE_FMEAN_LIST(fmean_impl_dbl, fmean_w_impl_dbl); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(nprotect); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)), *restrict pout = SEXPPTR(out); const SEXP *restrict px = SEXPPTR_RO(x); if(ng == 0) { COLWISE_FMEAN_LIST(fmean_impl_SEXP, fmean_w_impl_SEXP); // Needed because including it in an OpenMP loop together with ScalarReal() is not thread safe for(int j = 0; j < l; ++j) { SEXP xj = px[j]; if(ANY_ATTRIB(xj) && !(isObject(xj) && inherits(xj, "ts"))) copyMostAttrib(xj, pout[j]); } } else { if(length(VECTOR_ELT(x, 0)) != length(g)) error("length(g) must match length(x)"); const int *restrict pg = INTEGER(g); if(nthreads > l) nthreads = l; if(nwl) { // no weights int *restrict pgs = &nprotect; if(!narm) { if(length(gs) == ng) pgs = INTEGER(gs); else { SEXP gs_ = PROTECT(allocVector(INTSXP, ng)); ++nprotect; pgs = INTEGER(gs_); memset(pgs, 0, sizeof(int) * ng); for(int i = 0, nrx = length(g); i != nrx; ++i) ++pgs[pg[i]-1]; } } if(nthreads > 1 && l > 1) { for(int j = 0; j != l; ++j) { SEXP xj = px[j], outj; SET_VECTOR_ELT(out, j, outj = allocVector(REALSXP, ng)); if(ANY_ATTRIB(xj) && !(isObject(xj) && inherits(xj, "ts"))) copyMostAttrib(xj, outj); } #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) fmean_g_omp_impl(px[j], DPTR(pout[j]), ng, pg, pgs, narm); } else { for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fmean_g_impl(px[j], ng, pg, pgs, narm)); } } else { double *restrict pw = REAL(w); if(nthreads > 1 && l > 1) { int nrx = length(g); for(int j = 0, dup = 0; j != l; ++j) { SEXP xj = px[j], outj; SET_VECTOR_ELT(out, j, outj = allocVector(REALSXP, ng)); if(ANY_ATTRIB(xj) && !(isObject(xj) && inherits(xj, "ts"))) copyMostAttrib(xj, outj); if(TYPEOF(xj) != REALSXP) { if(TYPEOF(xj) != INTSXP && TYPEOF(xj) != LGLSXP) error("Unsupported SEXP type: '%s'", type2char(TYPEOF(xj))); if(dup == 0) {x = PROTECT(shallow_duplicate(x)); ++nprotect; dup = 1;} SET_VECTOR_ELT(x, j, coerceVector(xj, REALSXP)); px = SEXPPTR_RO(x); // Fix suggested by ChatGPT } } #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) fmean_weights_g_impl(REAL(pout[j]), REAL(px[j]), ng, pg, pw, narm, nrx); } else { for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fmean_wg_impl(px[j], ng, pg, pw, narm)); } } } DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } collapse/src/flag.cpp0000644000176200001440000012675215113724755014307 0ustar liggesusers#include using namespace Rcpp; LogicalVector intToLogical(IntegerVector x) { return LogicalVector(x.begin(), x.end()); } // 7th version: Irregular time series and panels supported ! template Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { // typedef typename Rcpp::traits::storage_type::type storage_t; // storage_t fil; Vector fil(1); if(Rf_isNull(fill)) { // fill != fill // Not necessary !! fil = Vector::get_na(); } else { fil = as >(fill); //as(fill); -> doesn't work for Character vector fill !! } auto ff = fil[0]; int l = x.size(), ns = n.size(), prev = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = 0; i != ns; ++i) { if(n[i] == prev) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot !! prev = n[i]; if(prev < 0) { if(prev == NA_INTEGER) stop("NA in n"); absn[i] = -prev; } else absn[i] = prev; } if(ns == 1) names = false; CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; // NumericVector(abs(n)) CharacterVector colnam = names ? no_init_vector(ns) : no_init_vector(1); Matrix out = no_init_matrix(l, ns); if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; int i = 0; while(i != np) outp[i++] = ff; for( ; i != l; ++i) outp[i] = x[i - np]; } else if(np<0) { if(names) colnam[p] = "F" + nc[p]; int i = l, st = l+np; while(i != st) outp[--i] = ff; for( ; i--; ) outp[i] = x[i - np]; } else { if(names) colnam[p] = "--"; outp = x; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); IntegerVector omap(osize), ord2 = no_init_vector(l); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } // return as >(omap); for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else if(np<0) { if(names) colnam[p] = "F" + nc[p]; for(int i = 0, osnp = osize+np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else { if(names) colnam[p] = "--"; outp = x; } } } } else { // With groups if(l != g.size()) stop("length(x) must match length(g)"); int ags = l/ng, ngp = ng+1; if(Rf_isNull(t)) { // Ordered data // int seen[ngp], memsize = sizeof(int)*ngp; for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) { outp[i] = x[i-np]; } else { outp[i] = ff; ++seen[g[i]]; } } } else if(np<0) { std::vector seen(ngp); // memset(seen, 0, memsize); if(names) colnam[p] = "F" + nc[p]; for(int i = l; i--; ) { // good?? if(seen[g[i]] == np) { outp[i] = x[i-np]; } else { outp[i] = ff; --seen[g[i]]; } } } else { if(names) colnam[p] = "--"; outp = x; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); IntegerVector omap(temp), ord2 = no_init_vector(l); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; temp = cgs[g[i]] + ord2[i]; if(omap[temp]) stop("Repeated values of timevar within one or more groups"); omap[temp] = i+1; // needed to add 1 to distinguish between 0 and gap } for(int p = ns; p--; ) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outp = out( _ , p); if(np>0) { if(names) colnam[p] = "L" + nc[p]; for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else if(np<0) { if(names) colnam[p] = "F" + nc[p]; for(int i = 0; i != l; ++i) { if(ord2[i] < max[g[i]]+np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = x[temp-1]; } else { outp[i] = ff; } } } else { if(names) colnam[p] = "--"; outp = x; } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(ns != 1) { Rf_setAttrib(out, R_NamesSymbol, R_NilValue); Rf_dimgets(out, Dimension(l, ns)); if(Rf_isObject(x)) { // && !x.inherits("pseries") -> lag matrix in plm is not a pseries anymore anyway... CharacterVector classes = Rf_getAttrib(out, R_ClassSymbol); classes.push_back("matrix"); // classes.push_back("array"); // mts does not have class array... Rf_classgets(out, classes); } // else { // Rf_classgets(out, Rf_mkString("matrix")); // } if(names) Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), colnam)); // out.attr("class") = CharacterVector::create(x.attr("class"),"matrix"); } return out; } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Vector flagleadCppImpl(const Vector& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP flagleadCpp(SEXP x, IntegerVector n = 1, SEXP fill = R_NilValue, int ng = 0, IntegerVector g = 0, SEXP t = R_NilValue, bool names = true){ RCPP_RETURN_VECTOR(flagleadCppImpl, x, n, fill, ng, g, t, names); } inline SEXP coln_check(SEXP x) { if(Rf_isNull(x)) return NA_STRING; else return x; // Rf_coerceVector(x, STRSXP); } template Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { Vector fil(1); if(Rf_isNull(fill)) { // || fill != fill not necessary !! fil = Vector::get_na(); } else { fil = as >(fill); } auto ff = fil[0]; int l = x.nrow(), col = x.ncol(), ns = n.size(), pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = 0; i != ns; ++i) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot !! pos = n[i]; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; // NumericVector(abs(n)) CharacterVector colnam = names ? no_init_vector(col*ns) : no_init_vector(1); // what if no names ?? CharacterVector coln = names ? coln_check(colnames(x)) : NA_STRING; if(names && coln[0] == NA_STRING) names = false; Matrix out = no_init_matrix(l, col*ns); if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; int i = 0; while(i != np) outj[i++] = ff; for( ; i != l; ++i) outj[i] = column[i - np]; } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; int i = l, st = l+np; while(i != st) outj[--i] = ff; for( ; i--; ) outj[i] = column[i - np]; } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); IntegerVector omap(osize), ord2 = no_init_vector(l); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > l) stop("lag-length exceeds length of vector"); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; for(int i = 0, osnp = osize+np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } } else { // With groups if(l != g.size()) stop("length(x) must match length(g)"); int ags = l/ng, ngp = ng+1; if(Rf_isNull(t)) { // Ordered data // int seen[ngp], memsize = sizeof(int)*ngp; for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) { outj[i] = column[i-np]; } else { outj[i] = ff; ++seen[g[i]]; } } } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { // good?? if(seen[g[i]] == np) { outj[i] = column[i-np]; } else { outj[i] = ff; --seen[g[i]]; } } } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); IntegerVector omap(temp), ord2 = no_init_vector(l), index = no_init_vector(l); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } for(int j = 0; j != col; ++j) { ConstMatrixColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); MatrixColumn outj = out( _ , pos); if(np>0) { if(names) colnam[pos] = "L" + nc[p] + "." + coln[j]; for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else if(np<0) { if(names) colnam[pos] = "F" + nc[p] + "." + coln[j]; for(int i = 0; i != l; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outj[i] = column[temp-1]; } else { outj[i] = ff; } } } else { if(names) colnam[pos] = coln[j]; outj = column; } ++pos; } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(ns != 1) Rf_dimgets(out, Dimension(l, col*ns)); if(names) { Rf_dimnamesgets(out, List::create(rownames(x), colnam)); // colnames(out) = colnam deletes row names ! } else if(ns != 1) { Rf_setAttrib(out, R_DimNamesSymbol, R_NilValue); } return out; } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } template <> Matrix flagleadmCppImpl(const Matrix& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP flagleadmCpp(SEXP x, IntegerVector n = 1, SEXP fill = R_NilValue, int ng = 0, IntegerVector g = 0, SEXP t = R_NilValue, bool names = true){ RCPP_RETURN_MATRIX(flagleadmCppImpl, x, n, fill, ng, g, t, names); } // [[Rcpp::export]] List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = R_NilValue, int ng = 0, const IntegerVector& g = 0, const SEXP& t = R_NilValue, bool names = true) { bool lfill = Rf_isNull(fill); if(!lfill && TYPEOF(fill) == LGLSXP) lfill = Rf_asLogical(fill) == NA_LOGICAL; int l = x.size(), ns = n.size(), pos = INT_MAX; List out(l * ns); IntegerVector absn = no_init_vector(ns); for(int i = 0; i != ns; ++i) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot !! pos = n[i]; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; // NumericVector(abs(n)) CharacterVector nam = names ? no_init_vector(l*ns) : no_init_vector(1); // what if no names ?? CharacterVector na = names ? coln_check(Rf_getAttrib(x, R_NamesSymbol)) : NA_STRING; if(names && na[0] == NA_STRING) names = false; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; int row = column.size(); double ff = lfill ? NA_REAL : Rf_asReal(fill); // as() for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > row) stop("lag-length exceeds length of vector"); if(np>0) { NumericVector outjp = no_init_vector(row); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; int i = 0; while(i != np) outjp[i++] = ff; for( ; i != row; ++i) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(row); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; int i = row, st = row+np; while(i != st) outjp[--i] = ff; for( ; i--; ) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; int row = column.size(); int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as() for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > row) stop("lag-length exceeds length of vector"); if(np>0) { IntegerVector outjp = no_init_vector(row); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; int i = 0; while(i != np) outjp[i++] = ff; for( ; i != row; ++i) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(row); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; int i = row, st = row+np; while(i != st) outjp[--i] = ff; for( ; i--; ) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; int row = column.size(); // String ff = lfill ? NA_STRING : as(fill); // String SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > row) stop("lag-length exceeds length of vector"); if(np>0) { CharacterVector outjp = no_init_vector(row); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; int i = 0; while(i != np) outjp[i++] = ff; for( ; i != row; ++i) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(row); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; int i = row, st = row+np; while(i != st) outjp[--i] = ff; for( ; i--; ) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int min = INT_MAX, max = INT_MIN, osize, temp, os = ord.size(); if(Rf_length(x[0]) != os) stop("nrow(x) must match length(t)"); for(int i = 0; i != os; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; if(osize > 10000000 && osize > 3 * os) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); IntegerVector omap(osize), ord2 = no_init_vector(os); for(int i = 0; i != os; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; if(os != column.size()) stop("nrow(x) must match length(t)"); double ff = lfill ? NA_REAL : Rf_asReal(fill); // as( for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > os) stop("lag-length exceeds length of vector"); if(np>0) { NumericVector outjp = no_init_vector(os); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(os); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0, osnp = osize+np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; if(os != column.size()) stop("length(x) must match length(t)"); int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as( for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > os) stop("lag-length exceeds length of vector"); if(np>0) { IntegerVector outjp = no_init_vector(os); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(os); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0, osnp = osize+np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; if(os != column.size()) stop("length(x) must match length(t)"); // String ff = lfill ? NA_STRING : as(fill); // String ?? SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > os) stop("lag-length exceeds length of vector"); if(np>0) { CharacterVector outjp = no_init_vector(os); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(os); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0, osnp = osize+np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } } else { // With groups int gss = g.size(), ags = gss/ng, ngp = ng+1, temp = 0; if(Rf_isNull(t)) { // Ordered data std::vector seen(ngp); // int seen[ngp], memsize = sizeof(int)*ngp; for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; double ff = lfill ? NA_REAL : Rf_asReal(fill); // as() if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; seen.assign(ngp, 0); // std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; ++seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { // good?? if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; --seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as() if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; ++seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { // good?? if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; --seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; // String ff = lfill ? NA_STRING : as(fill); // String ?? SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; ++seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; seen.assign(ngp, 0); //std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { // good?? if(seen[g[i]] == np) { outjp[i] = column[i-np]; } else { outjp[i] = ff; --seen[g[i]]; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(gss != ord.size()) stop("length(g) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); // return List::create(min, max); // Note: INT_MIN is the same as NA_INTEGER for(int i = 0; i != gss; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; // + max[i] - min[i] + 1; } // if(min[ng] == NA_INTEGER) stop("Timevar contains missing values"); // if(min[ng] != INT_MAX) { // max[ng] -= min[ng] - 1; // temp += max[ng]; // } // return List::create(cgs, min, max); // index stores the position of the current observation in the ordered vector // omap provides the ordering to order the vector (needed to find previous / next values) if(temp > 10000000 && temp > 3 * gss) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); IntegerVector omap(temp), ord2 = no_init_vector(gss), index = no_init_vector(gss); for(int i = 0; i != gss; ++i) { ord2[i] = ord[i] - min[g[i]]; // Need ord2 can get rid of any part ?? ?? // if(ord2[i] >= gsv[g[i]-1]) stop("Gaps in timevar within one or more groups"); index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } // return List::create(cgs, min, max, ord2, index, omap); for(int j = 0; j != l; ++j) { int txj = TYPEOF(x[j]); switch(txj) { case REALSXP: { NumericVector column = x[j]; double ff = lfill ? NA_REAL : Rf_asReal(fill); // as() if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { NumericVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } case LGLSXP: case INTSXP: { IntegerVector column = x[j]; int ff = lfill ? NA_INTEGER : Rf_asInteger(fill); // as if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; } ++pos; } break; } case STRSXP: { CharacterVector column = x[j]; // String ff = lfill ? NA_STRING : as(fill); SEXP ff = lfill ? NA_STRING : Rf_asChar(fill); if(gss != column.size()) stop("length(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p] > ags) warning("lag-length exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "L" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else if(np<0) { CharacterVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; for(int i = 0; i != gss; ++i) { // best loop ?? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = column[temp-1]; } else { outjp[i] = ff; } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos] = outjp; } else { if(names) nam[pos] = na[j]; out[pos] = column; } ++pos; } break; } default: stop("Not supported SEXP type!"); } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(names) { // best way to code this ?? Rf_namesgets(out, nam); } else { if(ns != 1) Rf_setAttrib(out, R_NamesSymbol, R_NilValue); } return out; } collapse/src/pivot.c0000644000176200001440000003716015114164360014160 0ustar liggesusers#include "collapse_c.h" #define NISNAN_COMPLEX(x) (NISNAN(x.r) && NISNAN(x.i)) // Needed ?? rbindlist() is already pretty fast... // SEXP pivot_long_replicate_id_columns(SEXP data, SEXP times) { // // } // Helper for pivot_long void writeValueByIndex(SEXP target, SEXP source, const int from, SEXP index) { const int tt = TYPEOF(target), coerce = TYPEOF(source) != tt, li = length(index); if(coerce) source = PROTECT(coerceVector(source, tt)); if(length(source) < li) error("Attempting to write %d elements to a vector of length %d", li, length(source)); if(TYPEOF(index) != INTSXP) error("Indices must be integers"); const int *restrict pi = INTEGER(index); // TODO: SIMD?? switch(tt) { case INTSXP: case LGLSXP: { const int *restrict ps = INTEGER_RO(source)-1; int *restrict pt = INTEGER(target)+from; for(int i = 0; i != li; ++i) pt[i] = ps[pi[i]]; break; } case REALSXP: { const double *restrict ps = REAL_RO(source)-1; double *restrict pt = REAL(target)+from; for(int i = 0; i != li; ++i) pt[i] = ps[pi[i]]; break; } case CPLXSXP: { const Rcomplex *restrict ps = COMPLEX_RO(source)-1; Rcomplex *restrict pt = COMPLEX(target)+from; for(int i = 0; i != li; ++i) pt[i] = ps[pi[i]]; break; } case RAWSXP: { const Rbyte *restrict ps = RAW_RO(source)-1; Rbyte *restrict pt = RAW(target)+from; for(int i = 0; i != li; ++i) pt[i] = ps[pi[i]]; break; } case STRSXP: case VECSXP: case EXPRSXP: { const SEXP *restrict ps = SEXPPTR_RO(source)-1; SEXP *restrict pt = SEXPPTR(target)+from; for(int i = 0; i != li; ++i) pt[i] = ps[pi[i]]; break; } default: error("Unsupported SEXP type: '%s'", type2char(tt)); } if(coerce == 0) return; UNPROTECT(1); } SEXP pivot_long(SEXP data, SEXP ind, SEXP idcol) { if(TYPEOF(data) != VECSXP) error("pivot_long: input data is of type '%s', but needs to be a list", type2char(TYPEOF(data))); const int l = length(data); if(l == 1 && isNull(ind) && !asLogical(idcol)) return VECTOR_ELT(data, 0); if(l == 0) error("pivot_long: input data needs to have 1 or more columns. Current number of columns: 0"); const SEXP *pd = SEXPPTR_RO(data), *pind = pd; if(!isNull(ind)) { if(TYPEOF(ind) != VECSXP) error("pivot_long with missing value removal: list of indices of type '%s', but needs to be a list", type2char(TYPEOF(ind))); if(length(ind) != l) error("length(data) must match length(indlist)"); pind = SEXPPTR_RO(ind); } int max_type = 0, distinct_types = 0, len = 0; for (int j = 0, tj, tj_first = TYPEOF(pd[0]), oj, oj_first = isObject(pd[0]); j != l; ++j) { tj = TYPEOF(pd[j]); oj = isObject(pd[j]); len += length(pind[j]); if(tj > max_type) max_type = tj; if(tj != tj_first || oj != oj_first) distinct_types = 1; } SEXP res; // Case 1: no indices, which means we simply melt a single column: same as rbindlist() if(isNull(ind)) { res = PROTECT(allocVector(max_type, len)); len = 0; for (int j = 0; j != l; ++j) { int tmp = length(pd[j]); writeValue(res, pd[j], len, tmp); // from data.table_rbindlist.c len += tmp; } } else { // Now the more interesting case: we have a list of indices for the non-missing cases of each column. res = PROTECT(allocVector(max_type, len)); len = 0; for (int j = 0; j != l; ++j) { writeValueByIndex(res, pd[j], len, pind[j]); // See above len += length(pind[j]); } } if(distinct_types == 0) { copyMostAttrib(pd[0], res); // setAttrib(res, sym_label, R_NilValue); // better to keep, this is also used for id-columns if na.rm = TRUE } // Add ID column if(asLogical(idcol)) { SEXP names = PROTECT(getAttrib(data, R_NamesSymbol)); // PROTECT() not really necessary but RCHK gives warning SEXP result = PROTECT(allocVector(VECSXP, 2)); SEXP id_column; SET_VECTOR_ELT(result, 0, id_column = allocVector(isNull(names) ? INTSXP : STRSXP, length(res))); SET_VECTOR_ELT(result, 1, res); if(isNull(names)) { int *restrict pid = INTEGER(id_column); for (int j = 0, end = 0, v = 1; j != l; ++j) { end = length(pind[j]); // SIMD?? for (int i = 0; i != end; ++i) pid[i] = v; pid += end; ++v; } } else { SEXP *restrict pid = SEXPPTR(id_column); const SEXP *pnam = SEXPPTR_RO(names); for (int j = 0, end = 0; j != l; ++j) { SEXP namj = pnam[j]; end = length(pind[j]); // SIMD?? for (int i = 0; i != end; ++i) pid[i] = namj; pid += end; } } UNPROTECT(3); return result; } UNPROTECT(1); return res; } int aggFUNtI(SEXP x) { if(TYPEOF(x) != STRSXP) error("Internal FUN must be a character string"); const char * r = CHAR(STRING_ELT(x, 0)); // translateCharUTF8() if(strcmp(r, "last") == 0) return 1; if(strcmp(r, "first") == 0) return 2; if(strcmp(r, "count") == 0) return 3; if(strcmp(r, "sum") == 0) return 4; if(strcmp(r, "mean") == 0) return 5; if(strcmp(r, "min") == 0) return 6; if(strcmp(r, "max") == 0) return 7; error("Unsupported internal FUN: %s", r); } // Implementation for categorical functions #define AGGFUN_SWITCH_CAT(TYPEACC, NONMISSCHECK) \ switch(aggfun) { \ case 1: { /* last */ \ if(nthreads <= 1 || narm) { \ if(narm) { \ for(int i = 0; i != l; ++i) if(NONMISSCHECK) TYPEACC(pout[pid[i]])[pix[i]-1] = pc[i];\ } else { \ for(int i = 0; i != l; ++i) TYPEACC(pout[pid[i]])[pix[i]-1] = pc[i]; \ } \ } else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int i = 0; i < l; ++i) TYPEACC(pout[pid[i]])[pix[i]-1] = pc[i]; \ } \ } break; \ case 2: { /* first: no multithreading because backwards */ \ if(narm) { \ for(int i = l; i--; ) if(NONMISSCHECK) TYPEACC(pout[pid[i]])[pix[i]-1] = pc[i]; \ } else { \ for(int i = l; i--; ) TYPEACC(pout[pid[i]])[pix[i]-1] = pc[i]; \ } \ } break; \ case 3: { /* count: no multithreading because possible race condition */ \ if(narm) { \ for(int i = 0; i != l; ++i) INTEGER(pout[pid[i]])[pix[i]-1] += NONMISSCHECK; \ } else { \ for(int i = 0; i != l; ++i) INTEGER(pout[pid[i]])[pix[i]-1]++; \ } \ } break; \ } // Implementation for numeric functions #define AGGFUN_SWITCH_NUM(tdef, TYPEACC, NONMISSCHECK, ISMISS) \ switch(aggfun) { \ case 4: { /* sum: no multithreading because possible race condition */ \ for(int i = 0; i != l; ++i) if(NONMISSCHECK) DBL_DATAPTR(pout[pid[i]])[pix[i]-1] += pc[i]; \ } break; \ case 5: { /* mean: no multithreading because possible race condition */ \ int *restrict count = (int*)R_Calloc(nr*nc+1, int); \ double *meani = DBL_DATAPTR(pout[1]); \ for(int i = 0; i != l; ++i) { \ if(NONMISSCHECK) { \ meani = DBL_DATAPTR(pout[pid[i]])-1; \ if(ISMISS(meani[pix[i]])) { \ meani[pix[i]] = pc[i]; \ ++count[(pid[i]-1)*nr+pix[i]]; \ continue; \ } \ meani[pix[i]] += (pc[i] - meani[pix[i]]) / ++count[(pid[i]-1)*nr+pix[i]]; \ } \ } \ R_Free(count); \ } break; \ case 6: { /* min: no multithreading because possible race condition */ \ tdef *mini = TYPEACC(pout[1]); \ for(int i = 0; i != l; ++i) { \ if(NONMISSCHECK) { \ mini = TYPEACC(pout[pid[i]])-1; \ if(pc[i] < mini[pix[i]] || ISMISS(mini[pix[i]])) mini[pix[i]] = pc[i]; \ } \ } \ } break; \ case 7: { /* max: no multithreading because possible race condition */ \ tdef *maxi = TYPEACC(pout[1]); \ for(int i = 0; i != l; ++i) { \ if(NONMISSCHECK) { \ maxi = TYPEACC(pout[pid[i]])-1; \ if(pc[i] > maxi[pix[i]] || ISMISS(maxi[pix[i]])) maxi[pix[i]] = pc[i]; \ } \ } \ } break; \ } #define ISMISS_INTDBL(x) ((x) == NA_INTEGER || (x) != (x)) // TODO: How to check for duplicate rows? SEXP pivot_wide(SEXP index, SEXP id, SEXP column, SEXP fill, SEXP Rnthreads, SEXP Raggfun, SEXP Rnarm) { const int *restrict pix = INTEGER_RO(index), *restrict pid = INTEGER_RO(id), l = length(index), nr = asInteger(getAttrib(index, sym_n_groups)), nc = asInteger(getAttrib(id, sym_n_groups)), tx = TYPEOF(column), aggfun = aggFUNtI(Raggfun); int narm = asInteger(Rnarm); if(l != length(id)) error("Internal error: length(index) must match length(id)"); if(l != length(column)) error("Internal error: length(index) must match length(column)"); if(nr < 1 || nc < 1) error("Resulting data frame after pivoting needs to have at least one row and column"); int nthreads = asInteger(Rnthreads); if(l < 100000) nthreads = 1; // No improvements from multithreading on small data. if(nthreads > max_threads) nthreads = max_threads; SEXP out = PROTECT(allocVector(VECSXP, nc)); const SEXP *restrict pout = SEXPPTR_RO(out)-1; SEXP out1; if(aggfun < 3 || aggfun > 4) { SEXP fill_val; if(fill == R_NilValue || aggfun > 4) { fill_val = tx == REALSXP || aggfun == 5 ? ScalarReal(NA_REAL) : tx == INTSXP ? ScalarInteger(NA_INTEGER) : tx == LGLSXP ? ScalarLogical(NA_LOGICAL) : tx == STRSXP ? ScalarString(NA_STRING) : tx == CPLXSXP ? ScalarComplex(asComplex(ScalarReal(NA_REAL))) : tx == RAWSXP ? ScalarRaw(0) : R_NilValue; } else if(TYPEOF(fill) == tx) { fill_val = fill; } else fill_val = coerceVector(fill, tx); PROTECT(fill_val); SET_VECTOR_ELT(out, 0, out1 = falloc(fill_val, ScalarInteger(nr), ScalarLogical(1))); UNPROTECT(1); } else { if(aggfun == 3) { // count SET_VECTOR_ELT(out, 0, out1 = allocVector(INTSXP, nr)); memset(INTEGER(out1), 0, nr*sizeof(int)); } else { // sum SET_VECTOR_ELT(out, 0, out1 = allocVector(REALSXP, nr)); memset(REAL(out1), 0, nr*sizeof(double)); } } if(aggfun != 3) copyMostAttrib(column, out1); // TODO: Check that this works!! // TODO: can multithread?? -> NOPE!, as expected for (int j = 1; j < nc; ++j) SET_VECTOR_ELT(out, j, duplicate(out1)); // TODO: SIMD: doesn't vectorize on clang 16. Also multithreading gives only minor performance improvements.. switch(tx) { case INTSXP: case LGLSXP: { const int *restrict pc = INTEGER_RO(column); if(aggfun <= 3) { AGGFUN_SWITCH_CAT(INT_DATAPTR, pc[i] != NA_INTEGER); } else { AGGFUN_SWITCH_NUM(int, INT_DATAPTR, pc[i] != NA_INTEGER, ISMISS_INTDBL); } break; } case REALSXP: { const double *restrict pc = REAL_RO(column); // // cool idea but not really faster... // double *restrict pout_i = REAL(pout[pid[0]])-1; // for(int i = 0, prev = pid[0]; i != l; ++i) { // if(pid[i] != prev) pout_i = REAL(pout[pid[i]])-1; // pout_i[pix[i]] = pc[i]; // } if(aggfun <= 3) { AGGFUN_SWITCH_CAT(DBL_DATAPTR, NISNAN(pc[i])); } else { AGGFUN_SWITCH_NUM(double, DBL_DATAPTR, NISNAN(pc[i]), ISNAN); } break; } case CPLXSXP: { const Rcomplex *restrict pc = COMPLEX_RO(column); if(aggfun <= 3) { AGGFUN_SWITCH_CAT(COMPLEX, NISNAN_COMPLEX(pc[i])); } else { // AGGFUN_SWITCH_NUM(Rcomplex, COMPLEX, NISNAN_COMPLEX(pc[i])); error("Internal aggregation functions sum, mean, min, and max are currently not implemented for complex vectors."); } break; } case RAWSXP: { const Rbyte *pc = RAW_RO(column); if(aggfun > 3) error("Cannot aggregate raw column with sum, mean, min, or max."); narm = 0; // disable missing values with RAW AGGFUN_SWITCH_CAT(RAW, pc[i] != 0xFF); // Sentinel value (= 255) break; } case STRSXP: { const SEXP *restrict pc = SEXPPTR_RO(column); if(aggfun > 3) error("Cannot aggregate character column with sum, mean, min, or max."); AGGFUN_SWITCH_CAT(SEXP_DATAPTR, pc[i] != NA_STRING); break; } case VECSXP: case EXPRSXP: { const SEXP *restrict pc = SEXPPTR_RO(column); if(aggfun > 3) error("Cannot aggregate list column with sum, mean, min, or max."); AGGFUN_SWITCH_CAT(SEXP_DATAPTR, length(pc[i]) != 0); break; } default: error("Unsupported SEXP type: '%s'", type2char(tx)); } UNPROTECT(1); return out; } collapse/src/RcppExports.cpp0000644000176200001440000007037015202475506015655 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // BWCpp NumericVector BWCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, double theta, double set_mean, bool B, bool fill); RcppExport SEXP _collapse_BWCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< bool >::type B(BSEXP); Rcpp::traits::input_parameter< bool >::type fill(fillSEXP); rcpp_result_gen = Rcpp::wrap(BWCpp(x, ng, g, gs, w, narm, theta, set_mean, B, fill)); return rcpp_result_gen; END_RCPP } // BWmCpp NumericMatrix BWmCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, double theta, double set_mean, bool B, bool fill); RcppExport SEXP _collapse_BWmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< bool >::type B(BSEXP); Rcpp::traits::input_parameter< bool >::type fill(fillSEXP); rcpp_result_gen = Rcpp::wrap(BWmCpp(x, ng, g, gs, w, narm, theta, set_mean, B, fill)); return rcpp_result_gen; END_RCPP } // BWlCpp List BWlCpp(const List& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, double theta, double set_mean, bool B, bool fill); RcppExport SEXP _collapse_BWlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< bool >::type B(BSEXP); Rcpp::traits::input_parameter< bool >::type fill(fillSEXP); rcpp_result_gen = Rcpp::wrap(BWlCpp(x, ng, g, gs, w, narm, theta, set_mean, B, fill)); return rcpp_result_gen; END_RCPP } // fbstatsCpp SEXP fbstatsCpp(const NumericVector& x, bool ext, int ng, const IntegerVector& g, int npg, const IntegerVector& pg, const SEXP& w, bool stable_algo, bool array, bool setn, const SEXP& gn); RcppExport SEXP _collapse_fbstatsCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP setnSEXP, SEXP gnSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ext(extSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type npg(npgSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type pg(pgSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type array(arraySEXP); Rcpp::traits::input_parameter< bool >::type setn(setnSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gn(gnSEXP); rcpp_result_gen = Rcpp::wrap(fbstatsCpp(x, ext, ng, g, npg, pg, w, stable_algo, array, setn, gn)); return rcpp_result_gen; END_RCPP } // fbstatsmCpp SEXP fbstatsmCpp(const NumericMatrix& x, bool ext, int ng, const IntegerVector& g, int npg, const IntegerVector& pg, const SEXP& w, bool stable_algo, bool array, const SEXP& gn); RcppExport SEXP _collapse_fbstatsmCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ext(extSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type npg(npgSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type pg(pgSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type array(arraySEXP); Rcpp::traits::input_parameter< const SEXP& >::type gn(gnSEXP); rcpp_result_gen = Rcpp::wrap(fbstatsmCpp(x, ext, ng, g, npg, pg, w, stable_algo, array, gn)); return rcpp_result_gen; END_RCPP } // fbstatslCpp SEXP fbstatslCpp(const List& x, bool ext, int ng, const IntegerVector& g, int npg, const IntegerVector& pg, const SEXP& w, bool stable_algo, bool array, const SEXP& gn); RcppExport SEXP _collapse_fbstatslCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ext(extSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< int >::type npg(npgSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type pg(pgSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type array(arraySEXP); Rcpp::traits::input_parameter< const SEXP& >::type gn(gnSEXP); rcpp_result_gen = Rcpp::wrap(fbstatslCpp(x, ext, ng, g, npg, pg, w, stable_algo, array, gn)); return rcpp_result_gen; END_RCPP } // fdiffgrowthCpp NumericVector fdiffgrowthCpp(const NumericVector& x, const IntegerVector& n, const IntegerVector& diff, double fill, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& t, int ret, double rho, bool names, double power); RcppExport SEXP _collapse_fdiffgrowthCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type diff(diffSEXP); Rcpp::traits::input_parameter< double >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< double >::type power(powerSEXP); rcpp_result_gen = Rcpp::wrap(fdiffgrowthCpp(x, n, diff, fill, ng, g, gs, t, ret, rho, names, power)); return rcpp_result_gen; END_RCPP } // fdiffgrowthmCpp NumericMatrix fdiffgrowthmCpp(const NumericMatrix& x, const IntegerVector& n, const IntegerVector& diff, double fill, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& t, int ret, double rho, bool names, double power); RcppExport SEXP _collapse_fdiffgrowthmCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type diff(diffSEXP); Rcpp::traits::input_parameter< double >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< double >::type power(powerSEXP); rcpp_result_gen = Rcpp::wrap(fdiffgrowthmCpp(x, n, diff, fill, ng, g, gs, t, ret, rho, names, power)); return rcpp_result_gen; END_RCPP } // fdiffgrowthlCpp List fdiffgrowthlCpp(const List& x, const IntegerVector& n, const IntegerVector& diff, double fill, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& t, int ret, double rho, bool names, double power); RcppExport SEXP _collapse_fdiffgrowthlCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type diff(diffSEXP); Rcpp::traits::input_parameter< double >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< double >::type power(powerSEXP); rcpp_result_gen = Rcpp::wrap(fdiffgrowthlCpp(x, n, diff, fill, ng, g, gs, t, ret, rho, names, power)); return rcpp_result_gen; END_RCPP } // flagleadCpp SEXP flagleadCpp(SEXP x, IntegerVector n, SEXP fill, int ng, IntegerVector g, SEXP t, bool names); RcppExport SEXP _collapse_flagleadCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< IntegerVector >::type n(nSEXP); Rcpp::traits::input_parameter< SEXP >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< IntegerVector >::type g(gSEXP); Rcpp::traits::input_parameter< SEXP >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); rcpp_result_gen = Rcpp::wrap(flagleadCpp(x, n, fill, ng, g, t, names)); return rcpp_result_gen; END_RCPP } // flagleadmCpp SEXP flagleadmCpp(SEXP x, IntegerVector n, SEXP fill, int ng, IntegerVector g, SEXP t, bool names); RcppExport SEXP _collapse_flagleadmCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< IntegerVector >::type n(nSEXP); Rcpp::traits::input_parameter< SEXP >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< IntegerVector >::type g(gSEXP); Rcpp::traits::input_parameter< SEXP >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); rcpp_result_gen = Rcpp::wrap(flagleadmCpp(x, n, fill, ng, g, t, names)); return rcpp_result_gen; END_RCPP } // flagleadlCpp List flagleadlCpp(const List& x, const IntegerVector& n, const SEXP& fill, int ng, const IntegerVector& g, const SEXP& t, bool names); RcppExport SEXP _collapse_flagleadlCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type n(nSEXP); Rcpp::traits::input_parameter< const SEXP& >::type fill(fillSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); rcpp_result_gen = Rcpp::wrap(flagleadlCpp(x, n, fill, ng, g, t, names)); return rcpp_result_gen; END_RCPP } // fscaleCpp NumericVector fscaleCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, double set_mean, double set_sd); RcppExport SEXP _collapse_fscaleCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< double >::type set_sd(set_sdSEXP); rcpp_result_gen = Rcpp::wrap(fscaleCpp(x, ng, g, w, narm, set_mean, set_sd)); return rcpp_result_gen; END_RCPP } // fscalemCpp NumericMatrix fscalemCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, double set_mean, double set_sd); RcppExport SEXP _collapse_fscalemCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< double >::type set_sd(set_sdSEXP); rcpp_result_gen = Rcpp::wrap(fscalemCpp(x, ng, g, w, narm, set_mean, set_sd)); return rcpp_result_gen; END_RCPP } // fscalelCpp List fscalelCpp(const List& x, int ng, const IntegerVector& g, const SEXP& w, bool narm, double set_mean, double set_sd); RcppExport SEXP _collapse_fscalelCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< double >::type set_mean(set_meanSEXP); Rcpp::traits::input_parameter< double >::type set_sd(set_sdSEXP); rcpp_result_gen = Rcpp::wrap(fscalelCpp(x, ng, g, w, narm, set_mean, set_sd)); return rcpp_result_gen; END_RCPP } // fvarsdCpp NumericVector fvarsdCpp(const NumericVector& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool stable_algo, bool sd); RcppExport SEXP _collapse_fvarsdCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericVector& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type sd(sdSEXP); rcpp_result_gen = Rcpp::wrap(fvarsdCpp(x, ng, g, gs, w, narm, stable_algo, sd)); return rcpp_result_gen; END_RCPP } // fvarsdmCpp SEXP fvarsdmCpp(const NumericMatrix& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool stable_algo, bool sd, bool drop); RcppExport SEXP _collapse_fvarsdmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const NumericMatrix& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type sd(sdSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fvarsdmCpp(x, ng, g, gs, w, narm, stable_algo, sd, drop)); return rcpp_result_gen; END_RCPP } // fvarsdlCpp SEXP fvarsdlCpp(const List& x, int ng, const IntegerVector& g, const SEXP& gs, const SEXP& w, bool narm, bool stable_algo, bool sd, bool drop); RcppExport SEXP _collapse_fvarsdlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type gs(gsSEXP); Rcpp::traits::input_parameter< const SEXP& >::type w(wSEXP); Rcpp::traits::input_parameter< bool >::type narm(narmSEXP); Rcpp::traits::input_parameter< bool >::type stable_algo(stable_algoSEXP); Rcpp::traits::input_parameter< bool >::type sd(sdSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(fvarsdlCpp(x, ng, g, gs, w, narm, stable_algo, sd, drop)); return rcpp_result_gen; END_RCPP } // mrtl SEXP mrtl(const SEXP& X, bool names, int ret); RcppExport SEXP _collapse_mrtl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type X(XSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(mrtl(X, names, ret)); return rcpp_result_gen; END_RCPP } // mctl SEXP mctl(const SEXP& X, bool names, int ret); RcppExport SEXP _collapse_mctl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type X(XSEXP); Rcpp::traits::input_parameter< bool >::type names(namesSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(mctl(X, names, ret)); return rcpp_result_gen; END_RCPP } // psmatCpp SEXP psmatCpp(const SEXP& x, const IntegerVector& g, const SEXP& t, bool transpose, const SEXP& fill); RcppExport SEXP _collapse_psmatCpp(SEXP xSEXP, SEXP gSEXP, SEXP tSEXP, SEXP transposeSEXP, SEXP fillSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< const SEXP& >::type t(tSEXP); Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP); Rcpp::traits::input_parameter< const SEXP& >::type fill(fillSEXP); rcpp_result_gen = Rcpp::wrap(psmatCpp(x, g, t, transpose, fill)); return rcpp_result_gen; END_RCPP } // pwnobsmCpp IntegerMatrix pwnobsmCpp(SEXP x); RcppExport SEXP _collapse_pwnobsmCpp(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(pwnobsmCpp(x)); return rcpp_result_gen; END_RCPP } // qFCpp SEXP qFCpp(SEXP x, bool ordered, bool na_exclude, bool keep_attr, int ret); RcppExport SEXP _collapse_qFCpp(SEXP xSEXP, SEXP orderedSEXP, SEXP na_excludeSEXP, SEXP keep_attrSEXP, SEXP retSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type ordered(orderedSEXP); Rcpp::traits::input_parameter< bool >::type na_exclude(na_excludeSEXP); Rcpp::traits::input_parameter< bool >::type keep_attr(keep_attrSEXP); Rcpp::traits::input_parameter< int >::type ret(retSEXP); rcpp_result_gen = Rcpp::wrap(qFCpp(x, ordered, na_exclude, keep_attr, ret)); return rcpp_result_gen; END_RCPP } // sortuniqueCpp SEXP sortuniqueCpp(SEXP x); RcppExport SEXP _collapse_sortuniqueCpp(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(sortuniqueCpp(x)); return rcpp_result_gen; END_RCPP } // fdroplevelsCpp IntegerVector fdroplevelsCpp(const IntegerVector& x, bool check_NA); RcppExport SEXP _collapse_fdroplevelsCpp(SEXP xSEXP, SEXP check_NASEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const IntegerVector& >::type x(xSEXP); Rcpp::traits::input_parameter< bool >::type check_NA(check_NASEXP); rcpp_result_gen = Rcpp::wrap(fdroplevelsCpp(x, check_NA)); return rcpp_result_gen; END_RCPP } // seqid IntegerVector seqid(const IntegerVector& x, const SEXP& o, int del, int start, bool na_skip, bool skip_seq, bool check_o); RcppExport SEXP _collapse_seqid(SEXP xSEXP, SEXP oSEXP, SEXP delSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP skip_seqSEXP, SEXP check_oSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const IntegerVector& >::type x(xSEXP); Rcpp::traits::input_parameter< const SEXP& >::type o(oSEXP); Rcpp::traits::input_parameter< int >::type del(delSEXP); Rcpp::traits::input_parameter< int >::type start(startSEXP); Rcpp::traits::input_parameter< bool >::type na_skip(na_skipSEXP); Rcpp::traits::input_parameter< bool >::type skip_seq(skip_seqSEXP); Rcpp::traits::input_parameter< bool >::type check_o(check_oSEXP); rcpp_result_gen = Rcpp::wrap(seqid(x, o, del, start, na_skip, skip_seq, check_o)); return rcpp_result_gen; END_RCPP } // groupid IntegerVector groupid(const SEXP& x, const SEXP& o, int start, bool na_skip, bool check_o); RcppExport SEXP _collapse_groupid(SEXP xSEXP, SEXP oSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP check_oSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< const SEXP& >::type o(oSEXP); Rcpp::traits::input_parameter< int >::type start(startSEXP); Rcpp::traits::input_parameter< bool >::type na_skip(na_skipSEXP); Rcpp::traits::input_parameter< bool >::type check_o(check_oSEXP); rcpp_result_gen = Rcpp::wrap(groupid(x, o, start, na_skip, check_o)); return rcpp_result_gen; END_RCPP } // varyingCpp LogicalVector varyingCpp(const SEXP& x, int ng, const IntegerVector& g, bool any_group); RcppExport SEXP _collapse_varyingCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< bool >::type any_group(any_groupSEXP); rcpp_result_gen = Rcpp::wrap(varyingCpp(x, ng, g, any_group)); return rcpp_result_gen; END_RCPP } // varyingmCpp SEXP varyingmCpp(const SEXP& x, int ng, const IntegerVector& g, bool any_group, bool drop); RcppExport SEXP _collapse_varyingmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const SEXP& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< bool >::type any_group(any_groupSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(varyingmCpp(x, ng, g, any_group, drop)); return rcpp_result_gen; END_RCPP } // varyinglCpp SEXP varyinglCpp(const List& x, int ng, const IntegerVector& g, bool any_group, bool drop); RcppExport SEXP _collapse_varyinglCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const List& >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type ng(ngSEXP); Rcpp::traits::input_parameter< const IntegerVector& >::type g(gSEXP); Rcpp::traits::input_parameter< bool >::type any_group(any_groupSEXP); Rcpp::traits::input_parameter< bool >::type drop(dropSEXP); rcpp_result_gen = Rcpp::wrap(varyinglCpp(x, ng, g, any_group, drop)); return rcpp_result_gen; END_RCPP } collapse/src/psmat.cpp0000644000176200001440000001240215113725301014470 0ustar liggesusers#include using namespace Rcpp; template Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose, SEXP fill) { int l = x.size(), gss = g.size(); if(gss != l) stop("length(g) must match length(x)"); CharacterVector glevs = Rf_getAttrib(g, R_LevelsSymbol); int *pg = INTEGER(g); int ng = glevs.size(), gs = l/ng, ngp = ng+1; if(Rf_isNull(t)) { if(l%ng != 0) stop("length(x) must be a multiple of length(levels(g))"); std::vector seen(ngp); Matrix out = transpose ? no_init_matrix(gs, ng) : no_init_matrix(ng, gs); if(transpose) { for(int i = 0; i != l; ++i) { if(seen[pg[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); out(seen[pg[i]]++, pg[i]-1) = x[i]; // out[(g[i]-1)*gs + seen[g[i]]++] = x[i]; not really faster... } } else { for(int i = 0; i != l; ++i) { if(seen[pg[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); out(pg[i]-1, seen[pg[i]]++) = x[i]; // out[(seen[g[i]]++)*ng + g[i]-1] = x[i]; not really faster... } } Rf_dimnamesgets(out, transpose ? List::create(seq_len(gs), glevs) : List::create(glevs, seq_len(gs))); Rf_setAttrib(out, Rf_install("transpose"), Rf_ScalarLogical(transpose)); Rf_classgets(out, CharacterVector::create("psmat", "matrix")); return out; } else { int *pt = INTEGER(t); if(l != Rf_length(t)) stop("length(t) must match length(x)"); // int maxt = max(t); // needed ? // check whether t.levels is same size as maxt ? CharacterVector tlevs = Rf_getAttrib(t, R_LevelsSymbol); int nt = tlevs.size(); Matrix out = transpose ? no_init_matrix(nt, ng) : no_init_matrix(ng, nt); // best way to do this ? Stable ? -> Could conditionally create vector and the coerce to matrix -> faster init ? if(nt != gs) { typename traits::storage_type::type coerced_fill = Rf_isNull(fill) ? Vector::get_na() : as::type>(fill); std::fill(out.begin(), out.end(), coerced_fill); } if(transpose) { for(int i = 0; i != l; ++i) out[(pg[i]-1)*nt + pt[i]-1] = x[i]; // out(tt[i]-1, g[i]-1) = x[i]; // tiny bit faster } else { for(int i = 0; i != l; ++i) out[(pt[i]-1)*ng + pg[i]-1] = x[i]; // out(g[i]-1, tt[i]-1) = x[i]; // tiny bit faster } Rf_dimnamesgets(out, transpose ? List::create(tlevs, glevs) : List::create(glevs, tlevs)); Rf_setAttrib(out, Rf_install("transpose"), Rf_ScalarLogical(transpose)); Rf_classgets(out, CharacterVector::create("psmat", "matrix")); return out; } } template <> Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose, SEXP fill) { stop("Not supported SEXP type!"); } template <> Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose, SEXP fill) { stop("Not supported SEXP type!"); } template <> Matrix psmatCppImpl(Vector x, IntegerVector g, SEXP t, bool transpose, SEXP fill) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP psmatCpp(const SEXP& x, const IntegerVector& g, const SEXP& t = R_NilValue, bool transpose = false, const SEXP& fill = R_NilValue) { RCPP_RETURN_VECTOR(psmatCppImpl, x, g, t, transpose, fill); } // Only Numeric Version: // // [[Rcpp::export]] // SEXP psmatCpp(NumericVector x, IntegerVector g, SEXP t = R_NilValue, bool transpose = false) { // int l = x.size(), gss = g.size(); // if(gss != l) stop("length(g) must match length(x)"); // CharacterVector glevs = g.attr("levels"); // int ng = glevs.size(), gs = l/ng, ngp = ng+1; // if(Rf_isNull(t)) { // if(l%ng != 0) stop("length(x) must be a multiple of length(levels(g))"); // IntegerVector seen(ngp); // NumericMatrix out = transpose ? no_init_matrix(gs, ng) : no_init_matrix(ng, gs); // if(transpose) { // for(int i = 0; i != l; ++i) { // if(seen[g[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); // out(seen[g[i]]++, g[i]-1) = x[i]; // } // } else { // for(int i = 0; i != l; ++i) { // if(seen[g[i]] == gs) stop("Panel not Balanced: Need to supply timevar"); // out(g[i]-1, seen[g[i]]++) = x[i]; // } // } // out.attr("dimnames") = transpose ? List::create(seq_len(gs), glevs) : List::create(glevs, seq_len(gs)); // return out; // } else { // IntegerVector tt = t; // if(l != tt.size()) stop("length(t) must match length(x)"); // // int maxt = max(tt); // needed ?? // check whether t.levels is same size as maxt ?? // CharacterVector tlevs = tt.attr("levels"); // int nt = tlevs.size(); // NumericMatrix out = transpose ? no_init_matrix(nt, ng) : no_init_matrix(ng, nt); // best way to do this ?? Stable ?? -> Could conditionally create vector and the coerce to matrix -> faster init ?? // if(nt != gs) std::fill(out.begin(), out.end(), NA_REAL); // memset(out, NA_REAL, sizeof(double)*ng*maxt); -> unstable !! // else balanced panel !! // if(transpose) { // for(int i = 0; i != l; ++i) out(tt[i]-1, g[i]-1) = x[i]; // } else { // for(int i = 0; i != l; ++i) out(g[i]-1, tt[i]-1) = x[i]; // } // out.attr("dimnames") = transpose ? List::create(tlevs, glevs) : List::create(glevs, tlevs); // return out; // } // } collapse/src/Makevars.win0000644000176200001440000000036714777170131015151 0ustar liggesusers## -- compiling for OpenMP PKG_CFLAGS = $($(subst OPENMP,OPENMP_CFLAGS,SHLIB_OPENMP)) -O3 PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS ## -- using C++ 11 # CXX_STD = CXX11 ## -- linking for OpenMP PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) collapse/src/data.table_utils.c0000644000176200001440000003206415122271061016230 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include "data.table.h" int need2utf8(SEXP x) { const int xlen = length(x); const SEXP *xd = STRING_PTR_RO(x); if (xlen <= 1) return xlen == 1 ? NEED2UTF8(xd[0]) : 0; for (int i = 0, t = xlen < 1000 ? xlen : 1000; i < t; ++i) if(NEED2UTF8(xd[i])) return 1; return NEED2UTF8(xd[xlen/4]) || NEED2UTF8(xd[xlen/2]) || NEED2UTF8(xd[(int)(xlen/1.3333)]) || NEED2UTF8(xd[xlen-1]); } SEXP coerceUtf8IfNeeded(SEXP x) { if (!need2utf8(x)) return(x); const int xlen = length(x); SEXP ans = PROTECT(allocVector(STRSXP, xlen)); const SEXP *xd = STRING_PTR_RO(x); for (int i=0; i 1.0) error("prop needs to be a proportion [0, 1]"); if(!isNewList(x)) error("Internal error. Argument 'x' to missing_cases is type '%s' not 'list'", type2char(TYPEOF(x))); // # nocov if(!isInteger(cols)) error("Internal error. Argument 'cols' to missing_cases is type '%s' not 'integer'", type2char(TYPEOF(cols))); // # nocov for (int i = 0; i < ncol; ++i) { elem = INTEGER(cols)[i]; if(elem < 1 || elem > LENGTH(x)) error("Item %d of 'cols' is %d which is outside 1-based range [1,ncol(x)=%d]", i+1, elem, LENGTH(x)); if(!n) n = length(VECTOR_ELT(x, elem-1)); } SEXP ans = PROTECT(allocVector(LGLSXP, n)); int *ians = LOGICAL(ans); memset(ians, 0, sizeof(int) * n); // for (int i=0; i != n; ++i) ians[i]=0; if(count || prop > 0.0) { // More than 1 missing row, or counting mising values // if(prop == 1) { // Not sensible: better skip lists... // // Preliminary check for early return // for (int i = 0, tv; i < ncol; ++i) { // tv = TYPEOF(VECTOR_ELT(x, INTEGER(cols)[i]-1)); // if(tv != LGLSXP && tv != INTSXP && tv != REALSXP && tv != STRSXP && tv != CPLXSXP && tv != NILSXP) { // UNPROTECT(1); // return(ans); // } // } // } // Counting the missing values int len = ncol; for (int i = 0; i < ncol; ++i) { SEXP v = VECTOR_ELT(x, INTEGER(cols)[i]-1); if (!length(v) || isNewList(v) || isList(v) || TYPEOF(v) == RAWSXP) { --len; continue; } if (n != length(v)) error("Column %d of input list x is length %d, inconsistent with first column of that item which is length %d.", i+1,length(v),n); switch (TYPEOF(v)) { case LGLSXP: { const int *iv = LOGICAL(v); for (int j=0; j != n; ++j) ians[j] += (iv[j] == NA_LOGICAL); } break; case INTSXP: { const int *iv = INTEGER(v); for (int j=0; j != n; ++j) ians[j] += (iv[j] == NA_INTEGER); } break; case STRSXP: { const SEXP *sv = SEXPPTR_RO(v); for (int j=0; j != n; ++j) ians[j] += (sv[j] == NA_STRING); } break; case REALSXP: { const double *dv = REAL(v); if (INHERITS(v, char_integer64)) { for (int j=0; j != n; ++j) ians[j] += (dv[j] == NA_INT64_D); } else { for (int j=0; j != n; ++j) ians[j] += ISNAN(dv[j]); } } break; case CPLXSXP: { const Rcomplex *dv = COMPLEX(v); for (int j=0; j != n; ++j) ians[j] += (ISNAN(dv[j].r) || ISNAN(dv[j].i)); } break; default: error("Unsupported column type '%s'", type2char(TYPEOF(v))); } } if(count) { SETTOF(ans, INTSXP); } else { // This computes the result if(prop < 1.0) { len = (int)((double)len * prop); if(len < 1) len = 1; } for (int j = 0; j != n; ++j) ians[j] = ians[j] >= len; } } else { // Any missing (default) for (int i = 0; i < ncol; ++i) { SEXP v = VECTOR_ELT(x, INTEGER(cols)[i]-1); if (!length(v) || isNewList(v) || isList(v)) continue; // like stats:::na.omit.data.frame, skip list/pairlist columns if (n != length(v)) error("Column %d of input list x is length %d, inconsistent with first column of that item which is length %d.", i+1,length(v),n); switch (TYPEOF(v)) { case LGLSXP: { const int *iv = LOGICAL(v); for (int j=0; j != n; ++j) ians[j] |= (iv[j] == NA_LOGICAL); } break; case INTSXP: { const int *iv = INTEGER(v); for (int j=0; j != n; ++j) ians[j] |= (iv[j] == NA_INTEGER); } break; case STRSXP: { const SEXP *sv = SEXPPTR_RO(v); for (int j=0; j != n; ++j) ians[j] |= (sv[j] == NA_STRING); } break; case REALSXP: { const double *dv = REAL(v); if (INHERITS(v, char_integer64)) { for (int j=0; j != n; ++j) ians[j] |= (dv[j] == NA_INT64_D); } else { for (int j=0; j != n; ++j) ians[j] |= ISNAN(dv[j]); } } break; case RAWSXP: { // no such thing as a raw NA // vector already initialised to all 0's } break; case CPLXSXP: { // taken from https://github.com/wch/r-source/blob/d75f39d532819ccc8251f93b8ab10d5b83aac89a/src/main/coerce.c const Rcomplex *dv = COMPLEX(v); for (int j=0; j != n; ++j) ians[j] |= (ISNAN(dv[j].r) || ISNAN(dv[j].i)); } break; default: error("Unsupported column type '%s'", type2char(TYPEOF(v))); } } } UNPROTECT(1); return(ans); } // from data.table_frank.c -> simplified frank, only dense method !! SEXP frankds(SEXP xorderArg, SEXP xstartArg, SEXP xlenArg, SEXP dns) { int i=0, j=0, k=0, end=0, n, ng; int *xstart = INTEGER(xstartArg), *xlen = INTEGER(xlenArg), *xorder = INTEGER(xorderArg); n = length(xorderArg); ng = length(xstartArg); if(n > 0 && n == ng && asInteger(dns) == 1) return xorderArg; SEXP ans = PROTECT(allocVector(INTSXP, n)); int *ians = INTEGER(ans); if(n > 0) { switch(asInteger(dns)) { case 0: // Not Sorted k=1; if(n == ng) { for (i = 0; i != n; i++) ians[xorder[i]-1] = i+1; } else { for (i = 0; i != ng; i++) { for (j = xstart[i]-1, end = xstart[i]+xlen[i]-1; j < end; j++) ians[xorder[j]-1] = k; k++; } } break; case 1: // Sorted k=1; for (i = 0; i != ng; i++) { for (j = xstart[i]-1, end = xstart[i]+xlen[i]-1; j < end; j++) ians[j] = k; k++; } break; case 2: // This is basically run-length type group-id: currently not used in collapse! for (i = 0; i != ng; i++) { k=1; for (j = xstart[i]-1, end = xstart[i]+xlen[i]-1; j < end; j++) ians[xorder[j]-1] = k++; } break; default: error("dns must be 0, 1 or 2"); } } UNPROTECT(1); return ans; } // from data.table_assign.c: SEXP setcolorder(SEXP x, SEXP o) { SEXP names = getAttrib(x, R_NamesSymbol); const int *od = INTEGER(o), ncol=LENGTH(x); if (isNull(names)) error("list passed to setcolorder has no names"); if (ncol != LENGTH(names)) error("Internal error: dt passed to setcolorder has %d columns but %d names", ncol, LENGTH(names)); // # nocov // Double-check here at C level that o[] is a strict permutation of 1:ncol. Reordering columns by reference makes no // difference to generations/refcnt so we can write behind barrier in this very special case of strict permutation. bool *seen = R_Calloc(ncol, bool); for (int i=0; i != ncol; ++i) { if (od[i]==NA_INTEGER || od[i]<1 || od[i]>ncol) error("Internal error: o passed to Csetcolorder contains an NA or out-of-bounds"); // # nocov if (seen[od[i]-1]) error("Internal error: o passed to Csetcolorder contains a duplicate"); // # nocov seen[od[i]-1] = true; } R_Free(seen); SEXP *tmp = R_Calloc(ncol, SEXP), *namesd = SEXPPTR(names); const SEXP *xd = SEXPPTR_RO(x); for (int i=0; i != ncol; ++i) tmp[i] = xd[od[i]-1]; for (int i=0; i != ncol; ++i) SET_VECTOR_ELT(x, i, tmp[i]); // SEXP *xd = SEXPPTR(x); // for (int i=0; i != ncol; ++i) tmp[i] = xd[od[i]-1]; // memcpy(xd, tmp, ncol*sizeof(SEXP)); // sizeof is type size_t so no overflow here for (int i=0; i != ncol; ++i) tmp[i] = namesd[od[i]-1]; memcpy(namesd, tmp, ncol*sizeof(SEXP)); // No need to change key (if any); sorted attribute is column names not positions R_Free(tmp); return(R_NilValue); } collapse/src/data.table_subset.c0000644000176200001440000007342015122271061016376 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include "collapse_c.h" #include "data.table.h" // selfref stuff is taken from data.tables assign.c void setselfref(SEXP x) { SEXP p; // Store pointer to itself so we can detect if the object has been copied. See // ?copy for why copies are not just inefficient but cause a problem for over-allocated data.tables. // Called from C only, not R level, so returns void. // setAttrib(x, SelfRefSymbol, R_NilValue); // Probably not needed but I like it here. setAttrib(x, SelfRefSymbol, p=R_MakeExternalPtr( R_NilValue, // for identical() to return TRUE. identical() doesn't look at tag and prot PROTECT(getAttrib(x, R_NamesSymbol)), // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...) PROTECT(R_MakeExternalPtr( // to avoid an infinite loop in object.size(), if prot=x here x, // to know if this data.table has been copied by key<-, attr<-, names<-, etc. R_NilValue, // this tag and prot currently unused R_NilValue )) )); UNPROTECT(2); /* * base::identical doesn't check prot and tag of EXTPTR, just that the ptr itself is the same in both objects. R_NilValue is always equal to R_NilValue. R_NilValue is a memory location constant within an R session, but can vary from session to session. So, it looks like a pointer to a user looking at attributes(DT), but they might wonder how it works if they realise the selfref of all data.tables all point to the same address (rather than to the table itself which would be reasonable to expect given the attribute's name). * p=NULL rather than R_NilValue works too, other than we need something other than NULL so we can detect tables loaded from disk (which set p to NULL, see 5.13 of R-exts). * x is wrapped in another EXTPTR because of object.size (called by tables(), and by users). If the prot (or tag) was x directly it sends object.size into an infinite loop and then "segfault from C stack overflow" (object.size does count tag and prot, unlike identical, but doesn't count what's pointed to). * Could use weak reference possibly, but the fact that they can get be set to R_NilValue by gc (?) didn't seem appropriate. * If the .internal.selfref attribute is removed (e.g. by user code), nothing will break, but an extra copy will just be taken on next :=, with warning, with a new selfref. * object.size will count size of names twice, but that's ok as only small. * Thanks to Steve L for suggesting ExtPtr for this, rather than the previous REALSXP vector which required data.table to do a show/hide dance in a masked identical. */ } // also need this stuff from assign.c static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) { // NEW: cols argument to specify the columns to shallow copy on. If NULL, all columns. // called from alloccol where n is checked carefully, or from shallow() at R level // where n is set to truelength (i.e. a shallow copy only with no size change) int protecti=0; SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // SHALLOW_DUPLICATE_ATTRIB would be a bit neater but is only available from R 3.3.0 // if(IS_S4_OBJECT(dt)) { // newdt = PROTECT(asS4(newdt, TRUE, 1)); protecti++; // To support S4 objects that include data.table // } // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It // also increases truelength. Perhaps make that distinction, then, and split out, but marked // so that the next change knows to duplicate. // keepattr() also merely points to the entire attrbutes list and thus doesn't allow replacing // some of its elements. // We copy all attributes that refer to column names so that calling setnames on either // the original or the shallow copy doesn't break anything. SEXP index = PROTECT(getAttrib(dt, sym_index)); protecti++; if(length(index)) setAttrib(newdt, sym_index, shallow_duplicate(index)); SEXP sorted = PROTECT(getAttrib(dt, sym_sorted)); protecti++; if(length(sorted)) setAttrib(newdt, sym_sorted, duplicate(sorted)); SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++; const int l = isNull(cols) ? LENGTH(dt) : length(cols); if (isNull(cols)) { // for (int i=0; i != l; ++i) pnewdt[i] = pdt[i]; memcpy(SEXPPTR(newdt), SEXPPTR_RO(dt), l*sizeof(SEXP)); if (length(names)) { if (length(names) < l) error("Internal error: length(names)>0 but 0 && tl < l) error("Internal error, please report (including result of sessionInfo()) to collapse issue tracker: tl (%d) < l (%d) but tl of class is marked.", tl, l); // # nocov // if (tl > l+10000) warning("tl (%d) is greater than 10,000 items over-allocated (l = %d). If you didn't set the collapse_DT_alloccol option to be very large, please report to collapse issue tracker including the result of sessionInfo().",tl,l); // TODO: MAKE THIS WORK WITHOUT SHALLOW COPYING EVERY TIME !!! // if (n > tl) return shallow(dt, R_NilValue, n); // usual case (increasing alloc) // SEXP nam = PROTECT(getAttrib(dt, R_NamesSymbol)); // if(LENGTH(nam) != l) SET_LEN(nam, l); // SET_TRULEN(nam, n); // setselfref(dt); // better, otherwise may be invalid !! // UNPROTECT(1); // return(dt); } // #pragma GCC diagnostic ignored "-Wunknown-pragmas" // don't display this warning!! // https://stackoverflow.com/questions/1867065/how-to-suppress-gcc-warnings-from-library-headers?noredirect=1&lq=1 void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA) // Only for use by subsetDT() or subsetVector() below, hence static -> nope, also used in match.c now { const int n = length(idx); if (length(ans)!=n) error("Internal error: subsetVectorRaw length(ans)==%d n=%d", length(ans), n); const int *restrict idxp = INTEGER(idx); // anyNA refers to NA _in idx_; if there's NA in the data (source) that's just regular data to be copied // negatives, zeros and out-of-bounds have already been dealt with in convertNegAndZero so we can rely // here on idx in range [1,length(ans)]. // _Pragma("omp parallel for num_threads(getDTthreads())") (in PARLOOP below) // _Pragma("omp parallel for num_threads(getDTthreads())") #define PARLOOP(_NAVAL_) \ if (anyNA) { \ _Pragma("omp simd") \ for (int i = 0; i < n; ++i) { \ int elem = idxp[i]; \ ap[i] = elem==NA_INTEGER ? _NAVAL_ : sp[elem]; \ } \ } else { \ _Pragma("omp simd") \ for (int i = 0; i < n; ++i) { \ ap[i] = sp[idxp[i]]; \ } \ } // For small n such as 2,3,4 etc we hope OpenMP will be sensible inside it and not create a team with each thread doing just one item. Otherwise, // call overhead would be too high for highly iterated calls on very small subests. Timings were tested in #3175 // Further, we desire (currently at least) to stress-test the threaded code (especially in latest R-devel) on small data to reduce chance that bugs // arise only over a threshold of n. switch(TYPEOF(source)) { case INTSXP: case LGLSXP: { int *restrict sp = INTEGER(source)-1, *restrict ap = INTEGER(ans); PARLOOP(NA_INTEGER); } break; case REALSXP : { if (INHERITS(source, char_integer64)) { int64_t *restrict sp = (int64_t *)REAL(source)-1, *restrict ap = (int64_t *)REAL(ans); PARLOOP(INT64_MIN); } else { double *restrict sp = REAL(source)-1, *restrict ap = REAL(ans); PARLOOP(NA_REAL); } } break; case STRSXP : { // write barrier (assigning strings/lists) is not thread safe. Hence single threaded. // To go parallel here would need access to NODE_IS_OLDER, at least. Given gcgen, mark and named // are upper bounded and max 3, REFCNT==REFCNTMAX could be checked first and then critical SET_ if not. // Inside that critical just before SET_ it could check REFCNTmax since they should have been dealt with by convertNegAndZeroIdx() called earlier at R level. // single cache efficient sweep with prefetch, so very low priority to go parallel { if (!isInteger(idx)) error("Internal error. 'idx' is type '%s' not 'integer'", type2char(TYPEOF(idx))); // # nocov bool anyNA = false, stop = false; // anyLess=false, // int last = INT32_MIN; int *idxp = INTEGER(idx), n = LENGTH(idx); #pragma omp simd reduction(|:stop,anyNA) for (int i = 0; i < n; ++i) { int elem = idxp[i]; stop |= (elem<1 && elem!=NA_INTEGER) || elem>max; anyNA |= elem == NA_INTEGER; } if(stop) return "Internal inefficiency: idx contains an item out-of-range. Should have been dealt with earlier."; // previous solution: slower // for (int i = 0; i != n; ++i) { // int elem = idxp[i]; // if (elem<=0 && elem!=NA_INTEGER) return "Internal inefficiency: idx contains negatives or zeros. Should have been dealt with earlier."; // e.g. test 762 (TODO-fix) // if (elem>max) return "Internal inefficiency: idx contains an item out-of-range. Should have been dealt with earlier."; // e.g. test 1639.64 // anyNA |= elem==NA_INTEGER; // // anyLess |= elem= 0.", max); // # nocov includes NA which will print as INT_MIN int *idxp = INTEGER(idx); bool stop = false; // #pragma omp parallel for num_threads(getDTthreads()) #pragma omp simd reduction(|:stop) for (int i = 0; i < n; ++i) { int elem = idxp[i]; stop |= (elem<1 && elem!=NA_INTEGER) || elem>max; } if (!stop) return(idx); // most common case to return early: no 0, no negative; all idx either NA or in range [1-max] // --------- // else massage the input to a standard idx where all items are either NA or in range [1,max] ... int countNeg=0, countZero=0, countNA=0, firstOverMax=0; for (int i = 0; i != n; ++i) { int elem = idxp[i]; if (elem==NA_INTEGER) countNA++; else if (elem<0) countNeg++; else if (elem==0) countZero++; else if (elem>max && firstOverMax==0) firstOverMax=i+1; } if (firstOverMax && LOGICAL(allowOverMax)[0]==FALSE) { error("i[%d] is %d which is out of range [1,nrow=%d]", firstOverMax, idxp[firstOverMax-1], max); } int countPos = n-countNeg-countZero-countNA; if (countPos && countNeg) { int i = 0, firstNeg=0, firstPos=0; while (i != n && (firstNeg==0 || firstPos==0)) { int elem = idxp[i]; if (firstPos==0 && elem>0) firstPos=i+1; if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1; i++; } error("Item %d of i is %d and item %d is %d. Cannot mix positives and negatives.", firstNeg, idxp[firstNeg-1], firstPos, idxp[firstPos-1]); } if (countNeg && countNA) { int i = 0, firstNeg=0, firstNA=0; while (i != n && (firstNeg==0 || firstNA==0)) { int elem = idxp[i]; if (firstNeg==0 && elem<0 && elem!=NA_INTEGER) firstNeg=i+1; if (firstNA==0 && elem==NA_INTEGER) firstNA=i+1; i++; } error("Item %d of i is %d and item %d is NA. Cannot mix negatives and NA.", firstNeg, idxp[firstNeg-1], firstNA); } SEXP ans; if (countNeg==0) { // just zeros to remove, or >max to convert to NA ans = PROTECT(allocVector(INTSXP, n - countZero)); int *ansp = INTEGER(ans); for (int i = 0, ansi = 0; i != n; ++i) { int elem = idxp[i]; if (elem==0) continue; ansp[ansi++] = elem>max ? NA_INTEGER : elem; } } else { // idx is all negative without any NA but perhaps some zeros bool *keep = (bool *)R_alloc(max, sizeof(bool)); // 4 times less memory that INTSXP in src/main/subscript.c for (int i = 0; i != max; ++i) keep[i] = true; int countRemoved=0, countDup=0, countBeyond=0; // idx=c(-10,-5,-10) removing row 10 twice int firstBeyond=0, firstDup=0; for (int i = 0; i != n; ++i) { int elem = -idxp[i]; if (elem==0) continue; if (elem>max) { countBeyond++; if (firstBeyond==0) firstBeyond=i+1; continue; } if (!keep[elem-1]) { countDup++; if (firstDup==0) firstDup=i+1; } else { keep[elem-1] = false; countRemoved++; } } if (countBeyond) warning("Item %d of i is %d but there are only %d rows. Ignoring this and %d more like it out of %d.", firstBeyond, idxp[firstBeyond-1], max, countBeyond-1, n); if (countDup) warning("Item %d of i is %d which removes that item but that has occurred before. Ignoring this dup and %d other dups.", firstDup, idxp[firstDup-1], countDup-1); int ansn = max-countRemoved; ans = PROTECT(allocVector(INTSXP, ansn)); int *ansp = INTEGER(ans); for (int i = 0, ansi = 0; i != max; ++i) { if (keep[i]) ansp[ansi++] = i+1; } } UNPROTECT(1); return ans; } static void checkCol(SEXP col, int colNum, int nrow, SEXP x) { if (isNull(col)) error("Column %d is NULL; malformed data.table.", colNum); if (isNewList(col) && INHERITS(col, char_dataframe)) { SEXP names = getAttrib(x, R_NamesSymbol); error("Column %d ['%s'] is a data.frame or data.table; malformed data.table.", colNum, isNull(names)?"":CHAR(STRING_ELT(names,colNum-1))); } if (length(col)!=nrow) { SEXP names = getAttrib(x, R_NamesSymbol); error("Column %d ['%s'] is length %d but column 1 is length %d; malformed data.table.", colNum, isNull(names)?"":CHAR(STRING_ELT(names,colNum-1)), length(col), nrow); } } /* helper */ SEXP extendIntVec(SEXP x, int len, int val) { SEXP out = PROTECT(allocVector(INTSXP, len + 1)); int *pout = INTEGER(out), *px = INTEGER(x); for(int i = len; i--; ) pout[i] = px[i]; pout[len] = val; UNPROTECT(1); return out; } /* subset columns of a list efficiently */ SEXP subsetCols(SEXP x, SEXP cols, SEXP checksf) { // SEXP fretall if(TYPEOF(x) != VECSXP) error("x is not a list."); int l = LENGTH(x), nprotect = 3, oxl = isObject(x); if(l == 0) return x; // ncol == 0 -> Nope, need emty selections such as cat_vars(mtcars) !! PROTECT_INDEX ipx; PROTECT_WITH_INDEX(cols = convertNegAndZeroIdx(cols, ScalarInteger(l), ScalarLogical(FALSE)), &ipx); int ncol = LENGTH(cols); int *pcols = INTEGER(cols); // if(ncol == 0 || (asLogical(fretall) && l == ncol)) return(x); // names SEXP nam = PROTECT(getAttrib(x, R_NamesSymbol)); // sf data frames: Need to add sf_column if(oxl && asLogical(checksf) && INHERITS(x, char_sf)) { int sfcoln = NA_INTEGER, sf_col_sel = 0; const SEXP *pnam = SEXPPTR_RO(nam), sfcol = asChar(getAttrib(x, sym_sf_column)); for(int i = l; i--; ) { if(pnam[i] == sfcol) { sfcoln = i + 1; break; } } if(sfcoln == NA_INTEGER) error("sf data frame has no attribute 'sf_column'"); for(int i = ncol; i--; ) { if(pcols[i] == sfcoln) { sf_col_sel = 1; break; } } if(sf_col_sel == 0) { REPROTECT(cols = extendIntVec(cols, ncol, sfcoln), ipx); ++ncol; pcols = INTEGER(cols); } } SEXP ans = PROTECT(allocVector(VECSXP, ncol)); const SEXP *px = SEXPPTR_RO(x); // SEXP *pans = SEXPPTR(ans); for(int i = 0; i != ncol; ++i) { // pans[i] = px[pcols[i]-1]; SET_VECTOR_ELT(ans, i, px[pcols[i]-1]); } if(!isNull(nam)) { SEXP tmp = PROTECT(allocVector(STRSXP, ncol)); setAttrib(ans, R_NamesSymbol, tmp); subsetVectorRaw(tmp, nam, cols, /*anyNA=*/false); ++nprotect; } copyMostAttrib(x, ans); // includes row.names and class... // clear any index that was copied over by copyMostAttrib(), e.g. #1760 and #1734 (test 1678) // setAttrib(ans, sym_index, R_NilValue); -> deletes "index" attribute of pdata.frame -> don't use!! if(oxl && INHERITS(x, char_datatable)) { setAttrib(ans, sym_datatable_locked, R_NilValue); // int n = asInteger(GetOption1(sym_collapse_DT_alloccol)); // UNPROTECT(nprotect); // This needs to be here !! (asInteger and GetOption1 are allocating functions) SEXP res = shallow(ans, R_NilValue, ncol + 100); // n // 1024 is data.table default.. UNPROTECT(nprotect); return res; // setselfref(ans); // done by shallow } UNPROTECT(nprotect); return ans; } /* * subsetDT - Subsets a data.table * NOTE: * 1) 'rows' and 'cols' are 1-based, passed from R level * 2) Originally for subsetting vectors in fcast and now the beginnings of [.data.table ported to C * 3) Immediate need is for R 3.1 as lglVec[1] now returns R's global TRUE and we don't want := to change that global [think 1 row data.tables] * 4) Could do it other ways but may as well go to C now as we were going to do that anyway */ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols, SEXP checkrows) { // , SEXP fastret int nprotect=0, oxl = isObject(x); if (!isNewList(x)) error("Internal error. Argument 'x' to CsubsetDT is type '%s' not 'list'", type2char(TYPEOF(rows))); // # nocov if (!length(x)) return x; // return empty list if (!isInteger(cols)) error("Internal error. Argument 'cols' to Csubset is type '%s' not 'integer'", type2char(TYPEOF(cols))); // # nocov int ncol = LENGTH(cols), l = LENGTH(x), *pcols = INTEGER(cols); for (int i = 0; i != ncol; ++i) { if (pcols[i] < 1 || pcols[i] > l) error("Item %d of 'cols' is %d which is outside 1-based range [1,ncol(x)=%d]", i+1, pcols[i], l); } const int nrow = ncol ? length(VECTOR_ELT(x, pcols[0]-1)) : 0; // Allows checking just subsetted columns for right length // if fast return, return data.table if all rows selected through positive indices... // if(asLogical(fastret) && nrow == LENGTH(rows) && INTEGER(rows)[0] > 0) { // if(LENGTH(cols) == length(x)) return x; // return subsetCols(x, cols); // } // check index once up front for 0 or NA, for branchless subsetVectorRaw which is repeated for each column bool anyNA=false; // , orderedSubset=true; // true for when rows==null (meaning all rows) if (asLogical(checkrows) && !isNull(rows) && check_idx(rows, nrow, &anyNA)!=NULL) { // , &orderedSubset SEXP max = PROTECT(ScalarInteger(nrow)); nprotect++; rows = PROTECT(convertNegAndZeroIdx(rows, max, ScalarLogical(TRUE))); nprotect++; const char *err = check_idx(rows, nrow, &anyNA); // , &orderedSubset if (err!=NULL) error("%s", err); } // Adding sf geometry column if not already selected... if(oxl && INHERITS(x, char_sf)) { int sfcoln = NA_INTEGER, sf_col_sel = 0; SEXP nam = PROTECT(getAttrib(x, R_NamesSymbol)); const SEXP *pnam = SEXPPTR_RO(nam), sfcol = asChar(getAttrib(x, sym_sf_column)); for(int i = l; i--; ) { if(pnam[i] == sfcol) { sfcoln = i + 1; break; } } UNPROTECT(1); if(sfcoln == NA_INTEGER) error("sf data frame has no attribute 'sf_column'"); for(int i = ncol; i--; ) { if(pcols[i] == sfcoln) { sf_col_sel = 1; break; } } if(sf_col_sel == 0) { cols = PROTECT(extendIntVec(cols, LENGTH(cols), sfcoln)); ++ncol; ++nprotect; pcols = INTEGER(cols); } } // int overAlloc = 1024; // checkOverAlloc(GetOption(install("datatable.alloccol"), R_NilValue)); SEXP ans = PROTECT(allocVector(VECSXP, ncol)); nprotect++; // +overAlloc // doing alloc.col directly here; eventually alloc.col can be deprecated. // user-defined and superclass attributes get copied as from v1.12.0 copyMostAttrib(x, ans); // most means all except R_NamesSymbol, R_DimSymbol and R_DimNamesSymbol // includes row.names (oddly, given other dims aren't) and "sorted" dealt with below // class is also copied here which retains superclass name in class vector as has been the case for many years; e.g. tests 1228.* for #5296 // This is because overalloc.. creating columns by reference stuff.. // SET_TRULEN(ans, LENGTH(ans)); // SET_LEN(ans, LENGTH(cols)); int ansn; const SEXP *px = SEXPPTR_RO(x); // SEXP *pans = SEXPPTR(ans); if (isNull(rows)) { ansn = nrow; for (int i = 0; i != ncol; ++i) { SEXP thisCol = px[pcols[i]-1]; checkCol(thisCol, pcols[i], nrow, x); // pans[i] = thisCol; // copyAsPlain(thisCol) -> No deep copy SET_VECTOR_ELT(ans, i, thisCol); // materialize the column subset as we have always done for now, until REFCNT is on by default in R (TODO) } } else { ansn = LENGTH(rows); // has been checked not to contain zeros or negatives, so this length is the length of result for (int i = 0; i != ncol; ++i) { SEXP source = px[pcols[i]-1]; checkCol(source, pcols[i], nrow, x); SEXP target; SET_VECTOR_ELT(ans, i, target = allocVector(TYPEOF(source), ansn)); copyMostAttrib(source, target); subsetVectorRaw(target, source, rows, anyNA); // parallel within column } } SEXP colnam = getAttrib(x, R_NamesSymbol); if(TYPEOF(colnam) == STRSXP) { PROTECT(colnam); SEXP tmp = PROTECT(allocVector(STRSXP, ncol)); nprotect++; // SET_TRULEN(tmp, LENGTH(tmp)); // SET_LEN(tmp, LENGTH(cols)); setAttrib(ans, R_NamesSymbol, tmp); subsetVectorRaw(tmp, colnam, cols, /*anyNA=*/false); UNPROTECT(1); } if(oxl) { SEXP tmp = PROTECT(allocVector(INTSXP, 2)); nprotect++; INTEGER(tmp)[0] = NA_INTEGER; INTEGER(tmp)[1] = -ansn; setAttrib(ans, R_RowNamesSymbol, tmp); // The contents of tmp must be set before being passed to setAttrib(). setAttrib looks at tmp value and copies it in the case of R_RowNamesSymbol. Caused hard to track bug around 28 Sep 2014. // clear any index that was copied over by copyMostAttrib() above, e.g. #1760 and #1734 (test 1678) setAttrib(ans, sym_index, R_NilValue); // also ok for pdata.frame (can't use on subsetted or ordered data frame) setAttrib(ans, sym_index_df, R_NilValue); } if(oxl && INHERITS(x, char_datatable)) { setAttrib(ans, sym_sorted, R_NilValue); setAttrib(ans, sym_datatable_locked, R_NilValue); // int n = asInteger(GetOption1(sym_collapse_DT_alloccol)); SEXP res = shallow(ans, R_NilValue, ncol + 100); // n // 1024 is data.table default.. UNPROTECT(nprotect); // This needs to be here !! (asInteger and GetOption1 are allocating functions) return res; // setselfref(ans); // done by shallow } UNPROTECT(nprotect); return ans; } SEXP subsetVector(SEXP x, SEXP idx, SEXP checkidx) { // idx is 1-based passed from R level bool anyNA = false; //, orderedSubset=false; int nprotect=0; if (isNull(x)) error("Internal error: NULL can not be subset. It is invalid for a data.table to contain a NULL column."); // # nocov if (asLogical(checkidx) && check_idx(idx, length(x), &anyNA) != NULL) { // , &orderedSubset SEXP max = PROTECT(ScalarInteger(length(x))); nprotect++; idx = PROTECT(convertNegAndZeroIdx(idx, max, ScalarLogical(TRUE))); nprotect++; const char *err = check_idx(idx, length(x), &anyNA); // , &orderedSubset if (err != NULL) error("%s", err); } SEXP ans = PROTECT(allocVector(TYPEOF(x), length(idx))); nprotect++; copyMostAttrib(x, ans); subsetVectorRaw(ans, x, idx, anyNA); UNPROTECT(nprotect); return ans; } collapse/src/mrtl_mctl.cpp0000644000176200001440000002014015113725245015346 0ustar liggesusers#include using namespace Rcpp; template List mrtlImpl(Matrix X, bool names, int ret) { int l = X.nrow(); List out(l); for(int i = l; i--; ) out[i] = X(i, _); if(names) { SEXP dn = Rf_getAttrib(X, R_DimNamesSymbol); if(dn == R_NilValue) dn = List::create(R_NilValue, R_NilValue); // should also work for plain matrices ! if(Rf_isNull(VECTOR_ELT(dn, 0))) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); } else Rf_namesgets(out, VECTOR_ELT(dn, 0)); if(ret != 0) { if(Rf_isNull(VECTOR_ELT(dn, 1)) || ret == 2) { Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.ncol())); } else Rf_setAttrib(out, R_RowNamesSymbol, VECTOR_ELT(dn, 1)); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } } else if (ret != 0) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.ncol())); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } return out; } template <> List mrtlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } template <> List mrtlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP mrtl(const SEXP& X, bool names = false, int ret = 0){ RCPP_RETURN_MATRIX(mrtlImpl, X, names, ret); } template List mctlImpl(Matrix X, bool names, int ret) { int l = X.ncol(); List out(l); for(int i = l; i--; ) out[i] = X(_, i); if(names) { SEXP dn = Rf_getAttrib(X, R_DimNamesSymbol); if(dn == R_NilValue) dn = List::create(R_NilValue, R_NilValue); // should also work for plain matrices ! if(Rf_isNull(VECTOR_ELT(dn, 1))) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); } else Rf_namesgets(out, VECTOR_ELT(dn, 1)); if(ret != 0) { if(Rf_isNull(VECTOR_ELT(dn, 0)) || ret == 2) { Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.nrow())); } else Rf_setAttrib(out, R_RowNamesSymbol, VECTOR_ELT(dn, 0)); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } } else if (ret != 0) { CharacterVector rn(l); std::string VS = std::string("V"); // faster ! for (int i = l; i--; ) rn[i] = VS + std::to_string(i+1); Rf_namesgets(out, rn); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -X.nrow())); if(ret == 1) { Rf_classgets(out, Rf_mkString("data.frame")); } else { Rf_classgets(out, CharacterVector::create("data.table","data.frame")); } } return out; } template <> List mctlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } template <> List mctlImpl(Matrix X, bool names, int ret) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] SEXP mctl(const SEXP& X, bool names = false, int ret = 0){ RCPP_RETURN_MATRIX(mctlImpl, X, names, ret); } // Experimental Matrix apply functions -> Need to make faster, see Hmisc::mApply // template // Slower than lapply(mctl...) // List mrtlapplyImpl(Matrix X, Function FUN, bool names, int ret) { // int l = X.nrow(); // List out(l); // for(int i = l; i--; ) { // MatrixRow Xi = X(i,_); // out[i] = FUN(Xi); // } // if(names && X.hasAttribute("dimnames")) { // List dn(2); // dn = X.attr("dimnames"); // if (Rf_isNull(dn[0])) { // CharacterVector rn(l); // for (int i = l; i--; ) { // rn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = rn; // } else out.attr("names") = dn[0]; // if (ret != 0) { // if (Rf_isNull(dn[1])) { // out.attr("row.names") = NumericVector::create(NA_REAL,-X.ncol()); // } else out.attr("row.names") = dn[1]; // if(ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // } else if (ret != 0) { // CharacterVector rn(l); // for (int i = l; i--; ) { // rn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = rn; // out.attr("row.names") = NumericVector::create(NA_REAL,-X.ncol()); // if (ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // return out; // } // template // Matrix mrtmapplyImpl(Matrix X, Function FUN) { // int l = X.nrow(); // Vector out0 = FUN(X(0,_)); // What if not same type ?? // int col = out0.size(); // Matrix out = no_init_matrix(l, col); // for(int i = 1; i != l; ++i) { // out(i,_) = FUN(X(i,_)); // } // if(X.ncol() == col) SHALLOW_DUPLICATE_ATTRIB(out, X); // else rownames(out) = rownames(X); // return out; // } // template // Slower than lapply(mctl...) // List mctlapplyImpl(Matrix X, Function FUN, bool names, int ret) { // int l = X.ncol(); // List out(l); // for(int i = l; i--; ) { // MatrixColumn Xi = X(_,i); // out[i] = FUN(Xi); // } // if(names && X.hasAttribute("dimnames")) { // List dn(2); // dn = X.attr("dimnames"); // if (Rf_isNull(dn[1])) { // CharacterVector cn(l); // for (int i = l; i--; ) { // cn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = cn; // } else out.attr("names") = dn[1]; // if (ret != 0) { // if (Rf_isNull(dn[0])) { // out.attr("row.names") = NumericVector::create(NA_REAL,-X.nrow()); // } else out.attr("row.names") = dn[0]; // if(ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // } else if (ret != 0) { // CharacterVector cn(l); // for (int i = l; i--; ) { // cn[i] = std::string("V") + std::to_string(i+1); // } // out.attr("names") = cn; // out.attr("row.names") = NumericVector::create(NA_REAL,-X.nrow()); // if (ret == 1) { // out.attr("class") = "data.frame"; // } else { // out.attr("class") = CharacterVector::create("data.table","data.frame"); // } // } // return out; // } // template // Matrix mctmapplyImpl(Matrix X, Function FUN) { // int l = X.ncol(); // Vector out0 = FUN(X(_,0)); // What if not same type ?? // int row = out0.size(); // Matrix out = no_init_matrix(row, l); // for(int i = 1; i != l; ++i) { // NumericMatrix::Column outi = out(_,i); // outi = FUN(X(_,i)); // } // if(X.nrow() == row) SHALLOW_DUPLICATE_ATTRIB(out, X); // else colnames(out) = colnames(X); // return out; // } // // [[Rcpp::export]] // SEXP mrtlapply(SEXP X, Function FUN, bool names = false, int ret = 0){ // RCPP_RETURN_MATRIX(mrtlapplyImpl, X, FUN, names, ret); // } // // [[Rcpp::export]] // SEXP mrtmapply(SEXP X, Function FUN){ // RCPP_RETURN_MATRIX(mrtmapplyImpl, X, FUN); // } // // [[Rcpp::export]] // SEXP mctlapply(SEXP X, Function FUN, bool names = false, int ret = 0){ // RCPP_RETURN_MATRIX(mctlapplyImpl, X, FUN, names, ret); // } // // [[Rcpp::export]] // SEXP mctmapply(SEXP X, Function FUN){ // RCPP_RETURN_MATRIX(mctmapplyImpl, X, FUN); // } collapse/src/seqid_groupid.cpp0000644000176200001440000004133115113725406016213 0ustar liggesusers#include using namespace Rcpp; // TODO: Optimize ! // TODO: can do something about doubles using == ? // TODO: Option na_fill ? // Note: For x[i] == NA_INTEGER, which is equal to INT_MIN, cannot calculate x[i]-prev ! -> fixed in 1.2.1 // https://stackoverflow.com/questions/776624/whats-faster-iterating-an-stl-vector-with-vectoriterator-or-with-at // [[Rcpp::export]] IntegerVector seqid(const IntegerVector& x, const SEXP& o = R_NilValue, int del = 1, int start = 1, bool na_skip = false, bool skip_seq = false, bool check_o = true) { int l = x.size(), id = start, prev; if(l < 1) return x; // Prevents seqfault for numeric(0) #101 IntegerVector out = no_init_vector(l); if(Rf_isNull(o)) { if(na_skip) { int j = 0, end = l; while(j != end && x[j] == NA_INTEGER) out[j++] = NA_INTEGER; if(j != end) { prev = x[j]; out[j] = id; for(int i = j+1; i != l; ++i) { if(x[i] != NA_INTEGER) { if(x[i] - prev != del) ++id; // x[i]-x[i-1]? prev = x[i]; out[i] = id; } else { // Faster way ? out[i] = NA_INTEGER; if(skip_seq) prev += del; } } } } else { int nafill = INT_MAX - 1e7; prev = x[0]; if(prev == NA_INTEGER) prev = nafill; out[0] = id; for(int i = 1; i != l; ++i) { if(x[i] == NA_INTEGER) { ++id; prev = nafill; } else { if(x[i] - prev != del) ++id; prev = x[i]; } out[i] = id; } } } else { IntegerVector oo = o; if(oo.size() != l) stop("length(o) must match length(x)"); int val(oo[0]-1); if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(na_skip) { int j = 0, end = l-1; if(check_o) { while(x[val] == NA_INTEGER && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] != NA_INTEGER) { if(x[val] - prev != del) ++id; // x[i]-x[i-1]? prev = x[val]; out[val] = id; } else { out[val] = NA_INTEGER; if(skip_seq) prev += del; } } } } else { while(x[val] == NA_INTEGER && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(x[val] != NA_INTEGER) { if(x[val] - prev != del) ++id; // x[i]-x[i-1]? prev = x[val]; out[val] = id; } else { out[val] = NA_INTEGER; if(skip_seq) prev += del; } } } } } else { int nafill = INT_MAX - 1e7; prev = x[val]; if(prev == NA_INTEGER) prev = nafill; out[val] = id; // faster than iterator ? if(check_o) { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] == NA_INTEGER) { ++id; prev = nafill; } else { if(x[val] - prev != del) ++id; prev = x[val]; } out[val] = id; } } else { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(x[val] == NA_INTEGER) { ++id; prev = nafill; } else { if(x[val] - prev != del) ++id; prev = x[val]; } out[val] = id; } } } } out.attr("N.groups") = id - start + 1; if(start == 1) Rf_classgets(out, na_skip ? CharacterVector::create("qG") : CharacterVector::create("qG", "na.included")); return out; } // TODO: Make unique argument and generalize to all vector input types !! Or starts ?? -> Nah, GRP already does that. need to think harder. First publish without.. // The problem with groups or starts is also that you either have to dynamically fill a vector or do a second iteration... // Rather have it process starts attribute from radixorder... template IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { int l = x.size(), id = start; if(l < 1) return IntegerVector(0); // Prevents seqfault for numeric(0) #101 typedef typename Rcpp::traits::storage_type::type storage_t; auto isnanT = (RTYPE == REALSXP) ? [](storage_t x) { return x != x; } : [](storage_t x) { return x == Vector::get_na(); }; storage_t prev; IntegerVector out = no_init_vector(l); if(Rf_isNull(o)) { if(na_skip) { int j = 0, end = l; while(j != end && isnanT(x[j])) out[j++] = NA_INTEGER; if(j != end) { prev = x[j]; out[j] = id; for(int i = j+1; i != l; ++i) { if(!isnanT(x[i])) { if(x[i] != prev) { ++id; prev = x[i]; } out[i] = id; } else out[i] = NA_INTEGER; } } } else { prev = x[0]; out[0] = id; if(RTYPE == REALSXP) { for(int i = 1; i != l; ++i) { if(x[i] != prev) { if(!(prev != prev && isnanT(x[i]))) ++id; prev = x[i]; } out[i] = id; } } else { for(int i = 1; i != l; ++i) { if(x[i] != prev) { ++id; prev = x[i]; } out[i] = id; } } } } else { IntegerVector oo = o; if(oo.size() != l) stop("length(o) must match length(x)"); int val(oo[0]-1); if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(na_skip) { int j = 0, end = l-1; if(check_o) { while(isnanT(x[val]) && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(!isnanT(x[val])) { if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } else out[val] = NA_INTEGER; } } } else { while(isnanT(x[val]) && j != end) { out[val] = NA_INTEGER; val = oo[++j]-1; } if(j != end) { prev = x[val]; out[val] = id; for(int i = j+1; i != l; ++i) { val = oo[i]-1; if(!isnanT(x[val])) { if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } else out[val] = NA_INTEGER; } } } } else { prev = x[val]; out[val] = id; // faster than iterator ? if(RTYPE == REALSXP) { if(check_o) { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] != prev) { if(!(prev != prev && isnanT(x[val]))) ++id; prev = x[val]; } out[val] = id; } } else { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(x[val] != prev) { if(!(prev != prev && isnanT(x[val]))) ++id; prev = x[val]; } out[val] = id; } } } else { if(check_o) { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } } else { for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; val = oo[i]-1; if(x[val] != prev) { ++id; prev = x[val]; } out[val] = id; } } } } } out.attr("N.groups") = id - start + 1; if(start == 1) Rf_classgets(out, na_skip ? CharacterVector::create("qG") : CharacterVector::create("qG", "na.included")); return out; } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } template <> IntegerVector groupidImpl(Vector x, SEXP o, int start, bool na_skip, bool check_o) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] IntegerVector groupid(const SEXP& x, const SEXP& o = R_NilValue, int start = 1, bool na_skip = false, bool check_o = true) { RCPP_RETURN_VECTOR(groupidImpl, x, o, start, na_skip, check_o); } // Integer Version // // [[Rcpp::export]] // IntegerVector groupid(const IntegerVector& x, const SEXP& o = R_NilValue, int start = 1, // bool na_skip = false, bool check_o = true) { // int l = x.size(), prev, id = start; // IntegerVector out = no_init_vector(l); // if(Rf_isNull(o)) { // if(na_skip) { // int j = 0, end = l-1; // while(x[j] == NA_INTEGER && j != end) out[j++] = NA_INTEGER; // if(j != end) { // prev = x[j]; // out[j] = id; // for(int i = j+1; i != l; ++i) { // if(x[i] != NA_INTEGER) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } else out[i] = NA_INTEGER; // } // } // } else { // prev = x[0]; // out[0] = id; // for(int i = 1; i != l; ++i) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } // } // } else { // IntegerVector oo = o; // int val(oo[0]-1); // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(na_skip) { // int j = 0, end = l-1; // if(check_o) { // while(x[val] == NA_INTEGER && j != end) { // out[val] = NA_INTEGER; // val = oo[++j]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // } // if(j != end) { // prev = x[val]; // out[val] = id; // for(int i = j+1; i != l; ++i) { // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != NA_INTEGER) { // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } else out[val] = NA_INTEGER; // } // } // } else { // while(x[val] == NA_INTEGER && j != end) { // out[val] = NA_INTEGER; // val = oo[++j]-1; // } // if(j != end) { // prev = x[val]; // out[val] = id; // for(int i = j+1; i != l; ++i) { // val = oo[i]-1; // if(x[val] != NA_INTEGER) { // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } else out[val] = NA_INTEGER; // } // } // } // } else { // prev = x[val]; // out[val] = id; // faster than iterator ?? // if(check_o) { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } else { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } // } // } // out.attr("N.groups") = id; // out.attr("class") = na_skip ? "qG" : CharacterVector::create("qG", "na.included"); // return out; // } // // Simple first versions // // // [[Rcpp::export]] // IntegerVector groupid(const IntegerVector& x, const SEXP& o = R_NilValue, bool check = true) { // int l = x.size(), prev, id = 1; // IntegerVector out = no_init_vector(l); // if(Rf_isNull(o)) { // prev = x[0]; // out[0] = 1; // for(int i = 1; i != l; ++i) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } // } else { // IntegerVector oo = o; // int val(oo[0]-1); // prev = x[val]; // https://stackoverflow.com/questions/776624/whats-faster-iterating-an-stl-vector-with-vectoriterator-or-with-at // out[val] = 1; // faster than iterator ?? // if(check) { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } else { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } // } // out.attr("N.groups") = id; // out.attr("class") = CharacterVector::create("qG","na.included"); // return out; // } // // // [[Rcpp::export]] // IntegerVector groupid(const IntegerVector& x, const SEXP& o = R_NilValue, bool check = true) { // int l = x.size(), prev, id = 1; // IntegerVector out = no_init_vector(l); // if(Rf_isNull(o)) { // prev = x[0]; // out[0] = 1; // for(int i = 1; i != l; ++i) { // if(x[i] != prev) { // ++id; // prev = x[i]; // } // out[i] = id; // } // } else { // IntegerVector oo = o; // int val(oo[0]-1); // prev = x[val]; // https://stackoverflow.com/questions/776624/whats-faster-iterating-an-stl-vector-with-vectoriterator-or-with-at // out[val] = 1; // faster than iterator ?? // if(check) { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(val < 0 || val >= l) stop("o out of allowed range [1, length(x)]"); // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } else { // for(int i = 1; i != l; ++i) { // for(IntegerVector::iterator it = oo.begin()+1, end = oo.end(); it != end; ++it) { val = *it-1; // val = oo[i]-1; // if(x[val] != prev) { // ++id; // prev = x[val]; // } // out[val] = id; // } // } // } // out.attr("N.groups") = id; // out.attr("class") = CharacterVector::create("qG","na.included"); // return out; // } collapse/src/base_radixsort.c0000644000176200001440000021061014777170131016030 0ustar liggesusers/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 2016 The R Core Team * * Based on code donated from the data.table package * (C) 2006-2015 Matt Dowle and Arun Srinivasan. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ #ifdef HAVE_CONFIG_H #include #endif #include "base_radixsort.h" // gs = groupsizes e.g.23, 12, 87, 2, 1, 34,... static int *gs[2] = { NULL }; //two vectors flip flopped:flip and 1 - flip static int flip = 0; //allocated stack size static int gsalloc[2] = { 0 }; static int gsngrp[2] = { 0 }; //max grpn so far static int gsmax[2] = { 0 }; //max size of stack, set by do_radixsort to nrows static int gsmaxalloc = 0; //switched off for last arg unless retGrp==TRUE static Rboolean stackgrps = TRUE; // TRUE for setkey, FALSE for by= static Rboolean sortStr = TRUE; // used by do_radixsort and [i|d|c]sort to reorder order. // not needed if narg==1 static int *newo = NULL; // =1, 0, -1 for TRUE, NA, FALSE respectively. // Value rewritten inside do_radixsort(). static int nalast = -1; // =1, -1 for ascending and descending order respectively static int order = 1; // static double POS_INF = 1.0/0.0; // static double NEG_INF = -1.0/0.0; //replaced n < 200 with n < N_SMALL.Easier to change later #define N_SMALL 200 // range limit for counting sort. Should be less than INT_MAX // (see setRange for details) #define N_RANGE 100000 static SEXP *saveds = NULL; static R_len_t *savedtl = NULL, nalloc = 0, nsaved = 0; static void savetl_init(void) { if (nsaved || nalloc || saveds || savedtl) error("Internal error: savetl_init checks failed (%d %d %p %p).", nsaved, nalloc, (void *)saveds, (void *)savedtl); nsaved = 0; nalloc = 100; saveds = (SEXP *) malloc(nalloc * sizeof(SEXP)); if (saveds == NULL) error("Could not allocate saveds in savetl_init"); savedtl = (R_len_t *) malloc(nalloc * sizeof(R_len_t)); if (savedtl == NULL) { free(saveds); error("Could not allocate saveds in savetl_init"); } } static void savetl_end(void) { // Can get called if nothing has been saved yet (nsaved == 0), or // even if _init() has not been called yet (pointers NULL). Such as // to clear up before error. Also, it might be that nothing needed // to be saved anyway. for (int i = 0; i != nsaved; ++i) SET_TRLEN(saveds[i], savedtl[i]); free(saveds); // does nothing on NULL input free(savedtl); nsaved = nalloc = 0; saveds = NULL; savedtl = NULL; } static void savetl(SEXP s) { if (nsaved >= nalloc) { nalloc *= 2; char *tmp; tmp = (char *) realloc(saveds, nalloc * sizeof(SEXP)); if (tmp == NULL) { savetl_end(); error("Could not realloc saveds in savetl"); } saveds = (SEXP *) tmp; tmp = (char *) realloc(savedtl, nalloc * sizeof(R_len_t)); if (tmp == NULL) { savetl_end(); error("Could not realloc savedtl in savetl"); } savedtl = (R_len_t *) tmp; } saveds[nsaved] = s; savedtl[nsaved] = TRLEN(s); nsaved++; } // http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html#Swallowing-the-Semicolon #define Error(...) do {savetl_end(); error(__VA_ARGS__);} while(0) #undef warning // since it can be turned to error via warn = 2 #define warning(...) Do not use warning in this file /* use malloc/realloc (not R_Calloc/R_Realloc) so we can trap errors and call savetl_end() before the error(). */ static void growstack(uint64_t newlen) { // no link to icount range restriction, // just 100,000 seems a good minimum at 0.4MB if (newlen == 0) newlen = 100000; if (newlen > gsmaxalloc) newlen = gsmaxalloc; gs[flip] = realloc(gs[flip], newlen * sizeof(int)); if (gs[flip] == NULL) Error("Failed to realloc working memory stack to %d*4bytes (flip=%d)", (int)newlen /* no bigger than gsmaxalloc */, flip); gsalloc[flip] = (int)newlen; } static void push(int x) { if (!stackgrps || x == 0) return; if (gsalloc[flip] == gsngrp[flip]) growstack((uint64_t)(gsngrp[flip]) * 2); gs[flip][gsngrp[flip]++] = x; if (x > gsmax[flip]) gsmax[flip] = x; } static void mpush(int x, int n) { if (!stackgrps || x == 0) return; if (gsalloc[flip] < gsngrp[flip] + n) growstack(((uint64_t)(gsngrp[flip]) + n) * 2); for (int i = 0; i != n; ++i) gs[flip][gsngrp[flip]++] = x; if (x > gsmax[flip]) gsmax[flip] = x; } static void flipflop(void) { flip = 1 - flip; gsngrp[flip] = 0; gsmax[flip] = 0; if (gsalloc[flip] < gsalloc[1 - flip]) growstack((uint64_t)(gsalloc[1 - flip]) * 2); } static void gsfree(void) { free(gs[0]); free(gs[1]); gs[0] = NULL; gs[1] = NULL; flip = 0; gsalloc[0] = gsalloc[1] = 0; gsngrp[0] = gsngrp[1] = 0; gsmax[0] = gsmax[1] = 0; gsmaxalloc = 0; } #ifdef TIMING_ON // many calls to clock() can be expensive, // hence compiled out rather than switch(verbose) #include #define NBLOCK 20 static clock_t tblock[NBLOCK], tstart; static int nblock[NBLOCK]; #define TBEG() tstart = clock(); #define TEND(i) tblock[i] += clock()-tstart; nblock[i]++; tstart = clock(); #else #define TBEG() #define TEND(i) #endif static int range, xmin; // used by both icount and do_radixsort static void setRange(int *x, int n) { xmin = NA_INTEGER; int xmax = NA_INTEGER; double overflow; int i = 0; while(i < n && x[i] == NA_INTEGER) i++; if (i < n) xmax = xmin = x[i]; for (; i != n; ++i) { int tmp = x[i]; if (tmp == NA_INTEGER) continue; if (tmp > xmax) xmax = tmp; else if (tmp < xmin) xmin = tmp; } // all NAs, nothing to do if (xmin == NA_INTEGER) { range = NA_INTEGER; return; } // ex: x=c(-2147483647L, NA_integer_, 1L) results in overflowing int range. overflow = (double) xmax - (double) xmin + 1; // detect and force iradix here, since icount is out of the picture if (overflow > INT_MAX) { range = INT_MAX; return; } range = xmax - xmin + 1; return; } // x*order results in integer overflow when -1*NA, // so careful to avoid that here : static inline int icheck(int x) { // if nalast == 1, NAs must go last. return ((nalast != 1) ? ((x != NA_INTEGER) ? x*order : x) : ((x != NA_INTEGER) ? (x*order) - 1 : INT_MAX)); } static void icount(int *x, int *o, int n) /* Counting sort: 1. Places the ordering into o directly, overwriting whatever was there 2. Doesn't change x 3. Pushes group sizes onto stack */ { int napos = range; // NA's always counted in last bin // static is IMPORTANT, counting sort is called repetitively. static unsigned int counts[N_RANGE + 1] = { 0 }; /* counts are set back to 0 at the end efficiently. 1e5 = 0.4MB i.e. tiny. We'll only use the front part of it, as large as range. So it's just reserving space, not using it. Have defined N_RANGE to be 100000.*/ if (range > N_RANGE) Error("Internal error: range = %d; isorted cannot handle range > %d", range, N_RANGE); for (int i = 0; i != n; ++i) { // For nalast=NA case, we won't remove/skip NAs, rather set 'o' indices // to 0. subset will skip them. We can't know how many NAs to skip // beforehand - i.e. while allocating "ans" vector if (x[i] == NA_INTEGER) counts[napos]++; else counts[x[i] - xmin]++; } int tmp = 0; if (nalast != 1 && counts[napos]) { push(counts[napos]); tmp += counts[napos]; } int w = (order==1) ? 0 : range-1; for (int i = 0; i != range; ++i) /* no point in adding tmp < n && i <= range, since range includes max, need to go to max, unlike 256 loops elsewhere in radixsort.c */ { if (counts[w]) { // cumulate but not through 0's. // Helps resetting zeros when n < range, below. push(counts[w]); counts[w] = (tmp += counts[w]); } w += order; // order is +1 or -1 } if (nalast == 1 && counts[napos]) { push(counts[napos]); counts[napos] = (tmp += counts[napos]); } for (int i = n - 1; i >= 0; i--) { // This way na.last=TRUE/FALSE cases will have just a // single if-check overhead. o[--counts[(x[i] == NA_INTEGER) ? napos : x[i] - xmin]] = (int) (i + 1); } // nalast = 1, -1 are both taken care already. if (nalast == 0) // nalast = 0 is dealt with separately as it just sets o to 0 for (int i = 0; i != n; ++i) o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; // at those indices where x is NA. x[o[i]-1] because x is not modifed here. /* counts were cumulated above so leaves non zero. Faster to clear up now ready for next time. */ if (n < range) { /* Many zeros in counts already. Loop through n instead, doesn't matter if we set to 0 several times on any repeats */ counts[napos] = 0; for (int i = 0; i != n; ++i) { if (x[i] != NA_INTEGER) counts[x[i] - xmin] = 0; } } else memset(counts, 0, (range + 1) * sizeof(int)); return; } static void iinsert(int *x, int *o, int n) /* orders both x and o by reference in-place. Fast for small vectors, low overhead. don't be tempted to binsearch backwards here, have to shift anyway; many memmove would have overhead and do the same thing. */ /* when nalast == 0, iinsert will be called only from within iradix, where o[.] = 0 for x[.]=NA is already taken care of */ { for (int i = 1; i != n; ++i) { int xtmp = x[i]; if (xtmp < x[i - 1]) { int j = i - 1; int otmp = o[i]; while (j >= 0 && xtmp < x[j]) { x[j + 1] = x[j]; o[j + 1] = o[j]; j--; } x[j + 1] = xtmp; o[j + 1] = otmp; } } int tt = 0; for (int i = 1; i != n; ++i) { if (x[i] == x[i - 1]) tt++; else { push(tt + 1); tt = 0; } } push(tt + 1); // INCLUDED ?? } /* iradix is a counting sort performed forwards from MSB to LSB, with some tricks and short circuits building on Terdiman and Herf. http://codercorner.com/RadixSortRevisited.htm http://stereopsis.com/radix.html ~ Note they are LSD, but we do MSD here which is more complicated, for efficiency. ~ NAs need no special treatment as NA is the most negative integer in R (checked in init.c once, for efficiency) so NA naturally sort to the front. ~ Using 4-pass 1-byte radix for the following reasons : * 11-bit (Herf) reduces to 3-passes (3*11=33) yes, and LSD need random access to o vector in each pass 1:n so reduction in passes is good, but Terdiman's idea to skip a radix if all values are equal occurs less the wider the radix. A narrower radix benefits more from that. * That's detected here using a single 'if', an improvement on Terdiman's exposition of a single loop to find if any count==n * The pass through counts bites when radix is wider, because we repetitively call this iradix from fastorder forwards. * Herf's parallel histogramming is neat. In 4-pass 1-byte it needs 4*256 storage, that's tiny, and can be static. 4*256 << 3*2048. 4-pass 1-byte is simpler and tighter code than 3-pass 11-bit, giving modern optimizers and modern CPUs a better chance. We may get lucky anyway, if one or two of the 4-passes are skipped. Recall: there are no comparisons at all in counting and radix, there is wide random access in each LSD radix pass, though. */ // 4 are used for iradix, 8 for dradix and i64radix static unsigned int radixcounts[8][257] = { {0} }; static int skip[8]; /* global because iradix and iradix_r interact and are called repetitively. counts are set back to 0 after each use, to benefit from skipped radix. */ static void *radix_xsub = NULL; static size_t radix_xsuballoc = 0; static int *otmp = NULL, otmp_alloc = 0; static void alloc_otmp(int n) { if (otmp_alloc >= n) return; otmp = (int *) realloc(otmp, n * sizeof(int)); if (otmp == NULL) Error("Failed to allocate working memory for otmp. Requested %d * %d bytes", n, (int)sizeof(int)); otmp_alloc = n; } // TO DO: save xtmp if possible, see allocs in do_radixsort static void *xtmp = NULL; static int xtmp_alloc = 0; // TO DO: currently always the largest type (double) but // could be int if that's all that's needed static void alloc_xtmp(int n) { if (xtmp_alloc >= n) return; xtmp = (double *) realloc(xtmp, n * sizeof(double)); if (xtmp == NULL) Error("Failed to allocate working memory for xtmp. Requested %d * %d bytes", n, (int)sizeof(double)); xtmp_alloc = n; } static void iradix_r(int *xsub, int *osub, int n, int radix); static void iradix(int *x, int *o, int n) /* As icount : Places the ordering into o directly, overwriting whatever was there Doesn't change x Pushes group sizes onto stack */ { int nextradix, itmp, thisgrpn, maxgrpn; unsigned int thisx = 0, shift, *thiscounts; for (int i = 0; i != n; ++i) { /* parallel histogramming pass; i.e. count occurrences of 0:255 in each byte. Sequential so almost negligible. */ // relies on overflow behaviour. And shouldn't -INT_MIN be up in iradix? thisx = (unsigned int) (icheck(x[i])) - INT_MIN; // unrolled since inside n-loop radixcounts[0][thisx & 0xFF]++; radixcounts[1][thisx >> 8 & 0xFF]++; radixcounts[2][thisx >> 16 & 0xFF]++; radixcounts[3][thisx >> 24 & 0xFF]++; } for (int radix = 0; radix < 4; radix++) { /* any(count == n) => all radix must have been that value => last x (still thisx) was that value */ int i = thisx >> (radix*8) & 0xFF; skip[radix] = radixcounts[radix][i] == n; // clear it now, the other counts must be 0 already if (skip[radix]) radixcounts[radix][i] = 0; } int radix = 3; // MSD while (radix >= 0 && skip[radix]) radix--; if (radix == -1) { // All radix are skipped; one number repeated n times. if (nalast == 0 && x[0] == NA_INTEGER) // all values are identical. return 0 if nalast=0 & all NA // because of 'return', have to take care of it here. for (int i = 0; i != n; ++i) o[i] = 0; else for (int i = 0; i != n; ++i) o[i] = (i + 1); push(n); return; } for (int i = radix - 1; i >= 0; i--) { if (!skip[i]) memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); /* clear the counts as we only needed the parallel pass for skip[] and we're going to use radixcounts again below. Can't use parallel lower counts in MSD radix, unlike LSD. */ } thiscounts = radixcounts[radix]; shift = radix * 8; itmp = thiscounts[0]; maxgrpn = itmp; for (int i = 1; itmp < n && i < 256; ++i) { thisgrpn = thiscounts[i]; if (thisgrpn) { // don't cummulate through 0s, important below. if (thisgrpn > maxgrpn) maxgrpn = thisgrpn; thiscounts[i] = (itmp += thisgrpn); } } for (int i = n - 1; i >= 0; i--) { thisx = ((unsigned int) (icheck(x[i])) - INT_MIN) >> shift & 0xFF; o[--thiscounts[thisx]] = i + 1; } if (radix_xsuballoc < maxgrpn) { // The largest group according to the first non-skipped radix, // so could be big (if radix is needed on first arg) // TO DO: could include extra bits to divide the first radix // up more. Often the MSD has groups in just 0-4 out of 256. // free'd at the end of do_radixsort once we're done calling iradix // repetitively radix_xsub = (int *) realloc(radix_xsub, maxgrpn * sizeof(double)); if (!radix_xsub) Error("Failed to realloc working memory %d*8bytes (xsub in iradix), radix=%d", maxgrpn, radix); radix_xsuballoc = maxgrpn; } // TO DO: can we leave this to do_radixsort and remove these calls?? alloc_otmp(maxgrpn); // TO DO: doesn't need to be sizeof(double) always, see inside alloc_xtmp(maxgrpn); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; if (thiscounts[0] != 0) Error("Internal error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; // undo cumulate; i.e. diff thisgrpn = thiscounts[i] - itmp; if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { for (int j = 0; j != thisgrpn; ++j) // this is why this xsub here can't be the same memory as // xsub in do_radixsort. ((int *)radix_xsub)[j] = icheck(x[o[itmp+j]-1]); // changes xsub and o by reference recursively. iradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } if (nalast == 0) // nalast = 1, -1 are both taken care already. // nalast = 0 is dealt with separately as it just sets o to 0 for (int i = 0; i != n; ++i) o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; // at those indices where x is NA. x[o[i]-1] because x is not // modified by reference unlike iinsert or iradix_r } static void iradix_r(int *xsub, int *osub, int n, int radix) // xsub is a recursive offset into xsub working memory above in // iradix, reordered by reference. osub is a an offset into the main // answer o, reordered by reference. radix iterates 3,2,1,0 { int j, itmp, thisx, thisgrpn, nextradix, shift; unsigned int *thiscounts; // N_SMALL=200 is guess based on limited testing. Needs // calibrate(). Was 50 based on sum(1:50)=1275 worst -vs- 256 // cummulate + 256 memset + allowance since reverse order is // unlikely. when nalast==0, iinsert will be called only from // within iradix. if (n < N_SMALL) { iinsert(xsub, osub, n); return; } shift = radix * 8; thiscounts = radixcounts[radix]; for (int i = 0; i != n; ++i) { thisx = (unsigned int) xsub[i] - INT_MIN; // sequential in xsub thiscounts[thisx >> shift & 0xFF]++; } itmp = thiscounts[0]; for (int i = 1; itmp < n && i < 256; ++i) { if (thiscounts[i]) // don't cummulate through 0s, important below thiscounts[i] = (itmp += thiscounts[i]); } // INCLUDED ?? for (int i = n - 1; i >= 0; i--) { thisx = ((unsigned int) xsub[i] - INT_MIN) >> shift & 0xFF; j = --thiscounts[thisx]; otmp[j] = osub[i]; ((int *) xtmp)[j] = xsub[i]; } memcpy(osub, otmp, n * sizeof(int)); memcpy(xsub, xtmp, n * sizeof(int)); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; /* TO DO: If nextradix == -1 AND no further args from do_radixsort AND !retGrp, we're done. We have o. Remember to memset thiscounts before returning. */ if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { iradix_r(xsub+itmp, osub+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } } // dradix from Arun's fastradixdouble.c // + changed to MSD and hooked into do_radixsort framework here. // + replaced tolerance with rounding s.f. // static unsigned long long dmask1; // static unsigned long long dmask2; // static void setNumericRounding(int dround) // { // dmask1 = dround ? 1 << (8 * dround - 1) : 0; // dmask2 = 0xffffffffffffffff << dround * 8; // } static union { double d; unsigned long long ull; } u; static unsigned long long dtwiddle(void *p, int i, int order) { u.d = order * ((double *)p)[i]; // take care of 'order' at the beginning // if (u.d == u.d & u.d != POS_INF & u.d != NEG_INF) { // R_FINITE(u.d) // u.ull = (u.d != 0.0) ? u.ull : 0; // u.ull = (u.d != 0.0) ? u.ull + ((u.ull & dmask1) << 1) : 0; // } else if (ISNAN(u.d)) { u.ull = 0; return (nalast == 1 ? ~u.ull : u.ull); } unsigned long long mask = (u.ull & 0x8000000000000000) ? // always flip sign bit and if negative (sign bit was set) // flip other bits too 0xffffffffffffffff : 0x8000000000000000; // return ((u.ull ^ mask) & dmask2); return (u.ull ^ mask); } static int dnan(void *p, int i) { u.d = ((double *) p)[i]; return (ISNAN(u.d)); } static unsigned long long (*twiddle) (void *, int, int); static int(*is_nan) (void *, int); // the size of the arg type (4 or 8). Just 8 currently until iradix is // merged in. static size_t colSize = 8; static void dradix_r(unsigned char *xsub, int *osub, int n, int radix); #ifdef WORDS_BIGENDIAN #define RADIX_BYTE colSize - radix - 1 #else #define RADIX_BYTE radix #endif static void dradix(unsigned char *x, int *o, int n) { int radix, nextradix, itmp, thisgrpn, maxgrpn; unsigned int *thiscounts; unsigned long long thisx = 0; // see comments in iradix for structure. This follows the same. // TO DO: merge iradix in here (almost ready) for (int i = 0; i != n; ++i) { thisx = twiddle(x, i, order); for (radix = 0; radix != colSize; ++radix) // if dround == 2 then radix 0 and 1 will be all 0 here and skipped. /* on little endian, 0 is the least significant bits (the right) and 7 is the most including sign (the left); i.e. reversed. */ radixcounts[radix][((unsigned char *)&thisx)[RADIX_BYTE]]++; } for (radix = 0; radix != colSize; ++radix) { // thisx is the last x after loop above int i = ((unsigned char *) &thisx)[RADIX_BYTE]; skip[radix] = radixcounts[radix][i] == n; // clear it now, the other counts must be 0 already if (skip[radix]) radixcounts[radix][i] = 0; } radix = (int) colSize - 1; // MSD while (radix >= 0 && skip[radix]) radix--; if (radix == -1) { // All radix are skipped; i.e. one number repeated n times. if (nalast == 0 && is_nan(x, 0)) // all values are identical. return 0 if nalast=0 & all NA // because of 'return', have to take care of it here. for (int i = 0; i != n; ++i) o[i] = 0; else for (int i = 0; i != n; ++i) o[i] = (i + 1); push(n); return; } for (int i = radix - 1; i >= 0; i--) { // clear the lower radix counts, we only did them to know // skip. will be reused within each group if (!skip[i]) memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); } thiscounts = radixcounts[radix]; itmp = thiscounts[0]; maxgrpn = itmp; for (int i = 1; itmp < n && i < 256; ++i) { thisgrpn = thiscounts[i]; if (thisgrpn) { // don't cummulate through 0s, important below if (thisgrpn > maxgrpn) maxgrpn = thisgrpn; thiscounts[i] = (itmp += thisgrpn); } } for (int i = n - 1; i >= 0; i--) { thisx = twiddle(x, i, order); o[ --thiscounts[((unsigned char *)&thisx)[RADIX_BYTE]] ] = i + 1; } if (radix_xsuballoc < maxgrpn) { // TO DO: centralize this alloc // The largest group according to the first non-skipped radix, // so could be big (if radix is needed on first arg) TO DO: // could include extra bits to divide the first radix up // more. Often the MSD has groups in just 0-4 out of 256. // free'd at the end of do_radixsort once we're done calling iradix // repetitively radix_xsub = (double *) realloc(radix_xsub, maxgrpn * sizeof(double)); if (!radix_xsub) Error("Failed to realloc working memory %d*8bytes (xsub in dradix), radix=%d", maxgrpn, radix); radix_xsuballoc = maxgrpn; } alloc_otmp(maxgrpn); // TO DO: leave to do_radixsort and remove these? alloc_xtmp(maxgrpn); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { if (colSize == 4) { // ready for merging in iradix ... error("Not yet used, still using iradix instead"); for (int j = 0; j != thisgrpn; ++j) ((int *)radix_xsub)[j] = (int)twiddle(x, o[itmp+j]-1, order); // this is why this xsub here can't be the same memory // as xsub in do_radixsort } else for (int j = 0; j != thisgrpn; ++j) ((unsigned long long *)radix_xsub)[j] = twiddle(x, o[itmp+j]-1, order); // changes xsub and o by reference recursively. dradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } if (nalast == 0) // nalast = 1, -1 are both taken care already. for (int i = 0; i != n; ++i) o[i] = is_nan(x, o[i] - 1) ? 0 : o[i]; // nalast = 0 is dealt with separately as it just sets o to 0 // at those indices where x is NA. x[o[i]-1] because x is not // modified by reference unlike iinsert or iradix_r } static void dinsert(unsigned long long *x, int *o, int n) // orders both x and o by reference in-place. Fast for small vectors, // low overhead. don't be tempted to binsearch backwards here, have // to shift anyway; many memmove would have overhead and do the same // thing 'dinsert' will not be called when nalast = 0 and o[0] = -1. { int otmp, tt; unsigned long long xtmp; for (int i = 1; i != n; ++i) { xtmp = x[i]; if (xtmp < x[i - 1]) { int j = i - 1; otmp = o[i]; while (j >= 0 && xtmp < x[j]) { x[j + 1] = x[j]; o[j + 1] = o[j]; j--; } x[j + 1] = xtmp; o[j + 1] = otmp; } } tt = 0; for (int i = 1; i != n; ++i) { if (x[i] == x[i - 1]) tt++; else { push(tt + 1); tt = 0; } } // INCLUDED ?? push(tt + 1); } static void dradix_r(unsigned char *xsub, int *osub, int n, int radix) /* xsub is a recursive offset into xsub working memory above in dradix, reordered by reference. osub is a an offset into the main answer o, reordered by reference. dradix iterates 7,6,5,4,3,2,1,0 */ { int itmp, thisgrpn, nextradix; unsigned int *thiscounts; unsigned char *p; if (n < 200) { /* 200 is guess based on limited testing. Needs calibrate(). Was 50 based on sum(1:50)=1275 worst -vs- 256 cummulate + 256 memset + allowance since reverse order is unlikely */ // order=1 here because it's already taken care of in iradix dinsert((void *)xsub, osub, n); return; } thiscounts = radixcounts[radix]; p = xsub + RADIX_BYTE; for (int i = 0; i != n; ++i) { thiscounts[*p]++; p += colSize; } itmp = thiscounts[0]; for (int i = 1; itmp < n && i < 256; ++i) { if (thiscounts[i]) // don't cummulate through 0s, important below thiscounts[i] = (itmp += thiscounts[i]); } // INCLUDED ?? p = xsub + (n - 1) * colSize; if (colSize == 4) { error("Not yet used, still using iradix instead"); for (int i = n - 1; i >= 0; i--) { int j = --thiscounts[*(p + RADIX_BYTE)]; otmp[j] = osub[i]; ((int *) xtmp)[j] = *(int *) p; p -= colSize; } } else { for (int i = n - 1; i >= 0; i--) { int j = --thiscounts[*(p + RADIX_BYTE)]; otmp[j] = osub[i]; ((unsigned long long *) xtmp)[j] = *(unsigned long long *) p; p -= colSize; } } memcpy(osub, otmp, n * sizeof(int)); memcpy(xsub, xtmp, n * colSize); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; // TO DO: If nextradix==-1 and no further args from do_radixsort, // we're done. We have o. Remember to memset thiscounts before // returning. if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) push(thisgrpn); else dradix_r(xsub + itmp * colSize, osub + itmp, thisgrpn, nextradix); itmp = thiscounts[i]; thiscounts[i] = 0; } } // TO DO?: dcount. Find step size, then range = (max-min)/step and // proceed as icount. Many fixed precision floats (such as prices) may // be suitable. Fixed precision such as 1.10, 1.15, 1.20, 1.25, 1.30 // ... do use all bits so dradix skipping may not help. static int *cradix_counts = NULL; static int cradix_counts_alloc = 0; static int maxlen = 1; static SEXP *cradix_xtmp = NULL; static int cradix_xtmp_alloc = 0; // same as StrCmp but also takes into account 'decreasing' and 'na.last' args. static int StrCmp2(SEXP x, SEXP y) { // same cached pointer (including NA_STRING == NA_STRING) if (x == y) return 0; // if x=NA, nalast=1 ? then x > y else x < y (Note: nalast == 0 is // already taken care of in 'csorted', won't be 0 here) if (x == NA_STRING) return nalast; if (y == NA_STRING) return -nalast; // if y=NA, nalast=1 ? then y > x return order*strcmp(CHAR(x), CHAR(y)); // same as explanation in StrCmp } static int StrCmp(SEXP x, SEXP y) // also used by bmerge and chmatch { // same cached pointer (including NA_STRING == NA_STRING) if (x == y) return 0; if (x == NA_STRING) return -1; // x < y if (y == NA_STRING) return 1; // x > y // assumes strings are in same encoding return strcmp(CHAR(x), CHAR(y)); } #define CHAR_ENCODING(x) (IS_ASCII(x) ? CE_UTF8 : getCharCE(x)) void checkEncodings(SEXP x) // static { cetype_t ce; const SEXP *px = SEXPPTR_RO(x); int i, lx = length(x); for (i = 0; i != lx && px[i] == NA_STRING; ++i); if (i < lx) { ce = CHAR_ENCODING(px[i]); if (ce == CE_NATIVE) { error("Character encoding must be UTF-8, Latin-1 or bytes"); } } /* Disabled for now -- doubles the time (for already sorted vectors): why? for (int i = 1; i < length(x); i++) { if (ce != CHAR_ENCODING(STRING_ELT(x, i))) { error("Mixed character encodings are not supported"); } } */ } static void cradix_r(SEXP * xsub, int n, int radix) // xsub is a unique set of CHARSXP, to be ordered by reference // First time, radix == 0, and xsub == x. Then recursively moves SEXP together // for L1 cache efficiency. // Quite different to iradix because // 1) x is known to be unique so fits in cache // (wide random access not an issue) // 2) they're variable length character strings // 3) no need to maintain o. Just simply reorder x. No grps or push. // Fortunately, UTF sorts in the same order if treated as ASCII, so we // can simplify by doing it by bytes. // TO DO: confirm a forwards (MSD) radix for efficiency, although more // complicated. // This part has nothing to do with truelength. The // truelength stuff is to do with finding the unique strings. We may // be able to improve CHARSXP derefencing by submitting patch to R to // make R's string cache contiguous but would likely be difficult. If // we strxfrm, then it'll then be contiguous and compact then anyway. { int itmp, *thiscounts, thisgrpn=0, thisx=0; SEXP stmp; // TO DO?: chmatch to existing sorted vector, then grow it. // TO DO?: if (n= 0; i--) { thisx = xsub[i] == NA_STRING ? 0 : (radix < LENGTH(xsub[i]) ? (unsigned char) (CHAR(xsub[i])[radix]) : 1); int j = --thiscounts[thisx]; cradix_xtmp[j] = xsub[i]; } memcpy(xsub, cradix_xtmp, n * sizeof(SEXP)); if (radix == maxlen - 1) { memset(thiscounts, 0, 256 * sizeof(int)); return; } if (thiscounts[0] != 0) Error("Logical error. counts[0]=%d in cradix but should have been decremented to 0. radix=%d", thiscounts[0], radix); itmp = 0; for (int i = 1; i != 256; ++i) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff cradix_r(xsub + itmp, thisgrpn, radix + 1); itmp = thiscounts[i]; // set to 0 now since we're here, saves memset // afterwards. Important to clear! Also more portable for // machines where 0 isn't all bits 0 (?!) thiscounts[i] = 0; } if (itmp < n - 1) cradix_r(xsub + itmp, n - itmp, radix + 1); // final group } static SEXP *ustr = NULL; static int ustr_alloc = 0, ustr_n = 0; static void cgroup(SEXP * x, int *o, int n) // As icount : // Places the ordering into o directly, overwriting whatever was there // Doesn't change x // Pushes group sizes onto stack // Only run when sortStr == FALSE. Basically a counting sort, in first // appearance order, directly. Since it doesn't sort the strings, the // name is cgroup. there is no _pre for this. ustr created and // cleared each time. { // savetl_init() is called once at the start of do_radixsort if (ustr_n != 0) Error ("Internal error. ustr isn't empty when starting cgroup: ustr_n=%d, ustr_alloc=%d", ustr_n, ustr_alloc); for (int i = 0; i != n; ++i) { SEXP s = x[i]; if (TRLEN(s) < 0) { // this case first as it's the most frequent SET_TRLEN(s, TRLEN(s) - 1); // use negative counts so as to detect R's own (positive) // usage of tl on CHARSXP continue; } if (TRLEN(s) > 0) { // Save any of R's own usage of tl (assumed positive, so // we can both count and save in one scan), to restore // afterwards. From R 2.14.0, tl is initialized to 0, // prior to that it was random so this step saved too much. savetl(s); SET_TRLEN(s, 0); } if (ustr_alloc <= ustr_n) { // 10000 = 78k of 8byte pointers. Small initial guess, // negligible time to alloc. ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; if (ustr_alloc > n) ustr_alloc = n; ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); if (ustr == NULL) Error("Unable to realloc %d * %d bytes in cgroup", ustr_alloc, (int)sizeof(SEXP)); } SET_TRLEN(s, -1); ustr[ustr_n++] = s; } // TO DO: the same string in different encodings will be // considered different here. Sweep through ustr and merge counts // where equal (sort needed therefore, unfortunately?, only if // there are any marked encodings present) int cumsum = 0; for (int i = 0, mtli; i != ustr_n; ++i) { // 0.000 mtli = -TRLEN(ustr[i]); push(mtli); SET_TRLEN(ustr[i], cumsum += mtli); } int *target = (o[0] != -1) ? newo : o; for (int i = n - 1; i >= 0; i--) { SEXP s = x[i]; // 0.400 (page fetches on string cache) int k = TRLEN(s) - 1; SET_TRLEN(s, k); target[k] = i + 1; // 0.800 (random access to o) } // The cummulate meant counts are left non zero, so reset for next // time (0.00s). for (int i = 0; i != ustr_n; ++i) SET_TRLEN(ustr[i], 0); ustr_n = 0; } static int *csort_otmp = NULL, csort_otmp_alloc = 0; static void alloc_csort_otmp(int n) { if (csort_otmp_alloc >= n) return; csort_otmp = (int *) realloc(csort_otmp, n * sizeof(int)); if (csort_otmp == NULL) Error ("Failed to allocate working memory for csort_otmp. Requested %d * %d bytes", n, (int)sizeof(int)); csort_otmp_alloc = n; } static void csort(SEXP * x, int *o, int n) /* As icount : Places the ordering into o directly, overwriting whatever was there Doesn't change x Pushes group sizes onto stack Requires csort_pre() to have created and sorted ustr already */ { /* can't use otmp, since iradix might be called here and that uses otmp (and xtmp). alloc_csort_otmp(n) is called from do_radixsort for either n=nrow if 1st arg, or n=maxgrpn if onwards args */ for (int i = 0; i != n; ++i) csort_otmp[i] = (x[i] == NA_STRING) ? NA_INTEGER : -TRLEN(x[i]); if (nalast == 0 && n == 2) { // special case for nalast == 0. n == 1 is handled inside // do_radixsort. at least 1 will be NA here else use o from caller // directly (not 1st arg) if (o[0] == -1) for (int i = 0; i != n; ++i) o[i] = i + 1; for (int i = 0; i != n; ++i) { if (csort_otmp[i] == NA_INTEGER) o[i] = 0; } // INCLUDED ?? push(1); push(1); return; } if (n < N_SMALL && nalast != 0) { // TO DO: calibrate() N_SMALL=200 if (o[0] == -1) for (int i = 0; i != n; ++i) o[i] = i + 1; // else use o from caller directly (not 1st arg) for (int i = 0; i != n; ++i) csort_otmp[i] = icheck(csort_otmp[i]); iinsert(csort_otmp, o, n); } else { setRange(csort_otmp, n); if (range == NA_INTEGER) Error("Internal error. csort's otmp contains all-NA"); int *target = (o[0] != -1) ? newo : o; if (range <= N_RANGE) // TO DO: calibrate(). radix was faster (9.2s // "range<=10000" instead of 11.6s "range<=N_RANGE && // range 0) { savetl(s); SET_TRLEN(s, 0); } if (ustr_alloc <= ustr_n) { // 10000 = 78k of 8byte pointers. Small initial guess, // negligible time to alloc. ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; if (ustr_alloc > old_un+n) ustr_alloc = old_un + n; ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); if (ustr == NULL) Error("Failed to realloc ustr. Requested %d * %d bytes", ustr_alloc, (int)sizeof(SEXP)); } SET_TRLEN(s, -1); // this -1 will become its ordering later below ustr[ustr_n++] = s; // length on CHARSXP is the nchar of char * (excluding \0), // and treats marked encodings as if ascii. if (s != NA_STRING && LENGTH(s) > maxlen) maxlen = LENGTH(s); } new_un = ustr_n; if (new_un == old_un) return; // No new strings observed, seen them all before in previous // arg. ustr already sufficient. If we ever make ustr // permanently held by data.table, we'll just need to make the // final loop to set -i-1 before returning here. sort ustr. // TODO: just sort new ones and merge them in. These allocs are // here, to save them being in the recursive cradix_r() if (cradix_counts_alloc < maxlen) { cradix_counts_alloc = maxlen + 10; // +10 to save too many reallocs cradix_counts = (int *)realloc(cradix_counts, cradix_counts_alloc * 256 * sizeof(int)); if (!cradix_counts) Error("Failed to alloc cradix_counts"); memset(cradix_counts, 0, cradix_counts_alloc * 256 * sizeof(int)); } if (cradix_xtmp_alloc < ustr_n) { cradix_xtmp = (SEXP *) realloc(cradix_xtmp, ustr_n * sizeof(SEXP)); // TO DO: Reuse the one we have in do_radixsort. // Does it need to be n length? if (!cradix_xtmp) Error("Failed to alloc cradix_tmp"); cradix_xtmp_alloc = ustr_n; } // sorts ustr in-place by reference save ordering in the // CHARSXP. negative so as to distinguish with R's own usage. cradix_r(ustr, ustr_n, 0); for (int i = 0; i != ustr_n; ++i) SET_TRLEN(ustr[i], -i - 1); } // functions to test vectors for sortedness: isorted, dsorted and csorted // base:is.unsorted returns NA in the presence of any NA, but we need // to consider na.last, and we also return -1 if x is sorted in // _strictly_ reverse order; a common case we optimize. If a vector // is in decreasing order *with ties*, then an in-place reverse (no // sort) would result in instability of ties, so we are strict. We // also save grouping information during the check; that information // is required when sorting by multiple arguments. // TO DO: test in big steps first to return faster if unsortedness is // at the end (a common case of rbind'ing data to end) These are all // sequential access to x, so very quick and cache efficient. // order = 1 is ascending and order=-1 is descending; also takes care // of na.last argument with check through 'icheck' Relies on // NA_INTEGER == INT_MIN, checked in init.c static int isorted(int *x, int n) { int i = 1, j = 0; // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it // to sort routines to replace o's with 0's // no NAs ? continue to check rest of isorted - the same routine as usual if (nalast == 0) { for (int k = 0; k != n; ++k) { if (x[k] != NA_INTEGER) j++; } // INCLUDED ?? if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } if (icheck(x[1]) < icheck(x[0])) { i = 2; while (i < n && icheck(x[i]) < icheck(x[i - 1])) i++; // strictly opposite to expected 'order', no ties; if (i == n) { mpush(1, n); return (-1); } // e.g. no more than one NA at the beginning/end (for order=-1/1) else return (0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i != n; ++i) { if (icheck(x[i]) < icheck(x[i - 1])) { gsngrp[flip] = old; return (0); } if (x[i] == x[i - 1]) tt++; else { push(tt); tt = 1; } } push(tt); // same as 'order', NAs at the beginning for order=1, at end for // order=-1, possibly with ties return(1); } // order=1 is ascending and -1 is descending // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) (in twiddle) static int dsorted(double *x, int n) { int i = 1, j = 0; unsigned long long prev, this; if (nalast == 0) { // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it to sort routines to // replace o's with 0's // no NAs ? continue to check the rest of isorted - // the same routine as usual for (int k = 0; k != n; ++k) { if (!is_nan(x, k)) j++; } // INCLUDED ?? if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } prev = twiddle(x, 0, order); this = twiddle(x, 1, order); if (this < prev) { i = 2; prev = this; while (i < n && (this = twiddle(x, i, order)) < prev) { i++; prev = this; } if (i == n) { mpush(1, n); return (-1); } // strictly opposite of expected 'order', no ties; e.g. no // more than one NA at the beginning/end (for order=-1/1) // TO DO: improve to be stable for ties in reverse else return(0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i != n; ++i) { // TO DO: once we get past -Inf, NA and NaN at the bottom, and // +Inf at the top, the middle only need be twiddled // for tolerance (worth it?) this = twiddle(x, i, order); if (this < prev) { gsngrp[flip] = old; return (0); } if (this == prev) tt++; else { push(tt); tt = 1; } prev = this; } push(tt); // exactly as expected in 'order' (1=increasing, -1=decreasing), // possibly with ties return (1); } // order=1 is ascending and -1 is descending // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) static int csorted(SEXP *x, int n) { int i = 1, j = 0, tmp; if (nalast == 0) { // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it to sort routines // to replace o's with 0's // no NAs ? continue to check the rest of isorted - // the same routine as usual for (int k = 0; k != n; ++k) { if (x[k] != NA_STRING) j++; } // INCLUDED ?? if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } if (StrCmp2(x[1], x[0]) < 0) { i = 2; while (i < n && StrCmp2(x[i], x[i - 1]) < 0) i++; if (i == n) { mpush(1, n); return (-1); } // strictly opposite of expected 'order', no ties; // e.g. no more than one NA at the beginning/end (for order=-1/1) else return (0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i != n; ++i) { tmp = StrCmp2(x[i], x[i - 1]); if (tmp < 0) { gsngrp[flip] = old; return (0); } if (tmp == 0) tt++; else { push(tt); tt = 1; } } push(tt); // exactly as expected in 'order', possibly with ties return (1); } static void isort(int *x, int *o, int n) { if (n <= 2) { // nalast = 0 and n == 2 (check bottom of this file for explanation) if (nalast == 0 && n == 2) { if (o[0] == -1) { o[0] = 1; o[1] = 2; } for (int i = 0; i != n; ++i) { if (x[i] == NA_INTEGER) o[i] = 0; } // INCLUDED ?? push(1); push(1); return; } else Error("Internal error: isort received n=%d. isorted should have dealt with this (e.g. as a reverse sorted vector) already",n); } if (n < N_SMALL && o[0] != -1 && nalast != 0) { // see comment above in iradix_r on N_SMALL=200. /* if not o[0] then can't just populate with 1:n here, since x is changed by ref too (so would need to be copied). */ /* pushes inside too. Changes x and o by reference, so not suitable in first arg when o hasn't been populated yet and x is an actual argument (hence check on o[0]). */ if (order != 1 || nalast != -1) // so that default case, i.e., order=1, nalast=FALSE will // not be affected (ex: `setkey`) for (int i = 0; i != n; ++i) x[i] = icheck(x[i]); iinsert(x, o, n); } else { /* Tighter range (e.g. copes better with a few abormally large values in some groups), but also, when setRange was once at arg level that caused an extra scan of (long) x first. 10,000 calls to setRange takes just 0.04s i.e. negligible. */ setRange(x, n); if (range == NA_INTEGER) Error("Internal error: isort passed all-NA. isorted should have caught this before this point"); int *target = (o[0] != -1) ? newo : o; // was range < 10000 for subgroups, but 1e5 for the first // arg, tried to generalise here. 1e4 rather than 1e5 here // because iterated was (thisgrpn < 200 || range > 20000) then // radix a short vector with large range can bite icount when // iterated (BLOCK 4 and 6) if (range <= N_RANGE && range <= n) { icount(x, target, n); } else { iradix(x, target, n); } } } static void dsort(double *x, int *o, int n) { if (n <= 2) { if (nalast == 0 && n == 2) { // don't have to twiddle here.. at least one will be NA // and 'n' WILL BE 2. if (o[0] == -1) { o[0] = 1; o[1] = 2; } for (int i = 0; i != n; ++i) { if (is_nan(x, i)) o[i] = 0; } // INCLUDED ?? push(1); push(1); return; } Error("Internal error: dsort received n=%d. dsorted should have dealt with this (e.g. as a reverse sorted vector) already",n); } if (n < N_SMALL && o[0] != -1 && nalast != 0) { // see comment above in iradix_r re N_SMALL=200, and isort for o[0] for (int i = 0; i != n; ++i) ((unsigned long long *)x)[i] = twiddle(x, i, order); // have to twiddle here anyways, can't speed up default case // like in isort dinsert((unsigned long long *)x, o, n); } else { dradix((unsigned char *) x, (o[0] != -1) ? newo : o, n); } } /* // SEXP attribute_hidden DT_radixsort(SEXP args) SEXP DT_radixsort(SEXP args) { int n = -1, narg = 0, ngrp, tmp, *osub, thisgrpn; R_xlen_t nl = n; Rboolean isSorted = TRUE, retGrp; void *xd; int *o = NULL; // ML: FIXME: Here are just two of the dangerous assumptions here if (sizeof(int) != 4) { error("radix sort assumes sizeof(int) == 4"); } if (sizeof(double) != 8) { error("radix sort assumes sizeof(double) == 8"); } nalast = (asLogical(CAR(args)) == NA_LOGICAL) ? 0 : (asLogical(CAR(args)) == TRUE) ? 1 : -1; // 1=TRUE, -1=FALSE, 0=NA args = CDR(args); SEXP decreasing = CAR(args); args = CDR(args); // If TRUE, return starts of runs of identical values + max group size. retGrp = asLogical(CAR(args)); args = CDR(args); // If FALSE, get order of strings in appearance order. Essentially // abuses the CHARSXP table to group strings without hashing // them. Only makes sense when retGrp=TRUE. sortStr = asLogical(CAR(args)); args = CDR(args); */ SEXP Cradixsort(SEXP NA_last, SEXP decreasing, SEXP RETstrt, SEXP RETgs, SEXP SORTStr, SEXP args) { int n = -1, narg = 0, ngrp, tmp, *osub, thisgrpn; R_xlen_t nl = n; Rboolean isSorted = TRUE, retGrp, retStarts; void *xd; int *o = NULL; // ML: FIXME: Here are just two of the dangerous assumptions here if (sizeof(int) != 4) { error("radix sort assumes sizeof(int) == 4"); } if (sizeof(double) != 8) { error("radix sort assumes sizeof(double) == 8"); } nalast = (asLogical(NA_last) == NA_LOGICAL) ? 0 : (asLogical(NA_last) == TRUE) ? 1 : -1; // 1=TRUE, -1=FALSE, 0=NA retStarts = asLogical(RETstrt); retGrp = retStarts || asLogical(RETgs); sortStr = asLogical(SORTStr); /* When grouping, we round off doubles to account for imprecision */ // setNumericRounding(0); // before: retGrp ? 2 : 0 if (args == R_NilValue) return R_NilValue; if (isVector(CAR(args))) nl = XLENGTH(CAR(args)); for (SEXP ap = args; ap != R_NilValue; ap = CDR(ap), narg++) { if (!isVector(CAR(ap))) error("argument %d is not a vector", narg + 1); //Rprintf("%d, %d\n", XLENGTH(CAR(ap)), nl); if (XLENGTH(CAR(ap)) != nl) error("argument lengths differ"); } if (narg != length(decreasing)) error("length(decreasing) must match the number of order arguments"); for (int i = 0; i != narg; ++i) { if (LOGICAL(decreasing)[i] == NA_LOGICAL) error("'decreasing' elements must be TRUE or FALSE"); } order = asLogical(decreasing) ? -1 : 1; SEXP x = CAR(args); args = CDR(args); // (ML) FIXME: need to support long vectors if (nl > INT_MAX) { error("long vectors not supported"); } n = (int) nl; // upper limit for stack size (all size 1 groups). We'll detect // and avoid that limit, but if just one non-1 group (say 2), that // can't be avoided. gsmaxalloc = n; // once for the result, needs to be length n. // TO DO: save allocation if NULL is returned (isSorted = =TRUE) so // [i|c|d]sort know they can populate o directly with no working // memory needed to reorder existing order had to repace this from // '0' to '-1' because 'nalast = 0' replace 'o[.]' with 0 values. SEXP ans = PROTECT(allocVector(INTSXP, n)); o = INTEGER(ans); if (n > 0) o[0] = -1; xd = DPTR(x); stackgrps = narg > 1 || retGrp; if (TYPEOF(x) == STRSXP) { checkEncodings(x); } savetl_init(); // from now on use Error not error. switch (TYPEOF(x)) { case INTSXP: case LGLSXP: tmp = isorted(xd, n); break; case REALSXP : twiddle = &dtwiddle; is_nan = &dnan; tmp = dsorted(xd, n); break; case STRSXP : tmp = csorted(xd, n); break; default : Error("First arg is type '%s', not yet supported", type2char(TYPEOF(x))); } if (tmp) { // -1 or 1. NEW: or -2 in case of nalast == 0 and all NAs if (tmp == 1) { // same as expected in 'order' (1 = increasing, -1 = decreasing) isSorted = TRUE; for (int i = 0; i != n; ++i) o[i] = i + 1; } else if (tmp == -1) { // -1 (or -n for result of strcmp), strictly opposite to // -expected 'order' isSorted = FALSE; for (int i = 0; i != n; ++i) o[i] = n - i; } else if (nalast == 0 && tmp == -2) { // happens only when nalast=NA/0. Means all NAs, replace // with 0's therefore! isSorted = FALSE; for (int i = 0; i != n; ++i) o[i] = 0; } } else { isSorted = FALSE; switch (TYPEOF(x)) { case INTSXP: case LGLSXP: isort(xd, o, n); break; case REALSXP : dsort(xd, o, n); break; case STRSXP : if (sortStr) { csort_pre(xd, n); alloc_csort_otmp(n); csort(xd, o, n); } else cgroup(xd, o, n); break; default: Error ("Internal error: previous default should have caught unsupported type"); } } int maxgrpn = gsmax[flip]; // biggest group in the first arg void *xsub = NULL; // , *xsubaddr = NULL; // local // int (*f) (); // void (*g) (); int fgtype; if (narg > 1 && gsngrp[flip] < n) { // double is the largest type, 8 xsub = (void *) malloc(maxgrpn * sizeof(double)); // xsubaddr = xsub; // Needed to get back location... if (xsub == NULL) Error("Couldn't allocate xsub in do_radixsort, requested %d * %d bytes.", maxgrpn, (int)sizeof(double)); // global variable, used by isort, dsort, sort and cgroup newo = (int *) malloc(maxgrpn * sizeof(int)); if (newo == NULL) Error("Couldn't allocate newo in do_radixsort, requested %d * %d bytes.", maxgrpn, (int)sizeof(int)); } for (int col = 2; col <= narg; col++) { x = CAR(args); args = CDR(args); xd = DPTR(x); ngrp = gsngrp[flip]; if (ngrp == n && nalast != 0) break; flipflop(); stackgrps = col != narg || retGrp; order = LOGICAL(decreasing)[col - 1] ? -1 : 1; switch (TYPEOF(x)) { case INTSXP: case LGLSXP: // f = &isorted; // g = &isort; fgtype = 1; break; case REALSXP: twiddle = &dtwiddle; is_nan = &dnan; // f = &dsorted; // g = &dsort; fgtype = 2; break; case STRSXP: // f = &csorted; fgtype = 3; if (sortStr) { csort_pre(xd, n); alloc_csort_otmp(gsmax[1 - flip]); // g = &csort; } // no increasing/decreasing order required if sortStr = FALSE, // just a dummy argument else { // g = &cgroup; fgtype = 4; } break; default: Error("Arg %d is type '%s', not yet supported", col, type2char(TYPEOF(x))); } int i = 0; for (int grp = 0; grp != ngrp; ++grp) { thisgrpn = gs[1 - flip][grp]; if (thisgrpn == 1) { if (nalast == 0) { // this edge case had to be taken care of // here.. (see the bottom of this file for // more explanation) switch (TYPEOF(x)) { case INTSXP: if (INTEGER(x)[o[i] - 1] == NA_INTEGER) { isSorted = FALSE; o[i] = 0; } break; case LGLSXP: if (LOGICAL(x)[o[i] - 1] == NA_LOGICAL) { isSorted = FALSE; o[i] = 0; } break; case REALSXP: if (ISNAN(REAL(x)[o[i] - 1])) { isSorted = FALSE; o[i] = 0; } break; case STRSXP: if (STRING_ELT(x, o[i] - 1) == NA_STRING) { isSorted = FALSE; o[i] = 0; } break; default : Error("Internal error: previous default should have caught unsupported type"); } } i++; push(1); continue; } osub = o+i; // ** TO DO **: if isSorted, we can just point xsub // into x directly. If (*f)() returns 0, // though, will have to copy x at that point // When doing this, xsub could be allocated at // that point for the first time. // -> Implementing this: if(isSorted) { // xsub = xd+i; switch(TYPEOF(x)) { case STRSXP: { // memcpy((SEXP *)xsub, (SEXP *)xd+i, thisgrpn * sizeof(SEXP)); break; // memcpy does not work for SEXP !! SEXP *pxsub = (SEXP *)xsub, *pxd = (SEXP *)xd+i; for(int j = 0; j != thisgrpn; ++j) pxsub[j] = pxd[j]; } break; case REALSXP: memcpy((double *)xsub, (double *)xd+i, thisgrpn * sizeof(double)); break; default: memcpy((int *)xsub, (int *)xd+i, thisgrpn * sizeof(int)); break; } i += thisgrpn; } else switch(TYPEOF(x)) { case STRSXP: { SEXP *pxsub = (SEXP *)xsub, *pxd = (SEXP *)xd-1; for(int j = 0; j != thisgrpn; ++j) pxsub[j] = pxd[o[i++]]; } break; case REALSXP: { double *pxsub = (double *)xsub, *pxd = (double *)xd-1; for (int j = 0; j != thisgrpn; ++j) pxsub[j] = pxd[o[i++]]; } break; default: { int *pxsub = (int *)xsub, *pxd = (int *)xd-1; for (int j = 0; j != thisgrpn; ++j) pxsub[j] = pxd[o[i++]]; } } // continue; // BASELINE short circuit timing // point. Up to here is the cost of creating xsub. // [i|d|c]sorted(); very low cost, sequential // tmp = (*f)(xsub, thisgrpn); switch(fgtype) { case 1: tmp = isorted(xsub, thisgrpn); break; case 2: tmp = dsorted(xsub, thisgrpn); break; case 3: case 4: tmp = csorted(xsub, thisgrpn); } if (tmp) { // if(isSorted) xsub = xsubaddr; // need to reset here as well... // *sorted will have already push()'d the groups if (tmp == -1) { isSorted = FALSE; for (int k = 0, q; k < thisgrpn / 2; k++) { // reverse the order in-place using no // function call or working memory // isorted only returns -1 for // _strictly_ decreasing order, // otherwise ties wouldn't be stable q = thisgrpn - 1 - k; tmp = osub[k]; osub[k] = osub[q]; osub[q] = tmp; } } else if (nalast == 0 && tmp == -2) { // all NAs, replace osub[.] with 0s. isSorted = FALSE; for (int k = 0; k != thisgrpn; ++k) osub[k] = 0; } continue; } // else if(isSorted) { // Need to copy now, because isort, dsort etc modify the data... // switch(TYPEOF(x)) { // case REALSXP: memcpy((double *)xsubaddr, (double *)xsub, thisgrpn * sizeof(double)); break; // default: memcpy((int *)xsubaddr, (int *)xsub, thisgrpn * sizeof(int)); break; // } // xsub = xsubaddr; // } isSorted = FALSE; // nalast=NA will result in newo[0] = 0. So had to change to -1. newo[0] = -1; // may update osub directly, or if not will put the // result in global newo // (*g)(xsub, osub, thisgrpn); switch(fgtype) { case 1: isort(xsub, osub, thisgrpn); break; case 2: dsort(xsub, osub, thisgrpn); break; case 3: csort(xsub, osub, thisgrpn); break; case 4: cgroup(xsub, osub, thisgrpn); break; } if (newo[0] != -1) { int *pxsub = (int *)xsub; if (nalast != 0) { for (int j = 0; j != thisgrpn; ++j) // reuse xsub to reorder osub pxsub[j] = osub[newo[j] - 1]; } else { for (int j = 0; j != thisgrpn; ++j) // final nalast case to handle! pxsub[j] = (newo[j] == 0) ? 0 : osub[newo[j] - 1]; } memcpy(osub, xsub, thisgrpn * sizeof(int)); } } } if (!sortStr && ustr_n != 0) Error("Internal error: at the end of do_radixsort sortStr == FALSE but ustr_n !=0 [%d]", ustr_n); for(int i = 0; i != ustr_n; ++i) SET_TRLEN(ustr[i], 0); maxlen = 1; // reset global. Minimum needed to count "" and NA ustr_n = 0; savetl_end(); free(ustr); ustr = NULL; ustr_alloc = 0; if (retGrp) { int maxgrpn = 0; // formerly: NA_INTEGER; ngrp = gsngrp[flip]; SEXP s_starts = retStarts ? install("starts") : install("group.sizes"); setAttrib(ans, s_starts, x = allocVector(INTSXP, ngrp)); int *px = INTEGER(x); // pointer -> http://adv-r.had.co.nz/C-interface.html if (retStarts && asLogical(RETgs)) { SEXP s_gs = install("group.sizes"); SEXP y; setAttrib(ans, s_gs, y = PROTECT(allocVector(INTSXP, ngrp))); // coerceVector(gs[flip], INTSXP)); Does not work, gs is integer array int *py = INTEGER(y); if (ngrp > 0) { int ngm1 = ngrp-1; px[0] = 1; py[ngm1] = gs[flip][ngm1]; for (int i = 0; i != ngm1; ++i) { py[i] = gs[flip][i]; px[i + 1] = px[i] + py[i]; } maxgrpn = gsmax[flip]; } UNPROTECT(1); // unprotects y !! } else if(retStarts) { if (ngrp > 0) { int ngm1 = ngrp-1; px[0] = 1; for (int i = 0; i != ngm1; ++i) { px[i + 1] = px[i] + gs[flip][i]; } maxgrpn = gsmax[flip]; } } else { if (ngrp > 0) { for (int i = 0; i != ngrp; ++i) { px[i] = gs[flip][i]; } maxgrpn = gsmax[flip]; } } SEXP s_maxgrpn = install("maxgrpn"); setAttrib(ans, s_maxgrpn, ScalarInteger(maxgrpn)); // Attribute indicating whether the vector was sorted !! // SEXP s_sorted = install("sorted"); // setAttrib(ans, s_sorted, ScalarLogical(isSorted)); // SEXP nms; // PROTECT(nms = allocVector(STRSXP, 2)); // SET_STRING_ELT(nms, 0, mkChar("grouping")); // SET_STRING_ELT(nms, 1, mkChar("integer")); // setAttrib(ans, R_ClassSymbol, nms); // UNPROTECT(1); } // Attribute indicating whether the vector was sorted !! -> always attach SEXP s_sorted = install("sorted"); setAttrib(ans, s_sorted, ScalarLogical(isSorted)); Rboolean dropZeros = !retGrp && !isSorted && nalast == 0; if (dropZeros) { int zeros = 0; for (int i = 0; i != n; ++i) { if (o[i] == 0) zeros++; } if (zeros > 0) { PROTECT(ans = allocVector(INTSXP, n - zeros)); int *o2 = INTEGER(ans); for (int i = 0, i2 = 0; i != n; ++i) { if (o[i] > 0) o2[i2++] = o[i]; } UNPROTECT(1); } } gsfree(); free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; free(xsub); free(newo); xsub=newo=NULL; free(xtmp); xtmp=NULL; xtmp_alloc=0; free(otmp); otmp=NULL; otmp_alloc=0; free(csort_otmp); csort_otmp=NULL; csort_otmp_alloc=0; free(cradix_counts); cradix_counts=NULL; cradix_counts_alloc=0; free(cradix_xtmp); cradix_xtmp=NULL; cradix_xtmp_alloc=0; // TO DO: use xtmp already got UNPROTECT(1); return ans; } // Get the order of a single numeric column. Used internally for weighted quantile computations. // Similar to C API Function R_orderVector1() but 1 indexed. // Note that due to reliance on global variables defined in this script, that are modified // in the sorting subroutines, neither this function nor the following two are safe to multithreading. void num1radixsort(int *o, Rboolean NA_last, Rboolean decreasing, SEXP x) { int n = -1, tmp; R_xlen_t nl = n; void *xd; nalast = (NA_last) ? 1 : -1; // 1=TRUE, -1=FALSE if(!isVector(x)) error("x is not a vector"); nl = XLENGTH(x); order = (decreasing) ? -1 : 1; if (nl > INT_MAX) error("long vectors not supported"); n = (int) nl; // upper limit for stack size (all size 1 groups). We'll detect // and avoid that limit, but if just one non-1 group (say 2), that // can't be avoided. gsmaxalloc = n; if (n > 0) o[0] = -1; xd = DPTR(x); switch(TYPEOF(x)) { case INTSXP: case LGLSXP: tmp = isorted(xd, n); break; case REALSXP : twiddle = &dtwiddle; is_nan = &dnan; tmp = dsorted(xd, n); break; default : error("First arg is type '%s', not yet supported", type2char(TYPEOF(x))); } // only needed for multiple columns or grouping stackgrps = FALSE; if (tmp) { // -1 or 1. if (tmp == 1) { // same as expected in 'order' (1 = increasing, -1 = decreasing) for (int i = 0; i != n; ++i) o[i] = i + 1; } else if (tmp == -1) { // -1 strictly opposite to -expected 'order' for (int i = 0; i != n; ++i) o[i] = n - i; } } else { switch (TYPEOF(x)) { case INTSXP: case LGLSXP: isort(xd, o, n); break; case REALSXP : dsort(xd, o, n); break; default: error("Internal error: previous default should have caught unsupported type"); } } // maxlen = 1; // Only needed for strings... gsfree(); // Needed !! free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; // free(newo); newo=NULL; // not needed if only one column free(xtmp); xtmp=NULL; xtmp_alloc=0; free(otmp); otmp=NULL; otmp_alloc=0; } // Also provide separate versions for integers and doubles: to order matrix columns in fnth.matrix() with weights void iradixsort(int *o, Rboolean NA_last, Rboolean decreasing, int n, int *x) { nalast = (NA_last) ? 1 : -1; // 1=TRUE, -1=FALSE order = (decreasing) ? -1 : 1; gsmaxalloc = n; // upper limit for stack size (all size 1 groups). We'll detect and avoid that limit, but if just one non-1 group (say 2), that can't be avoided. if (n > 0) o[0] = -1; int tmp = isorted(x, n); stackgrps = FALSE; // only needed for multiple columns or grouping if(tmp) { // -1 or 1. if(tmp == 1) { // same as expected in 'order' (1 = increasing, -1 = decreasing) for(int i = 0; i != n; ++i) o[i] = i + 1; } else if (tmp == -1) { // -1 strictly opposite to -expected 'order' for(int i = 0; i != n; ++i) o[i] = n - i; } } else isort(x, o, n); gsfree(); // Needed !! free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; free(xtmp); xtmp=NULL; xtmp_alloc=0; free(otmp); otmp=NULL; otmp_alloc=0; } void dradixsort(int *o, Rboolean NA_last, Rboolean decreasing, int n, double *x) { nalast = (NA_last) ? 1 : -1; // 1=TRUE, -1=FALSE order = (decreasing) ? -1 : 1; gsmaxalloc = n; // upper limit for stack size (all size 1 groups). We'll detect and avoid that limit, but if just one non-1 group (say 2), that can't be avoided. if (n > 0) o[0] = -1; twiddle = &dtwiddle; is_nan = &dnan; int tmp = dsorted(x, n); stackgrps = FALSE; // only needed for multiple columns or grouping if(tmp) { // -1 or 1. if(tmp == 1) { // same as expected in 'order' (1 = increasing, -1 = decreasing) for(int i = 0; i != n; ++i) o[i] = i + 1; } else if (tmp == -1) { // -1 strictly opposite to -expected 'order' for(int i = 0; i != n; ++i) o[i] = n - i; } } else dsort(x, o, n); gsfree(); // Needed !! free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; free(xtmp); xtmp=NULL; xtmp_alloc=0; free(otmp); otmp=NULL; otmp_alloc=0; } collapse/src/fvar_fsd.cpp0000644000176200001440000017447315130364767015175 0ustar liggesusers#include using namespace Rcpp; #if R_VERSION < R_Version(4, 5, 0) # define ANY_ATTRIB(x) (ATTRIB(x) != R_NilValue) #endif // Note: More comments are in fvar.cpp (C++ folder, not on GitHub) // [[Rcpp::export]] NumericVector fvarsdCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool stable_algo = true, bool sd = true) { int l = x.size(); if(l < 2) return Rf_ScalarReal(NA_REAL); // Prevents seqfault for numeric(0) #101 if(stable_algo) { // WELFORDS ONLINE METHOD --------------------------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { double n = 0, mean = 0, d1 = 0, M2 = 0; if(narm) { int j = l-1; while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; mean += d1 * (1 / ++n); M2 += d1*(x[i]-mean); } M2 = M2/(n-1); if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; } else M2 = NA_REAL; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { M2 = NA_REAL; break; } else { d1 = x[i]-mean; mean += d1*(1 / ++n); M2 += d1*(x[i]-mean); } } M2 = M2/(l-1); if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) { SEXP out = Rf_ScalarReal(M2); Rf_copyMostAttrib(x, out); return out; } else return Rf_ScalarReal(M2); } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // long double d1 = 0; double d1 = 0; if(narm) { NumericVector M2(ng, NA_REAL), mean(ng), n(ng, 1.0); // NumericVector mean = no_init_vector(ng); // better for valgrind for(int i = l; i--; ) { if(std::isnan(x[i])) continue; if(std::isnan(M2[g[i]-1])) { mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(n[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= n[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, M2); return M2; } else { NumericVector M2(ng), mean(ng), n(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(n[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= n[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, M2); return M2; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if(ng == 0) { // long double sumw = 0, mean = 0, M2 = 0, d1 = 0; double sumw = 0, mean = 0, M2 = 0, d1 = 0; if(narm) { int j = l-1; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; // additional check to skip 0 weights has practically zero cost.. sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } M2 /= sumw-1; if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; } else M2 = NA_REAL; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { M2 = NA_REAL; break; } else { if(wg[i] == 0) continue; // This is necessary to prevent 0 starting weights which will render the mean infinite. Has little computational cost. sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } } M2 /= sumw-1; if(sd) M2 = sqrt(M2); if(std::isnan(M2)) M2 = NA_REAL; } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) { SEXP out = Rf_ScalarReal(M2); Rf_copyMostAttrib(x, out); return out; } else return Rf_ScalarReal(M2); } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // long double d1 = 0; double d1 = 0; if(narm) { NumericVector M2(ng, NA_REAL), sumw(ng), mean(ng); // better for valgrind // NumericVector sumw = no_init_vector(ng), mean = no_init_vector(ng); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2[g[i]-1])) { sumw[g[i]-1] = wg[i]; mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(sumw[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= sumw[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, M2); return M2; } else { NumericVector M2(ng), sumw(ng), mean(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i]) || std::isnan(wg[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { if(wg[i] == 0) continue; // This is necessary to prevent 0 starting weights which will render the mean infinite. Has little computational cost. sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] = sqrt(M2[i]/(sumw[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) M2[i] = NA_REAL; else { M2[i] /= sumw[i]-1; if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, M2); return M2; } } } } else { // ONE-PASS METHOD --------------------------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { long double sum = 0, sq_sum = 0; if(narm) { int j = l-1, n = 1; sum = x[j]; while(std::isnan(sum) && j!=0) sum = x[--j]; sq_sum = sum*sum; if(j != 0) { for(int i = j; i--; ) { if(std::isnan(x[i])) continue; sum += x[i]; sq_sum += pow(x[i],2); ++n; } sq_sum = (sq_sum - pow(sum/n,2)*n)/(n-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; } else sq_sum = NA_REAL; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { sq_sum = NA_REAL; break; } else { sum += x[i]; sq_sum += pow(x[i],2); } } sq_sum = (sq_sum - pow(sum/l,2)*l)/(l-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) { SEXP out = Rf_ScalarReal((double)sq_sum); Rf_copyMostAttrib(x, out); return out; } else return Rf_ScalarReal((double)sq_sum); } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sq_sum(ng, NA_REAL), sum(ng); // better for valgrind // NumericVector sum = no_init_vector(ng); IntegerVector n(ng, 1); for(int i = l; i--; ) { if(std::isnan(x[i])) continue; if(std::isnan(sq_sum[g[i]-1])) { sum[g[i]-1] = x[i]; sq_sum[g[i]-1] = pow(x[i],2); } else { sum[g[i]-1] += x[i]; sq_sum[g[i]-1] += pow(x[i],2); ++n[g[i]-1]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/n[i],2)*n[i])/(n[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/n[i],2)*n[i])/(n[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericVector sq_sum(ng), sum(ng); // IntegerVector gsv = no_init_vector(ng); // no problem but this is better for valgrind IntegerVector gsv = (Rf_isNull(gs)) ? IntegerVector(ng) : as(gs); int ngs = 0; if(Rf_isNull(gs)) { // gsv = IntegerVector(ng); // std::fill(gsv.begin(), gsv.end(), 0); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(std::isnan(sq_sum[g[i]-1])) continue; sq_sum[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sum[g[i]-1] += x[i]; sq_sum[g[i]-1] += pow(x[i],2); ++gsv[g[i]-1]; } } } else { // gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(std::isnan(sq_sum[g[i]-1])) continue; sq_sum[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sum[g[i]-1] += x[i]; sq_sum[g[i]-1] += pow(x[i],2); } } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/gsv[i],2)*gsv[i])/(gsv[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if(ng == 0) { long double sum = 0, sumw = 0, sq_sum = 0; if(narm) { int j = l-1; while((std::isnan(x[j]) || std::isnan(wg[j])) && j!=0) --j; sumw = wg[j], sum = x[j]*sumw, sq_sum = sum*x[j]; if(j != 0) { for(int i = j; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; sum += x[i]*wg[i]; sumw += wg[i]; sq_sum += pow(x[i],2)*wg[i]; } sq_sum = (sq_sum - pow(sum/sumw,2)*sumw)/(sumw-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; } else sq_sum = NA_REAL; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { sq_sum = NA_REAL; break; } else { sum += x[i]*wg[i]; sumw += wg[i]; sq_sum += pow(x[i],2)*wg[i]; } } sq_sum = (sq_sum - pow(sum/sumw,2)*sumw)/(sumw-1); if(sd) sq_sum = sqrt(sq_sum); if(std::isnan(sq_sum)) sq_sum = NA_REAL; } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) { SEXP out = Rf_ScalarReal((double)sq_sum); Rf_copyMostAttrib(x, out); return out; } else return Rf_ScalarReal((double)sq_sum); } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sq_sum(ng, NA_REAL), sumw(ng), sum(ng); // better for valgrind // NumericVector sumw = no_init_vector(ng), sum = no_init_vector(ng); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; if(std::isnan(sq_sum[g[i]-1])) { sum[g[i]-1] = x[i]*wg[i]; sumw[g[i]-1] = wg[i]; sq_sum[g[i]-1] = pow(x[i],2)*wg[i]; } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; sq_sum[g[i]-1] += pow(x[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericVector sq_sum(ng), sumw(ng), sum(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(sq_sum[g[i]-1])) continue; if(std::isnan(x[i]) || std::isnan(wg[i])) { sq_sum[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; sq_sum[g[i]-1] += pow(x[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = sqrt((sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1)); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sum[i])) continue; sq_sum[i] = (sq_sum[i] - pow(sum[i]/sumw[i],2)*sumw[i])/(sumw[i]-1); if(std::isnan(sq_sum[i])) sq_sum[i] = NA_REAL; } } if(ANY_ATTRIB(x) && !(Rf_isObject(x) && Rf_inherits(x, "ts"))) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } } } // [[Rcpp::export]] SEXP fvarsdmCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool stable_algo = true, bool sd = true, bool drop = true) { int l = x.nrow(), col = x.ncol(); if(stable_algo) { // WELFORDS ONLINE METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } M2i /= ni-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { M2i = NA_REAL; break; } else { d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } } M2i /= l-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix M2 = no_init_matrix(ng, col); std::fill(M2.begin(), M2.end(), NA_REAL); NumericVector meanj(ng), nj(ng); // better for valgrind for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); double d1j = 0; // , meanj[ng], nj[ng]; // NumericVector meanj = no_init_vector(ng), nj = no_init_vector(ng); // better for valgrind for(int i = l; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; nj[g[i]-1] = 1; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } else { NumericMatrix M2(ng, col); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); std::vector meanj(ng), nj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2i = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups and weights if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix M2 = no_init_matrix(ng, col); std::fill(M2.begin(), M2.end(), NA_REAL); NumericVector meanj(ng), sumwj(ng); // better for valgrind for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); double d1j = 0; // meanj[ng], sumwj[ng]; // NumericVector meanj = no_init_vector(ng), sumwj = no_init_vector(ng); better for valgrind for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } else { NumericMatrix M2(ng, col); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column M2j = M2( _ , j); std::vector meanj(ng), sumwj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } } colnames(M2) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, M2); return M2; } } } } else { // ONE-PASS METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1, nj = 1; long double sumj = column[k], sq_sumj = 0; while(std::isnan(sumj) && k!=0) sumj = column[--k]; sq_sumj = sumj*sumj; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumj += column[i]; sq_sumj += pow(column[i],2); ++nj; } sq_sumj = (sq_sumj-pow(sumj/nj,2)*nj)/(nj-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; out[j] = (double)sq_sumj; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); long double sumj = 0, sq_sumj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { sq_sumj = NA_REAL; break; } else { sumj += column[i]; sq_sumj += pow(column[i],2); } } if(!std::isnan(sq_sumj)) { sq_sumj = (sq_sumj-pow(sumj/l,2)*l)/(l-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; } out[j] = (double)sq_sumj; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix sq_sum = no_init_matrix(ng, col); std::fill(sq_sum.begin(), sq_sum.end(), NA_REAL); // better for valgrind (although no error) NumericVector sumj(ng); // = no_init_vector(ng); // double sumj[ng]; IntegerVector nj(ng); // = no_init_vector(ng); // int nj[ng]; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); for(int i = l; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; sq_sumj[g[i]-1] = pow(column[i],2); nj[g[i]-1] = 1; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++nj[g[i]-1]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/nj[i],2)*nj[i])/(nj[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericMatrix sq_sum(ng, col); if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); std::vector gsv(ng); // memset(gsv, 0, memsize); std::vector sumj(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++gsv[g[i]-1]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); std::vector sumj(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { NumericVector out = no_init_vector(col); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; long double sumwj = wg[k], sumj = column[k]*sumwj, sq_sumj = column[k]*sumj; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumj += column[i]*wg[i]; sumwj += wg[i]; sq_sumj += pow(column[i],2)*wg[i]; } sq_sumj = (sq_sumj - pow(sumj/sumwj,2)*sumwj)/(sumwj-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; out[j] = (double)sq_sumj; } else out[j] = NA_REAL; } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); long double sumj = 0, sumwj = 0, sq_sumj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumj = NA_REAL; break; } else { sumj += column[i]*wg[i]; sumwj += wg[i]; sq_sumj += pow(column[i],2)*wg[i]; } } if(!std::isnan(sq_sumj)) { sq_sumj = (sq_sumj - pow(sumj/sumwj,2)*sumwj)/(sumwj-1); if(sd) sq_sumj = sqrt(sq_sumj); if(std::isnan(sq_sumj)) sq_sumj = NA_REAL; } out[j] = (double)sq_sumj; } } if(drop) Rf_setAttrib(out, R_NamesSymbol, colnames(x)); else { Rf_dimgets(out, Dimension(1, col)); colnames(out) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, out); } return out; } else { // with groups and weights if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericMatrix sq_sum = no_init_matrix(ng, col); std::fill(sq_sum.begin(), sq_sum.end(), NA_REAL); // better for valgrind (although no error) NumericVector sumj(ng), sumwj(ng); // double sumj[ng], sumwj[ng]; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); // NumericVector sumj = no_init_vector(ng), sumwj = no_init_vector(ng); // double sumj[ng], sumwj[ng]; for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; sq_sumj[g[i]-1] = pow(column[i],2)*wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } else { NumericMatrix sq_sum(ng, col); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column sq_sumj = sq_sum( _ , j); std::vector sumj(ng), sumwj(ng); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(sq_sumj[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumj[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(sd) sq_sumj[i] = sqrt(sq_sumj[i]); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } colnames(sq_sum) = colnames(x); if(!Rf_isObject(x)) Rf_copyMostAttrib(x, sq_sum); return sq_sum; } } } } } // [[Rcpp::export]] SEXP fvarsdlCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, bool stable_algo = true, bool sd = true, bool drop = true) { int l = x.size(); if(stable_algo) { // WELFORDS ONLINE METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int k = column.size()-1; // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } M2i /= ni-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); // double ni = 0; // long double meani = 0, d1i = 0, M2i = 0; double ni = 0, meani = 0, d1i = 0, M2i = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { M2i = NA_REAL; break; } else { d1i = column[i]-meani; meani += d1i * (1 / ++ni); M2i += d1i*(column[i]-meani); } } M2i /= row-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { // With groups List out(l); int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng, NA_REAL), nj(ng, 1.0), meanj(ng); // better for valgrind // = no_init_vector(ng); double d1j = 0; // meanj[ng] // std::vector nj(ng, 1.0); for(int i = gss; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng); std::vector meanj(ng), nj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { d1j = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1j * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1j*(column[i]-meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(nj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= nj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); int k = wgs-1; // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); // long double sumwi = 0, meani = 0, M2i = 0, d1i = 0; double sumwi = 0, meani = 0, M2i = 0, d1i = 0; for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2i = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwi += wg[i]; d1i = column[i] - meani; meani += d1i * (wg[i] / sumwi); M2i += wg[i] * d1i * (column[i] - meani); } } M2i /= sumwi-1; if(sd) M2i = sqrt(M2i); if(std::isnan(M2i)) M2i = NA_REAL; out[j] = M2i; // (double)M2i; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { List out(l); int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng, NA_REAL), meanj(ng), sumwj(ng); // better for valgrind //= no_init_vector(ng), sumwj = no_init_vector(ng); double d1j = 0; // , sumwj[ng], meanj[ng]; for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector M2j(ng); std::vector sumwj(ng), meanj(ng); double d1j = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1j = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1j * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1j * (column[i] - meanj[g[i]-1]); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] = sqrt(M2j[i]/(sumwj[i]-1)); if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) M2j[i] = NA_REAL; else { M2j[i] /= sumwj[i]-1; if(std::isnan(M2j[i])) M2j[i] = NA_REAL; } } } SHALLOW_DUPLICATE_ATTRIB(M2j, column); out[j] = M2j; } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } } else { // ONE-PASS METHOD ------------------------------------- if(Rf_isNull(w)) { // No weights if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int k = column.size()-1, ni = 1; long double sumi = column[k], sq_sumi = 0; while(std::isnan(sumi) && k!=0) sumi = column[--k]; sq_sumi = sumi*sumi; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumi += column[i]; sq_sumi += pow(column[i],2); ++ni; } sq_sumi = (sq_sumi-pow(sumi/ni,2)*ni)/(ni-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; out[j] = (double)sq_sumi; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; long double sumi = 0, sq_sumi = 0; int row = column.size(); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { sq_sumi = NA_REAL; break; } else { sumi += column[i]; sq_sumi += pow(column[i],2); } } if(!std::isnan(sq_sumi)) { sq_sumi = (sq_sumi - pow(sumi/row,2)*row)/(row-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; } out[j] = (double)sq_sumi; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { // With groups List out(l); int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng, NA_REAL), sumj(ng); // better for valgrind // = no_init_vector(ng); // double sumj[ng]; std::vector nj(ng, 1); for(int i = gss; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; sq_sumj[g[i]-1] = pow(column[i],2); } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++nj[g[i]-1]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/nj[i],2)*nj[i])/(nj[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/nj[i],2)*nj[i])/(nj[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } else { if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng), sumj(ng); std::vector gsv(ng); // memset(gsv, 0, memsize); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); ++gsv[g[i]-1]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng); std::vector sumj(ng); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(column[i])) { if(std::isnan(sq_sumj[g[i]-1])) continue; sq_sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]; sq_sumj[g[i]-1] += pow(column[i],2); } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/gsv[i],2)*gsv[i])/(gsv[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if(ng == 0) { NumericVector out(l); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); int k = wgs-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; long double sumwi = wg[k], sumi = column[k]*sumwi, sq_sumi = column[k]*sumi; if(k != 0) { for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumi += column[i]*wg[i]; sumwi += wg[i]; sq_sumi += pow(column[i],2)*wg[i]; } sq_sumi = (sq_sumi - pow(sumi/sumwi,2)*sumwi)/(sumwi-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; out[j] = (double)sq_sumi; } else out[j] = NA_REAL; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(column.size() != wgs) stop("length(w) must match nrow(X)"); long double sumi = 0, sumwi = 0, sq_sumi = 0; for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumi = NA_REAL; break; } else { sumi += column[i]*wg[i]; sumwi += wg[i]; sq_sumi += pow(column[i],2)*wg[i]; } } if(!std::isnan(sq_sumi)) { sq_sumi = (sq_sumi - pow(sumi/sumwi,2)*sumwi)/(sumwi-1); if(sd) sq_sumi = sqrt(sq_sumi); if(std::isnan(sq_sumi)) sq_sumi = NA_REAL; } out[j] = (double)sq_sumi; } } if(drop) { Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } else { List res(l); for(int j = l; j--; ) { res[j] = out[j]; SHALLOW_DUPLICATE_ATTRIB(res[j], x[j]); } SHALLOW_DUPLICATE_ATTRIB(res, x); Rf_setAttrib(res, R_RowNamesSymbol, Rf_ScalarInteger(1)); return res; } } else { // With groups and weights List out(l); int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng, NA_REAL), sumj(ng), sumwj(ng); // better for valgrind // = no_init_vector(ng), sumwj = no_init_vector(ng); // double sumj[ng], sumwj[ng]; for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sq_sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; sq_sumj[g[i]-1] = pow(column[i],2)*wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector sq_sumj(ng); std::vector sumwj(ng), sumj(ng); int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(sq_sumj[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { sq_sumj[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) break; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; sq_sumj[g[i]-1] += pow(column[i],2)*wg[i]; } } if(sd) { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = sqrt((sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1)); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } else { for(int i = ng; i--; ) { if(std::isnan(sq_sumj[i])) continue; sq_sumj[i] = (sq_sumj[i] - pow(sumj[i]/sumwj[i],2)*sumwj[i])/(sumwj[i]-1); if(std::isnan(sq_sumj[i])) sq_sumj[i] = NA_REAL; } } SHALLOW_DUPLICATE_ATTRIB(sq_sumj, column); out[j] = sq_sumj; } } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_RowNamesSymbol, IntegerVector::create(NA_INTEGER, -ng)); return out; } } } } collapse/src/collapse_c.h0000644000176200001440000002276115202504365015132 0ustar liggesusers#ifndef COLLAPSE_H // Check if COLLAPSE_H is not defined #define COLLAPSE_H // Define COLLAPSE_H #ifdef _OPENMP #include #define OMP_NUM_PROCS omp_get_num_procs() #define OMP_THREAD_LIMIT omp_get_thread_limit() #define OMP_MAX_THREADS omp_get_max_threads() #else #define OMP_NUM_PROCS 1 #define OMP_THREAD_LIMIT 1 #define OMP_MAX_THREADS 1 #endif #include #include #include #include "internal/R_defn.h" #undef NISNAN #define NISNAN(x) ((x) == (x)) // opposite of ISNAN for doubles // Faster than Rinternals version (which uses math library version) #undef ISNAN #define ISNAN(x) ((x) != (x)) #ifndef ANY_ATTRIB #define ANY_ATTRIB(x) (ATTTR(x) != R_NilValue) #endif // Initialized in data.table_init.c extern int max_threads; extern SEXP sym_label; extern SEXP sym_starts; extern SEXP sym_maxgrpn; extern SEXP sym_n_groups; extern SEXP sym_group_sizes; // from base_radixsort.h (with significant modifications) SEXP Cradixsort(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); void num1radixsort(int *, Rboolean, Rboolean, SEXP); void iradixsort(int *, Rboolean, Rboolean, int, int *); void dradixsort(int *, Rboolean, Rboolean, int, double *); // from stats_mAR.c void multi_yw(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); SEXP pacf1(SEXP, SEXP); // from data.table.h (with major modifications) SEXP collapse_init(SEXP); SEXP dt_na(SEXP, SEXP, SEXP, SEXP); SEXP allNAv(SEXP, SEXP); SEXP frankds(SEXP, SEXP, SEXP, SEXP); SEXP rbindlist(SEXP, SEXP, SEXP, SEXP); SEXP setcolorder(SEXP, SEXP); SEXP subsetDT(SEXP, SEXP, SEXP, SEXP); SEXP subsetCols(SEXP, SEXP, SEXP); SEXP subsetVector(SEXP, SEXP, SEXP); void subsetVectorRaw(SEXP, SEXP, SEXP, const bool); SEXP Calloccol(SEXP); void writeValue(SEXP, SEXP, const int, const int); void writeNA(SEXP, const int, const int); // Native collapse functions void matCopyAttr(SEXP out, SEXP x, SEXP Rdrop, int ng); void DFcopyAttr(SEXP out, SEXP x, int ng); SEXP falloc(SEXP, SEXP, SEXP); SEXP frange(SEXP x, SEXP Rnarm, SEXP Rfinite); SEXP fdist(SEXP x, SEXP vec, SEXP Rret, SEXP Rnthreads); SEXP fnrowC(SEXP x); // SEXP CasChar(SEXP x); SEXP setAttributes(SEXP x, SEXP a); SEXP setattributes(SEXP x, SEXP a); // SEXP CsetAttr(SEXP object, SEXP a, SEXP v); -> mot more efficeint than attr i.e. for row.names... // void setattr(SEXP x, SEXP a, SEXP v); SEXP duplAttributes(SEXP x, SEXP y); // void duplattributes(SEXP x, SEXP y); // SEXP cond_duplAttributes(SEXP x, SEXP y); SEXP CsetAttrib(SEXP object, SEXP a); SEXP CcopyAttrib(SEXP to, SEXP from); SEXP CcopyMostAttrib(SEXP to, SEXP from); SEXP copyMostAttributes(SEXP to, SEXP from); SEXP lassign(SEXP x, SEXP s, SEXP rows, SEXP fill); SEXP gwhich_first(SEXP x, SEXP g, SEXP target); SEXP gslice_multi(SEXP g, SEXP o, SEXP Rn, SEXP first); SEXP groups2GRP(SEXP x, SEXP lx, SEXP gs); SEXP gsplit(SEXP x, SEXP gobj, SEXP toint); SEXP greorder(SEXP x, SEXP gobj); SEXP Cna_rm(SEXP x); SEXP whichv(SEXP x, SEXP val, SEXP Rinvert); SEXP anyallv(SEXP x, SEXP val, SEXP Rall); SEXP setcopyv(SEXP x, SEXP val, SEXP rep, SEXP Rinvert, SEXP Rset, SEXP Rind1); SEXP setop(SEXP x, SEXP val, SEXP op, SEXP roww); SEXP vtypes(SEXP x, SEXP isnum); SEXP vlengths(SEXP x, SEXP usenam); SEXP multiassign(SEXP lhs, SEXP rhs, SEXP envir); SEXP vlabels(SEXP x, SEXP attrn, SEXP usenam); SEXP setvlabels(SEXP x, SEXP attrn, SEXP value, SEXP ind); SEXP setnames(SEXP x, SEXP nam); SEXP Cissorted(SEXP x, SEXP strictly); SEXP groupVec(SEXP X, SEXP starts, SEXP sizes); SEXP groupAtVec(SEXP X, SEXP starts, SEXP naincl); SEXP funiqueC(SEXP x); SEXP fmatchC(SEXP x, SEXP table, SEXP nomatch, SEXP count, SEXP overid); SEXP coerce_to_equal_types(SEXP x, SEXP table); void count_match(SEXP res, int nt, int nmv); SEXP createeptr(SEXP x); SEXP geteptr(SEXP x); SEXP fcrosscolon(SEXP x, SEXP ngp, SEXP y, SEXP ckna); SEXP fwtabulate(SEXP x, SEXP w, SEXP ngp, SEXP ckna); SEXP GRP_default_drop_C(SEXP X, SEXP cols, SEXP namby, SEXP retgrp_); SEXP vecgcd(SEXP x); SEXP all_funs(SEXP x); SEXP unlock_collapse_namespace(SEXP env); void writeValueByIndex(SEXP target, SEXP source, const int from, SEXP index); SEXP pivot_long(SEXP data, SEXP ind, SEXP idcol); SEXP pivot_wide(SEXP index, SEXP id, SEXP column, SEXP fill, SEXP Rnthreads, SEXP Raggfun, SEXP Rnarm); SEXP sort_merge_join(SEXP x, SEXP table, SEXP ot, SEXP count); SEXP replace_outliers(SEXP x, SEXP limits, SEXP value, SEXP single_limit, SEXP set); SEXP na_locf(SEXP x, SEXP Rset); SEXP na_focb(SEXP x, SEXP Rset); SEXP multi_match(SEXP m, SEXP g); SEXP integer64toREAL(SEXP x); SEXP funlist(SEXP x); // fnobs rewritten in C: SEXP fnobsC(SEXP x, SEXP Rng, SEXP g); SEXP fnobsmC(SEXP x, SEXP Rng, SEXP g, SEXP Rdrop); SEXP fnobslC(SEXP x, SEXP Rng, SEXP g, SEXP Rdrop); // ffirst and flast rewritten in C: SEXP ffirstC(SEXP x, SEXP Rng, SEXP g, SEXP gst, SEXP Rnarm); SEXP ffirstmC(SEXP x, SEXP Rng, SEXP g, SEXP gst, SEXP Rnarm, SEXP Rdrop); SEXP ffirstlC(SEXP x, SEXP Rng, SEXP g, SEXP gst, SEXP Rnarm); SEXP flastC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); SEXP flastmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); SEXP flastlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); // fsum rewritten in C: SEXP fsumC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP fill, SEXP Rnthreads); SEXP fsummC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP fill, SEXP Rdrop, SEXP Rnthreads); SEXP fsumlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP fill, SEXP Rdrop, SEXP Rnthreads); // fprod rewritten in C: SEXP fprodC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm); SEXP fprodmC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop); SEXP fprodlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop); // fmean rewritten in C: SEXP fmeanC(SEXP x, SEXP Rng, SEXP g, SEXP gs, SEXP w, SEXP Rnarm, SEXP Rnthreads); SEXP fmeanmC(SEXP x, SEXP Rng, SEXP g, SEXP gs, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads); SEXP fmeanlC(SEXP x, SEXP Rng, SEXP g, SEXP gs, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads); // fmin and fmax rewritten in C: SEXP fminC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); SEXP fminmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); SEXP fminlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); SEXP fmaxC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm); SEXP fmaxmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); SEXP fmaxlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop); // Added fcumsum, written in C: SEXP fcumsumC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill); SEXP fcumsummC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill); SEXP fcumsumlC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill); // TRA, rewritten in C and extended: SEXP TRAC(SEXP x, SEXP xAG, SEXP g, SEXP Rret, SEXP Rset); SEXP TRAmC(SEXP x, SEXP xAG, SEXP g, SEXP Rret, SEXP Rset); SEXP TRAlC(SEXP x, SEXP xAG, SEXP g, SEXP Rret, SEXP Rset); // fndistinct, rewritten in C: SEXP fndistinctC(SEXP x, SEXP g, SEXP Rnarm, SEXP Rnthreads); SEXP fndistinctlC(SEXP x, SEXP g, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads); SEXP fndistinctmC(SEXP x, SEXP g, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads); // fmode, rewritten in C: SEXP fmodeC(SEXP x, SEXP g, SEXP w, SEXP Rnarm, SEXP Rret, SEXP Rnthreads); SEXP fmodelC(SEXP x, SEXP g, SEXP w, SEXP Rnarm, SEXP Rret, SEXP Rnthreads); SEXP fmodemC(SEXP x, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rret, SEXP Rnthreads); // fnth, rewritten in C: SEXP fnthC(SEXP x, SEXP p, SEXP g, SEXP w, SEXP Rnarm, SEXP Rret, SEXP Rnthreads, SEXP o, SEXP checko); SEXP fnthlC(SEXP x, SEXP p, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rret, SEXP Rnthreads); SEXP fnthmC(SEXP x, SEXP p, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rret, SEXP Rnthreads); // New: fquantile: SEXP fquantileC(SEXP x, SEXP Rprobs, SEXP w, SEXP o, SEXP Rnarm, SEXP Rtype, SEXP Rnames, SEXP checko); // Helper functions for C API double dquickselect_elem(double *x, const int n, const unsigned int elem, double h); double iquickselect_elem(int *x, const int n, const unsigned int elem, double h); double dquickselect(double *x, const int n, const int ret, const double Q); double iquickselect(int *x, const int n, const int ret, const double Q); double nth_int(const int *restrict px, const int *restrict po, const int l, const int sorted, const int narm, const int ret, const double Q); double nth_double(const double *restrict px, const int *restrict po, const int l, const int sorted, const int narm, const int ret, const double Q); double nth_int_ord(const int *restrict px, const int *restrict po, int l, const int narm, const int ret, const double Q); double nth_double_ord(const double *restrict px, const int *restrict po, int l, const int narm, const int ret, const double Q); double w_nth_int_ord(const int *restrict px, const double *restrict pw, const int *restrict po, double h, int l, const int narm, const int ret, const double Q); double w_nth_double_ord(const double *restrict px, const double *restrict pw, const int *restrict po, double h, int l, const int narm, const int ret, const double Q); double w_nth_int_qsort(const int *restrict px, const double *restrict pw, const int *restrict po, double h, const int l, const int sorted, const int narm, const int ret, const double Q); double w_nth_double_qsort(const double *restrict px, const double *restrict pw, const int *restrict po, double h, const int l, const int sorted, const int narm, const int ret, const double Q); SEXP nth_impl(SEXP x, int narm, int ret, double Q); SEXP nth_ord_impl(SEXP x, int *pxo, int narm, int ret, double Q); SEXP w_nth_ord_impl(SEXP x, int *pxo, double *pw, int narm, int ret, double Q, double h); #endif // End of COLLAPSE_H guard collapse/src/fdiff_fgrowth.cpp0000644000176200001440000024237615113724742016211 0ustar liggesusers#include using namespace Rcpp; // Return Options: // ret = 1 - differences // ret = 2 - log differences // ret = 3 - log-difference growth rates // ret = 4 - exact growth rates // Also: if rho != 1, quasi-differencing and log differencing with rho... i.e. for Cochrane-Orcutt regression // This Approach: currently does not support iterated differences on irregular time-series and panel data ! // TODO: Make comprehensive... // Note: Now taking logs in R -> Faster and smaller compiled code ! // ... some systems get this wrong, possibly depends on what libs are loaded // // static inline double R_log(double x) { // return x > 0 ? log(x) : x == 0 ? R_NegInf : R_NaN; // } template NumericVector fdiffgrowthCppImpl(const NumericVector& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, std::string stub = "", bool names = true, F FUN = [](double y, double x) { return y-x; }) { int l = x.size(), ns = n.size(), ds = diff.size(), zeros = 0, pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = ns; i--; ) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot pos = n[i]; if(pos == 0) ++zeros; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; std::string stub2 = names ? "F" + stub : ""; int ncol = (ns-zeros)*ds+zeros; if(ncol == 1) names = false; NumericMatrix out = no_init_matrix(l, ncol); CharacterVector colnam = names ? no_init_vector(ncol) : no_init_vector(1); CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; CharacterVector diffc = names ? Rf_coerceVector(diff, STRSXP) : NA_STRING; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; for(int i = np; i != l; ++i) outp[i] = FUN(x[i], x[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i--; ) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = np*L_dq; i != end; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; for(int i = l+np; i--; ) outp[i] = FUN(x[i], x[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i != l; ++i) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = end; i != start; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; bool regular = osize == l; IntegerVector omap(osize), ord2 = regular ? no_init_vector(1) : no_init_vector(l); if(!regular) { // Irregular time series if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); if(Rcpp::max(diff) > 1) stop("Iterations are currently only supported for regular time series. See ?seqid to identify the regular sequences in your time series, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } } else { // Regular time series for(int i = 0; i != l; ++i) { temp = ord[i] - min; if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i; } } for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = np; i != l; ++i) outp[omap[i]] = FUN(x[omap[i]], x[omap[i - np]]); } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i--; ) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = np*L_dq; i != end; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = l+np; i--; ) outp[omap[i]] = FUN(x[omap[i]], x[omap[i - np]]); } else { for(int i = 0, osnp = osize + np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i != l; ++i) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < length(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = end; i != start; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } } else { if(l != g.size()) stop("length(x) must match length(g)"); int ags = l/ng, ngp = ng+1, maxdiff = max(diff); if(Rf_isNull(t)) { bool cond = !Rf_isNull(gs); IntegerVector gsv = (cond || maxdiff == 1) ? no_init_vector(1) : IntegerVector(ng); int *pgsv = cond ? INTEGER(gs)-1 : INTEGER(gsv)-1; if(maxdiff != 1) { if(cond) { if(ng != Rf_length(gs)) stop("ng must match length(gs)"); } else { for(int i = 0; i != l; ++i) ++pgsv[g[i]]; } } // int seen[ngp], memsize = sizeof(int)*(ngp); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) outp[i] = FUN(x[i], x[i - np]); else { outp[i] = fill; ++seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); // Right ? -> seems so std::vector seen(ngp); // memset(seen, 0, memsize); // Needed, because it loops from the beginning for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; for(int i = l; i--; ) { if(seen[g[i]] == np) outp[i] = FUN(x[i], x[i - np]); else { outp[i] = fill; --seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) bool regular = temp == l; IntegerVector omap(temp), ord2 = no_init_vector(l); if(!regular) { // Irregular panel if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); if(maxdiff > 1) stop("Iterations are currently only supported for regular panels. See ?seqid to identify the regular sequences in your panel, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; temp = cgs[g[i]] + ord2[i]; if(omap[temp]) stop("Repeated values of timevar within one or more groups"); omap[temp] = i+1; // needed to add 1 to distinguish between 0 and gap } } else { // Regular panel for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; temp = cgs[g[i]] + ord2[i]; if(omap[temp]) stop("Repeated values of timevar within one or more groups"); omap[temp] = i; } } for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0]; else colnam[pos] = "L" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] >= np) { outp[i] = FUN(x[i], x[omap[cgs[g[i]]+ord2[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q]; else colnam[pos] = "L" + nc[p] + stub + diffc[q]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0]; else colnam[pos] = "F" + nc[p] + stub + diffc[0]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] < max[g[i]]+np) { outp[i] = FUN(x[i], x[omap[cgs[g[i]]+ord2[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < max[g[i]]+np && (temp = omap[cgs[g[i]]+ord2[i]-np])) { outp[i] = FUN(x[i], x[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q]; } ++pos; } } } else { out( _ , pos) = x; if(names) colnam[pos] = "--"; ++pos; } } } } // Previous Version // if(ncol == 1) SHALLOW_DUPLICATE_ATTRIB(out, x); // else if(names) out.attr("dimnames") = List::create(x.attr("names"), colnam); SHALLOW_DUPLICATE_ATTRIB(out, x); if(ncol != 1) { Rf_setAttrib(out, R_NamesSymbol, R_NilValue); // if(x.hasAttribute("names")) out.attr("names") = R_NilValue; Rf_dimgets(out, Dimension(l, ncol)); if(Rf_isObject(x)) { // && !x.inherits("pseries") -> lag matrix in plm is not a pseries anymore anyway... CharacterVector classes = Rf_getAttrib(out, R_ClassSymbol); classes.push_back("matrix"); Rf_classgets(out, classes); } // else { // Rf_classgets(out, Rf_mkString("matrix")); // } if(names) Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), colnam)); } return out; } // [[Rcpp::export]] NumericVector fdiffgrowthCpp(const NumericVector& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, int ret = 1, double rho = 1, bool names = true, double power = 1) { std::string stub; if(ret < 4) { double rho2; if(ret == 3) { rho2 = 1; if(power != 1) stop("High-powered log-difference growth rates are currently not supported"); if(names) stub = "Dlog"; } else { rho2 = rho; if(names) stub = (ret == 1 && rho == 1) ? "D" : (ret == 1) ? "QD" : (rho == 1) ? "Dlog" : "QDlog"; // QD for quasi-differences } return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho2](double y, double x) { return y-rho2*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! } else if (ret == 4) { if(names) stub = "G"; // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? if(power == 1) return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // definitely much faster !! return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho, power](double y, double x) { return (pow(y/x, power)-1)*rho; }); // without: 375 kb } else stop("Unknown return option!"); } inline SEXP coln_check(SEXP x) { return Rf_isNull(x) ? NA_STRING : x; } template NumericMatrix fdiffgrowthmCppImpl(const NumericMatrix& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, std::string stub = "", bool names = true, F FUN = [](double y, double x) { return y-x; }) { int l = x.nrow(), col = x.ncol(), ns = n.size(), ds = diff.size(), zeros = 0, pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = ns; i--; ) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot pos = n[i]; if(pos == 0) ++zeros; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; std::string stub2 = names ? "F" + stub : ""; int ncol = ((ns-zeros)*ds+zeros)*col; NumericMatrix out = no_init_matrix(l, ncol); CharacterVector colnam = names ? no_init_vector(ncol) : no_init_vector(1); CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; CharacterVector diffc = names ? Rf_coerceVector(diff, STRSXP) : NA_STRING; CharacterVector coln = names ? coln_check(colnames(x)) : NA_STRING; if(names && coln[0] == NA_STRING) names = false; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = np; i != l; ++i) outp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i--; ) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = np*L_dq; i != end; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = l+np; i--; ) outp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[i] = FUN(outp[i], outp[i - np]); } for(int i = end; i != l; ++i) outp[i] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = end; i != start; ++i) outtemp[i] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } else { // Unordered data: Timevar provided IntegerVector ord = t; if(l != ord.size()) stop("length(x) must match length(t)"); int min = INT_MAX, max = INT_MIN, osize, temp; for(int i = 0; i != l; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; bool regular = osize == l; IntegerVector omap(osize), ord2 = regular ? no_init_vector(1) : no_init_vector(l); if(!regular) { // Irregular time series if(osize > 10000000 && osize > 3 * l) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); if(Rcpp::max(diff) > 1) stop("Iterations are currently only supported for regular time series. See ?seqid to identify the regular sequences in your time series, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } } else { // Regular time series for(int i = 0; i != l; ++i) { temp = ord[i] - min; if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i; } } for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= l) stop("n * diff needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = np; i != l; ++i) outp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i--; ) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= l) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = l-1; i != start; --i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = np*L_dq; i != end; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = l+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = l+np; i--; ) outp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0, osnp = osize + np; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); } if(regular) for(int i = end; i != l; ++i) outp[omap[i]] = fill; if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = l+np*dq, start = l+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = l+np*(k+1); for(int i = 0; i != final; ++i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = end; i != start; ++i) outtemp[omap[i]] = fill; out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } } else { // With groups if(l != g.size()) stop("nrow(x) must match length(g)"); int ags = l/ng, ngp = ng+1, maxdiff = max(diff); if(Rf_isNull(t)) { // Ordered data bool cond = !Rf_isNull(gs); IntegerVector gsv = (cond || maxdiff == 1) ? no_init_vector(1) : IntegerVector(ng); int *pgsv = cond ? INTEGER(gs)-1 : INTEGER(gsv)-1; if(maxdiff != 1) { if(cond) { if(ng != Rf_length(gs)) stop("ng must match length(gs)"); } else { for(int i = 0; i != l; ++i) ++pgsv[g[i]]; } } // int seen[ngp], memsize = sizeof(int)*(ngp); for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = 0; i != l; ++i) { if(seen[g[i]] == np) outp[i] = FUN(column[i], column[i - np]); else { outp[i] = fill; ++seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; for(int i = l; i--; ) { if(seen[g[i]] == np) outp[i] = FUN(column[i], column[i - np]); else { outp[i] = fill; --seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outp[i] = fill; else { outp[i] = FUN(outp[i], outp[i - np]); ++seen[g[i]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int temp; if(l != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != l; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) bool regular = temp == l; IntegerVector omap(temp), ord2 = no_init_vector(l), index = no_init_vector(l); if(!regular) { // Irregular panel if(temp > 10000000 && temp > 3 * l) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); if(maxdiff > 1) stop("Iterations are currently only supported for regular panels. See ?seqid to identify the regular sequences in your panel, or just apply this function multiple times."); for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } } else { // Regular panel for(int i = 0; i != l; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i; } } for(int j = 0; j != col; ++j) { NumericMatrix::ConstColumn column = x( _ , j); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(L1) colnam[pos] = stub + diffc[0] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] >= np) { outp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = l; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(L1) colnam[pos] = stub + diffc[q] + "." + coln[j]; else colnam[pos] = "L" + nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericMatrix::Column outp = out( _ , pos); if(names) { if(F1) colnam[pos] = stub2 + diffc[0] + "." + coln[j]; else colnam[pos] = "F" + nc[p] + stub + diffc[0] + "." + coln[j]; } ++pos; if(regular) { for(int i = 0; i != l; ++i) { if(ord2[i] < max[g[i]]+np) { outp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outp[i] = fill; } } } else { for(int i = 0; i != l; ++i) { // Smarter solution using while ??? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outp[i] = FUN(column[i], column[temp-1]); } else { outp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outp[omap[i]] = fill; else { outp[omap[i]] = FUN(outp[omap[i]], outp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(ds > 1) { NumericVector outtemp = outp; for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != l; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out( _ , pos) = outtemp; if(names) { if(F1) colnam[pos] = stub2 + diffc[q] + "." + coln[j]; else colnam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + coln[j]; } ++pos; } } } else { out( _ , pos) = column; if(names) colnam[pos] = coln[j]; ++pos; } } } } } // Previous Solution: // if(names) { // out.attr("dimnames") = List::create(rownames(x), colnam); // } else { // if(ns*ds == 1) SHALLOW_DUPLICATE_ATTRIB(out, x); // // else rownames(out) = rownames(x); // redundant !! // } SHALLOW_DUPLICATE_ATTRIB(out, x); if(ncol != col) Rf_dimgets(out, Dimension(l, ncol)); if(names) { Rf_dimnamesgets(out, List::create(rownames(x), colnam)); // colnames(out) = colnam; also deletes rownames ! } else if(ncol != col) { Rf_setAttrib(out, R_DimNamesSymbol, R_NilValue); } return out; } // [[Rcpp::export]] NumericMatrix fdiffgrowthmCpp(const NumericMatrix& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, int ret = 1, double rho = 1, bool names = true, double power = 1) { std::string stub; if(ret < 4) { double rho2; if(ret == 3) { rho2 = 1; if(power != 1) stop("High-powered log-difference growth rates are currently not supported"); if(names) stub = "Dlog"; } else { rho2 = rho; if(names) stub = (ret == 1 && rho == 1) ? "D" : (ret == 1) ? "QD" : (rho == 1) ? "Dlog" : "QDlog"; // QD for quasi-differences } return fdiffgrowthmCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho2](double y, double x) { return y-rho2*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! } else if (ret == 4) { if(names) stub = "G"; if(power == 1) return fdiffgrowthmCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? return fdiffgrowthmCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho, power](double y, double x) { return (pow(y/x, power)-1)*rho; }); } else stop("Unknown return option!"); } template List fdiffgrowthlCppImpl(const List& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, std::string stub = "", bool names = true, F FUN = [](double y, double x) { return y-x; }) { // const needed for #if response... int l = x.size(), ns = n.size(), ds = diff.size(), zeros = 0, pos = INT_MAX; IntegerVector absn = no_init_vector(ns); for(int i = ns; i--; ) { if(n[i] == pos) stop("duplicated values in n detected"); // because one might mistakenly pass a factor to the n-slot pos = n[i]; if(pos == 0) ++zeros; if(pos < 0) { if(pos == NA_INTEGER) stop("NA in n"); absn[i] = -pos; } else absn[i] = pos; } pos = 0; std::string stub2 = names ? "F" + stub : ""; int ncol = ((ns-zeros)*ds+zeros)*l; List out(ncol); CharacterVector nam = names ? no_init_vector(ncol) : no_init_vector(1); CharacterVector nc = names ? Rf_coerceVector(absn, STRSXP) : NA_STRING; CharacterVector diffc = names ? Rf_coerceVector(diff, STRSXP) : NA_STRING; CharacterVector na = names ? coln_check(Rf_getAttrib(x, R_NamesSymbol)) : NA_STRING; if(names && na[0] == NA_STRING) names = false; if(ng == 0) { // No groups if(Rf_isNull(t)) { // Ordered data for(int j = 0; j != l; ++j) { NumericVector column = x[j]; int row = column.size(); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= row) stop("n * diff needs to be < nrow(x)"); NumericVector outjp = no_init_vector(row); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = np; i != row; ++i) outjp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = row-1; i != start; --i) outjp[i] = FUN(outjp[i], outjp[i - np]); } for(int i = end; i--; ) outjp[i] = fill; SHALLOW_DUPLICATE_ATTRIB(outjp, column); out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= row) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = row-1; i != start; --i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = np*L_dq; i != end; ++i) outtemp[i] = fill; if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); // or Rf_copyVector, Rf_shallow_duplicate, Rf_lazy_duplicate http://mtweb.cs.ucl.ac.uk/mus/bin/install_R/R-3.1.1/src/main/duplicate.c } // https://rlang.r-lib.org/reference/duplicate.html } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = row+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericVector outjp = no_init_vector(row); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = row+np; i--; ) outjp[i] = FUN(column[i], column[i - np]); if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = row+np*(k+1); for(int i = 0; i != final; ++i) outjp[i] = FUN(outjp[i], outjp[i - np]); } for(int i = end; i != row; ++i) outjp[i] = fill; out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = row+np*dq, start = row+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = row+np*(k+1); for(int i = 0; i != final; ++i) outtemp[i] = FUN(outtemp[i], outtemp[i - np]); } for(int i = end; i != start; ++i) outtemp[i] = fill; if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } else { // Unordered data: Timevar provided IntegerVector ord = t; int min = INT_MAX, max = INT_MIN, osize, temp, os = ord.size(); if(Rf_length(x[0]) != os) stop("length(x) must match length(t)"); for(int i = 0; i != os; ++i) { if(ord[i] < min) min = ord[i]; if(ord[i] > max) max = ord[i]; } if(min == NA_INTEGER) stop("Timevar contains missing values"); osize = max-min+1; bool regular = osize == os; IntegerVector omap(osize), ord2 = regular ? no_init_vector(1) : no_init_vector(os); if(!regular) { // Irregular time series if(osize > 10000000 && osize > 3 * os) warning("Your time series is very irregular. Need to create an internal ordering vector of length %s to represent it.", osize); if(Rcpp::max(diff) > 1) stop("Iterations are currently only supported for regular time series. See ?seqid to identify the regular sequences in your time series, or just apply this function multiple times."); for(int i = 0; i != os; ++i) { temp = ord[i] - min; // Best ? Or direct assign to ord2[i] ? Also check for panel version.. if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i+1; ord2[i] = temp; } } else { // Regular time series for(int i = 0; i != os; ++i) { temp = ord[i] - min; if(omap[temp]) stop("Repeated values in timevar"); omap[temp] = i; } } for(int j = 0; j != l; ++j) { NumericVector column = x[j]; if(os != column.size()) stop("nrow(x) must match length(t)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(np>0) { // Positive lagged and iterated differences int d1 = diff[0], end = np*d1; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end >= os) stop("n * diff needs to be < nrow(x)"); NumericVector outjp = no_init_vector(os); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = np; i != os; ++i) outjp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] >= np && (temp = omap[ord2[i] - np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1)-1; for(int i = os-1; i != start; --i) outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); } if(regular) for(int i = end; i--; ) outjp[omap[i]] = fill; out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = np*dq; if(end >= os) stop("n * diff needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1)-1; for(int i = os-1; i != start; --i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = np*L_dq; i != end; ++i) outtemp[omap[i]] = fill; if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0], end = os+np*d1; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); NumericVector outjp = no_init_vector(os); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = os+np; i--; ) outjp[omap[i]] = FUN(column[omap[i]], column[omap[i - np]]); } else { for(int i = 0, osnp = osize + np; i != os; ++i) { // Smarter solution using while ??? if(ord2[i] < osnp && (temp = omap[ord2[i] - np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int final = os+np*(k+1); for(int i = 0; i != final; ++i) outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); } if(regular) for(int i = end; i != os; ++i) outjp[omap[i]] = fill; out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1], end = os+np*dq, start = os+np*L_dq; if(end <= 0) stop("abs(n * diff) needs to be < nrow(x)"); if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int final = os+np*(k+1); for(int i = 0; i != final; ++i) outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); } for(int i = end; i != start; ++i) outtemp[omap[i]] = fill; if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } } else { // With groups int gss = g.size(), ags = gss/ng, ngp = ng+1, maxdiff = max(diff); if(Rf_isNull(t)) { // Ordered data bool cond = !Rf_isNull(gs); IntegerVector gsv = (cond || maxdiff == 1) ? no_init_vector(1) : IntegerVector(ng); int *pgsv = cond ? INTEGER(gs)-1 : INTEGER(gsv)-1; if(maxdiff != 1) { if(cond) { if(ng != Rf_length(gs)) stop("ng must match length(gs)"); } else { for(int i = 0; i != gss; ++i) ++pgsv[g[i]]; } } // int seen[ngp], memsize = sizeof(int)*(ngp); for(int j = 0; j != l; ++j) { NumericVector column = x[j]; if(gss != column.size()) stop("nrow(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = 0; i != gss; ++i) { if(seen[g[i]] == np) outjp[i] = FUN(column[i], column[i - np]); else { outjp[i] = fill; ++seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outjp[i] = fill; else { outjp[i] = FUN(outjp[i], outjp[i - np]); ++seen[g[i]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[i]] == pgsv[g[i]]-start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); std::vector seen(ngp); // memset(seen, 0, memsize); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } for(int i = gss; i--; ) { if(seen[g[i]] == np) outjp[i] = FUN(column[i], column[i - np]); else { outjp[i] = fill; --seen[g[i]]; } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outjp[i] = fill; else { outjp[i] = FUN(outjp[i], outjp[i - np]); ++seen[g[i]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[i]] == pgsv[g[i]]+start) outtemp[i] = fill; else { outtemp[i] = FUN(outtemp[i], outtemp[i - np]); ++seen[g[i]]; } } } if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } else { // Unordered data: Timevar Provided IntegerVector ord = t; int temp; if(gss != ord.size()) stop("length(x) must match length(t)"); IntegerVector min(ngp, INT_MAX), max(ngp, INT_MIN), cgs = no_init_vector(ngp); for(int i = 0; i != gss; ++i) { temp = g[i]; if(ord[i] < min[temp]) min[temp] = ord[i]; if(ord[i] > max[temp]) max[temp] = ord[i]; } temp = 0; for(int i = 1; i != ngp; ++i) { if(min[i] == NA_INTEGER) stop("Timevar contains missing values"); if(min[i] == INT_MAX) continue; // Needed in case of unused factor levels (group vector too large) cgs[i] = temp; // This needs to b here (for unused factor levels case...) max[i] -= min[i] - 1; // need max[i] which stores the complete group sizes only if p<0 e.g. if computing leads.. temp += max[i]; } // omap provides the ordering to order the vector (needed to find previous / next values) bool regular = temp == gss; IntegerVector omap(temp), ord2 = no_init_vector(gss), index = no_init_vector(gss); if(!regular) { // Irregular panel if(temp > 10000000 && temp > 3 * gss) warning("Your panel is very irregular. Need to create an internal ordering vector of length %s to represent it.", temp); if(maxdiff > 1) stop("Iterations are currently only supported for regular panels. See ?seqid to identify the regular sequences in your panel, or just apply this function multiple times."); for(int i = 0; i != gss; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i+1; // needed to add 1 to distinguish between 0 and gap } } else { // Regular panel for(int i = 0; i != gss; ++i) { ord2[i] = ord[i] - min[g[i]]; index[i] = cgs[g[i]] + ord2[i]; if(omap[index[i]]) stop("Repeated values of timevar within one or more groups"); omap[index[i]] = i; } } for(int j = 0; j != l; ++j) { NumericVector column = x[j]; if(gss != column.size()) stop("nrow(x) must match length(g)"); for(int p = 0; p != ns; ++p) { int np = n[p]; if(absn[p]*maxdiff > ags) warning("abs(n * diff) exceeds average group-size (%i). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.", ags); if(np>0) { // Positive lagged and iterated differences int d1 = diff[0]; bool L1 = np == 1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(L1) nam[pos] = stub + diffc[0] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = 0; i != gss; ++i) { if(ord2[i] >= np) { outjp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outjp[i] = fill; } } } else { for(int i = 0; i != gss; ++i) { if(ord2[i] >= np && (temp = omap[index[i]-np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outjp[omap[i]] = fill; else { outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = gss; i--; ) { if(seen[g[omap[i]]] == max[g[omap[i]]]-start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(names) { if(L1) nam[pos] = stub + diffc[q] + "." + na[j]; else nam[pos] = "L" + nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else if(np<0) { // (Negative) leaded and iterated differences int d1 = diff[0]; bool F1 = np == -1; if(d1 < 1) stop("diff must be a vector of integers > 0"); NumericVector outjp = no_init_vector(gss); SHALLOW_DUPLICATE_ATTRIB(outjp, column); if(names) { if(F1) nam[pos] = stub2 + diffc[0] + "." + na[j]; else nam[pos] = "F" + nc[p] + stub + diffc[0] + "." + na[j]; } if(regular) { for(int i = 0; i != gss; ++i) { if(ord2[i] < max[g[i]]+np) { outjp[i] = FUN(column[i], column[omap[index[i]-np]]); } else { outjp[i] = fill; } } } else { for(int i = 0; i != gss; ++i) { // Smarter solution using while ??? if(ord2[i] < max[g[i]]+np && (temp = omap[index[i]-np])) { outjp[i] = FUN(column[i], column[temp-1]); } else { outjp[i] = fill; } } } if(d1 > 1) for(int k = 1; k != d1; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outjp[omap[i]] = fill; else { outjp[omap[i]] = FUN(outjp[omap[i]], outjp[omap[i - np]]); ++seen[g[omap[i]]]; } } } out[pos++] = outjp; if(ds > 1) { NumericVector outtemp = Rf_shallow_duplicate(outjp); for(int q = 1; q != ds; ++q) { int dq = diff[q], L_dq = diff[q-1]; if(dq <= L_dq) stop("differences must be passed in ascending order"); for(int k = L_dq; k != dq; ++k) { int start = np*(k+1); std::vector seen(ngp); // memset(seen, 0, memsize); for(int i = 0; i != gss; ++i) { if(seen[g[omap[i]]] == max[g[omap[i]]]+start) outtemp[omap[i]] = fill; else { outtemp[omap[i]] = FUN(outtemp[omap[i]], outtemp[omap[i - np]]); ++seen[g[omap[i]]]; } } } if(names) { if(F1) nam[pos] = stub2 + diffc[q] + "." + na[j]; else nam[pos] = "F"+ nc[p] + stub + diffc[q] + "." + na[j]; } out[pos++] = Rf_shallow_duplicate(outtemp); } } } else { if(names) nam[pos] = na[j]; out[pos++] = column; } } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); if(names) { // best way to code this ? Rf_namesgets(out, nam); } else if(ncol != l) { Rf_setAttrib(out, R_NamesSymbol, R_NilValue); } return out; } // [[Rcpp::export]] List fdiffgrowthlCpp(const List& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, int ret = 1, double rho = 1, bool names = true, double power = 1) { std::string stub; if(ret < 4) { double rho2; if(ret == 3) { rho2 = 1; if(power != 1) stop("High-powered log-difference growth rates are currently not supported"); if(names) stub = "Dlog"; } else { rho2 = rho; if(names) stub = (ret == 1 && rho == 1) ? "D" : (ret == 1) ? "QD" : (rho == 1) ? "Dlog" : "QDlog"; // QD for quasi-differences } return fdiffgrowthlCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho2](double y, double x) { return y-rho2*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! } else if (ret == 4) { if(names) stub = "G"; if(power == 1) return fdiffgrowthlCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? return fdiffgrowthlCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho, power](double y, double x) { return (pow(y/x, power)-1)*rho; }); } else stop("Unknown return option!"); } // Old attempts without template .... // #define FUN(y, x) (ret == 1 && rho1) ? ((y)-(x)) : // (ret == 1) ? ((y)-rho*(x)) : // (ret == 2 && rho1) ? (log((y)*(1/(x)))) : // (ret == 2) ? (log((y)*(1/(rho*(x))))) : // (ret == 3) ? (((y)-(x))*(100/(x))) : (log((y)*(1/(x)))*100) // #define rho1 (rho == 1) // #define retm (ret) // // #if retm == 1 && rho1 // #define FUN(y, x) ((y)-(x)) // #elif retm == 1 // #define FUN(y, x) ((y)-rho*(x)) // #elif retm == 2 && rho1 // #define FUN(y, x) (log((y)*(1/(x)))) // #elif retm == 2 // #define FUN(y, x) (log((y)*(1/(rho*(x))))) // #elif retm == 3 // #define FUN(y, x) (((y)-(x))*(100/(x))) // #elif retm == 4 // #define FUN(y, x) (log((y)*(1/(x)))*100) // #endif // Previous: Internally computing log-differences--- compiled file was 648 kb, without debug info !! // // [[Rcpp::export]] // NumericVector fdiffgrowthCpp(const NumericVector& x, const IntegerVector& n = 1, const IntegerVector& diff = 1, // double fill = NA_REAL, int ng = 0, const IntegerVector& g = 0, // const SEXP& gs = R_NilValue, const SEXP& t = R_NilValue, // int ret = 1, double rho = 1, bool names = true) { // // std::string stub; // switch (ret) // { // [rho] or [&rho] ? // https://stackoverflow.com/questions/30217956/error-variable-cannot-be-implicitly-captured-because-no-default-capture-mode-h // case 1: // if(names) stub = (rho == 1) ? "D" : "QD"; // QD for quasi-differences ! // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return y-rho*x; }); // return y-x; same efficiency as return y-rho*x; when rho = 1 -> smart compiler !, and reduced file size !! // case 2: // if(rho == 1) goto fastld; // if(names) stub = "QDlog"; // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return R_log(y)-rho*R_log(x); }); // log(y*(1/(rho*x))) gives log(y) - log(rho*x), but we want log(y) - rho*log(x) // case 3: // if(names) stub = "G"; // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return (y-x)*(rho/x); }); // same speed as fixing 100 ! Faster using (y/x-1)*rho or (x*(1/x)-1)*rho ? // case 4: // fastld: // if(names) stub = "Dlog"; // return fdiffgrowthCppImpl(x, n, diff, fill, ng, g, gs, t, stub, names, [rho](double y, double x) { return rho*R_log(y*(1/x)); }); // default: stop("Unknown return option!"); // } // } collapse/src/internal/0000755000176200001440000000000015005240113014446 5ustar liggesuserscollapse/src/internal/R_defn.h0000644000176200001440000001302015122271011016011 0ustar liggesusers#ifndef R_DEFINITIONS_H // Check if R_DEFINITIONS_H is not defined #define R_DEFINITIONS_H // Define R_DEFINITIONS_H // #define USE_RINTERNALS #include #include // NOTE: All of this is copied from Defn.h: https://github.com/wch/r-source/blob/28de75af0541f93832c5899139b969d290bf422e/src/include/Defn.h #ifndef SEXPREC_HEADER #ifndef NAMED_BITS #define NAMED_BITS 16 #endif struct sxpinfo_struct { SEXPTYPE type : TYPE_BITS; /* ==> (FUNSXP == 99) %% 2^5 == 3 == CLOSXP * -> warning: `type' is narrower than values * of its type * when SEXPTYPE was an enum */ unsigned int scalar: 1; unsigned int obj : 1; unsigned int alt : 1; unsigned int gp : 16; unsigned int mark : 1; unsigned int debug : 1; unsigned int trace : 1; /* functions and memory tracing */ unsigned int spare : 1; /* used on closures and when REFCNT is defined */ unsigned int gcgen : 1; /* old generation number */ unsigned int gccls : 3; /* node class */ unsigned int named : NAMED_BITS; unsigned int extra : 32 - NAMED_BITS; /* used for immediate bindings */ }; /* Tot: 64 */ struct vecsxp_struct { R_xlen_t length; R_xlen_t truelength; }; struct primsxp_struct { int offset; }; struct symsxp_struct { struct SEXPREC *pname; struct SEXPREC *value; struct SEXPREC *internal; }; struct listsxp_struct { struct SEXPREC *carval; struct SEXPREC *cdrval; struct SEXPREC *tagval; }; struct envsxp_struct { struct SEXPREC *frame; struct SEXPREC *enclos; struct SEXPREC *hashtab; }; struct closxp_struct { struct SEXPREC *formals; struct SEXPREC *body; struct SEXPREC *env; }; struct promsxp_struct { struct SEXPREC *value; struct SEXPREC *expr; struct SEXPREC *env; }; #define SEXPREC_HEADER \ struct sxpinfo_struct sxpinfo; \ struct SEXPREC *attrib; \ struct SEXPREC *gengc_next_node, *gengc_prev_node typedef struct SEXPREC { SEXPREC_HEADER; union { struct primsxp_struct primsxp; struct symsxp_struct symsxp; struct listsxp_struct listsxp; struct envsxp_struct envsxp; struct closxp_struct closxp; struct promsxp_struct promsxp; } u; } SEXPREC; // typedef struct { // SEXPREC_HEADER; // } SEXPREC_partial; typedef struct VECTOR_SEXPREC { SEXPREC_HEADER; struct vecsxp_struct vecsxp; } VECTOR_SEXPREC, *VECSEXP; typedef union { VECTOR_SEXPREC s; double align; } SEXPREC_ALIGN; #endif #undef OOBJ #define OOBJ(x) ((x)->sxpinfo.obj) #define SET_OOBJ(x,v) (OOBJ(x)=(v)) #undef ATTTR #define ATTTR(x) ((x)->attrib) #define SET_ATTTR(x,v) (ATTTR(x)=(v)) #undef MYLEV #define MYLEV(x) ((x)->sxpinfo.gp) #undef IS_UTF8 #define IS_UTF8(x) (MYLEV(x) & 8) #undef IS_ASCII #define IS_ASCII(x) (MYLEV(x) & 64) // from data.table.h // #define ASCII_MASK (1<<6) // evaluates to 64 !! // #define IS_ASCII(x) ((x)->sxpinfo.gp & ASCII_MASK) // #define IS_ASCII(x) (LEVELS(x) & ASCII_MASK) #undef SETTOF #define SETTOF(x,v) (((x)->sxpinfo.type)=(v)) // to avoid checking for ALTREP in TRUELENGTH, which slows down the code unnecessarily... #ifndef STDVEC_TRUELENGTH #define STDVEC_TRUELENGTH(x) (((VECSEXP) (x))->vecsxp.truelength) #define SET_STDVEC_TRUELENGTH(x, v) (STDVEC_TRUELENGTH(x)=(v)) #endif /* It would be better to find a way to avoid abusing TRUELENGTH, but in the meantime replace TRUELENGTH/SET_TRUELENGTH with TRLEN/SET_TRLEN that cast to int to avoid warnings. */ #undef TRULEN #define TRULEN(x) (ALTREP(x) ? 0 : STDVEC_TRUELENGTH(x)) #undef SET_TRULEN #define SET_TRULEN(x, v) (STDVEC_TRUELENGTH(x)=(v)) #undef TRLEN #define TRLEN(x) ((int) STDVEC_TRUELENGTH(x)) // ((int) TRUELENGTH(x)) #undef SET_TRLEN #define SET_TRLEN(x, v) SET_STDVEC_TRUELENGTH(x, ((int) (v))) #ifndef STDVEC_LENGTH #define STDVEC_LENGTH(x) (((VECSEXP) (x))->vecsxp.length) #endif // Needed for SETLENGTH #ifndef SETSCAL #define SETSCAL(x, v) (((x)->sxpinfo.scalar) = (v)) #endif #ifndef SET_STDVEC_LENGTH #define SET_STDVEC_LENGTH(x,v) do { \ SEXP __x__ = (x); \ R_xlen_t __v__ = (v); \ STDVEC_LENGTH(__x__) = __v__; \ SETSCAL(__x__, __v__ == 1 ? 1 : 0); \ } while (0) #endif #undef SET_LEN #define SET_LEN(x, v) SET_STDVEC_LENGTH((x), (v)) #undef MYEFL #define MYEFL(x) ((x)->sxpinfo.gp) #undef MYSEFL #define MYSEFL(x,v) (((x)->sxpinfo.gp)=(v)) // For super efficient access, e.g. in gsplit() #undef SEXP_DATAPTR #define SEXP_DATAPTR(x) ((SEXP *) (((SEXPREC_ALIGN *) (x)) + 1)) #undef INT_DATAPTR #define INT_DATAPTR(x) ((int *) (((SEXPREC_ALIGN *) (x)) + 1)) #undef DBL_DATAPTR #define DBL_DATAPTR(x) ((double *) (((SEXPREC_ALIGN *) (x)) + 1)) #undef DPTR #define DPTR(x) ((void *)DATAPTR_RO(x)) #undef SEXPPTR #define SEXPPTR(x) ((SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped VECTOR_ELT #undef SEXPPTR_RO #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped VECTOR_ELT // #define STDVEC_DATAPTR(x) ((void *) (((SEXPREC_ALIGN *) (x)) + 1)) // // static R_INLINE void *DPTR(SEXP x) { // if (ALTREP(x)) error("Cannot get writable DATAPTR from ALTREP string or list"); // else if (LENGTH(x) == 0 && TYPEOF(x) != CHARSXP) return (void *) 1; // else return STDVEC_DATAPTR(x); // } // External symbols not in DLL? // extern inline void *DPTR(SEXP x) { // return DATAPTR(x); // } /* Growable vector support */ #undef GROW_MSK #define GROW_MSK ((unsigned short)(1<<5)) #undef SET_GROWBL_BIT #define SET_GROWBL_BIT(x) (((x)->sxpinfo.gp) |= GROW_MSK) #endif // End of R_DEFINITIONS_H guard collapse/src/fmode.c0000644000176200001440000012461715121644344014120 0ustar liggesusers#include "collapse_c.h" // Needs to be first because includes OpenMP #include "kit.h" static double NEG_INF = -1.0/0.0; // C-implementations for different data types ---------------------------------- // TODO: outsource and memset hash table and count vector? // Problem: does not work in parallel, each thread needs own intermediate vectors int mode_int(const int *restrict px, const int *restrict po, const int l, const int sorted, const int narm, const int ret) { if(l == 1) return sorted ? px[0] : px[po[0]-1]; const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, index = 0, val, mode, max = 1, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values int *restrict n = (int*)R_Calloc(l, int); // Table to count frequency of values if(sorted) { mode = px[0]; if(narm) while(mode == NA_INTEGER && i < end) mode = px[++i]; for(; i < l; ++i) { val = px[i]; if(val == NA_INTEGER && narm) continue; id = HASH(val, K); while(h[id]) { index = h[id]-1; if(px[index] == val) goto ibls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; ibls:; if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } else { mode = px[po[0]-1]; if(narm) while(mode == NA_INTEGER && i < end) mode = px[po[++i]-1]; for(; i < l; ++i) { val = px[po[i]-1]; if(val == NA_INTEGER && narm) continue; id = HASH(val, K); while(h[id]) { index = h[id]-1; if(px[po[index]-1] == val) goto ibl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; ibl:; if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } R_Free(h); R_Free(n); return mode; } int w_mode_int(const int *restrict px, const double *restrict pw, const int *restrict po, const int l, const int sorted, const int narm, const int ret) { if(l == 1) { if(sorted) return ISNAN(pw[0]) ? NA_INTEGER : px[0]; return ISNAN(pw[po[0]-1]) ? NA_INTEGER : px[po[0]-1]; } const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, index = 0, val, mode, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values double *restrict sumw = (double*)R_Calloc(l, double); // Table to save each values sum of weights double max = NEG_INF; if(sorted) { mode = px[0]; if(narm) while((mode == NA_INTEGER || ISNAN(pw[i])) && i < end) mode = px[++i]; for(; i < l; ++i) { val = px[i]; if(ISNAN(pw[i]) || (val == NA_INTEGER && narm)) continue; id = HASH(val, K); while(h[id]) { index = h[id]-1; if(px[index] == val) goto ibls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; ibls:; sumw[index] += pw[i]; if(sumw[index] >= max) { if(lastm || sumw[index] > max) { max = sumw[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } else { mode = px[po[0]-1]; if(narm) while((mode == NA_INTEGER || ISNAN(pw[po[i]-1])) && i < end) mode = px[po[++i]-1]; for(int oi; i < l; ++i) { oi = po[i]-1; val = px[oi]; if(ISNAN(pw[oi]) || (val == NA_INTEGER && narm)) continue; id = HASH(val, K); while(h[id]) { index = h[id]-1; if(px[po[index]-1] == val) goto ibl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; ibl:; sumw[index] += pw[oi]; if(sumw[index] >= max) { if(lastm || sumw[index] > max) { max = sumw[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } R_Free(h); R_Free(sumw); return mode; } int mode_fct_logi(const int *restrict px, const int *restrict po, const int l, const int nlev, const int sorted, const int narm, const int ret) { if(l == 1) return sorted ? px[0] : px[po[0]-1]; int val, mode, max = 1, nlevp = nlev + 1, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; int *restrict n = (int*)R_Calloc(nlevp+1, int); // Table to count frequency of values if(sorted) { mode = px[0]; if(narm) while(mode == NA_INTEGER && i < end) mode = px[++i]; for(; i < l; ++i) { val = px[i]; if(val == NA_INTEGER) { if(narm) continue; val = nlevp; } if(++n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; mode = px[i]; } else if(nfirstm) { if(minm) { if(mode > px[i]) mode = px[i]; } else { if(mode < px[i]) mode = px[i]; } } } } } else { mode = px[po[0]-1]; if(narm) while(mode == NA_INTEGER && i < end) mode = px[po[++i]-1]; for(int xi; i < l; ++i) { val = xi = px[po[i]-1]; if(val == NA_INTEGER) { if(narm) continue; val = nlevp; } if(++n[val] >= max) { if(lastm || n[val] > max) { max = n[val]; mode = xi; } else if(nfirstm) { if(minm) { if(mode > xi) mode = xi; } else { if(mode < xi) mode = xi; } } } } } R_Free(n); return mode; } int w_mode_fct_logi(const int *restrict px, const double *restrict pw, const int *restrict po, const int l, const int nlev, const int sorted, const int narm, const int ret) { if(l == 1) { if(sorted) return ISNAN(pw[0]) ? NA_INTEGER : px[0]; return ISNAN(pw[po[0]-1]) ? NA_INTEGER : px[po[0]-1]; } int val, mode, nlevp = nlev + 1, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; double *restrict sumw = (double*)R_Calloc(nlevp+1, double); // Table to save each values sum of weights double max = NEG_INF; if(sorted) { mode = px[0]; if(narm) while((mode == NA_INTEGER || ISNAN(pw[i])) && i < end) mode = px[++i]; for(; i < l; ++i) { if(ISNAN(pw[i])) continue; val = px[i]; if(val == NA_INTEGER) { if(narm) continue; val = nlevp; } sumw[val] += pw[i]; if(sumw[val] >= max) { if(lastm || sumw[val] > max) { max = sumw[val]; mode = px[i]; } else if(nfirstm) { if(minm) { if(mode > px[i]) mode = px[i]; } else { if(mode < px[i]) mode = px[i]; } } } } } else { mode = px[po[0]-1]; if(narm) while((mode == NA_INTEGER || ISNAN(pw[po[i]-1])) && i < end) mode = px[po[++i]-1]; for(int oi, xoi; i < l; ++i) { oi = po[i]-1; if(ISNAN(pw[oi])) continue; val = xoi = px[oi]; if(val == NA_INTEGER) { if(narm) continue; val = nlevp; } sumw[val] += pw[oi]; if(sumw[val] >= max) { if(lastm || sumw[val] > max) { max = sumw[val]; mode = xoi; } else if(nfirstm) { if(minm) { if(mode > xoi) mode = xoi; } else { if(mode < xoi) mode = xoi; } } } } } R_Free(sumw); return mode; } double mode_double(const double *restrict px, const int *restrict po, const int l, const int sorted, const int narm, const int ret) { if(l == 1) return sorted ? px[0] : px[po[0]-1]; const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, index = 0, max = 1, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values int *restrict n = (int*)R_Calloc(l, int); // Table to count frequency of values double val, mode; union uno tpv; if(sorted) { mode = px[0]; if(narm) while(ISNAN(mode) && i < end) mode = px[++i]; for(; i < l; ++i) { val = px[i]; if(ISNAN(val) && narm) continue; tpv.d = val + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { index = h[id]-1; if(REQUAL(px[index], val)) goto rbls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; rbls:; if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } else { mode = px[po[0]-1]; if(narm) while(ISNAN(mode) && i < end) mode = px[po[++i]-1]; for(; i < l; ++i) { val = px[po[i]-1]; if(ISNAN(val) && narm) continue; tpv.d = val + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { index = h[id]-1; if(REQUAL(px[po[index]-1], val)) goto rbl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; rbl:; if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } R_Free(h); R_Free(n); return mode; } double w_mode_double(const double *restrict px, const double *restrict pw, const int *restrict po, const int l, const int sorted, const int narm, const int ret) { if(l == 1) { if(sorted) return ISNAN(pw[0]) ? NA_REAL : px[0]; return ISNAN(pw[po[0]-1]) ? NA_REAL : px[po[0]-1]; } const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, index = 0, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values double *restrict sumw = (double*)R_Calloc(l, double); // Table to save each values sum of weights double val, mode, max = NEG_INF; union uno tpv; if(sorted) { mode = px[0]; if(narm) while((ISNAN(mode) || ISNAN(pw[i])) && i < end) mode = px[++i]; for(; i < l; ++i) { val = px[i]; if(ISNAN(pw[i]) || (ISNAN(val) && narm)) continue; tpv.d = val + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { index = h[id]-1; if(REQUAL(px[index], val)) goto rbls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; rbls:; sumw[index] += pw[i]; if(sumw[index] >= max) { if(lastm || sumw[index] > max) { max = sumw[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } else { mode = px[po[0]-1]; if(narm) while((ISNAN(mode) || ISNAN(pw[po[i]-1])) && i < end) mode = px[po[++i]-1]; for(int oi; i < l; ++i) { oi = po[i]-1; val = px[oi]; if(ISNAN(pw[oi]) || (ISNAN(val) && narm)) continue; tpv.d = val + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { index = h[id]-1; if(REQUAL(px[po[index]-1], val)) goto rbl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; rbl:; sumw[index] += pw[oi]; if(sumw[index] >= max) { if(lastm || sumw[index] > max) { max = sumw[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } R_Free(h); R_Free(sumw); return mode; } SEXP mode_string(const SEXP *restrict px, const int *restrict po, const int l, const int sorted, const int narm, const int ret) { if(l == 1) return sorted ? px[0] : px[po[0]-1]; const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, index = 0, max = 1, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values int *restrict n = (int*)R_Calloc(l, int); // Table to count frequency of values SEXP val, mode; if(sorted) { mode = px[0]; if(narm) while(mode == NA_STRING && i < end) mode = px[++i]; for(; i < l; ++i) { val = px[i]; if(val == NA_STRING && narm) continue; id = HASH(((uintptr_t) val & 0xffffffff), K); while(h[id]) { index = h[id]-1; if(px[index] == val) goto sbls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; sbls:; if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } else { mode = px[po[0]-1]; if(narm) while(mode == NA_STRING && i < end) mode = px[po[++i]-1]; for(; i < l; ++i) { val = px[po[i]-1]; if(val == NA_STRING && narm) continue; id = HASH(((uintptr_t) val & 0xffffffff), K); while(h[id]) { index = h[id]-1; if(px[po[index]-1] == val) goto sbl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; sbl:; if(++n[index] >= max) { if(lastm || n[index] > max) { max = n[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } R_Free(h); R_Free(n); return mode; } SEXP w_mode_string(const SEXP *restrict px, const double *restrict pw, const int *restrict po, const int l, const int sorted, const int narm, const int ret) { if(l == 1) { if(sorted) return ISNAN(pw[0]) ? NA_STRING : px[0]; return ISNAN(pw[po[0]-1]) ? NA_STRING : px[po[0]-1]; } const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, index = 0, i = 0, end = l-1, minm = ret == 1, nfirstm = ret > 0, lastm = ret == 3; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values double *restrict sumw = (double*)R_Calloc(l, double); // Table to save each values sum of weights double max = NEG_INF; SEXP val, mode; if(sorted) { mode = px[0]; if(narm) while((mode == NA_STRING || ISNAN(pw[i])) && i < end) mode = px[++i]; for(; i != l; ++i) { val = px[i]; if(ISNAN(pw[i]) || (val == NA_STRING && narm)) continue; id = HASH(((uintptr_t) val & 0xffffffff), K); while(h[id]) { index = h[id]-1; if(px[index] == val) goto sbls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; sbls:; sumw[index] += pw[i]; if(sumw[index] >= max) { if(lastm || sumw[index] > max) { max = sumw[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } else { mode = px[po[0]-1]; if(narm) while((mode == NA_STRING || ISNAN(pw[po[i]-1])) && i < end) mode = px[po[++i]-1]; for(int oi; i != l; ++i) { oi = po[i]-1; val = px[oi]; if(ISNAN(pw[oi]) || (val == NA_STRING && narm)) continue; id = HASH(((uintptr_t) val & 0xffffffff), K); while(h[id]) { index = h[id]-1; if(px[po[index]-1] == val) goto sbl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; index = i; // n[i] = 1; sbl:; sumw[index] += pw[oi]; if(sumw[index] >= max) { if(lastm || sumw[index] > max) { max = sumw[index]; mode = val; } else if(nfirstm) { if(minm) { if(mode > val) mode = val; } else { if(mode < val) mode = val; } } } } } R_Free(h); R_Free(sumw); return mode; } // Implementations for R vectors ----------------------------------------------- // https://github.com/wch/r-source/blob/trunk/src/include/Rinlinedfuns.h SEXP my_ScalarLogical(int x) { SEXP ans = allocVector(LGLSXP, 1); // SET_SCALAR_LVAL(ans, x); // Not part of the API LOGICAL(ans)[0] = x; return ans; } // Splitting this up to increase thread safety SEXP mode_impl_plain(SEXP x, int narm, int ret) { int l = length(x); if(l <= 1) return x; switch(TYPEOF(x)) { case REALSXP: return ScalarReal(mode_double(REAL(x), &l, l, 1, narm, ret)); case INTSXP: return ScalarInteger(isFactor(x) ? mode_fct_logi(INTEGER(x), &l, l, nlevels(x), 1, narm, ret) : mode_int(INTEGER(x), &l, l, 1, narm, ret)); case LGLSXP: return my_ScalarLogical(mode_fct_logi(LOGICAL(x), &l, l, 1, 1, narm, ret)); case STRSXP: return ScalarString(mode_string(SEXPPTR_RO(x), &l, l, 1, narm, ret)); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } SEXP mode_impl(SEXP x, int narm, int ret) { if(length(x) <= 1) return x; SEXP res = PROTECT(mode_impl_plain(x, narm, ret)); copyMostAttrib(x, res); UNPROTECT(1); return res; } SEXP w_mode_impl_plain(SEXP x, double *pw, int narm, int ret) { int l = length(x); if(l <= 1) return x; switch(TYPEOF(x)) { case REALSXP: return ScalarReal(w_mode_double(REAL(x), pw, &l, l, 1, narm, ret)); case INTSXP: return ScalarInteger(isFactor(x) ? w_mode_fct_logi(INTEGER(x), pw, &l, l, nlevels(x), 1, narm, ret) : w_mode_int(INTEGER(x), pw, &l, l, 1, narm, ret)); case LGLSXP: return my_ScalarLogical(w_mode_fct_logi(LOGICAL(x), pw, &l, l, 1, 1, narm, ret)); case STRSXP: return ScalarString(w_mode_string(SEXPPTR_RO(x), pw, &l, l, 1, narm, ret)); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } SEXP w_mode_impl(SEXP x, double *pw, int narm, int ret) { if(length(x) <= 1) return x; SEXP res = PROTECT(w_mode_impl_plain(x, pw, narm, ret)); copyMostAttrib(x, res); UNPROTECT(1); return res; } SEXP mode_g_impl(SEXP x, int ng, int *pgs, int *po, int *pst, int sorted, int narm, int ret, int nthreads) { int l = length(x), tx = TYPEOF(x); if(nthreads > ng) nthreads = ng; SEXP res = PROTECT(allocVector(tx, ng)); if(sorted) { // Sorted: could compute cumulative group size (= starts) on the fly... but doesn't work multithreaded... po = &l; switch(tx) { case REALSXP: { double *px = REAL(x), *pres = REAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_REAL : mode_double(px + pst[gr]-1, po, pgs[gr], 1, narm, ret); break; } case INTSXP: { int *px = INTEGER(x), *pres = INTEGER(res); if(isFactor(x) && nlevels(x) < l / ng * 3) { int M = nlevels(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : mode_fct_logi(px + pst[gr]-1, po, pgs[gr], M, 1, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : mode_int(px + pst[gr]-1, po, pgs[gr], 1, narm, ret); } break; } case LGLSXP: { int *px = LOGICAL(x), *pres = LOGICAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_LOGICAL : mode_fct_logi(px + pst[gr]-1, po, pgs[gr], 1, 1, narm, ret); break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pres = SEXPPTR(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_STRING : mode_string(px + pst[gr]-1, po, pgs[gr], 1, narm, ret); break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } else { // Not sorted. Perhaps reordering x is faster?? switch(tx) { case REALSXP: { double *px = REAL(x), *pres = REAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_REAL : mode_double(px, po + pst[gr]-1, pgs[gr], 0, narm, ret); break; } case INTSXP: { int *px = INTEGER(x), *pres = INTEGER(res); if(isFactor(x) && nlevels(x) < l / ng * 3) { int M = nlevels(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : mode_fct_logi(px, po + pst[gr]-1, pgs[gr], M, 0, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : mode_int(px, po + pst[gr]-1, pgs[gr], 0, narm, ret); } break; } case LGLSXP: { int *px = LOGICAL(x), *pres = LOGICAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_LOGICAL : mode_fct_logi(px, po + pst[gr]-1, pgs[gr], 1, 0, narm, ret); break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pres = SEXPPTR(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_STRING : mode_string(px, po + pst[gr]-1, pgs[gr], 0, narm, ret); break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } copyMostAttrib(x, res); UNPROTECT(1); return res; } SEXP w_mode_g_impl(SEXP x, double *pw, int ng, int *pgs, int *po, int *pst, int sorted, int narm, int ret, int nthreads) { int l = length(x), tx = TYPEOF(x); if(nthreads > ng) nthreads = ng; SEXP res = PROTECT(allocVector(tx, ng)); if(sorted) { // Sorted: could compute cumulative group size (= starts) on the fly... but doesn't work multithreaded... po = &l; switch(tx) { case REALSXP: { double *px = REAL(x), *pres = REAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_REAL : w_mode_double(px + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, narm, ret); break; } case INTSXP: { int *px = INTEGER(x), *pres = INTEGER(res); if(isFactor(x) && nlevels(x) < l / ng * 3) { int M = nlevels(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : w_mode_fct_logi(px + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], M, 1, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : w_mode_int(px + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, narm, ret); } break; } case LGLSXP: { int *px = LOGICAL(x), *pres = LOGICAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_LOGICAL : w_mode_fct_logi(px + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, 1, narm, ret); break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pres = SEXPPTR(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_STRING : w_mode_string(px + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, narm, ret); break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } else { // Not sorted. Perhaps reordering x is faster?? switch(tx) { case REALSXP: { double *px = REAL(x), *pres = REAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_REAL : w_mode_double(px, pw, po + pst[gr]-1, pgs[gr], 0, narm, ret); break; } case INTSXP: { int *px = INTEGER(x), *pres = INTEGER(res); if(isFactor(x) && nlevels(x) < l / ng * 3) { int M = nlevels(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : w_mode_fct_logi(px, pw, po + pst[gr]-1, pgs[gr], M, 0, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_INTEGER : w_mode_int(px, pw, po + pst[gr]-1, pgs[gr], 0, narm, ret); } break; } case LGLSXP: { int *px = LOGICAL(x), *pres = LOGICAL(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_LOGICAL : w_mode_fct_logi(px, pw, po + pst[gr]-1, pgs[gr], 1, 0, narm, ret); break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pres = SEXPPTR(res); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? NA_STRING : w_mode_string(px, pw, po + pst[gr]-1, pgs[gr], 0, narm, ret); break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } copyMostAttrib(x, res); UNPROTECT(1); return res; } // Functions for Export -------------------------------------------------------- SEXP fmodeC(SEXP x, SEXP g, SEXP w, SEXP Rnarm, SEXP Rret, SEXP Rnthreads) { int nullg = isNull(g), nullw = isNull(w), l = length(x), nprotect = 0; if(l <= 1) return x; if(nullg && nullw) return mode_impl(x, asLogical(Rnarm), asInteger(Rret)); double tmp = 0.0, *restrict pw = &tmp; if(!nullw) { if(length(w) != l) error("length(w) must match length(x)"); if(TYPEOF(w) != REALSXP) { if(!(TYPEOF(w) == INTSXP || TYPEOF(w) == LGLSXP)) error("weights need to be double or integer/logical (internally coerced to double)"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } pw = REAL(w); } if(nullg) { // if(TYPEOF(w) != REALSXP) UNPROTECT(nprotect); return w_mode_impl(x, pw, asLogical(Rnarm), asInteger(Rret)); } if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP *restrict pg = SEXPPTR_RO(g), o = pg[6]; int sorted = LOGICAL(pg[5])[1] == 1, ng = INTEGER(pg[0])[0], *restrict pgs = INTEGER(pg[2]), *restrict po, *restrict pst, nthreads = asInteger(Rnthreads); if(l != length(pg[1])) error("length(g) must match length(x)"); if(isNull(o)) { int *cgs = (int *) R_alloc(ng+2, sizeof(int)), *restrict pgv = INTEGER(pg[1]); cgs[1] = 1; for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; pst = cgs + 1; if(sorted) po = &l; else { int *restrict count = (int *) R_Calloc(ng+1, int); po = (int *) R_alloc(l, sizeof(int)); --po; for(int i = 0; i != l; ++i) po[cgs[pgv[i]] + count[pgv[i]]++] = i+1; ++po; R_Free(count); } } else { po = INTEGER(o); pst = INTEGER(getAttrib(o, sym_starts)); } // if(nullw) return mode_g_impl(x, ng, pgs, po, pst, sorted, asLogical(Rnarm), asInteger(Rret), asInteger(Rnthreads)); // if(TYPEOF(w) != REALSXP) UNPROTECT(nprotect); // return w_mode_g_impl(x, pw, ng, pgs, po, pst, sorted, asLogical(Rnarm), asInteger(Rret), asInteger(Rnthreads)); // Thomas Kalibera Patch: if(nthreads > max_threads) nthreads = max_threads; SEXP res; if(nullw) res = mode_g_impl(x, ng, pgs, po, pst, sorted, asLogical(Rnarm), asInteger(Rret), nthreads); else res = w_mode_g_impl(x, pw, ng, pgs, po, pst, sorted, asLogical(Rnarm), asInteger(Rret), nthreads); UNPROTECT(nprotect); return res; } // TODO: allow column-level parallelism?? SEXP fmodelC(SEXP x, SEXP g, SEXP w, SEXP Rnarm, SEXP Rret, SEXP Rnthreads) { int nullg = isNull(g), nullw = isNull(w), l = length(x), ng = 0, nprotect = 1, narm = asLogical(Rnarm), ret = asInteger(Rret), nthreads = asInteger(Rnthreads); if(l < 1) return x; if(nthreads > max_threads) nthreads = max_threads; SEXP out = PROTECT(allocVector(VECSXP, l)), *restrict pout = SEXPPTR(out); const SEXP *restrict px = SEXPPTR_RO(x); if(nullg && nthreads > l) nthreads = l; if(nullg && nullw) { if(nthreads <= 1) { for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, mode_impl(px[j], narm, ret)); } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) pout[j] = mode_impl_plain(px[j], narm, ret); for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); // Not thread safe and thus taken out... } } else { int nrx = length(px[0]); double tmp = 0.0, *restrict pw = &tmp; if(!nullw) { if(length(w) != nrx) error("length(w) must match nrow(x)"); if(TYPEOF(w) != REALSXP) { if(!(TYPEOF(w) == INTSXP || TYPEOF(w) == LGLSXP)) error("weights need to be double or integer/logical (internally coerced to double)"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } pw = REAL(w); } if(nullg) { if(nthreads <= 1) { for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, w_mode_impl(px[j], pw, narm, ret)); } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) pout[j] = w_mode_impl_plain(px[j], pw, narm, ret); for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); // Not thread safe and thus taken out... } } else { if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP *restrict pg = SEXPPTR_RO(g), o = pg[6]; ng = INTEGER(pg[0])[0]; int sorted = LOGICAL(pg[5])[1] == 1, *restrict pgs = INTEGER(pg[2]), *restrict po, *restrict pst; if(nrx != length(pg[1])) error("length(g) must match nrow(x)"); if(isNull(o)) { int *cgs = (int *) R_alloc(ng+2, sizeof(int)), *restrict pgv = INTEGER(pg[1]); cgs[1] = 1; for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; pst = cgs + 1; if(sorted) po = &l; else { int *restrict count = (int *) R_Calloc(ng+1, int); po = (int *) R_alloc(nrx, sizeof(int)); --po; for(int i = 0; i != nrx; ++i) po[cgs[pgv[i]] + count[pgv[i]]++] = i+1; ++po; R_Free(count); } } else { po = INTEGER(o); pst = INTEGER(getAttrib(o, sym_starts)); } if(nullw) { // Parallelism at sub-column level for(int j = 0; j < l; ++j) SET_VECTOR_ELT(out, j, mode_g_impl(px[j], ng, pgs, po, pst, sorted, narm, ret, nthreads)); } else { // Parallelism at sub-column level for(int j = 0; j < l; ++j) SET_VECTOR_ELT(out, j, w_mode_g_impl(px[j], pw, ng, pgs, po, pst, sorted, narm, ret, nthreads)); } } } DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } SEXP fmodemC(SEXP x, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rret, SEXP Rnthreads) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], narm = asLogical(Rnarm), ret = asInteger(Rret), nthreads = asInteger(Rnthreads), nullg = isNull(g), nullw = isNull(w), nprotect = 1; if(l <= 1) return x; // Prevents seqfault for numeric(0) #101 if(nthreads > max_threads) nthreads = max_threads; if(nthreads > col) nthreads = col; double tmp = 0.0, *restrict pw = &tmp; if(!nullw) { if(length(w) != l) error("length(w) must match nrow(x)"); if(TYPEOF(w) != REALSXP) { if(!(TYPEOF(w) == INTSXP || TYPEOF(w) == LGLSXP)) error("weights need to be double or integer/logical (internally coerced to double)"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } pw = REAL(w); } if(nullg) { SEXP res = PROTECT(allocVector(tx, col)); switch(tx) { case REALSXP: { double *px = REAL(x), *restrict pres = REAL(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = mode_double(px + j*l, &l, l, 1, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = w_mode_double(px + j*l, pw, &l, l, 1, narm, ret); } break; } case INTSXP: { // Factor matrix not well defined object... int *px = INTEGER(x), *restrict pres = INTEGER(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = mode_int(px + j*l, &l, l, 1, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = w_mode_int(px + j*l, pw, &l, l, 1, narm, ret); } break; } case LGLSXP: { int *px = LOGICAL(x), *restrict pres = LOGICAL(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = mode_fct_logi(px + j*l, &l, l, 1, 1, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = w_mode_fct_logi(px + j*l, pw, &l, l, 1, 1, narm, ret); } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *restrict pres = SEXPPTR(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = mode_string(px + j*l, &l, l, 1, narm, ret); } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = w_mode_string(px + j*l, pw, &l, l, 1, narm, ret); } break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } matCopyAttr(res, x, Rdrop, /*ng=*/0); UNPROTECT(nprotect); return res; } // With groups if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP *restrict pg = SEXPPTR_RO(g), o = pg[6]; int sorted = LOGICAL(pg[5])[1] == 1, ng = INTEGER(pg[0])[0], *restrict pgs = INTEGER(pg[2]), *restrict po, *restrict pst, gl = length(pg[1]); if(l != gl) error("length(g) must match nrow(x)"); SEXP res = PROTECT(allocVector(tx, ng * col)); if(isNull(o)) { int *cgs = (int *) R_alloc(ng+2, sizeof(int)), *restrict pgv = INTEGER(pg[1]); cgs[1] = 1; for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; pst = cgs + 1; if(sorted) po = &l; else { int *restrict count = (int *) R_Calloc(ng+1, int); po = (int *) R_alloc(l, sizeof(int)); --po; for(int i = 0; i != l; ++i) po[cgs[pgv[i]] + count[pgv[i]]++] = i+1; ++po; R_Free(count); } } else { po = INTEGER(o); pst = INTEGER(getAttrib(o, sym_starts)); } if(sorted) { // Sorted switch(tx) { case REALSXP: { double *px = REAL(x), *restrict pres = REAL(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; double *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_REAL : mode_double(pxj + pst[gr]-1, po, pgs[gr], 1, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; double *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_REAL : w_mode_double(pxj + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, narm, ret); } } break; } case INTSXP: { // Factor matrix not well defined object... int *px = INTEGER(x), *restrict pres = INTEGER(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int *pxj = px + j * l, jng = j * ng; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_INTEGER : mode_int(pxj + pst[gr]-1, po, pgs[gr], 1, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int *pxj = px + j * l, jng = j * ng; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_INTEGER : w_mode_int(pxj + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, narm, ret); } } break; } case LGLSXP: { int *px = LOGICAL(x), *restrict pres = LOGICAL(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int *pxj = px + j * l, jng = j * ng; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_LOGICAL : mode_fct_logi(pxj + pst[gr]-1, po, pgs[gr], 1, 1, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int *pxj = px + j * l, jng = j * ng; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_LOGICAL : w_mode_fct_logi(pxj + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, 1, narm, ret); } } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *restrict pres = SEXPPTR(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; const SEXP *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_STRING : mode_string(pxj + pst[gr]-1, po, pgs[gr], 1, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; const SEXP *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_STRING : w_mode_string(pxj + pst[gr]-1, pw + pst[gr]-1, po, pgs[gr], 1, narm, ret); } } break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } else { // Not sorted switch(tx) { case REALSXP: { double *px = REAL(x), *restrict pres = REAL(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; double *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_REAL : mode_double(pxj, po + pst[gr]-1, pgs[gr], 0, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; double *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_REAL : w_mode_double(pxj, pw, po + pst[gr]-1, pgs[gr], 0, narm, ret); } } break; } case INTSXP: { int *px = INTEGER(x), *restrict pres = INTEGER(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng, *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_INTEGER : mode_int(pxj, po + pst[gr]-1, pgs[gr], 0, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng, *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_INTEGER : w_mode_int(pxj, pw, po + pst[gr]-1, pgs[gr], 0, narm, ret); } } break; } case LGLSXP: { int *px = LOGICAL(x), *restrict pres = LOGICAL(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng, *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_LOGICAL : mode_fct_logi(pxj, po + pst[gr]-1, pgs[gr], 1, 0, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng, *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_LOGICAL : w_mode_fct_logi(pxj, pw, po + pst[gr]-1, pgs[gr], 1, 0, narm, ret); } } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *restrict pres = SEXPPTR(res); if(nullw) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; const SEXP *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_STRING : mode_string(pxj, po + pst[gr]-1, pgs[gr], 0, narm, ret); } } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; const SEXP *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? NA_STRING : w_mode_string(pxj, pw, po + pst[gr]-1, pgs[gr], 0, narm, ret); } } break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } matCopyAttr(res, x, Rdrop, ng); UNPROTECT(nprotect); return res; } collapse/src/small_helper.c0000644000176200001440000007676515202504365015505 0ustar liggesusers#include "collapse_c.h" #include "data.table.h" // #ifndef USE_RINTERNALS // #define USE_RINTERNALS // #endif // #include "base_radixsort.h" #include void matCopyAttr(SEXP out, SEXP x, SEXP Rdrop, int ng) { SEXP dn = getAttrib(x, R_DimNamesSymbol); SEXP cn = isNull(dn) ? R_NilValue : VECTOR_ELT(dn, 1); // PROTECT ?? if(ng == 0 && asLogical(Rdrop)) { if(length(cn)) setAttrib(out, R_NamesSymbol, cn); } else { int nprotect = 1; SEXP dim = PROTECT(duplicate(getAttrib(x, R_DimSymbol))); INTEGER(dim)[0] = ng == 0 ? 1 : ng; dimgets(out, dim); if(length(cn)) { ++nprotect; SEXP dn = PROTECT(allocVector(VECSXP, 2)); SET_VECTOR_ELT(dn, 0, R_NilValue); SET_VECTOR_ELT(dn, 1, cn); dimnamesgets(out, dn); } if(!isObject(x)) copyMostAttrib(x, out); UNPROTECT(nprotect); } } void DFcopyAttr(SEXP out, SEXP x, int ng) { SHALLOW_DUPLICATE_ATTRIB(out, x); if(isObject(x)) { // No attributes for plain lists if(ng == 0) { setAttrib(out, R_RowNamesSymbol, ScalarInteger(1)); } else { SEXP rn = PROTECT(allocVector(INTSXP, 2)); // Needed here, now unsafe to pass uninitialized vectors to R_RowNamesSymbol. INTEGER(rn)[0] = NA_INTEGER; INTEGER(rn)[1] = -ng; setAttrib(out, R_RowNamesSymbol, rn); UNPROTECT(1); } } } // Faster than rep_len(value, n) and slightly faster than matrix(value, n) (which in turn is faster than rep_len)... SEXP falloc(SEXP value, SEXP n, SEXP simplify) { int l = asInteger(n), tval = TYPEOF(value), isat = isVectorAtomic(value); if((length(value) > 1 && isat) || asLogical(simplify) == 0) { isat = 0; tval = VECSXP; } SEXP out = PROTECT(allocVector(isat ? tval : VECSXP, l)); switch(tval) { case INTSXP: case LGLSXP: { int val = asInteger(value), *pout = INTEGER(out); if(val == 0) memset(pout, 0, l*sizeof(int)); else for(int i = 0; i != l; ++i) pout[i] = val; break; } case REALSXP: { double val = asReal(value), *pout = REAL(out); if(val == 0.0) memset(pout, 0, l*sizeof(double)); else for(int i = 0; i != l; ++i) pout[i] = val; break; } case STRSXP: { SEXP val = asChar(value), *pout = SEXPPTR(out); for(int i = 0; i != l; ++i) pout[i] = val; break; } case CPLXSXP: { Rcomplex val = asComplex(value), *pout = COMPLEX(out); for(int i = 0; i != l; ++i) pout[i] = val; break; } case RAWSXP: { Rbyte val = RAW(value)[0], *pout = RAW(out); for(int i = 0; i != l; ++i) pout[i] = val; break; } default: { SEXP *pout = SEXPPTR(out); if(asLogical(simplify) && tval == VECSXP && length(value) == 1) value = VECTOR_ELT(value, 0); for(int i = 0; i != l; ++i) pout[i] = value; break; } } if(isat) copyMostAttrib(value, out); UNPROTECT(1); return out; } SEXP groups2GRP(SEXP x, SEXP lx, SEXP gs) { int l = length(x); SEXP out = PROTECT(allocVector(INTSXP, asInteger(lx))); int *pout = INTEGER(out)-1, *pgs = INTEGER(gs); // SEXP *px = VECTOR_PTR(x); // -> Depreciated interface: https://github.com/hadley/r-internals/blob/ea892fa79bbffe961e78dbe9c90ce4ca3bf2d9bc/vectors.md // Matt Dowle Commented: // VECTOR_PTR does exist but returns 'not safe to return vector pointer' when USE_RINTERNALS is not defined. // VECTOR_DATA and LIST_POINTER exist too but call VECTOR_PTR. All are clearly not intended to be used by packages. // The concern is overhead inside VECTOR_ELT() biting when called repetitively in a loop like we do here. That's why // we take the R API (INTEGER()[i], REAL()[i], etc) outside loops for the simple types even when not parallel. For this // type list case (VECSXP) it might be that some items are ALTREP for example, so we really should use the heavier // _ELT accessor (VECTOR_ELT) inside the loop in this case. const SEXP *px = SEXPPTR_RO(x); for(int j = 0; j != l; ++j) { // This can go in any direction.. // SEXP column = VECTOR_ELT(x, j); int *pcolumn = INTEGER(px[j]), jp = j+1; for(int i = pgs[j]; i--; ) pout[pcolumn[i]] = jp; // This can go in any direction... } UNPROTECT(1); return out; } // Note: Only supports numeric data!!!! SEXP lassign(SEXP x, SEXP s, SEXP rows, SEXP fill) { int l = length(x), tr = TYPEOF(rows), ss = asInteger(s), rs = LENGTH(rows); SEXP out = PROTECT(allocVector(VECSXP, l)); // SEXP *px = VECTOR_PTR(x); // -> Depreciated interface: https://github.com/hadley/r-internals/blob/ea892fa79bbffe961e78dbe9c90ce4ca3bf2d9bc/vectors.md const SEXP *px = SEXPPTR_RO(x); double dfill = asReal(fill); if(tr == INTSXP) { int *rowsv = INTEGER(rows); //, vs = ss * sizeof(double); for(int j = l; j--; ) { SEXP column = px[j]; // VECTOR_ELT(x, j); if(length(column) != rs) error("length(rows) must match nrow(x)"); SEXP outj; SET_VECTOR_ELT(out, j, outj = allocVector(REALSXP, ss)); double *pcolumn = REAL(column), *poutj = REAL(outj); // memset(poutj, dfill, vs); // cannot memset missing values... can only memset 0 for(int i = ss; i--; ) poutj[i] = dfill; for(int i = 0; i != rs; ++i) poutj[rowsv[i]-1] = pcolumn[i]; SHALLOW_DUPLICATE_ATTRIB(outj, column); } } else if(tr == LGLSXP) { int *rowsv = LOGICAL(rows); if(ss != rs) error("length(rows) must match length(s) if rows is a logical vector"); for(int j = l; j--; ) { SEXP column = px[j]; // VECTOR_ELT(x, j); SEXP outj; SET_VECTOR_ELT(out, j, outj = allocVector(REALSXP, ss)); double *pcolumn = REAL(column), *poutj = REAL(outj); for(int i = 0, k = 0; i != rs; ++i) poutj[i] = rowsv[i] ? pcolumn[k++] : dfill; SHALLOW_DUPLICATE_ATTRIB(outj, column); } } else error("rows must be positive integers or a logical vector"); SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } SEXP gwhich_first(SEXP x, SEXP g, SEXP target) { if(!inherits(g, "GRP")) error("Internal error: g must be an object of class 'GRP'."); const int ng = asInteger(VECTOR_ELT(g, 0)), *pg = INTEGER_RO(VECTOR_ELT(g, 1)), l = length(VECTOR_ELT(g, 1)); if(l != length(x)) error("length(x) must match length(g)."); if(ng != length(target)) error("length(target) must match number of groups."); if(TYPEOF(x) != TYPEOF(target)) error("x is of type %s whereas target is of type %s.", type2char(TYPEOF(x)), type2char(TYPEOF(target))); SEXP res = PROTECT(allocVector(INTSXP, ng)); if(ng == 0) { UNPROTECT(1); return res; } memset(INTEGER(res), 0, ng*sizeof(int)); int *pres = INTEGER(res)-1; switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { const int *px = INTEGER_RO(x), *pt = INTEGER_RO(target)-1; for(int i = 0; i != l; ++i) if(pres[pg[i]] == 0 && px[i] == pt[pg[i]]) pres[pg[i]] = i+1; break; } case REALSXP: { const double *px = REAL_RO(x), *pt = REAL_RO(target)-1; for(int i = 0; i != l; ++i) if(pres[pg[i]] == 0 && px[i] == pt[pg[i]]) pres[pg[i]] = i+1; break; } case STRSXP: { const SEXP *px = STRING_PTR_RO(x), *pt = STRING_PTR_RO(target)-1; for(int i = 0; i != l; ++i) if(pres[pg[i]] == 0 && px[i] == pt[pg[i]]) pres[pg[i]] = i+1; break; } default: error("Unsupported type %s", type2char(TYPEOF(x))); } UNPROTECT(1); return res; } SEXP gslice_multi(SEXP g, SEXP o, SEXP Rn, SEXP first) { if(!inherits(g, "GRP")) error("Internal error: g must be an object of class 'GRP'."); const int n = asInteger(Rn), ng = asInteger(VECTOR_ELT(g, 0)), l = length(VECTOR_ELT(g, 1)), *pg = INTEGER_RO(VECTOR_ELT(g, 1)), *pgs = INTEGER_RO(VECTOR_ELT(g, 2)); int lvec = 0; #pragma omp simd reduction(+:lvec) for(int i = 0; i < ng; ++i) lvec += n <= pgs[i] ? n : pgs[i]; SEXP res = PROTECT(allocVector(INTSXP, lvec)); int *sizes = (int*)R_Calloc(ng+1, int); int *pres = INTEGER(res); if(isNull(o)) { if(asLogical(first)) { for(int i = 0, k = 0; i != l; ++i) if(n > sizes[pg[i]]++) pres[k++] = i+1; } else { for(int i = l, k = lvec; i--; ) if(n > sizes[pg[i]]++) pres[--k] = i+1; } } else { if(length(o) != l) error("length(o) must match length(g)"); const int *po = INTEGER(o); if(asLogical(first)) { for(int i = 0, k = 0; i != l; ++i) if(n > sizes[pg[po[i]-1]]++) pres[k++] = po[i]; } else { for(int i = l, k = lvec; i--; ) if(n > sizes[pg[po[i]-1]]++) pres[--k] = po[i]; } } R_Free(sizes); UNPROTECT(1); return res; } // SEXP CasChar(SEXP x) { // return coerceVector(x, STRSXP); // } /* Inspired by: * do_list2env : .Internal(list2env(x, envir)) */ SEXP multiassign(SEXP lhs, SEXP rhs, SEXP envir) { if(TYPEOF(lhs) != STRSXP) error("lhs needs to be character"); int n = length(lhs); if(n == 1) { // lazy_duplicate appears not necessary (copy-on modify is automatically implemented, and <- also does not use it). SEXP nam = installChar(STRING_ELT(lhs, 0)); defineVar(nam, rhs, envir); return R_NilValue; } if(length(rhs) != n) error("length(lhs) must be equal to length(rhs)"); const SEXP *plhs = SEXPPTR_RO(lhs); switch(TYPEOF(rhs)) { // installTrChar translates to native encoding, installChar does the same now, but also is available on older systems. case REALSXP: { double *prhs = REAL(rhs); for(int i = 0; i < n; ++i) { SEXP nam = installChar(plhs[i]); defineVar(nam, ScalarReal(prhs[i]), envir); } break; } case INTSXP: { int *prhs = INTEGER(rhs); for(int i = 0; i < n; ++i) { SEXP nam = installChar(plhs[i]); defineVar(nam, ScalarInteger(prhs[i]), envir); } break; } case STRSXP: { const SEXP *prhs = SEXPPTR_RO(rhs); for(int i = 0; i < n; ++i) { SEXP nam = installChar(plhs[i]); defineVar(nam, ScalarString(prhs[i]), envir); } break; } case LGLSXP: { int *prhs = LOGICAL(rhs); for(int i = 0; i < n; ++i) { SEXP nam = installChar(plhs[i]); defineVar(nam, ScalarLogical(prhs[i]), envir); } break; } case VECSXP: { // lazy_duplicate appears not necessary (copy-on modify is automatically implemented, and <- also does not use it). for(int i = 0; i < n; ++i) { SEXP nam = installChar(plhs[i]); defineVar(nam, VECTOR_ELT(rhs, i), envir); } break; } default: { SEXP rhsl = PROTECT(coerceVector(rhs, VECSXP)); for(int i = 0; i < n; ++i) { SEXP nam = installChar(plhs[i]); defineVar(nam, VECTOR_ELT(rhsl, i), envir); } UNPROTECT(1); } } return R_NilValue; } SEXP vlabels(SEXP x, SEXP attrn, SEXP usenam) { if(!isString(attrn)) error("'attrn' must be of mode character"); if(length(attrn) != 1) error("exactly one attribute 'attrn' must be given"); SEXP sym_attrn = PROTECT(installChar(STRING_ELT(attrn, 0))); int l = length(x); if(TYPEOF(x) != VECSXP) { SEXP labx = getAttrib(x, sym_attrn); UNPROTECT(1); if(labx == R_NilValue) return ScalarString(NA_STRING); return labx; } SEXP res = PROTECT(allocVector(STRSXP, l)); SEXP *pres = SEXPPTR(res); const SEXP *px = SEXPPTR_RO(x); for(int i = 0; i < l; ++i) { SEXP labxi = getAttrib(px[i], sym_attrn); if(TYPEOF(labxi) == STRSXP) pres[i] = STRING_ELT(labxi, 0); else if(labxi == R_NilValue) pres[i] = NA_STRING; else { PROTECT(labxi); pres[i] = asChar(labxi); UNPROTECT(1); } } if(asLogical(usenam)) { SEXP nam = getAttrib(x, R_NamesSymbol); if(TYPEOF(nam) != NILSXP) namesgets(res, nam); } UNPROTECT(2); return res; } // Note: ind can be NULL... SEXP setvlabels(SEXP x, SEXP attrn, SEXP value, SEXP ind) { // , SEXP sc if(!isString(attrn)) error("'attrn' must be of mode character"); if(length(attrn) != 1) error("exactly one attribute 'attrn' must be given"); if(TYPEOF(x) != VECSXP) error("X must be a list"); int nprotect = 1, l = length(x), tv = TYPEOF(value); // , scl = asLogical(sc); const SEXP *px = SEXPPTR_RO(x); // , xsc; // if(scl) { // Create shallow copy // if(INHERITS(x, char_datatable)) { // xsc = PROTECT(Calloccol(x)); // } else { // xsc = PROTECT(shallow_duplicate(x)); // } // ++nprotect; // px = SEXPPTR(xsc); // } const SEXP *pv = px; if(tv != NILSXP) { if(tv == VECSXP || tv == STRSXP) { pv = SEXPPTR_RO(value); } else { SEXP vl = PROTECT(coerceVector(value, VECSXP)); pv = SEXPPTR_RO(vl); ++nprotect; } } SEXP sym_attrn = PROTECT(installChar(STRING_ELT(attrn, 0))); if(length(ind) == 0) { if(tv != NILSXP && l != length(value)) error("length(x) must match length(value)"); if(tv == NILSXP) { for(int i = 0; i < l; ++i) setAttrib(px[i], sym_attrn, R_NilValue); } else if(tv == STRSXP) { for(int i = 0; i < l; ++i) setAttrib(px[i], sym_attrn, ScalarString(pv[i])); } else { for(int i = 0; i < l; ++i) setAttrib(px[i], sym_attrn, pv[i]); } } else { if(TYPEOF(ind) != INTSXP) error("vlabels<-: ind must be of type integer"); int li = length(ind), *pind = INTEGER(ind), ii; if(tv != NILSXP && li != length(value)) error("length(ind) must match length(value)"); if(li == 0 || li > l) error("vlabels<-: length(ind) must be > 0 and <= length(x)"); if(tv == NILSXP) { for(int i = 0; i < li; ++i) { ii = pind[i]-1; if(ii < 0 || ii >= l) error("vlabels<-: ind must be between 1 and length(x)"); setAttrib(px[ii], sym_attrn, R_NilValue); } } else if(tv == STRSXP) { for(int i = 0; i < li; ++i) { ii = pind[i]-1; if(ii < 0 || ii >= l) error("vlabels<-: ind must be between 1 and length(x)"); setAttrib(px[ii], sym_attrn, ScalarString(pv[i])); } } else { for(int i = 0; i < li; ++i) { ii = pind[i]-1; if(ii < 0 || ii >= l) error("vlabels<-: ind must be between 1 and length(x)"); setAttrib(px[ii], sym_attrn, pv[i]); } } } UNPROTECT(nprotect); // return scl ? xsc : x; return x; } SEXP Cissorted(SEXP x, SEXP strictly) { return ScalarLogical(FALSE == isUnsorted(x, (Rboolean)asLogical(strictly))); } SEXP fcrosscolon(SEXP x, SEXP ngp, SEXP y, SEXP ckna) { int l = length(x), narm = asLogical(ckna); if(l != length(y)) error("length mismatch"); if(TYPEOF(x) != INTSXP) error("x needs to be integer"); if(TYPEOF(y) != INTSXP) error("y needs to be integer"); int ng = asInteger(ngp), *px = INTEGER(x), *py = INTEGER(y); if(ng > INT_MAX / 2) error("Table larger than INT_MAX/2"); if(narm) { for(int i = 0; i != l; ++i) { if(px[i] != NA_INTEGER) { if(py[i] == NA_INTEGER) px[i] = NA_INTEGER; else px[i] += (py[i] - 1) * ng; } } } else { for(int i = 0; i != l; ++i) px[i] += (py[i] - 1) * ng; } return R_NilValue; } SEXP fwtabulate(SEXP x, SEXP w, SEXP ngp, SEXP ckna) { int l = length(x), narm = asLogical(ckna), ng = asInteger(ngp), nwl = isNull(w); if(TYPEOF(x) != INTSXP) error("x needs to be integer"); // if(ng > INT_MAX/2) error("Table larger than INT_MAX/2"); SEXP tab = PROTECT(allocVector(nwl ? INTSXP : REALSXP, ng)); int *px = INTEGER(x); if(nwl) { int *ptab = INTEGER(tab); memset(ptab, 0, sizeof(int) * ng); --ptab; if(narm) { for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER) ++ptab[px[i]]; } else { for(int i = 0; i != l; ++i) ++ptab[px[i]]; } } else { if(length(w) != l) error("length(w) must be equal to length(x)"); double *ptab = REAL(tab); memset(ptab, 0.0, sizeof(double) * ng); --ptab; switch(TYPEOF(w)) { case REALSXP: { double *pw = REAL(w); if(narm) { for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER && NISNAN(pw[i])) ptab[px[i]] += pw[i]; } else { for(int i = 0; i != l; ++i) if(NISNAN(pw[i])) ptab[px[i]] += pw[i]; } break; } case INTSXP: case LGLSXP: { int *pw = INTEGER(w); if(narm) { for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER && pw[i] != NA_INTEGER) ptab[px[i]] += pw[i]; } else { for(int i = 0; i != l; ++i) if(pw[i] != NA_INTEGER) ptab[px[i]] += pw[i]; } break; } default: error("Unsupported weights type!"); } } UNPROTECT(1); return tab; } // --------------------------------------------------------------------------- // GRP.default with drop = FALSE: build full Cartesian product of factor levels // (and observed unique values for non-factor columns) as the grouping universe. // // X: original list / data.frame (used for class preservation on groups) // cols: list of grouping columns (already subset from X by `by`) // namby: character vector of names for the groups data.frame // retgrp_: scalar logical, whether to return the groups data.frame // // Returns list(N.groups, group.id, group.sizes, group.starts, groups). // `group.id` is 1-based and free of NAs (NAs become explicit factor levels). // `group.sizes`/`group.starts` have length Ng (= product of per-column sizes), // with 0 in `group.sizes` and 0 in `group.starts` for combinations not observed. // --------------------------------------------------------------------------- SEXP GRP_default_drop_C(SEXP X, SEXP cols, SEXP namby, SEXP retgrp_) { if(TYPEOF(cols) != VECSXP) error("Internal error: cols must be a list"); int nc = length(cols), retgrp = asLogical(retgrp_); if(nc == 0) error("GRP.default(drop = FALSE) requires at least one grouping column"); int n = length(VECTOR_ELT(cols, 0)); int nprotect = 0; SEXP codes_list = PROTECT(allocVector(VECSXP, nc)); ++nprotect; // per-col 1-based codes SEXP levs_list = PROTECT(allocVector(VECSXP, nc)); ++nprotect; // per-col level/unique-value vectors int *isfac = (int*) R_alloc(nc, sizeof(int)); int *pnl = (int*) R_alloc(nc, sizeof(int)); long long Ng_ll = 1; for(int j = 0; j < nc; ++j) { SEXP col = VECTOR_ELT(cols, j); if(length(col) != n) error("All grouping columns must have equal length"); if(isFactor(col)) { isfac[j] = 1; SEXP lev = getAttrib(col, R_LevelsSymbol); int nl = length(lev); const int *pcol = INTEGER_RO(col); // Does the factor already have an explicit NA level? Does the column contain NAs? int hasNAlev = 0, na_lev_pos = 0; for(int k = 0; k < nl; ++k) { if(STRING_ELT(lev, k) == NA_STRING) { hasNAlev = 1; na_lev_pos = k + 1; break; } } int colHasNA = 0; for(int i = 0; i < n; ++i) if(pcol[i] == NA_INTEGER) { colHasNA = 1; break; } if(colHasNA) { // Remap NA codes to an NA level (adding one if necessary). Mirrors addNA2(). int nl_new = hasNAlev ? nl : nl + 1; int na_code = hasNAlev ? na_lev_pos : nl_new; SEXP new_lev = lev; if(!hasNAlev) { new_lev = PROTECT(allocVector(STRSXP, nl_new)); ++nprotect; for(int k = 0; k < nl; ++k) SET_STRING_ELT(new_lev, k, STRING_ELT(lev, k)); SET_STRING_ELT(new_lev, nl, NA_STRING); } SEXP codes = PROTECT(allocVector(INTSXP, n)); ++nprotect; int *pcodes = INTEGER(codes); for(int i = 0; i < n; ++i) pcodes[i] = pcol[i] == NA_INTEGER ? na_code : pcol[i]; SET_VECTOR_ELT(codes_list, j, codes); SET_VECTOR_ELT(levs_list, j, new_lev); pnl[j] = nl_new; } else { // Use factor codes / levels as-is (no allocation) SET_VECTOR_ELT(codes_list, j, col); SET_VECTOR_ELT(levs_list, j, lev); pnl[j] = nl; } } else { isfac[j] = 0; // Non-factor: hash the column. groupVec returns 1-based codes; NAs are // assigned a dedicated code (na.included), so combined IDs are NA-free too. SEXP g = PROTECT(groupVec(col, retgrp ? ScalarLogical(1) : ScalarLogical(0), ScalarLogical(0))); ++nprotect; int ng = asInteger(getAttrib(g, sym_n_groups)); SET_VECTOR_ELT(codes_list, j, g); pnl[j] = ng; if(retgrp) { // Unique values in hash-encounter order = col[starts] SEXP starts = getAttrib(g, sym_starts); SEXP uvals = PROTECT(allocVector(TYPEOF(col), ng)); ++nprotect; subsetVectorRaw(uvals, col, starts, /*anyNA=*/ false); copyMostAttrib(col, uvals); SET_VECTOR_ELT(levs_list, j, uvals); } } Ng_ll *= pnl[j]; if(Ng_ll > INT_MAX) error("Total number of group combinations (%lld) exceeds INT_MAX. Consider using drop = TRUE.", Ng_ll); } int Ng = (int) Ng_ll; // --- Combined group.id via stride arithmetic (cf. fcrosscolon) --- SEXP gid = PROTECT(allocVector(INTSXP, n)); ++nprotect; int *pgid = INTEGER(gid); const int *p0 = INTEGER_RO(VECTOR_ELT(codes_list, 0)); memcpy(pgid, p0, sizeof(int) * n); long long stride = pnl[0]; for(int j = 1; j < nc; ++j) { const int *pj = INTEGER_RO(VECTOR_ELT(codes_list, j)); int s = (int) stride; for(int i = 0; i < n; ++i) pgid[i] += (pj[i] - 1) * s; stride *= pnl[j]; } // --- group.sizes (cf. fwtabulate) and group.starts (first occurrence) --- SEXP gs = PROTECT(allocVector(INTSXP, Ng)); ++nprotect; SEXP gst = PROTECT(allocVector(INTSXP, Ng)); ++nprotect; int *pgs = INTEGER(gs), *pgst = INTEGER(gst); memset(pgs, 0, sizeof(int) * Ng); memset(pgst, 0, sizeof(int) * Ng); for(int i = 0; i < n; ++i) { int g = pgid[i] - 1; ++pgs[g]; if(pgst[g] == 0) pgst[g] = i + 1; } // --- groups data.frame: enumerate every combination in column-major order --- SEXP groups = R_NilValue; if(retgrp) { PROTECT(groups = allocVector(VECSXP, nc)); ++nprotect; long long stride_j = 1; for(int j = 0; j < nc; ++j) { int nlj = pnl[j], sj = (int) stride_j; SEXP lev = VECTOR_ELT(levs_list, j); if(isfac[j]) { // Build a fresh factor with the same levels and class as the input column SEXP newcol = PROTECT(allocVector(INTSXP, Ng)); int *pnew = INTEGER(newcol); for(int g = 0; g < Ng; ++g) pnew[g] = (g / sj) % nlj + 1; setAttrib(newcol, R_LevelsSymbol, lev); SEXP origcol = VECTOR_ELT(cols, j); SEXP cls = getAttrib(origcol, R_ClassSymbol); setAttrib(newcol, R_ClassSymbol, cls); SET_VECTOR_ELT(groups, j, newcol); UNPROTECT(1); } else { // Non-factor: subset the per-column unique values by the index vector SEXP idx = PROTECT(allocVector(INTSXP, Ng)); int *pidx = INTEGER(idx); for(int g = 0; g < Ng; ++g) pidx[g] = (g / sj) % nlj + 1; SEXP newcol = PROTECT(allocVector(TYPEOF(lev), Ng)); subsetVectorRaw(newcol, lev, idx, /*anyNA=*/ false); copyMostAttrib(lev, newcol); SET_VECTOR_ELT(groups, j, newcol); UNPROTECT(2); } stride_j *= nlj; } // Names and (if applicable) class of groups data.frame: copy class etc. from X // (like C_subsetDT). Only add data.frame row.names if X is itself a data.frame, // matching the drop = TRUE behaviour for plain list inputs. namesgets(groups, namby); if(inherits(X, "data.frame")) { if(isObject(X)) copyMostAttrib(X, groups); SEXP rn = PROTECT(allocVector(INTSXP, 2)); INTEGER(rn)[0] = NA_INTEGER; INTEGER(rn)[1] = -Ng; setAttrib(groups, R_RowNamesSymbol, rn); UNPROTECT(1); } } // --- Assemble result list --- SEXP res = PROTECT(allocVector(VECSXP, 5)); ++nprotect; SET_VECTOR_ELT(res, 0, ScalarInteger(Ng)); SET_VECTOR_ELT(res, 1, gid); SET_VECTOR_ELT(res, 2, gs); SET_VECTOR_ELT(res, 3, gst); SET_VECTOR_ELT(res, 4, groups); UNPROTECT(nprotect); return res; } // Recursive function: doesn't work in C99 Standard // int fgcd(int a, int b) { // if(b == 0) return a; // else return fcgd(b, a % b); // } // https://www.datamentor.io/r-programming/examples/gcd-hcf/ // https://stackoverflow.com/questions/7500128/how-to-use-operator-for-float-values-in-c // https://www.tutorialspoint.com/find-out-the-gcd-of-two-numbers-using-while-loop-in-c-language static inline double dgcd(double a, double b) { double rem; while(b > 0.000001) // check for b>0 condition because in a % b, b should not equal to zero { rem = fmod(a, b); a = b; b = rem; } return a; } static inline int igcd(int a, int b) { int rem; while(b != 0) // check for b!=0 condition because in a % b, b should not equal to zero { rem = a % b; a = b; b = rem; } return a; } // See as_double_integer64 at https://github.com/truecluster/bit64/blob/master/src/integer64.c // static inline long long i64gcd(long long a, long long b) { // long long rem; // while(b != 0) // check for b!=0 condition because in a % b, b should not equal to zero // { // rem = a % b; // a = b; // b = rem; // } // return a; // } // Greatest common divisor of a vector of numeric values // Note that the function expects positive values only (use abs() in R beforehand) // Also best to sort values before entering this function. For example c(0.25, 0) gives 0.25, not 0 SEXP vecgcd(SEXP x) { int n = length(x); if(n == 1) return x; switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { int *px = INTEGER(x), gcd = px[0]; for(int i = 1; i < n; ++i) { if(gcd <= 1) break; gcd = igcd(px[i], gcd); } if(gcd == 0) return ScalarInteger(1); return ScalarInteger(gcd); // fixest solution: https://github.com/lrberge/fixest/blob/master/src/misc_funs.cpp // int *px = INTEGER(x), gcd = px[0], ok = 0; // for(int i = 1; i < n; ++i) if(gcd > px[i]) gcd = px[i]; // while(ok == 0 && gcd > 1) { // ok = 1; // for(int i = 0; i < n; ++i) { // if(px[i] % gcd != 0) { // gcd--; // ok = 0; // break; // } // } // } } case REALSXP: { if(inherits(x, "integer64")) error("vgcd does not support integer64. Please convert your vector to double using as.double(x)."); // if(inherits(x, "integer64")) { // long long *px = (long long *)REAL(x), gcd = px[0]; // for(int i = 1; i < n; ++i) { // if(gcd <= 1) break; // gcd = i64gcd(px[i], gcd); // } // SEXP res = gcd == 0 ? ScalarReal(1) : ScalarReal((double)gcd); // copyMostAttrib(x, res); // return res; // } // TODO: Check if double is integer? double *px = REAL(x), gcd = px[0]; for(int i = 1; i < n; ++i) { if(gcd < 0.000001) break; gcd = dgcd(px[i], gcd); } if(gcd < 0.000001) error("GCD is approximately zero"); return ScalarReal(round(gcd * 1000000) / 1000000); } default: error("Greatest Common Divisor can only be calculated with integer or numeric data"); } return R_NilValue; } // Adapted from https://github.com/wch/r-source/blob/79298c499218846d14500255efd622b5021c10ec/src/main/list.c /* The following code is used to recursive traverse a block */ /* of code and extract all the function calls present in that code. */ typedef struct { SEXP ans; int StoreValues; int ItemCounts; } FunsWalkData; static void funswalk(SEXP s, FunsWalkData *d) { SEXP name; switch(TYPEOF(s)) { case SYMSXP: name = PRINTNAME(s); if(CHAR(name)[0] != '\0') { /* skip blank symbols */ if(d->StoreValues) SET_STRING_ELT(d->ans, d->ItemCounts, name); d->ItemCounts++; } break; case LANGSXP: // https://github.com/hadley/r-internals/blob/ea892fa79bbffe961e78dbe9c90ce4ca3bf2d9bc/pairlists.md while(s != R_NilValue) { funswalk(CAR(s), d); if(TYPEOF(CADR(s)) != LANGSXP) s = CDR(s); if(TYPEOF(CADR(s)) != LANGSXP) break; s = CDR(s); } break; default: /* it seems the intention is to do nothing here! */ break; } } SEXP all_funs(SEXP x) { if(TYPEOF(x) != LANGSXP) return allocVector(STRSXP, 0); SEXP expr = x; int i, savecount; FunsWalkData data = {NULL, 0, 0}; funswalk(expr, &data); savecount = data.ItemCounts; data.ans = allocVector(STRSXP, data.ItemCounts); data.StoreValues = 1; data.ItemCounts = 0; funswalk(expr, &data); if(data.ItemCounts != savecount) { PROTECT(expr = data.ans); data.ans = allocVector(STRSXP, data.ItemCounts); for(i = 0 ; i < data.ItemCounts ; i++) SET_STRING_ELT(data.ans, i, STRING_ELT(expr, i)); UNPROTECT(1); } return data.ans; } SEXP fnrowC(SEXP x) { if(TYPEOF(x) == VECSXP) return ScalarInteger(length(x) ? length(VECTOR_ELT(x, 0)) : 0); SEXP dim = getAttrib(x, R_DimSymbol); if(TYPEOF(dim) != INTSXP) return R_NilValue; return ScalarInteger(INTEGER(dim)[0]); } // Taken from: https://github.com/r-lib/rlang/blob/main/src/internal/env.c #define CLP_FRAME_LOCK_MASK (1 << 14) #define CLP_FRAME_IS_LOCKED(e) (MYEFL(e) & CLP_FRAME_LOCK_MASK) #define CLP_UNLOCK_FRAME(e) MYSEFL(e, MYEFL(e) & (~CLP_FRAME_LOCK_MASK)) SEXP unlock_collapse_namespace(SEXP env) { if(TYPEOF(env) != ENVSXP) error("Unsupported object passed to C_unlock_collapse_namespace: %s", type2char(TYPEOF(env))); CLP_UNLOCK_FRAME(env); R_unLockBinding(install(".FAST_STAT_FUN_EXT"), env); R_unLockBinding(install(".FAST_STAT_FUN_POLD"), env); R_unLockBinding(install(".FAST_FUN_MOPS"), env); R_unLockBinding(install(".COLLAPSE_ALL_EXPORTS"), env); return CLP_FRAME_IS_LOCKED(env) == 0 ? ScalarLogical(1) : ScalarLogical(0); } SEXP integer64toREAL(SEXP x) { int n = length(x); SEXP out = PROTECT(allocVector(REALSXP, n)); double* restrict p_out = REAL(out); const int64_t *p_x = INTEGER64_PTR_RO(x); #pragma omp simd for (int i = 0; i < n; ++i) { p_out[i] = p_x[i] == NA_INTEGER64 ? NA_REAL : (double)p_x[i]; } UNPROTECT(1); return out; } SEXP funlist(SEXP x) { if(TYPEOF(x) != VECSXP) return x; int l = length(x); if(l < 1) return R_NilValue; if(l == 1) return VECTOR_ELT(x, 0); int n = 0, nt = 0, mt = 0, elem = 0, nprotect = 0; const SEXP *px = SEXPPTR_RO(x); // Sum lengths and determine maximum type int *types = (int*)R_Calloc(27, int); for(int i = 0; i < l; ++i) { n += length(px[i]); ++types[TYPEOF(px[i])]; } for(int i = 0; i < 27; ++i) { if(types[i] > 0) { mt = i; ++nt; } } R_Free(types); // If more than one type: need to coerce to largest type if(nt > 1) { SEXP y = PROTECT(allocVector(VECSXP, l)); ++nprotect; for(int i = 0; i < l; ++i) { if(TYPEOF(px[i]) == mt) { elem = i; SET_VECTOR_ELT(y, i, px[i]); } else SET_VECTOR_ELT(y, i, coerceVector(px[i], mt)); } px = SEXPPTR_RO(y); } // Now unlisting SEXP res = PROTECT(allocVector(mt, n)); ++nprotect; n = 0; switch(mt) { case INTSXP: case LGLSXP: { int *pres = INTEGER(res); for(int i = 0; i != l; ++i) { nt = length(px[i]); const int *pxi = INTEGER_RO(px[i]); for (int j = 0; j != nt; ++j) pres[n++] = pxi[j]; } break; } case REALSXP: { double *pres = REAL(res); for(int i = 0; i != l; ++i) { nt = length(px[i]); const double *pxi = REAL_RO(px[i]); for (int j = 0; j != nt; ++j) pres[n++] = pxi[j]; } break; } case STRSXP: case VECSXP: case EXPRSXP: { SEXP *pres = SEXPPTR(res); for(int i = 0; i != l; ++i) { nt = length(px[i]); const SEXP *pxi = SEXPPTR_RO(px[i]); for (int j = 0; j != nt; ++j) pres[n++] = pxi[j]; } break; } case CPLXSXP: { Rcomplex *pres = COMPLEX(res); for(int i = 0; i != l; ++i) { nt = length(px[i]); const Rcomplex *pxi = COMPLEX_RO(px[i]); for (int j = 0; j != nt; ++j) pres[n++] = pxi[j]; } break; } case RAWSXP: { Rbyte *pres = RAW(res); for(int i = 0; i != l; ++i) { nt = length(px[i]); const Rbyte *pxi = RAW_RO(px[i]); for (int j = 0; j != nt; ++j) pres[n++] = pxi[j]; } break; } default: error("unsupported type: %s", type2char(mt)); } if(isObject(px[elem])) copyMostAttrib(px[elem], res); UNPROTECT(nprotect); return res; } collapse/src/extptr.c0000644000176200001440000000147314777170131014352 0ustar liggesusers#include "collapse_c.h" static void eptrFinalizer(SEXP eptr) { if(!R_ExternalPtrAddr(eptr)) return; // R_SetExternalPtrProtected(eptr, R_NilValue); R_ClearExternalPtr(eptr); } SEXP createeptr(SEXP x) { SEXP eptr = PROTECT(R_MakeExternalPtr(x, R_NilValue, R_NilValue)); // x // Using the 'prot' or 'tag' fields includes the object in the pointer, which obscures the purpose of this which is memory efficiency. R_RegisterCFinalizerEx(eptr, eptrFinalizer, TRUE); UNPROTECT(1); return eptr; } SEXP geteptr(SEXP x) { if(TYPEOF(x) != EXTPTRSXP) return x; void * res = R_ExternalPtrAddr(x); if(!res) error("Invalid pointer to 'index': external pointers are only valid within the current R session. Please reindex() your data: data = reindex(data)"); return (SEXP)res; // return R_ExternalPtrProtected(x); } collapse/src/data.table_init.c0000644000176200001440000002173414777170131016050 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include "collapse_c.h" // Needs to be first because includes OpenMP, to avoid namespace conflicts. #include "data.table.h" // #include // macros for an S-like interface to the above (no longer maintained) // #include // #include static inline int imin(int a, int b) { return a < b ? a : b; } // global constants extern in data.table.h for gcc10 -fno-common; #4091 // these are written to once here on initialization, but because of that write they can't be declared const SEXP char_integer64; SEXP char_nanotime; SEXP char_factor; SEXP char_ordered; SEXP char_dataframe; SEXP char_datatable; SEXP char_sf; SEXP sym_sorted; SEXP sym_index; SEXP sym_index_df; SEXP sym_sf_column; SEXP SelfRefSymbol; SEXP sym_datatable_locked; // SEXP sym_inherits; // SEXP char_starts; // SEXP sym_collapse_DT_alloccol; SEXP sym_label; SEXP sym_starts; SEXP sym_maxgrpn; SEXP sym_n_groups; SEXP sym_group_sizes; int max_threads; double NA_INT64_D; long long NA_INT64_LL; Rcomplex NA_CPLX; size_t sizes[100]; // max appears to be FUNSXP = 99, see Rinternals.h size_t typeorder[100]; // -> Needed for SIZEOF macro used in rbindlist Howver TYPEORDER macro and typeof is not used... static void setSizes(void) { for (int i=0; i<100; ++i) { sizes[i]=0; typeorder[i]=0; } // only these types are currently allowed as column types : sizes[LGLSXP] = sizeof(int); typeorder[LGLSXP] = 0; sizes[RAWSXP] = sizeof(Rbyte); typeorder[RAWSXP] = 1; sizes[INTSXP] = sizeof(int); typeorder[INTSXP] = 2; // integer and factor sizes[REALSXP] = sizeof(double); typeorder[REALSXP] = 3; // numeric and integer64 sizes[CPLXSXP] = sizeof(Rcomplex); typeorder[CPLXSXP] = 4; sizes[STRSXP] = sizeof(SEXP *); typeorder[STRSXP] = 5; sizes[VECSXP] = sizeof(SEXP *); typeorder[VECSXP] = 6; // list column if (sizeof(char *)>8) error("Pointers are %d bytes, greater than 8. We have not tested on any architecture greater than 64bit yet.", (int)sizeof(char *)); // One place we need the largest sizeof is the working memory malloc in reorder.c } // before it was SEXP attribute_visible SEXP collapse_init(SEXP mess) // void SEXP mess DllInfo *info // relies on pkg/src/Makevars to mv data.table.so to datatable.so { // R_registerRoutines(info, NULL, callMethods, NULL, externalMethods); // R_useDynamicSymbols(info, FALSE); setSizes(); const char *msg = "... failed. Please forward this message to maintainer('collapse')."; if ((int)NA_INTEGER != (int)INT_MIN) error("Checking NA_INTEGER [%d] == INT_MIN [%d] %s", NA_INTEGER, INT_MIN, msg); if ((int)NA_INTEGER != (int)NA_LOGICAL) error("Checking NA_INTEGER [%d] == NA_LOGICAL [%d] %s", NA_INTEGER, NA_LOGICAL, msg); if (sizeof(int) != 4) error("Checking sizeof(int) [%d] is 4 %s", (int)sizeof(int), msg); if (sizeof(double) != 8) error("Checking sizeof(double) [%d] is 8 %s", (int)sizeof(double), msg); // 8 on both 32bit and 64bit // alignof not available in C99: if (alignof(double) != 8) error("Checking alignof(double) [%d] is 8 %s", alignof(double), msg); // 8 on both 32bit and 64bit if (sizeof(long long) != 8) error("Checking sizeof(long long) [%d] is 8 %s", (int)sizeof(long long), msg); if (sizeof(char *) != 4 && sizeof(char *) != 8) error("Checking sizeof(pointer) [%d] is 4 or 8 %s", (int)sizeof(char *), msg); if (sizeof(SEXP) != sizeof(char *)) error("Checking sizeof(SEXP) [%d] == sizeof(pointer) [%d] %s", (int)sizeof(SEXP), (int)sizeof(char *), msg); if (sizeof(uint64_t) != 8) error("Checking sizeof(uint64_t) [%d] is 8 %s", (int)sizeof(uint64_t), msg); if (sizeof(int64_t) != 8) error("Checking sizeof(int64_t) [%d] is 8 %s", (int)sizeof(int64_t), msg); if (sizeof(signed char) != 1) error("Checking sizeof(signed char) [%d] is 1 %s", (int)sizeof(signed char), msg); if (sizeof(int8_t) != 1) error("Checking sizeof(int8_t) [%d] is 1 %s", (int)sizeof(int8_t), msg); if (sizeof(uint8_t) != 1) error("Checking sizeof(uint8_t) [%d] is 1 %s", (int)sizeof(uint8_t), msg); if (sizeof(int16_t) != 2) error("Checking sizeof(int16_t) [%d] is 2 %s", (int)sizeof(int16_t), msg); if (sizeof(uint16_t) != 2) error("Checking sizeof(uint16_t) [%d] is 2 %s", (int)sizeof(uint16_t), msg); SEXP tmp = PROTECT(allocVector(INTSXP,2)); if (LENGTH(tmp)!=2) error("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s", LENGTH(tmp), msg); if (TRULEN(tmp)!=0) error("Checking TRUELENGTH(allocVector(INTSXP,2)) [%d] is 0 %s", (int)TRULEN(tmp), msg); UNPROTECT(1); // According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits. // But check here anyway just to be sure, just in case this answer is right (http://stackoverflow.com/a/2952680/403310). int i = 314; memset(&i, 0, sizeof(int)); if (i != 0) error("Checking memset(&i,0,sizeof(int)); i == (int)0 %s", msg); unsigned int ui = 314; memset(&ui, 0, sizeof(unsigned int)); if (ui != 0) error("Checking memset(&ui, 0, sizeof(unsigned int)); ui == (unsigned int)0 %s", msg); double d = 3.14; memset(&d, 0, sizeof(double)); if (d != 0.0) error("Checking memset(&d, 0, sizeof(double)); d == (double)0.0 %s", msg); long double ld = 3.14; memset(&ld, 0, sizeof(long double)); if (ld != 0.0) error("Checking memset(&ld, 0, sizeof(long double)); ld == (long double)0.0 %s", msg); // Variables rather than #define for NA_INT64 to ensure correct usage; i.e. not casted NA_INT64_LL = LLONG_MIN; NA_INT64_D = LLtoD(NA_INT64_LL); if (NA_INT64_LL != DtoLL(NA_INT64_D)) error("Conversion of NA_INT64 via double failed %lld!=%lld", NA_INT64_LL, DtoLL(NA_INT64_D)); // LLONG_MIN when punned to double is the sign bit set and then all zeros in exponent and significand i.e. -0.0 // That's why we must never test for NA_INT64_D using == in double type. Must always DtoLL and compare long long types. // Assigning NA_INT64_D to a REAL is ok however. if (NA_INT64_D != 0.0) error("NA_INT64_D (negative -0.0) is not == 0.0."); if (NA_INT64_D != -0.0) error("NA_INT64_D (negative -0.0) is not ==-0.0."); if (ISNAN(NA_INT64_D)) error("ISNAN(NA_INT64_D) is TRUE but should not be"); if (isnan(NA_INT64_D)) error("isnan(NA_INT64_D) is TRUE but should not be"); NA_CPLX.r = NA_REAL; // NA_REAL is defined as R_NaReal which is not a strict constant and thus initializer {NA_REAL, NA_REAL} can't be used in .h NA_CPLX.i = NA_REAL; // https://github.com/Rdatatable/data.table/pull/3689/files#r304117234 // create needed strings in advance for speed, same techique as R_*Symbol // Following R-exts 5.9.4; paragraph and example starting "Using install ..." // either use PRINTNAME(install()) or R_PreserveObject(mkChar()) here. char_integer64 = PRINTNAME(install("integer64")); char_nanotime = PRINTNAME(install("nanotime")); // char_starts = PRINTNAME(sym_starts = install("starts")); char_factor = PRINTNAME(install("factor")); char_ordered = PRINTNAME(install("ordered")); char_dataframe = PRINTNAME(install("data.frame")); char_datatable = PRINTNAME(install("data.table")); char_sf = PRINTNAME(install("sf")); if (TYPEOF(char_integer64) != CHARSXP) { // checking one is enough in case of any R-devel changes error("PRINTNAME(install(\"integer64\")) has returned %s not %s", type2char(TYPEOF(char_integer64)), type2char(CHARSXP)); // # nocov } // create commonly used symbols, same as R_*Symbol but internal to DT // Not really for speed but to avoid leak in situations like setAttrib(DT, install(), allocVector()) where // the allocVector() can happen first and then the install() could gc and free it before it is protected // within setAttrib. Thanks to Bill Dunlap finding and reporting. Using these symbols instead of install() // avoids the gc without needing an extra PROTECT and immediate UNPROTECT after the setAttrib which would // look odd (and devs in future might be tempted to remove them). Avoiding passing install() to API calls // keeps the code neat and readable. Also see grep's added to CRAN_Release.cmd to find such calls. sym_sorted = install("sorted"); sym_index = install("index"); sym_index_df = install("index_df"); sym_sf_column = install("sf_column"); SelfRefSymbol = install(".internal.selfref"); sym_datatable_locked = install(".data.table.locked"); // sym_inherits = install("inherits"); // sym_collapse_DT_alloccol = install("collapse_DT_alloccol"); sym_label = install("label"); sym_starts = install("starts"); sym_maxgrpn = install("maxgrpn"); sym_n_groups = install("N.groups"); sym_group_sizes = install("group.sizes"); max_threads = OMP_NUM_PROCS; max_threads = imin(max_threads, OMP_THREAD_LIMIT); max_threads = imin(max_threads, OMP_MAX_THREADS); return mess; } inline long long DtoLL(double x) { union {double d; int64_t i64;} u; u.d = x; return (long long)u.i64; } inline double LLtoD(long long x) { union {double d; int64_t i64;} u; u.i64 = (int64_t)x; return u.d; } collapse/src/fcumsum.c0000644000176200001440000002474614777170131014513 0ustar liggesusers#include "collapse_c.h" void fcumsum_double_impl(double *pout, double *px, int ng, int *pg, int narm, int fill, int l) { if(ng == 0) { if(narm <= 0) { pout[0] = px[0]; for(int i = 1; i != l; ++i) pout[i] = pout[i-1] + px[i]; } else if(fill) { pout[0] = ISNAN(px[0]) ? 0.0 : px[0]; for(int i = 1; i != l; ++i) pout[i] = pout[i-1] + (ISNAN(px[i]) ? 0.0 : px[i]); } else { double last = 0; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) pout[i] = px[i]; else pout[i] = last += px[i]; } } } else { double *last = (double*)R_Calloc(ng+1, double); // Also pass pointer to function ?? if(narm <= 0) { for(int i = 0; i != l; ++i) last[pg[i]] = pout[i] = last[pg[i]] + px[i]; } else if(fill) { for(int i = 0; i != l; ++i) last[pg[i]] = pout[i] = last[pg[i]] + (ISNAN(px[i]) ? 0.0 : px[i]); } else { for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) pout[i] = px[i]; else last[pg[i]] = pout[i] = last[pg[i]] + px[i]; } } R_Free(last); } } void fcumsum_double_impl_order(double *pout, double *px, int ng, int *pg, int *po, int narm, int fill, int l) { if(ng == 0) { if(narm <= 0) { --pout; --px; pout[po[0]] = px[po[0]]; for(int i = 1; i != l; ++i) pout[po[i]] = pout[po[i-1]] + px[po[i]]; } else if(fill) { --pout; --px; pout[po[0]] = ISNAN(px[po[0]]) ? 0.0 : px[po[0]]; for(int i = 1; i != l; ++i) pout[po[i]] = pout[po[i-1]] + (ISNAN(px[po[i]]) ? 0.0 : px[po[i]]); } else { double last = 0; for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(ISNAN(px[poi])) pout[poi] = px[poi]; else pout[poi] = last += px[poi]; } } } else { double *last = (double*)R_Calloc(ng+1, double); // Also pass pointer to function ?? if(narm <= 0) { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; last[pg[poi]] = pout[poi] = last[pg[poi]] + px[poi]; } } else if(fill) { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; last[pg[poi]] = pout[poi] = last[pg[poi]] + (ISNAN(px[poi]) ? 0.0 : px[poi]); } } else { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(ISNAN(px[poi])) pout[poi] = px[poi]; else last[pg[poi]] = pout[poi] = last[pg[poi]] + px[poi]; } } R_Free(last); } } void fcumsum_int_impl(int *pout, int *px, int ng, int *pg, int narm, int fill, int l) { long long ckof; if(ng == 0) { if(narm <= 0) { int i = 1; ckof = pout[0] = px[0]; if(ckof == NA_INTEGER) { --i; ckof = 0; } for( ; i != l; ++i) { if(px[i] == NA_INTEGER) { for( ; i != l; ++i) pout[i] = NA_INTEGER; break; } pout[i] = ckof += px[i]; } } else if(fill) { ckof = pout[0] = (px[0] == NA_INTEGER) ? 0 : px[0]; for(int i = 1; i != l; ++i) { if(px[i] != NA_INTEGER) ckof += (long long)px[i]; pout[i] = (int)ckof; } } else { ckof = 0; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) pout[i] = NA_INTEGER; else pout[i] = ckof += px[i]; } } if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. Use fcumsum(as.numeric(x))."); } else { int *last = (int*)R_Calloc(ng+1, int); // Also pass pointer to function ?? if(narm <= 0) { for(int i = 0, lsi; i != l; ++i) { if(px[i] == NA_INTEGER) { pout[i] = last[pg[i]] = NA_INTEGER; continue; } lsi = last[pg[i]]; if(lsi == NA_INTEGER) pout[i] = NA_INTEGER; else { ckof = (long long)lsi + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[i]] = pout[i] = (int)ckof; } } } else if(fill) { for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) pout[i] = last[pg[i]]; else { ckof = (long long)last[pg[i]] + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[i]] = pout[i] = (int)ckof; } } } else { for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) pout[i] = NA_INTEGER; else { ckof = (long long)last[pg[i]] + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[i]] = pout[i] = (int)ckof; } } } R_Free(last); } } void fcumsum_int_impl_order(int *pout, int *px, int ng, int *pg, int *po, int narm, int fill, int l) { long long ckof; if(ng == 0) { if(narm <= 0) { int i = 1, poi; ckof = pout[po[0]-1] = px[po[0]-1]; if(ckof == NA_INTEGER) { --i; ckof = 0; } for( ; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) { for( ; i != l; ++i) pout[po[i]-1] = NA_INTEGER; break; } pout[poi] = ckof += px[poi]; } } else if(fill) { ckof = pout[po[0]-1] = (px[po[0]-1] == NA_INTEGER) ? 0 : px[po[0]-1]; for(int i = 1, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] != NA_INTEGER) ckof += (long long)px[poi]; pout[poi] = (int)ckof; } } else { ckof = 0; for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) pout[poi] = NA_INTEGER; else pout[poi] = ckof += px[poi]; } } if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. Use fcumsum(as.numeric(x))."); } else { int *last = (int*)R_Calloc(ng+1, int); // Also pass pointer to function ?? if(narm <= 0) { for(int i = 0, poi, lsi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) { pout[poi] = last[pg[poi]] = NA_INTEGER; continue; } lsi = last[pg[poi]]; if(lsi == NA_INTEGER) pout[poi] = NA_INTEGER; else { ckof = (long long)lsi + px[poi]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[poi]] = pout[poi] = (int)ckof; } } } else if(fill) { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) pout[poi] = last[pg[poi]]; else { ckof = (long long)last[pg[poi]] + px[poi]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[poi]] = pout[poi] = (int)ckof; } } } else { for(int i = 0, poi; i != l; ++i) { poi = po[i]-1; if(px[poi] == NA_INTEGER) pout[poi] = NA_INTEGER; else { ckof = (long long)last[pg[poi]] + px[poi]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); last[pg[poi]] = pout[poi] = (int)ckof; } } } R_Free(last); } } SEXP fcumsumC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), fill = asLogical(Rfill), *pg = INTEGER(g), ord = length(o) > 1, *po = ord ? INTEGER(o) : pg; if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng > 0 && l != length(g)) error("length(g) must match length(x)"); if(ord && l != length(o)) error("length(o) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, l)); switch(tx) { case REALSXP: if(ord) fcumsum_double_impl_order(REAL(out), REAL(x), ng, pg, po, narm, fill, l); else fcumsum_double_impl(REAL(out), REAL(x), ng, pg, narm, fill, l); break; case INTSXP: if(ord) fcumsum_int_impl_order(INTEGER(out), INTEGER(x), ng, pg, po, narm, fill, l); else fcumsum_int_impl(INTEGER(out), INTEGER(x), ng, pg, narm, fill, l); break; default: error("Unsupported SEXP type"); } SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } SEXP fcumsummC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], ng = asInteger(Rng), narm = asLogical(Rnarm), fill = asLogical(Rfill), *pg = INTEGER(g), ord = length(o) > 1, *po = ord ? INTEGER(o) : pg; if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng > 0 && l != length(g)) error("length(g) must match nrow(x)"); if(ord && l != length(o)) error("length(o) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, l * col)); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); if(ord) for(int j = 0; j != col; ++j) fcumsum_double_impl_order(pout + j*l, px + j*l, ng, pg, po, narm, fill, l); else for(int j = 0; j != col; ++j) fcumsum_double_impl(pout + j*l, px + j*l, ng, pg, narm, fill, l); break; } case INTSXP: { int *px = INTEGER(x), *pout = INTEGER(out); if(ord) for(int j = 0; j != col; ++j) fcumsum_int_impl_order(pout + j*l, px + j*l, ng, pg, po, narm, fill, l); else for(int j = 0; j != col; ++j) fcumsum_int_impl(pout + j*l, px + j*l, ng, pg, narm, fill, l); break; } default: error("Unsupported SEXP type"); } SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } SEXP fcumsumlC(SEXP x, SEXP Rng, SEXP g, SEXP o, SEXP Rnarm, SEXP Rfill) { int l = length(x); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 SEXP out = PROTECT(allocVector(VECSXP, l)); const SEXP *px = SEXPPTR_RO(x); for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fcumsumC(px[j], Rng, g, o, Rnarm, Rfill)); SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } collapse/src/pwnobs.cpp0000644000176200001440000002610615113725306014667 0ustar liggesusers#include using namespace Rcpp; template IntegerMatrix pwnobsmCppImpl(const Matrix& x) { int l = x.nrow(), col = x.ncol(); auto isnnanT = (RTYPE == REALSXP) ? [](typename Rcpp::traits::storage_type::type x) { return x == x; } : [](typename Rcpp::traits::storage_type::type x) { return x != Vector::get_na(); }; IntegerMatrix out = no_init_matrix(col, col); for(int j = 0; j != col; ++j) { ConstMatrixColumn colj = x( _ , j); int nj = std::count_if(colj.begin(), colj.end(), isnnanT); out(j, j) = nj; for(int k = j+1; k != col; ++k) { ConstMatrixColumn colk = x( _ , k); int njk = 0; for(int i = l; i--; ) if(isnnanT(colj[i]) && isnnanT(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? out(j, k) = out(k, j) = njk; } } Rf_dimnamesgets(out, List::create(colnames(x), colnames(x))); return out; } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } template <> IntegerMatrix pwnobsmCppImpl(const Matrix& x) { stop("Not supported SEXP type!"); } // [[Rcpp::export]] IntegerMatrix pwnobsmCpp(SEXP x){ RCPP_RETURN_MATRIX(pwnobsmCppImpl, x); } // Old / Experimental: // // inline bool nisnan(double x) { // return x == x; // } // // Not fast !!! : // // [[Rcpp::export]] // IntegerMatrix pwnobslCpp(const List& x) { // int l = x.size(); // IntegerMatrix out = no_init_matrix(l, l); // for(int j = 0; j != l; ++j) { // switch(TYPEOF(x[j])) { // case REALSXP: { // NumericVector colj = x[j]; // int nj = std::count_if(colj.begin(), colj.end(), nisnan); // int rowj = colj.size(); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(nisnan(colj[i]) && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // case INTSXP: { // IntegerVector colj = x[j]; // int rowj = colj.size(); // int nj = rowj - std::count(colj.begin(), colj.end(), NA_INTEGER); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_INTEGER && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // case STRSXP: { // CharacterVector colj = x[j]; // int rowj = colj.size(); // int nj = rowj - std::count(colj.begin(), colj.end(), NA_STRING); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_STRING && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // case LGLSXP: { // LogicalVector colj = x[j]; // int rowj = colj.size(); // int nj = rowj - std::count(colj.begin(), colj.end(), NA_LOGICAL); // out(j, j) = nj; // for(int k = j+1; k != l; ++k) { // switch(TYPEOF(x[k])) { // case REALSXP: { // NumericVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && nisnan(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case INTSXP: { // IntegerVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && colk[i] != NA_INTEGER) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case STRSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && colk[i] != NA_STRING) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // case LGLSXP: { // CharacterVector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(colj[i] != NA_LOGICAL && colk[i] != NA_LOGICAL) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // break; // } // default: stop("incompatible SEXP encountered;"); // } // } // break; // } // default: // stop("incompatible SEXP encountered;"); // } // } // out.attr("dimnames") = List::create(x.attr("names"), x.attr("names")); // return out; // } // // // [[Rcpp::export]] // IntegerMatrix pwnobslCpp(const List& x) { // int l = x.size(); // IntegerMatrix out = no_init_matrix(l, l); // for(int j = 0; j != l; ++j) { // int RTYPEj = TYPEOF(x[j]); // auto isnnanTj = (RTYPEj == REALSXP) ? [](typename Rcpp::traits::storage_type::type x) { return x == x; } : // [](typename Rcpp::traits::storage_type::type x) { return x != Vector::get_na(); }; // Vector colj = x[j]; // int nj = std::count_if(colj.begin(), colj.end(), isnnanTj); // int rowj = colj.size(); // out(j, j) = nj; // for(int k = j+1; k != col; ++k) { // int RTYPEk = TYPEOF(x[k]); // auto isnnanTk = (RTYPEk == REALSXP) ? [](typename Rcpp::traits::storage_type::type x) { return x == x; } : // [](typename Rcpp::traits::storage_type::type x) { return x != Vector::get_na(); }; // Vector colk = x[k]; // if(colk.size() != rowj) stop("All columns need to have the same length!"); // int njk = 0; // for(int i = rowj; i--; ) if(isnnanTj(colj[i]) && isnnanTk(colk[i])) ++njk; // fastest? or save logical vector with colj Non-missing? // out(j, k) = out(k, j) = njk; // } // } // out.attr("dimnames") = List::create(names(x), names(x)); // return out; // } collapse/src/match.c0000644000176200001440000012257715121644252014123 0ustar liggesusers#include "collapse_c.h" // Needs to be first because includes OpenMP, to avoid namespace conflicts. #include "data.table.h" #include "kit.h" SEXP match_single(SEXP x, SEXP table, SEXP nomatch) { // Todo: optimizations for length 1 x or table??? const int n = length(x), nt = length(table), nmv = asInteger(nomatch); if(n == 0) return allocVector(INTSXP, 0); if(nt == 0) { SEXP nmvint = PROTECT(ScalarInteger(nmv)); SEXP nint = PROTECT(ScalarInteger(n)); SEXP sint1 = PROTECT(ScalarInteger(1)); SEXP res = falloc(nmvint, nint, sint1); UNPROTECT(3); return res; } int nprotect = 1; // Allocating here. For factors there is a shorthand SEXP ans = PROTECT(allocVector(INTSXP, n)); // https://github.com/wch/r-source/blob/433b0c829018c7ad8cd6a585bf9c388f8aaae303/src/main/unique.c#L1356C4-L1356C4 if(TYPEOF(x) > STRSXP || TYPEOF(table) > STRSXP) { if(TYPEOF(x) > STRSXP) { PROTECT(x = coerceVector(x, STRSXP)); ++nprotect; } if(TYPEOF(table) > STRSXP) { PROTECT(table = coerceVector(table, STRSXP)); ++nprotect; } } int tx = TYPEOF(x), tt = TYPEOF(table); // factor is between logical and integer if(tx == LGLSXP) tx = INTSXP; else if(tx == INTSXP && isFactor(x)) tx -= 1; else if(tx == REALSXP && isObject(x) && INHERITS(x, char_integer64) && !INHERITS(table, char_integer64)) { PROTECT(x = integer64toREAL(x)); ++nprotect; } if(tt == LGLSXP) tt = INTSXP; else if(tt == INTSXP && isFactor(table)) tt -= 1; else if(tt == REALSXP && isObject(table) && INHERITS(table, char_integer64) && !INHERITS(x, char_integer64)) { PROTECT(table = integer64toREAL(table)); ++nprotect; } if(tx != tt) { if(tx < tt) { // table could be integer, double, complex, character.... if(tx == INTSXP-1) { // For factors there is a shorthand: just match the levels against table... SEXP nmvint = PROTECT(ScalarInteger(nmv)); ++nprotect; SEXP tab = PROTECT(match_single(getAttrib(x, R_LevelsSymbol), table, nmvint)); ++nprotect; int *pans = INTEGER(ans), *pt = INTEGER(tab), *px = INTEGER(x); if(inherits(x, "na.included")) { #pragma omp simd for(int i = 0; i < n; ++i) pans[i] = pt[px[i]-1]; } else { int na_ind = 0; // Need to take care of possible NA matches in table.. switch(tt) { case INTSXP: { const int *ptt = INTEGER_RO(table); for(int i = 0; i != nt; ++i) { if(ptt[i] == NA_INTEGER) { na_ind = i+1; break; } } } break; case REALSXP: { const double *ptt = REAL_RO(table); for(int i = 0; i != nt; ++i) { if(ISNAN(ptt[i])) { na_ind = i+1; break; } } } break; case STRSXP: { const SEXP *ptt = STRING_PTR_RO(table); for(int i = 0; i != nt; ++i) { if(ptt[i] == NA_STRING) { na_ind = i+1; break; } } } break; case CPLXSXP: { const Rcomplex *ptt = COMPLEX_RO(table); for(int i = 0; i != nt; ++i) { if(C_IsNA(ptt[i]) || C_IsNaN(ptt[i])) { na_ind = i+1; break; } } } break; default: error("Type %s for 'table' is not supported.", type2char(tt)); } if(na_ind == 0) na_ind = nmv; #pragma omp simd for(int i = 0; i < n; ++i) pans[i] = px[i] == NA_INTEGER ? na_ind : pt[px[i]-1]; } UNPROTECT(nprotect); return ans; } PROTECT(x = coerceVector(x, tt)); ++nprotect; // Coercing to largest common type } else { // x has a larger type than table... if(tt == INTSXP-1) { // There could be a complicated shorthand involving matching x against the levels and then replacing this by the first occurrence index PROTECT(table = asCharacterFactor(table)); ++nprotect; if(tx != STRSXP) { // Worst case: need to coerce x as well to make the match PROTECT(x = coerceVector(x, STRSXP)); ++nprotect; } } else { PROTECT(table = coerceVector(table, tx)); ++nprotect; } } } else if(tx == INTSXP-1 && tt == INTSXP-1) { // Both factors SEXP x_lev = PROTECT(getAttrib(x, R_LevelsSymbol)); ++nprotect; // Unnecessary but appeases RCHK if(!R_compute_identical(x_lev, getAttrib(table, R_LevelsSymbol), 0)) { // This is the inefficient way: coercing both to character // PROTECT(x = asCharacterFactor(x)); ++nprotect; // PROTECT(table = asCharacterFactor(table)); ++nprotect; // The efficient solution: matching the levels and regenerating table, taking zero as nomatch value here so that NA does not get matched against NA in x SEXP sint0 = PROTECT(ScalarInteger(0)); ++nprotect; SEXP tab_ilev = PROTECT(match_single(getAttrib(table, R_LevelsSymbol), x_lev, sint0)); ++nprotect; SEXP table_new = PROTECT(duplicate(table)); ++nprotect; subsetVectorRaw(table_new, tab_ilev, table, /*anyNA=*/!inherits(table, "na.included")); table = table_new; } } tx = TYPEOF(x); int K = 0, anyNA = 0; size_t M; // if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP || (tx == INTSXP && !isObject(x))) { bigint:; const size_t n2 = 2U * (size_t) nt; M = 256; K = 8; while (M < n2) { M *= 2; K++; } } else if(tx == INTSXP) { // TODO: think about qG objects here... if(isFactor(x)) { tx = 1000; M = (size_t)nlevels(x) + 2; } else if(inherits(x, "qG")) { SEXP ngtab = getAttrib(table, sym_n_groups); if(isNull(ngtab)) goto bigint; int ng = asInteger(getAttrib(x, sym_n_groups)), ngt = asInteger(ngtab); if(ngt > ng) ng = ngt; M = (size_t)ng + 2; tx = 1000; } else goto bigint; anyNA = !(inherits(x, "na.included") && inherits(table, "na.included")); } else if (tx == LGLSXP) { M = 3; } else error("Type %s is not supported.", type2char(tx)); int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M int *restrict pans = INTEGER(ans); size_t id = 0; switch (tx) { case LGLSXP: case 1000: // This is for factors or logical vectors where the size of the table is known { const int *restrict px = INTEGER(x), *restrict pt = INTEGER(table); if(tx == 1000 && !anyNA) { // fill hash table with indices of 'table' for (int i = 0, j; i != nt; ++i) { j = pt[i]; if(h[j]) continue; h[j] = i + 1; } // look up values of x in hash table for (int i = 0, j; i != n; ++i) { j = px[i]; pans[i] = h[j] ? h[j] : nmv; } } else { // fill hash table with indices of 'table' for (int i = 0, j, k = (int)M-1; i != nt; ++i) { j = (pt[i] == NA_INTEGER) ? k : pt[i]; if(h[j]) continue; h[j] = i + 1; } // look up values of x in hash table for (int i = 0, j, k = (int)M-1; i != n; ++i) { j = (px[i] == NA_INTEGER) ? k : px[i]; pans[i] = h[j] ? h[j] : nmv; } } } break; case INTSXP: { const int *restrict px = INTEGER(x), *restrict pt = INTEGER(table); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(pt[i], K); while(h[id]) { if(pt[h[id]-1] == pt[i]) goto ibl; if(++id >= M) id = 0; } h[id] = i + 1; ibl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(px[i], K); while(h[id]) { if(pt[h[id]-1] == px[i]) { pans[i] = h[id]; goto ibl2; } if(++id >= M) id = 0; } pans[i] = nmv; ibl2:; } } break; case REALSXP: { const double *restrict px = REAL(x), *restrict pt = REAL(table); union uno tpv; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tpv.d = pt[i] + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(pt[h[id]-1], pt[i])) goto rbl; if(++id >= M) id = 0; } h[id] = i + 1; rbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tpv.d = px[i] + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(pt[h[id]-1], px[i])) { pans[i] = h[id]; goto rbl2; } if(++id >= M) id = 0; } pans[i] = nmv; rbl2:; } } break; case CPLXSXP: { const Rcomplex *restrict px = COMPLEX(x), *restrict pt = COMPLEX(table); unsigned int u; union uno tpv; Rcomplex tmp; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tmp = pt[i]; if(C_IsNA(tmp)) { tmp.r = tmp.i = NA_REAL; } else if (C_IsNaN(tmp)) { tmp.r = tmp.i = R_NaN; } tpv.d = tmp.r + 0.0; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i + 0.0; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u, K); while(h[id]) { if(CEQUAL(pt[h[id]-1], pt[i])) goto cbl; if(++id >= M) id = 0; } h[id] = i + 1; cbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tmp = px[i]; if(C_IsNA(tmp)) { tmp.r = tmp.i = NA_REAL; } else if (C_IsNaN(tmp)) { tmp.r = tmp.i = R_NaN; } tpv.d = tmp.r + 0.0; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i + 0.0; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u, K); while(h[id]) { if(CEQUAL(pt[h[id]-1], px[i])) { pans[i] = h[id]; goto cbl2; } if(++id >= M) id = 0; } pans[i] = nmv; cbl2:; } } break; case STRSXP: { if (need2utf8(x)) { PROTECT(x = coerceUtf8IfNeeded(x)); ++nprotect; } if (need2utf8(table)) { PROTECT(table = coerceUtf8IfNeeded(table)); ++nprotect; } const SEXP *restrict px = SEXPPTR_RO(x), *restrict pt = SEXPPTR_RO(table); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(((uintptr_t) pt[i] & 0xffffffff), K); while(h[id]) { if(pt[h[id]-1] == pt[i]) goto sbl; if(++id >= M) id = 0; } h[id] = i + 1; sbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(((uintptr_t) px[i] & 0xffffffff), K); while(h[id]) { if(pt[h[id]-1] == px[i]) { pans[i] = h[id]; goto sbl2; } if(++id >= M) id = 0; } pans[i] = nmv; sbl2:; } } break; } R_Free(h); UNPROTECT(nprotect); return ans; } // Outsourcing the conversions to a central function SEXP coerce_single_to_equal_types(SEXP x, SEXP table) { int nprotect = 1; SEXP out = PROTECT(allocVector(VECSXP, 2)); SET_VECTOR_ELT(out, 0, x); SET_VECTOR_ELT(out, 1, table); // https://github.com/wch/r-source/blob/433b0c829018c7ad8cd6a585bf9c388f8aaae303/src/main/unique.c#L1356C4-L1356C4 if(TYPEOF(x) == CPLXSXP || TYPEOF(x) > STRSXP) SET_VECTOR_ELT(out, 0, coerceVector(x, STRSXP)); if(TYPEOF(table) == CPLXSXP || TYPEOF(table) > STRSXP) SET_VECTOR_ELT(out, 1, coerceVector(table, STRSXP)); x = VECTOR_ELT(out, 0); table = VECTOR_ELT(out, 1); int tx = TYPEOF(x), tt = TYPEOF(table); if(tx == LGLSXP) tx = INTSXP; else if(tx == INTSXP && isFactor(x)) tx -= 1; else if(tx == REALSXP && isObject(x) && INHERITS(x, char_integer64) && !INHERITS(table, char_integer64)) { SET_VECTOR_ELT(out, 0, integer64toREAL(x)); x = VECTOR_ELT(out, 0); } if(tt == LGLSXP) tt = INTSXP; else if(tt == INTSXP && isFactor(table)) tt -= 1; else if(tt == REALSXP && isObject(table) && INHERITS(table, char_integer64) && !INHERITS(x, char_integer64)) { SET_VECTOR_ELT(out, 1, integer64toREAL(table)); table = VECTOR_ELT(out, 1); } if(tx != tt) { if(tx > tt) { if(tt == INTSXP-1) { // TODO: could implement as in single case.. SET_VECTOR_ELT(out, 1, asCharacterFactor(table)); if(tx != STRSXP) SET_VECTOR_ELT(out, 0, coerceVector(x, STRSXP)); } else SET_VECTOR_ELT(out, 1, coerceVector(table, tx)); } else { if(tx == INTSXP-1) { // TODO: could implement as in single case.. SET_VECTOR_ELT(out, 0, asCharacterFactor(x)); if(tt != STRSXP) SET_VECTOR_ELT(out, 1, coerceVector(table, STRSXP)); } else SET_VECTOR_ELT(out, 0, coerceVector(x, tt)); } } else if(tx == INTSXP-1 && tt == INTSXP-1) { // Both factors SEXP x_lev = PROTECT(getAttrib(x, R_LevelsSymbol)); ++nprotect; // Unnecessary but appeases RCHK if(!R_compute_identical(x_lev, getAttrib(table, R_LevelsSymbol), 0)) { SEXP sint0 = PROTECT(ScalarInteger(0)); ++nprotect; SEXP tab_ilev = PROTECT(match_single(getAttrib(table, R_LevelsSymbol), x_lev, sint0)); ++nprotect; SEXP table_new; SET_VECTOR_ELT(out, 1, table_new = duplicate(table)); subsetVectorRaw(table_new, tab_ilev, table, /*anyNA=*/!inherits(table, "na.included")); // TODO: check this !! } } UNPROTECT(nprotect); return out; } SEXP coerce_to_equal_types(SEXP x, SEXP table) { if(TYPEOF(x) == VECSXP || TYPEOF(table) == VECSXP) { if(TYPEOF(x) != TYPEOF(table)) error("x and table must both be lists when one is a list"); int l = length(x); if(length(table) != l) error("lengths of x and table must be equal of both are lists"); SEXP out = PROTECT(allocVector(VECSXP, l)); for(int i = 0; i < l; i++) { SEXP xi = VECTOR_ELT(x, i); SEXP ti = VECTOR_ELT(table, i); SET_VECTOR_ELT(out, i, coerce_single_to_equal_types(xi, ti)); } UNPROTECT(1); return out; } return coerce_single_to_equal_types(x, table); } // Still See: https://www.cockroachlabs.com/blog/vectorized-hash-joiner/ SEXP match_two_vectors(SEXP x, SEXP table, SEXP nomatch) { if(TYPEOF(x) != VECSXP || TYPEOF(table) != VECSXP) error("both x and table need to be atomic vectors or lists"); const int l = length(x), lt = length(table), nmv = asInteger(nomatch); if(l == 0) return allocVector(INTSXP, 0); if(lt == 0) { SEXP nmvint = PROTECT(ScalarInteger(nmv)); SEXP lx0 = PROTECT(ScalarInteger(length(VECTOR_ELT(x, 0)))); SEXP sint1 = PROTECT(ScalarInteger(1)); SEXP res = falloc(nmvint, lx0, sint1); UNPROTECT(3); return res; } if(l != lt) error("length(n) must match length(nt)"); if(l != 2) error("Internal function match_two_vectors() only supports lists of length 2"); // Shallow copy and coercing as necessary int nprotect = 1; SEXP clist = PROTECT(coerce_to_equal_types(x, table)); const SEXP *pc = SEXPPTR_RO(clist), *pc1 = SEXPPTR_RO(pc[0]), *pc2 = SEXPPTR_RO(pc[1]); const int n = length(pc1[0]), nt = length(pc1[1]); if(n != length(pc2[0])) error("both vectors in x must have the same length"); if(nt != length(pc2[1])) error("both vectors in table must have the same length"); int K = 0; size_t M; const size_t n2 = 2U * (size_t) nt; M = 256; K = 8; while (M < n2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M SEXP ans = PROTECT(allocVector(INTSXP, n)); ++nprotect; int *restrict pans = INTEGER(ans); size_t id = 0; int t1 = TYPEOF(pc1[0]), t2 = TYPEOF(pc2[0]); if(t1 == LGLSXP) t1 = INTSXP; if(t2 == LGLSXP) t2 = INTSXP; // 6 cases: 3 same type and 3 different types if(t1 == t2) { // same type switch(t1) { case INTSXP: { const int *restrict px1 = INTEGER(pc1[0]), *restrict px2 = INTEGER(pc2[0]), *restrict pt1 = INTEGER(pc1[1]), *restrict pt2 = INTEGER(pc2[1]); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(pt1[i] + (64988430769U * pt2[i]), K); while(h[id]) { if(pt1[h[id]-1] == pt1[i] && pt2[h[id]-1] == pt2[i]) goto ibl; if(++id >= M) id = 0; } h[id] = i + 1; ibl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(px1[i] + (64988430769U * px2[i]), K); while(h[id]) { if(pt1[h[id]-1] == px1[i] && pt2[h[id]-1] == px2[i]) { pans[i] = h[id]; goto ibl2; } if(++id >= M) id = 0; } pans[i] = nmv; ibl2:; } } break; case STRSXP: { for(int i = 0; i < 2; ++i) { if(need2utf8(pc1[i])) SET_VECTOR_ELT(pc[0], i, coerceUtf8IfNeeded(pc1[i])); if(need2utf8(pc2[i])) SET_VECTOR_ELT(pc[1], i, coerceUtf8IfNeeded(pc2[i])); } const SEXP *restrict px1 = SEXPPTR_RO(pc1[0]), *restrict px2 = SEXPPTR_RO(pc2[0]), *restrict pt1 = SEXPPTR_RO(pc1[1]), *restrict pt2 = SEXPPTR_RO(pc2[1]); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(64988430769U * ((uintptr_t)pt1[i] & 0xffffffff) + ((uintptr_t)pt2[i] & 0xffffffff), K); while(h[id]) { if(pt1[h[id]-1] == pt1[i] && pt2[h[id]-1] == pt2[i]) goto sbl; if(++id >= M) id = 0; } h[id] = i + 1; sbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(64988430769U * ((uintptr_t)px1[i] & 0xffffffff) + ((uintptr_t)px2[i] & 0xffffffff), K); while(h[id]) { if(pt1[h[id]-1] == px1[i] && pt2[h[id]-1] == px2[i]) { pans[i] = h[id]; goto sbl2; } if(++id >= M) id = 0; } pans[i] = nmv; sbl2:; } } break; case REALSXP: { const double *restrict px1 = REAL(pc1[0]), *restrict px2 = REAL(pc2[0]), *restrict pt1 = REAL(pc1[1]), *restrict pt2 = REAL(pc2[1]); union uno tpv1, tpv2; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tpv1.d = pt1[i] + 0.0; tpv2.d = pt2[i] + 0.0; id = HASH((64988430769U * (tpv1.u[0] + tpv1.u[1])) + tpv2.u[0] + tpv2.u[1], K); while(h[id]) { if(REQUAL(pt1[h[id]-1], pt1[i]) && REQUAL(pt2[h[id]-1], pt2[i])) goto rbl; if(++id >= M) id = 0; } h[id] = i + 1; rbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tpv1.d = px1[i] + 0.0; tpv2.d = px2[i] + 0.0; id = HASH((64988430769U * (tpv1.u[0] + tpv1.u[1])) + tpv2.u[0] + tpv2.u[1], K); while(h[id]) { if(REQUAL(pt1[h[id]-1], px1[i]) && REQUAL(pt2[h[id]-1], px2[i])) { pans[i] = h[id]; goto rbl2; } if(++id >= M) id = 0; } pans[i] = nmv; rbl2:; } } break; default: error("Type %s is not supported.", type2char(t1)); // Should never be reached } } else { // different types // First case: integer and real if((t1 == INTSXP && t2 == REALSXP) || (t1 == REALSXP && t2 == INTSXP)) { const int rev = t1 == REALSXP; const int *restrict pxi = INTEGER(VECTOR_ELT(pc[rev], 0)), *restrict pti = INTEGER(VECTOR_ELT(pc[rev], 1)); const double *restrict pxr = REAL(VECTOR_ELT(pc[1-rev], 0)), *restrict ptr = REAL(VECTOR_ELT(pc[1-rev], 1)); union uno tpv; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tpv.d = ptr[i] + 0.0; id = HASH((64988430769U * pti[i]) + tpv.u[0] + tpv.u[1], K); // TODO: improve! while(h[id]) { if(pti[h[id]-1] == pti[i] && REQUAL(ptr[h[id]-1], ptr[i])) goto irbl; if(++id >= M) id = 0; } h[id] = i + 1; irbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tpv.d = pxr[i] + 0.0; id = HASH((64988430769U * pxi[i]) + tpv.u[0] + tpv.u[1], K); // TODO: improve! while(h[id]) { if(pti[h[id]-1] == pxi[i] && REQUAL(ptr[h[id]-1], pxr[i])) { pans[i] = h[id]; goto irbl2; } if(++id >= M) id = 0; } pans[i] = nmv; irbl2:; } // Second case: real and string } else if ((t1 == REALSXP && t2 == STRSXP) || (t1 == STRSXP && t2 == REALSXP)) { const int rev = t1 == STRSXP; const double *restrict pxr = REAL(VECTOR_ELT(pc[rev], 0)), *restrict ptr = REAL(VECTOR_ELT(pc[rev], 1)); for(int i = 0; i < 2; ++i) { if(need2utf8(VECTOR_ELT(pc[1-rev], i))) SET_VECTOR_ELT(pc[1-rev], i, coerceUtf8IfNeeded(VECTOR_ELT(pc[1-rev], i))); } const SEXP *restrict pxs = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 0)), *restrict pts = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 1)); union uno tpv; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tpv.d = ptr[i] + 0.0; id = HASH((tpv.u[0] + tpv.u[1]) * ((uintptr_t)pts[i] & 0xffffffff), K); while(h[id]) { if(pts[h[id]-1] == pts[i] && REQUAL(ptr[h[id]-1], ptr[i])) goto rsbl; if(++id >= M) id = 0; } h[id] = i + 1; rsbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tpv.d = pxr[i] + 0.0; id = HASH((tpv.u[0] + tpv.u[1]) * ((uintptr_t)pxs[i] & 0xffffffff), K); while(h[id]) { if(pts[h[id]-1] == pxs[i] && REQUAL(ptr[h[id]-1], pxr[i])) { pans[i] = h[id]; goto rsbl2; } if(++id >= M) id = 0; } pans[i] = nmv; rsbl2:; } // Third case: integer and string } else if((t1 == INTSXP && t2 == STRSXP) || (t1 == STRSXP && t2 == INTSXP)) { const int rev = t1 == STRSXP; const int *restrict pxi = INTEGER(VECTOR_ELT(pc[rev], 0)), *restrict pti = INTEGER(VECTOR_ELT(pc[rev], 1)); for(int i = 0; i < 2; ++i) { if(need2utf8(VECTOR_ELT(pc[1-rev], i))) SET_VECTOR_ELT(pc[1-rev], i, coerceUtf8IfNeeded(VECTOR_ELT(pc[1-rev], i))); } const SEXP *restrict pxs = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 0)), *restrict pts = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 1)); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(pti[i] * ((uintptr_t)pts[i] & 0xffffffff), K); // TODO: improve! while(h[id]) { if(pts[h[id]-1] == pts[i] && pti[h[id]-1] == pti[i]) goto isbl; if(++id >= M) id = 0; } h[id] = i + 1; isbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(pxi[i] * ((uintptr_t)pxs[i] & 0xffffffff), K); while(h[id]) { if(pts[h[id]-1] == pxs[i] && pti[h[id]-1] == pxi[i]) { pans[i] = h[id]; goto isbl2; } if(++id >= M) id = 0; } pans[i] = nmv; isbl2:; } } else error("Unsupported types: %s and %s", type2char(t1), type2char(t2)); } R_Free(h); UNPROTECT(nprotect); return ans; } // TODO: create match_multiple_vectors: a generalization of match_two_vectors that works for multiple vectors // This will have to involve bucketing and subgroup matching // Also idea: combine matches using the maximum before the next largest value? // This is a workhorse function for matching more than 2 vectors: it matches the first two vectors and also // saves the unique value count and a group-id for the table which is used to match further columns using the same logic void match_two_vectors_extend(const SEXP *pc, const int nmv, const int n, const int nt, const size_t M, const int K, int *ng, int *pans, int *ptab) { const SEXP *pc1 = SEXPPTR_RO(pc[0]), *pc2 = SEXPPTR_RO(pc[1]); if(n != length(pc2[0])) error("both vectors in x must have the same length"); if(nt != length(pc2[1])) error("both vectors in table must have the same length"); int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M size_t id = 0; int ngt = 0; int t1 = TYPEOF(pc1[0]), t2 = TYPEOF(pc2[0]); if(t1 == LGLSXP) t1 = INTSXP; if(t2 == LGLSXP) t2 = INTSXP; // 6 cases: 3 same type and 3 different types if(t1 == t2) { // same type switch(t1) { case INTSXP: { const int *restrict px1 = INTEGER(pc1[0]), *restrict px2 = INTEGER(pc2[0]), *restrict pt1 = INTEGER(pc1[1]), *restrict pt2 = INTEGER(pc2[1]); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(pt1[i] + (64988430769U * pt2[i]), K); while(h[id]) { if(pt1[h[id]-1] == pt1[i] && pt2[h[id]-1] == pt2[i]) { ptab[i] = ptab[h[id]-1]; goto ibl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; ibl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(px1[i] + (64988430769U * px2[i]), K); while(h[id]) { if(pt1[h[id]-1] == px1[i] && pt2[h[id]-1] == px2[i]) { pans[i] = h[id]; goto ibl2; } if(++id >= M) id = 0; } pans[i] = nmv; ibl2:; } } break; case STRSXP: { for(int i = 0; i < 2; ++i) { if(need2utf8(pc1[i])) SET_VECTOR_ELT(pc[0], i, coerceUtf8IfNeeded(pc1[i])); if(need2utf8(pc2[i])) SET_VECTOR_ELT(pc[1], i, coerceUtf8IfNeeded(pc2[i])); } const SEXP *restrict px1 = SEXPPTR_RO(pc1[0]), *restrict px2 = SEXPPTR_RO(pc2[0]), *restrict pt1 = SEXPPTR_RO(pc1[1]), *restrict pt2 = SEXPPTR_RO(pc2[1]); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(64988430769U * ((uintptr_t)pt1[i] & 0xffffffff) + ((uintptr_t)pt2[i] & 0xffffffff), K); while(h[id]) { if(pt1[h[id]-1] == pt1[i] && pt2[h[id]-1] == pt2[i]) { ptab[i] = ptab[h[id]-1]; goto sbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; sbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(64988430769U * ((uintptr_t)px1[i] & 0xffffffff) + ((uintptr_t)px2[i] & 0xffffffff), K); while(h[id]) { if(pt1[h[id]-1] == px1[i] && pt2[h[id]-1] == px2[i]) { pans[i] = h[id]; goto sbl2; } if(++id >= M) id = 0; } pans[i] = nmv; sbl2:; } } break; case REALSXP: { const double *restrict px1 = REAL(pc1[0]), *restrict px2 = REAL(pc2[0]), *restrict pt1 = REAL(pc1[1]), *restrict pt2 = REAL(pc2[1]); union uno tpv1, tpv2; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tpv1.d = pt1[i] + 0.0; tpv2.d = pt2[i] + 0.0; id = HASH((64988430769U * (tpv1.u[0] + tpv1.u[1])) + tpv2.u[0] + tpv2.u[1], K); while(h[id]) { if(REQUAL(pt1[h[id]-1], pt1[i]) && REQUAL(pt2[h[id]-1], pt2[i])) { ptab[i] = ptab[h[id]-1]; goto rbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; rbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tpv1.d = px1[i] + 0.0; tpv2.d = px2[i] + 0.0; id = HASH((64988430769U * (tpv1.u[0] + tpv1.u[1])) + tpv2.u[0] + tpv2.u[1], K); while(h[id]) { if(REQUAL(pt1[h[id]-1], px1[i]) && REQUAL(pt2[h[id]-1], px2[i])) { pans[i] = h[id]; goto rbl2; } if(++id >= M) id = 0; } pans[i] = nmv; rbl2:; } } break; default: error("Type %s is not supported.", type2char(t1)); // Should never be reached } } else { // different types // First case: integer and real if((t1 == INTSXP && t2 == REALSXP) || (t1 == REALSXP && t2 == INTSXP)) { const int rev = t1 == REALSXP; const int *restrict pxi = INTEGER(VECTOR_ELT(pc[rev], 0)), *restrict pti = INTEGER(VECTOR_ELT(pc[rev], 1)); const double *restrict pxr = REAL(VECTOR_ELT(pc[1-rev], 0)), *restrict ptr = REAL(VECTOR_ELT(pc[1-rev], 1)); union uno tpv; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tpv.d = ptr[i] + 0.0; id = HASH((64988430769U * pti[i]) + tpv.u[0] + tpv.u[1], K); while(h[id]) { if(pti[h[id]-1] == pti[i] && REQUAL(ptr[h[id]-1], ptr[i])) { ptab[i] = ptab[h[id]-1]; goto irbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; irbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tpv.d = pxr[i] + 0.0; id = HASH((64988430769U * pxi[i]) + tpv.u[0] + tpv.u[1], K); while(h[id]) { if(pti[h[id]-1] == pxi[i] && REQUAL(ptr[h[id]-1], pxr[i])) { pans[i] = h[id]; goto irbl2; } if(++id >= M) id = 0; } pans[i] = nmv; irbl2:; } // Second case: real and string } else if ((t1 == REALSXP && t2 == STRSXP) || (t1 == STRSXP && t2 == REALSXP)) { const int rev = t1 == STRSXP; const double *restrict pxr = REAL(VECTOR_ELT(pc[rev], 0)), *restrict ptr = REAL(VECTOR_ELT(pc[rev], 1)); for(int i = 0; i < 2; ++i) { if(need2utf8(VECTOR_ELT(pc[1-rev], i))) SET_VECTOR_ELT(pc[1-rev], i, coerceUtf8IfNeeded(VECTOR_ELT(pc[1-rev], i))); } const SEXP *restrict pxs = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 0)), *restrict pts = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 1)); union uno tpv; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { tpv.d = ptr[i] + 0.0; id = HASH((tpv.u[0] + tpv.u[1]) * ((uintptr_t)pts[i] & 0xffffffff), K); while(h[id]) { if(pts[h[id]-1] == pts[i] && REQUAL(ptr[h[id]-1], ptr[i])) { // TODO: which comparison is more expensive? ptab[i] = ptab[h[id]-1]; goto rsbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; rsbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { tpv.d = pxr[i] + 0.0; id = HASH((tpv.u[0] + tpv.u[1]) * ((uintptr_t)pxs[i] & 0xffffffff), K); while(h[id]) { if(pts[h[id]-1] == pxs[i] && REQUAL(ptr[h[id]-1], pxr[i])) { // TODO: which comparison is more expensive? pans[i] = h[id]; goto rsbl2; } if(++id >= M) id = 0; } pans[i] = nmv; rsbl2:; } // Third case: integer and string } else if((t1 == INTSXP && t2 == STRSXP) || (t1 == STRSXP && t2 == INTSXP)) { const int rev = t1 == STRSXP; const int *restrict pxi = INTEGER(VECTOR_ELT(pc[rev], 0)), *restrict pti = INTEGER(VECTOR_ELT(pc[rev], 1)); for(int i = 0; i < 2; ++i) { if(need2utf8(VECTOR_ELT(pc[1-rev], i))) SET_VECTOR_ELT(pc[1-rev], i, coerceUtf8IfNeeded(VECTOR_ELT(pc[1-rev], i))); } const SEXP *restrict pxs = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 0)), *restrict pts = SEXPPTR_RO(VECTOR_ELT(pc[1-rev], 1)); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { id = HASH(pti[i] * ((uintptr_t)pts[i] & 0xffffffff), K); while(h[id]) { if(pti[h[id]-1] == pti[i] && pts[h[id]-1] == pts[i]) { ptab[i] = ptab[h[id]-1]; goto isbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; isbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { id = HASH(pxi[i] * ((uintptr_t)pxs[i] & 0xffffffff), K); while(h[id]) { if(pti[h[id]-1] == pxi[i] && pts[h[id]-1] == pxs[i]) { pans[i] = h[id]; goto isbl2; } if(++id >= M) id = 0; } pans[i] = nmv; isbl2:; } } else error("Unsupported types: %s and %s", type2char(t1), type2char(t2)); } *ng = ngt; R_Free(h); // Free hash table } // Helper function to match an additional vector void match_additional(const SEXP *pcj, const int nmv, const int n, const int nt, const size_t M, const int K, int *ng, int *pans_copy, int *pans, int *ptab_copy, int *ptab) { if(length(pcj[0]) != n) error("all vectors in x must have the same length"); if(length(pcj[1]) != nt) error("all vectors in table must have the same length"); int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M size_t id = 0; const unsigned int mult = (M-1) / nt; // TODO: This faster? or better hash ans ? -> Seems faster ! but possible failures ? int ngt = 0; // Copies really needed?? memcpy(pans_copy, pans, n * sizeof(int)); memcpy(ptab_copy, ptab, nt * sizeof(int)); // TODO: Special case for factors !!!! switch(TYPEOF(pcj[0])) { case INTSXP: case LGLSXP: { const int *restrict px = INTEGER(pcj[0]), *restrict pt = INTEGER(pcj[1]); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { if(ptab_copy[i] == nmv) { ++ngt; continue; } id = (ptab_copy[i]*mult) ^ HASH(pt[i], K); // HASH(ptab_copy[i], K) while(h[id]) { if(ptab_copy[h[id]-1] == ptab_copy[i] && pt[h[id]-1] == pt[i]) { ptab[i] = ptab[h[id]-1]; goto itbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; itbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { if(pans_copy[i] == nmv) continue; id = (pans_copy[i]*mult) ^ HASH(px[i], K); // HASH(pans_copy[i], K) while(h[id]) { if(ptab_copy[h[id]-1] == pans_copy[i] && pt[h[id]-1] == px[i]) { pans[i] = h[id]; goto itbl2; } if(++id >= M) id = 0; } pans[i] = nmv; itbl2:; } } break; case STRSXP: { const SEXP *restrict px = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pcj[0]))), *restrict pt = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pcj[1]))); // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { if(ptab_copy[i] == nmv) { ++ngt; continue; } id = (ptab_copy[i]*mult) ^ HASH(((uintptr_t) pt[i] & 0xffffffff), K); // HASH(ptab_copy[i], K) while(h[id]) { if(ptab_copy[h[id]-1] == ptab_copy[i] && pt[h[id]-1] == pt[i]) { ptab[i] = ptab[h[id]-1]; goto stbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; stbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { if(pans_copy[i] == nmv) continue; id = (pans_copy[i]*mult) ^ HASH(((uintptr_t) px[i] & 0xffffffff), K); // HASH(pans_copy[i], K) while(h[id]) { if(ptab_copy[h[id]-1] == pans_copy[i] && pt[h[id]-1] == px[i]) { pans[i] = h[id]; goto stbl2; } if(++id >= M) id = 0; } pans[i] = nmv; stbl2:; } UNPROTECT(2); } break; case REALSXP: { const double *restrict px = REAL(pcj[0]), *restrict pt = REAL(pcj[1]); union uno tpv; // fill hash table with indices of 'table' for (int i = 0; i != nt; ++i) { if(ptab_copy[i] == nmv) { ++ngt; continue; } tpv.d = pt[i] + 0.0; id = (ptab_copy[i]*mult) ^ HASH(tpv.u[0] + tpv.u[1], K); // HASH(ptab_copy[i], K) while(h[id]) { if(ptab_copy[h[id]-1] == ptab_copy[i] && REQUAL(pt[h[id]-1], pt[i])) { ptab[i] = ptab[h[id]-1]; goto rtbl; } if(++id >= M) id = 0; } ptab[i] = h[id] = i + 1; ++ngt; rtbl:; } // look up values of x in hash table for (int i = 0; i != n; ++i) { if(pans_copy[i] == nmv) continue; tpv.d = px[i] + 0.0; id = (pans_copy[i]*mult) ^ HASH(tpv.u[0] + tpv.u[1], K); // HASH(pans_copy[i], K) while(h[id]) { if(ptab_copy[h[id]-1] == pans_copy[i] && REQUAL(pt[h[id]-1], px[i])) { pans[i] = h[id]; goto rtbl2; } if(++id >= M) id = 0; } pans[i] = nmv; rtbl2:; } } break; default: error("Type %s is not supported.", type2char(TYPEOF(pcj[0]))); // Should never be reached } *ng = ngt; R_Free(h); // Free hash table } // This is after unique table rows have already been found, we simply need to check if the remaining columns are equal... void match_rest(const SEXP *pcj, const int nmv, const int n, const int nt, int *pans) { if(length(pcj[0]) != n) error("all vectors in x must have the same length"); if(length(pcj[1]) != nt) error("all vectors in table must have the same length"); switch(TYPEOF(pcj[0])) { case INTSXP: case LGLSXP: { const int *restrict px = INTEGER(pcj[0]), *restrict pt = INTEGER(pcj[1])-1; for (int i = 0; i != n; ++i) { if(pans[i] == nmv) continue; if(px[i] != pt[pans[i]]) pans[i] = nmv; } } break; case STRSXP: { const SEXP *restrict px = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pcj[0]))), *restrict pt = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pcj[1])))-1; for (int i = 0; i != n; ++i) { if(pans[i] == nmv) continue; if(px[i] != pt[pans[i]]) pans[i] = nmv; } UNPROTECT(2); } break; case REALSXP: { const double *restrict px = REAL(pcj[0]), *restrict pt = REAL(pcj[1])-1; for (int i = 0; i != n; ++i) { if(pans[i] == nmv) continue; if(!REQUAL(px[i], pt[pans[i]])) pans[i] = nmv; } } break; default: error("Type %s is not supported.", type2char(TYPEOF(pcj[0]))); // Should never be reached } } SEXP match_multiple(SEXP x, SEXP table, SEXP nomatch, SEXP overid) { if(TYPEOF(x) != VECSXP || TYPEOF(table) != VECSXP) error("both x and table need to be atomic vectors or lists"); const int l = length(x), lt = length(table), nmv = asInteger(nomatch); if(l == 0) return allocVector(INTSXP, 0); if(lt == 0) { SEXP nmvint = PROTECT(ScalarInteger(nmv)); SEXP lx0 = PROTECT(ScalarInteger(length(VECTOR_ELT(x, 0)))); SEXP sint1 = PROTECT(ScalarInteger(1)); SEXP res = falloc(nmvint, lx0, sint1); UNPROTECT(3); return res; } if(l != lt) error("length(n) must match length(nt)"); // Shallow copy and coercing as necessary SEXP clist = PROTECT(coerce_to_equal_types(x, table)); const SEXP *pc = SEXPPTR_RO(clist); const int n = length(VECTOR_ELT(pc[0], 0)), nt = length(VECTOR_ELT(pc[0], 1)); // Determining size of hash table const size_t n2 = 2U * (size_t) nt; size_t M = 256; int K = 8; while (M < n2) { M *= 2; K++; } int *restrict ptab = (int*)R_alloc(nt, sizeof(int)); // Table to contain the group-id of table int ng = 0; // Number of groups SEXP ans = PROTECT(allocVector(INTSXP, n)); int *restrict pans = INTEGER(ans); // Initial matching two vectors match_two_vectors_extend(pc, nmv, n, nt, M, K, &ng, pans, ptab); // Early termination if table is already unique or we only have 2 vectors (should use match_two_vectors() directly) if(l > 2) { int oid = asInteger(overid); // 0 = early termination, 1 = proceed with warning, 2 = proceed without warning if(oid > 0 || ng != nt) { // Need to copy table and ans: enters as first vector int *restrict ptab_copy = (int*)R_alloc(nt, sizeof(int)); int *restrict pans_copy = (int*)R_alloc(n, sizeof(int)); for (int j = 2; j < l; ++j) { if(ng != nt) match_additional(SEXPPTR_RO(pc[j]), nmv, n, nt, M, K, &ng, pans_copy, pans, ptab_copy, ptab); else { if(oid == 1) warning("Overidentified match/join: the first %d of %d columns uniquely match the records. With overid > 0, fmatch() continues to match columns. Consider removing columns or setting overid = 0 to terminate the algorithm after %d columns (the results may differ, see ?fmatch). Alternatively set overid = 2 to silence this warning.", j, l/oid++, j); if(oid <= 0) break; match_rest(SEXPPTR_RO(pc[j]), nmv, n, nt, pans); } } } } UNPROTECT(2); return ans; } SEXP fmatch_internal(SEXP x, SEXP table, SEXP nomatch, SEXP overid) { if(TYPEOF(x) == VECSXP) { if(length(x) == 2) return match_two_vectors(x, table, nomatch); if(length(x) == 1) return match_single(VECTOR_ELT(x, 0), VECTOR_ELT(table, 0), nomatch); return match_multiple(x, table, nomatch, overid); } return match_single(x, table, nomatch); } void count_match(SEXP res, int nt, int nmv) { const int *restrict pres = INTEGER(res); int n = length(res), nd = 0, nnm = 0; int *restrict cnt = (int*)R_Calloc(nt+1, int); for (int i = 0; i != n; ++i) { if(pres[i] == nmv) ++nnm; else if(cnt[pres[i]] == 0) { cnt[pres[i]] = 1; ++nd; } } R_Free(cnt); SEXP sym_nomatch = install("N.nomatch"); SEXP sym_distinct = install("N.distinct"); setAttrib(res, sym_nomatch, ScalarInteger(nnm)); setAttrib(res, sym_n_groups, ScalarInteger(nt)); setAttrib(res, sym_distinct, ScalarInteger(nd)); classgets(res, mkString("qG")); } // This is for export SEXP fmatchC(SEXP x, SEXP table, SEXP nomatch, SEXP count, SEXP overid) { if(asLogical(count) <= 0) return fmatch_internal(x, table, nomatch, overid); SEXP res = PROTECT(fmatch_internal(x, table, nomatch, overid)); int nt = isNewList(table) ? length(VECTOR_ELT(table, 0)) : length(table); count_match(res, nt, asInteger(nomatch)); UNPROTECT(1); return res; } collapse/src/Makevars0000644000176200001440000000036314777170131014351 0ustar liggesusers## -- compiling for OpenMP PKG_CFLAGS = $($(subst OPENMP,OPENMP_CFLAGS,SHLIB_OPENMP)) PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS ## -- using C++ 11 # CXX_STD = CXX11 ## -- linking for OpenMP PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) collapse/src/data.table_rbindlist.c0000644000176200001440000011126514777170131017076 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #include "data.table.h" // from assign.c void writeNA(SEXP v, const int from, const int n) // e.g. for use after allocVector() which does not initialize its result. { const int to = from-1+n; // writing to position 2147483647 in mind, 'i<=to' in loop conditions switch(TYPEOF(v)) { case RAWSXP: memset(RAW(v)+from, 0, n*sizeof(Rbyte)); break; case LGLSXP: { int *vd = (int *)LOGICAL(v); for (int i=from; i<=to; ++i) vd[i] = NA_LOGICAL; } break; case INTSXP: { // same whether factor or not int *vd = INTEGER(v); for (int i=from; i<=to; ++i) vd[i] = NA_INTEGER; } break; case REALSXP: { if (INHERITS(v, char_integer64)) { int64_t *vd = (int64_t *)REAL(v); for (int i=from; i<=to; ++i) vd[i] = NA_INTEGER64; } else { double *vd = REAL(v); for (int i=from; i<=to; ++i) vd[i] = NA_REAL; } } break; case CPLXSXP: { Rcomplex *vd = COMPLEX(v); for (int i=from; i<=to; ++i) vd[i] = NA_CPLX; } break; case STRSXP: { SEXP *vd = SEXPPTR(v); for (int i=from; i<=to; ++i) vd[i] = NA_STRING; } break; case VECSXP: case EXPRSXP: for (int i=from; i<=to; ++i) SET_VECTOR_ELT(v, i, R_NilValue); break; default : error("Internal error: writeNA passed a vector of type '%s'", type2char(TYPEOF(v))); // # nocov } } // Added, to replace memrecycle void writeValue(SEXP target, SEXP source, const int from, const int n) { int tt = TYPEOF(target), coerce = TYPEOF(source) != tt, os = isObject(source), ls = LENGTH(source); if(coerce) source = PROTECT(coerceVector(source, tt)); if(LENGTH(target) < n) error("Attempting to write %d elements to a vector of length %d", n, LENGTH(target)); if(ls < n) { if(ls != 1) error("Attempting to write %d elements to a vector of length %d. All vectors in sublist should be either length 1 or %d", ls, n, n); const int to = from-1+n; // writing to position 2147483647 in mind, 'i<=to' in loop conditions switch(tt) { case RAWSXP: { Rbyte *vd = RAW(target), value = RAW(source)[0]; for (int i=from; i<=to; ++i) vd[i] = value; } break; case LGLSXP: { int *vd = (int *)LOGICAL(target), value = (int)LOGICAL(source)[0]; for (int i=from; i<=to; ++i) vd[i] = value; } break; case INTSXP: { // same whether factor or not int *vd = INTEGER(target), value = INTEGER(source)[0]; for (int i=from; i<=to; ++i) vd[i] = value; } break; case REALSXP: { if (INHERITS(target, char_integer64)) { int64_t *vd = (int64_t *)REAL(target); int64_t value = (coerce || os == 0) ? (int64_t)REAL(source)[0] : ((int64_t *)REAL(source))[0]; for (int i=from; i<=to; ++i) vd[i] = value; } else { double *vd = REAL(target), value = REAL(source)[0]; for (int i=from; i<=to; ++i) vd[i] = value; } } break; case CPLXSXP: { Rcomplex *vd = COMPLEX(target), value = COMPLEX(source)[0]; for (int i=from; i<=to; ++i) vd[i] = value; } break; case STRSXP: case VECSXP: case EXPRSXP: { SEXP *vd = SEXPPTR(target); const SEXP value = SEXPPTR_RO(source)[0]; for (int i=from; i<=to; ++i) vd[i] = value; } break; default: error("Internal error: Unsupported column type '%s'", type2char(TYPEOF(target))); } } else { switch(tt) { case INTSXP: memcpy(INTEGER(target) + from, INTEGER(source), n * sizeof(int)); break; case LGLSXP: memcpy(LOGICAL(target) + from, LOGICAL(source), n * sizeof(int)); break; case REALSXP: { if (INHERITS(target, char_integer64)) { if(coerce || os == 0) { int64_t *ptgt = (int64_t *)REAL(target) + from; const double *ptcol = REAL_RO(source); for(int i = 0; i != n; ++i) ptgt[i] = ptcol[i]; } else { memcpy((int64_t *)REAL(target) + from, (int64_t *)REAL(source), n * sizeof(int64_t)); } } else { memcpy(REAL(target) + from, REAL(source), n * sizeof(double)); } } break; case STRSXP: case VECSXP: case EXPRSXP: { SEXP *ptgt = SEXPPTR(target) + from; const SEXP *ptcol = SEXPPTR_RO(source); for(int i = 0; i != n; ++i) ptgt[i] = ptcol[i]; break; } case RAWSXP: memcpy(RAW(target) + from, RAW(source), n * sizeof(Rbyte)); break; case CPLXSXP: memcpy(COMPLEX(target) + from, COMPLEX(source), n * sizeof(Rcomplex)); break; default: error("Internal error: Unsupported column type '%s'", type2char(TYPEOF(target))); } } if(coerce == 0) return; UNPROTECT(1); } static SEXP *saveds=NULL; static R_len_t *savedtl=NULL, nalloc=0, nsaved=0; void savetl_init(void) { if (nsaved || nalloc || saveds || savedtl) { error("Internal error: savetl_init checks failed (%d %d %p %p). please report to data.table issue tracker.", nsaved, nalloc, (void*)saveds, (void*)savedtl); // # nocov } nsaved = 0; nalloc = 100; saveds = (SEXP *)malloc(nalloc * sizeof(SEXP)); savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t)); if (saveds==NULL || savedtl==NULL) { savetl_end(); // # nocov error("Failed to allocate initial %d items in savetl_init", nalloc); // # nocov } } void savetl(SEXP s) { if (nsaved==nalloc) { if (nalloc==INT_MAX) { savetl_end(); // # nocov error("Internal error: reached maximum %d items for savetl. Please report to data.table issue tracker.", nalloc); // # nocov } nalloc = nalloc>(INT_MAX/2) ? INT_MAX : nalloc*2; char *tmp = (char *)realloc(saveds, nalloc*sizeof(SEXP)); if (tmp==NULL) { // C spec states that if realloc() fails the original block is left untouched; it is not freed or moved. We rely on that here. savetl_end(); // # nocov free(saveds) happens inside savetl_end error("Failed to realloc saveds to %d items in savetl", nalloc); // # nocov } saveds = (SEXP *)tmp; tmp = (char *)realloc(savedtl, nalloc*sizeof(R_len_t)); if (tmp==NULL) { savetl_end(); // # nocov error("Failed to realloc savedtl to %d items in savetl", nalloc); // # nocov } savedtl = (R_len_t *)tmp; } saveds[nsaved] = s; savedtl[nsaved] = TRULEN(s); nsaved++; } void savetl_end(void) { // Can get called if nothing has been saved yet (nsaved==0), or even if _init() hasn't been called yet (pointers NULL). Such // as to clear up before error. Also, it might be that nothing needed to be saved anyway. for (int i=0; i0 checked above eachMax[i] = 0; SEXP li = VECTOR_ELT(l, i); if (isNull(li)) continue; if (TYPEOF(li) != VECSXP) error("Item %d of input is not a data.frame, data.table or list", i+1); const int thisncol = length(li); if (!thisncol) continue; // delete as now more flexible ... if (fill && isNull(getAttrib(li, R_NamesSymbol))) error("When fill=TRUE every item of the input must have column names. Item %d does not.", i+1); if (fill) { if (thisncol>ncol) ncol=thisncol; // this section initializes ncol with max ncol. ncol may be increased when usenames is accounted for further down } else { if (ncol==0) { ncol=thisncol; first=i; } else if (thisncol!=ncol) error("Item %d has %d columns, inconsistent with item %d which has %d columns. To fill missing columns use fill=TRUE.", i+1, thisncol, first+1, ncol); } int nNames = length(getAttrib(li, R_NamesSymbol)); if (nNames>0 && nNames!=thisncol) error("Item %d has %d columns but %d column names. Invalid object.", i+1, thisncol, nNames); if (nNames>0) anyNames=true; upperBoundUniqueNames += nNames; int maxLen=0, whichMax=0; for (int j=0; jmaxLen) { maxLen=tt; whichMax=j; } } for (int j=0; j1 && tt!=maxLen) error("Column %d of item %d is length %d inconsistent with column %d which is length %d. Only length-1 columns are recycled.", j+1, i+1, tt, whichMax+1, maxLen); if (tt==0 && maxLen>0 && numZero++==0) { firstZeroCol = j; firstZeroItem=i; } } eachMax[i] = maxLen; nrow += maxLen; } if (numZero) { // #1871 SEXP names = getAttrib(VECTOR_ELT(l, firstZeroItem), R_NamesSymbol); const char *ch = names==R_NilValue ? "" : CHAR(STRING_ELT(names, firstZeroCol)); warning("Column %d ['%s'] of item %d is length 0. This (and %d other%s like it) has been filled with NA (NULL for list columns) to make each item uniform.", firstZeroCol+1, ch, firstZeroItem+1, numZero-1, numZero==2?"":"s"); } if (nrow==0 && ncol==0) return(R_NilValue); if (nrow>INT32_MAX) error("Total rows in the list is %dll which is larger than the maximum number of rows, currently %d", (int)nrow, INT32_MAX); if (usenames==TRUE && !anyNames) error("use.names=TRUE but no item of input list has any names"); int *colMap=NULL; // maps each column in final result to the column of each list item if (usenames==TRUE || usenames==NA_LOGICAL) { // here we proceed as if fill=true for brevity (accounting for dups is tricky) and then catch any missings after this branch // when use.names==NA we also proceed here as if use.names was TRUE to save new code and then check afterwards the map is 1:ncol for every item // first find number of unique column names present; i.e. length(unique(unlist(lapply(l,names)))) SEXP *uniq = (SEXP *)malloc(upperBoundUniqueNames * sizeof(SEXP)); // upperBoundUniqueNames was initialized with 1 to ensure this is defined (otherwise 0 when no item has names) if (!uniq) error("Failed to allocate upper bound of %dll unique column names [sum(lapply(l,ncol))]", (int)upperBoundUniqueNames); savetl_init(); int nuniq=0; for (int i=0; i0) savetl(s); uniq[nuniq++] = s; SET_TRULEN(s,-nuniq); } } if (nuniq>0) { SEXP *tt = realloc(uniq, nuniq*sizeof(SEXP)); // shrink to only what we need to release the spare if (!tt) free(uniq); // shrink never fails; just keep codacy happy uniq = tt; } // now count the dups (if any) and how they're distributed across the items int *counts = (int *)calloc(nuniq, sizeof(int)); // counts of names for each colnames int *maxdup = (int *)calloc(nuniq, sizeof(int)); // the most number of dups for any name within one colname vector if (!counts || !maxdup) { // # nocov start for (int i=0; i maxdup[u]) maxdup[u] = counts[u]; } } int ttncol = 0; for (int u=0; uncol) ncol=ttncol; free(maxdup); maxdup=NULL; // not needed again // ncol is now the final number of columns accounting for unique and dups across all colnames // allocate a matrix: nrows==length(list) each entry contains which column to fetch for that final column int *colMapRaw = (int *)malloc(ll*ncol * sizeof(int)); // the result of this scope used later int *uniqMap = (int *)malloc(ncol * sizeof(int)); // maps the ith unique string to the first time it occurs in the final result int *dupLink = (int *)malloc(ncol * sizeof(int)); // if a colname has occurred before (a dup) links from the 1st to the 2nd time in the final result, 2nd to 3rd, etc if (!colMapRaw || !uniqMap || !dupLink) { // # nocov start for (int i=0; i0) { w=dupLink[w]; --wi; } // hop through the dups if (wi && dupLink[w]==-1) { // first time we've seen this number of dups of this name w = dupLink[w] = lastDup--; uniqMap[w] = nextCol++; } } colMapRaw[i*ncol + uniqMap[w]] = j; } } } for (int i=0; i= for #546 -- TYPEORDER=0 for both LGLSXP and EXPRSXP (but also NULL) if (TYPEORDER(thisType)>=TYPEORDER(maxType) && !isNull(thisCol)) maxType=thisType; if (isFactor(thisCol)) { if (isNull(getAttrib(thisCol,R_LevelsSymbol))) error("Column %d of item %d has type 'factor' but has no levels; i.e. malformed.", w+1, i+1); factor = true; if (isOrdered(thisCol)) { orderedFactor = true; int thisLen = length(getAttrib(thisCol, R_LevelsSymbol)); if (thisLen>longestLen) { longestLen=thisLen; longestLevels=getAttrib(thisCol, R_LevelsSymbol); /*for warnings later ...*/longestW=w; longestI=i; } } } else if (!isString(thisCol)) anyNotStringOrFactor=true; // even for length 0 columns for consistency; test 2113.3 if (INHERITS(thisCol, char_integer64)) { if (firsti>=0 && !length(getAttrib(firstCol, R_ClassSymbol))) { firsti=i; firstw=w; firstCol=thisCol; } // so the integer64 attribute gets copied to target below int64=true; } if (firsti==-1) { firsti=i; firstw=w; firstCol=thisCol; } else { if (!factor && !int64) { if (!R_compute_identical(PROTECT(getAttrib(thisCol, R_ClassSymbol)), PROTECT(getAttrib(firstCol, R_ClassSymbol)), 0)) { error("Class attribute on column %d of item %d does not match with column %d of item %d.", w+1, i+1, firstw+1, firsti+1); } UNPROTECT(2); } } } if (!foundName) { static char buff[12]; snprintf(buff, sizeof(buff), "V%d", j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) if (int64 && maxType!=REALSXP) error("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP", j+1, type2char(maxType)); // # nocov SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list if (factor && anyNotStringOrFactor) { // in future warn, or use list column instead ... warning("Column %d contains a factor but not all items for the column are character or factor", idcol+j+1); // some coercing from (likely) integer/numeric to character will be needed. But this coerce can feasibly fail with out-of-memory, so we have to do it up-front // before the savetl_init() because we have no hook to clean up tl if coerceVector fails. if (coercedForFactor==NULL) { coercedForFactor=PROTECT(allocVector(VECSXP, ll)); nprotect++; } for (int i=0; i z regular factor because it contains an ambiguity: is a a a regular factor because this case isn't yet implemented. a0) savetl(s); levelsRaw[k] = s; SET_TRULEN(s,-k-1); } for (int i=0; i=last) { // if tl>=0 then also tl>=last because last<=0 if (tl>=0) { snprintf(warnStr, sizeof(warnStr), // not direct warning as we're inside tl region "Column %d of item %d is an ordered factor but level %d ['%s'] is missing from the ordered levels from column %d of item %d. " \ "Each set of ordered factor levels should be an ordered subset of the first longest. A regular factor will be created for this column.", w+1, i+1, k+1, CHAR(s), longestW+1, longestI+1); } else { snprintf(warnStr, sizeof(warnStr), "Column %d of item %d is an ordered factor with '%s'<'%s' in its levels. But '%s'<'%s' in the ordered levels from column %d of item %d. " \ "A regular factor will be created for this column due to this ambiguity.", w+1, i+1, CHAR(levelsD[k-1]), CHAR(s), CHAR(s), CHAR(levelsD[k-1]), longestW+1, longestI+1); // k>=1 (so k-1 is ok) because when k==0 last==0 and this branch wouldn't happen } orderedFactor=false; i=ll; // break outer i loop break; // break inner k loop // we leave the tl set for the longest levels; the regular factor will be created with the longest ordered levels first in case that useful for user } last = tl; // negative ordinal; last should monotonically grow more negative if the levels are an ordered subset of the longest } } } } for (int i=0; i0) savetl(s); if (allocLevel==nLevel) { // including initial time when allocLevel==nLevel==0 SEXP *tt = NULL; if (allocLevel(int64_t)INT_MAX) ? INT_MAX : (int)new; tt = (SEXP *)realloc(levelsRaw, allocLevel*sizeof(SEXP)); // first time levelsRaw==NULL and realloc==malloc in that case } if (tt==NULL) { // # nocov start // C spec states that if realloc() fails (above) the original block (levelsRaw) is left untouched: it is not freed or moved. We ... for (int k=0; k works for factors, Date and POSIXct, but not for POSIXlt (handeled in R) // TODO: SIMD / multithreading? -> I checked SIMD doesn't work, and multithreading hardly give any performance gains. // The largest cost anyways is lapply(), not gsplit() !! SEXP gsplit(SEXP x, SEXP gobj, SEXP toint) { if(TYPEOF(gobj) != VECSXP || !inherits(gobj, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP g = VECTOR_ELT(gobj, 1), gs = VECTOR_ELT(gobj, 2), ord = VECTOR_ELT(gobj, 5), order = VECTOR_ELT(gobj, 6); const int ng = length(gs), *pgs = INTEGER_RO(gs), tx = TYPEOF(x), l = length(g); if(ng != INTEGER(VECTOR_ELT(gobj, 0))[0]) error("'GRP' object needs to have valid vector of group-sizes"); SEXP res = PROTECT(allocVector(VECSXP, ng)); // Output as integer or not if(asLogical(toint)) { for(int i = 0; i != ng; ++i) SET_VECTOR_ELT(res, i, allocVector(INTSXP, pgs[i])); } else { // Allocate split vectors and copy attributes and object bits SEXP x1 = PROTECT(allocVector(tx, 1)); copyMostAttrib(x, x1); SEXP ax = ATTTR(x1); if(length(ax) == 1 && TAG(ax) == sym_label) ax = R_NilValue; int ox = OOBJ(x); // FAZIT: Need to use SET_VECTOR_ELT!! pres[i] = allocVector() doesn't work!! if(TYPEOF(ax) != NILSXP && ox != 0) { for(int i = 0; i != ng; ++i) { // , s4o = IS_S4_OBJECT(x) SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); SET_ATTTR(resi, ax); SET_OOBJ(resi, ox); // if(s4o) SET_S4_OBJECT(resi); } } else if(TYPEOF(ax) != NILSXP) { for(int i = 0; i != ng; ++i) { SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); // SET_ATTRIB(pres[i] = allocVector(tx, pgs[i]), ax); SET_ATTTR(resi, ax); } } else if(ox != 0) { // Is this even possible? Object bits but no attributes? for(int i = 0; i != ng; ++i) { // , s4o = IS_S4_OBJECT(x) SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); SET_OOBJ(resi, ox); // if(s4o) SET_S4_OBJECT(resi); } } else { for(int i = 0; i != ng; ++i) SET_VECTOR_ELT(res, i, allocVector(tx, pgs[i])); } UNPROTECT(1); } const SEXP *restrict pres = SEXPPTR_RO(res); // If grouping is sorted if(LOGICAL(ord)[1] == 1) { // This only works if data is already ordered in order of the groups int count = 0; if(asLogical(toint)) { for(int j = 0; j != ng; ++j) { int *pgj = INTEGER(pres[j]), gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = ++count; } } else { if(length(x) != l) error("length(x) must match length(g)"); switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER_RO(x); for(int j = 0; j != ng; ++j) { int *pgj = INT_DATAPTR(pres[j]), gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case REALSXP: { const double *px = REAL_RO(x); for(int j = 0, gsj; j != ng; ++j) { double *pgj = DBL_DATAPTR(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case CPLXSXP: { const Rcomplex *px = COMPLEX_RO(x); for(int j = 0, gsj; j != ng; ++j) { Rcomplex *pgj = COMPLEX(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); for(int j = 0, gsj; j != ng; ++j) { SEXP *pgj = SEXP_DATAPTR(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); for(int j = 0, gsj; j != ng; ++j) { SEXP *pgj = SEXP_DATAPTR(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } case RAWSXP: { const Rbyte *px = RAW_RO(x); for(int j = 0, gsj; j != ng; ++j) { Rbyte *pgj = RAW(pres[j]); gsj = pgs[j]; for(int i = 0; i != gsj; ++i) pgj[i] = px[count++]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } } else if(length(order) == l) { // Grouping not sorted but we have the ordering.. const SEXP starts = getAttrib(order, sym_starts); if(length(starts) != ng) goto unsno; const int *po = INTEGER_RO(order), *ps = INTEGER_RO(starts); if(asLogical(toint)) { for(int i = 0; i != ng; ++i) { int *pri = INT_DATAPTR(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; j++) pri[k++] = po[j]; } } else { if(length(x) != l) error("length(x) must match length(g)"); switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER_RO(x); for(int i = 0; i != ng; ++i) { int *pri = INT_DATAPTR(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case REALSXP: { const double *px = REAL_RO(x); for(int i = 0; i != ng; ++i) { double *pri = DBL_DATAPTR(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case CPLXSXP: { const Rcomplex *px = COMPLEX_RO(x); for(int i = 0; i != ng; ++i) { Rcomplex *pri = COMPLEX(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); for(int i = 0; i != ng; ++i) { SEXP *pri = SEXP_DATAPTR(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); for(int i = 0; i != ng; ++i) { SEXP *pri = SEXP_DATAPTR(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } case RAWSXP: { const Rbyte *px = RAW_RO(x); for(int i = 0; i != ng; ++i) { Rbyte *pri = RAW(pres[i]); for(int j = ps[i]-1, end = ps[i]+pgs[i]-1, k = 0; j < end; ++j) pri[k++] = px[po[j]-1]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } } else { // Unsorted, without ordering unsno:; int *count = (int*)R_Calloc(ng, int); // memset(count, 0, sizeof(int)*(ng+1)); // Needed here ?? // int *count = (int *) R_alloc(ng+1, sizeof(int)); const int *pg = INTEGER_RO(g); // --pres; if(asLogical(toint)) { for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; INT_DATAPTR(pres[gi])[count[gi]++] = i+1; } } else { if(length(x) != l) error("length(x) must match length(g)"); switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER_RO(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; INT_DATAPTR(pres[gi])[count[gi]++] = px[i]; } break; } case REALSXP: { const double *px = REAL_RO(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; DBL_DATAPTR(pres[gi])[count[gi]++] = px[i]; } break; } case CPLXSXP: { const Rcomplex *px = COMPLEX_RO(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; COMPLEX(pres[gi])[count[gi]++] = px[i]; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; SEXP_DATAPTR(pres[gi])[count[gi]++] = px[i]; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; SEXP_DATAPTR(pres[gi])[count[gi]++] = px[i]; } break; } case RAWSXP: { const Rbyte *px = RAW_RO(x); for(int i = 0, gi; i != l; ++i) { gi = pg[i]-1; RAW(pres[gi])[count[gi]++] = px[i]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } R_Free(count); } UNPROTECT(1); return res; } // This is for fmutate, to reorder the result of grouped data if the result has the same length as x SEXP greorder(SEXP x, SEXP gobj) { if(TYPEOF(gobj) != VECSXP || !inherits(gobj, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP g = VECTOR_ELT(gobj, 1), gs = VECTOR_ELT(gobj, 2), order = VECTOR_ELT(gobj, 6); const int ng = length(gs), l = length(g), tx = TYPEOF(x), *pgs = INTEGER_RO(gs), *pg = INTEGER_RO(g); if(l != length(x)) error("length(x) must match length(g)"); if(ng != INTEGER(VECTOR_ELT(gobj, 0))[0]) error("'GRP' object needs to have valid vector of group-sizes"); if(LOGICAL(VECTOR_ELT(gobj, 5))[1] == 1) return x; SEXP res = PROTECT(allocVector(tx, l)); // Note: This is only faster for a large number of groups... if(length(order) == l) { // Grouping not sorted but we have the ordering.. const SEXP starts = getAttrib(order, sym_starts); if(length(starts) != ng) goto unsno2; const int *po = INTEGER_RO(order), *ps = INTEGER_RO(starts); switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER_RO(x); int *pr = INTEGER(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case REALSXP: { const double *px = REAL_RO(x); double *pr = REAL(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case CPLXSXP: { const Rcomplex *px = COMPLEX_RO(x); Rcomplex *pr = COMPLEX(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pr = SEXPPTR(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case VECSXP: { SEXP *pr = SEXPPTR(res); const SEXP *px = SEXPPTR_RO(x); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } case RAWSXP: { const Rbyte *px = RAW_RO(x); Rbyte *pr = RAW(res); for(int i = 0, k = 0; i != ng; ++i) { for(int j = ps[i]-1, end = ps[i]+pgs[i]-1; j < end; ++j) pr[po[j]-1] = px[k++]; } break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } else { // Unsorted, without ordering unsno2:; int *count = (int *) R_alloc(ng+1, sizeof(int)); int *cgs = (int *) R_alloc(ng+2, sizeof(int)); cgs[1] = 0; for(int i = 0; i != ng; ++i) { count[i+1] = 0; cgs[i+2] = cgs[i+1] + pgs[i]; } switch(tx) { case INTSXP: case LGLSXP: { const int *px = INTEGER_RO(x); int *pr = INTEGER(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case REALSXP: { const double *px = REAL_RO(x); double *pr = REAL(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case CPLXSXP: { const Rcomplex *px = COMPLEX_RO(x); Rcomplex *pr = COMPLEX(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pr = SEXPPTR(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case VECSXP: { SEXP *pr = SEXPPTR(res); const SEXP *px = SEXPPTR_RO(x); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } case RAWSXP: { const Rbyte *px = RAW_RO(x); Rbyte *pr = RAW(res); for(int i = 0; i != l; ++i) pr[i] = px[cgs[pg[i]]+count[pg[i]]++]; break; } default: error("Unsupported type '%s' passed to gsplit", type2char(tx)); } } SHALLOW_DUPLICATE_ATTRIB(res, x); UNPROTECT(1); return res; } collapse/src/kit_dup.c0000644000176200001440000011103715121643306014452 0ustar liggesusers/* This code is adapted from the kit package: https://github.com/2005m/kit and licensed under a GPL-3.0 license. */ #include "collapse_c.h" #include "kit.h" // **************************************** // This function groups a single vector // **************************************** SEXP dupVecIndex(SEXP x) { const int n = length(x); int K = 0, tx = TYPEOF(x), x_min = INT_MAX, x_max = INT_MIN, anyNA = 0; size_t M; // if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { bigint:; const size_t n2 = 2U * (size_t) n; M = 256; K = 8; while (M < n2) { M *= 2; K++; } } else if(tx == INTSXP) { if(isFactor(x) || inherits(x, "qG")) { tx = 1000; M = isFactor(x) ? (size_t)nlevels(x) + 2 : (size_t)asInteger(getAttrib(x, sym_n_groups)) + 2; anyNA = !inherits(x, "na.included"); } else { int *restrict p = INTEGER(x); // Old: if(n < 10 || NOGE(p[0], n) || NOGE(p[n/2], n) || NOGE(p[n-1], n)) { // This loop is highly optimized... for(int i = 0, x_tmp; i != n; ++i) { x_tmp = p[i]; if(x_tmp > x_max) x_max = x_tmp; if(x_tmp < x_min) { if(x_tmp == NA_INTEGER) anyNA = 1; else x_min = x_tmp; } } double x_diff = (double)x_max - x_min; if(x_diff >= INT_MAX || x_diff <= INT_MIN) goto bigint; // To avoid overflows (UBSAN errors) x_max -= x_min; if(++x_max > 3 * n) goto bigint; M = (size_t)(x_max + 2); if(x_min == 0 || x_min == 1) tx = 1000; else x_max = NA_INTEGER; } else M = (size_t)n; } } else if (tx == LGLSXP) { M = 3; } else error("Type %s is not supported.", type2char(tx)); // # nocov int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M SEXP ans_i = PROTECT(allocVector(INTSXP, n)); int *restrict pans_i = INTEGER(ans_i), g = 0; size_t id = 0; switch (tx) { case LGLSXP: case 1000: // This is for factors or logical vectors where the size of the table is known { const int *restrict px = INTEGER(x); if(tx == 1000 && !anyNA) { for(int i = 0, j; i != n; ++i) { j = px[i]; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } else { for(int i = 0, j, k = (int)M-1; i != n; ++i) { j = (px[i] == NA_INTEGER) ? k : px[i]; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } } break; case INTSXP: { const int *restrict px = INTEGER(x); // Old: if(x_max == INT_MIN && M == (size_t)n) { // Faster version based on division hash... unsigned int iid = 0, nu = (unsigned)n; for (int i = 0; i != n; ++i) { iid = (unsigned)px[i]; if(iid >= nu) iid %= nu; while(h[iid]) { if(px[h[iid]-1] == px[i]) { pans_i[i] = pans_i[h[iid]-1]; goto ibl; } if(++iid >= nu) iid = 0; } h[iid] = i + 1; // need + 1 because for zero the while loop gives false.. pans_i[i] = ++g; ibl:; } } else if(x_max == NA_INTEGER) { // fastver version based on range x_min -= 1; if(anyNA) { for (int i = 0, j; i != n; ++i) { j = (px[i] == NA_INTEGER) ? 0 : px[i]-x_min; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } else { for (int i = 0, j; i != n; ++i) { j = px[i]-x_min; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } } else { for (int i = 0; i != n; ++i) { id = HASH(px[i], K); while(h[id]) { if(px[h[id]-1] == px[i]) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto ibbl; } if(++id >= M) id = 0; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; ibbl:; } } } break; case REALSXP: { const double *restrict px = REAL(x); // size_t offset; union uno tpv; for (int i = 0; i != n; ++i) { tpv.d = px[i] + 0.0; // R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN : px[i]); id = HASH(tpv.u[0] + tpv.u[1], K); // // Double hashing idea: not faster! // if(h[id]) { // if(REQUAL(px[h[id]-1], px[i])) { // pans_i[i] = pans_i[h[id]-1]; // h[id]; // continue; // } // offset = HASH(tpv.u[0] * tpv.u[1], K) / M + 1; // // if(offset == 0) offset = 1; // id += offset; // id %= M; // // if(id >= M) id = 0; // while(h[id]) { // if(REQUAL(px[h[id]-1], px[i])) { // pans_i[i] = pans_i[h[id]-1]; // h[id]; // goto rbl; // } // id += offset; // id %= M; // // if(id >= M) id = 0; // } // } while(h[id]) { if(REQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto rbl; } if(++id >= M) id = 0; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; rbl:; } } break; case CPLXSXP: { const Rcomplex *restrict px = COMPLEX(x); unsigned int u; union uno tpv; Rcomplex tmp; for (int i = 0; i != n; ++i) { tmp = px[i]; if(C_IsNA(tmp)) { tmp.r = tmp.i = NA_REAL; } else if (C_IsNaN(tmp)) { tmp.r = tmp.i = R_NaN; } tpv.d = tmp.r + 0.0; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i + 0.0; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u, K); while(h[id]) { if(CEQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto cbl; } if(++id >= M) id = 0; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; cbl:; } } break; case STRSXP: { const SEXP *restrict px = SEXPPTR_RO(x); for (int i = 0; i != n; ++i) { id = HASH(((uintptr_t) px[i] & 0xffffffff), K); while(h[id]) { if(px[h[id]-1] == px[i]) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto sbl; } if(++id >= M) id = 0; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; sbl:; } } break; } R_Free(h); setAttrib(ans_i, sym_n_groups, ScalarInteger(g)); UNPROTECT(1); return ans_i; } SEXP dupVecIndexKeepNA(SEXP x) { const int n = length(x); int K = 0, tx = TYPEOF(x); size_t M; // if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { bigint:; const size_t n2 = 2U * (size_t) n; M = 256; K = 8; while (M < n2) { M *= 2; K++; } } else if(tx == INTSXP) { if(isFactor(x) || inherits(x, "qG")) { tx = 1000; M = isFactor(x) ? (size_t)nlevels(x) + 2 : (size_t)asInteger(getAttrib(x, sym_n_groups)) + 2; } else { int *p = INTEGER(x); if(n > 10 && (NOGE(p[0], n) || NOGE(p[n/2], n) || NOGE(p[n-1], n))) goto bigint; M = (size_t)n; } } else if (tx == LGLSXP) { M = 3; } else error("Type %s is not supported.", type2char(tx)); // # nocov int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M SEXP ans_i = PROTECT(allocVector(INTSXP, n)); int *restrict pans_i = INTEGER(ans_i), g = 0; size_t id = 0; switch (tx) { case LGLSXP: case 1000: // This is for factors or logical vectors where the size of the table is known { const int *restrict px = INTEGER(x); for (int i = 0, j; i != n; ++i) { if(px[i] == NA_INTEGER) { pans_i[i] = NA_INTEGER; continue; } j = px[i]; if(h[j]) pans_i[i] = h[j]; else pans_i[i] = h[j] = ++g; } } break; case INTSXP: { const int *restrict px = INTEGER(x); if(M == (size_t)n) { // Faster version based on division hash... unsigned int iid = 0, nu = (unsigned)n; for (int i = 0; i != n; ++i) { if(px[i] == NA_INTEGER) { pans_i[i] = NA_INTEGER; continue; } iid = (unsigned)px[i]; if(iid >= nu) iid %= nu; // iid = (px[i] < n) ? px[i] : px[i] % n; // HASH(px[i], K); // get the hash value of x[i] while(h[iid]) { // Check if this hash value has been seen before if(px[h[iid]-1] == px[i]) { // Get the element of x that produced his value. if x[i] is the same, assign it the same index. pans_i[i] = pans_i[h[iid]-1]; // h[id]; goto ibl; } // else, we move forward to the next slot, until we find an empty one... We need to keep checking against the values, // because if we found the same value before, we would also have put it in another slot after the initial one with the same hash value. if(++iid >= nu) iid = 0; // ++iid; iid %= nu; // # nocov } // We put the index into the empty slot. h[iid] = i + 1; // need + 1 because for zero the while loop gives false.. pans_i[i] = ++g; // h[id]; ibl:; } } else { for (int i = 0; i != n; ++i) { if(px[i] == NA_INTEGER) { pans_i[i] = NA_INTEGER; continue; } id = HASH(px[i], K); while(h[id]) { if(px[h[id]-1] == px[i]) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto ibbl; } if(++id >= M) id = 0; // ++id; id %= M; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; ibbl:; } } } break; case REALSXP: { const double *restrict px = REAL(x); union uno tpv; for (int i = 0; i != n; ++i) { if(ISNAN(px[i])) { pans_i[i] = NA_INTEGER; continue; } tpv.d = px[i] + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto rbl; } if(++id >= M) id = 0; // ++id; id %= M; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; rbl:; } } break; case CPLXSXP: { const Rcomplex *restrict px = COMPLEX(x); unsigned int u; union uno tpv; Rcomplex tmp; for (int i = 0; i != n; ++i) { tmp = px[i]; if(C_IsNA(tmp) || C_IsNaN(tmp)) { pans_i[i] = NA_INTEGER; continue; } tpv.d = tmp.r + 0.0; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i + 0.0; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u, K); while(h[id]) { if(CEQUAL(px[h[id]-1], px[i])) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto cbl; } if(++id >= M) id = 0; //++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; cbl:; } } break; case STRSXP: { const SEXP *restrict px = SEXPPTR_RO(x); for (int i = 0; i != n; ++i) { if(px[i] == NA_STRING) { pans_i[i] = NA_INTEGER; continue; } id = HASH(((uintptr_t) px[i] & 0xffffffff), K); while(h[id]) { if(px[h[id]-1] == px[i]) { pans_i[i] = pans_i[h[id]-1]; // h[id]; goto sbl; } if(++id >= M) id = 0; //++id; id %= M; } h[id] = i + 1; pans_i[i] = ++g; sbl:; } } break; } R_Free(h); setAttrib(ans_i, sym_n_groups, ScalarInteger(g)); UNPROTECT(1); return ans_i; } // **************************************** // Group Two Vectors in One Pass // **************************************** SEXP dupVecIndexTwoVectors(SEXP x, SEXP y) { int n = length(x), tx = TYPEOF(x), ty = TYPEOF(y), K, K2, anyNA = 0; if(length(y) != n) error("length of first two columns in the data must be the same"); if(tx == CPLXSXP || ty == CPLXSXP) return R_NilValue; size_t M; SEXP ans = PROTECT(allocVector(INTSXP, n)); int *restrict pans = INTEGER(ans); // Check if both are discrete int both_discr = (tx == LGLSXP || (tx == INTSXP && (isFactor(x) || inherits(x, "qG")))) && (ty == LGLSXP || (ty == INTSXP && (isFactor(y) || inherits(y, "qG")))); if(both_discr) { K = tx == LGLSXP ? 1 : isFactor(x) ? nlevels(x) : asInteger(getAttrib(x, sym_n_groups)); K2 = ty == LGLSXP ? 1 : isFactor(y) ? nlevels(y) : asInteger(getAttrib(y, sym_n_groups)); if(tx == LGLSXP || !inherits(x, "na.included")) { K += 1; anyNA += 1; } if(ty == LGLSXP || !inherits(y, "na.included")) { K2 += 1; anyNA += 1; } if((size_t)K * K2 <= (size_t)n * 3) { M = anyNA ? (size_t)(K+1) * (K2+1) + 1 : (size_t)K * K2 + 1; // + 1 because of zero indexing } else both_discr = 0; } if(!both_discr) { const size_t n2 = 2U * (size_t)n; M = 256; K = 8; while (M < n2) { M *= 2; K++; } } int *restrict h = (int*)R_Calloc(M, int), g = 0, hid = 0; // Table to save the hash values, table has size M size_t id = 0; if(both_discr) { const int *restrict px = INTEGER_RO(x), *restrict py = INTEGER_RO(y); if(anyNA == 0) { for (int i = 0; i != n; ++i) { id = px[i] + (py[i]-1) * K; // This assumes logical vectors (with 0 = FALSE) cannot have the "na.included" attribute if(h[id]) pans[i] = h[id]; else pans[i] = h[id] = ++g; } } else { K += 1; for (int i = 0, xi, yi; i != n; ++i) { xi = (px[i] == NA_INTEGER) ? K : px[i]+1; yi = (py[i] == NA_INTEGER) ? K2 : py[i]; id = xi + yi * K; // Problem: if logical xi = 0, yi = 1 and xi = K, yi = 0 give the same.. if(h[id]) pans[i] = h[id]; else pans[i] = h[id] = ++g; } } } else { if(tx == LGLSXP) tx = INTSXP; if(ty == LGLSXP) ty = INTSXP; // 6 cases: 3 same type and 3 different types if(tx == ty) { // same type switch(tx) { case INTSXP: { const int *restrict px = INTEGER_RO(x), *restrict py = INTEGER_RO(y); for (int i = 0; i != n; ++i) { id = HASH(px[i] + (64988430769U * py[i]), K); // Multiplication doesn't work here: too few unique values // Another large prime taken from https://oeis.org/wiki/Higher-order_prime_numbers while(h[id]) { hid = h[id]-1; if(px[hid] == px[i] && py[hid] == py[i]) { pans[i] = pans[hid]; goto iibl; } if(++id >= M) id = 0; } h[id] = i + 1; pans[i] = ++g; iibl:; } } break; case STRSXP: { const SEXP *restrict px = SEXPPTR_RO(x), *restrict py = SEXPPTR_RO(y); for (int i = 0; i != n; ++i) { id = HASH(64988430769U * ((uintptr_t)px[i] & 0xffffffff) + ((uintptr_t)py[i] & 0xffffffff), K); // Best combination it seems while(h[id]) { hid = h[id]-1; if(px[hid] == px[i] && py[hid] == py[i]) { pans[i] = pans[hid]; goto ssbl; } if(++id >= M) id = 0; } h[id] = i + 1; pans[i] = ++g; ssbl:; } } break; case REALSXP: { const double *restrict px = REAL_RO(x), *restrict py = REAL_RO(y); union uno tpx, tpy; // fill hash table with indices of 'table' for (int i = 0; i != n; ++i) { tpx.d = px[i] + 0.0; tpy.d = py[i] + 0.0; id = HASH((64988430769U * (tpx.u[0] + tpx.u[1])) + tpy.u[0] + tpy.u[1], K); // Best combination it seems while(h[id]) { hid = h[id]-1; if(REQUAL(px[hid], px[i]) && REQUAL(py[hid], py[i])) { pans[i] = pans[hid]; goto rrbl; } if(++id >= M) id = 0; } h[id] = i + 1; pans[i] = ++g; rrbl:; } } break; default: error("Type %s is not supported.", type2char(tx)); } } else { // different types // First case: integer and real if((tx == INTSXP && ty == REALSXP) || (tx == REALSXP && ty == INTSXP)) { const int *restrict pi = INTEGER_RO(tx == INTSXP ? x : y); const double *restrict pr = REAL_RO(tx == REALSXP ? x : y); union uno tpv; for (int i = 0; i != n; ++i) { tpv.d = pr[i] + 0.0; id = HASH((64988430769U * pi[i]) + tpv.u[0] + tpv.u[1], K); // Best combination it seems while(h[id]) { hid = h[id]-1; if(pi[hid] == pi[i] && REQUAL(pr[hid], pr[i])) { pans[i] = pans[hid]; goto irbl; } if(++id >= M) id = 0; } h[id] = i + 1; pans[i] = ++g; irbl:; } // Second case: real and string } else if ((tx == REALSXP && ty == STRSXP) || (tx == STRSXP && ty == REALSXP)) { const SEXP *restrict ps = SEXPPTR_RO(tx == STRSXP ? x : y); const double *restrict pr = REAL_RO(tx == REALSXP ? x : y); union uno tpv; for (int i = 0; i != n; ++i) { tpv.d = pr[i] + 0.0; id = HASH((tpv.u[0] + tpv.u[1]) * ((uintptr_t)ps[i] & 0xffffffff), K); // Best combination it seems while(h[id]) { hid = h[id]-1; if(REQUAL(pr[hid], pr[i]) && ps[hid] == ps[i]) { // Seems comparing reals is faster.. pans[i] = pans[hid]; goto srbl; } if(++id >= M) id = 0; } h[id] = i + 1; pans[i] = ++g; srbl:; } // Third case: integer and string } else if((tx == INTSXP && ty == STRSXP) || (tx == STRSXP && ty == INTSXP)) { const int *restrict pi = INTEGER_RO(tx == INTSXP ? x : y); const SEXP *restrict ps = SEXPPTR_RO(tx == STRSXP ? x : y); for (int i = 0; i != n; ++i) { id = HASH(pi[i] * ((uintptr_t)ps[i] & 0xffffffff), K); while(h[id]) { hid = h[id]-1; if(pi[hid] == pi[i] && ps[hid] == ps[i]) { pans[i] = pans[hid]; goto isbl; } if(++id >= M) id = 0; } h[id] = i + 1; pans[i] = ++g; isbl:; } } else error("Unsupported types: %s and %s", type2char(tx), type2char(ty)); } } R_Free(h); setAttrib(ans, sym_n_groups, ScalarInteger(g)); UNPROTECT(1); return ans; } // TODO: Only one M calculation ? // Think: If in the second grouping variable all entries are the same, you loop through the whole table for each value.. // TODO: Faster possibility indexing by grouping vector?? -> would need multiple hash tables through which complicates things, // but could still end up being faster... // Idea: instead of hasing index again, just distribute it fairly through multiplying with (M/ng) // ************************************************** // This function adds a second vector to the grouping // ************************************************** int dupVecSecond(int *restrict pidx, int *restrict pans_i, SEXP x, const int n, const int ng) { if(length(x) != n) error("Unequal length columns"); int K = 0, tx = TYPEOF(x), anyNA = 1; size_t M; if (tx == INTSXP || tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { if(tx == INTSXP && (isFactor(x) || inherits(x, "qG"))) { K = isFactor(x) ? nlevels(x)+1 : asInteger(getAttrib(x, sym_n_groups))+1; anyNA = !inherits(x, "na.included"); if((size_t)K * ng <= (size_t)n * 3) { tx = 1000; M = (size_t)K * ng + 1; } else K = 0; } if(K == 0) { const size_t n2 = 2U * (size_t)n; // + ng M = 256; K = 8; while (M < n2) { M *= 2; K++; } // M += ng; // Here we add the number of previous groups... } } else if (tx == LGLSXP) { M = (size_t)ng * 3 + 1; } else error("Type %s is not supported.", type2char(tx)); // # nocov int *restrict h = (int*)R_Calloc(M, int), g = 0, hid = 0; // Table to save the hash values, table has size M size_t id = 0; switch (tx) { case LGLSXP: { const int *restrict px = LOGICAL(x); for (int i = 0; i != n; ++i) { id = (px[i] == NA_LOGICAL) ? pidx[i] : pidx[i] + (px[i] + 1) * ng; if(h[id]) pans_i[i] = h[id]; else pans_i[i] = h[id] = ++g; } } break; case 1000: // This is for factors if feasible... { const int *restrict px = INTEGER(x); if(anyNA) { for (int i = 0; i != n; ++i) { id = (px[i] == NA_INTEGER) ? pidx[i] : pidx[i] + px[i] * ng; if(h[id]) pans_i[i] = h[id]; else pans_i[i] = h[id] = ++g; } } else { for (int i = 0; i != n; ++i) { id = pidx[i] + px[i] * ng; if(h[id]) pans_i[i] = h[id]; else pans_i[i] = h[id] = ++g; } } } break; // TODO: Think further about this! Perhaps you can also do this totally differently with a second vector capturing the unique values of idx! // See again what Morgan does to his matrix of single groupings... // Note: In general, combining bitwise i.e. px[i] ^ pidx[i] seems slightly faster than multiplying (px[i] * pidx[i])... case INTSXP: { const int *restrict px = INTEGER(x); const unsigned int mult = (M-1) / ng; // -1 because C is zero indexed for (int i = 0; i != n; ++i) { // Check this... DATA group_by lon, lat main_cat, main_tag, main_tag_value: main_tag is the issue.. id = (pidx[i]*mult) ^ HASH(px[i], K); // HASH((unsigned)px[i] * (unsigned)pidx[i], K) + pidx[i]; // Need multiplication here instead of bitwise, see your benchmark with 100 mio. obs where second group is just sample.int(1e4, 1e8, T), there bitwise is very slow!! while(h[id]) { // However multiplication causes signed integer overflow... UBSAN error. hid = h[id]-1; if(pidx[hid] == pidx[i] && px[hid] == px[i]) { // Usually pidx has more distinct values... pans_i[i] = pans_i[hid]; goto ibl; } if(++id >= M) id = 0; // ++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; ibl:; } } break; case REALSXP: { const double *restrict px = REAL(x); const unsigned int mult = (M-1) / ng; // -1 because C is zero indexed union uno tpv; for (int i = 0; i != n; ++i) { tpv.d = px[i] + 0.0; // R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN :px[i]); id = (pidx[i]*mult) ^ HASH(tpv.u[0] + tpv.u[1], K); // HASH((tpv.u[0] + tpv.u[1]) ^ pidx[i], K) + pidx[i]; // Note: This is much faster than just adding pidx[i] to the hash value... while(h[id]) { // Problem: This value might be seen before, but not in combination with that pidx value... hid = h[id]-1; // The issue here is that REQUAL(px[hid], px[i]) could be true but pidx[hid] == pidx[i] fails, although the same combination of px and pidx could be seen earlier before... if(pidx[hid] == pidx[i] && REQUAL(px[hid], px[i])) { pans_i[i] = pans_i[hid]; goto rbl; } if(++id >= M) id = 0; //++id; id %= M; } h[id] = i + 1; pans_i[i] = ++g; // h[id]; rbl:; } } break; case CPLXSXP: { const Rcomplex *restrict px = COMPLEX(x); const unsigned int mult = (M-1) / ng; // -1 because C is zero indexed unsigned int u; union uno tpv; Rcomplex tmp; for (int i = 0; i != n; ++i) { tmp = px[i]; if(C_IsNA(tmp)) { tmp.r = tmp.i = NA_REAL; } else if (C_IsNaN(tmp)) { tmp.r = tmp.i = R_NaN; } tpv.d = tmp.r + 0.0; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i + 0.0; u ^= tpv.u[0] ^ tpv.u[1]; id = (pidx[i]*mult) ^ HASH(u, K); while(h[id]) { hid = h[id]-1; if(pidx[hid] == pidx[i] && CEQUAL(px[hid], px[i])) { pans_i[i] = pans_i[hid]; goto cbl; } if(++id >= M) id = 0; //++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; cbl:; } } break; case STRSXP: { const SEXP *restrict px = SEXPPTR_RO(x); const unsigned int mult = (M-1) / ng; // -1 because C is zero indexed for (int i = 0; i != n; ++i) { id = (pidx[i]*mult) ^ HASH(((uintptr_t) px[i] & 0xffffffff), K); // HASH(((uintptr_t) px[i] & 0xffffffff) ^ pidx[i], K) + pidx[i]; while(h[id]) { hid = h[id]-1; if(pidx[hid] == pidx[i] && px[hid] == px[i]) { pans_i[i] = pans_i[hid]; goto sbl; } if(++id >= M) id = 0; //++id; id %= M; // # nocov } h[id] = i + 1; pans_i[i] = ++g; // h[id]; sbl:; } } break; } R_Free(h); return g; } // ************************************************************************ // This function brings everything together for vectors or lists of vectors // ************************************************************************ SEXP groupVec(SEXP X, SEXP starts, SEXP sizes) { int l = length(X), islist = TYPEOF(X) == VECSXP, start = asLogical(starts), size = asLogical(sizes), nprotect = 0; // Better not exceptions to fundamental algorithms, when a couple of user-level functions return qG objects... // if(islist == 0 && isObject(X) && inherits(X, "qG") && inherits(X, "na.included")) return X; // return "qG" objects const SEXP *px = islist ? SEXPPTR_RO(X) : &X; SEXP idx = islist == 0 ? dupVecIndex(X) : l > 1 ? dupVecIndexTwoVectors(px[0], px[1]) : dupVecIndex(px[0]); if(isNull(idx)) { // One of the vectors is complex valued idx = dupVecIndex(px[0]); l += 1; px -= 1; } else if(!(islist && l > 2) && start == 0 && size == 0) return idx; // l == 1 && PROTECT(idx); ++nprotect; SEXP res; int ng = asInteger(getAttrib(idx, sym_n_groups)), n = length(idx); if(islist && l > 2) { SEXP ans = PROTECT(allocVector(INTSXP, n)); ++nprotect; int i = 2, *pidx = INTEGER(idx), *pans = INTEGER(ans); for( ; i < l; ++i) { if(ng == n) break; if(i % 2) { ng = dupVecSecond(pans, pidx, px[i], n, ng); } else { ng = dupVecSecond(pidx, pans, px[i], n, ng); } } res = i % 2 ? ans : idx; setAttrib(res, sym_n_groups, ScalarInteger(ng)); } else res = idx; // Cumpoting group starts and sizes attributes if(start || size) { PROTECT(res); ++nprotect; int *pres = INTEGER(res); if(start && size) { // Protect res ?? SEXP gs, st; setAttrib(res, sym_starts, st = allocVector(INTSXP, ng)); setAttrib(res, sym_group_sizes, gs = allocVector(INTSXP, ng)); if(ng > 0) { int *pgs = INTEGER(gs), *pst = INTEGER(st); memset(pgs, 0, sizeof(int) * ng); --pgs; memset(pst, 0, sizeof(int) * ng); --pst; for(int i = 0; i != n; ++i) { ++pgs[pres[i]]; if(pst[pres[i]] == 0) pst[pres[i]] = i + 1; } } } else if(start) { SEXP st; setAttrib(res, sym_starts, st = allocVector(INTSXP, ng)); if(ng > 0) { int *pst = INTEGER(st), k = 0; memset(pst, 0, sizeof(int) * ng); --pst; for(int i = 0; i != n; ++i) { if(pst[pres[i]] == 0) { pst[pres[i]] = i + 1; if(++k == ng) break; } } } } else { SEXP gs; setAttrib(res, sym_group_sizes, gs = allocVector(INTSXP, ng)); if(ng > 0) { int *pgs = INTEGER(gs); memset(pgs, 0, sizeof(int) * ng); --pgs; for(int i = 0; i != n; ++i) ++pgs[pres[i]]; } } } UNPROTECT(nprotect); return res; } // This version is only for atomic vectors (factor generation) SEXP groupAtVec(SEXP X, SEXP starts, SEXP naincl) { int start = asLogical(starts), nain = asLogical(naincl); // Note: These functions will give errors for unsupported types... SEXP idx = nain ? dupVecIndex(X) : dupVecIndexKeepNA(X); if(start == 0) return idx; PROTECT(idx); SEXP st; int ng = asInteger(getAttrib(idx, sym_n_groups)), n = length(idx), *pidx = INTEGER(idx); setAttrib(idx, sym_starts, st = allocVector(INTSXP, ng)); if(ng > 0) { int *pst = INTEGER(st), k = 0; memset(pst, 0, sizeof(int) * ng); --pst; if(nain) { for(int i = 0; i != n; ++i) { if(pst[pidx[i]] == 0) { pst[pidx[i]] = i + 1; if(++k == ng) break; } } } else { for(int i = 0; i != n; ++i) { if(pidx[i] != NA_INTEGER && pst[pidx[i]] == 0) { pst[pidx[i]] = i + 1; if(++k == ng) break; } } } } UNPROTECT(1); return idx; } // Same as dupVecIndex, but saves group starts and returns unique values SEXP funiqueC(SEXP x) { const int n = length(x); if(n <= 1) return x; int K = 0, tx = TYPEOF(x); size_t M; // if(n >= INT_MAX) error("Length of 'x' is too large. (Long vector not supported yet)"); // 1073741824 if (tx == STRSXP || tx == REALSXP || tx == CPLXSXP) { bigint:; const size_t n2 = 2U * (size_t) n; M = 256; K = 8; while (M < n2) { M *= 2; K++; } } else if(tx == INTSXP) { if(isFactor(x) || inherits(x, "qG")) { tx = 1000; M = isFactor(x) ? (size_t)nlevels(x) + 2 : (size_t)asInteger(getAttrib(x, sym_n_groups)) + 2; } else { int *p = INTEGER(x); if(n > 10 && (NOGE(p[0], n) || NOGE(p[n/2], n) || NOGE(p[n-1], n))) goto bigint; M = (size_t)n; } } else if (tx == LGLSXP) { M = 3; } else error("Type %s is not supported.", type2char(tx)); // # nocov int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M int *restrict st = (int*)R_alloc((tx == LGLSXP || tx == 1000) ? (int)M : n, sizeof(int)); int g = 0, nprotect = 0; size_t id = 0; SEXP res = R_NilValue; switch (tx) { case LGLSXP: case 1000: // This is for factors or logical vectors where the size of the table is known { const int *restrict px = INTEGER(x); if(tx == 1000 && inherits(x, "na.included")) { for(int i = 0, k = (int)M-1, ng = k-1; i != n; ++i) { if(h[px[i]]) continue; h[px[i]] = 1; st[g] = i; if(++g == ng) break; } } else { int ng = tx == LGLSXP ? 3 : (int)M-1; for(int i = 0, j, k = (int)M-1; i != n; ++i) { j = (px[i] == NA_INTEGER) ? k : px[i]; if(h[j]) continue; h[j] = 1; st[g] = i; if(++g == ng) break; } } R_Free(h); if(g == n) return x; PROTECT(res = allocVector(tx == LGLSXP ? LGLSXP : INTSXP, g)); ++nprotect; int *restrict pres = INTEGER(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; case INTSXP: { const int *restrict px = INTEGER(x); if(M == (size_t)n) { // Faster version based on division hash... unsigned int iid = 0, nu = (unsigned)n; for (int i = 0; i != n; ++i) { iid = (unsigned)px[i]; if(iid >= nu) iid %= nu; while(h[iid]) { if(px[h[iid]-1] == px[i]) goto ibl; if(++iid >= nu) iid = 0; } h[iid] = i + 1; st[g++] = i; ibl:; } } else { for (int i = 0; i != n; ++i) { id = HASH(px[i], K); while(h[id]) { if(px[h[id]-1] == px[i]) goto ibbl; if(++id >= M) id = 0; } h[id] = i + 1; st[g++] = i; ibbl:; } } R_Free(h); if(g == n) { UNPROTECT(nprotect); return x; } PROTECT(res = allocVector(INTSXP, g)); ++nprotect; int *restrict pres = INTEGER(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; case REALSXP: { const double *restrict px = REAL(x); union uno tpv; for (int i = 0; i != n; ++i) { tpv.d = px[i] + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(px[h[id]-1], px[i])) goto rbl; if(++id >= M) id = 0; } h[id] = i + 1; st[g++] = i; rbl:; } R_Free(h); if(g == n) { UNPROTECT(nprotect); return x; } PROTECT(res = allocVector(REALSXP, g)); ++nprotect; double *restrict pres = REAL(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; case CPLXSXP: { const Rcomplex *restrict px = COMPLEX(x); unsigned int u; union uno tpv; Rcomplex tmp; for (int i = 0; i != n; ++i) { tmp = px[i]; if(C_IsNA(tmp)) { tmp.r = tmp.i = NA_REAL; } else if (C_IsNaN(tmp)) { tmp.r = tmp.i = R_NaN; } tpv.d = tmp.r + 0.0; u = tpv.u[0] ^ tpv.u[1]; tpv.d = tmp.i + 0.0; u ^= tpv.u[0] ^ tpv.u[1]; id = HASH(u, K); while(h[id]) { if(CEQUAL(px[h[id]-1], px[i])) goto cbl; if(++id >= M) id = 0; // # nocov } h[id] = i + 1; st[g++] = i; cbl:; } R_Free(h); if(g == n) { UNPROTECT(nprotect); return x; } PROTECT(res = allocVector(CPLXSXP, g)); ++nprotect; Rcomplex *restrict pres = COMPLEX(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; case STRSXP: { const SEXP *restrict px = SEXPPTR_RO(x); for (int i = 0; i != n; ++i) { id = HASH(((uintptr_t) px[i] & 0xffffffff), K); while(h[id]) { if(px[h[id]-1] == px[i]) goto sbl; if(++id >= M) id = 0; // # nocov } h[id] = i + 1; st[g++] = i; sbl:; } R_Free(h); if(g == n) { UNPROTECT(nprotect); return x; } PROTECT(res = allocVector(STRSXP, g)); ++nprotect; SEXP *restrict pres = SEXPPTR(res); for(int i = 0; i != g; ++i) pres[i] = px[st[i]]; } break; } copyMostAttrib(x, res); UNPROTECT(nprotect); return res; } // TODO: fduplicated and any_duplicated: smart default methods... // From the kit package... /* * Data.Frame */ // SEXP dupDataFrameR(SEXP x) { // move to matrix if possible // // const SEXP *restrict px = SEXPPTR_RO(x); // const R_xlen_t len_x = xlength(x); // const R_xlen_t len_i = xlength(px[0]); // SEXP ans = R_NilValue; // SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x)); // for (R_xlen_t i = 0; i < len_x; ++i) { // memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i]))), (unsigned)len_i*sizeof(int)); // } // UNPROTECT((int)len_x); // const size_t n2 = 2U * (size_t) len_i; // size_t M = 256; // int K = 8; // while (M < n2) { // M *= 2; // K++; // } // R_xlen_t count = 0; // int *restrict h = (int*) R_Calloc(M, int); // const int *restrict v = INTEGER(mlv); // int *restrict pans = (int*) R_Calloc(len_i, int); // size_t id = 0; // // for (R_xlen_t i = 0; i < len_i; ++i) { // R_xlen_t key = 0; // for (R_xlen_t j = 0; j < len_x; ++j) { // key ^= HASH(v[i+j*len_i],K)*97*(j+1); // } // id = HASH(key, K); // while (h[id]) { // for (R_xlen_t j = 0; j < len_x; ++j) { // if (v[h[id]-1+j*len_i] != v[i+j*len_i]) { // goto label1; // } // } // goto label2; // label1:; // id++; id %= M; // } // h[id] = (int) i + 1; // pans[i]++; // count++; // label2:; // } // R_Free(h); // UNPROTECT(1); // SEXP indx = PROTECT(allocVector(INTSXP, count)); // int ct = 0; // int *restrict py = INTEGER(indx); // for (int i = 0; ct < count; ++i) { // if (pans[i]) { // py[ct++] = i; // } // } // SEXP output = PROTECT(subSetRowDataFrame(x, indx)); // R_Free(pans); // UNPROTECT(2); // return output; // } /* * Data.Frame */ // SEXP dupLenDataFrameR(SEXP x) { // const SEXP *restrict px = SEXPPTR_RO(x); // const R_xlen_t len_x = xlength(x); // // bool allT = true; // // const SEXPTYPE t0 = UTYPEOF(px[0]); // // for (int i = 1; i < len_x; ++i) { // // if (UTYPEOF(px[i]) != t0) { // // allT = false; // // break; // // } // // } // // if (allT) { // // SEXP output = PROTECT(dupLenMatrixR(PROTECT(dfToMatrix(x)))); // // UNPROTECT(2); // // return output; // // } // const R_xlen_t len_i = xlength(px[0]); // SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x)); // for (R_xlen_t i = 0; i < len_x; ++i) { // memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i], ScalarLogical(false)))), (unsigned)len_i*sizeof(int)); // } // UNPROTECT((int)len_x); // const size_t n2 = 2U * (size_t) len_i; // size_t M = 256; // int K = 8; // while (M < n2) { // M *= 2; // K++; // } // R_xlen_t count = 0; // int *restrict h = (int*) R_Calloc(M, int); // const int *restrict v = INTEGER(mlv); // size_t id = 0; // for (R_xlen_t i = 0; i < len_i; ++i) { // R_xlen_t key = 0; // for (R_xlen_t j = 0; j < len_x; ++j) { // key ^= HASH(v[i+j*len_i],K)*97*(j+1); // } // id = HASH(key, K); // while (h[id]) { // for (R_xlen_t j = 0; j < len_x; ++j) { // if (v[h[id]-1+j*len_i] != v[i+j*len_i]) { // goto label1; // } // } // goto label2; // label1:; // id++; id %= M; // } // h[id] = (int) i + 1; // count++; // label2:; // } // R_Free(h); // UNPROTECT(1); // return ScalarInteger(count); // } collapse/src/kit.h0000644000176200001440000000206614777170131013617 0ustar liggesusers/* This code is adapted from the kit package: https://github.com/2005m/kit and licensed under a GPL-3.0 license. */ #include #include #include // needed for uintptr_t on linux #define NOGE(x, l) ((x < 0 && x != NA_INTEGER) || (x >= l)) #define HASH(key, K) (3141592653U * (unsigned int)(key) >> (32 - (K))) #define HASHK(key, K) (3141592653U * (unsigned int)(key) >> (K)) #define N_ISNAN(x, y) (!ISNAN(x) && !ISNAN(y)) #define B_IsNA(x, y) (R_IsNA(x) && R_IsNA(y)) #define B_IsNaN(x, y) (R_IsNaN(x) && R_IsNaN(y)) #define B_ISNAN(x, y) (ISNAN(x) && ISNAN(y)) #define C_IsNA(x) (R_IsNA(x.r) || R_IsNA(x.i)) #define C_IsNaN(x) (R_IsNaN(x.r) || R_IsNaN(x.i)) #define C_ISNAN(x, y) (B_ISNAN(x, y) || (N_ISNAN(x, y) && x == y)) #define REQUAL(x, y) (N_ISNAN(x, y) ? (x == y) : (B_IsNA(x, y) || B_IsNaN(x, y))) #define CEQUAL(x, y) ((N_ISNAN(x.r, x.i) && N_ISNAN(y.r, y.i)) ? (x.r == y.r && x.i == y.i) : (C_IsNA(x) ? C_IsNA(y) : (C_IsNA(y) ? 0 : (C_ISNAN(x.r, y.r) && C_ISNAN(x.i, y.i))))) union uno { double d; unsigned int u[2]; }; collapse/src/fndistinct.c0000644000176200001440000005144515121643531015166 0ustar liggesusers#include "collapse_c.h" // Needs to be first because includes OpenMP #include "kit.h" // C-implementations for different data types ---------------------------------- // TODO: outsource and memset hash table? // Problem: does not work in parallel, each thread needs own hash table... int ndistinct_int(const int *restrict px, const int *restrict po, const int l, const int sorted, const int narm) { if(l == 1) return !(narm && px[sorted ? 0 : po[0]-1] == NA_INTEGER); const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, ndist = 0, anyNA = 0; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M if(sorted) { for (int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) { anyNA = 1; continue; } id = HASH(px[i], K); while(h[id]) { if(px[h[id]-1] == px[i]) goto ibls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; ++ndist; ibls:; } } else { for (int i = 0, xi; i != l; ++i) { xi = px[po[i]-1]; if(xi == NA_INTEGER) { anyNA = 1; continue; } id = HASH(xi, K); while(h[id]) { if(px[po[h[id]-1]-1] == xi) goto ibl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; ++ndist; ibl:; } } R_Free(h); if(narm == 0) ndist += anyNA; return ndist; } int ndistinct_fct(const int *restrict px, const int *restrict po, const int l, const int nlev, const int sorted, const int narm) { if(l == 1) return !(narm && px[sorted ? 0 : po[0]-1] == NA_INTEGER); int *restrict h = (int*)R_Calloc(nlev+1, int); int ndist = 0, anyNA = narm; // Ensures breaking works if narm = TRUE or FALSE if(sorted) { for (int i = 0, xi; i != l; ++i) { xi = px[i]; if(xi == NA_INTEGER) { anyNA = 1; continue; } if(h[xi]) continue; ++ndist; if(anyNA && ndist == nlev) break; h[xi] = 1; } } else { for (int i = 0, xi; i != l; ++i) { xi = px[po[i]-1]; if(xi == NA_INTEGER) { anyNA = 1; continue; } if(h[xi]) continue; ++ndist; if(anyNA && ndist == nlev) break; h[xi] = 1; } } if(narm == 0) ndist += anyNA; R_Free(h); return ndist; } int ndistinct_logi(const int *restrict px, const int *restrict po, const int l, const int sorted, const int narm) { if(l == 1) return !(narm && px[sorted ? 0 : po[0]-1] == NA_LOGICAL); int seenT = 0, seenF = 0, anyNA = narm; // Ensures breaking works if narm = TRUE or FALSE if(sorted) { for (int i = 0, xi; i != l; ++i) { xi = px[i]; if(xi == NA_LOGICAL) { anyNA = 1; } else if(xi) { if(seenT) continue; seenT = 1; if(anyNA && seenF) break; } else { if(seenF) continue; seenF = 1; if(anyNA && seenT) break; } } } else { for (int i = 0, xi; i != l; ++i) { xi = px[po[i]-1]; if(xi == NA_LOGICAL) { anyNA = 1; } else if(xi) { if(seenT) continue; seenT = 1; if(anyNA && seenF) break; } else { if(seenF) continue; seenF = 1; if(anyNA && seenT) break; } } } if(narm == 0) seenT += anyNA; return seenT + seenF; } int ndistinct_double(const double *restrict px, const int *restrict po, const int l, const int sorted, const int narm) { if(l == 1) return !(narm && ISNAN(px[sorted ? 0 : po[0]-1])); const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, ndist = 0, anyNA = 0; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M union uno tpv; double xi; if(sorted) { for (int i = 0; i != l; ++i) { if(ISNAN(px[i])) { anyNA = 1; continue; } tpv.d = px[i] + 0.0; // to avoid -0.0 and 0.0 being different id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(px[h[id]-1], px[i])) goto rbls; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; ++ndist; rbls:; } } else { for (int i = 0; i != l; ++i) { xi = px[po[i]-1]; if(ISNAN(xi)) { anyNA = 1; continue; } tpv.d = xi + 0.0; id = HASH(tpv.u[0] + tpv.u[1], K); while(h[id]) { if(REQUAL(px[po[h[id]-1]-1], xi)) goto rbl; if(++id >= M) id %= M; // ++id; id %= M; } h[id] = i + 1; ++ndist; rbl:; } } R_Free(h); if(narm == 0) ndist += anyNA; return ndist; } int ndistinct_string(const SEXP *restrict px, const int *restrict po, const int l, const int sorted, const int narm) { if(l == 1) return !(narm && px[sorted ? 0 : po[0]-1] == NA_STRING); const size_t l2 = 2U * (size_t) l; size_t M = 256, id = 0; int K = 8, ndist = 0, anyNA = 0; while(M < l2) { M *= 2; K++; } int *restrict h = (int*)R_Calloc(M, int); // Table to save the hash values, table has size M SEXP xi; if(sorted) { for (int i = 0; i != l; ++i) { if(px[i] == NA_STRING) { anyNA = 1; continue; } id = HASH(((uintptr_t) px[i] & 0xffffffff), K); while(h[id]) { if(px[h[id]-1] == px[i]) goto sbls; if(++id >= M) id %= M; //++id; id %= M; } h[id] = i + 1; ++ndist; sbls:; } } else { for (int i = 0; i != l; ++i) { xi = px[po[i]-1]; if(xi == NA_STRING) { anyNA = 1; continue; } id = HASH(((uintptr_t) xi & 0xffffffff), K); while(h[id]) { if(px[po[h[id]-1]-1] == xi) goto sbl; if(++id >= M) id %= M; //++id; id %= M; } h[id] = i + 1; ++ndist; sbl:; } } R_Free(h); if(narm == 0) ndist += anyNA; return ndist; } // Implementations for R vectors ----------------------------------------------- int ndistinct_impl_int(SEXP x, int narm) { int l = length(x); if(l < 1) return 0; switch(TYPEOF(x)) { case REALSXP: return ndistinct_double(REAL(x), &l, l, 1, narm); case INTSXP: // TODO: optimize for plain integer?? return isFactor(x) ? ndistinct_fct(INTEGER(x), &l, l, nlevels(x), 1, narm) : ndistinct_int(INTEGER(x), &l, l, 1, narm); case LGLSXP: return ndistinct_logi(LOGICAL(x), &l, l, 1, narm); case STRSXP: return ndistinct_string(SEXPPTR_RO(x), &l, l, 1, narm); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } SEXP ndistinct_impl(SEXP x, int narm) { return ScalarInteger(ndistinct_impl_int(x, narm)); } // TODO: Optimize grouped distinct value count for logical vectors?? SEXP ndistinct_g_impl(SEXP x, const int ng, const int *restrict pgs, const int *restrict po, const int *restrict pst, const int sorted, const int narm, int nthreads) { SEXP res = PROTECT(allocVector(INTSXP, ng)); int l = length(x), *restrict pres = INTEGER(res); if(nthreads > ng) nthreads = ng; if(sorted) { // Sorted: could compute cumulative group size (= starts) on the fly... but doesn't work multithreaded... po = &l; // int gs = 0, gsgr = 0; // need pst because gs += gsgr; doesn't work multithreaded... switch(TYPEOF(x)) { case REALSXP: { const double *px = REAL(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_double(px + pst[gr]-1, po, pgs[gr], 1, narm); break; } case INTSXP: { const int *px = INTEGER(x); if(isFactor(x) && nlevels(x) < l / ng * 3) { int M = nlevels(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_fct(px + pst[gr]-1, po, pgs[gr], M, 1, narm); } else { #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_int(px + pst[gr]-1, po, pgs[gr], 1, narm); } break; } case LGLSXP: { const int *px = LOGICAL(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_logi(px + pst[gr]-1, po, pgs[gr], 1, narm); break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_string(px + pst[gr]-1, po, pgs[gr], 1, narm); break; } default: error("Not Supported SEXP Type!"); } } else { // Not sorted. Perhaps reordering x is faster?? switch(TYPEOF(x)) { case REALSXP: { const double *px = REAL(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_double(px, po + pst[gr]-1, pgs[gr], 0, narm); break; } case INTSXP: { const int *px = INTEGER(x); if(isFactor(x) && nlevels(x) < l / ng * 3) { int M = nlevels(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_fct(px, po + pst[gr]-1, pgs[gr], M, 0, narm); } else { #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_int(px, po + pst[gr]-1, pgs[gr], 0, narm); } break; } case LGLSXP: { const int *px = LOGICAL(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_logi(px, po + pst[gr]-1, pgs[gr], 0, narm); break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = pgs[gr] == 0 ? 0 : ndistinct_string(px, po + pst[gr]-1, pgs[gr], 0, narm); break; } default: error("Not Supported SEXP Type!"); } } UNPROTECT(1); return res; } // Functions for Export -------------------------------------------------------- SEXP fndistinctC(SEXP x, SEXP g, SEXP Rnarm, SEXP Rnthreads) { if(isNull(g)) return ndistinct_impl(x, asLogical(Rnarm)); if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP *restrict pg = SEXPPTR_RO(g), o = pg[6]; SEXP res; int sorted = LOGICAL(pg[5])[1] == 1, ng = INTEGER(pg[0])[0], *restrict pgs = INTEGER(pg[2]), *restrict po, *restrict pst, l = length(x), nthreads = asInteger(Rnthreads); if(l != length(pg[1])) error("length(g) must match length(x)"); if(l < 1) return ScalarInteger(0); if(isNull(o)) { int *cgs = (int *) R_alloc(ng+2, sizeof(int)), *restrict pgv = INTEGER(pg[1]); cgs[1] = 1; for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; pst = cgs + 1; if(sorted) po = &l; else { int *restrict count = (int *) R_Calloc(ng+1, int); po = (int *) R_alloc(l, sizeof(int)); --po; for(int i = 0; i != l; ++i) po[cgs[pgv[i]] + count[pgv[i]]++] = i+1; ++po; R_Free(count); } } else { po = INTEGER(o); pst = INTEGER(getAttrib(o, sym_starts)); } if(nthreads > max_threads) nthreads = max_threads; PROTECT(res = ndistinct_g_impl(x, ng, pgs, po, pst, sorted, asLogical(Rnarm), nthreads)); if(!isObject(x)) copyMostAttrib(x, res); else setAttrib(res, sym_label, getAttrib(x, sym_label)); UNPROTECT(1); return res; } SEXP fndistinctlC(SEXP x, SEXP g, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads) { int l = length(x), narm = asLogical(Rnarm), nthreads = asInteger(Rnthreads); if(l < 1) return ScalarInteger(0); if(nthreads > max_threads) nthreads = max_threads; if(isNull(g) && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(INTSXP, l)); const SEXP *restrict px = SEXPPTR_RO(x); int *restrict pout = INTEGER(out); if(nthreads <= 1) { for(int j = 0; j != l; ++j) pout[j] = ndistinct_impl_int(px[j], narm); } else { if(nthreads > l) nthreads = l; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) pout[j] = ndistinct_impl_int(px[j], narm); } setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } else { SEXP out = PROTECT(allocVector(VECSXP, l)), *restrict pout = SEXPPTR(out); const SEXP *restrict px = SEXPPTR_RO(x); if(isNull(g)) { if(nthreads <= 1) { for(int j = 0; j != l; ++j) pout[j] = ndistinct_impl(px[j], narm); } else { if(nthreads > l) nthreads = l; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) pout[j] = ndistinct_impl(px[j], narm); } // Not thread safe and thus taken out for(int j = 0; j != l; ++j) { SEXP xj = px[j]; if(!isObject(xj)) copyMostAttrib(xj, pout[j]); else setAttrib(pout[j], sym_label, getAttrib(xj, sym_label)); } DFcopyAttr(out, x, /*ng=*/0); } else { if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP *restrict pg = SEXPPTR_RO(g), o = pg[6]; int sorted = LOGICAL(pg[5])[1] == 1, ng = INTEGER(pg[0])[0], *restrict pgs = INTEGER(pg[2]), *restrict po, *restrict pst, gl = length(pg[1]); if(isNull(o)) { int *cgs = (int *) R_alloc(ng+2, sizeof(int)), *restrict pgv = INTEGER(pg[1]); cgs[1] = 1; for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; pst = cgs + 1; if(sorted) po = &l; else { int *restrict count = (int *) R_Calloc(ng+1, int); po = (int *) R_alloc(gl, sizeof(int)); --po; for(int i = 0; i != gl; ++i) po[cgs[pgv[i]] + count[pgv[i]]++] = i+1; ++po; R_Free(count); } } else { po = INTEGER(o); pst = INTEGER(getAttrib(o, sym_starts)); } for(int j = 0; j != l; ++j) { SEXP xj = px[j]; if(length(xj) != gl) error("length(g) must match nrow(x)"); SET_VECTOR_ELT(out, j, ndistinct_g_impl(xj, ng, pgs, po, pst, sorted, narm, nthreads)); if(!isObject(xj)) copyMostAttrib(xj, pout[j]); else setAttrib(pout[j], sym_label, getAttrib(xj, sym_label)); } DFcopyAttr(out, x, ng); } UNPROTECT(1); return out; } } SEXP fndistinctmC(SEXP x, SEXP g, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], narm = asLogical(Rnarm), nthreads = asInteger(Rnthreads); if(l < 1) return ScalarInteger(0); // Prevents seqfault for numeric(0) #101 if(nthreads > max_threads) nthreads = max_threads; if(isNull(g)) { SEXP res = PROTECT(allocVector(INTSXP, col)); int *restrict pres = INTEGER(res); if(nthreads > col) nthreads = col; switch(tx) { case REALSXP: { double *px = REAL(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = ndistinct_double(px + j*l, &l, l, 1, narm); break; } case INTSXP: { // Factor matrix not well defined object... int *px = INTEGER(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = ndistinct_int(px + j*l, &l, l, 1, narm); break; } case LGLSXP: { int *px = INTEGER(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = ndistinct_logi(px + j*l, &l, l, 1, narm); break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pres[j] = ndistinct_string(px + j*l, &l, l, 1, narm); break; } default: error("Not Supported SEXP Type!"); } matCopyAttr(res, x, Rdrop, /*ng=*/0); UNPROTECT(1); return res; } else { // With groups if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); const SEXP *restrict pg = SEXPPTR_RO(g), o = pg[6]; int sorted = LOGICAL(pg[5])[1] == 1, ng = INTEGER(pg[0])[0], *restrict pgs = INTEGER(pg[2]), *restrict po, *restrict pst, gl = length(pg[1]); if(l != gl) error("length(g) must match nrow(x)"); SEXP res = PROTECT(allocVector(INTSXP, col * ng)); int *restrict pres = INTEGER(res); if(nthreads > col) nthreads = col; // column-level sufficient? or do sub-column level?? if(isNull(o)) { int *cgs = (int *) R_alloc(ng+2, sizeof(int)), *restrict pgv = INTEGER(pg[1]); cgs[1] = 1; for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; pst = cgs + 1; if(sorted) po = &l; else { int *restrict count = (int *) R_Calloc(ng+1, int); po = (int *) R_alloc(l, sizeof(int)); --po; for(int i = 0; i != l; ++i) po[cgs[pgv[i]] + count[pgv[i]]++] = i+1; ++po; R_Free(count); } } else { po = INTEGER(o); pst = INTEGER(getAttrib(o, sym_starts)); } if(sorted) { // Sorted switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; double *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_double(pxj + pst[gr]-1, po, pgs[gr], 1, narm); } break; } case INTSXP: { // Factor matrix not well defined object... int *px = INTEGER(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int *pxj = px + j * l, jng = j * ng; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_int(pxj + pst[gr]-1, po, pgs[gr], 1, narm); } break; } case LGLSXP: { int *px = LOGICAL(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int *pxj = px + j * l, jng = j * ng; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_logi(pxj + pst[gr]-1, po, pgs[gr], 1, narm); } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; const SEXP *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_string(pxj + pst[gr]-1, po, pgs[gr], 1, narm); } break; } default: error("Not Supported SEXP Type!"); } } else { // Not sorted. Perhaps reordering x is faster?? // Todo: perhaps going first by groups, then by columns is better? saves zero group size checks... switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; double *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_double(pxj, po + pst[gr]-1, pgs[gr], 0, narm); } break; } case INTSXP: { // Factor matrix not well defined object... int *px = INTEGER(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng, *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_int(pxj, po + pst[gr]-1, pgs[gr], 0, narm); } break; } case LGLSXP: { int *px = LOGICAL(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng, *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_logi(pxj, po + pst[gr]-1, pgs[gr], 0, narm); } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { int jng = j * ng; const SEXP *pxj = px + j * l; for(int gr = 0; gr < ng; ++gr) pres[jng + gr] = pgs[gr] == 0 ? 0 : ndistinct_string(pxj, po + pst[gr]-1, pgs[gr], 0, narm); } break; } default: error("Not Supported SEXP Type!"); } } matCopyAttr(res, x, Rdrop, ng); UNPROTECT(1); return res; } } collapse/src/fnth_fmedian_fquantile.c0000644000176200001440000022134515122343427017513 0ustar liggesusers#include "collapse_c.h" /* Inspired by Numerical Recipes in C and data.table's quickselect.c, R's quantile() function, Rfast2::Quantile(), and these references for sample quantiles: https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample https://doi.org/10.2307/2684934 https://aakinshin.net/posts/weighted-quantiles/ https://htmlpreview.github.io/?https://github.com/mjskay/uncertainty-examples/blob/master/weighted-quantiles.html The weighted quantile algorithm follows Matthew Kay */ // Adopted from data.table's quickselect.c static inline void iswap(int *a, int *b) {int tmp=*a; *a=*b; *b=tmp;} static inline void dswap(double *a, double *b) {double tmp=*a; *a=*b; *b=tmp;} // For weighted quantile methods static double eps = 10 * DBL_EPSILON; // Barebones quickselect algorithm from Numerical Recipes in C #undef QUICKSELECT #define QUICKSELECT(SWAP) \ unsigned int ir = n-1, l = 0, lp; \ for(;;) { \ lp = l+1; \ if (ir <= lp) { /* Active partition contains 1 or 2 elements. */ \ if (ir == lp && x[ir] < x[l]) { /* Case of 2 elements. */ \ SWAP(x+l, x+ir); \ } \ break; \ } else { \ unsigned int mid=(l+ir) >> 1; /* Choose median of left, center, and right elements as partitioning element a. */ \ SWAP(x+mid, x+lp); /* Also rearrange so that arr[l] ≤ arr[l+1] ≤ arr[ir] */ \ if (x[l] > x[ir]) { \ SWAP(x+l, x+ir); \ } \ if (x[lp] > x[ir]) { \ SWAP(x+lp, x+ir); \ } \ if (x[l] > x[lp]) { \ SWAP(x+l, x+lp); \ } \ unsigned int i=lp, j=ir; /* Initialize pointers for partitioning. */ \ a=x[lp]; /* Partitioning element. */ \ for (;;) { /* Beginning of innermost loop. */ \ do i++; while (x[i] < a); /* Scan up to find element > a. */ \ do j--; while (x[j] > a); /* Scan down to find element < a. */ \ if (j < i) break; /* Pointers crossed. Partitioning complete. */ \ SWAP(x+i, x+j); \ } /* End of innermost loop. */ \ x[lp]=x[j]; /* Insert partitioning element. */ \ x[j]=a; \ if (j >= elem) ir=j-1; /* if index of partitioning element j is above median index */ \ if (j <= elem) l=i; /* if index of partitioning element j is below median index */ \ } \ } \ a = x[elem]; // Quantile method switcher // https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample // Need to subtract 1 from h because of 0-indexing in C #undef RETQSWITCH #define RETQSWITCH(n) \ switch(ret) { \ case 7: \ case 1: \ case 2: /* quantile type 7, average, or Lower element*/ \ h = (n - 1)*Q; \ break; \ case 3: /* upper element*/ \ h = n*Q; \ break; \ case 4: /* quantile type 4*/ \ h = n*Q - 1.0; \ break; \ case 5: /* quantile type 5*/ \ h = n*Q - 0.5; \ break; \ case 6: /* quantile type 6*/ \ h = (n + 1)*Q - 1.0; \ break; \ case 8: /* quantile type 8 (best according to H&F 1986)*/ \ h = ((double)n + 1.0/3.0)*Q - 2.0/3.0; \ break; \ case 9: /* quantile type 9*/ \ h = ((double)n + 1.0/4.0)*Q - 5.0/8.0; \ break; \ } // Weighted quantiles: https://htmlpreview.github.io/?https://github.com/mjskay/uncertainty-examples/blob/master/weighted-quantiles.html // Basically we add m to h #undef RETWQADDM #define RETWQADDM \ switch(ret) { \ case 7: /* quantile type 7 */ \ h += 1 - Q; \ break; \ case 4: /* quantile type 4*/ \ break; \ case 5: /* quantile type 5*/ \ h += 0.5; \ break; \ case 6: /* quantile type 6*/ \ h += Q; \ break; \ case 8: /* quantile type 8 (best according to H&F 1986)*/ \ h += 1.0/3.0 * (Q + 1); \ break; \ case 9: /* quantile type 9*/ \ h += 1.0/4.0 * Q + 3.0/8.0; \ break; \ } // -------------------------------------------------------------------------- // First a faster quantile function // -------------------------------------------------------------------------- // Need versions that supply the element and h double dquickselect_elem(double *x, const int n, const unsigned int elem, double h) { // if(n == 0) return NA_REAL; // done in fquantile... double a, b; QUICKSELECT(dswap); if(elem == n-1 || h <= 0.0) return a; b = x[elem+1]; for(int i = elem+2; i < n; ++i) if(x[i] < b) b = x[i]; return a + h*(b-a); } double iquickselect_elem(int *x, const int n, const unsigned int elem, double h) { // if(n == 0) return NA_REAL; // done in fquantile... int a, b; QUICKSELECT(iswap); if(elem == n-1 || h <= 0.0) return (double)a; b = x[elem+1]; for(int i = elem+2; i < n; ++i) if(x[i] < b) b = x[i]; return (double)a + h*(double)(b-a); } #undef FQUANTILE_CORE #define FQUANTILE_CORE(QFUN) \ double h, Q; \ int ih = 0; /* To avoid -Wmaybe-uninitialized */ \ for(int i = 0, offset = 0; i < np; ++i) { \ Q = probs[i]; \ if(Q > 0.0 && Q < 1.0) { \ RETQSWITCH(l); \ ih = h; \ pres[i] = QFUN(x_cc + offset, l - offset, ih - offset, h - ih); \ offset = ih; \ } \ } /* This is much more efficient: fetching min and max ex-post */ \ if(probs[0] == 0.0) { \ x_min = x_cc[0]; \ for(unsigned int i = 0, end = l*probs[1]; i < end; ++i) \ if(x_cc[i] < x_min) x_min = x_cc[i]; \ pres[0] = (double)x_min; \ } \ if(probs[np-1] == 1.0) { \ x_max = x_cc[ih]; \ for(unsigned int i = ih+1; i < l; ++i) \ if(x_cc[i] > x_max) x_max = x_cc[i]; \ pres[np-1] = (double)x_max; \ } // If we have an ordering vector supplied as input to the function // Expects px to be decremented by 1 #undef FQUANTILE_ORDVEC #define FQUANTILE_ORDVEC \ double a, b, h, Q; \ for(int i = 0, ih; i < np; ++i) { \ Q = probs[i]; \ if(Q > 0.0 && Q < 1.0) { \ RETQSWITCH(l); \ ih = h; a = px[po[ih]]; \ if(ih == n-1 || h <= 0.0) pres[i] = a; \ else { \ b = px[po[ih+1]]; \ pres[i] = a + (h - ih) * (b - a); \ } \ } else pres[i] = px[po[(int)((l-1)*Q)]]; \ } // Following https://htmlpreview.github.io/?https://github.com/mjskay/uncertainty-examples/blob/master/weighted-quantiles.html // Expects px and pw to be decremented by 1 #undef WQUANTILE_CORE #define WQUANTILE_CORE \ double Q, h; \ int j; \ for(int i = 0, k = 0; i < np; ++i) { \ Q = probs[i]; \ if(Q > 0.0 && Q < 1.0) { \ h = sumw * Q + eps; \ while(wsum <= h) wsum += pw[po[k++]]; \ if(k > 0) wsum -= pw[po[--k]]; \ h = k-1 + (h - wsum) / pw[po[k]]; \ RETWQADDM; \ j = (int)h; h -= j; \ pres[i] = (j >= l-1 || h < eps) ? px[po[j]] : \ (1 - h) * px[po[j]] + h * px[po[j+1]]; \ } else { /* Since probs must be passed in order*/ \ if(Q == 0.0) { \ while(pw[po[k]] == 0.0) ++k; \ } else { \ k = l-1; \ while(pw[po[k]] == 0.0) --k; \ } \ pres[i] = px[po[k]]; \ } \ } SEXP fquantileC(SEXP x, SEXP Rprobs, SEXP w, SEXP o, SEXP Rnarm, SEXP Rtype, SEXP Rnames, SEXP checko) { if(TYPEOF(Rprobs) != REALSXP) error("probs needs to be a numeric vector"); int tx = TYPEOF(x), n = length(x), np = length(Rprobs), narm = asLogical(Rnarm), ret = asInteger(Rtype), nprotect = 1; if(tx != REALSXP && tx != INTSXP && tx != LGLSXP) error("x needs to be numeric"); if(ret < 4 || ret > 9) error("fquantile only supports continuous quantile types 4-9. You requested type: %d", ret); SEXP res = PROTECT(allocVector(REALSXP, np)); copyMostAttrib(x, res); // Consistent with other functions, and works for "units" if(np == 0) { // quantile(x, numeric(0)) UNPROTECT(nprotect); return res; } double *probs = REAL(Rprobs), *pres = REAL(res); unsigned int l = 0; for(int i = 0; i < np; ++i) { if(probs[i] < 0.0 || probs[i] > 1.0) error("probabilities need to be in range [0, 1]"); if(i > 0 && probs[i] < probs[i-1]) error("probabilities need to be passed in ascending order"); } if(asLogical(Rnames)) { SEXP names = PROTECT(allocVector(STRSXP, np)); ++nprotect; char namei[5], nameid[7]; for(int i = 0, dig; i < np; ++i) { dig = (int)(probs[i]*1000) % 10; if(dig == 0) { snprintf(namei, 5, "%d%%", (int)(probs[i]*100)); SET_STRING_ELT(names, i, mkChar(namei)); } else { snprintf(nameid, 7, "%d.%d%%", (int)(probs[i]*100), dig); SET_STRING_ELT(names, i, mkChar(nameid)); } } namesgets(res, names); } // First the trivial case if(n <= 1) { if(!isNull(w)) { if(length(w) != n) error("length(w) must match length(x)"); if(length(w) > 0) { double wtmp = asReal(w); if(wtmp == 0.0) n = 0; else if(ISNAN(wtmp) && NISNAN(asReal(x))) error("Missing weights in order statistics are currently only supported if x is also missing"); } } wall0:; // If all weights are zero double val = n == 0 ? NA_REAL : tx == REALSXP ? REAL(x)[0] : INTEGER(x)[0] == NA_INTEGER ? NA_REAL : (double)INTEGER(x)[0]; for(int i = 0; i < np; ++i) pres[i] = val; // This case: no quantile estimation, simple range } else if(np <= 2 && isNull(o) && (probs[0] == 0.0 || probs[0] == 1.0) && (np <= 1 || probs[1] == 1.0)) { // TODO: could also check weights here, but this case is presumably very rare anyway.. SEXP rng = PROTECT(frange(x, Rnarm, ScalarLogical(FALSE))); ++nprotect; if(TYPEOF(rng) != REALSXP) { rng = PROTECT(coerceVector(rng, REALSXP)); ++nprotect; } if(probs[0] == 0.0) pres[0] = REAL(rng)[0]; else if(probs[0] == 1.0) pres[0] = REAL(rng)[1]; if(np == 2) pres[1] = REAL(rng)[1]; } else if(isNull(w) && isNull(o)) { // Standard: quickselect if(tx == REALSXP) { // Numeric data double *x_cc = (double *) R_alloc(n, sizeof(double)), *px = REAL(x), x_min, x_max; if(narm) { for(unsigned int i = 0; i != n; ++i) if(NISNAN(px[i])) x_cc[l++] = px[i]; if(l <= 1) { // TODO: More elegant way to solve? Also with integers and weighted estimation ... for(int i = 0; i < np; ++i) pres[i] = l == 0 ? NA_REAL : x_cc[0]; UNPROTECT(nprotect); return res; } } else { l = n; memcpy(x_cc, px, sizeof(double) * n); } FQUANTILE_CORE(dquickselect_elem); } else { // Integers int *x_cc = (int *) R_alloc(n, sizeof(int)), *px = INTEGER(x), x_min, x_max; if(narm) { for(unsigned int i = 0; i != n; ++i) if(px[i] != NA_INTEGER) x_cc[l++] = px[i]; if(l <= 1) { for(int i = 0; i < np; ++i) pres[i] = l == 0 ? NA_REAL : (double)x_cc[0]; UNPROTECT(nprotect); return res; } } else { l = n; memcpy(x_cc, px, sizeof(int) * n); } FQUANTILE_CORE(iquickselect_elem); } } else { // Weighted or Ordered int *po = &n; double *pw = probs, nanw0 = 0.0; if(!isNull(o)) { if(length(o) != n || TYPEOF(o) != INTSXP) error("o must be a valid ordering vector, of the same length as x and type integer"); po = INTEGER(o); if(asLogical(checko)) { // TODO: Better way? for(unsigned int i = 0; i != n; ++i) if(po[i] < 1 || po[i] > n) error("Some elements in o are outside of range [1, length(x)]"); } } else { po = (int *) R_alloc(n, sizeof(int)); // R_Calloc ? num1radixsort(po, TRUE, FALSE, x); } if(!isNull(w)) { if(length(w) != n) error("length(w) must match length(x)"); if(TYPEOF(w) != REALSXP) { if(!(TYPEOF(w) == INTSXP || TYPEOF(w) == LGLSXP)) error("weights need to be double or integer/logical (internally coerced to double)"); SEXP wd = PROTECT(coerceVector(w, REALSXP)); ++nprotect; pw = REAL(wd)-1; } else pw = REAL(w)-1; nanw0 = pw[po[0]]; } l = n; if(narm) { if(tx == REALSXP) { // Numeric data double *px = REAL(x)-1; if(ISNAN(px[po[0]])) error("Found missing value at the beginning of the sample. Please use option na.last = TRUE (the default) when creasting ordering vectors for use with fquantile()."); --po; while(l != 0 && ISNAN(px[po[l]])) --l; ++po; if(l <= 1) { double val = (l == 0 || ISNAN(nanw0)) ? NA_REAL : px[po[0]]; for(int i = 0; i < np; ++i) pres[i] = val; UNPROTECT(nprotect); return res; } } else { int *px = INTEGER(x)-1; if(px[po[0]] == NA_INTEGER) error("Found missing value at the beginning of the sample. Please use option na.last = TRUE (the default) when creasting ordering vectors for use with fquantile()."); --po; while(l != 0 && px[po[l]] == NA_INTEGER) --l; ++po; if(l <= 1) { double val = (l == 0 || ISNAN(nanw0)) ? NA_REAL : (double)px[po[0]]; for(int i = 0; i < np; ++i) pres[i] = val; UNPROTECT(nprotect); return res; } } } if(isNull(w)) { if(tx == REALSXP) { // Numeric data double *px = REAL(x)-1; FQUANTILE_ORDVEC; } else { int *px = INTEGER(x)-1; FQUANTILE_ORDVEC; } } else { double wsum = 0.0, sumw = 0.0; // wsum is running sum, sumw is the total sum #pragma omp simd reduction(+:sumw) for (int i = 0; i < l; ++i) sumw += pw[po[i]]; wsum = 0.0; if(ISNAN(sumw)) error("Missing weights in order statistics are currently only supported if x is also missing"); if(sumw < 0.0) error("Weights must be positive or zero"); if(sumw < eps) { // error("For weighted quantile estimation, must supply at least one non-zero weight for non-NA x"); n = 0; goto wall0; } if(tx == REALSXP) { // Numeric data double *px = REAL(x)-1; WQUANTILE_CORE; } else { int *px = INTEGER(x)-1; WQUANTILE_CORE; } } } UNPROTECT(nprotect); return res; } // -------------------------------------------------------------------------- // Then: C rewrite of fnth(), now also supporting (weighted) quantiles // -------------------------------------------------------------------------- // Without weights, we can apply quickselect at the group-level double dquickselect(double *x, const int n, const int ret, const double Q) { if(n == 0) return NA_REAL; unsigned int elem; double a, b, h = 0.0; /* To avoid -Wmaybe-uninitialized */ RETQSWITCH(n); elem = h; h -= elem; // need to subtract elem QUICKSELECT(dswap); if((ret < 4 && (ret != 1 || n%2 == 1)) || elem == n-1 || h <= 0.0) return a; b = x[elem+1]; for(int i = elem+2; i < n; ++i) if(x[i] < b) b = x[i]; if(ret == 1) return (a+b)/2.0; // || Q == 0.5 return a + h*(b-a); // same as (1-h)*a + h*b } double iquickselect(int *x, const int n, const int ret, const double Q) { if(n == 0) return NA_REAL; unsigned int elem; int a, b; double h = 0.0; /* To avoid -Wmaybe-uninitialized */ RETQSWITCH(n); elem = h; h -= elem; // need to subtract elem QUICKSELECT(iswap); if((ret < 4 && (ret != 1 || n%2 == 1)) || elem == n-1 || h <= 0.0) return (double)a; b = x[elem+1]; for(int i = elem+2; i < n; ++i) if(x[i] < b) b = x[i]; if(ret == 1) return ((double)a+(double)b)/2.0; // || Q == 0.5 return (double)a + h*(double)(b-a); // same as (1-h)*(double)a + h*(double)b } // With weights, either radix sort of the entire vector, and then passing through by groups, // or quicksort at the group-level // Expects pw and po to be consistent double w_compute_h(const double *pw, const int *po, const int l, const int sorted, double Q) { if(l == 0) return NA_REAL; double sumw = 0.0; if(sorted) { #pragma omp simd reduction(+:sumw) for(int i = 0; i < l; ++i) sumw += pw[i]; } else { #pragma omp simd reduction(+:sumw) for(int i = 0; i < l; ++i) sumw += pw[po[i]]; } if(ISNAN(sumw)) error("Missing weights in order statistics are currently only supported if x is also missing"); if(sumw < 0.0) error("Weights must be positive or zero"); return Q * sumw; } // If no groups or sorted groups po is the ordering of x // Expects pointers px and pw to be decremented by one #undef WNTH_CORE #define WNTH_CORE \ double wsum = pw[po[0]], wb; \ int k = 1; \ if(ret < 3) { /* lower (2), or average (1) element*/ \ while(wsum < h) wsum += pw[po[k++]]; \ double a = px[po[k-1]]; \ if(ret == 2 || wsum > h+eps) return a;/* h = sumw * Q must be > 0 here */\ wb = px[po[k]]; wsum = 2.0; \ while(pw[po[k]] == 0.0) { /* l should never be reached, I tested it */ \ wb += px[po[++k]]; ++wsum; \ } \ return (a + wb) / wsum; \ } \ wb = h + eps; \ while(wsum <= wb) wsum += pw[po[k++]]; \ if(ret == 3) return px[po[k-1]]; \ wsum -= pw[po[--k]]; \ h = k-1 + (h - wsum) / pw[po[k]]; \ RETWQADDM; \ int j = (int)h; h -= j; \ return (j >= l-1 || h < eps) ? px[po[j]] : (1 - h) * px[po[j]] + h * px[po[j+1]]; // This is the same, just that the result is assigned. Needed for quicksort based implementations // Does not require incremented pointers (depending on the content of i_cc) #undef WNTH_CORE_QSORT #define WNTH_CORE_QSORT \ double res, wsum = pw[i_cc[0]], wb; \ int k = 1; \ if(ret < 3) { /* lower (2), or average (1) element*/ \ while(wsum < h) wsum += pw[i_cc[k++]]; \ double a = x_cc[k-1]; \ if(ret == 2 || wsum > h+eps) res = a; /* h = sumw * Q must be > 0 here */ \ else { \ wb = x_cc[k]; wsum = 2.0; \ while(pw[i_cc[k]] == 0.0) { /* n should never be reached, I tested it */ \ wb += x_cc[++k]; ++wsum; \ } \ res = (a + wb) / wsum; \ } \ } else { \ wb = h + eps; \ while(wsum <= wb) wsum += pw[i_cc[k++]]; \ if(ret == 3) { \ res = x_cc[k-1]; \ } else { \ wsum -= pw[i_cc[--k]]; \ h = k-1 + (h - wsum) / pw[i_cc[k]]; \ RETWQADDM; \ int j = (int)h; h -= j; \ res = (j >= n-1 || h < eps) ? x_cc[j] : (1 - h) * x_cc[j] + h * x_cc[j+1]; \ } \ } // Finally, in the default vector method: also provide the option to pass an ordering vector of x, even without weights // if the groups are unsorted, po needs to be recomputed to provide the ordering within groups // Expects pointer px to be decremented by 1 #undef NTH_ORDVEC #define NTH_ORDVEC \ double a, b, h = 0.0; /* To avoid -Wmaybe-uninitialized */ \ RETQSWITCH(l); \ int ih = h; a = px[po[ih]]; h -= ih; \ if((ret < 4 && (ret != 1 || l%2 == 1)) || ih == l-1 || h <= 0.0) return a; \ b = px[po[ih+1]]; \ return (ret == 1) ? (a+b)/2.0 : a + h * (b - a); // || Q == 0.5 // C-implementations for different data types, parallelizable ---------------------------------- double nth_int(const int *restrict px, const int *restrict po, const int l, const int sorted, const int narm, const int ret, const double Q) { if(l <= 1) return l == 0 ? NA_REAL : sorted ? (double)px[0] : (double)px[po[0]-1]; int *x_cc = (int *) R_Calloc(l, int), n = 0; if(sorted) { // if(narm) { for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER) x_cc[n++] = px[i]; // } else { // n = l; // memcpy(x_cc, px, l * sizeof(int)); // } } else { const int *pxm = px-1; // creating offset pointer to x // if(narm) { for(int i = 0; i != l; ++i) if(pxm[po[i]] != NA_INTEGER) x_cc[n++] = pxm[po[i]]; // } else { // n = l; // for(int i = 0; i != l; ++i) x_cc[i] = pxm[po[i]]; // } } double res = (narm == 0 && n != l) ? NA_REAL : iquickselect(x_cc, n, ret, Q); R_Free(x_cc); return res; } double nth_int_noalloc(const int *restrict px, const int *restrict po, int *x_cc, const int l, const int sorted, const int narm, const int ret, const double Q) { if(l <= 1) return l == 0 ? NA_REAL : sorted ? (double)px[0] : (double)px[po[0]-1]; int n = 0; if(sorted) { for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER) x_cc[n++] = px[i]; } else { const int *pxm = px-1; // creating offset pointer to x for(int i = 0; i != l; ++i) if(pxm[po[i]] != NA_INTEGER) x_cc[n++] = pxm[po[i]]; } return (narm == 0 && n != l) ? NA_REAL : iquickselect(x_cc, n, ret, Q); } double nth_double(const double *restrict px, const int *restrict po, const int l, const int sorted, const int narm, const int ret, const double Q) { if(l <= 1) return l == 0 ? NA_REAL : sorted ? px[0] : px[po[0]-1]; double *x_cc = (double *) R_Calloc(l, double); int n = 0; if(sorted) { // if(narm) { for(int i = 0; i != l; ++i) if(NISNAN(px[i])) x_cc[n++] = px[i]; // } else { // n = l; // memcpy(x_cc, px, l * sizeof(double)); // } } else { const double *pxm = px-1; // if(narm) { for(int i = 0; i != l; ++i) if(NISNAN(pxm[po[i]])) x_cc[n++] = pxm[po[i]]; // } else { // n = l; // for(int i = 0; i != l; ++i) x_cc[i] = pxm[po[i]]; // } } double res = (narm == 0 && n != l) ? NA_REAL : dquickselect(x_cc, n, ret, Q); R_Free(x_cc); return res; } double nth_double_noalloc(const double *restrict px, const int *restrict po, double *x_cc, const int l, const int sorted, const int narm, const int ret, const double Q) { if(l <= 1) return l == 0 ? NA_REAL : sorted ? px[0] : px[po[0]-1]; int n = 0; if(sorted) { for(int i = 0; i != l; ++i) if(NISNAN(px[i])) x_cc[n++] = px[i]; } else { const double *pxm = px-1; for(int i = 0; i != l; ++i) if(NISNAN(pxm[po[i]])) x_cc[n++] = pxm[po[i]]; } return (narm == 0 && n != l) ? NA_REAL : dquickselect(x_cc, n, ret, Q); } // Expects pointer px to be decremented by 1 double nth_int_ord(const int *restrict px, const int *restrict po, int l, const int narm, const int ret, const double Q) { if(l <= 1) return l == 0 ? NA_REAL : (double)px[po[0]]; if(narm) { // Adjusting l as necessary... initial NA check done in fnthC() while(l != 0 && px[po[l-1]] == NA_INTEGER) --l; if(l <= 1) return l == 0 ? NA_REAL : (double)px[po[0]]; } else if(px[po[l-1]] == NA_INTEGER) return NA_REAL; NTH_ORDVEC; } // Expects pointer px to be decremented by 1 double nth_double_ord(const double *restrict px, const int *restrict po, int l, const int narm, const int ret, const double Q) { if(l <= 1) return l == 0 ? NA_REAL : px[po[0]]; if(narm) { // Adjusting l as necessary... initial NA check done in fnthC() while(l != 0 && ISNAN(px[po[l-1]])) --l; if(l <= 1) return l == 0 ? NA_REAL : px[po[0]]; } else if(ISNAN(px[po[l-1]])) return NA_REAL; NTH_ORDVEC; } // Expects pointers px and pw to be decremented by 1 double w_nth_int_ord(const int *restrict px, const double *restrict pw, const int *restrict po, double h, int l, const int narm, const int ret, const double Q) { if(l <= 1) { if(l == 0) return NA_REAL; return ISNAN(pw[po[0]]) ? NA_REAL : (double)px[po[0]]; } if(narm) { // Adjusting l as necessary... initial NA check done in fnthC() while(l != 0 && px[po[l-1]] == NA_INTEGER) --l; if(l <= 1) return (l == 0 || ISNAN(pw[po[0]])) ? NA_REAL : (double)px[po[0]]; } else if(px[po[l-1]] == NA_INTEGER) return NA_REAL; if(h == DBL_MIN) h = w_compute_h(pw, po, l, 0, Q); if(ISNAN(h)) return NA_REAL; WNTH_CORE; } // Expects pointers px and pw to be decremented by 1 double w_nth_double_ord(const double *restrict px, const double *restrict pw, const int *restrict po, double h, int l, const int narm, const int ret, const double Q) { if(l <= 1) { if(l == 0) return NA_REAL; return ISNAN(pw[po[0]]) ? NA_REAL : px[po[0]]; } if(narm) { // Adjusting l as necessary... initial NA check done in fnthC() while(l != 0 && ISNAN(px[po[l-1]])) --l; if(l <= 1) return (l == 0 || ISNAN(pw[po[0]])) ? NA_REAL : px[po[0]]; } else if(ISNAN(px[po[l-1]])) return NA_REAL; if(h == DBL_MIN) h = w_compute_h(pw, po, l, 0, Q); if(ISNAN(h)) return NA_REAL; WNTH_CORE; } // Quicksort versions: only for grouped execution (too slow on bigger vectors compared to radix sort) // Expects pointer pw to be decremented by 1 if sorted == 0 double w_nth_int_qsort(const int *restrict px, const double *restrict pw, const int *restrict po, double h, const int l, const int sorted, const int narm, const int ret, const double Q) { if(l <= 1) { if(l == 0) return NA_REAL; if(sorted) return ISNAN(pw[0]) ? NA_REAL : (double)px[0]; return ISNAN(pw[po[0]]) ? NA_REAL : (double)px[po[0]-1]; } int *x_cc = (int *) R_Calloc(l, int), *i_cc = (int *) R_Calloc(l, int), n = 0; // TODO: alloc i_cc afterwards if narm ?? if(sorted) { // both the pointers to x and w need to be suitably incremented for grouped execution. // if(narm) { for(int i = 0; i != l; ++i) { if(px[i] != NA_INTEGER) { i_cc[n] = i; x_cc[n++] = px[i]; } } // } else { // n = l; // for(int i = 0; i != l; ++i) { // i_cc[i] = i; // x_cc[i] = px[i]; // } // } } else { const int *pxm = px-1; // if(narm) { for(int i = 0; i != l; ++i) { if(pxm[po[i]] != NA_INTEGER) { i_cc[n] = po[i]; x_cc[n++] = pxm[po[i]]; } } // } else { // n = l; // for(int i = 0; i != l; ++i) { // i_cc[i] = po[i]; // x_cc[i] = pxm[po[i]]; // } // } } if(narm == 0 && n != l) { R_Free(x_cc); R_Free(i_cc); return NA_REAL; } // i_cc is one-indexed R_qsort_int_I(x_cc, i_cc, 1, n); if(h == DBL_MIN) h = w_compute_h(pw, i_cc, n, 0, Q); if(ISNAN(h)) { R_Free(x_cc); R_Free(i_cc); return NA_REAL; } WNTH_CORE_QSORT; R_Free(x_cc); R_Free(i_cc); return res; } // Expects pointer pw to be decremented by 1 if sorted == 0 double w_nth_double_qsort(const double *restrict px, const double *restrict pw, const int *restrict po, double h, const int l, const int sorted, const int narm, const int ret, const double Q) { if(l <= 1) { if(l == 0) return NA_REAL; if(sorted) return ISNAN(pw[0]) ? NA_REAL : px[0]; return ISNAN(pw[po[0]]) ? NA_REAL : px[po[0]-1]; } double *x_cc = (double *) R_Calloc(l, double); int *i_cc = (int *) R_Calloc(l, int), n = 0; // TODO: alloc afterwards if narm ?? if(sorted) { // if(narm) { for(int i = 0; i != l; ++i) { if(NISNAN(px[i])) { i_cc[n] = i; x_cc[n++] = px[i]; } } // } else { // n = l; // for(int i = 0; i != l; ++i) { // i_cc[i] = i; // x_cc[i] = px[i]; // } // } } else { const double *pxm = px-1; // if(narm) { for(int i = 0; i != l; ++i) { if(NISNAN(pxm[po[i]])) { i_cc[n] = po[i]; x_cc[n++] = pxm[po[i]]; } } // } else { // n = l; // for(int i = 0; i != l; ++i) { // i_cc[i] = po[i]; // x_cc[i] = pxm[po[i]]; // } // } } if(narm == 0 && n != l) { R_Free(x_cc); R_Free(i_cc); return NA_REAL; } // i_cc is one-indexed R_qsort_I(x_cc, i_cc, 1, n); if(h == DBL_MIN) h = w_compute_h(pw, i_cc, n, 0, Q); if(ISNAN(h)) { R_Free(x_cc); R_Free(i_cc); return NA_REAL; } WNTH_CORE_QSORT; R_Free(x_cc); R_Free(i_cc); return res; } // Implementations for R vectors --------------------------------------------------------------- // for safe multithreading in fnthlC() SEXP nth_impl_plain(SEXP x, int narm, int ret, double Q) { int l = length(x); if(l <= 1) return x; switch(TYPEOF(x)) { case REALSXP: return ScalarReal(nth_double(REAL(x), &l, l, 1, narm, ret, Q)); case INTSXP: case LGLSXP: return ScalarReal(nth_int(INTEGER(x), &l, l, 1, narm, ret, Q)); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } SEXP nth_impl(SEXP x, int narm, int ret, double Q) { if(length(x) <= 1) return x; if(!ANY_ATTRIB(x) || (isObject(x) && inherits(x, "ts"))) return nth_impl_plain(x, narm, ret, Q); SEXP res = PROTECT(nth_impl_plain(x, narm, ret, Q)); copyMostAttrib(x, res); UNPROTECT(1); return res; } // for safe multithreading in fnthlC() double nth_impl_dbl(SEXP x, int narm, int ret, double Q) { int l = length(x); if(l < 1) return NA_REAL; switch(TYPEOF(x)) { case REALSXP: return nth_double(REAL(x), &l, l, 1, narm, ret, Q); case INTSXP: case LGLSXP: return nth_int(INTEGER(x), &l, l, 1, narm, ret, Q); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } // for safe multithreading in fnthlC() SEXP nth_impl_noalloc_plain(SEXP x, void* x_cc, int narm, int ret, double Q) { int l = length(x); if(l <= 1) return x; switch(TYPEOF(x)) { case REALSXP: return ScalarReal(nth_double_noalloc(REAL(x), &l, x_cc, l, 1, narm, ret, Q)); case INTSXP: case LGLSXP: return ScalarReal(nth_int_noalloc(INTEGER(x), &l, x_cc, l, 1, narm, ret, Q)); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } double nth_impl_noalloc_dbl(SEXP x, void* x_cc, int narm, int ret, double Q) { int l = length(x); if(l < 1) return NA_REAL; switch(TYPEOF(x)) { case REALSXP: return nth_double_noalloc(REAL(x), &l, x_cc, l, 1, narm, ret, Q); case INTSXP: case LGLSXP: return nth_int_noalloc(INTEGER(x), &l, x_cc, l, 1, narm, ret, Q); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } SEXP nth_ord_impl(SEXP x, int *pxo, int narm, int ret, double Q) { int l = length(x); if(l <= 1) return x; SEXP res; switch(TYPEOF(x)) { case REALSXP: res = ScalarReal(nth_double_ord(REAL(x)-1, pxo, l, narm, ret, Q)); break; case INTSXP: case LGLSXP: res = ScalarReal(nth_int_ord(INTEGER(x)-1, pxo, l, narm, ret, Q)); break; default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } if(!ANY_ATTRIB(x) || (isObject(x) && inherits(x, "ts"))) return res; PROTECT(res); // Needed ?? copyMostAttrib(x, res); UNPROTECT(1); return res; } // Expects pointer pw to be decremented by 1 SEXP w_nth_ord_impl_plain(SEXP x, int *pxo, double *pw, int narm, int ret, double Q, double h) { int l = length(x); if(l <= 1) return x; switch(TYPEOF(x)) { case REALSXP: return ScalarReal(w_nth_double_ord(REAL(x)-1, pw, pxo, h, l, narm, ret, Q)); case INTSXP: case LGLSXP: return ScalarReal(w_nth_int_ord(INTEGER(x)-1, pw, pxo, h, l, narm, ret, Q)); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } // Expects pointer pw to be decremented by 1 SEXP w_nth_ord_impl(SEXP x, int *pxo, double *pw, int narm, int ret, double Q, double h) { if(length(x) <= 1) return x; if(!ANY_ATTRIB(x) || (isObject(x) && inherits(x, "ts"))) return w_nth_ord_impl_plain(x, pxo, pw, narm, ret, Q, h); SEXP res = PROTECT(w_nth_ord_impl_plain(x, pxo, pw, narm, ret, Q, h)); copyMostAttrib(x, res); UNPROTECT(1); return res; } // Expects pointer pw to be decremented by 1 double w_nth_ord_impl_dbl(SEXP x, int *pxo, double *pw, int narm, int ret, double Q, double h) { int l = length(x); if(l < 1) return NA_REAL; switch(TYPEOF(x)) { case REALSXP: return w_nth_double_ord(REAL(x)-1, pw, pxo, h, l, narm, ret, Q); case INTSXP: case LGLSXP: return w_nth_int_ord(INTEGER(x)-1, pw, pxo, h, l, narm, ret, Q); default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } // Expects pointer po to be decremented by 1 SEXP nth_g_impl(SEXP x, int ng, int *pgs, int *po, int *pst, int sorted, int narm, int ret, double Q, int nthreads) { if(nthreads > ng) nthreads = ng; // TODO: if nthreads = 1, pass x_cc array of size maxgrpn repeatedly to the functions!! SEXP res = PROTECT(allocVector(REALSXP, ng)); double *pres = REAL(res); if(sorted) { // Sorted: could compute cumulative group size (= starts) on the fly... but doesn't work multithreaded... switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = nth_double(px + pst[gr], po, pgs[gr], 1, narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = nth_int(px + pst[gr], po, pgs[gr], 1, narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } else { // Not sorted. Perhaps reordering x is faster? switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = nth_double(px, po + pst[gr], pgs[gr], 0, narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = nth_int(px, po + pst[gr], pgs[gr], 0, narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(1); return res; } // Expects pointer po to be decremented by 1 SEXP nth_g_impl_noalloc(SEXP x, int ng, int *pgs, int *po, int *pst, int sorted, int narm, int ret, double Q, void* x_cc) { SEXP res = PROTECT(allocVector(REALSXP, ng)); double *pres = REAL(res); if(sorted) { switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x)-1; for(int gr = 0; gr != ng; ++gr) pres[gr] = nth_double_noalloc(px + pst[gr], po, x_cc, pgs[gr], 1, narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-1; for(int gr = 0; gr != ng; ++gr) pres[gr] = nth_int_noalloc(px + pst[gr], po, x_cc, pgs[gr], 1, narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } else { switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); for(int gr = 0; gr != ng; ++gr) pres[gr] = nth_double_noalloc(px, po + pst[gr], x_cc, pgs[gr], 0, narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); for(int gr = 0; gr != ng; ++gr) pres[gr] = nth_int_noalloc(px, po + pst[gr], x_cc, pgs[gr], 0, narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(1); return res; } // Expects pointer po to be decremented by 1 SEXP nth_g_ord_impl(SEXP x, int ng, int *pgs, int *po, int *pst, int narm, int ret, double Q, int nthreads) { if(nthreads > ng) nthreads = ng; SEXP res = PROTECT(allocVector(REALSXP, ng)); double *pres = REAL(res); switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = nth_double_ord(px, po + pst[gr], pgs[gr], narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = nth_int_ord(px, po + pst[gr], pgs[gr], narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(1); return res; } // Expects pointers pw and po to be decremented by 1 SEXP w_nth_g_ord_impl(SEXP x, double *pw, int ng, int *pgs, int *po, int *pst, int narm, int ret, double Q, int nthreads) { if(nthreads > ng) nthreads = ng; SEXP res = PROTECT(allocVector(REALSXP, ng)); double *pres = REAL(res); switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = w_nth_double_ord(px, pw, po + pst[gr], DBL_MIN, pgs[gr], narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = w_nth_int_ord(px, pw, po + pst[gr], DBL_MIN, pgs[gr], narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(1); return res; } // Expects pointers pw and po to be decremented by 1 SEXP w_nth_g_qsort_impl(SEXP x, double *pw, int ng, int *pgs, int *po, int *pst, int sorted, int narm, int ret, double Q, int nthreads) { if(nthreads > ng) nthreads = ng; SEXP res = PROTECT(allocVector(REALSXP, ng)); double *pres = REAL(res); if(sorted) { // sorted by groups: need to offset both px and pw switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = w_nth_double_qsort(px + pst[gr], pw + pst[gr], po, DBL_MIN, pgs[gr], 1, narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-1; #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = w_nth_int_qsort(px + pst[gr], pw + pst[gr], po, DBL_MIN, pgs[gr], 1, narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } else { switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = w_nth_double_qsort(px, pw, po + pst[gr], DBL_MIN, pgs[gr], 0, narm, ret, Q); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); #pragma omp parallel for num_threads(nthreads) for(int gr = 0; gr < ng; ++gr) pres[gr] = w_nth_int_qsort(px, pw, po + pst[gr], DBL_MIN, pgs[gr], 0, narm, ret, Q); break; } default: error("Not Supported SEXP Type: '%s'", type2char(TYPEOF(x))); } } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(1); return res; } // Functions for Export -------------------------------------------------------- int Rties2int(SEXP x) { int tx = TYPEOF(x); if(tx == INTSXP || tx == REALSXP || tx == LGLSXP) { int ret = asInteger(x); if(ret < 1 || ret > 9) error("ties must be 1-9, you supplied: %d", ret); return ret; } if(tx != STRSXP) error("ties must be integer or character"); const char * r = CHAR(STRING_ELT(x, 0)); // translateCharUTF8() if(strcmp(r, "mean") == 0) return 1; if(strcmp(r, "min") == 0) return 2; if(strcmp(r, "max") == 0) return 3; if(strcmp(r, "q4") == 0) return 4; if(strcmp(r, "q5") == 0) return 5; if(strcmp(r, "q6") == 0) return 6; if(strcmp(r, "q7") == 0) return 7; if(strcmp(r, "q8") == 0) return 8; if(strcmp(r, "q9") == 0) return 9; error("Unknown ties option: %s", r); } #undef CHECK_PROB #define CHECK_PROB(l) \ if(length(p) != 1) error("fnth supports only a single element / quantile. Use fquantile for multiple quantiles."); \ double Q = asReal(p); \ if(ISNAN(Q) || Q <= 0.0 || Q == 1.0) error("n needs to be between 0 and 1, or between 1 and length(x). Use fmin and fmax for minima and maxima."); \ if(Q > 1.0) { \ ret = 2; /* ties = "min" */ \ if(nullg) { \ if(Q >= l) error("n needs to be between 0 and 1, or between 1 and length(x). Use fmin and fmax for minima and maxima."); \ Q = (Q-1.0)/(l-1); \ } else { \ if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); \ int ng = INTEGER(VECTOR_ELT(g, 0))[0]; \ if(Q >= (double)l/ng) error("n needs to be between 0 and 1, or between 1 and the length(x)/ng, with ng the number of groups. Use fmin and fmax for minima and maxima."); \ Q = (Q-1.0)/((double)l/ng-1.0); \ } \ } #undef CHECK_WEIGHTS #define CHECK_WEIGHTS(l) \ if(length(w) != l) error("length(w) must match length(x)"); \ if(TYPEOF(w) != REALSXP) { \ if(!(TYPEOF(w) == INTSXP || TYPEOF(w) == LGLSXP)) error("weights need to be double or integer/logical (internally coerced to double). You supplied a vector of type: '%s'", type2char(TYPEOF(w))); \ w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; \ } \ pw = REAL(w)-1; /* All functions require decremented w pointer */ #undef CHECK_GROUPS #define CHECK_GROUPS(nrx, cond) \ if(TYPEOF(g) != VECSXP || !inherits(g, "GRP")) error("g needs to be an object of class 'GRP', see ?GRP"); \ const SEXP *restrict pg = SEXPPTR_RO(g), ord = pg[6]; \ ng = INTEGER(pg[0])[0]; \ int sorted = LOGICAL(pg[5])[1] == 1, *restrict pgs = INTEGER(pg[2]), *restrict po, *restrict pst, maxgrpn = 0; \ if(nrx != length(pg[1])) error("length(g) must match nrow(x)"); \ if(isNull(ord)) { \ int *cgs = (int *) R_alloc(ng+2, sizeof(int)), *restrict pgv = INTEGER(pg[1]); cgs[1] = 1; \ if(nthreads <= 1 && nullw) { \ for(int i = 0; i != ng; ++i) { \ if(pgs[i] > maxgrpn) maxgrpn = pgs[i]; \ cgs[i+2] = cgs[i+1] + pgs[i]; \ } \ } else { \ for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; \ } \ pst = cgs + 1; \ if((cond)) po = &l; \ else { \ int *restrict count = (int *) R_Calloc(ng+1, int); \ po = (int *) R_alloc(nrx, sizeof(int)); --po; \ for(int i = 0; i != nrx; ++i) po[cgs[pgv[i]] + count[pgv[i]]++] = i+1; \ R_Free(count); \ } \ } else { \ po = INTEGER(ord)-1; \ pst = INTEGER(getAttrib(ord, sym_starts)); \ if(nthreads <= 1 && nullw) maxgrpn = asInteger(getAttrib(ord, sym_maxgrpn)); \ } /* Function for atomic vectors: has extra arguments o and checko for passing external ordering vector. This is meant to speed up computation of several (grouped) quantiles on the same data. Note that for grouped execution the ordering vector needs to take into account the grouping e.g. radixorder(GRPid(), myvar). */ SEXP fnthC(SEXP x, SEXP p, SEXP g, SEXP w, SEXP Rnarm, SEXP Rret, SEXP Rnthreads, SEXP o, SEXP checko) { int nullg = isNull(g), nullw = isNull(w), nullo = isNull(o), l = length(x), narm = asLogical(Rnarm), ret = Rties2int(Rret), nprotect = 0; CHECK_PROB(l); // if(l < 1) return x; if(l < 1 || (l == 1 && nullw)) return TYPEOF(x) == REALSXP ? x : l < 1 ? allocVector(REALSXP, 0) : ScalarReal(asReal(x)); // First the simplest case if(nullg && nullw && nullo) return nth_impl(x, narm, ret, Q); // Creating pointers that may or may not be needed double *pw = &Q; int *pxo = &l; // Preprocessing o if(!nullo) { if(length(o) != l || TYPEOF(o) != INTSXP) error("o must be a valid ordering vector, of the same length as x and type integer"); pxo = INTEGER(o); if(asLogical(checko)) { // TODO: Better way? for(unsigned int i = 0; i != l; ++i) if(pxo[i] < 1 || pxo[i] > l) error("Some elements in o are outside of range [1, length(x)]"); } if((TYPEOF(x) == REALSXP && ISNAN(REAL(x)[pxo[0]-1])) || ((TYPEOF(x) == INTSXP || TYPEOF(x) == LGLSXP) && INTEGER(x)[pxo[0]-1] == NA_INTEGER)) error("Found missing value at the beginning of the sample. Please use option na.last = TRUE (the default) when creasting ordering vectors for use with fnth()."); } // Preprocessing w, computing ordering of x if not supplied if(!nullw) { CHECK_WEIGHTS(l); if(l == 1) { UNPROTECT(nprotect); if(ISNAN(pw[1])) return ScalarReal(NA_REAL); return TYPEOF(x) == REALSXP ? x : ScalarReal(asReal(x)); } if(nullo && nullg) { // for grouped execution use w_nth_g_qsort_impl() if o is not supplied. // nullo = 0; pxo = (int *) R_alloc(l, sizeof(int)); num1radixsort(pxo, TRUE, FALSE, x); } } // If no groups, return using suitable functions if(nullg) { SEXP res; // result, could be put outside if() to avoid repetition below, but this seems to confuse rchk if(nullw) res = nth_ord_impl(x, pxo, narm, ret, Q); else res = w_nth_ord_impl(x, pxo, pw, narm, ret, Q, DBL_MIN); UNPROTECT(nprotect); return res; } int nthreads = asInteger(Rnthreads), ng; if(nthreads > max_threads) nthreads = max_threads; // Preprocessing g CHECK_GROUPS(l, sorted || !nullo); /* * Previous version: computes po if overall ordering of x is supplied to o. This is made redundant by requiring * the ordering o to now take into account the grouping (facilitated by R-level helper GRPid()), which provides * much greater speedup for repeated executions, and by the addition of w_nth_g_qsort_impl(). * if((!nullw && nullo) || isNull(ord)) { // Extra case: if ordering vector supplied, need to use it to get the group elements in order int *restrict pgv = INTEGER(pg[1]); if(isNull(ord)) { int *cgs = (int *) R_alloc(ng+2, sizeof(int)); cgs[1] = 1; for(int i = 0; i != ng; ++i) cgs[i+2] = cgs[i+1] + pgs[i]; // TODO: get maxgrpn? pst = cgs; } else pst = INTEGER(getAttrib(ord, sym_starts))-1; if(nullw && sorted) po = &l; else { int *restrict count = (int *) R_Calloc(ng+1, int); po = (int *) R_alloc(l, sizeof(int)); --po; if(nullw) { for(int i = 0; i != l; ++i) po[pst[pgv[i]] + count[pgv[i]]++] = i+1; } else { // This orders the elements of x within groups... e.g. starting with the first group, the indices of all elements of x in order, then the second group etc. --pgv; for(int i = 0, tmp; i != l; ++i) { tmp = pgv[pxo[i]]; po[pst[tmp] + count[tmp]++] = pxo[i]; } } R_Free(count); } ++pst; } */ SEXP res; // result if(nullw && nullo) res = nthreads <= 1 ? nth_g_impl_noalloc(x, ng, pgs, po, pst, sorted, narm, ret, Q, R_alloc(maxgrpn, TYPEOF(x) == REALSXP ? sizeof(double) : sizeof(int))) : nth_g_impl(x, ng, pgs, po, pst, sorted, narm, ret, Q, nthreads); else if(nullw) res = nth_g_ord_impl(x, ng, pgs, pxo-1, pst, narm, ret, Q, nthreads); else if(nullo) res = w_nth_g_qsort_impl(x, pw, ng, pgs, po, pst, sorted, narm, ret, Q, nthreads); else res = w_nth_g_ord_impl(x, pw, ng, pgs, pxo-1, pst, narm, ret, Q, nthreads); UNPROTECT(nprotect); return res; } #undef COLWISE_NTH_LIST #define COLWISE_NTH_LIST(FUN_NA, FUN, WFUN) \ if(nullw) { \ if(nthreads == 1) { \ void *x_cc = R_Calloc(nrx, double); \ for(int j = 0; j != l; ++j) pout[j] = FUN_NA(px[j], x_cc, narm, ret, Q); \ R_Free(x_cc); \ } else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < l; ++j) pout[j] = FUN(px[j], narm, ret, Q); \ } \ } else { /* TODO: if narm = FALSE, can compute sumw beforehand */ \ int *pxo = (int *) R_alloc(nrx, sizeof(int)); \ for(int j = 0; j != l; ++j) { \ num1radixsort(pxo, TRUE, FALSE, px[j]); \ pout[j] = WFUN(px[j], pxo, pw, narm, ret, Q, h); \ } \ } /* Multithreading: does not work with radixorder * } else { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) { int *pxo = (int *) R_Calloc(nrx, int); // num1radixsort(pxo, TRUE, FALSE, px[j]); // Probably cannot be parallelized, can try R_orderVector1() // R_orderVector1(pxo, nrx, px[j], TRUE, FALSE); // Also not thread safe, and also 0-indexed. // for(int i = 0; i < nrx; ++i) pxo[i] += 1; pout[j] = w_nth_ord_impl_dbl(px[j], pxo, pw, narm, ret, Q, h); R_Free(pxo); } } */ // TODO: Pre-compute weights at the group-level if narm = FALSE for list and matrix method // Function for lists / data frames SEXP fnthlC(SEXP x, SEXP p, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rret, SEXP Rnthreads) { int nullg = isNull(g), nullw = isNull(w), l = length(x), ng = 0, nprotect = 1, narm = asLogical(Rnarm), drop = asLogical(Rdrop), ret = Rties2int(Rret), nthreads = asInteger(Rnthreads); if(l < 1) return x; if(nthreads > max_threads) nthreads = max_threads; SEXP out = PROTECT(allocVector(nullg && drop ? REALSXP : VECSXP, l)); const SEXP *restrict px = SEXPPTR_RO(x); int nrx = length(px[0]); CHECK_PROB(nrx); double *restrict pw = &Q, h = DBL_MIN; if(!nullw) { CHECK_WEIGHTS(nrx); if(nullg && !narm) h = w_compute_h(pw+1, &l, nrx, 1, Q); // if no missing value removal, h is the same for all columns } if(nullg) { // No groups, multithreading across columns if(nthreads > l) nthreads = l; if(drop) { // drop dimensions (return vector) double *restrict pout = REAL(out); COLWISE_NTH_LIST(nth_impl_noalloc_dbl, nth_impl_dbl, w_nth_ord_impl_dbl); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(nprotect); return out; } // returns a list of atomic elements SEXP *restrict pout = SEXPPTR(out); COLWISE_NTH_LIST(nth_impl_noalloc_plain, nth_impl_plain, w_nth_ord_impl_plain); // Needed because including it in an OpenMP loop together with ScalarReal() is not thread safe for(int j = 0; j != l; ++j) { SEXP xj = px[j]; if(ANY_ATTRIB(xj) && !(isObject(xj) && inherits(xj, "ts"))) copyMostAttrib(xj, pout[j]); } } else { // with groups: do the usual checking CHECK_GROUPS(nrx, sorted); if(nullw) { // Parallelism at sub-column level if(nthreads <= 1) { void *x_cc = R_alloc(maxgrpn, sizeof(double)); for(int j = 0; j < l; ++j) SET_VECTOR_ELT(out, j, nth_g_impl_noalloc(px[j], ng, pgs, po, pst, sorted, narm, ret, Q, x_cc)); } else { for(int j = 0; j < l; ++j) SET_VECTOR_ELT(out, j, nth_g_impl(px[j], ng, pgs, po, pst, sorted, narm, ret, Q, nthreads)); } } else { // Parallelism at sub-column level for(int j = 0; j < l; ++j) SET_VECTOR_ELT(out, j, w_nth_g_qsort_impl(px[j], pw, ng, pgs, po, pst, sorted, narm, ret, Q, nthreads)); } } DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } // Iterate over matrix columns: for integers and doubles #undef COLWISE_NTH #define COLWISE_NTH(tdef, FUN, FUN_NA, WFUN, ORDFUN) \ if(nullw) { \ if(nthreads == 1) { \ tdef *x_cc = (tdef *) R_alloc(l, sizeof(tdef)); \ for(int j = 0; j < col; ++j) pres[j] = FUN_NA(px + j*l, &l, x_cc, l, 1, narm, ret, Q); \ } else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < col; ++j) pres[j] = FUN(px + j*l, &l, l, 1, narm, ret, Q); \ } \ } else { \ /* if(nthreads == 1) { */ \ int *pxo = (int *) R_alloc(l, sizeof(int)); \ for(int j = 0; j < col; ++j) { \ ORDFUN(pxo, TRUE, FALSE, l, px + j*l); \ pres[j] = WFUN(px + j*l - 1, pw, pxo, h, l, narm, ret, Q); \ } \ } /* else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < col; ++j) { \ int *pxo = (int *) R_Calloc(l, int); \ ORDFUN(pxo, TRUE, FALSE, l, px + j*l); // Currently cannot be parallelized \ pres[j] = WFUN(px + j*l - 1, pw, pxo, h, l, narm, ret, Q); \ R_Free(pxo); \ } \ } \ } \ */ // The same by groups if data already sorted by groups. px and pw should be decremented by 1 #undef COLWISE_NTH_GROUPED_SORTED #define COLWISE_NTH_GROUPED_SORTED(tdef, FUN, FUN_NA, WFUN) \ if(nullw) { \ if(nthreads == 1) { \ tdef *x_cc = (tdef *) R_alloc(maxgrpn, sizeof(tdef)); \ for(int j = 0; j != col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr != ng; ++gr) \ pres[jng + gr] = FUN_NA(pxj + pst[gr], po, x_cc, pgs[gr], 1, narm, ret, Q); \ } \ } else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr < ng; ++gr) \ pres[jng + gr] = FUN(pxj + pst[gr], po, pgs[gr], 1, narm, ret, Q); \ } \ } \ } else { \ if(nthreads == 1) { \ for(int j = 0; j != col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr != ng; ++gr) \ pres[jng + gr] = WFUN(pxj + pst[gr], pw + pst[gr], po, DBL_MIN, pgs[gr], 1, narm, ret, Q); \ } \ } else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr < ng; ++gr) \ pres[jng + gr] = WFUN(pxj + pst[gr], pw + pst[gr], po, DBL_MIN, pgs[gr], 1, narm, ret, Q); \ } \ } \ } // The more general case. po should be decremented by 1. #undef COLWISE_NTH_GROUPED_UNSORTED #define COLWISE_NTH_GROUPED_UNSORTED(tdef, FUN, FUN_NA, WFUN) \ if(nullw) { \ if(nthreads == 1) { \ tdef *x_cc = (tdef *) R_alloc(maxgrpn, sizeof(tdef)); \ for(int j = 0; j != col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr != ng; ++gr) \ pres[jng + gr] = FUN_NA(pxj, po + pst[gr], x_cc, pgs[gr], 0, narm, ret, Q); \ } \ } else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr < ng; ++gr) \ pres[jng + gr] = FUN(pxj, po + pst[gr], pgs[gr], 0, narm, ret, Q); \ } \ } \ } else { \ if(nthreads == 1) { \ for(int j = 0; j != col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr != ng; ++gr) \ pres[jng + gr] = WFUN(pxj, pw, po + pst[gr], DBL_MIN, pgs[gr], 0, narm, ret, Q); \ } \ } else { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < col; ++j) { \ int jng = j * ng; \ tdef *pxj = px + j * l; \ for(int gr = 0; gr < ng; ++gr) \ pres[jng + gr] = WFUN(pxj, pw, po + pst[gr], DBL_MIN, pgs[gr], 0, narm, ret, Q); \ } \ } \ } // Function for matrices: implemented at lower-level SEXP fnthmC(SEXP x, SEXP p, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rret, SEXP Rnthreads) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], narm = asLogical(Rnarm), ret = Rties2int(Rret), nthreads = asInteger(Rnthreads), nullg = isNull(g), nullw = isNull(w), nprotect = 1; if(nthreads > col) nthreads = col; if(nthreads > max_threads) nthreads = max_threads; CHECK_PROB(l); if(l < 1 || (l == 1 && nullw)) { if(TYPEOF(x) == REALSXP || TYPEOF(x) == INTSXP || TYPEOF(x) == LGLSXP) return x; error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } double *restrict pw = &Q, h = DBL_MIN; if(!nullw) { CHECK_WEIGHTS(l); if(nullg && !narm) h = w_compute_h(pw+1, &l, l, 1, Q); } if(nullg) { SEXP res = PROTECT(allocVector(REALSXP, col)); switch(tx) { case REALSXP: { double *px = REAL(x), *restrict pres = REAL(res); COLWISE_NTH(double, nth_double, nth_double_noalloc, w_nth_double_ord, dradixsort); break; } case INTSXP: case LGLSXP: { // Factor matrix not well defined object... int *px = INTEGER(x), *restrict pres = INTEGER(res); COLWISE_NTH(int, nth_int, nth_int_noalloc, w_nth_int_ord, iradixsort); break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } matCopyAttr(res, x, Rdrop, /*ng=*/0); UNPROTECT(nprotect); return res; } // With groups int ng; CHECK_GROUPS(l, sorted); SEXP res = PROTECT(allocVector(REALSXP, col * ng)); if(sorted) { // Sorted switch(tx) { case REALSXP: { double *px = REAL(x)-1, *restrict pres = REAL(res); COLWISE_NTH_GROUPED_SORTED(double, nth_double, nth_double_noalloc, w_nth_double_qsort); break; } case INTSXP: case LGLSXP: { // Factor matrix not well defined object... int *px = INTEGER(x)-1, *restrict pres = INTEGER(res); COLWISE_NTH_GROUPED_SORTED(int, nth_int, nth_int_noalloc, w_nth_int_qsort); break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } else { // Not sorted switch(tx) { case REALSXP: { double *px = REAL(x), *restrict pres = REAL(res); COLWISE_NTH_GROUPED_UNSORTED(double, nth_double, nth_double_noalloc, w_nth_double_qsort); break; } case INTSXP: case LGLSXP: { // Factor matrix not well defined object... int *px = INTEGER(x), *restrict pres = INTEGER(res); COLWISE_NTH_GROUPED_UNSORTED(int, nth_int, nth_int_noalloc, w_nth_int_qsort); break; } default: error("Not Supported SEXP Type: '%s'", type2char(tx)); } } matCopyAttr(res, x, Rdrop, ng); UNPROTECT(nprotect); return res; } collapse/src/fsum.c0000644000176200001440000010727215202367344014000 0ustar liggesusers#include "collapse_c.h" // #include #define N_ACC 4 // number of accumulators double fsum_double_impl(const double *restrict px, const int narm, const int l) { double sum, partial_sums[N_ACC] = {0.0}; int remainder; if(narm == 1) { int j = 1; sum = px[0]; while(ISNAN(sum) && j!=l) sum = px[j++]; if(j != l) { remainder = j + (l - j) % N_ACC; for(int i = j; i < remainder; ++i) sum += NISNAN(px[i]) ? px[i] : 0.0; #pragma omp simd reduction(+:partial_sums[:N_ACC]) for(int i = remainder; i < l; i += N_ACC) { for(int k = 0; k < N_ACC; ++k) partial_sums[k] += NISNAN(px[i + k]) ? px[i + k] : 0.0; } for(int k = 0; k < N_ACC; ++k) sum += partial_sums[k]; } } else { sum = 0; remainder = l % N_ACC; if(narm) { for(int i = 0; i < remainder; ++i) sum += NISNAN(px[i]) ? px[i] : 0.0; #pragma omp simd reduction(+:partial_sums[:N_ACC]) for(int i = remainder; i < l; i += N_ACC) { for(int k = 0; k < N_ACC; ++k) partial_sums[k] += NISNAN(px[i + k]) ? px[i + k] : 0.0; } } else { // Should just be fast, don't stop for NA's for(int i = 0; i < remainder; ++i) sum += px[i]; #pragma omp simd reduction(+:partial_sums[:N_ACC]) for(int i = remainder; i < l; i += N_ACC) { for(int k = 0; k < N_ACC; ++k) partial_sums[k] += px[i + k]; } } for(int k = 0; k < N_ACC; ++k) sum += partial_sums[k]; } return sum; } void fsum_double_g_impl(double *restrict pout, const double *restrict px, const int ng, const int *restrict pg, const int narm, const int l) { if(narm == 1) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) continue; // faster way to code this ? -> Not Bad at all if(ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; else pout[pg[i]] += px[i]; } } else { memset(pout, 0, sizeof(double) * ng); --pout; if(narm == 2) { for(int i = 0; i != l; ++i) if(NISNAN(px[i])) pout[pg[i]] += px[i]; } else { for(int i = 0; i != l; ++i) pout[pg[i]] += px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } double fsum_double_omp_impl(const double *restrict px, const int narm, const int l, const int nthreads) { double sum, partial_sums[N_ACC] = {0.0}; int remainder; if(narm) { int j = 1; sum = px[0]; while(ISNAN(sum) && j != l) sum = px[j++]; if(j != l) { remainder = j + (l - j) % N_ACC; for(int i = j; i < remainder; ++i) sum += NISNAN(px[i]) ? px[i] : 0.0; #pragma omp parallel for simd num_threads(nthreads) reduction(+:partial_sums[:N_ACC]) for(int i = remainder; i < l; i += N_ACC) { for(int k = 0; k < N_ACC; ++k) partial_sums[k] += NISNAN(px[i + k]) ? px[i + k] : 0.0; } for(int k = 0; k < N_ACC; ++k) sum += partial_sums[k]; } else if(narm == 2) sum = 0.0; } else { sum = 0; remainder = l % N_ACC; for(int i = 0; i < remainder; ++i) sum += px[i]; #pragma omp parallel for simd num_threads(nthreads) reduction(+:partial_sums[:N_ACC]) for(int i = remainder; i < l; i += N_ACC) { for(int k = 0; k < N_ACC; ++k) partial_sums[k] += px[i + k]; } for(int k = 0; k < N_ACC; ++k) sum += partial_sums[k]; } return sum; } // This is unsafe... // void fsum_double_g_omp_impl(double *restrict pout, double *restrict px, int ng, int *restrict pg, int narm, int l, int nthreads) { // if(narm) { // for(int i = ng; i--; ) pout[i] = NA_REAL; // #pragma omp parallel for num_threads(nthreads) reduction(+:pout[:ng]) // for(int i = 0; i < l; ++i) { // if(!ISNAN(px[i])) { // if(ISNAN(pout[pg[i]-1])) pout[pg[i]-1] = px[i]; // else pout[pg[i]-1] += px[i]; // } // } // } else { // memset(pout, 0, sizeof(double) * ng); // #pragma omp parallel for num_threads(nthreads) reduction(+:pout[:ng]) // shared(pout) // for(int i = 0; i < l; ++i) { // // #pragma omp atomic // pout[pg[i]-1] += px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. // } // } // } double fsum_weights_impl(const double *restrict px, const double *restrict pw, const int narm, const int l) { double sum; if(narm == 1) { int j = 0, end = l-1; while((ISNAN(px[j]) || ISNAN(pw[j])) && j!=end) ++j; sum = px[j] * pw[j]; if(j != end) { #pragma omp simd reduction(+:sum) for(int i = j+1; i < l; ++i) sum += (NISNAN(px[i]) && NISNAN(pw[i])) ? px[i] * pw[i] : 0.0; } } else { sum = 0; if(narm) { #pragma omp simd reduction(+:sum) for(int i = 0; i < l; ++i) sum += (NISNAN(px[i]) && NISNAN(pw[i])) ? px[i] * pw[i] : 0.0; } else { // Also here speed is key... double partial_sums[N_ACC] = {0.0}; const int remainder = l % N_ACC; for(int i = 0; i < remainder; ++i) sum += px[i] * pw[i]; #pragma omp simd reduction(+:partial_sums[:N_ACC]) for(int i = remainder; i < l; i += N_ACC) { for(int k = 0; k < N_ACC; ++k) partial_sums[k] += px[i + k] * pw[i + k]; } for(int k = 0; k < N_ACC; ++k) sum += partial_sums[k]; } } return sum; } void fsum_weights_g_impl(double *restrict pout, const double *restrict px, const int ng, const int *restrict pg, const double *restrict pw, const int narm, const int l) { if(narm == 1) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) { if(ISNAN(px[i]) || ISNAN(pw[i])) continue; if(ISNAN(pout[pg[i]])) pout[pg[i]] = px[i] * pw[i]; else pout[pg[i]] += px[i] * pw[i]; } } else { memset(pout, 0, sizeof(double) * ng); --pout; if(narm == 2) { for(int i = l; i--; ) if(NISNAN(px[i]) && NISNAN(pw[i])) pout[pg[i]] += px[i] * pw[i]; } else { for(int i = l; i--; ) pout[pg[i]] += px[i] * pw[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } double fsum_weights_omp_impl(const double *restrict px, const double *restrict pw, const int narm, const int l, const int nthreads) { double sum; if(narm) { int j = 0; while(j!=l && (ISNAN(px[j]) || ISNAN(pw[j]))) ++j; if(j != l) { sum = px[j] * pw[j]; #pragma omp parallel for simd num_threads(nthreads) reduction(+:sum) for(int i = j+1; i < l; ++i) sum += (NISNAN(px[i]) && NISNAN(pw[i])) ? px[i] * pw[i] : 0.0; } else sum = narm == 1 ? NA_REAL : 0.0; } else { sum = 0; double partial_sums[N_ACC] = {0.0}; const int remainder = l % N_ACC; for(int i = 0; i < remainder; ++i) sum += px[i] * pw[i]; #pragma omp parallel for simd num_threads(nthreads) reduction(+:partial_sums[:N_ACC]) for(int i = remainder; i < l; i += N_ACC) { for(int k = 0; k < N_ACC; ++k) partial_sums[k] += px[i + k] * pw[i + k]; } for(int k = 0; k < N_ACC; ++k) sum += partial_sums[k]; } return sum; } // This is unsafe... // void fsum_weights_g_omp_impl(double *restrict pout, double *restrict px, int ng, int *restrict pg, double *restrict pw, int narm, int l, int nthreads) { // if(narm) { // for(int i = ng; i--; ) pout[i] = NA_REAL; // #pragma omp parallel for num_threads(nthreads) reduction(+:pout[:ng]) // for(int i = 0; i < l; ++i) { // if(ISNAN(px[i]) || ISNAN(pw[i])) continue; // if(ISNAN(pout[pg[i]-1])) pout[pg[i]-1] = px[i] * pw[i]; // else pout[pg[i]-1] += px[i] * pw[i]; // } // } else { // memset(pout, 0, sizeof(double) * ng); // #pragma omp parallel for num_threads(nthreads) reduction(+:pout[:ng]) // for(int i = 0; i < l; ++i) pout[pg[i]-1] += px[i] * pw[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. // } // } // using long long internally is substantially faster than using doubles !! double fsum_int_impl(const int *restrict px, const int narm, const int l) { long long sum; if(narm) { int j = l-1; while(px[j] == NA_INTEGER && j!=0) --j; sum = (long long)px[j]; if(j == 0 && px[j] == NA_INTEGER) return narm == 1 ? NA_REAL : 0; for(int i = j; i--; ) if(px[i] != NA_INTEGER) sum += (long long)px[i]; } else { sum = 0; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) return NA_REAL; // Need this, otherwise result is incorrect !! sum += (long long)px[i]; } } return (double)sum; } void fsum_int_g_impl(int *restrict pout, const int *restrict px, const int ng, const int *restrict pg, const int narm, const int l) { long long ckof; if(narm == 1) { for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l, lsi; i--; ) { if(px[i] != NA_INTEGER) { lsi = pout[pg[i]]; if(lsi == NA_INTEGER) pout[pg[i]] = px[i]; else { ckof = (long long)lsi + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); pout[pg[i]] = (int)ckof; } } } } else { memset(pout, 0, sizeof(int) * ng); --pout; if(narm == 2) { for(int i = l; i--; ) { if(px[i] != NA_INTEGER) { ckof = (long long)pout[pg[i]] + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); pout[pg[i]] = (int)ckof; } } } else { for(int i = l, lsi; i--; ) { if(px[i] == NA_INTEGER) { pout[pg[i]] = NA_INTEGER; continue; } lsi = pout[pg[i]]; if(lsi != NA_INTEGER) { // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. ckof = (long long)lsi + px[i]; if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); pout[pg[i]] = (int)ckof; } } } } } double fsum_int_omp_impl(const int *restrict px, const int narm, const int l, const int nthreads) { long long sum; if(narm) { int j = 0; while(px[j] == NA_INTEGER && j!=l) ++j; if(j == l && px[j-1] == NA_INTEGER) return narm == 1 ? NA_REAL : 0; sum = (long long)px[j]; #pragma omp parallel for simd num_threads(nthreads) reduction(+:sum) for(int i = j+1; i < l; ++i) sum += px[i] != NA_INTEGER ? (long long)px[i] : 0; } else { if(px[0] == NA_INTEGER || px[l-1] == NA_INTEGER) return NA_REAL; sum = 0; #pragma omp parallel for simd num_threads(nthreads) reduction(+:sum) for(int i = 0; i < l; ++i) sum += (long long)px[i]; // Need this, else wrong result } return (double)sum; } // This is unsafe... // void fsum_int_g_omp_impl(int *restrict pout, int *restrict px, int ng, int *restrict pg, int narm, int l, int nthreads) { // long long ckof; // if(narm) { // for(int i = ng; i--; ) pout[i] = NA_INTEGER; // int lsi; // #pragma omp parallel for num_threads(nthreads) reduction(+:pout[:ng]) // for(int i = 0; i < l; ++i) { // if(px[i] != NA_INTEGER) { // lsi = pout[pg[i]-1]; // if(lsi == NA_INTEGER) pout[pg[i]-1] = px[i]; // else { // ckof = (long long)lsi + px[i]; // if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); // pout[pg[i]-1] = (int)ckof; // } // } // } // } else { // memset(pout, 0, sizeof(int) * ng); // int lsi; // #pragma omp parallel for num_threads(nthreads) reduction(+:pout[:ng]) // for(int i = 0; i < l; ++i) { // if(px[i] == NA_INTEGER) { // pout[pg[i]-1] = NA_INTEGER; // continue; // } // lsi = pout[pg[i]-1]; // if(lsi != NA_INTEGER) { // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. // ckof = (long long)lsi + px[i]; // if(ckof > INT_MAX || ckof <= INT_MIN) error("Integer overflow in one or more groups. Integers in R are bounded between 2,147,483,647 and -2,147,483,647. The sum within each group should be in that range."); // pout[pg[i]-1] = (int)ckof; // } // } // } // } SEXP fsumC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP fill, SEXP Rnthreads) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), nthreads = asInteger(Rnthreads), nprotect = 0, nwl = isNull(w); // ALTREP methods for compact sequences: not safe yet and not part of the API. // if(ALTREP(x) && ng == 0 && nwl) { // switch(tx) { // case INTSXP: return ALTINTEGER_SUM(x, (Rboolean)narm); // case LGLSXP: return ALTLOGICAL_SUM(x, (Rboolean)narm); // case REALSXP: return ALTREAL_SUM(x, (Rboolean)narm); // default: error("ALTREP object must be integer or real typed"); // } // } if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(l < 100000) nthreads = 1; // No improvements from multithreading on small data. if(narm) narm += asLogical(fill); if(nthreads > max_threads) nthreads = max_threads; if(tx == LGLSXP) tx = INTSXP; SEXP out; if(!(ng == 0 && nwl && tx == INTSXP)) { out = PROTECT(allocVector(nwl ? tx : REALSXP, ng == 0 ? 1 : ng)); ++nprotect; } if(nwl) { switch(tx) { case REALSXP: if(ng == 0) { REAL(out)[0] = (nthreads <= 1) ? fsum_double_impl(REAL(x), narm, l) : fsum_double_omp_impl(REAL(x), narm, l, nthreads); } else fsum_double_g_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); // If safe sub-column-level mutithreading can be developed... // if(nthreads <= 1) { // if(ng == 0) fsum_double_impl(REAL(out), REAL(x), narm, l); // else fsum_double_g_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); // } else { // if(ng == 0) fsum_double_omp_impl(REAL(out), REAL(x), narm, l, nthreads); // else fsum_double_g_omp_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l, nthreads); // } break; case INTSXP: { if(ng > 0) { fsum_int_g_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l); // If safe sub-column-level mutithreading can be developed... // if(nthreads <= 1) fsum_int_g_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l); // else fsum_int_g_omp_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l, nthreads); } else { double sum = nthreads <= 1 ? fsum_int_impl(INTEGER(x), narm, l) : fsum_int_omp_impl(INTEGER(x), narm, l, nthreads); UNPROTECT(nprotect); // Thomas Kalibera Patch: to appease rchk. if(sum > INT_MAX || sum <= INT_MIN) return ScalarReal(sum); // INT_MIN is NA_INTEGER return ScalarInteger(ISNAN(sum) ? NA_INTEGER : (int)sum); } break; } default: error("Unsupported SEXP type: '%s'", type2char(tx)); } } else { if(l != length(w)) error("length(w) must match length(x)"); if(TYPEOF(w) != REALSXP) { if(TYPEOF(w) != INTSXP && TYPEOF(w) != LGLSXP) error("weights must be double or integer"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } if(tx != REALSXP) { if(tx != INTSXP) error("Unsupported SEXP type: '%s'", type2char(tx)); x = PROTECT(coerceVector(x, REALSXP)); ++nprotect; } double *restrict px = REAL(x), *restrict pw = REAL(w); if(ng == 0) { REAL(out)[0] = (nthreads <= 1) ? fsum_weights_impl(px, pw, narm, l) : fsum_weights_omp_impl(px, pw, narm, l, nthreads); } else fsum_weights_g_impl(REAL(out), px, ng, INTEGER(g), pw, narm, l); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); // For example "Units" objects... UNPROTECT(nprotect); return out; } SEXP fsummC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP fill, SEXP Rdrop, SEXP Rnthreads) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *restrict pg = INTEGER(g), ng = asInteger(Rng), // ng1 = ng == 0 ? 1 : ng, narm = asLogical(Rnarm), nprotect = 1, nwl = isNull(w), nthreads = asInteger(Rnthreads); // , cmth = nthreads > 1 && col >= nthreads; if(l < 1) return x; // Prevents seqfault for numeric(0) #101 if(l*col < 100000) nthreads = 1; // No gains from multithreading on small data if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(narm) narm += asLogical(fill); if(nthreads > max_threads) nthreads = max_threads; if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector((nwl && ng > 0) ? tx : REALSXP, ng == 0 ? col : col * ng)); if(nwl) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); if(ng == 0) { if(nthreads <= 1) { for(int j = 0; j != col; ++j) pout[j] = fsum_double_impl(px + j*l, narm, l); } else if(col >= nthreads) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pout[j] = fsum_double_impl(px + j*l, narm, l); } else { for(int j = 0; j != col; ++j) pout[j] = fsum_double_omp_impl(px + j*l, narm, l, nthreads); } } else { if(nthreads <= 1 || col == 1) { for(int j = 0; j != col; ++j) fsum_double_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); } else { if(nthreads > col) nthreads = col; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) fsum_double_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); } } break; } case INTSXP: { int *px = INTEGER(x); if(ng > 0) { int *pout = INTEGER(out); if(nthreads <= 1 || col == 1) { for(int j = 0; j != col; ++j) fsum_int_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); } else { if(nthreads > col) nthreads = col; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) fsum_int_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); } } else { double *restrict pout = REAL(out); int anyoutl = 0; if(nthreads <= 1) { for(int j = 0; j != col; ++j) { double sumj = fsum_int_impl(px + j*l, narm, l); if(sumj > INT_MAX || sumj <= INT_MIN) anyoutl = 1; pout[j] = sumj; } } else if(col >= nthreads) { // If high-dimensional: column-level parallelism #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) { double sumj = fsum_int_impl(px + j*l, narm, l); if(sumj > INT_MAX || sumj <= INT_MIN) anyoutl = 1; pout[j] = sumj; } } else { for(int j = 0; j != col; ++j) { double sumj = fsum_int_omp_impl(px + j*l, narm, l, nthreads); if(sumj > INT_MAX || sumj <= INT_MIN) anyoutl = 1; pout[j] = sumj; } } if(anyoutl == 0) { out = PROTECT(coerceVector(out, INTSXP)); matCopyAttr(out, x, Rdrop, ng); UNPROTECT(nprotect + 1); return out; } } break; } default: error("Unsupported SEXP type: '%s'", type2char(tx)); } } else { if(l != length(w)) error("length(w) must match nrow(x)"); if(TYPEOF(w) != REALSXP) { if(TYPEOF(w) != INTSXP && TYPEOF(w) != LGLSXP) error("weights must be double or integer"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } if(tx != REALSXP) { if(tx != INTSXP) error("Unsupported SEXP type: '%s'", type2char(tx)); x = PROTECT(coerceVector(x, REALSXP)); ++nprotect; } double *px = REAL(x), *restrict pw = REAL(w), *pout = REAL(out); if(ng == 0) { if(nthreads <= 1) { for(int j = 0; j != col; ++j) pout[j] = fsum_weights_impl(px + j*l, pw, narm, l); } else if(col >= nthreads) { #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) pout[j] = fsum_weights_impl(px + j*l, pw, narm, l); } else { for(int j = 0; j != col; ++j) pout[j] = fsum_weights_omp_impl(px + j*l, pw, narm, l, nthreads); } } else { if(nthreads <= 1 || col == 1) { for(int j = 0; j != col; ++j) fsum_weights_g_impl(pout + j*ng, px + j*l, ng, pg, pw, narm, l); } else { if(nthreads > col) nthreads = col; #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < col; ++j) fsum_weights_g_impl(pout + j*ng, px + j*l, ng, pg, pw, narm, l); } } } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(nprotect); return out; } // For safe multithreading across data frame columns double fsum_impl_dbl(SEXP x, int narm, int nthreads) { int l = length(x); if(l < 1) return NA_REAL; if(nthreads <= 1) switch(TYPEOF(x)) { case REALSXP: return fsum_double_impl(REAL(x), narm, l); case LGLSXP: case INTSXP: return fsum_int_impl(INTEGER(x), narm, l); default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } switch(TYPEOF(x)) { case REALSXP: return fsum_double_omp_impl(REAL(x), narm, l, nthreads); case LGLSXP: case INTSXP: return fsum_int_omp_impl(INTEGER(x), narm, l, nthreads); default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } } SEXP fsum_impl_SEXP(SEXP x, int narm, int nthreads) { return ScalarReal(fsum_impl_dbl(x, narm, nthreads)); // This is not thread safe... need to do separate serial loop // SEXP res = ScalarReal(fsum_impl_dbl(x, narm, nthreads)); // if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) { // PROTECT(res); // copyMostAttrib(x, res); // UNPROTECT(1); // } // return res; } double fsum_w_impl_dbl(SEXP x, double *pw, int narm, int nthreads) { int l = length(x); if(l < 1) return NA_REAL; if(TYPEOF(x) != REALSXP) { if(TYPEOF(x) != INTSXP && TYPEOF(x) != LGLSXP) error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); x = PROTECT(coerceVector(x, REALSXP)); double res = (nthreads <= 1) ? fsum_weights_impl(REAL(x), pw, narm, l) : fsum_weights_omp_impl(REAL(x), pw, narm, l, nthreads); UNPROTECT(1); return res; } return (nthreads <= 1) ? fsum_weights_impl(REAL(x), pw, narm, l) : fsum_weights_omp_impl(REAL(x), pw, narm, l, nthreads); } SEXP fsum_w_impl_SEXP(SEXP x, double *pw, int narm, int nthreads) { return ScalarReal(fsum_w_impl_dbl(x, pw, narm, nthreads)); // This is not thread safe... need to do separate serial loop // SEXP res = ScalarReal(fsum_w_impl_dbl(x, pw, narm, nthreads)); // if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) { // PROTECT(res); // copyMostAttrib(x, res); // UNPROTECT(1); // } // return res; } SEXP fsum_g_impl(SEXP x, const int ng, const int *pg, int narm) { int l = length(x); if(l < 1) return ScalarReal(NA_REAL); SEXP res; switch(TYPEOF(x)) { case REALSXP: { res = PROTECT(allocVector(REALSXP, ng)); fsum_double_g_impl(REAL(res), REAL(x), ng, pg, narm, l); break; } case LGLSXP: case INTSXP: { res = PROTECT(allocVector(INTSXP, ng)); fsum_int_g_impl(INTEGER(res), INTEGER(x), ng, pg, narm, l); break; } default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(1); return res; } void fsum_g_omp_impl(SEXP x, void *pres, const int ng, const int *pg, int narm) { switch(TYPEOF(x)) { case REALSXP: fsum_double_g_impl(pres, REAL(x), ng, pg, narm, length(x)); break; case LGLSXP: case INTSXP: fsum_int_g_impl(pres, INTEGER(x), ng, pg, narm, length(x)); break; default: error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); } } SEXP fsum_wg_impl(SEXP x, const int ng, const int *pg, double *pw, int narm) { int l = length(x), nprotect = 1; if(l < 1) return ScalarReal(NA_REAL); if(TYPEOF(x) != REALSXP) { if(TYPEOF(x) != INTSXP && TYPEOF(x) != LGLSXP) error("Unsupported SEXP type: '%s'", type2char(TYPEOF(x))); x = PROTECT(coerceVector(x, REALSXP)); ++nprotect; } SEXP res = PROTECT(allocVector(REALSXP, ng)); fsum_weights_g_impl(REAL(res), REAL(x), ng, pg, pw, narm, l); if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, res); UNPROTECT(nprotect); return res; } #undef COLWISE_FSUM_LIST #define COLWISE_FSUM_LIST(FUN, WFUN) \ if(nwl) { \ if(nthreads > 1 && l >= nthreads) { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < l; ++j) pout[j] = FUN(px[j], narm, 1); \ } else { \ for(int j = 0; j != l; ++j) pout[j] = FUN(px[j], narm, nthreads); \ } \ } else { \ double *restrict pw = REAL(w); \ if(nthreads > 1 && l >= nthreads) { \ _Pragma("omp parallel for num_threads(nthreads)") \ for(int j = 0; j < l; ++j) pout[j] = WFUN(px[j], pw, narm, 1); \ } else { \ for(int j = 0; j != l; ++j) pout[j] = WFUN(px[j], pw, narm, nthreads); \ } \ } SEXP fsumlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP fill, SEXP Rdrop, SEXP Rnthreads) { int l = length(x), ng = asInteger(Rng), nthreads = asInteger(Rnthreads), nwl = isNull(w), narm = asLogical(Rnarm), nprotect = 1; // TODO: Disable multithreading if overall data size is small? if(l < 1) return x; // needed ?? if(narm) narm += asLogical(fill); if(nthreads > max_threads) nthreads = max_threads; if(!nwl) { if(length(VECTOR_ELT(x, 0)) != length(w)) error("length(w) must match nrow(x)"); if(TYPEOF(w) != REALSXP) { if(TYPEOF(w) != INTSXP && TYPEOF(w) != LGLSXP) error("weights must be double or integer"); w = PROTECT(coerceVector(w, REALSXP)); ++nprotect; } } if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)); const SEXP *restrict px = SEXPPTR_RO(x); double *restrict pout = REAL(out); COLWISE_FSUM_LIST(fsum_impl_dbl, fsum_w_impl_dbl); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(nprotect); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)), *restrict pout = SEXPPTR(out); const SEXP *restrict px = SEXPPTR_RO(x); if(ng == 0) { COLWISE_FSUM_LIST(fsum_impl_SEXP, fsum_w_impl_SEXP); // Needed because including it in an OpenMP loop together with ScalarReal() is not thread safe for(int j = 0; j < l; ++j) { SEXP xj = px[j]; if(ANY_ATTRIB(xj) && !(isObject(xj) && inherits(xj, "ts"))) copyMostAttrib(xj, pout[j]); } } else { if(length(VECTOR_ELT(x, 0)) != length(g)) error("length(g) must match length(x)"); const int *restrict pg = INTEGER(g); if(nthreads > l) nthreads = l; if(nwl) { // no weights if(nthreads > 1 && l > 1) { for(int j = 0; j != l; ++j) { SEXP xj = px[j], outj; SET_VECTOR_ELT(out, j, outj = allocVector(TYPEOF(px[j]) == REALSXP ? REALSXP : INTSXP, ng)); if(ANY_ATTRIB(xj) && !(isObject(xj) && inherits(xj, "ts"))) copyMostAttrib(xj, outj); } #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) fsum_g_omp_impl(px[j], DPTR(pout[j]), ng, pg, narm); } else { for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fsum_g_impl(px[j], ng, pg, narm)); } } else { double *restrict pw = REAL(w); if(nthreads > 1 && l > 1) { int nrx = length(g); for(int j = 0, dup = 0; j != l; ++j) { SEXP xj = px[j], outj; SET_VECTOR_ELT(out, j, outj = allocVector(REALSXP, ng)); if(ANY_ATTRIB(xj) && !(isObject(xj) && inherits(xj, "ts"))) copyMostAttrib(xj, outj); if(TYPEOF(xj) != REALSXP) { if(TYPEOF(xj) != INTSXP && TYPEOF(xj) != LGLSXP) error("Unsupported SEXP type: '%s'", type2char(TYPEOF(xj))); if(dup == 0) {x = PROTECT(shallow_duplicate(x)); ++nprotect; dup = 1;} SET_VECTOR_ELT(x, j, coerceVector(xj, REALSXP)); px = SEXPPTR_RO(x); // Fix suggested by ChatGPT } } #pragma omp parallel for num_threads(nthreads) for(int j = 0; j < l; ++j) fsum_weights_g_impl(REAL(pout[j]), REAL(px[j]), ng, pg, pw, narm, nrx); } else { for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fsum_wg_impl(px[j], ng, pg, pw, narm)); } } } DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } // If effective sub-column-level multithreading can be developed... // SEXP fsummC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads) { // SEXP dim = getAttrib(x, R_DimSymbol); // if(isNull(dim)) error("x is not a matrix"); // int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *restrict pg = INTEGER(g), // ng = asInteger(Rng), // ng1 = ng == 0 ? 1 : ng, // narm = asLogical(Rnarm), nprotect = 1, nwl = isNull(w), // nthreads = asInteger(Rnthreads), cmth = nthreads > 1 && col >= nthreads; // if (l < 1) return x; // Prevents seqfault for numeric(0) #101 // if(nthreads < 100000) nthreads = 1; // No gains from multithreading on small data // if(ng && l != length(g)) error("length(g) must match nrow(x)"); // if(tx == LGLSXP) tx = INTSXP; // SEXP out = PROTECT(allocVector((nwl && ng > 0) ? tx : REALSXP, ng == 0 ? col : col * ng)); // if(nwl) { // switch(tx) { // case REALSXP: { // double *px = REAL(x), *pout = REAL(out); // if(nthreads <= 1) { // No multithreading // if(ng == 0) for(int j = 0; j != col; ++j) fsum_double_impl(pout + j, px + j*l, narm, l); // else for(int j = 0; j != col; ++j) fsum_double_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); // } else { // Multithreading // if(ng == 0) { // if(cmth) { // If high-dimensional: column-level parallelism // #pragma omp parallel for num_threads(nthreads) // for(int j = 0; j < col; ++j) fsum_double_impl(pout + j, px + j*l, narm, l); // } else { // for(int j = 0; j != col; ++j) fsum_double_omp_impl(pout + j, px + j*l, narm, l, nthreads); // } // } else { // if(cmth) { // If high-dimensional: column-level parallelism // #pragma omp parallel for num_threads(nthreads) // for(int j = 0; j < col; ++j) fsum_double_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); // } else { // for(int j = 0; j != col; ++j) fsum_double_g_omp_impl(pout + j*ng, px + j*l, ng, pg, narm, l, nthreads); // } // } // } // break; // } // case INTSXP: { // int *px = INTEGER(x); // if(ng > 0) { // int *pout = INTEGER(out); // if(nthreads <= 1) { // for(int j = 0; j != col; ++j) fsum_int_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); // } else if(cmth) { // If high-dimensional: column-level parallelism // #pragma omp parallel for num_threads(nthreads) // for(int j = 0; j < col; ++j) fsum_int_g_impl(pout + j*ng, px + j*l, ng, pg, narm, l); // } else { // for(int j = 0; j != col; ++j) fsum_int_g_omp_impl(pout + j*ng, px + j*l, ng, pg, narm, l, nthreads); // } // } else { // double *pout = REAL(out); // int anyoutl = 0; // if(nthreads <= 1) { // for(int j = 0; j != col; ++j) { // double sumj = fsum_int_impl(px + j*l, narm, l); // if(sumj > INT_MAX || sumj <= INT_MIN) anyoutl = 1; // pout[j] = sumj; // } // } else if(cmth) { // If high-dimensional: column-level parallelism // #pragma omp parallel for num_threads(nthreads) // for(int j = 0; j < col; ++j) { // double sumj = fsum_int_impl(px + j*l, narm, l); // if(sumj > INT_MAX || sumj <= INT_MIN) anyoutl = 1; // pout[j] = sumj; // } // } else { // for(int j = 0; j != col; ++j) { // double sumj = fsum_int_omp_impl(px + j*l, narm, l, nthreads); // if(sumj > INT_MAX || sumj <= INT_MIN) anyoutl = 1; // pout[j] = sumj; // } // } // if(anyoutl == 0) { // SEXP iout = PROTECT(coerceVector(out, INTSXP)); // matCopyAttr(iout, x, Rdrop, ng); // UNPROTECT(2); // return iout; // } // } // break; // } // default: error("Unsupported SEXP type"); // } // } else { // if(l != length(w)) error("length(w) must match nrow(x)"); // int tw = TYPEOF(w); // SEXP xr, wr; // double *px, *pw, *pout = REAL(out); // if(tw != REALSXP) { // if(tw != INTSXP && tw != LGLSXP) error("weights must be double or integer"); // wr = PROTECT(coerceVector(w, REALSXP)); // pw = REAL(wr); // ++nprotect; // } else pw = REAL(w); // if(tx != REALSXP) { // if(tx != INTSXP) error("x must be double or integer"); // xr = PROTECT(coerceVector(x, REALSXP)); // px = REAL(xr); // ++nprotect; // } else px = REAL(x); // if(nthreads <= 1) { // for(int j = 0; j != col; ++j) fsum_weights_impl(pout + j*ng, px + j*l, ng, pg, pw, narm, l); // } else if(cmth) { // #pragma omp parallel for num_threads(nthreads) // for(int j = 0; j < col; ++j) fsum_weights_impl(pout + j*ng, px + j*l, ng, pg, pw, narm, l); // } else { // for(int j = 0; j != col; ++j) fsum_weights_omp_impl(pout + j*ng, px + j*l, ng, pg, pw, narm, l, nthreads); // } // } // matCopyAttr(out, x, Rdrop, ng); // UNPROTECT(nprotect); // return out; // } // If effective sub-column-level multithreading can be developed... // SEXP fsumlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop, SEXP Rnthreads) { // int l = length(x), ng = asInteger(Rng), nthreads = asInteger(Rnthreads), // nprotect = 1, cmth = nthreads > 1 && l >= nthreads; // // TODO: Disable multithreading if overall data size is small? // if(l < 1) return x; // needed ?? // SEXP Rnthreads1; // if(cmth) { // Rnthreads1 = PROTECT(ScalarInteger(1)); // ++nprotect; // } // if(ng == 0 && asLogical(Rdrop)) { // SEXP out = PROTECT(allocVector(REALSXP, l)), *px = SEXPPTR(x); // double *pout = REAL(out); // if(cmth) { // If high-dimensional: column-level parallelism // #pragma omp parallel for num_threads(nthreads) // for(int j = 0; j < l; ++j) pout[j] = asReal(fsumC(px[j], Rng, g, w, Rnarm, Rnthreads1)); // } else { // for(int j = 0; j != l; ++j) pout[j] = asReal(fsumC(px[j], Rng, g, w, Rnarm, Rnthreads)); // } // setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); // UNPROTECT(nprotect); // return out; // } // SEXP out = PROTECT(allocVector(VECSXP, l)), *pout = SEXPPTR(out), *px = SEXPPTR(x); // if(cmth) { // #pragma omp parallel for num_threads(nthreads) // for(int j = 0; j < l; ++j) pout[j] = fsumC(px[j], Rng, g, w, Rnarm, Rnthreads1); // } else { // for(int j = 0; j != l; ++j) pout[j] = fsumC(px[j], Rng, g, w, Rnarm, Rnthreads); // } // // if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); // DFcopyAttr(out, x, ng); // UNPROTECT(nprotect); // return out; // } collapse/src/fbetween_fwithin.cpp0000644000176200001440000011643115113724707016713 0ustar liggesusers#include using namespace Rcpp; // NOTE: Special case is set_mean = -Inf, which is when on the R side mean = "overall.mean" // TODO: Best simply adding set_mean to the mean calculation, or better other solution ? // [[Rcpp::export]] NumericVector BWCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, double theta = 1, double set_mean = 0, bool B = false, bool fill = false) { int l = x.size(); if(l < 1) return x; // Prevents segfault for numeric(0) #101 NumericVector out = no_init_vector(l); if (Rf_isNull(w)) { // No weights if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { int j = l-1, n = 1; // 1 because for-loop starts from 2 double sum = x[j]; while(std::isnan(sum) && j!=0) sum = x[--j]; if(j != 0) for(int i = j; i--; ) { if(std::isnan(x[i])) continue; sum += x[i]; // Fastest ? ++n; } sum = theta * sum/n - set_mean; // best ? if(B) { if(fill) std::fill(out.begin(), out.end(), sum); // (double)sum // fastest ? -> yes ! else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum; // double conversion -> nope, slower } } } else { out = x - sum; // conversion to double not necessary } } else { double sum = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { sum = x[i]; break; } else { sum += x[i]; } } sum = theta * sum/l - set_mean; // best ? if(B) { std::fill(out.begin(), out.end(), sum); // (double)sum) // fastest ? } else { out = x - sum; // conversion to double not necessary } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sum(ng, NA_REAL); // Other way ? IntegerVector n(ng, 1); // could also do no_init_vector and then add n[g[i]-1] = 1 in fir if condition... -> Nope, that is slower for(int i = l; i--; ) { if(!std::isnan(x[i])) { // faster way to code this ? -> Not Bad at all -> index for g[i]-1? -> Nope, no noticeable improvement if(std::isnan(sum[g[i]-1])) sum[g[i]-1] = x[i]; else { sum[g[i]-1] += x[i]; ++n[g[i]-1]; } } } if(B) { for(int i = ng; i--; ) sum[i] /= n[i]; if(fill) { for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sum[i] /= n[i]; // faster using two loops? or combine ? -> two loops (this solution) is a lot faster ! } else { for(int i = ng; i--; ) sum[i] = theta / n[i] * sum[i] - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; // best loop ? -> just as fast as the other one ! } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; on += n[i]; sum[i] /= n[i]; // fastest ? } osum = osum/on; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } else { NumericVector sum(ng); // // good? -> yes, but not initializing is numerically unstable.. // better for valgrind IntegerVector gsv = (Rf_isNull(gs)) ? IntegerVector(ng) : as(gs); // no_init_vector(ng); int ngs = 0; if(Rf_isNull(gs)) { // gsv = IntegerVector(ng); // std::fill(gsv.begin(), gsv.end(), 0); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]; ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]; ++gsv[g[i]-1]; } } } else { // gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]; ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]; } } } if(B) { for(int i = ng; i--; ) sum[i] /= gsv[i]; for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sum[i] /= gsv[i]; } else { for(int i = ng; i--; ) sum[i] = theta / gsv[i] * sum[i] - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; on += gsv[i]; sum[i] /= gsv[i]; // fastest ? } osum = osum/on; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } } } else { // With weights NumericVector wg = w; // wg(w) Identical speed if(l != wg.size()) stop("length(w) must match length(x)"); if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { int j = l-1; // 1 because for-loop starts from 2 while((std::isnan(x[j]) || std::isnan(wg[j])) && j!=0) --j; // This does not make a difference in performance but is more parsimonious. double sum = x[j]*wg[j], sumw = wg[j]; if(j != 0) for(int i = j; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; sum += x[i]*wg[i]; // Fastest ? sumw += wg[i]; } sum = theta * sum/sumw - set_mean; // best ? if(B) { if(fill) std::fill(out.begin(), out.end(), sum); // (double)sum // fastest ? else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum; // double conversion ? } } } else { out = x - sum; // conversion to double not necessary } } else { double sum = 0, sumw = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { // good, check both ? -> yes sum = x[i]+wg[i]; break; } else { sum += x[i]*wg[i]; sumw += wg[i]; } } sum = theta * sum/sumw - set_mean; // best ? if(B) { std::fill(out.begin(), out.end(), sum); // (double)sum// fastes ? } else { out = x - sum; // conversion to double not necessary } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { NumericVector sum(ng, NA_REAL), sumw(ng); // Other way ? -> Nope, this is as good as it gets // better for valgrind // NumericVector sumw = no_init_vector(ng); // what if only NA ? -> Works for some reason no problem, and faster for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i])) continue; // faster way to code this ? -> Not Bad at all -> index for g[i]-1? -> Nope, no noticeable improvement if(std::isnan(sum[g[i]-1])) { sum[g[i]-1] = x[i]*wg[i]; sumw[g[i]-1] = wg[i]; } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; } } if(B) { sum = sum/sumw; if(fill) { for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) out[i] = x[i]; else out[i] = sum[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sum = sum/sumw; } else { sum = theta * sum/sumw - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; osumw += sumw[i]; sum[i] /= sumw[i]; // fastest ? } osum = osum/osumw; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } else { NumericVector sum(ng), sumw(ng); // good? -> yes // = no_init_vector// Not initializing numerically unstable ! int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { if(!std::isnan(sum[g[i]-1])) { sum[g[i]-1] = sumw[g[i]-1] = x[i]+wg[i]; // or NA_REAL ? -> Nope, good ! ++ngs; if(ngs == ng) break; } } else { sum[g[i]-1] += x[i]*wg[i]; sumw[g[i]-1] += wg[i]; } } if(B) { sum = sum/sumw; for(int i = 0; i != l; ++i) out[i] = sum[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sum = sum/sumw; } else { sum = theta * sum/sumw - set_mean; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sum[i])) continue; // solves the issue ! osum += sum[i]; osumw += sumw[i]; sum[i] /= sumw[i]; // fastest ? } osum = osum/osumw; if(theta != 1) { sum = theta * sum; osum = theta * osum; } for(int i = 0; i != l; ++i) out[i] = x[i] - sum[g[i]-1] + osum; } } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] NumericMatrix BWmCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, double theta = 1, double set_mean = 0, bool B = false, bool fill = false) { int l = x.nrow(), col = x.ncol(); NumericMatrix out = no_init_matrix(l, col); if (Rf_isNull(w)) { // No weights ! if(ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = col; j--; ) { // Instead Am(j,_) you can use Am.row(j). NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); int k = l-1, nj = 1; double sumj = column[k]; while(std::isnan(sumj) && k!=0) sumj = column[--k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumj += column[i]; ++nj; } sumj = theta * sumj/nj - set_mean; // best ? if(B) { if(fill) std::fill(outj.begin(), outj.end(), sumj); // (double)sumj else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } } } else { outj = column - sumj; } } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double sumj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { sumj = column[i]; break; } else { sumj += column[i]; } } sumj = theta * sumj/l - set_mean; // best ? if(B) { std::fill(outj.begin(), outj.end(), sumj); // (double)sumj } else { outj = column - sumj; } } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng, NA_REAL); // std::vector // faster than NumericVector ? std::vector nj(ng); // int nj[ng]; // use vector also ? for(int i = l; i--; ) { if(!std::isnan(column[i])) { if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; nj[g[i]-1] = 1; } else { sumj[g[i]-1] += column[i]; ++nj[g[i]-1]; } } } if(B) { for(int i = ng; i--; ) sumj[i] /= nj[i]; if(fill) { for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= nj[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / nj[i] * sumj[i] - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += nj[i]; sumj[i] /= nj[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } else { if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng); // std::vector // better than array or NumericVector ? std::vector gsv(ng); // memset(gsv, 0, memsize); int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; ++gsv[g[i]-1]; } } if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng); // std::vector int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; } } if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = col; j--; ) { // Instead Am(j,_) you can use Am.row(j). NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; double sumj = column[k]*wg[k], sumwj = wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumj += column[i]*wg[i]; sumwj += wg[i]; } sumj = theta * sumj/sumwj - set_mean; // best ? if(B) { if(fill) std::fill(outj.begin(), outj.end(), sumj); // (double)sumj else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } } } else { outj = column - sumj; } } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double sumj = 0, sumwj = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sumj = column[i]+wg[i]; break; } else { sumj += column[i]*wg[i]; sumwj += wg[i]; } } sumj = theta * sumj/sumwj - set_mean; // best ? if(B) { std::fill(outj.begin(), outj.end(), sumj); // (double)sumj } else { outj = column - sumj; } } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); if(narm) { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng, NA_REAL), sumwj(ng); // best ? // std::vector for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } if(B) { sumj = sumj/sumwj; if(fill) { for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } else { for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); NumericVector sumj(ng), sumwj(ng); // std::vector int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = sumwj[g[i]-1] = column[i]+wg[i]; // or NA_REAL ? -> Nope, good ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } if(B) { sumj = sumj/sumwj; for(int i = 0; i != l; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != l; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] List BWlCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& gs = R_NilValue, const SEXP& w = R_NilValue, bool narm = true, double theta = 1, double set_mean = 0, bool B = false, bool fill = false) { int l = x.size(); List out(l); if (Rf_isNull(w)) { // No weights if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); int k = row-1, nj = 1; double sumj = column[k]; while(std::isnan(sumj) && k!=0) sumj = column[--k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i])) continue; sumj += column[i]; ++nj; } sumj = theta * sumj/nj - set_mean; // best ? if(B) { if(fill) out[j] = rep(sumj, row); // rep((double)sumj, row); // good ? else { NumericVector outj = no_init_vector(row); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } out[j] = outj; } } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); // good ? } } else { for(int j = l; j--; ) { NumericVector column = x[j]; double sumj = 0; int row = column.size(); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { sumj = column[i]; break; } else { sumj += column[i]; } } sumj = theta * sumj/row - set_mean; // best ? if(B) { out[j] = rep(sumj, row); // rep((double)sumj, row); } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); } } } else { // With groups int gss = g.size(); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng, NA_REAL); // std::vector std::vector nj(ng, 1); for(int i = row; i--; ) { if(!std::isnan(column[i])) { if(std::isnan(sumj[g[i]-1])) sumj[g[i]-1] = column[i]; else { sumj[g[i]-1] += column[i]; ++nj[g[i]-1]; } } } NumericVector outj = no_init_vector(row); if(B) { for(int i = ng; i--; ) sumj[i] /= nj[i]; if(fill) { for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= nj[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / nj[i] * sumj[i] - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += nj[i]; sumj[i] /= nj[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { if(Rf_isNull(gs)) { // int gsv[ng], memsize = sizeof(int)*ng; for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng); // std::vector // memset(gsv, 0, memsize); std::vector gsv(ng); int ngs = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; ++gsv[g[i]-1]; } } NumericVector outj = no_init_vector(row); if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { IntegerVector gsv = gs; if(gsv.size() != ng) stop("Vector of group-sizes must match number of groups"); for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng); // = no_init_vector // Not initializing seems to be numerically unstable ! int ngs = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]; ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]; } } NumericVector outj = no_init_vector(row); if(B) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { for(int i = ng; i--; ) sumj[i] /= gsv[i]; } else { for(int i = ng; i--; ) sumj[i] = theta / gsv[i] * sumj[i] - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { int on = 0; double osum = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; on += gsv[i]; sumj[i] /= gsv[i]; } osum = osum/on; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if (ng == 0) { if(!B && set_mean == R_NegInf) stop("For centering on the overall mean a grouping vector needs to be supplied"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(row != wgs) stop("length(w) must match nrow(X)"); int k = row-1; while((std::isnan(column[k]) || std::isnan(wg[k])) && k!=0) --k; double sumj = column[k]*wg[k], sumwi = wg[k]; if(k != 0) for(int i = k; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; sumj += column[i]*wg[i]; sumwi += wg[i]; } sumj = theta * sumj/sumwi - set_mean; // best ? if(B) { if(fill) out[j] = rep(sumj, row); // rep((double)sumj, row); else { NumericVector outj = no_init_vector(row); for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj; } out[j] = outj; } } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); // good like this ? } } else { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(row != wgs) stop("length(w) must match nrow(X)"); double sumj = 0, sumwi = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { sumj = column[i]+wg[i]; break; } else { sumj += column[i]*wg[i]; sumwi += wg[i]; } } sumj = theta * sumj/sumwi - set_mean; // best ? if(B) { out[j] = rep(sumj, row); // rep((double)sumj, row); } else { out[j] = column - sumj; } SHALLOW_DUPLICATE_ATTRIB(out[j], column); } } } else { // With groups int gss = g.size(); if(wgs != gss) stop("length(w) must match length(g)"); if(narm) { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng, NA_REAL), sumwj(ng); // std::vector for(int i = row; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i])) continue; if(std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = column[i]*wg[i]; sumwj[g[i]-1] = wg[i]; } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } NumericVector outj = no_init_vector(row); if(B) { sumj = sumj/sumwj; if(fill) { for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) outj[i] = column[i]; else outj[i] = sumj[g[i]-1]; } } } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); if(gss != row) stop("length(g) must match nrow(X)"); NumericVector sumj(ng), sumwj(ng); // std::vector int ngs = 0; for(int i = 0; i != row; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { if(!std::isnan(sumj[g[i]-1])) { sumj[g[i]-1] = sumwj[g[i]-1] = column[i]+wg[i]; // or NA_REAL ? -> Nope, good ++ngs; if(ngs == ng) break; } } else { sumj[g[i]-1] += column[i]*wg[i]; sumwj[g[i]-1] += wg[i]; } } NumericVector outj = no_init_vector(row); if(B) { sumj = sumj/sumwj; for(int i = 0; i != row; ++i) outj[i] = sumj[g[i]-1]; } else { if(set_mean != R_NegInf) { if(set_mean == 0 && theta == 1) { sumj = sumj/sumwj; } else { sumj = theta * sumj/sumwj - set_mean; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1]; } else { double osum = 0, osumw = 0; for(int i = ng; i--; ) { // Problem: if one sum remained NA, osum becomes NA if(std::isnan(sumj[i])) continue; // solves the issue ! osum += sumj[i]; osumw += sumwj[i]; sumj[i] /= sumwj[i]; } osum = osum/osumw; if(theta != 1) { sumj = theta * sumj; osum = theta * osum; } for(int i = 0; i != row; ++i) outj[i] = column[i] - sumj[g[i]-1] + osum; } } SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } collapse/src/handle_attributes.c0000644000176200001440000001215715122341705016516 0ustar liggesusers#include "collapse_c.h" // See https://github.com/wch/r-source/blob/079f863446b5414dd96f3c29d519e4a654146364/src/main/memory.c // and https://github.com/wch/r-source/blob/80e410a786324e0e472a25481d5dd28db8285330/src/main/attrib.c // https://github.com/wch/r-source/blob/b6f046826c87fc10ad08acd8858921fa1a58e488/doc/manual/R-ints.texi SEXP setAttributes(SEXP x, SEXP a) { SET_ATTTR(x, coerceVector(a, LISTSXP)); classgets(x, getAttrib(x, R_ClassSymbol)); // forcing class after attribute copy !! return x; } SEXP setattributes(SEXP x, SEXP a) { SET_ATTTR(x, coerceVector(a, LISTSXP)); // SET_OOBJ(x, TYPEOF(x)); // if(OOBJ(a)) // This does not work with ts-matrices! could also make compatible with S4 objects ! classgets(x, getAttrib(x, R_ClassSymbol)); return R_NilValue; } // not used ! // SEXP setAttr(SEXP x, SEXP a, SEXP v) { // setAttrib(x, a, v); // return x; // } // void setattr(SEXP x, SEXP a, SEXP v) { // setAttrib(x, a, v); // } SEXP duplAttributes(SEXP x, SEXP y) { // also look at data.table's keepattributes ... SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); return x; } // R_duplicate_attr -> deep copy only of attributes -> expensive if attributes are large ! // lazy_duplicate -> duplicate on modify -> but modifies object in global environment ! // shallow_duplicate -> only duplicate pointer? -> best !! // No speed improvement to attr<- (same slow performance for data.frame 'row.names') // SEXP CsetAttr(SEXP object, SEXP a, SEXP v) { // SEXP res = shallow_duplicate(object); // setAttrib(res, a, v); // return res; // } // Attribute Handling - 4 Situations: // 1 - x is classed (factor, date, time series), xAG is not classed. i.e. vector of fnobs, fmean etc. // -> Sallow replacing, removing class and levels attributes from x, discard attributes of xAG (if any) // -> or (if type matches i.e. double for date or time series), copy attributes of x unless x is a factor // 2 - x is not classed, xAG is classed (factor, date, time series). - an unusual situation should not occur - copy attributes of xAG, discard attributes of x // 3 - xAG and x are classed - same as above, keep attributes of xAG, discard attributes of x // 4 - neither x nor xAG are classed - preserve attributes of x, discard attributes of xAG (if any) // // if(isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); // else if(!isObject(x) || (tx == txAG && !isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); // else { // SHALLOW_DUPLICATE_ATTRIB(out, x); // classgets(out, R_NilValue); // OK ! // setAttrib(out, R_LevelsSymbol, R_NilValue); // if(isFactor(x)) ? faster ? // } // Can think further about this! but this solution appears acceptable... SEXP copyMostAttributes(SEXP x, SEXP y) { int tx = TYPEOF(x); // -> This is about the best we can do: unlist() does not preserve dates, and we don't want to create malformed factors // if(TYPEOF(x) == TYPEOF(y) && (OOBJ(x) == OOBJ(y) || (!inherits(y, "factor") && !(length(x) != length(y) && inherits(y, "ts"))))) if(tx == TYPEOF(y) && (isObject(x) == isObject(y) || tx != INTSXP || inherits(y, "IDate") || inherits(y, "ITime")) && !(length(x) != length(y) && inherits(y, "ts"))) { copyMostAttrib(y, x); return x; } // In any case we can preserve variable labels.. SEXP lab = getAttrib(y, sym_label); if(TYPEOF(lab) != NILSXP) setAttrib(x, sym_label, lab); return x; } SEXP CsetAttrib(SEXP object, SEXP a) { if(TYPEOF(object) == VECSXP) { SEXP res = PROTECT(shallow_duplicate(object)); SET_ATTTR(res, coerceVector(a, LISTSXP)); classgets(res, getAttrib(res, R_ClassSymbol)); UNPROTECT(1); return res; } SEXP res = object; SET_ATTTR(res, coerceVector(a, LISTSXP)); classgets(res, getAttrib(res, R_ClassSymbol)); return res; } SEXP CcopyAttrib(SEXP to, SEXP from) { if(TYPEOF(to) == VECSXP) { SEXP res = PROTECT(shallow_duplicate(to)); SHALLOW_DUPLICATE_ATTRIB(res, from); UNPROTECT(1); return res; } SEXP res = to; SHALLOW_DUPLICATE_ATTRIB(res, from); return res; } SEXP CcopyMostAttrib(SEXP to, SEXP from) { if(TYPEOF(to) == VECSXP) { SEXP res = PROTECT(shallow_duplicate(to)); copyMostAttrib(from, res); if(inherits(from, "data.frame") && length(VECTOR_ELT(to, 0)) != length(VECTOR_ELT(from, 0))) setAttrib(res, R_RowNamesSymbol, getAttrib(to, R_RowNamesSymbol)); UNPROTECT(1); return res; } SEXP res = to; copyMostAttrib(from, res); return res; } // No longer needed... // Warning message: In .Call(C_duplattributes, x, y) : converting NULL pointer to R NULL // void duplattributes(SEXP x, SEXP y) { // SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); // classgets(x, getAttrib(y, R_ClassSymbol)); // This solves the warning message !! // just to return R_NilValue; and the SEXP... returns NULL anyway // } // No longer needed... using copyMostAttributes instead // SEXP cond_duplAttributes(SEXP x, SEXP y) { // if(TYPEOF(x) == TYPEOF(y)) SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); // return x; // } // not used !! // void cond_duplattributes(SEXP x, SEXP y) { // if(TYPEOF(x) == TYPEOF(y)) SHALLOW_DUPLICATE_ATTRIB(x, y); // SET_ATTRIB(x, ATTRIB(y)); // } collapse/src/stats_pacf.c0000644000176200001440000000365214777170131015154 0ustar liggesusers/* R : A Computer Language for Statistical Data Analysis * * Copyright (C) 1999-2016 The R Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/. */ // #ifdef HAVE_CONFIG_H // # include // #endif // #include "data.table.h" #include #include #include // #include // #include "ts.h" /* cor is the autocorrelations starting from 0 lag*/ static void uni_pacf(double *cor, double *p, int nlag) { double a, b, c, *v, *w; v = (double*) R_alloc(nlag, sizeof(double)); w = (double*) R_alloc(nlag, sizeof(double)); w[0] = p[0] = cor[1]; for(int ll = 1; ll < nlag; ll++) { a = cor[ll+1]; b = 1.0; for(int i = 0; i < ll; i++) { a -= w[i] * cor[ll - i]; b -= w[i] * cor[i + 1]; } p[ll] = c = a/b; if(ll+1 == nlag) break; w[ll] = c; for(int i = 0; i < ll; i++) v[ll-i-1] = w[i]; for(int i = 0; i < ll; i++) w[i] -= c*v[i]; } } SEXP pacf1(SEXP acf, SEXP lmax) { int lagmax = asInteger(lmax); acf = PROTECT(coerceVector(acf, REALSXP)); SEXP ans = PROTECT(allocVector(REALSXP, lagmax)); uni_pacf(REAL(acf), REAL(ans), lagmax); SEXP d = PROTECT(allocVector(INTSXP, 3)); INTEGER(d)[0] = lagmax; INTEGER(d)[1] = INTEGER(d)[2] = 1; setAttrib(ans, R_DimSymbol, d); UNPROTECT(3); return ans; } collapse/src/collapse_cpp.h0000644000176200001440000001012114777170131015463 0ustar liggesusers // BWCpp SEXP _collapse_BWCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP); // BWmCpp SEXP _collapse_BWmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP); // BWlCpp SEXP _collapse_BWlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP); // pwnobsmCpp SEXP _collapse_pwnobsmCpp(SEXP xSEXP); // varyingCpp SEXP _collapse_varyingCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP); // varyingmCpp SEXP _collapse_varyingmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP); // varyinglCpp SEXP _collapse_varyinglCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP); // fbstatsCpp SEXP _collapse_fbstatsCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP setnSEXP, SEXP gnSEXP); // fbstatsmCpp SEXP _collapse_fbstatsmCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP); // fbstatslCpp SEXP _collapse_fbstatslCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP); // fdiffgrowthCpp SEXP _collapse_fdiffgrowthCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP); // fdiffgrowthmCpp SEXP _collapse_fdiffgrowthmCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP); // fdiffgrowthlCpp SEXP _collapse_fdiffgrowthlCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP); // flagleadCpp SEXP _collapse_flagleadCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP); // flagleadmCpp SEXP _collapse_flagleadmCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP); // flagleadlCpp SEXP _collapse_flagleadlCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP); // fscaleCpp SEXP _collapse_fscaleCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP); // fscalemCpp SEXP _collapse_fscalemCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP); // fscalelCpp SEXP _collapse_fscalelCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP); // fvarsdCpp SEXP _collapse_fvarsdCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP); // fvarsdmCpp SEXP _collapse_fvarsdmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP); // fvarsdlCpp SEXP _collapse_fvarsdlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP); // mrtl SEXP _collapse_mrtl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP); // mctl SEXP _collapse_mctl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP); // psmatCpp SEXP _collapse_psmatCpp(SEXP xSEXP, SEXP gSEXP, SEXP tSEXP, SEXP transposeSEXP, SEXP fillSEXP); // qFCpp SEXP _collapse_qFCpp(SEXP xSEXP, SEXP orderedSEXP, SEXP na_excludeSEXP, SEXP keep_attrSEXP, SEXP retSEXP); // sortuniqueCpp SEXP _collapse_sortuniqueCpp(SEXP xSEXP); // fdroplevelsCpp SEXP _collapse_fdroplevelsCpp(SEXP xSEXP, SEXP check_NASEXP); // seqid SEXP _collapse_seqid(SEXP xSEXP, SEXP oSEXP, SEXP delSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP skip_seqSEXP, SEXP check_oSEXP); // groupid SEXP _collapse_groupid(SEXP xSEXP, SEXP oSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP check_oSEXP); collapse/src/base_radixsort.h0000644000176200001440000000120414777170131016032 0ustar liggesusers// #include // Not available in C API !! // #include // Not available in C API !! // #define USE_RINTERNALS #include #include #include #include "internal/R_defn.h" // typedef uint64_t ZPOS64_T; // already defined in stdint.h void checkEncodings(SEXP x); SEXP Cradixsort(SEXP NA_last, SEXP decreasing, SEXP RETstrt, SEXP RETgs, SEXP SORTStr, SEXP args); void num1radixsort(int *o, Rboolean NA_last, Rboolean decreasing, SEXP x); void iradixsort(int *o, Rboolean NA_last, Rboolean decreasing, int n, int *x); void dradixsort(int *o, Rboolean NA_last, Rboolean decreasing, int n, double *x); collapse/src/qF_qG.cpp0000644000176200001440000001667615113725340014365 0ustar liggesusers#include using namespace Rcpp; template IntegerVector qFCppImpl(const Vector& x, bool ordered, bool na_exclude, bool keep_attr, int ret) { Vector levs = (na_exclude) ? na_omit(sort_unique(x)) : sort_unique(x); IntegerVector out = (na_exclude || RTYPE != REALSXP) ? match(x, levs) : as(Rf_match(levs, x, NA_INTEGER)); if(ret == 1) { // returning a factor if(keep_attr) SHALLOW_DUPLICATE_ATTRIB(out, x); // works for all atomic objects ? if(RTYPE == STRSXP) { Rf_setAttrib(out, R_LevelsSymbol, levs); } else { Rf_setAttrib(out, R_LevelsSymbol, Rf_coerceVector(levs, STRSXP)); // What about date objects... } Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","factor","na.included") : ordered ? CharacterVector::create("ordered","factor") : (!na_exclude) ? CharacterVector::create("factor","na.included") : CharacterVector::create("factor")); } else { // returnin a qG out.attr("N.groups") = int(levs.size()); if(ret == 3) { Rf_copyMostAttrib(x, levs); out.attr("groups") = levs; } Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","qG","na.included") : ordered ? CharacterVector::create("ordered","qG") : (!na_exclude) ? CharacterVector::create("qG","na.included") : CharacterVector::create("qG")); } return out; } // [[Rcpp::export]] // do Cpp 11 solution using return macro ? SEXP qFCpp(SEXP x, bool ordered = true, bool na_exclude = true, bool keep_attr = true, int ret = 1) { switch(TYPEOF(x)) { case INTSXP: return qFCppImpl(x, ordered, na_exclude, keep_attr, ret); case REALSXP: return qFCppImpl(x, ordered, na_exclude, keep_attr, ret); case STRSXP: return qFCppImpl(x, ordered, na_exclude, keep_attr, ret); case LGLSXP: { // Note that this always sorts it LogicalVector xl = x; int l = xl.size(); LogicalVector nd(3); IntegerVector out = no_init_vector(l); if(na_exclude) { for(int i = 0; i != l; ++i) { if(xl[i] == NA_LOGICAL) { out[i] = NA_INTEGER; } else if(xl[i] == true) { out[i] = 2; nd[1] = true; } else { out[i] = 1; nd[0] = true; } } if(!nd[0]) for(int i = l; i--; ) if(out[i] == 2) out[i] = 1; // no FALSE // otherwise malformed factor.. only 2 level but not 1 level } else { for(int i = 0; i != l; ++i) { if(xl[i] == NA_LOGICAL) { out[i] = 3; nd[2] = true; } else if(xl[i] == true) { out[i] = 2; nd[1] = true; } else { out[i] = 1; nd[0] = true; } } if(!nd[0] || (nd[2] && !nd[1])) { if(!nd[0]) { // no FALSE if(nd[1]) { // has TRUE (and NA) out = out - 1; } else { // only has NA out = out - 2; } } else { // NA and no TRUE for(int i = l; i--; ) if(out[i] == 3) out[i] = 2; } } } if(ret == 1) { // return factor if(keep_attr) SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_LevelsSymbol, CharacterVector::create("FALSE", "TRUE", NA_STRING)[nd]); Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","factor","na.included") : ordered ? CharacterVector::create("ordered","factor") : (!na_exclude) ? CharacterVector::create("factor","na.included") : CharacterVector::create("factor")); } else { out.attr("N.groups") = int(nd[0]+nd[1]+nd[2]); if(ret == 3) { LogicalVector groups = LogicalVector::create(false, true, NA_LOGICAL)[nd]; Rf_copyMostAttrib(x, groups); out.attr("groups") = groups; } Rf_classgets(out, (ordered && !na_exclude) ? CharacterVector::create("ordered","qG","na.included") : ordered ? CharacterVector::create("ordered","qG") : (!na_exclude) ? CharacterVector::create("qG","na.included") : CharacterVector::create("qG")); } return out; } default: stop("Not Supported SEXP Type"); } return R_NilValue; } template Vector sortuniqueImpl(const Vector& x) { Vector out = sort_unique(x); Rf_copyMostAttrib(x, out); return out; } IntegerVector sortuniqueFACT(const IntegerVector& x) { int nlevp = Rf_nlevels(x)+1, l = x.size(), k = 0; std::vector not_seen(nlevp, true); bool countNA = true; for(int i = 0; i != l; ++i) { if(x[i] == NA_INTEGER) { if(countNA) { ++k; countNA = false; } continue; } if(not_seen[x[i]]) { not_seen[x[i]] = false; if(++k == nlevp) break; } } IntegerVector out = no_init_vector(k); if(!countNA) out[k-1] = NA_INTEGER; k = 0; for(int i = 1; i != nlevp; ++i) if(!not_seen[i]) out[k++] = i; Rf_copyMostAttrib(x, out); return out; } // [[Rcpp::export]] SEXP sortuniqueCpp(SEXP x) { switch(TYPEOF(x)) { case INTSXP: if(Rf_isFactor(x)) return sortuniqueFACT(x); return sortuniqueImpl(x); case REALSXP: return sortuniqueImpl(x); case STRSXP: return sortuniqueImpl(x); case LGLSXP: { LogicalVector xl = x; int nc = 0, n0 = 0, n1 = 0, n2 = 0, l = xl.size(); for(int i = 0; i != l; ++i) { if(n2 == 0 && xl[i] == NA_LOGICAL) { n2 = ++nc; } else if(n1 == 0 && xl[i] == true) { n1 = ++nc; } else if(n0 == 0 && xl[i] == false) { n0 = ++nc; } if(nc == 3) break; } LogicalVector out = no_init_vector(nc); nc = 0; if(n0) out[nc++] = false; if(n1) out[nc++] = true; if(n2) out[nc] = NA_LOGICAL; Rf_copyMostAttrib(x, out); return out; } default: stop("Not Supported SEXP Type"); } return R_NilValue; } // [[Rcpp::export]] IntegerVector fdroplevelsCpp(const IntegerVector& x, bool check_NA = true) { int nlevp = Rf_nlevels(x)+1, l = x.size(), n = 1; std::vector uxp(nlevp, 1); // 1 is also true ! bool anyNA = false; if(check_NA) { for(int i = 0; i != l; ++i) { if(x[i] == NA_INTEGER) { anyNA = true; continue; } if(uxp[x[i]]) { uxp[x[i]] = 0; if(++n == nlevp) return x; } // uxp[x[i]] = 1; // Runs through, slower than above on DHS Uganda (660 factors) } } else { for(int i = 0; i != l; ++i) { if(uxp[x[i]]) { uxp[x[i]] = 0; if(++n == nlevp) return x; } // uxp[x[i]] = 1; // Runs through, slower than above on DHS Uganda (660 factors) } } // n = std::accumulate(uxp.begin()+1, uxp.end(), 0); // if(n == nlevp-1) return x; CharacterVector levs = Rf_getAttrib(x, R_LevelsSymbol); CharacterVector newlevs = no_init_vector(n-1); // n n = 0; for(int i = 1; i != nlevp; ++i) { if(!uxp[i]) { newlevs[n] = levs[i-1]; uxp[i] = ++n; } } IntegerVector out = no_init_vector(l); // fastest solution ! // IntegerVector out = anyNA ? IntegerVector(l, NA_INTEGER) : no_init_vector(l); // Not faster !! if(anyNA) { // for(int i = 0; i != l; ++i) if(x[i] != NA_INTEGER) out[i] = uxp[x[i]]; for(int i = 0; i != l; ++i) out[i] = (x[i] == NA_INTEGER) ? NA_INTEGER : uxp[x[i]]; } else { for(int i = 0; i != l; ++i) out[i] = uxp[x[i]]; } SHALLOW_DUPLICATE_ATTRIB(out, x); Rf_setAttrib(out, R_LevelsSymbol, newlevs); return out; } collapse/src/data.table.h0000644000176200001440000000574215056572047015036 0ustar liggesusers/* This code is adapted from the data.table package: http://r-datatable.com and licensed under a Mozilla Public License 2.0 (MPL-2.0) license. */ #ifndef DATATABLE_H // Check if DATATABLE_H is not defined #define DATATABLE_H // Define DATATABLE_H // #define USE_RINTERNALS #include "base_radixsort.h" // #include // for uint64_t rather than unsigned long long #include // #include "types.h" #define IS_TRUE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==TRUE) #define IS_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==FALSE) #define IS_TRUE_OR_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]!=NA_LOGICAL) #define SIZEOF(x) sizes[TYPEOF(x)] #define TYPEORDER(x) typeorder[x] // Needed for match.c and join.c #define NEED2UTF8(s) !(IS_ASCII(s) || (s)==NA_STRING || IS_UTF8(s)) #define ENC2UTF8(s) (!NEED2UTF8(s) ? (s) : mkCharCE(translateCharUTF8(s), CE_UTF8)) // for use with bit64::integer64 #define NA_INTEGER64 INT64_MIN #define MAX_INTEGER64 INT64_MAX #ifndef INTEGER64_PTR #define INTEGER64_PTR(x) ((int64_t*) REAL(x)) #endif #ifndef INTEGER64_PTR_RO #define INTEGER64_PTR_RO(x) ((int64_t*) REAL_RO(x)) #endif // init.c // https://stackoverflow.com/questions/1410563/what-is-the-difference-between-a-definition-and-a-declaration extern SEXP char_integer64; extern SEXP char_nanotime; extern SEXP char_factor; extern SEXP char_ordered; extern SEXP char_dataframe; extern SEXP char_datatable; extern SEXP char_sf; extern SEXP sym_sorted; extern SEXP sym_index; extern SEXP sym_index_df; extern SEXP sym_sf_column; extern SEXP SelfRefSymbol; extern SEXP sym_datatable_locked; // data.table_init.c SEXP collapse_init(SEXP mess); long long DtoLL(double x); double LLtoD(long long x); extern double NA_INT64_D; extern long long NA_INT64_LL; extern Rcomplex NA_CPLX; // initialized in init.c; see there for comments extern size_t sizes[100]; // max appears to be FUNSXP = 99, see Rinternals.h extern size_t typeorder[100]; // data.table_utils.c int need2utf8(SEXP x); SEXP coerceUtf8IfNeeded(SEXP x); SEXP setnames(SEXP x, SEXP nam); bool allNA(SEXP x, bool errorForBadType); SEXP allNAv(SEXP x, SEXP errorForBadType); bool INHERITS(SEXP x, SEXP char_); SEXP dt_na(SEXP x, SEXP cols, SEXP Rprop, SEXP Rcount); SEXP frankds(SEXP xorderArg, SEXP xstartArg, SEXP xlenArg, SEXP dns); SEXP setcolorder(SEXP x, SEXP o); // data.table_subset.c void setselfref(SEXP x); SEXP Calloccol(SEXP dt); SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax); SEXP extendIntVec(SEXP x, int len, int val); SEXP subsetCols(SEXP x, SEXP cols, SEXP checksf); SEXP subsetDT(SEXP x, SEXP rows, SEXP cols, SEXP checkrows); SEXP subsetVector(SEXP x, SEXP idx, SEXP checkidx); // rbindlist.c void writeNA(SEXP v, const int from, const int n); void writeValue(SEXP target, SEXP source, const int from, const int n); void savetl_init(void), savetl(SEXP s), savetl_end(void); SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg); #endif // End of DATATABLE_H guard collapse/src/stats_mAR.c0000644000176200001440000004016414777170131014721 0ustar liggesusers/* * Copyright (C) 1999 Martyn Plummer * Copyright (C) 1999-2016 The R Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/. */ #include #include // #include "data.table.h" #include #include #include #include // #include #include /* Fortran routines */ // #include "ts.h" // #include "stats.h" #define MAX_DIM_LENGTH 4 #define VECTOR(x) (x.vec) #define MATRIX(x) (x.mat) #define ARRAY1(x) (x.vec) #define ARRAY2(x) (x.mat) #define ARRAY3(x) (x.arr3) #define ARRAY4(x) (x.arr4) #define DIM(x) (x.dim) #define NROW(x) (x.dim[0]) #define NCOL(x) (x.dim[1]) #define DIM_LENGTH(x) (x.ndim) typedef struct array { double *vec; double **mat; double ***arr3; double ****arr4; int dim[MAX_DIM_LENGTH]; int ndim; } Array; static Array make_array(double vec[], int dim[], int ndim); static Array make_zero_array(int dim[], int ndim); static Array make_zero_matrix(int nrow, int ncol); static Array make_identity_matrix(int n); static Array subarray(Array a, int index); static int vector_length(Array a); static void set_array_to_zero(Array arr); static void copy_array (Array orig, Array ans); static void array_op(Array arr1, Array arr2, char op, Array ans); static void transpose_matrix(Array mat, Array ans); static void matrix_prod(Array mat1, Array mat2, int trans1, int trans2, Array ans); /* Functions for dynamically allocating arrays The Array structure contains pointers to arrays which are allocated using the R_alloc function. Although the .C() interface cleans up all memory assigned with R_alloc, judicious use of vmaxget() vmaxset() to free this memory is probably wise. See memory.c in R core. */ static void assert(bool bla) { if(!bla) error("assert failed in src/library/ts/src/carray.c"); } static Array init_array(void) { int i; Array a; /* Initialize everything to zero. Useful for debugging */ ARRAY1(a) = (double *) '\0'; ARRAY2(a) = (double **) '\0'; ARRAY3(a) = (double ***) '\0'; ARRAY4(a) = (double ****) '\0'; for (i = 0; i < MAX_DIM_LENGTH; i++) DIM(a)[i] = 0; DIM_LENGTH(a) = 0; return a; } static int vector_length(Array a) { int i, len; for (i = 0, len = 1; i < DIM_LENGTH(a); i++) { len *= DIM(a)[i]; } return len; } static Array make_array(double vec[], int dim[], int ndim) { int d, i, j; int len[MAX_DIM_LENGTH + 1]; Array a; assert(ndim <= MAX_DIM_LENGTH); a = init_array(); len[ndim] = 1; for (d = ndim; d >= 1; d--) { len[d-1] = len[d] * dim[ndim - d]; } for (d = 1; d <= ndim; d++) { switch(d) { case 1: VECTOR(a) = vec; break; case 2: ARRAY2(a) = (double**) R_alloc(len[2 - 1],sizeof(double*)); for(i = 0, j = 0; i < len[2 - 1]; i++, j+=dim[ndim - 2 + 1]) { ARRAY2(a)[i] = ARRAY1(a) + j; } break; case 3: ARRAY3(a) = (double***) R_alloc(len[3 - 1],sizeof(double**)); for(i = 0, j = 0; i < len[3 - 1]; i++, j+=dim[ndim - 3 + 1]) { ARRAY3(a)[i] = ARRAY2(a) + j; } break; case 4: ARRAY4(a) = (double****) R_alloc(len[4 - 1],sizeof(double***)); for(i = 0, j = 0; i < len[4 - 1]; i++, j+=dim[ndim - 4 + 1]) { ARRAY4(a)[i] = ARRAY3(a) + j; } break; default: break; } } for (i = 0; i < ndim; i++) { DIM(a)[i] = dim[i]; } DIM_LENGTH(a) = ndim; return a; } static Array make_zero_array(int dim[], int ndim) { int i; int len; double *vec; for (i = 0, len = 1; i < ndim; i++) { len *= dim[i]; } vec = (double *) R_alloc(len, sizeof(double)); for (i = 0; i < len; i++) { vec[i] = 0.0; } return make_array(vec, dim, ndim); } static Array make_zero_matrix(int nrow, int ncol) { int dim[2]; Array a; dim[0] = nrow; dim[1] = ncol; a = make_zero_array(dim, 2); return a; } static Array subarray(Array a, int index) /* Return subarray of array a in the form of an Array structure so it can be manipulated by other functions NB The data are not copied, so any changes made to the subarray will affect the original array. */ { int i, offset; Array b; b = init_array(); /* is index in range? */ assert( index >= 0 && index < DIM(a)[0] ); offset = index; switch(DIM_LENGTH(a)) { /* NB Falling through here */ case 4: offset *= DIM(a)[DIM_LENGTH(a) - 4 + 1]; ARRAY3(b) = ARRAY3(a) + offset; case 3: offset *= DIM(a)[DIM_LENGTH(a) - 3 + 1]; ARRAY2(b) = ARRAY2(a) + offset; case 2: offset *= DIM(a)[DIM_LENGTH(a) - 2 + 1]; ARRAY1(b) = ARRAY1(a) + offset; break; default: break; } DIM_LENGTH(b) = DIM_LENGTH(a) - 1; for (i = 0; i < DIM_LENGTH(b); i++) DIM(b)[i] = DIM(a)[i+1]; return b; } static int test_array_conform(Array a1, Array a2) { int i, ans = FALSE; if (DIM_LENGTH(a1) != DIM_LENGTH(a2)) return FALSE; else for (i = 0; i < DIM_LENGTH(a1); i++) { if (DIM(a1)[i] == DIM(a2)[i]) ans = TRUE; else return FALSE; } return ans; } static void copy_array (Array orig, Array ans) /* copy matrix orig to ans */ { int i; assert (test_array_conform(orig, ans)); for(i = 0; i < vector_length(orig); i++) VECTOR(ans)[i] = VECTOR(orig)[i]; } static void transpose_matrix(Array mat, Array ans) { int i,j; const void *vmax; Array tmp; tmp = init_array(); assert(DIM_LENGTH(mat) == 2 && DIM_LENGTH(ans) == 2); assert(NCOL(mat) == NROW(ans)); assert(NROW(mat) == NCOL(ans)); vmax = vmaxget(); tmp = make_zero_matrix(NROW(ans), NCOL(ans)); for(i = 0; i < NROW(mat); i++) for(j = 0; j < NCOL(mat); j++) MATRIX(tmp)[j][i] = MATRIX(mat)[i][j]; copy_array(tmp, ans); vmaxset(vmax); } static void array_op(Array arr1, Array arr2, char op, Array ans) /* Element-wise array operations */ { int i; assert (test_array_conform(arr1, arr2)); assert (test_array_conform(arr2, ans)); switch (op) { case '*': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] * VECTOR(arr2)[i]; break; case '+': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] + VECTOR(arr2)[i]; break; case '/': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] / VECTOR(arr2)[i]; break; case '-': for (i = 0; i < vector_length(ans); i++) VECTOR(ans)[i] = VECTOR(arr1)[i] - VECTOR(arr2)[i]; break; default: error("Unknown op in array_op"); // printf } } static void matrix_prod(Array mat1, Array mat2, int trans1, int trans2, Array ans) /* General matrix product between mat1 and mat2. Put answer in ans. trans1 and trans2 are logical flags which indicate if the matrix is to be transposed. Normal matrix multiplication has trans1 = trans2 = 0. */ { int i,j,k,K1,K2; const void *vmax; double m1, m2; Array tmp; /* Test whether everything is a matrix */ assert(DIM_LENGTH(mat1) == 2 && DIM_LENGTH(mat2) == 2 && DIM_LENGTH(ans) == 2); /* Test whether matrices conform. K is the dimension that is lost by multiplication */ if (trans1) { assert ( NCOL(mat1) == NROW(ans) ); K1 = NROW(mat1); } else { assert ( NROW(mat1) == NROW(ans) ); K1 = NCOL(mat1); } if (trans2) { assert ( NROW(mat2) == NCOL(ans) ); K2 = NCOL(mat2); } else { assert ( NCOL(mat2) == NCOL(ans) ); K2 = NROW(mat2); } assert (K1 == K2); tmp = init_array(); /* In case ans is the same as mat1 or mat2, we create a temporary matrix to hold the answer, then copy it to ans */ vmax = vmaxget(); tmp = make_zero_matrix(NROW(ans), NCOL(ans)); for (i = 0; i < NROW(tmp); i++) { for (j = 0; j < NCOL(tmp); j++) { for(k = 0; k < K1; k++) { m1 = (trans1) ? MATRIX(mat1)[k][i] : MATRIX(mat1)[i][k]; m2 = (trans2) ? MATRIX(mat2)[j][k] : MATRIX(mat2)[k][j]; MATRIX(tmp)[i][j] += m1 * m2; } } } copy_array(tmp, ans); vmaxset(vmax); } static void set_array_to_zero(Array arr) { int i; for (i = 0; i < vector_length(arr); i++) VECTOR(arr)[i] = 0.0; } static Array make_identity_matrix(int n) { int i; Array a; a = make_zero_matrix(n,n); for(i = 0; i < n; i++) MATRIX(a)[i][i] = 1.0; return a; } static void qr_solve(Array x, Array y, Array coef) /* Translation of the R function qr.solve into pure C NB We have to transpose the matrices since the ordering of an array is different in Fortran NB2 We have to copy x to avoid it being overwritten. */ { int i, info = 0, rank, *pivot, n, p; const void *vmax; double tol = 1.0E-7, *qraux, *work; Array xt, yt, coeft; assert(NROW(x) == NROW(y)); assert(NCOL(coef) == NCOL(y)); assert(NCOL(x) == NROW(coef)); vmax = vmaxget(); qraux = (double *) R_alloc(NCOL(x), sizeof(double)); pivot = (int *) R_alloc(NCOL(x), sizeof(int)); work = (double *) R_alloc(2*NCOL(x), sizeof(double)); for(i = 0; i < NCOL(x); i++) pivot[i] = i+1; xt = make_zero_matrix(NCOL(x), NROW(x)); transpose_matrix(x,xt); n = NROW(x); p = NCOL(x); F77_CALL(dqrdc2)(VECTOR(xt), &n, &n, &p, &tol, &rank, qraux, pivot, work); if (rank != p) error("Singular matrix in qr_solve"); yt = make_zero_matrix(NCOL(y), NROW(y)); coeft = make_zero_matrix(NCOL(coef), NROW(coef)); transpose_matrix(y, yt); F77_CALL(dqrcf)(VECTOR(xt), &NROW(x), &rank, qraux, yt.vec, &NCOL(y), coeft.vec, &info); transpose_matrix(coeft,coef); vmaxset(vmax); } static double ldet(Array x) /* Log determinant of square matrix */ { int i, rank, *pivot, n, p; const void *vmax; double ll, tol = 1.0E-7, *qraux, *work; Array xtmp; assert(DIM_LENGTH(x) == 2); /* is x a matrix? */ assert(NROW(x) == NCOL(x)); /* is x square? */ vmax = vmaxget(); qraux = (double *) R_alloc(NCOL(x), sizeof(double)); pivot = (int *) R_alloc(NCOL(x), sizeof(int)); work = (double *) R_alloc(2*NCOL(x), sizeof(double)); xtmp = make_zero_matrix(NROW(x), NCOL(x)); copy_array(x, xtmp); for(i = 0; i < NCOL(x); i++) pivot[i] = i+1; p = n = NROW(x); F77_CALL(dqrdc2)(VECTOR(xtmp), &n, &n, &p, &tol, &rank, qraux, pivot, work); if (rank != p) error("Singular matrix in ldet"); for (i = 0, ll=0.0; i < rank; i++) { ll += log(fabs(MATRIX(xtmp)[i][i])); } vmaxset(vmax); return ll; } /* Whittle's algorithm for autoregression estimation multi_yw is the interface to R. It also handles model selection using AIC whittle,whittle2 implement Whittle's recursion for solving the multivariate Yule-Walker equations. Notation resid residuals (forward and backward) A Estimates of forward autocorrelation coefficients B Estimates of backward autocorrelation coefficients EA,EB Prediction Variance KA,KB Partial correlation coefficient */ void multi_yw(double *acf, int *pn, int *pomax, int *pnser, double *coef, double *pacf, double *var, double *aic, int *porder, int *puseaic); static void whittle(Array acf, int nlag, Array *A, Array *B, Array p_forward, Array v_forward, Array p_back, Array v_back); static void whittle2 (Array acf, Array Aold, Array Bold, int lag, char *direction, Array A, Array K, Array E); void multi_yw(double *acf, int *pn, int *pomax, int *pnser, double *coef, double *pacf, double *var, double *aic, int *porder, int *useaic) { int i, m; int omax = *pomax, n = *pn, nser=*pnser, order=*porder; double aicmin; Array acf_array, p_forward, p_back, v_forward, v_back; Array *A, *B; int dim[3]; dim[0] = omax+1; dim[1] = dim[2] = nser; acf_array = make_array(acf, dim, 3); p_forward = make_array(pacf, dim, 3); v_forward = make_array(var, dim, 3); /* Backward equations (discarded) */ p_back= make_zero_array(dim, 3); v_back= make_zero_array(dim, 3); A = (Array *) R_alloc(omax+2, sizeof(Array)); B = (Array *) R_alloc(omax+2, sizeof(Array)); for (i = 0; i <= omax; i++) { A[i] = make_zero_array(dim, 3); B[i] = make_zero_array(dim, 3); } whittle(acf_array, omax, A, B, p_forward, v_forward, p_back, v_back); /* Model order selection */ for (m = 0; m <= omax; m++) { aic[m] = n * ldet(subarray(v_forward,m)) + 2 * m * nser * nser; } if (*useaic) { order = 0; aicmin = aic[0]; for (m = 0; m <= omax; m++) { if (aic[m] < aicmin) { aicmin = aic[m]; order = m; } } } else order = omax; *porder = order; for(i = 0; i < vector_length(A[order]); i++) coef[i] = VECTOR(A[order])[i]; } static void whittle(Array acf, int nlag, Array *A, Array *B, Array p_forward, Array v_forward, Array p_back, Array v_back) { int lag, nser = DIM(acf)[1]; const void *vmax; Array EA, EB; /* prediction variance */ Array KA, KB; /* partial correlation coefficient */ Array id, tmp; vmax = vmaxget(); KA = make_zero_matrix(nser, nser); EA = make_zero_matrix(nser, nser); KB = make_zero_matrix(nser, nser); EB = make_zero_matrix(nser, nser); id = make_identity_matrix(nser); copy_array(id, subarray(A[0],0)); copy_array(id, subarray(B[0],0)); copy_array(id, subarray(p_forward,0)); copy_array(id, subarray(p_back,0)); for (lag = 1; lag <= nlag; lag++) { whittle2(acf, A[lag-1], B[lag-1], lag, "forward", A[lag], KA, EB); whittle2(acf, B[lag-1], A[lag-1], lag, "back", B[lag], KB, EA); copy_array(EA, subarray(v_forward,lag-1)); copy_array(EB, subarray(v_back,lag-1)); copy_array(KA, subarray(p_forward,lag)); copy_array(KB, subarray(p_back,lag)); } tmp = make_zero_matrix(nser,nser); matrix_prod(KB,KA, 1, 1, tmp); array_op(id, tmp, '-', tmp); matrix_prod(EA, tmp, 0, 0, subarray(v_forward, nlag)); vmaxset(vmax); } static void whittle2 (Array acf, Array Aold, Array Bold, int lag, char *direction, Array A, Array K, Array E) { int d, i, nser=DIM(acf)[1]; const void *vmax; Array beta, tmp, id; d = strcmp(direction, "forward") == 0; vmax = vmaxget(); beta = make_zero_matrix(nser,nser); tmp = make_zero_matrix(nser, nser); id = make_identity_matrix(nser); set_array_to_zero(E); copy_array(id, subarray(A,0)); for(i = 0; i < lag; i++) { matrix_prod(subarray(acf,lag - i), subarray(Aold,i), d, 1, tmp); array_op(beta, tmp, '+', beta); matrix_prod(subarray(acf,i), subarray(Bold,i), d, 1, tmp); array_op(E, tmp, '+', E); } qr_solve(E, beta, K); transpose_matrix(K,K); for (i = 1; i <= lag; i++) { matrix_prod(K, subarray(Bold,lag - i), 0, 0, tmp); array_op(subarray(Aold,i), tmp, '-', subarray(A,i)); } vmaxset(vmax); } // static const R_CMethodDef CEntries[] = { // {"multi_yw", (DL_FUNC) &multi_yw, 10}, // {NULL, NULL, 0} //}; // void R_init_stat(DllInfo *dll) // { // R_registerRoutines(dll, CEntries, NULL, NULL, NULL); // R_useDynamicSymbols(dll, FALSE); //} collapse/src/programming.c0000644000176200001440000011437015202427630015340 0ustar liggesusers#include "collapse_c.h" #include "data.table.h" SEXP Cna_rm(SEXP x) { const int n = LENGTH(x); if (n < 1) return x; int k = 0; switch(TYPEOF(x)) { case LGLSXP: case INTSXP: { const int *xd = INTEGER(x); for (int i = 0; i != n; ++i) if(xd[i] == NA_INTEGER) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(TYPEOF(x), n - k)); int *pout = INTEGER(out); k = 0; for (int i = 0; i != n; ++i) if(xd[i] != NA_INTEGER) pout[k++] = xd[i]; copyMostAttrib(x, out); UNPROTECT(1); return out; } case REALSXP: { // What about integer64?? const double *xd = REAL(x); for (int i = 0; i != n; ++i) if(ISNAN(xd[i])) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(REALSXP, n - k)); double *pout = REAL(out); k = 0; for (int i = 0; i != n; ++i) if(NISNAN(xd[i])) pout[k++] = xd[i]; // using xd[i] == xd[i] is not faster !! copyMostAttrib(x, out); UNPROTECT(1); return out; } case STRSXP: { const SEXP *xd = SEXPPTR_RO(x); for (int i = 0; i != n; ++i) if(xd[i] == NA_STRING) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(STRSXP, n - k)); SEXP *pout = SEXPPTR(out); k = 0; for (int i = 0; i != n; ++i) if(xd[i] != NA_STRING) pout[k++] = xd[i]; copyMostAttrib(x, out); UNPROTECT(1); return out; } case VECSXP: { const SEXP *xd = SEXPPTR_RO(x); for (int i = 0; i != n; ++i) if(length(xd[i]) == 0) ++k; if(k == 0) return x; SEXP out = PROTECT(allocVector(VECSXP, n - k)); SEXP *pout = SEXPPTR(out); k = 0; for (int i = 0; i != n; ++i) if(length(xd[i]) != 0) pout[k++] = xd[i]; copyMostAttrib(x, out); UNPROTECT(1); return out; } } error("Unsupported type '%s' passed to na_rm()", type2char(TYPEOF(x))); } // Helper function to find a single string in factor levels int fchmatch(SEXP x, SEXP val, int nomatch) { const SEXP *px = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(x))), v = PROTECT(ENC2UTF8(asChar(val))); for(int i = 0, l = length(x); i != l; ++i) { if(px[i] == v) { UNPROTECT(2); return i + 1; } } UNPROTECT(2); return nomatch; } SEXP whichv(SEXP x, SEXP val, SEXP Rinvert) { int j = 0, n = length(x), invert = asLogical(Rinvert); int *buf = (int *) R_alloc(n, sizeof(int)); SEXP ans; #define WHICHVLOOP \ if(invert) { \ for(int i = 0; i != n; ++i) if(px[i] != v) buf[j++] = i+1; \ } else { \ for(int i = 0; i != n; ++i) if(px[i] == v) buf[j++] = i+1; \ } #define WHICHVLOOPLX \ if(invert) { \ for(int i = 0; i != n; ++i) if(px[i] != pv[i]) buf[j++] = i+1; \ } else { \ for(int i = 0; i != n; ++i) if(px[i] == pv[i]) buf[j++] = i+1; \ } if(length(val) == n && n > 1) { if(TYPEOF(val) != TYPEOF(x)) error("data types of x and value must be the same"); switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); const int *pv = INTEGER(val); WHICHVLOOPLX break; } case REALSXP: { const double *px = REAL(x); const double *pv = REAL(val); if(invert) { for(int i = 0; i != n; ++i) if(px[i] != pv[i] && (NISNAN(px[i]) || NISNAN(pv[i]))) buf[j++] = i+1; } else { for(int i = 0; i != n; ++i) if(px[i] == pv[i] || (ISNAN(px[i]) && ISNAN(pv[i]))) buf[j++] = i+1; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(x))); const SEXP *pv = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(val))); WHICHVLOOPLX UNPROTECT(2); break; } case RAWSXP : { const Rbyte *px = RAW(x); const Rbyte *pv = RAW(val); WHICHVLOOPLX break; } default: error("Unsupported type '%s' passed to whichv()", type2char(TYPEOF(x))); } } else { if(length(val) != 1) error("length(value) needs to be length(x) or 1"); switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); int v; if(TYPEOF(val) == STRSXP) { if(!isFactor(x)) error("Type mismatch: if value is character, x must be character or factor."); v = fchmatch(getAttrib(x, R_LevelsSymbol), val, 0); } else v = asInteger(val); WHICHVLOOP break; } case REALSXP: { const double *px = REAL(x); const double v = asReal(val); if(ISNAN(v)) { if(invert) { for(int i = 0; i != n; ++i) if(NISNAN(px[i])) buf[j++] = i+1; } else { for(int i = 0; i != n; ++i) if(ISNAN(px[i])) buf[j++] = i+1; } } else { WHICHVLOOP } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(x))); const SEXP v = PROTECT(ENC2UTF8(asChar(val))); WHICHVLOOP UNPROTECT(2); break; } case RAWSXP : { const Rbyte *px = RAW(x); const Rbyte v = RAW(val)[0]; WHICHVLOOP break; } default: error("Unsupported type '%s' passed to whichv()", type2char(TYPEOF(x))); } } PROTECT(ans = allocVector(INTSXP, j)); if(j) memcpy(INTEGER(ans), buf, sizeof(int) * j); UNPROTECT(1); return(ans); } SEXP anyallv(SEXP x, SEXP val, SEXP Rall) { int n = length(x), all = asLogical(Rall); if(length(x) == 0) return ScalarLogical(all ? 1 : 0); if(length(val) != 1) error("value needs to be length 1"); #define ALLANYVLOOP \ if(all) { \ for(int i = 0; i != n; ++i) if(px[i] != v) return ScalarLogical(0); \ return ScalarLogical(1); \ } else { \ for(int i = 0; i != n; ++i) if(px[i] == v) return ScalarLogical(1); \ return ScalarLogical(0); \ } switch(TYPEOF(x)) { case INTSXP: case LGLSXP: { const int *px = INTEGER(x); int v; if(TYPEOF(val) == STRSXP) { if(!isFactor(x)) error("Type mismatch: if value is character, x must be character or factor."); v = fchmatch(getAttrib(x, R_LevelsSymbol), val, 0); } else v = asInteger(val); ALLANYVLOOP break; } case REALSXP: { const double *px = REAL(x); const double v = asReal(val); if(ISNAN(v)) error("please use allNA()"); ALLANYVLOOP break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(x))); const SEXP v = PROTECT(ENC2UTF8(asChar(val))); if(all) { for(int i = 0; i != n; ++i) { if(px[i] != v) { UNPROTECT(2); return ScalarLogical(0); } } UNPROTECT(2); return ScalarLogical(1); } else { for(int i = 0; i != n; ++i) { if(px[i] == v) { UNPROTECT(2); return ScalarLogical(1); } } UNPROTECT(2); return ScalarLogical(0); } break; } case RAWSXP : { const Rbyte *px = RAW(x); const Rbyte v = RAW(val)[0]; ALLANYVLOOP break; } default: error("Unsupported type '%s' passed to allv() / anyv()", type2char(TYPEOF(x))); } return(R_NilValue); } SEXP setcopyv(SEXP x, SEXP val, SEXP rep, SEXP Rinvert, SEXP Rset, SEXP Rind1) { const int n = length(x), lv = length(val), lr = length(rep), tx = TYPEOF(x), ind1 = asLogical(Rind1), invert = asLogical(Rinvert), set = asLogical(Rset); int nprotect = 0, tv = TYPEOF(val), tr = TYPEOF(rep); if(lv > 1 || ind1) { if(tv == LGLSXP) { if(lv != n) error("If v is a logical vector, length(v) needs to be equal to length(x)"); if(lr != 1 && lr != n) error("If v is a logical vector, length(r) needs to be 1 or length(x)"); } else if(tv == INTSXP || tv == REALSXP) { if(invert) error("invert = TRUE is only possible if v is a logical vector"); if(lv == 0) return x; // integer(0) cannot cause error if(lv > n) error("length(v) must be <= length(x)"); if(!(lr == 1 || lr == n || lr == lv)) error("length(r) must be either 1, length(v) or length(x)"); if(tv == REALSXP) { if(lv == 1 && REAL_ELT(val, 0) == (int)REAL_ELT(val, 0)) { tv = INTSXP; val = PROTECT(coerceVector(val, INTSXP)); ++nprotect; } else error("If length(v) > 1 or vind1 = TRUE, v must be an integer or logical vector"); } // Just some heuristic checking as this is a programmers function const int v1 = INTEGER_ELT(val, 0), vn = INTEGER_ELT(val, lv-1); if(v1 < 1 || v1 > n || vn < 1 || vn > n) error("Detected index (v) outside of range [1, length(x)]"); } else error("If length(v) > 1 or vind1 = TRUE, v must be an integer or logical vector"); } else { if(lv == 0) return x; // empty replacement, good to return? if(lr != 1 && lr != n) error("If length(v) == 1, length(r) must be 1 or length(x)"); } if(tr != tx) { // lr == n && if(!((tx == INTSXP && tr == LGLSXP) || (tx == LGLSXP && tr == INTSXP))) { if(tr > tx && !(lr == 1 && tx == INTSXP && tr == REALSXP && REAL_ELT(rep, 0) == (int)REAL_ELT(rep, 0))) warning("Type of R (%s) is larger than X (%s) and thus coerced. This incurs loss of information, such as digits of real numbers being truncated upon coercion to integer. To avoid this, make sure X has a larger type than R: character > double > integer > logical.", type2char(tr), type2char(tx)); if(lr > 1) { tr = tx; rep = PROTECT(coerceVector(rep, tx)); ++nprotect; } } // error("typeof(x) needs to match typeof(r)"); } SEXP ans = R_NilValue; if(set == 0) { PROTECT(ans = shallow_duplicate(x)); // Fastest?? // copies attributes ?? -> Yes ++nprotect; } #define setcopyvLOOP(e) \ if(invert) { \ for(int i = 0; i != n; ++i) if(px[i] != v) px[i] = e; \ } else { \ for(int i = 0; i != n; ++i) if(px[i] == v) px[i] = e; \ } #define setcopyvLOOPLVEC1 \ if(tv == INTSXP) { \ _Pragma("omp simd") \ for(int i = 0; i < lv; ++i) px[pv[i]-1] = r; \ } else if(invert == 0) { \ for(int i = 0; i != n; ++i) if(pv[i] > 0) px[i] = r; \ } else { \ for(int i = 0; i != n; ++i) if(pv[i] == 0) px[i] = r; \ } #define setcopyvLOOPLVEC \ if(tv == INTSXP) { \ if(lr == n) { \ _Pragma("omp simd") \ for(int i = 0; i < lv; ++i) px[pv[i]-1] = pr[pv[i]-1]; \ } else { \ _Pragma("omp simd") \ for(int i = 0; i < lv; ++i) px[pv[i]-1] = pr[i]; \ } \ } else if(invert == 0) { \ for(int i = 0; i != n; ++i) if(pv[i] > 0) px[i] = pr[i]; \ } else { \ for(int i = 0; i != n; ++i) if(pv[i] == 0) px[i] = pr[i]; \ } switch(tx) { case INTSXP: case LGLSXP: { int *restrict px = set ? INTEGER(x) : INTEGER(ans); if(lv == 1 && ind1 == 0) { int v; if(tv == STRSXP) { if(!isFactor(x)) error("Type mismatch: if v is character, x must be character or factor."); v = fchmatch(getAttrib(x, R_LevelsSymbol), val, 0); } else v = asInteger(val); if(lr == 1) { const int r = asInteger(rep); setcopyvLOOP(r) } else { const int *restrict pr = INTEGER(rep); setcopyvLOOP(pr[i]) } } else { const int *restrict pv = INTEGER(val); // ALTREP(val) ? (const int *)ALTVEC_DATAPTR(val) : if(lr == 1) { const int r = asInteger(rep); setcopyvLOOPLVEC1 } else { const int *restrict pr = INTEGER(rep); setcopyvLOOPLVEC } } break; } case REALSXP: { double *restrict px = set ? REAL(x) : REAL(ans); if(lv == 1 && ind1 == 0) { const double v = asReal(val); if(lr == 1) { const double r = asReal(rep); if(ISNAN(v)) { if(invert) { for(int i = 0; i != n; ++i) if(NISNAN(px[i])) px[i] = r; } else { for(int i = 0; i != n; ++i) if(ISNAN(px[i])) px[i] = r; } } else { setcopyvLOOP(r) } } else { const double *restrict pr = REAL(rep); if(ISNAN(v)) { if(invert) { for(int i = 0; i != n; ++i) if(NISNAN(px[i])) px[i] = pr[i]; } else { for(int i = 0; i != n; ++i) if(ISNAN(px[i])) px[i] = pr[i]; } } else { setcopyvLOOP(pr[i]) } } } else { const int *restrict pv = INTEGER(val); // ALTREP(val) ? (const int *)ALTVEC_DATAPTR(val) : if(lr == 1) { const double r = asReal(rep); setcopyvLOOPLVEC1 } else { const double *restrict pr = REAL(rep); setcopyvLOOPLVEC } } break; } case STRSXP: { SEXP *restrict px = set ? SEXPPTR(x) : SEXPPTR(ans); if(lv == 1 && ind1 == 0) { const SEXP v = PROTECT(asChar(val)); if(lr == 1) { const SEXP r = PROTECT(asChar(rep)); setcopyvLOOP(r) UNPROTECT(1); } else { const SEXP *restrict pr = SEXPPTR_RO(rep); setcopyvLOOP(pr[i]) } UNPROTECT(1); } else { const int *restrict pv = INTEGER(val); // ALTREP(val) ? (const int *)ALTVEC_DATAPTR(val) : if(lr == 1) { const SEXP r = PROTECT(asChar(rep)); setcopyvLOOPLVEC1 UNPROTECT(1); } else { const SEXP *restrict pr = SEXPPTR_RO(rep); setcopyvLOOPLVEC } } break; } case VECSXP: { if(set && ALTREP(x)) error("cannot modify ALTREP list by reference"); SEXP *restrict px = set ? SEXPPTR(x) : SEXPPTR(ans); if(lv == 1 && ind1 == 0) error("Cannot compare lists to a value"); // if(tr != VECSXP) error("If X is a list and xlist = TRUE, R also needs to be a list"); const int *restrict pv = INTEGER(val); // ALTREP(val) ? (const int *)ALTVEC_DATAPTR(val) : if(lr == 1) { const SEXP r = VECTOR_ELT(rep, 0); setcopyvLOOPLVEC1 } else { const SEXP *restrict pr = SEXPPTR_RO(rep); setcopyvLOOPLVEC } break; } case RAWSXP: { Rbyte *restrict px = set ? RAW(x) : RAW(ans); if(lv == 1 && ind1 == 0) { const Rbyte v = RAW(val)[0]; if(lr == 1) { const Rbyte r = RAW(rep)[0]; setcopyvLOOP(r) } else { const Rbyte *restrict pr = RAW(rep); setcopyvLOOP(pr[i]) } } else { const int *restrict pv = INTEGER(val); // ALTREP(val) ? (const int *)ALTVEC_DATAPTR(val) : if(lr == 1) { const Rbyte r = RAW(rep)[0]; setcopyvLOOPLVEC1 } else { const Rbyte *restrict pr = RAW(rep); setcopyvLOOPLVEC } } break; } default: error("Unsupported type '%s' passed to setv() / copyv()", type2char(tx)); } UNPROTECT(nprotect); if(set == 0) return(ans); return(x); } SEXP setop_core(SEXP x, SEXP val, SEXP op, SEXP roww) { int n = length(x), nv = length(val), o = asInteger(op), tx = TYPEOF(x); #define OPSWITCH(e) \ switch(o) { \ case 1: \ _Pragma("omp simd") \ for(int i = 0; i < n; ++i) px[i] += e; \ break; \ case 2: \ _Pragma("omp simd") \ for(int i = 0; i < n; ++i) px[i] -= e; \ break; \ case 3: \ _Pragma("omp simd") \ for(int i = 0; i < n; ++i) px[i] *= e; \ break; \ case 4: \ _Pragma("omp simd") \ for(int i = 0; i < n; ++i) px[i] /= e; \ break; \ default: error("unsupported operation"); \ } if(nv == 1 || nv == n) { switch(tx) { case INTSXP: case LGLSXP: { int *px = INTEGER(x); if(nv == 1) { const int v = asInteger(val); OPSWITCH(v) } else { if(TYPEOF(val) == REALSXP) { // warning("adding real values to an integer: will truncate decimals"); const double *v = REAL(val); OPSWITCH(v[i]) } else { const int *v = INTEGER(val); OPSWITCH(v[i]) } } break; } case REALSXP: { double *px = REAL(x); if(nv == 1) { const double v = asReal(val); OPSWITCH(v) } else { if(TYPEOF(val) == REALSXP) { const double *v = REAL(val); OPSWITCH(v[i]) } else { const int *v = INTEGER(val); OPSWITCH(v[i]) } } break; } default: error("Unsupported type '%s'", type2char(tx)); } } else { if(!isMatrix(x)) error("unequal argument lengths"); int nr = nrows(x), nc = n / nr, rwl = asLogical(roww); if((rwl == 0 && nr != nv) || (rwl && nc != nv)) error("length of vector must match matrix rows/columns or the size of the matrix itself"); #define OPSWITCHMAT(e) \ switch(o) { \ case 1: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ _Pragma("omp simd") \ for(int i = 0; i < nr; ++i) px[cj + i] += e; \ } \ break; \ case 2: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ _Pragma("omp simd") \ for(int i = 0; i < nr; ++i) px[cj + i] -= e; \ } \ break; \ case 3: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ _Pragma("omp simd") \ for(int i = 0; i < nr; ++i) px[cj + i] *= e; \ } \ break; \ case 4: for(int j = 0, cj; j != nc; ++j) { \ cj = j * nr; \ _Pragma("omp simd") \ for(int i = 0; i < nr; ++i) px[cj + i] /= e; \ } \ break; \ default: error("unsupported operation"); \ } switch(tx) { case INTSXP: case LGLSXP: { int *px = INTEGER(x); if(TYPEOF(val) == REALSXP) { // warning("adding real values to an integer: will truncate decimals"); const double *v = REAL(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } else { const int *v = INTEGER(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } break; } case REALSXP: { double *px = REAL(x); if(TYPEOF(val) == REALSXP) { const double *v = REAL(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } else { const int *v = INTEGER(val); if(rwl) { OPSWITCHMAT(v[j]) } else { OPSWITCHMAT(v[i]) } } break; } default: error("Unsupported type '%s'", type2char(tx)); } } return(x); } SEXP setop(SEXP x, SEXP val, SEXP op, SEXP roww) { // IF x is a list, call function repeatedly.. if(TYPEOF(x) == VECSXP) { const SEXP *px = SEXPPTR_RO(x); int lx = length(x); if(TYPEOF(val) == VECSXP) { // val is list: must match length(x) const SEXP *pv = SEXPPTR_RO(val); if(lx != length(val)) error("length(X) must match length(V)"); for(int i = 0; i != lx; ++i) setop_core(px[i], pv[i], op, roww); } else if (length(val) == 1 || asLogical(roww) == 0) { // val is a scalar or vector but rowwise = FALSE for(int i = 0; i != lx; ++i) setop_core(px[i], val, op, roww); } else { // val is a numeric or logical vector to be applied rowwise if(lx != length(val)) error("length(X) must match length(V)"); switch(TYPEOF(val)) { case REALSXP: { double *pv = REAL(val); for(int i = 0; i != lx; ++i) { setop_core(px[i], PROTECT(ScalarReal(pv[i])), op, roww); UNPROTECT(1); } break; } case INTSXP: case LGLSXP: { int *pv = INTEGER(val); for(int i = 0; i != lx; ++i) { setop_core(px[i], PROTECT(ScalarInteger(pv[i])), op, roww); UNPROTECT(1); } break; } default: error("Unsupported type '%s'", type2char(TYPEOF(val))); } } return x; } return setop_core(x, val, op, roww); } SEXP replace_outliers(SEXP x, SEXP limits, SEXP value, SEXP single_limit, SEXP set) { const int ll = length(limits), sl = asInteger(single_limit), l = length(x), setl = asLogical(set); int nprotect = setl == 0; if(ll != 1 && ll != 2) error("'limits' must be length 1 or 2. You supplied limits length %d", ll); int clip = 0; if(TYPEOF(value) == STRSXP && strcmp(CHAR(STRING_ELT(value, 0)), "clip") == 0) { value = limits; clip = 1; } SEXP res = setl ? x : PROTECT(allocVector(TYPEOF(x), l)); switch(TYPEOF(x)) { case INTSXP: { if(TYPEOF(limits) != INTSXP) { PROTECT(limits = coerceVector(limits, INTSXP)); ++nprotect; } int *px = INTEGER(x), *pres = INTEGER(res), val = asInteger(value); if(ll == 1) { if(sl == 2 || sl == 3) { int l1 = INTEGER(limits)[0]; if(sl == 2) { // minimum #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] < l1 && px[i] != NA_INTEGER ? val : px[i]; } else { // maximum #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] > l1 ? val : px[i]; } } } else { // two-sided int l1 = INTEGER(limits)[0], l2 = INTEGER(limits)[1]; if(clip) { #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] > l2 ? l2 : px[i] < l1 && px[i] != NA_INTEGER ? l1 : px[i]; } else { #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] > l2 || (px[i] < l1 && px[i] != NA_INTEGER) ? val : px[i]; } } break; } case REALSXP: { if(TYPEOF(limits) != REALSXP) { PROTECT(limits = coerceVector(limits, REALSXP)); ++nprotect; } double *px = REAL(x), *pres = REAL(res), val = asReal(value); if(ll == 1) { if(sl == 2 || sl == 3) { double l1 = REAL(limits)[0]; if(sl == 2) { // minimum #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] < l1 ? val : px[i]; } else { // maximum #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] > l1 ? val : px[i]; } } } else { // two-sided double l1 = REAL(limits)[0], l2 = REAL(limits)[1]; if(clip) { #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] > l2 ? l2 : px[i] < l1 ? l1 : px[i]; } else { #pragma omp simd for (int i = 0; i < l; ++i) pres[i] = px[i] > l2 || px[i] < l1 ? val : px[i]; } } break; } default: error("Unsupported type '%s'", type2char(TYPEOF(x))); } if(setl == 0) SHALLOW_DUPLICATE_ATTRIB(res, x); UNPROTECT(nprotect); return res; } SEXP na_locf(SEXP x, SEXP Rset) { int n = length(x), copy = asLogical(Rset) == 0; if(isMatrix(x)) warning("na_locf() does not (yet) have explicit support for matrices, i.e., it treats a matrix as a single vector. Use dapply(M, na_locf) if column-wise processing is desired"); if(copy) x = PROTECT(shallow_duplicate(x)); switch (TYPEOF(x)) { case INTSXP: case LGLSXP: { int *data = INTEGER(x); int last = data[0]; for (int i = 0; i < n; i++) { if (data[i] == NA_INTEGER) { data[i] = last; } else { last = data[i]; } } break; } case REALSXP: { double *data = REAL(x); double last = data[0]; for (int i = 0; i < n; i++) { if (ISNAN(data[i])) { data[i] = last; } else { last = data[i]; } } break; } case STRSXP: { SEXP *data = SEXPPTR(x); SEXP last = data[0]; for (int i = 0; i < n; i++) { if (data[i] == NA_STRING) { data[i] = last; } else { last = data[i]; } } break; } case VECSXP: { const SEXP *data = SEXPPTR_RO(x); SEXP last = data[0]; for (int i = 0; i < n; i++) { if (length(data[i]) == 0) { SET_VECTOR_ELT(x, i, last); } else { last = data[i]; } } break; } default: error("na_locf() does not support type '%s'", type2char(TYPEOF(x))); } UNPROTECT(copy); return x; } SEXP na_focb(SEXP x, SEXP Rset) { int n = length(x), copy = asLogical(Rset) == 0; if(isMatrix(x)) warning("na_focb() does not (yet) have explicit support for matrices, i.e., it treats a matrix as a single vector. Use dapply(M, na_focb) if column-wise processing is desired"); if(copy) x = PROTECT(shallow_duplicate(x)); switch (TYPEOF(x)) { case INTSXP: case LGLSXP: { int *data = INTEGER(x); int last = data[0]; for (int i = n; i--; ) { if (data[i] == NA_INTEGER) { data[i] = last; } else { last = data[i]; } } break; } case REALSXP: { double *data = REAL(x); double last = data[0]; for (int i = n; i--; ) { if (ISNAN(data[i])) { data[i] = last; } else { last = data[i]; } } break; } case STRSXP: { SEXP *data = SEXPPTR(x); SEXP last = data[0]; for (int i = n; i--; ) { if (data[i] == NA_STRING) { data[i] = last; } else { last = data[i]; } } break; } case VECSXP: { const SEXP *data = SEXPPTR_RO(x); SEXP last = data[0]; for (int i = n; i--; ) { if (length(data[i]) == 0) { SET_VECTOR_ELT(x, i, last); } else { last = data[i]; } } break; } default: error("na_focb() does not support type '%s'", type2char(TYPEOF(x))); } UNPROTECT(copy); return x; } SEXP vtypes(SEXP x, SEXP isnum) { int tx = TYPEOF(x); if(tx != VECSXP) return ScalarInteger(tx); const SEXP *px = SEXPPTR_RO(x); // This is ok, even if x contains ALTREP objects.. int n = length(x); SEXP ans = PROTECT(allocVector(INTSXP, n)); int *pans = INTEGER(ans); switch(asInteger(isnum)) { case 0: for(int i = 0; i != n; ++i) pans[i] = TYPEOF(px[i]) + 1; break; case 1: // Numeric variables: do_is with op = 100: https://github.com/wch/r-source/blob/2b0818a47199a0b64b6aa9b9f0e53a1e886e8e95/src/main/coerce.c // See also DispatchOrEval in https://github.com/wch/r-source/blob/trunk/src/main/eval.c { for(int i = 0, tci, tnum; i != n; ++i) { // pans[i] = isNumeric(px[i]) && !isLogical(px[i]); // Date is numeric, from: https://github.com/wch/r-source/blob/2b0818a47199a0b64b6aa9b9f0e53a1e886e8e95/src/main/coerce.c tci = TYPEOF(px[i]); tnum = tci == INTSXP || tci == REALSXP; if(tnum && isObject(px[i])) tnum = !(inherits(px[i], "factor") || inherits(px[i], "Date") || inherits(px[i], "POSIXct") || inherits(px[i], "yearmon") || inherits(px[i], "yearqtr")); pans[i] = tnum; } SETTOF(ans, LGLSXP); break; } case 2: // is.factor for(int i = 0; i != n; ++i) pans[i] = (int)isFactor(px[i]); SETTOF(ans, LGLSXP); break; case 3: // is.list, needed for list processing functions for(int i = 0; i != n; ++i) pans[i] = TYPEOF(px[i]) == VECSXP; SETTOF(ans, LGLSXP); break; case 4: // is.sublist, needed for list processing functions for(int i = 0; i != n; ++i) pans[i] = TYPEOF(px[i]) == VECSXP && !inherits(px[i], "data.frame"); SETTOF(ans, LGLSXP); break; case 7: // is.atomic(x), needed in atomic_elem() // is.atomic: do_is with op = 200: https://github.com/wch/r-source/blob/9f9033e193071f256e21a181cb053cba983ed4a9/src/main/coerce.c for(int i = 0; i != n; ++i) { switch(TYPEOF(px[i])) { case NILSXP: /* NULL is atomic (S compatibly), but not in isVectorAtomic(.) */ case CHARSXP: case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: pans[i] = 1; break; default: pans[i] = 0; } } SETTOF(ans, LGLSXP); break; case 5: // is.atomic(x) || is.list(x), needed in reg_elem() and irreg_elem() for(int i = 0; i != n; ++i) { switch(TYPEOF(px[i])) { case VECSXP: pans[i] = 1; break; case NILSXP: /* NULL is atomic (S compatibly), but not in isVectorAtomic(.) */ case CHARSXP: case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: pans[i] = 1; break; default: pans[i] = 0; } } SETTOF(ans, LGLSXP); break; case 6: // Faster object type identification, needed in unlist2d: // idf <- function(x) if(inherits(x, "data.frame")) 2L else if (!length(x)) 1L else 3L*is.atomic(x) for(int i = 0; i != n; ++i) { if(length(px[i]) == 0) pans[i] = 1; else switch(TYPEOF(px[i])) { case VECSXP: pans[i] = inherits(px[i], "data.frame") ? 2 : 0; break; case NILSXP: /* NULL is atomic (S compatibly), but not in isVectorAtomic(.) */ case CHARSXP: case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: pans[i] = 3; break; default: pans[i] = 0; } } break; default: error("Unsupported vtypes option"); } UNPROTECT(1); return ans; } SEXP vlengths(SEXP x, SEXP usenam) { // if(TYPEOF(x) != VECSXP && TYPEOF(x) != STRSXP) return ScalarInteger(length(x)); int n = length(x); SEXP ans = PROTECT(allocVector(INTSXP, n)); int *pans = INTEGER(ans); if(TYPEOF(x) == VECSXP || TYPEOF(x) == STRSXP) { const SEXP *px = SEXPPTR_RO(x); for(int i = 0; i != n; ++i) pans[i] = length(px[i]); } else { for(int i = 0; i != n; ++i) pans[i] = 1; } if(asLogical(usenam)) { SEXP nam = getAttrib(x, R_NamesSymbol); if(TYPEOF(nam) != NILSXP) namesgets(ans, nam); } UNPROTECT(1); return ans; } // faster version of base::range, which calls both min() and max() SEXP frange(SEXP x, SEXP Rnarm, SEXP Rfinite) { int l = length(x), narm = asLogical(Rnarm), finite = asLogical(Rfinite), tx = TYPEOF(x); SEXP out = PROTECT(allocVector(tx, 2)); switch(tx) { case INTSXP: case LGLSXP: { if(l < 1) { INTEGER(out)[0] = INTEGER(out)[1] = NA_INTEGER; break; } int min, max, tmp, *px = INTEGER(x); if(narm) { int j = l-1; while(px[j] == NA_INTEGER && j!=0) --j; min = max = px[j]; if(j != 0) for(int i = j; i--; ) { tmp = px[i]; if(tmp == NA_INTEGER) continue; if(min > tmp) min = tmp; if(max < tmp) max = tmp; } } else { min = max = px[0]; for(int i = 0; i != l; ++i) { tmp = px[i]; if(tmp == NA_INTEGER) { min = max = tmp; break; } else { if(min > tmp) min = tmp; if(max < tmp) max = tmp; } } } INTEGER(out)[0] = min; INTEGER(out)[1] = max; break; } case REALSXP: { if(l < 1) { REAL(out)[0] = REAL(out)[1] = NA_REAL; break; } double min, max, tmp, *px = REAL(x); if(narm || finite) { int j = l-1; if(finite) while(!R_FINITE(px[j]) && j!=0) --j; else while(ISNAN(px[j]) && j!=0) --j; min = max = px[j]; if(j != 0) { if(finite) { for(int i = j; i--; ) { tmp = px[i]; if(min > tmp && tmp > R_NegInf) min = tmp; if(max < tmp && tmp < R_PosInf) max = tmp; } } else { for(int i = j; i--; ) { tmp = px[i]; if(min > tmp) min = tmp; if(max < tmp) max = tmp; } } } } else { min = max = px[0]; for(int i = 0; i != l; ++i) { tmp = px[i]; if(ISNAN(tmp)) { min = max = tmp; break; } else { if(min > tmp) min = tmp; if(max < tmp) max = tmp; } } } REAL(out)[0] = min; REAL(out)[1] = max; break; } default: error("Unsupported SEXP type: %s", type2char(tx)); } copyMostAttrib(x, out); UNPROTECT(1); return out; } // faster distance matrices // base R's version: https://github.com/wch/r-source/blob/79298c499218846d14500255efd622b5021c10ec/src/library/stats/src/distance.c SEXP fdist(SEXP x, SEXP vec, SEXP Rret, SEXP Rnthreads) { SEXP dim = getAttrib(x, R_DimSymbol); int nrow, ncol, ret, nullv = isNull(vec), nthreads = asInteger(Rnthreads), nprotect = 1; if(nthreads > max_threads) nthreads = max_threads; if(TYPEOF(dim) != INTSXP) { nrow = 1; ncol= length(x); } else { nrow = INTEGER(dim)[0]; ncol= INTEGER(dim)[1]; } if(TYPEOF(x) != REALSXP) { x = PROTECT(coerceVector(x, REALSXP)); ++nprotect; } if(TYPEOF(Rret) == STRSXP) { const char *r = CHAR(STRING_ELT(Rret, 0)); if(strcmp(r, "euclidean") == 0) ret = 1; else if(strcmp(r, "euclidean_squared") == 0) ret = 2; else error("Unsupported method: %s", r); } else { ret = asInteger(Rret); if(ret < 1 || ret > 2) error("method must be 1 ('euclidean') or 2 ('euclidean_squared')"); } size_t l = nrow; if(nullv) { // Full distance matrix if(nrow <= 1) error("If v is left empty, x needs to be a matrix with at least 2 rows"); l = ((double)nrow / 2) * (nrow - 1); } else if(length(vec) != ncol) error("length(v) must match ncol(x)"); SEXP res = PROTECT(allocVector(REALSXP, l)); double *px = REAL(x), *pres = REAL(res); memset(pres, 0, sizeof(double) * l); // '\0' if(nullv) { // Full distance matrix if(nthreads > 1) { if(nthreads > nrow-1) nthreads = nrow-1; #pragma omp parallel for num_threads(nthreads) for(int k = 1; k < nrow; ++k) { // Row vectors to compute distances with int nmk = nrow - k; double *presk = pres + l - nmk*(nmk+1)/2, // https://en.wikipedia.org/wiki/1_%2B_2_%2B_3_%2B_4_%2B_%E2%8B%AF *pxj = px + k, v; for(int j = 0; j != ncol; ++j) { // Elements of the row vector at hand v = pxj[-1]; #pragma omp simd for(int i = 0; i < nmk; ++i) { // All remaining rows to compute the distance to double tmp = pxj[i] - v; presk[i] += tmp * tmp; } pxj += nrow; } } } else { double *presk = pres, *pxj, v; for(int k = 1, nmk = nrow; k != nrow; ++k) { // Row vectors to compute distances with pxj = px + k; --nmk; for(int j = 0; j != ncol; ++j) { // Elements of the row vector at hand v = pxj[-1]; #pragma omp simd for(int i = 0; i < nmk; ++i) { // All remaining rows to compute the distance to double tmp = pxj[i] - v; presk[i] += tmp * tmp; } pxj += nrow; } presk += nmk; } } } else { // Only a single vector if(TYPEOF(vec) != REALSXP) { vec = PROTECT(coerceVector(vec, REALSXP)); ++nprotect; } double *pv = REAL(vec); if(nrow > 1) { // x is a matrix if(nthreads > 1) { if(nthreads > nrow) nthreads = nrow; for (int j = 0; j < ncol; ++j) { double *pxj = px + j * nrow, v = pv[j]; #pragma omp parallel for simd num_threads(nthreads) for (int i = 0; i < nrow; ++i) { double tmp = pxj[i] - v; pres[i] += tmp * tmp; } } } else { for (int j = 0; j != ncol; ++j) { double *pxj = px + j * nrow, v = pv[j]; #pragma omp simd for (int i = 0; i < nrow; ++i) { double tmp = pxj[i] - v; pres[i] += tmp * tmp; } } } } else { // x is a vector double dres = 0.0; if(nthreads > 1) { if(nthreads > ncol) nthreads = ncol; #pragma omp parallel for num_threads(nthreads) reduction(+:dres) for (int i = 0; i < ncol; ++i) { double tmp = px[i] - pv[i]; dres += tmp * tmp; } } else { #pragma omp simd reduction(+:dres) for (int i = 0; i < ncol; ++i) { double tmp = px[i] - pv[i]; dres += tmp * tmp; } } pres[0] = ret == 1 ? sqrt(dres) : dres; ret = 2; // ensures we avoid the square root loop below } } // Square Root if(ret == 1) { if(nthreads > 1) { #pragma omp parallel for simd num_threads(nthreads) for (size_t i = 0; i < l; ++i) pres[i] = sqrt(pres[i]); } else { #pragma omp simd for (size_t i = 0; i < l; ++i) pres[i] = sqrt(pres[i]); } } if(nullv) { // Full distance matrix object // First creating symbols to avoid protect errors: https://blog.r-project.org/2019/04/18/common-protect-errors/ SEXP sym_Size = install("Size"), sym_Labels = install("Labels"), sym_Diag = install("Diag"), sym_Upper = install("Upper"), sym_method = install("method"); setAttrib(res, sym_Size, ScalarInteger(nrow)); SEXP dn = getAttrib(x, R_DimNamesSymbol); if(TYPEOF(dn) == VECSXP && length(dn)) setAttrib(res, sym_Labels, VECTOR_ELT(dn, 0)); setAttrib(res, sym_Diag, ScalarLogical(0)); setAttrib(res, sym_Upper, ScalarLogical(0)); setAttrib(res, sym_method, mkString(ret == 1 ? "euclidean" : "euclidean_squared")); // Note: Missing "call" attribute classgets(res, mkString("dist")); } UNPROTECT(nprotect); return res; } collapse/src/flast.c0000644000176200001440000002610715122343124014123 0ustar liggesusers#include "collapse_c.h" SEXP flast_impl(SEXP x, int ng, SEXP g, int narm, int *gl) { int l = length(x), tx = TYPEOF(x); if (l < 2) return x; // Prevents seqfault for numeric(0) #101 if (ng == 0) { SEXP out = PROTECT(allocVector(tx, 1)); int j = l-1; if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x); while(ISNAN(px[j]) && j != 0) --j; REAL(out)[0] = px[j]; break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); while(px[j] == NA_STRING && j != 0) --j; SET_STRING_ELT(out, 0, px[j]); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); while(px[j] == NA_INTEGER && j != 0) --j; INTEGER(out)[0] = px[j]; break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); while(length(px[j]) == 0 && j != 0) --j; SET_VECTOR_ELT(out, 0, px[j]); break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: REAL(out)[0] = REAL(x)[l-1]; break; case STRSXP: SET_STRING_ELT(out, 0, STRING_ELT(x, l-1)); break; case INTSXP: case LGLSXP: INTEGER(out)[0] = INTEGER(x)[l-1]; break; case VECSXP: SET_VECTOR_ELT(out, 0, VECTOR_ELT(x, l-1)); break; default: error("Unsupported SEXP type!"); } } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); if(!isNull(getAttrib(x, R_NamesSymbol))) namesgets(out, ScalarString(STRING_ELT(getAttrib(x, R_NamesSymbol), j))); UNPROTECT(1); return out; } else { // with groups if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng)); if(narm) { int ngs = 0, *pg = INTEGER(g); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng; i--; ) pout[i] = NA_REAL; --pout; for(int i = l; i--; ) { if(NISNAN(px[i])) { if(ISNAN(pout[pg[i]])) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = NA_STRING; --pout; for(int i = l; i--; ) { if(px[i] != NA_STRING) { if(pout[pg[i]] == NA_STRING) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l; i--; ) { if(px[i] != NA_INTEGER) { if(pout[pg[i]] == NA_INTEGER) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = R_NilValue; --pout; for(int i = l; i--; ) { if(length(px[i])) { if(pout[pg[i]] == R_NilValue) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? NA_REAL : px[gl[i]]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? NA_INTEGER : px[gl[i]]; break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? NA_STRING : px[gl[i]]; break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? R_NilValue : px[gl[i]]; break; } default: error("Unsupported SEXP type!"); } } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); UNPROTECT(1); return out; } } SEXP flastC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm); if(ng == 0 || narm) { pgl = &ng; return flast_impl(x, ng, g, narm, pgl); } SEXP gl = PROTECT(allocVector(INTSXP, ng)); int *pg = INTEGER(g); pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = length(g); i--; ) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; SEXP res = flast_impl(x, ng, g, narm, ++pgl); UNPROTECT(1); return res; } SEXP flastlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int l = length(x), *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm), nprotect = 1; if(ng > 0 && !narm) { SEXP gl = PROTECT(allocVector(INTSXP, ng)); ++nprotect; int *pg = INTEGER(g); pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = length(g); i--; ) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; ++pgl; } else pgl = &l; SEXP out = PROTECT(allocVector(VECSXP, l)); const SEXP *px = SEXPPTR_RO(x); for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, flast_impl(px[j], ng, g, narm, pgl)); DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } // For matrix writing a separate function to increase efficiency. SEXP flastmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), l = INTEGER(dim)[0], col = INTEGER(dim)[1]; if (l < 2) return x; if (ng == 0) { SEXP out = PROTECT(allocVector(tx, col)); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0, i = l-1; j != col; ++j) { while(ISNAN(px[i]) && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0, i = l-1; j != col; ++j) { while(px[i] == NA_STRING && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0, i = l-1; j != col; ++j) { while(px[i] == NA_INTEGER && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0, i = l-1; j != col; ++j) { while(length(px[i]) == 0 && i != 0) --i; pout[j] = px[i]; px += l; i = l-1; } break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l + l-1]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l + l-1]; break; } case STRSXP: case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l + l-1]; break; } default: error("Unsupported SEXP type!"); } } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } else { // with groups if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng * col)); int *pg = INTEGER(g); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng * col; i--; ) pout[i] = NA_REAL; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(NISNAN(px[i]) && ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng * col; i--; ) pout[i] = NA_STRING; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(px[i] != NA_STRING && pout[pg[i]] == NA_STRING) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng * col; i--; ) pout[i] = NA_INTEGER; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(px[i] != NA_INTEGER && pout[pg[i]] == NA_INTEGER) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng * col; i--; ) pout[i] = R_NilValue; --pout; for(int j = 0; j != col; ++j) { for(int i = l; i--; ) if(length(px[i]) && pout[pg[i]] != R_NilValue) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } } else { SEXP gl = PROTECT(allocVector(INTSXP, ng)); int *pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = l; i--; ) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i; ++pgl; switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? NA_REAL : px[pgl[i]]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? NA_INTEGER : px[pgl[i]]; px += l; pout += ng; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? NA_STRING : px[pgl[i]]; px += l; pout += ng; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? R_NilValue : px[pgl[i]]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } UNPROTECT(1); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } } collapse/src/TRA.c0000644000176200001440000016351014777170131013453 0ustar liggesusers#include "collapse_c.h" // Cases: // 0- replace_na (only replace missing values) // 1- replace // 2- replace with NA rm // 3- demean // 4- demean with global mean added // 5- Proportion // 6- Percentages // 7- Add // 8- Multiply // 9- Modulus // 10- Subtract Modulus // int(x * (1/y)) -> This gave the UBSAN error if NaN !!! #pragma omp declare simd static inline double modulus_impl(double x, double y) { double z = x * (1/y); return (z == z) ? x - (int)(z) * y : z; // faster than x - (int)(x/y) * y; } // #define modulus_impl(x, y) (x - ((int)(x/y) * y)) // Macro: not faster ! // template // constexpr double modulus_impl (T x, U mod) // { // return !mod ? x : x - mod * static_cast(x / mod); // } // int(x * (1/y)) -> This gave the UBSAN error if NaN !!! #pragma omp declare simd static inline double remainder_impl(double x, double y) { double z = x * (1/y); return (z == z) ? (int)(z) * y : z; // (int)(x * (1/y)) * y; <- This would be enough, but doesn't keep missing values in x! } int TtI(SEXP x) { if(TYPEOF(x) != STRSXP) error("FUN must be integer or character"); const char * r = CHAR(STRING_ELT(x, 0)); // translateCharUTF8() if(strcmp(r, "replace_na") == 0) return 0; if(strcmp(r, "na") == 0) return 0; if(strcmp(r, "replace_fill") == 0) return 1; if(strcmp(r, "fill") == 0) return 1; if(strcmp(r, "replace") == 0) return 2; if(strcmp(r, "-") == 0) return 3; if(strcmp(r, "-+") == 0) return 4; if(strcmp(r, "/") == 0) return 5; if(strcmp(r, "%") == 0) return 6; if(strcmp(r, "+") == 0) return 7; if(strcmp(r, "*") == 0) return 8; if(strcmp(r, "%%") == 0) return 9; if(strcmp(r, "-%%") == 0) return 10; if(strcmp(r, "replace_NA") == 0) return 0; if(strcmp(r, "NA") == 0) return 0; if(strcmp(r, "REPLACE_NA") == 0) return 0; if(strcmp(r, "REPLACE_FILL") == 0) return 1; if(strcmp(r, "FILL") == 0) return 1; if(strcmp(r, "REPLACE") == 0) return 2; error("Unknown transformation: %s", r); } SEXP ret1(SEXP x, SEXP xAG, SEXP g, int set) { int tx = TYPEOF(x), txAG = TYPEOF(xAG), l = length(x), gs = length(g); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 int *pg = &l, nog = gs <= 1; if(nog) { if(length(xAG) != 1) error("If g = NULL, NROW(STATS) needs to be 1"); } else { if(TYPEOF(g) != INTSXP) error("g must be integer typed, please report this as g should have been internally grouped"); if(gs != l) error("length(g) must match NROW(x)"); pg = INTEGER(g); } if(set && txAG != tx) error("if set = TRUE with option 'replace_fill', x and STATS need to have identical data types"); SEXP out = set == 0 ? PROTECT(allocVector(txAG, l)) : x; switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = asReal(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG; } else { double *AG = REAL(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG[pg[i]]; } break; } case INTSXP: case LGLSXP: { int *pout = INTEGER(out); if(nog) { int AG = asInteger(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG; } else { int *AG = INTEGER(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG[pg[i]]; } break; } case CPLXSXP: { Rcomplex *pout = COMPLEX(out); if(nog) { Rcomplex AG = asComplex(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG; } else { Rcomplex *AG = COMPLEX(xAG)-1; for(int i = 0; i < l; ++i) pout[i] = AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); if(nog) { SEXP AG = asChar(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG; } else { const SEXP *AG = SEXPPTR_RO(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG[pg[i]]; } break; } case VECSXP: { SEXP *pout = SEXPPTR(out); if(nog) { for(int i = 0; i < l; ++i) pout[i] = xAG; } else { const SEXP *AG = SEXPPTR_RO(xAG)-1; for(int i = 0; i < l; ++i) pout[i] = AG[pg[i]]; } break; } case RAWSXP: { Rbyte *pout = RAW(out); if(nog) { Rbyte AG = RAW_ELT(xAG, 0); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG; } else { Rbyte *AG = RAW(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = AG[pg[i]]; } break; } default: error("Not supported SEXP type!"); } // Attribute Handling - 4 Situations: // 1 - x is classed (factor, date, time series), xAG is not classed. i.e. vector of fnobs, fmean etc. // -> Sallow replacing, removing class and levels attributes from x, discard attributes of xAG (if any) // -> or (if type matches i.e. double for date or time series), copy attributes of x unless x is a factor // 2 - x is not classed, xAG is classed (factor, date, time series). - an unusual situation should not occur - copy attributes of xAG, discard attributes of x // 3 - xAG and x are classed - same as above, keep attributes of xAG, discard attributes of x // 4 - neither x nor xAG are classed - preserve attributes of x, discard attributes of xAG (if any) // if(set == 0) { if(isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); else if(!isObject(x) || (tx == txAG && !isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); else { SHALLOW_DUPLICATE_ATTRIB(out, x); classgets(out, R_NilValue); // OK ! setAttrib(out, R_LevelsSymbol, R_NilValue); // if(isFactor(x)) ? faster ? } UNPROTECT(1); } return out; } SEXP ret2(SEXP x, SEXP xAG, SEXP g, int set) { int l = length(x), gs = length(g), tx = TYPEOF(x), txAG = TYPEOF(xAG); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 int *pg = &l, nog = gs <= 1; if(nog) { if(length(xAG) != 1) error("If g = NULL, NROW(STATS) needs to be 1"); } else { if(TYPEOF(g) != INTSXP) error("g must be integer typed, please report this as g should have been internally grouped"); if(gs != l) error("length(g) must match NROW(x)"); pg = INTEGER(g); // Wmaybe uninitialized } if(set && txAG != tx) error("if set = TRUE with option 'replace', x and STATS need to have identical data types"); SEXP out = set == 0 ? PROTECT(allocVector(txAG, l)) : x; switch(tx) { case REALSXP: { double *px = REAL(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = asReal(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? NA_REAL : AG; } else { double *AG = REAL(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? NA_REAL : AG[pg[i]]; } break; } case LGLSXP: case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = asInteger(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? NA_INTEGER : AG; } else { int *AG = INTEGER(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? NA_INTEGER : AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); if(nog) { SEXP AG = asChar(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? NA_STRING : AG; } else { const SEXP *AG = SEXPPTR_RO(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? NA_STRING : AG[pg[i]]; } break; } default: error("Not supported SEXP type!"); } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = asReal(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_REAL : AG; } else { double *AG = REAL(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_REAL : AG[pg[i]]; } break; } case LGLSXP: case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = asInteger(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_INTEGER : AG; } else { int *AG = INTEGER(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_INTEGER : AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); if(nog) { SEXP AG = asChar(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_STRING : AG; } else { const SEXP *AG = SEXPPTR_RO(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_STRING : AG[pg[i]]; } break; } default: error("Not supported SEXP type!"); } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); switch(txAG) { case REALSXP: { double *pout = REAL(out); if(nog) { double AG = asReal(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? NA_REAL : AG; } else { double *AG = REAL(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? NA_REAL : AG[pg[i]]; } break; } case LGLSXP: case INTSXP: { int *pout = INTEGER(out); if(nog) { int AG = asInteger(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? NA_INTEGER : AG; } else { int *AG = INTEGER(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? NA_INTEGER : AG[pg[i]]; } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); if(nog) { SEXP AG = asChar(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? NA_STRING : AG; } else { const SEXP *AG = SEXPPTR_RO(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? NA_STRING : AG[pg[i]]; } break; } default: error("Not supported SEXP type!"); } break; } default: error("Not supported SEXP type!"); } if(set == 0) { if(isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); else if(!isObject(x) || (tx == txAG && !isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); else { SHALLOW_DUPLICATE_ATTRIB(out, x); classgets(out, R_NilValue); // OK ! setAttrib(out, R_LevelsSymbol, R_NilValue); } UNPROTECT(1); } return out; } // New: Option "replace_NA" SEXP ret0(SEXP x, SEXP xAG, SEXP g, int set) { int l = length(x), gs = length(g), tx = TYPEOF(x), txAG = TYPEOF(xAG); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 int *pg = &l, nog = gs <= 1; if(nog) { if(length(xAG) != 1) error("If g = NULL, NROW(STATS) needs to be 1"); } else { if(TYPEOF(g) != INTSXP) error("g must be integer typed, please report this as g should have been internally grouped"); if(gs != l) error("length(g) must match NROW(x)"); pg = INTEGER(g); // Wmaybe uninitialized } SEXP out = set == 0 ? PROTECT(allocVector(tx, l)) : x; switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); if(nog) { if(txAG != REALSXP && txAG != INTSXP && txAG != LGLSXP) error("STATS needs to be numeric to replace NA's in numeric data!"); double AG = asReal(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? AG : px[i]; } else { switch(txAG) { case REALSXP: { double *AG = REAL(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? AG[pg[i]] : px[i]; break; } case LGLSXP: case INTSXP: { int *AG = INTEGER(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = ISNAN(px[i]) ? AG[pg[i]] : px[i]; break; } case STRSXP: error("Cannot replace missing values in double with a string"); default: error("Not supported SEXP type!"); } } break; } case LGLSXP: case INTSXP: { int *px = INTEGER(x), *pout = INTEGER(out); if(nog) { if(txAG != REALSXP && txAG != INTSXP && txAG != LGLSXP) error("STATS needs to be numeric to replace NA's in numeric data!"); int AG = asInteger(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? AG : px[i]; } else { switch(txAG) { case REALSXP: { double *AG = REAL(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? AG[pg[i]] : px[i]; break; } case LGLSXP: case INTSXP: { int *AG = INTEGER(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_INTEGER) ? AG[pg[i]] : px[i]; break; } case STRSXP: error("Cannot replace missing values in integer with a string"); default: error("Not supported SEXP type!"); } } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); if(nog) { SEXP AG = asChar(xAG); #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? AG : px[i]; } else { switch(txAG) { case REALSXP: case LGLSXP: case INTSXP: error("Cannot replace missing values in string with numeric data"); case STRSXP: { const SEXP *AG = SEXPPTR_RO(xAG)-1; #pragma omp simd for(int i = 0; i < l; ++i) pout[i] = (px[i] == NA_STRING) ? AG[pg[i]] : px[i]; break; } default: error("Not supported SEXP type!"); } } break; } default: error("Not supported SEXP type!"); } if(set == 0) { SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); } return out; } // TODO: allow integer input ?? SEXP retoth(SEXP x, SEXP xAG, SEXP g, int ret, int set) { int gs = length(g), l = length(x), txAG = TYPEOF(xAG); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 SEXP out = set == 0 ? PROTECT(allocVector(REALSXP, l)) : x; if(gs <= 1) { if(length(xAG) != 1) error("If g = NULL, STATS needs to be an atomic element!"); if(txAG != REALSXP && txAG != INTSXP && txAG != LGLSXP) error("for these transformations STATS needs to be numeric!"); #define NOGOPLOOP \ switch(ret) { \ case 3: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] - AGx; \ break; \ case 4: error("This transformation can only be performed with groups!"); \ case 5: { \ double v = 1 / AGx; \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] * v; \ break; \ } \ case 6: { \ double v = 100 / AGx; \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] * v; \ break; \ } \ case 7: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] + AGx; \ break; \ case 8: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] * AGx; \ break; \ case 9: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = modulus_impl(px[i], AGx); \ break; \ case 10: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = remainder_impl(px[i], AGx); \ break; \ default: error("Unknown Transformation"); \ } switch(TYPEOF(x)) { case REALSXP: { double AGx = asReal(xAG), *pout = REAL(out), *px = REAL(x); NOGOPLOOP break; } case INTSXP: case LGLSXP: { if(set) { int AGx = asInteger(xAG); int *pout = INTEGER(out), *px = INTEGER(x); NOGOPLOOP } else { double AGx = asReal(xAG), *pout = REAL(out); int *px = INTEGER(x); NOGOPLOOP } break; } default: error("x needs to be double or integer"); } } else { if(TYPEOF(g) != INTSXP) error("g must be integer typed, please report this as g should have been internally grouped"); if(gs != l) error("length(g) must match nrow(x)"); int *pg = INTEGER(g); #define GOPLOOP \ switch(ret) { \ case 3: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] - pAG[pg[i]]; \ break; \ case 4: \ { \ long double OM = 0; \ int n = 0; \ for(int i = 0; i != l; ++i) { \ if(ISNAN(px[i])) pout[i] = px[i]; \ else { \ pout[i] = px[i] - pAG[pg[i]]; \ if(ISNAN(pAG[pg[i]])) continue; \ OM += pAG[pg[i]]; \ ++n; \ } \ } \ OM /= n; \ double dOM = (double)OM; \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] += dOM; \ break; \ } \ case 5: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] / pAG[pg[i]]; \ break; \ case 6: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] / pAG[pg[i]] * 100; \ break; \ case 7: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] + pAG[pg[i]]; \ break; \ case 8: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = px[i] * pAG[pg[i]]; \ break; \ case 9: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = modulus_impl(px[i], pAG[pg[i]]); \ break; \ case 10: \ _Pragma("omp simd") \ for(int i = 0; i < l; ++i) pout[i] = remainder_impl(px[i], pAG[pg[i]]); \ break; \ default: error("Unknown Transformation"); \ } #define TXAGSWITCH \ switch(txAG) { \ case REALSXP: { \ double *pAG = REAL(xAG)-1; \ GOPLOOP \ break; \ } \ case INTSXP: \ case LGLSXP: { \ int *pAG = INTEGER(xAG)-1; \ GOPLOOP \ break; \ } \ default: error("STATS needs to be integer or real for statistical transformations"); \ } switch(TYPEOF(x)) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); TXAGSWITCH break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); if(set) { int *pout = INTEGER(out); TXAGSWITCH } else { double *pout = REAL(out); TXAGSWITCH } break; } default: error("x needs to be double or integer"); } } if(set == 0) { SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); } return out; } SEXP TRAC(SEXP x, SEXP xAG, SEXP g, SEXP Rret, SEXP Rset) { if(length(Rret) != 1) error("can only perform one transformation at a time"); int ret = (TYPEOF(Rret) == STRSXP) ? TtI(Rret) : asInteger(Rret), set = asLogical(Rset); switch(ret) { case 0: return ret0(x, xAG, g, set); case 1: return ret1(x, xAG, g, set); case 2: return ret2(x, xAG, g, set); default: return retoth(x, xAG, g, ret, set); } } SEXP TRAlC(SEXP x, SEXP xAG, SEXP g, SEXP Rret, SEXP Rset) { if(length(Rret) != 1) error("can only perform one transformation at a time"); int l = length(x), set = asLogical(Rset), ret = (TYPEOF(Rret) == STRSXP) ? TtI(Rret) : asInteger(Rret); if(length(xAG) != l) error("NCOL(x) must match NCOL(STATS)"); // This is allocated anyway, but not returned if set = TRUE SEXP out = PROTECT(allocVector(VECSXP, l)); const SEXP *px = SEXPPTR_RO(x); // Need SET_VECTOR_ELT here because we are allocating... (otherwise sometimes segfault) #define RETLOOPS(v) \ switch(ret) { \ case 0: \ for(int j = 0; j != l; ++j) { \ SET_VECTOR_ELT(out, j, ret0(px[j], PROTECT(v), g, set)); UNPROTECT(1); \ } \ break; \ case 1: \ for(int j = 0; j != l; ++j) { \ SET_VECTOR_ELT(out, j, ret1(px[j], PROTECT(v), g, set)); UNPROTECT(1); \ } \ break; \ case 2: \ for(int j = 0; j != l; ++j) { \ SET_VECTOR_ELT(out, j, ret2(px[j], PROTECT(v), g, set)); UNPROTECT(1); \ } \ break; \ default: \ for(int j = 0; j != l; ++j) { \ SET_VECTOR_ELT(out, j, retoth(px[j], PROTECT(v), g, ret, set)); UNPROTECT(1); \ } \ } switch(TYPEOF(xAG)) { case VECSXP: { const SEXP *pAG = SEXPPTR_RO(xAG); RETLOOPS(pAG[j]) break; } case REALSXP: { double *pAG = REAL(xAG); RETLOOPS(ScalarReal(pAG[j])) break; } case LGLSXP: case INTSXP: { int *pAG = INTEGER(xAG); RETLOOPS(ScalarInteger(pAG[j])) break; } case CPLXSXP: { Rcomplex *pAG = COMPLEX(xAG); RETLOOPS(ScalarComplex(pAG[j])) break; } case RAWSXP: { Rbyte *pAG = RAW(xAG); RETLOOPS(ScalarRaw(pAG[j])) break; } case STRSXP: { const SEXP *pAG = SEXPPTR_RO(xAG); RETLOOPS(ScalarString(pAG[j])) break; } default: error("Not supported SEXP type!"); } if(set == 0) SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return set ? x : out; } // TODO: "replace" method for matrices is a bit slower than before, but overall pretty good! SEXP TRAmC(SEXP x, SEXP xAG, SEXP g, SEXP Rret, SEXP Rset) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); if(length(Rret) != 1) error("can only perform one transformation at a time"); int tx = TYPEOF(x), txAG = TYPEOF(xAG), gs = length(g), row = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = &gs, ng = 0, set = asLogical(Rset), ret = (TYPEOF(Rret) == STRSXP) ? TtI(Rret) : asInteger(Rret), nog = gs <= 1; if(nog) { if(length(xAG) != col) error("If g = NULL, NROW(STATS) needs to be 1"); } else { if(TYPEOF(g) != INTSXP) error("g must be integer typed, please report this as g should have been internally grouped"); if(gs != row) error("length(g) must match ncol(x)"); if(ncols(xAG) != col) error("ncol(STATS) must match ncol(x)"); pg = INTEGER(g); ng = nrows(xAG); } if(ret <= 2) { if(ret > 0) { if(set && txAG != tx) error("if set = TRUE with option 'replace_fill', x and STATS need to have identical data types"); SEXP out = set ? x : PROTECT(allocVector(txAG, row * col)); if(ret == 1) { switch(txAG) { case REALSXP: { double *pout = REAL(out), *pAG = REAL(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = AG[pg[i]]; } } break; } case INTSXP: case LGLSXP: { int *pout = INTEGER(out), *pAG = INTEGER(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); const SEXP *pAG = SEXPPTR_RO(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; const SEXP *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = AG[pg[i]]; } } break; } default: error("Not supported SEXP type!"); } } else { switch(tx) { case REALSXP: { double *px = REAL(x); switch(txAG) { case REALSXP: { double *pout = REAL(out), *pAG = REAL(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (ISNAN(px[i])) ? NA_REAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (ISNAN(px[i + s])) ? NA_REAL : AG[pg[i]]; } } break; } case INTSXP: case LGLSXP: { int *pout = INTEGER(out), *pAG = INTEGER(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (ISNAN(px[i])) ? NA_INTEGER : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (ISNAN(px[i + s])) ? NA_INTEGER : AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); const SEXP *pAG = SEXPPTR_RO(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (ISNAN(px[i])) ? NA_STRING : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; const SEXP *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (ISNAN(px[i + s])) ? NA_STRING : AG[pg[i]]; } } break; } default: error("Not supported SEXP type!"); } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); switch(txAG) { case REALSXP: { double *pout = REAL(out), *pAG = REAL(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_REAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? NA_REAL : AG[pg[i]]; } } break; } case INTSXP: case LGLSXP: { int *pout = INTEGER(out), *pAG = INTEGER(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_INTEGER : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? NA_INTEGER : AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); const SEXP *pAG = SEXPPTR_RO(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_INTEGER) ? NA_STRING : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; const SEXP *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? NA_STRING : AG[pg[i]]; } } break; } default: error("Not supported SEXP type!"); } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); switch(txAG) { case REALSXP: { double *pout = REAL(out), *pAG = REAL(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_STRING) ? NA_REAL : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? NA_REAL : AG[pg[i]]; } } break; } case INTSXP: case LGLSXP: { int *pout = INTEGER(out), *pAG = INTEGER(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_STRING) ? NA_INTEGER : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? NA_INTEGER : AG[pg[i]]; } } break; } case STRSXP: { SEXP *pout = SEXPPTR(out); const SEXP *pAG = SEXPPTR_RO(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_STRING) ? NA_STRING : AGj; } } else { for(int j = 0; j != col; ++j) { int s = j * row; const SEXP *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? NA_STRING : AG[pg[i]]; } } break; } default: error("Not supported SEXP type!"); } break; } default: error("Not supported SEXP type!"); } } if(set == 0) { if(isObject(xAG)) SHALLOW_DUPLICATE_ATTRIB(out, xAG); else if(!isObject(x) || (tx == txAG && !isFactor(x))) SHALLOW_DUPLICATE_ATTRIB(out, x); else { SHALLOW_DUPLICATE_ATTRIB(out, x); classgets(out, R_NilValue); // OK ! setAttrib(out, R_LevelsSymbol, R_NilValue); } UNPROTECT(1); } return out; } else { // ret == 0 if(ret != 0) error("Unknown Transformation!"); SEXP out = set ? x : PROTECT(allocVector(tx, row * col)); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); switch(txAG) { case REALSXP: { double *pAG = REAL(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (ISNAN(px[i])) ? AGj : px[i]; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (ISNAN(px[i + s])) ? AG[pg[i]] : px[i + s]; } } break; } case INTSXP: case LGLSXP: { int *pAG = INTEGER(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; double AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (ISNAN(px[i])) ? AGj : px[i]; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (ISNAN(px[i + s])) ? AG[pg[i]] : px[i + s]; } } break; } case STRSXP: error("Cannot replace missing values in double with a string"); default: error("Not supported SEXP type!"); } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); switch(txAG) { case REALSXP: { double *pAG = REAL(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_INTEGER) ? AGj : px[i]; } } else { for(int j = 0; j != col; ++j) { int s = j * row; double *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? AG[pg[i]] : px[i + s]; } } break; } case INTSXP: case LGLSXP: { int *pAG = INTEGER(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row, AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_INTEGER) ? AGj : px[i]; } } else { for(int j = 0; j != col; ++j) { int s = j * row, *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_INTEGER) ? AG[pg[i]] : px[i + s]; } } break; } case STRSXP: error("Cannot replace missing values in integer with a string"); default: error("Not supported SEXP type!"); } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); switch(txAG) { case REALSXP: case INTSXP: case LGLSXP: error("Cannot replace missing values in string with numeric data"); case STRSXP: { const SEXP *pAG = SEXPPTR_RO(xAG); if(nog) { for(int j = 0; j != col; ++j) { int s = j * row, e = s + row; SEXP AGj = pAG[j]; #pragma omp simd for(int i = s; i < e; ++i) pout[i] = (px[i] == NA_STRING) ? AGj : px[i]; } } else { for(int j = 0; j != col; ++j) { int s = j * row; const SEXP *AG = pAG + j * ng - 1; #pragma omp simd for(int i = 0; i < row; ++i) pout[i + s] = (px[i + s] == NA_STRING) ? AG[pg[i]] : px[i + s]; } } break; } default: error("Not supported SEXP type!"); } break; } default: error("Not supported SEXP type!"); } if(set == 0) { SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); } return out; } } // ret > 2 int nprotect = 0; SEXP out = set ? x : PROTECT(allocVector(REALSXP, row * col)); double *pAG; if(txAG != REALSXP) { if(txAG != INTSXP && txAG != LGLSXP) error("STATS needs to be double, integer or logical"); SEXP xxAG = PROTECT(coerceVector(xAG, REALSXP)); ++nprotect; pAG = REAL(xxAG); } else pAG = REAL(xAG); #define MATNUMTRALOOP \ switch(ret) { \ case 3: { \ if(nog) { \ for(int j = 0; j != col; ++j) { \ int s = j * row, e = s + row; \ double AGj = pAG[j]; \ _Pragma("omp simd") \ for(int i = s; i < e; ++i) pout[i] = px[i] - AGj; \ } \ } else { \ for(int j = 0; j != col; ++j) { \ int s = j * row; \ double *AG = pAG + j * ng - 1; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] = px[i + s] - AG[pg[i]]; \ } \ } \ break; \ } \ case 4: { \ if(nog) error("This transformation can only be computed with groups!"); \ for(int j = 0; j != col; ++j) { \ int s = j * row, n = 0; \ long double OM = 0; \ double *AG = pAG + j * ng - 1; \ for(int i = 0; i != row; ++i) { \ if(ISNAN(px[i + s])) pout[i + s] = px[i + s]; \ else { \ pout[i + s] = px[i + s] - AG[pg[i]]; \ if(ISNAN(AG[pg[i]])) continue; \ OM += AG[pg[i]]; \ ++n; \ } \ } \ OM /= n; \ double OMD = (double)OM; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] += OMD; \ } \ break; \ } \ case 5: { \ if(nog) { \ for(int j = 0; j != col; ++j) { \ int s = j * row, e = s + row; \ double AGj = 1 / pAG[j]; \ _Pragma("omp simd") \ for(int i = s; i < e; ++i) pout[i] = px[i] * AGj; \ } \ } else { \ for(int j = 0; j != col; ++j) { \ int s = j * row; \ double *AG = pAG + j * ng - 1; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] = px[i + s] * (1 / AG[pg[i]]); \ } \ } \ break; \ } \ case 6: { \ if(nog) { \ for(int j = 0; j != col; ++j) { \ int s = j * row, e = s + row; \ double AGj = 100 / pAG[j]; \ _Pragma("omp simd") \ for(int i = s; i < e; ++i) pout[i] = px[i] * AGj; \ } \ } else { \ for(int j = 0; j != col; ++j) { \ int s = j * row; \ double *AG = pAG + j * ng - 1; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] = px[i + s] * (100 / AG[pg[i]]); \ } \ } \ break; \ } \ case 7: { \ if(nog) { \ for(int j = 0; j != col; ++j) { \ int s = j * row, e = s + row; \ double AGj = pAG[j]; \ _Pragma("omp simd") \ for(int i = s; i < e; ++i) pout[i] = px[i] + AGj; \ } \ } else { \ for(int j = 0; j != col; ++j) { \ int s = j * row; \ double *AG = pAG + j * ng - 1; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] = px[i + s] + AG[pg[i]]; \ } \ } \ break; \ } \ case 8: { \ if(nog) { \ for(int j = 0; j != col; ++j) { \ int s = j * row, e = s + row; \ double AGj = pAG[j]; \ _Pragma("omp simd") \ for(int i = s; i < e; ++i) pout[i] = px[i] * AGj; \ } \ } else { \ for(int j = 0; j != col; ++j) { \ int s = j * row; \ double *AG = pAG + j * ng - 1; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] = px[i + s] * AG[pg[i]]; \ } \ } \ break; \ } \ case 9: { \ if(nog) { \ for(int j = 0; j != col; ++j) { \ int s = j * row, e = s + row; \ double AGj = pAG[j]; \ _Pragma("omp simd") \ for(int i = s; i < e; ++i) pout[i] = modulus_impl(px[i], AGj); \ } \ } else { \ for(int j = 0; j != col; ++j) { \ int s = j * row; \ double *AG = pAG + j * ng - 1; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] = modulus_impl(px[i + s], AG[pg[i]]); \ } \ } \ break; \ } \ case 10: { \ if(nog) { \ for(int j = 0; j != col; ++j) { \ int s = j * row, e = s + row; \ double AGj = pAG[j]; \ _Pragma("omp simd") \ for(int i = s; i < e; ++i) pout[i] = remainder_impl(px[i], AGj); \ } \ } else { \ for(int j = 0; j != col; ++j) { \ int s = j * row; \ double *AG = pAG + j * ng - 1; \ _Pragma("omp simd") \ for(int i = 0; i < row; ++i) pout[i + s] = remainder_impl(px[i + s], AG[pg[i]]);\ } \ } \ break; \ } \ default: error("Unknown Transformation"); \ } switch(tx) { case REALSXP: { double *pout = REAL(out), *px = REAL(x); MATNUMTRALOOP break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); if(set) { int *pout = INTEGER(out); MATNUMTRALOOP } else { double *pout = REAL(out); MATNUMTRALOOP } break; } default: error("Not supported SEXP type!"); } if(set == 0) { SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(nprotect + 1); } else if(nprotect > 0) UNPROTECT(nprotect); return out; } collapse/src/fbstats.cpp0000644000176200001440000013251015202367344015025 0ustar liggesusers#include using namespace Rcpp; // TODO: Still check printing (naming and setting classes) options // inline bool isnan2(double x) { // return x != x; // } CharacterVector get_stats_names(int n, bool panel = false) { String N = panel ? "N/T" : "N"; switch(n) { case 5: return CharacterVector::create(N,"Mean","SD","Min","Max"); case 6: return CharacterVector::create(N,"WeightSum","Mean","SD","Min","Max"); case 7: return CharacterVector::create(N,"Mean","SD","Min","Max","Skew","Kurt"); case 8: return CharacterVector::create(N,"WeightSum","Mean","SD","Min","Max","Skew","Kurt"); default: stop("length of stats names needs to be between 5 and 8"); } } // use constant references on the temp function also ? NumericVector fbstatstemp(NumericVector x, bool ext = false, int ng = 0, IntegerVector g = 0, SEXP w = R_NilValue, bool setn = true, bool stable_algo = true, SEXP gn = R_NilValue) { int l = x.size(); bool weights = !Rf_isNull(w); if(l == 0 && ng == 0) { int d = ((ext) ? 7 : 5) + weights; NumericVector result(d, NA_REAL); result[0] = 0; if(setn) { Rf_namesgets(result, get_stats_names(d)); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } if(!ext) { if(ng == 0) { // No groups if(l == 1) { // need this so that qsu(1) works properly NumericVector result = weights ? NumericVector::create(1,Rf_asReal(w),x[0],NA_REAL,x[0],x[0]) : NumericVector::create(1,x[0],NA_REAL,x[0],x[0]); if(setn) { Rf_namesgets(result, get_stats_names(5+weights)); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } int j = l-1; // double n = 0, min = R_PosInf, max = R_NegInf; // long double mean = 0, d1 = 0, M2 = 0; double n = 0, min = R_PosInf, max = R_NegInf, mean = 0, d1 = 0, M2 = 0; if(!weights) { // No weights while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); if(stable_algo) { for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; mean += d1 * (1 / ++n); M2 += d1*(x[i]-mean); if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } M2 = sqrt(M2/(n-1)); } else { int k = 0; long double sum = 0, sq_sum = 0; for(int i = j+1; i--; ) { d1 = x[i]; if(std::isnan(d1)) continue; sum += d1; sq_sum += d1 * d1; if(min > d1) min = d1; if(max < d1) max = d1; ++k; } sum /= k; sq_sum -= sum*sum*k; M2 = (double)sqrt(sq_sum/(k-1)); n = (double)k; mean = (double)sum; } } else mean = M2 = min = max = NA_REAL; if(std::isnan(M2)) M2 = NA_REAL; NumericVector result = NumericVector::create(n,mean,M2,min,max); // NumericVector::create(n,(double)mean,(double)M2,min,max); if(setn) { Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max")); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // long double sumw = 0; double sumw = 0; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); ++n; if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } M2 = sqrt(M2/(sumw-1)); } else mean = M2 = min = max = NA_REAL; if(std::isnan(M2)) M2 = NA_REAL; NumericVector result = NumericVector::create(n,sumw,mean,M2,min,max); // NumericVector::create(n,(double)mean,(double)M2,min,max); if(setn) { Rf_namesgets(result, CharacterVector::create("N","WeightSum","Mean","SD","Min","Max")); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // long double d1 = 0; double d1 = 0; int k = 0; NumericMatrix result(ng, 5+weights); // = no_init_matrix initializing is better -> valgrind NumericMatrix::Column n = result( _ , 0); NumericMatrix::Column mean = result( _ , 1+weights); NumericMatrix::Column M2 = result( _ , 2+weights); NumericMatrix::Column min = result( _ , 3+weights); NumericMatrix::Column max = result( _ , 4+weights); std::fill(M2.begin(), M2.end(), NA_REAL); if(!weights) { // No weights if(stable_algo) { for(int i = l; i--; ) { if(std::isnan(x[i])) continue; k = g[i]-1; if(std::isnan(M2[k])) { mean[k] = min[k] = max[k] = x[i]; M2[k] = 0.0; n[k] = 1.0; } else { d1 = x[i]-mean[k]; mean[k] += d1 * (1 / ++n[k]); M2[k] += d1*(x[i]-mean[k]); if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = sqrt(M2[i]/(n[i]-1)); } else { for(int i = l; i--; ) { if(std::isnan(x[i])) continue; k = g[i]-1; if(std::isnan(M2[k])) { mean[k] = min[k] = max[k] = x[i]; M2[k] = pow(x[i],2); n[k] = 1.0; } else { mean[k] += x[i]; M2[k] += pow(x[i],2); if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; ++n[k]; } } for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; mean[i] /= n[i]; M2[i] = sqrt((M2[i] - pow(mean[i],2)*n[i])/(n[i]-1)); if(std::isnan(M2[i])) M2[i] = NA_REAL; } } } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind NumericMatrix::Column sumw = result( _ , 1); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; k = g[i]-1; if(std::isnan(M2[k])) { sumw[k] = wg[i]; mean[k] = min[k] = max[k] = x[i]; M2[k] = 0.0; n[k] = 1.0; } else { sumw[k] += wg[i]; d1 = x[i] - mean[k]; mean[k] += d1 * (wg[i] / sumw[k]); M2[k] += wg[i] * d1 * (x[i] - mean[k]); ++n[k]; if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = ng; i--; ) { if(n[i] == 0) mean[i] = min[i] = max[i] = NA_REAL; else if(!std::isnan(M2[i])) M2[i] = sqrt(M2[i]/(sumw[i]-1)); } } if(setn) { Rf_dimnamesgets(result, List::create(gn, get_stats_names(5+weights))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return result; } } else { if(ng == 0) { // No groups int j = l-1; // double n = 0, min = R_PosInf, max = R_NegInf; // long double mean = 0, d1 = 0, dn = 0, dn2 = 0, term1 = 0, M2 = 0, M3 = 0, M4 = 0; double n = 0, min = R_PosInf, max = R_NegInf, mean = 0, d1 = 0, dn = 0, dn2 = 0, term1 = 0, M2 = 0, M3 = 0, M4 = 0; if(!weights) { // No weights while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; dn = d1 * (1 / ++n); mean += dn; dn2 = dn * dn; term1 = d1 * dn * (n-1); M4 += term1*dn2*(n*n - 3*n + 3) + 6*dn2*M2 - 4*dn*M3; M3 += term1*dn*(n - 2) - 3*dn*M2; M2 += term1; if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } M4 = (n*M4)/(M2*M2); // kurtosis // Excess kurtosis: - 3; M3 = (sqrt(n)*M3) / sqrt(pow(M2,3)); // Skewness M2 = sqrt(M2/(n-1)); // Standard Deviation } else mean = M2 = M3 = M4 = min = max = NA_REAL; NumericVector result = NumericVector::create(n,mean,M2,min,max,M3,M4); // NumericVector::create(n,(double)mean,(double)M2,min,max,(double)M3,(double)M4); if(setn) { Rf_namesgets(result, CharacterVector::create("N","Mean","SD","Min","Max","Skew","Kurt")); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // long double sumw = 0; double sumw = 0; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { // if(j == 0) stop("Not enough non-mising obs."); for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumw += wg[i]; mean += x[i] * wg[i]; ++n; if(min > x[i]) min = x[i]; if(max < x[i]) max = x[i]; } mean /= sumw; long double M2l = 0.0, M3l = 0.0, M4l = 0.0; for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; d1 = x[i] - mean; dn = d1 * d1; dn2 = dn * dn; M2l += wg[i] * dn; M3l += wg[i] * dn * d1; M4l += wg[i] * dn2; } M4 = (sumw*M4l)/(M2l*M2l); // kurtosis // Excess kurtosis: - 3; M3 = (sqrt(sumw)*M3l) / sqrt(pow(M2l,3)); // Skewness M2 = sqrt(M2l/(sumw-1)); // Standard Deviation } else mean = M2 = M3 = M4 = min = max = NA_REAL; NumericVector result = NumericVector::create(n,sumw,mean,M2,min,max,M3,M4); // NumericVector::create(n,(double)mean,(double)M2,min,max,(double)M3,(double)M4); if(setn) { Rf_namesgets(result, CharacterVector::create("N","WeightSum","Mean","SD","Min","Max","Skew","Kurt")); Rf_classgets(result, CharacterVector::create("qsu","table")); } return result; } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); double d1 = 0, dn = 0, dn2 = 0, term1 = 0; int k = 0; NumericMatrix result(ng, 7+weights); // = no_init_matrix // Initializing better -> valgrind NumericMatrix::Column n = result( _ , 0); NumericMatrix::Column mean = result( _ , 1+weights); NumericMatrix::Column M2 = result( _ , 2+weights); NumericMatrix::Column min = result( _ , 3+weights); NumericMatrix::Column max = result( _ , 4+weights); NumericMatrix::Column M3 = result( _ , 5+weights); NumericMatrix::Column M4 = result( _ , 6+weights); std::fill(M2.begin(), M2.end(), NA_REAL); if(!weights) { // No weights for(int i = l; i--; ) { if(std::isnan(x[i])) continue; k = g[i]-1; if(std::isnan(M2[k])) { mean[k] = min[k] = max[k] = x[i]; M2[k] = M3[k] = M4[k] = 0.0; n[k] = 1.0; } else { d1 = x[i]-mean[k]; dn = d1 * (1 / ++n[k]); mean[k] += dn; dn2 = dn * dn; term1 = d1 * dn * (n[k]-1); M4[k] += term1*dn2*(n[k]*n[k] - 3*n[k] + 3) + 6*dn2*M2[k] - 4*dn*M3[k]; M3[k] += term1*dn*(n[k] - 2) - 3*dn*M2[k]; M2[k] += term1; if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = ng; i--; ) { M4[i] = (n[i]*M4[i])/(M2[i]*M2[i]); // kurtosis // Excess kurtosis: - 3; M3[i] = (sqrt(n[i])*M3[i]) / sqrt(pow(M2[i],3)); // Skewness M2[i] = sqrt(M2[i]/(n[i]-1)); // Standard Deviation } } else { // with weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // NumericVector sumw(ng); // = no_init_vector(ng); // better for valgrind NumericMatrix::Column sumw = result( _ , 1); for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; k = g[i]-1; if(std::isnan(M2[k])) { sumw[k] = wg[i]; mean[k] = min[k] = max[k] = x[i]; M2[k] = M3[k] = M4[k] = 0.0; n[k] = 1.0; } else { sumw[k] += wg[i]; mean[k] += (x[i] - mean[k]) * (wg[i] / sumw[k]); ++n[k]; if(min[k] > x[i]) min[k] = x[i]; if(max[k] < x[i]) max[k] = x[i]; } } for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; k = g[i]-1; d1 = x[i] - mean[k]; dn = d1 * d1; dn2 = dn * dn; M2[k] += wg[i] * dn; M3[k] += wg[i] * dn * d1; M4[k] += wg[i] * dn2; } for(int i = ng; i--; ) { if(n[i] == 0) mean[i] = min[i] = max[i] = M3[i] = M4[i] = NA_REAL; else { M4[i] = (sumw[i]*M4[i])/(M2[i]*M2[i]); // kurtosis // Excess kurtosis: - 3; M3[i] = (sqrt(sumw[i])*M3[i]) / sqrt(pow(M2[i],3)); // Skewness M2[i] = sqrt(M2[i]/(sumw[i]-1)); // Standard Deviation } } } if(setn) { Rf_dimnamesgets(result, List::create(gn, get_stats_names(7+weights))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return result; } } // } else { // detailed summary: fully sorting. Note: This doesn't work grouped, groups must also be sorted -> need to sort within each group or compute ordering // NumericVector y = no_init_vector(l); // auto pend = std::remove_copy_if(x.begin(), x.end(), y.begin(), isnan2); // l = pend - x.begin(); // middle = sz/2-1; // std::sort(y.begin(), pend); // good ?? // // if(dets == 1 && det[0] == 1) det = 5; // } } inline NumericVector replaceC12(NumericMatrix x, NumericVector y, bool div = false) { int nc = x.ncol(); if(div) { NumericMatrix::Column C1 = x(_, 0); // best ? C1 = C1 / y; if(nc == 6 || nc == 8) { // WeightSum column NumericMatrix::Column C2 = x(_, 1); C2 = C2 / y; } } else { x(_, 0) = y; // best way ? use NumericMatrix::Column ? if(nc == 6 || nc == 8) { // WeightSum column x(_, 1) = y; } } return x; } // [[Rcpp::export]] SEXP fbstatsCpp(const NumericVector& x, bool ext = false, int ng = 0, const IntegerVector& g = 0, int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, bool setn = true, const SEXP& gn = R_NilValue) { if(npg == 0) { // No panel if(ng == 0) { // No groups return(fbstatstemp(x, ext, 0, 0, w, setn, stable_algo, gn)); } else { return(fbstatstemp(x, ext, ng, g, w, setn, stable_algo, gn)); } } else { int l = x.size(); if(pg.size() != l) stop("length(pid) must match nrow(X)"); bool weights = !Rf_isNull(w); int d = ((ext) ? 7 : 5) + weights; NumericVector sum(npg, NA_REAL); NumericVector sumw((weights) ? npg : 1); // no_init_vector(npg) : no_init_vector(1); // better for valgrind double osum = 0; if(!weights) { IntegerVector n(npg, 1); for(int i = l; i--; ) { if(!std::isnan(x[i])) { if(std::isnan(sum[pg[i]-1])) sum[pg[i]-1] = x[i]; else { sum[pg[i]-1] += x[i]; ++n[pg[i]-1]; } } } int on = 0; for(int i = npg; i--; ) { // Problem: if one sum remained NA, osum becomes NA (also issue with B and W and TRA) if(std::isnan(sum[i])) continue; // solves the issue osum += sum[i]; on += n[i]; sum[i] /= n[i]; } osum = osum/on; } else { NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); // Note: Skipping zero weights is not really necessary here, but it might be numerically better and also faster if there are many. for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(sum[pg[i]-1])) { sum[pg[i]-1] = x[i]*wg[i]; sumw[pg[i]-1] = wg[i]; } else { sum[pg[i]-1] += x[i]*wg[i]; sumw[pg[i]-1] += wg[i]; } } double osumw = 0; for(int i = npg; i--; ) { if(std::isnan(sum[i]) || sumw[i] == 0) continue; // solves the issue osum += sum[i]; osumw += sumw[i]; sum[i] /= sumw[i]; } osum = osum/osumw; // for(int i = npg; i--; ) sumw[i] /= osumw; } NumericVector within = no_init_vector(l); if(ng == 0) { // No groups for(int i = 0; i != l; ++i) within[i] = x[i] - sum[pg[i]-1] + osum; // if-check for NA's is not faster NumericMatrix result = no_init_matrix(3, d); result(0, _) = fbstatstemp(x, ext, 0, 0, w, false, stable_algo); result(1, _) = (weights) ? fbstatstemp(sum, ext, 0, 0, sumw, false, stable_algo) : fbstatstemp(sum, ext, 0, 0, w, false, stable_algo); result(2, _) = fbstatstemp(within, ext, 0, 0, w, false, stable_algo); result[2] /= result[1]; if(weights) { result[4] = result[1]; result[5] /= result[1]; } if(setn) { Rf_dimnamesgets(result, List::create(CharacterVector::create("Overall","Between","Within"), get_stats_names(d, true))); Rf_classgets(result, CharacterVector::create("qsu","matrix","table")); } return(result); } else { if(g.size() != l) stop("length(g) must match nrow(X)"); NumericVector between = no_init_vector(l); // bool groupids[ng][npg]; // could do +1 trick, but that could be costly in term of memory if few g and many pg LogicalMatrix groupids = no_init_matrix(ng, npg); // memset(groupids, true, sizeof(bool)*ng*npg); // works ? necessary ? std::fill(groupids.begin(), groupids.end(), true); NumericVector gnpids(ng); // best ? for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { // important ? right ? between[i] = within[i] = NA_REAL; // x[i] ? } else { if(groupids(g[i]-1, pg[i]-1)) { // added this part ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } between[i] = sum[pg[i]-1]; within[i] = x[i] - between[i] + osum; } } if(array) { NumericMatrix result = no_init_matrix(d*ng, 3); result(_,0) = fbstatstemp(x, ext, ng, g, w, false, stable_algo); result(_,1) = replaceC12(as(fbstatstemp(between, ext, ng, g, w, false, stable_algo)), gnpids); // how to do this ? -> above best approach ? result(_,2) = replaceC12(as(fbstatstemp(within, ext, ng, g, w, false, stable_algo)), gnpids, true); if(setn) { Rf_dimgets(result, Dimension(ng, d, 3)); Rf_dimnamesgets(result, List::create(gn, get_stats_names(d, true), CharacterVector::create("Overall","Between","Within"))); Rf_classgets(result, CharacterVector::create("qsu","array","table")); } return(result); } else { List result(3); // option array ? result[0] = fbstatstemp(x, ext, ng, g, w, true, stable_algo, gn); result[1] = replaceC12(as(fbstatstemp(between, ext, ng, g, w, true, stable_algo, gn)), gnpids); // how to do this ? -> above best approach ? result[2] = replaceC12(as(fbstatstemp(within, ext, ng, g, w, true, stable_algo, gn)), gnpids, true); Rf_namesgets(result, CharacterVector::create("Overall","Between","Within")); return(result); } } } } // [[Rcpp::export]] SEXP fbstatsmCpp(const NumericMatrix& x, bool ext = false, int ng = 0, const IntegerVector& g = 0, int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, const SEXP& gn = R_NilValue) { bool weights = !Rf_isNull(w); int col = x.ncol(), d = ((ext) ? 7 : 5) + weights; // l = x.nrow(), if(npg == 0) { // No panel if(ng == 0) { // No groups NumericMatrix out = no_init_matrix(col, d); for(int j = col; j--; ) out(j, _) = fbstatstemp(x(_, j), ext, 0, 0, w, false, stable_algo); Rf_dimnamesgets(out, List::create(colnames(x), get_stats_names(d))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); return out; } else { // if(g.size() != l) stop("length(g) must match nrow(X)"); // checked in fbstatstemp if(array) { NumericMatrix out = no_init_matrix(d*ng, col); for(int j = col; j--; ) out(_, j) = fbstatstemp(x(_, j), ext, ng, g, w, false, stable_algo); Rf_dimgets(out, Dimension(ng, d, col)); Rf_dimnamesgets(out, List::create(gn, get_stats_names(d), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) out[j] = fbstatstemp(x(_, j), ext, ng, g, w, true, stable_algo, gn); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); return out; } } } else { if(ng == 0) { if(array) { NumericMatrix out = no_init_matrix(d*3, col); for(int j = col; j--; ) out(_, j) = as(fbstatsCpp(x(_, j), ext, 0, 0, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? Rf_dimgets(out, Dimension(3, d, col)); Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), get_stats_names(d, true), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) out[j] = fbstatsCpp(x(_, j), ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); return out; } } else { if(array) { NumericMatrix out = no_init_matrix(d*3*ng, col); for(int j = col; j--; ) out(_, j) = as(fbstatsCpp(x(_, j), ext, ng, g, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? Rf_dimgets(out, IntegerVector::create(ng, d, 3, col)); Rf_dimnamesgets(out, List::create(gn, get_stats_names(d, true), CharacterVector::create("Overall","Between","Within"), colnames(x))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) out[j] = fbstatsCpp(x(_, j), ext, ng, g, npg, pg, w, stable_algo, false, true, gn); Rf_setAttrib(out, R_NamesSymbol, colnames(x)); return out; } } } } template NumericVector fnobs5Impl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, SEXP w = R_NilValue, bool real = false, bool setn = false, SEXP gn = R_NilValue) { bool weights = !Rf_isNull(w); int l = x.size(), d = ((ext) ? 7 : 5) + weights; if(ng == 0) { int n = 0; double wsum = 0.0; NumericVector out(d, NA_REAL); if(weights) { NumericVector wg = w; if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { wsum += wg[i]; ++n; } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { wsum += wg[i]; ++n; } } } out[0] = (double)n; out[1] = wsum; } else { if(real) { for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n; // This loop is faster } else { for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n; } out[0] = (double)n; } if(setn) { Rf_namesgets(out, get_stats_names(d)); Rf_classgets(out, CharacterVector::create("qsu","table")); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); NumericMatrix out = no_init_matrix(ng, d); std::fill_n(out.begin(), ng*(1+weights), 0.0); // works ?? -> yes std::fill(out.begin()+ng*(1+weights), out.end(), NA_REAL); NumericMatrix::Column n = out(_, 0); if(weights) { NumericVector wg = w; NumericMatrix::Column wsum = out(_, 1); if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { wsum[g[i]-1] += wg[i]; ++n[g[i]-1]; } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { wsum[g[i]-1] += wg[i]; ++n[g[i]-1]; } } } } else { if(real) { for(int i = 0; i != l; ++i) if(x[i] == x[i]) ++n[g[i]-1]; } else { for(int i = 0; i != l; ++i) if(x[i] != Vector::get_na()) ++n[g[i]-1]; } } if(setn) { Rf_dimnamesgets(out, List::create(gn, get_stats_names(d))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); } return out; } } template NumericMatrix fnobs5pImpl(Vector x, bool ext = false, int ng = 0, IntegerVector g = 0, int npg = 0, IntegerVector pg = 0, SEXP w = R_NilValue, bool real = false, bool array = true, SEXP gn = R_NilValue) { bool weights = !Rf_isNull(w); int l = x.size(), d = ((ext) ? 7 : 5) + weights; if(pg.size() != l) stop("length(pid) must match nrow(X)"); if(ng == 0) { int n = 0, npgc = 0; // bool npgs[npg+1]; // memset(npgs, true, sizeof(bool)*(npg+1)); std::vector npgs(npg+1, true); double wsum = 0.0; if(weights) { NumericVector wg = w; if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { wsum += wg[i]; ++n; } if(npgs[pg[i]-1]) { ++npgc; npgs[pg[i]-1] = false; } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { wsum += wg[i]; ++n; } if(npgs[pg[i]-1]) { ++npgc; npgs[pg[i]-1] = false; } } } } else { if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i]) ++n; if(npgs[pg[i]-1]) { ++npgc; npgs[pg[i]-1] = false; } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na()) ++n; if(npgs[pg[i]-1]) { ++npgc; npgs[pg[i]-1] = false; } } } } NumericMatrix out = no_init_matrix(3, d); out[0] = (double)n; out[1] = (double)npgc; out[2] = out[0]/out[1]; if(weights) { out[3] = (double)wsum; out[4] = (double)npgc; out[5] = out[3]/out[4]; } std::fill(out.begin()+3*(1+weights), out.end(), NA_REAL); if(!array) { Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), get_stats_names(d, true))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); } return out; } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); NumericMatrix out = no_init_matrix(ng*d, 3); std::fill_n(out.begin(), ng*(1+weights), 0.0); // works ? -> yes std::fill(out.begin()+ng*(1+weights), out.end(), NA_REAL); NumericMatrix::Column n = out(_, 0); NumericMatrix::Column gnpids = out(_, 1); std::fill_n(gnpids.begin(), ng, 0.0); // bool groupids[ng][npg]; // could do +1 trick, but that could be costly in term of memory, if few g and many pg // memset(groupids, true, sizeof(bool)*ng*npg); LogicalMatrix groupids = no_init_matrix(ng, npg); std::fill(groupids.begin(), groupids.end(), true); if(weights) { NumericVector wg = w; if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i] && wg[i] == wg[i] && wg[i] != 0) { n[g[i]+ng-1] += wg[i]; ++n[g[i]-1]; if(groupids(g[i]-1, pg[i]-1)) { ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na() && wg[i] == wg[i] && wg[i] != 0) { n[g[i]+ng-1] += wg[i]; ++n[g[i]-1]; if(groupids(g[i]-1, pg[i]-1)) { ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } } } } } else { if(real) { for(int i = 0; i != l; ++i) { if(x[i] == x[i]) { ++n[g[i]-1]; if(groupids(g[i]-1, pg[i]-1)) { ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } } } } else { for(int i = 0; i != l; ++i) { if(x[i] != Vector::get_na()) { ++n[g[i]-1]; if(groupids(g[i]-1, pg[i]-1)) { ++gnpids[g[i]-1]; groupids(g[i]-1, pg[i]-1) = false; } } } } } NumericMatrix::Column nt = out(_, 2); if(weights) { for(int i = 0; i != ng; ++i) { gnpids[ng+i] = gnpids[i]; nt[i] = n[i] / gnpids[i]; nt[ng+i] = n[ng+i] / gnpids[i]; } } else { for(int i = 0; i != ng; ++i) nt[i] = n[i] / gnpids[i]; } if(!array) { Rf_dimgets(out, Dimension(ng, d, 3)); Rf_dimnamesgets(out, List::create(gn, get_stats_names(d, true), CharacterVector::create("Overall","Between","Within"))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); } return out; } } // [[Rcpp::export]] SEXP fbstatslCpp(const List& x, bool ext = false, int ng = 0, const IntegerVector& g = 0, int npg = 0, const IntegerVector& pg = 0, const SEXP& w = R_NilValue, bool stable_algo = true, bool array = true, const SEXP& gn = R_NilValue) { bool weights = !Rf_isNull(w); int col = x.size(), d = ((ext) ? 7 : 5) + weights; if(npg == 0) { // No panel if(ng == 0) { // No groups NumericMatrix out = no_init_matrix(col, d); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext, 0, 0, w, true); else out(j, _) = fbstatstemp(column, ext, 0, 0, w, false, stable_algo); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(j, _) = fnobs5Impl(column, ext, 0, 0, w); else out(j, _) = fbstatstemp(x[j], ext, 0, 0, w, false, stable_algo); break; } case STRSXP: out(j, _) = fnobs5Impl(x[j], ext, 0, 0, w); break; case LGLSXP: out(j, _) = fnobs5Impl(x[j], ext, 0, 0, w); break; default: stop("Not supported SEXP type!"); } } Rf_dimnamesgets(out, List::create(Rf_getAttrib(x, R_NamesSymbol), get_stats_names(d))); Rf_classgets(out, CharacterVector::create("qsu","matrix","table")); return out; } else { if(array) { NumericMatrix out = no_init_matrix(d*ng, col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g, w, true); else out(_, j) = fbstatstemp(column, ext, ng, g, w, false, stable_algo); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5Impl(column, ext, ng, g, w); else out(_, j) = fbstatstemp(x[j], ext, ng, g, w, false, stable_algo); break; } case STRSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g, w); break; case LGLSXP: out(_, j) = fnobs5Impl(x[j], ext, ng, g, w); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, Dimension(ng, d, col)); Rf_dimnamesgets(out, List::create(gn, get_stats_names(d), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, w, true, true, gn); else out[j] = fbstatstemp(column, ext, ng, g, w, true, stable_algo, gn); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5Impl(column, ext, ng, g, w, false, true, gn); else out[j] = fbstatstemp(x[j], ext, ng, g, w, true, stable_algo, gn); break; } case STRSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, w, false, true, gn); break; case LGLSXP: out[j] = fnobs5Impl(x[j], ext, ng, g, w, false, true, gn); break; default: stop("Not supported SEXP type!"); } } Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } } } else { // with panel if(ng == 0) { if(array) { NumericMatrix out = no_init_matrix(d*3, col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg, w, true); else out(_, j) = as(fbstatsCpp(column, ext, 0, 0, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, 0, 0, npg, pg, w); else out(_, j) = as(fbstatsCpp(x[j], ext, 0, 0, npg, pg, w, stable_algo, true, false)); break; } case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w); break; case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, Dimension(3, d, col)); Rf_dimnamesgets(out, List::create(CharacterVector::create("Overall","Between","Within"), get_stats_names(d, true), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, w, true, false, gn); else out[j] = fbstatsCpp(column, ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, 0, 0, npg, pg, w, false, false, gn); else out[j] = fbstatsCpp(x[j], ext, 0, 0, npg, pg, w, stable_algo, false, true, gn); break; } case STRSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w, false, false, gn); break; case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, 0, 0, npg, pg, w, false, false, gn); break; default: stop("Not supported SEXP type!"); } } Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } } else { if(array) { NumericMatrix out = no_init_matrix(d*3*ng, col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg, w, true); else out(_, j) = as(fbstatsCpp(column, ext, ng, g, npg, pg, w, stable_algo, true, false)); // or Rf_coerce ? break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out(_, j) = fnobs5pImpl(column, ext, ng, g, npg, pg, w); else out(_, j) = as(fbstatsCpp(x[j], ext, ng, g, npg, pg, w, stable_algo, true, false)); break; } case STRSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w); break; case LGLSXP: out(_, j) = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w); break; default: stop("Not supported SEXP type!"); } } Rf_dimgets(out, IntegerVector::create(ng, d, 3, col)); Rf_dimnamesgets(out, List::create(gn, get_stats_names(d, true), CharacterVector::create("Overall","Between","Within"), Rf_getAttrib(x, R_NamesSymbol))); Rf_classgets(out, CharacterVector::create("qsu","array","table")); return out; } else { List out(col); for(int j = col; j--; ) { switch(TYPEOF(x[j])) { case REALSXP:{ NumericVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, w, true, false, gn); else out[j] = fbstatsCpp(column, ext, ng, g, npg, pg, w, stable_algo, false, true, gn); break; } case INTSXP: { IntegerVector column = x[j]; if(Rf_isObject(column)) out[j] = fnobs5pImpl(column, ext, ng, g, npg, pg, w, false, false, gn); else out[j] = fbstatsCpp(x[j], ext, ng, g, npg, pg, w, stable_algo, false, true, gn); break; } case STRSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w, false, false, gn); break; case LGLSXP: out[j] = fnobs5pImpl(x[j], ext, ng, g, npg, pg, w, false, false, gn); break; default: stop("Not supported SEXP type!"); } } Rf_setAttrib(out, R_NamesSymbol, Rf_getAttrib(x, R_NamesSymbol)); return out; } } } } // Old / Experimental: // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // template <> // NumericVector fnobs5Impl(Vector x, int ng, IntegerVector g, bool real, bool setn) { // stop("Not supported SEXP type!"); // } // // // [[Rcpp::export]] // NumericVector fnobs5Cpp(SEXP x, int ng = 0, IntegerVector g = 0, bool real = false, bool setn = true){ // RCPP_RETURN_VECTOR(fnobs5Impl, x, ng, g, real, setn); // } // // [[Rcpp::export]] // SEXP fbstatsCpp(NumericVector x, int ng = 0, IntegerVector g = 0, IntegerVector gs = 0, // int npg = 0, IntegerVector pg = 0, IntegerVector pgs = 0, // SEXP w, // bool narm = true) { // int l = x.size(); // if(ng == 0 && npg == 0) { // No groups, no panel !! // int n = 0; // double min = 0, max = 0, sum = 0, sq_sum = 0; // if(narm) { // int j = l-1; // while(std::isnan(x[j]) && j!=0) --j; // min = x[j]; max = x[j]; sum = x[j]; sq_sum = x[j]; // if(j != 0) for(int i = j; i--; ) { // if(std::isnan(x[i])) continue; // sum += x[i]; // sq_sum += x[i] * x[i]; // if(min>x[i]) min = x[i]; // if(maxx[i]) min = x[i]; // if(max Not Bad at all // if(std::isnan(sum[k])) { // sum[k] = x[i]; // sq_sum[k] = x[i]*x[i]; // min[k] = x[i]; // max[k] = x[i]; // n[k] = 1; // } else { // integer for subsetting ?? // sum[k] += x[i]; // sq_sum[k] += x[i]*x[i]; // if(min[k] > x[i]) min[k] = x[i]; // if(max[k] < x[i]) max[k] = x[i]; // ++n[k]; // } // } // } // sum = sum / n; // sq_sum = sqrt((sq_sum - (sum*sum)*n)/(n-1)); // return result; // } else if (ng == 0) { // // .... // } // return R_NilValue; // } // // // [[Rcpp::export]] // SEXP test(NumericVector x) { // int l = x.size(); // int j = l-1; // while(std::isnan(x[j]) && j!=0) --j; // right -- before ?? // return NumericVector::create(j); // } // // #include // #include // using namespace Rcpp; // // // [[Rcpp::export]] // NumericVector fbstats(NumericVector x, bool narm = false) { // possibly try quick conversion to factor?? // int l = x.size(); // //NumericVector un = unique(x); // fastest for now. see how constructed.. // //std::sort(x.begin(), x.end()); // //std::unordered_set newvalue; // //std::unordered_map counts; // Also too slow!! // // https://stackoverflow.com/questions/23150905/effective-unique-on-unordered-elements // //std::vector set(1000000000); // simple: just put true if already occurred -> Needs to be positive integers!! // //int un = 0; // //NumericVector y = x * 100000; // double min = x[0]; // what about NA_RM of the first element in NA?? // double max = x[0]; // double sum = 0; // double sq_sum = 0; // //double c_sum = 0; // //double f_sum = 0; // if(narm) { // int n = 0; // for(int i = l; i--; ) { // if(ISNAN(x[i])) continue; // sum += x[i]; // sq_sum += x[i] * x[i]; // //c_sum += sq_sum * x[i]; // //f_sum += c_sum * x[i]; // if(min>x[i]) min = x[i]; // if(maxx[i]) min = x[i]; // if(max static double POS_INF = 1.0/0.0; static double NEG_INF = -1.0/0.0; void fmin_double_impl(double *pout, double *px, int ng, int *pg, int narm, int l) { if(ng == 0) { double min; if(narm) { int j = l-1; min = px[j]; while(ISNAN(min) && j!=0) min = px[--j]; if(j != 0) for(int i = j; i--; ) { if(min > px[i]) min = px[i]; } } else { min = px[0]; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) { min = px[i]; break; } else { if(min > px[i]) min = px[i]; } } } pout[0] = min; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) if(pout[pg[i]] > px[i] || ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; // fastest } else { for(int i = ng; i--; ) pout[i] = POS_INF; --pout; for(int i = l; i--; ) if(pout[pg[i]] > px[i] || ISNAN(px[i])) pout[pg[i]] = px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } void fmin_int_impl(int *pout, int *px, int ng, int *pg, int narm, int l) { if(ng == 0) { int min; if(narm) { int j = l-1; min = px[j]; while(min == NA_INTEGER && j!=0) min = px[--j]; if(j != 0) for(int i = j; i--; ) { if(min > px[i] && px[i] != NA_INTEGER) min = px[i]; } } else { min = px[0]; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) { min = NA_INTEGER; break; } else { if(min > px[i]) min = px[i]; } } } pout[0] = min; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l; i--; ) if(px[i] != NA_INTEGER && (pout[pg[i]] > px[i] || pout[pg[i]] == NA_INTEGER)) pout[pg[i]] = px[i]; // fastest?? } else { for(int i = ng; i--; ) pout[i] = INT_MAX; --pout; for(int i = l; i--; ) if(pout[pg[i]] > px[i]) pout[pg[i]] = px[i]; } } } void fmax_double_impl(double *pout, double *px, int ng, int *pg, int narm, int l) { if(ng == 0) { double max; if(narm) { int j = l-1; max = px[j]; while(ISNAN(max) && j!=0) max = px[--j]; if(j != 0) for(int i = j; i--; ) { if(max < px[i]) max = px[i]; } } else { max = px[0]; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) { max = px[i]; break; } else { if(max < px[i]) max = px[i]; } } } pout[0] = max; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) if(pout[pg[i]] < px[i] || ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; // fastest } else { for(int i = ng; i--; ) pout[i] = NEG_INF; --pout; for(int i = l; i--; ) if(pout[pg[i]] < px[i] || ISNAN(px[i])) pout[pg[i]] = px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } void fmax_int_impl(int *pout, int *px, int ng, int *pg, int narm, int l) { if(ng == 0) { int max; if(narm) { max = NA_INTEGER; // same as INT_MIN for(int i = l; i--; ) if(max < px[i]) max = px[i]; } else { max = px[0]; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) { max = NA_INTEGER; break; } else { if(max < px[i]) max = px[i]; } } } pout[0] = max; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = l; i--; ) if(pout[pg[i]] < px[i]) pout[pg[i]] = px[i]; // fastest?? } else { for(int i = ng; i--; ) pout[i] = INT_MIN + 1; // best ?? --pout; for(int i = l; i--; ) if(px[i] == NA_INTEGER || (pout[pg[i]] != NA_INTEGER && pout[pg[i]] < px[i])) pout[pg[i]] = px[i]; } } } SEXP fminC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; // ALTREP methods for compact sequences: not safe yet and not part of the API. // if(ALTREP(x) && ng == 0) { // if(tx == INTSXP) return ALTINTEGER_MIN(x, (Rboolean)narm); // if(tx == REALSXP) return ALTREAL_MIN(x, (Rboolean)narm); // error("ALTREP object must be integer or real typed"); // } SEXP out = PROTECT(allocVector(tx, ng == 0 ? 1 : ng)); switch(tx) { case REALSXP: fmin_double_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); break; case INTSXP: fmin_int_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l); break; default: error("Unsupported SEXP type"); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); UNPROTECT(1); return out; } SEXP fminmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g), ng = asInteger(Rng), ng1 = ng == 0 ? 1 : ng, narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, ng == 0 ? col : col * ng)); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) fmin_double_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } case INTSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) fmin_int_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } default: error("Unsupported SEXP type"); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } SEXP fminlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { int l = length(x), ng = asInteger(Rng); if(l < 1) return x; // needed ?? if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)); const SEXP *px = SEXPPTR_RO(x); double *pout = REAL(out); for(int j = 0; j != l; ++j) pout[j] = asReal(fminC(px[j], Rng, g, Rnarm)); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)); const SEXP *px = SEXPPTR_RO(x); for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fminC(px[j], Rng, g, Rnarm)); // if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); DFcopyAttr(out, x, ng); UNPROTECT(1); return out; } SEXP fmaxC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; // ALTREP methods for compact sequences: not safe yet and not part of the API. // if(ALTREP(x) && ng == 0) { // if(tx == INTSXP) return ALTINTEGER_MAX(x, (Rboolean)narm); // if(tx == REALSXP) return ALTREAL_MAX(x, (Rboolean)narm); // error("ALTREP object must be integer or real typed"); // } SEXP out = PROTECT(allocVector(tx, ng == 0 ? 1 : ng)); switch(tx) { case REALSXP: fmax_double_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); break; case INTSXP: fmax_int_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l); break; default: error("Unsupported SEXP type"); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); UNPROTECT(1); return out; } SEXP fmaxmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g), ng = asInteger(Rng), ng1 = ng == 0 ? 1 : ng, narm = asLogical(Rnarm); if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(tx, ng == 0 ? col : col * ng)); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) fmax_double_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } case INTSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) fmax_int_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } default: error("Unsupported SEXP type"); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } SEXP fmaxlC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { int l = length(x), ng = asInteger(Rng); if(l < 1) return x; // needed ?? if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)); const SEXP *px = SEXPPTR_RO(x); double *pout = REAL(out); for(int j = 0; j != l; ++j) pout[j] = asReal(fmaxC(px[j], Rng, g, Rnarm)); setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)); const SEXP *px = SEXPPTR_RO(x); for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fmaxC(px[j], Rng, g, Rnarm)); // if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); DFcopyAttr(out, x, ng); UNPROTECT(1); return out; } collapse/src/join.c0000644000176200001440000003400114777170131013754 0ustar liggesusers#include "collapse_c.h" // Needs to be first because includes OpenMP, to avoid namespace conflicts. #include "data.table.h" #include "kit.h" /* A Sort-Merge Join See: https://www.dcs.ed.ac.uk/home/tz/phd/thesis/node20.htm And: https://en.wikipedia.org/wiki/Sort-merge_join Note: this is only used in join(..., sort = TRUE), and expects that x was sorted by the join columns (done at R-level). The default hash join used with sort = FALSE is implemented in match.c */ // TODO: could add any_dup condition similar to fmatch() in while loop for j, i.e. any_dup = 1; // this would resemble the overid argument to fmatch(). // FIRST PASS void sort_merge_join_int(const int *restrict px, const int *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // matches and ordering vector for table const int nx, const int nt, int *restrict pres) // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, tmp, otj; while (i != nx && j != nt) { otj = pot[j]; tmp = pt[otj]; if (px[i] == tmp) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && px[i] == tmp) { pres[i] = otj; pg[i] = g; } while (++j != nt && pt[pot[j]] == tmp) ptab[j] = g; } else if ((px[i] != NA_INTEGER && px[i] < tmp) || tmp == NA_INTEGER) { // NA_INTEGER is the smallest integer: assuming ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ++j; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } void sort_merge_join_double(const double *restrict px, const double *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // matches and ordering vector for table const int nx, const int nt, int *restrict pres) // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, otj; double tmp; while (i != nx && j != nt) { otj = pot[j]; tmp = pt[otj]; if (REQUAL(px[i], tmp)) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && REQUAL(px[i], tmp)) { pres[i] = otj; pg[i] = g; } while (++j != nt && REQUAL(pt[pot[j]], tmp)) ptab[j] = g; } else if (px[i] < tmp || ISNAN(tmp)) { // Ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ++j; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } void sort_merge_join_string(const SEXP *restrict px, const SEXP *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // matches and ordering vector for table const int nx, const int nt, int *restrict pres) // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, otj; SEXP tmp; while (i != nx && j != nt) { otj = pot[j]; tmp = pt[otj]; if (px[i] == tmp) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && px[i] == tmp) { pres[i] = otj; pg[i] = g; } while (++j != nt && pt[pot[j]] == tmp) ptab[j] = g; } else if (tmp == NA_STRING || (px[i] != NA_STRING && strcmp(CHAR(px[i]), CHAR(tmp)) < 0)) { // Ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ++j; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } void sort_merge_join_complex(const Rcomplex *restrict px, const Rcomplex *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // matches and ordering vector for table const int nx, const int nt, int *restrict pres) // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, otj; Rcomplex xi, tmp; while (i != nx && j != nt) { otj = pot[j]; tmp = pt[otj]; xi = px[i]; if (CEQUAL(xi, tmp)) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && CEQUAL(px[i], tmp)) { pres[i] = otj; pg[i] = g; } while (++j != nt && CEQUAL(pt[pot[j]], tmp)) ptab[j] = g; } else if (xi.r < tmp.r || (xi.r == tmp.r && xi.i < tmp.i) || ISNAN(tmp.r) || ISNAN(tmp.i)) { // Ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ++j; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } // SECOND PASS void sort_merge_join_int_second(const int *restrict px, const int *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // previous matches and ordering vector for table const int nx, const int nt, int *restrict pres) // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, tmp, grj, otj; while (i != nx && j != nt) { if (pres[i] == NA_INTEGER) { ++i; continue; } grj = ptab[j]; if (grj == 0) { ++j; continue; } otj = pot[j]; tmp = pt[otj]; if (px[i] == tmp && pg[i] == grj) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && px[i] == tmp && pg[i] == grj) { pres[i] = otj; pg[i] = g; } while (++j != nt && pt[pot[j]] == tmp && ptab[j] == grj) ptab[j] = g; } else if (pg[i] < grj || (pg[i] == grj && ((px[i] != NA_INTEGER && px[i] < tmp) || tmp == NA_INTEGER))) { // Ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ptab[j++] = 0; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } void sort_merge_join_double_second(const double *restrict px, const double *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // previous matches and ordering vector for table const int nx, const int nt, int *restrict pres) // , int pass // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, grj, otj; double tmp; while (i != nx && j != nt) { if (pres[i] == NA_INTEGER) { ++i; continue; } grj = ptab[j]; if (grj == 0) { ++j; continue; } otj = pot[j]; tmp = pt[otj]; if (REQUAL(px[i], tmp) && pg[i] == grj) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && REQUAL(px[i], tmp) && pg[i] == grj) { pres[i] = otj; pg[i] = g; } while (++j != nt && REQUAL(pt[pot[j]], tmp) && ptab[j] == grj) ptab[j] = g; } else if (pg[i] < grj || (pg[i] == grj && (px[i] < tmp || ISNAN(tmp)))) { // Ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ptab[j++] = 0; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } void sort_merge_join_string_second(const SEXP *restrict px, const SEXP *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // previous matches and ordering vector for table const int nx, const int nt, int *restrict pres) // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, grj, otj; SEXP tmp; while (i != nx && j != nt) { if (pres[i] == NA_INTEGER) { ++i; continue; } grj = ptab[j]; if (grj == 0) { ++j; continue; } otj = pot[j]; tmp = pt[otj]; if (px[i] == tmp && pg[i] == grj) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && px[i] == tmp && pg[i] == grj) { pres[i] = otj; pg[i] = g; } while (++j != nt && pt[pot[j]] == tmp && ptab[j] == grj) ptab[j] = g; } else if (pg[i] < grj || (pg[i] == grj && (tmp == NA_STRING || (px[i] != NA_STRING && strcmp(CHAR(px[i]), CHAR(tmp)) < 0)))) { // Ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ptab[j++] = 0; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } void sort_merge_join_complex_second(const Rcomplex *restrict px, const Rcomplex *restrict pt, // Data pointers, decremented by 1 int *restrict pg, int *restrict ptab, const int *restrict pot, // previous matches and ordering vector for table const int nx, const int nt, int *restrict pres) // Sizes and result vector, pres should also be decremented by 1 { int i = 0, j = 0, g = 0, grj, otj; Rcomplex tmp, xi; while (i != nx && j != nt) { if (pres[i] == NA_INTEGER) { ++i; continue; } grj = ptab[j]; if (grj == 0) { ++j; continue; } otj = pot[j]; tmp = pt[otj]; xi = px[i]; if (CEQUAL(xi, tmp) && pg[i] == grj) { pres[i] = otj; pg[i] = ptab[j] = ++g; // This takes care of duplicates in x and table while (++i != nx && CEQUAL(px[i], tmp) && pg[i] == grj) { pres[i] = otj; pg[i] = g; } while (++j != nt && CEQUAL(pt[pot[j]], tmp) && ptab[j] == grj) ptab[j] = g; } else if (pg[i] < grj || (pg[i] == grj && (xi.r < tmp.r || (xi.r == tmp.r && xi.i < tmp.i) || ISNAN(tmp.r) || ISNAN(tmp.i)))) { // Ordering with na.last pg[i] = pres[i] = NA_INTEGER; ++i; } else ptab[j++] = 0; } while(i < nx) { pg[i] = pres[i] = NA_INTEGER; ++i; } } // R FUNCTION SEXP sort_merge_join(SEXP x, SEXP table, SEXP ot, SEXP count) { if(TYPEOF(x) != VECSXP || TYPEOF(table) != VECSXP) error("x and table need to be lists"); if(TYPEOF(ot) != INTSXP) error("ot needs to be integer"); if(length(x) == 0 || length(table) == 0) error("x and table need to have a non-zero number of columns"); // TODO: x and table could be atomic?? const int nx = length(VECTOR_ELT(x, 0)), nt = length(ot), *restrict pot = INTEGER(ot); if(length(VECTOR_ELT(table, 0)) != nt) error("nrow(table) must match length(ot)"); SEXP res = PROTECT(allocVector(INTSXP, nx)); int *restrict pres = INTEGER(res); int *pg = (int*)R_Calloc(nx, int); int *ptab = (int*)R_Calloc(nt, int); SEXP clist = PROTECT(coerce_to_equal_types(x, table)); // This checks that the lengths match const SEXP *pc = SEXPPTR_RO(clist); int l = length(clist); for (int i = 0; i < l; ++i) { const SEXP *pci = SEXPPTR_RO(pc[i]); switch(TYPEOF(pci[0])) { case INTSXP: case LGLSXP: if(i == 0) sort_merge_join_int(INTEGER_RO(pci[0]), INTEGER_RO(pci[1])-1, pg, ptab, pot, nx, nt, pres); else sort_merge_join_int_second(INTEGER_RO(pci[0]), INTEGER_RO(pci[1])-1, pg, ptab, pot, nx, nt, pres); break; case REALSXP: if(i == 0) sort_merge_join_double(REAL_RO(pci[0]), REAL_RO(pci[1])-1, pg, ptab, pot, nx, nt, pres); else sort_merge_join_double_second(REAL_RO(pci[0]), REAL_RO(pci[1])-1, pg, ptab, pot, nx, nt, pres); break; case STRSXP: if(i == 0) sort_merge_join_string(SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pci[0]))), SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pci[1])))-1, pg, ptab, pot, nx, nt, pres); else sort_merge_join_string_second(SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pci[0]))), SEXPPTR_RO(PROTECT(coerceUtf8IfNeeded(pci[1])))-1, pg, ptab, pot, nx, nt, pres); UNPROTECT(2); break; case CPLXSXP: if(i == 0) sort_merge_join_complex(COMPLEX_RO(pci[0]), COMPLEX_RO(pci[1])-1, pg, ptab, pot, nx, nt, pres); else sort_merge_join_complex_second(COMPLEX_RO(pci[0]), COMPLEX_RO(pci[1])-1, pg, ptab, pot, nx, nt, pres); break; default: error("Unsupported type for x/table: %s", type2char(TYPEOF(pci[0]))); } } R_Free(pg); R_Free(ptab); if(asLogical(count)) count_match(res, nt, NA_INTEGER); UNPROTECT(2); return res; } /* Helper to Perform Multi-Match Join The input is fmatch(x, y) and group(y, group.sizes = TRUE) */ SEXP multi_match(SEXP m, SEXP g) { SEXP gsR = getAttrib(g, sym_group_sizes); if(isNull(gsR)) error("Internal error: g needs to be a 'qG' type vector with a 'group.sizes' attribute."); const int ng = asInteger(getAttrib(g, sym_n_groups)), ngp = ng+1; if(ng != length(gsR)) error("'qG' vector is invalied, 'N.groups' attribute does not match 'group.sizes' attribute"); const int lm = length(m), l = length(g), lp = l+1, *gs = INTEGER(gsR)-1, *pm = INTEGER(m), *pg = INTEGER(g)-1; // No multiple matches, records are unique if(ng == l) return m; int n = 0; #pragma omp simd reduction(+:n) for(int i = 0; i < lm; ++i) n += pm[i] == NA_INTEGER ? 1 : gs[pg[pm[i]]]; if(n == lm) return m; // This just creates an ordering vector for g, could also use radixorder on y int *cgs = (int*)R_alloc(ng+2, sizeof(int)); cgs[1] = 1; for(int i = 1; i != ngp; ++i) cgs[i+1] = cgs[i] + gs[i]; int *restrict cnt = (int*)R_Calloc(ngp, int); int *po = (int*)R_alloc(l, sizeof(int)); --po; for(int i = 1; i != lp; ++i) po[cgs[pg[i]] + cnt[pg[i]]++] = i; R_Free(cnt); // Indices to duplicate x SEXP x_ind = PROTECT(allocVector(INTSXP, n)); // Indices to duplicate y (this is the normal fmatch(x, y) vector but now accounting for multiple matches) SEXP y_ind = PROTECT(allocVector(INTSXP, n)); int *px_ind = INTEGER(x_ind), *py_ind = INTEGER(y_ind); for(int i = 0, j = 0, q = 0, k = 0, s = 0; i != lm; ++i) { if(pm[i] == NA_INTEGER) { px_ind[j] = i+1; py_ind[j++] = NA_INTEGER; continue; } k = pg[pm[i]]; q = cgs[k]; s = q + gs[k]; while(q < s) { px_ind[j] = i+1; py_ind[j++] = po[q++]; } } if(isObject(m)) count_match(y_ind, l, NA_INTEGER); // SHALLOW_DUPLICATE_ATTRIB(y_ind, m); SEXP res = PROTECT(allocVector(VECSXP, 2)); SET_VECTOR_ELT(res, 0, x_ind); SET_VECTOR_ELT(res, 1, y_ind); UNPROTECT(3); return res; } collapse/src/ffirst.c0000644000176200001440000003264215122343124014310 0ustar liggesusers#include "collapse_c.h" // #include // #include // TODO: Implemented smarter copy names ?! // About Pointers // https://www.tutorialspoint.com/cprogramming/c_pointers.htm // https://www.tutorialspoint.com/cprogramming/c_pointer_arithmetic.htm // Use const ? SEXP ffirst_impl(SEXP x, int ng, SEXP g, int narm, int *gl) { int l = length(x), tx = TYPEOF(x), end = l-1; if (l < 2) return x; // Prevents seqfault for numeric(0) #101 if (ng == 0) { SEXP out = PROTECT(allocVector(tx, 1)); int j = 0; if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x); while(ISNAN(px[j]) && j != end) ++j; REAL(out)[0] = px[j]; break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); while(px[j] == NA_STRING && j != end) ++j; SET_STRING_ELT(out, 0, px[j]); break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x); while(px[j] == NA_INTEGER && j != end) ++j; INTEGER(out)[0] = px[j]; break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); while(length(px[j]) == 0 && j != end) ++j; SET_VECTOR_ELT(out, 0, px[j]); break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: REAL(out)[0] = REAL(x)[0]; break; case STRSXP: SET_STRING_ELT(out, 0, STRING_ELT(x, 0)); break; case INTSXP: case LGLSXP: INTEGER(out)[0] = INTEGER(x)[0]; break; case VECSXP: SET_VECTOR_ELT(out, 0, VECTOR_ELT(x, 0)); break; default: error("Unsupported SEXP type!"); } } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); if(!isNull(getAttrib(x, R_NamesSymbol))) namesgets(out, ScalarString(STRING_ELT(getAttrib(x, R_NamesSymbol), j))); UNPROTECT(1); return out; } else { // with groups if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng)); if(narm) { int ngs = 0, *pg = INTEGER(g); switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng; i--; ) pout[i] = NA_REAL; --pout; for(int i = 0; i != l; ++i) { if(NISNAN(px[i])) { // Fastest ??? if(ISNAN(pout[pg[i]])) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = NA_STRING; --pout; for(int i = 0; i != l; ++i) { if(px[i] != NA_STRING) { if(pout[pg[i]] == NA_STRING) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = NA_INTEGER; --pout; for(int i = 0; i != l; ++i) { if(px[i] != NA_INTEGER) { if(pout[pg[i]] == NA_INTEGER) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = R_NilValue; // R_NilValue or just leave empty ?? --pout; for(int i = 0; i != l; ++i) { if(length(px[i])) { if(pout[pg[i]] == R_NilValue) { pout[pg[i]] = px[i]; if(++ngs == ng) break; } } } break; } default: error("Unsupported SEXP type!"); } } else { // Old Implementation: With boolean array // bool gl[ng+1]; // memset(gl, 1, sizeof(bool) * (ng+1)); // for(int i = 0; i != l; ++i) { // if(gl[pg[i]]) { // gl[pg[i]] = false; // pout[pg[i]] = px[i]; // ++ngs; // if(ngs == ng) break; // } // } switch(tx) { case REALSXP: { double *px = REAL(x)-1, *pout = REAL(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? NA_REAL : px[gl[i]]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-1, *pout = INTEGER(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? NA_INTEGER : px[gl[i]]; break; } case STRSXP:{ const SEXP *px = SEXPPTR_RO(x)-1; SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? NA_STRING : px[gl[i]]; break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x)-1; SEXP *pout = SEXPPTR(out); for(int i = ng; i--; ) pout[i] = gl[i] == NA_INTEGER ? R_NilValue : px[gl[i]]; break; } default: error("Unsupported SEXP type!"); } } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); // SHALLOW_DUPLICATE_ATTRIB(out, x); UNPROTECT(1); return out; } } SEXP ffirstC(SEXP x, SEXP Rng, SEXP g, SEXP gst, SEXP Rnarm) { int *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm); if(ng == 0 || narm) { pgl = &ng; // TO avoid Wmaybe uninitialized return ffirst_impl(x, ng, g, narm, pgl); } if(length(gst) != ng) { // Using C-Array -> Not a good idea, variable length arrays give note on gcc11 SEXP gl = PROTECT(allocVector(INTSXP, ng)); int *pg = INTEGER(g), lg = length(g); pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; // &gl[0]-1 Or gl-1; // Pointer to -1 array element (since g starts from 1): https://beginnersbook.com/2014/01/c-pointer-to-array-example/ // Above gives gcc11 issue !! (works with R INTEGER() pointer, not plain C array) for(int i = 0; i != lg; ++i) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i+1; // SEXP gl = PROTECT(allocVector(INTSXP, ng)); // memset(gl, 0, sizeof(int)*ng); // // int *pg = INTEGER(g); // pgl = INTEGER(gl)-1; // Pointer to -1 array element (since g starts from 1): https://beginnersbook.com/2014/01/c-pointer-to-array-example/ // for(int i = length(g); i--; ) if(!pgl[pg[i]]) pgl[pg[i]] = i; // Correct? even for first value ? // SEXP out = PROTECT(allocVector(INTSXP, ng)); // int *pout = INTEGER(out); // for(int i = ng; i--; ) pout[i] = pgl[i+1]; // UNPROTECT(1); // return out; // Checking pointer: appears to be correct... // UNPROTECT(1); // return gl; SEXP res = ffirst_impl(x, ng, g, narm, ++pgl); UNPROTECT(1); return res; } else return ffirst_impl(x, ng, g, narm, INTEGER(gst)); } SEXP ffirstlC(SEXP x, SEXP Rng, SEXP g, SEXP gst, SEXP Rnarm) { int l = length(x), *pgl, ng = asInteger(Rng), narm = asLogical(Rnarm), nprotect = 1; if(ng > 0 && !narm) { if(length(gst) != ng) { // Can't use integer array here because apparently it is removed by the garbage collector when passed to a new function SEXP gl = PROTECT(allocVector(INTSXP, ng)); ++nprotect; int *pg = INTEGER(g), lg = length(g); // gl[ng], pgl = INTEGER(gl); // pgl = &gl[0]; for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; for(int i = 0; i != lg; ++i) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i+1; ++pgl; } else pgl = INTEGER(gst); } else pgl = &l; // To avoid Wmaybe uninitialized.. // return ffirst_impl(VECTOR_ELT(x, 0), ng, g, narm, pgl); SEXP out = PROTECT(allocVector(VECSXP, l)); const SEXP *px = SEXPPTR_RO(x); for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, ffirst_impl(px[j], ng, g, narm, pgl)); DFcopyAttr(out, x, ng); UNPROTECT(nprotect); return out; } // For matrix writing a separate function to increase efficiency. SEXP ffirstmC(SEXP x, SEXP Rng, SEXP g, SEXP gst, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), l = INTEGER(dim)[0], col = INTEGER(dim)[1], end = l-1; if (l < 2) return x; if (ng == 0) { SEXP out = PROTECT(allocVector(tx, col)); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0, i = 0; j != col; ++j) { while(ISNAN(px[i]) && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0, i = 0; j != col; ++j) { while(px[i] == NA_STRING && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0, i = 0; j != col; ++j) { while(px[i] == NA_INTEGER && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0, i = 0; j != col; ++j) { while(length(px[i]) == 0 && i != end) ++i; pout[j] = px[i]; px += l; i = 0; } break; } default: error("Unsupported SEXP type!"); } } else { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l]; break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l]; break; } case STRSXP: case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) pout[j] = px[j * l]; break; } default: error("Unsupported SEXP type!"); } } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(1); return out; } else { // with groups int nprotect = 1; if(length(g) != l) error("length(g) must match nrow(X)"); SEXP out = PROTECT(allocVector(tx, ng * col)); int *pg = INTEGER(g); if(narm) { switch(tx) { case REALSXP: { double *px = REAL(x), *pout = REAL(out); for(int i = ng * col; i--; ) pout[i] = NA_REAL; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(NISNAN(px[i]) && ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng * col; i--; ) pout[i] = NA_STRING; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(px[i] != NA_STRING && pout[pg[i]] == NA_STRING) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x), *pout = INTEGER(out); for(int i = ng * col; i--; ) pout[i] = NA_INTEGER; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(px[i] != NA_INTEGER && pout[pg[i]] == NA_INTEGER) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x); SEXP *pout = SEXPPTR(out); for(int i = ng * col; i--; ) pout[i] = R_NilValue; --pout; for(int j = 0; j != col; ++j) { for(int i = 0; i != l; ++i) if(length(px[i]) && pout[pg[i]] == R_NilValue) pout[pg[i]] = px[i]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } } else { int *pgl; if(length(gst) != ng) { SEXP gl = PROTECT(allocVector(INTSXP, ng)); ++nprotect; // int gl[ng], *pgl; pgl = &gl[0]; pgl = INTEGER(gl); for(int i = ng; i--; ) pgl[i] = NA_INTEGER; --pgl; // gcc11 issue with plain array for(int i = 0; i != l; ++i) if(pgl[pg[i]] == NA_INTEGER) pgl[pg[i]] = i+1; ++pgl; } else pgl = INTEGER(gst); switch(tx) { case REALSXP: { double *px = REAL(x)-1, *pout = REAL(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? NA_REAL : px[pgl[i]]; px += l; pout += ng; } break; } case INTSXP: case LGLSXP: { int *px = INTEGER(x)-1, *pout = INTEGER(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? NA_INTEGER : px[pgl[i]]; px += l; pout += ng; } break; } case STRSXP: { const SEXP *px = SEXPPTR_RO(x)-1; SEXP *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? NA_STRING : px[pgl[i]]; px += l; pout += ng; } break; } case VECSXP: { const SEXP *px = SEXPPTR_RO(x)-1; SEXP *pout = SEXPPTR(out); for(int j = 0; j != col; ++j) { for(int i = ng; i--; ) pout[i] = pgl[i] == NA_INTEGER ? R_NilValue : px[pgl[i]]; px += l; pout += ng; } break; } default: error("Unsupported SEXP type!"); } } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(nprotect); return out; } } collapse/src/fprod.c0000644000176200001440000001704115122343124014121 0ustar liggesusers#include "collapse_c.h" // #include void fprod_double_impl(double *pout, double *px, int ng, int *pg, int narm, int l) { if(ng == 0) { long double prod; if(narm) { int j = l-1; while(ISNAN(px[j]) && j!=0) --j; prod = (long double)px[j]; if(j != 0) for(int i = j; i--; ) { if(NISNAN(px[i])) prod *= px[i]; // Fastest ? } } else { prod = 1.0; for(int i = 0; i != l; ++i) { if(ISNAN(px[i])) { prod = px[i]; break; } else { prod *= px[i]; } } } pout[0] = (double)prod; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) { if(NISNAN(px[i])) { // faster way to code this ? -> Not Bad at all if(ISNAN(pout[pg[i]])) pout[pg[i]] = px[i]; else pout[pg[i]] *= px[i]; } } } else { for(int i = ng; i--; ) pout[i] = 1.0; --pout; for(int i = l; i--; ) pout[pg[i]] *= px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } void fprod_weights_impl(double *pout, double *px, int ng, int *pg, double *pw, int narm, int l) { if(ng == 0) { long double prod; if(narm) { int j = l-1; while((ISNAN(px[j]) || ISNAN(pw[j])) && j!=0) --j; prod = px[j] * pw[j]; if(j != 0) for(int i = j; i--; ) { if(ISNAN(px[i]) || ISNAN(pw[i])) continue; prod *= px[i] * pw[i]; } } else { prod = 1.0; for(int i = 0; i != l; ++i) { if(ISNAN(px[i]) || ISNAN(pw[i])) { prod = px[i] + pw[i]; break; } else { prod *= px[i] * pw[i]; } } } pout[0] = (double)prod; } else { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; // Other way ? --pout; for(int i = l; i--; ) { if(ISNAN(px[i]) || ISNAN(pw[i])) continue; if(ISNAN(pout[pg[i]])) pout[pg[i]] = px[i] * pw[i]; else pout[pg[i]] *= px[i] * pw[i]; } } else { for(int i = ng; i--; ) pout[i] = 1.0; --pout; for(int i = l; i--; ) pout[pg[i]] *= px[i] * pw[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } } // using long long internally is substantially faster than using doubles !! double fprod_int_impl(int *px, int narm, int l) { double prod; if(narm) { int j = l-1; while(px[j] == NA_INTEGER && j!=0) --j; prod = px[j]; if(j == 0 && px[j] == NA_INTEGER) return NA_REAL; for(int i = j; i--; ) if(px[i] != NA_INTEGER) prod *= px[i]; } else { prod = 1; for(int i = 0; i != l; ++i) { if(px[i] == NA_INTEGER) return NA_REAL; prod *= px[i]; } } return prod; } void fprod_int_g_impl(double *pout, int *px, int ng, int *pg, int narm, int l) { if(narm) { for(int i = ng; i--; ) pout[i] = NA_REAL; for(int i = l, gi; i--; ) { if(px[i] != NA_INTEGER) { gi = pg[i]-1; if(ISNAN(pout[gi])) pout[gi] = (double)px[i]; else pout[gi] *= px[i]; } } } else { for(int i = ng; i--; ) pout[i] = 1.0; --pout; for(int i = l; i--; ) pout[pg[i]] *= px[i]; // Used to stop loop when all groups passed with NA, but probably no speed gain since groups are mostly ordered. } } SEXP fprodC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm) { int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asLogical(Rnarm), nprotect = 1; if (l < 1) return tx == REALSXP ? x : allocVector(REALSXP, 0); // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match length(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(REALSXP, ng == 0 ? 1 : ng)); if(isNull(w)) { switch(tx) { case REALSXP: fprod_double_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l); break; case INTSXP: { if(ng > 0) fprod_int_g_impl(REAL(out), INTEGER(x), ng, INTEGER(g), narm, l); else REAL(out)[0] = fprod_int_impl(INTEGER(x), narm, l); break; } default: error("Unsupported SEXP type"); } } else { if(l != length(w)) error("length(w) must match length(x)"); int tw = TYPEOF(w); SEXP xr, wr; double *px, *pw; if(tw != REALSXP) { if(tw != INTSXP && tw != LGLSXP) error("weights must be double or integer"); wr = PROTECT(coerceVector(w, REALSXP)); pw = REAL(wr); ++nprotect; } else pw = REAL(w); if(tx != REALSXP) { if(tx != INTSXP) error("x must be double or integer"); xr = PROTECT(coerceVector(x, REALSXP)); px = REAL(xr); ++nprotect; } else px = REAL(x); fprod_weights_impl(REAL(out), px, ng, INTEGER(g), pw, narm, l); } if(ANY_ATTRIB(x) && !(isObject(x) && inherits(x, "ts"))) copyMostAttrib(x, out); // For example "Units" objects... UNPROTECT(nprotect); return out; } SEXP fprodmC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop) { SEXP dim = getAttrib(x, R_DimSymbol); if(isNull(dim)) error("x is not a matrix"); int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g), ng = asInteger(Rng), ng1 = ng == 0 ? 1 : ng, narm = asLogical(Rnarm), nprotect = 1; if (l < 1) return x; // Prevents seqfault for numeric(0) #101 if(ng && l != length(g)) error("length(g) must match nrow(x)"); if(tx == LGLSXP) tx = INTSXP; SEXP out = PROTECT(allocVector(REALSXP, ng == 0 ? col : col * ng)); double *pout = REAL(out); if(isNull(w)) { switch(tx) { case REALSXP: { double *px = REAL(x); for(int j = 0; j != col; ++j) fprod_double_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); break; } case INTSXP: { int *px = INTEGER(x); if(ng > 0) { for(int j = 0; j != col; ++j) fprod_int_g_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); } else { for(int j = 0; j != col; ++j) pout[j] = fprod_int_impl(px + j*l, narm, l); } break; } default: error("Unsupported SEXP type"); } } else { if(l != length(w)) error("length(w) must match nrow(x)"); int tw = TYPEOF(w); SEXP xr, wr; double *px, *pw; if(tw != REALSXP) { if(tw != INTSXP && tw != LGLSXP) error("weights must be double or integer"); wr = PROTECT(coerceVector(w, REALSXP)); pw = REAL(wr); ++nprotect; } else pw = REAL(w); if(tx != REALSXP) { if(tx != INTSXP) error("x must be double or integer"); xr = PROTECT(coerceVector(x, REALSXP)); px = REAL(xr); ++nprotect; } else px = REAL(x); for(int j = 0; j != col; ++j) fprod_weights_impl(pout + j*ng1, px + j*l, ng, pg, pw, narm, l); } matCopyAttr(out, x, Rdrop, ng); UNPROTECT(nprotect); return out; } SEXP fprodlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop) { int l = length(x), ng = asInteger(Rng); if(l < 1) return x; // needed ?? if(ng == 0 && asLogical(Rdrop)) { SEXP out = PROTECT(allocVector(REALSXP, l)); const SEXP *px = SEXPPTR_RO(x); double *pout = REAL(out); for(int j = 0; j != l; ++j) pout[j] = REAL(fprodC(px[j], Rng, g, w, Rnarm))[0]; setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); UNPROTECT(1); return out; } SEXP out = PROTECT(allocVector(VECSXP, l)); const SEXP *px = SEXPPTR_RO(x); for(int j = 0; j != l; ++j) SET_VECTOR_ELT(out, j, fprodC(px[j], Rng, g, w, Rnarm)); // if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); DFcopyAttr(out, x, ng); UNPROTECT(1); return out; } collapse/src/fscale.cpp0000644000176200001440000010223315113725373014614 0ustar liggesusers#include using namespace Rcpp; // Notes: // for mean there are 2 options: "overall.mean" = R_NegInf adds the overall mean. default is centering on 0, or centering on a mean provided, or FALSE = R_PosInf -> no centering, scaling preserves mean // for sd there is "within.sd" = R_NegInf, scaling by the frequency weighted within-group sd, default is 1, or scaling by a sd provided. // All other comments are in fvar.cpp (in C++ folder, not on GitHub) // [[Rcpp::export]] NumericVector fscaleCpp(const NumericVector& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, double set_mean = 0, double set_sd = 1) { // could set mean and sd with SEXP, but complicated... int l = x.size(); if(l < 1) return x; // Prevents seqfault for numeric(0) #101 NumericVector out = no_init_vector(l); // SHALLOW_DUPLICATE_ATTRIB(out, x); // Any speed loss or overwriting attributes ? if (Rf_isNull(w)) { // No weights if (ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); double n = 0, mean = 0, d1 = 0, M2 = 0; if(narm) { int j = l-1; while(std::isnan(x[j]) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i])) continue; d1 = x[i]-mean; mean += d1 * (1 / ++n); M2 += d1*(x[i]-mean); } M2 = set_sd/sqrt(M2/(n-1)); // good ? -> Yes, works ! } else { // use goto to make code simpler ? std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i])) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } else { d1 = x[i]-mean; mean += d1*(1 / ++n); M2 += d1*(x[i]-mean); } } M2 = set_sd/sqrt(M2/(l-1)); } if(std::isnan(M2)) { std::fill(out.begin(), out.end(), NA_REAL); } else { if(set_mean == 0) out = (x-mean)*M2; else if(set_mean == R_PosInf) out = (x-mean)*M2 + mean; // best ? // !R_FINITE(set_mean) else out = (x-mean)*M2 + set_mean; // best ? } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); double d1 = 0, gl_mean = 0; // Best way of doing this ? How can you declare variables in global scope ? // NumericVector mean = narm ? no_init_vector(ng) : NumericVector(ng); // works but valgrind issue // NumericVector M2 = narm ? NumericVector(ng, NA_REAL) : NumericVector(ng); // NumericVector n = narm ? NumericVector(ng, 1.0) : NumericVector(ng); NumericVector mean(ng), n(ng, (narm) ? 1.0 : 0.0), M2(ng, (narm) ? NA_REAL : 0.0); if(narm) { for(int i = l; i--; ) { if(std::isnan(x[i])) continue; if(std::isnan(M2[g[i]-1])) { mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } } else { int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { d1 = x[i]-mean[g[i]-1]; mean[g[i]-1] += d1 * (1 / ++n[g[i]-1]); M2[g[i]-1] += d1*(x[i]-mean[g[i]-1]); } } } if(set_sd == R_NegInf) { double within_sd = 0; int sum_n = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(n[i]-1)); gl_mean += mean[i]*n[i]; sum_n += n[i]; } gl_mean /= sum_n; } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(n[i]-1)); sum_n += n[i]; } gl_mean = set_mean; } within_sd = sqrt(within_sd/(sum_n-1)); M2 = M2 * within_sd; // fastest ? } else { if(set_mean == R_NegInf) { int sum_n = 0; for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; M2[i] = set_sd/sqrt(M2[i]/(n[i]-1)); gl_mean += mean[i]*n[i]; sum_n += n[i]; } gl_mean /= sum_n; } else { gl_mean = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = set_sd/sqrt(M2[i]/(n[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + mean[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + gl_mean; // best ? } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match length(x)"); if (ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); double sumw = 0, mean = 0, M2 = 0, d1 = 0; if(narm) { int j = l-1; while((std::isnan(x[j]) || std::isnan(wg[j]) || wg[j] == 0) && j!=0) --j; if(j != 0) { for(int i = j+1; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } } else { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { for(int i = 0; i != l; ++i) { if(std::isnan(x[i]) || std::isnan(wg[i])) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } else { if(wg[i] == 0) continue; sumw += wg[i]; d1 = x[i] - mean; mean += d1 * (wg[i] / sumw); M2 += wg[i] * d1 * (x[i] - mean); } } } M2 = set_sd/sqrt(M2/(sumw-1)); if(std::isnan(M2)) { std::fill(out.begin(), out.end(), NA_REAL); } else { if(set_mean == 0) out = (x-mean)*M2; else if(set_mean == R_PosInf) out = (x-mean)*M2 + mean; // best ? else out = (x-mean)*M2 + set_mean; // best ? } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); double d1 = 0, gl_mean = 0; // Best way of doing this ? How can you declare variables in overall scope ? // NumericVector M2 = narm ? NumericVector(ng, NA_REAL) : NumericVector(ng); NumericVector M2(ng, (narm) ? NA_REAL : 0.0), mean(ng), sumw(ng); // = narm ? no_init_vector(ng) : NumericVector(ng); // works but valgrind issues // NumericVector sumw = narm ? no_init_vector(ng) : NumericVector(ng); if(narm) { for(int i = l; i--; ) { if(std::isnan(x[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2[g[i]-1])) { sumw[g[i]-1] = wg[i]; mean[g[i]-1] = x[i]; M2[g[i]-1] = 0; } else { sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } } else { int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2[g[i]-1])) continue; if(std::isnan(x[i]) || std::isnan(wg[i])) { M2[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(out.begin(), out.end(), NA_REAL); SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } } else { if(wg[i] == 0) continue; sumw[g[i]-1] += wg[i]; d1 = x[i] - mean[g[i]-1]; mean[g[i]-1] += d1 * (wg[i] / sumw[g[i]-1]); M2[g[i]-1] += wg[i] * d1 * (x[i] - mean[g[i]-1]); } } } if(set_sd == R_NegInf) { double within_sd = 0, sum_sumw = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(sumw[i]-1)); gl_mean += mean[i]*sumw[i]; sum_sumw += sumw[i]; } gl_mean /= sum_sumw; } else { for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; within_sd += M2[i]; M2[i] = 1/sqrt(M2[i]/(sumw[i]-1)); sum_sumw += sumw[i]; } gl_mean = set_mean; } within_sd = sqrt(within_sd/(sum_sumw-1)); M2 = M2 * within_sd; // fastest ? } else { if(set_mean == R_NegInf) { double sum_sumw = 0; for(int i = ng; i--; ) { if(std::isnan(M2[i])) continue; M2[i] = set_sd/sqrt(M2[i]/(sumw[i]-1)); gl_mean += mean[i]*sumw[i]; sum_sumw += sumw[i]; } gl_mean /= sum_sumw; } else { gl_mean = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2[i])) M2[i] = set_sd/sqrt(M2[i]/(sumw[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + mean[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) out[i] = (x[i]-mean[g[i]-1])*M2[g[i]-1] + gl_mean; // best ? } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] NumericMatrix fscalemCpp(const NumericMatrix& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, double set_mean = 0, double set_sd = 1) { int l = x.nrow(), col = x.ncol(); NumericMatrix out = no_init_matrix(l, col); if (Rf_isNull(w)) { // No weights if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double nj = 0, meanj = 0, d1 = 0, M2j = 0; if(narm) { // faster using 2 loops over columns ? int k = l-1; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } M2j = set_sd/sqrt(M2j/(nj-1)); } else { std::fill(outj.begin(), outj.end(), NA_REAL); continue; // Necessary } } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i])) { M2j = NA_REAL; break; } else { d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } } M2j = set_sd/sqrt(M2j/(l-1)); } if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } } } else { // with groups if(g.size() != l) stop("length(g) must match nrow(X)"); // Better way ? NumericVector meanj(ng), nj(ng), M2j(ng); // NumericVector meanj = no_init_vector(ng), nj = no_init_vector(ng), M2j = no_init_vector(ng); // Works but valgrind issue for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double d1 = 0, gl_meanj = 0; if(narm) { // better do two loops ?? std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = l; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; nj[g[i]-1] = 1; } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = nj[i] = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend; } } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0; int sum_nj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); sum_nj += nj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_nj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { int sum_nj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend:; } } } else { // With weights NumericVector wg = w; if(l != wg.size()) stop("length(w) must match nrow(X)"); if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double sumwj = 0, meanj = 0, M2j = 0, d1 = 0; if(narm) { int k = l-1; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } else { std::fill(outj.begin(), outj.end(), NA_REAL); continue; // Necessary } } else { for(int i = 0; i != l; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } } M2j = set_sd/sqrt(M2j/(sumwj-1)); if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } } } else { // with groups and weights if(g.size() != l) stop("length(g) must match nrow(X)"); // Works but valgrind issue // NumericVector meanj = no_init_vector(ng), sumwj = no_init_vector(ng), M2j = no_init_vector(ng); NumericVector meanj(ng), sumwj(ng), M2j(ng); // better for valgrind for(int j = col; j--; ) { NumericMatrix::ConstColumn column = x( _ , j); NumericMatrix::Column outj = out( _ , j); double d1 = 0, gl_meanj = 0; if(narm) { std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = l; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = sumwj[i] = 0; int ngs = 0; for(int i = 0; i != l; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend2; } } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0, sum_sumwj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); sum_sumwj += sumwj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_sumwj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { double sum_sumwj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != l; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend2:; } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } // [[Rcpp::export]] List fscalelCpp(const List& x, int ng = 0, const IntegerVector& g = 0, const SEXP& w = R_NilValue, bool narm = true, double set_mean = 0, double set_sd = 1) { int l = x.size(); List out(l); if (Rf_isNull(w)) { // No weights if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = l; j--; ) { NumericVector column = x[j]; int row = column.size(); NumericVector outj = no_init_vector(row); double nj = 0, meanj = 0, d1 = 0, M2j = 0; if(narm) { int k = row-1; while(std::isnan(column[k]) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i])) continue; d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } M2j = set_sd/sqrt(M2j/(nj-1)); } else { std::fill(outj.begin(), outj.end(), NA_REAL); // outj = rep(NA_REAL, row); // fastest option ! (faster than std::fill) goto loopend; // Necessary } } else { for(int i = 0; i != row; ++i) { if(std::isnan(column[i])) { M2j = NA_REAL; break; } else { d1 = column[i]-meanj; meanj += d1 * (1 / ++nj); M2j += d1*(column[i]-meanj); } } M2j = set_sd/sqrt(M2j/(row-1)); } if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } loopend:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { // with groups int gss = g.size(); // Better way ? NumericVector meanj(ng), nj(ng), M2j(ng); // NumericVector meanj = no_init_vector(ng), nj = no_init_vector(ng), M2j = no_init_vector(ng); // Works but valgrind issue for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector outj = no_init_vector(gss); double d1 = 0, gl_meanj = 0; if(narm) { // better do two loops ? std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = gss; i--; ) { if(std::isnan(column[i])) continue; if(std::isnan(M2j[g[i]-1])) { meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; nj[g[i]-1] = 1; } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = nj[i] = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend2; } } else { d1 = column[i]-meanj[g[i]-1]; meanj[g[i]-1] += d1 * (1 / ++nj[g[i]-1]); M2j[g[i]-1] += d1*(column[i]-meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0; int sum_nj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(nj[i]-1)); sum_nj += nj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_nj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { int sum_nj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); gl_meanj += meanj[i]*nj[i]; sum_nj += nj[i]; } gl_meanj /= sum_nj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(nj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend2:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } else { // With weights NumericVector wg = w; int wgs = wg.size(); if(ng == 0) { if(set_sd == R_NegInf) stop("within.sd can only be calculated when a grouping vector is supplied"); if(set_mean == R_NegInf) stop("without groups, centering on the overall mean amounts to scaling without centering, so use mean = FALSE instead, or supply a grouping vector to subtract out group means."); for(int j = l; j--; ) { NumericVector column = x[j]; if(wgs != column.size()) stop("length(w) must match nrow(X)"); NumericVector outj = no_init_vector(wgs); double sumwj = 0, meanj = 0, M2j = 0, d1 = 0; if(narm) { int k = wgs-1; while((std::isnan(column[k]) || std::isnan(wg[k]) || wg[k] == 0) && k!=0) --k; if(k != 0) { for(int i = k+1; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } else { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend3; // Necessary } } else { for(int i = 0; i != wgs; ++i) { if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j = NA_REAL; break; } else { if(wg[i] == 0) continue; sumwj += wg[i]; d1 = column[i] - meanj; meanj += d1 * (wg[i] / sumwj); M2j += wg[i] * d1 * (column[i] - meanj); } } } M2j = set_sd/sqrt(M2j/(sumwj-1)); if(std::isnan(M2j)) { std::fill(outj.begin(), outj.end(), NA_REAL); } else { if(set_mean == 0) outj = (column-meanj)*M2j; else if(set_mean == R_PosInf) outj = (column-meanj)*M2j + meanj; // best ? else outj = (column-meanj)*M2j + set_mean; // best ? } loopend3:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } else { // with groups and weights int gss = g.size(); if(gss != wgs) stop("length(w) must match length(g)"); NumericVector meanj(ng), sumwj(ng), M2j(ng); // NumericVector meanj = no_init_vector(ng), sumwj = no_init_vector(ng), M2j = no_init_vector(ng); // Works but valgrind issue for(int j = l; j--; ) { NumericVector column = x[j]; if(gss != column.size()) stop("length(g) must match nrow(X)"); NumericVector outj = no_init_vector(gss); double d1 = 0, gl_meanj = 0; if(narm) { std::fill(M2j.begin(), M2j.end(), NA_REAL); for(int i = gss; i--; ) { if(std::isnan(column[i]) || std::isnan(wg[i]) || wg[i] == 0) continue; if(std::isnan(M2j[g[i]-1])) { sumwj[g[i]-1] = wg[i]; meanj[g[i]-1] = column[i]; M2j[g[i]-1] = 0; } else { sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } else { for(int i = ng; i--; ) meanj[i] = M2j[i] = sumwj[i] = 0; int ngs = 0; for(int i = 0; i != gss; ++i) { if(std::isnan(M2j[g[i]-1])) continue; if(std::isnan(column[i]) || std::isnan(wg[i])) { M2j[g[i]-1] = NA_REAL; ++ngs; if(ngs == ng) { std::fill(outj.begin(), outj.end(), NA_REAL); goto loopend4; } } else { if(wg[i] == 0) continue; sumwj[g[i]-1] += wg[i]; d1 = column[i] - meanj[g[i]-1]; meanj[g[i]-1] += d1 * (wg[i] / sumwj[g[i]-1]); M2j[g[i]-1] += wg[i] * d1 * (column[i] - meanj[g[i]-1]); } } } if(set_sd == R_NegInf) { // best way of coding ? Goes through all the if conditions for every column... double within_sdj = 0, sum_sumwj = 0; if(set_mean == R_NegInf) { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; within_sdj += M2j[i]; M2j[i] = 1/sqrt(M2j[i]/(sumwj[i]-1)); sum_sumwj += sumwj[i]; } gl_meanj = set_mean; } within_sdj = sqrt(within_sdj/(sum_sumwj-1)); M2j = M2j * within_sdj; // fastest ? } else { if(set_mean == R_NegInf) { double sum_sumwj = 0; for(int i = ng; i--; ) { if(std::isnan(M2j[i])) continue; M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); gl_meanj += meanj[i]*sumwj[i]; sum_sumwj += sumwj[i]; } gl_meanj /= sum_sumwj; } else { gl_meanj = set_mean; for(int i = ng; i--; ) if(!std::isnan(M2j[i])) M2j[i] = set_sd/sqrt(M2j[i]/(sumwj[i]-1)); } } if(set_mean == 0) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1]; } else if(set_mean == R_PosInf) { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + meanj[g[i]-1]; // best ? } else { for(int i = 0; i != gss; ++i) outj[i] = (column[i]-meanj[g[i]-1])*M2j[g[i]-1] + gl_meanj; // best ? } loopend4:; SHALLOW_DUPLICATE_ATTRIB(outj, column); out[j] = outj; } } } SHALLOW_DUPLICATE_ATTRIB(out, x); return out; } collapse/NAMESPACE0000644000176200001440000004455214777170130013314 0ustar liggesusersuseDynLib(collapse, .registration=TRUE) # , .fixes = "C_" importFrom(Rcpp, evalCpp) importFrom("graphics", "hist", "par", "plot") importFrom("grDevices", "rainbow") # importFrom("lfe", "demeanlist") importFrom("parallel", "mclapply") importFrom("utils", "packageVersion", "head", "tail", "capture.output") importFrom("stats", "as.formula", "complete.cases", "cor", "cov", "var", "pt", "setNames", "terms.formula", "ts", "ts.plot", "model.matrix.default", "quantile", ".lm.fit", "cov2cor") export(pivot) export(add_stub) export(rm_stub) export(all_identical) export(all_obj_equal) export(all_funs) # export(as.factor.GRP) export(as_factor_GRP) # export(as.factor_GRP) export(as_factor_qG) # export(as.factor_qG) export(atomic_elem) export(`atomic_elem<-`) export(B) export(fbetween) export(fbetween.data.frame) export(fbetween.default) export(fbetween.matrix) export(fselect) export(slt) export(`fselect<-`) export(`slt<-`) export(ss) export(fsubset) export(sbt) export(fsubset.data.frame) export(fsubset.default) export(fsubset.matrix) export(fsummarise) export(fsummarize) export(smr) export(ftransform) export(tfm) export(`ftransform<-`) export(`tfm<-`) export(ftransformv) export(tfmv) export(settransform) export(settfm) export(settransformv) export(settfmv) export(fmutate) export(mtt) export(fcompute) export(fcomputev) export(BY) export(BY.data.frame) export(BY.default) export(BY.matrix) export(cat_vars) export(`cat_vars<-`) export(char_vars) export(`char_vars<-`) export(collap) export(collapv) export(collapg) export(D) export(Dlog) export(dapply) export(date_vars) export(`date_vars<-`) # export(Date_vars) # export(`Date_vars<-`) # export(F) export(fFtest) export(fFtest.default) export(fact_vars) export(`fact_vars<-`) export(fdiff) export(fdiff.data.frame) export(fdiff.default) export(fdiff.matrix) export(ffirst) export(ffirst.data.frame) export(ffirst.default) export(ffirst.matrix) export(fgrowth) export(fgrowth.data.frame) export(fgrowth.default) export(fgrowth.matrix) export(flag) export(flag.data.frame) export(flag.default) export(flag.matrix) export(fcumsum) export(fcumsum.data.frame) export(fcumsum.default) export(fcumsum.matrix) export(flast) export(flast.data.frame) export(flast.default) export(flast.matrix) export(fmax) export(fmax.data.frame) export(fmax.default) export(fmax.matrix) export(fmean) export(fmean.data.frame) export(fmean.default) export(fmean.matrix) export(fmedian) export(fmedian.data.frame) export(fmedian.default) export(fmedian.matrix) export(fnth) export(fnth.data.frame) export(fnth.default) export(fnth.matrix) export(fmin) export(fmin.data.frame) export(fmin.default) export(fmin.matrix) export(fmode) export(fmode.data.frame) export(fmode.default) export(fmode.matrix) export(fndistinct) export(fndistinct.data.frame) export(fndistinct.default) export(fndistinct.matrix) export(fNdistinct) # export(fNdistinct.data.frame) # export(fNdistinct.default) # export(fNdistinct.matrix) export(fnobs) export(fnobs.data.frame) export(fnobs.default) export(fnobs.matrix) export(fNobs) # export(fNobs.data.frame) # export(fNobs.default) # export(fNobs.matrix) export(varying) export(varying.data.frame) export(varying.default) export(varying.matrix) export(fprod) export(fprod.data.frame) export(fprod.default) export(fprod.matrix) export(fscale) export(fscale.data.frame) export(fscale.default) export(fscale.matrix) export(fsd) export(fsd.data.frame) export(fsd.default) export(fsd.matrix) export(fsum) export(fsum.data.frame) export(fsum.default) export(fsum.matrix) export(fvar) export(fvar.data.frame) export(fvar.default) export(fvar.matrix) export(G) export(get_elem) export(get_vars) export(`get_vars<-`) export(gv) export(gvr) export(`gv<-`) export(`gvr<-`) export(add_vars) export(av) export(`add_vars<-`) export(`av<-`) export(radixorder) export(radixorderv) export(seqid) export(timeid) export(is_irregular) export(groupid) export(GRP) export(GRP.default) export(fgroup_by) export(gby) export(group_by_vars) export(fgroup_vars) export(fungroup) export(findex_by) export(iby) export(findex) export(ix) export(reindex) export(unindex) export(to_plm) export(GRPnames) export(GRPN) export(GRPid) export(fcount) export(fcountv) export(fslice) export(fslicev) # export(group_names.GRP) export(has_elem) export(flm) export(flm.default) export(cinv) export(vec) export(HDB) export(fhdbetween) export(fhdbetween.default) export(fhdbetween.matrix) export(fhdbetween.data.frame) export(fHDbetween) # export(fHDbetween.default) # export(fHDbetween.matrix) # export(fHDbetween.data.frame) export(HDW) export(fhdwithin) export(fhdwithin.default) export(fhdwithin.matrix) export(fhdwithin.data.frame) export(fHDwithin) # export(fHDwithin.default) # export(fHDwithin.matrix) # export(fHDwithin.data.frame) export(irreg_elem) export(is_categorical) export(is_date) export(is_GRP) export(is_qG) export(is_unlistable) # export(is.categorical) # export(is.Date) # export(is.GRP) # export(is.qG) # export(is.unlistable) # export(is.regular) export(L) export(ldepth) export(list_elem) export(`list_elem<-`) export(logi_vars) export(`logi_vars<-`) export(mctl) export(mrtl) export(namlab) export(num_vars) export(`num_vars<-`) export(nv) export(`nv<-`) export(psacf) export(psacf.default) export(psacf.data.frame) export(pspacf) export(pspacf.default) export(pspacf.data.frame) export(psccf) export(psccf.default) export(psmat) export(psmat.default) export(psmat.data.frame) export(plot.psmat) export(qDF) export(qDT) export(qTBL) export(qF) export(qG) export(qM) export(qsu) export(qsu.default) export(qsu.matrix) export(qsu.data.frame) export(qtab) export(qtable) export(descr) export(descr.default) export(rapply2d) export(t_list) export(gsplit) export(greorder) export(rsplit) export(rsplit.default) export(rsplit.matrix) export(rsplit.data.frame) export(fdroplevels) export(fdroplevels.factor) export(fdroplevels.data.frame) export(reg_elem) export(STD) export(TRA) export(setTRA) export(TRA.data.frame) export(TRA.default) export(TRA.matrix) export(unlist2d) export(vlabels) export(vclasses) export(vtypes) export(vlengths) export(vgcd) export(`vlabels<-`) export(setLabels) export(W) export(fwithin) export(fwithin.data.frame) export(fwithin.default) export(fwithin.matrix) export(seq_row) export(seq_col) export(.c) export(setRownames) export(setColnames) export(setDimnames) export(unattrib) # export(setAttr) export(setAttrib) export(setattrib) export(copyAttrib) export(copyMostAttrib) export(pwcor) export(pwcov) export(pwnobs) # export(pwNobs) export(whichv) export(`%==%`) export(`%!=%`) export(whichNA) export(copyv) export(setv) export(setop) export(`%+=%`) export(`%-=%`) export(`%*=%`) export(`%/=%`) export(alloc) export(frange) export(.range) export(fquantile) export(.quantile) export(fdist) export(allv) export(anyv) export(allNA) export(missing_cases) export(na_rm) export(na_locf) export(na_focb) export(na_omit) export(na_insert) export(massign) export(`%=%`) export(`%rr%`) export(`%r+%`) export(`%r-%`) export(`%r*%`) export(`%r/%`) export(`%cr%`) export(`%c+%`) export(`%c-%`) export(`%c*%`) export(`%c/%`) export(join) export(fmatch) export(ckmatch) export(`%!in%`) export(`%iin%`) export(`%!iin%`) # export(Recode) export(recode_num) export(recode_char) export(replace_na) export(replace_NA) export(pad) export(replace_inf) export(replace_Inf) # export(replace_non_finite) export(replace_outliers) export(print.qsu) export(print.pwcor) export(print.pwcov) export(fnlevels) export(roworder) export(roworderv) export(rowbind) export(frename) export(rnm) export(setrename) export(relabel) export(setrelabel) export(colorder) export(colorderv) export(group) export(groupv) export(funique) export(funique.default) export(funique.data.frame) export(fnunique) export(fduplicated) export(any_duplicated) export(finteraction) export(itn) export(fnrow) export(fncol) export(fdim) export(as_numeric_factor) export(as_integer_factor) export(as_character_factor) # export(as.numeric_factor) # export(as.character_factor) # export(.NA_RM) export(.FAST_FUN) export(.FAST_STAT_FUN) export(.OPERATOR_FUN) export(.COLLAPSE_TOPICS) export(.COLLAPSE_ALL) export(.COLLAPSE_GENERIC) export(.COLLAPSE_DATA) export(.COLLAPSE_OLD) export(set_collapse) export(get_collapse) S3method(B, data.frame) S3method(B, list) S3method(B, default) S3method(B, grouped_df) S3method(B, matrix) S3method(B, zoo) S3method(B, units) S3method(B, pdata.frame) S3method(B, pseries) S3method(fbetween, data.frame) S3method(fbetween, list) S3method(fbetween, default) S3method(fbetween, grouped_df) S3method(fbetween, matrix) S3method(fbetween, zoo) S3method(fbetween, units) S3method(fbetween, pdata.frame) S3method(fbetween, pseries) S3method(fsubset, data.frame) S3method(fsubset, pseries) S3method(fsubset, pdata.frame) S3method(fsubset, grouped_df) S3method(fsubset, default) S3method(fsubset, matrix) S3method(fsubset, zoo) S3method(fsubset, units) S3method(rsplit, default) S3method(rsplit, matrix) S3method(rsplit, data.frame) S3method(rsplit, zoo) S3method(rsplit, units) S3method(fdroplevels, default) S3method(fdroplevels, factor) S3method(fdroplevels, list) S3method(fdroplevels, data.frame) S3method(BY, data.frame) S3method(BY, list) S3method(BY, default) S3method(BY, grouped_df) S3method(BY, matrix) S3method(BY, zoo) S3method(BY, units) S3method(D, data.frame) S3method(D, list) S3method(D, default) S3method(D, expression) S3method(D, call) S3method(D, name) S3method(D, grouped_df) S3method(D, matrix) S3method(D, zoo) S3method(D, units) S3method(D, pdata.frame) S3method(D, pseries) S3method(Dlog, data.frame) S3method(Dlog, list) S3method(Dlog, default) S3method(Dlog, grouped_df) S3method(Dlog, matrix) S3method(Dlog, zoo) S3method(Dlog, units) S3method(Dlog, pdata.frame) S3method(Dlog, pseries) S3method(fdiff, data.frame) S3method(fdiff, list) S3method(fdiff, default) S3method(fdiff, grouped_df) S3method(fdiff, matrix) S3method(fdiff, zoo) S3method(fdiff, units) S3method(fdiff, pdata.frame) S3method(fdiff, pseries) S3method(ffirst, data.frame) S3method(ffirst, list) S3method(ffirst, default) S3method(ffirst, grouped_df) S3method(ffirst, matrix) S3method(ffirst, zoo) S3method(ffirst, units) S3method(fgrowth, data.frame) S3method(fgrowth, list) S3method(fgrowth, default) S3method(fgrowth, grouped_df) S3method(fgrowth, matrix) S3method(fgrowth, zoo) S3method(fgrowth, units) S3method(fgrowth, pdata.frame) S3method(fgrowth, pseries) S3method(flag, data.frame) S3method(flag, list) S3method(flag, default) S3method(flag, grouped_df) S3method(flag, matrix) S3method(flag, zoo) S3method(flag, units) S3method(flag, pdata.frame) S3method(flag, pseries) S3method(fcumsum, data.frame) S3method(fcumsum, list) S3method(fcumsum, default) S3method(fcumsum, grouped_df) S3method(fcumsum, matrix) S3method(fcumsum, zoo) S3method(fcumsum, units) S3method(fcumsum, pdata.frame) S3method(fcumsum, pseries) S3method(flast, data.frame) S3method(flast, list) S3method(flast, default) S3method(flast, grouped_df) S3method(flast, matrix) S3method(flast, zoo) S3method(flast, units) S3method(fmax, data.frame) S3method(fmax, list) S3method(fmax, default) S3method(fmax, grouped_df) S3method(fmax, matrix) S3method(fmax, zoo) S3method(fmax, units) S3method(fmean, data.frame) S3method(fmean, list) S3method(fmean, default) S3method(fmean, grouped_df) S3method(fmean, matrix) S3method(fmean, zoo) S3method(fmean, units) S3method(fmedian, data.frame) S3method(fmedian, list) S3method(fmedian, default) S3method(fmedian, grouped_df) S3method(fmedian, matrix) S3method(fmedian, zoo) S3method(fmedian, units) S3method(fnth, data.frame) S3method(fnth, list) S3method(fnth, default) S3method(fnth, grouped_df) S3method(fnth, matrix) S3method(fnth, zoo) S3method(fnth, units) S3method(fmin, data.frame) S3method(fmin, list) S3method(fmin, default) S3method(fmin, grouped_df) S3method(fmin, matrix) S3method(fmin, zoo) S3method(fmin, units) S3method(fmode, data.frame) S3method(fmode, list) S3method(fmode, default) S3method(fmode, grouped_df) S3method(fmode, matrix) S3method(fmode, zoo) S3method(fmode, units) S3method(fndistinct, data.frame) S3method(fndistinct, list) S3method(fndistinct, default) S3method(fndistinct, grouped_df) S3method(fndistinct, matrix) S3method(fndistinct, zoo) S3method(fndistinct, units) S3method(fNdistinct, data.frame) S3method(fNdistinct, default) S3method(fNdistinct, matrix) S3method(funique, data.frame) S3method(funique, list) S3method(funique, sf) S3method(funique, default) # S3method(funique, grouped_df) S3method(funique, pseries) S3method(funique, pdata.frame) S3method(fnobs, data.frame) S3method(fnobs, list) S3method(fnobs, default) S3method(fnobs, grouped_df) S3method(fnobs, matrix) S3method(fnobs, zoo) S3method(fnobs, units) S3method(fNobs, data.frame) S3method(fNobs, default) S3method(fNobs, matrix) S3method(varying, data.frame) S3method(varying, pdata.frame) S3method(varying, pseries) S3method(varying, list) S3method(varying, sf) S3method(varying, default) S3method(varying, grouped_df) S3method(varying, matrix) S3method(varying, zoo) S3method(varying, units) S3method(fprod, data.frame) S3method(fprod, list) S3method(fprod, default) S3method(fprod, grouped_df) S3method(fprod, matrix) S3method(fprod, zoo) S3method(fprod, units) S3method(fscale, data.frame) S3method(fscale, list) S3method(fscale, default) S3method(fscale, grouped_df) S3method(fscale, matrix) S3method(fscale, zoo) S3method(fscale, units) S3method(fscale, pdata.frame) S3method(fscale, pseries) S3method(fsd, data.frame) S3method(fsd, list) S3method(fsd, default) S3method(fsd, grouped_df) S3method(fsd, matrix) S3method(fsd, zoo) S3method(fsd, units) S3method(fsum, data.frame) S3method(fsum, list) S3method(fsum, default) S3method(fsum, grouped_df) S3method(fsum, matrix) S3method(fsum, zoo) S3method(fsum, units) S3method(fvar, data.frame) S3method(fvar, list) S3method(fvar, default) S3method(fvar, grouped_df) S3method(fvar, matrix) S3method(fvar, zoo) S3method(fvar, units) S3method(G, data.frame) S3method(G, list) S3method(G, default) S3method(G, grouped_df) S3method(G, matrix) S3method(G, zoo) S3method(G, units) S3method(G, pdata.frame) S3method(G, pseries) S3method(GRP, default) S3method(GRP, GRP) S3method(GRP, factor) S3method(GRP, grouped_df) S3method(GRP, pdata.frame) S3method(GRP, pseries) S3method(GRP, qG) S3method(HDB, data.frame) S3method(HDB, default) S3method(HDB, matrix) S3method(HDB, zoo) S3method(HDB, units) S3method(HDB, pdata.frame) S3method(HDB, pseries) S3method(HDB, list) S3method(fhdbetween, default) S3method(fhdbetween, matrix) S3method(fhdbetween, zoo) S3method(fhdbetween, units) S3method(fhdbetween, data.frame) S3method(fhdbetween, pdata.frame) S3method(fhdbetween, list) S3method(fhdbetween, pseries) S3method(fHDbetween, default) S3method(fHDbetween, matrix) S3method(fHDbetween, data.frame) S3method(HDW, list) S3method(HDW, data.frame) S3method(HDW, default) S3method(HDW, matrix) S3method(HDW, zoo) S3method(HDW, units) S3method(HDW, pdata.frame) S3method(HDW, pseries) S3method(fhdwithin, default) S3method(fhdwithin, matrix) S3method(fhdwithin, zoo) S3method(fhdwithin, units) S3method(fhdwithin, data.frame) S3method(fhdwithin, pdata.frame) S3method(fhdwithin, list) S3method(fhdwithin, pseries) S3method(fHDwithin, default) S3method(fHDwithin, matrix) S3method(fHDwithin, data.frame) S3method(L, data.frame) S3method(L, list) S3method(L, default) S3method(L, grouped_df) S3method(L, matrix) S3method(L, zoo) S3method(L, units) S3method(L, pdata.frame) S3method(L, pseries) S3method(length, GRP) S3method(plot, GRP) S3method(print, GRP) S3method(print, GRP_df) # S3method(head, GRP_df) # S3method(tail, GRP_df) S3method(print, indexed_frame) S3method(print, indexed_series) S3method(print, index_df) S3method(print, qsu) S3method(print, descr) S3method(print, pwcor) S3method(print, pwcov) S3method(print, fFtest) S3method(print, psmat) S3method(print, invisible) S3method(aperm, psmat) S3method(aperm, qsu) S3method('[', psmat) S3method('[', qsu) S3method('[', descr) S3method('[', pwcor) S3method('[', pwcov) S3method('[', GRP_df) S3method('[[', GRP_df) S3method('[<-', GRP_df) S3method('[[<-', GRP_df) S3method('names<-', GRP_df) S3method('[', indexed_series) S3method(Math, indexed_series) S3method(Ops, indexed_series) S3method('[', indexed_frame) S3method('[', index_df) S3method('[[', indexed_frame) S3method('$', indexed_frame) S3method('[<-', indexed_frame) S3method('[[<-', indexed_frame) S3method('$<-', indexed_frame) S3method(as.data.frame, descr) S3method(as.data.frame, qsu) S3method(psacf, data.frame) S3method(psacf, default) S3method(psacf, pdata.frame) S3method(psacf, pseries) S3method(psccf, default) S3method(psccf, pseries) S3method(psmat, data.frame) S3method(psmat, default) S3method(psmat, pdata.frame) S3method(psmat, pseries) S3method(plot, psmat) S3method(pspacf, data.frame) S3method(pspacf, default) S3method(pspacf, pdata.frame) S3method(pspacf, pseries) S3method(qsu, data.frame) S3method(qsu, default) S3method(qsu, matrix) S3method(qsu, zoo) S3method(qsu, units) S3method(qsu, grouped_df) S3method(qsu, pdata.frame) S3method(qsu, list) S3method(qsu, sf) S3method(qsu, pseries) S3method(descr, default) S3method(descr, grouped_df) S3method(STD, data.frame) S3method(STD, list) S3method(STD, default) S3method(STD, grouped_df) S3method(STD, matrix) S3method(STD, zoo) S3method(STD, units) S3method(STD, pdata.frame) S3method(STD, pseries) S3method(TRA, data.frame) S3method(TRA, list) S3method(TRA, default) S3method(TRA, grouped_df) S3method(TRA, matrix) S3method(TRA, zoo) S3method(TRA, units) S3method(W, data.frame) S3method(W, list) S3method(W, default) S3method(W, grouped_df) S3method(W, matrix) S3method(W, zoo) S3method(W, units) S3method(W, pdata.frame) S3method(W, pseries) S3method(fwithin, data.frame) S3method(fwithin, list) S3method(fwithin, default) S3method(fwithin, grouped_df) S3method(fwithin, matrix) S3method(fwithin, zoo) S3method(fwithin, units) S3method(fwithin, pdata.frame) S3method(fwithin, pseries) collapse/LICENSE0000644000176200001440000010605114777170130013073 0ustar liggesusersThis is free software licensed under a GNU General Public License 2.0 (GPL-2.0), and may be redistributed and/or modified under the terms of this license. However this software includes modified C-code from the data.table package (http://r-datatable.com) which is licensed under the weaker Mozilla Public License 2.0 (MPL-2.0) license. Any modification of these source files requires preservation of the MPL-2.0 license. The license statements for GPL-2.0 and MPL-2.0 are provided below. The MPL-2.0 License applies to the following files: src/data.table.h src/data.table_init.c src/data.table_rbindlist.c src/data.table_subset.c src/data.table_utils.c The rest is licensed GPL-2.0. ============================================================================================ ******************************************************************************************** GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. ============================================================================================ ******************************************************************************************** Mozilla Public License Version 2.0 ================================== 1. Definitions -------------- 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. "Modifications" means any of the following: (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or (b) any new file in Source Code Form that contains any Covered Software. 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions -------------------------------- 2.1. Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: (a) for any code that a Contributor has removed from Covered Software; or (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or (c) under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities ------------------- 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation --------------------------------------------------- If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination -------------- 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ************************************************************************ * * * 6. Disclaimer of Warranty * * ------------------------- * * * * Covered Software is provided under this License on an "as is" * * basis, without warranty of any kind, either expressed, implied, or * * statutory, including, without limitation, warranties that the * * Covered Software is free of defects, merchantable, fit for a * * particular purpose or non-infringing. The entire risk as to the * * quality and performance of the Covered Software is with You. * * Should any Covered Software prove defective in any respect, You * * (not any Contributor) assume the cost of any necessary servicing, * * repair, or correction. This disclaimer of warranty constitutes an * * essential part of this License. No use of any Covered Software is * * authorized under this License except under this disclaimer. * * * ************************************************************************ ************************************************************************ * * * 7. Limitation of Liability * * -------------------------- * * * * Under no circumstances and under no legal theory, whether tort * * (including negligence), contract, or otherwise, shall any * * Contributor, or anyone who distributes Covered Software as * * permitted above, be liable to You for any direct, indirect, * * special, incidental, or consequential damages of any character * * including, without limitation, damages for lost profits, loss of * * goodwill, work stoppage, computer failure or malfunction, or any * * and all other commercial damages or losses, even if such party * * shall have been informed of the possibility of such damages. This * * limitation of liability shall not apply to liability for death or * * personal injury resulting from such party's negligence to the * * extent applicable law prohibits such limitation. Some * * jurisdictions do not allow the exclusion or limitation of * * incidental or consequential damages, so this exclusion and * * limitation may not apply to You. * * * ************************************************************************ 8. Litigation ------------- Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. Miscellaneous ---------------- This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License --------------------------- 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice ------------------------------------------- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. Exhibit B - "Incompatible With Secondary Licenses" Notice --------------------------------------------------------- This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. collapse/NEWS.md0000644000176200001440000043552415202504365013171 0ustar liggesusers# collapse 2.1.7 * The *collapse* article is now published in the Journal of Statistical Software: https://doi.org/10.18637/jss.v116.i01. This article is now the primary citation for academic use of *collapse*. It is also a great reference to quickly and thoroughly understand the package. `citation("collapse")` was also updated in this regard. The APA-style citation is: Krantz, S. (2026). **collapse**: Advanced and fast statistical computing and data transformation in R. *Journal of Statistical Software, 116*(1), 1–38. [https://doi.org/10.18637/jss.v116.i01](https://doi.org/10.18637/jss.v116.i01) * Performance improvements to `fsum()` and `fmean()` in the non-grouped case through multiple-accumulator SIMD optimizations, particularly benefiting systems without OpenMP support. `fsum()` sees ~2x speedup and `fmean()` ~7x speedup on such systems, with smaller but notable gains on systems with OpenMP. Thanks @TylerSagendorf for the implementation and benchmarking (#824, #828, #832, #833). * `GRP.default()` gains a `drop = TRUE` argument. Setting `drop = FALSE` (and providing at least one factor among the grouping columns) retains all combinations of factor levels with the observed unique values of non-factor grouping columns---the full Cartesian product---similar to `dplyr::group_by(.drop = FALSE)`. Unobserved combinations get `group.sizes` of `0` and `group.starts` of `0L`. Correspondingly, `fgroup_by()`/`gby()` gain a `.drop` argument, and `fcount()`/`fcountv()`/`collap()`/`collapv()` gain a `drop` argument, enabling counts and aggregations that retain empty groups. Thanks @egoipse for the feature request (#820, #839). * Fixed a bug in `descr()` that caused R to crash with a segmentation fault when called on zero-row data frames. Thanks @hatschibratschi for reporting (#831). * Consistency with internal updates to *data.table* regarding growable vectors. Thanks @aitap (#809). * Force C locale sorting in internal operations for consistency. Thanks @MichaelChirico (#815). * Added contributor Ivan Krylov (@aitap) to package authors. # collapse 2.1.6 * The repo has moved to `fastverse/collapse` and the website to [fastverse.org/collapse](https://fastverse.org/collapse/)---for better visibility and maintenance. Appropriate redirects from the old repo/site have been implemented. Selected people now have access to the repo through the organization account and may respond to issues or submit fixes. * Added new AI-generated interactive/chattable [DeepWiki documentation](https://deepwiki.com/fastverse/collapse). * *collapse* now treats `-0` and `0` as the same value in hash functions (`funique()`, `group()`, `fmatch()`, `fndistinct()`, `fmode()`, and all higher-level derivatives). This is implemented by adding a value of `0.0` to double values before hashing them, and has a small (~3%) performance penalty when hashing doubles. It is implemented in synch with an [equivalent change in *Rcpp*](https://github.com/RcppCore/Rcpp/issues/1340). Thanks @mayer79 for reporting and helping with benchmarking the performance implications (#648). * Fixed a bug in `pivot(..., how = "wider", FUN = "sum")` (using internal sum function) when columns to aggregate were integer typed. Thanks @ummel (#803). * Fixed a bug in `roworderv(..., neworder = indices)`, which segfaulted if `indices` were out of range. Thanks @JanMarvin (#807). * Faster installation from source thanks to the `#include ` option in *Rcpp* which loads only part of the header files. Thanks @eddelbuettel for the hint. * Consistency with internal updates to *data.table*. Thanks @aitap (https://github.com/fastverse/collapse/pull/809, https://github.com/Rdatatable/data.table/issues/7497). # collapse 2.1.5 * Fixed small bugs/strange behavior in `collap()` when `g` was passed externally (as columns or *GRP* object). E.g., in `collap(x, g, w = ~ col)`, where `g` is a *GRP* object, the weights were aggregated twice: once using `FUN` (incorrect) and once using `wFUN`. # collapse 2.1.4 * *collapse* now has a custom internal version of `unlist()` with better attribute preservation capabilities and a slight speed improvement. Thanks @aidanhorn (#785). * Fixes (#794) -- thanks @kendonB for reporting and making an effort to create a reprex. # collapse 2.1.3 * Various bug fixes (#769, #772 and #779). # collapse 2.1.2 * `na_insert()` has new argument `set` to do this by reference. * Some moderate performance improvements to `gsplit()`/`BY()` and `pivot()`. # collapse 2.1.1 * `alloc(list(1), 2)` now gives `list(1, 1)` instead of `list(list(1), list(1))`, which can still be generated with `alloc(list(1), 2, simplify = FALSE)`. This change also affects `ftransform()`/`fmutate()`, making, e.g., `fmutate(data, y = list(1))` consistent with `dplyr::mutate(data, y = list(1))`. Thanks @MattAFiedler (#753). * `fslice()` now works with *sf* data frames. # collapse 2.1.0 *collapse* 2.1.0, released in March 2025, introduces a fast slicing function, an improved weighted quantile algorithm, a few convenience features, and removes some legacy functions from the package. ### Potentially breaking changes * Functions `pwNobs`, `as.factor_GRP`, `as.factor_qG`, `is.GRP`, `is.qG`, `is.unlistable`, `is.categorical`, `is.Date`, `as.numeric_factor`, `as.character_factor`, and `Date_vars`, which were renamed in v1.6.0 by either replacing '.' with '_' or using all lower-case letters, and depreciated since then, are now finally removed from the package. * `num_vars()` (and thus also `cat_vars()` and `collap()`) were changed to a simpler C-definition of numeric data types which is more in-line with `is.numeric()`: `is_numeric_C <- function(x) typeof(x) %in% c("integer", "double") && !inherits(x, c("factor", "Date", "POSIXct", "yearmon", "yearqtr"))`. The previous definition was: `is_numeric_C_old <- function(x) typeof(x) %in% c("integer", "double") && (!is.object(x) || inherits(x, c("ts", "units", "integer64")))`. Thus, the definition changed from including only certain classes to excluding the most important classes. Thanks @maouw for flagging this (#727). ### Bug Fixes * Fixed some issues using *collapse* and the *tidyverse* together, particularly regarding tidyverse methods for 'grouped_df' - thanks @NicChr (#645). * More consistent handling of zero-length inputs - they are now also returned in `fmean()` and `fmedian()`/`fnth()` instead of returning `NA` (#628). ### Additions * Added function `fslice()`: a fast alternative to `dplyr::slice_[head|tail|min|max]` that also works with matrices. Thanks @alinacherkas for the proposal and initial implementation (#725). * Added function `groupv()` as programmers version of `group()`, or rather, `groupv()` is now identical to the former `group()`, and `group()` now supports multiple vectors as input e.g. `group(v1, v2)`. This is done for convenience and consistency with `radixorder[v]()`. For backwards compatibility, `group()` also supports a single list as input. * `join()` has a new argument `require` allowing the user to generate messages or errors if the join operation is not successful enough: ``` r join(df1, df2, require = list(x = 0.8, fail = "warning")) #> Warning: Matched 75.0% of records in table df1 (x), but 80.0% is required #> left join: df1[id1, id2] 3/4 (75%) <1:1st> df2[id1, id2] 3/4 (75%) #> id1 id2 name age salary dept #> 1 1 a John 35 60000 IT #> 2 1 b Jane 28 NA #> 3 2 b Bob 42 55000 Marketing #> 4 3 c Carl 50 70000 Sales ``` * `psmat()` now has a `fill` argument to fill empty slots in matrix/array with other elements (default `NULL`/`NA`). ### Improvements * The weighted quantile algorithm in `fquantile()`/`fnth()` was improved to a more theoretically sound method following [excellent notes](https://htmlpreview.github.io/?https://github.com/mjskay/uncertainty-examples/blob/master/weighted-quantiles.html) by [Matthew Kay](https://github.com/mjskay). It now also supports quantile type 4, but it does not skip zero weights anymore, as the new algorithm makes it difficult to skip them 'on the fly'. *Note* that the existing *collapse* algorithm [already had very good](https://github.com/mjskay/uncertainty-examples/issues/2) properties after a bug fix in v2.0.17, but the new algorithm is more exact and also faster. * The *collapse* [**arXiv article**](https://arxiv.org/abs/2403.05038) has been updated and significantly enhanced. It is an excellent resource to get an overview of the package. ### Notes * On CRAN, collapse R dependency was changed to >= 4.1.0 to be able to use the base pipe in examples without generating a NOTE on R CMD check (another absolutely unnecessary restriction). The package depends on R >= 3.5.0 and the DESCRIPTION file on GitHub/R-universe will continue to reflect this. # collapse 2.0.19 * `fmatch(factor(NA), NA)` now gives `1` instead of `NA`. Thanks @NicChr (#675). * New developer focused vignette on [developing with *collapse*](https://fastverse.org/collapse/articles/developing_with_collapse.html). * Fixed minor CRAN issues (#676, #702). * Fixed bug with integer64 types in `rowbind()`. Thanks @arthurgailes for reporting and @aitap for providing a fix (#697). * *collapse* now also has a Bluesky account at https://bsky.app/profile/rcollapse.bsky.social. # collapse 2.0.18 * Cases in `pivot(..., how = "longer")` with no `values` columns now no longer give an error. Thanks @alvarocombo for flagging this (#663). * Fixed bug in `qF(c(4L, 1L, NA), sort = FALSE)`: hash function failure due to a coding bug. Thanks @mayer79 for flagging this (#666). * If `x` is already a `qG` object of the right properties, calling `qG(x)` now does not copy `x` anymore. Thanks @mayer79 (https://github.com/mayer79/effectplots/issues/11). # collapse 2.0.17 * In `GRP.default()`, the `"group.starts"` attribute is always returned, even if there is only one group or every observation is its own group. Thanks @JamesThompsonC (#631). * Fixed a bug in `pivot()` if `na.rm = TRUE` and `how = "wider"|"recast"` and there are multiple `value` columns with different missingness patterns. In this case `na_omit(values)` was applied with default settings to the original (long) value columns, implying potential loss of information. The fix applies `na_omit(values, prop = 1)`, i.e., only removes completely missing rows. * `qDF()/qDT()/qTBL()` now allow a length-2 vector of names to `row.names.col` if `X` is a named atomic vector, e.g., `qDF(fmean(mtcars), c("cars", "mean"))` gives the same as `pivot(fmean(mtcars, drop = FALSE), names = list("car", "mean"))`. * Added a subsection on using internal (ad-hoc) grouping to the *collapse* for *tidyverse* users vignette. * `qsu()` now adds a `WeightSum` column giving the sum of (non-zero or missing) weights if the `w` argument is used. Thanks @mayer79 for suggesting (#650). For panel data (`pid`) the 'Between' sum of weights is also simply the number of groups, and the 'Within' sum of weights is the 'Overall' sum of weights divided by the number of groups. * Fixed an inaccuracy in `fquantile()/fnth()` with weights: As per documentation the target sum is `sumwp = (sum(w) - min(w)) * p`, however, in practice, the weight of the minimum element of `x` was used instead of the minimum weight. Since the smallest element in the sample usually has a small weight this was unnoticed for a long while, but thanks to @Jahnic-kb now reported and fixed (#659). * Fixed a bug in `recode_char()` when `regex = TRUE` and the `default` argument was used. Thanks @alinacherkas for both reporting and fixing (#654). # collapse 2.0.16 * Fixes an installation bug on some Linux systems (conflicting types) (#613). * *collapse* now enforces string encoding in `fmatch()` / `join()`, which caused problems if strings being matched had different encodings (#566, #579, and #618). To avoid noticeable performance implications, checks are done heuristically, i.e., the first, 25th, 50th and 75th percentile and last string of a character vector are checked, and if not UTF8, the entire vector is internally coerced to UTF8 strings *before* the matching process. In general, character vectors in R can contain strings of different encodings, but this is not the case with most regular data. For performance reasons, *collapse* assumes that character vectors are uniform in terms of string encoding. Heterogeneous strings should be coerced using tools like `stringi::stri_trans_general(x, "latin-ascii")`. * Fixes a bug using qualified names for fast statistical functions inside `across()` (#621, thanks @alinacherkas). * *collapse* now depends on R >= 3.4.0 due to the enforcement of `STRICT_R_HEADERS = 1` from R v4.5.0. In particular R API functions were renamed `Calloc -> R_Calloc` and `Free -> R_Free`. # collapse 2.0.15 * Some changes on the C-side to move the package closer to C API compliance (demanded by R-Core). One notable change is that `gsplit()` no longer supports S4 objects (because `SET_S4_OBJECT` is not part of the API and `asS4()` is too expensive for tight loops). I cannot think of a single example where it would be necessary to split an S4 object, but if you do have applications please file an issue. * `pivot()` has new arguments `FUN = "last"` and `FUN.args = NULL`, allowing wide and recast pivots with aggregation (default last value as before). `FUN` currently supports a single function returning a scalar value. *Fast Statistical Functions* receive vectorized execution. `FUN.args` can be used to supply a list of function arguments, including data-length arguments such as weights. There are also a couple of internal functions callable using function strings: `"first"`, `"last"`, `"count"`, `"sum"`, `"mean"`, `"min"`, or `"max"`. These are built into the reshaping C-code and thus extremely fast. Thanks @AdrianAntico for the request (#582). * `join()` now provides enhanced verbosity, indicating the average order of the join between the two tables, e.g. ``` r join(data.frame(id = c(1, 2, 2, 4)), data.frame(id = c(rep(1,4), 2:3))) #> left join: x[id] 3/4 (75%) <1.5:1st> y[id] 2/6 (33.3%) #> id #> 1 1 #> 2 2 #> 3 2 #> 4 4 join(data.frame(id = c(1, 2, 2, 4)), data.frame(id = c(rep(1,4), 2:3)), multiple = TRUE) #> left join: x[id] 3/4 (75%) <1.5:2.5> y[id] 5/6 (83.3%) #> id #> 1 1 #> 2 1 #> 3 1 #> 4 1 #> 5 2 #> 6 2 #> 7 4 ``` * In `collap()`, with multiple functions passed to `FUN` or `catFUN` and `return = "long"`, the `"Function"` column is now generated as a factor variable instead of character (which is more efficient). # collapse 2.0.14 * Updated '*collapse* and *sf*' vignette to reflect the recent support for *units* objects, and added a few more examples. * Fixed a bug in `join()` where a full join silently became a left join if there are no matches between the tables (#574). Thanks @D3SL for reporting. * Added function `group_by_vars()`: A standard evaluation version of `fgroup_by()` that is slimmer and safer for programming, e.g. `data |> group_by_vars(ind1) |> collapg(custom = list(fmean = ind2, fsum = ind3))`. Or, using *magrittr*: ```r library(magrittr) set_collapse(mask = "manip") # for fgroup_vars -> group_vars data %>% group_by_vars(ind1) %>% { add_vars( group_vars(., "unique"), get_vars(., ind2) %>% fmean(keep.g = FALSE) %>% add_stub("mean_"), get_vars(., ind3) %>% fsum(keep.g = FALSE) %>% add_stub("sum_") ) } ``` * Added function `as_integer_factor()` to turn factors/factor columns into integer vectors. `as_numeric_factor()` already exists, but is memory inefficient for most factors where levels can be integers. * `join()` now internally checks if the rows of the joined datasets match exactly. This check, using `identical(m, seq_row(y))`, is inexpensive, but, if `TRUE`, saves a full subset and deep copy of `y`. Thus `join()` now inherits the intelligence already present in functions like `fsubset()`, `roworder()` and `funique()` - a key for efficient data manipulation is simply doing less. * In `join()`, if `attr = TRUE`, the `count` option to `fmatch()` is always invoked, so that the attribute attached always has the same form, regardless of `verbose` or `validate` settings. * `roworder[v]()` has optional setting `verbose = 2L` to indicate if `x` is already sorted, making the call to `roworder[v]()` redundant. # collapse 2.0.13 * *collapse* now explicitly supports *xts*/*zoo* and *units* objects and concurrently removes an additional check in the `.default` method of statistical functions that called the matrix method if `is.matrix(x) && !inherits(x, "matrix")`. This was a smart solution to account for the fact that *xts* objects are matrix-based but don't inherit the `"matrix"` class, thus wrongly calling the default method. The same is the case for *units*, but here, my recent more intensive engagement with spatial data convinced me that this should be changed. For one, under the previous heuristic solution, it was not possible to call the default method on a *units* matrix, e.g., `fmean.default(st_distance(points_sf))` called `fmean.matrix()` and yielded a vector. This should not be the case. Secondly, aggregation e.g. `fmean(st_distance(points_sf))` or `fmean(st_distance(points_sf), g = group_vec)` yielded a plain numeric object that lost the *units* class (in line with the [general attribute handling principles](https://fastverse.org/collapse/articles/collapse_object_handling.html#general-principles)). Therefore, I have now decided to remove the heuristic check within the default methods, and explicitly support *zoo* and *units* objects. For [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), the methods are `FUN.zoo <- function(x, ...) if(is.matrix(x)) FUN.matrix(x, ...) else FUN.default(x, ...)` and `FUN.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(FUN.matrix(x, ...), x) else FUN.default(x, ...)`. While the behavior for *xts*/*zoo* remains the same, the behavior for *units* is enhanced, as now the class is preserved in aggregations (the [`.default` method preserves attributes except for *ts*](https://fastverse.org/collapse/articles/collapse_object_handling.html#general-principles)), and it is possible to manually invoke the `.default` method on a units matrix and obtain an aggregate statistic. This change may impact computations on other matrix based classes which don't inherit from `"matrix"` (*mts* does inherit from `"matrix"`, and I am not aware of any other affected classes, but user code like `m <- matrix(rnorm(25), 5); class(m) <- "bla"; fmean(m)` will now yield a scalar instead of a vector. Such code must be adjusted to either `class(m) <- c("bla", "matrix")` or `fmean.matrix(m)`). Overall, the change makes *collapse* behave in a more standard and predictable way, and enhances its support for *units* objects central in the *sf* ecosystem. * `fquantile()` now also preserves the attributes of the input, in line with `quantile()`. # collapse 2.0.12 * Fixes some issues with signed int overflows inside hash functions and possible protect bugs flagged by RCHK. # collapse 2.0.11 * An [**article**](https://arxiv.org/abs/2403.05038) on *collapse* has been submitted to the [Journal of Statistical Software](https://www.jstatsoft.org/). The preprint is available through *arXiv*. * Removed *magrittr* from most documentation examples (using base pipe). * Improved `plot.GRP` a little bit - on request of JSS editors. # collapse 2.0.10 * Fixed a bug in `fmatch()` when matching integer vectors to factors. This also affected `join()`. * Improved cross-platform compatibility of OpenMP flags. Thanks @kalibera. * Added `stub = TRUE` argument to the *grouped_df* methods of *Fast Statistical Functions* supporting weights, to be able to remove or alter prefixes given to aggregated weights columns if `keep.w = TRUE`. Globally, users can set `st_collapse(stub = FALSE)` to disable this prefixing in all statistical functions and operators. # collapse 2.0.9 * Added functions `na_locf()` and `na_focb()` for fast basic C implementations of these procedures (optionally by reference). `replace_na()` now also has a `type` argument which supports options `"locf"` and `"focb"` (default `"const"`), similar to `data.table::nafill`. The implementation also supports character data and list-columns (`NULL/empty` elements). Thanks @BenoitLondon for suggesting (#489). I note that `na_locf()` exists in some other packages (such as *imputeTS*) where it is implemented in R and has additional options. Users should utilize the flexible namespace i.e. `set_collapse(remove = "na_locf")` to deal with this. * Fixed a bug in weighted quantile estimation (`fquantile()`) that could lead to wrong/out-of-range estimates in some cases. Thanks @zander-prinsloo for reporting (#523). * Improved right join such that join column names of `x` instead of `y` are preserved. This is more consistent with the other joins when join columns in `x` and `y` have different names. * More fluent and safe interplay of 'mask' and 'remove' options in `set_collapse()`: it is now seamlessly possible to switch from any combination of 'mask' and 'remove' to any other combination without the need of setting them to `NULL` first. # collapse 2.0.8 * In `pivot(..., values = [multiple columns], labels = "new_labels_column", how = "wieder")`, if the columns selected through `values` already have variable labels, they are concatenated with the new labels provided through `"new_labels_col"` using `" - "` as a separator (similar to `names` where the separator is `"_"`). * `whichv()` and operators `%==%`, `%!=%` now properly account for missing double values, e.g. `c(NA_real_, 1) %==% c(NA_real_, 1)` yields `c(1, 2)` rather than `2`. Thanks @eutwt for flagging this (#518). * In `setv(X, v, R)`, if the type of `R` is greater than `X` e.g. `setv(1:10, 1:3, 9.5)`, then a warning is issued that conversion of `R` to the lower type (real to integer in this case) may incur loss of information. Thanks @tony-aw for suggesting (#498). * `frange()` has an option `finite = FALSE`, like `base::range`. Thanks @MLopez-Ibanez for suggesting (#511). * `varying.pdata.frame(..., any_group = FALSE)` now unindexes the result (as should be the case). # collapse 2.0.7 * Fixed bug in full join if `verbose = 0`. Thanks @zander-prinsloo for reporting. * Added argument `multiple = FALSE` to `join()`. Setting `multiple = TRUE` performs a multiple-matching join where a row in `x` is matched to all matching rows in `y`. The default `FALSE` just takes the first matching row in `y`. * Improved recode/replace functions. Notably, `replace_outliers()` now supports option `value = "clip"` to replace outliers with the respective upper/lower bounds, and also has option `single.limit = "mad"` which removes outliers exceeding a certain number of median absolute deviations. Furthermore, all functions now have a `set` argument which fully applies the transformations by reference. * Functions `replace_NA` and `replace_Inf` were renamed to `replace_na` and `replace_inf` to make the namespace a bit more consistent. The earlier versions remain available. # collapse 2.0.6 * Fixed a serious bug in `qsu()` where higher order weighted statistics were erroneous, i.e. whenever `qsu(x, ..., w = weights, higher = TRUE)` was invoked, the 'SD', 'Skew' and 'Kurt' columns were wrong (if `higher = FALSE` the weighted 'SD' is correct). The reason is that there appears to be no straightforward generalization of Welford's Online Algorithm to higher-order weighted statistics. This was not detected earlier because the algorithm was only tested with unit weights. The fix involved replacing Welford's Algorithm for the higher-order weighted case by a 2-pass method, that additionally uses long doubles for higher-order terms. Thanks @randrescastaneda for reporting. * Fixed some unexpected behavior in `t_list()` where names 'V1', 'V2', etc. were assigned to unnamed inner lists. It now preserves the missing names. Thanks @orgadish for flagging this. # collapse 2.0.5 * In `join`, the if `y` is an expression e.g. `join(x = mtcars, y = subset(mtcars, mpg > 20))`, then its name is not extracted but just set to `"y"`. Before, the name of `y` would be captured as `as.character(substitute(y))[1] = "subset"` in this case. This is an improvement mainly for display purposes, but could also affect code if there are duplicate columns in both datasets and `suffix` was not provided in the `join` call: before, y-columns would be renamed using a (non-sensible) `"_subset"` suffix, but now using a `"_y"` suffix. Note that this only concerns cases where `y` is an expression rather than a single object. * Small performance improvements to `%[!]in%` operators: `%!in%` now uses `is.na(fmatch(x, table))` rather than `fmatch(x, table, 0L) == 0L`, and `%in%`, if exported using `set_collapse(mask = "%in%"|"special"|"all")` is `as.logical(fmatch(x, table, 0L))` instead of `fmatch(x, table, 0L) > 0L`. The latter are faster because comparison operators `>`, `==` with integers additionally need to check for `NA`'s (= the smallest integer in C). # collapse 2.0.4 * In `fnth()/fquantile()`, there has been a slight change to the weighted quantile algorithm. As outlined in the documentation, this algorithm gives weighted versions for all continuous quantile methods (type 7-9) in R by replacing sample quantities with their weighted counterparts. E.g., for the default quantile type 7, the continuous (lower) target element is `(n - 1) * p`. In the weighted algorithm, this became `(sum(w) - mean(w)) * p` and was compared to the cumulative sum of ordered (by `x`) weights, to preserve equivalence of the algorithms in cases where the weights are all equal. However, upon a second thought, the use of `mean(w)` does not really reflect a standard interpretation of the weights as frequencies. I have reasoned that using `min(w)` instead of `mean(w)` better reflects such an interpretation, as the minimum (non-zero) weight reflects the size of the smallest sampled unit. So the weighted quantile type 7 target is now `(sum(w) - min(w)) * p`, and also the other methods have been adjusted accordingly (note that zero weight observations are ignored in the algorithm). * This is more a *Note* than a change to the package: there is an [issue with *vctrs*](https://github.com/r-lib/vctrs/issues/1888) that users can encounter using *collapse* together with the *tidyverse* (especially *ggplot2*), which is that *collapse* internally optimizes computations on factors by giving them an additional `"na.included"` class if they are known to not contain any missing values. For example `pivot(mtcars)` gives a `"variable"` factor which has class `c("factor", "na.included")`, such that grouping on `"variable"` in subsequent operations is faster. Unfortunately, `pivot(mtcars) |> ggplot(aes(y = value)) + geom_histogram() + facet_wrap( ~ variable)` currently gives an error produced by *vctrs*, because *vctrs* does not implement a standard S3 method dispatch and thus does not ignore the `"na.included"` class. It turns out that the only way for me to deal with this is would be to swap the order of classes i.e. `c("na.included", "factor")`, import *vctrs*, and implement `vec_ptype2` and `vec_cast` methods for `"na.included"` objects. This will never happen, as *collapse* is and will remain independent of the *tidyverse*. There are two ways you can deal with this: The first way is to remove the `"na.included"` class for *ggplot2* e.g. `facet_wrap( ~ set_class(variable, "factor"))` or `facet_wrap( ~ factor(variable))` will both work. The second option is to define a function `vec_ptype2.factor.factor <- function(x, y, ...) x` in your global environment, which avoids *vctrs* performing extra checks on factor objects. # collapse 2.0.3 * Fixed a signed integer overflow inside a hash function detected by CRAN checks (changing to unsigned int). * Updated the cheatsheet (see README.md). # collapse 2.0.2 * Added global option 'stub' (default `TRUE`) to `set_collapse`. It is passed to the `stub(s)` arguments of the statistical operators, `B`, `W`, `STD`, `HDW`, `HDW`, `L`, `D`, `Dlog`, `G` (in `.OPERATOR_FUN`). By default these operators add a prefix/stub to transformed matrix or data.frame columns. Setting `set_collapse(stub = FALSE)` now allows to switch off this behavior such that columns are not prepended with a prefix (by default). * `roworder[v]()` now also supports grouped data frames, but prints a message indicating that this is inefficient (also for indexed data). An additional argument `verbose` can be set to `0` to avoid such messages. # collapse 2.0.1 * `%in%` with `set_collapse(mask = "%in%")` does not warn anymore about overidentification when used with data frames (i.e. using `overid = 2` in `fmatch()`). * Fixed several typos in the documentation. # collapse 2.0.0 *collapse* 2.0, released in Mid-October 2023, introduces fast table joins and data reshaping capabilities alongside other convenience functions, and enhances the packages global configurability, including interactive namespace control. ### Potentially breaking changes * In a grouped setting, if `.data` is used inside `fsummarise()` and `fmutate()`, and `.cols = NULL`, `.data` will contain all columns except for grouping columns (in-line with the `.SD` syntax of *data.table*). Before, `.data` contained all columns. The selection in `.cols` still refers to all columns, thus it is still possible to select all columns using e.g. `grouped_data %>% fsummarise(some_expression_involving(.data), .cols = seq_col(.))`. ### Other changes * In `qsu()`, argument `vlabels` was renamed to `labels`. But `vlabels` will continue to work. ### Bug Fixes * Fixed a bug in the integer methods of `fsum()`, `fmean()` and `fprod()` that returned `NA` if and only if there was a single integer followed by `NA`'s e.g `fsum(c(1L, NA, NA))` erroneously gave `NA`. This was caused by a C-level shortcut that returned `NA` when the first element of the vector had been reached (moving from back to front) without encountering any non-NA-values. The bug consisted in the content of the first element not being evaluated in this case. Note that this bug did not occur with real numbers, and also not in grouped execution. Thanks @blset for reporting (#432). ### Additions * Added `join()`: class-agnostic, vectorized, and (default) verbose joins for R, modeled after the *polars* API. Two different join algorithms are implemented: a hash-join (default, if `sort = FALSE`) and a sort-merge-join (if `sort = TRUE`). * Added `pivot()`: fast and easy data reshaping! It supports longer, wider and recast pivoting, including handling of variable labels, through a uniform and parsimonious API. It does not perform data aggregation, and by default does not check if the data is uniquely identified by the supplied ids. Underidentification for 'wide' and 'recast' pivots results in the last value being taken within each group. Users can toggle a duplicates check by setting `check.dups = TRUE`. * Added `rowbind()`: a fast class-agnostic alternative to `rbind.data.frame()` and `data.table::rbindlist()`. * Added `fmatch()`: a fast `match()` function for vectors and data frames/lists. It is the workhorse function of `join()`, and also benefits `ckmatch()`, `%!in%`, and new operators `%iin%` and `%!iin%` (see below). It is also possible to `set_collapse(mask = "%in%")` to replace `base::"%in%"` using `fmatch()`. Thanks to `fmatch()`, these operators also all support data frames/lists of vectors, which are compared row-wise. * Added operators `%iin%` and `%!iin%`: these directly return indices, i.e. `%[!]iin%` is equivalent to `which(x %[!]in% table)`. This is useful especially for subsetting where directly supplying indices is more efficient e.g. `x[x %[!]iin% table]` is faster than `x[x %[!]in% table]`. Similarly `fsubset(wlddev, iso3c %iin% c("DEU", "ITA", "FRA"))` is very fast. * Added `vec()`: efficiently turn matrices or data frames / lists into a single atomic vector. I am aware of multiple implementations in other packages, which are mostly inefficient. With atomic objects, `vec()` simply removes the attributes without copying the object, and with lists it directly calls `C_pivot_longer`. ### Improvements * `set_collapse()` now supports options 'mask' and 'remove', giving *collapse* a flexible namespace in the broadest sense that can be changed at any point within the active session: - 'mask' supports base R or *dplyr* functions that can be masked into the faster *collapse* versions. E.g. `library(collapse); set_collapse(mask = "unique")` (or, equivalently, `set_collapse(mask = "funique")`) will create `unique <- funique` in the *collapse* namespace, export `unique()` from the namespace, and detach and attach the namespace again so R can find it. The re-attaching also ensures that *collapse* comes right after the global environment, implying that all it's functions will take priority over other libraries. Users can use `fastverse::fastverse_conflicts()` to check which functions are masked after using `set_collapse(mask = ...)`. The option can be changed at any time. Using `set_collapse(mask = NULL)` removes all masked functions from the namespace, and can also be called simply to ensure *collapse* is at the top of the search path. - 'remove' allows removing arbitrary functions from the *collapse* namespace. E.g. `set_collapse(remove = "D")` will remove the difference operator `D()`, which also exists in *stats* to calculate symbolic and algorithmic derivatives (this is a convenient example but not necessary since `collapse::D` is S3 generic and will call `stats::D()` on R calls, expressions or names). This is safe to do as it only modifies which objects are exported from the namespace (it does not truly remove objects from the namespace). This option can also be changed at any time. `set_collapse(remove = NULL)` will restore the exported namespace. For both options there exist a number of convenient keywords to bulk-mask / remove functions. For example `set_collapse(mask = "manip", remove = "shorthand")` will mask all data manipulation functions such as `mutate <- fmutate` and remove all function shorthands such as `mtt` (i.e. abbreviations for frequently used functions that *collapse* supplies for faster coding / prototyping). * `set_collapse()` also supports options 'digits', 'verbose' and 'stable.algo', enhancing the global configurability of *collapse*. * `qM()` now also has a `row.names.col` argument in the second position allowing generation of rownames when converting data frame-like objects to matrix e.g. `qM(iris, "Species")` or `qM(GGDC10S, 1:5)` (interaction of id's). * `as_factor_GRP()` and `finteraction()` now have an argument `sep = "."` denoting the separator used for compound factor labels. * `alloc()` now has an additional argument `simplify = TRUE`. `FALSE` always returns list output. * `frename()` supports both `new = old` (*pandas*, used to far) and `old = new` (*dplyr*) style renaming conventions. * `across()` supports negative indices, also in grouped settings: these will select all variables apart from grouping variables. * `TRA()` allows shorthands `"NA"` for `"replace_NA"` and `"fill"` for `"replace_fill"`. * `group()` experienced a minor speedup with >= 2 vectors as the first two vectors are now hashed jointly. * `fquantile()` with `names = TRUE` adds up to 1 digit after the comma in the percent-names, e.g. `fquantile(airmiles, probs = 0.001)` generates appropriate names (not 0% as in the previous version). # collapse 1.9.6 * New vignette on [*collapse*'s Handling of R Objects](https://fastverse.org/collapse/articles/collapse_object_handling.html): provides an overview of collapse’s (internal) class-agnostic R programming framework. * `print.descr()` with groups and option `perc = TRUE` (the default) also shows percentages of the group frequencies for each variable. * `funique(mtcars[NULL, ], sort = TRUE)` gave an error (for data frame with zero rows). Thanks @NicChr (#406). * Added SIMD vectorization for `fsubset()`. * `vlengths()` now also works for strings, and is hence a much faster version of both `lengths()` and `nchar()`. Also for atomic vectors the behavior is like `lengths()`, e.g. `vlengths(rnorm(10))` gives `rep(1L, 10)`. * In `collap[v/g]()`, the `...` argument is now placed after the `custom` argument instead of after the last argument, in order to better guard against unwanted partial argument matching. In particular, previously the `n` argument passed to `fnth` was partially matched to `na.last`. Thanks @ummel for alerting me of this (#421). # collapse 1.9.5 * Using `DATAPTR_RO` to point to R lists because of the use of `ALTLISTS` on R-devel. * Replacing `!=` loop controls for SIMD loops with `<` to ensure compatibility on all platforms. Thanks @albertus82 (#399). # collapse 1.9.4 * Improvements in `get_elem()/has_elem()`: Option `invert = TRUE` is implemented more robustly, and a function passed to `get_elem()/has_elem()` is now applied to all elements in the list, including elements that are themselves list-like. This enables the use of `inherits` to find list-like objects inside a broader list structure e.g. `get_elem(l, inherits, what = "lm")` fetches all linear model objects inside `l`. * Fixed a small bug in `descr()` introduced in v1.9.0, producing an error if a data frame contained no numeric columns - because an internal function was not defined in that case. Also, POSIXct columns are handled better in print - preserving the time zone (thanks @cdignam-chwy #392). * `fmean()` and `fsum()` with `g = NULL`, as well as `TRA()`, `setop()`, and related operators `%r+%`, `%+=%` etc., `setv()` and `fdist()` now utilize Single Instruction Multiple Data (SIMD) vectorization by default (if OpenMP is enabled), enabling potentially very fast computing speeds. Whether these instructions are utilized during compilation depends on your system. In general, if you want to max out *collapse* on your system, consider compiling from source with `CFLAGS += -O3 -march=native -fopenmp` and `CXXFLAGS += -O3 -march=native` in your [`.R/Makevars`](https://cran.r-project.org/doc/manuals/r-devel/R-admin.html#Customizing-package-compilation). # collapse 1.9.3 * Added functions `fduplicated()` and `any_duplicated()`, for vectors and lists / data frames. Thanks @NicChr (#373) * `sort` option added to `set_collapse()` to be able to set unordered grouping as a default. E.g. setting `set_collapse(sort = FALSE)` will affect `collap()`, `BY()`, `GRP()`, `fgroup_by()`, `qF()`, `qG()`, `finteraction()`, `qtab()` and internal use of these functions for ad-hoc grouping in fast statistical functions. Other uses of `sort`, for example in `funique()` where the default is `sort = FALSE`, are not affected by the global default setting. * Fixed a small bug in `group()` / `funique()` resulting in an unnecessary memory allocation error in rare cases. Thanks @NicChr (#381). # collapse 1.9.2 * Further fix to an Address Sanitizer issue as required by CRAN (eliminating an unused out of bounds access at the end of a loop). * `qsu()` finally has a grouped_df method. * Added options `option("collapse_nthreads")` and `option("collapse_na.rm")`, which allow you to load *collapse* with different defaults e.g. through an `.Rprofile` or `.fastverse` configuration file. Once *collapse* is loaded, these options take no effect, and users need to use `set_collapse()` to change `.op[["nthreads"]]` and `.op[["na.rm"]]` interactively. * Exported method `plot.psmat()` (can be useful to plot time series matrices). # collapse 1.9.1 * Fixed minor C/C++ issues flagged by CRAN's detailed checks. * Added functions `set_collapse()` and `get_collapse()`, allowing you to globally set defaults for the `nthreads` and `na.rm` arguments to all functions in the package. E.g. `set_collapse(nthreads = 4, na.rm = FALSE)` could be a suitable setting for larger data without missing values. This is implemented using an internal environment by the name of `.op`, such that these defaults are received using e.g. `.op[["nthreads"]]`, at the computational cost of a few nanoseconds (8-10x faster than `getOption("nthreads")` which would take about 1 microsecond). `.op` is not accessible by the user, so function `get_collapse()` can be used to retrieve settings. Exempt from this are functions `.quantile`, and a new function `.range` (alias of `frange`), which go directly to C for maximum performance in repeated executions, and are not affected by these global settings. Function `descr()`, which internally calls a bunch of statistical functions, is also not affected by these settings. * Further improvements in thread safety for `fsum()` and `fmean()` in grouped computations across data frame columns. All OpenMP enabled functions in *collapse* can now be considered thread safe i.e. they pass the full battery of tests in multithreaded mode. # collapse 1.9.0 *collapse* 1.9.0 released mid of January 2023, provides improvements in performance and versatility in many areas, as well as greater statistical capabilities, most notably efficient (grouped, weighted) estimation of sample quantiles. ### Changes to functionality * All functions renamed in *collapse* 1.6.0 are now depreciated, to be removed end of 2023. These functions had already been giving messages since v1.6.0. See `help("collapse-renamed")`. * The lead operator `F()` is not exported anymore from the package namespace, to avoid clashes with `base::F` flagged by multiple people. The operator is still part of the package and can be accessed using `collapse:::F`. I have also added an option `"collapse_export_F"`, such that setting `options(collapse_export_F = TRUE)` before loading the package exports the operator as before. Thanks @matthewross07 (#100), @edrubin (#194), and @arthurgailes (#347). * Function `fnth()` has a new default `ties = "q7"`, which gives the same result as `quantile(..., type = 7)` (R's default). More details below. ### Bug Fixes * `fmode()` gave wrong results for singleton groups (groups of size 1) on *unsorted* data. I had optimized `fmode()` for singleton groups to directly return the corresponding element, but it did not access the element through the (internal) ordering vector, so the first element/row of the entire vector/data was taken. The same mistake occurred for `fndistinct` if singleton groups were `NA`, which were counted as `1` instead of `0` under the `na.rm = TRUE` default (provided the first element of the vector/data was not `NA`). The mistake did not occur with data sorted by the groups, because here the data pointer already pointed to the first element of the group. (My apologies for this bug, it took me more than half a year to discover it, using *collapse* on a daily basis, and it escaped 700 unit tests as well). * Function `groupid(x, na.skip = TRUE)` returned uninitialized first elements if the first values in `x` where `NA`. Thanks for reporting @Henrik-P (#335). * Fixed a bug in the `.names` argument to `across()`. Passing a naming function such as `.names = function(c, f) paste0(c, "-", f)` now works as intended i.e. the function is applied to all combinations of columns (c) and functions (f) using `outer()`. Previously this was just internally evaluated as `.names(cols, funs)`, which did not work if there were multiple cols and multiple funs. There is also now a possibility to set `.names = "flip"`, which names columns `f_c` instead of `c_f`. * `fnrow()` was rewritten in C and also supports data frames with 0 columns. Similarly for `seq_row()`. Thanks @NicChr (#344). ### Additions * Added functions `fcount()` and `fcountv()`: a versatile and blazing fast alternative to `dplyr::count`. It also works with vectors, matrices, as well as grouped and indexed data. * Added function `fquantile()`: Fast (weighted) continuous quantile estimation (methods 5-9 following Hyndman and Fan (1996)), implemented fully in C based on quickselect and radixsort algorithms, and also supports an ordering vector as optional input to speed up the process. It is up to 2x faster than `stats::quantile` on larger vectors, but also especially fast on smaller data, where the R overhead of `stats::quantile` becomes burdensome. For maximum performance during repeated executions, a programmers version `.quantile()` with different defaults is also provided. * Added function `fdist()`: A fast and versatile replacement for `stats::dist`. It computes a full euclidean distance matrix around 4x faster than `stats::dist` in serial mode, with additional gains possible through multithreading along the distance matrix columns (decreasing thread loads as the matrix is lower triangular). It also supports computing the distance of a matrix with a single row-vector, or simply between two vectors. E.g. `fdist(mat, mat[1, ])` is the same as `sqrt(colSums((t(mat) - mat[1, ])^2)))`, but about 20x faster in serial mode, and `fdist(x, y)` is the same as `sqrt(sum((x-y)^2))`, about 3x faster in serial mode. In both cases (sub-column level) multithreading is available. *Note* that `fdist` does not skip missing values i.e. `NA`'s will result in `NA` distances. There is also no internal implementation for integers or data frames. Such inputs will be coerced to numeric matrices. * Added function `GRPid()` to easily fetch the group id from a grouping object, especially inside grouped `fmutate()` calls. This addition was warranted especially by the new improved `fnth.default()` method which allows orderings to be supplied for performance improvements. See commends on `fnth()` and the example provided below. * `fsummarize()` was added as a synonym to `fsummarise`. Thanks @arthurgailes for the PR. * **C API**: *collapse* exports around 40 C functions that provide functionality that is either convenient or rather complicated to implement from scratch. The exported functions can be found at the bottom of `src/ExportSymbols.c`. The API does not include the *Fast Statistical Functions*, which I thought are too closely related to how *collapse* works internally to be of much use to a C programmer (e.g. they expect grouping objects or certain kinds of integer vectors). But you are free to request the export of additional functions, including C++ functions. ### Improvements * `fnth()` and `fmedian()` were rewritten in C, with significant gains in performance and versatility. Notably, `fnth()` now supports (grouped, weighted) continuous quantile estimation like `fquantile()` (`fmedian()`, which is a wrapper around `fnth()`, can also estimate various quantile based weighted medians). The new default for `fnth()` is `ties = "q7"`, which gives the same result as `(f)quantile(..., type = 7)` (R's default). OpenMP multithreading across groups is also much more effective in both the weighted and unweighted case. Finally, `fnth.default` gained an additional argument `o` to pass an ordering vector, which can dramatically speed up repeated invocations of the function on the dame data: ```r # Estimating multiple weighted-grouped quantiles on mpg: pre-computing an ordering provides extra speed. mtcars %>% fgroup_by(cyl, vs, am) %>% fmutate(o = radixorder(GRPid(), mpg)) %>% # On grouped data, need to account for GRPid() fsummarise(mpg_Q1 = fnth(mpg, 0.25, o = o, w = wt), mpg_median = fmedian(mpg, o = o, w = wt), mpg_Q3 = fnth(mpg, 0.75, o = o, w = wt)) # Note that without weights this is not always faster. Quickselect can be very efficient, so it depends # on the data, the number of groups, whether they are sorted (which speeds up radixorder), etc... ``` * `BY` now supports data-length arguments to be passed e.g. `BY(mtcars, mtcars$cyl, fquantile, w = mtcars$wt)`, making it effectively a generic grouped `mapply` function as well. Furthermore, the grouped_df method now also expands grouping columns for output length > 1. * `collap()`, which internally uses `BY` with non-*Fast Statistical Functions*, now also supports arbitrary further arguments passed down to functions to be split by groups. Thus users can also apply custom weighted functions with `collap()`. Furthermore, the parsing of the `FUN`, `catFUN` and `wFUN` arguments was improved and brought in-line with the parsing of `.fns` in `across()`. The main benefit of this is that *Fast Statistical Functions* are now also detected and optimizations carried out when passed in a list providing a new name e.g. `collap(data, ~ id, list(mean = fmean))` is now optimized! Thanks @ttrodrigz (#358) for requesting this. * `descr()`, by virtue of `fquantile` and the improvements to `BY`, supports full-blown grouped and weighted descriptions of data. This is implemented through additional `by` and `w` arguments. The function has also been turned into an S3 generic, with a default and a 'grouped_df' method. The 'descr' methods `as.data.frame` and `print` also feature various improvements, and a new `compact` argument to `print.descr`, allowing a more compact printout. Users will also notice improved performance, mainly due to `fquantile`: on the M1 `descr(wlddev)` is now 2x faster than `summary(wlddev)`, and 41x faster than `Hmisc::describe(wlddev)`. Thanks @statzhero for the request (#355). * `radixorder` is about 25% faster on characters and doubles. This also benefits grouping performance. Note that `group()` may still be substantially faster on unsorted data, so if performance is critical try the `sort = FALSE` argument to functions like `fgroup_by` and compare. * Most list processing functions are noticeably faster, as checking the data types of elements in a list is now also done in C, and I have made some improvements to *collapse*'s version of `rbindlist()` (used in `unlist2d()`, and various other places). * `fsummarise` and `fmutate` gained an ability to evaluate arbitrary expressions that result in lists / data frames without the need to use `across()`. For example: `mtcars |> fgroup_by(cyl, vs, am) |> fsummarise(mctl(cor(cbind(mpg, wt, carb)), names = TRUE))` or `mtcars |> fgroup_by(cyl) |> fsummarise(mctl(lmtest::coeftest(lm(mpg ~ wt + carb)), names = TRUE))`. There is also the possibility to compute expressions using `.data` e.g. `mtcars |> fgroup_by(cyl) |> fsummarise(mctl(lmtest::coeftest(lm(mpg ~ wt + carb, .data)), names = TRUE))` yields the same thing, but is less efficient because the whole dataset (including 'cyl') is split by groups. For greater efficiency and convenience, you can pre-select columns using a global `.cols` argument, e.g. `mtcars |> fgroup_by(cyl, vs, am) |> fsummarise(mctl(cor(.data), names = TRUE), .cols = .c(mpg, wt, carb))` gives the same as above. Three *Notes* about this: + No grouped vectorizations for fast statistical functions i.e. the entire expression is evaluated for each group. (Let me know if there are applications where vectorization would be possible and beneficial. I can't think of any.) + All elements in the result list need to have the same length, or, for `fmutate`, have the same length as the data (in each group). + If `.data` is used, the entire expression (`expr`) will be turned into a function of `.data` (`function(.data) expr`), which means columns are only available when accessed through `.data` e.g. `.data$col1`. * `fsummarise` supports computations with mixed result lengths e.g. `mtcars |> fgroup_by(cyl) |> fsummarise(N = GRPN(), mean_mpg = fmean(mpg), quantile_mpg = fquantile(mpg))`, as long as all computations result in either length 1 or length k vectors, where k is the maximum result length (e.g. for `fquantile` with default settings k = 5). * List extraction function `get_elem()` now has an option `invert = TRUE` (default `FALSE`) to remove matching elements from a (nested) list. Also the functionality of argument `keep.class = TRUE` is implemented in a better way, such that the default `keep.class = FALSE` toggles classes from (non-matched) list-like objects inside the list to be removed. * `num_vars()` has become a bit smarter: columns of class 'ts' and 'units' are now also recognized as numeric. In general, users should be aware that `num_vars()` does not regard any R methods defined for `is.numeric()`, it is implemented in C and simply checks whether objects are of type integer or double, and do not have a class. The addition of these two exceptions now guards against two common cases where `num_vars()` may give undesirable outcomes. Note that `num_vars()` is also called in `collap()` to distinguish between numeric (`FUN`) and non-numeric (`catFUN`) columns. * Improvements to `setv()` and `copyv()`, making them more robust to borderline cases: `integer(0)` passed to `v` does nothing (instead of error), and it is also possible to pass a single real index if `vind1 = TRUE` i.e. passing `1` instead of `1L` does not produce an error. * `alloc()` now works with all types of objects i.e. it can replicate any object. If the input is non-atomic, atomic with length > 1 or `NULL`, the output is a list of these objects, e.g. `alloc(NULL, 10)` gives a length 10 list of `NULL` objects, or `alloc(mtcars, 10)` gives a list of `mtcars` datasets. Note that in the latter case the datasets are not deep-copied, so no additional memory is consumed. * `missing_cases()` and `na_omit()` have gained an argument `prop = 0`, indicating the proportion of values missing for the case to be considered missing/to be omitted. The default value of `0` indicates that at least 1 value must be missing. Of course setting `prop = 1` indicates that all values must be missing. For data frames/lists the checking is done efficiently in C. For matrices this is currently still implemented using `rowSums(is.na(X)) >= max(as.integer(prop * ncol(X)), 1L)`, so the performance is less than optimal. * `missing_cases()` has an extra argument `count = FALSE`. Setting `count = TRUE` returns the case-wise missing value count (by `cols`). * Functions `frename()` and `setrename()` have an additional argument `.nse = TRUE`, conforming to the default non-standard evaluation of tagged vector expressions e.g. `frename(mtcars, mpg = newname)` is the same as `frename(mtcars, mpg = "newname")`. Setting `.nse = FALSE` allows `newname` to be a variable holding a name e.g. `newname = "othername"; frename(mtcars, mpg = newname, .nse = FALSE)`. Another use of the argument is that a (named) character vector can now be passed to the function to rename a (subset of) columns e.g. `cvec = letters[1:3]; frename(mtcars, cvec, cols = 4:6, .nse = FALSE)` (this works even with `.nse = TRUE`), and `names(cvec) = c("cyl", "vs", "am"); frename(mtcars, cvec, .nse = FALSE)`. Furthermore, `setrename()` now also returns the renamed data invisibly, and `relabel()` and `setrelabel()` have also gained similar flexibility to allow (named) lists or vectors of variable labels to be passed. *Note* that these function have no NSE capabilities, so they work essentially like `frename(..., .nse = FALSE)`. * Function `add_vars()` became a bit more flexible and also allows single vectors to be added with tags e.g. `add_vars(mtcars, log_mpg = log(mtcars$mpg), STD(mtcars))`, similar to `cbind`. However `add_vars()` continues to not replicate length 1 inputs. * Safer multithreading: OpenMP multithreading over parts of the R API is minimized, reducing errors that occurred especially when multithreading across data frame columns. Also the number of threads supplied by the user to all OpenMP enabled functions is ensured to not exceed either of `omp_get_num_procs()`, `omp_get_thread_limit()`, and `omp_get_max_threads()`. # collapse 1.8.9 * Fixed some warnings on rchk and newer C compilers (LLVM clang 10+). * `.pseries` / `.indexed_series` methods also change the implicit class of the vector (attached after `"pseries"`), if the data type changed. e.g. calling a function like `fgrowth` on an integer pseries changed the data type to double, but the "integer" class was still attached after "pseries". * Fixed bad testing for SE inputs in `fgroup_by()` and `findex_by()`. See #320. * Added `rsplit.matrix` method. * `descr()` now by default also reports 10% and 90% quantiles for numeric variables (in line with STATA's detailed summary statistics), and can also be applied to 'pseries' / 'indexed_series'. Furthermore, `descr()` itself now has an argument `stepwise` such that `descr(big_data, stepwise = TRUE)` yields computation of summary statistics on a variable-by-variable basis (and the finished 'descr' object is returned invisibly). The printed result is thus identical to `print(descr(big_data), stepwise = TRUE)`, with the difference that the latter first does the entire computation whereas the former computes statistics on demand. * Function `ss()` has a new argument `check = TRUE`. Setting `check = FALSE` allows subsetting data frames / lists with positive integers without checking whether integers are positive or in-range. For programmers. * Function `get_vars()` has a new argument `rename` allowing select-renaming of columns in standard evaluation programming, e.g. `get_vars(mtcars, c(newname = "cyl", "vs", "am"), rename = TRUE)`. The default is `rename = FALSE`, to warrant full backwards compatibility. See #327. * Added helper function `setattrib()`, to set a new attribute list for an object by reference + invisible return. This is different from the existing function `setAttrib()` (note the capital A), which takes a shallow copy of list-like objects and returns the result. # collapse 1.8.8 * `flm` and `fFtest` are now internal generic with an added formula method e.g. `flm(mpg ~ hp + carb, mtcars, weights = wt)` or `fFtest(mpg ~ hp + carb | vs + am, mtcars, weights = wt)` in addition to the programming interface. Thanks to Grant McDermott for suggesting. * Added method `as.data.frame.qsu`, to efficiently turn the default array outputs from `qsu()` into tidy data frames. * Major improvements to `setv` and `copyv`, generalizing the scope of operations that can be performed to all common cases. This means that even simple base R operations such as `X[v] <- R` can now be done significantly faster using `setv(X, v, R)`. * `n` and `qtab` can now be added to `options("collapse_mask")` e.g. `options(collapse_mask = c("manip", "helper", "n", "qtab"))`. This will export a function `n()` to get the (group) count in `fsummarise` and `fmutate` (which can also always be done using `GRPN()` but `n()` is more familiar to *dplyr* users), and will mask `table()` with `qtab()`, which is principally a fast drop-in replacement, but with some different further arguments. * Added C-level helper function `all_funs`, which fetches all the functions called in an expression, similar to `setdiff(all.names(x), all.vars(x))` but better because it takes account of the syntax. For example let `x = quote(sum(sum))` i.e. we are summing a column named `sum`. Then `all.names(x) = c("sum", "sum")` and `all.vars(x) = "sum"` so that the difference is `character(0)`, whereas `all_funs(x)` returns `"sum"`. This function makes *collapse* smarter when parsing expressions in `fsummarise` and `fmutate` and deciding which ones to vectorize. # collapse 1.8.7 * Fixed a bug in `fscale.pdata.frame` where the default C++ method was being called instead of the list method (i.e. the method didn't work at all). * Fixed 2 minor rchk issues (the remaining ones are spurious). * `fsum` has an additional argument `fill = TRUE` (default `FALSE`) that initializes the result vector with `0` instead of `NA` when `na.rm = TRUE`, so that `fsum(NA, fill = TRUE)` gives `0` like `base::sum(NA, na.rm = TRUE)`. * Slight performance increase in `fmean` with groups if `na.rm = TRUE` (the default). * Significant performance improvement when using base R expressions involving multiple functions and one column e.g. `mid_col = (min(col) + max(col)) / 2` or `lorentz_col = cumsum(sort(col)) / sum(col)` etc. inside `fsummarise` and `fmutate`. Instead of evaluating such expressions on a data subset of one column for each group, they are now turned into a function e.g. `function(x) cumsum(sort(x)) / sum(x)` which is applied to a single vector split by groups. * `fsummarise` now also adds groupings to transformation functions and operators, which allows full vectorization of more complex tasks involving transformations which are subsequently aggregated. A prime example is grouped bivariate linear model fitting, which can now be done using `mtcars |> fgroup_by(cyl) |> fsummarise(slope = fsum(W(mpg), hp) / fsum(W(mpg)^2))`. Before 1.8.7 it was necessary to do a mutate step first e.g. `mtcars |> fgroup_by(cyl) |> fmutate(dm_mpg = W(mpg)) |> fsummarise(slope = fsum(dm_mpg, hp) / fsum(dm_mpg^2))`, because `fsummarise` did not add groupings to transformation functions like `fwithin/W`. Thanks to Brodie Gaslam for making me aware of this. * Argument `return.groups` from `GRP.default` is now also available in `fgroup_by`, allowing grouped data frames without materializing the unique grouping columns. This allows more efficient mutate-only operations e.g. `mtcars |> fgroup_by(cyl, return.groups = FALSE) |> fmutate(across(hp:carb, fscale))`. Similarly for aggregation with dropping of grouping columns `mtcars |> fgroup_by(cyl, return.groups = FALSE) |> fmean()` is equivalent and faster than `mtcars |> fgroup_by(cyl) |> fmean(keep.group_vars = FALSE)`. # collapse 1.8.6 * Fixed further minor issues: - some inline functions in TRA.c needed to be declared 'static' to be local in scope (#275) - timeid.Rd now uses *zoo* package conditionally and limits size of printout # collapse 1.8.5 * Fixed some issues flagged by CRAN: - Installation on some linux distributions failed because omp.h was included after Rinternals.h - Some signed integer overflows while running tests caused UBSAN warnings. (This happened inside a hash function where overflows are not a problem. I changed to unsigned int to avoid the UBSAN warning.) - Ensured that package passes R CMD Check without suggested packages # collapse 1.8.4 * Makevars text substitution hack to have CRAN accept a package that combines C, C++ and OpenMP. Thanks also to @MichaelChirico for pointing me in the right direction. # collapse 1.8.3 * Significant speed improvement in `qF/qG` (factor-generation) for character vectors with more than 100,000 obs and many levels if `sort = TRUE` (the default). For details see the `method` argument of `?qF`. * Optimizations in `fmode` and `fndistinct` for singleton groups. # collapse 1.8.2 * Fixed some rchk issues found by Thomas Kalibera from CRAN. * faster `funique.default` method. * `group` now also internally optimizes on 'qG' objects. # collapse 1.8.1 * Added function `fnunique` (yet another alternative to `data.table::uniqueN`, `kit::uniqLen` or `dplyr::n_distinct`, and principally a simple wrapper for `attr(group(x), "N.groups")`). At present `fnunique` generally outperforms the others on data frames. * `finteraction` has an additional argument `factor = TRUE`. Setting `factor = FALSE` returns a 'qG' object, which is more efficient if just an integer id but no factor object itself is required. * Operators (see `.OPERATOR_FUN`) have been improved a bit such that id-variables selected in the `.data.frame` (`by`, `w` or `t` arguments) or `.pdata.frame` methods (variables in the index) are not computed upon even if they are numeric (since the default is `cols = is.numeric`). In general, if `cols` is a function used to select columns of a certain data type, id variables are excluded from computation even if they are of that data type. It is still possible to compute on id variables by explicitly selecting them using names or indices passed to `cols`, or including them in the lhs of a formula passed to `by`. * Further efforts to facilitate adding the group-count in `fsummarise` and `fmutate`: - if `options(collapse_mask = "all")` before loading the package, an additional function `n()` is exported that works just like `dplyr:::n()`. - otherwise the same can now always be done using `GRPN()`. The previous uses of `GRPN` are unaltered i.e. `GRPN` can also: + fetch group sizes directly grouping object or grouped data frame i.e. `data |> gby(id) |> GRPN()` or `data %>% gby(id) %>% ftransform(N = GRPN(.))` (note the dot). + compute group sizes on the fly, for example `fsubset(data, GRPN(id) > 10L)` or `fsubset(data, GRPN(list(id1, id2)) > 10L)` or `GRPN(data, by = ~ id1 + id2)`. # collapse 1.8.0 *collapse* 1.8.0, released mid of May 2022, brings enhanced support for indexed computations on time series and panel data by introducing flexible 'indexed_frame' and 'indexed_series' classes and surrounding infrastructure, sets a modest start to OpenMP multithreading as well as data transformation by reference in statistical functions, and enhances the packages descriptive statistics toolset. ### Changes to functionality * Functions `Recode`, `replace_non_finite`, depreciated since *collapse* v1.1.0 and `is.regular`, depreciated since *collapse* v1.5.1 and clashing with a more important function in the *zoo* package, are now removed. * *Fast Statistical Functions* operating on numeric data (such as `fmean`, `fmedian`, `fsum`, `fmin`, `fmax`, ...) now preserve attributes in more cases. Previously these functions did not preserve attributes for simple computations using the default method, and only preserved attributes in grouped computations if `!is.object(x)` (see NEWS section for collapse 1.4.0). This meant that `fmin` and `fmax` did not preserve the attributes of Date or POSIXct objects, and none of these functions preserved 'units' objects (used a lot by the *sf* package). Now, attributes are preserved if `!inherits(x, "ts")`, that is the new default of these functions is to generally keep attributes, except for 'ts' objects where doing so obviously causes an unwanted error (note that 'xts' and others are handled by the matrix or data.frame method where other principles apply, see NEWS for 1.4.0). An exception are the functions `fnobs` and `fndistinct` where the previous default is kept. * *Time Series Functions* `flag`, `fdiff`, `fgrowth` and `psacf/pspacf/psccf` (and the operators `L/F/D/Dlog/G`) now internally process time objects passed to the `t` argument (where `is.object(t) && is.numeric(unclass(t))`) via a new function called `timeid` which turns them into integer vectors based on the greatest common divisor (GCD) (see below). Previously such objects were converted to factor. This can change behavior of code e.g. a 'Date' variable representing monthly data may be regular when converted to factor, but is now irregular and regarded as daily data (with a GCD of 1) because of the different day counts of the months. Users should fix such code by either by calling `qG` on the time variable (for grouping / factor-conversion) or using appropriate classes e.g. `zoo::yearmon`. Note that plain numeric vectors where `!is.object(t)` are still used directly for indexation without passing them through `timeid` (which can still be applied manually if desired). * `BY` now has an argument `reorder = TRUE`, which casts elements in the original order if `NROW(result) == NROW(x)` (like `fmutate`). Previously the result was just in order of the groups, regardless of the length of the output. To obtain the former outcome users need to set `reorder = FALSE`. * `options("collapse_DT_alloccol")` was removed, the default is now fixed at 100. The reason is that *data.table* automatically expands the range of overallocated columns if required (so the option is not really necessary), and calling R options from C slows down C code and can cause problems in parallel code. ### Bug Fixes * Fixed a bug in `fcumsum` that caused a segfault during grouped operations on larger data, due to flawed internal memory allocation. Thanks @Gulde91 for reporting #237. * Fixed a bug in `across` caused by two `function(x)` statements being passed in a list e.g. `mtcars |> fsummarise(acr(mpg, list(ssdd = function(x) sd(x), mu = function(x) mean(x))))`. Thanks @trang1618 for reporting #233. * Fixed an issue in `across()` when logical vectors were used to select column on grouped data e.g. `mtcars %>% gby(vs, am) %>% smr(acr(startsWith(names(.), "c"), fmean))` now works without error. * `qsu` gives proper output for length 1 vectors e.g. `qsu(1)`. * *collapse* depends on R > 3.3.0, due to the use of newer C-level macros introduced then. The earlier indication of R > 2.1.0 was only based on R-level code and misleading. Thanks @ben-schwen for reporting #236. I will try to maintain this dependency for as long as possible, without being too restrained by development in R's C API and the ALTREP system in particular, which *collapse* might utilize in the future. ### Additions * Introduction of 'indexed_frame','indexed_series' and 'index_df' classes: fast and flexible indexed time series and panel data classes that inherit from *plm*'s 'pdata.frame', 'pseries' and 'pindex' classes. These classes take full advantage of *collapse*'s computational infrastructure, are class-agnostic i.e. they can be superimposed upon any data frame or vector/matrix like object while maintaining most of the functionality of that object, support both time series and panel data, natively handle irregularity, and supports ad-hoc computations inside arbitrary data masking functions and model formulas. This infrastructure comprises of additional functions and methods, and modification of some existing functions and 'pdata.frame' / 'pseries' methods. - New functions: `findex_by/iby`, `findex/ix`, `unindex`, `reindex`, `is_irregular`, `to_plm`. - New methods: `[.indexed_series`, `[.indexed_frame`, `[<-.indexed_frame`, `$.indexed_frame`, `$<-.indexed_frame`, `[[.indexed_frame`, `[[<-.indexed_frame`, `[.index_df`, `fsubset.pseries`, `fsubset.pdata.frame`, `funique.pseries`, `funique.pdata.frame`, `roworder(v)` (internal) `na_omit` (internal), `print.indexed_series`, `print.indexed_frame`, `print.index_df`, `Math.indexed_series`, `Ops.indexed_series`. - Modification of 'pseries' and 'pdata.frame' methods for functions `flag/L/F`, `fdiff/D/Dlog`, `fgrowth/G`, `fcumsum`, `psmat`, `psacf/pspacf/psccf`, `fscale/STD`, `fbetween/B`, `fwithin/W`, `fhdbetween/HDB`, `fhdwithin/HDW`, `qsu` and `varying` to take advantage of 'indexed_frame' and 'indexed_series' while continuing to work as before with 'pdata.frame' and 'pseries'. For more information and details see `help("indexing")`. * Added function `timeid`: Generation of an integer-id/time-factor from time or date sequences represented by integer of double vectors (such as 'Date', 'POSIXct', 'ts', 'yearmon', 'yearquarter' or plain integers / doubles) by a numerically quite robust greatest common divisor method (see below). This function is used internally in `findex_by`, `reindex` and also in evaluation of the `t` argument to functions like `flag`/`fdiff`/`fgrowth` whenever `is.object(t) && is.numeric(unclass(t))` (see also note above). * Programming helper function `vgcd` to efficiently compute the greatest common divisor from a vector or positive integer or double values (which should ideally be unique and sorted as well, `timeid` uses `vgcd(sort(unique(diff(sort(unique(na_rm(x)))))))`). Precision for doubles is up to 6 digits. * Programming helper function `frange`: A significantly faster alternative to `base::range`, which calls both `min` and `max`. Note that `frange` inherits *collapse*'s global `na.rm = TRUE` default. * Added function `qtab/qtable`: A versatile and computationally more efficient alternative to `base::table`. Notably, it also supports tabulations with frequency weights, and computation of a statistic over combinations of variables. Objects are of class 'qtab' that inherits from 'table'. Thus all 'table' methods apply to it. * `TRA` was rewritten in C, and now has an additional argument `set = TRUE` which toggles data transformation by reference. The function `setTRA` was added as a shortcut which additionally returns the result invisibly. Since `TRA` is usually accessed internally through the like-named argument to *Fast Statistical Functions*, passing `set = TRUE` to those functions yields an internal call to `setTRA`. For example `fmedian(num_vars(iris), g = iris$Species, TRA = "-", set = TRUE)` subtracts the species-wise median from the numeric variables in the iris dataset, modifying the data in place and returning the result invisibly. Similarly the argument can be added in other workflows such as `iris |> fgroup_by(Species) |> fmutate(across(1:2, fmedian, set = TRUE))` or `mtcars |> ftransform(mpg = mpg %+=% hp, wt = fsd(wt, cyl, TRA = "replace_fill", set = TRUE))`. Note that such chains must be ended by `invisible()` if no printout is wanted. * Exported helper function `greorder`, the companion to `gsplit` to reorder output in `fmutate` (and now also in `BY`): let `g` be a 'GRP' object (or something coercible such as a vector) and `x` a vector, then `greorder` orders data in `y = unlist(gsplit(x, g))` such that `identical(greorder(y, g), x)`. ### Improvements * `fmean`, `fprod`, `fmode` and `fndistinct` were rewritten in C, providing performance improvements, particularly in `fmode` and `fndistinct`, and improvements for integers in `fmean` and `fprod`. * OpenMP multithreading in `fsum`, `fmean`, `fmedian`, `fnth`, `fmode` and `fndistinct`, implemented via an additional `nthreads` argument. The default is to use 1 thread, which internally calls a serial version of the code in `fsum` and `fmean` (thus no change in the default behavior). The plan is to slowly roll this out over all statistical functions and then introduce options to set alternative global defaults. Multi-threading internally works different for different functions, see the `nthreads` argument documentation of a particular function. Unfortunately I currently cannot guarantee thread safety, as parallelization of complex loops entails some tricky bugs and I have limited time to sort these out. So please report bugs, and if you happen to have experience with OpenMP please consider examining the code and making some suggestions. * `TRA` has an additional option `"replace_NA"`, e.g. `wlddev |> fgroup_by(iso3c) |> fmutate(across(PCGDP:POP, fmedian, TRA = "replace_NA"))` performs median value imputation of missing values. Similarly for a matrix `X <- matrix(na_insert(rnorm(1e7)), ncol = 100)`, `fmedian(X, TRA = "replace_NA", set = TRUE)` (column-wise median imputation by reference). * All *Fast Statistical Functions* support zero group sizes (e.g. grouping with a factor that has unused levels will always produce an output of length `nlevels(x)` with `0` or `NA` elements for the unused levels). Previously this produced an error message with counting/ordinal functions `fmode`, `fndistinct`, `fnth` and `fmedian`. * 'GRP' objects now also contain a 'group.starts' item in the 8'th slot that gives the first positions of the unique groups, and is returned alongside the groups whenever `return.groups = TRUE`. This now benefits `ffirst` when invoked with `na.rm = FALSE`, e.g. `wlddev %>% fgroup_by(country) %>% ffirst(na.rm = FALSE)` is now just as efficient as `funique(wlddev, cols = "country")`. Note that no additional computing cost is incurred by preserving the 'group.starts' information. * Conversion methods `GRP.factor`, `GRP.qG`, `GRP.pseries`, `GRP.pdata.frame` and `GRP.grouped_df` now also efficiently check if grouping vectors are sorted (the information is stored in the "ordered" element of 'GRP' objects). This leads to performance improvements in `gsplit` / `greorder` and dependent functions such as `BY` and `rsplit` if factors are sorted. * `descr()` received some performance improvements (up to 2x for categorical data), and has an additional argument `sort.table`, allowing frequency tables for categorical variables to be sorted by frequency (`"freq"`) or by table values (`"value"`). The new default is (`"freq"`), which presents tables in decreasing order of frequency. A method `[.descr` was added allowing 'descr' objects to be subset like a list. The print method was also enhanced, and by default now prints 14 values with the highest frequency and groups the remaining values into a single `... %s Others` category. Furthermore, if there are any missing values in the column, the percentage of values missing is now printed behind `Statistics `. Additional arguments `reverse` and `stepwise` allow printing in reverse order and/or one variable at a time. * `whichv` (and operators `%==%`, `%!=%`) now also support comparisons of equal-length arguments e.g. `1:3 %==% 1:3`. Note that this should not be used to compare 2 factors. * Added some code to the `.onLoad` function that checks for the existence of a `.fastverse` configuration file containing a setting for `_opt_collapse_mask`: If found the code makes sure that the option takes effect before the package is loaded. This means that inside projects using the *fastverse* and `options("collapse_mask")` to replace base R / *dplyr* functions, *collapse* cannot be loaded without the masking being applied, making it more secure to utilize this feature. For more information about function masking see `help("collapse-options")` and for `.fastverse` configuration files see the [fastverse vignette](https://fastverse.org/fastverse/articles/fastverse_intro.html#custom-fastverse-configurations-for-projects). * Added hidden `.list` methods for `fhdwithin/HDW` and `fhdbetween/HDB`. As for the other `.FAST_FUN` this is just a wrapper for the data frame method and meant to be used on unclassed data frames. * `ss()` supports unnamed lists / data frames. * The `t` and `w` arguments in 'grouped_df' methods (NSE) and where formula input is allowed, supports ad-hoc transformations. E.g. `wlddev %>% gby(iso3c) %>% flag(t = qG(date))` or `L(wlddev, 1, ~ iso3c, ~qG(date))`, similarly `qsu(wlddev, w = ~ log(POP))`, `wlddev %>% gby(iso3c) %>% collapg(w = log(POP))` or `wlddev %>% gby(iso3c) %>% nv() %>% fmean(w = log(POP))`. * Small improvements to `group()` algorithm, avoiding some cases where the hash function performed badly, particularly with integers. * Function `GRPnames` now has a `sep` argument to choose a separator other than `"."`. # collapse 1.7.6 * Corrected a C-level bug in `gsplit` that could lead R to crash in some instances (`gsplit` is used internally in `fsummarise`, `fmutate`, `BY` and `collap` to perform computations with base R (non-optimized) functions). * Ensured that `BY.grouped_df` always (by default) returns grouping columns in aggregations i.e. `iris |> gby(Species) |> nv() |> BY(sum)` now gives the same as `iris |> gby(Species) |> nv() |> fsum()`. * A `.` was added to the first argument of functions `fselect`, `fsubset`, `colorder` and `fgroup_by`, i.e. `fselect(x, ...) -> fselect(.x, ...)`. The reason for this is that over time I added the option to select-rename columns e.g. `fselect(mtcars, cylinders = cyl)`, which was not offered when these functions were created. This presents problems if columns should be renamed into `x`, e.g. `fselect(mtcars, x = cyl)` failed, see [#221](https://github.com/fastverse/collapse/issues/221). Renaming the first argument to `.x` somewhat guards against such situations. I think this change is worthwhile to implement, because it makes the package more robust going forward, and usually the first argument of these functions is never invoked explicitly. I really hope this breaks nobody's code. * Added a function `GRPN` to make it easy to add a column of group sizes e.g. `mtcars %>% fgroup_by(cyl,vs,am) %>% ftransform(Sizes = GRPN(.))` or `mtcars %>% ftransform(Sizes = GRPN(list(cyl, vs, am)))` or `GRPN(mtcars, by = ~cyl+vs+am)`. * Added `[.pwcor` and `[.pwcov`, to be able to subset correlation/covariance matrices without loosing the print formatting. # collapse 1.7.5 * Also ensuring tidyverse examples are in `\donttest{}` and building without the *dplyr* testing file to avoid issues with static code analysis on CRAN. * 20-50% Speed improvement in `gsplit` (and therefore in `fsummarise`, `fmutate`, `collap` and `BY` *when invoked with base R functions*) when grouping with `GRP(..., sort = TRUE, return.order = TRUE)`. To enable this by default, the default for argument `return.order` in `GRP` was set to `sort`, which retains the ordering vector (needed for the optimization). Retaining the ordering vector uses up some memory which can possibly adversely affect computations with big data, but with big data `sort = FALSE` usually gives faster results anyway, and you can also always set `return.order = FALSE` (also in `fgroup_by`, `collap`), so this default gives the best of both worlds. * An ancient depreciated argument `sort.row` (replaced by `sort` in 2020) is now removed from `collap`. Also arguments `return.order` and `method` were added to `collap` providing full control of the grouping that happens internally. # collapse 1.7.4 * Tests needed to be adjusted for the upcoming release of *dplyr* 1.0.8 which involves an API change in `mutate`. `fmutate` will not take over these changes i.e. `fmutate(..., .keep = "none")` will continue to work like `dplyr::transmute`. Furthermore, no more tests involving *dplyr* are run on CRAN, and I will also not follow along with any future *dplyr* API changes. * The C-API macro `installTrChar` (used in the new `massign` function) was replaced with `installChar` to maintain backwards compatibility with R versions prior to 3.6.0. Thanks @tedmoorman #213. * Minor improvements to `group()`, providing increased performance for doubles and also increased performance when the second grouping variable is integer, which turned out to be very slow in some instances. # collapse 1.7.3 * Removed tests involving the *weights* package (which is not available on R-devel CRAN checks). * `fgroup_by` is more flexible, supporting computing columns e.g. `fgroup_by(GGDC10S, Variable, Decade = floor(Year / 10) * 10)` and various programming options e.g. `fgroup_by(GGDC10S, 1:3)`, `fgroup_by(GGDC10S, c("Variable", "Country"))`, or `fgroup_by(GGDC10S, is.character)`. You can also use column sequences e.g. `fgroup_by(GGDC10S, Country:Variable, Year)`, but this should not be mixed with computing columns. Compute expressions may also not include the `:` function. * More memory efficient attribute handling in C/C++ (using C-API macro `SHALLOW_DUPLICATE_ATTRIB` instead of `DUPLICATE_ATTRIB`) in most places. # collapse 1.7.2 * Ensured that the base pipe `|>` is not used in tests or examples, to avoid errors on CRAN checks with older versions of R. * Also adjusted `psacf` / `pspacf` / `psccf` to take advantage of the faster grouping by `group`. # collapse 1.7.1 * Fixed minor C/C++ issues flagged in CRAN checks. * Added option `ties = "last"` to `fmode`. * Added argument `stable.algo` to `qsu`. Setting `stable.algo = FALSE` toggles a faster calculation of the standard deviation, yielding 2x speedup on large datasets. * *Fast Statistical Functions* now internally use `group` for grouping data if both `g` and `TRA` arguments are used, yielding efficiency gains on unsorted data. * Ensured that `fmutate` and `fsummarise` can be called if *collapse* is not attached. # collapse 1.7.0 *collapse* 1.7.0, released mid January 2022, brings major improvements in the computational backend of the package, its data manipulation capabilities, and a whole set of new functions that enable more flexible and memory efficient R programming - significantly enhancing the language itself. For the vast majority of codes, updating to 1.7 should not cause any problems. ### Changes to functionality * `num_vars` is now implemented in C, yielding a massive performance increase over checking columns using `vapply(x, is.numeric, logical(1))`. It selects columns where `(is.double(x) || is.integer(x)) && !is.object(x)`. This provides the same results for most common classes found in data frames (e.g. factors and date columns are not numeric), however it is possible for users to define methods for `is.numeric` for other objects, which will not be respected by `num_vars` anymore. A prominent example are base R's 'ts' objects i.e. `is.numeric(AirPassengers)` returns `TRUE`, but `is.object(AirPassengers)` is also `TRUE` so the above yields `FALSE`, implying - if you happened to work with data frames of 'ts' columns - that `num_vars` will now not select those anymore. Please make me aware if there are other important classes that are found in data frames and where `is.numeric` returns `TRUE`. `num_vars` is also used internally in `collap` so this might affect your aggregations. * In `flag`, `fdiff` and `fgrowth`, if a plain numeric vector is passed to the `t` argument such that `is.double(t) && !is.object(t)`, it is coerced to integer using `as.integer(t)` and directly used as time variable, rather than applying ordered grouping first. This is to avoid the inefficiency of grouping, and owes to the fact that in most data imported into R with various packages, the time (year) variables are coded as double although they should be integer (I also don't know of any cases where time needs to be indexed by a non-date variable with decimal places). Note that the algorithm internally handles irregularity in the time variable so this is not a problem. Should this break any code, kindly raise an issue on GitHub. * The function `setrename` now truly renames objects by reference (without creating a shallow copy). The same is true for `vlabels<-` (which was rewritten in C) and a new function `setrelabel`. Thus additional care needs to be taken (with use inside functions etc.) as the renaming will take global effects unless a shallow copy of the data was created by some prior operation inside the function. If in doubt, better use `frename` which creates a shallow copy. * Some improvements to the `BY` function, both in terms of performance and security. Performance is enhanced through a new C function `gsplit`, providing split-apply-combine computing speeds competitive with *dplyr* on a much broader range of R objects. Regarding Security: if the result of the computation has the same length as the original data, names / rownames and grouping columns (for grouped data) are only added to the result object if known to be valid, i.e. if the data was originally sorted by the grouping columns (information recorded by `GRP.default(..., sort = TRUE)`, which is called internally on non-factor/GRP/qG objects). This is because `BY` does not reorder data after the split-apply-combine step (unlike `dplyr::mutate`); data are simply recombined in the order of the groups. Because of this, in general, `BY` should be used to compute summary statistics (unless data are sorted before grouping). The added security makes this explicit. * Added a method `length.GRP` giving the length of a grouping object. This could break code calling `length` on a grouping object before (which just returned the length of the list). * Functions renamed in collapse 1.6.0 will now print a message telling you to use the updated names. The functions under the old names will stay around for 1-3 more years. * The passing of argument `order` instead of `sort` in function `GRP` (from a very early version of collapse), is now disabled. ### Bug Fixes * Fixed a bug in some functions using Welfords Online Algorithm (`fvar`, `fsd`, `fscale` and `qsu`) to calculate variances, occurring when initial or final zero weights caused the running sum of weights in the algorithm to be zero, yielding a division by zero and `NA` as output although a value was expected. These functions now skip zero weights alongside missing weights, which also implies that you can pass a logical vector to the weights argument to very efficiently calculate statistics on a subset of data (e.g. using `qsu`). ### Additions #### Basic Computational Infrastructure * Function `group` was added, providing a low-level interface to a new unordered grouping algorithm based on hashing in C and optimized for R's data structures. The algorithm was heavily inspired by the great `kit` package of Morgan Jacob, and now feeds into the package through multiple central functions (including `GRP` / `fgroup_by`, `funique` and `qF`) when invoked with argument `sort = FALSE`. It is also used in internal groupings performed in data transformation functions such as `fwithin` (when no factor or 'GRP' object is provided to the `g` argument). The speed of the algorithm is very promising (often superior to `radixorder`), and it could be used in more places still. I welcome any feedback on its performance on different datasets. * Function `gsplit` provides an efficient alternative to `split` based on grouping objects. It is used as a new backend to `rsplit` (which also supports data frame) as well as `BY`, `collap`, `fsummarise` and `fmutate` - for more efficient grouped operations with functions external to the package. * Added multiple functions to facilitate memory efficient programming (written in C). These include elementary mathematical operations by reference (`setop`, `%+=%`, `%-=%`, `%*=%`, `%/=%`), supporting computations involving integers and doubles on vectors, matrices and data frames (including row-wise operations via `setop`) with no copies at all. Furthermore a set of functions which check a single value against a vector without generating logical vectors: `whichv`, `whichNA` (operators `%==%` and `%!=%` which return indices and are significantly faster than `==`, especially inside functions like `fsubset`), `anyv` and `allv` (`allNA` was already added before). Finally, functions `setv` and `copyv` speed up operations involving the replacement of a value (`x[x == 5] <- 6`) or of a sequence of values from a equally sized object (`x[x == 5] <- y[x == 5]`, or `x[ind] <- y[ind]` where `ind` could be pre-computed vectors or indices) in vectors and data frames without generating any logical vectors or materializing vector subsets. * Function `vlengths` was added as a more efficient alternative to `lengths` (without method dispatch, simply coded in C). * Function `massign` provides a multivariate version of `assign` (written in C, and supporting all basic vector types). In addition the operator `%=%` was added as an efficient multiple assignment operator. (It is called `%=%` and not `%<-%` to facilitate the translation of Matlab or Python codes into R, and because the [zeallot]() package already provides multiple-assignment operators (`%<-%` and `%->%`), which are significantly more versatile, but orders of magnitude slower than `%=%`) #### High-Level Features * Fully fledged `fmutate` function that provides functionality analogous to `dplyr::mutate` (sequential evaluation of arguments, including arbitrary tagged expressions and `across` statements). `fmutate` is optimized to work together with the packages *Fast Statistical and Data Transformation Functions*, yielding fast, vectorized execution, but also benefits from `gsplit` for other operations. * `across()` function implemented for use inside `fsummarise` and `fmutate`. It is also optimized for *Fast Statistical and Data Transformation Functions*, but performs well with other functions too. It has an additional arguments `.apply = FALSE` which will apply functions to the entire subset of the data instead of individual columns, and thus allows for nesting tibbles and estimating models or correlation matrices by groups etc.. `across()` also supports an arbitrary number of additional arguments which are split and evaluated by groups if necessary. Multiple `across()` statements can be combined with tagged vector expressions in a single call to `fsummarise` or `fmutate`. Thus the computational framework is pretty general and similar to *data.table*, although less efficient with big datasets. * Added functions `relabel` and `setrelabel` to make interactive dealing with variable labels a bit easier. Note that both functions operate by reference. (Through `vlabels<-` which is implemented in C. Taking a shallow copy of the data frame is useless in this case because variable labels are attributes of the columns, not of the frame). The only difference between the two is that `setrelabel` returns the result invisibly. * function shortcuts `rnm` and `mtt` added for `frename` and `fmutate`. `across` can also be abbreviated using `acr`. * Added two options that can be invoked before loading of the package to change the namespace: `options(collapse_mask = c(...))` can be set to export copies of selected (or all) functions in the package that start with `f` removing the leading `f` e.g. `fsubset` -> `subset` (both `fsubset` and `subset` will be exported). This allows masking base R and dplyr functions (even basic functions such as `sum`, `mean`, `unique` etc. if desired) with *collapse*'s fast functions, facilitating the optimization of existing codes and allowing you to work with *collapse* using a more natural namespace. The package has been internally insulated against such changes, but of course they might have major effects on existing codes. Also `options(collapse_F_to_FALSE = FALSE)` can be invoked to get rid of the lead operator `F`, which masks `base::F` (an issue raised by some people who like to use `T`/`F` instead of `TRUE`/`FALSE`). Read the help page `?collapse-options` for more information. ### Improvements * Package loads faster (because I don't fetch functions from some other C/C++ heavy packages in `.onLoad` anymore, which implied unnecessary loading of a lot of DLLs). * `fsummarise` is now also fully featured supporting evaluation of arbitrary expressions and `across()` statements. Note that mixing *Fast Statistical Functions* with other functions in a single expression can yield unintended outcomes, read more at `?fsummarise`. * `funique` benefits from `group` in the default `sort = FALSE`, configuration, providing extra speed and unique values in first-appearance order in both the default and the data frame method, for all data types. * Function `ss` supports both empty `i` or `j`. * The printout of `fgroup_by` also shows minimum and maximum group size for unbalanced groupings. * In `ftransformv/settransformv` and `fcomputev`, the `vars` argument is also evaluated inside the data frame environment, allowing NSE specifications using column names e.g. `ftransformv(data, c(col1, col2:coln), FUN)`. * `qF` with option `sort = FALSE` now generates factors with levels in first-appearance order (instead of a random order assigned by the hash function), and can also be called on an existing factor to recast the levels in first-appearance order. It is also faster with `sort = FALSE` (thanks to `group`). * `finteraction` has argument `sort = FALSE` to also take advantage of `group`. * `rsplit` has improved performance through `gsplit`, and an additional argument `use.names`, which can be used to return an unnamed list. * Speedup in `vtypes` and functions `num_vars`, `cat_vars`, `char_vars`, `logi_vars` and `fact_vars`. Note than `num_vars` behaves slightly differently as discussed above. * `vlabels(<-)` / `setLabels` rewritten in C, giving a ~20x speed improvement. Note that they now operate by reference. * `vlabels`, `vclasses` and `vtypes` have a `use.names` argument. The default is `TRUE` (as before). * `colorder` can rename columns on the fly and also has a new mode `pos = "after"` to place all selected columns after the first selected one, e.g.: `colorder(mtcars, cyl, vs_new = vs, am, pos = "after")`. The `pos = "after"` option was also added to `roworderv`. + `add_stub` and `rm_stub` have an additional `cols` argument to apply a stub to certain columns only e.g. `add_stub(mtcars, "new_", cols = 6:9)`. * `namlab` has additional arguments `N` and `Ndistinct`, allowing to display number of observations and distinct values next to variable names, labels and classes, to get a nice and quick overview of the variables in a large dataset. * `copyMostAttrib` only copies the `"row.names"` attribute when known to be valid. * `na_rm` can now be used to efficiently remove empty or `NULL` elements from a list. * `flag`, `fdiff` and `fgrowth` produce less messages (i.e. no message if you don't use a time variable in grouped operations, and messages about computations on highly irregular panel data only if data length exceeds 10 million obs.). * The print methods of `pwcor` and `pwcov` now have a `return` argument, allowing users to obtain the formatted correlation matrix, for exporting purposes. * `replace_NA`, `recode_num` and `recode_char` have improved performance and an additional argument `set` to take advantage of `setv` to change (some) data by reference. For `replace_NA`, this feature is mature and setting `set = TRUE` will modify all selected columns in place and return the data invisibly. For `recode_num` and `recode_char` only a part of the transformations are done by reference, thus users will still have to assign the data to preserve changes. In the future, this will be improved so that `set = TRUE` toggles all transformations to be done by reference. # collapse 1.6.5 * Use of `VECTOR_PTR` in C API now gives an error on R-devel even if `USE_RINTERNALS` is defined. Thus this patch gets rid of all remaining usage of this macro to avoid errors on CRAN checks using the development version of R. * The print method for `qsu` now uses an apostrophe (') to designate million digits, instead of a comma (,). This is to avoid confusion with the decimal point, and the typical use of (,) for thousands (which I don't like). # collapse 1.6.4 Checks on the gcc11 compiler flagged an additional issue with a pointer pointing to element -1 of a C array (which I had done on purpose to index it with an R integer vector). # collapse 1.6.3 CRAN checks flagged a valgrind issue because of comparing an uninitialized value to something. # collapse 1.6.2 CRAN maintainers have asked me to remove a line in a Makevars file intended to reduce the size of Rcpp object files (which has been there since version 1.4). So the installed size of the package may now be larger. # collapse 1.6.1 A patch for 1.6.0 which fixes issues flagged by CRAN and adds a few handy extras. ### Bug Fixes * Puts examples using the new base pipe `|>` inside `\donttest{}` so that they don't fail CRAN tests on older R versions. * Fixes a LTO issue caused by a small mistake in a header file (which does not have any implications to the user but was detected by CRAN checks). ### Additions * Added a function `fcomputev`, which allows selecting columns and transforming them with a function in one go. The `keep` argument can be used to add columns to the selection that are not transformed. * Added a function `setLabels` as a wrapper around `vlabels<-` to facilitate setting variable labels inside pipes. * Function `rm_stub` now has an argument `regex = TRUE` which triggers a call to `gsub` and allows general removing of character sequences in column names on the fly. ### Improvements * `vlabels<-` and `setLabels` now support list of variable labels or other attributes (i.e. the `value` is internally subset using `[[`, not `[`). Thus they are now general functions to attach a vector or list of attributes to columns in a list / data frame. # collapse 1.6.0 *collapse* 1.6.0, released end of June 2021, presents some significant improvements in the user-friendliness, compatibility and programmability of the package, as well as a few function additions. ### Changes to Functionality * `ffirst`, `flast`, `fnobs`, `fsum`, `fmin` and `fmax` were rewritten in C. The former three now also support list columns (where `NULL` or empty list elements are considered missing values when `na.rm = TRUE`), and are extremely fast for grouped aggregation if `na.rm = FALSE`. The latter three also support and return integers, with significant performance gains, even compared to base R. Code using these functions expecting an error for list-columns or expecting double output even if the input is integer should be adjusted. * *collapse* now directly supports *sf* data frames through functions like `fselect`, `fsubset`, `num_vars`, `qsu`, `descr`, `varying`, `funique`, `roworder`, `rsplit`, `fcompute` etc., which will take along the geometry column even if it is not explicitly selected (mirroring *dplyr* methods for *sf* data frames). This is mostly done internally at C-level, so functions remain simple and fast. Existing code that explicitly selects the geometry column is unaffected by the change, but code of the form `sf_data %>% num_vars %>% qDF %>% ...`, where columns excluding geometry were selected and the object later converted to a data frame, needs to be rewritten as `sf_data %>% qDF %>% num_vars %>% ...`. A short vignette was added describing the integration of *collapse* and *sf*. * I've received several requests for increased namespace consistency. *collapse* functions were named to be consistent with base R, *dplyr* and *data.table*, resulting in names like `is.Date`, `fgroup_by` or `settransformv`. To me this makes sense, but I've been convinced that a bit more consistency is advantageous. Towards that end I have decided to eliminate the '.' notation of base R and to remove some unexpected capitalizations in function names giving some people the impression I was using camel-case. The following functions are renamed: `fNobs` -> `fnobs`, `fNdistinct` -> `fndistinct`, `pwNobs` -> `pwnobs`, `fHDwithin` -> `fhdwithin`, `fHDbetween` -> `fhdbetween`, `as.factor_GRP` -> `as_factor_GRP`, `as.factor_qG` -> `as_factor_qG`, `is.GRP` -> `is_GRP`, `is.qG` -> `is_qG`, `is.unlistable` -> `is_unlistable`, `is.categorical` -> `is_categorical`, `is.Date` -> `is_date`, `as.numeric_factor` -> `as_numeric_factor`, `as.character_factor` -> `as_character_factor`, `Date_vars` -> `date_vars`. This is done in a very careful manner, the others will stick around for a long while (end of 2022), and the generics of `fNobs`, `fNdistinct`, `fHDbetween` and `fHDwithin` will be kept in the package for an indeterminate period, but their core methods will not be exported beyond 2022. I will start warning about these renamed functions in 2022. In the future I will undogmatically stick to a function naming style with lowercase function names and underslashes where words need to be split. Other function names will be kept. To say something about this: The quick-conversion functions `qDF` `qDT`, `qM`, `qF`, `qG` are consistent and in-line with *data.table* (`setDT` etc.), and similarly the operators `L`, `F`, `D`, `Dlog`, `G`, `B`, `W`, `HDB`, `HDW`. I'll keep `GRP`, `BY` and `TRA`, for lack of better names, parsimony and because they are central to the package. The camel case will be kept in helper functions `setDimnames` etc. because they work like *stats* `setNames` and do not modify the argument by reference (like `settransform` or `setrename` and various *data.table* functions). Functions `copyAttrib` and `copyMostAttrib` are exports of like-named functions in the C API and thus kept as they are. Finally, I want to keep `fFtest` the way it is because the F-distribution is widely recognized by a capital F. * I've updated the `wlddev` dataset with the latest data from the World Bank, and also added a variable giving the total population (which may be useful e.g. for population-weighted aggregations across regions). The extra column could invalidate codes used to demonstrate something (I had to adjust some examples, tests and code in vignettes). ### Additions * Added a function `fcumsum` (written in C), permitting flexible (grouped, ordered) cumulative summations on matrix-like objects (integer or double typed) with extra methods for grouped data frames and panel series and data frames. Apart from the internal grouping, and an ordering argument allowing cumulative sums in a different order than data appear, `fcumsum` has 2 options to deal with missing values. The default (`na.rm = TRUE`) is to skip (preserve) missing values, whereas setting `fill = TRUE` allows missing values to be populated with the previous value of the cumulative sum (starting from 0). * Added a function `alloc` to efficiently generate vectors initialized with any value (faster than `rep_len`). * Added a function `pad` to efficiently pad vectors / matrices / data.frames with a value (default is `NA`). This function was mainly created to make it easy to expand results coming from a statistical model fitted on data with missing values to the original length. For example let `data <- na_insert(mtcars); mod <- lm(mpg ~ cyl, data)`, then we can do `settransform(data, resid = pad(resid(mod), mod$na.action))`, or we could do `pad(model.matrix(mod), mod$na.action)` or `pad(model.frame(mod), mod$na.action)` to receive matrices and data frames from model data matching the rows of `data`. `pad` is a general function that will also work with mixed-type data. It is also possible to pass a vector of indices matching the rows of the data to `pad`, in which case `pad` will fill gaps in those indices with a value/row in the data. ### Improvements * Full *data.table* support, including reference semantics (`set*`, `:=`)!! There is some complex C-level programming behind *data.table*'s operations by reference. Notably, additional (hidden) column pointers are allocated to be able to add columns without taking a shallow copy of the *data.table*, and an `".internal.selfref"` attribute containing an external pointer is used to check if any shallow copy was made using base R commands like `<-`. This is done to avoid even a shallow copy of the *data.table* in manipulations using `:=` (and is in my opinion not worth it as even large tables are shallow copied by base R (>=3.1.0) within microseconds and all of this complicates development immensely). Previously, *collapse* treated *data.table*'s like any other data frame, using shallow copies in manipulations and preserving the attributes (thus ignoring how *data.table* works internally). This produced a warning whenever you wanted to use *data.table* reference semantics (`set*`, `:=`) after passing the *data.table* through a *collapse* function such as `collap`, `fselect`, `fsubset`, `fgroup_by` etc. From v1.6.0, I have adopted essential C code from *data.table* to do the overallocation and generate the `".internal.selfref"` attribute, thus seamless workflows combining *collapse* and *data.table* are now possible. This comes at a cost of about 2-3 microseconds per function, as to do this I have to shallow copy the *data.table* again and add extra column pointers and an `".internal.selfref"` attribute telling *data.table* that this table was not copied (it seems to be the only way to do it for now). This integration encompasses all data manipulation functions in *collapse*, but not the *Fast Statistical Functions* themselves. Thus you can do `agDT <- DT %>% fselect(id, col1:coln) %>% collap(~id, fsum); agDT[, newcol := 1]`, but you would need to do add a `qDT` after a function like `fsum` if you want to use reference semantics without incurring a warning: `agDT <- DT %>% fselect(id, col1:coln) %>% fgroup_by(id) %>% fsum %>% qDT; agDT[, newcol := 1]`. *collapse* appears to be the first package that attempts to account for *data.table*'s internal working without importing *data.table*, and `qDT` is now the fastest way to create a fully functional *data.table* from any R object. A global option `"collapse_DT_alloccol"` was added to regulate how many columns *collapse* overallocates when creating *data.table*'s. The default is 100, which is lower than the *data.table* default of 1024. This was done to increase efficiency of the additional shallow copies, and may be changed by the user. * Programming enabled with `fselect` and `fgroup_by` (you can now pass vectors containing column names or indices). Note that instead of `fselect` you should use `get_vars` for standard eval programming. * `fselect` and `fsubset` support in-place renaming, e.g. `fselect(data, newname = var1, var3:varN)`, `fsubset(data, vark > varp, newname = var1, var3:varN)`. * `collap` supports renaming columns in the custom argument, e.g. `collap(data, ~ id, custom = list(fmean = c(newname = "var1", "var2"), fmode = c(newname = 3), flast = is_date))`. * Performance improvements: `fsubset` / `ss` return the data or perform a simple column subset without deep copying the data if all rows are selected through a logical expression. `fselect` and `get_vars`, `num_vars` etc. are slightly faster through data frame subsetting done fully in C. `ftransform` / `fcompute` use `alloc` instead of `base::rep` to replicate a scalar value which is slightly more efficient. * `fcompute` now has a `keep` argument, to preserve several existing columns when computing columns on a data frame. * `replace_NA` now has a `cols` argument, so we can do `replace_NA(data, cols = is.numeric)`, to replace `NA`'s in numeric columns. I note that for big numeric data `data.table::setnafill` is the most efficient solution. * `fhdbetween` and `fhdwithin` have an `effect` argument in *plm* methods, allowing centering on selected identifiers. The default is still to center on all panel identifiers. * The plot method for panel series matrices and arrays `plot.psmat` was improved slightly. It now supports custom colours and drawing of a grid. * `settransform` and `settransformv` can now be called without attaching the package e.g. `collapse::settransform(data, ...)`. These errored before when *collapse* is not loaded because they are simply wrappers around `data <- ftransform(data, ...)`. I'd like to note from a [discussion](https://github.com/fastverse/collapse/issues/136) that avoiding shallow copies with `<-` (e.g. via `:=`) does not appear to yield noticeable performance gains. Where *data.table* is faster on big data this mostly has to do with parallelism and sometimes with algorithms, generally not memory efficiency. * Functions `setAttrib`, `copyAttrib` and `copyMostAttrib` only make a shallow copy of lists, not of atomic vectors (which amounts to doing a full copy and is inefficient). Thus atomic objects are now modified in-place. * Small improvements: Calling `qF(x, ordered = FALSE)` on an ordered factor will remove the ordered class, the operators `L`, `F`, `D`, `Dlog`, `G`, `B`, `W`, `HDB`, `HDW` and functions like `pwcor` now work on unnamed matrices or data frames. # collapse 1.5.3 * A test that occasionally fails on Mac is removed, and all unit testing is now removed from CRAN. *collapse* has close to 10,000 unit tests covering all central pieces of code. Half of these tests depend on generated data, and for some reasons there is always a test or two that occasionally fail on some operating system (usually not Windows), requiring me to submit a patch. This is not constructive to either the development or the use of this package, therefore tests are now removed from CRAN. They are still run on codecov.io, and every new release is thoroughly tested on Windows. # collapse 1.5.2 ### Changes to Functionality * The first argument of `ftransform` was renamed to `.data` from `X`. This was done to enable the user to transform columns named "X". For the same reason the first argument of `frename` was renamed to `.x` from `x` (not `.data` to make it explicit that `.x` can be any R object with a "names" attribute). It is not possible to depreciate `X` and `x` without at the same time undoing the benefits of the argument renaming, thus this change is immediate and code breaking in rare cases where the first argument is explicitly set. * The function `is.regular` to check whether an R object is atomic or list-like is depreciated and will be removed before the end of the year. This was done to avoid a namespace clash with the *zoo* package (#127). ### Bug Fixes * `unlist2d` produced a subsetting error if an empty list was present in the list-tree. This is now fixed, empty or `NULL` elements in the list-tree are simply ignored (#99). ### Additions * A function `fsummarize` was added to facilitate translating *dplyr* / *data.table* code to *collapse*. Like `collap`, it is only very fast when used with the *Fast Statistical Functions*. * A function `t_list` is made available to efficiently transpose lists of lists. ### Improvements * C files are compiled -O3 on Windows, which gives a boost of around 20% for the grouping mechanism applied to character data. # collapse 1.5.1 A small patch for 1.5.0 that: * Fixes a numeric precision issue when grouping doubles (e.g. before `qF(wlddev$LIFEEX)` gave an error, now it works). * Fixes a minor issue with `fhdwithin` when applied to *pseries* and `fill = FALSE`. # collapse 1.5.0 *collapse* 1.5.0, released early January 2021, presents important refinements and some additional functionality. ### Back to CRAN * I apologize for inconveniences caused by the temporal archival of *collapse* from December 19, 2020. This archival was caused by the archival of the important *lfe* package on the 4th of December. *collapse* depended on *lfe* for higher-dimensional centering, providing the `fhdbetween / fhdwithin` functions for generalized linear projecting / partialling out. To remedy the damage caused by the removal of *lfe*, I had to rewrite `fhdbetween / fhdwithin` to take advantage of the demeaning algorithm provided by *fixest*, which has some quite different mechanics. Beforehand, I made some significant changes to `fixest::demean` itself to make this integration happen. The CRAN deadline was the 18th of December, and I realized too late that I would not make this. A request to CRAN for extension was declined, so *collapse* got archived on the 19th. I have learned from this experience, and *collapse* is now sufficiently insulated that it will not be taken off CRAN even if all suggested packages were removed from CRAN. ### Bug Fixes * Segfaults in several *Fast Statistical Functions* when passed `numeric(0)` are fixed (thanks to @eshom and @acylam, [#101](https://github.com/fastverse/collapse/issues/101)). The default behavior is that all *collapse* functions return `numeric(0)` again, except for `fnobs`, `fndistinct` which return `0L`, and `fvar`, `fsd` which return `NA_real_`. ### Changes to Functionality * Functions `fhdwithin / HDW` and `fhdbetween / HDB` have been reworked, delivering higher performance and greater functionality: For higher-dimensional centering and heterogeneous slopes, the `demean` function from the *fixest* package is imported (conditional on the availability of that package). The linear prediction and partialling out functionality is now built around `flm` and also allows for weights and different fitting methods. * In `collap`, the default behavior of `give.names = "auto"` was altered when used together with the `custom` argument. Before the function name was always added to the column names. Now it is only added if a column is aggregated with two different functions. I apologize if this breaks any code dependent on the new names, but this behavior just better reflects most common use (applying only one function per column), as well as STATA's collapse. * For list processing functions like `get_elem`, `has_elem` etc. the default for the argument `DF.as.list` was changed from `TRUE` to `FALSE`. This means if a nested lists contains data frame's, these data frame's will not be searched for matching elements. This default also reflects the more common usage of these functions (extracting entire data frame's or computed quantities from nested lists rather than searching / subsetting lists of data frame's). The change also delivers a considerable performance gain. * Vignettes were outsourced to the [website](). This nearly halves the size of the source package, and should induce users to appreciate the built-in documentation. The website also makes for much more convenient reading and navigation of these book-style vignettes. ### Additions * Added a set of 10 operators `%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`, `%cr%`, `%c+%`, `%c-%`, `%c*%`, `%c/%` to facilitate and speed up row- and column-wise arithmetic operations involving a vector and a matrix / data frame / list. For example `X %r*% v` efficiently multiplies every row of `X` with `v`. Note that more advanced functionality is already provided in `TRA()`, `dapply()` and the *Fast Statistical Functions*, but these operators are intuitive and very convenient to use in matrix or matrix-style code, or in piped expressions. * Added function `missing_cases` (opposite of `complete.cases` and faster for data frame's / lists). * Added function `allNA` for atomic vectors. * New vignette about using *collapse* together with *data.table*, available [online](). ### Improvements * Time series functions and operators `flag / L / F`, `fdiff / D / Dlog` and `fgrowth / G` now natively support irregular time series and panels, and feature a 'complete approach' i.e. values are shifted around taking full account of the underlying time-dimension! * Functions `pwcor` and `pwcov` can now compute weighted correlations on the pairwise or complete observations, supported by C-code that is (conditionally) imported from the *weights* package. * `fFtest` now also supports weights. * `collap` now provides an easy workaround to aggregate some columns using weights and others without. The user may simply append the names of *Fast Statistical Functions* with `_uw` to disable weights. Example: `collapse::collap(mtcars, ~ cyl, custom = list(fmean_uw = 3:4, fmean = 8:10), w = ~ wt)` aggregates columns 3 through 4 using a simple mean and columns 8 through 10 using the weighted mean. * The parallelism in `collap` using `parallel::mclapply` has been reworked to operate at the column-level, and not at the function level as before. It is still not available for Windows though. The default number of cores was set to `mc.cores = 2L`, which now gives an error on windows if `parallel = TRUE`. * function `recode_char` now has additional options `ignore.case` and `fixed` (passed to `grepl`), for enhanced recoding character data based on regular expressions. * `rapply2d` now has `classes` argument permitting more flexible use. * `na_rm` and some other internal functions were rewritten in C. `na_rm` is now 2x faster than `x[!is.na(x)]` with missing values and 10x faster without missing values. # collapse 1.4.2 * An improvement to the `[.GRP_df` method enabling the use of most *data.table* methods (such as `:=`) on a grouped *data.table* created with `fgroup_by`. * Some documentation updates by Kevin Tappe. # collapse 1.4.1 collapse 1.4.1 is a small patch for 1.4.0 that: * fixes clang-UBSAN and rchk issues in 1.4.0 (minor bugs in compiled code resulting, in this case, from trying to coerce a `NaN` value to integer, and failing to protect a shallow copy of a variable). * Adds a method `[.GRP_df` that allows robust subsetting of grouped objects created with `fgroup_by` (thanks to Patrice Kiener for flagging this). # collapse 1.4.0 *collapse* 1.4.0, released early November 2020, presents some important refinements, particularly in the domain of attribute handling, as well as some additional functionality. The changes make *collapse* smarter, more broadly compatible and more secure, and should not break existing code. ### Changes to Functionality * *Deep Matrix Dispatch / Extended Time Series Support:* The default methods of all statistical and transformation functions dispatch to the matrix method if `is.matrix(x) && !inherits(x, "matrix")` evaluates to `TRUE`. This specification avoids invoking the default method on classed matrix-based objects (such as multivariate time series of the *xts* / *zoo* class) not inheriting a 'matrix' class, while still allowing the user to manually call the default method on matrices (objects with implicit or explicit 'matrix' class). The change implies that *collapse*'s generic statistical functions are now well suited to transform *xts* / *zoo* and many other time series and matrix-based classes. * *Fully Non-Destructive Piped Workflow:* `fgroup_by(x, ...)` now only adds a class *grouped_df*, not classes *table_df*, *tbl*, *grouped_df*, and preserves all classes of `x`. This implies that workflows such as `x %>% fgroup_by(...) %>% fmean` etc. yields an object `xAG` of the same class and attributes as `x`, not a tibble as before. *collapse* aims to be as broadly compatible, class-agnostic and attribute preserving as possible. * *Thorough and Controlled Object Conversions:* Quick conversion functions `qDF`, `qDT` and `qM` now have additional arguments `keep.attr` and `class` providing precise user control over object conversions in terms of classes and other attributes assigned / maintained. The default (`keep.attr = FALSE`) yields *hard* conversions removing all but essential attributes from the object. E.g. before `qM(EuStockMarkets)` would just have returned `EuStockMarkets` (because `is.matrix(EuStockMarkets)` is `TRUE`) whereas now the time series class and 'tsp' attribute are removed. `qM(EuStockMarkets, keep.attr = TRUE)` returns `EuStockMarkets` as before. * *Smarter Attribute Handling:* Drawing on the guidance given in the R Internals manual, the following standards for optimal non-destructive attribute handling are formalized and communicated to the user: + The default and matrix methods of the *Fast Statistical Functions* preserve attributes of the input in grouped aggregations ('names', 'dim' and 'dimnames' are suitably modified). If inputs are classed objects (e.g. factors, time series, checked by `is.object`), the class and other attributes are dropped. Simple (non-grouped) aggregations of vectors and matrices do not preserve attributes, unless `drop = FALSE` in the matrix method. An exemption is made in the default methods of functions `ffirst`, `flast` and `fmode`, which always preserve the attributes (as the input could well be a factor or date variable). + The data frame methods are unaltered: All attributes of the data frame and columns in the data frame are preserved unless the computation result from each column is a scalar (not computing by groups) and `drop = TRUE` (the default). + Transformations with functions like `flag`, `fwithin`, `fscale` etc. are also unaltered: All attributes of the input are preserved in the output (regardless of whether the input is a vector, matrix, data.frame or related classed object). The same holds for transformation options modifying the input ("-", "-+", "/", "+", "\*", "%%", "-%%") when using `TRA()` function or the `TRA = "..."` argument to the *Fast Statistical Functions*. + For `TRA` 'replace' and 'replace_fill' options, the data type of the STATS is preserved, not of x. This provides better results particularly with functions like `fnobs` and `fndistinct`. E.g. previously `fnobs(letters, TRA = "replace")` would have returned the observation counts coerced to character, because `letters` is character. Now the result is integer typed. For attribute handling this means that the attributes of x are preserved unless x is a classed object and the data types of x and STATS do not match. An exemption to this rule is made if x is a factor and an integer (non-factor) replacement is offered to STATS. In that case the attributes of x are copied exempting the 'class' and 'levels' attribute, e.g. so that `fnobs(iris$Species, TRA = "replace")` gives an integer vector, not a (malformed) factor. In the unlikely event that STATS is a classed object, the attributes of STATS are preserved and the attributes of x discarded. * *Reduced Dependency Burden:* The dependency on the *lfe* package was made optional. Functions `fhdwithin` / `fhdbetween` can only perform higher-dimensional centering if *lfe* is available. Linear prediction and centering with a single factor (among a list of covariates) is still possible without installing *lfe*. This change means that *collapse* now only depends on base R and *Rcpp* and is supported down to R version 2.10. ### Additions * Added function `rsplit` for efficient (recursive) splitting of vectors and data frames. * Added function `fdroplevels` for very fast missing level removal + added argument `drop` to `qF` and `GRP.factor`, the default is `drop = FALSE`. The addition of `fdroplevels` also enhances the speed of the `fFtest` function. * `fgrowth` supports annualizing / compounding growth rates through added `power` argument. * A function `flm` was added for bare bones (weighted) linear regression fitting using different efficient methods: 4 from base R (`.lm.fit`, `solve`, `qr`, `chol`), using `fastLm` from *RcppArmadillo* (if installed), or `fastLm` from *RcppEigen* (if installed). * Added function `qTBL` to quickly convert R objects to tibble. * helpers `setAttrib`, `copyAttrib` and `copyMostAttrib` exported for fast attribute handling in R (similar to `attributes<-()`, these functions return a shallow copy of the first argument with the set of attributes replaced, but do not perform checks for attribute validity like `attributes<-()`. This can yield large performance gains with big objects). * helper `cinv` added wrapping the expression `chol2inv(chol(x))` (efficient inverse of a symmetric, positive definite matrix via Choleski factorization). * A shortcut `gby` is now available to abbreviate the frequently used `fgroup_by` function. * A print method for grouped data frames of any class was added. ### Improvements * Faster internal methods for factors for `funique`, `fmode` and `fndistinct`. * The *grouped_df* methods for `flag`, `fdiff`, `fgrowth` now also support multiple time variables to identify a panel e.g. `data %>% fgroup_by(region, person_id) %>% flag(1:2, list(month, day))`. * More security features for `fsubset.data.frame` / `ss`, `ss` is now internal generic and also supports subsetting matrices. * In some functions (like `na_omit`), passing double values (e.g. `1` instead of integer `1L`) or negative indices to the `cols` argument produced an error or unexpected behavior. This is now fixed in all functions. * Fixed a bug in helper function `all_obj_equal` occurring if objects are not all equal. * Some performance improvements through increased use of pointers and C API functions. # collapse 1.3.2 collapse 1.3.2, released mid September 2020: * Fixed a small bug in `fndistinct` for grouped distinct value counts on logical vectors. * Additional security for `ftransform`, which now efficiently checks the names of the data and replacement arguments for uniqueness, and also allows computing and transforming list-columns. * Added function `ftransformv` to facilitate transforming selected columns with function - a very efficient replacement for `dplyr::mutate_if` and `dplyr::mutate_at`. * `frename` now allows additional arguments to be passed to a renaming function. # collapse 1.3.1 collapse 1.3.1, released end of August 2020, is a patch for v1.3.0 that takes care of some unit test failures on certain operating systems (mostly because of numeric precision issues). It provides no changes to the code or functionality. # collapse 1.3.0 collapse 1.3.0, released mid August 2020: ### Changes to Functionality * `dapply` and `BY` now drop all unnecessary attributes if `return = "matrix"` or `return = "data.frame"` are explicitly requested (the default `return = "same"` still seeks to preserve the input data structure). * `unlist2d` now saves integer rownames if `row.names = TRUE` and a list of matrices without rownames is passed, and `id.factor = TRUE` generates a normal factor not an ordered factor. It is however possible to write `id.factor = "ordered"` to get an ordered factor id. * `fdiff` argument `logdiff` renamed to `log`, and taking logs is now done in R (reduces size of C++ code and does not generate as many NaN's). `logdiff` may still be used, but it may be deactivated in the future. Also in the matrix and data.frame methods for `flag`, `fdiff` and `fgrowth`, columns are only stub-renamed if more than one lag/difference/growth rate is computed. ### Additions * Added `fnth` for fast (grouped, weighted) n'th element/quantile computations. * Added `roworder(v)` and `colorder(v)` for fast row and column reordering. * Added `frename` and `setrename` for fast and flexible renaming (by reference). * Added function `fungroup`, as replacement for `dplyr::ungroup`, intended for use with `fgroup_by`. * `fmedian` now supports weights, computing a decently fast (grouped) weighted median based on radix ordering. * `fmode` now has the option to compute min and max mode, the default is still simply the first mode. * `fwithin` now supports quasi-demeaning (added argument `theta`) and can thus be used to manually estimate random-effects models. * `funique` is now generic with a default vector and data.frame method, providing fast unique values and rows of data. The default was changed to `sort = FALSE`. * The shortcut `gvr` was created for `get_vars(..., regex = TRUE)`. * A helper `.c` was introduced for non-standard concatenation (i.e. `.c(a, b) == c("a", "b")`). ### Improvements * `fmode` and `fndistinct` have become a bit faster. * `fgroup_by` now preserves *data.table*'s. * `ftransform` now also supports a data.frame as replacement argument, which automatically replaces matching columns and adds unmatched ones. Also `ftransform<-` was created as a more formal replacement method for this feature. * `collap` columns selected through `cols` argument are returned in the order selected if `keep.col.order = FALSE`. Argument `sort.row` is depreciated, and replace by argument `sort`. In addition the `decreasing` and `na.last` arguments were added and handed down to `GRP.default`. * `radixorder` 'sorted' attribute is now always attached. * `stats::D` which is masked when collapse is attached, is now preserved through methods `D.expression` and `D.call`. * `GRP` option `call = FALSE` to omit a call to `match.call` -> minor performance improvement. * Several small performance improvements through rewriting some internal helper functions in C and reworking some R code. * Performance improvements for some helper functions, `setRownames` / `setColnames`, `na_insert` etc. * Increased scope of testing statistical functions. The functionality of the package is now secured by 7700 unit tests covering all central bits and pieces. # collapse 1.2.1 collapse 1.2.1, released end of May 2020: * Minor fixes for 1.2.0 issues that prevented correct installation on Mac OS X and a vignette rebuilding error on solaris. * `fmode.grouped_df` with groups and weights now saves the sum of the weights instead of the max (this makes more sense as the max only applies if all elements are unique). # collapse 1.2.0 collapse 1.2.0, released mid May 2020: ### Changes to Functionality * *grouped_df* methods for fast statistical functions now always attach the grouping variables to the output in aggregations, unless argument `keep.group_vars = FALSE`. (formerly grouping variables were only attached if also present in the data. Code hinged on this feature should be adjusted) * `qF` `ordered` argument default was changed to `ordered = FALSE`, and the `NA` level is only added if `na.exclude = FALSE`. Thus `qF` now behaves exactly like `as.factor`. * `Recode` is depreciated in favor of `recode_num` and `recode_char`, it will be removed soon. Similarly `replace_non_finite` was renamed to `replace_Inf`. * In `mrtl` and `mctl` the argument `ret` was renamed `return` and now takes descriptive character arguments (the previous version was a direct C++ export and unsafe, code written with these functions should be adjusted). * `GRP` argument `order` is depreciated in favor of argument `decreasing`. `order` can still be used but will be removed at some point. ### Bug Fixes * Fixed a bug in `flag` where unused factor levels caused a group size error. ### Additions * Added a suite of functions for fast data manipulation: + `fselect` selects variables from a data frame and is equivalent but much faster than `dplyr::select`. + `fsubset` is a much faster version of `base::subset` to subset vectors, matrices and data.frames. The function `ss` was also added as a faster alternative to `[.data.frame`. + `ftransform` is a much faster update of `base::transform`, to transform data frames by adding, modifying or deleting columns. The function `settransform` does all of that by reference. + `fcompute` is equivalent to `ftransform` but returns a new data frame containing only the columns computed from an existing one. + `na_omit` is a much faster and enhanced version of `base::na.omit`. + `replace_NA` efficiently replaces missing values in multi-type data. * Added function `fgroup_by` as a much faster version of `dplyr::group_by` based on *collapse* grouping. It attaches a 'GRP' object to a data frame, but only works with *collapse*'s fast functions. This allows *dplyr* like manipulations that are fully *collapse* based and thus significantly faster, i.e. `data %>% fgroup_by(g1,g2) %>% fselect(cola,colb) %>% fmean`. Note that `data %>% dplyr::group_by(g1,g2) %>% dplyr::select(cola,colb) %>% fmean` still works, in which case the *dplyr* 'group' object is converted to 'GRP' as before. However `data %>% fgroup_by(g1,g2) %>% dplyr::summarize(...)` does not work. * Added function `varying` to efficiently check the variation of multi-type data over a dimension or within groups. * Added function `radixorder`, same as `base::order(..., method = "radix")` but more accessible and with built-in grouping features. * Added functions `seqid` and `groupid` for generalized run-length type id variable generation from grouping and time variables. `seqid` in particular strongly facilitates lagging / differencing irregularly spaced panels using `flag`, `fdiff` etc. * `fdiff` now supports quasi-differences i.e. $x_t - \rho x_{t-1}$ and quasi-log differences i.e. $log(x_t) - \rho log(x_{t-1})$. an arbitrary $\rho$ can be supplied. * Added a `Dlog` operator for faster access to log-differences. ### Improvements * Faster grouping with `GRP` and faster factor generation with added radix method + automatic dispatch between hash and radix method. `qF` is now ~ 5x faster than `as.factor` on character and around 30x faster on numeric data. Also `qG` was enhanced. * Further slight speed tweaks here and there. * `collap` now provides more control for weighted aggregations with additional arguments `w`, `keep.w` and `wFUN` to aggregate the weights as well. The defaults are `keep.w = TRUE` and `wFUN = fsum`. A specialty of `collap` remains that `keep.by` and `keep.w` also work for external objects passed, so code of the form `collap(data, by, FUN, catFUN, w = data$weights)` will now have an aggregated `weights` vector in the first column. * `qsu` now also allows weights to be passed in formula i.e. `qsu(data, by = ~ group, pid = ~ panelid, w = ~ weights)`. * `fgrowth` has a `scale` argument, the default is `scale = 100` which provides growth rates in percentage terms (as before), but this may now be changed. * All statistical and transformation functions now have a hidden list method, so they can be applied to unclassed list-objects as well. An error is however provided in grouped operations with unequal-length columns. # collapse 1.1.0 collapse 1.1.0 released early April 2020: * Fixed remaining gcc10, LTO and valgrind issues in C/C++ code, and added some more tests (there are now ~ 5300 tests ensuring that *collapse* statistical functions perform as expected). * Fixed the issue that supplying an unnamed list to `GRP()`, i.e. `GRP(list(v1, v2))` would give an error. Unnamed lists are now automatically named 'Group.1', 'Group.2', etc... * Fixed an issue where aggregating by a single id in `collap()` (i.e. `collap(data, ~ id1)`), the id would be coded as factor in the aggregated data.frame. All variables including id's now retain their class and attributes in the aggregated data. * Added weights (`w`) argument to `fsum` and `fprod`. * Added an argument `mean = 0` to `fwithin / W`. This allows simple and grouped centering on an arbitrary mean, `0` being the default. For grouped centering `mean = "overall.mean"` can be specified, which will center data on the overall mean of the data. The logical argument `add.global.mean = TRUE` used to toggle this in *collapse* 1.0.0 is therefore depreciated. * Added arguments `mean = 0` (the default) and `sd = 1` (the default) to `fscale / STD`. These arguments now allow to (group) scale and center data to an arbitrary mean and standard deviation. Setting `mean = FALSE` will just scale data while preserving the mean(s). Special options for grouped scaling are `mean = "overall.mean"` (same as `fwithin / W`), and `sd = "within.sd"`, which will scale the data such that the standard deviation of each group is equal to the within- standard deviation (= the standard deviation computed on the group-centered data). Thus group scaling a panel-dataset with `mean = "overall.mean"` and `sd = "within.sd"` harmonizes the data across all groups in terms of both mean and variance. The fast algorithm for variance calculation toggled with `stable.algo = FALSE` was removed from `fscale`. Welford's numerically stable algorithm used by default is fast enough for all practical purposes. The fast algorithm is still available for `fvar` and `fsd`. * Added the modulus (`%%`) and subtract modulus (`-%%`) operations to `TRA()`. * Added the function `finteraction`, for fast interactions, and `as_character_factor` to coerce a factor, or all factors in a list, to character (analogous to `as_numeric_factor`). Also exported the function `ckmatch`, for matching with error message showing non-matched elements. # collapse 1.0.0 and earlier * First version of the package featuring only the functions `collap` and `qsu` based on code shared by Sebastian Krantz on R-devel, February 2019. * Major rework of the package using Rcpp and data.table internals, introduction of fast statistical functions and operators and expansion of the scope of the package to a broad set of data transformation and exploration tasks. Several iterations of enhancing speed of R code. Seamless integration of *collapse* with *dplyr*, *plm* and *data.table*. CRAN release of *collapse* 1.0.0 on 19th March 2020. collapse/inst/0000755000176200001440000000000015202627535013040 5ustar liggesuserscollapse/inst/CITATION0000644000176200001440000000270015202504365014167 0ustar liggesuserscitHeader("To cite collapse in publications, please use:") bibentry(bibtype = "misc", key = "krantz2026collapse", title = "{collapse}: Advanced and Fast Statistical Computing and Data Transformation in {R}", author = person(given = "Sebastian", family = "Krantz", email = "sebastian.krantz@kielinstitut.de"), journal = "Journal of Statistical Software", year = "2026", volume = "116", number = "1", pages = "1--38", doi = "10.18637/jss.v116.i01", textVersion = "Krantz, S. (2026). collapse: Advanced and fast statistical computing and data transformation in R. Journal of Statistical Software, 116(1), 1–38. https://doi.org/10.18637/jss.v116.i01") year <- sub("-.*", "", meta$Date) note <- sprintf("R package version %s", meta$Version) bibentry(bibtype = "Manual", key = "rcollapse", title = "collapse: Advanced and Fast Data Transformation in R", author = person("Sebastian", "Krantz"), year = year, note = note, doi = "10.5281/zenodo.8433090", url = "https://fastverse.org/collapse/", textVersion = paste0("Krantz (", year, "). collapse: Advanced and Fast Data Transformation in R. ", note, ". doi:10.5281/zenodo.8433090. https://fastverse.org/collapse/.")) collapse/inst/doc/0000755000176200001440000000000015202627535013605 5ustar liggesuserscollapse/inst/doc/collapse_documentation.Rmd0000644000176200001440000001477315202504365021013 0ustar liggesusers--- title: "collapse Documentation and Resources" author: "Sebastian Krantz" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: vignette: > %\VignetteIndexEntry{collapse Documentation and Resources} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- *collapse* is a C/C++ based package for data transformation and statistical computing in R. It's aims are: 1. To facilitate complex data transformation, exploration and computing tasks in R. 2. To help make R code fast, flexible, parsimonious and programmer friendly. Documentation comes in 7 different forms: ## Built-In Structured Documentation After installing *collapse*, you can call `help("collapse-documentation")` which will produce a central help page providing a broad overview of the entire functionality of the package, including direct links to all function documentation pages and links to 13 further topical documentation pages (names in `.COLLAPSE_TOPICS`) describing how clusters of related functions work together. Thus *collapse* comes with a fully structured hierarchical documentation which you can browse within R - and that provides everything necessary to fully understand the package. The Documentation is also available [online](). The package page under `help("collapse-package")` provides some general information about the package and its design philosophy, as well as a compact set of examples covering important functionality. Reading `help("collapse-package")` and `help("collapse-documentation")` is the most comprehensive way to get acquainted with the package. `help("collapse-documentation")` is always the most up-to-date resource. ## DeepWiki [DeepWiki](https://deepwiki.com/) is an AI-powered platform designed to automatically generate structured, interactive documentation for software repositories, primarily on GitHub. Developed by [Cognition AI](https://cognition.ai/)—the same laboratory behind the autonomous AI engineer [Devin](https://devin.ai/)—it serves as a dynamic, "Wikipedia-like" encyclopedia for codebases. While not more comprehensive or accurate than the structured documentation, it is great to learn more about the internal structure of *collapse* and use a chatbot (Devin) to ask questions about or write code using *collapse*. You can access the *collapse* DeepWiki [here](https://deepwiki.com/fastverse/collapse). ## JSS Article The [collapse article](https://doi.org/10.18637/jss.v116.i01) is published in the [Journal of Statistical Software](https://www.jstatsoft.org/) (volume 116, issue 1). If you want to 'read something concise' about *collapse*, this is the best place to start. ## Cheatsheet A fairly up-to-date (v2.0) [cheatsheet]() compactly summarizes the package. ## Vignettes Updated vignettes are * [***collapse* for *tidyverse* Users**](): A quick introduction to *collapse* for *tidyverse* users * [***collapse* and *sf***](): Shows how collapse can be used to efficiently manipulate *sf* data frames * [***collapse*'s Handling of R Objects**](): A quick view behind the scenes of class-agnostic R programming * [**Developing with *collapse***](): How to write efficient statistical packages using R and *collapse* The other vignettes (only available [online]()) do not cover major features introduced in versions >= 1.7, but contain much useful information and examples: * [**Introduction to *collapse* **](): Introduces key features in a structured way * [***collapse* and *dplyr* **](): Demonstrates the integration of collapse with *dplyr* / *tidyverse* workflows and associated performance improvements * [***collapse* and *plm***](): Demonstrates the integration of collapse with *plm* and shows examples of efficient programming with panel data * [***collapse* and *data.table***](): Shows how collapse and *data.table* may be used together in a harmonious way ## Blog I maintain a [blog]() linked to [Rbloggers.com]() where I introduced *collapse* with some compact posts covering central functionality. Among these, the post about [programming with *collapse*]() is useful for developers. ## Presentations and Slides - I have presented *collapse* (v1.8) in some level of detail at useR 2022. A 2h video recording that provides a quite comprehensive introduction is available [here](). The corresponding slides are available [here](). - I have recently presented *collapse* (v2.1) and the *fastverse* at a workshop on "[Speeding Up Empirical Research: Tools and Techniques for Fast Computing](https://www.bportugal.pt/en/evento/workshop-speeding-empirical-research-tools-and-techniques-fast-computing-bplim)" organized by the Bank of Portugal in December 2025. My 45-minute talk focused on two advanced applications in international trade and spatial network analysis/package development. You can find the materials (slides and recording) [here](https://github.com/BPLIM/Workshops/tree/master/BPLIM2025). collapse/inst/doc/collapse_and_data.table.html0000644000176200001440000054671215202627530021210 0ustar liggesusers collapse and data.table

collapse and data.table

Harmony and High Performance

Sebastian Krantz

2021-06-27

This vignette focuses on using collapse with the popular data.table package by Matt Dowle and Arun Srinivasan. In contrast to dplyr and plm whose methods (‘grouped_df’, ‘pseries’, ‘pdata.frame’) collapse supports, the integration between collapse and data.table is hidden in the ‘data.frame’ methods and collapse’s C code.

From version 1.6.0 collapse seamlessly handles data.tables, permitting reference operations (set*, :=) on data tables created with collapse (qDT) or returned from collapse’s data manipulation functions (= all functions except .FAST_FUN, .OPERATOR_FUN, BY and TRA, see the NEWS for details on the low-level integration). Apart from data.table reference semantics, both packages work similarly on the C/C++ side of things, and nicely complement each other in functionality.

Overview of Both Packages

Both data.table and collapse are high-performance packages that work well together. For effective co-use it is helpful to understand where each has its strengths, what one can do what the other cannot, and where they overlap. Therefore this small comparison:

  • data.table offers an enhanced data frame based class to contain data (including list columns). For this class it provides a concise data manipulation syntax which also includes fast aggregation / slit-apply-combine computing, (rolling, non-equi) joins, keying, reshaping, some time-series functionality like lagging and rolling statistics, set operations on tables and a number of very useful other functions like the fast csv reader, fast switches, list-transpose etc.. data.table makes data management, and computations on data very easy and scalable, supporting huge datasets in a very memory efficient way. The package caters well to the end user by compressing an enormous amount of functionality into two square brackets []. Some of the exported functions are great for programming and also support other classes, but a lot of the functionality and optimization of data.table happens under the hood and can only be accessed through the non-standard evaluation table [i, j, by] syntax. This syntax has a cost of about 1-3 milliseconds for each call. Memory efficiency and thread-parallelization make data.table the star performer on huge data.

  • collapse is class-agnostic in nature, supporting vectors, matrices, data frames and non-destructively handling most R classes and objects. It focuses on advanced statistical computing, proving fast column-wise grouped and weighted statistical functions, fast and complex data aggregation and transformations, linear fitting, time series and panel data computations, advanced summary statistics, and recursive processing of lists of data objects. It also includes powerful functions for data manipulation, grouping / factor generation, recoding, handling outliers and missing values. The package default for missing values is na.rm = TRUE, which is implemented efficiently in C/C++ in all functions. collapse supports both tidyverse (piped) and base R / standard evaluation programming. It makes accessible most of it’s internal C/C++ based functionality (like grouping objects). collapse’s R functions are simple and strongly optimized, i.e. they access the serial C/C++ code quickly, resulting in baseline execution speeds of 10-50 microseconds. All of this makes collapse ideal for advanced statistical computing on matrices and larger datasets, and tasks requiring fast programs with repeated function executions.

Interoperating and some Do’s and Dont’s

Applying collapse functions to a data.table always gives a data.table back e.g. 

library(collapse)
library(magrittr)
library(data.table)

DT <- qDT(wlddev) # collapse::qDT converts objects to data.table using a shallow copy


DT %>% gby(country) %>% gv(9:13) %>% fmean
#                    country      PCGDP   LIFEEX     GINI        ODA         POP
#                     <char>      <num>    <num>    <num>      <num>       <num>
#   1:           Afghanistan   483.8351 49.19717       NA 1487548499 18362258.22
#   2:               Albania  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:               Algeria  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4:        American Samoa 10071.0659       NA       NA         NA    43115.10
#   5:               Andorra 40083.0911       NA       NA         NA    51547.35
#  ---                                                                          
# 212: Virgin Islands (U.S.) 35629.7336 73.71292       NA         NA    92238.53
# 213:    West Bank and Gaza  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:           Yemen, Rep.  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:                Zambia  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:              Zimbabwe  1219.4360 54.53360 45.93333  397104997  9402160.33

# Same thing, but notice that fmean give's NA's for missing countries
DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13]
# Key: <country>
#                    country      PCGDP   LIFEEX     GINI        ODA         POP
#                     <char>      <num>    <num>    <num>      <num>       <num>
#   1:           Afghanistan   483.8351 49.19717      NaN 1487548499 18362258.22
#   2:               Albania  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:               Algeria  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4:        American Samoa 10071.0659      NaN      NaN        NaN    43115.10
#   5:               Andorra 40083.0911      NaN      NaN        NaN    51547.35
#  ---                                                                          
# 212: Virgin Islands (U.S.) 35629.7336 73.71292      NaN        NaN    92238.53
# 213:    West Bank and Gaza  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:           Yemen, Rep.  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:                Zambia  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:              Zimbabwe  1219.4360 54.53360 45.93333  397104997  9402160.33

# This also works without magrittr pipes with the collap() function
collap(DT, ~ country, fmean, cols = 9:13)
#                    country      PCGDP   LIFEEX     GINI        ODA         POP
#                     <char>      <num>    <num>    <num>      <num>       <num>
#   1:           Afghanistan   483.8351 49.19717       NA 1487548499 18362258.22
#   2:               Albania  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:               Algeria  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4:        American Samoa 10071.0659       NA       NA         NA    43115.10
#   5:               Andorra 40083.0911       NA       NA         NA    51547.35
#  ---                                                                          
# 212: Virgin Islands (U.S.) 35629.7336 73.71292       NA         NA    92238.53
# 213:    West Bank and Gaza  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:           Yemen, Rep.  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:                Zambia  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:              Zimbabwe  1219.4360 54.53360 45.93333  397104997  9402160.33

By default, collapse orders groups in aggregations, which is equivalent to using keyby with data.table. gby / fgroup_by has an argument sort = FALSE to yield an unordered grouping equivalent to data.table’s by on character data1.

At this data size collapse outperforms data.table (which might reverse as data size grows, depending in your computer, the number of data.table threads used, and the function in question):

library(microbenchmark)

microbenchmark(collapse = DT %>% gby(country) %>% get_vars(9:13) %>% fmean,
               data.table = DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13])
# Unit: microseconds
#        expr     min       lq     mean   median       uq      max neval
#    collapse 203.073 212.7285 223.4156 217.1565 225.6230  475.559   100
#  data.table 758.623 777.4010 929.5450 793.1655 854.4605 2292.515   100

It is critical to never do something like this:

DT[, lapply(.SD, fmean), keyby = country, .SDcols = 9:13]
# Key: <country>
#                    country      PCGDP   LIFEEX     GINI        ODA         POP
#                     <char>      <num>    <num>    <num>      <num>       <num>
#   1:           Afghanistan   483.8351 49.19717       NA 1487548499 18362258.22
#   2:               Albania  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:               Algeria  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4:        American Samoa 10071.0659       NA       NA         NA    43115.10
#   5:               Andorra 40083.0911       NA       NA         NA    51547.35
#  ---                                                                          
# 212: Virgin Islands (U.S.) 35629.7336 73.71292       NA         NA    92238.53
# 213:    West Bank and Gaza  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:           Yemen, Rep.  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:                Zambia  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:              Zimbabwe  1219.4360 54.53360 45.93333  397104997  9402160.33

The reason is that collapse functions are S3 generic with methods for vectors, matrices and data frames among others. So you incur a method-dispatch for every column and every group the function is applied to.

fmean
# function (x, ...) 
# UseMethod("fmean")
# <bytecode: 0x10fb58540>
# <environment: namespace:collapse>
methods(fmean)
# [1] fmean.data.frame* fmean.default*    fmean.grouped_df* fmean.list*       fmean.matrix*    
# [6] fmean.units*      fmean.zoo*       
# see '?methods' for accessing help and source code

You may now contend that base::mean is also S3 generic, but in this DT[, lapply(.SD, mean, na.rm = TRUE), by = country, .SDcols = 9:13] code data.table does not use base::mean, but data.table:::gmean, an internal optimized mean function which is efficiently applied over those groups (see ?data.table::GForce). fmean works similar, and includes this functionality explicitly.

args(fmean.data.frame)
# function (x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], 
#     use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], 
#     ...) 
# NULL

Here we can see the x argument for the data, the g argument for grouping vectors, a weight vector w, different options TRA to transform the original data using the computed means, and some functionality regarding missing values (default: removed / skipped), group names (which are added as row-names to a data frame, but not to a data.table) etc. So we can also do

fmean(gv(DT, 9:13), DT$country)
#           PCGDP   LIFEEX     GINI        ODA         POP
#           <num>    <num>    <num>      <num>       <num>
#   1:   483.8351 49.19717       NA 1487548499 18362258.22
#   2:  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4: 10071.0659       NA       NA         NA    43115.10
#   5: 40083.0911       NA       NA         NA    51547.35
#  ---                                                    
# 212: 35629.7336 73.71292       NA         NA    92238.53
# 213:  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:  1219.4360 54.53360 45.93333  397104997  9402160.33

# Or
g <- GRP(DT, "country")
add_vars(g[["groups"]], fmean(gv(DT, 9:13), g))
#                    country      PCGDP   LIFEEX     GINI        ODA         POP
#                     <char>      <num>    <num>    <num>      <num>       <num>
#   1:           Afghanistan   483.8351 49.19717       NA 1487548499 18362258.22
#   2:               Albania  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:               Algeria  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4:        American Samoa 10071.0659       NA       NA         NA    43115.10
#   5:               Andorra 40083.0911       NA       NA         NA    51547.35
#  ---                                                                          
# 212: Virgin Islands (U.S.) 35629.7336 73.71292       NA         NA    92238.53
# 213:    West Bank and Gaza  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:           Yemen, Rep.  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:                Zambia  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:              Zimbabwe  1219.4360 54.53360 45.93333  397104997  9402160.33

To give us the same result obtained through the high-level functions gby / fgroup_by or collap. This is however not what data.table is doing in DT[, lapply(.SD, fmean), by = country, .SDcols = 9:13]. Since fmean is not a function it recognizes and is able to optimize, it does something like this,

BY(gv(DT, 9:13), g, fmean) # using collapse::BY
#           PCGDP   LIFEEX     GINI        ODA         POP
#           <num>    <num>    <num>      <num>       <num>
#   1:   483.8351 49.19717       NA 1487548499 18362258.22
#   2:  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4: 10071.0659       NA       NA         NA    43115.10
#   5: 40083.0911       NA       NA         NA    51547.35
#  ---                                                    
# 212: 35629.7336 73.71292       NA         NA    92238.53
# 213:  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:  1219.4360 54.53360 45.93333  397104997  9402160.33

which applies fmean to every group in every column of the data.

More generally, it is very important to understand that collapse is not based around applying functions to data by groups using some universal mechanism: The dplyr data %>% group_by(...) %>% summarize(...) / mutate(...) and data.table [i, j, by] syntax are essentially universal mechanisms to apply any function to data by groups. data.table additionally internally optimizes some functions (min, max, mean, median, var, sd, sum, prod, first, last, head, tail) which they called GForce, ?data.table::GForce.

collapse instead provides grouped statistical and transformation functions where all grouped computation is done efficiently in C++, and some supporting mechanisms (fgroup_by, collap) to operate them. In data.table words, everything2 in collapse, the Fast Statistical Functions, data transformations, time series etc. is GForce optimized.

The full set of optimized grouped statistical and transformation functions in collapse is:

.FAST_FUN
#  [1] "fmean"      "fmedian"    "fmode"      "fsum"       "fprod"      "fsd"        "fvar"      
#  [8] "fmin"       "fmax"       "fnth"       "ffirst"     "flast"      "fnobs"      "fndistinct"
# [15] "fcumsum"    "fscale"     "fbetween"   "fwithin"    "fhdbetween" "fhdwithin"  "flag"      
# [22] "fdiff"      "fgrowth"

Additional optimized grouped functions include TRA, qsu, varying, fFtest, psmat, psacf, pspacf, psccf.

The nice thing about those GForce (fast) functions provided by collapse is that they can be accessed explicitly and programmatically without any overhead as incurred through data.table, they cover a broader range of statistical operations (such as mode, distinct values, order statistics), support sampling weights, operate in a class-agnostic way on vectors, matrices, data.frame’s and many related classes, and cover transformations (replacing and sweeping, scaling, (higher order) centering, linear fitting) and time series functionality (lags, differences and growth rates, including irregular time series and unbalanced panels).

So if we would want to use fmean inside the data.table, we should do something like this:

# This does not save the grouping columns, we are simply passing a grouping vector to g
# and aggregating the subset of the data table (.SD).
DT[, fmean(.SD, country), .SDcols = 9:13]
#           PCGDP   LIFEEX     GINI        ODA         POP
#           <num>    <num>    <num>      <num>       <num>
#   1:   483.8351 49.19717       NA 1487548499 18362258.22
#   2:  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4: 10071.0659       NA       NA         NA    43115.10
#   5: 40083.0911       NA       NA         NA    51547.35
#  ---                                                    
# 212: 35629.7336 73.71292       NA         NA    92238.53
# 213:  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:  1219.4360 54.53360 45.93333  397104997  9402160.33

# If we want to keep the grouping columns, we need to group .SD first.
DT[, fmean(gby(.SD, country)), .SDcols = c(1L, 9:13)]
#                    country      PCGDP   LIFEEX     GINI        ODA         POP
#                     <char>      <num>    <num>    <num>      <num>       <num>
#   1:           Afghanistan   483.8351 49.19717       NA 1487548499 18362258.22
#   2:               Albania  2819.2400 71.68027 31.41111  312928126  2708297.17
#   3:               Algeria  3532.2714 63.56290 34.36667  612238500 25305290.68
#   4:        American Samoa 10071.0659       NA       NA         NA    43115.10
#   5:               Andorra 40083.0911       NA       NA         NA    51547.35
#  ---                                                                          
# 212: Virgin Islands (U.S.) 35629.7336 73.71292       NA         NA    92238.53
# 213:    West Bank and Gaza  2388.4348 71.60780 34.52500 1638581462  3312289.13
# 214:           Yemen, Rep.  1069.6596 52.53707 35.46667  859950996 13741375.82
# 215:                Zambia  1318.8627 51.09263 52.68889  734624330  8614972.38
# 216:              Zimbabwe  1219.4360 54.53360 45.93333  397104997  9402160.33

Needless to say this kind of programming seems a bit arcane, so there is actually not that great of a scope to use collapse’s Fast Statistical Functions for aggregations inside data.table. I drive this point home with a benchmark:

microbenchmark(collapse = DT %>% gby(country) %>% get_vars(9:13) %>% fmean,
               data.table = DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13],
               data.table_base = DT[, lapply(.SD, base::mean, na.rm = TRUE), keyby = country, .SDcols = 9:13],
               hybrid_bad = DT[, lapply(.SD, fmean), keyby = country, .SDcols = 9:13],
               hybrid_ok = DT[, fmean(gby(.SD, country)), .SDcols = c(1L, 9:13)])
# Unit: microseconds
#             expr      min        lq      mean    median        uq       max neval
#         collapse  207.419  234.9915  322.3994  255.6760  283.6790  1685.305   100
#       data.table  755.630  845.7685 1029.9024  904.6650  962.1060  2409.529   100
#  data.table_base 2795.257 3148.4310 4034.2081 3349.8025 3561.9570 37919.916   100
#       hybrid_bad 2198.994 2481.3815 3737.1102 2650.5680 2909.4215 62158.747   100
#        hybrid_ok  374.699  451.1025  564.6873  484.9275  542.8605  2082.554   100

It is evident that data.table has some overhead, so there is absolutely no need to do this kind of syntax manipulation.

There is more scope to use collapse transformation functions inside data.table.

Below some basic examples:

# Computing a column containing the sum of ODA received by country
DT[, sum_ODA := sum(ODA, na.rm = TRUE), by = country]
# Same using fsum; "replace_fill" overwrites missing values, "replace" keeps the
DT[, sum_ODA := fsum(ODA, country, TRA = "replace_fill")]  
# Same: A native collapse solution using settransform (or its shortcut form)
settfm(DT, sum_ODA = fsum(ODA, country, TRA = "replace_fill"))  

# settfm may be more convenient than `:=` for multiple column modifications,
# each involving a different grouping:
  # This computes the percentage of total ODA distributed received by 
  # each country both over time and within a given year
settfm(DT, perc_c_ODA = fsum(ODA, country, TRA = "%"),
           perc_y_ODA = fsum(ODA, year, TRA = "%"))

The TRA argument is available to all Fast Statistical Functions (see the macro .FAST_STAT_FUN) and offers 10 different replacing and sweeping operations. Note that TRA() can also be called directly to replace or sweep with a previously aggregated data.table. A set of operators %rr%, %r+%, %r-%, %r*%, %r/%, %cr%, %c+%, %c-%, %c*%, %c/% additionally facilitate row- or column-wise replacing or sweeping out vectors of statistics or other data.table’s.

Similarly, we can use the following vector valued functions

setdiff(.FAST_FUN, .FAST_STAT_FUN)
# [1] "fcumsum"    "fscale"     "fbetween"   "fwithin"    "fhdbetween" "fhdwithin"  "flag"      
# [8] "fdiff"      "fgrowth"

for very efficient data transformations:

# Centering GDP
DT[, demean_PCGDP := PCGDP - mean(PCGDP, na.rm = TRUE), by = country]
DT[, demean_PCGDP := fwithin(PCGDP, country)]

# Lagging GDP
DT[order(year), lag_PCGDP := shift(PCGDP, 1L), by = country]
DT[, lag_PCGDP := flag(PCGDP, 1L, country, year)]

# Computing a growth rate
DT[order(year), growth_PCGDP := (PCGDP / shift(PCGDP, 1L) - 1) * 100, by = country]
DT[, lag_PCGDP := fgrowth(PCGDP, 1L, 1L, country, year)] # 1 lag, 1 iteration

# Several Growth rates
DT[order(year), paste0("growth_", .c(PCGDP, LIFEEX, GINI, ODA)) := (.SD / shift(.SD, 1L) - 1) * 100, 
   by = country, .SDcols = 9:13]

# Same thing using collapse
DT %<>% tfm(gv(., 9:13) %>% fgrowth(1L, 1L, country, year) %>% add_stub("growth_"))

# Or even simpler using settransform and the Growth operator
settfmv(DT, 9:13, G, 1L, 1L, country, year, apply = FALSE)

head(DT)
#        country  iso3c       date  year decade     region     income   OECD PCGDP LIFEEX  GINI
#         <char> <fctr>     <Date> <int>  <int>     <fctr>     <fctr> <lgcl> <num>  <num> <num>
# 1: Afghanistan    AFG 1961-01-01  1960   1960 South Asia Low income  FALSE    NA 32.446    NA
# 2: Afghanistan    AFG 1962-01-01  1961   1960 South Asia Low income  FALSE    NA 32.962    NA
# 3: Afghanistan    AFG 1963-01-01  1962   1960 South Asia Low income  FALSE    NA 33.471    NA
# 4: Afghanistan    AFG 1964-01-01  1963   1960 South Asia Low income  FALSE    NA 33.971    NA
# 5: Afghanistan    AFG 1965-01-01  1964   1960 South Asia Low income  FALSE    NA 34.463    NA
# 6: Afghanistan    AFG 1966-01-01  1965   1960 South Asia Low income  FALSE    NA 34.948    NA
#          ODA     POP     sum_ODA perc_c_ODA perc_y_ODA demean_PCGDP lag_PCGDP growth_PCGDP
#        <num>   <num>       <num>      <num>      <num>        <num>     <num>        <num>
# 1: 116769997 8996973 89252909923  0.1308305  0.4441407           NA        NA           NA
# 2: 232080002 9169410 89252909923  0.2600251  0.7356654           NA        NA           NA
# 3: 112839996 9351441 89252909923  0.1264272  0.3494956           NA        NA           NA
# 4: 237720001 9543205 89252909923  0.2663443  0.7003399           NA        NA           NA
# 5: 295920013 9744781 89252909923  0.3315522  0.8570540           NA        NA           NA
# 6: 341839996 9956320 89252909923  0.3830015  0.8992630           NA        NA           NA
#    growth_LIFEEX growth_GINI growth_ODA growth_POP G1.PCGDP G1.LIFEEX G1.GINI    G1.ODA   G1.POP
#            <num>       <num>      <num>      <num>    <num>     <num>   <num>     <num>    <num>
# 1:            NA          NA         NA         NA       NA        NA      NA        NA       NA
# 2:      1.590335          NA   98.74969   1.916611       NA  1.590335      NA  98.74969 1.916611
# 3:      1.544202          NA  -51.37884   1.985199       NA  1.544202      NA -51.37884 1.985199
# 4:      1.493830          NA  110.66998   2.050636       NA  1.493830      NA 110.66998 2.050636
# 5:      1.448294          NA   24.48259   2.112246       NA  1.448294      NA  24.48259 2.112246
# 6:      1.407306          NA   15.51770   2.170793       NA  1.407306      NA  15.51770 2.170793

Since transformations (:= operations) are not highly optimized in data.table, collapse will be faster in most circumstances. Also time series functionality in collapse is significantly faster as it does not require data to be ordered or balanced to compute. For example flag computes an ordered lag without sorting the entire data first.

# Lets generate a large dataset and benchmark this stuff
DT_large <- replicate(1000, qDT(wlddev), simplify = FALSE) %>% 
    lapply(tfm, country = paste(country, rnorm(1))) %>%
    rbindlist

# 12.7 million Obs
fdim(DT_large)
# [1] 13176000       13

microbenchmark(
  S1 = DT_large[, sum_ODA := sum(ODA, na.rm = TRUE), by = country],
  S2 = DT_large[, sum_ODA := fsum(ODA, country, TRA = "replace_fill")],
  S3 = settfm(DT_large, sum_ODA = fsum(ODA, country, TRA = "replace_fill")),
  W1 = DT_large[, demean_PCGDP := PCGDP - mean(PCGDP, na.rm = TRUE), by = country],
  W2 = DT_large[, demean_PCGDP := fwithin(PCGDP, country)],
  L1 = DT_large[order(year), lag_PCGDP := shift(PCGDP, 1L), by = country],
  L2 = DT_large[, lag_PCGDP := flag(PCGDP, 1L, country, year)],
  L3 = DT_large[, lag_PCGDP := shift(PCGDP, 1L), by = country], # Not ordered
  L4 = DT_large[, lag_PCGDP := flag(PCGDP, 1L, country)], # Not ordered
  times = 5
)
# Unit: milliseconds
#  expr        min         lq      mean     median        uq       max neval
#    S1  343.03396  347.18443  391.7494  364.51431  379.7866  524.2279     5
#    S2  100.52544  101.72645  165.8369  128.76042  153.6818  344.4906     5
#    S3   98.48249  104.80830  120.3499  114.20591  127.0192  157.2335     5
#    W1  913.00883 1009.29930 1071.0633 1035.74446 1104.7680 1292.4957     5
#    W2   99.48199   99.69654  110.0907  113.95884  118.5229  118.7931     5
#    L1 1812.59987 1822.58026 1896.8809 1905.67377 1942.9434 2000.6074     5
#    L2  110.36056  128.45845  135.0995  133.80219  139.1405  163.7357     5
#    L3  611.28392  665.22123  768.0616  718.38679  803.7170 1041.6991     5
#    L4   64.26369   66.99006  105.7952   84.26537  106.1809  207.2758     5

rm(DT_large)
gc()
#           used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
# Ncells 3113072 166.3    8413097  449.4         NA   8413097  449.4
# Vcells 7897587  60.3  324289364 2474.2      16384 405361681 3092.7

Further collapse features supporting data.table’s

As mentioned, qDT is a flexible and very fast function to create / column-wise convert R objects to data.table’s. You can also row-wise convert a matrix to data.table using mrtl:

# Creating a matrix from mtcars
m <- qM(mtcars) 
str(m)
#  num [1:32, 1:11] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
#   ..$ : chr [1:11] "mpg" "cyl" "disp" "hp" ...

# Demonstrating another nice feature of qDT
qDT(m, row.names.col = "car") %>% head
#                  car   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#               <char> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
# 1:         Mazda RX4  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4
# 2:     Mazda RX4 Wag  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4
# 3:        Datsun 710  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1
# 4:    Hornet 4 Drive  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1
# 5: Hornet Sportabout  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2
# 6:           Valiant  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1

# Row-wise conversion to data.table
mrtl(m, names = TRUE, return = "data.table") %>% head(2)
#    Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout Valiant Duster 360 Merc 240D
#        <num>         <num>      <num>          <num>             <num>   <num>      <num>     <num>
# 1:        21            21       22.8           21.4              18.7    18.1       14.3      24.4
# 2:         6             6        4.0            6.0               8.0     6.0        8.0       4.0
#    Merc 230 Merc 280 Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood
#       <num>    <num>     <num>      <num>      <num>       <num>              <num>
# 1:     22.8     19.2      17.8       16.4       17.3        15.2               10.4
# 2:      4.0      6.0       6.0        8.0        8.0         8.0                8.0
#    Lincoln Continental Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla Toyota Corona
#                  <num>             <num>    <num>       <num>          <num>         <num>
# 1:                10.4              14.7     32.4        30.4           33.9          21.5
# 2:                 8.0               8.0      4.0         4.0            4.0           4.0
#    Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
#               <num>       <num>      <num>            <num>     <num>         <num>        <num>
# 1:             15.5        15.2       13.3             19.2      27.3            26         30.4
# 2:              8.0         8.0        8.0              8.0       4.0             4          4.0
#    Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
#             <num>        <num>         <num>      <num>
# 1:           15.8         19.7            15       21.4
# 2:            8.0          6.0             8        4.0

The computational efficiency of these functions makes them very useful to use in data.table based workflows.

# Benchmark
microbenchmark(qDT(m, "car"), mrtl(m, TRUE, "data.table"))
# Unit: microseconds
#                         expr   min    lq    mean median    uq    max neval
#                qDT(m, "car") 4.838 5.043 6.16230 5.3300 6.437 20.049   100
#  mrtl(m, TRUE, "data.table") 3.608 3.854 4.23981 3.9975 4.182 15.908   100

For example we could regress the growth rate of GDP per capita on the Growth rate of life expectancy in each country and save results in a data.table:

library(lmtest)

wlddev %>% fselect(country, PCGDP, LIFEEX) %>% 
  # This counts missing values on PCGDP and LIFEEX only
  na_omit(cols = -1L) %>% 
  # This removes countries with less than 20 observations
  fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% 
  qDT %>% 
  # Run estimations by country using data.table
  .[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX))), "Coef"), keyby = country] %>% head
# Key: <country>
#    country        Coef   Estimate Std. Error    t value    Pr(>|t|)
#     <char>      <char>      <num>      <num>      <num>       <num>
# 1: Albania (Intercept) -3.6146411   2.371885 -1.5239527 0.136023086
# 2: Albania   G(LIFEEX) 22.1596308   7.288971  3.0401591 0.004325856
# 3: Algeria (Intercept)  0.5973329   1.740619  0.3431726 0.732731107
# 4: Algeria   G(LIFEEX)  0.8412547   1.689221  0.4980134 0.620390703
# 5:  Angola (Intercept) -3.3793976   1.540330 -2.1939445 0.034597175
# 6:  Angola   G(LIFEEX)  4.2362895   1.402380  3.0207852 0.004553260

If we only need the coefficients, not the standard errors, we can also use collapse::flm together with mrtl:

wlddev %>% fselect(country, PCGDP, LIFEEX) %>% 
  na_omit(cols = -1L) %>% 
  fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% 
  qDT %>% 
  .[, mrtl(flm(fgrowth(PCGDP)[-1L], 
               cbind(Intercept = 1, 
                     LIFEEX = fgrowth(LIFEEX)[-1L])), TRUE), 
    keyby = country] %>% head
# Key: <country>
#                country   Intercept     LIFEEX
#                 <char>       <num>      <num>
# 1:             Albania -3.61464113 22.1596308
# 2:             Algeria  0.59733291  0.8412547
# 3:              Angola -3.37939760  4.2362895
# 4: Antigua and Barbuda -3.11880717 18.8700870
# 5:           Argentina  1.14613567 -0.2896305
# 6:             Armenia  0.08178344 11.5523992

… which provides a significant speed gain here:


microbenchmark(
  
A = wlddev %>% fselect(country, PCGDP, LIFEEX) %>% 
  na_omit(cols = -1L) %>% 
  fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% 
  qDT %>% 
  .[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX))), "Coef"), keyby = country],

B = wlddev %>% fselect(country, PCGDP, LIFEEX) %>% 
  na_omit(cols = -1L) %>% 
  fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% 
  qDT %>% 
  .[, mrtl(flm(fgrowth(PCGDP)[-1L], 
               cbind(Intercept = 1, 
                     LIFEEX = fgrowth(LIFEEX)[-1L])), TRUE), 
    keyby = country]
)
# Unit: milliseconds
#  expr       min        lq      mean    median        uq        max neval
#     A 58.914253 60.063381 68.770933 60.865217 73.507813 241.594509   100
#     B  3.145766  3.293715  3.463643  3.377006  3.503983   5.378995   100

Another feature to highlight at this point are collapse’s list processing functions, in particular rsplit, rapply2d, get_elem and unlist2d. rsplit is an efficient recursive generalization of split:

DT_list <- rsplit(DT, country + year + PCGDP + LIFEEX ~ region + income) 

# Note: rsplit(DT, year + PCGDP + LIFEEX ~ region + income, flatten = TRUE) 
# would yield a simple list with interacted categories (like split) 

str(DT_list, give.attr = FALSE)
# List of 7
#  $ East Asia & Pacific       :List of 3
#   ..$ High income        :Classes 'data.table' and 'data.frame':  793 obs. of  4 variables:
#   .. ..$ country: chr [1:793] "Australia" "Australia" "Australia" "Australia" ...
#   .. ..$ year   : int [1:793] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:793] 19378 19469 19246 20053 21036 ...
#   .. ..$ LIFEEX : num [1:793] 70.8 71 70.9 70.9 70.9 ...
#   ..$ Lower middle income:Classes 'data.table' and 'data.frame':  793 obs. of  4 variables:
#   .. ..$ country: chr [1:793] "Cambodia" "Cambodia" "Cambodia" "Cambodia" ...
#   .. ..$ year   : int [1:793] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:793] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:793] 41.2 41.4 41.5 41.7 41.9 ...
#   ..$ Upper middle income:Classes 'data.table' and 'data.frame':  610 obs. of  4 variables:
#   .. ..$ country: chr [1:610] "American Samoa" "American Samoa" "American Samoa" "American Samoa" ...
#   .. ..$ year   : int [1:610] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:610] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:610] NA NA NA NA NA NA NA NA NA NA ...
#  $ Europe & Central Asia     :List of 4
#   ..$ High income        :Classes 'data.table' and 'data.frame':  2257 obs. of  4 variables:
#   .. ..$ country: chr [1:2257] "Andorra" "Andorra" "Andorra" "Andorra" ...
#   .. ..$ year   : int [1:2257] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:2257] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:2257] NA NA NA NA NA NA NA NA NA NA ...
#   ..$ Low income         :Classes 'data.table' and 'data.frame':  61 obs. of  4 variables:
#   .. ..$ country: chr [1:61] "Tajikistan" "Tajikistan" "Tajikistan" "Tajikistan" ...
#   .. ..$ year   : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:61] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:61] 50.6 50.9 51.2 51.5 51.9 ...
#   ..$ Lower middle income:Classes 'data.table' and 'data.frame':  244 obs. of  4 variables:
#   .. ..$ country: chr [1:244] "Kyrgyz Republic" "Kyrgyz Republic" "Kyrgyz Republic" "Kyrgyz Republic" ...
#   .. ..$ year   : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:244] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:244] 56.1 56.6 57 57.4 57.9 ...
#   ..$ Upper middle income:Classes 'data.table' and 'data.frame':  976 obs. of  4 variables:
#   .. ..$ country: chr [1:976] "Albania" "Albania" "Albania" "Albania" ...
#   .. ..$ year   : int [1:976] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:976] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:976] 62.3 63.3 64.2 64.9 65.5 ...
#  $ Latin America & Caribbean :List of 4
#   ..$ High income        :Classes 'data.table' and 'data.frame':  1037 obs. of  4 variables:
#   .. ..$ country: chr [1:1037] "Antigua and Barbuda" "Antigua and Barbuda" "Antigua and Barbuda" "Antigua and Barbuda" ...
#   .. ..$ year   : int [1:1037] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:1037] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:1037] 62 62.5 63 63.5 64 ...
#   ..$ Low income         :Classes 'data.table' and 'data.frame':  61 obs. of  4 variables:
#   .. ..$ country: chr [1:61] "Haiti" "Haiti" "Haiti" "Haiti" ...
#   .. ..$ year   : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:61] 1512 1439 1523 1466 1414 ...
#   .. ..$ LIFEEX : num [1:61] 41.8 42.2 42.6 43 43.4 ...
#   ..$ Lower middle income:Classes 'data.table' and 'data.frame':  244 obs. of  4 variables:
#   .. ..$ country: chr [1:244] "Bolivia" "Bolivia" "Bolivia" "Bolivia" ...
#   .. ..$ year   : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:244] 1005 1007 1042 1091 1112 ...
#   .. ..$ LIFEEX : num [1:244] 41.8 42.1 42.5 42.8 43.2 ...
#   ..$ Upper middle income:Classes 'data.table' and 'data.frame':  1220 obs. of  4 variables:
#   .. ..$ country: chr [1:1220] "Argentina" "Argentina" "Argentina" "Argentina" ...
#   .. ..$ year   : int [1:1220] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:1220] 5643 5853 5711 5323 5773 ...
#   .. ..$ LIFEEX : num [1:1220] 65.1 65.2 65.3 65.3 65.4 ...
#  $ Middle East & North Africa:List of 4
#   ..$ High income        :Classes 'data.table' and 'data.frame':  488 obs. of  4 variables:
#   .. ..$ country: chr [1:488] "Bahrain" "Bahrain" "Bahrain" "Bahrain" ...
#   .. ..$ year   : int [1:488] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:488] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:488] 51.9 53.2 54.6 55.9 57.2 ...
#   ..$ Low income         :Classes 'data.table' and 'data.frame':  122 obs. of  4 variables:
#   .. ..$ country: chr [1:122] "Syrian Arab Republic" "Syrian Arab Republic" "Syrian Arab Republic" "Syrian Arab Republic" ...
#   .. ..$ year   : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:122] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:122] 52 52.6 53.2 53.8 54.4 ...
#   ..$ Lower middle income:Classes 'data.table' and 'data.frame':  305 obs. of  4 variables:
#   .. ..$ country: chr [1:305] "Djibouti" "Djibouti" "Djibouti" "Djibouti" ...
#   .. ..$ year   : int [1:305] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:305] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:305] 44 44.5 44.9 45.3 45.7 ...
#   ..$ Upper middle income:Classes 'data.table' and 'data.frame':  366 obs. of  4 variables:
#   .. ..$ country: chr [1:366] "Algeria" "Algeria" "Algeria" "Algeria" ...
#   .. ..$ year   : int [1:366] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:366] 2481 2091 1638 2146 2214 ...
#   .. ..$ LIFEEX : num [1:366] 46.1 46.6 47.1 47.5 48 ...
#  $ North America             :List of 1
#   ..$ High income:Classes 'data.table' and 'data.frame':  183 obs. of  4 variables:
#   .. ..$ country: chr [1:183] "Bermuda" "Bermuda" "Bermuda" "Bermuda" ...
#   .. ..$ year   : int [1:183] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:183] 33363 34080 34763 34324 37202 ...
#   .. ..$ LIFEEX : num [1:183] NA NA NA NA NA ...
#  $ South Asia                :List of 3
#   ..$ Low income         :Classes 'data.table' and 'data.frame':  122 obs. of  4 variables:
#   .. ..$ country: chr [1:122] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
#   .. ..$ year   : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:122] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:122] 32.4 33 33.5 34 34.5 ...
#   ..$ Lower middle income:Classes 'data.table' and 'data.frame':  244 obs. of  4 variables:
#   .. ..$ country: chr [1:244] "Bangladesh" "Bangladesh" "Bangladesh" "Bangladesh" ...
#   .. ..$ year   : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:244] 372 384 394 381 411 ...
#   .. ..$ LIFEEX : num [1:244] 45.4 46 46.6 47.1 47.6 ...
#   ..$ Upper middle income:Classes 'data.table' and 'data.frame':  122 obs. of  4 variables:
#   .. ..$ country: chr [1:122] "Maldives" "Maldives" "Maldives" "Maldives" ...
#   .. ..$ year   : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:122] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:122] 37.3 37.9 38.6 39.2 39.9 ...
#  $ Sub-Saharan Africa        :List of 4
#   ..$ High income        :Classes 'data.table' and 'data.frame':  61 obs. of  4 variables:
#   .. ..$ country: chr [1:61] "Seychelles" "Seychelles" "Seychelles" "Seychelles" ...
#   .. ..$ year   : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:61] 2830 2617 2763 2966 3064 ...
#   .. ..$ LIFEEX : num [1:61] NA NA NA NA NA NA NA NA NA NA ...
#   ..$ Low income         :Classes 'data.table' and 'data.frame':  1464 obs. of  4 variables:
#   .. ..$ country: chr [1:1464] "Benin" "Benin" "Benin" "Benin" ...
#   .. ..$ year   : int [1:1464] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:1464] 712 724 689 710 745 ...
#   .. ..$ LIFEEX : num [1:1464] 37.3 37.7 38.2 38.7 39.1 ...
#   ..$ Lower middle income:Classes 'data.table' and 'data.frame':  1037 obs. of  4 variables:
#   .. ..$ country: chr [1:1037] "Angola" "Angola" "Angola" "Angola" ...
#   .. ..$ year   : int [1:1037] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:1037] NA NA NA NA NA NA NA NA NA NA ...
#   .. ..$ LIFEEX : num [1:1037] 37.5 37.8 38.1 38.4 38.8 ...
#   ..$ Upper middle income:Classes 'data.table' and 'data.frame':  366 obs. of  4 variables:
#   .. ..$ country: chr [1:366] "Botswana" "Botswana" "Botswana" "Botswana" ...
#   .. ..$ year   : int [1:366] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
#   .. ..$ PCGDP  : num [1:366] 408 425 444 460 480 ...
#   .. ..$ LIFEEX : num [1:366] 49.2 49.7 50.2 50.6 51.1 ...

We can use rapply2d to apply a function to each data frame / data.table in an arbitrary nested structure:

# This runs region-income level regressions, with country fixed effects
# following Mundlak (1978)
lm_summary_list <- DT_list %>% 
  rapply2d(lm, formula = G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)) %>% 
  # Summarizing the results
  rapply2d(summary, classes = "lm")

# This is a nested list of linear model summaries
str(lm_summary_list, give.attr = FALSE)
# List of 7
#  $ East Asia & Pacific       :List of 3
#   ..$ High income        :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:441] -1.64 -2.59 2.75 3.45 2.48 ...
#   .. ..$ coefficients : num [1:3, 1:4] 0.531 2.494 3.83 0.706 0.759 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 4.59
#   .. ..$ df           : int [1:3] 3 438 3
#   .. ..$ r.squared    : num 0.0525
#   .. ..$ adj.r.squared: num 0.0481
#   .. ..$ fstatistic   : Named num [1:3] 12.1 2 438
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.02361 -0.00158 -0.04895 -0.00158 0.02728 ...
#   .. ..$ na.action    : 'omit' Named int [1:352] 1 61 62 63 64 65 66 67 68 69 ...
#   ..$ Lower middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:549] -39.6968 3.6618 -0.0944 -1.8261 -1.0491 ...
#   .. ..$ coefficients : num [1:3, 1:4] 1.348 0.524 0.949 0.701 0.757 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 5.4
#   .. ..$ df           : int [1:3] 3 546 3
#   .. ..$ r.squared    : num 0.00471
#   .. ..$ adj.r.squared: num 0.00106
#   .. ..$ fstatistic   : Named num [1:3] 1.29 2 546
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.016821 0.000511 -0.022767 0.000511 0.01965 ...
#   .. ..$ na.action    : 'omit' Named int [1:244] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Upper middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:312] -32.29 -11.61 2.91 11.23 10.28 ...
#   .. ..$ coefficients : num [1:3, 1:4] 1.507 -0.547 4.816 0.428 0.478 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 4.39
#   .. ..$ df           : int [1:3] 3 309 3
#   .. ..$ r.squared    : num 0.103
#   .. ..$ adj.r.squared: num 0.0976
#   .. ..$ fstatistic   : Named num [1:3] 17.8 2 309
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.009471 0.000492 -0.013551 0.000492 0.011842 ...
#   .. ..$ na.action    : 'omit' Named int [1:298] 1 2 3 4 5 6 7 8 9 10 ...
#  $ Europe & Central Asia     :List of 4
#   ..$ High income        :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:1355] 2.706 -0.548 1.001 3.034 0.257 ...
#   .. ..$ coefficients : num [1:3, 1:4] 3.254 -0.172 -2.506 0.407 0.227 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 3.3
#   .. ..$ df           : int [1:3] 3 1352 3
#   .. ..$ r.squared    : num 0.00257
#   .. ..$ adj.r.squared: num 0.00109
#   .. ..$ fstatistic   : Named num [1:3] 1.74 2 1352
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.015254 -0.000863 -0.05461 -0.000863 0.004722 ...
#   .. ..$ na.action    : 'omit' Named int [1:902] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Low income         :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:34] 0.166 -1.804 15.949 -0.778 7.165 ...
#   .. ..$ coefficients : num [1:2, 1:4] -5.31 9.36 2.03 2.56 -2.61 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE TRUE
#   .. ..$ sigma        : num 8.43
#   .. ..$ df           : int [1:3] 2 32 3
#   .. ..$ r.squared    : num 0.295
#   .. ..$ adj.r.squared: num 0.273
#   .. ..$ fstatistic   : Named num [1:3] 13.4 1 32
#   .. ..$ cov.unscaled : num [1:2, 1:2] 0.0582 -0.0514 -0.0514 0.092
#   .. ..$ na.action    : 'omit' Named int [1:27] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Lower middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:121] -1.626 8.745 -14.47 0.298 -11.886 ...
#   .. ..$ coefficients : num [1:3, 1:4] 0.106 4.631 1.499 1.315 0.938 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 6.02
#   .. ..$ df           : int [1:3] 3 118 3
#   .. ..$ r.squared    : num 0.178
#   .. ..$ adj.r.squared: num 0.164
#   .. ..$ fstatistic   : Named num [1:3] 12.7 2 118
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.047775 -0.000927 -0.142782 -0.000927 0.024298 ...
#   .. ..$ na.action    : 'omit' Named int [1:123] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Upper middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:511] 0.761 -2.153 -4.091 -6.476 -3.43 ...
#   .. ..$ coefficients : num [1:3, 1:4] 2.983 4.147 -3.351 0.698 0.779 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 8.28
#   .. ..$ df           : int [1:3] 3 508 3
#   .. ..$ r.squared    : num 0.0531
#   .. ..$ adj.r.squared: num 0.0493
#   .. ..$ fstatistic   : Named num [1:3] 14.2 2 508
#   .. ..$ cov.unscaled : num [1:3, 1:3] 7.11e-03 4.52e-05 -1.45e-02 4.52e-05 8.85e-03 ...
#   .. ..$ na.action    : 'omit' Named int [1:465] 1 2 3 4 5 6 7 8 9 10 ...
#  $ Latin America & Caribbean :List of 4
#   ..$ High income        :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:487] 2.39 6.02 6.1 1.71 -2.27 ...
#   .. ..$ coefficients : num [1:3, 1:4] 1.015 0.483 2.613 0.677 0.952 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 4.71
#   .. ..$ df           : int [1:3] 3 484 3
#   .. ..$ r.squared    : num 0.00592
#   .. ..$ adj.r.squared: num 0.00181
#   .. ..$ fstatistic   : Named num [1:3] 1.44 2 484
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.02062 0.00155 -0.05714 0.00155 0.04082 ...
#   .. ..$ na.action    : 'omit' Named int [1:550] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Low income         :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:59] -5.667 5.091 -4.46 -4.224 -0.526 ...
#   .. ..$ coefficients : num [1:2, 1:4] -3.18 4.02 1.73 2.28 -1.83 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE TRUE
#   .. ..$ sigma        : num 3.79
#   .. ..$ df           : int [1:3] 2 57 3
#   .. ..$ r.squared    : num 0.0516
#   .. ..$ adj.r.squared: num 0.0349
#   .. ..$ fstatistic   : Named num [1:3] 3.1 1 57
#   .. ..$ cov.unscaled : num [1:2, 1:2] 0.209 -0.265 -0.265 0.364
#   .. ..$ na.action    : 'omit' Named int [1:2] 1 61
#   ..$ Lower middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:231] -1.386 2.029 3.213 0.413 1.334 ...
#   .. ..$ coefficients : num [1:3, 1:4] -1.678 -0.479 3.896 2.26 0.709 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 3.96
#   .. ..$ df           : int [1:3] 3 228 3
#   .. ..$ r.squared    : num 0.0081
#   .. ..$ adj.r.squared: num -0.000602
#   .. ..$ fstatistic   : Named num [1:3] 0.931 2 228
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.3264 0.005 -0.4084 0.005 0.0321 ...
#   .. ..$ na.action    : 'omit' Named int [1:13] 1 61 62 63 64 65 66 67 122 123 ...
#   ..$ Upper middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:1065] 1.97 -4.16 -8.5 6.72 7.17 ...
#   .. ..$ coefficients : num [1:3, 1:4] 1.681 0.583 -0.124 0.353 0.512 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 4.22
#   .. ..$ df           : int [1:3] 3 1062 3
#   .. ..$ r.squared    : num 0.0016
#   .. ..$ adj.r.squared: num -0.000283
#   .. ..$ fstatistic   : Named num [1:3] 0.85 2 1062
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.006982 0.000348 -0.013936 0.000348 0.014734 ...
#   .. ..$ na.action    : 'omit' Named int [1:155] 1 61 62 122 123 183 184 244 245 305 ...
#  $ Middle East & North Africa:List of 4
#   ..$ High income        :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:334] -10.728 -11.988 2.151 0.985 -8.618 ...
#   .. ..$ coefficients : num [1:3, 1:4] 1.929 3.963 -3.533 1.102 0.996 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 8.36
#   .. ..$ df           : int [1:3] 3 331 3
#   .. ..$ r.squared    : num 0.0456
#   .. ..$ adj.r.squared: num 0.0399
#   .. ..$ fstatistic   : Named num [1:3] 7.91 2 331
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.01738 0.00101 -0.02441 0.00101 0.01419 ...
#   .. ..$ na.action    : 'omit' Named int [1:154] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Low income         :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:29] 0.468 3.424 0.415 3.842 3.342 ...
#   .. ..$ coefficients : num [1:2, 1:4] -6.91 11.38 2.11 3.64 -3.27 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE TRUE
#   .. ..$ sigma        : num 6.05
#   .. ..$ df           : int [1:3] 2 27 3
#   .. ..$ r.squared    : num 0.266
#   .. ..$ adj.r.squared: num 0.239
#   .. ..$ fstatistic   : Named num [1:3] 9.81 1 27
#   .. ..$ cov.unscaled : num [1:2, 1:2] 0.122 -0.178 -0.178 0.361
#   .. ..$ na.action    : 'omit' Named int [1:93] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Lower middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:191] -0.95 -2.047 4.541 5.594 -0.723 ...
#   .. ..$ coefficients : num [1:3, 1:4] 2.238 1.271 -0.647 1.002 0.599 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 3.94
#   .. ..$ df           : int [1:3] 3 188 3
#   .. ..$ r.squared    : num 0.0244
#   .. ..$ adj.r.squared: num 0.014
#   .. ..$ fstatistic   : Named num [1:3] 2.35 2 188
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.06471 -0.00043 -0.07801 -0.00043 0.02309 ...
#   .. ..$ na.action    : 'omit' Named int [1:114] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Upper middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:263] -18.068 -23.976 28.692 0.858 1.141 ...
#   .. ..$ coefficients : num [1:3, 1:4] 2.663 0.718 -1.19 3.538 1.318 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 13.8
#   .. ..$ df           : int [1:3] 3 260 3
#   .. ..$ r.squared    : num 0.00119
#   .. ..$ adj.r.squared: num -0.00649
#   .. ..$ fstatistic   : Named num [1:3] 0.155 2 260
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.065741 0.000795 -0.084456 0.000795 0.009122 ...
#   .. ..$ na.action    : 'omit' Named int [1:103] 1 61 62 122 123 124 125 126 127 128 ...
#  $ North America             :List of 1
#   ..$ High income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:137] 4.6986 -3.1098 1.8243 0.5643 0.0176 ...
#   .. ..$ coefficients : num [1:3, 1:4] 6.542 -1.461 -19.53 2.272 0.662 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 2.49
#   .. ..$ df           : int [1:3] 3 134 3
#   .. ..$ r.squared    : num 0.0657
#   .. ..$ adj.r.squared: num 0.0518
#   .. ..$ fstatistic   : Named num [1:3] 4.71 2 134
#   .. ..$ cov.unscaled : num [1:3, 1:3] 8.36e-01 1.59e-17 -3.60 1.59e-17 7.10e-02 ...
#   .. ..$ na.action    : 'omit' Named int [1:46] 1 2 3 4 5 6 7 8 9 10 ...
#  $ South Asia                :List of 3
#   ..$ Low income         :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:76] 0.544 -6.17 3.951 -0.964 7.829 ...
#   .. ..$ coefficients : num [1:3, 1:4] -108.62 -1.72 96.06 174.19 1.25 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 3.7
#   .. ..$ df           : int [1:3] 3 73 3
#   .. ..$ r.squared    : num 0.0494
#   .. ..$ adj.r.squared: num 0.0233
#   .. ..$ fstatistic   : Named num [1:3] 1.9 2 73
#   .. ..$ cov.unscaled : num [1:3, 1:3] 2210.639 -6.979 -1875.261 -6.979 0.114 ...
#   .. ..$ na.action    : 'omit' Named int [1:46] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Lower middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:216] 0.294 -0.293 -6.067 4.954 -4.164 ...
#   .. ..$ coefficients : num [1:3, 1:4] -2.232 0.238 5.972 1.074 0.493 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 3.44
#   .. ..$ df           : int [1:3] 3 213 3
#   .. ..$ r.squared    : num 0.111
#   .. ..$ adj.r.squared: num 0.103
#   .. ..$ fstatistic   : Named num [1:3] 13.3 2 213
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.09757 -0.00201 -0.10483 -0.00201 0.02054 ...
#   .. ..$ na.action    : 'omit' Named int [1:28] 1 61 62 63 64 65 66 67 68 69 ...
#   ..$ Upper middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:82] 3.262 3.976 3.128 1.67 -0.901 ...
#   .. ..$ coefficients : num [1:3, 1:4] 3.859 -0.577 -0.476 1.036 1.365 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 4.25
#   .. ..$ df           : int [1:3] 3 79 3
#   .. ..$ r.squared    : num 0.00622
#   .. ..$ adj.r.squared: num -0.0189
#   .. ..$ fstatistic   : Named num [1:3] 0.247 2 79
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.0595 -0.028 -0.0473 -0.028 0.1034 ...
#   .. ..$ na.action    : 'omit' Named int [1:40] 1 2 3 4 5 6 7 8 9 10 ...
#  $ Sub-Saharan Africa        :List of 4
#   ..$ High income        :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:39] -11.33 -5.041 -3.158 0.585 7.81 ...
#   .. ..$ coefficients : num [1:2, 1:4] 2.551 -0.644 0.775 0.55 3.293 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE TRUE
#   .. ..$ sigma        : num 4.8
#   .. ..$ df           : int [1:3] 2 37 3
#   .. ..$ r.squared    : num 0.0357
#   .. ..$ adj.r.squared: num 0.00959
#   .. ..$ fstatistic   : Named num [1:3] 1.37 1 37
#   .. ..$ cov.unscaled : num [1:2, 1:2] 0.026 -0.00217 -0.00217 0.01312
#   .. ..$ na.action    : 'omit' Named int [1:22] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Low income         :List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:1085] 0.694 -5.869 2.069 3.855 2.415 ...
#   .. ..$ coefficients : num [1:3, 1:4] -0.0756 0.5308 0.5124 0.8887 0.137 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 5.88
#   .. ..$ df           : int [1:3] 3 1082 3
#   .. ..$ r.squared    : num 0.0146
#   .. ..$ adj.r.squared: num 0.0128
#   .. ..$ fstatistic   : Named num [1:3] 8.01 2 1082
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.022858 -0.000025 -0.025534 -0.000025 0.000543 ...
#   .. ..$ na.action    : 'omit' Named int [1:379] 1 61 62 122 123 183 184 244 245 305 ...
#   ..$ Lower middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:891] -8.2839 -4.0289 0.0449 1.8231 -0.5267 ...
#   .. ..$ coefficients : num [1:3, 1:4] 2.352 0.782 -2.616 0.608 0.169 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 5.27
#   .. ..$ df           : int [1:3] 3 888 3
#   .. ..$ r.squared    : num 0.0277
#   .. ..$ adj.r.squared: num 0.0255
#   .. ..$ fstatistic   : Named num [1:3] 12.7 2 888
#   .. ..$ cov.unscaled : num [1:3, 1:3] 1.33e-02 -1.13e-05 -2.00e-02 -1.13e-05 1.02e-03 ...
#   .. ..$ na.action    : 'omit' Named int [1:146] 1 2 3 4 5 6 7 8 9 10 ...
#   ..$ Upper middle income:List of 12
#   .. ..$ call         : language FUN(formula = ..1, data = y)
#   .. ..$ terms        :Classes 'terms', 'formula'  language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)
#   .. ..$ residuals    : Named num [1:298] 0.7659 0.9133 0.0921 0.996 0.0765 ...
#   .. ..$ coefficients : num [1:3, 1:4] 0.584 0.456 4.112 2.472 0.652 ...
#   .. ..$ aliased      : Named logi [1:3] FALSE FALSE FALSE
#   .. ..$ sigma        : num 11.4
#   .. ..$ df           : int [1:3] 3 295 3
#   .. ..$ r.squared    : num 0.00658
#   .. ..$ adj.r.squared: num -0.000152
#   .. ..$ fstatistic   : Named num [1:3] 0.977 2 295
#   .. ..$ cov.unscaled : num [1:3, 1:3] 0.047213 0.000438 -0.070778 0.000438 0.003285 ...
#   .. ..$ na.action    : 'omit' Named int [1:68] 1 61 62 63 64 65 66 67 68 69 ...

We can turn this list into a data.table again by calling first get_elem to recursively extract the coefficient matrices and then unlist2d to recursively bind them to a new data.table:

lm_summary_list %>%
  get_elem("coefficients") %>% 
  unlist2d(idcols = .c(Region, Income), 
           row.names = "Coef", 
           DT = TRUE) %>% head
#                 Region              Income                  Coef  Estimate Std. Error   t value
#                 <char>              <char>                <char>     <num>      <num>     <num>
# 1: East Asia & Pacific         High income           (Intercept) 0.5313479  0.7058550 0.7527720
# 2: East Asia & Pacific         High income             G(LIFEEX) 2.4935584  0.7586943 3.2866443
# 3: East Asia & Pacific         High income B(G(LIFEEX), country) 3.8297123  1.6916770 2.2638554
# 4: East Asia & Pacific Lower middle income           (Intercept) 1.3476602  0.7008556 1.9228785
# 5: East Asia & Pacific Lower middle income             G(LIFEEX) 0.5238856  0.7574904 0.6916069
# 6: East Asia & Pacific Lower middle income B(G(LIFEEX), country) 0.9494439  1.2031228 0.7891496
#       Pr(>|t|)
#          <num>
# 1: 0.451991327
# 2: 0.001095466
# 3: 0.024071386
# 4: 0.055015131
# 5: 0.489478164
# 6: 0.430367103

The fact that this is a nested list of matrices, and that we can save both the names of the lists at each level of nesting and the row- and column- names of the matrices make unlist2d a significant generalization of rbindlist3.

But why do all this fuzz if we could have simply done:?

DT[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country))), "Coef"), 
   keyby = .(region, income)] %>% head
# Key: <region, income>
#                 region              income                  Coef  Estimate Std. Error   t value
#                 <fctr>              <fctr>                <char>     <num>      <num>     <num>
# 1: East Asia & Pacific         High income           (Intercept) 0.5313479  0.7058550 0.7527720
# 2: East Asia & Pacific         High income             G(LIFEEX) 2.4935584  0.7586943 3.2866443
# 3: East Asia & Pacific         High income B(G(LIFEEX), country) 3.8297123  1.6916770 2.2638554
# 4: East Asia & Pacific Lower middle income           (Intercept) 1.3476602  0.7008556 1.9228785
# 5: East Asia & Pacific Lower middle income             G(LIFEEX) 0.5238856  0.7574904 0.6916069
# 6: East Asia & Pacific Lower middle income B(G(LIFEEX), country) 0.9494439  1.2031228 0.7891496
#       Pr(>|t|)
#          <num>
# 1: 0.451991327
# 2: 0.001095466
# 3: 0.024071386
# 4: 0.055015131
# 5: 0.489478164
# 6: 0.430367103

Well we might want to do more things with that list of linear models first before tidying it, so this is a more general workflow. We might also be interested in additional statistics like the R-squared or the F-statistic:

DT_sum <- lm_summary_list %>%
get_elem("coef|r.sq|fstat", regex = TRUE) %>% 
  unlist2d(idcols = .c(Region, Income, Statistic), 
           row.names = "Coef", 
           DT = TRUE)

head(DT_sum)
#                 Region      Income     Statistic                  Coef  Estimate Std. Error
#                 <char>      <char>        <char>                <char>     <num>      <num>
# 1: East Asia & Pacific High income  coefficients           (Intercept) 0.5313479  0.7058550
# 2: East Asia & Pacific High income  coefficients             G(LIFEEX) 2.4935584  0.7586943
# 3: East Asia & Pacific High income  coefficients B(G(LIFEEX), country) 3.8297123  1.6916770
# 4: East Asia & Pacific High income     r.squared                  <NA>        NA         NA
# 5: East Asia & Pacific High income adj.r.squared                  <NA>        NA         NA
# 6: East Asia & Pacific High income    fstatistic                  <NA>        NA         NA
#     t value    Pr(>|t|)         V1    value numdf dendf
#       <num>       <num>      <num>    <num> <num> <num>
# 1: 0.752772 0.451991327         NA       NA    NA    NA
# 2: 3.286644 0.001095466         NA       NA    NA    NA
# 3: 2.263855 0.024071386         NA       NA    NA    NA
# 4:       NA          NA 0.05245359       NA    NA    NA
# 5:       NA          NA 0.04812690       NA    NA    NA
# 6:       NA          NA         NA 12.12325     2   438

# Reshaping to long form: 
DT_sum %>%
  melt(1:4, na.rm = TRUE) %>%
  roworderv(1:2) %>% head(20)
#                  Region              Income     Statistic                  Coef   variable
#                  <char>              <char>        <char>                <char>     <fctr>
#  1: East Asia & Pacific         High income  coefficients           (Intercept)   Estimate
#  2: East Asia & Pacific         High income  coefficients             G(LIFEEX)   Estimate
#  3: East Asia & Pacific         High income  coefficients B(G(LIFEEX), country)   Estimate
#  4: East Asia & Pacific         High income  coefficients           (Intercept) Std. Error
#  5: East Asia & Pacific         High income  coefficients             G(LIFEEX) Std. Error
#  6: East Asia & Pacific         High income  coefficients B(G(LIFEEX), country) Std. Error
#  7: East Asia & Pacific         High income  coefficients           (Intercept)    t value
#  8: East Asia & Pacific         High income  coefficients             G(LIFEEX)    t value
#  9: East Asia & Pacific         High income  coefficients B(G(LIFEEX), country)    t value
# 10: East Asia & Pacific         High income  coefficients           (Intercept)   Pr(>|t|)
# 11: East Asia & Pacific         High income  coefficients             G(LIFEEX)   Pr(>|t|)
# 12: East Asia & Pacific         High income  coefficients B(G(LIFEEX), country)   Pr(>|t|)
# 13: East Asia & Pacific         High income     r.squared                  <NA>         V1
# 14: East Asia & Pacific         High income adj.r.squared                  <NA>         V1
# 15: East Asia & Pacific         High income    fstatistic                  <NA>      value
# 16: East Asia & Pacific         High income    fstatistic                  <NA>      numdf
# 17: East Asia & Pacific         High income    fstatistic                  <NA>      dendf
# 18: East Asia & Pacific Lower middle income  coefficients           (Intercept)   Estimate
# 19: East Asia & Pacific Lower middle income  coefficients             G(LIFEEX)   Estimate
# 20: East Asia & Pacific Lower middle income  coefficients B(G(LIFEEX), country)   Estimate
#                  Region              Income     Statistic                  Coef   variable
#            value
#            <num>
#  1: 5.313479e-01
#  2: 2.493558e+00
#  3: 3.829712e+00
#  4: 7.058550e-01
#  5: 7.586943e-01
#  6: 1.691677e+00
#  7: 7.527720e-01
#  8: 3.286644e+00
#  9: 2.263855e+00
# 10: 4.519913e-01
# 11: 1.095466e-03
# 12: 2.407139e-02
# 13: 5.245359e-02
# 14: 4.812690e-02
# 15: 1.212325e+01
# 16: 2.000000e+00
# 17: 4.380000e+02
# 18: 1.347660e+00
# 19: 5.238856e-01
# 20: 9.494439e-01
#            value

As a final example of this kind, lets suppose we are interested in the within-country correlations of all these variables by region and income group:

DT[, qDT(pwcor(W(.SD, country)), "Variable"), 
   keyby = .(region, income), .SDcols = PCGDP:ODA] %>% head
# Key: <region, income>
#                 region              income Variable    W.PCGDP   W.LIFEEX    W.GINI       W.ODA
#                 <fctr>              <fctr>   <char>      <num>      <num>     <num>       <num>
# 1: East Asia & Pacific         High income  W.PCGDP  1.0000000  0.7562668 0.6253844 -0.25258496
# 2: East Asia & Pacific         High income W.LIFEEX  0.7562668  1.0000000 0.3191255 -0.33611662
# 3: East Asia & Pacific         High income   W.GINI  0.6253844  0.3191255 1.0000000          NA
# 4: East Asia & Pacific         High income    W.ODA -0.2525850 -0.3361166        NA  1.00000000
# 5: East Asia & Pacific Lower middle income  W.PCGDP  1.0000000  0.4685618 0.4428879 -0.02508852
# 6: East Asia & Pacific Lower middle income W.LIFEEX  0.4685618  1.0000000 0.3231520  0.09356733

In summary: The list processing features, statistical capabilities and efficient converters of collapse and the flexibility of data.table work well together, facilitating more complex workflows.

Additional Benchmarks

See here or here.

These are all run on a 2 core laptop, so I honestly don’t know how collapse scales on powerful multi-core machines. My own limited computational resources are part of the reason I did not opt for a thread-parallel package from the start. But a multi-core version of collapse will eventually be released, maybe by end of 2021.

References

Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†Econometrica 46 (1): 69–85.


  1. Grouping on numeric variables in collapse is always ordered.↩︎

  2. Apart from collapse::BY which is only an auxiliary function written in base R to perform flexible split-apply combine computing on vectors, matrices and data frames.↩︎

  3. unlist2d can similarly bind nested lists of arrays, data frames or data.table’s↩︎

collapse/inst/doc/collapse_and_dplyr.R0000644000176200001440000000003715202627530017561 0ustar liggesusersparams <- list(cache = TRUE) collapse/inst/doc/collapse_intro.Rmd0000644000176200001440000077445715121640575017315 0ustar liggesusers--- title: "Introduction to collapse" subtitle: "Advanced and Fast Data Transformation in R" author: "Sebastian Krantz" date: "2021-06-27" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{Introduction to collapse} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- *collapse* is a C/C++ based package for data transformation and statistical computing in R. It's aims are: 1. To facilitate complex data transformation, exploration and computing tasks in R. 2. To help make R code fast, flexible, parsimonious and programmer friendly. This vignette demonstrates these two points and introduces all main features of the package in a structured way. The chapters are pretty self-contained, however the first chapters introduce the data and faster data manipulation functions which are used throughout the rest of this vignette. *** **Notes:** - Apart from this vignette, *collapse* comes with a built-in structured documentation available under `help("collapse-documentation")` after installing the package, and `help("collapse-package")` provides a compact set of examples for quick-start. A cheat sheet is available at [Rstudio](). - The two other vignettes focus on the integration of *collapse* with *dplyr* workflows (recommended for *dplyr* / *tidyverse* users), and on the integration of *collapse* with the *plm* package (+ some advanced programming with panel data). - Documentation and vignettes can also be viewed [online](). *** ## Why *collapse*? *collapse* is a high-performance package that extends and enhances the data-manipulation capabilities of R and existing popular packages (such as *dplyr*, *data.table*, and matrix packages). It's main focus is on grouped and weighted statistical programming, complex aggregations and transformations, time series and panel data operations, and programming with lists of data objects. The lead author is an applied economist and created the package mainly to facilitate advanced computations on varied and complex data, in particular surveys, (multivariate) time series, multilevel / panel data, and lists / model objects. A secondary aspect to applied work is that data is often imported into R from richer data structures (such as STATA, SPSS or SAS files imported with *haven*). This called for an intelligent suite of data manipulation functions that can both utilize aspects of the richer data structure (such as variable labels), and preserve the data structure / attributes in computations. Sometimes specialized classes like *xts*, *pdata.frame* and *grouped_df* can also become very useful to manipulate certain types of data. Thus *collapse* was built to explicitly supports these classes, while preserving most other classes / data structures in R. Another objective was to radically improve the speed of R code by extensively relying on efficient algorithms in C/C++ and the faster components of base R. *collapse* ranks among the fastest R packages, and performs many grouped and/or weighted computations noticeably faster than *dplyr* or *data.table*. A final development objective was to channel this performance through a stable and well conceived user API providing extensive and optimized programming capabilities (in standard evaluation) while also facilitating quick use and easy integration with existing data manipulation frameworks (in particular *dplyr* / *tidyverse* and *data.table*, both relying on non-standard evaluation). ## 1. Data and Summary Tools We begin by introducing some powerful summary tools along with the 2 panel datasets *collapse* provides which are used throughout this vignette. If you are just interested in programming you can skip this section. Apart from the 2 datasets that come with *collapse* (`wlddev` and `GGDC10S`), this vignette uses a few well known datasets from base R: `mtcars`, `iris`, `airquality`, and the time series `Airpassengers` and `EuStockMarkets`. ### 1.1 `wlddev` - World Bank Development Data This dataset contains 5 key World Bank Development Indicators covering 216 countries for up to 61 years (1960-2020). It is a balanced balanced panel with $216 \times 61 = 13176$ observations. --> ```r library(collapse) head(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # 3 Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA 112839996 # 4 Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA 237720001 # 5 Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA 295920013 # 6 Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA 341839996 # POP # 1 8996973 # 2 9169410 # 3 9351441 # 4 9543205 # 5 9744781 # 6 9956320 # The variables have "label" attributes. Use vlabels() to get and set labels namlab(wlddev, class = TRUE) # Variable Class # 1 country character # 2 iso3c factor # 3 date Date # 4 year integer # 5 decade integer # 6 region factor # 7 income factor # 8 OECD logical # 9 PCGDP numeric # 10 LIFEEX numeric # 11 GINI numeric # 12 ODA numeric # 13 POP numeric # Label # 1 Country Name # 2 Country Code # 3 Date Recorded (Fictitious) # 4 Year # 5 Decade # 6 Region # 7 Income Level # 8 Is OECD Member Country? # 9 GDP per capita (constant 2010 US$) # 10 Life expectancy at birth, total (years) # 11 Gini index (World Bank estimate) # 12 Net official development assistance and official aid received (constant 2018 US$) # 13 Population, total ``` Of the categorical identifiers, the date variable was artificially generated to have an example dataset that contains all common data types frequently encountered in R. A detailed statistical description of this data is computed by `descr`: ```r # A fast and detailed statistical description descr(wlddev) # Dataset: wlddev, 13 Variables, N = 13176 # ---------------------------------------------------------------------------------------------------- # country (character): Country Name # Statistics # N Ndist # 13176 216 # Table # Freq Perc # Afghanistan 61 0.46 # Albania 61 0.46 # Algeria 61 0.46 # American Samoa 61 0.46 # Andorra 61 0.46 # Angola 61 0.46 # Antigua and Barbuda 61 0.46 # Argentina 61 0.46 # Armenia 61 0.46 # Aruba 61 0.46 # Australia 61 0.46 # Austria 61 0.46 # Azerbaijan 61 0.46 # Bahamas, The 61 0.46 # ... 202 Others 12322 93.52 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 61 61 61 61 61 61 # ---------------------------------------------------------------------------------------------------- # iso3c (factor): Country Code # Statistics # N Ndist # 13176 216 # Table # Freq Perc # ABW 61 0.46 # AFG 61 0.46 # AGO 61 0.46 # ALB 61 0.46 # AND 61 0.46 # ARE 61 0.46 # ARG 61 0.46 # ARM 61 0.46 # ASM 61 0.46 # ATG 61 0.46 # AUS 61 0.46 # AUT 61 0.46 # AZE 61 0.46 # BDI 61 0.46 # ... 202 Others 12322 93.52 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 61 61 61 61 61 61 # ---------------------------------------------------------------------------------------------------- # date (Date): Date Recorded (Fictitious) # Statistics # N Ndist Min Max # 13176 61 1961-01-01 2021-01-01 # ---------------------------------------------------------------------------------------------------- # year (integer): Year # Statistics # N Ndist Mean SD Min Max Skew Kurt # 13176 61 1990 17.61 1960 2020 -0 1.8 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1960 1963 1966 1975 1990 2005 2014 2017 2020 # ---------------------------------------------------------------------------------------------------- # decade (integer): Decade # Statistics # N Ndist Mean SD Min Max Skew Kurt # 13176 7 1985.57 17.51 1960 2020 0.03 1.79 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1960 1960 1960 1970 1990 2000 2010 2010 2020 # ---------------------------------------------------------------------------------------------------- # region (factor): Region # Statistics # N Ndist # 13176 7 # Table # Freq Perc # Europe & Central Asia 3538 26.85 # Sub-Saharan Africa 2928 22.22 # Latin America & Caribbean 2562 19.44 # East Asia & Pacific 2196 16.67 # Middle East & North Africa 1281 9.72 # South Asia 488 3.70 # North America 183 1.39 # ---------------------------------------------------------------------------------------------------- # income (factor): Income Level # Statistics # N Ndist # 13176 4 # Table # Freq Perc # High income 4819 36.57 # Upper middle income 3660 27.78 # Lower middle income 2867 21.76 # Low income 1830 13.89 # ---------------------------------------------------------------------------------------------------- # OECD (logical): Is OECD Member Country? # Statistics # N Ndist # 13176 2 # Table # Freq Perc # FALSE 10980 83.33 # TRUE 2196 16.67 # ---------------------------------------------------------------------------------------------------- # PCGDP (numeric): GDP per capita (constant 2010 US$) # Statistics (28.13% NAs) # N Ndist Mean SD Min Max Skew Kurt # 9470 9470 12048.78 19077.64 132.08 196061.42 3.13 17.12 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 227.71 399.62 555.55 1303.19 3767.16 14787.03 35646.02 48507.84 92340.28 # ---------------------------------------------------------------------------------------------------- # LIFEEX (numeric): Life expectancy at birth, total (years) # Statistics (11.43% NAs) # N Ndist Mean SD Min Max Skew Kurt # 11670 10548 64.3 11.48 18.91 85.42 -0.67 2.67 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 35.83 42.77 46.83 56.36 67.44 72.95 77.08 79.34 82.36 # ---------------------------------------------------------------------------------------------------- # GINI (numeric): Gini index (World Bank estimate) # Statistics (86.76% NAs) # N Ndist Mean SD Min Max Skew Kurt # 1744 368 38.53 9.2 20.7 65.8 0.6 2.53 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 24.6 26.3 27.6 31.5 36.4 45 52.6 55.98 60.5 # ---------------------------------------------------------------------------------------------------- # ODA (numeric): Net official development assistance and official aid received (constant 2018 US$) # Statistics (34.67% NAs) # N Ndist Mean SD Min Max Skew Kurt # 8608 7832 454'720131 868'712654 -997'679993 2.56715605e+10 6.98 114.89 # Quantiles # 1% 5% 10% 25% 50% 75% 90% # -12'593999.7 1'363500.01 8'347000.31 44'887499.8 165'970001 495'042503 1.18400697e+09 # 95% 99% # 1.93281696e+09 3.73380782e+09 # ---------------------------------------------------------------------------------------------------- # POP (numeric): Population, total # Statistics (1.95% NAs) # N Ndist Mean SD Min Max Skew Kurt # 12919 12877 24'245971.6 102'120674 2833 1.39771500e+09 9.75 108.91 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 8698.84 31083.3 62268.4 443791 4'072517 12'816178 46'637331.4 81'177252.5 308'862641 # ---------------------------------------------------------------------------------------------------- ``` The output of `descr` can be converted into a tidy data frame using: ```r head(as.data.frame(descr(wlddev))) # Variable Class Label N Ndist Min Max Mean SD # 1 country character Country Name 13176 216 NA NA NA NA # 2 iso3c factor Country Code 13176 216 NA NA NA NA # 3 date Date Date Recorded (Fictitious) 13176 61 -3287 18628 NA NA # 4 year integer Year 13176 61 1960 2020 1990.000 17.60749 # 5 decade integer Decade 13176 7 1960 2020 1985.574 17.51175 # 6 region factor Region 13176 7 NA NA NA NA # Skew Kurt 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1 NA NA NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA NA NA NA NA # 4 -5.812381e-16 1.799355 1960 1963 1966 1975 1990 2005 2014 2017 2020 # 5 3.256512e-02 1.791726 1960 1960 1960 1970 1990 2000 2010 2010 2020 # 6 NA NA NA NA NA NA NA NA NA NA NA ``` Note that `descr` does not require data to be labeled. Since `wlddev` is a panel data set tracking countries over time, we might be interested in checking which variables are time-varying, with the function `varying`: ```r varying(wlddev, wlddev$iso3c) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE # POP # TRUE ``` `varying` tells us that all 5 variables `PCGDP`, `LIFEEX`, `GINI`, `ODA` and `POP` vary over time. However the `OECD` variable does not, so this data does not track when countries entered the OECD. We can also have a more detailed look letting `varying` check the variation in each country: ```r head(varying(wlddev, wlddev$iso3c, any_group = FALSE)) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # ABW FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE NA TRUE TRUE # AFG FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE NA TRUE TRUE # AGO FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE # ALB FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE # AND FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE NA NA NA TRUE # ARE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE ``` `NA` indicates that there are no data for this country. In general data is varying if it has two or more distinct non-missing values. We could also take a closer look at observation counts and distinct values using: ```r head(fnobs(wlddev, wlddev$iso3c)) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # ABW 61 61 61 61 61 61 61 61 32 60 0 20 60 # AFG 61 61 61 61 61 61 61 61 18 60 0 60 60 # AGO 61 61 61 61 61 61 61 61 40 60 3 58 60 # ALB 61 61 61 61 61 61 61 61 40 60 9 32 60 # AND 61 61 61 61 61 61 61 61 50 0 0 0 60 # ARE 61 61 61 61 61 61 61 61 45 60 2 45 60 head(fndistinct(wlddev, wlddev$iso3c)) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # ABW 1 1 61 61 7 1 1 1 32 60 0 20 60 # AFG 1 1 61 61 7 1 1 1 18 60 0 60 60 # AGO 1 1 61 61 7 1 1 1 40 59 3 58 60 # ALB 1 1 61 61 7 1 1 1 40 59 9 32 60 # AND 1 1 61 61 7 1 1 1 50 0 0 0 60 # ARE 1 1 61 61 7 1 1 1 45 60 2 45 60 ``` Note that `varying` is more efficient than `fndistinct`, although both functions are very fast. Even more powerful summary methods for multilevel / panel data are provided by `qsu` (shorthand for *quick-summary*). It is modeled after *STATA*'s *summarize* and *xtsummarize* commands. Calling `qsu` on the data gives a concise summary. We can subset columns internally using the `cols` argument: ```r qsu(wlddev, cols = 9:12, higher = TRUE) # higher adds skewness and kurtosis # N Mean SD Min Max Skew Kurt # PCGDP 9470 12048.778 19077.6416 132.0776 196061.417 3.1276 17.1154 # LIFEEX 11670 64.2963 11.4764 18.907 85.4171 -0.6748 2.6718 # GINI 1744 38.5341 9.2006 20.7 65.8 0.596 2.5329 # ODA 8608 454'720131 868'712654 -997'679993 2.56715605e+10 6.9832 114.889 ``` We could easily compute these statistics by region: ```r qsu(wlddev, by = ~region, cols = 9:12, vlabels = TRUE, higher = TRUE) # , , PCGDP: GDP per capita (constant 2010 US$) # # N Mean SD Min Max Skew Kurt # East Asia & Pacific 1467 10513.2441 14383.5507 132.0776 71992.1517 1.6392 4.7419 # Europe & Central Asia 2243 25992.9618 26435.1316 366.9354 196061.417 2.2022 10.1977 # Latin America & Caribbean 1976 7628.4477 8818.5055 1005.4085 88391.3331 4.1702 29.3739 # Middle East & North Africa 842 13878.4213 18419.7912 578.5996 116232.753 2.4178 9.7669 # North America 180 48699.76 24196.2855 16405.9053 113236.091 0.938 2.9688 # South Asia 382 1235.9256 1611.2232 265.9625 8476.564 2.7874 10.3402 # Sub-Saharan Africa 2380 1840.0259 2596.0104 164.3366 20532.9523 3.1161 14.4175 # # , , LIFEEX: Life expectancy at birth, total (years) # # N Mean SD Min Max Skew Kurt # East Asia & Pacific 1807 65.9445 10.1633 18.907 85.078 -0.856 4.3125 # Europe & Central Asia 3046 72.1625 5.7602 45.369 85.4171 -0.5594 4.0434 # Latin America & Caribbean 2107 68.3486 7.3768 41.762 82.1902 -1.0357 3.9379 # Middle East & North Africa 1226 66.2508 9.8306 29.919 82.8049 -0.8782 3.3054 # North America 144 76.2867 3.5734 68.8978 82.0488 -0.1963 1.976 # South Asia 480 57.5585 11.3004 32.446 78.921 -0.2623 2.1147 # Sub-Saharan Africa 2860 51.581 8.6876 26.172 74.5146 0.1452 2.7245 # # , , GINI: Gini index (World Bank estimate) # # N Mean SD Min Max Skew Kurt # East Asia & Pacific 154 37.7571 5.0318 27.8 49.1 0.3631 2.3047 # Europe & Central Asia 798 31.9114 4.5809 20.7 48.4 0.2989 2.5254 # Latin America & Caribbean 413 49.9557 5.4821 34.4 63.3 -0.0386 2.3631 # Middle East & North Africa 91 36.0143 5.2073 26 47.4 0.0241 1.9209 # North America 49 37.4816 3.6972 31 41.5 -0.4282 1.4577 # South Asia 46 33.8804 3.9898 25.9 43.8 0.4205 2.7748 # Sub-Saharan Africa 193 44.6606 8.2003 29.8 65.8 0.6598 2.8451 # # , , ODA: Net official development assistance and official aid received (constant 2018 US$) # # N Mean SD Min Max # East Asia & Pacific 1537 352'017964 622'847624 -997'679993 4.04487988e+09 # Europe & Central Asia 787 402'455286 568'237036 -322'070007 4.34612988e+09 # Latin America & Caribbean 1972 172'880081 260'781049 -444'040009 2.99568994e+09 # Middle East & North Africa 1105 732'380009 1.52108993e+09 -141'789993 2.56715605e+10 # North America 39 468717.916 10'653560.8 -15'869999.9 61'509998.3 # South Asia 466 1.27049955e+09 1.61492889e+09 -247'369995 8.75425977e+09 # Sub-Saharan Africa 2702 486'371750 656'336230 -18'409999.8 1.18790801e+10 # Skew Kurt # East Asia & Pacific 2.722 11.5221 # Europe & Central Asia 3.1305 15.2525 # Latin America & Caribbean 3.3259 22.4569 # Middle East & North Africa 6.6304 79.2238 # North America 4.8602 29.3092 # South Asia 1.7923 6.501 # Sub-Saharan Africa 4.5456 48.8447 ``` Computing summary statistics by country is of course also possible but would be too much information. Fortunately `qsu` lets us do something much more powerful: ```r qsu(wlddev, pid = ~ iso3c, cols = c(1,4,9:12), vlabels = TRUE, higher = TRUE) # , , country: Country Name # # N/T Mean SD Min Max Skew Kurt # Overall 13176 - - - - - - # Between 216 - - - - - - # Within 61 - - - - - - # # , , year: Year # # N/T Mean SD Min Max Skew Kurt # Overall 13176 1990 17.6075 1960 2020 -0 1.7994 # Between 216 1990 0 1990 1990 - - # Within 61 1990 17.6075 1960 2020 -0 1.7994 # # , , PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew Kurt # Overall 9470 12048.778 19077.6416 132.0776 196061.417 3.1276 17.1154 # Between 206 12962.6054 20189.9007 253.1886 141200.38 3.1263 16.2299 # Within 45.9709 12048.778 6723.6808 -33504.8721 76767.5254 0.6576 17.2003 # # , , LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # Overall 11670 64.2963 11.4764 18.907 85.4171 -0.6748 2.6718 # Between 207 64.9537 9.8936 40.9663 85.4171 -0.5012 2.1693 # Within 56.3768 64.2963 6.0842 32.9068 84.4198 -0.2643 3.7027 # # , , GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # Overall 1744 38.5341 9.2006 20.7 65.8 0.596 2.5329 # Between 167 39.4233 8.1356 24.8667 61.7143 0.5832 2.8256 # Within 10.4431 38.5341 2.9277 25.3917 55.3591 0.3263 5.3389 # # , , ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max Skew Kurt # Overall 8608 454'720131 868'712654 -997'679993 2.56715605e+10 6.9832 114.889 # Between 178 439'168412 569'049959 468717.916 3.62337432e+09 2.355 9.9487 # Within 48.3596 454'720131 650'709624 -2.44379420e+09 2.45610972e+10 9.6047 263.3716 ``` The above output reports 3 sets of summary statistics for each variable: Statistics computed on the *Overall* (raw) data, and on the *Between*-country (i.e. country averaged) and *Within*-country (i.e. country-demeaned) data^[in the *Within* data, the overall mean was added back after subtracting out country means, to preserve the level of the data, see also section 6.5.]. This is a powerful way to summarize panel data because aggregating the data by country gives us a cross-section of countries with no variation over time, whereas subtracting country specific means from the data eliminates all cross-sectional variation. So what can these statistics tell us about our data? The `N/T` columns shows that for `PCGDP` we have 8995 total observations, that we observe GDP data for 203 countries and that we have on average 44.3 observations (time-periods) per country. In contrast the GINI Index is only available for 161 countries with 8.4 observations on average. The *Overall* and *Within* mean of the data are identical by definition, and the *Between* mean would also be the same in a balanced panel with no missing observations. In practice we have unequal amounts of observations for different countries, thus countries have different weights in the *Overall* mean and the difference between *Overall* and *Between*-country mean reflects this discrepancy. The most interesting statistic in this summary arguably is the standard deviation, and in particular the comparison of the *Between*-SD reflecting the variation between countries and the *Within*-SD reflecting average variation over time. This comparison shows that PCGDP, LIFEEX and GINI vary more between countries, but ODA received varies more within countries over time. The 0 *Between*-SD for the year variable and the fact that the *Overall* and *Within*-SD are equal shows that year is individual invariant. Thus `qsu` also provides the same information as `varying`, but with additional details on the relative magnitudes of cross-sectional and time series variation. It is also a common pattern that the *kurtosis* increases in within-transformed data, while the *skewness* decreases in most cases. We could also do all of that by regions to have a look at the between and within country variations inside and across different World regions: ```r qsu(wlddev, by = ~ region, pid = ~ iso3c, cols = 9:12, vlabels = TRUE, higher = TRUE) # , , Overall, PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 1467 10513.2441 14383.5507 132.0776 71992.1517 1.6392 4.7419 # Europe & Central Asia 2243 25992.9618 26435.1316 366.9354 196061.417 2.2022 10.1977 # Latin America & Caribbean 1976 7628.4477 8818.5055 1005.4085 88391.3331 4.1702 29.3739 # Middle East & North Africa 842 13878.4213 18419.7912 578.5996 116232.753 2.4178 9.7669 # North America 180 48699.76 24196.2855 16405.9053 113236.091 0.938 2.9688 # South Asia 382 1235.9256 1611.2232 265.9625 8476.564 2.7874 10.3402 # Sub-Saharan Africa 2380 1840.0259 2596.0104 164.3366 20532.9523 3.1161 14.4175 # # , , Between, PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 34 10513.2441 12771.742 444.2899 39722.0077 1.1488 2.7089 # Europe & Central Asia 56 25992.9618 24051.035 809.4753 141200.38 2.0026 9.0733 # Latin America & Caribbean 38 7628.4477 8470.9708 1357.3326 77403.7443 4.4548 32.4956 # Middle East & North Africa 20 13878.4213 17251.6962 1069.6596 64878.4021 1.9508 6.0796 # North America 3 48699.76 18604.4369 35260.4708 74934.5874 0.7065 1.5 # South Asia 8 1235.9256 1488.3669 413.68 6621.5002 3.0546 11.3083 # Sub-Saharan Africa 47 1840.0259 2234.3254 253.1886 9922.0052 2.1442 6.8259 # # , , Within, PCGDP: GDP per capita (constant 2010 US$) # # N/T Mean SD Min Max Skew # East Asia & Pacific 43.1471 12048.778 6615.8248 -11964.6472 49541.463 0.824 # Europe & Central Asia 40.0536 12048.778 10971.0483 -33504.8721 76767.5254 0.4307 # Latin America & Caribbean 52 12048.778 2451.2636 -354.1639 23036.3668 0.1259 # Middle East & North Africa 42.1 12048.778 6455.0512 -18674.4049 63665.0446 1.8525 # North America 60 12048.778 15470.4609 -29523.1017 50350.2816 -0.2451 # South Asia 47.75 12048.778 617.0934 10026.9155 14455.865 0.9846 # Sub-Saharan Africa 50.6383 12048.778 1321.764 4846.3834 24883.1246 1.3879 # Kurt # East Asia & Pacific 8.9418 # Europe & Central Asia 7.4139 # Latin America & Caribbean 7.1939 # Middle East & North Africa 23.0457 # North America 3.2075 # South Asia 5.6366 # Sub-Saharan Africa 28.0186 # # , , Overall, LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 1807 65.9445 10.1633 18.907 85.078 -0.856 4.3125 # Europe & Central Asia 3046 72.1625 5.7602 45.369 85.4171 -0.5594 4.0434 # Latin America & Caribbean 2107 68.3486 7.3768 41.762 82.1902 -1.0357 3.9379 # Middle East & North Africa 1226 66.2508 9.8306 29.919 82.8049 -0.8782 3.3054 # North America 144 76.2867 3.5734 68.8978 82.0488 -0.1963 1.976 # South Asia 480 57.5585 11.3004 32.446 78.921 -0.2623 2.1147 # Sub-Saharan Africa 2860 51.581 8.6876 26.172 74.5146 0.1452 2.7245 # # , , Between, LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 32 65.9445 7.6833 49.7995 77.9008 -0.3832 2.4322 # Europe & Central Asia 55 72.1625 4.4378 60.1129 85.4171 -0.6584 2.8874 # Latin America & Caribbean 40 68.3486 4.9199 53.4918 82.1902 -0.9947 4.1617 # Middle East & North Africa 21 66.2508 5.922 52.5371 76.7395 -0.3181 3.0331 # North America 3 76.2867 1.3589 74.8065 78.4175 0.1467 1.6356 # South Asia 8 57.5585 5.6158 49.1972 69.3429 0.6643 3.1288 # Sub-Saharan Africa 48 51.581 5.657 40.9663 71.5749 1.1333 4.974 # # , , Within, LIFEEX: Life expectancy at birth, total (years) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 56.4688 64.2963 6.6528 32.9068 83.9918 -0.3949 3.9528 # Europe & Central Asia 55.3818 64.2963 3.6723 46.3045 78.6265 -0.0307 3.7576 # Latin America & Caribbean 52.675 64.2963 5.4965 46.7831 79.5026 -0.3827 2.9936 # Middle East & North Africa 58.381 64.2963 7.8467 41.6187 78.8872 -0.6216 2.808 # North America 48 64.2963 3.3049 54.7766 69.4306 -0.4327 2.3027 # South Asia 60 64.2963 9.8062 41.4342 83.0122 -0.0946 2.1035 # Sub-Saharan Africa 59.5833 64.2963 6.5933 41.5678 84.4198 0.0811 2.7821 # # , , Overall, GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 154 37.7571 5.0318 27.8 49.1 0.3631 2.3047 # Europe & Central Asia 798 31.9114 4.5809 20.7 48.4 0.2989 2.5254 # Latin America & Caribbean 413 49.9557 5.4821 34.4 63.3 -0.0386 2.3631 # Middle East & North Africa 91 36.0143 5.2073 26 47.4 0.0241 1.9209 # North America 49 37.4816 3.6972 31 41.5 -0.4282 1.4577 # South Asia 46 33.8804 3.9898 25.9 43.8 0.4205 2.7748 # Sub-Saharan Africa 193 44.6606 8.2003 29.8 65.8 0.6598 2.8451 # # , , Between, GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 23 37.7571 4.3005 30.8 45.8857 0.4912 2.213 # Europe & Central Asia 49 31.9114 4.0611 24.8667 40.935 0.3323 2.291 # Latin America & Caribbean 25 49.9557 4.0492 41.1 57.9 0.03 2.2573 # Middle East & North Africa 15 36.0143 4.7002 29.05 42.7 -0.2035 1.6815 # North America 2 37.4816 3.3563 33.1222 40.0129 -0.5503 1.3029 # South Asia 7 33.8804 3.0052 30.3556 38.8 0.2786 1.4817 # Sub-Saharan Africa 46 44.6606 6.8844 34.52 61.7143 0.9464 3.2302 # # , , Within, GINI: Gini index (World Bank estimate) # # N/T Mean SD Min Max Skew Kurt # East Asia & Pacific 6.6957 38.5341 2.6125 31.0187 45.8901 -0.0585 3.0933 # Europe & Central Asia 16.2857 38.5341 2.1195 31.2841 50.1387 0.6622 6.1763 # Latin America & Caribbean 16.52 38.5341 3.6955 25.3917 48.8341 -0.0506 2.7603 # Middle East & North Africa 6.0667 38.5341 2.2415 31.7675 45.777 0.0408 4.7415 # North America 24.5 38.5341 1.5507 33.0212 42.7119 -1.3213 6.8321 # South Asia 6.5714 38.5341 2.6244 32.8341 45.0675 -0.1055 2.6885 # Sub-Saharan Africa 4.1957 38.5341 4.4553 27.9452 55.3591 0.6338 4.4174 # # , , Overall, ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max # East Asia & Pacific 1537 352'017964 622'847624 -997'679993 4.04487988e+09 # Europe & Central Asia 787 402'455286 568'237036 -322'070007 4.34612988e+09 # Latin America & Caribbean 1972 172'880081 260'781049 -444'040009 2.99568994e+09 # Middle East & North Africa 1105 732'380009 1.52108993e+09 -141'789993 2.56715605e+10 # North America 39 468717.916 10'653560.8 -15'869999.9 61'509998.3 # South Asia 466 1.27049955e+09 1.61492889e+09 -247'369995 8.75425977e+09 # Sub-Saharan Africa 2702 486'371750 656'336230 -18'409999.8 1.18790801e+10 # Skew Kurt # East Asia & Pacific 2.722 11.5221 # Europe & Central Asia 3.1305 15.2525 # Latin America & Caribbean 3.3259 22.4569 # Middle East & North Africa 6.6304 79.2238 # North America 4.8602 29.3092 # South Asia 1.7923 6.501 # Sub-Saharan Africa 4.5456 48.8447 # # , , Between, ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max # East Asia & Pacific 31 352'017964 457'183279 1'654615.38 1.63585532e+09 # Europe & Central Asia 32 402'455286 438'074771 12'516000.1 2.05456932e+09 # Latin America & Caribbean 37 172'880081 167'160838 2'225483.88 538'386665 # Middle East & North Africa 21 732'380009 775'418887 3'112820.5 2.86174883e+09 # North America 1 468717.916 0 468717.916 468717.916 # South Asia 8 1.27049955e+09 1.18347893e+09 27'152499.9 3.62337432e+09 # Sub-Saharan Africa 48 486'371750 397'995105 28'801206.9 1.55049113e+09 # Skew Kurt # East Asia & Pacific 1.7771 5.1361 # Europe & Central Asia 2.0449 7.2489 # Latin America & Caribbean 0.8981 2.4954 # Middle East & North Africa 1.1363 3.6377 # North America - - # South Asia 0.7229 2.4072 # Sub-Saharan Africa 0.9871 3.1513 # # , , Within, ODA: Net official development assistance and official aid received (constant 2018 US$) # # N/T Mean SD Min Max # East Asia & Pacific 49.5806 454'720131 422'992450 -2.04042108e+09 3.59673152e+09 # Europe & Central Asia 24.5938 454'720131 361'916875 -1.08796786e+09 3.30549004e+09 # Latin America & Caribbean 53.2973 454'720131 200'159960 -527'706542 3.28976141e+09 # Middle East & North Africa 52.619 454'720131 1.30860235e+09 -2.34610870e+09 2.45610972e+10 # North America 39 454'720131 10'653560.8 438'381413 515'761411 # South Asia 58.25 454'720131 1.09880524e+09 -2.44379420e+09 5.58560558e+09 # Sub-Saharan Africa 56.2917 454'720131 521'897637 -952'168698 1.12814455e+10 # Skew Kurt # East Asia & Pacific 0.2908 14.4428 # Europe & Central Asia 2.3283 18.6937 # Latin America & Caribbean 3.7015 41.7506 # Middle East & North Africa 7.8663 117.987 # North America 4.8602 29.3092 # South Asia 1.8418 9.4588 # Sub-Saharan Africa 5.2349 86.1042 ``` Notice that the output here is a 4D array of summary statistics, which we could also subset (`[`) or permute (`aperm`) to view these statistics in any convenient way. If we don't like the array, we can also output as a nested list of statistics matrices: ```r l <- qsu(wlddev, by = ~ region, pid = ~ iso3c, cols = 9:12, vlabels = TRUE, higher = TRUE, array = FALSE) str(l, give.attr = FALSE) # List of 4 # $ PCGDP: GDP per capita (constant 2010 US$) :List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 1467 2243 1976 842 180 ... # ..$ Between: 'qsu' num [1:7, 1:7] 34 56 38 20 3 ... # ..$ Within : 'qsu' num [1:7, 1:7] 43.1 40.1 52 42.1 60 ... # $ LIFEEX: Life expectancy at birth, total (years) :List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 1807 3046 2107 1226 144 ... # ..$ Between: 'qsu' num [1:7, 1:7] 32 55 40 21 3 ... # ..$ Within : 'qsu' num [1:7, 1:7] 56.5 55.4 52.7 58.4 48 ... # $ GINI: Gini index (World Bank estimate) :List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 154 798 413 91 49 ... # ..$ Between: 'qsu' num [1:7, 1:7] 23 49 25 15 2 ... # ..$ Within : 'qsu' num [1:7, 1:7] 6.7 16.29 16.52 6.07 24.5 ... # $ ODA: Net official development assistance and official aid received (constant 2018 US$):List of 3 # ..$ Overall: 'qsu' num [1:7, 1:7] 1537 787 1972 1105 39 ... # ..$ Between: 'qsu' num [1:7, 1:7] 31 32 37 21 1 ... # ..$ Within : 'qsu' num [1:7, 1:7] 49.6 24.6 53.3 52.6 39 ... ``` Such a list of statistics matrices could, for example, be converted into a tidy data frame using `unlist2d` (more about this in the section on list-processing): ```r head(unlist2d(l, idcols = c("Variable", "Trans"), row.names = "Region")) # Variable Trans Region N Mean # 1 PCGDP: GDP per capita (constant 2010 US$) Overall East Asia & Pacific 1467 10513.244 # 2 PCGDP: GDP per capita (constant 2010 US$) Overall Europe & Central Asia 2243 25992.962 # 3 PCGDP: GDP per capita (constant 2010 US$) Overall Latin America & Caribbean 1976 7628.448 # 4 PCGDP: GDP per capita (constant 2010 US$) Overall Middle East & North Africa 842 13878.421 # 5 PCGDP: GDP per capita (constant 2010 US$) Overall North America 180 48699.760 # 6 PCGDP: GDP per capita (constant 2010 US$) Overall South Asia 382 1235.926 # SD Min Max Skew Kurt # 1 14383.551 132.0776 71992.152 1.6392248 4.741856 # 2 26435.132 366.9354 196061.417 2.2022472 10.197685 # 3 8818.505 1005.4085 88391.333 4.1701769 29.373869 # 4 18419.791 578.5996 116232.753 2.4177586 9.766883 # 5 24196.285 16405.9053 113236.091 0.9380056 2.968769 # 6 1611.223 265.9625 8476.564 2.7873830 10.340176 ``` This is not yet end of `qsu`'s functionality, as we can also do all of the above on panel-surveys utilizing weights (`w` argument). Finally, we can look at (weighted) pairwise correlations in this data: ```r pwcor(wlddev[9:12], N = TRUE, P = TRUE) # PCGDP LIFEEX GINI ODA # PCGDP 1 (9470) .57* (9022) -.44* (1735) -.16* (7128) # LIFEEX .57* (9022) 1 (11670) -.35* (1742) -.02 (8142) # GINI -.44* (1735) -.35* (1742) 1 (1744) -.20* (1109) # ODA -.16* (7128) -.02 (8142) -.20* (1109) 1 (8608) ``` which can of course also be computed on averaged and within-transformed data: ```r print(pwcor(fmean(wlddev[9:12], wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri") # PCGDP LIFEEX GINI ODA # PCGDP 1 (206) # LIFEEX .60* (199) 1 (207) # GINI -.42* (165) -.40* (165) 1 (167) # ODA -.25* (172) -.21* (172) -.19* (145) 1 (178) # N is same as overall N shown above... print(pwcor(fwithin(wlddev[9:12], wlddev$iso3c), P = TRUE), show = "lower.tri") # PCGDP LIFEEX GINI ODA # PCGDP 1 # LIFEEX .31* 1 # GINI -.01 -.16* 1 # ODA -.01 .17* -.08* 1 ``` A useful function called by `pwcor` is `pwnobs`, which is very handy to explore the joint observation structure when selecting variables to include in a statistical model: ```r pwnobs(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP # country 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # iso3c 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # date 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # year 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # decade 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # region 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # income 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # OECD 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 12919 # PCGDP 9470 9470 9470 9470 9470 9470 9470 9470 9470 9022 1735 7128 9470 # LIFEEX 11670 11670 11670 11670 11670 11670 11670 11670 9022 11670 1742 8142 11659 # GINI 1744 1744 1744 1744 1744 1744 1744 1744 1735 1742 1744 1109 1744 # ODA 8608 8608 8608 8608 8608 8608 8608 8608 7128 8142 1109 8608 8597 # POP 12919 12919 12919 12919 12919 12919 12919 12919 9470 11659 1744 8597 12919 ``` Note that both `pwcor/pwcov` and `pwnobs` are faster on matrices. ### 1.2 `GGDC10S` - GGDC 10-Sector Database The Groningen Growth and Development Centre 10-Sector Database provides long-run data on sectoral productivity performance in Africa, Asia, and Latin America. Variables covered in the data set are annual series of value added (VA, in local currency), and persons employed (EMP) for 10 broad sectors. ```r head(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA # 3 BWA SSA Sub-saharan Africa VA 1962 NA NA NA NA # 4 BWA SSA Sub-saharan Africa VA 1963 NA NA NA NA # 5 BWA SSA Sub-saharan Africa VA 1964 16.30154 3.494075 0.7365696 0.1043936 # 6 BWA SSA Sub-saharan Africa VA 1965 15.72700 2.495768 1.0181992 0.1350976 # CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA # 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229 # 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710 namlab(GGDC10S, class = TRUE) # Variable Class Label # 1 Country character Country # 2 Regioncode character Region code # 3 Region character Region # 4 Variable character Variable # 5 Year numeric Year # 6 AGR numeric Agriculture # 7 MIN numeric Mining # 8 MAN numeric Manufacturing # 9 PU numeric Utilities # 10 CON numeric Construction # 11 WRT numeric Trade, restaurants and hotels # 12 TRA numeric Transport, storage and communication # 13 FIRE numeric Finance, insurance, real estate and business services # 14 GOV numeric Government services # 15 OTH numeric Community, social and personal services # 16 SUM numeric Summation of sector GDP fnobs(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 5027 5027 5027 5027 5027 4364 4355 4355 4354 # CON WRT TRA FIRE GOV OTH SUM # 4355 4355 4355 4355 3482 4248 4364 fndistinct(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 43 6 6 2 67 4353 4224 4353 4237 # CON WRT TRA FIRE GOV OTH SUM # 4339 4344 4334 4349 3470 4238 4364 # The countries included: cat(funique(GGDC10S$Country, sort = TRUE)) # ARG BOL BRA BWA CHL CHN COL CRI DEW DNK EGY ESP ETH FRA GBR GHA HKG IDN IND ITA JPN KEN KOR MEX MOR MUS MWI MYS NGA NGA(alt) NLD PER PHL SEN SGP SWE THA TWN TZA USA VEN ZAF ZMB ``` The first problem in summarizing this data is that value added (VA) is in local currency, the second that it contains 2 different Variables (VA and EMP) stacked in the same column. One way of solving the first problem could be converting the data to percentages through dividing by the overall VA and EMP contained in the last column. A different solution involving grouped-scaling is introduced in section 6.4. The second problem is again nicely handled by `qsu`, which can also compute panel-statistics by groups. ```r # Converting data to percentages of overall VA / EMP, dapply keeps the attributes, see section 6.1 pGGDC10S <- ftransformv(GGDC10S, 6:15, `*`, 100 / SUM) # Summarizing the sectoral data by variable, overall, between and within countries su <- qsu(pGGDC10S, by = ~ Variable, pid = ~ Variable + Country, cols = 6:16, higher = TRUE) # This gives a 4D array of summary statistics str(su) # 'qsu' num [1:2, 1:7, 1:3, 1:11] 2225 2139 35.1 17.3 26.7 ... # - attr(*, "dimnames")=List of 4 # ..$ : chr [1:2] "EMP" "VA" # ..$ : chr [1:7] "N/T" "Mean" "SD" "Min" ... # ..$ : chr [1:3] "Overall" "Between" "Within" # ..$ : chr [1:11] "AGR" "MIN" "MAN" "PU" ... # Permuting this array to a more readible format aperm(su, c(4L, 2L, 3L, 1L)) # , , Overall, EMP # # N/T Mean SD Min Max Skew Kurt # AGR 2225 35.0949 26.7235 0.156 100 0.4856 2.0951 # MIN 2216 1.0349 1.4247 0.0043 9.4097 3.1281 15.0429 # MAN 2216 14.9768 8.0392 0.5822 45.2974 0.4272 2.8455 # PU 2215 0.5782 0.3601 0.0154 2.4786 1.2588 5.5822 # CON 2216 5.6583 2.9252 0.1417 15.9887 -0.0631 2.2725 # WRT 2216 14.9155 6.5573 0.809 32.8046 -0.1814 2.3226 # TRA 2216 4.8193 2.652 0.1506 15.0454 0.9477 4.4695 # FIRE 2216 4.6501 4.3518 0.0799 21.7717 1.2345 4.0831 # GOV 1780 13.1263 8.0844 0 34.8897 0.6301 2.5338 # OTH 2109 8.3977 6.6409 0.421 34.8942 1.4028 4.3191 # SUM 2225 36846.8741 96318.6544 173.8829 764200 5.0229 30.9814 # # , , Between, EMP # # N/T Mean SD Min Max Skew Kurt # AGR 42 35.0949 24.1204 0.9997 88.3263 0.5202 2.2437 # MIN 42 1.0349 1.2304 0.0296 6.8532 2.7313 12.331 # MAN 42 14.9768 7.0375 1.718 32.3439 -0.0164 2.4321 # PU 42 0.5782 0.3041 0.0671 1.3226 0.5459 2.6905 # CON 42 5.6583 2.4748 0.5037 10.3691 -0.4442 2.3251 # WRT 42 14.9155 5.264 4.0003 26.7699 -0.5478 2.7294 # TRA 42 4.8193 2.4712 0.374 12.3887 0.9782 4.7857 # FIRE 42 4.6501 3.4468 0.1505 12.4402 0.6052 2.5883 # GOV 34 13.1263 7.2832 2.0086 29.1577 0.3858 2.1068 # OTH 40 8.3977 6.266 1.3508 26.4036 1.4349 4.3185 # SUM 42 36846.8741 89205.503 369.2353 485820.474 4.0761 19.3159 # # , , Within, EMP # # N/T Mean SD Min Max Skew Kurt # AGR 52.9762 26.3768 11.5044 -5.3234 107.4891 1.6002 11.9683 # MIN 52.7619 3.4006 0.7182 -1.4068 7.509 -0.1988 15.0343 # MAN 52.7619 17.476 3.8861 -1.1061 40.3964 -0.082 7.3994 # PU 52.7381 1.3896 0.1929 0.6346 2.5461 0.5731 7.8523 # CON 52.7619 5.7633 1.5596 0.8964 12.9663 0.3077 4.1248 # WRT 52.7619 15.7581 3.91 3.7356 29.7615 0.3339 3.3386 # TRA 52.7619 6.3486 0.9623 2.3501 11.1064 0.2671 5.7162 # FIRE 52.7619 5.8228 2.6567 -2.9836 15.9974 0.5486 4.0288 # GOV 52.3529 13.263 3.5088 -2.1983 23.611 -0.5647 4.7286 # OTH 52.725 7.3941 2.1999 -2.3286 17.4413 0.2929 6.4631 # SUM 52.9762 21'566436.8 36327.1443 21'287906.3 21'844816.3 0.6649 34.2495 # # , , Overall, VA # # N/T Mean SD Min Max Skew Kurt # AGR 2139 17.3082 15.5066 0.0318 95.222 1.3274 4.8827 # MIN 2139 5.8514 9.0975 0 59.0602 2.7193 10.9184 # MAN 2139 20.0651 8.0033 0.979 41.6281 -0.0348 2.6831 # PU 2139 2.2298 1.1088 0 9.1888 0.8899 6.2385 # CON 2139 5.8721 2.5113 0.5951 25.8575 1.5002 8.9578 # WRT 2139 16.631 5.1374 4.5187 39.7594 0.3455 3.2655 # TRA 2139 7.9329 3.1057 0.7957 25.9625 1.0122 5.7137 # FIRE 2139 7.0377 12.7077 -151.065 39.1705 -6.2254 59.8739 # GOV 1702 13.406 6.3521 0.7607 32.5107 0.4888 2.9043 # OTH 2139 6.4046 5.8416 0.2327 31.4474 1.4978 4.2051 # SUM 2139 43'961639.1 358'350627 0 8.06794210e+09 15.7682 289.4632 # # , , Between, VA # # N/T Mean SD Min Max Skew Kurt # AGR 43 17.3082 13.1901 0.6058 63.8364 1.1328 4.7111 # MIN 43 5.8514 7.5705 0.0475 27.9214 1.7113 4.807 # MAN 43 20.0651 6.6423 4.1869 32.1138 -0.3591 2.619 # PU 43 2.2298 0.7457 0.4462 4.307 0.6196 3.8724 # CON 43 5.8721 1.8455 2.9405 12.9279 1.3285 6.505 # WRT 43 16.631 4.3779 8.4188 26.3876 0.292 2.4553 # TRA 43 7.9329 2.7222 2.037 14.8892 0.6362 3.6686 # FIRE 43 7.0377 9.0284 -35.6144 23.8658 -2.674 15.0975 # GOV 35 13.406 5.875 1.9757 27.7714 0.5198 3.0416 # OTH 43 6.4046 5.6137 1.1184 19.5299 1.3274 3.2043 # SUM 43 43'961639.1 185'785836 5077.7231 1.23317892e+09 5.8098 36.9778 # # , , Within, VA # # N/T Mean SD Min Max Skew Kurt # AGR 49.7442 26.3768 8.1532 5.245 94.3499 1.234 9.5269 # MIN 49.7442 3.4006 5.0451 -20.051 35.7053 0.341 13.102 # MAN 49.7442 17.476 4.4647 1.1188 36.3501 -0.1928 3.9339 # PU 49.7442 1.3896 0.8206 -1.0904 6.2714 0.5258 5.3462 # CON 49.7442 5.7633 1.7031 -0.3464 18.6929 0.7493 6.3751 # WRT 49.7442 15.7581 2.6884 4.6513 32.6691 0.2338 4.4953 # TRA 49.7442 6.3486 1.4951 0.9187 18.5977 0.6995 10.1129 # FIRE 49.7442 5.8228 8.9428 -109.6278 54.1241 -2.7728 54.5971 # GOV 48.6286 13.263 2.4153 5.1249 22.8497 0.1663 3.3083 # OTH 49.7442 7.3941 1.6159 -0.9151 19.3116 0.7301 9.6613 # SUM 49.7442 21'566436.8 306'429102 -1.21124805e+09 6.85632962e+09 12.6639 253.1145 ``` The statistics show that the dataset is very consistent: Employment data cover 42 countries and 53 time-periods in almost all sectors. Agriculture is the largest sector in terms of employment, amounting to a 35% share of employment across countries and time, with a standard deviation (SD) of around 27%. The between-country SD in agricultural employment share is 24% and the within SD is 12%, indicating that processes of structural change are very gradual and most of the variation in structure is between countries. The next largest sectors after agriculture are manufacturing, wholesale and retail trade and government, each claiming an approx. 15% share of the economy. In these sectors the between-country SD is also about twice as large as the within-country SD. In terms of value added, the data covers 43 countries in 50 time-periods. Agriculture, manufacturing, wholesale and retail trade and government are also the largest sectors in terms of VA, but with a diminished agricultural share (around 17%) and a greater share for manufacturing (around 20%). The variation between countries is again greater than the variation within countries, but it seems that at least in terms of agricultural VA share there is also a considerable within-country SD of 8%. This is also true for the finance and real estate sector with a within SD of 9%, suggesting (using a bit of common sense) that a diminishing VA share in agriculture and increased VA share in finance and real estate was a pattern characterizing most of the countries in this sample. As a final step we consider a plot function which can be used to plot the structural transformation of any supported country. Below for Botswana: ```r library(data.table) library(ggplot2) library(magrittr) plotGGDC <- function(ctry) { # Select and subset fsubset(GGDC10S, Country == ctry, Variable, Year, AGR:SUM) %>% # Convert to shares and replace negative values with NA ftransform(fselect(., AGR:OTH) %>% lapply(`*`, 1 / SUM) %>% replace_outliers(0, NA, "min")) %>% # Remove totals column and make proper variable labels ftransform(Variable = recode_char(Variable, VA = "Value Added Share", EMP = "Employment Share"), SUM = NULL) %>% # Fast conversion to data.table qDT %>% # data.table's melt function melt(1:2, variable.name = "Sector", na.rm = TRUE) %>% # ggplot with some scales provided by the 'scales' package ggplot(aes(x = Year, y = value, fill = Sector)) + geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) + theme_linedraw(base_size = 14L) + facet_wrap( ~ Variable) + scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10L))) + scale_x_continuous(breaks = scales::pretty_breaks(n = 7L), expand = c(0, 0)) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10L), expand = c(0, 0), labels = scales::percent) + theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)), strip.background = element_rect(colour = "grey20", fill = "grey20"), strip.text = element_text(face = "bold")) } # Plotting the structural transformation of Botswana plotGGDC("BWA") ```
plot of chunk scplot_BWA

plot of chunk scplot_BWA

## 2. Fast Data Manipulation A lot of R code is not concerned with statistical computations but with preliminary data wrangling. For various reasons R development has focused on data frames as the main medium to contain data, although matrices / arrays provide significantly faster methods for common manipulations. A first essential step towards optimizing R code is thus to speed up very frequent manipulations on data frames. *collapse* introduces a set of highly optimized functions to efficiently manipulate (mostly) data frames. Most manipulations can be conducted in non-standard evaluation or standard evaluation (utilizing different functions), and all functions preserve the data structure (i.e. they can be used with data.table, tbl_df, grouped_df, pdata.frame etc.). ### 2.1 Selecting and Replacing Columns `fselect` is an analogue to `dplyr::select`, but executes about 100x faster. It can be used to select variables using expressions involving variable names: ```r library(magrittr) # Pipe operators fselect(wlddev, country, year, PCGDP:ODA) %>% head(2) # country year PCGDP LIFEEX GINI ODA # 1 Afghanistan 1960 NA 32.446 NA 116769997 # 2 Afghanistan 1961 NA 32.962 NA 232080002 fselect(wlddev, -country, -year, -(PCGDP:ODA)) %>% head(2) # iso3c date decade region income OECD POP # 1 AFG 1961-01-01 1960 South Asia Low income FALSE 8996973 # 2 AFG 1962-01-01 1960 South Asia Low income FALSE 9169410 library(microbenchmark) microbenchmark(fselect = collapse::fselect(wlddev, country, year, PCGDP:ODA), select = dplyr::select(wlddev, country, year, PCGDP:ODA)) # Unit: microseconds # expr min lq mean median uq max neval # fselect 2.911 3.4645 4.76297 4.3665 5.3710 20.459 100 # select 382.284 393.0055 442.70734 410.3075 441.4265 2951.262 100 ``` in contrast to `dplyr::select`, `fselect` has a replacement method ```r # Computing the log of columns fselect(wlddev, PCGDP:POP) <- lapply(fselect(wlddev, PCGDP:POP), log) head(wlddev, 2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 3.479577 NA 18.57572 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 3.495355 NA 19.26259 # POP # 1 16.01240 # 2 16.03138 # Efficient deleting fselect(wlddev, country, year, PCGDP:POP) <- NULL head(wlddev, 2) # iso3c date decade region income OECD # 1 AFG 1961-01-01 1960 South Asia Low income FALSE # 2 AFG 1962-01-01 1960 South Asia Low income FALSE rm(wlddev) ``` and it can also return information about the selected columns other than the data itself. ```r fselect(wlddev, PCGDP:POP, return = "names") # [1] "PCGDP" "LIFEEX" "GINI" "ODA" "POP" fselect(wlddev, PCGDP:POP, return = "indices") # [1] 9 10 11 12 13 fselect(wlddev, PCGDP:POP, return = "named_indices") # PCGDP LIFEEX GINI ODA POP # 9 10 11 12 13 fselect(wlddev, PCGDP:POP, return = "logical") # [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE fselect(wlddev, PCGDP:POP, return = "named_logical") # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE # POP # TRUE ``` While `fselect` is faster than `dplyr::select`, it is also simpler and does not offer special methods for grouped tibbles (e.g. where grouping columns are always selected) and some other *dplyr*-specific features of `select`. We will see that this is not a problem at all when working with statistical functions in *collapse* that have a grouped_df method, but users should be careful replacing `dplyr::select` with `fselect` in *dplyr* scripts. From *collapse* 1.6.0, `fselect` has explicit support for *sf* data frames. The standard-evaluation analogue to `fselect` is the function `get_vars`. `get_vars` can be used to select variables using names, indices, logical vectors, functions or regular expressions evaluated against column names: ```r get_vars(wlddev, 9:13) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 get_vars(wlddev, c("PCGDP","LIFEEX","GINI","ODA","POP")) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 get_vars(wlddev, "[[:upper:]]", regex = TRUE) %>% head(1) # OECD PCGDP LIFEEX GINI ODA POP # 1 FALSE NA 32.446 NA 116769997 8996973 get_vars(wlddev, "PC|LI|GI|OD|PO", regex = TRUE) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 # Same as above, vectors of regular expressions are sequentially passed to grep get_vars(wlddev, c("PC","LI","GI","OD","PO"), regex = TRUE) %>% head(1) # PCGDP LIFEEX GINI ODA POP # 1 NA 32.446 NA 116769997 8996973 get_vars(wlddev, is.numeric) %>% head(1) # year decade PCGDP LIFEEX GINI ODA POP # 1 1960 1960 NA 32.446 NA 116769997 8996973 # Returning other information get_vars(wlddev, is.numeric, return = "names") # [1] "year" "decade" "PCGDP" "LIFEEX" "GINI" "ODA" "POP" get_vars(wlddev, "[[:upper:]]", regex = TRUE, return = "named_indices") # OECD PCGDP LIFEEX GINI ODA POP # 8 9 10 11 12 13 ``` Replacing operations are conducted analogous: ```r get_vars(wlddev, 9:13) <- lapply(get_vars(wlddev, 9:13), log) get_vars(wlddev, 9:13) <- NULL head(wlddev, 2) # country iso3c date year decade region income OECD # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE rm(wlddev) ``` `get_vars` is about 2x faster than `[.data.frame`, and `get_vars<-` is about 6-8x faster than `[<-.data.frame`. In addition to `get_vars`, *collapse* offers a set of functions to efficiently select and replace data by data type: `num_vars`, `cat_vars` (for categorical = non-numeric columns), `char_vars`, `fact_vars`, `logi_vars` and `date_vars` (for date and date-time columns). ```r head(num_vars(wlddev), 2) # year decade PCGDP LIFEEX GINI ODA POP # 1 1960 1960 NA 32.446 NA 116769997 8996973 # 2 1961 1960 NA 32.962 NA 232080002 9169410 head(cat_vars(wlddev), 2) # country iso3c date region income OECD # 1 Afghanistan AFG 1961-01-01 South Asia Low income FALSE # 2 Afghanistan AFG 1962-01-01 South Asia Low income FALSE head(fact_vars(wlddev), 2) # iso3c region income # 1 AFG South Asia Low income # 2 AFG South Asia Low income # Replacing fact_vars(wlddev) <- fact_vars(wlddev) ``` ### 2.2 Subsetting `fsubset` is an enhanced version of `base::subset` using C functions from the *data.table* package for fast and subsetting operations. In contrast to `base::subset`, `fsubset` allows multiple comma-separated select arguments after the subset argument, and it also preserves all attributes of subsetted columns: ```r # Returning only value-added data after 1990 fsubset(GGDC10S, Variable == "VA" & Year > 1990, Country, Year, AGR:GOV) %>% head(2) # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # 1 BWA 1991 303.1157 2646.950 472.6488 160.6079 580.0876 806.7509 232.7884 432.6965 1073.263 # 2 BWA 1992 333.4364 2690.939 537.4274 178.4532 678.7320 725.2577 285.1403 517.2141 1234.012 # Same thing fsubset(GGDC10S, Variable == "VA" & Year > 1990, -(Regioncode:Variable), -(OTH:SUM)) %>% head(2) # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # 1 BWA 1991 303.1157 2646.950 472.6488 160.6079 580.0876 806.7509 232.7884 432.6965 1073.263 # 2 BWA 1992 333.4364 2690.939 537.4274 178.4532 678.7320 725.2577 285.1403 517.2141 1234.012 ``` It is also possible to use standard evaluation with `fsubset`, but for these purposes the function `ss` exists as a fast and more secure alternative to `[.data.frame`: ```r ss(GGDC10S, 1:2, 6:16) # or fsubset(GGDC10S, 1:2, 6:16), but not recommended. # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA NA NA ss(GGDC10S, -(1:2), c("AGR","MIN")) %>% head(2) # AGR MIN # 1 NA NA # 2 NA NA ``` Thanks to the *data.table* C code and optimized R code, `fsubset` is very fast. ```r microbenchmark(base = subset(GGDC10S, Variable == "VA" & Year > 1990, AGR:SUM), collapse = fsubset(GGDC10S, Variable == "VA" & Year > 1990, AGR:SUM)) # Unit: microseconds # expr min lq mean median uq max neval # base 150.839 156.5585 199.63105 160.3510 166.993 3778.191 100 # collapse 45.715 49.0975 51.55545 50.9015 52.357 82.861 100 microbenchmark(GGDC10S[1:10, 1:10], ss(GGDC10S, 1:10, 1:10)) # Unit: microseconds # expr min lq mean median uq max neval # GGDC10S[1:10, 1:10] 36.367 36.982 38.14599 37.515 38.294 76.219 100 # ss(GGDC10S, 1:10, 1:10) 1.886 2.050 2.30666 2.214 2.419 8.405 100 ``` like `base::subset`, `fsubset` is S3 generic with methods for vectors, matrices and data frames. For certain classes such as factors, `fsubset.default` also improves upon `[`, but the largest improvements are with the data frame method. ### 2.3 Reordering Rows and Columns `roworder` is a fast analogue to `dplyr::arrange`. The syntax is inspired by `data.table::setorder`, so that negative variable names indicate descending sort. ```r roworder(GGDC10S, -Variable, Country) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ARG LAM Latin America VA 1950 5.887857e-07 0 3.53443e-06 # 2 ARG LAM Latin America VA 1951 9.165327e-07 0 4.77277e-06 microbenchmark(collapse = collapse::roworder(GGDC10S, -Variable, Country), dplyr = dplyr::arrange(GGDC10S, desc(Variable), Country)) # Unit: microseconds # expr min lq mean median uq max neval # collapse 113.406 152.151 176.7567 165.722 183.0855 538.330 100 # dplyr 1240.168 1299.372 1618.5869 1384.755 1507.8160 8350.552 100 ``` In contrast to `data.table::setorder`, `roworder` creates a copy of the data frame (unless data are already sorted). If this copy is not required, `data.table::setorder` is faster. The function `roworderv` is a standard evaluation analogue to `roworder`: ```r # Same as above roworderv(GGDC10S, c("Variable", "Country"), decreasing = c(TRUE, FALSE)) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ARG LAM Latin America VA 1950 5.887857e-07 0 3.53443e-06 # 2 ARG LAM Latin America VA 1951 9.165327e-07 0 4.77277e-06 ``` With `roworderv`, it is also possible to move or exchange rows in a data frame: ```r # If length(neworder) < fnrow(data), the default (pos = "front") brings rows to the front roworderv(GGDC10S, neworder = which(GGDC10S$Country == "GHA")) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 GHA SSA Sub-saharan Africa VA 1960 0.03576160 0.005103683 0.01744687 # 2 GHA SSA Sub-saharan Africa VA 1961 0.03823049 0.005456030 0.01865136 # pos = "end" brings rows to the end roworderv(GGDC10S, neworder = which(GGDC10S$Country == "BWA"), pos = "end") %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ETH SSA Sub-saharan Africa VA 1960 NA NA NA # 2 ETH SSA Sub-saharan Africa VA 1961 4495.614 11.86979 109.616 # pos = "exchange" arranges selected rows in the order they are passed, without affecting other rows roworderv(GGDC10S, neworder = with(GGDC10S, c(which(Country == "GHA"), which(Country == "BWA"))), pos = "exchange") %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 GHA SSA Sub-saharan Africa VA 1960 0.03576160 0.005103683 0.01744687 # 2 GHA SSA Sub-saharan Africa VA 1961 0.03823049 0.005456030 0.01865136 ``` Similarly, the pair `colorder` / `colorderv` facilitates efficient reordering of columns in a data frame. These functions not require a deep copy of the data and are very fast. To reorder columns by reference, see also `data.table::setcolorder`. ```r # The default is again pos = "front" which brings selected columns to the front / left colorder(GGDC10S, Variable, Country, Year) %>% head(2) # Variable Country Year Regioncode Region AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # 1 VA BWA 1960 SSA Sub-saharan Africa NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA 1961 SSA Sub-saharan Africa NA NA NA NA NA NA NA NA NA NA NA ``` ### 2.4 Transforming and Computing New Columns `ftransform` is an improved version of `base::transform` for data frames and lists. `ftransform` can be used to compute new columns or modify and delete existing columns, and always returns the entire data frame. ```r ftransform(GGDC10S, AGR_perc = AGR / SUM * 100, # Computing Agricultural percentage Year = as.integer(Year), # Coercing Year to integer AGR = NULL) %>% tail(2) # Deleting column AGR # Country Regioncode Region Variable Year MIN MAN PU # 5026 EGY MENA Middle East and North Africa EMP 2011 27.56394 2373.814 317.9979 # 5027 EGY MENA Middle East and North Africa EMP 2012 24.78083 2348.434 324.9332 # CON WRT TRA FIRE GOV OTH SUM AGR_perc # 5026 2795.264 3020.236 2048.335 814.7403 5635.522 NA 22219.39 23.33961 # 5027 2931.196 3109.522 2065.004 832.4770 5735.623 NA 22532.56 22.90281 # Computing scalar results replicates them ftransform(GGDC10S, MIN_mean = fmean(MIN), Intercept = 1) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 5185.919 27.56394 2373.814 # 5027 EGY MENA Middle East and North Africa EMP 2012 5160.590 24.78083 2348.434 # PU CON WRT TRA FIRE GOV OTH SUM MIN_mean Intercept # 5026 317.9979 2795.264 3020.236 2048.335 814.7403 5635.522 NA 22219.39 1867909 1 # 5027 324.9332 2931.196 3109.522 2065.004 832.4770 5735.623 NA 22532.56 1867909 1 ``` The modification `ftransformv` exists to transform specific columns using a function: ```r # Apply the log to columns 6-16 GGDC10S %>% ftransformv(6:16, log) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 8.553702 3.316508 7.772253 # 5027 EGY MENA Middle East and North Africa EMP 2012 8.548806 3.210070 7.761504 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845 NA 10.00872 # 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452 NA 10.02272 # Convert data to percentage terms GGDC10S %>% ftransformv(6:16, `*`, 100/SUM) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 23.33961 0.1240535 10.68352 # 5027 EGY MENA Middle East and North Africa EMP 2012 22.90281 0.1099779 10.42240 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 1.431173 12.58029 13.59279 9.218680 3.666798 25.36308 NA 100 # 5027 1.442061 13.00871 13.80013 9.164534 3.694551 25.45482 NA 100 # Apply log to numeric columns GGDC10S %>% ftransformv(is.numeric, log) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 7.606387 8.553702 3.316508 7.772253 # 5027 EGY MENA Middle East and North Africa EMP 7.606885 8.548806 3.210070 7.761504 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845 NA 10.00872 # 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452 NA 10.02272 ``` Instead of passing comma-separated `column = value` expressions, it is also possible to bulk-process data with `fransform` by passing a single list of expressions (such as a data frame). This is useful for more complex transformations involving multiple steps: ```r # Same as above, but also replacing any generated infinite values with NA GGDC10S %>% ftransform(num_vars(.) %>% lapply(log) %>% replace_Inf) %>% tail(2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 7.606387 8.553702 3.316508 7.772253 # 5027 EGY MENA Middle East and North Africa EMP 7.606885 8.548806 3.210070 7.761504 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845 NA 10.00872 # 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452 NA 10.02272 ``` This mode of usage toggles automatic column matching and replacement. Non-matching columns are added to the data frame. Apart from to `ftransform`, the function `settransform(v)` can be used to change the input data frame by reference: ```r # Computing a new column and deleting some others by reference settransform(GGDC10S, FIRE_MAN = FIRE / MAN, Regioncode = NULL, Region = NULL) tail(GGDC10S, 2) # Country Variable Year AGR MIN MAN PU CON WRT TRA FIRE # 5026 EGY EMP 2011 5185.919 27.56394 2373.814 317.9979 2795.264 3020.236 2048.335 814.7403 # 5027 EGY EMP 2012 5160.590 24.78083 2348.434 324.9332 2931.196 3109.522 2065.004 832.4770 # GOV OTH SUM FIRE_MAN # 5026 5635.522 NA 22219.39 0.3432200 # 5027 5735.623 NA 22532.56 0.3544817 rm(GGDC10S) # Bulk-processing the data into percentage terms settransformv(GGDC10S, 6:16, `*`, 100/SUM) tail(GGDC10S, 2) # Country Regioncode Region Variable Year AGR MIN MAN # 5026 EGY MENA Middle East and North Africa EMP 2011 23.33961 0.1240535 10.68352 # 5027 EGY MENA Middle East and North Africa EMP 2012 22.90281 0.1099779 10.42240 # PU CON WRT TRA FIRE GOV OTH SUM # 5026 1.431173 12.58029 13.59279 9.218680 3.666798 25.36308 NA 100 # 5027 1.442061 13.00871 13.80013 9.164534 3.694551 25.45482 NA 100 # Same thing via replacement ftransform(GGDC10S) <- fselect(GGDC10S, AGR:SUM) %>% lapply(`*`, 100/.$SUM) # Or using double pipes GGDC10S %<>% ftransformv(6:16, `*`, 100/SUM) rm(GGDC10S) ``` Another convenient addition is provided by the function `fcompute`, which can be used to compute new columns in a data frame environment and returns the computed columns in a new data frame: ```r fcompute(GGDC10S, AGR_perc = AGR / SUM * 100, FIRE_MAN = FIRE / MAN) %>% tail(2) # AGR_perc FIRE_MAN # 5026 23.33961 0.3432200 # 5027 22.90281 0.3544817 ``` For more complex tasks see `?ftransform`. ### 2.5 Adding and Binding Columns For cases where multiple columns are computed and need to be added to a data frame (regardless of whether names are duplicated or not), *collapse* introduces the predicate `add_vars`. Together with `add_vars`, the function `add_stub` is useful to add a prefix (default) or postfix to computed variables keeping the variable names unique: ```r # Efficient adding logged versions of some variables add_vars(wlddev) <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.") head(wlddev, 2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # POP log10.PCGDP log10.LIFEEX log10.GINI log10.ODA log10.POP # 1 8996973 NA 1.511161 NA 8.067331 6.954096 # 2 9169410 NA 1.518014 NA 8.365638 6.962341 rm(wlddev) ``` By default `add_vars` appends a data frame towards the (right) end, but it can also replace columns in front or at other positions in the data frame: ```r add_vars(wlddev, "front") <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.") head(wlddev, 2) # log10.PCGDP log10.LIFEEX log10.GINI log10.ODA log10.POP country iso3c date year decade # 1 NA 1.511161 NA 8.067331 6.954096 Afghanistan AFG 1961-01-01 1960 1960 # 2 NA 1.518014 NA 8.365638 6.962341 Afghanistan AFG 1962-01-01 1961 1960 # region income OECD PCGDP LIFEEX GINI ODA POP # 1 South Asia Low income FALSE NA 32.446 NA 116769997 8996973 # 2 South Asia Low income FALSE NA 32.962 NA 232080002 9169410 rm(wlddev) add_vars(wlddev, c(10L,12L,14L,16L,18L)) <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.") head(wlddev, 2) # country iso3c date year decade region income OECD PCGDP log10.PCGDP LIFEEX # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA NA 32.446 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA NA 32.962 # log10.LIFEEX GINI log10.GINI ODA log10.ODA POP log10.POP # 1 1.511161 NA NA 116769997 8.067331 8996973 6.954096 # 2 1.518014 NA NA 232080002 8.365638 9169410 6.962341 rm(wlddev) ``` `add_vars` can also be used without replacement, where it serves as a more efficient version of `cbind.data.frame`, with the difference that the data structure and attributes of the first argument are preserved: ```r add_vars(wlddev, get_vars(wlddev, 9:13) %>% lapply(log) %>% add_stub("log."), get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.")) %>% head(2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # POP log.PCGDP log.LIFEEX log.GINI log.ODA log.POP log10.PCGDP log10.LIFEEX log10.GINI # 1 8996973 NA 3.479577 NA 18.57572 16.01240 NA 1.511161 NA # 2 9169410 NA 3.495355 NA 19.26259 16.03138 NA 1.518014 NA # log10.ODA log10.POP # 1 8.067331 6.954096 # 2 8.365638 6.962341 add_vars(wlddev, get_vars(wlddev, 9:13) %>% lapply(log) %>% add_stub("log."), get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10."), pos = c(10L,13L,16L,19L,22L,11L,14L,17L,20L,23L)) %>% head(2) # country iso3c date year decade region income OECD PCGDP log.PCGDP log10.PCGDP # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA NA NA # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA NA NA # LIFEEX log.LIFEEX log10.LIFEEX GINI log.GINI log10.GINI ODA log.ODA log10.ODA POP # 1 32.446 3.479577 1.511161 NA NA NA 116769997 18.57572 8.067331 8996973 # 2 32.962 3.495355 1.518014 NA NA NA 232080002 19.26259 8.365638 9169410 # log.POP log10.POP # 1 16.01240 6.954096 # 2 16.03138 6.962341 identical(cbind(wlddev, wlddev), add_vars(wlddev, wlddev)) # [1] TRUE microbenchmark(cbind(wlddev, wlddev), add_vars(wlddev, wlddev)) # Unit: microseconds # expr min lq mean median uq max neval # cbind(wlddev, wlddev) 13.694 14.1040 15.72760 14.391 14.7600 57.072 100 # add_vars(wlddev, wlddev) 3.280 3.6285 4.13567 3.813 4.0385 19.352 100 ``` ### 2.6 Renaming Columns `frename` is a fast substitute for `dplyr::rename`: ```r frename(GGDC10S, AGR = Agriculture, MIN = Mining) %>% head(2) # Country Regioncode Region Variable Year Agriculture Mining MAN PU CON WRT TRA FIRE # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA # GOV OTH SUM # 1 NA NA NA # 2 NA NA NA frename(GGDC10S, tolower) %>% head(2) # country regioncode region variable year agr min man pu con wrt tra fire gov oth sum # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA NA NA NA frename(GGDC10S, tolower, cols = .c(AGR, MIN)) %>% head(2) # Country Regioncode Region Variable Year agr min MAN PU CON WRT TRA FIRE GOV OTH SUM # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA NA NA NA ``` The function `setrename` does this by reference: ```r setrename(GGDC10S, AGR = Agriculture, MIN = Mining) head(GGDC10S, 2) # Country Regioncode Region Variable Year Agriculture Mining MAN PU CON WRT TRA FIRE # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA # GOV OTH SUM # 1 NA NA NA # 2 NA NA NA setrename(GGDC10S, Agriculture = AGR, Mining = MIN) rm(GGDC10S) ``` Both functions are not limited to data frames but can be applied to any R object with a 'names' attribute. ### 2.7 Using Shortcuts The most frequently required among the functions introduced above can be abbreviated as follows: `fselect -> slt`, `fsubset -> sbt`, `ftransform(v) -> tfm(v)`, `settransform(v) -> settfm(v)`, `get_vars -> gv`, `num_vars -> nv`, `add_vars -> av`. This was done to make it possible to write faster and more parsimonious code, but is recommended only for personally kept scripts. A lazy person may also decide to code everything using shortcuts and then do ctrl+F replacement with the long names on the finished script. ### 2.8 Missing Values / Rows The function `na_omit` is a much faster alternative to `stats::na.omit` for vectors, matrices and data frames. By default the 'na.action' attribute containing the removed cases is omitted, but it can be added with the option `na.attr = TRUE`. Like `fsubset`, `na_omit` preserves all column attributes as well as attributes of the data frame itself. ```r microbenchmark(na_omit(wlddev, na.attr = TRUE), na.omit(wlddev)) # Unit: microseconds # expr min lq mean median uq max neval # na_omit(wlddev, na.attr = TRUE) 60.393 69.208 84.8126 79.9910 88.683 419.881 100 # na.omit(wlddev) 745.790 856.449 1721.5457 940.6015 1005.177 56344.414 100 ``` Another added feature is the removal of cases missing on certain columns only: ```r na_omit(wlddev, cols = .c(PCGDP, LIFEEX)) %>% head(2) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI # 1 Afghanistan AFG 2003-01-01 2002 2000 South Asia Low income FALSE 330.3036 56.784 NA # 2 Afghanistan AFG 2004-01-01 2003 2000 South Asia Low income FALSE 343.0809 57.271 NA # ODA POP # 1 1790479980 22600770 # 2 1972890015 23680871 # only removing missing data from numeric columns -> same and slightly faster than na_omit(wlddev) na_omit(wlddev, cols = is.numeric) %>% head(2) # country iso3c date year decade region income OECD PCGDP # 1 Albania ALB 1997-01-01 1996 1990 Europe & Central Asia Upper middle income FALSE 1869.866 # 2 Albania ALB 2003-01-01 2002 2000 Europe & Central Asia Upper middle income FALSE 2572.721 # LIFEEX GINI ODA POP # 1 72.495 27.0 294089996 3168033 # 2 74.579 31.7 453309998 3051010 ``` For atomic vectors the function `na_rm` also exists which is 2x faster than `x[!is.na(x)]`. Both `na_omit` and `na_rm` return their argument if no missing cases were found. The existence of missing cases can be checked using `missing_cases`, which is also considerably faster than `complete.cases` for data frames. There is also a function `na_insert` to randomly insert missing values into vectors, matrices and data frames. The default is `na_insert(X, prop = 0.1)` so that 10% of values are randomly set to missing. Finally, a function `allNA` provides the much needed opposite of `anyNA` for atomic vectors. ### 2.9 Unique Values / Rows Similar to `na_omit`, the function `funique` is a much faster alternative to `base::unique` for atomic vectors and data frames. Like most *collapse* functions it also seeks to preserve attributes. ```r funique(GGDC10S$Variable) # Unique values in order of appearance # [1] "VA" "EMP" # attr(,"label") # [1] "Variable" # attr(,"format.stata") # [1] "%9s" funique(GGDC10S$Variable, sort = TRUE) # Sorted unique values # [1] "EMP" "VA" # attr(,"label") # [1] "Variable" # attr(,"format.stata") # [1] "%9s" # If all values/rows are unique, the original data is returned (no copy) identical(funique(GGDC10S), GGDC10S) # [1] TRUE # Can remove duplicate rows by a subset of columns funique(GGDC10S, cols = .c(Country, Variable)) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA # 2 BWA SSA Sub-saharan Africa EMP 1960 NA NA NA funique(GGDC10S, cols = .c(Country, Variable), sort = TRUE) %>% ss(1:2, 1:8) # Country Regioncode Region Variable Year AGR MIN MAN # 1 ARG LAM Latin America EMP 1950 1.799565e+03 32.71936 1.603249e+03 # 2 ARG LAM Latin America VA 1950 5.887857e-07 0.00000 3.534430e-06 ``` ### 2.10 Recoding and Replacing Values With `recode_num`, `recode_char`, `replace_NA`, `replace_Inf` and `replace_outliers`, *collapse* also introduces a set of functions to efficiently recode and replace numeric and character values in matrix-like objects (vectors, matrices, arrays, data frames, lists of atomic objects). When called on a data frame, `recode_num`, `replace_Inf` and `replace_outliers` will skip non-numeric columns, and `recode_char` skips non-character columns, whereas `replace_NA` replaces missing values in all columns. ```r # Efficient replacing missing values with 0 microbenchmark(replace_NA(GGDC10S, 0)) # Unit: microseconds # expr min lq mean median uq max neval # replace_NA(GGDC10S, 0) 109.757 141.163 203.4982 151.0235 163.0775 4579.085 100 # Adding log-transformed sectoral data: Some NaN and Inf values generated add_vars(GGDC10S, 6:16*2-5) <- fselect(GGDC10S, AGR:SUM) %>% lapply(log) %>% replace_Inf %>% add_stub("log.") head(GGDC10S, 2) # Country Regioncode Region Variable Year AGR log.AGR MIN log.MIN MAN log.MAN PU log.PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA NA NA NA NA # CON log.CON WRT log.WRT TRA log.TRA FIRE log.FIRE GOV log.GOV OTH log.OTH SUM log.SUM # 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA rm(GGDC10S) ``` `recode_num` and `recode_char` follow the syntax of `dplyr::recode` and provide more or less the same functionality except that they can efficiently be applied to matrices and data frames, and that `recode_char` allows for regular expression matching implemented via `base::grepl`: ```r month.name # [1] "January" "February" "March" "April" "May" "June" "July" "August" # [9] "September" "October" "November" "December" recode_char(month.name, ber = "C", "^J" = "A", default = "B", regex = TRUE) # [1] "A" "B" "B" "B" "B" "A" "A" "B" "B" "B" "B" "B" ``` The perhaps most interesting function in this ensemble is `replace_outliers`, which replaces values falling outside a 1- or 2-sided numeric threshold or outside a certain number of column- standard deviations with a value (default is `NA`). ```r # replace all values below 2 and above 100 with NA replace_outliers(mtcars, c(2, 100)) %>% head(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 21.0 6 NA NA 3.90 2.620 16.46 NA NA 4 4 # Mazda RX4 Wag 21.0 6 NA NA 3.90 2.875 17.02 NA NA 4 4 # Datsun 710 22.8 4 NA 93 3.85 2.320 18.61 NA NA 4 NA # replace all value smaller than 2 with NA replace_outliers(mtcars, 2, single.limit = "min") %>% head(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 NA NA 4 4 # Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 NA NA 4 4 # Datsun 710 22.8 4 108 93 3.85 2.320 18.61 NA NA 4 NA # replace all value larger than 100 with NA replace_outliers(mtcars, 100, single.limit = "max") %>% head(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 21.0 6 NA NA 3.90 2.620 16.46 0 1 4 4 # Mazda RX4 Wag 21.0 6 NA NA 3.90 2.875 17.02 0 1 4 4 # Datsun 710 22.8 4 NA 93 3.85 2.320 18.61 1 1 4 1 # replace all values above or below 3 column-standard-deviations from the column-mean with NA replace_outliers(mtcars, 3) %>% tail(3) # mpg cyl disp hp drat wt qsec vs am gear carb # Ferrari Dino 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6 # Maserati Bora 15.0 8 301 335 3.54 3.57 14.6 0 1 5 NA # Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 ``` ## 3. Quick Data Object Conversions Apart from code employed for manipulation of data and the actual statistical computations performed, frequently used data object conversions with base functions like `as.data.frame`, `as.matrix` or `as.factor` have a significant share in slowing down R code. Optimally code would be written without such conversions, but sometimes they are necessary and thus *collapse* provides a set of functions (`qDF`, `qDT`, `qTBL`, `qM`, `qF`, `mrtl` and `mctl`) to speed these conversions up quite a bit. These functions are fast because they are non-generic and dispatch different objects internally, perform critical steps in C++, and, when passed lists of objects, they only check the length of the first column. `qDF`, `qDT` and `qTBL` efficiently convert vectors, matrices, higher-dimensional arrays and suitable lists to data.frame, *data.table* and *tibble* respectively. ```r str(EuStockMarkets) # Time-Series [1:1860, 1:4] from 1991 to 1999: 1629 1614 1607 1621 1618 ... # - attr(*, "dimnames")=List of 2 # ..$ : NULL # ..$ : chr [1:4] "DAX" "SMI" "CAC" "FTSE" # Efficient Conversion of data frames and matrices to data.table microbenchmark(qDT(wlddev), qDT(EuStockMarkets), as.data.table(wlddev), as.data.frame(EuStockMarkets)) # Unit: microseconds # expr min lq mean median uq max neval # qDT(wlddev) 3.075 3.608 4.21439 3.8950 4.2640 12.546 100 # qDT(EuStockMarkets) 6.765 8.733 12.09254 12.5050 14.1040 30.217 100 # as.data.table(wlddev) 64.206 122.180 253.75023 143.2745 173.1635 3653.346 100 # as.data.frame(EuStockMarkets) 64.247 70.971 82.25174 79.6835 84.8700 339.849 100 # Converting a time series to data.frame head(qDF(AirPassengers)) # AirPassengers # 1 112 # 2 118 # 3 132 # 4 129 # 5 121 # 6 135 ``` By default these functions drop all unnecessary attributes from matrices or lists / data frames in the conversion, but this can be changed using the `keep.attr = TRUE` argument. A useful additional feature of `qDF` and `qDT` is the `row.names.col` argument, enabling the saving of names / row-names in a column when converting from vector, matrix, array or data frame: ```r # This saves the row-names in a column named 'car' head(qDT(mtcars, "car")) # car mpg cyl disp hp drat wt qsec vs am gear carb # # 1: Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 # 2: Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 # 3: Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 # 4: Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 # 5: Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 # 6: Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 N_distinct <- fndistinct(GGDC10S) N_distinct # Country Regioncode Region Variable Year AGR MIN MAN PU # 43 6 6 2 67 4353 4224 4353 4237 # CON WRT TRA FIRE GOV OTH SUM # 4339 4344 4334 4349 3470 4238 4364 # Converting a vector to data.frame, saving names head(qDF(N_distinct, "variable")) # variable N_distinct # 1 Country 43 # 2 Regioncode 6 # 3 Region 6 # 4 Variable 2 # 5 Year 67 # 6 AGR 4353 ``` For the conversion of matrices to list there are also the programmers functions `mrtl` and `mctl`, which row- or column- wise convert a matrix into a plain list, data.frame or *data.table*. ```r # This converts the matrix to a list of 1860 row-vectors of length 4. microbenchmark(mrtl(EuStockMarkets)) # Unit: microseconds # expr min lq mean median uq max neval # mrtl(EuStockMarkets) 139.728 151.4335 168.5522 155.841 164.6355 399.791 100 ``` For the reverse operation, `qM` converts vectors, higher-dimensional arrays, data frames and suitable lists to matrix. ```r # Note: kit::psum is the most efficient way to do this microbenchmark(rowSums(qM(mtcars)), rowSums(mtcars), kit::psum(mtcars)) # Unit: nanoseconds # expr min lq mean median uq max neval # rowSums(qM(mtcars)) 5699 7933.5 12702.62 9122.5 11131.5 316315 100 # rowSums(mtcars) 38868 41697.0 48003.21 44157.0 51496.0 95981 100 # kit::psum(mtcars) 574 820.0 510905.51 943.0 1107.0 50967797 100 ``` At last, `qF` converts vectors to factor and is quite a bit faster than `as.factor`: ```r # Converting from character str(wlddev$country) # chr [1:13176] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ... # - attr(*, "label")= chr "Country Name" fndistinct(wlddev$country) # [1] 216 microbenchmark(qF(wlddev$country), as.factor(wlddev$country)) # Unit: microseconds # expr min lq mean median uq max neval # qF(wlddev$country) 70.192 71.1965 73.77376 72.160 74.784 107.256 100 # as.factor(wlddev$country) 263.794 275.7660 282.21530 278.841 283.761 360.431 100 # Converting from numeric str(wlddev$PCGDP) # num [1:13176] NA NA NA NA NA NA NA NA NA NA ... # - attr(*, "label")= chr "GDP per capita (constant 2010 US$)" fndistinct(wlddev$PCGDP) # [1] 9470 microbenchmark(qF(wlddev$PCGDP), as.factor(wlddev$PCGDP)) # Unit: microseconds # expr min lq mean median uq max neval # qF(wlddev$PCGDP) 445.096 474.944 531.221 488.146 509.0765 3930.342 100 # as.factor(wlddev$PCGDP) 9374.240 9546.132 9823.477 9633.196 9727.5165 13732.499 100 ``` ## 4. Advanced Statistical Programming Having introduced some of the more basic *collapse* data manipulation infrastructure in the preceding chapters, this chapter introduces some of the packages core functionality for programming with data. ### 4.1 Fast (Grouped, Weighted) Statistical Functions A key feature of *collapse* is it's broad set of *Fast Statistical Functions* (`fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct`), which are able to tangibly speed-up column-wise, grouped and weighted statistical computations on vectors, matrices or data frames. The basic syntax common to all of these functions is: ```r FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, drop = TRUE) ``` where `x` is a vector, matrix or data frame, `g` takes groups supplied as vector, factor, list of vectors or *GRP* object, and `w` takes a weight vector (supported by `fsum, fprod, fmean, fmedian, fmode, fnth, fvar` and `fsd`). `TRA` can be used to transform `x` using the computed statistics and one of 10 available transformations (`"replace_fill", "replace", "-", "-+", "/", "%", "+", "*", "%%, "-%%"`, discussed in section 6.3). `na.rm` efficiently skips missing values during the computation and is `TRUE` by default. `use.g.names = TRUE` generates new row-names from the unique groups supplied to `g`, and `drop = TRUE` returns a vector when performing simple (non-grouped) computations on matrix or data frame columns. With that in mind, let's start with some simple examples. To calculate simple column-wise means, it is sufficient to type: ```r fmean(mtcars$mpg) # Vector # [1] 20.09062 fmean(mtcars) # mpg cyl disp hp drat wt qsec vs am # 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750 0.437500 0.406250 # gear carb # 3.687500 2.812500 fmean(mtcars, drop = FALSE) # This returns a 1-row data-frame # mpg cyl disp hp drat wt qsec vs am gear carb # 1 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875 2.8125 m <- qM(mtcars) # Generate matrix fmean(m) # mpg cyl disp hp drat wt qsec vs am # 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750 0.437500 0.406250 # gear carb # 3.687500 2.812500 fmean(m, drop = FALSE) # This returns a 1-row matrix # mpg cyl disp hp drat wt qsec vs am gear carb # [1,] 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875 2.8125 ``` Note that separate methods for vectors, matrices and data frames are written in C++, thus no conversions are needed and computations on matrices and data frames are equally efficient. If we had a weight vector, weighted statistics are easily computed: ```r weights <- abs(rnorm(fnrow(mtcars))) # fnrow is a bit faster for data frames fmean(mtcars, w = weights) # Weighted mean # mpg cyl disp hp drat wt qsec vs # 20.8090714 5.8876772 214.9587303 142.8931066 3.7558442 3.0941361 17.8201120 0.5025300 # am gear carb # 0.4918237 3.8375831 2.7771280 fmedian(mtcars, w = weights) # Weighted median # mpg cyl disp hp drat wt qsec vs am gear carb # 21.00 6.00 160.00 113.00 3.77 3.17 18.00 1.00 0.00 4.00 2.00 fsd(mtcars, w = weights) # Frequency-weighted standard deviation # mpg cyl disp hp drat wt qsec vs # 5.8799568 1.8416865 122.4274353 74.9459089 0.5413624 0.9689836 1.8516418 0.5089768 # am gear carb # 0.5089152 0.7557877 1.6744062 fmode(mtcars, w = weights) # Weighted statistical mode (i.e. the value with the largest sum of weights) # mpg cyl disp hp drat wt qsec vs am gear carb # 21.40 4.00 121.00 109.00 3.92 2.78 18.60 1.00 0.00 4.00 2.00 ``` Fast grouped statistics can be calculated by simply passing grouping vectors or lists of grouping vectors to the fast functions: ```r fmean(mtcars, mtcars$cyl) # mpg cyl disp hp drat wt qsec vs am gear carb # 4 26.66364 4 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909 0.7272727 4.090909 1.545455 # 6 19.74286 6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286 0.4285714 3.857143 3.428571 # 8 15.10000 8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000 0.1428571 3.285714 3.500000 fmean(mtcars, fselect(mtcars, cyl, vs, am)) # mpg cyl disp hp drat wt qsec vs am gear carb # 4.0.1 26.00000 4 120.3000 91.00000 4.430000 2.140000 16.70000 0 1 5.000000 2.000000 # 4.1.0 22.90000 4 135.8667 84.66667 3.770000 2.935000 20.97000 1 0 3.666667 1.666667 # 4.1.1 28.37143 4 89.8000 80.57143 4.148571 2.028286 18.70000 1 1 4.142857 1.428571 # 6.0.1 20.56667 6 155.0000 131.66667 3.806667 2.755000 16.32667 0 1 4.333333 4.666667 # 6.1.0 19.12500 6 204.5500 115.25000 3.420000 3.388750 19.21500 1 0 3.500000 2.500000 # 8.0.0 15.05000 8 357.6167 194.16667 3.120833 4.104083 17.14250 0 0 3.000000 3.083333 # 8.0.1 15.40000 8 326.0000 299.50000 3.880000 3.370000 14.55000 0 1 5.000000 6.000000 # Getting column indices ind <- fselect(mtcars, cyl, vs, am, return = "indices") fmean(get_vars(mtcars, -ind), get_vars(mtcars, ind)) # mpg disp hp drat wt qsec gear carb # 4.0.1 26.00000 120.3000 91.00000 4.430000 2.140000 16.70000 5.000000 2.000000 # 4.1.0 22.90000 135.8667 84.66667 3.770000 2.935000 20.97000 3.666667 1.666667 # 4.1.1 28.37143 89.8000 80.57143 4.148571 2.028286 18.70000 4.142857 1.428571 # 6.0.1 20.56667 155.0000 131.66667 3.806667 2.755000 16.32667 4.333333 4.666667 # 6.1.0 19.12500 204.5500 115.25000 3.420000 3.388750 19.21500 3.500000 2.500000 # 8.0.0 15.05000 357.6167 194.16667 3.120833 4.104083 17.14250 3.000000 3.083333 # 8.0.1 15.40000 326.0000 299.50000 3.880000 3.370000 14.55000 5.000000 6.000000 ``` ### 4.2 Factors, Grouping Objects and Grouped Data Frames This programming can becomes more efficient when passing *factors* or *grouping objects* to the `g` argument, as otherwise vectors and lists of vectors are grouped internally. ```r # This creates a factor, na.exclude = FALSE attaches a class 'na.included' f <- qF(mtcars$cyl, na.exclude = FALSE) # The 'na.included' attribute skips a missing value check on this factor attributes(f) # $levels # [1] "4" "6" "8" # # $class # [1] "factor" "na.included" # Saving data without grouping columns dat <- get_vars(mtcars, -ind) # Grouped standard-deviation fsd(dat, f) # mpg disp hp drat wt qsec gear carb # 4 4.509828 26.87159 20.93453 0.3654711 0.5695637 1.682445 0.5393599 0.522233 # 6 1.453567 41.56246 24.26049 0.4760552 0.3563455 1.706866 0.6900656 1.812654 # 8 2.560048 67.77132 50.97689 0.3723618 0.7594047 1.196014 0.7262730 1.556624 # Without option na.exclude = FALSE, anyNA needs to be called on the factor (noticeable on larger data). f2 <- qF(mtcars$cyl) microbenchmark(fsd(dat, f), fsd(dat, f2)) # Unit: microseconds # expr min lq mean median uq max neval # fsd(dat, f) 6.027 6.232 6.51613 6.4165 6.601 11.152 100 # fsd(dat, f2) 6.150 6.396 6.77771 6.5190 6.683 25.830 100 ``` For programming purposes *GRP* objects are preferable over factors because they never require further checks and they provide additional information about the grouping (such as group sizes and the original unique values in each group). The `GRP` function creates grouping objects (of class *GRP*) from vectors or lists of columns. Grouping is done very efficiently via radix ordering in C (using the `radixorder` function): ```r # This creates a 'GRP' object. g <- GRP(mtcars, ~ cyl + vs + am) # Using the formula interface, could also use c("cyl","vs","am") or c(2,8:9) str(g) # Class 'GRP' hidden list of 9 # $ N.groups : int 7 # $ group.id : int [1:32] 4 4 3 5 6 5 6 2 2 5 ... # $ group.sizes : int [1:7] 1 3 7 3 4 12 2 # $ groups :'data.frame': 7 obs. of 3 variables: # ..$ cyl: num [1:7] 4 4 4 6 6 8 8 # ..$ vs : num [1:7] 0 1 1 0 1 0 0 # ..$ am : num [1:7] 1 0 1 1 0 0 1 # $ group.vars : chr [1:3] "cyl" "vs" "am" # $ ordered : Named logi [1:2] TRUE FALSE # ..- attr(*, "names")= chr [1:2] "ordered" "sorted" # $ order : int [1:32] 27 8 9 21 3 18 19 20 26 28 ... # ..- attr(*, "starts")= int [1:7] 1 2 5 12 15 19 31 # ..- attr(*, "maxgrpn")= int 12 # ..- attr(*, "sorted")= logi FALSE # $ group.starts: int [1:7] 27 8 3 1 4 5 29 # $ call : language GRP.default(X = mtcars, by = ~cyl + vs + am) ``` The first three elements of this object provide information about the number of groups, the group to which each row belongs, and the size of each group. A print and a plot method provide further information about the grouping: ```r print(g) # collapse grouping object of length 32 with 7 ordered groups # # Call: GRP.default(X = mtcars, by = ~cyl + vs + am), X is unsorted # # Distribution of group sizes: # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1.000 2.500 3.000 4.571 5.500 12.000 # # Groups with sizes: # 4.0.1 4.1.0 4.1.1 6.0.1 6.1.0 8.0.0 8.0.1 # 1 3 7 3 4 12 2 plot(g) ```
plot of chunk GRPplot

plot of chunk GRPplot

The important elements of the *GRP* object are directly handed down to the compiled C++ code of the statistical functions, making repeated computations over the same groups very efficient. ```r fsd(dat, g) # mpg disp hp drat wt qsec gear carb # 4.0.1 NA NA NA NA NA NA NA NA # 4.1.0 1.4525839 13.969371 19.65536 0.1300000 0.4075230 1.67143651 0.5773503 0.5773503 # 4.1.1 4.7577005 18.802128 24.14441 0.3783926 0.4400840 0.94546285 0.3779645 0.5345225 # 6.0.1 0.7505553 8.660254 37.52777 0.1616581 0.1281601 0.76872188 0.5773503 1.1547005 # 6.1.0 1.6317169 44.742634 9.17878 0.5919459 0.1162164 0.81590441 0.5773503 1.7320508 # 8.0.0 2.7743959 71.823494 33.35984 0.2302749 0.7683069 0.80164745 0.0000000 0.9003366 # 8.0.1 0.5656854 35.355339 50.20458 0.4808326 0.2828427 0.07071068 0.0000000 2.8284271 # Grouped computation with and without prior grouping microbenchmark(fsd(dat, g), fsd(dat, get_vars(mtcars, ind))) # Unit: microseconds # expr min lq mean median uq max neval # fsd(dat, g) 19.065 21.1765 23.68447 22.9600 24.9690 38.909 100 # fsd(dat, get_vars(mtcars, ind)) 31.611 35.2600 44.56823 37.3715 41.1845 327.877 100 ``` Yet another possibility is creating a grouped data frame (class *grouped_df*). This can either be done using `dplyr::group_by`, which creates a grouped tibble and requires a conversion of the grouping object using `GRP.grouped_df`, or using the more efficient `fgroup_by` provided in *collapse*: ```r gmtcars <- fgroup_by(mtcars, cyl, vs, am) # fgroup_by() can also be abbreviated as gby() fmedian(gmtcars) # cyl vs am mpg disp hp drat wt qsec gear carb # 1 4 0 1 26.00 120.3 91.0 4.430 2.140 16.70 5.0 2.0 # 2 4 1 0 22.80 140.8 95.0 3.700 3.150 20.01 4.0 2.0 # 3 4 1 1 30.40 79.0 66.0 4.080 1.935 18.61 4.0 1.0 # 4 6 0 1 21.00 160.0 110.0 3.900 2.770 16.46 4.0 4.0 # 5 6 1 0 18.65 196.3 116.5 3.500 3.440 19.17 3.5 2.5 # 6 8 0 0 15.20 355.0 180.0 3.075 3.810 17.35 3.0 3.0 # 7 8 0 1 15.40 326.0 299.5 3.880 3.370 14.55 5.0 6.0 head(fgroup_vars(gmtcars)) # cyl vs am # Mazda RX4 6 0 1 # Mazda RX4 Wag 6 0 1 # Datsun 710 4 1 1 # Hornet 4 Drive 6 1 0 # Hornet Sportabout 8 0 0 # Valiant 6 1 0 fmedian(gmtcars, keep.group_vars = FALSE) # mpg disp hp drat wt qsec gear carb # 1 26.00 120.3 91.0 4.430 2.140 16.70 5.0 2.0 # 2 22.80 140.8 95.0 3.700 3.150 20.01 4.0 2.0 # 3 30.40 79.0 66.0 4.080 1.935 18.61 4.0 1.0 # 4 21.00 160.0 110.0 3.900 2.770 16.46 4.0 4.0 # 5 18.65 196.3 116.5 3.500 3.440 19.17 3.5 2.5 # 6 15.20 355.0 180.0 3.075 3.810 17.35 3.0 3.0 # 7 15.40 326.0 299.5 3.880 3.370 14.55 5.0 6.0 ``` Now suppose we wanted to create a new dataset which contains the *mean*, *sd*, *min* and *max* of the variables *mpg* and *disp* grouped by *cyl*, *vs* and *am*: ```r # Standard evaluation dat <- get_vars(mtcars, c("mpg", "disp")) add_vars(g[["groups"]], add_stub(fmean(dat, g, use.g.names = FALSE), "mean_"), add_stub(fsd(dat, g, use.g.names = FALSE), "sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_")) # cyl vs am mean_mpg mean_disp sd_mpg sd_disp min_mpg min_disp max_mpg max_disp # 1 4 0 1 26.00000 120.3000 NA NA 26.0 120.3 26.0 120.3 # 2 4 1 0 22.90000 135.8667 1.4525839 13.969371 21.5 120.1 24.4 146.7 # 3 4 1 1 28.37143 89.8000 4.7577005 18.802128 21.4 71.1 33.9 121.0 # 4 6 0 1 20.56667 155.0000 0.7505553 8.660254 19.7 145.0 21.0 160.0 # 5 6 1 0 19.12500 204.5500 1.6317169 44.742634 17.8 167.6 21.4 258.0 # 6 8 0 0 15.05000 357.6167 2.7743959 71.823494 10.4 275.8 19.2 472.0 # 7 8 0 1 15.40000 326.0000 0.5656854 35.355339 15.0 301.0 15.8 351.0 # Non-Standard evaluation fgroup_by(mtcars, cyl, vs, am) %>% fselect(mpg, disp) %>% { add_vars(fgroup_vars(., "unique"), fmean(., keep.group_vars = FALSE) %>% add_stub("mean_"), fsd(., keep.group_vars = FALSE) %>% add_stub("sd_"), fmin(., keep.group_vars = FALSE) %>% add_stub("min_"), fmax(., keep.group_vars = FALSE) %>% add_stub("max_")) } # cyl vs am mean_mpg mean_disp sd_mpg sd_disp min_mpg min_disp max_mpg max_disp # 1 4 0 1 26.00000 120.3000 NA NA 26.0 120.3 26.0 120.3 # 2 4 1 0 22.90000 135.8667 1.4525839 13.969371 21.5 120.1 24.4 146.7 # 3 4 1 1 28.37143 89.8000 4.7577005 18.802128 21.4 71.1 33.9 121.0 # 4 6 0 1 20.56667 155.0000 0.7505553 8.660254 19.7 145.0 21.0 160.0 # 5 6 1 0 19.12500 204.5500 1.6317169 44.742634 17.8 167.6 21.4 258.0 # 6 8 0 0 15.05000 357.6167 2.7743959 71.823494 10.4 275.8 19.2 472.0 # 7 8 0 1 15.40000 326.0000 0.5656854 35.355339 15.0 301.0 15.8 351.0 ``` ### 4.3 Grouped and Weighted Computations We could also calculate groupwise-frequency weighted means and standard-deviations using a weight vector^[You may wonder why with weights the standard-deviations in the group '4.0.1' are `0` while they were `NA` without weights. This stirs from the fact that group '4.0.1' only has one observation, and in the Bessel-corrected estimate of the variance there is a `n - 1` in the denominator which becomes `0` if `n = 1` and division by `0` becomes `NA` in this case (`fvar` was designed that way to match the behavior or `stats::var`). In the weighted version the denominator is `sum(w) - 1`, and if `sum(w)` is not 1, then the denominator is not `0`. The standard-deviation however is still `0` because the sum of squares in the numerator is `0`. In other words this means that in a weighted aggregation singleton-groups are not treated like singleton groups unless the corresponding weight is `1`.]. ```r # Grouped and weighted mean and sd and grouped min and max add_vars(g[["groups"]], add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"), add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_")) # cyl vs am w_mean_mpg w_mean_disp w_sd_mpg w_sd_disp min_mpg min_disp max_mpg max_disp # 1 4 0 1 26.00000 120.30000 0.0000000 0.00000 26.0 120.3 26.0 120.3 # 2 4 1 0 23.08757 136.62639 1.5306081 14.19412 21.5 120.1 24.4 146.7 # 3 4 1 1 27.34688 92.65353 4.8723476 21.44005 21.4 71.1 33.9 121.0 # 4 6 0 1 20.22046 151.00525 0.9349875 10.78832 19.7 145.0 21.0 160.0 # 5 6 1 0 19.52725 204.86661 1.7612203 50.80083 17.8 167.6 21.4 258.0 # 6 8 0 0 15.12267 359.56902 2.2886672 70.60949 10.4 275.8 19.2 472.0 # 7 8 0 1 15.51023 332.88960 0.4758366 29.73979 15.0 301.0 15.8 351.0 # Binding and reordering columns in a single step: Add columns in specific positions add_vars(g[["groups"]], add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"), add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_"), pos = c(4,8,5,9,6,10,7,11)) # cyl vs am w_mean_mpg w_sd_mpg min_mpg max_mpg w_mean_disp w_sd_disp min_disp max_disp # 1 4 0 1 26.00000 0.0000000 26.0 26.0 120.30000 0.00000 120.3 120.3 # 2 4 1 0 23.08757 1.5306081 21.5 24.4 136.62639 14.19412 120.1 146.7 # 3 4 1 1 27.34688 4.8723476 21.4 33.9 92.65353 21.44005 71.1 121.0 # 4 6 0 1 20.22046 0.9349875 19.7 21.0 151.00525 10.78832 145.0 160.0 # 5 6 1 0 19.52725 1.7612203 17.8 21.4 204.86661 50.80083 167.6 258.0 # 6 8 0 0 15.12267 2.2886672 10.4 19.2 359.56902 70.60949 275.8 472.0 # 7 8 0 1 15.51023 0.4758366 15.0 15.8 332.88960 29.73979 301.0 351.0 ``` The R overhead of this kind of programming in standard-evaluation is very low: ```r microbenchmark(call = add_vars(g[["groups"]], add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"), add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"), add_stub(fmin(dat, g, use.g.names = FALSE), "min_"), add_stub(fmax(dat, g, use.g.names = FALSE), "max_"))) # Unit: microseconds # expr min lq mean median uq max neval # call 27.388 28.1875 29.56428 28.823 29.356 97.58 100 ``` ### 4.4 Transformations Using the `TRA` Argument As a final layer of added complexity, we could utilize the `TRA` argument to generate groupwise-weighted demeaned, and scaled data, with additional columns giving the group-minimum and maximum values: ```r head(add_vars(get_vars(mtcars, ind), add_stub(fmean(dat, g, weights, "-"), "w_demean_"), # This calculates weighted group means and uses them to demean the data add_stub(fsd(dat, g, weights, "/"), "w_scale_"), # This calculates weighted group sd's and uses them to scale the data add_stub(fmin(dat, g, "replace"), "min_"), # This replaces all observations by their group-minimum add_stub(fmax(dat, g, "replace"), "max_"))) # This replaces all observations by their group-maximum # cyl vs am w_demean_mpg w_demean_disp w_scale_mpg w_scale_disp min_mpg min_disp # Mazda RX4 6 0 1 0.7795446 8.9947455 22.460194 14.830858 19.7 145.0 # Mazda RX4 Wag 6 0 1 0.7795446 8.9947455 22.460194 14.830858 19.7 145.0 # Datsun 710 4 1 1 -4.5468786 15.3464694 4.679469 5.037303 21.4 71.1 # Hornet 4 Drive 6 1 0 1.8727485 53.1333901 12.150666 5.078657 17.8 167.6 # Hornet Sportabout 8 0 0 3.5773335 0.4309751 8.170694 5.098465 10.4 275.8 # Valiant 6 1 0 -1.4272515 20.1333901 10.276966 4.429062 17.8 167.6 # max_mpg max_disp # Mazda RX4 21.0 160 # Mazda RX4 Wag 21.0 160 # Datsun 710 33.9 121 # Hornet 4 Drive 21.4 258 # Hornet Sportabout 19.2 472 # Valiant 21.4 258 ``` It is also possible to `add_vars<-` to `mtcars` itself. The default option would add these columns at the end, but we could also specify positions: ```r # This defines the positions where we want to add these columns pos <- as.integer(c(2,8,3,9,4,10,5,11)) add_vars(mtcars, pos) <- c(add_stub(fmean(dat, g, weights, "-"), "w_demean_"), add_stub(fsd(dat, g, weights, "/"), "w_scale_"), add_stub(fmin(dat, g, "replace"), "min_"), add_stub(fmax(dat, g, "replace"), "max_")) head(mtcars) # mpg w_demean_mpg w_scale_mpg min_mpg max_mpg cyl disp w_demean_disp w_scale_disp # Mazda RX4 21.0 0.7795446 22.460194 19.7 21.0 6 160 8.9947455 14.830858 # Mazda RX4 Wag 21.0 0.7795446 22.460194 19.7 21.0 6 160 8.9947455 14.830858 # Datsun 710 22.8 -4.5468786 4.679469 21.4 33.9 4 108 15.3464694 5.037303 # Hornet 4 Drive 21.4 1.8727485 12.150666 17.8 21.4 6 258 53.1333901 5.078657 # Hornet Sportabout 18.7 3.5773335 8.170694 10.4 19.2 8 360 0.4309751 5.098465 # Valiant 18.1 -1.4272515 10.276966 17.8 21.4 6 225 20.1333901 4.429062 # min_disp max_disp hp drat wt qsec vs am gear carb # Mazda RX4 145.0 160 110 3.90 2.620 16.46 0 1 4 4 # Mazda RX4 Wag 145.0 160 110 3.90 2.875 17.02 0 1 4 4 # Datsun 710 71.1 121 93 3.85 2.320 18.61 1 1 4 1 # Hornet 4 Drive 167.6 258 110 3.08 3.215 19.44 1 0 3 1 # Hornet Sportabout 275.8 472 175 3.15 3.440 17.02 0 0 3 2 # Valiant 167.6 258 105 2.76 3.460 20.22 1 0 3 1 rm(mtcars) ``` Together with `ftransform`, things can become arbitrarily more complex: ```r # 2 different grouped and weighted computations (mutate operations) performed in one call settransform(mtcars, carb_dwmed_cyl = fmedian(carb, cyl, weights, "-"), carb_wsd_vs_am = fsd(carb, list(vs, am), weights, "replace")) # Multivariate settransform(mtcars, c(fmedian(list(carb_dwmed_cyl = carb, mpg_dwmed_cyl = mpg), cyl, weights, "-"), fsd(list(carb_wsd_vs_am = carb, mpg_wsd_vs_am = mpg), list(vs, am), weights, "replace"))) # Nested (Computing the weighted 3rd quartile of mpg, grouped by cyl and carb being greater than it's weighted median, grouped by vs) settransform(mtcars, mpg_gwQ3_cyl = fnth(mpg, 0.75, list(cyl, carb > fmedian(carb, vs, weights, 1L)), weights, 1L)) head(mtcars) # mpg cyl disp hp drat wt qsec vs am gear carb carb_dwmed_cyl carb_wsd_vs_am # Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 0 2.1897386 # Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 0 2.1897386 # Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 -1 0.5286617 # Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 -3 1.3161442 # Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 -2 0.9674070 # Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 -3 1.3161442 # mpg_dwmed_cyl mpg_wsd_vs_am mpg_gwQ3_cyl # Mazda RX4 1.3 4.567045 21.40000 # Mazda RX4 Wag 1.3 4.567045 21.40000 # Datsun 710 -3.2 4.872348 27.95146 # Hornet 4 Drive 1.7 2.444036 21.40000 # Hornet Sportabout 3.5 2.288667 16.21512 # Valiant -1.6 2.444036 21.40000 rm(mtcars) ``` With the full set of 14 *Fast Statistical Functions*, and additional vector- valued functions and operators (`fscale/STD, fbetween/B, fwithin/W, fhdbetween/HDB, fhdwithin/HDW, flag/L/F, fdiff/D, fgrowth/G`) discussed later, *collapse* provides extraordinary new possibilities for highly complex and efficient statistical programming in R. Computation speeds generally exceed those of packages like *dplyr* or *data.table*, sometimes by orders of magnitude. Column-wise matrix computations are also highly efficient and comparable to packages like `matrixStats` and base R functions like `colSums`. In particular the ability to perform grouped and weighted computations on matrices is new to R and very useful for complex computations (such as aggregating input-output tables etc.). Note that the above examples provide merely suggestions for use of these features and are focused on programming with data frames (as the predicates `get_vars`, `add_vars` etc. are made for data frames). Equivalently efficient code could be written using vectors or matrices. ## 5. Advanced Data Aggregation The grouped statistical programming introduced in the previous section is the fastest and most customizable way of dealing with many data transformation problems. Some tasks such as multivariate aggregations on a single data frame are however so common that this demanded for a more compact solution which efficiently integrates multiple computational steps. For such purposes `collap` was created as a fast multi-purpose aggregation command designed to solve complex aggregation problems efficiently and with a minimum of coding. `collap` performs optimally together with the *Fast Statistical Functions*, but will also work with other functions. To perform the above aggregation with `collap`, one would simply need to type: ```r collap(mtcars, mpg + disp ~ cyl + vs + am, list(fmean, fsd, fmin, fmax), w = weights, keep.col.order = FALSE) # cyl vs am weights fmean.mpg fmean.disp fsd.mpg fsd.disp fmin.mpg fmin.disp fmax.mpg fmax.disp # 1 4 0 1 1.416054 26.00000 120.30000 0.0000000 0.00000 26.0 120.3 26.0 120.3 # 2 4 1 0 3.232217 23.08757 136.62639 1.5306081 14.19412 21.5 120.1 24.4 146.7 # 3 4 1 1 7.893395 27.34688 92.65353 4.8723476 21.44005 21.4 71.1 33.9 121.0 # 4 6 0 1 1.866025 20.22046 151.00525 0.9349875 10.78832 19.7 145.0 21.0 160.0 # 5 6 1 0 3.237565 19.52725 204.86661 1.7612203 50.80083 17.8 167.6 21.4 258.0 # 6 8 0 0 8.054777 15.12267 359.56902 2.2886672 70.60949 10.4 275.8 19.2 472.0 # 7 8 0 1 2.881698 15.51023 332.88960 0.4758366 29.73979 15.0 301.0 15.8 351.0 ``` `collap` here also saves the sum of the weights in a column. The original idea behind `collap` is however better demonstrated with a different dataset. Consider the *World Development Dataset* `wlddev` introduced in section 1: ```r head(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # 3 Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA 112839996 # 4 Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA 237720001 # 5 Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA 295920013 # 6 Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA 341839996 # POP # 1 8996973 # 2 9169410 # 3 9351441 # 4 9543205 # 5 9744781 # 6 9956320 ``` Suppose we would like to aggregate this data by country and decade, but keep all that categorical information. With `collap` this is extremely simple: ```r collap(wlddev, ~ iso3c + decade) %>% head # country iso3c date year decade region income OECD PCGDP # 1 Aruba ABW 1961-01-01 1964.5 1960 Latin America & Caribbean High income FALSE NA # 2 Aruba ABW 1971-01-01 1974.5 1970 Latin America & Caribbean High income FALSE NA # 3 Aruba ABW 1981-01-01 1984.5 1980 Latin America & Caribbean High income FALSE 20267.30 # 4 Aruba ABW 1991-01-01 1994.5 1990 Latin America & Caribbean High income FALSE 26611.44 # 5 Aruba ABW 2001-01-01 2004.5 2000 Latin America & Caribbean High income FALSE 26664.99 # 6 Aruba ABW 2011-01-01 2014.5 2010 Latin America & Caribbean High income FALSE 24926.17 # LIFEEX GINI ODA POP # 1 67.2592 NA NA 56984.3 # 2 70.6372 NA NA 60080.6 # 3 73.0153 NA 49745999 61665.9 # 4 73.6069 NA 29971000 76946.7 # 5 74.2660 NA 23292000 97939.7 # 6 75.6546 NA NA 103994.6 ``` Note that the columns of the data are in the original order and also retain all their attributes. To understand this result let us briefly examine the syntax of `collap`: ```r collap(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort.row = TRUE, parallel = FALSE, mc.cores = 1L, return = c("wide","list","long","long_dupl"), give.names = "auto") # , ... ``` It is clear that `X` is the data and `by` supplies the grouping information, which can be a one- or two-sided formula or alternatively grouping vectors, factors, lists and `GRP` objects (like the *Fast Statistical Functions*). Then `FUN` provides the function(s) applied only to numeric variables in `X` and defaults to `fmean`, while `catFUN` provides the function(s) applied only to categorical variables in `X` and defaults to `fmode`^[I.e. the most frequent value. By default a first-mode is computed.]. `keep.col.order = TRUE` specifies that the data is to be returned with the original column-order. Thus in the above example it was sufficient to supply `X` and `by` and `collap` did the rest for us. Suppose we only want to aggregate 4 series in this dataset. ```r # Same as collap(wlddev, ~ iso3c + decade, cols = 9:12) collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + decade) %>% head # iso3c decade PCGDP LIFEEX GINI ODA # 1 ABW 1960 NA 67.2592 NA NA # 2 ABW 1970 NA 70.6372 NA NA # 3 ABW 1980 20267.30 73.0153 NA 49745999 # 4 ABW 1990 26611.44 73.6069 NA 29971000 # 5 ABW 2000 26664.99 74.2660 NA 23292000 # 6 ABW 2010 24926.17 75.6546 NA NA ``` As before we could use multiple functions by putting them in a named or unnamed list^[If the list is unnamed, `collap` uses `all.vars(substitute(list(FUN1, FUN2, ...)))` to get the function names. Alternatively it is also possible to pass a character vector of function names.]: ```r collap(wlddev, ~ iso3c + decade, list(fmean, fmedian, fsd), cols = 9:12) %>% head # iso3c decade fmean.PCGDP fmedian.PCGDP fsd.PCGDP fmean.LIFEEX fmedian.LIFEEX fsd.LIFEEX # 1 ABW 1960 NA NA NA 67.2592 67.2740 1.03046880 # 2 ABW 1970 NA NA NA 70.6372 70.6760 0.96813702 # 3 ABW 1980 20267.30 20280.81 4037.2695 73.0153 73.1260 0.38203753 # 4 ABW 1990 26611.44 26684.19 592.7919 73.6069 73.6100 0.08549392 # 5 ABW 2000 26664.99 26992.71 1164.6741 74.2660 74.2215 0.37614448 # 6 ABW 2010 24926.17 24599.50 1159.7344 75.6546 75.6540 0.42974339 # fmean.GINI fmedian.GINI fsd.GINI fmean.ODA fmedian.ODA fsd.ODA # 1 NA NA NA NA NA NA # 2 NA NA NA NA NA NA # 3 NA NA NA 49745999 39259998 23573651 # 4 NA NA NA 29971000 35155001 17270808 # 5 NA NA NA 23292000 16219999 42969712 # 6 NA NA NA NA NA NA ``` With multiple functions, we could also request `collap` to return a long-format of the data: ```r collap(wlddev, ~ iso3c + decade, list(fmean, fmedian, fsd), cols = 9:12, return = "long") %>% head # Function iso3c decade PCGDP LIFEEX GINI ODA # 1 fmean ABW 1960 NA 67.2592 NA NA # 2 fmean ABW 1970 NA 70.6372 NA NA # 3 fmean ABW 1980 20267.30 73.0153 NA 49745999 # 4 fmean ABW 1990 26611.44 73.6069 NA 29971000 # 5 fmean ABW 2000 26664.99 74.2660 NA 23292000 # 6 fmean ABW 2010 24926.17 75.6546 NA NA ``` A very important feature of `collap` to highlight at this point is the `custom` argument, which allows the user to circumvent the broad distinction into numeric and categorical data (and the associated `FUN` and `catFUN` arguments) and specify exactly which columns to aggregate using which functions: ```r collap(wlddev, ~ iso3c + decade, custom = list(fmean = 9:10, fmedian = 11:12, ffirst = c("country","region","income"), flast = c("year","date"), fmode = "OECD")) %>% head # country iso3c date year decade region income OECD PCGDP LIFEEX # 1 Aruba ABW 1970-01-01 1969 1960 Latin America & Caribbean High income FALSE NA 67.2592 # 2 Aruba ABW 1980-01-01 1979 1970 Latin America & Caribbean High income FALSE NA 70.6372 # 3 Aruba ABW 1990-01-01 1989 1980 Latin America & Caribbean High income FALSE 20267.30 73.0153 # 4 Aruba ABW 2000-01-01 1999 1990 Latin America & Caribbean High income FALSE 26611.44 73.6069 # 5 Aruba ABW 2010-01-01 2009 2000 Latin America & Caribbean High income FALSE 26664.99 74.2660 # 6 Aruba ABW 2020-01-01 2019 2010 Latin America & Caribbean High income FALSE 24926.17 75.6546 # GINI ODA # 1 NA NA # 2 NA NA # 3 NA 39259998 # 4 NA 35155001 # 5 NA 16219999 # 6 NA NA ``` Since *collapse* 1.5.0, it is also possible to perform weighted aggregations and append functions with `_uw` to yield an unweighted computation: ```r # This aggregates using weighted mean and mode, and unweighted median, first and last value collap(wlddev, ~ region + year, w = ~ POP, custom = list(fmean = 9:10, fmedian_uw = 11:12, ffirst_uw = c("country","region","income"), flast_uw = c("year","date"), fmode = "OECD"), keep.w = FALSE) %>% head # country date year year region region income # 1 American Samoa 1961-01-01 1960 1960 East Asia & Pacific East Asia & Pacific Upper middle income # 2 American Samoa 1962-01-01 1961 1961 East Asia & Pacific East Asia & Pacific Upper middle income # 3 American Samoa 1963-01-01 1962 1962 East Asia & Pacific East Asia & Pacific Upper middle income # 4 American Samoa 1964-01-01 1963 1963 East Asia & Pacific East Asia & Pacific Upper middle income # 5 American Samoa 1965-01-01 1964 1964 East Asia & Pacific East Asia & Pacific Upper middle income # 6 American Samoa 1966-01-01 1965 1965 East Asia & Pacific East Asia & Pacific Upper middle income # OECD PCGDP LIFEEX GINI ODA # 1 FALSE 1313.760 48.20996 NA 37295000 # 2 FALSE 1395.228 48.73451 NA 26630001 # 3 FALSE 1463.441 49.39960 NA 100040001 # 4 FALSE 1540.621 50.37529 NA 40389999 # 5 FALSE 1665.385 51.57330 NA 70059998 # 6 FALSE 1733.757 52.94426 NA 91545002 ``` Next to `collap`, the functions `collapv` provides a programmers alternative allowing grouping and weighting columns to be passed using column names or indices, and the function `collapg` operates on grouped data frames. ## 6. Data Transformations While `ftransform` and the `TRA` argument to the *Fast Statistical Functions* introduced earlier already provide a significant scope for transforming data, this section introduces some further specialized functions covering some advanced and common use cases, sometimes with greater efficiency. ### 6.1 Row and Column Arithmetic When dealing with matrices or matrix-like datasets, we often have to perform operations applying a vector to the rows or columns of the data object in question. The mathematical operations of base R (`+`, `-`, `*`, `/`, `%%`, ...) operate column-wise and are quite inefficient when used with data frames. Even in matrix code it is challenging to efficiently apply a vector `v` to the rows of a matrix `X`. For this reason *collapse* introduces a set of efficient row- and column-wise arithmetic operators for matrix-like objects: `%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`, `%cr%`, `%c+%`, `%c-%`, `%c*%`, `%c/%`. ```r X <- qM(fselect(GGDC10S, AGR:SUM)) v <- fsum(X) v # AGR MIN MAN PU CON WRT TRA FIRE # 11026503529 8134743462 24120129864 1461548426 7845957666 14776120961 6416089614 7216735147 # GOV OTH SUM # 5962229565 7155872037 94115930269 # This divides the rows of X by v all_obj_equal(t(t(X) / v), X / outer(rep(1, nrow(X)), v), X %r/% v) # [1] TRUE # Base R vs. efficient base R vs. collapse microbenchmark(t(t(X) / v), X / outer(rep(1, nrow(X)), v), X %r/% v) # Unit: microseconds # expr min lq mean median uq max neval # t(t(X)/v) 194.873 234.3560 358.13500 284.6425 298.0905 3244.043 100 # X/outer(rep(1, nrow(X)), v) 55.555 83.5580 101.45696 108.5885 113.5495 137.637 100 # X %r/% v 11.685 37.2075 83.87657 63.2630 72.7135 2744.663 100 # Data frame row operations dat <- fselect(GGDC10S, AGR:SUM) microbenchmark(dat %r/% v, # Same thing using mapply and collapse::copyAttrib copyAttrib(mapply(`/`, dat, v, SIMPLIFY = FALSE), dat)) # Unit: microseconds # expr min lq mean median uq # dat %r/% v 15.129 37.187 143.03998 40.139 46.5555 # copyAttrib(mapply(`/`, dat, v, SIMPLIFY = FALSE), dat) 59.204 64.124 71.98944 66.379 76.7315 # max neval # 5089.289 100 # 110.003 100 # Data frame column arithmetic is very slow microbenchmark(dat / dat$SUM, dat / 5, dat / dat, dat %c/% dat$SUM, dat %c/% 5, dat %c/% dat) # Unit: microseconds # expr min lq mean median uq max neval # dat/dat$SUM 1275.264 1385.2260 1636.95411 1434.2825 1551.1940 5150.092 100 # dat/5 276.012 295.4870 1181.83361 306.2905 327.4260 83176.208 100 # dat/dat 295.323 320.1075 417.10858 330.5010 361.7020 3807.711 100 # dat %c/% dat$SUM 20.295 45.4075 120.01479 48.5235 55.1245 3520.096 100 # dat %c/% 5 17.179 44.5260 87.22996 48.7285 64.1035 3489.223 100 # dat %c/% dat 20.459 46.2685 93.95601 51.0040 67.5065 3795.903 100 ``` ### 6.1 Row and Column Data Apply `dapply` is an efficient apply command for matrices and data frames. It can be used to apply functions to rows or (by default) columns of matrices or data frames and by default returns objects of the same type and with the same attributes unless the result of each computation is a scalar. ```r dapply(mtcars, median) # mpg cyl disp hp drat wt qsec vs am gear carb # 19.200 6.000 196.300 123.000 3.695 3.325 17.710 0.000 0.000 4.000 2.000 dapply(mtcars, median, MARGIN = 1) # Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout # 4.000 4.000 4.000 3.215 3.440 # Valiant Duster 360 Merc 240D Merc 230 Merc 280 # 3.460 4.000 4.000 4.000 4.000 # Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood # 4.000 4.070 3.730 3.780 5.250 # Lincoln Continental Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla # 5.424 5.345 4.000 4.000 4.000 # Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird # 3.700 3.520 3.435 4.000 3.845 # Fiat X1-9 Porsche 914-2 Lotus Europa Ford Pantera L Ferrari Dino # 4.000 4.430 4.000 5.000 6.000 # Maserati Bora Volvo 142E # 8.000 4.000 dapply(mtcars, quantile) # mpg cyl disp hp drat wt qsec vs am gear carb # 0% 10.400 4 71.100 52.0 2.760 1.51300 14.5000 0 0 3 1 # 25% 15.425 4 120.825 96.5 3.080 2.58125 16.8925 0 0 3 2 # 50% 19.200 6 196.300 123.0 3.695 3.32500 17.7100 0 0 4 2 # 75% 22.800 8 326.000 180.0 3.920 3.61000 18.9000 1 1 4 4 # 100% 33.900 8 472.000 335.0 4.930 5.42400 22.9000 1 1 5 8 dapply(mtcars, quantile, MARGIN = 1) %>% head # 0% 25% 50% 75% 100% # Mazda RX4 0 3.2600 4.000 18.730 160 # Mazda RX4 Wag 0 3.3875 4.000 19.010 160 # Datsun 710 1 1.6600 4.000 20.705 108 # Hornet 4 Drive 0 2.0000 3.215 20.420 258 # Hornet Sportabout 0 2.5000 3.440 17.860 360 # Valiant 0 1.8800 3.460 19.160 225 # This is considerably more efficient than log(mtcars): dapply(mtcars, log) %>% head # mpg cyl disp hp drat wt qsec vs am # Mazda RX4 3.044522 1.791759 5.075174 4.700480 1.360977 0.9631743 2.800933 -Inf 0 # Mazda RX4 Wag 3.044522 1.791759 5.075174 4.700480 1.360977 1.0560527 2.834389 -Inf 0 # Datsun 710 3.126761 1.386294 4.682131 4.532599 1.348073 0.8415672 2.923699 0 0 # Hornet 4 Drive 3.063391 1.791759 5.552960 4.700480 1.124930 1.1678274 2.967333 0 -Inf # Hornet Sportabout 2.928524 2.079442 5.886104 5.164786 1.147402 1.2354715 2.834389 -Inf -Inf # Valiant 2.895912 1.791759 5.416100 4.653960 1.015231 1.2412686 3.006672 0 -Inf # gear carb # Mazda RX4 1.386294 1.3862944 # Mazda RX4 Wag 1.386294 1.3862944 # Datsun 710 1.386294 0.0000000 # Hornet 4 Drive 1.098612 0.0000000 # Hornet Sportabout 1.098612 0.6931472 # Valiant 1.098612 0.0000000 ``` `dapply` preserves the data structure: ```r is.data.frame(dapply(mtcars, log)) # [1] TRUE is.matrix(dapply(m, log)) # [1] TRUE ``` It also delivers seamless conversions, i.e. you can apply functions to data frame rows or columns and return a matrix and vice-versa: ```r identical(log(m), dapply(mtcars, log, return = "matrix")) # [1] TRUE identical(dapply(mtcars, log), dapply(m, log, return = "data.frame")) # [1] TRUE ``` On data frames, the performance is comparable to `lapply`, and `dapply` is about 2x faster than `apply` for row- or column-wise operations on matrices. The most important feature is that it does not change the structure of the data at all: all attributes are preserved unless the result is a scalar and `drop = TRUE` (the default). ### 6.2 Split-Apply-Combine Computing `BY` is a generalization of `dapply` for grouped computations using functions that are not part of the *Fast Statistical Functions* introduced above. It fundamentally is a re-implementation of the `lapply(split(x, g), FUN, ...)` computing paradigm in base R, but substantially faster and more versatile than functions like `tapply`, `by` or `aggregate`. It is however not faster than *dplyr* or *data.table* for larger grouped computations on data frames requiring split-apply-combine computing. `BY` is S3 generic with methods for vector, matrix, data.frame and grouped_df^[`BY.grouped_df` is probably only useful together with the `expand.wide = TRUE` argument which *dplyr* does not have, because otherwise *dplyr*'s `summarise` and `mutate` are substantially faster on larger data.]. It also supports the same grouping (`g`) inputs as the *Fast Statistical Functions* (grouping vectors, factors, lists or *GRP* objects). Below the use of `BY` is demonstrated on vectors matrices and data frames. ```r v <- iris$Sepal.Length # A numeric vector f <- iris$Species # A factor ## default vector method BY(v, f, sum) # Sum by species, about 2x faster than tapply(v, f, sum) # setosa versicolor virginica # 250.3 296.8 329.4 BY(v, f, quantile) # Species quantiles: by default stacked # setosa.0% setosa.25% setosa.50% setosa.75% setosa.100% versicolor.0% # 4.300 4.800 5.000 5.200 5.800 4.900 # versicolor.25% versicolor.50% versicolor.75% versicolor.100% virginica.0% virginica.25% # 5.600 5.900 6.300 7.000 4.900 6.225 # virginica.50% virginica.75% virginica.100% # 6.500 6.900 7.900 BY(v, f, quantile, expand.wide = TRUE) # Wide format # 0% 25% 50% 75% 100% # setosa 4.3 4.800 5.0 5.2 5.8 # versicolor 4.9 5.600 5.9 6.3 7.0 # virginica 4.9 6.225 6.5 6.9 7.9 ## matrix method miris <- qM(num_vars(iris)) BY(miris, f, sum) # Also returns as matrix # Sepal.Length Sepal.Width Petal.Length Petal.Width # setosa 250.3 171.4 73.1 12.3 # versicolor 296.8 138.5 213.0 66.3 # virginica 329.4 148.7 277.6 101.3 BY(miris, f, quantile) %>% head # Sepal.Length Sepal.Width Petal.Length Petal.Width # setosa.0% 4.3 2.300 1.000 0.1 # setosa.25% 4.8 3.200 1.400 0.2 # setosa.50% 5.0 3.400 1.500 0.2 # setosa.75% 5.2 3.675 1.575 0.3 # setosa.100% 5.8 4.400 1.900 0.6 # versicolor.0% 4.9 2.000 3.000 1.0 BY(miris, f, quantile, expand.wide = TRUE)[, 1:5] # Sepal.Length.0% Sepal.Length.25% Sepal.Length.50% Sepal.Length.75% Sepal.Length.100% # setosa 4.3 4.800 5.0 5.2 5.8 # versicolor 4.9 5.600 5.9 6.3 7.0 # virginica 4.9 6.225 6.5 6.9 7.9 BY(miris, f, quantile, expand.wide = TRUE, return = "list")[1:2] # list of matrices # $Sepal.Length # 0% 25% 50% 75% 100% # setosa 4.3 4.800 5.0 5.2 5.8 # versicolor 4.9 5.600 5.9 6.3 7.0 # virginica 4.9 6.225 6.5 6.9 7.9 # # $Sepal.Width # 0% 25% 50% 75% 100% # setosa 2.3 3.200 3.4 3.675 4.4 # versicolor 2.0 2.525 2.8 3.000 3.4 # virginica 2.2 2.800 3.0 3.175 3.8 ## data.frame method BY(num_vars(iris), f, sum) # Also returns a data.frame etc... # Sepal.Length Sepal.Width Petal.Length Petal.Width # setosa 250.3 171.4 73.1 12.3 # versicolor 296.8 138.5 213.0 66.3 # virginica 329.4 148.7 277.6 101.3 ## Conversions identical(BY(num_vars(iris), f, sum), BY(miris, f, sum, return = "data.frame")) # [1] TRUE identical(BY(miris, f, sum), BY(num_vars(iris), f, sum, return = "matrix")) # [1] TRUE ``` ### 6.3 Fast (Grouped) Replacing and Sweeping-out Statistics `TRA` is an S3 generic that efficiently transforms data by either replacing data values with supplied statistics or sweeping the statistics out of the data. It is the workhorse function behind the row-wise arithmetic operators introduced above (`%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`), and generalizes those to grouped operations. The 10 operations supported by `TRA` are: * 1 - "replace_fill" : replace and overwrite missing values (same as dplyr::mutate) * 2 - "replace" : replace but preserve missing values * 3 - "-" : subtract (center) * 4 - "-+" : subtract group-statistics but add average of group statistics * 5 - "/" : divide (scale) * 6 - "%" : compute percentages (divide and multiply by 100) * 7 - "+" : add * 8 - "*" : multiply * 9 - "%%" : modulus * 10 - "-%%" : subtract modulus `TRA` is also incorporated as an argument to all *Fast Statistical Functions*. Therefore it is only really necessary and advisable to use the `TRA` function if both aggregate statistics and transformed data are required, or to sweep out statistics otherwise obtained (e.g. regression or correlation coefficients etc.). The code below computes the column means of the iris-matrix obtained above, and uses them to demean that matrix. ```r # Note: All examples below generalize to vectors or data frames stats <- fmean(miris) # Saving stats # 6 identical ways of centering a matrix microbenchmark(sweep(miris, 2, stats, "-"), # base R miris - outer(rep(1, nrow(iris)), stats), TRA(miris, fmean(miris), "-"), miris %r-% fmean(miris), # The operator is actually a wrapper around TRA fmean(miris, TRA = "-"), # better for any operation if the stats are not needed fwithin(miris)) # fastest, fwithin is discussed in section 6.5 # Unit: microseconds # expr min lq mean median uq max neval # sweep(miris, 2, stats, "-") 15.457 16.2975 17.57711 17.0355 17.6915 53.505 100 # miris - outer(rep(1, nrow(iris)), stats) 4.715 5.6375 6.36812 6.0270 6.6010 21.402 100 # TRA(miris, fmean(miris), "-") 3.075 3.3210 3.98930 3.6080 4.4895 14.678 100 # miris %r-% fmean(miris) 3.362 3.8130 4.68425 4.0590 4.5305 42.066 100 # fmean(miris, TRA = "-") 2.583 2.8085 3.79496 2.9930 4.2640 29.848 100 # fwithin(miris) 3.321 3.6080 5.26768 3.8130 4.9815 78.474 100 # Simple replacing [same as fmean(miris, TRA = "replace") or fbetween(miris)] TRA(miris, fmean(miris), "replace") %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 5.843333 3.057333 3.758 1.199333 # [2,] 5.843333 3.057333 3.758 1.199333 # [3,] 5.843333 3.057333 3.758 1.199333 # Simple scaling [same as fsd(miris, TRA = "/")] TRA(miris, fsd(miris), "/") %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 6.158928 8.029986 0.7930671 0.2623854 # [2,] 5.917402 6.882845 0.7930671 0.2623854 # [3,] 5.675875 7.341701 0.7364195 0.2623854 ``` All of the above is functionality also offered by `base::sweep`, but `TRA` is significantly faster. The big advantage of `TRA` is that it also supports grouped operations: ```r # Grouped centering [same as fmean(miris, f, TRA = "-") or fwithin(m, f)] TRA(miris, fmean(miris, f), "-", f) %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 0.094 0.072 -0.062 -0.046 # [2,] -0.106 -0.428 -0.062 -0.046 # [3,] -0.306 -0.228 -0.162 -0.046 # Grouped replacing [same as fmean(m, f, TRA = "replace") or fbetween(m, f)] TRA(miris, fmean(miris, f), "replace", f) %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 5.006 3.428 1.462 0.246 # [2,] 5.006 3.428 1.462 0.246 # [3,] 5.006 3.428 1.462 0.246 # Groupwise percentages [same as fsum(m, f, TRA = "%")] TRA(miris, fsum(miris, f), "%", f) %>% head(3) # Sepal.Length Sepal.Width Petal.Length Petal.Width # [1,] 2.037555 2.042007 1.915185 1.626016 # [2,] 1.957651 1.750292 1.915185 1.626016 # [3,] 1.877747 1.866978 1.778386 1.626016 ``` As mentioned, calling the `TRA()` function does not make much sense if the same task can be performed using the *Fast Statistical Functions* or the arithmetic operators. It is however a very useful function to call for complex transformations involving grouped sweeping operations with precomputed quantities. ### 6.4 Fast Standardizing The function `fscale` can be used to efficiently standardize (i.e. scale and center) data using a numerically stable online algorithm. It's structure is the same as the *Fast Statistical Functions*. The standardization-operator `STD` also exists as a wrapper around `fscale`. The difference is that by default `STD` adds a prefix to standardized variables and also provides an enhanced method for data frames (more about operators in the next section). ```r # fscale doesn't rename columns fscale(mtcars) %>% head(2) # mpg cyl disp hp drat wt qsec vs # Mazda RX4 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.6103996 -0.7771651 -0.8680278 # Mazda RX4 Wag 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.3497853 -0.4637808 -0.8680278 # am gear carb # Mazda RX4 1.189901 0.4235542 0.7352031 # Mazda RX4 Wag 1.189901 0.4235542 0.7352031 # By default adds a prefix STD(mtcars) %>% head(2) # STD.mpg STD.cyl STD.disp STD.hp STD.drat STD.wt STD.qsec STD.vs # Mazda RX4 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.6103996 -0.7771651 -0.8680278 # Mazda RX4 Wag 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.3497853 -0.4637808 -0.8680278 # STD.am STD.gear STD.carb # Mazda RX4 1.189901 0.4235542 0.7352031 # Mazda RX4 Wag 1.189901 0.4235542 0.7352031 # See that is works STD(mtcars) %>% qsu # N Mean SD Min Max # STD.mpg 32 0 1 -1.6079 2.2913 # STD.cyl 32 0 1 -1.2249 1.0149 # STD.disp 32 -0 1 -1.2879 1.9468 # STD.hp 32 0 1 -1.381 2.7466 # STD.drat 32 -0 1 -1.5646 2.4939 # STD.wt 32 -0 1 -1.7418 2.2553 # STD.qsec 32 -0 1 -1.874 2.8268 # STD.vs 32 0 1 -0.868 1.116 # STD.am 32 -0 1 -0.8141 1.1899 # STD.gear 32 -0 1 -0.9318 1.7789 # STD.carb 32 -0 1 -1.1222 3.2117 # We can also scale and center to a different mean and standard deviation: qsu(fscale(mtcars, mean = 5, sd = 3))[, .c(Mean, SD)] %>% t # mpg cyl disp hp drat wt qsec vs am gear carb # Mean 5 5 5 5 5 5 5 5 5 5 5 # SD 3 3 3 3 3 3 3 3 3 3 3 # Or not center at all. In that case scaling is mean-preserving, in contrast to fsd(mtcars, TRA = "/") qsu(fscale(mtcars, mean = FALSE, sd = 3))[, .c(Mean, SD)] %>% t # mpg cyl disp hp drat wt qsec vs am gear carb # Mean 20.0906 6.1875 230.7219 146.6875 3.5966 3.2172 17.8487 0.4375 0.4062 3.6875 2.8125 # SD 3 3 3 3 3 3 3 3 3 3 3 ``` Scaling with `fscale / STD` can also be done groupwise and / or weighted. For example the Groningen Growth and Development Center 10-Sector Database provides annual series of value added in local currency and persons employed for 10 broad sectors in several African, Asian, and Latin American countries. ```r head(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA # 3 BWA SSA Sub-saharan Africa VA 1962 NA NA NA NA # 4 BWA SSA Sub-saharan Africa VA 1963 NA NA NA NA # 5 BWA SSA Sub-saharan Africa VA 1964 16.30154 3.494075 0.7365696 0.1043936 # 6 BWA SSA Sub-saharan Africa VA 1965 15.72700 2.495768 1.0181992 0.1350976 # CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA # 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229 # 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710 ``` If we wanted to correlate this data across countries and sectors, it needs to be standardized: ```r # Standardizing Sectors by Variable and Country STD_GGDC10S <- STD(GGDC10S, ~ Variable + Country, cols = 6:16) head(STD_GGDC10S) # Variable Country STD.AGR STD.MIN STD.MAN STD.PU STD.CON STD.WRT STD.TRA # 1 VA BWA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA # 5 VA BWA -0.7382911 -0.7165772 -0.6682536 -0.8051315 -0.6922839 -0.6032762 -0.5889923 # 6 VA BWA -0.7392424 -0.7167359 -0.6680535 -0.8050172 -0.6917529 -0.6030211 -0.5887320 # STD.FIRE STD.GOV STD.OTH STD.SUM # 1 NA NA NA NA # 2 NA NA NA NA # 3 NA NA NA NA # 4 NA NA NA NA # 5 -0.6349956 -0.6561054 -0.5959744 -0.6758663 # 6 -0.6349359 -0.6558634 -0.5957137 -0.6757768 # Correlating Standardized Value-Added across countries fsubset(STD_GGDC10S, Variable == "VA", STD.AGR:STD.SUM) %>% pwcor # STD.AGR STD.MIN STD.MAN STD.PU STD.CON STD.WRT STD.TRA STD.FIRE STD.GOV STD.OTH STD.SUM # STD.AGR 1 .88 .93 .88 .89 .90 .90 .86 .93 .88 .90 # STD.MIN .88 1 .86 .84 .85 .85 .84 .83 .88 .84 .86 # STD.MAN .93 .86 1 .95 .96 .97 .98 .95 .98 .97 .98 # STD.PU .88 .84 .95 1 .95 .96 .96 .95 .96 .96 .97 # STD.CON .89 .85 .96 .95 1 .98 .98 .97 .98 .97 .98 # STD.WRT .90 .85 .97 .96 .98 1 .99 .98 .99 .99 1.00 # STD.TRA .90 .84 .98 .96 .98 .99 1 .98 .99 .99 .99 # STD.FIRE .86 .83 .95 .95 .97 .98 .98 1 .98 .98 .98 # STD.GOV .93 .88 .98 .96 .98 .99 .99 .98 1 .99 1.00 # STD.OTH .88 .84 .97 .96 .97 .99 .99 .98 .99 1 .99 # STD.SUM .90 .86 .98 .97 .98 1.00 .99 .98 1.00 .99 1 ``` ### 6.5 Fast Centering and Averaging As a slightly faster alternative to `fmean(x, g, w, TRA = "-"/"-+")` or `fmean(x, g, w, TRA = "replace"/"replace_fill")`, `fwithin` and `fbetween` can be used to perform common (grouped, weighted) centering and averaging tasks (also known as *between*- and *within*- transformations in the language of panel data econometrics). `fbetween` / `fwithin` are faster than `fmean(..., TRA = ...)` because they don't materialize the full set of computed averages. The operators `W` and `B` also exist. ```r ## Simple centering and averaging fbetween(mtcars$mpg) %>% head # [1] 20.09062 20.09062 20.09062 20.09062 20.09062 20.09062 fwithin(mtcars$mpg) %>% head # [1] 0.909375 0.909375 2.709375 1.309375 -1.390625 -1.990625 all.equal(fbetween(mtcars) + fwithin(mtcars), mtcars) # [1] TRUE ## Groupwise centering and averaging fbetween(mtcars$mpg, mtcars$cyl) %>% head # [1] 19.74286 19.74286 26.66364 19.74286 15.10000 19.74286 fwithin(mtcars$mpg, mtcars$cyl) %>% head # [1] 1.257143 1.257143 -3.863636 1.657143 3.600000 -1.642857 all.equal(fbetween(mtcars, mtcars$cyl) + fwithin(mtcars, mtcars$cyl), mtcars) # [1] TRUE ``` To demonstrate more clearly the utility of the operators which exists for all fast transformation and time series functions, the code below implements the task of demeaning 4 series by country and saving the country-id using the within-operator `W` as opposed to `fwithin` which requires all input to be passed externally like the *Fast Statistical Functions*. ```r # Center 4 series in this dataset by country W(wlddev, ~ iso3c, cols = 9:12) %>% head # iso3c W.PCGDP W.LIFEEX W.GINI W.ODA # 1 AFG NA -16.75117 NA -1370778502 # 2 AFG NA -16.23517 NA -1255468497 # 3 AFG NA -15.72617 NA -1374708502 # 4 AFG NA -15.22617 NA -1249828497 # 5 AFG NA -14.73417 NA -1191628485 # 6 AFG NA -14.24917 NA -1145708502 # Same thing done manually using fwithin... add_vars(get_vars(wlddev, "iso3c"), get_vars(wlddev, 9:12) %>% fwithin(wlddev$iso3c) %>% add_stub("W.")) %>% head # iso3c W.PCGDP W.LIFEEX W.GINI W.ODA # 1 AFG NA -16.75117 NA -1370778502 # 2 AFG NA -16.23517 NA -1255468497 # 3 AFG NA -15.72617 NA -1374708502 # 4 AFG NA -15.22617 NA -1249828497 # 5 AFG NA -14.73417 NA -1191628485 # 6 AFG NA -14.24917 NA -1145708502 ``` It is also possible to drop the id's in `W` using the argument `keep.by = FALSE`. `fbetween / B` and `fwithin / W` each have one additional computational option: ```r # This replaces missing values with the group-mean: Same as fmean(x, g, TRA = "replace_fill") B(wlddev, ~ iso3c, cols = 9:12, fill = TRUE) %>% head # iso3c B.PCGDP B.LIFEEX B.GINI B.ODA # 1 AFG 483.8351 49.19717 NA 1487548499 # 2 AFG 483.8351 49.19717 NA 1487548499 # 3 AFG 483.8351 49.19717 NA 1487548499 # 4 AFG 483.8351 49.19717 NA 1487548499 # 5 AFG 483.8351 49.19717 NA 1487548499 # 6 AFG 483.8351 49.19717 NA 1487548499 # This adds back the overall mean after subtracting out group means: Same as fmean(x, g, TRA = "-+") W(wlddev, ~ iso3c, cols = 9:12, mean = "overall.mean") %>% head # iso3c W.PCGDP W.LIFEEX W.GINI W.ODA # 1 AFG NA 47.54514 NA -916058371 # 2 AFG NA 48.06114 NA -800748366 # 3 AFG NA 48.57014 NA -919988371 # 4 AFG NA 49.07014 NA -795108366 # 5 AFG NA 49.56214 NA -736908354 # 6 AFG NA 50.04714 NA -690988371 # Visual demonstration of centering on the overall mean vs. simple centering oldpar <- par(mfrow = c(1, 3)) plot(iris[1:2], col = iris$Species, main = "Raw Data") # Raw data plot(W(iris, ~ Species)[2:3], col = iris$Species, main = "Simple Centering") # Simple centering plot(W(iris, ~ Species, mean = "overall.mean")[2:3], col = iris$Species, # Centering on overall mean: Preserves level of data main = "Added Overall Mean") ```
plot of chunk BWplot

plot of chunk BWplot

```r par(oldpar) ``` Another great utility of operators is that they can be employed in regression formulas in a manor that is both very efficient and pleasing to the eyes. The code below demonstrates the use of `W` and `B` to efficiently run fixed-effects regressions with `lm`. ```r # When using operators in formulas, we need to remove missing values beforehand to obtain the same results as a Fixed-Effects package data <- wlddev %>% fselect(iso3c, year, PCGDP, LIFEEX) %>% na_omit # classical lm() -> iso3c is a factor, creates a matrix of 200+ country dummies. coef(lm(PCGDP ~ LIFEEX + iso3c, data))[1:2] # (Intercept) LIFEEX # -2837.039 380.448 # Centering each variable individually coef(lm(W(PCGDP, iso3c) ~ W(LIFEEX, iso3c), data)) # (Intercept) W(LIFEEX, iso3c) # 5.596034e-13 3.804480e+02 # Centering the data coef(lm(W.PCGDP ~ W.LIFEEX, W(data, PCGDP + LIFEEX ~ iso3c))) # (Intercept) W.LIFEEX # 5.596034e-13 3.804480e+02 # Adding the overall mean back to the data only changes the intercept coef(lm(W.PCGDP ~ W.LIFEEX, W(data, PCGDP + LIFEEX ~ iso3c, mean = "overall.mean"))) # (Intercept) W.LIFEEX # -14020.142 380.448 # Procedure suggested by Mundlak (1978) - controlling for group averages instead of demeaning coef(lm(PCGDP ~ LIFEEX + B(LIFEEX, iso3c), data)) # (Intercept) LIFEEX B(LIFEEX, iso3c) # -52254.7421 380.4480 585.8386 ``` In general it is recommended calling the long names (i.e. `fwithin` or `fscale` etc.) for programming since they are a bit more efficient on the R-side of things and require all input in terms of data. For all other purposes the operators are more convenient. It is important to note that the operators can do everything the functions can do (i.e. you can also pass grouping vectors or *GRP* objects to them). They are just simple wrappers that in the data frame method add 4 additional features: * The possibility of formula input to `by` i.e. `W(mtcars, ~ cyl)` or `W(mtcars, mpg ~ cyl)` * They preserve grouping columns (`cyl` in the above example) when passed in a formula (default `keep.by = TRUE`) * The ability to subset many columns using the `cols` argument (i.e. `W(mtcars, ~ cyl, cols = 4:7)` is the same as `W(mtcars, hp + drat + wt + qsec ~ cyl)`) * They rename transformed columns by adding a prefix (default `stub = "W."`) ### 6.6 HD Centering and Linear Prediction Sometimes simple centering is not enough, for example if a linear model with multiple levels of fixed-effects needs to be estimated, potentially involving interactions with continuous covariates. For these purposes `fhdwithin / HDW` and `fhdbetween / HDB` were created as efficient multi-purpose functions for linear prediction and partialling out. They operate by splitting complex regression problems in 2 parts: Factors and factor-interactions are projected out using `fixest::demean`, an efficient `C++` routine for centering vectors on multiple factors, whereas continuous variables are dealt with using a standard `chol` or `qr` decomposition in base R. The examples below show the use of the `HDW` operator in manually solving a regression problem with country and time fixed effects. ```r data$year <- qF(data$year, na.exclude = FALSE) # the country code (iso3c) is already a factor # classical lm() -> creates a matrix of 196 country dummies and 56 year dummies coef(lm(PCGDP ~ LIFEEX + iso3c + year, data))[1:2] # (Intercept) LIFEEX # 37388.0493 -333.0115 # Centering each variable individually coef(lm(HDW(PCGDP, list(iso3c, year)) ~ HDW(LIFEEX, list(iso3c, year)), data)) # (Intercept) HDW(LIFEEX, list(iso3c, year)) # -2.450245e-13 -3.330115e+02 # Centering the entire data coef(lm(HDW.PCGDP ~ HDW.LIFEEX, HDW(data, PCGDP + LIFEEX ~ iso3c + year))) # (Intercept) HDW.LIFEEX # -2.450245e-13 -3.330115e+02 # Procedure suggested by Mundlak (1978) - controlling for averages instead of demeaning coef(lm(PCGDP ~ LIFEEX + HDB(LIFEEX, list(iso3c, year)), data)) # (Intercept) LIFEEX HDB(LIFEEX, list(iso3c, year)) # -48141.1094 -333.0115 1236.2681 ``` We may wish to test whether including time fixed-effects in the above regression actually impacts the fit. This can be done with the fast F-test: ```r # The syntax is fFtest(y, exc, X, ...). 'exc' are exclusion restrictions. data %$% fFtest(PCGDP, year, list(LIFEEX, iso3c)) # R-Sq. DF1 DF2 F-Stat. P-Value # Full Model 0.894 258 8763 286.130 0.000 # Restricted Model 0.873 199 8822 304.661 0.000 # Exclusion Rest. 0.021 59 8763 29.280 0.000 ``` The test shows that the time fixed-effects (accounted for like year dummies) are jointly significant. One can also use `fhdbetween / HDB` and `fhdwithin / HDW` to project out interactions and continuous covariates. ```r wlddev$year <- as.numeric(wlddev$year) # classical lm() -> full country-year interaction, -> 200+ country dummies, 200+ trends, year and ODA coef(lm(PCGDP ~ LIFEEX + iso3c * year + ODA, wlddev))[1:2] # (Intercept) LIFEEX # -7.257955e+05 8.938626e+00 # Same using HDW coef(lm(HDW.PCGDP ~ HDW.LIFEEX, HDW(wlddev, PCGDP + LIFEEX ~ iso3c * year + ODA))) # (Intercept) HDW.LIFEEX # 3.403288e-12 8.938626e+00 # example of a simple continuous problem HDW(iris[1:2], iris[3:4]) %>% head # HDW.Sepal.Length HDW.Sepal.Width # 1 0.21483967 0.2001352 # 2 0.01483967 -0.2998648 # 3 -0.13098262 -0.1255786 # 4 -0.33933805 -0.1741510 # 5 0.11483967 0.3001352 # 6 0.41621663 0.6044681 # May include factors.. HDW(iris[1:2], iris[3:5]) %>% head # HDW.Sepal.Length HDW.Sepal.Width # 1 0.14989286 0.1102684 # 2 -0.05010714 -0.3897316 # 3 -0.15951256 -0.1742640 # 4 -0.44070173 -0.3051992 # 5 0.04989286 0.2102684 # 6 0.17930818 0.3391766 ``` ## 7. Time Series and Panel Series *collapse* also presents some essential contributions in the time series domain, particularly in the area of (irregular) time series, panel data and efficient and secure computations on (potentially unordered) time-dependent vectors and (unbalanced) panels. ### 7.1 Panel Series to Array Conversions To facilitate the exploration and access of panel data, `psmat` was created as an S3 generic to efficiently obtain matrices or 3D-arrays from panel data. ```r mts <- psmat(wlddev, PCGDP ~ iso3c, ~ year) str(mts) # 'psmat' num [1:216, 1:61] NA NA NA NA NA ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # - attr(*, "transpose")= logi FALSE plot(log10(mts), main = paste("Log10", vlabels(wlddev$PCGDP)), xlab = "Year") ```
plot of chunk psmatplot

plot of chunk psmatplot

Passing a data frame of panel series to `psmat` generates a 3D array: ```r # Get panel series array psar <- psmat(wlddev, ~ iso3c, ~ year, cols = 9:12) str(psar) # 'psmat' num [1:216, 1:61, 1:4] NA NA NA NA NA ... # - attr(*, "dimnames")=List of 3 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA" # - attr(*, "transpose")= logi FALSE plot(psar) ```
plot of chunk psarplot

plot of chunk psarplot

```r # Plot array of Panel Series aggregated by region: collap(wlddev, ~ region + year, cols = 9:12) %>% psmat( ~ region, ~ year) %>% plot(legend = TRUE, labs = vlabels(wlddev)[9:12]) ```
plot of chunk psarplot2

plot of chunk psarplot2

`psmat` can also output a list of panel series matrices, which can be used among other things to reshape the data with `unlist2d` (discussed in more detail in List-Processing section). ```r # This gives list of ps-matrices psml <- psmat(wlddev, ~ iso3c, ~ year, 9:12, array = FALSE) str(psml, give.attr = FALSE) # List of 4 # $ PCGDP : 'psmat' num [1:216, 1:61] NA NA NA NA NA ... # $ LIFEEX: 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ... # $ GINI : 'psmat' num [1:216, 1:61] NA NA NA NA NA NA NA NA NA NA ... # $ ODA : 'psmat' num [1:216, 1:61] NA 116769997 -390000 NA NA ... # Using unlist2d, can generate a data.frame unlist2d(psml, idcols = "Variable", row.names = "Country") %>% gv(1:10) %>% head # Variable Country 1960 1961 1962 1963 1964 1965 1966 1967 # 1 PCGDP ABW NA NA NA NA NA NA NA NA # 2 PCGDP AFG NA NA NA NA NA NA NA NA # 3 PCGDP AGO NA NA NA NA NA NA NA NA # 4 PCGDP ALB NA NA NA NA NA NA NA NA # 5 PCGDP AND NA NA NA NA NA NA NA NA # 6 PCGDP ARE NA NA NA NA NA NA NA NA ``` ### 7.2 Panel Series ACF, PACF and CCF The correlation structure of panel data can also be explored with `psacf`, `pspacf` and `psccf`. These functions are exact analogues to `stats::acf`, `stats::pacf` and `stats::ccf`. They use `fscale` to group-scale panel data by the panel-id provided, and then compute the covariance of a sequence of panel-lags (generated with `flag` discussed below) with the group-scaled level-series, dividing by the variance of the group-scaled level series. The Partial-ACF is generated from the ACF using a Yule-Walker decomposition (as in `stats::pacf`). ```r # Panel-ACF of GDP per Capita psacf(wlddev, PCGDP ~ iso3c, ~year) ```
plot of chunk PSACF

plot of chunk PSACF

```r # Panel-Partial-ACF of GDP per Capia pspacf(wlddev, PCGDP ~ iso3c, ~year) ```
plot of chunk PSACF

plot of chunk PSACF

```r # Panel- Cross-Correlation function of GDP per Capia and Life-Expectancy wlddev %$% psccf(PCGDP, LIFEEX, iso3c, year) ```
plot of chunk PSACF

plot of chunk PSACF

```r # Multivariate Panel-auto and cross-correlation function of 3 variables: psacf(wlddev, PCGDP + LIFEEX + ODA ~ iso3c, ~year) ```
plot of chunk PSACF

plot of chunk PSACF

### 7.3 Fast Lags and Leads `flag` and the corresponding lag- and lead- operators `L` and `F` are S3 generics to efficiently compute lags and leads on time series and panel data. The code below shows how to compute simple lags and leads on the classic Box & Jenkins airline data that comes with R. ```r # 1 lag L(AirPassengers) # Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec # 1949 NA 112 118 132 129 121 135 148 148 136 119 104 # 1950 118 115 126 141 135 125 149 170 170 158 133 114 # 1951 140 145 150 178 163 172 178 199 199 184 162 146 # 1952 166 171 180 193 181 183 218 230 242 209 191 172 # 1953 194 196 196 236 235 229 243 264 272 237 211 180 # 1954 201 204 188 235 227 234 264 302 293 259 229 203 # 1955 229 242 233 267 269 270 315 364 347 312 274 237 # 1956 278 284 277 317 313 318 374 413 405 355 306 271 # 1957 306 315 301 356 348 355 422 465 467 404 347 305 # 1958 336 340 318 362 348 363 435 491 505 404 359 310 # 1959 337 360 342 406 396 420 472 548 559 463 407 362 # 1960 405 417 391 419 461 472 535 622 606 508 461 390 # 3 identical ways of computing 1 lag all_identical(flag(AirPassengers), L(AirPassengers), F(AirPassengers,-1)) # [1] TRUE # 1 lead and 3 lags - output as matrix L(AirPassengers, -1:3) %>% head # F1 -- L1 L2 L3 # [1,] 118 112 NA NA NA # [2,] 132 118 112 NA NA # [3,] 129 132 118 112 NA # [4,] 121 129 132 118 112 # [5,] 135 121 129 132 118 # [6,] 148 135 121 129 132 # ... this is still a time series object: attributes(L(AirPassengers, -1:3)) # $tsp # [1] 1949.000 1960.917 12.000 # # $class # [1] "ts" "matrix" # # $dim # [1] 144 5 # # $dimnames # $dimnames[[1]] # NULL # # $dimnames[[2]] # [1] "F1" "--" "L1" "L2" "L3" ``` `flag / L / F` also work well on (time series) matrices. Below a regression with daily closing prices of major European stock indices is run: Germany DAX (Ibis), Switzerland SMI, France CAC, and UK FTSE. The data are sampled in business time, i.e. weekends and holidays are omitted. ```r str(EuStockMarkets) # Time-Series [1:1860, 1:4] from 1991 to 1999: 1629 1614 1607 1621 1618 ... # - attr(*, "dimnames")=List of 2 # ..$ : NULL # ..$ : chr [1:4] "DAX" "SMI" "CAC" "FTSE" # Data is recorded on 260 days per year, 1991-1999 tsp(EuStockMarkets) # [1] 1991.496 1998.646 260.000 freq <- frequency(EuStockMarkets) # There is some obvious seasonality stl(EuStockMarkets[, "DAX"], freq) %>% plot ```
plot of chunk mts

plot of chunk mts

```r # 1 annual lead and 1 annual lag L(EuStockMarkets, -1:1*freq) %>% head # F260.DAX DAX L260.DAX F260.SMI SMI L260.SMI F260.CAC CAC L260.CAC F260.FTSE FTSE # [1,] 1755.98 1628.75 NA 1846.6 1678.1 NA 1907.3 1772.8 NA 2515.8 2443.6 # [2,] 1754.95 1613.63 NA 1854.8 1688.5 NA 1900.6 1750.5 NA 2521.2 2460.2 # [3,] 1759.90 1606.51 NA 1845.3 1678.6 NA 1880.9 1718.0 NA 2493.9 2448.2 # [4,] 1759.84 1621.04 NA 1854.5 1684.1 NA 1873.5 1708.1 NA 2476.1 2470.4 # [5,] 1776.50 1618.16 NA 1870.5 1686.6 NA 1883.6 1723.1 NA 2497.1 2484.7 # [6,] 1769.98 1610.61 NA 1862.6 1671.6 NA 1868.5 1714.3 NA 2469.0 2466.8 # L260.FTSE # [1,] NA # [2,] NA # [3,] NA # [4,] NA # [5,] NA # [6,] NA # DAX regressed on it's own 2 annual lags and the lags of the other indicators lm(DAX ~., data = L(EuStockMarkets, 0:2*freq)) %>% summary # # Call: # lm(formula = DAX ~ ., data = L(EuStockMarkets, 0:2 * freq)) # # Residuals: # Min 1Q Median 3Q Max # -240.46 -51.28 -12.01 45.19 358.02 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -564.02041 93.94903 -6.003 2.49e-09 *** # L260.DAX -0.12577 0.03002 -4.189 2.99e-05 *** # L520.DAX -0.12528 0.04103 -3.053 0.00231 ** # SMI 0.32601 0.01726 18.890 < 2e-16 *** # L260.SMI 0.27499 0.02517 10.926 < 2e-16 *** # L520.SMI 0.04602 0.02602 1.769 0.07721 . # CAC 0.59637 0.02349 25.389 < 2e-16 *** # L260.CAC -0.14283 0.02763 -5.169 2.72e-07 *** # L520.CAC 0.05196 0.03657 1.421 0.15557 # FTSE 0.01002 0.02403 0.417 0.67675 # L260.FTSE 0.04509 0.02807 1.606 0.10843 # L520.FTSE 0.10601 0.02717 3.902 0.00010 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 83.06 on 1328 degrees of freedom # (520 observations deleted due to missingness) # Multiple R-squared: 0.9943, Adjusted R-squared: 0.9942 # F-statistic: 2.092e+04 on 11 and 1328 DF, p-value: < 2.2e-16 ``` Since v1.5.0, irregular time series are supported: ```r t <- seq_row(EuStockMarkets)[-4L] flag(EuStockMarkets[-4L, ], -1:1, t = t) %>% head # F1.DAX DAX L1.DAX F1.SMI SMI L1.SMI F1.CAC CAC L1.CAC F1.FTSE FTSE L1.FTSE # [1,] 1613.63 1628.75 NA 1688.5 1678.1 NA 1750.5 1772.8 NA 2460.2 2443.6 NA # [2,] 1606.51 1613.63 1628.75 1678.6 1688.5 1678.1 1718.0 1750.5 1772.8 2448.2 2460.2 2443.6 # [3,] NA 1606.51 1613.63 NA 1678.6 1688.5 NA 1718.0 1750.5 NA 2448.2 2460.2 # [4,] 1610.61 1618.16 NA 1671.6 1686.6 NA 1714.3 1723.1 NA 2466.8 2484.7 NA # [5,] 1630.75 1610.61 1618.16 1682.9 1671.6 1686.6 1734.5 1714.3 1723.1 2487.9 2466.8 2484.7 # [6,] 1640.17 1630.75 1610.61 1703.6 1682.9 1671.6 1757.4 1734.5 1714.3 2508.4 2487.9 2466.8 ``` The main innovation of `flag / L / F` is the ability to very efficiently compute sequences of lags and leads on panel data, and that this panel data need not be ordered or balanced: ```r # This lags all 4 series L(wlddev, 1L, ~ iso3c, ~ year, cols = 9:12) %>% head # iso3c year L1.PCGDP L1.LIFEEX L1.GINI L1.ODA # 1 AFG 1960 NA NA NA NA # 2 AFG 1961 NA 32.446 NA 116769997 # 3 AFG 1962 NA 32.962 NA 232080002 # 4 AFG 1963 NA 33.471 NA 112839996 # 5 AFG 1964 NA 33.971 NA 237720001 # 6 AFG 1965 NA 34.463 NA 295920013 # Without t: Works here because data is ordered, but gives a message L(wlddev, 1L, ~ iso3c, cols = 9:12) %>% head # iso3c L1.PCGDP L1.LIFEEX L1.GINI L1.ODA # 1 AFG NA NA NA NA # 2 AFG NA 32.446 NA 116769997 # 3 AFG NA 32.962 NA 232080002 # 4 AFG NA 33.471 NA 112839996 # 5 AFG NA 33.971 NA 237720001 # 6 AFG NA 34.463 NA 295920013 # 1 lead and 2 lags of Life Expectancy # after removing the 4th row, thus creating an unbalanced panel wlddev %>% ss(-4L) %>% L(-1:2, LIFEEX ~ iso3c, ~year) %>% head # iso3c year F1.LIFEEX LIFEEX L1.LIFEEX L2.LIFEEX # 1 AFG 1960 32.962 32.446 NA NA # 2 AFG 1961 33.471 32.962 32.446 NA # 3 AFG 1962 NA 33.471 32.962 32.446 # 4 AFG 1964 34.948 34.463 NA 33.471 # 5 AFG 1965 35.430 34.948 34.463 NA # 6 AFG 1966 35.914 35.430 34.948 34.463 ``` Optimal performance is obtained if the panel-id is a factor, and the time variable also a factor or an integer variable. In that case an ordering vector of the data is computed directly without any prior sorting or grouping, and the data is accessed through this vector. Thus the data need not be sorted to compute a fully-identified panel-lag, which is a key advantage to, say, the `shift` function in `data.table`. One intended area of use, especially for the operators `L` and `F`, is to substantially facilitate the implementation of dynamic models in various contexts (independent of the estimation package). Below different ways `L` can be used to estimate a dynamic panel-model using `lm` are shown: ```r # Different ways of regressing GDP on it's lags and life-Expectancy and it's lags # 1 - Precomputing lags lm(PCGDP ~ ., L(wlddev, 0:2, PCGDP + LIFEEX ~ iso3c, ~ year, keep.ids = FALSE)) %>% summary # # Call: # lm(formula = PCGDP ~ ., data = L(wlddev, 0:2, PCGDP + LIFEEX ~ # iso3c, ~year, keep.ids = FALSE)) # # Residuals: # Min 1Q Median 3Q Max # -16776.5 -102.2 -17.2 91.5 12277.1 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -333.93994 61.04617 -5.470 4.62e-08 *** # L1.PCGDP 1.31959 0.01021 129.270 < 2e-16 *** # L2.PCGDP -0.31707 0.01029 -30.815 < 2e-16 *** # LIFEEX -17.77368 35.47772 -0.501 0.616 # L1.LIFEEX 45.76286 65.87124 0.695 0.487 # L2.LIFEEX -21.43005 34.98964 -0.612 0.540 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 787.3 on 8609 degrees of freedom # (4561 observations deleted due to missingness) # Multiple R-squared: 0.9976, Adjusted R-squared: 0.9976 # F-statistic: 7.26e+05 on 5 and 8609 DF, p-value: < 2.2e-16 # 2 - Ad-hoc computation in lm formula lm(PCGDP ~ L(PCGDP, 1:2, iso3c, year) + L(LIFEEX, 0:2, iso3c, year), wlddev) %>% summary # # Call: # lm(formula = PCGDP ~ L(PCGDP, 1:2, iso3c, year) + L(LIFEEX, 0:2, # iso3c, year), data = wlddev) # # Residuals: # Min 1Q Median 3Q Max # -16776.5 -102.2 -17.2 91.5 12277.1 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -333.93994 61.04617 -5.470 4.62e-08 *** # L(PCGDP, 1:2, iso3c, year)L1 1.31959 0.01021 129.270 < 2e-16 *** # L(PCGDP, 1:2, iso3c, year)L2 -0.31707 0.01029 -30.815 < 2e-16 *** # L(LIFEEX, 0:2, iso3c, year)-- -17.77368 35.47772 -0.501 0.616 # L(LIFEEX, 0:2, iso3c, year)L1 45.76286 65.87124 0.695 0.487 # L(LIFEEX, 0:2, iso3c, year)L2 -21.43005 34.98964 -0.612 0.540 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 787.3 on 8609 degrees of freedom # (4561 observations deleted due to missingness) # Multiple R-squared: 0.9976, Adjusted R-squared: 0.9976 # F-statistic: 7.26e+05 on 5 and 8609 DF, p-value: < 2.2e-16 # 3 - Precomputing panel-identifiers g = qF(wlddev$iso3c, na.exclude = FALSE) t = qF(wlddev$year, na.exclude = FALSE) lm(PCGDP ~ L(PCGDP, 1:2, g, t) + L(LIFEEX, 0:2, g, t), wlddev) %>% summary # # Call: # lm(formula = PCGDP ~ L(PCGDP, 1:2, g, t) + L(LIFEEX, 0:2, g, # t), data = wlddev) # # Residuals: # Min 1Q Median 3Q Max # -16776.5 -102.2 -17.2 91.5 12277.1 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -333.93994 61.04617 -5.470 4.62e-08 *** # L(PCGDP, 1:2, g, t)L1 1.31959 0.01021 129.270 < 2e-16 *** # L(PCGDP, 1:2, g, t)L2 -0.31707 0.01029 -30.815 < 2e-16 *** # L(LIFEEX, 0:2, g, t)-- -17.77368 35.47772 -0.501 0.616 # L(LIFEEX, 0:2, g, t)L1 45.76286 65.87124 0.695 0.487 # L(LIFEEX, 0:2, g, t)L2 -21.43005 34.98964 -0.612 0.540 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 787.3 on 8609 degrees of freedom # (4561 observations deleted due to missingness) # Multiple R-squared: 0.9976, Adjusted R-squared: 0.9976 # F-statistic: 7.26e+05 on 5 and 8609 DF, p-value: < 2.2e-16 ``` ### 7.4 Fast Differences and Growth Rates Similarly to `flag / L / F`, `fdiff / D / Dlog` computes sequences of suitably lagged / leaded and iterated differences, quasi-differences or (quasi-)log-differences on time series and panel data, and `fgrowth / G` computes growth rates. Using again the `Airpassengers` data, the seasonal decomposition shows significant seasonality: ```r stl(AirPassengers, "periodic") %>% plot ```
plot of chunk stl

plot of chunk stl

We can test the statistical significance of this seasonality by jointly testing a set of monthly dummies regressed on the differenced series. Given that the seasonal fluctuations are increasing in magnitude, using growth rates for the test seems more appropriate: ```r f <- qF(cycle(AirPassengers)) fFtest(fgrowth(AirPassengers), f) # R-Sq. DF1 DF2 F-Stat. P-value # 0.874 11 131 82.238 0.000 ``` The test shows significant seasonality, accounting for 87% of the variation in the growth rate of the series. We can plot the series together with the ordinary, seasonal (12-month) and deseasonalized monthly growth rate using: ```r G(AirPassengers, c(0, 1, 12)) %>% cbind(W.G1 = W(G(AirPassengers), f)) %>% plot(main = "Growth Rate of Airpassengers") ```
plot of chunk Gplot

plot of chunk Gplot

It is evident that taking the annualized growth rate also removes the periodic behavior. We can also compute second differences or growth rates of growth rates. Below a plot of the ordinary and annual first and second differences of the data: ```r D(AirPassengers, c(1,12), 1:2) %>% plot ```
plot of chunk Dplot

plot of chunk Dplot

In general, both `fdiff / D` and `fgrowth / G` can compute sequences of lagged / leaded and iterated differences / growth rates. ```r # sequence of leaded/lagged and iterated differences y = 1:10 D(y, -2:2, 1:3) # F2D1 F2D2 F2D3 FD1 FD2 FD3 -- D1 D2 D3 L2D1 L2D2 L2D3 # [1,] -2 0 0 -1 0 0 1 NA NA NA NA NA NA # [2,] -2 0 0 -1 0 0 2 1 NA NA NA NA NA # [3,] -2 0 0 -1 0 0 3 1 0 NA 2 NA NA # [4,] -2 0 0 -1 0 0 4 1 0 0 2 NA NA # [5,] -2 0 NA -1 0 0 5 1 0 0 2 0 NA # [6,] -2 0 NA -1 0 0 6 1 0 0 2 0 NA # [7,] -2 NA NA -1 0 0 7 1 0 0 2 0 0 # [8,] -2 NA NA -1 0 NA 8 1 0 0 2 0 0 # [9,] NA NA NA -1 NA NA 9 1 0 0 2 0 0 # [10,] NA NA NA NA NA NA 10 1 0 0 2 0 0 ``` All of this also works for panel data. The code below gives an example: ```r g = rep(1:2, each = 5) t = rep(1:5, 2) D(y, -2:2, 1:2, g, t) # F2D1 F2D2 FD1 FD2 -- D1 D2 L2D1 L2D2 # [1,] -2 0 -1 0 1 NA NA NA NA # [2,] -2 NA -1 0 2 1 NA NA NA # [3,] -2 NA -1 0 3 1 0 2 NA # [4,] NA NA -1 NA 4 1 0 2 NA # [5,] NA NA NA NA 5 1 0 2 0 # [6,] -2 0 -1 0 6 NA NA NA NA # [7,] -2 NA -1 0 7 1 NA NA NA # [8,] -2 NA -1 0 8 1 0 2 NA # [9,] NA NA -1 NA 9 1 0 2 NA # [10,] NA NA NA NA 10 1 0 2 0 ``` Calls to `flag / L / F`, `fdiff / D` and `fgrowth / G` can be nested. In the example below, `L.matrix` is called on the right-half ob the above sequence: ```r L(D(y, 0:2, 1:2, g, t), 0:1, g, t) # -- L1.-- D1 L1.D1 D2 L1.D2 L2D1 L1.L2D1 L2D2 L1.L2D2 # [1,] 1 NA NA NA NA NA NA NA NA NA # [2,] 2 1 1 NA NA NA NA NA NA NA # [3,] 3 2 1 1 0 NA 2 NA NA NA # [4,] 4 3 1 1 0 0 2 2 NA NA # [5,] 5 4 1 1 0 0 2 2 0 NA # [6,] 6 NA NA NA NA NA NA NA NA NA # [7,] 7 6 1 NA NA NA NA NA NA NA # [8,] 8 7 1 1 0 NA 2 NA NA NA # [9,] 9 8 1 1 0 0 2 2 NA NA # [10,] 10 9 1 1 0 0 2 2 0 NA ``` `fdiff / D` and `fgrowth / G` also come with a data frame method, making the computation of growth-variables on datasets very easy: ```r G(GGDC10S, 1L, 1L, ~ Variable + Country, ~ Year, cols = 6:10) %>% head # Variable Country Year G1.AGR G1.MIN G1.MAN G1.PU G1.CON # 1 VA BWA 1960 NA NA NA NA NA # 2 VA BWA 1961 NA NA NA NA NA # 3 VA BWA 1962 NA NA NA NA NA # 4 VA BWA 1963 NA NA NA NA NA # 5 VA BWA 1964 NA NA NA NA NA # 6 VA BWA 1965 -3.524492 -28.57143 38.23529 29.41176 103.9604 ``` The code below estimates a dynamic panel model regressing the 10-year growth rate of GDP per capita on it's 10-year lagged level and the 10-year growth rate of life-expectancy: ```r summary(lm(G(PCGDP,10,1,iso3c,year) ~ L(PCGDP,10,iso3c,year) + G(LIFEEX,10,1,iso3c,year), data = wlddev)) # # Call: # lm(formula = G(PCGDP, 10, 1, iso3c, year) ~ L(PCGDP, 10, iso3c, # year) + G(LIFEEX, 10, 1, iso3c, year), data = wlddev) # # Residuals: # Min 1Q Median 3Q Max # -104.32 -21.97 -3.96 13.26 1714.58 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 2.740e+01 1.089e+00 25.168 < 2e-16 *** # L(PCGDP, 10, iso3c, year) -3.337e-04 4.756e-05 -7.016 2.49e-12 *** # G(LIFEEX, 10, 1, iso3c, year) 4.617e-01 1.124e-01 4.107 4.05e-05 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 58.43 on 7113 degrees of freedom # (6060 observations deleted due to missingness) # Multiple R-squared: 0.01132, Adjusted R-squared: 0.01104 # F-statistic: 40.73 on 2 and 7113 DF, p-value: < 2.2e-16 ``` To go a step further, the code below regresses the 10-year growth rate of GDP on the 10-year lagged levels and 10-year growth rates of GDP and life expectancy, with country and time-fixed effects projected out using `HDW`. The standard errors are unreliable without bootstrapping, but this example nicely demonstrates the potential for complex estimations brought by *collapse*. ```r moddat <- HDW(L(G(wlddev, c(0, 10), 1, ~iso3c, ~year, 9:10), c(0, 10), ~iso3c, ~year), ~iso3c + qF(year))[-c(1,5)] summary(lm(HDW.L10G1.PCGDP ~. , moddat)) # # Call: # lm(formula = HDW.L10G1.PCGDP ~ ., data = moddat) # # Residuals: # Min 1Q Median 3Q Max # -807.68 -10.80 -0.64 10.23 779.99 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 1.907e-15 4.930e-01 0.000 1.000000 # HDW.L10.PCGDP -2.500e-03 1.292e-04 -19.347 < 2e-16 *** # HDW.L10.L10G1.PCGDP -5.885e-01 1.082e-02 -54.412 < 2e-16 *** # HDW.L10.LIFEEX 1.056e+00 2.885e-01 3.661 0.000254 *** # HDW.L10G1.LIFEEX 6.927e-01 1.154e-01 6.002 2.08e-09 *** # HDW.L10.L10G1.LIFEEX 8.749e-01 1.108e-01 7.899 3.39e-15 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 35.69 on 5235 degrees of freedom # Multiple R-squared: 0.4029, Adjusted R-squared: 0.4023 # F-statistic: 706.4 on 5 and 5235 DF, p-value: < 2.2e-16 ``` One of the inconveniences of the above computations is that it requires declaring the panel-identifiers `iso3c` and `year` again and again for each function. A great remedy here are the *plm* classes *pseries* and *pdata.frame* which *collapse* was built to support. This shows how one could run the same regression with plm: ```r pwlddev <- plm::pdata.frame(wlddev, index = c("iso3c", "year")) moddat <- HDW(L(G(pwlddev, c(0, 10), 1, 9:10), c(0, 10)))[-c(1,5)] summary(lm(HDW.L10G1.PCGDP ~. , moddat)) # # Call: # lm(formula = HDW.L10G1.PCGDP ~ ., data = moddat) # # Residuals: # Min 1Q Median 3Q Max # -677.61 -12.45 -1.02 10.86 913.22 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 0.1456192 0.5187976 0.281 0.778962 # HDW.L10.PCGDP -0.0022910 0.0001253 -18.291 < 2e-16 *** # HDW.L10.L10G1.PCGDP -0.5859896 0.0113538 -51.612 < 2e-16 *** # HDW.L10.LIFEEX 0.8701877 0.2456255 3.543 0.000399 *** # HDW.L10G1.LIFEEX 0.6910533 0.1132028 6.105 1.11e-09 *** # HDW.L10.L10G1.LIFEEX 0.8990853 0.1068241 8.417 < 2e-16 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # Residual standard error: 37.51 on 5235 degrees of freedom # (7935 observations deleted due to missingness) # Multiple R-squared: 0.3784, Adjusted R-squared: 0.3778 # F-statistic: 637.4 on 5 and 5235 DF, p-value: < 2.2e-16 ``` To learn more about the integration of *collapse* and *plm*, consult the corresponding vignette. ## 8. List Processing and a Panel-VAR Example *collapse* also provides an ensemble of list-processing functions that grew out of a necessity of working with complex nested lists of data objects. The example provided in this section is also somewhat complex, but it demonstrates the utility of these functions while also providing a nice data-transformation task. When summarizing the `GGDC10S` data in section 1, it was evident that certain sectors have a high share of economic activity in almost all countries in the sample. This prompts the question of whether there exist common patterns in the interaction of these important sectors across countries. One way to empirically study this could be through a (Structural) Panel-Vector-Autoregression (PSVAR) in value added with the 6 most important sectors (excluding government): Agriculture, manufacturing, wholesale and retail trade, construction, transport and storage and finance and real estate. For this we will use the *vars* package^[I noticed there is a *panelvar* package, but I am more familiar with *vars* and *panelvar* can be pretty slow in my experience. We also have about 50 years of data here, so dynamic panel bias is not a big issue.]. Since *vars* natively does not support panel-VAR, we need to create the central *varest* object manually and then run the `SVAR` function to impose identification restrictions. We start with exploring and harmonizing the data: ```r library(vars) # The 6 most important non-government sectors (see section 1) sec <- c("AGR", "MAN", "WRT", "CON", "TRA", "FIRE") # This creates a data.frame containing the value added of the 6 most important non-government sectors data <- fsubset(GGDC10S, Variable == "VA", c("Country", "Year", sec)) %>% na_omit(cols = sec) # Let's look at the log VA in agriculture across countries: AGRmat <- psmat(data, AGR ~ Country, ~ Year, transpose = TRUE) %>% log # Converting to panel series matrix plot(AGRmat) ```
plot of chunk AGRmat

plot of chunk AGRmat

The plot shows quite some heterogeneity both in the levels (VA is in local currency) and in trend growth rates. In the panel-VAR estimation we are only really interested in the sectoral relationships within countries. Thus we need to harmonize this sectoral data further. One way would be taking growth rates or log-differences of the data, but VAR's are usually estimated in levels unless the data are cointegrated (and value added series do not, in general, exhibit unit-root behavior). Thus to harmonize the data further we opt for subtracting a country-sector specific cubic trend from the data in logs: ```r # Subtracting a country specific cubic growth trend AGRmat <- dapply(AGRmat, fhdwithin, poly(seq_row(AGRmat), 3), fill = TRUE) plot(AGRmat) ```
plot of chunk AGRmatplot

plot of chunk AGRmatplot

This seems to have done a decent job in curbing most of the heterogeneity. Some series however have a high variance around that cubic trend. Therefore a final step is to standardize the data to bring the variances in line: ```r # Standardizing the cubic log-detrended data AGRmat <- fscale(AGRmat) plot(AGRmat) ```
plot of chunk AGRmatplot2

plot of chunk AGRmatplot2

Now this looks pretty good, and is about the most we can do in terms of harmonization without differencing the data. The code below applies these transformations to all sectors: ```r # Taking logs settransformv(data, 3:8, log) # Projecting out country FE and cubic trends from complete cases gv(data, 3:8) <- HDW(data, ~ qF(Country)*poly(Year, 3), fill = TRUE) # Scaling gv(data, 3:8) <- STD(data, ~ Country, cols = 3:8, keep.by = FALSE) # Check the plot psmat(data, ~ Country, ~ Year) %>% plot ```
plot of chunk psmatplot2

plot of chunk psmatplot2

Since the data is annual, let us estimate the Panel-VAR with one lag: ```r # This adds one lag of all series to the data add_vars(data) <- L(data, 1, ~ Country, ~ Year, keep.ids = FALSE) # This removes missing values from all but the first row and drops identifier columns (vars is made for time series without gaps) data <- rbind(ss(data, 1, -(1:2)), na_omit(ss(data, -1, -(1:2)))) head(data) # STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE L1.STD.HDW.AGR # 1 0.65713943 2.2350584 1.946383 -0.03574399 1.0877811 1.0476507 NA # 2 -0.14377115 1.8693570 1.905081 1.23225734 1.0542315 0.9105622 0.65713943 # 3 -0.09209878 -0.8212004 1.997253 -0.01783824 0.6718465 0.6134260 -0.14377115 # 4 -0.25213869 -1.7830320 -1.970855 -2.68332505 -1.8475551 0.4382902 -0.09209878 # 5 -0.31623401 -4.2931567 -1.822211 -2.75551916 -0.7066491 -2.1982640 -0.25213869 # 6 -0.72691916 -1.3219387 -2.079333 -0.12148295 -1.1398220 -2.2230474 -0.31623401 # L1.STD.HDW.MAN L1.STD.HDW.WRT L1.STD.HDW.CON L1.STD.HDW.TRA L1.STD.HDW.FIRE # 1 NA NA NA NA NA # 2 2.2350584 1.946383 -0.03574399 1.0877811 1.0476507 # 3 1.8693570 1.905081 1.23225734 1.0542315 0.9105622 # 4 -0.8212004 1.997253 -0.01783824 0.6718465 0.6134260 # 5 -1.7830320 -1.970855 -2.68332505 -1.8475551 0.4382902 # 6 -4.2931567 -1.822211 -2.75551916 -0.7066491 -2.1982640 ``` Having prepared the data, the code below estimates the panel-VAR using `lm` and creates the *varest* object: ```r # saving the names of the 6 sectors nam <- names(data)[1:6] pVAR <- list(varresult = setNames(lapply(seq_len(6), function(i) # list of 6 lm's each regressing lm(as.formula(paste0(nam[i], "~ -1 + . ")), # the sector on all lags of get_vars(data, c(i, 7:fncol(data))))), nam), # itself and other sectors, removing the missing first row datamat = ss(data, -1), # The full data containing levels and lags of the sectors, removing the missing first row y = do.call(cbind, get_vars(data, 1:6)), # Only the levels data as matrix type = "none", # No constant or tend term: We harmonized the data already p = 1, # The lag-order K = 6, # The number of variables obs = fnrow(data)-1, # The number of non-missing obs totobs = fnrow(data), # The total number of obs restrictions = NULL, call = quote(VAR(y = data))) class(pVAR) <- "varest" ``` The significant serial-correlation test below suggests that the panel-VAR with one lag is ill-identified, but the sample size is also quite large so the test is prone to reject, and the test is likely also still picking up remaining cross-sectional heterogeneity. For the purposes of this vignette this shall not bother us. ```r serial.test(pVAR) # # Portmanteau Test (asymptotic) # # data: Residuals of VAR object pVAR # Chi-squared = 1680.8, df = 540, p-value < 2.2e-16 ``` By default the VAR is identified using a Choleski ordering of the direct impact matrix in which the first variable (here Agriculture) is assumed to not be directly impacted by any other sector in the current period, and this descends down to the last variable (Finance and Real Estate), which is assumed to be impacted by all other sectors in the current period. For structural identification it is usually necessary to impose restrictions on the direct impact matrix in line with economic theory. It is difficult to conceive theories on the average worldwide interaction of broad economic sectors, but to aid identification we will compute the correlation matrix in growth rates and restrict the lowest coefficients to be 0, which should be better than just imposing a random Choleski ordering. ```r # This computes the pairwise correlations between standardized sectoral growth rates across countries corr <- fsubset(GGDC10S, Variable == "VA") %>% # Subset rows: Only VA fgroup_by(Country) %>% # Group by country get_vars(sec) %>% # Select the 6 sectors fgrowth %>% # Compute Sectoral growth rates (a time-variable can be passed, but not necessary here as the data is ordered) fscale %>% # Scale and center (i.e. standardize) pwcor # Compute Pairwise correlations corr # AGR MAN WRT CON TRA FIRE # AGR 1 .55 .59 .39 .52 .41 # MAN .55 1 .67 .54 .65 .48 # WRT .59 .67 1 .56 .66 .52 # CON .39 .54 .56 1 .53 .46 # TRA .52 .65 .66 .53 1 .51 # FIRE .41 .48 .52 .46 .51 1 # We need to impose K*(K-1)/2 = 15 (with K = 6 variables) restrictions for identification corr[corr <= sort(corr)[15]] <- 0 corr # AGR MAN WRT CON TRA FIRE # AGR 1 .55 .59 .00 .00 .00 # MAN .55 1 .67 .54 .65 .00 # WRT .59 .67 1 .56 .66 .00 # CON .00 .54 .56 1 .00 .00 # TRA .00 .65 .66 .00 1 .00 # FIRE .00 .00 .00 .00 .00 1 # The rest is unknown (i.e. will be estimated) corr[corr > 0 & corr < 1] <- NA # Using a diagonal shock vcov matrix (standard assumption for SVAR) Bmat <- diag(6) diag(Bmat) <- NA # This estimates the Panel-SVAR using Maximum Likelihood: pSVAR <- SVAR(pVAR, Amat = unclass(corr), Bmat = Bmat, estmethod = "direct") pSVAR # # SVAR Estimation Results: # ======================== # # # Estimated A matrix: # STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # STD.HDW.AGR 1.0000 -0.59223 0.51301 0.0000 0.00000 0 # STD.HDW.MAN -0.2547 1.00000 -0.07819 -0.1711 0.14207 0 # STD.HDW.WRT -0.3924 -0.56875 1.00000 -0.0135 -0.01391 0 # STD.HDW.CON 0.0000 0.02595 -0.18541 1.0000 0.00000 0 # STD.HDW.TRA 0.0000 -0.03321 -0.05370 0.0000 1.00000 0 # STD.HDW.FIRE 0.0000 0.00000 0.00000 0.0000 0.00000 1 # # Estimated B matrix: # STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # STD.HDW.AGR 0.678 0.0000 0.0000 0.0000 0.0000 0.0000 # STD.HDW.MAN 0.000 0.6248 0.0000 0.0000 0.0000 0.0000 # STD.HDW.WRT 0.000 0.0000 0.4155 0.0000 0.0000 0.0000 # STD.HDW.CON 0.000 0.0000 0.0000 0.5028 0.0000 0.0000 # STD.HDW.TRA 0.000 0.0000 0.0000 0.0000 0.5593 0.0000 # STD.HDW.FIRE 0.000 0.0000 0.0000 0.0000 0.0000 0.6475 ``` Now this object is quite involved, which brings us to the actual subject of this section: ```r # psVAR$var$varresult is a list containing the 6 linear models fitted above, it is not displayed in full here. str(pSVAR, give.attr = FALSE, max.level = 3) # List of 13 # $ A : num [1:6, 1:6] 1 -0.255 -0.392 0 0 ... # $ Ase : num [1:6, 1:6] 0 0 0 0 0 0 0 0 0 0 ... # $ B : num [1:6, 1:6] 0.678 0 0 0 0 ... # $ Bse : num [1:6, 1:6] 0 0 0 0 0 0 0 0 0 0 ... # $ LRIM : NULL # $ Sigma.U: num [1:6, 1:6] 43.898 24.88 23.941 4.873 0.661 ... # $ LR :List of 5 # ..$ statistic: Named num 1130 # ..$ parameter: Named num 1 # ..$ p.value : Named num 0 # ..$ method : chr "LR overidentification" # ..$ data.name: symbol data # $ opt :List of 5 # ..$ par : num [1:20] -0.2547 -0.3924 -0.5922 -0.5688 0.0259 ... # ..$ value : num 10924 # ..$ counts : Named int [1:2] 501 NA # ..$ convergence: int 1 # ..$ message : NULL # $ start : num [1:20] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ... # $ type : chr "AB-model" # $ var :List of 10 # ..$ varresult :List of 6 # .. ..$ STD.HDW.AGR :List of 13 # .. ..$ STD.HDW.MAN :List of 13 # .. ..$ STD.HDW.WRT :List of 13 # .. ..$ STD.HDW.CON :List of 13 # .. ..$ STD.HDW.TRA :List of 13 # .. ..$ STD.HDW.FIRE:List of 13 # ..$ datamat :'data.frame': 2060 obs. of 12 variables: # .. ..$ STD.HDW.AGR : num [1:2060] -0.1438 -0.0921 -0.2521 -0.3162 -0.7269 ... # .. ..$ STD.HDW.MAN : num [1:2060] 1.869 -0.821 -1.783 -4.293 -1.322 ... # .. ..$ STD.HDW.WRT : num [1:2060] 1.91 2 -1.97 -1.82 -2.08 ... # .. ..$ STD.HDW.CON : num [1:2060] 1.2323 -0.0178 -2.6833 -2.7555 -0.1215 ... # .. ..$ STD.HDW.TRA : num [1:2060] 1.054 0.672 -1.848 -0.707 -1.14 ... # .. ..$ STD.HDW.FIRE : num [1:2060] 0.911 0.613 0.438 -2.198 -2.223 ... # .. ..$ L1.STD.HDW.AGR : num [1:2060] 0.6571 -0.1438 -0.0921 -0.2521 -0.3162 ... # .. ..$ L1.STD.HDW.MAN : num [1:2060] 2.235 1.869 -0.821 -1.783 -4.293 ... # .. ..$ L1.STD.HDW.WRT : num [1:2060] 1.95 1.91 2 -1.97 -1.82 ... # .. ..$ L1.STD.HDW.CON : num [1:2060] -0.0357 1.2323 -0.0178 -2.6833 -2.7555 ... # .. ..$ L1.STD.HDW.TRA : num [1:2060] 1.088 1.054 0.672 -1.848 -0.707 ... # .. ..$ L1.STD.HDW.FIRE: num [1:2060] 1.048 0.911 0.613 0.438 -2.198 ... # ..$ y : num [1:2061, 1:6] 0.6571 -0.1438 -0.0921 -0.2521 -0.3162 ... # ..$ type : chr "none" # ..$ p : num 1 # ..$ K : num 6 # ..$ obs : num 2060 # ..$ totobs : int 2061 # ..$ restrictions: NULL # ..$ call : language VAR(y = data) # $ iter : Named int 501 # $ call : language SVAR(x = pVAR, estmethod = "direct", Amat = unclass(corr), Bmat = Bmat) ``` ### 8.1 List Search and Identification When dealing with such a list-like object, we might be interested in its complexity by measuring the level of nesting. This can be done with `ldepth`: ```r # The list-tree of this object has 5 levels of nesting ldepth(pSVAR) # [1] 5 # This data has a depth of 1, thus this dataset does not contain list-columns ldepth(data) # [1] 1 ``` Further we might be interested in knowing whether this list-object contains non-atomic elements like call, terms or formulas. The function `is.regular` in the *collapse* package checks if an object is atomic or list-like, and the recursive version `is_unlistable` checks whether all objects in a nested structure are atomic or list-like: ```r # Is this object composed only of atomic elements e.g. can it be unlisted? is_unlistable(pSVAR) # [1] FALSE ``` Evidently this object is not unlistable, from viewing its structure we know that it contains several call and terms objects. We might also want to know if this object saves some kind of residuals or fitted values. This can be done using `has_elem`, which also supports regular expression search of element names: ```r # Does this object contain an element with "fitted" in its name? has_elem(pSVAR, "fitted", regex = TRUE) # [1] TRUE # Does this object contain an element with "residuals" in its name? has_elem(pSVAR, "residuals", regex = TRUE) # [1] TRUE ``` We might also want to know whether the object contains some kind of data-matrix. This can be checked by calling: ```r # Is there a matrix stored in this object? has_elem(pSVAR, is.matrix) # [1] TRUE ``` These functions can sometimes be helpful in exploring objects. A much greater advantage of having functions to search and check lists is the ability to write more complex programs with them (which will not be demonstrated here). ### 8.2 List Subsetting Having gathered some information about the `pSVAR` object, this section introduces several extractor functions to pull out elements from such lists: `get_elem` can be used to pull out elements from lists in a simplified format^[The *vars* package also provides convenient extractor functions for some quantities, but `get_elem` of course works in a much broader range of contexts.]. ```r # This is the path to the residuals from a single equation str(pSVAR$var$varresult$STD.HDW.AGR$residuals) # Named num [1:2060] -0.7234 -0.1962 -0.1993 0.0739 -0.1418 ... # - attr(*, "names")= chr [1:2060] "2" "3" "4" "5" ... # get_elem gets the residuals from all 6 equations and puts them in a top-level list resid <- get_elem(pSVAR, "residuals") str(resid, give.attr = FALSE) # List of 6 # $ STD.HDW.AGR : Named num [1:2060] -0.7234 -0.1962 -0.1993 0.0739 -0.1418 ... # $ STD.HDW.MAN : Named num [1:2060] 0.363 -1.989 -1.167 -3.082 1.474 ... # $ STD.HDW.WRT : Named num [1:2060] 0.37 0.628 -3.054 -0.406 -0.384 ... # $ STD.HDW.CON : Named num [1:2060] 1.035 -1.093 -2.62 -0.611 2.307 ... # $ STD.HDW.TRA : Named num [1:2060] 0.1481 -0.2599 -2.2361 0.8619 -0.0915 ... # $ STD.HDW.FIRE: Named num [1:2060] -0.11396 -0.33092 0.11754 -2.10521 -0.00968 ... # Quick conversion to matrix and plotting qM(resid) %>% plot.ts(main = "Panel-VAR Residuals") ```
plot of chunk PVARplot

plot of chunk PVARplot

Similarly, we could pull out and plot the fitted values: ```r # Regular expression search and retrieval of fitted values get_elem(pSVAR, "^fi", regex = TRUE) %>% qM %>% plot.ts(main = "Panel-VAR Fitted Values") ```
plot of chunk PVARfittedplot

plot of chunk PVARfittedplot

Below the main quantities of interest in SVAR analysis are computed: The impulse response functions (IRF's) and forecast error variance decompositions (FEVD's): ```r # This computes orthogonalized impulse response functions pIRF <- irf(pSVAR) # This computes the forecast error variance decompositions pFEVD <- fevd(pSVAR) ``` The `pIRF` object contains the IRF's with lower and upper confidence bounds and some atomic elements providing information about the object: ```r # See the structure of a vars IRF object: str(pIRF, give.attr = FALSE) # List of 11 # $ irf :List of 6 # ..$ STD.HDW.AGR : num [1:11, 1:6] 0.611 0.399 0.268 0.185 0.132 ... # ..$ STD.HDW.MAN : num [1:11, 1:6] 0.1774 0.1549 0.134 0.1142 0.0959 ... # ..$ STD.HDW.WRT : num [1:11, 1:6] -0.1807 -0.1071 -0.0647 -0.0402 -0.0259 ... # ..$ STD.HDW.CON : num [1:11, 1:6] 0.0215 0.0383 0.0442 0.0438 0.0403 ... # ..$ STD.HDW.TRA : num [1:11, 1:6] -0.02595 -0.01257 -0.00721 -0.00511 -0.00421 ... # ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 0.0122 0.0147 0.0132 0.0104 ... # $ Lower :List of 6 # ..$ STD.HDW.AGR : num [1:11, 1:6] 0.1137 -0.0144 -0.0393 -0.0446 -0.0439 ... # ..$ STD.HDW.MAN : num [1:11, 1:6] -0.6474 -0.3434 -0.2069 -0.125 -0.0734 ... # ..$ STD.HDW.WRT : num [1:11, 1:6] -0.659 -0.427 -0.311 -0.236 -0.189 ... # ..$ STD.HDW.CON : num [1:11, 1:6] -0.721 -0.417 -0.258 -0.183 -0.123 ... # ..$ STD.HDW.TRA : num [1:11, 1:6] -0.4161 -0.2568 -0.169 -0.1231 -0.0894 ... # ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 -0.0157 -0.022 -0.0227 -0.0211 ... # $ Upper :List of 6 # ..$ STD.HDW.AGR : num [1:11, 1:6] 1.218 0.801 0.565 0.389 0.275 ... # ..$ STD.HDW.MAN : num [1:11, 1:6] 0.906 0.601 0.439 0.328 0.239 ... # ..$ STD.HDW.WRT : num [1:11, 1:6] 0.846 0.601 0.428 0.319 0.239 ... # ..$ STD.HDW.CON : num [1:11, 1:6] 0.716 0.514 0.4 0.305 0.234 ... # ..$ STD.HDW.TRA : num [1:11, 1:6] 0.2866 0.21 0.1591 0.1207 0.0899 ... # ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 0.0363 0.0471 0.0461 0.0405 ... # $ response : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ impulse : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ ortho : logi TRUE # $ cumulative: logi FALSE # $ runs : num 100 # $ ci : num 0.05 # $ boot : logi TRUE # $ model : chr "svarest" ``` We could separately access the top-level atomic or list elements using `atomic_elem` or `list_elem`: ```r # Pool-out top-level atomic elements in the list str(atomic_elem(pIRF)) # List of 8 # $ response : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ impulse : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ... # $ ortho : logi TRUE # $ cumulative: logi FALSE # $ runs : num 100 # $ ci : num 0.05 # $ boot : logi TRUE # $ model : chr "svarest" ``` There are also recursive versions of `atomic_elem` and `list_elem` named `reg_elem` and `irreg_elem` which can be used to split nested lists into the atomic and non-atomic parts. These are not covered in this vignette. ### 8.3 Recursive Apply and Unlisting in 2D *vars* supplies simple `plot` methods for IRF and FEVD objects using base graphics. In this section we however want to generate nicer and more compact plots using `ggplot2`, and also compute some statistics on the IRF data. Starting with the latter, the code below sums the 10-period impulse response coefficients of each sector in response to each sectoral impulse and stores them in a data frame: ```r # Computing the cumulative impact after 10 periods list_elem(pIRF) %>% # Pull out the sublist elements containing the IRF coefficients + CI's rapply2d(function(x) round(fsum(x), 2)) %>% # Recursively apply the column-sums to coefficient matrices (could also use colSums) unlist2d(c("Type", "Impulse")) # Recursively row-bind the result to a data.frame and add identifier columns # Type Impulse STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # 1 irf STD.HDW.AGR 1.92 1.08 1.68 0.83 0.72 0.54 # 2 irf STD.HDW.MAN 0.98 2.22 2.12 1.09 0.97 1.05 # 3 irf STD.HDW.WRT -0.47 -0.27 0.65 0.17 0.03 -0.02 # 4 irf STD.HDW.CON 0.33 0.39 0.34 2.00 0.55 0.38 # 5 irf STD.HDW.TRA -0.07 -0.11 -0.24 -0.30 1.31 -0.20 # 6 irf STD.HDW.FIRE 0.07 -0.07 0.02 -0.09 -0.06 1.84 # 7 Lower STD.HDW.AGR -0.18 -2.08 -3.14 -0.68 -2.46 -0.68 # 8 Lower STD.HDW.MAN -1.52 0.38 -1.30 -0.86 -1.82 0.12 # 9 Lower STD.HDW.WRT -2.38 -2.65 -0.22 -2.68 -2.01 -1.20 # 10 Lower STD.HDW.CON -2.01 -2.47 -2.16 0.53 -1.68 -0.80 # 11 Lower STD.HDW.TRA -1.32 -1.34 -1.17 -1.64 0.31 -0.69 # 12 Lower STD.HDW.FIRE -0.16 -0.26 -0.16 -0.27 -0.20 0.96 # 13 Upper STD.HDW.AGR 3.97 3.18 3.21 3.69 2.61 1.58 # 14 Upper STD.HDW.MAN 3.19 3.85 3.00 3.60 3.05 1.78 # 15 Upper STD.HDW.WRT 3.06 2.66 4.41 2.49 3.31 1.47 # 16 Upper STD.HDW.CON 2.85 3.30 3.20 3.88 2.59 1.76 # 17 Upper STD.HDW.TRA 1.08 1.93 1.76 0.72 2.82 0.63 # 18 Upper STD.HDW.FIRE 0.30 0.15 0.30 0.12 0.18 2.21 ``` The function `rapply2d` used here is very similar to `base::rapply`, with the difference that the result is not simplified / unlisted by default and that `rapply2d` will treat data frames like atomic objects and apply functions to them. `unlist2d` is an efficient generalization of `base::unlist` to 2-dimensions, or one could also think of it as a recursive generalization of `do.call(rbind, ...)`. It efficiently unlists nested lists of data objects and creates a data frame with identifier columns for each level of nesting on the left, and the content of the list in columns on the right. The above cumulative coefficients suggest that Agriculture responds mostly to it's own shock, and a bit to shocks in Manufacturing and Wholesale and Retail Trade. Similar patters can be observed for Manufacturing and Wholesale and Retail Trade. Thus these three sectors seem to be interlinked in most countries. The remaining three sectors are mostly affected by their own dynamics, but also by Agriculture and Manufacturing. Let us use `ggplot2` to create nice compact plots of the IRF's and FEVD's. For this task `unlist2d` will again be extremely helpful in creating the data frame representation required. Starting with the IRF's, we will discard the upper and lower bounds and just use the impulses: ```r # This binds the matrices after adding integer row-names to them to a data.table data <- pIRF$irf %>% # Get only the coefficient matrices, discard the confidence bounds unlist2d(idcols = "Impulse", # Recursive unlisting to data.table creating a factor id-column row.names = "Time", # and saving generated rownames in a variable called 'Time' id.factor = TRUE, # -> Create Id column ('Impulse') as factor DT = TRUE) # -> Output as data.table (default is data.frame) head(data, 3) # Impulse Time STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE # # 1: STD.HDW.AGR 1 0.6113132 0.1896711 0.3488940 0.05976606 0.02503336 0.00000000 # 2: STD.HDW.AGR 2 0.3986337 0.1892803 0.3014961 0.09430567 0.07263670 0.03669857 # 3: STD.HDW.AGR 3 0.2676944 0.1654161 0.2491999 0.10769335 0.09330830 0.06042380 data <- melt(data, 1:2) # Using data.table's melt head(data, 3) # Impulse Time variable value # # 1: STD.HDW.AGR 1 STD.HDW.AGR 0.6113132 # 2: STD.HDW.AGR 2 STD.HDW.AGR 0.3986337 # 3: STD.HDW.AGR 3 STD.HDW.AGR 0.2676944 # Here comes the plot: ggplot(data, aes(x = Time, y = value, color = Impulse)) + geom_line(size = I(1)) + geom_hline(yintercept = 0) + labs(y = NULL, title = "Orthogonal Impulse Response Functions") + scale_color_manual(values = rainbow(6)) + facet_wrap(~ variable) + theme_light(base_size = 14) + scale_x_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ scale_y_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ theme(axis.text = element_text(colour = "black"), plot.title = element_text(hjust = 0.5), strip.background = element_rect(fill = "white", colour = NA), strip.text = element_text(face = "bold", colour = "grey30"), axis.ticks = element_line(colour = "black"), panel.border = element_rect(colour = "black")) ```
plot of chunk IRFplot

plot of chunk IRFplot

To round things off, below we do the same thing for the FEVD's: ```r data <- unlist2d(pFEVD, idcols = "variable", row.names = "Time", id.factor = TRUE, DT = TRUE) %>% melt(c("variable", "Time"), variable.name = "Sector") head(data, 3) # variable Time Sector value # # 1: STD.HDW.AGR 1 STD.HDW.AGR 0.8513029 # 2: STD.HDW.AGR 2 STD.HDW.AGR 0.8385913 # 3: STD.HDW.AGR 3 STD.HDW.AGR 0.8264789 # Here comes the plot: ggplot(data, aes(x = Time, y = value, fill = Sector)) + geom_area(position = "fill", alpha = 0.8) + labs(y = NULL, title = "Forecast Error Variance Decompositions") + scale_fill_manual(values = rainbow(6)) + facet_wrap(~ set_class(variable, "factor")) + theme_linedraw(base_size = 14) + scale_x_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ scale_y_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+ theme(plot.title = element_text(hjust = 0.5), strip.background = element_rect(fill = "white", colour = NA), strip.text = element_text(face = "bold", colour = "grey30")) ```
plot of chunk FEVDplot

plot of chunk FEVDplot

Both the IRF's and the FEVD's show that Agriculture, Manufacturing and Wholesale and Retail Trade are broadly interlinked, even in the short-run, and that Agriculture and Manufacturing explain some of the variation in Construction, Transport and Finance at longer horizons. Of course the identification strategy used for this example was not really structural or theory based. A better strategy could be to aggregate the World Input-Output Database and use those shares for identification (which would be another very nice *collapse* exercise, but not for this vignette). ## Going Further To learn more about *collapse*, just examine the documentation `help("collapse-documentation")` which is organized, extensive and contains lots of examples. ## References Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). "Patterns of Structural Change in Developing Countries." . In J. Weiss, & M. Tribe (Eds.), *Routledge Handbook of Industry and Development.* (pp. 65-83). Routledge. Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†*Econometrica* 46 (1): 69–85. collapse/inst/doc/collapse_and_plm.Rmd0000644000176200001440000031147415121640575017556 0ustar liggesusers--- title: "collapse and plm" subtitle: "Fast Transformation and Exploration of Panel Data" # utilizing *plm* classes" Advanced and fast author: "Sebastian Krantz" date: "2021-06-27" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and plm} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- This vignette focuses on the integration of *collapse* and the popular *plm* ('Linear Models for Panel Data') package by Yves Croissant, Giovanni Millo and Kevin Tappe. It will demonstrate the utility of the *pseries* and *pdata.frame* classes introduced in *plm* together with the corresponding methods for fast *collapse* functions (implemented in C or C++), to extend and facilitate extremely fast computations on panel-vectors and panel data frames (20-100 times faster than native *plm*). The *collapse* package should enable R programmers to - with very little effort - write high-performance code in the domain of panel data exploration and panel data econometrics. *** **Notes:** - To learn more about *collapse*, see the 'Introduction to *collapse*' vignette or the built-in structured documentation available under `help("collapse-documentation")` after installing the package. In addition `help("collapse-package")` provides a compact set of examples for quick-start. - Documentation and vignettes can also be viewed [online](). *** The vignette is structured as follows: * **Part 1** introduces *collapse*'s fast functions and associated *transformation operators* to compute various transformations on panel data, and delivers some benchmarks. * **Part 2** uses these functions to explore panel data a bit and introduce additional functions for summary statistics, panel-autocorrelations and testing fixed effects. * **Part 3** finally provides an example programming application by coding a slightly extended and very efficient Hausman and Taylor (1981) estimator. For this vignette we will use a dataset (`wlddev`) supplied with *collapse* containing a panel of 5 key development indicators taken from the World Bank Development Indicators Database: ```r library(collapse) head(wlddev) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 1 Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA 116769997 # 2 Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA 232080002 # 3 Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA 112839996 # 4 Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA 237720001 # 5 Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA 295920013 # 6 Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA 341839996 # POP # 1 8996973 # 2 9169410 # 3 9351441 # 4 9543205 # 5 9744781 # 6 9956320 fnobs(wlddev) # This column-wise counts the number of observations # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 13176 13176 13176 13176 13176 13176 13176 13176 9470 11670 1744 8608 # POP # 12919 fndistinct(wlddev) # This counts the number of distinct values # country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA # 216 216 61 61 7 7 4 2 9470 10548 368 7832 # POP # 12877 ``` ## Part 1: Fast Transformation of Panel Data First let us convert this data to a *plm* panel data.frame (class *pdata.frame*): ```r library(plm) # This creates a panel data frame pwlddev <- pdata.frame(wlddev, index = c("iso3c", "year")) str(pwlddev, give.attr = FALSE) # Classes 'pdata.frame' and 'data.frame': 13176 obs. of 13 variables: # $ country: 'pseries' Named chr "Aruba" "Aruba" "Aruba" "Aruba" ... # $ iso3c : Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ... # $ date : pseries, format: "1961-01-01" "1962-01-01" "1963-01-01" ... # $ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... # $ decade : 'pseries' Named int 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ... # $ region : Factor w/ 7 levels "East Asia & Pacific",..: 3 3 3 3 3 3 3 3 3 3 ... # $ income : Factor w/ 4 levels "High income",..: 1 1 1 1 1 1 1 1 1 1 ... # $ OECD : 'pseries' Named logi FALSE FALSE FALSE FALSE FALSE FALSE ... # $ PCGDP : 'pseries' Named num NA NA NA NA NA NA NA NA NA NA ... # $ LIFEEX : 'pseries' Named num 65.7 66.1 66.4 66.8 67.1 ... # $ GINI : 'pseries' Named num NA NA NA NA NA NA NA NA NA NA ... # $ ODA : 'pseries' Named num NA NA NA NA NA NA NA NA NA NA ... # $ POP : 'pseries' Named num 54211 55438 56225 56695 57032 ... # A pdata.frame has an index attribute attached [retrieved using index(pwlddev) or attr(pwlddev, "index")] str(index(pwlddev)) # Classes 'pindex' and 'data.frame': 13176 obs. of 2 variables: # $ iso3c: Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ... # $ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... # This shows the individual and time dimensions pdim(pwlddev) # Balanced Panel: n = 216, T = 61, N = 13176 ``` A `plm::pdata.frame` is a data.frame with panel identifiers attached as a list of factors in an *index* attribute (non-factor index variables are converted to factor). Each column in that data.frame is a Panel Series (`plm::pseries`), which also has the panel identifiers attached: ```r # Panel Series of GDP per Capita and Life-Expectancy at Birth PCGDP <- pwlddev$PCGDP LIFEEX <- pwlddev$LIFEEX str(LIFEEX) # 'pseries' Named num [1:13176] 65.7 66.1 66.4 66.8 67.1 ... # - attr(*, "names")= chr [1:13176] "ABW-1960" "ABW-1961" "ABW-1962" "ABW-1963" ... # - attr(*, "index")=Classes 'pindex' and 'data.frame': 13176 obs. of 2 variables: # ..$ iso3c: Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ... # ..$ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... ``` Now that we have explored the basic data structures provided in the *plm* package, let's compute some transformations on them: ### 1.1 Between and Within Transformations The functions `fbetween` and `fbetween` can be used to compute efficient between and within transformations on panel vectors and panel data.frames: ```r # Between-Transformations head(fbetween(LIFEEX)) # Between individual (default) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 72.40653 72.40653 72.40653 72.40653 72.40653 72.40653 head(fbetween(LIFEEX, effect = "year")) # Between time # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 53.91206 54.47441 54.85718 55.20272 55.66802 56.12963 # Within-Transformations head(fwithin(LIFEEX)) # Within individuals (default) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # -6.744533 -6.332533 -5.962533 -5.619533 -5.293533 -4.971533 head(fwithin(LIFEEX, effect = "year")) # Within time # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 11.74994 11.59959 11.58682 11.58428 11.44498 11.30537 ``` by default `na.rm = TRUE` thus both functions skip (preserve) missing values in the data (which is the default for all *collapse* functions). For `fbetween` the output behavior can be altered with the option `fill`: Setting `fill = TRUE` will compute the group-means on the complete cases in each group (as long as `na.rm = TRUE`), but replace all values in each group with the group mean (hence overwriting or 'filling up' missing values): ```r # This preserves missing values in the output head(fbetween(PCGDP), 30) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 # NA NA NA NA 25413.84 25413.84 25413.84 25413.84 # This replaces all individuals with the group mean head(fbetween(PCGDP, fill = TRUE), 30) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 # 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 # ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 # 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 # ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 # 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 ``` In `fwithin` the `mean` argument allows to set an arbitrary data mean (different from 0) after the data is centered. In grouped centering task, as sensible choice for such an added mean would be the overall mean of the data series, enabled by the option `mean = "overall.mean"`. This will add the overall mean of the series back to the data after subtracting out group means, and thus preserve the level of the data (and will only change the intercept when employed in a regression): ```r # This performed standard grouped centering head(fwithin(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # -6.744533 -6.332533 -5.962533 -5.619533 -5.293533 -4.971533 # This adds the overall average Life-Expectancy (across countries) to the country-demeaned series head(fwithin(LIFEEX, mean = "overall.mean")) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 57.55177 57.96377 58.33377 58.67677 59.00277 59.32477 ``` `fbetween` and `fwithin` can also be applied to *pdata.frame*'s where they will perform these computations variable by variable: ```r head(fbetween(num_vars(pwlddev)), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1961 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1962 1985.574 NA 72.40653 NA NA 76268.63 head(fbetween(num_vars(pwlddev), fill = TRUE), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 1985.574 25413.84 72.40653 NA 33245000 76268.63 # ABW-1961 1985.574 25413.84 72.40653 NA 33245000 76268.63 # ABW-1962 1985.574 25413.84 72.40653 NA 33245000 76268.63 head(fwithin(num_vars(pwlddev)), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 -25.57377 NA -6.744533 NA NA -22057.63 # ABW-1961 -25.57377 NA -6.332533 NA NA -20830.63 # ABW-1962 -25.57377 NA -5.962533 NA NA -20043.63 head(fwithin(num_vars(pwlddev), mean = "overall.mean"), 3) # decade PCGDP LIFEEX GINI ODA POP # ABW-1960 1960 NA 57.55177 NA NA 24223914 # ABW-1961 1960 NA 57.96377 NA NA 24225141 # ABW-1962 1960 NA 58.33377 NA NA 24225928 ``` Now next to `fbetween` and `fwithin` there also exist short versions `B` and `W`, which are referred to as *transformation operators*. These are essentially wrappers around `fbetween` and `fwithin` and provide the same functionality, but are more parsimonious to employ in regression formulas and also offer additional features when applied to panel data.frames. For panel series, `B` and `W` are exact analogues to `fbetween` and `fwithin`, just under a shorter name: ```r identical(fbetween(PCGDP), B(PCGDP)) # [1] TRUE identical(fbetween(PCGDP, fill = TRUE), B(PCGDP, fill = TRUE)) # [1] TRUE identical(fwithin(PCGDP), W(PCGDP)) # [1] TRUE identical(fwithin(PCGDP, mean = "overall.mean"), W(PCGDP, mean = "overall.mean")) # [1] TRUE ``` When applied to panel data.frames, `B` and `W` offer some additional utility by (a) allowing you to select columns to transform using the `cols` argument (default is `cols = is.numeric`, so by default all numeric columns will be selected for transformation), (b) allowing you to add a prefix to the transformed columns with the `stub` argument (default is `stub = "B."` for `B` and `stub = "W."` for `W`) and (c) preserving the panel-id's with the `keep.ids` argument (default `keep.ids = TRUE`): ```r head(B(pwlddev), 3) # iso3c year B.decade B.PCGDP B.LIFEEX B.GINI B.ODA B.POP # ABW-1960 ABW 1960 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1961 ABW 1961 1985.574 NA 72.40653 NA NA 76268.63 # ABW-1962 ABW 1962 1985.574 NA 72.40653 NA NA 76268.63 head(W(pwlddev, cols = 9:12), 3) # Here using the cols argument # iso3c year W.PCGDP W.LIFEEX W.GINI W.ODA # ABW-1960 ABW 1960 NA -6.744533 NA NA # ABW-1961 ABW 1961 NA -6.332533 NA NA # ABW-1962 ABW 1962 NA -5.962533 NA NA ``` `fbetween` / `B` and `fwithin` / `W` also support weighted computations. This of course applies more to panel-survey settings, but for the sake of illustration suppose we wanted to weight our between and within transformations by the population of these countries: ```r # This replaces values by the POP-weighted group mean and also preserves the weight variable (POP, argument keep.w = TRUE) head(B(pwlddev, w = ~ POP), 3) # iso3c year POP B.decade B.PCGDP B.LIFEEX B.GINI B.ODA # ABW-1960 ABW 1960 54211 1988.976 NA 72.96257 NA NA # ABW-1961 ABW 1961 55438 1988.976 NA 72.96257 NA NA # ABW-1962 ABW 1962 56225 1988.976 NA 72.96257 NA NA # This centers values on the POP-weighted group mean head(W(pwlddev, w = ~ POP, cols = c("PCGDP","LIFEEX","GINI")), 3) # iso3c year POP W.PCGDP W.LIFEEX W.GINI # ABW-1960 ABW 1960 54211 NA -7.300566 NA # ABW-1961 ABW 1961 55438 NA -6.888566 NA # ABW-1962 ABW 1962 56225 NA -6.518566 NA # This centers values on the POP-weighted group mean and also adds the overall POP-weighted mean of the data head(W(pwlddev, w = ~ POP, cols = c("PCGDP","LIFEEX","GINI"), mean = "overall.mean"), 3) # iso3c year POP W.PCGDP W.LIFEEX W.GINI # ABW-1960 ABW 1960 54211 NA 58.58012 NA # ABW-1961 ABW 1961 55438 NA 58.99212 NA # ABW-1962 ABW 1962 56225 NA 59.36212 NA ``` As shown above, with `B` and `W` the weight column can also be passed as a formula or character string, whereas `fbetween` and `fwithin` require the all inputs to be passed directly in terms of data (i.e. `fbetween(get_vars(pwlddev, 9:11), w = pwlddev$POP)`), and the weight vector or id columns are never preserved in the output. Therefore in most applications `B` and `W` are probably more convenient for quick use, whereas `fbetween` and `fwithin` are the preferred programmers choice, also because they have a little less R-overhead which makes them a tiny bit faster. ### 1.2 Higher-Dimensional Between and Within Transformations Analogous to `fbetween` / `B` and `fwithin` / `W`, *collapse* provides a duo of functions and operators `fhdbetween` / `HDB` and `fhdwithin` / `HDW` to efficiently average and center data on multiple groups. The credit herefore goes to Laurent Berge, the author of the *fixest* package who wrote an efficient C-implementation of the alternating-projections algorithm to perform this task. `fhdbetween` / `HDB` and `fhdwithin` / `HDW` enrich this implementation (available in the function `fixest::demean`) by providing more options regarding missing values, and also allowing continuous covariates and (full) interactions to be projected out alongside factors. The methods for *pseries* and *pdata.frame*'s are however rather simple, as they simply simultaneously center panel-vectors on various panel-identifiers in the index (which can be more than 2, the default is to center on all identifiers): ```r # This simultaneously averages Life-Expectancy across countries and years head(HDB(LIFEEX)) # (same as running a regression on country and year dummies and taking the fitted values) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 62.36179 62.85981 63.24258 63.65245 64.11774 64.52503 # This simultaneously centers Life-Expectenacy on countries and years head(HDW(LIFEEX)) # (same as running a regression on country and year dummies and taking the residuals) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 3.300210 3.214193 3.201424 3.134554 2.995255 2.909975 ``` The architecture of `fhdbetween` / `HDB` and `fhdwithin` / `HDW` differs a bit from `fbetween` / `B` and `fwithin` / `W`. This is essentially a consequence of the underlying C++-implementation (accessed through `fixest::demean`), which was not built to accommodate missing values. `fhdbetween` / `HDB` and `fhdwithin` / `HDW` therefore both have an argument `fill = TRUE` (the default), which stipulates that missing values in the data are preserved in the output. The *collapse* default `na.rm = TRUE` again ensures that only complete cases are used for the computation: ```r # Missing values are preserved in the output when fill = TRUE (the default) head(HDB(PCGDP), 30) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 # NA NA NA NA NA NA NA NA NA NA NA # ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 # NA NA NA NA 21833.32 22132.25 22479.20 22772.31 # When fill = FALSE, only the complete cases are returned nofill <- HDB(PCGDP, fill = FALSE) head(nofill, 30) # ABW-1986 ABW-1987 ABW-1988 ABW-1989 ABW-1990 ABW-1991 ABW-1992 ABW-1993 ABW-1994 ABW-1995 ABW-1996 # 21833.32 22132.25 22479.20 22772.31 23064.29 23060.00 23089.75 23115.36 23343.25 23595.16 23823.11 # ABW-1997 ABW-1998 ABW-1999 ABW-2000 ABW-2001 ABW-2002 ABW-2003 ABW-2004 ABW-2005 ABW-2006 ABW-2007 # 24149.44 24424.69 24727.46 25205.98 25399.16 25603.11 25851.29 26349.64 26665.54 27224.58 27772.82 # ABW-2008 ABW-2009 ABW-2010 ABW-2011 ABW-2012 ABW-2013 ABW-2014 ABW-2015 # 27769.52 27002.95 27218.84 27424.18 27471.49 27660.92 27889.34 28107.78 # This results in a shorter panel-vector length(nofill) # [1] 9470 length(PCGDP) # [1] 13176 # The cases that were missing and removed from the output are available as an attribute head(attr(nofill, "na.rm"), 30) # [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 59 60 61 62 ``` In the *pdata.frame* methods there are 3 different choices how to deal with missing values. The default for the *plm* classes in `variable.wise = TRUE`, which will essentially sequentially apply `fhdbetween.pseries` and `fhdwithin.pseries` (with the default `fill = TRUE`) to all columns. This is the same behavior as in `fbetween` / `B` and `fwithin` / `W`, which also consider the column-wise complete obs: ```r # This column-wise centers the data on countries and years tail(HDW(pwlddev), 10) # HDW.decade HDW.PCGDP HDW.LIFEEX HDW.GINI HDW.ODA HDW.POP # ZWE-2011 0 -4632.971 -8.080748 -3.663217 118306300 -4547122 # ZWE-2012 0 -4523.505 -6.271385 NA 385526419 -4749368 # ZWE-2013 0 -4710.576 -4.753056 NA 149910333 -4903132 # ZWE-2014 0 -4931.693 -3.568136 NA 93295114 -5059317 # ZWE-2015 0 -5148.895 -2.685053 NA 150833589 -5224484 # ZWE-2016 0 -5433.809 -2.203219 NA -27844184 -5404667 # ZWE-2017 0 -5645.022 -1.920365 -1.964138 10266318 -5591762 # ZWE-2018 0 -5938.794 -1.759333 NA 59646823 -5774326 # ZWE-2019 0 -5710.646 -1.669415 5.627356 223473855 -5946725 # ZWE-2020 0 NA NA NA NA NA ``` If `variable.wise = FALSE`, `fhdbetween` / `HDB` and `fhdwithin` / `HDW` will only consider the complete cases in the dataset, but still return a dataset of the same dimensions (as long as `fill = TRUE`), resulting in some rows all-missing: ```r # This centers the complete cases of the data data on countries and years and keeps missing cases tail(HDW(pwlddev, variable.wise = FALSE), 10) # HDW.decade HDW.PCGDP HDW.LIFEEX HDW.GINI HDW.ODA HDW.POP # ZWE-2011 0 517.6924 -4.379840 -3.839653 -176176494 -3042247 # ZWE-2012 NA NA NA NA NA NA # ZWE-2013 NA NA NA NA NA NA # ZWE-2014 NA NA NA NA NA NA # ZWE-2015 NA NA NA NA NA NA # ZWE-2016 NA NA NA NA NA NA # ZWE-2017 0 -128.5240 1.971143 -1.314869 -67497466 1936716 # ZWE-2018 NA NA NA NA NA NA # ZWE-2019 0 -389.1684 2.408697 5.154522 243673961 1105530 # ZWE-2020 NA NA NA NA NA NA ``` Finally, if also `fill = FALSE`, the behavior is the same as in the *pseries* method: Missing cases are removed from the data: ```r # This centers the complete cases of the data data on countries and years, and removes missing cases res <- HDW(pwlddev, fill = FALSE) tail(res, 10) # HDW.decade HDW.PCGDP HDW.LIFEEX HDW.GINI HDW.ODA HDW.POP # ZMB-1996 0 534.39373 -3.6445256 -4.744748 -174237036 4911230.7 # ZMB-1998 0 201.58094 -4.1708951 -5.085621 -492258601 644947.7 # ZMB-2002 0 250.78234 -2.9085522 -10.912265 81848768 -1027712.3 # ZMB-2004 0 -72.94954 -1.9629513 1.494340 396830282 -3774596.6 # ZMB-2006 0 -308.55937 -0.4975872 2.407226 485998870 -2255101.6 # ZMB-2010 0 -428.16949 3.9600416 4.497547 -148714637 -4174306.2 # ZMB-2015 0 -1106.52213 8.4099983 7.553052 -335529320 -4962997.8 # ZWE-2011 0 517.69244 -4.3798401 -3.839653 -176176494 -3042246.9 # ZWE-2017 0 -128.52402 1.9711431 -1.314869 -67497466 1936716.5 # ZWE-2019 0 -389.16842 2.4086971 5.154522 243673961 1105530.5 tail(attr(res, "na.rm")) # [1] 13169 13170 13171 13172 13174 13176 ``` *Notes: * (1) Because of the different missing case options and associated challenges, panel-identifiers are not preserved in `HDB` and `HDW`. (2) The default `variable.wise = TRUE` and `fill = TRUE` was only set for the *pseries* and *pdata.frame* methods, to harmonize the default implementations with `fbetween` / `B` and `fwithin` / `W` for these classes. In the standard *default*, *matrix* and *data.frame* methods, the defaults are `variable.wise = FALSE` and `fill = FALSE` (i.e. missing cases are removed beforehand), which is generally more efficient. ### 1.3 Scaling and Centering Next to the above functions for grouped centering and averaging, the function / operator pair `fscale` / `STD` can be used to efficiently standardize (i.e. scale and center) panel data along an arbitrary dimension. The architecture is identical to that of `fwithin` / `W` or `fbetween` / `B`. ```r # This standardizes GDP per capita in each country STD_PCGDP <- STD(PCGDP) # Checks: head(fmean(STD_PCGDP, index(STD_PCGDP, 1))) # ABW AFG AGO ALB AND ARE # -1.422473e-15 2.528841e-16 -6.189493e-16 -2.275957e-16 -9.281464e-16 -6.661338e-17 head(fsd(STD_PCGDP, index(STD_PCGDP, 1))) # ABW AFG AGO ALB AND ARE # 1 1 1 1 1 1 # This standardizes GDP per capita in each year STD_PCGDP_T <- STD(PCGDP, effect = "year") # Checks: head(fmean(STD_PCGDP_T, index(STD_PCGDP_T, 2))) # 1960 1961 1962 1963 1964 1965 # 9.882205e-17 3.496021e-16 1.889741e-17 -2.185013e-16 -1.724389e-16 2.616954e-16 head(fsd(STD_PCGDP_T, index(STD_PCGDP_T, 2))) # 1960 1961 1962 1963 1964 1965 # 1 1 1 1 1 1 ``` And similarly for *pdata.frame*'s: ```r head(STD(pwlddev, cols = 9:12)) # iso3c year STD.PCGDP STD.LIFEEX STD.GINI STD.ODA # ABW-1960 ABW 1960 NA -2.372636 NA NA # ABW-1961 ABW 1961 NA -2.227700 NA NA # ABW-1962 ABW 1962 NA -2.097539 NA NA # ABW-1963 ABW 1963 NA -1.976876 NA NA # ABW-1964 ABW 1964 NA -1.862193 NA NA # ABW-1965 ABW 1965 NA -1.748918 NA NA head(STD(pwlddev, cols = 9:12, effect = "year")) # iso3c year STD.PCGDP STD.LIFEEX STD.GINI STD.ODA # ABW-1960 ABW 1960 NA 0.9609854 NA NA # ABW-1961 ABW 1961 NA 0.9485730 NA NA # ABW-1962 ABW 1962 NA 0.9585105 NA NA # ABW-1963 ABW 1963 NA 0.9669638 NA NA # ABW-1964 ABW 1964 NA 0.9579477 NA NA # ABW-1965 ABW 1965 NA 0.9556529 NA NA ``` More customized scaling can be done with the help of the `mean` and `sd` arguments to `fscale` / `STD`. By default `mean = 0` and `sd = 1`, but these could be assigned any numeric values: ```r # This will scale the data such that mean mean within each country is 5 and the standard deviation is 3 qsu(fscale(pwlddev$PCGDP, mean = 5, sd = 3)) # N/T Mean SD Min Max # Overall 9466 5 2.968 -6.1908 16.257 # Between 202 5 0 5 5 # Within 46.8614 5 2.968 -6.1908 16.257 ``` Even further customization (i.e. setting means and standard deviations for each group and / or each column) can of course be achieved by calling `collapse::TRA` on the result of `fscale` to sweep out an appropriate set of means and standard deviations. Scaling without centering can be done with the option `mean = FALSE`. This will also preserve the mean of the data overall and within each group: ```r # Scaling without centering: Mean preserving with fscale / STD qsu(fscale(pwlddev$PCGDP, mean = FALSE, sd = 3)) # N/T Mean SD Min Max # Overall 9466 12031.4627 17803.3537 247.7598 131349.27 # Between 202 12169.2793 18055.6626 253.1886 131342.669 # Within 46.8614 12031.4627 2.968 12020.2718 12042.7196 # Scaling without centering can also be done using fsd, but this does not preserve the mean qsu(fsd(pwlddev$PCGDP, index(pwlddev, 1), TRA = "/")) # N/T Mean SD Min Max # Overall 9466 4.247 3.192 0.0579 26.647 # Between 202 4.6036 3.5846 0.8207 24.8111 # Within 46.8614 4.247 0.9893 0.5167 7.9993 ``` Finally a special kind of data harmonization in the first two moments can be done by setting `mean = "overall.mean"` and `sd = "within.sd"` in a grouped scaling task. This will harmonize the data across groups such that the mean of each group is equal to the overall data mean and the standard deviation equal to the within standard deviation (= the standard deviation calculated on the group-centered series): ```r fmean(pwlddev$PCGDP) # Overall mean # [1] 12048.78 fsd(W(pwlddev$PCGDP)) # Within sd # [1] 6723.681 # Scaling and centerin such that the mean of each country is the overall mean, and the sd of each country is the within sd qsu(fscale(pwlddev$PCGDP, mean = "overall.mean", sd = "within.sd")) # N/T Mean SD Min Max # Overall 9466 12048.778 6651.9052 -13032.4333 37278.2175 # Between 202 12048.778 0 12048.778 12048.778 # Within 46.8614 12048.778 6651.9052 -13032.4333 37278.2175 ``` All of this seamlessly generalizes to weighted scaling an centering, using the `w` argument to add a weight vector. ### 1.4 Panel Lags / Leads, Differences and Growth Rates With `flag` / `L` / `F`, `fdiff` / `D` and `fgrowth` / `G`, *collapse* provides a fast and comprehensive C++ based solution to the computation of (sequences of) lags / leads and (sequences of) lagged / leaded and suitably iterated (quasi-, log-) differences and growth rates on panel data. The *pseries* and *pdata.frame* methods to these functions and associated *transformation operators* use the panel-identifiers in the 'index' attached to these objects (where the last variable in the 'index' is taken as the time-variable and the variables before that are taken as individual identifiers) to perform fast fully-identified time-dependent operations on panel data, without the need of sorting the data. With `flag` / `L` / `F`, it is easy to lag or lead *pseries*: ```r # A panel-lag head(flag(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 65.662 66.074 66.444 66.787 67.113 # A panel-lead head(flag(LIFEEX, -1)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # 66.074 66.444 66.787 67.113 67.435 67.762 # The lag and lead operators are even more parsimonious to employ: all_identical(L(LIFEEX), flag(LIFEEX), lag(LIFEEX)) # [1] TRUE all_identical(F(LIFEEX), flag(LIFEEX, -1), lead(LIFEEX)) # [1] TRUE ``` It is also possible to compute a sequence of lags / leads using `flag` or one of the operators: ```r # sequence of panel- lags and leads head(flag(LIFEEX, -1:3)) # F1 -- L1 L2 L3 # ABW-1960 66.074 65.662 NA NA NA # ABW-1961 66.444 66.074 65.662 NA NA # ABW-1962 66.787 66.444 66.074 65.662 NA # ABW-1963 67.113 66.787 66.444 66.074 65.662 # ABW-1964 67.435 67.113 66.787 66.444 66.074 # ABW-1965 67.762 67.435 67.113 66.787 66.444 all_identical(L(LIFEEX, -1:3), F(LIFEEX, 1:-3), flag(LIFEEX, -1:3)) # [1] TRUE # The native plm implementation also returns a matrix of lags but with different column names head(lag(LIFEEX, -1:3), 4) # -1 0 1 2 3 # ABW-1960 66.074 65.662 NA NA NA # ABW-1961 66.444 66.074 65.662 NA NA # ABW-1962 66.787 66.444 66.074 65.662 NA # ABW-1963 67.113 66.787 66.444 66.074 65.662 ``` Of course the lag orders may be unevenly spaced, i.e. `L(x, -1:3*12)` would compute seasonal lags on monthly data. On *pdata.frame*'s, the effects of `flag` and `L` / `F` differ insofar that `flag` will just lag the entire dataset without preserving identifiers (although the index attribute is always preserved), whereas `L` / `F` by default (`cols = is.numeric`) select the numeric variables and add the panel-id's on the left (default `keep.ids = TRUE`): ```r # This lags the entire data head(flag(pwlddev)) # country iso3c date year decade region income OECD PCGDP # ABW-1960 NA NA NA # ABW-1961 Aruba ABW 1961-01-01 1960 1960 Latin America & Caribbean High income FALSE NA # ABW-1962 Aruba ABW 1962-01-01 1961 1960 Latin America & Caribbean High income FALSE NA # ABW-1963 Aruba ABW 1963-01-01 1962 1960 Latin America & Caribbean High income FALSE NA # ABW-1964 Aruba ABW 1964-01-01 1963 1960 Latin America & Caribbean High income FALSE NA # ABW-1965 Aruba ABW 1965-01-01 1964 1960 Latin America & Caribbean High income FALSE NA # LIFEEX GINI ODA POP # ABW-1960 NA NA NA NA # ABW-1961 65.662 NA NA 54211 # ABW-1962 66.074 NA NA 55438 # ABW-1963 66.444 NA NA 56225 # ABW-1964 66.787 NA NA 56695 # ABW-1965 67.113 NA NA 57032 # This lags only numeric columns and preserves panel-id's head(L(pwlddev)) # iso3c year L1.decade L1.PCGDP L1.LIFEEX L1.GINI L1.ODA L1.POP # ABW-1960 ABW 1960 NA NA NA NA NA NA # ABW-1961 ABW 1961 1960 NA 65.662 NA NA 54211 # ABW-1962 ABW 1962 1960 NA 66.074 NA NA 55438 # ABW-1963 ABW 1963 1960 NA 66.444 NA NA 56225 # ABW-1964 ABW 1964 1960 NA 66.787 NA NA 56695 # ABW-1965 ABW 1965 1960 NA 67.113 NA NA 57032 # This lags only columns 9 through 12 and preserves panel-id's head(L(pwlddev, cols = 9:12)) # iso3c year L1.PCGDP L1.LIFEEX L1.GINI L1.ODA # ABW-1960 ABW 1960 NA NA NA NA # ABW-1961 ABW 1961 NA 65.662 NA NA # ABW-1962 ABW 1962 NA 66.074 NA NA # ABW-1963 ABW 1963 NA 66.444 NA NA # ABW-1964 ABW 1964 NA 66.787 NA NA # ABW-1965 ABW 1965 NA 67.113 NA NA ``` We can also easily compute a sequence of lags / leads on a panel data.frame: ```r # This lags only columns 9 through 12 and preserves panel-id's head(L(pwlddev, -1:3, cols = 9:12)) # iso3c year F1.PCGDP PCGDP L1.PCGDP L2.PCGDP L3.PCGDP F1.LIFEEX LIFEEX L1.LIFEEX L2.LIFEEX # ABW-1960 ABW 1960 NA NA NA NA NA 66.074 65.662 NA NA # ABW-1961 ABW 1961 NA NA NA NA NA 66.444 66.074 65.662 NA # ABW-1962 ABW 1962 NA NA NA NA NA 66.787 66.444 66.074 65.662 # ABW-1963 ABW 1963 NA NA NA NA NA 67.113 66.787 66.444 66.074 # ABW-1964 ABW 1964 NA NA NA NA NA 67.435 67.113 66.787 66.444 # ABW-1965 ABW 1965 NA NA NA NA NA 67.762 67.435 67.113 66.787 # L3.LIFEEX F1.GINI GINI L1.GINI L2.GINI L3.GINI F1.ODA ODA L1.ODA L2.ODA L3.ODA # ABW-1960 NA NA NA NA NA NA NA NA NA NA NA # ABW-1961 NA NA NA NA NA NA NA NA NA NA NA # ABW-1962 NA NA NA NA NA NA NA NA NA NA NA # ABW-1963 65.662 NA NA NA NA NA NA NA NA NA NA # ABW-1964 66.074 NA NA NA NA NA NA NA NA NA NA # ABW-1965 66.444 NA NA NA NA NA NA NA NA NA NA ``` Essentially the same functionality applies to `fdiff` / `D` and `fgrowth` / `G`, with the main differences that these functions also have a `diff` argument to determine the number of iterations: ```r # Panel-difference of Life Expectancy head(fdiff(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.412 0.370 0.343 0.326 0.322 # Second panel-difference head(fdiff(LIFEEX, diff = 2)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA NA -0.042 -0.027 -0.017 -0.004 # Panel-growth rate of Life Expectancy head(fgrowth(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.6274558 0.5599782 0.5162242 0.4881189 0.4797878 # Growth rate of growth rate of Life Expectancy head(fgrowth(LIFEEX, diff = 2)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA NA -10.754153 -7.813521 -5.444387 -1.706782 identical(D(LIFEEX), fdiff(LIFEEX)) # [1] TRUE identical(G(LIFEEX), fgrowth(LIFEEX)) # [1] TRUE identical(fdiff(LIFEEX), diff(LIFEEX)) # Same as plm::diff.pseries (which does not compute iterated panel-differences) # [1] TRUE ``` By default, growth rates are calculated in percentage terms which is set by the default argument `scale = 100`. It is also possible to compute log-differences with `fdiff(.., log = TRUE)` or the `Dlog` operator, and growth rates in percentage terms based on log-differences using `fgrowth(.., logdiff = TRUE)`. ```r # Panel log-difference of Life Expectancy head(Dlog(LIFEEX)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.006254955 0.005584162 0.005148963 0.004869315 0.004786405 # Panel log-difference growth rate (in percentage terms) of Life Expectancy head(G(LIFEEX, logdiff = TRUE)) # ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 # NA 0.6254955 0.5584162 0.5148963 0.4869315 0.4786405 ``` It is also possible to compute sequences of lagged / leaded and iterated differences, log-differences and growth rates: ```r # first and second forward-difference and first and second difference of lags 1-3 of Life-Expectancy head(D(LIFEEX, -1:3, 1:2)) # FD1 FD2 -- D1 D2 L2D1 L2D2 L3D1 L3D2 # ABW-1960 -0.412 -0.042 65.662 NA NA NA NA NA NA # ABW-1961 -0.370 -0.027 66.074 0.412 NA NA NA NA NA # ABW-1962 -0.343 -0.017 66.444 0.370 -0.042 0.782 NA NA NA # ABW-1963 -0.326 -0.004 66.787 0.343 -0.027 0.713 NA 1.125 NA # ABW-1964 -0.322 0.005 67.113 0.326 -0.017 0.669 -0.113 1.039 NA # ABW-1965 -0.327 0.006 67.435 0.322 -0.004 0.648 -0.065 0.991 NA # Same with Log-differences head(Dlog(LIFEEX, -1:3, 1:2)) # FDlog1 FDlog2 -- Dlog1 Dlog2 L2Dlog1 L2Dlog2 # ABW-1960 -0.006254955 -6.707929e-04 4.184520 NA NA NA NA # ABW-1961 -0.005584162 -4.351984e-04 4.190775 0.006254955 NA NA NA # ABW-1962 -0.005148963 -2.796481e-04 4.196359 0.005584162 -0.0006707929 0.01183912 NA # ABW-1963 -0.004869315 -8.291000e-05 4.201508 0.005148963 -0.0004351984 0.01073312 NA # ABW-1964 -0.004786405 5.098981e-05 4.206378 0.004869315 -0.0002796481 0.01001828 -0.001820838 # ABW-1965 -0.004837395 6.482830e-05 4.211164 0.004786405 -0.0000829100 0.00965572 -0.001077405 # L3Dlog1 L3Dlog2 # ABW-1960 NA NA # ABW-1961 NA NA # ABW-1962 NA NA # ABW-1963 0.01698808 NA # ABW-1964 0.01560244 NA # ABW-1965 0.01480468 NA # Same with (exact) growth rates head(G(LIFEEX, -1:3, 1:2)) # FG1 FG2 -- G1 G2 L2G1 L2G2 L3G1 L3G2 # ABW-1960 -0.6235433 11.974895 65.662 NA NA NA NA NA NA # ABW-1961 -0.5568599 8.428580 66.074 0.6274558 NA NA NA NA NA # ABW-1962 -0.5135730 5.728297 66.444 0.5599782 -10.754153 1.1909476 NA NA NA # ABW-1963 -0.4857479 1.727984 66.787 0.5162242 -7.813521 1.0790931 NA 1.713320 NA # ABW-1964 -0.4774968 -1.051555 67.113 0.4881189 -5.444387 1.0068629 -15.45699 1.572479 NA # ABW-1965 -0.4825714 -1.319230 67.435 0.4797878 -1.706782 0.9702487 -10.08666 1.491482 NA ``` A further possibility is to compute quasi-differences and quasi-log-differences of the form $x_t - \rho x_{t-s}$ or $log(x_t) - \rho log(x_{t-s})$. These are useful for panel-regressions suffering from serial-correlation, following Cochrane & Orcutt (1949), and can be specified with the `rho` argument to `fdiff`, `D` and `Dlog`. ```r # Regression of GDP on Life Expectance with country and time FE mod <- lm(PCGDP ~ LIFEEX, data = fhdwithin(fselect(pwlddev, PCGDP, LIFEEX), fill = FALSE)) mod # # Call: # lm(formula = PCGDP ~ LIFEEX, data = fhdwithin(fselect(pwlddev, # PCGDP, LIFEEX), fill = FALSE)) # # Coefficients: # (Intercept) LIFEEX # -2.442e-12 -3.330e+02 # Computing autocorrelation of residuals r <- residuals(mod) r <- pwcor(r, L(r, 1, substr(names(r), 1, 3))) # Need this to compute a panel-lag r # [1] .98 # Running the regression again quasi-differencing the transformed data modCO <- lm(PCGDP ~ LIFEEX, data = fdiff(fhdwithin(fselect(pwlddev, PCGDP, LIFEEX), variable.wise = FALSE), rho = r, stubs = FALSE)) modCO # # Call: # lm(formula = PCGDP ~ LIFEEX, data = fdiff(fhdwithin(fselect(pwlddev, # PCGDP, LIFEEX), variable.wise = FALSE), rho = r, stubs = FALSE)) # # Coefficients: # (Intercept) LIFEEX # -12.93 -91.97 # In this case rho is almost 1, so we might as well just difference the untransformed data and go with that # We also need to bootstrap this for proper standard errors. ``` A final important advantage of the *collapse* functions is that the panel-identifiers are preserved, even if a matrix of lags / leads / differences or growth rates is returned. This allows for nested panel-computations, for example we can compute shifted sequences of lagged / leaded and iterated panel differences: ```r # Sequence of differneces (same as above), adding one extra lag of the whole sequence head(L(D(LIFEEX, -1:3, 1:2), 0:1)) # FD1 L1.FD1 FD2 L1.FD2 -- L1.-- D1 L1.D1 D2 L1.D2 L2D1 L1.L2D1 L2D2 # ABW-1960 -0.412 NA -0.042 NA 65.662 NA NA NA NA NA NA NA NA # ABW-1961 -0.370 -0.412 -0.027 -0.042 66.074 65.662 0.412 NA NA NA NA NA NA # ABW-1962 -0.343 -0.370 -0.017 -0.027 66.444 66.074 0.370 0.412 -0.042 NA 0.782 NA NA # ABW-1963 -0.326 -0.343 -0.004 -0.017 66.787 66.444 0.343 0.370 -0.027 -0.042 0.713 0.782 NA # ABW-1964 -0.322 -0.326 0.005 -0.004 67.113 66.787 0.326 0.343 -0.017 -0.027 0.669 0.713 -0.113 # ABW-1965 -0.327 -0.322 0.006 0.005 67.435 67.113 0.322 0.326 -0.004 -0.017 0.648 0.669 -0.065 # L1.L2D2 L3D1 L1.L3D1 L3D2 L1.L3D2 # ABW-1960 NA NA NA NA NA # ABW-1961 NA NA NA NA NA # ABW-1962 NA NA NA NA NA # ABW-1963 NA 1.125 NA NA NA # ABW-1964 NA 1.039 1.125 NA NA # ABW-1965 -0.113 0.991 1.039 NA NA ``` All of this naturally generalized to computations on *pdata.frames*: ```r head(D(pwlddev, -1:3, 1:2, cols = 9:10), 3) # iso3c year FD1.PCGDP FD2.PCGDP PCGDP D1.PCGDP D2.PCGDP L2D1.PCGDP L2D2.PCGDP L3D1.PCGDP # ABW-1960 ABW 1960 NA NA NA NA NA NA NA NA # ABW-1961 ABW 1961 NA NA NA NA NA NA NA NA # ABW-1962 ABW 1962 NA NA NA NA NA NA NA NA # L3D2.PCGDP FD1.LIFEEX FD2.LIFEEX LIFEEX D1.LIFEEX D2.LIFEEX L2D1.LIFEEX L2D2.LIFEEX # ABW-1960 NA -0.412 -0.042 65.662 NA NA NA NA # ABW-1961 NA -0.370 -0.027 66.074 0.412 NA NA NA # ABW-1962 NA -0.343 -0.017 66.444 0.370 -0.042 0.782 NA # L3D1.LIFEEX L3D2.LIFEEX # ABW-1960 NA NA # ABW-1961 NA NA # ABW-1962 NA NA head(L(D(pwlddev, -1:3, 1:2, cols = 9:10), 0:1), 3) # iso3c year FD1.PCGDP L1.FD1.PCGDP FD2.PCGDP L1.FD2.PCGDP PCGDP L1.PCGDP D1.PCGDP # ABW-1960 ABW 1960 NA NA NA NA NA NA NA # ABW-1961 ABW 1961 NA NA NA NA NA NA NA # ABW-1962 ABW 1962 NA NA NA NA NA NA NA # L1.D1.PCGDP D2.PCGDP L1.D2.PCGDP L2D1.PCGDP L1.L2D1.PCGDP L2D2.PCGDP L1.L2D2.PCGDP # ABW-1960 NA NA NA NA NA NA NA # ABW-1961 NA NA NA NA NA NA NA # ABW-1962 NA NA NA NA NA NA NA # L3D1.PCGDP L1.L3D1.PCGDP L3D2.PCGDP L1.L3D2.PCGDP FD1.LIFEEX L1.FD1.LIFEEX FD2.LIFEEX # ABW-1960 NA NA NA NA -0.412 NA -0.042 # ABW-1961 NA NA NA NA -0.370 -0.412 -0.027 # ABW-1962 NA NA NA NA -0.343 -0.370 -0.017 # L1.FD2.LIFEEX LIFEEX L1.LIFEEX D1.LIFEEX L1.D1.LIFEEX D2.LIFEEX L1.D2.LIFEEX L2D1.LIFEEX # ABW-1960 NA 65.662 NA NA NA NA NA NA # ABW-1961 -0.042 66.074 65.662 0.412 NA NA NA NA # ABW-1962 -0.027 66.444 66.074 0.370 0.412 -0.042 NA 0.782 # L1.L2D1.LIFEEX L2D2.LIFEEX L1.L2D2.LIFEEX L3D1.LIFEEX L1.L3D1.LIFEEX L3D2.LIFEEX # ABW-1960 NA NA NA NA NA NA # ABW-1961 NA NA NA NA NA NA # ABW-1962 NA NA NA NA NA NA # L1.L3D2.LIFEEX # ABW-1960 NA # ABW-1961 NA # ABW-1962 NA ``` ### 1.5 Panel Data to Array Conversions Viewing and transforming panel data stored in an array can be a powerful strategy, especially as it provides much more direct access to the different dimensions of the data. The function `psmat` can be used to efficiently transform *pseries* to a 2D matrix, and *pdata.frame*'s to a 3D array: ```r # Converting the panel series to array, individual rows (default) str(psmat(LIFEEX)) # 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # - attr(*, "transpose")= logi FALSE # Converting the panel series to array, individual columns str(psmat(LIFEEX, transpose = TRUE)) # 'psmat' num [1:61, 1:216] 65.7 66.1 66.4 66.8 67.1 ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # - attr(*, "transpose")= logi TRUE # Same as plm::as.matrix.pseries, apart from attributes identical(unattrib(psmat(LIFEEX)), unattrib(as.matrix(LIFEEX))) # [1] TRUE identical(unattrib(psmat(LIFEEX, transpose = TRUE)), unattrib(as.matrix(LIFEEX, idbyrow = FALSE))) # [1] TRUE ``` Applying `psmat` to a *pdata.frame* yields a 3D array: ```r psar <- psmat(pwlddev, cols = 9:12) str(psar) # 'psmat' num [1:216, 1:61, 1:4] NA NA NA NA NA ... # - attr(*, "dimnames")=List of 3 # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA" # - attr(*, "transpose")= logi FALSE str(psmat(pwlddev, cols = 9:12, transpose = TRUE)) # 'psmat' num [1:61, 1:216, 1:4] NA NA NA NA NA NA NA NA NA NA ... # - attr(*, "dimnames")=List of 3 # ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA" # - attr(*, "transpose")= logi TRUE ``` This format can be very convenient to quickly and freely access data for different countries, variables and time-periods: ```r # Looking at wealth, health and inequality in Brazil and Argentinia, 1990-1999 aperm(psar[c("BRA","ARG"), as.character(1990:1999), c("PCGDP", "LIFEEX", "GINI")]) # , , BRA # # 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 # PCGDP 7983.7 7963.1 7791.8 8020.6 8311.6 8540.1 8591.0 8744.8 8641.3 8554.1 # LIFEEX 66.3 66.7 67.1 67.5 67.9 68.3 68.7 69.1 69.4 69.8 # GINI 60.5 NA 53.2 60.1 NA 59.6 59.9 59.8 59.6 59.0 # # , , ARG # # 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 # PCGDP 6245.7 6721.3 7157.3 7644.2 7988.6 7666.5 7994.2 8543.0 8772.1 8381.3 # LIFEEX 71.6 71.8 72.0 72.2 72.5 72.7 72.8 73.0 73.2 73.4 # GINI NA 46.8 45.5 44.9 45.9 48.9 49.5 49.1 50.7 49.8 ``` `psmat` can also return the output as a list of panel series matrices: ```r pslist <- psmat(pwlddev, cols = 9:12, array = FALSE) str(pslist) # List of 4 # $ PCGDP : 'psmat' num [1:216, 1:61] NA NA NA NA NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE # $ LIFEEX: 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE # $ GINI : 'psmat' num [1:216, 1:61] NA NA NA NA NA NA NA NA NA NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE # $ ODA : 'psmat' num [1:216, 1:61] NA 116769997 -390000 NA NA ... # ..- attr(*, "dimnames")=List of 2 # .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ... # .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ... # ..- attr(*, "transpose")= logi FALSE ``` This list can then be unlisted using the function `unlist2d` (for unlisting in 2-dimensions), to yield a reshaped data.frame: ```r head(unlist2d(pslist, idcols = "Variable", row.names = "Country Code"), 3) # Variable Country Code 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 # 1 PCGDP ABW NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 2 PCGDP AFG NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 3 PCGDP AGO NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA # 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 # 1 NA NA NA NA NA NA NA NA NA NA NA 15669.616 # 2 NA NA NA NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA 3193.404 2947.194 2844.322 2859.919 2925.367 2922.217 2902.618 # 1987 1988 1989 1990 1991 1992 1993 1994 1995 # 1 18427.612 22134.017 24837.951 25357.787 26329.313 26401.969 26663.208 27272.310 26705.18 # 2 NA NA NA NA NA NA NA NA NA # 3 2916.794 2989.617 2889.886 2697.491 2635.156 2401.234 1767.025 1733.844 1930.80 # 1996 1997 1998 1999 2000 2001 2002 2003 2004 # 1 26087.776 27190.501 27151.92 26954.40 28417.384 26966.055 25508.3027 25469.2876 27005.5294 # 2 NA NA NA NA NA NA 330.3036 343.0809 333.2167 # 3 2122.968 2205.294 2235.39 2211.13 2205.205 2223.335 2444.4178 2433.8616 2608.7840 # 2005 2006 2007 2008 2009 2010 2011 2012 2013 # 1 26979.8854 27046.2242 27427.579 27365.9312 24463.6922 23512.603 24233.0011 23781.2573 24635.7649 # 2 357.2347 365.2845 405.549 412.0143 488.3003 543.303 528.7366 576.1901 587.5651 # 3 2896.5547 3116.1810 3424.372 3668.0799 3565.0569 3587.884 3579.9599 3748.4507 3796.8822 # 2014 2015 2016 2017 2018 2019 2020 # 1 24563.2343 25822.2514 26231.0267 26630.2053 NA NA NA # 2 583.6562 574.1841 571.0738 571.4407 564.610 573.2876 NA # 3 3843.1979 3748.3201 3530.3107 3409.9303 3233.906 3111.1577 NA ``` Of course we could also have applied some transformation (like computing pairwise correlations) to each matrix before unlisting. In any case this kind of programming provides lots of possibilities to explore and manipulate panel data (as we will see in Part 2). ### Benchmarks Below benchmarks are provided of the *collapse* implementation against native *plm*. To do this the dataset used so far is extended to have approx 1 million observations: ```r wlddevsmall <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA")) wlddevsmall$iso3c <- as.character(wlddevsmall$iso3c) data <- replicate(100, wlddevsmall, simplify = FALSE) rm(wlddevsmall) uniquify <- function(x, i) { x$iso3c <- paste0(x$iso3c, i) x } data <- unlist2d(Map(uniquify, data, as.list(1:100)), idcols = FALSE) data <- pdata.frame(data, index = c("iso3c", "year")) pdim(data) # Balanced Panel: n = 21600, T = 61, N = 1317600 ``` The data has 21600 individuals (countries) observed for up to 61 years (1960-2020), the total number of rows is 1317600. We can pull out a series of life expectancy and run some benchmarks. The Windows laptop on which these benchmarks were run has a 2x 2.2 GHZ Intel i5 processor, 8GB DDR3 RAM and a Samsung SSD hard drive. ```r library(microbenchmark) # Creating the extended panel series for Life Expectancy (l for large) LIFEEX_l <- data$LIFEEX str(LIFEEX_l) # 'pseries' Named num [1:1317600] 65.7 66.1 66.4 66.8 67.1 ... # - attr(*, "names")= chr [1:1317600] "ABW1-1960" "ABW1-1961" "ABW1-1962" "ABW1-1963" ... # - attr(*, "index")=Classes 'pindex' and 'data.frame': 1317600 obs. of 2 variables: # ..$ iso3c: Factor w/ 21600 levels "ABW1","ABW10",..: 1 1 1 1 1 1 1 1 1 1 ... # ..$ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ... # Between Transformations microbenchmark(Between(LIFEEX_l, na.rm = TRUE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # Between(LIFEEX_l, na.rm = TRUE) 17.73594 18.71248 21.84342 20.13574 22.35853 37.94689 10 microbenchmark(fbetween(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fbetween(LIFEEX_l) 4.408771 4.639519 4.705529 4.718424 4.771498 4.908684 10 # Within Transformations microbenchmark(Within(LIFEEX_l, na.rm = TRUE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # Within(LIFEEX_l, na.rm = TRUE) 10.17887 10.74663 10.91092 10.8766 11.24224 11.37664 10 microbenchmark(fwithin(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(LIFEEX_l) 4.522218 4.550303 4.735344 4.644296 4.696017 5.297036 10 # Higher-Dimenional Between and Within Transformations microbenchmark(fhdbetween(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fhdbetween(LIFEEX_l) 56.916 57.29971 66.0179 58.13864 76.50108 84.10625 10 microbenchmark(fhdwithin(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fhdwithin(LIFEEX_l) 55.55906 56.2372 62.31852 56.56555 75.78784 77.20657 10 # Single Lag microbenchmark(lag(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # lag(LIFEEX_l) 7.967776 8.144896 8.542879 8.632468 8.840092 8.949357 10 microbenchmark(flag(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(LIFEEX_l) 7.994057 8.038747 8.337862 8.180484 8.603481 9.12086 10 # Sequence of Lags / Leads microbenchmark(lag(LIFEEX_l, -1:3), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # lag(LIFEEX_l, -1:3) 18.7525 19.29476 28.61876 27.95813 38.11081 39.5329 10 microbenchmark(flag(LIFEEX_l, -1:3), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(LIFEEX_l, -1:3) 15.5415 15.64335 21.10042 15.83998 33.37699 34.10265 10 # Single difference microbenchmark(diff(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # diff(LIFEEX_l) 8.00525 8.16884 8.370421 8.368776 8.554404 8.733697 10 microbenchmark(fdiff(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l) 7.937805 8.020502 8.3458 8.2451 8.426238 9.34923 10 # Iterated Difference microbenchmark(fdiff(LIFEEX_l, diff = 2), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l, diff = 2) 10.20129 10.62786 10.72184 10.77488 10.82326 11.21805 10 # Sequence of Lagged / Leaded and iterated differences microbenchmark(fdiff(LIFEEX_l, -1:3, 1:2), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l, -1:3, 1:2) 45.90159 52.22494 66.83236 53.21347 57.53222 187.8582 10 # Single Growth Rate microbenchmark(fgrowth(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fgrowth(LIFEEX_l) 8.222304 8.357153 8.69059 8.727158 8.884167 9.436683 10 # Single Log-Difference microbenchmark(fdiff(LIFEEX_l, log = TRUE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(LIFEEX_l, log = TRUE) 12.41394 12.8583 15.06961 13.17156 13.61659 32.51989 10 # Panel Series to Matrix Conversion # system.time(as.matrix(LIFEEX_l)) This takes about 3 minutes to compute microbenchmark(psmat(LIFEEX_l), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # psmat(LIFEEX_l) 1.482478 1.500149 1.628028 1.520813 1.553941 2.438639 10 ``` This shows a comparison between flag and *data.table*'s shift: ```r microbenchmark(L(data, cols = 3:6), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # L(data, cols = 3:6) 14.13692 14.43877 20.88276 18.865 19.73141 37.06244 10 library(data.table) setDT(data) # 'Improper' panel-lag microbenchmark(data[, shift(.SD), by = iso3c, .SDcols = 3:6], times = 10) # Unit: milliseconds # expr min lq mean median uq max # data[, shift(.SD), by = iso3c, .SDcols = 3:6] 176.5308 199.9415 215.6897 204.0719 230.089 268.9992 # neval # 10 # This does what L is actually doing (without sorting the data) microbenchmark(data[order(year), shift(.SD), by = iso3c, .SDcols = 3:6], times = 10) # Unit: milliseconds # expr min lq mean median # data[order(year), shift(.SD), by = iso3c, .SDcols = 3:6] 193.9684 210.7025 213.7664 213.0727 # uq max neval # 221.9783 226.3685 10 ``` The above dataset has 1 million obs in 20 thousand groups, but what about 10 million obs and 1 million groups? Do *collapse* functions scale efficiently as data and the number of groups grows large? Here is a simple benchmark: ```r x <- rnorm(1e7) # 10 million obs g <- qF(rep(1:1e6, each = 10), na.exclude = FALSE) # 1 million individuals t <- qF(rep(1:10, 1e6), na.exclude = FALSE) # 10 time-periods per individual microbenchmark(fbetween(x, g), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fbetween(x, g) 51.66189 53.60693 91.00168 62.54655 73.87835 233.3696 10 microbenchmark(fwithin(x, g), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(x, g) 43.46291 44.03954 77.0216 45.33919 58.65132 196.7248 10 microbenchmark(flag(x, 1, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(x, 1, g, t) 42.65382 55.05332 87.72527 59.55935 80.86143 210.8074 10 microbenchmark(flag(x, -1:1, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(x, -1:1, g, t) 92.19842 92.5559 162.8994 166.736 228.6354 239.6953 10 microbenchmark(fdiff(x, 1, 1, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(x, 1, 1, g, t) 42.51778 46.29306 82.27838 53.85735 67.54295 205.0114 10 microbenchmark(fdiff(x, 1, 2, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(x, 1, 2, g, t) 59.9363 62.11689 84.42818 69.85072 75.38506 217.1431 10 microbenchmark(fdiff(x, -1:1, 1:2, g, t), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(x, -1:1, 1:2, g, t) 163.5046 182.9127 246.2855 250.664 301.4046 339.1415 10 ``` The results show that *collapse* functions perform very well even as the number of groups grows large. The conclusion of this benchmark analysis is that *collapse*'s fast functions, with or without the help of *plm* classes, allow for very fast transformations of panel data, and should enable R programmers and econometricians to implement high-performance panel data estimators without having to dive into C/C++ themselves or resorting to *data.table* metaprogramming. ## Part 2: Fast Exploration of Panel Data *collapse* also provides some essential functions to summarize and explore panel data, such as a fast check of variation over different dimensions, fast summary-statistics for panel data, panel-auto, partial-auto and cross-correlation functions, and a fast F-test to test fixed effects and other exclusion restrictions on (large) panel data models. Panel data to matrix conversion further allows the application of some correlational and unsupervised learning tools such as PCA, clustering or dynamic factor analysis. ### 2.1 Variation Check for Panel Data The function `varying` can be used to check over which panel-dimensions different variable have variation. When passed a *pdata.frame*, `varying` by default takes the first identifier and checks for variation *within* that dimension. ```r # This checks for any variation within "iso3c", the first index variable: TRUE means data vary within country i.e. over time. varying(pwlddev) # country date year decade region income OECD PCGDP LIFEEX GINI ODA POP # FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE ``` Alternatively any index variable or combination of index variables can be specified: ```r # This checks any variation within time variable, i.e. cross-sectional variation. varying(pwlddev, effect = "year") # country iso3c date decade region income OECD PCGDP LIFEEX GINI ODA POP # TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE ``` Another possibility is checking for variation within each group: ```r # This checks cross-sectional variation within each year for 4 indicators. head(varying(pwlddev, effect = "year", cols = 9:12, any_group = FALSE)) # PCGDP LIFEEX GINI ODA # 1960 TRUE TRUE NA TRUE # 1961 TRUE TRUE NA TRUE # 1962 TRUE TRUE NA TRUE # 1963 TRUE TRUE NA TRUE # 1964 TRUE TRUE NA TRUE # 1965 TRUE TRUE NA TRUE ``` `varying` also has a pseries method. The code below checks for time-variation of the GINI index within each country. A `NA` is returned when there are no observations within a particular country. ```r head(varying(pwlddev$GINI, any_group = FALSE), 20) # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR # NA NA TRUE TRUE NA TRUE TRUE TRUE NA NA TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA ``` If we would like to gave more information about this variation, we could also invoke the functions `fndistinct` and `fsd`, which do not have *pseries* methods: ```r head(fndistinct(pwlddev$GINI, index(pwlddev, "iso3c")), 20) # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR # 0 0 3 9 0 2 29 20 0 0 9 16 5 4 16 3 5 9 12 0 head(round(fsd(pwlddev$GINI, index(pwlddev, "iso3c")), 2), 20) # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR # NA NA 5.18 2.47 NA 4.60 3.84 2.76 NA NA 1.19 1.76 4.85 4.37 1.71 4.60 5.98 3.02 2.58 NA ``` ### 2.2 Summary Statistics for Panel Data Efficient summary statistics for panel data have long been implemented in other statistical softwares. The command `qsu`, shorthand for 'quick-summary', is a very efficient summary statistics command inspired by the *xtsummarize* command in the Stata statistical software. It computes a default set of 5 statistics (N, mean, sd, min and max) and can also computed higher moments (skewness and kurtosis) in a single pass through the data (using a numerically stable online algorithm generalized from Welford's Algorithm for variance computations). With panel data, `qsu` computes these statistics not just on the raw data, but also on the between-transformed and within-transformed data: ```r qsu(pwlddev, cols = 9:12, higher = TRUE) # , , PCGDP # # N/T Mean SD Min Max Skew Kurt # Overall 9470 12048.778 19077.6416 132.0776 196061.417 3.1276 17.1154 # Between 206 12962.6054 20189.9007 253.1886 141200.38 3.1263 16.2299 # Within 45.9709 12048.778 6723.6808 -33504.8721 76767.5254 0.6576 17.2003 # # , , LIFEEX # # N/T Mean SD Min Max Skew Kurt # Overall 11670 64.2963 11.4764 18.907 85.4171 -0.6748 2.6718 # Between 207 64.9537 9.8936 40.9663 85.4171 -0.5012 2.1693 # Within 56.3768 64.2963 6.0842 32.9068 84.4198 -0.2643 3.7027 # # , , GINI # # N/T Mean SD Min Max Skew Kurt # Overall 1744 38.5341 9.2006 20.7 65.8 0.596 2.5329 # Between 167 39.4233 8.1356 24.8667 61.7143 0.5832 2.8256 # Within 10.4431 38.5341 2.9277 25.3917 55.3591 0.3263 5.3389 # # , , ODA # # N/T Mean SD Min Max Skew Kurt # Overall 8608 454'720131 868'712654 -997'679993 2.56715605e+10 6.9832 114.889 # Between 178 439'168412 569'049959 468717.916 3.62337432e+09 2.355 9.9487 # Within 48.3596 454'720131 650'709624 -2.44379420e+09 2.45610972e+10 9.6047 263.3716 ``` Key statistics to look at in this summary are the sample size and the standard-deviation decomposed into the between-individuals and the within-individuals standard-deviation: For GDP per Capita we have 8995 observations in the panel series for 203 countries, with on average 44.31 observations (time-periods T) per country. The between-country standard deviation is 19600 USD, around 3-times larger than the within-country (over-time) standard deviation of 6300 USD. Regarding the mean, the between-mean, computed as a cross-sectional average of country averages, usually differs slightly from the overall average taken across all data points. The within-transformed data is computed and summarized with the overall mean added back (i.e. as in `fwithin(PCGDP, mean = "overall.mean")`). We can also do groupwise panel-statistics and `qsu` also supports weights (not shown): ```r qsu(pwlddev, ~ income, cols = 9:12, higher = TRUE) # , , Overall, PCGDP # # N/T Mean SD Min Max Skew Kurt # High income 3179 30280.7283 23847.0483 932.0417 196061.417 2.1702 10.3425 # Low income 1311 597.4053 288.4392 164.3366 1864.7925 1.2385 4.7115 # Lower middle income 2246 1574.2535 858.7183 144.9863 4818.1922 0.9093 3.7153 # Upper middle income 2734 4945.3258 2979.5609 132.0776 20532.9523 1.2286 4.9391 # # , , Between, PCGDP # # N/T Mean SD Min Max Skew Kurt # High income 71 30280.7283 20908.5323 5413.4495 141200.38 2.1347 9.9673 # Low income 28 597.4053 243.8219 253.1886 1357.3326 1.4171 5.3137 # Lower middle income 47 1574.2535 676.3157 444.2899 2896.8682 0.3562 2.2358 # Upper middle income 60 4945.3258 2327.3834 1604.595 13344.5423 1.24 4.7803 # # , , Within, PCGDP # # N/T Mean SD Min Max Skew Kurt # High income 44.7746 12048.778 11467.9987 -33504.8721 76767.5254 0.3924 6.0523 # Low income 46.8214 12048.778 154.1039 11606.2382 12698.296 0.5098 4.0676 # Lower middle income 47.7872 12048.778 529.1449 10377.7234 14603.1055 0.7658 5.4272 # Upper middle income 45.5667 12048.778 1860.395 4846.3834 24883.1246 0.6858 7.8469 # # , , Overall, LIFEEX # # N/T Mean SD Min Max Skew Kurt # High income 3831 73.6246 5.6693 42.672 85.4171 -1.0067 5.5553 # Low income 1800 49.7301 9.0944 26.172 74.43 0.2748 2.6721 # Lower middle income 2790 58.1481 9.3115 18.907 76.699 -0.3406 2.6845 # Upper middle income 3249 66.6466 7.537 36.535 80.279 -1.0988 4.2262 # # , , Between, LIFEEX # # N/T Mean SD Min Max Skew Kurt # High income 73 73.6246 3.3499 64.0302 85.4171 -0.6537 2.9946 # Low income 30 49.7301 4.8321 40.9663 66.945 1.5195 6.6802 # Lower middle income 47 58.1481 5.9945 45.7687 71.6078 0.0352 2.2126 # Upper middle income 57 66.6466 4.9955 48.057 74.0504 -1.3647 5.303 # # , , Within, LIFEEX # # N/T Mean SD Min Max Skew Kurt # High income 52.4795 64.2963 4.5738 42.9381 78.1271 -0.4838 3.8923 # Low income 60 64.2963 7.7045 41.5678 84.4198 0.0402 2.6086 # Lower middle income 59.3617 64.2963 7.1253 32.9068 83.9918 -0.2522 3.181 # Upper middle income 57 64.2963 5.6437 41.4342 83.0122 -0.507 4.0355 # # , , Overall, GINI # # N/T Mean SD Min Max Skew Kurt # High income 680 33.3037 6.7885 20.7 58.9 1.4864 5.6772 # Low income 107 41.1327 6.5767 29.5 65.8 0.7523 4.236 # Lower middle income 369 40.0504 9.3032 24 63.2 0.4388 2.2218 # Upper middle income 588 43.1585 8.9549 25.2 64.8 0.0814 2.3517 # # , , Between, GINI # # N/T Mean SD Min Max Skew Kurt # High income 41 33.3037 6.5238 24.8667 53.6296 1.5091 5.3913 # Low income 28 41.1327 5.1706 32.1333 58.75 0.6042 4.0473 # Lower middle income 46 40.0504 8.4622 27.6955 54.925 0.334 1.797 # Upper middle income 52 43.1585 8.4359 27.9545 61.7143 0.0336 2.2441 # # , , Within, GINI # # N/T Mean SD Min Max Skew Kurt # High income 16.5854 38.5341 1.8771 31.1841 45.8841 -0.0818 4.902 # Low income 3.8214 38.5341 4.0643 29.4591 55.3591 0.6766 5.1025 # Lower middle income 8.0217 38.5341 3.8654 27.9452 55.1008 0.4093 4.0058 # Upper middle income 11.3077 38.5341 3.0043 25.3917 48.0131 0.0728 3.5115 # # , , Overall, ODA # # N/T Mean SD Min Max Skew # High income 1575 153'663194 425'918409 -464'709991 4.34612988e+09 5.2505 # Low income 1692 631'660165 941'498380 -500000 1.04032100e+10 4.4628 # Lower middle income 2544 692'072692 1.02452490e+09 -605'969971 1.18790801e+10 3.7913 # Upper middle income 2797 301'326218 765'116131 -997'679993 2.56715605e+10 16.3123 # Kurt # High income 36.2748 # Low income 32.1305 # Lower middle income 25.2442 # Upper middle income 464.8625 # # , , Between, ODA # # N/T Mean SD Min Max Skew Kurt # High income 42 153'663194 339'972909 468717.916 2.05456932e+09 3.9522 19.0792 # Low income 30 631'660165 466'265486 91'536334 1.67220583e+09 0.9769 2.6602 # Lower middle income 47 692'072692 765'003585 28'919000.2 3.62337432e+09 2.0429 7.2664 # Upper middle income 59 301'326218 382'148153 13'160000 1.91297800e+09 2.1072 7.0291 # # , , Within, ODA # # N/T Mean SD Min Max Skew # High income 37.5 454'720131 256'563661 -920'977647 2.87632242e+09 2.2074 # Low income 56.4 454'720131 817'933797 -1.19519570e+09 9.18572426e+09 3.8872 # Lower middle income 54.1277 454'720131 681'484247 -2.44379420e+09 1.12814455e+10 3.8965 # Upper middle income 47.4068 454'720131 662'846500 -2.04042108e+09 2.45610972e+10 19.6351 # Kurt # High income 28.8682 # Low income 33.5194 # Lower middle income 47.7246 # Upper middle income 657.3041 ``` Here it should be noted that any grouping is applied independently from the data-transformation, i.e. the data is first transformed, and then grouped statistics are calculated on the transformed data. The computation of statistics is very efficient: ```r qsu(LIFEEX_l) # N/T Mean SD Min Max # Overall 1'167000 64.2963 11.4759 18.907 85.4171 # Between 20700 64.9537 9.87 40.9663 85.4171 # Within 56.3768 64.2963 6.0839 32.9068 84.4198 microbenchmark(qsu(LIFEEX_l)) # Unit: milliseconds # expr min lq mean median uq max neval # qsu(LIFEEX_l) 9.49355 10.25679 11.07317 10.37214 10.78839 50.22574 100 ``` Using the transformation functions and the functions `pwcor` and `pwcov`, we can also easily explore the correlation structure of the data: ```r # Overall pairwise correlations with pairwise observation count and significance testing (* = significant at 5% level) pwcor(get_vars(pwlddev, 9:12), N = TRUE, P = TRUE) # PCGDP LIFEEX GINI ODA # PCGDP 1 (9470) .57* (9022) -.44* (1735) -.16* (7128) # LIFEEX .57* (9022) 1 (11670) -.35* (1742) -.02 (8142) # GINI -.44* (1735) -.35* (1742) 1 (1744) -.20* (1109) # ODA -.16* (7128) -.02 (8142) -.20* (1109) 1 (8608) # Between correlations pwcor(fmean(get_vars(pwlddev, 9:12), pwlddev$iso3c), N = TRUE, P = TRUE) # PCGDP LIFEEX GINI ODA # PCGDP 1 (206) .60* (199) -.42* (165) -.25* (172) # LIFEEX .60* (199) 1 (207) -.40* (165) -.21* (172) # GINI -.42* (165) -.40* (165) 1 (167) -.19* (145) # ODA -.25* (172) -.21* (172) -.19* (145) 1 (178) # Within correlations pwcor(W(pwlddev, cols = 9:12, keep.ids = FALSE), N = TRUE, P = TRUE) # W.PCGDP W.LIFEEX W.GINI W.ODA # W.PCGDP 1 (9470) .31* (9022) -.01 (1735) -.01 (7128) # W.LIFEEX .31* (9022) 1 (11670) -.16* (1742) .17* (8142) # W.GINI -.01 (1735) -.16* (1742) 1 (1744) -.08* (1109) # W.ODA -.01 (7128) .17* (8142) -.08* (1109) 1 (8608) ``` The correlations show that the between (cross-country) relationships of these macro-variables are quite strong, but within countries the relationships are much weaker, for example there seems to be no significant relationship between GDP per Capita and either inequality or ODA received within countries over time. ### 2.3 Exploring Panel Data in Matrix / Array Form We can take a single panel series such as GDP per Capita and explore it further: ```r # Generating a (transposed) matrix of country GDPs per capita tGDPmat <- psmat(PCGDP, transpose = TRUE) tGDPmat[1:10, 1:10] # ABW AFG AGO ALB AND ARE ARG ARM ASM ATG # 1960 NA NA NA NA NA NA 5643 NA NA NA # 1961 NA NA NA NA NA NA 5853 NA NA NA # 1962 NA NA NA NA NA NA 5711 NA NA NA # 1963 NA NA NA NA NA NA 5323 NA NA NA # 1964 NA NA NA NA NA NA 5773 NA NA NA # 1965 NA NA NA NA NA NA 6286 NA NA NA # 1966 NA NA NA NA NA NA 6152 NA NA NA # 1967 NA NA NA NA NA NA 6255 NA NA NA # 1968 NA NA NA NA NA NA 6461 NA NA NA # 1969 NA NA NA NA NA NA 6981 NA NA NA # plot the matrix (it will plot correctly no matter how the matrix is transposed) plot(tGDPmat, main = "GDP per Capita") ```
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat

```r # Taking series with more than 20 observation suffsamp <- tGDPmat[, fnobs(tGDPmat) > 20] # Minimum pairwise observations between any two series: min(pwnobs(suffsamp)) # [1] 16 # We can use the pairwise-correlations of the annual growth rates to hierarchically cluster the economies: plot(hclust(as.dist(1-pwcor(G(suffsamp))))) ```
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat

```r # Finally we could do PCA on the growth rates: eig <- eigen(pwcor(G(suffsamp))) plot(seq_col(suffsamp), eig$values/sum(eig$values)*100, xlab = "Number of Principal Components", ylab = "% Variance Explained", main = "Screeplot") ```
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat

There is also a nice plot-method applied to panel series arrays returned when `psmat` is applied to a panel data.frame: ```r plot(psmat(pwlddev, cols = 9:12), legend = TRUE) ```
plot of chunk pwlddev_plot

plot of chunk pwlddev_plot

Above we have explored the cross-sectional relationship between the different national GDP series. Now we explore the time-dependence of the panel-vectors as a whole: ### 2.4 Panel- Auto-, Partial-Auto and Cross-Correlation Functions The functions `psacf`, `pspacf` and `psccf` mimic `stats::acf`, `stats::pacf` and `stats::ccf` for panel-vectors and panel data.frames. Below we compute the panel series autocorrelation function of the data: ```r psacf(pwlddev, cols = 9:12) ```
plot of chunk plm_psacf

plot of chunk plm_psacf

The computation is conducted by first scaling and centering (i.e. standardizing) the panel-vectors by groups (using `fscale`, default argument `gscale = TRUE`), and then taking the covariance of each series with a matrix of properly computed panel-lags of itself (using `flag`), and dividing that by the variance of the overall series (using `fvar`). In a similar way we can compute the Partial-ACF (using a multivariate Yule-Walker decomposition on the ACF, as in `stats::pacf`), ```r pspacf(pwlddev, cols = 9:12) ```
plot of chunk plm_pspacf

plot of chunk plm_pspacf

and the panel-cross-correlation function between GDP per capita and life expectancy (which is already contained in the ACF plot above): ```r psccf(PCGDP, LIFEEX) ```
plot of chunk plm_psccf

plot of chunk plm_psccf

### 2.5 Testing for Individual Specific and Time-Effects As a final step of exploration, we could analyze our series and simple models for the significance and explanatory power of individual or time-fixed effects, without going all the way to running a Hausman Test of fixed vs. random effects on a fully specified model. The main function here is `fFtest` which efficiently computes a fast R-Squared based F-test of exclusion restrictions on models potentially involving many factors. By default (argument `full.df = TRUE`) the degrees of freedom of the test are adjusted to make it identical to the F-statistic from regressing the series on a set of country and time dummies^[In fact factors are projected out using `fixest::demean` and no regression is run at all]. ```r # Testing GDP per Capita fFtest(PCGDP, index(PCGDP)) # Testing individual and time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.905 264 9205 330.349 0.000 fFtest(PCGDP, index(PCGDP, 1)) # Testing individual effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.876 215 9254 303.476 0.000 fFtest(PCGDP, index(PCGDP, 2)) # Testing time effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.027 60 9409 4.276 0.000 # Same for Life-Expectancy fFtest(LIFEEX, index(LIFEEX)) # Testing individual and time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.924 265 11404 519.762 0.000 fFtest(LIFEEX, index(LIFEEX, 1)) # Testing individual effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.719 215 11454 136.276 0.000 fFtest(LIFEEX, index(LIFEEX, 2)) # Testing time effects # R-Sq. DF1 DF2 F-Stat. P-value # 0.218 60 11609 54.075 0.000 ``` Below we test the correlation between the country and time-means of GDP and Life-Expectancy: ```r cor.test(B(PCGDP), B(LIFEEX)) # Testing correlation of country means # # Pearson's product-moment correlation # # data: B(PCGDP) and B(LIFEEX) # t = 78.752, df = 9020, p-value < 2.2e-16 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # 0.6259141 0.6503737 # sample estimates: # cor # 0.638305 cor.test(B(PCGDP, effect = 2), B(LIFEEX, effect = 2)) # Same for time-means # # Pearson's product-moment correlation # # data: B(PCGDP, effect = 2) and B(LIFEEX, effect = 2) # t = 325.6, df = 9020, p-value < 2.2e-16 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # 0.9583431 0.9615804 # sample estimates: # cor # 0.9599938 ``` We can also test for the significance of individual and time-fixed effects (or both) in the regression of GDP on life expectancy and ODA received: ```r fFtest(PCGDP, index(PCGDP), get_vars(pwlddev, c("LIFEEX","ODA"))) # Testing individual and time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-Value # Full Model 0.915 227 6682 316.551 0.000 # Restricted Model 0.162 2 6907 668.816 0.000 # Exclusion Rest. 0.753 225 6682 262.732 0.000 fFtest(PCGDP, index(PCGDP, 2), get_vars(pwlddev, c("iso3c","LIFEEX","ODA"))) # Testing time-fixed effects # R-Sq. DF1 DF2 F-Stat. P-Value # Full Model 0.915 227 6682 316.551 0.000 # Restricted Model 0.909 168 6741 403.168 0.000 # Exclusion Rest. 0.005 59 6682 7.238 0.000 ``` As can be expected in this cross-country data, individual and time-fixed effects play a large role in explaining the data, and these effects are correlated across series, suggesting that a fixed-effects model with both types of fixed-effects would be appropriate. To round things off, below we compute the Hausman test of Fixed vs. Random effects, which confirms this conclusion: ```r phtest(PCGDP ~ LIFEEX, data = pwlddev) # # Hausman Test # # data: PCGDP ~ LIFEEX # chisq = 397.04, df = 1, p-value < 2.2e-16 # alternative hypothesis: one model is inconsistent ``` ## Part 3: Programming Panel Data Estimators A central goal of the *collapse* package is to facilitate advanced and fast programming with data. A primary field of application for the fast functions introduced above is to program efficient panel data estimators. In this section we walk through a short example of how this can be done. The application will be an implementation of the Hausman and Taylor (1981) estimator, considering a more general case than currently implemented in the *plm* package. In Hausman and Taylor (1981), in a more general scenario, we have a linear panel-model of the form $$y_{it} = \beta_1X_{1it} + \beta_2X_{2it} + \beta_3Z_{1i} + \beta_4Z_{2i} + \alpha_i + \gamma_t + \epsilon$$ where $\alpha_i$ denotes unobserved individual specific effects and $\gamma_t$ denotes unobserved global events. This model has up to 4 kinds of covariates: * Time-Varying covariates $X_{1it}$ that are uncorrelated with the individual specific effect $\alpha_i$, such that $E[X_{1it}\alpha_i] = 0$. It may be the case that $E[X_{1it}\gamma_t] \neq 0$ * Time-Varying covariates $X_{2it}$ with $E[X_{2it}\alpha_i] \neq 0$ and possibly $E[X_{2it}\gamma_t] \neq 0$ * Time-Invariant covariates $Z_{1i}$ with $E[Z_{1i}\alpha_i] = 0$ * Time-Invariant covariates $Z_{2i}$ with $E[Z_{2i}\alpha_i] \neq 0$ The main estimation problem arises from $E[Z_{2i}\alpha_i] \neq 0$, which would usually prevent us from estimating $\beta_4$ since taking a within-transformation (fixed effects) would remove $Z_{2i}$ from the equation. Hausman and Taylor (1981) stipulated that since $E[X_{1it}\alpha_i] = 0$, once could use $X_{1i.}$ i.e. the between-transformed $X_{1it}$ to instrument for $Z_{2i}$. They propose an IV/2SLS estimation of the whole equation where the within-transformed covariates $\tilde{X}_{1it}$ and $\tilde{X}_{2it}$ are used to instrument $X_{1it}$ and $X_{2it}$, and $X_{1i.}$ instruments $Z_{2i}$. Assuming that missing values have been removed beforehand, and also taking into account the possibility that $E[X_{1it}\gamma_t] \neq 0$ and $E[X_{2it}\gamma_t] \neq 0$ (i.e. accounting for time fixed-effects), this estimator can be coded as follows: ```r HT_est <- function(y, X1, Z2, X2 = NULL, Z1 = NULL, time.FE = FALSE) { # Create matrix of independent variables X <- cbind(Intercept = 1, do.call(cbind, c(X1, X2, Z1, Z2))) # Create instrument matrix: if time.FE, higher-order demean X1 and X2, else normal demeaning IVS <- cbind(Intercept = 1, do.call(cbind, c(if(time.FE) fhdwithin(X1, na.rm = FALSE) else fwithin(X1, na.rm = FALSE), if(is.null(X2)) X2 else if(time.FE) fhdwithin(X2, na.rm = FALSE) else fwithin(X2, na.rm = FALSE), Z1, fbetween(X1, na.rm = FALSE)))) if(length(IVS) == length(X)) { # The IV estimator case return(drop(solve(crossprod(IVS, X), crossprod(IVS, y)))) } else { # The 2SLS case Xhat <- qr.fitted(qr(IVS), X) # First stage return(drop(qr.coef(qr(Xhat), y))) # Second stage } } ``` The estimator is written in such a way that variables of the type $X_{2it}$ and $Z_{1i}$ are optional, and it also includes an option to also project out time-FE or not. The expected inputs for $X_{1it}$ (`X1`), and $X_{2it}$ (`X2`) are column-subsets of a *pdata.frame*. Having coded the estimator, it would be good to have an example to run it on. I have tried to squeeze an example out of the `wlddev` data used so far in this vignette. It is quite crappy and suffers from a weak-IV problem, but for there sake of illustration lets do it: We want to estimate the panel-regression of life-expectancy on GDP per Capita, ODA received, the GINI index and a time-invariant dummy indicating whether the country is an OECD member. All variables except the dummy enter in logs, so this is an elasticity regression. < ```r dat <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA")) get_vars(dat, 4:7) <- lapply(get_vars(dat, 4:7), log) # Taking logs of the data dat$OECD <- as.numeric(dat$OECD) # Creating OECD dummy dat <- pdata.frame(fdroplevels(na_omit(dat)), # Creating Panel data.frame, after removing missing values index = c("iso3c", "year")) # and dropping unused factor levels pdim(dat) # Unbalanced Panel: n = 134, T = 1-34, N = 1068 varying(dat) # year OECD PCGDP LIFEEX GINI ODA # TRUE FALSE TRUE TRUE TRUE TRUE ``` Using the GINI index cost a lot of observations and brought the sample size down to 918, but the GINI index will be a key variable in what follows. Clearly the OECD dummy is time-invariant. Below we run Hausman-tests of fixed vs. random effects to determine which covariates might be correlated with the unobserved individual effects, and which model would be most appropriate. ```r # This tests whether each of the covariates is correlated with alpha_i phtest(LIFEEX ~ PCGDP, dat) # Likely correlated # # Hausman Test # # data: LIFEEX ~ PCGDP # chisq = 17.495, df = 1, p-value = 2.881e-05 # alternative hypothesis: one model is inconsistent phtest(LIFEEX ~ ODA, dat) # Likely correlated # # Hausman Test # # data: LIFEEX ~ ODA # chisq = 43.925, df = 1, p-value = 3.413e-11 # alternative hypothesis: one model is inconsistent phtest(LIFEEX ~ GINI, dat) # Likely not correlated ! # # Hausman Test # # data: LIFEEX ~ GINI # chisq = 0.56851, df = 1, p-value = 0.4509 # alternative hypothesis: one model is inconsistent phtest(LIFEEX ~ PCGDP + ODA + GINI, dat) # Fixed Effects is the appropriate model for this regression # # Hausman Test # # data: LIFEEX ~ PCGDP + ODA + GINI # chisq = 24.198, df = 3, p-value = 2.272e-05 # alternative hypothesis: one model is inconsistent ``` The tests suggest that both GDP per Capita and ODA are correlated with country-specific unobservables affecting life-expectancy, and overall a fixed-effects model would be appropriate. However, the Hausman test on the GINI index fails to reject: Country specific unobservables affecting average life-expectancy are not necessarily correlated with the level of inequality across countries. Now if we want to include the OECD dummy in the regression, we cannot use fixed-effects as this would wipe-out the dummy as well. If the dummy is uncorrelated with the country-specific unobservables affecting life-expectancy (the $\alpha_i$), then we could use a solution suggested by Mundlak (1978) and simply add between-transformed versions of PCGDP and ODA in the regression (in addition to PCGDP and ODA in levels), and so 'control' for the part of PCGDP and ODA correlated with the $\alpha_i$ (in the IV literature this is known as the control-function approach). If however the OECD dummy is correlated with the $\alpha_i$, then we need to use the Hausman and Taylor (1981) estimator. Below I suggest 2 methods of testing this correlation: ```r # Testing the correlation between OECD dummy and the Between-transformed Life-Expectancy (i.e. not accounting for other covariates) cor.test(dat$OECD, B(dat$LIFEEX)) # -> Significant correlation of 0.21 # # Pearson's product-moment correlation # # data: dat$OECD and B(dat$LIFEEX) # t = 6.797, df = 1066, p-value = 1.774e-11 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # 0.1456048 0.2606109 # sample estimates: # cor # 0.2038109 # Getting the fixed-effects (estimates of alpha_i) from the model (i.e. accounting for the other covariates) fe <- fixef(plm(LIFEEX ~ PCGDP + ODA + GINI, dat, model = "within")) mODA <- fmean(dat$ODA, dat$iso3c) # Again testing the correlation cor.test(fe, mODA[match(names(fe), names(mODA))]) # -> Not Significant.. but probably due to small sample size, the correlation is still 0.13 # # Pearson's product-moment correlation # # data: fe and mODA[match(names(fe), names(mODA))] # t = 1.1218, df = 132, p-value = 0.264 # alternative hypothesis: true correlation is not equal to 0 # 95 percent confidence interval: # -0.07362567 0.26243949 # sample estimates: # cor # 0.09717608 ``` I interpret the test results as rejecting the hypothesis that the dummy is uncorrelated with $\alpha_i$, thus we do have a case for Hausman and Taylor (1981) here: the OECD dummy is a $Z_{2i}$ with $E[Z_{2i}\alpha_i]\neq 0$. The Hausman tests above suggested that the GINI index is the only variable uncorrelated with $\alpha_i$, thus GINI is $X_{1it}$ with $E[X_{1it}\alpha_i] = 0$. Finally PCGDP and ODA jointly constitute $X_{2it}$, where the Hausman tests strongly suggested that $E[X_{2it}\alpha_i] \neq 0$. We do not have a $Z_{1i}$ in this setup, i.e. a time-invariant variable uncorrelated with the $\alpha_i$. The Hausman and Taylor (1981) estimator stipulates that we should instrument the OECD dummy with $X_{1i.}$, the between-transformed GINI index. Let us therefore test the regression of the dummy on this instrument to see of it would be a good (i.e. relevant) instrument: ```r # This computes the regression of OECD on the GINI instrument: Weak IV problem !! fFtest(dat$OECD, B(dat$GINI)) # R-Sq. DF1 DF2 F-Stat. P-value # 0.000 1 1066 0.153 0.695 ``` The 0 R-Squared and the F-Statistic of 0.21 suggest that the instrument is very weak indeed, rubbish to be precise, thus the implementation of the HT estimator below is also a rubbish example, but it is still good for illustration purposes: ```r HT_est(y = dat$LIFEEX, X1 = get_vars(dat, "GINI"), Z2 = get_vars(dat, "OECD"), X2 = get_vars(dat, c("PCGDP","ODA"))) # Intercept GINI PCGDP ODA OECD # 3.638486969 -0.035596160 0.120981946 0.005744747 -5.862368476 ``` Now a central questions is of course: How computationally efficient is this estimator? Let us try to re-run it on the data generated for the benchmark in Part 1: ```r dat <- get_vars(data, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA")) get_vars(dat, 4:7) <- lapply(get_vars(dat, 4:7), log) # Taking logs of the data dat$OECD <- as.numeric(dat$OECD) # Creating OECD dummy dat <- pdata.frame(fdroplevels(na_omit(dat)), # Creating Panel data.frame, after removing missing values index = c("iso3c", "year")) # and dropping unused factor levels pdim(dat) # Unbalanced Panel: n = 13400, T = 1-34, N = 106800 varying(dat) # year OECD PCGDP LIFEEX GINI ODA # TRUE FALSE TRUE TRUE TRUE TRUE library(microbenchmark) microbenchmark(HT_est = HT_est(y = dat$LIFEEX, # The estimator as before X1 = get_vars(dat, "GINI"), Z2 = get_vars(dat, "OECD"), X2 = get_vars(dat, c("PCGDP","ODA"))), HT_est_TFE = HT_est(y = dat$LIFEEX, # Also Projecting out Time-FE X1 = get_vars(dat, "GINI"), Z2 = get_vars(dat, "OECD"), X2 = get_vars(dat, c("PCGDP","ODA")), time.FE = TRUE)) # Unit: milliseconds # expr min lq mean median uq max neval # HT_est 7.919437 8.46937 9.761301 8.869612 9.508597 45.08717 100 # HT_est_TFE 22.501128 23.18640 25.387041 23.469835 24.490612 85.96462 100 ``` At around 100,000 obs and 13,000 groups in an unbalanced panel, the computation involving 3 grouped centering and 1 grouped averaging task as well as 2 list-to matrix conversions and an IV-procedure took about 10 milliseconds with only individual effects, and about 40 - 45 milliseconds with individual and time-fixed effects (projected out iteratively). This should leave some room for running this on much larger data. ## References Hausman J, Taylor W (1981). “Panel Data and Unobservable Individual Effects.†*Econometrica*, 49, 1377–1398. Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†*Econometrica* 46 (1): 69–85. Cochrane, D. & Orcutt, G. H. (1949). "Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Terms". *Journal of the American Statistical Association.* 44 (245): 32–61. Prais, S. J. & Winsten, C. B. (1954). "Trend Estimators and Serial Correlation". *Cowles Commission Discussion Paper No. 383.* Chicago. collapse/inst/doc/collapse_for_tidyverse_users.R0000644000176200001440000001367515202627533021731 0ustar liggesusers## ----echo=FALSE----------------------------------------------------------------------------------- oldopts <- options(width = 100L) ## ----echo = FALSE, message = FALSE, warning=FALSE------------------------------------------------- knitr::opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE, comment = "#", tidy = FALSE, cache = TRUE, collapse = TRUE, fig.width = 8, fig.height = 5, out.width = '100%') ## ------------------------------------------------------------------------------------------------- library(collapse) set_collapse(mask = "manip") # version >= 2.0.0 ## ------------------------------------------------------------------------------------------------- mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), mean), qsec_wt = weighted.mean(qsec, wt)) ## ------------------------------------------------------------------------------------------------- fmean(mtcars$mpg) # Vector fmean(EuStockMarkets) # Matrix fmean(mtcars) # Data Frame fmean(mtcars$mpg, w = mtcars$wt) # Weighted mean fmean(mtcars$mpg, g = mtcars$cyl) # Grouped mean fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt) # Weighted group mean fmean(mtcars[5:10], g = mtcars$cyl, w = mtcars$wt) # Of data frame fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt, TRA = "fill") # Replace data by weighted group mean # etc... ## ------------------------------------------------------------------------------------------------- mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), fmean), qsec_wt = fmean(qsec, wt)) ## ------------------------------------------------------------------------------------------------- mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> select(mpg, carb, hp) |> fmean() ## ------------------------------------------------------------------------------------------------- mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + min(qsec)) # Vectorized ## ------------------------------------------------------------------------------------------------- mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + fmin(qsec)) # Vectorized mtcars |> group_by(cyl) |> summarise(mpg = mean(mpg) + min(qsec)) # Not vectorized ## ------------------------------------------------------------------------------------------------- mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp, qsec, wt) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), fmean), qsec_wt = fmean(qsec, wt)) ## ------------------------------------------------------------------------------------------------- mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |> group_by(cyl, vs, am) |> fmean() ## ------------------------------------------------------------------------------------------------- mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |> group_by(cyl, vs, am, sort = FALSE) |> fmean(nthreads = 3, na.rm = FALSE) ## ------------------------------------------------------------------------------------------------- mtcars |> mutate(mpg_median = fmedian(mpg, list(cyl, vs, am), TRA = "fill")) |> head(3) ## ------------------------------------------------------------------------------------------------- mtcars |> mutate(across(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill")) |> head(2) # Or mtcars |> transformv(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill") |> head(2) ## ------------------------------------------------------------------------------------------------- mtcars |> group_by(cyl, vs, am, return.groups = FALSE) |> mutate(mpg_median = fmedian(mpg), mpg_mean = fmean(mpg), # Or fbetween(mpg) mpg_demean = fwithin(mpg), # Or fmean(mpg, TRA = "-") mpg_scale = fscale(mpg), .keep = "used") |> ungroup() |> head(3) ## ----include = FALSE------------------------------------------------------------------------------ set.seed(101) ## ------------------------------------------------------------------------------------------------- # c = country, s = sector, y = year, v = value exports <- expand.grid(c = paste0("c", 1:8), s = paste0("s", 1:8), y = 1:15) |> mutate(v = round(abs(rnorm(length(c), mean = 5)), 2)) |> subset(-sample.int(length(v), 360)) # Making it unbalanced and irregular head(exports) nrow(exports) ## ------------------------------------------------------------------------------------------------- # Computing Balassa's (1965) RCA index: fast and memory efficient # settfm() modifies exports and assigns it back to the global environment settfm(exports, RCA = fsum(v, list(c, y), TRA = "/") %/=% fsum(fsum(v, y, TRA = "/"), list(s, y), TRA = "fill", set = TRUE)) ## ------------------------------------------------------------------------------------------------- pivot(exports, ids = "c", values = "RCA", names = "s", how = "wider", FUN = "mean", sort = TRUE) ## ------------------------------------------------------------------------------------------------- exports |> mutate(RCA_growth = fgrowth(RCA, g = list(c, s), t = y)) |> pivot(ids = "c", values = "RCA_growth", names = "s", how = "wider", FUN = fmedian, sort = TRUE) ## ------------------------------------------------------------------------------------------------- # Taking the latest observation within the last 3 years exports_latest <- subset(exports, y > 12 & y == fmax(y, list(c, s), "fill"), -y) # How many sectors do we observe for each country in the last 3 years? with(exports_latest, fndistinct(s, c)) ## ------------------------------------------------------------------------------------------------- exports_latest |> mutate(RCA = fsum(v, c, TRA = "/") %/=% fsum(proportions(v), s, TRA = "fill")) |> pivot("c", "RCA", "s", how = "wider", sort = TRUE) ## ----echo=FALSE--------------------------------------------------------------- options(oldopts) collapse/inst/doc/developing_with_collapse.Rmd0000644000176200001440000010020015121640707021307 0ustar liggesusers--- title: "Developing with collapse" subtitle: "Or: How to Code Efficiently in R" author: "Sebastian Krantz" date: "2024-12-30" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{developing with collapse} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Introduction *collapse* offers an integrated suite of C/C++-based statistical and data manipulation functions, many low-level tools for memory efficient programming, and a [class-agnostic architecture](https://fastverse.org/collapse/articles/collapse_object_handling.html) that seamlessly supports vectors, matrices, and data frame-like objects. These features make it an ideal backend for high-performance statistical packages. This vignette is meant to provide some recommendations for developing with *collapse*. It is complementary to the earlier [blog post on programming with *collapse*](https://sebkrantz.github.io/Rblog/2020/09/13/programming-with-collapse/) which readers are also encouraged to consult. The vignette adds 3 important points for writing efficient R/*collapse* code. ## Point 1: Be Minimalistic in Computations *collapse* supports different types of R objects (vectors, matrices, data frames + variants) and it can perform grouped operations on them using different types of grouping information (plain vectors, 'qG'^[Alias for quick-group.] objects, factors, 'GRP' objects, grouped or indexed data frames). Grouping can be sorted or unsorted. A key for very efficient code is to use the minimal required operations/objects to get the job done. Suppose you want to sum an object `x` by groups using a grouping vector `g`. If the grouping is only needed once, this should be done using the internal grouping of `fsum()` without creating external grouping objects - `fsum(x, g)` for aggregation and `fsum(x, g, TRA = "fill")` for expansion: ```r fmean(mtcars$mpg, mtcars$cyl) # 4 6 8 # 26.66364 19.74286 15.10000 fmean(mtcars$mpg, mtcars$cyl, TRA = "fill") # [1] 19.74286 19.74286 26.66364 19.74286 15.10000 19.74286 15.10000 26.66364 26.66364 19.74286 # [11] 19.74286 15.10000 15.10000 15.10000 15.10000 15.10000 15.10000 26.66364 26.66364 26.66364 # [21] 26.66364 15.10000 15.10000 15.10000 15.10000 26.66364 26.66364 26.66364 15.10000 19.74286 # [31] 15.10000 26.66364 ``` The expansion case is very efficient because it internally uses unsorted grouping. Apart from the default sorted aggregation, these functions efficiently convert your input `g` into the minimally required information. In the aggregation case, we can improve performance by also using unsorted grouping, e.g., `fsum(x, qF(g, sort = FALSE))` or `fsum(x, qG(g, sort = FALSE), use.g.names = FALSE)` if the group-names are not needed. It is advisable to also set argument `na.exclude = FALSE` in `qF()`/`qG()` to add a class 'na.included' which precludes internal missing value checks in `fsum()` and friends. If `g` is a plain vector or the first-appearance order of groups should be kept even if `g` is a factor, use `group(g)` instead of `qG(g, sort = FALSE, na.exclude = FALSE)`.^[`group()` directly calls a C-based hashing algorithm which works for all types of vectors and lists of vectors/data frames. Missing values are treated as distinct elements.] Set `use.g.names = FALSE` if not needed (can abbreviate as `use = FALSE`), and, if your data has no missing values, set `na.rm = FALSE` for maximum performance. ```r x <- rnorm(1e7) # 10 million random obs g <- sample.int(1e6, 1e7, TRUE) # 1 Million random groups oldopts <- set_collapse(na.rm = FALSE) # No missing values: maximum performance microbenchmark::microbenchmark( internal = fsum(x, g), internal_expand = fsum(x, g, TRA = "fill"), qF1 = fsum(x, qF(g, sort = FALSE)), qF2 = fsum(x, qF(g, sort = FALSE, na.exclude = FALSE)), qG1 = fsum(x, qG(g, sort = FALSE), use = FALSE), qG2 = fsum(x, qG(g, sort = FALSE, na.exclude = FALSE), use = FALSE), group = fsum(x, group(g), use = FALSE), # Same as above basically GRP1 = fsum(x, GRP(g)), GRP2 = fsum(x, GRP(g, sort = FALSE)), GRP3 = fsum(x, GRP(g, sort = FALSE, return.groups = FALSE), use = FALSE) ) # Unit: milliseconds # expr min lq mean median uq max neval # internal 119.62078 124.61575 133.51499 129.24721 136.84295 187.9376 100 # internal_expand 87.45751 93.53473 101.63398 97.34573 105.04102 195.5121 100 # qF1 98.40816 101.62102 110.80120 105.03839 112.72224 265.5931 100 # qF2 86.75518 89.82823 100.47122 93.89814 103.04776 194.9115 100 # qG1 88.38563 92.44846 103.28242 97.29579 105.35159 202.8058 100 # qG2 72.94851 76.86912 87.05558 79.43137 86.15307 262.4734 100 # group 74.08335 77.19435 87.62058 82.58726 90.61506 162.0318 100 # GRP1 145.13799 149.54178 163.89938 154.71379 164.11361 297.5056 100 # GRP2 95.83557 99.05297 109.58577 103.34950 112.50322 266.9996 100 # GRP3 82.56629 86.15699 97.54058 90.40781 98.05956 328.7744 100 ``` Factors and 'qG' objects are efficient inputs to all statistical/transformation functions except for `fmedian()`, `fnth()`, `fmode()`, `fndistinct()`, and split-apply-combine operations using `BY()`/`gsplit()`. For repeated grouped operations involving those, it makes sense to create 'GRP' objects using `GRP()`. These objects are more expensive to create but provide more complete information.^[See `?GRP`, in particular the 'Value' section.] If sorting is not needed, set `sort = FALSE`, and if aggregation or the unique groups/names are not needed set `return.groups = FALSE`. ```r f <- qF(g); f2 <- qF(g, na.exclude = FALSE) gg <- group(g) # Same as qG(g, sort = FALSE, na.exclude = FALSE) grp <- GRP(g) # Simple functions: factors are efficient inputs microbenchmark::microbenchmark( factor = fsum(x, f), factor_nona = fsum(x, f2), qG_nona = fsum(x, gg), qG_nona_nonam = fsum(x, gg, use = FALSE), GRP = fsum(x, grp), GRP_nonam = fsum(x, grp, use = FALSE) ) # Unit: milliseconds # expr min lq mean median uq max neval # factor 16.02514 16.49498 17.50705 17.11619 18.16497 21.72975 100 # factor_nona 12.72911 13.15124 14.41943 13.87850 15.03540 23.27144 100 # qG_nona 14.30178 14.95450 20.48179 15.67930 17.34989 57.15597 100 # qG_nona_nonam 11.57118 12.00423 13.12157 12.49071 13.61801 23.31219 100 # GRP 12.83345 13.08907 14.45512 13.95154 15.21594 21.46473 100 # GRP_nonam 12.67589 13.22139 14.15271 13.76600 14.84057 20.36359 100 # Complex functions: more information helps microbenchmark::microbenchmark( qG = fmedian(x, gg, use = FALSE), GRP = fmedian(x, grp, use = FALSE), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # qG 258.4450 261.9357 267.2520 264.2608 267.4161 297.1552 10 # GRP 191.8623 193.0631 196.0935 193.4358 194.6245 210.3685 10 set_collapse(oldopts) ``` Why not always use `group()` for unsorted grouping with simple functions? You can do that, but `qF()`/`qG()` are a bit smarter when it comes to handling input factors/'qG' objects whereas `group()` hashes every vector: ```r microbenchmark::microbenchmark( factor_factor = qF(f), # This checks NA's and adds 'na.included' class -> full deep copy factor_factor2 = qF(f, na.exclude = FALSE), # NA checking costs.. incurred in fsum() and friends check_na = collapse:::is.nmfactor(f), check_na2 = collapse:::is.nmfactor(f2), factor_qG = qF(gg), qG_factor = qG(f), qG_qG = qG(gg), group_factor = group(f), group_qG = group(gg) ) # Unit: nanoseconds # expr min lq mean median uq max neval # factor_factor 1107 2562.5 6925.31 7298.0 9676.0 19270 100 # factor_factor2 5926960 6147663.0 6898849.83 6235136.5 6421686.5 15325349 100 # check_na 3440474 3503880.5 3525056.59 3513597.5 3524770.0 3927185 100 # check_na2 287 1496.5 3325.10 3341.5 4243.5 9922 100 # factor_qG 2583 11644.0 15105.63 15887.5 18614.0 31898 100 # qG_factor 1927 4284.5 10171.28 9614.5 13796.5 50799 100 # qG_qG 1476 2583.0 6674.39 6498.5 8897.0 23124 100 # group_factor 16066629 16300165.0 17378151.76 16489011.0 16858872.0 54181582 100 # group_qG 13824175 14194917.5 15083957.81 14347396.5 14700345.0 22289117 100 ``` Only in rare cases are grouped/indexed data frames created with `fgroup_by()`/`findex_by()` needed in package code. Likewise, functions like `fsummarise()`/`fmutate()` are essentially wrappers. For example ```r mtcars |> fgroup_by(cyl, vs, am) |> fsummarise(mpg = fsum(mpg), across(c(carb, hp, qsec), fmean)) # cyl vs am mpg carb hp qsec # 1 4 0 1 26.0 2.000000 91.00000 16.70000 # 2 4 1 0 68.7 1.666667 84.66667 20.97000 # 3 4 1 1 198.6 1.428571 80.57143 18.70000 # 4 6 0 1 61.7 4.666667 131.66667 16.32667 # 5 6 1 0 76.5 2.500000 115.25000 19.21500 # 6 8 0 0 180.6 3.083333 194.16667 17.14250 # 7 8 0 1 30.8 6.000000 299.50000 14.55000 ``` is the same as (again `use = FALSE` abbreviates `use.g.names = FALSE`) ```r g <- GRP(mtcars, c("cyl", "vs", "am")) add_vars(g$groups, get_vars(mtcars, "mpg") |> fsum(g, use = FALSE), get_vars(mtcars, c("carb", "hp", "qsec")) |> fmean(g, use = FALSE) ) # cyl vs am mpg carb hp qsec # 1 4 0 1 26.0 2.000000 91.00000 16.70000 # 2 4 1 0 68.7 1.666667 84.66667 20.97000 # 3 4 1 1 198.6 1.428571 80.57143 18.70000 # 4 6 0 1 61.7 4.666667 131.66667 16.32667 # 5 6 1 0 76.5 2.500000 115.25000 19.21500 # 6 8 0 0 180.6 3.083333 194.16667 17.14250 # 7 8 0 1 30.8 6.000000 299.50000 14.55000 ``` To be clear: nothing prevents you from using these wrappers - they are quite efficient - but if you want to change all inputs programmatically it makes sense to go down one level - your code will also become safer.^[If you do use `fgroup_by()` in a package use it with non-standard evaluation, e.g., `fgroup_by(cyl, vs, am)`. Don't do `ind <- c("cyl", "vs", "am")` and then `fgroup_by(ind)` as the data may contain a column called `ind`. For such cases use `group_by_vars(ind)`.] In general, think carefully about how to vectorize in a minimalistic and memory efficient way. You will find that you can craft very parsimonious and efficient code to solve complicated problems. For example, after merging multiple spatial datasets, I had some of the same map features (businesses) from multiple sources, and, unwilling to match features individually across data sources, I decided to keep the richest source covering each feature type and location. After creating a feature `importance` indicator comparable across sources, the deduplication expression ended up being a single line of the form: `fsubset(data, source == fmode(source, list(location, type), importance, "fill"))` - keep features from the importance-weighted most frequent source by location and type. If an effective *collapse* solution is not apparent, other packages may offer efficient solutions. Check out the [*fastverse*](https://fastverse.org/fastverse/) and its [suggested packages list](https://fastverse.org/fastverse/#suggested-extensions). For example if you want to efficiently replace multiple items in a vector, `kit::vswitch()/nswitch()` can be pretty magical. Also functions like `data.table::set()/rowid()` etc. are great. ## Point 2: Think About Memory and Optimize R programs are inefficient for 2 principal reasons: (1) operations are not vectorized; (2) too many intermediate objects/copies are created. *collapse*'s vectorized statistical functions help with (1), but it also provides many [efficient programming functions](https://fastverse.org/collapse/reference/efficient-programming.html) to deal with (2). One source of inefficiency in R code is the widespread use of logical vectors. For example ```r x <- abs(round(rnorm(1e6))) x[x == 0] <- NA ``` where `x == 0` creates a logical vector of 1 million elements just to indicate to R which elements of `x` are `0`. In *collapse*, `setv(x, 0, NA)` is the efficient equivalent. This also works if we don't want to replace with `NA` but with another vector `y`: ```r y <- rnorm(1e6) setv(x, NA, y) # Replaces missing x with y ``` is much better than ```r x[is.na(x)] <- y[is.na(x)] ``` `setv()` is quite versatile and also works with indices and logical vectors instead of elements to search for. You can also invert the query by setting `invert = TRUE`. In more complex workflows, we may wish to save the logical vector, e.g., `xmiss <- is.na(x)`, and use it repeatedly. One aspect to note here is that logical vectors are inefficient for subsetting compared to indices: ```r xNA <- na_insert(x, prop = 0.4) xmiss <- is.na(xNA) ind <- which(xmiss) bench::mark(x[xmiss], x[ind]) # # A tibble: 2 × 6 # expression min median `itr/sec` mem_alloc `gc/sec` # # 1 x[xmiss] 3.34ms 3.58ms 269. 8.39MB 4.21 # 2 x[ind] 771.74µs 972.11µs 1025. 3.05MB 6.61 ``` Thus, indices are always preferable. With *collapse*, they can be created directly using `whichNA(xNA)` in this case, or `whichv(x, 0)` for `which(x == 0)` or any other number. Also here there exist an `invert = TRUE` argument covering the `!=` case. For convenience, infix operators `x %==% 0` and `x %!=% 0` wrap `whichv(x, 0)` and `whichv(x, 0, invert = TRUE)`, respectively. Similarly, `fmatch()` supports faster matching with associated operators `%iin%` and `%!iin%` which also return indices, e.g., `letters %iin% c("a", "b")` returns `1:2`. This can also be used in subsetting: ```r bench::mark( `%in%` = fsubset(wlddev, iso3c %in% c("USA", "DEU", "ITA", "GBR")), `%iin%` = fsubset(wlddev, iso3c %iin% c("USA", "DEU", "ITA", "GBR")) ) # # A tibble: 2 × 6 # expression min median `itr/sec` mem_alloc `gc/sec` # # 1 %in% 146.8µs 165.7µs 6008. 3.8MB 2.12 # 2 %iin% 17.3µs 23.6µs 39878. 130.4KB 23.9 ``` Likewise, `anyNA(), allNA(), anyv()` and `allv()` help avoid expressions like `any(x == 0)` in favor of `anyv(x, 0)`. Other convenience functions exist such as `na_rm(x)` for the common `x[!is.na(x)]` expression which is extremely inefficient. Another hint here particularly for data frame subsetting is the `ss()` function, which has an argument `check = FALSE` to avoid checks on indices (small effect with this data size): ```r ind <- wlddev$iso3c %!iin% c("USA", "DEU", "ITA", "GBR") microbenchmark::microbenchmark( withcheck = ss(wlddev, ind), nocheck = ss(wlddev, ind, check = FALSE) ) # Unit: microseconds # expr min lq mean median uq max neval # withcheck 48.749 106.6615 124.4366 122.1595 143.8895 256.619 100 # nocheck 47.355 105.5750 126.9225 119.6380 150.8595 344.113 100 ``` Another common source of inefficiencies is copies produced in statistical operations. For example ```r x <- rnorm(100); y <- rnorm(100); z <- rnorm(100) res <- x + y + z # Creates 2 copies ``` For this particular case `res <- kit::psum(x, y, z)` offers an efficient solution^[In general, also see other packages, in particular *kit* and *data.table* for useful programming functions.]. A more general solution is ```r res <- x + y res %+=% z ``` *collapse*'s `%+=%`, `%-=%`, `%*=%` and `%/=%` operators are wrappers around the `setop()` function which also works with matrices and data frames.^[*Note* that infix operators do not obey the rules of arithmetic but are always evaluated from left to right.] This function also has a `rowwise` argument for operations between vectors and matrix/data.frame rows: ```r m <- qM(mtcars) setop(m, "*", seq_col(m), rowwise = TRUE) head(m / qM(mtcars)) # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 1 2 3 4 5 6 7 NaN 9 10 11 # Mazda RX4 Wag 1 2 3 4 5 6 7 NaN 9 10 11 # Datsun 710 1 2 3 4 5 6 7 8 9 10 11 # Hornet 4 Drive 1 2 3 4 5 6 7 8 NaN 10 11 # Hornet Sportabout 1 2 3 4 5 6 7 NaN NaN 10 11 # Valiant 1 2 3 4 5 6 7 8 NaN 10 11 ``` Some functions like `na_locf()`/`na_focb()` also have `set = TRUE` arguments to perform operations by reference.^[Note that `na_locf()`/`na_focb()` are not vectorized across groups, thus, if using them in a grouped `fmutate()` call, adding `set = TRUE` will save some memory on intermediate objects.] There is also `setTRA()` for (grouped) transformations by reference, wrapping `TRA(..., set = TRUE)`. Since `TRA` is added as an argument to all [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), `set = TRUE` can be passed down to modify by reference. For example: ```r fmedian(iris$Sepal.Length, iris$Species, TRA = "fill", set = TRUE) ``` Is the same as `setTRA(iris$Sepal.Length, fmedian(iris$Sepal.Length, iris$Species), "fill", iris$Species)`, replacing the values of the `Sepal.Length` vector with its species median by reference: ```r head(iris) # Sepal.Length Sepal.Width Petal.Length Petal.Width Species # 1 5 3.5 1.4 0.2 setosa # 2 5 3.0 1.4 0.2 setosa # 3 5 3.2 1.3 0.2 setosa # 4 5 3.1 1.5 0.2 setosa # 5 5 3.6 1.4 0.2 setosa # 6 5 3.9 1.7 0.4 setosa ``` This `set` argument can be invoked anywhere, also inside `fmutate()` calls with/without groups. This can also be done in combination with other transformations (sweeping operations). For example, the following turns the columns of the matrix into proportions. ```r fsum(m, TRA = "/", set = TRUE) fsum(m) # Check # mpg cyl disp hp drat wt qsec vs am gear carb # 1 1 1 1 1 1 1 1 1 1 1 ``` In summary, think what is really needed to complete a task and keep things to a minimum in terms of both computations and memory. Let's do a final exercise in this regard and create a hyper-efficient function for univariate linear regression by groups: ```r greg <- function(y, x, g) { g <- group(g) dmx <- fmean(x, g, TRA = "-", na.rm = FALSE) (fsum(y, g, dmx, use = FALSE, na.rm = FALSE) %/=% fsum(dmx, g, dmx, use = FALSE, na.rm = FALSE)) } # Test y <- rnorm(1e7) x <- rnorm(1e7) g <- sample.int(1e6, 1e7, TRUE) microbenchmark::microbenchmark(greg(y, x, g), group(g)) # Unit: milliseconds # expr min lq mean median uq max neval # greg(y, x, g) 131.39639 138.68961 153.1586 145.78243 161.48137 305.5862 100 # group(g) 62.41733 64.80468 72.2558 68.87266 73.21657 153.1643 100 ``` The expression computed by `greg()` amounts to `sum(y * (x - mean(x)))/sum((x - mean(x))^2)` for each group, which is equivalent to `cov(x, y)/var(x)`, but very efficient, requiring exactly one full copy of `x` to create a group-demeaned vector, `dmx`, and then using the `w` (weights) argument to `fsum()` to sum the products (`y * dmx` and `dmx * dmx`) on the fly, including a division by reference avoiding an additional copy. One cannot do much better coding a grouped regression directly in C. ## Point 3: Internally Favor Primitive R Objects and Functions This partly reiterates Point 1 but now with a focus on internal data representation rather than grouping and computing. The point could also be bluntly stated as: 'vectors, matrices and lists are good, data frames and complex objects are bad'. Many frameworks seem to imply the opposite - the *tidyverse* encourages you to cast your data as a tidy tibble, and *data.table* offers you a more efficient data frame. But these objects are internally complex, and, in the case of *data.table*, only efficient because of the internal C-level algorithms for large-data manipulation. You should always take a step back to ask yourself: for the statistical software I am writing, do I need this complexity? Complex objects require complex methods to manipulate them, thus, when using them, you incur the cost of everything that goes on in these methods. Vectors, matrices, and lists are much more efficient in R and *collapse* provides you with many options to manipulate them directly. It may surprise you to hear that, internally, *collapse* does not use data frame-like objects at all. Instead, such objects are cast to lists using `unclass(data)`, `class(data) <- NULL`, or `attributes(data) <- NULL`. This is advisable if you want to write fast package code for data frame-like objects. The benchmark below illustrates that basically everything you do on a *data.frame* is more expensive than on the equivalent list. ```r l <- unclass(mtcars) nam <- names(mtcars) microbenchmark::microbenchmark(names(mtcars), attr(mtcars, "names"), names(l), names(mtcars) <- nam, attr(mtcars, "names") <- nam, names(l) <- nam, mtcars[["mpg"]], .subset2(mtcars, "mpg"), l[["mpg"]], mtcars[3:8], .subset(mtcars, 3:8), l[3:8], ncol(mtcars), length(mtcars), length(unclass(mtcars)), length(l), nrow(mtcars), length(.subset2(mtcars, 1L)), length(l[[1L]])) # Unit: nanoseconds # expr min lq mean median uq max neval # names(mtcars) 164 205 240.26 246 246.0 410 100 # attr(mtcars, "names") 41 82 109.88 82 123.0 1476 100 # names(l) 0 0 24.60 41 41.0 82 100 # names(mtcars) <- nam 451 492 651.90 656 697.0 3321 100 # attr(mtcars, "names") <- nam 287 369 480.52 451 492.0 4346 100 # names(l) <- nam 164 246 276.34 246 287.0 533 100 # mtcars[["mpg"]] 2009 2091 2363.65 2173 2296.0 15539 100 # .subset2(mtcars, "mpg") 41 41 68.88 82 82.0 164 100 # l[["mpg"]] 41 82 78.31 82 82.0 205 100 # mtcars[3:8] 5166 5371 5607.98 5453 5576.0 15908 100 # .subset(mtcars, 3:8) 246 246 321.03 287 328.0 2788 100 # l[3:8] 246 287 305.45 287 328.0 492 100 # ncol(mtcars) 1025 1107 1200.07 1189 1230.0 2255 100 # length(mtcars) 164 205 249.28 246 266.5 492 100 # length(unclass(mtcars)) 123 164 176.71 164 164.0 861 100 # length(l) 0 0 18.86 0 41.0 287 100 # nrow(mtcars) 1025 1107 1239.84 1148 1230.0 6642 100 # length(.subset2(mtcars, 1L)) 41 82 113.57 82 123.0 1845 100 # length(l[[1L]]) 41 82 100.45 82 123.0 492 100 ``` By means of further illustration, let's recreate the `pwnobs()` function in *collapse* which counts pairwise missing values. The list method is written in R. A basic implementation is:^[By Point 2 this implementation is not ideal because I am creating two logical vectors for each iteration of the inner loop, but I currently don't see any way to write this more efficiently.] ```r pwnobs_list <- function(X) { dg <- fnobs(X) n <- ncol(X) nr <- nrow(X) N.mat <- diag(dg) for (i in 1:(n - 1L)) { miss <- is.na(X[[i]]) for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - sum(miss | is.na(X[[j]])) } rownames(N.mat) <- names(dg) colnames(N.mat) <- names(dg) N.mat } mtcNA <- na_insert(mtcars, prop = 0.2) pwnobs_list(mtcNA) # mpg cyl disp hp drat wt qsec vs am gear carb # mpg 26 20 20 20 20 20 21 22 21 21 22 # cyl 20 26 21 20 22 21 22 22 22 23 20 # disp 20 21 26 22 22 23 22 22 21 21 22 # hp 20 20 22 26 21 23 22 20 20 21 21 # drat 20 22 22 21 26 23 21 21 20 21 21 # wt 20 21 23 23 23 26 22 21 21 20 20 # qsec 21 22 22 22 21 22 26 22 20 22 20 # vs 22 22 22 20 21 21 22 26 20 23 21 # am 21 22 21 20 20 21 20 20 26 20 21 # gear 21 23 21 21 21 20 22 23 20 26 20 # carb 22 20 22 21 21 20 20 21 21 20 26 ``` Now with the above tips we can optimize this as follows: ```r pwnobs_list_opt <- function(X) { dg <- fnobs.data.frame(X) class(X) <- NULL n <- length(X) nr <- length(X[[1L]]) N.mat <- diag(dg) for (i in 1:(n - 1L)) { miss <- is.na(X[[i]]) for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - sum(miss | is.na(X[[j]])) } dimnames(N.mat) <- list(names(dg), names(dg)) N.mat } identical(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA)) # [1] TRUE microbenchmark::microbenchmark(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA)) # Unit: microseconds # expr min lq mean median uq max neval # pwnobs_list(mtcNA) 153.217 160.1255 185.09696 179.744 215.004 241.654 100 # pwnobs_list_opt(mtcNA) 27.429 31.1600 33.38507 32.964 35.137 45.387 100 ``` Evidently, the optimized function is 6x faster on this (small) dataset and we have changed nothing to the loops doing the computation. With larger data the difference is less stark, but you never know what's going on in methods you have not written and how they scale. My advice is: try to avoid them, use simple objects and take full control over your code. This also makes your code more robust and you can create class-agnostic code. If the latter is your intent the [vignette on *collapse*'s object handling](https://fastverse.org/collapse/articles/collapse_object_handling.html) will also be helpful. If you only use *collapse* functions this discussion is void - all *collapse* functions designed for data frames, including `join()`, `pivot()`, `fsubset()`, etc., internally handle your data as a list and are equally efficient on data frames and lists. However, if you want to use base R semantics (`[`, etc.) alongside *collapse* and other functions, it makes sense to unclass incoming data frame-like objects and reclass them at the end. If you don't want to internally convert data frames to lists, at least use functions `.subset()`, `.subset2()`, or `collapse::get_vars()` to efficiently extract columns and `attr()` to extract/set attributes. With matrices, use `dimnames()` directly instead of `rownames()` and `colnames()` which wrap it. Also avoid `as.data.frame()` and friends to coerce/recreate data frame-like objects. It is quite easy to construct a *data.frame* from a list: ```r attr(l, "row.names") <- .set_row_names(length(l[[1L]])) class(l) <- "data.frame" head(l, 2) # mpg cyl disp hp drat wt qsec vs am gear carb # 1 21 6 160 110 3.9 2.620 16.46 0 1 4 4 # 2 21 6 160 110 3.9 2.875 17.02 0 1 4 4 ``` You can also use *collapse* functions `qDF()`, `qDT()` and `qTBL()` to efficiently convert/create *data.frame*'s, *data.table*'s, and *tibble*'s: ```r library(data.table) library(tibble) microbenchmark::microbenchmark(qDT(mtcars), as.data.table(mtcars), qTBL(mtcars), as_tibble(mtcars)) # Unit: microseconds # expr min lq mean median uq max neval # qDT(mtcars) 2.952 3.280 6.35705 3.5670 3.8130 269.534 100 # as.data.table(mtcars) 34.194 36.572 44.93641 37.4535 39.2985 697.410 100 # qTBL(mtcars) 2.419 2.583 3.19267 2.8700 2.9930 38.704 100 # as_tibble(mtcars) 48.257 49.569 71.56304 50.4095 52.5005 2050.533 100 l <- unclass(mtcars) microbenchmark::microbenchmark(qDF(l), as.data.frame(l), as.data.table(l), as_tibble(l)) # Unit: microseconds # expr min lq mean median uq max neval # qDF(l) 1.722 2.2140 4.51779 2.4600 2.747 199.424 100 # as.data.frame(l) 210.412 225.1515 242.65973 248.3370 254.569 301.186 100 # as.data.table(l) 70.889 77.2030 90.30086 83.0045 88.683 798.393 100 # as_tibble(l) 55.350 61.8690 68.20924 67.0760 72.898 139.769 100 ``` *collapse* also provides functions like `setattrib()`, `copyMostAttrib()`, etc., to efficiently attach attributes again. So another efficient workflow for general data frame-like objects is to save the attributes `ax <- attributes(data)`, manipulate it as a list `attributes(data) <- NULL`, modify `ax$names` and `ax$row.names` as needed and then use `setattrib(data, ax)` before returning. ## Some Notes on Global Options *collapse* has its own set of global options which can be set using `set_collapse()` and retrieved using `get_collapse()`.^[This is done mainly for efficiency reasons, but also do implement advanced options such as namespace masking (options `mask` and `remove`). The options are stored in an internal environment called `.op` visible in the documentation of some functions such as `fmean()` when used to set argument defaults.] This confers responsibilities upon package developers as setting these options inside a package also affects how *collapse* behaves outside of your package. In general, the same rules apply as for setting other R options through `options()` or `par()`: they need to be reset using `on.exit()` so that the user choices are unaffected even if your package function breaks. For example, if you want a block of code multithreaded and without missing value skipping for maximum performance: ```r fast_function <- function(x, ...) { # Your code... oldopts <- set_collapse(nthreads = 4, na.rm = FALSE) on.exit(set_collapse(oldopts)) # Multithreaded code... } ``` Namespace masking (options `mask` and `remove`) should not be set inside packages because it may have unintended side-effects for the user (e.g., *collapse* appears at the top of the `search()` path afterwards). Conversely, user choices in `set_collapse()` also affect your package code, except for namespace masking as you should specify explicitly which *collapse* functions you are using (e.g., via `importFrom("collapse", "fmean")` in NAMESPACE or `collapse::fmean()` in your code). Particularly options `na.rm`, `nthreads`, and `sort`, if set by the user, will impact your code, unless you explicitly set the targeted arguments (e.g., `nthreads` and `na.rm` in statistical functions like `fmean()`, and `sort` arguments in grouping functions like `GRP()`/`qF()`/`qG()`/`fgroup_by()`). My general view is that this is not necessary - if the user sets `set_collapse(na.rm = FALSE)` because data has no missing values, then it is good if that also speeds up your package functions. However, if your package code generates missing values and expects *collapse* functions to skip them you should take care of this using either `set_collapse()` + `on.exit()` or explicitly setting `na.rm = TRUE` in all relevant functions. Also watch out for internally-grouped aggregations using [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), which are affected by global defaults: ```r fmean(mtcars$mpg, mtcars$cyl) # 4 6 8 # 26.66364 19.74286 15.10000 oldopts <- set_collapse(sort = FALSE) fmean(mtcars$mpg, mtcars$cyl) # 6 4 8 # 19.74286 26.66364 15.10000 ``` Statistical functions do not have `sort` arguments, thus, if it is crucial that the output remains sorted, ensure that a sorted factor, 'qG', or 'GRP' object is passed: ```r fmean(mtcars$mpg, qF(mtcars$cyl, sort = TRUE)) # 4 6 8 # 26.66364 19.74286 15.10000 set_collapse(oldopts) ``` Of course, you can also check which options the user has set and adjust your code, e.g. ```r # Your code ... if(!get_collapse("sort")) { oldopts <- set_collapse(sort = TRUE) on.exit(set_collapse(oldopts)) } # Critical code ... ``` ## Conclusion *collapse* can become a game-changer for your statistical software development in R, enabling you to write programs that effectively run like C while accomplishing complex statistical/data tasks with few lines of code. This however requires taking a closer look at the package, in particular the [documentation](https://fastverse.org/collapse/reference/collapse-documentation.html), and following the advice given in this vignette. collapse/inst/doc/collapse_and_data.table.Rmd0000644000176200001440000021621215121640575020757 0ustar liggesusers--- title: "collapse and data.table" subtitle: "Harmony and High Performance" author: "Sebastian Krantz" date: "2021-06-27" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and data.table} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- This vignette focuses on using *collapse* with the popular *data.table* package by Matt Dowle and Arun Srinivasan. In contrast to *dplyr* and *plm* whose methods ('grouped_df', 'pseries', 'pdata.frame') *collapse* supports, the integration between *collapse* and *data.table* is hidden in the 'data.frame' methods and *collapse*'s C code. From version 1.6.0 *collapse* seamlessly handles *data.tables*, permitting reference operations (`set*`, `:=`) on data tables created with collapse (`qDT`) or returned from *collapse*'s data manipulation functions (= all functions except `.FAST_FUN`, `.OPERATOR_FUN`, `BY` and `TRA`, see the [NEWS]() for details on the low-level integration). Apart from *data.table* reference semantics, both packages work similarly on the C/C++ side of things, and nicely complement each other in functionality. ## Overview of Both Packages Both *data.table* and *collapse* are high-performance packages that work well together. For effective co-use it is helpful to understand where each has its strengths, what one can do what the other cannot, and where they overlap. Therefore this small comparison: * *data.table* offers an enhanced data frame based class to contain data (including list columns). For this class it provides a concise data manipulation syntax which also includes fast aggregation / slit-apply-combine computing, (rolling, non-equi) joins, keying, reshaping, some time-series functionality like lagging and rolling statistics, set operations on tables and a number of very useful other functions like the fast csv reader, fast switches, list-transpose etc.. *data.table* makes data management, and computations on data very easy and scalable, supporting huge datasets in a very memory efficient way. The package caters well to the end user by compressing an enormous amount of functionality into two square brackets `[]`. Some of the exported functions are great for programming and also support other classes, but a lot of the functionality and optimization of *data.table* happens under the hood and can only be accessed through the non-standard evaluation table `[i, j, by]` syntax. This syntax has a cost of about 1-3 milliseconds for each call. Memory efficiency and thread-parallelization make *data.table* the star performer on huge data. * *collapse* is class-agnostic in nature, supporting vectors, matrices, data frames and non-destructively handling most R classes and objects. It focuses on advanced statistical computing, proving fast column-wise grouped and weighted statistical functions, fast and complex data aggregation and transformations, linear fitting, time series and panel data computations, advanced summary statistics, and recursive processing of lists of data objects. It also includes powerful functions for data manipulation, grouping / factor generation, recoding, handling outliers and missing values. The package default for missing values is `na.rm = TRUE`, which is implemented efficiently in C/C++ in all functions. *collapse* supports both *tidyverse* (piped) and base R / standard evaluation programming. It makes accessible most of it's internal C/C++ based functionality (like grouping objects). *collapse*'s R functions are simple and strongly optimized, i.e. they access the serial C/C++ code quickly, resulting in baseline execution speeds of 10-50 microseconds. All of this makes *collapse* ideal for advanced statistical computing on matrices and larger datasets, and tasks requiring fast programs with repeated function executions. ## Interoperating and some Do's and Dont's Applying *collapse* functions to a data.table always gives a data.table back e.g. ```r library(collapse) library(magrittr) library(data.table) DT <- qDT(wlddev) # collapse::qDT converts objects to data.table using a shallow copy DT %>% gby(country) %>% gv(9:13) %>% fmean # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 # Same thing, but notice that fmean give's NA's for missing countries DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13] # Key: # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NaN 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NaN NaN NaN 43115.10 # 5: Andorra 40083.0911 NaN NaN NaN 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NaN NaN 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 # This also works without magrittr pipes with the collap() function collap(DT, ~ country, fmean, cols = 9:13) # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` By default, *collapse* orders groups in aggregations, which is equivalent to using `keyby` with *data.table*. `gby / fgroup_by` has an argument `sort = FALSE` to yield an unordered grouping equivalent to *data.table*'s `by` on character data^[Grouping on numeric variables in *collapse* is always ordered.]. At this data size *collapse* outperforms *data.table* (which might reverse as data size grows, depending in your computer, the number of *data.table* threads used, and the function in question): ```r library(microbenchmark) microbenchmark(collapse = DT %>% gby(country) %>% get_vars(9:13) %>% fmean, data.table = DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 203.073 212.7285 223.4156 217.1565 225.6230 475.559 100 # data.table 758.623 777.4010 929.5450 793.1655 854.4605 2292.515 100 ``` It is critical to never do something like this: ```r DT[, lapply(.SD, fmean), keyby = country, .SDcols = 9:13] # Key: # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` The reason is that *collapse* functions are S3 generic with methods for vectors, matrices and data frames among others. So you incur a method-dispatch for every column and every group the function is applied to. ```r fmean # function (x, ...) # UseMethod("fmean") # # methods(fmean) # [1] fmean.data.frame* fmean.default* fmean.grouped_df* fmean.list* fmean.matrix* # [6] fmean.units* fmean.zoo* # see '?methods' for accessing help and source code ``` You may now contend that `base::mean` is also S3 generic, but in this `DT[, lapply(.SD, mean, na.rm = TRUE), by = country, .SDcols = 9:13]` code *data.table* does not use `base::mean`, but `data.table:::gmean`, an internal optimized mean function which is efficiently applied over those groups (see `?data.table::GForce`). `fmean` works similar, and includes this functionality explicitly. ```r args(fmean.data.frame) # function (x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], # use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], # ...) # NULL ``` Here we can see the `x` argument for the data, the `g` argument for grouping vectors, a weight vector `w`, different options `TRA` to transform the original data using the computed means, and some functionality regarding missing values (default: removed / skipped), group names (which are added as row-names to a data frame, but not to a *data.table*) etc. So we can also do ```r fmean(gv(DT, 9:13), DT$country) # PCGDP LIFEEX GINI ODA POP # # 1: 483.8351 49.19717 NA 1487548499 18362258.22 # 2: 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: 10071.0659 NA NA NA 43115.10 # 5: 40083.0911 NA NA NA 51547.35 # --- # 212: 35629.7336 73.71292 NA NA 92238.53 # 213: 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: 1219.4360 54.53360 45.93333 397104997 9402160.33 # Or g <- GRP(DT, "country") add_vars(g[["groups"]], fmean(gv(DT, 9:13), g)) # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` To give us the same result obtained through the high-level functions `gby / fgroup_by` or `collap`. This is however not what *data.table* is doing in `DT[, lapply(.SD, fmean), by = country, .SDcols = 9:13]`. Since `fmean` is not a function it recognizes and is able to optimize, it does something like this, ```r BY(gv(DT, 9:13), g, fmean) # using collapse::BY # PCGDP LIFEEX GINI ODA POP # # 1: 483.8351 49.19717 NA 1487548499 18362258.22 # 2: 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: 10071.0659 NA NA NA 43115.10 # 5: 40083.0911 NA NA NA 51547.35 # --- # 212: 35629.7336 73.71292 NA NA 92238.53 # 213: 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` which applies `fmean` to every group in every column of the data. More generally, it is very important to understand that *collapse* is not based around applying functions to data by groups using some universal mechanism: The *dplyr* `data %>% group_by(...) %>% summarize(...) / mutate(...)` and *data.table* `[i, j, by]` syntax are essentially universal mechanisms to apply any function to data by groups. *data.table* additionally internally optimizes some functions (`min, max, mean, median, var, sd, sum, prod, first, last, head, tail`) which they called GForce, `?data.table::GForce`. *collapse* instead provides grouped statistical and transformation functions where all grouped computation is done efficiently in C++, and some supporting mechanisms (`fgroup_by`, `collap`) to operate them. In *data.table* words, everything^[Apart from `collapse::BY` which is only an auxiliary function written in base R to perform flexible split-apply combine computing on vectors, matrices and data frames.] in *collapse*, the *Fast Statistical Functions*, data transformations, time series etc. is GForce optimized. The full set of optimized grouped statistical and transformation functions in *collapse* is: ```r .FAST_FUN # [1] "fmean" "fmedian" "fmode" "fsum" "fprod" "fsd" "fvar" # [8] "fmin" "fmax" "fnth" "ffirst" "flast" "fnobs" "fndistinct" # [15] "fcumsum" "fscale" "fbetween" "fwithin" "fhdbetween" "fhdwithin" "flag" # [22] "fdiff" "fgrowth" ``` Additional optimized grouped functions include `TRA`, `qsu`, `varying`, `fFtest`, `psmat`, `psacf`, `pspacf`, `psccf`. The nice thing about those GForce (fast) functions provided by *collapse* is that they can be accessed explicitly and programmatically without any overhead as incurred through *data.table*, they cover a broader range of statistical operations (such as mode, distinct values, order statistics), support sampling weights, operate in a class-agnostic way on vectors, matrices, data.frame's and many related classes, and cover transformations (replacing and sweeping, scaling, (higher order) centering, linear fitting) and time series functionality (lags, differences and growth rates, including irregular time series and unbalanced panels). So if we would want to use `fmean` inside the *data.table*, we should do something like this: ```r # This does not save the grouping columns, we are simply passing a grouping vector to g # and aggregating the subset of the data table (.SD). DT[, fmean(.SD, country), .SDcols = 9:13] # PCGDP LIFEEX GINI ODA POP # # 1: 483.8351 49.19717 NA 1487548499 18362258.22 # 2: 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: 10071.0659 NA NA NA 43115.10 # 5: 40083.0911 NA NA NA 51547.35 # --- # 212: 35629.7336 73.71292 NA NA 92238.53 # 213: 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: 1219.4360 54.53360 45.93333 397104997 9402160.33 # If we want to keep the grouping columns, we need to group .SD first. DT[, fmean(gby(.SD, country)), .SDcols = c(1L, 9:13)] # country PCGDP LIFEEX GINI ODA POP # # 1: Afghanistan 483.8351 49.19717 NA 1487548499 18362258.22 # 2: Albania 2819.2400 71.68027 31.41111 312928126 2708297.17 # 3: Algeria 3532.2714 63.56290 34.36667 612238500 25305290.68 # 4: American Samoa 10071.0659 NA NA NA 43115.10 # 5: Andorra 40083.0911 NA NA NA 51547.35 # --- # 212: Virgin Islands (U.S.) 35629.7336 73.71292 NA NA 92238.53 # 213: West Bank and Gaza 2388.4348 71.60780 34.52500 1638581462 3312289.13 # 214: Yemen, Rep. 1069.6596 52.53707 35.46667 859950996 13741375.82 # 215: Zambia 1318.8627 51.09263 52.68889 734624330 8614972.38 # 216: Zimbabwe 1219.4360 54.53360 45.93333 397104997 9402160.33 ``` Needless to say this kind of programming seems a bit arcane, so there is actually not that great of a scope to use collapse's *Fast Statistical Functions* for aggregations inside *data.table*. I drive this point home with a benchmark: ```r microbenchmark(collapse = DT %>% gby(country) %>% get_vars(9:13) %>% fmean, data.table = DT[, lapply(.SD, mean, na.rm = TRUE), keyby = country, .SDcols = 9:13], data.table_base = DT[, lapply(.SD, base::mean, na.rm = TRUE), keyby = country, .SDcols = 9:13], hybrid_bad = DT[, lapply(.SD, fmean), keyby = country, .SDcols = 9:13], hybrid_ok = DT[, fmean(gby(.SD, country)), .SDcols = c(1L, 9:13)]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 207.419 234.9915 322.3994 255.6760 283.6790 1685.305 100 # data.table 755.630 845.7685 1029.9024 904.6650 962.1060 2409.529 100 # data.table_base 2795.257 3148.4310 4034.2081 3349.8025 3561.9570 37919.916 100 # hybrid_bad 2198.994 2481.3815 3737.1102 2650.5680 2909.4215 62158.747 100 # hybrid_ok 374.699 451.1025 564.6873 484.9275 542.8605 2082.554 100 ``` It is evident that *data.table* has some overhead, so there is absolutely no need to do this kind of syntax manipulation. There is more scope to use *collapse* transformation functions inside *data.table*. Below some basic examples: ```r # Computing a column containing the sum of ODA received by country DT[, sum_ODA := sum(ODA, na.rm = TRUE), by = country] # Same using fsum; "replace_fill" overwrites missing values, "replace" keeps the DT[, sum_ODA := fsum(ODA, country, TRA = "replace_fill")] # Same: A native collapse solution using settransform (or its shortcut form) settfm(DT, sum_ODA = fsum(ODA, country, TRA = "replace_fill")) # settfm may be more convenient than `:=` for multiple column modifications, # each involving a different grouping: # This computes the percentage of total ODA distributed received by # each country both over time and within a given year settfm(DT, perc_c_ODA = fsum(ODA, country, TRA = "%"), perc_y_ODA = fsum(ODA, year, TRA = "%")) ``` The `TRA` argument is available to all *Fast Statistical Functions* (see the macro `.FAST_STAT_FUN`) and offers 10 different replacing and sweeping operations. Note that `TRA()` can also be called directly to replace or sweep with a previously aggregated *data.table*. A set of operators `%rr%`, `%r+%`, `%r-%`, `%r*%`, `%r/%`, `%cr%`, `%c+%`, `%c-%`, `%c*%`, `%c/%` additionally facilitate row- or column-wise replacing or sweeping out vectors of statistics or other *data.table*'s. Similarly, we can use the following vector valued functions ```r setdiff(.FAST_FUN, .FAST_STAT_FUN) # [1] "fcumsum" "fscale" "fbetween" "fwithin" "fhdbetween" "fhdwithin" "flag" # [8] "fdiff" "fgrowth" ``` for very efficient data transformations: ```r # Centering GDP DT[, demean_PCGDP := PCGDP - mean(PCGDP, na.rm = TRUE), by = country] DT[, demean_PCGDP := fwithin(PCGDP, country)] # Lagging GDP DT[order(year), lag_PCGDP := shift(PCGDP, 1L), by = country] DT[, lag_PCGDP := flag(PCGDP, 1L, country, year)] # Computing a growth rate DT[order(year), growth_PCGDP := (PCGDP / shift(PCGDP, 1L) - 1) * 100, by = country] DT[, lag_PCGDP := fgrowth(PCGDP, 1L, 1L, country, year)] # 1 lag, 1 iteration # Several Growth rates DT[order(year), paste0("growth_", .c(PCGDP, LIFEEX, GINI, ODA)) := (.SD / shift(.SD, 1L) - 1) * 100, by = country, .SDcols = 9:13] # Same thing using collapse DT %<>% tfm(gv(., 9:13) %>% fgrowth(1L, 1L, country, year) %>% add_stub("growth_")) # Or even simpler using settransform and the Growth operator settfmv(DT, 9:13, G, 1L, 1L, country, year, apply = FALSE) head(DT) # country iso3c date year decade region income OECD PCGDP LIFEEX GINI # # 1: Afghanistan AFG 1961-01-01 1960 1960 South Asia Low income FALSE NA 32.446 NA # 2: Afghanistan AFG 1962-01-01 1961 1960 South Asia Low income FALSE NA 32.962 NA # 3: Afghanistan AFG 1963-01-01 1962 1960 South Asia Low income FALSE NA 33.471 NA # 4: Afghanistan AFG 1964-01-01 1963 1960 South Asia Low income FALSE NA 33.971 NA # 5: Afghanistan AFG 1965-01-01 1964 1960 South Asia Low income FALSE NA 34.463 NA # 6: Afghanistan AFG 1966-01-01 1965 1960 South Asia Low income FALSE NA 34.948 NA # ODA POP sum_ODA perc_c_ODA perc_y_ODA demean_PCGDP lag_PCGDP growth_PCGDP # # 1: 116769997 8996973 89252909923 0.1308305 0.4441407 NA NA NA # 2: 232080002 9169410 89252909923 0.2600251 0.7356654 NA NA NA # 3: 112839996 9351441 89252909923 0.1264272 0.3494956 NA NA NA # 4: 237720001 9543205 89252909923 0.2663443 0.7003399 NA NA NA # 5: 295920013 9744781 89252909923 0.3315522 0.8570540 NA NA NA # 6: 341839996 9956320 89252909923 0.3830015 0.8992630 NA NA NA # growth_LIFEEX growth_GINI growth_ODA growth_POP G1.PCGDP G1.LIFEEX G1.GINI G1.ODA G1.POP # # 1: NA NA NA NA NA NA NA NA NA # 2: 1.590335 NA 98.74969 1.916611 NA 1.590335 NA 98.74969 1.916611 # 3: 1.544202 NA -51.37884 1.985199 NA 1.544202 NA -51.37884 1.985199 # 4: 1.493830 NA 110.66998 2.050636 NA 1.493830 NA 110.66998 2.050636 # 5: 1.448294 NA 24.48259 2.112246 NA 1.448294 NA 24.48259 2.112246 # 6: 1.407306 NA 15.51770 2.170793 NA 1.407306 NA 15.51770 2.170793 ``` Since transformations (`:=` operations) are not highly optimized in *data.table*, *collapse* will be faster in most circumstances. Also time series functionality in *collapse* is significantly faster as it does not require data to be ordered or balanced to compute. For example `flag` computes an ordered lag without sorting the entire data first. ```r # Lets generate a large dataset and benchmark this stuff DT_large <- replicate(1000, qDT(wlddev), simplify = FALSE) %>% lapply(tfm, country = paste(country, rnorm(1))) %>% rbindlist # 12.7 million Obs fdim(DT_large) # [1] 13176000 13 microbenchmark( S1 = DT_large[, sum_ODA := sum(ODA, na.rm = TRUE), by = country], S2 = DT_large[, sum_ODA := fsum(ODA, country, TRA = "replace_fill")], S3 = settfm(DT_large, sum_ODA = fsum(ODA, country, TRA = "replace_fill")), W1 = DT_large[, demean_PCGDP := PCGDP - mean(PCGDP, na.rm = TRUE), by = country], W2 = DT_large[, demean_PCGDP := fwithin(PCGDP, country)], L1 = DT_large[order(year), lag_PCGDP := shift(PCGDP, 1L), by = country], L2 = DT_large[, lag_PCGDP := flag(PCGDP, 1L, country, year)], L3 = DT_large[, lag_PCGDP := shift(PCGDP, 1L), by = country], # Not ordered L4 = DT_large[, lag_PCGDP := flag(PCGDP, 1L, country)], # Not ordered times = 5 ) # Unit: milliseconds # expr min lq mean median uq max neval # S1 343.03396 347.18443 391.7494 364.51431 379.7866 524.2279 5 # S2 100.52544 101.72645 165.8369 128.76042 153.6818 344.4906 5 # S3 98.48249 104.80830 120.3499 114.20591 127.0192 157.2335 5 # W1 913.00883 1009.29930 1071.0633 1035.74446 1104.7680 1292.4957 5 # W2 99.48199 99.69654 110.0907 113.95884 118.5229 118.7931 5 # L1 1812.59987 1822.58026 1896.8809 1905.67377 1942.9434 2000.6074 5 # L2 110.36056 128.45845 135.0995 133.80219 139.1405 163.7357 5 # L3 611.28392 665.22123 768.0616 718.38679 803.7170 1041.6991 5 # L4 64.26369 66.99006 105.7952 84.26537 106.1809 207.2758 5 rm(DT_large) gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3113072 166.3 8413097 449.4 NA 8413097 449.4 # Vcells 7897587 60.3 324289364 2474.2 16384 405361681 3092.7 ``` ## Further *collapse* features supporting *data.table*'s As mentioned, `qDT` is a flexible and very fast function to create / column-wise convert R objects to *data.table*'s. You can also row-wise convert a matrix to data.table using `mrtl`: ```r # Creating a matrix from mtcars m <- qM(mtcars) str(m) # num [1:32, 1:11] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ... # ..$ : chr [1:11] "mpg" "cyl" "disp" "hp" ... # Demonstrating another nice feature of qDT qDT(m, row.names.col = "car") %>% head # car mpg cyl disp hp drat wt qsec vs am gear carb # # 1: Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 # 2: Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 # 3: Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 # 4: Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 # 5: Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 # 6: Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 # Row-wise conversion to data.table mrtl(m, names = TRUE, return = "data.table") %>% head(2) # Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout Valiant Duster 360 Merc 240D # # 1: 21 21 22.8 21.4 18.7 18.1 14.3 24.4 # 2: 6 6 4.0 6.0 8.0 6.0 8.0 4.0 # Merc 230 Merc 280 Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood # # 1: 22.8 19.2 17.8 16.4 17.3 15.2 10.4 # 2: 4.0 6.0 6.0 8.0 8.0 8.0 8.0 # Lincoln Continental Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla Toyota Corona # # 1: 10.4 14.7 32.4 30.4 33.9 21.5 # 2: 8.0 8.0 4.0 4.0 4.0 4.0 # Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa # # 1: 15.5 15.2 13.3 19.2 27.3 26 30.4 # 2: 8.0 8.0 8.0 8.0 4.0 4 4.0 # Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E # # 1: 15.8 19.7 15 21.4 # 2: 8.0 6.0 8 4.0 ``` The computational efficiency of these functions makes them very useful to use in *data.table* based workflows. ```r # Benchmark microbenchmark(qDT(m, "car"), mrtl(m, TRUE, "data.table")) # Unit: microseconds # expr min lq mean median uq max neval # qDT(m, "car") 4.838 5.043 6.16230 5.3300 6.437 20.049 100 # mrtl(m, TRUE, "data.table") 3.608 3.854 4.23981 3.9975 4.182 15.908 100 ``` For example we could regress the growth rate of GDP per capita on the Growth rate of life expectancy in each country and save results in a *data.table*: ```r library(lmtest) wlddev %>% fselect(country, PCGDP, LIFEEX) %>% # This counts missing values on PCGDP and LIFEEX only na_omit(cols = -1L) %>% # This removes countries with less than 20 observations fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% # Run estimations by country using data.table .[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX))), "Coef"), keyby = country] %>% head # Key: # country Coef Estimate Std. Error t value Pr(>|t|) # # 1: Albania (Intercept) -3.6146411 2.371885 -1.5239527 0.136023086 # 2: Albania G(LIFEEX) 22.1596308 7.288971 3.0401591 0.004325856 # 3: Algeria (Intercept) 0.5973329 1.740619 0.3431726 0.732731107 # 4: Algeria G(LIFEEX) 0.8412547 1.689221 0.4980134 0.620390703 # 5: Angola (Intercept) -3.3793976 1.540330 -2.1939445 0.034597175 # 6: Angola G(LIFEEX) 4.2362895 1.402380 3.0207852 0.004553260 ``` If we only need the coefficients, not the standard errors, we can also use `collapse::flm` together with `mrtl`: ```r wlddev %>% fselect(country, PCGDP, LIFEEX) %>% na_omit(cols = -1L) %>% fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% .[, mrtl(flm(fgrowth(PCGDP)[-1L], cbind(Intercept = 1, LIFEEX = fgrowth(LIFEEX)[-1L])), TRUE), keyby = country] %>% head # Key: # country Intercept LIFEEX # # 1: Albania -3.61464113 22.1596308 # 2: Algeria 0.59733291 0.8412547 # 3: Angola -3.37939760 4.2362895 # 4: Antigua and Barbuda -3.11880717 18.8700870 # 5: Argentina 1.14613567 -0.2896305 # 6: Armenia 0.08178344 11.5523992 ``` ... which provides a significant speed gain here: ```r microbenchmark( A = wlddev %>% fselect(country, PCGDP, LIFEEX) %>% na_omit(cols = -1L) %>% fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% .[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX))), "Coef"), keyby = country], B = wlddev %>% fselect(country, PCGDP, LIFEEX) %>% na_omit(cols = -1L) %>% fsubset(fnobs(PCGDP, country, "replace_fill") > 20L) %>% qDT %>% .[, mrtl(flm(fgrowth(PCGDP)[-1L], cbind(Intercept = 1, LIFEEX = fgrowth(LIFEEX)[-1L])), TRUE), keyby = country] ) # Unit: milliseconds # expr min lq mean median uq max neval # A 58.914253 60.063381 68.770933 60.865217 73.507813 241.594509 100 # B 3.145766 3.293715 3.463643 3.377006 3.503983 5.378995 100 ``` Another feature to highlight at this point are *collapse*'s list processing functions, in particular `rsplit`, `rapply2d`, `get_elem` and `unlist2d`. `rsplit` is an efficient recursive generalization of `split`: ```r DT_list <- rsplit(DT, country + year + PCGDP + LIFEEX ~ region + income) # Note: rsplit(DT, year + PCGDP + LIFEEX ~ region + income, flatten = TRUE) # would yield a simple list with interacted categories (like split) str(DT_list, give.attr = FALSE) # List of 7 # $ East Asia & Pacific :List of 3 # ..$ High income :Classes 'data.table' and 'data.frame': 793 obs. of 4 variables: # .. ..$ country: chr [1:793] "Australia" "Australia" "Australia" "Australia" ... # .. ..$ year : int [1:793] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:793] 19378 19469 19246 20053 21036 ... # .. ..$ LIFEEX : num [1:793] 70.8 71 70.9 70.9 70.9 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 793 obs. of 4 variables: # .. ..$ country: chr [1:793] "Cambodia" "Cambodia" "Cambodia" "Cambodia" ... # .. ..$ year : int [1:793] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:793] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:793] 41.2 41.4 41.5 41.7 41.9 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 610 obs. of 4 variables: # .. ..$ country: chr [1:610] "American Samoa" "American Samoa" "American Samoa" "American Samoa" ... # .. ..$ year : int [1:610] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:610] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:610] NA NA NA NA NA NA NA NA NA NA ... # $ Europe & Central Asia :List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 2257 obs. of 4 variables: # .. ..$ country: chr [1:2257] "Andorra" "Andorra" "Andorra" "Andorra" ... # .. ..$ year : int [1:2257] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:2257] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:2257] NA NA NA NA NA NA NA NA NA NA ... # ..$ Low income :Classes 'data.table' and 'data.frame': 61 obs. of 4 variables: # .. ..$ country: chr [1:61] "Tajikistan" "Tajikistan" "Tajikistan" "Tajikistan" ... # .. ..$ year : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:61] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:61] 50.6 50.9 51.2 51.5 51.9 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 244 obs. of 4 variables: # .. ..$ country: chr [1:244] "Kyrgyz Republic" "Kyrgyz Republic" "Kyrgyz Republic" "Kyrgyz Republic" ... # .. ..$ year : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:244] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:244] 56.1 56.6 57 57.4 57.9 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 976 obs. of 4 variables: # .. ..$ country: chr [1:976] "Albania" "Albania" "Albania" "Albania" ... # .. ..$ year : int [1:976] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:976] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:976] 62.3 63.3 64.2 64.9 65.5 ... # $ Latin America & Caribbean :List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 1037 obs. of 4 variables: # .. ..$ country: chr [1:1037] "Antigua and Barbuda" "Antigua and Barbuda" "Antigua and Barbuda" "Antigua and Barbuda" ... # .. ..$ year : int [1:1037] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1037] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:1037] 62 62.5 63 63.5 64 ... # ..$ Low income :Classes 'data.table' and 'data.frame': 61 obs. of 4 variables: # .. ..$ country: chr [1:61] "Haiti" "Haiti" "Haiti" "Haiti" ... # .. ..$ year : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:61] 1512 1439 1523 1466 1414 ... # .. ..$ LIFEEX : num [1:61] 41.8 42.2 42.6 43 43.4 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 244 obs. of 4 variables: # .. ..$ country: chr [1:244] "Bolivia" "Bolivia" "Bolivia" "Bolivia" ... # .. ..$ year : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:244] 1005 1007 1042 1091 1112 ... # .. ..$ LIFEEX : num [1:244] 41.8 42.1 42.5 42.8 43.2 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 1220 obs. of 4 variables: # .. ..$ country: chr [1:1220] "Argentina" "Argentina" "Argentina" "Argentina" ... # .. ..$ year : int [1:1220] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1220] 5643 5853 5711 5323 5773 ... # .. ..$ LIFEEX : num [1:1220] 65.1 65.2 65.3 65.3 65.4 ... # $ Middle East & North Africa:List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 488 obs. of 4 variables: # .. ..$ country: chr [1:488] "Bahrain" "Bahrain" "Bahrain" "Bahrain" ... # .. ..$ year : int [1:488] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:488] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:488] 51.9 53.2 54.6 55.9 57.2 ... # ..$ Low income :Classes 'data.table' and 'data.frame': 122 obs. of 4 variables: # .. ..$ country: chr [1:122] "Syrian Arab Republic" "Syrian Arab Republic" "Syrian Arab Republic" "Syrian Arab Republic" ... # .. ..$ year : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:122] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:122] 52 52.6 53.2 53.8 54.4 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 305 obs. of 4 variables: # .. ..$ country: chr [1:305] "Djibouti" "Djibouti" "Djibouti" "Djibouti" ... # .. ..$ year : int [1:305] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:305] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:305] 44 44.5 44.9 45.3 45.7 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 366 obs. of 4 variables: # .. ..$ country: chr [1:366] "Algeria" "Algeria" "Algeria" "Algeria" ... # .. ..$ year : int [1:366] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:366] 2481 2091 1638 2146 2214 ... # .. ..$ LIFEEX : num [1:366] 46.1 46.6 47.1 47.5 48 ... # $ North America :List of 1 # ..$ High income:Classes 'data.table' and 'data.frame': 183 obs. of 4 variables: # .. ..$ country: chr [1:183] "Bermuda" "Bermuda" "Bermuda" "Bermuda" ... # .. ..$ year : int [1:183] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:183] 33363 34080 34763 34324 37202 ... # .. ..$ LIFEEX : num [1:183] NA NA NA NA NA ... # $ South Asia :List of 3 # ..$ Low income :Classes 'data.table' and 'data.frame': 122 obs. of 4 variables: # .. ..$ country: chr [1:122] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ... # .. ..$ year : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:122] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:122] 32.4 33 33.5 34 34.5 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 244 obs. of 4 variables: # .. ..$ country: chr [1:244] "Bangladesh" "Bangladesh" "Bangladesh" "Bangladesh" ... # .. ..$ year : int [1:244] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:244] 372 384 394 381 411 ... # .. ..$ LIFEEX : num [1:244] 45.4 46 46.6 47.1 47.6 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 122 obs. of 4 variables: # .. ..$ country: chr [1:122] "Maldives" "Maldives" "Maldives" "Maldives" ... # .. ..$ year : int [1:122] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:122] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:122] 37.3 37.9 38.6 39.2 39.9 ... # $ Sub-Saharan Africa :List of 4 # ..$ High income :Classes 'data.table' and 'data.frame': 61 obs. of 4 variables: # .. ..$ country: chr [1:61] "Seychelles" "Seychelles" "Seychelles" "Seychelles" ... # .. ..$ year : int [1:61] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:61] 2830 2617 2763 2966 3064 ... # .. ..$ LIFEEX : num [1:61] NA NA NA NA NA NA NA NA NA NA ... # ..$ Low income :Classes 'data.table' and 'data.frame': 1464 obs. of 4 variables: # .. ..$ country: chr [1:1464] "Benin" "Benin" "Benin" "Benin" ... # .. ..$ year : int [1:1464] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1464] 712 724 689 710 745 ... # .. ..$ LIFEEX : num [1:1464] 37.3 37.7 38.2 38.7 39.1 ... # ..$ Lower middle income:Classes 'data.table' and 'data.frame': 1037 obs. of 4 variables: # .. ..$ country: chr [1:1037] "Angola" "Angola" "Angola" "Angola" ... # .. ..$ year : int [1:1037] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:1037] NA NA NA NA NA NA NA NA NA NA ... # .. ..$ LIFEEX : num [1:1037] 37.5 37.8 38.1 38.4 38.8 ... # ..$ Upper middle income:Classes 'data.table' and 'data.frame': 366 obs. of 4 variables: # .. ..$ country: chr [1:366] "Botswana" "Botswana" "Botswana" "Botswana" ... # .. ..$ year : int [1:366] 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... # .. ..$ PCGDP : num [1:366] 408 425 444 460 480 ... # .. ..$ LIFEEX : num [1:366] 49.2 49.7 50.2 50.6 51.1 ... ``` We can use `rapply2d` to apply a function to each data frame / data.table in an arbitrary nested structure: ```r # This runs region-income level regressions, with country fixed effects # following Mundlak (1978) lm_summary_list <- DT_list %>% rapply2d(lm, formula = G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country)) %>% # Summarizing the results rapply2d(summary, classes = "lm") # This is a nested list of linear model summaries str(lm_summary_list, give.attr = FALSE) # List of 7 # $ East Asia & Pacific :List of 3 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:441] -1.64 -2.59 2.75 3.45 2.48 ... # .. ..$ coefficients : num [1:3, 1:4] 0.531 2.494 3.83 0.706 0.759 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.59 # .. ..$ df : int [1:3] 3 438 3 # .. ..$ r.squared : num 0.0525 # .. ..$ adj.r.squared: num 0.0481 # .. ..$ fstatistic : Named num [1:3] 12.1 2 438 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.02361 -0.00158 -0.04895 -0.00158 0.02728 ... # .. ..$ na.action : 'omit' Named int [1:352] 1 61 62 63 64 65 66 67 68 69 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:549] -39.6968 3.6618 -0.0944 -1.8261 -1.0491 ... # .. ..$ coefficients : num [1:3, 1:4] 1.348 0.524 0.949 0.701 0.757 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 5.4 # .. ..$ df : int [1:3] 3 546 3 # .. ..$ r.squared : num 0.00471 # .. ..$ adj.r.squared: num 0.00106 # .. ..$ fstatistic : Named num [1:3] 1.29 2 546 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.016821 0.000511 -0.022767 0.000511 0.01965 ... # .. ..$ na.action : 'omit' Named int [1:244] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:312] -32.29 -11.61 2.91 11.23 10.28 ... # .. ..$ coefficients : num [1:3, 1:4] 1.507 -0.547 4.816 0.428 0.478 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.39 # .. ..$ df : int [1:3] 3 309 3 # .. ..$ r.squared : num 0.103 # .. ..$ adj.r.squared: num 0.0976 # .. ..$ fstatistic : Named num [1:3] 17.8 2 309 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.009471 0.000492 -0.013551 0.000492 0.011842 ... # .. ..$ na.action : 'omit' Named int [1:298] 1 2 3 4 5 6 7 8 9 10 ... # $ Europe & Central Asia :List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:1355] 2.706 -0.548 1.001 3.034 0.257 ... # .. ..$ coefficients : num [1:3, 1:4] 3.254 -0.172 -2.506 0.407 0.227 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.3 # .. ..$ df : int [1:3] 3 1352 3 # .. ..$ r.squared : num 0.00257 # .. ..$ adj.r.squared: num 0.00109 # .. ..$ fstatistic : Named num [1:3] 1.74 2 1352 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.015254 -0.000863 -0.05461 -0.000863 0.004722 ... # .. ..$ na.action : 'omit' Named int [1:902] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:34] 0.166 -1.804 15.949 -0.778 7.165 ... # .. ..$ coefficients : num [1:2, 1:4] -5.31 9.36 2.03 2.56 -2.61 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 8.43 # .. ..$ df : int [1:3] 2 32 3 # .. ..$ r.squared : num 0.295 # .. ..$ adj.r.squared: num 0.273 # .. ..$ fstatistic : Named num [1:3] 13.4 1 32 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.0582 -0.0514 -0.0514 0.092 # .. ..$ na.action : 'omit' Named int [1:27] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:121] -1.626 8.745 -14.47 0.298 -11.886 ... # .. ..$ coefficients : num [1:3, 1:4] 0.106 4.631 1.499 1.315 0.938 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 6.02 # .. ..$ df : int [1:3] 3 118 3 # .. ..$ r.squared : num 0.178 # .. ..$ adj.r.squared: num 0.164 # .. ..$ fstatistic : Named num [1:3] 12.7 2 118 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.047775 -0.000927 -0.142782 -0.000927 0.024298 ... # .. ..$ na.action : 'omit' Named int [1:123] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:511] 0.761 -2.153 -4.091 -6.476 -3.43 ... # .. ..$ coefficients : num [1:3, 1:4] 2.983 4.147 -3.351 0.698 0.779 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 8.28 # .. ..$ df : int [1:3] 3 508 3 # .. ..$ r.squared : num 0.0531 # .. ..$ adj.r.squared: num 0.0493 # .. ..$ fstatistic : Named num [1:3] 14.2 2 508 # .. ..$ cov.unscaled : num [1:3, 1:3] 7.11e-03 4.52e-05 -1.45e-02 4.52e-05 8.85e-03 ... # .. ..$ na.action : 'omit' Named int [1:465] 1 2 3 4 5 6 7 8 9 10 ... # $ Latin America & Caribbean :List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:487] 2.39 6.02 6.1 1.71 -2.27 ... # .. ..$ coefficients : num [1:3, 1:4] 1.015 0.483 2.613 0.677 0.952 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.71 # .. ..$ df : int [1:3] 3 484 3 # .. ..$ r.squared : num 0.00592 # .. ..$ adj.r.squared: num 0.00181 # .. ..$ fstatistic : Named num [1:3] 1.44 2 484 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.02062 0.00155 -0.05714 0.00155 0.04082 ... # .. ..$ na.action : 'omit' Named int [1:550] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:59] -5.667 5.091 -4.46 -4.224 -0.526 ... # .. ..$ coefficients : num [1:2, 1:4] -3.18 4.02 1.73 2.28 -1.83 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 3.79 # .. ..$ df : int [1:3] 2 57 3 # .. ..$ r.squared : num 0.0516 # .. ..$ adj.r.squared: num 0.0349 # .. ..$ fstatistic : Named num [1:3] 3.1 1 57 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.209 -0.265 -0.265 0.364 # .. ..$ na.action : 'omit' Named int [1:2] 1 61 # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:231] -1.386 2.029 3.213 0.413 1.334 ... # .. ..$ coefficients : num [1:3, 1:4] -1.678 -0.479 3.896 2.26 0.709 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.96 # .. ..$ df : int [1:3] 3 228 3 # .. ..$ r.squared : num 0.0081 # .. ..$ adj.r.squared: num -0.000602 # .. ..$ fstatistic : Named num [1:3] 0.931 2 228 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.3264 0.005 -0.4084 0.005 0.0321 ... # .. ..$ na.action : 'omit' Named int [1:13] 1 61 62 63 64 65 66 67 122 123 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:1065] 1.97 -4.16 -8.5 6.72 7.17 ... # .. ..$ coefficients : num [1:3, 1:4] 1.681 0.583 -0.124 0.353 0.512 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.22 # .. ..$ df : int [1:3] 3 1062 3 # .. ..$ r.squared : num 0.0016 # .. ..$ adj.r.squared: num -0.000283 # .. ..$ fstatistic : Named num [1:3] 0.85 2 1062 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.006982 0.000348 -0.013936 0.000348 0.014734 ... # .. ..$ na.action : 'omit' Named int [1:155] 1 61 62 122 123 183 184 244 245 305 ... # $ Middle East & North Africa:List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:334] -10.728 -11.988 2.151 0.985 -8.618 ... # .. ..$ coefficients : num [1:3, 1:4] 1.929 3.963 -3.533 1.102 0.996 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 8.36 # .. ..$ df : int [1:3] 3 331 3 # .. ..$ r.squared : num 0.0456 # .. ..$ adj.r.squared: num 0.0399 # .. ..$ fstatistic : Named num [1:3] 7.91 2 331 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.01738 0.00101 -0.02441 0.00101 0.01419 ... # .. ..$ na.action : 'omit' Named int [1:154] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:29] 0.468 3.424 0.415 3.842 3.342 ... # .. ..$ coefficients : num [1:2, 1:4] -6.91 11.38 2.11 3.64 -3.27 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 6.05 # .. ..$ df : int [1:3] 2 27 3 # .. ..$ r.squared : num 0.266 # .. ..$ adj.r.squared: num 0.239 # .. ..$ fstatistic : Named num [1:3] 9.81 1 27 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.122 -0.178 -0.178 0.361 # .. ..$ na.action : 'omit' Named int [1:93] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:191] -0.95 -2.047 4.541 5.594 -0.723 ... # .. ..$ coefficients : num [1:3, 1:4] 2.238 1.271 -0.647 1.002 0.599 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.94 # .. ..$ df : int [1:3] 3 188 3 # .. ..$ r.squared : num 0.0244 # .. ..$ adj.r.squared: num 0.014 # .. ..$ fstatistic : Named num [1:3] 2.35 2 188 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.06471 -0.00043 -0.07801 -0.00043 0.02309 ... # .. ..$ na.action : 'omit' Named int [1:114] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:263] -18.068 -23.976 28.692 0.858 1.141 ... # .. ..$ coefficients : num [1:3, 1:4] 2.663 0.718 -1.19 3.538 1.318 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 13.8 # .. ..$ df : int [1:3] 3 260 3 # .. ..$ r.squared : num 0.00119 # .. ..$ adj.r.squared: num -0.00649 # .. ..$ fstatistic : Named num [1:3] 0.155 2 260 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.065741 0.000795 -0.084456 0.000795 0.009122 ... # .. ..$ na.action : 'omit' Named int [1:103] 1 61 62 122 123 124 125 126 127 128 ... # $ North America :List of 1 # ..$ High income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:137] 4.6986 -3.1098 1.8243 0.5643 0.0176 ... # .. ..$ coefficients : num [1:3, 1:4] 6.542 -1.461 -19.53 2.272 0.662 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 2.49 # .. ..$ df : int [1:3] 3 134 3 # .. ..$ r.squared : num 0.0657 # .. ..$ adj.r.squared: num 0.0518 # .. ..$ fstatistic : Named num [1:3] 4.71 2 134 # .. ..$ cov.unscaled : num [1:3, 1:3] 8.36e-01 1.59e-17 -3.60 1.59e-17 7.10e-02 ... # .. ..$ na.action : 'omit' Named int [1:46] 1 2 3 4 5 6 7 8 9 10 ... # $ South Asia :List of 3 # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:76] 0.544 -6.17 3.951 -0.964 7.829 ... # .. ..$ coefficients : num [1:3, 1:4] -108.62 -1.72 96.06 174.19 1.25 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.7 # .. ..$ df : int [1:3] 3 73 3 # .. ..$ r.squared : num 0.0494 # .. ..$ adj.r.squared: num 0.0233 # .. ..$ fstatistic : Named num [1:3] 1.9 2 73 # .. ..$ cov.unscaled : num [1:3, 1:3] 2210.639 -6.979 -1875.261 -6.979 0.114 ... # .. ..$ na.action : 'omit' Named int [1:46] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:216] 0.294 -0.293 -6.067 4.954 -4.164 ... # .. ..$ coefficients : num [1:3, 1:4] -2.232 0.238 5.972 1.074 0.493 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 3.44 # .. ..$ df : int [1:3] 3 213 3 # .. ..$ r.squared : num 0.111 # .. ..$ adj.r.squared: num 0.103 # .. ..$ fstatistic : Named num [1:3] 13.3 2 213 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.09757 -0.00201 -0.10483 -0.00201 0.02054 ... # .. ..$ na.action : 'omit' Named int [1:28] 1 61 62 63 64 65 66 67 68 69 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:82] 3.262 3.976 3.128 1.67 -0.901 ... # .. ..$ coefficients : num [1:3, 1:4] 3.859 -0.577 -0.476 1.036 1.365 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 4.25 # .. ..$ df : int [1:3] 3 79 3 # .. ..$ r.squared : num 0.00622 # .. ..$ adj.r.squared: num -0.0189 # .. ..$ fstatistic : Named num [1:3] 0.247 2 79 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.0595 -0.028 -0.0473 -0.028 0.1034 ... # .. ..$ na.action : 'omit' Named int [1:40] 1 2 3 4 5 6 7 8 9 10 ... # $ Sub-Saharan Africa :List of 4 # ..$ High income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:39] -11.33 -5.041 -3.158 0.585 7.81 ... # .. ..$ coefficients : num [1:2, 1:4] 2.551 -0.644 0.775 0.55 3.293 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE TRUE # .. ..$ sigma : num 4.8 # .. ..$ df : int [1:3] 2 37 3 # .. ..$ r.squared : num 0.0357 # .. ..$ adj.r.squared: num 0.00959 # .. ..$ fstatistic : Named num [1:3] 1.37 1 37 # .. ..$ cov.unscaled : num [1:2, 1:2] 0.026 -0.00217 -0.00217 0.01312 # .. ..$ na.action : 'omit' Named int [1:22] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Low income :List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:1085] 0.694 -5.869 2.069 3.855 2.415 ... # .. ..$ coefficients : num [1:3, 1:4] -0.0756 0.5308 0.5124 0.8887 0.137 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 5.88 # .. ..$ df : int [1:3] 3 1082 3 # .. ..$ r.squared : num 0.0146 # .. ..$ adj.r.squared: num 0.0128 # .. ..$ fstatistic : Named num [1:3] 8.01 2 1082 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.022858 -0.000025 -0.025534 -0.000025 0.000543 ... # .. ..$ na.action : 'omit' Named int [1:379] 1 61 62 122 123 183 184 244 245 305 ... # ..$ Lower middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:891] -8.2839 -4.0289 0.0449 1.8231 -0.5267 ... # .. ..$ coefficients : num [1:3, 1:4] 2.352 0.782 -2.616 0.608 0.169 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 5.27 # .. ..$ df : int [1:3] 3 888 3 # .. ..$ r.squared : num 0.0277 # .. ..$ adj.r.squared: num 0.0255 # .. ..$ fstatistic : Named num [1:3] 12.7 2 888 # .. ..$ cov.unscaled : num [1:3, 1:3] 1.33e-02 -1.13e-05 -2.00e-02 -1.13e-05 1.02e-03 ... # .. ..$ na.action : 'omit' Named int [1:146] 1 2 3 4 5 6 7 8 9 10 ... # ..$ Upper middle income:List of 12 # .. ..$ call : language FUN(formula = ..1, data = y) # .. ..$ terms :Classes 'terms', 'formula' language G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country) # .. ..$ residuals : Named num [1:298] 0.7659 0.9133 0.0921 0.996 0.0765 ... # .. ..$ coefficients : num [1:3, 1:4] 0.584 0.456 4.112 2.472 0.652 ... # .. ..$ aliased : Named logi [1:3] FALSE FALSE FALSE # .. ..$ sigma : num 11.4 # .. ..$ df : int [1:3] 3 295 3 # .. ..$ r.squared : num 0.00658 # .. ..$ adj.r.squared: num -0.000152 # .. ..$ fstatistic : Named num [1:3] 0.977 2 295 # .. ..$ cov.unscaled : num [1:3, 1:3] 0.047213 0.000438 -0.070778 0.000438 0.003285 ... # .. ..$ na.action : 'omit' Named int [1:68] 1 61 62 63 64 65 66 67 68 69 ... ``` We can turn this list into a *data.table* again by calling first `get_elem` to recursively extract the coefficient matrices and then `unlist2d` to recursively bind them to a new *data.table*: ```r lm_summary_list %>% get_elem("coefficients") %>% unlist2d(idcols = .c(Region, Income), row.names = "Coef", DT = TRUE) %>% head # Region Income Coef Estimate Std. Error t value # # 1: East Asia & Pacific High income (Intercept) 0.5313479 0.7058550 0.7527720 # 2: East Asia & Pacific High income G(LIFEEX) 2.4935584 0.7586943 3.2866443 # 3: East Asia & Pacific High income B(G(LIFEEX), country) 3.8297123 1.6916770 2.2638554 # 4: East Asia & Pacific Lower middle income (Intercept) 1.3476602 0.7008556 1.9228785 # 5: East Asia & Pacific Lower middle income G(LIFEEX) 0.5238856 0.7574904 0.6916069 # 6: East Asia & Pacific Lower middle income B(G(LIFEEX), country) 0.9494439 1.2031228 0.7891496 # Pr(>|t|) # # 1: 0.451991327 # 2: 0.001095466 # 3: 0.024071386 # 4: 0.055015131 # 5: 0.489478164 # 6: 0.430367103 ``` The fact that this is a nested list of matrices, and that we can save both the names of the lists at each level of nesting and the row- and column- names of the matrices make `unlist2d` a significant generalization of `rbindlist`^[`unlist2d` can similarly bind nested lists of arrays, data frames or *data.table*'s]. But why do all this fuzz if we could have simply done:? ```r DT[, qDT(coeftest(lm(G(PCGDP) ~ G(LIFEEX) + B(G(LIFEEX), country))), "Coef"), keyby = .(region, income)] %>% head # Key: # region income Coef Estimate Std. Error t value # # 1: East Asia & Pacific High income (Intercept) 0.5313479 0.7058550 0.7527720 # 2: East Asia & Pacific High income G(LIFEEX) 2.4935584 0.7586943 3.2866443 # 3: East Asia & Pacific High income B(G(LIFEEX), country) 3.8297123 1.6916770 2.2638554 # 4: East Asia & Pacific Lower middle income (Intercept) 1.3476602 0.7008556 1.9228785 # 5: East Asia & Pacific Lower middle income G(LIFEEX) 0.5238856 0.7574904 0.6916069 # 6: East Asia & Pacific Lower middle income B(G(LIFEEX), country) 0.9494439 1.2031228 0.7891496 # Pr(>|t|) # # 1: 0.451991327 # 2: 0.001095466 # 3: 0.024071386 # 4: 0.055015131 # 5: 0.489478164 # 6: 0.430367103 ``` Well we might want to do more things with that list of linear models first before tidying it, so this is a more general workflow. We might also be interested in additional statistics like the R-squared or the F-statistic: ```r DT_sum <- lm_summary_list %>% get_elem("coef|r.sq|fstat", regex = TRUE) %>% unlist2d(idcols = .c(Region, Income, Statistic), row.names = "Coef", DT = TRUE) head(DT_sum) # Region Income Statistic Coef Estimate Std. Error # # 1: East Asia & Pacific High income coefficients (Intercept) 0.5313479 0.7058550 # 2: East Asia & Pacific High income coefficients G(LIFEEX) 2.4935584 0.7586943 # 3: East Asia & Pacific High income coefficients B(G(LIFEEX), country) 3.8297123 1.6916770 # 4: East Asia & Pacific High income r.squared NA NA # 5: East Asia & Pacific High income adj.r.squared NA NA # 6: East Asia & Pacific High income fstatistic NA NA # t value Pr(>|t|) V1 value numdf dendf # # 1: 0.752772 0.451991327 NA NA NA NA # 2: 3.286644 0.001095466 NA NA NA NA # 3: 2.263855 0.024071386 NA NA NA NA # 4: NA NA 0.05245359 NA NA NA # 5: NA NA 0.04812690 NA NA NA # 6: NA NA NA 12.12325 2 438 # Reshaping to long form: DT_sum %>% melt(1:4, na.rm = TRUE) %>% roworderv(1:2) %>% head(20) # Region Income Statistic Coef variable # # 1: East Asia & Pacific High income coefficients (Intercept) Estimate # 2: East Asia & Pacific High income coefficients G(LIFEEX) Estimate # 3: East Asia & Pacific High income coefficients B(G(LIFEEX), country) Estimate # 4: East Asia & Pacific High income coefficients (Intercept) Std. Error # 5: East Asia & Pacific High income coefficients G(LIFEEX) Std. Error # 6: East Asia & Pacific High income coefficients B(G(LIFEEX), country) Std. Error # 7: East Asia & Pacific High income coefficients (Intercept) t value # 8: East Asia & Pacific High income coefficients G(LIFEEX) t value # 9: East Asia & Pacific High income coefficients B(G(LIFEEX), country) t value # 10: East Asia & Pacific High income coefficients (Intercept) Pr(>|t|) # 11: East Asia & Pacific High income coefficients G(LIFEEX) Pr(>|t|) # 12: East Asia & Pacific High income coefficients B(G(LIFEEX), country) Pr(>|t|) # 13: East Asia & Pacific High income r.squared V1 # 14: East Asia & Pacific High income adj.r.squared V1 # 15: East Asia & Pacific High income fstatistic value # 16: East Asia & Pacific High income fstatistic numdf # 17: East Asia & Pacific High income fstatistic dendf # 18: East Asia & Pacific Lower middle income coefficients (Intercept) Estimate # 19: East Asia & Pacific Lower middle income coefficients G(LIFEEX) Estimate # 20: East Asia & Pacific Lower middle income coefficients B(G(LIFEEX), country) Estimate # Region Income Statistic Coef variable # value # # 1: 5.313479e-01 # 2: 2.493558e+00 # 3: 3.829712e+00 # 4: 7.058550e-01 # 5: 7.586943e-01 # 6: 1.691677e+00 # 7: 7.527720e-01 # 8: 3.286644e+00 # 9: 2.263855e+00 # 10: 4.519913e-01 # 11: 1.095466e-03 # 12: 2.407139e-02 # 13: 5.245359e-02 # 14: 4.812690e-02 # 15: 1.212325e+01 # 16: 2.000000e+00 # 17: 4.380000e+02 # 18: 1.347660e+00 # 19: 5.238856e-01 # 20: 9.494439e-01 # value ``` As a final example of this kind, lets suppose we are interested in the within-country correlations of all these variables by region and income group: ```r DT[, qDT(pwcor(W(.SD, country)), "Variable"), keyby = .(region, income), .SDcols = PCGDP:ODA] %>% head # Key: # region income Variable W.PCGDP W.LIFEEX W.GINI W.ODA # # 1: East Asia & Pacific High income W.PCGDP 1.0000000 0.7562668 0.6253844 -0.25258496 # 2: East Asia & Pacific High income W.LIFEEX 0.7562668 1.0000000 0.3191255 -0.33611662 # 3: East Asia & Pacific High income W.GINI 0.6253844 0.3191255 1.0000000 NA # 4: East Asia & Pacific High income W.ODA -0.2525850 -0.3361166 NA 1.00000000 # 5: East Asia & Pacific Lower middle income W.PCGDP 1.0000000 0.4685618 0.4428879 -0.02508852 # 6: East Asia & Pacific Lower middle income W.LIFEEX 0.4685618 1.0000000 0.3231520 0.09356733 ``` In summary: The list processing features, statistical capabilities and efficient converters of *collapse* and the flexibility of *data.table* work well together, facilitating more complex workflows. ## Additional Benchmarks See [here]() or [here](). These are all run on a 2 core laptop, so I honestly don't know how *collapse* scales on powerful multi-core machines. My own limited computational resources are part of the reason I did not opt for a thread-parallel package from the start. But a multi-core version of *collapse* will eventually be released, maybe by end of 2021. ## References Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†*Econometrica* 46 (1): 69–85. collapse/inst/doc/collapse_intro.html0000644000176200001440001063105715202627534017524 0ustar liggesusers Introduction to collapse

Introduction to collapse

Advanced and Fast Data Transformation in R

Sebastian Krantz

2021-06-27

collapse is a C/C++ based package for data transformation and statistical computing in R. It’s aims are:

  1. To facilitate complex data transformation, exploration and computing tasks in R.
  2. To help make R code fast, flexible, parsimonious and programmer friendly.

This vignette demonstrates these two points and introduces all main features of the package in a structured way. The chapters are pretty self-contained, however the first chapters introduce the data and faster data manipulation functions which are used throughout the rest of this vignette.


Notes:

  • Apart from this vignette, collapse comes with a built-in structured documentation available under help("collapse-documentation") after installing the package, and help("collapse-package") provides a compact set of examples for quick-start. A cheat sheet is available at Rstudio.

  • The two other vignettes focus on the integration of collapse with dplyr workflows (recommended for dplyr / tidyverse users), and on the integration of collapse with the plm package (+ some advanced programming with panel data).

  • Documentation and vignettes can also be viewed online.


Why collapse?

collapse is a high-performance package that extends and enhances the data-manipulation capabilities of R and existing popular packages (such as dplyr, data.table, and matrix packages). It’s main focus is on grouped and weighted statistical programming, complex aggregations and transformations, time series and panel data operations, and programming with lists of data objects. The lead author is an applied economist and created the package mainly to facilitate advanced computations on varied and complex data, in particular surveys, (multivariate) time series, multilevel / panel data, and lists / model objects.

A secondary aspect to applied work is that data is often imported into R from richer data structures (such as STATA, SPSS or SAS files imported with haven). This called for an intelligent suite of data manipulation functions that can both utilize aspects of the richer data structure (such as variable labels), and preserve the data structure / attributes in computations. Sometimes specialized classes like xts, pdata.frame and grouped_df can also become very useful to manipulate certain types of data. Thus collapse was built to explicitly supports these classes, while preserving most other classes / data structures in R.

Another objective was to radically improve the speed of R code by extensively relying on efficient algorithms in C/C++ and the faster components of base R. collapse ranks among the fastest R packages, and performs many grouped and/or weighted computations noticeably faster than dplyr or data.table.

A final development objective was to channel this performance through a stable and well conceived user API providing extensive and optimized programming capabilities (in standard evaluation) while also facilitating quick use and easy integration with existing data manipulation frameworks (in particular dplyr / tidyverse and data.table, both relying on non-standard evaluation).

1. Data and Summary Tools

We begin by introducing some powerful summary tools along with the 2 panel datasets collapse provides which are used throughout this vignette. If you are just interested in programming you can skip this section. Apart from the 2 datasets that come with collapse (wlddev and GGDC10S), this vignette uses a few well known datasets from base R: mtcars, iris, airquality, and the time series Airpassengers and EuStockMarkets.

1.1 wlddev - World Bank Development Data

This dataset contains 5 key World Bank Development Indicators covering 216 countries for up to 61 years (1960-2020). It is a balanced balanced panel with \(216 \times 61 = 13176\) observations. –>

library(collapse)
head(wlddev)
#       country iso3c       date year decade     region     income  OECD PCGDP LIFEEX GINI       ODA
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA 32.446   NA 116769997
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA 32.962   NA 232080002
# 3 Afghanistan   AFG 1963-01-01 1962   1960 South Asia Low income FALSE    NA 33.471   NA 112839996
# 4 Afghanistan   AFG 1964-01-01 1963   1960 South Asia Low income FALSE    NA 33.971   NA 237720001
# 5 Afghanistan   AFG 1965-01-01 1964   1960 South Asia Low income FALSE    NA 34.463   NA 295920013
# 6 Afghanistan   AFG 1966-01-01 1965   1960 South Asia Low income FALSE    NA 34.948   NA 341839996
#       POP
# 1 8996973
# 2 9169410
# 3 9351441
# 4 9543205
# 5 9744781
# 6 9956320

# The variables have "label" attributes. Use vlabels() to get and set labels
namlab(wlddev, class = TRUE)
#    Variable     Class
# 1   country character
# 2     iso3c    factor
# 3      date      Date
# 4      year   integer
# 5    decade   integer
# 6    region    factor
# 7    income    factor
# 8      OECD   logical
# 9     PCGDP   numeric
# 10   LIFEEX   numeric
# 11     GINI   numeric
# 12      ODA   numeric
# 13      POP   numeric
#                                                                                Label
# 1                                                                       Country Name
# 2                                                                       Country Code
# 3                                                         Date Recorded (Fictitious)
# 4                                                                               Year
# 5                                                                             Decade
# 6                                                                             Region
# 7                                                                       Income Level
# 8                                                            Is OECD Member Country?
# 9                                                 GDP per capita (constant 2010 US$)
# 10                                           Life expectancy at birth, total (years)
# 11                                                  Gini index (World Bank estimate)
# 12 Net official development assistance and official aid received (constant 2018 US$)
# 13                                                                 Population, total

Of the categorical identifiers, the date variable was artificially generated to have an example dataset that contains all common data types frequently encountered in R. A detailed statistical description of this data is computed by descr:

# A fast and detailed statistical description
descr(wlddev)
# Dataset: wlddev, 13 Variables, N = 13176
# ----------------------------------------------------------------------------------------------------
# country (character): Country Name
# Statistics
#       N  Ndist
#   13176    216
# Table
#                       Freq   Perc
# Afghanistan             61   0.46
# Albania                 61   0.46
# Algeria                 61   0.46
# American Samoa          61   0.46
# Andorra                 61   0.46
# Angola                  61   0.46
# Antigua and Barbuda     61   0.46
# Argentina               61   0.46
# Armenia                 61   0.46
# Aruba                   61   0.46
# Australia               61   0.46
# Austria                 61   0.46
# Azerbaijan              61   0.46
# Bahamas, The            61   0.46
# ... 202 Others       12322  93.52
# 
# Summary of Table Frequencies
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#      61      61      61      61      61      61 
# ----------------------------------------------------------------------------------------------------
# iso3c (factor): Country Code
# Statistics
#       N  Ndist
#   13176    216
# Table
#                  Freq   Perc
# ABW                61   0.46
# AFG                61   0.46
# AGO                61   0.46
# ALB                61   0.46
# AND                61   0.46
# ARE                61   0.46
# ARG                61   0.46
# ARM                61   0.46
# ASM                61   0.46
# ATG                61   0.46
# AUS                61   0.46
# AUT                61   0.46
# AZE                61   0.46
# BDI                61   0.46
# ... 202 Others  12322  93.52
# 
# Summary of Table Frequencies
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#      61      61      61      61      61      61 
# ----------------------------------------------------------------------------------------------------
# date (Date): Date Recorded (Fictitious)
# Statistics
#          N       Ndist         Min         Max  
#      13176          61  1961-01-01  2021-01-01  
# ----------------------------------------------------------------------------------------------------
# year (integer): Year
# Statistics
#       N  Ndist  Mean     SD   Min   Max  Skew  Kurt
#   13176     61  1990  17.61  1960  2020    -0   1.8
# Quantiles
#     1%    5%   10%   25%   50%   75%   90%   95%   99%
#   1960  1963  1966  1975  1990  2005  2014  2017  2020
# ----------------------------------------------------------------------------------------------------
# decade (integer): Decade
# Statistics
#       N  Ndist     Mean     SD   Min   Max  Skew  Kurt
#   13176      7  1985.57  17.51  1960  2020  0.03  1.79
# Quantiles
#     1%    5%   10%   25%   50%   75%   90%   95%   99%
#   1960  1960  1960  1970  1990  2000  2010  2010  2020
# ----------------------------------------------------------------------------------------------------
# region (factor): Region
# Statistics
#       N  Ndist
#   13176      7
# Table
#                             Freq   Perc
# Europe & Central Asia       3538  26.85
# Sub-Saharan Africa          2928  22.22
# Latin America & Caribbean   2562  19.44
# East Asia & Pacific         2196  16.67
# Middle East & North Africa  1281   9.72
# South Asia                   488   3.70
# North America                183   1.39
# ----------------------------------------------------------------------------------------------------
# income (factor): Income Level
# Statistics
#       N  Ndist
#   13176      4
# Table
#                      Freq   Perc
# High income          4819  36.57
# Upper middle income  3660  27.78
# Lower middle income  2867  21.76
# Low income           1830  13.89
# ----------------------------------------------------------------------------------------------------
# OECD (logical): Is OECD Member Country?
# Statistics
#       N  Ndist
#   13176      2
# Table
#         Freq   Perc
# FALSE  10980  83.33
# TRUE    2196  16.67
# ----------------------------------------------------------------------------------------------------
# PCGDP (numeric): GDP per capita (constant 2010 US$)
# Statistics (28.13% NAs)
#      N  Ndist      Mean        SD     Min        Max  Skew   Kurt
#   9470   9470  12048.78  19077.64  132.08  196061.42  3.13  17.12
# Quantiles
#       1%      5%     10%      25%      50%       75%       90%       95%       99%
#   227.71  399.62  555.55  1303.19  3767.16  14787.03  35646.02  48507.84  92340.28
# ----------------------------------------------------------------------------------------------------
# LIFEEX (numeric): Life expectancy at birth, total (years)
# Statistics (11.43% NAs)
#       N  Ndist  Mean     SD    Min    Max   Skew  Kurt
#   11670  10548  64.3  11.48  18.91  85.42  -0.67  2.67
# Quantiles
#      1%     5%    10%    25%    50%    75%    90%    95%    99%
#   35.83  42.77  46.83  56.36  67.44  72.95  77.08  79.34  82.36
# ----------------------------------------------------------------------------------------------------
# GINI (numeric): Gini index (World Bank estimate)
# Statistics (86.76% NAs)
#      N  Ndist   Mean   SD   Min   Max  Skew  Kurt
#   1744    368  38.53  9.2  20.7  65.8   0.6  2.53
# Quantiles
#     1%    5%   10%   25%   50%  75%   90%    95%   99%
#   24.6  26.3  27.6  31.5  36.4   45  52.6  55.98  60.5
# ----------------------------------------------------------------------------------------------------
# ODA (numeric): Net official development assistance and official aid received (constant 2018 US$)
# Statistics (34.67% NAs)
#      N  Ndist        Mean          SD          Min             Max  Skew    Kurt
#   8608   7832  454'720131  868'712654  -997'679993  2.56715605e+10  6.98  114.89
# Quantiles
#             1%           5%          10%          25%         50%         75%             90%
#   -12'593999.7  1'363500.01  8'347000.31  44'887499.8  165'970001  495'042503  1.18400697e+09
#              95%             99%
#   1.93281696e+09  3.73380782e+09
# ----------------------------------------------------------------------------------------------------
# POP (numeric): Population, total
# Statistics (1.95% NAs)
#       N  Ndist         Mean          SD   Min             Max  Skew    Kurt
#   12919  12877  24'245971.6  102'120674  2833  1.39771500e+09  9.75  108.91
# Quantiles
#        1%       5%      10%     25%       50%        75%          90%          95%         99%
#   8698.84  31083.3  62268.4  443791  4'072517  12'816178  46'637331.4  81'177252.5  308'862641
# ----------------------------------------------------------------------------------------------------

The output of descr can be converted into a tidy data frame using:

head(as.data.frame(descr(wlddev)))
#   Variable     Class                      Label     N Ndist   Min   Max     Mean       SD
# 1  country character               Country Name 13176   216    NA    NA       NA       NA
# 2    iso3c    factor               Country Code 13176   216    NA    NA       NA       NA
# 3     date      Date Date Recorded (Fictitious) 13176    61 -3287 18628       NA       NA
# 4     year   integer                       Year 13176    61  1960  2020 1990.000 17.60749
# 5   decade   integer                     Decade 13176     7  1960  2020 1985.574 17.51175
# 6   region    factor                     Region 13176     7    NA    NA       NA       NA
#            Skew     Kurt   1%   5%  10%  25%  50%  75%  90%  95%  99%
# 1            NA       NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
# 2            NA       NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
# 3            NA       NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
# 4 -5.812381e-16 1.799355 1960 1963 1966 1975 1990 2005 2014 2017 2020
# 5  3.256512e-02 1.791726 1960 1960 1960 1970 1990 2000 2010 2010 2020
# 6            NA       NA   NA   NA   NA   NA   NA   NA   NA   NA   NA

Note that descr does not require data to be labeled. Since wlddev is a panel data set tracking countries over time, we might be interested in checking which variables are time-varying, with the function varying:

varying(wlddev, wlddev$iso3c)
# country   iso3c    date    year  decade  region  income    OECD   PCGDP  LIFEEX    GINI     ODA 
#   FALSE   FALSE    TRUE    TRUE    TRUE   FALSE   FALSE   FALSE    TRUE    TRUE    TRUE    TRUE 
#     POP 
#    TRUE

varying tells us that all 5 variables PCGDP, LIFEEX, GINI, ODA and POP vary over time. However the OECD variable does not, so this data does not track when countries entered the OECD. We can also have a more detailed look letting varying check the variation in each country:

head(varying(wlddev, wlddev$iso3c, any_group = FALSE))
#     country iso3c date year decade region income  OECD PCGDP LIFEEX GINI  ODA  POP
# ABW   FALSE FALSE TRUE TRUE   TRUE  FALSE  FALSE FALSE  TRUE   TRUE   NA TRUE TRUE
# AFG   FALSE FALSE TRUE TRUE   TRUE  FALSE  FALSE FALSE  TRUE   TRUE   NA TRUE TRUE
# AGO   FALSE FALSE TRUE TRUE   TRUE  FALSE  FALSE FALSE  TRUE   TRUE TRUE TRUE TRUE
# ALB   FALSE FALSE TRUE TRUE   TRUE  FALSE  FALSE FALSE  TRUE   TRUE TRUE TRUE TRUE
# AND   FALSE FALSE TRUE TRUE   TRUE  FALSE  FALSE FALSE  TRUE     NA   NA   NA TRUE
# ARE   FALSE FALSE TRUE TRUE   TRUE  FALSE  FALSE FALSE  TRUE   TRUE TRUE TRUE TRUE

NA indicates that there are no data for this country. In general data is varying if it has two or more distinct non-missing values. We could also take a closer look at observation counts and distinct values using:

head(fnobs(wlddev, wlddev$iso3c))
#     country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP
# ABW      61    61   61   61     61     61     61   61    32     60    0  20  60
# AFG      61    61   61   61     61     61     61   61    18     60    0  60  60
# AGO      61    61   61   61     61     61     61   61    40     60    3  58  60
# ALB      61    61   61   61     61     61     61   61    40     60    9  32  60
# AND      61    61   61   61     61     61     61   61    50      0    0   0  60
# ARE      61    61   61   61     61     61     61   61    45     60    2  45  60

head(fndistinct(wlddev, wlddev$iso3c))
#     country iso3c date year decade region income OECD PCGDP LIFEEX GINI ODA POP
# ABW       1     1   61   61      7      1      1    1    32     60    0  20  60
# AFG       1     1   61   61      7      1      1    1    18     60    0  60  60
# AGO       1     1   61   61      7      1      1    1    40     59    3  58  60
# ALB       1     1   61   61      7      1      1    1    40     59    9  32  60
# AND       1     1   61   61      7      1      1    1    50      0    0   0  60
# ARE       1     1   61   61      7      1      1    1    45     60    2  45  60

Note that varying is more efficient than fndistinct, although both functions are very fast. Even more powerful summary methods for multilevel / panel data are provided by qsu (shorthand for quick-summary). It is modeled after STATA’s summarize and xtsummarize commands. Calling qsu on the data gives a concise summary. We can subset columns internally using the cols argument:

qsu(wlddev, cols = 9:12, higher = TRUE) # higher adds skewness and kurtosis
#             N        Mean          SD          Min             Max     Skew     Kurt
# PCGDP    9470   12048.778  19077.6416     132.0776      196061.417   3.1276  17.1154
# LIFEEX  11670     64.2963     11.4764       18.907         85.4171  -0.6748   2.6718
# GINI     1744     38.5341      9.2006         20.7            65.8    0.596   2.5329
# ODA      8608  454'720131  868'712654  -997'679993  2.56715605e+10   6.9832  114.889

We could easily compute these statistics by region:

qsu(wlddev, by = ~region, cols = 9:12, vlabels = TRUE, higher = TRUE)
# , , PCGDP: GDP per capita (constant 2010 US$)
# 
#                                N        Mean          SD         Min         Max    Skew     Kurt
# East Asia & Pacific         1467  10513.2441  14383.5507    132.0776  71992.1517  1.6392   4.7419
# Europe & Central Asia       2243  25992.9618  26435.1316    366.9354  196061.417  2.2022  10.1977
# Latin America & Caribbean   1976   7628.4477   8818.5055   1005.4085  88391.3331  4.1702  29.3739
# Middle East & North Africa   842  13878.4213  18419.7912    578.5996  116232.753  2.4178   9.7669
# North America                180    48699.76  24196.2855  16405.9053  113236.091   0.938   2.9688
# South Asia                   382   1235.9256   1611.2232    265.9625    8476.564  2.7874  10.3402
# Sub-Saharan Africa          2380   1840.0259   2596.0104    164.3366  20532.9523  3.1161  14.4175
# 
# , , LIFEEX: Life expectancy at birth, total (years)
# 
#                                N     Mean       SD      Min      Max     Skew    Kurt
# East Asia & Pacific         1807  65.9445  10.1633   18.907   85.078   -0.856  4.3125
# Europe & Central Asia       3046  72.1625   5.7602   45.369  85.4171  -0.5594  4.0434
# Latin America & Caribbean   2107  68.3486   7.3768   41.762  82.1902  -1.0357  3.9379
# Middle East & North Africa  1226  66.2508   9.8306   29.919  82.8049  -0.8782  3.3054
# North America                144  76.2867   3.5734  68.8978  82.0488  -0.1963   1.976
# South Asia                   480  57.5585  11.3004   32.446   78.921  -0.2623  2.1147
# Sub-Saharan Africa          2860   51.581   8.6876   26.172  74.5146   0.1452  2.7245
# 
# , , GINI: Gini index (World Bank estimate)
# 
#                               N     Mean      SD   Min   Max     Skew    Kurt
# East Asia & Pacific         154  37.7571  5.0318  27.8  49.1   0.3631  2.3047
# Europe & Central Asia       798  31.9114  4.5809  20.7  48.4   0.2989  2.5254
# Latin America & Caribbean   413  49.9557  5.4821  34.4  63.3  -0.0386  2.3631
# Middle East & North Africa   91  36.0143  5.2073    26  47.4   0.0241  1.9209
# North America                49  37.4816  3.6972    31  41.5  -0.4282  1.4577
# South Asia                   46  33.8804  3.9898  25.9  43.8   0.4205  2.7748
# Sub-Saharan Africa          193  44.6606  8.2003  29.8  65.8   0.6598  2.8451
# 
# , , ODA: Net official development assistance and official aid received (constant 2018 US$)
# 
#                                N            Mean              SD           Min             Max
# East Asia & Pacific         1537      352'017964      622'847624   -997'679993  4.04487988e+09
# Europe & Central Asia        787      402'455286      568'237036   -322'070007  4.34612988e+09
# Latin America & Caribbean   1972      172'880081      260'781049   -444'040009  2.99568994e+09
# Middle East & North Africa  1105      732'380009  1.52108993e+09   -141'789993  2.56715605e+10
# North America                 39      468717.916     10'653560.8  -15'869999.9     61'509998.3
# South Asia                   466  1.27049955e+09  1.61492889e+09   -247'369995  8.75425977e+09
# Sub-Saharan Africa          2702      486'371750      656'336230  -18'409999.8  1.18790801e+10
#                               Skew     Kurt
# East Asia & Pacific          2.722  11.5221
# Europe & Central Asia       3.1305  15.2525
# Latin America & Caribbean   3.3259  22.4569
# Middle East & North Africa  6.6304  79.2238
# North America               4.8602  29.3092
# South Asia                  1.7923    6.501
# Sub-Saharan Africa          4.5456  48.8447

Computing summary statistics by country is of course also possible but would be too much information. Fortunately qsu lets us do something much more powerful:

qsu(wlddev, pid = ~ iso3c, cols = c(1,4,9:12), vlabels = TRUE, higher = TRUE)
# , , country: Country Name
# 
#            N/T  Mean  SD  Min  Max  Skew  Kurt
# Overall  13176     -   -    -    -     -     -
# Between    216     -   -    -    -     -     -
# Within      61     -   -    -    -     -     -
# 
# , , year: Year
# 
#            N/T  Mean       SD   Min   Max  Skew    Kurt
# Overall  13176  1990  17.6075  1960  2020    -0  1.7994
# Between    216  1990        0  1990  1990     -       -
# Within      61  1990  17.6075  1960  2020    -0  1.7994
# 
# , , PCGDP: GDP per capita (constant 2010 US$)
# 
#              N/T        Mean          SD          Min         Max    Skew     Kurt
# Overall     9470   12048.778  19077.6416     132.0776  196061.417  3.1276  17.1154
# Between      206  12962.6054  20189.9007     253.1886   141200.38  3.1263  16.2299
# Within   45.9709   12048.778   6723.6808  -33504.8721  76767.5254  0.6576  17.2003
# 
# , , LIFEEX: Life expectancy at birth, total (years)
# 
#              N/T     Mean       SD      Min      Max     Skew    Kurt
# Overall    11670  64.2963  11.4764   18.907  85.4171  -0.6748  2.6718
# Between      207  64.9537   9.8936  40.9663  85.4171  -0.5012  2.1693
# Within   56.3768  64.2963   6.0842  32.9068  84.4198  -0.2643  3.7027
# 
# , , GINI: Gini index (World Bank estimate)
# 
#              N/T     Mean      SD      Min      Max    Skew    Kurt
# Overall     1744  38.5341  9.2006     20.7     65.8   0.596  2.5329
# Between      167  39.4233  8.1356  24.8667  61.7143  0.5832  2.8256
# Within   10.4431  38.5341  2.9277  25.3917  55.3591  0.3263  5.3389
# 
# , , ODA: Net official development assistance and official aid received (constant 2018 US$)
# 
#              N/T        Mean          SD              Min             Max    Skew      Kurt
# Overall     8608  454'720131  868'712654      -997'679993  2.56715605e+10  6.9832   114.889
# Between      178  439'168412  569'049959       468717.916  3.62337432e+09   2.355    9.9487
# Within   48.3596  454'720131  650'709624  -2.44379420e+09  2.45610972e+10  9.6047  263.3716

The above output reports 3 sets of summary statistics for each variable: Statistics computed on the Overall (raw) data, and on the Between-country (i.e. country averaged) and Within-country (i.e. country-demeaned) data1. This is a powerful way to summarize panel data because aggregating the data by country gives us a cross-section of countries with no variation over time, whereas subtracting country specific means from the data eliminates all cross-sectional variation.

So what can these statistics tell us about our data? The N/T columns shows that for PCGDP we have 8995 total observations, that we observe GDP data for 203 countries and that we have on average 44.3 observations (time-periods) per country. In contrast the GINI Index is only available for 161 countries with 8.4 observations on average. The Overall and Within mean of the data are identical by definition, and the Between mean would also be the same in a balanced panel with no missing observations. In practice we have unequal amounts of observations for different countries, thus countries have different weights in the Overall mean and the difference between Overall and Between-country mean reflects this discrepancy. The most interesting statistic in this summary arguably is the standard deviation, and in particular the comparison of the Between-SD reflecting the variation between countries and the Within-SD reflecting average variation over time. This comparison shows that PCGDP, LIFEEX and GINI vary more between countries, but ODA received varies more within countries over time. The 0 Between-SD for the year variable and the fact that the Overall and Within-SD are equal shows that year is individual invariant. Thus qsu also provides the same information as varying, but with additional details on the relative magnitudes of cross-sectional and time series variation. It is also a common pattern that the kurtosis increases in within-transformed data, while the skewness decreases in most cases.

We could also do all of that by regions to have a look at the between and within country variations inside and across different World regions:

qsu(wlddev, by = ~ region, pid = ~ iso3c, cols = 9:12, vlabels = TRUE, higher = TRUE)
# , , Overall, PCGDP: GDP per capita (constant 2010 US$)
# 
#                              N/T        Mean          SD         Min         Max    Skew     Kurt
# East Asia & Pacific         1467  10513.2441  14383.5507    132.0776  71992.1517  1.6392   4.7419
# Europe & Central Asia       2243  25992.9618  26435.1316    366.9354  196061.417  2.2022  10.1977
# Latin America & Caribbean   1976   7628.4477   8818.5055   1005.4085  88391.3331  4.1702  29.3739
# Middle East & North Africa   842  13878.4213  18419.7912    578.5996  116232.753  2.4178   9.7669
# North America                180    48699.76  24196.2855  16405.9053  113236.091   0.938   2.9688
# South Asia                   382   1235.9256   1611.2232    265.9625    8476.564  2.7874  10.3402
# Sub-Saharan Africa          2380   1840.0259   2596.0104    164.3366  20532.9523  3.1161  14.4175
# 
# , , Between, PCGDP: GDP per capita (constant 2010 US$)
# 
#                             N/T        Mean          SD         Min         Max    Skew     Kurt
# East Asia & Pacific          34  10513.2441   12771.742    444.2899  39722.0077  1.1488   2.7089
# Europe & Central Asia        56  25992.9618   24051.035    809.4753   141200.38  2.0026   9.0733
# Latin America & Caribbean    38   7628.4477   8470.9708   1357.3326  77403.7443  4.4548  32.4956
# Middle East & North Africa   20  13878.4213  17251.6962   1069.6596  64878.4021  1.9508   6.0796
# North America                 3    48699.76  18604.4369  35260.4708  74934.5874  0.7065      1.5
# South Asia                    8   1235.9256   1488.3669      413.68   6621.5002  3.0546  11.3083
# Sub-Saharan Africa           47   1840.0259   2234.3254    253.1886   9922.0052  2.1442   6.8259
# 
# , , Within, PCGDP: GDP per capita (constant 2010 US$)
# 
#                                 N/T       Mean          SD          Min         Max     Skew
# East Asia & Pacific         43.1471  12048.778   6615.8248  -11964.6472   49541.463    0.824
# Europe & Central Asia       40.0536  12048.778  10971.0483  -33504.8721  76767.5254   0.4307
# Latin America & Caribbean        52  12048.778   2451.2636    -354.1639  23036.3668   0.1259
# Middle East & North Africa     42.1  12048.778   6455.0512  -18674.4049  63665.0446   1.8525
# North America                    60  12048.778  15470.4609  -29523.1017  50350.2816  -0.2451
# South Asia                    47.75  12048.778    617.0934   10026.9155   14455.865   0.9846
# Sub-Saharan Africa          50.6383  12048.778    1321.764    4846.3834  24883.1246   1.3879
#                                Kurt
# East Asia & Pacific          8.9418
# Europe & Central Asia        7.4139
# Latin America & Caribbean    7.1939
# Middle East & North Africa  23.0457
# North America                3.2075
# South Asia                   5.6366
# Sub-Saharan Africa          28.0186
# 
# , , Overall, LIFEEX: Life expectancy at birth, total (years)
# 
#                              N/T     Mean       SD      Min      Max     Skew    Kurt
# East Asia & Pacific         1807  65.9445  10.1633   18.907   85.078   -0.856  4.3125
# Europe & Central Asia       3046  72.1625   5.7602   45.369  85.4171  -0.5594  4.0434
# Latin America & Caribbean   2107  68.3486   7.3768   41.762  82.1902  -1.0357  3.9379
# Middle East & North Africa  1226  66.2508   9.8306   29.919  82.8049  -0.8782  3.3054
# North America                144  76.2867   3.5734  68.8978  82.0488  -0.1963   1.976
# South Asia                   480  57.5585  11.3004   32.446   78.921  -0.2623  2.1147
# Sub-Saharan Africa          2860   51.581   8.6876   26.172  74.5146   0.1452  2.7245
# 
# , , Between, LIFEEX: Life expectancy at birth, total (years)
# 
#                             N/T     Mean      SD      Min      Max     Skew    Kurt
# East Asia & Pacific          32  65.9445  7.6833  49.7995  77.9008  -0.3832  2.4322
# Europe & Central Asia        55  72.1625  4.4378  60.1129  85.4171  -0.6584  2.8874
# Latin America & Caribbean    40  68.3486  4.9199  53.4918  82.1902  -0.9947  4.1617
# Middle East & North Africa   21  66.2508   5.922  52.5371  76.7395  -0.3181  3.0331
# North America                 3  76.2867  1.3589  74.8065  78.4175   0.1467  1.6356
# South Asia                    8  57.5585  5.6158  49.1972  69.3429   0.6643  3.1288
# Sub-Saharan Africa           48   51.581   5.657  40.9663  71.5749   1.1333   4.974
# 
# , , Within, LIFEEX: Life expectancy at birth, total (years)
# 
#                                 N/T     Mean      SD      Min      Max     Skew    Kurt
# East Asia & Pacific         56.4688  64.2963  6.6528  32.9068  83.9918  -0.3949  3.9528
# Europe & Central Asia       55.3818  64.2963  3.6723  46.3045  78.6265  -0.0307  3.7576
# Latin America & Caribbean    52.675  64.2963  5.4965  46.7831  79.5026  -0.3827  2.9936
# Middle East & North Africa   58.381  64.2963  7.8467  41.6187  78.8872  -0.6216   2.808
# North America                    48  64.2963  3.3049  54.7766  69.4306  -0.4327  2.3027
# South Asia                       60  64.2963  9.8062  41.4342  83.0122  -0.0946  2.1035
# Sub-Saharan Africa          59.5833  64.2963  6.5933  41.5678  84.4198   0.0811  2.7821
# 
# , , Overall, GINI: Gini index (World Bank estimate)
# 
#                             N/T     Mean      SD   Min   Max     Skew    Kurt
# East Asia & Pacific         154  37.7571  5.0318  27.8  49.1   0.3631  2.3047
# Europe & Central Asia       798  31.9114  4.5809  20.7  48.4   0.2989  2.5254
# Latin America & Caribbean   413  49.9557  5.4821  34.4  63.3  -0.0386  2.3631
# Middle East & North Africa   91  36.0143  5.2073    26  47.4   0.0241  1.9209
# North America                49  37.4816  3.6972    31  41.5  -0.4282  1.4577
# South Asia                   46  33.8804  3.9898  25.9  43.8   0.4205  2.7748
# Sub-Saharan Africa          193  44.6606  8.2003  29.8  65.8   0.6598  2.8451
# 
# , , Between, GINI: Gini index (World Bank estimate)
# 
#                             N/T     Mean      SD      Min      Max     Skew    Kurt
# East Asia & Pacific          23  37.7571  4.3005     30.8  45.8857   0.4912   2.213
# Europe & Central Asia        49  31.9114  4.0611  24.8667   40.935   0.3323   2.291
# Latin America & Caribbean    25  49.9557  4.0492     41.1     57.9     0.03  2.2573
# Middle East & North Africa   15  36.0143  4.7002    29.05     42.7  -0.2035  1.6815
# North America                 2  37.4816  3.3563  33.1222  40.0129  -0.5503  1.3029
# South Asia                    7  33.8804  3.0052  30.3556     38.8   0.2786  1.4817
# Sub-Saharan Africa           46  44.6606  6.8844    34.52  61.7143   0.9464  3.2302
# 
# , , Within, GINI: Gini index (World Bank estimate)
# 
#                                 N/T     Mean      SD      Min      Max     Skew    Kurt
# East Asia & Pacific          6.6957  38.5341  2.6125  31.0187  45.8901  -0.0585  3.0933
# Europe & Central Asia       16.2857  38.5341  2.1195  31.2841  50.1387   0.6622  6.1763
# Latin America & Caribbean     16.52  38.5341  3.6955  25.3917  48.8341  -0.0506  2.7603
# Middle East & North Africa   6.0667  38.5341  2.2415  31.7675   45.777   0.0408  4.7415
# North America                  24.5  38.5341  1.5507  33.0212  42.7119  -1.3213  6.8321
# South Asia                   6.5714  38.5341  2.6244  32.8341  45.0675  -0.1055  2.6885
# Sub-Saharan Africa           4.1957  38.5341  4.4553  27.9452  55.3591   0.6338  4.4174
# 
# , , Overall, ODA: Net official development assistance and official aid received (constant 2018 US$)
# 
#                              N/T            Mean              SD           Min             Max
# East Asia & Pacific         1537      352'017964      622'847624   -997'679993  4.04487988e+09
# Europe & Central Asia        787      402'455286      568'237036   -322'070007  4.34612988e+09
# Latin America & Caribbean   1972      172'880081      260'781049   -444'040009  2.99568994e+09
# Middle East & North Africa  1105      732'380009  1.52108993e+09   -141'789993  2.56715605e+10
# North America                 39      468717.916     10'653560.8  -15'869999.9     61'509998.3
# South Asia                   466  1.27049955e+09  1.61492889e+09   -247'369995  8.75425977e+09
# Sub-Saharan Africa          2702      486'371750      656'336230  -18'409999.8  1.18790801e+10
#                               Skew     Kurt
# East Asia & Pacific          2.722  11.5221
# Europe & Central Asia       3.1305  15.2525
# Latin America & Caribbean   3.3259  22.4569
# Middle East & North Africa  6.6304  79.2238
# North America               4.8602  29.3092
# South Asia                  1.7923    6.501
# Sub-Saharan Africa          4.5456  48.8447
# 
# , , Between, ODA: Net official development assistance and official aid received (constant 2018 US$)
# 
#                             N/T            Mean              SD          Min             Max
# East Asia & Pacific          31      352'017964      457'183279  1'654615.38  1.63585532e+09
# Europe & Central Asia        32      402'455286      438'074771  12'516000.1  2.05456932e+09
# Latin America & Caribbean    37      172'880081      167'160838  2'225483.88      538'386665
# Middle East & North Africa   21      732'380009      775'418887   3'112820.5  2.86174883e+09
# North America                 1      468717.916               0   468717.916      468717.916
# South Asia                    8  1.27049955e+09  1.18347893e+09  27'152499.9  3.62337432e+09
# Sub-Saharan Africa           48      486'371750      397'995105  28'801206.9  1.55049113e+09
#                               Skew    Kurt
# East Asia & Pacific         1.7771  5.1361
# Europe & Central Asia       2.0449  7.2489
# Latin America & Caribbean   0.8981  2.4954
# Middle East & North Africa  1.1363  3.6377
# North America                    -       -
# South Asia                  0.7229  2.4072
# Sub-Saharan Africa          0.9871  3.1513
# 
# , , Within, ODA: Net official development assistance and official aid received (constant 2018 US$)
# 
#                                 N/T        Mean              SD              Min             Max
# East Asia & Pacific         49.5806  454'720131      422'992450  -2.04042108e+09  3.59673152e+09
# Europe & Central Asia       24.5938  454'720131      361'916875  -1.08796786e+09  3.30549004e+09
# Latin America & Caribbean   53.2973  454'720131      200'159960      -527'706542  3.28976141e+09
# Middle East & North Africa   52.619  454'720131  1.30860235e+09  -2.34610870e+09  2.45610972e+10
# North America                    39  454'720131     10'653560.8       438'381413      515'761411
# South Asia                    58.25  454'720131  1.09880524e+09  -2.44379420e+09  5.58560558e+09
# Sub-Saharan Africa          56.2917  454'720131      521'897637      -952'168698  1.12814455e+10
#                               Skew     Kurt
# East Asia & Pacific         0.2908  14.4428
# Europe & Central Asia       2.3283  18.6937
# Latin America & Caribbean   3.7015  41.7506
# Middle East & North Africa  7.8663  117.987
# North America               4.8602  29.3092
# South Asia                  1.8418   9.4588
# Sub-Saharan Africa          5.2349  86.1042

Notice that the output here is a 4D array of summary statistics, which we could also subset ([) or permute (aperm) to view these statistics in any convenient way. If we don’t like the array, we can also output as a nested list of statistics matrices:

l <- qsu(wlddev, by = ~ region, pid = ~ iso3c, cols = 9:12, vlabels = TRUE,
         higher = TRUE, array = FALSE)

str(l, give.attr = FALSE)
# List of 4
#  $ PCGDP: GDP per capita (constant 2010 US$)                                             :List of 3
#   ..$ Overall: 'qsu' num [1:7, 1:7] 1467 2243 1976 842 180 ...
#   ..$ Between: 'qsu' num [1:7, 1:7] 34 56 38 20 3 ...
#   ..$ Within : 'qsu' num [1:7, 1:7] 43.1 40.1 52 42.1 60 ...
#  $ LIFEEX: Life expectancy at birth, total (years)                                       :List of 3
#   ..$ Overall: 'qsu' num [1:7, 1:7] 1807 3046 2107 1226 144 ...
#   ..$ Between: 'qsu' num [1:7, 1:7] 32 55 40 21 3 ...
#   ..$ Within : 'qsu' num [1:7, 1:7] 56.5 55.4 52.7 58.4 48 ...
#  $ GINI: Gini index (World Bank estimate)                                                :List of 3
#   ..$ Overall: 'qsu' num [1:7, 1:7] 154 798 413 91 49 ...
#   ..$ Between: 'qsu' num [1:7, 1:7] 23 49 25 15 2 ...
#   ..$ Within : 'qsu' num [1:7, 1:7] 6.7 16.29 16.52 6.07 24.5 ...
#  $ ODA: Net official development assistance and official aid received (constant 2018 US$):List of 3
#   ..$ Overall: 'qsu' num [1:7, 1:7] 1537 787 1972 1105 39 ...
#   ..$ Between: 'qsu' num [1:7, 1:7] 31 32 37 21 1 ...
#   ..$ Within : 'qsu' num [1:7, 1:7] 49.6 24.6 53.3 52.6 39 ...

Such a list of statistics matrices could, for example, be converted into a tidy data frame using unlist2d (more about this in the section on list-processing):

head(unlist2d(l, idcols = c("Variable", "Trans"), row.names = "Region"))
#                                    Variable   Trans                     Region    N      Mean
# 1 PCGDP: GDP per capita (constant 2010 US$) Overall        East Asia & Pacific 1467 10513.244
# 2 PCGDP: GDP per capita (constant 2010 US$) Overall      Europe & Central Asia 2243 25992.962
# 3 PCGDP: GDP per capita (constant 2010 US$) Overall  Latin America & Caribbean 1976  7628.448
# 4 PCGDP: GDP per capita (constant 2010 US$) Overall Middle East & North Africa  842 13878.421
# 5 PCGDP: GDP per capita (constant 2010 US$) Overall              North America  180 48699.760
# 6 PCGDP: GDP per capita (constant 2010 US$) Overall                 South Asia  382  1235.926
#          SD        Min        Max      Skew      Kurt
# 1 14383.551   132.0776  71992.152 1.6392248  4.741856
# 2 26435.132   366.9354 196061.417 2.2022472 10.197685
# 3  8818.505  1005.4085  88391.333 4.1701769 29.373869
# 4 18419.791   578.5996 116232.753 2.4177586  9.766883
# 5 24196.285 16405.9053 113236.091 0.9380056  2.968769
# 6  1611.223   265.9625   8476.564 2.7873830 10.340176

This is not yet end of qsu’s functionality, as we can also do all of the above on panel-surveys utilizing weights (w argument).

Finally, we can look at (weighted) pairwise correlations in this data:

pwcor(wlddev[9:12], N = TRUE, P = TRUE)
#               PCGDP        LIFEEX         GINI          ODA
# PCGDP    1   (9470)   .57* (9022) -.44* (1735) -.16* (7128)
# LIFEEX  .57* (9022)   1   (11670) -.35* (1742) -.02  (8142)
# GINI   -.44* (1735)  -.35* (1742)   1   (1744) -.20* (1109)
# ODA    -.16* (7128)  -.02  (8142) -.20* (1109)   1   (8608)

which can of course also be computed on averaged and within-transformed data:

print(pwcor(fmean(wlddev[9:12], wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri")
#              PCGDP      LIFEEX        GINI         ODA
# PCGDP    1   (206)                                    
# LIFEEX  .60* (199)   1   (207)                        
# GINI   -.42* (165) -.40* (165)   1   (167)            
# ODA    -.25* (172) -.21* (172) -.19* (145)   1   (178)

# N is same as overall N shown above...
print(pwcor(fwithin(wlddev[9:12], wlddev$iso3c), P = TRUE), show = "lower.tri")
#         PCGDP LIFEEX   GINI    ODA
# PCGDP     1                       
# LIFEEX   .31*    1                
# GINI    -.01   -.16*    1         
# ODA     -.01    .17*  -.08*    1

A useful function called by pwcor is pwnobs, which is very handy to explore the joint observation structure when selecting variables to include in a statistical model:

pwnobs(wlddev)
#         country iso3c  date  year decade region income  OECD PCGDP LIFEEX GINI  ODA   POP
# country   13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# iso3c     13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# date      13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# year      13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# decade    13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# region    13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# income    13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# OECD      13176 13176 13176 13176  13176  13176  13176 13176  9470  11670 1744 8608 12919
# PCGDP      9470  9470  9470  9470   9470   9470   9470  9470  9470   9022 1735 7128  9470
# LIFEEX    11670 11670 11670 11670  11670  11670  11670 11670  9022  11670 1742 8142 11659
# GINI       1744  1744  1744  1744   1744   1744   1744  1744  1735   1742 1744 1109  1744
# ODA        8608  8608  8608  8608   8608   8608   8608  8608  7128   8142 1109 8608  8597
# POP       12919 12919 12919 12919  12919  12919  12919 12919  9470  11659 1744 8597 12919

Note that both pwcor/pwcov and pwnobs are faster on matrices.

1.2 GGDC10S - GGDC 10-Sector Database

The Groningen Growth and Development Centre 10-Sector Database provides long-run data on sectoral productivity performance in Africa, Asia, and Latin America. Variables covered in the data set are annual series of value added (VA, in local currency), and persons employed (EMP) for 10 broad sectors.

head(GGDC10S)
#   Country Regioncode             Region Variable Year      AGR      MIN       MAN        PU
# 1     BWA        SSA Sub-saharan Africa       VA 1960       NA       NA        NA        NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961       NA       NA        NA        NA
# 3     BWA        SSA Sub-saharan Africa       VA 1962       NA       NA        NA        NA
# 4     BWA        SSA Sub-saharan Africa       VA 1963       NA       NA        NA        NA
# 5     BWA        SSA Sub-saharan Africa       VA 1964 16.30154 3.494075 0.7365696 0.1043936
# 6     BWA        SSA Sub-saharan Africa       VA 1965 15.72700 2.495768 1.0181992 0.1350976
#         CON      WRT      TRA     FIRE      GOV      OTH      SUM
# 1        NA       NA       NA       NA       NA       NA       NA
# 2        NA       NA       NA       NA       NA       NA       NA
# 3        NA       NA       NA       NA       NA       NA       NA
# 4        NA       NA       NA       NA       NA       NA       NA
# 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229
# 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710

namlab(GGDC10S, class = TRUE)
#      Variable     Class                                                 Label
# 1     Country character                                               Country
# 2  Regioncode character                                           Region code
# 3      Region character                                                Region
# 4    Variable character                                              Variable
# 5        Year   numeric                                                  Year
# 6         AGR   numeric                                          Agriculture 
# 7         MIN   numeric                                                Mining
# 8         MAN   numeric                                         Manufacturing
# 9          PU   numeric                                             Utilities
# 10        CON   numeric                                          Construction
# 11        WRT   numeric                         Trade, restaurants and hotels
# 12        TRA   numeric                  Transport, storage and communication
# 13       FIRE   numeric Finance, insurance, real estate and business services
# 14        GOV   numeric                                   Government services
# 15        OTH   numeric               Community, social and personal services
# 16        SUM   numeric                               Summation of sector GDP

fnobs(GGDC10S)
#    Country Regioncode     Region   Variable       Year        AGR        MIN        MAN         PU 
#       5027       5027       5027       5027       5027       4364       4355       4355       4354 
#        CON        WRT        TRA       FIRE        GOV        OTH        SUM 
#       4355       4355       4355       4355       3482       4248       4364

fndistinct(GGDC10S)
#    Country Regioncode     Region   Variable       Year        AGR        MIN        MAN         PU 
#         43          6          6          2         67       4353       4224       4353       4237 
#        CON        WRT        TRA       FIRE        GOV        OTH        SUM 
#       4339       4344       4334       4349       3470       4238       4364

# The countries included:
cat(funique(GGDC10S$Country, sort = TRUE))
# ARG BOL BRA BWA CHL CHN COL CRI DEW DNK EGY ESP ETH FRA GBR GHA HKG IDN IND ITA JPN KEN KOR MEX MOR MUS MWI MYS NGA NGA(alt) NLD PER PHL SEN SGP SWE THA TWN TZA USA VEN ZAF ZMB

The first problem in summarizing this data is that value added (VA) is in local currency, the second that it contains 2 different Variables (VA and EMP) stacked in the same column. One way of solving the first problem could be converting the data to percentages through dividing by the overall VA and EMP contained in the last column. A different solution involving grouped-scaling is introduced in section 6.4. The second problem is again nicely handled by qsu, which can also compute panel-statistics by groups.

# Converting data to percentages of overall VA / EMP, dapply keeps the attributes, see section 6.1
pGGDC10S <- ftransformv(GGDC10S, 6:15, `*`, 100 / SUM)

# Summarizing the sectoral data by variable, overall, between and within countries
su <- qsu(pGGDC10S, by = ~ Variable, pid = ~ Variable + Country,
          cols = 6:16, higher = TRUE)

# This gives a 4D array of summary statistics
str(su)
#  'qsu' num [1:2, 1:7, 1:3, 1:11] 2225 2139 35.1 17.3 26.7 ...
#  - attr(*, "dimnames")=List of 4
#   ..$ : chr [1:2] "EMP" "VA"
#   ..$ : chr [1:7] "N/T" "Mean" "SD" "Min" ...
#   ..$ : chr [1:3] "Overall" "Between" "Within"
#   ..$ : chr [1:11] "AGR" "MIN" "MAN" "PU" ...

# Permuting this array to a more readible format
aperm(su, c(4L, 2L, 3L, 1L))
# , , Overall, EMP
# 
#        N/T        Mean          SD       Min      Max     Skew     Kurt
# AGR   2225     35.0949     26.7235     0.156      100   0.4856   2.0951
# MIN   2216      1.0349      1.4247    0.0043   9.4097   3.1281  15.0429
# MAN   2216     14.9768      8.0392    0.5822  45.2974   0.4272   2.8455
# PU    2215      0.5782      0.3601    0.0154   2.4786   1.2588   5.5822
# CON   2216      5.6583      2.9252    0.1417  15.9887  -0.0631   2.2725
# WRT   2216     14.9155      6.5573     0.809  32.8046  -0.1814   2.3226
# TRA   2216      4.8193       2.652    0.1506  15.0454   0.9477   4.4695
# FIRE  2216      4.6501      4.3518    0.0799  21.7717   1.2345   4.0831
# GOV   1780     13.1263      8.0844         0  34.8897   0.6301   2.5338
# OTH   2109      8.3977      6.6409     0.421  34.8942   1.4028   4.3191
# SUM   2225  36846.8741  96318.6544  173.8829   764200   5.0229  30.9814
# 
# , , Between, EMP
# 
#       N/T        Mean         SD       Min         Max     Skew     Kurt
# AGR    42     35.0949    24.1204    0.9997     88.3263   0.5202   2.2437
# MIN    42      1.0349     1.2304    0.0296      6.8532   2.7313   12.331
# MAN    42     14.9768     7.0375     1.718     32.3439  -0.0164   2.4321
# PU     42      0.5782     0.3041    0.0671      1.3226   0.5459   2.6905
# CON    42      5.6583     2.4748    0.5037     10.3691  -0.4442   2.3251
# WRT    42     14.9155      5.264    4.0003     26.7699  -0.5478   2.7294
# TRA    42      4.8193     2.4712     0.374     12.3887   0.9782   4.7857
# FIRE   42      4.6501     3.4468    0.1505     12.4402   0.6052   2.5883
# GOV    34     13.1263     7.2832    2.0086     29.1577   0.3858   2.1068
# OTH    40      8.3977      6.266    1.3508     26.4036   1.4349   4.3185
# SUM    42  36846.8741  89205.503  369.2353  485820.474   4.0761  19.3159
# 
# , , Within, EMP
# 
#           N/T         Mean          SD          Min          Max     Skew     Kurt
# AGR   52.9762      26.3768     11.5044      -5.3234     107.4891   1.6002  11.9683
# MIN   52.7619       3.4006      0.7182      -1.4068        7.509  -0.1988  15.0343
# MAN   52.7619       17.476      3.8861      -1.1061      40.3964   -0.082   7.3994
# PU    52.7381       1.3896      0.1929       0.6346       2.5461   0.5731   7.8523
# CON   52.7619       5.7633      1.5596       0.8964      12.9663   0.3077   4.1248
# WRT   52.7619      15.7581        3.91       3.7356      29.7615   0.3339   3.3386
# TRA   52.7619       6.3486      0.9623       2.3501      11.1064   0.2671   5.7162
# FIRE  52.7619       5.8228      2.6567      -2.9836      15.9974   0.5486   4.0288
# GOV   52.3529       13.263      3.5088      -2.1983       23.611  -0.5647   4.7286
# OTH    52.725       7.3941      2.1999      -2.3286      17.4413   0.2929   6.4631
# SUM   52.9762  21'566436.8  36327.1443  21'287906.3  21'844816.3   0.6649  34.2495
# 
# , , Overall, VA
# 
#        N/T         Mean          SD       Min             Max     Skew      Kurt
# AGR   2139      17.3082     15.5066    0.0318          95.222   1.3274    4.8827
# MIN   2139       5.8514      9.0975         0         59.0602   2.7193   10.9184
# MAN   2139      20.0651      8.0033     0.979         41.6281  -0.0348    2.6831
# PU    2139       2.2298      1.1088         0          9.1888   0.8899    6.2385
# CON   2139       5.8721      2.5113    0.5951         25.8575   1.5002    8.9578
# WRT   2139       16.631      5.1374    4.5187         39.7594   0.3455    3.2655
# TRA   2139       7.9329      3.1057    0.7957         25.9625   1.0122    5.7137
# FIRE  2139       7.0377     12.7077  -151.065         39.1705  -6.2254   59.8739
# GOV   1702       13.406      6.3521    0.7607         32.5107   0.4888    2.9043
# OTH   2139       6.4046      5.8416    0.2327         31.4474   1.4978    4.2051
# SUM   2139  43'961639.1  358'350627         0  8.06794210e+09  15.7682  289.4632
# 
# , , Between, VA
# 
#       N/T         Mean          SD        Min             Max     Skew     Kurt
# AGR    43      17.3082     13.1901     0.6058         63.8364   1.1328   4.7111
# MIN    43       5.8514      7.5705     0.0475         27.9214   1.7113    4.807
# MAN    43      20.0651      6.6423     4.1869         32.1138  -0.3591    2.619
# PU     43       2.2298      0.7457     0.4462           4.307   0.6196   3.8724
# CON    43       5.8721      1.8455     2.9405         12.9279   1.3285    6.505
# WRT    43       16.631      4.3779     8.4188         26.3876    0.292   2.4553
# TRA    43       7.9329      2.7222      2.037         14.8892   0.6362   3.6686
# FIRE   43       7.0377      9.0284   -35.6144         23.8658   -2.674  15.0975
# GOV    35       13.406       5.875     1.9757         27.7714   0.5198   3.0416
# OTH    43       6.4046      5.6137     1.1184         19.5299   1.3274   3.2043
# SUM    43  43'961639.1  185'785836  5077.7231  1.23317892e+09   5.8098  36.9778
# 
# , , Within, VA
# 
#           N/T         Mean          SD              Min             Max     Skew      Kurt
# AGR   49.7442      26.3768      8.1532            5.245         94.3499    1.234    9.5269
# MIN   49.7442       3.4006      5.0451          -20.051         35.7053    0.341    13.102
# MAN   49.7442       17.476      4.4647           1.1188         36.3501  -0.1928    3.9339
# PU    49.7442       1.3896      0.8206          -1.0904          6.2714   0.5258    5.3462
# CON   49.7442       5.7633      1.7031          -0.3464         18.6929   0.7493    6.3751
# WRT   49.7442      15.7581      2.6884           4.6513         32.6691   0.2338    4.4953
# TRA   49.7442       6.3486      1.4951           0.9187         18.5977   0.6995   10.1129
# FIRE  49.7442       5.8228      8.9428        -109.6278         54.1241  -2.7728   54.5971
# GOV   48.6286       13.263      2.4153           5.1249         22.8497   0.1663    3.3083
# OTH   49.7442       7.3941      1.6159          -0.9151         19.3116   0.7301    9.6613
# SUM   49.7442  21'566436.8  306'429102  -1.21124805e+09  6.85632962e+09  12.6639  253.1145

The statistics show that the dataset is very consistent: Employment data cover 42 countries and 53 time-periods in almost all sectors. Agriculture is the largest sector in terms of employment, amounting to a 35% share of employment across countries and time, with a standard deviation (SD) of around 27%. The between-country SD in agricultural employment share is 24% and the within SD is 12%, indicating that processes of structural change are very gradual and most of the variation in structure is between countries. The next largest sectors after agriculture are manufacturing, wholesale and retail trade and government, each claiming an approx. 15% share of the economy. In these sectors the between-country SD is also about twice as large as the within-country SD.

In terms of value added, the data covers 43 countries in 50 time-periods. Agriculture, manufacturing, wholesale and retail trade and government are also the largest sectors in terms of VA, but with a diminished agricultural share (around 17%) and a greater share for manufacturing (around 20%). The variation between countries is again greater than the variation within countries, but it seems that at least in terms of agricultural VA share there is also a considerable within-country SD of 8%. This is also true for the finance and real estate sector with a within SD of 9%, suggesting (using a bit of common sense) that a diminishing VA share in agriculture and increased VA share in finance and real estate was a pattern characterizing most of the countries in this sample.

As a final step we consider a plot function which can be used to plot the structural transformation of any supported country. Below for Botswana:

library(data.table)
library(ggplot2)
library(magrittr)

plotGGDC <- function(ctry) {
  # Select and subset
  fsubset(GGDC10S, Country == ctry, Variable, Year, AGR:SUM) %>%
  # Convert to shares and replace negative values with NA
  ftransform(fselect(., AGR:OTH) %>%
             lapply(`*`, 1 / SUM) %>%
             replace_outliers(0, NA, "min")) %>%
  # Remove totals column and make proper variable labels
  ftransform(Variable = recode_char(Variable,
                                    VA = "Value Added Share",
                                    EMP = "Employment Share"),
             SUM = NULL) %>%
  # Fast conversion to data.table
  qDT %>%
  # data.table's melt function
  melt(1:2, variable.name = "Sector", na.rm = TRUE) %>%
  # ggplot with some scales provided by the 'scales' package
  ggplot(aes(x = Year, y = value, fill = Sector)) +
    geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) +
    theme_linedraw(base_size = 14L) + facet_wrap( ~ Variable) +
    scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10L))) +
    scale_x_continuous(breaks = scales::pretty_breaks(n = 7L), expand = c(0, 0)) +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 10L), expand = c(0, 0),
                       labels = scales::percent) +
    theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)),
          strip.background = element_rect(colour = "grey20", fill = "grey20"),
          strip.text = element_text(face = "bold"))
}

# Plotting the structural transformation of Botswana
plotGGDC("BWA")
plot of chunk scplot_BWA

plot of chunk scplot_BWA

2. Fast Data Manipulation

A lot of R code is not concerned with statistical computations but with preliminary data wrangling. For various reasons R development has focused on data frames as the main medium to contain data, although matrices / arrays provide significantly faster methods for common manipulations.

A first essential step towards optimizing R code is thus to speed up very frequent manipulations on data frames. collapse introduces a set of highly optimized functions to efficiently manipulate (mostly) data frames. Most manipulations can be conducted in non-standard evaluation or standard evaluation (utilizing different functions), and all functions preserve the data structure (i.e. they can be used with data.table, tbl_df, grouped_df, pdata.frame etc.).

2.1 Selecting and Replacing Columns

fselect is an analogue to dplyr::select, but executes about 100x faster. It can be used to select variables using expressions involving variable names:

library(magrittr) # Pipe operators
fselect(wlddev, country, year, PCGDP:ODA) %>% head(2)
#       country year PCGDP LIFEEX GINI       ODA
# 1 Afghanistan 1960    NA 32.446   NA 116769997
# 2 Afghanistan 1961    NA 32.962   NA 232080002

fselect(wlddev, -country, -year, -(PCGDP:ODA)) %>% head(2)
#   iso3c       date decade     region     income  OECD     POP
# 1   AFG 1961-01-01   1960 South Asia Low income FALSE 8996973
# 2   AFG 1962-01-01   1960 South Asia Low income FALSE 9169410

library(microbenchmark)
microbenchmark(fselect = collapse::fselect(wlddev, country, year, PCGDP:ODA),
               select = dplyr::select(wlddev, country, year, PCGDP:ODA))
# Unit: microseconds
#     expr     min       lq      mean   median       uq      max neval
#  fselect   2.911   3.4645   4.76297   4.3665   5.3710   20.459   100
#   select 382.284 393.0055 442.70734 410.3075 441.4265 2951.262   100

in contrast to dplyr::select, fselect has a replacement method

# Computing the log of columns
fselect(wlddev, PCGDP:POP) <- lapply(fselect(wlddev, PCGDP:POP), log)
head(wlddev, 2)
#       country iso3c       date year decade     region     income  OECD PCGDP   LIFEEX GINI      ODA
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA 3.479577   NA 18.57572
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA 3.495355   NA 19.26259
#        POP
# 1 16.01240
# 2 16.03138
# Efficient deleting
fselect(wlddev, country, year, PCGDP:POP) <- NULL
head(wlddev, 2)
#   iso3c       date decade     region     income  OECD
# 1   AFG 1961-01-01   1960 South Asia Low income FALSE
# 2   AFG 1962-01-01   1960 South Asia Low income FALSE
rm(wlddev)

and it can also return information about the selected columns other than the data itself.

fselect(wlddev, PCGDP:POP, return = "names")
# [1] "PCGDP"  "LIFEEX" "GINI"   "ODA"    "POP"
fselect(wlddev, PCGDP:POP, return = "indices")
# [1]  9 10 11 12 13
fselect(wlddev, PCGDP:POP, return = "named_indices")
#  PCGDP LIFEEX   GINI    ODA    POP 
#      9     10     11     12     13
fselect(wlddev, PCGDP:POP, return = "logical")
#  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
fselect(wlddev, PCGDP:POP, return = "named_logical")
# country   iso3c    date    year  decade  region  income    OECD   PCGDP  LIFEEX    GINI     ODA 
#   FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   FALSE    TRUE    TRUE    TRUE    TRUE 
#     POP 
#    TRUE

While fselect is faster than dplyr::select, it is also simpler and does not offer special methods for grouped tibbles (e.g. where grouping columns are always selected) and some other dplyr-specific features of select. We will see that this is not a problem at all when working with statistical functions in collapse that have a grouped_df method, but users should be careful replacing dplyr::select with fselect in dplyr scripts. From collapse 1.6.0, fselect has explicit support for sf data frames.

The standard-evaluation analogue to fselect is the function get_vars. get_vars can be used to select variables using names, indices, logical vectors, functions or regular expressions evaluated against column names:

get_vars(wlddev, 9:13) %>% head(1)
#   PCGDP LIFEEX GINI       ODA     POP
# 1    NA 32.446   NA 116769997 8996973
get_vars(wlddev, c("PCGDP","LIFEEX","GINI","ODA","POP")) %>% head(1)
#   PCGDP LIFEEX GINI       ODA     POP
# 1    NA 32.446   NA 116769997 8996973
get_vars(wlddev, "[[:upper:]]", regex = TRUE) %>% head(1)
#    OECD PCGDP LIFEEX GINI       ODA     POP
# 1 FALSE    NA 32.446   NA 116769997 8996973
get_vars(wlddev, "PC|LI|GI|OD|PO", regex = TRUE) %>% head(1)
#   PCGDP LIFEEX GINI       ODA     POP
# 1    NA 32.446   NA 116769997 8996973
# Same as above, vectors of regular expressions are sequentially passed to grep
get_vars(wlddev, c("PC","LI","GI","OD","PO"), regex = TRUE) %>% head(1)
#   PCGDP LIFEEX GINI       ODA     POP
# 1    NA 32.446   NA 116769997 8996973
get_vars(wlddev, is.numeric) %>% head(1)
#   year decade PCGDP LIFEEX GINI       ODA     POP
# 1 1960   1960    NA 32.446   NA 116769997 8996973

# Returning other information
get_vars(wlddev, is.numeric, return = "names")
# [1] "year"   "decade" "PCGDP"  "LIFEEX" "GINI"   "ODA"    "POP"
get_vars(wlddev, "[[:upper:]]", regex = TRUE, return = "named_indices")
#   OECD  PCGDP LIFEEX   GINI    ODA    POP 
#      8      9     10     11     12     13

Replacing operations are conducted analogous:

get_vars(wlddev, 9:13) <- lapply(get_vars(wlddev, 9:13), log)
get_vars(wlddev, 9:13) <- NULL
head(wlddev, 2)
#       country iso3c       date year decade     region     income  OECD
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE
rm(wlddev)

get_vars is about 2x faster than [.data.frame, and get_vars<- is about 6-8x faster than [<-.data.frame.

In addition to get_vars, collapse offers a set of functions to efficiently select and replace data by data type: num_vars, cat_vars (for categorical = non-numeric columns), char_vars, fact_vars, logi_vars and date_vars (for date and date-time columns).

head(num_vars(wlddev), 2)
#   year decade PCGDP LIFEEX GINI       ODA     POP
# 1 1960   1960    NA 32.446   NA 116769997 8996973
# 2 1961   1960    NA 32.962   NA 232080002 9169410
head(cat_vars(wlddev), 2)
#       country iso3c       date     region     income  OECD
# 1 Afghanistan   AFG 1961-01-01 South Asia Low income FALSE
# 2 Afghanistan   AFG 1962-01-01 South Asia Low income FALSE
head(fact_vars(wlddev), 2)
#   iso3c     region     income
# 1   AFG South Asia Low income
# 2   AFG South Asia Low income

# Replacing
fact_vars(wlddev) <- fact_vars(wlddev)

2.2 Subsetting

fsubset is an enhanced version of base::subset using C functions from the data.table package for fast and subsetting operations. In contrast to base::subset, fsubset allows multiple comma-separated select arguments after the subset argument, and it also preserves all attributes of subsetted columns:

# Returning only value-added data after 1990
fsubset(GGDC10S, Variable == "VA" & Year > 1990, Country, Year, AGR:GOV) %>% head(2)
#   Country Year      AGR      MIN      MAN       PU      CON      WRT      TRA     FIRE      GOV
# 1     BWA 1991 303.1157 2646.950 472.6488 160.6079 580.0876 806.7509 232.7884 432.6965 1073.263
# 2     BWA 1992 333.4364 2690.939 537.4274 178.4532 678.7320 725.2577 285.1403 517.2141 1234.012
# Same thing
fsubset(GGDC10S, Variable == "VA" & Year > 1990, -(Regioncode:Variable), -(OTH:SUM)) %>% head(2)
#   Country Year      AGR      MIN      MAN       PU      CON      WRT      TRA     FIRE      GOV
# 1     BWA 1991 303.1157 2646.950 472.6488 160.6079 580.0876 806.7509 232.7884 432.6965 1073.263
# 2     BWA 1992 333.4364 2690.939 537.4274 178.4532 678.7320 725.2577 285.1403 517.2141 1234.012

It is also possible to use standard evaluation with fsubset, but for these purposes the function ss exists as a fast and more secure alternative to [.data.frame:

ss(GGDC10S, 1:2, 6:16)  # or fsubset(GGDC10S, 1:2, 6:16), but not recommended.
#   AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM
# 1  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA
# 2  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA
ss(GGDC10S, -(1:2), c("AGR","MIN")) %>% head(2)
#   AGR MIN
# 1  NA  NA
# 2  NA  NA

Thanks to the data.table C code and optimized R code, fsubset is very fast.

microbenchmark(base = subset(GGDC10S, Variable == "VA" & Year > 1990, AGR:SUM),
               collapse = fsubset(GGDC10S, Variable == "VA" & Year > 1990, AGR:SUM))
# Unit: microseconds
#      expr     min       lq      mean   median      uq      max neval
#      base 150.839 156.5585 199.63105 160.3510 166.993 3778.191   100
#  collapse  45.715  49.0975  51.55545  50.9015  52.357   82.861   100

microbenchmark(GGDC10S[1:10, 1:10], ss(GGDC10S, 1:10, 1:10))
# Unit: microseconds
#                     expr    min     lq     mean median     uq    max neval
#      GGDC10S[1:10, 1:10] 36.367 36.982 38.14599 37.515 38.294 76.219   100
#  ss(GGDC10S, 1:10, 1:10)  1.886  2.050  2.30666  2.214  2.419  8.405   100

like base::subset, fsubset is S3 generic with methods for vectors, matrices and data frames. For certain classes such as factors, fsubset.default also improves upon [, but the largest improvements are with the data frame method.

2.3 Reordering Rows and Columns

roworder is a fast analogue to dplyr::arrange. The syntax is inspired by data.table::setorder, so that negative variable names indicate descending sort.

roworder(GGDC10S, -Variable, Country) %>% ss(1:2, 1:8)
#   Country Regioncode        Region Variable Year          AGR MIN         MAN
# 1     ARG        LAM Latin America       VA 1950 5.887857e-07   0 3.53443e-06
# 2     ARG        LAM Latin America       VA 1951 9.165327e-07   0 4.77277e-06

microbenchmark(collapse = collapse::roworder(GGDC10S, -Variable, Country),
               dplyr = dplyr::arrange(GGDC10S, desc(Variable), Country))
# Unit: microseconds
#      expr      min       lq      mean   median        uq      max neval
#  collapse  113.406  152.151  176.7567  165.722  183.0855  538.330   100
#     dplyr 1240.168 1299.372 1618.5869 1384.755 1507.8160 8350.552   100

In contrast to data.table::setorder, roworder creates a copy of the data frame (unless data are already sorted). If this copy is not required, data.table::setorder is faster. The function roworderv is a standard evaluation analogue to roworder:

# Same as above
roworderv(GGDC10S, c("Variable", "Country"), decreasing = c(TRUE, FALSE)) %>% ss(1:2, 1:8)
#   Country Regioncode        Region Variable Year          AGR MIN         MAN
# 1     ARG        LAM Latin America       VA 1950 5.887857e-07   0 3.53443e-06
# 2     ARG        LAM Latin America       VA 1951 9.165327e-07   0 4.77277e-06

With roworderv, it is also possible to move or exchange rows in a data frame:

# If length(neworder) < fnrow(data), the default (pos = "front") brings rows to the front
roworderv(GGDC10S, neworder = which(GGDC10S$Country == "GHA")) %>% ss(1:2, 1:8)
#   Country Regioncode             Region Variable Year        AGR         MIN        MAN
# 1     GHA        SSA Sub-saharan Africa       VA 1960 0.03576160 0.005103683 0.01744687
# 2     GHA        SSA Sub-saharan Africa       VA 1961 0.03823049 0.005456030 0.01865136

# pos = "end" brings rows to the end
roworderv(GGDC10S, neworder = which(GGDC10S$Country == "BWA"), pos = "end") %>% ss(1:2, 1:8)
#   Country Regioncode             Region Variable Year      AGR      MIN     MAN
# 1     ETH        SSA Sub-saharan Africa       VA 1960       NA       NA      NA
# 2     ETH        SSA Sub-saharan Africa       VA 1961 4495.614 11.86979 109.616

# pos = "exchange" arranges selected rows in the order they are passed, without affecting other rows
roworderv(GGDC10S, neworder = with(GGDC10S, c(which(Country == "GHA"),
                                              which(Country == "BWA"))), pos = "exchange") %>% ss(1:2, 1:8)
#   Country Regioncode             Region Variable Year        AGR         MIN        MAN
# 1     GHA        SSA Sub-saharan Africa       VA 1960 0.03576160 0.005103683 0.01744687
# 2     GHA        SSA Sub-saharan Africa       VA 1961 0.03823049 0.005456030 0.01865136

Similarly, the pair colorder / colorderv facilitates efficient reordering of columns in a data frame. These functions not require a deep copy of the data and are very fast. To reorder columns by reference, see also data.table::setcolorder.

# The default is again pos = "front" which brings selected columns to the front / left
colorder(GGDC10S, Variable, Country, Year) %>% head(2)
#   Variable Country Year Regioncode             Region AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM
# 1       VA     BWA 1960        SSA Sub-saharan Africa  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA
# 2       VA     BWA 1961        SSA Sub-saharan Africa  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA

2.4 Transforming and Computing New Columns

ftransform is an improved version of base::transform for data frames and lists. ftransform can be used to compute new columns or modify and delete existing columns, and always returns the entire data frame.

ftransform(GGDC10S, AGR_perc = AGR / SUM * 100, # Computing Agricultural percentage
                    Year = as.integer(Year),    # Coercing Year to integer
                    AGR = NULL) %>% tail(2)     # Deleting column AGR
#      Country Regioncode                       Region Variable Year      MIN      MAN       PU
# 5026     EGY       MENA Middle East and North Africa      EMP 2011 27.56394 2373.814 317.9979
# 5027     EGY       MENA Middle East and North Africa      EMP 2012 24.78083 2348.434 324.9332
#           CON      WRT      TRA     FIRE      GOV OTH      SUM AGR_perc
# 5026 2795.264 3020.236 2048.335 814.7403 5635.522  NA 22219.39 23.33961
# 5027 2931.196 3109.522 2065.004 832.4770 5735.623  NA 22532.56 22.90281

# Computing scalar results replicates them
ftransform(GGDC10S, MIN_mean = fmean(MIN), Intercept = 1) %>% tail(2)
#      Country Regioncode                       Region Variable Year      AGR      MIN      MAN
# 5026     EGY       MENA Middle East and North Africa      EMP 2011 5185.919 27.56394 2373.814
# 5027     EGY       MENA Middle East and North Africa      EMP 2012 5160.590 24.78083 2348.434
#            PU      CON      WRT      TRA     FIRE      GOV OTH      SUM MIN_mean Intercept
# 5026 317.9979 2795.264 3020.236 2048.335 814.7403 5635.522  NA 22219.39  1867909         1
# 5027 324.9332 2931.196 3109.522 2065.004 832.4770 5735.623  NA 22532.56  1867909         1

The modification ftransformv exists to transform specific columns using a function:

# Apply the log to columns 6-16
GGDC10S %>% ftransformv(6:16, log) %>% tail(2)
#      Country Regioncode                       Region Variable Year      AGR      MIN      MAN
# 5026     EGY       MENA Middle East and North Africa      EMP 2011 8.553702 3.316508 7.772253
# 5027     EGY       MENA Middle East and North Africa      EMP 2012 8.548806 3.210070 7.761504
#            PU      CON      WRT      TRA     FIRE      GOV OTH      SUM
# 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845  NA 10.00872
# 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452  NA 10.02272

# Convert data to percentage terms
GGDC10S %>% ftransformv(6:16, `*`, 100/SUM) %>% tail(2)
#      Country Regioncode                       Region Variable Year      AGR       MIN      MAN
# 5026     EGY       MENA Middle East and North Africa      EMP 2011 23.33961 0.1240535 10.68352
# 5027     EGY       MENA Middle East and North Africa      EMP 2012 22.90281 0.1099779 10.42240
#            PU      CON      WRT      TRA     FIRE      GOV OTH SUM
# 5026 1.431173 12.58029 13.59279 9.218680 3.666798 25.36308  NA 100
# 5027 1.442061 13.00871 13.80013 9.164534 3.694551 25.45482  NA 100

# Apply log to numeric columns
GGDC10S %>% ftransformv(is.numeric, log) %>% tail(2)
#      Country Regioncode                       Region Variable     Year      AGR      MIN      MAN
# 5026     EGY       MENA Middle East and North Africa      EMP 7.606387 8.553702 3.316508 7.772253
# 5027     EGY       MENA Middle East and North Africa      EMP 7.606885 8.548806 3.210070 7.761504
#            PU      CON      WRT      TRA     FIRE      GOV OTH      SUM
# 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845  NA 10.00872
# 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452  NA 10.02272

Instead of passing comma-separated column = value expressions, it is also possible to bulk-process data with fransform by passing a single list of expressions (such as a data frame). This is useful for more complex transformations involving multiple steps:

# Same as above, but also replacing any generated infinite values with NA
GGDC10S %>% ftransform(num_vars(.) %>% lapply(log) %>% replace_Inf) %>% tail(2)
#      Country Regioncode                       Region Variable     Year      AGR      MIN      MAN
# 5026     EGY       MENA Middle East and North Africa      EMP 7.606387 8.553702 3.316508 7.772253
# 5027     EGY       MENA Middle East and North Africa      EMP 7.606885 8.548806 3.210070 7.761504
#            PU      CON      WRT      TRA     FIRE      GOV OTH      SUM
# 5026 5.762045 7.935682 8.013090 7.624782 6.702869 8.636845  NA 10.00872
# 5027 5.783620 7.983166 8.042224 7.632888 6.724406 8.654452  NA 10.02272

This mode of usage toggles automatic column matching and replacement. Non-matching columns are added to the data frame. Apart from to ftransform, the function settransform(v) can be used to change the input data frame by reference:

# Computing a new column and deleting some others by reference
settransform(GGDC10S, FIRE_MAN = FIRE / MAN,
                      Regioncode = NULL, Region = NULL)
tail(GGDC10S, 2)
#      Country Variable Year      AGR      MIN      MAN       PU      CON      WRT      TRA     FIRE
# 5026     EGY      EMP 2011 5185.919 27.56394 2373.814 317.9979 2795.264 3020.236 2048.335 814.7403
# 5027     EGY      EMP 2012 5160.590 24.78083 2348.434 324.9332 2931.196 3109.522 2065.004 832.4770
#           GOV OTH      SUM  FIRE_MAN
# 5026 5635.522  NA 22219.39 0.3432200
# 5027 5735.623  NA 22532.56 0.3544817
rm(GGDC10S)

# Bulk-processing the data into percentage terms
settransformv(GGDC10S, 6:16, `*`, 100/SUM)
tail(GGDC10S, 2)
#      Country Regioncode                       Region Variable Year      AGR       MIN      MAN
# 5026     EGY       MENA Middle East and North Africa      EMP 2011 23.33961 0.1240535 10.68352
# 5027     EGY       MENA Middle East and North Africa      EMP 2012 22.90281 0.1099779 10.42240
#            PU      CON      WRT      TRA     FIRE      GOV OTH SUM
# 5026 1.431173 12.58029 13.59279 9.218680 3.666798 25.36308  NA 100
# 5027 1.442061 13.00871 13.80013 9.164534 3.694551 25.45482  NA 100

# Same thing via replacement
ftransform(GGDC10S) <- fselect(GGDC10S, AGR:SUM) %>% lapply(`*`, 100/.$SUM)
# Or using double pipes
GGDC10S %<>% ftransformv(6:16, `*`, 100/SUM)
rm(GGDC10S)

Another convenient addition is provided by the function fcompute, which can be used to compute new columns in a data frame environment and returns the computed columns in a new data frame:

fcompute(GGDC10S, AGR_perc = AGR / SUM * 100, FIRE_MAN = FIRE / MAN) %>% tail(2)
#      AGR_perc  FIRE_MAN
# 5026 23.33961 0.3432200
# 5027 22.90281 0.3544817

For more complex tasks see ?ftransform.

2.5 Adding and Binding Columns

For cases where multiple columns are computed and need to be added to a data frame (regardless of whether names are duplicated or not), collapse introduces the predicate add_vars. Together with add_vars, the function add_stub is useful to add a prefix (default) or postfix to computed variables keeping the variable names unique:

# Efficient adding logged versions of some variables
add_vars(wlddev) <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.")
head(wlddev, 2)
#       country iso3c       date year decade     region     income  OECD PCGDP LIFEEX GINI       ODA
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA 32.446   NA 116769997
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA 32.962   NA 232080002
#       POP log10.PCGDP log10.LIFEEX log10.GINI log10.ODA log10.POP
# 1 8996973          NA     1.511161         NA  8.067331  6.954096
# 2 9169410          NA     1.518014         NA  8.365638  6.962341
rm(wlddev)

By default add_vars appends a data frame towards the (right) end, but it can also replace columns in front or at other positions in the data frame:

add_vars(wlddev, "front") <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.")
head(wlddev, 2)
#   log10.PCGDP log10.LIFEEX log10.GINI log10.ODA log10.POP     country iso3c       date year decade
# 1          NA     1.511161         NA  8.067331  6.954096 Afghanistan   AFG 1961-01-01 1960   1960
# 2          NA     1.518014         NA  8.365638  6.962341 Afghanistan   AFG 1962-01-01 1961   1960
#       region     income  OECD PCGDP LIFEEX GINI       ODA     POP
# 1 South Asia Low income FALSE    NA 32.446   NA 116769997 8996973
# 2 South Asia Low income FALSE    NA 32.962   NA 232080002 9169410
rm(wlddev)

add_vars(wlddev, c(10L,12L,14L,16L,18L)) <- get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.")
head(wlddev, 2)
#       country iso3c       date year decade     region     income  OECD PCGDP log10.PCGDP LIFEEX
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA          NA 32.446
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA          NA 32.962
#   log10.LIFEEX GINI log10.GINI       ODA log10.ODA     POP log10.POP
# 1     1.511161   NA         NA 116769997  8.067331 8996973  6.954096
# 2     1.518014   NA         NA 232080002  8.365638 9169410  6.962341
rm(wlddev)

add_vars can also be used without replacement, where it serves as a more efficient version of cbind.data.frame, with the difference that the data structure and attributes of the first argument are preserved:

add_vars(wlddev, get_vars(wlddev, 9:13) %>% lapply(log) %>% add_stub("log."),
                 get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10.")) %>% head(2)
#       country iso3c       date year decade     region     income  OECD PCGDP LIFEEX GINI       ODA
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA 32.446   NA 116769997
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA 32.962   NA 232080002
#       POP log.PCGDP log.LIFEEX log.GINI  log.ODA  log.POP log10.PCGDP log10.LIFEEX log10.GINI
# 1 8996973        NA   3.479577       NA 18.57572 16.01240          NA     1.511161         NA
# 2 9169410        NA   3.495355       NA 19.26259 16.03138          NA     1.518014         NA
#   log10.ODA log10.POP
# 1  8.067331  6.954096
# 2  8.365638  6.962341

add_vars(wlddev,  get_vars(wlddev, 9:13) %>% lapply(log) %>% add_stub("log."),
                  get_vars(wlddev, 9:13) %>% lapply(log10) %>% add_stub("log10."),
         pos = c(10L,13L,16L,19L,22L,11L,14L,17L,20L,23L)) %>% head(2)
#       country iso3c       date year decade     region     income  OECD PCGDP log.PCGDP log10.PCGDP
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA        NA          NA
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA        NA          NA
#   LIFEEX log.LIFEEX log10.LIFEEX GINI log.GINI log10.GINI       ODA  log.ODA log10.ODA     POP
# 1 32.446   3.479577     1.511161   NA       NA         NA 116769997 18.57572  8.067331 8996973
# 2 32.962   3.495355     1.518014   NA       NA         NA 232080002 19.26259  8.365638 9169410
#    log.POP log10.POP
# 1 16.01240  6.954096
# 2 16.03138  6.962341

identical(cbind(wlddev, wlddev), add_vars(wlddev, wlddev))
# [1] TRUE
microbenchmark(cbind(wlddev, wlddev), add_vars(wlddev, wlddev))
# Unit: microseconds
#                      expr    min      lq     mean median      uq    max neval
#     cbind(wlddev, wlddev) 13.694 14.1040 15.72760 14.391 14.7600 57.072   100
#  add_vars(wlddev, wlddev)  3.280  3.6285  4.13567  3.813  4.0385 19.352   100

2.6 Renaming Columns

frename is a fast substitute for dplyr::rename:

frename(GGDC10S, AGR = Agriculture, MIN = Mining) %>% head(2)
#   Country Regioncode             Region Variable Year Agriculture Mining MAN PU CON WRT TRA FIRE
# 1     BWA        SSA Sub-saharan Africa       VA 1960          NA     NA  NA NA  NA  NA  NA   NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961          NA     NA  NA NA  NA  NA  NA   NA
#   GOV OTH SUM
# 1  NA  NA  NA
# 2  NA  NA  NA
frename(GGDC10S, tolower) %>% head(2)
#   country regioncode             region variable year agr min man pu con wrt tra fire gov oth sum
# 1     BWA        SSA Sub-saharan Africa       VA 1960  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA
frename(GGDC10S, tolower, cols = .c(AGR, MIN)) %>% head(2)
#   Country Regioncode             Region Variable Year agr min MAN PU CON WRT TRA FIRE GOV OTH SUM
# 1     BWA        SSA Sub-saharan Africa       VA 1960  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961  NA  NA  NA NA  NA  NA  NA   NA  NA  NA  NA

The function setrename does this by reference:

setrename(GGDC10S, AGR = Agriculture, MIN = Mining)
head(GGDC10S, 2)
#   Country Regioncode             Region Variable Year Agriculture Mining MAN PU CON WRT TRA FIRE
# 1     BWA        SSA Sub-saharan Africa       VA 1960          NA     NA  NA NA  NA  NA  NA   NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961          NA     NA  NA NA  NA  NA  NA   NA
#   GOV OTH SUM
# 1  NA  NA  NA
# 2  NA  NA  NA
setrename(GGDC10S, Agriculture = AGR, Mining = MIN)
rm(GGDC10S)

Both functions are not limited to data frames but can be applied to any R object with a ‘names’ attribute.

2.7 Using Shortcuts

The most frequently required among the functions introduced above can be abbreviated as follows: fselect -> slt, fsubset -> sbt, ftransform(v) -> tfm(v), settransform(v) -> settfm(v), get_vars -> gv, num_vars -> nv, add_vars -> av. This was done to make it possible to write faster and more parsimonious code, but is recommended only for personally kept scripts. A lazy person may also decide to code everything using shortcuts and then do ctrl+F replacement with the long names on the finished script.

2.8 Missing Values / Rows

The function na_omit is a much faster alternative to stats::na.omit for vectors, matrices and data frames. By default the ‘na.action’ attribute containing the removed cases is omitted, but it can be added with the option na.attr = TRUE. Like fsubset, na_omit preserves all column attributes as well as attributes of the data frame itself.

microbenchmark(na_omit(wlddev, na.attr = TRUE), na.omit(wlddev))
# Unit: microseconds
#                             expr     min      lq      mean   median       uq       max neval
#  na_omit(wlddev, na.attr = TRUE)  60.393  69.208   84.8126  79.9910   88.683   419.881   100
#                  na.omit(wlddev) 745.790 856.449 1721.5457 940.6015 1005.177 56344.414   100

Another added feature is the removal of cases missing on certain columns only:

na_omit(wlddev, cols = .c(PCGDP, LIFEEX)) %>% head(2)
#       country iso3c       date year decade     region     income  OECD    PCGDP LIFEEX GINI
# 1 Afghanistan   AFG 2003-01-01 2002   2000 South Asia Low income FALSE 330.3036 56.784   NA
# 2 Afghanistan   AFG 2004-01-01 2003   2000 South Asia Low income FALSE 343.0809 57.271   NA
#          ODA      POP
# 1 1790479980 22600770
# 2 1972890015 23680871
# only removing missing data from numeric columns -> same and slightly faster than na_omit(wlddev)
na_omit(wlddev, cols = is.numeric) %>% head(2)
#   country iso3c       date year decade                region              income  OECD    PCGDP
# 1 Albania   ALB 1997-01-01 1996   1990 Europe & Central Asia Upper middle income FALSE 1869.866
# 2 Albania   ALB 2003-01-01 2002   2000 Europe & Central Asia Upper middle income FALSE 2572.721
#   LIFEEX GINI       ODA     POP
# 1 72.495 27.0 294089996 3168033
# 2 74.579 31.7 453309998 3051010

For atomic vectors the function na_rm also exists which is 2x faster than x[!is.na(x)]. Both na_omit and na_rm return their argument if no missing cases were found.

The existence of missing cases can be checked using missing_cases, which is also considerably faster than complete.cases for data frames.

There is also a function na_insert to randomly insert missing values into vectors, matrices and data frames. The default is na_insert(X, prop = 0.1) so that 10% of values are randomly set to missing.

Finally, a function allNA provides the much needed opposite of anyNA for atomic vectors.

2.9 Unique Values / Rows

Similar to na_omit, the function funique is a much faster alternative to base::unique for atomic vectors and data frames. Like most collapse functions it also seeks to preserve attributes.

funique(GGDC10S$Variable)              # Unique values in order of appearance
# [1] "VA"  "EMP"
# attr(,"label")
# [1] "Variable"
# attr(,"format.stata")
# [1] "%9s"
funique(GGDC10S$Variable, sort = TRUE) # Sorted unique values
# [1] "EMP" "VA" 
# attr(,"label")
# [1] "Variable"
# attr(,"format.stata")
# [1] "%9s"

# If all values/rows are unique, the original data is returned (no copy)
identical(funique(GGDC10S), GGDC10S)
# [1] TRUE

# Can remove duplicate rows by a subset of columns
funique(GGDC10S, cols = .c(Country, Variable)) %>% ss(1:2, 1:8)
#   Country Regioncode             Region Variable Year AGR MIN MAN
# 1     BWA        SSA Sub-saharan Africa       VA 1960  NA  NA  NA
# 2     BWA        SSA Sub-saharan Africa      EMP 1960  NA  NA  NA
funique(GGDC10S, cols = .c(Country, Variable), sort = TRUE) %>% ss(1:2, 1:8)
#   Country Regioncode        Region Variable Year          AGR      MIN          MAN
# 1     ARG        LAM Latin America      EMP 1950 1.799565e+03 32.71936 1.603249e+03
# 2     ARG        LAM Latin America       VA 1950 5.887857e-07  0.00000 3.534430e-06

2.10 Recoding and Replacing Values

With recode_num, recode_char, replace_NA, replace_Inf and replace_outliers, collapse also introduces a set of functions to efficiently recode and replace numeric and character values in matrix-like objects (vectors, matrices, arrays, data frames, lists of atomic objects). When called on a data frame, recode_num, replace_Inf and replace_outliers will skip non-numeric columns, and recode_char skips non-character columns, whereas replace_NA replaces missing values in all columns.

# Efficient replacing missing values with 0
microbenchmark(replace_NA(GGDC10S, 0))
# Unit: microseconds
#                    expr     min      lq     mean   median       uq      max neval
#  replace_NA(GGDC10S, 0) 109.757 141.163 203.4982 151.0235 163.0775 4579.085   100

# Adding log-transformed sectoral data: Some NaN and Inf values generated
add_vars(GGDC10S, 6:16*2-5) <- fselect(GGDC10S, AGR:SUM) %>%
  lapply(log) %>% replace_Inf %>% add_stub("log.")
head(GGDC10S, 2)
#   Country Regioncode             Region Variable Year AGR log.AGR MIN log.MIN MAN log.MAN PU log.PU
# 1     BWA        SSA Sub-saharan Africa       VA 1960  NA      NA  NA      NA  NA      NA NA     NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961  NA      NA  NA      NA  NA      NA NA     NA
#   CON log.CON WRT log.WRT TRA log.TRA FIRE log.FIRE GOV log.GOV OTH log.OTH SUM log.SUM
# 1  NA      NA  NA      NA  NA      NA   NA       NA  NA      NA  NA      NA  NA      NA
# 2  NA      NA  NA      NA  NA      NA   NA       NA  NA      NA  NA      NA  NA      NA
rm(GGDC10S)

recode_num and recode_char follow the syntax of dplyr::recode and provide more or less the same functionality except that they can efficiently be applied to matrices and data frames, and that recode_char allows for regular expression matching implemented via base::grepl:

month.name
#  [1] "January"   "February"  "March"     "April"     "May"       "June"      "July"      "August"   
#  [9] "September" "October"   "November"  "December"
recode_char(month.name, ber = "C", "^J" = "A", default = "B", regex = TRUE)
#  [1] "A" "B" "B" "B" "B" "A" "A" "B" "B" "B" "B" "B"

The perhaps most interesting function in this ensemble is replace_outliers, which replaces values falling outside a 1- or 2-sided numeric threshold or outside a certain number of column- standard deviations with a value (default is NA).

# replace all values below 2 and above 100 with NA
replace_outliers(mtcars, c(2, 100)) %>% head(3)
#                mpg cyl disp hp drat    wt  qsec vs am gear carb
# Mazda RX4     21.0   6   NA NA 3.90 2.620 16.46 NA NA    4    4
# Mazda RX4 Wag 21.0   6   NA NA 3.90 2.875 17.02 NA NA    4    4
# Datsun 710    22.8   4   NA 93 3.85 2.320 18.61 NA NA    4   NA

# replace all value smaller than 2 with NA
replace_outliers(mtcars, 2, single.limit = "min") %>% head(3)
#                mpg cyl disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4     21.0   6  160 110 3.90 2.620 16.46 NA NA    4    4
# Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02 NA NA    4    4
# Datsun 710    22.8   4  108  93 3.85 2.320 18.61 NA NA    4   NA

# replace all value larger than 100 with NA
replace_outliers(mtcars, 100, single.limit = "max") %>% head(3)
#                mpg cyl disp hp drat    wt  qsec vs am gear carb
# Mazda RX4     21.0   6   NA NA 3.90 2.620 16.46  0  1    4    4
# Mazda RX4 Wag 21.0   6   NA NA 3.90 2.875 17.02  0  1    4    4
# Datsun 710    22.8   4   NA 93 3.85 2.320 18.61  1  1    4    1

# replace all values above or below 3 column-standard-deviations from the column-mean with NA
replace_outliers(mtcars, 3) %>% tail(3)
#                mpg cyl disp  hp drat   wt qsec vs am gear carb
# Ferrari Dino  19.7   6  145 175 3.62 2.77 15.5  0  1    5    6
# Maserati Bora 15.0   8  301 335 3.54 3.57 14.6  0  1    5   NA
# Volvo 142E    21.4   4  121 109 4.11 2.78 18.6  1  1    4    2

3. Quick Data Object Conversions

Apart from code employed for manipulation of data and the actual statistical computations performed, frequently used data object conversions with base functions like as.data.frame, as.matrix or as.factor have a significant share in slowing down R code. Optimally code would be written without such conversions, but sometimes they are necessary and thus collapse provides a set of functions (qDF, qDT, qTBL, qM, qF, mrtl and mctl) to speed these conversions up quite a bit. These functions are fast because they are non-generic and dispatch different objects internally, perform critical steps in C++, and, when passed lists of objects, they only check the length of the first column.

qDF, qDT and qTBL efficiently convert vectors, matrices, higher-dimensional arrays and suitable lists to data.frame, data.table and tibble respectively.

str(EuStockMarkets)
#  Time-Series [1:1860, 1:4] from 1991 to 1999: 1629 1614 1607 1621 1618 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : NULL
#   ..$ : chr [1:4] "DAX" "SMI" "CAC" "FTSE"
# Efficient Conversion of data frames and matrices to data.table
microbenchmark(qDT(wlddev), qDT(EuStockMarkets), as.data.table(wlddev), as.data.frame(EuStockMarkets))
# Unit: microseconds
#                           expr    min      lq      mean   median       uq      max neval
#                    qDT(wlddev)  3.075   3.608   4.21439   3.8950   4.2640   12.546   100
#            qDT(EuStockMarkets)  6.765   8.733  12.09254  12.5050  14.1040   30.217   100
#          as.data.table(wlddev) 64.206 122.180 253.75023 143.2745 173.1635 3653.346   100
#  as.data.frame(EuStockMarkets) 64.247  70.971  82.25174  79.6835  84.8700  339.849   100

# Converting a time series to data.frame
head(qDF(AirPassengers))
#   AirPassengers
# 1           112
# 2           118
# 3           132
# 4           129
# 5           121
# 6           135

By default these functions drop all unnecessary attributes from matrices or lists / data frames in the conversion, but this can be changed using the keep.attr = TRUE argument.

A useful additional feature of qDF and qDT is the row.names.col argument, enabling the saving of names / row-names in a column when converting from vector, matrix, array or data frame:

# This saves the row-names in a column named 'car'
head(qDT(mtcars, "car"))
#                  car   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#               <char> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
# 1:         Mazda RX4  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4
# 2:     Mazda RX4 Wag  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4
# 3:        Datsun 710  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1
# 4:    Hornet 4 Drive  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1
# 5: Hornet Sportabout  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2
# 6:           Valiant  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1

N_distinct <- fndistinct(GGDC10S)
N_distinct
#    Country Regioncode     Region   Variable       Year        AGR        MIN        MAN         PU 
#         43          6          6          2         67       4353       4224       4353       4237 
#        CON        WRT        TRA       FIRE        GOV        OTH        SUM 
#       4339       4344       4334       4349       3470       4238       4364
# Converting a vector to data.frame, saving names
head(qDF(N_distinct, "variable"))
#     variable N_distinct
# 1    Country         43
# 2 Regioncode          6
# 3     Region          6
# 4   Variable          2
# 5       Year         67
# 6        AGR       4353

For the conversion of matrices to list there are also the programmers functions mrtl and mctl, which row- or column- wise convert a matrix into a plain list, data.frame or data.table.

# This converts the matrix to a list of 1860 row-vectors of length 4.
microbenchmark(mrtl(EuStockMarkets))
# Unit: microseconds
#                  expr     min       lq     mean  median       uq     max neval
#  mrtl(EuStockMarkets) 139.728 151.4335 168.5522 155.841 164.6355 399.791   100

For the reverse operation, qM converts vectors, higher-dimensional arrays, data frames and suitable lists to matrix.

# Note: kit::psum is the most efficient way to do this
microbenchmark(rowSums(qM(mtcars)), rowSums(mtcars), kit::psum(mtcars))
# Unit: nanoseconds
#                 expr   min      lq      mean  median      uq      max neval
#  rowSums(qM(mtcars))  5699  7933.5  12702.62  9122.5 11131.5   316315   100
#      rowSums(mtcars) 38868 41697.0  48003.21 44157.0 51496.0    95981   100
#    kit::psum(mtcars)   574   820.0 510905.51   943.0  1107.0 50967797   100

At last, qF converts vectors to factor and is quite a bit faster than as.factor:

# Converting from character
str(wlddev$country)
#  chr [1:13176] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
#  - attr(*, "label")= chr "Country Name"
fndistinct(wlddev$country)
# [1] 216
microbenchmark(qF(wlddev$country), as.factor(wlddev$country))
# Unit: microseconds
#                       expr     min       lq      mean  median      uq     max neval
#         qF(wlddev$country)  70.192  71.1965  73.77376  72.160  74.784 107.256   100
#  as.factor(wlddev$country) 263.794 275.7660 282.21530 278.841 283.761 360.431   100

# Converting from numeric
str(wlddev$PCGDP)
#  num [1:13176] NA NA NA NA NA NA NA NA NA NA ...
#  - attr(*, "label")= chr "GDP per capita (constant 2010 US$)"
fndistinct(wlddev$PCGDP)
# [1] 9470
microbenchmark(qF(wlddev$PCGDP), as.factor(wlddev$PCGDP))
# Unit: microseconds
#                     expr      min       lq     mean   median        uq       max neval
#         qF(wlddev$PCGDP)  445.096  474.944  531.221  488.146  509.0765  3930.342   100
#  as.factor(wlddev$PCGDP) 9374.240 9546.132 9823.477 9633.196 9727.5165 13732.499   100

4. Advanced Statistical Programming

Having introduced some of the more basic collapse data manipulation infrastructure in the preceding chapters, this chapter introduces some of the packages core functionality for programming with data.

4.1 Fast (Grouped, Weighted) Statistical Functions

A key feature of collapse is it’s broad set of Fast Statistical Functions (fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct), which are able to tangibly speed-up column-wise, grouped and weighted statistical computations on vectors, matrices or data frames. The basic syntax common to all of these functions is:

FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, drop = TRUE)

where x is a vector, matrix or data frame, g takes groups supplied as vector, factor, list of vectors or GRP object, and w takes a weight vector (supported by fsum, fprod, fmean, fmedian, fmode, fnth, fvar and fsd). TRA can be used to transform x using the computed statistics and one of 10 available transformations ("replace_fill", "replace", "-", "-+", "/", "%", "+", "*", "%%, "-%%", discussed in section 6.3). na.rm efficiently skips missing values during the computation and is TRUE by default. use.g.names = TRUE generates new row-names from the unique groups supplied to g, and drop = TRUE returns a vector when performing simple (non-grouped) computations on matrix or data frame columns.

With that in mind, let’s start with some simple examples. To calculate simple column-wise means, it is sufficient to type:

fmean(mtcars$mpg) # Vector
# [1] 20.09062

fmean(mtcars)
#        mpg        cyl       disp         hp       drat         wt       qsec         vs         am 
#  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750   0.437500   0.406250 
#       gear       carb 
#   3.687500   2.812500

fmean(mtcars, drop = FALSE)  # This returns a 1-row data-frame
#        mpg    cyl     disp       hp     drat      wt     qsec     vs      am   gear   carb
# 1 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875 2.8125

m <- qM(mtcars) # Generate matrix
fmean(m)
#        mpg        cyl       disp         hp       drat         wt       qsec         vs         am 
#  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750   0.437500   0.406250 
#       gear       carb 
#   3.687500   2.812500

fmean(m, drop = FALSE)  # This returns a 1-row matrix
#           mpg    cyl     disp       hp     drat      wt     qsec     vs      am   gear   carb
# [1,] 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875 2.8125

Note that separate methods for vectors, matrices and data frames are written in C++, thus no conversions are needed and computations on matrices and data frames are equally efficient. If we had a weight vector, weighted statistics are easily computed:

weights <- abs(rnorm(fnrow(mtcars))) # fnrow is a bit faster for data frames

fmean(mtcars, w = weights) # Weighted mean
#         mpg         cyl        disp          hp        drat          wt        qsec          vs 
#  20.8090714   5.8876772 214.9587303 142.8931066   3.7558442   3.0941361  17.8201120   0.5025300 
#          am        gear        carb 
#   0.4918237   3.8375831   2.7771280
fmedian(mtcars, w = weights) # Weighted median
#    mpg    cyl   disp     hp   drat     wt   qsec     vs     am   gear   carb 
#  21.00   6.00 160.00 113.00   3.77   3.17  18.00   1.00   0.00   4.00   2.00
fsd(mtcars, w = weights) # Frequency-weighted standard deviation
#         mpg         cyl        disp          hp        drat          wt        qsec          vs 
#   5.8799568   1.8416865 122.4274353  74.9459089   0.5413624   0.9689836   1.8516418   0.5089768 
#          am        gear        carb 
#   0.5089152   0.7557877   1.6744062
fmode(mtcars, w = weights) # Weighted statistical mode (i.e. the value with the largest sum of weights)
#    mpg    cyl   disp     hp   drat     wt   qsec     vs     am   gear   carb 
#  21.40   4.00 121.00 109.00   3.92   2.78  18.60   1.00   0.00   4.00   2.00

Fast grouped statistics can be calculated by simply passing grouping vectors or lists of grouping vectors to the fast functions:

fmean(mtcars, mtcars$cyl)
#        mpg cyl     disp        hp     drat       wt     qsec        vs        am     gear     carb
# 4 26.66364   4 105.1364  82.63636 4.070909 2.285727 19.13727 0.9090909 0.7272727 4.090909 1.545455
# 6 19.74286   6 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286 0.4285714 3.857143 3.428571
# 8 15.10000   8 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000 0.1428571 3.285714 3.500000

fmean(mtcars, fselect(mtcars, cyl, vs, am))
#            mpg cyl     disp        hp     drat       wt     qsec vs am     gear     carb
# 4.0.1 26.00000   4 120.3000  91.00000 4.430000 2.140000 16.70000  0  1 5.000000 2.000000
# 4.1.0 22.90000   4 135.8667  84.66667 3.770000 2.935000 20.97000  1  0 3.666667 1.666667
# 4.1.1 28.37143   4  89.8000  80.57143 4.148571 2.028286 18.70000  1  1 4.142857 1.428571
# 6.0.1 20.56667   6 155.0000 131.66667 3.806667 2.755000 16.32667  0  1 4.333333 4.666667
# 6.1.0 19.12500   6 204.5500 115.25000 3.420000 3.388750 19.21500  1  0 3.500000 2.500000
# 8.0.0 15.05000   8 357.6167 194.16667 3.120833 4.104083 17.14250  0  0 3.000000 3.083333
# 8.0.1 15.40000   8 326.0000 299.50000 3.880000 3.370000 14.55000  0  1 5.000000 6.000000

# Getting column indices
ind <- fselect(mtcars, cyl, vs, am, return = "indices")
fmean(get_vars(mtcars, -ind), get_vars(mtcars, ind))
#            mpg     disp        hp     drat       wt     qsec     gear     carb
# 4.0.1 26.00000 120.3000  91.00000 4.430000 2.140000 16.70000 5.000000 2.000000
# 4.1.0 22.90000 135.8667  84.66667 3.770000 2.935000 20.97000 3.666667 1.666667
# 4.1.1 28.37143  89.8000  80.57143 4.148571 2.028286 18.70000 4.142857 1.428571
# 6.0.1 20.56667 155.0000 131.66667 3.806667 2.755000 16.32667 4.333333 4.666667
# 6.1.0 19.12500 204.5500 115.25000 3.420000 3.388750 19.21500 3.500000 2.500000
# 8.0.0 15.05000 357.6167 194.16667 3.120833 4.104083 17.14250 3.000000 3.083333
# 8.0.1 15.40000 326.0000 299.50000 3.880000 3.370000 14.55000 5.000000 6.000000

4.2 Factors, Grouping Objects and Grouped Data Frames

This programming can becomes more efficient when passing factors or grouping objects to the g argument, as otherwise vectors and lists of vectors are grouped internally.

# This creates a factor, na.exclude = FALSE attaches a class 'na.included'
f <- qF(mtcars$cyl, na.exclude = FALSE)
# The 'na.included' attribute skips a missing value check on this factor
attributes(f)
# $levels
# [1] "4" "6" "8"
# 
# $class
# [1] "factor"      "na.included"
# Saving data without grouping columns
dat <- get_vars(mtcars, -ind)
# Grouped standard-deviation
fsd(dat, f)
#        mpg     disp       hp      drat        wt     qsec      gear     carb
# 4 4.509828 26.87159 20.93453 0.3654711 0.5695637 1.682445 0.5393599 0.522233
# 6 1.453567 41.56246 24.26049 0.4760552 0.3563455 1.706866 0.6900656 1.812654
# 8 2.560048 67.77132 50.97689 0.3723618 0.7594047 1.196014 0.7262730 1.556624

# Without option na.exclude = FALSE, anyNA needs to be called on the factor (noticeable on larger data).
f2 <- qF(mtcars$cyl)
microbenchmark(fsd(dat, f), fsd(dat, f2))
# Unit: microseconds
#          expr   min    lq    mean median    uq    max neval
#   fsd(dat, f) 6.027 6.232 6.51613 6.4165 6.601 11.152   100
#  fsd(dat, f2) 6.150 6.396 6.77771 6.5190 6.683 25.830   100

For programming purposes GRP objects are preferable over factors because they never require further checks and they provide additional information about the grouping (such as group sizes and the original unique values in each group). The GRP function creates grouping objects (of class GRP) from vectors or lists of columns. Grouping is done very efficiently via radix ordering in C (using the radixorder function):

# This creates a 'GRP' object.
g <- GRP(mtcars, ~ cyl + vs + am) # Using the formula interface, could also use c("cyl","vs","am") or c(2,8:9)
str(g)
# Class 'GRP'  hidden list of 9
#  $ N.groups    : int 7
#  $ group.id    : int [1:32] 4 4 3 5 6 5 6 2 2 5 ...
#  $ group.sizes : int [1:7] 1 3 7 3 4 12 2
#  $ groups      :'data.frame': 7 obs. of  3 variables:
#   ..$ cyl: num [1:7] 4 4 4 6 6 8 8
#   ..$ vs : num [1:7] 0 1 1 0 1 0 0
#   ..$ am : num [1:7] 1 0 1 1 0 0 1
#  $ group.vars  : chr [1:3] "cyl" "vs" "am"
#  $ ordered     : Named logi [1:2] TRUE FALSE
#   ..- attr(*, "names")= chr [1:2] "ordered" "sorted"
#  $ order       : int [1:32] 27 8 9 21 3 18 19 20 26 28 ...
#   ..- attr(*, "starts")= int [1:7] 1 2 5 12 15 19 31
#   ..- attr(*, "maxgrpn")= int 12
#   ..- attr(*, "sorted")= logi FALSE
#  $ group.starts: int [1:7] 27 8 3 1 4 5 29
#  $ call        : language GRP.default(X = mtcars, by = ~cyl + vs + am)

The first three elements of this object provide information about the number of groups, the group to which each row belongs, and the size of each group. A print and a plot method provide further information about the grouping:

print(g)
# collapse grouping object of length 32 with 7 ordered groups
# 
# Call: GRP.default(X = mtcars, by = ~cyl + vs + am), X is unsorted
# 
# Distribution of group sizes: 
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#   1.000   2.500   3.000   4.571   5.500  12.000 
# 
# Groups with sizes: 
# 4.0.1 4.1.0 4.1.1 6.0.1 6.1.0 8.0.0 8.0.1 
#     1     3     7     3     4    12     2
plot(g)
plot of chunk GRPplot

plot of chunk GRPplot

The important elements of the GRP object are directly handed down to the compiled C++ code of the statistical functions, making repeated computations over the same groups very efficient.

fsd(dat, g)
#             mpg      disp       hp      drat        wt       qsec      gear      carb
# 4.0.1        NA        NA       NA        NA        NA         NA        NA        NA
# 4.1.0 1.4525839 13.969371 19.65536 0.1300000 0.4075230 1.67143651 0.5773503 0.5773503
# 4.1.1 4.7577005 18.802128 24.14441 0.3783926 0.4400840 0.94546285 0.3779645 0.5345225
# 6.0.1 0.7505553  8.660254 37.52777 0.1616581 0.1281601 0.76872188 0.5773503 1.1547005
# 6.1.0 1.6317169 44.742634  9.17878 0.5919459 0.1162164 0.81590441 0.5773503 1.7320508
# 8.0.0 2.7743959 71.823494 33.35984 0.2302749 0.7683069 0.80164745 0.0000000 0.9003366
# 8.0.1 0.5656854 35.355339 50.20458 0.4808326 0.2828427 0.07071068 0.0000000 2.8284271

# Grouped computation with and without prior grouping
microbenchmark(fsd(dat, g), fsd(dat, get_vars(mtcars, ind)))
# Unit: microseconds
#                             expr    min      lq     mean  median      uq     max neval
#                      fsd(dat, g) 19.065 21.1765 23.68447 22.9600 24.9690  38.909   100
#  fsd(dat, get_vars(mtcars, ind)) 31.611 35.2600 44.56823 37.3715 41.1845 327.877   100

Yet another possibility is creating a grouped data frame (class grouped_df). This can either be done using dplyr::group_by, which creates a grouped tibble and requires a conversion of the grouping object using GRP.grouped_df, or using the more efficient fgroup_by provided in collapse:

gmtcars <- fgroup_by(mtcars, cyl, vs, am) # fgroup_by() can also be abbreviated as gby()
fmedian(gmtcars)
#   cyl vs am   mpg  disp    hp  drat    wt  qsec gear carb
# 1   4  0  1 26.00 120.3  91.0 4.430 2.140 16.70  5.0  2.0
# 2   4  1  0 22.80 140.8  95.0 3.700 3.150 20.01  4.0  2.0
# 3   4  1  1 30.40  79.0  66.0 4.080 1.935 18.61  4.0  1.0
# 4   6  0  1 21.00 160.0 110.0 3.900 2.770 16.46  4.0  4.0
# 5   6  1  0 18.65 196.3 116.5 3.500 3.440 19.17  3.5  2.5
# 6   8  0  0 15.20 355.0 180.0 3.075 3.810 17.35  3.0  3.0
# 7   8  0  1 15.40 326.0 299.5 3.880 3.370 14.55  5.0  6.0

head(fgroup_vars(gmtcars))
#                   cyl vs am
# Mazda RX4           6  0  1
# Mazda RX4 Wag       6  0  1
# Datsun 710          4  1  1
# Hornet 4 Drive      6  1  0
# Hornet Sportabout   8  0  0
# Valiant             6  1  0

fmedian(gmtcars, keep.group_vars = FALSE)
#     mpg  disp    hp  drat    wt  qsec gear carb
# 1 26.00 120.3  91.0 4.430 2.140 16.70  5.0  2.0
# 2 22.80 140.8  95.0 3.700 3.150 20.01  4.0  2.0
# 3 30.40  79.0  66.0 4.080 1.935 18.61  4.0  1.0
# 4 21.00 160.0 110.0 3.900 2.770 16.46  4.0  4.0
# 5 18.65 196.3 116.5 3.500 3.440 19.17  3.5  2.5
# 6 15.20 355.0 180.0 3.075 3.810 17.35  3.0  3.0
# 7 15.40 326.0 299.5 3.880 3.370 14.55  5.0  6.0

Now suppose we wanted to create a new dataset which contains the mean, sd, min and max of the variables mpg and disp grouped by cyl, vs and am:

# Standard evaluation
dat <- get_vars(mtcars, c("mpg", "disp"))
add_vars(g[["groups"]],
         add_stub(fmean(dat, g, use.g.names = FALSE), "mean_"),
         add_stub(fsd(dat, g, use.g.names = FALSE), "sd_"),
         add_stub(fmin(dat, g, use.g.names = FALSE), "min_"),
         add_stub(fmax(dat, g, use.g.names = FALSE), "max_"))
#   cyl vs am mean_mpg mean_disp    sd_mpg   sd_disp min_mpg min_disp max_mpg max_disp
# 1   4  0  1 26.00000  120.3000        NA        NA    26.0    120.3    26.0    120.3
# 2   4  1  0 22.90000  135.8667 1.4525839 13.969371    21.5    120.1    24.4    146.7
# 3   4  1  1 28.37143   89.8000 4.7577005 18.802128    21.4     71.1    33.9    121.0
# 4   6  0  1 20.56667  155.0000 0.7505553  8.660254    19.7    145.0    21.0    160.0
# 5   6  1  0 19.12500  204.5500 1.6317169 44.742634    17.8    167.6    21.4    258.0
# 6   8  0  0 15.05000  357.6167 2.7743959 71.823494    10.4    275.8    19.2    472.0
# 7   8  0  1 15.40000  326.0000 0.5656854 35.355339    15.0    301.0    15.8    351.0

# Non-Standard evaluation
fgroup_by(mtcars, cyl, vs, am) %>% fselect(mpg, disp) %>% {
  add_vars(fgroup_vars(., "unique"),
           fmean(., keep.group_vars = FALSE) %>% add_stub("mean_"),
           fsd(., keep.group_vars = FALSE) %>% add_stub("sd_"),
           fmin(., keep.group_vars = FALSE) %>% add_stub("min_"),
           fmax(., keep.group_vars = FALSE) %>% add_stub("max_"))
}
#   cyl vs am mean_mpg mean_disp    sd_mpg   sd_disp min_mpg min_disp max_mpg max_disp
# 1   4  0  1 26.00000  120.3000        NA        NA    26.0    120.3    26.0    120.3
# 2   4  1  0 22.90000  135.8667 1.4525839 13.969371    21.5    120.1    24.4    146.7
# 3   4  1  1 28.37143   89.8000 4.7577005 18.802128    21.4     71.1    33.9    121.0
# 4   6  0  1 20.56667  155.0000 0.7505553  8.660254    19.7    145.0    21.0    160.0
# 5   6  1  0 19.12500  204.5500 1.6317169 44.742634    17.8    167.6    21.4    258.0
# 6   8  0  0 15.05000  357.6167 2.7743959 71.823494    10.4    275.8    19.2    472.0
# 7   8  0  1 15.40000  326.0000 0.5656854 35.355339    15.0    301.0    15.8    351.0

4.3 Grouped and Weighted Computations

We could also calculate groupwise-frequency weighted means and standard-deviations using a weight vector2.

# Grouped and weighted mean and sd and grouped min and max
add_vars(g[["groups"]],
         add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"),
         add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"),
         add_stub(fmin(dat, g, use.g.names = FALSE), "min_"),
         add_stub(fmax(dat, g, use.g.names = FALSE), "max_"))
#   cyl vs am w_mean_mpg w_mean_disp  w_sd_mpg w_sd_disp min_mpg min_disp max_mpg max_disp
# 1   4  0  1   26.00000   120.30000 0.0000000   0.00000    26.0    120.3    26.0    120.3
# 2   4  1  0   23.08757   136.62639 1.5306081  14.19412    21.5    120.1    24.4    146.7
# 3   4  1  1   27.34688    92.65353 4.8723476  21.44005    21.4     71.1    33.9    121.0
# 4   6  0  1   20.22046   151.00525 0.9349875  10.78832    19.7    145.0    21.0    160.0
# 5   6  1  0   19.52725   204.86661 1.7612203  50.80083    17.8    167.6    21.4    258.0
# 6   8  0  0   15.12267   359.56902 2.2886672  70.60949    10.4    275.8    19.2    472.0
# 7   8  0  1   15.51023   332.88960 0.4758366  29.73979    15.0    301.0    15.8    351.0

# Binding and reordering columns in a single step: Add columns in specific positions
add_vars(g[["groups"]],
         add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"),
         add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"),
         add_stub(fmin(dat, g, use.g.names = FALSE), "min_"),
         add_stub(fmax(dat, g, use.g.names = FALSE), "max_"),
         pos = c(4,8,5,9,6,10,7,11))
#   cyl vs am w_mean_mpg  w_sd_mpg min_mpg max_mpg w_mean_disp w_sd_disp min_disp max_disp
# 1   4  0  1   26.00000 0.0000000    26.0    26.0   120.30000   0.00000    120.3    120.3
# 2   4  1  0   23.08757 1.5306081    21.5    24.4   136.62639  14.19412    120.1    146.7
# 3   4  1  1   27.34688 4.8723476    21.4    33.9    92.65353  21.44005     71.1    121.0
# 4   6  0  1   20.22046 0.9349875    19.7    21.0   151.00525  10.78832    145.0    160.0
# 5   6  1  0   19.52725 1.7612203    17.8    21.4   204.86661  50.80083    167.6    258.0
# 6   8  0  0   15.12267 2.2886672    10.4    19.2   359.56902  70.60949    275.8    472.0
# 7   8  0  1   15.51023 0.4758366    15.0    15.8   332.88960  29.73979    301.0    351.0

The R overhead of this kind of programming in standard-evaluation is very low:

microbenchmark(call = add_vars(g[["groups"]],
         add_stub(fmean(dat, g, weights, use.g.names = FALSE), "w_mean_"),
         add_stub(fsd(dat, g, weights, use.g.names = FALSE), "w_sd_"),
         add_stub(fmin(dat, g, use.g.names = FALSE), "min_"),
         add_stub(fmax(dat, g, use.g.names = FALSE), "max_")))
# Unit: microseconds
#  expr    min      lq     mean median     uq   max neval
#  call 27.388 28.1875 29.56428 28.823 29.356 97.58   100

4.4 Transformations Using the TRA Argument

As a final layer of added complexity, we could utilize the TRA argument to generate groupwise-weighted demeaned, and scaled data, with additional columns giving the group-minimum and maximum values:

head(add_vars(get_vars(mtcars, ind),
              add_stub(fmean(dat, g, weights, "-"), "w_demean_"), # This calculates weighted group means and uses them to demean the data
              add_stub(fsd(dat, g, weights, "/"), "w_scale_"),    # This calculates weighted group sd's and uses them to scale the data
              add_stub(fmin(dat, g, "replace"), "min_"),          # This replaces all observations by their group-minimum
              add_stub(fmax(dat, g, "replace"), "max_")))         # This replaces all observations by their group-maximum
#                   cyl vs am w_demean_mpg w_demean_disp w_scale_mpg w_scale_disp min_mpg min_disp
# Mazda RX4           6  0  1    0.7795446     8.9947455   22.460194    14.830858    19.7    145.0
# Mazda RX4 Wag       6  0  1    0.7795446     8.9947455   22.460194    14.830858    19.7    145.0
# Datsun 710          4  1  1   -4.5468786    15.3464694    4.679469     5.037303    21.4     71.1
# Hornet 4 Drive      6  1  0    1.8727485    53.1333901   12.150666     5.078657    17.8    167.6
# Hornet Sportabout   8  0  0    3.5773335     0.4309751    8.170694     5.098465    10.4    275.8
# Valiant             6  1  0   -1.4272515    20.1333901   10.276966     4.429062    17.8    167.6
#                   max_mpg max_disp
# Mazda RX4            21.0      160
# Mazda RX4 Wag        21.0      160
# Datsun 710           33.9      121
# Hornet 4 Drive       21.4      258
# Hornet Sportabout    19.2      472
# Valiant              21.4      258

It is also possible to add_vars<- to mtcars itself. The default option would add these columns at the end, but we could also specify positions:

# This defines the positions where we want to add these columns
pos <- as.integer(c(2,8,3,9,4,10,5,11))

add_vars(mtcars, pos) <- c(add_stub(fmean(dat, g, weights, "-"), "w_demean_"),
                           add_stub(fsd(dat, g, weights, "/"), "w_scale_"),
                           add_stub(fmin(dat, g, "replace"), "min_"),
                           add_stub(fmax(dat, g, "replace"), "max_"))
head(mtcars)
#                    mpg w_demean_mpg w_scale_mpg min_mpg max_mpg cyl disp w_demean_disp w_scale_disp
# Mazda RX4         21.0    0.7795446   22.460194    19.7    21.0   6  160     8.9947455    14.830858
# Mazda RX4 Wag     21.0    0.7795446   22.460194    19.7    21.0   6  160     8.9947455    14.830858
# Datsun 710        22.8   -4.5468786    4.679469    21.4    33.9   4  108    15.3464694     5.037303
# Hornet 4 Drive    21.4    1.8727485   12.150666    17.8    21.4   6  258    53.1333901     5.078657
# Hornet Sportabout 18.7    3.5773335    8.170694    10.4    19.2   8  360     0.4309751     5.098465
# Valiant           18.1   -1.4272515   10.276966    17.8    21.4   6  225    20.1333901     4.429062
#                   min_disp max_disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4            145.0      160 110 3.90 2.620 16.46  0  1    4    4
# Mazda RX4 Wag        145.0      160 110 3.90 2.875 17.02  0  1    4    4
# Datsun 710            71.1      121  93 3.85 2.320 18.61  1  1    4    1
# Hornet 4 Drive       167.6      258 110 3.08 3.215 19.44  1  0    3    1
# Hornet Sportabout    275.8      472 175 3.15 3.440 17.02  0  0    3    2
# Valiant              167.6      258 105 2.76 3.460 20.22  1  0    3    1
rm(mtcars)

Together with ftransform, things can become arbitrarily more complex:

# 2 different grouped and weighted computations (mutate operations) performed in one call
settransform(mtcars, carb_dwmed_cyl = fmedian(carb, cyl, weights, "-"),
                     carb_wsd_vs_am = fsd(carb, list(vs, am), weights, "replace"))

# Multivariate
settransform(mtcars, c(fmedian(list(carb_dwmed_cyl = carb, mpg_dwmed_cyl = mpg), cyl, weights, "-"),
                      fsd(list(carb_wsd_vs_am = carb, mpg_wsd_vs_am = mpg), list(vs, am), weights, "replace")))

# Nested (Computing the weighted 3rd quartile of mpg, grouped by cyl and carb being greater than it's weighted median, grouped by vs)
settransform(mtcars,
 mpg_gwQ3_cyl = fnth(mpg, 0.75, list(cyl, carb > fmedian(carb, vs, weights, 1L)), weights, 1L))

head(mtcars)
#                    mpg cyl disp  hp drat    wt  qsec vs am gear carb carb_dwmed_cyl carb_wsd_vs_am
# Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4              0      2.1897386
# Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4              0      2.1897386
# Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1             -1      0.5286617
# Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1             -3      1.3161442
# Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2             -2      0.9674070
# Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1             -3      1.3161442
#                   mpg_dwmed_cyl mpg_wsd_vs_am mpg_gwQ3_cyl
# Mazda RX4                   1.3      4.567045     21.40000
# Mazda RX4 Wag               1.3      4.567045     21.40000
# Datsun 710                 -3.2      4.872348     27.95146
# Hornet 4 Drive              1.7      2.444036     21.40000
# Hornet Sportabout           3.5      2.288667     16.21512
# Valiant                    -1.6      2.444036     21.40000
rm(mtcars)

With the full set of 14 Fast Statistical Functions, and additional vector- valued functions and operators (fscale/STD, fbetween/B, fwithin/W, fhdbetween/HDB, fhdwithin/HDW, flag/L/F, fdiff/D, fgrowth/G) discussed later, collapse provides extraordinary new possibilities for highly complex and efficient statistical programming in R. Computation speeds generally exceed those of packages like dplyr or data.table, sometimes by orders of magnitude. Column-wise matrix computations are also highly efficient and comparable to packages like matrixStats and base R functions like colSums. In particular the ability to perform grouped and weighted computations on matrices is new to R and very useful for complex computations (such as aggregating input-output tables etc.).

Note that the above examples provide merely suggestions for use of these features and are focused on programming with data frames (as the predicates get_vars, add_vars etc. are made for data frames). Equivalently efficient code could be written using vectors or matrices.

5. Advanced Data Aggregation

The grouped statistical programming introduced in the previous section is the fastest and most customizable way of dealing with many data transformation problems. Some tasks such as multivariate aggregations on a single data frame are however so common that this demanded for a more compact solution which efficiently integrates multiple computational steps.

For such purposes collap was created as a fast multi-purpose aggregation command designed to solve complex aggregation problems efficiently and with a minimum of coding. collap performs optimally together with the Fast Statistical Functions, but will also work with other functions.

To perform the above aggregation with collap, one would simply need to type:

collap(mtcars, mpg + disp ~ cyl + vs + am, list(fmean, fsd, fmin, fmax),
       w = weights, keep.col.order = FALSE)
#   cyl vs am  weights fmean.mpg fmean.disp   fsd.mpg fsd.disp fmin.mpg fmin.disp fmax.mpg fmax.disp
# 1   4  0  1 1.416054  26.00000  120.30000 0.0000000  0.00000     26.0     120.3     26.0     120.3
# 2   4  1  0 3.232217  23.08757  136.62639 1.5306081 14.19412     21.5     120.1     24.4     146.7
# 3   4  1  1 7.893395  27.34688   92.65353 4.8723476 21.44005     21.4      71.1     33.9     121.0
# 4   6  0  1 1.866025  20.22046  151.00525 0.9349875 10.78832     19.7     145.0     21.0     160.0
# 5   6  1  0 3.237565  19.52725  204.86661 1.7612203 50.80083     17.8     167.6     21.4     258.0
# 6   8  0  0 8.054777  15.12267  359.56902 2.2886672 70.60949     10.4     275.8     19.2     472.0
# 7   8  0  1 2.881698  15.51023  332.88960 0.4758366 29.73979     15.0     301.0     15.8     351.0

collap here also saves the sum of the weights in a column. The original idea behind collap is however better demonstrated with a different dataset. Consider the World Development Dataset wlddev introduced in section 1:

head(wlddev)
#       country iso3c       date year decade     region     income  OECD PCGDP LIFEEX GINI       ODA
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA 32.446   NA 116769997
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA 32.962   NA 232080002
# 3 Afghanistan   AFG 1963-01-01 1962   1960 South Asia Low income FALSE    NA 33.471   NA 112839996
# 4 Afghanistan   AFG 1964-01-01 1963   1960 South Asia Low income FALSE    NA 33.971   NA 237720001
# 5 Afghanistan   AFG 1965-01-01 1964   1960 South Asia Low income FALSE    NA 34.463   NA 295920013
# 6 Afghanistan   AFG 1966-01-01 1965   1960 South Asia Low income FALSE    NA 34.948   NA 341839996
#       POP
# 1 8996973
# 2 9169410
# 3 9351441
# 4 9543205
# 5 9744781
# 6 9956320

Suppose we would like to aggregate this data by country and decade, but keep all that categorical information. With collap this is extremely simple:

collap(wlddev, ~ iso3c + decade) %>% head
#   country iso3c       date   year decade                    region      income  OECD    PCGDP
# 1   Aruba   ABW 1961-01-01 1964.5   1960 Latin America & Caribbean High income FALSE       NA
# 2   Aruba   ABW 1971-01-01 1974.5   1970 Latin America & Caribbean High income FALSE       NA
# 3   Aruba   ABW 1981-01-01 1984.5   1980 Latin America & Caribbean High income FALSE 20267.30
# 4   Aruba   ABW 1991-01-01 1994.5   1990 Latin America & Caribbean High income FALSE 26611.44
# 5   Aruba   ABW 2001-01-01 2004.5   2000 Latin America & Caribbean High income FALSE 26664.99
# 6   Aruba   ABW 2011-01-01 2014.5   2010 Latin America & Caribbean High income FALSE 24926.17
#    LIFEEX GINI      ODA      POP
# 1 67.2592   NA       NA  56984.3
# 2 70.6372   NA       NA  60080.6
# 3 73.0153   NA 49745999  61665.9
# 4 73.6069   NA 29971000  76946.7
# 5 74.2660   NA 23292000  97939.7
# 6 75.6546   NA       NA 103994.6

Note that the columns of the data are in the original order and also retain all their attributes. To understand this result let us briefly examine the syntax of collap:

collap(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum,
       custom = NULL, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE,
       sort.row = TRUE, parallel = FALSE, mc.cores = 1L,
       return = c("wide","list","long","long_dupl"), give.names = "auto") # , ...

It is clear that X is the data and by supplies the grouping information, which can be a one- or two-sided formula or alternatively grouping vectors, factors, lists and GRP objects (like the Fast Statistical Functions). Then FUN provides the function(s) applied only to numeric variables in X and defaults to fmean, while catFUN provides the function(s) applied only to categorical variables in X and defaults to fmode3. keep.col.order = TRUE specifies that the data is to be returned with the original column-order. Thus in the above example it was sufficient to supply X and by and collap did the rest for us.

Suppose we only want to aggregate 4 series in this dataset.

# Same as collap(wlddev, ~ iso3c + decade, cols = 9:12)
collap(wlddev, PCGDP + LIFEEX + GINI + ODA ~ iso3c + decade) %>% head
#   iso3c decade    PCGDP  LIFEEX GINI      ODA
# 1   ABW   1960       NA 67.2592   NA       NA
# 2   ABW   1970       NA 70.6372   NA       NA
# 3   ABW   1980 20267.30 73.0153   NA 49745999
# 4   ABW   1990 26611.44 73.6069   NA 29971000
# 5   ABW   2000 26664.99 74.2660   NA 23292000
# 6   ABW   2010 24926.17 75.6546   NA       NA

As before we could use multiple functions by putting them in a named or unnamed list4:

collap(wlddev, ~ iso3c + decade, list(fmean, fmedian, fsd), cols = 9:12) %>% head
#   iso3c decade fmean.PCGDP fmedian.PCGDP fsd.PCGDP fmean.LIFEEX fmedian.LIFEEX fsd.LIFEEX
# 1   ABW   1960          NA            NA        NA      67.2592        67.2740 1.03046880
# 2   ABW   1970          NA            NA        NA      70.6372        70.6760 0.96813702
# 3   ABW   1980    20267.30      20280.81 4037.2695      73.0153        73.1260 0.38203753
# 4   ABW   1990    26611.44      26684.19  592.7919      73.6069        73.6100 0.08549392
# 5   ABW   2000    26664.99      26992.71 1164.6741      74.2660        74.2215 0.37614448
# 6   ABW   2010    24926.17      24599.50 1159.7344      75.6546        75.6540 0.42974339
#   fmean.GINI fmedian.GINI fsd.GINI fmean.ODA fmedian.ODA  fsd.ODA
# 1         NA           NA       NA        NA          NA       NA
# 2         NA           NA       NA        NA          NA       NA
# 3         NA           NA       NA  49745999    39259998 23573651
# 4         NA           NA       NA  29971000    35155001 17270808
# 5         NA           NA       NA  23292000    16219999 42969712
# 6         NA           NA       NA        NA          NA       NA

With multiple functions, we could also request collap to return a long-format of the data:

collap(wlddev, ~ iso3c + decade, list(fmean, fmedian, fsd), cols = 9:12, return = "long") %>% head
#   Function iso3c decade    PCGDP  LIFEEX GINI      ODA
# 1    fmean   ABW   1960       NA 67.2592   NA       NA
# 2    fmean   ABW   1970       NA 70.6372   NA       NA
# 3    fmean   ABW   1980 20267.30 73.0153   NA 49745999
# 4    fmean   ABW   1990 26611.44 73.6069   NA 29971000
# 5    fmean   ABW   2000 26664.99 74.2660   NA 23292000
# 6    fmean   ABW   2010 24926.17 75.6546   NA       NA

A very important feature of collap to highlight at this point is the custom argument, which allows the user to circumvent the broad distinction into numeric and categorical data (and the associated FUN and catFUN arguments) and specify exactly which columns to aggregate using which functions:

collap(wlddev, ~ iso3c + decade,
        custom = list(fmean = 9:10, fmedian = 11:12,
                      ffirst = c("country","region","income"),
                      flast = c("year","date"),
                      fmode = "OECD")) %>% head
#   country iso3c       date year decade                    region      income  OECD    PCGDP  LIFEEX
# 1   Aruba   ABW 1970-01-01 1969   1960 Latin America & Caribbean High income FALSE       NA 67.2592
# 2   Aruba   ABW 1980-01-01 1979   1970 Latin America & Caribbean High income FALSE       NA 70.6372
# 3   Aruba   ABW 1990-01-01 1989   1980 Latin America & Caribbean High income FALSE 20267.30 73.0153
# 4   Aruba   ABW 2000-01-01 1999   1990 Latin America & Caribbean High income FALSE 26611.44 73.6069
# 5   Aruba   ABW 2010-01-01 2009   2000 Latin America & Caribbean High income FALSE 26664.99 74.2660
# 6   Aruba   ABW 2020-01-01 2019   2010 Latin America & Caribbean High income FALSE 24926.17 75.6546
#   GINI      ODA
# 1   NA       NA
# 2   NA       NA
# 3   NA 39259998
# 4   NA 35155001
# 5   NA 16219999
# 6   NA       NA

Since collapse 1.5.0, it is also possible to perform weighted aggregations and append functions with _uw to yield an unweighted computation:

# This aggregates using weighted mean and mode, and unweighted median, first and last value
collap(wlddev, ~ region + year, w = ~ POP,
        custom = list(fmean = 9:10, fmedian_uw = 11:12,
                      ffirst_uw = c("country","region","income"),
                      flast_uw = c("year","date"),
                      fmode = "OECD"), keep.w = FALSE) %>% head
#          country       date year year              region              region              income
# 1 American Samoa 1961-01-01 1960 1960 East Asia & Pacific East Asia & Pacific Upper middle income
# 2 American Samoa 1962-01-01 1961 1961 East Asia & Pacific East Asia & Pacific Upper middle income
# 3 American Samoa 1963-01-01 1962 1962 East Asia & Pacific East Asia & Pacific Upper middle income
# 4 American Samoa 1964-01-01 1963 1963 East Asia & Pacific East Asia & Pacific Upper middle income
# 5 American Samoa 1965-01-01 1964 1964 East Asia & Pacific East Asia & Pacific Upper middle income
# 6 American Samoa 1966-01-01 1965 1965 East Asia & Pacific East Asia & Pacific Upper middle income
#    OECD    PCGDP   LIFEEX GINI       ODA
# 1 FALSE 1313.760 48.20996   NA  37295000
# 2 FALSE 1395.228 48.73451   NA  26630001
# 3 FALSE 1463.441 49.39960   NA 100040001
# 4 FALSE 1540.621 50.37529   NA  40389999
# 5 FALSE 1665.385 51.57330   NA  70059998
# 6 FALSE 1733.757 52.94426   NA  91545002

Next to collap, the functions collapv provides a programmers alternative allowing grouping and weighting columns to be passed using column names or indices, and the function collapg operates on grouped data frames.

6. Data Transformations

While ftransform and the TRA argument to the Fast Statistical Functions introduced earlier already provide a significant scope for transforming data, this section introduces some further specialized functions covering some advanced and common use cases, sometimes with greater efficiency.

6.1 Row and Column Arithmetic

When dealing with matrices or matrix-like datasets, we often have to perform operations applying a vector to the rows or columns of the data object in question. The mathematical operations of base R (+, -, *, /, %%, …) operate column-wise and are quite inefficient when used with data frames. Even in matrix code it is challenging to efficiently apply a vector v to the rows of a matrix X.

For this reason collapse introduces a set of efficient row- and column-wise arithmetic operators for matrix-like objects: %rr%, %r+%, %r-%, %r*%, %r/%, %cr%, %c+%, %c-%, %c*%, %c/%.

X <- qM(fselect(GGDC10S, AGR:SUM))
v <- fsum(X)
v
#         AGR         MIN         MAN          PU         CON         WRT         TRA        FIRE 
# 11026503529  8134743462 24120129864  1461548426  7845957666 14776120961  6416089614  7216735147 
#         GOV         OTH         SUM 
#  5962229565  7155872037 94115930269

# This divides the rows of X by v
all_obj_equal(t(t(X) / v), X / outer(rep(1, nrow(X)), v), X %r/% v)
# [1] TRUE

# Base R vs. efficient base R vs. collapse
microbenchmark(t(t(X) / v), X / outer(rep(1, nrow(X)), v), X %r/% v)
# Unit: microseconds
#                         expr     min       lq      mean   median       uq      max neval
#                    t(t(X)/v) 194.873 234.3560 358.13500 284.6425 298.0905 3244.043   100
#  X/outer(rep(1, nrow(X)), v)  55.555  83.5580 101.45696 108.5885 113.5495  137.637   100
#                     X %r/% v  11.685  37.2075  83.87657  63.2630  72.7135 2744.663   100

# Data frame row operations
dat <- fselect(GGDC10S, AGR:SUM)
microbenchmark(dat %r/% v, # Same thing using mapply and collapse::copyAttrib
               copyAttrib(mapply(`/`, dat, v, SIMPLIFY = FALSE), dat))
# Unit: microseconds
#                                                    expr    min     lq      mean median      uq
#                                              dat %r/% v 15.129 37.187 143.03998 40.139 46.5555
#  copyAttrib(mapply(`/`, dat, v, SIMPLIFY = FALSE), dat) 59.204 64.124  71.98944 66.379 76.7315
#       max neval
#  5089.289   100
#   110.003   100

# Data frame column arithmetic is very slow
microbenchmark(dat / dat$SUM, dat / 5, dat / dat,
               dat %c/% dat$SUM, dat %c/% 5, dat %c/% dat)
# Unit: microseconds
#              expr      min        lq       mean    median        uq       max neval
#       dat/dat$SUM 1275.264 1385.2260 1636.95411 1434.2825 1551.1940  5150.092   100
#             dat/5  276.012  295.4870 1181.83361  306.2905  327.4260 83176.208   100
#           dat/dat  295.323  320.1075  417.10858  330.5010  361.7020  3807.711   100
#  dat %c/% dat$SUM   20.295   45.4075  120.01479   48.5235   55.1245  3520.096   100
#        dat %c/% 5   17.179   44.5260   87.22996   48.7285   64.1035  3489.223   100
#      dat %c/% dat   20.459   46.2685   93.95601   51.0040   67.5065  3795.903   100

6.1 Row and Column Data Apply

dapply is an efficient apply command for matrices and data frames. It can be used to apply functions to rows or (by default) columns of matrices or data frames and by default returns objects of the same type and with the same attributes unless the result of each computation is a scalar.

dapply(mtcars, median)
#     mpg     cyl    disp      hp    drat      wt    qsec      vs      am    gear    carb 
#  19.200   6.000 196.300 123.000   3.695   3.325  17.710   0.000   0.000   4.000   2.000

dapply(mtcars, median, MARGIN = 1)
#           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive   Hornet Sportabout 
#               4.000               4.000               4.000               3.215               3.440 
#             Valiant          Duster 360           Merc 240D            Merc 230            Merc 280 
#               3.460               4.000               4.000               4.000               4.000 
#           Merc 280C          Merc 450SE          Merc 450SL         Merc 450SLC  Cadillac Fleetwood 
#               4.000               4.070               3.730               3.780               5.250 
# Lincoln Continental   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
#               5.424               5.345               4.000               4.000               4.000 
#       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28    Pontiac Firebird 
#               3.700               3.520               3.435               4.000               3.845 
#           Fiat X1-9       Porsche 914-2        Lotus Europa      Ford Pantera L        Ferrari Dino 
#               4.000               4.430               4.000               5.000               6.000 
#       Maserati Bora          Volvo 142E 
#               8.000               4.000

dapply(mtcars, quantile)
#         mpg cyl    disp    hp  drat      wt    qsec vs am gear carb
# 0%   10.400   4  71.100  52.0 2.760 1.51300 14.5000  0  0    3    1
# 25%  15.425   4 120.825  96.5 3.080 2.58125 16.8925  0  0    3    2
# 50%  19.200   6 196.300 123.0 3.695 3.32500 17.7100  0  0    4    2
# 75%  22.800   8 326.000 180.0 3.920 3.61000 18.9000  1  1    4    4
# 100% 33.900   8 472.000 335.0 4.930 5.42400 22.9000  1  1    5    8

dapply(mtcars, quantile, MARGIN = 1) %>% head
#                   0%    25%   50%    75% 100%
# Mazda RX4          0 3.2600 4.000 18.730  160
# Mazda RX4 Wag      0 3.3875 4.000 19.010  160
# Datsun 710         1 1.6600 4.000 20.705  108
# Hornet 4 Drive     0 2.0000 3.215 20.420  258
# Hornet Sportabout  0 2.5000 3.440 17.860  360
# Valiant            0 1.8800 3.460 19.160  225

# This is considerably more efficient than log(mtcars):
dapply(mtcars, log) %>% head
#                        mpg      cyl     disp       hp     drat        wt     qsec   vs   am
# Mazda RX4         3.044522 1.791759 5.075174 4.700480 1.360977 0.9631743 2.800933 -Inf    0
# Mazda RX4 Wag     3.044522 1.791759 5.075174 4.700480 1.360977 1.0560527 2.834389 -Inf    0
# Datsun 710        3.126761 1.386294 4.682131 4.532599 1.348073 0.8415672 2.923699    0    0
# Hornet 4 Drive    3.063391 1.791759 5.552960 4.700480 1.124930 1.1678274 2.967333    0 -Inf
# Hornet Sportabout 2.928524 2.079442 5.886104 5.164786 1.147402 1.2354715 2.834389 -Inf -Inf
# Valiant           2.895912 1.791759 5.416100 4.653960 1.015231 1.2412686 3.006672    0 -Inf
#                       gear      carb
# Mazda RX4         1.386294 1.3862944
# Mazda RX4 Wag     1.386294 1.3862944
# Datsun 710        1.386294 0.0000000
# Hornet 4 Drive    1.098612 0.0000000
# Hornet Sportabout 1.098612 0.6931472
# Valiant           1.098612 0.0000000

dapply preserves the data structure:

is.data.frame(dapply(mtcars, log))
# [1] TRUE
is.matrix(dapply(m, log))
# [1] TRUE

It also delivers seamless conversions, i.e. you can apply functions to data frame rows or columns and return a matrix and vice-versa:

identical(log(m), dapply(mtcars, log, return = "matrix"))
# [1] TRUE
identical(dapply(mtcars, log), dapply(m, log, return = "data.frame"))
# [1] TRUE

On data frames, the performance is comparable to lapply, and dapply is about 2x faster than apply for row- or column-wise operations on matrices. The most important feature is that it does not change the structure of the data at all: all attributes are preserved unless the result is a scalar and drop = TRUE (the default).

6.2 Split-Apply-Combine Computing

BY is a generalization of dapply for grouped computations using functions that are not part of the Fast Statistical Functions introduced above. It fundamentally is a re-implementation of the lapply(split(x, g), FUN, ...) computing paradigm in base R, but substantially faster and more versatile than functions like tapply, by or aggregate. It is however not faster than dplyr or data.table for larger grouped computations on data frames requiring split-apply-combine computing.

BY is S3 generic with methods for vector, matrix, data.frame and grouped_df5. It also supports the same grouping (g) inputs as the Fast Statistical Functions (grouping vectors, factors, lists or GRP objects). Below the use of BY is demonstrated on vectors matrices and data frames.

v <- iris$Sepal.Length   # A numeric vector
f <- iris$Species        # A factor

## default vector method
BY(v, f, sum)                          # Sum by species, about 2x faster than tapply(v, f, sum)
#     setosa versicolor  virginica 
#      250.3      296.8      329.4

BY(v, f, quantile)                     # Species quantiles: by default stacked
#       setosa.0%      setosa.25%      setosa.50%      setosa.75%     setosa.100%   versicolor.0% 
#           4.300           4.800           5.000           5.200           5.800           4.900 
#  versicolor.25%  versicolor.50%  versicolor.75% versicolor.100%    virginica.0%   virginica.25% 
#           5.600           5.900           6.300           7.000           4.900           6.225 
#   virginica.50%   virginica.75%  virginica.100% 
#           6.500           6.900           7.900

BY(v, f, quantile, expand.wide = TRUE) # Wide format
#             0%   25% 50% 75% 100%
# setosa     4.3 4.800 5.0 5.2  5.8
# versicolor 4.9 5.600 5.9 6.3  7.0
# virginica  4.9 6.225 6.5 6.9  7.9

## matrix method
miris <- qM(num_vars(iris))
BY(miris, f, sum)                          # Also returns as matrix
#            Sepal.Length Sepal.Width Petal.Length Petal.Width
# setosa            250.3       171.4         73.1        12.3
# versicolor        296.8       138.5        213.0        66.3
# virginica         329.4       148.7        277.6       101.3

BY(miris, f, quantile) %>% head
#               Sepal.Length Sepal.Width Petal.Length Petal.Width
# setosa.0%              4.3       2.300        1.000         0.1
# setosa.25%             4.8       3.200        1.400         0.2
# setosa.50%             5.0       3.400        1.500         0.2
# setosa.75%             5.2       3.675        1.575         0.3
# setosa.100%            5.8       4.400        1.900         0.6
# versicolor.0%          4.9       2.000        3.000         1.0

BY(miris, f, quantile, expand.wide = TRUE)[, 1:5]
#            Sepal.Length.0% Sepal.Length.25% Sepal.Length.50% Sepal.Length.75% Sepal.Length.100%
# setosa                 4.3            4.800              5.0              5.2               5.8
# versicolor             4.9            5.600              5.9              6.3               7.0
# virginica              4.9            6.225              6.5              6.9               7.9

BY(miris, f, quantile, expand.wide = TRUE, return = "list")[1:2] # list of matrices
# $Sepal.Length
#             0%   25% 50% 75% 100%
# setosa     4.3 4.800 5.0 5.2  5.8
# versicolor 4.9 5.600 5.9 6.3  7.0
# virginica  4.9 6.225 6.5 6.9  7.9
# 
# $Sepal.Width
#             0%   25% 50%   75% 100%
# setosa     2.3 3.200 3.4 3.675  4.4
# versicolor 2.0 2.525 2.8 3.000  3.4
# virginica  2.2 2.800 3.0 3.175  3.8

## data.frame method
BY(num_vars(iris), f, sum)             # Also returns a data.frame etc...
#            Sepal.Length Sepal.Width Petal.Length Petal.Width
# setosa            250.3       171.4         73.1        12.3
# versicolor        296.8       138.5        213.0        66.3
# virginica         329.4       148.7        277.6       101.3

## Conversions
identical(BY(num_vars(iris), f, sum), BY(miris, f, sum, return = "data.frame"))
# [1] TRUE
identical(BY(miris, f, sum), BY(num_vars(iris), f, sum, return = "matrix"))
# [1] TRUE

6.3 Fast (Grouped) Replacing and Sweeping-out Statistics

TRA is an S3 generic that efficiently transforms data by either replacing data values with supplied statistics or sweeping the statistics out of the data. It is the workhorse function behind the row-wise arithmetic operators introduced above (%rr%, %r+%, %r-%, %r*%, %r/%), and generalizes those to grouped operations. The 10 operations supported by TRA are:

  • 1 - “replace_fill†: replace and overwrite missing values (same as dplyr::mutate)

  • 2 - “replace†: replace but preserve missing values

  • 3 - “-†: subtract (center)

  • 4 - “-+†: subtract group-statistics but add average of group statistics

  • 5 - “/†: divide (scale)

  • 6 - “%†: compute percentages (divide and multiply by 100)

  • 7 - “+†: add

  • 8 - “*†: multiply

  • 9 - “%%†: modulus

  • 10 - “-%%†: subtract modulus

TRA is also incorporated as an argument to all Fast Statistical Functions. Therefore it is only really necessary and advisable to use the TRA function if both aggregate statistics and transformed data are required, or to sweep out statistics otherwise obtained (e.g. regression or correlation coefficients etc.). The code below computes the column means of the iris-matrix obtained above, and uses them to demean that matrix.

# Note: All examples below generalize to vectors or data frames
stats <- fmean(miris)               # Saving stats

# 6 identical ways of centering a matrix
microbenchmark(sweep(miris, 2, stats, "-"),  # base R
               miris - outer(rep(1, nrow(iris)), stats),
               TRA(miris, fmean(miris), "-"),
               miris %r-% fmean(miris),      # The operator is actually a wrapper around TRA
               fmean(miris, TRA = "-"),      # better for any operation if the stats are not needed
               fwithin(miris))               # fastest, fwithin is discussed in section 6.5
# Unit: microseconds
#                                      expr    min      lq     mean  median      uq    max neval
#               sweep(miris, 2, stats, "-") 15.457 16.2975 17.57711 17.0355 17.6915 53.505   100
#  miris - outer(rep(1, nrow(iris)), stats)  4.715  5.6375  6.36812  6.0270  6.6010 21.402   100
#             TRA(miris, fmean(miris), "-")  3.075  3.3210  3.98930  3.6080  4.4895 14.678   100
#                   miris %r-% fmean(miris)  3.362  3.8130  4.68425  4.0590  4.5305 42.066   100
#                   fmean(miris, TRA = "-")  2.583  2.8085  3.79496  2.9930  4.2640 29.848   100
#                            fwithin(miris)  3.321  3.6080  5.26768  3.8130  4.9815 78.474   100

# Simple replacing [same as fmean(miris, TRA = "replace") or fbetween(miris)]
TRA(miris, fmean(miris), "replace") %>% head(3)
#      Sepal.Length Sepal.Width Petal.Length Petal.Width
# [1,]     5.843333    3.057333        3.758    1.199333
# [2,]     5.843333    3.057333        3.758    1.199333
# [3,]     5.843333    3.057333        3.758    1.199333

# Simple scaling [same as fsd(miris, TRA = "/")]
TRA(miris, fsd(miris), "/") %>% head(3)
#      Sepal.Length Sepal.Width Petal.Length Petal.Width
# [1,]     6.158928    8.029986    0.7930671   0.2623854
# [2,]     5.917402    6.882845    0.7930671   0.2623854
# [3,]     5.675875    7.341701    0.7364195   0.2623854

All of the above is functionality also offered by base::sweep, but TRA is significantly faster. The big advantage of TRA is that it also supports grouped operations:

# Grouped centering [same as fmean(miris, f, TRA = "-") or fwithin(m, f)]
TRA(miris, fmean(miris, f), "-", f) %>% head(3)
#      Sepal.Length Sepal.Width Petal.Length Petal.Width
# [1,]        0.094       0.072       -0.062      -0.046
# [2,]       -0.106      -0.428       -0.062      -0.046
# [3,]       -0.306      -0.228       -0.162      -0.046

# Grouped replacing [same as fmean(m, f, TRA = "replace") or fbetween(m, f)]
TRA(miris, fmean(miris, f), "replace", f) %>% head(3)
#      Sepal.Length Sepal.Width Petal.Length Petal.Width
# [1,]        5.006       3.428        1.462       0.246
# [2,]        5.006       3.428        1.462       0.246
# [3,]        5.006       3.428        1.462       0.246

# Groupwise percentages [same as fsum(m, f, TRA = "%")]
TRA(miris, fsum(miris, f), "%", f) %>% head(3)
#      Sepal.Length Sepal.Width Petal.Length Petal.Width
# [1,]     2.037555    2.042007     1.915185    1.626016
# [2,]     1.957651    1.750292     1.915185    1.626016
# [3,]     1.877747    1.866978     1.778386    1.626016

As mentioned, calling the TRA() function does not make much sense if the same task can be performed using the Fast Statistical Functions or the arithmetic operators. It is however a very useful function to call for complex transformations involving grouped sweeping operations with precomputed quantities.

6.4 Fast Standardizing

The function fscale can be used to efficiently standardize (i.e. scale and center) data using a numerically stable online algorithm. It’s structure is the same as the Fast Statistical Functions. The standardization-operator STD also exists as a wrapper around fscale. The difference is that by default STD adds a prefix to standardized variables and also provides an enhanced method for data frames (more about operators in the next section).

# fscale doesn't rename columns
fscale(mtcars) %>% head(2)
#                     mpg        cyl       disp         hp      drat         wt       qsec         vs
# Mazda RX4     0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.6103996 -0.7771651 -0.8680278
# Mazda RX4 Wag 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.3497853 -0.4637808 -0.8680278
#                     am      gear      carb
# Mazda RX4     1.189901 0.4235542 0.7352031
# Mazda RX4 Wag 1.189901 0.4235542 0.7352031

# By default adds a prefix
STD(mtcars) %>% head(2)
#                 STD.mpg    STD.cyl   STD.disp     STD.hp  STD.drat     STD.wt   STD.qsec     STD.vs
# Mazda RX4     0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.6103996 -0.7771651 -0.8680278
# Mazda RX4 Wag 0.1508848 -0.1049878 -0.5706198 -0.5350928 0.5675137 -0.3497853 -0.4637808 -0.8680278
#                 STD.am  STD.gear  STD.carb
# Mazda RX4     1.189901 0.4235542 0.7352031
# Mazda RX4 Wag 1.189901 0.4235542 0.7352031

# See that is works
STD(mtcars) %>% qsu
#            N  Mean  SD      Min     Max
# STD.mpg   32     0   1  -1.6079  2.2913
# STD.cyl   32     0   1  -1.2249  1.0149
# STD.disp  32    -0   1  -1.2879  1.9468
# STD.hp    32     0   1   -1.381  2.7466
# STD.drat  32    -0   1  -1.5646  2.4939
# STD.wt    32    -0   1  -1.7418  2.2553
# STD.qsec  32    -0   1   -1.874  2.8268
# STD.vs    32     0   1   -0.868   1.116
# STD.am    32    -0   1  -0.8141  1.1899
# STD.gear  32    -0   1  -0.9318  1.7789
# STD.carb  32    -0   1  -1.1222  3.2117

# We can also scale and center to a different mean and standard deviation:
qsu(fscale(mtcars, mean = 5, sd = 3))[, .c(Mean, SD)] %>% t
#       mpg  cyl  disp  hp  drat  wt  qsec  vs  am  gear  carb
# Mean    5    5     5   5     5   5     5   5   5     5     5
# SD      3    3     3   3     3   3     3   3   3     3     3

# Or not center at all. In that case scaling is mean-preserving, in contrast to fsd(mtcars, TRA = "/")
qsu(fscale(mtcars, mean = FALSE, sd = 3))[, .c(Mean, SD)] %>% t
#           mpg     cyl      disp        hp    drat      wt     qsec      vs      am    gear    carb
# Mean  20.0906  6.1875  230.7219  146.6875  3.5966  3.2172  17.8487  0.4375  0.4062  3.6875  2.8125
# SD          3       3         3         3       3       3        3       3       3       3       3

Scaling with fscale / STD can also be done groupwise and / or weighted. For example the Groningen Growth and Development Center 10-Sector Database provides annual series of value added in local currency and persons employed for 10 broad sectors in several African, Asian, and Latin American countries.

head(GGDC10S)
#   Country Regioncode             Region Variable Year      AGR      MIN       MAN        PU
# 1     BWA        SSA Sub-saharan Africa       VA 1960       NA       NA        NA        NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961       NA       NA        NA        NA
# 3     BWA        SSA Sub-saharan Africa       VA 1962       NA       NA        NA        NA
# 4     BWA        SSA Sub-saharan Africa       VA 1963       NA       NA        NA        NA
# 5     BWA        SSA Sub-saharan Africa       VA 1964 16.30154 3.494075 0.7365696 0.1043936
# 6     BWA        SSA Sub-saharan Africa       VA 1965 15.72700 2.495768 1.0181992 0.1350976
#         CON      WRT      TRA     FIRE      GOV      OTH      SUM
# 1        NA       NA       NA       NA       NA       NA       NA
# 2        NA       NA       NA       NA       NA       NA       NA
# 3        NA       NA       NA       NA       NA       NA       NA
# 4        NA       NA       NA       NA       NA       NA       NA
# 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229
# 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710

If we wanted to correlate this data across countries and sectors, it needs to be standardized:

# Standardizing Sectors by Variable and Country
STD_GGDC10S <- STD(GGDC10S, ~ Variable + Country, cols = 6:16)
head(STD_GGDC10S)
#   Variable Country    STD.AGR    STD.MIN    STD.MAN     STD.PU    STD.CON    STD.WRT    STD.TRA
# 1       VA     BWA         NA         NA         NA         NA         NA         NA         NA
# 2       VA     BWA         NA         NA         NA         NA         NA         NA         NA
# 3       VA     BWA         NA         NA         NA         NA         NA         NA         NA
# 4       VA     BWA         NA         NA         NA         NA         NA         NA         NA
# 5       VA     BWA -0.7382911 -0.7165772 -0.6682536 -0.8051315 -0.6922839 -0.6032762 -0.5889923
# 6       VA     BWA -0.7392424 -0.7167359 -0.6680535 -0.8050172 -0.6917529 -0.6030211 -0.5887320
#     STD.FIRE    STD.GOV    STD.OTH    STD.SUM
# 1         NA         NA         NA         NA
# 2         NA         NA         NA         NA
# 3         NA         NA         NA         NA
# 4         NA         NA         NA         NA
# 5 -0.6349956 -0.6561054 -0.5959744 -0.6758663
# 6 -0.6349359 -0.6558634 -0.5957137 -0.6757768

# Correlating Standardized Value-Added across countries
fsubset(STD_GGDC10S, Variable == "VA", STD.AGR:STD.SUM) %>% pwcor
#          STD.AGR STD.MIN STD.MAN STD.PU STD.CON STD.WRT STD.TRA STD.FIRE STD.GOV STD.OTH STD.SUM
# STD.AGR       1      .88     .93    .88     .89     .90     .90      .86     .93     .88     .90
# STD.MIN      .88      1      .86    .84     .85     .85     .84      .83     .88     .84     .86
# STD.MAN      .93     .86      1     .95     .96     .97     .98      .95     .98     .97     .98
# STD.PU       .88     .84     .95     1      .95     .96     .96      .95     .96     .96     .97
# STD.CON      .89     .85     .96    .95      1      .98     .98      .97     .98     .97     .98
# STD.WRT      .90     .85     .97    .96     .98      1      .99      .98     .99     .99    1.00
# STD.TRA      .90     .84     .98    .96     .98     .99      1       .98     .99     .99     .99
# STD.FIRE     .86     .83     .95    .95     .97     .98     .98       1      .98     .98     .98
# STD.GOV      .93     .88     .98    .96     .98     .99     .99      .98      1      .99    1.00
# STD.OTH      .88     .84     .97    .96     .97     .99     .99      .98     .99      1      .99
# STD.SUM      .90     .86     .98    .97     .98    1.00     .99      .98    1.00     .99      1

6.5 Fast Centering and Averaging

As a slightly faster alternative to fmean(x, g, w, TRA = "-"/"-+") or fmean(x, g, w, TRA = "replace"/"replace_fill"), fwithin and fbetween can be used to perform common (grouped, weighted) centering and averaging tasks (also known as between- and within- transformations in the language of panel data econometrics). fbetween / fwithin are faster than fmean(..., TRA = ...) because they don’t materialize the full set of computed averages. The operators W and B also exist.

## Simple centering and averaging
fbetween(mtcars$mpg) %>% head
# [1] 20.09062 20.09062 20.09062 20.09062 20.09062 20.09062

fwithin(mtcars$mpg) %>% head
# [1]  0.909375  0.909375  2.709375  1.309375 -1.390625 -1.990625

all.equal(fbetween(mtcars) + fwithin(mtcars), mtcars)
# [1] TRUE

## Groupwise centering and averaging
fbetween(mtcars$mpg, mtcars$cyl) %>% head
# [1] 19.74286 19.74286 26.66364 19.74286 15.10000 19.74286

fwithin(mtcars$mpg, mtcars$cyl) %>% head
# [1]  1.257143  1.257143 -3.863636  1.657143  3.600000 -1.642857

all.equal(fbetween(mtcars, mtcars$cyl) + fwithin(mtcars, mtcars$cyl), mtcars)
# [1] TRUE

To demonstrate more clearly the utility of the operators which exists for all fast transformation and time series functions, the code below implements the task of demeaning 4 series by country and saving the country-id using the within-operator W as opposed to fwithin which requires all input to be passed externally like the Fast Statistical Functions.

# Center 4 series in this dataset by country
W(wlddev, ~ iso3c, cols = 9:12) %>% head
#   iso3c W.PCGDP  W.LIFEEX W.GINI       W.ODA
# 1   AFG      NA -16.75117     NA -1370778502
# 2   AFG      NA -16.23517     NA -1255468497
# 3   AFG      NA -15.72617     NA -1374708502
# 4   AFG      NA -15.22617     NA -1249828497
# 5   AFG      NA -14.73417     NA -1191628485
# 6   AFG      NA -14.24917     NA -1145708502

# Same thing done manually using fwithin...
add_vars(get_vars(wlddev, "iso3c"),
         get_vars(wlddev, 9:12) %>%
         fwithin(wlddev$iso3c) %>%
         add_stub("W.")) %>% head
#   iso3c W.PCGDP  W.LIFEEX W.GINI       W.ODA
# 1   AFG      NA -16.75117     NA -1370778502
# 2   AFG      NA -16.23517     NA -1255468497
# 3   AFG      NA -15.72617     NA -1374708502
# 4   AFG      NA -15.22617     NA -1249828497
# 5   AFG      NA -14.73417     NA -1191628485
# 6   AFG      NA -14.24917     NA -1145708502

It is also possible to drop the id’s in W using the argument keep.by = FALSE. fbetween / B and fwithin / W each have one additional computational option:

# This replaces missing values with the group-mean: Same as fmean(x, g, TRA = "replace_fill")
B(wlddev, ~ iso3c, cols = 9:12, fill = TRUE) %>% head
#   iso3c  B.PCGDP B.LIFEEX B.GINI      B.ODA
# 1   AFG 483.8351 49.19717     NA 1487548499
# 2   AFG 483.8351 49.19717     NA 1487548499
# 3   AFG 483.8351 49.19717     NA 1487548499
# 4   AFG 483.8351 49.19717     NA 1487548499
# 5   AFG 483.8351 49.19717     NA 1487548499
# 6   AFG 483.8351 49.19717     NA 1487548499

# This adds back the overall mean after subtracting out group means: Same as fmean(x, g, TRA = "-+")
W(wlddev, ~ iso3c, cols = 9:12, mean = "overall.mean")  %>% head
#   iso3c W.PCGDP W.LIFEEX W.GINI      W.ODA
# 1   AFG      NA 47.54514     NA -916058371
# 2   AFG      NA 48.06114     NA -800748366
# 3   AFG      NA 48.57014     NA -919988371
# 4   AFG      NA 49.07014     NA -795108366
# 5   AFG      NA 49.56214     NA -736908354
# 6   AFG      NA 50.04714     NA -690988371

# Visual demonstration of centering on the overall mean vs. simple centering
oldpar <- par(mfrow = c(1, 3))
plot(iris[1:2], col = iris$Species, main = "Raw Data")                       # Raw data
plot(W(iris, ~ Species)[2:3], col = iris$Species, main = "Simple Centering") # Simple centering
plot(W(iris, ~ Species, mean = "overall.mean")[2:3], col = iris$Species,     # Centering on overall mean: Preserves level of data
     main = "Added Overall Mean")
plot of chunk BWplot

plot of chunk BWplot

par(oldpar)

Another great utility of operators is that they can be employed in regression formulas in a manor that is both very efficient and pleasing to the eyes. The code below demonstrates the use of W and B to efficiently run fixed-effects regressions with lm.

# When using operators in formulas, we need to remove missing values beforehand to obtain the same results as a Fixed-Effects package
data <- wlddev %>% fselect(iso3c, year, PCGDP, LIFEEX) %>% na_omit

# classical lm() -> iso3c is a factor, creates a matrix of 200+ country dummies.
coef(lm(PCGDP ~ LIFEEX + iso3c, data))[1:2]
# (Intercept)      LIFEEX 
#   -2837.039     380.448

# Centering each variable individually
coef(lm(W(PCGDP, iso3c) ~ W(LIFEEX, iso3c), data))
#      (Intercept) W(LIFEEX, iso3c) 
#     5.596034e-13     3.804480e+02

# Centering the data
coef(lm(W.PCGDP ~ W.LIFEEX, W(data, PCGDP + LIFEEX ~ iso3c)))
#  (Intercept)     W.LIFEEX 
# 5.596034e-13 3.804480e+02

# Adding the overall mean back to the data only changes the intercept
coef(lm(W.PCGDP ~ W.LIFEEX, W(data, PCGDP + LIFEEX  ~ iso3c, mean = "overall.mean")))
# (Intercept)    W.LIFEEX 
#  -14020.142     380.448

# Procedure suggested by Mundlak (1978) - controlling for group averages instead of demeaning
coef(lm(PCGDP ~ LIFEEX + B(LIFEEX, iso3c), data))
#      (Intercept)           LIFEEX B(LIFEEX, iso3c) 
#      -52254.7421         380.4480         585.8386

In general it is recommended calling the long names (i.e. fwithin or fscale etc.) for programming since they are a bit more efficient on the R-side of things and require all input in terms of data. For all other purposes the operators are more convenient. It is important to note that the operators can do everything the functions can do (i.e. you can also pass grouping vectors or GRP objects to them). They are just simple wrappers that in the data frame method add 4 additional features:

  • The possibility of formula input to by i.e. W(mtcars, ~ cyl) or W(mtcars, mpg ~ cyl)
  • They preserve grouping columns (cyl in the above example) when passed in a formula (default keep.by = TRUE)
  • The ability to subset many columns using the cols argument (i.e. W(mtcars, ~ cyl, cols = 4:7) is the same as W(mtcars, hp + drat + wt + qsec ~ cyl))
  • They rename transformed columns by adding a prefix (default stub = "W.")

6.6 HD Centering and Linear Prediction

Sometimes simple centering is not enough, for example if a linear model with multiple levels of fixed-effects needs to be estimated, potentially involving interactions with continuous covariates. For these purposes fhdwithin / HDW and fhdbetween / HDB were created as efficient multi-purpose functions for linear prediction and partialling out. They operate by splitting complex regression problems in 2 parts: Factors and factor-interactions are projected out using fixest::demean, an efficient C++ routine for centering vectors on multiple factors, whereas continuous variables are dealt with using a standard chol or qr decomposition in base R. The examples below show the use of the HDW operator in manually solving a regression problem with country and time fixed effects.

data$year <- qF(data$year, na.exclude = FALSE) # the country code (iso3c) is already a factor

# classical lm() -> creates a matrix of 196 country dummies and 56 year dummies
coef(lm(PCGDP ~ LIFEEX + iso3c + year, data))[1:2]
# (Intercept)      LIFEEX 
#  37388.0493   -333.0115

# Centering each variable individually
coef(lm(HDW(PCGDP, list(iso3c, year)) ~ HDW(LIFEEX, list(iso3c, year)), data))
#                    (Intercept) HDW(LIFEEX, list(iso3c, year)) 
#                  -2.450245e-13                  -3.330115e+02

# Centering the entire data
coef(lm(HDW.PCGDP ~ HDW.LIFEEX, HDW(data, PCGDP + LIFEEX ~ iso3c + year)))
#   (Intercept)    HDW.LIFEEX 
# -2.450245e-13 -3.330115e+02

# Procedure suggested by Mundlak (1978) - controlling for averages instead of demeaning
coef(lm(PCGDP ~ LIFEEX + HDB(LIFEEX, list(iso3c, year)), data))
#                    (Intercept)                         LIFEEX HDB(LIFEEX, list(iso3c, year)) 
#                    -48141.1094                      -333.0115                      1236.2681

We may wish to test whether including time fixed-effects in the above regression actually impacts the fit. This can be done with the fast F-test:

# The syntax is fFtest(y, exc, X, ...). 'exc' are exclusion restrictions.
data %$% fFtest(PCGDP, year, list(LIFEEX, iso3c))
#                     R-Sq.  DF1  DF2  F-Stat.  P-Value
# Full Model          0.894  258 8763  286.130    0.000
# Restricted Model    0.873  199 8822  304.661    0.000
# Exclusion Rest.     0.021   59 8763   29.280    0.000

The test shows that the time fixed-effects (accounted for like year dummies) are jointly significant.

One can also use fhdbetween / HDB and fhdwithin / HDW to project out interactions and continuous covariates.

wlddev$year <- as.numeric(wlddev$year)

# classical lm() -> full country-year interaction, -> 200+ country dummies, 200+ trends, year and ODA
coef(lm(PCGDP ~ LIFEEX + iso3c * year + ODA, wlddev))[1:2]
#   (Intercept)        LIFEEX 
# -7.257955e+05  8.938626e+00

# Same using HDW
coef(lm(HDW.PCGDP ~ HDW.LIFEEX, HDW(wlddev, PCGDP + LIFEEX ~ iso3c * year + ODA)))
#  (Intercept)   HDW.LIFEEX 
# 3.403288e-12 8.938626e+00

# example of a simple continuous problem
HDW(iris[1:2], iris[3:4]) %>% head
#   HDW.Sepal.Length HDW.Sepal.Width
# 1       0.21483967       0.2001352
# 2       0.01483967      -0.2998648
# 3      -0.13098262      -0.1255786
# 4      -0.33933805      -0.1741510
# 5       0.11483967       0.3001352
# 6       0.41621663       0.6044681

# May include factors..
HDW(iris[1:2], iris[3:5]) %>% head
#   HDW.Sepal.Length HDW.Sepal.Width
# 1       0.14989286       0.1102684
# 2      -0.05010714      -0.3897316
# 3      -0.15951256      -0.1742640
# 4      -0.44070173      -0.3051992
# 5       0.04989286       0.2102684
# 6       0.17930818       0.3391766

7. Time Series and Panel Series

collapse also presents some essential contributions in the time series domain, particularly in the area of (irregular) time series, panel data and efficient and secure computations on (potentially unordered) time-dependent vectors and (unbalanced) panels.

7.1 Panel Series to Array Conversions

To facilitate the exploration and access of panel data, psmat was created as an S3 generic to efficiently obtain matrices or 3D-arrays from panel data.

mts <- psmat(wlddev, PCGDP ~ iso3c, ~ year)
str(mts)
#  'psmat' num [1:216, 1:61] NA NA NA NA NA ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#  - attr(*, "transpose")= logi FALSE
plot(log10(mts), main = paste("Log10", vlabels(wlddev$PCGDP)), xlab = "Year")
plot of chunk psmatplot

plot of chunk psmatplot

Passing a data frame of panel series to psmat generates a 3D array:

# Get panel series array
psar <- psmat(wlddev, ~ iso3c, ~ year, cols = 9:12)
str(psar)
#  'psmat' num [1:216, 1:61, 1:4] NA NA NA NA NA ...
#  - attr(*, "dimnames")=List of 3
#   ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA"
#  - attr(*, "transpose")= logi FALSE
plot(psar)
plot of chunk psarplot

plot of chunk psarplot

# Plot array of Panel Series aggregated by region:
collap(wlddev, ~ region + year, cols = 9:12) %>%
  psmat( ~ region, ~ year) %>%
  plot(legend = TRUE, labs = vlabels(wlddev)[9:12])
plot of chunk psarplot2

plot of chunk psarplot2

psmat can also output a list of panel series matrices, which can be used among other things to reshape the data with unlist2d (discussed in more detail in List-Processing section).

# This gives list of ps-matrices
psml <- psmat(wlddev, ~ iso3c, ~ year, 9:12, array = FALSE)
str(psml, give.attr = FALSE)
# List of 4
#  $ PCGDP : 'psmat' num [1:216, 1:61] NA NA NA NA NA ...
#  $ LIFEEX: 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ...
#  $ GINI  : 'psmat' num [1:216, 1:61] NA NA NA NA NA NA NA NA NA NA ...
#  $ ODA   : 'psmat' num [1:216, 1:61] NA 116769997 -390000 NA NA ...

# Using unlist2d, can generate a data.frame
unlist2d(psml, idcols = "Variable", row.names = "Country") %>% gv(1:10) %>% head
#   Variable Country 1960 1961 1962 1963 1964 1965 1966 1967
# 1    PCGDP     ABW   NA   NA   NA   NA   NA   NA   NA   NA
# 2    PCGDP     AFG   NA   NA   NA   NA   NA   NA   NA   NA
# 3    PCGDP     AGO   NA   NA   NA   NA   NA   NA   NA   NA
# 4    PCGDP     ALB   NA   NA   NA   NA   NA   NA   NA   NA
# 5    PCGDP     AND   NA   NA   NA   NA   NA   NA   NA   NA
# 6    PCGDP     ARE   NA   NA   NA   NA   NA   NA   NA   NA

7.2 Panel Series ACF, PACF and CCF

The correlation structure of panel data can also be explored with psacf, pspacf and psccf. These functions are exact analogues to stats::acf, stats::pacf and stats::ccf. They use fscale to group-scale panel data by the panel-id provided, and then compute the covariance of a sequence of panel-lags (generated with flag discussed below) with the group-scaled level-series, dividing by the variance of the group-scaled level series. The Partial-ACF is generated from the ACF using a Yule-Walker decomposition (as in stats::pacf).

# Panel-ACF of GDP per Capita
psacf(wlddev, PCGDP ~ iso3c, ~year)
plot of chunk PSACF

plot of chunk PSACF

# Panel-Partial-ACF of GDP per Capia
pspacf(wlddev, PCGDP ~ iso3c, ~year)
plot of chunk PSACF

plot of chunk PSACF

# Panel- Cross-Correlation function of GDP per Capia and Life-Expectancy
wlddev %$% psccf(PCGDP, LIFEEX, iso3c, year)
plot of chunk PSACF

plot of chunk PSACF

# Multivariate Panel-auto and cross-correlation function of 3 variables:
psacf(wlddev, PCGDP + LIFEEX + ODA ~ iso3c, ~year)
plot of chunk PSACF

plot of chunk PSACF

7.3 Fast Lags and Leads

flag and the corresponding lag- and lead- operators L and F are S3 generics to efficiently compute lags and leads on time series and panel data. The code below shows how to compute simple lags and leads on the classic Box & Jenkins airline data that comes with R.

# 1 lag
L(AirPassengers)
#      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
# 1949  NA 112 118 132 129 121 135 148 148 136 119 104
# 1950 118 115 126 141 135 125 149 170 170 158 133 114
# 1951 140 145 150 178 163 172 178 199 199 184 162 146
# 1952 166 171 180 193 181 183 218 230 242 209 191 172
# 1953 194 196 196 236 235 229 243 264 272 237 211 180
# 1954 201 204 188 235 227 234 264 302 293 259 229 203
# 1955 229 242 233 267 269 270 315 364 347 312 274 237
# 1956 278 284 277 317 313 318 374 413 405 355 306 271
# 1957 306 315 301 356 348 355 422 465 467 404 347 305
# 1958 336 340 318 362 348 363 435 491 505 404 359 310
# 1959 337 360 342 406 396 420 472 548 559 463 407 362
# 1960 405 417 391 419 461 472 535 622 606 508 461 390

# 3 identical ways of computing 1 lag
all_identical(flag(AirPassengers), L(AirPassengers), F(AirPassengers,-1))
# [1] TRUE

# 1 lead and 3 lags - output as matrix
L(AirPassengers, -1:3) %>% head
#       F1  --  L1  L2  L3
# [1,] 118 112  NA  NA  NA
# [2,] 132 118 112  NA  NA
# [3,] 129 132 118 112  NA
# [4,] 121 129 132 118 112
# [5,] 135 121 129 132 118
# [6,] 148 135 121 129 132

# ... this is still a time series object:
attributes(L(AirPassengers, -1:3))
# $tsp
# [1] 1949.000 1960.917   12.000
# 
# $class
# [1] "ts"     "matrix"
# 
# $dim
# [1] 144   5
# 
# $dimnames
# $dimnames[[1]]
# NULL
# 
# $dimnames[[2]]
# [1] "F1" "--" "L1" "L2" "L3"

flag / L / F also work well on (time series) matrices. Below a regression with daily closing prices of major European stock indices is run: Germany DAX (Ibis), Switzerland SMI, France CAC, and UK FTSE. The data are sampled in business time, i.e. weekends and holidays are omitted.

str(EuStockMarkets)
#  Time-Series [1:1860, 1:4] from 1991 to 1999: 1629 1614 1607 1621 1618 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : NULL
#   ..$ : chr [1:4] "DAX" "SMI" "CAC" "FTSE"

# Data is recorded on 260 days per year, 1991-1999
tsp(EuStockMarkets)
# [1] 1991.496 1998.646  260.000
freq <- frequency(EuStockMarkets)

# There is some obvious seasonality
stl(EuStockMarkets[, "DAX"], freq) %>% plot
plot of chunk mts

plot of chunk mts


# 1 annual lead and 1 annual lag
L(EuStockMarkets, -1:1*freq) %>% head
#      F260.DAX     DAX L260.DAX F260.SMI    SMI L260.SMI F260.CAC    CAC L260.CAC F260.FTSE   FTSE
# [1,]  1755.98 1628.75       NA   1846.6 1678.1       NA   1907.3 1772.8       NA    2515.8 2443.6
# [2,]  1754.95 1613.63       NA   1854.8 1688.5       NA   1900.6 1750.5       NA    2521.2 2460.2
# [3,]  1759.90 1606.51       NA   1845.3 1678.6       NA   1880.9 1718.0       NA    2493.9 2448.2
# [4,]  1759.84 1621.04       NA   1854.5 1684.1       NA   1873.5 1708.1       NA    2476.1 2470.4
# [5,]  1776.50 1618.16       NA   1870.5 1686.6       NA   1883.6 1723.1       NA    2497.1 2484.7
# [6,]  1769.98 1610.61       NA   1862.6 1671.6       NA   1868.5 1714.3       NA    2469.0 2466.8
#      L260.FTSE
# [1,]        NA
# [2,]        NA
# [3,]        NA
# [4,]        NA
# [5,]        NA
# [6,]        NA

# DAX regressed on it's own 2 annual lags and the lags of the other indicators
lm(DAX ~., data = L(EuStockMarkets, 0:2*freq)) %>% summary
# 
# Call:
# lm(formula = DAX ~ ., data = L(EuStockMarkets, 0:2 * freq))
# 
# Residuals:
#     Min      1Q  Median      3Q     Max 
# -240.46  -51.28  -12.01   45.19  358.02 
# 
# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)    
# (Intercept) -564.02041   93.94903  -6.003 2.49e-09 ***
# L260.DAX      -0.12577    0.03002  -4.189 2.99e-05 ***
# L520.DAX      -0.12528    0.04103  -3.053  0.00231 ** 
# SMI            0.32601    0.01726  18.890  < 2e-16 ***
# L260.SMI       0.27499    0.02517  10.926  < 2e-16 ***
# L520.SMI       0.04602    0.02602   1.769  0.07721 .  
# CAC            0.59637    0.02349  25.389  < 2e-16 ***
# L260.CAC      -0.14283    0.02763  -5.169 2.72e-07 ***
# L520.CAC       0.05196    0.03657   1.421  0.15557    
# FTSE           0.01002    0.02403   0.417  0.67675    
# L260.FTSE      0.04509    0.02807   1.606  0.10843    
# L520.FTSE      0.10601    0.02717   3.902  0.00010 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 83.06 on 1328 degrees of freedom
#   (520 observations deleted due to missingness)
# Multiple R-squared:  0.9943,  Adjusted R-squared:  0.9942 
# F-statistic: 2.092e+04 on 11 and 1328 DF,  p-value: < 2.2e-16

Since v1.5.0, irregular time series are supported:

t <- seq_row(EuStockMarkets)[-4L]

flag(EuStockMarkets[-4L, ], -1:1, t = t) %>% head
#       F1.DAX     DAX  L1.DAX F1.SMI    SMI L1.SMI F1.CAC    CAC L1.CAC F1.FTSE   FTSE L1.FTSE
# [1,] 1613.63 1628.75      NA 1688.5 1678.1     NA 1750.5 1772.8     NA  2460.2 2443.6      NA
# [2,] 1606.51 1613.63 1628.75 1678.6 1688.5 1678.1 1718.0 1750.5 1772.8  2448.2 2460.2  2443.6
# [3,]      NA 1606.51 1613.63     NA 1678.6 1688.5     NA 1718.0 1750.5      NA 2448.2  2460.2
# [4,] 1610.61 1618.16      NA 1671.6 1686.6     NA 1714.3 1723.1     NA  2466.8 2484.7      NA
# [5,] 1630.75 1610.61 1618.16 1682.9 1671.6 1686.6 1734.5 1714.3 1723.1  2487.9 2466.8  2484.7
# [6,] 1640.17 1630.75 1610.61 1703.6 1682.9 1671.6 1757.4 1734.5 1714.3  2508.4 2487.9  2466.8

The main innovation of flag / L / F is the ability to very efficiently compute sequences of lags and leads on panel data, and that this panel data need not be ordered or balanced:

# This lags all 4 series
L(wlddev, 1L, ~ iso3c, ~ year, cols = 9:12) %>% head
#   iso3c year L1.PCGDP L1.LIFEEX L1.GINI    L1.ODA
# 1   AFG 1960       NA        NA      NA        NA
# 2   AFG 1961       NA    32.446      NA 116769997
# 3   AFG 1962       NA    32.962      NA 232080002
# 4   AFG 1963       NA    33.471      NA 112839996
# 5   AFG 1964       NA    33.971      NA 237720001
# 6   AFG 1965       NA    34.463      NA 295920013

# Without t: Works here because data is ordered, but gives a message
L(wlddev, 1L, ~ iso3c, cols = 9:12) %>% head
#   iso3c L1.PCGDP L1.LIFEEX L1.GINI    L1.ODA
# 1   AFG       NA        NA      NA        NA
# 2   AFG       NA    32.446      NA 116769997
# 3   AFG       NA    32.962      NA 232080002
# 4   AFG       NA    33.471      NA 112839996
# 5   AFG       NA    33.971      NA 237720001
# 6   AFG       NA    34.463      NA 295920013

# 1 lead and 2 lags of Life Expectancy
# after removing the 4th row, thus creating an unbalanced panel
wlddev %>% ss(-4L) %>%
  L(-1:2, LIFEEX ~ iso3c, ~year) %>% head
#   iso3c year F1.LIFEEX LIFEEX L1.LIFEEX L2.LIFEEX
# 1   AFG 1960    32.962 32.446        NA        NA
# 2   AFG 1961    33.471 32.962    32.446        NA
# 3   AFG 1962        NA 33.471    32.962    32.446
# 4   AFG 1964    34.948 34.463        NA    33.471
# 5   AFG 1965    35.430 34.948    34.463        NA
# 6   AFG 1966    35.914 35.430    34.948    34.463

Optimal performance is obtained if the panel-id is a factor, and the time variable also a factor or an integer variable. In that case an ordering vector of the data is computed directly without any prior sorting or grouping, and the data is accessed through this vector. Thus the data need not be sorted to compute a fully-identified panel-lag, which is a key advantage to, say, the shift function in data.table.

One intended area of use, especially for the operators L and F, is to substantially facilitate the implementation of dynamic models in various contexts (independent of the estimation package). Below different ways L can be used to estimate a dynamic panel-model using lm are shown:

# Different ways of regressing GDP on it's lags and life-Expectancy and it's lags

# 1 - Precomputing lags
lm(PCGDP ~ ., L(wlddev, 0:2, PCGDP + LIFEEX ~ iso3c, ~ year, keep.ids = FALSE)) %>% summary
# 
# Call:
# lm(formula = PCGDP ~ ., data = L(wlddev, 0:2, PCGDP + LIFEEX ~ 
#     iso3c, ~year, keep.ids = FALSE))
# 
# Residuals:
#      Min       1Q   Median       3Q      Max 
# -16776.5   -102.2    -17.2     91.5  12277.1 
# 
# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)    
# (Intercept) -333.93994   61.04617  -5.470 4.62e-08 ***
# L1.PCGDP       1.31959    0.01021 129.270  < 2e-16 ***
# L2.PCGDP      -0.31707    0.01029 -30.815  < 2e-16 ***
# LIFEEX       -17.77368   35.47772  -0.501    0.616    
# L1.LIFEEX     45.76286   65.87124   0.695    0.487    
# L2.LIFEEX    -21.43005   34.98964  -0.612    0.540    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 787.3 on 8609 degrees of freedom
#   (4561 observations deleted due to missingness)
# Multiple R-squared:  0.9976,  Adjusted R-squared:  0.9976 
# F-statistic: 7.26e+05 on 5 and 8609 DF,  p-value: < 2.2e-16

# 2 - Ad-hoc computation in lm formula
lm(PCGDP ~ L(PCGDP, 1:2, iso3c, year) + L(LIFEEX, 0:2, iso3c, year), wlddev) %>% summary
# 
# Call:
# lm(formula = PCGDP ~ L(PCGDP, 1:2, iso3c, year) + L(LIFEEX, 0:2, 
#     iso3c, year), data = wlddev)
# 
# Residuals:
#      Min       1Q   Median       3Q      Max 
# -16776.5   -102.2    -17.2     91.5  12277.1 
# 
# Coefficients:
#                                 Estimate Std. Error t value Pr(>|t|)    
# (Intercept)                   -333.93994   61.04617  -5.470 4.62e-08 ***
# L(PCGDP, 1:2, iso3c, year)L1     1.31959    0.01021 129.270  < 2e-16 ***
# L(PCGDP, 1:2, iso3c, year)L2    -0.31707    0.01029 -30.815  < 2e-16 ***
# L(LIFEEX, 0:2, iso3c, year)--  -17.77368   35.47772  -0.501    0.616    
# L(LIFEEX, 0:2, iso3c, year)L1   45.76286   65.87124   0.695    0.487    
# L(LIFEEX, 0:2, iso3c, year)L2  -21.43005   34.98964  -0.612    0.540    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 787.3 on 8609 degrees of freedom
#   (4561 observations deleted due to missingness)
# Multiple R-squared:  0.9976,  Adjusted R-squared:  0.9976 
# F-statistic: 7.26e+05 on 5 and 8609 DF,  p-value: < 2.2e-16

# 3 - Precomputing panel-identifiers
g = qF(wlddev$iso3c, na.exclude = FALSE)
t = qF(wlddev$year, na.exclude = FALSE)
lm(PCGDP ~ L(PCGDP, 1:2, g, t) + L(LIFEEX, 0:2, g, t), wlddev) %>% summary
# 
# Call:
# lm(formula = PCGDP ~ L(PCGDP, 1:2, g, t) + L(LIFEEX, 0:2, g, 
#     t), data = wlddev)
# 
# Residuals:
#      Min       1Q   Median       3Q      Max 
# -16776.5   -102.2    -17.2     91.5  12277.1 
# 
# Coefficients:
#                          Estimate Std. Error t value Pr(>|t|)    
# (Intercept)            -333.93994   61.04617  -5.470 4.62e-08 ***
# L(PCGDP, 1:2, g, t)L1     1.31959    0.01021 129.270  < 2e-16 ***
# L(PCGDP, 1:2, g, t)L2    -0.31707    0.01029 -30.815  < 2e-16 ***
# L(LIFEEX, 0:2, g, t)--  -17.77368   35.47772  -0.501    0.616    
# L(LIFEEX, 0:2, g, t)L1   45.76286   65.87124   0.695    0.487    
# L(LIFEEX, 0:2, g, t)L2  -21.43005   34.98964  -0.612    0.540    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 787.3 on 8609 degrees of freedom
#   (4561 observations deleted due to missingness)
# Multiple R-squared:  0.9976,  Adjusted R-squared:  0.9976 
# F-statistic: 7.26e+05 on 5 and 8609 DF,  p-value: < 2.2e-16

7.4 Fast Differences and Growth Rates

Similarly to flag / L / F, fdiff / D / Dlog computes sequences of suitably lagged / leaded and iterated differences, quasi-differences or (quasi-)log-differences on time series and panel data, and fgrowth / G computes growth rates. Using again the Airpassengers data, the seasonal decomposition shows significant seasonality:

stl(AirPassengers, "periodic") %>% plot
plot of chunk stl

plot of chunk stl

We can test the statistical significance of this seasonality by jointly testing a set of monthly dummies regressed on the differenced series. Given that the seasonal fluctuations are increasing in magnitude, using growth rates for the test seems more appropriate:

f <- qF(cycle(AirPassengers))
fFtest(fgrowth(AirPassengers), f)
#   R-Sq.     DF1     DF2 F-Stat. P-value 
#   0.874      11     131  82.238   0.000

The test shows significant seasonality, accounting for 87% of the variation in the growth rate of the series. We can plot the series together with the ordinary, seasonal (12-month) and deseasonalized monthly growth rate using:

G(AirPassengers, c(0, 1, 12)) %>% cbind(W.G1 = W(G(AirPassengers), f)) %>%
  plot(main = "Growth Rate of Airpassengers")
plot of chunk Gplot

plot of chunk Gplot

It is evident that taking the annualized growth rate also removes the periodic behavior. We can also compute second differences or growth rates of growth rates. Below a plot of the ordinary and annual first and second differences of the data:

D(AirPassengers, c(1,12), 1:2) %>% plot
plot of chunk Dplot

plot of chunk Dplot

In general, both fdiff / D and fgrowth / G can compute sequences of lagged / leaded and iterated differences / growth rates.

# sequence of leaded/lagged and iterated differences
y = 1:10
D(y, -2:2, 1:3)
#       F2D1 F2D2 F2D3 FD1 FD2 FD3 -- D1 D2 D3 L2D1 L2D2 L2D3
#  [1,]   -2    0    0  -1   0   0  1 NA NA NA   NA   NA   NA
#  [2,]   -2    0    0  -1   0   0  2  1 NA NA   NA   NA   NA
#  [3,]   -2    0    0  -1   0   0  3  1  0 NA    2   NA   NA
#  [4,]   -2    0    0  -1   0   0  4  1  0  0    2   NA   NA
#  [5,]   -2    0   NA  -1   0   0  5  1  0  0    2    0   NA
#  [6,]   -2    0   NA  -1   0   0  6  1  0  0    2    0   NA
#  [7,]   -2   NA   NA  -1   0   0  7  1  0  0    2    0    0
#  [8,]   -2   NA   NA  -1   0  NA  8  1  0  0    2    0    0
#  [9,]   NA   NA   NA  -1  NA  NA  9  1  0  0    2    0    0
# [10,]   NA   NA   NA  NA  NA  NA 10  1  0  0    2    0    0

All of this also works for panel data. The code below gives an example:

g = rep(1:2, each = 5)
t = rep(1:5, 2)

D(y, -2:2, 1:2, g, t)
#       F2D1 F2D2 FD1 FD2 -- D1 D2 L2D1 L2D2
#  [1,]   -2    0  -1   0  1 NA NA   NA   NA
#  [2,]   -2   NA  -1   0  2  1 NA   NA   NA
#  [3,]   -2   NA  -1   0  3  1  0    2   NA
#  [4,]   NA   NA  -1  NA  4  1  0    2   NA
#  [5,]   NA   NA  NA  NA  5  1  0    2    0
#  [6,]   -2    0  -1   0  6 NA NA   NA   NA
#  [7,]   -2   NA  -1   0  7  1 NA   NA   NA
#  [8,]   -2   NA  -1   0  8  1  0    2   NA
#  [9,]   NA   NA  -1  NA  9  1  0    2   NA
# [10,]   NA   NA  NA  NA 10  1  0    2    0

Calls to flag / L / F, fdiff / D and fgrowth / G can be nested. In the example below, L.matrix is called on the right-half ob the above sequence:

L(D(y, 0:2, 1:2, g, t), 0:1, g, t)
#       -- L1.-- D1 L1.D1 D2 L1.D2 L2D1 L1.L2D1 L2D2 L1.L2D2
#  [1,]  1    NA NA    NA NA    NA   NA      NA   NA      NA
#  [2,]  2     1  1    NA NA    NA   NA      NA   NA      NA
#  [3,]  3     2  1     1  0    NA    2      NA   NA      NA
#  [4,]  4     3  1     1  0     0    2       2   NA      NA
#  [5,]  5     4  1     1  0     0    2       2    0      NA
#  [6,]  6    NA NA    NA NA    NA   NA      NA   NA      NA
#  [7,]  7     6  1    NA NA    NA   NA      NA   NA      NA
#  [8,]  8     7  1     1  0    NA    2      NA   NA      NA
#  [9,]  9     8  1     1  0     0    2       2   NA      NA
# [10,] 10     9  1     1  0     0    2       2    0      NA

fdiff / D and fgrowth / G also come with a data frame method, making the computation of growth-variables on datasets very easy:

G(GGDC10S, 1L, 1L, ~ Variable + Country, ~ Year, cols = 6:10) %>% head
#   Variable Country Year    G1.AGR    G1.MIN   G1.MAN    G1.PU   G1.CON
# 1       VA     BWA 1960        NA        NA       NA       NA       NA
# 2       VA     BWA 1961        NA        NA       NA       NA       NA
# 3       VA     BWA 1962        NA        NA       NA       NA       NA
# 4       VA     BWA 1963        NA        NA       NA       NA       NA
# 5       VA     BWA 1964        NA        NA       NA       NA       NA
# 6       VA     BWA 1965 -3.524492 -28.57143 38.23529 29.41176 103.9604

The code below estimates a dynamic panel model regressing the 10-year growth rate of GDP per capita on it’s 10-year lagged level and the 10-year growth rate of life-expectancy:

summary(lm(G(PCGDP,10,1,iso3c,year) ~
             L(PCGDP,10,iso3c,year) +
             G(LIFEEX,10,1,iso3c,year), data = wlddev))
# 
# Call:
# lm(formula = G(PCGDP, 10, 1, iso3c, year) ~ L(PCGDP, 10, iso3c, 
#     year) + G(LIFEEX, 10, 1, iso3c, year), data = wlddev)
# 
# Residuals:
#     Min      1Q  Median      3Q     Max 
# -104.32  -21.97   -3.96   13.26 1714.58 
# 
# Coefficients:
#                                 Estimate Std. Error t value Pr(>|t|)    
# (Intercept)                    2.740e+01  1.089e+00  25.168  < 2e-16 ***
# L(PCGDP, 10, iso3c, year)     -3.337e-04  4.756e-05  -7.016 2.49e-12 ***
# G(LIFEEX, 10, 1, iso3c, year)  4.617e-01  1.124e-01   4.107 4.05e-05 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 58.43 on 7113 degrees of freedom
#   (6060 observations deleted due to missingness)
# Multiple R-squared:  0.01132, Adjusted R-squared:  0.01104 
# F-statistic: 40.73 on 2 and 7113 DF,  p-value: < 2.2e-16

To go a step further, the code below regresses the 10-year growth rate of GDP on the 10-year lagged levels and 10-year growth rates of GDP and life expectancy, with country and time-fixed effects projected out using HDW. The standard errors are unreliable without bootstrapping, but this example nicely demonstrates the potential for complex estimations brought by collapse.

moddat <- HDW(L(G(wlddev, c(0, 10), 1, ~iso3c, ~year, 9:10), c(0, 10), ~iso3c, ~year), ~iso3c + qF(year))[-c(1,5)]
summary(lm(HDW.L10G1.PCGDP ~. , moddat))
# 
# Call:
# lm(formula = HDW.L10G1.PCGDP ~ ., data = moddat)
# 
# Residuals:
#     Min      1Q  Median      3Q     Max 
# -807.68  -10.80   -0.64   10.23  779.99 
# 
# Coefficients:
#                        Estimate Std. Error t value Pr(>|t|)    
# (Intercept)           1.907e-15  4.930e-01   0.000 1.000000    
# HDW.L10.PCGDP        -2.500e-03  1.292e-04 -19.347  < 2e-16 ***
# HDW.L10.L10G1.PCGDP  -5.885e-01  1.082e-02 -54.412  < 2e-16 ***
# HDW.L10.LIFEEX        1.056e+00  2.885e-01   3.661 0.000254 ***
# HDW.L10G1.LIFEEX      6.927e-01  1.154e-01   6.002 2.08e-09 ***
# HDW.L10.L10G1.LIFEEX  8.749e-01  1.108e-01   7.899 3.39e-15 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 35.69 on 5235 degrees of freedom
# Multiple R-squared:  0.4029,  Adjusted R-squared:  0.4023 
# F-statistic: 706.4 on 5 and 5235 DF,  p-value: < 2.2e-16

One of the inconveniences of the above computations is that it requires declaring the panel-identifiers iso3c and year again and again for each function. A great remedy here are the plm classes pseries and pdata.frame which collapse was built to support. This shows how one could run the same regression with plm:

pwlddev <- plm::pdata.frame(wlddev, index = c("iso3c", "year"))
moddat <- HDW(L(G(pwlddev, c(0, 10), 1, 9:10), c(0, 10)))[-c(1,5)]
summary(lm(HDW.L10G1.PCGDP ~. , moddat))
# 
# Call:
# lm(formula = HDW.L10G1.PCGDP ~ ., data = moddat)
# 
# Residuals:
#     Min      1Q  Median      3Q     Max 
# -677.61  -12.45   -1.02   10.86  913.22 
# 
# Coefficients:
#                        Estimate Std. Error t value Pr(>|t|)    
# (Intercept)           0.1456192  0.5187976   0.281 0.778962    
# HDW.L10.PCGDP        -0.0022910  0.0001253 -18.291  < 2e-16 ***
# HDW.L10.L10G1.PCGDP  -0.5859896  0.0113538 -51.612  < 2e-16 ***
# HDW.L10.LIFEEX        0.8701877  0.2456255   3.543 0.000399 ***
# HDW.L10G1.LIFEEX      0.6910533  0.1132028   6.105 1.11e-09 ***
# HDW.L10.L10G1.LIFEEX  0.8990853  0.1068241   8.417  < 2e-16 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Residual standard error: 37.51 on 5235 degrees of freedom
#   (7935 observations deleted due to missingness)
# Multiple R-squared:  0.3784,  Adjusted R-squared:  0.3778 
# F-statistic: 637.4 on 5 and 5235 DF,  p-value: < 2.2e-16

To learn more about the integration of collapse and plm, consult the corresponding vignette.

8. List Processing and a Panel-VAR Example

collapse also provides an ensemble of list-processing functions that grew out of a necessity of working with complex nested lists of data objects. The example provided in this section is also somewhat complex, but it demonstrates the utility of these functions while also providing a nice data-transformation task.

When summarizing the GGDC10S data in section 1, it was evident that certain sectors have a high share of economic activity in almost all countries in the sample. This prompts the question of whether there exist common patterns in the interaction of these important sectors across countries. One way to empirically study this could be through a (Structural) Panel-Vector-Autoregression (PSVAR) in value added with the 6 most important sectors (excluding government): Agriculture, manufacturing, wholesale and retail trade, construction, transport and storage and finance and real estate.

For this we will use the vars package6. Since vars natively does not support panel-VAR, we need to create the central varest object manually and then run the SVAR function to impose identification restrictions. We start with exploring and harmonizing the data:

library(vars)
# The 6 most important non-government sectors (see section 1)
sec <- c("AGR", "MAN", "WRT", "CON", "TRA", "FIRE")
# This creates a data.frame containing the value added of the 6 most important non-government sectors
data <- fsubset(GGDC10S, Variable == "VA", c("Country", "Year", sec)) %>%
  na_omit(cols = sec)
# Let's look at the log VA in agriculture across countries:
AGRmat <- psmat(data, AGR ~ Country, ~ Year, transpose = TRUE) %>% log   # Converting to panel series matrix
plot(AGRmat)
plot of chunk AGRmat

plot of chunk AGRmat

The plot shows quite some heterogeneity both in the levels (VA is in local currency) and in trend growth rates. In the panel-VAR estimation we are only really interested in the sectoral relationships within countries. Thus we need to harmonize this sectoral data further. One way would be taking growth rates or log-differences of the data, but VAR’s are usually estimated in levels unless the data are cointegrated (and value added series do not, in general, exhibit unit-root behavior). Thus to harmonize the data further we opt for subtracting a country-sector specific cubic trend from the data in logs:

# Subtracting a country specific cubic growth trend
AGRmat <- dapply(AGRmat, fhdwithin, poly(seq_row(AGRmat), 3), fill = TRUE)

plot(AGRmat)
plot of chunk AGRmatplot

plot of chunk AGRmatplot

This seems to have done a decent job in curbing most of the heterogeneity. Some series however have a high variance around that cubic trend. Therefore a final step is to standardize the data to bring the variances in line:

# Standardizing the cubic log-detrended data
AGRmat <- fscale(AGRmat)
plot(AGRmat)
plot of chunk AGRmatplot2

plot of chunk AGRmatplot2

Now this looks pretty good, and is about the most we can do in terms of harmonization without differencing the data. The code below applies these transformations to all sectors:

# Taking logs
settransformv(data, 3:8, log)
# Projecting out country FE and cubic trends from complete cases
gv(data, 3:8) <- HDW(data, ~ qF(Country)*poly(Year, 3), fill = TRUE)
# Scaling
gv(data, 3:8) <- STD(data, ~ Country, cols = 3:8, keep.by = FALSE)

# Check the plot
psmat(data, ~ Country, ~ Year) %>% plot
plot of chunk psmatplot2

plot of chunk psmatplot2

Since the data is annual, let us estimate the Panel-VAR with one lag:

# This adds one lag of all series to the data
add_vars(data) <- L(data, 1, ~ Country, ~ Year, keep.ids = FALSE)
# This removes missing values from all but the first row and drops identifier columns (vars is made for time series without gaps)
data <- rbind(ss(data, 1, -(1:2)), na_omit(ss(data, -1, -(1:2))))
head(data)
#   STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE L1.STD.HDW.AGR
# 1  0.65713943   2.2350584    1.946383 -0.03574399   1.0877811    1.0476507             NA
# 2 -0.14377115   1.8693570    1.905081  1.23225734   1.0542315    0.9105622     0.65713943
# 3 -0.09209878  -0.8212004    1.997253 -0.01783824   0.6718465    0.6134260    -0.14377115
# 4 -0.25213869  -1.7830320   -1.970855 -2.68332505  -1.8475551    0.4382902    -0.09209878
# 5 -0.31623401  -4.2931567   -1.822211 -2.75551916  -0.7066491   -2.1982640    -0.25213869
# 6 -0.72691916  -1.3219387   -2.079333 -0.12148295  -1.1398220   -2.2230474    -0.31623401
#   L1.STD.HDW.MAN L1.STD.HDW.WRT L1.STD.HDW.CON L1.STD.HDW.TRA L1.STD.HDW.FIRE
# 1             NA             NA             NA             NA              NA
# 2      2.2350584       1.946383    -0.03574399      1.0877811       1.0476507
# 3      1.8693570       1.905081     1.23225734      1.0542315       0.9105622
# 4     -0.8212004       1.997253    -0.01783824      0.6718465       0.6134260
# 5     -1.7830320      -1.970855    -2.68332505     -1.8475551       0.4382902
# 6     -4.2931567      -1.822211    -2.75551916     -0.7066491      -2.1982640

Having prepared the data, the code below estimates the panel-VAR using lm and creates the varest object:

# saving the names of the 6 sectors
nam <- names(data)[1:6]

pVAR <- list(varresult = setNames(lapply(seq_len(6), function(i)    # list of 6 lm's each regressing
               lm(as.formula(paste0(nam[i], "~ -1 + . ")),          # the sector on all lags of
               get_vars(data, c(i, 7:fncol(data))))), nam),         # itself and other sectors, removing the missing first row
             datamat = ss(data, -1),                                # The full data containing levels and lags of the sectors, removing the missing first row
             y = do.call(cbind, get_vars(data, 1:6)),               # Only the levels data as matrix
             type = "none",                                         # No constant or tend term: We harmonized the data already
             p = 1,                                                 # The lag-order
             K = 6,                                                 # The number of variables
             obs = fnrow(data)-1,                                   # The number of non-missing obs
             totobs = fnrow(data),                                  # The total number of obs
             restrictions = NULL,
             call = quote(VAR(y = data)))

class(pVAR) <- "varest"

The significant serial-correlation test below suggests that the panel-VAR with one lag is ill-identified, but the sample size is also quite large so the test is prone to reject, and the test is likely also still picking up remaining cross-sectional heterogeneity. For the purposes of this vignette this shall not bother us.

serial.test(pVAR)
# 
#   Portmanteau Test (asymptotic)
# 
# data:  Residuals of VAR object pVAR
# Chi-squared = 1680.8, df = 540, p-value < 2.2e-16

By default the VAR is identified using a Choleski ordering of the direct impact matrix in which the first variable (here Agriculture) is assumed to not be directly impacted by any other sector in the current period, and this descends down to the last variable (Finance and Real Estate), which is assumed to be impacted by all other sectors in the current period. For structural identification it is usually necessary to impose restrictions on the direct impact matrix in line with economic theory. It is difficult to conceive theories on the average worldwide interaction of broad economic sectors, but to aid identification we will compute the correlation matrix in growth rates and restrict the lowest coefficients to be 0, which should be better than just imposing a random Choleski ordering.

# This computes the pairwise correlations between standardized sectoral growth rates across countries
corr <- fsubset(GGDC10S, Variable == "VA") %>%   # Subset rows: Only VA
           fgroup_by(Country) %>%                # Group by country
                get_vars(sec) %>%                # Select the 6 sectors
                   fgrowth %>%                   # Compute Sectoral growth rates (a time-variable can be passed, but not necessary here as the data is ordered)
                      fscale %>%                 # Scale and center (i.e. standardize)
                         pwcor                   # Compute Pairwise correlations

corr
#        AGR   MAN   WRT   CON   TRA  FIRE
# AGR     1    .55   .59   .39   .52   .41
# MAN    .55    1    .67   .54   .65   .48
# WRT    .59   .67    1    .56   .66   .52
# CON    .39   .54   .56    1    .53   .46
# TRA    .52   .65   .66   .53    1    .51
# FIRE   .41   .48   .52   .46   .51    1

# We need to impose K*(K-1)/2 = 15 (with K = 6 variables) restrictions for identification
corr[corr <= sort(corr)[15]] <- 0
corr
#        AGR   MAN   WRT   CON   TRA  FIRE
# AGR     1    .55   .59   .00   .00   .00
# MAN    .55    1    .67   .54   .65   .00
# WRT    .59   .67    1    .56   .66   .00
# CON    .00   .54   .56    1    .00   .00
# TRA    .00   .65   .66   .00    1    .00
# FIRE   .00   .00   .00   .00   .00    1

# The rest is unknown (i.e. will be estimated)
corr[corr > 0 & corr < 1] <- NA

# Using a diagonal shock vcov matrix (standard assumption for SVAR)
Bmat <- diag(6)
diag(Bmat) <- NA


# This estimates the Panel-SVAR using Maximum Likelihood:
pSVAR <- SVAR(pVAR, Amat = unclass(corr), Bmat = Bmat, estmethod = "direct")
pSVAR
# 
# SVAR Estimation Results:
# ======================== 
# 
# 
# Estimated A matrix:
#              STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE
# STD.HDW.AGR       1.0000    -0.59223     0.51301      0.0000     0.00000            0
# STD.HDW.MAN      -0.2547     1.00000    -0.07819     -0.1711     0.14207            0
# STD.HDW.WRT      -0.3924    -0.56875     1.00000     -0.0135    -0.01391            0
# STD.HDW.CON       0.0000     0.02595    -0.18541      1.0000     0.00000            0
# STD.HDW.TRA       0.0000    -0.03321    -0.05370      0.0000     1.00000            0
# STD.HDW.FIRE      0.0000     0.00000     0.00000      0.0000     0.00000            1
# 
# Estimated B matrix:
#              STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE
# STD.HDW.AGR        0.678      0.0000      0.0000      0.0000      0.0000       0.0000
# STD.HDW.MAN        0.000      0.6248      0.0000      0.0000      0.0000       0.0000
# STD.HDW.WRT        0.000      0.0000      0.4155      0.0000      0.0000       0.0000
# STD.HDW.CON        0.000      0.0000      0.0000      0.5028      0.0000       0.0000
# STD.HDW.TRA        0.000      0.0000      0.0000      0.0000      0.5593       0.0000
# STD.HDW.FIRE       0.000      0.0000      0.0000      0.0000      0.0000       0.6475

Now this object is quite involved, which brings us to the actual subject of this section:

# psVAR$var$varresult is a list containing the 6 linear models fitted above, it is not displayed in full here.
str(pSVAR, give.attr = FALSE, max.level = 3)
# List of 13
#  $ A      : num [1:6, 1:6] 1 -0.255 -0.392 0 0 ...
#  $ Ase    : num [1:6, 1:6] 0 0 0 0 0 0 0 0 0 0 ...
#  $ B      : num [1:6, 1:6] 0.678 0 0 0 0 ...
#  $ Bse    : num [1:6, 1:6] 0 0 0 0 0 0 0 0 0 0 ...
#  $ LRIM   : NULL
#  $ Sigma.U: num [1:6, 1:6] 43.898 24.88 23.941 4.873 0.661 ...
#  $ LR     :List of 5
#   ..$ statistic: Named num 1130
#   ..$ parameter: Named num 1
#   ..$ p.value  : Named num 0
#   ..$ method   : chr "LR overidentification"
#   ..$ data.name: symbol data
#  $ opt    :List of 5
#   ..$ par        : num [1:20] -0.2547 -0.3924 -0.5922 -0.5688 0.0259 ...
#   ..$ value      : num 10924
#   ..$ counts     : Named int [1:2] 501 NA
#   ..$ convergence: int 1
#   ..$ message    : NULL
#  $ start  : num [1:20] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
#  $ type   : chr "AB-model"
#  $ var    :List of 10
#   ..$ varresult   :List of 6
#   .. ..$ STD.HDW.AGR :List of 13
#   .. ..$ STD.HDW.MAN :List of 13
#   .. ..$ STD.HDW.WRT :List of 13
#   .. ..$ STD.HDW.CON :List of 13
#   .. ..$ STD.HDW.TRA :List of 13
#   .. ..$ STD.HDW.FIRE:List of 13
#   ..$ datamat     :'data.frame':  2060 obs. of  12 variables:
#   .. ..$ STD.HDW.AGR    : num [1:2060] -0.1438 -0.0921 -0.2521 -0.3162 -0.7269 ...
#   .. ..$ STD.HDW.MAN    : num [1:2060] 1.869 -0.821 -1.783 -4.293 -1.322 ...
#   .. ..$ STD.HDW.WRT    : num [1:2060] 1.91 2 -1.97 -1.82 -2.08 ...
#   .. ..$ STD.HDW.CON    : num [1:2060] 1.2323 -0.0178 -2.6833 -2.7555 -0.1215 ...
#   .. ..$ STD.HDW.TRA    : num [1:2060] 1.054 0.672 -1.848 -0.707 -1.14 ...
#   .. ..$ STD.HDW.FIRE   : num [1:2060] 0.911 0.613 0.438 -2.198 -2.223 ...
#   .. ..$ L1.STD.HDW.AGR : num [1:2060] 0.6571 -0.1438 -0.0921 -0.2521 -0.3162 ...
#   .. ..$ L1.STD.HDW.MAN : num [1:2060] 2.235 1.869 -0.821 -1.783 -4.293 ...
#   .. ..$ L1.STD.HDW.WRT : num [1:2060] 1.95 1.91 2 -1.97 -1.82 ...
#   .. ..$ L1.STD.HDW.CON : num [1:2060] -0.0357 1.2323 -0.0178 -2.6833 -2.7555 ...
#   .. ..$ L1.STD.HDW.TRA : num [1:2060] 1.088 1.054 0.672 -1.848 -0.707 ...
#   .. ..$ L1.STD.HDW.FIRE: num [1:2060] 1.048 0.911 0.613 0.438 -2.198 ...
#   ..$ y           : num [1:2061, 1:6] 0.6571 -0.1438 -0.0921 -0.2521 -0.3162 ...
#   ..$ type        : chr "none"
#   ..$ p           : num 1
#   ..$ K           : num 6
#   ..$ obs         : num 2060
#   ..$ totobs      : int 2061
#   ..$ restrictions: NULL
#   ..$ call        : language VAR(y = data)
#  $ iter   : Named int 501
#  $ call   : language SVAR(x = pVAR, estmethod = "direct", Amat = unclass(corr), Bmat = Bmat)

8.1 List Search and Identification

When dealing with such a list-like object, we might be interested in its complexity by measuring the level of nesting. This can be done with ldepth:

# The list-tree of this object has 5 levels of nesting
ldepth(pSVAR)
# [1] 5

# This data has a depth of 1, thus this dataset does not contain list-columns
ldepth(data)
# [1] 1

Further we might be interested in knowing whether this list-object contains non-atomic elements like call, terms or formulas. The function is.regular in the collapse package checks if an object is atomic or list-like, and the recursive version is_unlistable checks whether all objects in a nested structure are atomic or list-like:

# Is this object composed only of atomic elements e.g. can it be unlisted?
is_unlistable(pSVAR)
# [1] FALSE

Evidently this object is not unlistable, from viewing its structure we know that it contains several call and terms objects. We might also want to know if this object saves some kind of residuals or fitted values. This can be done using has_elem, which also supports regular expression search of element names:

# Does this object contain an element with "fitted" in its name?
has_elem(pSVAR, "fitted", regex = TRUE)
# [1] TRUE

# Does this object contain an element with "residuals" in its name?
has_elem(pSVAR, "residuals", regex = TRUE)
# [1] TRUE

We might also want to know whether the object contains some kind of data-matrix. This can be checked by calling:

# Is there a matrix stored in this object?
has_elem(pSVAR, is.matrix)
# [1] TRUE

These functions can sometimes be helpful in exploring objects. A much greater advantage of having functions to search and check lists is the ability to write more complex programs with them (which will not be demonstrated here).

8.2 List Subsetting

Having gathered some information about the pSVAR object, this section introduces several extractor functions to pull out elements from such lists: get_elem can be used to pull out elements from lists in a simplified format7.

# This is the path to the residuals from a single equation
str(pSVAR$var$varresult$STD.HDW.AGR$residuals)
#  Named num [1:2060] -0.7234 -0.1962 -0.1993 0.0739 -0.1418 ...
#  - attr(*, "names")= chr [1:2060] "2" "3" "4" "5" ...

# get_elem gets the residuals from all 6 equations and puts them in a top-level list
resid <- get_elem(pSVAR, "residuals")
str(resid, give.attr = FALSE)
# List of 6
#  $ STD.HDW.AGR : Named num [1:2060] -0.7234 -0.1962 -0.1993 0.0739 -0.1418 ...
#  $ STD.HDW.MAN : Named num [1:2060] 0.363 -1.989 -1.167 -3.082 1.474 ...
#  $ STD.HDW.WRT : Named num [1:2060] 0.37 0.628 -3.054 -0.406 -0.384 ...
#  $ STD.HDW.CON : Named num [1:2060] 1.035 -1.093 -2.62 -0.611 2.307 ...
#  $ STD.HDW.TRA : Named num [1:2060] 0.1481 -0.2599 -2.2361 0.8619 -0.0915 ...
#  $ STD.HDW.FIRE: Named num [1:2060] -0.11396 -0.33092 0.11754 -2.10521 -0.00968 ...

# Quick conversion to matrix and plotting
qM(resid) %>% plot.ts(main = "Panel-VAR Residuals")
plot of chunk PVARplot

plot of chunk PVARplot

Similarly, we could pull out and plot the fitted values:

# Regular expression search and retrieval of fitted values
get_elem(pSVAR, "^fi", regex = TRUE) %>% qM %>%
  plot.ts(main = "Panel-VAR Fitted Values")
plot of chunk PVARfittedplot

plot of chunk PVARfittedplot

Below the main quantities of interest in SVAR analysis are computed: The impulse response functions (IRF’s) and forecast error variance decompositions (FEVD’s):

# This computes orthogonalized impulse response functions
pIRF <- irf(pSVAR)
# This computes the forecast error variance decompositions
pFEVD <- fevd(pSVAR)

The pIRF object contains the IRF’s with lower and upper confidence bounds and some atomic elements providing information about the object:

# See the structure of a vars IRF object:
str(pIRF, give.attr = FALSE)
# List of 11
#  $ irf       :List of 6
#   ..$ STD.HDW.AGR : num [1:11, 1:6] 0.611 0.399 0.268 0.185 0.132 ...
#   ..$ STD.HDW.MAN : num [1:11, 1:6] 0.1774 0.1549 0.134 0.1142 0.0959 ...
#   ..$ STD.HDW.WRT : num [1:11, 1:6] -0.1807 -0.1071 -0.0647 -0.0402 -0.0259 ...
#   ..$ STD.HDW.CON : num [1:11, 1:6] 0.0215 0.0383 0.0442 0.0438 0.0403 ...
#   ..$ STD.HDW.TRA : num [1:11, 1:6] -0.02595 -0.01257 -0.00721 -0.00511 -0.00421 ...
#   ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 0.0122 0.0147 0.0132 0.0104 ...
#  $ Lower     :List of 6
#   ..$ STD.HDW.AGR : num [1:11, 1:6] 0.1137 -0.0144 -0.0393 -0.0446 -0.0439 ...
#   ..$ STD.HDW.MAN : num [1:11, 1:6] -0.6474 -0.3434 -0.2069 -0.125 -0.0734 ...
#   ..$ STD.HDW.WRT : num [1:11, 1:6] -0.659 -0.427 -0.311 -0.236 -0.189 ...
#   ..$ STD.HDW.CON : num [1:11, 1:6] -0.721 -0.417 -0.258 -0.183 -0.123 ...
#   ..$ STD.HDW.TRA : num [1:11, 1:6] -0.4161 -0.2568 -0.169 -0.1231 -0.0894 ...
#   ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 -0.0157 -0.022 -0.0227 -0.0211 ...
#  $ Upper     :List of 6
#   ..$ STD.HDW.AGR : num [1:11, 1:6] 1.218 0.801 0.565 0.389 0.275 ...
#   ..$ STD.HDW.MAN : num [1:11, 1:6] 0.906 0.601 0.439 0.328 0.239 ...
#   ..$ STD.HDW.WRT : num [1:11, 1:6] 0.846 0.601 0.428 0.319 0.239 ...
#   ..$ STD.HDW.CON : num [1:11, 1:6] 0.716 0.514 0.4 0.305 0.234 ...
#   ..$ STD.HDW.TRA : num [1:11, 1:6] 0.2866 0.21 0.1591 0.1207 0.0899 ...
#   ..$ STD.HDW.FIRE: num [1:11, 1:6] 0 0.0363 0.0471 0.0461 0.0405 ...
#  $ response  : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ...
#  $ impulse   : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ...
#  $ ortho     : logi TRUE
#  $ cumulative: logi FALSE
#  $ runs      : num 100
#  $ ci        : num 0.05
#  $ boot      : logi TRUE
#  $ model     : chr "svarest"

We could separately access the top-level atomic or list elements using atomic_elem or list_elem:

# Pool-out top-level atomic elements in the list
str(atomic_elem(pIRF))
# List of 8
#  $ response  : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ...
#  $ impulse   : chr [1:6] "STD.HDW.AGR" "STD.HDW.MAN" "STD.HDW.WRT" "STD.HDW.CON" ...
#  $ ortho     : logi TRUE
#  $ cumulative: logi FALSE
#  $ runs      : num 100
#  $ ci        : num 0.05
#  $ boot      : logi TRUE
#  $ model     : chr "svarest"

There are also recursive versions of atomic_elem and list_elem named reg_elem and irreg_elem which can be used to split nested lists into the atomic and non-atomic parts. These are not covered in this vignette.

8.3 Recursive Apply and Unlisting in 2D

vars supplies simple plot methods for IRF and FEVD objects using base graphics. In this section we however want to generate nicer and more compact plots using ggplot2, and also compute some statistics on the IRF data. Starting with the latter, the code below sums the 10-period impulse response coefficients of each sector in response to each sectoral impulse and stores them in a data frame:

# Computing the cumulative impact after 10 periods
list_elem(pIRF) %>%                            # Pull out the sublist elements containing the IRF coefficients + CI's
  rapply2d(function(x) round(fsum(x), 2)) %>%  # Recursively apply the column-sums to coefficient matrices (could also use colSums)
  unlist2d(c("Type", "Impulse"))               # Recursively row-bind the result to a data.frame and add identifier columns
#     Type      Impulse STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE
# 1    irf  STD.HDW.AGR        1.92        1.08        1.68        0.83        0.72         0.54
# 2    irf  STD.HDW.MAN        0.98        2.22        2.12        1.09        0.97         1.05
# 3    irf  STD.HDW.WRT       -0.47       -0.27        0.65        0.17        0.03        -0.02
# 4    irf  STD.HDW.CON        0.33        0.39        0.34        2.00        0.55         0.38
# 5    irf  STD.HDW.TRA       -0.07       -0.11       -0.24       -0.30        1.31        -0.20
# 6    irf STD.HDW.FIRE        0.07       -0.07        0.02       -0.09       -0.06         1.84
# 7  Lower  STD.HDW.AGR       -0.18       -2.08       -3.14       -0.68       -2.46        -0.68
# 8  Lower  STD.HDW.MAN       -1.52        0.38       -1.30       -0.86       -1.82         0.12
# 9  Lower  STD.HDW.WRT       -2.38       -2.65       -0.22       -2.68       -2.01        -1.20
# 10 Lower  STD.HDW.CON       -2.01       -2.47       -2.16        0.53       -1.68        -0.80
# 11 Lower  STD.HDW.TRA       -1.32       -1.34       -1.17       -1.64        0.31        -0.69
# 12 Lower STD.HDW.FIRE       -0.16       -0.26       -0.16       -0.27       -0.20         0.96
# 13 Upper  STD.HDW.AGR        3.97        3.18        3.21        3.69        2.61         1.58
# 14 Upper  STD.HDW.MAN        3.19        3.85        3.00        3.60        3.05         1.78
# 15 Upper  STD.HDW.WRT        3.06        2.66        4.41        2.49        3.31         1.47
# 16 Upper  STD.HDW.CON        2.85        3.30        3.20        3.88        2.59         1.76
# 17 Upper  STD.HDW.TRA        1.08        1.93        1.76        0.72        2.82         0.63
# 18 Upper STD.HDW.FIRE        0.30        0.15        0.30        0.12        0.18         2.21

The function rapply2d used here is very similar to base::rapply, with the difference that the result is not simplified / unlisted by default and that rapply2d will treat data frames like atomic objects and apply functions to them. unlist2d is an efficient generalization of base::unlist to 2-dimensions, or one could also think of it as a recursive generalization of do.call(rbind, ...). It efficiently unlists nested lists of data objects and creates a data frame with identifier columns for each level of nesting on the left, and the content of the list in columns on the right.

The above cumulative coefficients suggest that Agriculture responds mostly to it’s own shock, and a bit to shocks in Manufacturing and Wholesale and Retail Trade. Similar patters can be observed for Manufacturing and Wholesale and Retail Trade. Thus these three sectors seem to be interlinked in most countries. The remaining three sectors are mostly affected by their own dynamics, but also by Agriculture and Manufacturing.

Let us use ggplot2 to create nice compact plots of the IRF’s and FEVD’s. For this task unlist2d will again be extremely helpful in creating the data frame representation required. Starting with the IRF’s, we will discard the upper and lower bounds and just use the impulses:

# This binds the matrices after adding integer row-names to them to a data.table

data <- pIRF$irf %>%                      # Get only the coefficient matrices, discard the confidence bounds
           unlist2d(idcols = "Impulse",   # Recursive unlisting to data.table creating a factor id-column
                    row.names = "Time",   # and saving generated rownames in a variable called 'Time'
                    id.factor = TRUE,     # -> Create Id column ('Impulse') as factor
                    DT = TRUE)            # -> Output as data.table (default is data.frame)

head(data, 3)
#        Impulse  Time STD.HDW.AGR STD.HDW.MAN STD.HDW.WRT STD.HDW.CON STD.HDW.TRA STD.HDW.FIRE
#         <fctr> <int>       <num>       <num>       <num>       <num>       <num>        <num>
# 1: STD.HDW.AGR     1   0.6113132   0.1896711   0.3488940  0.05976606  0.02503336   0.00000000
# 2: STD.HDW.AGR     2   0.3986337   0.1892803   0.3014961  0.09430567  0.07263670   0.03669857
# 3: STD.HDW.AGR     3   0.2676944   0.1654161   0.2491999  0.10769335  0.09330830   0.06042380

data <- melt(data, 1:2)                   # Using data.table's melt
head(data, 3)
#        Impulse  Time    variable     value
#         <fctr> <int>      <fctr>     <num>
# 1: STD.HDW.AGR     1 STD.HDW.AGR 0.6113132
# 2: STD.HDW.AGR     2 STD.HDW.AGR 0.3986337
# 3: STD.HDW.AGR     3 STD.HDW.AGR 0.2676944

# Here comes the plot:
  ggplot(data, aes(x = Time, y = value, color = Impulse)) +
    geom_line(size = I(1)) + geom_hline(yintercept = 0) +
    labs(y = NULL, title = "Orthogonal Impulse Response Functions") +
    scale_color_manual(values = rainbow(6)) +
    facet_wrap(~ variable) +
    theme_light(base_size = 14) +
    scale_x_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+
    scale_y_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+
    theme(axis.text = element_text(colour = "black"),
      plot.title = element_text(hjust = 0.5),
      strip.background = element_rect(fill = "white", colour = NA),
      strip.text = element_text(face = "bold", colour = "grey30"),
      axis.ticks = element_line(colour = "black"),
      panel.border = element_rect(colour = "black"))
plot of chunk IRFplot

plot of chunk IRFplot

To round things off, below we do the same thing for the FEVD’s:

data <- unlist2d(pFEVD, idcols = "variable", row.names = "Time", id.factor = TRUE, DT = TRUE) %>%
            melt(c("variable", "Time"), variable.name = "Sector")
head(data, 3)
#       variable  Time      Sector     value
#         <fctr> <int>      <fctr>     <num>
# 1: STD.HDW.AGR     1 STD.HDW.AGR 0.8513029
# 2: STD.HDW.AGR     2 STD.HDW.AGR 0.8385913
# 3: STD.HDW.AGR     3 STD.HDW.AGR 0.8264789

# Here comes the plot:
  ggplot(data, aes(x = Time, y = value, fill = Sector)) +
    geom_area(position = "fill", alpha = 0.8) +
    labs(y = NULL, title = "Forecast Error Variance Decompositions") +
    scale_fill_manual(values = rainbow(6)) +
    facet_wrap(~ set_class(variable, "factor")) +
    theme_linedraw(base_size = 14) +
    scale_x_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+
    scale_y_continuous(breaks = scales::pretty_breaks(n=7), expand = c(0, 0))+
    theme(plot.title = element_text(hjust = 0.5),
      strip.background = element_rect(fill = "white", colour = NA),
      strip.text = element_text(face = "bold", colour = "grey30"))
plot of chunk FEVDplot

plot of chunk FEVDplot

Both the IRF’s and the FEVD’s show that Agriculture, Manufacturing and Wholesale and Retail Trade are broadly interlinked, even in the short-run, and that Agriculture and Manufacturing explain some of the variation in Construction, Transport and Finance at longer horizons. Of course the identification strategy used for this example was not really structural or theory based. A better strategy could be to aggregate the World Input-Output Database and use those shares for identification (which would be another very nice collapse exercise, but not for this vignette).

Going Further

To learn more about collapse, just examine the documentation help("collapse-documentation") which is organized, extensive and contains lots of examples.

References

Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). “Patterns of Structural Change in Developing Countries.†. In J. Weiss, & M. Tribe (Eds.), Routledge Handbook of Industry and Development. (pp. 65-83). Routledge.

Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†Econometrica 46 (1): 69–85.


  1. in the Within data, the overall mean was added back after subtracting out country means, to preserve the level of the data, see also section 6.5.↩︎

  2. You may wonder why with weights the standard-deviations in the group ‘4.0.1’ are 0 while they were NA without weights. This stirs from the fact that group ‘4.0.1’ only has one observation, and in the Bessel-corrected estimate of the variance there is a n - 1 in the denominator which becomes 0 if n = 1 and division by 0 becomes NA in this case (fvar was designed that way to match the behavior or stats::var). In the weighted version the denominator is sum(w) - 1, and if sum(w) is not 1, then the denominator is not 0. The standard-deviation however is still 0 because the sum of squares in the numerator is 0. In other words this means that in a weighted aggregation singleton-groups are not treated like singleton groups unless the corresponding weight is 1.↩︎

  3. I.e. the most frequent value. By default a first-mode is computed.↩︎

  4. If the list is unnamed, collap uses all.vars(substitute(list(FUN1, FUN2, ...))) to get the function names. Alternatively it is also possible to pass a character vector of function names.↩︎

  5. BY.grouped_df is probably only useful together with the expand.wide = TRUE argument which dplyr does not have, because otherwise dplyr’s summarise and mutate are substantially faster on larger data.↩︎

  6. I noticed there is a panelvar package, but I am more familiar with vars and panelvar can be pretty slow in my experience. We also have about 50 years of data here, so dynamic panel bias is not a big issue.↩︎

  7. The vars package also provides convenient extractor functions for some quantities, but get_elem of course works in a much broader range of contexts.↩︎

collapse/inst/doc/collapse_for_tidyverse_users.html0000644000176200001440000016557515202627533022503 0ustar liggesusers collapse for tidyverse Users

collapse for tidyverse Users

Sebastian Krantz

2026-05-18

collapse is a C/C++ based package for data transformation and statistical computing in R that aims to enable greater performance and statistical complexity in data manipulation tasks and offers a stable, class-agnostic, and lightweight API. It is part of the core fastverse, a suite of lightweight packages with similar objectives.

The tidyverse set of packages provides a rich, expressive, and consistent syntax for data manipulation in R centering on the tibble object and tidy data principles (each observation is a row, each variable is a column).

collapse fully supports the tibble object and provides many tidyverse-like functions for data manipulation. It can thus be used to write tidyverse-like data manipulation code that, thanks to low-level vectorization of many statistical operations and optimized R code, typically runs much faster than native tidyverse code, in addition to being much more lightweight in dependencies.

Its aim is not to create a faster tidyverse, i.e., it does not implements all aspects of the rich tidyverse grammar or changes to it1, and also takes inspiration from other leading data manipulation libraries to serve broad aims of performance, parsimony, complexity, and robustness in data manipulation for R.

Namespace and Global Options

collapse data manipulation functions familiar to tidyverse users include fselect, fgroup_by, fsummarise, fmutate, across, frename, fslice, and fcount. Other functions like fsubset, ftransform, and get_vars are inspired by base R, while again other functions like join, pivot, roworder, colorder, rowbind, etc. are inspired by other data manipulation libraries such as data.table and polars.

By virtue of the f- prefixes, the collapse namespace has no conflicts with the tidyverse, and these functions can easily be substituted in a tidyverse workflow.

R users willing to replace the tidyverse have the additional option to mask functions and eliminate the prefixes with set_collapse. For example

library(collapse)
set_collapse(mask = "manip") # version >= 2.0.0 

makes available functions select, group_by, summarise, mutate, rename, count, subset, slice, and transform in the collapse namespace and detaches and re-attaches the package, such that the following code is executed by collapse:

mtcars |>
  subset(mpg > 11) |>
  group_by(cyl, vs, am) |>
  summarise(across(c(mpg, carb, hp), mean), 
            qsec_wt = weighted.mean(qsec, wt))
#   cyl vs am      mpg     carb        hp  qsec_wt
# 1   4  0  1 26.00000 2.000000  91.00000 16.70000
# 2   4  1  0 22.90000 1.666667  84.66667 21.04028
# 3   4  1  1 28.37143 1.428571  80.57143 18.75509
# 4   6  0  1 20.56667 4.666667 131.66667 16.33306
# 5   6  1  0 19.12500 2.500000 115.25000 19.21275
# 6   8  0  0 15.98000 2.900000 191.00000 17.01239
# 7   8  0  1 15.40000 6.000000 299.50000 14.55297

Note that the correct documentation still needs to be called with prefixes, i.e., ?fsubset. See ?set_collapse for further options to the package, which also includes optimization options such as nthreads, na.rm, sort, and stable.algo. Note also that if you use collapse’s namespace masking, you can use fastverse::fastverse_conflicts() to check for namespace conflicts with other packages.

Using the Fast Statistical Functions

A key feature of collapse is that it not only provides functions for data manipulation, but also a full set of statistical functions and algorithms to speed up statistical calculations and perform more complex statistical operations (e.g. involving weights or time series data).

Notably among these, the Fast Statistical Functions is a consistent set of S3-generic statistical functions providing fully vectorized statistical operations in R.

Specifically, operations such as calculating the mean via the S3 generic fmean() function are vectorized across columns and groups and may also involve weights or transformations of the original data:

fmean(mtcars$mpg)     # Vector
# [1] 20.09063
fmean(EuStockMarkets) # Matrix
#      DAX      SMI      CAC     FTSE 
# 2530.657 3376.224 2227.828 3565.643
fmean(mtcars)         # Data Frame
#        mpg        cyl       disp         hp       drat         wt       qsec         vs         am 
#  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750   0.437500   0.406250 
#       gear       carb 
#   3.687500   2.812500

fmean(mtcars$mpg, w = mtcars$wt)  # Weighted mean
# [1] 18.54993
fmean(mtcars$mpg, g = mtcars$cyl) # Grouped mean
#        4        6        8 
# 26.66364 19.74286 15.10000
fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt)   # Weighted group mean
#        4        6        8 
# 25.93504 19.64578 14.80643
fmean(mtcars[5:10], g = mtcars$cyl, w = mtcars$wt) # Of data frame
#       drat       wt     qsec        vs        am     gear
# 4 4.031264 2.414750 19.38044 0.9148868 0.6498031 4.047250
# 6 3.569170 3.152060 18.12198 0.6212191 0.3787809 3.821036
# 8 3.205658 4.133116 16.88529 0.0000000 0.1203808 3.240762
fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt, TRA = "fill") # Replace data by weighted group mean
#  [1] 19.64578 19.64578 25.93504 19.64578 14.80643 19.64578 14.80643 25.93504 25.93504 19.64578
# [11] 19.64578 14.80643 14.80643 14.80643 14.80643 14.80643 14.80643 25.93504 25.93504 25.93504
# [21] 25.93504 14.80643 14.80643 14.80643 14.80643 25.93504 25.93504 25.93504 14.80643 19.64578
# [31] 14.80643 25.93504
# etc...

The data manipulation functions of collapse are integrated with these Fast Statistical Functions to enable vectorized statistical operations. For example, the following code

mtcars |>
  subset(mpg > 11) |>
  group_by(cyl, vs, am) |>
  summarise(across(c(mpg, carb, hp), fmean), 
            qsec_wt = fmean(qsec, wt))
#   cyl vs am      mpg     carb        hp  qsec_wt
# 1   4  0  1 26.00000 2.000000  91.00000 16.70000
# 2   4  1  0 22.90000 1.666667  84.66667 21.04028
# 3   4  1  1 28.37143 1.428571  80.57143 18.75509
# 4   6  0  1 20.56667 4.666667 131.66667 16.33306
# 5   6  1  0 19.12500 2.500000 115.25000 19.21275
# 6   8  0  0 15.98000 2.900000 191.00000 17.01239
# 7   8  0  1 15.40000 6.000000 299.50000 14.55297

gives exactly the same result as above, but the execution is much faster (especially on larger data), because with Fast Statistical Functions, the data does not need to be split by groups, and there is no need to call lapply() inside the across() statement: fmean.data.frame() is simply applied to a subset of the data containing columns mpg, carb and hp.

The Fast Statistical Functions also have a method for grouped data, so if we did not want to calculate the weighted mean of qsec, the code would simplify as follows:

mtcars |>
  subset(mpg > 11) |>
  group_by(cyl, vs, am) |>
  select(mpg, carb, hp) |> 
  fmean()
#   cyl vs am      mpg     carb        hp
# 1   4  0  1 26.00000 2.000000  91.00000
# 2   4  1  0 22.90000 1.666667  84.66667
# 3   4  1  1 28.37143 1.428571  80.57143
# 4   6  0  1 20.56667 4.666667 131.66667
# 5   6  1  0 19.12500 2.500000 115.25000
# 6   8  0  0 15.98000 2.900000 191.00000
# 7   8  0  1 15.40000 6.000000 299.50000

Note that all functions in collapse, including the Fast Statistical Functions, have the default na.rm = TRUE, i.e., missing values are skipped in calculations. This can be changed using set_collapse(na.rm = FALSE) to give behavior more consistent with base R.

Another thing to be aware of when using Fast Statistical Functions inside data manipulation functions is that they toggle vectorized execution wherever they are used. E.g.

mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + min(qsec)) # Vectorized
#   cyl      mpg
# 1   4 41.16364
# 2   6 34.24286
# 3   8 29.60000

calculates a grouped mean of mpg but adds the overall minimum of qsec to the result, i.e., it is equivalent to fmean(mpg, g = cyl) + min(qsec). On the other hand

mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + fmin(qsec)) # Vectorized
#   cyl      mpg
# 1   4 43.36364
# 2   6 35.24286
# 3   8 29.60000
mtcars |> group_by(cyl) |> summarise(mpg = mean(mpg) + min(qsec))   # Not vectorized
#   cyl      mpg
# 1   4 43.36364
# 2   6 35.24286
# 3   8 29.60000

both give the mean + the minimum within each group, but calculated in different ways: the former is equivalent to fmean(mpg, g = cyl) + fmin(qsec, g = cyl), whereas the latter is equal to sapply(gsplit(mpg, cyl), function(x) mean(x) + min(x)).

See ?fsummarise and ?fmutate for more detailed examples. This eager vectorization approach is intentional as it allows users to vectorize complex expressions and fall back to base R if this is not desired. This blog post by Andrew Ghazi provides an excellent example of computing a p-value test statistic by groups. Note that only expressions typed out can be vectorized; expressions inside functions such as mean_plus_min <- function(x) fmean(x) + fmin(x) are not vectorized.2 To take full advantage of collapse, it is thus highly recommended to use the Fast Statistical Functions as much as possible.

Writing Efficient Code

It is also performance-critical to correctly sequence operations and limit excess computations. tidyverse code is often inefficient simply because the tidyverse allows you to do everything. For example, mtcars |> group_by(cyl) |> filter(mpg > 13) |> arrange(mpg) is permissible but inefficient code as it filters and reorders grouped data, requiring modifications to both the data frame and the attached grouping object. collapse does not allow calls to fsubset() on grouped data, and messages about it in roworder(), encouraging you to write more efficient code.

The above example can also be optimized because we are subsetting the whole frame and then doing computations on a subset of columns. It would be more efficient to select all required columns during the subset operation:

mtcars |>
  subset(mpg > 11, cyl, vs, am, mpg, carb, hp, qsec, wt) |>
  group_by(cyl, vs, am) |>
  summarise(across(c(mpg, carb, hp), fmean), 
            qsec_wt = fmean(qsec, wt))
#   cyl vs am      mpg     carb        hp  qsec_wt
# 1   4  0  1 26.00000 2.000000  91.00000 16.70000
# 2   4  1  0 22.90000 1.666667  84.66667 21.04028
# 3   4  1  1 28.37143 1.428571  80.57143 18.75509
# 4   6  0  1 20.56667 4.666667 131.66667 16.33306
# 5   6  1  0 19.12500 2.500000 115.25000 19.21275
# 6   8  0  0 15.98000 2.900000 191.00000 17.01239
# 7   8  0  1 15.40000 6.000000 299.50000 14.55297

Without the weighted mean of qsec, this would simplify to

mtcars |>
  subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |>
  group_by(cyl, vs, am) |> 
  fmean()
#   cyl vs am      mpg     carb        hp
# 1   4  0  1 26.00000 2.000000  91.00000
# 2   4  1  0 22.90000 1.666667  84.66667
# 3   4  1  1 28.37143 1.428571  80.57143
# 4   6  0  1 20.56667 4.666667 131.66667
# 5   6  1  0 19.12500 2.500000 115.25000
# 6   8  0  0 15.98000 2.900000 191.00000
# 7   8  0  1 15.40000 6.000000 299.50000

Finally, we could set the following options to toggle unsorted grouping, no missing value skipping, and multithreading across the three columns for more efficient execution.

mtcars |>
  subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |>
  group_by(cyl, vs, am, sort = FALSE) |> 
  fmean(nthreads = 3, na.rm = FALSE)
#   cyl vs am      mpg     carb        hp
# 1   6  0  1 20.56667 4.666667 131.66667
# 2   4  1  1 28.37143 1.428571  80.57143
# 3   6  1  0 19.12500 2.500000 115.25000
# 4   8  0  0 15.98000 2.900000 191.00000
# 5   4  1  0 22.90000 1.666667  84.66667
# 6   4  0  1 26.00000 2.000000  91.00000
# 7   8  0  1 15.40000 6.000000 299.50000

Setting these options globally using set_collapse(sort = FALSE, nthreads = 3, na.rm = FALSE) avoids the need to set them repeatedly.

Using Internal Grouping

Another key to writing efficient code with collapse is to avoid fgroup_by() where possible, especially for mutate operations. collapse does not implement .by arguments to manipulation functions like dplyr, but instead allows ad-hoc grouped transformations through its statistical functions. For example, the easiest and fastest way to computed the median of mpg by cyl, vs, and am is

mtcars |>
  mutate(mpg_median = fmedian(mpg, list(cyl, vs, am), TRA = "fill")) |> 
  head(3)
#                mpg cyl disp  hp drat    wt  qsec vs am gear carb mpg_median
# Mazda RX4     21.0   6  160 110 3.90 2.620 16.46  0  1    4    4       21.0
# Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4       21.0
# Datsun 710    22.8   4  108  93 3.85 2.320 18.61  1  1    4    1       30.4

For the common case of averaging and centering data, collapse also provides functions fbetween() for averaging and fwithin() for centering, i.e., fbetween(mpg, list(cyl, vs, am)) is the same as fmean(mpg, list(cyl, vs, am), TRA = "fill"). There is also fscale() for (grouped) scaling and centering.

This also applies to multiple columns, where we can use fmutate(across(...)) or ftransformv(), i.e. 

mtcars |>
  mutate(across(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill")) |> 
  head(2)
#               mpg cyl disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4      21   6  160 110  3.9 2.620 16.46  0  1    4    4
# Mazda RX4 Wag  21   6  160 110  3.9 2.875 16.46  0  1    4    4

# Or 
mtcars |>
  transformv(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill") |> 
  head(2)
#               mpg cyl disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4      21   6  160 110  3.9 2.620 16.46  0  1    4    4
# Mazda RX4 Wag  21   6  160 110  3.9 2.875 16.46  0  1    4    4

Of course, if we want to apply different functions using the same grouping, fgroup_by() is sensible, but for mutate operations it also has the argument return.groups = FALSE, which avoids materializing the unique grouping columns, saving some memory.

mtcars |>
  group_by(cyl, vs, am, return.groups = FALSE) |> 
  mutate(mpg_median = fmedian(mpg), 
         mpg_mean = fmean(mpg), # Or fbetween(mpg)
         mpg_demean = fwithin(mpg), # Or fmean(mpg, TRA = "-")
         mpg_scale = fscale(mpg), 
         .keep = "used") |>
  ungroup() |>
  head(3)
#                mpg cyl vs am mpg_median mpg_mean mpg_demean  mpg_scale
# Mazda RX4     21.0   6  0  1       21.0 20.56667  0.4333333  0.5773503
# Mazda RX4 Wag 21.0   6  0  1       21.0 20.56667  0.4333333  0.5773503
# Datsun 710    22.8   4  1  1       30.4 28.37143 -5.5714286 -1.1710339

The TRA argument supports a whole array of operations, see ?TRA. For example fsum(mtcars, TRA = "/") turns the column vectors into proportions. As an application of this, consider a generated dataset of sector-level exports.

# c = country, s = sector, y = year, v = value
exports <- expand.grid(c = paste0("c", 1:8), s = paste0("s", 1:8), y = 1:15) |>
           mutate(v = round(abs(rnorm(length(c), mean = 5)), 2)) |>
           subset(-sample.int(length(v), 360)) # Making it unbalanced and irregular
head(exports)
#    c  s y    v
# 1 c2 s1 1 5.55
# 2 c3 s1 1 4.33
# 3 c4 s1 1 5.21
# 4 c5 s1 1 5.31
# 5 c6 s1 1 6.17
# 6 c7 s1 1 5.62
nrow(exports)
# [1] 600

It is very easy then to compute Balassa’s (1965) Revealed Comparative Advantage (RCA) index, which is the share of a sector in country exports divided by the share of the sector in world exports. An index above 1 indicates that a RCA of country c in sector s.

# Computing Balassa's (1965) RCA index: fast and memory efficient
# settfm() modifies exports and assigns it back to the global environment
settfm(exports, RCA = fsum(v, list(c, y), TRA = "/") %/=% fsum(fsum(v, y, TRA = "/"), list(s, y), TRA = "fill", set = TRUE))

Note that this involved a single expression with two different grouped operations, which is only possible by incorporating grouping into statistical functions themselves. Let’s summarise this dataset using pivot() to aggregate the RCA index across years. Here "mean" calls a highly efficient internal mean function.

pivot(exports, ids = "c", values = "RCA", names = "s", 
      how = "wider", FUN = "mean", sort = TRUE)
#    c       s1       s2       s3       s4       s5       s6       s7       s8
# 1 c1 1.456983 1.674245 2.106907 1.715610 1.517669 2.058640 1.731403 1.533286
# 2 c2 2.196345 1.741839 1.925417 1.940657 1.422963 1.523795 1.385106 1.455789
# 3 c3 1.261560 1.552989 1.710201 1.420272 1.470105 1.531912 1.562338 1.307914
# 4 c4 1.455803 1.480939 1.558595 1.424664 1.213920 1.283873 1.631415 1.249383
# 5 c5 1.420965 1.616355 1.732715 1.465465 1.579685 1.252126 1.385581 1.359236
# 6 c6 1.445393 1.452775 1.872439 1.529396 1.464301 1.732497 1.331926 1.264625
# 7 c7 1.730497 1.627966 1.678039 1.710256 1.572039 1.798925 2.119763 1.451539
# 8 c8 1.763551 1.773720 1.730399 1.553112 1.419381 1.609315 1.715916 1.568516

We may also wish to investigate the growth rate of RCA. This can be done using fgrowth(). Since the panel is irregular, i.e., not every sector is observed in every year, it is critical to also supply the time variable.

exports |> 
  mutate(RCA_growth = fgrowth(RCA, g = list(c, s), t = y)) |> 
  pivot(ids = "c", values = "RCA_growth", names = "s", 
        how = "wider", FUN = fmedian, sort = TRUE)
#    c         s1         s2          s3          s4         s5          s6         s7         s8
# 1 c1         NA -31.320346  33.2382015 -17.7150170 -19.521910   7.7699227 -11.166836   9.014163
# 2 c2   1.837294  60.313915   7.6639286 -36.3451812   7.657809   0.5202565 -17.252738  16.234799
# 3 c3 -17.644211  10.140848  39.3044351  -0.5140010 -27.571156 -15.3070853 -20.052042  -9.645808
# 4 c4  -3.619271  13.614077 -11.5213936 -29.1795219  12.698973  -2.8301315   9.579979   4.351506
# 5 c5 -11.267960   1.563708  49.2593990   0.6372803  12.894361 -10.7062506 -16.359597   1.331514
# 6 c6  -8.854774 -24.375237  -0.7098001  -0.6061250 -21.095221  17.3704638 -23.141631  -5.861039
# 7 c7   7.168700   9.169368 -51.7958299 -27.7699562  10.830523  23.9014624 -27.645297 -15.541500
# 8 c8  42.166200  -6.204723 114.3084929 -18.3894910 -17.674001  -3.4403949   1.342354 -38.826719

Lastly, since the panel is unbalanced, we may wish to create an RCA index for only the last year, but balance the dataset a bit more by taking the last available trade within the last three years. This can be done using a single subset call

# Taking the latest observation within the last 3 years
exports_latest <- subset(exports, y > 12 & y == fmax(y, list(c, s), "fill"), -y)
# How many sectors do we observe for each country in the last 3 years?
with(exports_latest, fndistinct(s, c))
# c1 c2 c3 c4 c5 c6 c7 c8 
#  8  8  7  7  8  8  6  8

We can then compute the RCA index on this data

exports_latest |>
    mutate(RCA = fsum(v, c, TRA = "/") %/=% fsum(proportions(v), s, TRA = "fill")) |>
    pivot("c", "RCA", "s", how = "wider", sort = TRUE)
#    c        s1        s2        s3        s4        s5        s6        s7        s8
# 1 c1 0.9957444 1.0039325 1.2424563 0.9257392 0.8152179 1.3325429 0.7410637 1.0259104
# 2 c2 1.1416748 0.8007287 1.1660717 1.0364984 0.7154912 1.0625854 1.2649881 0.8687216
# 3 c3 1.1104473 0.9500677 1.3770016        NA 1.1941963 1.1301935 0.9773947 1.0015135
# 4 c4 0.8381306 1.2543034 1.1274679 1.3990983 1.3918678        NA 0.7364405 1.1539036
# 5 c5 0.8536024 0.8182961 0.9638389 1.6273503 1.0172714 0.8268992 1.0423516 1.0273071
# 6 c6 0.8465415 0.8878380 1.2123911 1.7417480 0.8812675 1.1393711 0.9840424 0.6626898
# 7 c7 1.0284817 1.2207153        NA        NA 1.2871187 1.4475702 1.2210074 1.3880608
# 8 c8 1.2217063 1.1452869 0.7166041 0.9448634 0.8388402 0.9760660 1.1123412 0.9686146

To summarise, collapse provides many options for ad-hoc or limited grouping, which are faster than a full fgroup_by(), and also syntactically efficient. Further efficiency gains are possible using operations by reference, e.g., %/=% instead of / to avoid an intermediate copy. It is also possible to transform by reference using fast statistical functions by passing the set = TRUE argument, e.g., with(mtcars, fmean(mpg, cyl, TRA = "fill", set = TRUE)) replaces mpg by its group-averaged version (the transformed vector is returned invisibly).

Conclusion

collapse enhances R both statistically and computationally and is a good option for tidyverse users searching for more efficient and lightweight solutions to data manipulation and statistical computing problems in R. For more information, I recommend starting with the short vignette on Documentation Resources.

R users willing to write efficient/lightweight code and completely replace the tidyverse in their workflow are also encouraged to closely examine the fastverse suite of packages. collapse alone may not always suffice, but 99% of tidyverse code can be replaced with an efficient and lightweight fastverse solution.


  1. Notably, tidyselect, lambda expressions, and many of the smaller helper functions are left out.↩︎

  2. collapse can only read what you type, e.g. exp <- substitute(fmean(mpg) + min(mpg)), then all_funs(exp) gives c("+", "fmean", "min"), and any(all_funs(exp) %in% .FAST_STAT_FUN) returns TRUE, signifying to fsummarise() that the expression should be executed only once with the grouping object passed to the g argument of fmean(), instead of it being executed once for every group.↩︎

collapse/inst/doc/collapse_and_dplyr.Rmd0000644000176200001440000035757715121640575020136 0ustar liggesusers--- title: "collapse and dplyr" subtitle: "Fast (Weighted) Aggregations and Transformations in a Piped Workflow" author: "Sebastian Krantz" date: "2021-01-04" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and dplyr} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} params: cache: true --- This vignette focuses on the integration of *collapse* and the popular *dplyr* package by Hadley Wickham. In particular it will demonstrate how using *collapse*'s fast functions and some fast alternatives for *dplyr* verbs can substantially facilitate and speed up basic data manipulation, grouped and weighted aggregations and transformations, and panel data computations (i.e. between- and within-transformations, panel-lags, differences and growth rates) in a *dplyr* (piped) workflow. *** **Notes:** - This vignette is targeted at *dplyr* / *tidyverse* users. *collapse* is a standalone package and can be programmed efficiently without pipes or *dplyr* verbs. - The 'Introduction to *collapse*' vignette provides a thorough introduction to the package and a built-in structured documentation is available under `help("collapse-documentation")` after installing the package. In addition `help("collapse-package")` provides a compact set of examples for quick-start. - Documentation and vignettes can also be viewed [online](). *** ## 1. Fast Aggregations A key feature of *collapse* is it's broad set of *Fast Statistical Functions* (`fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct`) which are able to substantially speed-up column-wise, grouped and weighted computations on vectors, matrices or data frames. The functions are S3 generic, with a default (vector), matrix and data frame method, as well as a grouped_df method for grouped tibbles used by *dplyr*. The grouped tibble method has the following arguments: ```r FUN.grouped_df(x, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = FALSE, keep.group_vars = TRUE, [keep.w = TRUE,] ...) ``` where `w` is a weight variable, and `TRA` and can be used to transform `x` using the computed statistics and one of 10 available transformations (`"replace_fill", "replace", "-", "-+", "/", "%", "+", "*", "%%", "-%%"`, discussed in section 2). `na.rm` efficiently removes missing values and is `TRUE` by default. `use.g.names` generates new row-names from the unique combinations of groups (default: disabled), whereas `keep.group_vars` (default: enabled) will keep the grouping columns as is custom in the native `data %>% group_by(...) %>% summarize(...)` workflow in *dplyr*. Finally, `keep.w` regulates whether a weighting variable used is also aggregated and saved in a column. For `fsum, fmean, fmedian, fnth, fvar, fsd` and `fmode` this will compute the sum of the weights in each group, whereas `fprod` returns the product of the weights. With that in mind, let's consider some straightforward applications. ### 1.1 Simple Aggregations Consider the Groningen Growth and Development Center 10-Sector Database included in *collapse* and introduced in the main vignette: ```r library(collapse) head(GGDC10S) # Country Regioncode Region Variable Year AGR MIN MAN PU # 1 BWA SSA Sub-saharan Africa VA 1960 NA NA NA NA # 2 BWA SSA Sub-saharan Africa VA 1961 NA NA NA NA # 3 BWA SSA Sub-saharan Africa VA 1962 NA NA NA NA # 4 BWA SSA Sub-saharan Africa VA 1963 NA NA NA NA # 5 BWA SSA Sub-saharan Africa VA 1964 16.30154 3.494075 0.7365696 0.1043936 # 6 BWA SSA Sub-saharan Africa VA 1965 15.72700 2.495768 1.0181992 0.1350976 # CON WRT TRA FIRE GOV OTH SUM # 1 NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA # 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229 # 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710 # Summarize the Data: # descr(GGDC10S, cols = is_categorical) # aperm(qsu(GGDC10S, ~Variable, cols = is.numeric)) # Efficiently converting to tibble (no deep copy) GGDC10S <- qTBL(GGDC10S) ``` Simple column-wise computations using the fast functions and pipe operators are performed as follows: ```r library(dplyr) GGDC10S %>% fnobs # Number of Observations # Country Regioncode Region Variable Year AGR MIN MAN PU # 5027 5027 5027 5027 5027 4364 4355 4355 4354 # CON WRT TRA FIRE GOV OTH SUM # 4355 4355 4355 4355 3482 4248 4364 GGDC10S %>% fndistinct # Number of distinct values # Country Regioncode Region Variable Year AGR MIN MAN PU # 43 6 6 2 67 4353 4224 4353 4237 # CON WRT TRA FIRE GOV OTH SUM # 4339 4344 4334 4349 3470 4238 4364 GGDC10S %>% select_at(6:16) %>% fmedian # Median # AGR MIN MAN PU CON WRT TRA FIRE GOV # 4394.5194 173.2234 3718.0981 167.9500 1473.4470 3773.6430 1174.8000 960.1251 3928.5127 # OTH SUM # 1433.1722 23186.1936 GGDC10S %>% select_at(6:16) %>% fmean # Mean # AGR MIN MAN PU CON WRT TRA FIRE GOV # 2526696.5 1867908.9 5538491.4 335679.5 1801597.6 3392909.5 1473269.7 1657114.8 1712300.3 # OTH SUM # 1684527.3 21566436.8 GGDC10S %>% fmode # Mode # Country Regioncode Region Variable Year # "USA" "ASI" "Asia" "EMP" "2010" # AGR MIN MAN PU CON # "171.315882316326" "0" "4645.12507642586" "0" "1.34623115930777" # WRT TRA FIRE GOV OTH # "21.8380052682527" "8.97743416914571" "40.0701608636442" "0" "3626.84423577048" # SUM # "37.4822945751317" GGDC10S %>% fmode(drop = FALSE) # Keep data structure intact # # A tibble: 1 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # * # 1 USA ASI Asia EMP 2010 171. 0 4645. 0 1.35 21.8 8.98 40.1 0 # # ℹ 2 more variables: OTH , SUM ``` Moving on to grouped statistics, we can compute the average value added and employment by sector and country using: ```r GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmean # # A tibble: 85 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1420. 52.1 1932. 1.02e2 7.42e2 1.98e3 6.49e2 628. 2043. 9.92e2 1.05e4 # 2 EMP BOL 964. 56.0 235. 5.35e0 1.23e2 2.82e2 1.15e2 44.6 NA 3.96e2 2.22e3 # 3 EMP BRA 17191. 206. 6991. 3.65e2 3.52e3 8.51e3 2.05e3 4414. 5307. 5.71e3 5.43e4 # 4 EMP BWA 188. 10.5 18.1 3.09e0 2.53e1 3.63e1 8.36e0 15.3 61.1 2.76e1 3.94e2 # 5 EMP CHL 702. 101. 625. 2.94e1 2.96e2 6.95e2 2.58e2 272. NA 1.00e3 3.98e3 # 6 EMP CHN 287744. 7050. 67144. 1.61e3 2.09e4 2.89e4 1.39e4 4929. 22669. 3.10e4 4.86e5 # 7 EMP COL 3091. 145. 1175. 3.39e1 5.24e2 2.07e3 4.70e2 649. NA 1.73e3 9.89e3 # 8 EMP CRI 231. 1.70 136. 1.43e1 5.76e1 1.57e2 4.24e1 54.9 128. 6.51e1 8.87e2 # 9 EMP DEW 2490. 407. 8473. 2.26e2 2.09e3 4.44e3 1.48e3 1689. 3945. 9.99e2 2.62e4 # 10 EMP DNK 236. 8.03 507. 1.38e1 1.71e2 4.55e2 1.61e2 181. 549. 1.11e2 2.39e3 # # ℹ 75 more rows ``` Similarly we can aggregate using any other of the above functions. It is important to not use *dplyr*'s `summarize` together with these functions since that would eliminate their speed gain. These functions are fast because they are executed only once and carry out the grouped computations in C++, whereas `summarize` will apply the function to each group in the grouped tibble. *** #### Excursus: What is Happening Behind the Scenes? To better explain this point it is perhaps good to shed some light on what is happening behind the scenes of *dplyr* and *collapse*. Fundamentally both packages follow different computing paradigms: *dplyr* is an efficient implementation of the Split-Apply-Combine computing paradigm. Data is split into groups, these data-chunks are then passed to a function carrying out the computation, and finally recombined to produce the aggregated data.frame. This modus operandi is evident in the grouping mechanism of *dplyr*. When a data.frame is passed through *group_by*, a 'groups' attribute is attached: ```r GGDC10S %>% group_by(Variable, Country) %>% attr("groups") # # A tibble: 85 × 3 # Variable Country .rows # > # 1 EMP ARG [62] # 2 EMP BOL [61] # 3 EMP BRA [62] # 4 EMP BWA [52] # 5 EMP CHL [63] # 6 EMP CHN [62] # 7 EMP COL [61] # 8 EMP CRI [62] # 9 EMP DEW [61] # 10 EMP DNK [64] # # ℹ 75 more rows ``` This object is a data.frame giving the unique groups and in the third (last) column vectors containing the indices of the rows belonging to that group. A command like `summarize` uses this information to split the data.frame into groups which are then passed sequentially to the function used and later recombined. These steps are also done in C++ which makes *dplyr* quite efficient. Now *collapse* is based around one-pass grouped computations at the C++ level using its own grouped statistical functions. In other words the data is not split and recombined at all but the entire computation is performed in a single C++ loop running through that data and completing the computations for each group simultaneously. This modus operandi is also evident in *collapse* grouping objects. The method `GRP.grouped_df` takes a *dplyr* grouping object from a grouped tibble and efficiently converts it to a *collapse* grouping object: ```r GGDC10S %>% group_by(Variable, Country) %>% GRP %>% str # Class 'GRP' hidden list of 9 # $ N.groups : int 85 # $ group.id : int [1:5027] 46 46 46 46 46 46 46 46 46 46 ... # $ group.sizes : int [1:85] 62 61 62 52 63 62 61 62 61 64 ... # $ groups :List of 2 # ..$ Variable: chr [1:85] "EMP" "EMP" "EMP" "EMP" ... # .. ..- attr(*, "label")= chr "Variable" # .. ..- attr(*, "format.stata")= chr "%9s" # ..$ Country : chr [1:85] "ARG" "BOL" "BRA" "BWA" ... # .. ..- attr(*, "label")= chr "Country" # .. ..- attr(*, "format.stata")= chr "%9s" # $ group.vars : chr [1:2] "Variable" "Country" # $ ordered : Named logi [1:2] TRUE FALSE # ..- attr(*, "names")= chr [1:2] "ordered" "sorted" # $ order : NULL # $ group.starts: NULL # $ call : language GRP.grouped_df(X = .) ``` This object is a list where the first three elements give the number of groups, the group-id to which each row belongs and a vector of group-sizes. A function like `fsum` uses this information to (for each column) create a result vector of size 'N.groups' and the run through the column using the 'group.id' vector to add the i'th data point to the 'group.id[i]'th element of the result vector. When the loop is finished, the grouped computation is also finished. It is obvious that *collapse* is faster than *dplyr* since it's method of computing involves less steps, and it does not need to call statistical functions multiple times. See the benchmark section. *** ### 1.2 More Speed using *collapse* Verbs *collapse* fast functions do not develop their maximal performance on a grouped tibble created with `group_by` because of the additional conversion cost of the grouping object incurred by `GRP.grouped_df`. This cost is already minimized through the use of C++, but we can do even better replacing `group_by` with `collapse::fgroup_by`. `fgroup_by` works like `group_by` but does the grouping with `collapse::GRP` (up to 10x faster than `group_by`) and simply attaches a *collapse* grouping object to the grouped_df. Thus the speed gain is 2-fold: Faster grouping and no conversion cost when calling *collapse* functions. Another improvement comes from replacing the *dplyr* verb `select` with `collapse::fselect`, and, for selection using column names, indices or functions use `collapse::get_vars` instead of `select_at` or `select_if`. Next to `get_vars`, *collapse* also introduces the predicates `num_vars`, `cat_vars`, `char_vars`, `fact_vars`, `logi_vars` and `date_vars` to efficiently select columns by type. ```r GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fmedian # # A tibble: 85 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1325. 47.4 1988. 1.05e2 7.82e2 1.85e3 5.80e2 464. 1739. 866. 9.74e3 # 2 EMP BOL 943. 53.5 167. 4.46e0 6.60e1 1.32e2 9.70e1 15.3 NA 384. 1.84e3 # 3 EMP BRA 17481. 225. 7208. 3.76e2 4.05e3 6.45e3 1.58e3 4355. 4450. 4479. 5.19e4 # 4 EMP BWA 175. 12.2 13.1 3.71e0 1.90e1 2.11e1 6.75e0 10.4 53.8 31.2 3.61e2 # 5 EMP CHL 690. 93.9 607. 2.58e1 2.30e2 4.84e2 2.05e2 106. NA 900. 3.31e3 # 6 EMP CHN 293915 8150. 61761. 1.14e3 1.06e4 1.70e4 9.56e3 4328. 19468. 9954. 4.45e5 # 7 EMP COL 3006. 84.0 1033. 3.71e1 4.19e2 1.55e3 3.91e2 655. NA 1430. 8.63e3 # 8 EMP CRI 216. 1.49 114. 7.92e0 5.50e1 8.98e1 2.55e1 19.6 122. 60.6 7.19e2 # 9 EMP DEW 2178 320. 8459. 2.47e2 2.10e3 4.45e3 1.53e3 1656 3700 900 2.65e4 # 10 EMP DNK 187. 3.75 508. 1.36e1 1.65e2 4.61e2 1.61e2 169. 642. 104. 2.42e3 # # ℹ 75 more rows microbenchmark(collapse = GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fmedian, hybrid = GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmedian, dplyr = GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% summarise_all(median, na.rm = TRUE)) # Unit: microseconds # expr min lq mean median uq max neval # collapse 236.406 263.6095 303.309 295.9175 337.061 419.635 100 # hybrid 2699.317 2894.9690 3573.611 2998.3505 3119.772 56249.212 100 # dplyr 15923.908 16297.8280 18810.943 16742.5140 18578.105 71125.939 100 ``` Benchmarks on the different components of this code and with larger data are provided under 'Benchmarks'. Note that a grouped tibble created with `fgroup_by` can no longer be used for grouped computations with *dplyr* verbs like `mutate` or `summarize`. `fgroup_by` first assigns the class *GDP_df* which is for printing grouping information and subsetting, then the object classes (*tbl_df*, *data.table* or whatever else), followed by classes *grouped_df* and *data.frame*, and adds the grouping object in a 'groups' attribute. Since *tbl_df* is assigned before *grouped_df*, the object is treated by the *dplyr* ecosystem like a normal tibble. ```r class(group_by(GGDC10S, Variable, Country)) # [1] "grouped_df" "tbl_df" "tbl" "data.frame" class(fgroup_by(GGDC10S, Variable, Country)) # [1] "GRP_df" "tbl_df" "tbl" "grouped_df" "data.frame" ``` The function `fungroup` removes classes 'GDP_df' and 'grouped_df' and the 'groups' attribute (and can thus also be used for grouped tibbles created with `dplyr::group_by`). Note that any kind of data frame based class can be grouped with `fgroup_by`, and still retain full responsiveness to all methods defined for that class. Functions performing aggregation on the grouped data frame remove the grouping object and classes afterwards, yielding an object with the same class and attributes as the input. The print method shown below reports the grouping variables, and then in square brackets the information `[number of groups | average group size (standard-deviation of group sizes)]`: ```r fgroup_by(GGDC10S, Variable, Country) # # A tibble: 5,027 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA SSA Sub-s… VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-s… VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA SSA Sub-s… VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA SSA Sub-s… VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA SSA Sub-s… VA 1964 16.3 3.49 0.737 0.104 0.660 6.24 1.66 1.12 4.82 # 6 BWA SSA Sub-s… VA 1965 15.7 2.50 1.02 0.135 1.35 7.06 1.94 1.25 5.70 # 7 BWA SSA Sub-s… VA 1966 17.7 1.97 0.804 0.203 1.35 8.27 2.15 1.36 6.37 # 8 BWA SSA Sub-s… VA 1967 19.1 2.30 0.938 0.203 0.897 4.31 1.72 1.54 7.04 # 9 BWA SSA Sub-s… VA 1968 21.1 1.84 0.750 0.203 1.22 5.17 2.44 1.03 5.03 # 10 BWA SSA Sub-s… VA 1969 21.9 5.24 2.14 0.578 3.47 5.75 2.72 1.23 5.59 # # ℹ 5,017 more rows # # ℹ 2 more variables: OTH , SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Note further that `fselect` and `get_vars` are not full drop-in replacements for `select` because they do not have a grouped_df method: ```r GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% tail(3) # # A tibble: 3 × 13 # # Groups: Variable, Country [1] # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP EGY 5206. 29.0 2436. 307. 2733. 2977. 1992. 801. 5539. NA 22020. # 2 EMP EGY 5186. 27.6 2374. 318. 2795. 3020. 2048. 815. 5636. NA 22219. # 3 EMP EGY 5161. 24.8 2348. 325. 2931. 3110. 2065. 832. 5736. NA 22533. GGDC10S %>% group_by(Variable, Country) %>% get_vars(6:16) %>% tail(3) # # A tibble: 3 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 5206. 29.0 2436. 307. 2733. 2977. 1992. 801. 5539. NA 22020. # 2 5186. 27.6 2374. 318. 2795. 3020. 2048. 815. 5636. NA 22219. # 3 5161. 24.8 2348. 325. 2931. 3110. 2065. 832. 5736. NA 22533. ``` Since by default `keep.group_vars = TRUE` in the *Fast Statistical Functions*, the end result is nevertheless the same: ```r GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmean %>% tail(3) # # A tibble: 3 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 VA VEN 6860. 35478. 1.96e4 1.06e3 1.17e4 1.93e4 8.03e3 5.60e3 NA 19986. 1.28e5 # 2 VA ZAF 16419. 42928. 8.76e4 1.38e4 1.64e4 6.83e4 4.53e4 6.64e4 7.58e4 30167. 4.63e5 # 3 VA ZMB 1268849. 1006099. 9.00e5 2.19e5 8.66e5 2.10e6 7.05e5 9.10e5 1.10e6 81871. 9.16e6 GGDC10S %>% group_by(Variable, Country) %>% get_vars(6:16) %>% fmean %>% tail(3) # # A tibble: 3 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 VA VEN 6860. 35478. 1.96e4 1.06e3 1.17e4 1.93e4 8.03e3 5.60e3 NA 19986. 1.28e5 # 2 VA ZAF 16419. 42928. 8.76e4 1.38e4 1.64e4 6.83e4 4.53e4 6.64e4 7.58e4 30167. 4.63e5 # 3 VA ZMB 1268849. 1006099. 9.00e5 2.19e5 8.66e5 2.10e6 7.05e5 9.10e5 1.10e6 81871. 9.16e6 ``` Another useful verb introduced by *collapse* is `fgroup_vars`, which can be used to efficiently obtain the grouping columns or grouping variables from a grouped tibble: ```r # fgroup_by fully supports grouped tibbles created with group_by or fgroup_by: GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars %>% head(3) # # A tibble: 3 × 2 # Variable Country # # 1 VA BWA # 2 VA BWA # 3 VA BWA GGDC10S %>% fgroup_by(Variable, Country) %>% fgroup_vars %>% head(3) # # A tibble: 3 × 2 # Variable Country # # 1 VA BWA # 2 VA BWA # 3 VA BWA # The other possibilities: GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("unique") %>% head(3) # # A tibble: 3 × 2 # Variable Country # # 1 EMP ARG # 2 EMP BOL # 3 EMP BRA GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("names") # [1] "Variable" "Country" GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("indices") # [1] 4 1 GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("named_indices") # Variable Country # 4 1 GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("logical") # [1] TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("named_logical") # Country Regioncode Region Variable Year AGR MIN MAN PU # TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE # CON WRT TRA FIRE GOV OTH SUM # FALSE FALSE FALSE FALSE FALSE FALSE FALSE ``` Another *collapse* verb to mention here is `fsubset`, a faster alternative to `dplyr::filter` which also provides an option to flexibly subset columns after the select argument: ```r # Two equivalent calls, the first is substantially faster GGDC10S %>% fsubset(Variable == "VA" & Year > 1990, Country, Year, AGR:GOV) %>% head(3) # # A tibble: 3 × 11 # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA 1991 303. 2647. 473. 161. 580. 807. 233. 433. 1073. # 2 BWA 1992 333. 2691. 537. 178. 679. 725. 285. 517. 1234. # 3 BWA 1993 405. 2625. 567. 219. 634. 772. 350. 673. 1487. GGDC10S %>% filter(Variable == "VA" & Year > 1990) %>% select(Country, Year, AGR:GOV) %>% head(3) # # A tibble: 3 × 11 # Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA 1991 303. 2647. 473. 161. 580. 807. 233. 433. 1073. # 2 BWA 1992 333. 2691. 537. 178. 679. 725. 285. 517. 1234. # 3 BWA 1993 405. 2625. 567. 219. 634. 772. 350. 673. 1487. ``` *collapse* also offers `roworder`, `frename`, `colorder` and `ftransform`/`TRA` as fast replacements for `dplyr::arrange`, `dplyr::rename`, `dplyr::relocate` and `dplyr::mutate`. ### 1.3 Multi-Function Aggregations One can also aggregate with multiple functions at the same time. For such operations it is often necessary to use curly braces `{` to prevent first argument injection so that `%>% cbind(FUN1(.), FUN2(.))` does not evaluate as `%>% cbind(., FUN1(.), FUN2(.))`: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% { cbind(fmedian(.), add_stub(fmean(., keep.group_vars = FALSE), "mean_")) } %>% head(3) # Variable Country AGR MIN MAN PU CON WRT TRA # 1 EMP ARG 1324.5255 47.35255 1987.5912 104.738825 782.40283 1854.612 579.93982 # 2 EMP BOL 943.1612 53.53538 167.1502 4.457895 65.97904 132.225 96.96828 # 3 EMP BRA 17480.9810 225.43693 7207.7915 375.851832 4054.66103 6454.523 1580.81120 # FIRE GOV OTH SUM mean_AGR mean_MIN mean_MAN mean_PU mean_CON # 1 464.39920 1738.836 866.1119 9743.223 1419.8013 52.08903 1931.7602 101.720936 742.4044 # 2 15.34259 NA 384.0678 1842.055 964.2103 56.03295 235.0332 5.346433 122.7827 # 3 4354.86210 4449.942 4478.6927 51881.110 17191.3529 206.02389 6991.3710 364.573404 3524.7384 # mean_WRT mean_TRA mean_FIRE mean_GOV mean_OTH mean_SUM # 1 1982.1775 648.5119 627.79291 2043.471 992.4475 10542.177 # 2 281.5164 115.4728 44.56442 NA 395.5650 2220.524 # 3 8509.4612 2054.3731 4413.54448 5307.280 5710.2665 54272.985 ``` The function `add_stub` used above is a *collapse* function adding a prefix (default) or suffix to variables names. The *collapse* predicate `add_vars` provides a more efficient alternative to `cbind.data.frame`. The idea here is 'adding' variables to the data.frame in the first argument i.e. the attributes of the first argument are preserved, so the expression below still gives a tibble instead of a data.frame: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% { add_vars(get_vars(., "Reg", regex = TRUE) %>% ffirst, # Regular expression matching column names num_vars(.) %>% fmean(keep.group_vars = FALSE) %>% add_stub("mean_"), # num_vars selects all numeric variables fselect(., PU:TRA) %>% fmedian(keep.group_vars = FALSE) %>% add_stub("median_"), fselect(., PU:CON) %>% fmin(keep.group_vars = FALSE) %>% add_stub("min_")) } %>% head(3) # # A tibble: 3 × 22 # Variable Country Regioncode Region mean_Year mean_AGR mean_MIN mean_MAN mean_PU mean_CON mean_WRT # # 1 EMP ARG LAM Latin … 1980. 1420. 52.1 1932. 102. 742. 1982. # 2 EMP BOL LAM Latin … 1980 964. 56.0 235. 5.35 123. 282. # 3 EMP BRA LAM Latin … 1980. 17191. 206. 6991. 365. 3525. 8509. # # ℹ 11 more variables: mean_TRA , mean_FIRE , mean_GOV , mean_OTH , # # mean_SUM , median_PU , median_CON , median_WRT , median_TRA , # # min_PU , min_CON ``` Another nice feature of `add_vars` is that it can also very efficiently reorder columns i.e. bind columns in a different order than they are passed. This can be done by simply specifying the positions the added columns should have in the final data frame, and then `add_vars` shifts the first argument columns to the right to fill in the gaps. ```r GGDC10S %>% fsubset(Variable == "VA", Country, AGR, SUM) %>% fgroup_by(Country) %>% { add_vars(fgroup_vars(.,"unique"), fmean(., keep.group_vars = FALSE) %>% add_stub("mean_"), fsd(., keep.group_vars = FALSE) %>% add_stub("sd_"), pos = c(2,4,3,5)) } %>% head(3) # # A tibble: 3 × 5 # Country mean_AGR sd_AGR mean_SUM sd_SUM # # 1 ARG 14951. 33061. 152534. 301316. # 2 BOL 3300. 4456. 22619. 33173. # 3 BRA 76870. 59442. 1200563. 976963. ``` A much more compact solution to multi-function and multi-type aggregation is offered by the function *collapg*: ```r # This aggregates numeric colums using the mean (fmean) and categorical columns with the mode (fmode) GGDC10S %>% fgroup_by(Variable, Country) %>% collapg %>% head(3) # # A tibble: 3 × 16 # Variable Country Regioncode Region Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 EMP ARG LAM Latin … 1980. 1420. 52.1 1932. 102. 742. 1982. 649. 628. 2043. # 2 EMP BOL LAM Latin … 1980 964. 56.0 235. 5.35 123. 282. 115. 44.6 NA # 3 EMP BRA LAM Latin … 1980. 17191. 206. 6991. 365. 3525. 8509. 2054. 4414. 5307. # # ℹ 2 more variables: OTH , SUM ``` By default it aggregates numeric columns using the `fmean` and categorical columns using `fmode`, and preserves the order of all columns. Changing these defaults is very easy: ```r # This aggregates numeric colums using the median and categorical columns using the first value GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(fmedian, flast) %>% head(3) # # A tibble: 3 × 16 # Variable Country Regioncode Region Year AGR MIN MAN PU CON WRT TRA FIRE # # 1 EMP ARG LAM Latin Amer… 1980. 1325. 47.4 1988. 105. 782. 1855. 580. 464. # 2 EMP BOL LAM Latin Amer… 1980 943. 53.5 167. 4.46 66.0 132. 97.0 15.3 # 3 EMP BRA LAM Latin Amer… 1980. 17481. 225. 7208. 376. 4055. 6455. 1581. 4355. # # ℹ 3 more variables: GOV , OTH , SUM ``` One can apply multiple functions to both numeric and/or categorical data: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(list(fmean, fmedian), list(first, fmode, flast)) %>% head(3) # # A tibble: 3 × 32 # Variable Country first.Regioncode fmode.Regioncode flast.Regioncode first.Region fmode.Region # # 1 EMP ARG LAM LAM LAM Latin America Latin America # 2 EMP BOL LAM LAM LAM Latin America Latin America # 3 EMP BRA LAM LAM LAM Latin America Latin America # # ℹ 25 more variables: flast.Region , fmean.Year , fmedian.Year , fmean.AGR , # # fmedian.AGR , fmean.MIN , fmedian.MIN , fmean.MAN , fmedian.MAN , # # fmean.PU , fmedian.PU , fmean.CON , fmedian.CON , fmean.WRT , # # fmedian.WRT , fmean.TRA , fmedian.TRA , fmean.FIRE , fmedian.FIRE , # # fmean.GOV , fmedian.GOV , fmean.OTH , fmedian.OTH , fmean.SUM , # # fmedian.SUM ``` Applying multiple functions to only numeric (or only categorical) data allows return in a long format: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(list(fmean, fmedian), cols = is.numeric, return = "long") %>% head(3) # # A tibble: 3 × 15 # Function Variable Country Year AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 fmean EMP ARG 1980. 1420. 52.1 1932. 102. 742. 1982. 649. 628. 2043. 992. # 2 fmean EMP BOL 1980 964. 56.0 235. 5.35 123. 282. 115. 44.6 NA 396. # 3 fmean EMP BRA 1980. 17191. 206. 6991. 365. 3525. 8509. 2054. 4414. 5307. 5710. # # ℹ 1 more variable: SUM ``` Finally, `collapg` also makes it very easy to apply aggregator functions to certain columns only: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(custom = list(fmean = 6:8, fmedian = 10:12)) %>% head(3) # # A tibble: 3 × 8 # Variable Country AGR MIN MAN CON WRT TRA # # 1 EMP ARG 1420. 52.1 1932. 782. 1855. 580. # 2 EMP BOL 964. 56.0 235. 66.0 132. 97.0 # 3 EMP BRA 17191. 206. 6991. 4055. 6455. 1581. ``` To understand more about `collapg`, look it up in the documentation (`?collapg`). ### 1.4 Weighted Aggregations Weighted aggregations are possible with the functions `fsum, fprod, fmean, fmedian, fnth, fmode, fvar` and `fsd`. The implementation is such that by default (option `keep.w = TRUE`) these functions also aggregate the weights, so that further weighted computations can be performed on the aggregated data. `fprod` saves the product of the weights, whereas the other functions save the sum of the weights in a column next to the grouping variables. If `na.rm = TRUE` (the default), rows with missing weights are omitted from the computation. ```r # This computes a frequency-weighted grouped standard-deviation, taking the total EMP / VA as weight GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(AGR:SUM) %>% fsd(SUM) %>% head(3) # # A tibble: 3 × 13 # Variable Country sum.SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 EMP ARG 653615. 225. 22.2 176. 20.5 285. 856. 195. 493. 1123. 506. # 2 EMP BOL 135452. 99.7 17.1 168. 4.87 123. 324. 98.1 69.8 NA 258. # 3 EMP BRA 3364925. 1587. 73.8 2952. 93.8 1861. 6285. 1306. 3003. 3621. 4257. # This computes a weighted grouped mode, taking the total EMP / VA as weight GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(AGR:SUM) %>% fmode(SUM) %>% head(3) # # A tibble: 3 × 13 # Variable Country sum.SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 EMP ARG 653615. 1162. 127. 2164. 152. 1415. 3768. 1060. 1748. 4336. 1999. # 2 EMP BOL 135452. 819. 37.6 604. 10.8 433. 893. 333. 321. NA 1057. # 3 EMP BRA 3364925. 16451. 313. 11841. 388. 8154. 21860. 5169. 12011. 12149. 14235. ``` The weighted variance / standard deviation is currently only implemented with frequency weights. Weighted aggregations may also be performed with `collapg`. By default `fsum` is used to compute a sum of the weights, but it is also possible here to aggregate the weights with other functions: ```r # This aggregates numeric colums using the weighted mean (the default) and categorical columns using the weighted mode (the default). # Weights (column SUM) are aggregated using both the sum and the maximum. GGDC10S %>% group_by(Variable, Country) %>% collapg(w = SUM, wFUN = list(fsum, fmax)) %>% head(3) # # A tibble: 3 × 17 # Variable Country fsum.SUM fmax.SUM Regioncode Region Year AGR MIN MAN PU CON WRT # # 1 EMP ARG 653615. 17929. LAM Latin … 1985. 1361. 56.5 1935. 105. 811. 2217. # 2 EMP BOL 135452. 4508. LAM Latin … 1987. 977. 57.9 296. 7.07 167. 400. # 3 EMP BRA 3364925. 102572. LAM Latin … 1989. 17746. 238. 8466. 389. 4436. 11376. # # ℹ 4 more variables: TRA , FIRE , GOV , OTH ``` ## 2. Fast Transformations *collapse* also provides some fast transformations that significantly extend the scope and speed of manipulations that can be performed with `dplyr::mutate`. ### 2.1 Fast Transform and Compute Variables The function `ftransform` can be used to manipulate columns in the same ways as `mutate`: ```r GGDC10S %>% fsubset(Variable == "VA", Country, Year, AGR, SUM) %>% ftransform(AGR_perc = AGR / SUM * 100, # Computing % of VA in Agriculture AGR_mean = fmean(AGR), # Average Agricultural VA AGR = NULL, SUM = NULL) %>% # Deleting columns AGR and SUM head # # A tibble: 6 × 4 # Country Year AGR_perc AGR_mean # # 1 BWA 1960 NA 5137561. # 2 BWA 1961 NA 5137561. # 3 BWA 1962 NA 5137561. # 4 BWA 1963 NA 5137561. # 5 BWA 1964 43.5 5137561. # 6 BWA 1965 40.0 5137561. ``` The modification brought by `ftransformv` enables transformations of groups of columns like `dplyr::mutate_at` and `dplyr::mutate_if`: ```r # This replaces variables mpg, carb and wt by their log (.c turns expressions into character vectors) mtcars %>% ftransformv(.c(mpg, carb, wt), log) %>% head # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 3.044522 6 160 110 3.90 0.9631743 16.46 0 1 4 1.3862944 # Mazda RX4 Wag 3.044522 6 160 110 3.90 1.0560527 17.02 0 1 4 1.3862944 # Datsun 710 3.126761 4 108 93 3.85 0.8415672 18.61 1 1 4 0.0000000 # Hornet 4 Drive 3.063391 6 258 110 3.08 1.1678274 19.44 1 0 3 0.0000000 # Hornet Sportabout 2.928524 8 360 175 3.15 1.2354715 17.02 0 0 3 0.6931472 # Valiant 2.895912 6 225 105 2.76 1.2412686 20.22 1 0 3 0.0000000 # Logging numeric variables iris %>% ftransformv(is.numeric, log) %>% head # Sepal.Length Sepal.Width Petal.Length Petal.Width Species # 1 1.629241 1.252763 0.3364722 -1.6094379 setosa # 2 1.589235 1.098612 0.3364722 -1.6094379 setosa # 3 1.547563 1.163151 0.2623643 -1.6094379 setosa # 4 1.526056 1.131402 0.4054651 -1.6094379 setosa # 5 1.609438 1.280934 0.3364722 -1.6094379 setosa # 6 1.686399 1.360977 0.5306283 -0.9162907 setosa ``` Instead of `column = value` type arguments, it is also possible to pass a single list of transformed variables to `ftransform`, which will be regarded in the same way as an evaluated list of `column = value` arguments. It can be used for more complex transformations: ```r # Logging values and replacing generated Inf values mtcars %>% ftransform(fselect(., mpg, cyl, vs:gear) %>% lapply(log) %>% replace_Inf) %>% head # mpg cyl disp hp drat wt qsec vs am gear carb # Mazda RX4 3.044522 1.791759 160 110 3.90 2.620 16.46 NA 0 1.386294 4 # Mazda RX4 Wag 3.044522 1.791759 160 110 3.90 2.875 17.02 NA 0 1.386294 4 # Datsun 710 3.126761 1.386294 108 93 3.85 2.320 18.61 0 0 1.386294 1 # Hornet 4 Drive 3.063391 1.791759 258 110 3.08 3.215 19.44 0 NA 1.098612 1 # Hornet Sportabout 2.928524 2.079442 360 175 3.15 3.440 17.02 NA NA 1.098612 2 # Valiant 2.895912 1.791759 225 105 2.76 3.460 20.22 0 NA 1.098612 1 ``` If only the computed columns need to be returned, `fcompute` provides an efficient alternative: ```r GGDC10S %>% fsubset(Variable == "VA", Country, Year, AGR, SUM) %>% fcompute(AGR_perc = AGR / SUM * 100, AGR_mean = fmean(AGR)) %>% head # # A tibble: 6 × 2 # AGR_perc AGR_mean # # 1 NA 5137561. # 2 NA 5137561. # 3 NA 5137561. # 4 NA 5137561. # 5 43.5 5137561. # 6 40.0 5137561. ``` `ftransform` and `fcompute` are an order of magnitude faster than `mutate`, but they do not support grouped computations using arbitrary functions. We will see that this is hardly a limitation as *collapse* provides very efficient and elegant alternative programming mechanisms... ### 2.2 Replacing and Sweeping out Statistics All statistical (scalar-valued) functions in the collapse package (`fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct`) have a `TRA` argument which can be used to efficiently transform data by either (column-wise) replacing data values with computed statistics or sweeping the statistics out of the data. Operations can be specified using either an integer or quoted operator / string. The 10 operations supported by `TRA` are: * 1 - "replace_fill" : replace and overwrite missing values (same as `mutate`) * 2 - "replace" : replace but preserve missing values * 3 - "-" : subtract (center) * 4 - "-+" : subtract group-statistics but add average of group statistics * 5 - "/" : divide (scale) * 6 - "%" : compute percentages (divide and multiply by 100) * 7 - "+" : add * 8 - "*" : multiply * 9 - "%%" : modulus * 10 - "-%%" : subtract modulus Simple transformations are again straightforward to specify: ```r # This subtracts the median value from all data points i.e. centers on the median GGDC10S %>% num_vars %>% fmedian(TRA = "-") %>% head # # A tibble: 6 × 12 # Year AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 -22 NA NA NA NA NA NA NA NA NA NA NA # 2 -21 NA NA NA NA NA NA NA NA NA NA NA # 3 -20 NA NA NA NA NA NA NA NA NA NA NA # 4 -19 NA NA NA NA NA NA NA NA NA NA NA # 5 -18 -4378. -170. -3717. -168. -1473. -3767. -1173. -959. -3924. -1431. -23149. # 6 -17 -4379. -171. -3717. -168. -1472. -3767. -1173. -959. -3923. -1430. -23147. # This replaces all data points with the mode GGDC10S %>% char_vars %>% fmode(TRA = "replace") %>% head # # A tibble: 6 × 4 # Country Regioncode Region Variable # # 1 USA ASI Asia EMP # 2 USA ASI Asia EMP # 3 USA ASI Asia EMP # 4 USA ASI Asia EMP # 5 USA ASI Asia EMP # 6 USA ASI Asia EMP ``` Similarly for grouped transformations: ```r # Replacing data with the 2nd quartile (25%) GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fnth(0.25, TRA = "replace_fill") %>% head(3) # # A tibble: 3 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 VA BWA 63.5 33.1 27.3 7.36 26.8 31.1 13.2 12.0 33.6 11.5 262. # 2 VA BWA 63.5 33.1 27.3 7.36 26.8 31.1 13.2 12.0 33.6 11.5 262. # 3 VA BWA 63.5 33.1 27.3 7.36 26.8 31.1 13.2 12.0 33.6 11.5 262. # Scaling sectoral data by Variable and Country GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fsd(TRA = "/") %>% head # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 VA BWA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA # 5 VA BWA 0.0270 0.000556 0.000523 3.88e-4 5.11e-4 0.00194 0.00154 5.23e-4 0.00134 # 6 VA BWA 0.0260 0.000397 0.000723 5.03e-4 1.04e-3 0.00220 0.00180 5.83e-4 0.00158 # # ℹ 2 more variables: OTH , SUM ``` The benchmarks below will demonstrate that these internal sweeping and replacement operations fully performed in C++ compute significantly faster than using `dplyr::mutate`, especially as the number of groups grows large. The S3 generic nature of the *Fast Statistical Functions* further allows us to perform grouped mutations on the fly (together with `ftransform` or `fcompute`), without the need of first creating a grouped tibble: ```r # AGR_gmed = TRUE if AGR is greater than it's median value, grouped by Variable and Country # Note: This calls fmedian.default settransform(GGDC10S, AGR_gmed = AGR > fmedian(AGR, list(Variable, Country), TRA = "replace")) tail(GGDC10S, 3) # # A tibble: 3 × 17 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 EGY MENA Middle Ea… EMP 2010 5206. 29.0 2436. 307. 2733. 2977. 1992. 801. 5539. # 2 EGY MENA Middle Ea… EMP 2011 5186. 27.6 2374. 318. 2795. 3020. 2048. 815. 5636. # 3 EGY MENA Middle Ea… EMP 2012 5161. 24.8 2348. 325. 2931. 3110. 2065. 832. 5736. # # ℹ 3 more variables: OTH , SUM , AGR_gmed # Dividing (scaling) the sectoral data (columns 6 through 16) by their grouped standard deviation settransformv(GGDC10S, 6:16, fsd, list(Variable, Country), TRA = "/", apply = FALSE) tail(GGDC10S, 3) # # A tibble: 3 × 17 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 EGY MENA Middle Ea… EMP 2010 8.41 2.28 4.32 3.56 3.62 3.75 3.75 3.14 3.80 # 2 EGY MENA Middle Ea… EMP 2011 8.38 2.17 4.21 3.68 3.70 3.81 3.86 3.19 3.86 # 3 EGY MENA Middle Ea… EMP 2012 8.34 1.95 4.17 3.76 3.88 3.92 3.89 3.26 3.93 # # ℹ 3 more variables: OTH , SUM , AGR_gmed rm(GGDC10S) ``` Weights are easily added to any grouped transformation: ```r # This subtracts weighted group means from the data, using SUM column as weights.. GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fmean(SUM, "-") %>% head # # A tibble: 6 × 13 # Variable Country SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 5 VA BWA 37.5 -1301. -13317. -2965. -529. -2746. -6540. -2157. -4431. -7551. -2613. # 6 VA BWA 39.3 -1302. -13318. -2964. -529. -2745. -6540. -2156. -4431. -7550. -2613. ``` Sequential operations are also easily performed: ```r # This scales and then subtracts the median GGDC10S %>% fselect(Variable, Country, AGR:SUM) %>% fgroup_by(Variable, Country) %>% fsd(TRA = "/") %>% fmedian(TRA = "-") # # A tibble: 5,027 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # * # 1 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 5 VA BWA -0.182 -0.235 -0.183 -0.245 -0.118 -0.0820 -0.0724 -0.0661 -0.108 -0.0848 -0.146 # 6 VA BWA -0.183 -0.235 -0.183 -0.245 -0.117 -0.0817 -0.0722 -0.0660 -0.108 -0.0846 -0.146 # 7 VA BWA -0.180 -0.235 -0.183 -0.245 -0.117 -0.0813 -0.0720 -0.0659 -0.107 -0.0843 -0.145 # 8 VA BWA -0.177 -0.235 -0.183 -0.245 -0.117 -0.0826 -0.0724 -0.0659 -0.107 -0.0841 -0.146 # 9 VA BWA -0.174 -0.235 -0.183 -0.245 -0.117 -0.0823 -0.0717 -0.0661 -0.108 -0.0848 -0.146 # 10 VA BWA -0.173 -0.234 -0.182 -0.243 -0.115 -0.0821 -0.0715 -0.0660 -0.108 -0.0846 -0.145 # # ℹ 5,017 more rows # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Of course it is also possible to combine multiple functions as in the aggregation section, or to add variables to existing data: ```r # This adds a groupwise observation count next to each column add_vars(GGDC10S, seq(7,27,2)) <- GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(AGR:SUM) %>% fnobs("replace_fill") %>% add_stub("N_") head(GGDC10S) # # A tibble: 6 × 27 # Country Regioncode Region Variable Year AGR N_AGR MIN N_MIN MAN N_MAN PU N_PU CON # # 1 BWA SSA Sub-sa… VA 1960 NA 47 NA 47 NA 47 NA 47 NA # 2 BWA SSA Sub-sa… VA 1961 NA 47 NA 47 NA 47 NA 47 NA # 3 BWA SSA Sub-sa… VA 1962 NA 47 NA 47 NA 47 NA 47 NA # 4 BWA SSA Sub-sa… VA 1963 NA 47 NA 47 NA 47 NA 47 NA # 5 BWA SSA Sub-sa… VA 1964 16.3 47 3.49 47 0.737 47 0.104 47 0.660 # 6 BWA SSA Sub-sa… VA 1965 15.7 47 2.50 47 1.02 47 0.135 47 1.35 # # ℹ 13 more variables: N_CON , WRT , N_WRT , TRA , N_TRA , FIRE , # # N_FIRE , GOV , N_GOV , OTH , N_OTH , SUM , N_SUM rm(GGDC10S) ``` There are lots of other examples one could construct using the 10 operations and 14 functions listed above, the examples provided just outline the suggested programming basics. Performance considerations make it very much worthwhile to spend some time and think how complex operations can be implemented in this programming framework, before defining some function in R and applying it to data using `dplyr::mutate`. ### 2.3 More Control using the `TRA` Function Towards this end, calling `TRA()` directly also facilitates more complex and customized operations. Behind the scenes of the `TRA = ...` argument, the *Fast Statistical Functions* first compute the grouped statistics on all columns of the data, and these statistics are then directly fed into a C++ function that uses them to replace or sweep them out of data points in one of the 10 ways described above. This function can also be called directly by the name of `TRA`. Fundamentally, `TRA` is a generalization of `base::sweep` for column-wise grouped operations^[Row-wise operations are not supported by TRA.]. Direct calls to `TRA` enable more control over inputs and outputs. The two operations below are equivalent, although the first is slightly more efficient as it only requires one method dispatch and one check of the inputs: ```r # This divides by the product GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fprod(TRA = "/") %>% head # # A tibble: 6 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA NA NA # 5 1.29e-105 2.81e-127 1.40e-101 4.44e-74 4.19e-102 3.97e-113 6.91e-92 1.01e-97 2.51e-117 # 6 1.24e-105 2.00e-127 1.94e-101 5.75e-74 8.55e-102 4.49e-113 8.08e-92 1.13e-97 2.96e-117 # # ℹ 2 more variables: OTH , SUM # Same thing GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% TRA(fprod(., keep.group_vars = FALSE), "/") %>% head # [same as TRA(.,fprod(., keep.group_vars = FALSE),"/")] # # A tibble: 6 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 NA NA NA NA NA NA NA NA NA # 2 NA NA NA NA NA NA NA NA NA # 3 NA NA NA NA NA NA NA NA NA # 4 NA NA NA NA NA NA NA NA NA # 5 1.29e-105 2.81e-127 1.40e-101 4.44e-74 4.19e-102 3.97e-113 6.91e-92 1.01e-97 2.51e-117 # 6 1.24e-105 2.00e-127 1.94e-101 5.75e-74 8.55e-102 4.49e-113 8.08e-92 1.13e-97 2.96e-117 # # ℹ 2 more variables: OTH , SUM ``` `TRA.grouped_df` was designed such that it matches the columns of the statistics (aggregated columns) to those of the original data, and only transforms matching columns while returning the whole data frame. Thus it is easily possible to only apply a transformation to the first two sectors: ```r # This only demeans Agriculture (AGR) and Mining (MIN) GGDC10S %>% fgroup_by(Variable, Country) %>% TRA(fselect(., AGR, MIN) %>% fmean(keep.group_vars = FALSE), "-") %>% head # # A tibble: 6 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA SSA Sub-s… VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-s… VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA SSA Sub-s… VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA SSA Sub-s… VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA SSA Sub-s… VA 1964 -446. -4505. 0.737 0.104 0.660 6.24 1.66 1.12 4.82 # 6 BWA SSA Sub-s… VA 1965 -446. -4506. 1.02 0.135 1.35 7.06 1.94 1.25 5.70 # # ℹ 2 more variables: OTH , SUM ``` Since `TRA` is already built into all *Fast Statistical Functions* as an argument, it is best used in computations where grouped statistics are computed using some other function. ```r # Same as above, with one line of code using fmean.data.frame and ftransform... GGDC10S %>% ftransform(fmean(list(AGR = AGR, MIN = MIN), list(Variable, Country), TRA = "-")) %>% head # # A tibble: 6 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV # # 1 BWA SSA Sub-s… VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA SSA Sub-s… VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA SSA Sub-s… VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA SSA Sub-s… VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA SSA Sub-s… VA 1964 -446. -4505. 0.737 0.104 0.660 6.24 1.66 1.12 4.82 # 6 BWA SSA Sub-s… VA 1965 -446. -4506. 1.02 0.135 1.35 7.06 1.94 1.25 5.70 # # ℹ 2 more variables: OTH , SUM ``` Another potential use of `TRA` is to do computations in two- or more steps, for example if both aggregated and transformed data are needed, or if computations are more complex and involve other manipulations in-between the aggregating and sweeping part: ```r # Get grouped tibble gGGDC <- GGDC10S %>% fgroup_by(Variable, Country) # Get aggregated data gsumGGDC <- gGGDC %>% fselect(AGR:SUM) %>% fsum head(gsumGGDC) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 88028. 3230. 1.20e5 6307. 4.60e4 1.23e5 4.02e4 3.89e4 1.27e5 6.15e4 6.54e5 # 2 EMP BOL 58817. 3418. 1.43e4 326. 7.49e3 1.72e4 7.04e3 2.72e3 NA 2.41e4 1.35e5 # 3 EMP BRA 1065864. 12773. 4.33e5 22604. 2.19e5 5.28e5 1.27e5 2.74e5 3.29e5 3.54e5 3.36e6 # 4 EMP BWA 8839. 493. 8.49e2 145. 1.19e3 1.71e3 3.93e2 7.21e2 2.87e3 1.30e3 1.85e4 # 5 EMP CHL 44220. 6389. 3.94e4 1850. 1.86e4 4.38e4 1.63e4 1.72e4 NA 6.32e4 2.51e5 # 6 EMP CHN 17264654. 422972. 4.03e6 96364. 1.25e6 1.73e6 8.36e5 2.96e5 1.36e6 1.86e6 2.91e7 # Get transformed (scaled) data head(TRA(gGGDC, gsumGGDC, "/")) # # A tibble: 6 × 16 # Country Regioncode Region Variable Year AGR MIN MAN PU CON WRT # # 1 BWA SSA Sub-sahar… VA 1960 NA NA NA NA NA NA # 2 BWA SSA Sub-sahar… VA 1961 NA NA NA NA NA NA # 3 BWA SSA Sub-sahar… VA 1962 NA NA NA NA NA NA # 4 BWA SSA Sub-sahar… VA 1963 NA NA NA NA NA NA # 5 BWA SSA Sub-sahar… VA 1964 7.50e-4 1.65e-5 1.66e-5 1.03e-5 1.57e-5 6.82e-5 # 6 BWA SSA Sub-sahar… VA 1965 7.24e-4 1.18e-5 2.30e-5 1.33e-5 3.20e-5 7.72e-5 # # ℹ 5 more variables: TRA , FIRE , GOV , OTH , SUM ``` As discussed, whether using the argument to fast statistical functions or `TRA` directly, these data transformations are essentially a two-step process: Statistics are first computed and then used to transform the original data. Although both steps are efficiently done in C++, it would be even more efficient to do them in a single step without materializing all the statistics before transforming the data. Such slightly more efficient functions are provided for the very commonly applied tasks of centering and averaging data by groups (widely known as 'between'-group and 'within'-group transformations), and scaling and centering data by groups (also known as 'standardizing' data). ### 2.4 Faster Centering, Averaging and Standardizing The functions `fbetween` and `fwithin` are slightly more memory efficient implementations of `fmean` invoked with different `TRA` options: ```r GGDC10S %>% # Same as ... %>% fmean(TRA = "replace") fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fbetween %>% tail(2) # # A tibble: 2 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. # 2 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. GGDC10S %>% # Same as ... %>% fmean(TRA = "replace_fill") fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fbetween(fill = TRUE) %>% tail(2) # # A tibble: 2 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. # 2 4444. 34.9 1614. 131. 997. 1307. 799. 320. 2958. NA 12605. GGDC10S %>% # Same as ... %>% fmean(TRA = "-") fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fwithin %>% tail(2) # # A tibble: 2 × 11 # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 742. -7.35 760. 187. 1798. 1713. 1249. 495. 2678. NA 9614. # 2 717. -10.1 734. 194. 1934. 1803. 1266. 512. 2778. NA 9928. ``` Apart from higher speed, `fwithin` has a `mean` argument to assign an arbitrary mean to centered data, the default being `mean = 0`. A very common choice for such an added mean is just the overall mean of the data, which can be added in by invoking `mean = "overall.mean"`: ```r GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) %>% fwithin(mean = "overall.mean") %>% tail(3) # # A tibble: 3 × 13 # Country Variable AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EGY EMP 2527458. 1867903. 5539313. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.71e6 NA 2.16e7 # 2 EGY EMP 2527439. 1867902. 5539251. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.71e6 NA 2.16e7 # 3 EGY EMP 2527413. 1867899. 5539226. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.72e6 NA 2.16e7 ``` This can also be done using weights. The code below uses the `SUM` column as weights, and then for each variable and each group subtracts out the weighted mean, and then adds the overall weighted column mean back to the centered columns. The `SUM` column is just kept as it is and added after the grouping columns. ```r GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) %>% fwithin(SUM, mean = "overall.mean") %>% tail(3) # # A tibble: 3 × 13 # Country Variable SUM AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # # 1 EGY EMP 22020. 429066006. 3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8 NA # 2 EGY EMP 22219. 429065986. 3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8 NA # 3 EGY EMP 22533. 429065961. 3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8 NA ``` Another argument to `fwithin` is the `theta` parameter, allowing partial- or quasi-demeaning operations, e.g. `fwithin(gdata, theta = theta)` is equal to `gdata - theta * fbetween(gdata)`. This is particularly useful to prepare data for variance components (also known as 'random-effects') estimation. Apart from `fbetween` and `fwithin`, the function `fscale` exists to efficiently scale and center data, to avoid sequential calls such as `... %>% fsd(TRA = "/") %>% fmean(TRA = "-")`. ```r # This efficiently scales and centers (i.e. standardizes) the data GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) %>% fscale # # A tibble: 5,027 × 13 # Country Variable AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # * # 1 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 2 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 3 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 4 BWA VA NA NA NA NA NA NA NA NA NA NA NA # 5 BWA VA -0.738 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.596 -0.676 # 6 BWA VA -0.739 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.596 -0.676 # 7 BWA VA -0.736 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.595 -0.676 # 8 BWA VA -0.734 -0.717 -0.668 -0.805 -0.692 -0.604 -0.589 -0.635 -0.655 -0.595 -0.676 # 9 BWA VA -0.730 -0.717 -0.668 -0.805 -0.692 -0.604 -0.588 -0.635 -0.656 -0.596 -0.676 # 10 BWA VA -0.729 -0.716 -0.667 -0.803 -0.690 -0.603 -0.588 -0.635 -0.656 -0.596 -0.675 # # ℹ 5,017 more rows # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` `fscale` also has additional `mean` and `sd` arguments allowing the user to (group-) scale data to an arbitrary mean and standard deviation. Setting `mean = FALSE` just scales the data but preserves the means, and is thus different from `fsd(..., TRA = "/")` which simply divides all values by the standard deviation: ```r # Saving grouped tibble gGGDC <- GGDC10S %>% fgroup_by(Variable, Country) %>% fselect(Country, Variable, AGR:SUM) # Original means head(fmean(gGGDC)) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1420. 52.1 1932. 102. 742. 1.98e3 6.49e2 628. 2043. 9.92e2 1.05e4 # 2 EMP BOL 964. 56.0 235. 5.35 123. 2.82e2 1.15e2 44.6 NA 3.96e2 2.22e3 # 3 EMP BRA 17191. 206. 6991. 365. 3525. 8.51e3 2.05e3 4414. 5307. 5.71e3 5.43e4 # 4 EMP BWA 188. 10.5 18.1 3.09 25.3 3.63e1 8.36e0 15.3 61.1 2.76e1 3.94e2 # 5 EMP CHL 702. 101. 625. 29.4 296. 6.95e2 2.58e2 272. NA 1.00e3 3.98e3 # 6 EMP CHN 287744. 7050. 67144. 1606. 20852. 2.89e4 1.39e4 4929. 22669. 3.10e4 4.86e5 # Mean Preserving Scaling head(fmean(fscale(gGGDC, mean = FALSE))) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1420. 52.1 1932. 102. 742. 1.98e3 6.49e2 628. 2043. 9.92e2 1.05e4 # 2 EMP BOL 964. 56.0 235. 5.35 123. 2.82e2 1.15e2 44.6 NA 3.96e2 2.22e3 # 3 EMP BRA 17191. 206. 6991. 365. 3525. 8.51e3 2.05e3 4414. 5307. 5.71e3 5.43e4 # 4 EMP BWA 188. 10.5 18.1 3.09 25.3 3.63e1 8.36e0 15.3 61.1 2.76e1 3.94e2 # 5 EMP CHL 702. 101. 625. 29.4 296. 6.95e2 2.58e2 272. NA 1.00e3 3.98e3 # 6 EMP CHN 287744. 7050. 67144. 1606. 20852. 2.89e4 1.39e4 4929. 22669. 3.10e4 4.86e5 head(fsd(fscale(gGGDC, mean = FALSE))) # # A tibble: 6 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 EMP ARG 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 # 2 EMP BOL 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 NA 1.00 1.00 # 3 EMP BRA 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 # 4 EMP BWA 1.00 1.00 1.00 1 1.00 1.00 1.00 1 1.00 1.00 1.00 # 5 EMP CHL 1.00 1 1.00 1.00 1.00 1.00 1.00 1.00 NA 1.00 1.00 # 6 EMP CHN 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 ``` One can also set `mean = "overall.mean"`, which group-centers columns on the overall mean as illustrated with `fwithin`. Another interesting option is setting `sd = "within.sd"`. This group-scales data such that every group has a standard deviation equal to the within-standard deviation of the data: ```r # Just using VA data for this example gGGDC <- GGDC10S %>% fsubset(Variable == "VA", Country, AGR:SUM) %>% fgroup_by(Country) # This calculates the within- standard deviation for all columns fsd(num_vars(ungroup(fwithin(gGGDC)))) # AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # 45046972 40122220 75608708 3062688 30811572 44125207 20676901 16030868 20358973 18780869 # SUM # 306429102 # This scales all groups to take on the within- standard deviation while preserving group means fsd(fscale(gGGDC, mean = FALSE, sd = "within.sd")) # # A tibble: 43 × 12 # Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # # 1 ARG 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 2 BOL 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 NA 1.88e7 3.06e8 # 3 BRA 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 4 BWA 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 5 CHL 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 NA 1.88e7 3.06e8 # 6 CHN 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 7 COL 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 NA 1.88e7 3.06e8 # 8 CRI 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 9 DEW 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # 10 DNK 45046972. 40122220. 75608708. 3062688. 3.08e7 4.41e7 2.07e7 1.60e7 2.04e7 1.88e7 3.06e8 # # ℹ 33 more rows ``` A grouped scaling operation with both `mean = "overall.mean"` and `sd = "within.sd"` thus efficiently achieves a harmonization of all groups in the first two moments without changing the fundamental properties (in terms of level and scale) of the data. ### 2.5 Lags / Leads, Differences and Growth Rates This section introduces 3 further powerful *collapse* functions: `flag`, `fdiff` and `fgrowth`. The first function, `flag`, efficiently computes sequences of fully identified lags and leads on time series and panel data. The following code computes 1 fully-identified panel-lag and 1 fully identified panel-lead of each variable in the data: ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% flag(-1:1, Year) # # A tibble: 5,027 × 36 # Country Variable Year F1.AGR AGR L1.AGR F1.MIN MIN L1.MIN F1.MAN MAN L1.MAN F1.PU PU # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 16.3 NA NA 3.49 NA NA 0.737 NA NA 0.104 NA # 5 BWA VA 1964 15.7 16.3 NA 2.50 3.49 NA 1.02 0.737 NA 0.135 0.104 # 6 BWA VA 1965 17.7 15.7 16.3 1.97 2.50 3.49 0.804 1.02 0.737 0.203 0.135 # 7 BWA VA 1966 19.1 17.7 15.7 2.30 1.97 2.50 0.938 0.804 1.02 0.203 0.203 # 8 BWA VA 1967 21.1 19.1 17.7 1.84 2.30 1.97 0.750 0.938 0.804 0.203 0.203 # 9 BWA VA 1968 21.9 21.1 19.1 5.24 1.84 2.30 2.14 0.750 0.938 0.578 0.203 # 10 BWA VA 1969 23.1 21.9 21.1 10.2 5.24 1.84 4.15 2.14 0.750 1.12 0.578 # # ℹ 5,017 more rows # # ℹ 22 more variables: L1.PU , F1.CON , CON , L1.CON , F1.WRT , WRT , # # L1.WRT , F1.TRA , TRA , L1.TRA , F1.FIRE , FIRE , L1.FIRE , # # F1.GOV , GOV , L1.GOV , F1.OTH , OTH , L1.OTH , F1.SUM , # # SUM , L1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` If the time-variable passed does not exactly identify the data (i.e. because of repeated values in each group), all 3 functions will issue appropriate error messages. `flag`, `fdiff` and `fgrowth` support irregular time series and unbalanced panels. It is also possible to omit the time-variable if one is certain that the data is sorted: ```r GGDC10S %>% fselect(Variable, Country,AGR:SUM) %>% fgroup_by(Variable, Country) %>% flag # # A tibble: 5,027 × 13 # Variable Country AGR MIN MAN PU CON WRT TRA FIRE GOV OTH SUM # * # 1 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 2 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 3 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 4 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 5 VA BWA NA NA NA NA NA NA NA NA NA NA NA # 6 VA BWA 16.3 3.49 0.737 0.104 0.660 6.24 1.66 1.12 4.82 2.34 37.5 # 7 VA BWA 15.7 2.50 1.02 0.135 1.35 7.06 1.94 1.25 5.70 2.68 39.3 # 8 VA BWA 17.7 1.97 0.804 0.203 1.35 8.27 2.15 1.36 6.37 2.99 43.1 # 9 VA BWA 19.1 2.30 0.938 0.203 0.897 4.31 1.72 1.54 7.04 3.31 41.4 # 10 VA BWA 21.1 1.84 0.750 0.203 1.22 5.17 2.44 1.03 5.03 2.36 41.1 # # ℹ 5,017 more rows # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` `fdiff` computes sequences of lagged-leaded and iterated differences as well as quasi-differences and log-differences on time series and panel data. The code below computes the 1 and 10 year first and second differences of each variable in the data: ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fdiff(c(1, 10), 1:2, Year) # # A tibble: 5,027 × 47 # Country Variable Year D1.AGR D2.AGR L10D1.AGR L10D2.AGR D1.MIN D2.MIN L10D1.MIN L10D2.MIN D1.MAN # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA NA NA # 6 BWA VA 1965 -0.575 NA NA NA -0.998 NA NA NA 0.282 # 7 BWA VA 1966 1.95 2.53 NA NA -0.525 0.473 NA NA -0.214 # 8 BWA VA 1967 1.47 -0.488 NA NA 0.328 0.854 NA NA 0.134 # 9 BWA VA 1968 1.95 0.488 NA NA -0.460 -0.788 NA NA -0.188 # 10 BWA VA 1969 0.763 -1.19 NA NA 3.41 3.87 NA NA 1.39 # # ℹ 5,017 more rows # # ℹ 35 more variables: D2.MAN , L10D1.MAN , L10D2.MAN , D1.PU , D2.PU , # # L10D1.PU , L10D2.PU , D1.CON , D2.CON , L10D1.CON , L10D2.CON , # # D1.WRT , D2.WRT , L10D1.WRT , L10D2.WRT , D1.TRA , D2.TRA , # # L10D1.TRA , L10D2.TRA , D1.FIRE , D2.FIRE , L10D1.FIRE , # # L10D2.FIRE , D1.GOV , D2.GOV , L10D1.GOV , L10D2.GOV , D1.OTH , # # D2.OTH , L10D1.OTH , L10D2.OTH , D1.SUM , D2.SUM , L10D1.SUM , … # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Log-differences of the form $log(x_t) - log(x_{t-s})$ are also easily computed. ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fdiff(c(1, 10), 1, Year, log = TRUE) # # A tibble: 5,027 × 25 # Country Variable Year Dlog1.AGR L10Dlog1.AGR Dlog1.MIN L10Dlog1.MIN Dlog1.MAN L10Dlog1.MAN # * # 1 BWA VA 1960 NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA # 6 BWA VA 1965 -0.0359 NA -0.336 NA 0.324 NA # 7 BWA VA 1966 0.117 NA -0.236 NA -0.236 NA # 8 BWA VA 1967 0.0796 NA 0.154 NA 0.154 NA # 9 BWA VA 1968 0.0972 NA -0.223 NA -0.223 NA # 10 BWA VA 1969 0.0355 NA 1.05 NA 1.05 NA # # ℹ 5,017 more rows # # ℹ 16 more variables: Dlog1.PU , L10Dlog1.PU , Dlog1.CON , L10Dlog1.CON , # # Dlog1.WRT , L10Dlog1.WRT , Dlog1.TRA , L10Dlog1.TRA , Dlog1.FIRE , # # L10Dlog1.FIRE , Dlog1.GOV , L10Dlog1.GOV , Dlog1.OTH , L10Dlog1.OTH , # # Dlog1.SUM , L10Dlog1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` Finally, it is also possible to compute quasi-differences and quasi-log-differences of the form $x_t - \rho x_{t-s}$ or $log(x_t) - \rho log(x_{t-s})$: ```r GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fdiff(t = Year, rho = 0.95) # # A tibble: 5,027 × 14 # Country Variable Year AGR MIN MAN PU CON WRT TRA FIRE GOV OTH # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA NA NA NA # 6 BWA VA 1965 0.241 -0.824 0.318 0.0359 0.719 1.13 0.363 0.184 1.11 0.454 # 7 BWA VA 1966 2.74 -0.401 -0.163 0.0743 0.0673 1.56 0.312 0.174 0.955 0.449 # 8 BWA VA 1967 2.35 0.427 0.174 0.0101 -0.381 -3.55 -0.323 0.246 0.988 0.465 # 9 BWA VA 1968 2.91 -0.345 -0.141 0.0101 0.365 1.08 0.804 -0.427 -1.66 -0.780 # 10 BWA VA 1969 1.82 3.50 1.43 0.385 2.32 0.841 0.397 0.252 0.818 0.385 # # ℹ 5,017 more rows # # ℹ 1 more variable: SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` The quasi-differencing feature was added to `fdiff` to facilitate the preparation of time series and panel data for least-squares estimations suffering from serial correlation following Cochrane & Orcutt (1949). Finally, `fgrowth` computes growth rates in the same way. By default exact growth rates are computed in percentage terms using $(x_t-x_{t-s}) / x_{t-s} \times 100$ (the default argument is `scale = 100`). The user can also request growth rates obtained by log-differencing using $log(x_t/ x_{t-s}) \times 100$. ```r # Exact growth rates, computed as: (x/lag(x) - 1) * 100 GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year) # # A tibble: 5,027 × 25 # Country Variable Year G1.AGR L10G1.AGR G1.MIN L10G1.MIN G1.MAN L10G1.MAN G1.PU L10G1.PU G1.CON # * # 1 BWA VA 1960 NA NA NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA NA NA # 6 BWA VA 1965 -3.52 NA -28.6 NA 38.2 NA 29.4 NA 104. # 7 BWA VA 1966 12.4 NA -21.1 NA -21.1 NA 50 NA 0 # 8 BWA VA 1967 8.29 NA 16.7 NA 16.7 NA 0 NA -33.3 # 9 BWA VA 1968 10.2 NA -20 NA -20 NA 0 NA 35.7 # 10 BWA VA 1969 3.61 NA 185. NA 185. NA 185. NA 185. # # ℹ 5,017 more rows # # ℹ 13 more variables: L10G1.CON , G1.WRT , L10G1.WRT , G1.TRA , # # L10G1.TRA , G1.FIRE , L10G1.FIRE , G1.GOV , L10G1.GOV , G1.OTH , # # L10G1.OTH , G1.SUM , L10G1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] # Log-difference growth rates, computed as: log(x / lag(x)) * 100 GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year, logdiff = TRUE) # # A tibble: 5,027 × 25 # Country Variable Year Dlog1.AGR L10Dlog1.AGR Dlog1.MIN L10Dlog1.MIN Dlog1.MAN L10Dlog1.MAN # * # 1 BWA VA 1960 NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA # 6 BWA VA 1965 -3.59 NA -33.6 NA 32.4 NA # 7 BWA VA 1966 11.7 NA -23.6 NA -23.6 NA # 8 BWA VA 1967 7.96 NA 15.4 NA 15.4 NA # 9 BWA VA 1968 9.72 NA -22.3 NA -22.3 NA # 10 BWA VA 1969 3.55 NA 105. NA 105. NA # # ℹ 5,017 more rows # # ℹ 16 more variables: Dlog1.PU , L10Dlog1.PU , Dlog1.CON , L10Dlog1.CON , # # Dlog1.WRT , L10Dlog1.WRT , Dlog1.TRA , L10Dlog1.TRA , Dlog1.FIRE , # # L10Dlog1.FIRE , Dlog1.GOV , L10Dlog1.GOV , Dlog1.OTH , L10Dlog1.OTH , # # Dlog1.SUM , L10Dlog1.SUM # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` `fdiff` and `fgrowth` can also perform leaded (forward) differences and growth rates (i.e. `... %>% fgrowth(-c(1, 10), 1:2, Year)` would compute one and 10-year leaded first and second differences). Again it is possible to perform sequential operations: ```r # This computes the 1 and 10-year growth rates, for the current period and lagged by one period GGDC10S %>% fselect(-Region, -Regioncode) %>% fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year) %>% flag(0:1, Year) # # A tibble: 5,027 × 47 # Country Variable Year G1.AGR L1.G1.AGR L10G1.AGR L1.L10G1.AGR G1.MIN L1.G1.MIN L10G1.MIN # * # 1 BWA VA 1960 NA NA NA NA NA NA NA # 2 BWA VA 1961 NA NA NA NA NA NA NA # 3 BWA VA 1962 NA NA NA NA NA NA NA # 4 BWA VA 1963 NA NA NA NA NA NA NA # 5 BWA VA 1964 NA NA NA NA NA NA NA # 6 BWA VA 1965 -3.52 NA NA NA -28.6 NA NA # 7 BWA VA 1966 12.4 -3.52 NA NA -21.1 -28.6 NA # 8 BWA VA 1967 8.29 12.4 NA NA 16.7 -21.1 NA # 9 BWA VA 1968 10.2 8.29 NA NA -20 16.7 NA # 10 BWA VA 1969 3.61 10.2 NA NA 185. -20 NA # # ℹ 5,017 more rows # # ℹ 37 more variables: L1.L10G1.MIN , G1.MAN , L1.G1.MAN , L10G1.MAN , # # L1.L10G1.MAN , G1.PU , L1.G1.PU , L10G1.PU , L1.L10G1.PU , # # G1.CON , L1.G1.CON , L10G1.CON , L1.L10G1.CON , G1.WRT , # # L1.G1.WRT , L10G1.WRT , L1.L10G1.WRT , G1.TRA , L1.G1.TRA , # # L10G1.TRA , L1.L10G1.TRA , G1.FIRE , L1.G1.FIRE , L10G1.FIRE , # # L1.L10G1.FIRE , G1.GOV , L1.G1.GOV , L10G1.GOV , L1.L10G1.GOV , … # # Grouped by: Variable, Country [85 | 59 (7.7) 4-65] ``` ## 3. Benchmarks This section seeks to demonstrate that the functionality introduced in the preceding 2 sections indeed produces code that evaluates substantially faster than native *dplyr*. To do this properly, the different components of a typical piped call (selecting / subsetting, ordering, grouping, and performing some computation) are benchmarked separately on 2 different data sizes. All benchmarks are run on a Windows 8.1 laptop with a 2x 2.2 GHZ Intel i5 processor, 8GB DDR3 RAM and a Samsung 850 EVO SSD hard drive. ### 3.1 Data Benchmarks are run on the original `GGDC10S` data used throughout this vignette and a larger dataset with approx. 1 million observations, obtained by replicating and row-binding `GGDC10S` 200 times while maintaining unique groups. ```r # This shows the groups in GGDC10S GRP(GGDC10S, ~ Variable + Country) # collapse grouping object of length 5027 with 85 ordered groups # # Call: GRP.default(X = GGDC10S, by = ~Variable + Country), X is unsorted # # Distribution of group sizes: # Min. 1st Qu. Median Mean 3rd Qu. Max. # 4.00 53.00 62.00 59.14 63.00 65.00 # # Groups with sizes: # EMP.ARG EMP.BOL EMP.BRA EMP.BWA EMP.CHL EMP.CHN # 62 61 62 52 63 62 # --- # VA.TWN VA.TZA VA.USA VA.VEN VA.ZAF VA.ZMB # 63 52 65 63 52 52 # This replicates the data 200 times data <- replicate(200, GGDC10S, simplify = FALSE) # This function adds a number i to the country and variable columns of each dataset uniquify <- function(x, i) ftransform(x, lapply(unclass(x)[c(1,4)], paste0, i)) # Making datasets unique and row-binding them data <- unlist2d(Map(uniquify, data, as.list(1:200)), idcols = FALSE) fdim(data) # [1] 1005400 16 # This shows the groups in the replicated data GRP(data, ~ Variable + Country) # collapse grouping object of length 1005400 with 17000 ordered groups # # Call: GRP.default(X = data, by = ~Variable + Country), X is unsorted # # Distribution of group sizes: # Min. 1st Qu. Median Mean 3rd Qu. Max. # 4.00 53.00 62.00 59.14 63.00 65.00 # # Groups with sizes: # EMP1.ARG1 EMP1.BOL1 EMP1.BRA1 EMP1.BWA1 EMP1.CHL1 EMP1.CHN1 # 62 61 62 52 63 62 # --- # VA99.TWN99 VA99.TZA99 VA99.USA99 VA99.VEN99 VA99.ZAF99 VA99.ZMB99 # 63 52 65 63 52 52 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3184710 170.1 8862174 473.3 NA 8862174 473.3 # Vcells 23965820 182.9 147787078 1127.6 16384 445825141 3401.4 ``` ### 3.1 Selecting, Subsetting, Ordering and Grouping ```r ## Selecting columns # Small microbenchmark(dplyr = select(GGDC10S, Country, Variable, AGR:SUM), collapse = fselect(GGDC10S, Country, Variable, AGR:SUM)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 400.775 410.7585 425.43117 416.396 424.637 820.041 100 # collapse 2.911 3.4645 4.59856 4.469 5.412 15.293 100 # Large microbenchmark(dplyr = select(data, Country, Variable, AGR:SUM), collapse = fselect(data, Country, Variable, AGR:SUM)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 388.926 396.429 412.67730 402.9890 411.0455 728.734 100 # collapse 2.870 3.280 4.44686 3.8335 5.3300 12.669 100 ## Subsetting columns # Small microbenchmark(dplyr = filter(GGDC10S, Variable == "VA"), collapse = fsubset(GGDC10S, Variable == "VA")) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 374.084 394.4405 409.23986 401.0005 414.3050 716.475 100 # collapse 39.278 48.2775 55.85307 55.5550 60.4545 103.320 100 # Large microbenchmark(dplyr = filter(data, Variable == "VA"), collapse = fsubset(data, Variable == "VA")) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 4.487409 5.242752 8.352270 5.653223 6.434048 159.13658 100 # collapse 2.840808 3.082359 3.469128 3.163478 3.302714 16.56047 100 ## Ordering rows # Small microbenchmark(dplyr = arrange(GGDC10S, desc(Country), Variable, Year), collapse = roworder(GGDC10S, -Country, Variable, Year)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 1715.112 1867.4270 1983.4726 2015.109 2080.7500 2367.791 100 # collapse 192.495 232.4085 256.3878 247.968 258.7715 1055.381 100 # Large microbenchmark(dplyr = arrange(data, desc(Country), Variable, Year), collapse = roworder(data, -Country, Variable, Year), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 89.37512 89.37512 101.05180 101.05180 112.72848 112.72848 2 # collapse 66.46703 66.46703 67.45254 67.45254 68.43806 68.43806 2 ## Grouping # Small microbenchmark(dplyr = group_by(GGDC10S, Country, Variable), collapse = fgroup_by(GGDC10S, Country, Variable)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 778.713 815.1825 911.3484 874.2225 960.3840 1529.874 100 # collapse 146.534 157.6245 198.5921 165.0660 177.3455 1484.241 100 # Large microbenchmark(dplyr = group_by(data, Country, Variable), collapse = fgroup_by(data, Country, Variable), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 34.20294 34.62839 34.88041 34.88432 35.07821 35.48279 10 # collapse 27.89972 28.03211 28.55175 28.36954 29.32283 29.54206 10 ## Computing a new column # Small microbenchmark(dplyr = mutate(GGDC10S, NEW = AGR+1), collapse = ftransform(GGDC10S, NEW = AGR+1)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 317.463 321.7270 333.38822 324.9660 333.7810 631.564 100 # collapse 8.897 11.0495 12.95354 12.4435 14.2065 38.991 100 # Large microbenchmark(dplyr = mutate(data, NEW = AGR+1), collapse = ftransform(data, NEW = AGR+1)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 637.878 1084.225 1330.006 1164.6665 1291.2335 15869.05 100 # collapse 210.740 657.025 1021.434 698.3735 781.7675 16725.09 100 ## All combined with pipes # Small microbenchmark(dplyr = filter(GGDC10S, Variable == "VA") %>% select(Country, Year, AGR:SUM) %>% arrange(desc(Country), Year) %>% mutate(NEW = AGR+1) %>% group_by(Country), collapse = fsubset(GGDC10S, Variable == "VA", Country, Year, AGR:SUM) %>% roworder(-Country, Year) %>% ftransform(NEW = AGR+1) %>% fgroup_by(Country)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 2982.340 3416.325 3525.7983 3538.464 3668.516 5034.021 100 # collapse 136.858 186.632 214.4681 211.683 243.130 314.470 100 # Large microbenchmark(dplyr = filter(data, Variable == "VA") %>% select(Country, Year, AGR:SUM) %>% arrange(desc(Country), Year) %>% mutate(NEW = AGR+1) %>% group_by(Country), collapse = fsubset(data, Variable == "VA", Country, Year, AGR:SUM) %>% roworder(-Country, Year) %>% ftransform(NEW = AGR+1) %>% fgroup_by(Country), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 7.917182 7.997378 8.142653 8.109943 8.292291 8.423163 10 # collapse 3.080289 3.104028 3.150153 3.140969 3.188365 3.251259 10 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3184728 170.1 8862174 473.3 NA 8862174 473.3 # Vcells 23970594 182.9 75772825 578.2 16384 445825141 3401.4 ``` ### 3.1 Aggregation ```r ## Grouping the data cgGGDC10S <- fgroup_by(GGDC10S, Variable, Country) %>% fselect(-Region, -Regioncode) gGGDC10S <- group_by(GGDC10S, Variable, Country) %>% fselect(-Region, -Regioncode) cgdata <- fgroup_by(data, Variable, Country) %>% fselect(-Region, -Regioncode) gdata <- group_by(data, Variable, Country) %>% fselect(-Region, -Regioncode) rm(data, GGDC10S) gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3201723 171 8862174 473.3 NA 8862174 473.3 # Vcells 23589381 180 75772825 578.2 16384 445825141 3401.4 ## Conversion of Grouping object: This time would be required extra in all hybrid calls ## i.e. when calling collapse functions on data grouped with dplyr::group_by # Small microbenchmark(GRP(gGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # GRP(gGGDC10S) 8.692 9.2455 10.16021 9.4915 10.086 39.196 100 # Large microbenchmark(GRP(gdata)) # Unit: microseconds # expr min lq mean median uq max neval # GRP(gdata) 885.641 1160.915 1248.258 1237.236 1323.234 1651.398 100 ## Sum # Small microbenchmark(dplyr = summarise_all(gGGDC10S, sum, na.rm = TRUE), collapse = fsum(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 3017.723 3354.1895 3733.4739 3620.9560 3738.441 22135.736 100 # collapse 218.120 227.3655 236.7693 235.1965 244.852 270.805 100 # Large microbenchmark(dplyr = summarise_all(gdata, sum, na.rm = TRUE), collapse = fsum(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 272.9737 279.91024 305.02067 283.59737 303.57122 448.07629 10 # collapse 41.5330 41.63214 41.88717 41.77062 41.96059 42.78662 10 ## Mean # Small microbenchmark(dplyr = summarise_all(gGGDC10S, mean.default, na.rm = TRUE), collapse = fmean(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 4360.104 4596.6740 5125.4194 4754.791 5005.710 37144.852 100 # collapse 169.084 174.3935 185.4594 183.434 194.832 221.933 100 # Large microbenchmark(dplyr = summarise_all(gdata, mean.default, na.rm = TRUE), collapse = fmean(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 623.5123 642.83748 704.39836 681.32260 786.82731 829.74435 10 # collapse 31.7636 31.88037 32.00222 31.99445 32.08209 32.43875 10 ## Median # Small microbenchmark(dplyr = summarise_all(gGGDC10S, median, na.rm = TRUE), collapse = fmedian(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 14399.118 14849.933 16170.3500 14982.5685 15145.892 33613.235 100 # collapse 137.596 164.902 189.2056 178.1245 214.676 248.624 100 # Large microbenchmark(dplyr = summarise_all(gdata, median, na.rm = TRUE), collapse = fmedian(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 2826.83036 2826.83036 2828.12912 2828.12912 2829.42788 2829.42788 2 # collapse 19.95564 19.95564 19.98524 19.98524 20.01485 20.01485 2 ## Standard Deviation # Small microbenchmark(dplyr = summarise_all(gGGDC10S, sd, na.rm = TRUE), collapse = fsd(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 8332.635 8612.5215 9365.1216 8712.766 8989.086 25087.982 100 # collapse 242.228 251.0225 269.7849 273.552 282.326 321.891 100 # Large microbenchmark(dplyr = summarise_all(gdata, sd, na.rm = TRUE), collapse = fsd(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 1375.80363 1375.80363 1409.60358 1409.60358 1443.40352 1443.40352 2 # collapse 46.21713 46.21713 56.88205 56.88205 67.54697 67.54697 2 ## Maximum # Small microbenchmark(dplyr = summarise_all(gGGDC10S, max, na.rm = TRUE), collapse = fmax(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 39964.504 41008.8560 43577.92707 41448.273 44195.1095 58816.550 100 # collapse 68.798 74.7225 87.83389 77.572 100.9215 129.519 100 # Large microbenchmark(dplyr = summarise_all(gdata, max, na.rm = TRUE), collapse = fmax(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 480.83804 490.9982 540.7374 517.86136 533.85723 687.14713 10 # collapse 11.40116 11.7745 11.9366 11.85156 11.94908 13.18318 10 ## First Value # Small microbenchmark(dplyr = summarise_all(gGGDC10S, first), collapse = ffirst(cgGGDC10S, na.rm = FALSE)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 4147.888 4242.249 4801.88966 4383.248 4701.532 19254.215 100 # collapse 11.685 14.227 26.25476 24.764 35.301 137.514 100 # Large microbenchmark(dplyr = summarise_all(gdata, first), collapse = ffirst(cgdata, na.rm = FALSE), times = 10) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 530327.66 558767.393 637499.226 596503.08 672801.103 969373.660 10 # collapse 872.89 999.088 1087.845 1068.87 1204.416 1289.327 10 ## Number of Distinct Values # Small microbenchmark(dplyr = summarise_all(gGGDC10S, n_distinct, na.rm = TRUE), collapse = fndistinct(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 11316.574 11600.847 12573.1010 11759.435 11939.487 31659.667 100 # collapse 189.051 205.164 226.0933 235.422 239.604 443.661 100 # Large microbenchmark(dplyr = summarise_all(gdata, n_distinct, na.rm = TRUE), collapse = fndistinct(cgdata), times = 5) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 2044.13376 2110.16926 2133.91960 2138.07456 2154.39797 2222.82246 5 # collapse 30.65443 30.94582 31.51081 31.17123 31.17972 33.60286 5 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3972309 212.2 8862174 473.3 NA 8862174 473.3 # Vcells 24857303 189.7 75772825 578.2 16384 445825141 3401.4 ``` Below are some additional benchmarks for weighted aggregations and aggregations using the statistical mode, which cannot easily or efficiently be performed with *dplyr*. ```r ## Weighted Mean # Small microbenchmark(fmean(cgGGDC10S, SUM)) # Unit: microseconds # expr min lq mean median uq max neval # fmean(cgGGDC10S, SUM) 195.488 200.4285 218.2836 211.1295 218.8375 444.276 100 # Large microbenchmark(fmean(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fmean(cgdata, SUM) 34.73516 35.28276 35.66689 35.32257 36.44802 36.80722 10 ## Weighted Standard-Deviation # Small microbenchmark(fsd(cgGGDC10S, SUM)) # Unit: microseconds # expr min lq mean median uq max neval # fsd(cgGGDC10S, SUM) 243.048 244.606 249.2181 246.9635 249.444 323.9 100 # Large microbenchmark(fsd(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fsd(cgdata, SUM) 44.905 44.93116 45.15391 45.01095 45.22677 46.14689 10 ## Statistical Mode # Small microbenchmark(fmode(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # fmode(cgGGDC10S) 245.098 248.3575 253.4809 250.6945 253.9335 420.619 100 # Large microbenchmark(fmode(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fmode(cgdata) 40.26151 41.82082 41.63019 41.88382 42.0232 42.0587 10 ## Weighted Statistical Mode # Small microbenchmark(fmode(cgGGDC10S, SUM)) # Unit: microseconds # expr min lq mean median uq max neval # fmode(cgGGDC10S, SUM) 330.993 333.535 337.7744 334.5395 337.3685 447.187 100 # Large microbenchmark(fmode(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fmode(cgdata, SUM) 57.69815 57.78466 57.98187 57.84567 58.09942 58.81835 10 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3971768 212.2 8862174 473.3 NA 8862174 473.3 # Vcells 24853915 189.7 75772825 578.2 16384 445825141 3401.4 ``` ### 3.2 Transformation ```r ## Replacing with group sum # Small microbenchmark(dplyr = mutate_all(gGGDC10S, sum, na.rm = TRUE), collapse = fsum(cgGGDC10S, TRA = "replace_fill")) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 13088.102 13223.340 14388.9000 13359.7680 14380.05 29060.554 100 # collapse 238.456 273.757 292.1693 293.9905 312.01 388.106 100 # Large microbenchmark(dplyr = mutate_all(gdata, sum, na.rm = TRUE), collapse = fsum(cgdata, TRA = "replace_fill"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 391.63618 679.62609 662.91807 716.40975 729.7527 749.4973 10 # collapse 49.63788 50.24189 61.77658 55.18416 63.4596 111.6039 10 ## Dividing by group sum # Small microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) x/sum(x, na.rm = TRUE)), collapse = fsum(cgGGDC10S, TRA = "/")) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 13058.992 13203.8450 14294.3733 13321.41 13880.796 42300.028 100 # collapse 242.884 268.5295 278.8541 274.29 294.585 330.255 100 # Large microbenchmark(dplyr = mutate_all(gdata, function(x) x/sum(x, na.rm = TRUE)), collapse = fsum(cgdata, TRA = "/"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 474.9046 654.6199 796.14248 907.32863 942.32567 999.2501 10 # collapse 49.3542 50.9056 84.66647 52.05635 74.51705 325.4319 10 ## Centering # Small microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) x-mean.default(x, na.rm = TRUE)), collapse = fwithin(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 14460.04 14769.4095 15977.4942 14859.815 15013.421 37113.077 100 # collapse 203.77 229.7845 246.5043 242.638 266.664 293.191 100 # Large microbenchmark(dplyr = mutate_all(gdata, function(x) x-mean.default(x, na.rm = TRUE)), collapse = fwithin(cgdata), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 893.06503 925.50231 1217.2225 1259.34620 1445.254 1545.5490 10 # collapse 43.90731 56.97093 143.4797 73.39498 152.872 429.3341 10 ## Centering and Scaling (Standardizing) # Small microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) (x-mean.default(x, na.rm = TRUE))/sd(x, na.rm = TRUE)), collapse = fscale(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr 20275.033 21145.524 24976.1242 22214.190 25194.0285 79869.435 100 # collapse 277.775 304.958 323.3613 314.388 338.2705 437.388 100 # Large microbenchmark(dplyr = mutate_all(gdata, function(x) (x-mean.default(x, na.rm = TRUE))/sd(x, na.rm = TRUE)), collapse = fscale(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr 2118.97696 2118.97696 2315.9282 2315.9282 2512.87938 2512.87938 2 # collapse 60.17144 60.17144 60.6284 60.6284 61.08537 61.08537 2 ## Lag # Small microbenchmark(dplyr_unordered = mutate(gGGDC10S, across(everything(), dplyr::lag)), collapse_unordered = flag(cgGGDC10S), dplyr_ordered = mutate(gGGDC10S, across(everything(), \(x) dplyr::lag(x, order_by = Year))), collapse_ordered = flag(cgGGDC10S, t = Year)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr_unordered 14495.386 14796.101 17579.85413 15265.3250 15889.7550 49137.721 100 # collapse_unordered 48.544 75.071 90.29225 86.6330 109.6545 225.377 100 # dplyr_ordered 24893.437 25327.607 27521.59809 25904.9275 27136.2190 51312.074 100 # collapse_ordered 80.196 107.953 120.85160 117.5675 131.6715 189.051 100 # Large microbenchmark(dplyr_unordered = mutate(gdata, across(everything(), dplyr::lag)), collapse_unordered = flag(cgdata), dplyr_ordered = mutate(gdata, across(everything(), \(x) dplyr::lag(x, order_by = Year))), collapse_ordered = flag(cgdata, t = Year), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr_unordered 3461.11500 3461.11500 3471.95821 3471.95821 3482.80142 3482.80142 2 # collapse_unordered 13.71897 13.71897 211.59809 211.59809 409.47721 409.47721 2 # dplyr_ordered 5786.57522 5786.57522 6291.90389 6291.90389 6797.23256 6797.23256 2 # collapse_ordered 25.14399 25.14399 35.36102 35.36102 45.57806 45.57806 2 ## First-Difference (unordered) # Small microbenchmark(dplyr_unordered = mutate_all(gGGDC10S, function(x) x - dplyr::lag(x)), collapse_unordered = fdiff(cgGGDC10S)) # Unit: microseconds # expr min lq mean median uq max neval # dplyr_unordered 25613.274 25878.0725 27951.41954 26257.3225 27226.808 43048.893 100 # collapse_unordered 56.539 72.3035 95.72147 91.6965 102.664 254.077 100 # Large microbenchmark(dplyr_unordered = mutate_all(gdata, function(x) x - dplyr::lag(x)), collapse_unordered = fdiff(cgdata), times = 2) # Unit: milliseconds # expr min lq mean median uq max neval # dplyr_unordered 3287.88487 3287.88487 3425.69703 3425.69703 3563.509 3563.509 2 # collapse_unordered 16.58971 16.58971 23.36885 23.36885 30.148 30.148 2 gc() # used (Mb) gc trigger (Mb) limit (Mb) max used (Mb) # Ncells 3978800 212.5 8862175 473.3 NA 8862175 473.3 # Vcells 24870572 189.8 72805912 555.5 16384 445825141 3401.4 ``` Below again some benchmarks for transformations not easily of efficiently performed with *dplyr*, such as centering on the overall mean, mean-preserving scaling, weighted scaling and centering, sequences of lags / leads, (iterated) panel-differences and growth rates. ```r # Centering on overall mean microbenchmark(fwithin(cgdata, mean = "overall.mean"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(cgdata, mean = "overall.mean") 44.66782 48.03445 52.04073 50.07953 53.67134 71.13221 10 # Weighted Centering microbenchmark(fwithin(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fwithin(cgdata, SUM) 40.45204 42.86833 46.55326 46.18277 47.28202 57.82673 10 microbenchmark(fwithin(cgdata, SUM, mean = "overall.mean"), times = 10) # Unit: milliseconds # expr min lq mean median uq max # fwithin(cgdata, SUM, mean = "overall.mean") 39.99279 40.32256 43.0638 40.60269 41.34366 54.45542 # neval # 10 # Weighted Scaling and Standardizing microbenchmark(fsd(cgdata, SUM, TRA = "/"), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fsd(cgdata, SUM, TRA = "/") 50.19536 50.9145 55.12553 53.23862 56.27094 67.46816 10 microbenchmark(fscale(cgdata, SUM), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fscale(cgdata, SUM) 54.14792 57.64584 60.83251 59.88025 61.16425 72.31928 10 # Sequence of lags and leads microbenchmark(flag(cgdata, -1:1), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # flag(cgdata, -1:1) 26.03902 48.02695 194.8518 257.0652 264.5479 276.5348 10 # Iterated difference microbenchmark(fdiff(cgdata, 1, 2), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fdiff(cgdata, 1, 2) 38.76001 39.83896 44.93731 41.08887 48.98348 63.42528 10 # Growth Rate microbenchmark(fgrowth(cgdata,1), times = 10) # Unit: milliseconds # expr min lq mean median uq max neval # fgrowth(cgdata, 1) 11.58627 13.81528 18.05776 14.03489 22.34279 31.15811 10 ``` ## References Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). "Patterns of Structural Change in Developing Countries." . In J. Weiss, & M. Tribe (Eds.), *Routledge Handbook of Industry and Development.* (pp. 65-83). Routledge. Cochrane, D. & Orcutt, G. H. (1949). "Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Terms". *Journal of the American Statistical Association.* 44 (245): 32–61. Prais, S. J. & Winsten, C. B. (1954). "Trend Estimators and Serial Correlation". *Cowles Commission Discussion Paper No. 383.* Chicago. collapse/inst/doc/collapse_documentation.html0000644000176200001440000003062215202627532021226 0ustar liggesusers collapse Documentation and Resources

collapse Documentation and Resources

Sebastian Krantz

2026-05-18

collapse is a C/C++ based package for data transformation and statistical computing in R. It’s aims are:

  1. To facilitate complex data transformation, exploration and computing tasks in R.
  2. To help make R code fast, flexible, parsimonious and programmer friendly.

Documentation comes in 7 different forms:

Built-In Structured Documentation

After installing collapse, you can call help("collapse-documentation") which will produce a central help page providing a broad overview of the entire functionality of the package, including direct links to all function documentation pages and links to 13 further topical documentation pages (names in .COLLAPSE_TOPICS) describing how clusters of related functions work together.

Thus collapse comes with a fully structured hierarchical documentation which you can browse within R - and that provides everything necessary to fully understand the package. The Documentation is also available online.

The package page under help("collapse-package") provides some general information about the package and its design philosophy, as well as a compact set of examples covering important functionality.

Reading help("collapse-package") and help("collapse-documentation") is the most comprehensive way to get acquainted with the package. help("collapse-documentation") is always the most up-to-date resource.

DeepWiki

DeepWiki is an AI-powered platform designed to automatically generate structured, interactive documentation for software repositories, primarily on GitHub. Developed by Cognition AI—the same laboratory behind the autonomous AI engineer Devin—it serves as a dynamic, “Wikipedia-like†encyclopedia for codebases.

While not more comprehensive or accurate than the structured documentation, it is great to learn more about the internal structure of collapse and use a chatbot (Devin) to ask questions about or write code using collapse.

You can access the collapse DeepWiki here.

JSS Article

The collapse article is published in the Journal of Statistical Software (volume 116, issue 1). If you want to ‘read something concise’ about collapse, this is the best place to start.

Cheatsheet

A fairly up-to-date (v2.0) cheatsheet compactly summarizes the package.

Vignettes

Updated vignettes are

The other vignettes (only available online) do not cover major features introduced in versions >= 1.7, but contain much useful information and examples:

  • Introduction to collapse : Introduces key features in a structured way

  • collapse and dplyr : Demonstrates the integration of collapse with dplyr / tidyverse workflows and associated performance improvements

  • collapse and plm: Demonstrates the integration of collapse with plm and shows examples of efficient programming with panel data

  • collapse and data.table: Shows how collapse and data.table may be used together in a harmonious way

Blog

I maintain a blog linked to Rbloggers.com where I introduced collapse with some compact posts covering central functionality. Among these, the post about programming with collapse is useful for developers.

Presentations and Slides

  • I have presented collapse (v1.8) in some level of detail at useR 2022. A 2h video recording that provides a quite comprehensive introduction is available here. The corresponding slides are available here.

  • I have recently presented collapse (v2.1) and the fastverse at a workshop on “Speeding Up Empirical Research: Tools and Techniques for Fast Computing†organized by the Bank of Portugal in December 2025. My 45-minute talk focused on two advanced applications in international trade and spatial network analysis/package development. You can find the materials (slides and recording) here.

collapse/inst/doc/collapse_and_plm.html0000644000176200001440000323356015202627531017777 0ustar liggesusers collapse and plm

collapse and plm

Fast Transformation and Exploration of Panel Data

Sebastian Krantz

2021-06-27

This vignette focuses on the integration of collapse and the popular plm (‘Linear Models for Panel Data’) package by Yves Croissant, Giovanni Millo and Kevin Tappe. It will demonstrate the utility of the pseries and pdata.frame classes introduced in plm together with the corresponding methods for fast collapse functions (implemented in C or C++), to extend and facilitate extremely fast computations on panel-vectors and panel data frames (20-100 times faster than native plm). The collapse package should enable R programmers to - with very little effort - write high-performance code in the domain of panel data exploration and panel data econometrics.


Notes:

  • To learn more about collapse, see the ‘Introduction to collapse’ vignette or the built-in structured documentation available under help("collapse-documentation") after installing the package. In addition help("collapse-package") provides a compact set of examples for quick-start.

  • Documentation and vignettes can also be viewed online.


The vignette is structured as follows:

  • Part 1 introduces collapse’s fast functions and associated transformation operators to compute various transformations on panel data, and delivers some benchmarks.

  • Part 2 uses these functions to explore panel data a bit and introduce additional functions for summary statistics, panel-autocorrelations and testing fixed effects.

  • Part 3 finally provides an example programming application by coding a slightly extended and very efficient Hausman and Taylor (1981) estimator.

For this vignette we will use a dataset (wlddev) supplied with collapse containing a panel of 5 key development indicators taken from the World Bank Development Indicators Database:

library(collapse)

head(wlddev)
#       country iso3c       date year decade     region     income  OECD PCGDP LIFEEX GINI       ODA
# 1 Afghanistan   AFG 1961-01-01 1960   1960 South Asia Low income FALSE    NA 32.446   NA 116769997
# 2 Afghanistan   AFG 1962-01-01 1961   1960 South Asia Low income FALSE    NA 32.962   NA 232080002
# 3 Afghanistan   AFG 1963-01-01 1962   1960 South Asia Low income FALSE    NA 33.471   NA 112839996
# 4 Afghanistan   AFG 1964-01-01 1963   1960 South Asia Low income FALSE    NA 33.971   NA 237720001
# 5 Afghanistan   AFG 1965-01-01 1964   1960 South Asia Low income FALSE    NA 34.463   NA 295920013
# 6 Afghanistan   AFG 1966-01-01 1965   1960 South Asia Low income FALSE    NA 34.948   NA 341839996
#       POP
# 1 8996973
# 2 9169410
# 3 9351441
# 4 9543205
# 5 9744781
# 6 9956320

fnobs(wlddev)      # This column-wise counts the number of observations
# country   iso3c    date    year  decade  region  income    OECD   PCGDP  LIFEEX    GINI     ODA 
#   13176   13176   13176   13176   13176   13176   13176   13176    9470   11670    1744    8608 
#     POP 
#   12919

fndistinct(wlddev) # This counts the number of distinct values
# country   iso3c    date    year  decade  region  income    OECD   PCGDP  LIFEEX    GINI     ODA 
#     216     216      61      61       7       7       4       2    9470   10548     368    7832 
#     POP 
#   12877

Part 1: Fast Transformation of Panel Data

First let us convert this data to a plm panel data.frame (class pdata.frame):

library(plm)

# This creates a panel data frame
pwlddev <- pdata.frame(wlddev, index = c("iso3c", "year"))

str(pwlddev, give.attr = FALSE)
# Classes 'pdata.frame' and 'data.frame':   13176 obs. of  13 variables:
#  $ country: 'pseries' Named chr  "Aruba" "Aruba" "Aruba" "Aruba" ...
#  $ iso3c  : Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ date   : pseries, format: "1961-01-01" "1962-01-01" "1963-01-01" ...
#  $ year   : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ...
#  $ decade : 'pseries' Named int  1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ...
#  $ region : Factor w/ 7 levels "East Asia & Pacific",..: 3 3 3 3 3 3 3 3 3 3 ...
#  $ income : Factor w/ 4 levels "High income",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ OECD   : 'pseries' Named logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
#  $ PCGDP  : 'pseries' Named num  NA NA NA NA NA NA NA NA NA NA ...
#  $ LIFEEX : 'pseries' Named num  65.7 66.1 66.4 66.8 67.1 ...
#  $ GINI   : 'pseries' Named num  NA NA NA NA NA NA NA NA NA NA ...
#  $ ODA    : 'pseries' Named num  NA NA NA NA NA NA NA NA NA NA ...
#  $ POP    : 'pseries' Named num  54211 55438 56225 56695 57032 ...

# A pdata.frame has an index attribute attached [retrieved using index(pwlddev) or attr(pwlddev, "index")]
str(index(pwlddev))
# Classes 'pindex' and 'data.frame':    13176 obs. of  2 variables:
#  $ iso3c: Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ...

# This shows the individual and time dimensions
pdim(pwlddev)
# Balanced Panel: n = 216, T = 61, N = 13176

A plm::pdata.frame is a data.frame with panel identifiers attached as a list of factors in an index attribute (non-factor index variables are converted to factor). Each column in that data.frame is a Panel Series (plm::pseries), which also has the panel identifiers attached:

# Panel Series of GDP per Capita and Life-Expectancy at Birth
PCGDP <- pwlddev$PCGDP
LIFEEX <- pwlddev$LIFEEX
str(LIFEEX)
#  'pseries' Named num [1:13176] 65.7 66.1 66.4 66.8 67.1 ...
#  - attr(*, "names")= chr [1:13176] "ABW-1960" "ABW-1961" "ABW-1962" "ABW-1963" ...
#  - attr(*, "index")=Classes 'pindex' and 'data.frame':    13176 obs. of  2 variables:
#   ..$ iso3c: Factor w/ 216 levels "ABW","AFG","AGO",..: 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ...

Now that we have explored the basic data structures provided in the plm package, let’s compute some transformations on them:

1.1 Between and Within Transformations

The functions fbetween and fbetween can be used to compute efficient between and within transformations on panel vectors and panel data.frames:

# Between-Transformations
head(fbetween(LIFEEX))                        # Between individual (default)
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
# 72.40653 72.40653 72.40653 72.40653 72.40653 72.40653

head(fbetween(LIFEEX, effect = "year"))       # Between time
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
# 53.91206 54.47441 54.85718 55.20272 55.66802 56.12963

# Within-Transformations
head(fwithin(LIFEEX))                         # Within individuals (default)
#  ABW-1960  ABW-1961  ABW-1962  ABW-1963  ABW-1964  ABW-1965 
# -6.744533 -6.332533 -5.962533 -5.619533 -5.293533 -4.971533

head(fwithin(LIFEEX, effect = "year"))        # Within time
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
# 11.74994 11.59959 11.58682 11.58428 11.44498 11.30537

by default na.rm = TRUE thus both functions skip (preserve) missing values in the data (which is the default for all collapse functions). For fbetween the output behavior can be altered with the option fill: Setting fill = TRUE will compute the group-means on the complete cases in each group (as long as na.rm = TRUE), but replace all values in each group with the group mean (hence overwriting or ‘filling up’ missing values):

# This preserves missing values in the output
head(fbetween(PCGDP), 30)
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 
#       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA 
# ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 
#       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA 
# ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 
#       NA       NA       NA       NA 25413.84 25413.84 25413.84 25413.84

# This replaces all individuals with the group mean
head(fbetween(PCGDP, fill = TRUE), 30)
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 
# 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 
# ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 
# 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 
# ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 
# 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84 25413.84

In fwithin the mean argument allows to set an arbitrary data mean (different from 0) after the data is centered. In grouped centering task, as sensible choice for such an added mean would be the overall mean of the data series, enabled by the option mean = "overall.mean". This will add the overall mean of the series back to the data after subtracting out group means, and thus preserve the level of the data (and will only change the intercept when employed in a regression):

# This performed standard grouped centering
head(fwithin(LIFEEX))
#  ABW-1960  ABW-1961  ABW-1962  ABW-1963  ABW-1964  ABW-1965 
# -6.744533 -6.332533 -5.962533 -5.619533 -5.293533 -4.971533

# This adds the overall average Life-Expectancy (across countries) to the country-demeaned series
head(fwithin(LIFEEX, mean = "overall.mean"))
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
# 57.55177 57.96377 58.33377 58.67677 59.00277 59.32477

fbetween and fwithin can also be applied to pdata.frame’s where they will perform these computations variable by variable:

head(fbetween(num_vars(pwlddev)), 3)
#            decade PCGDP   LIFEEX GINI ODA      POP
# ABW-1960 1985.574    NA 72.40653   NA  NA 76268.63
# ABW-1961 1985.574    NA 72.40653   NA  NA 76268.63
# ABW-1962 1985.574    NA 72.40653   NA  NA 76268.63

head(fbetween(num_vars(pwlddev), fill = TRUE), 3)
#            decade    PCGDP   LIFEEX GINI      ODA      POP
# ABW-1960 1985.574 25413.84 72.40653   NA 33245000 76268.63
# ABW-1961 1985.574 25413.84 72.40653   NA 33245000 76268.63
# ABW-1962 1985.574 25413.84 72.40653   NA 33245000 76268.63

head(fwithin(num_vars(pwlddev)), 3)
#             decade PCGDP    LIFEEX GINI ODA       POP
# ABW-1960 -25.57377    NA -6.744533   NA  NA -22057.63
# ABW-1961 -25.57377    NA -6.332533   NA  NA -20830.63
# ABW-1962 -25.57377    NA -5.962533   NA  NA -20043.63

head(fwithin(num_vars(pwlddev), mean = "overall.mean"), 3)
#          decade PCGDP   LIFEEX GINI ODA      POP
# ABW-1960   1960    NA 57.55177   NA  NA 24223914
# ABW-1961   1960    NA 57.96377   NA  NA 24225141
# ABW-1962   1960    NA 58.33377   NA  NA 24225928

Now next to fbetween and fwithin there also exist short versions B and W, which are referred to as transformation operators. These are essentially wrappers around fbetween and fwithin and provide the same functionality, but are more parsimonious to employ in regression formulas and also offer additional features when applied to panel data.frames. For panel series, B and W are exact analogues to fbetween and fwithin, just under a shorter name:

identical(fbetween(PCGDP), B(PCGDP))
# [1] TRUE
identical(fbetween(PCGDP, fill = TRUE), B(PCGDP, fill = TRUE))
# [1] TRUE
identical(fwithin(PCGDP), W(PCGDP))
# [1] TRUE
identical(fwithin(PCGDP, mean = "overall.mean"), W(PCGDP, mean = "overall.mean"))
# [1] TRUE

When applied to panel data.frames, B and W offer some additional utility by (a) allowing you to select columns to transform using the cols argument (default is cols = is.numeric, so by default all numeric columns will be selected for transformation), (b) allowing you to add a prefix to the transformed columns with the stub argument (default is stub = "B." for B and stub = "W." for W) and (c) preserving the panel-id’s with the keep.ids argument (default keep.ids = TRUE):

head(B(pwlddev), 3)
#          iso3c year B.decade B.PCGDP B.LIFEEX B.GINI B.ODA    B.POP
# ABW-1960   ABW 1960 1985.574      NA 72.40653     NA    NA 76268.63
# ABW-1961   ABW 1961 1985.574      NA 72.40653     NA    NA 76268.63
# ABW-1962   ABW 1962 1985.574      NA 72.40653     NA    NA 76268.63

head(W(pwlddev, cols = 9:12), 3) # Here using the cols argument
#          iso3c year W.PCGDP  W.LIFEEX W.GINI W.ODA
# ABW-1960   ABW 1960      NA -6.744533     NA    NA
# ABW-1961   ABW 1961      NA -6.332533     NA    NA
# ABW-1962   ABW 1962      NA -5.962533     NA    NA

fbetween / B and fwithin / W also support weighted computations. This of course applies more to panel-survey settings, but for the sake of illustration suppose we wanted to weight our between and within transformations by the population of these countries:

# This replaces values by the POP-weighted group mean and also preserves the weight variable (POP, argument keep.w = TRUE)
head(B(pwlddev, w = ~ POP), 3)
#          iso3c year   POP B.decade B.PCGDP B.LIFEEX B.GINI B.ODA
# ABW-1960   ABW 1960 54211 1988.976      NA 72.96257     NA    NA
# ABW-1961   ABW 1961 55438 1988.976      NA 72.96257     NA    NA
# ABW-1962   ABW 1962 56225 1988.976      NA 72.96257     NA    NA

# This centers values on the POP-weighted group mean
head(W(pwlddev, w = ~ POP, cols = c("PCGDP","LIFEEX","GINI")), 3)
#          iso3c year   POP W.PCGDP  W.LIFEEX W.GINI
# ABW-1960   ABW 1960 54211      NA -7.300566     NA
# ABW-1961   ABW 1961 55438      NA -6.888566     NA
# ABW-1962   ABW 1962 56225      NA -6.518566     NA

# This centers values on the POP-weighted group mean and also adds the overall POP-weighted mean of the data
head(W(pwlddev, w = ~ POP, cols = c("PCGDP","LIFEEX","GINI"), mean = "overall.mean"), 3)
#          iso3c year   POP W.PCGDP W.LIFEEX W.GINI
# ABW-1960   ABW 1960 54211      NA 58.58012     NA
# ABW-1961   ABW 1961 55438      NA 58.99212     NA
# ABW-1962   ABW 1962 56225      NA 59.36212     NA

As shown above, with B and W the weight column can also be passed as a formula or character string, whereas fbetween and fwithin require the all inputs to be passed directly in terms of data (i.e. fbetween(get_vars(pwlddev, 9:11), w = pwlddev$POP)), and the weight vector or id columns are never preserved in the output. Therefore in most applications B and W are probably more convenient for quick use, whereas fbetween and fwithin are the preferred programmers choice, also because they have a little less R-overhead which makes them a tiny bit faster.

1.2 Higher-Dimensional Between and Within Transformations

Analogous to fbetween / B and fwithin / W, collapse provides a duo of functions and operators fhdbetween / HDB and fhdwithin / HDW to efficiently average and center data on multiple groups. The credit herefore goes to Laurent Berge, the author of the fixest package who wrote an efficient C-implementation of the alternating-projections algorithm to perform this task. fhdbetween / HDB and fhdwithin / HDW enrich this implementation (available in the function fixest::demean) by providing more options regarding missing values, and also allowing continuous covariates and (full) interactions to be projected out alongside factors. The methods for pseries and pdata.frame’s are however rather simple, as they simply simultaneously center panel-vectors on various panel-identifiers in the index (which can be more than 2, the default is to center on all identifiers):

# This simultaneously averages Life-Expectancy across countries and years
head(HDB(LIFEEX)) # (same as running a regression on country and year dummies and taking the fitted values)
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
# 62.36179 62.85981 63.24258 63.65245 64.11774 64.52503

# This simultaneously centers Life-Expectenacy on countries and years
head(HDW(LIFEEX)) # (same as running a regression on country and year dummies and taking the residuals)
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
# 3.300210 3.214193 3.201424 3.134554 2.995255 2.909975

The architecture of fhdbetween / HDB and fhdwithin / HDW differs a bit from fbetween / B and fwithin / W. This is essentially a consequence of the underlying C++-implementation (accessed through fixest::demean), which was not built to accommodate missing values. fhdbetween / HDB and fhdwithin / HDW therefore both have an argument fill = TRUE (the default), which stipulates that missing values in the data are preserved in the output. The collapse default na.rm = TRUE again ensures that only complete cases are used for the computation:

# Missing values are preserved in the output when fill = TRUE (the default)
head(HDB(PCGDP), 30)
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 ABW-1966 ABW-1967 ABW-1968 ABW-1969 ABW-1970 
#       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA 
# ABW-1971 ABW-1972 ABW-1973 ABW-1974 ABW-1975 ABW-1976 ABW-1977 ABW-1978 ABW-1979 ABW-1980 ABW-1981 
#       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA       NA 
# ABW-1982 ABW-1983 ABW-1984 ABW-1985 ABW-1986 ABW-1987 ABW-1988 ABW-1989 
#       NA       NA       NA       NA 21833.32 22132.25 22479.20 22772.31

# When fill = FALSE, only the complete cases are returned
nofill <- HDB(PCGDP, fill = FALSE)
head(nofill, 30)
# ABW-1986 ABW-1987 ABW-1988 ABW-1989 ABW-1990 ABW-1991 ABW-1992 ABW-1993 ABW-1994 ABW-1995 ABW-1996 
# 21833.32 22132.25 22479.20 22772.31 23064.29 23060.00 23089.75 23115.36 23343.25 23595.16 23823.11 
# ABW-1997 ABW-1998 ABW-1999 ABW-2000 ABW-2001 ABW-2002 ABW-2003 ABW-2004 ABW-2005 ABW-2006 ABW-2007 
# 24149.44 24424.69 24727.46 25205.98 25399.16 25603.11 25851.29 26349.64 26665.54 27224.58 27772.82 
# ABW-2008 ABW-2009 ABW-2010 ABW-2011 ABW-2012 ABW-2013 ABW-2014 ABW-2015 
# 27769.52 27002.95 27218.84 27424.18 27471.49 27660.92 27889.34 28107.78

# This results in a shorter panel-vector
length(nofill)
# [1] 9470
length(PCGDP)
# [1] 13176

# The cases that were missing and removed from the output are available as an attribute
head(attr(nofill, "na.rm"), 30)
#  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 59 60 61 62

In the pdata.frame methods there are 3 different choices how to deal with missing values. The default for the plm classes in variable.wise = TRUE, which will essentially sequentially apply fhdbetween.pseries and fhdwithin.pseries (with the default fill = TRUE) to all columns. This is the same behavior as in fbetween / B and fwithin / W, which also consider the column-wise complete obs:

# This column-wise centers the data on countries and years
tail(HDW(pwlddev), 10)
#          HDW.decade HDW.PCGDP HDW.LIFEEX  HDW.GINI   HDW.ODA  HDW.POP
# ZWE-2011          0 -4632.971  -8.080748 -3.663217 118306300 -4547122
# ZWE-2012          0 -4523.505  -6.271385        NA 385526419 -4749368
# ZWE-2013          0 -4710.576  -4.753056        NA 149910333 -4903132
# ZWE-2014          0 -4931.693  -3.568136        NA  93295114 -5059317
# ZWE-2015          0 -5148.895  -2.685053        NA 150833589 -5224484
# ZWE-2016          0 -5433.809  -2.203219        NA -27844184 -5404667
# ZWE-2017          0 -5645.022  -1.920365 -1.964138  10266318 -5591762
# ZWE-2018          0 -5938.794  -1.759333        NA  59646823 -5774326
# ZWE-2019          0 -5710.646  -1.669415  5.627356 223473855 -5946725
# ZWE-2020          0        NA         NA        NA        NA       NA

If variable.wise = FALSE, fhdbetween / HDB and fhdwithin / HDW will only consider the complete cases in the dataset, but still return a dataset of the same dimensions (as long as fill = TRUE), resulting in some rows all-missing:

# This centers the complete cases of the data data on countries and years and keeps missing cases
tail(HDW(pwlddev, variable.wise = FALSE), 10)
#          HDW.decade HDW.PCGDP HDW.LIFEEX  HDW.GINI    HDW.ODA  HDW.POP
# ZWE-2011          0  517.6924  -4.379840 -3.839653 -176176494 -3042247
# ZWE-2012         NA        NA         NA        NA         NA       NA
# ZWE-2013         NA        NA         NA        NA         NA       NA
# ZWE-2014         NA        NA         NA        NA         NA       NA
# ZWE-2015         NA        NA         NA        NA         NA       NA
# ZWE-2016         NA        NA         NA        NA         NA       NA
# ZWE-2017          0 -128.5240   1.971143 -1.314869  -67497466  1936716
# ZWE-2018         NA        NA         NA        NA         NA       NA
# ZWE-2019          0 -389.1684   2.408697  5.154522  243673961  1105530
# ZWE-2020         NA        NA         NA        NA         NA       NA

Finally, if also fill = FALSE, the behavior is the same as in the pseries method: Missing cases are removed from the data:

# This centers the complete cases of the data data on countries and years, and removes missing cases
res <- HDW(pwlddev, fill = FALSE)
tail(res, 10)
#          HDW.decade   HDW.PCGDP HDW.LIFEEX   HDW.GINI    HDW.ODA    HDW.POP
# ZMB-1996          0   534.39373 -3.6445256  -4.744748 -174237036  4911230.7
# ZMB-1998          0   201.58094 -4.1708951  -5.085621 -492258601   644947.7
# ZMB-2002          0   250.78234 -2.9085522 -10.912265   81848768 -1027712.3
# ZMB-2004          0   -72.94954 -1.9629513   1.494340  396830282 -3774596.6
# ZMB-2006          0  -308.55937 -0.4975872   2.407226  485998870 -2255101.6
# ZMB-2010          0  -428.16949  3.9600416   4.497547 -148714637 -4174306.2
# ZMB-2015          0 -1106.52213  8.4099983   7.553052 -335529320 -4962997.8
# ZWE-2011          0   517.69244 -4.3798401  -3.839653 -176176494 -3042246.9
# ZWE-2017          0  -128.52402  1.9711431  -1.314869  -67497466  1936716.5
# ZWE-2019          0  -389.16842  2.4086971   5.154522  243673961  1105530.5

tail(attr(res, "na.rm"))
# [1] 13169 13170 13171 13172 13174 13176

Notes: (1) Because of the different missing case options and associated challenges, panel-identifiers are not preserved in HDB and HDW. (2) The default variable.wise = TRUE and fill = TRUE was only set for the pseries and pdata.frame methods, to harmonize the default implementations with fbetween / B and fwithin / W for these classes. In the standard default, matrix and data.frame methods, the defaults are variable.wise = FALSE and fill = FALSE (i.e. missing cases are removed beforehand), which is generally more efficient.

1.3 Scaling and Centering

Next to the above functions for grouped centering and averaging, the function / operator pair fscale / STD can be used to efficiently standardize (i.e. scale and center) panel data along an arbitrary dimension. The architecture is identical to that of fwithin / W or fbetween / B.

# This standardizes GDP per capita in each country
STD_PCGDP <- STD(PCGDP)

# Checks:
head(fmean(STD_PCGDP, index(STD_PCGDP, 1)))
#           ABW           AFG           AGO           ALB           AND           ARE 
# -1.422473e-15  2.528841e-16 -6.189493e-16 -2.275957e-16 -9.281464e-16 -6.661338e-17
head(fsd(STD_PCGDP, index(STD_PCGDP, 1)))
# ABW AFG AGO ALB AND ARE 
#   1   1   1   1   1   1

# This standardizes GDP per capita in each year
STD_PCGDP_T <- STD(PCGDP, effect = "year")

# Checks:
head(fmean(STD_PCGDP_T, index(STD_PCGDP_T, 2)))
#          1960          1961          1962          1963          1964          1965 
#  9.882205e-17  3.496021e-16  1.889741e-17 -2.185013e-16 -1.724389e-16  2.616954e-16
head(fsd(STD_PCGDP_T, index(STD_PCGDP_T, 2)))
# 1960 1961 1962 1963 1964 1965 
#    1    1    1    1    1    1

And similarly for pdata.frame’s:

head(STD(pwlddev, cols = 9:12))
#          iso3c year STD.PCGDP STD.LIFEEX STD.GINI STD.ODA
# ABW-1960   ABW 1960        NA  -2.372636       NA      NA
# ABW-1961   ABW 1961        NA  -2.227700       NA      NA
# ABW-1962   ABW 1962        NA  -2.097539       NA      NA
# ABW-1963   ABW 1963        NA  -1.976876       NA      NA
# ABW-1964   ABW 1964        NA  -1.862193       NA      NA
# ABW-1965   ABW 1965        NA  -1.748918       NA      NA

head(STD(pwlddev, cols = 9:12, effect = "year"))
#          iso3c year STD.PCGDP STD.LIFEEX STD.GINI STD.ODA
# ABW-1960   ABW 1960        NA  0.9609854       NA      NA
# ABW-1961   ABW 1961        NA  0.9485730       NA      NA
# ABW-1962   ABW 1962        NA  0.9585105       NA      NA
# ABW-1963   ABW 1963        NA  0.9669638       NA      NA
# ABW-1964   ABW 1964        NA  0.9579477       NA      NA
# ABW-1965   ABW 1965        NA  0.9556529       NA      NA

More customized scaling can be done with the help of the mean and sd arguments to fscale / STD. By default mean = 0 and sd = 1, but these could be assigned any numeric values:

# This will scale the data such that mean mean within each country is 5 and the standard deviation is 3
qsu(fscale(pwlddev$PCGDP, mean = 5, sd = 3))
#              N/T  Mean     SD      Min     Max
# Overall     9466     5  2.968  -6.1908  16.257
# Between      202     5      0        5       5
# Within   46.8614     5  2.968  -6.1908  16.257

Even further customization (i.e. setting means and standard deviations for each group and / or each column) can of course be achieved by calling collapse::TRA on the result of fscale to sweep out an appropriate set of means and standard deviations.

Scaling without centering can be done with the option mean = FALSE. This will also preserve the mean of the data overall and within each group:

# Scaling without centering: Mean preserving with fscale / STD
qsu(fscale(pwlddev$PCGDP, mean = FALSE, sd = 3))
#              N/T        Mean          SD         Min         Max
# Overall     9466  12031.4627  17803.3537    247.7598   131349.27
# Between      202  12169.2793  18055.6626    253.1886  131342.669
# Within   46.8614  12031.4627       2.968  12020.2718  12042.7196

# Scaling without centering can also be done using fsd, but this does not preserve the mean
qsu(fsd(pwlddev$PCGDP, index(pwlddev, 1), TRA = "/"))
#              N/T    Mean      SD     Min      Max
# Overall     9466   4.247   3.192  0.0579   26.647
# Between      202  4.6036  3.5846  0.8207  24.8111
# Within   46.8614   4.247  0.9893  0.5167   7.9993

Finally a special kind of data harmonization in the first two moments can be done by setting mean = "overall.mean" and sd = "within.sd" in a grouped scaling task. This will harmonize the data across groups such that the mean of each group is equal to the overall data mean and the standard deviation equal to the within standard deviation (= the standard deviation calculated on the group-centered series):

fmean(pwlddev$PCGDP)  # Overall mean
# [1] 12048.78
fsd(W(pwlddev$PCGDP)) # Within sd
# [1] 6723.681

# Scaling and centerin such that the mean of each country is the overall mean, and the sd of each country is the within sd
qsu(fscale(pwlddev$PCGDP, mean = "overall.mean", sd = "within.sd"))
#              N/T       Mean         SD          Min         Max
# Overall     9466  12048.778  6651.9052  -13032.4333  37278.2175
# Between      202  12048.778          0    12048.778   12048.778
# Within   46.8614  12048.778  6651.9052  -13032.4333  37278.2175

All of this seamlessly generalizes to weighted scaling an centering, using the w argument to add a weight vector.

1.4 Panel Lags / Leads, Differences and Growth Rates

With flag / L / F, fdiff / D and fgrowth / G, collapse provides a fast and comprehensive C++ based solution to the computation of (sequences of) lags / leads and (sequences of) lagged / leaded and suitably iterated (quasi-, log-) differences and growth rates on panel data. The pseries and pdata.frame methods to these functions and associated transformation operators use the panel-identifiers in the ‘index’ attached to these objects (where the last variable in the ‘index’ is taken as the time-variable and the variables before that are taken as individual identifiers) to perform fast fully-identified time-dependent operations on panel data, without the need of sorting the data.

With flag / L / F, it is easy to lag or lead pseries:

# A panel-lag
head(flag(LIFEEX))
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
#       NA   65.662   66.074   66.444   66.787   67.113

# A panel-lead
head(flag(LIFEEX, -1))
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
#   66.074   66.444   66.787   67.113   67.435   67.762

# The lag and lead operators are even more parsimonious to employ:
all_identical(L(LIFEEX), flag(LIFEEX), lag(LIFEEX))
# [1] TRUE
all_identical(F(LIFEEX), flag(LIFEEX, -1), lead(LIFEEX))
# [1] TRUE

It is also possible to compute a sequence of lags / leads using flag or one of the operators:

# sequence of panel- lags and leads
head(flag(LIFEEX, -1:3))
#              F1     --     L1     L2     L3
# ABW-1960 66.074 65.662     NA     NA     NA
# ABW-1961 66.444 66.074 65.662     NA     NA
# ABW-1962 66.787 66.444 66.074 65.662     NA
# ABW-1963 67.113 66.787 66.444 66.074 65.662
# ABW-1964 67.435 67.113 66.787 66.444 66.074
# ABW-1965 67.762 67.435 67.113 66.787 66.444

all_identical(L(LIFEEX, -1:3), F(LIFEEX, 1:-3), flag(LIFEEX, -1:3))
# [1] TRUE

# The native plm implementation also returns a matrix of lags but with different column names
head(lag(LIFEEX, -1:3), 4)
#              -1      0      1      2      3
# ABW-1960 66.074 65.662     NA     NA     NA
# ABW-1961 66.444 66.074 65.662     NA     NA
# ABW-1962 66.787 66.444 66.074 65.662     NA
# ABW-1963 67.113 66.787 66.444 66.074 65.662

Of course the lag orders may be unevenly spaced, i.e. L(x, -1:3*12) would compute seasonal lags on monthly data. On pdata.frame’s, the effects of flag and L / F differ insofar that flag will just lag the entire dataset without preserving identifiers (although the index attribute is always preserved), whereas L / F by default (cols = is.numeric) select the numeric variables and add the panel-id’s on the left (default keep.ids = TRUE):

# This lags the entire data
head(flag(pwlddev))
#          country iso3c       date year decade                    region      income  OECD PCGDP
# ABW-1960    <NA>  <NA>       <NA> <NA>     NA                      <NA>        <NA>    NA    NA
# ABW-1961   Aruba   ABW 1961-01-01 1960   1960 Latin America & Caribbean High income FALSE    NA
# ABW-1962   Aruba   ABW 1962-01-01 1961   1960 Latin America & Caribbean High income FALSE    NA
# ABW-1963   Aruba   ABW 1963-01-01 1962   1960 Latin America & Caribbean High income FALSE    NA
# ABW-1964   Aruba   ABW 1964-01-01 1963   1960 Latin America & Caribbean High income FALSE    NA
# ABW-1965   Aruba   ABW 1965-01-01 1964   1960 Latin America & Caribbean High income FALSE    NA
#          LIFEEX GINI ODA   POP
# ABW-1960     NA   NA  NA    NA
# ABW-1961 65.662   NA  NA 54211
# ABW-1962 66.074   NA  NA 55438
# ABW-1963 66.444   NA  NA 56225
# ABW-1964 66.787   NA  NA 56695
# ABW-1965 67.113   NA  NA 57032

# This lags only numeric columns and preserves panel-id's
head(L(pwlddev))
#          iso3c year L1.decade L1.PCGDP L1.LIFEEX L1.GINI L1.ODA L1.POP
# ABW-1960   ABW 1960        NA       NA        NA      NA     NA     NA
# ABW-1961   ABW 1961      1960       NA    65.662      NA     NA  54211
# ABW-1962   ABW 1962      1960       NA    66.074      NA     NA  55438
# ABW-1963   ABW 1963      1960       NA    66.444      NA     NA  56225
# ABW-1964   ABW 1964      1960       NA    66.787      NA     NA  56695
# ABW-1965   ABW 1965      1960       NA    67.113      NA     NA  57032

# This lags only columns 9 through 12 and preserves panel-id's
head(L(pwlddev, cols = 9:12))
#          iso3c year L1.PCGDP L1.LIFEEX L1.GINI L1.ODA
# ABW-1960   ABW 1960       NA        NA      NA     NA
# ABW-1961   ABW 1961       NA    65.662      NA     NA
# ABW-1962   ABW 1962       NA    66.074      NA     NA
# ABW-1963   ABW 1963       NA    66.444      NA     NA
# ABW-1964   ABW 1964       NA    66.787      NA     NA
# ABW-1965   ABW 1965       NA    67.113      NA     NA

We can also easily compute a sequence of lags / leads on a panel data.frame:

# This lags only columns 9 through 12 and preserves panel-id's
head(L(pwlddev, -1:3, cols = 9:12))
#          iso3c year F1.PCGDP PCGDP L1.PCGDP L2.PCGDP L3.PCGDP F1.LIFEEX LIFEEX L1.LIFEEX L2.LIFEEX
# ABW-1960   ABW 1960       NA    NA       NA       NA       NA    66.074 65.662        NA        NA
# ABW-1961   ABW 1961       NA    NA       NA       NA       NA    66.444 66.074    65.662        NA
# ABW-1962   ABW 1962       NA    NA       NA       NA       NA    66.787 66.444    66.074    65.662
# ABW-1963   ABW 1963       NA    NA       NA       NA       NA    67.113 66.787    66.444    66.074
# ABW-1964   ABW 1964       NA    NA       NA       NA       NA    67.435 67.113    66.787    66.444
# ABW-1965   ABW 1965       NA    NA       NA       NA       NA    67.762 67.435    67.113    66.787
#          L3.LIFEEX F1.GINI GINI L1.GINI L2.GINI L3.GINI F1.ODA ODA L1.ODA L2.ODA L3.ODA
# ABW-1960        NA      NA   NA      NA      NA      NA     NA  NA     NA     NA     NA
# ABW-1961        NA      NA   NA      NA      NA      NA     NA  NA     NA     NA     NA
# ABW-1962        NA      NA   NA      NA      NA      NA     NA  NA     NA     NA     NA
# ABW-1963    65.662      NA   NA      NA      NA      NA     NA  NA     NA     NA     NA
# ABW-1964    66.074      NA   NA      NA      NA      NA     NA  NA     NA     NA     NA
# ABW-1965    66.444      NA   NA      NA      NA      NA     NA  NA     NA     NA     NA

Essentially the same functionality applies to fdiff / D and fgrowth / G, with the main differences that these functions also have a diff argument to determine the number of iterations:

# Panel-difference of Life Expectancy
head(fdiff(LIFEEX))
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
#       NA    0.412    0.370    0.343    0.326    0.322

# Second panel-difference
head(fdiff(LIFEEX, diff = 2))
# ABW-1960 ABW-1961 ABW-1962 ABW-1963 ABW-1964 ABW-1965 
#       NA       NA   -0.042   -0.027   -0.017   -0.004

# Panel-growth rate of Life Expectancy
head(fgrowth(LIFEEX))
#  ABW-1960  ABW-1961  ABW-1962  ABW-1963  ABW-1964  ABW-1965 
#        NA 0.6274558 0.5599782 0.5162242 0.4881189 0.4797878

# Growth rate of growth rate of Life Expectancy
head(fgrowth(LIFEEX, diff = 2))
#   ABW-1960   ABW-1961   ABW-1962   ABW-1963   ABW-1964   ABW-1965 
#         NA         NA -10.754153  -7.813521  -5.444387  -1.706782

identical(D(LIFEEX), fdiff(LIFEEX))
# [1] TRUE
identical(G(LIFEEX), fgrowth(LIFEEX))
# [1] TRUE
identical(fdiff(LIFEEX), diff(LIFEEX)) # Same as plm::diff.pseries (which does not compute iterated panel-differences)
# [1] TRUE

By default, growth rates are calculated in percentage terms which is set by the default argument scale = 100. It is also possible to compute log-differences with fdiff(.., log = TRUE) or the Dlog operator, and growth rates in percentage terms based on log-differences using fgrowth(.., logdiff = TRUE).

# Panel log-difference of Life Expectancy
head(Dlog(LIFEEX))
#    ABW-1960    ABW-1961    ABW-1962    ABW-1963    ABW-1964    ABW-1965 
#          NA 0.006254955 0.005584162 0.005148963 0.004869315 0.004786405

# Panel log-difference growth rate (in percentage terms) of Life Expectancy
head(G(LIFEEX, logdiff = TRUE))
#  ABW-1960  ABW-1961  ABW-1962  ABW-1963  ABW-1964  ABW-1965 
#        NA 0.6254955 0.5584162 0.5148963 0.4869315 0.4786405

It is also possible to compute sequences of lagged / leaded and iterated differences, log-differences and growth rates:

# first and second forward-difference and first and second difference of lags 1-3 of Life-Expectancy
head(D(LIFEEX, -1:3, 1:2))
#             FD1    FD2     --    D1     D2  L2D1   L2D2  L3D1 L3D2
# ABW-1960 -0.412 -0.042 65.662    NA     NA    NA     NA    NA   NA
# ABW-1961 -0.370 -0.027 66.074 0.412     NA    NA     NA    NA   NA
# ABW-1962 -0.343 -0.017 66.444 0.370 -0.042 0.782     NA    NA   NA
# ABW-1963 -0.326 -0.004 66.787 0.343 -0.027 0.713     NA 1.125   NA
# ABW-1964 -0.322  0.005 67.113 0.326 -0.017 0.669 -0.113 1.039   NA
# ABW-1965 -0.327  0.006 67.435 0.322 -0.004 0.648 -0.065 0.991   NA

# Same with Log-differences
head(Dlog(LIFEEX, -1:3, 1:2))
#                FDlog1        FDlog2       --       Dlog1         Dlog2    L2Dlog1      L2Dlog2
# ABW-1960 -0.006254955 -6.707929e-04 4.184520          NA            NA         NA           NA
# ABW-1961 -0.005584162 -4.351984e-04 4.190775 0.006254955            NA         NA           NA
# ABW-1962 -0.005148963 -2.796481e-04 4.196359 0.005584162 -0.0006707929 0.01183912           NA
# ABW-1963 -0.004869315 -8.291000e-05 4.201508 0.005148963 -0.0004351984 0.01073312           NA
# ABW-1964 -0.004786405  5.098981e-05 4.206378 0.004869315 -0.0002796481 0.01001828 -0.001820838
# ABW-1965 -0.004837395  6.482830e-05 4.211164 0.004786405 -0.0000829100 0.00965572 -0.001077405
#             L3Dlog1 L3Dlog2
# ABW-1960         NA      NA
# ABW-1961         NA      NA
# ABW-1962         NA      NA
# ABW-1963 0.01698808      NA
# ABW-1964 0.01560244      NA
# ABW-1965 0.01480468      NA

# Same with (exact) growth rates
head(G(LIFEEX, -1:3, 1:2))
#                 FG1       FG2     --        G1         G2      L2G1      L2G2     L3G1 L3G2
# ABW-1960 -0.6235433 11.974895 65.662        NA         NA        NA        NA       NA   NA
# ABW-1961 -0.5568599  8.428580 66.074 0.6274558         NA        NA        NA       NA   NA
# ABW-1962 -0.5135730  5.728297 66.444 0.5599782 -10.754153 1.1909476        NA       NA   NA
# ABW-1963 -0.4857479  1.727984 66.787 0.5162242  -7.813521 1.0790931        NA 1.713320   NA
# ABW-1964 -0.4774968 -1.051555 67.113 0.4881189  -5.444387 1.0068629 -15.45699 1.572479   NA
# ABW-1965 -0.4825714 -1.319230 67.435 0.4797878  -1.706782 0.9702487 -10.08666 1.491482   NA

A further possibility is to compute quasi-differences and quasi-log-differences of the form \(x_t - \rho x_{t-s}\) or \(log(x_t) - \rho log(x_{t-s})\). These are useful for panel-regressions suffering from serial-correlation, following Cochrane & Orcutt (1949), and can be specified with the rho argument to fdiff, D and Dlog.

# Regression of GDP on Life Expectance with country and time FE
mod <- lm(PCGDP ~ LIFEEX, data = fhdwithin(fselect(pwlddev, PCGDP, LIFEEX), fill = FALSE))
mod
# 
# Call:
# lm(formula = PCGDP ~ LIFEEX, data = fhdwithin(fselect(pwlddev, 
#     PCGDP, LIFEEX), fill = FALSE))
# 
# Coefficients:
# (Intercept)       LIFEEX  
#  -2.442e-12   -3.330e+02

# Computing autocorrelation of residuals
r <- residuals(mod)
r <- pwcor(r, L(r, 1, substr(names(r), 1, 3)))  # Need this to compute a panel-lag
r
# [1] .98

# Running the regression again quasi-differencing the transformed data
modCO <- lm(PCGDP ~ LIFEEX, data = fdiff(fhdwithin(fselect(pwlddev, PCGDP, LIFEEX), variable.wise = FALSE), rho = r, stubs = FALSE))
modCO
# 
# Call:
# lm(formula = PCGDP ~ LIFEEX, data = fdiff(fhdwithin(fselect(pwlddev, 
#     PCGDP, LIFEEX), variable.wise = FALSE), rho = r, stubs = FALSE))
# 
# Coefficients:
# (Intercept)       LIFEEX  
#      -12.93       -91.97

# In this case rho is almost 1, so we might as well just difference the untransformed data and go with that
# We also need to bootstrap this for proper standard errors.

A final important advantage of the collapse functions is that the panel-identifiers are preserved, even if a matrix of lags / leads / differences or growth rates is returned. This allows for nested panel-computations, for example we can compute shifted sequences of lagged / leaded and iterated panel differences:

# Sequence of differneces (same as above), adding one extra lag of the whole sequence
head(L(D(LIFEEX, -1:3, 1:2), 0:1))
#             FD1 L1.FD1    FD2 L1.FD2     --  L1.--    D1 L1.D1     D2  L1.D2  L2D1 L1.L2D1   L2D2
# ABW-1960 -0.412     NA -0.042     NA 65.662     NA    NA    NA     NA     NA    NA      NA     NA
# ABW-1961 -0.370 -0.412 -0.027 -0.042 66.074 65.662 0.412    NA     NA     NA    NA      NA     NA
# ABW-1962 -0.343 -0.370 -0.017 -0.027 66.444 66.074 0.370 0.412 -0.042     NA 0.782      NA     NA
# ABW-1963 -0.326 -0.343 -0.004 -0.017 66.787 66.444 0.343 0.370 -0.027 -0.042 0.713   0.782     NA
# ABW-1964 -0.322 -0.326  0.005 -0.004 67.113 66.787 0.326 0.343 -0.017 -0.027 0.669   0.713 -0.113
# ABW-1965 -0.327 -0.322  0.006  0.005 67.435 67.113 0.322 0.326 -0.004 -0.017 0.648   0.669 -0.065
#          L1.L2D2  L3D1 L1.L3D1 L3D2 L1.L3D2
# ABW-1960      NA    NA      NA   NA      NA
# ABW-1961      NA    NA      NA   NA      NA
# ABW-1962      NA    NA      NA   NA      NA
# ABW-1963      NA 1.125      NA   NA      NA
# ABW-1964      NA 1.039   1.125   NA      NA
# ABW-1965  -0.113 0.991   1.039   NA      NA

All of this naturally generalized to computations on pdata.frames:

head(D(pwlddev, -1:3, 1:2, cols = 9:10), 3)
#          iso3c year FD1.PCGDP FD2.PCGDP PCGDP D1.PCGDP D2.PCGDP L2D1.PCGDP L2D2.PCGDP L3D1.PCGDP
# ABW-1960   ABW 1960        NA        NA    NA       NA       NA         NA         NA         NA
# ABW-1961   ABW 1961        NA        NA    NA       NA       NA         NA         NA         NA
# ABW-1962   ABW 1962        NA        NA    NA       NA       NA         NA         NA         NA
#          L3D2.PCGDP FD1.LIFEEX FD2.LIFEEX LIFEEX D1.LIFEEX D2.LIFEEX L2D1.LIFEEX L2D2.LIFEEX
# ABW-1960         NA     -0.412     -0.042 65.662        NA        NA          NA          NA
# ABW-1961         NA     -0.370     -0.027 66.074     0.412        NA          NA          NA
# ABW-1962         NA     -0.343     -0.017 66.444     0.370    -0.042       0.782          NA
#          L3D1.LIFEEX L3D2.LIFEEX
# ABW-1960          NA          NA
# ABW-1961          NA          NA
# ABW-1962          NA          NA

head(L(D(pwlddev, -1:3, 1:2, cols = 9:10), 0:1), 3)
#          iso3c year FD1.PCGDP L1.FD1.PCGDP FD2.PCGDP L1.FD2.PCGDP PCGDP L1.PCGDP D1.PCGDP
# ABW-1960   ABW 1960        NA           NA        NA           NA    NA       NA       NA
# ABW-1961   ABW 1961        NA           NA        NA           NA    NA       NA       NA
# ABW-1962   ABW 1962        NA           NA        NA           NA    NA       NA       NA
#          L1.D1.PCGDP D2.PCGDP L1.D2.PCGDP L2D1.PCGDP L1.L2D1.PCGDP L2D2.PCGDP L1.L2D2.PCGDP
# ABW-1960          NA       NA          NA         NA            NA         NA            NA
# ABW-1961          NA       NA          NA         NA            NA         NA            NA
# ABW-1962          NA       NA          NA         NA            NA         NA            NA
#          L3D1.PCGDP L1.L3D1.PCGDP L3D2.PCGDP L1.L3D2.PCGDP FD1.LIFEEX L1.FD1.LIFEEX FD2.LIFEEX
# ABW-1960         NA            NA         NA            NA     -0.412            NA     -0.042
# ABW-1961         NA            NA         NA            NA     -0.370        -0.412     -0.027
# ABW-1962         NA            NA         NA            NA     -0.343        -0.370     -0.017
#          L1.FD2.LIFEEX LIFEEX L1.LIFEEX D1.LIFEEX L1.D1.LIFEEX D2.LIFEEX L1.D2.LIFEEX L2D1.LIFEEX
# ABW-1960            NA 65.662        NA        NA           NA        NA           NA          NA
# ABW-1961        -0.042 66.074    65.662     0.412           NA        NA           NA          NA
# ABW-1962        -0.027 66.444    66.074     0.370        0.412    -0.042           NA       0.782
#          L1.L2D1.LIFEEX L2D2.LIFEEX L1.L2D2.LIFEEX L3D1.LIFEEX L1.L3D1.LIFEEX L3D2.LIFEEX
# ABW-1960             NA          NA             NA          NA             NA          NA
# ABW-1961             NA          NA             NA          NA             NA          NA
# ABW-1962             NA          NA             NA          NA             NA          NA
#          L1.L3D2.LIFEEX
# ABW-1960             NA
# ABW-1961             NA
# ABW-1962             NA

1.5 Panel Data to Array Conversions

Viewing and transforming panel data stored in an array can be a powerful strategy, especially as it provides much more direct access to the different dimensions of the data. The function psmat can be used to efficiently transform pseries to a 2D matrix, and pdata.frame’s to a 3D array:

# Converting the panel series to array, individual rows (default)
str(psmat(LIFEEX))
#  'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#  - attr(*, "transpose")= logi FALSE

# Converting the panel series to array, individual columns
str(psmat(LIFEEX, transpose = TRUE))
#  'psmat' num [1:61, 1:216] 65.7 66.1 66.4 66.8 67.1 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#  - attr(*, "transpose")= logi TRUE

# Same as plm::as.matrix.pseries, apart from attributes
identical(unattrib(psmat(LIFEEX)),
          unattrib(as.matrix(LIFEEX)))
# [1] TRUE
identical(unattrib(psmat(LIFEEX, transpose = TRUE)),
          unattrib(as.matrix(LIFEEX, idbyrow = FALSE)))
# [1] TRUE

Applying psmat to a pdata.frame yields a 3D array:

psar <- psmat(pwlddev, cols = 9:12)
str(psar)
#  'psmat' num [1:216, 1:61, 1:4] NA NA NA NA NA ...
#  - attr(*, "dimnames")=List of 3
#   ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA"
#  - attr(*, "transpose")= logi FALSE

str(psmat(pwlddev, cols = 9:12, transpose = TRUE))
#  'psmat' num [1:61, 1:216, 1:4] NA NA NA NA NA NA NA NA NA NA ...
#  - attr(*, "dimnames")=List of 3
#   ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   ..$ : chr [1:4] "PCGDP" "LIFEEX" "GINI" "ODA"
#  - attr(*, "transpose")= logi TRUE

This format can be very convenient to quickly and freely access data for different countries, variables and time-periods:

# Looking at wealth, health and inequality in Brazil and Argentinia, 1990-1999
aperm(psar[c("BRA","ARG"), as.character(1990:1999), c("PCGDP", "LIFEEX", "GINI")])
# , , BRA
# 
#          1990   1991   1992   1993   1994   1995   1996   1997   1998   1999
# PCGDP  7983.7 7963.1 7791.8 8020.6 8311.6 8540.1 8591.0 8744.8 8641.3 8554.1
# LIFEEX   66.3   66.7   67.1   67.5   67.9   68.3   68.7   69.1   69.4   69.8
# GINI     60.5     NA   53.2   60.1     NA   59.6   59.9   59.8   59.6   59.0
# 
# , , ARG
# 
#          1990   1991   1992   1993   1994   1995   1996   1997   1998   1999
# PCGDP  6245.7 6721.3 7157.3 7644.2 7988.6 7666.5 7994.2 8543.0 8772.1 8381.3
# LIFEEX   71.6   71.8   72.0   72.2   72.5   72.7   72.8   73.0   73.2   73.4
# GINI       NA   46.8   45.5   44.9   45.9   48.9   49.5   49.1   50.7   49.8

psmat can also return the output as a list of panel series matrices:

pslist <- psmat(pwlddev, cols = 9:12, array = FALSE)
str(pslist)
# List of 4
#  $ PCGDP : 'psmat' num [1:216, 1:61] NA NA NA NA NA ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..- attr(*, "transpose")= logi FALSE
#  $ LIFEEX: 'psmat' num [1:216, 1:61] 65.7 32.4 37.5 62.3 NA ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..- attr(*, "transpose")= logi FALSE
#  $ GINI  : 'psmat' num [1:216, 1:61] NA NA NA NA NA NA NA NA NA NA ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..- attr(*, "transpose")= logi FALSE
#  $ ODA   : 'psmat' num [1:216, 1:61] NA 116769997 -390000 NA NA ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : chr [1:216] "ABW" "AFG" "AGO" "ALB" ...
#   .. ..$ : chr [1:61] "1960" "1961" "1962" "1963" ...
#   ..- attr(*, "transpose")= logi FALSE

This list can then be unlisted using the function unlist2d (for unlisting in 2-dimensions), to yield a reshaped data.frame:

head(unlist2d(pslist, idcols = "Variable", row.names = "Country Code"), 3)
#   Variable Country Code 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974
# 1    PCGDP          ABW   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
# 2    PCGDP          AFG   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
# 3    PCGDP          AGO   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
#   1975 1976 1977 1978 1979     1980     1981     1982     1983     1984     1985      1986
# 1   NA   NA   NA   NA   NA       NA       NA       NA       NA       NA       NA 15669.616
# 2   NA   NA   NA   NA   NA       NA       NA       NA       NA       NA       NA        NA
# 3   NA   NA   NA   NA   NA 3193.404 2947.194 2844.322 2859.919 2925.367 2922.217  2902.618
#        1987      1988      1989      1990      1991      1992      1993      1994     1995
# 1 18427.612 22134.017 24837.951 25357.787 26329.313 26401.969 26663.208 27272.310 26705.18
# 2        NA        NA        NA        NA        NA        NA        NA        NA       NA
# 3  2916.794  2989.617  2889.886  2697.491  2635.156  2401.234  1767.025  1733.844  1930.80
#        1996      1997     1998     1999      2000      2001       2002       2003       2004
# 1 26087.776 27190.501 27151.92 26954.40 28417.384 26966.055 25508.3027 25469.2876 27005.5294
# 2        NA        NA       NA       NA        NA        NA   330.3036   343.0809   333.2167
# 3  2122.968  2205.294  2235.39  2211.13  2205.205  2223.335  2444.4178  2433.8616  2608.7840
#         2005       2006      2007       2008       2009      2010       2011       2012       2013
# 1 26979.8854 27046.2242 27427.579 27365.9312 24463.6922 23512.603 24233.0011 23781.2573 24635.7649
# 2   357.2347   365.2845   405.549   412.0143   488.3003   543.303   528.7366   576.1901   587.5651
# 3  2896.5547  3116.1810  3424.372  3668.0799  3565.0569  3587.884  3579.9599  3748.4507  3796.8822
#         2014       2015       2016       2017     2018      2019 2020
# 1 24563.2343 25822.2514 26231.0267 26630.2053       NA        NA   NA
# 2   583.6562   574.1841   571.0738   571.4407  564.610  573.2876   NA
# 3  3843.1979  3748.3201  3530.3107  3409.9303 3233.906 3111.1577   NA

Of course we could also have applied some transformation (like computing pairwise correlations) to each matrix before unlisting. In any case this kind of programming provides lots of possibilities to explore and manipulate panel data (as we will see in Part 2).

Benchmarks

Below benchmarks are provided of the collapse implementation against native plm. To do this the dataset used so far is extended to have approx 1 million observations:

wlddevsmall <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA"))
wlddevsmall$iso3c <- as.character(wlddevsmall$iso3c)
data <- replicate(100, wlddevsmall, simplify = FALSE)
rm(wlddevsmall)
uniquify <- function(x, i) {
  x$iso3c <- paste0(x$iso3c, i)
  x
}
data <- unlist2d(Map(uniquify, data, as.list(1:100)), idcols = FALSE)
data <- pdata.frame(data, index = c("iso3c", "year"))
pdim(data)
# Balanced Panel: n = 21600, T = 61, N = 1317600

The data has 21600 individuals (countries) observed for up to 61 years (1960-2020), the total number of rows is 1317600. We can pull out a series of life expectancy and run some benchmarks. The Windows laptop on which these benchmarks were run has a 2x 2.2 GHZ Intel i5 processor, 8GB DDR3 RAM and a Samsung SSD hard drive.

library(microbenchmark)
# Creating the extended panel series for Life Expectancy (l for large)
LIFEEX_l <- data$LIFEEX
str(LIFEEX_l)
#  'pseries' Named num [1:1317600] 65.7 66.1 66.4 66.8 67.1 ...
#  - attr(*, "names")= chr [1:1317600] "ABW1-1960" "ABW1-1961" "ABW1-1962" "ABW1-1963" ...
#  - attr(*, "index")=Classes 'pindex' and 'data.frame':    1317600 obs. of  2 variables:
#   ..$ iso3c: Factor w/ 21600 levels "ABW1","ABW10",..: 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ year : Factor w/ 61 levels "1960","1961",..: 1 2 3 4 5 6 7 8 9 10 ...

# Between Transformations
microbenchmark(Between(LIFEEX_l, na.rm = TRUE), times = 10)
# Unit: milliseconds
#                             expr      min       lq     mean   median       uq      max neval
#  Between(LIFEEX_l, na.rm = TRUE) 17.73594 18.71248 21.84342 20.13574 22.35853 37.94689    10
microbenchmark(fbetween(LIFEEX_l), times = 10)
# Unit: milliseconds
#                expr      min       lq     mean   median       uq      max neval
#  fbetween(LIFEEX_l) 4.408771 4.639519 4.705529 4.718424 4.771498 4.908684    10

# Within Transformations
microbenchmark(Within(LIFEEX_l, na.rm = TRUE), times = 10)
# Unit: milliseconds
#                            expr      min       lq     mean  median       uq      max neval
#  Within(LIFEEX_l, na.rm = TRUE) 10.17887 10.74663 10.91092 10.8766 11.24224 11.37664    10
microbenchmark(fwithin(LIFEEX_l), times = 10)
# Unit: milliseconds
#               expr      min       lq     mean   median       uq      max neval
#  fwithin(LIFEEX_l) 4.522218 4.550303 4.735344 4.644296 4.696017 5.297036    10

# Higher-Dimenional Between and Within Transformations
microbenchmark(fhdbetween(LIFEEX_l), times = 10)
# Unit: milliseconds
#                  expr    min       lq    mean   median       uq      max neval
#  fhdbetween(LIFEEX_l) 56.916 57.29971 66.0179 58.13864 76.50108 84.10625    10
microbenchmark(fhdwithin(LIFEEX_l), times = 10)
# Unit: milliseconds
#                 expr      min      lq     mean   median       uq      max neval
#  fhdwithin(LIFEEX_l) 55.55906 56.2372 62.31852 56.56555 75.78784 77.20657    10

# Single Lag
microbenchmark(lag(LIFEEX_l), times = 10)
# Unit: milliseconds
#           expr      min       lq     mean   median       uq      max neval
#  lag(LIFEEX_l) 7.967776 8.144896 8.542879 8.632468 8.840092 8.949357    10
microbenchmark(flag(LIFEEX_l), times = 10)
# Unit: milliseconds
#            expr      min       lq     mean   median       uq     max neval
#  flag(LIFEEX_l) 7.994057 8.038747 8.337862 8.180484 8.603481 9.12086    10

# Sequence of Lags / Leads
microbenchmark(lag(LIFEEX_l, -1:3), times = 10)
# Unit: milliseconds
#                 expr     min       lq     mean   median       uq     max neval
#  lag(LIFEEX_l, -1:3) 18.7525 19.29476 28.61876 27.95813 38.11081 39.5329    10
microbenchmark(flag(LIFEEX_l, -1:3), times = 10)
# Unit: milliseconds
#                  expr     min       lq     mean   median       uq      max neval
#  flag(LIFEEX_l, -1:3) 15.5415 15.64335 21.10042 15.83998 33.37699 34.10265    10

# Single difference
microbenchmark(diff(LIFEEX_l), times = 10)
# Unit: milliseconds
#            expr     min      lq     mean   median       uq      max neval
#  diff(LIFEEX_l) 8.00525 8.16884 8.370421 8.368776 8.554404 8.733697    10
microbenchmark(fdiff(LIFEEX_l), times = 10)
# Unit: milliseconds
#             expr      min       lq   mean median       uq     max neval
#  fdiff(LIFEEX_l) 7.937805 8.020502 8.3458 8.2451 8.426238 9.34923    10

# Iterated Difference
microbenchmark(fdiff(LIFEEX_l, diff = 2), times = 10)
# Unit: milliseconds
#                       expr      min       lq     mean   median       uq      max neval
#  fdiff(LIFEEX_l, diff = 2) 10.20129 10.62786 10.72184 10.77488 10.82326 11.21805    10

# Sequence of Lagged / Leaded and iterated differences
microbenchmark(fdiff(LIFEEX_l, -1:3, 1:2), times = 10)
# Unit: milliseconds
#                        expr      min       lq     mean   median       uq      max neval
#  fdiff(LIFEEX_l, -1:3, 1:2) 45.90159 52.22494 66.83236 53.21347 57.53222 187.8582    10

# Single Growth Rate
microbenchmark(fgrowth(LIFEEX_l), times = 10)
# Unit: milliseconds
#               expr      min       lq    mean   median       uq      max neval
#  fgrowth(LIFEEX_l) 8.222304 8.357153 8.69059 8.727158 8.884167 9.436683    10

# Single Log-Difference
microbenchmark(fdiff(LIFEEX_l, log = TRUE), times = 10)
# Unit: milliseconds
#                         expr      min      lq     mean   median       uq      max neval
#  fdiff(LIFEEX_l, log = TRUE) 12.41394 12.8583 15.06961 13.17156 13.61659 32.51989    10

# Panel Series to Matrix Conversion
# system.time(as.matrix(LIFEEX_l))  This takes about 3 minutes to compute
microbenchmark(psmat(LIFEEX_l), times = 10)
# Unit: milliseconds
#             expr      min       lq     mean   median       uq      max neval
#  psmat(LIFEEX_l) 1.482478 1.500149 1.628028 1.520813 1.553941 2.438639    10

This shows a comparison between flag and data.table’s shift:

microbenchmark(L(data, cols = 3:6), times = 10)
# Unit: milliseconds
#                 expr      min       lq     mean median       uq      max neval
#  L(data, cols = 3:6) 14.13692 14.43877 20.88276 18.865 19.73141 37.06244    10
library(data.table)
setDT(data)
# 'Improper' panel-lag
microbenchmark(data[, shift(.SD), by = iso3c, .SDcols = 3:6], times = 10)
# Unit: milliseconds
#                                           expr      min       lq     mean   median      uq      max
#  data[, shift(.SD), by = iso3c, .SDcols = 3:6] 176.5308 199.9415 215.6897 204.0719 230.089 268.9992
#  neval
#     10

# This does what L is actually doing (without sorting the data)
microbenchmark(data[order(year), shift(.SD), by = iso3c, .SDcols = 3:6], times = 10)
# Unit: milliseconds
#                                                      expr      min       lq     mean   median
#  data[order(year), shift(.SD), by = iso3c, .SDcols = 3:6] 193.9684 210.7025 213.7664 213.0727
#        uq      max neval
#  221.9783 226.3685    10

The above dataset has 1 million obs in 20 thousand groups, but what about 10 million obs and 1 million groups? Do collapse functions scale efficiently as data and the number of groups grows large? Here is a simple benchmark:

x <- rnorm(1e7)                                     # 10 million obs
g <- qF(rep(1:1e6, each = 10), na.exclude = FALSE)  # 1 million individuals
t <- qF(rep(1:10, 1e6), na.exclude = FALSE)         # 10 time-periods per individual

microbenchmark(fbetween(x, g), times = 10)
# Unit: milliseconds
#            expr      min       lq     mean   median       uq      max neval
#  fbetween(x, g) 51.66189 53.60693 91.00168 62.54655 73.87835 233.3696    10
microbenchmark(fwithin(x, g), times = 10)
# Unit: milliseconds
#           expr      min       lq    mean   median       uq      max neval
#  fwithin(x, g) 43.46291 44.03954 77.0216 45.33919 58.65132 196.7248    10
microbenchmark(flag(x, 1, g, t), times = 10)
# Unit: milliseconds
#              expr      min       lq     mean   median       uq      max neval
#  flag(x, 1, g, t) 42.65382 55.05332 87.72527 59.55935 80.86143 210.8074    10
microbenchmark(flag(x, -1:1, g, t), times = 10)
# Unit: milliseconds
#                 expr      min      lq     mean  median       uq      max neval
#  flag(x, -1:1, g, t) 92.19842 92.5559 162.8994 166.736 228.6354 239.6953    10
microbenchmark(fdiff(x, 1, 1, g, t), times = 10)
# Unit: milliseconds
#                  expr      min       lq     mean   median       uq      max neval
#  fdiff(x, 1, 1, g, t) 42.51778 46.29306 82.27838 53.85735 67.54295 205.0114    10
microbenchmark(fdiff(x, 1, 2, g, t), times = 10)
# Unit: milliseconds
#                  expr     min       lq     mean   median       uq      max neval
#  fdiff(x, 1, 2, g, t) 59.9363 62.11689 84.42818 69.85072 75.38506 217.1431    10
microbenchmark(fdiff(x, -1:1, 1:2, g, t), times = 10)
# Unit: milliseconds
#                       expr      min       lq     mean  median       uq      max neval
#  fdiff(x, -1:1, 1:2, g, t) 163.5046 182.9127 246.2855 250.664 301.4046 339.1415    10

The results show that collapse functions perform very well even as the number of groups grows large.

The conclusion of this benchmark analysis is that collapse’s fast functions, with or without the help of plm classes, allow for very fast transformations of panel data, and should enable R programmers and econometricians to implement high-performance panel data estimators without having to dive into C/C++ themselves or resorting to data.table metaprogramming.

Part 2: Fast Exploration of Panel Data

collapse also provides some essential functions to summarize and explore panel data, such as a fast check of variation over different dimensions, fast summary-statistics for panel data, panel-auto, partial-auto and cross-correlation functions, and a fast F-test to test fixed effects and other exclusion restrictions on (large) panel data models. Panel data to matrix conversion further allows the application of some correlational and unsupervised learning tools such as PCA, clustering or dynamic factor analysis.

2.1 Variation Check for Panel Data

The function varying can be used to check over which panel-dimensions different variable have variation. When passed a pdata.frame, varying by default takes the first identifier and checks for variation within that dimension.

# This checks for any variation within "iso3c", the first index variable: TRUE means data vary within country i.e. over time.
varying(pwlddev)
# country    date    year  decade  region  income    OECD   PCGDP  LIFEEX    GINI     ODA     POP 
#   FALSE    TRUE    TRUE    TRUE   FALSE   FALSE   FALSE    TRUE    TRUE    TRUE    TRUE    TRUE

Alternatively any index variable or combination of index variables can be specified:

# This checks any variation within time variable, i.e. cross-sectional variation.
varying(pwlddev, effect = "year")
# country   iso3c    date  decade  region  income    OECD   PCGDP  LIFEEX    GINI     ODA     POP 
#    TRUE    TRUE   FALSE   FALSE    TRUE    TRUE    TRUE    TRUE    TRUE    TRUE    TRUE    TRUE

Another possibility is checking for variation within each group:

# This checks cross-sectional variation within each year for 4 indicators.
head(varying(pwlddev, effect = "year", cols = 9:12, any_group = FALSE))
#      PCGDP LIFEEX GINI  ODA
# 1960  TRUE   TRUE   NA TRUE
# 1961  TRUE   TRUE   NA TRUE
# 1962  TRUE   TRUE   NA TRUE
# 1963  TRUE   TRUE   NA TRUE
# 1964  TRUE   TRUE   NA TRUE
# 1965  TRUE   TRUE   NA TRUE

varying also has a pseries method. The code below checks for time-variation of the GINI index within each country. A NA is returned when there are no observations within a particular country.

head(varying(pwlddev$GINI, any_group = FALSE), 20)
#  ABW  AFG  AGO  ALB  AND  ARE  ARG  ARM  ASM  ATG  AUS  AUT  AZE  BDI  BEL  BEN  BFA  BGD  BGR  BHR 
#   NA   NA TRUE TRUE   NA TRUE TRUE TRUE   NA   NA TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE   NA

If we would like to gave more information about this variation, we could also invoke the functions fndistinct and fsd, which do not have pseries methods:

head(fndistinct(pwlddev$GINI, index(pwlddev, "iso3c")), 20)
# ABW AFG AGO ALB AND ARE ARG ARM ASM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR 
#   0   0   3   9   0   2  29  20   0   0   9  16   5   4  16   3   5   9  12   0

head(round(fsd(pwlddev$GINI, index(pwlddev, "iso3c")), 2), 20)
#  ABW  AFG  AGO  ALB  AND  ARE  ARG  ARM  ASM  ATG  AUS  AUT  AZE  BDI  BEL  BEN  BFA  BGD  BGR  BHR 
#   NA   NA 5.18 2.47   NA 4.60 3.84 2.76   NA   NA 1.19 1.76 4.85 4.37 1.71 4.60 5.98 3.02 2.58   NA

2.2 Summary Statistics for Panel Data

Efficient summary statistics for panel data have long been implemented in other statistical softwares. The command qsu, shorthand for ‘quick-summary’, is a very efficient summary statistics command inspired by the xtsummarize command in the Stata statistical software. It computes a default set of 5 statistics (N, mean, sd, min and max) and can also computed higher moments (skewness and kurtosis) in a single pass through the data (using a numerically stable online algorithm generalized from Welford’s Algorithm for variance computations). With panel data, qsu computes these statistics not just on the raw data, but also on the between-transformed and within-transformed data:

qsu(pwlddev, cols = 9:12, higher = TRUE)
# , , PCGDP
# 
#              N/T        Mean          SD          Min         Max    Skew     Kurt
# Overall     9470   12048.778  19077.6416     132.0776  196061.417  3.1276  17.1154
# Between      206  12962.6054  20189.9007     253.1886   141200.38  3.1263  16.2299
# Within   45.9709   12048.778   6723.6808  -33504.8721  76767.5254  0.6576  17.2003
# 
# , , LIFEEX
# 
#              N/T     Mean       SD      Min      Max     Skew    Kurt
# Overall    11670  64.2963  11.4764   18.907  85.4171  -0.6748  2.6718
# Between      207  64.9537   9.8936  40.9663  85.4171  -0.5012  2.1693
# Within   56.3768  64.2963   6.0842  32.9068  84.4198  -0.2643  3.7027
# 
# , , GINI
# 
#              N/T     Mean      SD      Min      Max    Skew    Kurt
# Overall     1744  38.5341  9.2006     20.7     65.8   0.596  2.5329
# Between      167  39.4233  8.1356  24.8667  61.7143  0.5832  2.8256
# Within   10.4431  38.5341  2.9277  25.3917  55.3591  0.3263  5.3389
# 
# , , ODA
# 
#              N/T        Mean          SD              Min             Max    Skew      Kurt
# Overall     8608  454'720131  868'712654      -997'679993  2.56715605e+10  6.9832   114.889
# Between      178  439'168412  569'049959       468717.916  3.62337432e+09   2.355    9.9487
# Within   48.3596  454'720131  650'709624  -2.44379420e+09  2.45610972e+10  9.6047  263.3716

Key statistics to look at in this summary are the sample size and the standard-deviation decomposed into the between-individuals and the within-individuals standard-deviation: For GDP per Capita we have 8995 observations in the panel series for 203 countries, with on average 44.31 observations (time-periods T) per country. The between-country standard deviation is 19600 USD, around 3-times larger than the within-country (over-time) standard deviation of 6300 USD. Regarding the mean, the between-mean, computed as a cross-sectional average of country averages, usually differs slightly from the overall average taken across all data points. The within-transformed data is computed and summarized with the overall mean added back (i.e. as in fwithin(PCGDP, mean = "overall.mean")).

We can also do groupwise panel-statistics and qsu also supports weights (not shown):

qsu(pwlddev, ~ income, cols = 9:12, higher = TRUE)
# , , Overall, PCGDP
# 
#                       N/T        Mean          SD       Min         Max    Skew     Kurt
# High income          3179  30280.7283  23847.0483  932.0417  196061.417  2.1702  10.3425
# Low income           1311    597.4053    288.4392  164.3366   1864.7925  1.2385   4.7115
# Lower middle income  2246   1574.2535    858.7183  144.9863   4818.1922  0.9093   3.7153
# Upper middle income  2734   4945.3258   2979.5609  132.0776  20532.9523  1.2286   4.9391
# 
# , , Between, PCGDP
# 
#                      N/T        Mean          SD        Min         Max    Skew    Kurt
# High income           71  30280.7283  20908.5323  5413.4495   141200.38  2.1347  9.9673
# Low income            28    597.4053    243.8219   253.1886   1357.3326  1.4171  5.3137
# Lower middle income   47   1574.2535    676.3157   444.2899   2896.8682  0.3562  2.2358
# Upper middle income   60   4945.3258   2327.3834   1604.595  13344.5423    1.24  4.7803
# 
# , , Within, PCGDP
# 
#                          N/T       Mean          SD          Min         Max    Skew    Kurt
# High income          44.7746  12048.778  11467.9987  -33504.8721  76767.5254  0.3924  6.0523
# Low income           46.8214  12048.778    154.1039   11606.2382   12698.296  0.5098  4.0676
# Lower middle income  47.7872  12048.778    529.1449   10377.7234  14603.1055  0.7658  5.4272
# Upper middle income  45.5667  12048.778    1860.395    4846.3834  24883.1246  0.6858  7.8469
# 
# , , Overall, LIFEEX
# 
#                       N/T     Mean      SD     Min      Max     Skew    Kurt
# High income          3831  73.6246  5.6693  42.672  85.4171  -1.0067  5.5553
# Low income           1800  49.7301  9.0944  26.172    74.43   0.2748  2.6721
# Lower middle income  2790  58.1481  9.3115  18.907   76.699  -0.3406  2.6845
# Upper middle income  3249  66.6466   7.537  36.535   80.279  -1.0988  4.2262
# 
# , , Between, LIFEEX
# 
#                      N/T     Mean      SD      Min      Max     Skew    Kurt
# High income           73  73.6246  3.3499  64.0302  85.4171  -0.6537  2.9946
# Low income            30  49.7301  4.8321  40.9663   66.945   1.5195  6.6802
# Lower middle income   47  58.1481  5.9945  45.7687  71.6078   0.0352  2.2126
# Upper middle income   57  66.6466  4.9955   48.057  74.0504  -1.3647   5.303
# 
# , , Within, LIFEEX
# 
#                          N/T     Mean      SD      Min      Max     Skew    Kurt
# High income          52.4795  64.2963  4.5738  42.9381  78.1271  -0.4838  3.8923
# Low income                60  64.2963  7.7045  41.5678  84.4198   0.0402  2.6086
# Lower middle income  59.3617  64.2963  7.1253  32.9068  83.9918  -0.2522   3.181
# Upper middle income       57  64.2963  5.6437  41.4342  83.0122   -0.507  4.0355
# 
# , , Overall, GINI
# 
#                      N/T     Mean      SD   Min   Max    Skew    Kurt
# High income          680  33.3037  6.7885  20.7  58.9  1.4864  5.6772
# Low income           107  41.1327  6.5767  29.5  65.8  0.7523   4.236
# Lower middle income  369  40.0504  9.3032    24  63.2  0.4388  2.2218
# Upper middle income  588  43.1585  8.9549  25.2  64.8  0.0814  2.3517
# 
# , , Between, GINI
# 
#                      N/T     Mean      SD      Min      Max    Skew    Kurt
# High income           41  33.3037  6.5238  24.8667  53.6296  1.5091  5.3913
# Low income            28  41.1327  5.1706  32.1333    58.75  0.6042  4.0473
# Lower middle income   46  40.0504  8.4622  27.6955   54.925   0.334   1.797
# Upper middle income   52  43.1585  8.4359  27.9545  61.7143  0.0336  2.2441
# 
# , , Within, GINI
# 
#                          N/T     Mean      SD      Min      Max     Skew    Kurt
# High income          16.5854  38.5341  1.8771  31.1841  45.8841  -0.0818   4.902
# Low income            3.8214  38.5341  4.0643  29.4591  55.3591   0.6766  5.1025
# Lower middle income   8.0217  38.5341  3.8654  27.9452  55.1008   0.4093  4.0058
# Upper middle income  11.3077  38.5341  3.0043  25.3917  48.0131   0.0728  3.5115
# 
# , , Overall, ODA
# 
#                       N/T        Mean              SD          Min             Max     Skew
# High income          1575  153'663194      425'918409  -464'709991  4.34612988e+09   5.2505
# Low income           1692  631'660165      941'498380      -500000  1.04032100e+10   4.4628
# Lower middle income  2544  692'072692  1.02452490e+09  -605'969971  1.18790801e+10   3.7913
# Upper middle income  2797  301'326218      765'116131  -997'679993  2.56715605e+10  16.3123
#                          Kurt
# High income           36.2748
# Low income            32.1305
# Lower middle income   25.2442
# Upper middle income  464.8625
# 
# , , Between, ODA
# 
#                      N/T        Mean          SD          Min             Max    Skew     Kurt
# High income           42  153'663194  339'972909   468717.916  2.05456932e+09  3.9522  19.0792
# Low income            30  631'660165  466'265486    91'536334  1.67220583e+09  0.9769   2.6602
# Lower middle income   47  692'072692  765'003585  28'919000.2  3.62337432e+09  2.0429   7.2664
# Upper middle income   59  301'326218  382'148153    13'160000  1.91297800e+09  2.1072   7.0291
# 
# , , Within, ODA
# 
#                          N/T        Mean          SD              Min             Max     Skew
# High income             37.5  454'720131  256'563661      -920'977647  2.87632242e+09   2.2074
# Low income              56.4  454'720131  817'933797  -1.19519570e+09  9.18572426e+09   3.8872
# Lower middle income  54.1277  454'720131  681'484247  -2.44379420e+09  1.12814455e+10   3.8965
# Upper middle income  47.4068  454'720131  662'846500  -2.04042108e+09  2.45610972e+10  19.6351
#                          Kurt
# High income           28.8682
# Low income            33.5194
# Lower middle income   47.7246
# Upper middle income  657.3041

Here it should be noted that any grouping is applied independently from the data-transformation, i.e. the data is first transformed, and then grouped statistics are calculated on the transformed data. The computation of statistics is very efficient:

qsu(LIFEEX_l)
#               N/T     Mean       SD      Min      Max
# Overall  1'167000  64.2963  11.4759   18.907  85.4171
# Between     20700  64.9537     9.87  40.9663  85.4171
# Within    56.3768  64.2963   6.0839  32.9068  84.4198

microbenchmark(qsu(LIFEEX_l))
# Unit: milliseconds
#           expr     min       lq     mean   median       uq      max neval
#  qsu(LIFEEX_l) 9.49355 10.25679 11.07317 10.37214 10.78839 50.22574   100

Using the transformation functions and the functions pwcor and pwcov, we can also easily explore the correlation structure of the data:

# Overall pairwise correlations with pairwise observation count and significance testing (* = significant at 5% level)
pwcor(get_vars(pwlddev, 9:12), N = TRUE, P = TRUE)
#               PCGDP        LIFEEX         GINI          ODA
# PCGDP    1   (9470)   .57* (9022) -.44* (1735) -.16* (7128)
# LIFEEX  .57* (9022)   1   (11670) -.35* (1742) -.02  (8142)
# GINI   -.44* (1735)  -.35* (1742)   1   (1744) -.20* (1109)
# ODA    -.16* (7128)  -.02  (8142) -.20* (1109)   1   (8608)

# Between correlations
pwcor(fmean(get_vars(pwlddev, 9:12), pwlddev$iso3c), N = TRUE, P = TRUE)
#              PCGDP      LIFEEX        GINI         ODA
# PCGDP    1   (206)  .60* (199) -.42* (165) -.25* (172)
# LIFEEX  .60* (199)   1   (207) -.40* (165) -.21* (172)
# GINI   -.42* (165) -.40* (165)   1   (167) -.19* (145)
# ODA    -.25* (172) -.21* (172) -.19* (145)   1   (178)

# Within correlations
pwcor(W(pwlddev, cols = 9:12, keep.ids = FALSE), N = TRUE, P = TRUE)
#               W.PCGDP      W.LIFEEX       W.GINI        W.ODA
# W.PCGDP    1   (9470)   .31* (9022) -.01  (1735) -.01  (7128)
# W.LIFEEX  .31* (9022)   1   (11670) -.16* (1742)  .17* (8142)
# W.GINI   -.01  (1735)  -.16* (1742)   1   (1744) -.08* (1109)
# W.ODA    -.01  (7128)   .17* (8142) -.08* (1109)   1   (8608)

The correlations show that the between (cross-country) relationships of these macro-variables are quite strong, but within countries the relationships are much weaker, for example there seems to be no significant relationship between GDP per Capita and either inequality or ODA received within countries over time.

2.3 Exploring Panel Data in Matrix / Array Form

We can take a single panel series such as GDP per Capita and explore it further:

# Generating a (transposed) matrix of country GDPs per capita
tGDPmat <- psmat(PCGDP, transpose = TRUE)
tGDPmat[1:10, 1:10]
#      ABW AFG AGO ALB AND ARE  ARG ARM ASM ATG
# 1960  NA  NA  NA  NA  NA  NA 5643  NA  NA  NA
# 1961  NA  NA  NA  NA  NA  NA 5853  NA  NA  NA
# 1962  NA  NA  NA  NA  NA  NA 5711  NA  NA  NA
# 1963  NA  NA  NA  NA  NA  NA 5323  NA  NA  NA
# 1964  NA  NA  NA  NA  NA  NA 5773  NA  NA  NA
# 1965  NA  NA  NA  NA  NA  NA 6286  NA  NA  NA
# 1966  NA  NA  NA  NA  NA  NA 6152  NA  NA  NA
# 1967  NA  NA  NA  NA  NA  NA 6255  NA  NA  NA
# 1968  NA  NA  NA  NA  NA  NA 6461  NA  NA  NA
# 1969  NA  NA  NA  NA  NA  NA 6981  NA  NA  NA

# plot the matrix (it will plot correctly no matter how the matrix is transposed)
plot(tGDPmat, main = "GDP per Capita")
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat


# Taking series with more than 20 observation
suffsamp <- tGDPmat[, fnobs(tGDPmat) > 20]

# Minimum pairwise observations between any two series:
min(pwnobs(suffsamp))
# [1] 16

# We can use the pairwise-correlations of the annual growth rates to hierarchically cluster the economies:
plot(hclust(as.dist(1-pwcor(G(suffsamp)))))
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat


# Finally we could do PCA on the growth rates:
eig <- eigen(pwcor(G(suffsamp)))
plot(seq_col(suffsamp), eig$values/sum(eig$values)*100, xlab = "Number of Principal Components", ylab = "% Variance Explained", main = "Screeplot")
plot of chunk PLMGDPmat

plot of chunk PLMGDPmat

There is also a nice plot-method applied to panel series arrays returned when psmat is applied to a panel data.frame:

plot(psmat(pwlddev, cols = 9:12), legend = TRUE)
plot of chunk pwlddev_plot

plot of chunk pwlddev_plot

Above we have explored the cross-sectional relationship between the different national GDP series. Now we explore the time-dependence of the panel-vectors as a whole:

2.4 Panel- Auto-, Partial-Auto and Cross-Correlation Functions

The functions psacf, pspacf and psccf mimic stats::acf, stats::pacf and stats::ccf for panel-vectors and panel data.frames. Below we compute the panel series autocorrelation function of the data:

psacf(pwlddev, cols = 9:12)
plot of chunk plm_psacf

plot of chunk plm_psacf

The computation is conducted by first scaling and centering (i.e. standardizing) the panel-vectors by groups (using fscale, default argument gscale = TRUE), and then taking the covariance of each series with a matrix of properly computed panel-lags of itself (using flag), and dividing that by the variance of the overall series (using fvar).

In a similar way we can compute the Partial-ACF (using a multivariate Yule-Walker decomposition on the ACF, as in stats::pacf),

pspacf(pwlddev, cols = 9:12)
plot of chunk plm_pspacf

plot of chunk plm_pspacf

and the panel-cross-correlation function between GDP per capita and life expectancy (which is already contained in the ACF plot above):

psccf(PCGDP, LIFEEX)
plot of chunk plm_psccf

plot of chunk plm_psccf

2.5 Testing for Individual Specific and Time-Effects

As a final step of exploration, we could analyze our series and simple models for the significance and explanatory power of individual or time-fixed effects, without going all the way to running a Hausman Test of fixed vs. random effects on a fully specified model. The main function here is fFtest which efficiently computes a fast R-Squared based F-test of exclusion restrictions on models potentially involving many factors. By default (argument full.df = TRUE) the degrees of freedom of the test are adjusted to make it identical to the F-statistic from regressing the series on a set of country and time dummies1.

# Testing GDP per Capita
fFtest(PCGDP, index(PCGDP))    # Testing individual and time-fixed effects
#    R-Sq.      DF1      DF2  F-Stat.  P-value 
#    0.905      264     9205  330.349    0.000
fFtest(PCGDP, index(PCGDP, 1)) # Testing individual effects
#    R-Sq.      DF1      DF2  F-Stat.  P-value 
#    0.876      215     9254  303.476    0.000
fFtest(PCGDP, index(PCGDP, 2)) # Testing time effects
#    R-Sq.      DF1      DF2  F-Stat.  P-value 
#    0.027       60     9409    4.276    0.000

# Same for Life-Expectancy
fFtest(LIFEEX, index(LIFEEX))    # Testing individual and time-fixed effects
#     R-Sq.       DF1       DF2   F-Stat.   P-value 
#     0.924       265     11404   519.762     0.000
fFtest(LIFEEX, index(LIFEEX, 1)) # Testing individual effects
#     R-Sq.       DF1       DF2   F-Stat.   P-value 
#     0.719       215     11454   136.276     0.000
fFtest(LIFEEX, index(LIFEEX, 2)) # Testing time effects
#     R-Sq.       DF1       DF2   F-Stat.   P-value 
#     0.218        60     11609    54.075     0.000

Below we test the correlation between the country and time-means of GDP and Life-Expectancy:

cor.test(B(PCGDP), B(LIFEEX)) # Testing correlation of country means
# 
#   Pearson's product-moment correlation
# 
# data:  B(PCGDP) and B(LIFEEX)
# t = 78.752, df = 9020, p-value < 2.2e-16
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
#  0.6259141 0.6503737
# sample estimates:
#      cor 
# 0.638305

cor.test(B(PCGDP, effect = 2), B(LIFEEX, effect = 2)) # Same for time-means
# 
#   Pearson's product-moment correlation
# 
# data:  B(PCGDP, effect = 2) and B(LIFEEX, effect = 2)
# t = 325.6, df = 9020, p-value < 2.2e-16
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
#  0.9583431 0.9615804
# sample estimates:
#       cor 
# 0.9599938

We can also test for the significance of individual and time-fixed effects (or both) in the regression of GDP on life expectancy and ODA received:

fFtest(PCGDP, index(PCGDP), get_vars(pwlddev, c("LIFEEX","ODA")))    # Testing individual and time-fixed effects
#                     R-Sq.  DF1  DF2  F-Stat.  P-Value
# Full Model          0.915  227 6682  316.551    0.000
# Restricted Model    0.162    2 6907  668.816    0.000
# Exclusion Rest.     0.753  225 6682  262.732    0.000
fFtest(PCGDP, index(PCGDP, 2), get_vars(pwlddev, c("iso3c","LIFEEX","ODA")))    # Testing time-fixed effects
#                     R-Sq.  DF1  DF2  F-Stat.  P-Value
# Full Model          0.915  227 6682  316.551    0.000
# Restricted Model    0.909  168 6741  403.168    0.000
# Exclusion Rest.     0.005   59 6682    7.238    0.000

As can be expected in this cross-country data, individual and time-fixed effects play a large role in explaining the data, and these effects are correlated across series, suggesting that a fixed-effects model with both types of fixed-effects would be appropriate. To round things off, below we compute the Hausman test of Fixed vs. Random effects, which confirms this conclusion:

phtest(PCGDP ~ LIFEEX, data = pwlddev)
# 
#   Hausman Test
# 
# data:  PCGDP ~ LIFEEX
# chisq = 397.04, df = 1, p-value < 2.2e-16
# alternative hypothesis: one model is inconsistent

Part 3: Programming Panel Data Estimators

A central goal of the collapse package is to facilitate advanced and fast programming with data. A primary field of application for the fast functions introduced above is to program efficient panel data estimators. In this section we walk through a short example of how this can be done. The application will be an implementation of the Hausman and Taylor (1981) estimator, considering a more general case than currently implemented in the plm package.

In Hausman and Taylor (1981), in a more general scenario, we have a linear panel-model of the form \[y_{it} = \beta_1X_{1it} + \beta_2X_{2it} + \beta_3Z_{1i} + \beta_4Z_{2i} + \alpha_i + \gamma_t + \epsilon\] where \(\alpha_i\) denotes unobserved individual specific effects and \(\gamma_t\) denotes unobserved global events. This model has up to 4 kinds of covariates:

  • Time-Varying covariates \(X_{1it}\) that are uncorrelated with the individual specific effect \(\alpha_i\), such that \(E[X_{1it}\alpha_i] = 0\). It may be the case that \(E[X_{1it}\gamma_t] \neq 0\)

  • Time-Varying covariates \(X_{2it}\) with \(E[X_{2it}\alpha_i] \neq 0\) and possibly \(E[X_{2it}\gamma_t] \neq 0\)

  • Time-Invariant covariates \(Z_{1i}\) with \(E[Z_{1i}\alpha_i] = 0\)

  • Time-Invariant covariates \(Z_{2i}\) with \(E[Z_{2i}\alpha_i] \neq 0\)

The main estimation problem arises from \(E[Z_{2i}\alpha_i] \neq 0\), which would usually prevent us from estimating \(\beta_4\) since taking a within-transformation (fixed effects) would remove \(Z_{2i}\) from the equation. Hausman and Taylor (1981) stipulated that since \(E[X_{1it}\alpha_i] = 0\), once could use \(X_{1i.}\) i.e. the between-transformed \(X_{1it}\) to instrument for \(Z_{2i}\). They propose an IV/2SLS estimation of the whole equation where the within-transformed covariates \(\tilde{X}_{1it}\) and \(\tilde{X}_{2it}\) are used to instrument \(X_{1it}\) and \(X_{2it}\), and \(X_{1i.}\) instruments \(Z_{2i}\). Assuming that missing values have been removed beforehand, and also taking into account the possibility that \(E[X_{1it}\gamma_t] \neq 0\) and \(E[X_{2it}\gamma_t] \neq 0\) (i.e. accounting for time fixed-effects), this estimator can be coded as follows:

HT_est <- function(y, X1, Z2, X2 = NULL, Z1 = NULL, time.FE = FALSE) {

  # Create matrix of independent variables
  X <- cbind(Intercept = 1, do.call(cbind, c(X1, X2, Z1, Z2)))

  # Create instrument matrix: if time.FE, higher-order demean X1 and X2, else normal demeaning
  IVS <- cbind(Intercept = 1, do.call(cbind,
               c(if(time.FE) fhdwithin(X1, na.rm = FALSE) else fwithin(X1, na.rm = FALSE),
                 if(is.null(X2)) X2 else if(time.FE) fhdwithin(X2, na.rm = FALSE) else fwithin(X2, na.rm = FALSE),
                 Z1, fbetween(X1, na.rm = FALSE))))

  if(length(IVS) == length(X)) { # The IV estimator case
    return(drop(solve(crossprod(IVS, X), crossprod(IVS, y))))
  } else { # The 2SLS case
    Xhat <- qr.fitted(qr(IVS), X)  # First stage
    return(drop(qr.coef(qr(Xhat), y)))   # Second stage
  }
}

The estimator is written in such a way that variables of the type \(X_{2it}\) and \(Z_{1i}\) are optional, and it also includes an option to also project out time-FE or not. The expected inputs for \(X_{1it}\) (X1), and \(X_{2it}\) (X2) are column-subsets of a pdata.frame.

Having coded the estimator, it would be good to have an example to run it on. I have tried to squeeze an example out of the wlddev data used so far in this vignette. It is quite crappy and suffers from a weak-IV problem, but for there sake of illustration lets do it:

We want to estimate the panel-regression of life-expectancy on GDP per Capita, ODA received, the GINI index and a time-invariant dummy indicating whether the country is an OECD member. All variables except the dummy enter in logs, so this is an elasticity regression. <

dat <- get_vars(wlddev, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA"))
get_vars(dat, 4:7) <- lapply(get_vars(dat, 4:7), log) # Taking logs of the data
dat$OECD <- as.numeric(dat$OECD)                      # Creating OECD dummy
dat <- pdata.frame(fdroplevels(na_omit(dat)),         # Creating Panel data.frame, after removing missing values
                   index = c("iso3c", "year"))        # and dropping unused factor levels
pdim(dat)
# Unbalanced Panel: n = 134, T = 1-34, N = 1068
varying(dat)
#   year   OECD  PCGDP LIFEEX   GINI    ODA 
#   TRUE  FALSE   TRUE   TRUE   TRUE   TRUE

Using the GINI index cost a lot of observations and brought the sample size down to 918, but the GINI index will be a key variable in what follows. Clearly the OECD dummy is time-invariant. Below we run Hausman-tests of fixed vs. random effects to determine which covariates might be correlated with the unobserved individual effects, and which model would be most appropriate.

# This tests whether each of the covariates is correlated with alpha_i
phtest(LIFEEX ~ PCGDP, dat)  # Likely correlated
# 
#   Hausman Test
# 
# data:  LIFEEX ~ PCGDP
# chisq = 17.495, df = 1, p-value = 2.881e-05
# alternative hypothesis: one model is inconsistent
phtest(LIFEEX ~ ODA, dat)    # Likely correlated
# 
#   Hausman Test
# 
# data:  LIFEEX ~ ODA
# chisq = 43.925, df = 1, p-value = 3.413e-11
# alternative hypothesis: one model is inconsistent
phtest(LIFEEX ~ GINI, dat)   # Likely not correlated !
# 
#   Hausman Test
# 
# data:  LIFEEX ~ GINI
# chisq = 0.56851, df = 1, p-value = 0.4509
# alternative hypothesis: one model is inconsistent
phtest(LIFEEX ~ PCGDP + ODA + GINI, dat)  # Fixed Effects is the appropriate model for this regression
# 
#   Hausman Test
# 
# data:  LIFEEX ~ PCGDP + ODA + GINI
# chisq = 24.198, df = 3, p-value = 2.272e-05
# alternative hypothesis: one model is inconsistent

The tests suggest that both GDP per Capita and ODA are correlated with country-specific unobservables affecting life-expectancy, and overall a fixed-effects model would be appropriate. However, the Hausman test on the GINI index fails to reject: Country specific unobservables affecting average life-expectancy are not necessarily correlated with the level of inequality across countries.

Now if we want to include the OECD dummy in the regression, we cannot use fixed-effects as this would wipe-out the dummy as well. If the dummy is uncorrelated with the country-specific unobservables affecting life-expectancy (the \(\alpha_i\)), then we could use a solution suggested by Mundlak (1978) and simply add between-transformed versions of PCGDP and ODA in the regression (in addition to PCGDP and ODA in levels), and so ‘control’ for the part of PCGDP and ODA correlated with the \(\alpha_i\) (in the IV literature this is known as the control-function approach). If however the OECD dummy is correlated with the \(\alpha_i\), then we need to use the Hausman and Taylor (1981) estimator. Below I suggest 2 methods of testing this correlation:

# Testing the correlation between OECD dummy and the Between-transformed Life-Expectancy (i.e. not accounting for other covariates)
cor.test(dat$OECD, B(dat$LIFEEX)) # -> Significant correlation of 0.21
# 
#   Pearson's product-moment correlation
# 
# data:  dat$OECD and B(dat$LIFEEX)
# t = 6.797, df = 1066, p-value = 1.774e-11
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
#  0.1456048 0.2606109
# sample estimates:
#       cor 
# 0.2038109

# Getting the fixed-effects (estimates of alpha_i) from the model (i.e. accounting for the other covariates)
fe <- fixef(plm(LIFEEX ~ PCGDP + ODA + GINI, dat, model = "within"))
mODA <- fmean(dat$ODA, dat$iso3c)
# Again testing the correlation
cor.test(fe, mODA[match(names(fe), names(mODA))]) # -> Not Significant.. but probably due to small sample size, the correlation is still 0.13
# 
#   Pearson's product-moment correlation
# 
# data:  fe and mODA[match(names(fe), names(mODA))]
# t = 1.1218, df = 132, p-value = 0.264
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
#  -0.07362567  0.26243949
# sample estimates:
#        cor 
# 0.09717608

I interpret the test results as rejecting the hypothesis that the dummy is uncorrelated with \(\alpha_i\), thus we do have a case for Hausman and Taylor (1981) here: the OECD dummy is a \(Z_{2i}\) with \(E[Z_{2i}\alpha_i]\neq 0\). The Hausman tests above suggested that the GINI index is the only variable uncorrelated with \(\alpha_i\), thus GINI is \(X_{1it}\) with \(E[X_{1it}\alpha_i] = 0\). Finally PCGDP and ODA jointly constitute \(X_{2it}\), where the Hausman tests strongly suggested that \(E[X_{2it}\alpha_i] \neq 0\). We do not have a \(Z_{1i}\) in this setup, i.e. a time-invariant variable uncorrelated with the \(\alpha_i\).

The Hausman and Taylor (1981) estimator stipulates that we should instrument the OECD dummy with \(X_{1i.}\), the between-transformed GINI index. Let us therefore test the regression of the dummy on this instrument to see of it would be a good (i.e. relevant) instrument:

# This computes the regression of OECD on the GINI instrument: Weak IV problem !!
fFtest(dat$OECD, B(dat$GINI))
#    R-Sq.      DF1      DF2  F-Stat.  P-value 
#    0.000        1     1066    0.153    0.695

The 0 R-Squared and the F-Statistic of 0.21 suggest that the instrument is very weak indeed, rubbish to be precise, thus the implementation of the HT estimator below is also a rubbish example, but it is still good for illustration purposes:

HT_est(y = dat$LIFEEX,
       X1 = get_vars(dat, "GINI"),
       Z2 = get_vars(dat, "OECD"),
       X2 = get_vars(dat, c("PCGDP","ODA")))
#    Intercept         GINI        PCGDP          ODA         OECD 
#  3.638486969 -0.035596160  0.120981946  0.005744747 -5.862368476

Now a central questions is of course: How computationally efficient is this estimator? Let us try to re-run it on the data generated for the benchmark in Part 1:

dat <- get_vars(data, c("iso3c","year","OECD","PCGDP","LIFEEX","GINI","ODA"))
get_vars(dat, 4:7) <- lapply(get_vars(dat, 4:7), log) # Taking logs of the data
dat$OECD <- as.numeric(dat$OECD)                      # Creating OECD dummy
dat <- pdata.frame(fdroplevels(na_omit(dat)),         # Creating Panel data.frame, after removing missing values
                   index = c("iso3c", "year"))        # and dropping unused factor levels
pdim(dat)
# Unbalanced Panel: n = 13400, T = 1-34, N = 106800
varying(dat)
#   year   OECD  PCGDP LIFEEX   GINI    ODA 
#   TRUE  FALSE   TRUE   TRUE   TRUE   TRUE

library(microbenchmark)
microbenchmark(HT_est = HT_est(y = dat$LIFEEX,     # The estimator as before
                      X1 = get_vars(dat, "GINI"),
                      Z2 = get_vars(dat, "OECD"),
                      X2 = get_vars(dat, c("PCGDP","ODA"))),
              HT_est_TFE =  HT_est(y = dat$LIFEEX, # Also Projecting out Time-FE
                      X1 = get_vars(dat, "GINI"),
                      Z2 = get_vars(dat, "OECD"),
                      X2 = get_vars(dat, c("PCGDP","ODA")),
                      time.FE = TRUE))
# Unit: milliseconds
#        expr       min       lq      mean    median        uq      max neval
#      HT_est  7.919437  8.46937  9.761301  8.869612  9.508597 45.08717   100
#  HT_est_TFE 22.501128 23.18640 25.387041 23.469835 24.490612 85.96462   100

At around 100,000 obs and 13,000 groups in an unbalanced panel, the computation involving 3 grouped centering and 1 grouped averaging task as well as 2 list-to matrix conversions and an IV-procedure took about 10 milliseconds with only individual effects, and about 40 - 45 milliseconds with individual and time-fixed effects (projected out iteratively). This should leave some room for running this on much larger data.

References

Hausman J, Taylor W (1981). “Panel Data and Unobservable Individual Effects.†Econometrica, 49, 1377–1398.

Mundlak, Yair. 1978. “On the Pooling of Time Series and Cross Section Data.†Econometrica 46 (1): 69–85.

Cochrane, D. & Orcutt, G. H. (1949). “Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Termsâ€. Journal of the American Statistical Association. 44 (245): 32–61.

Prais, S. J. & Winsten, C. B. (1954). “Trend Estimators and Serial Correlationâ€. Cowles Commission Discussion Paper No. 383. Chicago.


  1. In fact factors are projected out using fixest::demean and no regression is run at all↩︎

collapse/inst/doc/collapse_and_sf.html0000644000176200001440000056621215202627532017620 0ustar liggesusers collapse and sf

collapse and sf

Fast Manipulation of Simple Features Data Frames

Sebastian Krantz and Grant McDermott

2024-04-19

This short vignette focuses on using collapse with the popular sf package by Edzer Pebesma. It shows that collapse supports easy manipulation of sf data frames, at computation speeds far above dplyr.

collapse v1.6.0 added internal support for sf data frames by having most essential functions (e.g., fselect/gv, fsubset/ss, fgroup_by, findex_by, qsu, descr, varying, funique, roworder, rsplit, fcompute, …) internally handle the geometry column.

To demonstrate this, we can load a test dataset provided by sf:

library(collapse)
library(sf)

nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
options(sf_max_print = 3)
nc
# Simple feature collection with 100 features and 14 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79
# 1 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091     1      10  1364     0
# 2 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487     0      10   542     3
# 3 0.143     1.630  1828    1828     Surry 37171  37171       86  3188     5     208  3616     6
#   NWBIR79                       geometry
# 1      19 MULTIPOLYGON (((-81.47276 3...
# 2      12 MULTIPOLYGON (((-81.23989 3...
# 3     260 MULTIPOLYGON (((-80.45634 3...

Summarising sf Data Frames

Computing summary statistics on sf data frames automatically excludes the ‘geometry’ column:

# Which columns have at least 2 non-missing distinct values
varying(nc) 
#      AREA PERIMETER     CNTY_   CNTY_ID      NAME      FIPS    FIPSNO  CRESS_ID     BIR74     SID74 
#      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE 
#   NWBIR74     BIR79     SID79   NWBIR79 
#      TRUE      TRUE      TRUE      TRUE

# Quick summary stats
qsu(nc)
#              N     Mean         SD    Min    Max
# AREA       100   0.1263     0.0492  0.042  0.241
# PERIMETER  100    1.673     0.4823  0.999   3.64
# CNTY_      100  1985.96   106.5166   1825   2241
# CNTY_ID    100  1985.96   106.5166   1825   2241
# NAME       100        -          -      -      -
# FIPS       100        -          -      -      -
# FIPSNO     100    37100     58.023  37001  37199
# CRESS_ID   100     50.5    29.0115      1    100
# BIR74      100  3299.62  3848.1651    248  21588
# SID74      100     6.67     7.7812      0     44
# NWBIR74    100  1050.81  1432.9117      1   8027
# BIR79      100  4223.92  5179.4582    319  30757
# SID79      100     8.36     9.4319      0     57
# NWBIR79    100  1352.81  1975.9988      3  11631

# Detailed statistics description of each column
descr(nc)
# Dataset: nc, 14 Variables, N = 100
# ----------------------------------------------------------------------------------------------------
# AREA (numeric): 
# Statistics
#     N  Ndist  Mean    SD   Min   Max  Skew  Kurt
#   100     77  0.13  0.05  0.04  0.24  0.48   2.5
# Quantiles
#     1%    5%   10%   25%   50%   75%  90%   95%   99%
#   0.04  0.06  0.06  0.09  0.12  0.15  0.2  0.21  0.24
# ----------------------------------------------------------------------------------------------------
# PERIMETER (numeric): 
# Statistics
#     N  Ndist  Mean    SD  Min   Max  Skew  Kurt
#   100     96  1.67  0.48    1  3.64  1.48  5.95
# Quantiles
#   1%    5%   10%   25%   50%   75%  90%   95%  99%
#    1  1.09  1.19  1.32  1.61  1.86  2.2  2.72  3.2
# ----------------------------------------------------------------------------------------------------
# CNTY_ (numeric): 
# Statistics
#     N  Ndist     Mean      SD   Min   Max  Skew  Kurt
#   100    100  1985.96  106.52  1825  2241  0.26  2.32
# Quantiles
#        1%       5%     10%      25%   50%      75%   90%     95%      99%
#   1826.98  1832.95  1837.9  1902.25  1982  2067.25  2110  2156.3  2238.03
# ----------------------------------------------------------------------------------------------------
# CNTY_ID (numeric): 
# Statistics
#     N  Ndist     Mean      SD   Min   Max  Skew  Kurt
#   100    100  1985.96  106.52  1825  2241  0.26  2.32
# Quantiles
#        1%       5%     10%      25%   50%      75%   90%     95%      99%
#   1826.98  1832.95  1837.9  1902.25  1982  2067.25  2110  2156.3  2238.03
# ----------------------------------------------------------------------------------------------------
# NAME (character): 
# Statistics
#     N  Ndist
#   100    100
# Table
#                Freq  Perc
# Ashe              1     1
# Alleghany         1     1
# Surry             1     1
# Currituck         1     1
# Northampton       1     1
# Hertford          1     1
# Camden            1     1
# Gates             1     1
# Warren            1     1
# Stokes            1     1
# Caswell           1     1
# Rockingham        1     1
# Granville         1     1
# Person            1     1
# ... 86 Others    86    86
# 
# Summary of Table Frequencies
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#       1       1       1       1       1       1 
# ----------------------------------------------------------------------------------------------------
# FIPS (character): 
# Statistics
#     N  Ndist
#   100    100
# Table
#                Freq  Perc
# 37009             1     1
# 37005             1     1
# 37171             1     1
# 37053             1     1
# 37131             1     1
# 37091             1     1
# 37029             1     1
# 37073             1     1
# 37185             1     1
# 37169             1     1
# 37033             1     1
# 37157             1     1
# 37077             1     1
# 37145             1     1
# ... 86 Others    86    86
# 
# Summary of Table Frequencies
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#       1       1       1       1       1       1 
# ----------------------------------------------------------------------------------------------------
# FIPSNO (numeric): 
# Statistics
#     N  Ndist   Mean     SD    Min    Max  Skew  Kurt
#   100    100  37100  58.02  37001  37199    -0   1.8
# Quantiles
#         1%       5%      10%      25%    50%      75%      90%      95%       99%
#   37002.98  37010.9  37020.8  37050.5  37100  37149.5  37179.2  37189.1  37197.02
# ----------------------------------------------------------------------------------------------------
# CRESS_ID (integer): 
# Statistics
#     N  Ndist  Mean     SD  Min  Max  Skew  Kurt
#   100    100  50.5  29.01    1  100     0   1.8
# Quantiles
#     1%    5%   10%    25%   50%    75%   90%    95%    99%
#   1.99  5.95  10.9  25.75  50.5  75.25  90.1  95.05  99.01
# ----------------------------------------------------------------------------------------------------
# BIR74 (numeric): 
# Statistics
#     N  Ndist     Mean       SD  Min    Max  Skew   Kurt
#   100    100  3299.62  3848.17  248  21588  2.79  11.79
# Quantiles
#       1%      5%    10%   25%     50%   75%     90%    95%       99%
#   283.64  419.75  531.8  1077  2180.5  3936  6725.7  11193  20378.22
# ----------------------------------------------------------------------------------------------------
# SID74 (numeric): 
# Statistics
#     N  Ndist  Mean    SD  Min  Max  Skew   Kurt
#   100     23  6.67  7.78    0   44  2.44  10.28
# Quantiles
#   1%  5%  10%  25%  50%   75%   90%    95%    99%
#    0   0    0    2    4  8.25  15.1  18.25  38.06
# ----------------------------------------------------------------------------------------------------
# NWBIR74 (numeric): 
# Statistics
#     N  Ndist     Mean       SD  Min   Max  Skew   Kurt
#   100     93  1050.81  1432.91    1  8027  2.83  11.84
# Quantiles
#   1%    5%   10%  25%    50%     75%     90%     95%      99%
#    1  9.95  39.2  190  697.5  1168.5  2231.8  3942.9  7052.84
# ----------------------------------------------------------------------------------------------------
# BIR79 (numeric): 
# Statistics
#     N  Ndist     Mean       SD  Min    Max  Skew  Kurt
#   100    100  4223.92  5179.46  319  30757  2.99  13.1
# Quantiles
#       1%     5%    10%      25%   50%   75%   90%       95%       99%
#   349.69  539.3  675.7  1336.25  2636  4889  8313  14707.45  26413.87
# ----------------------------------------------------------------------------------------------------
# SID79 (numeric): 
# Statistics
#     N  Ndist  Mean    SD  Min  Max  Skew  Kurt
#   100     28  8.36  9.43    0   57  2.28  9.88
# Quantiles
#   1%  5%  10%  25%  50%    75%  90%  95%    99%
#    0   0    1    2    5  10.25   21   26  38.19
# ----------------------------------------------------------------------------------------------------
# NWBIR79 (numeric): 
# Statistics
#     N  Ndist     Mean    SD  Min    Max  Skew   Kurt
#   100     98  1352.81  1976    3  11631  3.18  14.45
# Quantiles
#     1%    5%   10%    25%    50%      75%     90%     95%       99%
#   3.99  11.9  44.7  250.5  874.5  1406.75  2987.9  5090.5  10624.17
# ----------------------------------------------------------------------------------------------------

Selecting Columns and Subsetting

We can select columns from the sf data frame without having to worry about taking along ‘geometry’:

# Selecting a sequence of columns
fselect(nc, AREA, NAME:FIPSNO)
# Simple feature collection with 100 features and 4 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA      NAME  FIPS FIPSNO                       geometry
# 1 0.114      Ashe 37009  37009 MULTIPOLYGON (((-81.47276 3...
# 2 0.061 Alleghany 37005  37005 MULTIPOLYGON (((-81.23989 3...
# 3 0.143     Surry 37171  37171 MULTIPOLYGON (((-80.45634 3...

# Same using standard evaluation (gv is a shorthand for get_vars())
gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO"))
# Simple feature collection with 100 features and 4 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA      NAME  FIPS FIPSNO                       geometry
# 1 0.114      Ashe 37009  37009 MULTIPOLYGON (((-81.47276 3...
# 2 0.061 Alleghany 37005  37005 MULTIPOLYGON (((-81.23989 3...
# 3 0.143     Surry 37171  37171 MULTIPOLYGON (((-80.45634 3...

The same applies to subsetting rows (and columns):

# A fast and enhanced version of base::subset
fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO)
# Simple feature collection with 44 features and 4 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA        NAME  FIPS FIPSNO                       geometry
# 1 0.143       Surry 37171  37171 MULTIPOLYGON (((-80.45634 3...
# 2 0.153 Northampton 37131  37131 MULTIPOLYGON (((-77.21767 3...
# 3 0.153  Rockingham 37157  37157 MULTIPOLYGON (((-79.53051 3...

# A fast version of `[` (where i is used and optionally j)
ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO"))
# Simple feature collection with 10 features and 4 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA      NAME  FIPS FIPSNO                       geometry
# 1 0.114      Ashe 37009  37009 MULTIPOLYGON (((-81.47276 3...
# 2 0.061 Alleghany 37005  37005 MULTIPOLYGON (((-81.23989 3...
# 3 0.143     Surry 37171  37171 MULTIPOLYGON (((-80.45634 3...

This is significantly faster than using [, base::subset(), dplyr::select() or dplyr::filter():

library(microbenchmark)
library(dplyr)

# Selecting columns
microbenchmark(collapse = fselect(nc, AREA, NAME:FIPSNO), 
               dplyr = select(nc, AREA, NAME:FIPSNO),
               collapse2 = gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO")), 
               sf = nc[c("AREA", "NAME", "FIPS", "FIPSNO")])
# Unit: microseconds
#       expr     min       lq      mean   median       uq      max neval
#   collapse   3.034   3.9565   5.19429   5.1865   5.6990   22.878   100
#      dplyr 431.279 452.2915 505.29015 466.3750 493.8450 3356.342   100
#  collapse2   2.665   3.4850   4.59610   4.4075   5.0635   14.391   100
#         sf 105.165 114.1235 120.39732 118.0390 124.9270  156.497   100
# Subsetting
microbenchmark(collapse = fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO), 
               dplyr = select(nc, AREA, NAME:FIPSNO) |> filter(AREA > fmean(AREA)),
               collapse2 = ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO")), 
               sf = nc[1:10, c("AREA", "NAME", "FIPS", "FIPSNO")])
# Unit: microseconds
#       expr     min       lq       mean   median        uq      max neval
#   collapse   9.676  11.5825   15.01707  14.4730   16.8920   30.463   100
#      dplyr 890.643 917.6415 1055.40970 941.7085 1009.7890 5546.685   100
#  collapse2   2.829   3.5465    5.40585   4.8995    6.4165   20.541   100
#         sf 176.997 187.6160  202.72286 200.7565  210.8220  340.464   100

However, collapse functions don’t subset the ‘agr’ attribute on selecting columns, which (if specified) relates columns (attributes) to the geometry, and also don’t modify the ‘bbox’ attribute giving the overall boundaries of a set of geometries when subsetting the sf data frame. Keeping the full ‘agr’ attribute is not problematic for all practical purposes, but not changing ‘bbox’ upon subsetting may lead to too large margins when plotting the geometries of a subset sf data frame.

One way to to change this is calling st_make_valid() on the subset frame; but st_make_valid() is very expensive, thus unless the subset frame is very small, it is better to use [, base::subset() or dplyr::filter() in cases where the bounding box size matters.

Aggregation and Grouping

The flexibility and speed of collap() for aggregation can be used on sf data frames. A separate method for sf objects was not considered necessary as one can simply aggregate the geometry column using st_union():

# Aggregating by variable SID74 using the median for numeric and the mode for categorical columns
collap(nc, ~ SID74, custom = list(fmedian = is.numeric, 
                                  fmode = is.character, 
                                  st_union = "geometry")) # or use is.list to fetch the geometry
# Simple feature collection with 23 features and 15 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#     AREA PERIMETER  CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 SID74 NWBIR74  BIR79
# 1 0.0780    1.3070 1950.0  1950.0 Alleghany 37005  37073     37.0   487     0     0    40.0  594.0
# 2 0.0810    1.2880 1887.0  1887.0      Ashe 37009  37137     69.0   751     1     1   148.0  899.0
# 3 0.1225    1.6435 1959.5  1959.5   Caswell 37033  37078     39.5  1271     2     2   382.5 1676.5
#   SID79 NWBIR79                       geometry
# 1     1      45 MULTIPOLYGON (((-83.69563 3...
# 2     1     176 MULTIPOLYGON (((-80.02406 3...
# 3     2     452 MULTIPOLYGON (((-77.16129 3...

sf data frames can also be grouped and then aggregated using fsummarise():

nc |> fgroup_by(SID74)
# Simple feature collection with 100 features and 14 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79
# 1 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091     1      10  1364     0
# 2 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487     0      10   542     3
# 3 0.143     1.630  1828    1828     Surry 37171  37171       86  3188     5     208  3616     6
#   NWBIR79                       geometry
# 1      19 MULTIPOLYGON (((-81.47276 3...
# 2      12 MULTIPOLYGON (((-81.23989 3...
# 3     260 MULTIPOLYGON (((-80.45634 3...
# 
# Grouped by:  SID74  [23 | 4 (4) 1-13]

nc |> 
  fgroup_by(SID74) |>
  fsummarise(AREA_Ag = fsum(AREA), 
             Perimeter_Ag = fmedian(PERIMETER),
             geometry = st_union(geometry))
# Simple feature collection with 23 features and 3 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#   SID74 AREA_Ag Perimeter_Ag                       geometry
# 1     0   1.103       1.3070 MULTIPOLYGON (((-83.69563 3...
# 2     1   0.914       1.2880 MULTIPOLYGON (((-80.02406 3...
# 3     2   1.047       1.6435 MULTIPOLYGON (((-77.16129 3...

Typically most of the time in aggregation is consumed by st_union() so that the speed of collapse does not really become visible on most datasets. A faster alternative is to use geos (sf backend for planar geometries) or s2 (sf backend for spherical geometries) directly:

# Using s2 backend: sensible for larger tasks
nc |> 
  fmutate(geometry = s2::as_s2_geography(geometry)) |>
  fgroup_by(SID74) |>
  fsummarise(AREA_Ag = fsum(AREA), 
             Perimeter_Ag = fmedian(PERIMETER),
             geometry = s2::s2_union_agg(geometry)) |>
  fmutate(geometry = st_as_sfc(geometry))
# Simple feature collection with 23 features and 3 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  WGS 84
# First 3 features:
#   SID74 AREA_Ag Perimeter_Ag                       geometry
# 1     0   1.103       1.3070 MULTIPOLYGON (((-83.69563 3...
# 2     1   0.914       1.2880 MULTIPOLYGON (((-80.02406 3...
# 3     2   1.047       1.6435 MULTIPOLYGON (((-77.16129 3...

In general, also upon aggregation with collapse, functions st_as_sfc(), st_as_sf(), or, in the worst case, st_make_valid(), may need to be invoked to ensure valid sf object output. Functions collap() and fsummarise() are attribute preserving but do not give special regard to geometry columns.

One exception that both avoids the high cost of spatial functions in aggregation and any need for ex-post conversion/validation is aggregating spatial panel data over the time-dimension. Such panels can quickly be aggregated using ffirst() or flast() to aggregate the geometry:

# Creating a panel-dataset by simply duplicating nc for 2 different years
pnc <- rowbind(`2000` = nc, `2001` = nc, idcol = "Year") |> as_integer_factor()
pnc 
# Simple feature collection with 200 features and 15 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#   Year  AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79
# 1 2000 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091     1      10  1364
# 2 2000 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487     0      10   542
# 3 2000 0.143     1.630  1828    1828     Surry 37171  37171       86  3188     5     208  3616
#   SID79 NWBIR79                       geometry
# 1     0      19 MULTIPOLYGON (((-81.47276 3...
# 2     3      12 MULTIPOLYGON (((-81.23989 3...
# 3     6     260 MULTIPOLYGON (((-80.45634 3...

# Aggregating by NAME, using the last value for all categorical data
collap(pnc, ~ NAME, fmedian, catFUN = flast, cols = -1L)
# Simple feature collection with 100 features and 15 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA PERIMETER CNTY_ CNTY_ID      NAME      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79
# 1 0.111     1.392  1904    1904  Alamance  Alamance 37001  37001        1  4672    13    1243  5767
# 2 0.066     1.070  1950    1950 Alexander Alexander 37003  37003        2  1333     0     128  1683
# 3 0.061     1.231  1827    1827 Alleghany Alleghany 37005  37005        3   487     0      10   542
#   SID79 NWBIR79                       geometry
# 1    11    1397 MULTIPOLYGON (((-79.24619 3...
# 2     2     150 MULTIPOLYGON (((-81.10889 3...
# 3     3      12 MULTIPOLYGON (((-81.23989 3...

# Using fsummarise to aggregate just two variables and the geometry
pnc_ag <- pnc |> 
  fgroup_by(NAME) |>
  fsummarise(AREA_Ag = fsum(AREA), 
             Perimeter_Ag = fmedian(PERIMETER),
             geometry = flast(geometry))

# The geometry is still valid... (slt = shorthand for fselect)
plot(slt(pnc_ag, AREA_Ag))

plot of chunk AREA_Ag

Indexing

sf data frames can also become indexed frames (spatio-temporal panels):

pnc <- pnc |> findex_by(CNTY_ID, Year)
pnc 
# Simple feature collection with 200 features and 15 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#   Year  AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79
# 1 2000 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091     1      10  1364
# 2 2000 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487     0      10   542
# 3 2000 0.143     1.630  1828    1828     Surry 37171  37171       86  3188     5     208  3616
#   SID79 NWBIR79                       geometry
# 1     0      19 MULTIPOLYGON (((-81.47276 3...
# 2     3      12 MULTIPOLYGON (((-81.23989 3...
# 3     6     260 MULTIPOLYGON (((-80.45634 3...
# 
# Indexed by:  CNTY_ID [100] | Year [2]
qsu(pnc$AREA)
#          N/T    Mean      SD     Min     Max
# Overall  200  0.1263  0.0491   0.042   0.241
# Between  100  0.1263  0.0492   0.042   0.241
# Within     2  0.1263       0  0.1263  0.1263
settransform(pnc, AREA_diff = fdiff(AREA)) 
psmat(pnc$AREA_diff) |> head()
#      2000 2001
# 1825   NA    0
# 1827   NA    0
# 1828   NA    0
# 1831   NA    0
# 1832   NA    0
# 1833   NA    0
pnc <- unindex(pnc)

Unique Values, Ordering, Splitting, Binding

Functions funique() and roworder[v]() ignore the ‘geometry’ column in determining the unique values / order of rows when applied to sf data frames. rsplit() can be used to (recursively) split an sf data frame into multiple chunks.

# Splitting by SID74
rsplit(nc, ~ SID74) |> head(2)
# $`0`
# Simple feature collection with 13 features and 13 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 NWBIR74 BIR79 SID79 NWBIR79
# 1 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487      10   542     3      12
# 2 0.062     1.547  1834    1834    Camden 37029  37029       15   286     115   350     2     139
# 3 0.091     1.284  1835    1835     Gates 37073  37073       37   420     254   594     2     371
#                         geometry
# 1 MULTIPOLYGON (((-81.23989 3...
# 2 MULTIPOLYGON (((-76.00897 3...
# 3 MULTIPOLYGON (((-76.56251 3...
# 
# $`1`
# Simple feature collection with 11 features and 13 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 NWBIR74 BIR79 SID79 NWBIR79
# 1 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091      10  1364     0      19
# 2 0.070     2.968  1831    1831 Currituck 37053  37053       27   508     123   830     2     145
# 3 0.124     1.428  1837    1837    Stokes 37169  37169       85  1612     160  2038     5     176
#                         geometry
# 1 MULTIPOLYGON (((-81.47276 3...
# 2 MULTIPOLYGON (((-76.00897 3...
# 3 MULTIPOLYGON (((-80.02567 3...

The default in rsplit() for data frames is simplify = TRUE, which, for a single LHS variable, would just split the column-vector. This does not apply to sf data frames as the ‘geometry’ column is always selected as well.

# Only splitting Area
rsplit(nc, AREA ~ SID74) |> head(1)
# $`0`
# Simple feature collection with 13 features and 1 field
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA                       geometry
# 1 0.061 MULTIPOLYGON (((-81.23989 3...
# 2 0.062 MULTIPOLYGON (((-76.00897 3...
# 3 0.091 MULTIPOLYGON (((-76.56251 3...

# For data frames the default simplify = TRUE drops the data frame structure
rsplit(qDF(nc), AREA ~ SID74) |> head(1)
# $`0`
#  [1] 0.061 0.062 0.091 0.064 0.059 0.080 0.066 0.099 0.094 0.078 0.131 0.167 0.051

sf data frames can be combined using rowbind(), which, by default, preserves the attributes of the first object.

# Splitting by each row and recombining
nc_combined <- nc %>% rsplit(seq_row(.)) %>% rowbind() 
identical(nc, nc_combined)
# [1] TRUE

Transformations

For transforming and computing columns, fmutate() and ftransform[v]() apply as to any other data frame.

fmutate(nc, gsum_AREA = fsum(AREA, SID74, TRA = "fill")) |> head()
# Simple feature collection with 6 features and 15 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79
# 1 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091     1      10  1364     0
# 2 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487     0      10   542     3
# 3 0.143     1.630  1828    1828     Surry 37171  37171       86  3188     5     208  3616     6
#   NWBIR79                       geometry gsum_AREA
# 1      19 MULTIPOLYGON (((-81.47276 3...     0.914
# 2      12 MULTIPOLYGON (((-81.23989 3...     1.103
# 3     260 MULTIPOLYGON (((-80.45634 3...     1.380

# Same thing, more expensive
nc |> fgroup_by(SID74) |> fmutate(gsum_AREA = fsum(AREA)) |> fungroup() |> head()
# Simple feature collection with 6 features and 15 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79
# 1 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091     1      10  1364     0
# 2 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487     0      10   542     3
# 3 0.143     1.630  1828    1828     Surry 37171  37171       86  3188     5     208  3616     6
#   NWBIR79                       geometry gsum_AREA
# 1      19 MULTIPOLYGON (((-81.47276 3...     0.914
# 2      12 MULTIPOLYGON (((-81.23989 3...     1.103
# 3     260 MULTIPOLYGON (((-80.45634 3...     1.380

Special attention to sf data frames is afforded by fcompute(), which can be used to compute new columns dropping existing ones - except for the geometry column and any columns selected through the keep argument.

fcompute(nc, scaled_AREA = fscale(AREA), 
             gsum_AREA = fsum(AREA, SID74, TRA = "fill"), 
         keep = .c(AREA, SID74))
# Simple feature collection with 100 features and 4 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#    AREA SID74 scaled_AREA gsum_AREA                       geometry
# 1 0.114     1  -0.2491860     0.914 MULTIPOLYGON (((-81.47276 3...
# 2 0.061     0  -1.3264176     1.103 MULTIPOLYGON (((-81.23989 3...
# 3 0.143     5   0.3402426     1.380 MULTIPOLYGON (((-80.45634 3...

Conversion to and from sf

The quick converters qDF(), qDT(), and qTBL() can be used to efficiently convert sf data frames to standard data frames, data.table’s or tibbles, and the result can be converted back to the original sf data frame using setAttrib(), copyAttrib() or copyMostAttrib().

library(data.table)
# Create a data.table on the fly to do an fast grouped rolling mean and back to sf
qDT(nc)[, list(roll_AREA = frollmean(AREA, 2), geometry), by = SID74] |> copyMostAttrib(nc)
# Simple feature collection with 100 features and 2 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#   SID74 roll_AREA                       geometry
# 1     1        NA MULTIPOLYGON (((-81.47276 3...
# 2     1     0.092 MULTIPOLYGON (((-76.00897 3...
# 3     1     0.097 MULTIPOLYGON (((-80.02567 3...

The easiest way to strip a geometry column off an sf data frame is via the function atomic_elem(), which removes list-like columns and, by default, also the class attribute. For example, we can create a data.table without list column using

qDT(atomic_elem(nc)) |> head()
#     AREA PERIMETER CNTY_ CNTY_ID        NAME   FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79
#    <num>     <num> <num>   <num>      <char> <char>  <num>    <int> <num> <num>   <num> <num> <num>
# 1: 0.114     1.442  1825    1825        Ashe  37009  37009        5  1091     1      10  1364     0
# 2: 0.061     1.231  1827    1827   Alleghany  37005  37005        3   487     0      10   542     3
# 3: 0.143     1.630  1828    1828       Surry  37171  37171       86  3188     5     208  3616     6
# 4: 0.070     2.968  1831    1831   Currituck  37053  37053       27   508     1     123   830     2
# 5: 0.153     2.206  1832    1832 Northampton  37131  37131       66  1421     9    1066  1606     3
# 6: 0.097     1.670  1833    1833    Hertford  37091  37091       46  1452     7     954  1838     5
#    NWBIR79
#      <num>
# 1:      19
# 2:      12
# 3:     260
# 4:     145
# 5:    1197
# 6:    1237

This is also handy for other functions such as join() and pivot(), which are class agnostic like all of collapse, but do not have any built-in logic to deal with the sf column.

# Use atomic_elem() to strip geometry off y in left join
identical(nc, join(nc, atomic_elem(nc), overid = 2))
# left join: nc[AREA, PERIMETER, CNTY_, CNTY_ID, NAME, FIPS, FIPSNO, CRESS_ID, BIR74, SID74, NWBIR74, BIR79, SID79, NWBIR79] 100/100 (100%) <m:m> y[AREA, PERIMETER, CNTY_, CNTY_ID, NAME, FIPS, FIPSNO, CRESS_ID, BIR74, SID74, NWBIR74, BIR79, SID79, NWBIR79] 100/100 (100%)
# [1] TRUE

# In pivot: presently need to specify what to do with geometry column
pivot(nc, c("CNTY_ID", "geometry")) |> head()
# Simple feature collection with 6 features and 3 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965
# Geodetic CRS:  NAD27
# First 3 features:
#   CNTY_ID                       geometry variable value
# 1    1825 MULTIPOLYGON (((-81.47276 3...     AREA 0.114
# 2    1827 MULTIPOLYGON (((-81.23989 3...     AREA 0.061
# 3    1828 MULTIPOLYGON (((-80.45634 3...     AREA 0.143
# Or use
pivot(qDT(atomic_elem(nc)), "CNTY_ID") |> head()
#    CNTY_ID variable  value
#      <num>   <fctr> <char>
# 1:    1825     AREA  0.114
# 2:    1827     AREA  0.061
# 3:    1828     AREA  0.143
# 4:    1831     AREA   0.07
# 5:    1832     AREA  0.153
# 6:    1833     AREA  0.097

Support for units

Since v2.0.13, collapse explicitly supports/preserves units objects through dedicated methods that preserve the ‘units’ class wherever sensible.

nc_dist <- st_centroid(nc) |> st_distance()
nc_dist[1:3, 1:3]
# Units: [m]
#          [,1]     [,2]     [,3]
# [1,]     0.00 34020.35 72728.02
# [2,] 34020.35     0.00 40259.55
# [3,] 72728.02 40259.55     0.00

fmean(nc_dist) |> head()
# Units: [m]
# [1] 250543.9 237040.0 217941.5 337016.5 250380.2 269604.6
fndistinct(nc_dist) |> head()
# [1] 100 100 100 100 100 100

Conclusion

collapse provides no deep integration with the sf ecosystem and cannot perform spatial operations, but offers sufficient features and flexibility to painlessly manipulate sf data frames at much greater speeds than dplyr.

This requires a bit of care by the user though to ensure that the returned sf objects are valid, especially following aggregation and subsetting.

collapse/inst/doc/collapse_and_dplyr.html0000644000176200001440000111300115202627530020321 0ustar liggesusers collapse and dplyr

collapse and dplyr

Fast (Weighted) Aggregations and Transformations in a Piped Workflow

Sebastian Krantz

2021-01-04

This vignette focuses on the integration of collapse and the popular dplyr package by Hadley Wickham. In particular it will demonstrate how using collapse’s fast functions and some fast alternatives for dplyr verbs can substantially facilitate and speed up basic data manipulation, grouped and weighted aggregations and transformations, and panel data computations (i.e. between- and within-transformations, panel-lags, differences and growth rates) in a dplyr (piped) workflow.


Notes:

  • This vignette is targeted at dplyr / tidyverse users. collapse is a standalone package and can be programmed efficiently without pipes or dplyr verbs.

  • The ‘Introduction to collapse’ vignette provides a thorough introduction to the package and a built-in structured documentation is available under help("collapse-documentation") after installing the package. In addition help("collapse-package") provides a compact set of examples for quick-start.

  • Documentation and vignettes can also be viewed online.


1. Fast Aggregations

A key feature of collapse is it’s broad set of Fast Statistical Functions (fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct) which are able to substantially speed-up column-wise, grouped and weighted computations on vectors, matrices or data frames. The functions are S3 generic, with a default (vector), matrix and data frame method, as well as a grouped_df method for grouped tibbles used by dplyr. The grouped tibble method has the following arguments:

FUN.grouped_df(x, [w = NULL,] TRA = NULL, [na.rm = TRUE,]
               use.g.names = FALSE, keep.group_vars = TRUE, [keep.w = TRUE,] ...)

where w is a weight variable, and TRA and can be used to transform x using the computed statistics and one of 10 available transformations ("replace_fill", "replace", "-", "-+", "/", "%", "+", "*", "%%", "-%%", discussed in section 2). na.rm efficiently removes missing values and is TRUE by default. use.g.names generates new row-names from the unique combinations of groups (default: disabled), whereas keep.group_vars (default: enabled) will keep the grouping columns as is custom in the native data %>% group_by(...) %>% summarize(...) workflow in dplyr. Finally, keep.w regulates whether a weighting variable used is also aggregated and saved in a column. For fsum, fmean, fmedian, fnth, fvar, fsd and fmode this will compute the sum of the weights in each group, whereas fprod returns the product of the weights.

With that in mind, let’s consider some straightforward applications.

1.1 Simple Aggregations

Consider the Groningen Growth and Development Center 10-Sector Database included in collapse and introduced in the main vignette:

library(collapse)
head(GGDC10S)
#   Country Regioncode             Region Variable Year      AGR      MIN       MAN        PU
# 1     BWA        SSA Sub-saharan Africa       VA 1960       NA       NA        NA        NA
# 2     BWA        SSA Sub-saharan Africa       VA 1961       NA       NA        NA        NA
# 3     BWA        SSA Sub-saharan Africa       VA 1962       NA       NA        NA        NA
# 4     BWA        SSA Sub-saharan Africa       VA 1963       NA       NA        NA        NA
# 5     BWA        SSA Sub-saharan Africa       VA 1964 16.30154 3.494075 0.7365696 0.1043936
# 6     BWA        SSA Sub-saharan Africa       VA 1965 15.72700 2.495768 1.0181992 0.1350976
#         CON      WRT      TRA     FIRE      GOV      OTH      SUM
# 1        NA       NA       NA       NA       NA       NA       NA
# 2        NA       NA       NA       NA       NA       NA       NA
# 3        NA       NA       NA       NA       NA       NA       NA
# 4        NA       NA       NA       NA       NA       NA       NA
# 5 0.6600454 6.243732 1.658928 1.119194 4.822485 2.341328 37.48229
# 6 1.3462312 7.064825 1.939007 1.246789 5.695848 2.678338 39.34710

# Summarize the Data: 
# descr(GGDC10S, cols = is_categorical)
# aperm(qsu(GGDC10S, ~Variable, cols = is.numeric))

# Efficiently converting to tibble (no deep copy)
GGDC10S <- qTBL(GGDC10S)

Simple column-wise computations using the fast functions and pipe operators are performed as follows:

library(dplyr)

GGDC10S %>% fnobs                       # Number of Observations
#    Country Regioncode     Region   Variable       Year        AGR        MIN        MAN         PU 
#       5027       5027       5027       5027       5027       4364       4355       4355       4354 
#        CON        WRT        TRA       FIRE        GOV        OTH        SUM 
#       4355       4355       4355       4355       3482       4248       4364
GGDC10S %>% fndistinct                  # Number of distinct values
#    Country Regioncode     Region   Variable       Year        AGR        MIN        MAN         PU 
#         43          6          6          2         67       4353       4224       4353       4237 
#        CON        WRT        TRA       FIRE        GOV        OTH        SUM 
#       4339       4344       4334       4349       3470       4238       4364
GGDC10S %>% select_at(6:16) %>% fmedian # Median
#        AGR        MIN        MAN         PU        CON        WRT        TRA       FIRE        GOV 
#  4394.5194   173.2234  3718.0981   167.9500  1473.4470  3773.6430  1174.8000   960.1251  3928.5127 
#        OTH        SUM 
#  1433.1722 23186.1936
GGDC10S %>% select_at(6:16) %>% fmean   # Mean
#        AGR        MIN        MAN         PU        CON        WRT        TRA       FIRE        GOV 
#  2526696.5  1867908.9  5538491.4   335679.5  1801597.6  3392909.5  1473269.7  1657114.8  1712300.3 
#        OTH        SUM 
#  1684527.3 21566436.8
GGDC10S %>% fmode                       # Mode
#            Country         Regioncode             Region           Variable               Year 
#              "USA"              "ASI"             "Asia"              "EMP"             "2010" 
#                AGR                MIN                MAN                 PU                CON 
# "171.315882316326"                "0" "4645.12507642586"                "0" "1.34623115930777" 
#                WRT                TRA               FIRE                GOV                OTH 
# "21.8380052682527" "8.97743416914571" "40.0701608636442"                "0" "3626.84423577048" 
#                SUM 
# "37.4822945751317"
GGDC10S %>% fmode(drop = FALSE)         # Keep data structure intact
# # A tibble: 1 × 16
#   Country Regioncode Region Variable  Year   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV
# * <chr>   <chr>      <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 USA     ASI        Asia   EMP       2010  171.     0 4645.     0  1.35  21.8  8.98  40.1     0
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>

Moving on to grouped statistics, we can compute the average value added and employment by sector and country using:

GGDC10S %>% 
  group_by(Variable, Country) %>%
  select_at(6:16) %>% fmean
# # A tibble: 85 × 13
#    Variable Country     AGR     MIN     MAN     PU    CON    WRT    TRA   FIRE     GOV    OTH    SUM
#    <chr>    <chr>     <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#  1 EMP      ARG       1420.   52.1   1932.  1.02e2 7.42e2 1.98e3 6.49e2  628.   2043.  9.92e2 1.05e4
#  2 EMP      BOL        964.   56.0    235.  5.35e0 1.23e2 2.82e2 1.15e2   44.6    NA   3.96e2 2.22e3
#  3 EMP      BRA      17191.  206.    6991.  3.65e2 3.52e3 8.51e3 2.05e3 4414.   5307.  5.71e3 5.43e4
#  4 EMP      BWA        188.   10.5     18.1 3.09e0 2.53e1 3.63e1 8.36e0   15.3    61.1 2.76e1 3.94e2
#  5 EMP      CHL        702.  101.     625.  2.94e1 2.96e2 6.95e2 2.58e2  272.     NA   1.00e3 3.98e3
#  6 EMP      CHN     287744. 7050.   67144.  1.61e3 2.09e4 2.89e4 1.39e4 4929.  22669.  3.10e4 4.86e5
#  7 EMP      COL       3091.  145.    1175.  3.39e1 5.24e2 2.07e3 4.70e2  649.     NA   1.73e3 9.89e3
#  8 EMP      CRI        231.    1.70   136.  1.43e1 5.76e1 1.57e2 4.24e1   54.9   128.  6.51e1 8.87e2
#  9 EMP      DEW       2490.  407.    8473.  2.26e2 2.09e3 4.44e3 1.48e3 1689.   3945.  9.99e2 2.62e4
# 10 EMP      DNK        236.    8.03   507.  1.38e1 1.71e2 4.55e2 1.61e2  181.    549.  1.11e2 2.39e3
# # ℹ 75 more rows

Similarly we can aggregate using any other of the above functions.

It is important to not use dplyr’s summarize together with these functions since that would eliminate their speed gain. These functions are fast because they are executed only once and carry out the grouped computations in C++, whereas summarize will apply the function to each group in the grouped tibble.


Excursus: What is Happening Behind the Scenes?

To better explain this point it is perhaps good to shed some light on what is happening behind the scenes of dplyr and collapse. Fundamentally both packages follow different computing paradigms:

dplyr is an efficient implementation of the Split-Apply-Combine computing paradigm. Data is split into groups, these data-chunks are then passed to a function carrying out the computation, and finally recombined to produce the aggregated data.frame. This modus operandi is evident in the grouping mechanism of dplyr. When a data.frame is passed through group_by, a ‘groups’ attribute is attached:

GGDC10S %>% group_by(Variable, Country) %>% attr("groups")
# # A tibble: 85 × 3
#    Variable Country       .rows
#    <chr>    <chr>   <list<int>>
#  1 EMP      ARG            [62]
#  2 EMP      BOL            [61]
#  3 EMP      BRA            [62]
#  4 EMP      BWA            [52]
#  5 EMP      CHL            [63]
#  6 EMP      CHN            [62]
#  7 EMP      COL            [61]
#  8 EMP      CRI            [62]
#  9 EMP      DEW            [61]
# 10 EMP      DNK            [64]
# # ℹ 75 more rows

This object is a data.frame giving the unique groups and in the third (last) column vectors containing the indices of the rows belonging to that group. A command like summarize uses this information to split the data.frame into groups which are then passed sequentially to the function used and later recombined. These steps are also done in C++ which makes dplyr quite efficient.

Now collapse is based around one-pass grouped computations at the C++ level using its own grouped statistical functions. In other words the data is not split and recombined at all but the entire computation is performed in a single C++ loop running through that data and completing the computations for each group simultaneously. This modus operandi is also evident in collapse grouping objects. The method GRP.grouped_df takes a dplyr grouping object from a grouped tibble and efficiently converts it to a collapse grouping object:

GGDC10S %>% group_by(Variable, Country) %>% GRP %>% str
# Class 'GRP'  hidden list of 9
#  $ N.groups    : int 85
#  $ group.id    : int [1:5027] 46 46 46 46 46 46 46 46 46 46 ...
#  $ group.sizes : int [1:85] 62 61 62 52 63 62 61 62 61 64 ...
#  $ groups      :List of 2
#   ..$ Variable: chr [1:85] "EMP" "EMP" "EMP" "EMP" ...
#   .. ..- attr(*, "label")= chr "Variable"
#   .. ..- attr(*, "format.stata")= chr "%9s"
#   ..$ Country : chr [1:85] "ARG" "BOL" "BRA" "BWA" ...
#   .. ..- attr(*, "label")= chr "Country"
#   .. ..- attr(*, "format.stata")= chr "%9s"
#  $ group.vars  : chr [1:2] "Variable" "Country"
#  $ ordered     : Named logi [1:2] TRUE FALSE
#   ..- attr(*, "names")= chr [1:2] "ordered" "sorted"
#  $ order       : NULL
#  $ group.starts: NULL
#  $ call        : language GRP.grouped_df(X = .)

This object is a list where the first three elements give the number of groups, the group-id to which each row belongs and a vector of group-sizes. A function like fsum uses this information to (for each column) create a result vector of size ‘N.groups’ and the run through the column using the ‘group.id’ vector to add the i’th data point to the ’group.id[i]’th element of the result vector. When the loop is finished, the grouped computation is also finished.

It is obvious that collapse is faster than dplyr since it’s method of computing involves less steps, and it does not need to call statistical functions multiple times. See the benchmark section.


1.2 More Speed using collapse Verbs

collapse fast functions do not develop their maximal performance on a grouped tibble created with group_by because of the additional conversion cost of the grouping object incurred by GRP.grouped_df. This cost is already minimized through the use of C++, but we can do even better replacing group_by with collapse::fgroup_by. fgroup_by works like group_by but does the grouping with collapse::GRP (up to 10x faster than group_by) and simply attaches a collapse grouping object to the grouped_df. Thus the speed gain is 2-fold: Faster grouping and no conversion cost when calling collapse functions.

Another improvement comes from replacing the dplyr verb select with collapse::fselect, and, for selection using column names, indices or functions use collapse::get_vars instead of select_at or select_if. Next to get_vars, collapse also introduces the predicates num_vars, cat_vars, char_vars, fact_vars, logi_vars and date_vars to efficiently select columns by type.

GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fmedian
# # A tibble: 85 × 13
#    Variable Country     AGR     MIN     MAN     PU    CON    WRT    TRA   FIRE     GOV    OTH    SUM
#    <chr>    <chr>     <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#  1 EMP      ARG       1325.   47.4   1988.  1.05e2 7.82e2 1.85e3 5.80e2  464.   1739.   866.  9.74e3
#  2 EMP      BOL        943.   53.5    167.  4.46e0 6.60e1 1.32e2 9.70e1   15.3    NA    384.  1.84e3
#  3 EMP      BRA      17481.  225.    7208.  3.76e2 4.05e3 6.45e3 1.58e3 4355.   4450.  4479.  5.19e4
#  4 EMP      BWA        175.   12.2     13.1 3.71e0 1.90e1 2.11e1 6.75e0   10.4    53.8   31.2 3.61e2
#  5 EMP      CHL        690.   93.9    607.  2.58e1 2.30e2 4.84e2 2.05e2  106.     NA    900.  3.31e3
#  6 EMP      CHN     293915  8150.   61761.  1.14e3 1.06e4 1.70e4 9.56e3 4328.  19468.  9954.  4.45e5
#  7 EMP      COL       3006.   84.0   1033.  3.71e1 4.19e2 1.55e3 3.91e2  655.     NA   1430.  8.63e3
#  8 EMP      CRI        216.    1.49   114.  7.92e0 5.50e1 8.98e1 2.55e1   19.6   122.    60.6 7.19e2
#  9 EMP      DEW       2178   320.    8459.  2.47e2 2.10e3 4.45e3 1.53e3 1656    3700    900   2.65e4
# 10 EMP      DNK        187.    3.75   508.  1.36e1 1.65e2 4.61e2 1.61e2  169.    642.   104.  2.42e3
# # ℹ 75 more rows

microbenchmark(collapse = GGDC10S %>% fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fmedian,
               hybrid = GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmedian,
               dplyr = GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% summarise_all(median, na.rm = TRUE))
# Unit: microseconds
#      expr       min         lq      mean     median        uq       max neval
#  collapse   236.406   263.6095   303.309   295.9175   337.061   419.635   100
#    hybrid  2699.317  2894.9690  3573.611  2998.3505  3119.772 56249.212   100
#     dplyr 15923.908 16297.8280 18810.943 16742.5140 18578.105 71125.939   100

Benchmarks on the different components of this code and with larger data are provided under ‘Benchmarks’. Note that a grouped tibble created with fgroup_by can no longer be used for grouped computations with dplyr verbs like mutate or summarize. fgroup_by first assigns the class GDP_df which is for printing grouping information and subsetting, then the object classes (tbl_df, data.table or whatever else), followed by classes grouped_df and data.frame, and adds the grouping object in a ‘groups’ attribute. Since tbl_df is assigned before grouped_df, the object is treated by the dplyr ecosystem like a normal tibble.

class(group_by(GGDC10S, Variable, Country))
# [1] "grouped_df" "tbl_df"     "tbl"        "data.frame"

class(fgroup_by(GGDC10S, Variable, Country))
# [1] "GRP_df"     "tbl_df"     "tbl"        "grouped_df" "data.frame"

The function fungroup removes classes ‘GDP_df’ and ‘grouped_df’ and the ‘groups’ attribute (and can thus also be used for grouped tibbles created with dplyr::group_by).

Note that any kind of data frame based class can be grouped with fgroup_by, and still retain full responsiveness to all methods defined for that class. Functions performing aggregation on the grouped data frame remove the grouping object and classes afterwards, yielding an object with the same class and attributes as the input.

The print method shown below reports the grouping variables, and then in square brackets the information [number of groups | average group size (standard-deviation of group sizes)]:

fgroup_by(GGDC10S, Variable, Country)
# # A tibble: 5,027 × 16
#    Country Regioncode Region Variable  Year   AGR   MIN    MAN     PU    CON   WRT   TRA  FIRE   GOV
#    <chr>   <chr>      <chr>  <chr>    <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
#  1 BWA     SSA        Sub-s… VA        1960  NA   NA    NA     NA     NA     NA    NA    NA    NA   
#  2 BWA     SSA        Sub-s… VA        1961  NA   NA    NA     NA     NA     NA    NA    NA    NA   
#  3 BWA     SSA        Sub-s… VA        1962  NA   NA    NA     NA     NA     NA    NA    NA    NA   
#  4 BWA     SSA        Sub-s… VA        1963  NA   NA    NA     NA     NA     NA    NA    NA    NA   
#  5 BWA     SSA        Sub-s… VA        1964  16.3  3.49  0.737  0.104  0.660  6.24  1.66  1.12  4.82
#  6 BWA     SSA        Sub-s… VA        1965  15.7  2.50  1.02   0.135  1.35   7.06  1.94  1.25  5.70
#  7 BWA     SSA        Sub-s… VA        1966  17.7  1.97  0.804  0.203  1.35   8.27  2.15  1.36  6.37
#  8 BWA     SSA        Sub-s… VA        1967  19.1  2.30  0.938  0.203  0.897  4.31  1.72  1.54  7.04
#  9 BWA     SSA        Sub-s… VA        1968  21.1  1.84  0.750  0.203  1.22   5.17  2.44  1.03  5.03
# 10 BWA     SSA        Sub-s… VA        1969  21.9  5.24  2.14   0.578  3.47   5.75  2.72  1.23  5.59
# # ℹ 5,017 more rows
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

Note further that fselect and get_vars are not full drop-in replacements for select because they do not have a grouped_df method:

GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% tail(3)
# # A tibble: 3 × 13
# # Groups:   Variable, Country [1]
#   Variable Country   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV   OTH    SUM
#   <chr>    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
# 1 EMP      EGY     5206.  29.0 2436.  307. 2733. 2977. 1992.  801. 5539.    NA 22020.
# 2 EMP      EGY     5186.  27.6 2374.  318. 2795. 3020. 2048.  815. 5636.    NA 22219.
# 3 EMP      EGY     5161.  24.8 2348.  325. 2931. 3110. 2065.  832. 5736.    NA 22533.
GGDC10S %>% group_by(Variable, Country) %>% get_vars(6:16) %>% tail(3)
# # A tibble: 3 × 11
#     AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV   OTH    SUM
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
# 1 5206.  29.0 2436.  307. 2733. 2977. 1992.  801. 5539.    NA 22020.
# 2 5186.  27.6 2374.  318. 2795. 3020. 2048.  815. 5636.    NA 22219.
# 3 5161.  24.8 2348.  325. 2931. 3110. 2065.  832. 5736.    NA 22533.

Since by default keep.group_vars = TRUE in the Fast Statistical Functions, the end result is nevertheless the same:

GGDC10S %>% group_by(Variable, Country) %>% select_at(6:16) %>% fmean %>% tail(3)
# # A tibble: 3 × 13
#   Variable Country      AGR      MIN    MAN     PU    CON    WRT    TRA   FIRE     GOV    OTH    SUM
#   <chr>    <chr>      <dbl>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
# 1 VA       VEN        6860.   35478. 1.96e4 1.06e3 1.17e4 1.93e4 8.03e3 5.60e3 NA      19986. 1.28e5
# 2 VA       ZAF       16419.   42928. 8.76e4 1.38e4 1.64e4 6.83e4 4.53e4 6.64e4  7.58e4 30167. 4.63e5
# 3 VA       ZMB     1268849. 1006099. 9.00e5 2.19e5 8.66e5 2.10e6 7.05e5 9.10e5  1.10e6 81871. 9.16e6
GGDC10S %>% group_by(Variable, Country) %>% get_vars(6:16) %>% fmean %>% tail(3)
# # A tibble: 3 × 13
#   Variable Country      AGR      MIN    MAN     PU    CON    WRT    TRA   FIRE     GOV    OTH    SUM
#   <chr>    <chr>      <dbl>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
# 1 VA       VEN        6860.   35478. 1.96e4 1.06e3 1.17e4 1.93e4 8.03e3 5.60e3 NA      19986. 1.28e5
# 2 VA       ZAF       16419.   42928. 8.76e4 1.38e4 1.64e4 6.83e4 4.53e4 6.64e4  7.58e4 30167. 4.63e5
# 3 VA       ZMB     1268849. 1006099. 9.00e5 2.19e5 8.66e5 2.10e6 7.05e5 9.10e5  1.10e6 81871. 9.16e6

Another useful verb introduced by collapse is fgroup_vars, which can be used to efficiently obtain the grouping columns or grouping variables from a grouped tibble:

# fgroup_by fully supports grouped tibbles created with group_by or fgroup_by: 
GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars %>% head(3)
# # A tibble: 3 × 2
#   Variable Country
#   <chr>    <chr>  
# 1 VA       BWA    
# 2 VA       BWA    
# 3 VA       BWA
GGDC10S %>% fgroup_by(Variable, Country) %>% fgroup_vars %>% head(3)
# # A tibble: 3 × 2
#   Variable Country
#   <chr>    <chr>  
# 1 VA       BWA    
# 2 VA       BWA    
# 3 VA       BWA

# The other possibilities:
GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("unique") %>% head(3)
# # A tibble: 3 × 2
#   Variable Country
#   <chr>    <chr>  
# 1 EMP      ARG    
# 2 EMP      BOL    
# 3 EMP      BRA
GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("names")
# [1] "Variable" "Country"
GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("indices")
# [1] 4 1
GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("named_indices")
# Variable  Country 
#        4        1
GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("logical")
#  [1]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
GGDC10S %>% group_by(Variable, Country) %>% fgroup_vars("named_logical")
#    Country Regioncode     Region   Variable       Year        AGR        MIN        MAN         PU 
#       TRUE      FALSE      FALSE       TRUE      FALSE      FALSE      FALSE      FALSE      FALSE 
#        CON        WRT        TRA       FIRE        GOV        OTH        SUM 
#      FALSE      FALSE      FALSE      FALSE      FALSE      FALSE      FALSE

Another collapse verb to mention here is fsubset, a faster alternative to dplyr::filter which also provides an option to flexibly subset columns after the select argument:

# Two equivalent calls, the first is substantially faster
GGDC10S %>% fsubset(Variable == "VA" & Year > 1990, Country, Year, AGR:GOV) %>% head(3)
# # A tibble: 3 × 11
#   Country  Year   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV
#   <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 BWA      1991  303. 2647.  473.  161.  580.  807.  233.  433. 1073.
# 2 BWA      1992  333. 2691.  537.  178.  679.  725.  285.  517. 1234.
# 3 BWA      1993  405. 2625.  567.  219.  634.  772.  350.  673. 1487.

GGDC10S %>% filter(Variable == "VA" & Year > 1990) %>% select(Country, Year, AGR:GOV) %>% head(3)
# # A tibble: 3 × 11
#   Country  Year   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV
#   <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 BWA      1991  303. 2647.  473.  161.  580.  807.  233.  433. 1073.
# 2 BWA      1992  333. 2691.  537.  178.  679.  725.  285.  517. 1234.
# 3 BWA      1993  405. 2625.  567.  219.  634.  772.  350.  673. 1487.

collapse also offers roworder, frename, colorder and ftransform/TRA as fast replacements for dplyr::arrange, dplyr::rename, dplyr::relocate and dplyr::mutate.

1.3 Multi-Function Aggregations

One can also aggregate with multiple functions at the same time. For such operations it is often necessary to use curly braces { to prevent first argument injection so that %>% cbind(FUN1(.), FUN2(.)) does not evaluate as %>% cbind(., FUN1(.), FUN2(.)):

GGDC10S %>%
  fgroup_by(Variable, Country) %>%
  get_vars(6:16) %>% {
    cbind(fmedian(.),
          add_stub(fmean(., keep.group_vars = FALSE), "mean_"))
    } %>% head(3)
#   Variable Country        AGR       MIN       MAN         PU        CON      WRT        TRA
# 1      EMP     ARG  1324.5255  47.35255 1987.5912 104.738825  782.40283 1854.612  579.93982
# 2      EMP     BOL   943.1612  53.53538  167.1502   4.457895   65.97904  132.225   96.96828
# 3      EMP     BRA 17480.9810 225.43693 7207.7915 375.851832 4054.66103 6454.523 1580.81120
#         FIRE      GOV       OTH       SUM   mean_AGR  mean_MIN  mean_MAN    mean_PU  mean_CON
# 1  464.39920 1738.836  866.1119  9743.223  1419.8013  52.08903 1931.7602 101.720936  742.4044
# 2   15.34259       NA  384.0678  1842.055   964.2103  56.03295  235.0332   5.346433  122.7827
# 3 4354.86210 4449.942 4478.6927 51881.110 17191.3529 206.02389 6991.3710 364.573404 3524.7384
#    mean_WRT  mean_TRA  mean_FIRE mean_GOV  mean_OTH  mean_SUM
# 1 1982.1775  648.5119  627.79291 2043.471  992.4475 10542.177
# 2  281.5164  115.4728   44.56442       NA  395.5650  2220.524
# 3 8509.4612 2054.3731 4413.54448 5307.280 5710.2665 54272.985

The function add_stub used above is a collapse function adding a prefix (default) or suffix to variables names. The collapse predicate add_vars provides a more efficient alternative to cbind.data.frame. The idea here is ‘adding’ variables to the data.frame in the first argument i.e. the attributes of the first argument are preserved, so the expression below still gives a tibble instead of a data.frame:

GGDC10S %>%
  fgroup_by(Variable, Country) %>% {
   add_vars(get_vars(., "Reg", regex = TRUE) %>% ffirst, # Regular expression matching column names
            num_vars(.) %>% fmean(keep.group_vars = FALSE) %>% add_stub("mean_"), # num_vars selects all numeric variables
            fselect(., PU:TRA) %>% fmedian(keep.group_vars = FALSE) %>% add_stub("median_"), 
            fselect(., PU:CON) %>% fmin(keep.group_vars = FALSE) %>% add_stub("min_"))      
  } %>% head(3)
# # A tibble: 3 × 22
#   Variable Country Regioncode Region  mean_Year mean_AGR mean_MIN mean_MAN mean_PU mean_CON mean_WRT
#   <chr>    <chr>   <chr>      <chr>       <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>
# 1 EMP      ARG     LAM        Latin …     1980.    1420.     52.1    1932.  102.       742.    1982.
# 2 EMP      BOL     LAM        Latin …     1980      964.     56.0     235.    5.35     123.     282.
# 3 EMP      BRA     LAM        Latin …     1980.   17191.    206.     6991.  365.      3525.    8509.
# # ℹ 11 more variables: mean_TRA <dbl>, mean_FIRE <dbl>, mean_GOV <dbl>, mean_OTH <dbl>,
# #   mean_SUM <dbl>, median_PU <dbl>, median_CON <dbl>, median_WRT <dbl>, median_TRA <dbl>,
# #   min_PU <dbl>, min_CON <dbl>

Another nice feature of add_vars is that it can also very efficiently reorder columns i.e. bind columns in a different order than they are passed. This can be done by simply specifying the positions the added columns should have in the final data frame, and then add_vars shifts the first argument columns to the right to fill in the gaps.

GGDC10S %>%
  fsubset(Variable == "VA", Country, AGR, SUM) %>% 
  fgroup_by(Country) %>% {
   add_vars(fgroup_vars(.,"unique"),
            fmean(., keep.group_vars = FALSE) %>% add_stub("mean_"),
            fsd(., keep.group_vars = FALSE) %>% add_stub("sd_"), 
            pos = c(2,4,3,5))
  } %>% head(3)
# # A tibble: 3 × 5
#   Country mean_AGR sd_AGR mean_SUM  sd_SUM
#   <chr>      <dbl>  <dbl>    <dbl>   <dbl>
# 1 ARG       14951. 33061.  152534. 301316.
# 2 BOL        3300.  4456.   22619.  33173.
# 3 BRA       76870. 59442. 1200563. 976963.

A much more compact solution to multi-function and multi-type aggregation is offered by the function collapg:

# This aggregates numeric colums using the mean (fmean) and categorical columns with the mode (fmode)
GGDC10S %>% fgroup_by(Variable, Country) %>% collapg %>% head(3)
# # A tibble: 3 × 16
#   Variable Country Regioncode Region   Year    AGR   MIN   MAN     PU   CON   WRT   TRA   FIRE   GOV
#   <chr>    <chr>   <chr>      <chr>   <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
# 1 EMP      ARG     LAM        Latin … 1980.  1420.  52.1 1932. 102.    742. 1982.  649.  628.  2043.
# 2 EMP      BOL     LAM        Latin … 1980    964.  56.0  235.   5.35  123.  282.  115.   44.6   NA 
# 3 EMP      BRA     LAM        Latin … 1980. 17191. 206.  6991. 365.   3525. 8509. 2054. 4414.  5307.
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>

By default it aggregates numeric columns using the fmean and categorical columns using fmode, and preserves the order of all columns. Changing these defaults is very easy:

# This aggregates numeric colums using the median and categorical columns using the first value
GGDC10S %>% fgroup_by(Variable, Country) %>% collapg(fmedian, flast) %>% head(3)
# # A tibble: 3 × 16
#   Variable Country Regioncode Region       Year    AGR   MIN   MAN     PU    CON   WRT    TRA   FIRE
#   <chr>    <chr>   <chr>      <chr>       <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>
# 1 EMP      ARG     LAM        Latin Amer… 1980.  1325.  47.4 1988. 105.    782.  1855.  580.   464. 
# 2 EMP      BOL     LAM        Latin Amer… 1980    943.  53.5  167.   4.46   66.0  132.   97.0   15.3
# 3 EMP      BRA     LAM        Latin Amer… 1980. 17481. 225.  7208. 376.   4055.  6455. 1581.  4355. 
# # ℹ 3 more variables: GOV <dbl>, OTH <dbl>, SUM <dbl>

One can apply multiple functions to both numeric and/or categorical data:

GGDC10S %>% fgroup_by(Variable, Country) %>%
  collapg(list(fmean, fmedian), list(first, fmode, flast)) %>% head(3)
# # A tibble: 3 × 32
#   Variable Country first.Regioncode fmode.Regioncode flast.Regioncode first.Region  fmode.Region 
#   <chr>    <chr>   <chr>            <chr>            <chr>            <chr>         <chr>        
# 1 EMP      ARG     LAM              LAM              LAM              Latin America Latin America
# 2 EMP      BOL     LAM              LAM              LAM              Latin America Latin America
# 3 EMP      BRA     LAM              LAM              LAM              Latin America Latin America
# # ℹ 25 more variables: flast.Region <chr>, fmean.Year <dbl>, fmedian.Year <dbl>, fmean.AGR <dbl>,
# #   fmedian.AGR <dbl>, fmean.MIN <dbl>, fmedian.MIN <dbl>, fmean.MAN <dbl>, fmedian.MAN <dbl>,
# #   fmean.PU <dbl>, fmedian.PU <dbl>, fmean.CON <dbl>, fmedian.CON <dbl>, fmean.WRT <dbl>,
# #   fmedian.WRT <dbl>, fmean.TRA <dbl>, fmedian.TRA <dbl>, fmean.FIRE <dbl>, fmedian.FIRE <dbl>,
# #   fmean.GOV <dbl>, fmedian.GOV <dbl>, fmean.OTH <dbl>, fmedian.OTH <dbl>, fmean.SUM <dbl>,
# #   fmedian.SUM <dbl>

Applying multiple functions to only numeric (or only categorical) data allows return in a long format:

GGDC10S %>% fgroup_by(Variable, Country) %>%
  collapg(list(fmean, fmedian), cols = is.numeric, return = "long") %>% head(3)
# # A tibble: 3 × 15
#   Function Variable Country  Year    AGR   MIN   MAN     PU   CON   WRT   TRA   FIRE   GOV   OTH
#   <chr>    <chr>    <chr>   <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
# 1 fmean    EMP      ARG     1980.  1420.  52.1 1932. 102.    742. 1982.  649.  628.  2043.  992.
# 2 fmean    EMP      BOL     1980    964.  56.0  235.   5.35  123.  282.  115.   44.6   NA   396.
# 3 fmean    EMP      BRA     1980. 17191. 206.  6991. 365.   3525. 8509. 2054. 4414.  5307. 5710.
# # ℹ 1 more variable: SUM <dbl>

Finally, collapg also makes it very easy to apply aggregator functions to certain columns only:

GGDC10S %>% fgroup_by(Variable, Country) %>%
  collapg(custom = list(fmean = 6:8, fmedian = 10:12)) %>% head(3)
# # A tibble: 3 × 8
#   Variable Country    AGR   MIN   MAN    CON   WRT    TRA
#   <chr>    <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
# 1 EMP      ARG      1420.  52.1 1932.  782.  1855.  580. 
# 2 EMP      BOL       964.  56.0  235.   66.0  132.   97.0
# 3 EMP      BRA     17191. 206.  6991. 4055.  6455. 1581.

To understand more about collapg, look it up in the documentation (?collapg).

1.4 Weighted Aggregations

Weighted aggregations are possible with the functions fsum, fprod, fmean, fmedian, fnth, fmode, fvar and fsd. The implementation is such that by default (option keep.w = TRUE) these functions also aggregate the weights, so that further weighted computations can be performed on the aggregated data. fprod saves the product of the weights, whereas the other functions save the sum of the weights in a column next to the grouping variables. If na.rm = TRUE (the default), rows with missing weights are omitted from the computation.

# This computes a frequency-weighted grouped standard-deviation, taking the total EMP / VA as weight
GGDC10S %>%
  fgroup_by(Variable, Country) %>%
  fselect(AGR:SUM) %>% fsd(SUM) %>% head(3)
# # A tibble: 3 × 13
#   Variable Country  sum.SUM    AGR   MIN   MAN    PU   CON   WRT    TRA   FIRE   GOV   OTH
#   <chr>    <chr>      <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
# 1 EMP      ARG      653615.  225.   22.2  176. 20.5   285.  856.  195.   493.  1123.  506.
# 2 EMP      BOL      135452.   99.7  17.1  168.  4.87  123.  324.   98.1   69.8   NA   258.
# 3 EMP      BRA     3364925. 1587.   73.8 2952. 93.8  1861. 6285. 1306.  3003.  3621. 4257.

# This computes a weighted grouped mode, taking the total EMP / VA as weight
GGDC10S %>%
  fgroup_by(Variable, Country) %>%
  fselect(AGR:SUM) %>% fmode(SUM) %>% head(3)
# # A tibble: 3 × 13
#   Variable Country  sum.SUM    AGR   MIN    MAN    PU   CON    WRT   TRA   FIRE    GOV    OTH
#   <chr>    <chr>      <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
# 1 EMP      ARG      653615.  1162. 127.   2164. 152.  1415.  3768. 1060.  1748.  4336.  1999.
# 2 EMP      BOL      135452.   819.  37.6   604.  10.8  433.   893.  333.   321.    NA   1057.
# 3 EMP      BRA     3364925. 16451. 313.  11841. 388.  8154. 21860. 5169. 12011. 12149. 14235.

The weighted variance / standard deviation is currently only implemented with frequency weights.

Weighted aggregations may also be performed with collapg. By default fsum is used to compute a sum of the weights, but it is also possible here to aggregate the weights with other functions:

# This aggregates numeric colums using the weighted mean (the default) and categorical columns using the weighted mode (the default).
# Weights (column SUM) are aggregated using both the sum and the maximum. 
GGDC10S %>% group_by(Variable, Country) %>% 
  collapg(w = SUM, wFUN = list(fsum, fmax)) %>% head(3)
# # A tibble: 3 × 17
#   Variable Country fsum.SUM fmax.SUM Regioncode Region   Year    AGR   MIN   MAN     PU   CON    WRT
#   <chr>    <chr>      <dbl>    <dbl> <chr>      <chr>   <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
# 1 EMP      ARG      653615.   17929. LAM        Latin … 1985.  1361.  56.5 1935. 105.    811.  2217.
# 2 EMP      BOL      135452.    4508. LAM        Latin … 1987.   977.  57.9  296.   7.07  167.   400.
# 3 EMP      BRA     3364925.  102572. LAM        Latin … 1989. 17746. 238.  8466. 389.   4436. 11376.
# # ℹ 4 more variables: TRA <dbl>, FIRE <dbl>, GOV <dbl>, OTH <dbl>

2. Fast Transformations

collapse also provides some fast transformations that significantly extend the scope and speed of manipulations that can be performed with dplyr::mutate.

2.1 Fast Transform and Compute Variables

The function ftransform can be used to manipulate columns in the same ways as mutate:

GGDC10S %>% fsubset(Variable == "VA", Country, Year, AGR, SUM) %>%
  ftransform(AGR_perc = AGR / SUM * 100,  # Computing % of VA in Agriculture
             AGR_mean = fmean(AGR),       # Average Agricultural VA
             AGR = NULL, SUM = NULL) %>%  # Deleting columns AGR and SUM
             head
# # A tibble: 6 × 4
#   Country  Year AGR_perc AGR_mean
#   <chr>   <dbl>    <dbl>    <dbl>
# 1 BWA      1960     NA   5137561.
# 2 BWA      1961     NA   5137561.
# 3 BWA      1962     NA   5137561.
# 4 BWA      1963     NA   5137561.
# 5 BWA      1964     43.5 5137561.
# 6 BWA      1965     40.0 5137561.

The modification brought by ftransformv enables transformations of groups of columns like dplyr::mutate_at and dplyr::mutate_if:

# This replaces variables mpg, carb and wt by their log (.c turns expressions into character vectors)
mtcars %>% ftransformv(.c(mpg, carb, wt), log) %>% head
#                        mpg cyl disp  hp drat        wt  qsec vs am gear      carb
# Mazda RX4         3.044522   6  160 110 3.90 0.9631743 16.46  0  1    4 1.3862944
# Mazda RX4 Wag     3.044522   6  160 110 3.90 1.0560527 17.02  0  1    4 1.3862944
# Datsun 710        3.126761   4  108  93 3.85 0.8415672 18.61  1  1    4 0.0000000
# Hornet 4 Drive    3.063391   6  258 110 3.08 1.1678274 19.44  1  0    3 0.0000000
# Hornet Sportabout 2.928524   8  360 175 3.15 1.2354715 17.02  0  0    3 0.6931472
# Valiant           2.895912   6  225 105 2.76 1.2412686 20.22  1  0    3 0.0000000

# Logging numeric variables
iris %>% ftransformv(is.numeric, log) %>% head
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1     1.629241    1.252763    0.3364722  -1.6094379  setosa
# 2     1.589235    1.098612    0.3364722  -1.6094379  setosa
# 3     1.547563    1.163151    0.2623643  -1.6094379  setosa
# 4     1.526056    1.131402    0.4054651  -1.6094379  setosa
# 5     1.609438    1.280934    0.3364722  -1.6094379  setosa
# 6     1.686399    1.360977    0.5306283  -0.9162907  setosa

Instead of column = value type arguments, it is also possible to pass a single list of transformed variables to ftransform, which will be regarded in the same way as an evaluated list of column = value arguments. It can be used for more complex transformations:

# Logging values and replacing generated Inf values
mtcars %>% ftransform(fselect(., mpg, cyl, vs:gear) %>% lapply(log) %>% replace_Inf) %>% head
#                        mpg      cyl disp  hp drat    wt  qsec vs am     gear carb
# Mazda RX4         3.044522 1.791759  160 110 3.90 2.620 16.46 NA  0 1.386294    4
# Mazda RX4 Wag     3.044522 1.791759  160 110 3.90 2.875 17.02 NA  0 1.386294    4
# Datsun 710        3.126761 1.386294  108  93 3.85 2.320 18.61  0  0 1.386294    1
# Hornet 4 Drive    3.063391 1.791759  258 110 3.08 3.215 19.44  0 NA 1.098612    1
# Hornet Sportabout 2.928524 2.079442  360 175 3.15 3.440 17.02 NA NA 1.098612    2
# Valiant           2.895912 1.791759  225 105 2.76 3.460 20.22  0 NA 1.098612    1

If only the computed columns need to be returned, fcompute provides an efficient alternative:

GGDC10S %>% fsubset(Variable == "VA", Country, Year, AGR, SUM) %>%
  fcompute(AGR_perc = AGR / SUM * 100,
           AGR_mean = fmean(AGR)) %>% head
# # A tibble: 6 × 2
#   AGR_perc AGR_mean
#      <dbl>    <dbl>
# 1     NA   5137561.
# 2     NA   5137561.
# 3     NA   5137561.
# 4     NA   5137561.
# 5     43.5 5137561.
# 6     40.0 5137561.

ftransform and fcompute are an order of magnitude faster than mutate, but they do not support grouped computations using arbitrary functions. We will see that this is hardly a limitation as collapse provides very efficient and elegant alternative programming mechanisms…

2.2 Replacing and Sweeping out Statistics

All statistical (scalar-valued) functions in the collapse package (fsum, fprod, fmean, fmedian, fmode, fvar, fsd, fmin, fmax, fnth, ffirst, flast, fnobs, fndistinct) have a TRA argument which can be used to efficiently transform data by either (column-wise) replacing data values with computed statistics or sweeping the statistics out of the data. Operations can be specified using either an integer or quoted operator / string. The 10 operations supported by TRA are:

  • 1 - “replace_fill†: replace and overwrite missing values (same as mutate)

  • 2 - “replace†: replace but preserve missing values

  • 3 - “-†: subtract (center)

  • 4 - “-+†: subtract group-statistics but add average of group statistics

  • 5 - “/†: divide (scale)

  • 6 - “%†: compute percentages (divide and multiply by 100)

  • 7 - “+†: add

  • 8 - “*†: multiply

  • 9 - “%%†: modulus

  • 10 - “-%%†: subtract modulus

Simple transformations are again straightforward to specify:

# This subtracts the median value from all data points i.e. centers on the median
GGDC10S %>% num_vars %>% fmedian(TRA = "-") %>% head
# # A tibble: 6 × 12
#    Year    AGR   MIN    MAN    PU    CON    WRT    TRA  FIRE    GOV    OTH     SUM
#   <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>   <dbl>
# 1   -22    NA    NA     NA    NA     NA     NA     NA    NA     NA     NA      NA 
# 2   -21    NA    NA     NA    NA     NA     NA     NA    NA     NA     NA      NA 
# 3   -20    NA    NA     NA    NA     NA     NA     NA    NA     NA     NA      NA 
# 4   -19    NA    NA     NA    NA     NA     NA     NA    NA     NA     NA      NA 
# 5   -18 -4378. -170. -3717. -168. -1473. -3767. -1173. -959. -3924. -1431. -23149.
# 6   -17 -4379. -171. -3717. -168. -1472. -3767. -1173. -959. -3923. -1430. -23147.

# This replaces all data points with the mode
GGDC10S %>% char_vars %>% fmode(TRA = "replace") %>% head
# # A tibble: 6 × 4
#   Country Regioncode Region Variable
#   <chr>   <chr>      <chr>  <chr>   
# 1 USA     ASI        Asia   EMP     
# 2 USA     ASI        Asia   EMP     
# 3 USA     ASI        Asia   EMP     
# 4 USA     ASI        Asia   EMP     
# 5 USA     ASI        Asia   EMP     
# 6 USA     ASI        Asia   EMP

Similarly for grouped transformations:

# Replacing data with the 2nd quartile (25%)
GGDC10S %>%
  fselect(Variable, Country, AGR:SUM) %>% 
   fgroup_by(Variable, Country) %>% fnth(0.25, TRA = "replace_fill") %>% head(3)
# # A tibble: 3 × 13
#   Variable Country   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV   OTH   SUM
#   <chr>    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 VA       BWA      63.5  33.1  27.3  7.36  26.8  31.1  13.2  12.0  33.6  11.5  262.
# 2 VA       BWA      63.5  33.1  27.3  7.36  26.8  31.1  13.2  12.0  33.6  11.5  262.
# 3 VA       BWA      63.5  33.1  27.3  7.36  26.8  31.1  13.2  12.0  33.6  11.5  262.

# Scaling sectoral data by Variable and Country
GGDC10S %>%
  fselect(Variable, Country, AGR:SUM) %>% 
   fgroup_by(Variable, Country) %>% fsd(TRA = "/") %>% head
# # A tibble: 6 × 13
#   Variable Country     AGR       MIN       MAN       PU      CON      WRT      TRA     FIRE      GOV
#   <chr>    <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
# 1 VA       BWA     NA      NA        NA        NA       NA       NA       NA       NA       NA      
# 2 VA       BWA     NA      NA        NA        NA       NA       NA       NA       NA       NA      
# 3 VA       BWA     NA      NA        NA        NA       NA       NA       NA       NA       NA      
# 4 VA       BWA     NA      NA        NA        NA       NA       NA       NA       NA       NA      
# 5 VA       BWA      0.0270  0.000556  0.000523  3.88e-4  5.11e-4  0.00194  0.00154  5.23e-4  0.00134
# 6 VA       BWA      0.0260  0.000397  0.000723  5.03e-4  1.04e-3  0.00220  0.00180  5.83e-4  0.00158
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>

The benchmarks below will demonstrate that these internal sweeping and replacement operations fully performed in C++ compute significantly faster than using dplyr::mutate, especially as the number of groups grows large. The S3 generic nature of the Fast Statistical Functions further allows us to perform grouped mutations on the fly (together with ftransform or fcompute), without the need of first creating a grouped tibble:

# AGR_gmed = TRUE if AGR is greater than it's median value, grouped by Variable and Country
# Note: This calls fmedian.default
settransform(GGDC10S, AGR_gmed = AGR > fmedian(AGR, list(Variable, Country), TRA = "replace"))
tail(GGDC10S, 3)
# # A tibble: 3 × 17
#   Country Regioncode Region     Variable  Year   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV
#   <chr>   <chr>      <chr>      <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 EGY     MENA       Middle Ea… EMP       2010 5206.  29.0 2436.  307. 2733. 2977. 1992.  801. 5539.
# 2 EGY     MENA       Middle Ea… EMP       2011 5186.  27.6 2374.  318. 2795. 3020. 2048.  815. 5636.
# 3 EGY     MENA       Middle Ea… EMP       2012 5161.  24.8 2348.  325. 2931. 3110. 2065.  832. 5736.
# # ℹ 3 more variables: OTH <dbl>, SUM <dbl>, AGR_gmed <lgl>

# Dividing (scaling) the sectoral data (columns 6 through 16) by their grouped standard deviation
settransformv(GGDC10S, 6:16, fsd, list(Variable, Country), TRA = "/", apply = FALSE)
tail(GGDC10S, 3)
# # A tibble: 3 × 17
#   Country Regioncode Region     Variable  Year   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV
#   <chr>   <chr>      <chr>      <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 EGY     MENA       Middle Ea… EMP       2010  8.41  2.28  4.32  3.56  3.62  3.75  3.75  3.14  3.80
# 2 EGY     MENA       Middle Ea… EMP       2011  8.38  2.17  4.21  3.68  3.70  3.81  3.86  3.19  3.86
# 3 EGY     MENA       Middle Ea… EMP       2012  8.34  1.95  4.17  3.76  3.88  3.92  3.89  3.26  3.93
# # ℹ 3 more variables: OTH <dbl>, SUM <dbl>, AGR_gmed <lgl>
rm(GGDC10S)

Weights are easily added to any grouped transformation:

# This subtracts weighted group means from the data, using SUM column as weights.. 
GGDC10S %>%
  fselect(Variable, Country, AGR:SUM) %>% 
   fgroup_by(Variable, Country) %>% fmean(SUM, "-") %>% head
# # A tibble: 6 × 13
#   Variable Country   SUM    AGR     MIN    MAN    PU    CON    WRT    TRA   FIRE    GOV    OTH
#   <chr>    <chr>   <dbl>  <dbl>   <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1 VA       BWA      NA      NA      NA     NA    NA     NA     NA     NA     NA     NA     NA 
# 2 VA       BWA      NA      NA      NA     NA    NA     NA     NA     NA     NA     NA     NA 
# 3 VA       BWA      NA      NA      NA     NA    NA     NA     NA     NA     NA     NA     NA 
# 4 VA       BWA      NA      NA      NA     NA    NA     NA     NA     NA     NA     NA     NA 
# 5 VA       BWA      37.5 -1301. -13317. -2965. -529. -2746. -6540. -2157. -4431. -7551. -2613.
# 6 VA       BWA      39.3 -1302. -13318. -2964. -529. -2745. -6540. -2156. -4431. -7550. -2613.

Sequential operations are also easily performed:

# This scales and then subtracts the median
GGDC10S %>%
  fselect(Variable, Country, AGR:SUM) %>% 
   fgroup_by(Variable, Country) %>% fsd(TRA = "/") %>% fmedian(TRA = "-")
# # A tibble: 5,027 × 13
#    Variable Country    AGR    MIN    MAN     PU    CON     WRT     TRA    FIRE    GOV     OTH    SUM
#  * <chr>    <chr>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>   <dbl>  <dbl>
#  1 VA       BWA     NA     NA     NA     NA     NA     NA      NA      NA      NA     NA      NA    
#  2 VA       BWA     NA     NA     NA     NA     NA     NA      NA      NA      NA     NA      NA    
#  3 VA       BWA     NA     NA     NA     NA     NA     NA      NA      NA      NA     NA      NA    
#  4 VA       BWA     NA     NA     NA     NA     NA     NA      NA      NA      NA     NA      NA    
#  5 VA       BWA     -0.182 -0.235 -0.183 -0.245 -0.118 -0.0820 -0.0724 -0.0661 -0.108 -0.0848 -0.146
#  6 VA       BWA     -0.183 -0.235 -0.183 -0.245 -0.117 -0.0817 -0.0722 -0.0660 -0.108 -0.0846 -0.146
#  7 VA       BWA     -0.180 -0.235 -0.183 -0.245 -0.117 -0.0813 -0.0720 -0.0659 -0.107 -0.0843 -0.145
#  8 VA       BWA     -0.177 -0.235 -0.183 -0.245 -0.117 -0.0826 -0.0724 -0.0659 -0.107 -0.0841 -0.146
#  9 VA       BWA     -0.174 -0.235 -0.183 -0.245 -0.117 -0.0823 -0.0717 -0.0661 -0.108 -0.0848 -0.146
# 10 VA       BWA     -0.173 -0.234 -0.182 -0.243 -0.115 -0.0821 -0.0715 -0.0660 -0.108 -0.0846 -0.145
# # ℹ 5,017 more rows
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

Of course it is also possible to combine multiple functions as in the aggregation section, or to add variables to existing data:

# This adds a groupwise observation count next to each column
add_vars(GGDC10S, seq(7,27,2)) <- GGDC10S %>%
    fgroup_by(Variable, Country) %>% fselect(AGR:SUM) %>%
    fnobs("replace_fill") %>% add_stub("N_")

head(GGDC10S)
# # A tibble: 6 × 27
#   Country Regioncode Region  Variable  Year   AGR N_AGR   MIN N_MIN    MAN N_MAN     PU  N_PU    CON
#   <chr>   <chr>      <chr>   <chr>    <dbl> <dbl> <int> <dbl> <int>  <dbl> <int>  <dbl> <int>  <dbl>
# 1 BWA     SSA        Sub-sa… VA        1960  NA      47 NA       47 NA        47 NA        47 NA    
# 2 BWA     SSA        Sub-sa… VA        1961  NA      47 NA       47 NA        47 NA        47 NA    
# 3 BWA     SSA        Sub-sa… VA        1962  NA      47 NA       47 NA        47 NA        47 NA    
# 4 BWA     SSA        Sub-sa… VA        1963  NA      47 NA       47 NA        47 NA        47 NA    
# 5 BWA     SSA        Sub-sa… VA        1964  16.3    47  3.49    47  0.737    47  0.104    47  0.660
# 6 BWA     SSA        Sub-sa… VA        1965  15.7    47  2.50    47  1.02     47  0.135    47  1.35 
# # ℹ 13 more variables: N_CON <int>, WRT <dbl>, N_WRT <int>, TRA <dbl>, N_TRA <int>, FIRE <dbl>,
# #   N_FIRE <int>, GOV <dbl>, N_GOV <int>, OTH <dbl>, N_OTH <int>, SUM <dbl>, N_SUM <int>
rm(GGDC10S)

There are lots of other examples one could construct using the 10 operations and 14 functions listed above, the examples provided just outline the suggested programming basics. Performance considerations make it very much worthwhile to spend some time and think how complex operations can be implemented in this programming framework, before defining some function in R and applying it to data using dplyr::mutate.

2.3 More Control using the TRA Function

Towards this end, calling TRA() directly also facilitates more complex and customized operations. Behind the scenes of the TRA = ... argument, the Fast Statistical Functions first compute the grouped statistics on all columns of the data, and these statistics are then directly fed into a C++ function that uses them to replace or sweep them out of data points in one of the 10 ways described above. This function can also be called directly by the name of TRA.

Fundamentally, TRA is a generalization of base::sweep for column-wise grouped operations1. Direct calls to TRA enable more control over inputs and outputs.

The two operations below are equivalent, although the first is slightly more efficient as it only requires one method dispatch and one check of the inputs:

# This divides by the product
GGDC10S %>%
  fgroup_by(Variable, Country) %>%
    get_vars(6:16) %>% fprod(TRA = "/") %>% head
# # A tibble: 6 × 11
#          AGR        MIN        MAN        PU        CON        WRT       TRA      FIRE        GOV
#        <dbl>      <dbl>      <dbl>     <dbl>      <dbl>      <dbl>     <dbl>     <dbl>      <dbl>
# 1 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 2 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 3 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 4 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 5  1.29e-105  2.81e-127  1.40e-101  4.44e-74  4.19e-102  3.97e-113  6.91e-92  1.01e-97  2.51e-117
# 6  1.24e-105  2.00e-127  1.94e-101  5.75e-74  8.55e-102  4.49e-113  8.08e-92  1.13e-97  2.96e-117
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>

# Same thing
GGDC10S %>%
  fgroup_by(Variable, Country) %>%
    get_vars(6:16) %>% 
     TRA(fprod(., keep.group_vars = FALSE), "/") %>% head # [same as TRA(.,fprod(., keep.group_vars = FALSE),"/")]
# # A tibble: 6 × 11
#          AGR        MIN        MAN        PU        CON        WRT       TRA      FIRE        GOV
#        <dbl>      <dbl>      <dbl>     <dbl>      <dbl>      <dbl>     <dbl>     <dbl>      <dbl>
# 1 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 2 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 3 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 4 NA         NA         NA         NA        NA         NA         NA        NA        NA        
# 5  1.29e-105  2.81e-127  1.40e-101  4.44e-74  4.19e-102  3.97e-113  6.91e-92  1.01e-97  2.51e-117
# 6  1.24e-105  2.00e-127  1.94e-101  5.75e-74  8.55e-102  4.49e-113  8.08e-92  1.13e-97  2.96e-117
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>

TRA.grouped_df was designed such that it matches the columns of the statistics (aggregated columns) to those of the original data, and only transforms matching columns while returning the whole data frame. Thus it is easily possible to only apply a transformation to the first two sectors:

# This only demeans Agriculture (AGR) and Mining (MIN)
GGDC10S %>%
  fgroup_by(Variable, Country) %>%
    TRA(fselect(., AGR, MIN) %>% fmean(keep.group_vars = FALSE), "-") %>% head
# # A tibble: 6 × 16
#   Country Regioncode Region Variable  Year   AGR    MIN    MAN     PU    CON   WRT   TRA  FIRE   GOV
#   <chr>   <chr>      <chr>  <chr>    <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 BWA     SSA        Sub-s… VA        1960   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 2 BWA     SSA        Sub-s… VA        1961   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 3 BWA     SSA        Sub-s… VA        1962   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 4 BWA     SSA        Sub-s… VA        1963   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 5 BWA     SSA        Sub-s… VA        1964 -446. -4505.  0.737  0.104  0.660  6.24  1.66  1.12  4.82
# 6 BWA     SSA        Sub-s… VA        1965 -446. -4506.  1.02   0.135  1.35   7.06  1.94  1.25  5.70
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>

Since TRA is already built into all Fast Statistical Functions as an argument, it is best used in computations where grouped statistics are computed using some other function.

# Same as above, with one line of code using fmean.data.frame and ftransform...
GGDC10S %>% ftransform(fmean(list(AGR = AGR, MIN = MIN), list(Variable, Country), TRA = "-")) %>% head
# # A tibble: 6 × 16
#   Country Regioncode Region Variable  Year   AGR    MIN    MAN     PU    CON   WRT   TRA  FIRE   GOV
#   <chr>   <chr>      <chr>  <chr>    <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 BWA     SSA        Sub-s… VA        1960   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 2 BWA     SSA        Sub-s… VA        1961   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 3 BWA     SSA        Sub-s… VA        1962   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 4 BWA     SSA        Sub-s… VA        1963   NA     NA  NA     NA     NA     NA    NA    NA    NA   
# 5 BWA     SSA        Sub-s… VA        1964 -446. -4505.  0.737  0.104  0.660  6.24  1.66  1.12  4.82
# 6 BWA     SSA        Sub-s… VA        1965 -446. -4506.  1.02   0.135  1.35   7.06  1.94  1.25  5.70
# # ℹ 2 more variables: OTH <dbl>, SUM <dbl>

Another potential use of TRA is to do computations in two- or more steps, for example if both aggregated and transformed data are needed, or if computations are more complex and involve other manipulations in-between the aggregating and sweeping part:

# Get grouped tibble
gGGDC <- GGDC10S %>% fgroup_by(Variable, Country)

# Get aggregated data
gsumGGDC <- gGGDC %>% fselect(AGR:SUM) %>% fsum
head(gsumGGDC)
# # A tibble: 6 × 13
#   Variable Country       AGR     MIN    MAN     PU    CON    WRT    TRA   FIRE     GOV    OTH    SUM
#   <chr>    <chr>       <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
# 1 EMP      ARG        88028.   3230. 1.20e5  6307. 4.60e4 1.23e5 4.02e4 3.89e4  1.27e5 6.15e4 6.54e5
# 2 EMP      BOL        58817.   3418. 1.43e4   326. 7.49e3 1.72e4 7.04e3 2.72e3 NA      2.41e4 1.35e5
# 3 EMP      BRA      1065864.  12773. 4.33e5 22604. 2.19e5 5.28e5 1.27e5 2.74e5  3.29e5 3.54e5 3.36e6
# 4 EMP      BWA         8839.    493. 8.49e2   145. 1.19e3 1.71e3 3.93e2 7.21e2  2.87e3 1.30e3 1.85e4
# 5 EMP      CHL        44220.   6389. 3.94e4  1850. 1.86e4 4.38e4 1.63e4 1.72e4 NA      6.32e4 2.51e5
# 6 EMP      CHN     17264654. 422972. 4.03e6 96364. 1.25e6 1.73e6 8.36e5 2.96e5  1.36e6 1.86e6 2.91e7

# Get transformed (scaled) data
head(TRA(gGGDC, gsumGGDC, "/"))
# # A tibble: 6 × 16
#   Country Regioncode Region     Variable  Year      AGR      MIN      MAN       PU      CON      WRT
#   <chr>   <chr>      <chr>      <chr>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
# 1 BWA     SSA        Sub-sahar… VA        1960 NA       NA       NA       NA       NA       NA      
# 2 BWA     SSA        Sub-sahar… VA        1961 NA       NA       NA       NA       NA       NA      
# 3 BWA     SSA        Sub-sahar… VA        1962 NA       NA       NA       NA       NA       NA      
# 4 BWA     SSA        Sub-sahar… VA        1963 NA       NA       NA       NA       NA       NA      
# 5 BWA     SSA        Sub-sahar… VA        1964  7.50e-4  1.65e-5  1.66e-5  1.03e-5  1.57e-5  6.82e-5
# 6 BWA     SSA        Sub-sahar… VA        1965  7.24e-4  1.18e-5  2.30e-5  1.33e-5  3.20e-5  7.72e-5
# # ℹ 5 more variables: TRA <dbl>, FIRE <dbl>, GOV <dbl>, OTH <dbl>, SUM <dbl>

As discussed, whether using the argument to fast statistical functions or TRA directly, these data transformations are essentially a two-step process: Statistics are first computed and then used to transform the original data.

Although both steps are efficiently done in C++, it would be even more efficient to do them in a single step without materializing all the statistics before transforming the data. Such slightly more efficient functions are provided for the very commonly applied tasks of centering and averaging data by groups (widely known as ‘between’-group and ‘within’-group transformations), and scaling and centering data by groups (also known as ‘standardizing’ data).

2.4 Faster Centering, Averaging and Standardizing

The functions fbetween and fwithin are slightly more memory efficient implementations of fmean invoked with different TRA options:

GGDC10S %>% # Same as ... %>% fmean(TRA = "replace")
  fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fbetween %>% tail(2)
# # A tibble: 2 × 11
#     AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV   OTH    SUM
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
# 1 4444.  34.9 1614.  131.  997. 1307.  799.  320. 2958.    NA 12605.
# 2 4444.  34.9 1614.  131.  997. 1307.  799.  320. 2958.    NA 12605.

GGDC10S %>% # Same as ... %>% fmean(TRA = "replace_fill")
  fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fbetween(fill = TRUE) %>% tail(2)
# # A tibble: 2 × 11
#     AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV   OTH    SUM
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
# 1 4444.  34.9 1614.  131.  997. 1307.  799.  320. 2958.    NA 12605.
# 2 4444.  34.9 1614.  131.  997. 1307.  799.  320. 2958.    NA 12605.

GGDC10S %>% # Same as ... %>% fmean(TRA = "-")
  fgroup_by(Variable, Country) %>% get_vars(6:16) %>% fwithin %>% tail(2)
# # A tibble: 2 × 11
#     AGR    MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV   OTH   SUM
#   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1  742.  -7.35  760.  187. 1798. 1713. 1249.  495. 2678.    NA 9614.
# 2  717. -10.1   734.  194. 1934. 1803. 1266.  512. 2778.    NA 9928.

Apart from higher speed, fwithin has a mean argument to assign an arbitrary mean to centered data, the default being mean = 0. A very common choice for such an added mean is just the overall mean of the data, which can be added in by invoking mean = "overall.mean":

GGDC10S %>% 
  fgroup_by(Variable, Country) %>% 
    fselect(Country, Variable, AGR:SUM) %>% fwithin(mean = "overall.mean") %>% tail(3)
# # A tibble: 3 × 13
#   Country Variable      AGR      MIN      MAN     PU    CON    WRT    TRA   FIRE    GOV   OTH    SUM
#   <chr>   <chr>       <dbl>    <dbl>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
# 1 EGY     EMP      2527458. 1867903. 5539313. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.71e6    NA 2.16e7
# 2 EGY     EMP      2527439. 1867902. 5539251. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.71e6    NA 2.16e7
# 3 EGY     EMP      2527413. 1867899. 5539226. 3.36e5 1.80e6 3.39e6 1.47e6 1.66e6 1.72e6    NA 2.16e7

This can also be done using weights. The code below uses the SUM column as weights, and then for each variable and each group subtracts out the weighted mean, and then adds the overall weighted column mean back to the centered columns. The SUM column is just kept as it is and added after the grouping columns.

GGDC10S %>% 
  fgroup_by(Variable, Country) %>% 
    fselect(Country, Variable, AGR:SUM) %>% fwithin(SUM, mean = "overall.mean") %>% tail(3)
# # A tibble: 3 × 13
#   Country Variable    SUM        AGR      MIN    MAN     PU    CON    WRT    TRA   FIRE    GOV   OTH
#   <chr>   <chr>     <dbl>      <dbl>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
# 1 EGY     EMP      22020. 429066006.   3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8    NA
# 2 EGY     EMP      22219. 429065986.   3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8    NA
# 3 EGY     EMP      22533. 429065961.   3.70e8 7.38e8 2.73e7 2.83e8 4.33e8 1.97e8 1.55e8 2.10e8    NA

Another argument to fwithin is the theta parameter, allowing partial- or quasi-demeaning operations, e.g. fwithin(gdata, theta = theta) is equal to gdata - theta * fbetween(gdata). This is particularly useful to prepare data for variance components (also known as ‘random-effects’) estimation.

Apart from fbetween and fwithin, the function fscale exists to efficiently scale and center data, to avoid sequential calls such as ... %>% fsd(TRA = "/") %>% fmean(TRA = "-").

# This efficiently scales and centers (i.e. standardizes) the data
GGDC10S %>%
  fgroup_by(Variable, Country) %>%
    fselect(Country, Variable, AGR:SUM) %>% fscale
# # A tibble: 5,027 × 13
#    Country Variable    AGR    MIN    MAN     PU    CON    WRT    TRA   FIRE    GOV    OTH    SUM
#  * <chr>   <chr>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#  1 BWA     VA       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA    
#  2 BWA     VA       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA    
#  3 BWA     VA       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA    
#  4 BWA     VA       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA    
#  5 BWA     VA       -0.738 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.596 -0.676
#  6 BWA     VA       -0.739 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.596 -0.676
#  7 BWA     VA       -0.736 -0.717 -0.668 -0.805 -0.692 -0.603 -0.589 -0.635 -0.656 -0.595 -0.676
#  8 BWA     VA       -0.734 -0.717 -0.668 -0.805 -0.692 -0.604 -0.589 -0.635 -0.655 -0.595 -0.676
#  9 BWA     VA       -0.730 -0.717 -0.668 -0.805 -0.692 -0.604 -0.588 -0.635 -0.656 -0.596 -0.676
# 10 BWA     VA       -0.729 -0.716 -0.667 -0.803 -0.690 -0.603 -0.588 -0.635 -0.656 -0.596 -0.675
# # ℹ 5,017 more rows
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

fscale also has additional mean and sd arguments allowing the user to (group-) scale data to an arbitrary mean and standard deviation. Setting mean = FALSE just scales the data but preserves the means, and is thus different from fsd(..., TRA = "/") which simply divides all values by the standard deviation:

# Saving grouped tibble
gGGDC <- GGDC10S %>%
  fgroup_by(Variable, Country) %>%
    fselect(Country, Variable, AGR:SUM)

# Original means
head(fmean(gGGDC)) 
# # A tibble: 6 × 13
#   Variable Country     AGR    MIN     MAN      PU     CON    WRT    TRA   FIRE     GOV    OTH    SUM
#   <chr>    <chr>     <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
# 1 EMP      ARG       1420.   52.1  1932.   102.     742.  1.98e3 6.49e2  628.   2043.  9.92e2 1.05e4
# 2 EMP      BOL        964.   56.0   235.     5.35   123.  2.82e2 1.15e2   44.6    NA   3.96e2 2.22e3
# 3 EMP      BRA      17191.  206.   6991.   365.    3525.  8.51e3 2.05e3 4414.   5307.  5.71e3 5.43e4
# 4 EMP      BWA        188.   10.5    18.1    3.09    25.3 3.63e1 8.36e0   15.3    61.1 2.76e1 3.94e2
# 5 EMP      CHL        702.  101.    625.    29.4    296.  6.95e2 2.58e2  272.     NA   1.00e3 3.98e3
# 6 EMP      CHN     287744. 7050.  67144.  1606.   20852.  2.89e4 1.39e4 4929.  22669.  3.10e4 4.86e5

# Mean Preserving Scaling
head(fmean(fscale(gGGDC, mean = FALSE)))
# # A tibble: 6 × 13
#   Variable Country     AGR    MIN     MAN      PU     CON    WRT    TRA   FIRE     GOV    OTH    SUM
#   <chr>    <chr>     <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
# 1 EMP      ARG       1420.   52.1  1932.   102.     742.  1.98e3 6.49e2  628.   2043.  9.92e2 1.05e4
# 2 EMP      BOL        964.   56.0   235.     5.35   123.  2.82e2 1.15e2   44.6    NA   3.96e2 2.22e3
# 3 EMP      BRA      17191.  206.   6991.   365.    3525.  8.51e3 2.05e3 4414.   5307.  5.71e3 5.43e4
# 4 EMP      BWA        188.   10.5    18.1    3.09    25.3 3.63e1 8.36e0   15.3    61.1 2.76e1 3.94e2
# 5 EMP      CHL        702.  101.    625.    29.4    296.  6.95e2 2.58e2  272.     NA   1.00e3 3.98e3
# 6 EMP      CHN     287744. 7050.  67144.  1606.   20852.  2.89e4 1.39e4 4929.  22669.  3.10e4 4.86e5
head(fsd(fscale(gGGDC, mean = FALSE)))
# # A tibble: 6 × 13
#   Variable Country   AGR   MIN   MAN    PU   CON   WRT   TRA  FIRE   GOV   OTH   SUM
#   <chr>    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 EMP      ARG      1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00
# 2 EMP      BOL      1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00 NA     1.00  1.00
# 3 EMP      BRA      1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00
# 4 EMP      BWA      1.00  1.00  1.00  1     1.00  1.00  1.00  1     1.00  1.00  1.00
# 5 EMP      CHL      1.00  1     1.00  1.00  1.00  1.00  1.00  1.00 NA     1.00  1.00
# 6 EMP      CHN      1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00

One can also set mean = "overall.mean", which group-centers columns on the overall mean as illustrated with fwithin. Another interesting option is setting sd = "within.sd". This group-scales data such that every group has a standard deviation equal to the within-standard deviation of the data:

# Just using VA data for this example
gGGDC <- GGDC10S %>%
  fsubset(Variable == "VA", Country, AGR:SUM) %>% 
      fgroup_by(Country)

# This calculates the within- standard deviation for all columns
fsd(num_vars(ungroup(fwithin(gGGDC))))
#       AGR       MIN       MAN        PU       CON       WRT       TRA      FIRE       GOV       OTH 
#  45046972  40122220  75608708   3062688  30811572  44125207  20676901  16030868  20358973  18780869 
#       SUM 
# 306429102

# This scales all groups to take on the within- standard deviation while preserving group means 
fsd(fscale(gGGDC, mean = FALSE, sd = "within.sd"))
# # A tibble: 43 × 12
#    Country       AGR       MIN       MAN       PU     CON    WRT    TRA   FIRE     GOV    OTH    SUM
#    <chr>       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#  1 ARG     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7  2.04e7 1.88e7 3.06e8
#  2 BOL     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7 NA      1.88e7 3.06e8
#  3 BRA     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7  2.04e7 1.88e7 3.06e8
#  4 BWA     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7  2.04e7 1.88e7 3.06e8
#  5 CHL     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7 NA      1.88e7 3.06e8
#  6 CHN     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7  2.04e7 1.88e7 3.06e8
#  7 COL     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7 NA      1.88e7 3.06e8
#  8 CRI     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7  2.04e7 1.88e7 3.06e8
#  9 DEW     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7  2.04e7 1.88e7 3.06e8
# 10 DNK     45046972. 40122220. 75608708. 3062688.  3.08e7 4.41e7 2.07e7 1.60e7  2.04e7 1.88e7 3.06e8
# # ℹ 33 more rows

A grouped scaling operation with both mean = "overall.mean" and sd = "within.sd" thus efficiently achieves a harmonization of all groups in the first two moments without changing the fundamental properties (in terms of level and scale) of the data.

2.5 Lags / Leads, Differences and Growth Rates

This section introduces 3 further powerful collapse functions: flag, fdiff and fgrowth. The first function, flag, efficiently computes sequences of fully identified lags and leads on time series and panel data. The following code computes 1 fully-identified panel-lag and 1 fully identified panel-lead of each variable in the data:

GGDC10S %>%
  fselect(-Region, -Regioncode) %>% 
    fgroup_by(Variable, Country) %>% flag(-1:1, Year)
# # A tibble: 5,027 × 36
#    Country Variable  Year F1.AGR   AGR L1.AGR F1.MIN   MIN L1.MIN F1.MAN    MAN L1.MAN  F1.PU     PU
#  * <chr>   <chr>    <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#  1 BWA     VA        1960   NA    NA     NA    NA    NA     NA    NA     NA     NA     NA     NA    
#  2 BWA     VA        1961   NA    NA     NA    NA    NA     NA    NA     NA     NA     NA     NA    
#  3 BWA     VA        1962   NA    NA     NA    NA    NA     NA    NA     NA     NA     NA     NA    
#  4 BWA     VA        1963   16.3  NA     NA     3.49 NA     NA     0.737 NA     NA      0.104 NA    
#  5 BWA     VA        1964   15.7  16.3   NA     2.50  3.49  NA     1.02   0.737 NA      0.135  0.104
#  6 BWA     VA        1965   17.7  15.7   16.3   1.97  2.50   3.49  0.804  1.02   0.737  0.203  0.135
#  7 BWA     VA        1966   19.1  17.7   15.7   2.30  1.97   2.50  0.938  0.804  1.02   0.203  0.203
#  8 BWA     VA        1967   21.1  19.1   17.7   1.84  2.30   1.97  0.750  0.938  0.804  0.203  0.203
#  9 BWA     VA        1968   21.9  21.1   19.1   5.24  1.84   2.30  2.14   0.750  0.938  0.578  0.203
# 10 BWA     VA        1969   23.1  21.9   21.1  10.2   5.24   1.84  4.15   2.14   0.750  1.12   0.578
# # ℹ 5,017 more rows
# # ℹ 22 more variables: L1.PU <dbl>, F1.CON <dbl>, CON <dbl>, L1.CON <dbl>, F1.WRT <dbl>, WRT <dbl>,
# #   L1.WRT <dbl>, F1.TRA <dbl>, TRA <dbl>, L1.TRA <dbl>, F1.FIRE <dbl>, FIRE <dbl>, L1.FIRE <dbl>,
# #   F1.GOV <dbl>, GOV <dbl>, L1.GOV <dbl>, F1.OTH <dbl>, OTH <dbl>, L1.OTH <dbl>, F1.SUM <dbl>,
# #   SUM <dbl>, L1.SUM <dbl>
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

If the time-variable passed does not exactly identify the data (i.e. because of repeated values in each group), all 3 functions will issue appropriate error messages. flag, fdiff and fgrowth support irregular time series and unbalanced panels.

It is also possible to omit the time-variable if one is certain that the data is sorted:

GGDC10S %>%
  fselect(Variable, Country,AGR:SUM) %>% 
    fgroup_by(Variable, Country) %>% flag
# # A tibble: 5,027 × 13
#    Variable Country   AGR   MIN    MAN     PU    CON   WRT   TRA  FIRE   GOV   OTH   SUM
#  * <chr>    <chr>   <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1 VA       BWA      NA   NA    NA     NA     NA     NA    NA    NA    NA    NA     NA  
#  2 VA       BWA      NA   NA    NA     NA     NA     NA    NA    NA    NA    NA     NA  
#  3 VA       BWA      NA   NA    NA     NA     NA     NA    NA    NA    NA    NA     NA  
#  4 VA       BWA      NA   NA    NA     NA     NA     NA    NA    NA    NA    NA     NA  
#  5 VA       BWA      NA   NA    NA     NA     NA     NA    NA    NA    NA    NA     NA  
#  6 VA       BWA      16.3  3.49  0.737  0.104  0.660  6.24  1.66  1.12  4.82  2.34  37.5
#  7 VA       BWA      15.7  2.50  1.02   0.135  1.35   7.06  1.94  1.25  5.70  2.68  39.3
#  8 VA       BWA      17.7  1.97  0.804  0.203  1.35   8.27  2.15  1.36  6.37  2.99  43.1
#  9 VA       BWA      19.1  2.30  0.938  0.203  0.897  4.31  1.72  1.54  7.04  3.31  41.4
# 10 VA       BWA      21.1  1.84  0.750  0.203  1.22   5.17  2.44  1.03  5.03  2.36  41.1
# # ℹ 5,017 more rows
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

fdiff computes sequences of lagged-leaded and iterated differences as well as quasi-differences and log-differences on time series and panel data. The code below computes the 1 and 10 year first and second differences of each variable in the data:

GGDC10S %>%
  fselect(-Region, -Regioncode) %>% 
    fgroup_by(Variable, Country) %>% fdiff(c(1, 10), 1:2, Year)
# # A tibble: 5,027 × 47
#    Country Variable  Year D1.AGR D2.AGR L10D1.AGR L10D2.AGR D1.MIN D2.MIN L10D1.MIN L10D2.MIN D1.MAN
#  * <chr>   <chr>    <dbl>  <dbl>  <dbl>     <dbl>     <dbl>  <dbl>  <dbl>     <dbl>     <dbl>  <dbl>
#  1 BWA     VA        1960 NA     NA            NA        NA NA     NA            NA        NA NA    
#  2 BWA     VA        1961 NA     NA            NA        NA NA     NA            NA        NA NA    
#  3 BWA     VA        1962 NA     NA            NA        NA NA     NA            NA        NA NA    
#  4 BWA     VA        1963 NA     NA            NA        NA NA     NA            NA        NA NA    
#  5 BWA     VA        1964 NA     NA            NA        NA NA     NA            NA        NA NA    
#  6 BWA     VA        1965 -0.575 NA            NA        NA -0.998 NA            NA        NA  0.282
#  7 BWA     VA        1966  1.95   2.53         NA        NA -0.525  0.473        NA        NA -0.214
#  8 BWA     VA        1967  1.47  -0.488        NA        NA  0.328  0.854        NA        NA  0.134
#  9 BWA     VA        1968  1.95   0.488        NA        NA -0.460 -0.788        NA        NA -0.188
# 10 BWA     VA        1969  0.763 -1.19         NA        NA  3.41   3.87         NA        NA  1.39 
# # ℹ 5,017 more rows
# # ℹ 35 more variables: D2.MAN <dbl>, L10D1.MAN <dbl>, L10D2.MAN <dbl>, D1.PU <dbl>, D2.PU <dbl>,
# #   L10D1.PU <dbl>, L10D2.PU <dbl>, D1.CON <dbl>, D2.CON <dbl>, L10D1.CON <dbl>, L10D2.CON <dbl>,
# #   D1.WRT <dbl>, D2.WRT <dbl>, L10D1.WRT <dbl>, L10D2.WRT <dbl>, D1.TRA <dbl>, D2.TRA <dbl>,
# #   L10D1.TRA <dbl>, L10D2.TRA <dbl>, D1.FIRE <dbl>, D2.FIRE <dbl>, L10D1.FIRE <dbl>,
# #   L10D2.FIRE <dbl>, D1.GOV <dbl>, D2.GOV <dbl>, L10D1.GOV <dbl>, L10D2.GOV <dbl>, D1.OTH <dbl>,
# #   D2.OTH <dbl>, L10D1.OTH <dbl>, L10D2.OTH <dbl>, D1.SUM <dbl>, D2.SUM <dbl>, L10D1.SUM <dbl>, …
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

Log-differences of the form \(log(x_t) - log(x_{t-s})\) are also easily computed.

GGDC10S %>%
  fselect(-Region, -Regioncode) %>% 
    fgroup_by(Variable, Country) %>% fdiff(c(1, 10), 1, Year, log = TRUE)
# # A tibble: 5,027 × 25
#    Country Variable  Year Dlog1.AGR L10Dlog1.AGR Dlog1.MIN L10Dlog1.MIN Dlog1.MAN L10Dlog1.MAN
#  * <chr>   <chr>    <dbl>     <dbl>        <dbl>     <dbl>        <dbl>     <dbl>        <dbl>
#  1 BWA     VA        1960   NA                NA    NA               NA    NA               NA
#  2 BWA     VA        1961   NA                NA    NA               NA    NA               NA
#  3 BWA     VA        1962   NA                NA    NA               NA    NA               NA
#  4 BWA     VA        1963   NA                NA    NA               NA    NA               NA
#  5 BWA     VA        1964   NA                NA    NA               NA    NA               NA
#  6 BWA     VA        1965   -0.0359           NA    -0.336           NA     0.324           NA
#  7 BWA     VA        1966    0.117            NA    -0.236           NA    -0.236           NA
#  8 BWA     VA        1967    0.0796           NA     0.154           NA     0.154           NA
#  9 BWA     VA        1968    0.0972           NA    -0.223           NA    -0.223           NA
# 10 BWA     VA        1969    0.0355           NA     1.05            NA     1.05            NA
# # ℹ 5,017 more rows
# # ℹ 16 more variables: Dlog1.PU <dbl>, L10Dlog1.PU <dbl>, Dlog1.CON <dbl>, L10Dlog1.CON <dbl>,
# #   Dlog1.WRT <dbl>, L10Dlog1.WRT <dbl>, Dlog1.TRA <dbl>, L10Dlog1.TRA <dbl>, Dlog1.FIRE <dbl>,
# #   L10Dlog1.FIRE <dbl>, Dlog1.GOV <dbl>, L10Dlog1.GOV <dbl>, Dlog1.OTH <dbl>, L10Dlog1.OTH <dbl>,
# #   Dlog1.SUM <dbl>, L10Dlog1.SUM <dbl>
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

Finally, it is also possible to compute quasi-differences and quasi-log-differences of the form \(x_t - \rho x_{t-s}\) or \(log(x_t) - \rho log(x_{t-s})\):

GGDC10S %>%
  fselect(-Region, -Regioncode) %>% 
    fgroup_by(Variable, Country) %>% fdiff(t = Year, rho = 0.95)
# # A tibble: 5,027 × 14
#    Country Variable  Year    AGR    MIN    MAN      PU     CON    WRT    TRA   FIRE    GOV    OTH
#  * <chr>   <chr>    <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#  1 BWA     VA        1960 NA     NA     NA     NA      NA      NA     NA     NA     NA     NA    
#  2 BWA     VA        1961 NA     NA     NA     NA      NA      NA     NA     NA     NA     NA    
#  3 BWA     VA        1962 NA     NA     NA     NA      NA      NA     NA     NA     NA     NA    
#  4 BWA     VA        1963 NA     NA     NA     NA      NA      NA     NA     NA     NA     NA    
#  5 BWA     VA        1964 NA     NA     NA     NA      NA      NA     NA     NA     NA     NA    
#  6 BWA     VA        1965  0.241 -0.824  0.318  0.0359  0.719   1.13   0.363  0.184  1.11   0.454
#  7 BWA     VA        1966  2.74  -0.401 -0.163  0.0743  0.0673  1.56   0.312  0.174  0.955  0.449
#  8 BWA     VA        1967  2.35   0.427  0.174  0.0101 -0.381  -3.55  -0.323  0.246  0.988  0.465
#  9 BWA     VA        1968  2.91  -0.345 -0.141  0.0101  0.365   1.08   0.804 -0.427 -1.66  -0.780
# 10 BWA     VA        1969  1.82   3.50   1.43   0.385   2.32    0.841  0.397  0.252  0.818  0.385
# # ℹ 5,017 more rows
# # ℹ 1 more variable: SUM <dbl>
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

The quasi-differencing feature was added to fdiff to facilitate the preparation of time series and panel data for least-squares estimations suffering from serial correlation following Cochrane & Orcutt (1949).

Finally, fgrowth computes growth rates in the same way. By default exact growth rates are computed in percentage terms using \((x_t-x_{t-s}) / x_{t-s} \times 100\) (the default argument is scale = 100). The user can also request growth rates obtained by log-differencing using \(log(x_t/ x_{t-s}) \times 100\).

# Exact growth rates, computed as: (x/lag(x) - 1) * 100
GGDC10S %>%
  fselect(-Region, -Regioncode) %>% 
    fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year)
# # A tibble: 5,027 × 25
#    Country Variable  Year G1.AGR L10G1.AGR G1.MIN L10G1.MIN G1.MAN L10G1.MAN G1.PU L10G1.PU G1.CON
#  * <chr>   <chr>    <dbl>  <dbl>     <dbl>  <dbl>     <dbl>  <dbl>     <dbl> <dbl>    <dbl>  <dbl>
#  1 BWA     VA        1960  NA           NA   NA          NA   NA          NA  NA         NA   NA  
#  2 BWA     VA        1961  NA           NA   NA          NA   NA          NA  NA         NA   NA  
#  3 BWA     VA        1962  NA           NA   NA          NA   NA          NA  NA         NA   NA  
#  4 BWA     VA        1963  NA           NA   NA          NA   NA          NA  NA         NA   NA  
#  5 BWA     VA        1964  NA           NA   NA          NA   NA          NA  NA         NA   NA  
#  6 BWA     VA        1965  -3.52        NA  -28.6        NA   38.2        NA  29.4       NA  104. 
#  7 BWA     VA        1966  12.4         NA  -21.1        NA  -21.1        NA  50         NA    0  
#  8 BWA     VA        1967   8.29        NA   16.7        NA   16.7        NA   0         NA  -33.3
#  9 BWA     VA        1968  10.2         NA  -20          NA  -20          NA   0         NA   35.7
# 10 BWA     VA        1969   3.61        NA  185.         NA  185.         NA 185.        NA  185. 
# # ℹ 5,017 more rows
# # ℹ 13 more variables: L10G1.CON <dbl>, G1.WRT <dbl>, L10G1.WRT <dbl>, G1.TRA <dbl>,
# #   L10G1.TRA <dbl>, G1.FIRE <dbl>, L10G1.FIRE <dbl>, G1.GOV <dbl>, L10G1.GOV <dbl>, G1.OTH <dbl>,
# #   L10G1.OTH <dbl>, G1.SUM <dbl>, L10G1.SUM <dbl>
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

# Log-difference growth rates, computed as: log(x / lag(x)) * 100
GGDC10S %>%
  fselect(-Region, -Regioncode) %>% 
    fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year, logdiff = TRUE)
# # A tibble: 5,027 × 25
#    Country Variable  Year Dlog1.AGR L10Dlog1.AGR Dlog1.MIN L10Dlog1.MIN Dlog1.MAN L10Dlog1.MAN
#  * <chr>   <chr>    <dbl>     <dbl>        <dbl>     <dbl>        <dbl>     <dbl>        <dbl>
#  1 BWA     VA        1960     NA              NA      NA             NA      NA             NA
#  2 BWA     VA        1961     NA              NA      NA             NA      NA             NA
#  3 BWA     VA        1962     NA              NA      NA             NA      NA             NA
#  4 BWA     VA        1963     NA              NA      NA             NA      NA             NA
#  5 BWA     VA        1964     NA              NA      NA             NA      NA             NA
#  6 BWA     VA        1965     -3.59           NA     -33.6           NA      32.4           NA
#  7 BWA     VA        1966     11.7            NA     -23.6           NA     -23.6           NA
#  8 BWA     VA        1967      7.96           NA      15.4           NA      15.4           NA
#  9 BWA     VA        1968      9.72           NA     -22.3           NA     -22.3           NA
# 10 BWA     VA        1969      3.55           NA     105.            NA     105.            NA
# # ℹ 5,017 more rows
# # ℹ 16 more variables: Dlog1.PU <dbl>, L10Dlog1.PU <dbl>, Dlog1.CON <dbl>, L10Dlog1.CON <dbl>,
# #   Dlog1.WRT <dbl>, L10Dlog1.WRT <dbl>, Dlog1.TRA <dbl>, L10Dlog1.TRA <dbl>, Dlog1.FIRE <dbl>,
# #   L10Dlog1.FIRE <dbl>, Dlog1.GOV <dbl>, L10Dlog1.GOV <dbl>, Dlog1.OTH <dbl>, L10Dlog1.OTH <dbl>,
# #   Dlog1.SUM <dbl>, L10Dlog1.SUM <dbl>
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

fdiff and fgrowth can also perform leaded (forward) differences and growth rates (i.e. ... %>% fgrowth(-c(1, 10), 1:2, Year) would compute one and 10-year leaded first and second differences). Again it is possible to perform sequential operations:

# This computes the 1 and 10-year growth rates, for the current period and lagged by one period
GGDC10S %>%
  fselect(-Region, -Regioncode) %>% 
    fgroup_by(Variable, Country) %>% fgrowth(c(1, 10), 1, Year) %>% flag(0:1, Year)
# # A tibble: 5,027 × 47
#    Country Variable  Year G1.AGR L1.G1.AGR L10G1.AGR L1.L10G1.AGR G1.MIN L1.G1.MIN L10G1.MIN
#  * <chr>   <chr>    <dbl>  <dbl>     <dbl>     <dbl>        <dbl>  <dbl>     <dbl>     <dbl>
#  1 BWA     VA        1960  NA        NA           NA           NA   NA        NA          NA
#  2 BWA     VA        1961  NA        NA           NA           NA   NA        NA          NA
#  3 BWA     VA        1962  NA        NA           NA           NA   NA        NA          NA
#  4 BWA     VA        1963  NA        NA           NA           NA   NA        NA          NA
#  5 BWA     VA        1964  NA        NA           NA           NA   NA        NA          NA
#  6 BWA     VA        1965  -3.52     NA           NA           NA  -28.6      NA          NA
#  7 BWA     VA        1966  12.4      -3.52        NA           NA  -21.1     -28.6        NA
#  8 BWA     VA        1967   8.29     12.4         NA           NA   16.7     -21.1        NA
#  9 BWA     VA        1968  10.2       8.29        NA           NA  -20        16.7        NA
# 10 BWA     VA        1969   3.61     10.2         NA           NA  185.      -20          NA
# # ℹ 5,017 more rows
# # ℹ 37 more variables: L1.L10G1.MIN <dbl>, G1.MAN <dbl>, L1.G1.MAN <dbl>, L10G1.MAN <dbl>,
# #   L1.L10G1.MAN <dbl>, G1.PU <dbl>, L1.G1.PU <dbl>, L10G1.PU <dbl>, L1.L10G1.PU <dbl>,
# #   G1.CON <dbl>, L1.G1.CON <dbl>, L10G1.CON <dbl>, L1.L10G1.CON <dbl>, G1.WRT <dbl>,
# #   L1.G1.WRT <dbl>, L10G1.WRT <dbl>, L1.L10G1.WRT <dbl>, G1.TRA <dbl>, L1.G1.TRA <dbl>,
# #   L10G1.TRA <dbl>, L1.L10G1.TRA <dbl>, G1.FIRE <dbl>, L1.G1.FIRE <dbl>, L10G1.FIRE <dbl>,
# #   L1.L10G1.FIRE <dbl>, G1.GOV <dbl>, L1.G1.GOV <dbl>, L10G1.GOV <dbl>, L1.L10G1.GOV <dbl>, …
# 
# Grouped by:  Variable, Country  [85 | 59 (7.7) 4-65]

3. Benchmarks

This section seeks to demonstrate that the functionality introduced in the preceding 2 sections indeed produces code that evaluates substantially faster than native dplyr.

To do this properly, the different components of a typical piped call (selecting / subsetting, ordering, grouping, and performing some computation) are benchmarked separately on 2 different data sizes.

All benchmarks are run on a Windows 8.1 laptop with a 2x 2.2 GHZ Intel i5 processor, 8GB DDR3 RAM and a Samsung 850 EVO SSD hard drive.

3.1 Data

Benchmarks are run on the original GGDC10S data used throughout this vignette and a larger dataset with approx. 1 million observations, obtained by replicating and row-binding GGDC10S 200 times while maintaining unique groups.

# This shows the groups in GGDC10S
GRP(GGDC10S, ~ Variable + Country)
# collapse grouping object of length 5027 with 85 ordered groups
# 
# Call: GRP.default(X = GGDC10S, by = ~Variable + Country), X is unsorted
# 
# Distribution of group sizes: 
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#    4.00   53.00   62.00   59.14   63.00   65.00 
# 
# Groups with sizes: 
# EMP.ARG EMP.BOL EMP.BRA EMP.BWA EMP.CHL EMP.CHN 
#      62      61      62      52      63      62 
#   ---
# VA.TWN VA.TZA VA.USA VA.VEN VA.ZAF VA.ZMB 
#     63     52     65     63     52     52

# This replicates the data 200 times 
data <- replicate(200, GGDC10S, simplify = FALSE) 
# This function adds a number i to the country and variable columns of each dataset
uniquify <- function(x, i) ftransform(x, lapply(unclass(x)[c(1,4)], paste0, i))
# Making datasets unique and row-binding them
data <- unlist2d(Map(uniquify, data, as.list(1:200)), idcols = FALSE)
fdim(data)
# [1] 1005400      16

# This shows the groups in the replicated data
GRP(data, ~ Variable + Country)
# collapse grouping object of length 1005400 with 17000 ordered groups
# 
# Call: GRP.default(X = data, by = ~Variable + Country), X is unsorted
# 
# Distribution of group sizes: 
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#    4.00   53.00   62.00   59.14   63.00   65.00 
# 
# Groups with sizes: 
# EMP1.ARG1 EMP1.BOL1 EMP1.BRA1 EMP1.BWA1 EMP1.CHL1 EMP1.CHN1 
#        62        61        62        52        63        62 
#   ---
# VA99.TWN99 VA99.TZA99 VA99.USA99 VA99.VEN99 VA99.ZAF99 VA99.ZMB99 
#         63         52         65         63         52         52

gc()
#            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
# Ncells  3184710 170.1    8862174  473.3         NA   8862174  473.3
# Vcells 23965820 182.9  147787078 1127.6      16384 445825141 3401.4

3.1 Selecting, Subsetting, Ordering and Grouping

## Selecting columns
# Small
microbenchmark(dplyr = select(GGDC10S, Country, Variable, AGR:SUM),
               collapse = fselect(GGDC10S, Country, Variable, AGR:SUM))
# Unit: microseconds
#      expr     min       lq      mean  median      uq     max neval
#     dplyr 400.775 410.7585 425.43117 416.396 424.637 820.041   100
#  collapse   2.911   3.4645   4.59856   4.469   5.412  15.293   100

# Large
microbenchmark(dplyr = select(data, Country, Variable, AGR:SUM),
               collapse = fselect(data, Country, Variable, AGR:SUM))
# Unit: microseconds
#      expr     min      lq      mean   median       uq     max neval
#     dplyr 388.926 396.429 412.67730 402.9890 411.0455 728.734   100
#  collapse   2.870   3.280   4.44686   3.8335   5.3300  12.669   100

## Subsetting columns 
# Small
microbenchmark(dplyr = filter(GGDC10S, Variable == "VA"),
               collapse = fsubset(GGDC10S, Variable == "VA"))
# Unit: microseconds
#      expr     min       lq      mean   median       uq     max neval
#     dplyr 374.084 394.4405 409.23986 401.0005 414.3050 716.475   100
#  collapse  39.278  48.2775  55.85307  55.5550  60.4545 103.320   100

# Large
microbenchmark(dplyr = filter(data, Variable == "VA"),
               collapse = fsubset(data, Variable == "VA"))
# Unit: milliseconds
#      expr      min       lq     mean   median       uq       max neval
#     dplyr 4.487409 5.242752 8.352270 5.653223 6.434048 159.13658   100
#  collapse 2.840808 3.082359 3.469128 3.163478 3.302714  16.56047   100

## Ordering rows
# Small
microbenchmark(dplyr = arrange(GGDC10S, desc(Country), Variable, Year),
               collapse = roworder(GGDC10S, -Country, Variable, Year))
# Unit: microseconds
#      expr      min        lq      mean   median        uq      max neval
#     dplyr 1715.112 1867.4270 1983.4726 2015.109 2080.7500 2367.791   100
#  collapse  192.495  232.4085  256.3878  247.968  258.7715 1055.381   100

# Large
microbenchmark(dplyr = arrange(data, desc(Country), Variable, Year),
               collapse = roworder(data, -Country, Variable, Year), times = 2)
# Unit: milliseconds
#      expr      min       lq      mean    median        uq       max neval
#     dplyr 89.37512 89.37512 101.05180 101.05180 112.72848 112.72848     2
#  collapse 66.46703 66.46703  67.45254  67.45254  68.43806  68.43806     2


## Grouping 
# Small
microbenchmark(dplyr = group_by(GGDC10S, Country, Variable),
               collapse = fgroup_by(GGDC10S, Country, Variable))
# Unit: microseconds
#      expr     min       lq     mean   median       uq      max neval
#     dplyr 778.713 815.1825 911.3484 874.2225 960.3840 1529.874   100
#  collapse 146.534 157.6245 198.5921 165.0660 177.3455 1484.241   100

# Large
microbenchmark(dplyr = group_by(data, Country, Variable),
               collapse = fgroup_by(data, Country, Variable), times = 10)
# Unit: milliseconds
#      expr      min       lq     mean   median       uq      max neval
#     dplyr 34.20294 34.62839 34.88041 34.88432 35.07821 35.48279    10
#  collapse 27.89972 28.03211 28.55175 28.36954 29.32283 29.54206    10

## Computing a new column 
# Small
microbenchmark(dplyr = mutate(GGDC10S, NEW = AGR+1),
               collapse = ftransform(GGDC10S, NEW = AGR+1))
# Unit: microseconds
#      expr     min       lq      mean   median       uq     max neval
#     dplyr 317.463 321.7270 333.38822 324.9660 333.7810 631.564   100
#  collapse   8.897  11.0495  12.95354  12.4435  14.2065  38.991   100

# Large
microbenchmark(dplyr = mutate(data, NEW = AGR+1),
               collapse = ftransform(data, NEW = AGR+1))
# Unit: microseconds
#      expr     min       lq     mean    median        uq      max neval
#     dplyr 637.878 1084.225 1330.006 1164.6665 1291.2335 15869.05   100
#  collapse 210.740  657.025 1021.434  698.3735  781.7675 16725.09   100

## All combined with pipes 
# Small
microbenchmark(dplyr = filter(GGDC10S, Variable == "VA") %>% 
                       select(Country, Year, AGR:SUM) %>% 
                       arrange(desc(Country), Year) %>%
                       mutate(NEW = AGR+1) %>%
                       group_by(Country),
               collapse = fsubset(GGDC10S, Variable == "VA", Country, Year, AGR:SUM) %>% 
                       roworder(-Country, Year) %>%
                       ftransform(NEW = AGR+1) %>%
                       fgroup_by(Country))
# Unit: microseconds
#      expr      min       lq      mean   median       uq      max neval
#     dplyr 2982.340 3416.325 3525.7983 3538.464 3668.516 5034.021   100
#  collapse  136.858  186.632  214.4681  211.683  243.130  314.470   100

# Large
microbenchmark(dplyr = filter(data, Variable == "VA") %>% 
                       select(Country, Year, AGR:SUM) %>% 
                       arrange(desc(Country), Year) %>%
                       mutate(NEW = AGR+1) %>%
                       group_by(Country),
               collapse = fsubset(data, Variable == "VA", Country, Year, AGR:SUM) %>% 
                       roworder(-Country, Year) %>%
                       ftransform(NEW = AGR+1) %>%
                       fgroup_by(Country), times = 10)
# Unit: milliseconds
#      expr      min       lq     mean   median       uq      max neval
#     dplyr 7.917182 7.997378 8.142653 8.109943 8.292291 8.423163    10
#  collapse 3.080289 3.104028 3.150153 3.140969 3.188365 3.251259    10

gc()
#            used  (Mb) gc trigger  (Mb) limit (Mb)  max used   (Mb)
# Ncells  3184728 170.1    8862174 473.3         NA   8862174  473.3
# Vcells 23970594 182.9   75772825 578.2      16384 445825141 3401.4

3.1 Aggregation

## Grouping the data
cgGGDC10S <- fgroup_by(GGDC10S, Variable, Country) %>% fselect(-Region, -Regioncode)
gGGDC10S <- group_by(GGDC10S, Variable, Country) %>% fselect(-Region, -Regioncode)
cgdata <- fgroup_by(data, Variable, Country) %>% fselect(-Region, -Regioncode)
gdata <- group_by(data, Variable, Country) %>% fselect(-Region, -Regioncode)
rm(data, GGDC10S) 
gc()
#            used (Mb) gc trigger  (Mb) limit (Mb)  max used   (Mb)
# Ncells  3201723  171    8862174 473.3         NA   8862174  473.3
# Vcells 23589381  180   75772825 578.2      16384 445825141 3401.4

## Conversion of Grouping object: This time would be required extra in all hybrid calls 
## i.e. when calling collapse functions on data grouped with dplyr::group_by
# Small
microbenchmark(GRP(gGGDC10S))
# Unit: microseconds
#           expr   min     lq     mean median     uq    max neval
#  GRP(gGGDC10S) 8.692 9.2455 10.16021 9.4915 10.086 39.196   100

# Large
microbenchmark(GRP(gdata))
# Unit: microseconds
#        expr     min       lq     mean   median       uq      max neval
#  GRP(gdata) 885.641 1160.915 1248.258 1237.236 1323.234 1651.398   100


## Sum 
# Small
microbenchmark(dplyr = summarise_all(gGGDC10S, sum, na.rm = TRUE),
               collapse = fsum(cgGGDC10S))
# Unit: microseconds
#      expr      min        lq      mean    median       uq       max neval
#     dplyr 3017.723 3354.1895 3733.4739 3620.9560 3738.441 22135.736   100
#  collapse  218.120  227.3655  236.7693  235.1965  244.852   270.805   100

# Large
microbenchmark(dplyr = summarise_all(gdata, sum, na.rm = TRUE),
               collapse = fsum(cgdata), times = 10)
# Unit: milliseconds
#      expr      min        lq      mean    median        uq       max neval
#     dplyr 272.9737 279.91024 305.02067 283.59737 303.57122 448.07629    10
#  collapse  41.5330  41.63214  41.88717  41.77062  41.96059  42.78662    10

## Mean
# Small
microbenchmark(dplyr = summarise_all(gGGDC10S, mean.default, na.rm = TRUE),
               collapse = fmean(cgGGDC10S))
# Unit: microseconds
#      expr      min        lq      mean   median       uq       max neval
#     dplyr 4360.104 4596.6740 5125.4194 4754.791 5005.710 37144.852   100
#  collapse  169.084  174.3935  185.4594  183.434  194.832   221.933   100

# Large
microbenchmark(dplyr = summarise_all(gdata, mean.default, na.rm = TRUE),
               collapse = fmean(cgdata), times = 10)
# Unit: milliseconds
#      expr      min        lq      mean    median        uq       max neval
#     dplyr 623.5123 642.83748 704.39836 681.32260 786.82731 829.74435    10
#  collapse  31.7636  31.88037  32.00222  31.99445  32.08209  32.43875    10

## Median
# Small
microbenchmark(dplyr = summarise_all(gGGDC10S, median, na.rm = TRUE),
               collapse = fmedian(cgGGDC10S))
# Unit: microseconds
#      expr       min        lq       mean     median        uq       max neval
#     dplyr 14399.118 14849.933 16170.3500 14982.5685 15145.892 33613.235   100
#  collapse   137.596   164.902   189.2056   178.1245   214.676   248.624   100

# Large
microbenchmark(dplyr = summarise_all(gdata, median, na.rm = TRUE),
               collapse = fmedian(cgdata), times = 2)
# Unit: milliseconds
#      expr        min         lq       mean     median         uq        max neval
#     dplyr 2826.83036 2826.83036 2828.12912 2828.12912 2829.42788 2829.42788     2
#  collapse   19.95564   19.95564   19.98524   19.98524   20.01485   20.01485     2

## Standard Deviation
# Small
microbenchmark(dplyr = summarise_all(gGGDC10S, sd, na.rm = TRUE),
               collapse = fsd(cgGGDC10S))
# Unit: microseconds
#      expr      min        lq      mean   median       uq       max neval
#     dplyr 8332.635 8612.5215 9365.1216 8712.766 8989.086 25087.982   100
#  collapse  242.228  251.0225  269.7849  273.552  282.326   321.891   100

# Large
microbenchmark(dplyr = summarise_all(gdata, sd, na.rm = TRUE),
               collapse = fsd(cgdata), times = 2)
# Unit: milliseconds
#      expr        min         lq       mean     median         uq        max neval
#     dplyr 1375.80363 1375.80363 1409.60358 1409.60358 1443.40352 1443.40352     2
#  collapse   46.21713   46.21713   56.88205   56.88205   67.54697   67.54697     2

## Maximum
# Small
microbenchmark(dplyr = summarise_all(gGGDC10S, max, na.rm = TRUE),
               collapse = fmax(cgGGDC10S))
# Unit: microseconds
#      expr       min         lq        mean    median         uq       max neval
#     dplyr 39964.504 41008.8560 43577.92707 41448.273 44195.1095 58816.550   100
#  collapse    68.798    74.7225    87.83389    77.572   100.9215   129.519   100

# Large
microbenchmark(dplyr = summarise_all(gdata, max, na.rm = TRUE),
               collapse = fmax(cgdata), times = 10)
# Unit: milliseconds
#      expr       min       lq     mean    median        uq       max neval
#     dplyr 480.83804 490.9982 540.7374 517.86136 533.85723 687.14713    10
#  collapse  11.40116  11.7745  11.9366  11.85156  11.94908  13.18318    10

## First Value
# Small
microbenchmark(dplyr = summarise_all(gGGDC10S, first),
               collapse = ffirst(cgGGDC10S, na.rm = FALSE))
# Unit: microseconds
#      expr      min       lq       mean   median       uq       max neval
#     dplyr 4147.888 4242.249 4801.88966 4383.248 4701.532 19254.215   100
#  collapse   11.685   14.227   26.25476   24.764   35.301   137.514   100

# Large
microbenchmark(dplyr = summarise_all(gdata, first),
               collapse = ffirst(cgdata, na.rm = FALSE), times = 10)
# Unit: microseconds
#      expr       min         lq       mean    median         uq        max neval
#     dplyr 530327.66 558767.393 637499.226 596503.08 672801.103 969373.660    10
#  collapse    872.89    999.088   1087.845   1068.87   1204.416   1289.327    10

## Number of Distinct Values
# Small
microbenchmark(dplyr = summarise_all(gGGDC10S, n_distinct, na.rm = TRUE),
               collapse = fndistinct(cgGGDC10S))
# Unit: microseconds
#      expr       min        lq       mean    median        uq       max neval
#     dplyr 11316.574 11600.847 12573.1010 11759.435 11939.487 31659.667   100
#  collapse   189.051   205.164   226.0933   235.422   239.604   443.661   100

# Large
microbenchmark(dplyr = summarise_all(gdata, n_distinct, na.rm = TRUE),
               collapse = fndistinct(cgdata), times = 5)
# Unit: milliseconds
#      expr        min         lq       mean     median         uq        max neval
#     dplyr 2044.13376 2110.16926 2133.91960 2138.07456 2154.39797 2222.82246     5
#  collapse   30.65443   30.94582   31.51081   31.17123   31.17972   33.60286     5

gc()
#            used  (Mb) gc trigger  (Mb) limit (Mb)  max used   (Mb)
# Ncells  3972309 212.2    8862174 473.3         NA   8862174  473.3
# Vcells 24857303 189.7   75772825 578.2      16384 445825141 3401.4

Below are some additional benchmarks for weighted aggregations and aggregations using the statistical mode, which cannot easily or efficiently be performed with dplyr.

## Weighted Mean
# Small
microbenchmark(fmean(cgGGDC10S, SUM)) 
# Unit: microseconds
#                   expr     min       lq     mean   median       uq     max neval
#  fmean(cgGGDC10S, SUM) 195.488 200.4285 218.2836 211.1295 218.8375 444.276   100

# Large 
microbenchmark(fmean(cgdata, SUM), times = 10) 
# Unit: milliseconds
#                expr      min       lq     mean   median       uq      max neval
#  fmean(cgdata, SUM) 34.73516 35.28276 35.66689 35.32257 36.44802 36.80722    10

## Weighted Standard-Deviation
# Small
microbenchmark(fsd(cgGGDC10S, SUM)) 
# Unit: microseconds
#                 expr     min      lq     mean   median      uq   max neval
#  fsd(cgGGDC10S, SUM) 243.048 244.606 249.2181 246.9635 249.444 323.9   100

# Large 
microbenchmark(fsd(cgdata, SUM), times = 10) 
# Unit: milliseconds
#              expr    min       lq     mean   median       uq      max neval
#  fsd(cgdata, SUM) 44.905 44.93116 45.15391 45.01095 45.22677 46.14689    10

## Statistical Mode
# Small
microbenchmark(fmode(cgGGDC10S)) 
# Unit: microseconds
#              expr     min       lq     mean   median       uq     max neval
#  fmode(cgGGDC10S) 245.098 248.3575 253.4809 250.6945 253.9335 420.619   100

# Large 
microbenchmark(fmode(cgdata), times = 10) 
# Unit: milliseconds
#           expr      min       lq     mean   median      uq     max neval
#  fmode(cgdata) 40.26151 41.82082 41.63019 41.88382 42.0232 42.0587    10

## Weighted Statistical Mode
# Small
microbenchmark(fmode(cgGGDC10S, SUM)) 
# Unit: microseconds
#                   expr     min      lq     mean   median       uq     max neval
#  fmode(cgGGDC10S, SUM) 330.993 333.535 337.7744 334.5395 337.3685 447.187   100

# Large 
microbenchmark(fmode(cgdata, SUM), times = 10) 
# Unit: milliseconds
#                expr      min       lq     mean   median       uq      max neval
#  fmode(cgdata, SUM) 57.69815 57.78466 57.98187 57.84567 58.09942 58.81835    10

gc()
#            used  (Mb) gc trigger  (Mb) limit (Mb)  max used   (Mb)
# Ncells  3971768 212.2    8862174 473.3         NA   8862174  473.3
# Vcells 24853915 189.7   75772825 578.2      16384 445825141 3401.4

3.2 Transformation


## Replacing with group sum
# Small
microbenchmark(dplyr = mutate_all(gGGDC10S, sum, na.rm = TRUE),
               collapse = fsum(cgGGDC10S, TRA = "replace_fill"))
# Unit: microseconds
#      expr       min        lq       mean     median       uq       max neval
#     dplyr 13088.102 13223.340 14388.9000 13359.7680 14380.05 29060.554   100
#  collapse   238.456   273.757   292.1693   293.9905   312.01   388.106   100

# Large
microbenchmark(dplyr = mutate_all(gdata, sum, na.rm = TRUE),
               collapse = fsum(cgdata, TRA = "replace_fill"), times = 10)
# Unit: milliseconds
#      expr       min        lq      mean    median       uq      max neval
#     dplyr 391.63618 679.62609 662.91807 716.40975 729.7527 749.4973    10
#  collapse  49.63788  50.24189  61.77658  55.18416  63.4596 111.6039    10

## Dividing by group sum
# Small
microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) x/sum(x, na.rm = TRUE)),
               collapse = fsum(cgGGDC10S, TRA = "/"))
# Unit: microseconds
#      expr       min         lq       mean   median        uq       max neval
#     dplyr 13058.992 13203.8450 14294.3733 13321.41 13880.796 42300.028   100
#  collapse   242.884   268.5295   278.8541   274.29   294.585   330.255   100

# Large
microbenchmark(dplyr = mutate_all(gdata, function(x) x/sum(x, na.rm = TRUE)),
               collapse = fsum(cgdata, TRA = "/"), times = 10)
# Unit: milliseconds
#      expr      min       lq      mean    median        uq      max neval
#     dplyr 474.9046 654.6199 796.14248 907.32863 942.32567 999.2501    10
#  collapse  49.3542  50.9056  84.66647  52.05635  74.51705 325.4319    10

## Centering
# Small
microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) x-mean.default(x, na.rm = TRUE)),
               collapse = fwithin(cgGGDC10S))
# Unit: microseconds
#      expr      min         lq       mean    median        uq       max neval
#     dplyr 14460.04 14769.4095 15977.4942 14859.815 15013.421 37113.077   100
#  collapse   203.77   229.7845   246.5043   242.638   266.664   293.191   100

# Large
microbenchmark(dplyr = mutate_all(gdata, function(x) x-mean.default(x, na.rm = TRUE)),
               collapse = fwithin(cgdata), times = 10)
# Unit: milliseconds
#      expr       min        lq      mean     median       uq       max neval
#     dplyr 893.06503 925.50231 1217.2225 1259.34620 1445.254 1545.5490    10
#  collapse  43.90731  56.97093  143.4797   73.39498  152.872  429.3341    10

## Centering and Scaling (Standardizing)
# Small
microbenchmark(dplyr = mutate_all(gGGDC10S, function(x) (x-mean.default(x, na.rm = TRUE))/sd(x, na.rm = TRUE)),
               collapse = fscale(cgGGDC10S))
# Unit: microseconds
#      expr       min        lq       mean    median         uq       max neval
#     dplyr 20275.033 21145.524 24976.1242 22214.190 25194.0285 79869.435   100
#  collapse   277.775   304.958   323.3613   314.388   338.2705   437.388   100

# Large
microbenchmark(dplyr = mutate_all(gdata, function(x) (x-mean.default(x, na.rm = TRUE))/sd(x, na.rm = TRUE)),
               collapse = fscale(cgdata), times = 2)
# Unit: milliseconds
#      expr        min         lq      mean    median         uq        max neval
#     dplyr 2118.97696 2118.97696 2315.9282 2315.9282 2512.87938 2512.87938     2
#  collapse   60.17144   60.17144   60.6284   60.6284   61.08537   61.08537     2

## Lag
# Small
microbenchmark(dplyr_unordered = mutate(gGGDC10S, across(everything(), dplyr::lag)),
               collapse_unordered = flag(cgGGDC10S),
               dplyr_ordered = mutate(gGGDC10S, across(everything(), \(x) dplyr::lag(x, order_by = Year))),
               collapse_ordered = flag(cgGGDC10S, t = Year))
# Unit: microseconds
#                expr       min        lq        mean     median         uq       max neval
#     dplyr_unordered 14495.386 14796.101 17579.85413 15265.3250 15889.7550 49137.721   100
#  collapse_unordered    48.544    75.071    90.29225    86.6330   109.6545   225.377   100
#       dplyr_ordered 24893.437 25327.607 27521.59809 25904.9275 27136.2190 51312.074   100
#    collapse_ordered    80.196   107.953   120.85160   117.5675   131.6715   189.051   100

# Large
microbenchmark(dplyr_unordered = mutate(gdata, across(everything(), dplyr::lag)),
               collapse_unordered = flag(cgdata),
               dplyr_ordered = mutate(gdata, across(everything(), \(x) dplyr::lag(x, order_by = Year))),
               collapse_ordered = flag(cgdata, t = Year), times = 2)
# Unit: milliseconds
#                expr        min         lq       mean     median         uq        max neval
#     dplyr_unordered 3461.11500 3461.11500 3471.95821 3471.95821 3482.80142 3482.80142     2
#  collapse_unordered   13.71897   13.71897  211.59809  211.59809  409.47721  409.47721     2
#       dplyr_ordered 5786.57522 5786.57522 6291.90389 6291.90389 6797.23256 6797.23256     2
#    collapse_ordered   25.14399   25.14399   35.36102   35.36102   45.57806   45.57806     2

## First-Difference (unordered)
# Small
microbenchmark(dplyr_unordered = mutate_all(gGGDC10S, function(x) x - dplyr::lag(x)),
               collapse_unordered = fdiff(cgGGDC10S))
# Unit: microseconds
#                expr       min         lq        mean     median        uq       max neval
#     dplyr_unordered 25613.274 25878.0725 27951.41954 26257.3225 27226.808 43048.893   100
#  collapse_unordered    56.539    72.3035    95.72147    91.6965   102.664   254.077   100

# Large
microbenchmark(dplyr_unordered = mutate_all(gdata, function(x) x - dplyr::lag(x)),
               collapse_unordered = fdiff(cgdata), times = 2)
# Unit: milliseconds
#                expr        min         lq       mean     median       uq      max neval
#     dplyr_unordered 3287.88487 3287.88487 3425.69703 3425.69703 3563.509 3563.509     2
#  collapse_unordered   16.58971   16.58971   23.36885   23.36885   30.148   30.148     2

gc()
#            used  (Mb) gc trigger  (Mb) limit (Mb)  max used   (Mb)
# Ncells  3978800 212.5    8862175 473.3         NA   8862175  473.3
# Vcells 24870572 189.8   72805912 555.5      16384 445825141 3401.4

Below again some benchmarks for transformations not easily of efficiently performed with dplyr, such as centering on the overall mean, mean-preserving scaling, weighted scaling and centering, sequences of lags / leads, (iterated) panel-differences and growth rates.

# Centering on overall mean
microbenchmark(fwithin(cgdata, mean = "overall.mean"), times = 10)
# Unit: milliseconds
#                                    expr      min       lq     mean   median       uq      max neval
#  fwithin(cgdata, mean = "overall.mean") 44.66782 48.03445 52.04073 50.07953 53.67134 71.13221    10

# Weighted Centering
microbenchmark(fwithin(cgdata, SUM), times = 10)
# Unit: milliseconds
#                  expr      min       lq     mean   median       uq      max neval
#  fwithin(cgdata, SUM) 40.45204 42.86833 46.55326 46.18277 47.28202 57.82673    10
microbenchmark(fwithin(cgdata, SUM, mean = "overall.mean"), times = 10)
# Unit: milliseconds
#                                         expr      min       lq    mean   median       uq      max
#  fwithin(cgdata, SUM, mean = "overall.mean") 39.99279 40.32256 43.0638 40.60269 41.34366 54.45542
#  neval
#     10

# Weighted Scaling and Standardizing
microbenchmark(fsd(cgdata, SUM, TRA = "/"), times = 10)
# Unit: milliseconds
#                         expr      min      lq     mean   median       uq      max neval
#  fsd(cgdata, SUM, TRA = "/") 50.19536 50.9145 55.12553 53.23862 56.27094 67.46816    10
microbenchmark(fscale(cgdata, SUM), times = 10)
# Unit: milliseconds
#                 expr      min       lq     mean   median       uq      max neval
#  fscale(cgdata, SUM) 54.14792 57.64584 60.83251 59.88025 61.16425 72.31928    10

# Sequence of lags and leads
microbenchmark(flag(cgdata, -1:1), times = 10)
# Unit: milliseconds
#                expr      min       lq     mean   median       uq      max neval
#  flag(cgdata, -1:1) 26.03902 48.02695 194.8518 257.0652 264.5479 276.5348    10

# Iterated difference
microbenchmark(fdiff(cgdata, 1, 2), times = 10)
# Unit: milliseconds
#                 expr      min       lq     mean   median       uq      max neval
#  fdiff(cgdata, 1, 2) 38.76001 39.83896 44.93731 41.08887 48.98348 63.42528    10

# Growth Rate
microbenchmark(fgrowth(cgdata,1), times = 10)
# Unit: milliseconds
#                expr      min       lq     mean   median       uq      max neval
#  fgrowth(cgdata, 1) 11.58627 13.81528 18.05776 14.03489 22.34279 31.15811    10

References

Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). “Patterns of Structural Change in Developing Countries.†. In J. Weiss, & M. Tribe (Eds.), Routledge Handbook of Industry and Development. (pp. 65-83). Routledge.

Cochrane, D. & Orcutt, G. H. (1949). “Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Termsâ€. Journal of the American Statistical Association. 44 (245): 32–61.

Prais, S. J. & Winsten, C. B. (1954). “Trend Estimators and Serial Correlationâ€. Cowles Commission Discussion Paper No. 383. Chicago.


  1. Row-wise operations are not supported by TRA.↩︎

collapse/inst/doc/collapse_object_handling.R0000644000176200001440000000021715202627534020723 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) collapse/inst/doc/collapse_object_handling.Rmd0000644000176200001440000005544615121640575021262 0ustar liggesusers--- title: "collapse's Handling of R Objects" subtitle: "A Quick View Behind the Scenes of Class-Agnostic R Programming" author: "Sebastian Krantz" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: vignette: > %\VignetteIndexEntry{collapse's Handling of R Objects} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This much-requested vignette provides some details about how *collapse* deals with various R objects. It is principally a digest of cumulative details provided in the [NEWS](https://fastverse.org/collapse/news/index.html) for various releases since v1.4.0. ## Overview *collapse* provides a class-agnostic architecture permitting computations on a very broad range of R objects. It provides explicit support for base R classes and data types (*logical*, *integer*, *double*, *character*, *list*, *data.frame*, *matrix*, *factor*, *Date*, *POSIXct*, *ts*) and their popular extensions, including *integer64*, *data.table*, *tibble*, *grouped_df*, *xts*/*zoo*, *pseries*, *pdata.frame*, *units*, and *sf* (no geometric operations). It also introduces [*GRP_df*](https://fastverse.org/collapse/reference/GRP.html) as a more performant and class-agnostic grouped data frame, and [*indexed_series* and *indexed_frame*](https://fastverse.org/collapse/reference/indexing.html) classes as modern class-agnostic successors of *pseries*, *pdata.frame*. These objects inherit the classes they succeed and are handled through `.pseries`, `.pdata.frame`, and `.grouped_df` methods, which also support the original (*plm* / *dplyr*) implementations (details below). All other objects are handled internally at the C or R level using general principles extended by specific considerations for some of the above classes. I start with summarizing the general principles, which enable the usage of *collapse* with further classes it does not explicitly support. ## General Principles In general, *collapse* preserves attributes and classes of R objects in statistical and data manipulation operations unless their preservation involves a **high-risk** of yielding something wrong/useless. Risky operations change the dimensions or internal data type (`typeof()`) of an R object. To *collapse*'s R and C code, there exist 3 principal types of R objects: atomic vectors, matrices, and lists - which are often assumed to be data frames. Most data manipulation functions in *collapse*, like `fmutate()`, only support lists, whereas statistical functions - like the S3 generic [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html) like `fmean()` - generally support all 3 types of objects. S3 generic functions initially dispatch to `.default`, `.matrix`, `.data.frame`, and (hidden) `.list` methods. The `.list` method generally dispatches to the `.data.frame` method. These basic methods, and other non-generic functions in *collapse*, then decide how exactly to handle the object based on the statistical operation performed and attribute handling principles mostly implemented in C. The simplest case arises when an operation preserves the dimensions of the object, such as `fscale(x)` or `fmutate(data, across(a:c, log))`. In this case, all attributes of `x / data` are fully preserved^[Preservation implies a shallow copy of the attribute lists from the original object to the result object. A shallow copy is memory-efficient and means we are copying the list containing the attributes in memory, but not the attributes themselves. Whenever I talk about copying attributes, I mean a shallow copy, not a deep copy. You can perform shallow copies with [helper functions](https://fastverse.org/collapse/reference/small-helpers.html) `copyAttrib()` or `copyMostAttrib()`, and directly set attribute lists using `setAttrib()` or `setattrib()`.]. Another simple case for matrices and lists arises when a statistical operation reduces them to a single dimension such as `fmean(x)`, where, under the `drop = TRUE` default of [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), all attributes apart from (column-)names are dropped and a (named) vector of means is returned. For atomic vectors, a statistical operation like `fmean(x)` will preserve the attributes (except for *ts* objects), as the object could have useful properties such as labels or units. More complex cases involve changing the dimensions of an object. If the number of rows is preserved e.g. `fmutate(data, a_b = a / b)` or `flag(x, -1:1)`, only the (column-)names attribute of the object is modified. If the number of rows is reduced e.g. `fmean(x, g)`, all attributes are also retained under suitable modifications of the (row-)names attribute. However, if `x` is a matrix, other attributes than row- or column-names are only retained if `!is.object(x)`, that is, if the matrix does not have a 'class' attribute. For atomic vectors, attributes are retained if `!inherits(x, "ts")`, as aggregating a time series will break the class. This also applies to columns in a data frame being aggregated. When data is transformed using statistics as provided by the [`TRA()` function](https://fastverse.org/collapse/reference/TRA.html) e.g. `TRA(x, STATS, operation, groups)` and the like-named argument to the [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html), operations that simply modify the input (`x`) in a statistical sense (`"replace_na"`, `"-"`, `"-+"`, `"/"`, `"+"`, `"*"`, `"%%"`, `"-%%"`) just copy the attributes to the transformed object. Operations `"fill"` and `"replace"` are more tricky, since here `x` is replaced with `STATS`, which could be of a different class or data type. The following rules apply: (1) the result has the same data type as `STATS`; (2) if `is.object(STATS)`, the attributes of `STATS` are preserved; (3) otherwise the attributes of `x` are preserved unless `is.object(x) && typeof(x) != typeof(STATS)`; (4) an exemption to this rule is made if `x` is a factor and an integer replacement is offered to STATS e.g. `fnobs(factor, group, TRA = "fill")`. In that case, the attributes of `x` are copied except for the 'class' and 'levels' attributes. These rules were devised considering the possibility that `x` may have important information attached to it which should be preserved in data transformations, such as a `"label"` attribute. So to summarize the general principles: *collapse* just tries to preserve attributes in all cases except where it is likely to break something, beholding the way most commonly used R classes and objects behave. The most likely operations that break something are when aggregating matrices which have a class (such as *mts*/*xts*) or univariate time series (*ts*), or when data is to be replaced by another object. In the latter case, particular attention is paid to integer vectors and factors, as we often count something generating integers, and malformed factors need to be avoided. The following section provides some further details for some *collapse* functions and supported classes. ## Specific Functions and Classes #### Object Conversions [Quick conversion functions](https://fastverse.org/collapse/reference/quick-conversion.html) `qDF`, `qDT`, `qTBL()` and `qM` (to create data.frame's, *data.table*'s, *tibble*'s and matrices from arbitrary R objects) by default (`keep.attr = FALSE`) perform very strict conversions, where all attributes non-essential to the class are dropped from the input object. This is to ensure that, following conversion, objects behave exactly the way users expect. This is different from the behavior of functions like `as.data.frame()`, `as.data.table()`, `as_tibble()` or `as.matrix()` e.g. `as.matrix(EuStockMarkets)` just returns `EuStockMarkets` whereas `qM(EuStockMarkets)` returns a plain matrix without time series attributes. This behavior can be changed by setting `keep.attr = TRUE`, i.e. `qM(EuStockMarkets, keep.attr = TRUE)`. #### Selecting Columns by Data Type Functions [`num_vars()`, `cat_vars()` (the opposite of `num_vars()`), `char_vars()` etc.](https://fastverse.org/collapse/reference/select_replace_vars.html) are implemented in C to avoid the need to check data frame columns by applying an R function such as `is.numeric()`. For `is.numeric`, the C implementation is equivalent to `is_numeric_C <- function(x) typeof(x) %in% c("integer", "double") && !inherits(x, c("factor", "Date", "POSIXct", "yearmon", "yearqtr"))`. This of course does not respect the behavior of other classes that define methods for `is.numeric` e.g. `is.numeric.foo <- function(x) FALSE`, then for `y = structure(rnorm(100), class = "foo")`, `is.numeric(y)` is `FALSE` but `num_vars(data.frame(y))` still returns it. Correct behavior in this case requires `get_vars(data.frame(y), is.numeric)`. A particular case to be aware of is when using `collap()` with the `FUN` and `catFUN` arguments, where the C code (`is_numeric_C`) is used internally to decide whether a column is numeric or categorical. *collapse* does not support statistical operations on complex data. #### Parsing of Time-IDs [*Time Series Functions*](https://fastverse.org/collapse/reference/time-series-panel-series.html) `flag`, `fdiff`, `fgrowth` and `psacf/pspacf/psccf` (and the operators `L/F/D/Dlog/G`) have a `t` argument to pass time-ids for fully identified temporal operations on time series and panel data. If `t` is a plain numeric vector or a factor, it is coerced to integer using `as.integer()`, and the integer steps are used as time steps. This is premised on the observation that the most common form of temporal identifier is a numeric variable denoting calendar years. If on the other hand `t` is a numeric time object such that `is.object(t) && is.numeric(unclass(t))` (e.g. Date, POSIXct, etc.), then it is passed through `timeid()` which computes the greatest common divisor of the vector and generates an integer time-id in that way. Users are therefore advised to use appropriate classes to represent time steps e.g. for monthly data `zoo::yearmon` would be appropriate. It is also possible to pass non-numeric `t`, such as character or list/data.frame. In such cases ordered grouping is applied to generate an integer time-id, but this should rather be avoided. #### *xts*/*zoo* Time Series *xts*/*zoo* time series are handled through `.zoo` methods to all relevant functions. These methods are simple and all follow this pattern: `FUN.zoo <- function(x, ...) if(is.matrix(x)) FUN.matrix(x, ...) else FUN.default(x, ....)`. Thus the general principles apply. Time-Series function do not automatically use the index for indexed computations, partly for consistency with native methods where this is also not the case (e.g. `lag.xts` does not perform an indexed lag), and partly because, as outlined above, the index does not necessarily accurately reflect the time structure. Thus the user must exercise discretion to perform an indexed lag on *xts*/*zoo*. For example: `flag(xts_daily, 1:3, t = index(xts_daily))` or `flag(xts_monthly, 1:3, t = zoo::as.yearmon(index(xts_monthly)))`. #### Support for *sf* and *units* *collapse* internally supports *sf* data frames by seeking to avoid their undue destruction through removal of the 'geometry' column in data manipulation operations. This is simply implemented through an additional check in the C programs used to subset columns of data: if the object is an *sf* data frame, the 'geometry' column is added to the column selection. Other functions like `funique()` or `roworder()` have internal facilities to avoid sorting or grouping on the 'geometry' column. Again other functions like `descr()` and `qsu()` simply omit the geometry column in their statistical calculations. A short [vignette](https://fastverse.org/collapse/articles/collapse_and_sf.html) describes the integration of *collapse* and *sf* in a bit more detail. In summary: *collapse* supports *sf* by seeking to appropriately deal with the 'geometry' column. It cannot perform geometrical operations. For example, after subsetting with `fsubset()`, the bounding box attribute of the geometry is unaltered and likely too large. To preserve *units* objects used in the *sf* ecosystem, all relevant functions also have simple methods of the form `FUN.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(FUN.matrix(x, ...), x) else FUN.default(x, ....)`. According to the general principles, the default method preserves the units class, whereas the matrix method does not if `FUN` aggregates the data. The use of `copyMostAttrib()`, which copies all attributes apart from `"dim"`, `"dimnames"`, and `"names"`, ensures that the returned objects are still *units*. #### Support for *data.table* *collapse* provides quite thorough support for *data.table*. The simplest level of support is that it avoids assigning descriptive (character) row names to *data.table*'s e.g. `fmean(mtcars, mtcars$cyl)` has row-names corresponding to the groups but `fmean(qDT(mtcars), mtcars$cyl)` does not. *collapse* further supports *data.table*'s reference semantics (`set*`, `:=`). To be able to add columns by reference (e.g. `DT[, new := 1]`), *data.table*'s are implemented as overallocated lists^[Notably, additional (hidden) column pointers are allocated to be able to add columns without taking a shallow copy of the *data.table*, and an `".internal.selfref"` attribute containing an external pointer is used to check if any shallow copy was made using base R commands like `<-`.]. *collapse* copied some C code from *data.table* to do the overallocation and generate the `".internal.selfref"` attribute, so that `qDT()` creates a valid and fully functional *data.table*. To enable seamless data manipulation combining *collapse* and *data.table*, all data manipulation functions in *collapse* call this C code at the end and return a valid (overallocated) *data.table*. However, because this overallocation comes at a computational cost of 2-3 microseconds, I have opted against also adding it to the `.data.frame` methods of statistical functions. Concretely, this means that `res <- DT |> fgroup_by(id) |> fsummarise(mu_a = fmean(a))` gives a fully functional *data.table* i.e. `res[, new := 1]` works, but `res2 <- DT |> fgroup_by(id) |> fmean()` gives a non-overallocated *data.table* such that `res2[, new := 1]` will still work but issue a warning. In this case, `res2 <- DT |> fgroup_by(id) |> fmean() |> qDT()` can be used to avoid the warning. This, to me, seems a reasonable trade-off between flexibility and performance. More details and examples are provided in the [*collapse* and *data.table* vignette](https://fastverse.org/collapse/articles/collapse_and_data.table.html). #### Class-Agnostic Grouped and Indexed Data Frames As indicated in the introductory remarks, *collapse* provides a fast [class-agnostic grouped data frame](https://fastverse.org/collapse/reference/GRP.html) created with `fgroup_by()`, and fast [class-agnostic indexed time series and panel data](https://fastverse.org/collapse/reference/indexing.html), created with `findex_by()`/`reindex()`. Class-agnostic means that the object that is grouped/indexed continues to behave as before except in *collapse* operations utilizing the 'groups'/'index_df' attributes. The grouped data frame is implemented as follows: `fgroup_by()` saves the class of the input data, calls `GRP()` on the columns being grouped, and attaches the resulting 'GRP' object in a `"groups"` attribute. It then assigns a class attribute as follows ```r clx <- class(.X) # .X is the data frame being grouped, clx is its class m <- match(c("GRP_df", "grouped_df", "data.frame"), clx, nomatch = 0L) class(.X) <- c("GRP_df", if(length(mp <- m[m != 0L])) clx[-mp] else clx, "grouped_df", if(m[3L]) "data.frame") ``` In words: a class `"GRP_df"` is added in front, followed by the classes of the original object^[Removing `c("GRP_df", "grouped_df", "data.frame")` if present to avoid duplicate classes and allowing grouped data to be re-grouped.], followed by `"grouped_df"` and finally `"data.frame"`, if present. The `"GRP_df"` class is for dealing appropriately with the object through methods for `print()` and subsetting (`[`, `[[`), e.g. `print.GRP_df` fetches the grouping object, prints `fungroup(.X)`^[Which reverses the changes of `fgroup_by()` so that the print method for the original object `.X` is called.], and then prints a summary of the grouping. `[.GRP_df` works similarly: it saves the groups, calls `[` on `fungroup(.X)`, and attaches the groups again if the result is a list with the same number of rows. So *collapse* has no issues printing and handling grouped *data.table*'s, *tibbles*, *sf* data frames, etc. - they continue to behave as usual. Now *collapse* has various functions with a `.grouped_df` method to deal with grouped data frames. For example `fmean.grouped_df`, in a nutshell, fetches the attached 'GRP' object using `GRP.grouped_df`, and calls `fmean.data.frame` on `fungroup(data)`, passing the 'GRP' object to the `g` argument for grouped computation. Here the general principles outlined above apply so that the resulting object has the same attributes as the input. This architecture has an additional advantage: it allows `GRP.grouped_df` to examine the grouping object and check if it was created by *collapse* (class 'GRP') or by *dplyr*. If the latter is the case, an efficient C routine is called to convert the *dplyr* grouping object to a 'GRP' object so that all `.grouped_df` methods in *collapse* apply to data frames created with either `dplyr::group_by()` or `fgroup_by()`. The *indexed_frame* works similarly. It inherits from *pdata.frame* so that `.pdata.frame` methods in *collapse* deal with both *indexed_frame*'s of arbitrary classes and *pdata.frame*'s created with *plm*. A notable difference to both *grouped_df* and *pdata.frame* is that *indexed_frame* is a deeply indexed data structure: each variable inside an *indexed_frame* is an *indexed_series* which contains in its *index_df* attribute an external pointer to the *index_df* attribute of the frame. Functions with *pseries* methods operating on *indexed_series* stored inside the frame (such as `with(data, flag(column))`) can fetch the index from this pointer. This allows worry-free application inside arbitrary data masking environments (`with`, `%$%`, `attach`, etc..) and estimation commands (`glm`, `feols`, `lmrob` etc..) without duplication of the index in memory. As you may have guessed, *indexed_series* are also class-agnostic and inherit from *pseries*. Any vector or matrix of any class can become an *indexed_series*. Further levels of generality are that indexed series and frames allow one, two or more variables in the index to support both time series and complex panels, natively deal with irregularity in time^[This is done through the creation of a time-factor in the *index_df* attribute whose levels represent time steps, i.e., the factor will have unused levels for gaps in time.], and provide a rich set of methods for subsetting and manipulation which also subset the *index_df* attribute, including internal methods for `fsubset()`, `funique()`, `roworder(v)` and `na_omit()`. So *indexed_frame* and *indexed_series* is a rich and general structure permitting fully time-aware computations on nearly any R object. See [`?indexing`](https://fastverse.org/collapse/reference/indexing.html) for more information. ## Conclusion *collapse* handles R objects in a preserving and fairly intelligent manner, allowing seamless compatibility with many common data classes in R, and statistical workflows that preserve attributes (labels, units, etc.) of the data. This is implemented through general principles and some specific considerations/exemptions mostly implemented in C - as detailed in this vignette. The main benefits of this design are generality and execution speed: *collapse* has much fewer R-level method dispatches and function calls than other frameworks used to perform statistical or data manipulation operations, it behaves predictably, and may also work well with your simple new class. The main disadvantage is that the general principles and exemptions are hard-coded in C and thus may not work with specific classes. A prominent example where *collapse* simply fails is *lubridate*'s *interval* class ([#186](https://github.com/fastverse/collapse/issues/186), [#418](https://github.com/fastverse/collapse/issues/418)), which has a `"starts"` attribute of the same length as the data that is preserved but not subset in *collapse* operations. collapse/inst/doc/developing_with_collapse.html0000644000176200001440000024746315202627535021564 0ustar liggesusers Developing with collapse

Developing with collapse

Or: How to Code Efficiently in R

Sebastian Krantz

2024-12-30

Introduction

collapse offers an integrated suite of C/C++-based statistical and data manipulation functions, many low-level tools for memory efficient programming, and a class-agnostic architecture that seamlessly supports vectors, matrices, and data frame-like objects. These features make it an ideal backend for high-performance statistical packages. This vignette is meant to provide some recommendations for developing with collapse. It is complementary to the earlier blog post on programming with collapse which readers are also encouraged to consult. The vignette adds 3 important points for writing efficient R/collapse code.

Point 1: Be Minimalistic in Computations

collapse supports different types of R objects (vectors, matrices, data frames + variants) and it can perform grouped operations on them using different types of grouping information (plain vectors, ‘qG’1 objects, factors, ‘GRP’ objects, grouped or indexed data frames). Grouping can be sorted or unsorted. A key for very efficient code is to use the minimal required operations/objects to get the job done.

Suppose you want to sum an object x by groups using a grouping vector g. If the grouping is only needed once, this should be done using the internal grouping of fsum() without creating external grouping objects - fsum(x, g) for aggregation and fsum(x, g, TRA = "fill") for expansion:

fmean(mtcars$mpg, mtcars$cyl)
#        4        6        8 
# 26.66364 19.74286 15.10000
fmean(mtcars$mpg, mtcars$cyl, TRA = "fill")
#  [1] 19.74286 19.74286 26.66364 19.74286 15.10000 19.74286 15.10000 26.66364 26.66364 19.74286
# [11] 19.74286 15.10000 15.10000 15.10000 15.10000 15.10000 15.10000 26.66364 26.66364 26.66364
# [21] 26.66364 15.10000 15.10000 15.10000 15.10000 26.66364 26.66364 26.66364 15.10000 19.74286
# [31] 15.10000 26.66364

The expansion case is very efficient because it internally uses unsorted grouping. Apart from the default sorted aggregation, these functions efficiently convert your input g into the minimally required information.

In the aggregation case, we can improve performance by also using unsorted grouping, e.g., fsum(x, qF(g, sort = FALSE)) or fsum(x, qG(g, sort = FALSE), use.g.names = FALSE) if the group-names are not needed. It is advisable to also set argument na.exclude = FALSE in qF()/qG() to add a class ‘na.included’ which precludes internal missing value checks in fsum() and friends. If g is a plain vector or the first-appearance order of groups should be kept even if g is a factor, use group(g) instead of qG(g, sort = FALSE, na.exclude = FALSE).2 Set use.g.names = FALSE if not needed (can abbreviate as use = FALSE), and, if your data has no missing values, set na.rm = FALSE for maximum performance.

x <- rnorm(1e7) # 10 million random obs
g <- sample.int(1e6, 1e7, TRUE) # 1 Million random groups
oldopts <- set_collapse(na.rm = FALSE) # No missing values: maximum performance
microbenchmark::microbenchmark(
  internal = fsum(x, g),
  internal_expand = fsum(x, g, TRA = "fill"),
  qF1 = fsum(x, qF(g, sort = FALSE)),
  qF2 = fsum(x, qF(g, sort = FALSE, na.exclude = FALSE)),
  qG1 = fsum(x, qG(g, sort = FALSE), use = FALSE),
  qG2 = fsum(x, qG(g, sort = FALSE, na.exclude = FALSE), use = FALSE),
  group = fsum(x, group(g), use = FALSE), # Same as above basically
  GRP1 = fsum(x, GRP(g)), 
  GRP2 = fsum(x, GRP(g, sort = FALSE)), 
  GRP3 = fsum(x, GRP(g, sort = FALSE, return.groups = FALSE), use = FALSE)
)
# Unit: milliseconds
#             expr       min        lq      mean    median        uq      max neval
#         internal 119.62078 124.61575 133.51499 129.24721 136.84295 187.9376   100
#  internal_expand  87.45751  93.53473 101.63398  97.34573 105.04102 195.5121   100
#              qF1  98.40816 101.62102 110.80120 105.03839 112.72224 265.5931   100
#              qF2  86.75518  89.82823 100.47122  93.89814 103.04776 194.9115   100
#              qG1  88.38563  92.44846 103.28242  97.29579 105.35159 202.8058   100
#              qG2  72.94851  76.86912  87.05558  79.43137  86.15307 262.4734   100
#            group  74.08335  77.19435  87.62058  82.58726  90.61506 162.0318   100
#             GRP1 145.13799 149.54178 163.89938 154.71379 164.11361 297.5056   100
#             GRP2  95.83557  99.05297 109.58577 103.34950 112.50322 266.9996   100
#             GRP3  82.56629  86.15699  97.54058  90.40781  98.05956 328.7744   100

Factors and ‘qG’ objects are efficient inputs to all statistical/transformation functions except for fmedian(), fnth(), fmode(), fndistinct(), and split-apply-combine operations using BY()/gsplit(). For repeated grouped operations involving those, it makes sense to create ‘GRP’ objects using GRP(). These objects are more expensive to create but provide more complete information.3 If sorting is not needed, set sort = FALSE, and if aggregation or the unique groups/names are not needed set return.groups = FALSE.

f <- qF(g); f2 <- qF(g, na.exclude = FALSE)
gg <- group(g) # Same as qG(g, sort = FALSE, na.exclude = FALSE)
grp <- GRP(g)
# Simple functions: factors are efficient inputs
microbenchmark::microbenchmark(
  factor = fsum(x, f),
  factor_nona = fsum(x, f2),
  qG_nona = fsum(x, gg),
  qG_nona_nonam = fsum(x, gg, use = FALSE),
  GRP = fsum(x, grp),
  GRP_nonam = fsum(x, grp, use = FALSE)
)
# Unit: milliseconds
#           expr      min       lq     mean   median       uq      max neval
#         factor 16.02514 16.49498 17.50705 17.11619 18.16497 21.72975   100
#    factor_nona 12.72911 13.15124 14.41943 13.87850 15.03540 23.27144   100
#        qG_nona 14.30178 14.95450 20.48179 15.67930 17.34989 57.15597   100
#  qG_nona_nonam 11.57118 12.00423 13.12157 12.49071 13.61801 23.31219   100
#            GRP 12.83345 13.08907 14.45512 13.95154 15.21594 21.46473   100
#      GRP_nonam 12.67589 13.22139 14.15271 13.76600 14.84057 20.36359   100

# Complex functions: more information helps
microbenchmark::microbenchmark(
  qG = fmedian(x, gg, use = FALSE),
  GRP = fmedian(x, grp, use = FALSE), times = 10)
# Unit: milliseconds
#  expr      min       lq     mean   median       uq      max neval
#    qG 258.4450 261.9357 267.2520 264.2608 267.4161 297.1552    10
#   GRP 191.8623 193.0631 196.0935 193.4358 194.6245 210.3685    10
set_collapse(oldopts)

Why not always use group() for unsorted grouping with simple functions? You can do that, but qF()/qG() are a bit smarter when it comes to handling input factors/‘qG’ objects whereas group() hashes every vector:

microbenchmark::microbenchmark(
  factor_factor = qF(f),
  # This checks NA's and adds 'na.included' class -> full deep copy
  factor_factor2 = qF(f, na.exclude = FALSE), 
  # NA checking costs.. incurred in fsum() and friends
  check_na = collapse:::is.nmfactor(f), 
  check_na2 = collapse:::is.nmfactor(f2),
  factor_qG = qF(gg),
  qG_factor = qG(f),
  qG_qG = qG(gg),
  group_factor = group(f),
  group_qG = group(gg)
)
# Unit: nanoseconds
#            expr      min         lq        mean     median         uq      max neval
#   factor_factor     1107     2562.5     6925.31     7298.0     9676.0    19270   100
#  factor_factor2  5926960  6147663.0  6898849.83  6235136.5  6421686.5 15325349   100
#        check_na  3440474  3503880.5  3525056.59  3513597.5  3524770.0  3927185   100
#       check_na2      287     1496.5     3325.10     3341.5     4243.5     9922   100
#       factor_qG     2583    11644.0    15105.63    15887.5    18614.0    31898   100
#       qG_factor     1927     4284.5    10171.28     9614.5    13796.5    50799   100
#           qG_qG     1476     2583.0     6674.39     6498.5     8897.0    23124   100
#    group_factor 16066629 16300165.0 17378151.76 16489011.0 16858872.0 54181582   100
#        group_qG 13824175 14194917.5 15083957.81 14347396.5 14700345.0 22289117   100

Only in rare cases are grouped/indexed data frames created with fgroup_by()/findex_by() needed in package code. Likewise, functions like fsummarise()/fmutate() are essentially wrappers. For example

mtcars |>
  fgroup_by(cyl, vs, am) |>
  fsummarise(mpg = fsum(mpg),
             across(c(carb, hp, qsec), fmean))
#   cyl vs am   mpg     carb        hp     qsec
# 1   4  0  1  26.0 2.000000  91.00000 16.70000
# 2   4  1  0  68.7 1.666667  84.66667 20.97000
# 3   4  1  1 198.6 1.428571  80.57143 18.70000
# 4   6  0  1  61.7 4.666667 131.66667 16.32667
# 5   6  1  0  76.5 2.500000 115.25000 19.21500
# 6   8  0  0 180.6 3.083333 194.16667 17.14250
# 7   8  0  1  30.8 6.000000 299.50000 14.55000

is the same as (again use = FALSE abbreviates use.g.names = FALSE)

g <- GRP(mtcars, c("cyl", "vs", "am"))

add_vars(g$groups,
  get_vars(mtcars, "mpg") |> fsum(g, use = FALSE),
  get_vars(mtcars, c("carb", "hp", "qsec")) |> fmean(g, use = FALSE)
)
#   cyl vs am   mpg     carb        hp     qsec
# 1   4  0  1  26.0 2.000000  91.00000 16.70000
# 2   4  1  0  68.7 1.666667  84.66667 20.97000
# 3   4  1  1 198.6 1.428571  80.57143 18.70000
# 4   6  0  1  61.7 4.666667 131.66667 16.32667
# 5   6  1  0  76.5 2.500000 115.25000 19.21500
# 6   8  0  0 180.6 3.083333 194.16667 17.14250
# 7   8  0  1  30.8 6.000000 299.50000 14.55000

To be clear: nothing prevents you from using these wrappers - they are quite efficient - but if you want to change all inputs programmatically it makes sense to go down one level - your code will also become safer.4

In general, think carefully about how to vectorize in a minimalistic and memory efficient way. You will find that you can craft very parsimonious and efficient code to solve complicated problems.

For example, after merging multiple spatial datasets, I had some of the same map features (businesses) from multiple sources, and, unwilling to match features individually across data sources, I decided to keep the richest source covering each feature type and location. After creating a feature importance indicator comparable across sources, the deduplication expression ended up being a single line of the form: fsubset(data, source == fmode(source, list(location, type), importance, "fill")) - keep features from the importance-weighted most frequent source by location and type.

If an effective collapse solution is not apparent, other packages may offer efficient solutions. Check out the fastverse and its suggested packages list. For example if you want to efficiently replace multiple items in a vector, kit::vswitch()/nswitch() can be pretty magical. Also functions like data.table::set()/rowid() etc. are great.

Point 2: Think About Memory and Optimize

R programs are inefficient for 2 principal reasons: (1) operations are not vectorized; (2) too many intermediate objects/copies are created. collapse’s vectorized statistical functions help with (1), but it also provides many efficient programming functions to deal with (2).

One source of inefficiency in R code is the widespread use of logical vectors. For example

x <- abs(round(rnorm(1e6)))
x[x == 0] <- NA

where x == 0 creates a logical vector of 1 million elements just to indicate to R which elements of x are 0. In collapse, setv(x, 0, NA) is the efficient equivalent. This also works if we don’t want to replace with NA but with another vector y:

y <- rnorm(1e6)
setv(x, NA, y) # Replaces missing x with y

is much better than

x[is.na(x)] <- y[is.na(x)]

setv() is quite versatile and also works with indices and logical vectors instead of elements to search for. You can also invert the query by setting invert = TRUE.

In more complex workflows, we may wish to save the logical vector, e.g., xmiss <- is.na(x), and use it repeatedly. One aspect to note here is that logical vectors are inefficient for subsetting compared to indices:

xNA <- na_insert(x, prop = 0.4)
xmiss <- is.na(xNA)
ind <- which(xmiss)
bench::mark(x[xmiss], x[ind])
# # A tibble: 2 × 6
#   expression      min   median `itr/sec` mem_alloc `gc/sec`
#   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
# 1 x[xmiss]     3.34ms   3.58ms      269.    8.39MB     4.21
# 2 x[ind]     771.74µs 972.11µs     1025.    3.05MB     6.61

Thus, indices are always preferable. With collapse, they can be created directly using whichNA(xNA) in this case, or whichv(x, 0) for which(x == 0) or any other number. Also here there exist an invert = TRUE argument covering the != case. For convenience, infix operators x %==% 0 and x %!=% 0 wrap whichv(x, 0) and whichv(x, 0, invert = TRUE), respectively.

Similarly, fmatch() supports faster matching with associated operators %iin% and %!iin% which also return indices, e.g., letters %iin% c("a", "b") returns 1:2. This can also be used in subsetting:

bench::mark(
  `%in%` = fsubset(wlddev, iso3c %in% c("USA", "DEU", "ITA", "GBR")),
  `%iin%` = fsubset(wlddev, iso3c %iin% c("USA", "DEU", "ITA", "GBR"))
)
# # A tibble: 2 × 6
#   expression      min   median `itr/sec` mem_alloc `gc/sec`
#   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
# 1 %in%        146.8µs  165.7µs     6008.     3.8MB     2.12
# 2 %iin%        17.3µs   23.6µs    39878.   130.4KB    23.9

Likewise, anyNA(), allNA(), anyv() and allv() help avoid expressions like any(x == 0) in favor of anyv(x, 0). Other convenience functions exist such as na_rm(x) for the common x[!is.na(x)] expression which is extremely inefficient.

Another hint here particularly for data frame subsetting is the ss() function, which has an argument check = FALSE to avoid checks on indices (small effect with this data size):

ind <- wlddev$iso3c %!iin% c("USA", "DEU", "ITA", "GBR")
microbenchmark::microbenchmark(
  withcheck = ss(wlddev, ind),
  nocheck = ss(wlddev, ind, check = FALSE)
)
# Unit: microseconds
#       expr    min       lq     mean   median       uq     max neval
#  withcheck 48.749 106.6615 124.4366 122.1595 143.8895 256.619   100
#    nocheck 47.355 105.5750 126.9225 119.6380 150.8595 344.113   100

Another common source of inefficiencies is copies produced in statistical operations. For example

x <- rnorm(100); y <- rnorm(100); z <- rnorm(100)
res <- x + y + z # Creates 2 copies

For this particular case res <- kit::psum(x, y, z) offers an efficient solution5. A more general solution is

res <- x + y
res %+=% z

collapse’s %+=%, %-=%, %*=% and %/=% operators are wrappers around the setop() function which also works with matrices and data frames.6 This function also has a rowwise argument for operations between vectors and matrix/data.frame rows:

m <- qM(mtcars)
setop(m, "*", seq_col(m), rowwise = TRUE)
head(m / qM(mtcars))
#                   mpg cyl disp hp drat wt qsec  vs  am gear carb
# Mazda RX4           1   2    3  4    5  6    7 NaN   9   10   11
# Mazda RX4 Wag       1   2    3  4    5  6    7 NaN   9   10   11
# Datsun 710          1   2    3  4    5  6    7   8   9   10   11
# Hornet 4 Drive      1   2    3  4    5  6    7   8 NaN   10   11
# Hornet Sportabout   1   2    3  4    5  6    7 NaN NaN   10   11
# Valiant             1   2    3  4    5  6    7   8 NaN   10   11

Some functions like na_locf()/na_focb() also have set = TRUE arguments to perform operations by reference.7 There is also setTRA() for (grouped) transformations by reference, wrapping TRA(..., set = TRUE). Since TRA is added as an argument to all Fast Statistical Functions, set = TRUE can be passed down to modify by reference. For example:

fmedian(iris$Sepal.Length, iris$Species, TRA = "fill", set = TRUE)

Is the same as setTRA(iris$Sepal.Length, fmedian(iris$Sepal.Length, iris$Species), "fill", iris$Species), replacing the values of the Sepal.Length vector with its species median by reference:

head(iris)
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1            5         3.5          1.4         0.2  setosa
# 2            5         3.0          1.4         0.2  setosa
# 3            5         3.2          1.3         0.2  setosa
# 4            5         3.1          1.5         0.2  setosa
# 5            5         3.6          1.4         0.2  setosa
# 6            5         3.9          1.7         0.4  setosa

This set argument can be invoked anywhere, also inside fmutate() calls with/without groups. This can also be done in combination with other transformations (sweeping operations). For example, the following turns the columns of the matrix into proportions.

fsum(m, TRA = "/", set = TRUE)
fsum(m) # Check
#  mpg  cyl disp   hp drat   wt qsec   vs   am gear carb 
#    1    1    1    1    1    1    1    1    1    1    1

In summary, think what is really needed to complete a task and keep things to a minimum in terms of both computations and memory. Let’s do a final exercise in this regard and create a hyper-efficient function for univariate linear regression by groups:

greg <- function(y, x, g) {
  g <- group(g)
  dmx <- fmean(x, g, TRA = "-", na.rm = FALSE)
  (fsum(y, g, dmx, use = FALSE, na.rm = FALSE) %/=%
   fsum(dmx, g, dmx, use = FALSE, na.rm = FALSE))
}

# Test
y <- rnorm(1e7)
x <- rnorm(1e7)
g <- sample.int(1e6, 1e7, TRUE)

microbenchmark::microbenchmark(greg(y, x, g), group(g))
# Unit: milliseconds
#           expr       min        lq     mean    median        uq      max neval
#  greg(y, x, g) 131.39639 138.68961 153.1586 145.78243 161.48137 305.5862   100
#       group(g)  62.41733  64.80468  72.2558  68.87266  73.21657 153.1643   100

The expression computed by greg() amounts to sum(y * (x - mean(x)))/sum((x - mean(x))^2) for each group, which is equivalent to cov(x, y)/var(x), but very efficient, requiring exactly one full copy of x to create a group-demeaned vector, dmx, and then using the w (weights) argument to fsum() to sum the products (y * dmx and dmx * dmx) on the fly, including a division by reference avoiding an additional copy. One cannot do much better coding a grouped regression directly in C.

Point 3: Internally Favor Primitive R Objects and Functions

This partly reiterates Point 1 but now with a focus on internal data representation rather than grouping and computing. The point could also be bluntly stated as: ‘vectors, matrices and lists are good, data frames and complex objects are bad’.

Many frameworks seem to imply the opposite - the tidyverse encourages you to cast your data as a tidy tibble, and data.table offers you a more efficient data frame. But these objects are internally complex, and, in the case of data.table, only efficient because of the internal C-level algorithms for large-data manipulation. You should always take a step back to ask yourself: for the statistical software I am writing, do I need this complexity? Complex objects require complex methods to manipulate them, thus, when using them, you incur the cost of everything that goes on in these methods. Vectors, matrices, and lists are much more efficient in R and collapse provides you with many options to manipulate them directly.

It may surprise you to hear that, internally, collapse does not use data frame-like objects at all. Instead, such objects are cast to lists using unclass(data), class(data) <- NULL, or attributes(data) <- NULL. This is advisable if you want to write fast package code for data frame-like objects.

The benchmark below illustrates that basically everything you do on a data.frame is more expensive than on the equivalent list.

l <- unclass(mtcars)
nam <- names(mtcars)
microbenchmark::microbenchmark(names(mtcars), attr(mtcars, "names"), names(l),
               names(mtcars) <- nam, attr(mtcars, "names") <- nam, names(l) <- nam,
               mtcars[["mpg"]], .subset2(mtcars, "mpg"), l[["mpg"]],
               mtcars[3:8], .subset(mtcars, 3:8), l[3:8],
               ncol(mtcars), length(mtcars), length(unclass(mtcars)), length(l),
               nrow(mtcars), length(.subset2(mtcars, 1L)), length(l[[1L]]))
# Unit: nanoseconds
#                          expr  min   lq    mean median     uq   max neval
#                 names(mtcars)  164  205  240.26    246  246.0   410   100
#         attr(mtcars, "names")   41   82  109.88     82  123.0  1476   100
#                      names(l)    0    0   24.60     41   41.0    82   100
#          names(mtcars) <- nam  451  492  651.90    656  697.0  3321   100
#  attr(mtcars, "names") <- nam  287  369  480.52    451  492.0  4346   100
#               names(l) <- nam  164  246  276.34    246  287.0   533   100
#               mtcars[["mpg"]] 2009 2091 2363.65   2173 2296.0 15539   100
#       .subset2(mtcars, "mpg")   41   41   68.88     82   82.0   164   100
#                    l[["mpg"]]   41   82   78.31     82   82.0   205   100
#                   mtcars[3:8] 5166 5371 5607.98   5453 5576.0 15908   100
#          .subset(mtcars, 3:8)  246  246  321.03    287  328.0  2788   100
#                        l[3:8]  246  287  305.45    287  328.0   492   100
#                  ncol(mtcars) 1025 1107 1200.07   1189 1230.0  2255   100
#                length(mtcars)  164  205  249.28    246  266.5   492   100
#       length(unclass(mtcars))  123  164  176.71    164  164.0   861   100
#                     length(l)    0    0   18.86      0   41.0   287   100
#                  nrow(mtcars) 1025 1107 1239.84   1148 1230.0  6642   100
#  length(.subset2(mtcars, 1L))   41   82  113.57     82  123.0  1845   100
#               length(l[[1L]])   41   82  100.45     82  123.0   492   100

By means of further illustration, let’s recreate the pwnobs() function in collapse which counts pairwise missing values. The list method is written in R. A basic implementation is:8

pwnobs_list <- function(X) {
    dg <- fnobs(X)
    n <- ncol(X)
    nr <- nrow(X)
    N.mat <- diag(dg)
    for (i in 1:(n - 1L)) {
        miss <- is.na(X[[i]])
        for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - sum(miss | is.na(X[[j]]))
    }
    rownames(N.mat) <- names(dg)
    colnames(N.mat) <- names(dg)
    N.mat
}

mtcNA <- na_insert(mtcars, prop = 0.2)
pwnobs_list(mtcNA)
#      mpg cyl disp hp drat wt qsec vs am gear carb
# mpg   26  20   20 20   20 20   21 22 21   21   22
# cyl   20  26   21 20   22 21   22 22 22   23   20
# disp  20  21   26 22   22 23   22 22 21   21   22
# hp    20  20   22 26   21 23   22 20 20   21   21
# drat  20  22   22 21   26 23   21 21 20   21   21
# wt    20  21   23 23   23 26   22 21 21   20   20
# qsec  21  22   22 22   21 22   26 22 20   22   20
# vs    22  22   22 20   21 21   22 26 20   23   21
# am    21  22   21 20   20 21   20 20 26   20   21
# gear  21  23   21 21   21 20   22 23 20   26   20
# carb  22  20   22 21   21 20   20 21 21   20   26

Now with the above tips we can optimize this as follows:

pwnobs_list_opt <- function(X) {
    dg <- fnobs.data.frame(X)
    class(X) <- NULL
    n <- length(X)
    nr <- length(X[[1L]])
    N.mat <- diag(dg)
    for (i in 1:(n - 1L)) {
        miss <- is.na(X[[i]])
        for (j in (i + 1L):n) N.mat[i, j] <- N.mat[j, i] <- nr - sum(miss | is.na(X[[j]]))
    }
    dimnames(N.mat) <- list(names(dg), names(dg))
    N.mat
}

identical(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA))
# [1] TRUE

microbenchmark::microbenchmark(pwnobs_list(mtcNA), pwnobs_list_opt(mtcNA))
# Unit: microseconds
#                    expr     min       lq      mean  median      uq     max neval
#      pwnobs_list(mtcNA) 153.217 160.1255 185.09696 179.744 215.004 241.654   100
#  pwnobs_list_opt(mtcNA)  27.429  31.1600  33.38507  32.964  35.137  45.387   100

Evidently, the optimized function is 6x faster on this (small) dataset and we have changed nothing to the loops doing the computation. With larger data the difference is less stark, but you never know what’s going on in methods you have not written and how they scale. My advice is: try to avoid them, use simple objects and take full control over your code. This also makes your code more robust and you can create class-agnostic code. If the latter is your intent the vignette on collapse’s object handling will also be helpful.

If you only use collapse functions this discussion is void - all collapse functions designed for data frames, including join(), pivot(), fsubset(), etc., internally handle your data as a list and are equally efficient on data frames and lists. However, if you want to use base R semantics ([, etc.) alongside collapse and other functions, it makes sense to unclass incoming data frame-like objects and reclass them at the end.

If you don’t want to internally convert data frames to lists, at least use functions .subset(), .subset2(), or collapse::get_vars() to efficiently extract columns and attr() to extract/set attributes. With matrices, use dimnames() directly instead of rownames() and colnames() which wrap it.

Also avoid as.data.frame() and friends to coerce/recreate data frame-like objects. It is quite easy to construct a data.frame from a list:

attr(l, "row.names") <- .set_row_names(length(l[[1L]]))
class(l) <- "data.frame"
head(l, 2)
#   mpg cyl disp  hp drat    wt  qsec vs am gear carb
# 1  21   6  160 110  3.9 2.620 16.46  0  1    4    4
# 2  21   6  160 110  3.9 2.875 17.02  0  1    4    4

You can also use collapse functions qDF(), qDT() and qTBL() to efficiently convert/create data.frame’s, data.table’s, and tibble’s:

library(data.table)
library(tibble)
microbenchmark::microbenchmark(qDT(mtcars), as.data.table(mtcars),
                               qTBL(mtcars), as_tibble(mtcars))
# Unit: microseconds
#                   expr    min     lq     mean  median      uq      max neval
#            qDT(mtcars)  2.952  3.280  6.35705  3.5670  3.8130  269.534   100
#  as.data.table(mtcars) 34.194 36.572 44.93641 37.4535 39.2985  697.410   100
#           qTBL(mtcars)  2.419  2.583  3.19267  2.8700  2.9930   38.704   100
#      as_tibble(mtcars) 48.257 49.569 71.56304 50.4095 52.5005 2050.533   100

l <- unclass(mtcars)
microbenchmark::microbenchmark(qDF(l), as.data.frame(l), as.data.table(l), as_tibble(l))
# Unit: microseconds
#              expr     min       lq      mean   median      uq     max neval
#            qDF(l)   1.722   2.2140   4.51779   2.4600   2.747 199.424   100
#  as.data.frame(l) 210.412 225.1515 242.65973 248.3370 254.569 301.186   100
#  as.data.table(l)  70.889  77.2030  90.30086  83.0045  88.683 798.393   100
#      as_tibble(l)  55.350  61.8690  68.20924  67.0760  72.898 139.769   100

collapse also provides functions like setattrib(), copyMostAttrib(), etc., to efficiently attach attributes again. So another efficient workflow for general data frame-like objects is to save the attributes ax <- attributes(data), manipulate it as a list attributes(data) <- NULL, modify ax$names and ax$row.names as needed and then use setattrib(data, ax) before returning.

Some Notes on Global Options

collapse has its own set of global options which can be set using set_collapse() and retrieved using get_collapse().9 This confers responsibilities upon package developers as setting these options inside a package also affects how collapse behaves outside of your package.

In general, the same rules apply as for setting other R options through options() or par(): they need to be reset using on.exit() so that the user choices are unaffected even if your package function breaks. For example, if you want a block of code multithreaded and without missing value skipping for maximum performance:

fast_function <- function(x, ...) {
  
  # Your code...

  oldopts <- set_collapse(nthreads = 4, na.rm = FALSE)
  on.exit(set_collapse(oldopts)) 
  
  # Multithreaded code...
}

Namespace masking (options mask and remove) should not be set inside packages because it may have unintended side-effects for the user (e.g., collapse appears at the top of the search() path afterwards).

Conversely, user choices in set_collapse() also affect your package code, except for namespace masking as you should specify explicitly which collapse functions you are using (e.g., via importFrom("collapse", "fmean") in NAMESPACE or collapse::fmean() in your code).

Particularly options na.rm, nthreads, and sort, if set by the user, will impact your code, unless you explicitly set the targeted arguments (e.g., nthreads and na.rm in statistical functions like fmean(), and sort arguments in grouping functions like GRP()/qF()/qG()/fgroup_by()).

My general view is that this is not necessary - if the user sets set_collapse(na.rm = FALSE) because data has no missing values, then it is good if that also speeds up your package functions. However, if your package code generates missing values and expects collapse functions to skip them you should take care of this using either set_collapse() + on.exit() or explicitly setting na.rm = TRUE in all relevant functions.

Also watch out for internally-grouped aggregations using Fast Statistical Functions, which are affected by global defaults:

fmean(mtcars$mpg, mtcars$cyl)
#        4        6        8 
# 26.66364 19.74286 15.10000
oldopts <- set_collapse(sort = FALSE)
fmean(mtcars$mpg, mtcars$cyl)
#        6        4        8 
# 19.74286 26.66364 15.10000

Statistical functions do not have sort arguments, thus, if it is crucial that the output remains sorted, ensure that a sorted factor, ‘qG’, or ‘GRP’ object is passed:

fmean(mtcars$mpg, qF(mtcars$cyl, sort = TRUE))
#        4        6        8 
# 26.66364 19.74286 15.10000
set_collapse(oldopts)

Of course, you can also check which options the user has set and adjust your code, e.g. 

# Your code ...
if(!get_collapse("sort")) {
  oldopts <- set_collapse(sort = TRUE)
  on.exit(set_collapse(oldopts)) 
}
# Critical code ...

Conclusion

collapse can become a game-changer for your statistical software development in R, enabling you to write programs that effectively run like C while accomplishing complex statistical/data tasks with few lines of code. This however requires taking a closer look at the package, in particular the documentation, and following the advice given in this vignette.


  1. Alias for quick-group.↩︎

  2. group() directly calls a C-based hashing algorithm which works for all types of vectors and lists of vectors/data frames. Missing values are treated as distinct elements.↩︎

  3. See ?GRP, in particular the ‘Value’ section.↩︎

  4. If you do use fgroup_by() in a package use it with non-standard evaluation, e.g., fgroup_by(cyl, vs, am). Don’t do ind <- c("cyl", "vs", "am") and then fgroup_by(ind) as the data may contain a column called ind. For such cases use group_by_vars(ind).↩︎

  5. In general, also see other packages, in particular kit and data.table for useful programming functions.↩︎

  6. Note that infix operators do not obey the rules of arithmetic but are always evaluated from left to right.↩︎

  7. Note that na_locf()/na_focb() are not vectorized across groups, thus, if using them in a grouped fmutate() call, adding set = TRUE will save some memory on intermediate objects.↩︎

  8. By Point 2 this implementation is not ideal because I am creating two logical vectors for each iteration of the inner loop, but I currently don’t see any way to write this more efficiently.↩︎

  9. This is done mainly for efficiency reasons, but also do implement advanced options such as namespace masking (options mask and remove). The options are stored in an internal environment called .op visible in the documentation of some functions such as fmean() when used to set argument defaults.↩︎

collapse/inst/doc/collapse_for_tidyverse_users.Rmd0000644000176200001440000003762015121640707022244 0ustar liggesusers--- title: "collapse for tidyverse Users" author: "Sebastian Krantz" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse for tidyverse Users} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{css, echo=FALSE} pre { max-height: 500px; overflow-y: auto; } pre[class] { max-height: 500px; } ``` ```{r, echo=FALSE} oldopts <- options(width = 100L) ``` ```{r, echo = FALSE, message = FALSE, warning=FALSE} knitr::opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE, comment = "#", tidy = FALSE, cache = TRUE, collapse = TRUE, fig.width = 8, fig.height = 5, out.width = '100%') ``` *collapse* is a C/C++ based package for data transformation and statistical computing in R that aims to enable greater performance and statistical complexity in data manipulation tasks and offers a stable, class-agnostic, and lightweight API. It is part of the core [*fastverse*](https://fastverse.org/fastverse/), a suite of lightweight packages with similar objectives. The [*tidyverse*](https://tidyverse.org/) set of packages provides a rich, expressive, and consistent syntax for data manipulation in R centering on the *tibble* object and tidy data principles (each observation is a row, each variable is a column). *collapse* fully supports the *tibble* object and provides many *tidyverse*-like functions for data manipulation. It can thus be used to write *tidyverse*-like data manipulation code that, thanks to low-level vectorization of many statistical operations and optimized R code, typically runs much faster than native *tidyverse* code, in addition to being much more lightweight in dependencies. Its aim is not to create a faster *tidyverse*, i.e., it does not implements all aspects of the rich *tidyverse* grammar or changes to it^[Notably, tidyselect, lambda expressions, and many of the smaller helper functions are left out.], and also takes inspiration from other leading data manipulation libraries to serve broad aims of performance, parsimony, complexity, and robustness in data manipulation for R. ## Namespace and Global Options *collapse* data manipulation functions familiar to *tidyverse* users include `fselect`, `fgroup_by`, `fsummarise`, `fmutate`, `across`, `frename`, `fslice`, and `fcount`. Other functions like `fsubset`, `ftransform`, and `get_vars` are inspired by base R, while again other functions like `join`, `pivot`, `roworder`, `colorder`, `rowbind`, etc. are inspired by other data manipulation libraries such as *data.table* and *polars*. By virtue of the f- prefixes, the *collapse* namespace has no conflicts with the *tidyverse*, and these functions can easily be substituted in a *tidyverse* workflow. R users willing to replace the *tidyverse* have the additional option to mask functions and eliminate the prefixes with `set_collapse`. For example ```{r} library(collapse) set_collapse(mask = "manip") # version >= 2.0.0 ``` makes available functions `select`, `group_by`, `summarise`, `mutate`, `rename`, `count`, `subset`, `slice`, and `transform` in the *collapse* namespace and detaches and re-attaches the package, such that the following code is executed by *collapse*: ```{r} mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), mean), qsec_wt = weighted.mean(qsec, wt)) ``` *Note* that the correct documentation still needs to be called with prefixes, i.e., `?fsubset`. See `?set_collapse` for further options to the package, which also includes optimization options such as `nthreads`, `na.rm`, `sort`, and `stable.algo`. *Note* also that if you use *collapse*'s namespace masking, you can use `fastverse::fastverse_conflicts()` to check for namespace conflicts with other packages. ## Using the *Fast Statistical Functions* A key feature of *collapse* is that it not only provides functions for data manipulation, but also a full set of statistical functions and algorithms to speed up statistical calculations and perform more complex statistical operations (e.g. involving weights or time series data). Notably among these, the [*Fast Statistical Functions*](https://fastverse.org/collapse/reference/fast-statistical-functions.html) is a consistent set of S3-generic statistical functions providing fully vectorized statistical operations in R. Specifically, operations such as calculating the mean via the S3 generic `fmean()` function are vectorized across columns and groups and may also involve weights or transformations of the original data: ```{r} fmean(mtcars$mpg) # Vector fmean(EuStockMarkets) # Matrix fmean(mtcars) # Data Frame fmean(mtcars$mpg, w = mtcars$wt) # Weighted mean fmean(mtcars$mpg, g = mtcars$cyl) # Grouped mean fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt) # Weighted group mean fmean(mtcars[5:10], g = mtcars$cyl, w = mtcars$wt) # Of data frame fmean(mtcars$mpg, g = mtcars$cyl, w = mtcars$wt, TRA = "fill") # Replace data by weighted group mean # etc... ``` The data manipulation functions of *collapse* are integrated with these *Fast Statistical Functions* to enable vectorized statistical operations. For example, the following code ```{r} mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), fmean), qsec_wt = fmean(qsec, wt)) ``` gives exactly the same result as above, but the execution is much faster (especially on larger data), because with *Fast Statistical Functions*, the data does not need to be split by groups, and there is no need to call `lapply()` inside the `across()` statement: `fmean.data.frame()` is simply applied to a subset of the data containing columns `mpg`, `carb` and `hp`. The *Fast Statistical Functions* also have a method for grouped data, so if we did not want to calculate the weighted mean of `qsec`, the code would simplify as follows: ```{r} mtcars |> subset(mpg > 11) |> group_by(cyl, vs, am) |> select(mpg, carb, hp) |> fmean() ``` Note that all functions in *collapse*, including the *Fast Statistical Functions*, have the default `na.rm = TRUE`, i.e., missing values are skipped in calculations. This can be changed using `set_collapse(na.rm = FALSE)` to give behavior more consistent with base R. Another thing to be aware of when using *Fast Statistical Functions* inside data manipulation functions is that they toggle vectorized execution wherever they are used. E.g. ```{r} mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + min(qsec)) # Vectorized ``` calculates a grouped mean of `mpg` but adds the overall minimum of `qsec` to the result, i.e., it is equivalent to `fmean(mpg, g = cyl) + min(qsec)`. On the other hand ```{r} mtcars |> group_by(cyl) |> summarise(mpg = fmean(mpg) + fmin(qsec)) # Vectorized mtcars |> group_by(cyl) |> summarise(mpg = mean(mpg) + min(qsec)) # Not vectorized ``` both give the mean + the minimum within each group, but calculated in different ways: the former is equivalent to `fmean(mpg, g = cyl) + fmin(qsec, g = cyl)`, whereas the latter is equal to `sapply(gsplit(mpg, cyl), function(x) mean(x) + min(x))`. See `?fsummarise` and `?fmutate` for more detailed examples. This *eager vectorization* approach is intentional as it allows users to vectorize complex expressions and fall back to base R if this is not desired. [This blog post](https://andrewghazi.github.io/posts/collapse_is_sick/sick.html) by Andrew Ghazi provides an excellent example of computing a p-value test statistic by groups. *Note* that only expressions typed out can be vectorized; expressions inside functions such as `mean_plus_min <- function(x) fmean(x) + fmin(x)` are not vectorized.^[*collapse* can only read what you type, e.g. `exp <- substitute(fmean(mpg) + min(mpg))`, then `all_funs(exp)` gives `c("+", "fmean", "min")`, and `any(all_funs(exp) %in% .FAST_STAT_FUN)` returns `TRUE`, signifying to `fsummarise()` that the expression should be executed only once with the grouping object passed to the `g` argument of `fmean()`, instead of it being executed once for every group.] To take full advantage of *collapse*, it is thus highly recommended to use the *Fast Statistical Functions* as much as possible. ## Writing Efficient Code It is also performance-critical to correctly sequence operations and limit excess computations. *tidyverse* code is often inefficient simply because the *tidyverse* allows you to do everything. For example, `mtcars |> group_by(cyl) |> filter(mpg > 13) |> arrange(mpg)` is permissible but inefficient code as it filters and reorders grouped data, requiring modifications to both the data frame and the attached grouping object. *collapse* does not allow calls to `fsubset()` on grouped data, and messages about it in `roworder()`, encouraging you to write more efficient code. The above example can also be optimized because we are subsetting the whole frame and then doing computations on a subset of columns. It would be more efficient to select all required columns during the subset operation: ```{r} mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp, qsec, wt) |> group_by(cyl, vs, am) |> summarise(across(c(mpg, carb, hp), fmean), qsec_wt = fmean(qsec, wt)) ``` Without the weighted mean of `qsec`, this would simplify to ```{r} mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |> group_by(cyl, vs, am) |> fmean() ``` Finally, we could set the following options to toggle unsorted grouping, no missing value skipping, and multithreading across the three columns for more efficient execution. ```{r} mtcars |> subset(mpg > 11, cyl, vs, am, mpg, carb, hp) |> group_by(cyl, vs, am, sort = FALSE) |> fmean(nthreads = 3, na.rm = FALSE) ``` Setting these options globally using `set_collapse(sort = FALSE, nthreads = 3, na.rm = FALSE)` avoids the need to set them repeatedly. ### Using Internal Grouping Another key to writing efficient code with *collapse* is to avoid `fgroup_by()` where possible, especially for mutate operations. *collapse* does not implement `.by` arguments to manipulation functions like *dplyr*, but instead allows ad-hoc grouped transformations through its statistical functions. For example, the easiest and fastest way to computed the median of `mpg` by `cyl`, `vs`, and `am` is ```{r} mtcars |> mutate(mpg_median = fmedian(mpg, list(cyl, vs, am), TRA = "fill")) |> head(3) ``` For the common case of averaging and centering data, *collapse* also provides functions `fbetween()` for averaging and `fwithin()` for centering, i.e., `fbetween(mpg, list(cyl, vs, am))` is the same as `fmean(mpg, list(cyl, vs, am), TRA = "fill")`. There is also `fscale()` for (grouped) scaling and centering. This also applies to multiple columns, where we can use `fmutate(across(...))` or `ftransformv()`, i.e. ```{r} mtcars |> mutate(across(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill")) |> head(2) # Or mtcars |> transformv(c(mpg, disp, qsec), fmedian, list(cyl, vs, am), TRA = "fill") |> head(2) ``` Of course, if we want to apply different functions using the same grouping, `fgroup_by()` is sensible, but for mutate operations it also has the argument `return.groups = FALSE`, which avoids materializing the unique grouping columns, saving some memory. ```{r} mtcars |> group_by(cyl, vs, am, return.groups = FALSE) |> mutate(mpg_median = fmedian(mpg), mpg_mean = fmean(mpg), # Or fbetween(mpg) mpg_demean = fwithin(mpg), # Or fmean(mpg, TRA = "-") mpg_scale = fscale(mpg), .keep = "used") |> ungroup() |> head(3) ``` The `TRA` argument supports a whole array of operations, see `?TRA`. For example `fsum(mtcars, TRA = "/")` turns the column vectors into proportions. As an application of this, consider a generated dataset of sector-level exports. ```{r, include = FALSE} set.seed(101) ``` ```{r} # c = country, s = sector, y = year, v = value exports <- expand.grid(c = paste0("c", 1:8), s = paste0("s", 1:8), y = 1:15) |> mutate(v = round(abs(rnorm(length(c), mean = 5)), 2)) |> subset(-sample.int(length(v), 360)) # Making it unbalanced and irregular head(exports) nrow(exports) ``` It is very easy then to compute Balassa's (1965) Revealed Comparative Advantage (RCA) index, which is the share of a sector in country exports divided by the share of the sector in world exports. An index above 1 indicates that a RCA of country c in sector s. ```{r} # Computing Balassa's (1965) RCA index: fast and memory efficient # settfm() modifies exports and assigns it back to the global environment settfm(exports, RCA = fsum(v, list(c, y), TRA = "/") %/=% fsum(fsum(v, y, TRA = "/"), list(s, y), TRA = "fill", set = TRUE)) ``` Note that this involved a single expression with two different grouped operations, which is only possible by incorporating grouping into statistical functions themselves. Let's summarise this dataset using `pivot()` to aggregate the RCA index across years. Here `"mean"` calls a highly efficient internal mean function. ```{r} pivot(exports, ids = "c", values = "RCA", names = "s", how = "wider", FUN = "mean", sort = TRUE) ``` We may also wish to investigate the growth rate of RCA. This can be done using `fgrowth()`. Since the panel is irregular, i.e., not every sector is observed in every year, it is critical to also supply the time variable. ```{r} exports |> mutate(RCA_growth = fgrowth(RCA, g = list(c, s), t = y)) |> pivot(ids = "c", values = "RCA_growth", names = "s", how = "wider", FUN = fmedian, sort = TRUE) ``` Lastly, since the panel is unbalanced, we may wish to create an RCA index for only the last year, but balance the dataset a bit more by taking the last available trade within the last three years. This can be done using a single subset call ```{r} # Taking the latest observation within the last 3 years exports_latest <- subset(exports, y > 12 & y == fmax(y, list(c, s), "fill"), -y) # How many sectors do we observe for each country in the last 3 years? with(exports_latest, fndistinct(s, c)) ``` We can then compute the RCA index on this data ```{r} exports_latest |> mutate(RCA = fsum(v, c, TRA = "/") %/=% fsum(proportions(v), s, TRA = "fill")) |> pivot("c", "RCA", "s", how = "wider", sort = TRUE) ``` To summarise, *collapse* provides many options for ad-hoc or limited grouping, which are faster than a full `fgroup_by()`, and also syntactically efficient. Further efficiency gains are possible using operations by reference, e.g., `%/=%` instead of `/` to avoid an intermediate copy. It is also possible to transform by reference using fast statistical functions by passing the `set = TRUE` argument, e.g., `with(mtcars, fmean(mpg, cyl, TRA = "fill", set = TRUE))` replaces `mpg` by its group-averaged version (the transformed vector is returned invisibly). ## Conclusion *collapse* enhances R both statistically and computationally and is a good option for *tidyverse* users searching for more efficient and lightweight solutions to data manipulation and statistical computing problems in R. For more information, I recommend starting with the short vignette on [*Documentation Resources*](https://fastverse.org/collapse/articles/collapse_documentation.html). R users willing to write efficient/lightweight code and completely replace the *tidyverse* in their workflow are also encouraged to closely examine the [*fastverse*](https://fastverse.org/fastverse/) suite of packages. *collapse* alone may not always suffice, but 99% of *tidyverse* code can be replaced with an efficient and lightweight *fastverse* solution. ```{r, echo=FALSE} options(oldopts) ``` collapse/inst/doc/collapse_object_handling.html0000644000176200001440000011162115202627535021471 0ustar liggesusers collapse’s Handling of R Objects

collapse’s Handling of R Objects

A Quick View Behind the Scenes of Class-Agnostic R Programming

Sebastian Krantz

2026-05-18

This much-requested vignette provides some details about how collapse deals with various R objects. It is principally a digest of cumulative details provided in the NEWS for various releases since v1.4.0.

Overview

collapse provides a class-agnostic architecture permitting computations on a very broad range of R objects. It provides explicit support for base R classes and data types (logical, integer, double, character, list, data.frame, matrix, factor, Date, POSIXct, ts) and their popular extensions, including integer64, data.table, tibble, grouped_df, xts/zoo, pseries, pdata.frame, units, and sf (no geometric operations).

It also introduces GRP_df as a more performant and class-agnostic grouped data frame, and indexed_series and indexed_frame classes as modern class-agnostic successors of pseries, pdata.frame. These objects inherit the classes they succeed and are handled through .pseries, .pdata.frame, and .grouped_df methods, which also support the original (plm / dplyr) implementations (details below).

All other objects are handled internally at the C or R level using general principles extended by specific considerations for some of the above classes. I start with summarizing the general principles, which enable the usage of collapse with further classes it does not explicitly support.

General Principles

In general, collapse preserves attributes and classes of R objects in statistical and data manipulation operations unless their preservation involves a high-risk of yielding something wrong/useless. Risky operations change the dimensions or internal data type (typeof()) of an R object.

To collapse’s R and C code, there exist 3 principal types of R objects: atomic vectors, matrices, and lists - which are often assumed to be data frames. Most data manipulation functions in collapse, like fmutate(), only support lists, whereas statistical functions - like the S3 generic Fast Statistical Functions like fmean() - generally support all 3 types of objects.

S3 generic functions initially dispatch to .default, .matrix, .data.frame, and (hidden) .list methods. The .list method generally dispatches to the .data.frame method. These basic methods, and other non-generic functions in collapse, then decide how exactly to handle the object based on the statistical operation performed and attribute handling principles mostly implemented in C.

The simplest case arises when an operation preserves the dimensions of the object, such as fscale(x) or fmutate(data, across(a:c, log)). In this case, all attributes of x / data are fully preserved1.

Another simple case for matrices and lists arises when a statistical operation reduces them to a single dimension such as fmean(x), where, under the drop = TRUE default of Fast Statistical Functions, all attributes apart from (column-)names are dropped and a (named) vector of means is returned.

For atomic vectors, a statistical operation like fmean(x) will preserve the attributes (except for ts objects), as the object could have useful properties such as labels or units.

More complex cases involve changing the dimensions of an object. If the number of rows is preserved e.g. fmutate(data, a_b = a / b) or flag(x, -1:1), only the (column-)names attribute of the object is modified. If the number of rows is reduced e.g. fmean(x, g), all attributes are also retained under suitable modifications of the (row-)names attribute. However, if x is a matrix, other attributes than row- or column-names are only retained if !is.object(x), that is, if the matrix does not have a ‘class’ attribute. For atomic vectors, attributes are retained if !inherits(x, "ts"), as aggregating a time series will break the class. This also applies to columns in a data frame being aggregated.

When data is transformed using statistics as provided by the TRA() function e.g. TRA(x, STATS, operation, groups) and the like-named argument to the Fast Statistical Functions, operations that simply modify the input (x) in a statistical sense ("replace_na", "-", "-+", "/", "+", "*", "%%", "-%%") just copy the attributes to the transformed object. Operations "fill" and "replace" are more tricky, since here x is replaced with STATS, which could be of a different class or data type. The following rules apply: (1) the result has the same data type as STATS; (2) if is.object(STATS), the attributes of STATS are preserved; (3) otherwise the attributes of x are preserved unless is.object(x) && typeof(x) != typeof(STATS); (4) an exemption to this rule is made if x is a factor and an integer replacement is offered to STATS e.g. fnobs(factor, group, TRA = "fill"). In that case, the attributes of x are copied except for the ‘class’ and ‘levels’ attributes. These rules were devised considering the possibility that x may have important information attached to it which should be preserved in data transformations, such as a "label" attribute.

So to summarize the general principles: collapse just tries to preserve attributes in all cases except where it is likely to break something, beholding the way most commonly used R classes and objects behave. The most likely operations that break something are when aggregating matrices which have a class (such as mts/xts) or univariate time series (ts), or when data is to be replaced by another object. In the latter case, particular attention is paid to integer vectors and factors, as we often count something generating integers, and malformed factors need to be avoided.

The following section provides some further details for some collapse functions and supported classes.

Specific Functions and Classes

Object Conversions

Quick conversion functions qDF, qDT, qTBL() and qM (to create data.frame’s, data.table’s, tibble’s and matrices from arbitrary R objects) by default (keep.attr = FALSE) perform very strict conversions, where all attributes non-essential to the class are dropped from the input object. This is to ensure that, following conversion, objects behave exactly the way users expect. This is different from the behavior of functions like as.data.frame(), as.data.table(), as_tibble() or as.matrix() e.g. as.matrix(EuStockMarkets) just returns EuStockMarkets whereas qM(EuStockMarkets) returns a plain matrix without time series attributes. This behavior can be changed by setting keep.attr = TRUE, i.e. qM(EuStockMarkets, keep.attr = TRUE).

Selecting Columns by Data Type

Functions num_vars(), cat_vars() (the opposite of num_vars()), char_vars() etc. are implemented in C to avoid the need to check data frame columns by applying an R function such as is.numeric(). For is.numeric, the C implementation is equivalent to is_numeric_C <- function(x) typeof(x) %in% c("integer", "double") && !inherits(x, c("factor", "Date", "POSIXct", "yearmon", "yearqtr")). This of course does not respect the behavior of other classes that define methods for is.numeric e.g. is.numeric.foo <- function(x) FALSE, then for y = structure(rnorm(100), class = "foo"), is.numeric(y) is FALSE but num_vars(data.frame(y)) still returns it. Correct behavior in this case requires get_vars(data.frame(y), is.numeric). A particular case to be aware of is when using collap() with the FUN and catFUN arguments, where the C code (is_numeric_C) is used internally to decide whether a column is numeric or categorical. collapse does not support statistical operations on complex data.

Parsing of Time-IDs

Time Series Functions flag, fdiff, fgrowth and psacf/pspacf/psccf (and the operators L/F/D/Dlog/G) have a t argument to pass time-ids for fully identified temporal operations on time series and panel data. If t is a plain numeric vector or a factor, it is coerced to integer using as.integer(), and the integer steps are used as time steps. This is premised on the observation that the most common form of temporal identifier is a numeric variable denoting calendar years. If on the other hand t is a numeric time object such that is.object(t) && is.numeric(unclass(t)) (e.g. Date, POSIXct, etc.), then it is passed through timeid() which computes the greatest common divisor of the vector and generates an integer time-id in that way. Users are therefore advised to use appropriate classes to represent time steps e.g. for monthly data zoo::yearmon would be appropriate. It is also possible to pass non-numeric t, such as character or list/data.frame. In such cases ordered grouping is applied to generate an integer time-id, but this should rather be avoided.

xts/zoo Time Series

xts/zoo time series are handled through .zoo methods to all relevant functions. These methods are simple and all follow this pattern: FUN.zoo <- function(x, ...) if(is.matrix(x)) FUN.matrix(x, ...) else FUN.default(x, ....). Thus the general principles apply. Time-Series function do not automatically use the index for indexed computations, partly for consistency with native methods where this is also not the case (e.g. lag.xts does not perform an indexed lag), and partly because, as outlined above, the index does not necessarily accurately reflect the time structure. Thus the user must exercise discretion to perform an indexed lag on xts/zoo. For example: flag(xts_daily, 1:3, t = index(xts_daily)) or flag(xts_monthly, 1:3, t = zoo::as.yearmon(index(xts_monthly))).

Support for sf and units

collapse internally supports sf data frames by seeking to avoid their undue destruction through removal of the ‘geometry’ column in data manipulation operations. This is simply implemented through an additional check in the C programs used to subset columns of data: if the object is an sf data frame, the ‘geometry’ column is added to the column selection. Other functions like funique() or roworder() have internal facilities to avoid sorting or grouping on the ‘geometry’ column. Again other functions like descr() and qsu() simply omit the geometry column in their statistical calculations. A short vignette describes the integration of collapse and sf in a bit more detail. In summary: collapse supports sf by seeking to appropriately deal with the ‘geometry’ column. It cannot perform geometrical operations. For example, after subsetting with fsubset(), the bounding box attribute of the geometry is unaltered and likely too large.

To preserve units objects used in the sf ecosystem, all relevant functions also have simple methods of the form FUN.units <- function(x, ...) if(is.matrix(x)) copyMostAttrib(FUN.matrix(x, ...), x) else FUN.default(x, ....). According to the general principles, the default method preserves the units class, whereas the matrix method does not if FUN aggregates the data. The use of copyMostAttrib(), which copies all attributes apart from "dim", "dimnames", and "names", ensures that the returned objects are still units.

Support for data.table

collapse provides quite thorough support for data.table. The simplest level of support is that it avoids assigning descriptive (character) row names to data.table’s e.g. fmean(mtcars, mtcars$cyl) has row-names corresponding to the groups but fmean(qDT(mtcars), mtcars$cyl) does not.

collapse further supports data.table’s reference semantics (set*, :=). To be able to add columns by reference (e.g. DT[, new := 1]), data.table’s are implemented as overallocated lists2. collapse copied some C code from data.table to do the overallocation and generate the ".internal.selfref" attribute, so that qDT() creates a valid and fully functional data.table. To enable seamless data manipulation combining collapse and data.table, all data manipulation functions in collapse call this C code at the end and return a valid (overallocated) data.table. However, because this overallocation comes at a computational cost of 2-3 microseconds, I have opted against also adding it to the .data.frame methods of statistical functions. Concretely, this means that res <- DT |> fgroup_by(id) |> fsummarise(mu_a = fmean(a)) gives a fully functional data.table i.e. res[, new := 1] works, but res2 <- DT |> fgroup_by(id) |> fmean() gives a non-overallocated data.table such that res2[, new := 1] will still work but issue a warning. In this case, res2 <- DT |> fgroup_by(id) |> fmean() |> qDT() can be used to avoid the warning. This, to me, seems a reasonable trade-off between flexibility and performance. More details and examples are provided in the collapse and data.table vignette.

Class-Agnostic Grouped and Indexed Data Frames

As indicated in the introductory remarks, collapse provides a fast class-agnostic grouped data frame created with fgroup_by(), and fast class-agnostic indexed time series and panel data, created with findex_by()/reindex(). Class-agnostic means that the object that is grouped/indexed continues to behave as before except in collapse operations utilizing the ‘groups’/‘index_df’ attributes.

The grouped data frame is implemented as follows: fgroup_by() saves the class of the input data, calls GRP() on the columns being grouped, and attaches the resulting ‘GRP’ object in a "groups" attribute. It then assigns a class attribute as follows

clx <- class(.X) # .X is the data frame being grouped, clx is its class
m <- match(c("GRP_df", "grouped_df", "data.frame"), clx, nomatch = 0L)
class(.X) <- c("GRP_df",  if(length(mp <- m[m != 0L])) clx[-mp] else clx, "grouped_df", if(m[3L]) "data.frame") 

In words: a class "GRP_df" is added in front, followed by the classes of the original object3, followed by "grouped_df" and finally "data.frame", if present. The "GRP_df" class is for dealing appropriately with the object through methods for print() and subsetting ([, [[), e.g. print.GRP_df fetches the grouping object, prints fungroup(.X)4, and then prints a summary of the grouping. [.GRP_df works similarly: it saves the groups, calls [ on fungroup(.X), and attaches the groups again if the result is a list with the same number of rows. So collapse has no issues printing and handling grouped data.table’s, tibbles, sf data frames, etc. - they continue to behave as usual. Now collapse has various functions with a .grouped_df method to deal with grouped data frames. For example fmean.grouped_df, in a nutshell, fetches the attached ‘GRP’ object using GRP.grouped_df, and calls fmean.data.frame on fungroup(data), passing the ‘GRP’ object to the g argument for grouped computation. Here the general principles outlined above apply so that the resulting object has the same attributes as the input.

This architecture has an additional advantage: it allows GRP.grouped_df to examine the grouping object and check if it was created by collapse (class ‘GRP’) or by dplyr. If the latter is the case, an efficient C routine is called to convert the dplyr grouping object to a ‘GRP’ object so that all .grouped_df methods in collapse apply to data frames created with either dplyr::group_by() or fgroup_by().

The indexed_frame works similarly. It inherits from pdata.frame so that .pdata.frame methods in collapse deal with both indexed_frame’s of arbitrary classes and pdata.frame’s created with plm.

A notable difference to both grouped_df and pdata.frame is that indexed_frame is a deeply indexed data structure: each variable inside an indexed_frame is an indexed_series which contains in its index_df attribute an external pointer to the index_df attribute of the frame. Functions with pseries methods operating on indexed_series stored inside the frame (such as with(data, flag(column))) can fetch the index from this pointer. This allows worry-free application inside arbitrary data masking environments (with, %$%, attach, etc..) and estimation commands (glm, feols, lmrob etc..) without duplication of the index in memory. As you may have guessed, indexed_series are also class-agnostic and inherit from pseries. Any vector or matrix of any class can become an indexed_series.

Further levels of generality are that indexed series and frames allow one, two or more variables in the index to support both time series and complex panels, natively deal with irregularity in time5, and provide a rich set of methods for subsetting and manipulation which also subset the index_df attribute, including internal methods for fsubset(), funique(), roworder(v) and na_omit(). So indexed_frame and indexed_series is a rich and general structure permitting fully time-aware computations on nearly any R object. See ?indexing for more information.

Conclusion

collapse handles R objects in a preserving and fairly intelligent manner, allowing seamless compatibility with many common data classes in R, and statistical workflows that preserve attributes (labels, units, etc.) of the data. This is implemented through general principles and some specific considerations/exemptions mostly implemented in C - as detailed in this vignette.

The main benefits of this design are generality and execution speed: collapse has much fewer R-level method dispatches and function calls than other frameworks used to perform statistical or data manipulation operations, it behaves predictably, and may also work well with your simple new class.

The main disadvantage is that the general principles and exemptions are hard-coded in C and thus may not work with specific classes. A prominent example where collapse simply fails is lubridate’s interval class (#186, #418), which has a "starts" attribute of the same length as the data that is preserved but not subset in collapse operations.


  1. Preservation implies a shallow copy of the attribute lists from the original object to the result object. A shallow copy is memory-efficient and means we are copying the list containing the attributes in memory, but not the attributes themselves. Whenever I talk about copying attributes, I mean a shallow copy, not a deep copy. You can perform shallow copies with helper functions copyAttrib() or copyMostAttrib(), and directly set attribute lists using setAttrib() or setattrib().↩︎

  2. Notably, additional (hidden) column pointers are allocated to be able to add columns without taking a shallow copy of the data.table, and an ".internal.selfref" attribute containing an external pointer is used to check if any shallow copy was made using base R commands like <-.↩︎

  3. Removing c("GRP_df", "grouped_df", "data.frame") if present to avoid duplicate classes and allowing grouped data to be re-grouped.↩︎

  4. Which reverses the changes of fgroup_by() so that the print method for the original object .X is called.↩︎

  5. This is done through the creation of a time-factor in the index_df attribute whose levels represent time steps, i.e., the factor will have unused levels for gaps in time.↩︎

collapse/inst/doc/collapse_and_sf.Rmd0000644000176200001440000010210315121640575017361 0ustar liggesusers--- title: "collapse and sf" subtitle: "Fast Manipulation of Simple Features Data Frames" author: "Sebastian Krantz and Grant McDermott" date: "2024-04-19" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{collapse and sf} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- This short vignette focuses on using *collapse* with the popular *sf* package by Edzer Pebesma. It shows that *collapse* supports easy manipulation of *sf* data frames, at computation speeds far above *dplyr*. *collapse* v1.6.0 added internal support for *sf* data frames by having most essential functions (e.g., `fselect/gv`, `fsubset/ss`, `fgroup_by`, `findex_by`, `qsu`, `descr`, `varying`, `funique`, `roworder`, `rsplit`, `fcompute`, ...) internally handle the geometry column. To demonstrate this, we can load a test dataset provided by *sf*: ```r library(collapse) library(sf) nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) options(sf_max_print = 3) nc # Simple feature collection with 100 features and 14 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry # 1 19 MULTIPOLYGON (((-81.47276 3... # 2 12 MULTIPOLYGON (((-81.23989 3... # 3 260 MULTIPOLYGON (((-80.45634 3... ``` ## Summarising sf Data Frames Computing summary statistics on *sf* data frames automatically excludes the 'geometry' column: ```r # Which columns have at least 2 non-missing distinct values varying(nc) # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 # TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE # NWBIR74 BIR79 SID79 NWBIR79 # TRUE TRUE TRUE TRUE # Quick summary stats qsu(nc) # N Mean SD Min Max # AREA 100 0.1263 0.0492 0.042 0.241 # PERIMETER 100 1.673 0.4823 0.999 3.64 # CNTY_ 100 1985.96 106.5166 1825 2241 # CNTY_ID 100 1985.96 106.5166 1825 2241 # NAME 100 - - - - # FIPS 100 - - - - # FIPSNO 100 37100 58.023 37001 37199 # CRESS_ID 100 50.5 29.0115 1 100 # BIR74 100 3299.62 3848.1651 248 21588 # SID74 100 6.67 7.7812 0 44 # NWBIR74 100 1050.81 1432.9117 1 8027 # BIR79 100 4223.92 5179.4582 319 30757 # SID79 100 8.36 9.4319 0 57 # NWBIR79 100 1352.81 1975.9988 3 11631 # Detailed statistics description of each column descr(nc) # Dataset: nc, 14 Variables, N = 100 # ---------------------------------------------------------------------------------------------------- # AREA (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 77 0.13 0.05 0.04 0.24 0.48 2.5 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 0.04 0.06 0.06 0.09 0.12 0.15 0.2 0.21 0.24 # ---------------------------------------------------------------------------------------------------- # PERIMETER (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 96 1.67 0.48 1 3.64 1.48 5.95 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1 1.09 1.19 1.32 1.61 1.86 2.2 2.72 3.2 # ---------------------------------------------------------------------------------------------------- # CNTY_ (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 1985.96 106.52 1825 2241 0.26 2.32 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1826.98 1832.95 1837.9 1902.25 1982 2067.25 2110 2156.3 2238.03 # ---------------------------------------------------------------------------------------------------- # CNTY_ID (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 1985.96 106.52 1825 2241 0.26 2.32 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1826.98 1832.95 1837.9 1902.25 1982 2067.25 2110 2156.3 2238.03 # ---------------------------------------------------------------------------------------------------- # NAME (character): # Statistics # N Ndist # 100 100 # Table # Freq Perc # Ashe 1 1 # Alleghany 1 1 # Surry 1 1 # Currituck 1 1 # Northampton 1 1 # Hertford 1 1 # Camden 1 1 # Gates 1 1 # Warren 1 1 # Stokes 1 1 # Caswell 1 1 # Rockingham 1 1 # Granville 1 1 # Person 1 1 # ... 86 Others 86 86 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1 1 1 1 1 1 # ---------------------------------------------------------------------------------------------------- # FIPS (character): # Statistics # N Ndist # 100 100 # Table # Freq Perc # 37009 1 1 # 37005 1 1 # 37171 1 1 # 37053 1 1 # 37131 1 1 # 37091 1 1 # 37029 1 1 # 37073 1 1 # 37185 1 1 # 37169 1 1 # 37033 1 1 # 37157 1 1 # 37077 1 1 # 37145 1 1 # ... 86 Others 86 86 # # Summary of Table Frequencies # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1 1 1 1 1 1 # ---------------------------------------------------------------------------------------------------- # FIPSNO (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 37100 58.02 37001 37199 -0 1.8 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 37002.98 37010.9 37020.8 37050.5 37100 37149.5 37179.2 37189.1 37197.02 # ---------------------------------------------------------------------------------------------------- # CRESS_ID (integer): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 50.5 29.01 1 100 0 1.8 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1.99 5.95 10.9 25.75 50.5 75.25 90.1 95.05 99.01 # ---------------------------------------------------------------------------------------------------- # BIR74 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 3299.62 3848.17 248 21588 2.79 11.79 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 283.64 419.75 531.8 1077 2180.5 3936 6725.7 11193 20378.22 # ---------------------------------------------------------------------------------------------------- # SID74 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 23 6.67 7.78 0 44 2.44 10.28 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 0 0 0 2 4 8.25 15.1 18.25 38.06 # ---------------------------------------------------------------------------------------------------- # NWBIR74 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 93 1050.81 1432.91 1 8027 2.83 11.84 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 1 9.95 39.2 190 697.5 1168.5 2231.8 3942.9 7052.84 # ---------------------------------------------------------------------------------------------------- # BIR79 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 100 4223.92 5179.46 319 30757 2.99 13.1 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 349.69 539.3 675.7 1336.25 2636 4889 8313 14707.45 26413.87 # ---------------------------------------------------------------------------------------------------- # SID79 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 28 8.36 9.43 0 57 2.28 9.88 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 0 0 1 2 5 10.25 21 26 38.19 # ---------------------------------------------------------------------------------------------------- # NWBIR79 (numeric): # Statistics # N Ndist Mean SD Min Max Skew Kurt # 100 98 1352.81 1976 3 11631 3.18 14.45 # Quantiles # 1% 5% 10% 25% 50% 75% 90% 95% 99% # 3.99 11.9 44.7 250.5 874.5 1406.75 2987.9 5090.5 10624.17 # ---------------------------------------------------------------------------------------------------- ``` ## Selecting Columns and Subsetting We can select columns from the *sf* data frame without having to worry about taking along 'geometry': ```r # Selecting a sequence of columns fselect(nc, AREA, NAME:FIPSNO) # Simple feature collection with 100 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.114 Ashe 37009 37009 MULTIPOLYGON (((-81.47276 3... # 2 0.061 Alleghany 37005 37005 MULTIPOLYGON (((-81.23989 3... # 3 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... # Same using standard evaluation (gv is a shorthand for get_vars()) gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO")) # Simple feature collection with 100 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.114 Ashe 37009 37009 MULTIPOLYGON (((-81.47276 3... # 2 0.061 Alleghany 37005 37005 MULTIPOLYGON (((-81.23989 3... # 3 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... ``` The same applies to subsetting rows (and columns): ```r # A fast and enhanced version of base::subset fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO) # Simple feature collection with 44 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... # 2 0.153 Northampton 37131 37131 MULTIPOLYGON (((-77.21767 3... # 3 0.153 Rockingham 37157 37157 MULTIPOLYGON (((-79.53051 3... # A fast version of `[` (where i is used and optionally j) ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO")) # Simple feature collection with 10 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA NAME FIPS FIPSNO geometry # 1 0.114 Ashe 37009 37009 MULTIPOLYGON (((-81.47276 3... # 2 0.061 Alleghany 37005 37005 MULTIPOLYGON (((-81.23989 3... # 3 0.143 Surry 37171 37171 MULTIPOLYGON (((-80.45634 3... ``` This is significantly faster than using `[`, `base::subset()`, `dplyr::select()` or `dplyr::filter()`: ```r library(microbenchmark) library(dplyr) # Selecting columns microbenchmark(collapse = fselect(nc, AREA, NAME:FIPSNO), dplyr = select(nc, AREA, NAME:FIPSNO), collapse2 = gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO")), sf = nc[c("AREA", "NAME", "FIPS", "FIPSNO")]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 3.034 3.9565 5.19429 5.1865 5.6990 22.878 100 # dplyr 431.279 452.2915 505.29015 466.3750 493.8450 3356.342 100 # collapse2 2.665 3.4850 4.59610 4.4075 5.0635 14.391 100 # sf 105.165 114.1235 120.39732 118.0390 124.9270 156.497 100 # Subsetting microbenchmark(collapse = fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO), dplyr = select(nc, AREA, NAME:FIPSNO) |> filter(AREA > fmean(AREA)), collapse2 = ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO")), sf = nc[1:10, c("AREA", "NAME", "FIPS", "FIPSNO")]) # Unit: microseconds # expr min lq mean median uq max neval # collapse 9.676 11.5825 15.01707 14.4730 16.8920 30.463 100 # dplyr 890.643 917.6415 1055.40970 941.7085 1009.7890 5546.685 100 # collapse2 2.829 3.5465 5.40585 4.8995 6.4165 20.541 100 # sf 176.997 187.6160 202.72286 200.7565 210.8220 340.464 100 ``` However, *collapse* functions don't subset the 'agr' attribute on selecting columns, which (if specified) relates columns (attributes) to the geometry, and also don't modify the 'bbox' attribute giving the overall boundaries of a set of geometries when subsetting the *sf* data frame. Keeping the full 'agr' attribute is not problematic for all practical purposes, but not changing 'bbox' upon subsetting may lead to too large margins when plotting the geometries of a subset *sf* data frame. One way to to change this is calling `st_make_valid()` on the subset frame; but `st_make_valid()` is very expensive, thus unless the subset frame is very small, it is better to use `[`, `base::subset()` or `dplyr::filter()` in cases where the bounding box size matters. ## Aggregation and Grouping The flexibility and speed of `collap()` for aggregation can be used on *sf* data frames. A separate method for *sf* objects was not considered necessary as one can simply aggregate the geometry column using `st_union()`: ```r # Aggregating by variable SID74 using the median for numeric and the mode for categorical columns collap(nc, ~ SID74, custom = list(fmedian = is.numeric, fmode = is.character, st_union = "geometry")) # or use is.list to fetch the geometry # Simple feature collection with 23 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 SID74 NWBIR74 BIR79 # 1 0.0780 1.3070 1950.0 1950.0 Alleghany 37005 37073 37.0 487 0 0 40.0 594.0 # 2 0.0810 1.2880 1887.0 1887.0 Ashe 37009 37137 69.0 751 1 1 148.0 899.0 # 3 0.1225 1.6435 1959.5 1959.5 Caswell 37033 37078 39.5 1271 2 2 382.5 1676.5 # SID79 NWBIR79 geometry # 1 1 45 MULTIPOLYGON (((-83.69563 3... # 2 1 176 MULTIPOLYGON (((-80.02406 3... # 3 2 452 MULTIPOLYGON (((-77.16129 3... ``` *sf* data frames can also be grouped and then aggregated using `fsummarise()`: ```r nc |> fgroup_by(SID74) # Simple feature collection with 100 features and 14 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry # 1 19 MULTIPOLYGON (((-81.47276 3... # 2 12 MULTIPOLYGON (((-81.23989 3... # 3 260 MULTIPOLYGON (((-80.45634 3... # # Grouped by: SID74 [23 | 4 (4) 1-13] nc |> fgroup_by(SID74) |> fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = st_union(geometry)) # Simple feature collection with 23 features and 3 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # SID74 AREA_Ag Perimeter_Ag geometry # 1 0 1.103 1.3070 MULTIPOLYGON (((-83.69563 3... # 2 1 0.914 1.2880 MULTIPOLYGON (((-80.02406 3... # 3 2 1.047 1.6435 MULTIPOLYGON (((-77.16129 3... ``` Typically most of the time in aggregation is consumed by `st_union()` so that the speed of *collapse* does not really become visible on most datasets. A faster alternative is to use *geos* (*sf* backend for planar geometries) or *s2* (*sf* backend for spherical geometries) directly: ```r # Using s2 backend: sensible for larger tasks nc |> fmutate(geometry = s2::as_s2_geography(geometry)) |> fgroup_by(SID74) |> fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = s2::s2_union_agg(geometry)) |> fmutate(geometry = st_as_sfc(geometry)) # Simple feature collection with 23 features and 3 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: WGS 84 # First 3 features: # SID74 AREA_Ag Perimeter_Ag geometry # 1 0 1.103 1.3070 MULTIPOLYGON (((-83.69563 3... # 2 1 0.914 1.2880 MULTIPOLYGON (((-80.02406 3... # 3 2 1.047 1.6435 MULTIPOLYGON (((-77.16129 3... ``` In general, also upon aggregation with *collapse*, functions `st_as_sfc()`, `st_as_sf()`, or, in the worst case, `st_make_valid()`, may need to be invoked to ensure valid *sf* object output. Functions `collap()` and `fsummarise()` are attribute preserving but do not give special regard to geometry columns. One exception that both avoids the high cost of spatial functions in aggregation and any need for ex-post conversion/validation is aggregating spatial panel data over the time-dimension. Such panels can quickly be aggregated using `ffirst()` or `flast()` to aggregate the geometry: ```r # Creating a panel-dataset by simply duplicating nc for 2 different years pnc <- rowbind(`2000` = nc, `2001` = nc, idcol = "Year") |> as_integer_factor() pnc # Simple feature collection with 200 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # Year AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 # 1 2000 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 # 2 2000 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 # 3 2000 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 # SID79 NWBIR79 geometry # 1 0 19 MULTIPOLYGON (((-81.47276 3... # 2 3 12 MULTIPOLYGON (((-81.23989 3... # 3 6 260 MULTIPOLYGON (((-80.45634 3... # Aggregating by NAME, using the last value for all categorical data collap(pnc, ~ NAME, fmedian, catFUN = flast, cols = -1L) # Simple feature collection with 100 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 # 1 0.111 1.392 1904 1904 Alamance Alamance 37001 37001 1 4672 13 1243 5767 # 2 0.066 1.070 1950 1950 Alexander Alexander 37003 37003 2 1333 0 128 1683 # 3 0.061 1.231 1827 1827 Alleghany Alleghany 37005 37005 3 487 0 10 542 # SID79 NWBIR79 geometry # 1 11 1397 MULTIPOLYGON (((-79.24619 3... # 2 2 150 MULTIPOLYGON (((-81.10889 3... # 3 3 12 MULTIPOLYGON (((-81.23989 3... # Using fsummarise to aggregate just two variables and the geometry pnc_ag <- pnc |> fgroup_by(NAME) |> fsummarise(AREA_Ag = fsum(AREA), Perimeter_Ag = fmedian(PERIMETER), geometry = flast(geometry)) # The geometry is still valid... (slt = shorthand for fselect) plot(slt(pnc_ag, AREA_Ag)) ```
plot of chunk AREA_Ag
## Indexing *sf* data frames can also become [*indexed frames*](https://fastverse.org/collapse/reference/indexing.html) (spatio-temporal panels): ```r pnc <- pnc |> findex_by(CNTY_ID, Year) pnc # Simple feature collection with 200 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # Year AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 # 1 2000 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 # 2 2000 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 # 3 2000 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 # SID79 NWBIR79 geometry # 1 0 19 MULTIPOLYGON (((-81.47276 3... # 2 3 12 MULTIPOLYGON (((-81.23989 3... # 3 6 260 MULTIPOLYGON (((-80.45634 3... # # Indexed by: CNTY_ID [100] | Year [2] qsu(pnc$AREA) # N/T Mean SD Min Max # Overall 200 0.1263 0.0491 0.042 0.241 # Between 100 0.1263 0.0492 0.042 0.241 # Within 2 0.1263 0 0.1263 0.1263 settransform(pnc, AREA_diff = fdiff(AREA)) psmat(pnc$AREA_diff) |> head() # 2000 2001 # 1825 NA 0 # 1827 NA 0 # 1828 NA 0 # 1831 NA 0 # 1832 NA 0 # 1833 NA 0 pnc <- unindex(pnc) ``` ## Unique Values, Ordering, Splitting, Binding Functions `funique()` and `roworder[v]()` ignore the 'geometry' column in determining the unique values / order of rows when applied to *sf* data frames. `rsplit()` can be used to (recursively) split an *sf* data frame into multiple chunks. ```r # Splitting by SID74 rsplit(nc, ~ SID74) |> head(2) # $`0` # Simple feature collection with 13 features and 13 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 NWBIR74 BIR79 SID79 NWBIR79 # 1 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 10 542 3 12 # 2 0.062 1.547 1834 1834 Camden 37029 37029 15 286 115 350 2 139 # 3 0.091 1.284 1835 1835 Gates 37073 37073 37 420 254 594 2 371 # geometry # 1 MULTIPOLYGON (((-81.23989 3... # 2 MULTIPOLYGON (((-76.00897 3... # 3 MULTIPOLYGON (((-76.56251 3... # # $`1` # Simple feature collection with 11 features and 13 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 NWBIR74 BIR79 SID79 NWBIR79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 10 1364 0 19 # 2 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 123 830 2 145 # 3 0.124 1.428 1837 1837 Stokes 37169 37169 85 1612 160 2038 5 176 # geometry # 1 MULTIPOLYGON (((-81.47276 3... # 2 MULTIPOLYGON (((-76.00897 3... # 3 MULTIPOLYGON (((-80.02567 3... ``` The default in `rsplit()` for data frames is `simplify = TRUE`, which, for a single LHS variable, would just split the column-vector. This does not apply to *sf* data frames as the 'geometry' column is always selected as well. ```r # Only splitting Area rsplit(nc, AREA ~ SID74) |> head(1) # $`0` # Simple feature collection with 13 features and 1 field # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA geometry # 1 0.061 MULTIPOLYGON (((-81.23989 3... # 2 0.062 MULTIPOLYGON (((-76.00897 3... # 3 0.091 MULTIPOLYGON (((-76.56251 3... # For data frames the default simplify = TRUE drops the data frame structure rsplit(qDF(nc), AREA ~ SID74) |> head(1) # $`0` # [1] 0.061 0.062 0.091 0.064 0.059 0.080 0.066 0.099 0.094 0.078 0.131 0.167 0.051 ``` *sf* data frames can be combined using `rowbind()`, which, by default, preserves the attributes of the first object. ```r # Splitting by each row and recombining nc_combined <- nc %>% rsplit(seq_row(.)) %>% rowbind() identical(nc, nc_combined) # [1] TRUE ``` ## Transformations For transforming and computing columns, `fmutate()` and `ftransform[v]()` apply as to any other data frame. ```r fmutate(nc, gsum_AREA = fsum(AREA, SID74, TRA = "fill")) |> head() # Simple feature collection with 6 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry gsum_AREA # 1 19 MULTIPOLYGON (((-81.47276 3... 0.914 # 2 12 MULTIPOLYGON (((-81.23989 3... 1.103 # 3 260 MULTIPOLYGON (((-80.45634 3... 1.380 # Same thing, more expensive nc |> fgroup_by(SID74) |> fmutate(gsum_AREA = fsum(AREA)) |> fungroup() |> head() # Simple feature collection with 6 features and 15 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # NWBIR79 geometry gsum_AREA # 1 19 MULTIPOLYGON (((-81.47276 3... 0.914 # 2 12 MULTIPOLYGON (((-81.23989 3... 1.103 # 3 260 MULTIPOLYGON (((-80.45634 3... 1.380 ``` Special attention to *sf* data frames is afforded by `fcompute()`, which can be used to compute new columns dropping existing ones - except for the geometry column and any columns selected through the `keep` argument. ```r fcompute(nc, scaled_AREA = fscale(AREA), gsum_AREA = fsum(AREA, SID74, TRA = "fill"), keep = .c(AREA, SID74)) # Simple feature collection with 100 features and 4 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # AREA SID74 scaled_AREA gsum_AREA geometry # 1 0.114 1 -0.2491860 0.914 MULTIPOLYGON (((-81.47276 3... # 2 0.061 0 -1.3264176 1.103 MULTIPOLYGON (((-81.23989 3... # 3 0.143 5 0.3402426 1.380 MULTIPOLYGON (((-80.45634 3... ``` ## Conversion to and from *sf* The quick converters `qDF()`, `qDT()`, and `qTBL()` can be used to efficiently convert *sf* data frames to standard data frames, *data.table*'s or *tibbles*, and the result can be converted back to the original *sf* data frame using `setAttrib()`, `copyAttrib()` or `copyMostAttrib()`. ```r library(data.table) # Create a data.table on the fly to do an fast grouped rolling mean and back to sf qDT(nc)[, list(roll_AREA = frollmean(AREA, 2), geometry), by = SID74] |> copyMostAttrib(nc) # Simple feature collection with 100 features and 2 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # SID74 roll_AREA geometry # 1 1 NA MULTIPOLYGON (((-81.47276 3... # 2 1 0.092 MULTIPOLYGON (((-76.00897 3... # 3 1 0.097 MULTIPOLYGON (((-80.02567 3... ``` The easiest way to strip a geometry column off an *sf* data frame is via the function `atomic_elem()`, which removes list-like columns and, by default, also the class attribute. For example, we can create a *data.table* without list column using ```r qDT(atomic_elem(nc)) |> head() # AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74 BIR79 SID79 # # 1: 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10 1364 0 # 2: 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0 10 542 3 # 3: 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5 208 3616 6 # 4: 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 1 123 830 2 # 5: 0.153 2.206 1832 1832 Northampton 37131 37131 66 1421 9 1066 1606 3 # 6: 0.097 1.670 1833 1833 Hertford 37091 37091 46 1452 7 954 1838 5 # NWBIR79 # # 1: 19 # 2: 12 # 3: 260 # 4: 145 # 5: 1197 # 6: 1237 ``` This is also handy for other functions such as `join()` and `pivot()`, which are class agnostic like all of *collapse*, but do not have any built-in logic to deal with the *sf* column. ```r # Use atomic_elem() to strip geometry off y in left join identical(nc, join(nc, atomic_elem(nc), overid = 2)) # left join: nc[AREA, PERIMETER, CNTY_, CNTY_ID, NAME, FIPS, FIPSNO, CRESS_ID, BIR74, SID74, NWBIR74, BIR79, SID79, NWBIR79] 100/100 (100%) y[AREA, PERIMETER, CNTY_, CNTY_ID, NAME, FIPS, FIPSNO, CRESS_ID, BIR74, SID74, NWBIR74, BIR79, SID79, NWBIR79] 100/100 (100%) # [1] TRUE # In pivot: presently need to specify what to do with geometry column pivot(nc, c("CNTY_ID", "geometry")) |> head() # Simple feature collection with 6 features and 3 fields # Geometry type: MULTIPOLYGON # Dimension: XY # Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965 # Geodetic CRS: NAD27 # First 3 features: # CNTY_ID geometry variable value # 1 1825 MULTIPOLYGON (((-81.47276 3... AREA 0.114 # 2 1827 MULTIPOLYGON (((-81.23989 3... AREA 0.061 # 3 1828 MULTIPOLYGON (((-80.45634 3... AREA 0.143 # Or use pivot(qDT(atomic_elem(nc)), "CNTY_ID") |> head() # CNTY_ID variable value # # 1: 1825 AREA 0.114 # 2: 1827 AREA 0.061 # 3: 1828 AREA 0.143 # 4: 1831 AREA 0.07 # 5: 1832 AREA 0.153 # 6: 1833 AREA 0.097 ``` ## Support for *units* Since v2.0.13, *collapse* explicitly supports/preserves *units* objects through dedicated methods that preserve the 'units' class wherever sensible. ```r nc_dist <- st_centroid(nc) |> st_distance() nc_dist[1:3, 1:3] # Units: [m] # [,1] [,2] [,3] # [1,] 0.00 34020.35 72728.02 # [2,] 34020.35 0.00 40259.55 # [3,] 72728.02 40259.55 0.00 fmean(nc_dist) |> head() # Units: [m] # [1] 250543.9 237040.0 217941.5 337016.5 250380.2 269604.6 fndistinct(nc_dist) |> head() # [1] 100 100 100 100 100 100 ``` ## Conclusion *collapse* provides no deep integration with the *sf* ecosystem and cannot perform spatial operations, but offers sufficient features and flexibility to painlessly manipulate *sf* data frames at much greater speeds than *dplyr*. This requires a bit of care by the user though to ensure that the returned *sf* objects are valid, especially following aggregation and subsetting. collapse/build/0000755000176200001440000000000015202627535013162 5ustar liggesuserscollapse/build/vignette.rds0000644000176200001440000000070715202627535015525 0ustar liggesusers‹­S]OÂ0-c ‰àGMML|ãW¢ñAC4ñm['3ݺl„7ÿµoâet¥­Ž'úuÏéííɹïm„…l»Ž¬:lë]˜š0:0ZȆÐÀc”ºIF7ößå3JFÓÈ” ’ÐUª = Mh¤`] ËêKÈg^‘˜» note that here we get an additional match based on the unique ids, # which we didn't get before because "Jane" != "Janne" } \keyword{manip} collapse/man/rapply2d.Rd0000644000176200001440000000344014777170130014663 0ustar liggesusers\name{rapply2d} \alias{rapply2d} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Recursively Apply a Function to a List of Data Objects } \description{ \code{rapply2d} is a recursive version of \code{lapply} with three differences to \code{\link{rapply}}: \enumerate{ \item data frames (or other list-based objects specified in \code{classes}) are considered as atomic, not as (sub-)lists \item \code{FUN} is applied to all 'atomic' objects in the nested list \item the result is not simplified / unlisted. } } \usage{ rapply2d(l, FUN, \dots, classes = "data.frame") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list.} \item{FUN}{a function that can be applied to all 'atomic' elements in l.} \item{\dots}{additional elements passed to FUN.} \item{classes}{character. Classes of list-based objects inside \code{l} that should be considered as atomic. } } \value{ A list of the same structure as \code{l}, where \code{FUN} was applied to all atomic elements and list-based objects of a class included in \code{classes}. } \note{ The main reason \code{rapply2d} exists is to have a recursive function that out-of-the-box applies a function to a nested list of data frames. For most other purposes \code{\link{rapply}}, or by extension the excellent \href{https://cran.r-project.org/package=rrapply}{rrapply} function / package, provide more advanced functionality and greater performance. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{rsplit}}, \code{\link{unlist2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ l <- list(mtcars, list(mtcars, as.matrix(mtcars))) rapply2d(l, fmean) unlist2d(rapply2d(l, fmean)) } \keyword{manip} \keyword{list} collapse/man/qF.Rd0000644000176200001440000002424614777170130013503 0ustar liggesusers\name{qF-qG-finteraction} \alias{qF} \alias{qG} \alias{is_qG} \alias{as_factor_qG} \alias{finteraction} \alias{itn} \title{ Fast Factor Generation, Interactions and Vector Grouping } \description{ \code{qF}, shorthand for 'quick-factor' implements very fast factor generation from atomic vectors using either radix ordering or index hashing followed by sorting. \code{qG}, shorthand for 'quick-group', generates a kind of factor-light without the levels attribute but instead an attribute providing the number of levels. Optionally the levels / groups can be attached, but without converting them to character (which can have large performance implications). Objects have a class 'qG'. \code{finteraction} generates a factor or 'qG' object by interacting multiple vectors or factors. In that process missing values are always replaced with a level and unused levels/combinations are always dropped. \emph{collapse} internally makes optimal use of factors and 'qG' objects when passed as grouping vectors to statistical functions (\code{g/by}, or \code{t} arguments) i.e. typically no further grouping or ordering is performed and objects are used directly by statistical C/C++ code. } \usage{ qF(x, ordered = FALSE, na.exclude = TRUE, sort = .op[["sort"]], drop = FALSE, keep.attr = TRUE, method = "auto") qG(x, ordered = FALSE, na.exclude = TRUE, sort = .op[["sort"]], return.groups = FALSE, method = "auto") is_qG(x) as_factor_qG(x, ordered = FALSE, na.exclude = TRUE) finteraction(\dots, factor = TRUE, ordered = FALSE, sort = factor && .op[["sort"]], method = "auto", sep = ".") itn(\dots) # Shorthand for finteraction } \arguments{ \item{x}{a atomic vector, factor or quick-group.} \item{ordered}{logical. Adds a class 'ordered'.} \item{na.exclude}{logical. \code{TRUE} preserves missing values (i.e. no level is generated for \code{NA}). \code{FALSE} attaches an additional class \code{"na.included"} which is used to skip missing value checks performed before sending objects to C/C++. See Details. } \item{sort}{logical. \code{TRUE} sorts the levels in ascending order (like \code{\link{factor}}); \code{FALSE} provides the levels in order of first appearance, which can be significantly faster. Note that if a factor is passed as input, only \code{sort = FALSE} takes effect and unused levels will be dropped (as factors usually have sorted levels and checking sortedness can be expensive).} \item{drop}{logical. If \code{x} is a factor, \code{TRUE} efficiently drops unused factor levels beforehand using \code{\link{fdroplevels}}.} \item{keep.attr}{logical. If \code{TRUE} and \code{x} has additional attributes apart from 'levels' and 'class', these are preserved in the conversion to factor.} \item{method}{an integer or character string specifying the method of computation: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "auto" \tab\tab automatic selection: \code{if(is.double(x) && sort) "radix" else if(sort && length(x) < 1e5) "rcpp_hash" else "hash"}. \cr 2 \tab\tab "radix" \tab\tab use radix ordering to generate factors. Supports \code{sort = FALSE} only for character vectors. See Details. \cr 3 \tab\tab "hash" \tab\tab use hashing to generate factors. Since v1.8.3 this is a fast hybrid implementation using \code{\link{group}} and radix ordering applied to the unique elements. See Details. \cr 4 \tab\tab "rcpp_hash" \tab\tab the previous "hash" algorithm prior to v1.8.3: uses \code{Rcpp::sugar::sort_unique} and \code{Rcpp::sugar::match}. Only supports \code{sort = TRUE}. \cr } Note that for \code{finteraction}, \code{method = "hash"} is always unsorted and \code{method = "rcpp_hash"} is not available. } \item{return.groups}{logical. \code{TRUE} returns the unique elements / groups / levels of \code{x} in an attribute called \code{"groups"}. Unlike \code{qF}, they are not converted to character.} \item{factor}{logical. \code{TRUE} returns an factor, \code{FALSE} returns a 'qG' object. } \item{sep}{character. The separator passed to \code{\link{paste}} when creating factor levels from multiple grouping variables.} \item{\dots}{multiple atomic vectors or factors, or a single list of equal-length vectors or factors. See Details. } } \details{ Whenever a vector is passed to a \link[=fast-statistical-functions]{Fast Statistical Function} such as \code{fmean(mtcars, mtcars$cyl)}, is is grouped using \code{qF}, or \code{qG} if \code{use.g.names = FALSE}. \code{qF} is a combination of \code{as.factor} and \code{factor}. Applying it to a vector i.e. \code{qF(x)} gives the same result as \code{as.factor(x)}. \code{qF(x, ordered = TRUE)} generates an ordered factor (same as \code{factor(x, ordered = TRUE)}), and \code{qF(x, na.exclude = FALSE)} generates a level for missing values (same as \code{factor(x, exclude = NULL)}). An important addition is that \code{qF(x, na.exclude = FALSE)} also adds a class 'na.included'. This prevents \emph{collapse} functions from checking missing values in the factor, and is thus computationally more efficient. Therefore factors used in grouped operations should preferably be generated using \code{qF(x, na.exclude = FALSE)}. Setting \code{sort = FALSE} gathers the levels in first-appearance order (unless \code{method = "radix"} and \code{x} is numeric, in which case the levels are always sorted). This often gives a noticeable speed improvement. % for non-numeric \code{x}. There are 3 internal methods of computation: radix ordering, hashing, and Rcpp sugar hashing. Radix ordering is done by combining the functions \code{\link{radixorder}} and \code{\link{groupid}}. It is generally faster than hashing for large numeric data and pre-sorted data (although there are exceptions). Hashing uses \code{\link{group}}, followed by \code{\link{radixorder}} on the unique elements if \code{sort = TRUE}. It is generally fastest for character data. Rcpp hashing uses \code{Rcpp::sugar::sort_unique} and \code{Rcpp::sugar::match}. This is often less efficient than the former on large data, but the sorting properties (relying on \code{std::sort}) may be superior in borderline cases where \code{\link{radixorder}} fails to deliver exact lexicographic ordering of factor levels. % If \code{sort = FALSE}, \code{\link{group}} is used which is generally very fast. % The hashing methods have very fast For logical data, a super fast one-pass method was written which is subsumed in the hash method. Regarding speed: In general \code{qF} is around 5x faster than \code{as.factor} on character data and about 30x faster on numeric data. Automatic method dispatch typically does a good job delivering optimal performance. \code{qG} is in the first place a programmers function. It generates a factor-'light' class 'qG' consisting of only an integer grouping vector and an attribute providing the number of groups. It is slightly faster and more memory efficient than \code{\link{GRP}} for grouping atomic vectors, and also convenient as it can be stored in a data frame column, which are the main reasons for its existence. %The fact that it (optionally) returns the unique groups / levels without converting them to character is an added bonus (this also provides a small performance gain compared to \code{qF}). Since v1.7, you can also call a C-level function \code{\link{group}} directly, which works for multivariate data as well, but does not sort the data and does not preserve missing values. \code{finteraction} is simply a wrapper around \code{as_factor_GRP(GRP.default(X))}, where X is replaced by the arguments in '\dots' combined in a list (so its not really an interaction function but just a multivariate grouping converted to factor, see \code{\link{GRP}} for computational details). In general: All vectors, factors, or lists of vectors / factors passed can be interacted. Interactions always create a level for missing values and always drop unused levels. } \value{ \code{qF} returns an (ordered) factor. \code{qG} returns an object of class 'qG': an integer grouping vector with an attribute \code{"N.groups"} indicating the number of groups, and, if \code{return.groups = TRUE}, an attribute \code{"groups"} containing the vector of unique groups / elements in \code{x} corresponding to the integer-id. \code{finteraction} can return either. } \note{ An efficient alternative for character vectors with multithreading support is provided by \code{kit::charToFact}. \code{qG(x, sort = FALSE, na.exclude = FALSE, method = "hash")} internally calls \code{\link[=group]{group(x)}} which can also be used directly and also supports multivariate groupings. Neither \code{qF} nor \code{qG} reorder groups / factor levels. An exception was added in v1.7, when calling \code{qF(f, sort = FALSE)} on a factor \code{f}, the levels are recast in first appearance order. These objects can however be converted into one another using \code{qF/qG} or the direct method \code{as_factor_qG} (called inside \code{qF}). It is also possible to add a class 'ordered' (\code{ordered = TRUE}) and to create am extra level / integer for missing values (\code{na.exclude = FALSE}) if factors or 'qG' objects are passed to \code{qF} or \code{qG}. % Apart from that \code{qF} and \code{qG} don't do much to each others objects. } \seealso{ \code{\link{group}}, \code{\link{groupid}}, \code{\link{GRP}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ cylF <- qF(mtcars$cyl) # Factor from atomic vector cylG <- qG(mtcars$cyl) # Quick-group from atomic vector cylG # See the simple structure of this object cf <- qF(wlddev$country) # Bigger data cf2 <- qF(wlddev$country, na.exclude = FALSE) # With na.included class dat <- num_vars(wlddev) \donttest{ % No code relying on suggested package # cf2 is faster in grouped operations because no missing value check is performed library(microbenchmark) microbenchmark(fmax(dat, cf), fmax(dat, cf2)) } finteraction(mtcars$cyl, mtcars$vs) # Interacting two variables (can be factors) head(finteraction(mtcars)) # A more crude example.. finteraction(mtcars$cyl, mtcars$vs, factor = FALSE) # Returns 'qG', by default unsorted group(mtcars$cyl, mtcars$vs) # Same thing } \keyword{manip} collapse/man/roworder.Rd0000644000176200001440000001221714777170130014773 0ustar liggesusers\name{roworder} \alias{roworder} \alias{roworderv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Reordering of Data Frame Rows } \description{ A fast substitute for \code{dplyr::arrange}, based on \code{\link[=radixorder]{radixorder(v)}} and inspired by \code{data.table::setorder(v)}. It returns a sorted copy of the data frame, unless the data is already sorted in which case no copy is made. In addition, rows can be manually re-ordered. \code{roworderv} is a programmers version that takes vectors/variables as input. Use \code{data.table::setorder(v)} to sort a data frame without creating a copy. %\code{roworder} also does not support grouped tibbles or pdata.frame's, i.e. every data frame is treated the same. } \usage{ roworder(X, \dots, na.last = TRUE, verbose = .op[["verbose"]]) roworderv(X, cols = NULL, neworder = NULL, decreasing = FALSE, na.last = TRUE, pos = "front", verbose = .op[["verbose"]]) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a data frame or list of equal-length columns. } \item{\dots}{comma-separated columns of \code{X} to sort by e.g. \code{var1, var2}. Negatives i.e. \code{-var1, var2} can be used to sort in decreasing order of \code{var1}. Internally all expressions are turned into strings and \code{startsWith(expr, "-")} is used to detect this, thus it does not negate the actual values (which may as well be strings), and you cannot apply any other functions to columns inside \code{roworder()} to induce different sorting behavior.} \item{cols}{select columns to sort by using a function, column names, indices or a logical vector. The default \code{NULL} sorts by all columns in order of occurrence (from left to right). } \item{na.last}{logical. If \code{TRUE}, missing values in the sorting columns are placed last; if \code{FALSE}, they are placed first; if \code{NA} they are removed (argument passed to \code{\link{radixorder}}).} \item{decreasing}{logical. Should the sort order be increasing or decreasing? Can also be a vector of length equal to the number of arguments in \code{cols} (argument passed to \code{\link{radixorder}}).} \item{neworder}{an ordering vector, can be \code{< nrow(X)}. if \code{pos = "front"} or \code{pos = "end"}, a logical vector can also be supplied. This argument overwrites \code{cols}.} \item{pos}{integer or character. Different arrangement options if \code{!is.null(neworder) && length(neworder) < nrow(X)}. \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "front" \tab\tab move rows in \code{neworder} to the front (top) of \code{X} (the default). \cr 2 \tab\tab "end" \tab\tab move rows in \code{neworder} to the end (bottom) of \code{X}. \cr 3 \tab\tab "exchange" \tab\tab just exchange the order of rows in \code{neworder}, other rows remain in the same position. \cr 4 \tab\tab "after" \tab\tab place all further selected rows behind the first selected row. \cr } } \item{verbose}{logical. \code{1L} (default) prints a message when ordering a grouped or indexed frame, indicating that this is not efficient and encouraging reordering the data prior to the grouping/indexing step. Users can also set \code{verbose = 2L} to also toggle a message if \code{x} is already sorted, implying that no copy was made and the call to \code{roworder(v)} is redundant.} } \value{ A copy of \code{X} with rows reordered. If \code{X} is already sorted, \code{X} is simply returned. } \note{ If you don't require a copy of the data, use \code{data.table::setorder} (you can also use it in a piped call as it invisibly returns the data). \code{roworder(v)} has internal facilities to deal with \link[=GRP]{grouped} and \link[=indexing]{indexed} data. This is however inefficient (since in most cases data could be reordered before grouping/indexing), and therefore issues a message if \code{verbose > 0L}. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{colorder}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ head(roworder(airquality, Month, -Ozone)) head(roworder(airquality, Month, -Ozone, na.last = NA)) # Removes the missing values in Ozone ## Same in standard evaluation head(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE))) head(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE), na.last = NA)) ## Custom reordering head(roworderv(mtcars, neworder = 3:4)) # Bring rows 3 and 4 to the front head(roworderv(mtcars, neworder = 3:4, pos = "end")) # Bring them to the end head(roworderv(mtcars, neworder = mtcars$vs == 1)) # Bring rows with vs == 1 to the top } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/descr.Rd0000644000176200001440000002350114777170130014226 0ustar liggesusers\name{descr} \alias{descr} \alias{descr.default} \alias{descr.grouped_df} \alias{[.descr} \alias{print.descr} \alias{as.data.frame.descr} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Detailed Statistical Description of Data Frame } \description{ \code{descr} offers a fast and detailed description of each variable in a data frame. Since v1.9.0 it fully supports grouped and weighted computations. } \usage{ descr(X, \dots) \method{descr}{default}(X, by = NULL, w = NULL, cols = NULL, Ndistinct = TRUE, higher = TRUE, table = TRUE, sort.table = "freq", Qprobs = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), Qtype = 7L, label.attr = "label", stepwise = FALSE, \dots) \method{descr}{grouped_df}(X, w = NULL, Ndistinct = TRUE, higher = TRUE, table = TRUE, sort.table = "freq", Qprobs = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), Qtype = 7L, label.attr = "label", stepwise = FALSE, \dots) \method{as.data.frame}{descr}(x, \dots, gid = "Group") \method{print}{descr}(x, n = 14, perc = TRUE, digits = .op[["digits"]], t.table = TRUE, total = TRUE, compact = FALSE, summary = !compact, reverse = FALSE, stepwise = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{ a (grouped) data frame or list of atomic vectors. Atomic vectors, matrices or arrays can be passed but will first be coerced to data frame using \code{\link{qDF}}. } \item{by}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{GRP}}), or a one- or two-sided formula e.g. \code{~ group1} or \code{var1 + var2 ~ group1 + group2} to group \code{X}. See Examples.} \item{w}{a numeric vector of (non-negative) weights. the default method also supports a one-sided formulas i.e. \code{~ weightcol} or \code{~ log(weightcol)}. The \code{grouped_df} method supports lazy-expressions (same without \code{~}). See Examples.} \item{cols}{select columns to describe using column names, indices a logical vector or selector function (e.g. \code{is.numeric}). \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{Ndistinct}{ logical. \code{TRUE} (default) computes the number of distinct values on all variables using \code{\link{fndistinct}}. } \item{higher}{ logical. Argument is passed down to \code{\link{qsu}}: \code{TRUE} (default) computes the skewness and the kurtosis. } \item{table}{ logical. \code{TRUE} (default) computes a (sorted) frequency table for all categorical variables (excluding \link[=is_date]{Date} variables). } \item{sort.table}{an integer or character string specifying how the frequency table should be presented: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "value" \tab\tab sort table by values. \cr 2 \tab\tab "freq" \tab\tab sort table by frequencies. \cr 3 \tab\tab "none" \tab\tab return table in first-appearance order of values, or levels for factors (most efficient). \cr } } \item{Qprobs}{ double. Probabilities for quantiles to compute on numeric variables, passed down to \code{\link{.quantile}}. If something non-numeric is passed (i.e. \code{NULL}, \code{FALSE}, \code{NA}, \code{""} etc.), no quantiles are computed. } \item{Qtype}{ integer. Quantile types 5-9 following Hyndman and Fan (1996) who recommended type 8, default 7 as in \code{\link{quantile}}. } \item{label.attr}{ character. The name of a label attribute to display for each variable (if variables are labeled). } \item{\dots}{for \code{descr}: other arguments passed to \code{\link{qsu.default}}. For \code{[.descr}: variable names or indices passed to \code{[.list}. The argument is unused in the \code{print} and \code{as.data.frame} methods.} \item{x}{an object of class 'descr'.} \item{n}{integer. The maximum number of table elements to print for categorical variables. If the number of distinct elements is \code{<= n}, the whole table is printed. Otherwise the remaining items are summed into an '... \%s Others' category.} \item{perc}{logical. \code{TRUE} (default) adds percentages to the frequencies in the table for categorical variables, and, if \code{!is.null(by)}, the percentage of observations in each group.} \item{digits}{integer. The number of decimals to print in statistics, quantiles and percentage tables.} \item{t.table}{logical. \code{TRUE} (default) prints a transposed table.} \item{total}{logical. \code{TRUE} (default) adds a 'Total' column for grouped tables (when using \code{by} argument).} \item{compact}{logical. \code{TRUE} combines statistics and quantiles to generate a more compact printout. Especially useful with groups (\code{by}).} \item{summary}{logical. \code{TRUE} (default) computes and displays a summary of the frequencies, if the size of the table for a categorical variable exceeds \code{n}.} \item{reverse}{logical. \code{TRUE} prints contents in reverse order, starting with the last column, so that the dataset can be analyzed by scrolling up the console after calling \code{descr}.} \item{stepwise}{logical. \code{TRUE} prints one variable at a time. The user needs to press [enter] to see the printout for the next variable. If called from \code{descr}, the computation is also done one variable at a time, and the finished 'descr' object is returned invisibly. } \item{gid}{character. Name assigned to the group-id column, when describing data by groups.} %\item{wsum}{\code{TRUE} sums the weights by groups and adds them as a 'WeightSum' column. Alternatively a name for the column can be supplied. } %\item{stringsAsFactors}{logical. Make factors from dimension names of 'qsu' array. Same as option to \code{\link{as.data.frame.table}}.} } \details{ \code{descr} was heavily inspired by \code{Hmisc::describe}, but is much faster and has more advanced statistical capabilities. It is principally a wrapper around \code{\link{qsu}}, \code{\link{fquantile}} (\code{.quantile}), and \code{\link{fndistinct}} for numeric variables, and computes frequency tables for categorical variables using \code{\link{qtab}}. Date variables are summarized with \code{\link{fnobs}}, \code{\link{fndistinct}} and \code{\link{frange}}. Since v1.9.0 grouped and weighted computations are fully supported. The use of sampling weights will produce a weighted mean, sd, skewness and kurtosis, and weighted quantiles for numeric data. For categorical data, tables will display the sum of weights instead of the frequencies, and percentage tables as well as the percentage of missing values indicated next to 'Statistics' in print, be relative to the total sum of weights. All this can be done by groups. Grouped (weighted) quantiles are computed using \code{\link{BY}}. For larger datasets, calling the \code{stepwise} option directly from \code{descr()} is recommended, as precomputing the statistics for all variables before digesting the results can be time consuming. %\code{\link{qsu}} itself is yet about 10x faster than \code{descr}, and is optimized for grouped, panel data and weighted statistics. It is possible to also compute grouped, panel data and/or weighted statistics with \code{descr} by passing group-ids to \code{g}, panel-ids to \code{pid} or a weight vector to \code{w}. These arguments are handed down to \code{\link{qsu.default}} and only affect the statistics natively computed by \code{qsu}, i.e. passing a weight vector produces a weighted mean, sd, skewness and kurtosis but not weighted quantiles. The list-object returned from \code{descr} can efficiently be converted to a tidy data frame using the \code{as.data.frame} method. This representation will not include frequency tables computed for categorical variables. %, and the method cannot handle arrays of statistics (applicable when \code{g} or \code{pid} arguments are passed to \code{descr}, in that case \code{as.data.frame.descr} will throw an appropriate error). } \value{ A 2-level nested list-based object of class 'descr'. The list has the same size as the dataset, and contains the statistics computed for each variable, which are themselves stored in a list containing the class, the label, the basic statistics and quantiles / tables computed for the variable (in matrix form). The object has attributes attached providing the 'name' of the dataset, the number of rows in the dataset ('N'), an attribute 'arstat' indicating whether arrays of statistics where generated by passing arguments (e.g. \code{pid}) down to \code{qsu.default}, an attribute 'table' indicating whether \code{table = TRUE} (i.e. the object could contain tables for categorical variables), and attributes 'groups' and/or 'weights' providing a \code{\link{GRP}} object and/or weight vector for grouped and/or weighted data descriptions. } \seealso{ \code{\link{qsu}}, \code{\link{qtab}}, \code{\link{fquantile}}, \code{\link{pwcor}}, \link[=summary-statistics]{Summary Statistics}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Use descr(iris) descr(wlddev) descr(GGDC10S) # Some useful print options (also try stepwise argument) print(descr(GGDC10S), reverse = TRUE, t.table = FALSE) # For bigger data consider: descr(big_data, stepwise = TRUE) # Generating a data frame as.data.frame(descr(wlddev, table = FALSE)) ## Weighted Desciptions descr(wlddev, w = ~ replace_na(POP)) # replacing NA's with 0's for fquantile() ## Grouped Desciptions descr(GGDC10S, ~ Variable) descr(wlddev, ~ income) print(descr(wlddev, ~ income), compact = TRUE) ## Grouped & Weighted Desciptions descr(wlddev, ~ income, w = ~ replace_na(POP)) ## Passing Arguments down to qsu.default: for Panel Data Statistics descr(iris, pid = iris$Species) descr(wlddev, pid = wlddev$iso3c) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{ univar }% use one of RShowDoc("KEYWORDS") collapse/man/collapse-options.Rd0000644000176200001440000004054315202504365016421 0ustar liggesusers\name{collapse-options} \alias{collapse-options} \alias{AA4-collapse-options} \alias{set_collapse} \alias{get_collapse} \alias{.op} \title{\emph{collapse} Package Options} \description{\emph{collapse} is globally configurable to an extent few packages are: the default value of key function arguments governing the behavior of its algorithms, and the exported namespace, can be adjusted interactively through the \code{set_collapse()} function. These options are saved in an internal environment called \code{.op} (for safety and performance reasons) visible in the documentation of some functions such as \code{\link{fmean}}. The contents of this environment can be accessed using \code{get_collapse()}. There are also a few options that can be set using \code{\link{options}} (retrievable using \code{\link{getOption}}). These options mainly affect package startup behavior. %Global options affecting package operation. There are 2 kinds of options, those set using \code{\link{options}} (retrievable using \code{\link{getOption}}), and those set (to avoid the performance overhead of \code{getOption()}) using \code{set_collapse()} (retrievable using \code{get_collapse()}). The latter are implemented using an \link{environment} called \code{.op} contained in the package namespace. } \usage{ set_collapse(\dots) get_collapse(opts = NULL) } \arguments{ \item{\dots}{either comma separated options, or a single list of options. The available options are: \tabular{lll}{ \code{na.rm} \tab\tab logical, default \code{TRUE}. Sets the default for statistical algorithms such as the \link[=fast-statistical-functions]{Fast Statistical Functions} to skip missing values. If your data does not have missing values, or only in rare cases, it is recommended to change this to \code{FALSE} for performance gains. \emph{Note} that this does not affect other (non-statistical) uses of \code{na.rm} arguments, such as in \code{\link{pivot}}. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{sort} \tab\tab logical, default \code{TRUE}. Sets the default for grouping operations to be sorted. This also applies to factor generation using \code{\link{qF}} and tabulation with \code{\link{qtab}}, but excludes other uses of \code{sort} arguments where grouping is not the objective (such as in \code{\link{funique}} or \code{\link{pivot}}). In general, sorted grouping (internally using \code{\link{radixorder}}) is slower than hash-based direct grouping (internally using \code{\link{group}}). However, if data is pre-sorted, sorted grouping is slightly faster. In general, if records don't need to be sorted or you want to maintain their first-appearance order, changing this to \code{FALSE} is recommended and often brings substantial performance gains. \emph{Note} that this also affects internal grouping applied when atomic vectors (except for factors) or lists are passed to \code{g} arguments in \link[=fast-statistical-functions]{Fast Statistical Functions}. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{nthreads} \tab\tab integer, default 1. Sets the default for OpenMP multithreading, available in certain statistical and data manipulation functions. Setting values greater than 1 is strongly recommended with larger datasets. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{stable.algo} \tab\tab logical, default \code{TRUE}. Option passed to \code{\link[=fvar]{fvar()/fsd()}} and \code{\link[=qsu]{qsu()}}. \code{FALSE} enables one-pass standard deviation calculation, which is very fast, but might incur catastrophic cancellation if numbers are large and the variance is small. see \code{\link{fvar}} for details. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{stub} \tab\tab logical, default \code{TRUE}. Controls whether \link[=.OPERATOR_FUN]{transformation operators} (\code{.OPERATOR_FUN}) such as \code{\link{W}}, \code{\link{L}}, \code{\link{STD}} etc. add prefixes to transformed columns of matrix and data.frame-like objects. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{verbose} \tab\tab integer, default \code{1}. Print additional (diagnostic) information or messages when executing code. Currently only used in \code{\link{join}} and \code{\link{roworder}}. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{digits} \tab\tab integer, default \code{2}. Number of digits to print, e.g. in \code{\link{descr}} or \code{\link{pwcor}}. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{mask} \tab\tab character, default \code{NULL}. Allows masking existing base R/dplyr functions with faster \emph{collapse} versions, by creating additional functions in the namespace and instantly exporting them: \cr \tab\tab \cr \tab\tab For example \code{set_collapse(mask = "unique")} (or, equivalently, \code{set_collapse(mask = "funique")}) will create \code{unique <- funique} in the \emph{collapse} namespace, export \code{unique()}, and silently detach and attach the namespace again so R can find it - all in millisecond. Thus calling \code{unique()} afterwards uses the \emph{collapse} version - which is many times faster. \code{funique} remains available and you can still call \code{base::unique} explicitly. \cr \tab\tab \cr \tab\tab All \emph{collapse} functions starting with 'f' can be passed to the option (with or without the 'f') e.g. \code{set_collapse(mask = c("subset", "transform", "droplevels"))} creates \code{subset <- fsubset}, \code{transform <- ftransform} etc. Special functions are \code{"n"} and \code{"table"/"qtab"}, and \code{"\%in\%"}, which create \code{n <- GRPN} (for use in \code{(f)summarise}/\code{(f)mutate}), \code{table <- qtab}, and replace \code{\%in\%} with a fast version using \code{\link{fmatch}}, respectively. \cr \tab\tab \cr \tab\tab There are also a couple of convenience keywords that you can use to mask groups of functions: \cr \tab\tab \cr \tab\tab - \code{"manip"} adds data manipulation functions: \code{fsubset, fslice, fslicev, ftransform, ftransform<-, ftransformv, fcompute, fcomputev, fselect, fselect<-, fgroup_by, fgroup_vars, fungroup, fsummarise, fsummarize, fmutate, frename, findex_by, findex}. \cr \tab\tab \cr \tab\tab - \code{"helper"} adds the functions: \code{fdroplevels}, \code{finteraction}, \code{fmatch}, \code{funique}, \code{fnunique}, \code{fduplicated}, \code{fcount}, \code{fcountv}, \code{fquantile}, \code{frange}, \code{fdist}, \code{fnlevels}, \code{fnrow} and \code{fncol}. \cr \tab\tab \cr % fdim not because of infinite recursion \tab\tab - \code{"special"} exports \code{n()}, \code{table()} and \code{\%in\%}. See above. \cr \tab\tab \cr \tab\tab - \code{"fast-fun"} adds the functions contained in the macro: \code{.FAST_FUN}. See also Note. \cr \tab\tab \cr \tab\tab - \code{"fast-stat-fun"} adds the functions contained in the macro: \code{.FAST_STAT_FUN}. See also Note. \cr \tab\tab \cr \tab\tab - \code{"fast-trfm-fun"} adds the functions contained in: \code{setdiff(.FAST_FUN, .FAST_STAT_FUN)}. See also Note. \cr \tab\tab \cr \tab\tab - \code{"all"} turns on all of the above.\cr \tab\tab \cr \tab\tab The re-attaching of the namespace places \emph{collapse} at the top of the search path (after the global environment), implying that all its exported functions will take priority over other libraries. Users can use \code{fastverse::fastverse_conflicts()} to check which functions are masked following \code{set_collapse(mask = ...)}. The option can be changed at any time with immediate effect. Using \code{set_collapse(mask = NULL)} removes all masked functions from the namespace, and can also be called simply to place \emph{collapse} at the top of the search path. \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \code{remove} \tab\tab character, default \code{NULL}. Similar to 'mask': allows removing functions from the exported namespace (they are still in the namespace, just no longer exported). All \emph{collapse} functions can be passed here. This argument is always evaluated after 'mask', thus you can also remove masked functions again i.e. after setting a keyword which masks a bunch of functions. There are also a couple of convenience keywords you can specify to bulk-remove certain functions: \cr \tab\tab \cr \tab\tab - \code{"shorthand"} removes function shorthands: \code{gv, gv<-, av, av<-, nv, nv<-, gvr, gvr<-, itn, ix, slt, slt<-, sbt, gby, iby, mtt, smr, tfm, tfmv, tfm<-, settfm, settfmv, rnm}. \cr \tab\tab \cr \tab\tab - \code{"infix"} removes infix functions: \code{\%!=\%, \%[!]in\%, \%[!]iin\%, \%*=\%, \%+=\%, \%-=\%, \%/=\%, \%=\%, \%==\%, \%c*\%, \%c+\%, \%c-\%, \%c/\%, \%cr\%, \%r*\%, \%r+\%, \%r-\%, \%r/\%, \%rr\%}.\cr \tab\tab \cr \tab\tab - \code{"operator"} removes functions contained in the macro: \code{.OPERATOR_FUN}.\cr \tab\tab \cr \tab\tab - \code{"old"} removes depreciated functions contained in the macro: \code{.COLLAPSE_OLD}.\cr \tab\tab \cr \tab\tab Like 'mask', the option is alterable and reversible. Specifying \code{set_collapse(remove = NULL)} restores the exported namespace. Also like 'mask', this option silently detaches and attaches \emph{collapse} again, ensuring that it is at the top of the search path. % \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr \tab\tab \cr } } \item{opts}{character. A vector of options to receive from \code{.op}, or \code{NULL} for a list of all options.} } \value{ \code{set_collapse()} returns the old content of \code{.op} invisibly as a list. \code{get_collapse()}, if called with only one option, returns the value of the option, and otherwise a list. } \note{ Setting keywords "fast-fun", "fast-stat-fun", "fast-trfm-fun" or "all" with \code{set_collapse(mask = ...)} will also adjust internal optimization flags, e.g. in \code{\link[=fsummarise]{(f)summarise}} and \code{\link[=fmutate]{(f)mutate}}, so that these functions - and all expressions containing them - receive vectorized execution (see examples of \code{\link[=fsummarise]{(f)summarise}} and \code{\link[=fmutate]{(f)mutate}}). Users should be aware of expressions like \code{fmutate(mu = sum(var) / lenth(var))}: this usually gets executed by groups, but with these keywords set,this will be vectorized (like \code{fmutate(mu = fsum(var) / lenth(var))}) implying grouped sum divided by overall length. In this case \code{fmutate(mu = base::sum(var) / lenth(var))} needs to be specified to retain the original result. \emph{Note} that passing individual functions like \code{set_collapse(mask = "(f)sum")} will \bold{not} change internal optimization flags for these functions. This is to ensure consistency i.e. you can be either all in (by setting appropriate keywords) or all out when it comes to vectorized stats with basic R names. \emph{Note} also that masking does not change documentation links, so you need to look up the f- version of a function to get the right documentation. A safe way to set options affecting startup behavior is by using a \code{\link{.Rprofile}} file in your user or project directory (see also \href{https://www.datacamp.com/doc/r/customizing}{here}, the user-level file is located at \code{file.path(Sys.getenv("HOME"), ".Rprofile")} and can be edited using \code{file.edit(Sys.getenv("HOME"), ".Rprofile")}), or by using a \href{https://fastverse.org/fastverse/articles/fastverse_intro.html#custom-fastverse-configurations-for-projects}{\code{.fastverse}} configuration file in the project directory. \code{options("collapse_remove")} does in fact remove functions from the namespace and cannot be reversed by \code{set_collapse(remove = NULL)} once the package is loaded. It is only reversed by re-loading \emph{collapse}. } \section{Options Set Using \code{options()}}{ \itemize{ \item \code{"collapse_unused_arg_action"} regulates how generic functions (such as the \link[=fast-statistical-functions]{Fast Statistical Functions}) in the package react when an unknown argument is passed to a method. The default action is \code{"warning"} which issues a warning. Other options are \code{"error"}, \code{"message"} or \code{"none"}, whereby the latter enables silent swallowing of such arguments. % \item \code{option("collapse_mask")} %none of these options will impact internal \emph{collapse} code, but they may change the way your programs run. \code{"manip"} is probably the safest option to start with. %Specifying \code{"fast-fun"}, \code{"fast-stat-fun"}, \code{"fast-trfm-fun"} or \code{"all"} are ambitious as they replace basic R functions like \code{sum} and \code{max}, introducing \emph{collapse}'s \code{na.rm = TRUE} default (which can now be changed using \code{set_collapse}) and different behavior for matrices and data frames. % These options also change some internal macros so that base R functions like \code{sum} or \code{max} called inside \code{fsummarise}, \code{fsummarize}, \code{fmutate} or \code{collap} will also receive vectorized execution. In other words, if you put \code{options(collapse_mask = "all")} before loading the package, and you have a collapse-compatible line of \emph{dplyr} code like \code{wlddev |> group_by(region, income) |> summarise(across(PCGDP:POP, sum))}, this will now receive fully optimized execution. %Note however that because of \code{collapse}'s \code{na.rm = TRUE} default, the result will be different unless you add \code{na.rm = FALSE}. % In General, this option is for your convenience, if you want to write visually more appealing code or you want to translate existing \emph{dplyr} codes to \emph{collapse}. Use with care! %Thus for production code I generally recommend not using it, unless you can ensure that the option is always set before any code is run, and that \emph{collapse} is always attached after \emph{dplyr}. \item \code{"collapse_export_F"}, if set to \code{TRUE}, exports the lead operator \code{F} in the package namespace when loading the package. The operator was exported by default until v1.9.0, but is now hidden inside the package due to too many problems with \code{base::F}. Alternatively, the operator can be accessed using \code{collapse:::F}. % \item \code{option("collapse_DT_alloccol")} sets how many empty columns \emph{collapse} data manipulation functions like \code{ftransform} allocate when taking a shallow copy of \emph{data.table}'s. The default is \code{100L}. Note that the \emph{data.table} default is \code{getOption("datatable.alloccol") = 1024L}. I chose a lower default because shallow copies are taken by each data manipulation function if you manipulate \emph{data.table}'s with collapse, and the cost increases with the number of overallocated columns. With 100 columns, the cost is 2-5 microseconds per copy. \item \code{"collapse_nthreads"}, \code{"collapse_na_rm"}, \code{"collapse_sort"}, \code{"collapse_stable_algo"}, \code{"collapse_verbose"}, \code{"collapse_digits"}, \code{"collapse_mask"} and \code{"collapse_remove"} can be set before loading the package to initialize \code{.op} with different defaults (e.g. using an \code{\link{.Rprofile}} file). Once loaded, these options have no effect, and users need to use \code{set_collapse()} to change them. See also the Note. } } \references{ Krantz S (2026). \emph{collapse}: Advanced and Fast Statistical Computing and Data Transformation in R. \emph{Journal of Statistical Software} \bold{116}(1), 1--38. \doi{10.18637/jss.v116.i01} } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link{collapse-package} } \examples{ # Setting new values oldopts <- set_collapse(nthreads = 2, na.rm = FALSE) # Getting the values get_collapse() get_collapse("nthreads") # Resetting set_collapse(oldopts) rm(oldopts) \dontrun{ ## This is a typical working setup I use: library(fastverse) # Loading other stats packages with fastverse_extend(): # displays versions, checks conflicts, and installs if unavailable fastverse_extend(qs, fixest, grf, glmnet, install = TRUE) # Now setting collapse options with some namespace modification set_collapse( nthreads = 4, sort = FALSE, mask = c("manip", "helper", "special", "mean", "scale"), remove = "old" ) # Final conflicts check (optional) fastverse_conflicts() # For some simpler scripts I also use set_collapse( nthreads = 4, sort = FALSE, mask = "all", remove = c("old", "between") # I use data.table::between > fbetween ) # This is now collapse code mtcars |> subset(mpg > 12) |> group_by(cyl) |> sum() } ## Changing what happens with unused arguments oldopts <- options(collapse_unused_arg_action = "message") # default: "warning" fmean(mtcars$mpg, bla = 1) # Now nothing happens, same as base R options(collapse_unused_arg_action = "none") fmean(mtcars$mpg, bla = 1) mean(mtcars$mpg, bla = 1) options(oldopts) rm(oldopts) } \keyword{documentation} collapse/man/fscale.Rd0000644000176200001440000002647714777170130014402 0ustar liggesusers\name{fscale} \alias{fscale} \alias{fscale.default} \alias{fscale.matrix} \alias{fscale.data.frame} \alias{fscale.pseries} \alias{fscale.pdata.frame} \alias{fscale.grouped_df} % \alias{standardize} \alias{STD} \alias{STD.default} \alias{STD.matrix} \alias{STD.data.frame} \alias{STD.pseries} \alias{STD.pdata.frame} \alias{STD.grouped_df} % - Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Grouped, Weighted) Scaling and Centering of Matrix-like Objects } \description{ \code{fscale} is a generic function to efficiently standardize (scale and center) data. \code{STD} is a wrapper around \code{fscale} representing the 'standardization operator', with more options than \code{fscale} when applied to matrices and data frames. Standardization can be simple or groupwise, ordinary or weighted. Arbitrary target means and standard deviations can be set, with special options for grouped scaling and centering. It is also possible to scale data without centering i.e. perform mean-preserving scaling. } \usage{ fscale(x, \dots) STD(x, \dots) \method{fscale}{default}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, \dots) \method{STD}{default}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, \dots) \method{fscale}{matrix}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, \dots) \method{STD}{matrix}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], \dots) \method{fscale}{data.frame}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, \dots) \method{STD}{data.frame}(x, by = NULL, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], keep.by = TRUE, keep.w = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{fscale}{pseries}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, \dots) \method{STD}{pseries}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, \dots) \method{fscale}{pdata.frame}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, \dots) \method{STD}{pdata.frame}(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], keep.ids = TRUE, keep.w = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fscale}{grouped_df}(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{STD}{grouped_df}(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, sd = 1, stub = .op[["stub"]], keep.group_vars = TRUE, keep.w = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric vector, matrix, data frame, 'indexed_series' ('pseries'), 'indexed_frame' ('pdata.frame') or grouped data frame ('grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{group}}) used to group \code{x}.} \item{by}{\emph{STD data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{cols}{\emph{STD (p)data.frame method}: Select columns to scale using a function, column names, indices or a logical vector. Default: All numeric columns. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{w}{a numeric vector of (non-negative) weights. \code{STD} data frame and \code{pdata.frame} methods also allow a one-sided formula i.e. \code{~ weightcol}. The \code{grouped_df} (\emph{dplyr}) method supports lazy-evaluation. See Examples.} \item{na.rm}{logical. Skip missing values in \code{x} or \code{w} when computing means and sd's.} \item{effect}{\emph{plm} methods: Select which panel identifier should be used as group-id. 1L takes the first variable in the \link[=indexing]{index}, 2L the second etc.. Index variables can also be called by name using a character string. More than one variable can be supplied. } \item{stub}{character. A prefix/stub to add to the names of all transformed columns. \code{TRUE} (default) uses \code{"STD."}, \code{FALSE} will not rename columns.} \item{mean}{the mean to center on (default is 0). If \code{mean = FALSE}, no centering will be performed. In that case the scaling is mean-preserving. A numeric value different from 0 (i.e. \code{mean = 5}) will be added to the data after subtracting out the mean(s), such that the data will have a mean of 5. A special option when performing grouped scaling and centering is \code{mean = "overall.mean"}. In that case the overall mean of the data will be added after subtracting out group means.} \item{sd}{the standard deviation to scale the data to (default is 1). A numeric value different from 0 (i.e. \code{sd = 3}) will scale the data to have a standard deviation of 3. A special option when performing grouped scaling is \code{sd = "within.sd"}. In that case the within standard deviation (= the standard deviation of the group-centered series) will be calculated and applied to each group. The results is that the variance of the data within each group is harmonized without forcing a certain variance (such as 1).} \item{keep.by, keep.ids, keep.group_vars}{\emph{data.frame, pdata.frame and grouped_df methods}: Logical. Retain grouping / panel-identifier columns in the output. For \code{STD.data.frame} this only works if grouping variables were passed in a formula.} \item{keep.w}{\emph{data.frame, pdata.frame and grouped_df methods}: Logical. Retain column containing the weights in the output. Only works if \code{w} is passed as formula / lazy-expression.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ If \code{g = NULL}, \code{fscale} by default (column-wise) subtracts the mean or weighted mean (if \code{w} is supplied) from all data points in \code{x}, and then divides this difference by the standard deviation or frequency-weighted standard deviation. The result is that all columns in \code{x} will have a (weighted) mean 0 and (weighted) standard deviation 1. Alternatively, data can be scaled to have a mean of \code{mean} and a standard deviation of \code{sd}. If \code{mean = FALSE} the data is only scaled (not centered) such that the mean of the data is preserved. \cr Means and standard deviations are computed using Welford's numerically stable online algorithm. With groups supplied to \code{g}, this standardizing becomes groupwise, so that in each group (in each column) the data points will have mean \code{mean} and standard deviation \code{sd}. Naturally if \code{mean = FALSE} then each group is just scaled and the mean is preserved. For centering without scaling see \code{\link{fwithin}}. If \code{na.rm = FALSE} and a \code{NA} or \code{NaN} is encountered, the mean and sd for that group will be \code{NA}, and all data points belonging to that group will also be \code{NA} in the output. If \code{na.rm = TRUE}, means and sd's are computed (column-wise) on the available data points, and also the weight vector can have missing values. In that case, the weighted mean an sd are computed on (column-wise) \code{complete.cases(x, w)}, and \code{x} is scaled using these statistics. \emph{Note} that \code{fscale} will not insert a missing value in \code{x} if the weight for that value is missing, rather, that value will be scaled using a weighted mean and standard-deviated computed without itself! (The intention here is that a few (randomly) missing weights shouldn't break the computation when \code{na.rm = TRUE}, but it is not meant for weight vectors with many missing values. If you don't like this behavior, you should prepare your data using \code{x[is.na(w), ] <- NA}, or impute your weight vector for non-missing \code{x}). Special options for grouped scaling are \code{mean = "overall.mean"} and \code{sd = "within.sd"}. The former group-centers vectors on the overall mean of the data (see \code{\link{fwithin}} for more details) and the latter scales the data in each group to have the within-group standard deviation (= the standard deviation of the group-centered data). Thus scaling a grouped vector with options \code{mean = "overall.mean"} and \code{sd = "within.sd"} amounts to removing all differences in the mean and standard deviations between these groups. In weighted computations, \code{mean = "overall.mean"} will subtract weighted group-means from the data and add the overall weighted mean of the data, whereas \code{sd = "within.sd"} will compute the weighted within- standard deviation and apply it to each group. } \value{ \code{x} standardized (mean = mean, standard deviation = sd), grouped by \code{g/by}, weighted with \code{w}. See Details. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ For centering without scaling see \code{\link[=fwithin]{fwithin/W}}. For simple not mean-preserving scaling use \code{\link[=fsd]{fsd(..., TRA = "/")}}. To sweep pre-computed means and scale-factors out of data see \code{\link{TRA}}. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{fwithin}}, \code{\link{fsd}}, \code{\link{TRA}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Scaling & Centering / Standardizing head(fscale(mtcars)) # Doesn't rename columns head(STD(mtcars)) # By default adds a prefix qsu(STD(mtcars)) # See that is works qsu(STD(mtcars, mean = 5, sd = 3)) # Assigning a mean of 5 and a standard deviation of 3 qsu(STD(mtcars, mean = FALSE)) # No centering: Scaling is mean-preserving ## Panel Data head(fscale(get_vars(wlddev,9:12), wlddev$iso3c)) # Standardizing 4 series within each country head(STD(wlddev, ~iso3c, cols = 9:12)) # Same thing using STD, id's added pwcor(fscale(get_vars(wlddev,9:12), wlddev$iso3c)) # Correlaing panel series after standardizing fmean(get_vars(wlddev, 9:12)) # This calculates the overall means fsd(fwithin(get_vars(wlddev, 9:12), wlddev$iso3c)) # This calculates the within standard deviations head(qsu(fscale(get_vars(wlddev, 9:12), # This group-centers on the overall mean and wlddev$iso3c, # group-scales to the within standard deviation mean = "overall.mean", sd = "within.sd"), # -> data harmonized in the first 2 moments by = wlddev$iso3c)) ## Indexed data wldi <- findex_by(wlddev, iso3c, year) head(STD(wldi)) # Standardizing all numeric variables by country head(STD(wldi, effect = 2L)) # Standardizing all numeric variables by year ## Weighted Standardizing weights = abs(rnorm(nrow(wlddev))) head(fscale(get_vars(wlddev,9:12), wlddev$iso3c, weights)) head(STD(wlddev, ~iso3c, weights, 9:12)) # Grouped data wlddev |> fgroup_by(iso3c) |> fselect(PCGDP,LIFEEX) |> STD() wlddev |> fgroup_by(iso3c) |> fselect(PCGDP,LIFEEX) |> STD(weights) # weighted standardizing wlddev |> fgroup_by(iso3c) |> fselect(PCGDP,LIFEEX,POP) |> STD(POP) # weighting by POP -> # ..keeps the weight column unless keep.w = FALSE } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") collapse/man/quick-conversion.Rd0000644000176200001440000002262614777170130016434 0ustar liggesusers\name{quick-conversion} \alias{A4-quick-conversion} \alias{quick-conversion} \alias{qDF} \alias{qDT} \alias{qTBL} \alias{qM} \alias{mctl} \alias{mrtl} \alias{as_numeric_factor} \alias{as_integer_factor} \alias{as_character_factor} %- Also NEED an '\alias' for EACH other topic documented here. \title{Quick Data Conversion} \description{ Fast, flexible and precise conversion of common data objects, without method dispatch and extensive checks: \itemize{ \item \code{qDF}, \code{qDT} and \code{qTBL} convert vectors, matrices, higher-dimensional arrays and suitable lists to data frame, \emph{data.table} and \emph{tibble}, respectively. \item \code{qM} converts vectors, higher-dimensional arrays, data frames and suitable lists to matrix. \item \code{mctl} and \code{mrtl} column- or row-wise convert a matrix to list, data frame or \emph{data.table}. They are used internally by \code{qDF/qDT/qTBL}, \code{\link{dapply}}, \code{\link{BY}}, etc\dots \item \code{\link{qF}} converts atomic vectors to factor (documented on a separate page). \item \code{as_numeric_factor}, \code{as_integer_factor}, and \code{as_character_factor} convert factors, or all factor columns in a data frame / list, to character or numeric (by converting the levels). } } \usage{ # Converting between matrices, data frames / tables / tibbles qDF(X, row.names.col = FALSE, keep.attr = FALSE, class = "data.frame") qDT(X, row.names.col = FALSE, keep.attr = FALSE, class = c("data.table", "data.frame")) qTBL(X, row.names.col = FALSE, keep.attr = FALSE, class = c("tbl_df","tbl","data.frame")) qM(X, row.names.col = NULL , keep.attr = FALSE, class = NULL, sep = ".") # Programmer functions: matrix rows or columns to list / DF / DT - fully in C++ mctl(X, names = FALSE, return = "list") mrtl(X, names = FALSE, return = "list") # Converting factors or factor columns as_numeric_factor(X, keep.attr = TRUE) as_integer_factor(X, keep.attr = TRUE) as_character_factor(X, keep.attr = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a vector, factor, matrix, higher-dimensional array, data frame or list. \code{mctl} and \code{mrtl} only accept matrices, \code{as_numeric_factor}, \code{as_integer_factor} and \code{as_character_factor} only accept factors, data frames or lists.} \item{row.names.col}{can be used to add an column saving names or row.names when converting objects to data frame using \code{qDF/qDT/qTBL}. \code{TRUE} will add a column \code{"row.names"}, or you can supply a name e.g. \code{row.names.col = "variable"}. If \code{X} is a named atomic vector, a length 2 vector of names can be supplied, e.g., \code{qDF(fmean(mtcars), c("car", "mean"))}. With \code{qM}, the argument has the opposite meaning, and can be used to select one or more columns in a data frame/list which will be used to create the rownames of the matrix e.g. \code{qM(iris, row.names.col = "Species")}. In this case the column(s) can be specified using names, indices, a logical vector or a selector function. See Examples.} \item{keep.attr}{logical. \code{FALSE} (default) yields a \emph{hard} / \emph{thorough} object conversion: All unnecessary attributes are removed from the object yielding a plain matrix / data.frame / \emph{data.table}. \code{FALSE} yields a \emph{soft} / \emph{minimal} object conversion: Only the attributes 'names', 'row.names', 'dim', 'dimnames' and 'levels' are modified in the conversion. Other attributes are preserved. See also \code{class}.} \item{class}{if a vector of classes is passed here, the converted object will be assigned these classes. If \code{NULL} is passed, the default classes are assigned: \code{qM} assigns no class, \code{qDF} a class \code{"data.frame"}, and \code{qDT} a class \code{c("data.table", "data.frame")}. If \code{keep.attr = TRUE} and \code{class = NULL} and the object already inherits the default classes, further inherited classes are preserved. See Details and the Example. } \item{sep}{character. Separator used for interacting multiple variables selected through \code{row.names.col}.} \item{names}{logical. Should the list be named using row/column names from the matrix?} \item{return}{an integer or string specifying what to return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "list" \tab\tab returns a plain list \cr 2 \tab\tab "data.frame" \tab\tab returns a plain data.frame \cr 3 \tab\tab "data.table" \tab\tab returns a plain \emph{data.table} \cr } } } \details{ Object conversions using these functions are maximally efficient and involve 3 consecutive steps: (1) Converting the storage mode / dimensions / data of the object, (2) converting / modifying the attributes and (3) modifying the class of the object: (1) is determined by the choice of function and the optional \code{row.names.col} argument. Higher-dimensional arrays are converted by expanding the second dimension (adding columns, same as \code{as.matrix, as.data.frame, as.data.table}). (2) is determined by the \code{keep.attr} argument: \code{keep.attr = TRUE} seeks to preserve the attributes of the object. Its effect is like copying \code{attributes(converted) <- attributes(original)}, and then modifying the \code{"dim", "dimnames", "names", "row.names"} and \code{"levels"} attributes as necessitated by the conversion task. \code{keep.attr = FALSE} only converts / assigns / removes these attributes and drops all others. (3) is determined by the \code{class} argument: Setting \code{class = "myclass"} will yield a converted object of class \code{"myclass"}, with any other / prior classes being removed by this replacement. Setting \code{class = NULL} does NOT mean that a class \code{NULL} is assigned (which would remove the class attribute), but rather that the default classes are assigned: \code{qM} assigns no class, \code{qDF} a class \code{"data.frame"}, and \code{qDT} a class \code{c("data.table", "data.frame")}. At this point there is an interaction with \code{keep.attr}: If \code{keep.attr = TRUE} and \code{class = NULL} and the object converted already inherits the respective default classes, then any other inherited classes will also be preserved (with \code{qM(x, keep.attr = TRUE, class = NULL)} any class will be preserved if \code{is.matrix(x)} evaluates to \code{TRUE}.) The default \code{keep.attr = FALSE} ensures \emph{hard} conversions so that all unnecessary attributes are dropped. Furthermore in \code{qDF/qDT/qTBL} the default classes were explicitly assigned. This is to ensure that the default methods apply, even if the user chooses to preserve further attributes. For \code{qM} a more lenient default setup was chosen to enable the full preservation of time series matrices with \code{keep.attr = TRUE}. If the user wants to keep attributes attached to a matrix but make sure that all default methods work properly, either one of \code{qM(x, keep.attr = TRUE, class = "matrix")} or \code{unclass(qM(x, keep.attr = TRUE))} should be employed. } \value{ \code{qDF} - returns a data.frame\cr \code{qDT} - returns a \emph{data.table}\cr \code{qTBL} - returns a \emph{tibble}\cr \code{qM} - returns a matrix\cr \code{mctl}, \code{mrtl} - return a list, data frame or \emph{data.table} \cr \code{qF} - returns a factor\cr \code{as_numeric_factor} - returns X with factors converted to numeric (double) variables\cr \code{as_integer_factor} - returns X with factors converted to integer variables\cr \code{as_character_factor} - returns X with factors converted to character variables } % \note{ % \code{qTBL} works similarly to \code{qDT} assigning different classes, i.e. \code{qTBL(x)} is equivalent to \code{qDT(x, class = c("tbl_df", "tbl", "data.frame"))}. Similar converters for other data frame based classes are easily created from \code{qDF} and \code{qDT}. The principle difference between them is that \code{qDF} preserves rownames whereas \code{qDT} always assigns integer rownames. % } \seealso{ \code{\link{qF}}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Basic Examples mtcarsM <- qM(mtcars) # Matrix from data.frame mtcarsDT <- qDT(mtcarsM) # data.table from matrix columns mtcarsTBL <- qTBL(mtcarsM) # tibble from matrix columns head(mrtl(mtcarsM, TRUE, "data.frame")) # data.frame from matrix rows, etc.. head(qDF(mtcarsM, "cars")) # Adding a row.names column when converting from matrix head(qDT(mtcars, "cars")) # Saving row.names when converting data frame to data.table head(qM(iris, "Species")) # Examples converting data to matrix, saving information head(qM(GGDC10S, is.character)) # as rownames head(qM(gv(GGDC10S, -(2:3)), 1:3, sep = "-")) # plm-style rownames qDF(fmean(mtcars), c("cars", "mean")) # Data frame from named vector, with names # mrtl() and mctl() are very useful for iteration over matrices # Think of a coordninates matrix e.g. from sf::st_coordinates() coord <- matrix(rnorm(10), ncol = 2, dimnames = list(NULL, c("X", "Y"))) # Then we can for (d in mrtl(coord)) { cat("lon =", d[1], ", lat =", d[2], fill = TRUE) # do something complicated ... } rm(coord) ## Factors cylF <- qF(mtcars$cyl) # Factor from atomic vector cylF # Factor to numeric conversions identical(mtcars, as_numeric_factor(dapply(mtcars, qF))) % ## Explaining the interaction of keep.attr and class. Consider the time series EuStockMarkets % plot() } \keyword{manip} \keyword{documentation} collapse/man/fmean.Rd0000644000176200001440000001436714777170130014226 0ustar liggesusers\name{fmean} \alias{fmean} \alias{fmean.default} \alias{fmean.matrix} \alias{fmean.data.frame} \alias{fmean.grouped_df} \title{Fast (Grouped, Weighted) Mean for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fmean} is a generic function that computes the (column-wise) mean of \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) mean. } \usage{ fmean(x, \dots) \method{fmean}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, nthreads = .op[["nthreads"]], \dots) \method{fmean}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], \dots) \method{fmean}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], \dots) \method{fmean}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], nthreads = .op[["nthreads"]], \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{nthreads}{integer. The number of threads to utilize. See Details of \code{\link{fsum}}. } \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain summed weighting variable after computation (if contained in \code{grouped_df}).} \item{stub}{character. If \code{keep.w = TRUE} and \code{stub = TRUE} (default), the summed weights column is prefixed by \code{"sum."}. Users can specify a different prefix through this argument, or set it to \code{FALSE} to avoid prefixing.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ % Non-grouped mean computations internally utilize long-doubles in C++, for additional numeric precision. % Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping them in the computation (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{mean}} which just runs through without any checks). The weighted mean is computed as \code{sum(x * w) / sum(w)}, using a single pass in C. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. %This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. %When applied to data frames with groups or \code{drop = FALSE}, \code{fmean} preserves all column attributes (such as variable labels) but does not distinguish between classed and unclassed object (thus applying \code{fmean} to a factor column will give a 'malformed factor' error). The attributes of the data frame itself are also preserved. For further computational details see \code{\link{fsum}}, which works equivalently. } \value{ The (\code{w} weighted) mean of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its (grouped, weighted) mean. } \seealso{ \code{\link{fmedian}}, \code{\link{fmode}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fmean(mpg) # Simple mean fmean(mpg, w = mtcars$hp) # Weighted mean: Weighted by hp fmean(mpg, TRA = "-") # Simple transformation: demeaning (See also ?W) fmean(mpg, mtcars$cyl) # Grouped mean fmean(mpg, mtcars[8:9]) # another grouped mean. g <- GRP(mtcars[c(2,8:9)]) fmean(mpg, g) # Pre-computing groups speeds up the computation fmean(mpg, g, mtcars$hp) # Grouped weighted mean fmean(mpg, g, TRA = "-") # Demeaning by group fmean(mpg, g, mtcars$hp, "-") # Group-demeaning using weighted group means ## data.frame method fmean(mtcars) fmean(mtcars, g) fmean(fgroup_by(mtcars, cyl, vs, am)) # Another way of doing it.. head(fmean(mtcars, g, TRA = "-")) # etc.. ## matrix method m <- qM(mtcars) fmean(m) fmean(m, g) head(fmean(m, g, TRA = "-")) # etc.. ## method for grouped data frames - created with dplyr::group_by or fgroup_by mtcars |> fgroup_by(cyl,vs,am) |> fmean() # Ordinary mtcars |> fgroup_by(cyl,vs,am) |> fmean(hp) # Weighted mtcars |> fgroup_by(cyl,vs,am) |> fmean(hp, "-") # Weighted Transform mtcars |> fgroup_by(cyl,vs,am) |> fselect(mpg,hp) |> fmean(hp, "-") # Only mpg } \keyword{univar} \keyword{manip} collapse/man/fcount.Rd0000644000176200001440000001030115202504365014411 0ustar liggesusers\name{fcount} \alias{fcount} \alias{fcountv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Efficiently Count Observations by Group } \description{ A much faster replacement for \code{dplyr::count}. } \usage{ fcount(x, ..., w = NULL, name = "N", add = FALSE, sort = FALSE, decreasing = FALSE, drop = TRUE) fcountv(x, cols = NULL, w = NULL, name = "N", add = FALSE, sort = FALSE, drop = TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a data frame or list-like object, including 'grouped_df' or 'indexed_frame'. Atomic vectors or matrices can also be passed, but will be sent through \code{\link{qDF}}. } \item{\dots}{for \code{fcount}: names or sequences of columns to count cases by - passed to \code{\link{fselect}}. For \code{fcountv}: further arguments passed to \code{\link{GRP}} (such as \code{decreasing}, \code{na.last}, \code{method}, \code{effect} etc.). Leaving this empty will count on all columns. } \item{cols}{select columns to count cases by, using column names, indices, a logical vector or a selector function (e.g. \code{is_categorical}).} \item{w}{a numeric vector of weights, may contain missing values. In \code{fcount} this can also be the (unquoted) name of a column in the data frame. \code{fcountv} also supports a single character name. \emph{Note} that the corresponding argument in \code{dplyr::count} is called \code{wt}, but \emph{collapse} has a global default for weights arguments to be called \code{w}.} \item{name}{character. The name of the column containing the count or sum of weights. \code{dplyr::count} it is called \code{"n"}, but \code{"N"} is more consistent with the rest of \emph{collapse} and \emph{data.table}.} \item{add}{\code{TRUE} adds the count column to \code{x}. Alternatively \code{add = "group_vars"} (or \code{add = "gv"} for parsimony) can be used to retain only the variables selected for counting in \code{x} and the count.} \item{sort, decreasing}{arguments passed to \code{\link{GRP}} affecting the order of rows in the output (if \code{add = FALSE}), and the algorithm used for counting. In general, \code{sort = FALSE} is faster unless data is already sorted by the columns used for counting. } \item{drop}{logical. \code{FALSE} retains zero-count rows for unobserved combinations of factor levels (analogous to \code{dplyr::count(..., .drop = FALSE)}); applies only when at least one of the counted columns is a factor. See \code{\link{GRP}} (\code{drop} argument of \code{GRP.default}).} } \value{ If \code{x} is a list, an object of the same type as \code{x} with a column (\code{name}) added at the end giving the count. Otherwise, if \code{x} is atomic, a data frame returned from \code{\link[=qDF]{qDF(x)}} with the count column added. By default (\code{add = FALSE}) only the unique rows of \code{x} of the columns used for counting are returned. } \seealso{ \code{\link{GRPN}}, \code{\link{fnobs}}, \code{\link{fndistinct}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ fcount(mtcars, cyl, vs, am) fcountv(mtcars, cols = .c(cyl, vs, am)) fcount(mtcars, cyl, vs, am, sort = TRUE) fcount(mtcars, cyl, vs, am, add = TRUE) fcount(mtcars, cyl, vs, am, add = "group_vars") ## With grouped data mtcars |> fgroup_by(cyl, vs, am) |> fcount() mtcars |> fgroup_by(cyl, vs, am) |> fcount(add = TRUE) mtcars |> fgroup_by(cyl, vs, am) |> fcount(add = "group_vars") ## With indexed data: by default counting on the first index variable wlddev |> findex_by(country, year) |> fcount() wlddev |> findex_by(country, year) |> fcount(add = TRUE) # Use fcountv to pass additional arguments to GRP.pdata.frame, # here using the effect argument to choose a different index variable wlddev |> findex_by(country, year) |> fcountv(effect = "year") wlddev |> findex_by(country, year) |> fcountv(add = "group_vars", effect = "year") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/flm.Rd0000644000176200001440000001376614777170130013720 0ustar liggesusers\name{flm} \alias{flm} \alias{flm.default} \alias{flm.formula} \title{ Fast (Weighted) Linear Model Fitting } \description{ \code{flm} is a fast linear model command that (by default) only returns a coefficient matrix. 6 different efficient fitting methods are implemented: 4 using base R linear algebra, and 2 utilizing the \emph{RcppArmadillo} and \emph{RcppEigen} packages. The function itself only has an overhead of 5-10 microseconds, and is thus well suited as a bootstrap workhorse. } \usage{ flm(...) # Internal method dispatch: default if is.atomic(..1) \method{flm}{default}(y, X, w = NULL, add.icpt = FALSE, return.raw = FALSE, method = c("lm", "solve", "qr", "arma", "chol", "eigen"), eigen.method = 3L, ...) \method{flm}{formula}(formula, data = NULL, weights = NULL, add.icpt = TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{a response vector or matrix. Multiple dependent variables are only supported by methods "lm", "solve", "qr" and "chol".} \item{X}{a matrix of regressors.} \item{w}{a weight vector.} \item{add.icpt}{logical. \code{TRUE} adds an intercept column named '(Intercept)' to \code{X}.} \item{formula}{a \code{\link{lm}} formula, without factors, interaction terms or other operators (\code{:}, \code{*}, \code{^}, \code{-}, etc.), may include regular transformations e.g. \code{log(var)}, \code{cbind(y1, y2)}, \code{magrittr::multiply_by(var1, var2)}, \code{magrittr::raise_to_power(var, 2)}.} \item{data}{a named list or data frame.} \item{weights}{a weights vector or expression that results in a vector when evaluated in the \code{data} environment.} % \item{sparse}{logical. \code{TRUE} coerces \code{X} to a sparse matrix using \code{as(X, "dgCMatrix")}.} \item{return.raw}{logical. \code{TRUE} returns the original output from the different methods. For 'lm', 'arma' and 'eigen', this includes additional statistics such as residuals, fitted values or standard errors. The other methods just return coefficients but in different formats. } \item{method}{an integer or character string specifying the method of computation: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "lm" \tab\tab uses \code{\link{.lm.fit}}. \cr 2 \tab\tab "solve" \tab\tab \code{solve(crossprod(X), crossprod(X, y))}. \cr 3 \tab\tab "qr" \tab\tab \code{qr.coef(qr(X), y)}. \cr 4 \tab\tab "arma" \tab\tab uses \code{RcppArmadillo::fastLmPure}. \cr 5 \tab\tab "chol" \tab\tab \code{chol2inv(chol(crossprod(X))) \%*\% crossprod(X, y)} (quite fast, requires \code{crossprod(X)} to be positive definite i.e. problematic if multicollinearity). \cr 6 \tab\tab "eigen" \tab\tab uses \code{RcppEigen::fastLmPure} (very fast but, depending on the method, also unstable if multicollinearity). \cr } } \item{eigen.method}{integer. Select the method of computation used by \code{RcppEigen::fastLmPure}: \tabular{lll}{\emph{ Int. } \tab\tab \emph{ Description } \cr 0 \tab\tab column-pivoted QR decomposition. \cr 1 \tab\tab unpivoted QR decomposition. \cr 2 \tab\tab LLT Cholesky. \cr 3 \tab\tab LDLT Cholesky. \cr 4 \tab\tab Jacobi singular value decomposition (SVD). \cr 5 \tab\tab method based on the eigenvalue-eigenvector decomposition of X'X. \cr } See \code{vignette("RcppEigen-Introduction", package = "RcppEigen")} for details on these methods and benchmark results. Run \code{source(system.file("examples", "lmBenchmark.R", package = "RcppEigen"))} to re-run the benchmark on your machine. } \item{...}{further arguments passed to other methods. For the formula method further arguments passed to the default method. Additional arguments can also be passed to the default method e.g. \code{tol = value} to set a numerical tolerance for the solution - applicable with methods "lm", "solve" and "qr" (default is \code{1e-7}), or \code{LAPACK = TRUE} with method "qr" to use LAPACK routines to for the qr decomposition (typically faster than the LINPACK default).} } % \details{ %% ~~ If necessary, more details than the description above ~~ % } \value{ If \code{return.raw = FALSE}, a matrix of coefficients with the rows corresponding to the columns of \code{X}, otherwise the raw results from the various methods are returned. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ Method "qr" supports sparse matrices, so for an \code{X} matrix with many dummy variables consider method "qr" passing \code{as(X, "dgCMatrix")} instead of just \code{X}. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link[=HDW]{fhdwithin/HDW}}, \code{\link{fFtest}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Simple usage coef <- flm(mpg ~ hp + carb, mtcars, w = wt) # Same thing in programming usage flm(mtcars$mpg, qM(mtcars[c("hp","carb")]), mtcars$wt, add.icpt = TRUE) # Check this is correct lmcoef <- coef(lm(mpg ~ hp + carb, weights = wt, mtcars)) all.equal(drop(coef), lmcoef) # Multi-dependent variable (only some methods) flm(cbind(mpg, qsec) ~ hp + carb, mtcars, w = wt) # Returning raw results from solver: different for different methods flm(mpg ~ hp + carb, mtcars, return.raw = TRUE) flm(mpg ~ hp + carb, mtcars, method = "qr", return.raw = TRUE) \donttest{ % Need RcppArmadillo and RcppEigen # Test that all methods give the same result all_obj_equal(lapply(1:6, function(i) flm(mpg ~ hp + carb, mtcars, w = wt, method = i))) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fast-statistical-functions.Rd0000644000176200001440000002214214777170130020413 0ustar liggesusers\name{fast-statistical-functions} \alias{A1-fast-statistical-functions} \alias{fast-statistical-functions} \alias{.FAST_STAT_FUN} \alias{.FAST_FUN} \title{Fast (Grouped, Weighted) Statistical Functions for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ With \code{\link{fsum}}, \code{\link{fprod}}, \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fmode}}, \code{\link{fvar}}, \code{\link{fsd}}, \code{\link{fmin}}, \code{\link{fmax}}, \code{\link{fnth}}, \code{\link{ffirst}}, \code{\link{flast}}, \code{\link{fnobs}} and \code{\link{fndistinct}}, \emph{collapse} presents a coherent set of extremely fast and flexible statistical functions (S3 generics) to perform column-wise, grouped and weighted computations on vectors, matrices and data frames, with special support for grouped data frames / tibbles (\emph{dplyr}) and \emph{data.table}'s. } \section{Usage}{\if{html}{\out{
}}\preformatted{ ## All functions (FUN) follow a common syntax in 4 methods: FUN(x, ...) ## Default S3 method: FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, [nthreads = 1L,] ...) ## S3 method for class 'matrix' FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, drop = TRUE, [nthreads = 1L,] ...) ## S3 method for class 'data.frame' FUN(x, g = NULL, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = TRUE, drop = TRUE, [nthreads = 1L,] ...) ## S3 method for class 'grouped_df' FUN(x, [w = NULL,] TRA = NULL, [na.rm = TRUE,] use.g.names = FALSE, keep.group_vars = TRUE, [keep.w = TRUE,] [stub = TRUE,] [nthreads = 1L,] ...) }\if{html}{\out{
}} } \section{Arguments}{ \tabular{lll}{ \code{x} \tab \tab a vector, matrix, data frame or grouped data frame (class 'grouped_df'). \cr \code{g} \tab \tab a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}. \cr \code{w} \tab \tab a numeric vector of (non-negative) weights, may contain missing values. Supported by \code{\link{fsum}}, \code{\link{fprod}}, \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fnth}}, \code{\link{fvar}}, \code{\link{fsd}} and \code{\link{fmode}}. \cr \code{TRA} \tab \tab an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}. \cr \code{na.rm} \tab \tab logical. Skip missing values in \code{x}. Defaults to \code{TRUE} in all functions and implemented at very little computational cost. Not available for \code{\link{fnobs}}. \cr \code{use.g.names} \tab \tab logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s. \cr \code{nthreads} \tab \tab integer. The number of threads to utilize. Supported by \code{\link{fsum}}, \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fnth}}, \code{\link{fmode}} and \code{\link{fndistinct}}. \cr \code{drop} \tab \tab \emph{matrix and data.frame methods:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}. \cr \code{keep.group_vars} \tab \tab \emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation. By default grouping variables are added, even if not present in the grouped_df. \cr \code{keep.w} \tab \tab \emph{grouped_df method:} Logical. \code{TRUE} (default) also aggregates weights and saves them in a column, \code{FALSE} removes weighting variable after computation (if contained in \code{grouped_df}). \cr \code{stub} \tab \tab \emph{grouped_df method:} Character. If \code{keep.w = TRUE} and \code{stub = TRUE} (default), the aggregated weights column is prefixed by the name of the aggregation function (mostly \code{"sum."}). Users can specify a different prefix through this argument, or set it to \code{FALSE} to avoid prefixing.\cr \code{\dots} \tab \tab arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly (except for the grouped_df method which always returns visible output). \cr } } \section{Details}{ Please see the documentation of individual functions. } \section{Value}{ \code{x} suitably aggregated or transformed. Data frame column-attributes and overall attributes are generally preserved if the output is of the same data type. } \section{Related Functionality}{ \itemize{ \item Functions \code{\link{fquantile}} and \code{\link{frange}} are for atomic vectors. \item Panel-decomposed (i.e. between and within) statistics as well as grouped and weighted skewness and kurtosis are implemented in \code{\link{qsu}}. \item The vector-valued functions and operators \code{\link{fcumsum}}, \code{\link[=fscale]{fscale/STD}}, \code{\link[=fbetween]{fbetween/B}}, \code{\link[=fhdbetween]{fhdbetween/HDB}}, \code{\link[=fwithin]{fwithin/W}}, \code{\link[=fhdwithin]{fhdwithin/HDW}}, \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}} and \code{\link[=fgrowth]{fgrowth/G}} are grouped under \link[=data-transformations]{Data Transformations} and \link[=time-series-panel-series]{Time Series and Panel Series}. These functions also support \link[=indexing]{indexed data} (\emph{plm}). } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=data-transformations]{Data Transformations}, \link[=time-series-panel-series]{Time Series and Panel Series} } \section{Examples}{\if{html}{\out{
}}\preformatted{ ## default vector method mpg <- mtcars$mpg fsum(mpg) # Simple sum fsum(mpg, TRA = "/") # Simple transformation: divide all values by the sum fsum(mpg, mtcars$cyl) # Grouped sum fmean(mpg, mtcars$cyl) # Grouped mean fmean(mpg, w = mtcars$hp) # Weighted mean, weighted by hp fmean(mpg, mtcars$cyl, mtcars$hp) # Grouped mean, weighted by hp fsum(mpg, mtcars$cyl, TRA = "/") # Proportions / division by group sums fmean(mpg, mtcars$cyl, mtcars$hp, # Subtract weighted group means, see also ?fwithin TRA = "-") ## data.frame method fsum(mtcars) fsum(mtcars, TRA = "\%") # This computes percentages fsum(mtcars, mtcars[c(2,8:9)]) # Grouped column sum g <- GRP(mtcars, ~ cyl + vs + am) # Here precomputing the groups! fsum(mtcars, g) # Faster !! fmean(mtcars, g, mtcars$hp) fmean(mtcars, g, mtcars$hp, "-") # Demeaning by weighted group means.. fmean(fgroup_by(mtcars, cyl, vs, am), hp, "-") # Another way of doing it.. fmode(wlddev, drop = FALSE) # Compute statistical modes of variables in this data fmode(wlddev, wlddev$income) # Grouped statistical modes .. ## matrix method m <- qM(mtcars) fsum(m) fsum(m, g) # .. ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) mtcars |> group_by(cyl,vs,am) |> select(mpg,carb) |> fsum() mtcars |> fgroup_by(cyl,vs,am) |> fselect(mpg,carb) |> fsum() # equivalent and faster !! mtcars |> fgroup_by(cyl,vs,am) |> fsum(TRA = "\%") mtcars |> fgroup_by(cyl,vs,am) |> fmean(hp) # weighted grouped mean, save sum of weights mtcars |> fgroup_by(cyl,vs,am) |> fmean(hp, keep.group_vars = FALSE) }\if{html}{\out{
}} } \section{Benchmark}{\if{html}{\out{
}}\preformatted{ ## This compares fsum with data.table (2 threads) and base::rowsum # Starting with small data mtcDT <- qDT(mtcars) f <- qF(mtcars$cyl) library(microbenchmark) microbenchmark(mtcDT[, lapply(.SD, sum), by = f], rowsum(mtcDT, f, reorder = FALSE), fsum(mtcDT, f, na.rm = FALSE), unit = "relative") # expr min lq mean median uq max neval cld # mtcDT[, lapply(.SD, sum), by = f] 145.436928 123.542134 88.681111 98.336378 71.880479 85.217726 100 c # rowsum(mtcDT, f, reorder = FALSE) 2.833333 2.798203 2.489064 2.937889 2.425724 2.181173 100 b # fsum(mtcDT, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100 a # Now larger data tdata <- qDT(replicate(100, rnorm(1e5), simplify = FALSE)) # 100 columns with 100.000 obs f <- qF(sample.int(1e4, 1e5, TRUE)) # A factor with 10.000 groups microbenchmark(tdata[, lapply(.SD, sum), by = f], rowsum(tdata, f, reorder = FALSE), fsum(tdata, f, na.rm = FALSE), unit = "relative") # expr min lq mean median uq max neval cld # tdata[, lapply(.SD, sum), by = f] 2.646992 2.975489 2.834771 3.081313 3.120070 1.2766475 100 c # rowsum(tdata, f, reorder = FALSE) 1.747567 1.753313 1.629036 1.758043 1.839348 0.2720937 100 b # fsum(tdata, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 100 a }\if{html}{\out{
}} } \keyword{univar} \keyword{manip} \keyword{documentation} collapse/man/fast-grouping-ordering.Rd0000644000176200001440000001734714777170130017535 0ustar liggesusers\name{fast-grouping-ordering} \alias{A2-fast-grouping-ordering} \alias{fast-grouping-ordering} \title{Fast Grouping and Ordering} % \emph{collapse} \description{ \emph{collapse} provides the following functions to efficiently group and order data: \itemize{ \item \code{\link[=radixorder]{radixorder(v)}}, provides fast radix-ordering through direct access to the method \code{\link[=order]{order(..., method = "radix")}}, as well as the possibility to return some attributes very useful for grouping data and finding unique elements. The function \code{\link[=roworder]{roworder(v)}} efficiently reorders a data frame. %The source code for both \code{\link{radixorder}} and \code{\link{order(\dots, method = "radix")}, comes from \code{data.table:::forder}. %\code{\link{radixorder}} was modified to optionally return either a vector of group starts, a vector of group sizes, or both as an attribute, and also an attribute providing the size of the largest group and a logical statement on whether the input was already ordered. The function \code{\link{radixorderv}} exists as a programmers alternative. \item \code{\link[=group]{group(v)}} provides fast grouping in first-appearance order of rows, based on a hashing algorithm in C. Objects have class 'qG', see below. \item \code{\link{GRP}} creates \emph{collapse} grouping objects of class 'GRP' based on \code{\link{radixorder}} or \code{\link{group}}. 'GRP' objects form the central building block for grouped operations and programming in \emph{collapse} and are very efficient inputs to all \emph{collapse} functions supporting grouped operations. %A 'GRP' object provides information about (1) the number of groups, (2) which rows belong to which group, (3) the group sizes, (4) the unique groups, (5) the variables used for grouping, (6) whether the grouping and initial inputs were ordered and (7) (optionally) the output from \code{\link{radixorder}} containing the ordering vector with group starts and maximum group size attributes. \item \code{\link{fgroup_by}} provides a fast replacement for \code{dplyr::group_by}, creating a grouped data frame (or data.table / tibble etc.) with a 'GRP' object attached. This grouped frame can be used for grouped operations using \emph{collapse}'s fast functions. % \emph{dplyr} functions will treat this tibble like an ordinary (non-grouped) one. \item \code{\link{fmatch}} is a fast alternative to \code{\link[base]{match}}, which also supports matching of data frame rows. \item \code{\link{funique}} is a faster version of \code{\link{unique}}. The data frame method also allows selecting unique rows according to a subset of the columns. \code{\link{fnunique}} efficiently calculates the number of unique values/rows. \code{\link{fduplicated}} is a fast alternative to \code{\link{duplicated}}. \code{\link{any_duplicated}} is a simpler and faster alternative to \code{\link{anyDuplicated}}. \item \code{\link[=fcount]{fcount(v)}} computes group counts based on a subset of columns in the data, and is a fast replacement for \code{dplyr::count}. % \code{\link{fcountv}} is a programmers version of the function. \item \code{\link{qF}}, shorthand for 'quick-factor' implements very fast factor generation from atomic vectors using either radix ordering \code{method = "radix"} or hashing \code{method = "hash"}. Factors can also be used for efficient grouped programming with \emph{collapse} functions, especially if they are generated using \code{qF(x, na.exclude = FALSE)} which assigns a level to missing values and attaches a class 'na.included' ensuring that no additional missing value checks are executed by \emph{collapse} functions. \item \code{\link{qG}}, shorthand for 'quick-group', generates a kind of factor-light without the levels attribute but instead an attribute providing the number of levels. Optionally the levels / groups can be attached, but without converting them to character. Objects have a class 'qG', which is also recognized in the \emph{collapse} ecosystem. \item \code{\link{fdroplevels}} is a substantially faster replacement for \code{\link{droplevels}}. \item \code{\link{finteraction}} is a fast alternative to \code{\link{interaction}} implemented as a wrapper around \code{as_factor_GRP(GRP(\dots))}. It can be used to generate a factor from multiple vectors, factors or a list of vectors / factors. Unused factor levels are always dropped. \item \code{\link{groupid}} is a generalization of \code{data.table::rleid} providing a run-length type group-id from atomic vectors. It is generalization as it also supports passing an ordering vector and skipping missing values. For example \code{\link{qF}} and \code{\link{qG}} with \code{method = "radix"} are essentially implemented using \code{groupid(x, radixorder(x))}. \item \code{\link{seqid}} is a specialized function which creates a group-id from sequences of integer values. For any regular panel dataset \code{groupid(id, order(id, time))} and \code{seqid(time, order(id, time))} provide the same id variable. \code{\link{seqid}} is especially useful for identifying discontinuities in time-sequences. \item \code{\link{timeid}} is a specialized function to convert integer or double vectors representing time (such as 'Date', 'POSIXct' etc.) to factor or 'qG' object based on the greatest common divisor of elements (thus preserving gaps in time intervals). } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr \code{\link[=radixorder]{radixorder(v)}} \tab\tab No methods, for data frames and vectors \tab\tab Radix-based ordering + grouping information \cr \code{\link[=roworder]{roworder(v)}} \tab\tab No methods, for data frames incl. pdata.frame \tab\tab Row sorting/reordering \cr \code{\link[=group]{group(v)}} \tab\tab No methods, for data frames and vectors \tab\tab Hash-based grouping + grouping information \cr \code{\link{GRP}} \tab\tab \code{default, GRP, factor, qG, grouped_df, pseries, pdata.frame} \tab\tab Fast grouping and a flexible grouping object \cr \code{\link{fgroup_by}} \tab\tab No methods, for data frames \tab\tab Fast grouped data frame \cr \code{\link{fmatch}} \tab\tab No methods, for vectors and data frames \tab\tab Fast matching \cr \code{\link{funique}}, \code{\link{fnunique}}, \code{\link{fduplicated}}, \code{\link{any_duplicated}} \tab\tab \code{default, data.frame, sf, pseries, pdata.frame, list} \tab\tab Fast (number of) unique values/rows \cr \code{\link[=fcount]{fcount(v)}} \tab\tab Internal generic, supports vectors, matrices, data.frames, lists, grouped_df and pdata.frame \tab\tab Fast group counts \cr \code{\link{qF}} \tab\tab No methods, for vectors \tab\tab Quick factor generation \cr \code{\link{qG}} \tab\tab No methods, for vectors \tab\tab Quick grouping of vectors and a 'factor-light' class \cr \code{\link{fdroplevels}} \tab\tab \code{factor, data.frame, list} \tab\tab Fast removal of unused factor levels \cr \code{\link{finteraction}} \tab\tab No methods, for data frames and vectors \tab\tab Fast interactions \cr \code{\link{groupid}} \tab\tab No methods, for vectors \tab\tab Run-length type group-id \cr \code{\link{seqid}} \tab\tab No methods, for integer vectors \tab\tab Run-length type integer sequence-id \cr \code{\link{timeid}} \tab\tab No methods, for integer or double vectors \tab\tab Integer-id from time/date sequences \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=time-series-panel-series]{Time Series and Panel Series} } \keyword{manip} \keyword{documentation} collapse/man/fhdbetween_fhdwithin.Rd0000644000176200001440000003400414777170130017305 0ustar liggesusers\name{fhdbetween-fhdwithin} \alias{fhdbetween} \alias{fhdbetween.default} \alias{fhdbetween.matrix} \alias{fhdbetween.data.frame} \alias{fhdbetween.pseries} \alias{fhdbetween.pdata.frame} \alias{fhdwithin} \alias{fhdwithin.default} \alias{fhdwithin.matrix} \alias{fhdwithin.data.frame} \alias{fhdwithin.pseries} \alias{fhdwithin.pdata.frame} \alias{HDW} \alias{HDW.default} \alias{HDW.matrix} \alias{HDW.data.frame} \alias{HDW.pseries} \alias{HDW.pdata.frame} \alias{HDB} \alias{HDB.default} \alias{HDB.matrix} \alias{HDB.data.frame} \alias{HDB.pseries} \alias{HDB.pdata.frame} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Higher-Dimensional Centering and Linear Prediction } \description{ \code{fhdbetween} is a generalization of \code{fbetween} to efficiently predict with multiple factors and linear models (i.e. predict with vectors/factors, matrices, or data frames/lists where the latter may contain multiple factor variables). Similarly, \code{fhdwithin} is a generalization of \code{fwithin} to center on multiple factors and partial-out linear models. The corresponding operators \code{HDB} and \code{HDW} additionally allow to predict / partial out full \code{lm()} formulas with interactions between variables. } \usage{ fhdbetween(x, \dots) fhdwithin(x, \dots) HDB(x, \dots) HDW(x, \dots) \method{fhdbetween}{default}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", \dots) \method{fhdwithin}{default}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", \dots) \method{HDB}{default}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", \dots) \method{HDW}{default}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", \dots) \method{fhdbetween}{matrix}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", \dots) \method{fhdwithin}{matrix}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, lm.method = "qr", \dots) \method{HDB}{matrix}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], lm.method = "qr", \dots) \method{HDW}{matrix}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], lm.method = "qr", \dots) \method{fhdbetween}{data.frame}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, lm.method = "qr", \dots) \method{fhdwithin}{data.frame}(x, fl, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, lm.method = "qr", \dots) \method{HDB}{data.frame}(x, fl, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, stub = .op[["stub"]], lm.method = "qr", \dots) \method{HDW}{data.frame}(x, fl, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, variable.wise = FALSE, stub = .op[["stub"]], lm.method = "qr", \dots) # Methods for indexed data / compatibility with plm: \method{fhdbetween}{pseries}(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, \dots) \method{fhdwithin}{pseries}(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, \dots) \method{HDB}{pseries}(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, \dots) \method{HDW}{pseries}(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, \dots) \method{fhdbetween}{pdata.frame}(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, \dots) \method{fhdwithin}{pdata.frame}(x, effect = "all", w = NULL, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, \dots) \method{HDB}{pdata.frame}(x, effect = "all", w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, stub = .op[["stub"]], \dots) \method{HDW}{pdata.frame}(x, effect = "all", w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = TRUE, variable.wise = TRUE, stub = .op[["stub"]], \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame, 'indexed_series' ('pseries') or 'indexed_frame' ('pdata.frame').} \item{fl}{a numeric vector, factor, matrix, data frame or list (which may or may not contain factors). In the \code{HDW/HDB} data frame method \code{fl} can also be a one-or two sided \code{lm()} formula with variables contained in \code{x}. Interactions \code{(:)} and full interactions \code{(*)} are supported. See Examples and the Note.} \item{w}{a vector of (non-negative) weights.} \item{cols}{\emph{data.frame methods}: Select columns to center (partial-out) or predict using column names, indices, a logical vector or a function. Unless specified otherwise all numeric columns are selected. If \code{NULL}, all columns are selected.} \item{na.rm}{remove missing values from both \code{x} and \code{fl}. by default rows with missing values in \code{x} or \code{fl} are removed. In that case an attribute "na.rm" is attached containing the rows removed.} \item{fill}{If \code{na.rm = TRUE}, \code{fill = TRUE} will not remove rows with missing values in \code{x} or \code{fl}, but fill them with \code{NA}'s.} \item{variable.wise}{\emph{(p)data.frame methods}: Setting \code{variable.wise = TRUE} will process each column individually i.e. use all non-missing cases in each column and in \code{fl} (\code{fl} is only checked for missing values if \code{na.rm = TRUE}). This is a lot less efficient but uses all data available in each column. } \item{effect}{\emph{plm} methods: Select which panel identifiers should be used for centering. 1L takes the first variable in the \link[=indexing]{index}, 2L the second etc.. Index variables can also be called by name using a character vector. The keyword \code{"all"} uses all identifiers. } \item{stub}{character. A prefix/stub to add to the names of all transformed columns. \code{TRUE} (default) uses \code{"HDW."/"HDB."}, \code{FALSE} will not rename columns.} \item{lm.method}{character. The linear fitting method. Supported are \code{"chol"} and \code{"qr"}. See \code{\link{flm}}.} \item{\dots}{further arguments passed to \code{fixest::demean} (other than \code{notes} and \code{im_confident}) and \code{\link{chol}} / \code{\link{qr}}. Possible choices are \code{tol} to set a uniform numerical tolerance for the entire fitting process, or \code{nthreads} and \code{iter} to govern the higher-order centering process.} } \details{ \code{fhdbetween/HDB} and \code{fhdwithin/HDW} are powerful functions for high-dimensional linear prediction problems involving large factors and datasets, but can just as well handle ordinary regression problems. They are implemented as efficient wrappers around \code{\link[=fwithin]{fbetween / fwithin}}, \code{\link{flm}} and some C++ code from the \code{fixest} package that is imported for higher-order centering tasks (thus \code{fixest} needs to be installed for problems involving more than one factor). Intended areas of use are to efficiently obtain residuals and predicted values from data, and to prepare data for complex linear models involving multiple levels of fixed effects. Such models can now be fitted using \code{(g)lm()} on data prepared with \code{fhdwithin / HDW} (relying on bootstrapped SE's for inference, or implementing the appropriate corrections). See Examples. If \code{fl} is a vector or matrix, the result are identical to \code{lm} i.e. \code{fhdbetween / HDB} returns \code{fitted(lm(x ~ fl))} and \code{fhdwithin / HDW} \code{residuals(lm(x ~ fl))}. If \code{fl} is a list containing factors, all variables in \code{x} and non-factor variables in \code{fl} are centered on these factors using either \code{\link[=fwithin]{fbetween / fwithin}} for a single factor or \code{fixest} C++ code for multiple factors. Afterwards the centered data is regressed on the centered predictors. If \code{fl} is just a list of factors, \code{fhdwithin/HDW} returns the centered data and \code{fhdbetween/HDB} the corresponding means. Take as a most general example a list \code{fl = list(fct1, fct2, ..., var1, var2, ...)} where \code{fcti} are factors and \code{vari} are continuous variables. The output of \code{fhdwithin/HDW | fhdbetween/HDB} will then be identical to calling \code{resid | fitted} on \code{lm(x ~ fct1 + fct2 + ... + var1 + var2 + ...)}. The computations performed by \code{fhdwithin/HDW} and \code{fhdbetween/HDB} are however much faster and more memory efficient than \code{lm} because factors are not passed to \code{\link{model.matrix}} and expanded to matrices of dummies but projected out beforehand. The formula interface to the data.frame method (only supported by the operators \code{HDW | HDB}) provides ease of use and allows for additional modeling complexity. For example it is possible to project out formulas like \code{HDW(data, ~ fct1*var1 + fct2:fct3 + var2:fct2:fct3 + var2:var3 + poly(var5,3)*fct5)} containing simple \code{(:)} or full \code{(*)} interactions of factors with continuous variables or polynomials of continuous variables, and two-or three-way interactions of factors and continuous variables. If the formula is one-sided as in the example above (the space left of \code{(~)} is left empty), the formula is applied to all variables selected through \code{cols}. The specification provided in \code{cols} (default: all numeric variables not used in the formula) can be overridden by supplying one-or more dependent variables. For example \code{HDW(data, var1 + var2 ~ fct1 + fct2)} will return a data.frame with \code{var1} and \code{var2} centered on \code{fct1} and \code{fct2}. The special methods for 'indexed_series' (\code{plm::pseries}) and 'indexed_frame's (\code{plm::pdata.frame}) center a panel series or variables in a panel data frame on all panel-identifiers. By default in these methods \code{fill = TRUE} and \code{variable.wise = TRUE}, so missing values are kept. This change in the default arguments was done to ensure a coherent framework of functions and operators applied to \emph{plm} panel data classes. } \note{ % \subsection{Caution with full (*) and factor-continuous variable interactions:}{ % In general full interactions specified with \code{(*)} can be very slow on large data, and \code{lfe::demeanlist} is also not very speedy on interaction between factors and continuous variables, so these structures should be used with caution (don't just specify an interaction like that on a large dataset, start with smaller data and see how long computations take. Upon further updates of \code{lfe::demeanlist}, performance might improve). % } \subsection{On the differences between \code{fhdwithin/HDW}\dots and \code{fwithin/W}\dots:}{ \itemize{ \item \code{fhdwithin/HDW} can center data on multiple factors and also partial out continuous variables and factor-continuous interactions while \code{fwithin/W} only centers on one factor or the interaction of a set of factors, and does that very efficiently. \item \code{HDW(data, ~ qF(group1) + qF(group2))} simultaneously centers numeric variables in data on \code{group1} and \code{group2}, while \code{W(data, ~ group1 + group2)} centers data on the interaction of \code{group1} and \code{group2}. The equivalent operation in \code{HDW} would be: \code{HDW(data, ~ qF(group1):qF(group2))}. \item \code{W} always does computations on the variable-wise complete observations (in both matrices and data frames), whereas by default \code{HDW} removes all cases missing in either \code{x} or \code{fl}. In short, \code{W(data, ~ group1 + group2)} is actually equivalent to \code{HDW(data, ~ qF(group1):qF(group2), variable.wise = TRUE)}. \code{HDW(data, ~ qF(group1):qF(group2))} would remove any missing cases. \item \code{fbetween/B} and \code{fwithin/W} have options to fill missing cases using group-averages and to add the overall mean back to group-demeaned data. These options are not available in \code{fhdbetween/HDB} and \code{fhdwithin/HDW}. Since \code{HDB} and \code{HDW} by default remove missing cases, they also don't have options to keep grouping-columns as in \code{B} and \code{W}. } } } \value{ \code{HDB} returns fitted values of regressing \code{x} on \code{fl}. \code{HDW} returns residuals. See Details and Examples. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link[=fbetween]{fbetween, fwithin}}, \code{\link{fscale}}, \code{\link{TRA}}, \code{\link{flm}}, \code{\link{fFtest}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ HDW(mtcars$mpg, mtcars$carb) # Simple regression problems HDW(mtcars$mpg, mtcars[-1]) HDW(mtcars$mpg, qM(mtcars[-1])) head(HDW(qM(mtcars[3:4]), mtcars[1:2])) head(HDW(iris[1:2], iris[3:4])) # Partialling columns 3 and 4 out of columns 1 and 2 head(HDW(iris[1:2], iris[3:5])) # Adding the Species factor -> fixed effect head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c + qF(year))) # Partialling out 2 fixed effects head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c + qF(year), variable.wise = TRUE)) # Variable-wise head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c + qF(year) + ODA)) # Adding ODA as a continuous regressor head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c:qF(decade) + qF(year) + ODA)) # Country-decade and year FE's head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c*year)) # Country specific time trends head(HDW(wlddev, PCGDP + LIFEEX ~ iso3c*poly(year, 3))) # Country specific cubic trends # More complex examples lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear + wt:gear:carb)) lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear + wt:gear:carb, data = mtcars) lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, ~ factor(cyl)*carb + vs + wt:gear)) lm(mpg ~ hp + factor(cyl)*carb + vs + wt:gear, data = mtcars) lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, ~ cyl*carb + vs + wt:gear)) lm(mpg ~ hp + cyl*carb + vs + wt:gear, data = mtcars) lm(HDW.mpg ~ HDW.hp, data = HDW(mtcars, mpg + hp ~ cyl*carb + factor(cyl)*poly(drat,2))) lm(mpg ~ hp + cyl*carb + factor(cyl)*poly(drat,2), data = mtcars) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line \keyword{multivariate} collapse/man/list-processing.Rd0000644000176200001440000001167014777170130016257 0ustar liggesusers\name{list-processing} \alias{A8-list-processing} \alias{list-processing} \title{List Processing} % \emph{collapse} \description{ \emph{collapse} provides the following set of functions to efficiently work with lists of R objects: \itemize{ \item \bold{Search and Identification}\itemize{ \item \code{\link{is_unlistable}} checks whether a (nested) list is composed of atomic objects in all final nodes, and thus unlistable to an atomic vector using \code{\link{unlist}}. \item \code{\link{ldepth}} determines the level of nesting of the list (i.e. the maximum number of nodes of the list-tree). \item \code{\link{has_elem}} searches elements in a list using element names, regular expressions applied to element names, or a function applied to the elements, and returns \code{TRUE} if any matches were found. } \item \bold{Subsetting} \itemize{ \item \code{\link{atomic_elem}} examines the top-level of a list and returns a sublist with the atomic elements. Conversely \code{\link{list_elem}} returns the sublist of elements which are themselves lists or list-like objects. \item \code{\link{reg_elem}} and \code{\link{irreg_elem}} are recursive versions of the former. \code{\link{reg_elem}} extracts the 'regular' part of the list-tree leading to atomic elements in the final nodes, while \code{\link{irreg_elem}} extracts the 'irregular' part of the list tree leading to non-atomic elements in the final nodes. (\emph{Tip}: try calling both on an \code{lm} object). Naturally for all lists \code{l}, \code{is_unlistable(reg_elem(l))} evaluates to \code{TRUE}. \item \code{\link{get_elem}} extracts elements from a list using element names, regular expressions applied to element names, a function applied to the elements, or element-indices used to subset the lowest-level sub-lists. by default the result is presented as a simplified list containing all matching elements. With the \code{keep.tree} option however \code{\link{get_elem}} can also be used to subset lists i.e. maintain the full tree but cut off non-matching branches. } \item \bold{Splitting and Transposition} \itemize{ \item \code{\link{rsplit}} recursively splits a vector or data frame into subsets according to combinations of (multiple) vectors / factors - by default returning a (nested) list. If \code{flatten = TRUE}, the list is flattened yielding the same result as \code{\link{split}}. \code{rsplit} is also faster than \code{\link{split}}, particularly for data frames. \item \code{\link{t_list}} efficiently transposes nested lists of lists, such as those obtained from splitting a data frame by multiple variables using \code{\link{rsplit}}. } \item \bold{Apply Functions} \itemize{ \item \code{\link{rapply2d}} is a recursive version of \code{\link{lapply}} with two key differences to \code{\link{rapply}} to apply a function to nested lists of data frames or other list-based objects. } \item \bold{Unlisting / Row-Binding} \itemize{ \item \code{\link{unlist2d}} efficiently unlists unlistable lists in 2-dimensions and creates a data frame (or \emph{data.table}) representation of the list. This is done by recursively flattening and row-binding R objects in the list while creating identifier columns for each level of the list-tree and (optionally) saving the row-names of the objects in a separate column. \code{\link{unlist2d}} can thus also be understood as a recursive generalization of \code{do.call(rbind, l)}, for lists of vectors, data frames, arrays or heterogeneous objects. A simpler version for non-recursive row-binding lists of lists / data.frames, is also available by \code{\link{rowbind}}. } } } \section{Table of Functions}{ \tabular{lll}{\emph{ Function } \tab\tab \emph{ Description } \cr % \code{\link{is.regular}} \tab\tab \code{function(x) is.atomic(x) || is.list(x)} \cr \code{\link{is_unlistable}} \tab\tab Checks if list is unlistable \cr \code{\link{ldepth}} \tab\tab Level of nesting / maximum depth of list-tree \cr \code{\link{has_elem}} \tab\tab Checks if list contains a certain element \cr \code{\link{get_elem}} \tab\tab Subset list / extract certain elements \cr \code{\link{atomic_elem}} \tab\tab Top-level subset atomic elements \cr \code{\link{list_elem}} \tab\tab Top-level subset list/list-like elements \cr \code{\link{reg_elem}} \tab\tab Recursive version of \code{atomic_elem}: Subset / extract 'regular' part of list \cr \code{\link{irreg_elem}} \tab\tab Subset / extract non-regular part of list \cr \code{\link{rsplit}} \tab\tab Recursively split vectors or data frames / lists \cr \code{\link{t_list}} \tab\tab Transpose lists of lists \cr \code{\link{rapply2d}} \tab\tab Recursively apply functions to lists of data objects \cr \code{\link{unlist2d}} \tab\tab Recursively unlist/row-bind lists of data objects in 2D, to data frame or \emph{data.table} \cr \code{\link{rowbind}} \tab\tab Non-recursive binding of lists of lists / data.frames. \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview} } \keyword{list} \keyword{manip} \keyword{documentation} collapse/man/frename.Rd0000644000176200001440000001001614777170130014540 0ustar liggesusers\name{frename} \alias{rnm} \alias{frename} \alias{setrename} \alias{relabel} \alias{setrelabel} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Renaming and Relabelling Objects } \description{ \code{frename} returns a renamed shallow-copy, \code{setrename} renames objects by reference. These functions also work with objects other than data frames that have a 'names' attribute. \code{relabel} and \code{setrelabel} do that same for labels attached to data frame columns. } \usage{ frename(.x, \dots, cols = NULL, .nse = TRUE) rnm(.x, \dots, cols = NULL, .nse = TRUE) # Shorthand for frename() setrename(.x, \dots, cols = NULL, .nse = TRUE) relabel(.x, \dots, cols = NULL, attrn = "label") setrelabel(.x, \dots, cols = NULL, attrn = "label") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.x}{for \code{(f/set)rename}: an R object with a \code{"names"} attribute. For \code{(set)relabel}: a named list. } \item{\dots}{either tagged vector expressions of the form \code{name = newname} / \code{name = newlabel} (\code{frename} also supports \code{newname = name}), a (named) vector of names/labels, or a single function (+ optional arguments to the function) applied to all names/labels (of columns/elements selected in \code{cols}). } \item{cols}{If \code{\dots} is a function, select a subset of columns/elements to rename/relabel using names, indices, a logical vector or a function applied to the columns if \code{.x} is a list (e.g. \code{is.numeric}).} \item{.nse}{logical. \code{TRUE} allows non-standard evaluation of tagged vector expressions, allowing you to supply new names without quotes. Set to \code{FALSE} for programming or passing vectors of names.} \item{attrn}{character. Name of attribute to store labels or retrieve labels from.} } \value{ \code{.x} renamed / relabelled. \code{setrename} and \code{setrelabel} return \code{.x} invisibly. } \note{ Note that both \code{relabel} and \code{setrelabel} modify \code{.x} by reference. This is because labels are attached to columns themselves, making it impossible to avoid permanent modification by taking a shallow copy of the encompassing list / data.frame. On the other hand \code{frename} makes a shallow copy whereas \code{setrename} also modifies by reference. } \seealso{ \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Using tagged expressions head(frename(iris, Sepal.Length = SL, Sepal.Width = SW, Petal.Length = PL, Petal.Width = PW)) head(frename(iris, Sepal.Length = "S L", Sepal.Width = "S W", Petal.Length = "P L", Petal.Width = "P W")) ## Since v2.0.0 this is also supported head(frename(iris, SL = Sepal.Length, SW = Sepal.Width, PL = Petal.Length, PW = Petal.Width)) ## Using a function head(frename(iris, tolower)) head(frename(iris, tolower, cols = 1:2)) head(frename(iris, tolower, cols = is.numeric)) head(frename(iris, paste, "new", sep = "_", cols = 1:2)) ## Using vectors of names and programming newname = "sepal_length" head(frename(iris, Sepal.Length = newname, .nse = FALSE)) newnames = c("sepal_length", "sepal_width") head(frename(iris, newnames, cols = 1:2)) newnames = c(Sepal.Length = "sepal_length", Sepal.Width = "sepal_width") head(frename(iris, newnames, .nse = FALSE)) # Since v2.0.0, this works as well newnames = c(sepal_length = "Sepal.Length", sepal_width = "Sepal.Width") head(frename(iris, newnames, .nse = FALSE)) ## Renaming by reference # setrename(iris, tolower) # head(iris) # rm(iris) # etc... ## Relabelling (by reference) # namlab(relabel(wlddev, PCGDP = "GDP per Capita", LIFEEX = "Life Expectancy")) # namlab(relabel(wlddev, toupper)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/extract_list.Rd0000644000176200001440000001454714777170130015645 0ustar liggesusers\name{get_elem} % \alias{extract_list} \alias{atomic_elem} \alias{atomic_elem<-} \alias{list_elem} \alias{list_elem<-} \alias{reg_elem} \alias{irreg_elem} \alias{has_elem} \alias{get_elem} %- Also NEED an '\alias' for EACH other topic documented here. \title{Find and Extract / Subset List Elements} \description{ A suite of functions to subset or extract from (potentially complex) lists and list-like structures. Subsetting may occur according to certain data types, using identifier functions, element names or regular expressions to search the list for certain objects. \itemize{ \item \code{atomic_elem} and \code{list_elem} are non-recursive functions to extract and replace the atomic and sub-list elements at the top-level of the list tree. \item \code{reg_elem} is the recursive equivalent of \code{atomic_elem} and returns the 'regular' part of the list - with atomic elements in the final nodes. \code{irreg_elem} returns all the non-regular elements (i.e. call and terms objects, formulas, etc\dots). See Examples. \item \code{get_elem} returns the part of the list responding to either an identifier function, regular expression, exact element names or indices applied to all final objects. \code{has_elem} checks for the existence of an element and returns \code{TRUE} if a match is found. See Examples. } } \usage{ ## Non-recursive (top-level) subsetting and replacing atomic_elem(l, return = "sublist", keep.class = FALSE) atomic_elem(l) <- value list_elem(l, return = "sublist", keep.class = FALSE) list_elem(l) <- value ## Recursive separation of regular (atomic) and irregular (non-atomic) parts reg_elem(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) irreg_elem(l, recursive = TRUE, keep.tree = FALSE, keep.class = FALSE) ## Extract elements / subset list tree get_elem(l, elem, recursive = TRUE, DF.as.list = FALSE, keep.tree = FALSE, keep.class = FALSE, regex = FALSE, invert = FALSE, \dots) ## Check for the existence of elements has_elem(l, elem, recursive = TRUE, DF.as.list = FALSE, regex = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list.} \item{value}{a list of the same length as the extracted subset of \code{l}.} \item{elem}{a function returning \code{TRUE} or \code{FALSE} when applied to elements of \code{l}, or a character vector of element names or regular expressions (if \code{regex = TRUE}). \code{get_elem} also supports a vector or indices which will be used to subset all final objects.} \item{return}{an integer or string specifying what the selector function should return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "sublist" \tab\tab subset of list (default) \cr 2 \tab\tab "names" \tab\tab column names \cr 3 \tab\tab "indices" \tab\tab column indices \cr 4 \tab\tab "named_indices" \tab\tab named column indices \cr 5 \tab\tab "logical" \tab\tab logical selection vector \cr 6 \tab\tab "named_logical" \tab\tab named logical vector \cr } \emph{Note}: replacement functions only replace data, names are replaced together with the data. } \item{recursive}{logical. Should the list search be recursive (i.e. go though all the elements), or just at the top-level?} \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.} \item{keep.tree}{logical. \code{TRUE} always returns the entire list tree leading up to all matched results, while \code{FALSE} drops the top-level part of the tree if possible.} \item{keep.class}{logical. For list-based objects: should the class be retained? This only works if these objects have a \code{[} method that retains the class.} \item{regex}{logical. Should regular expression search be used on the list names, or only exact matches?} \item{invert}{logical. Invert search i.e. exclude matched elements from the list?} \item{\dots}{further arguments to \code{grep} (if \code{regex = TRUE}).} } \details{ For a lack of better terminology, \emph{collapse} defines 'regular' R objects as objects that are either atomic or a list. \code{reg_elem} with \code{recursive = TRUE} extracts the subset of the list tree leading up to atomic elements in the final nodes. This part of the list tree is unlistable - calling \code{is_unlistable(reg_elem(l))} will be \code{TRUE} for all lists \code{l}. Conversely, all elements left behind by \code{reg_elem} will be picked up be \code{irreg_elem}. Thus \code{is_unlistable(irreg_elem(l))} is always \code{FALSE} for lists with irregular elements (otherwise \code{irreg_elem} returns an empty list). \cr If \code{keep.tree = TRUE}, \code{reg_elem}, \code{irreg_elem} and \code{get_elem} always return the entire list tree, but cut off all of the branches not leading to the desired result. If \code{keep.tree = FALSE}, top-level parts of the tree are omitted as far as possible. For example in a nested list with three levels and one data-matrix in one of the final branches, \code{get_elem(l, is.matrix, keep.tree = TRUE)} will return a list (\code{lres}) of depth 3, from which the matrix can be accessed as \code{lres[[1]][[1]][[1]]}. This however does not make much sense. \code{get_elem(l, is.matrix, keep.tree = FALSE)} will therefore figgure out that it can drop the entire tree and return just the matrix. \code{keep.tree = FALSE} makes additional optimizations if matching elements are at far-apart corners in a nested structure, by only preserving the hierarchy if elements are above each other on the same branch. Thus for a list \code{l <- list(list(2,list("a",1)),list(1,list("b",2)))} calling \code{get_elem(l, is.character)} will just return \code{list("a","b")}. } % \value{ % } \seealso{ \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ m <- qM(mtcars) get_elem(list(list(list(m))), is.matrix) get_elem(list(list(list(m))), is.matrix, keep.tree = TRUE) l <- list(list(2,list("a",1)),list(1,list("b",2))) has_elem(l, is.logical) has_elem(l, is.numeric) get_elem(l, is.character) get_elem(l, is.character, keep.tree = TRUE) l <- lm(mpg ~ cyl + vs, data = mtcars) str(reg_elem(l)) str(irreg_elem(l)) get_elem(l, is.matrix) get_elem(l, "residuals") get_elem(l, "fit", regex = TRUE) has_elem(l, "tol") get_elem(l, "tol") } \keyword{list} \keyword{manip} collapse/man/fquantile.Rd0000644000176200001440000002101715000542453015105 0ustar liggesusers\name{fquantile} \alias{fquantile} \alias{.quantile} \alias{frange} \alias{.range} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Weighted) Sample Quantiles and Range} \description{ A faster alternative to \code{\link{quantile}} (written fully in C), that supports sampling weights, and can also quickly compute quantiles from an ordering vector (e.g. \code{order(x)}). \code{frange} provides a fast alternative to \code{\link{range}}. } \usage{ fquantile(x, probs = c(0, 0.25, 0.5, 0.75, 1), w = NULL, o = if(length(x) > 1e5L && length(probs) > log(length(x))) radixorder(x) else NULL, na.rm = .op[["na.rm"]], type = 7L, names = TRUE, check.o = is.null(attr(o, "sorted"))) # Programmers version: no names, intelligent defaults, or checks .quantile(x, probs = c(0, 0.25, 0.5, 0.75, 1), w = NULL, o = NULL, na.rm = TRUE, type = 7L, names = FALSE, check.o = FALSE) # Fast range (min and max) frange(x, na.rm = .op[["na.rm"]], finite = FALSE) .range(x, na.rm = TRUE, finite = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric or integer vector.} \item{probs}{numeric vector of probabilities with values in [0,1].} \item{w}{a numeric vector of strictly positive sampling weights. Missing weights are only supported if \code{x} is also missing.} \item{o}{integer. An vector giving the ordering of the elements in \code{x}, such that \code{identical(x[o], sort(x))}. If available this considerably speeds up the estimation.} \item{na.rm}{logical. Remove missing values, default \code{TRUE}. } \item{finite}{logical. Omit all non-finite values.} \item{type}{integer. Quantile types 4-9. See \code{\link{quantile}}. Further details are provided in \href{https://www.tandfonline.com/doi/abs/10.1080/00031305.1996.10473566}{Hyndman and Fan (1996)} who recommended type 8. The default method is type 7.} \item{names}{logical. Generates names of the form \code{paste0(round(probs * 100, 1), "\%")} (in C). Set to \code{FALSE} for speedup. } \item{check.o}{logical. If \code{o} is supplied, \code{TRUE} runs through \code{o} once and checks that it is valid, i.e. that each element is in \code{[1, length(x)]}. Set to \code{FALSE} for significant speedup if \code{o} is known to be valid. } } \details{ \code{fquantile} is implemented using a quickselect algorithm in C, inspired by \emph{data.table}'s \code{gmedian}. The algorithm is applied incrementally to different sections of the array to find individual quantiles. If many quantile probabilities are requested, sorting the whole array with the fast \code{\link{radixorder}} algorithm is more efficient. The default threshold for this (\code{length(x) > 1e5L && length(probs) > log(length(x))}) is conservative, given that quickselect is generally more efficient on longitudinal data with similar values repeated by groups. With random data, my investigations yield that a threshold of \code{length(probs) > log10(length(x))} would be more appropriate. \code{frange} is considerably more efficient than \code{\link{range}}, requiring only one pass through the data instead of two. For probabilities 0 and 1, \code{fquantile} internally calls \code{frange}. Following \href{https://www.tandfonline.com/doi/abs/10.1080/00031305.1996.10473566}{Hyndman and Fan (1996)}, the quantile type-\eqn{i} quantile function of the sample \eqn{X} can be written as a weighted average of two order statistics: \deqn{\hat{Q}_{X,i}(p) = (1 - \gamma) X_{(j)} + \gamma X_{(j + 1)}} where \eqn{j = \lfloor pn + m \rfloor,\ m \in \mathbb{R}} and \eqn{\gamma = pn + m - j,\ 0 \le \gamma \le 1}, with \eqn{m} differing by quantile type (\eqn{i}). For example, the default type 7 quantile estimator uses \eqn{m = 1 - p}, see \code{\link{quantile}}. For weighted data with normalized weights \eqn{w = \{w_1, ..., w_n\}}, where \eqn{w_k > 0} and \eqn{\sum_k w_k = 1}, let \eqn{\{w_{(1)}, ..., w_{(n)}\}} be the weights for each order statistic and \eqn{W_{(k)} = \operatorname{Weight}[X_j \le X_{(k)}] = \sum_{j=1}^k w_{(j)}} the cumulative weight for each order statistic. We can then first find the largest value \eqn{l} such that the cumulative normalized weight \eqn{W_{(l)} \leq p}, and replace \eqn{pn} with \eqn{l + (p - W_{(l)})/w_{(l+1)}}, where \eqn{w_{(l+1)}} is the weight of the next observation. This gives: \deqn{j = \lfloor l + \frac{p - W_{(l)}}{w_{(l+1)}} + m \rfloor} \deqn{\gamma = l + \frac{p - W_{(l)}}{w_{(l+1)}} + m - j} For a more detailed exposition \href{https://htmlpreview.github.io/?https://github.com/mjskay/uncertainty-examples/blob/master/weighted-quantiles.html}{see these excellent notes} by Matthew Kay. See also the R implementation of weighted quantiles type 7 in the Examples below. } \note{ The new weighted quantile algorithm from v2.1.0 does not skip zero weights anymore as this is technically very difficult (it is not clear if \eqn{j} hits a zero weight element whether one should move forward or backward to find an alternative). Thus, all non-missing elements are considered and weights should be strictly positive. } \value{ A vector of quantiles. If \code{names = TRUE}, \code{fquantile} generates names as \code{paste0(round(probs * 100, 1), "\%")} (in C). } %% ~Make other sections like Warning with \section{Warning }{....} ~ \author{ Sebastian Krantz based on \href{https://htmlpreview.github.io/?https://github.com/mjskay/uncertainty-examples/blob/master/weighted-quantiles.html}{notes} by Matthew Kay. } \references{ Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical packages, \emph{American Statistician} 50, 361–365. doi:10.2307/2684934. Wicklin, R. (2017) Sample quantiles: A comparison of 9 definitions; SAS Blog. https://blogs.sas.com/content/iml/2017/05/24/definitions-sample-quantiles.html Wikipedia: https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample Weighted Quantiles by Matthew Kay: https://htmlpreview.github.io/?https://github.com/mjskay/uncertainty-examples/blob/master/weighted-quantiles.html } \seealso{ \code{\link{fnth}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Basic range and quantiles frange(mtcars$mpg) fquantile(mtcars$mpg) ## Checking computational equivalence to stats::quantile() w = alloc(abs(rnorm(1)), 32) o = radixorder(mtcars$mpg) for (i in 5:9) print(all_obj_equal(fquantile(mtcars$mpg, type = i), fquantile(mtcars$mpg, type = i, w = w), fquantile(mtcars$mpg, type = i, o = o), fquantile(mtcars$mpg, type = i, w = w, o = o), quantile(mtcars$mpg, type = i))) ## Demonstaration: weighted quantiles type 7 in R wquantile7R <- function(x, w, probs = c(0.25, 0.5, 0.75), na.rm = TRUE, names = TRUE) { if(na.rm && anyNA(x)) { # Removing missing values (only in x) cc = whichNA(x, invert = TRUE) # The C code first calls radixorder(x), which places x = x[cc]; w = w[cc] # missing values last, so removing = early termination } o = radixorder(x) # Ordering wo = proportions(w[o]) Wo = cumsum(wo) # Cumulative sum res = sapply(probs, function(p) { l = which.max(Wo > p) - 1L # Lower order statistic s = l + (p - Wo[l])/wo[l+1L] + 1 - p j = floor(s) gamma = s - j (1 - gamma) * x[o[j]] + gamma * x[o[j+1L]] # Weighted quantile }) if(names) names(res) = paste0(as.integer(probs * 100), "\%") res } # Note: doesn't work for min and max. wquantile7R(mtcars$mpg, mtcars$wt) all.equal(wquantile7R(mtcars$mpg, mtcars$wt), fquantile(mtcars$mpg, c(0.25, 0.5, 0.75), mtcars$wt)) ## Efficient grouped quantile estimation: use .quantile for less call overhead BY(mtcars$mpg, mtcars$cyl, .quantile, names = TRUE, expand.wide = TRUE) BY(mtcars, mtcars$cyl, .quantile, names = TRUE) mtcars |> fgroup_by(cyl) |> BY(.quantile) ## With weights BY(mtcars$mpg, mtcars$cyl, .quantile, w = mtcars$wt, names = TRUE, expand.wide = TRUE) BY(mtcars, mtcars$cyl, .quantile, w = mtcars$wt, names = TRUE) mtcars |> fgroup_by(cyl) |> fselect(-wt) |> BY(.quantile, w = mtcars$wt) mtcars |> fgroup_by(cyl) |> fsummarise(across(-wt, .quantile, w = wt)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{univar} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fast-data-manipulation.Rd0000644000176200001440000001562114777170130017474 0ustar liggesusers\name{fast-data-manipulation} \alias{A3-fast-data-manipulation} \alias{fast-data-manipulation} \title{Fast Data Manipulation} \description{ \emph{collapse} provides the following functions for fast manipulation of (mostly) data frames. \itemize{ \item \code{\link{fselect}} is a much faster alternative to \code{dplyr::select} to select columns using expressions involving column names. \code{\link{get_vars}} is a more versatile and programmer friendly function to efficiently select and replace columns by names, indices, logical vectors, regular expressions, or using functions to identify columns. \item \code{\link{num_vars}}, \code{\link{cat_vars}}, \code{\link{char_vars}}, \code{\link{fact_vars}}, \code{\link{logi_vars}} and \code{\link{date_vars}} are convenience functions to efficiently select and replace columns by data type. \item \code{\link{add_vars}} efficiently adds new columns at any position within a data frame (default at the end). This can be done vie replacement (i.e. \code{add_vars(data) <- newdata}) or returning the appended data, e.g., \code{add_vars(data, newdata1, newdata2, \dots)}. It is thus also an efficient alternative to \code{\link{cbind.data.frame}}. \item \code{\link{rowbind}} efficiently combines data frames / lists row-wise. The implementation is derived from \code{data.table::rbindlist}, it is also a fast alternative to \code{\link{rbind.data.frame}}. \item \code{\link{join}} provides fast, class-agnostic, and verbose table joins. \item \code{\link{pivot}} efficiently reshapes data, supporting longer, wider and recast pivoting, as well as multi-column-pivots and pivots taking along variable labels. \item \code{\link{fsubset}} is a much faster version of \code{\link{subset}} to efficiently subset vectors, matrices and data frames. If the non-standard evaluation offered by \code{\link{fsubset}} is not needed, the function \code{\link{ss}} is a much faster and more secure alternative to \code{[.data.frame}. \item \code{\link[=fslice]{fslice(v)}} is a much faster alternative to \code{dplyr::slice_[head|tail|min|max]} for filtering/deduplicating matrix-like objects (by groups). \item \code{\link{fsummarise}} is a much faster version of \code{dplyr::summarise}, especially when used together with the \link[=fast-statistical-functions]{Fast Statistical Functions} and \code{\link{fgroup_by}}. \item \code{\link{fmutate}} is a much faster version of \code{dplyr::mutate}, especially when used together with the \link[=fast-statistical-functions]{Fast Statistical Functions}, the fast \link[=data-transformations]{Data Transformation Functions}, and \code{\link{fgroup_by}}. \item \code{\link[=ftransform]{ftransform(v)}} is a much faster version of \code{\link{transform}}, which also supports list input and nested pipelines. \code{\link[=ftransform]{settransform(v)}} does all of that by reference, i.e. it assigns to the calling environment. \code{\link[=fcompute]{fcompute(v)}} is similar to \code{\link[=ftransform]{ftransform(v)}} but only returns modified/computed columns. %As a new feature, it is now possible to bulk-process columns with \code{\link{ftransform}}, i.e. \code{ftransform(data, fscale(data[1:2]))} is the same as \code{ftransform(data, col1 = fscale(col1), col2 = fscale(col2))}, and \code{ftransform(data) <- fscale(data[1:2]))} or \code{settransform(data, fscale(data[1:2]))} are both equivalent to \code{data[1:2] <- fscale(data[1:2]))}. Non-matching columns are added to the data.frame. \item \code{\link{roworder}} is a fast substitute for \code{dplyr::arrange}, but the syntax is inspired by \code{data.table::setorder}. \item \code{\link{colorder}} efficiently reorders columns in a data frame, see also \code{data.table::setcolorder}. \item \code{\link{frename}} is a fast substitute for \code{dplyr::rename}, to efficiently rename various objects. \code{\link{setrename}} renames objects by reference. \code{\link{relabel}} and \code{\link{setrelabel}} do the same thing for variable labels (see also \code{\link{vlabels}}). } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr \code{\link[=fselect]{fselect(<-)}} \tab\tab No methods, for data frames \tab\tab Fast select or replace columns (non-standard evaluation) \cr \code{\link[=get_vars]{get_vars(<-)}}, \code{\link[=num_vars]{num_vars(<-)}}, \code{\link[=cat_vars]{cat_vars(<-)}}, \code{\link[=char_vars]{char_vars(<-)}}, \code{\link[=fact_vars]{fact_vars(<-)}}, \code{\link[=logi_vars]{logi_vars(<-)}}, \code{\link[=date_vars]{date_vars(<-)}} \tab\tab No methods, for data frames \tab\tab Fast select or replace columns \cr \code{\link[=add_vars]{add_vars(<-)}} \tab\tab No methods, for data frames \tab\tab Fast add columns \cr \code{\link{rowbind}} \tab\tab No methods, for lists of lists/data frames \tab\tab Fast row-binding lists \cr \code{\link{join}} \tab\tab No methods, for data frames \tab\tab Fast table joins \cr \code{\link{pivot}} \tab\tab No methods, for data frames \tab\tab Fast reshaping \cr \code{\link{fsubset}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame} \tab\tab Fast subset data (non-standard evaluation) \cr \code{\link{ss}} \tab\tab No methods, for data frames \tab\tab Fast subset data frames \cr \code{\link[=fslice]{fslice(v)}} \tab\tab No methods, for matrices and data frames\tab\tab Fast slicing of rows \cr \code{\link{fsummarise}} \tab\tab No methods, for data frames \tab\tab Fast data aggregation \cr \code{\link{fmutate}}, \code{\link[=ftransform]{(f/set)transform(v)(<-)}} \tab\tab No methods, for data frames \tab\tab Compute, modify or delete columns (non-standard evaluation) \cr %\code{\link{settransform}} \tab\tab No methods, for data frames \tab\tab Compute, modify or delete columns by reference (non-standard evaluation) \cr \code{\link[=fcompute]{fcompute(v)}} \tab\tab No methods, for data frames \tab\tab Compute or modify columns, returned in a new data frame (non-standard evaluation) \cr \code{\link[=roworder]{roworder(v)}} \tab\tab No methods, for data frames incl. pdata.frame \tab\tab Reorder rows and return data frame (standard and non-standard evaluation) \cr \code{\link[=colorder]{colorder(v)}} \tab\tab No methods, for data frames \tab\tab Reorder columns and return data frame (standard and non-standard evaluation) \cr \code{\link[=frename]{(f/set)rename}}, \code{\link[=frename]{(set)relabel}} \tab\tab No methods, for all objects with 'names' attribute \tab\tab Rename and return object / relabel columns in a data frame. \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=quick-conversion]{Quick Data Conversion}, \link[=recode-replace]{Recode and Replace Values} } \keyword{manip} \keyword{documentation} collapse/man/across.Rd0000644000176200001440000001574314777170130014431 0ustar liggesusers\name{across} \alias{across} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Apply Functions Across Multiple Columns } \description{ \code{across()} can be used inside \code{\link{fmutate}} and \code{\link{fsummarise}} to apply one or more functions to a selection of columns. It is overall very similar to \code{dplyr::across}, but does not support some \code{rlang} features, has some additional features (arguments), and is optimized to work with \emph{collapse}'s, \code{\link{.FAST_FUN}}, yielding much faster computations. } \usage{ across(.cols = NULL, .fns, ..., .names = NULL, .apply = "auto", .transpose = "auto") # acr(...) can be used to abbreviate across(...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.cols}{select columns using column names and expressions (e.g. \code{a:b} or \code{c(a, b, c:f)}), column indices, logical vectors, or functions yielding a logical value e.g. \code{is.numeric}. \code{NULL} applies functions to all columns except for grouping columns.} \item{.fns}{A function, character vector of functions or list of functions. Vectors / lists can be named to yield alternative names in the result (see \code{.names}). This argument is evaluated inside \code{substitute()}, and the content (not the names of vectors/lists) is checked against \code{.FAST_FUN} and \code{.OPERATOR_FUN}. Matching functions receive vectorized execution, other functions are applied to the data in a standard way.} \item{\dots}{further arguments to \code{.fns}. Arguments are evaluated in the data environment and split by groups as well (for non-vectorized functions, if of the same length as the data).} \item{.names}{controls the naming of computed columns. \code{NULL} generates names of the form \code{coli_funj} if multiple functions are used. \code{.names = TRUE} enables this for a single function, \code{.names = FALSE} disables it for multiple functions (sensible for functions such as \code{.OPERATOR_FUN} that rename columns (if \code{.apply = FALSE})). Setting \code{.names = "flip"} generates names of the form \code{funj_coli}. It is also possible to supply a function with two arguments for column and function names e.g. \code{function(c, f) paste0(f, "_", c)}. Finally, you can supply a custom vector of names which must match \code{length(.cols) * length(.fns)}.} \item{.apply}{controls whether functions are applied column-by-column (\code{TRUE}) or to multiple columns at once (\code{FALSE}). The default, \code{"auto"}, does the latter for vectorized functions, which have an efficient data frame method. It can also be sensible to use \code{.apply = FALSE} for non-vectorized functions, especially multivariate functions like \code{\link{lm}} or \code{\link{pwcor}}, or functions renaming the data. See Examples. } \item{.transpose}{with multiple \code{.fns}, \code{.transpose} controls whether the result is ordered first by column, then by function (\code{TRUE}), or vice-versa (\code{FALSE}). \code{"auto"} does the former if all functions yield results of the same dimensions (dimensions may differ if \code{.apply = FALSE}). See Examples.} } \note{ \code{across()} does not support \emph{purr}-style lambdas, and does not support \code{dplyr}-style predicate functions e.g. \code{across(where(is.numeric), sum)}, simply use \code{across(is.numeric, sum)}. In contrast to \code{dplyr}, you can also compute on grouping columns. Also \emph{note} that \code{across()} is NOT a function in \emph{collapse} but a known expression that is internally transformed by \code{fsummarise()/fmutate()} into something else. Thus, it cannot be called using qualified names, i.e., \code{collapse::across()} does not work and is not necessary if \emph{collapse} is not attached. %In general, my mission with \code{collapse} is not to create a \code{dplyr}-clone, but to take some of the useful features and make them robust and fast using base R and C/C++, with the aim of having a stable API. So don't ask me to implement the latest \emph{dplyr} feature, unless you firmly believe it is very useful and will be around 10 years from now. } \seealso{ \code{\link{fsummarise}}, \code{\link{fmutate}}, \link[=fast-data-manipulation]{Fast Data Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Basic (Weighted) Summaries fsummarise(wlddev, across(PCGDP:GINI, fmean, w = POP)) wlddev |> fgroup_by(region, income) |> fsummarise(across(PCGDP:GINI, fmean, w = POP)) # Note that for these we don't actually need across... fselect(wlddev, PCGDP:GINI) |> fmean(w = wlddev$POP, drop = FALSE) wlddev |> fgroup_by(region, income) |> fselect(PCGDP:GINI, POP) |> fmean(POP, keep.w = FALSE) collap(wlddev, PCGDP + LIFEEX + GINI ~ region + income, w = ~ POP, keep.w = FALSE) # But if we want to use some base R function that reguires argument splitting... wlddev |> na_omit(cols = "POP") |> fgroup_by(region, income) |> fsummarise(across(PCGDP:GINI, weighted.mean, w = POP, na.rm = TRUE)) # Or if we want to apply different functions... wlddev |> fgroup_by(region, income) |> fsummarise(across(PCGDP:GINI, list(mu = fmean, sd = fsd), w = POP), POP_sum = fsum(POP), OECD = fmean(OECD)) # Note that the above still detects fmean as a fast function, the names of the list # are irrelevant, but the function name must be typed or passed as a character vector, # Otherwise functions will be executed by groups e.g. function(x) fmean(x) won't vectorize # Same, naming in a different way wlddev |> fgroup_by(region, income) |> fsummarise(across(PCGDP:GINI, list(mu = fmean, sd = fsd), w = POP, .names = "flip"), sum_POP = fsum(POP), OECD = fmean(OECD)) # Or we want to do more advanced things.. # Such as nesting data frames.. qTBL(wlddev) |> fgroup_by(region, income) |> fsummarise(across(c(PCGDP, LIFEEX, ODA), function(x) list(Nest = list(x)), .apply = FALSE)) # Or linear models.. qTBL(wlddev) |> fgroup_by(region, income) |> fsummarise(across(c(PCGDP, LIFEEX, ODA), function(x) list(Mods = list(lm(PCGDP ~., x))), .apply = FALSE)) # Or cumputing grouped correlation matrices qTBL(wlddev) |> fgroup_by(region, income) |> fsummarise(across(c(PCGDP, LIFEEX, ODA), function(x) qDF(pwcor(x), "Variable"), .apply = FALSE)) # Here calculating 1- and 10-year lags and growth rates of these variables qTBL(wlddev) |> fgroup_by(country) |> fmutate(across(c(PCGDP, LIFEEX, ODA), list(L, G), n = c(1, 10), t = year, .names = FALSE)) # Same but variables in different order qTBL(wlddev) |> fgroup_by(country) |> fmutate(across(c(PCGDP, LIFEEX, ODA), list(L, G), n = c(1, 10), t = year, .names = FALSE, .transpose = FALSE)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/psacf.Rd0000644000176200001440000001640114777170130014223 0ustar liggesusers\name{psacf} \alias{psacf} \alias{psacf.default} \alias{psacf.pseries} \alias{psacf.data.frame} \alias{psacf.pdata.frame} \alias{pspacf} \alias{pspacf.default} \alias{pspacf.pseries} \alias{pspacf.data.frame} \alias{pspacf.pdata.frame} \alias{psccf} \alias{psccf.default} \alias{psccf.pseries} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Auto- and Cross- Covariance and Correlation Function Estimation for Panel Series } \description{ \code{psacf}, \code{pspacf} and \code{psccf} compute (and by default plot) estimates of the auto-, partial auto- and cross- correlation or covariance functions for panel series. They are analogues to \code{\link{acf}}, \code{\link{pacf}} and \code{\link{ccf}}. } \usage{ psacf(x, \dots) pspacf(x, \dots) psccf(x, y, \dots) \method{psacf}{default}(x, g, t = NULL, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, \dots) \method{pspacf}{default}(x, g, t = NULL, lag.max = NULL, plot = TRUE, gscale = TRUE, \dots) \method{psccf}{default}(x, y, g, t = NULL, lag.max = NULL, type = c("correlation", "covariance"), plot = TRUE, gscale = TRUE, \dots) \method{psacf}{data.frame}(x, by, t = NULL, cols = is.numeric, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, \dots) \method{pspacf}{data.frame}(x, by, t = NULL, cols = is.numeric, lag.max = NULL, plot = TRUE, gscale = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{psacf}{pseries}(x, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, \dots) \method{pspacf}{pseries}(x, lag.max = NULL, plot = TRUE, gscale = TRUE, \dots) \method{psccf}{pseries}(x, y, lag.max = NULL, type = c("correlation", "covariance"), plot = TRUE, gscale = TRUE, \dots) \method{psacf}{pdata.frame}(x, cols = is.numeric, lag.max = NULL, type = c("correlation", "covariance","partial"), plot = TRUE, gscale = TRUE, \dots) \method{pspacf}{pdata.frame}(x, cols = is.numeric, lag.max = NULL, plot = TRUE, gscale = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x, y}{a numeric vector, 'indexed_series' ('pseries'), data frame or 'indexed_frame' ('pdata.frame').} \item{g}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{group}}) used to group \code{x}.} \item{by}{\emph{data.frame method}: Same input as \code{g}, but also allows one- or two-sided formulas using the variables in \code{x}, i.e. \code{~ idvar} or \code{var1 + var2 ~ idvar1 + idvar2}.} \item{t}{a time vector or list of vectors. See \code{\link{flag}}.} \item{cols}{\emph{data.frame method}: Select columns using a function, column names, indices or a logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{lag.max}{integer. Maximum lag at which to calculate the acf. Default is \code{2*sqrt(length(x)/ng)} where \code{ng} is the number of groups in the panel series / supplied to \code{g}.} \item{type}{character. String giving the type of acf to be computed. Allowed values are "correlation" (the default), "covariance" or "partial".} \item{plot}{logical. If \code{TRUE} (default) the acf is plotted.} \item{gscale}{logical. Do a groupwise scaling / standardization of \code{x, y} (using \code{\link{fscale}} and the groups supplied to \code{g}) before computing panel-autocovariances / correlations. See Details.} \item{\dots}{further arguments to be passed to \code{\link{plot.acf}}.} } \details{ If \code{gscale = TRUE} data are standardized within each group (using \code{\link{fscale}}) such that the group-mean is 0 and the group-standard deviation is 1. This is strongly recommended for most panels to get rid of individual-specific heterogeneity which would corrupt the ACF computations. After scaling, \code{psacf}, \code{pspacf} and \code{psccf} compute the ACF/CCF by creating a matrix of panel-lags of the series using \code{\link{flag}} and then computing the covariance of this matrix with the series (\code{x, y}) using \code{\link{cov}} and pairwise-complete observations, and dividing by the variance (of \code{x, y}). Creating the lag matrix may require a lot of memory on large data, but passing a sequence of lags to \code{\link{flag}} and thus calling \code{\link{flag}} and \code{\link{cov}} one time is generally much faster than calling them \code{lag.max} times. The partial ACF is computed from the ACF using a Yule-Walker decomposition, in the same way as in \code{\link{pacf}}. } \value{ An object of class 'acf', see \code{\link{acf}}. The result is returned invisibly if \code{plot = TRUE}.} % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } %\note{ % \code{psacf} does not compute the true ACF defined as dividing the autocorvariance function of \code{x} by the variance of \code{x}. Rather, for reasons having to do with computational efficiency and assuming use on larger panel-vectors, \code{psacf} simply uses \code{\link{cor} to correlate \code{x} with its lags (thus dividing the autocovariance by the product of the standard deviations of \code{x} and its lag). % For \code{plm::pseries} and \code{plm::pdata.frame}, the first index variable is assumed to be the group-id and the second the time variable. If more than 2 index variables are attached to \code{plm::pseries}, the last one is taken as the time variable and the others are taken as group-id's and interacted. %The \code{pdata.frame} method only works for properly subsetted objects of class 'pdata.frame'. A list of 'pseries' will not work. %} %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ ## World Development Panel Data head(wlddev) # See also help(wlddev) psacf(wlddev$PCGDP, wlddev$country, wlddev$year) # ACF of GDP per Capita psacf(wlddev, PCGDP ~ country, ~year) # Same using data.frame method psacf(wlddev$PCGDP, wlddev$country) # The Data is sorted, can omit t pspacf(wlddev$PCGDP, wlddev$country) # Partial ACF psccf(wlddev$PCGDP, wlddev$LIFEEX, wlddev$country) # CCF with Life-Expectancy at Birth psacf(wlddev, PCGDP + LIFEEX + ODA ~ country, ~year) # ACF and CCF of GDP, LIFEEX and ODA psacf(wlddev, ~ country, ~year, c(9:10,12)) # Same, using cols argument pspacf(wlddev, ~ country, ~year, c(9:10,12)) # Partial ACF ## Using indexed data: wldi <- findex_by(wlddev, iso3c, year) # Creating a indexed frame PCGDP <- wldi$PCGDP # Indexed Series of GDP per Capita LIFEEX <- wldi$LIFEEX # Indexed Series of Life Expectancy psacf(PCGDP) # Same as above, more parsimonious pspacf(PCGDP) psccf(PCGDP, LIFEEX) psacf(wldi[c(9:10,12)]) pspacf(wldi[c(9:10,12)]) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{ts} collapse/man/BY.Rd0000644000176200001440000002143114777170130013440 0ustar liggesusers\name{BY} \alias{BY} \alias{BY.default} \alias{BY.matrix} \alias{BY.data.frame} \alias{BY.grouped_df} \title{ Split-Apply-Combine Computing % (Efficient) } \description{ \code{BY} is an S3 generic that efficiently applies functions over vectors or matrix- and data frame columns by groups. Similar to \code{\link{dapply}} it seeks to retain the structure and attributes of the data, but can also output to various standard formats. A simple parallelism is also available. } \usage{ BY(x, \dots) \method{BY}{default}(x, g, FUN, \dots, use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "vector", "list")) \method{BY}{matrix}(x, g, FUN, \dots, use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) \method{BY}{data.frame}(x, g, FUN, \dots, use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE, expand.wide = FALSE, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame", "list")) \method{BY}{grouped_df}(x, FUN, \dots, reorder = TRUE, keep.group_vars = TRUE, use.g.names = FALSE) } \arguments{ \item{x}{a vector, matrix, data frame or alike object.} \item{g}{a \code{\link{GRP}} object, or a factor / atomic vector / list of atomic vectors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{FUN}{a function, can be scalar- or vector-valued. For vector valued functions see also \code{reorder} and \code{expand.wide}.} \item{\dots}{further arguments to \code{FUN}, or to \code{BY.data.frame} for the 'grouped_df' method. Since v1.9.0 data length arguments are also split by groups.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). For vector-valued functions (row-)names are only generated if the function itself creates names for the statistics e.g. \code{quantile()} adds names, \code{range()} or \code{log()} don't. No row-names are generated on \emph{data.table}'s.} \item{sort}{logical. Sort the groups? Internally passed to \code{\link{GRP}}, and only effective if \code{g} is not already a factor or \code{\link{GRP}} object.} \item{reorder}{logical. If a vector-valued function is passed that preserves the data length, \code{TRUE} will reorder the result such that the elements/rows match the original data. \code{FALSE} just combines the data in order of the groups (i.e. all elements of the first group in first-appearance order followed by all elements in the second group etc..). \emph{Note} that if \code{reorder = FALSE}, grouping variables, names or rownames are only retained if the grouping is on sorted data, see \code{\link{GRP}}. } \item{expand.wide}{logical. If \code{FUN} is a vector-valued function returning a vector of fixed length > 1 (such as the \code{\link{quantile}} function), \code{expand.wide} can be used to return the result in a wider format (instead of stacking the resulting vectors of fixed length above each other in each output column).} \item{parallel}{logical. \code{TRUE} implements simple parallel execution by internally calling \code{\link{mclapply}} instead of \code{\link{lapply}}. Parallelism is across columns, except for the default method.} \item{mc.cores}{integer. Argument to \code{\link{mclapply}} indicating the number of cores to use for parallel execution. Can use \code{\link[=detectCores]{detectCores()}} to select all available cores.} \item{return}{an integer or string indicating the type of object to return. The default \code{1 - "same"} returns the same object type (i.e. class and other attributes are retained if the underlying data type is the same, just the names for the dimensions are adjusted). \code{2 - "matrix"} always returns the output as matrix, \code{3 - "data.frame"} always returns a data frame and \code{4 - "list"} returns the raw (uncombined) output. \emph{Note}: \code{4 - "list"} works together with \code{expand.wide} to return a list of matrices.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation. See also the Note. } % \item{simplify}{logical. Simplify the result to return an object of the same class and with the same attributes. If \code{FALSE}, the raw computation retult in form of a (nested) list is returned.} } \details{ \code{BY} is a re-implementation of the Split-Apply-Combine computing paradigm. It is faster than \code{\link{tapply}}, \code{\link{by}}, \code{\link{aggregate}} and \emph{(d)plyr}, and preserves data attributes just like \code{\link{dapply}}. % and more versatile though not faster than \emph{dplyr} %I note at this point that the philosophy of \emph{collapse} is to move beyond this rather slow computing paradigm, which is why the \link[=fast-statistical-functions]{Fast Statistical Functions} were implemented. However sometimes tasks need to be performed that involve more complex and customized operations on data, and for these cases \code{BY} is a good solution. It is principally a wrapper around \code{lapply(gsplit(x, g), FUN, \dots)}, that uses \code{\link{gsplit}} for optimized splitting and also strongly optimizes on the internal code compared to \emph{base} R functions. For more details look at the documentation for \code{\link{dapply}} which works very similar (apart from the splitting performed in \code{BY}). The function is intended for simple cases involving flexible computation of statistics across groups using a single function e.g. \code{iris |> gby(Species) |> BY(IQR)} is simpler than \code{iris |> gby(Species) |> smr(acr(.fns = IQR))} etc.. % For larger tasks, the \link[=fast-statistical-functions]{Fast Statistical Functions} or the \emph{data.table} package are more appropriate tools. } %\note{ %\code{BY} can be used with vector-valued functions preserving the length of the data, note however that, unlike \code{\link{fmutate}}, data is recombined in the order of the groups, not in the order of the original data. It is thus advisable to sort the data by the grouping variable before using \code{BY} with such a function. In particular, in such cases the 'grouped_df' method only keeps grouping columns if data was grouped with \code{fgroup_by(data, ..., sort = TRUE)}, and the grouping algorithm detected that the data is already sorted in the order of the groups (i.e. if \code{attr(with(data, radixorder(...)), "sorted")} is \code{TRUE}), even if \code{keep.group_vars = TRUE}. The same holds for preservation names / rownames in the default, matrix or data frame methods. Basically, \code{BY} is kept as simple as possible without running danger of returning something wrong. %} \value{ \code{X} where \code{FUN} was applied to every column split by \code{g}. } \seealso{ \code{\link{dapply}}, \code{\link{collap}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ v <- iris$Sepal.Length # A numeric vector g <- GRP(iris$Species) # A grouping ## default vector method BY(v, g, sum) # Sum by species head(BY(v, g, scale)) # Scale by species (please use fscale instead) BY(v, g, fquantile) # Species quantiles: by default stacked BY(v, g, fquantile, expand.wide = TRUE) # Wide format ## matrix method m <- qM(num_vars(iris)) BY(m, g, sum) # Also return as matrix BY(m, g, sum, return = "data.frame") # Return as data.frame.. also works for computations below head(BY(m, g, scale)) BY(m, g, fquantile) BY(m, g, fquantile, expand.wide = TRUE) ml <- BY(m, g, fquantile, expand.wide = TRUE, # Return as list of matrices return = "list") ml # Unlisting to Data Frame unlist2d(ml, idcols = "Variable", row.names = "Species") ## data.frame method BY(num_vars(iris), g, sum) # Also returns a data.fram BY(num_vars(iris), g, sum, return = 2) # Return as matrix.. also works for computations below head(BY(num_vars(iris), g, scale)) BY(num_vars(iris), g, fquantile) BY(num_vars(iris), g, fquantile, expand.wide = TRUE) BY(num_vars(iris), g, fquantile, # Return as list of matrices expand.wide = TRUE, return = "list") ## grouped data frame method giris <- fgroup_by(iris, Species) giris |> BY(sum) # Compute sum giris |> BY(sum, use.g.names = TRUE, # Use row.names and keep.group_vars = FALSE) # remove 'Species' and groups attribute giris |> BY(sum, return = "matrix") # Return matrix giris |> BY(sum, return = "matrix", # Matrix with row.names use.g.names = TRUE) giris |> BY(.quantile) # Compute quantiles (output is stacked) giris |> BY(.quantile, names = TRUE, # Wide output expand.wide = TRUE) } \keyword{manip} collapse/man/summary-statistics.Rd0000644000176200001440000000733215202400476017010 0ustar liggesusers\name{summary-statistics} % \name{Time Series and Panel Computations} \alias{A9-summary-statistics} \alias{summary-statistics} % \alias{tscomp} \title{Summary Statistics} % \emph{collapse} \description{ \emph{collapse} provides the following functions to efficiently summarize and examine data: \itemize{ \item \code{\link{qsu}}, shorthand for quick-summary, is an extremely fast summary command inspired by the (xt)summarize command in the STATA statistical software. It computes a set of 7 statistics (nobs, mean, sd, min, max, skewness and kurtosis) using a numerically stable one-pass method. Statistics can be computed weighted, by groups, and also within-and between entities (for multilevel / panel data). \item \code{\link{qtab}}, shorthand for quick-table, is a faster and more versatile alternative to \code{\link{table}}. Notably, it also supports tabulations with frequency weights, as well as computing a statistic over combinations of variables. 'qtab's inherit the 'table' class, allowing for seamless application of 'table' methods. \item \code{\link{descr}} computes a concise and detailed description of a data frame, including (sorted) frequency tables for categorical variables and various statistics and quantiles for numeric variables. It is inspired by \code{Hmisc::describe}, but about 10x faster. \item \code{\link{pwcor}}, \code{\link{pwcov}} and \code{\link{pwnobs}} compute (weighted) pairwise correlations, covariances and observation counts on matrices and data frames. Pairwise correlations and covariances can be computed together with observation counts and p-values. The elaborate print method displays all of these statistics in a single correlation table. \item \code{\link{varying}} very efficiently checks for the presence of any variation in data (optionally) within groups (such as panel-identifiers). A variable is variant if it has at least 2 distinct non-missing data points. % \item \code{\link{fFtest}} is a fast implementation of the R-Squared based F-test, to test \bold{exclusion restrictions} in linear models potentially involving multiple large factors (fixed effects). It internally utilizes \code{\link{fhdwithin}} to project out factors while counting the degrees of freedom. } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr \code{\link{qsu}} \tab\tab \code{default, matrix, data.frame, grouped_df, pseries, pdata.frame, sf} \tab\tab Fast (grouped, weighted, panel-decomposed) summary statistics \cr \code{\link{qtab}} \tab\tab No methods, for data frames or vectors \tab\tab Fast (weighted) cross tabulation \cr \code{\link{descr}} \tab\tab \code{default, grouped_df} (default method handles most objects) \tab\tab Detailed statistical description of data frame \cr \code{\link{pwcor}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise (weighted) correlations \cr \code{\link{pwcov}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise (weighted) covariances \cr \code{\link{pwnobs}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise observation counts \cr \code{\link{varying}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Fast variation check % \code{\link{fFtest}} \tab\tab No methods, its a standalone test to which data needs to be supplied. \tab\tab Fast F-test of exclusion restrictions in linear models (with factors variables) \cr } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=fast-statistical-functions]{Fast Statistical Functions} } \keyword{manip} \keyword{documentation} collapse/man/efficient-programming.Rd0000644000176200001440000004105515000542453017375 0ustar liggesusers\name{efficient-programming} \alias{AA2-efficient-programming} \alias{efficient-programming} \alias{anyv} \alias{allv} \alias{allNA} \alias{whichv} \alias{whichNA} \alias{alloc} \alias{copyv} \alias{setv} \alias{setop} \alias{\%==\%} \alias{\%!=\%} \alias{\%+=\%} \alias{\%-=\%} \alias{\%*=\%} \alias{\%/=\%} \alias{cinv} \alias{vec} \alias{vlengths} \alias{vtypes} \alias{vgcd} \alias{fnlevels} \alias{fnrow} \alias{fncol} \alias{fdim} \alias{missing_cases} \alias{na_rm} \alias{na_locf} \alias{na_focb} \alias{na_omit} \alias{na_insert} \alias{seq_row} \alias{seq_col} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Small Functions to Make R Programming More Efficient } \description{ A small set of functions to address some common inefficiencies in R, such as the creation of logical vectors to compare quantities, unnecessary copies of objects in elementary mathematical or sub-assignment operations, obtaining information about objects (esp. data frames), or dealing with missing values. % It makes sense to use them when dealing with > 10,000 obs. on modern computers. } \usage{ anyv(x, value) # Faster than any(x == value). See also kit::panyv() allv(x, value) # Faster than all(x == value). See also kit::pallv() allNA(x) # Faster than all(is.na(x)). See also kit::pallNA() whichv(x, value, # Faster than which(x == value) invert = FALSE) # or which(x != value). See also Note (3) whichNA(x, invert = FALSE) # Faster than which((!)is.na(x)) x \%==\% value # Infix for whichv(v, value, FALSE), use e.g. in fsubset() x \%!=\% value # Infix for whichv(v, value, TRUE). See also Note (3) alloc(value, n, # Fast rep_len(value, n) or replicate(n, value). simplify = TRUE) # simplify only works if length(value) == 1. See Details. copyv(X, v, R, \dots, invert # Fast replace(X, v, R), replace(X, X (!/=)= v, R) or = FALSE, vind1 = FALSE, # replace(X, (!)v, R[(!)v]). See Details and Note (4). xlist = FALSE) # For multi-replacement see also kit::vswitch() setv(X, v, R, \dots, invert # Same for X[v] <- r, X[x (!/=)= v] <- r or = FALSE, vind1 = FALSE, # x[(!)v] <- r[(!)v]. Modifies X by reference, fastest. xlist = FALSE) # X/R/V can also be lists/DFs. See Details and Examples. setop(X, op, V, \dots, # Faster than X <- X +\-\*\/ V (modifies by reference) rowwise = FALSE) # optionally can also add v to rows of a matrix or list X \%+=\% V # Infix for setop(X, "+", V). See also Note (2) X \%-=\% V # Infix for setop(X, "-", V). See also Note (2) X \%*=\% V # Infix for setop(X, "*", V). See also Note (2) X \%/=\% V # Infix for setop(X, "/", V). See also Note (2) na_rm(x) # Fast: if(anyNA(x)) x[!is.na(x)] else x, last na_locf(x, set = FALSE) # obs. carried forward and first obs. carried back. na_focb(x, set = FALSE) # (by reference). These also support lists (NULL/empty) na_omit(X, cols = NULL, # Faster na.omit for matrices and data frames, na.attr = FALSE, # can use selected columns to check, attach indices, prop = 0, ...) # and remove cases with a proportion of values missing na_insert(X, prop = 0.1, # Insert missing values at random (by reference) value = NA, set = FALSE) missing_cases(X, cols=NULL, # The opposite of complete.cases(), faster for DF's. prop = 0, count = FALSE) # See also kit::panyNA(), kit::pallNA(), kit::pcountNA() vlengths(X, use.names=TRUE) # Faster lengths() and nchar() (in C, no method dispatch) vtypes(X, use.names = TRUE) # Get data storage types (faster vapply(X, typeof, ...)) vgcd(x) # Greatest common divisor of positive integers or doubles fnlevels(x) # Faster version of nlevels(x) (for factors) fnrow(X) # Faster nrow for data frames (not faster for matrices) fncol(X) # Faster ncol for data frames (not faster for matrices) fdim(X) # Faster dim for data frames (not faster for matrices) seq_row(X) # Fast integer sequences along rows of X seq_col(X) # Fast integer sequences along columns of X vec(X) # Vectorization (stacking) of matrix or data frame/list cinv(x) # Choleski (fast) inverse of symmetric PD matrix, e.g. X'X } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X, V, R}{a vector, matrix or data frame.} \item{x, v}{a (atomic) vector or matrix (\code{na_rm}/\code{locf}/\code{focb} also support lists).} \item{value}{a single value of any (atomic) vector type. For \code{whichv} it can also be a \code{length(x)} vector.} \item{invert}{logical. \code{TRUE} considers elements \code{x != value}.} \item{set}{logical. \code{TRUE} transforms \code{x} by reference.} \item{simplify}{logical. If \code{value} is a length-1 vector, \code{alloc()} with \code{simplify = TRUE} returns a length-n vector of the same type. If \code{simplify = FALSE}, the result is always a list.} \item{vind1}{logical. If \code{length(v) == 1L}, setting \code{vind1 = TRUE} will interpret \code{v} as an index, rather than a value to search and replace.} \item{xlist}{logical. If \code{X} is a list, the default is to treat it like a data frame and replace rows. Setting \code{xlist = TRUE} will treat \code{X} and its replacement \code{R} like 1-dimensional list vectors.} \item{op}{an integer or character string indicating the operation to perform. \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab \code{"+"} \tab\tab add \code{V} \cr 2 \tab\tab \code{"-"} \tab\tab subtract \code{V} \cr 3 \tab\tab \code{"*"} \tab\tab multiply by \code{V} \cr 4 \tab\tab \code{"/"} \tab\tab divide by \code{V} \cr } } \item{rowwise}{logical. \code{TRUE} performs the operation between \code{V} and each row of \code{X}.} \item{cols}{select columns to check for missing values using column names, indices, a logical vector or a function (e.g. \code{is.numeric}). The default is to check all columns, which could be inefficient.} \item{n}{integer. The length of the vector to allocate with \code{value}.} \item{na.attr}{logical. \code{TRUE} adds an attribute containing the removed cases. For compatibility reasons this is exactly the same format as \code{na.omit} i.e. the attribute is called "na.action" and of class "omit".} \item{prop}{double. For \code{na_insert}: the proportion of observations to be randomly replaced with \code{NA}. For \code{missing_cases} and \code{na_omit}: the proportion of values missing for the case to be considered missing (within \code{cols} if specified). For matrices this is implemented in R as \code{rowSums(is.na(X)) >= max(as.integer(prop * ncol(X)), 1L)}. The C code for data frames works equivalently, and skips list- and raw-columns (\code{ncol(X)} is adjusted downwards).} \item{count}{logical. \code{TRUE} returns the row-wise missing value count (within \code{cols}). This ignores \code{prop}.} \item{use.names}{logical. Preserve names if \code{X} is a list. } \item{\dots}{for \code{na_omit}: further arguments passed to \code{[} for vectors and matrices. With indexed data it is also possible to specify the \code{drop.index.levels} argument, see \link{indexing}. For \code{copyv}, \code{setv} and \code{setop}, the argument is unused, and serves as a placeholder for possible future arguments.} } \details{ \code{alloc} is a fusion of \code{\link{rep_len}} and \code{\link{replicate}} that is faster in both cases. If \code{value} is a length one vector and \code{simplify = TRUE}, the functionality is as \code{rep_len(value, n)} i.e. the output is a length \code{n} vector with \code{value}. Otherwise, it is equivalent to \code{replicate(n, value, simplify = FALSE)}, i.e., the output is a length-\code{n} list of the objects. For efficiency reasons the object is not copied (only the pointer to the object is replicated). \code{copyv} and \code{setv} are designed to optimize operations that require replacing data in objects in the broadest sense. The only difference between them is that \code{copyv} first deep-copies \code{X} before doing replacements whereas \code{setv} modifies \code{X} in place and returns the result invisibly. There are 3 ways these functions can be used: \enumerate{ \item To replace a single value, \code{setv(X, v, R)} is an efficient alternative to \code{X[X == v] <- R}, and \code{copyv(X, v, R)} is more efficient than \code{replace(X, X == v, R)}. This can be inverted using \code{setv(X, v, R, invert = TRUE)}, equivalent to \code{X[X != v] <- R}. \item To do standard replacement with integer or logical indices i.e. \code{X[v] <- R} is more efficient using \code{setv(X, v, R)}, and, if \code{v} is logical, \code{setv(X, v, R, invert = TRUE)} is efficient for \code{X[!v] <- R}. To distinguish this from use case (1) when \code{length(v) == 1}, the argument \code{vind1 = TRUE} can be set to ensure that \code{v} is always interpreted as an index. \item To copy values from objects of equal size i.e. \code{setv(X, v, R)} is faster than \code{X[v] <- R[v]}, and \code{setv(X, v, R, invert = TRUE)} is faster than \code{X[!v] <- R[!v]}. } Both \code{X} and \code{R} can be atomic or data frames / lists. If \code{X} is a list, the default behavior is to interpret it like a data frame, and apply \code{setv/copyv} to each element/column of \code{X}. If \code{R} is also a list, this is done using \code{\link{mapply}}. Thus \code{setv/copyv} can also be used to replace elements or rows in data frames, or copy rows from equally sized frames. Note that for replacing subsets in data frames \code{\link[data.table]{set}} from \code{data.table} provides a more convenient interface (and there is also \code{\link[data.table]{copy}} if you just want to deep-copy an object without any modifications to it). If \code{X} should not be interpreted like a data frame, setting \code{xlist = TRUE} will interpret it like a 1D list-vector analogous to atomic vectors, except that use case (1) is not permitted i.e. no value comparisons on list elements. %\code{copyv} and \code{setv} perform different tasks, depending on the input. If \code{v} is a scalar, the elements of \code{X} are compared to \code{v}, and the matching ones (or non-matching ones if \code{invert = TRUE}) are replaced with \code{R}, where \code{R} can be either a scalar or an object of the same dimensions as \code{X}. If \code{X} is a data frame, \code{R} can also be a column-vector matching \code{fnrow(X)}. The second option is if \code{v} is either a logical or integer vector of indices with \code{length(v) > 1L}, indicating the elements of a vector / matrix (or rows if \code{X} is a data frame) to replace with corresponding elements from \code{R}. Thus \code{R} has to be of equal dimensions as \code{X}, but could also be a column-vector if \code{X} is a data frame. Setting \code{vind1 = TRUE} ensures that \code{v} is always interpreted as an index, even if \code{length(v) == 1L}. % In this case \code{r} has to be a vector of the same length as \code{x}, and the corresponding elements in \code{v} are replaced with their counterparts in \code{r}. \code{copyv} does all that by first creating a copy of \code{x}, whereas \code{setv} modifies \code{x} directly and is thus more efficient. } \note{ \enumerate{ \item None of these functions (apart from \code{alloc}) currently support complex vectors. \item \code{setop} and the operators \code{\%+=\%}, \code{\%-=\%}, \code{\%*=\%} and \code{\%/=\%} also work with integer data, but do not perform any integer related checks. R's integers are bounded between +-2,147,483,647 and \code{NA_integer_} is stored as the value -2,147,483,648. Thus computations resulting in values exceeding +-2,147,483,647 will result in integer overflows, and \code{NA_integer_} should not occur on either side of a \code{setop} call. These are programmers functions and meant to provide the most efficient math possible to responsible users. \item It is possible to compare factors by the levels (e.g. \code{iris$Species \%==\% "setosa")}) or using integers (\code{iris$Species \%==\% 1L}). The latter is slightly more efficient. Nothing special is implemented for other objects apart from basic types, e.g. for dates (which are stored as doubles) you need to generate a date object i.e. \code{wlddev$date \%==\% as.Date("2019-01-01")}. Using \code{wlddev$date \%==\% "2019-01-01"} will give \code{integer(0)}. \item \code{setv/copyv} only allow positive integer indices being passed to \code{v}, and, for efficiency reasons, they only check the first and the last index. Thus if there are indices in the middle that fall outside of the data range it will terminate R. } } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \link[=data-transformations]{Data Transformations}, \link[=small-helpers]{Small (Helper) Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ oldopts <- options(max.print = 70) ## Which value whichNA(wlddev$PCGDP) # Same as which(is.na(wlddev$PCGDP)) whichNA(wlddev$PCGDP, invert = TRUE) # Same as which(!is.na(wlddev$PCGDP)) whichv(wlddev$country, "Chad") # Same as which(wlddev$county == "Chad") wlddev$country \%==\% "Chad" # Same thing whichv(wlddev$country, "Chad", TRUE) # Same as which(wlddev$county != "Chad") wlddev$country \%!=\% "Chad" # Same thing lvec <- wlddev$country == "Chad" # If we already have a logical vector... whichv(lvec, FALSE) # is fastver than which(!lvec) rm(lvec) # Using the \%==\% operator can yield tangible performance gains fsubset(wlddev, iso3c \%==\% "DEU") # 3x faster than: fsubset(wlddev, iso3c == "DEU") # With multiple categories we can use \%iin\% fsubset(wlddev, iso3c \%iin\% c("DEU", "ITA", "FRA")) ## Math by reference: permissible types of operations x <- alloc(1.0, 1e5) # Vector x \%+=\% 1 x \%+=\% 1:1e5 xm <- matrix(alloc(1.0, 1e5), ncol = 100) # Matrix xm \%+=\% 1 xm \%+=\% 1:1e3 setop(xm, "+", 1:100, rowwise = TRUE) xm \%+=\% xm xm \%+=\% 1:1e5 xd <- qDF(replicate(100, alloc(1.0, 1e3), simplify = FALSE)) # Data Frame xd \%+=\% 1 xd \%+=\% 1:1e3 setop(xd, "+", 1:100, rowwise = TRUE) xd \%+=\% xd rm(x, xm, xd) ## setv() and copyv() x <- rnorm(100) y <- sample.int(10, 100, replace = TRUE) setv(y, 5, 0) # Faster than y[y == 5] <- 0 setv(y, 4, x) # Faster than y[y == 4] <- x[y == 4] setv(y, 20:30, y[40:50]) # Faster than y[20:30] <- y[40:50] setv(y, 20:30, x) # Faster than y[20:30] <- x[20:30] rm(x, y) # Working with data frames, here returning copies of the frame copyv(mtcars, 20:30, ss(mtcars, 10:20)) copyv(mtcars, 20:30, fscale(mtcars)) ftransform(mtcars, new = copyv(cyl, 4, vs)) # Column-wise: copyv(mtcars, 2:3, fscale(mtcars), xlist = TRUE) copyv(mtcars, 2:3, mtcars[4:5], xlist = TRUE) ## Missing values mtc_na <- na_insert(mtcars, 0.15) # Set 15\% of values missing at random fnobs(mtc_na) # See observation count missing_cases(mtc_na) # Fast equivalent to !complete.cases(mtc_na) missing_cases(mtc_na, cols = 3:4) # Missing cases on certain columns? missing_cases(mtc_na, count = TRUE) # Missing case count missing_cases(mtc_na, prop = 0.8) # Cases with 80\% or more missing missing_cases(mtc_na, cols = 3:4, prop = 1) # Cases mssing columns 3 and 4 missing_cases(mtc_na, cols = 3:4, count = TRUE) # Missing case count on columns 3 and 4 na_omit(mtc_na) # 12x faster than na.omit(mtc_na) na_omit(mtc_na, prop = 0.8) # Only remove cases missing 80\% or more na_omit(mtc_na, na.attr = TRUE) # Adds attribute with removed cases, like na.omit na_omit(mtc_na, cols = .c(vs, am)) # Removes only cases missing vs or am na_omit(qM(mtc_na)) # Also works for matrices na_omit(mtc_na$vs, na.attr = TRUE) # Also works with vectors na_rm(mtc_na$vs) # For vectors na_rm is faster ... rm(mtc_na) ## Efficient vectorization head(vec(EuStockMarkets)) # Atomic objects: no copy at all head(vec(mtcars)) # Lists: directly in C options(oldopts) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. \keyword{utilities} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{manip} \keyword{math} \keyword{documentation} collapse/man/fmin_fmax.Rd0000644000176200001440000001266114777170130015077 0ustar liggesusers\name{fmin-fmax} \alias{fmax} \alias{fmax.default} \alias{fmax.matrix} \alias{fmax.data.frame} \alias{fmax.grouped_df} \alias{fmin} \alias{fmin.default} \alias{fmin.matrix} \alias{fmin.data.frame} \alias{fmin.grouped_df} \title{Fast (Grouped) Maxima and Minima for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fmax} and \code{fmin} are generic functions that compute the (column-wise) maximum and minimum value of all values in \code{x}, (optionally) grouped by \code{g}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) maximum or minimum value. } \usage{ fmax(x, \dots) fmin(x, \dots) \method{fmax}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, \dots) \method{fmin}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, \dots) \method{fmax}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{fmin}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{fmax}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{fmin}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{fmax}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, \dots) \method{fmin}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ Missing-value removal as controlled by the \code{na.rm} argument is done at no extra cost since in C++ any logical comparison involving \code{NA} or \code{NaN} evaluates to \code{FALSE}. Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{max}} and \code{\link{min}} which just run through without any checks). %This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. %When applied to data frames with groups or \code{drop = FALSE}, \code{fmax} and \code{fmin} preserve all column attributes (such as variable labels) but do not distinguish between classed and unclassed objects. The attributes of the data frame itself are also preserved. For further computational details see \code{\link{fsum}}. } \value{ \code{fmax} returns the maximum value of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its (grouped) maximum value. Analogous, \code{fmin} returns the minimum value \dots } \seealso{ \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fmax(mpg) # Maximum value fmin(mpg) # Minimum value (all examples below use fmax but apply to fmin) fmax(mpg, TRA = "\%") # Simple transformation: Take percentage of maximum value fmax(mpg, mtcars$cyl) # Grouped maximum value fmax(mpg, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fmax(mpg, g) fmax(mpg, g, TRA = "\%") # Groupwise percentage of maximum value fmax(mpg, g, TRA = "replace") # Groupwise replace by maximum value ## data.frame method fmax(mtcars) head(fmax(mtcars, TRA = "\%")) fmax(mtcars, g) fmax(mtcars, g, use.g.names = FALSE) # No row-names generated ## matrix method m <- qM(mtcars) fmax(m) head(fmax(m, TRA = "\%")) fmax(m, g) # etc.. ## method for grouped data frames - created with dplyr::group_by or fgroup_by mtcars |> fgroup_by(cyl,vs,am) |> fmax() mtcars |> fgroup_by(cyl,vs,am) |> fmax("\%") mtcars |> fgroup_by(cyl,vs,am) |> fselect(mpg) |> fmax() } \keyword{univar} \keyword{manip} collapse/man/fdist.Rd0000644000176200001440000000630514777170130014242 0ustar liggesusers\name{fdist} \alias{fdist} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast and Flexible Distance Computations } \description{ A fast and flexible replacement for \code{\link{dist}}, to compute euclidean distances. } \usage{ fdist(x, v = NULL, ..., method = "euclidean", nthreads = .op[["nthreads"]]) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric vector or matrix. Data frames/lists can be passed but will be converted to matrix using \code{\link{qM}}. Non-numeric (double) inputs will be coerced. } \item{v}{an (optional) numeric (double) vector such that \code{length(v) == NCOL(x)}, to compute distances with (the rows of) \code{x}. Other vector types will be coerced.} \item{\dots}{not used. A placeholder for possible future arguments.} \item{method}{an integer or character string indicating the method of computing distances. \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab \code{"euclidean"} \tab\tab euclidean distance \cr 2 \tab\tab \code{"euclidean_squared"} \tab\tab squared euclidean distance (more efficient) \cr } %\emph{Note:} The mahalanobis distance can be computed using: \code{x_mahal = t(forwardsolve(t(chol(cov(x))), t(x)))}. See Examples. } \item{nthreads}{integer. The number of threads to use. If \code{v = NULL} (full distance matrix), multithreading is along the distance matrix columns (decreasing thread loads as matrix is lower triangular). If \code{v} is supplied, multithreading is at the sub-column level (across elements).} } \value{ If \code{v = NULL}, a full lower-triangular distance matrix between the rows of \code{x} is computed and returned as a 'dist' object (all methods apply, see \code{\link{dist}}). Otherwise, a numeric vector of distances of each row of \code{x} with \code{v} is returned. See Examples. } \note{ \code{fdist} does not check for missing values, so \code{NA}'s will result in \code{NA} distances. \code{kit::topn} is a suitable complimentary function to find nearest neighbors. It is very efficient and skips missing values by default. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{flm}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Distance matrix m = as.matrix(mtcars) str(fdist(m)) # Same as dist(m) # Distance with vector d = fdist(m, fmean(m)) kit::topn(d, 5) # Index of 5 nearest neighbours # Mahalanobis distance m_mahal = t(forwardsolve(t(chol(cov(m))), t(m))) fdist(m_mahal, fmean(m_mahal)) sqrt(unattrib(mahalanobis(m, fmean(m), cov(m)))) \donttest{ # Distance of two vectors x <- rnorm(1e6) y <- rnorm(1e6) microbenchmark::microbenchmark( fdist(x, y), fdist(x, y, nthreads = 2), sqrt(sum((x-y)^2)) ) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{multivariate} \keyword{nonparametric} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fFtest.Rd0000644000176200001440000001420614777170130014363 0ustar liggesusers\name{fFtest} \alias{fFtest} \alias{fFtest.default} \alias{fFtest.formula} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Weighted) F-test for Linear Models (with Factors) } \description{ \code{fFtest} computes an R-squared based F-test for the exclusion of the variables in \code{exc}, where the full (unrestricted) model is defined by variables supplied to both \code{exc} and \code{X}. The test is efficient and designed for cases where both \code{exc} and \code{X} may contain multiple factors and continuous variables. There is also an efficient 2-part formula method. } \usage{ fFtest(...) # Internal method dispatch: formula if is.call(..1) || is.call(..2) \method{fFtest}{default}(y, exc, X = NULL, w = NULL, full.df = TRUE, \dots) \method{fFtest}{formula}(formula, data = NULL, weights = NULL, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{y}{a numeric vector: the dependent variable.} \item{exc}{a numeric vector, factor, numeric matrix or list / data frame of numeric vectors and/or factors: variables to test / exclude.} \item{X}{a numeric vector, factor, numeric matrix or list / data frame of numeric vectors and/or factors: covariates to include in both the restricted (without \code{exc}) and unrestricted model. If left empty (\code{X = NULL}), the test amounts to the F-test of the regression of \code{y} on \code{exc}.} \item{w}{numeric. A vector of (frequency) weights.} \item{formula}{a 2-part formula: \code{y ~ exc | X}, where both \code{exc} and \code{X} are expressions connected with \code{+}, and \code{X} can be omitted. \emph{Note} that other operators (\code{:}, \code{*}, \code{^}, \code{-}, etc.) are not supported, you can interact variables using standard functions like \code{\link[=itn]{finteraction/itn}} or \code{magrittr::multiply_by} inside the formula e.g. \code{log(y) ~ x1 + itn(x2, x3) | x4} or \code{log(y) ~ x1 + multiply_by(x2, x3) | x4}.} \item{data}{a named list or data frame.} \item{weights}{a weights vector or expression that results in a vector when evaluated in the \code{data} environment.} \item{full.df}{logical. If \code{TRUE} (default), the degrees of freedom are calculated as if both restricted and unrestricted models were estimated using \code{lm()} (i.e. as if factors were expanded to matrices of dummies). \code{FALSE} only uses one degree of freedom per factor. } \item{\dots}{other arguments passed to \code{fFtest.default} or to \code{fhdwithin}. Sensible options might be the \code{lm.method} argument or further control parameters to \code{fixest::demean}, the workhorse function underlying \code{fhdwithin} for higher-order centering tasks. } } \details{ Factors and continuous regressors are efficiently projected out using \code{\link{fhdwithin}}, and the option \code{full.df} regulates whether a degree of freedom is subtracted for each used factor level (equivalent to dummy-variable estimator / expanding factors), or only one degree of freedom per factor (treating factors as variables). The test automatically removes missing values and considers only the complete cases of \code{y, exc} and \code{X}. Unused factor levels in \code{exc} and \code{X} are dropped. \emph{Note} that an intercept is always added by \code{\link{fhdwithin}}, so it is not necessary to include an intercept in data supplied to \code{exc} / \code{X}. } \value{ A 5 x 3 numeric matrix of statistics. The columns contain statistics: \enumerate{ \item the R-squared of the model \item the numerator degrees of freedom i.e. the number of variables (k) and used factor levels if \code{full.df = TRUE} \item the denominator degrees of freedom: N - k - 1. \item the F-statistic \item the corresponding P-value } The rows show these statistics for: \enumerate{ \item the Full (unrestricted) Model (\code{y ~ exc + X}) \item the Restricted Model (\code{y ~ X}) \item the Exclusion Restriction of \code{exc}. The R-squared shown is simply the difference of the full and restricted R-Squared's, not the R-Squared of the model \code{y ~ exc}. } If \code{X = NULL}, only a vector of the same 5 statistics testing the model (\code{y ~ exc}) is shown. %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% \dots } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{flm}}, \code{\link{fhdwithin}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## We could use fFtest as a simple seasonality test: fFtest(AirPassengers, qF(cycle(AirPassengers))) # Testing for level-seasonality fFtest(AirPassengers, qF(cycle(AirPassengers)), # Seasonality test around a cubic trend poly(seq_along(AirPassengers), 3)) fFtest(fdiff(AirPassengers), qF(cycle(AirPassengers))) # Seasonality in first-difference ## A more classical example with only continuous variables fFtest(mpg ~ cyl + vs | hp + carb, mtcars) fFtest(mtcars$mpg, mtcars[c("cyl","vs")], mtcars[c("hp","carb")]) \donttest{ % requires fixest package ## Now encoding cyl and vs as factors fFtest(mpg ~ qF(cyl) + qF(vs) | hp + carb, mtcars) fFtest(mtcars$mpg, lapply(mtcars[c("cyl","vs")], qF), mtcars[c("hp","carb")]) } ## Using iris data: A factor and a continuous variable excluded fFtest(Sepal.Length ~ Petal.Width + Species | Sepal.Width + Petal.Length, iris) fFtest(iris$Sepal.Length, iris[4:5], iris[2:3]) ## Testing the significance of country-FE in regression of GDP on life expectancy fFtest(log(PCGDP) ~ iso3c | LIFEEX, wlddev) fFtest(log(wlddev$PCGDP), wlddev$iso3c, wlddev$LIFEEX) \donttest{ % requires fixest package ## Ok, country-FE are significant, what about adding time-FE fFtest(log(PCGDP) ~ qF(year) | iso3c + LIFEEX, wlddev) fFtest(log(wlddev$PCGDP), qF(wlddev$year), wlddev[c("iso3c","LIFEEX")]) } # Same test done using lm: data <- na_omit(get_vars(wlddev, c("iso3c","year","PCGDP","LIFEEX"))) full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), data) rest <- lm(PCGDP ~ LIFEEX + iso3c, data) anova(rest, full) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{htest} % use one of RShowDoc("KEYWORDS") collapse/man/indexing.Rd0000644000176200001440000005160415005240245014726 0ustar liggesusers\name{indexing} \alias{indexing} \alias{findex_by} \alias{iby} \alias{findex} \alias{ix} \alias{unindex} \alias{reindex} \alias{is_irregular} \alias{to_plm} \alias{[.indexed_series} \alias{[.indexed_frame} \alias{$.indexed_frame} \alias{[[.indexed_frame} \alias{[.index_df} \alias{print.index_df} \alias{[<-.indexed_frame} \alias{$<-.indexed_frame} \alias{[[<-.indexed_frame} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Indexed Time Series and Panels } \description{ A fast and flexible indexed time series and panel data class that inherits from \emph{plm}'s 'pseries' and 'pdata.frame', but is more rigorous, natively handles irregularity, can be superimposed on any data.frame/list, matrix or vector, and supports ad-hoc computations inside data masking functions and model formulas. } \usage{ ## Create an 'indexed_frame' containing 'indexed_series' findex_by(.X, \dots, single = "auto", interact.ids = TRUE) iby(.X, \dots, single = "auto", interact.ids = TRUE) # Shorthand ## Retrieve the index ('index_df') from an 'indexed_frame' or 'indexed_series' findex(x) ix(x) # Shorthand ## Remove index from 'indexed_frame' or 'indexed_series' (i.e. get .X back) unindex(x) ## Reindex 'indexed_frame' or 'indexed_series' (or index vectors / matrices) reindex(x, index = findex(x), single = "auto") ## Check if 'indexed_frame', 'indexed_series', index or time vector is irregular is_irregular(x, any_id = TRUE) ## Convert 'indexed_frame'/'indexed_series' to normal 'pdata.frame'/'pseries' to_plm(x, row.names = FALSE) # Subsetting & replacement methods: [(<-) methods call NextMethod(). # Also methods for fsubset, funique and roworder(v), na_omit (internal). \method{[}{indexed_series}(x, i, \dots, drop.index.levels = "id") \method{[}{indexed_frame}(x, i, \dots, drop.index.levels = "id") \method{[}{indexed_frame}(x, i, j) <- value \method{$}{indexed_frame}(x, name) \method{$}{indexed_frame}(x, name) <- value \method{[[}{indexed_frame}(x, i, \dots) \method{[[}{indexed_frame}(x, i) <- value # Index subsetting and printing: optimized using ss() \method{[}{index_df}(x, i, j, drop = FALSE, drop.index.levels = "id") \method{print}{index_df}(x, topn = 5, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.X}{a data frame or list-like object of equal-length columns.} \item{x}{an 'indexed_frame' or 'indexed_series'. \code{findex} also works with 'pseries' and 'pdata.frame's created with \emph{plm}. For \code{is_irregular} \code{x} can also be an index (inherits 'pindex') or a vector representing time. } \item{\dots}{for \code{findex_by}: variables identifying the individual (id) and/or time dimensions of the data. Passed either as unquoted comma-separated column names or (tagged) expressions involving columns, or as a vector of column names, indices, a logical vector or a selector function. The time variable must enter last. See Examples. Otherwise: further arguments passed to \code{\link[=NextMethod]{NextMethod()}}.} \item{single}{character. If only one indexing variable is supplied, this can be declared as \code{"id"} or \code{"time"} variable. \code{"auto"} chooses \code{"id"} if the variable has \code{\link{anyDuplicated}} values. } \item{interact.ids}{logical. If \code{n > 2} indexing variables are passed, \code{TRUE} calls \code{\link{finteraction}} on the first \code{n-1} of them (\code{n}'th variable must be time). \code{FALSE} keeps all variables in the index. The latter slows down computations of lags / differences etc. because ad-hoc interactions need to be computed, but gives more flexibility for scaling / centering / summarising over different data dimensions. } \item{index}{and index (inherits 'pindex'), or an atomic vector or list of factors matching the data dimensions. Atomic vectors or lists with 1 factor will must be declared, see \code{single}. Atomic vectors will additionally be grouped / turned into time-factors. See Details. } \item{drop.index.levels}{character. Subset methods also subset the index (= a data.frame of factors), and this argument regulates which factor levels should be dropped: either \code{"all"}, \code{"id"}, \code{"time"} or \code{"none"}. The default \code{"id"} only drops levels from id's. \code{"all"} or \code{"time"} should be used with caution because time-factors may contain levels for missing time periods (gaps in irregular sequences, or periods within a sequence removed through subsetting), and dropping those levels would create a variable that is ordinal but no longer represents time. The benefit of dropping levels is that it can speed-up subsequent computations by reducing the size of intermediate vectors created in C++. } \item{any_id}{logical. For panel series: \code{FALSE} returns the irregularity check performed for each id, \code{TRUE} calls \code{\link{any}} on those checks.} \item{row.names}{logical. \code{TRUE} creates descriptive row-names (or names for pseries) as in \code{plm}. This can be expensive and is usually not required for \code{plm} models to work.} \item{topn}{integer. The number of first and last rows to print.} \item{i, j, name, drop, value}{Arguments passed to \code{\link{NextMethod}}, or as in the \link[=[.data.frame]{data.frame methods}. Note that for index subsetting to work, \code{i} needs to be integer or logical (or an expression evaluation to integer or logical if \code{x} is a \emph{data.table}).} } \details{ The 'indexed_frame', 'indexed_series' and 'index_df' classes inherit \emph{plm}'s 'pdata.frame', 'pseries' and 'pindex' classes, respectively. They add, improve, and, in some cases, remove functionality offered by \emph{plm}, with the aim of striking an optimal balance of flexibility and performance. The inheritance means that all 'pseries' and 'pdata.frame' methods in \emph{collapse}, and also some methods in \emph{plm}, apply to them. % Where compatibility or performance considerations allow for it, \emph{collapse} will continue to create methods for \emph{plm}'s classes instead of the new classes. The use of these classes does not require \emph{plm}, but as a basic background: A 'pdata.frame' is a data.frame with an index attribute: a data.frame of 2 factors identifying the individual and time-dimension of the data. When pulling a variable out of the pdata.frame using a method like \code{$.pdata.frame} or \code{[[.pdata.frame}, a 'pseries' is created by transferring the index attribute to the vector. Methods defined for functions like \code{\link{lag}} / \code{\link{flag}} etc. use the index for correct computations on this panel data, also inside \emph{plm}'s estimation commands. \bold{Main Features and Enhancements} The 'indexed_frame' and 'indexed_series' classes extend and enhance 'pdata.frame' and 'pseries' in a number of critical dimensions. Most notably they: \itemize{ \item Support \bold{both time series and panel data}, by allowing indexation of data with one, two or more variables. \item Are \bold{class-agnostic}: any data.frame/list (such as data.table, tibble, tsibble, sf etc.) can become an 'indexed_frame' and continue to function as usual for most use cases. Similarly, any vector or matrix (such as ts, mts, xts) can become an 'indexed_series'. This also allows for transient workflows e.g. \code{some_df |> findex_by(...) |> 'do something using collapse functions' |> unindex() |> 'continue working with some_df'}. \item Have a comprehensive and efficient set of \bold{methods for subsetting and manipulation}, including methods for \code{\link{fsubset}}, \code{\link{funique}}, \code{\link[=roworder]{roworder(v)}} (internal) and \code{\link{na_omit}} (internal, \code{\link{na.omit}} also works but is slower). It is also possible to group indexed data with \code{\link{fgroup_by}} for transformations e.g. using \code{\link{fmutate}}, but aggregation requires \code{unindex()}ing. \item \bold{Natively handle irregularity}: time objects (such as 'Date', 'POSIXct' etc.) are passed to \code{\link{timeid}}, which efficiently determines the temporal structure by finding the greatest common divisor (GCD), and creates a time-factor with levels corresponding to a complete time-sequence. Plain numeric vectors are assumed to represent unit time steps (GDC = 1) and coerced to integer (but can also be passed through \code{\link{timeid}} if non-unitary). Character time variables are converted to factor. Using this time-factor in the index, \emph{collapse}'s functions efficiently perform correct computations on irregular sequences and panels without the need to 'expand' the data / fill gaps. \code{is_irregular} can be used to check for irregularity in the entire sequence / panel or separately for each individual in panel data. \item Support computations inside \bold{data-masking functions and formulas}, by virtue of "\bold{deep indexation}": Each variable inside an 'indexed_frame' is an 'indexed_series' which contains in its 'index_df' attribute an external pointer to the 'index_df' attribute of the frame. Functions operating on 'indexed_series' stored inside the frame (such as \code{with(data, flag(column))}) can fetch the index from this pointer. This allows worry-free application inside arbitrary data masking environments (\code{with}, \code{\%$\%}, \code{attach}, etc..) and estimation commands (\code{glm}, \code{feols}, \code{lmrob} etc..) without duplication of the index in memory. A limitation is that external pointers are only valid during the present R session, thus when saving an 'indexed_frame' and loading it again, you need to call \code{data = reindex(data)} before computing on it. } Indexed series also have simple \link[base]{Math} and \link[base]{Ops} methods, which apply the operation to the unindexed series and shallow copy the attributes of the original object to the result, unless the result it is a logical vector (from operations like \code{!}, \code{==} etc.). For \link[base]{Ops} methods, if the LHS object is an 'indexed_series' its attributes are taken, otherwise the attributes of the RHS object are taken. \bold{Limits to plm Compatibility} In contrast to 'pseries' and 'pdata.frame's, 'indexed_series' and 'indexed_frames' do not have descriptive "names" or "row.names" attributes attached to them, mainly for efficiency reasons. Furthermore, the index is stored in an attribute named 'index_df' (same as the class name), not 'index' as in \emph{plm}, mainly to make these classes work with \emph{data.table}, \emph{tsibble} and \emph{xts}, which also utilize 'index' attributes. This for the most part poses no problem to plm compatibility because plm source code fetches the index using \code{attr(x, "index")}, and \code{\link{attr}} by default performs partial matching. %It however allows plm objects to be indexed again / doubly indexed with both 'index' and 'index_df' attributes, so care needs to be taken when working with \emph{plm}. A much greater obstacle in working with \emph{plm} is that some internal \emph{plm} code is hinged on there being no \code{[.pseries} method, and the existence of \code{[.indexed_series} limits the use of these classes in most \emph{plm} estimation commands. Therefore the \code{to_plm} function is provided to efficiently coerce the classes to ordinary plm objects before estimation. See Examples. Overall these classes don't really benefit \emph{plm}, especially given that collapse's plm methods also support native plm objects. % However, they work very well inside other models and software, including \emph{stats} models, \emph{fixest} / \emph{lfe}, and a whole bunch of time series and ML models. See Examples. \bold{Performance Considerations} When indexing long time-series or panels with a single variable, setting \code{single = "id" or "time"} avoids a potentially expensive call to \code{\link{anyDuplicated}}. Note also that when panel-data are regular and sorted, omitting the time variable in the index can bring >= 2x performance improvements in operations like lagging and differencing (alternatively use \code{shift = "row"} argument to \code{\link{flag}}, \code{\link{fdiff}} etc.) . When dealing with long Date or POSIXct time sequences, it may also be that the internal processing by \code{\link{timeid}} is slow simply because calling \code{\link{strftime}} on these sequences to create factor levels is slow. In this case you may choose to generate an index factor with integer levels by passing \code{timeid(t)} to \code{findex_by} or \code{reindex} (which by default generates a 'qG' object which is internally converted to factor using \code{as_factor_qG}. The lazy evaluation of expressions like \code{as.character(seq_len(nlev))} in modern R makes this extremely efficient). With multiple id variables e.g. \code{findex_by(data, id1, id2, id3, time)}, the default call to \code{finteraction()} can be expensive because of pasting the levels together. In this case, users may gain performance by invoking \code{group()}, e.g. \code{findex_by(data, ids = group(id1, id2, id3), time)}. This will generate a factor with integer levels instead. \bold{Print Method} The print methods for 'indexed_frame' and 'indexed_series' first call \code{print(unindex(x), ...)}, followed by the index variables with the number of categories (index factor levels) in square brackets. If the time factor contains unused levels (= irregularity in the sequence), the square brackets indicate the number of used levels (periods), followed by the total number of levels (periods in the sequence) in parentheses. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{timeid}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview}} \examples{ oldopts <- options(max.print = 70) # Indexing panel data ---------------------------------------------------------- wldi <- findex_by(wlddev, iso3c, year) wldi wldi[1:100,1] # Works like a data frame POP <- wldi$POP # indexed_series qsu(POP) # Summary statistics G(POP) # Population growth STD(G(POP, c(1, 10))) # Within-standardized 1 and 10-year growth rates psmat(POP) # Panel-Series Matrix plot(psmat(log10(POP))) POP[30:5000] # Subsetting indexed_series Dlog(POP[30:5000]) # Log-difference of subset psacf(identity(POP[30:5000])) # ACF of subset L(Dlog(POP[30:5000], c(1, 10)), -1:1) # Multiple computations on subset # Fast Statistical Functions don't have dedicated methods # Thus for aggregation we need to unindex beforehand ... fmean(unindex(POP)) wldi |> unindex() |> fgroup_by(iso3c) |> num_vars() |> fmean() library(magrittr) # ... or unindex after taking group identifiers from the index fmean(unindex(fgrowth(POP)), ix(POP)$iso3c) wldi |> num_vars() \%>\% fgroup_by(iso3c = ix(.)$iso3c) |> unindex() |> fmean() # With matrix methods it is easier as most attributes are dropped upon aggregation. G(POP, c(1, 10)) \%>\% fmean(ix(.)$iso3c) # Example of index with multiple ids GGDC10S |> findex_by(Variable, Country, Year) |> head() # default is interact.ids = TRUE GGDCi <- GGDC10S |> findex_by(Variable, Country, Year, interact.ids = FALSE) head(GGDCi) findex(GGDCi) # The benefit is increased flexibility for summary statistics and data transformation qsu(GGDCi, effect = "Country") STD(GGDCi$SUM, effect = "Variable") # Standardizing by variable STD(GGDCi$SUM, effect = c("Variable", "Year")) # ... by variable and year # But time-based operations are a bit more expensive because of the necessary interactions D(GGDCi$SUM) # Panel-Data modelling --------------------------------------------------------- # Linear model of 5-year annualized growth rates of GDP on Life Expactancy + 5y lag lm(G(PCGDP, 5, p = 1/5) ~ L(G(LIFEEX, 5, p = 1/5), c(0, 5)), wldi) # p abbreviates "power" # Same, adding time fixed effects via plm package: need to utilize to_plm function plm::plm(G(PCGDP, 5, p = 1/5) ~ L(G(LIFEEX, 5, p = 1/5), c(0, 5)), to_plm(wldi), effect = "time") # With country and time fixed effects via fixest fixest::feols(G(PCGDP, 5, p=1/5) ~ L(G(LIFEEX, 5, p=1/5), c(0, 5)), wldi, fixef = .c(iso3c, year)) \dontrun{ % Not suggested packages # Running a robust MM regression without fixed effects robustbase::lmrob(G(PCGDP, 5, p = 1/5) ~ L(G(LIFEEX, 5, p = 1/5), c(0, 5)), wldi) # Running a robust MM regression with country and time fixed effects wldi |> fselect(PCGDP, LIFEEX) |> fgrowth(5, power = 1/5) |> ftransform(LIFEEX_L5 = L(LIFEEX, 5)) |> # drop abbreviates drop.index.levels (not strictly needed here but more consistent) na_omit(drop = "all") |> fhdwithin(na.rm = FALSE) |> # For TFE use fwithin(effect = "year") unindex() |> robustbase::lmrob(formula = PCGDP ~.) # using lm() gives same result as fixest # Using a random forest model without fixed effects # ranger does not support these kinds of formulas, thus we need some preprocessing... wldi |> fselect(PCGDP, LIFEEX) |> fgrowth(5, power = 1/5) |> ftransform(LIFEEX_L5 = L(LIFEEX, 5)) |> unindex() |> na_omit() |> ranger::ranger(formula = PCGDP ~.) } # Indexing other data frame based classes -------------------------------------- library(tibble) wlditbl <- qTBL(wlddev) |> findex_by(iso3c, year) wlditbl[,2] # Works like a tibble... wlditbl[[2]] wlditbl[1:1000, 10] head(wlditbl) library(data.table) wldidt <- qDT(wlddev) |> findex_by(iso3c, year) wldidt[1:1000] # Works like a data.table... wldidt[year > 2000] wldidt[, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country] # Aggregation unindexes the result wldidt[, lapply(.SD, sum, na.rm = TRUE), by = country, .SDcols = .c(PCGDP, LIFEEX)] # This also works but is a bit inefficient since the index is subset and then dropped # -> better unindex beforehand wldidt[year > 2000, .(sum_PCGDP = sum(PCGDP, na.rm = TRUE)), by = country] wldidt[, PCGDP_gr_5Y := G(PCGDP, 5, power = 1/5)] # Can add Variables by reference # Note that .SD is a data.table of indexed_series, not an indexed_frame, so this is WRONG! wldidt[, .c(PCGDP_gr_5Y, LIFEEX_gr_5Y) := G(slt(.SD, PCGDP, LIFEEX), 5, power = 1/5)] # This gives the correct outcome wldidt[, .c(PCGDP_gr_5Y, LIFEEX_gr_5Y) := lapply(slt(.SD, PCGDP, LIFEEX), G, 5, power = 1/5)] %# wldidt[, .c(PCGDP_growth_5Y, LIFEEX_growth_5Y) := G(slt(reindex(.SD, ix(wldidt)), PCGDP, LIFEEX), 5, power = 1/5)] # Works !! \dontrun{ library(sf) nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) nci <- findex_by(nc, SID74) nci[1:10, "AREA"] st_centroid(nci) # The geometry column is never indexed, thus sf computations work normally st_coordinates(nci) fmean(st_area(nci)) library(tsibble) pedi <- findex_by(pedestrian, Sensor, Date_Time) pedi[1:5, ] findex(pedi) # Time factor with 17k levels from POSIXct # Now here is a case where integer levels in the index can really speed things up ix(iby(pedestrian, Sensor, timeid(Date_Time))) library(microbenchmark) microbenchmark(descriptive_levels = findex_by(pedestrian, Sensor, Date_Time), integer_levels = findex_by(pedestrian, Sensor, timeid(Date_Time))) # Data has irregularity is_irregular(pedi) is_irregular(pedi, any_id = FALSE) # irregularity in all sequences # Manipulation such as lagging with tsibble/dplyr requires expanding rows and grouping # Collapse can just compute correct lag on indexed series or frames library(dplyr) microbenchmark( dplyr = fill_gaps(pedestrian) |> group_by_key() |> mutate(Lag_Count = lag(Count)), collapse = fmutate(pedi, Lag_Count = flag(Count)), times = 10) } # Indexing Atomic objects --------------------------------------------------------- ## ts print(AirPassengers) AirPassengers[-(20:30)] # Ts class does not support irregularity, subsetting drops class G(AirPassengers[-(20:30)], 12) # Annual Growth Rate: Wrong! # Now indexing AirPassengers (identity() is a trick so that the index is named time(AirPassengers)) iAP <- reindex(AirPassengers, identity(time(AirPassengers))) iAP findex(iAP) # See the index iAP[-(20:30)] # Subsetting G(iAP[-(20:30)], 12) # Annual Growth Rate: Correct! L(G(iAP[-(20:30)], c(0,1,12)), 0:1) # Lagged level, period and annual growth rates... \donttest{ % No code relying on suggested package ## xts library(xts) library(zoo) # Needed for as.yearmon() and index() functions X <- wlddev |> fsubset(iso3c == "DEU", date, PCGDP:POP) \%>\% { xts(num_vars(.), order.by = as.yearmon(.$date)) } |> ss(-(30:40)) \%>\% reindex(identity(index(.))) # Introducing a gap # plot(G(unindex(X))) diff(unindex(X)) # diff.xts gixes wrong result fdiff(X) # fdiff gives right result # But xts range-based subsets do not work... \dontrun{ X["1980/"] } # Thus a better way is not to index and perform ad-hoc omputations on the xts index X <- unindex(X) X["1980/"] \%>\% fdiff(t = index(.)) # xts index is internally processed by timeid() } ## Of course you can also index plain vectors / matrices... options(oldopts) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/unlist2d.Rd0000644000176200001440000001724714777170130014704 0ustar liggesusers\name{unlist2d} \alias{unlist2d} \title{ Recursive Row-Binding / Unlisting in 2D - to Data Frame } \description{ \code{unlist2d} efficiently unlists lists of regular R objects (objects built up from atomic elements) and creates a data frame representation of the list through recursive flattening and intelligent row-binding operations. It is a full 2-dimensional generalization of \code{\link{unlist}}, and best understood as a recursive generalization of \code{do.call(rbind, ...)}. It is a powerful tool to create a tidy data frame representation from (nested) lists of vectors, data frames, matrices, arrays or heterogeneous objects. For simple row-wise combining lists/data.frame's use the non-recursive \code{\link{rowbind}} function. % (i.e. unlisting happens via recursive flattening and intelligent row-binding of objects, see Details and Examples). } \usage{ unlist2d(l, idcols = ".id", row.names = FALSE, recursive = TRUE, id.factor = FALSE, DT = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a unlistable list (with atomic elements in all final nodes, see \code{\link{is_unlistable}}).} \item{idcols}{a character stub or a vector of names for id-columns automatically added - one for each level of nesting in \code{l}. By default the stub is \code{".id"}, so columns will be of the form \code{".id.1", ".id.2",} etc... . if \code{idcols = TRUE}, the stub is also set to \code{".id"}. If \code{idcols = FALSE}, id-columns are omitted. The content of the id columns are the list names, or (if missing) integers for the list elements. Missing elements in asymmetric nested structures are filled up with \code{NA}. See Examples. } \item{row.names}{\code{TRUE} extracts row names from all the objects in \code{l} (where available) and adds them to the output in a column named \code{"row.names"}. Alternatively, a column name i.e. \code{row.names = "variable"} can be supplied. For plain matrices in \code{l}, integer row names are generated. } \item{recursive}{logical. if \code{FALSE}, only process the lowest (deepest) level of \code{l}. See Details.} \item{id.factor}{if \code{TRUE} and \code{!isFALSE(idcols)}, create id columns as factors instead of character or integer vectors. Alternatively it is possible to specify \code{id.factor = "ordered"} to generate ordered factor id's. This is \bold{strongly recommended} when binding lists of larger data frames, as factors are much more memory efficient than character vectors and also speed up subsequent grouping operations on these columns. } \item{DT}{logical. \code{TRUE} returns a \emph{data.table}, not a data.frame.} } \details{ The data frame representation created by \code{unlist2d} is built as follows: \itemize{ \item Recurse down to the lowest level of the list-tree, data frames are exempted and treated as a final (atomic) elements. \item Identify the objects, if they are vectors, matrices or arrays convert them to data frame (in the case of atomic vectors each element becomes a column). \item Row-bind these data frames using \emph{data.table}'s \code{rbindlist} function. Columns are matched by name. If the number of columns differ, fill empty spaces with \code{NA}'s. If \code{!isFALSE(idcols)}, create id-columns on the left, filled with the object names or indices (if the (sub-)list is unnamed). If \code{!isFALSE(row.names)}, store rownames of the objects (if available) in a separate column. \item Move up to the next higher level of the list-tree and repeat: Convert atomic objects to data frame and row-bind while matching all columns and filling unmatched ones with \code{NA}'s. Create another id-column for each level of nesting passed through. If the list-tree is asymmetric, fill empty spaces in lower-level id columns with \code{NA}'s. } The result of this iterative procedure is a single data frame containing on the left side id-columns for each level of nesting (from higher to lower level), followed by a column containing all the rownames of the objects (if \code{!isFALSE(row.names)}), followed by the data columns, matched at each level of recursion. Optimal results are obtained with symmetric lists of arrays, matrices or data frames, which \code{unlist2d} efficiently binds into a beautiful data frame ready for plotting or further analysis. See examples below. } \value{ A data frame or (if \code{DT = TRUE}) a \emph{data.table}. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ For lists of data frames \code{unlist2d} works just like \code{data.table::rbindlist(l, use.names = TRUE, fill = TRUE, idcol = ".id")} however for lists of lists \code{unlist2d} does not produce the same output as \code{data.table::rbindlist} because \code{unlist2d} is a recursive function. You can use \code{\link{rowbind}} as a faithful alternative to \code{data.table::rbindlist}. The function \code{rrapply::rrapply(l, how = "melt"|"bind")} is a fast alternative (written fully in C) for nested lists of atomic elements. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{rowbind}}, \code{\link{rsplit}}, \code{\link{rapply2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Basic Examples: l <- list(mtcars, list(mtcars, mtcars)) tail(unlist2d(l)) unlist2d(rapply2d(l, fmean)) l = list(a = qM(mtcars[1:8]), b = list(c = mtcars[4:11], d = list(e = mtcars[2:10], f = mtcars))) tail(unlist2d(l, row.names = TRUE)) unlist2d(rapply2d(l, fmean)) unlist2d(rapply2d(l, fmean), recursive = FALSE) ## Groningen Growth and Development Center 10-Sector Database head(GGDC10S) # See ?GGDC10S namlab(GGDC10S, class = TRUE) # Panel-Summarize this data by Variable (Emloyment and Value Added) l <- qsu(GGDC10S, by = ~ Variable, # Output as list (instead of 4D array) pid = ~ Variable + Country, cols = 6:16, array = FALSE) str(l, give.attr = FALSE) # A list of 2-levels with matrices of statistics head(unlist2d(l)) # Default output, missing the variables (row-names) head(unlist2d(l, row.names = TRUE)) # Here we go, but this is still not very nice head(unlist2d(l, idcols = c("Sector","Trans"), # Now this is looking pretty good row.names = "Variable")) dat <- unlist2d(l, c("Sector","Trans"), # Id-columns can also be generated as factors "Variable", id.factor = TRUE) str(dat) # Split this sectoral data, first by Variable (Emloyment and Value Added), then by Country sdat <- rsplit(GGDC10S, ~ Variable + Country, cols = 6:16) # Compute pairwise correlations between sectors and recombine: dat <- unlist2d(rapply2d(sdat, pwcor), idcols = c("Variable","Country"), row.names = "Sector") head(dat) plot(hclust(as.dist(1-pwcor(dat[-(1:3)])))) # Using corrs. as distance metric to cluster sectors # List of panel-series matrices psml <- psmat(fsubset(GGDC10S, Variable == "VA"), ~Country, ~Year, cols = 6:16, array = FALSE) # Recombining with unlist2d() (effectively like reshapig the data) head(unlist2d(psml, idcols = "Sector", row.names = "Country")) rm(l, dat, sdat, psml) } % # We can also examine the correlations of Growth rates of VA in each sector across countries % dat <- G(subset(GGDC10S, Variable == "VA"),1,1, ~ Country, ~Year, cols = 6:16) % dat <- psmat(dat, ~ Country, ~Year) % plot(dat, legend = TRUE) % dat[dat > 100] = NA # remove outliers % plot(dat, legend = TRUE) % sort(apply(dat, 3, function(x) fmean.default(pwcor(x)))) % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{list} collapse/man/collapse-package.Rd0000644000176200001440000007056715202504365016332 0ustar liggesusers\name{collapse-package} \alias{collapse-package} \alias{collapse} \docType{package} \title{ % \code{collapse:} Advanced and Fast Data Transformation } \description{ \emph{collapse} is a C/C++ based package for data transformation and statistical computing in R. Its aims are: \itemize{ \item To facilitate complex data transformation, exploration and computing tasks in R. \item To help make R code fast, flexible, parsimonious and programmer friendly. % \emph{collapse} is a fast %to facilitate (advanced) data manipulation in R % To achieve the latter, % collapse provides a broad set.. -> Nah, its not a misc package } It also implements a \href{https://fastverse.org/collapse/articles/collapse_object_handling.html}{class-agnostic approach} to data manipulation in R, supporting all major classes. } \section{Getting Started}{ Read the short \href{https://fastverse.org/collapse/articles/collapse_documentation.html}{vignette} on documentation resources, and check out the built-in \link[=collapse-documentation]{documentation}. % A careful consideration of the \href{https://raw.githubusercontent.com/SebKrantz/cheatsheets/master/collapse.pdf}{cheat sheet} is recommended for quick starters. % or read the \href{https://fastverse.org/collapse/articles/collapse_intro.html}{introductory vignette}. % All vignettes can be accessed on the \href{https://fastverse.org/collapse/}{package website}. A cheatsheet is available \href{https://raw.githubusercontent.com/SebKrantz/cheatsheets/master/collapse.pdf}{here}. A compact introduction for quick-starters is provided in the examples section below. } % \section{Key Features} { % \cr % \bold{Key Features:} % (In more detail in \link[=collapse-documentation]{Collapse Overview}) % Key functionality: % Key areas Key topics addressed by \emph{collapse} are: % where \emph{collapse} offers innovative solutions are: % \tabular{lll}{ % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \tab \emph{ Description } \cr % \enumerate{ % (1) \tab\tab \dots \cr % \item \emph{Advanced data programming}: A full set of fast statistical functions % supporting grouped and weighted computations on vectors, matrices and % data frames. Fast (ordered) and programmable grouping, factor % generation, manipulation of data frames and data object conversions. % select, subset, transform, replace, add and delete data frame columns. % \item \emph{Advanced aggregation}: Fast and easy multi-data-type, multi-function, % weighted, parallelized and fully customized data aggregation. % \item \emph{Advanced transformations}: Fast (grouped, weighted) replacing and % sweeping out of statistics, scaling / standardizing, centering (i.e. % between and within transformations), higher-dimensional centering % (i.e. multiple fixed effects transformations), linear % prediction and partialling-out. % \item \emph{Advanced time-computations}: Fast (sequences of) lags / leads, and % (lagged / leaded, iterated, quasi-, log-) differences and growth % rates on (unordered) time series and panel data. Multivariate auto, % partial and cross-correlation functions for panel data. % Panel data to (ts-)array conversions. % \item \emph{List processing}: (Recursive) list search / identification, extraction / % subsetting, data-apply, and generalized row-binding / unlisting in 2D. % \item \emph{Advanced data exploration}: Fast (grouped, weighted, panel-decomposed) % summary statistics for complex multilevel / panel data. % } % } % } \section{Details}{ % Put this below bullet points ?? % \emph{collapse} provides a carefully conceived % \emph{collapse} provides a compact set of functions % organized roughly into several topics \emph{collapse} provides an integrated suite of statistical and data manipulation functions that greatly extend and enhance the capabilities of base R. In a nutshell, \emph{collapse} provides: \itemize{ \item Fast C/C++ based (grouped, weighted) computations embedded in highly optimized R code. \item More complex statistical, time series / panel data and recursive (list-processing) operations. \item A flexible and generic approach supporting and preserving many R objects. % standard % (S3 generic statistical functions, class/attribute preservation). % , compatibility with \emph{dplyr}, \emph{plm} and \emph{data.table} \item Optimized programming in standard and non-standard evaluation. } % To explain this a bit: The statistical functions in \emph{collapse} are S3 generic with core methods for vectors, matrices and data frames, and internally support grouped and weighted computations carried out in C/C++. %Thus functions need only be called once for column-wise and/or grouped computations, providing a lot of extra speed and full support for sampling weights. %R code is strongly optimized and inputs are swiftly passed to compiled C/C++ code, %, with further checks run at that level. %where various efficient algorithms are implemented. %This approach enables flexible and parsimonious programming and data manipulation in R at high speeds. %when passed to a \emph{collapse} statistical function together with a suitable data object % To provide extra speed and programmability %To facilitate efficient programming, core S3 methods, grouping and ordering functionality and some C-level helper functions can be accessed by the user. %For example \code{GRP()} creates grouping objects directly passed to C++ by statistical functions. \code{fgroup_by()} attaches these objects to a data frame, yielding efficient chained calls when combined with \emph{magrittr} pipes, \link[=fast-data-manipulation]{fast manipulation functions} and \link[=fast-statistical-functions]{fast statistical functions}. %Performance gains are also realized when grouping with factors, or computing on grouped (\emph{dplyr}) or panel data (\emph{plm}) frames. %Hence \emph{collapse} enables optimized programming and data manipulation in both standard and non-standard evaluation. %The function \code{fgroup_by} can be used to efficiently create a grouped tibble inside dplyr-like chained calls (constructed with \emph{magrittr} pipes, fast manipulation functions like \code{fselect}, \code{fsubset}, \code{ftransform} and fast statistical functions). Thus \emph{collapse} enables optimized programming in both standard and non-standard evaluation. % attributes of atomic objects are preserved if the dimensions are unaltered by the computation, and data frame attributes are always preserved, Functions and core methods seek to preserve object attributes (including column attributes such as variable labels), ensuring flexibility and effective workflows with a very broad range of R objects (including most time-series classes). See the \href{https://fastverse.org/collapse/articles/collapse_object_handling.html}{vignette} on \emph{collapse}'s handling of R objects. Missing values are efficiently skipped at C/C++ level. The package default is \code{na.rm = TRUE}. This can be changed using \code{\link[=set_collapse]{set_collapse(na.rm = FALSE)}}. Missing weights are generally supported. % Core functionality and all statistical functions / computations are tested with > 20,000 unit tests for Base R equivalence, exempting some improvements (e.g. \code{fsum(NA, na.rm = TRUE)} evaluates to \code{NA}, not 0 (unless \code{fill = TRUE}), similarly for \code{fmin} and \code{fmax}; no \code{NaN} values are generated from computations involving \code{NA} values). %Generic functions provide some \link[=collapse-options]{security} against silent swallowing of arguments. %Hence they also handle various date and time series classes etc., and can easily be integreated into most approaches to data transformation. %A global \code{option("collapse_unused_arg_action")} can be set to regulate the behavior when unused arguments are passed to a generic function. The default is to issue a warning. \emph{collapse} installs with a built-in hierarchical \link[=collapse-documentation]{documentation} facilitating the use of the package. % The \href{https://fastverse.org/collapse/articles/index.html}{vignettes} are complimentary and also follow a more structured approach. % and \code{base/stats}. % extra methods warrant, provide ? %Apart from general performance considerations, \emph{collapse} excels at applications involving fast panel data transformations and techniques, fast weighted computations (e.g. weighted aggregation), fast programming and aggregation with categorical and mixed-type data, fast programming with (multivariate) time series, and programming with lists of data objects. %Other broad areas are fast grouped and weighted programming to implement new statistical techniques and packages, and fast data manipulation code (i.e. server-side for \code{shiny} apps). % The package largely avoids non-standard evaluation and exports core methods for maximum programmability. % Most are S3 generic with methods for common \code{R} objects (vectors, matrices, data frames, \dots) % high computation %(aggregation and transformations ~10x \emph{data.table} on data <1 Mio obs.). % Beyond speed, flexibility and parsimony in coding, a central objective of \emph{collapse} is to facilitate advanced / complex operations on data. The package is coded both in C and C++ and built with \emph{Rcpp}, but also uses C/C++ functions from \emph{data.table}, \emph{kit}, \emph{fixest}, \emph{weights}, \emph{stats} and \emph{RcppArmadillo / RcppEigen}. % For the moment \emph{collapse} does not utilize low-level parallelism (such as OpenMP). % \emph{collapse} is built with \code{Rcpp} and imports \code{C} functions from \emph{data.table}, \emph{lfe} and \emph{stats}. %, and uses \code{ggplot2} visualizations. } \section{Author(s)}{ % \author{ \bold{Maintainer}: Sebastian Krantz \email{sebastian.krantz@graduateinstitute.ch} % Other contributors from packages \emph{collapse} utilizes: % \itemize{ % \item Matt Dowle, Arun Srinivasan and contributors worldwide (\emph{data.table}) % \item Dirk Eddelbuettel and contributors worldwide (\emph{Rcpp}, \emph{RcppArmadillo}, \emph{RcppEigen}) % \item Morgan Jacob (\emph{kit}) % \item Laurent Berge (\emph{fixest}) % \item Josh Pasek (\emph{weights}) % \item R Core Team and contributors worldwide (\emph{stats}) % } % I thank many people from diverse fields for helpful answers on Stackoverflow and many other people for feature requests and helpful suggestions. } \section{Developing / Bug Reporting}{ \itemize{ \item Please report issues at \url{https://github.com/fastverse/collapse/issues}. \item Please send pull-requests to the 'development' branch of the repository. } } \references{ Krantz S (2026). \emph{collapse}: Advanced and Fast Statistical Computing and Data Transformation in R. \emph{Journal of Statistical Software} \bold{116}(1), 1--38. \doi{10.18637/jss.v116.i01} } %\seealso{ % Optional links to other man pages %} \examples{ ## Note: this set of examples is is certainly non-exhaustive and does not ## showcase many recent features, but remains a very good starting point ## Let's start with some statistical programming v <- iris$Sepal.Length d <- num_vars(iris) # Saving numeric variables f <- iris$Species # Factor # Simple statistics fmean(v) # vector fmean(qM(d)) # matrix (qM is a faster as.matrix) fmean(d) # data.frame # Preserving data structure fmean(qM(d), drop = FALSE) # Still a matrix fmean(d, drop = FALSE) # Still a data.frame # Weighted statistics, supported by most functions... w <- abs(rnorm(fnrow(iris))) fmean(d, w = w) # Grouped statistics... fmean(d, f) # Groupwise-weighted statistics... fmean(d, f, w) # Simple Transformations... head(fmode(d, TRA = "replace")) # Replacing values with the mode head(fmedian(d, TRA = "-")) # Subtracting the median head(fsum(d, TRA = "\%")) # Computing percentages head(fsd(d, TRA = "/")) # Dividing by the standard-deviation (scaling), etc... # Weighted Transformations... head(fnth(d, 0.75, w = w, TRA = "replace")) # Replacing by the weighted 3rd quartile # Grouped Transformations... head(fvar(d, f, TRA = "replace")) # Replacing values with the group variance head(fsd(d, f, TRA = "/")) # Grouped scaling head(fmin(d, f, TRA = "-")) # Setting the minimum value in each species to 0 head(fsum(d, f, TRA = "/")) # Dividing by the sum (proportions) head(fmedian(d, f, TRA = "-")) # Groupwise de-median head(ffirst(d, f, TRA = "\%\%")) # Taking modulus of first group-value, etc. ... # Grouped and weighted transformations... head(fsd(d, f, w, "/"), 3) # weighted scaling head(fmedian(d, f, w, "-"), 3) # subtracting the weighted group-median head(fmode(d, f, w, "replace"), 3) # replace with weighted statistical mode ## Some more advanced transformations... head(fbetween(d)) # Averaging (faster t.: fmean(d, TRA = "replace")) head(fwithin(d)) # Centering (faster than: fmean(d, TRA = "-")) head(fwithin(d, f, w)) # Grouped and weighted (same as fmean(d, f, w, "-")) head(fwithin(d, f, w, mean = 5)) # Setting a custom mean head(fwithin(d, f, w, theta = 0.76)) # Quasi-centering i.e. d - theta*fbetween(d, f, w) head(fwithin(d, f, w, mean = "overall.mean")) # Preserving the overall mean of the data head(fscale(d)) # Scaling and centering head(fscale(d, mean = 5, sd = 3)) # Custom scaling and centering head(fscale(d, mean = FALSE, sd = 3)) # Mean preserving scaling head(fscale(d, f, w)) # Grouped and weighted scaling and centering head(fscale(d, f, w, mean = 5, sd = 3)) # Custom grouped and weighted scaling and centering head(fscale(d, f, w, mean = FALSE, # Preserving group means sd = "within.sd")) # and setting group-sd to fsd(fwithin(d, f, w), w = w) head(fscale(d, f, w, mean = "overall.mean", # Full harmonization of group means and variances, sd = "within.sd")) # while preserving the level and scale of the data. head(get_vars(iris, 1:2)) # Use get_vars for fast selecting, gv is shortcut head(fhdbetween(gv(iris, 1:2), gv(iris, 3:5))) # Linear prediction with factors and covariates head(fhdwithin(gv(iris, 1:2), gv(iris, 3:5))) # Linear partialling out factors and covariates ss(iris, 1:10, 1:2) # Similarly fsubset/ss for fast subsetting rows # Simple Time-Computations.. head(flag(AirPassengers, -1:3)) # One lead and three lags head(fdiff(EuStockMarkets, # Suitably lagged first and second differences c(1, frequency(EuStockMarkets)), diff = 1:2)) head(fdiff(EuStockMarkets, rho = 0.87)) # Quasi-differences (x_t - rho*x_t-1) head(fdiff(EuStockMarkets, log = TRUE)) # Log-differences head(fgrowth(EuStockMarkets)) # Exact growth rates (percentage change) head(fgrowth(EuStockMarkets, logdiff = TRUE)) # Log-difference growth rates (percentage change) # Note that it is not necessary to use factors for grouping. fmean(gv(mtcars, -c(2,8:9)), mtcars$cyl) # Can also use vector (internally converted using qF()) fmean(gv(mtcars, -c(2,8:9)), gv(mtcars, c(2,8:9))) # or a list of vector (internally grouped using GRP()) g <- GRP(mtcars, ~ cyl + vs + am) # It is also possible to create grouping objects print(g) # These are instructive to learn about the grouping, plot(g) # and are directly handed down to C++ code fmean(gv(mtcars, -c(2,8:9)), g) # This can speed up multiple computations over same groups fsd(gv(mtcars, -c(2,8:9)), g) # Factors can efficiently be created using qF() f1 <- qF(mtcars$cyl) # Unlike GRP objects, factors are checked for NA's f2 <- qF(mtcars$cyl, na.exclude = FALSE) # This can however be avoided through this option class(f2) # Note the added class library(microbenchmark) microbenchmark(fmean(mtcars, f1), fmean(mtcars, f2)) # A minor difference, larger on larger data with(mtcars, finteraction(cyl, vs, am)) # Efficient interactions of vectors and/or factors finteraction(gv(mtcars, c(2,8:9))) # .. or lists of vectors/factors # Simple row- or column-wise computations on matrices or data frames with dapply() dapply(mtcars, quantile) # column quantiles dapply(mtcars, quantile, MARGIN = 1) # Row-quantiles # dapply preserves the data structure of any matrices / data frames passed # Some fast matrix row/column functions are also provided by the matrixStats package # Similarly, BY performs grouped comptations BY(mtcars, f2, quantile) BY(mtcars, f2, quantile, expand.wide = TRUE) # For efficient (grouped) replacing and sweeping out computed statistics, use TRA() sds <- fsd(mtcars) head(TRA(mtcars, sds, "/")) # Simple scaling (if sd's not needed, use fsd(mtcars, TRA = "/")) microbenchmark(TRA(mtcars, sds, "/"), sweep(mtcars, 2, sds, "/")) # A remarkable performance gain.. sds <- fsd(mtcars, f2) head(TRA(mtcars, sds, "/", f2)) # Groupd scaling (if sd's not needed: fsd(mtcars, f2, TRA = "/")) # All functions above perserve the structure of matrices / data frames # If conversions are required, use these efficient functions: mtcarsM <- qM(mtcars) # Matrix from data.frame head(qDF(mtcarsM)) # data.frame from matrix columns head(mrtl(mtcarsM, TRUE, "data.frame")) # data.frame from matrix rows, etc.. head(qDT(mtcarsM, "cars")) # Saving row.names when converting matrix to data.table head(qDT(mtcars, "cars")) # Same use a data.frame \donttest{ % No code relying on suggested packages and the tidyverse, also need to reduce execution time for CRAN ## Now let's get some real data and see how we can use this power for data manipulation head(wlddev) # World Bank World Development Data: 216 countries, 61 years, 5 series (columns 9-13) # Starting with some discriptive tools... namlab(wlddev, class = TRUE) # Show variable names, labels and classes fnobs(wlddev) # Observation count pwnobs(wlddev) # Pairwise observation count head(fnobs(wlddev, wlddev$country)) # Grouped observation count fndistinct(wlddev) # Distinct values descr(wlddev) # Describe data varying(wlddev, ~ country) # Show which variables vary within countries qsu(wlddev, pid = ~ country, # Panel-summarize columns 9 though 12 of this data cols = 9:12, vlabels = TRUE) # (between and within countries) qsu(wlddev, ~ region, ~ country, # Do all of that by region and also compute higher moments cols = 9:12, higher = TRUE) # -> returns a 4D array qsu(wlddev, ~ region, ~ country, cols = 9:12, higher = TRUE, array = FALSE) |> # Return as a list of matrices.. unlist2d(c("Variable","Trans"), row.names = "Region") |> head()# and turn into a tidy data.frame pwcor(num_vars(wlddev), P = TRUE) # Pairwise correlations with p-value pwcor(fmean(num_vars(wlddev), wlddev$country), P = TRUE) # Correlating country means pwcor(fwithin(num_vars(wlddev), wlddev$country), P = TRUE) # Within-country correlations psacf(wlddev, ~country, ~year, cols = 9:12) # Panel-data Autocorrelation function pspacf(wlddev, ~country, ~year, cols = 9:12) # Partial panel-autocorrelations psmat(wlddev, ~iso3c, ~year, cols = 9:12) |> plot() # Convert panel to 3D array and plot ## collapse offers a few very efficent functions for data manipulation: # Fast selecting and replacing columns series <- get_vars(wlddev, 9:12) # Same as wlddev[9:12] but 2x faster series <- fselect(wlddev, PCGDP:ODA) # Same thing: > 100x faster than dplyr::select get_vars(wlddev, 9:12) <- series # Replace, 8x faster wlddev[9:12] <- series + replaces names fselect(wlddev, PCGDP:ODA) <- series # Same thing # Fast subsetting head(fsubset(wlddev, country == "Ireland", -country, -iso3c)) head(fsubset(wlddev, country == "Ireland" & year > 1990, year, PCGDP:ODA)) ss(wlddev, 1:10, 1:10) # This is an order of magnitude faster than wlddev[1:10, 1:10] # Fast transforming head(ftransform(wlddev, ODA_GDP = ODA / PCGDP, ODA_LIFEEX = sqrt(ODA) / LIFEEX)) settransform(wlddev, ODA_GDP = ODA / PCGDP, ODA_LIFEEX = sqrt(ODA) / LIFEEX) # by reference head(ftransform(wlddev, PCGDP = NULL, ODA = NULL, GINI_sum = fsum(GINI))) head(ftransformv(wlddev, 9:12, log)) # Can also transform with lists of columns head(ftransformv(wlddev, 9:12, fscale, apply = FALSE)) # apply = FALSE invokes fscale.data.frame settransformv(wlddev, 9:12, fscale, apply = FALSE) # Changing the data by reference ftransform(wlddev) <- fscale(gv(wlddev, 9:12)) # Same thing (using replacement method) library(magrittr) # Same thing, using magrittr wlddev \%<>\% ftransformv(9:12, fscale, apply = FALSE) wlddev \%>\% ftransform(gv(., 9:12) |> # With compound pipes: Scaling and lagging fscale() |> flag(0:2, iso3c, year)) |> head() # Fast reordering head(roworder(wlddev, -country, year)) head(colorder(wlddev, country, year)) # Fast renaming head(frename(wlddev, country = Ctry, year = Yr)) setrename(wlddev, country = Ctry, year = Yr) # By reference head(frename(wlddev, tolower, cols = 9:12)) # Fast grouping fgroup_by(wlddev, Ctry, decade) |> fgroup_vars() |> head() rm(wlddev) # .. but only works with collapse functions ## Now lets start putting things together wlddev |> fsubset(year > 1990, region, income, PCGDP:ODA) |> fgroup_by(region, income) |> fmean() # Fast aggregation using the mean # Same thing using dplyr manipulation verbs library(dplyr) wlddev |> filter(year > 1990) |> select(region, income, PCGDP:ODA) |> group_by(region,income) |> fmean() # This is already a lot faster than summarize_all(mean) wlddev |> fsubset(year > 1990, region, income, PCGDP:POP) |> fgroup_by(region, income) |> fmean(POP) # Weighted group means wlddev |> fsubset(year > 1990, region, income, PCGDP:POP) |> fgroup_by(region, income) |> fsd(POP) # Weighted group standard deviations wlddev |> na_omit(cols = "POP") |> fgroup_by(region, income) |> fselect(PCGDP:POP) |> fnth(0.75, POP) # Weighted group third quartile wlddev |> fgroup_by(country) |> fselect(PCGDP:ODA) |> fwithin() |> head() # Within transformation wlddev |> fgroup_by(country) |> fselect(PCGDP:ODA) |> fmedian(TRA = "-") |> head() # Grouped centering using the median # Replacing data points by the weighted first quartile: wlddev |> na_omit(cols = "POP") |> fgroup_by(country) |> fselect(country, year, PCGDP:POP) \%>\% ftransform(fselect(., -country, -year) |> fnth(0.25, POP, "fill")) |> head() wlddev |> fgroup_by(country) |> fselect(PCGDP:ODA) |> fscale() |> head() # Standardizing wlddev |> fgroup_by(country) |> fselect(PCGDP:POP) |> fscale(POP) |> head() # Weighted.. wlddev |> fselect(country, year, PCGDP:ODA) |> # Adding 1 lead and 2 lags of each variable fgroup_by(country) |> flag(-1:2, year) |> head() wlddev |> fselect(country, year, PCGDP:ODA) |> # Adding 1 lead and 10-year growth rates fgroup_by(country) |> fgrowth(c(0:1,10), 1, year) |> head() # etc... # Aggregation with multiple functions wlddev |> fsubset(year > 1990, region, income, PCGDP:ODA) |> fgroup_by(region, income) \%>\% { add_vars(fgroup_vars(., "unique"), fmedian(., keep.group_vars = FALSE) |> add_stub("median_"), fmean(., keep.group_vars = FALSE) |> add_stub("mean_"), fsd(., keep.group_vars = FALSE) |> add_stub("sd_")) } |> head() # Transformation with multiple functions wlddev |> fselect(country, year, PCGDP:ODA) |> fgroup_by(country) \%>\% { add_vars(fdiff(., c(1,10), 1, year) |> flag(0:2, year), # Sequence of lagged differences ftransform(., fselect(., PCGDP:ODA) |> fwithin() |> add_stub("W.")) |> flag(0:2, year, keep.ids = FALSE)) # Sequence of lagged demeaned vars } |> head() # With ftransform, can also easily do one or more grouped mutations on the fly.. settransform(wlddev, median_ODA = fmedian(ODA, list(region, income), TRA = "fill")) settransform(wlddev, sd_ODA = fsd(ODA, list(region, income), TRA = "fill"), mean_GDP = fmean(PCGDP, country, TRA = "fill")) wlddev \%<>\% ftransform(fmedian(list(median_ODA = ODA, median_GDP = PCGDP), list(region, income), TRA = "fill")) # On a groped data frame it is also possible to grouped transform certain columns # but perform aggregate operatins on others: wlddev |> fgroup_by(region, income) \%>\% ftransform(gmedian_GDP = fmedian(PCGDP, GRP(.), TRA = "replace"), omedian_GDP = fmedian(PCGDP, TRA = "replace"), # "replace" preserves NA's omedian_GDP_fill = fmedian(PCGDP)) |> tail() rm(wlddev) ## For multi-type data aggregation, the function collap() offers ease and flexibility # Aggregate this data by country and decade: Numeric columns with mean, categorical with mode head(collap(wlddev, ~ country + decade, fmean, fmode)) # taking weighted mean and weighted mode: head(collap(wlddev, ~ country + decade, fmean, fmode, w = ~ POP, wFUN = fsum)) # Multi-function aggregation of certain columns head(collap(wlddev, ~ country + decade, list(fmean, fmedian, fsd), list(ffirst, flast), cols = c(3,9:12))) # Customized Aggregation: Assign columns to functions head(collap(wlddev, ~ country + decade, custom = list(fmean = 9:10, fsd = 9:12, flast = 3, ffirst = 6:8))) # For grouped data frames use collapg wlddev |> fsubset(year > 1990, country, region, income, PCGDP:ODA) |> fgroup_by(country) |> collapg(fmean, ffirst) |> ftransform(AMGDP = PCGDP > fmedian(PCGDP, list(region, income), TRA = "fill"), AMODA = ODA > fmedian(ODA, income, TRA = "replace_fill")) |> head() ## Additional flexibility for data transformation tasks is offerend by tidy transformation operators # Within-transformation (centering on overall mean) head(W(wlddev, ~ country, cols = 9:12, mean = "overall.mean")) # Partialling out country and year fixed effects head(HDW(wlddev, PCGDP + LIFEEX ~ qF(country) + qF(year))) # Same, adding ODA as continuous regressor head(HDW(wlddev, PCGDP + LIFEEX ~ qF(country) + qF(year) + ODA)) # Standardizing (scaling and centering) by country head(STD(wlddev, ~ country, cols = 9:12)) # Computing 1 lead and 3 lags of the 4 series head(L(wlddev, -1:3, ~ country, ~year, cols = 9:12)) # Computing the 1- and 10-year first differences head(D(wlddev, c(1,10), 1, ~ country, ~year, cols = 9:12)) head(D(wlddev, c(1,10), 1:2, ~ country, ~year, cols = 9:12)) # ..first and second differences # Computing the 1- and 10-year growth rates head(G(wlddev, c(1,10), 1, ~ country, ~year, cols = 9:12)) # Adding growth rate variables to dataset add_vars(wlddev) <- G(wlddev, c(1, 10), 1, ~ country, ~year, cols = 9:12, keep.ids = FALSE) get_vars(wlddev, "G1.", regex = TRUE) <- NULL # Deleting again # These operators can conveniently be used in regression formulas: # Using a Mundlak (1978) procedure to estimate the effect of OECD on LIFEEX, controlling for PCGDP lm(LIFEEX ~ log(PCGDP) + OECD + B(log(PCGDP), country), wlddev |> fselect(country, OECD, PCGDP, LIFEEX) |> na_omit()) # Adding 10-year lagged life-expectancy to allow for some convergence effects (dynamic panel model) lm(LIFEEX ~ L(LIFEEX, 10, country) + log(PCGDP) + OECD + B(log(PCGDP), country), wlddev |> fselect(country, OECD, PCGDP, LIFEEX) |> na_omit()) # Tranformation functions and operators also support indexed data classes: wldi <- findex_by(wlddev, country, year) head(W(wldi$PCGDP)) # Country-demeaning head(W(wldi, cols = 9:12)) head(W(wldi$PCGDP, effect = 2)) # Time-demeaning head(W(wldi, effect = 2, cols = 9:12)) head(HDW(wldi$PCGDP)) # Country- and time-demeaning head(HDW(wldi, cols = 9:12)) head(STD(wldi$PCGDP)) # Standardizing by country head(STD(wldi, cols = 9:12)) head(L(wldi$PCGDP, -1:3)) # Panel-lags head(L(wldi, -1:3, 9:12)) head(G(wldi$PCGDP)) # Panel-Growth rates head(G(wldi, 1, 1, 9:12)) lm(Dlog(PCGDP) ~ L(Dlog(LIFEEX), 0:3), wldi) # Panel data regression rm(wldi) } # Remove all objects used in this example section rm(v, d, w, f, f1, f2, g, mtcarsM, sds, series, wlddev) } \keyword{package} \keyword{manip} collapse/man/rsplit.Rd0000644000176200001440000000754414777170130014454 0ustar liggesusers\name{rsplit} \alias{rsplit} \alias{rsplit.default} \alias{rsplit.matrix} \alias{rsplit.data.frame} \title{ Fast (Recursive) Splitting } \description{ \code{rsplit} (recursively) splits a vector, matrix or data frame into subsets according to combinations of (multiple) vectors / factors and returns a (nested) list. If \code{flatten = TRUE}, the list is flattened yielding the same result as \code{\link{split}}. \code{rsplit} is implemented as a wrapper around \code{\link{gsplit}}, and significantly faster than \code{\link{split}}. } \usage{ rsplit(x, \dots) \method{rsplit}{default}(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, \dots) \method{rsplit}{matrix}(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, drop.dim = FALSE, \dots) \method{rsplit}{data.frame}(x, by, drop = TRUE, flatten = FALSE, cols = NULL, keep.by = FALSE, simplify = TRUE, use.names = TRUE, \dots) } \arguments{ \item{x}{a vector, matrix, data.frame or list like object.} \item{fl}{a \code{\link{GRP}} object, or a (list of) vector(s) / factor(s) (internally converted to a \code{\link{GRP}} object(s)) used to split \code{x}.} \item{by}{\emph{data.frame method}: Same as \code{fl}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{drop}{logical. \code{TRUE} removes unused levels or combinations of levels from factors before splitting; \code{FALSE} retains those combinations yielding empty list elements in the output.} \item{flatten}{logical. If \code{fl} is a list of vectors / factors, \code{TRUE} calls \code{\link{GRP}} on the list, creating a single grouping used for splitting; \code{FALSE} yields recursive splitting.} \item{use.names}{logical. \code{TRUE} returns a named list (like \code{\link{split}}); \code{FALSE} returns a plain list.} \item{drop.dim}{logical. \code{TRUE} returns atomic vectors for matrix-splits consisting of one row. } \item{cols}{\emph{data.frame method}: Select columns to split using a function, column names, indices or a logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{keep.by}{logical. If a formula is passed to \code{by}, then \code{TRUE} preserves the splitting (right-hand-side) variables in the data frame.} \item{simplify}{\emph{data.frame method}: Logical. \code{TRUE} calls \code{rsplit.default} if a single column is split e.g. \code{rsplit(data, col1 ~ group1)} becomes the same as \code{rsplit(data$col1, data$group1)}.} \item{\dots}{further arguments passed to \code{\link{GRP}}. Sensible choices would be \code{sort = FALSE}, \code{decreasing = TRUE} or \code{na.last = FALSE}. Note that these options only apply if \code{fl} is not already a (list of) factor(s).} } \value{ a (nested) list containing the subsets of \code{x}. } \seealso{ \code{\link{gsplit}}, \code{\link{rapply2d}}, \code{\link{unlist2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ rsplit(mtcars$mpg, mtcars$cyl) rsplit(mtcars, mtcars$cyl) rsplit(mtcars, mtcars[.c(cyl, vs, am)]) rsplit(mtcars, ~ cyl + vs + am, keep.by = TRUE) # Same thing rsplit(mtcars, ~ cyl + vs + am) rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE) rsplit(mtcars, mpg ~ cyl) rsplit(mtcars, mpg ~ cyl, simplify = FALSE) rsplit(mtcars, mpg + hp ~ cyl + vs + am) rsplit(mtcars, mpg + hp ~ cyl + vs + am, keep.by = TRUE) # Split this sectoral data, first by Variable (Emloyment and Value Added), then by Country GGDCspl <- rsplit(GGDC10S, ~ Variable + Country, cols = 6:16) str(GGDCspl) # The nested list can be reassembled using unlist2d() head(unlist2d(GGDCspl, idcols = .c(Variable, Country))) rm(GGDCspl) # Another example with mtcars (not as clean because of row.names) nl <- rsplit(mtcars, mpg + hp ~ cyl + vs + am) str(nl) unlist2d(nl, idcols = .c(cyl, vs, am), row.names = "car") rm(nl) } \keyword{manip} collapse/man/fsum.Rd0000644000176200001440000002277114777170130014110 0ustar liggesusers\name{fsum} \alias{fsum} \alias{fsum.default} \alias{fsum.matrix} \alias{fsum.data.frame} \alias{fsum.grouped_df} \title{Fast (Grouped, Weighted) Sum for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fsum} is a generic function that computes the (column-wise) sum of all values in \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w} (e.g. to calculate survey totals). The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) sum. } \usage{ fsum(x, \dots) \method{fsum}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, fill = FALSE, nthreads = .op[["nthreads"]], \dots) \method{fsum}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, fill = FALSE, nthreads = .op[["nthreads"]], \dots) \method{fsum}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, fill = FALSE, nthreads = .op[["nthreads"]], \dots) \method{fsum}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], fill = FALSE, nthreads = .op[["nthreads"]], \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{GRP} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{GRP} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{fill}{logical. Initialize result with \code{0} instead of \code{NA} when \code{na.rm = TRUE} e.g. \code{fsum(NA, fill = TRUE)} returns \code{0} instead of \code{NA}. } \item{nthreads}{integer. The number of threads to utilize. See Details. } \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain summed weighting variable after computation (if contained in \code{grouped_df}).} \item{stub}{character. If \code{keep.w = TRUE} and \code{stub = TRUE} (default), the summed weights column is prefixed by \code{"sum."}. Users can specify a different prefix through this argument, or set it to \code{FALSE} to avoid prefixing.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ % Non-grouped sum computations internally utilize long-doubles in C++, for additional numeric precision. % Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping them in the computation (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{sum}} which just runs through without any checks). The weighted sum (e.g. survey total) is computed as \code{sum(x * w)}, but in one pass and about twice as efficient. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and are therefore extremely fast. See Benchmark and Examples below. When applied to data frames with groups or \code{drop = FALSE}, \code{fsum} preserves all column attributes. The attributes of the data frame itself are also preserved. Since v1.6.0 \code{fsum} explicitly supports integers. Integers are summed using the long long type in C which is bounded at +-9,223,372,036,854,775,807 (so ~4.3 billion times greater than the minimum/maximum R integer bounded at +-2,147,483,647). If the value of the sum is outside +-2,147,483,647, a double containing the result is returned, otherwise an integer is returned. With groups, an integer results vector is initialized, and an integer overflow error is provided if the sum in any group is outside +-2,147,483,647. Data needs to be coerced to double beforehand in such cases. Multithreading, added in v1.8.0, applies at the column-level unless \code{g = NULL} and \code{nthreads > NCOL(x)}. Parallelism over groups is not available because sums are computed simultaneously within each group. \code{nthreads = 1L} uses a serial version of the code, not parallel code running on one thread. This serial code is always used with less than 100,000 obs (\code{length(x) < 100000} for vectors and matrices), because parallel execution itself has some overhead. } \value{ The (\code{w} weighted) sum of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its (grouped, weighted) sum. } \section{See Also}{ \code{\link{fprod}}, \code{\link{fmean}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fsum(mpg) # Simple sum fsum(mpg, w = mtcars$hp) # Weighted sum (total): Weighted by hp fsum(mpg, TRA = "\%") # Simple transformation: obtain percentages of mpg fsum(mpg, mtcars$cyl) # Grouped sum fsum(mpg, mtcars$cyl, mtcars$hp) # Weighted grouped sum (total) fsum(mpg, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fsum(mpg, g) fmean(mpg, g) == fsum(mpg, g) / fnobs(mpg, g) fsum(mpg, g, TRA = "\%") # Percentages by group ## data.frame method fsum(mtcars) fsum(mtcars, TRA = "\%") fsum(mtcars, g) fsum(mtcars, g, TRA = "\%") ## matrix method m <- qM(mtcars) fsum(m) fsum(m, TRA = "\%") fsum(m, g) fsum(m, g, TRA = "\%") ## method for grouped data frames - created with dplyr::group_by or fgroup_by mtcars |> fgroup_by(cyl,vs,am) |> fsum(hp) # Weighted grouped sum (total) mtcars |> fgroup_by(cyl,vs,am) |> fsum(TRA = "\%") mtcars |> fgroup_by(cyl,vs,am) |> fselect(mpg) |> fsum() \donttest{ %% Needs to be in \donttest because of example execution time limits ## This compares fsum with data.table and base::rowsum # Starting with small data library(data.table) opts <- set_collapse(nthreads = getDTthreads()) mtcDT <- qDT(mtcars) f <- qF(mtcars$cyl) library(microbenchmark) microbenchmark(mtcDT[, lapply(.SD, sum), by = f], rowsum(mtcDT, f, reorder = FALSE), fsum(mtcDT, f, na.rm = FALSE), unit = "relative") # Now larger data tdata <- qDT(replicate(100, rnorm(1e5), simplify = FALSE)) # 100 columns with 100.000 obs f <- qF(sample.int(1e4, 1e5, TRUE)) # A factor with 10.000 groups microbenchmark(tdata[, lapply(.SD, sum), by = f], rowsum(tdata, f, reorder = FALSE), fsum(tdata, f, na.rm = FALSE), unit = "relative") # Reset options set_collapse(opts) } } %\section{Benchmark}{\preformatted{ %## This compares fsum with data.table (2 threads) and base::rowsum %# Starting with small data %mtcDT <- qDT(mtcars) %f <- qF(mtcars$cyl) % %library(microbenchmark) %microbenchmark(mtcDT[, lapply(.SD, sum), by = f], % rowsum(mtcDT, f, reorder = FALSE), % fsum(mtcDT, f, na.rm = FALSE), unit = "relative") % % expr min lq mean median uq max neval cld % mtcDT[, lapply(.SD, sum), by = f] 145.436928 123.542134 88.681111 98.336378 71.880479 85.217726 100 c % rowsum(mtcDT, f, reorder = FALSE) 2.833333 2.798203 2.489064 2.937889 2.425724 2.181173 100 b % fsum(mtcDT, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100 a % %# Now larger data %tdata <- qDT(replicate(100, rnorm(1e5), simplify = FALSE)) # 100 columns with 100.000 obs %f <- qF(sample.int(1e4, 1e5, TRUE)) # A factor with 10.000 groups % %microbenchmark(tdata[, lapply(.SD, sum), by = f], % rowsum(tdata, f, reorder = FALSE), % fsum(tdata, f, na.rm = FALSE), unit = "relative") % % expr min lq mean median uq max neval cld % tdata[, lapply(.SD, sum), by = f] 2.646992 2.975489 2.834771 3.081313 3.120070 1.2766475 100 c % rowsum(tdata, f, reorder = FALSE) 1.747567 1.753313 1.629036 1.758043 1.839348 0.2720937 100 b % fsum(tdata, f, na.rm = FALSE) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 100 a %} %} \keyword{univar} \keyword{manip} collapse/man/fprod.Rd0000644000176200001440000001326614777170130014247 0ustar liggesusers\name{fprod} \alias{fprod} \alias{fprod.default} \alias{fprod.matrix} \alias{fprod.data.frame} \alias{fprod.grouped_df} \title{Fast (Grouped, Weighted) Product for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fprod} is a generic function that computes the (column-wise) product of all values in \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) product. } \usage{ fprod(x, \dots) \method{fprod}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, \dots) \method{fprod}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{fprod}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{fprod}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain product of weighting variable after computation (if contained in \code{grouped_df}).} \item{stub}{character. If \code{keep.w = TRUE} and \code{stub = TRUE} (default), the weights column is prefixed by \code{"prod."}. Users can specify a different prefix through this argument, or set it to \code{FALSE} to avoid prefixing.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ Non-grouped product computations internally utilize long-doubles in C, for additional numeric precision. %Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping them in the computation (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned (unlike \code{\link{prod}} which just runs through without any checks). %This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. The weighted product is computed as \code{prod(x * w)}, using a single pass in C. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. %When applied to data frames with groups or \code{drop = FALSE}, \code{fprod} preserves all column attributes (such as variable labels) but does not distinguish between classed and unclassed objects. The attributes of the data frame itself are also preserved. For further computational details see \code{\link{fsum}}, which works equivalently. } \value{ The (\code{w} weighted) product of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its (grouped, weighted) product. } \seealso{ \code{\link{fsum}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fprod(mpg) # Simple product fprod(mpg, w = mtcars$hp) # Weighted product fprod(mpg, TRA = "/") # Simple transformation: Divide by product fprod(mpg, mtcars$cyl) # Grouped product fprod(mpg, mtcars$cyl, mtcars$hp) # Weighted grouped product fprod(mpg, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fprod(mpg, g) fprod(mpg, g, TRA = "/") # Groupwise divide by product ## data.frame method fprod(mtcars) head(fprod(mtcars, TRA = "/")) fprod(mtcars, g) fprod(mtcars, g, use.g.names = FALSE) # No row-names generated ## matrix method m <- qM(mtcars) fprod(m) head(fprod(m, TRA = "/")) fprod(m, g) # etc.. ## method for grouped data frames - created with dplyr::group_by or fgroup_by mtcars |> fgroup_by(cyl,vs,am) |> fprod() mtcars |> fgroup_by(cyl,vs,am) |> fprod(TRA = "/") mtcars |> fgroup_by(cyl,vs,am) |> fselect(mpg) |> fprod() } \keyword{univar} \keyword{manip} collapse/man/seqid.Rd0000644000176200001440000001411115115712014014216 0ustar liggesusers\name{seqid} \alias{seqid} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Generate Group-Id from Integer Sequences } \description{ \code{seqid} can be used to group sequences of integers in a vector, e.g. \code{seqid(c(1:3, 5:7))} becomes \code{c(rep(1,3), rep(2,3))}. It also supports increments \code{> 1}, unordered sequences, and missing values in the sequence. Some applications are to facilitate identification of, and grouped operations on, (irregular) time series and panels. } \usage{ seqid(x, o = NULL, del = 1L, start = 1L, na.skip = FALSE, skip.seq = FALSE, check.o = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a factor or integer vector. Numeric vectors will be converted to integer i.e. rounded downwards.} \item{o}{an (optional) integer ordering vector specifying the order by which to pass through \code{x}.} \item{del}{integer. The integer deliminating two consecutive points in a sequence. \code{del = 1} lets \code{seqid} track sequences of the form \code{c(1,2,3,..)}, \code{del = 2} tracks sequences \code{c(1,3,5,..)} etc.} \item{start}{integer. The starting value of the resulting sequence id. Default is starting from 1. } \item{na.skip}{logical. \code{TRUE} skips missing values in the sequence. The default behavior is skipping such that \code{seqid(c(1, NA, 2))} is regarded as one sequence and coded as \code{c(1, NA, 1)}.} \item{skip.seq}{logical. If \code{na.skip = TRUE}, this changes the behavior such that missing values are viewed as part of the sequence, i.e. \code{seqid(c(1, NA, 3))} is regarded as one sequence and coded as \code{c(1, NA, 1)}.} \item{check.o}{logical. Programmers option: \code{FALSE} prevents checking that each element of \code{o} is in the range \code{[1, length(x)]}, it only checks the length of \code{o}. This gives some extra speed, but will terminate R if any element of \code{o} is too large or too small. } } \details{ \code{seqid} was created primarily as a workaround to deal with problems of computing lagged values, differences and growth rates on irregularly spaced time series and panels before \emph{collapse} version 1.5.0 (\href{https://github.com/fastverse/collapse/issues/26}{#26}). Now \code{flag}, \code{fdiff} and \code{fgrowth} natively support irregular data so this workaround is superfluous, except for iterated differencing which is not yet supported with irregular data. % panels because they do not pre-compute an ordering of the data but directly compute the ordering from the supplied id and time variables while providing errors for gaps and repeated time values. see \code{\link{flag}} for computational details. The theory of the workaround was to express an irregular time series or panel series as a regular panel series with a group-id created such that the time-periods within each group are consecutive. \code{seqid} makes this very easy: For an irregular panel with some gaps or repeated values in the time variable, an appropriate id variable can be generated using \code{settransform(data, newid = seqid(time, radixorder(id, time)))}. Lags can then be computed using \code{L(data, 1, ~newid, ~time)} etc. %A simple solution to applying existing functionality (\code{flag}, \code{fdiff} and \code{fgrowth}) to irregular time series and panels is thus to create a group-id that fully identifies the data together with the time variable. % This way \emph{collapse} maintains a balance between offering very fast computations on regular time series and panels (which may be unbalanced but where observations for each entity are consecutive in time), and flexibility of application. In general, for any regularly spaced panel the identity given by \code{identical(groupid(id, order(id, time)), seqid(time, order(id, time)))} should hold. % Regularly spaced panels with gaps in time (such as a panel-survey with measurements every 2 years) can be handled either by \code{seqid(\dots, del = gap)} or, in most cases, simply by converting the time variable to factor using \code{\link{qF}}, which will make observations consecutive. % \enumerate{ % \item Sort the data in ascending order (e.g. using \code{data.table::setorder(data, time)} for time series and \code{data.table::setorder(data, id, time)} for panels) % \item Generate a new id variable using \code{seqid} (e.g. \code{settransform(data, newid = seqid(time))}) % \item Use the new id to identify the data together with the time variable (e.g. compute a panel-lag using \code{L(data, 1, ~newid, ~time)} or create a panel data frame: \code{pdata <- plm::pdata.frame(data, index = c("newid", "time")); L(pdata)}) % } %There are potentially other more analytical applications for \code{seqid}\dots For the opposite operation of creating a new time-variable that is consecutive in each group, see \code{data.table::rowid}. } \value{ An integer vector of class 'qG'. See \code{\link{qG}}. } \seealso{ \code{\link{timeid}}, \code{\link{groupid}}, \code{\link{qG}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## This creates an irregularly spaced panel, with a gap in time for id = 2 data <- data.frame(id = rep(1:3, each = 4), time = c(1:4, 1:2, 4:5, 1:4), value = rnorm(12)) data ## This gave a gaps in time error previous to collapse 1.5.0 L(data, 1, value ~ id, ~time) ## Generating new id variable (here seqid(time) would suffice as data is sorted) settransform(data, newid = seqid(time, order(id, time))) data ## Lag the panel this way L(data, 1, value ~ newid, ~time) ## A different possibility: Creating a consecutive time variable settransform(data, newtime = data.table::rowid(id)) data L(data, 1, value ~ id, ~newtime) ## With sorted data, the time variable can also just be omitted.. L(data, 1, value ~ id) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ts} \keyword{manip} % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/dapply.Rd0000644000176200001440000001345114777170130014422 0ustar liggesusers\name{dapply} \alias{dapply} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Data Apply } \description{ \code{dapply} efficiently applies functions to columns or rows of matrix-like objects and by default returns an object of the same type and with the same attributes (unless the result is scalar and \code{drop = TRUE}). Alternatively it is possible to return the result in a plain matrix or data.frame. A simple parallelism is also available. } \usage{ dapply(X, FUN, \dots, MARGIN = 2, parallel = FALSE, mc.cores = 1L, return = c("same", "matrix", "data.frame"), drop = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a matrix, data frame or alike object.} \item{FUN}{a function, can be scalar- or vector-valued.} \item{\dots}{further arguments to \code{FUN}.} \item{MARGIN}{integer. The margin which \code{FUN} will be applied over. Default \code{2} indicates columns while \code{1} indicates rows. See also Details. } \item{parallel}{logical. \code{TRUE} implements simple parallel execution by internally calling \code{\link{mclapply}} instead of \code{\link{lapply}}.} \item{mc.cores}{integer. Argument to \code{\link{mclapply}} indicating the number of cores to use for parallel execution. Can use \code{\link[=detectCores]{detectCores()}} to select all available cores.} \item{return}{an integer or string indicating the type of object to return. The default \code{1 - "same"} returns the same object type (i.e. class and other attributes are retained, just the names for the dimensions are adjusted). \code{2 - "matrix"} always returns the output as matrix and \code{3 - "data.frame"} always returns a data frame.} \item{drop}{logical. If the result has only one row or one column, \code{drop = TRUE} will drop dimensions and return a (named) atomic vector.} } \details{ \code{dapply} is an efficient command to apply functions to rows or columns of data without loosing information (attributes) about the data or changing the classes or format of the data. It is principally an efficient wrapper around \code{\link{lapply}} and works as follows: \itemize{ \item Save the attributes of \code{X}. \item If \code{MARGIN = 2} (columns), convert matrices to plain lists of columns using \code{\link{mctl}} and remove all attributes from data frames. \item If \code{MARGIN = 1} (rows), convert matrices to plain lists of rows using \code{\link{mrtl}}. For data frames remove all attributes, efficiently convert to matrix using \code{do.call(cbind, X)} and also convert to list of rows using \code{\link{mrtl}}. \item Call \code{\link{lapply}} or \code{\link{mclapply}} on these plain lists (which is faster than calling \code{lapply} on an object with attributes). \item depending on the requested output type, use \code{\link{matrix}}, \code{\link{unlist}} or \code{\link[=do.call]{do.call(cbind, ...)}} to convert the result back to a matrix or list of columns. \item modify the relevant attributes accordingly and efficiently attach to the object again (no further checks). % , non essential attributes are kept and added at the end of the attribute list } The performance gain from working with plain lists makes \code{dapply} not much slower than calling \code{lapply} itself on a data frame. Because of the conversions involved, row-operations require some memory, but are still faster than \code{\link{apply}}. } \value{ \code{X} where \code{FUN} was applied to every row or column. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{BY}}, \code{\link{collap}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ head(dapply(mtcars, log)) # Take natural log of each variable head(dapply(mtcars, log, return = "matrix")) # Return as matrix m <- as.matrix(mtcars) head(dapply(m, log)) # Same thing head(dapply(m, log, return = "data.frame")) # Return data frame from matrix dapply(mtcars, sum); dapply(m, sum) # Computing sum of each column, return as vector dapply(mtcars, sum, drop = FALSE) # This returns a data frame of 1 row dapply(mtcars, sum, MARGIN = 1) # Compute row-sum of each column, return as vector dapply(m, sum, MARGIN = 1) # Same thing for matrices, faster t. apply(m, 1, sum) head(dapply(m, sum, MARGIN = 1, drop = FALSE)) # Gives matrix with one column head(dapply(m, quantile, MARGIN = 1)) # Compute row-quantiles dapply(m, quantile) # Column-quantiles head(dapply(mtcars, quantile, MARGIN = 1)) # Same for data frames, output is also a data.frame dapply(mtcars, quantile) # With classed objects, we have to be a bit careful \dontrun{ dapply(EuStockMarkets, quantile) # This gives an error because the tsp attribute is misspecified } dapply(EuStockMarkets, quantile, return = "matrix") # These both work fine.. dapply(EuStockMarkets, quantile, return = "data.frame") \donttest{ % No code relying on suggested package # Similarly for grouped tibbles and other data frame based classes library(dplyr) gmtcars <- group_by(mtcars,cyl,vs,am) head(dapply(gmtcars, log)) # Still gives a grouped tibble back dapply(gmtcars, quantile, MARGIN = 1) # Here it makes sense to keep the groups attribute dapply(gmtcars, quantile) # This does not make much sense, ... dapply(gmtcars, quantile, # better convert to plain data.frame: return = "data.frame") } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/time-series-panel-series.Rd0000644000176200001440000001156114777170130017744 0ustar liggesusers\name{time-series-panel-series} % \name{Time Series and Panel Computations} \alias{A7-time-series-panel-series} \alias{time-series-panel-series} % \alias{tscomp} \title{Time Series and Panel Series} % \emph{collapse} \description{ \emph{collapse} provides a flexible and powerful set of functions and classes to work with time-dependent data: \itemize{ \item \code{\link[=findex_by]{findex_by/iby}} creates an 'indexed_frame': a flexible structure that can be imposed upon any data-frame like object and facilitates \bold{indexed (time-aware) computations on time series and panel data}. Indexed frames are composed of 'indexed_series', which can also be created from vector and matrix-based objects using the \code{reindex} function. Further functions \code{findex/ix}, \code{unindex}, \code{is_irregular} and \code{to_plm} help operate these classes, check for irregularity, and ensure \emph{plm} compatibility. Methods are defined for various time series, data transformation and data manipulation functions in \emph{collapse}. \item \code{\link{timeid}} efficiently converts numeric time sequences, such as 'Date' or 'POSIXct' vectors, to a \bold{time-factor / integer id}, where a unit-step represents the greatest common divisor of the underlying sequence. \item \code{\link{flag}}, and the lag- and lead- operators \code{\link{L}} and \code{\link{F}} are S3 generics to efficiently compute sequences of \bold{lags and leads} on regular or irregular / unbalanced time series and panel data. \item Similarly, \code{\link{fdiff}}, \code{\link{fgrowth}}, and the operators \code{\link{D}}, \code{\link{Dlog}} and \code{\link{G}} are S3 generics to efficiently compute sequences of suitably lagged / leaded and iterated \bold{differences, log-differences and growth rates}. \code{\link[=fdiff]{fdiff/D/Dlog}} can also compute \bold{quasi-differences} of the form \eqn{x_t - \rho x_{t-1}}. \item \code{\link{fcumsum}} is an S3 generic to efficiently compute \bold{cumulative sums} on time series and panel data. In contrast to \code{\link{cumsum}}, it can handle missing values and supports both grouped and indexed / ordered computations. \item \code{\link{psmat}} is an S3 generic to efficiently convert panel-vectors / 'indexed_series' and data frames / 'indexed_frame's to \bold{panel series matrices and 3D arrays}, respectively (where time, individuals and variables receive different dimensions, allowing for fast indexation, visualization, and computations). \item \code{\link{psacf}}, \code{\link{pspacf}} and \code{\link{psccf}} are S3 generics to compute estimates of the \bold{auto-, partial auto- and cross- correlation or covariance functions} for panel-vectors / 'indexed_series', and multivariate versions for data frames / 'indexed_frame's. } } \section{Table of Functions}{ \tabular{lllll}{\emph{ S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \Sexpr{"\u200B"} \Sexpr{"\u200B"} \code{\link[=findex_by]{findex_by/iby}}, \code{findex/ix}, \code{reindex}, \code{unindex}, \code{is_irregular}, \code{to_plm} \tab\tab For vectors, matrices and data frames / lists. \tab\tab Fast and flexible time series and panel data classes 'indexed_series' and 'indexed_frame'. \cr \code{\link{timeid}} \tab\tab For time sequences represented by integer or double vectors / objects. \tab\tab Generate integer time-id/factor \cr \code{\link[=flag]{flag/L/F}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute (sequences of) lags and leads \cr \code{\link[=fdiff]{fdiff/D/Dlog}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute (sequences of lagged / leaded and iterated) (quasi-)differences or log-differences \cr \code{\link[=fgrowth]{fgrowth/G}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute (sequences of lagged / leaded and iterated) growth rates (exact, via log-differencing, or compounded) \cr \code{\link{fcumsum}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute cumulative sums \cr \code{\link{psmat}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Convert panel data to matrix / array \cr \code{\link{psacf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute ACF on panel data \cr \code{\link{pspacf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute PACF on panel data \cr \code{\link{psccf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute CCF on panel data } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=data-transformations]{Data Transformations} } \keyword{ts} \keyword{manip} \keyword{documentation} collapse/man/fnobs.Rd0000644000176200001440000000675514777170130014251 0ustar liggesusers\name{fnobs} \alias{fnobs} \alias{fnobs.default} \alias{fnobs.matrix} \alias{fnobs.data.frame} \alias{fnobs.grouped_df} \title{Fast (Grouped) Observation Count for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fnobs} is a generic function that (column-wise) computes the number of non-missing values in \code{x}, (optionally) grouped by \code{g}. It is much faster than \code{sum(!is.na(x))}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) observation count. } \usage{ fnobs(x, \dots) \method{fnobs}{default}(x, g = NULL, TRA = NULL, use.g.names = TRUE, \dots) \method{fnobs}{matrix}(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, \dots) \method{fnobs}{data.frame}(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, \dots) \method{fnobs}{grouped_df}(x, TRA = NULL, use.g.names = FALSE, keep.group_vars = TRUE, \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ \code{fnobs} preserves all attributes of non-classed vectors / columns, and only the 'label' attribute (if available) of classed vectors / columns (i.e. dates or factors). When applied to data frames and matrices, the row-names are adjusted as necessary. } \value{ Integer. The number of non-missing observations in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its number of non-missing observations, grouped by \code{g}. } \seealso{ \code{\link{fndistinct}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method fnobs(airquality$Solar.R) # Simple Nobs fnobs(airquality$Solar.R, airquality$Month) # Grouped Nobs ## data.frame method fnobs(airquality) fnobs(airquality, airquality$Month) fnobs(wlddev) # Works with data of all types! head(fnobs(wlddev, wlddev$iso3c)) ## matrix method aqm <- qM(airquality) fnobs(aqm) # Also works for character or logical matrices fnobs(aqm, airquality$Month) ## method for grouped data frames - created with dplyr::group_by or fgroup_by airquality |> fgroup_by(Month) |> fnobs() wlddev |> fgroup_by(country) |> fselect(PCGDP,LIFEEX,GINI,ODA) |> fnobs() } \keyword{univar} \keyword{manip} collapse/man/ftransform.Rd0000644000176200001440000004205615014371633015311 0ustar liggesusers\name{ftransform} \alias{fmutate} \alias{mtt} \alias{ftransform} \alias{tfm} \alias{ftransformv} \alias{tfmv} \alias{ftransform<-} \alias{tfm<-} \alias{settransform} \alias{settfm} \alias{settransformv} \alias{settfmv} \alias{fcompute} \alias{fcomputev} \title{Fast Transform and Compute Columns on a Data Frame} \description{ \code{ftransform} is a much faster version of \code{\link{transform}} for data frames. It returns the data frame with new columns computed and/or existing columns modified or deleted. \code{settransform} does all of that by reference. \code{fcompute} computes and returns new columns. These functions evaluate all arguments simultaneously, allow list-input (nested pipelines) and disregard grouped data. Catering to the \emph{tidyverse} user, v1.7.0 introduced \code{fmutate}, providing familiar functionality i.e. arguments are evaluated sequentially, computation on grouped data is done by groups, and functions can be applied to multiple columns using \code{\link{across}}. See also the Details. } \usage{ # dplyr-style mutate (sequential evaluation + across() feature) fmutate(.data, ..., .keep = "all", .cols = NULL) mtt(.data, ..., .keep = "all", .cols = NULL) # Shorthand for fmutate # Modify and return data frame ftransform(.data, \dots) ftransformv(.data, vars, FUN, \dots, apply = TRUE) tfm(.data, \dots) # Shorthand for ftransform tfmv(.data, vars, FUN, \dots, apply = TRUE) # Modify data frame by reference settransform(.data, \dots) settransformv(.data, \dots) # Same arguments as ftransformv settfm(.data, \dots) # Shorthand for settransform settfmv(.data, \dots) # Replace/add modified columns in/to a data frame ftransform(.data) <- value tfm(.data) <- value # Shorthand for ftransform<- # Compute columns, returned as a new data frame fcompute(.data, \dots, keep = NULL) fcomputev(.data, vars, FUN, \dots, apply = TRUE, keep = NULL) } \arguments{ \item{.data}{a data frame or named list of columns.} \item{\dots}{further arguments of the form \code{column = value}. The \code{value} can be a combination of other columns, a scalar value, or \code{NULL}, which deletes \code{column}. Alternatively it is also possible to place a single list here, which will be treated like a list of \code{column = value} arguments. For \code{ftransformv} and \code{fcomputev}, \code{\dots} can be used to pass further arguments to \code{FUN}. The ellipsis (\code{\dots}) is always evaluated within the data frame (\code{.data}) environment. See Examples. \code{fmutate} additionally supports \code{\link{across}} statements, and evaluates tagged vector expressions sequentially. With grouped execution, \code{dots} can also contain arbitrary expressions that result in a list of data-length columns. See Examples.} \item{vars}{variables to be transformed by applying \code{FUN} to them: select using names, indices, a logical vector or a selector function (e.g. \code{is.numeric}). Since v1.7 \code{vars} is evaluated within the \code{.data} environment, permitting expressions on columns e.g. \code{c(col1, col3:coln)}.} \item{FUN}{a single function yielding a result of length \code{NROW(.data)} or 1. See also \code{apply}.} \item{apply}{logical. \code{TRUE} (default) will apply \code{FUN} to each column selected in \code{vars}; \code{FALSE} will apply \code{FUN} to the subsetted data frame i.e. \code{FUN(get_vars(.data, vars), ...)}. The latter is useful for \emph{collapse} functions with data frame or grouped / panel data frame methods, yielding performance gains and enabling grouped transformations. See Examples.} \item{value}{a named list of replacements, it will be treated like an evaluated list of \code{column = value} arguments.} \item{keep}{select columns to preserve using column names, indices or a function (e.g. \code{is.numeric}). By default computed columns are added after the preserved ones, unless they are assigned the same name in which case the preserved columns will be replaced in order.} \item{.keep}{either one of \code{"all", "used", "unused"} or \code{"none"} (see \code{\link[dplyr]{mutate}}), or columns names/indices/function as \code{keep}. \emph{Note} that this does not work well with \code{across()} or other expressions supported since v1.9.0. The only sensible option you have there is to supply a character vector of all columns in the final dataset that you want to keep. } \item{.cols}{for expressions involving \code{.data}, \code{.cols} can be used to subset columns, e.g. \code{mtcars |> gby(cyl) |> mtt(broom::augment(lm(mpg ~., .data)), .cols = 1:7)}. Can pass column names, indices, a logical vector or a selector function (e.g. \code{is.numericr}).} } \details{ The \code{\dots} arguments to \code{ftransform} are tagged vector expressions, which are evaluated in the data frame \code{.data}. The tags are matched against \code{names(.data)}, and for those that match, the values replace the corresponding variable in \code{.data}, whereas the others are appended to \code{.data}. It is also possible to delete columns by assigning \code{NULL} to them, i.e. \code{ftransform(data, colk = NULL)} removes \code{colk} from the data. \emph{Note} that \code{names(.data)} and the names of the \code{...} arguments are checked for uniqueness beforehand, yielding an error if this is not the case. Since \emph{collapse} v1.3.0, is is also possible to pass a single named list to \code{\dots}, i.e. \code{ftransform(data, newdata)}. This list will be treated like a list of tagged vector expressions. \emph{Note} the different behavior: \code{ftransform(data, list(newcol = col1))} is the same as \code{ftransform(data, newcol = col1)}, whereas \code{ftransform(data, newcol = as.list(col1))} creates a list column. Something like \code{ftransform(data, as.list(col1))} gives an error because the list is not named. See Examples. % and \code{ftransform(data, as.list(col1))} gives an error because an unnamed list is passed. % , but \code{ftransform(data, setNames(as.list(col1), col1))} will work and add the values of \code{col1} as separate columns. % \code{ftransform(data, fmean(list(col1mean = col1, col2mean = col2), drop = FALSE))} etc. % For example \code{ftransformv(data, 1:3, log)} is the same as \code{ftransform(data, lapply(get_vars(data, 1:3), log))}, and \code{ftransformv(data, 1:3, log, apply = FALSE)} is the same as \code{ftransform(data, log(get_vars(data, 1:3)))}. The function \code{ftransformv} added in v1.3.2 provides a fast replacement for the functions \code{dplyr::mutate_at} and \code{dplyr::mutate_if} (without the grouping feature) facilitating mutations of groups of columns (\code{dplyr::mutate_all} is already accounted for by \code{\link{dapply}}). See Examples. The function \code{settransform} does all of that by reference, but uses base-R's copy-on modify semantics, which is equivalent to replacing the data with \code{<-} (thus it is still memory efficient but the data will have a different memory address afterwards). The function \code{fcompute(v)} works just like \code{ftransform(v)}, but returns only the changed / computed columns without modifying or appending the data in \code{.data}. See Examples. The function \code{fmutate} added in v1.7.0, provides functionality familiar from \emph{dplyr} 1.0.0 and higher. It evaluates tagged vector expressions sequentially and does operations by groups on a grouped frame (thus it is slower than \code{ftransform} if you have many tagged expressions or a grouped data frame). Note however that \emph{collapse} does not depend on \emph{rlang}, so things like lambda expressions are not available. \emph{Note also} that \code{fmutate} operates differently on grouped data whether you use \code{.FAST_FUN} or base R functions / functions from other packages. With \code{.FAST_FUN} (including \code{.OPERATOR_FUN}, excluding \code{fhdbetween} / \code{fhdwithin} / \code{HDW} / \code{HDB}), \code{fmutate} performs an efficient vectorized execution, i.e. the grouping object from the grouped data frame is passed to the \code{g} argument of these functions, and for \code{.FAST_STAT_FUN} also \code{TRA = "replace_fill"} is set (if not overwritten by the user), yielding internal grouped computation by these functions without the need for splitting the data by groups. For base R and other functions, \code{fmutate} performs classical split-apply combine computing i.e. the relevant columns of the data are selected and split into groups, the expression is evaluated for each group, and the result is recombined and suitably expanded to match the original data frame. \bold{Note} that it is not possible to mix vectorized and standard execution in the same expression!! Vectorized execution is performed if \bold{any} \code{.FAST_FUN} or \code{.OPERATOR_FUN} is part of the expression, thus a code like \code{mtcars |> gby(cyl) |> fmutate(new = fmin(mpg) / min(mpg))} will be expanded to something like \code{mtcars |> gby(cyl) |> ftransform(new = fmin(mpg, g = GRP(.), TRA = "replace_fill") / min(mpg))} and then executed, i.e. \code{fmin(mpg)} will be executed in a vectorized way, and \code{min(mpg)} will not be executed by groups at all. } \note{ \code{ftransform} ignores grouped data. This is on purpose as it allows non-grouped transformation inside a pipeline on grouped data, and affords greater flexibility and performance in programming with the \code{.FAST_FUN}. In particular, you can run a nested pipeline inside \code{ftransform}, and decide which expressions should be grouped, and you can use the ad-hoc grouping functionality of the \code{.FAST_FUN}, allowing operations where different groupings are applied simultaneously in an expression. See Examples or the answer provided \href{https://stackoverflow.com/questions/67349744/using-ftransform-along-with-fgroup-by-from-collapse-r-package}{here}. \code{fmutate} on the other hand supports grouped operations just like \code{dplyr::mutate}, but works in two different ways depending on whether you use \code{.FAST_FUN} in an expression or other functions. See the Examples. } \value{ The modified data frame \code{.data}, or, for \code{fcompute}, a new data frame with the columns computed on \code{.data}. All attributes of \code{.data} are preserved. } \seealso{ \code{\link{across}}, \code{\link{fsummarise}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## fmutate() examples --------------------------------------------------------------- # Please note that expressions are vectorized whenever they contain 'ANY' fast function mtcars |> fgroup_by(cyl, vs, am) |> fmutate(mean_mpg = fmean(mpg), # Vectorized mean_mpg_base = mean(mpg), # Non-vectorized mpg_cumpr = fcumsum(mpg) / fsum(mpg), # Vectorized mpg_cumpr_base = cumsum(mpg) / sum(mpg), # Non-vectorized mpg_cumpr_mixed = fcumsum(mpg) / sum(mpg)) # Vectorized: division by overall sum # Using across: here fmean() gets vectorized across both groups and columns (requiring a single # call to fmean.data.frame which goes to C), whereas weighted.mean needs to be called many times. mtcars |> fgroup_by(cyl, vs, am) |> fmutate(across(disp:qsec, list(mu = fmean, mu2 = weighted.mean), w = wt, .names = "flip")) # Can do more complex things... mtcars |> fgroup_by(cyl) |> fmutate(res = resid(lm(mpg ~ carb + hp, weights = wt))) # Since v1.9.0: supports arbitrary expressions returning suitable lists \dontrun{ % broom is not suggested mtcars |> fgroup_by(cyl) |> fmutate(broom::augment(lm(mpg ~ carb + hp, weights = wt))) # Same thing using across() (supported before 1.9.0) modelfun <- function(data) broom::augment(lm(mpg ~ carb + hp, data, weights = wt)) mtcars |> fgroup_by(cyl) |> fmutate(across(c(mpg, carb, hp, wt), modelfun, .apply = FALSE)) } ## ftransform() / fcompute() examples: ---------------------------------------------- ## ftransform modifies and returns a data.frame head(ftransform(airquality, Ozone = -Ozone)) head(ftransform(airquality, new = -Ozone, Temp = (Temp-32)/1.8)) head(ftransform(airquality, new = -Ozone, new2 = 1, Temp = NULL)) # Deleting Temp head(ftransform(airquality, Ozone = NULL, Temp = NULL)) # Deleting columns # With collapse's grouped and weighted functions, complex operations are done on the fly head(ftransform(airquality, # Grouped operations by month: Ozone_Month_median = fmedian(Ozone, Month, TRA = "fill"), Ozone_Month_sd = fsd(Ozone, Month, TRA = "replace"), Ozone_Month_centered = fwithin(Ozone, Month))) # Grouping by month and above/below average temperature in each month head(ftransform(airquality, Ozone_Month_high_median = fmedian(Ozone, list(Month, Temp > fbetween(Temp, Month)), TRA = "fill"))) ## ftransformv can be used to modify multiple columns using a function head(ftransformv(airquality, 1:3, log)) head(`[<-`(airquality, 1:3, value = lapply(airquality[1:3], log))) # Same thing in base R head(ftransformv(airquality, 1:3, log, apply = FALSE)) head(`[<-`(airquality, 1:3, value = log(airquality[1:3]))) # Same thing in base R # Using apply = FALSE yields meaningful performance gains with collapse functions # This calls fwithin.default, and repeates the grouping by month 3 times: head(ftransformv(airquality, 1:3, fwithin, Month)) # This calls fwithin.data.frame, and only groups one time -> 5x faster! head(ftransformv(airquality, 1:3, fwithin, Month, apply = FALSE)) # This also works for grouped and panel data frames (calling fwithin.grouped_df) airquality |> fgroup_by(Month) |> ftransformv(1:3, fwithin, apply = FALSE) |> head() # But this gives the WRONG result (calling fwithin.default). Need option apply = FALSE!! airquality |> fgroup_by(Month) |> ftransformv(1:3, fwithin) |> head() # For grouped modification of single columns in a grouped dataset, we can use GRP(): library(magrittr) airquality |> fgroup_by(Month) \%>\% ftransform(W_Ozone = fwithin(Ozone, GRP(.)), # Grouped centering sd_Ozone_m = fsd(Ozone, GRP(.), TRA = "replace"), # In-Month standard deviation sd_Ozone = fsd(Ozone, TRA = "replace"), # Overall standard deviation sd_Ozone2 = fsd(Ozone, TRA = "fill"), # Same, overwriting NA's sd_Ozone3 = fsd(Ozone)) |> head() # Same thing (calling alloc()) ## For more complex mutations we can use ftransform with compound pipes airquality |> fgroup_by(Month) \%>\% ftransform(get_vars(., 1:3) |> fwithin() |> flag(0:2)) |> head() airquality \%>\% ftransform(STD(., cols = 1:3) |> replace_na(0)) |> head() # The list argument feature also allows flexible operations creating multiple new columns airquality |> # The variance of Wind and Ozone, by month, weighted by temperature: ftransform(fvar(list(Wind_var = Wind, Ozone_var = Ozone), Month, Temp, "replace")) |> head() # Same as above using a grouped data frame (a bit more complex) airquality |> fgroup_by(Month) \%>\% ftransform(fselect(., Wind, Ozone) |> fvar(Temp, "replace") |> add_stub("_var", FALSE)) |> fungroup() |> head() # This performs 2 different multi-column grouped operations (need c() to make it one list) ftransform(airquality, c(fmedian(list(Wind_Day_median = Wind, Ozone_Day_median = Ozone), Day, TRA = "replace"), fsd(list(Wind_Month_sd = Wind, Ozone_Month_sd = Ozone), Month, TRA = "replace"))) |> head() ## settransform(v) works like ftransform(v) but modifies a data frame in the global environment.. settransform(airquality, Ratio = Ozone / Temp, Ozone = NULL, Temp = NULL) head(airquality) rm(airquality) # Grouped and weighted centering settransformv(airquality, 1:3, fwithin, Month, Temp, apply = FALSE) head(airquality) rm(airquality) # Suitably lagged first-differences settransform(airquality, get_vars(airquality, 1:3) |> fdiff() |> flag(0:2)) head(airquality) rm(airquality) # Same as above using magrittr::`\%<>\%` airquality \%<>\% ftransform(get_vars(., 1:3) |> fdiff() |> flag(0:2)) head(airquality) rm(airquality) # It is also possible to achieve the same thing via a replacement method (if needed) ftransform(airquality) <- get_vars(airquality, 1:3) |> fdiff() |> flag(0:2) head(airquality) rm(airquality) ## fcompute only returns the modified / computed columns head(fcompute(airquality, Ozone = -Ozone)) head(fcompute(airquality, new = -Ozone, Temp = (Temp-32)/1.8)) head(fcompute(airquality, new = -Ozone, new2 = 1)) # Can preserve existing columns, computed ones are added to the right if names are different head(fcompute(airquality, new = -Ozone, new2 = 1, keep = 1:3)) # If given same name as preserved columns, preserved columns are replaced in order... head(fcompute(airquality, Ozone = -Ozone, new = 1, keep = 1:3)) # Same holds for fcomputev head(fcomputev(iris, is.numeric, log)) # Same as: iris |> get_vars(is.numeric) |> dapply(log) |> head() head(fcomputev(iris, is.numeric, log, keep = "Species")) # Adds in front head(fcomputev(iris, is.numeric, log, keep = names(iris))) # Preserve order # Keep a subset of the data, add standardized columns head(fcomputev(iris, 3:4, STD, apply = FALSE, keep = names(iris)[3:5])) } \keyword{manip} collapse/man/data-transformations.Rd0000644000176200001440000001610114777170130017264 0ustar liggesusers\name{data-transformations} \alias{A6-data-transformations} \alias{data-transformations} \alias{.OPERATOR_FUN} \title{Data Transformations} % \emph{collapse} \description{ \emph{collapse} provides an ensemble of functions to perform common data transformations efficiently and user friendly: \itemize{ \item \code{\link{dapply}} \bold{applies functions to rows or columns} of matrices and data frames, preserving the data format. \item \code{\link{BY}} is an S3 generic for efficient \bold{Split-Apply-Combine computing}, similar to \code{\link{dapply}}. \item A set of arithmetic operators facilitates \bold{row-wise} \code{\link{\%rr\%}}, \code{\link{\%r+\%}}, \code{\link{\%r-\%}}, \code{\link{\%r*\%}}, \code{\link{\%r/\%}} and \bold{column-wise} \code{\link{\%cr\%}}, \code{\link{\%c+\%}}, \code{\link{\%c-\%}}, \code{\link{\%c*\%}}, \code{\link{\%c/\%}} \bold{replacing and sweeping operations} involving a vector and a matrix or data frame / list. Since v1.7, the operators \code{\link{\%+=\%}}, \code{\link{\%-=\%}}, \code{\link{\%*=\%}} and \code{\link{\%/=\%}} do column- and element- wise math by reference, and the function \code{\link{setop}} can also perform sweeping out rows by reference. \item \code{\link[=TRA]{(set)TRA}} is a more advanced S3 generic to efficiently perform \bold{(groupwise) replacing and sweeping out of statistics}, either by creating a copy of the data or by reference. %The basic syntax is \code{TRA(x, xag, g)} where \code{x} is data to be transformed, \code{xag} is some set of aggregate statistics to tranform \code{x} and \code{g} is an optional grouping vector for grouped transformations. Supported operations are: \tabular{lllll}{\emph{ Integer-id } \tab\tab \emph{ String-id } \tab\tab \emph{ Description } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} % \Sexpr{"\u200B"} \Sexpr{"\u200B"} 0 \tab\tab "na" or "replace_na" \tab\tab replace only missing values \cr 1 \tab\tab "fill" or "replace_fill" \tab\tab replace everything \cr 2 \tab\tab "replace" \tab\tab replace data but preserve missing values \cr 3 \tab\tab "-" \tab\tab subtract \cr 4 \tab\tab "-+" \tab\tab subtract group-statistics but add group-frequency weighted average of group statistics \cr 5 \tab\tab "/" \tab\tab divide \cr 6 \tab\tab "\%" \tab\tab compute percentages \cr 7 \tab\tab "+" \tab\tab add \cr 8 \tab\tab "*" \tab\tab multiply \cr 9 \tab\tab "\%\%" \tab\tab modulus \cr 10 \tab\tab "-\%\%" \tab\tab subtract modulus } All of \emph{collapse}'s \link[=fast-statistical-functions]{Fast Statistical Functions} have a built-in \code{TRA} argument for faster access (i.e. you can compute (groupwise) statistics and use them to transform your data with a single function call). \item \code{\link[=fscale]{fscale/STD}} is an S3 generic to perform (groupwise and / or weighted) \bold{scaling / standardizing} of data and is orders of magnitude faster than \code{\link{scale}}. \item \code{\link[=fwithin]{fwithin/W}} is an S3 generic to efficiently perform (groupwise and / or weighted) \bold{within-transformations / demeaning / centering} of data. Similarly \code{\link[=fbetween]{fbetween/B}} computes (groupwise and / or weighted) \bold{between-transformations / averages} (also a lot faster than \code{\link{ave}}). \item \code{\link[=HDW]{fhdwithin/HDW}}, shorthand for 'higher-dimensional within transform', is an S3 generic to efficiently \bold{center data on multiple groups and partial-out linear models} (possibly involving many levels of fixed effects and interactions). In other words, \code{\link[=HDW]{fhdwithin/HDW}} efficiently computes \bold{residuals} from linear models. Similarly \code{\link[=HDB]{fhdbetween/HDB}}, shorthand for 'higher-dimensional between transformation', computes the corresponding means or \bold{fitted values}. %\item \code{flm} is an efficient function for bare-bones (weighted) \bold{linear model fitting}. It supports 6 different fitting methods, 4 from base R, and 2 utilizing the \emph{RcppArmadillo} or \emph{RcppEigen} packages. \item \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}} and \code{\link[=fgrowth]{fgrowth/G}} are S3 generics to compute sequences of \bold{lags / leads} and suitably lagged and iterated (quasi-, log-) \bold{differences} and \bold{growth rates} on time series and panel data. \code{\link{fcumsum}} flexibly computes (grouped, ordered) cumulative sums. More in \link[=time-series-panel-series]{Time Series and Panel Series}. \item \code{STD, W, B, HDW, HDB, L, D, Dlog} and \code{G} are parsimonious wrappers around the \code{f-} functions above representing the corresponding transformation 'operators'. They have additional capabilities when applied to data-frames (i.e. variable selection, formula input, auto-renaming and id-variable preservation), and are easier to employ in regression formulas, but are otherwise identical in functionality. } } \section{Table of Functions}{ \tabular{lllll}{\emph{ Function / S3 Generic } \tab\tab \emph{ Methods } \tab\tab \emph{ Description } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \code{\link{dapply}} \tab\tab No methods, works with matrices and data frames \tab\tab Apply functions to rows or columns \cr \code{\link{BY}} \tab\tab \code{default, matrix, data.frame, grouped_df} \tab\tab Split-Apply-Combine computing \cr \code{\link[=arithmetic]{\%(r/c)(r/+/-/*//)\%}} \tab\tab No methods, works with matrices and data frames / lists \tab\tab Row- and column-arithmetic \cr \code{\link[=TRA]{(set)TRA}} \tab\tab \code{default, matrix, data.frame, grouped_df} \tab\tab Replace and sweep out statistics (by reference) \cr \code{\link[=fscale]{fscale/STD}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Scale / standardize data \cr \code{\link[=fwithin]{fwithin/W}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Demean / center data \cr \code{\link[=fbetween]{fbetween/B}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Compute means / average data \cr \code{\link[=HDW]{fhdwithin/HDW}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame} \tab\tab High-dimensional centering and lm residuals \cr \code{\link[=HDB]{fhdbetween/HDB}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame} \tab\tab High-dimensional averages and lm fitted values \cr % \code{\link{flm}} \tab\tab No methods, for matrices \tab\tab Linear model fitting \cr \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}}, \code{\link[=fdiff]{fgrowth/G}}, \code{\link{fcumsum}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab (Sequences of) lags / leads, differences, growth rates and cumulative sums } } \seealso{ \link[=collapse-documentation]{Collapse Overview}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=time-series-panel-series]{Time Series and Panel Series} } \keyword{manip} \keyword{documentation} collapse/man/t_list.Rd0000644000176200001440000000260414777170130014425 0ustar liggesusers\name{t_list} \alias{t_list} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Efficient List Transpose } \description{ \code{t_list} turns a list of lists inside-out. The performance is quite efficient regardless of the size of the list. } \usage{ t_list(l) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list of lists. Elements inside the sublists can be heterogeneous, including further lists. } } \value{ \code{l} transposed such that the second layer of the list becomes the top layer and the top layer the second layer. See Examples. } \note{ To transpose a data frame / list of atomic vectors see \code{data.table::transpose()}. } \seealso{ \code{\link{rsplit}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Homogenous list of lists l <- list(a = list(c = 1, d = 2), b = list(c = 3, d = 4)) str(l) str(t_list(l)) # Heterogenous case l2 <- list(a = list(c = 1, d = letters), b = list(c = 3:10, d = list(4, e = 5))) attr(l2, "bla") <- "abc" # Attributes other than names are preserved str(l2) str(t_list(l2)) rm(l, l2) } \keyword{list} \keyword{manip} \keyword{utilities} % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fvar_fsd.Rd0000644000176200001440000002074315076527614014733 0ustar liggesusers\name{fvar-fsd} \alias{fvar} \alias{fvar.default} \alias{fvar.matrix} \alias{fvar.data.frame} \alias{fvar.grouped_df} \alias{fsd} \alias{fsd.default} \alias{fsd.matrix} \alias{fsd.data.frame} \alias{fsd.grouped_df} \title{Fast (Grouped, Weighted) Variance and Standard Deviation for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns \description{ \code{fvar} and \code{fsd} are generic functions that compute the (column-wise) variance and standard deviation of \code{x}, (optionally) grouped by \code{g} and/or frequency-weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) variance/sd. } \usage{ fvar(x, \dots) fsd(x, \dots) \method{fvar}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{fsd}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{fvar}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{fsd}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{fvar}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{fsd}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{fvar}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], stable.algo = .op[["stable.algo"]], \dots) \method{fsd}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], stable.algo = .op[["stable.algo"]], \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain summed weighting variable after computation (if contained in \code{grouped_df}).} \item{stub}{character. If \code{keep.w = TRUE} and \code{stub = TRUE} (default), the summed weights column is prefixed by \code{"sum."}. Users can specify a different prefix through this argument, or set it to \code{FALSE} to avoid prefixing.} \item{stable.algo}{logical. \code{TRUE} (default) use Welford's numerically stable online algorithm. \code{FALSE} implements a faster but numerically unstable one-pass method. See Details. } \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ \emph{Welford's online algorithm} used by default to compute the variance is well described \href{https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance}{here} (the section \emph{Weighted incremental algorithm} also shows how the weighted variance is obtained by this algorithm). If \code{stable.algo = FALSE}, the variance is computed in one-pass as \code{(sum(x^2)-n*mean(x)^2)/(n-1)}, where \code{sum(x^2)} is the sum of squares from which the expected sum of squares \code{n*mean(x)^2} is subtracted, normalized by \code{n-1} (Bessel's correction). This is numerically unstable if \code{sum(x^2)} and \code{n*mean(x)^2} are large numbers very close together, which will be the case for large \code{n}, large \code{x}-values and small variances (catastrophic cancellation occurs, leading to a loss of numeric precision). Numeric precision is however still maximized through the internal use of long doubles in C++, and the fast algorithm can be up to 4-times faster compared to Welford's method. The weighted variance is computed with frequency weights as \code{(sum(x^2*w)-sum(w)*weighted.mean(x,w)^2)/(sum(w)-1)}. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. %Missing-value removal as controlled by the \code{na.rm} argument is done very efficiently by simply skipping the values (thus setting \code{na.rm = FALSE} on data with no missing values doesn't give extra speed). Large performance gains can nevertheless be achieved in the presence of missing values if \code{na.rm = FALSE}, since then the corresponding computation is terminated once a \code{NA} is encountered and \code{NA} is returned. %This all seamlessly generalizes to grouped computations, which are performed in a single pass (without splitting the data) and therefore extremely fast. %When applied to data frames with groups or \code{drop = FALSE}, \code{fvar/fsd} preserves all column attributes (such as variable labels) but does not distinguish between classed and unclassed object (thus applying \code{fvar/fsd} to a factor column will give a 'malformed factor' error). The attributes of the data frame itself are also preserved. For further computational detail see \code{\link{fsum}}. } \value{ \code{fvar} returns the (\code{w} weighted) variance of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its (grouped, weighted) variance. \code{fsd} computes the standard deviation of \code{x} in like manor. } \references{ Welford, B. P. (1962). Note on a method for calculating corrected sums of squares and products. \emph{Technometrics}. 4 (3): 419-420. doi:10.2307/1266577. } \seealso{ \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method fvar(mtcars$mpg) # Simple variance (all examples also hold for fvar!) fsd(mtcars$mpg) # Simple standard deviation fsd(mtcars$mpg, w = mtcars$hp) # Weighted sd: Weighted by hp fsd(mtcars$mpg, TRA = "/") # Simple transformation: scaling (See also ?fscale) fsd(mtcars$mpg, mtcars$cyl) # Grouped sd fsd(mtcars$mpg, mtcars$cyl, mtcars$hp) # Grouped weighted sd fsd(mtcars$mpg, mtcars$cyl, TRA = "/") # Scaling by group fsd(mtcars$mpg, mtcars$cyl, mtcars$hp, "/") # Group-scaling using weighted group sds ## data.frame method fsd(iris) # This works, although 'Species' is a factor variable fsd(mtcars, drop = FALSE) # This works, all columns are numeric variables fsd(iris[-5], iris[5]) # By Species: iris[5] is still a list, and thus passed to GRP() fsd(iris[-5], iris[[5]]) # Same thing much faster: fsd recognizes 'Species' is a factor head(fsd(iris[-5], iris[[5]], TRA = "/")) # Data scaled by species (see also fscale) ## matrix method m <- qM(mtcars) fsd(m) fsd(m, mtcars$cyl) # etc.. ## method for grouped data frames - created with dplyr::group_by or fgroup_by mtcars |> fgroup_by(cyl,vs,am) |> fsd() mtcars |> fgroup_by(cyl,vs,am) |> fsd(keep.group_vars = FALSE) # Remove grouping columns mtcars |> fgroup_by(cyl,vs,am) |> fsd(hp) # Weighted by hp mtcars |> fgroup_by(cyl,vs,am) |> fsd(hp, "/") # Weighted scaling transformation } \keyword{univar} \keyword{manip} collapse/man/collap.Rd0000644000176200001440000004662215202504365014404 0ustar liggesusers\name{collap} \alias{advanced-aggregation} \alias{A5-advanced-aggregation} \alias{collap} \alias{collapv} \alias{collapg} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Advanced Data Aggregation } \description{ \code{collap} is a fast and versatile multi-purpose data aggregation command. It performs simple and weighted aggregations, multi-type aggregations automatically applying different functions to numeric and categorical columns, multi-function aggregations applying multiple functions to each column, and fully custom aggregations where the user passes a list mapping functions to columns. % \code{collap} works with \emph{collapse}'s \link[=fast-statistical-functions]{Fast Statistical Functions}, providing extremely fast conventional and weighted aggregation. It also works with other functions but this does not deliver high speeds on large data and does not support weighted aggregations. % \code{collap} supports formula and data (i.e. grouping vectors or lists of vectors) input to \code{by}, whereas \code{collapv} allows names and indices of grouping columns to be passed to \code{by}. } \usage{ # Main function: allows formula and data input to `by` and `w` arguments collap(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, \dots, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", drop = TRUE, parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto") # Programmer function: allows column names and indices input to `by` and `w` arguments collapv(X, by, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, \dots, keep.by = TRUE, keep.w = TRUE, keep.col.order = TRUE, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.order = sort, method = "auto", drop = TRUE, parallel = FALSE, mc.cores = 2L, return = c("wide","list","long","long_dupl"), give.names = "auto") # Auxiliary function: for grouped data ('grouped_df') input + non-standard evaluation collapg(X, FUN = fmean, catFUN = fmode, cols = NULL, w = NULL, wFUN = fsum, custom = NULL, keep.group_vars = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a data frame, or an object coercible to data frame using \code{\link{qDF}}.} \item{by}{for \code{collap}: a one-or two sided formula, i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}, or a atomic vector, list of vectors or \code{\link{GRP}} object used to group \code{X}. For \code{collapv}: names or indices of grouping columns, or a logical vector or selector function such as \code{\link{is_categorical}} selecting grouping columns.} \item{FUN}{a function, list of functions (i.e. \code{list(fsum, fmean, fsd)} or \code{list(sd = fsd, myfun1 = function(x)..)}), or a character vector of function names, which are automatically applied only to numeric variables.} \item{catFUN}{same as \code{FUN}, but applied only to categorical (non-numeric) typed columns (\code{\link{is_categorical}}).} \item{cols}{select columns to aggregate using a function, column names, indices or logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{w}{weights. Can be passed as numeric vector or alternatively as formula i.e. \code{~ weightvar} in \code{collap} or column name / index etc. i.e. \code{"weightvar"} in \code{collapv}. \code{collapg} supports non-standard evaluations so \code{weightvar} can be indicated without quotes. } \item{wFUN}{same as \code{FUN}: Function(s) to aggregate weight variable if \code{keep.w = TRUE}. By default the sum of the weights is computed in each group.} \item{custom}{a named list specifying a fully customized aggregation task. The names of the list are function names and the content columns to aggregate using this function (same input as \code{cols}). For example \code{custom = list(fmean = 1:6, fsd = 7:9, fmode = 10:11)} tells \code{collap} to aggregate columns 1-6 of \code{X} using the mean, columns 7-9 using the standard deviation etc. \emph{Notes}: \code{custom} lets \code{collap} ignore any inputs passed to \code{FUN}, \code{catFUN} or \code{cols}. Since v1.6.0 you can also rename columns e.g. \code{custom = list(fmean = c(newname = "col1", "col2"), fmode = c(newname = 3))}.} \item{keep.by, keep.group_vars}{logical. \code{FALSE} will omit grouping variables from the output. \code{TRUE} keeps the variables, even if passed externally in a list or vector (unlike other \emph{collapse} functions).} \item{keep.w}{logical. \code{FALSE} will omit weight variable from the output i.e. no aggregation of the weights. \code{TRUE} aggregates and adds weights, even if passed externally as a vector (unlike other \emph{collapse} functions).} \item{keep.col.order}{logical. Retain original column order post-aggregation.} \item{sort, decreasing, na.last, return.order, method}{logical / character. Arguments passed to \code{\link{GRP.default}} and affecting the row-order in the aggregated data frame and the grouping algorithm.} \item{drop}{logical. \code{FALSE} retains zero-count rows for unobserved combinations of factor levels among the grouping columns (analogous to \code{dplyr::group_by(.drop = FALSE)}). The corresponding rows in the aggregated output will contain values produced by the aggregation functions for empty groups (e.g. \code{NA} for \code{fmean}, \code{0} for \code{fsum}). See \code{\link{GRP}} (\code{drop} argument of \code{GRP.default}).} \item{parallel}{logical. Use \code{\link{mclapply}} instead of \code{lapply} to parallelize the computation at the column level. Not available for Windows.} \item{mc.cores}{integer. Argument to \code{\link{mclapply}} setting the number of cores to use, default is 2.} \item{return}{character. Control the output format when aggregating with multiple functions or performing custom aggregation. "wide" (default) returns a wider data frame with added columns for each additional function. "list" returns a list of data frames - one for each function. "long" adds a column "Function" and row-binds the results from different functions using \code{data.table::rbindlist}. "long_dupl" is a special option for aggregating multi-type data using multiple \code{FUN} but only one \code{catFUN} or vice-versa. In that case the format is long and data aggregated using only one function is duplicated. See Examples.} \item{give.names}{logical. Create unique names of aggregated columns by adding a prefix 'FUN.var'. \code{'auto'} will automatically create such prefixes whenever multiple functions are applied to a column. % By default \code{"."} is used as a separator between 'FUN' and 'var'. It is also possible to choose a different separator by specifying \code{give.names = "_"}, for example. } \item{\dots}{additional arguments passed to all functions supplied to \code{FUN}, \code{catFUN}, \code{wFUN} or \code{custom}. Since v1.9.0 these are also split by groups for non-\link[=fast-statistical-functions]{Fast Statistical Functions}. The behavior of \link[=fast-statistical-functions]{Fast Statistical Functions} with unused arguments is regulated by \code{option("collapse_unused_arg_action")} and defaults to \code{"warning"}. \code{collapg} also allows other arguments to \code{collap} except for \code{sort, decreasing, na.last, return.order, method} and \code{keep.by}.} } \details{ \code{collap} automatically checks each function passed to it whether it is a \link[=fast-statistical-functions]{Fast Statistical Function} (i.e. whether the function name is contained in \code{.FAST_STAT_FUN}). If the function is a fast statistical function, \code{collap} only does the grouping and then calls the function to carry out the grouped computations (vectorized in C/C++), resulting in high aggregation speeds, even with weights. If the function is not one of \code{.FAST_STAT_FUN}, \code{\link{BY}} is called internally to perform the computation. The resulting computations from each function are put into a list and recombined to produce the desired output format as controlled by the \code{return} argument. This is substantially slower, particularly with many groups. When setting \code{parallel = TRUE} on a non-windows computer, aggregations will efficiently be parallelized at the column level using \code{\link{mclapply}} utilizing \code{mc.cores} cores. Some \link[=fast-statistical-functions]{Fast Statistical Function} support multithreading i.e. have an \code{nthreads} argument that can be passed to \code{collap}. Using C-level multithreading is much more effective than R-level parallelism, and also works on Windows, but the two should never be combined. When the \code{w} argument is used, the weights are passed to all functions except for \code{wFUN}. This may be undesirable in settings like \code{collap(data, ~ id, custom = list(fsum = ..., fmean = ...), w = ~ weights)} where we wish to aggregate some columns using the weighted mean, and others using a simple sum or another unweighted statistic. %Since many \link[=fast-statistical-functions]{Fast Statistical Functions} including \code{\link{fsum}} support weights, the above computes a weighted mean and a weighted sum. A couple of workarounds were outlined \href{https://github.com/fastverse/collapse/issues/96}{here}, but \emph{collapse} 1.5.0 incorporates an easy solution into \code{collap}: Therefore it is possible to append \link[=fast-statistical-functions]{Fast Statistical Functions} by \code{_uw} to yield an unweighted computation. So for the above example one can specify: \code{collap(data, ~ id, custom = list(fsum_uw = ..., fmean = ...), w = ~ weights)} to get the weighted mean and the simple sum. \emph{Note} that the \code{_uw} functions are not available for use outside collap. Thus one also needs to quote them when passing to the \code{FUN} or \code{catFUN} arguments, e.g. use \code{collap(data, ~ id, fmean, "fmode_uw", w = ~ weights)}. %\emph{Note} also that it is never necessary for functions passed to \code{wFUN} to be appended like this, as the weights are never used to aggregate themselves. } \value{ \code{X} aggregated. If \code{X} is not a data frame it is coerced to one using \code{\link{qDF}} and then aggregated. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ %} %\note{ % (1) Additional arguments passed are not split by groups. Weighted aggregations with user defined functions should be done with \code{\link{BY}}, \code{\link{fsummarise}}, or using the \emph{data.table} package. % (2) Move above... % (3) The dispatch between using optimized \link[=fast-statistical-functions]{Fast Statistical Functions} performing grouped computations internally or calling \code{BY} to perform split-apply-combine computing is done by matching the function name against \code{.FAST_STAT_FUN}. Thus code like \code{collapse::collap(data, ~ id, collapse::fmedian)} does not yield an optimized computation, as \code{"collapse::fmedian" \%!in\% .FAST_STAT_FUN}. It is sufficient to write \code{collapse::collap(data, ~ id, "fmedian")} to get the desired result when the \emph{collapse} namespace is not attached. %If you want to perform optimized computations with \code{collap} without loading the pacckage, load the functions beforehand as well, e.g. \code{fmedian <- collapse::fmedian; data, ~ id, fmedian)}. Alternatively it is of course also possible to use \code{collapse::fmedian(collapse::fgroup_by(data, id))}, or something similar... % \code{collap} by default (\code{keep.by = TRUE, keep.w = TRUE}) preserves all arguments passed to the \code{by} or \code{w} arguments, whether passed in a formula or externally. The names of externally passed vectors and lists are intelligently extracted. So it is possible to write \code{collap(iris, iris$Species)}, and obtain an aggregated data frame with two \code{Species} columns, whereas \code{collap(iris, ~ Species)} only has one \code{Species} column. Similarly for weight vectors passed to \code{w}. In this regard \code{collap} is more sophisticated than other \emph{collapse} functions where preservation of grouping and weight variables is restricted to formula use. For example \code{STD(iris, iris$Species)} does not preserve \code{Species} in the output, whereas \code{STD(iris, ~ Species)} does. This \code{collap} feature is there simply for convenience, for example sometimes a survey is disaggregated into several datasets, and this now allows easy pulling of identifiers or weights from other datasets for aggregations. If all information is available in one dataset, just using formulas is highly recommended. %} %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{fsummarise}}, \code{\link{BY}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## A Simple Introduction -------------------------------------- head(iris) collap(iris, ~ Species) # Default: FUN = fmean for numeric collapv(iris, 5) # Same using collapv collap(iris, ~ Species, fmedian) # Using the median collap(iris, ~ Species, fmedian, keep.col.order = FALSE) # Groups in-front collap(iris, Sepal.Width + Petal.Width ~ Species, fmedian) # Only '.Width' columns collapv(iris, 5, cols = c(2, 4)) # Same using collapv collap(iris, ~ Species, list(fmean, fmedian)) # Two functions collap(iris, ~ Species, list(fmean, fmedian), return = "long") # Long format collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4)) # Custom aggregation collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4), # Raw output, no column reordering return = "list") collapv(iris, 5, custom = list(fmean = 1:2, fmedian = 3:4), # A strange choice.. return = "long") collap(iris, ~ Species, w = ~ Sepal.Length) # Using Sepal.Length as weights, .. weights <- abs(rnorm(fnrow(iris))) collap(iris, ~ Species, w = weights) # Some random weights.. collap(iris, iris$Species, w = weights) # Note this behavior.. collap(iris, iris$Species, w = weights, keep.by = FALSE, keep.w = FALSE) % \donttest{iris |> fgroup_by(Species) |> collapg()} # dplyr style, but faster ## Multi-Type Aggregation -------------------------------------- head(wlddev) # World Development Panel Data head(collap(wlddev, ~ country + decade)) # Aggregate by country and decade head(collap(wlddev, ~ country + decade, fmedian, ffirst)) # Different functions head(collap(wlddev, ~ country + decade, cols = is.numeric)) # Aggregate only numeric columns head(collap(wlddev, ~ country + decade, cols = 9:13)) # Only the 5 series head(collap(wlddev, PCGDP + LIFEEX ~ country + decade)) # Only GDP and life-expactancy head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, fsum)) # Using the sum instead head(collap(wlddev, PCGDP + LIFEEX ~ country + decade, sum, # Same using base::sum -> slower! na.rm = TRUE)) head(collap(wlddev, wlddev[c("country","decade")], fsum, # Same, exploring different inputs cols = 9:10)) head(collap(wlddev[9:10], wlddev[c("country","decade")], fsum)) head(collapv(wlddev, c("country","decade"), fsum)) # ..names/indices with collapv head(collapv(wlddev, c(1,5), fsum)) g <- GRP(wlddev, ~ country + decade) # Precomputing the grouping head(collap(wlddev, g, keep.by = FALSE)) # This is slightly faster now # Aggregate categorical data using not the mode but the last element head(collap(wlddev, ~ country + decade, fmean, flast)) head(collap(wlddev, ~ country + decade, catFUN = flast, # Aggregate only categorical data cols = is_categorical)) ## Weighted Aggregation ---------------------------------------- # We aggregate to region level using population weights head(collap(wlddev, ~ region + year, w = ~ POP)) # Takes weighted mean for numeric.. # ..and weighted mode for categorical data. The weight vector is aggregated using fsum head(collap(wlddev, ~ region + year, w = ~ POP, # Aggregating weights using sum wFUN = list(sum = fsum, max = fmax))) # and max (corresponding to mode) ## Multi-Function Aggregation ---------------------------------- head(collap(wlddev, ~ country + decade, list(mean = fmean, N = fnobs), # Saving mean and Nobs cols = 9:13)) head(collap(wlddev, ~ country + decade, # Same using base R -> slower list(mean = mean, N = function(x, \dots) sum(!is.na(x))), cols = 9:13, na.rm = TRUE)) lapply(collap(wlddev, ~ country + decade, # List output format list(mean = fmean, N = fnobs), cols = 9:13, return = "list"), head) head(collap(wlddev, ~ country + decade, # Long output format list(mean = fmean, N = fnobs), cols = 9:13, return = "long")) head(collap(wlddev, ~ country + decade, # Also aggregating categorical data, list(mean = fmean, N = fnobs), return = "long_dupl")) # and duplicating it 2 times head(collap(wlddev, ~ country + decade, # Now also using 2 functions on list(mean = fmean, N = fnobs), list(mode = fmode, last = flast), # categorical data keep.col.order = FALSE)) head(collap(wlddev, ~ country + decade, # More functions, string input, c("fmean","fsum","fnobs","fsd","fvar"), # parallelized execution c("fmode","ffirst","flast","fndistinct"), # (choose more than 1 cores, parallel = TRUE, mc.cores = 1L, # depending on your machine) keep.col.order = FALSE)) ## Custom Aggregation ------------------------------------------ head(collap(wlddev, ~ country + decade, # Custom aggregation custom = list(fmean = 11:13, fsd = 9:10, fmode = 7:8))) head(collap(wlddev, ~ country + decade, # Using column names custom = list(fmean = "PCGDP", fsd = c("LIFEEX","GINI"), flast = "date"))) head(collap(wlddev, ~ country + decade, # Weighted parallelized custom custom = list(fmean = 9:12, fsd = 9:10, # aggregation fmode = 7:8), w = ~ POP, wFUN = list(fsum, fmax), parallel = TRUE, mc.cores = 1L)) head(collap(wlddev, ~ country + decade, # No column reordering custom = list(fmean = 9:12, fsd = 9:10, fmode = 7:8), w = ~ POP, wFUN = list(fsum, fmax), parallel = TRUE, mc.cores = 1L, keep.col.order = FALSE)) ## Piped Use -------------------------------------------------- iris |> fgroup_by(Species) |> collapg() wlddev |> fgroup_by(country, decade) |> collapg() |> head() wlddev |> fgroup_by(region, year) |> collapg(w = POP) |> head() wlddev |> fgroup_by(country, decade) |> collapg(fmedian, flast) |> head() wlddev |> fgroup_by(country, decade) |> collapg(custom = list(fmean = 9:12, fmode = 5:7, flast = 3)) |> head() } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/fndistinct.Rd0000644000176200001440000001214714777170130015277 0ustar liggesusers\name{fndistinct} \alias{fndistinct} \alias{fndistinct.default} \alias{fndistinct.matrix} \alias{fndistinct.data.frame} \alias{fndistinct.grouped_df} \title{Fast (Grouped) Distinct Value Count for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fndistinct} is a generic function that (column-wise) computes the number of distinct values in \code{x}, (optionally) grouped by \code{g}. It is significantly faster than \code{length(unique(x))}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) distinct value count. } \usage{ fndistinct(x, \dots) \method{fndistinct}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, nthreads = .op[["nthreads"]], \dots) \method{fndistinct}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], \dots) \method{fndistinct}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], \dots) \method{fndistinct}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, nthreads = .op[["nthreads"]], \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. \code{TRUE}: Skip missing values in \code{x} (faster computation). \code{FALSE}: Also consider 'NA' as one distinct value.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{nthreads}{integer. The number of threads to utilize. Parallelism is across groups for grouped computations and at the column-level otherwise. } \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ \code{fndistinct} implements a pretty fast C-level hashing algorithm inspired by the \emph{kit} package to find the number of distinct values. %\code{fndistinct} implements a fast algorithm to find the number of distinct values utilizing index- hashing implemented in the \code{Rcpp::sugar::IndexHash} class. If \code{na.rm = TRUE} (the default), missing values will be skipped yielding substantial performance gains in data with many missing values. If \code{na.rm = FALSE}, missing values will simply be treated as any other value and read into the hash-map. Thus with the former, a numeric vector \code{c(1.25,NaN,3.56,NA)} will have a distinct value count of 2, whereas the latter will return a distinct value count of 4. % Grouped computations are performed by mapping the data to a sparse-array and then hash-mapping each group. This is often not much slower than using a larger hash-map for the entire data when \code{g = NULL}. \code{fndistinct} preserves all attributes of non-classed vectors / columns, and only the 'label' attribute (if available) of classed vectors / columns (i.e. dates or factors). When applied to data frames and matrices, the row-names are adjusted as necessary. } \value{ Integer. The number of distinct values in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its distinct value count, grouped by \code{g}. } \seealso{ \code{\link{fnunique}}, \code{\link{fnobs}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method fndistinct(airquality$Solar.R) # Simple distinct value count fndistinct(airquality$Solar.R, airquality$Month) # Grouped distinct value count ## data.frame method fndistinct(airquality) fndistinct(airquality, airquality$Month) fndistinct(wlddev) # Works with data of all types! head(fndistinct(wlddev, wlddev$iso3c)) ## matrix method aqm <- qM(airquality) fndistinct(aqm) # Also works for character or logical matrices fndistinct(aqm, airquality$Month) ## method for grouped data frames - created with dplyr::group_by or fgroup_by airquality |> fgroup_by(Month) |> fndistinct() wlddev |> fgroup_by(country) |> fselect(PCGDP,LIFEEX,GINI,ODA) |> fndistinct() } \keyword{univar} \keyword{manip} collapse/man/pad.Rd0000644000176200001440000000764215202400476013673 0ustar liggesusers\name{pad} \alias{pad} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Pad Matrix-Like Objects with a Value } \description{ The \code{pad} function inserts elements / rows filled with \code{value} into a vector matrix or data frame \code{X} at positions given by \code{i}. It is particularly useful to expand objects returned by statistical procedures which remove missing values to the original data dimensions. } \usage{ pad(X, i, value = NA, method = c("auto", "xpos", "vpos")) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{ a vector, matrix, data frame or list of equal-length columns. } \item{i}{ either an integer (positive or negative) or logical vector giving positions / rows of \code{X} into which \code{value}'s should be inserted, or, alternatively, a positive integer vector with \code{length(i) == NROW(X)}, but with some gaps in the indices into which \code{value}'s can be inserted, or a logical vector with \code{sum(i) == NROW(X)} such that \code{value}'s can be inserted for \code{FALSE} values in the logical vector. See also \code{method} and Examples. } \item{value}{ a scalar value to be replicated and inserted into \code{X} at positions / rows given by \code{i}. Default is \code{NA}. } \item{method}{ an integer or string specifying the use of \code{i}. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "auto" \tab\tab automatic method selection: If \code{i} is positive integer and \code{length(i) == NROW(X)} or if \code{i} is logical and \code{sum(i) == NROW(X)}, choose method "xpos", else choose "vpos". \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr 1 \tab\tab "xpos" \tab\tab \code{i} is a vector of positive integers or a logical vector giving the positions of the the elements / rows of \code{X}. \code{values}'s are inserted where there are gaps / \code{FALSE} values in \code{i}. \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr 2 \tab\tab "vpos" \tab\tab \code{i} is a vector of positive / negative integers or a logical vector giving the positions at which \code{values}'s / rows should be inserted into \code{X}. } } } \value{ \code{X} with elements / rows filled with \code{value} inserted at positions given by \code{i}. } \seealso{ \code{\link{append}}, \link[=recode-replace]{Recode and Replace Values}, \link[=small-helpers]{Small (Helper) Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ v <- 1:3 pad(v, 1:2) # Automatic selection of method "vpos" pad(v, -(1:2)) # Same thing pad(v, c(TRUE, TRUE, FALSE, FALSE, FALSE)) # Same thing pad(v, c(1, 3:4)) # Automatic selection of method "xpos" pad(v, c(TRUE, FALSE, TRUE, TRUE, FALSE)) # Same thing head(pad(wlddev, 1:3)) # Insert 3 missing rows at the beginning of the data head(pad(wlddev, 2:4)) # ... at rows positions 2-4 # pad() is mostly useful for statistical models which only use the complete cases: mod <- lm(LIFEEX ~ PCGDP, wlddev) # Generating a residual column in the original data (automatic selection of method "vpos") settfm(wlddev, resid = pad(resid(mod), mod$na.action)) # Another way to do it: r <- resid(mod) i <- as.integer(names(r)) resid2 <- pad(r, i) # automatic selection of method "xpos" # here we need to add some elements as flast(i) < nrow(wlddev) resid2 <- c(resid2, rep(NA, nrow(wlddev)-length(resid2))) # See that these are identical: identical(unattrib(wlddev$resid), resid2) # Can also easily get a model matrix at the dimensions of the original data mm <- pad(model.matrix(mod), mod$na.action) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fsubset.Rd0000644000176200001440000001424214777170130014603 0ustar liggesusers\name{fsubset} \alias{fsubset} \alias{sbt} \alias{ss} \alias{fsubset.default} \alias{fsubset.matrix} \alias{fsubset.data.frame} \alias{fsubset.pseries} \alias{fsubset.pdata.frame} \title{Fast Subsetting Matrix-Like Objects} \description{ \code{fsubset} returns subsets of vectors, matrices or data frames which meet conditions. It is programmed very efficiently and uses C source code from the \emph{data.table} package. %Especially for data frames it is significantly (4-5 times) faster than \code{\link{subset}} or \code{dplyr::filter}. The methods also provide enhanced functionality compared to \code{\link{subset}}. The function \code{ss} provides an (internal generic) programmers alternative to \code{[} that does not drop dimensions and is significantly faster than \code{[} for data frames. } \usage{ fsubset(.x, \dots) sbt(.x, \dots) # Shorthand for fsubset \method{fsubset}{default}(.x, subset, \dots) \method{fsubset}{matrix}(.x, subset, \dots, drop = FALSE) \method{fsubset}{data.frame}(.x, subset, \dots) # Methods for indexed data / compatibility with plm: \method{fsubset}{pseries}(.x, subset, \dots, drop.index.levels = "id") \method{fsubset}{pdata.frame}(.x, subset, \dots, drop.index.levels = "id") # Fast subsetting (replaces `[` with drop = FALSE, programmers choice) ss(x, i, j, check = TRUE) } \arguments{ \item{.x}{object to be subsetted according to different methods.} \item{x}{a data frame / list, matrix or vector/array (only \code{i}).} \item{subset}{logical expression indicating elements or rows to keep: missing values are taken as \code{FALSE}. The default, matrix and pseries methods only support logical vectors or row-indices (or a character vector of rownames if the matrix has rownames). } \item{\dots}{For the matrix or data frame method: multiple comma-separated expressions indicating columns to select. Otherwise: further arguments to be passed to or from other methods.} \item{drop}{passed on to \code{[} indexing operator. Only available for the matrix method.} \item{i}{positive or negative row-indices or a logical vector to subset the rows of \code{x}.} \item{j}{a vector of column names, positive or negative indices or a suitable logical vector to subset the columns of \code{x}. \emph{Note:} Negative indices are converted to positive ones using \code{j <- seq_along(x)[j]}.} \item{check}{logical. \code{FALSE} skips checks on \code{i} and \code{j}, e.g. whether indices are negative. This offers a speedup to programmers, but can terminate R if zero or negative indices are passed. } \item{drop.index.levels}{character. Either \code{"id"}, \code{"time"}, \code{"all"} or \code{"none"}. See \link{indexing}.} } \details{ \code{fsubset} is a generic function, with methods supplied for vectors, matrices, and data frames (including lists). It represents an improvement over \code{\link{subset}} in terms of both speed and functionality. The function \code{ss} is an improvement of \code{[} to subset (vectors) matrices and data frames without dropping dimensions. It is significantly faster than \code{[.data.frame}. For ordinary vectors, \code{subset} can be integer or logical, subsetting is done in C and more efficient than \code{[} for large vectors. For matrices the implementation is all base-R but slightly more efficient and more versatile than \code{\link{subset.matrix}}. Thus it is possible to \code{subset} matrix rows using logical or integer vectors, or character vectors matching rownames. The \code{drop} argument is passed on to the \code{[} method for matrices. For both matrices and data frames, the \code{\dots} argument can be used to subset columns, and is evaluated in a non-standard way. Thus it can support vectors of column names, indices or logical vectors, but also multiple comma separated column names passed without quotes, each of which may also be replaced by a sequence of columns i.e. \code{col1:coln}, and new column names may be assigned e.g. \code{fsubset(data, col1 > 20, newname = col2, col3:col6)} (see examples). For data frames, the \code{subset} argument is also evaluated in a non-standard way. Thus next to vector of row-indices or logical vectors, it supports logical expressions of the form \code{col2 > 5 & col2 < col3} etc. (see examples). The data frame method is implemented in C, hence it is significantly faster than \code{\link{subset.data.frame}}. If fast data frame subsetting is required but no non-standard evaluation, the function \code{ss} is slightly simpler and faster. Factors may have empty levels after subsetting; unused levels are not automatically removed. See \code{\link{fdroplevels}} to drop all unused levels from a data frame. } \value{ An object similar to \code{.x/x} containing just the selected elements (for a vector), rows and columns (for a matrix or data frame). } \note{ \code{ss} offers no support for indexed data. Use \code{fsubset} with indices instead. No replacement method \code{fsubset<-} or \code{ss<-} is offered in \emph{collapse}. For efficient subset replacement (without copying) use \code{data.table::set}, which can also be used with data frames and tibbles. To search and replace certain elements without copying, and to efficiently copy elements / rows from an equally sized vector / data frame, see \code{\link{setv}}. For subsetting columns alone, please also see \link[=fselect]{selecting and replacing columns}. Note that the use of \code{\link{\%==\%}} can yield significant performance gains on large data. } \seealso{ \code{\link{fselect}}, \code{\link{get_vars}}, \code{\link{ftransform}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ fsubset(airquality, Temp > 90, Ozone, Temp) fsubset(airquality, Temp > 90, OZ = Ozone, Temp) # With renaming fsubset(airquality, Day == 1, -Temp) fsubset(airquality, Day == 1, -(Day:Temp)) fsubset(airquality, Day == 1, Ozone:Wind) fsubset(airquality, Day == 1 & !is.na(Ozone), Ozone:Wind, Month) fsubset(airquality, Day \%==\% 1, -Temp) # Faster for big data, as \%==\% directly returns indices ss(airquality, 1:10, 2:3) # Significantly faster than airquality[1:10, 2:3] fsubset(airquality, 1:10, 2:3) # This is possible but not advised } \keyword{manip} collapse/man/fdroplevels.Rd0000644000176200001440000000436214777170130015457 0ustar liggesusers\name{fdroplevels} \alias{fdroplevels} \alias{fdroplevels.factor} \alias{fdroplevels.data.frame} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Removal of Unused Factor Levels } \description{ A substantially faster replacement for \code{\link{droplevels}}. } \usage{ fdroplevels(x, ...) \method{fdroplevels}{factor}(x, ...) \method{fdroplevels}{data.frame}(x, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a factor, or data frame / list containing one or more factors.} \item{\dots}{not used.} } \details{ \code{\link{droplevels}} passes a factor from which levels are to be dropped to \code{\link{factor}}, which first calls \code{\link{unique}} and then \code{\link{match}} to drop unused levels. Both functions internally use a hash table, which is highly inefficient. \code{fdroplevels} does not require mapping values at all, but uses a super fast boolean vector method to determine which levels are unused and remove those levels. In addition, if no unused levels are found, \code{x} is simply returned. Any missing values found in \code{x} are efficiently skipped in the process of checking and replacing levels. All other attributes of \code{x} are preserved. } \value{ \code{x} with unused factor levels removed. } \note{ If \code{x} is malformed e.g. has too few levels, this function can cause a segmentation fault terminating the R session, thus only use with ordinary / proper factors. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{qF}}, \code{\link{funique}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ f <- iris$Species[1:100] fdroplevels(f) identical(fdroplevels(f), droplevels(f)) fNA <- na_insert(f) fdroplevels(fNA) identical(fdroplevels(fNA), droplevels(fNA)) identical(fdroplevels(ss(iris, 1:100)), droplevels(ss(iris, 1:100))) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): % \keyword{ ~kwd1 } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/qsu.Rd0000644000176200001440000004103515076527614013746 0ustar liggesusers\name{qsu} \alias{qsu} \alias{qsu.default} \alias{qsu.matrix} \alias{qsu.data.frame} \alias{qsu.grouped_df} \alias{qsu.pseries} \alias{qsu.pdata.frame} \alias{qsu.sf} \alias{print.qsu} \alias{as.data.frame.qsu} % - Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Grouped, Weighted) Summary Statistics for Cross-Sectional and Panel Data } \description{ \code{qsu}, shorthand for quick-summary, is an extremely fast summary command inspired by the (xt)summarize command in the STATA statistical software. It computes a set of 7 statistics (nobs, mean, sd, min, max, skewness and kurtosis) using a numerically stable one-pass method generalized from Welford's Algorithm. Statistics can be computed weighted, by groups, and also within-and between entities (for panel data, see Details). } \usage{ qsu(x, \dots) \method{qsu}{default}(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{qsu}{matrix}(x, g = NULL, pid = NULL, w = NULL, higher = FALSE, array = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{qsu}{data.frame}(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], \dots) \method{qsu}{grouped_df}(x, pid = NULL, w = NULL, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], \dots) # Methods for indexed data / compatibility with plm: \method{qsu}{pseries}(x, g = NULL, w = NULL, effect = 1L, higher = FALSE, array = TRUE, stable.algo = .op[["stable.algo"]], \dots) \method{qsu}{pdata.frame}(x, by = NULL, w = NULL, cols = NULL, effect = 1L, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], \dots) # Methods for compatibility with sf: \method{qsu}{sf}(x, by = NULL, pid = NULL, w = NULL, cols = NULL, higher = FALSE, array = TRUE, labels = FALSE, stable.algo = .op[["stable.algo"]], \dots) \method{as.data.frame}{qsu}(x, ..., gid = "Group", stringsAsFactors = TRUE) \method{print}{qsu}(x, digits = .op[["digits"]] + 2L, nonsci.digits = 9, na.print = "-", return = FALSE, print.gap = 2, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector, matrix, data frame, 'indexed_series' ('pseries') or 'indexed_frame' ('pdata.frame').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{by}{\emph{(p)data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1 + group2} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{pid}{same input as \code{g/by}: Specify a panel-identifier to also compute statistics on between- and within- transformed data. Data frame method also supports one- or two-sided formulas, grouped_df method supports expressions evaluated in the data environment. Transformations are taken independently from grouping with \code{g/by} (grouped statistics are computed on the transformed data if \code{g/by} is also used). However, passing any LHS variables to \code{pid} will overwrite any \code{LHS} variables passed to \code{by}.} \item{w}{a vector of (non-negative) weights. Adding weights will compute the weighted mean, sd, skewness and kurtosis, and transform the data using weighted individual means if \code{pid} is used. A \code{"WeightSum"} column will be added giving the sum of weights, see also Details. Data frame method supports formula, grouped_df method supports expression.} \item{cols}{select columns to summarize using column names, indices, a logical vector or a function (e.g. \code{is.numeric}). Two-sided formulas passed to \code{by} or \code{pid} overwrite \code{cols}.} \item{higher}{logical. Add higher moments (skewness and kurtosis).} \item{array}{logical. If computations have more than 2 dimensions (up to a maximum of 4D: variables, statistics, groups and panel-decomposition) \code{TRUE} returns an array, while \code{FALSE} returns a (nested) list of matrices.} \item{stable.algo}{logical. \code{FALSE} uses a faster but less stable method to calculate the standard deviation (see Details of \code{\link{fsd}}). Only available if \code{w = NULL} and \code{higher = FALSE}.} \item{labels}{logical \code{TRUE} or a function: to display variable labels in the summary. See Details.} \item{effect}{\emph{plm} methods: Select which panel identifier should be used for between and within transformations of the data. 1L takes the first variable in the \link[=indexing]{index}, 2L the second etc.. Index variables can also be called by name using a character string. More than one variable can be supplied. } \item{\dots}{arguments to be passed to or from other methods.} \item{gid}{character. Name assigned to the group-id column, when summarising variables by groups.} \item{stringsAsFactors}{logical. Make factors from dimension names of 'qsu' array. Same as option to \code{\link{as.data.frame.table}}.} \item{digits}{the number of digits to print after the comma/dot.} \item{nonsci.digits}{the number of digits to print before resorting to scientific notation (default is to print out numbers with up to 9 digits and print larger numbers scientifically).} \item{na.print}{character string to substitute for missing values.} \item{return}{logical. Don't print but instead return the formatted object.} \item{print.gap}{integer. Spacing between printed columns. Passed to \code{print.default}.} } \details{ The algorithm used to compute statistics is well described \href{https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance}{here} [see sections \emph{Welford's online algorithm}, \emph{Weighted incremental algorithm} and \emph{Higher-order statistics}. Skewness and kurtosis are calculated as described in \emph{Higher-order statistics} and are mathematically identical to those implemented in the \emph{moments} package. Just note that \code{qsu} computes the kurtosis (like \code{momens::kurtosis}), not the excess-kurtosis (= kurtosis - 3) defined in \emph{Higher-order statistics}. The \emph{Weighted incremental algorithm} described can easily be generalized to higher-order statistics]. Grouped computations specified with \code{g/by} are carried out extremely efficiently as in \code{fsum} (in a single pass, without splitting the data). If \code{pid} is used, \code{qsu} performs a panel-decomposition of each variable and computes 3 sets of statistics: Statistics computed on the 'Overall' (raw) data, statistics computed on the 'Between' - transformed (pid - averaged) data, and statistics computed on the 'Within' - transformed (pid - demeaned) data. More formally, let \bold{\code{x}} (bold) be a panel vector of data for \code{N} individuals indexed by \code{i}, recorded for \code{T} periods, indexed by \code{t}. \code{xit} then denotes a single data-point belonging to individual \code{i} in time-period \code{t} (\code{t/T} must not represent time). Then \code{xi.} denotes the average of all values for individual \code{i} (averaged over \code{t}), and by extension \bold{\code{xN.}} is the vector (length \code{N}) of such averages for all individuals. If no groups are supplied to \code{g/by}, the 'Between' statistics are computed on \bold{\code{xN.}}, the vector of individual averages. (This means that for a non-balanced panel or in the presence of missing values, the 'Overall' mean computed on \bold{\code{x}} can be slightly different than the 'Between' mean computed on \bold{\code{xN.}}, and the variance decomposition is not exact). If groups are supplied to \code{g/by}, \bold{\code{xN.}} is expanded to the vector \bold{\code{xi.}} (length \code{N x T}) by replacing each value \code{xit} in \bold{\code{x}} with \code{xi.}, while preserving missing values in \bold{\code{x}}. Grouped Between-statistics are then computed on \bold{\code{xi.}}, with the only difference that the number of observations ('Between-N') reported for each group is the number of distinct non-missing values of \bold{\code{xi.}} in each group (not the total number of non-missing values of \bold{\code{xi.}} in each group, which is already reported in 'Overall-N'). See Examples. 'Within' statistics are always computed on the vector \bold{\code{x - xi. + x..}}, where \bold{\code{x..}} is simply the 'Overall' mean computed from \bold{\code{x}}, which is added back to preserve the level of the data. The 'Within' mean computed on this data will always be identical to the 'Overall' mean. In the summary output, \code{qsu} reports not 'N', which would be identical to the 'Overall-N', but 'T', the average number of time-periods of data available for each individual obtained as 'T' = 'Overall-N / 'Between-N'. When using weights (\code{w}) with panel data (\code{pid}), the 'Between' sum of weights is also simply the number of groups, and the 'Within' sum of weights is the 'Overall' sum of weights divided by the number of groups. See Examples. Apart from 'N/T' and the extrema, the standard-deviations ('SD') computed on between- and within- transformed data are extremely valuable because they indicate how much of the variation in a panel-variable is between-individuals and how much of the variation is within-individuals (over time). At the extremes, variables that have common values across individuals (such as the time-variable(s) 't' in a balanced panel), can readily be identified as individual-invariant because the 'Between-SD' on this variable is 0 and the 'Within-SD' is equal to the 'Overall-SD'. Analogous, time-invariant individual characteristics (such as the individual-id 'i') have a 0 'Within-SD' and a 'Between-SD' equal to the 'Overall-SD'. See Examples. For data frame methods, if \code{labels = TRUE}, \code{qsu} uses \code{function(x) paste(names(x), setv(vlabels(x), NA, ""), sep = ": ")} to combine variable names and labels for display. Alternatively, the user can pass a custom function which will be applied to the data frame, e.g. using \code{labels = vlabels} just displays the labels. See also \code{\link{vlabels}}. \code{qsu} comes with its own print method which by default writes out up to 9 digits at 4 decimal places. Larger numbers are printed in scientific format. for numbers between 7 and 9 digits, an apostrophe (') is placed after the 6th digit to designate the millions. Missing values are printed using '-'. The \emph{sf} method simply ignores the geometry column. } \value{ A vector, matrix, array or list of matrices of summary statistics. All matrices and arrays have a class 'qsu' and a class 'table' attached. } \note{ In weighted summaries, observations with missing or zero weights are skipped, and thus do not affect any of the calculated statistics, including the observation count. This also implies that a logical vector passed to \code{w} can be used to efficiently summarize a subset of the data. If weights \code{w} are used together with \code{pid}, transformed data is computed using weighted individual means i.e. weighted \bold{\code{xi.}} and weighted \bold{\code{x..}}. Weighted statistics are subsequently computed on this weighted-transformed data. } \references{ Welford, B. P. (1962). Note on a method for calculating corrected sums of squares and products. \emph{Technometrics}. 4 (3): 419-420. doi:10.2307/1266577. } % \author{ %% ~~who you are~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{descr}}, \link[=summary-statistics]{Summary Statistics}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ ## World Development Panel Data # Simple Summaries ------------------------- qsu(wlddev) # Simple summary qsu(wlddev, labels = TRUE) # Display variable labels qsu(wlddev, higher = TRUE) # Add skewness and kurtosis # Grouped Summaries ------------------------ qsu(wlddev, ~ region, labels = TRUE) # Statistics by World Bank Region qsu(wlddev, PCGDP + LIFEEX ~ income) # Summarize GDP per Capita and Life Expectancy by stats <- qsu(wlddev, ~ region + income, # World Bank Income Level cols = 9:10, higher = TRUE) # Same variables, by both region and income aperm(stats) # A different perspective on the same stats # Grouped summary wlddev |> fgroup_by(region) |> fselect(PCGDP, LIFEEX) |> qsu() # Panel Data Summaries --------------------- qsu(wlddev, pid = ~ iso3c, labels = TRUE) # Adding between and within countries statistics # -> They show amongst other things that year and decade are individual-invariant, # that we have GINI-data on only 161 countries, with only 8.42 observations per country on average, # and that GDP, LIFEEX and GINI vary more between-countries, but ODA received varies more within # countries over time. # Let's do this manually for PCGDP: x <- wlddev$PCGDP g <- wlddev$iso3c # This is the exact variance decomposion all.equal(fvar(x), fvar(B(x, g)) + fvar(W(x, g))) # What qsu does is calculate r <- rbind(Overall = qsu(x), Between = qsu(fmean(x, g)), # Aggregation instead of between-transform Within = qsu(fwithin(x, g, mean = "overall.mean"))) # Same as qsu(W(x, g) + fmean(x)) r[3, 1] <- r[1, 1] / r[2, 1] print.qsu(r) # Proof: qsu(x, pid = g) # Using indexed data: wldi <- findex_by(wlddev, iso3c, year) # Creating a Indexed Data Frame frame from this data qsu(wldi) # Summary for pdata.frame -> qsu(wlddev, pid = ~ iso3c) qsu(wldi$PCGDP) # Default summary for Panel Series qsu(G(wldi$PCGDP)) # Summarizing GDP growth, see also ?G # Grouped Panel Data Summaries ------------- qsu(wlddev, ~ region, ~ iso3c, cols = 9:12) # Panel-Statistics by region psr <- qsu(wldi, ~ region, cols = 9:12) # Same on indexed data psr # -> Gives a 4D array psr[,"N/T",,] # Checking out the number of observations: # In North america we only have 3 countries, for the GINI we only have 3.91 observations on average # for 45 Sub-Saharan-African countries, etc.. psr[,"SD",,] # Considering only standard deviations # -> In all regions variations in inequality (GINI) between countries are greater than variations # in inequality within countries. The opposite is true for Life-Expectancy in all regions apart # from Europe, etc.. # Again let's do this manually for PDGCP: d <- cbind(Overall = x, Between = fbetween(x, g), Within = fwithin(x, g, mean = "overall.mean")) r <- qsu(d, g = wlddev$region) r[,"N","Between"] <- fndistinct(g[!is.na(x)], wlddev$region[!is.na(x)]) r[,"N","Within"] <- r[,"N","Overall"] / r[,"N","Between"] r # Proof: qsu(wlddev, PCGDP ~ region, ~ iso3c) # Weighted Summaries ----------------------- n <- nrow(wlddev) weights <- abs(rnorm(n)) # Generate random weights qsu(wlddev, w = weights, higher = TRUE) # Computed weighted mean, SD, skewness and kurtosis weightsNA <- weights # Weights may contain missing values.. inserting 1000 weightsNA[sample.int(n, 1000)] <- NA qsu(wlddev, w = weightsNA, higher = TRUE) # But now these values are removed from all variables # Grouped and panel-summaries can also be weighted in the same manner # Alternative Output Formats --------------- # Simple case as.data.frame(qsu(mtcars)) # For matrices can also use qDF/qDT/qTBL to assign custom name and get a character-id qDF(qsu(mtcars), "car") # DF from 3D array: do not combine with aperm(), might introduce wrong column labels as.data.frame(stats, gid = "Region_Income") # DF from 4D array: also no aperm() as.data.frame(qsu(wlddev, ~ income, ~ iso3c, cols = 9:10), gid = "Region") # Output as nested list psrl <- qsu(wlddev, ~ income, ~ iso3c, cols = 9:10, array = FALSE) psrl # We can now use unlist2d to create a tidy data frame unlist2d(psrl, c("Variable", "Trans"), row.names = "Income") } % View(psrdat) % # We've gotten this far, let's give it a ggplot2 finish: % psrdat <- reshape2::melt(psrdat, 1:3, % variable.name = "Statistic") # Looks freakin rediculous, but still a nice demonstation % library(ggplot2) % ggplot(psrdat, aes(x = Trans, y = value, fill = Region)) + % geom_bar(stat = "identity", position = position_dodge()) + % facet_wrap(Statistic ~ Variable, scales = "free", ncol = 4) % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{univar} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") collapse/man/qtab.Rd0000644000176200001440000001170314777170130014056 0ustar liggesusers\name{qtab} \alias{qtab} \alias{qtable} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Weighted) Cross Tabulation } \description{ A versatile and computationally more efficient replacement for \code{\link{table}}. Notably, it also supports tabulations with frequency weights, and computation of a statistic over combinations of variables. } \usage{ qtab(..., w = NULL, wFUN = NULL, wFUN.args = NULL, dnn = "auto", sort = .op[["sort"]], na.exclude = TRUE, drop = FALSE, method = "auto") qtable(...) # Long-form. Use set_collapse(mask = "table") to replace table() } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{ atomic vectors or factors spanning the table dimensions, (optionally) with tags for the dimension names, or a data frame / list of these. See Examples. } \item{w}{ a single vector to aggregate over the table dimensions e.g. a vector of frequency weights. } \item{wFUN}{ a function used to aggregate \code{w} over the table dimensions. The default \code{NULL} computes the sum of the non-missing weights via an optimized internal algorithm. \link[=fast-statistical-functions]{Fast Statistical Functions} also receive vectorized execution. } \item{wFUN.args}{ a list of (optional) further arguments passed to \code{wFUN}. See Examples. } \item{dnn}{ the names of the table dimensions. Either passed directly as a character vector or list (internally \code{\link{unlist}}'ed), a function applied to the \code{\dots} list (e.g. \code{\link{names}}, or \code{\link{vlabels}}), or one of the following options: \itemize{ \item \code{"auto"} constructs names based on the \code{\dots} arguments, or calls \code{\link{names}} if a single list is passed as input. \item \code{"namlab"} does the same as \code{"auto"}, but also calls \code{\link{vlabels}} on the list and appends the names by the variable labels. } \code{dnn = NULL} will return a table without dimension names. } \item{sort, na.exclude, drop, method}{ arguments passed down to \code{\link{qF}}: \itemize{ \item \code{sort = FALSE} orders table dimensions in first-appearance order of items in the data (can be more efficient if vectors are not factors already). Note that for factors this option will both recast levels in first-appearance order and drop unused levels. \item \code{na.exclude = FALSE} includes \code{NA}'s in the table (equivalent to \code{\link{table}}'s \code{useNA = "ifany"}). \item \code{drop = TRUE} removes any unused factor levels (= zero frequency rows or columns). \item \code{method \%in\% c("radix", "hash")} provides additional control over the algorithm used to convert atomic vectors to factors. } } } \value{ An array of class 'qtab' that inherits from 'table'. Thus all 'table' methods apply to it. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{descr}}, \link[=summary-statistics]{Summary Statistics}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Basic use qtab(iris$Species) with(mtcars, qtab(vs, am)) qtab(mtcars[.c(vs, am)]) library(magrittr) iris \%$\% qtab(Sepal.Length > mean(Sepal.Length), Species) iris \%$\% qtab(AMSL = Sepal.Length > mean(Sepal.Length), Species) ## World after 2015 wlda15 <- wlddev |> fsubset(year >= 2015) |> collap(~ iso3c) # Regions and income levels (country frequency) wlda15 \%$\% qtab(region, income) wlda15 \%$\% qtab(region, income, dnn = vlabels) wlda15 \%$\% qtab(region, income, dnn = "namlab") # Population (millions) wlda15 \%$\% qtab(region, income, w = POP) |> divide_by(1e6) # Life expectancy (years) wlda15 \%$\% qtab(region, income, w = LIFEEX, wFUN = fmean) # Life expectancy (years), weighted by population wlda15 \%$\% qtab(region, income, w = LIFEEX, wFUN = fmean, wFUN.args = list(w = POP)) # GDP per capita (constant 2010 US$): median wlda15 \%$\% qtab(region, income, w = PCGDP, wFUN = fmedian, wFUN.args = list(na.rm = TRUE)) # GDP per capita (constant 2010 US$): median, weighted by population wlda15 \%$\% qtab(region, income, w = PCGDP, wFUN = fmedian, wFUN.args = list(w = POP)) # Including OECD membership tab <- wlda15 \%$\% qtab(region, income, OECD) tab # Various 'table' methods tab |> addmargins() tab |> marginSums(margin = c("region", "income")) tab |> proportions() tab |> proportions(margin = "income") as.data.frame(tab) |> head(10) ftable(tab, row.vars = c("region", "OECD")) # Other options tab |> fsum(TRA = "\%") # Percentage table (on a matrix use fsum.default) tab \%/=\% (sum(tab)/100) # Another way (division by reference, preserves integers) tab rm(tab, wlda15) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{multivariate} % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/rowbind.Rd0000644000176200001440000000667714777170130014611 0ustar liggesusers\name{rowbind} \alias{rowbind} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Row-Bind Lists / Data Frame-Like Objects } \description{ \emph{collapse}'s version of \code{data.table::rbindlist} and \code{rbind.data.frame}. The core code is copied from \emph{data.table}, which deserves all credit for the implementation. \code{rowbind} only binds lists/data.frame's. For a more flexible recursive version see \code{\link{unlist2d}}. To combine lists column-wise see \code{\link{add_vars}} or \code{\link{ftransform}} (with replacement). } \usage{ rowbind(\dots, idcol = NULL, row.names = FALSE, use.names = TRUE, fill = FALSE, id.factor = "auto", return = c("as.first", "data.frame", "data.table", "tibble", "list")) } \arguments{ \item{\dots}{a single list of list-like objects (data.frames) or comma separated objects (internally assembled using \code{list(\dots)}). Names can be supplied if \code{!is.null(idcol)}.} \item{idcol}{character. The name of an id-column to be generated identifying the source of rows in the final object. Using \code{idcol = TRUE} will set the name to \code{".id"}. If the input list has names, these will form the content of the id column, otherwise integers are used. To save memory, it is advised to keep \code{id.factor = TRUE}.} \item{row.names}{\code{TRUE} extracts row names from all the objects in \code{l} and adds them to the output in a column named \code{"row.names"}. Alternatively, a column name i.e. \code{row.names = "variable"} can be supplied. } \item{use.names}{logical. \code{TRUE} binds by matching column name, \code{FALSE} by position. } \item{fill}{logical. \code{TRUE} fills missing columns with NAs. When \code{TRUE}, \code{use.names} is set to \code{TRUE}.} \item{id.factor}{if \code{TRUE} and \code{!isFALSE(idcols)}, create id column as factor instead of character or integer vector. It is also possible to specify \code{"ordered"} to generate an ordered factor id. \code{"auto"} uses \code{TRUE} if \code{!is.null(names(l))} where \code{l} is the input list (because factors are much more memory efficient than character vectors). } \item{return}{an integer or string specifying what to return. \code{1 - "as.first"} preserves the attributes of the first element of the list, \code{2/3/4 - "data.frame"/"data.table"/"tibble"} coerces to specific objects, and \code{5 - "list"} returns a (named) list. } } \value{ a long list or data frame-like object formed by combining the rows / elements of the input objects. The \code{return} argument controls the exact format of the output. } \seealso{ \code{\link{unlist2d}}, \code{\link{add_vars}}, \code{\link{ftransform}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # These are the same rowbind(mtcars, mtcars) rowbind(list(mtcars, mtcars)) # With id column rowbind(mtcars, mtcars, idcol = "id") rowbind(a = mtcars, b = mtcars, idcol = "id") # With saving row-names rowbind(mtcars, mtcars, row.names = "cars") rowbind(a = mtcars, b = mtcars, idcol = "id", row.names = "cars") # Filling up columns rowbind(mtcars, mtcars[2:8], fill = TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/GGDC10S.Rd0000644000176200001440000001034314777170130014116 0ustar liggesusers\name{GGDC10S} \alias{GGDC10S} \docType{data} \title{ Groningen Growth and Development Centre 10-Sector Database } \description{ The GGDC 10-Sector Database provides a long-run internationally comparable dataset on sectoral productivity performance in Africa, Asia, and Latin America. Variables covered in the data set are annual series of value added (in local currency), and persons employed for 10 broad sectors. } \usage{data("GGDC10S")} \format{ A data frame with 5027 observations on the following 16 variables. \describe{ \item{\code{Country}}{\emph{char}: Country (43 countries)} \item{\code{Regioncode}}{\emph{char}: ISO3 Region code} \item{\code{Region}}{\emph{char}: Region (6 World Regions)} \item{\code{Variable}}{\emph{char}: Variable (Value Added or Employment)} \item{\code{Year}}{\emph{num}: Year (67 Years, 1947-2013)} \item{\code{AGR}}{\emph{num}: Agriculture} \item{\code{MIN}}{\emph{num}: Mining} \item{\code{MAN}}{\emph{num}: Manufacturing} \item{\code{PU}}{\emph{num}: Utilities} \item{\code{CON}}{\emph{num}: Construction} \item{\code{WRT}}{\emph{num}: Trade, restaurants and hotels} \item{\code{TRA}}{\emph{num}: Transport, storage and communication} \item{\code{FIRE}}{\emph{num}: Finance, insurance, real estate and business services} \item{\code{GOV}}{\emph{num}: Government services} \item{\code{OTH}}{\emph{num}: Community, social and personal services} \item{\code{SUM}}{\emph{num}: Summation of sector GDP} } } % \details{ %% ~~ If necessary, more details than the __description__ above ~~ % } \source{ \url{https://www.rug.nl/ggdc/productivity/10-sector/} } \references{ Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). "Patterns of Structural Change in Developing Countries." . In J. Weiss, & M. Tribe (Eds.), \emph{Routledge Handbook of Industry and Development.} (pp. 65-83). Routledge. } \seealso{ \code{\link{wlddev}}, \link[=collapse-documentation]{Collapse Overview} } \examples{ namlab(GGDC10S, class = TRUE) # aperm(qsu(GGDC10S, ~ Variable, ~ Variable + Country, vlabels = TRUE)) \donttest{ library(ggplot2) ## World Regions Structural Change Plot GGDC10S |> fmutate(across(AGR:OTH, `*`, 1 / SUM), Variable = ifelse(Variable == "VA","Value Added Share", "Employment Share")) |> replace_outliers(0, NA, "min") |> collap( ~ Variable + Region + Year, cols = 6:15) |> qDT() |> pivot(1:3, names = list(variable = "Sector"), na.rm = TRUE) |> ggplot(aes(x = Year, y = value, fill = Sector)) + geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) + theme_linedraw(base_size = 14) + facet_grid(Variable ~ Region, scales = "free_x") + scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10))) + scale_x_continuous(breaks = scales::pretty_breaks(n = 7), expand = c(0, 0))+ scale_y_continuous(breaks = scales::pretty_breaks(n = 10), expand = c(0, 0), labels = scales::percent) + theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)), strip.background = element_rect(colour = "grey30", fill = "grey30")) # A function to plot the structural change of an arbitrary country plotGGDC <- function(ctry) { GGDC10S |> fsubset(Country == ctry, Variable, Year, AGR:SUM) |> fmutate(across(AGR:OTH, `*`, 1 / SUM), SUM = NULL, Variable = ifelse(Variable == "VA","Value Added Share", "Employment Share")) |> replace_outliers(0, NA, "min") |> qDT() |> pivot(1:2, names = list(variable = "Sector"), na.rm = TRUE) |> ggplot(aes(x = Year, y = value, fill = Sector)) + geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) + theme_linedraw(base_size = 14) + facet_wrap( ~ Variable) + scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10))) + scale_x_continuous(breaks = scales::pretty_breaks(n = 7), expand = c(0, 0)) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10), expand = c(0, 0), labels = scales::percent) + theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)), strip.background = element_rect(colour = "grey20", fill = "grey20"), strip.text = element_text(face = "bold")) } plotGGDC("BWA") } } \keyword{datasets} collapse/man/fslice.Rd0000644000176200001440000000676715056572047014416 0ustar liggesusers\name{fslice} \alias{fslice} \alias{fslicev} \title{ Fast Slicing of Matrix-Like Objects } \description{ A fast function to extract rows from a matrix or data frame-like object (by groups). } \usage{ fslice(x, ..., n = 1, how = "first", order.by = NULL, na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE) fslicev(x, cols = NULL, n = 1, how = "first", order.by = NULL, na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE, ...) } \arguments{ \item{x}{a matrix, data frame or list-like object, including 'grouped_df'.} \item{\dots}{for \code{fslice}: names or sequences of columns to group by - passed to \code{\link{fselect}}. If \code{x} is a matrix: atomic vectors to group \code{x}. Can be empty to operate on (un)grouped data. For \code{fslicev}: further arguments passed to \code{\link{GRP}} (such as \code{decreasing}, \code{na.last}, \code{method}). } \item{cols}{select columns to group by, using column names, indices, a logical vector or a selector function (e.g. \code{is_categorical}). It can also be a list of vectors, or, if \code{x} is a matrix, a single vector.} \item{n}{integer or proportion (if < 1). Number of rows to select from each group. If a proportion is provided, it is converted to the equivalent number of rows using \code{max(1, round(n * nrow(x)))} or \code{max(1, round(n * nrow(x) / N.groups))} for grouped data.} \item{how}{character. Method to select rows. One of: \itemize{ \item \code{"first"}: select first \code{n} rows \item \code{"last"}: select last \code{n} rows \item \code{"min"}: select \code{n} rows with minimum values of \code{order.by} \item \code{"max"}: select \code{n} rows with maximum values of \code{order.by} } } \item{order.by}{vector or column name to order by when \code{how} is \code{"min"} or \code{"max"}. Must be same length as rows in \code{x}. In \code{fslice} it must not be quoted.} \item{na.rm}{logical. If \code{TRUE}, missing values in \code{order.by} are removed before selecting rows.} \item{sort}{logical. If \code{TRUE}, sort selected rows on the grouping columns. \code{FALSE} uses first-appearance order (including grouping columns if \code{how} is \code{"first"} or \code{"last"}) - fastest.} \item{with.ties}{logical. If \code{TRUE} and \code{how} is \code{"min"} or \code{"max"}, returns all rows with the extreme value. Currently only supported for \code{n = 1} and \code{sort = FALSE}.} } \value{ A subset of \code{x} containing the selected rows. } \seealso{ \code{\link{fsubset}}, \code{\link{fcount}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Basic usage fslice(mtcars, n = 3) # First 3 rows fslice(mtcars, n = 3, how = "last") # Last 3 rows fslice(mtcars, n = 0.1) # First 10\% of rows # Using order.by fslice(mtcars, n = 3, how = "min", order.by = mpg) # 3 cars with lowest mpg fslice(mtcars, n = 3, how = "max", order.by = mpg) # 3 cars with highest mpg # With grouping mtcars |> fslice(cyl, n = 2) # First 2 cars per cylinder mtcars |> fslice(cyl, n = 2, sort = TRUE) # with sorting (slightly less efficient) mtcars |> fslice(cyl, n = 2, how = "min", order.by = mpg) # 2 lowest mpg cars per cylinder # Using with.ties mtcars |> fslice(cyl, n = 1, how = "min", order.by = mpg, with.ties = TRUE) # With grouped data mtcars |> fgroup_by(cyl) |> fslice(n = 2, how = "max", order.by = mpg) # 2 highest mpg cars per cylinder } \keyword{manip} collapse/man/psmat.Rd0000644000176200001440000001475514777170130014265 0ustar liggesusers\name{psmat} \alias{psmat} \alias{psmat.default} \alias{psmat.pseries} \alias{psmat.data.frame} \alias{psmat.pdata.frame} \alias{plot.psmat} \alias{aperm.psmat} \alias{[.psmat} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Matrix / Array from Panel Series } \description{ \code{psmat} efficiently expands a panel-vector or 'indexed_series' ('pseries') into a matrix. If a data frame or 'indexed_frame' ('pdata.frame') is passed, \code{psmat} returns a 3D array or a list of matrices. % By default the matrix is created such that group-identifiers constitute the rows and time the columns. } \usage{ psmat(x, \dots) \method{psmat}{default}(x, g, t = NULL, transpose = FALSE, fill = NULL, \dots) \method{psmat}{data.frame}(x, by, t = NULL, cols = NULL, transpose = FALSE, fill = NULL, array = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{psmat}{pseries}(x, transpose = FALSE, fill = NULL, drop.index.levels = "none", \dots) \method{psmat}{pdata.frame}(x, cols = NULL, transpose = FALSE, fill = NULL, array = TRUE, drop.index.levels = "none", \dots) \method{plot}{psmat}(x, legend = FALSE, colours = legend, labs = NULL, grid = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector, indexed series 'indexed_series' ('pseries'), data frame or 'indexed_frame' ('pdata.frame').} \item{g}{a factor, \code{GRP} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{GRP} object) used to group \code{x}. If the panel is balanced an integer indicating the number of groups can also be supplied. See Examples.} \item{by}{\emph{data.frame method}: Same input as \code{g}, but also allows one- or two-sided formulas using the variables in \code{x}, i.e. \code{~ idvar} or \code{var1 + var2 ~ idvar1 + idvar2}.} \item{t}{same inputs as \code{g/by}, to indicate the time-variable(s) or second identifier(s). \code{g} and \code{t} together should fully identify the panel. If \code{t = NULL}, the data is assumed sorted and \code{seq_col} is used to generate rownames for the output matrix.} \item{cols}{\emph{data.frame method}: Select columns using a function, column names, indices or a logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{transpose}{logical. \code{TRUE} generates the matrix such that \code{g/by -> columns, t -> rows}. Default is \code{g/by -> rows, t -> columns}.} \item{fill}{element to fill empty slots of matrix / array if panel is unbalanced. \code{NULL} will generate a \code{NA} of the right type.} \item{array}{\emph{data.frame / pdata.frame methods}: logical. \code{TRUE} returns a 3D array (if just one column is selected a matrix is returned). \code{FALSE} returns a list of matrices.} \item{drop.index.levels}{character. Either \code{"id"}, \code{"time"}, \code{"all"} or \code{"none"}. See \link{indexing}.} \item{\dots}{arguments to be passed to or from other methods, or for the plot method additional arguments passed to \code{\link{ts.plot}}.} \item{legend}{logical. Automatically create a legend of panel-groups.} \item{colours}{either \code{TRUE} to automatically colour by panel-groups using \code{\link{rainbow}} or a character vector of colours matching the number of panel-groups (series).} \item{labs}{character. Provide a character-vector of variable labels / series titles when plotting an array.} \item{grid}{logical. Calls \code{\link{grid}} to draw gridlines on the plot.} } \details{ If n > 2 index variables are attached to an indexed series or frame, the first n-1 variables in the index are interacted. } \value{ A matrix or 3D array containing the data in \code{x}, where by default the rows constitute the groups-ids (\code{g/by}) and the columns the time variable or individual ids (\code{t}). 3D arrays contain the variables in the 3rd dimension. The objects have a class 'psmat', and also a 'transpose' attribute indicating whether \code{transpose = TRUE}. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ The \code{pdata.frame} method only works for properly subsetted objects of class 'pdata.frame'. A list of 'pseries' won't work. There also exist simple \code{aperm} and \code{[} (subset) methods for 'psmat' objects. These differ from the default methods only by keeping the class and the 'transpose' attribute. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ ## World Development Panel Data head(wlddev) # View data qsu(wlddev, pid = ~ iso3c, cols = 9:12, vlabels = TRUE) # Sumarizing data str(psmat(wlddev$PCGDP, wlddev$iso3c, wlddev$year)) # Generating matrix of GDP r <- psmat(wlddev, PCGDP ~ iso3c, ~ year) # Same thing using data.frame method plot(r, main = vlabels(wlddev)[9], xlab = "Year") # Plot the matrix str(r) # See srructure str(psmat(wlddev$PCGDP, wlddev$iso3c)) # The Data is sorted, could omit t str(psmat(wlddev$PCGDP, 216)) # This panel is also balanced, so # ..indicating the number of groups would be sufficient to obtain a matrix ar <- psmat(wlddev, ~ iso3c, ~ year, 9:12) # Get array of transposed matrices str(ar) plot(ar) plot(ar, legend = TRUE) plot(psmat(collap(wlddev, ~region+year, cols = 9:12), # More legible and fancy plot ~region, ~year), legend = TRUE, labs = vlabels(wlddev)[9:12]) psml <- psmat(wlddev, ~ iso3c, ~ year, 9:12, array = FALSE) # This gives list of ps-matrices head(unlist2d(psml, "Variable", "Country", id.factor = TRUE),2) # Using unlist2d, can generate DF ## Indexing simplifies things wldi <- findex_by(wlddev, iso3c, year) # Creating an indexed frame PCGDP <- wldi$PCGDP # An indexed_series of GDP per Capita head(psmat(PCGDP), 2) # Same as above, more parsimonious plot(psmat(PCGDP)) plot(psmat(wldi[9:12])) plot(psmat(G(wldi[9:12]))) # Here plotting panel-growth rates } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{array} \keyword{ts} collapse/man/fcumsum.Rd0000644000176200001440000001273114777170130014610 0ustar liggesusers\name{fcumsum} \alias{fcumsum} \alias{fcumsum.default} \alias{fcumsum.matrix} \alias{fcumsum.data.frame} \alias{fcumsum.pseries} \alias{fcumsum.pdata.frame} \alias{fcumsum.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast (Grouped, Ordered) Cumulative Sum for Matrix-Like Objects } \description{ \code{fcumsum} is a generic function that computes the (column-wise) cumulative sum of \code{x}, (optionally) grouped by \code{g} and/or ordered by \code{o}. Several options to deal with missing values are provided. } \usage{ fcumsum(x, \dots) \method{fcumsum}{default}(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, \dots) \method{fcumsum}{matrix}(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, \dots) \method{fcumsum}{data.frame}(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{fcumsum}{pseries}(x, na.rm = .op[["na.rm"]], fill = FALSE, shift = "time", \dots) \method{fcumsum}{pdata.frame}(x, na.rm = .op[["na.rm"]], fill = FALSE, shift = "time", \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fcumsum}{grouped_df}(x, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, keep.ids = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric vector / time series, (time series) matrix, data frame, 'indexed_series' ('pseries'), 'indexed_frame' ('pdata.frame') or grouped data frame ('grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{group}}) used to group \code{x}.} \item{o}{a vector or list of vectors providing the order in which the elements of \code{x} are cumulatively summed. Will be passed to \code{\link{radixorderv}} unless \code{check.o = FALSE}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost.} \item{fill}{if \code{na.rm = TRUE}, setting \code{fill = TRUE} will overwrite missing values with the previous value of the cumulative sum, starting from 0.} \item{check.o}{logical. Programmers option: \code{FALSE} prevents passing \code{o} to \code{\link{radixorderv}}, requiring \code{o} to be a valid ordering vector that is integer typed with each element in the range \code{[1, length(x)]}. This gives some extra speed, but will terminate R if any element of \code{o} is too large or too small. } \item{shift}{\emph{pseries / pdata.frame methods}: character. \code{"time"} or \code{"row"}. See \code{\link{flag}} for details. The argument here does not control 'shifting' of data but rather the order in which elements are summed.} \item{keep.ids}{\emph{pdata.frame / grouped_df methods}: Logical. Drop all identifiers from the output (which includes all grouping variables and variables passed to \code{o}). \emph{Note}: For grouped / panel data frames identifiers are dropped, but the \code{"groups"} / \code{"index"} attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ If \code{na.rm = FALSE}, \code{fcumsum} works like \code{\link{cumsum}} and propagates missing values. The default \code{na.rm = TRUE} skips missing values and computes the cumulative sum on the non-missing values. Missing values are kept. If \code{fill = TRUE}, missing values are replaced with the previous value of the cumulative sum (starting from 0), computed on the non-missing values. By default the cumulative sum is computed in the order in which elements appear in \code{x}. If \code{o} is provided, the cumulative sum is computed in the order given by \code{radixorderv(o)}, without the need to first sort \code{x}. This applies as well if groups are used (\code{g}), in which case the cumulative sum is computed separately in each group. The \emph{pseries} and \emph{pdata.frame} methods assume that the last factor in the \link[=findex]{index} is the time-variable and the rest are grouping variables. The time-variable is passed to \code{radixorderv} and used for ordered computation, so that cumulative sums are accurately computed regardless of whether the panel-data is ordered or balanced. \code{fcumsum} explicitly supports integers. Integers in R are bounded at bounded at +-2,147,483,647, and an integer overflow error will be provided if the cumulative sum (within any group) exceeds +-2,147,483,647. In that case data should be converted to double beforehand. } \value{ the cumulative sum of values in \code{x}, (optionally) grouped by \code{g} and/or ordered by \code{o}. See Details and Examples. } \seealso{ \code{\link{fdiff}}, \code{\link{fgrowth}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Non-grouped fcumsum(AirPassengers) head(fcumsum(EuStockMarkets)) fcumsum(mtcars) # Non-grouped but ordered o <- order(rnorm(nrow(EuStockMarkets))) all.equal(copyAttrib(fcumsum(EuStockMarkets[o, ], o = o)[order(o), ], EuStockMarkets), fcumsum(EuStockMarkets)) ## Grouped head(with(wlddev, fcumsum(PCGDP, iso3c))) ## Grouped and ordered head(with(wlddev, fcumsum(PCGDP, iso3c, year))) head(with(wlddev, fcumsum(PCGDP, iso3c, year, fill = TRUE))) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{ts} % __ONLY ONE__ keyword per line collapse/man/ldepth.Rd0000644000176200001440000000314714777170130014412 0ustar liggesusers\name{ldepth} \alias{ldepth} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Determine the Depth / Level of Nesting of a List } \description{ \code{ldepth} provides the depth of a list or list-like structure. } \usage{ ldepth(l, DF.as.list = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{l}{a list.} \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.} } \details{ The depth or level or nesting of a list or list-like structure (e.g. a model object) is found by recursing down to the bottom of the list and adding an integer count of 1 for each level passed. For example the depth of a data frame is 1. If a data frame has list-columns, the depth is 2. However for reasons of efficiency, if \code{l} is not a data frame and \code{DF.as.list = FALSE}, data frames found inside \code{l} will not be checked for list column's but assumed to have a depth of 1. } \value{ A single integer indicating the depth of the list. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{is_unlistable}}, \code{\link{has_elem}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ l <- list(1, 2) ldepth(l) l <- list(1, 2, mtcars) ldepth(l) ldepth(l, DF.as.list = FALSE) l <- list(1, 2, list(4, 5, list(6, mtcars))) ldepth(l) ldepth(l, DF.as.list = FALSE) } \keyword{list} \keyword{utilities} collapse/man/groupid.Rd0000644000176200001440000000430414777170130014577 0ustar liggesusers\name{groupid} \alias{groupid} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Generate Run-Length Type Group-Id } \description{ \code{groupid} is an enhanced version of \code{data.table::rleid} for atomic vectors. It generates a run-length type group-id where consecutive identical values are assigned the same integer. It is a generalization as it can be applied to unordered vectors, generate group id's starting from an arbitrary value, and skip missing values. } \usage{ groupid(x, o = NULL, start = 1L, na.skip = FALSE, check.o = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{an atomic vector of any type. Attributes are not considered.} \item{o}{an (optional) integer ordering vector specifying the order by which to pass through \code{x}.} \item{start}{integer. The starting value of the resulting group-id. Default is starting from 1.} %For C++ programmers, starting from 0 could be a better choice. } \item{na.skip}{logical. Skip missing values i.e. if \code{TRUE} something like \code{groupid(c("a", NA, "a"))} gives \code{c(1, NA, 1)} whereas \code{FALSE} gives \code{c(1, 2, 3)}.} \item{check.o}{logical. Programmers option: \code{FALSE} prevents checking that each element of \code{o} is in the range \code{[1, length(x)]}, it only checks the length of \code{o}. This gives some extra speed, but will terminate R if any element of \code{o} is too large or too small. } } \value{ An integer vector of class 'qG'. See \code{\link{qG}}. } \seealso{ \code{\link{seqid}}, \code{\link{timeid}}, \code{\link{qG}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ groupid(airquality$Month) groupid(airquality$Month, start = 0) groupid(wlddev$country)[1:100] ## Same thing since country is alphabetically ordered: (groupid is faster..) all.equal(groupid(wlddev$country), qG(wlddev$country, na.exclude = FALSE)) ## When data is unordered, group-id can be generated through an ordering.. uo <- order(rnorm(fnrow(airquality))) monthuo <- airquality$Month[uo] o <- order(monthuo) groupid(monthuo, o) identical(groupid(monthuo, o)[o], unattrib(groupid(airquality$Month))) } \keyword{manip} collapse/man/flag.Rd0000644000176200001440000003650514777170130014047 0ustar liggesusers\name{flag} \alias{flag} \alias{flag.default} \alias{flag.matrix} \alias{flag.data.frame} \alias{flag.pseries} \alias{flag.pdata.frame} \alias{flag.grouped_df} \alias{L} \alias{L.default} \alias{L.matrix} \alias{L.data.frame} \alias{L.pseries} \alias{L.pdata.frame} \alias{L.grouped_df} \alias{F} \alias{F.default} \alias{F.matrix} \alias{F.data.frame} \alias{F.pseries} \alias{F.pdata.frame} \alias{F.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Lags and Leads for Time Series and Panel Data } \description{ \code{flag} is an S3 generic to compute (sequences of) lags and leads. \code{L} and \code{F} are wrappers around \code{flag} representing the lag- and lead-operators, such that \code{L(x,-1) = F(x,1) = F(x)} and \code{L(x,-3:3) = F(x,3:-3)}. \code{L} and \code{F} provide more flexibility than \code{flag} when applied to data frames (i.e. column subsetting, formula input and id-variable-preservation capabilities\dots), but are otherwise identical. \emph{Note:} Since v1.9.0, \code{F} is no longer exported, but can be accessed using \code{collapse:::F}, or through setting \code{options(collapse_export_F = TRUE)} before loading the package. The syntax is the same as \code{L}. % (\code{flag} is more of a programmers function in style of the \link[=fast-statistical-functions]{Fast Statistical Functions} while \code{L} and \code{F} are more practical to use in regression formulas or for computations on data frames.) } \usage{ flag(x, n = 1, \dots) L(x, n = 1, \dots) \method{flag}{default}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = TRUE, \dots) \method{L}{default}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = .op[["stub"]], \dots) \method{flag}{matrix}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, \dots) \method{L}{matrix}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = .op[["stub"]], \dots) \method{flag}{data.frame}(x, n = 1, g = NULL, t = NULL, fill = NA, stubs = length(n) > 1L, \dots) \method{L}{data.frame}(x, n = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, stubs = .op[["stub"]], keep.ids = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{flag}{pseries}(x, n = 1, fill = NA, stubs = length(n) > 1L, shift = "time", \dots) \method{L}{pseries}(x, n = 1, fill = NA, stubs = .op[["stub"]], shift = "time", \dots) \method{flag}{pdata.frame}(x, n = 1, fill = NA, stubs = length(n) > 1L, shift = "time", \dots) \method{L}{pdata.frame}(x, n = 1, cols = is.numeric, fill = NA, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{flag}{grouped_df}(x, n = 1, t = NULL, fill = NA, stubs = length(n) > 1L, keep.ids = TRUE, \dots) \method{L}{grouped_df}(x, n = 1, t = NULL, fill = NA, stubs = .op[["stub"]], keep.ids = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector / time series, (time series) matrix, data frame, 'indexed_series' ('pseries'), 'indexed_frame' ('pdata.frame') or grouped data frame ('grouped_df'). Data must not be numeric.} \item{n}{integer. A vector indicating the lags / leads to compute (passing negative integers to \code{flag} or \code{L} computes leads, passing negative integers to \code{F} computes lags).} \item{g}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{group}}) used to group \code{x}. \emph{Note} that without \code{t}, all values in a group need to be consecutive and in the right order. See Details.} \item{by}{\emph{data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{t}{a time vector or list of vectors. Data frame methods also allows one-sided formula i.e. \code{~time}. grouped_df method supports lazy-evaluation i.e. \code{time} (no quotes). Either support wrapping a transformation function e.g. \code{~timeid(time)}, \code{qG(time)} etc.. See also Details on how \code{t} is processed.} \item{cols}{\emph{data.frame method}: Select columns to lag using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{fill}{value to insert when vectors are shifted. Default is \code{NA}. } \item{stubs}{logical. \code{TRUE} (default) will rename all lagged / leaded columns by adding a stub or prefix "L\code{n}." / "F\code{n}.".} \item{shift}{\emph{pseries / pdata.frame methods}: character. \code{"time"} performs a fully identified time-lag (if the index contains a time variable), whereas \code{"row"} performs a simple (group) lag, where observations are shifted based on the present order of rows (in each group). The latter is significantly faster, but requires time series / panels to be regularly spaced and sorted by time within each group.} \item{keep.ids}{\emph{data.frame / pdata.frame / grouped_df methods}: Logical. Drop all identifiers from the output (which includes all variables passed to \code{by} or \code{t} using formulas). \emph{Note}: For 'grouped_df' / 'pdata.frame' identifiers are dropped, but the \code{"groups"} / \code{"index"} attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ If a single integer is passed to \code{n}, and \code{g/by} and \code{t} are left empty, \code{flag/L/F} just returns \code{x} with all columns lagged / leaded by \code{n}. If \code{length(n)>1}, and \code{x} is an atomic vector (time series), \code{flag/L/F} returns a (time series) matrix with lags / leads computed in the same order as passed to \code{n}. If instead \code{x} is a matrix / data frame, a matrix / data frame with \code{ncol(x)*length(n)} columns is returned where columns are sorted first by variable and then by lag (so all lags computed on a variable are grouped together). \code{x} can be of any standard data type. With groups/panel-identifiers supplied to \code{g/by}, \code{flag/L/F} efficiently computes a panel-lag/lead by shifting the entire vector(s) but inserting \code{fill} elements in the right places. If \code{t} is left empty, the data needs to be ordered such that all values belonging to a group are consecutive and in the right order. It is not necessary that the groups themselves are alphabetically ordered. If a time-variable is supplied to \code{t} (or a list of time-variables uniquely identifying the time-dimension), the series / panel is fully identified and lags / leads can be securely computed even if the data is unordered / irregular. %It is also possible to lag unordered or irregular time series utilizing only the \code{t} argument to identify the temporal dimension of the data. % Since v1.5.0 \code{flag/L/F} provide full built-in support for irregular time series and unbalanced panels. The suggested workaround using the \code{\link{seqid}} function is therefore no longer necessary. %\code{flag/L/F} supports balanced panels and unbalanced panels where various individuals are observed for different time-sequences (both start, end and duration of observation can differ for each individual). \code{flag/L/F} does not natively support irregularly spaced time series and panels, that is situations where there are either gaps in time and/or repeated observations in the same time-period for some individual (see also computational details below). For such cases the function \code{\link{seqid}} can be used to generate an appropriate panel-identifier (i.e. splitting individuals with an irregular time-sequence into multiple individuals with regular time-sequences before applying \code{flag/L/F}). %(in that case data is shifted around and \code{fill} values are inserted in such a way that if the data were sorted afterwards the result would be identical to computing lags / leads on sorted data). Internally this works by using the grouping- and time-variable(s) to create an ordering and then accessing the panel-vector(s) through this ordering. If the data is just a bit unordered, such computations are nearly as fast as computations on ordered data (without \code{t}), however, if the data is very unordered, it can take significantly longer. Since most panel data come perfectly or pretty ordered, I recommend always supplying \code{t} to be on the safe-side. % It is also possible to compute lags / leads on unordered time series (thus utilizing \code{t} but leaving \code{g/by} empty), although this is probably more rare to encounter than unordered panels. Irregularly spaced time series can also be lagged using a panel- identifier generated with \code{\link{seqid}}. \bold{Note} that the \code{t} argument is processed as follows: If \code{is.factor(t) || (is.numeric(t) && !is.object(t))} (i.e. \code{t} is a factor or plain numeric vector), it is assumed to represent unit timesteps (e.g. a 'year' variable in a typical dataset), and thus coerced to integer using \code{as.integer(t)} and directly passed to C++ without further checks or transformations at the R-level. Otherwise, if \code{is.object(t) && is.numeric(unclass(t))} (i.e. \code{t} is a numeric time object, most likely 'Date' or 'POSIXct'), this object is passed through \code{\link{timeid}} before going to C++. Else (e.g. \code{t} is character), it is passed through \code{\link{qG}} which performs ordered grouping. If \code{t} is a list of multiple variables, it is passed through \code{\link{finteraction}}. You can customize this behavior by calling any of these functions (including \code{unclass/as.integer}) on your time variable beforehand. At the C++ level, if both \code{g/by} and \code{t} are supplied, \code{flag} works as follows: Use two initial passes to create an ordering through which the data are accessed. First-pass: Calculate minimum and maximum time-value for each individual. Second-pass: Generate an internal ordering vector (\code{o}) by placing the current element index into the vector slot obtained by adding the cumulative group size and the current time-value subtracted its individual-minimum together. This method of computation is faster than any sort-based method and delivers optimal performance if the panel-id supplied to \code{g/by} is already a factor variable, and if \code{t} is an integer/factor variable. For irregular time/panel series, \code{length(o) > length(x)}, and \code{o} represents the unobserved 'complete series'. If \code{length(o) > 1e7 && length(o) > 3*length(x)}, a warning is issued to make you aware of potential performance implications of the oversized ordering vector. %If \code{t} is not factor or integer but instead \code{is.double(t) && !is.object(t)}, it is assumed to be integer represented by double and converted using \code{as.integer(t)}. For other objects such as dates, \code{t} is grouped using \code{\link{qG}} or \code{\link{GRP}} (for multiple time identifiers). Similarly, if \code{g/by} is not factor or 'GRP' object, \code{\link{qG}} or \code{\link{GRP}} will be called to group the respective identifier. Since grouping is more expensive than computing lags, prepare the data for optimal performance (or use \emph{plm} classes). See also the Note. %A caveat of not using sort-based methods is that gaps or repeated values in time are only recognized towards the end of the second pass where they cannot be rectified anymore, and thus \code{flag/L/F} does not natively support irregular panels but throws an error. The 'indexed_series' ('pseries') and 'indexed_frame' ('pdata.frame') methods automatically utilize the identifiers attached to these objects, which are already factors, thus lagging is quite efficient. However, the internal ordering vector still needs to be computed, thus if data are known to be ordered and regularly spaced, using \code{shift = "row"} to toggle a simple group-lag (same as utilizing \code{g} but not \code{t} in other methods) can yield a significant performance gain. %and thus securely and efficiently compute fully identified panel-lags. If these objects have > 2 panel-identifiers attached to them, the last identifier is assumed to be the time-variable, and the others are taken as grouping-variables and interacted. Note that \code{flag/L/F} is significantly faster than \code{plm::lag/plm::lead} since the latter is written in R and based on a Split-Apply-Combine logic. } \value{ \code{x} lagged / leaded \code{n}-times, grouped by \code{g/by}, ordered by \code{t}. See Details and Examples. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %Since v1.7.0, if \code{is.double(t) && !is.object(t)}, it is coerced to integer using \code{as.integer(t)}. This is to avoid the inefficiency of ordered grouping, and owes to the fact that in most data imported into R, the time (year) variables are coded as double although they should be integer. % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{fdiff}}, \code{\link{fgrowth}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Time Series: AirPassengers L(AirPassengers) # 1 lag flag(AirPassengers) # Same L(AirPassengers, -1) # 1 lead head(L(AirPassengers, -1:3)) # 1 lead and 3 lags - output as matrix ## Time Series Matrix of 4 EU Stock Market Indicators, 1991-1998 tsp(EuStockMarkets) # Data is recorded on 260 days per year freq <- frequency(EuStockMarkets) plot(stl(EuStockMarkets[,"DAX"], freq)) # There is some obvious seasonality head(L(EuStockMarkets, -1:3 * freq)) # 1 annual lead and 3 annual lags summary(lm(DAX ~., data = L(EuStockMarkets,-1:3*freq))) # DAX regressed on its own annual lead, # lags and the lead/lags of the other series ## World Development Panel Data head(flag(wlddev, 1, wlddev$iso3c, wlddev$year)) # This lags all variables, head(L(wlddev, 1, ~iso3c, ~year)) # This lags all numeric variables head(L(wlddev, 1, ~iso3c)) # Without t: Works because data is ordered head(L(wlddev, 1, PCGDP + LIFEEX ~ iso3c, ~year)) # This lags GDP per Capita & Life Expectancy head(L(wlddev, 0:2, ~ iso3c, ~year, cols = 9:10)) # Same, also retaining original series head(L(wlddev, 1:2, PCGDP + LIFEEX ~ iso3c, ~year, # Two lags, dropping id columns keep.ids = FALSE)) # Regressing GDP on its's lags and life-Expectancy and its lags summary(lm(PCGDP ~ ., L(wlddev, 0:2, ~iso3c, ~year, 9:10, keep.ids = FALSE))) ## Indexing the data: facilitates time-based computations wldi <- findex_by(wlddev, iso3c, year) head(L(wldi, 0:2, cols = 9:10)) # Again 2 lags of GDP and LIFEEX head(L(wldi$PCGDP)) # Lagging an indexed series summary(lm(PCGDP ~ L(PCGDP,1:2) + L(LIFEEX,0:2), wldi)) # Running the lm again summary(lm(PCGDP ~ ., L(wldi, 0:2, 9:10, keep.ids = FALSE))) # Same thing ## Using grouped data: library(magrittr) wlddev |> fgroup_by(iso3c) |> fselect(PCGDP,LIFEEX) |> flag(0:2) wlddev |> fgroup_by(iso3c) |> fselect(year,PCGDP,LIFEEX) |> flag(0:2,year) # Also using t (safer) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{ts} % __ONLY ONE__ keyword per line collapse/man/GRP.Rd0000644000176200001440000005400715202504365013556 0ustar liggesusers\name{GRP} \alias{GRP} \alias{GRP.GRP} \alias{GRP.default} \alias{GRP.factor} \alias{GRP.qG} \alias{GRP.pseries} \alias{GRP.pdata.frame} \alias{GRP.grouped_df} \alias{fgroup_by} \alias{gby} \alias{group_by_vars} \alias{fgroup_vars} \alias{fungroup} \alias{gsplit} \alias{greorder} \alias{is_GRP} \alias{length.GRP} \alias{print.GRP} \alias{plot.GRP} \alias{GRPnames} \alias{GRPid} \alias{GRPN} \alias{as_factor_GRP} \title{Fast Grouping / \emph{collapse} Grouping Objects} \description{ \code{GRP} performs fast, ordered and unordered, groupings of vectors and data frames (or lists of vectors) using \code{\link{radixorder}} or \code{\link{group}}. The output is a list-like object of class 'GRP' which can be printed, plotted and used as an efficient input to all of \emph{collapse}'s fast statistical and transformation functions and operators (see macros \code{.FAST_FUN} and \code{.OPERATOR_FUN}), as well as to \code{\link{collap}}, \code{\link{BY}} and \code{\link{TRA}}. \code{fgroup_by} is similar to \code{dplyr::group_by} but faster and class-agnostic. It creates a grouped data frame with a 'GRP' object attached - for fast dplyr-like programming with \emph{collapse}'s fast functions. There are also several conversion methods to and from 'GRP' objects. Notable among these is \code{GRP.grouped_df}, which returns a 'GRP' object from a grouped data frame created with \code{dplyr::group_by} or \code{fgroup_by}, and the duo \code{GRP.factor} and \code{as_factor_GRP}. \code{gsplit} efficiently splits a vector based on a 'GRP' object, and \code{greorder} helps to recombine the results. These are the workhorses behind functions like \code{\link{BY}}, and \code{\link{collap}}, \code{\link{fsummarise}} and \code{\link{fmutate}} when evaluated with base R and user-defined functions. } \usage{ GRP(X, \dots) \method{GRP}{default}(X, by = NULL, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.groups = TRUE, return.order = sort, method = "auto", drop = TRUE, call = TRUE, \dots) \method{GRP}{factor}(X, \dots, group.sizes = TRUE, drop = FALSE, return.groups = TRUE, call = TRUE) \method{GRP}{qG}(X, \dots, group.sizes = TRUE, return.groups = TRUE, call = TRUE) \method{GRP}{pseries}(X, effect = 1L, \dots, group.sizes = TRUE, return.groups = TRUE, call = TRUE) \method{GRP}{pdata.frame}(X, effect = 1L, \dots, group.sizes = TRUE, return.groups = TRUE, call = TRUE) \method{GRP}{grouped_df}(X, \dots, return.groups = TRUE, call = TRUE) # Identify 'GRP' objects is_GRP(x) \method{length}{GRP}(x) # Length of data being grouped GRPN(x, expand = TRUE, \dots) # Group sizes (default: expanded to match data length) GRPid(x, sort = FALSE, \dots) # Group id (data length, same as GRP(.)$group.id) GRPnames(x, force.char = TRUE, sep = ".") # Group names as_factor_GRP(x, ordered = FALSE, sep = ".") # 'GRP'-object to (ordered) factor conversion # Efficiently split a vector using a 'GRP' object gsplit(x, g, use.g.names = FALSE, \dots) # Efficiently reorder y = unlist(gsplit(x, g)) such that identical(greorder(y, g), x) greorder(x, g, \dots) # Fast, class-agnostic pendant to dplyr::group_by for use with fast functions, see details fgroup_by(.X, \dots, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.groups = TRUE, return.order = sort, method = "auto", .drop = TRUE) # Standard-evaluation analogue (slim wrapper around GRP.default(), for programming) group_by_vars(X, by = NULL, ...) # Shorthand for fgroup_by gby(.X, \dots, sort = .op[["sort"]], decreasing = FALSE, na.last = TRUE, return.groups = TRUE, return.order = sort, method = "auto", .drop = TRUE) # Get grouping columns from a grouped data frame created with dplyr::group_by or fgroup_by fgroup_vars(X, return = "data") # Ungroup grouped data frame created with dplyr::group_by or fgroup_by fungroup(X, \dots) \method{print}{GRP}(x, n = 6, \dots) \method{plot}{GRP}(x, breaks = "auto", type = "l", horizontal = FALSE, \dots) } \arguments{ \item{X}{a vector, list of columns or data frame (default method), or a suitable object (conversion / extractor methods).} \item{.X}{a data frame or list.} \item{x, g}{a 'GRP' object. For \code{gsplit/greorder}, \code{x} can be a vector of any type, or \code{NULL} to return the integer indices of the groups. \code{gsplit/greorder/GRPN/GRPid} also support vectors or data frames to be passed to \code{g/x}.} \item{by}{if \code{X} is a data frame or list, \code{by} can indicate columns to use for the grouping (by default all columns are used). Columns must be passed using a vector of column names, indices, a one-sided formula i.e. \code{~ col1 + col2}, a logical vector (converted to indices) or a selector function e.g. \code{is_categorical}.} \item{sort}{logical. If \code{FALSE}, groups are not ordered but simply grouped in the order of first appearance of unique elements / rows. This often provides a performance gain if the data was not sorted beforehand. See also \code{method}.} \item{ordered}{logical. \code{TRUE} adds a class 'ordered' i.e. generates an ordered factor.} \item{decreasing}{logical. Should the sort order be increasing or decreasing? Can be a vector of length equal to the number of arguments in \code{X} / \code{by} (argument passed to \code{\link{radixorder}}).} \item{na.last}{logical. If missing values are encountered in grouping vector/columns, assign them to the last group (argument passed to \code{\link{radixorder}}).} \item{return.groups}{logical. Include the unique groups in the created GRP object.} \item{return.order}{logical. If \code{sort = TRUE}, include the output from \code{\link{radixorder}} in the created GRP object. This brings performance improvements in \code{gsplit} (and thus also benefits grouped execution of base R functions). } \item{method}{character. The algorithm to use for grouping: either \code{"radix"}, \code{"hash"} or \code{"auto"}. \code{"auto"} will chose \code{"radix"} when \code{sort = TRUE}, yielding ordered grouping via \code{\link{radixorder}}, and \code{"hash"}-based grouping in first-appearance order via \code{\link{group}} otherwise. It is possibly to put \code{method = "radix"} and \code{sort = FALSE}, which will group character data in first appearance order but sort numeric data (a good hybrid option). \code{method = "hash"} currently does not support any sorting, thus putting \code{sort = TRUE} will simply be ignored.} \item{group.sizes}{logical. \code{TRUE} tabulates factor levels using \code{\link{tabulate}} to create a vector of group sizes; \code{FALSE} leaves that slot empty when converting from factors.} \item{drop, .drop}{logical. In \code{GRP.factor}: \code{TRUE} efficiently drops unused factor levels beforehand using \code{\link{fdroplevels}}. In \code{GRP.default}/\code{fgroup_by}/\code{gby}: if \code{FALSE} and any of the grouping columns are factors, the full Cartesian product of factor levels (combined with the observed unique values of any non-factor grouping columns) is used to define the groups. Group combinations not present in the data have \code{group.sizes} of \code{0} and \code{group.starts} of \code{0L}. This is analogous to \code{dplyr::group_by(.drop = FALSE)} and useful for aggregations or counts that should retain empty groups (e.g. \code{fcount(..., drop = FALSE)} or \code{collap(..., drop = FALSE)}). The default is \code{TRUE} (only observed combinations are kept). Only applies to the default method - has no effect for grouped data frames or factor/qG/pseries/pdata.frame inputs.} \item{call}{logical. \code{TRUE} calls \code{\link{match.call}} and saves it in the final slot of the GRP object.} \item{expand}{logical. \code{TRUE} returns a vector the same length as the data. \code{FALSE} returns the group sizes (computed in first-appearance-order of groups if \code{x} is not already a 'GRP' object). } \item{force.char}{logical. Always output group names as character vector, even if a single numeric vector was passed to \code{GRP.default}.} \item{sep}{character. The separator passed to \code{\link{paste}} when creating group names from multiple grouping variables by pasting them together.} \item{effect}{\emph{plm} / indexed data methods: Select which panel identifier should be used as grouping variable. 1L takes the first variable in the \link[=findex]{index}, 2L the second etc., identifiers can also be passed as a character string. More than one variable can be supplied. } \item{return}{an integer or string specifying what \code{fgroup_vars} should return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "data" \tab\tab full grouping columns (default) \cr 2 \tab\tab "unique" \tab\tab unique rows of grouping columns \cr 3 \tab\tab "names" \tab\tab names of grouping columns \cr 4 \tab\tab "indices" \tab\tab integer indices of grouping columns \cr 5 \tab\tab "named_indices" \tab\tab named integer indices of grouping columns \cr 6 \tab\tab "logical" \tab\tab logical selection vector of grouping columns \cr 7 \tab\tab "named_logical" \tab\tab named logical selection vector of grouping columns \cr } } \item{use.g.names}{logical. \code{TRUE} returns a named list, like \code{\link{split}}. \code{FALSE} is slightly more efficient. } % \item{untibble}{logical. \code{TRUE} also removes classes \code{c("tbl_df", "tbl")} from \code{X}. \code{fgroup_by} attaches an attribute \code{"was.tibble"} indicating if \code{X} was a tibble prior to grouping. The argument thus defaults to \code{TRUE} if this attribute is attached and equal to \code{FALSE}, so that \code{identical(X, X |> fgroup_by(...) |> fungroup())}, regardless of the nature of \code{X}. } \item{n}{integer. Number of groups to print out.} \item{breaks}{integer. Number of breaks in the histogram of group-sizes.} \item{type}{linetype for plot.} \item{horizontal}{logical. \code{TRUE} arranges plots next to each other, instead of above each other. \emph{Note} that the size of each group is only plotted for objects with less than 10,000 groups.} \item{\dots}{for \code{fgroup_by}: unquoted comma-separated column names, sequences of columns, expressions involving columns, and column names, indices, logical vectors or selector functions. See Examples. For \code{group_by_vars}, \code{gsplit}, \code{greorder}, \code{GRPN} and \code{GRPid}: further arguments passed to \code{GRP} (if \code{g/x} is not already a 'GRP' object). For example the \code{by} argument could be used if a data frame is passed.} } \details{ \code{GRP} is a central function in the \emph{collapse} package because it provides, in the form of integer vectors, some key pieces of information to efficiently perform grouped operations at the \code{C/C++} level. Most statistical function require information about (1) the number of groups (2) an integer group-id indicating which values / rows belong to which group and (3) information about the size of each group. Provided with these, \emph{collapse}'s \link[=fast-statistical-functions]{Fast Statistical Functions} pre-allocate intermediate and result vectors of the right sizes and (in most cases) perform grouped statistical computations in a single pass through the data. The sorting functionality of \code{GRP.default} lets groups receive different integer-id's depending on whether the groups are sorted \code{sort = TRUE} (\code{FALSE} gives first-appearance order), and in which order (argument \code{decreasing}). This affects the order of values/rows in the output whenever an aggregation is performed. % \emph{Note} that \code{sort = FALSE} is only effective on character vectors, numeric grouping vectors will always produce ordered groupings. %This in-turn changes the order of values/rows in the output of \emph{collapse} functions (the row/value corresponding to group 1 always comes out on top). The default setting with \code{sort = TRUE} and \code{decreasing = FALSE} results in groups being sorted in ascending order. This is equivalent to performing grouped operations in \emph{data.table} using \code{keyby}, whereas \code{sort = FALSE} is equivalent to \emph{data.table} grouping with \code{by}, however this only works if the \code{by} columns are character, numeric grouping columns are always ordered. Other elements in the object provide information about whether the data was sorted by the variables defining the grouping (6) and the ordering vector (7). These also feed into optimizations in \code{gsplit/greorder} that benefit the execution of base R functions across groups. Complimentary to \code{GRP}, the function \code{fgroup_by} is a significantly faster and class-agnostic alternative to \code{dplyr::group_by} for programming with \emph{collapse}. It creates a grouped data frame with a 'GRP' object attached in a \code{"groups"} attribute. This data frame has classes 'GRP_df', \dots, 'grouped_df' and 'data.frame', where \dots stands for any other classes the input frame inherits such as 'data.table', 'sf', 'tbl_df', 'indexed_frame' etc.. \emph{collapse} functions with a 'grouped_df' method respond to 'grouped_df' objects created with either \code{fgroup_by} or \code{dplyr::group_by}. The method \code{GRP.grouped_df} takes the \code{"groups"} attribute from a 'grouped_df' and converts it to a 'GRP' object if created with \code{dplyr::group_by}. %If the grouped data frame was generated using \code{fgroup_by}, all work is done already. If it was created using \code{dplyr::group_by}, a C routine is called to efficiently convert the grouping object. The 'GRP_df' class in front responds to \code{print.GRP_df} which first calls \code{print(fungroup(x), ...)} and prints one line below the object indicating the grouping variables, followed, in square brackets, by some statistics on the group sizes: \code{[N | Mean (SD) Min-Max]}. The mean is rounded to a full number and the standard deviation (SD) to one digit. Minimum and maximum are only displayed if the SD is non-zero. There also exist a method \code{[.GRP_df} which calls \code{\link{NextMethod}} but makes sure that the grouping information is preserved or dropped depending on the dimensions of the result (subsetting rows or aggregation with \emph{data.table} drops the grouping object). %Note that \code{fgroup_by} can only be used in combination with \emph{collapse} functions, not with \code{dplyr::summarize} or \code{dplyr::mutate} (the grouping object and method of computing results is different). The converse is not true, you can group data with \code{dplyr::group_by} and then apply \emph{collapse} functions. \code{fgroup_by} is class-agnostic, i.e. the classes of the data frame or list passed are preserved, and all standard methods (like subsetting with \code{`[`} or \code{print} methods) apply to the grouped object. % Apart from the class 'grouped_df' which is added behind any classes the object might inherit (apart from 'data.frame'), a class 'GRP_df' is added in front. This class responds to a \code{print} method. Both first call the corresponding method for the object and then print / attach the grouping information. \code{GRP.default} supports vector and list input and will also return 'GRP' objects if passed. There is also a hidden method \code{GRP.GRP} which simply returns grouping objects (no re-grouping functionality is offered). Apart from \code{GRP.grouped_df} there are several further conversion methods: The conversion of factors to 'GRP' objects by \code{GRP.factor} involves obtaining the number of groups calling \code{ng <- fnlevels(f)} and then computing the count of each level using \code{\link[=tabulate]{tabulate(f, ng)}}. The integer group-id (2) is already given by the factor itself after removing the levels and class attributes and replacing any missing values with \code{ng + 1L}. The levels are put in a list and moved to position (4) in the 'GRP' object, which is reserved for the unique groups. Finally, a sortedness check \code{!is.unsorted(id)} is run on the group-id to check if the data represented by the factor was sorted (6). \code{GRP.qG} works similarly (see also \code{\link{qG}}), and the 'pseries' and 'pdata.frame' methods simply group one or more factors in the \link[=indexing]{index} (selected using the \code{effect} argument) . Creating a factor from a 'GRP' object using \code{as_factor_GRP} does not involve any computations, but may involve interacting multiple grouping columns using the \code{paste} function to produce unique factor levels. % or \code{\link{as.character}} conversions if the grouping column(s) were numeric (which are potentially expensive). %\emph{Note}: For faster factor generation and a factor-light class 'qG' which avoids the coercion of factor levels to character also see \code{\link{qF}} and \code{\link{qG}}. } \value{ A list-like object of class `GRP' containing information about the number of groups, the observations (rows) belonging to each group, the size of each group, the unique group names / definitions, whether the groups are ordered and data grouped is sorted or not, the ordering vector used to perform the ordering and the group start positions. The object is structured as follows: \tabular{lllllll}{\emph{ List-index } \tab\tab \emph{ Element-name } \tab\tab \emph{ Content type } \tab\tab \emph{ Content description} \cr \tab\tab\tab\tab\tab\tab \cr [[1]] \tab\tab N.groups \tab\tab \code{integer(1)} \tab\tab Number of Groups \cr \tab\tab\tab\tab\tab\tab \cr [[2]] \tab\tab group.id \tab\tab \code{integer(NROW(X))} \tab\tab An integer group-identifier \cr \tab\tab\tab\tab\tab\tab \cr [[3]] \tab\tab group.sizes \tab\tab \code{integer(N.groups)} \tab\tab Vector of group sizes \cr \tab\tab\tab\tab\tab\tab \cr [[4]] \tab\tab groups \tab\tab \code{unique(X)} or \code{NULL} \tab\tab Unique groups (same format as input, except for \code{fgroup_by} which uses a plain list, sorted if \code{sort = TRUE}), or \code{NULL} if \code{return.groups = FALSE} \cr \tab\tab\tab\tab\tab\tab \cr [[5]] \tab\tab group.vars \tab\tab \code{character} \tab\tab The names of the grouping variables \cr \tab\tab\tab\tab\tab\tab \cr [[6]] \tab\tab ordered \tab\tab \code{logical(2)} \tab\tab \code{[1]} Whether the groups are ordered: equal to the \code{sort} argument in the default method, or \code{TRUE} if converted objects inherit a class \code{"ordered"} and \code{NA} otherwise, \code{[2]} Whether the data (\code{X}) is already sorted: the result of \code{!is.unsorted(group.id)}. If \code{sort = FALSE} (default method) the second entry is \code{NA}. \cr \tab\tab\tab\tab\tab\tab \cr [[7]] \tab\tab order \tab\tab \code{integer(NROW(X))} or \code{NULL} \tab\tab Ordering vector from \code{radixorder} (with \code{"starts"} attribute), or \code{NULL} if \code{return.order = FALSE} \cr \tab\tab\tab\tab\tab\tab \cr [[8]] \tab\tab group.starts \tab\tab \code{integer(N.groups)} or \code{NULL} \tab\tab The first-occurrence positions/rows of the groups. Useful e.g. with \code{ffirst(x, g, na.rm = FALSE)}. \code{NULL} if \code{return.groups = FALSE}. \cr \tab\tab\tab\tab\tab\tab \cr [[9]] \tab\tab call \tab\tab \code{match.call()} or \code{NULL} \tab\tab The \code{GRP()} call, obtained from \code{match.call()}, or \code{NULL} if \code{call = FALSE} } } \seealso{ \code{\link{radixorder}}, \code{\link{group}}, \code{\link{qF}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default method GRP(mtcars$cyl) GRP(mtcars, ~ cyl + vs + am) # Or GRP(mtcars, c("cyl","vs","am")) or GRP(mtcars, c(2,8:9)) g <- GRP(mtcars, ~ cyl + vs + am) # Saving the object print(g) # Printing it plot(g) # Plotting it GRPnames(g) # Retain group names GRPid(g) # Retain group id (same as g$group.id), useful inside fmutate() fsum(mtcars, g) # Compute the sum of mtcars, grouped by variables cyl, vs and am gsplit(mtcars$mpg, g) # Use the object to split a vector gsplit(NULL, g) # The indices of the groups identical(mtcars$mpg, # greorder and unlist undo the effect of gsplit greorder(unlist(gsplit(mtcars$mpg, g)), g)) ## Convert factor to GRP object and vice-versa GRP(iris$Species) as_factor_GRP(g) \donttest{ % The tidyverse regularly causes havoc to CRAN tests in other packages, therefore this is not tested ## dplyr integration library(dplyr) mtcars |> group_by(cyl,vs,am) |> GRP() # Get GRP object from a dplyr grouped tibble mtcars |> group_by(cyl,vs,am) |> fmean() # Grouped mean using dplyr grouping mtcars |> fgroup_by(cyl,vs,am) |> fmean() # Faster alternative with collapse grouping mtcars |> fgroup_by(cyl,vs,am) # Print method for grouped data frame ## Adding a column of group sizes. mtcars |> fgroup_by(cyl,vs,am) |> fsummarise(Sizes = GRPN()) # Note: can also set_collapse(mask = "n") to use n() instead, see help("collapse-options") # Other usage modes: mtcars |> fgroup_by(cyl,vs,am) |> fmutate(Sizes = GRPN()) mtcars |> fmutate(Sizes = GRPN(list(cyl,vs,am))) # Same thing, slightly more efficient ## Various options for programming and interactive use fgroup_by(GGDC10S, Variable, Decade = floor(Year / 10) * 10) |> head(3) fgroup_by(GGDC10S, 1:3, 5) |> head(3) fgroup_by(GGDC10S, c("Variable", "Country")) |> head(3) fgroup_by(GGDC10S, is.character) |> head(3) fgroup_by(GGDC10S, Country:Variable, Year) |> head(3) fgroup_by(GGDC10S, Country:Region, Var = Variable, Year) |> head(3) ## Note that you can create a grouped data frame without materializing the unique grouping columns fgroup_by(GGDC10S, Variable, Country, return.groups = FALSE) |> fmutate(across(AGR:SUM, fscale)) fgroup_by(GGDC10S, Variable, Country, return.groups = FALSE) |> fselect(AGR:SUM) |> fmean() ## Note also that setting sort = FALSE on unsorted data can be much faster... if not required... library(microbenchmark) microbenchmark(gby(GGDC10S, Variable, Country), gby(GGDC10S, Variable, Country, sort = FALSE)) } } \keyword{manip} collapse/man/fnth_fmedian.Rd0000644000176200001440000003455514777170130015563 0ustar liggesusers\name{fnth-fmedian} \alias{fnth} \alias{fnth.default} \alias{fnth.matrix} \alias{fnth.data.frame} \alias{fnth.grouped_df} \alias{fmedian} \alias{fmedian.default} \alias{fmedian.matrix} \alias{fmedian.data.frame} \alias{fmedian.grouped_df} \title{ Fast (Grouped, Weighted) N'th Element/Quantile for Matrix-Like Objects } \description{ \code{fnth} (column-wise) returns the n'th smallest element from a set of unsorted elements \code{x} corresponding to an integer index (\code{n}), or to a probability between 0 and 1. If \code{n} is passed as a probability, ties can be resolved using the lower, upper, or average of the possible elements, or (default) continuous quantile estimation. For \code{n > 1}, the lower element is always returned (as in \code{sort(x, partial = n)[n]}). See Details. \code{fmedian} is a simple wrapper around \code{fnth}, which fixes \code{n = 0.5} and (default) \code{ties = "mean"}, i.e., it averages eligible elements. See Details. %Users may prefer a quantile based definition of the weighted median. } \usage{ fnth(x, n = 0.5, \dots) fmedian(x, \dots) \method{fnth}{default}(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ties = "q7", nthreads = .op[["nthreads"]], o = NULL, check.o = is.null(attr(o, "sorted")), \dots) \method{fmedian}{default}(x, \dots, ties = "mean") \method{fnth}{matrix}(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "q7", nthreads = .op[["nthreads"]], \dots) \method{fmedian}{matrix}(x, \dots, ties = "mean") \method{fnth}{data.frame}(x, n = 0.5, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "q7", nthreads = .op[["nthreads"]], \dots) \method{fmedian}{data.frame}(x, \dots, ties = "mean") \method{fnth}{grouped_df}(x, n = 0.5, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], ties = "q7", nthreads = .op[["nthreads"]], \dots) \method{fmedian}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], ties = "mean", nthreads = .op[["nthreads"]], \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{n}{the element to return using a single integer index such that \code{1 < n < NROW(x)}, or a probability \code{0 < n < 1}. See Details. } \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values only where \code{x} is also missing.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE} a \code{NA} is returned when encountered.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{ties}{an integer or character string specifying the method to resolve ties between adjacent qualifying elements: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "mean" \tab\tab take the arithmetic mean of all qualifying elements. \cr 2 \tab\tab "min" \tab\tab take the smallest of the elements. \cr 3 \tab\tab "max" \tab\tab take the largest of the elements. \cr 4-9 \tab\tab "qn" \tab\tab continuous quantile types 4-9, see \code{\link{fquantile}}. \cr } } \item{nthreads}{integer. The number of threads to utilize. Parallelism is across groups for grouped computations on vectors and data frames, and at the column-level otherwise. See Details. } \item{o}{integer. A valid ordering of \code{x}, e.g. \code{radixorder(x)}. With groups, the grouping needs to be accounted e.g. \code{radixorder(g, x)}.} \item{check.o}{logical. \code{TRUE} checks that each element of \code{o} is within \code{[1, length(x)]}. The default uses the fact that orderings from \code{\link{radixorder}} have a \code{"sorted"} attribute which let's \code{fnth} infer that the ordering is valid. The length and data type of \code{o} is always checked, regardless of \code{check.o}.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain \code{sum} of weighting variable after computation (if contained in \code{grouped_df}).} \item{stub}{character. If \code{keep.w = TRUE} and \code{stub = TRUE} (default), the summed weights column is prefixed by \code{"sum."}. Users can specify a different prefix through this argument, or set it to \code{FALSE} to avoid prefixing.} \item{\dots}{for \code{fmedian}: further arguments passed to \code{fnth} (apart from \code{n}). If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ \code{fnth} uses a combination of quickselect, quicksort, and radixsort algorithms, combined with several (weighted) quantile estimation methods and, where possible, OpenMP multithreading: \itemize{ \item without weights, quickselect is used to determine a (lower) order statistic. If \code{ties \%!in\% c("min", "max")} a second order statistic is found by taking the max of the upper part of the partitioned array, and the two statistics are averaged using a simple mean (\code{ties = "mean"}), or weighted average according to a \code{\link{quantile}} method (\code{ties = "q4"-"q9"}). For \code{n = 0.5}, all supported quantile methods give the sample median. With matrices, multithreading is always across columns, for vectors and data frames it is across groups unless \code{is.null(g)} for data frames. \item with weights and no groups (\code{is.null(g)}), \code{\link{radixorder}} is called internally (on each column of \code{x}). The ordering is used to sum the weights in order of \code{x} and determine weighted order statistics or quantiles. See details below. Multithreading is disabled as \code{\link{radixorder}} cannot be called concurrently on the same memory stack. \item with weights and groups (\code{!is.null(g)}), R's quicksort algorithm is used to sort the data in each group and return an index which can be used to sum the weights in order and proceed as before. This is multithreaded across columns for matrices, and across groups otherwise. \item in \code{fnth.default}, an ordering of \code{x} can be supplied to '\code{o}' e.g. \code{fnth(x, 0.75, o = radixorder(x))}. This dramatically speeds up the estimation both with and without weights, and is useful if \code{fnth} is to be invoked repeatedly on the same data. With groups, \code{o} needs to also account for the grouping e.g. \code{fnth(x, 0.75, g, o = radixorder(g, x))}. Multithreading is possible across groups. See Examples. } %This is an R port to \code{std::nth_element}, an efficient partial sorting algorithm in C++. It is also used to calculated the median (in fact the default \code{fnth(x, n = 0.5)} is identical to \code{fmedian(x)}, so see also the details for \code{\link{fmedian}}). % \code{fnth} generalizes the principles of median value calculation to find arbitrary elements. It offers considerable flexibility by providing both simple order statistics and simple discontinuous quantile estimation. Regarding the former, setting \code{n} to an index between 1 and \code{NROW(x)} will return the n'th smallest element of \code{x}, about 2x faster than \code{sort(x, partial = n)[n]}. As to the latter, setting \code{n} to a probability between 0 and 1 will return the corresponding element of \code{x}, and resolve ties between multiple qualifying elements (such as when \code{n = 0.5} and \code{x} is even) using the arithmetic average \code{ties = "mean"}, or the smallest \code{ties = "min"} or largest \code{ties = "max"} of those elements. If \code{n > 1}, the result is equivalent to (column-wise) \code{sort(x, partial = n)[n]}. Internally, \code{n} is converted to a probability using \code{p = (n-1)/(NROW(x)-1)}, and that probability is applied to the set of non-missing elements to find the \code{as.integer(p*(fnobs(x)-1))+1L}'th element (which corresponds to option \code{ties = "min"}). % Note that it is necessary to subtract and add 1 so that \code{n = 1} corresponds to \code{p = 0} and \code{n = NROW(x)} to \code{p = 1}. %So if \code{n > 1} is used in the presence of missing values, and the default \code{ties = "mean"} is enabled, the resulting element could be the average of two elements. When using grouped computations with \code{n > 1}, \code{n} is transformed to a probability \code{p = (n-1)/(NROW(x)/ng-1)} (where \code{ng} contains the number of unique groups in \code{g}). If weights are used and \code{ties = "q4"-"q9"}, weighted continuous quantile estimation is done as described in \code{\link{fquantile}}. For \code{ties \%in\% c("mean", "min", "max")}, a target partial sum of weights \code{p*sum(w)} is calculated, and the weighted n'th element is the element k such that all elements smaller than k have a sum of weights \code{<= p*sum(w)}, and all elements larger than k have a sum of weights \code{<= (1 - p)*sum(w)}. If the partial-sum of weights (\code{p*sum(w)}) is reached exactly for some element k, then (summing from the lower end) both k and k+1 would qualify as the weighted n'th element. If the weight of element k+1 is zero, k, k+1 and k+2 would qualify... . If \code{n > 1}, k is chosen (consistent with the unweighted behavior). %(ensuring that \code{fnth(x, n)}) and \code{fnth(x, n, w = rep(1, NROW(x)))}, always provide the same outcome) If \code{0 < n < 1}, the \code{ties} option regulates how to resolve such conflicts, yielding lower (\code{ties = "min"}: k), upper (\code{ties = "max"}: k+2) or average weighted (\code{ties = "mean"}: mean(k, k+1, k+2)) n'th elements. Thus, in the presence of zero weights, the weighted median (default \code{ties = "mean"}) can be an arithmetic average of >2 qualifying elements. For data frames, column-attributes and overall attributes are preserved if \code{g} is used or \code{drop = FALSE}. } \value{ The (\code{w} weighted) n'th element/quantile of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its (grouped, weighted) n'th element/quantile. } \seealso{ \code{\link{fquantile}}, \code{\link{fmean}}, \code{\link{fmode}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method mpg <- mtcars$mpg fnth(mpg) # Simple nth element: Median (same as fmedian(mpg)) fnth(mpg, 5) # 5th smallest element sort(mpg, partial = 5)[5] # Same using base R, fnth is 2x faster. fnth(mpg, 0.75) # Third quartile fnth(mpg, 0.75, w = mtcars$hp) # Weighted third quartile: Weighted by hp fnth(mpg, 0.75, TRA = "-") # Simple transformation: Subtract third quartile fnth(mpg, 0.75, mtcars$cyl) # Grouped third quartile fnth(mpg, 0.75, mtcars[c(2,8:9)]) # More groups.. g <- GRP(mtcars, ~ cyl + vs + am) # Precomputing groups gives more speed ! fnth(mpg, 0.75, g) fnth(mpg, 0.75, g, mtcars$hp) # Grouped weighted third quartile fnth(mpg, 0.75, g, TRA = "-") # Groupwise subtract third quartile fnth(mpg, 0.75, g, mtcars$hp, "-") # Groupwise subtract weighted third quartile ## data.frame method fnth(mtcars, 0.75) head(fnth(mtcars, 0.75, TRA = "-")) fnth(mtcars, 0.75, g) fnth(fgroup_by(mtcars, cyl, vs, am), 0.75) # Another way of doing it.. fnth(mtcars, 0.75, g, use.g.names = FALSE) # No row-names generated ## matrix method m <- qM(mtcars) fnth(m, 0.75) head(fnth(m, 0.75, TRA = "-")) fnth(m, 0.75, g) # etc.. ## method for grouped data frames - created with dplyr::group_by or fgroup_by mtcars |> fgroup_by(cyl,vs,am) |> fnth(0.75) mtcars |> fgroup_by(cyl,vs,am) |> fnth(0.75, hp) # Weighted mtcars |> fgroup_by(cyl,vs,am) |> fnth(0.75, TRA = "/") # Divide by third quartile mtcars |> fgroup_by(cyl,vs,am) |> fselect(mpg, hp) |> # Faster selecting fnth(0.75, hp, "/") # Divide mpg by its third weighted group-quartile, using hp as weights # Efficient grouped estimation of multiple quantiles mtcars |> fgroup_by(cyl,vs,am) |> fmutate(o = radixorder(GRPid(), mpg)) |> fsummarise(mpg_Q1 = fnth(mpg, 0.25, o = o), mpg_median = fmedian(mpg, o = o), mpg_Q3 = fnth(mpg, 0.75, o = o)) ## fmedian() fmedian(mpg) # Simple median value fmedian(mpg, w = mtcars$hp) # Weighted median: Weighted by hp fmedian(mpg, TRA = "-") # Simple transformation: Subtract median value fmedian(mpg, mtcars$cyl) # Grouped median value fmedian(mpg, mtcars[c(2,8:9)]) # More groups.. fmedian(mpg, g) fmedian(mpg, g, mtcars$hp) # Grouped weighted median fmedian(mpg, g, TRA = "-") # Groupwise subtract median value fmedian(mpg, g, mtcars$hp, "-") # Groupwise subtract weighted median value ## data.frame method fmedian(mtcars) head(fmedian(mtcars, TRA = "-")) fmedian(mtcars, g) fmedian(fgroup_by(mtcars, cyl, vs, am)) # Another way of doing it.. fmedian(mtcars, g, use.g.names = FALSE) # No row-names generated ## matrix method fmedian(m) head(fmedian(m, TRA = "-")) fmedian(m, g) # etc.. ## method for grouped data frames - created with dplyr::group_by or fgroup_by mtcars |> fgroup_by(cyl,vs,am) |> fmedian() mtcars |> fgroup_by(cyl,vs,am) |> fmedian(hp) # Weighted mtcars |> fgroup_by(cyl,vs,am) |> fmedian(TRA = "-") # De-median mtcars |> fgroup_by(cyl,vs,am) |> fselect(mpg, hp) |> # Faster selecting fmedian(hp, "-") # Weighted de-median mpg, using hp as weights } \keyword{univar} \keyword{manip} collapse/man/fsummarise.Rd0000644000176200001440000001355614777170130015312 0ustar liggesusers\name{fsummarise} \alias{fsummarise} \alias{fsummarize} \alias{smr} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Summarise } \description{ \code{fsummarise} is a much faster version of \code{dplyr::summarise}, when used together with the \link[=fast-statistical-functions]{Fast Statistical Functions}. \code{fsummarize} and \code{fsummarise} are synonyms. } \usage{ fsummarise(.data, ..., keep.group_vars = TRUE, .cols = NULL) fsummarize(.data, ..., keep.group_vars = TRUE, .cols = NULL) smr(.data, ..., keep.group_vars = TRUE, .cols = NULL) # Shorthand } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.data}{ a (grouped) data frame or named list of columns. Grouped data can be created with \code{\link{fgroup_by}} or \code{dplyr::group_by}. } \item{\dots}{ name-value pairs of summary functions, \code{\link{across}} statements, or arbitrary expressions resulting in a list. See Examples. For fast performance use the \link[=fast-statistical-functions]{Fast Statistical Functions}. % The name will be the name of the variable in the result. Functions when applied to a vector need to return a scalar. } \item{keep.group_vars}{ logical. \code{FALSE} removes grouping variables after computation. } \item{.cols}{ for expressions involving \code{.data}, \code{.cols} can be used to subset columns, e.g. \code{mtcars |> gby(cyl) |> smr(mctl(cor(.data), TRUE), .cols = 5:7)}. Can pass column names, indices, a logical vector or a selector function (e.g. \code{is.numericr}). } } \value{ If \code{.data} is grouped by \code{\link{fgroup_by}} or \code{dplyr::group_by}, the result is a data frame of the same class and attributes with rows reduced to the number of groups. If \code{.data} is not grouped, the result is a data frame of the same class and attributes with 1 row. } \note{ Since v1.7, \code{fsummarise} is fully featured, allowing expressions using functions and columns of the data as well as external scalar values (just like \code{dplyr::summarise}). \bold{NOTE} however that once a \link[=fast-statistical-functions]{Fast Statistical Function} is used, the execution will be vectorized instead of split-apply-combine computing over groups. Please see the first Example. } \seealso{ \code{\link{across}}, \code{\link{collap}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Since v1.7, fsummarise supports arbitrary expressions, and expressions ## containing fast statistical functions receive vectorized execution: # (a) This is an expression using base R functions which is executed by groups mtcars |> fgroup_by(cyl) |> fsummarise(res = mean(mpg) + min(qsec)) # (b) Here, the use of fmean causes the whole expression to be executed # in a vectorized way i.e. the expression is translated to something like # fmean(mpg, g = cyl) + min(mpg) and executed, thus the result is different # from (a), because the minimum is calculated over the entire sample mtcars |> fgroup_by(cyl) |> fsummarise(mpg = fmean(mpg) + min(qsec)) # (c) For fully vectorized execution, use fmin. This yields the same as (a) mtcars |> fgroup_by(cyl) |> fsummarise(mpg = fmean(mpg) + fmin(qsec)) # More advanced use: vectorized grouped regression slopes: mpg ~ carb mtcars |> fgroup_by(cyl) |> fmutate(dm_carb = fwithin(carb)) |> fsummarise(beta = fsum(mpg, dm_carb) \%/=\% fsum(dm_carb^2)) # In across() statements it is fine to mix different functions, each will # be executed on its own terms (i.e. vectorized for fmean and standard for sum) mtcars |> fgroup_by(cyl) |> fsummarise(across(mpg:hp, list(fmean, sum))) # Note that this still detects fmean as a fast function, the names of the list # are irrelevant, but the function name must be typed or passed as a character vector, # Otherwise functions will be executed by groups e.g. function(x) fmean(x) won't vectorize mtcars |> fgroup_by(cyl) |> fsummarise(across(mpg:hp, list(mu = fmean, sum = sum))) # We can force none-vectorized execution by setting .apply = TRUE mtcars |> fgroup_by(cyl) |> fsummarise(across(mpg:hp, list(mu = fmean, sum = sum), .apply = TRUE)) # Another argument of across(): Order the result first by function, then by column mtcars |> fgroup_by(cyl) |> fsummarise(across(mpg:hp, list(mu = fmean, sum = sum), .transpose = FALSE)) # Since v1.9.0, can also evaluate arbitrary expressions mtcars |> fgroup_by(cyl, vs, am) |> fsummarise(mctl(cor(cbind(mpg, wt, carb)), names = TRUE)) # This can also be achieved using across(): corfun <- function(x) mctl(cor(x), names = TRUE) mtcars |> fgroup_by(cyl, vs, am) |> fsummarise(across(c(mpg, wt, carb), corfun, .apply = FALSE)) #---------------------------------------------------------------------------- # Examples that also work for pre 1.7 versions # Simple use fsummarise(mtcars, mean_mpg = fmean(mpg), sd_mpg = fsd(mpg)) # Using base functions (not a big difference without groups) fsummarise(mtcars, mean_mpg = mean(mpg), sd_mpg = sd(mpg)) # Grouped use mtcars |> fgroup_by(cyl) |> fsummarise(mean_mpg = fmean(mpg), sd_mpg = fsd(mpg)) # This is still efficient but quite a bit slower on large data (many groups) mtcars |> fgroup_by(cyl) |> fsummarise(mean_mpg = mean(mpg), sd_mpg = sd(mpg)) # Weighted aggregation mtcars |> fgroup_by(cyl) |> fsummarise(w_mean_mpg = fmean(mpg, wt), w_sd_mpg = fsd(mpg, wt)) \donttest{ % The tidyverse regularly causes havoc to CRAN tests in other packages, therefore this is not tested ## Can also group with dplyr::group_by, but at a conversion cost, see ?GRP library(dplyr) mtcars |> group_by(cyl) |> fsummarise(mean_mpg = fmean(mpg), sd_mpg = fsd(mpg)) # Again less efficient... mtcars |> group_by(cyl) |> fsummarise(mean_mpg = mean(mpg), sd_mpg = sd(mpg)) } } \keyword{manip} collapse/man/ffirst_flast.Rd0000644000176200001440000001105114777170130015611 0ustar liggesusers\name{ffirst-flast} \alias{ffirst} \alias{ffirst.default} \alias{ffirst.matrix} \alias{ffirst.data.frame} \alias{ffirst.grouped_df} \alias{flast} \alias{flast.default} \alias{flast.matrix} \alias{flast.data.frame} \alias{flast.grouped_df} \title{Fast (Grouped) First and Last Value for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{ffirst} and \code{flast} are S3 generic functions that (column-wise) returns the first and last values in \code{x}, (optionally) grouped by \code{g}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (groupwise) first and last values. } \usage{ ffirst(x, \dots) flast(x, \dots) \method{ffirst}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, \dots) \method{flast}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, \dots) \method{ffirst}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{flast}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{ffirst}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{flast}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, \dots) \method{ffirst}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, \dots) \method{flast}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. \code{TRUE} skips missing values and returns the first / last non-missing value i.e. if the first (1) / last (n) value is \code{NA}, take the second (2) / second-to-last (n-1) value etc..} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \value{ \code{ffirst} returns the first value in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its first value, grouped by \code{g}. Similarly \code{flast} returns the last value in \code{x}, \dots } \note{ Both functions are significantly faster if \code{na.rm = FALSE}, particularly \code{ffirst} which can take direct advantage of the 'group.starts' elements in \code{\link{GRP}} objects. } \seealso{ \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## default vector method ffirst(airquality$Ozone) # Simple first value ffirst(airquality$Ozone, airquality$Month) # Grouped first value ffirst(airquality$Ozone, airquality$Month, na.rm = FALSE) # Grouped first, but without skipping initial NA's ## data.frame method ffirst(airquality) ffirst(airquality, airquality$Month) ffirst(airquality, airquality$Month, na.rm = FALSE) # Again first Ozone measurement in month 6 is NA ## matrix method aqm <- qM(airquality) ffirst(aqm) ffirst(aqm, airquality$Month) # etc.. \donttest{ % The tidyverse regularly causes havoc to CRAN tests in other packages, therefore this is not tested ## method for grouped data frames - created with dplyr::group_by or fgroup_by library(dplyr) airquality |> group_by(Month) |> ffirst() airquality |> group_by(Month) |> select(Ozone) |> ffirst(na.rm = FALSE) } # Note: All examples generalize to flast. } \keyword{univar} \keyword{manip} collapse/man/varying.Rd0000644000176200001440000001237214777170130014611 0ustar liggesusers\name{varying} \alias{varying} \alias{varying.default} \alias{varying.matrix} \alias{varying.data.frame} \alias{varying.pseries} \alias{varying.pdata.frame} \alias{varying.grouped_df} \alias{varying.sf} \title{Fast Check of Variation in Data} % Vectors, Matrix and Data Frame Columns} \description{ \code{varying} is a generic function that (column-wise) checks for variation in the values of \code{x}, (optionally) within the groups \code{g} (e.g. a panel-identifier). } \usage{ varying(x, ...) \method{varying}{default}(x, g = NULL, any_group = TRUE, use.g.names = TRUE, ...) \method{varying}{matrix}(x, g = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) \method{varying}{data.frame}(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) # Methods for indexed data / compatibility with plm: \method{varying}{pseries}(x, effect = 1L, any_group = TRUE, use.g.names = TRUE, ...) \method{varying}{pdata.frame}(x, effect = 1L, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) # Methods for grouped data frame / compatibility with dplyr: \method{varying}{grouped_df}(x, any_group = TRUE, use.g.names = FALSE, drop = TRUE, keep.group_vars = TRUE, ...) # Methods for grouped data frame / compatibility with sf: \method{varying}{sf}(x, by = NULL, cols = NULL, any_group = TRUE, use.g.names = TRUE, drop = TRUE, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a vector, matrix, data frame, 'indexed_series' ('pseries'), 'indexed_frame' ('pdata.frame') or grouped data frame ('grouped_df'). Data must not be numeric.} \item{g}{a factor, \code{GRP} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{GRP} object) used to group \code{x}.} \item{by}{same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1 + group2} or \code{var1 + var2 ~ group1 + group2}. See Examples} \item{any_group}{logical. If \code{!is.null(g)}, \code{FALSE} will check and report variation in all groups, whereas the default \code{TRUE} only checks if there is variation within any group. See Examples.} \item{cols}{select columns using column names, indices or a function (e.g. \code{is.numeric}). Two-sided formulas passed to \code{by} overwrite \code{cols}.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{drop}{\emph{matrix and data.frame methods:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if the result is 1-dimensional.} \item{effect}{\emph{plm} methods: Select the panel identifier by which variation in the data should be examined. 1L takes the first variable in the \link[=indexing]{index}, 2L the second etc.. Index variables can also be called by name. More than one index variable can be supplied, which will be interacted.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{...}{arguments to be passed to or from other methods.} } \details{ Without groups passed to \code{g}, \code{varying} simply checks if there is any variation in the columns of \code{x} and returns \code{TRUE} for each column where this is the case and \code{FALSE} otherwise. A set of data points is defined as varying if it contains at least 2 distinct non-missing values (such that a non-0 standard deviation can be computed on numeric data). \code{varying} checks for variation in both numeric and non-numeric data. If groups are supplied to \code{g} (or alternatively a \emph{grouped_df} to \code{x}), \code{varying} can operate in one of 2 modes: \itemize{ \item If \code{any_group = TRUE} (the default), \code{varying} checks each column for variation in any of the groups defined by \code{g}, and returns \code{TRUE} if such within-variation was detected and \code{FALSE} otherwise. Thus only one logical value is returned for each column and the computation on each column is terminated as soon as any variation within any group was found. \item If \code{any_group = FALSE}, \code{varying} runs through the entire data checking each group for variation and returns, for each column in \code{x}, a logical vector reporting the variation check for all groups. If a group contains only missing values, a \code{NA} is returned for that group. } The \emph{sf} method simply ignores the geometry column. } \value{ A logical vector or (if \code{!is.null(g)} and \code{any_group = FALSE}), a matrix or data frame of logical vectors indicating whether the data vary (over the dimension supplied by \code{g}). } \seealso{ \link[=summary-statistics]{Summary Statistics}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Checks overall variation in all columns varying(wlddev) ## Checks whether data are time-variant i.e. vary within country varying(wlddev, ~ country) ## Same as above but done for each country individually, countries without data are coded NA head(varying(wlddev, ~ country, any_group = FALSE)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % use one of RShowDoc("KEYWORDS") collapse/man/funique.Rd0000644000176200001440000001262414777170130014606 0ustar liggesusers\name{funique} \alias{funique} \alias{funique.default} \alias{funique.data.frame} \alias{funique.sf} \alias{funique.pseries} \alias{funique.pdata.frame} \alias{fnunique} \alias{fduplicated} \alias{any_duplicated} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Unique Elements / Rows } \description{ \code{funique} is an efficient alternative to \code{\link{unique}} (or \code{unique.data.table, kit::funique, dplyr::distinct}). \code{fnunique} is an alternative to \code{NROW(unique(x))} (or \code{data.table::uniqueN, kit::uniqLen, dplyr::n_distinct}). \code{fduplicated} is an alternative to \code{\link{duplicated}} (or \code{duplicated.data.table}, \code{kit::fduplicated}). The \emph{collapse} versions are versatile and highly competitive. % on data frames. \code{any_duplicated(x)} is faster than \code{any(fduplicated(x))}. \emph{Note} that for atomic vectors, \code{\link{anyDuplicated}} is currently more efficient if there are duplicates at the beginning of the vector. } \usage{ funique(x, \dots) \method{funique}{default}(x, sort = FALSE, method = "auto", \dots) \method{funique}{data.frame}(x, cols = NULL, sort = FALSE, method = "auto", \dots) \method{funique}{sf}(x, cols = NULL, sort = FALSE, method = "auto", \dots) # Methods for indexed data / compatibility with plm: \method{funique}{pseries}(x, sort = FALSE, method = "auto", drop.index.levels = "id", \dots) \method{funique}{pdata.frame}(x, cols = NULL, sort = FALSE, method = "auto", drop.index.levels = "id", \dots) fnunique(x) # Fast NROW(unique(x)), for vectors and lists fduplicated(x, all = FALSE) # Fast duplicated(x), for vectors and lists any_duplicated(x) # Simple logical TRUE|FALSE duplicates check } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a atomic vector or data frame / list of equal-length columns. } \item{sort}{logical. \code{TRUE} orders the unique elements / rows. \code{FALSE} returns unique values in order of first occurrence. } \item{method}{an integer or character string specifying the method of computation: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "auto" \tab\tab automatic selection: hash if \code{sort = FALSE} else radix. \cr 2 \tab\tab "radix" \tab\tab use radix ordering to determine unique values. Supports \code{sort = FALSE} but only for character data. \cr 3 \tab\tab "hash" \tab\tab use index hashing to determine unique values. Supports \code{sort = TRUE} but only for atomic vectors (default method). \cr } } \item{cols}{compute unique rows according to a subset of columns. Columns can be selected using column names, indices, a logical vector or a selector function (e.g. \code{is.character}). \emph{Note:} All columns are returned. } \item{\dots}{arguments passed to \code{\link{radixorder}}, e.g. \code{decreasing} or \code{na.last}. Only applicable if \code{method = "radix"}.} \item{drop.index.levels}{character. Either \code{"id"}, \code{"time"}, \code{"all"} or \code{"none"}. See \link{indexing}.} \item{all}{logical. \code{TRUE} returns all duplicated values, including the first occurrence.} } \details{ If all values/rows are already unique, then \code{x} is returned. Otherwise a copy of \code{x} with duplicate rows removed is returned. See \code{\link{group}} for some additional computational details. The \emph{sf} method simply ignores the geometry column when determining unique values. Methods for indexed data also subset the index accordingly. \code{any_duplicated} is currently simply implemented as \code{fnunique(x) < NROW(x)}, which means it does not have facilities to terminate early, and users are advised to use \code{\link{anyDuplicated}} with atomic vectors if chances are high that there are duplicates at the beginning of the vector. With no duplicate values or data frames, \code{any_duplicated} is considerably faster than \code{\link{anyDuplicated}}. } \note{ These functions treat lists like data frames, unlike \code{\link{unique}} which has a list method to determine uniqueness of (non-atomic/heterogeneous) elements in a list. No matrix method is provided. Please use the alternatives provided in package \emph{kit} with matrices. % The \emph{kit} version is also often faster for vectors. } \value{ \code{funique} returns \code{x} with duplicate elements/rows removed, \code{fnunique} returns an integer giving the number of unique values/rows, \code{fduplicated} gives a logical vector with \code{TRUE} indicating duplicated elements/rows. % sorted in ascending order if \code{sort = TRUE}, and in order of first occurrence if \code{sort = FALSE}. } \seealso{ \code{\link{fndistinct}}, \code{\link{group}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}. } \examples{ funique(mtcars$cyl) funique(gv(mtcars, c(2,8,9))) funique(mtcars, cols = c(2,8,9)) fnunique(gv(mtcars, c(2,8,9))) fduplicated(gv(mtcars, c(2,8,9))) fduplicated(gv(mtcars, c(2,8,9)), all = TRUE) any_duplicated(gv(mtcars, c(2,8,9))) any_duplicated(mtcars) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/select_replace_vars.Rd0000644000176200001440000002420115000030221017101 0ustar liggesusers\name{fselect-get_vars-add_vars} % \name{select-replace-vars} % \alias{select-replace-vars} \alias{fselect} \alias{fselect<-} \alias{slt} \alias{slt<-} \alias{get_vars} \alias{gv} \alias{gvr} \alias{num_vars} \alias{nv} \alias{cat_vars} \alias{char_vars} \alias{fact_vars} \alias{logi_vars} \alias{date_vars} \alias{add_vars} \alias{av} \alias{get_vars<-} \alias{gv<-} \alias{gvr<-} \alias{num_vars<-} \alias{nv<-} \alias{cat_vars<-} \alias{char_vars<-} \alias{fact_vars<-} \alias{logi_vars<-} \alias{date_vars<-} \alias{add_vars<-} \alias{av<-} \title{Fast Select, Replace or Add Data Frame Columns} \description{ Efficiently select and replace (or add) a subset of columns from (to) a data frame. This can be done by data type, or using expressions, column names, indices, logical vectors, selector functions or regular expressions matching column names. } \usage{ ## Select and replace variables, analgous to dplyr::select but significantly faster fselect(.x, \dots, return = "data") fselect(x, \dots) <- value slt(.x, \dots, return = "data") # Shorthand for fselect slt(x, \dots) <- value # Shorthand for fselect<- ## Select and replace columns by names, indices, logical vectors, ## regular expressions or using functions to identify columns get_vars(x, vars, return = "data", regex = FALSE, rename = FALSE, \dots) gv(x, vars, return = "data", \dots) # Shorthand for get_vars gvr(x, vars, return = "data", \dots) # Shorthand for get_vars(..., regex = TRUE) get_vars(x, vars, regex = FALSE, \dots) <- value gv(x, vars, \dots) <- value # Shorthand for get_vars<- gvr(x, vars, \dots) <- value # Shorthand for get_vars<-(..., regex = TRUE) ## Add columns at any position within a data.frame add_vars(x, \dots, pos = "end") add_vars(x, pos = "end") <- value av(x, \dots, pos = "end") # Shorthand for add_vars av(x, pos = "end") <- value # Shorthand for add_vars<- ## Select and replace columns by data type num_vars(x, return = "data") num_vars(x) <- value nv(x, return = "data") # Shorthand for num_vars nv(x) <- value # Shorthand for num_vars<- cat_vars(x, return = "data") # Categorical variables, see is_categorical cat_vars(x) <- value char_vars(x, return = "data") char_vars(x) <- value fact_vars(x, return = "data") fact_vars(x) <- value logi_vars(x, return = "data") logi_vars(x) <- value date_vars(x, return = "data") # See is_date date_vars(x) <- value } \arguments{ \item{x, .x}{a data frame or list.} \item{value}{a data frame or list of columns whose dimensions exactly match those of the extracted subset of \code{x}. If only 1 variable is in the subset of \code{x}, \code{value} can also be an atomic vector or matrix, provided that \code{NROW(value) == nrow(x)}.} \item{vars}{a vector of column names, indices (can be negative), a suitable logical vector, or a vector of regular expressions matching column names (if \code{regex = TRUE}). It is also possible to pass a function returning \code{TRUE} or \code{FALSE} when applied to the columns of \code{x}.} \item{return}{an integer or string specifying what the selector function should return. The options are: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "data" \tab\tab subset of data frame (default) \cr 2 \tab\tab "names" \tab\tab column names \cr 3 \tab\tab "indices" \tab\tab column indices \cr 4 \tab\tab "named_indices" \tab\tab named column indices \cr 5 \tab\tab "logical" \tab\tab logical selection vector \cr 6 \tab\tab "named_logical" \tab\tab named logical vector \cr } \emph{Note}: replacement functions only replace data, however column names are replaced together with the data (if available). } \item{regex}{logical. \code{TRUE} will do regular expression search on the column names of \code{x} using a (vector of) regular expression(s) passed to \code{vars}. Matching is done using \code{\link{grep}}.} \item{rename}{logical. If \code{vars} is a named vector of column names or indices, \code{rename = TRUE} will use the (non missing) names to rename columns.} \item{pos}{the position where columns are added in the data frame. \code{"end"} (default) will append the data frame at the end (right) side. "front" will add columns in front (left). Alternatively one can pass a vector of positions (matching \code{length(value)} if value is a list). In that case the other columns will be shifted around the new ones while maintaining their order. } \item{\dots}{for \code{fselect}: column names and expressions e.g. \code{fselect(mtcars, newname = mpg, hp, carb:vs)}. for \code{get_vars}: further arguments passed to \code{\link{grep}}, if \code{regex = TRUE}. For \code{add_vars}: multiple lists/data frames or vectors (which should be given names e.g. \code{name = vector}). A single argument passed may also be an (unnamed) vector or matrix.} } \details{ \code{get_vars(<-)} is around 2x faster than \code{`[.data.frame`} and 8x faster than \code{`[<-.data.frame`}, so the common operation \code{data[cols] <- someFUN(data[cols])} can be made 10x more efficient (abstracting from computations performed by \code{someFUN}) using \code{get_vars(data, cols) <- someFUN(get_vars(data, cols))} or the shorthand \code{gv(data, cols) <- someFUN(gv(data, cols))}. Similarly type-wise operations like \code{data[sapply(data, is.numeric)]} or \code{data[sapply(data, is.numeric)] <- value} are facilitated and more efficient using \code{num_vars(data)} and \code{num_vars(data) <- value} or the shortcuts \code{nv} and \code{nv<-} etc. \code{fselect} provides an efficient alternative to \code{dplyr::select}, allowing the selection of variables based on expressions evaluated within the data frame, see Examples. It is about 100x faster than \code{dplyr::select} but also more simple as it does not provide special methods (except for 'sf' and 'data.table' which are handled internally) . Finally, \code{add_vars(data1, data2, data3, \dots)} is a lot faster than \code{cbind(data1, data2, data3, \dots)}, and preserves the attributes of \code{data1} (i.e. it is like adding columns to \code{data1}). The replacement function \code{add_vars(data) <- someFUN(get_vars(data, cols))} efficiently appends \code{data} with computed columns. The \code{pos} argument allows adding columns at positions other than the end (right) of the data frame, see Examples. \emph{Note} that \code{add_vars} does not check duplicated column names or \code{NULL} columns, and does not evaluate expressions in a data environment, or replicate length 1 inputs like \code{\link{cbind}}. All of this is provided by \code{\link{ftransform}}. All functions introduced here perform their operations class-independent. They all basically work like this: (1) save the attributes of \code{x}, (2) unclass \code{x}, (3) subset, replace or append \code{x} as a list, (4) modify the "names" component of the attributes of \code{x} accordingly and (5) efficiently attach the attributes again to the result from step (3). Thus they can freely be applied to data.table's, grouped tibbles, panel data frames and other classes and will return an object of exactly the same class and the same attributes. % secure w.r.t. redefinitions of \code{`[.data.frame`} or \code{`[<-.data.frame`} for other classes (i.e. data.table's, tibbles etc.) and preserve all attributes of the data } \note{ In many cases functions here only check the length of the first column, which is one of the reasons why they are so fast. When lists of unequal-length columns are offered as replacements this yields a malformed data frame (which will also print a warning in the console i.e. you will notice that). } \seealso{ \code{\link{fsubset}}, \code{\link{ftransform}}, \code{\link{rowbind}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Wold Development Data head(fselect(wlddev, Country = country, Year = year, ODA)) # Fast dplyr-like selecting head(fselect(wlddev, -country, -year, -PCGDP)) head(fselect(wlddev, country, year, PCGDP:ODA)) head(fselect(wlddev, -(PCGDP:ODA))) fselect(wlddev, country, year, PCGDP:ODA) <- NULL # Efficient deleting head(wlddev) rm(wlddev) head(num_vars(wlddev)) # Select numeric variables head(cat_vars(wlddev)) # Select categorical (non-numeric) vars head(get_vars(wlddev, is_categorical)) # Same thing num_vars(wlddev) <- num_vars(wlddev) # Replace Numeric Variables by themselves get_vars(wlddev,is.numeric) <- get_vars(wlddev,is.numeric) # Same thing head(get_vars(wlddev, 9:12)) # Select columns 9 through 12, 2x faster head(get_vars(wlddev, -(9:12))) # All except columns 9 through 12 head(get_vars(wlddev, c("PCGDP","LIFEEX","GINI","ODA"))) # Select using column names head(get_vars(wlddev, "[[:upper:]]", regex = TRUE)) # Same thing: match upper-case var. names head(gvr(wlddev, "[[:upper:]]")) # Same thing get_vars(wlddev, 9:12) <- get_vars(wlddev, 9:12) # 9x faster wlddev[9:12] <- wlddev[9:12] add_vars(wlddev) <- STD(gv(wlddev,9:12), wlddev$iso3c) # Add Standardized columns 9 through 12 head(wlddev) # gv and av are shortcuts get_vars(wlddev, 14:17) <- NULL # Efficient Deleting added columns again av(wlddev, "front") <- STD(gv(wlddev,9:12), wlddev$iso3c) # Again adding in Front head(wlddev) get_vars(wlddev, 1:4) <- NULL # Deleting av(wlddev,c(10,12,14,16)) <- W(wlddev,~iso3c, cols = 9:12, # Adding next to original variables keep.by = FALSE) head(wlddev) get_vars(wlddev, c(10,12,14,16)) <- NULL # Deleting head(add_vars(wlddev, new = STD(wlddev$PCGDP))) # Can also add columns like this head(add_vars(wlddev, STD(nv(wlddev)), new = W(wlddev$PCGDP))) # etc... head(add_vars(mtcars, mtcars, mpg = mtcars$mpg, mtcars), 2) # add_vars does not check names! } \keyword{manip} collapse/man/colorder.Rd0000644000176200001440000000616414777170130014745 0ustar liggesusers\name{colorder} \alias{colorder} \alias{colorderv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Reordering of Data Frame Columns } \description{ Efficiently reorder columns in a data frame. To do this fully by reference see also \code{data.table::setcolorder}. } \usage{ colorder(.X, \dots, pos = "front") colorderv(X, neworder = radixorder(names(X)), pos = "front", regex = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{.X, X}{a data frame or list.} \item{\dots}{for \code{colorder}: Column names of \code{.X} in the new order (can also use sequences i.e. \code{col1:coln, newname = colk, \dots}). For \code{colorderv}: Further arguments to \code{\link{grep}} if \code{regex = TRUE}.} \item{neworder}{a vector of column names, positive indices, a suitable logical vector, a function such as \code{is.numeric}, or a vector of regular expressions matching column names (if \code{regex = TRUE}). } \item{pos}{integer or character. Different options regarding column arrangement if \code{...length() < ncol(.X)} (or \code{length(neworder) < ncol(X)}). \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "front" \tab\tab move specified columns to the front (the default). \cr 2 \tab\tab "end" \tab\tab move specified columns to the end. \cr 3 \tab\tab "exchange" \tab\tab just exchange the positions of selected columns, other columns remain in the same position. \cr 4 \tab\tab "after" \tab\tab place all further selected columns behind the first selected column. \cr } } \item{regex}{logical. \code{TRUE} will do regular expression search on the column names of \code{X} using a (vector of) regular expression(s) passed to \code{neworder}. Matching is done using \code{\link{grep}}. \emph{Note} that multiple regular expressions will be matched in the order they are passed, and \code{\link{funique}} will be applied to the resulting set of indices. } } \value{ \code{.X/X} with columns reordered (no deep copies). } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{roworder}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ head(colorder(mtcars, vs, cyl:hp, am)) head(colorder(mtcars, vs, cyl:hp, am, pos = "end")) head(colorder(mtcars, vs, cyl:hp, am, pos = "after")) head(colorder(mtcars, vs, cyl, pos = "exchange")) head(colorder(mtcars, vs, cyl:hp, new = am)) # renaming ## Same in standard evaluation head(colorderv(mtcars, c(8, 2:4, 9))) head(colorderv(mtcars, c(8, 2:4, 9), pos = "end")) head(colorderv(mtcars, c(8, 2:4, 9), pos = "after")) head(colorderv(mtcars, c(8, 2), pos = "exchange")) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/radixorder.Rd0000644000176200001440000001020414777170130015265 0ustar liggesusers\name{radixorder} \alias{radixorder} \alias{radixorderv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Radix-Based Ordering } \description{ A slight modification of \code{\link[=order]{order(..., method = "radix")}} that is more programmer friendly and, importantly, provides features for ordered grouping of data (similar to \code{data.table:::forderv} from which it descended). % \code{radixorderv} is a programmers version directly supporting vector and list input. % Apart from added grouping features, the source code and standard functionality is identical to \code{\link{order(\dots, method = "radix")}. } \usage{ radixorder(\dots, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) radixorderv(x, na.last = TRUE, decreasing = FALSE, starts = FALSE, group.sizes = FALSE, sort = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{comma-separated atomic vectors to order. } \item{x}{ an atomic vector or list of atomic vectors such as a data frame. } \item{na.last}{logical. for controlling the treatment of \code{NA}'s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first; if NA, they are removed. } \item{decreasing}{ logical. Should the sort order be increasing or decreasing? Can be a vector of length equal to the number of arguments in \code{\dots} / \code{x}. } \item{starts}{logical. \code{TRUE} returns an attribute 'starts' containing the first element of each new group i.e. the row denoting the start of each new group if the data were sorted using the computed ordering vector. See Examples. %% ~~Describe \code{starts} here~~ } \item{group.sizes}{logical. \code{TRUE} returns an attribute 'group.sizes' containing sizes of each group in the same order as groups are encountered if the data were sorted using the computed ordering vector. See Examples. } \item{sort}{logical. This argument only affects character vectors / columns passed. If \code{FALSE}, these are not ordered but simply grouped in the order of first appearance of unique elements. This provides a slight performance gain if only grouping but not alphabetic ordering is required. See also \code{\link{group}}. %% ~~Describe \code{sort} here~~ } } % \details{ % \code{radixorder} works just like \code{\link[=order]{order(\dots, method = "radix")}}, the source code is the same. However if \code{starts = TRUE}, and attribute % } %} \value{ An integer ordering vector with attributes: Unless \code{na.last = NA} an attribute \code{"sorted"} indicating whether the input data was already sorted is attached. If \code{starts = TRUE}, \code{"starts"} giving a vector of group starts in the ordered data, and if \code{group.sizes = TRUE}, \code{"group.sizes"} giving the vector of group sizes are attached. In either case an attribute \code{"maxgrpn"} providing the size of the largest group is also attached. } \author{ The C code was taken - with slight modifications - from \href{https://github.com/wch/r-source/blob/79298c499218846d14500255efd622b5021c10ec/src/main/radixsort.c}{base R source code}, and is originally due to \emph{data.table} authors Matt Dowle and Arun Srinivasan. } \seealso{ \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ radixorder(mtcars$mpg) head(mtcars[radixorder(mtcars$mpg), ]) radixorder(mtcars$cyl, mtcars$vs) o <- radixorder(mtcars$cyl, mtcars$vs, starts = TRUE) st <- attr(o, "starts") head(mtcars[o, ]) mtcars[o[st], c("cyl", "vs")] # Unique groups # Note that if attr(o, "sorted") == TRUE, then all(o[st] == st) radixorder(rep(1:3, each = 3), starts = TRUE) # Group sizes radixorder(mtcars$cyl, mtcars$vs, group.sizes = TRUE) # Both radixorder(mtcars$cyl, mtcars$vs, starts = TRUE, group.sizes = TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/timeid.Rd0000644000176200001440000000764614777170130014415 0ustar liggesusers\name{timeid} \alias{timeid} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Generate Integer-Id From Time/Date Sequences } \description{ \code{timeid} groups time vectors in a way that preserves the temporal structure. It generate an integer id where unit steps represent the greatest common divisor in the original sequence e.g \code{c(4, 6, 10) -> c(1, 2, 4)} or \code{c(0.25, 0.75, 1) -> c(1, 3, 4)}. } \usage{ timeid(x, factor = FALSE, ordered = factor, extra = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric time object such as a \code{Date}, \code{POSIXct} or other integer or double vector representing time.} \item{factor}{logical. \code{TRUE} returns an (ordered) factor with levels corresponding to the full sequence (without irregular gaps) of time. This is useful for inclusion in the \link[=findex]{index} but might be computationally expensive for long sequences, see Details. \code{FALSE} returns a simpler object of class '\code{\link{qG}}'. } \item{ordered}{logical. \code{TRUE} adds a class 'ordered'. } \item{extra}{logical. \code{TRUE} attaches a set of 4 diagnostic items as attributes to the result: \itemize{ \item \code{"unique_ints"}: \code{unique(unattrib(timeid(x)))} - the unique integer time steps in first-appearance order. This can be useful to check the size of gaps in the sequence. %The \code{\link{seqid}} function can help in the exploration of this attribute, e.g. \code{seqid(attr(timeid(x, extra = TRUE), "unique"))} shows the number and position of the dicontinuities. \item \code{"sort_unique_x"}: \code{sort(unique(x))}. \item \code{"range_x"}: \code{range(x)}. \item \code{"step_x"}: \code{vgcd(sort(unique(diff(sort(unique(x))))))} - the greatest common divisor. } \emph{Note} that returning these attributes does not incur additional computations. } } \details{ Let \code{range_x} and \code{step_x} be the like-named attributes returned when \code{extra = TRUE}, then, if \code{factor = TRUE}, a complete sequence of levels is generated as \code{seq(range_x[1], range_x[2], by = step_x) |> copyMostAttrib(x) |> as.character()}. If \code{factor = FALSE}, the number of timesteps recorded in the \code{"N.groups"} attribute is computed as \code{(range_x[2]-range_x[1])/step_x + 1}, which is equal to the number of factor levels. In both cases the underlying integer id is the same and preserves gaps in time. Large gaps (strong irregularity) can result in many unused factor levels, the generation of which can become expensive. Using \code{factor = FALSE} (the default) is thus more efficient. } \value{ A factor or '\code{\link{qG}}' object, optionally with additional attributes attached. } \seealso{ \code{\link{seqid}}, \link[=indexing]{Indexing}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ oldopts <- options(max.print = 30) # A normal use case timeid(wlddev$decade) timeid(wlddev$decade, factor = TRUE) timeid(wlddev$decade, extra = TRUE) # Here a large number of levels is generated, which is expensive timeid(wlddev$date, factor = TRUE) tid <- timeid(wlddev$date, extra = TRUE) # Much faster str(tid) # The reason for step = 1 are leap years with 366 days every 4 years diff(attr(tid, "unique")) # So in this case simple factor generation gives a better result qF(wlddev$date, ordered = TRUE, na.exclude = FALSE) # The best way to deal with this data would be to convert it # to zoo::yearmon and then use timeid: timeid(zoo::as.yearmon(wlddev$date), factor = TRUE, extra = TRUE) options(oldopts) rm(oldopts, tid) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ts} \keyword{manip} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/join.Rd0000644000176200001440000002001614777170130014063 0ustar liggesusers\name{join} \alias{join} \title{Fast and Verbose Table Joins} \description{ Join two data frame like objects \code{x} and \code{y} \code{on} columns. Inspired by \emph{polars} and by default uses a vectorized hash join algorithm (workhorse function \code{\link{fmatch}}), with several verbose options. } \usage{ join(x, y, on = NULL, how = "left", suffix = NULL, validate = "m:m", multiple = FALSE, sort = FALSE, keep.col.order = TRUE, drop.dup.cols = FALSE, verbose = .op[["verbose"]], require = NULL, column = NULL, attr = NULL, \dots ) } \arguments{ \item{x}{a data frame-like object. The result will inherit the attributes of this object. } \item{y}{a data frame-like object to join with \code{x}.} \item{on}{character. vector of columns to join on. \code{NULL} uses \code{intersect(names(x), names(y))}. Use a named vector to match columns named differently in \code{x} and \code{y}, e.g. \code{c("x_id" = "y_id")}.} \item{how}{character. Join type: \code{"left"}, \code{"right"}, \code{"inner"}, \code{"full"}, \code{"semi"} or \code{"anti"}. The first letter suffices. } \item{suffix}{character(1 or 2). Suffix to add to duplicate column names. \code{NULL} renames duplicate \code{y} columns as \code{paste(col, y_name, sep = "_")}, where \code{y_name = as.character(substitute(y))} i.e. the name of the data frame as passed into the function. In general, passing \code{suffix} length 1 will only rename \code{y}, whereas a length 2 suffix will rename both \code{x} and \code{y}, respectively. If \code{verbose > 0} a message will be printed. } \item{validate}{character. (Optional) check if join is of specified type. One of \code{"1:1"}, \code{"1:m"}, \code{"m:1"} or \code{"m:m"}. The default \code{"m:m"} does not perform any checks. Checks are done before the actual join step and failure results in an error. \emph{Note} that this argument does not affect the result, it only triggers a check.} \item{multiple}{logical. Handling of rows in \code{x} with multiple matches in \code{y}. The default \code{FALSE} takes the first match in \code{y}. \code{TRUE} returns every match in \code{y} (a full cartesian product), increasing the size of the joined table. } \item{sort}{logical. \code{TRUE} implements a sort-merge-join: a completely separate join algorithm that sorts both datasets on the join columns using \code{\link{radixorder}} and then matches the rows without hashing. \emph{Note} that in this case the result will be sorted by the join columns, whereas \code{sort = FALSE} preserves the order of rows in \code{x}.} \item{keep.col.order}{logical. Keep order of columns in \code{x}? \code{FALSE} places the \code{on} columns in front.} \item{drop.dup.cols}{instead of renaming duplicate columns in \code{x} and \code{y} using \code{suffix}, this option simply drops them: \code{TRUE} or \code{"y"} drops them from \code{y}, \code{"x"} from \code{x}.} \item{verbose}{integer. Prints information about the join. One of 0 (off), 1 (default, see Details) or 2 (additionally prints the classes of the \code{on} columns). \emph{Note:} \code{verbose > 0} or \code{validate != "m:m"} invoke the \code{count} argument to \code{\link{fmatch}}, so \code{verbose = 0} is slightly more efficient. } \item{require}{(optional) named list of the form \code{list(x = 1, y = 0.5, fail = "warning")} (or \code{fail.with} if you want to be more expressive) giving proportions of records that need to be matched and the action if any requirement fails (\code{"message"}, \code{"warning"}, or \code{"error"}). Any elements of the list can be omitted, the default action is \code{"error"}.} \item{column}{(optional) name for an extra column to generate in the output indicating which dataset a record came from. \code{TRUE} calls this column \code{".join"} (inspired by STATA's '_merge' column). By default this column is generated as the last column, but, if \code{keep.col.order = FALSE}, it is placed after the 'on' columns. The column is a factor variable with levels corresponding to the dataset names (inferred from the input) or \code{"matched"} for matched records. Alternatively, it is possible to specify a list of 2, where the first element is the column name, and the second a length 3 (!) vector of levels e.g. \code{column = list("joined", c("x", "y", "x_y"))}, where \code{"x_y"} replaces \code{"matched"}. The column has an additional attribute \code{"on.cols"} giving the join columns corresponding to the factor levels. See Examples. } \item{attr}{(optional) name for attribute providing information about the join performed (including the output of \code{\link{fmatch}}) to the result. \code{TRUE} calls this attribute \code{"join.match"}. \emph{Note:} this also invokes the \code{count} argument to \code{\link{fmatch}}.} \item{\dots}{further arguments to \code{\link{fmatch}} (if \code{sort = FALSE}). Notably, \code{overid} can bet set to 0 or 2 (default 1) to control the matching process if the join condition more than identifies the records.} } \details{ If \code{verbose > 0}, \code{join} prints a compact summary of the join operation using \code{\link{cat}}. If the names of \code{x} and \code{y} can be extracted (if \code{as.character(substitute(x))} yields a single string) they will be displayed (otherwise 'x' and 'y' are used) followed by the respective join keys in brackets. This is followed by a summary of the records used from each table. If \code{multiple = FALSE}, only the first matches from \code{y} are used and counted here (or the first matches of \code{x} if \code{how = "right"}). \emph{Note} that if \code{how = "full"} any further matches are simply appended to the results table, thus it may make more sense to use \code{multiple = TRUE} with the full join when suspecting multiple matches. If \code{multiple = TRUE}, \code{join} performs a full cartesian product matching every key in \code{x} to every matching key in \code{y}. This can considerably increase the size of the resulting table. No memory checks are performed (your system will simply run out of memory; usually this should not terminate R). In both cases, \code{join} will also determine the average order of the join as the number of records used from each table divided by the number of unique matches and display it between the two tables at up to 2 digits. For example \code{"<4:1.5>"} means that on average 4 records from \code{x} match 1.5 records from \code{y}, implying on average \code{4*1.5 = 6} records generated per unique match. If \code{multiple = FALSE} \code{"1st"} will be displayed for the using table (\code{y} unless \code{how = "right"}), indicating that there could be multiple matches but only the first is retained. \emph{Note} that an order of '1' on either table must not imply that the key is unique as this value is generated from \code{round(v, 2)}. To be sure about a keys uniqueness employ the \code{validate} argument. } \value{ A data frame-like object of the same type and attributes as \code{x}. \code{"row.names"} of \code{x} are only preserved in left-join operations. } \examples{ df1 <- data.frame( id1 = c(1, 1, 2, 3), id2 = c("a", "b", "b", "c"), name = c("John", "Jane", "Bob", "Carl"), age = c(35, 28, 42, 50) ) df2 <- data.frame( id1 = c(1, 2, 3, 3), id2 = c("a", "b", "c", "e"), salary = c(60000, 55000, 70000, 80000), dept = c("IT", "Marketing", "Sales", "IT") ) # Different types of joins for(i in c("l","i","r","f","s","a")) join(df1, df2, how = i) |> print() # With multiple matches for(i in c("l","i","r","f","s","a")) join(df1, df2, on = "id2", how = i, multiple = TRUE) |> print() # Adding join column: useful esp. for full join join(df1, df2, how = "f", column = TRUE) # Custom column + rearranging join(df1, df2, how = "f", column = list("join", c("x", "y", "x_y")), keep = FALSE) # Attaching match attribute str(join(df1, df2, attr = TRUE)) } \seealso{ \code{\link{fmatch}}, \code{\link{pivot}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \keyword{manip} collapse/man/wlddev.Rd0000644000176200001440000000603114777170130014412 0ustar liggesusers\name{wlddev} \alias{wlddev} \docType{data} \title{ World Development Dataset } \description{ This dataset contains 5 indicators from the World Bank's World Development Indicators (WDI) database: (1) GDP per capita, (2) Life expectancy at birth, (3) GINI index, (4) Net ODA and official aid received and (5) Population. The panel data is balanced and covers 216 present and historic countries from 1960-2020 (World Bank aggregates and regional entities are excluded). Apart from the indicators the data contains a number of identifiers (character country name, factor ISO3 country code, World Bank region and income level, numeric year and decade) and 2 generated variables: A logical variable indicating whether the country is an OECD member, and a fictitious variable stating the date the data was recorded. These variables were added so that all common data-types are represented in this dataset, making it an ideal test-dataset for certain \emph{collapse} functions. } \usage{data("wlddev")} \format{ A data frame with 13176 observations on the following 13 variables. All variables are labeled e.g. have a 'label' attribute. \describe{ \item{\code{country}}{\emph{chr} Country Name} \item{\code{iso3c}}{\emph{fct} Country Code} \item{\code{date}}{\emph{date} Date Recorded (Fictitious)} \item{\code{year}}{\emph{int} Year} \item{\code{decade}}{\emph{int} Decade} \item{\code{region}}{\emph{fct} World Bank Region} \item{\code{income}}{\emph{fct} World Bank Income Level} \item{\code{OECD}}{\emph{log} Is OECD Member Country?} \item{\code{PCGDP}}{\emph{num} GDP per capita (constant 2010 US$)} \item{\code{LIFEEX}}{\emph{num} Life expectancy at birth, total (years)} \item{\code{GINI}}{\emph{num} GINI index (World Bank estimate)} \item{\code{ODA}}{\emph{num} Net official development assistance and official aid received (constant 2018 US$)} \item{\code{POP}}{\emph{num} Population, total} } } % \details{ %% ~~ If necessary, more details than the __description__ above ~~ % } \source{ \url{https://data.worldbank.org/}, accessed via the \code{WDI} package. The codes for the series are \code{c("NY.GDP.PCAP.KD", "SP.DYN.LE00.IN", "SI.POV.GINI", "DT.ODA.ALLD.KD", "SP.POP.TOTL")}. } % \references{ %% ~~ possibly secondary sources and usages ~~ % } \seealso{ \code{\link{GGDC10S}}, \link[=collapse-documentation]{Collapse Overview} } \examples{ data(wlddev) # Panel-summarizing the 5 series qsu(wlddev, pid = ~iso3c, cols = 9:13, vlabels = TRUE) # By Region qsu(wlddev, by = ~region, cols = 9:13, vlabels = TRUE) # Panel-summary by region qsu(wlddev, by = ~region, pid = ~iso3c, cols = 9:13, vlabels = TRUE) # Pairwise correlations: Ovarall print(pwcor(get_vars(wlddev, 9:13), N = TRUE, P = TRUE), show = "lower.tri") # Pairwise correlations: Between Countries print(pwcor(fmean(get_vars(wlddev, 9:13), wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri") # Pairwise correlations: Within Countries print(pwcor(fwithin(get_vars(wlddev, 9:13), wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri") } \keyword{datasets} collapse/man/TRA.Rd0000644000176200001440000002272614777170130013564 0ustar liggesusers\name{TRA} \alias{TRA} \alias{setTRA} \alias{TRA.default} \alias{TRA.matrix} \alias{TRA.data.frame} \alias{TRA.grouped_df} \title{ Transform Data by (Grouped) Replacing or Sweeping out Statistics } \description{ \code{TRA} is an S3 generic that efficiently transforms data by either (column-wise) replacing data values with supplied statistics or sweeping the statistics out of the data. \code{TRA} supports grouped operations and data transformation by reference, and is thus a generalization of \code{\link{sweep}}. } \usage{ TRA(x, STATS, FUN = "-", ...) setTRA(x, STATS, FUN = "-", ...) # Shorthand for invisible(TRA(..., set = TRUE)) \method{TRA}{default}(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) \method{TRA}{matrix}(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) \method{TRA}{data.frame}(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) \method{TRA}{grouped_df}(x, STATS, FUN = "-", keep.group_vars = TRUE, set = FALSE, ...) } \arguments{ \item{x}{a atomic vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{STATS}{a matching set of summary statistics. See Details and Examples.} \item{FUN}{an integer or character string indicating the operation to perform. There are 11 supported operations: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 0 \tab\tab "na" or "replace_na" \tab\tab replace missing values in \code{x} \cr 1 \tab\tab "fill" or "replace_fill" \tab\tab replace data and missing values in \code{x} \cr 2 \tab\tab "replace" \tab\tab replace data but preserve missing values in \code{x} \cr 3 \tab\tab "-" \tab\tab subtract (center on \code{STATS}) \cr 4 \tab\tab "-+" \tab\tab subtract group-statistics but add group-frequency weighted average of group statistics (i.e. center on overall average statistic) \cr 5 \tab\tab "/" \tab\tab divide (i.e. scale. For mean-preserving scaling see also \code{\link{fscale}}) \cr 6 \tab\tab "\%" \tab\tab compute percentages (divide and multiply by 100) \cr 7 \tab\tab "+" \tab\tab add \cr 8 \tab\tab "*" \tab\tab multiply \cr 9 \tab\tab "\%\%" \tab\tab modulus (remainder from division by \code{STATS}) \cr 10 \tab\tab "-\%\%" \tab\tab subtract modulus (make data divisible by \code{STATS}) } } \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}. Number of groups must match rows of \code{STATS}. See Details.} \item{set}{logical. \code{TRUE} transforms data by reference i.e. performs in-place modification of the data without creating a copy.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation. See Details and Examples.} \item{...}{arguments to be passed to or from other methods.} } \details{ Without groups (\code{g = NULL}), \code{TRA} is little more than a column based version of \code{\link{sweep}}, albeit many times more efficient. In this case all methods support an atomic vector of statistics of length \code{NCOL(x)} passed to \code{STATS}. The matrix and data frame methods also support a 1-row matrix or 1-row data frame / list, respectively. \code{TRA} always preserves all attributes of \code{x}. With groups passed to \code{g}, \code{STATS} needs to be of the same type as \code{x} and of appropriate dimensions [such that \code{NCOL(x) == NCOL(STATS)} and \code{NROW(STATS)} equals the number of groups (i.e. the number of levels if \code{g} is a factor)]. If this condition is satisfied, \code{TRA} will assume that the first row of \code{STATS} is the set of statistics computed on the first group/level of \code{g}, the second row on the second group/level etc. and do groupwise replacing or sweeping out accordingly. For example Let \code{x = c(1.2, 4.6, 2.5, 9.1, 8.7, 3.3)}, g is an integer vector in 3 groups \code{g = c(1,3,3,2,1,2)} and \code{STATS = fmean(x,g) = c(4.95, 6.20, 3.55)}. Then \code{out = TRA(x,STATS,"-",g) = c(-3.75, 1.05, -1.05, 2.90, 3.75, -2.90)} [same as \code{fmean(x, g, TRA = "-")}] does the equivalent of the following for-loop: \code{for(i in 1:6) out[i] = x[i] - STATS[g[i]]}. Correct computation requires that \code{g} as used in \code{fmean} and \code{g} passed to \code{TRA} are exactly the same vector. Using \code{g = c(1,3,3,2,1,2)} for \code{fmean} and \code{g = c(3,1,1,2,3,2)} for \code{TRA} will not give the right result. The safest way of programming with \code{TRA} is thus to repeatedly employ the same factor or \code{\link{GRP}} object for all grouped computations. Atomic vectors passed to \code{g} will be converted to factors (see \code{\link{qF}}) and lists will be converted to \code{\link{GRP}} objects. This is also done by all \link[=fast-statistical-functions]{Fast Statistical Functions} and \code{\link{BY}}, thus together with these functions, \code{TRA} can also safely be used with atomic- or list-groups (as long as all functions apply sorted grouping, which is the default in \emph{collapse}). %Problems may arise if functions from other packages internally group atomic vectors or lists in a non-sorted way. [\emph{Note}: \code{as.factor} conversions are ok as this also involves sorting.] %In contrast to the other methods, \code{TRA.grouped_df} matches column names exactly, thus \code{STATS} can be any subset of aggregated columns in \code{x} in any order, with or without grouping columns. \code{TRA.grouped_df} will transform the columns in \code{x} with their aggregated versions matched from \code{STATS} (ignoring grouping columns found in \code{x} or \code{STATS} and columns in \code{x} not found in \code{STATS}), and return \code{x} again. If \code{x} is a grouped data frame ('grouped_df'), \code{TRA} matches the columns of \code{x} and \code{STATS} and also checks for grouping columns in \code{x} and \code{STATS}. \code{TRA.grouped_df} will then only transform those columns in \code{x} for which matching counterparts were found in \code{STATS} (exempting grouping columns) and return \code{x} again (with columns in the same order). If \code{keep.group_vars = FALSE}, the grouping columns are dropped after computation, however the "groups" attribute is not dropped (it can be removed using \code{\link[=fungroup]{fungroup()}} or \code{dplyr::ungroup()}). } \value{ \code{x} with columns replaced or swept out using \code{STATS}, (optionally) grouped by \code{g}. } \note{ In most cases there is no need to call the \code{TRA()} function, because of the TRA-argument to all \link[=fast-statistical-functions]{Fast Statistical Functions} (ensuring that the exact same grouping vector is used for computing statistics and subsequent transformation). In addition the functions \code{\link[=fbetween]{fbetween/B}} and \code{\link[=fwithin]{fwithin/W}} and \code{\link[=fscale]{fscale/STD}} provide optimized solutions for frequent scaling, centering and averaging tasks. %\code{TRA} is really a programmers function for cases when both aggregate statistics and transformed data need to be retained, or to work with more complex statistics (i.e. together with \code{\link{dapply}} or \code{\link{BY}}). } \seealso{ \code{\link{sweep}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ v <- iris$Sepal.Length # A numeric vector f <- iris$Species # A factor dat <- num_vars(iris) # Numeric columns m <- qM(dat) # Matrix of numeric data head(TRA(v, fmean(v))) # Simple centering [same as fmean(v, TRA = "-") or W(v)] head(TRA(m, fmean(m))) # [same as sweep(m, 2, fmean(m)), fmean(m, TRA = "-") or W(m)] head(TRA(dat, fmean(dat))) # [same as fmean(dat, TRA = "-") or W(dat)] head(TRA(v, fmean(v), "replace")) # Simple replacing [same as fmean(v, TRA = "replace") or B(v)] head(TRA(m, fmean(m), "replace")) # [same as sweep(m, 2, fmean(m)), fmean(m, TRA = 1L) or B(m)] head(TRA(dat, fmean(dat), "replace")) # [same as fmean(dat, TRA = "replace") or B(dat)] head(TRA(m, fsd(m), "/")) # Simple scaling... [same as fsd(m, TRA = "/")]... # Note: All grouped examples also apply for v and dat... head(TRA(m, fmean(m, f), "-", f)) # Centering [same as fmean(m, f, TRA = "-") or W(m, f)] head(TRA(m, fmean(m, f), "replace", f)) # Replacing [same fmean(m, f, TRA = "replace") or B(m, f)] head(TRA(m, fsd(m, f), "/", f)) # Scaling [same as fsd(m, f, TRA = "/")] head(TRA(m, fmean(m, f), "-+", f)) # Centering on the overall mean ... # [same as fmean(m, f, TRA = "-+") or # W(m, f, mean = "overall.mean")] head(TRA(TRA(m, fmean(m, f), "-", f), # Also the same thing done manually !! fmean(m), "+")) # Grouped data method library(magrittr) iris \%>\% fgroup_by(Species) \%>\% TRA(fmean(.)) iris \%>\% fgroup_by(Species) \%>\% fmean(TRA = "-") # Same thing iris \%>\% fgroup_by(Species) \%>\% TRA(fmean(.)[c(2,4)]) # Only transforming 2 columns iris \%>\% fgroup_by(Species) \%>\% TRA(fmean(.)[c(2,4)], # Dropping species column keep.group_vars = FALSE) } % Add one or more standard keywords, see file 'KEYWORDS' in the R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/fmode.Rd0000644000176200001440000002246314777170130014226 0ustar liggesusers\name{fmode} \alias{fmode} \alias{fmode.default} \alias{fmode.matrix} \alias{fmode.data.frame} \alias{fmode.grouped_df} \title{Fast (Grouped, Weighted) Statistical Mode for Matrix-Like Objects} % Vectors, Matrix and Data Frame Columns} \description{ \code{fmode} is a generic function and returns the (column-wise) statistical mode i.e. the most frequent value of \code{x}, (optionally) grouped by \code{g} and/or weighted by \code{w}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped, weighted) mode. Ties between multiple possible modes can be resolved by taking the minimum, maximum, (default) first or last occurring mode. } \usage{ fmode(x, \dots) \method{fmode}{default}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, ties = "first", nthreads = .op[["nthreads"]], \dots) \method{fmode}{matrix}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "first", nthreads = .op[["nthreads"]], \dots) \method{fmode}{data.frame}(x, g = NULL, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, ties = "first", nthreads = .op[["nthreads"]], \dots) \method{fmode}{grouped_df}(x, w = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, keep.w = TRUE, stub = .op[["stub"]], ties = "first", nthreads = .op[["nthreads"]], \dots) } \arguments{ \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.} \item{w}{a numeric vector of (non-negative) weights, may contain missing values.} \item{TRA}{an integer or quoted operator indicating the transformation to perform: 0 - "na" | 1 - "fill" | 2 - "replace" | 3 - "-" | 4 - "-+" | 5 - "/" | 6 - "\%" | 7 - "+" | 8 - "*" | 9 - "\%\%" | 10 - "-\%\%". See \code{\link{TRA}}.} \item{na.rm}{logical. Skip missing values in \code{x}. Defaults to \code{TRUE} and implemented at very little computational cost. If \code{na.rm = FALSE}, \code{NA} is treated as any other value.} \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.} \item{ties}{an integer or character string specifying the method to resolve ties between multiple possible modes i.e. multiple values with the maximum frequency or sum of weights: \tabular{lllll}{\emph{ Int. } \tab\tab \emph{ String } \tab\tab \emph{ Description } \cr 1 \tab\tab "first" \tab\tab take the first occurring mode. \cr 2 \tab\tab "min" \tab\tab take the smallest of the possible modes. \cr 3 \tab\tab "max" \tab\tab take the largest of the possible modes. \cr 4 \tab\tab "last" \tab\tab take the last occurring mode. \cr } \emph{Note:} \code{"min"/"max"} don't work with character data. % For logical data \code{TRUE} will be chosen unless \code{ties = "min"}. See also Details. } \item{nthreads}{integer. The number of threads to utilize. Parallelism is across groups for grouped computations and at the column-level otherwise. } \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.} \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.} \item{keep.w}{\emph{grouped_df method:} Logical. Retain \code{sum} of weighting variable after computation (if contained in \code{grouped_df}).} \item{stub}{character. If \code{keep.w = TRUE} and \code{stub = TRUE} (default), the summed weights column is prefixed by \code{"sum."}. Users can specify a different prefix through this argument, or set it to \code{FALSE} to avoid prefixing.} \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.} } \details{ \code{fmode} implements a pretty fast C-level hashing algorithm inspired by the \emph{kit} package to find the statistical mode. % utilizing index- hashing implemented in the \code{Rcpp::sugar::IndexHash} class. %If all values are distinct, the first value is returned. If there are multiple distinct values having the top frequency, the first value established as having the top frequency when passing through the data from element 1 to element n is returned. If \code{na.rm = FALSE}, \code{NA} is not removed but treated as any other value (i.e. its frequency is counted). If all values are \code{NA}, \code{NA} is always returned. The weighted mode is computed by summing up the weights for all distinct values and choosing the value with the largest sum. If \code{na.rm = TRUE}, missing values will be removed from both \code{x} and \code{w} i.e. utilizing only \code{x[complete.cases(x,w)]} and \code{w[complete.cases(x,w)]}. It is possible that multiple values have the same mode (the maximum frequency or sum of weights). Typical cases are simply when all values are either all the same or all distinct. In such cases, the default option \code{ties = "first"} returns the first occurring value in the data reaching the maximum frequency count or sum of weights. For example in a sample \code{x = c(1, 3, 2, 2, 4, 4, 1, 7)}, the first mode is 2 as \code{fmode} goes through the data from left to right. \code{ties = "last"} on the other hand gives 1. It is also possible to take the minimum or maximum mode, i.e. \code{fmode(x, ties = "min")} returns 1, and \code{fmode(x, ties = "max")} returns 4. It should be noted that options \code{ties = "min"} and \code{ties = "max"} give unintuitive results for character data (no strict alphabetic sorting, similar to using \code{<} and \code{>} to compare character values in R). These options are also best avoided if missing values are counted (\code{na.rm = FALSE}) since no proper logical comparison with missing values is possible: With numeric data it depends, since in C++ any comparison with \code{NA_real_} evaluates to \code{FALSE}, \code{NA_real_} is chosen as the min or max mode only if it is also the first mode, and never otherwise. For integer data, \code{NA_integer_} is stored as the smallest integer in C++, so it will always be chosen as the min mode and never as the max mode. For character data, \code{NA_character_} is stored as the string \code{"NA"} in C++ and thus the behavior depends on the other character content. % \code{fmode} also implements a fast method for logical values which does not support the options \code{"first"/"last"} i.e. \code{TRUE} is returned unless \code{ties = "min"}. % This all seamlessly generalizes to grouped computations, which are performed by mapping the data to a sparse-array (except for logical values) and then going group-by group. \code{fmode} preserves all the attributes of the objects it is applied to (apart from names or row-names which are adjusted as necessary in grouped operations). If a data frame is passed to \code{fmode} and \code{drop = TRUE} (the default), \code{\link{unlist}} will be called on the result, which might not be sensible depending on the data at hand. } \value{ The (\code{w} weighted) statistical mode of \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its (grouped, weighed) mode. %See also Details. } \seealso{ \code{\link{fmean}}, \code{\link{fmedian}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview} } \examples{ x <- c(1, 3, 2, 2, 4, 4, 1, 7, NA, NA, NA) fmode(x) # Default is ties = "first" fmode(x, ties = "last") fmode(x, ties = "min") fmode(x, ties = "max") fmode(x, na.rm = FALSE) # Here NA is the mode, regardless of ties option fmode(x[-length(x)], na.rm = FALSE) # Not anymore.. ## World Development Data attach(wlddev) ## default vector method fmode(PCGDP) # Numeric mode head(fmode(PCGDP, iso3c)) # Grouped numeric mode head(fmode(PCGDP, iso3c, LIFEEX)) # Grouped and weighted numeric mode fmode(region) # Factor mode fmode(date) # Date mode (defaults to first value since panel is balanced) fmode(country) # Character mode (also defaults to first value) fmode(OECD) # Logical mode # ..all the above can also be performed grouped and weighted ## matrix method m <- qM(airquality) fmode(m) fmode(m, na.rm = FALSE) # NA frequency is also counted fmode(m, airquality$Month) # Groupwise fmode(m, w = airquality$Day) # Weighted: Later days in the month are given more weight fmode(m>50, airquality$Month) # Groupwise logical mode # etc.. ## data.frame method fmode(wlddev) # Calling unlist -> coerce to character vector fmode(wlddev, drop = FALSE) # Gives one row head(fmode(wlddev, iso3c)) # Grouped mode head(fmode(wlddev, iso3c, LIFEEX)) # Grouped and weighted mode detach(wlddev) } \keyword{univar} \keyword{manip} collapse/man/arithmetic.Rd0000644000176200001440000001243114777170130015257 0ustar liggesusers\name{arithmetic} \alias{arithmetic} \alias{\%rr\%} \alias{\%r+\%} \alias{\%r-\%} \alias{\%r*\%} \alias{\%r/\%} \alias{\%cr\%} \alias{\%c+\%} \alias{\%c-\%} \alias{\%c*\%} \alias{\%c/\%} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Row/Column Arithmetic for Matrix-Like Objects } \description{ Fast operators to perform row- or column-wise replacing and sweeping operations of vectors on matrices, data frames, lists. See also \code{\link{setop}} for math by reference and \code{\link{setTRA}} for sweeping by reference. } \usage{ ## Perform the operation with v and each row of X X \%rr\% v # Replace rows of X with v X \%r+\% v # Add v to each row of X X \%r-\% v # Subtract v from each row of X X \%r*\% v # Multiply each row of X with v X \%r/\% v # Divide each row of X by v ## Perform a column-wise operation between V and X X \%cr\% V # Replace columns of X with V X \%c+\% V # Add V to columns of X X \%c-\% V # Subtract V from columns of X X \%c*\% V # Multiply columns of X with V X \%c/\% V # Divide columns of X by V } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a vector, matrix, data frame or list like object (with rows (r) columns (c) matching \code{v} / \code{V}).} \item{v}{for row operations: an atomic vector of matching \code{NCOL(X)}. If \code{X} is a data frame, \code{v} can also be a list of scalar atomic elements. It is also possible to sweep lists of vectors \code{v} out of lists of matrices or data frames \code{X}.} \item{V}{for column operations: a suitable scalar, vector, or matrix / data frame matching \code{NROW(X)}. \code{X} can also be a list of vectors / matrices in which case \code{V} can be a scalar / vector / matrix or matching list of scalars / vectors / matrices.} } \details{ With a matrix or data frame \code{X}, the default behavior of R when calling \code{X op v} (such as multiplication \code{X * v}) is to perform the operation of \code{v} with each column of \code{X}. The equivalent operation is performed by \code{X \%cop\% V}, with the difference that it computes significantly faster if \code{X}/\code{V} is a data frame / list. A more complex but frequently required task is to perform an operation with \code{v} on each row of \code{X}. This is provided based on efficient C++ code by the \code{\%rop\%} set of functions, e.g. \code{X \%r*\% v} efficiently multiplies \code{v} to each row of \code{X}. } \value{ \code{X} where the operation with \code{v} / \code{V} was performed on each row or column. All attributes of \code{X} are preserved. } \note{ \emph{Computations and Output:} These functions are all quite simple, they only work with \code{X} on the LHS i.e. \code{v \%op\% X} will likely fail. The row operations are simple wrappers around \code{\link{TRA}} which provides more operations including grouped replacing and sweeping (where \code{v} would be a matrix or data frame with less rows than \code{X} being mapped to the rows of \code{X} by grouping vectors). One consequence is that just like \code{\link{TRA}}, row-wise mathematical operations (+, -, *, /) always yield numeric output, even if both \code{X} and \code{v} may be integer. This is different for column- operations which depend on base R and may also preserve integer data. \emph{Rules of Arithmetic:} Since these operators are defined as simple infix functions, the normal rules of arithmetic are not respected. So \code{a \%c+\% b \%c*\% c} evaluates as \code{(a \%c+\% b) \%c*\% c}. As with all chained infix operations, they are just evaluated sequentially from left to right. \emph{Performance Notes:} The function \code{\link{setop}} and a related set of \code{\%op=\%} operators as well as the \code{\link{setTRA}} function can be used to perform these operations by reference, and are faster if copies of the output are not required!! Furthermore, for Fast Statistical Functions, using \code{fmedian(X, TRA = "-")} will be a tiny bit faster than \code{X \%r-\% fmedian(X)}. Also use \code{fwithin(X)} for fast centering using the mean, and \code{fscale(X)} for fast scaling and centering or mean-preserving scaling. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{setop}}, \code{\link{TRA}}, \code{\link{dapply}}, \link[=efficient-programming]{Efficient Programming}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Using data frame's / lists v <- mtcars$cyl mtcars \%cr\% v mtcars \%c-\% v mtcars \%r-\% seq_col(mtcars) mtcars \%r-\% lapply(mtcars, quantile, 0.28) mtcars \%c*\% 5 # Significantly faster than mtcars * 5 mtcars \%c*\% mtcars # Significantly faster than mtcars * mtcars ## Using matrices X <- qM(mtcars) X \%cr\% v X \%c-\% v X \%r-\% dapply(X, quantile, 0.28) ## Chained Operations library(magrittr) # Needed here to evaluate infix operators in sequence mtcars \%>\% fwithin() \%r-\% rnorm(11) \%c*\% 5 \%>\% tfm(mpg = fsum(mpg)) \%>\% qsu() } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{manip} \keyword{math} % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/collapse-renamed.Rd0000644000176200001440000000417214777170130016344 0ustar liggesusers\name{collapse-renamed} \alias{collapse-renamed} \alias{.COLLAPSE_OLD} \alias{fNobs} \alias{fNobs.default} \alias{fNobs.matrix} \alias{fNobs.data.frame} \alias{fNobs.grouped_df} \alias{fNdistinct} \alias{fNdistinct.default} \alias{fNdistinct.matrix} \alias{fNdistinct.data.frame} \alias{fNdistinct.grouped_df} \alias{fHDwithin} \alias{fHDwithin.default} \alias{fHDwithin.matrix} \alias{fHDwithin.data.frame} \alias{fHDwithin.pseries} \alias{fHDwithin.pdata.frame} \alias{fHDwithin.grouped_df} \alias{fHDbetween} \alias{fHDbetween.default} \alias{fHDbetween.matrix} \alias{fHDbetween.data.frame} \alias{fHDbetween.pseries} \alias{fHDbetween.pdata.frame} \alias{fHDbetween.grouped_df} \alias{replace_NA} \alias{replace_Inf} % \alias{pwNobs} % \alias{as.factor_GRP} % \alias{as.factor_qG} % \alias{is.GRP} % \alias{is.qG} % \alias{is.unlistable} % \alias{is.categorical} % \alias{is.Date} % \alias{as.character_factor} % \alias{as.numeric_factor} % \alias{Date_vars} % \alias{Date_vars<-} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Renamed Functions } \description{ These functions were renamed (mostly during v1.6.0 update) to make the namespace more consistent. % Except for the S3 generics of \code{fNobs}, \code{fNdistinct}, \code{fHDbetween} and \code{fHDwithin}, and functions \code{replace_NA} and \code{replace_Inf}, I intend to remove all of these functions by end of 2023. %The S3 generics and the other functions will be depreciated in 2023 for the earliest. These all now give a message reminding you not to use them in fresh code. } \section{Renaming}{\if{html}{\out{
}}\preformatted{ fNobs -> fnobs fNdistinct -> fndistinct fHDwithin -> fhdwithin fHDbetween -> fhdbetween replace_NA -> replace_na replace_Inf -> replace_inf % pwNobs -> pwnobs % as.factor_GRP -> as_factor_GRP % as.factor_qG -> as_factor_qG % is.GRP -> is_GRP % is.qG -> is_qG % is.unlistable -> is_unlistable % is.categorical -> is_categorical % is.Date -> is_date % as.numeric_factor -> as_numeric_factor % as.character_factor -> as_character_factor % Date_vars -> date_vars % `Date_vars<-` -> `date_vars<-` }\if{html}{\out{
}} } collapse/man/collapse-documentation.Rd0000644000176200001440000003601315202504365017574 0ustar liggesusers\name{collapse-documentation} \alias{A0-collapse-documentation} \alias{collapse-documentation} \alias{.COLLAPSE_TOPICS} \alias{.COLLAPSE_ALL} \alias{.COLLAPSE_GENERIC} \alias{.COLLAPSE_DATA} % \docType{package} \title{Collapse Documentation & Overview} \description{ The following table fully summarizes the contents of \emph{\link{collapse}}. The documentation is structured hierarchically: This is the main overview page, linking to topical overview pages and associated function pages (unless functions are documented on the topic page). % Calling \code{?FUN} brings up the documentation page for \code{FUN}, with links to associated topic pages and closely related functions. % Calling \code{help(FUN)} still brings up the right / most relevant page documenting the function. % % Functions with separate documentation entries (apart from the topic page) are linked. % Each topic further has its own overview page in the documentation. % , linking to functions % , i.e. only functions with separate pages are linked here } \section{Topics and Functions}{ \tabular{lllll}{ \emph{ Topic } \tab\tab \emph{ Main Features / Keywords} \tab\tab \emph{ Functions } \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \link[=fast-statistical-functions]{Fast Statistical Functions} \tab\tab Fast (grouped and weighted) statistical functions for vector, matrix, data frame and grouped data frames (class 'grouped_df', \emph{dplyr} compatible). \tab\tab \code{\link{fsum}}, \code{\link{fprod}}, \code{\link{fmean}}, \code{\link{fmedian}}, \code{\link{fmode}}, \code{\link{fvar}}, \code{\link{fsd}}, \code{\link{fmin}}, \code{\link{fmax}}, \code{\link{fnth}}, \code{\link{ffirst}}, \code{\link{flast}}, \code{\link{fnobs}}, \code{\link{fndistinct}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=fast-grouping-ordering]{Fast Grouping and Ordering} \tab\tab Fast (ordered) groupings from vectors, data frames, lists. 'GRP' objects are efficient inputs for programming with \emph{collapse}'s fast functions. \code{fgroup_by} can attach them to a data frame, for fast dplyr-style grouped computations. Fast splitting of vectors based on 'GRP' objects. Fast radix-based ordering and hash-based grouping (the workhorses behind \code{GRP}). Fast matching (rows) and unique values/rows, group counts, factor generation, vector grouping, interactions, dropping unused factor levels, generalized run-length type grouping and grouping of integer sequences and time vectors. % (to optimize different / repeated computations over the same groups). \tab\tab \code{\link{GRP}}, \code{\link{as_factor_GRP}}, \code{\link{GRPN}}, \code{\link{GRPid}}, \code{\link{GRPnames}}, \code{\link{is_GRP}}, \code{\link{fgroup_by}}, \code{\link{group_by_vars}}, \code{\link{fgroup_vars}}, \code{\link{fungroup}}, \code{\link{gsplit}}, \code{\link{greorder}}, \code{\link[=radixorder]{radixorder(v)}}, \code{\link[=group]{group(v)}}, \code{\link{fmatch}}, \code{\link{ckmatch}}, \code{\link[=fmatch]{\%!in\%}}, \code{\link[=fmatch]{\%[!]iin\%}}, \code{\link{funique}}, \code{\link{fnunique}}, \code{\link{fduplicated}}, \code{\link{any_duplicated}}, \code{\link[=fcount]{fcount(v)}}, \code{\link{qF}}, \code{\link{qG}}, \code{\link{is_qG}}, \code{\link{finteraction}}, \code{\link{fdroplevels}}, \code{\link{groupid}}, \code{\link{seqid}}, \code{\link{timeid}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr % \code{GRP} creates 'GRP' objects, and \code{fgroup_by} can be used to attach them to a data frame (analogous to \code{dplyr::group_by}) % (speed about 2x '[' for selecting and 4x '[<-' for replacing). %, get data, variables names, variable indices \link[=fast-data-manipulation]{Fast Data Manipulation} \tab\tab Fast and flexible select, subset, slice, summarise, mutate/transform, sort/reorder, combine, join, reshape, rename and relabel data. Some functions modify by reference and/or allow assignment. In addition a set of (standard evaluation) functions for fast selecting, replacing or adding data frame columns, including shortcuts to select and replace variables by data type. \tab\tab \code{\link[=fselect]{fselect(<-)}}, \code{\link[=fsubset]{fsubset/ss}}, \code{\link[=fslice]{fslice(v)}}, \code{\link{fsummarise}}, \code{\link{fmutate}}, \code{\link{across}}, \code{\link[=ftransform]{(f/set)transform(v)(<-)}}, \code{\link[=fcompute]{fcompute(v)}}, \code{\link[=roworder]{roworder(v)}}, \code{\link[=colorder]{colorder(v)}}, \code{\link{rowbind}}, \code{\link{join}}, \code{\link{pivot}}, \code{\link[=frename]{(f/set)rename}}, \code{\link[=relabel]{(set)relabel}}, \code{\link[=get_vars]{get_vars(<-)}}, \code{\link[=add_vars]{add_vars(<-)}}, \code{\link[=num_vars]{num_vars(<-)}}, \code{\link[=cat_vars]{cat_vars(<-)}}, \code{\link[=char_vars]{char_vars(<-)}}, \code{\link[=fact_vars]{fact_vars(<-)}}, \code{\link[=logi_vars]{logi_vars(<-)}}, \code{\link[=date_vars]{date_vars(<-)}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=quick-conversion]{Quick Data Conversion} \tab\tab Quick conversions: data.frame <> data.table <> tibble <> matrix (row- or column-wise) <> list | array > matrix, data.frame, data.table, tibble | vector > factor, matrix, data.frame, data.table, tibble; and converting factors / all factor columns. \tab\tab \code{qDF}, \code{qDT}, \code{qTBL}, \code{qM}, \code{qF}, \code{mrtl}, \code{mctl}, \code{as_numeric_factor}, \code{as_integer_factor}, \code{as_character_factor} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=advanced-aggregation]{Advanced Data Aggregation} \tab\tab Fast and easy (weighted and parallelized) aggregation of multi-type data, with different functions applied to numeric and categorical variables. Custom specifications allow mappings of functions to variables + renaming. \tab\tab \code{collap(v/g)} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=data-transformations]{Data Transformations} \tab\tab Fast row- and column- arithmetic and (object preserving) apply functionality for vectors, matrices and data frames. Fast (grouped) replacing and sweeping of statistics (by reference) and (grouped and weighted) scaling / standardizing, (higher-dimensional) between- and within-transformations (i.e. averaging and centering), linear prediction and partialling out. %Additional methods for grouped_df (\emph{dplyr}) and pseries, pdata.frame (\emph{plm}). \tab\tab \code{\link[=arithmetic]{\%(r/c)r\%}}, \code{\link[=arithmetic]{\%(r/c)(+/-/*//)\%}}, \code{\link{dapply}}, \code{\link{BY}}, \code{\link[=TRA]{(set)TRA}}, \code{\link[=fscale]{fscale/STD}}, \code{\link[=fbetween]{fbetween/B}}, \code{\link[=fwithin]{fwithin/W}}, \code{\link[=HDB]{fhdbetween/HDB}}, \code{\link[=HDW]{fhdwithin/HDW}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr Linear Models \tab\tab Fast (weighted) linear model fitting with 6 different solvers and a fast F-test to test exclusion restrictions on linear models with (large) factors. \tab\tab \code{\link{flm}}, \code{\link{fFtest}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=time-series-panel-series]{Time Series and Panel Series} \tab\tab Fast and class-agnostic indexed time series and panel data objects, check for irregularity in time series and panels, and efficient time-sequence to integer/factor conversion. Fast (sequences of) lags / leads and (lagged / leaded and iterated, quasi-, log-) differences, and (compounded) growth rates on (irregular) time series and panel data. Flexible cumulative sums. Panel data to array conversions. Multivariate panel- auto-, partial- and cross-correlation functions. %Additional methods for grouped_df (\emph{dplyr}) and pseries, pdata.frame (\emph{plm}). \tab\tab \code{\link{findex_by}}, \code{\link{findex}}, \code{\link{unindex}}, \code{\link{reindex}}, \code{\link{is_irregular}}, \code{\link{to_plm}}, \code{\link{timeid}}, \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}}, \code{\link[=fgrowth]{fgrowth/G}}, \code{\link{fcumsum}}, \code{\link{psmat}}, \code{\link{psacf}}, \code{\link{pspacf}}, \code{\link{psccf}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=summary-statistics]{Summary Statistics} \tab\tab Fast (grouped and weighted) summary statistics for cross-sectional and panel data. Fast (weighted) cross tabulation. Efficient detailed description of data frame. Fast check of variation in data (within groups / dimensions). (Weighted) pairwise correlations and covariances (with obs. and p-value), pairwise observation count. %Some additional methods for grouped_df (\emph{dplyr}) pseries and pdata.frame (\emph{plm}). \tab\tab \code{\link{qsu}}, \code{\link{qtab}}, \code{\link{descr}}, \code{\link{varying}}, \code{\link{pwcor}}, \code{\link{pwcov}}, \code{\link{pwnobs}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr Other Statistical \tab\tab Fast euclidean distance computations, (weighted) sample quantiles, and range of vector. \tab\tab \code{\link{fdist}}, \code{\link{fquantile}}, \code{\link{frange}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=list-processing]{List Processing} \tab\tab (Recursive) list search and checks, extraction of list-elements / list-subsetting, fast (recursive) splitting, list-transpose, apply functions to lists of data frames / data objects, and generalized recursive row-binding / unlisting in 2-dimensions / to data frame. \tab\tab \code{\link{is_unlistable}}, \code{\link{ldepth}}, \code{\link{has_elem}}, \code{\link{get_elem}}, \code{\link[=atomic_elem]{atomic_elem(<-)}}, \code{\link[=list_elem]{list_elem(<-)}}, \code{\link{reg_elem}}, \code{\link{irreg_elem}}, \code{\link{rsplit}}, \code{\link{t_list}}, \code{\link{rapply2d}}, \code{\link{unlist2d}}, \code{\link{rowbind}} \cr % (within- and between-groups) ww %Visual Data Exploration \tab\tab Pretty (grouped, weighted, and panel-decomposed) histogram-, density-, scatter- and correlation plots \tab\tab histplot, densplot, scplot, corplot \cr \link[=recode-replace]{Recode and Replace Values} \tab\tab Recode multiple values (exact or regex matching) and replace \code{NaN/Inf/-Inf} and outliers (according to 1- or 2-sided threshold or standard-deviations) in vectors, matrices or data frames. Insert a value at arbitrary positions into vectors, matrices or data frames. \tab\tab \code{recode_num}, \code{recode_char}, \code{replace_na}, \code{replace_inf}, \code{replace_outliers}, \code{\link{pad}} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=efficient-programming]{(Memory) Efficient Programming} \tab\tab Efficient comparisons of a vector/matrix with a value, and replacing values/rows in vector/matrix/DF (avoiding logical vectors or subsets), faster generation of initialized vectors, and fast mathematical operations on vectors/matrices/DF's with no copies at all. Fast missing value detection, (random) insertion and removal/replacement, lengths and C storage types, greatest common divisor of vector, \code{nlevels} for factors, \code{nrow}, \code{ncol}, \code{dim} (for data frames) and \code{seq_along} rows or columns. Fast vectorization of matrices and lists, and choleski inverse of symmetric PD matrix. \tab\tab \code{anyv}, \code{allv}, \code{allNA}, \code{whichv}, \code{whichNA}, \code{\%==\%}, \code{\%!=\%}, \code{copyv}, \code{setv}, \code{alloc}, \code{setop}, \code{\%+=\%}, \code{\%-=\%}, \code{\%*=\%}, \code{\%/=\%}, \code{missing_cases}, \code{na_insert}, \code{na_rm}, \code{na_locf}, \code{na_focb}, \code{na_omit}, \code{vlengths}, \code{vtypes}, \code{vgcd}, \code{fnlevels}, \code{fnrow}, \code{fncol}, \code{fdim}, \code{seq_row}, \code{seq_col}, \code{vec}, \code{cinv} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=small-helpers]{Small (Helper) Functions} \tab\tab Multiple-assignment, non-standard concatenation, set and extract variable labels and classes, display variable names and labels together, add / remove prefix or postfix to / from column names, check exact or near / numeric equality of multiple objects or of all elements in a list, get names of functions called in an expression, return object with dimnames, row- or colnames efficiently set, or with all attributes removed, C-level functions to set and shallow-copy attributes, identify categorical (non-numeric) and date(-time) objects. \tab\tab \code{massign}, \code{\%=\%}, \code{.c}, \code{vlabels(<-)}, \code{setLabels}, \code{vclasses}, \code{namlab}, \code{add_stub}, \code{rm_stub}, \code{all_identical}, \code{all_obj_equal}, \code{all_funs}, \code{setDimnames}, \code{setRownames}, \code{setColnames}, \code{unattrib}, \code{setAttrib}, \code{setattrib}, \code{copyAttrib}, \code{copyMostAttrib}, \code{is_categorical}, \code{is_date} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr Data and Global Macros \tab\tab Groningen Growth and Development Centre 10-Sector Database, World Bank World Development dataset, and some global macros containing links to the topical documentation pages (including this page), all exported objects (excluding exported S3 methods and depreciated functions), all generic functions (excluding depreciated), the 2 datasets, depreciated functions, all fast functions, all fast statistical (scalar-valued) functions, and all transformation operators (these are not infix functions but function shortcuts resembling operators in a statistical sense, such as the lag/lead operators \code{L}/\code{F}, both wrapping \code{flag}, see \code{\link{.OPERATOR_FUN}}). \tab\tab \code{\link{GGDC10S}, \link{wlddev}, .COLLAPSE_TOPICS, .COLLAPSE_ALL, .COLLAPSE_GENERIC, .COLLAPSE_DATA, .COLLAPSE_OLD, .FAST_FUN, .FAST_STAT_FUN, .OPERATOR_FUN} \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr \link[=collapse-options]{Package Options} \tab\tab \code{set_collapse}/\code{get_collapse} can be used to globally set/get the defaults for \code{na.rm}, \code{nthreads} and \code{sort}, etc., arguments found in many functions, and to globally control the namespace with options 'mask' and 'remove': 'mask' can be used to mask base R/dplyr functions by export copies of equivalent \emph{collapse} functions starting with \code{"f"}, removing the leading \code{"f"} (e.g. exporting \code{subset <- fsubset}). 'remove' allows removing arbitrary functions from the exported namespace. \code{options("collapse_unused_arg_action")} sets the action taken by generic statistical functions when unknown arguments are passed to a method. The default is \code{"warning"}. \tab\tab \code{set_collapse}, \code{get_collapse} } } \section{Details}{ The added top-level documentation infrastructure in \emph{collapse} allows you to effectively navigate the package. % (as in other commercial software documentations like Mathematica). Calling \code{?FUN} brings up the documentation page documenting the function, which contains links to associated topic pages and closely related functions. You can also call topical documentation pages directly from the console. The links to these pages are contained in the global macro \code{.COLLAPSE_TOPICS} (e.g. calling \code{help(.COLLAPSE_TOPICS[1])} brings up this page). } \author{ \bold{Maintainer}: Sebastian Krantz \email{sebastian.krantz@graduateinstitute.ch} } \references{ Krantz S (2026). \emph{collapse}: Advanced and Fast Statistical Computing and Data Transformation in R. \emph{Journal of Statistical Software} \bold{116}(1), 1--38. \doi{10.18637/jss.v116.i01} } \seealso{ \link{collapse-package} } % \keyword{package} \keyword{documentation} collapse/man/recode-replace.Rd0000644000176200001440000002005314777170130015777 0ustar liggesusers\name{recode-replace} \alias{AA1-recode-replace} \alias{recode-replace} \alias{recode_num} \alias{recode_char} \alias{replace_na} \alias{replace_inf} \alias{replace_outliers} \title{ Recode and Replace Values in Matrix-Like Objects } \description{ A small suite of functions to efficiently perform common recoding and replacing tasks in matrix-like objects. } \usage{ recode_num(X, \dots, default = NULL, missing = NULL, set = FALSE) recode_char(X, \dots, default = NULL, missing = NULL, regex = FALSE, ignore.case = FALSE, fixed = FALSE, set = FALSE) replace_na(X, value = 0, cols = NULL, set = FALSE, type = "const") replace_inf(X, value = NA, replace.nan = FALSE, set = FALSE) replace_outliers(X, limits, value = NA, single.limit = c("sd", "mad", "min", "max"), ignore.groups = FALSE, set = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a vector, matrix, array, data frame or list of atomic objects. \code{replace_outliers} has internal methods for \link[=fgroup_by]{grouped} and \link[=findex_by]{indexed} data.} \item{\dots}{comma-separated recode arguments of the form: \code{value = replacement, `2` = 0, Secondary = "SEC"} etc. \code{recode_char} with \code{regex = TRUE} also supports regular expressions i.e. \code{`^S|D$` = "STD"} etc.} \item{default}{optional argument to specify a scalar value to replace non-matched elements with.} \item{missing}{optional argument to specify a scalar value to replace missing elements with. \emph{Note} that to increase efficiency this is done before the rest of the recoding i.e. the recoding is performed on data where missing values are filled!} \item{set}{logical. \code{TRUE} does replacements by reference (i.e. in-place modification of the data) and returns the result invisibly.} \item{type}{character. One of \code{"const"}, \code{"locf"} (last non-missing observation carried forward) or \code{"focb"} (first non-missing observation carried back). The latter two ignore \code{value}.} \item{regex}{logical. If \code{TRUE}, all recode-argument names are (sequentially) passed to \code{\link{grepl}} as a pattern to search \code{X}. All matches are replaced. \emph{Note} that \code{NA}'s are also matched as strings by \code{grepl}. } \item{value}{a single (scalar) value to replace matching elements with. In \code{replace_outliers} setting \code{value = "clip"} will replace outliers with the corresponding threshold values. See Examples.} \item{cols}{select columns to replace missing values in using a function, column names, indices or a logical vector.} \item{replace.nan}{logical. \code{TRUE} replaces \code{NaN/Inf/-Inf}. \code{FALSE} (default) replaces only \code{Inf/-Inf}.} \item{limits}{either a vector of two-numeric values \code{c(minval, maxval)} constituting a two-sided outlier threshold, or a single numeric value:} \item{single.limit}{character, controls the behavior if \code{length(limits) == 1}: \itemize{ \item \code{"sd"/"mad":} \code{limits} will be interpreted as a (two-sided) outlier threshold in terms of (column) standard deviations/median absolute deviations. For the standard deviation this is equivalent to \code{X[abs(fscale(X)) > limits] <- value}. Since \code{fscale} is S3 generic with methods for 'grouped_df', 'pseries' and 'pdata.frame', the standardizing will be grouped if such objects are passed (i.e. the outlier threshold is then measured in within-group standard deviations) unless \code{ignore.groups = TRUE}. The same holds for median absolute deviations. \item \code{"min"/"max":} \code{limits} will be interpreted as a (one-sided) minimum/maximum threshold. The underlying code is equivalent to \code{X[X limits] <- value}. } } \item{ignore.groups}{logical. If \code{length(limits) == 1} and \code{single.limit \%in\% c("sd", "mad")} and \code{X} is a 'grouped_df', 'pseries' or 'pdata.frame', \code{TRUE} will ignore the grouped nature of the data and calculate outlier thresholds on the entire dataset rather than within each group.} \item{ignore.case, fixed}{logical. Passed to \code{\link{grepl}} and only applicable if \code{regex = TRUE}.} } \details{ \itemize{ \item \code{recode_num} and \code{recode_char} can be used to efficiently recode multiple numeric or character values, respectively. The syntax is inspired by \code{dplyr::recode}, but the functionality is enhanced in the following respects: (1) when passed a data frame / list, all appropriately typed columns will be recoded. (2) They preserve the attributes of the data object and of columns in a data frame / list, and (3) \code{recode_char} also supports regular expression matching using \code{\link{grepl}}. \item \code{replace_na} efficiently replaces \code{NA/NaN} with a value (default is \code{0}). data can be multi-typed, in which case appropriate columns can be selected through the \code{cols} argument. For numeric data a more versatile alternative is provided by \code{data.table::nafill} and \code{data.table::setnafill}. \item \code{replace_inf} replaces \code{Inf/-Inf} (or optionally \code{NaN/Inf/-Inf}) with a value (default is \code{NA}). It skips non-numeric columns in a data frame. \item \code{replace_outliers} replaces values falling outside a 1- or 2-sided numeric threshold or outside a certain number of standard deviations or median absolute deviation with a value (default is \code{NA}). It skips non-numeric columns in a data frame. } } % \value{ %% ~Describe the value returned %% If it is a LIST, use %% \item{comp1 }{Description of 'comp1'} %% \item{comp2 }{Description of 'comp2'} %% \dots % } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } \note{ These functions are not generic and do not offer support for factors or date(-time) objects. see \code{dplyr::recode_factor}, \emph{forcats} and other appropriate packages for dealing with these classes. Simple replacing tasks on a vector can also effectively be handled by, \code{\link{setv}} / \code{\link{copyv}}. Fast vectorized switches are offered by package \emph{kit} (functions \code{iif}, \code{nif}, \code{vswitch}, \code{nswitch}) as well as \code{data.table::fcase} and \code{data.table::fifelse}. Using switches is more efficient than \code{recode_*}, as \code{recode_*} creates an internal copy of the object to enable cross-replacing. Function \code{\link{TRA}}, and the associated \code{TRA} ('transform') argument to \link[=fast-statistical-functions]{Fast Statistical Functions} also has option \code{"replace_na"}, to replace missing values with a statistic computed on the non-missing observations, e.g. \code{fmedian(airquality, TRA = "replace_na")} does median imputation. } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{pad}}, \link[=efficient-programming]{Efficient Programming}, \link[=collapse-documentation]{Collapse Overview} } \examples{ recode_char(c("a","b","c"), a = "b", b = "c") recode_char(month.name, ber = NA, regex = TRUE) mtcr <- recode_num(mtcars, `0` = 2, `4` = Inf, `1` = NaN) replace_inf(mtcr) replace_inf(mtcr, replace.nan = TRUE) replace_outliers(mtcars, c(2, 100)) # Replace all values below 2 and above 100 w. NA replace_outliers(mtcars, c(2, 100), value = "clip") # Clipping outliers to the thresholds replace_outliers(mtcars, 2, single.limit = "min") # Replace all value smaller than 2 with NA replace_outliers(mtcars, 100, single.limit = "max") # Replace all value larger than 100 with NA replace_outliers(mtcars, 2) # Replace all values above or below 2 column- # standard-deviations from the column-mean w. NA replace_outliers(fgroup_by(iris, Species), 2) # Passing a grouped_df, pseries or pdata.frame # allows to remove outliers according to # in-group standard-deviation. see ?fscale } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{documentation} collapse/man/pivot.Rd0000644000176200001440000004641015202400476014264 0ustar liggesusers\name{pivot} \alias{pivot} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast and Easy Data Reshaping } \description{ \code{pivot()} is \emph{collapse}'s data reshaping command. It combines longer-, wider-, and recast-pivoting functionality in a single parsimonious API. Notably, it can also accommodate variable labels. } \usage{ pivot(data, # Summary of Documentation: ids = NULL, # identifier cols to preserve values = NULL, # cols containing the data names = NULL, # name(s) of new col(s) | col(s) containing names labels = NULL, # name of new labels col | col(s) containing labels how = "longer", # method: "longer"/"l", "wider"/"w" or "recast"/"r" na.rm = FALSE, # remove rows missing 'values' in reshaped data factor = c("names", "labels"), # create new id col(s) as factor variable(s)? check.dups = FALSE, # detect duplicate 'ids'+'names' combinations # Only apply if how = "wider" or "recast" FUN = "last", # aggregation function (internal or external) FUN.args = NULL, # list of arguments passed to aggregation function nthreads = .op[["nthreads"]], # minor gains as grouping remains serial fill = NULL, # value to insert for unbalanced data (default NA/NULL) drop = TRUE, # drop unused levels (=columns) if 'names' is factor sort = FALSE, # "ids": sort 'ids' and/or "names": alphabetic casting # Only applies if how = "wider" with multiple long columns ('values') transpose = FALSE # "columns": applies t_list() before flattening, and/or ) # "names": sets names nami_colj. default: colj_nami } %- maybe also 'usage' for other objects documented here. \arguments{ \item{data}{data frame-like object (list of equal-length columns).} \item{ids}{identifier columns to keep. Specified using column names, indices, a logical vector or an identifier function e.g. \code{\link{is_categorical}}.} \item{values}{columns containing the data to be reshaped. Specified like \code{ids}. } \item{names}{names of columns to generate, or retrieve variable names from: \tabular{lll}{\code{ how } \tab\tab \emph{ Description } \cr \tab\tab \cr \code{"longer"} \tab\tab list of names for the variable and value column in the long format, respectively. If \code{NULL}, \code{list("variable", "value")} will be chosen. Alternatively, a named list length 1 or 2 can be provided using "variable"/"value" as keys e.g. \code{list(value = "data_col")}. \cr \tab\tab \cr \code{ "wider"} \tab\tab column(s) containing names of the new variables. Specified using a vector of column names, indices, a logical vector or selector function e.g. \code{is.character}. Multiple columns will be combined using \code{\link{finteraction}} with \code{"_"} as separator. \cr \tab\tab \cr \code{ "recast"} \tab\tab (named) list with the following elements: [[1]]/[["from"]] - column(s) containing names of the new variables, specified as in \code{"wider"}; [[2]]/[["to"]] - name of the variable to generate containing old column names. If \code{NULL}, \code{list("variable", "variable")} will be chosen. } } \item{labels}{ names of columns to generate, or retrieve variable labels from: \tabular{lll}{\code{ how } \tab\tab \emph{ Description } \cr \tab\tab \cr \code{"longer"} \tab\tab A string specifying the name of the column to store labels - retrieved from the data using \code{vlabels(values)}. \code{TRUE} will create a column named \code{"label"}. Alternatively, a (named) list with two elements: [[1]]/[["name"]] - the name of the labels column; [[2]]/[["new"]] - a (named) character vector of new labels for the 'variable', 'label' and 'value' columns in the long-format frame. See Examples. \cr \tab\tab \cr \code{ "wider"} \tab\tab column(s) containing labels of the new variables. Specified using a vector of column names, indices, a logical vector or selector function e.g. \code{is.character}. Multiple columns will be combined using \code{\link{finteraction}} with \code{" - "} as separator. \cr \tab\tab \cr \code{ "recast"} \tab\tab (named) list with the following elements: [[1]]/[["from"]] - column(s) containing labels for the new variables, specified as in \code{"wider"}; [[2]]/[["to"]] - name of the variable to generate containing old labels; [[3]]/[["new"]] - a (named) character vector of new labels for the generated 'variable' and 'label' columns. If [[1]]/[["from"]] is not supplied, this can also include labels for new variables. Omitting one of the elements via a named list or setting it to \code{NULL} in a list of 3 will omit the corresponding operation i.e. either not saving existing labels or not assigning new ones. } } \item{how}{ character. The pivoting method: one of \code{"longer"}, \code{"wider"} or \code{"recast"}. These can be abbreviated by the first letter i.e. \code{"l"/"w"/"r"}. } \item{na.rm}{ logical. \code{TRUE} will remove missing values such that in the reshaped data there is no row missing all data columns - selected through 'values'. For wide/recast pivots using internal \code{FUN}'s \code{"first"/"last"/"count"}, this also toggles skipping of missing values. } \item{factor}{ character. Whether to generate new 'names' and/or 'labels' columns as factor variables. This is generally recommended as factors are more memory efficient than character vectors and also faster in subsequent filtering and grouping. Internally, this argument is evaluated as \code{factor <- c("names", "labels") \%in\% factor}, so passing anything other than \code{"names"} and/or \code{"labels"} will disable it. } \item{check.dups}{ logical. \code{TRUE} checks for duplicate 'ids'+'names' combinations, and, if 'labels' are specified, also for duplicate 'names'+'labels' combinations. The default \code{FALSE} implies that the algorithm just runs through the data, leading effectively to the \code{FUN} option to be executed (default last value). See Details. } \item{FUN}{ function to aggregate values. At present, only a single function is allowed. \link[=fast-statistical-functions]{Fast Statistical Functions} receive vectorized execution. For maximum efficiency, a small set of internal functions is provided: \code{"first"}, \code{"last"}, \code{"count"}, \code{"sum"}, \code{"mean"}, \code{"min"}, or \code{"max"}. In options \code{"first"/"last"/"count"} setting \code{na.rm = TRUE} skips missing values. In options \code{"sum"/"mean"/"min"/"max"} missing values are always skipped (see Details why). The \code{fill} argument is ignored in \code{"count"/"sum"/"mean"/"min"/"max"} (\code{"count"/"sum"} force \code{fill = 0} else \code{NA} is used). } \item{FUN.args}{ (optional) list of arguments passed to \code{FUN} (if using an external function). Data-length arguments such as weight vectors are supported. } \item{nthreads}{ integer. if \code{how = "wider"|"recast"}: number of threads to use with OpenMP (default \code{get_collapse("nthreads")}, initialized to 1). Only the distribution of values to columns with \code{how = "wider"|"recast"} is multithreaded here. Since grouping id columns on a long data frame is expensive and serial, the gains are minor. With \code{how = "long"}, multithreading does not make much sense as the most expensive operation is allocating the long results vectors. The rest is a couple of \code{memset()}'s in C to copy the values. } \item{fill}{if \code{how = "wider"|"recast"}: value to insert for 'ids'-'names' combinations not present in the long format. \code{NULL} uses \code{NA} for atomic vectors and \code{NULL} for lists. } \item{drop}{ logical. if \code{how = "wider"|"recast"} and 'names' is a single factor variable: \code{TRUE} will check for and drop unused levels in that factor, avoiding the generation of empty columns. } \item{sort}{ if \code{how = "wider"|"recast"}: specifying \code{"ids"} applies ordered grouping on the id-columns, returning data sorted by ids. Specifying \code{"names"} sorts the names before casting (unless 'names' is a factor), yielding columns cast in alphabetic order. Both options can be passed as a character vector, or, alternatively, \code{TRUE} can be used to enable both. } \item{transpose}{ if \code{how = "wider"|"recast"} and multiple columns are selected through 'values': specifying \code{"columns"} applies \code{\link{t_list}} to the result before flattening, resulting in a different column order. Specifying \code{"names"} generates names of the form nami_colj, instead of colj_nami. Both options can be passed as a character vector, or, alternatively, \code{TRUE} can be used to enable both. } } \details{ Pivot wider essentially works as follows: compute \code{g_rows = group(ids)} and also \code{g_cols = group(names)} (using \code{\link{group}} if \code{sort = FALSE}). \code{g_rows} gives the row-numbers of the wider data frame and \code{g_cols} the column numbers. Then, a C function generates a wide data frame and runs through each long column ('values'), assigning each value to the corresponding row and column in the wide frame. In this process \code{FUN} is always applied. The default, \code{"last"}, does nothing at all, i.e., if there are duplicates, some values are overwritten. \code{"first"} works similarly just that the C-loop is executed the other way around. The other hard-coded options count, sum, average, or compare observations on the fly. Missing values are internally skipped for statistical functions as there is no way to distinguish an incoming \code{NA} from an initial \code{NA} - apart from counting occurrences using an internal structure of the same size as the result data frame which is costly and thus not implemented. When passing an R-function to \code{FUN}, the data is grouped using \code{g_full = group(g_rows, g_cols)}, aggregated by groups, and expanded again to full length using \code{\link{TRA}} before entering the reshaping algorithm. Thus, this is significantly more expensive than the optimized internal functions. With \link[=fast-statistical-functions]{Fast Statistical Functions} the aggregation is vectorized across groups, other functions are applied using \code{\link{BY}} - by far the slowest option. % Since the algorithm runs through the data from first to last row, this amounts to removing duplicates by taking the last value for each set of 'ids' - in first-appearance order. If \code{check.dups = TRUE}, a check of the form \code{fnunique(list(g_rows, g_cols)) < fnrow(data)} is run, and an informative warning is issued if duplicates are found. Recast pivoting works similarly. In long pivots \code{FUN} is ignored and the check simply amounts to \code{fnunique(ids) < fnrow(data)}. % Making this check optional ensures greater performance, but also requires the user to exercise discretion i.e. know your data or invoke the check. } \value{ A reshaped data frame with the same class and attributes (except for 'names'/'row-names') as the input frame. } \note{ Leaving either 'ids' or 'values' empty will assign all other columns (except for \code{"variable"} if \code{how = "wider"|"recast"}) to the non-specified argument. It is also possible to leave both empty, e.g. for complete melting if \code{how = "longer"} or data transposition if \code{how = "recast"} (similar to \code{data.table::transpose} but supporting multiple names columns and variable labels). See Examples. \code{pivot} currently does not support concurrently melting/pivoting longer to multiple columns. See \code{data.table::melt} or \code{pivot_longer} from \emph{tidyr} or \emph{tidytable} for an efficient alternative with this feature. It is also possible to achieve this with just a little bit of programming. An example is provided below. % Currently I don't see a 'complete' (like \code{pivot_longer}) way of including this feature in the API, and in general I don't see this as a very well-defined operation. But I am open to suggestions. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{collap}}, \code{\link{vec}}, \code{\link{rowbind}}, \code{\link{unlist2d}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # -------------------------------- PIVOT LONGER --------------------------------- # Simple Melting (Reshaping Long) pivot(mtcars) |> head() pivot(iris, "Species") |> head() pivot(iris, values = 1:4) |> head() # Same thing # Using collapse's datasets head(wlddev) pivot(wlddev, 1:8, na.rm = TRUE) |> head() pivot(wlddev, c("iso3c", "year"), c("PCGDP", "LIFEEX"), na.rm = TRUE) |> head() head(GGDC10S) pivot(GGDC10S, 1:5, names = list("Sectorcode", "Value"), na.rm = TRUE) |> head() # Can also set by name: variable and/or value. Note that 'value' here remains lowercase pivot(GGDC10S, 1:5, names = list(variable = "Sectorcode"), na.rm = TRUE) |> head() # Melting including saving labels pivot(GGDC10S, 1:5, na.rm = TRUE, labels = TRUE) |> head() pivot(GGDC10S, 1:5, na.rm = TRUE, labels = "description") |> head() # Also assigning new labels pivot(GGDC10S, 1:5, na.rm = TRUE, labels = list("description", c("Sector Code", "Sector Description", "Value"))) |> namlab() # Can leave out value column by providing named vector of labels pivot(GGDC10S, 1:5, na.rm = TRUE, labels = list("description", c(variable = "Sector Code", description = "Sector Description"))) |> namlab() # Now here is a nice example that is explicit and respects the dataset naming conventions pivot(GGDC10S, ids = 1:5, na.rm = TRUE, names = list(variable = "Sectorcode", value = "Value"), labels = list(name = "Sector", new = c(Sectorcode = "GGDC10S Sector Code", Sector = "Long Sector Description", Value = "Employment or Value Added"))) |> namlab(N = TRUE, Nd = TRUE, class = TRUE) # Note that pivot() currently does not support melting to multiple columns # But you can tackle the issue with a bit of programming: wide <- pivot(GGDC10S, c("Country", "Year"), c("AGR", "MAN", "SUM"), "Variable", how = "wider", na.rm = TRUE) head(wide) library(magrittr) wide \%>\% {av(pivot(., 1:2, grep("_VA", names(.))), pivot(gvr(., "_EMP")))} |> head() wide \%>\% {av(av(gv(., 1:2), rm_stub(gvr(., "_VA"), "_VA", pre = FALSE)) |> pivot(1:2, names = list("Sectorcode", "VA"), labels = "Sector"), EMP = vec(gvr(., "_EMP")))} |> head() rm(wide) # -------------------------------- PIVOT WIDER --------------------------------- iris_long <- pivot(iris, "Species") # Getting a long frame head(iris_long) # If 'names'/'values' not supplied, searches for 'variable' and 'value' columns pivot(iris_long, how = "wider") # But here the records are not identified by 'Species': thus aggregation with last value: pivot(iris_long, how = "wider", check = TRUE) # issues a warning rm(iris_long) # This works better, these two are inverse operations wlddev |> pivot(1:8) |> pivot(how = "w") |> head() # ...but not perfect, we loose labels namlab(wlddev) wlddev |> pivot(1:8) |> pivot(how = "w") |> namlab() # But pivot() supports labels: these are perfect inverse operations wlddev |> pivot(1:8, labels = "label") |> print(max = 50) |> # Notice the "label" column pivot(how = "w", labels = "label") |> namlab() # If the data does not have 'variable'/'value' cols: need to specify 'names'/'values' # Using a single column: pivot(GGDC10S, c("Country", "Year"), "SUM", "Variable", how = "w") |> head() SUM_wide <- pivot(GGDC10S, c("Country", "Year"), "SUM", "Variable", how = "w", na.rm = TRUE) head(SUM_wide) # na.rm = TRUE here removes all new rows completely missing data tail(SUM_wide) # But there may still be NA's, notice the NA in the final row # We could use fill to set another value pivot(GGDC10S, c("Country", "Year"), "SUM", "Variable", how = "w", na.rm = TRUE, fill = -9999) |> tail() # This will keep the label of "SUM", unless we supply a column with new labels namlab(SUM_wide) # Such a column is not available here, but we could use "Variable" twice pivot(GGDC10S, c("Country", "Year"), "SUM", "Variable", "Variable", how = "w", na.rm = TRUE) |> namlab() # Alternatively, can of course relabel ex-post SUM_wide |> relabel(VA = "Value Added", EMP = "Employment") |> namlab() rm(SUM_wide) # Multiple-column pivots pivot(GGDC10S, c("Country", "Year"), c("AGR", "MAN", "SUM"), "Variable", how = "w", na.rm = TRUE) |> head() # Here we may prefer a transposed column order pivot(GGDC10S, c("Country", "Year"), c("AGR", "MAN", "SUM"), "Variable", how = "w", na.rm = TRUE, transpose = "columns") |> head() # Can also flip the order of names (independently of columns) pivot(GGDC10S, c("Country", "Year"), c("AGR", "MAN", "SUM"), "Variable", how = "w", na.rm = TRUE, transpose = "names") |> head() # Can also enable both (complete transposition) pivot(GGDC10S, c("Country", "Year"), c("AGR", "MAN", "SUM"), "Variable", how = "w", na.rm = TRUE, transpose = TRUE) |> head() # or tranpose = c("columns", "names") # Finally, here is a nice, simple way to reshape the entire dataset. pivot(GGDC10S, values = 6:16, names = "Variable", na.rm = TRUE, how = "w") |> namlab(N = TRUE, Nd = TRUE, class = TRUE) # -------------------------------- PIVOT RECAST --------------------------------- # Look at the data again head(GGDC10S) # Let's stack the sectors and instead create variable columns pivot(GGDC10S, .c(Country, Regioncode, Region, Year), names = list("Variable", "Sectorcode"), how = "r") |> head() # Same thing (a bit easier) pivot(GGDC10S, values = 6:16, names = list("Variable", "Sectorcode"), how = "r") |> head() # Removing missing values pivot(GGDC10S, values = 6:16, names = list("Variable", "Sectorcode"), how = "r", na.rm = TRUE) |> head() # Saving Labels pivot(GGDC10S, values = 6:16, names = list("Variable", "Sectorcode"), labels = list(to = "Sector"), how = "r", na.rm = TRUE) |> head() # Supplying new labels for generated columns: as complete as it gets pivot(GGDC10S, values = 6:16, names = list("Variable", "Sectorcode"), labels = list(to = "Sector", new = c(Sectorcode = "GGDC10S Sector Code", Sector = "Long Sector Description", VA = "Value Added", EMP = "Employment")), how = "r", na.rm = TRUE) |> namlab(N = TRUE, Nd = TRUE, class = TRUE) # Now another (slightly unconventional) use case here is data transposition # Let's get the data for Botswana BWA <- GGDC10S |> fsubset(Country == "BWA", Variable, Year, AGR:SUM) head(BWA) # By supplying no ids or values, we are simply requesting a transpose operation pivot(BWA, names = list(from = c("Variable", "Year"), to = "Sectorcode"), how = "r") # Same with labels pivot(BWA, names = list(from = c("Variable", "Year"), to = "Sectorcode"), labels = list(to = "Sector"), how = "r") # For simple cases, data.table::transpose() will be more efficient, but with multiple # columns to generate names and/or variable labels to be saved/assigned, pivot() is handy rm(BWA) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory (show via RShowDoc("KEYWORDS")): \keyword{ manip } % \keyword{ ~kwd2 } % Use only one keyword per line. % For non-standard keywords, use \concept instead of \keyword: % \concept{ ~cpt1 } % \concept{ ~cpt2 } % Use only one concept per line. collapse/man/fdiff.Rd0000644000176200001440000003325514777170130014213 0ustar liggesusers\name{fdiff} \alias{fdiff} \alias{fdiff.default} \alias{fdiff.matrix} \alias{fdiff.data.frame} \alias{fdiff.list} \alias{fdiff.pseries} \alias{fdiff.pdata.frame} \alias{fdiff.grouped_df} \alias{D} \alias{D.default} \alias{D.matrix} \alias{D.data.frame} \alias{D.list} \alias{D.pseries} \alias{D.pdata.frame} \alias{D.grouped_df} \alias{Dlog} \alias{Dlog.default} \alias{Dlog.matrix} \alias{Dlog.data.frame} \alias{Dlog.list} \alias{Dlog.pseries} \alias{Dlog.pdata.frame} \alias{Dlog.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ % Lagged and Iterated Fast (Quasi-, Log-) Differences for Time Series and Panel Data } \description{ \code{fdiff} is a S3 generic to compute (sequences of) suitably lagged / leaded and iterated differences, quasi-differences or (quasi-)log-differences. The difference and log-difference operators \code{D} and \code{Dlog} also exists as parsimonious wrappers around \code{fdiff}, providing more flexibility than \code{fdiff} when applied to data frames. } \usage{ fdiff(x, n = 1, diff = 1, \dots) D(x, n = 1, diff = 1, \dots) Dlog(x, n = 1, diff = 1, \dots) \method{fdiff}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = TRUE, \dots) \method{D}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], \dots) \method{Dlog}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], \dots) \method{fdiff}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{D}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], \dots) \method{Dlog}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], \dots) \method{fdiff}{data.frame}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{D}{data.frame}(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, \dots) \method{Dlog}{data.frame}(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{fdiff}{pseries}(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, shift = "time", \dots) \method{D}{pseries}(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", \dots) \method{Dlog}{pseries}(x, n = 1, diff = 1, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", \dots) \method{fdiff}{pdata.frame}(x, n = 1, diff = 1, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, shift = "time", \dots) \method{D}{pdata.frame}(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, \dots) \method{Dlog}{pdata.frame}(x, n = 1, diff = 1, cols = is.numeric, fill = NA, rho = 1, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fdiff}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, log = FALSE, rho = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, \dots) \method{D}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, \dots) \method{Dlog}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, rho = 1, stubs = .op[["stub"]], keep.ids = TRUE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric vector / time series, (time series) matrix, data frame, 'indexed_series' ('pseries'), 'indexed_frame' ('pdata.frame') or grouped data frame ('grouped_df').} \item{n}{integer. A vector indicating the number of lags or leads.} \item{diff}{integer. A vector of integers > 1 indicating the order of differencing / log-differencing.} \item{g}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{group}}) used to group \code{x}. \emph{Note} that without \code{t}, all values in a group need to be consecutive and in the right order. See Details of \code{\link{flag}}.} \item{by}{\emph{data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{t}{a time vector or list of vectors. See \code{\link{flag}}.} \item{cols}{\emph{data.frame method}: Select columns to difference using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{fill}{value to insert when vectors are shifted. Default is \code{NA}. } \item{log}{logical. \code{TRUE} computes log-differences. See Details.} \item{rho}{double. Autocorrelation parameter. Set to a value between 0 and 1 for quasi-differencing. Any numeric value can be supplied. } \item{stubs}{logical. \code{TRUE} (default) will rename all differenced columns by adding prefixes "L\code{n}D\code{diff}." / "F\code{n}D\code{diff}." for differences "L\code{n}Dlog\code{diff}." / "F\code{n}Dlog\code{diff}." for log-differences and replacing "D" / "Dlog" with "QD" / "QDlog" for quasi-differences. } \item{shift}{\emph{pseries / pdata.frame methods}: character. \code{"time"} or \code{"row"}. See \code{\link{flag}} for details.} \item{keep.ids}{\emph{data.frame / pdata.frame / grouped_df methods}: Logical. Drop all identifiers from the output (which includes all variables passed to \code{by} or \code{t} using formulas). \emph{Note}: For 'grouped_df' / 'pdata.frame' identifiers are dropped, but the \code{"groups"} / \code{"index"} attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ By default, \code{fdiff/D/Dlog} return \code{x} with all columns differenced / log-differenced. Differences are computed as \code{repeat(diff) x[i] - rho*x[i-n]}, and log-differences as \code{log(x[i]) - rho*log(x[i-n])} for \code{diff = 1} and \code{repeat(diff-1) x[i] - rho*x[i-n]} is used to compute subsequent differences (usually \code{diff = 1} for log-differencing). If \code{rho < 1}, this becomes quasi- (or partial) differencing, which is a technique suggested by Cochrane and Orcutt (1949) to deal with serial correlation in regression models, where \code{rho} is typically estimated by running a regression of the model residuals on the lagged residuals. %Setting \code{diff = 2} returns differences of differences etc\dots and setting \code{n = 2} returns simple differences computed by subtracting twice-lagged \code{x} from \code{x}. It is also possible to compute forward differences by passing negative \code{n} values. \code{n} also supports arbitrary vectors of integers (lags), and \code{diff} supports positive sequences of integers (differences): If more than one value is passed to \code{n} and/or \code{diff}, the data is expanded-wide as follows: If \code{x} is an atomic vector or time series, a (time series) matrix is returned with columns ordered first by lag, then by difference. If \code{x} is a matrix or data frame, each column is expanded in like manor such that the output has \code{ncol(x)*length(n)*length(diff)} columns ordered first by column name, then by lag, then by difference. %With groups/panel-identifiers supplied to \code{g/by}, \code{fdiff/D/Dlog} efficiently compute panel-differences. If \code{t} is left empty, the data needs to be ordered such that all values belonging to a group are consecutive and in the right order. It is not necessary that the groups themselves occur in the right order. If time-variable(s) are supplied to \code{t}, the panel is fully identified and differences can be securely computed even if the data is unordered. % \code{fdiff/D/Dlog} supports balanced panels and unbalanced panels where various individuals are observed for different time-sequences. % (both start, end and duration of observation can differ for each individual), but does not natively support irregularly spaced time series and panels. For further computational details and efficiency considerations see the help page of \code{\link{flag}}. %A work-around for differencing irregular panels is easily achieved with the help of \code{\link{seqid}}. %It is also possible to compute differences on unordered vectors or irregular time series (thus utilizing \code{t} but leaving \code{g/by} empty). %The methods applying to \emph{plm} objects (panel series and panel data frames) automatically utilize the panel-identifiers attached to these objects and thus securely compute fully identified panel-differences. If these objects have > 2 panel-identifiers attached to them, the last identifier is assumed to be the time-variable, and the others are taken as grouping-variables and interacted. } \value{ \code{x} differenced \code{diff} times using lags \code{n} of itself. Quasi and log-differences are toggled by the \code{rho} and \code{log} arguments or the \code{Dlog} operator. Computations can be grouped by \code{g/by} and/or ordered by \code{t}. See Details and Examples. } \references{ Cochrane, D.; Orcutt, G. H. (1949). Application of Least Squares Regression to Relationships Containing Auto-Correlated Error Terms. \emph{Journal of the American Statistical Association}. 44 (245): 32-61. Prais, S. J. & Winsten, C. B. (1954). Trend Estimators and Serial Correlation. \emph{Cowles Commission Discussion Paper No. 383.} Chicago. } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link[=flag]{flag/L/F}}, \code{\link[=fgrowth]{fgrowth/G}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Time Series: AirPassengers D(AirPassengers) # 1st difference, same as fdiff(AirPassengers) D(AirPassengers, -1) # Forward difference Dlog(AirPassengers) # Log-difference D(AirPassengers, 1, 2) # Second difference Dlog(AirPassengers, 1, 2) # Second log-difference D(AirPassengers, 12) # Seasonal difference (data is monthly) D(AirPassengers, # Quasi-difference, see a better example below rho = pwcor(AirPassengers, L(AirPassengers))) head(D(AirPassengers, -2:2, 1:3)) # Sequence of leaded/lagged and iterated differences # let's do some visual analysis plot(AirPassengers) # Plot the series - seasonal pattern is evident plot(stl(AirPassengers, "periodic")) # Seasonal decomposition plot(D(AirPassengers,c(1,12),1:2)) # Plotting ordinary and seasonal first and second differences plot(stl(window(D(AirPassengers,12), # Taking seasonal differences removes most seasonal variation 1950), "periodic")) ## Time Series Matrix of 4 EU Stock Market Indicators, recorded 260 days per year plot(D(EuStockMarkets, c(0, 260))) # Plot series and annual differnces mod <- lm(DAX ~., L(EuStockMarkets, c(0, 260))) # Regressing the DAX on its annual lag summary(mod) # and the levels and annual lags others r <- residuals(mod) # Obtain residuals pwcor(r, L(r)) # Residual Autocorrelation fFtest(r, L(r)) # F-test of residual autocorrelation # (better use lmtest :: bgtest) modCO <- lm(QD1.DAX ~., D(L(EuStockMarkets, c(0, 260)), # Cochrane-Orcutt (1949) estimation rho = pwcor(r, L(r)))) summary(modCO) rCO <- residuals(modCO) fFtest(rCO, L(rCO)) # No more autocorrelation ## World Development Panel Data head(fdiff(num_vars(wlddev), 1, 1, # Computes differences of numeric variables wlddev$country, wlddev$year)) # fdiff requires external inputs.. head(D(wlddev, 1, 1, ~country, ~year)) # Differences of numeric variables head(D(wlddev, 1, 1, ~country)) # Without t: Works because data is ordered head(D(wlddev, 1, 1, PCGDP + LIFEEX ~ country, ~year)) # Difference of GDP & Life Expectancy head(D(wlddev, 0:1, 1, ~ country, ~year, cols = 9:10)) # Same, also retaining original series head(D(wlddev, 0:1, 1, ~ country, ~year, 9:10, # Dropping id columns keep.ids = FALSE)) ## Indexed computations: wldi <- findex_by(wlddev, iso3c, year) # Dynamic Panel Data Models: summary(lm(D(PCGDP) ~ L(PCGDP) + D(LIFEEX), data = wldi)) # Simple case summary(lm(Dlog(PCGDP) ~ L(log(PCGDP)) + Dlog(LIFEEX), data = wldi)) # In log-differneces # Adding a lagged difference... summary(lm(D(PCGDP) ~ L(D(PCGDP, 0:1)) + L(D(LIFEEX), 0:1), data = wldi)) summary(lm(Dlog(PCGDP) ~ L(Dlog(PCGDP, 0:1)) + L(Dlog(LIFEEX), 0:1), data = wldi)) # Same thing: summary(lm(D1.PCGDP ~., data = L(D(wldi,0:1,1,9:10),0:1,keep.ids = FALSE)[,-1])) ## Grouped data library(magrittr) wlddev |> fgroup_by(country) |> fselect(PCGDP,LIFEEX) |> fdiff(0:1,1:2) # Adding a first and second difference wlddev |> fgroup_by(country) |> fselect(year,PCGDP,LIFEEX) |> D(0:1,1:2,year) # Also using t (safer) wlddev |> fgroup_by(country) |> # Dropping id's fselect(year,PCGDP,LIFEEX) |> D(0:1,1:2,year, keep.ids = FALSE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line % use one of RShowDoc("KEYWORDS") \keyword{ts} collapse/man/group.Rd0000644000176200001440000000477014777170130014271 0ustar liggesusers\name{group} \alias{group} \alias{groupv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Hash-Based Grouping } \description{ \code{group()} scans the rows of a data frame (or atomic vector / list of atomic vectors), assigning to each unique row an integer id - starting with 1 and proceeding in first-appearance order of the rows. The function is written in C and optimized for R's data structures. It is the workhorse behind functions like \code{\link{GRP}} / \code{\link{fgroup_by}}, \code{\link{collap}}, \code{\link{qF}}, \code{\link{qG}}, \code{\link{finteraction}} and \code{\link{funique}}, when called with argument \code{sort = FALSE}. } \usage{ group(\dots, starts = FALSE, group.sizes = FALSE) groupv(x, starts = FALSE, group.sizes = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{comma separated atomic vectors to group. Also supports a single list of vectors for backward compatibility.} \item{x}{an atomic vector or data frame / list of equal-length atomic vectors.} \item{starts}{logical. If \code{TRUE}, an additional attribute \code{"starts"} is attached giving a vector of group starts (= index of first-occurrence of unique rows). } \item{group.sizes}{ logical. If \code{TRUE}, an additional attribute \code{"group.sizes"} is attached giving the size of each group. } } \details{ A data frame is grouped on a column-by-column basis, starting from the leftmost column. For each new column the grouping vector obtained after the previous column is also fed back into the hash function so that unique values are determined on a running basis. The algorithm terminates as soon as the number of unique rows reaches the size of the data frame. Missing values are also grouped just like any other values. Invoking arguments \code{starts} and/or \code{group.sizes} requires an additional pass through the final grouping vector. } \value{ An object is of class 'qG' see \code{\link{qG}}. } \author{ The Hash Function and inspiration was taken from the excellent \emph{kit} package by Morgan Jacob, the algorithm was developed by Sebastian Krantz. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{radixorder}}, \code{\link{GRPid}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview} } \examples{ # Let's replicate what funique does g <- groupv(wlddev, starts = TRUE) if(attr(g, "N.groups") == fnrow(wlddev)) wlddev else ss(wlddev, attr(g, "starts")) } collapse/man/fgrowth.Rd0000644000176200001440000002011014777170130014577 0ustar liggesusers\name{fgrowth} \alias{fgrowth} \alias{fgrowth.default} \alias{fgrowth.matrix} \alias{fgrowth.data.frame} \alias{fgrowth.list} \alias{fgrowth.pseries} \alias{fgrowth.pdata.frame} \alias{fgrowth.grouped_df} \alias{G} \alias{G.default} \alias{G.matrix} \alias{G.data.frame} \alias{G.list} \alias{G.pseries} \alias{G.pdata.frame} \alias{G.grouped_df} \title{ % Lagged and Iterated Fast Growth Rates for Time Series and Panel Data } \description{ \code{fgrowth} is a S3 generic to compute (sequences of) suitably lagged / leaded, iterated and compounded growth rates, obtained with via the exact method of computation or through log differencing. By default growth rates are provided in percentage terms, but any scale factor can be applied. The growth operator \code{G} is a parsimonious wrapper around \code{fgrowth}, and also provides more flexibility when applied to data frames. } \usage{ fgrowth(x, n = 1, diff = 1, \dots) G(x, n = 1, diff = 1, \dots) \method{fgrowth}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = TRUE, \dots) \method{G}{default}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], \dots) \method{fgrowth}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{G}{matrix}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], \dots) \method{fgrowth}{data.frame}(x, n = 1, diff = 1, g = NULL, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, \dots) \method{G}{data.frame}(x, n = 1, diff = 1, by = NULL, t = NULL, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], keep.ids = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{fgrowth}{pseries}(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, shift = "time", \dots) \method{G}{pseries}(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], shift = "time", \dots) \method{fgrowth}{pdata.frame}(x, n = 1, diff = 1, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, shift = "time", \dots) \method{G}{pdata.frame}(x, n = 1, diff = 1, cols = is.numeric, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], shift = "time", keep.ids = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fgrowth}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = length(n) + length(diff) > 2L, keep.ids = TRUE, \dots) \method{G}{grouped_df}(x, n = 1, diff = 1, t = NULL, fill = NA, logdiff = FALSE, scale = 100, power = 1, stubs = .op[["stub"]], keep.ids = TRUE, \dots) } \arguments{ \item{x}{a numeric vector / time series, (time series) matrix, data frame, 'indexed_series' ('pseries'), 'indexed_frame' ('pdata.frame') or grouped data frame ('grouped_df').} \item{n}{integer. A vector indicating the number of lags or leads.} \item{diff}{integer. A vector of integers > 1 indicating the order of taking growth rates, e.g. \code{diff = 2} means computing the growth rate of the growth rate.} \item{g}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{group}}) used to group \code{x}. \emph{Note} that without \code{t}, all values in a group need to be consecutive and in the right order. See Details of \code{\link{flag}}.} \item{by}{\emph{data.frame method}: Same as \code{g}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{t}{a time vector or list of vectors. See \code{\link{flag}}.} \item{cols}{\emph{data.frame method}: Select columns to compute growth rates using a function, column names, indices or a logical vector. Default: All numeric variables. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{fill}{value to insert when vectors are shifted. Default is \code{NA}. } \item{logdiff}{logical. Compute log-difference growth rates instead of exact growth rates. See Details.} \item{scale}{logical. Scale factor post-applied to growth rates, default is 100 which gives growth rates in percentage terms. See Details.} \item{power}{numeric. Apply a power to annualize or compound growth rates e.g. \code{fgrowth(AirPassengers, 12, power = 1/12)} is equivalent to \code{((AirPassengers/flag(AirPassengers, 12))^(1/12)-1)*100}.} \item{stubs}{logical. \code{TRUE} (default) will rename all computed columns by adding a prefix "L\code{n}G\code{diff}." / "F\code{n}G\code{diff}.", or "L\code{n}Dlog\code{diff}." / "F\code{n}Dlog\code{diff}." if \code{logdiff = TRUE}.} \item{shift}{\emph{pseries / pdata.frame methods}: character. \code{"time"} or \code{"row"}. See \code{\link{flag}} for details.} \item{keep.ids}{\emph{data.frame / pdata.frame / grouped_df methods}: Logical. Drop all identifiers from the output (which includes all variables passed to \code{by} or \code{t} using formulas). \emph{Note}: For 'grouped_df' / 'pdata.frame' identifiers are dropped, but the \code{"groups"} / \code{"index"} attributes are kept.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ \code{fgrowth/G} by default computes exact growth rates using \code{repeat(diff) ((x[i]/x[i-n])^power - 1)*scale}, so for \code{diff > 1} it computes growth rate of growth rates. If \code{logdiff = TRUE}, approximate growth rates are computed using \code{log(x[i]/x[i-n])*scale} for \code{diff = 1} and \code{repeat(diff-1) x[i] - x[i-n]} thereafter (usually \code{diff = 1} for log-differencing). For further details see the help pages of \code{\link{fdiff}} and \code{\link{flag}}. } \value{ \code{x} where the growth rate was taken \code{diff} times using lags \code{n} of itself, scaled by \code{scale}. Computations can be grouped by \code{g/by} and/or ordered by \code{t}. See Details and Examples. } \seealso{ \code{\link[=flag]{flag/L/F}}, \code{\link[=fdiff]{fdiff/D/Dlog}}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple Time Series: AirPassengers G(AirPassengers) # Growth rate, same as fgrowth(AirPassengers) G(AirPassengers, logdiff = TRUE) # Log-difference G(AirPassengers, 1, 2) # Growth rate of growth rate G(AirPassengers, 12) # Seasonal growth rate (data is monthly) head(G(AirPassengers, -2:2, 1:3)) # Sequence of leaded/lagged and iterated growth rates # let's do some visual analysis plot(G(AirPassengers, c(0, 1, 12))) plot(stl(window(G(AirPassengers, 12), # Taking seasonal growth rate removes most seasonal variation 1950), "periodic")) ## Time Series Matrix of 4 EU Stock Market Indicators, recorded 260 days per year plot(G(EuStockMarkets,c(0,260))) # Plot series and annual growth rates summary(lm(L260G1.DAX ~., G(EuStockMarkets,260))) # Annual growth rate of DAX regressed on the # growth rates of the other indicators ## World Development Panel Data head(fgrowth(num_vars(wlddev), 1, 1, # Computes growth rates of numeric variables wlddev$country, wlddev$year)) # fgrowth requires external inputs.. head(G(wlddev, 1, 1, ~country, ~year)) # Growth of numeric variables, id's attached head(G(wlddev, 1, 1, ~country)) # Without t: Works because data is ordered head(G(wlddev, 1, 1, PCGDP + LIFEEX ~ country, ~year)) # Growth of GDP per Capita & Life Expectancy head(G(wlddev, 0:1, 1, ~ country, ~year, cols = 9:10)) # Same, also retaining original series head(G(wlddev, 0:1, 1, ~ country, ~year, 9:10, # Dropping id columns keep.ids = FALSE)) } \keyword{manip} \keyword{ts} collapse/man/is_unlistable.Rd0000644000176200001440000000277614777170130015776 0ustar liggesusers\name{is_unlistable} \alias{is_unlistable} \title{ Unlistable Lists } \description{ A (nested) list with atomic objects in all final nodes of the list-tree is unlistable - checked with \code{is_unlistable}. } \usage{ is_unlistable(l, DF.as.list = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ % \item{x}{an R object.} \item{l}{a list.} \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.} } \details{ \code{is_unlistable} with \code{DF.as.list = TRUE} is defined as \code{all(rapply(l, is.atomic))}, whereas \code{DF.as.list = FALSE} yields checking using \code{all(unlist(rapply2d(l, function(x) is.atomic(x) || is.list(x)), use.names = FALSE))}, assuming that data frames are lists composed of atomic elements. If \code{l} contains data frames, the latter can be a lot faster than applying \code{is.atomic} to every data frame column. } \value{ \code{logical(1)} - \code{TRUE} or \code{FALSE}. } % \references{ %% ~put references to the literature/web site here ~ % } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{ldepth}}, \code{\link{has_elem}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview} } \examples{ l <- list(1, 2, list(3, 4, "b", FALSE)) is_unlistable(l) l <- list(1, 2, list(3, 4, "b", FALSE, e ~ b)) is_unlistable(l) } \keyword{list} \keyword{utilities} collapse/man/fbetween_fwithin.Rd0000644000176200001440000003425514777170130016465 0ustar liggesusers\name{fbetween-fwithin} \alias{B} \alias{B.default} \alias{B.matrix} \alias{B.data.frame} \alias{B.pseries} \alias{B.pdata.frame} \alias{B.grouped_df} \alias{W} \alias{W.default} \alias{W.matrix} \alias{W.data.frame} \alias{W.pseries} \alias{W.pdata.frame} \alias{W.grouped_df} \alias{fbetween} \alias{fbetween.default} \alias{fbetween.matrix} \alias{fbetween.data.frame} \alias{fbetween.pseries} \alias{fbetween.pdata.frame} \alias{fbetween.grouped_df} \alias{fwithin} \alias{fwithin.default} \alias{fwithin.matrix} \alias{fwithin.data.frame} \alias{fwithin.pseries} \alias{fwithin.pdata.frame} \alias{fwithin.grouped_df} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Fast Between (Averaging) and (Quasi-)Within (Centering) Transformations } \description{ \code{fbetween} and \code{fwithin} are S3 generics to efficiently obtain between-transformed (averaged) or (quasi-)within-transformed (demeaned) data. These operations can be performed groupwise and/or weighted. \code{B} and \code{W} are wrappers around \code{fbetween} and \code{fwithin} representing the 'between-operator' and the 'within-operator'. (\code{B} / \code{W} provide more flexibility than \code{fbetween} / \code{fwithin} when applied to data frames (i.e. column subsetting, formula input, auto-renaming and id-variable-preservation capabilities\dots), but are otherwise identical.) %(\code{fbetween} and \code{fwithin} are simple programmers functions in style of the \link[=fast-statistical-functions]{Fast Statistical Functions} while \code{B} and \code{W} are more practical to use in regression formulas or for ad-hoc computations on data frames.) } \usage{ fbetween(x, \dots) fwithin(x, \dots) B(x, \dots) W(x, \dots) \method{fbetween}{default}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, \dots) \method{fwithin}{default}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, \dots) \method{B}{default}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, \dots) \method{W}{default}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, \dots) \method{fbetween}{matrix}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, \dots) \method{fwithin}{matrix}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, \dots) \method{B}{matrix}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], \dots) \method{W}{matrix}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], \dots) \method{fbetween}{data.frame}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, \dots) \method{fwithin}{data.frame}(x, g = NULL, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, \dots) \method{B}{data.frame}(x, by = NULL, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], keep.by = TRUE, keep.w = TRUE, \dots) \method{W}{data.frame}(x, by = NULL, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], keep.by = TRUE, keep.w = TRUE, \dots) # Methods for indexed data / compatibility with plm: \method{fbetween}{pseries}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, \dots) \method{fwithin}{pseries}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, \dots) \method{B}{pseries}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, \dots) \method{W}{pseries}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, \dots) \method{fbetween}{pdata.frame}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, \dots) \method{fwithin}{pdata.frame}(x, effect = 1L, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, \dots) \method{B}{pdata.frame}(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], keep.ids = TRUE, keep.w = TRUE, \dots) \method{W}{pdata.frame}(x, effect = 1L, w = NULL, cols = is.numeric, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], keep.ids = TRUE, keep.w = TRUE, \dots) # Methods for grouped data frame / compatibility with dplyr: \method{fbetween}{grouped_df}(x, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{fwithin}{grouped_df}(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{B}{grouped_df}(x, w = NULL, na.rm = .op[["na.rm"]], fill = FALSE, stub = .op[["stub"]], keep.group_vars = TRUE, keep.w = TRUE, \dots) \method{W}{grouped_df}(x, w = NULL, na.rm = .op[["na.rm"]], mean = 0, theta = 1, stub = .op[["stub"]], keep.group_vars = TRUE, keep.w = TRUE, \dots) } \arguments{ \item{x}{a numeric vector, matrix, data frame, 'indexed_series' ('pseries'), 'indexed_frame' ('pdata.frame') or grouped data frame ('grouped_df').} \item{g}{a factor, \code{\link{GRP}} object, or atomic vector / list of vectors (internally grouped with \code{\link{group}}) used to group \code{x}.} \item{by}{\emph{B and W data.frame method}: Same as g, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.} \item{w}{a numeric vector of (non-negative) weights. \code{B}/\code{W} data frame and \code{pdata.frame} methods also allow a one-sided formula i.e. \code{~ weightcol}. The \code{grouped_df} (\emph{dplyr}) method supports lazy-evaluation. See Examples.} \item{cols}{\emph{B/W (p)data.frame methods}: Select columns to scale using a function, column names, indices or a logical vector. Default: All numeric columns. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.} \item{na.rm}{logical. Skip missing values in \code{x} and \code{w} when computing averages. If \code{na.rm = FALSE} and a \code{NA} or \code{NaN} is encountered, the average for that group will be \code{NA}, and all data points belonging to that group in the output vector will also be \code{NA}.} \item{effect}{\emph{plm} methods: Select which panel identifier should be used as grouping variable. 1L takes the first variable in the \link[=indexing]{index}, 2L the second etc. Index variables can also be called by name using a character string. If more than one variable is supplied, the corresponding index-factors are interacted. } \item{stub}{character. A prefix/stub to add to the names of all transformed columns. \code{TRUE} (default) uses \code{"W."/"B."}, \code{FALSE} will not rename columns.} \item{fill}{\emph{option to \code{fbetween}/\code{B}}: Logical. \code{TRUE} will overwrite missing values in \code{x} with the respective average. By default missing values in \code{x} are preserved.} \item{mean}{\emph{option to \code{fwithin}/\code{W}}: The mean to center on, default is 0, but a different mean can be supplied and will be added to the data after the centering is performed. A special option when performing grouped centering is \code{mean = "overall.mean"}. In that case the overall mean of the data will be added after subtracting out group means.} \item{theta}{\emph{option to \code{fwithin}/\code{W}}: Double. An optional scalar parameter for quasi-demeaning i.e. \code{x - theta * xi.}. This is useful for variance components ('random-effects') estimators. see Details.} \item{keep.by, keep.ids, keep.group_vars}{\emph{B and W data.frame, pdata.frame and grouped_df methods}: Logical. Retain grouping / panel-identifier columns in the output. For data frames this only works if grouping variables were passed in a formula.} \item{keep.w}{\emph{B and W data.frame, pdata.frame and grouped_df methods}: Logical. Retain column containing the weights in the output. Only works if \code{w} is passed as formula / lazy-expression.} \item{\dots}{arguments to be passed to or from other methods.} } \details{ Without groups, \code{fbetween}/\code{B} replaces all data points in \code{x} with their mean or weighted mean (if \code{w} is supplied). Similarly \code{fwithin/W} subtracts the (weighted) mean from all data points i.e. centers the data on the mean. \cr With groups supplied to \code{g}, the replacement / centering performed by \code{fbetween/B} | \code{fwithin/W} becomes groupwise. In terms of panel data notation: If \code{x} is a vector in such a panel dataset, \code{xit} denotes a single data-point belonging to group \code{i} in time-period \code{t} (\code{t} need not be a time-period). Then \code{xi.} denotes \code{x}, averaged over \code{t}. \code{fbetween}/\code{B} now returns \code{xi.} and \code{fwithin}/\code{W} returns \code{x - xi.}. Thus for any data \code{x} and any grouping vector \code{g}: \code{B(x,g) + W(x,g) = xi. + x - xi. = x}. In terms of variance, \code{fbetween/B} only retains the variance between group averages, while \code{fwithin}/\code{W}, by subtracting out group means, only retains the variance within those groups. \cr The data replacement performed by \code{fbetween}/\code{B} can keep (default) or overwrite missing values (option \code{fill = TRUE}) in \code{x}. \code{fwithin/W} can center data simply (default), or add back a mean after centering (option \code{mean = value}), or add the overall mean in groupwise computations (option \code{mean = "overall.mean"}). Let \code{x..} denote the overall mean of \code{x}, then \code{fwithin}/\code{W} with \code{mean = "overall.mean"} returns \code{x - xi. + x..} instead of \code{x - xi.}. This is useful to get rid of group-differences but preserve the overall level of the data. In regression analysis, centering with \code{mean = "overall.mean"} will only change the constant term. See Examples. If \code{theta != 1}, \code{fwithin}/\code{W} performs quasi-demeaning \code{x - theta * xi.}. If \code{mean = "overall.mean"}, \code{x - theta * xi. + theta * x..} is returned, so that the mean of the partially demeaned data is still equal to the overall data mean \code{x..}. A numeric value passed to \code{mean} will simply be added back to the quasi-demeaned data i.e. \code{x - theta * xi. + mean}. Now in the case of a linear panel model \eqn{y_{it} = \beta_0 + \beta_1 X_{it} + u_{it}} with \eqn{u_{it} = \alpha_i + \epsilon_{it}}. If \eqn{\alpha_i \neq \alpha = const.} (there exists individual heterogeneity), then pooled OLS is at least inefficient and inference on \eqn{\beta_1} is invalid. If \eqn{E[\alpha_i|X_{it}] = 0} (mean independence of individual heterogeneity \eqn{\alpha_i}), the variance components or 'random-effects' estimator provides an asymptotically efficient FGLS solution by estimating a transformed model \eqn{y_{it}-\theta y_{i.} = \beta_0 + \beta_1 (X_{it} - \theta X_{i.}) + (u_{it} - \theta u_{i.}}), where \eqn{\theta = 1 - \frac{\sigma_\alpha}{\sqrt(\sigma^2_\alpha + T \sigma^2_\epsilon)}}. An estimate of \eqn{\theta} can be obtained from the an estimate of \eqn{\hat{u}_{it}} (the residuals from the pooled model). If \eqn{E[\alpha_i|X_{it}] \neq 0}, pooled OLS is biased and inconsistent, and taking \eqn{\theta = 1} gives an unbiased and consistent fixed-effects estimator of \eqn{\beta_1}. See Examples. } \value{ \code{fbetween}/\code{B} returns \code{x} with every element replaced by its (groupwise) mean (\code{xi.}). Missing values are preserved if \code{fill = FALSE} (the default). \code{fwithin/W} returns \code{x} where every element was subtracted its (groupwise) mean (\code{x - theta * xi. + mean} or, if \code{mean = "overall.mean"}, \code{x - theta * xi. + theta * x..}). See Details. } \references{ Mundlak, Yair. 1978. On the Pooling of Time Series and Cross Section Data. \emph{Econometrica} 46 (1): 69-85. } % \author{ %% ~~who you are~~ % } % \note{ %% ~~further notes~~ % } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link[=HDW]{fhdbetween/HDB and fhdwithin/HDW}}, \code{\link[=fscale]{fscale/STD}}, \code{\link{TRA}}, \link[=data-transformations]{Data Transformations}, \link[=collapse-documentation]{Collapse Overview} } \examples{ ## Simple centering and averaging head(fbetween(mtcars)) head(B(mtcars)) head(fwithin(mtcars)) head(W(mtcars)) all.equal(fbetween(mtcars) + fwithin(mtcars), mtcars) ## Groupwise centering and averaging head(fbetween(mtcars, mtcars$cyl)) head(fwithin(mtcars, mtcars$cyl)) all.equal(fbetween(mtcars, mtcars$cyl) + fwithin(mtcars, mtcars$cyl), mtcars) head(W(wlddev, ~ iso3c, cols = 9:13)) # Center the 5 series in this dataset by country head(cbind(get_vars(wlddev,"iso3c"), # Same thing done manually using fwithin.. add_stub(fwithin(get_vars(wlddev,9:13), wlddev$iso3c), "W."))) ## Using B() and W() for fixed-effects regressions: # Several ways of running the same regression with cyl-fixed effects lm(W(mpg,cyl) ~ W(carb,cyl), data = mtcars) # Centering each individually lm(mpg ~ carb, data = W(mtcars, ~ cyl, stub = FALSE)) # Centering the entire data lm(mpg ~ carb, data = W(mtcars, ~ cyl, stub = FALSE, # Here only the intercept changes mean = "overall.mean")) lm(mpg ~ carb + B(carb,cyl), data = mtcars) # Procedure suggested by # ..Mundlak (1978) - partialling out group averages amounts to the same as demeaning the data plm::plm(mpg ~ carb, mtcars, index = "cyl", model = "within") # "Proof".. # This takes the interaction of cyl, vs and am as fixed effects lm(W(mpg) ~ W(carb), data = iby(mtcars, id = finteraction(cyl, vs, am))) lm(mpg ~ carb, data = W(mtcars, ~ cyl + vs + am, stub = FALSE)) lm(mpg ~ carb + B(carb,list(cyl,vs,am)), data = mtcars) # Now with cyl fixed effects weighted by hp: lm(W(mpg,cyl,hp) ~ W(carb,cyl,hp), data = mtcars) lm(mpg ~ carb, data = W(mtcars, ~ cyl, ~ hp, stub = FALSE)) lm(mpg ~ carb + B(carb,cyl,hp), data = mtcars) # WRONG ! Gives a different coefficient!! ## Manual variance components (random-effects) estimation res <- HDW(mtcars, mpg ~ carb)[[1]] # Get residuals from pooled OLS sig2_u <- fvar(res) sig2_e <- fvar(fwithin(res, mtcars$cyl)) T <- length(res) / fndistinct(mtcars$cyl) sig2_alpha <- sig2_u - sig2_e theta <- 1 - sqrt(sig2_alpha) / sqrt(sig2_alpha + T * sig2_e) lm(mpg ~ carb, data = W(mtcars, ~ cyl, theta = theta, mean = "overall.mean", stub = FALSE)) # A slightly different method to obtain theta... plm::plm(mpg ~ carb, mtcars, index = "cyl", model = "random") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{manip} % __ONLY ONE__ keyword per line collapse/man/pwcor_pwcov_pwnobs.Rd0000644000176200001440000001006014777170130017062 0ustar liggesusers\name{pwcor-pwcov-pwnobs} \alias{pwcor} \alias{pwcov} \alias{pwnobs} \alias{print.pwcov} \alias{print.pwcor} %- Also NEED an '\alias' for EACH other topic documented here. \title{ (Pairwise, Weighted) Correlations, Covariances and Observation Counts } \description{ Computes (pairwise, weighted) Pearson's correlations, covariances and observation counts. Pairwise correlations and covariances can be computed together with observation counts and p-values, and output as 3D array (default) or list of matrices. \code{pwcor} and \code{pwcov} offer an elaborate print method. } \usage{ pwcor(X, \dots, w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") pwcov(X, \dots, w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs") pwnobs(X) \method{print}{pwcor}(x, digits = .op[["digits"]], sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, \dots) \method{print}{pwcov}(x, digits = .op[["digits"]], sig.level = 0.05, show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{a matrix or data.frame, for \code{pwcor} and \code{pwcov} all columns must be numeric. All functions are faster on matrices, so converting is advised for large data (see \code{\link{qM}}).} \item{x}{an object of class 'pwcor' / 'pwcov'. } \item{w}{numeric. A vector of (frequency) weights. } \item{N}{logical. \code{TRUE} also computes pairwise observation counts.} \item{P}{logical. \code{TRUE} also computes pairwise p-values (same as \code{\link{cor.test}} and \code{Hmisc::rcorr}).} \item{array}{logical. If \code{N = TRUE} or \code{P = TRUE}, \code{TRUE} (default) returns output as 3D array whereas \code{FALSE} returns a list of matrices.} \item{use}{argument passed to \code{\link{cor}} / \code{\link{cov}}. If \code{use != "pairwise.complete.obs"}, \code{sum(complete.cases(X))} is used for \code{N}, and p-values are computed accordingly. } \item{digits}{integer. The number of digits to round to in print. } \item{sig.level}{numeric. P-value threshold below which a \code{'*'} is displayed above significant coefficients if \code{P = TRUE}. } \item{show}{character. The part of the correlation / covariance matrix to display. } \item{spacing}{integer. Controls the spacing between different reported quantities in the printout of the matrix: 0 - compressed, 1 - single space, 2 - double space.} \item{return}{logical. \code{TRUE} returns the formatted object from the print method for exporting. The default is to return \code{x} invisibly.} \item{\dots}{other arguments passed to \code{\link{cor}} or \code{\link{cov}}. Only sensible if \code{P = FALSE}. } } \value{ a numeric matrix, 3D array or list of matrices with the computed statistics. For \code{pwcor} and \code{pwcov} the object has a class 'pwcor' and 'pwcov', respectively. } \note{ \code{weights::wtd.cors} is imported for weighted pairwise correlations (written in C for speed). For weighted correlations with bootstrap SE's see \code{weights::wtd.cor} (bootstrap can be slow). Weighted correlations for complex surveys are implemented in \code{jtools::svycor}. An equivalent and faster implementation of \code{pwcor} (without weights) is provided in \code{Hmisc::rcorr} (written in Fortran). } %% ~Make other sections like Warning with \section{Warning }{\dots.} ~ \seealso{ \code{\link{qsu}}, \link[=summary-statistics]{Summary Statistics}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as } \examples{ mna <- na_insert(mtcars) pwcor(mna) pwcov(mna) pwnobs(mna) pwcor(mna, N = TRUE) pwcor(mna, P = TRUE) pwcor(mna, N = TRUE, P = TRUE) aperm(pwcor(mna, N = TRUE, P = TRUE)) print(pwcor(mna, N = TRUE, P = TRUE), digits = 3, sig.level = 0.01, show = "lower.tri") pwcor(mna, N = TRUE, P = TRUE, array = FALSE) print(pwcor(mna, N = TRUE, P = TRUE, array = FALSE), show = "lower.tri") } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} % use one of RShowDoc("KEYWORDS") collapse/DESCRIPTION0000644000176200001440000000635515202770267013603 0ustar liggesusersPackage: collapse Title: Advanced and Fast Data Transformation Version: 2.1.7 Date: 2026-05-17 Authors@R: c( person("Sebastian", "Krantz", role = c("aut", "cre"), email = "sebastian.krantz@graduateinstitute.ch", comment = c(ORCID = "0000-0001-6212-5229")), person("Matt", "Dowle", role = "ctb"), person("Arun", "Srinivasan", role = "ctb"), person("Morgan", "Jacob", role = "ctb"), person("Dirk", "Eddelbuettel", role = "ctb"), person("Laurent", "Berge", role = "ctb"), person("Kevin", "Tappe", role = "ctb"), person("Alina", "Cherkas", role = "ctb"), person("Ivan", "Krylov", role = "ctb"), person("R Core Team and contributors worldwide", role = "ctb"), person("Martyn", "Plummer", role = "cph"), person("1999-2016 The R Core Team", role = "cph") ) Description: A large C/C++-based package for advanced data transformation and statistical computing in R that is extremely fast, class-agnostic, robust, and programmer friendly. Core functionality includes a rich set of S3 generic grouped and weighted statistical functions for vectors, matrices and data frames, which provide efficient low-level vectorizations, OpenMP multithreading, and skip missing values by default. These are integrated with fast grouping and ordering algorithms (also callable from C), and efficient data manipulation functions. The package also provides a flexible and rigorous approach to time series and panel data in R, fast functions for data transformation and common statistical procedures, detailed (grouped, weighted) summary statistics, powerful tools to work with nested data, fast data object conversions, functions for memory efficient R programming, and helpers to effectively deal with variable labels, attributes, and missing data. It seamlessly supports base R objects/classes as well as 'units', 'integer64', 'xts'/ 'zoo', 'tibble', 'grouped_df', 'data.table', 'sf', and 'pseries'/'pdata.frame'. For a concise overview of the package see Krantz (2026) . URL: https://fastverse.org/collapse/, https://github.com/fastverse/collapse BugReports: https://github.com/fastverse/collapse/issues License: GPL (>= 2) | file LICENSE Encoding: UTF-8 LazyData: true Depends: R (>= 4.1.0) Imports: Rcpp (>= 1.0.1) LinkingTo: Rcpp Suggests: fastverse, data.table, magrittr, kit, xts, zoo, plm, fixest, vars, RcppArmadillo, RcppEigen, tibble, dplyr, ggplot2, scales, microbenchmark, testthat, covr, knitr, rmarkdown, withr, bit64 VignetteBuilder: knitr NeedsCompilation: yes Packaged: 2026-05-18 15:25:18 UTC; sebastiankrantz Author: Sebastian Krantz [aut, cre] (ORCID: ), Matt Dowle [ctb], Arun Srinivasan [ctb], Morgan Jacob [ctb], Dirk Eddelbuettel [ctb], Laurent Berge [ctb], Kevin Tappe [ctb], Alina Cherkas [ctb], Ivan Krylov [ctb], R Core Team and contributors worldwide [ctb], Martyn Plummer [cph], 1999-2016 The R Core Team [cph] Maintainer: Sebastian Krantz Repository: CRAN Date/Publication: 2026-05-19 05:10:15 UTC