Skip to content

Commit

Permalink
Merge pull request #608 from SebKrantz/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
SebKrantz committed Jun 25, 2024
2 parents f912549 + dda6ccf commit 4bd8c21
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 31 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# collapse 2.0.15

* Some changes on the C-side to move the package closer to C API compliance (demanded by R-Core). One notable change is that `gsplit()` no longer supports S4 objects (because `SET_S4_OBJECT` is not part of the API and `asS4()` is too expensive for tight loops). I cannot think of a single example where it would be necessary to split an S4 object, but if you do have applications please file an issue.

* `pivot()` has new arguments `FUN = "last"` and `FUN.args = NULL`, allowing wide and recast pivots with aggregation (default last value as before). `FUN` currently supports a single function returning a scalar value. *Fast Statistical Functions* receive vectorized execution. `FUN.args` can be used to supply a list of function arguments, including data-length arguments such as weights. There are also a couple of internal functions callable using function strings: `"first"`, `"last"`, `"count"`, `"sum"`, `"mean"`, `"min"`, or `"max"`. These are built into the reshaping C-code and thus extremely fast. Thanks @AdrianAntico for the request (#582).

* `join()` now provides enhanced verbosity, indicating the average order of the join between the two tables, e.g.
Expand Down
2 changes: 1 addition & 1 deletion man/join.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ join(x, y,

\item{y}{a data frame-like object to join with \code{x}.}

\item{on}{character. vector of columns to join on. \code{NULL} uses \code{union(names(x), names(y))}. Use a named vector to match columns named differently in \code{x} and \code{y}, e.g. \code{c("x_id" = "y_id")}.}
\item{on}{character. vector of columns to join on. \code{NULL} uses \code{intersect(names(x), names(y))}. Use a named vector to match columns named differently in \code{x} and \code{y}, e.g. \code{c("x_id" = "y_id")}.}

\item{how}{character. Join type: \code{"left"}, \code{"right"}, \code{"inner"}, \code{"full"}, \code{"semi"} or \code{"anti"}. The first letter suffices. }

Expand Down
4 changes: 3 additions & 1 deletion src/base_radixsort.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// #define IS_ASCII(x) ((x)->sxpinfo.gp & ASCII_MASK)
// #define IS_ASCII(x) (LEVELS(x) & ASCII_MASK)

#define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // Replacing STRING_PTR
#define SEXPPTR(x) ((SEXP *)DATAPTR(x))

// NOTE: All of this is copied from Defn.h: https://github.com/wch/r-source/blob/28de75af0541f93832c5899139b969d290bf422e/src/include/Defn.h
// to avoid checking for ALTREP in TRUELENGTH, which slows down the code unnecessarily...
Expand All @@ -25,6 +25,8 @@
#define MYLEV(x) (((SEXPREC_partial *)(x))->sxpinfo.gp)
#define IS_ASCII(x) (MYLEV(x) & 64) // from data.table.h

#define SETTOF(x,v) ((((SEXPREC_partial *)(x))->sxpinfo.type)=(v))

// NOTE: All of this is copied from Defn.h: https://github.com/wch/r-source/blob/28de75af0541f93832c5899139b969d290bf422e/src/include/Defn.h
// to avoid checking for ALTREP in TRUELENGTH, which slows down the code unnecessarily...

Expand Down
2 changes: 1 addition & 1 deletion src/data.table_subset.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here?
SET_ATTRIB(newdt, shallow_duplicate(ATTRIB(dt)));
SET_OBJECT(newdt, OBJECT(dt));
IS_S4_OBJECT(dt) ? SET_S4_OBJECT(newdt) : UNSET_S4_OBJECT(newdt); // To support S4 objects that include data.table
if(IS_S4_OBJECT(dt)) newdt = asS4(newdt, TRUE, 1); // To support S4 objects that include data.table
//SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // SHALLOW_DUPLICATE_ATTRIB would be a bit neater but is only available from R 3.3.0

// TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It
Expand Down
2 changes: 1 addition & 1 deletion src/data.table_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ SEXP dt_na(SEXP x, SEXP cols, SEXP Rprop, SEXP Rcount) {
}
}
if(count) {
SET_TYPEOF(ans, INTSXP);
SETTOF(ans, INTSXP);
} else {
// This computes the result
if(prop < 1.0) {
Expand Down
27 changes: 11 additions & 16 deletions src/flag.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include <Rcpp.h>
using namespace Rcpp;

LogicalVector intToLogical(IntegerVector x) {
return LogicalVector(x.begin(), x.end());
}

// 7th version: Irregular time series and panels supported !
template <int RTYPE>
Expand Down Expand Up @@ -569,17 +572,15 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill =
while(i != np) outjp[i++] = ff;
for( ; i != row; ++i) outjp[i] = column[i - np];
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else if(np<0) {
IntegerVector outjp = no_init_vector(row);
if(names) nam[pos] = "F" + nc[p] + "." + na[j];
int i = row, st = row+np;
while(i != st) outjp[--i] = ff;
for( ; i--; ) outjp[i] = column[i - np];
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else {
if(names) nam[pos] = na[j];
out[pos] = x[j];
Expand Down Expand Up @@ -702,8 +703,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill =
}
}
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else if(np<0) {
IntegerVector outjp = no_init_vector(os);
if(names) nam[pos] = "F" + nc[p] + "." + na[j];
Expand All @@ -715,8 +715,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill =
}
}
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else {
if(names) nam[pos] = na[j];
out[pos] = x[j];
Expand Down Expand Up @@ -840,8 +839,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill =
}
}
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else if(np<0) {
IntegerVector outjp = no_init_vector(gss);
if(names) nam[pos] = "F" + nc[p] + "." + na[j];
Expand All @@ -855,8 +853,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill =
}
}
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else {
if(names) nam[pos] = na[j];
out[pos] = x[j];
Expand Down Expand Up @@ -1009,8 +1006,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill =
}
}
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else if(np<0) {
IntegerVector outjp = no_init_vector(gss);
if(names) nam[pos] = "F" + nc[p] + "." + na[j];
Expand All @@ -1022,8 +1018,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill =
}
}
SHALLOW_DUPLICATE_ATTRIB(outjp, column);
if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP);
out[pos] = outjp;
out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp;
} else {
if(names) nam[pos] = na[j];
out[pos] = x[j];
Expand Down
8 changes: 4 additions & 4 deletions src/gsplit.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ SEXP gsplit(SEXP x, SEXP gobj, SEXP toint) {
int ox = OBJECT(x);
// FAZIT: Need to use SET_VECTOR_ELT!! pres[i] = allocVector() doesn't work!!
if(TYPEOF(ax) != NILSXP && ox != 0) {
for(int i = 0, s4o = IS_S4_OBJECT(x); i != ng; ++i) {
for(int i = 0; i != ng; ++i) { // , s4o = IS_S4_OBJECT(x)
SEXP resi;
SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i]));
SET_ATTRIB(resi, ax);
SET_OBJECT(resi, ox);
if(s4o) SET_S4_OBJECT(resi);
// if(s4o) SET_S4_OBJECT(resi);
}
} else if(TYPEOF(ax) != NILSXP) {
for(int i = 0; i != ng; ++i) {
Expand All @@ -38,11 +38,11 @@ SEXP gsplit(SEXP x, SEXP gobj, SEXP toint) {
SET_ATTRIB(resi, ax);
}
} else if(ox != 0) { // Is this even possible? Object bits but no attributes?
for(int i = 0, s4o = IS_S4_OBJECT(x); i != ng; ++i) {
for(int i = 0; i != ng; ++i) { // , s4o = IS_S4_OBJECT(x)
SEXP resi;
SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i]));
SET_OBJECT(resi, ox);
if(s4o) SET_S4_OBJECT(resi);
// if(s4o) SET_S4_OBJECT(resi);
}
} else {
for(int i = 0; i != ng; ++i) SET_VECTOR_ELT(res, i, allocVector(tx, pgs[i]));
Expand Down
2 changes: 1 addition & 1 deletion src/kit.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
#define C_ISNAN(x, y) (B_ISNAN(x, y) || (N_ISNAN(x, y) && x == y))
#define REQUAL(x, y) (N_ISNAN(x, y) ? (x == y) : (B_IsNA(x, y) || B_IsNaN(x, y)))
#define CEQUAL(x, y) ((N_ISNAN(x.r, x.i) && N_ISNAN(y.r, y.i)) ? (x.r == y.r && x.i == y.i) : (C_IsNA(x) ? C_IsNA(y) : (C_IsNA(y) ? 0 : (C_ISNAN(x.r, y.r) && C_ISNAN(x.i, y.i)))))
#define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // To replace STRING_PTR
#define SEXPPTR(x) ((SEXP *)DATAPTR(x))
// #define STR_DF mkString("data.frame")
// #define MAX(a,b) (((a)>(b))?(a):(b))
// #define IS_LOGICAL(x) (isLogical(x) && LENGTH(x)==1)
Expand Down
13 changes: 7 additions & 6 deletions src/programming.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "collapse_c.h"
#include "base_radixsort.h"

SEXP Cna_rm(SEXP x) {
const int n = LENGTH(x);
Expand Down Expand Up @@ -894,20 +895,20 @@ SEXP vtypes(SEXP x, SEXP isnum) {
pans[i] = is_num;
}
}
SET_TYPEOF(ans, LGLSXP);
SETTOF(ans, LGLSXP);
break;
}
case 2: // is.factor
for(int i = 0; i != n; ++i) pans[i] = (int)isFactor(px[i]);
SET_TYPEOF(ans, LGLSXP);
SETTOF(ans, LGLSXP);
break;
case 3: // is.list, needed for list processing functions
for(int i = 0; i != n; ++i) pans[i] = TYPEOF(px[i]) == VECSXP;
SET_TYPEOF(ans, LGLSXP);
SETTOF(ans, LGLSXP);
break;
case 4: // is.sublist, needed for list processing functions
for(int i = 0; i != n; ++i) pans[i] = TYPEOF(px[i]) == VECSXP && !isFrame(px[i]);
SET_TYPEOF(ans, LGLSXP);
SETTOF(ans, LGLSXP);
break;
case 7: // is.atomic(x), needed in atomic_elem()
// is.atomic: do_is with op = 200: https://github.com/wch/r-source/blob/9f9033e193071f256e21a181cb053cba983ed4a9/src/main/coerce.c
Expand All @@ -927,7 +928,7 @@ SEXP vtypes(SEXP x, SEXP isnum) {
pans[i] = 0;
}
}
SET_TYPEOF(ans, LGLSXP);
SETTOF(ans, LGLSXP);
break;
case 5: // is.atomic(x) || is.list(x), needed in reg_elem() and irreg_elem()
for(int i = 0; i != n; ++i) {
Expand All @@ -949,7 +950,7 @@ SEXP vtypes(SEXP x, SEXP isnum) {
pans[i] = 0;
}
}
SET_TYPEOF(ans, LGLSXP);
SETTOF(ans, LGLSXP);
break;
case 6:
// Faster object type identification, needed in unlist2d:
Expand Down

0 comments on commit 4bd8c21

Please sign in to comment.