From f248bbe6d1204dfc8def62328788eaadcc8e17a1 Mon Sep 17 00:00:00 2001 From: Joshua Wu Date: Wed, 4 Sep 2024 12:54:37 -0700 Subject: [PATCH] Fix Performance of 'by' Operations when `verbose=TRUE` (#6296) * replace calls to clock() with wallclock() * reorganize and clarify time variables following #6228 * better translatability * NEWS * better NEWS --------- Co-authored-by: Michael Chirico --- NEWS.md | 2 ++ src/dogroups.c | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 48f384ba9..d2b910dbd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,8 @@ 4. The translations submitted for 1.16.0 are now actually shipped with the package -- our deepest apologies to the translators for the omission. We have added a CI check to ensure that the .mo binaries which get shipped with the package are always up-to-date. +5. Some grouping operations run much faster under `verbose=TRUE`, [#6286](https://github.com/Rdatatable/data.table/issues/6286). Thanks @joshhwuu for the report and fix. This overhead was not present on Windows. As a rule, users should expect `verbose=TRUE` operations to run more slowly, as extra statistics might be calculated as part of the report; here was a case where the overhead was particularly high and the fix was particularly easy. + # data.table [v1.16.0](https://github.com/Rdatatable/data.table/milestone/30) (25 August 2024) ## BREAKING CHANGES diff --git a/src/dogroups.c b/src/dogroups.c index 6ad2bc779..5d2babc64 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -69,12 +69,12 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX int nprotect=0; SEXP ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, target, source; Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE; - clock_t tstart=0, tblock[10]={0}; int nblock[10]={0}; const bool verbose = LOGICAL(verboseArg)[0]==1; const bool showProgress = LOGICAL(showProgressArg)[0]==1; - bool hasPrinted = false; - double startTime = (showProgress) ? wallclock() : 0; + double tstart=0, tblock[10]={0}; int nblock[10]={0}; // For verbose printing, tstart is updated each block + double startTime = (showProgress) ? wallclock() : 0; // For progress printing, startTime is set at the beginning double nextTime = (showProgress) ? startTime+3 : 0; // wait 3 seconds before printing progress + bool hasPrinted = false; if (!isInteger(order)) internal_error(__func__, "order not integer vector"); // # nocov if (TYPEOF(starts) != INTSXP) internal_error(__func__, "starts not integer"); // # nocov @@ -230,7 +230,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX writeNA(VECTOR_ELT(xSD, j), 0, 1, false); } } else { - if (verbose) tstart = clock(); + if (verbose) tstart = wallclock(); SETLENGTH(I, grpn); int *iI = INTEGER(I); if (LENGTH(order)==0) { @@ -242,7 +242,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (int j=0; j0; - Rprintf(_("\n %s took %.3fs for %d groups\n"), w ? "collecting discontiguous groups" : "memcpy contiguous groups", - 1.0*tblock[w]/CLOCKS_PER_SEC, nblock[w]); - Rprintf(_(" eval(j) took %.3fs for %d calls\n"), 1.0*tblock[2]/CLOCKS_PER_SEC, nblock[2]); + Rprintf(w ? _("\n collecting discontiguous groups took %.3fs for %d groups\n") + : _("\n memcpy contiguous groups took %.3fs for %d groups\n"), + 1.0*tblock[w], nblock[w]); + Rprintf(_(" eval(j) took %.3fs for %d calls\n"), 1.0*tblock[2], nblock[2]); } UNPROTECT(nprotect); return(ans);