From 7e0ddbf143e4ac23e782a31847dbceabeadfbdec Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Fri, 27 Feb 2026 22:59:46 +0000 Subject: [PATCH 1/2] refactor: use snprintf to clean up WriteStats The deeply-nested conditions on the length of the sizes is due to MesPrint truncating numbers which are too long for positioned fields, rather than loosening the positioning. snprintf doesn't truncate, so is much easier to align. --- sources/sort.c | 461 +++++++++---------------------------------------- 1 file changed, 79 insertions(+), 382 deletions(-) diff --git a/sources/sort.c b/sources/sort.c index 1005e17c..30e5d837 100644 --- a/sources/sort.c +++ b/sources/sort.c @@ -108,6 +108,16 @@ void HumanString(char* string, float input, const char suffix[HUMANSUFFLEN][4]) } } +WORD DigitsIn(LONG x) { + if ( x < 0 ) x = -x; + WORD dig = 1; + while ( x > 9 ) { + dig++; + x /= 10; + } + return dig; +} + /** * Writes the statistics. * @@ -127,11 +137,12 @@ void HumanString(char* string, float input, const char suffix[HUMANSUFFLEN][4]) void WriteStats(POSITION *plspace, WORD par, WORD checkLogType) { GETIDENTITY - LONG millitime, y = 0x7FFFFFFFL >> 1; + char buf[120]; + LONG millitime; WORD timepart; SORTING *S; - POSITION pp; int use_wtime; + if ( AT.SS == AT.S0 && AC.StatsFlag ) { #ifdef WITHPTHREADS if ( AC.ThreadStats == 0 && identity > 0 ) return; @@ -201,423 +212,109 @@ void WriteStats(POSITION *plspace, WORD par, WORD checkLogType) #elif defined(WITHMPI) if ( use_wtime && PF.me != MASTER ) use_wtime = 0; #endif + char *wpref = use_wtime ? "W" : ""; + char *wspac = use_wtime ? "" : " "; millitime = use_wtime ? TimeWallClock(1) * 10 : TimeCPU(1); timepart = (WORD)(millitime%1000); millitime /= 1000; timepart /= 10; + if ( AC.ShortStats ) { #if defined(WITHPTHREADS) || defined(WITHMPI) #ifdef WITHPTHREADS - if ( identity > 0 ) { + if ( identity > 0 ) { #else - if ( PF.me != MASTER ) { - const int identity = PF.me; -#endif - if ( par == STATSSPLITMERGE || par == STATSPOSTSORT ) { - SETBASEPOSITION(pp,y); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%10p %s %s",identity, - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); -/* - MesPrint("%d: %14s %17s %7l.%2is %8l>%10l%3s%10l:%10p",identity, - EXPRNAME(AR.CurExpr),AC.Commercial,millitime,timepart, - AN.ninterms,S->GenTerms,toterms[par],S->TermsLeft,plspace); -*/ - } - else { - y = 1000000000L; - SETBASEPOSITION(pp,y); - MULPOS(pp,100); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%11p %s %s",identity, + if ( PF.me != MASTER ) { + const int identity = PF.me; +#endif + if ( par == STATSSPLITMERGE || par == STATSPOSTSORT ) { + snprintf(buf, sizeof(buf), + "%d: %7ld.%02us %8ld>%10ld%3s%10ld:%10ld %s %s",identity, millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%12p %s %s",identity, - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%13p %s %s",identity, - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%14p %s %s",identity, - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%15p %s %s",identity, - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%16p %s %s",identity, - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %8l>%10l%3s%10l:%17p %s %s",identity, - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - } } } } } - } + S->TermsLeft,BASEPOSITION(*plspace),EXPRNAME(AR.CurExpr), + AC.Commercial); + MesPrint("%s", buf); } - } - else if ( par == STATSMERGETOFILE ) { - SETBASEPOSITION(pp,y); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%10p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - y = 1000000000L; - SETBASEPOSITION(pp,y); - MULPOS(pp,100); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%11p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%12p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%13p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%14p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%15p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%16p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%d: %7l.%2is %10l:%17p",identity,millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - } } } } } - } + else if ( par == STATSMERGETOFILE ) { + snprintf(buf, sizeof(buf), + "%d: %7ld.%02us %10ld:%10ld",identity,millitime,timepart, + S->TermsLeft,BASEPOSITION(*plspace)); + MesPrint("%s", buf); } - } } else + } + else #endif { - if ( par == STATSSPLITMERGE || par == STATSPOSTSORT ) { - SETBASEPOSITION(pp,y); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%10p %s %s", - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); -/* - MesPrint("%14s %17s %7l.%2is %8l>%10l%3s%10l:%10p", - EXPRNAME(AR.CurExpr),AC.Commercial,millitime,timepart, - AN.ninterms,S->GenTerms,toterms[par],S->TermsLeft,plspace); -*/ - } - else { - y = 1000000000L; - SETBASEPOSITION(pp,y); - MULPOS(pp,100); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%11p %s %s", + if ( par == STATSSPLITMERGE || par == STATSPOSTSORT ) { + snprintf(buf, sizeof(buf), + "%7ld.%02us %8ld>%10ld%3s%10ld:%10ld %s %s", millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%12p %s %s", - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%13p %s %s", - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%14p %s %s", - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%15p %s %s", - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%16p %s %s", - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %8l>%10l%3s%10l:%17p %s %s", - millitime,timepart,AN.ninterms,S->GenTerms,toterms[par], - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - } } } } } - } - } - } - else if ( par == STATSMERGETOFILE ) { - SETBASEPOSITION(pp,y); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%10p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); + S->TermsLeft,BASEPOSITION(*plspace),EXPRNAME(AR.CurExpr), + AC.Commercial); + MesPrint("%s", buf); } - else { - y = 1000000000L; - SETBASEPOSITION(pp,y); - MULPOS(pp,100); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%11p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%12p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%13p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%14p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%15p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%16p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%7l.%2is %10l:%17p",millitime,timepart, - S->TermsLeft,plspace,EXPRNAME(AR.CurExpr),AC.Commercial); - } - } } } } } - } + else if ( par == STATSMERGETOFILE ) { + snprintf(buf, sizeof(buf), + "%7ld.%02us %10ld:%10ld",millitime,timepart, + S->TermsLeft,BASEPOSITION(*plspace)); + MesPrint("%s", buf); } } - } } - else { - if ( par == STATSMERGETOFILE ) { - if ( use_wtime ) { - MesPrint("WTime = %7l.%2i sec",millitime,timepart); - } - else { - MesPrint("Time = %7l.%2i sec",millitime,timepart); - } } else { -#if ( BITSINLONG > 32 ) - if ( S->GenTerms >= 10000000000L ) { - if ( use_wtime ) { - MesPrint("WTime = %7l.%2i sec Generated terms = %16l%s", - millitime,timepart,S->GenTerms,humanGenTermsText); - } - else { - MesPrint("Time = %7l.%2i sec Generated terms = %16l%s", - millitime,timepart,S->GenTerms,humanGenTermsText); - } - } - else { - if ( use_wtime ) { - MesPrint("WTime = %7l.%2i sec Generated terms = %10l%s", - millitime,timepart,S->GenTerms,humanGenTermsText); - } - else { - MesPrint("Time = %7l.%2i sec Generated terms = %10l%s", - millitime,timepart,S->GenTerms,humanGenTermsText); - } - } -#else - if ( use_wtime ) { - MesPrint("WTime = %7l.%2i sec Generated terms = %10l%s", - millitime,timepart,S->GenTerms,humanGenTermsText); - } - else { - MesPrint("Time = %7l.%2i sec Generated terms = %10l%s", - millitime,timepart,S->GenTerms,humanGenTermsText); - } -#endif - } -#if ( BITSINLONG > 32 ) - if ( par == STATSSPLITMERGE ) - if ( S->TermsLeft >= 10000000000L ) { - MesPrint("%16s%8l Terms %s = %16l%s",EXPRNAME(AR.CurExpr), - AN.ninterms,FG.swmes[par],S->TermsLeft,humanTermsLeftText); + if ( par == STATSMERGETOFILE ) { + snprintf(buf, sizeof(buf), + "%sTime = %7ld.%02u sec",wpref,millitime,timepart); + MesPrint("%s", buf); } else { - MesPrint("%16s%8l Terms %s = %10l%s",EXPRNAME(AR.CurExpr), - AN.ninterms,FG.swmes[par],S->TermsLeft,humanTermsLeftText); + snprintf(buf, sizeof(buf), + "%sTime = %7ld.%02u sec %sGenerated terms = %10ld%s", + wpref,millitime,timepart,wspac,S->GenTerms,humanGenTermsText); + MesPrint("%s", buf); } - else { - if ( S->TermsLeft >= 10000000000L ) { -#ifdef WITHPTHREADS - if ( identity > 0 && par == STATSPOSTSORT ) { - MesPrint("%16s Terms in thread = %16l%s", - EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); - } - else -#elif defined(WITHMPI) - if ( PF.me != MASTER && par == STATSPOSTSORT ) { - MesPrint("%16s Terms in process= %16l%s", - EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); - } - else -#endif - { - MesPrint("%16s Terms %s = %16l%s", - EXPRNAME(AR.CurExpr),FG.swmes[par],S->TermsLeft,humanTermsLeftText); - } + + if ( par == STATSSPLITMERGE ) { + snprintf(buf, sizeof(buf), + "%16s%8ld Terms %s = %10ld%s", + EXPRNAME(AR.CurExpr),AN.ninterms,FG.swmes[par],S->TermsLeft, + humanTermsLeftText); + MesPrint("%s", buf); } else { #ifdef WITHPTHREADS if ( identity > 0 && par == STATSPOSTSORT ) { - MesPrint("%16s Terms in thread = %10l%s", - EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); + snprintf(buf, sizeof(buf), + "%16s Terms in thread = %10ld%s", + EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); + MesPrint("%s", buf); } else #elif defined(WITHMPI) if ( PF.me != MASTER && par == STATSPOSTSORT ) { - MesPrint("%16s Terms in process= %10l%s", - EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); + snprintf(buf, sizeof(buf), + "%16s Terms in process= %10ld%s", + EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); + MesPrint("%s", buf); } else #endif { - MesPrint("%16s Terms %s = %10l%s", - EXPRNAME(AR.CurExpr),FG.swmes[par],S->TermsLeft,humanTermsLeftText); + snprintf(buf, sizeof(buf), + "%16s Terms %s = %10ld%s", + EXPRNAME(AR.CurExpr),FG.swmes[par],S->TermsLeft, + humanTermsLeftText); + MesPrint("%s", buf); } } + + const WORD dig = DigitsIn(BASEPOSITION(*plspace)); + snprintf(buf, sizeof(buf), "%24s Bytes used%*s=%11ld%s", + AC.Commercial,MiN(6,17-dig),"",BASEPOSITION(*plspace), + humanBytesText); + MesPrint("%s", buf); } -#else - if ( par == STATSSPLITMERGE ) - MesPrint("%16s%8l Terms %s = %10l%s",EXPRNAME(AR.CurExpr), - AN.ninterms,FG.swmes[par],S->TermsLeft,humanTermsLeftText); - else { -#ifdef WITHPTHREADS - if ( identity > 0 && par == STATSPOSTSORT ) { - MesPrint("%16s Terms in thread = %10l%s", - EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); - } - else -#elif defined(WITHMPI) - if ( PF.me != MASTER && par == STATSPOSTSORT ) { - MesPrint("%16s Terms in process= %10l%s", - EXPRNAME(AR.CurExpr),S->TermsLeft,humanTermsLeftText); - } - else -#endif - { - MesPrint("%16s Terms %s = %10l%s", - EXPRNAME(AR.CurExpr),FG.swmes[par],S->TermsLeft,humanTermsLeftText); - } - } -#endif - SETBASEPOSITION(pp,y); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used = %10p%s",AC.Commercial,plspace,humanBytesText); - } - else { - y = 1000000000L; - SETBASEPOSITION(pp,y); - MULPOS(pp,100); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used =%11p%s",AC.Commercial,plspace,humanBytesText); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used =%12p%s",AC.Commercial,plspace,humanBytesText); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used =%13p%s",AC.Commercial,plspace,humanBytesText); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used =%14p%s",AC.Commercial,plspace,humanBytesText); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used =%15p%s",AC.Commercial,plspace,humanBytesText); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used =%16p%s",AC.Commercial,plspace,humanBytesText); - } - else { - MULPOS(pp,10); - if ( ISLESSPOS(*plspace,pp) ) { - MesPrint("%24s Bytes used=%17p%s",AC.Commercial,plspace,humanBytesText); - } - } } } } } - } - } } + #ifdef WITHSTATS MesPrint("Total number of writes: %l, reads: %l, seeks, %l" ,numwrites,numreads,numseeks); From c04fadf4eed33d74e7c006db4b6b4291f786ed6b Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Sat, 28 Mar 2026 21:42:26 +0000 Subject: [PATCH 2/2] feat: add "On SortVerbose;" for extra sort info Add information to the final sort summaries (per thread and master) including the number of comparisons made, the number of times the small and large buffers were sorted due to their capacities, and the total size of the unsorted, uncompressed generated terms. --- doc/manual/sorting.tex | 19 +++++++++ doc/manual/statements.tex | 8 ++++ sources/compcomm.c | 1 + sources/sort.c | 82 +++++++++++++++++++++------------------ sources/structs.h | 7 ++++ sources/threads.c | 31 +++++++++++++++ 6 files changed, 111 insertions(+), 37 deletions(-) diff --git a/doc/manual/sorting.tex b/doc/manual/sorting.tex index 9fb0398c..ed5146b6 100644 --- a/doc/manual/sorting.tex +++ b/doc/manual/sorting.tex @@ -214,6 +214,25 @@ \chapter{Sorting and statistics} The sizes of buffers involved can all be tuned to a given hardware. How this is done is explained in the chapter on the setup\index{setup} \ref{setup}. +By specifying \texttt{On SortVerbose;}\index{on!sortverbose} (see +\ref{substaon}), more information is printed in the final sorting statistics of +each expression, which take the form: +\begin{verbatim} +Time = 0.00 sec Generated terms = 64 + test1 Terms in output = 64 + Bytes used = 4888 + Unsorted bytes = 6144 + Small Buffer = 0, 0 + Large Buffer = 0, 0 + Comparisons = 63 +\end{verbatim} +The additional information includes the total size of the generated terms +before cancellations, merging or compression, the number of times the small +buffer was sorted due to reaching TermsInSmall, or SmallSize, the number of +times the large buffer was sorted due to reaching LargePatches, or LargeSize, +and the total number of term comparisons made during the sort. In particular, +the information on why the small and large buffers were sorted helps when +configuring their sizes, as described in \ref{setup}. When \FORM\ is dealing with the arguments\index{arguments of functions} of functions and if an argument is a multiterm subexpression, also such diff --git a/doc/manual/statements.tex b/doc/manual/statements.tex index 51c7ffae..9e2532e5 100644 --- a/doc/manual/statements.tex +++ b/doc/manual/statements.tex @@ -3868,6 +3868,10 @@ \section{off} \leftvitem{3.5cm}{sortreallocate\index{off!sortreallocate}} \rightvitem{13cm}{Turns off the reallocation of the small and large buffer at the end of each module.} + +\leftvitem{3.5cm}{sortverbose\index{off!sortverbose}} +\rightvitem{13cm}{Turns off the printing of additional information in the +sorting statistics.} \leftvitem{3.5cm}{threadloadbalancing\index{off!threadloadbalancing}} \rightvitem{13cm}{\vspace{1.5ex}Disables the loadbalancing mechanism of @@ -4091,6 +4095,10 @@ \section{on} noticable performance penalty. See also \#sortreallocate (\ref{presortreallocate}) for a single-module version of this feature.} +\leftvitem{3.5cm}{sortverbose\index{on!sortverbose}} +\rightvitem{13cm}{Turns on the printing of additional information in the +sorting statistics. See \ref{sorting} for more information.} + \leftvitem{3.5cm}{statistics\index{on!statistics}} \rightvitem{13cm}{Turns the writing of runtime statistics on. This is the default. It is possible to change this default with one of the setup diff --git a/sources/compcomm.c b/sources/compcomm.c index da4b8a34..07c7746d 100644 --- a/sources/compcomm.c +++ b/sources/compcomm.c @@ -143,6 +143,7 @@ static KEYWORDV onoffoptions[] = { ,{"humanstats", &(AC.HumanStatsFlag), 1, 0} ,{"humanstatistics", &(AC.HumanStatsFlag), 1, 0} ,{"grccverbose", &(AC.GrccVerbose), 1, 0} + ,{"sortverbose", &(AC.SortVerbose), 1, 0} }; static WORD one = 1; diff --git a/sources/sort.c b/sources/sort.c index 30e5d837..d72b1490 100644 --- a/sources/sort.c +++ b/sources/sort.c @@ -72,14 +72,6 @@ extern LONG nummallocs; extern LONG numfrees; #endif -//#define COUNTCOMPARES -#ifdef COUNTCOMPARES - // This needs to be large enough for the number of threads. - // It is hardcoded here, but 1024 should be enough. - // Enabling this has a performance impact. - LONG numcompares[1024]; -#endif - /* #] Includes : #[ SortUtilities : @@ -164,10 +156,14 @@ void WriteStats(POSITION *plspace, WORD par, WORD checkLogType) char humanGenTermsText[HUMANSTRLEN] = ""; char humanTermsLeftText[HUMANSTRLEN] = ""; char humanBytesText[HUMANSTRLEN] = ""; + char humanUnsortedBytesText[HUMANSTRLEN] = ""; + char humanComparisonsText[HUMANSTRLEN] = ""; if ( AC.HumanStatsFlag ) { HumanString(humanGenTermsText, (float)(S->GenTerms), humanTermsSuffix); HumanString(humanTermsLeftText, (float)(S->TermsLeft), humanTermsSuffix); HumanString(humanBytesText, (float)(BASEPOSITION(*plspace)), humanBytesSuffix); + HumanString(humanUnsortedBytesText, (float)(S->verbUnsortedSize), humanBytesSuffix); + HumanString(humanComparisonsText, (float)(S->verbComparisons), humanTermsSuffix); } MLOCK(ErrorMessageLock); @@ -315,6 +311,23 @@ void WriteStats(POSITION *plspace, WORD par, WORD checkLogType) MesPrint("%s", buf); } + if ( par == STATSPOSTSORT ) { + if ( AC.SortVerbose ) { + snprintf(buf, sizeof(buf), "%24s Unsorted bytes =%11ld%s", + "",S->verbUnsortedSize,humanUnsortedBytesText); + MesPrint("%s", buf); + snprintf(buf, sizeof(buf), "%24s Small Buffer =%5ld,%5ld", + "",S->verbSBsortTerms,S->verbSBsortCap); + MesPrint("%s", buf); + snprintf(buf, sizeof(buf), "%24s Large Buffer =%5ld,%5ld", + "",S->verbLBsortPatches,S->verbLBsortCap); + MesPrint("%s", buf); + snprintf(buf, sizeof(buf), "%24s Comparisons =%11ld%s", + "",S->verbComparisons,humanComparisonsText); + MesPrint("%s", buf); + } + } + #ifdef WITHSTATS MesPrint("Total number of writes: %l, reads: %l, seeks, %l" ,numwrites,numreads,numseeks); @@ -362,14 +375,6 @@ int NewSort(PHEAD0) } if ( AR.sLevel == 0 ) { -#ifdef COUNTCOMPARES -#ifdef WITHPTHREADS - numcompares[AT.identity] = 0; -#else - numcompares[0] = 0; -#endif -#endif - AN.FunSorts[0] = AT.S0; if ( AR.PolyFun == 0 ) { AT.S0->PolyFlag = 0; } else if ( AR.PolyFunType == 1 ) { AT.S0->PolyFlag = 1; } @@ -408,6 +413,13 @@ int NewSort(PHEAD0) PUTZERO(AN.OldPosOut); */ + // Zero the SortVerbose counters: + S->verbComparisons = 0; + S->verbSBsortTerms = 0; + S->verbSBsortCap = 0; + S->verbLBsortPatches = 0; + S->verbLBsortCap = 0; + S->verbUnsortedSize = 0; return(0); } @@ -621,6 +633,10 @@ LONG EndSort(PHEAD WORD *buffer, int par) /* The large buffer is too full. Merge and write it */ + // Update SortVerbose counters + if ( S->lPatch >= S->MaxPatches ) S->verbLBsortPatches++; + else S->verbLBsortCap++; + #ifdef GZIPDEBUG MLOCK(ErrorMessageLock); MesPrint("%w EndSort: lPatch = %d, MaxPatches = %d,lFill = %x, sSpace = %ld, MaxTer = %d, lTop = %x" @@ -977,18 +993,6 @@ LONG EndSort(PHEAD WORD *buffer, int par) } } -#ifdef COUNTCOMPARES - if ( AR.sLevel < 0 ) { -#ifdef WITHPTHREADS - MLOCK(ErrorMessageLock); - MesPrint(">>>number of calls to Compare: %l (tid %d)", numcompares[AT.identity], AT.identity); - MUNLOCK(ErrorMessageLock); -#else - MesPrint(">>>number of calls to Compare: %l", numcompares[0]); -#endif - } -#endif - return(retval); WorkSpaceError: MLOCK(ErrorMessageLock); @@ -2344,15 +2348,8 @@ WORD Compare1(PHEAD WORD *term1, WORD *term2, WORD level) WORD prevorder; WORD count = -1, localPoly, polyhit = -1; -#ifdef COUNTCOMPARES - if ( AR.sLevel == 0 ) { -#ifdef WITHPTHREADS - numcompares[AT.identity]++; -#else - numcompares[0]++; -#endif - } -#endif + // Update SortVerbose counter + S->verbComparisons++; if ( S->PolyFlag ) { /* @@ -4252,6 +4249,9 @@ int StoreTerm(PHEAD WORD *term) POSITION pp; LONG lSpace, sSpace, RetCode, over, tover; + // Update SortVerbose counters + S->verbUnsortedSize += *term * sizeof(*term); + if ( ( ( AP.PreDebug & DUMPTOSORT ) == DUMPTOSORT ) && AR.sLevel == 0 ) { #ifdef WITHPTHREADS snprintf((char *)(THRbuf),100,"StoreTerm(%d)",AT.identity); @@ -4266,6 +4266,10 @@ int StoreTerm(PHEAD WORD *term) /* The small buffer is full. It has to be sorted and written. */ + // Update SortVerbose counters + if ( S->sTerms >= S->TermsInSmall ) S->verbSBsortTerms++; + else S->verbSBsortCap++; + tover = over = S->sTerms; ss = S->sPointer; ss[over] = 0; @@ -4298,6 +4302,10 @@ int StoreTerm(PHEAD WORD *term) /* The large buffer is too full. Merge and write it */ + // Update SortVerbose counters + if ( S->lPatch >= S->MaxPatches ) S->verbLBsortPatches++; + else S->verbLBsortCap++; + if ( MergePatches(1) ) goto StoreCall; /* pp = S->SizeInFile[1]; diff --git a/sources/structs.h b/sources/structs.h index 651a6ddf..276d7234 100644 --- a/sources/structs.h +++ b/sources/structs.h @@ -1156,6 +1156,12 @@ typedef struct sOrT { LONG SpaceLeft; /* Space needed for still existing terms */ LONG putinsize; /* Size of buffer in putin */ LONG ninterms; /* Which input term ? */ + LONG verbComparisons; /* Counters for "On SortVerbose;" statistics */ + LONG verbSBsortTerms; + LONG verbSBsortCap; + LONG verbLBsortPatches; + LONG verbLBsortCap; + LONG verbUnsortedSize; int MaxPatches; /* Maximum number of patches in large buffer */ int MaxFpatches; /* Maximum number of patches in one filesort */ int type; /* Main, function or sub(routine) */ @@ -1826,6 +1832,7 @@ struct C_const { int FlintPolyFlag; /* Use Flint for polynomial arithmetic */ int HumanStatsFlag; /* Print human-readable stats in the stats print? */ int GrccVerbose; /* Enable extra print statements in grcc? */ + int SortVerbose; /* Enable extra sort stats information? */ int doloopstacksize; int dolooplevel; int CheckpointFlag; /**< Tells preprocessor whether checkpoint code must executed. diff --git a/sources/threads.c b/sources/threads.c index 9176e4c6..0d8879f2 100644 --- a/sources/threads.c +++ b/sources/threads.c @@ -1987,6 +1987,8 @@ void *RunSortBot(void *dummy) AT.SB.FillBlock = 1; AT.SB.MasterFill[1] = AT.SB.MasterStart[1]; SETBASEPOSITION(AN.theposition,0); + // Reset the sortbot comparison count + AT.SS->verbComparisons = 0; break; /* #] INISORTBOT : @@ -4052,10 +4054,23 @@ int MasterMerge(void) fin->handle = -1; position = S->SizeInFile[0]; MULPOS(position,sizeof(WORD)); + + // Collect global sort statistics information from the threads. + // The total GenTerms is the sum of the thread GenTerms. + // The total small/large buffer sort info is the sum of the thread info. + // The total comparison count is the sum of the thread counts. + // The total unsorted size is the sum of the total generated terms sizes S->GenTerms = 0; for ( j = 1; j <= numberofworkers; j++ ) { S->GenTerms += AB[j]->T.SS->GenTerms; + S->verbComparisons += AB[j]->T.SS->verbComparisons; + S->verbSBsortTerms += AB[j]->T.SS->verbSBsortTerms; + S->verbSBsortCap += AB[j]->T.SS->verbSBsortCap; + S->verbLBsortPatches += AB[j]->T.SS->verbLBsortPatches; + S->verbLBsortCap += AB[j]->T.SS->verbLBsortCap; + S->verbUnsortedSize += AB[j]->T.SS->verbUnsortedSize; } + WriteStats(&position,STATSPOSTSORT,NOCHECKLOGTYPE); Expressions[AR0.CurExpr].counter = S->TermsLeft; Expressions[AR0.CurExpr].size = position; @@ -4193,10 +4208,26 @@ int SortBotMasterMerge(void) } position = S->SizeInFile[0]; MULPOS(position,sizeof(WORD)); + + // Collect global sort statistics information from the threads. + // The total GenTerms is the sum of the thread GenTerms. + // The total small/large buffer sort info is the sum of the thread info. + // The total comparison count is the sum of the thread and sortbot counts. + // The total unsorted size is the sum of the total generated terms sizes S->GenTerms = 0; for ( j = 1; j <= numberofworkers; j++ ) { S->GenTerms += AB[j]->T.SS->GenTerms; + S->verbComparisons += AB[j]->T.SS->verbComparisons; + S->verbSBsortTerms += AB[j]->T.SS->verbSBsortTerms; + S->verbSBsortCap += AB[j]->T.SS->verbSBsortCap; + S->verbLBsortPatches += AB[j]->T.SS->verbLBsortPatches; + S->verbLBsortCap += AB[j]->T.SS->verbLBsortCap; + S->verbUnsortedSize += AB[j]->T.SS->verbUnsortedSize; } + for ( j = numberofworkers+1; j <= numberofworkers+numberofsortbots; j++ ) { + S->verbComparisons += AB[j]->T.SS->verbComparisons; + } + S->TermsLeft = numberofterms; WriteStats(&position,STATSPOSTSORT,NOCHECKLOGTYPE); Expressions[AR.CurExpr].counter = S->TermsLeft;