diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt index a8dbd7083bb..cf6606458e6 100644 --- a/stl/CMakeLists.txt +++ b/stl/CMakeLists.txt @@ -20,6 +20,7 @@ set(HEADERS ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_minmax.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_ostream.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_print.hpp + ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_random_ziggurat_tables.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_ranges_to.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_ranges_tuple_formatter.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_sanitizer_annotate_container.hpp diff --git a/stl/inc/__msvc_random_ziggurat_tables.hpp b/stl/inc/__msvc_random_ziggurat_tables.hpp new file mode 100644 index 00000000000..e473a4b2a95 --- /dev/null +++ b/stl/inc/__msvc_random_ziggurat_tables.hpp @@ -0,0 +1,313 @@ +// __msvc_random_ziggurat_tables.hpp internal header + +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef __MSVC_RANDOM_ZIGGURAT_TABLES_HPP +#define __MSVC_RANDOM_ZIGGURAT_TABLES_HPP +#include +#if _STL_COMPILER_PREPROCESSOR + +#include +#include + +#pragma pack(push, _CRT_PACKING) +#pragma warning(push, _STL_WARNING_LEVEL) +#pragma warning(disable : _STL_DISABLED_WARNINGS) +_STL_DISABLE_CLANG_WARNINGS +#pragma push_macro("new") +#undef new + +_STD_BEGIN + +template +struct _Modified_ziggurat_tables { + static_assert(_Lw >= 2, "invalid table size"); + static_assert(_Lw <= 8, "invalid table size"); + static_assert(_Lx >= 2, "invalid table size"); + static_assert(_Lx <= (1 << _Lw) - 2, "invalid table size"); + + using _Uint_type = _Uty; + using _Xtype = conditional_t<_Signed, make_signed_t<_Uty>, _Uty>; + + static constexpr int _Layer_bits = _Lw; + static constexpr int _Layer_num = _Lx; + static constexpr _Ty _Width_scale = (_Signed ? 
_Ty{1} : _Ty{2}) * _Ty{static_cast<_Uty>(-1) / 2u + 1u}; + + _Ty _Layer_widths[_Lx + 1]; + _Ty _Layer_heights[_Lx + 1]; + _Uty _Alias_probabilities[1 << _Lw]; + uint8_t _Alias_indices[1 << _Lw]; +}; + +template +struct _Normal_distribution_tables; + +template <> +struct _Normal_distribution_tables { + static constexpr _Modified_ziggurat_tables _Value{ + {0., 1.8499396913479098e-20, 3.22000654070119e-20, 3.801834492672213e-20, 4.239176863940338e-20, + 4.601072376592995e-20, 4.914927859205541e-20, 5.194925708831539e-20, 5.449510525906388e-20, + 5.684178753233453e-20, 5.902737255155258e-20, 6.107946896282281e-20, 6.301882450145966e-20, + 6.486147944862723e-20, 6.6620125308614e-20, 6.830499955576846e-20, 6.992449607878411e-20, + 7.148559422803419e-20, 7.299416811556012e-20, 7.445521452721239e-20, 7.587302410409083e-20, + 7.72513120969412e-20, 7.859331974430587e-20, 7.990189393089238e-20, 8.117955053562326e-20, + 8.24285253589125e-20, 8.365081547048237e-20, 8.484821308332657e-20, 8.60223335347763e-20, + 8.717463857601196e-20, 8.830645589300582e-20, 8.941899557525784e-20, 9.051336409356474e-20, + 9.159057623037997e-20, 9.265156531616995e-20, 9.369719205548056e-20, 9.472825217209685e-20, + 9.574548305998523e-20, 9.67495695929092e-20, 9.774114921866556e-20, 9.872081644226706e-20, + 9.968912678493958e-20, 1.006466002916233e-19, 1.0159372464808746e-19, 1.0253095795926176e-19, + 1.0345873123254324e-19, 1.0437745060333484e-19, 1.0528749933465681e-19, 1.0618923961814279e-19, + 1.0708301419992969e-19, 1.0796914785174347e-19, 1.0884794870477023e-19, 1.0971970946159712e-19, + 1.105847084995444e-19, 1.1144321087703066e-19, 1.1229546925317396e-19, 1.1314172472959168e-19, + 1.1398220762229465e-19, 1.1481713817064524e-19, 1.1564672718954736e-19, 1.164711766703387e-19, + 1.1729068033524656e-19, 1.1810542414973788e-19, 1.1891558679662693e-19, 1.1972134011539592e-19, + 1.2052284950982363e-19, 1.213202743266993e-19, 1.221137682081192e-19, 1.2290347941961424e-19, + 1.2368955115613734e-19, 
1.244721218277429e-19, 1.2525132532661717e-19, 1.260272912769623e-19, + 1.26800145269098e-19, 1.2757000907902154e-19, 1.2833700087455358e-19, 1.291012354091e-19, + 1.298628242039675e-19, 1.306218757200922e-19, 1.313784955199656e-19, 1.3213278642047865e-19, + 1.3288484863734324e-19, 1.3363477992169825e-19, 1.3438267568945765e-19, 1.3512862914391452e-19, + 1.3587273139207447e-19, 1.3661507155515575e-19, 1.3735573687365977e-19, 1.3809481280738638e-19, + 1.3883238313073954e-19, 1.3956853002364515e-19, 1.4030333415837886e-19, 1.4103687478258152e-19, + 1.4176922979871992e-19, 1.4250047584023415e-19, 1.4323068834459525e-19, 1.4395994162348356e-19, + 1.4468830893028414e-19, 1.4541586252508258e-19, 1.4614267373733478e-19, 1.468688130263721e-19, + 1.4759435003989504e-19, 1.483193536705992e-19, 1.490438921110696e-19, 1.4976803290707173e-19, + 1.504918430093611e-19, 1.512153888241274e-19, 1.5193873626218252e-19, 1.5266195078699826e-19, + 1.5338509746169285e-19, 1.5410824099506307e-19, 1.5483144578675359e-19, 1.5555477597165208e-19, + 1.562782954635959e-19, 1.5700206799847218e-19, 1.577261571767924e-19, 1.584506265058186e-19, + 1.5917553944131766e-19, 1.5990095942901765e-19, 1.606269499458396e-19, 1.613535745409764e-19, + 1.6208089687688948e-19, 1.628089807702942e-19, 1.6353789023320314e-19, 1.6426768951409766e-19, + 1.64998443139297e-19, 1.6573021595459525e-19, 1.6646307316723643e-19, 1.6719708038829945e-19, + 1.679323036755644e-19, 1.686688095769344e-19, 1.6940666517448715e-19, 1.7014593812923316e-19, + 1.708866967266589e-19, 1.716290099231355e-19, 1.723729473932763e-19, 1.7311857957832871e-19, + 1.738659777356899e-19, 1.7461521398963791e-19, 1.753663613833746e-19, 1.7611949393248055e-19, + 1.7687468667988562e-19, 1.7763201575246545e-19, 1.7839155841937753e-19, 1.791533931522572e-19, + 1.7991759968740025e-19, 1.8068425909006447e-19, 1.8145345382103085e-19, 1.822252678055723e-19, + 1.8299978650498606e-19, 1.8377709699085584e-19, 1.8455728802221894e-19, 1.8534045012582449e-19, 
+ 1.8612667567968074e-19, 1.8691605900010147e-19, 1.877086964324755e-19, 1.8850468644599757e-19, + 1.89304129732615e-19, 1.9010712931046181e-19, 1.9091379063206988e-19, 1.9172422169766829e-19, + 1.9253853317390242e-19, 1.9335683851832939e-19, 1.9417925411007123e-19, 1.9500589938703654e-19, + 1.9583689699015082e-19, 1.9667237291506962e-19, 1.9751245667188498e-19, 1.9835728145337485e-19, + 1.9920698431238925e-19, 2.0006170634901302e-19, 2.0092159290819812e-19, 2.017867937886136e-19, + 2.0265746346352489e-19, 2.03533761314581e-19, 2.044158518794635e-19, 2.053039051144333e-19, + 2.0619809667290188e-19, 2.0709860820125294e-19, 2.0800562765325088e-19, 2.0891934962449412e-19, + 2.0983997570850482e-19, 2.1076771487619696e-19, 2.1170278388062835e-19, 2.12645407689126e-19, + 2.1359581994507893e-19, 2.1455426346191782e-19, 2.155209907520557e-19, 2.1649626459384575e-19, + 2.174803586399288e-19, 2.184735580706977e-19, 2.1947616029700383e-19, 2.2048847571667746e-19, + 2.2151082852993892e-19, 2.2254355761934443e-19, 2.2358701750055466e-19, 2.246415793509406e-19, + 2.257076321238689e-19, 2.2678558375744746e-19, 2.2787586248758337e-19, 2.2897891827642875e-19, + 2.3009522436868987e-19, 2.31225278989884e-19, 2.323696072024741e-19, 2.3352876293794817e-19, + 2.3470333122537183e-19, 2.358939306398056e-19, 2.371012159973014e-19, 2.3832588132707035e-19, + 2.3956866315595304e-19, 2.4083034414564453e-19, 2.421117571293987e-19, 2.434137896023415e-19, + 2.4473738872831126e-19, 2.4608356693659843e-19, 2.4745340819445845e-19, 2.4884807505626692e-19, + 2.502688166082661e-19, 2.5171697744973648e-19, 2.531940078780562e-19, 2.5470147547766574e-19, + 2.562410783529719e-19, 2.578146602946863e-19, 2.594242282305874e-19, 2.6107197238862816e-19, + 2.62760289697196e-19, 2.6449181107018934e-19, 2.66269433381545e-19, 2.680963571359764e-19, + 2.699761311051723e-19, 2.7191270554267153e-19, 2.739104960457322e-19, 2.759744607409643e-19, + 2.781101942931087e-19, 2.8032404336205924e-19, 2.826232496940566e-19, 
2.8501612922825993e-19, + 2.8751229873605726e-19, 2.9012296606812436e-19, 2.928613068338427e-19, 2.9574296054462676e-19, + 2.9878669505179366e-19, 3.020153132122829e-19, 3.054569167962314e-19, 3.091467121816156e-19, + 3.131296647180764e-19, 3.174645338293655e-19, 3.222302581329743e-19, 3.2753656486948307e-19, + 3.3354270450965077e-19, 3.404932226364455e-19, 3.4879379220476567e-19, 3.591976955385327e-19, + 3.733841512673591e-19, 3.967452642543983e-19}, + {1., 0.9855486814582198, 0.9568558983223457, 0.9403714851053674, 0.9264098256800032, 0.913888546176474, + 0.9023521229182082, 0.891552387927414, 0.8813347406782813, 0.8715944313191278, 0.8622562453326127, + 0.853263820956628, 0.8445735164599403, 0.8361506493566254, 0.8279670681571968, 0.8199995180914469, + 0.8122285029793022, 0.8046374695997428, 0.7972122087550235, 0.789940406123578, 0.782811299221483, + 0.7758154111565139, 0.7689443410170602, 0.7621905967356147, 0.7555474602882859, 0.7490088778462707, + 0.7425693694179482, 0.736223953885565, 0.7299680863252, 0.7237976052190542, 0.71770868770298, + 0.711697811392577, 0.7057617216348233, 0.6998974032648396, 0.6941020561272937, 0.6883730737623268, + 0.6827080247663162, 0.6771046364253323, 0.6715607802890552, 0.6660744594091138, 0.6606437970112796, + 0.6552670264079558, 0.6499424819877, 0.6446685911434497, 0.6394438670217427, 0.6342669019923727, + 0.6291363617522275, 0.6240509799890716, 0.6190095535411343, 0.614010937996926, 0.609054043686948, + 0.6041378320251521, 0.5992613121632794, 0.5944235379257464, 0.5896236049966371, 0.5848606483337264, + 0.5801338397873709, 0.5754423859046276, 0.5707855259011625, 0.5661625297854257, 0.5615726966212504, + 0.5570153529165045, 0.5524898511267139, 0.547995568263719, 0.543531904600429, 0.5390982824636296, + 0.534694145107587, 0.5303189556618926, 0.5259721961476151, 0.5216533665563823, 0.5173619839875113, + 0.5130975818387515, 0.5088597090465974, 0.5046479293724917, 0.5004618207315557, 0.4963009745607749, + 0.4921649952238278, 
0.488053499449981, 0.48396611580468757, 0.47990248418971704, 0.47586225537082144, + 0.4718450905310989, 0.4678506608483627, 0.4638786470949526, 0.4599287392585472, 0.4560006361826431, + 0.4520940452254683, 0.44820868193618724, 0.44434426974733815, 0.44050053968252195, 0.4366772300784287, + 0.43287408632035523, 0.42909086059042545, 0.42532731162777876, 0.4215832045000436, 0.4178583103854581, + 0.414152406365044, 0.4104652752242772, 0.4067967052637376, 0.403146490118252, 0.39951442858407665, + 0.3959003244536946, 0.3923039863578296, 0.38872522761430267, 0.38516386608338293, 0.3816197240293021, + 0.37809262798762566, 0.3745824086381898, 0.37108890068333245, 0.3676119427311606, 0.3641513771836147, + 0.3607070501291003, 0.3572788112394759, 0.3538665136711927, 0.3504700139703973, 0.34708917198181866, + 0.3437238507612683, 0.3403739164915959, 0.33703923840194755, 0.3337196886901865, 0.3304151424483392, + 0.32712547759094174, 0.32385057478616464, 0.3205903173896036, 0.3173445913806279, 0.3141132853011852, + 0.31089629019696596, 0.3076934995608376, 0.30450480927846096, 0.3013301175760091, 0.29816932496990994, + 0.2950223342185416, 0.29188905027581014, 0.2887693802465453, 0.28566323334365357, 0.2825705208469688, + 0.27949115606374747, 0.2764250542907558, 0.27337213277790046, 0.2703323106933574, 0.2673055090901553, + 0.2642916508741738, 0.2612906607735181, 0.2583024653092354, 0.2553269927673393, 0.25236417317211224, + 0.24941393826065747, 0.24647622145867346, 0.2435509578574281, 0.24063808419190957, 0.23773753882013424, + 0.23484926170359383, 0.2319731943888259, 0.2291092799900934, 0.22625746317316164, 0.22341769014016277, + 0.2205899086155393, 0.21777406783306102, 0.2149701185239115, 0.21217801290584146, 0.2093977046733897, + 0.20662914898917398, 0.20387230247625548, 0.20112712321158419, 0.19839357072053368, 0.1956716059725369, + 0.19296119137783668, 0.1902622907853669, 0.18757486948178453, 0.18489889419167307, 0.18223433307894354, + 0.1795811557494604, 0.17693933325492425, 
0.17430883809804612, 0.17168964423905236, 0.16908172710356292, + 0.1664850635918901, 0.16389963208981004, 0.16132541248086313, 0.15876238616024635, 0.15621053605036486, + 0.15366984661811764, 0.1511403038939979, 0.14862189549309693, 0.1461146106381079, 0.14361844018443456, + 0.14113337664751988, 0.1386594142325193, 0.13619654886645588, 0.1337447782330054, 0.13130410181007504, + 0.12887452091035284, 0.12645603872502292, 0.12404866037085907, 0.12165239294092992, 0.1192672455591718, + 0.11689322943910975, 0.11453035794703531, 0.11217864666998106, 0.10983811348886573, 0.10750877865722319, + 0.10519066488597145, 0.10288379743472703, 0.10058820421022437, 0.09830391587246251, 0.09603096594927059, + 0.09376939096006338, 0.09151923054964821, 0.08928052763304674, 0.08705332855241284, 0.08483768324726132, + 0.08263364543937649, 0.0804412728339469, 0.07826062733867703, 0.0760917753028642, 0.07393478777870469, + 0.07178974080741468, 0.0696567157331283, 0.06753579954797788, 0.06542708527228344, 0.06333067237439781, + 0.06124666723548977, 0.05917518366542869, 0.057116343476991766, 0.05507027712689312, 0.053037124433685696, + 0.051017035384481935, 0.04901017104476877, 0.04701670458847328, 0.04503682246902435, 0.04307072575665931, + 0.04111863167292178, 0.0391807753605653, 0.03725741193643623, 0.03534881888708009, 0.033455298882813636, + 0.03157718310728027, 0.029714835228161415, 0.027868656173851813, 0.02603908993517536, 0.024226630687753957, + 0.02243183164061715, 0.020655316178002918, 0.018897792103686122, 0.017160070171080462, 0.015443088676963571, + 0.013747946876007678, 0.012075951653800543, 0.010428684923300423, 0.008808104992035565, + 0.007216707019283454, 0.005657794393437552, 0.004135980658085006, 0.0026582451648572286, + 0.0012366551429421327}, + {5772624080256872389u, 18376497011742254427u, 15174068518956773574u, 18119521015726707089u, + 9689093257773371827u, 11219466041440825504u, 13761587801584342737u, 14139015317900913463u, + 16959627068628606629u, 
15365847406342567327u, 17560410186731783357u, 13862244452794322725u, + 16362681989933510177u, 18030425636884538356u, 17610338966896050457u, 17906026651327018438u, + 17608680640278073945u, 16641642626856403322u, 15781736612137234058u, 15012042578368789908u, + 14319036540231952622u, 13691777583262682375u, 13121318097675721029u, 12600268766726670007u, + 12122472868302235684u, 11682759097006158714u, 11276751652135400984u, 10900722672872907010u, + 10551476388020007257u, 10226257295438350670u, 9922676744977986297u, 9638653756585376387u, + 9372366951255892252u, 9122215231984041202u, 8886785409525460239u, 8664825381532682147u, + 8455221783597966057u, 8256981265082920509u, 8069214721305801714u, 7891123951009085432u, + 7721990314407312922u, 7561165050093418409u, 7408060974245858756u, 7262145337081180582u, + 7122933652448654560u, 6989984349214309097u, 6862894119417130358u, 6741293859467498760u, + 6624845117950325541u, 6513236977709194344u, 6406183311459562149u, 6303420359707705504u, + 6204704587630885979u, 6109810784114699328u, 6018530371593775259u, 5930669899900263567u, + 5846049701150232982u, 5764502685919797201u, 5685873263684640757u, 5610016372803382953u, + 5536796607286084230u, 5466087429260875468u, 5397770457480820794u, 5331734823438205240u, + 5267876587706348697u, 5206098210036269391u, 5146308067519121932u, 5088420015803761565u, + 5032352988947498680u, 4978030633990102386u, 4925380976787343191u, 4874336116030090240u, + 4824831942716041428u, 4776807882640226959u, 4730206659733160555u, 4684974078306757803u, + 4641058822472038179u, 4598412271172725037u, 4556988327438204979u, 4516743260600528908u, + 4477635560345495621u, 4439625801579311407u, 4402676519191560289u, 4366752091883718908u, + 4331818634311485492u, 4297843896859875941u, 4264797172433342282u, 4232649209699937077u, + 4201372132279523762u, 4170939363411867498u, 4141325555681704793u, 4112506525415084886u, + 4084459191394843573u, 4057161517573400681u, 4030592459488509904u, 4004731914112438776u, + 
3979560672887586650u, 3955060377721998896u, 3931213479736818425u, 3908003200574619843u, + 3885413496092965157u, 3863429022281552824u, 3842035103254137745u, 3821217701178098081u, + 3800963388015222443u, 3781259318957083791u, 3762093207447340218u, 3743453301691534809u, + 3725328362562526215u, 3707707642816630985u, 3690580867541954120u, 3673938215766276675u, + 3657770303157304129u, 3642068165753098333u, 3626823244665156465u, 3612027371700896721u, + 3597672755856293303u, 3583751970633100685u, 3570257942138544800u, 3557183937928559981u, + 3544523556558636521u, 3532270717809134186u, 3520419653554529694u, 3508964899248517694u, + 3497901285999190120u, 3487223933210692173u, 3476928241769807422u, 3467009887757871792u, + 3457464816670267801u, 3448289238127516793u, 3439479621063678173u, 3431032689379390071u, + 3422945418048454436u, 3415215029668389889u, 3407838991446855855u, 3400815012617299736u, + 3394141042278602704u, 3387815267654907022u, 3381836112773206025u, 3376202237557674709u, + 3370912537341121900u, 3365966142795361925u, 3361362420283742624u, 3357100972640535432u, + 3353181640383400854u, 3349604503366697606u, 3346369882885015573u, 3343478344237991310u, + 3340930699769220746u, 3338728012393928322u, 3336871599631997180u, 3335363038165024448u, + 3334204168938253332u, 3333397102830565304u, 3332944226918208053u, 3332848211360606800u, + 3333112016939478448u, 3333738903285562445u, 3334732437830624098u, 3336096505526002947u, + 3337835319372901186u, 3339953431813869046u, 3342455747039583217u, 3345347534270072924u, + 3348634442075073122u, 3352322513804228121u, 3356418204204490518u, 3360928397309325696u, + 3365860425692315551u, 3371222091186539932u, 3377021687180794558u, 3383268022614386067u, + 3389970447804048250u, 3397138882249584127u, 3404783844579310262u, 3412916484812437249u, + 3421548619133362302u, 3430692767392702573u, 3440362193572019452u, 3450570949473869494u, + 3461333921926404621u, 3472666883822619956u, 3484586549348956888u, 3497110633796823753u, + 
3510257918394286936u, 3524048320644393221u, 3538502970712099044u, 3553644294464519334u, + 3569496103840231756u, 3586083695303920639u, 3603433957234165034u, 3621575487196359634u, + 3640538720171589919u, 3660356068948096692u, 3681062078037508582u, 3702693592656541357u, + 3725289944520205988u, 3748893156429301117u, 3773548167908530159u, 3799303084468476330u, + 3826209453432674952u, 3854322569699495447u, 3883701815308762405u, 3914411037268645101u, + 3946518968785881418u, 3980099699852036513u, 4015233204094875460u, 4052005929937296858u, + 4090511465453892772u, 4130851287923166129u, 4173135610999021596u, 4217484344739825246u, + 4264028186526719015u, 4312909864287889665u, 4364285557565113080u, 4418326526995153228u, + 4475220988964517684u, 4535176279831707418u, 4598421363585156225u, 4665209748624330486u, + 4735822894181165115u, 4810574205618489143u, 4889813741622439947u, 4973933786719036921u, + 5063375481718726727u, 5158636755532028239u, 5260281868308503160u, 5368952963603784990u, + 5485384144115933258u, 5610418742607386359u, 5745030673003077333u, 5890351039753998130u, + 6047701591021027161u, 6218637175066705190u, 6405000178766652428u, 6608991115502686893u, + 6833261281686051708u, 7081036031445943421u, 7356281247561702361u, 7663931894516361511u, + 8010211661403582613u, 8403089396219588626u, 8852946429754057787u, 9373578915469751777u, + 9983751065318593022u, 10709691466846080045u, 11589282488071815533u, 12679467968045647555u, + 14070222599022507247u, 15913135573072714593u, 17648462416075622825u, 15851568064076581739u, + 16502145279702562360u, 14002019182168966720u, 9520131846432463495u, 18446744073709551615u}, + {1, 255, 0, 254, 3, 4, 5, 253, 7, 8, 252, 10, 11, 251, 13, 14, 250, 14, 251, 251, 12, 11, 10, 252, 9, 9, 8, 7, + 7, 253, 253, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 254, 254, 254, 254, 254, 254, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 254, 254, 3, 3, 4, 4, 5, + 6, 253, 7, 8, 252, 11, 13, 15, 12, 9, 6, 2, 255}, + }; +}; + +template <> +struct _Normal_distribution_tables { + static constexpr _Modified_ziggurat_tables _Value{ + {0.f, 1.0039637e-10f, 1.7527653e-10f, 2.0737359e-10f, 2.3165295e-10f, 2.5185834e-10f, 2.694753e-10f, + 2.8527236e-10f, 2.9970704e-10f, 3.130771e-10f, 3.2558878e-10f, 3.3739173e-10f, 3.4859837e-10f, + 3.592957e-10f, 3.6955258e-10f, 3.7942458e-10f, 3.889573e-10f, 3.981887e-10f, 4.071508e-10f, 4.1587087e-10f, + 4.243723e-10f, 4.3267545e-10f, 4.4079812e-10f, 4.4875598e-10f, 4.565629e-10f, 4.6423124e-10f, 4.717722e-10f, + 4.791956e-10f, 4.8651067e-10f, 4.9372556e-10f, 5.0084786e-10f, 5.0788446e-10f, 5.1484167e-10f, + 5.217254e-10f, 5.285411e-10f, 5.3529386e-10f, 5.419884e-10f, 5.486291e-10f, 5.552202e-10f, 5.617657e-10f, + 5.6826915e-10f, 5.7473415e-10f, 5.811641e-10f, 5.8756217e-10f, 5.9393135e-10f, 6.0027466e-10f, + 6.065949e-10f, 6.128948e-10f, 6.1917693e-10f, 6.25444e-10f, 6.316983e-10f, 6.379424e-10f, 6.4417865e-10f, + 6.504094e-10f, 6.5663686e-10f, 6.628634e-10f, 6.6909117e-10f, 6.7532246e-10f, 6.815594e-10f, 6.8780426e-10f, + 6.9405925e-10f, 7.003265e-10f, 7.066083e-10f, 7.129069e-10f, 7.192245e-10f, 7.2556344e-10f, 7.3192596e-10f, + 7.383145e-10f, 7.447315e-10f, 7.511794e-10f, 7.5766066e-10f, 7.641779e-10f, 7.7073387e-10f, 7.7733125e-10f, + 7.839729e-10f, 7.906618e-10f, 7.9740103e-10f, 
8.0419377e-10f, 8.1104334e-10f, 8.1795326e-10f, 8.249271e-10f, + 8.319688e-10f, 8.390823e-10f, 8.462719e-10f, 8.53542e-10f, 8.608974e-10f, 8.683432e-10f, 8.7588453e-10f, + 8.835273e-10f, 8.9127755e-10f, 8.991417e-10f, 9.071268e-10f, 9.152404e-10f, 9.2349056e-10f, 9.31886e-10f, + 9.404362e-10f, 9.491515e-10f, 9.580433e-10f, 9.671237e-10f, 9.764064e-10f, 9.85906e-10f, 9.956393e-10f, + 1.0056244e-09f, 1.0158817e-09f, 1.0264339e-09f, 1.0373069e-09f, 1.0485297e-09f, 1.0601353e-09f, + 1.0721617e-09f, 1.0846526e-09f, 1.0976589e-09f, 1.1112402e-09f, 1.125467e-09f, 1.1404241e-09f, + 1.1562137e-09f, 1.1729614e-09f, 1.1908244e-09f, 1.2100023e-09f, 1.2307549e-09f, 1.2534292e-09f, + 1.2785046e-09f, 1.3066712e-09f, 1.3389777e-09f, 1.37714e-09f, 1.4242785e-09f, 1.4871162e-09f, + 1.5856787e-09f}, + {1.f, 0.9770264f, 0.9316111f, 0.9055978f, 0.8836107f, 0.86392814f, 0.8458246f, 0.82890457f, 0.81292146f, + 0.7977082f, 0.7831446f, 0.76914066f, 0.7556266f, 0.742547f, 0.7298568f, 0.7175188f, 0.70550185f, 0.6937794f, + 0.68232876f, 0.6711301f, 0.6601663f, 0.6494221f, 0.6388841f, 0.62854034f, 0.61837995f, 0.6083933f, + 0.5985716f, 0.58890694f, 0.57939196f, 0.5700201f, 0.5607851f, 0.5516815f, 0.542704f, 0.5338478f, 0.5251085f, + 0.5164819f, 0.5079641f, 0.49955145f, 0.49124068f, 0.48302856f, 0.47491214f, 0.4668886f, 0.45895532f, + 0.4511098f, 0.44334972f, 0.43567285f, 0.42807713f, 0.4205605f, 0.4131212f, 0.40575734f, 0.39846727f, + 0.39124936f, 0.38410214f, 0.37702408f, 0.37001383f, 0.36307007f, 0.35619152f, 0.349377f, 0.34262538f, + 0.33593553f, 0.32930642f, 0.32273704f, 0.31622648f, 0.30977377f, 0.30337808f, 0.29703858f, 0.29075447f, + 0.28452495f, 0.27834937f, 0.272227f, 0.26615715f, 0.26013926f, 0.25417265f, 0.24825683f, 0.24239121f, + 0.23657529f, 0.23080856f, 0.22509058f, 0.21942088f, 0.21379907f, 0.20822476f, 0.20269756f, 0.19721715f, + 0.1917832f, 0.18639542f, 0.18105353f, 0.17575727f, 0.17050643f, 0.1653008f, 0.1601402f, 0.15502448f, + 0.1499535f, 0.14492716f, 0.13994537f, 
0.1350081f, 0.1301153f, 0.12526698f, 0.120463185f, 0.11570398f, + 0.11098945f, 0.10631975f, 0.10169504f, 0.09711553f, 0.0925815f, 0.08809326f, 0.08365115f, 0.0792556f, + 0.0749071f, 0.07060621f, 0.06635356f, 0.06214988f, 0.057996012f, 0.05389291f, 0.049841676f, 0.045843575f, + 0.041900076f, 0.03801288f, 0.034183994f, 0.030415794f, 0.026711132f, 0.023073487f, 0.019507188f, + 0.016017765f, 0.012612532f, 0.009301662f, 0.006100371f, 0.0030343498f}, + {1746900733u, 2635282000u, 3562142067u, 3757002556u, 3098700230u, 4247551824u, 3486706040u, 3961819002u, + 4104331036u, 3674373995u, 3331263584u, 3050988967u, 2817679804u, 2620417004u, 2451436745u, 2305064808u, + 2177056265u, 2064171101u, 1963892823u, 1874236772u, 1793616401u, 1720748040u, 1654581792u, 1594250556u, + 1539031852u, 1488318844u, 1441598078u, 1398432177u, 1358446256u, 1321317148u, 1286764801u, 1254545328u, + 1224445371u, 1196277488u, 1169876354u, 1145095614u, 1121805261u, 1099889442u, 1079244605u, 1059777936u, + 1041406031u, 1024053757u, 1007653285u, 992143249u, 977468030u, 963577124u, 950424606u, 937968654u, + 926171134u, 914997245u, 904415195u, 894395925u, 884912863u, 875941707u, 867460234u, 859448132u, 851886846u, + 844759450u, 838050529u, 831746074u, 825833396u, 820301040u, 815138718u, 810337253u, 805888519u, 801785405u, + 798021773u, 794592431u, 791493112u, 788720453u, 786271990u, 784146153u, 782342268u, 780860569u, 779702213u, + 778869304u, 778364927u, 778193182u, 778359241u, 778869400u, 779731150u, 780953262u, 782545879u, 784520631u, + 786890761u, 789671278u, 792879129u, 796533401u, 800655556u, 805269703u, 810402913u, 816085591u, 822351909u, + 829240318u, 836794146u, 845062314u, 854100187u, 863970583u, 874745003u, 886505108u, 899344515u, 913371013u, + 928709280u, 945504283u, 963925533u, 984172479u, 1006481418u, 1031134425u, 1058471057u, 1088903876u, + 1122939361u, 1161206526u, 1204496800u, 1253820769u, 1310490763u, 1376244313u, 1453434388u, 1545333178u, + 1656638073u, 1794358253u, 1969468217u, 
2200244208u, 2519726432u, 2994947621u, 3788095816u, 3383917011u, + 3543188136u, 4294967295u}, + {1, 127, 0, 126, 3, 4, 125, 6, 7, 6, 6, 125, 4, 4, 3, 3, 126, 126, 126, 126, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 126, 3, 4, 5, 7, 5, 2, 127}, + }; +}; + +_STD_END + +#pragma pop_macro("new") +_STL_RESTORE_CLANG_WARNINGS +#pragma warning(pop) +#pragma pack(pop) + +#endif // _STL_COMPILER_PREPROCESSOR +#endif // __MSVC_RANDOM_ZIGGURAT_TABLES_HPP diff --git a/stl/inc/header-units.json b/stl/inc/header-units.json index e94f92bd534..ec3efdb70bf 100644 --- a/stl/inc/header-units.json +++ b/stl/inc/header-units.json @@ -18,6 +18,7 @@ "__msvc_minmax.hpp", "__msvc_ostream.hpp", "__msvc_print.hpp", + "__msvc_random_ziggurat_tables.hpp", "__msvc_ranges_to.hpp", "__msvc_ranges_tuple_formatter.hpp", "__msvc_sanitizer_annotate_container.hpp", diff --git a/stl/inc/random b/stl/inc/random index 78260d357cf..3b39d52bf32 100644 --- a/stl/inc/random +++ b/stl/inc/random @@ -9,6 +9,7 @@ #if _STL_COMPILER_PREPROCESSOR #include <__msvc_int128.hpp> +#include <__msvc_random_ziggurat_tables.hpp> #include #include #include @@ -2641,53 +2642,99 @@ public: private: template - result_type _Eval(_Engine& _Eng, const param_type& _Par0) { - // compute next value - // Knuth, vol. 2, p. 122, alg. 
P - _Ty _Res; - if (_Valid) { - _Res = _Xx2; - _Valid = false; - } else { // generate two values, store one, return one - _Ty _Vx1; - _Ty _Vx2; - _Ty _Sx; - for (;;) { // reject bad values to avoid generating NaN/Inf on the next calculations - _Vx1 = 2 * _Nrand_impl<_Ty>(_Eng) - 1; - _Vx2 = 2 * _Nrand_impl<_Ty>(_Eng) - 1; - _Sx = _Vx1 * _Vx1 + _Vx2 * _Vx2; - if (_Sx < _Ty{1} && _Vx1 != _Ty{0} && _Vx2 != _Ty{0}) { - // good values! - break; - } - } + _NODISCARD result_type _Eval_std(_Engine& _Eng) { + // McFarland, C. D. (2015). A modified ziggurat algorithm for generating exponentially and normally distributed + // pseudorandom numbers. Journal of Statistical Computation and Simulation, 86(7), 1281-1294. + // https://doi.org/10.1080/00949655.2015.1060234 + using _Fty = conditional_t::digits <= 24, float, double>; + using _Tables = _Normal_distribution_tables<_Fty>; + using _Traits = decltype(_Tables::_Value); + constexpr typename _Traits::_Uint_type _Value_mask = + ~typename _Traits::_Uint_type{0u} << (_STD max) (_Traits::_Layer_bits, + numeric_limits::digits - numeric_limits<_Ty>::digits); + constexpr typename _Traits::_Uint_type _Layer_mask = + (typename _Traits::_Uint_type{1u} << _Traits::_Layer_bits) - 1u; + + _Rng_from_urng_v2 _Generator(_Eng); + + const auto _Rand_bits = static_cast(_Generator._Get_all_bits()); + const auto _Xbits = static_cast(_Rand_bits & _Value_mask); + const auto _Regular_layer = static_cast(_Rand_bits & _Layer_mask); + + if (_Regular_layer < _Tables::_Value._Layer_num - 1) { + // rectangular region of a middle layer + return static_cast<_Ty>(_Tables::_Value._Layer_widths[_Regular_layer + 1] * static_cast<_Fty>(_Xbits)); + } else { + const auto _Lbits = static_cast(_Generator._Get_all_bits()); + const auto _Index = static_cast(_Lbits & _Layer_mask); + const uint8_t _Irregular_layer = + _Lbits < _Tables::_Value._Alias_probabilities[_Index] ? 
_Index : _Tables::_Value._Alias_indices[_Index]; + + if (_Irregular_layer < _Tables::_Value._Layer_num) { + // tail of a non-bottommost layer + const _Fty _Xx0 = _Tables::_Value._Layer_widths[_Irregular_layer] * _Tables::_Value._Width_scale; + const _Fty _Xx1 = _Tables::_Value._Layer_widths[_Irregular_layer + 1] * _Tables::_Value._Width_scale; + const _Fty _Yy0 = _Tables::_Value._Layer_heights[_Irregular_layer]; + const _Fty _Yy1 = _Tables::_Value._Layer_heights[_Irregular_layer + 1]; + const _Fty _Xdiff = _Xx1 - _Xx0; + const _Fty _Ydiff = _Yy1 - _Yy0; + + for (;;) { + const _Fty _Dx = _Xdiff * _STD _Nrand_impl<_Fty>(_Eng); + const _Fty _Dy = _Ydiff * _STD _Nrand_impl<_Fty>(_Eng); + + // _Dexp = -(_Xval^2 - _Xx0^2) / 2 + // f(_Xval) = exp(-_Xval^2 / 2) = _Yy0 * exp(_Dexp) + // _Yy0 >= d(f(_Xval))/d(_Dexp) = f(_Xval) >= _Yy1 + // _Dexp * _Yy0 <= f(_Xval) - _Yy0 <= _Dexp * _Yy1 + const _Fty _Dexp = -_Dx * (_Xx0 + _Dx * _Fty{0.5}); + if (_Dy > _Dexp * _Yy1) { + // _Yval > _Yy0 + _Dexp * _Yy1 >= f(_Xval), reject + continue; + } + + const _Fty _Xval = _Xx0 + _Dx; + if (_Dy < _Dexp * _Yy0) { + // _Yval < _Yy0 + _Dexp * _Yy0 <= f(_Xval), accept + return static_cast<_Ty>(_Xbits < 0 ? -_Xval : _Xval); + } - _Ty _LogSx; - if (_Sx > _Ty{1e-4}) { - _LogSx = _STD log(_Sx); + const _Fty _Yval = _Yy0 + _Dy; + if (_Yval < _STD exp(_Xval * _Xval * _Fty{-0.5})) { + return static_cast<_Ty>(_Xbits < 0 ? -_Xval : _Xval); + } + } + } else if (_Irregular_layer == _Tables::_Value._Layer_num) { + // tail of the bottommost layer (infinite width) + constexpr _Fty _Tail = + _Tables::_Value._Layer_widths[_Tables::_Value._Layer_num] * _Tables::_Value._Width_scale; + constexpr _Fty _Tail_squared = _Tail * _Tail; + for (;;) { + const _Fty _Xval = + _STD sqrt(_Tail_squared - _Fty{2.0} * _STD log(_Fty{1.0} - _STD _Nrand_impl<_Fty>(_Eng))); + const _Fty _Yval = _STD _Nrand_impl<_Fty>(_Eng); + if (_Xval * _Yval < _Tail) { + return static_cast<_Ty>(_Xbits < 0 ? 
-_Xval : _Xval); + } + } } else { - // Bad _Sx value! Very small values will overflow log(_Sx) / _Sx. - // Generate a new value based on scaling method. - const _Ty _Ln2{_Ty{0.69314718055994530941723212145818}}; - const _Ty _Maxabs{(_STD max) (_STD abs(_Vx1), _STD abs(_Vx2))}; - const int _ExpMax{_STD ilogb(_Maxabs)}; - _Vx1 = _STD scalbn(_Vx1, -_ExpMax); - _Vx2 = _STD scalbn(_Vx2, -_ExpMax); - _Sx = _Vx1 * _Vx1 + _Vx2 * _Vx2; - _LogSx = _STD log(_Sx) + static_cast<_Ty>(_ExpMax) * (_Ln2 * 2); + // rectangular region of the bottommost layer + return static_cast<_Ty>( + _Tables::_Value._Layer_widths[_Tables::_Value._Layer_num] * static_cast<_Fty>(_Xbits)); } - - const _Ty _Fx{_STD sqrt(_Ty{-2} * _LogSx / _Sx)}; - _Xx2 = _Fx * _Vx2; // save second value for next call - _Valid = true; - _Res = _Fx * _Vx1; } - return _Res * _Par0._Sigma + _Par0._Mean; + } + + template + result_type _Eval(_Engine& _Eng, const param_type& _Par0) { + return _Eval_std(_Eng) * _Par0._Sigma + _Par0._Mean; } param_type _Par; + // TRANSITION, ABI: _Valid must be initialized to false, and must be reset to false when reset() is called, but is + // unused by the current implementation bool _Valid; - _Ty _Xx2; + _Ty _Xx2; // TRANSITION, ABI: unused by the current implementation }; _EXPORT_STD template diff --git a/tests/std/test.lst b/tests/std/test.lst index e0348743600..aab0475b23f 100644 --- a/tests/std/test.lst +++ b/tests/std/test.lst @@ -256,7 +256,6 @@ tests\GH_004477_mdspan_warning_5246 tests\GH_004597_self_swap tests\GH_004609_heterogeneous_cmp_overloads tests\GH_004618_mixed_operator_usage_keeps_statistical_properties -tests\GH_004618_normal_distribution_avoids_resets tests\GH_004657_expected_constraints_permissive tests\GH_004686_vectorization_on_trivial_assignability tests\GH_004845_logical_operator_traits_with_non_bool_constant diff --git a/tests/std/tests/GH_004618_normal_distribution_avoids_resets/env.lst b/tests/std/tests/GH_004618_normal_distribution_avoids_resets/env.lst deleted 
file mode 100644 index 19f025bd0e6..00000000000 --- a/tests/std/tests/GH_004618_normal_distribution_avoids_resets/env.lst +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -RUNALL_INCLUDE ..\usual_matrix.lst diff --git a/tests/std/tests/GH_004618_normal_distribution_avoids_resets/test.cpp b/tests/std/tests/GH_004618_normal_distribution_avoids_resets/test.cpp deleted file mode 100644 index ce653e64e14..00000000000 --- a/tests/std/tests/GH_004618_normal_distribution_avoids_resets/test.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include - -class FakeGenerator { -private: - std::mt19937 m_underlying_gen; - std::size_t m_operator_calls = 0; - -public: - using GenType = std::mt19937; - using result_type = GenType::result_type; - - static constexpr result_type min() { - return GenType::min(); - } - static constexpr result_type max() { - return GenType::max(); - } - - result_type operator()() { - ++m_operator_calls; - return m_underlying_gen(); - } - - std::size_t calls() const { - return m_operator_calls; - } -}; - -int main() { - FakeGenerator rng; - std::normal_distribution<> dist(0.0, 1.0); - using dist_params = std::normal_distribution<>::param_type; - dist_params params(50.0, 0.5); - (void) dist(rng); - const auto calls_before = rng.calls(); - (void) dist(rng); - const auto calls_after = rng.calls(); - assert(calls_before == calls_after); -} diff --git a/tools/scripts/random_ziggurat_tables_generate.cpp b/tools/scripts/random_ziggurat_tables_generate.cpp new file mode 100644 index 00000000000..de5917ad4da --- /dev/null +++ b/tools/scripts/random_ziggurat_tables_generate.cpp @@ -0,0 +1,336 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// icx-cl /EHsc /W4 /MT /O2 /std:c++latest /fp:precise /Qoption,cpp,--extended_float_types
+
+// This program writes the text of __msvc_random_ziggurat_tables.hpp to standard output:
+// generate_header() prints the fixed prologue, generate_normal_distribution() prints the
+// _Normal_distribution_tables specializations, and generate_footer() prints the fixed epilogue.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+using namespace std;
+
+// __float128 math routines used below; only declared here, the definitions are not part of this file
+// (presumably supplied by the runtime of the compiler named in the build command above - confirm)
+extern "C" [[nodiscard]] __float128 __fabsq(__float128 x) noexcept;
+extern "C" [[nodiscard]] __float128 __fminq(__float128 x, __float128 y) noexcept;
+extern "C" [[nodiscard]] __float128 __nearbyintq(__float128 x) noexcept;
+extern "C" [[nodiscard]] __float128 __expq(__float128 x) noexcept;
+extern "C" [[nodiscard]] __float128 __scalbnq(__float128 x, int y) noexcept;
+extern "C" [[nodiscard]] __float128 __logq(__float128 x) noexcept;
+extern "C" [[nodiscard]] __float128 __sqrtq(__float128 x) noexcept;
+extern "C" [[nodiscard]] __float128 __cbrtq(__float128 x) noexcept;
+extern "C" [[nodiscard]] __float128 __erfq(__float128 x) noexcept;
+
+// one layer as built by generate_tables(): the x range [x_inner, x_outer] and the heights at its two ends
+// (y_max is the height at x_inner, y_min is the height at x_outer)
+struct ziggurat_layer {
+    __float128 x_inner{};
+    __float128 x_outer{};
+    __float128 y_min{};
+    __float128 y_max{};
+};
+
+// one slot of the alias table: `probability` is the slot's own weight relative to the average slot weight,
+// `index` is the slot number, and `alias_index` is the slot used for the remainder
+// (defaults to `index`, i.e. a slot aliases itself until generate_tables() pairs it with another one)
+struct alias_table_entry {
+    __float128 probability{};
+    size_t index{};
+    size_t alias_index = index;
+};
+
+// per-floating-point-type settings used by generate_tables():
+//   type_name   - spelling of the type in the generated header
+//   type_suffix - literal suffix appended to every printed floating-point value
+//   rand_bits   - N of the uintN_t printed as the table's integer type; also the exponent of the scale factors
+//   layer_bits  - log2 of the alias table size (`layer_division` is 1 << layer_bits)
+template
+struct modified_ziggurat_traits;
+
+template <>
+struct modified_ziggurat_traits {
+    static constexpr string_view type_name = "double"sv;
+    static constexpr string_view type_suffix = ""sv;
+    static constexpr int rand_bits = 64;
+    static constexpr int layer_bits = 8;
+};
+
+template <>
+struct modified_ziggurat_traits {
+    static constexpr string_view type_name = "float"sv;
+    static constexpr string_view type_suffix = "f"sv;
+    static constexpr int rand_bits = 32;
+    static constexpr int layer_bits = 7; // 32 - 1 (sign) - 24 (magnitude)
+};
+
+// Prints one `template <> struct name<type_name>` specialization whose `_Value` member is initialized with
+// four brace-enclosed lists: layer widths, layer heights, alias probabilities, and alias indices.
+//
+//   name               - name of the struct template being specialized in the output
+//   is_signed          - printed as a template argument of _Modified_ziggurat_tables; also doubles width_scale
+//   pdf                - density, called with a __float128 x
+//   inverse_pdf        - maps a height back to the x at which pdf reaches it
+//   cdf                - used only through differences cdf(b) - cdf(a) and 1 - cdf(x) to measure areas
+//                        (assumes cdf is the integral of pdf with total area 1)
+//   pdf_derivative     - first derivative of pdf
+//   pdf_2nd_derivative - second derivative of pdf
+//   height_scale       - factor applied to every printed layer height (the stored layers are unscaled)
+template
+void generate_tables(string_view name, bool is_signed, auto&& pdf, auto&& inverse_pdf, auto&& cdf,
+    auto&& pdf_derivative, auto&& pdf_2nd_derivative, __float128 height_scale = 1.0q) {
+    using traits = modified_ziggurat_traits;
+    constexpr size_t layer_division = 1uz << traits::layer_bits;
+    // area x_inner * (y_max - y_min) given to each layer built in the loop below
+    constexpr __float128 regular_area = 1.0q / layer_division;
+
+    // the bottommost layer is excluded
+    vector layers;
+
+    __float128 x_inner = 0.0q;
+    __float128 y_max = pdf(x_inner);
+
+    // minimize the total height of the top two layers
+    // (this lambda only picks the starting point for the iteration that follows)
+    __float128 x_outer = [&] {
+        if (const __float128 y1 = pdf_derivative(0.0q); y1 < 0.0q) {
+            return __sqrtq(regular_area / -y1);
+        }
+
+        if (const __float128 y2 = pdf_2nd_derivative(0.0q); y2 < 0.0q) {
+            return __cbrtq(regular_area / -y2);
+        }
+
+        return 1.0q;
+    }();
+
+    // Newton iteration for y1 == 0, where y2 is the derivative of y1 with respect to x_outer;
+    // stops once the step is no longer larger than 2^-80 relative to x_outer
+    // (the negated comparison also stops the loop if the step is NaN)
+    for (;;) {
+        const __float128 y1 = pdf_derivative(x_outer) + regular_area / (x_outer * x_outer);
+        const __float128 y2 = pdf_2nd_derivative(x_outer) - 2.0q * regular_area / (x_outer * x_outer * x_outer);
+        const __float128 dx = y1 / y2;
+        x_outer -= dx;
+        if (!(__fabsq(dx) > __fabsq(x_outer) * 0x1p-80q)) {
+            break;
+        }
+    }
+
+    __float128 y_min = pdf(x_outer);
+
+    // make sure that we have at most `layer_division` irregular regions
+    // the bottommost layer has 2 irregular regions, other layers have 1 each
+    while (layers.size() + 2 < layer_division) {
+        layers.push_back({.x_inner = x_inner, .x_outer = x_outer, .y_min = y_min, .y_max = y_max});
+
+        // the next layer starts where this one ended and is lowered by regular_area / x_inner
+        x_inner = x_outer;
+        y_max = y_min;
+        y_min = y_max - regular_area / x_inner;
+        if (y_min < 0.0q) {
+            // the next layer would reach below zero, so what remains becomes the bottommost layer
+            break;
+        }
+
+        x_outer = inverse_pdf(y_min);
+    }
+
+    // one fewer than the number of stored layers (the first stored layer has x_inner == 0)
+    const size_t regular_layer_num = layers.size() - 1;
+
+    vector<__float128> irregular_areas;
+
+    // per stored layer: area between the curve and the line y = y_min over [x_inner, x_outer]
+    for (const ziggurat_layer& layer : layers) {
+        irregular_areas.push_back(
+            cdf(layer.x_outer) - cdf(layer.x_inner) - (layer.x_outer - layer.x_inner) * layer.y_min);
+    }
+
+    // the bottommost layer
+    // (first the area beyond the last x_outer, then the rectangle x_outer * y_min below the last layer)
+    irregular_areas.push_back(1.0q - cdf(layers.back().x_outer));
+    irregular_areas.push_back(layers.back().x_outer * layers.back().y_min);
+
+    // alias table for irregular region selection
+    vector alias_table(layer_division);
+
+    // mean weight of one alias table slot: the area not covered by the regular regions, divided by the slot count
+    const __float128 average_area = (layer_division - regular_layer_num) * regular_area / layer_division;
+    vector temp_overs;
+    vector temp_unders;
+
+    // classify each irregular region by its weight relative to the average;
+    // a region that is exactly average is final and keeps itself as its alias
+    for (size_t i = 0; i != irregular_areas.size(); ++i) {
+        const __float128 probability = irregular_areas[i] / average_area;
+        const alias_table_entry entry{.probability = probability, .index = i};
+        if (probability > 1.0) {
+            temp_overs.push_back(entry);
+        } else if (probability < 1.0) {
+            temp_unders.push_back(entry);
+        } else {
+            alias_table[i] = entry;
+        }
+    }
+
+    // slots beyond the irregular regions carry no weight of their own and are filled entirely by an alias
+    for (size_t i = irregular_areas.size(); i != layer_division; ++i) {
+        temp_unders.push_back({.probability = 0.0q, .index = i});
+    }
+
+    // with these comparators, overs.top() is the smallest over-full entry
+    // and unders.top() is the largest under-full entry
+    priority_queue overs(
+        [](const alias_table_entry& lhs, const alias_table_entry& rhs) { return lhs.probability > rhs.probability; },
+        std::move(temp_overs));
+    priority_queue unders(
+        [](const alias_table_entry& lhs, const alias_table_entry& rhs) { return lhs.probability < rhs.probability; },
+        std::move(temp_unders));
+
+    // pair one under-full slot with one over-full entry at a time: the under-full slot becomes final with the
+    // over-full entry as its alias, and the over-full entry keeps whatever weight is left after topping it up
+    while (!overs.empty() && !unders.empty()) {
+        const alias_table_entry& over = overs.top();
+        const alias_table_entry& under = unders.top();
+        alias_table[under.index].probability = under.probability;
+        alias_table[under.index].index = under.index;
+        alias_table[under.index].alias_index = over.index;
+
+        // computed before pop() because `over` and `under` refer to the queue tops
+        const alias_table_entry remaining{
+            .probability = over.probability - 1.0q + under.probability, .index = over.index};
+        overs.pop();
+        unders.pop();
+        if (remaining.probability > 1.0) {
+            overs.push(remaining);
+        } else if (remaining.probability < 1.0) {
+            unders.push(remaining);
+        } else {
+            alias_table[remaining.index] = remaining;
+        }
+    }
+
+    // whatever is left in either queue is stored unchanged, i.e. aliasing itself
+    // (presumably only rounding leftovers reach these loops - confirm)
+    while (!overs.empty()) {
+        alias_table[overs.top().index] = overs.top();
+        overs.pop();
+    }
+
+    while (!unders.empty()) {
+        alias_table[unders.top().index] = unders.top();
+        unders.pop();
+    }
+
+    // generate tables
+    // width_scale:       2^-rand_bits, doubled when is_signed; applied to every printed x coordinate
+    // probability_scale: 2^rand_bits; turns a probability into an integer
+    // max_probability:   1 - 2^-rand_bits; clamps probabilities so the scaled value is at most 2^rand_bits - 1
+    const __float128 width_scale = __scalbnq(is_signed ? 2.0q : 1.0q, -traits::rand_bits);
+    const __float128 probability_scale = __scalbnq(1.0q, traits::rand_bits);
+    const __float128 max_probability = 1.0q - __scalbnq(1.0q, -traits::rand_bits);
+
+    println();
+    println("template <>");
+    println("struct {}<{}> {{", name, traits::type_name);
+    println(" static constexpr _Modified_ziggurat_tables<{}, uint{}_t, {}, {}, {}> _Value{{", traits::type_name,
+        traits::rand_bits, is_signed, traits::layer_bits, layers.size());
+
+    // _Ty _Layer_widths[_Layer_num + 1];
+    // (x_inner of the first layer, then x_outer of every layer)
+    print(" {{{:#}{}", static_cast(layers[0].x_inner * width_scale), traits::type_suffix);
+    for (const ziggurat_layer& layer : layers) {
+        print(", {:#}{}", static_cast(layer.x_outer * width_scale), traits::type_suffix);
+    }
+
+    println("}},");
+
+    // _Ty _Layer_heights[_Layer_num + 1];
+    // (y_max of the first layer, then y_min of every layer)
+    print(" {{{:#}{}", static_cast(layers[0].y_max * height_scale), traits::type_suffix);
+    for (const ziggurat_layer& layer : layers) {
+        print(", {:#}{}", static_cast(layer.y_min * height_scale), traits::type_suffix);
+    }
+
+    println("}},");
+
+    // _Uty _Alias_probabilities[1 << _Lw];
+    // (each probability is clamped to max_probability, scaled by probability_scale, and rounded to an integer)
+    for (bool first = true; const alias_table_entry& entry : alias_table) {
+        if (first) {
+            print(" {{{}u", static_cast(
+                __nearbyintq(__fminq(entry.probability, max_probability) * probability_scale)));
+            first = false;
+        } else {
+            print(", {}u", static_cast(
+                __nearbyintq(__fminq(entry.probability, max_probability) * probability_scale)));
+        }
+    }
+
+    println("}},");
+
+    // uint8_t _Alias_indices[1 << _Lw];
+    for (bool first = true; const alias_table_entry& entry : alias_table) {
+        if (first) {
+            print(" {{{}", entry.alias_index);
+            first = false;
+        } else {
+            print(", {}", entry.alias_index);
+        }
+    }
+
+    println("}},");
+
+    println(" }};");
+    println("}};");
+}
+
+// exp(-x^2 / 2) times the constant 0x1.9884...p-1 (about 0.79788, i.e. sqrt(2 / pi))
+__float128 half_normal_pdf(__float128 x) {
+    return __expq(x * x * -0.5q) * 0x1.9884533d436508d0fcb3c500bab9p-1q;
+}
+
+// inverse of half_normal_pdf for x >= 0; the constant 0x1.40d9...p+0 is about 1.25331, i.e. sqrt(pi / 2)
+__float128 half_normal_inverse_pdf(__float128 y) {
+    return __sqrtq(-2.0q * __logq(y * 0x1.40d931ff627059657ca41fae722dp+0q));
+}
+
+// erf(x * c) with c = 0x1.6a09...p-1 (about 0.70711, i.e. 1 / sqrt(2))
+__float128 half_normal_cdf(__float128 x) {
+    return __erfq(x * 0x1.6a09e667f3bcda1ec56c7db8f04cp-1q);
+}
+
+// first derivative of half_normal_pdf: -x * half_normal_pdf(x)
+__float128 half_normal_pdf_derivative(__float128 x) {
+    return __expq(x * x * -0.5q) * x * -0x1.9884533d436508d0fcb3c500bab9p-1q;
+}
+
+// second derivative of half_normal_pdf: (x^2 - 1) * half_normal_pdf(x)
+__float128 half_normal_pdf_2nd_derivative(__float128 x) {
+    return __expq(x * x * -0.5q) * (x + 1.0q) * (x - 1.0q) * 0x1.9884533d436508d0fcb3c500bab9p-1q;
+}
+
+// prints the fixed prologue of the generated header, up to and including
+// the definition of _Modified_ziggurat_tables
+void generate_header() {
+    puts(R"(// __msvc_random_ziggurat_tables.hpp internal header
+
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef __MSVC_RANDOM_ZIGGURAT_TABLES_HPP
+#define __MSVC_RANDOM_ZIGGURAT_TABLES_HPP
+#include
+#if _STL_COMPILER_PREPROCESSOR
+
+#include
+#include
+
+#pragma pack(push, _CRT_PACKING)
+#pragma warning(push, _STL_WARNING_LEVEL)
+#pragma warning(disable : _STL_DISABLED_WARNINGS)
+_STL_DISABLE_CLANG_WARNINGS
+#pragma push_macro("new")
+#undef new
+
+_STD_BEGIN
+
+template
+struct _Modified_ziggurat_tables {
+ static_assert(_Lw >= 2, "invalid table size");
+ static_assert(_Lw <= 8, "invalid table size");
+ static_assert(_Lx >= 2, "invalid table size");
+ static_assert(_Lx <= (1 << _Lw) - 2, "invalid table size");
+
+ using _Uint_type = _Uty;
+ using _Xtype = conditional_t<_Signed, make_signed_t<_Uty>, _Uty>;
+
+ static constexpr int _Layer_bits = _Lw;
+ static constexpr int _Layer_num = _Lx;
+ static constexpr _Ty _Width_scale = (_Signed ? _Ty{1} : _Ty{2}) * _Ty{static_cast<_Uty>(-1) / 2u + 1u};
+
+ _Ty _Layer_widths[_Lx + 1];
+ _Ty _Layer_heights[_Lx + 1];
+ _Uty _Alias_probabilities[1 << _Lw];
+ uint8_t _Alias_indices[1 << _Lw];
+};)");
+}
+
+// prints the declaration of _Normal_distribution_tables followed by two specializations built from the
+// half_normal_* functions with is_signed == true; the height_scale passed here is the same constant as in
+// half_normal_inverse_pdf (about 1.25331), so the printed heights are exp(-x^2 / 2)
+// NOTE(review): the template arguments of the two generate_tables calls are not visible here;
+// presumably one call per modified_ziggurat_traits specialization - confirm
+void generate_normal_distribution() {
+    println();
+    println("template ");
+    println("struct _Normal_distribution_tables;");
+
+    generate_tables("_Normal_distribution_tables"sv, true, half_normal_pdf, half_normal_inverse_pdf,
+        half_normal_cdf, half_normal_pdf_derivative, half_normal_pdf_2nd_derivative,
+        0x1.40d931ff627059657ca41fae722dp+0q);
+
+    generate_tables("_Normal_distribution_tables"sv, true, half_normal_pdf, half_normal_inverse_pdf,
+        half_normal_cdf, half_normal_pdf_derivative, half_normal_pdf_2nd_derivative,
+        0x1.40d931ff627059657ca41fae722dp+0q);
+}
+
+// prints the fixed epilogue of the generated header: closes the namespace, restores the pragmas and macro
+// pushed by the prologue, and closes the two preprocessor conditionals
+void generate_footer() {
+    puts(R"(
+_STD_END
+
+#pragma pop_macro("new")
+_STL_RESTORE_CLANG_WARNINGS
+#pragma warning(pop)
+#pragma pack(pop)
+
+#endif // _STL_COMPILER_PREPROCESSOR
+#endif // __MSVC_RANDOM_ZIGGURAT_TABLES_HPP)");
+}
+
+// emits the complete header to standard output: prologue, normal distribution tables, epilogue
+int main() {
+    generate_header();
+    generate_normal_distribution();
+    generate_footer();
+    return 0;
+}