https://github.com/mordante created 
https://github.com/llvm/llvm-project/pull/101835

__formatted_size_buffer is not used in the public library interface, so the
changes are not an ABI break.

Before
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   
Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                47.6 ns         47.6 ns     
14729701 bytes_per_second=20.055Mi/s
BM_formatted_size_string<char>/2                23.8 ns         23.8 ns     
29445832 bytes_per_second=80.1657Mi/s
BM_formatted_size_string<char>/4                11.9 ns         11.9 ns     
58813944 bytes_per_second=320.771Mi/s
BM_formatted_size_string<char>/8                5.95 ns         5.95 ns    
117601928 bytes_per_second=1.25307Gi/s
BM_formatted_size_string<char>/16               2.97 ns         2.97 ns    
235800528 bytes_per_second=5.02342Gi/s
BM_formatted_size_string<char>/32               1.49 ns         1.49 ns    
471177152 bytes_per_second=20.0428Gi/s
BM_formatted_size_string<char>/64              0.745 ns        0.744 ns    
942161408 bytes_per_second=80.1172Gi/s
BM_formatted_size_string<char>/128             0.377 ns        0.376 ns   
1861005568 bytes_per_second=316.85Gi/s
BM_formatted_size_string<char>/256             0.200 ns        0.200 ns   
3503415296 bytes_per_second=1.16508Ti/s
BM_formatted_size_string<char>/512             0.111 ns        0.110 ns   
6351184384 bytes_per_second=4.21671Ti/s
BM_formatted_size_string<char>/1024            0.067 ns        0.067 ns   
10441098240 bytes_per_second=13.9409Ti/s
BM_formatted_size_string<char>/2048            0.045 ns        0.045 ns   
15404886016 bytes_per_second=41.0182Ti/s
BM_formatted_size_string<char>/4096            0.036 ns        0.036 ns   
19634089984 bytes_per_second=104.431Ti/s
BM_formatted_size_string<char>/8192            0.030 ns        0.030 ns   
23265501184 bytes_per_second=247.504Ti/s
BM_formatted_size_string<char>/16384           0.027 ns        0.027 ns   
25556238336 bytes_per_second=545.066Ti/s
BM_formatted_size_string<char>/32768           0.027 ns        0.027 ns   
26036731904 bytes_per_second=1.08282Pi/s
BM_formatted_size_string<char>/65536           0.027 ns        0.027 ns   
26192379904 bytes_per_second=2.17753Pi/s
BM_formatted_size_string<char>/131072          0.026 ns        0.026 ns   
26622033920 bytes_per_second=4.4243Pi/s
BM_formatted_size_string<char>/262144          0.027 ns        0.027 ns   
25452085248 bytes_per_second=8.48295Pi/s
BM_formatted_size_string<char>/524288          0.028 ns        0.028 ns   
24593825792 bytes_per_second=16.3639Pi/s
BM_formatted_size_string<char>/1048576         0.028 ns        0.028 ns   
24775753728 bytes_per_second=32.8865Pi/s
BM_formatted_size_string<wchar_t>/1             47.0 ns         46.9 ns     
14912646 bytes_per_second=81.3132Mi/s
BM_formatted_size_string<wchar_t>/2             23.4 ns         23.3 ns     
29964298 bytes_per_second=326.845Mi/s
BM_formatted_size_string<wchar_t>/4             11.7 ns         11.7 ns     
59734380 bytes_per_second=1.26993Gi/s
BM_formatted_size_string<wchar_t>/8             5.84 ns         5.84 ns    
120131824 bytes_per_second=5.10665Gi/s
BM_formatted_size_string<wchar_t>/16            2.92 ns         2.92 ns    
239874128 bytes_per_second=20.4273Gi/s
BM_formatted_size_string<wchar_t>/32            1.48 ns         1.48 ns    
473672928 bytes_per_second=80.6502Gi/s
BM_formatted_size_string<wchar_t>/64           0.797 ns        0.796 ns    
877660480 bytes_per_second=299.351Gi/s
BM_formatted_size_string<wchar_t>/128          0.450 ns        0.449 ns   
1568318336 bytes_per_second=1.03679Ti/s
BM_formatted_size_string<wchar_t>/256          0.273 ns        0.273 ns   
2571588096 bytes_per_second=3.41553Ti/s
BM_formatted_size_string<wchar_t>/512          0.181 ns        0.181 ns   
3868106240 bytes_per_second=10.2834Ti/s
BM_formatted_size_string<wchar_t>/1024         0.143 ns        0.143 ns   
4902914048 bytes_per_second=26.0513Ti/s
BM_formatted_size_string<wchar_t>/2048         0.121 ns        0.121 ns   
5801660416 bytes_per_second=61.775Ti/s
BM_formatted_size_string<wchar_t>/4096         0.110 ns        0.110 ns   
6380191744 bytes_per_second=135.763Ti/s
BM_formatted_size_string<wchar_t>/8192         0.107 ns        0.107 ns   
6531432448 bytes_per_second=278.051Ti/s
BM_formatted_size_string<wchar_t>/16384        0.107 ns        0.107 ns   
6531842048 bytes_per_second=556.731Ti/s
BM_formatted_size_string<wchar_t>/32768        0.106 ns        0.105 ns   
6648430592 bytes_per_second=1.10374Pi/s
BM_formatted_size_string<wchar_t>/65536        0.110 ns        0.110 ns   
6352273408 bytes_per_second=2.11267Pi/s
BM_formatted_size_string<wchar_t>/131072       0.114 ns        0.114 ns   
6139805696 bytes_per_second=4.07511Pi/s
BM_formatted_size_string<wchar_t>/262144       0.114 ns        0.114 ns   
6169821184 bytes_per_second=8.20475Pi/s
BM_formatted_size_string<wchar_t>/524288       0.114 ns        0.113 ns   
6168248320 bytes_per_second=16.4288Pi/s
BM_formatted_size_string<wchar_t>/1048576      0.120 ns        0.120 ns   
5817499648 bytes_per_second=30.9853Pi/s

After
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   
Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                44.6 ns         44.4 ns     
15768750 bytes_per_second=21.4556Mi/s
BM_formatted_size_string<char>/2                22.3 ns         22.2 ns     
31477648 bytes_per_second=85.8719Mi/s
BM_formatted_size_string<char>/4                11.1 ns         11.1 ns     
63031396 bytes_per_second=343.327Mi/s
BM_formatted_size_string<char>/8                5.57 ns         5.56 ns    
126105968 bytes_per_second=1.34101Gi/s
BM_formatted_size_string<char>/16               2.78 ns         2.78 ns    
251793552 bytes_per_second=5.36504Gi/s
BM_formatted_size_string<char>/32               1.39 ns         1.39 ns    
504195136 bytes_per_second=21.4672Gi/s
BM_formatted_size_string<char>/64              0.696 ns        0.694 ns   
1008341184 bytes_per_second=85.8541Gi/s
BM_formatted_size_string<char>/128             0.348 ns        0.347 ns   
2016425984 bytes_per_second=343.386Gi/s
BM_formatted_size_string<char>/256             0.174 ns        0.174 ns   
4031530496 bytes_per_second=1.33811Ti/s
BM_formatted_size_string<char>/512             0.087 ns        0.087 ns   
8058140160 bytes_per_second=5.36282Ti/s
BM_formatted_size_string<char>/1024            0.044 ns        0.043 ns   
16131134464 bytes_per_second=21.4476Ti/s
BM_formatted_size_string<char>/2048            0.022 ns        0.022 ns   
32242241536 bytes_per_second=85.938Ti/s
BM_formatted_size_string<char>/4096            0.011 ns        0.011 ns   
64521842688 bytes_per_second=343.596Ti/s
BM_formatted_size_string<char>/8192            0.005 ns        0.005 ns   
129048797184 bytes_per_second=1.34065Pi/s
BM_formatted_size_string<char>/16384           0.003 ns        0.003 ns   
258152202240 bytes_per_second=5.37696Pi/s
BM_formatted_size_string<char>/32768           0.001 ns        0.001 ns   
516496818176 bytes_per_second=21.4635Pi/s
BM_formatted_size_string<char>/65536           0.001 ns        0.001 ns   
1000000061440 bytes_per_second=85.868Pi/s
BM_formatted_size_string<char>/131072          0.000 ns        0.000 ns   
1000000061440 bytes_per_second=343.451Pi/s
BM_formatted_size_string<char>/262144          0.000 ns        0.000 ns   
1000000192512 bytes_per_second=1.33951Ei/s
BM_formatted_size_string<char>/524288          0.000 ns        0.000 ns   
1000000192512 bytes_per_second=5.35714Ei/s
BM_formatted_size_string<char>/1048576         0.000 ns        0.000 ns   
1000000716800 bytes_per_second=21.4307Ei/s
BM_formatted_size_string<wchar_t>/1             43.3 ns         43.2 ns     
16196853 bytes_per_second=88.2127Mi/s
BM_formatted_size_string<wchar_t>/2             21.6 ns         21.6 ns     
32428704 bytes_per_second=353.328Mi/s
BM_formatted_size_string<wchar_t>/4             10.8 ns         10.8 ns     
64858356 bytes_per_second=1.38065Gi/s
BM_formatted_size_string<wchar_t>/8             5.42 ns         5.40 ns    
129503096 bytes_per_second=5.51619Gi/s
BM_formatted_size_string<wchar_t>/16            2.71 ns         2.70 ns    
259444512 bytes_per_second=22.056Gi/s
BM_formatted_size_string<wchar_t>/32            1.35 ns         1.35 ns    
517963680 bytes_per_second=88.217Gi/s
BM_formatted_size_string<wchar_t>/64           0.678 ns        0.676 ns   
1035243264 bytes_per_second=352.499Gi/s
BM_formatted_size_string<wchar_t>/128          0.339 ns        0.338 ns   
2069611264 bytes_per_second=1.37816Ti/s
BM_formatted_size_string<wchar_t>/256          0.169 ns        0.169 ns   
4146004480 bytes_per_second=5.51363Ti/s
BM_formatted_size_string<wchar_t>/512          0.085 ns        0.085 ns   
8282438656 bytes_per_second=22.023Ti/s
BM_formatted_size_string<wchar_t>/1024         0.042 ns        0.042 ns   
16622990336 bytes_per_second=88.6687Ti/s
BM_formatted_size_string<wchar_t>/2048         0.021 ns        0.021 ns   
33236293632 bytes_per_second=354.449Ti/s
BM_formatted_size_string<wchar_t>/4096         0.011 ns        0.011 ns   
66449469440 bytes_per_second=1.38568Pi/s
BM_formatted_size_string<wchar_t>/8192         0.005 ns        0.005 ns   
133098782720 bytes_per_second=5.54274Pi/s
BM_formatted_size_string<wchar_t>/16384        0.003 ns        0.003 ns   
266227466240 bytes_per_second=22.1089Pi/s
BM_formatted_size_string<wchar_t>/32768        0.001 ns        0.001 ns   
532288503808 bytes_per_second=88.5187Pi/s
BM_formatted_size_string<wchar_t>/65536        0.001 ns        0.001 ns   
1000000061440 bytes_per_second=354.202Pi/s
BM_formatted_size_string<wchar_t>/131072       0.000 ns        0.000 ns   
1000000061440 bytes_per_second=1.38438Ei/s
BM_formatted_size_string<wchar_t>/262144       0.000 ns        0.000 ns   
1000000192512 bytes_per_second=5.54102Ei/s
BM_formatted_size_string<wchar_t>/524288       0.000 ns        0.000 ns   
1000000192512 bytes_per_second=22.0527Ei/s
BM_formatted_size_string<wchar_t>/1048576      0.000 ns        0.000 ns   
1000000716800 bytes_per_second=88.673Ei/s

Comparison
Benchmark                                                   Time             
CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                         -0.0639         
-0.0653            48            45            48            44
BM_formatted_size_string<char>/2                         -0.0651         
-0.0665            24            22            24            22
BM_formatted_size_string<char>/4                         -0.0642         
-0.0657            12            11            12            11
BM_formatted_size_string<char>/8                         -0.0643         
-0.0656             6             6             6             6
BM_formatted_size_string<char>/16                        -0.0623         
-0.0637             3             3             3             3
BM_formatted_size_string<char>/32                        -0.0650         
-0.0664             1             1             1             1
BM_formatted_size_string<char>/64                        -0.0655         
-0.0668             1             1             1             1
BM_formatted_size_string<char>/128                       -0.0756         
-0.0773             0             0             0             0
BM_formatted_size_string<char>/256                       -0.1280         
-0.1293             0             0             0             0
BM_formatted_size_string<char>/512                       -0.2126         
-0.2137             0             0             0             0
BM_formatted_size_string<char>/1024                      -0.3489         
-0.3500             0             0             0             0
BM_formatted_size_string<char>/2048                      -0.5220         
-0.5227             0             0             0             0
BM_formatted_size_string<char>/4096                      -0.6955         
-0.6961             0             0             0             0
BM_formatted_size_string<char>/8192                      -0.8194         
-0.8197             0             0             0             0
BM_formatted_size_string<char>/16384                     -0.9009         
-0.9010             0             0             0             0
BM_formatted_size_string<char>/32768                     -0.9495         
-0.9496             0             0             0             0
BM_formatted_size_string<char>/65536                     -0.9746         
-0.9746             0             0             0             0
BM_formatted_size_string<char>/131072                    -0.9871         
-0.9871             0             0             0             0
BM_formatted_size_string<char>/262144                    -0.9938         
-0.9938             0             0             0             0
BM_formatted_size_string<char>/524288                    -0.9970         
-0.9970             0             0             0             0
BM_formatted_size_string<char>/1048576                   -0.9985         
-0.9985             0             0             0             0
BM_formatted_size_string<wchar_t>/1                      -0.0769         
-0.0782            47            43            47            43
BM_formatted_size_string<wchar_t>/2                      -0.0737         
-0.0750            23            22            23            22
BM_formatted_size_string<wchar_t>/4                      -0.0788         
-0.0802            12            11            12            11
BM_formatted_size_string<wchar_t>/8                      -0.0729         
-0.0742             6             5             6             5
BM_formatted_size_string<wchar_t>/16                     -0.0724         
-0.0738             3             3             3             3
BM_formatted_size_string<wchar_t>/32                     -0.0844         
-0.0858             1             1             1             1
BM_formatted_size_string<wchar_t>/64                     -0.1493         
-0.1508             1             1             1             1
BM_formatted_size_string<wchar_t>/128                    -0.2466         
-0.2477             0             0             0             0
BM_formatted_size_string<wchar_t>/256                    -0.3796         
-0.3805             0             0             0             0
BM_formatted_size_string<wchar_t>/512                    -0.5323         
-0.5331             0             0             0             0
BM_formatted_size_string<wchar_t>/1024                   -0.7058         
-0.7062             0             0             0             0
BM_formatted_size_string<wchar_t>/2048                   -0.8255         
-0.8257             0             0             0             0
BM_formatted_size_string<wchar_t>/4096                   -0.9042         
-0.9043             0             0             0             0
BM_formatted_size_string<wchar_t>/8192                   -0.9509         
-0.9510             0             0             0             0
BM_formatted_size_string<wchar_t>/16384                  -0.9754         
-0.9754             0             0             0             0
BM_formatted_size_string<wchar_t>/32768                  -0.9875         
-0.9875             0             0             0             0
BM_formatted_size_string<wchar_t>/65536                  -0.9940         
-0.9940             0             0             0             0
BM_formatted_size_string<wchar_t>/131072                 -0.9971         
-0.9971             0             0             0             0
BM_formatted_size_string<wchar_t>/262144                 -0.9986         
-0.9986             0             0             0             0
BM_formatted_size_string<wchar_t>/524288                 -0.9993         
-0.9993             0             0             0             0
BM_formatted_size_string<wchar_t>/1048576                -0.9997         
-0.9997             0             0             0             0
OVERALL_GEOMEAN                                          -0.8740         
-0.8742             0             0             0             0

From b4563824913a27cc40d8f1415e96949aeb60b0a3 Mon Sep 17 00:00:00 2001
From: Mark de Wever <ko...@xs4all.nl>
Date: Sat, 30 Mar 2024 17:35:56 +0100
Subject: [PATCH] [libc++][format][6/7] Optimizes formatted_size.

__formatted_size_buffer is not used in the public library interface so the
changes are not an ABI break.

Before
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   
Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                47.6 ns         47.6 ns     
14729701 bytes_per_second=20.055Mi/s
BM_formatted_size_string<char>/2                23.8 ns         23.8 ns     
29445832 bytes_per_second=80.1657Mi/s
BM_formatted_size_string<char>/4                11.9 ns         11.9 ns     
58813944 bytes_per_second=320.771Mi/s
BM_formatted_size_string<char>/8                5.95 ns         5.95 ns    
117601928 bytes_per_second=1.25307Gi/s
BM_formatted_size_string<char>/16               2.97 ns         2.97 ns    
235800528 bytes_per_second=5.02342Gi/s
BM_formatted_size_string<char>/32               1.49 ns         1.49 ns    
471177152 bytes_per_second=20.0428Gi/s
BM_formatted_size_string<char>/64              0.745 ns        0.744 ns    
942161408 bytes_per_second=80.1172Gi/s
BM_formatted_size_string<char>/128             0.377 ns        0.376 ns   
1861005568 bytes_per_second=316.85Gi/s
BM_formatted_size_string<char>/256             0.200 ns        0.200 ns   
3503415296 bytes_per_second=1.16508Ti/s
BM_formatted_size_string<char>/512             0.111 ns        0.110 ns   
6351184384 bytes_per_second=4.21671Ti/s
BM_formatted_size_string<char>/1024            0.067 ns        0.067 ns   
10441098240 bytes_per_second=13.9409Ti/s
BM_formatted_size_string<char>/2048            0.045 ns        0.045 ns   
15404886016 bytes_per_second=41.0182Ti/s
BM_formatted_size_string<char>/4096            0.036 ns        0.036 ns   
19634089984 bytes_per_second=104.431Ti/s
BM_formatted_size_string<char>/8192            0.030 ns        0.030 ns   
23265501184 bytes_per_second=247.504Ti/s
BM_formatted_size_string<char>/16384           0.027 ns        0.027 ns   
25556238336 bytes_per_second=545.066Ti/s
BM_formatted_size_string<char>/32768           0.027 ns        0.027 ns   
26036731904 bytes_per_second=1.08282Pi/s
BM_formatted_size_string<char>/65536           0.027 ns        0.027 ns   
26192379904 bytes_per_second=2.17753Pi/s
BM_formatted_size_string<char>/131072          0.026 ns        0.026 ns   
26622033920 bytes_per_second=4.4243Pi/s
BM_formatted_size_string<char>/262144          0.027 ns        0.027 ns   
25452085248 bytes_per_second=8.48295Pi/s
BM_formatted_size_string<char>/524288          0.028 ns        0.028 ns   
24593825792 bytes_per_second=16.3639Pi/s
BM_formatted_size_string<char>/1048576         0.028 ns        0.028 ns   
24775753728 bytes_per_second=32.8865Pi/s
BM_formatted_size_string<wchar_t>/1             47.0 ns         46.9 ns     
14912646 bytes_per_second=81.3132Mi/s
BM_formatted_size_string<wchar_t>/2             23.4 ns         23.3 ns     
29964298 bytes_per_second=326.845Mi/s
BM_formatted_size_string<wchar_t>/4             11.7 ns         11.7 ns     
59734380 bytes_per_second=1.26993Gi/s
BM_formatted_size_string<wchar_t>/8             5.84 ns         5.84 ns    
120131824 bytes_per_second=5.10665Gi/s
BM_formatted_size_string<wchar_t>/16            2.92 ns         2.92 ns    
239874128 bytes_per_second=20.4273Gi/s
BM_formatted_size_string<wchar_t>/32            1.48 ns         1.48 ns    
473672928 bytes_per_second=80.6502Gi/s
BM_formatted_size_string<wchar_t>/64           0.797 ns        0.796 ns    
877660480 bytes_per_second=299.351Gi/s
BM_formatted_size_string<wchar_t>/128          0.450 ns        0.449 ns   
1568318336 bytes_per_second=1.03679Ti/s
BM_formatted_size_string<wchar_t>/256          0.273 ns        0.273 ns   
2571588096 bytes_per_second=3.41553Ti/s
BM_formatted_size_string<wchar_t>/512          0.181 ns        0.181 ns   
3868106240 bytes_per_second=10.2834Ti/s
BM_formatted_size_string<wchar_t>/1024         0.143 ns        0.143 ns   
4902914048 bytes_per_second=26.0513Ti/s
BM_formatted_size_string<wchar_t>/2048         0.121 ns        0.121 ns   
5801660416 bytes_per_second=61.775Ti/s
BM_formatted_size_string<wchar_t>/4096         0.110 ns        0.110 ns   
6380191744 bytes_per_second=135.763Ti/s
BM_formatted_size_string<wchar_t>/8192         0.107 ns        0.107 ns   
6531432448 bytes_per_second=278.051Ti/s
BM_formatted_size_string<wchar_t>/16384        0.107 ns        0.107 ns   
6531842048 bytes_per_second=556.731Ti/s
BM_formatted_size_string<wchar_t>/32768        0.106 ns        0.105 ns   
6648430592 bytes_per_second=1.10374Pi/s
BM_formatted_size_string<wchar_t>/65536        0.110 ns        0.110 ns   
6352273408 bytes_per_second=2.11267Pi/s
BM_formatted_size_string<wchar_t>/131072       0.114 ns        0.114 ns   
6139805696 bytes_per_second=4.07511Pi/s
BM_formatted_size_string<wchar_t>/262144       0.114 ns        0.114 ns   
6169821184 bytes_per_second=8.20475Pi/s
BM_formatted_size_string<wchar_t>/524288       0.114 ns        0.113 ns   
6168248320 bytes_per_second=16.4288Pi/s
BM_formatted_size_string<wchar_t>/1048576      0.120 ns        0.120 ns   
5817499648 bytes_per_second=30.9853Pi/s

After
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   
Iterations UserCounters...
----------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                44.6 ns         44.4 ns     
15768750 bytes_per_second=21.4556Mi/s
BM_formatted_size_string<char>/2                22.3 ns         22.2 ns     
31477648 bytes_per_second=85.8719Mi/s
BM_formatted_size_string<char>/4                11.1 ns         11.1 ns     
63031396 bytes_per_second=343.327Mi/s
BM_formatted_size_string<char>/8                5.57 ns         5.56 ns    
126105968 bytes_per_second=1.34101Gi/s
BM_formatted_size_string<char>/16               2.78 ns         2.78 ns    
251793552 bytes_per_second=5.36504Gi/s
BM_formatted_size_string<char>/32               1.39 ns         1.39 ns    
504195136 bytes_per_second=21.4672Gi/s
BM_formatted_size_string<char>/64              0.696 ns        0.694 ns   
1008341184 bytes_per_second=85.8541Gi/s
BM_formatted_size_string<char>/128             0.348 ns        0.347 ns   
2016425984 bytes_per_second=343.386Gi/s
BM_formatted_size_string<char>/256             0.174 ns        0.174 ns   
4031530496 bytes_per_second=1.33811Ti/s
BM_formatted_size_string<char>/512             0.087 ns        0.087 ns   
8058140160 bytes_per_second=5.36282Ti/s
BM_formatted_size_string<char>/1024            0.044 ns        0.043 ns   
16131134464 bytes_per_second=21.4476Ti/s
BM_formatted_size_string<char>/2048            0.022 ns        0.022 ns   
32242241536 bytes_per_second=85.938Ti/s
BM_formatted_size_string<char>/4096            0.011 ns        0.011 ns   
64521842688 bytes_per_second=343.596Ti/s
BM_formatted_size_string<char>/8192            0.005 ns        0.005 ns   
129048797184 bytes_per_second=1.34065Pi/s
BM_formatted_size_string<char>/16384           0.003 ns        0.003 ns   
258152202240 bytes_per_second=5.37696Pi/s
BM_formatted_size_string<char>/32768           0.001 ns        0.001 ns   
516496818176 bytes_per_second=21.4635Pi/s
BM_formatted_size_string<char>/65536           0.001 ns        0.001 ns   
1000000061440 bytes_per_second=85.868Pi/s
BM_formatted_size_string<char>/131072          0.000 ns        0.000 ns   
1000000061440 bytes_per_second=343.451Pi/s
BM_formatted_size_string<char>/262144          0.000 ns        0.000 ns   
1000000192512 bytes_per_second=1.33951Ei/s
BM_formatted_size_string<char>/524288          0.000 ns        0.000 ns   
1000000192512 bytes_per_second=5.35714Ei/s
BM_formatted_size_string<char>/1048576         0.000 ns        0.000 ns   
1000000716800 bytes_per_second=21.4307Ei/s
BM_formatted_size_string<wchar_t>/1             43.3 ns         43.2 ns     
16196853 bytes_per_second=88.2127Mi/s
BM_formatted_size_string<wchar_t>/2             21.6 ns         21.6 ns     
32428704 bytes_per_second=353.328Mi/s
BM_formatted_size_string<wchar_t>/4             10.8 ns         10.8 ns     
64858356 bytes_per_second=1.38065Gi/s
BM_formatted_size_string<wchar_t>/8             5.42 ns         5.40 ns    
129503096 bytes_per_second=5.51619Gi/s
BM_formatted_size_string<wchar_t>/16            2.71 ns         2.70 ns    
259444512 bytes_per_second=22.056Gi/s
BM_formatted_size_string<wchar_t>/32            1.35 ns         1.35 ns    
517963680 bytes_per_second=88.217Gi/s
BM_formatted_size_string<wchar_t>/64           0.678 ns        0.676 ns   
1035243264 bytes_per_second=352.499Gi/s
BM_formatted_size_string<wchar_t>/128          0.339 ns        0.338 ns   
2069611264 bytes_per_second=1.37816Ti/s
BM_formatted_size_string<wchar_t>/256          0.169 ns        0.169 ns   
4146004480 bytes_per_second=5.51363Ti/s
BM_formatted_size_string<wchar_t>/512          0.085 ns        0.085 ns   
8282438656 bytes_per_second=22.023Ti/s
BM_formatted_size_string<wchar_t>/1024         0.042 ns        0.042 ns   
16622990336 bytes_per_second=88.6687Ti/s
BM_formatted_size_string<wchar_t>/2048         0.021 ns        0.021 ns   
33236293632 bytes_per_second=354.449Ti/s
BM_formatted_size_string<wchar_t>/4096         0.011 ns        0.011 ns   
66449469440 bytes_per_second=1.38568Pi/s
BM_formatted_size_string<wchar_t>/8192         0.005 ns        0.005 ns   
133098782720 bytes_per_second=5.54274Pi/s
BM_formatted_size_string<wchar_t>/16384        0.003 ns        0.003 ns   
266227466240 bytes_per_second=22.1089Pi/s
BM_formatted_size_string<wchar_t>/32768        0.001 ns        0.001 ns   
532288503808 bytes_per_second=88.5187Pi/s
BM_formatted_size_string<wchar_t>/65536        0.001 ns        0.001 ns   
1000000061440 bytes_per_second=354.202Pi/s
BM_formatted_size_string<wchar_t>/131072       0.000 ns        0.000 ns   
1000000061440 bytes_per_second=1.38438Ei/s
BM_formatted_size_string<wchar_t>/262144       0.000 ns        0.000 ns   
1000000192512 bytes_per_second=5.54102Ei/s
BM_formatted_size_string<wchar_t>/524288       0.000 ns        0.000 ns   
1000000192512 bytes_per_second=22.0527Ei/s
BM_formatted_size_string<wchar_t>/1048576      0.000 ns        0.000 ns   
1000000716800 bytes_per_second=88.673Ei/s

Comparison
Benchmark                                                   Time             
CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------
BM_formatted_size_string<char>/1                         -0.0639         
-0.0653            48            45            48            44
BM_formatted_size_string<char>/2                         -0.0651         
-0.0665            24            22            24            22
BM_formatted_size_string<char>/4                         -0.0642         
-0.0657            12            11            12            11
BM_formatted_size_string<char>/8                         -0.0643         
-0.0656             6             6             6             6
BM_formatted_size_string<char>/16                        -0.0623         
-0.0637             3             3             3             3
BM_formatted_size_string<char>/32                        -0.0650         
-0.0664             1             1             1             1
BM_formatted_size_string<char>/64                        -0.0655         
-0.0668             1             1             1             1
BM_formatted_size_string<char>/128                       -0.0756         
-0.0773             0             0             0             0
BM_formatted_size_string<char>/256                       -0.1280         
-0.1293             0             0             0             0
BM_formatted_size_string<char>/512                       -0.2126         
-0.2137             0             0             0             0
BM_formatted_size_string<char>/1024                      -0.3489         
-0.3500             0             0             0             0
BM_formatted_size_string<char>/2048                      -0.5220         
-0.5227             0             0             0             0
BM_formatted_size_string<char>/4096                      -0.6955         
-0.6961             0             0             0             0
BM_formatted_size_string<char>/8192                      -0.8194         
-0.8197             0             0             0             0
BM_formatted_size_string<char>/16384                     -0.9009         
-0.9010             0             0             0             0
BM_formatted_size_string<char>/32768                     -0.9495         
-0.9496             0             0             0             0
BM_formatted_size_string<char>/65536                     -0.9746         
-0.9746             0             0             0             0
BM_formatted_size_string<char>/131072                    -0.9871         
-0.9871             0             0             0             0
BM_formatted_size_string<char>/262144                    -0.9938         
-0.9938             0             0             0             0
BM_formatted_size_string<char>/524288                    -0.9970         
-0.9970             0             0             0             0
BM_formatted_size_string<char>/1048576                   -0.9985         
-0.9985             0             0             0             0
BM_formatted_size_string<wchar_t>/1                      -0.0769         
-0.0782            47            43            47            43
BM_formatted_size_string<wchar_t>/2                      -0.0737         
-0.0750            23            22            23            22
BM_formatted_size_string<wchar_t>/4                      -0.0788         
-0.0802            12            11            12            11
BM_formatted_size_string<wchar_t>/8                      -0.0729         
-0.0742             6             5             6             5
BM_formatted_size_string<wchar_t>/16                     -0.0724         
-0.0738             3             3             3             3
BM_formatted_size_string<wchar_t>/32                     -0.0844         
-0.0858             1             1             1             1
BM_formatted_size_string<wchar_t>/64                     -0.1493         
-0.1508             1             1             1             1
BM_formatted_size_string<wchar_t>/128                    -0.2466         
-0.2477             0             0             0             0
BM_formatted_size_string<wchar_t>/256                    -0.3796         
-0.3805             0             0             0             0
BM_formatted_size_string<wchar_t>/512                    -0.5323         
-0.5331             0             0             0             0
BM_formatted_size_string<wchar_t>/1024                   -0.7058         
-0.7062             0             0             0             0
BM_formatted_size_string<wchar_t>/2048                   -0.8255         
-0.8257             0             0             0             0
BM_formatted_size_string<wchar_t>/4096                   -0.9042         
-0.9043             0             0             0             0
BM_formatted_size_string<wchar_t>/8192                   -0.9509         
-0.9510             0             0             0             0
BM_formatted_size_string<wchar_t>/16384                  -0.9754         
-0.9754             0             0             0             0
BM_formatted_size_string<wchar_t>/32768                  -0.9875         
-0.9875             0             0             0             0
BM_formatted_size_string<wchar_t>/65536                  -0.9940         
-0.9940             0             0             0             0
BM_formatted_size_string<wchar_t>/131072                 -0.9971         
-0.9971             0             0             0             0
BM_formatted_size_string<wchar_t>/262144                 -0.9986         
-0.9986             0             0             0             0
BM_formatted_size_string<wchar_t>/524288                 -0.9993         
-0.9993             0             0             0             0
BM_formatted_size_string<wchar_t>/1048576                -0.9997         
-0.9997             0             0             0             0
OVERALL_GEOMEAN                                          -0.8740         
-0.8742             0             0             0             0
---
 libcxx/include/__format/buffer.h | 51 ++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h
index c41ea73b3eb7e..ca2334f93fd04 100644
--- a/libcxx/include/__format/buffer.h
+++ b/libcxx/include/__format/buffer.h
@@ -603,28 +603,6 @@ class _LIBCPP_TEMPLATE_VIS __format_buffer {
   typename __writer_selector<_OutIt, _CharT>::type __writer_;
 };
 
-/// A buffer that counts the number of insertions.
-///
-/// Since \ref formatted_size only needs to know the size, the output itself is
-/// discarded.
-template <__fmt_char_type _CharT>
-class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer {
-public:
-  _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return __output_.__make_output_iterator(); }
-
-  _LIBCPP_HIDE_FROM_ABI void __flush(const _CharT*, size_t __n) { __size_ += __n; }
-
-  _LIBCPP_HIDE_FROM_ABI size_t __result() && {
-    __output_.__flush(0);
-    return __size_;
-  }
-
-private:
-  __internal_storage<_CharT> __storage_;
-  __output_buffer<_CharT> __output_{__storage_.__begin(), __storage_.__buffer_size, this};
-  size_t __size_{0};
-};
-
 // ***** ***** ***** LLVM-20 classes ***** ***** *****
 
 // A dynamically growing buffer.
@@ -817,6 +795,35 @@ class _LIBCPP_TEMPLATE_VIS __format_to_n_buffer : private __buffer_selector<_Out
   __max_output_size __max_output_size_;
 };
 
+// A buffer that counts the number of insertions.
+//
+// Since formatted_size only needs to know the size, the output itself is
+// discarded.
+template <__fmt_char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer : private __output_buffer<_CharT> {
+public:
+  using _Base = __output_buffer<_CharT>;
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __formatted_size_buffer()
+      : _Base{nullptr, 0, __prepare_write, std::addressof(__max_output_size_)} {}
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return _Base::__make_output_iterator(); }
+
+  // This function does not need to be r-value qualified, however this is
+  // consistent with similar objects.
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __result() && { return __max_output_size_.__code_units_written(); }
+
+private:
+  __max_output_size __max_output_size_{0};
+
+  _LIBCPP_HIDE_FROM_ABI static void
+  __prepare_write([[maybe_unused]] __output_buffer<_CharT>& __buffer, [[maybe_unused]] size_t __size_hint) {
+    // Note this function does not satisfy the requirement of giving a 1 code unit buffer.
+    _LIBCPP_ASSERT_INTERNAL(
+        false, "Since __max_output_size_.__max_size_ == 0 there should never be call to this function.");
+  }
+};
+
 // ***** ***** ***** LLVM-19 and LLVM-20 class ***** ***** *****
 
 // A dynamically growing buffer intended to be used for retargeting a context.

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to