http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55354



--- Comment #18 from Dmitry Vyukov <dvyukov at google dot com> 2012-11-21 
07:45:20 UTC ---

(In reply to comment #17)

> >When building libtsan as a shared library (for which I had to hack our 
> >assembly

> >blobs a bit) we get two sources of slowdown: 

> >  1. __tsan_read8 and friends are called through PLT

> >  2. __tsan_read8 and friends use one extra load to get to TLS

> 

> > I bet 9.5% or more of that is due to the PLT call.

> 

> That's not the overhead you are looking for, Luke.

> 

> We currently compile with -fPIC and link statically, linker inserts only 1

> memory dereference in this case. However, -fPIC affects code generation in

> compiler, it has to reserve more registers for tls access code and has to

> allocate stack frame because of the potential call. Only that causes *20%*

> slowdown on a real application (not a synthetic benchmark).

> 

> Kostya, to evaluate initial-exec you need to ensure that code characteristics

> of __tsan_read/write are not affected, i.e. 0 stack spills and analyze script

> passes. Everything else we have w/o initial-exec.



For the actual ThreadSanitizer runtime, -fPIC -ftls-model=initial-exec causes

degradation of the generated code. The linker emits the same TLS access code in all

cases, but the compiler generates worse code.

The tables below show various stats about the generated code for the hot functions.

We are mostly interested in stack access instructions (rsp/push/pop):



gcc -fPIE

    write1 tot 325; size 1316; rsp 1; push 0; pop 0; call 2; load 16; store  5;

sh  52; mov  68; lea   6; cmp  46

    write2 tot 326; size 1340; rsp 1; push 0; pop 0; call 2; load 16; store  5;

sh  52; mov  68; lea   6; cmp  46

    write4 tot 325; size 1316; rsp 1; push 0; pop 0; call 2; load 16; store  5;

sh  52; mov  68; lea   6; cmp  46

    write8 tot 325; size 1316; rsp 1; push 0; pop 0; call 2; load 16; store  5;

sh  52; mov  68; lea   6; cmp  46

     read1 tot 355; size 1476; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  52; mov  71; lea   6; cmp  52

     read2 tot 344; size 1428; rsp 1; push 0; pop 0; call 2; load 16; store  5;

sh  52; mov  71; lea   6; cmp  51

     read4 tot 344; size 1436; rsp 1; push 0; pop 0; call 2; load 16; store  5;

sh  52; mov  71; lea   6; cmp  51

     read8 tot 344; size 1436; rsp 1; push 0; pop 0; call 2; load 16; store  5;

sh  52; mov  71; lea   6; cmp  51

func_entry tot  28; size  116; rsp 0; push 0; pop 0; call 1; load  3; store  1;

sh   2; mov   8; lea   0; cmp   1

 func_exit tot  25; size  100; rsp 0; push 0; pop 0; call 1; load  1; store  0;

sh   2; mov   5; lea   0; cmp   1



gcc -fPIC -ftls-model=initial-exec

    write1 tot 323; size 1268; rsp 1; push 1; pop 5; call 2; load 17; store  7;

sh  52; mov  64; lea   6; cmp  46

    write2 tot 321; size 1275; rsp 1; push 1; pop 5; call 2; load 17; store  7;

sh  52; mov  64; lea   6; cmp  46

    write4 tot 323; size 1268; rsp 1; push 1; pop 5; call 2; load 17; store  7;

sh  52; mov  64; lea   6; cmp  46

    write8 tot 323; size 1268; rsp 1; push 1; pop 5; call 2; load 17; store  7;

sh  52; mov  64; lea   6; cmp  46

     read1 tot 342; size 1380; rsp 1; push 1; pop 4; call 2; load 18; store  7;

sh  52; mov  67; lea   6; cmp  52

     read2 tot 331; size 1331; rsp 1; push 1; pop 3; call 2; load 17; store  7;

sh  52; mov  67; lea   6; cmp  51

     read4 tot 334; size 1356; rsp 1; push 1; pop 3; call 2; load 17; store  7;

sh  52; mov  67; lea   6; cmp  51

     read8 tot 334; size 1356; rsp 1; push 1; pop 3; call 2; load 17; store  7;

sh  52; mov  67; lea   6; cmp  51

func_entry tot   7; size   24; rsp 0; push 0; pop 0; call 0; load  0; store  1;

sh   0; mov   2; lea   0; cmp   0

 func_exit tot   6; size   21; rsp 0; push 0; pop 0; call 0; load  0; store  1;

sh   0; mov   1; lea   0; cmp   0



gcc -fPIC

    write1 tot 379; size 1571; rsp 23; push 0; pop 0; call 2; load 25; store

20; sh  52; mov 100; lea   6; cmp  42

    write2 tot 383; size 1603; rsp 23; push 0; pop 0; call 2; load 25; store

20; sh  52; mov 100; lea   6; cmp  42

    write4 tot 379; size 1571; rsp 23; push 0; pop 0; call 2; load 25; store

20; sh  52; mov 100; lea   6; cmp  42

    write8 tot 379; size 1571; rsp 23; push 0; pop 0; call 2; load 25; store

20; sh  52; mov 100; lea   6; cmp  42

     read1 tot 402; size 1715; rsp 23; push 0; pop 0; call 2; load 26; store

20; sh  52; mov 103; lea   6; cmp  48

     read2 tot 393; size 1659; rsp 23; push 0; pop 0; call 2; load 25; store

20; sh  52; mov 103; lea   6; cmp  47

     read4 tot 391; size 1659; rsp 23; push 0; pop 0; call 2; load 25; store

20; sh  52; mov 103; lea   6; cmp  47

     read8 tot 391; size 1659; rsp 23; push 0; pop 0; call 2; load 25; store

20; sh  52; mov 103; lea   6; cmp  47

func_entry tot   9; size   32; rsp 0; push 1; pop 1; call 0; load  0; store  0;

sh   0; mov   4; lea   0; cmp   0

 func_exit tot   7; size   32; rsp 0; push 0; pop 0; call 0; load  0; store  0;

sh   0; mov   2; lea   0; cmp   0



clang -fPIE

    write1 tot 326; size 1194; rsp 1; push 1; pop 5; call 2; load 16; store  5;

sh  37; mov  77; lea   4; cmp  42

    write2 tot 330; size 1210; rsp 1; push 1; pop 5; call 2; load 16; store  5;

sh  37; mov  77; lea   4; cmp  46

    write4 tot 330; size 1210; rsp 1; push 1; pop 5; call 2; load 16; store  5;

sh  37; mov  77; lea   4; cmp  46

    write8 tot 330; size 1210; rsp 1; push 1; pop 5; call 2; load 16; store  5;

sh  37; mov  77; lea   4; cmp  46

     read1 tot 350; size 1304; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  38; mov  85; lea   3; cmp  47

     read2 tot 355; size 1311; rsp 1; push 1; pop 5; call 2; load 16; store  5;

sh  37; mov  83; lea   4; cmp  50

     read4 tot 355; size 1311; rsp 1; push 1; pop 5; call 2; load 16; store  5;

sh  37; mov  83; lea   4; cmp  50

     read8 tot 355; size 1311; rsp 1; push 1; pop 5; call 2; load 16; store  5;

sh  37; mov  83; lea   4; cmp  50

func_entry tot  26; size  108; rsp 0; push 0; pop 0; call 1; load  2; store  0;

sh   1; mov   7; lea   0; cmp   1

 func_exit tot  21; size   93; rsp 0; push 0; pop 0; call 1; load  1; store  0;

sh   1; mov   4; lea   0; cmp   1



clang -fPIC -ftls-model=initial-exec

    write1 tot 315; size 1173; rsp 1; push 2; pop 2; call 2; load 20; store  6;

sh  37; mov  78; lea   4; cmp  42

    write2 tot 319; size 1189; rsp 1; push 2; pop 2; call 2; load 20; store  6;

sh  37; mov  78; lea   4; cmp  46

    write4 tot 319; size 1189; rsp 1; push 2; pop 2; call 2; load 20; store  6;

sh  37; mov  78; lea   4; cmp  46

    write8 tot 319; size 1189; rsp 1; push 2; pop 2; call 2; load 20; store  6;

sh  37; mov  78; lea   4; cmp  46

     read1 tot 347; size 1271; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  38; mov  86; lea   3; cmp  47

     read2 tot 345; size 1268; rsp 1; push 2; pop 2; call 2; load 20; store  6;

sh  37; mov  84; lea   4; cmp  50

     read4 tot 345; size 1268; rsp 1; push 2; pop 2; call 2; load 20; store  6;

sh  37; mov  84; lea   4; cmp  50

     read8 tot 345; size 1268; rsp 1; push 2; pop 2; call 2; load 20; store  6;

sh  37; mov  84; lea   4; cmp  50

func_entry tot  21; size   96; rsp 0; push 0; pop 0; call 1; load  4; store  1;

sh   1; mov   8; lea   0; cmp   1

 func_exit tot  19; size   82; rsp 0; push 0; pop 0; call 1; load  2; store  0;

sh   1; mov   5; lea   0; cmp   1



clang -fPIC

    write1 tot 409; size 1625; rsp 17; push 6; pop 6; call 2; load 17; store

25; sh  37; mov 131; lea  20; cmp  42

    write2 tot 413; size 1641; rsp 17; push 6; pop 6; call 2; load 17; store

25; sh  37; mov 131; lea  20; cmp  46

    write4 tot 413; size 1641; rsp 17; push 6; pop 6; call 2; load 17; store

25; sh  37; mov 131; lea  20; cmp  46

    write8 tot 413; size 1641; rsp 17; push 6; pop 6; call 2; load 17; store

25; sh  37; mov 131; lea  20; cmp  46

     read1 tot 423; size 1654; rsp 15; push 6; pop 6; call 2; load 13; store

26; sh  38; mov 123; lea  19; cmp  47

     read2 tot 446; size 1742; rsp 19; push 6; pop 6; call 2; load 19; store

27; sh  37; mov 140; lea  20; cmp  50

     read4 tot 446; size 1742; rsp 19; push 6; pop 6; call 2; load 19; store
27; sh  37; mov 140; lea  20; cmp  50

     read8 tot 446; size 1742; rsp 19; push 6; pop 6; call 2; load 19; store

27; sh  37; mov 140; lea  20; cmp  50

func_entry tot  36; size  129; rsp 0; push 3; pop 3; call 1; load  4; store  1;

sh   1; mov  10; lea   2; cmp   1

 func_exit tot  30; size  114; rsp 0; push 3; pop 2; call 1; load  2; store  0;

sh   1; mov   6; lea   2; cmp   1



clang -fPIE project-stream

    write1 tot 293; size 1071; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  72; lea   1; cmp  38

    write2 tot 297; size 1087; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  72; lea   1; cmp  42

    write4 tot 297; size 1087; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  72; lea   1; cmp  42

    write8 tot 297; size 1087; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  72; lea   1; cmp  42

     read1 tot 334; size 1252; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  78; lea   1; cmp  38

     read2 tot 341; size 1283; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  79; lea   1; cmp  42

     read4 tot 341; size 1283; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  79; lea   1; cmp  42

     read8 tot 341; size 1283; rsp 1; push 0; pop 0; call 2; load 17; store  5;

sh  33; mov  79; lea   1; cmp  42

func_entry tot  26; size  102; rsp 0; push 0; pop 0; call 1; load  2; store  0;

sh   0; mov   7; lea   0; cmp   1

 func_exit tot  21; size   87; rsp 0; push 0; pop 0; call 1; load  1; store  0;

sh   0; mov   4; lea   0; cmp   1



clang -fPIC -ftls-model=initial-exec project-stream

    write1 tot 293; size 1051; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  33; mov  73; lea   1; cmp  38

    write2 tot 297; size 1067; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  33; mov  73; lea   1; cmp  42

    write4 tot 297; size 1067; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  33; mov  73; lea   1; cmp  42

    write8 tot 297; size 1067; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  33; mov  73; lea   1; cmp  42

     read1 tot 327; size 1231; rsp 1; push 0; pop 0; call 2; load 21; store  6;

sh  33; mov  79; lea   1; cmp  38

     read2 tot 340; size 1263; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  33; mov  80; lea   1; cmp  42

     read4 tot 340; size 1263; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  33; mov  80; lea   1; cmp  42

     read8 tot 340; size 1263; rsp 1; push 1; pop 5; call 2; load 21; store  6;

sh  33; mov  80; lea   1; cmp  42

func_entry tot  21; size   90; rsp 0; push 0; pop 0; call 1; load  4; store  2;

sh   0; mov   8; lea   0; cmp   1

 func_exit tot  17; size   76; rsp 0; push 0; pop 0; call 1; load  2; store  1;

sh   0; mov   5; lea   0; cmp   1



clang -fPIC project-stream

    write1 tot 341; size 1289; rsp 7; push 6; pop 6; call 2; load 22; store  7;

sh  33; mov  91; lea  12; cmp  38

    write2 tot 345; size 1305; rsp 7; push 6; pop 6; call 2; load 22; store  7;

sh  33; mov  91; lea  12; cmp  42

    write4 tot 345; size 1305; rsp 7; push 6; pop 6; call 2; load 22; store  7;

sh  33; mov  91; lea  12; cmp  42

    write8 tot 345; size 1305; rsp 7; push 6; pop 6; call 2; load 22; store  7;

sh  33; mov  91; lea  12; cmp  42

     read1 tot 379; size 1489; rsp 7; push 6; pop 6; call 2; load 21; store  6;

sh  33; mov  93; lea  12; cmp  38

     read2 tot 388; size 1533; rsp 9; push 6; pop 6; call 2; load 22; store  7;

sh  33; mov  97; lea  12; cmp  42

     read4 tot 388; size 1533; rsp 9; push 6; pop 6; call 2; load 22; store  7;

sh  33; mov  97; lea  12; cmp  42

     read8 tot 388; size 1533; rsp 9; push 6; pop 6; call 2; load 22; store  7;

sh  33; mov  97; lea  12; cmp  42

func_entry tot  33; size  123; rsp 0; push 3; pop 3; call 1; load  4; store  2;

sh   0; mov  10; lea   2; cmp   1

 func_exit tot  28; size  108; rsp 0; push 3; pop 2; call 1; load  2; store  1;

sh   0; mov   6; lea   2; cmp   1

Reply via email to