[ https://issues.apache.org/jira/browse/KAFKA-3565?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15269491#comment-15269491 ]
Jiangjie Qin commented on KAFKA-3565:
-------------------------------------

[~junrao] Thanks for helping look into this. I updated run 11 in the Google sheet. The batch size was around 40-50K. The summary is below; it looks like trunk still performs better.

{noformat}
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=100, compression.type=gzip (2.04 > 1.36, 50%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=100, compression.type=snappy (11.43 > 8.05, 41%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=1000, compression.type=gzip (3.75 > 2.12, 76%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=1000, compression.type=snappy (16.33 > 11.28, 44%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=100, compression.type=gzip (1.40 > 0.99, 41%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=100, compression.type=snappy (9.16 > 7.37, 24%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=gzip (2.48 > 1.43, 73%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=snappy (16.04 > 10.91, 47%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=100, compression.type=gzip (2.28 > 1.51, 50%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=100, compression.type=snappy (11.59 > 8.66, 33%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=gzip (4.50 > 2.23, 101%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=snappy (17.36 > 12.14, 42%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=gzip (1.55 > 1.04, 49%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=snappy (10.41 > 7.66, 35%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.58 > 1.51, 70%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=snappy (15.57 > 11.32, 37%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=100, compression.type=gzip (2.36 > 1.58, 49%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=100, compression.type=snappy (11.96 > 9.51, 25%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=gzip (5.14 > 2.41, 113%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=snappy (16.90 > 14.75, 14%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=gzip (1.30 > 1.07, 21%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=snappy (10.31 > 8.34, 23%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.53 > 1.54, 64%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=snappy (15.84 > 13.59, 16%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=100, compression.type=gzip (1.73 > 1.42, 21%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=100, compression.type=snappy (9.20 > 7.66, 20%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=1000, compression.type=gzip (2.90 > 2.12, 36%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=1000, compression.type=snappy (13.85 > 10.25, 35%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=100, compression.type=gzip (1.52 > 1.00, 52%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=100, compression.type=snappy (10.64 > 7.60, 40%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=gzip (2.43 > 1.44, 68%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=snappy (15.75 > 10.98, 43%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=100, compression.type=gzip (1.75 > 1.64, 6%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=100, compression.type=snappy (10.31 > 8.79, 17%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=gzip (4.04 > 2.58, 56%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=snappy (16.35 > 12.24, 33%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=gzip (1.51 > 1.04, 45%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=snappy (11.53 > 8.45, 36%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.59 > 1.53, 69%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=snappy (17.44 > 12.34, 41%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=100, compression.type=gzip (2.07 > 1.69, 22%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=100, compression.type=snappy (11.16 > 8.87, 25%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=gzip (4.40 > 2.83, 55%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=snappy (17.57 > 14.70, 19%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=gzip (1.63 > 1.07, 52%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=snappy (11.91 > 9.20, 29%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.65 > 1.52, 74%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=snappy (18.06 > 15.17, 19%)
{noformat}
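Reading the summary: the percentage in each row appears to be the relative difference of the first figure over the second, e.g. (2.04 - 1.36) / 1.36 = 50% for the first gzip row. A minimal, self-contained sketch of that arithmetic is below; the class and method names are illustrative only and are not part of the benchmark code.

{code}
// Sketch of the percentage arithmetic behind the "(first > second, N%)" summary rows above.
public class SummaryDelta {

    // Relative difference of `first` over `second`, in percent.
    static double relativeDiffPercent(double first, double second) {
        return (first - second) / second * 100.0;
    }

    public static void main(String[] args) {
        // Figures taken from two gzip rows in the summary above.
        System.out.printf("2.04 > 1.36 -> %.1f%%%n", relativeDiffPercent(2.04, 1.36)); // ~50%
        System.out.printf("5.14 > 2.41 -> %.1f%%%n", relativeDiffPercent(5.14, 2.41)); // ~113%
    }
}
{code}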

> Producer's throughput lower with compressed data after KIP-31/32
> ----------------------------------------------------------------
>
> Key: KAFKA-3565
> URL: https://issues.apache.org/jira/browse/KAFKA-3565
> Project: Kafka
> Issue Type: Bug
> Reporter: Ismael Juma
> Priority: Critical
> Fix For: 0.10.0.0
>
> Relative offsets were introduced by KIP-31 so that the broker does not have to recompress data (this was previously required after offsets were assigned). The implicit assumption is that reducing the CPU usage required by recompression would mean that producer throughput for compressed data would increase. However, this doesn't seem to be the case:
> {code}
> Commit: eee95228fabe1643baa016a2d49fb0a9fe2c66bd (one before KIP-31/32)
> test_id: 2016-04-15--012.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100.compression_type=snappy
> status: PASS
> run time: 59.030 seconds
> {"records_per_sec": 519418.343653, "mb_per_sec": 49.54}
> {code}
> Full results: https://gist.github.com/ijuma/0afada4ff51ad6a5ac2125714d748292
> {code}
> Commit: fa594c811e4e329b6e7b897bce910c6772c46c0f (KIP-31/32)
> test_id: 2016-04-15--013.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100.compression_type=snappy
> status: PASS
> run time: 1 minute 0.243 seconds
> {"records_per_sec": 427308.818848, "mb_per_sec": 40.75}
> {code}
> Full results: https://gist.github.com/ijuma/e49430f0548c4de5691ad47696f5c87d
> The difference for the uncompressed case is smaller (and within what one would expect given the additional size overhead caused by the timestamp field):
> {code}
> Commit: eee95228fabe1643baa016a2d49fb0a9fe2c66bd (one before KIP-31/32)
> test_id: 2016-04-15--010.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100
> status: PASS
> run time: 1 minute 4.176 seconds
> {"records_per_sec": 321018.17747, "mb_per_sec": 30.61}
> {code}
> Full results: https://gist.github.com/ijuma/5fec369d686751a2d84debae8f324d4f
> {code}
> Commit: fa594c811e4e329b6e7b897bce910c6772c46c0f (KIP-31/32)
> test_id: 2016-04-15--014.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100
> status: PASS
> run time: 1 minute 5.079 seconds
> {"records_per_sec": 291777.608696, "mb_per_sec": 27.83}
> {code}
> Full results: https://gist.github.com/ijuma/1d35bd831ff9931448b0294bd9b787ed
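
For context on what the quoted benchmark exercises, a stand-alone producer configured along the same lines (acks=1, snappy compression, 100-byte values) would look roughly like the sketch below. The broker address, topic name, and record count are placeholders; the throughput figures quoted above come from the kafkatest/ducktape benchmark harness, not from this snippet.

{code}
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.ByteArraySerializer;

// Illustrative producer mirroring the benchmarked settings (acks=1, compression.type=snappy,
// 100-byte values). Broker address, topic name, and record count are placeholders.
public class CompressedProducerSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");   // placeholder broker address
        props.put("acks", "1");                              // matches acks=1 in the test_id above
        props.put("compression.type", "snappy");             // the compressed case being compared
        props.put("key.serializer", ByteArraySerializer.class.getName());
        props.put("value.serializer", ByteArraySerializer.class.getName());

        byte[] value = new byte[100];                         // matches message_size=100

        try (KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 1_000_000; i++) {             // placeholder record count
                producer.send(new ProducerRecord<>("test-topic", value));
            }
            producer.flush();
        }
    }
}
{code}

Adding linger.ms and max.in.flight.requests.per.connection to the same properties covers the parameter sweep in the comment above.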