[ https://issues.apache.org/jira/browse/KAFKA-3565?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15269491#comment-15269491 ]
Jiangjie Qin commented on KAFKA-3565:
-------------------------------------
[~junrao] Thanks for helping look into this. I updated run 11 in the Google
sheet. The batch size was around 40-50K. The summary is below; it looks like
trunk still performs better.
{noformat}
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=100, compression.type=gzip (2.04 > 1.36, 50%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=100, compression.type=snappy (11.43 > 8.05, 41%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=1000, compression.type=gzip (3.75 > 2.12, 76%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=0, messageSize=1000, compression.type=snappy (16.33 > 11.28, 44%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=100, compression.type=gzip (1.40 > 0.99, 41%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=100, compression.type=snappy (9.16 > 7.37, 24%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=gzip (2.48 > 1.43, 73%)
1st: max.in.flight.requests.per.connection=1, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=snappy (16.04 > 10.91, 47%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=100, compression.type=gzip (2.28 > 1.51, 50%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=100, compression.type=snappy (11.59 > 8.66, 33%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=gzip (4.50 > 2.23, 101%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=snappy (17.36 > 12.14, 42%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=gzip (1.55 > 1.04, 49%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=snappy (10.41 > 7.66, 35%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.58 > 1.51, 70%)
1st: max.in.flight.requests.per.connection=1, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=snappy (15.57 > 11.32, 37%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=100, compression.type=gzip (2.36 > 1.58, 49%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=100, compression.type=snappy (11.96 > 9.51, 25%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=gzip (5.14 > 2.41, 113%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=snappy (16.90 > 14.75, 14%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=gzip (1.30 > 1.07, 21%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=snappy (10.31 > 8.34, 23%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.53 > 1.54, 64%)
1st: max.in.flight.requests.per.connection=1, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=snappy (15.84 > 13.59, 16%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=100, compression.type=gzip (1.73 > 1.42, 21%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=100, compression.type=snappy (9.20 > 7.66, 20%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=1000, compression.type=gzip (2.90 > 2.12, 36%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=0, messageSize=1000, compression.type=snappy (13.85 > 10.25, 35%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=100, compression.type=gzip (1.52 > 1.00, 52%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=100, compression.type=snappy (10.64 > 7.60, 40%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=gzip (2.43 > 1.44, 68%)
1st: max.in.flight.requests.per.connection=5, valueBound=500, linger.ms=100000, messageSize=1000, compression.type=snappy (15.75 > 10.98, 43%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=100, compression.type=gzip (1.75 > 1.64, 6%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=100, compression.type=snappy (10.31 > 8.79, 17%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=gzip (4.04 > 2.58, 56%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=0, messageSize=1000, compression.type=snappy (16.35 > 12.24, 33%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=gzip (1.51 > 1.04, 45%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=100, compression.type=snappy (11.53 > 8.45, 36%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.59 > 1.53, 69%)
1st: max.in.flight.requests.per.connection=5, valueBound=5000, linger.ms=100000, messageSize=1000, compression.type=snappy (17.44 > 12.34, 41%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=100, compression.type=gzip (2.07 > 1.69, 22%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=100, compression.type=snappy (11.16 > 8.87, 25%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=gzip (4.40 > 2.83, 55%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=0, messageSize=1000, compression.type=snappy (17.57 > 14.70, 19%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=gzip (1.63 > 1.07, 52%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=100, compression.type=snappy (11.91 > 9.20, 29%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=gzip (2.65 > 1.52, 74%)
1st: max.in.flight.requests.per.connection=5, valueBound=50000, linger.ms=100000, messageSize=1000, compression.type=snappy (18.06 > 15.17, 19%)
{noformat}
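For reference, the producer knobs swept above map onto client configuration roughly
as in the sketch below. This is a minimal sketch, not the benchmark driver itself:
the broker address, topic and batch.size value are placeholders, and valueBound is a
parameter of the perf tool's random payload generation (it controls how compressible
the values are), not a producer config.
{code}
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.ByteArraySerializer;

public class CompressionBenchSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        // The producer knobs swept in the runs above:
        props.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "1"); // or 5
        props.put(ProducerConfig.LINGER_MS_CONFIG, "0");                      // or 100000
        props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy");          // or "gzip"
        // batch.size here is a placeholder; the runs above ended up with ~40-50K batches.
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, "500000");

        try (KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(props)) {
            byte[] value = new byte[100]; // messageSize=100 or 1000
            producer.send(new ProducerRecord<>("test", value)); // topic name is a placeholder
        }
    }
}
{code}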
> Producer's throughput lower with compressed data after KIP-31/32
> ----------------------------------------------------------------
>
> Key: KAFKA-3565
> URL: https://issues.apache.org/jira/browse/KAFKA-3565
> Project: Kafka
> Issue Type: Bug
> Reporter: Ismael Juma
> Priority: Critical
> Fix For: 0.10.0.0
>
>
> Relative offsets were introduced by KIP-31 so that the broker does not have
> to recompress data (this was previously required after offsets were
> assigned). The implicit assumption is that reducing CPU usage required by
> recompression would mean that producer throughput for compressed data would
> increase.
> However, this doesn't seem to be the case:
> {code}
> Commit: eee95228fabe1643baa016a2d49fb0a9fe2c66bd (one before KIP-31/32)
> test_id:
> 2016-04-15--012.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100.compression_type=snappy
> status: PASS
> run time: 59.030 seconds
> {"records_per_sec": 519418.343653, "mb_per_sec": 49.54}
> {code}
> Full results: https://gist.github.com/ijuma/0afada4ff51ad6a5ac2125714d748292
> {code}
> Commit: fa594c811e4e329b6e7b897bce910c6772c46c0f (KIP-31/32)
> test_id:
> 2016-04-15--013.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100.compression_type=snappy
> status: PASS
> run time: 1 minute 0.243 seconds
> {"records_per_sec": 427308.818848, "mb_per_sec": 40.75}
> {code}
> Full results: https://gist.github.com/ijuma/e49430f0548c4de5691ad47696f5c87d
> The difference for the uncompressed case is smaller (and within what one
> would expect given the additional size overhead caused by the timestamp
> field):
> {code}
> Commit: eee95228fabe1643baa016a2d49fb0a9fe2c66bd (one before KIP-31/32)
> test_id:
> 2016-04-15--010.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100
> status: PASS
> run time: 1 minute 4.176 seconds
> {"records_per_sec": 321018.17747, "mb_per_sec": 30.61}
> {code}
> Full results: https://gist.github.com/ijuma/5fec369d686751a2d84debae8f324d4f
> {code}
> Commit: fa594c811e4e329b6e7b897bce910c6772c46c0f (KIP-31/32)
> test_id:
> 2016-04-15--014.kafkatest.tests.benchmark_test.Benchmark.test_producer_throughput.topic=topic-replication-factor-three.security_protocol=PLAINTEXT.acks=1.message_size=100
> status: PASS
> run time: 1 minute 5.079 seconds
> {"records_per_sec": 291777.608696, "mb_per_sec": 27.83}
> {code}
> Full results: https://gist.github.com/ijuma/1d35bd831ff9931448b0294bd9b787ed
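> As a rough back-of-the-envelope check of the two comparisons above (a sketch, not
> part of the original benchmark: the per-record overheads assume the magic v0 vs v1
> message formats with null keys, and mb_per_sec counts payload bytes only):
> {code}
> public class TimestampOverheadCheck {
>     public static void main(String[] args) {
>         // Observed payload throughput (MB/s) before and after KIP-31/32, from the runs above.
>         double snappyBefore = 49.54, snappyAfter = 40.75;
>         double plainBefore  = 30.61, plainAfter  = 27.83;
>         System.out.printf("snappy drop:       %.1f%%%n", 100 * (1 - snappyAfter / snappyBefore)); // ~17.7%
>         System.out.printf("uncompressed drop: %.1f%%%n", 100 * (1 - plainAfter / plainBefore));   // ~9.1%
>
>         // Expected effect of the 8-byte timestamp alone, assuming a v0 per-record overhead of
>         // 26 bytes (offset 8 + size 4 + crc 4 + magic 1 + attributes 1 + key length 4 +
>         // value length 4) versus 34 bytes for v1, with a 100-byte value and a null key.
>         double v0Bytes = 26 + 100, v1Bytes = 34 + 100;
>         System.out.printf("expected drop from timestamp alone: %.1f%%%n", 100 * (1 - v0Bytes / v1Bytes)); // ~6.0%
>     }
> }
> {code}
> That is, the uncompressed difference (~9%) is roughly in line with the extra timestamp
> bytes (~6%), while the compressed difference (~18%) is well beyond it.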