Compute binlog checksums (when enabled) already when writing events
into the statement or transaction caches, where before it was done
when the caches are copied to the real binlog file. This moves the
checksum computation outside of holding LOCK_log, improving
scalability.

At stmt/trx cache write time, the final end_log_pos values are not
known, so with this patch these will be set to 0. Events that are
written directly to the binlog file (not through stmt/trx cache) keep
the correct end_log_pos value. The GTID and COMMIT/XID events at the
start and end of event groups are written directly, so the zero
end_log_pos is only for events in the middle of event groups, which
does not negatively affect replication.

An option --binlog-legacy-event-pos, off by default, is provided to
disable this behavior to provide backwards compatibility with any
external applications that might rely on end_log_pos in events in the
middle of event groups.

Checksums cannot be pre-computed when binlog encryption is enabled, as
encryption relies on correct end_log_pos to provide part of the
nonce/IV. Checksum pre-computation is also disabled for WSREP/Galera.

The current --binlog-checksum configuration is saved in
binlog_cache_data at transaction start and used to pre-compute
checksums in cache, if applicable. When the cache is later copied to
the binlog, a check is made if the saved value still matches the
configured global value; if so, the events are block-copied directly
into the binlog file. If --binlog-checksum was changed during the
transaction, events are re-written to the binlog file one-by-one and
the checksums recomputed/discarded as appropriate.

Signed-off-by: Kristian Nielsen <kniel...@knielsen-hq.org>
---
 include/my_atomic.h                           |  41 ++++-
 include/my_sys.h                              |   2 +
 .../main/mysqlbinlog_row_compressed.result    |  48 ++---
 .../main/mysqlbinlog_row_minimal.result       |  48 ++---
 .../main/mysqlbinlog_stmt_compressed.result   |  16 +-
 mysql-test/main/mysqld--help.result           |   7 +
 .../suite/binlog/include/binlog_ioerr.inc     |   3 +
 mysql-test/suite/binlog/r/binlog_ioerr.result |   2 +
 mysql-test/suite/binlog/t/binlog_killed.test  |   2 +-
 .../t/binlog_table_map_optional_metadata.test |   4 +-
 .../binlog_encryption/binlog_ioerr.result     |   2 +
 .../suite/rpl/r/rpl_checksum_cache.result     |  43 ++++-
 .../suite/rpl/t/rpl_checksum_cache.test       |  98 +++++++++-
 .../r/sysvars_server_notembedded.result       |  10 +
 mysys/mf_iocache2.c                           |  34 ++++
 sql/log.cc                                    | 173 ++++++++++++++----
 sql/log.h                                     |   2 +-
 sql/log_event.h                               |   2 +-
 sql/log_event_server.cc                       |  24 +--
 sql/mysqld.cc                                 |   1 +
 sql/mysqld.h                                  |   1 +
 sql/privilege.h                               |   3 +
 sql/sys_vars.cc                               |  13 ++
 23 files changed, 466 insertions(+), 113 deletions(-)

diff --git a/include/my_atomic.h b/include/my_atomic.h
index 270134a6caf..01e9170cb15 100644
--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -62,8 +62,8 @@
     Order must be one of MY_MEMORY_ORDER_RELAXED, MY_MEMORY_ORDER_RELEASE,
     MY_MEMORY_ORDER_SEQ_CST.
 
-  '#' is substituted by a size suffix - 8, 16, 32, 64, or ptr
-  (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr).
+  '#' is substituted by a size suffix - 8, 16, 32, 64, ptr, or ul (for unsigned
+  long) (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr).
 
   The first version orders memory accesses according to 
MY_MEMORY_ORDER_SEQ_CST,
   the second version (with _explicit suffix) orders memory accesses according 
to
@@ -153,4 +153,41 @@
 #define my_atomic_casptr_strong_explicit(P, E, D, S, F) \
   my_atomic_casptr((P), (E), (D))
 #endif
+
+/* Convenience macros since ulong is 32 or 64 bit depending on platform. */
+#if SIZEOF_LONG == 4
+#define my_atomic_storeul(P, D) my_atomic_store32((int32 volatile *)(P), (D))
+#define my_atomic_storeul_explicit(P, D, O) \
+  my_atomic_store32_explicit((int32 volatile *)(P), (D), (O))
+#define my_atomic_loadul(P) my_atomic_load32((int32 volatile *)(P))
+#define my_atomic_loadul_explicit(P, O) \
+  my_atomic_load32_explicit((int32 volatile *)(P), (O))
+#define my_atomic_fasul(P, D) my_atomic_fas32((int32 volatile *)(P), (D))
+#define my_atomic_fasul_explicit(P, D, O) \
+  my_atomic_fas32_explicit((int32 volatile *)(P), (D), (O))
+#define my_atomic_addul(P, A) my_atomic_add32((int32 volatile *)(P), (A))
+#define my_atomic_addul_explicit(P, A, O) \
+  my_atomic_add32_explicit((int32 volatile *)(P), (A), (O))
+#define my_atomic_casul(P, E, D) \
+  my_atomic_cas32((int32 volatile *)(P), (E), (D))
+#define my_atomic_casul_weak_explicit(P, E, D, S, F) \
+  my_atomic_cas32_weak_explicit((int32 volatile *)(P), (E), (D), (S), (F))
+#define my_atomic_casul_strong_explicit(P, E, D, S, F) \
+  my_atomic_cas32_strong_explicit((int32 volatile *)(P), (E), (D), (S), (F))
+#elif SIZEOF_LONG == 8
+#define my_atomic_storeul(P, D) my_atomic_store64((P), (D))
+#define my_atomic_storeul_explicit(P, D, O) my_atomic_store64_explicit((P), (D), (O))
+#define my_atomic_loadul(P) my_atomic_load64((P))
+#define my_atomic_loadul_explicit(P, O) my_atomic_load64_explicit((P), (O))
+#define my_atomic_fasul(P, D) my_atomic_fas64((P), (D))
+#define my_atomic_fasul_explicit(P, D, O) my_atomic_fas64_explicit((P), (D), (O))
+#define my_atomic_addul(P, A) my_atomic_add64((P), (A))
+#define my_atomic_addul_explicit(P, A, O) my_atomic_add64_explicit((P), (A), (O))
+#define my_atomic_casul(P, E, D) my_atomic_cas64((P), (E), (D))
+#define my_atomic_casul_weak_explicit(P, E, D, S, F) \
+  my_atomic_cas64_weak_explicit((P), (E), (D), (S), (F))
+#define my_atomic_casul_strong_explicit(P, E, D, S, F) \
+  my_atomic_cas64_strong_explicit((P), (E), (D), (S), (F))
+#endif
+
 #endif /* MY_ATOMIC_INCLUDED */
diff --git a/include/my_sys.h b/include/my_sys.h
index 2d1dbb7b2bf..145d9fb2603 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -599,6 +599,8 @@ static inline size_t my_b_bytes_in_cache(const IO_CACHE 
*info)
 
 int my_b_copy_to_file    (IO_CACHE *cache, FILE *file, size_t count);
 int my_b_copy_all_to_file(IO_CACHE *cache, FILE *file);
+int my_b_copy_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache, size_t count);
+int my_b_copy_all_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache);
 
 my_off_t my_b_append_tell(IO_CACHE* info);
 my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */
diff --git a/mysql-test/main/mysqlbinlog_row_compressed.result 
b/mysql-test/main/mysqlbinlog_row_compressed.result
index 2cf652655e0..96a0ed61a71 100644
--- a/mysql-test/main/mysqlbinlog_row_compressed.result
+++ b/mysql-test/main/mysqlbinlog_row_compressed.result
@@ -57,11 +57,11 @@ START TRANSACTION
 /*!*/;
 # at 787
 # at 861
-#<date> server id 1  end_log_pos 861 CRC32 XXX         Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (10, 1, 2, 3, 4, 5, 6, 7, "")
-#<date> server id 1  end_log_pos 917 CRC32 XXX         Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 917
-#<date> server id 1  end_log_pos 985 CRC32 XXX         Write_compressed_rows: 
table id 32 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_compressed_rows: table id 
32 flags: STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -86,11 +86,11 @@ START TRANSACTION
 /*!*/;
 # at 1100
 # at 1176
-#<date> server id 1  end_log_pos 1176 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (11, 1, 2, 3, 4, 5, 6, 7, NULL)
-#<date> server id 1  end_log_pos 1232 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 1232
-#<date> server id 1  end_log_pos 1299 CRC32 XXX        Write_compressed_rows: 
table id 32 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_compressed_rows: table id 
32 flags: STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=11 /* INT meta=0 nullable=0 is_null=0 */
@@ -115,11 +115,11 @@ START TRANSACTION
 /*!*/;
 # at 1414
 # at 1492
-#<date> server id 1  end_log_pos 1492 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (12, 1, 2, 3, NULL, 5, 6, 7, "A")
-#<date> server id 1  end_log_pos 1548 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 1548
-#<date> server id 1  end_log_pos 1614 CRC32 XXX        Write_compressed_rows: 
table id 32 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_compressed_rows: table id 
32 flags: STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=12 /* INT meta=0 nullable=0 is_null=0 */
@@ -144,11 +144,11 @@ START TRANSACTION
 /*!*/;
 # at 1729
 # at 1804
-#<date> server id 1  end_log_pos 1804 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (13, 1, 2, 3, 0, 5, 6, 7, "A")
-#<date> server id 1  end_log_pos 1860 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 1860
-#<date> server id 1  end_log_pos 1927 CRC32 XXX        Write_compressed_rows: 
table id 32 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_compressed_rows: table id 
32 flags: STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=13 /* INT meta=0 nullable=0 is_null=0 */
@@ -173,11 +173,11 @@ START TRANSACTION
 /*!*/;
 # at 2042
 # at 2096
-#<date> server id 1  end_log_pos 2096 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t2 SELECT * FROM t1
-#<date> server id 1  end_log_pos 2152 CRC32 XXX        Table_map: `test`.`t2` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t2` mapped 
to number num
 # at 2152
-#<date> server id 1  end_log_pos 2243 CRC32 XXX        Write_compressed_rows: 
table id 33 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_compressed_rows: table id 
33 flags: STMT_END_F
 ### INSERT INTO `test`.`t2`
 ### SET
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -235,11 +235,11 @@ START TRANSACTION
 /*!*/;
 # at 2358
 # at 2424
-#<date> server id 1  end_log_pos 2424 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> UPDATE t2 SET f4=5 WHERE f4>0 or f4 is NULL
-#<date> server id 1  end_log_pos 2480 CRC32 XXX        Table_map: `test`.`t2` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t2` mapped 
to number num
 # at 2480
-#<date> server id 1  end_log_pos 2579 CRC32 XXX        Update_compressed_rows: 
table id 33 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Update_compressed_rows: table 
id 33 flags: STMT_END_F
 ### UPDATE `test`.`t2`
 ### WHERE
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -316,11 +316,11 @@ START TRANSACTION
 /*!*/;
 # at 2694
 # at 2731
-#<date> server id 1  end_log_pos 2731 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> DELETE FROM t1
-#<date> server id 1  end_log_pos 2787 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 2787
-#<date> server id 1  end_log_pos 2879 CRC32 XXX        Delete_compressed_rows: 
table id 32 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Delete_compressed_rows: table 
id 32 flags: STMT_END_F
 ### DELETE FROM `test`.`t1`
 ### WHERE
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -378,11 +378,11 @@ START TRANSACTION
 /*!*/;
 # at 2994
 # at 3031
-#<date> server id 1  end_log_pos 3031 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> DELETE FROM t2
-#<date> server id 1  end_log_pos 3087 CRC32 XXX        Table_map: `test`.`t2` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t2` mapped 
to number num
 # at 3087
-#<date> server id 1  end_log_pos 3172 CRC32 XXX        Delete_compressed_rows: 
table id 33 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Delete_compressed_rows: table 
id 33 flags: STMT_END_F
 ### DELETE FROM `test`.`t2`
 ### WHERE
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
diff --git a/mysql-test/main/mysqlbinlog_row_minimal.result 
b/mysql-test/main/mysqlbinlog_row_minimal.result
index 69aa91a8903..6871d75a985 100644
--- a/mysql-test/main/mysqlbinlog_row_minimal.result
+++ b/mysql-test/main/mysqlbinlog_row_minimal.result
@@ -55,11 +55,11 @@ START TRANSACTION
 /*!*/;
 # at 834
 # at 908
-#<date> server id 1  end_log_pos 908 CRC32 XXX         Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (10, 1, 2, 3, 4, 5, 6, 7, "")
-#<date> server id 1  end_log_pos 964 CRC32 XXX         Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 964
-#<date> server id 1  end_log_pos 1033 CRC32 XXX        Write_rows: table id 32 
flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_rows: table id 32 flags: 
STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -84,11 +84,11 @@ START TRANSACTION
 /*!*/;
 # at 1148
 # at 1224
-#<date> server id 1  end_log_pos 1224 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (11, 1, 2, 3, 4, 5, 6, 7, NULL)
-#<date> server id 1  end_log_pos 1280 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 1280
-#<date> server id 1  end_log_pos 1348 CRC32 XXX        Write_rows: table id 32 
flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_rows: table id 32 flags: 
STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=11 /* INT meta=0 nullable=0 is_null=0 */
@@ -113,11 +113,11 @@ START TRANSACTION
 /*!*/;
 # at 1463
 # at 1541
-#<date> server id 1  end_log_pos 1541 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (12, 1, 2, 3, NULL, 5, 6, 7, "A")
-#<date> server id 1  end_log_pos 1597 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 1597
-#<date> server id 1  end_log_pos 1664 CRC32 XXX        Write_rows: table id 32 
flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_rows: table id 32 flags: 
STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=12 /* INT meta=0 nullable=0 is_null=0 */
@@ -142,11 +142,11 @@ START TRANSACTION
 /*!*/;
 # at 1779
 # at 1854
-#<date> server id 1  end_log_pos 1854 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t1 VALUES (13, 1, 2, 3, 0, 5, 6, 7, "A")
-#<date> server id 1  end_log_pos 1910 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 1910
-#<date> server id 1  end_log_pos 1980 CRC32 XXX        Write_rows: table id 32 
flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_rows: table id 32 flags: 
STMT_END_F
 ### INSERT INTO `test`.`t1`
 ### SET
 ###   @1=13 /* INT meta=0 nullable=0 is_null=0 */
@@ -171,11 +171,11 @@ START TRANSACTION
 /*!*/;
 # at 2095
 # at 2149
-#<date> server id 1  end_log_pos 2149 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> INSERT INTO t2 SELECT * FROM t1
-#<date> server id 1  end_log_pos 2205 CRC32 XXX        Table_map: `test`.`t2` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t2` mapped 
to number num
 # at 2205
-#<date> server id 1  end_log_pos 2372 CRC32 XXX        Write_rows: table id 33 
flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Write_rows: table id 33 flags: 
STMT_END_F
 ### INSERT INTO `test`.`t2`
 ### SET
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -233,11 +233,11 @@ START TRANSACTION
 /*!*/;
 # at 2487
 # at 2553
-#<date> server id 1  end_log_pos 2553 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> UPDATE t2 SET f4=5 WHERE f4>0 or f4 is NULL
-#<date> server id 1  end_log_pos 2609 CRC32 XXX        Table_map: `test`.`t2` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t2` mapped 
to number num
 # at 2609
-#<date> server id 1  end_log_pos 2675 CRC32 XXX        Update_rows: table id 
33 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Update_rows: table id 33 flags: 
STMT_END_F
 ### UPDATE `test`.`t2`
 ### WHERE
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -266,11 +266,11 @@ START TRANSACTION
 /*!*/;
 # at 2790
 # at 2827
-#<date> server id 1  end_log_pos 2827 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> DELETE FROM t1
-#<date> server id 1  end_log_pos 2883 CRC32 XXX        Table_map: `test`.`t1` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t1` mapped 
to number num
 # at 2883
-#<date> server id 1  end_log_pos 2937 CRC32 XXX        Delete_rows: table id 
32 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Delete_rows: table id 32 flags: 
STMT_END_F
 ### DELETE FROM `test`.`t1`
 ### WHERE
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
@@ -296,11 +296,11 @@ START TRANSACTION
 /*!*/;
 # at 3052
 # at 3089
-#<date> server id 1  end_log_pos 3089 CRC32 XXX        Annotate_rows:
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Annotate_rows:
 #Q> DELETE FROM t2
-#<date> server id 1  end_log_pos 3145 CRC32 XXX        Table_map: `test`.`t2` 
mapped to number num
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Table_map: `test`.`t2` mapped 
to number num
 # at 3145
-#<date> server id 1  end_log_pos 3199 CRC32 XXX        Delete_rows: table id 
33 flags: STMT_END_F
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Delete_rows: table id 33 flags: 
STMT_END_F
 ### DELETE FROM `test`.`t2`
 ### WHERE
 ###   @1=10 /* INT meta=0 nullable=0 is_null=0 */
diff --git a/mysql-test/main/mysqlbinlog_stmt_compressed.result 
b/mysql-test/main/mysqlbinlog_stmt_compressed.result
index c0d26f3f9df..6321e74127f 100644
--- a/mysql-test/main/mysqlbinlog_stmt_compressed.result
+++ b/mysql-test/main/mysqlbinlog_stmt_compressed.result
@@ -56,7 +56,7 @@ CREATE TABLE t2 (pk INT PRIMARY KEY, f1 INT, f2 INT, f3 INT, 
f4 INT, f5 MEDIUMIN
 START TRANSACTION
 /*!*/;
 # at 787
-#<date> server id 1  end_log_pos 915 CRC32 XXX         Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 INSERT INTO t1 VALUES (10, 1, 2, 3, 4, 5, 6, 7, "")
 /*!*/;
@@ -71,7 +71,7 @@ COMMIT
 START TRANSACTION
 /*!*/;
 # at 1030
-#<date> server id 1  end_log_pos 1158 CRC32 XXX        Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 INSERT INTO t1 VALUES (11, 1, 2, 3, 4, 5, 6, 7, NULL)
 /*!*/;
@@ -86,7 +86,7 @@ COMMIT
 START TRANSACTION
 /*!*/;
 # at 1273
-#<date> server id 1  end_log_pos 1403 CRC32 XXX        Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 INSERT INTO t1 VALUES (12, 1, 2, 3, NULL, 5, 6, 7, "A")
 /*!*/;
@@ -101,7 +101,7 @@ COMMIT
 START TRANSACTION
 /*!*/;
 # at 1518
-#<date> server id 1  end_log_pos 1645 CRC32 XXX        Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 INSERT INTO t1 VALUES (13, 1, 2, 3, 0, 5, 6, 7, "A")
 /*!*/;
@@ -116,7 +116,7 @@ COMMIT
 START TRANSACTION
 /*!*/;
 # at 1760
-#<date> server id 1  end_log_pos 1868 CRC32 XXX        Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 INSERT INTO t2 SELECT * FROM t1
 /*!*/;
@@ -131,7 +131,7 @@ COMMIT
 START TRANSACTION
 /*!*/;
 # at 1983
-#<date> server id 1  end_log_pos 2100 CRC32 XXX        Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 UPDATE t2 SET f4=5 WHERE f4>0 or f4 is NULL
 /*!*/;
@@ -146,7 +146,7 @@ COMMIT
 START TRANSACTION
 /*!*/;
 # at 2215
-#<date> server id 1  end_log_pos 2306 CRC32 XXX        Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 DELETE FROM t1
 /*!*/;
@@ -161,7 +161,7 @@ COMMIT
 START TRANSACTION
 /*!*/;
 # at 2421
-#<date> server id 1  end_log_pos 2512 CRC32 XXX        Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
+#<date> server id 1  end_log_pos 0 CRC32 XXX   Query_compressed        
thread_id=5     exec_time=x     error_code=0    xid=<xid>
 SET TIMESTAMP=X/*!*/;
 DELETE FROM t2
 /*!*/;
diff --git a/mysql-test/main/mysqld--help.result 
b/mysql-test/main/mysqld--help.result
index de0a8310ec1..999e0212d77 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -101,6 +101,12 @@ The following specify which files/extra groups are read 
(specified before remain
  --binlog-ignore-db=name 
  Tells the master that updates to the given database
  should not be logged to the binary log.
+ --binlog-legacy-event-pos 
+ Fill in the end_log_pos field of _all_ events in the
+ binlog, even when doing so costs performance. Can be used
+ in case some old application needs it for backwards
+ compatibility. Setting this option can hurt binlog
+ scalability.
  --binlog-optimize-thread-scheduling 
  Run fast part of group commit in a single thread, to
  optimize kernel thread scheduling. On by default. Disable
@@ -1526,6 +1532,7 @@ binlog-direct-non-transactional-updates FALSE
 binlog-expire-logs-seconds 0
 binlog-file-cache-size 16384
 binlog-format MIXED
+binlog-legacy-event-pos FALSE
 binlog-optimize-thread-scheduling TRUE
 binlog-row-event-max-size 8192
 binlog-row-image FULL
diff --git a/mysql-test/suite/binlog/include/binlog_ioerr.inc 
b/mysql-test/suite/binlog/include/binlog_ioerr.inc
index da6fb5ac727..b710eccc64b 100644
--- a/mysql-test/suite/binlog/include/binlog_ioerr.inc
+++ b/mysql-test/suite/binlog/include/binlog_ioerr.inc
@@ -17,11 +17,14 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
 INSERT INTO t1 VALUES(0);
 SET @saved_dbug = @@SESSION.debug_dbug;
 SET SESSION debug_dbug='+d,fail_binlog_write_1';
+# The error injection is in the "legacy" code path.
+SET GLOBAL binlog_legacy_event_pos= 1;
 --error ER_ERROR_ON_WRITE
 INSERT INTO t1 VALUES(1);
 --error ER_ERROR_ON_WRITE
 INSERT INTO t1 VALUES(2);
 SET SESSION debug_dbug=@saved_dbug;
+SET GLOBAL binlog_legacy_event_pos= 0;
 INSERT INTO t1 VALUES(3);
 SELECT * FROM t1;
 
diff --git a/mysql-test/suite/binlog/r/binlog_ioerr.result 
b/mysql-test/suite/binlog/r/binlog_ioerr.result
index e4f00a017ba..aa4042d3f6f 100644
--- a/mysql-test/suite/binlog/r/binlog_ioerr.result
+++ b/mysql-test/suite/binlog/r/binlog_ioerr.result
@@ -4,11 +4,13 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
 INSERT INTO t1 VALUES(0);
 SET @saved_dbug = @@SESSION.debug_dbug;
 SET SESSION debug_dbug='+d,fail_binlog_write_1';
+SET GLOBAL binlog_legacy_event_pos= 1;
 INSERT INTO t1 VALUES(1);
 ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on 
device")
 INSERT INTO t1 VALUES(2);
 ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on 
device")
 SET SESSION debug_dbug=@saved_dbug;
+SET GLOBAL binlog_legacy_event_pos= 0;
 INSERT INTO t1 VALUES(3);
 SELECT * FROM t1;
 a
diff --git a/mysql-test/suite/binlog/t/binlog_killed.test 
b/mysql-test/suite/binlog/t/binlog_killed.test
index 7c3a262d2c1..271da705c82 100644
--- a/mysql-test/suite/binlog/t/binlog_killed.test
+++ b/mysql-test/suite/binlog/t/binlog_killed.test
@@ -67,7 +67,7 @@ let $rows= `select count(*) from t2  /* must be 2 or 0 */`;
 
 let $MYSQLD_DATADIR= `select @@datadir`;
 --let $binlog_killed_pos=query_get_value(SHOW BINLOG EVENTS, Pos, 6)
---let $binlog_killed_end_log_pos=query_get_value(SHOW BINLOG EVENTS, 
End_log_pos, 6)
+--let $binlog_killed_end_log_pos=query_get_value(SHOW BINLOG EVENTS, Pos, 7)
 --exec $MYSQL_BINLOG --force-if-open --start-position=$binlog_killed_pos 
--stop-position=$binlog_killed_end_log_pos $MYSQLD_DATADIR/master-bin.000001 > 
$MYSQLTEST_VARDIR/tmp/kill_query_calling_sp.binlog
 --replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
 --disable_result_log
diff --git a/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test 
b/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test
index 4577c6c1de1..ea4397306f3 100644
--- a/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test
+++ b/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test
@@ -275,7 +275,7 @@ INSERT INTO t1 VALUES(2, "b");
 
 # The invalid metadata will case assertion failure on Write_rows_log_event
 # So we need to stop mysqlbinlog before reading Write_rows_log_event.
---let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 
3, End_log_pos, 3)
+--let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 
4, Pos, 4)
 --source include/print_optional_metadata.inc
 
 --echo #
@@ -291,7 +291,7 @@ INSERT INTO t1(c_point) VALUES(ST_PointFromText('POINT(10 
10)'));
 
 # The invalid metadata will case assertion failure on Write_rows_log_event
 # So we need to stop mysqlbinlog before reading Write_rows_log_event.
---let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 
3, End_log_pos, 3)
+--let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 
4, Pos, 4)
 --source include/print_optional_metadata.inc
 
 DROP TABLE t1;
diff --git a/mysql-test/suite/binlog_encryption/binlog_ioerr.result 
b/mysql-test/suite/binlog_encryption/binlog_ioerr.result
index 2823b7050c3..146bc50c964 100644
--- a/mysql-test/suite/binlog_encryption/binlog_ioerr.result
+++ b/mysql-test/suite/binlog_encryption/binlog_ioerr.result
@@ -4,11 +4,13 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
 INSERT INTO t1 VALUES(0);
 SET @saved_dbug = @@SESSION.debug_dbug;
 SET SESSION debug_dbug='+d,fail_binlog_write_1';
+SET GLOBAL binlog_legacy_event_pos= 1;
 INSERT INTO t1 VALUES(1);
 ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on 
device")
 INSERT INTO t1 VALUES(2);
 ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on 
device")
 SET SESSION debug_dbug=@saved_dbug;
+SET GLOBAL binlog_legacy_event_pos= 0;
 INSERT INTO t1 VALUES(3);
 SELECT * FROM t1;
 a
diff --git a/mysql-test/suite/rpl/r/rpl_checksum_cache.result 
b/mysql-test/suite/rpl/r/rpl_checksum_cache.result
index e8f221cc181..b908c546ce3 100644
--- a/mysql-test/suite/rpl/r/rpl_checksum_cache.result
+++ b/mysql-test/suite/rpl/r/rpl_checksum_cache.result
@@ -121,13 +121,54 @@ connection slave;
 include/diff_tables.inc [master:test.t1, slave:test.t1]
 include/diff_tables.inc [master:test.t2, slave:test.t2]
 include/diff_tables.inc [master:test.t3, slave:test.t3]
+*** Test switching checksum algorithm while ongoing transactions have 
pre-computed checksum in their binlog cache ***
+connection master;
+CREATE TABLE t4 (a INT, b INT, c VARCHAR(1024), PRIMARY KEY (a,b)) 
ENGINE=InnoDB;
+BEGIN;
+INSERT INTO t4 VALUES (1, 1, "small, pre-computed checksums");
+connection server_1;
+BEGIN;
+INSERT INTO t4 VALUES (2, 1, "big, pre-computed checksums");
+set @@global.binlog_checksum = NONE;
+connection master;
+INSERT INTO t4 VALUES (1, 2, "done");
+COMMIT;
+connection server_1;
+INSERT INTO t4 VALUES (2, 22, "done");
+COMMIT;
+connection master;
+BEGIN;
+INSERT INTO t4 VALUES (3, 1, "small, no pre-computed checksums");
+connection server_1;
+BEGIN;
+INSERT INTO t4 VALUES (4, 1, "big, no pre-computed checksums");
+set @@global.binlog_checksum = CRC32;
+connection master;
+INSERT INTO t4 VALUES (3, 2, "done");
+COMMIT;
+connection server_1;
+INSERT INTO t4 VALUES (4, 22, "done");
+COMMIT;
+connection slave;
+*** Test the --binlog-legacy-event-pos option.
+connection master;
+FLUSH BINARY LOGS;
+BEGIN;
+INSERT INTO t4 VALUES (5, 1, "Zero end_log_pos");
+COMMIT;
+set @@global.binlog_legacy_event_pos= 1;
+BEGIN;
+INSERT INTO t4 VALUES (6, 1, "Non-zero end_log_pos");
+COMMIT;
+set @@global.binlog_legacy_event_pos= 0;
+connection slave;
 connection master;
 begin;
 delete from t1;
 delete from t2;
 delete from t3;
 commit;
-drop table t1, t2, t3;
+drop table t1, t2, t3, t4;
 set @@global.binlog_cache_size = @save_binlog_cache_size;
 set @@global.binlog_checksum = @save_binlog_checksum;
 set @@global.master_verify_checksum = @save_master_verify_checksum;
diff --git a/mysql-test/suite/rpl/t/rpl_checksum_cache.test 
b/mysql-test/suite/rpl/t/rpl_checksum_cache.test
index e04f618b81e..173af8c1d0b 100644
--- a/mysql-test/suite/rpl/t/rpl_checksum_cache.test
+++ b/mysql-test/suite/rpl/t/rpl_checksum_cache.test
@@ -243,6 +243,102 @@ let $diff_tables=master:test.t3, slave:test.t3;
 source include/diff_tables.inc;
 
 
+--echo *** Test switching checksum algorithm while ongoing transactions have 
pre-computed checksum in their binlog cache ***
+
+--connection master
+CREATE TABLE t4 (a INT, b INT, c VARCHAR(1024), PRIMARY KEY (a,b)) 
ENGINE=InnoDB;
+
+# Create a couple transactions that will precompute checksums but commit
+# without them.
+
+BEGIN;
+INSERT INTO t4 VALUES (1, 1, "small, pre-computed checksums");
+
+--connection server_1
+BEGIN;
+INSERT INTO t4 VALUES (2, 1, "big, pre-computed checksums");
+--let $i= 20
+--disable_query_log
+while ($i) {
+  eval INSERT INTO t4 VALUES (2, 22-$i, REPEAT("x", FLOOR(RAND()*100) + 831));
+  dec $i;
+}
+--enable_query_log
+
+# Disable checksums dynamically, so MYSQL_BIN_LOG::write_cache() will have
+# to drop the pre-computed checksums.
+set @@global.binlog_checksum = NONE;
+
+--connection master
+INSERT INTO t4 VALUES (1, 2, "done");
+COMMIT;
+--connection server_1
+INSERT INTO t4 VALUES (2, 22, "done");
+COMMIT;
+
+# Create a couple transactions that will not precompute checksums but commit
+# with them.
+
+--connection master
+BEGIN;
+INSERT INTO t4 VALUES (3, 1, "small, no pre-computed checksums");
+
+--connection server_1
+BEGIN;
+INSERT INTO t4 VALUES (4, 1, "big, no pre-computed checksums");
+--let $i= 20
+--disable_query_log
+while ($i) {
+  eval INSERT INTO t4 VALUES (4, 22-$i, REPEAT("x", FLOOR(RAND()*100) + 853));
+  dec $i;
+}
+--enable_query_log
+
+# Enable checksums dynamically, so MYSQL_BIN_LOG::write_cache() will have
+# to recompute the checksums.
+set @@global.binlog_checksum = CRC32;
+
+--connection master
+INSERT INTO t4 VALUES (3, 2, "done");
+COMMIT;
+--connection server_1
+INSERT INTO t4 VALUES (4, 22, "done");
+COMMIT;
+
+sync_slave_with_master;
+
+
+--echo *** Test the --binlog-legacy-event-pos option.
+--connection master
+FLUSH BINARY LOGS;
+--source include/wait_for_binlog_checkpoint.inc
+
+--let $query_file= query_get_value(SHOW MASTER STATUS, File, 1)
+--let $query_pos= query_get_value(SHOW MASTER STATUS, Position, 1)
+BEGIN;
+INSERT INTO t4 VALUES (5, 1, "Zero end_log_pos");
+COMMIT;
+--let $end_log_pos= query_get_value(SHOW BINLOG EVENTS IN "$query_file" FROM 
$query_pos LIMIT 3, End_log_pos, 2)
+if ($end_log_pos!=0) {
+  eval SHOW BINLOG EVENTS IN "$query_file";
+  --die Wrong End_log_pos=$end_log_pos, expected zero.
+}
+
+set @@global.binlog_legacy_event_pos= 1;
+--let $query_pos= query_get_value(SHOW MASTER STATUS, Position, 1)
+BEGIN;
+INSERT INTO t4 VALUES (6, 1, "Non-zero end_log_pos");
+COMMIT;
+--let $end_log_pos= query_get_value(SHOW BINLOG EVENTS IN "$query_file" FROM 
$query_pos LIMIT 3, End_log_pos, 2)
+if ($end_log_pos==0) {
+  eval SHOW BINLOG EVENTS IN "$query_file";
+  --die Wrong End_log_pos=$end_log_pos, expected non-zero.
+}
+set @@global.binlog_legacy_event_pos= 0;
+
+sync_slave_with_master;
+
+
 connection master;
 
 begin;
@@ -251,7 +347,7 @@ delete from t2;
 delete from t3;
 commit;
 
-drop table t1, t2, t3;
+drop table t1, t2, t3, t4;
 set @@global.binlog_cache_size = @save_binlog_cache_size;
 set @@global.binlog_checksum = @save_binlog_checksum;
 set @@global.master_verify_checksum = @save_master_verify_checksum;
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result 
b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index d1a6d85f861..6d43640bf65 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -442,6 +442,16 @@ NUMERIC_BLOCK_SIZE NULL
 ENUM_VALUE_LIST        MIXED,STATEMENT,ROW
 READ_ONLY      NO
 COMMAND_LINE_ARGUMENT  REQUIRED
+VARIABLE_NAME  BINLOG_LEGACY_EVENT_POS
+VARIABLE_SCOPE GLOBAL
+VARIABLE_TYPE  BOOLEAN
+VARIABLE_COMMENT       Fill in the end_log_pos field of _all_ events in the binlog, even when doing so costs performance. Can be used in case some old application needs it for backwards compatibility. Setting this option can hurt binlog scalability.
+NUMERIC_MIN_VALUE      NULL
+NUMERIC_MAX_VALUE      NULL
+NUMERIC_BLOCK_SIZE     NULL
+ENUM_VALUE_LIST        OFF,ON
+READ_ONLY      NO
+COMMAND_LINE_ARGUMENT  OPTIONAL
 VARIABLE_NAME  BINLOG_OPTIMIZE_THREAD_SCHEDULING
 VARIABLE_SCOPE GLOBAL
 VARIABLE_TYPE  BOOLEAN
diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c
index 4622b68646e..5d2863bca1c 100644
--- a/mysys/mf_iocache2.c
+++ b/mysys/mf_iocache2.c
@@ -74,6 +74,40 @@ int my_b_copy_all_to_file(IO_CACHE *cache, FILE *file)
   DBUG_RETURN(my_b_copy_to_file(cache, file, SIZE_T_MAX));
 }
 
+/**
+   Similar to above my_b_copy_to_file(), but destination is another IO_CACHE.
+*/
+int
+my_b_copy_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache,
+                  size_t count)
+{
+  size_t curr_write, bytes_in_cache;
+  DBUG_ENTER("my_b_copy_to_cache");
+
+  bytes_in_cache= my_b_bytes_in_cache(from_cache);
+  do
+  {
+    curr_write= MY_MIN(bytes_in_cache, count);
+    if (my_b_write(to_cache, from_cache->read_pos, curr_write))
+      DBUG_RETURN(1);
+
+    from_cache->read_pos += curr_write;
+    count -= curr_write;
+  } while (count && (bytes_in_cache= my_b_fill(from_cache)));
+  if(from_cache->error == -1)
+    DBUG_RETURN(1);
+  DBUG_RETURN(0);
+}
+
+int my_b_copy_all_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache)
+{
+  DBUG_ENTER("my_b_copy_all_to_cache");
+  /* Reinit the cache to read from the beginning of the cache */
+  if (reinit_io_cache(from_cache, READ_CACHE, 0L, FALSE, FALSE))
+    DBUG_RETURN(1);
+  DBUG_RETURN(my_b_copy_to_cache(from_cache, to_cache, SIZE_T_MAX));
+}
+
 my_off_t my_b_append_tell(IO_CACHE* info)
 {
   /*
diff --git a/sql/log.cc b/sql/log.cc
index 1ab90389a37..dfed8265a69 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -107,6 +107,15 @@ static const LEX_CSTRING write_error_msg=
     { STRING_WITH_LEN("error writing to the binary log") };
 
 static my_bool opt_optimize_thread_scheduling= TRUE;
+/*
+  The binlog_checksum_options value is accessed under protection of LOCK_log,
+  as the checksum option used must be consistent across an entire binlog
+  file, and log rotation is needed whenever this is changed.
+
+  As an exception, event checksums are precomputed using a non-locked read
+  of binlog_checksum_options. Thus updates to this variable must be atomic,
+  with relaxed semantics.
+*/
 ulong binlog_checksum_options;
 #ifndef DBUG_OFF
 ulong opt_binlog_dbug_fsync_sleep= 0;
@@ -275,12 +284,22 @@ void make_default_log_name(char **out, const char* 
log_ext, bool once)
 class binlog_cache_data
 {
 public:
-  binlog_cache_data(): m_pending(0), status(0),
-  before_stmt_pos(MY_OFF_T_UNDEF),
-  incident(FALSE),
+  binlog_cache_data(bool precompute_checksums_) : m_pending(0), status(0),
+  before_stmt_pos(MY_OFF_T_UNDEF), incident(FALSE),
+  precompute_checksums(precompute_checksums_),
   saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
   ptr_binlog_cache_disk_use(0)
-  { }
+  {
+    /*
+      Read the current checksum setting. We will use this setting to decide
+      whether to pre-compute checksums in the cache. Then when writing the cache
+      to the actual binlog, another check will be made and checksums recomputed
+      in the unlikely case that the setting changed meanwhile.
+    */
+    checksum_opt= !precompute_checksums_ ? (uchar)BINLOG_CHECKSUM_ALG_OFF :
+      (uchar)my_atomic_loadul_explicit(&binlog_checksum_options,
+                                       MY_MEMORY_ORDER_RELAXED);
+  }
   
   ~binlog_cache_data()
   {
@@ -332,6 +351,9 @@ class binlog_cache_data
     bool truncate_file= (cache_log.file != -1 &&
                          my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE);
     truncate(0,1);                              // Forget what's in cache
+    checksum_opt= !precompute_checksums ? (uchar)BINLOG_CHECKSUM_ALG_OFF :
+      (uchar)my_atomic_loadul_explicit(&binlog_checksum_options,
+                                       MY_MEMORY_ORDER_RELAXED);
     if (!cache_was_empty)
       compute_statistics();
     if (truncate_file)
@@ -435,6 +457,17 @@ class binlog_cache_data
   */ 
   bool incident;
 
+  /* Whether the caller requested precomputing checksums. */
+  bool precompute_checksums;
+
+public:
+  /*
+    The algorithm (if any) used to pre-compute checksums in the cache.
+    Initialized from binlog_checksum_options when the cache is reset.
+  */
+  uchar checksum_opt;
+
+private:
   /**
     This function computes binlog cache and disk usage.
   */
@@ -508,6 +541,37 @@ void Log_event_writer::set_incident()
 }
 
 
+/**
+   Select if and how to write checksum for an event written to the binlog.
+
+    - When writing directly to the binlog, the user-configured checksum option
+      is used.
+    - When writing to a transaction or statement cache, we have
+      binlog_cache_data that contains the checksum option to use (pre-computed
+      checksums).
+    - Otherwise, no checksum is used.
+*/
+enum enum_binlog_checksum_alg
+Log_event::select_checksum_alg(const binlog_cache_data *data)
+{
+  if (cache_type == Log_event::EVENT_NO_CACHE)
+  {
+    DBUG_ASSERT(!data);
+    /*
+      When we're selecting the checksum algorithm to write directly to the
+      actual binlog, we must be holding the LOCK_log, otherwise the checksum
+      configuration could change just after we read it.
+    */
+    mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
+    return (enum enum_binlog_checksum_alg)binlog_checksum_options;
+  }
+  else if (data)
+    return (enum enum_binlog_checksum_alg)data->checksum_opt;
+  else
+    return BINLOG_CHECKSUM_ALG_OFF;
+}
+
+
 class binlog_cache_mngr {
 public:
   binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,
@@ -515,8 +579,10 @@ class binlog_cache_mngr {
                     ulong *param_ptr_binlog_stmt_cache_use,
                     ulong *param_ptr_binlog_stmt_cache_disk_use,
                     ulong *param_ptr_binlog_cache_use,
-                    ulong *param_ptr_binlog_cache_disk_use)
-    : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
+                    ulong *param_ptr_binlog_cache_disk_use,
+                    bool precompute_checksums)
+    : stmt_cache(precompute_checksums), trx_cache(precompute_checksums),
+      last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
   {
      stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
                                       param_ptr_binlog_stmt_cache_use,
@@ -5558,7 +5624,8 @@ int MYSQL_BIN_LOG::new_file_impl()
   {
     DBUG_ASSERT(!is_relay_log);
     DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
-    binlog_checksum_options= checksum_alg_reset;
+    my_atomic_storeul_explicit(&binlog_checksum_options, checksum_alg_reset,
+                               MY_MEMORY_ORDER_RELAXED);
   }
   /*
      Note that at this point, log_state != LOG_CLOSED
@@ -5634,19 +5701,19 @@ int MYSQL_BIN_LOG::new_file_impl()
 bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *data,
                                 IO_CACHE *file)
 {
-  return write_event(ev, ev->select_checksum_alg(), data, file);
+  return write_event(ev, ev->select_checksum_alg(data), data, file);
 }
 
 bool MYSQL_BIN_LOG::write_event(Log_event *ev)
 {
-  return write_event(ev, ev->select_checksum_alg(), 0, &log_file);
+  return write_event(ev, ev->select_checksum_alg(NULL), 0, &log_file);
 }
 
 bool MYSQL_BIN_LOG::write_event(Log_event *ev,
                                 enum enum_binlog_checksum_alg checksum_alg,
                                 binlog_cache_data *cache_data, IO_CACHE *file)
 {
-  Log_event_writer writer(file, 0, checksum_alg, &crypto);
+  Log_event_writer writer(file, cache_data, checksum_alg, &crypto);
   if (crypto.scheme && file == &log_file)
   {
     writer.ctx= alloca(crypto.ctx_size);
@@ -5953,13 +6020,22 @@ binlog_cache_mngr *THD::binlog_setup_trx_data()
   }
   thd_set_ha_data(this, binlog_hton, cache_mngr);
 
+  /*
+    Don't attempt to precompute checksums if:
+     - Disabled by user request, --binlog-legacy-event-pos
+     - Binlog is encrypted, cannot use precomputed checksums
+     - WSREP/Galera.
+  */
+  bool precompute_checksums=
+    !WSREP_NNULL(this) && !encrypt_binlog && !opt_binlog_legacy_event_pos;
   cache_mngr= new (cache_mngr)
               binlog_cache_mngr(max_binlog_stmt_cache_size,
                                 max_binlog_cache_size,
                                 &binlog_stmt_cache_use,
                                 &binlog_stmt_cache_disk_use,
                                 &binlog_cache_use,
-                                &binlog_cache_disk_use);
+                                &binlog_cache_disk_use,
+                                precompute_checksums);
   DBUG_RETURN(cache_mngr);
 }
 
@@ -6287,7 +6363,8 @@ bool THD::binlog_write_table_map(TABLE *table, bool 
with_annotate)
   binlog_cache_data *cache_data= (cache_mngr->
                                   get_binlog_cache_data(is_transactional));
   IO_CACHE *file= &cache_data->cache_log;
-  Log_event_writer writer(file, cache_data, the_event.select_checksum_alg(), 
NULL);
+  Log_event_writer writer(file, cache_data,
+                          the_event.select_checksum_alg(cache_data), NULL);
 
   if (with_annotate)
     if (binlog_write_annotated_row(&writer))
@@ -6442,7 +6519,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
   if (Rows_log_event* pending= cache_data->pending())
   {
     Log_event_writer writer(&cache_data->cache_log, cache_data,
-                            pending->select_checksum_alg(), NULL);
+                            pending->select_checksum_alg(cache_data), NULL);
 
     /*
       Write pending event to the cache.
@@ -7502,22 +7579,37 @@ uint MYSQL_BIN_LOG::next_file_id()
     events prior to fill in the binlog cache.
 */
 
-int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
+int MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data)
 {
   DBUG_ENTER("MYSQL_BIN_LOG::write_cache");
-
+  IO_CACHE *cache= &cache_data->cache_log;
   mysql_mutex_assert_owner(&LOCK_log);
+
+  /*
+    If possible, just copy the cache over byte-by-byte with pre-computed
+    checksums.
+  */
+  if (likely(binlog_checksum_options == cache_data->checksum_opt) &&
+      likely(!crypto.scheme) &&
+      likely(!opt_binlog_legacy_event_pos))
+  {
+    int res= my_b_copy_all_to_cache(cache, &log_file);
+    status_var_add(thd->status_var.binlog_bytes_written, my_b_tell(cache));
+    DBUG_RETURN(res ? ER_ERROR_ON_WRITE : 0);
+  }
+
   if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
     DBUG_RETURN(ER_ERROR_ON_WRITE);
   /* Amount of remaining bytes in the IO_CACHE read buffer. */
   size_t length= my_b_bytes_in_cache(cache);
   size_t group;
-  size_t end_log_pos_inc= 0; // each event processed adds BINLOG_CHECKSUM_LEN 
2 t
   uchar header_buf[LOG_EVENT_HEADER_LEN];
   Log_event_writer writer(&log_file, 0,
                           (enum_binlog_checksum_alg)binlog_checksum_options,
                           &crypto);
   uint checksum_len= writer.checksum_len;
+  bool precomputed_checksums= (cache_data->checksum_opt != 
BINLOG_CHECKSUM_ALG_OFF);
+  uint old_checksum_len= precomputed_checksums ? BINLOG_CHECKSUM_LEN : 0;
   int err= 0;
 
   if (crypto.scheme)
@@ -7591,13 +7683,13 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE 
*cache)
 
     /* Adjust the length and end_log_pos appropriately. */
     uint ev_len= uint4korr(header + EVENT_LEN_OFFSET); // netto len
-    DBUG_ASSERT(ev_len >= LOG_EVENT_HEADER_LEN);
-    if (unlikely(ev_len < LOG_EVENT_HEADER_LEN))
+    DBUG_ASSERT(ev_len >= LOG_EVENT_HEADER_LEN + old_checksum_len);
+    if (unlikely(ev_len < LOG_EVENT_HEADER_LEN + old_checksum_len))
       goto error_in_read;
-    int4store(header + EVENT_LEN_OFFSET, ev_len + checksum_len);
-    end_log_pos_inc += checksum_len;
-    size_t val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
-    int4store(header + LOG_POS_OFFSET, val);
+    uint new_len= ev_len - old_checksum_len + checksum_len;
+    int4store(header + EVENT_LEN_OFFSET, new_len);
+    group+= new_len;
+    int4store(header + LOG_POS_OFFSET, group);
 
     /* Write the header to the binlog. */
     if (writer.write_header(header, LOG_EVENT_HEADER_LEN))
@@ -7614,8 +7706,18 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
           goto error_in_read;
       }
       uint chunk= std::min(ev_len, (uint)length);
-      if (writer.write_data(cache->read_pos, chunk))
-        goto error_in_write;
+      /*
+        Any old precomputed checksum must _not_ be written here. Instead, it
+        must be discarded; the new checksum, if needed, is written by
+        writer.write_footer().
+      */
+      if (ev_len > old_checksum_len)
+      {
+        uint bytes_to_skip=
+          old_checksum_len - std::min(old_checksum_len, ev_len - chunk);
+        if (writer.write_data(cache->read_pos, chunk - bytes_to_skip))
+          goto error_in_write;
+      }
       cache->read_pos+= chunk;
       length-= chunk;
       ev_len-= chunk;
@@ -8745,7 +8847,7 @@ 
MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
     DBUG_RETURN(ER_ERROR_ON_WRITE);
 
   if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
-      write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
+      write_cache(entry->thd, mngr->get_binlog_cache_data(FALSE)))
   {
     entry->error_cache= &mngr->stmt_cache.cache_log;
     DBUG_RETURN(ER_ERROR_ON_WRITE);
@@ -8756,7 +8858,7 @@ 
MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
     DBUG_EXECUTE_IF("crash_before_writing_xid",
                     {
                       if ((write_cache(entry->thd,
-                                       mngr->get_binlog_cache_log(TRUE))))
+                                       mngr->get_binlog_cache_data(TRUE))))
                         DBUG_PRINT("info", ("error writing binlog cache"));
                       else
                         flush_and_sync(0);
@@ -8765,7 +8867,7 @@ 
MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
                       DBUG_SUICIDE();
                     });
 
-    if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
+    if (write_cache(entry->thd, mngr->get_binlog_cache_data(TRUE)))
     {
       entry->error_cache= &mngr->trx_cache.cache_log;
       DBUG_RETURN(ER_ERROR_ON_WRITE);
@@ -11383,6 +11485,7 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char 
*last_log_name,
   char binlog_checkpoint_name[FN_REFLEN];
   bool binlog_checkpoint_found;
   IO_CACHE log;
+  IO_CACHE *cur_log;
   File file= -1;
   const char *errmsg;
 #ifdef HAVE_REPLICATION
@@ -11429,12 +11532,16 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const 
char *last_log_name,
   */
 
   binlog_checkpoint_found= false;
+  cur_log= first_log;
   for (round= 1;;)
   {
-    while ((ev= Log_event::read_log_event(round == 1 ? first_log : &log,
-                                          fdle, opt_master_verify_checksum))
+    while ((ev= Log_event::read_log_event(cur_log, fdle,
+                                          opt_master_verify_checksum))
            && ev->is_valid())
     {
+#ifdef HAVE_REPLICATION
+      my_off_t end_pos= my_b_tell(cur_log);
+#endif
       enum Log_event_type typ= ev->get_type_code();
       switch (typ)
       {
@@ -11451,7 +11558,7 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char 
*last_log_name,
             member->decided_to_commit= true;
         }
 #else
-        if (ctx.decide_or_assess(member, round, fdle, linfo, ev->log_pos))
+        if (ctx.decide_or_assess(member, round, fdle, linfo, end_pos))
           goto err2;
 #endif
       }
@@ -11552,11 +11659,12 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const 
char *last_log_name,
           goto err2;
         ctx.last_gtid_valid= false;
       }
-      ctx.prev_event_pos= ev->log_pos;
+      ctx.prev_event_pos= end_pos;
 #endif
       delete ev;
       ev= NULL;
     } // end of while
+    cur_log= &log;
 
     /*
       If the last binlog checkpoint event points to an older log, we have to
@@ -11813,7 +11921,8 @@ binlog_checksum_update(MYSQL_THD thd, struct 
st_mysql_sys_var *var,
   }
   else
   {
-    binlog_checksum_options= value;
+    my_atomic_storeul_explicit(&binlog_checksum_options, value,
+                               MY_MEMORY_ORDER_RELAXED);
   }
   DBUG_ASSERT(binlog_checksum_options == value);
   mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
diff --git a/sql/log.h b/sql/log.h
index f02b20c12bf..91c406a71aa 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -821,7 +821,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
   bool write_incident_already_locked(THD *thd);
   bool write_incident(THD *thd);
   void write_binlog_checkpoint_event_already_locked(const char *name, uint 
len);
-  int  write_cache(THD *thd, IO_CACHE *cache);
+  int  write_cache(THD *thd, binlog_cache_data *cache_data);
   void set_write_error(THD *thd, bool is_transactional);
   bool check_write_error(THD *thd);
 
diff --git a/sql/log_event.h b/sql/log_event.h
index 33f689c9330..5abc5fa0caf 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -1456,7 +1456,7 @@ class Log_event
   bool write_footer(Log_event_writer *writer)
   { return writer->write_footer(); }
 
-  enum enum_binlog_checksum_alg select_checksum_alg();
+  enum enum_binlog_checksum_alg select_checksum_alg(const binlog_cache_data 
*data);
 
   virtual bool write(Log_event_writer *writer)
   {
diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc
index bee594291d6..468d28c389c 100644
--- a/sql/log_event_server.cc
+++ b/sql/log_event_server.cc
@@ -735,20 +735,6 @@ void Log_event::init_show_field_list(THD *thd, List<Item>* 
field_list)
                         mem_root);
 }
 
-/**
-   Select if and how to write checksum for an event written to the binlog.
-   It returns the actively configured binlog checksum option, unless the event
-   is being written to a cache (in which case the checksum, if any, is added
-   later when the cache is copied to the real binlog).
-*/
-enum enum_binlog_checksum_alg Log_event::select_checksum_alg()
-{
-  if (cache_type == Log_event::EVENT_NO_CACHE)
-    return (enum_binlog_checksum_alg)binlog_checksum_options;
-  else
-    return BINLOG_CHECKSUM_ALG_OFF;
-}
-
 int Log_event_writer::write_internal(const uchar *pos, size_t len)
 {
   DBUG_ASSERT(!ctx || encrypt_or_write == 
&Log_event_writer::encrypt_and_write);
@@ -901,11 +887,17 @@ bool Log_event::write_header(Log_event_writer *writer, 
size_t event_data_length)
     change the position
   */
 
-  if (is_artificial_event())
+  if (is_artificial_event() ||
+      cache_type == Log_event::EVENT_STMT_CACHE ||
+      cache_type == Log_event::EVENT_TRANSACTIONAL_CACHE)
   {
     /*
       Artificial events are automatically generated and do not exist
       in master's binary log, so log_pos should be set to 0.
+
+      Events written through transaction or statement cache have log_pos set
+      to 0 so that they can be copied directly to the binlog without having
+      to compute the real end_log_pos.
     */
     log_pos= 0;
   }
@@ -4937,7 +4929,7 @@ void Create_file_log_event::pack_info(Protocol *protocol)
 
 /**
   Create_file_log_event::do_apply_event()
-  Constructor for Create_file_log_event to intantiate an event
+  Constructor for Create_file_log_event to instantiate an event
   from the relay log on the slave.
 
   @retval
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 99717a2c058..0ddefe81856 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -762,6 +762,7 @@ char *relay_log_info_file, *report_user, *report_password, 
*report_host;
 char *opt_relay_logname = 0, *opt_relaylog_index_name=0;
 char *opt_logname, *opt_slow_logname, *opt_bin_logname;
 char *opt_binlog_index_name=0;
+my_bool opt_binlog_legacy_event_pos= FALSE;
 
 
 
diff --git a/sql/mysqld.h b/sql/mysqld.h
index e99d5cb300c..113bc9112cb 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -173,6 +173,7 @@ extern ulong delay_key_write_options;
 extern char *opt_logname, *opt_slow_logname, *opt_bin_logname, 
             *opt_relay_logname;
 extern char *opt_binlog_index_name;
+extern my_bool opt_binlog_legacy_event_pos;
 extern char *opt_backup_history_logname, *opt_backup_progress_logname,
             *opt_backup_settings_name;
 extern const char *log_output_str;
diff --git a/sql/privilege.h b/sql/privilege.h
index 8e9b9a3748e..7356181975d 100644
--- a/sql/privilege.h
+++ b/sql/privilege.h
@@ -362,6 +362,9 @@ constexpr privilege_t 
PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_COMMIT_WAIT_USEC=
 constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_ROW_METADATA=
   SUPER_ACL | BINLOG_ADMIN_ACL;
 
+constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_LEGACY_EVENT_POS=
+  SUPER_ACL | BINLOG_ADMIN_ACL;
+
 constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_EXPIRE_LOGS_DAYS=
   SUPER_ACL | BINLOG_ADMIN_ACL;
 
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 6142c0bf077..eef6cd34043 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -3518,6 +3518,19 @@ Sys_master_verify_checksum(
        GLOBAL_VAR(opt_master_verify_checksum), CMD_LINE(OPT_ARG),
        DEFAULT(FALSE));
 
+
+static Sys_var_on_access_global<Sys_var_mybool,
+                           PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_LEGACY_EVENT_POS>
+Sys_binlog_legacy_event_pos(
+       "binlog_legacy_event_pos",
+       "Fill in the end_log_pos field of _all_ events in the binlog, even when 
"
+       "doing so costs performance. Can be used in case some old application 
needs "
+       "it for backwards compatibility. Setting this option can hurt binlog "
+       "scalability.",
+       GLOBAL_VAR(opt_binlog_legacy_event_pos), CMD_LINE(OPT_ARG),
+       DEFAULT(FALSE));
+
+
 /* These names must match RPL_SKIP_XXX #defines in slave.h. */
 static const char *replicate_events_marked_for_skip_names[]= {
   "REPLICATE", "FILTER_ON_SLAVE", "FILTER_ON_MASTER", 0
-- 
2.30.2

_______________________________________________
commits mailing list -- commits@lists.mariadb.org
To unsubscribe send an email to commits-le...@lists.mariadb.org

Reply via email to