The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after ark-5.14 ------> commit 248c2b31353de1b28eed02cb9f3eab59eabd24dd Author: Vladimir Davydov <vdavydov....@gmail.com> Date: Wed Sep 29 21:18:47 2021 +0300
ve/printk: Virtualize "prb" and "clear_seq" https://jira.sw.ru/browse/PSBM-17899 Signed-off-by: Vladimir Davydov <vdavy...@parallels.com> Signed-off-by: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> +++ ve/printk: Fix printk virtualization ve_printk() corrupts host's dmesg: # dmesg|wc -l 599 # vzctl create 101 # vzctl set 101 --netif_add eth0 --save # vzctl start 101 # vzctl exec 101 'tcpdump -w tcpdump.out -U -n -i eth0 esp' # dmesg|wc -l 2 Add missing parts of prinkt virtualization to fix this. https://jira.sw.ru/browse/PSBM-17899 https://jira.sw.ru/browse/PSBM-105442 Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> Rebasing to vz9: part of vz8 commit: d63aeb311a64 ("ve/printk: printk virtualization") https://jira.sw.ru/browse/PSBM-133985 Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> --- kernel/printk/printk.c | 176 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 123 insertions(+), 53 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 77e6787c752e..a1dedbc88426 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -378,17 +378,6 @@ struct latched_seq { u64 val[2]; }; -/* - * The next printk record to read after the last 'clear' command. There are - * two copies (updated with seqcount_latch) so that reads can locklessly - * access a valid value. Writers are synchronized by @syslog_lock. - */ -static struct latched_seq clear_seq = { - .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), - .val[0] = 0, - .val[1] = 0, -}; - #ifdef CONFIG_PRINTK_CALLER #define PREFIX_MAX 48 #else @@ -425,13 +414,26 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, static struct printk_ringbuffer printk_rb_dynamic; -static struct printk_ringbuffer *prb = &printk_rb_static; static struct log_state { char *buf; u32 buf_len; + + /* + * The next printk record to read after the last 'clear' command. There are + * two copies (updated with seqcount_latch) so that reads can locklessly + * access a valid value. Writers are synchronized by @syslog_lock. + */ + struct latched_seq clear_seq; + struct printk_ringbuffer *prb; } init_log_state = { .buf = __log_buf, .buf_len = __LOG_BUF_LEN, + .clear_seq = { + .latch = SEQCNT_LATCH_ZERO(init_log_state.clear_seq.latch), + .val[0] = 0, + .val[1] = 0, + }, + .prb = &printk_rb_static, }; /* kdump relies on some log_* symbols, let's make it happy */ @@ -443,6 +445,8 @@ static void ____ ## name ## _definition(void) \ : : "g" (offsetof(typeof(inst), memb))); \ } \ extern typeof(inst.memb) name; +DEFINE_STRUCT_MEMBER_ALIAS(clear_seq, init_log_state, clear_seq); +DEFINE_STRUCT_MEMBER_ALIAS(prb, init_log_state, prb); #undef DEFINE_STRUCT_MEMBER_ALIAS static inline struct log_state *ve_log_state(void) @@ -755,6 +759,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct log_state *log = ve_log_state(); struct devkmsg_user *user = file->private_data; struct printk_record *r = &user->record; size_t len; @@ -768,7 +773,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, return ret; printk_safe_enter_irq(); - if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { + if (!prb_read_valid(log->prb, atomic64_read(&user->seq), r)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; printk_safe_exit_irq(); @@ -777,7 +782,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, printk_safe_exit_irq(); ret = wait_event_interruptible(log_wait, - prb_read_valid(prb, atomic64_read(&user->seq), r)); + prb_read_valid(log->prb, atomic64_read(&user->seq), r)); if (ret) goto out; printk_safe_enter_irq(); @@ -824,6 +829,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, */ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) { + struct log_state *log = ve_log_state(); struct devkmsg_user *user = file->private_data; loff_t ret = 0; @@ -836,7 +842,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) switch (whence) { case SEEK_SET: /* the first record */ - atomic64_set(&user->seq, prb_first_valid_seq(prb)); + atomic64_set(&user->seq, prb_first_valid_seq(log->prb)); break; case SEEK_DATA: /* @@ -844,11 +850,11 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) * like issued by 'dmesg -c'. Reading /dev/kmsg itself * changes no global state, and does not clear anything. */ - atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); + atomic64_set(&user->seq, latched_seq_read_nolock(&log->clear_seq)); break; case SEEK_END: /* after the last record */ - atomic64_set(&user->seq, prb_next_seq(prb)); + atomic64_set(&user->seq, prb_next_seq(log->prb)); break; default: ret = -EINVAL; @@ -859,6 +865,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { + struct log_state *log = ve_log_state(); struct devkmsg_user *user = file->private_data; struct printk_info info; __poll_t ret = 0; @@ -869,7 +876,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); printk_safe_enter_irq(); - if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { + if (prb_read_valid_info(log->prb, atomic64_read(&user->seq), &info, NULL)) { /* return error when data has vanished underneath us */ if (info.seq != atomic64_read(&user->seq)) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; @@ -883,6 +890,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) static int devkmsg_open(struct inode *inode, struct file *file) { + struct log_state *log = ve_log_state(); struct devkmsg_user *user; int err; @@ -910,7 +918,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) &user->text_buf[0], sizeof(user->text_buf)); printk_safe_enter_irq(); - atomic64_set(&user->seq, prb_first_valid_seq(prb)); + atomic64_set(&user->seq, prb_first_valid_seq(log->prb)); printk_safe_exit_irq(); file->private_data = user; @@ -1193,7 +1201,7 @@ void __init setup_log_buf(int early) * boot CPU and interrupts are disabled. So no new messages will * appear during the transition to the dynamic buffer. */ - prb = &printk_rb_dynamic; + log->prb = &printk_rb_dynamic; printk_safe_exit_irqrestore(flags); @@ -1480,7 +1488,8 @@ static size_t get_record_print_text_size(struct printk_info *info, * @max_seq is simply an upper bound and does not need to exist. If the caller * does not require an upper bound, -1 can be used for @max_seq. */ -static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, +static u64 find_first_fitting_seq(struct log_state *log, + u64 start_seq, u64 max_seq, size_t size, bool syslog, bool time) { struct printk_info info; @@ -1489,7 +1498,7 @@ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, u64 seq; /* Determine the size of the records up to @max_seq. */ - prb_for_each_info(start_seq, prb, seq, &info, &line_count) { + prb_for_each_info(start_seq, log->prb, seq, &info, &line_count) { if (info.seq >= max_seq) break; len += get_record_print_text_size(&info, line_count, syslog, time); @@ -1508,7 +1517,7 @@ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, * might appear and get lost in the meantime. This is a best effort * that prevents an infinite loop that could occur with a retry. */ - prb_for_each_info(start_seq, prb, seq, &info, &line_count) { + prb_for_each_info(start_seq, log->prb, seq, &info, &line_count) { if (len <= size || info.seq >= max_seq) break; len -= get_record_print_text_size(&info, line_count, syslog, time); @@ -1517,7 +1526,8 @@ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, return seq; } -static int syslog_print(char __user *buf, int size) +static int syslog_print(struct log_state *log, + char __user *buf, int size) { struct printk_info info; struct printk_record r; @@ -1536,7 +1546,7 @@ static int syslog_print(char __user *buf, int size) printk_safe_enter_irq(); raw_spin_lock(&syslog_lock); - if (!prb_read_valid(prb, syslog_seq, &r)) { + if (!prb_read_valid(log->prb, syslog_seq, &r)) { raw_spin_unlock(&syslog_lock); printk_safe_exit_irq(); break; @@ -1588,7 +1598,8 @@ static int syslog_print(char __user *buf, int size) return len; } -static int syslog_print_all(char __user *buf, int size, bool clear) +static int syslog_print_all(struct log_state *log, + char __user *buf, int size, bool clear) { struct printk_info info; struct printk_record r; @@ -1607,13 +1618,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) * Find first record that fits, including all following records, * into the user-provided buffer for this dump. */ - seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, - size, true, time); + seq = find_first_fitting_seq(log, + latched_seq_read_nolock(&log->clear_seq), + -1, size, true, time); prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); len = 0; - prb_for_each_record(seq, prb, seq, &r) { + prb_for_each_record(seq, log->prb, seq, &r) { int textlen; textlen = record_print_text(&r, true, time); @@ -1636,7 +1648,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) if (clear) { raw_spin_lock(&syslog_lock); - latched_seq_write(&clear_seq, seq); + latched_seq_write(&log->clear_seq, seq); raw_spin_unlock(&syslog_lock); } printk_safe_exit_irq(); @@ -1645,11 +1657,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return len; } -static void syslog_clear(void) +static void syslog_clear(struct log_state *log) { printk_safe_enter_irq(); raw_spin_lock(&syslog_lock); - latched_seq_write(&clear_seq, prb_next_seq(prb)); + latched_seq_write(&log->clear_seq, prb_next_seq(log->prb)); raw_spin_unlock(&syslog_lock); printk_safe_exit_irq(); } @@ -1692,7 +1704,7 @@ int do_syslog(int type, char __user *buf, int len, int source) return -EFAULT; error = wait_event_interruptible(log_wait, - prb_read_valid(prb, read_syslog_seq_irq(), NULL)); + prb_read_valid(log->prb, read_syslog_seq_irq(), NULL)); if (error) return error; error = syslog_print(buf, len); @@ -1709,11 +1721,11 @@ int do_syslog(int type, char __user *buf, int len, int source) return 0; if (!access_ok(buf, len)) return -EFAULT; - error = syslog_print_all(buf, len, clear); + error = syslog_print_all(log, buf, len, clear); break; /* Clear ring buffer */ case SYSLOG_ACTION_CLEAR: - syslog_clear(); + syslog_clear(log); break; /* Disable logging to console */ case SYSLOG_ACTION_CONSOLE_OFF: @@ -1742,7 +1754,7 @@ int do_syslog(int type, char __user *buf, int len, int source) case SYSLOG_ACTION_SIZE_UNREAD: printk_safe_enter_irq(); raw_spin_lock(&syslog_lock); - if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + if (!prb_read_valid_info(log->prb, log->syslog_seq, &info, NULL)) { /* No unread messages. */ raw_spin_unlock(&syslog_lock); printk_safe_exit_irq(); @@ -1759,13 +1771,13 @@ int do_syslog(int type, char __user *buf, int len, int source) * for pending data, not the size; return the count of * records, not the length. */ - error = prb_next_seq(prb) - syslog_seq; + error = prb_next_seq(log->prb) - syslog_seq; } else { bool time = syslog_partial ? syslog_time : printk_time; unsigned int line_count; u64 seq; - prb_for_each_info(syslog_seq, prb, seq, &info, + prb_for_each_info(log->syslog_seq, log->prb, seq, &info, &line_count) { error += get_record_print_text_size(&info, line_count, true, time); @@ -2117,7 +2129,7 @@ int vprintk_store_log(struct log_state *log, int facility, int level, if (lflags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); - if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { + if (prb_reserve_in_last(&e, log->prb, &r, caller_id, LOG_LINE_MAX)) { text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, facility, &lflags, fmt, args); r.info->text_len += text_len; @@ -2139,12 +2151,12 @@ int vprintk_store_log(struct log_state *log, int facility, int level, * structure when they fail. */ prb_rec_init_wr(&r, reserve_size); - if (!prb_reserve(&e, prb, &r)) { + if (!prb_reserve(&e, log->prb, &r)) { /* truncate the message if it is too long for empty buffer */ truncate_msg(log, &reserve_size, &trunc_msg_len); prb_rec_init_wr(&r, reserve_size + trunc_msg_len); - if (!prb_reserve(&e, prb, &r)) + if (!prb_reserve(&e, log->prb, &r)) return 0; } @@ -2600,6 +2612,7 @@ static inline int can_use_console(void) */ void console_unlock(void) { + struct log_state *log = &init_log_state; static char ext_text[CONSOLE_EXT_LOG_MAX]; static char text[CONSOLE_LOG_MAX]; unsigned long flags; @@ -2649,7 +2662,7 @@ void console_unlock(void) printk_safe_enter_irqsave(flags); skip: - if (!prb_read_valid(prb, console_seq, &r)) + if (!prb_read_valid(log->prb, console_seq, &r)) break; if (console_seq != r.info->seq) { @@ -2725,7 +2738,7 @@ void console_unlock(void) * there's a new owner and the console_unlock() from them will do the * flush, no worries. */ - retry = prb_read_valid(prb, console_seq, NULL); + retry = prb_read_valid(log->prb, console_seq, NULL); printk_safe_exit_irqrestore(flags); if (retry && console_trylock()) @@ -2779,6 +2792,8 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + struct log_state *log = &init_log_state; + /* * If someone else is holding the console lock, trylock will fail * and may_schedule may be set. Ignore and proceed to unlock so @@ -2793,7 +2808,7 @@ void console_flush_on_panic(enum con_flush_mode mode) unsigned long flags; printk_safe_enter_irqsave(flags); - console_seq = prb_first_valid_seq(prb); + console_seq = prb_first_valid_seq(log->prb); printk_safe_exit_irqrestore(flags); } console_unlock(); @@ -3443,7 +3458,8 @@ void kmsg_dump(enum kmsg_dump_reason reason) bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len) { - u64 min_seq = latched_seq_read_nolock(&clear_seq); + struct log_state *log = &init_log_state; + u64 min_seq = latched_seq_read_nolock(&log->clear_seq); struct printk_info info; unsigned int line_count; struct printk_record r; @@ -3459,11 +3475,11 @@ bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, /* Read text or count text lines? */ if (line) { - if (!prb_read_valid(prb, iter->cur_seq, &r)) + if (!prb_read_valid(log->prb, iter->cur_seq, &r)) goto out; l = record_print_text(&r, syslog, printk_time); } else { - if (!prb_read_valid_info(prb, iter->cur_seq, + if (!prb_read_valid_info(log->prb, iter->cur_seq, &info, &line_count)) { goto out; } @@ -3504,7 +3520,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out) { - u64 min_seq = latched_seq_read_nolock(&clear_seq); + struct log_state *log = &init_log_state; + u64 min_seq = latched_seq_read_nolock(&log->clear_seq); struct printk_info info; struct printk_record r; unsigned long flags; @@ -3521,7 +3538,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, iter->cur_seq = min_seq; printk_safe_enter_irqsave(flags); - if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { + if (prb_read_valid_info(log->prb, iter->cur_seq, &info, NULL)) { if (info.seq != iter->cur_seq) { /* messages are gone, move to first available one */ iter->cur_seq = info.seq; @@ -3540,7 +3557,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, * because this function (by way of record_print_text()) will * not write more than size-1 bytes of text into @buf. */ - seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, + seq = find_first_fitting_seq(log, iter->cur_seq, iter->next_seq, size - 1, syslog, time); /* @@ -3552,7 +3569,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, prb_rec_init_rd(&r, &info, buf, size); len = 0; - prb_for_each_record(seq, prb, seq, &r) { + prb_for_each_record(seq, log->prb, seq, &r) { if (r.info->seq >= iter->next_seq) break; @@ -3582,11 +3599,12 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); */ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { + struct log_state *log = &init_log_state; unsigned long flags; printk_safe_enter_irqsave(flags); - iter->cur_seq = latched_seq_read_nolock(&clear_seq); - iter->next_seq = prb_next_seq(prb); + iter->cur_seq = latched_seq_read_nolock(&log->clear_seq); + iter->next_seq = prb_next_seq(log->prb); printk_safe_exit_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); @@ -3595,12 +3613,21 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind); int setup_log_buf_ve(struct log_state *log, struct ve_struct *ve, int early) { unsigned long local_log_buf_len; + struct printk_ringbuffer *local_prb; int ret = -EINVAL; + struct printk_info *new_infos; + unsigned int new_descs_count; + struct prb_desc *new_descs; + size_t new_descs_size; + size_t new_infos_size; char *new_log_buf; local_log_buf_len = VE_LOG_BUF_LEN; + BUILD_BUG_ON((VE_LOG_BUF_LEN >> PRB_AVGBITS) == 0); + new_descs_count = local_log_buf_len >> PRB_AVGBITS; + ret = -ENOMEM; new_log_buf = kmalloc(local_log_buf_len, GFP_KERNEL); if (unlikely(!new_log_buf)) { @@ -3609,10 +3636,47 @@ int setup_log_buf_ve(struct log_state *log, struct ve_struct *ve, int early) goto out; } + new_descs_size = new_descs_count * sizeof(struct prb_desc); + new_descs = kmalloc(new_descs_size, GFP_KERNEL); + if (unlikely(!new_descs)) { + pr_err("log_buf_len: %zu desc bytes not available\n", + new_descs_size); + goto err_free_log_buf; + } + + new_infos_size = new_descs_count * sizeof(struct printk_info); + new_infos = kmalloc(new_infos_size, GFP_KERNEL); + if (unlikely(!new_infos)) { + pr_err("log_buf_len: %zu info bytes not available\n", + new_infos_size); + goto err_free_descs; + } + + local_prb = kmalloc(sizeof(struct printk_ringbuffer), GFP_KERNEL); + if (unlikely(!local_prb)) { + pr_err("log_buf_len: %lu info bytes not available\n", + sizeof(struct printk_ringbuffer)); + goto err_free_infos; + } + + prb_init(local_prb, + new_log_buf, ilog2(local_log_buf_len), + new_descs, ilog2(new_descs_count), + new_infos); + log->buf_len = local_log_buf_len; log->buf = new_log_buf; + log->prb = local_prb; + return 0; + +err_free_infos: + kfree(new_infos); +err_free_descs: + kfree(new_descs); +err_free_log_buf: + kfree(new_log_buf); out: return ret; } @@ -3632,6 +3696,8 @@ int ve_log_init(struct ve_struct *ve) return ret; } + seqcount_latch_init(&log->clear_seq.latch); + ve->log_state = log; return 0; } @@ -3639,7 +3705,11 @@ int ve_log_init(struct ve_struct *ve) void ve_log_destroy(struct ve_struct *ve) { struct log_state *log = ve->log_state; + struct printk_ringbuffer *rb = log->prb; + kfree(rb->desc_ring.infos); + kfree(rb->desc_ring.descs); + kfree(log->prb); kfree(log->buf); kfree(log); } _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel