On 07/30/2013 05:59 AM, zwu.ker...@gmail.com wrote:
> From: Zhi Yong Wu <wu...@linux.vnet.ibm.com>
> 
>   It can take a long time to run log recovery operation because it is
> single threaded and is bound by read latency. We can find that it took
> most of the time to wait for the read IO to occur, so if one object
> readahead is introduced to log recovery, it will obviously reduce the
> log recovery time.
> 
> Log recovery time stat:
> 
>           w/o this patch        w/ this patch
> 
> real:        0m15.023s             0m7.802s
> user:        0m0.001s              0m0.001s
> sys:         0m0.246s              0m0.107s
> 
> Signed-off-by: Zhi Yong Wu <wu...@linux.vnet.ibm.com>
> ---

Cool patch. I'm not terribly familiar with the log recovery code so take
my comments with a grain of salt, but a couple things I noticed on a
quick skim...

>  fs/xfs/xfs_log_recover.c | 162 
> +++++++++++++++++++++++++++++++++++++++++++++--
>  fs/xfs/xfs_log_recover.h |   2 +
>  2 files changed, 159 insertions(+), 5 deletions(-)
> 
...
>  
> +STATIC int
> +xlog_recover_items_pass2(
> +     struct xlog                     *log,
> +     struct xlog_recover             *trans,
> +     struct list_head                *buffer_list,
> +     struct list_head                *ra_list)

A nit, but technically this function doesn't have to be involved with
readahead. Perhaps rename ra_list to item_list..?

> +{
> +     int                     error = 0;
> +     xlog_recover_item_t     *item;
> +
> +     list_for_each_entry(item, ra_list, ri_list) {
> +             error = xlog_recover_commit_pass2(log, trans,
> +                                       buffer_list, item);
> +             if (error)
> +                     return error;
> +     }
> +
> +     return error;
> +}
> +
>  /*
>   * Perform the transaction.
>   *
> @@ -3189,9 +3314,11 @@ xlog_recover_commit_trans(
>       struct xlog_recover     *trans,
>       int                     pass)
>  {
> -     int                     error = 0, error2;
> -     xlog_recover_item_t     *item;
> +     int                     error = 0, error2, ra_qdepth = 0;
> +     xlog_recover_item_t     *item, *next;
>       LIST_HEAD               (buffer_list);
> +     LIST_HEAD               (ra_list);
> +     LIST_HEAD               (all_ra_list);
>  
>       hlist_del(&trans->r_list);
>  
> @@ -3199,14 +3326,21 @@ xlog_recover_commit_trans(
>       if (error)
>               return error;
>  
> -     list_for_each_entry(item, &trans->r_itemq, ri_list) {
> +     list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
>               switch (pass) {
>               case XLOG_RECOVER_PASS1:
>                       error = xlog_recover_commit_pass1(log, trans, item);
>                       break;
>               case XLOG_RECOVER_PASS2:
> -                     error = xlog_recover_commit_pass2(log, trans,
> -                                                       &buffer_list, item);
> +                     if (ra_qdepth++ >= XLOG_RECOVER_MAX_QDEPTH) {

The counting mechanism looks strange and easy to break with future
changes. Why not increment ra_qdepth in the else bracket where it is
explicitly tied to the operation it tracks?

> +                             error = xlog_recover_items_pass2(log, trans,
> +                                             &buffer_list, &ra_list);
> +                             list_splice_tail_init(&ra_list, &all_ra_list);
> +                             ra_qdepth = 0;

So now we've queued up a bunch of items we've issued readahead on in
ra_list and we've executed the recovery on the list. What happens to the
current item?

Brian

> +                     } else {
> +                             xlog_recover_ra_pass2(log, item);
> +                             list_move_tail(&item->ri_list, &ra_list);
> +                     }
>                       break;
>               default:
>                       ASSERT(0);
> @@ -3216,9 +3350,27 @@ xlog_recover_commit_trans(
>                       goto out;
>       }
>  
> +     if (!list_empty(&ra_list)) {
> +             error = xlog_recover_items_pass2(log, trans,
> +                             &buffer_list, &ra_list);
> +             if (error)
> +                     goto out;
> +
> +             list_splice_tail_init(&ra_list, &all_ra_list);
> +     }
> +
> +     if (!list_empty(&all_ra_list))
> +             list_splice_init(&all_ra_list, &trans->r_itemq);
> +
>       xlog_recover_free_trans(trans);
>  
>  out:
> +     if (!list_empty(&ra_list))
> +             list_splice_tail_init(&ra_list, &all_ra_list);
> +
> +     if (!list_empty(&all_ra_list))
> +             list_splice_init(&all_ra_list, &trans->r_itemq);
> +
>       error2 = xfs_buf_delwri_submit(&buffer_list);
>       return error ? error : error2;
>  }
> diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h
> index 1c55ccb..16322f6 100644
> --- a/fs/xfs/xfs_log_recover.h
> +++ b/fs/xfs/xfs_log_recover.h
> @@ -63,4 +63,6 @@ typedef struct xlog_recover {
>  #define      XLOG_RECOVER_PASS1      1
>  #define      XLOG_RECOVER_PASS2      2
>  
> +#define XLOG_RECOVER_MAX_QDEPTH 100
> +
>  #endif       /* __XFS_LOG_RECOVER_H__ */
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to