On Sat, 2025-01-25 at 23:30 -0800, Andi Kleen wrote:
> From: Andi Kleen <a...@gcc.gnu.org>
> 
> For larger files the file_cache line index will be spread out to make
> the index fit into the fixed buffer, so any access to the non latest
> line
> will need some skipping of lines.
> 
> Most accesses for line are near the latest line because
> a diagnostic is likely near where the scanner is currently lexing.
> 
> Add a second cache for recent lines. It is organized as a ring buffer
> and maintains the last 256 lines relative to the last input line.
> 
> With that, enabling -Wmisleading-indentation for the test case in
> PR preprocessor/118168, is within the run-to-run variation.
> 
> gcc/ChangeLog:
> 
>       PR preprocessor/118168
>       * input.cc (file_cache::m_line_recent,
>       m_line_recent_first, m_line_recent_last): Add.
>       (file_cache_slot::evict): Clear new fields.
>       (file_cache_slot::create): Clear new fields.
>       (file_cache_slot::file_cache_slot): Initialize new fields.
>       (file_cache_slot::~file_cache_slot): Release m_line_recent.
>       (file_cache_slot::get_next_line): Maintain ring buffer of
> lines
>       in m_line_recent.
>       (file_cache_slot::read_line_num): Use m_line_recent to look
> up
>       recent lines quickly.
> ---
>  gcc/input.cc | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/input.cc b/gcc/input.cc
> index 82a58fdef27..b3fe38eb77c 100644
> --- a/gcc/input.cc
> +++ b/gcc/input.cc
> @@ -126,6 +126,7 @@ public:
>  
>    static const size_t buffer_size = 4 * 1024;
>    static size_t line_record_size;
> +  static size_t recent_cached_lines_shift;
>  
>    /* The number of time this file has been accessed.  This is used
>       to designate which file cache to evict from the cache
> @@ -174,6 +175,13 @@ public:
>       this is scaled down dynamically, with the line_info becoming
> anchors.  */
>    vec<line_info, va_heap> m_line_record;
>  
> +  /* A cache of the recently seen lines. This is maintained as a
> ring
> +     buffer. */
> +  vec<line_info, va_heap> m_line_recent;
> +
> +  /* First and last valid entry in m_line_recent.  */
> +  size_t m_line_recent_last, m_line_recent_first;
> +
>    void offset_buffer (int offset)
>    {
>      gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
> @@ -187,6 +195,7 @@ public:
>  };
>  
>  size_t file_cache_slot::line_record_size = 100;
> +size_t file_cache_slot::recent_cached_lines_shift = 8;
>  
>  /* Tune file_cache.  */
>  void
> @@ -391,6 +400,8 @@ file_cache_slot::evict ()
>    m_line_start_idx = 0;
>    m_line_num = 0;
>    m_line_record.truncate (0);
> +  m_line_recent_first = 0;
> +  m_line_recent_last = 0;
>    m_use_count = 0;
>    m_missing_trailing_newline = true;
>  }
> @@ -486,6 +497,8 @@ file_cache_slot::create (const
> file_cache::input_context &in_context,
>    m_nb_read = 0;
>    m_line_start_idx = 0;
>    m_line_num = 0;
> +  m_line_recent_first = 0;
> +  m_line_recent_last = 0;
>    m_line_record.truncate (0);
>    /* Ensure that this cache entry doesn't get evicted next time
>       add_file_to_cache_tab is called.  */
> @@ -592,9 +605,13 @@ file_cache::lookup_or_add_file (const char
> *file_path)
>  file_cache_slot::file_cache_slot ()
>  : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
>    m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx
> (0),
> -  m_line_num (0), m_missing_trailing_newline (true)
> +  m_line_num (0), m_missing_trailing_newline (true),
> +  m_line_recent_last (0), m_line_recent_first (0)
>  {
>    m_line_record.create (0);
> +  m_line_recent.create (1U << recent_cached_lines_shift);
> +  for (int i = 0; i < 1 << recent_cached_lines_shift; i++)
> +    m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0));
>  }
>  
>  /* Destructor for a cache of file used by caret diagnostic.  */
> @@ -613,6 +630,7 @@ file_cache_slot::~file_cache_slot ()
>        m_data = 0;
>      }
>    m_line_record.release ();
> +  m_line_recent.release ();
>  }
>  
>  void
> @@ -865,6 +883,20 @@ file_cache_slot::get_next_line (char **line,
> ssize_t *line_len)
>                                      line_end - m_data));
>      }
>  
> +  /* Cache recent tail lines separately for fast access. This
> assumes
> +     most accesses do not skip backwards.  */
> +  if (m_line_recent_last == m_line_recent_first
> +     || m_line_recent[m_line_recent_last].line_num == m_line_num
> - 1)
> +    {
> +      size_t mask = ((size_t)1 << recent_cached_lines_shift) - 1;
> +      m_line_recent_last = (m_line_recent_last + 1) & mask;
> +      if (m_line_recent_last == m_line_recent_first)
> +     m_line_recent_first = (m_line_recent_first + 1) & mask;
> +      m_line_recent[m_line_recent_last] =
> +     file_cache_slot::line_info (m_line_num, m_line_start_idx,
> +                                 line_end - m_data);
> +    }
> +

If I reading this right, calls to get_next_line lead to insertions into
the ring buffer whilst the buffer is empty or the last line in the ring
buffer cache is m_line_num - 1.

There are a few places where we update m_line_num, but this caching
code doesn't seem to touch those places.  Should it?  I don't know if
the lack of a reset is an issue, but it's an aspect of the patch that's
a bit hazy to me; sorry.

If I'm reading this right, the caching that this adds is only for the
final 256 lines read so far in the file, and lets us use a line offset
relative to the most recent entry to go direct to such a recent line in
the file.

Dave

>    /* Update m_line_start_idx so that it points to the next line to
> be
>       read.  */
>    if (next_line_start)
> @@ -910,6 +942,21 @@ file_cache_slot::read_line_num (size_t line_num,
>  {
>    gcc_assert (line_num > 0);
>  
> +  /* Is the line in the recent line cache?  */
> +  if (m_line_recent_first != m_line_recent_last
> +     && m_line_recent[m_line_recent_first].line_num <= line_num
> +     && m_line_recent[m_line_recent_last].line_num >= line_num)
> +    {
> +      line_info &last = m_line_recent[m_line_recent_last];
> +      size_t mask = (1U << recent_cached_lines_shift) - 1;
> +      size_t idx = (m_line_recent_last - (last.line_num - line_num))
> & mask;
> +      line_info &recent = m_line_recent[idx];
> +      gcc_assert (recent.line_num == line_num);
> +      *line = m_data + recent.start_pos;
> +      *line_len = recent.end_pos - recent.start_pos;
> +      return true;
> +    }
> +
>    if (line_num <= m_line_num)
>      {
>        line_info l (line_num, 0, 0);

Reply via email to