On Sat, 2025-01-25 at 23:30 -0800, Andi Kleen wrote: > From: Andi Kleen <a...@gcc.gnu.org> > > For larger files the file_cache line index will be spread out to make > the index fit into the fixed buffer, so any access to the non latest > line > will need some skipping of lines. > > Most accesses for line are near the latest line because > a diagnostic is likely near where the scanner is currently lexing. > > Add a second cache for recent lines. It is organized as a ring buffer > and maintains the last 256 lines relative to the last input line. > > With that, enabling -Wmisleading-indentation for the test case in > PR preprocessor/118168, is within the run-to-run variation. > > gcc/ChangeLog: > > PR preprocessor/118168 > * input.cc (file_cache::m_line_recent, > m_line_recent_first, m_line_recent_last): Add. > (file_cache_slot::evict): Clear new fields. > (file_cache_slot::create): Clear new fields. > (file_cache_slot::file_cache_slot): Initialize new fields. > (file_cache_slot::~file_cache_slot): Release m_line_recent. > (file_cache_slot::get_next_line): Maintain ring buffer of > lines > in m_line_recent. > (file_cache_slot::read_line_num): Use m_line_recent to look > up > recent lines quickly. > --- > gcc/input.cc | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 48 insertions(+), 1 deletion(-) > > diff --git a/gcc/input.cc b/gcc/input.cc > index 82a58fdef27..b3fe38eb77c 100644 > --- a/gcc/input.cc > +++ b/gcc/input.cc > @@ -126,6 +126,7 @@ public: > > static const size_t buffer_size = 4 * 1024; > static size_t line_record_size; > + static size_t recent_cached_lines_shift; > > /* The number of time this file has been accessed. This is used > to designate which file cache to evict from the cache > @@ -174,6 +175,13 @@ public: > this is scaled down dynamically, with the line_info becoming > anchors. */ > vec<line_info, va_heap> m_line_record; > > + /* A cache of the recently seen lines. This is maintained as a > ring > + buffer. */ > + vec<line_info, va_heap> m_line_recent; > + > + /* First and last valid entry in m_line_recent. */ > + size_t m_line_recent_last, m_line_recent_first; > + > void offset_buffer (int offset) > { > gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0 > @@ -187,6 +195,7 @@ public: > }; > > size_t file_cache_slot::line_record_size = 100; > +size_t file_cache_slot::recent_cached_lines_shift = 8; > > /* Tune file_cache. */ > void > @@ -391,6 +400,8 @@ file_cache_slot::evict () > m_line_start_idx = 0; > m_line_num = 0; > m_line_record.truncate (0); > + m_line_recent_first = 0; > + m_line_recent_last = 0; > m_use_count = 0; > m_missing_trailing_newline = true; > } > @@ -486,6 +497,8 @@ file_cache_slot::create (const > file_cache::input_context &in_context, > m_nb_read = 0; > m_line_start_idx = 0; > m_line_num = 0; > + m_line_recent_first = 0; > + m_line_recent_last = 0; > m_line_record.truncate (0); > /* Ensure that this cache entry doesn't get evicted next time > add_file_to_cache_tab is called. */ > @@ -592,9 +605,13 @@ file_cache::lookup_or_add_file (const char > *file_path) > file_cache_slot::file_cache_slot () > : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0), > m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx > (0), > - m_line_num (0), m_missing_trailing_newline (true) > + m_line_num (0), m_missing_trailing_newline (true), > + m_line_recent_last (0), m_line_recent_first (0) > { > m_line_record.create (0); > + m_line_recent.create (1U << recent_cached_lines_shift); > + for (int i = 0; i < 1 << recent_cached_lines_shift; i++) > + m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0)); > } > > /* Destructor for a cache of file used by caret diagnostic. */ > @@ -613,6 +630,7 @@ file_cache_slot::~file_cache_slot () > m_data = 0; > } > m_line_record.release (); > + m_line_recent.release (); > } > > void > @@ -865,6 +883,20 @@ file_cache_slot::get_next_line (char **line, > ssize_t *line_len) > line_end - m_data)); > } > > + /* Cache recent tail lines separately for fast access. This > assumes > + most accesses do not skip backwards. */ > + if (m_line_recent_last == m_line_recent_first > + || m_line_recent[m_line_recent_last].line_num == m_line_num > - 1) > + { > + size_t mask = ((size_t)1 << recent_cached_lines_shift) - 1; > + m_line_recent_last = (m_line_recent_last + 1) & mask; > + if (m_line_recent_last == m_line_recent_first) > + m_line_recent_first = (m_line_recent_first + 1) & mask; > + m_line_recent[m_line_recent_last] = > + file_cache_slot::line_info (m_line_num, m_line_start_idx, > + line_end - m_data); > + } > +
If I reading this right, calls to get_next_line lead to insertions into the ring buffer whilst the buffer is empty or the last line in the ring buffer cache is m_line_num - 1. There are a few places where we update m_line_num, but this caching code doesn't seem to touch those places. Should it? I don't know if the lack of a reset is an issue, but it's an aspect of the patch that's a bit hazy to me; sorry. If I'm reading this right, the caching that this adds is only for the final 256 lines read so far in the file, and lets us use a line offset relative to the most recent entry to go direct to such a recent line in the file. Dave > /* Update m_line_start_idx so that it points to the next line to > be > read. */ > if (next_line_start) > @@ -910,6 +942,21 @@ file_cache_slot::read_line_num (size_t line_num, > { > gcc_assert (line_num > 0); > > + /* Is the line in the recent line cache? */ > + if (m_line_recent_first != m_line_recent_last > + && m_line_recent[m_line_recent_first].line_num <= line_num > + && m_line_recent[m_line_recent_last].line_num >= line_num) > + { > + line_info &last = m_line_recent[m_line_recent_last]; > + size_t mask = (1U << recent_cached_lines_shift) - 1; > + size_t idx = (m_line_recent_last - (last.line_num - line_num)) > & mask; > + line_info &recent = m_line_recent[idx]; > + gcc_assert (recent.line_num == line_num); > + *line = m_data + recent.start_pos; > + *line_len = recent.end_pos - recent.start_pos; > + return true; > + } > + > if (line_num <= m_line_num) > { > line_info l (line_num, 0, 0);