<snip>

> >> diff --git a/app/test/test_seqlock.c b/app/test/test_seqlock.c
> >> new file mode 100644
> >> index 0000000000..54fadf8025
> >> --- /dev/null
> >> +++ b/app/test/test_seqlock.c
> >> @@ -0,0 +1,202 @@
> >> +/* SPDX-License-Identifier: BSD-3-Clause
> >> + * Copyright(c) 2022 Ericsson AB
> >> + */
> >> +
> >> +#include <rte_seqlock.h>
> >> +
> >> +#include <rte_cycles.h>
> >> +#include <rte_malloc.h>
> >> +#include <rte_random.h>
> >> +
> >> +#include <inttypes.h>
> >> +
> >> +#include "test.h"
> >> +
> >> +struct data {
> >> +  rte_seqlock_t lock;
> >> +
> >> +  uint64_t a;
> >> +  uint64_t b __rte_cache_aligned;
> >> +  uint64_t c __rte_cache_aligned;
> >> +} __rte_cache_aligned;
> >> +
> >> +struct reader {
> >> +  struct data *data;
> >> +  uint8_t stop;
> >> +};
> >> +
> >> +#define WRITER_RUNTIME (2.0) /* s */
> >> +
> >> +#define WRITER_MAX_DELAY (100) /* us */
> >> +
> >> +#define INTERRUPTED_WRITER_FREQUENCY (1000)
> >> +#define WRITER_INTERRUPT_TIME (1) /* us */
> >> +
> >> +static int
> >> +writer_run(void *arg)
> >> +{
> >> +  struct data *data = arg;
> >> +  uint64_t deadline;
> >> +
> >> +  deadline = rte_get_timer_cycles() +
> >> +          WRITER_RUNTIME * rte_get_timer_hz();
> >> +
> >> +  while (rte_get_timer_cycles() < deadline) {
> >> +          bool interrupted;
> >> +          uint64_t new_value;
> >> +          unsigned int delay;
> >> +
> >> +          new_value = rte_rand();
> >> +
> >> +          interrupted =
> >> +                  rte_rand_max(INTERRUPTED_WRITER_FREQUENCY) == 0;
> >> +
> >> +          rte_seqlock_write_lock(&data->lock);
> >> +
> >> +          data->c = new_value;
> >> +
> >> +          /* These compiler barriers (both on the test reader
> >> +           * and the test writer side) are here to ensure that
> >> +           * loads/stores *usually* happen in test program order
> >> +           * (always on a TSO machine). They are arranged in such
> >> +           * a way that the writer stores in a different order
> >> +           * than the reader loads, to emulate an arbitrary
> >> +           * order. A real application using a seqlock does not
> >> +           * require any compiler barriers.
> >> +           */
> >> +          rte_compiler_barrier();
> > The compiler barriers are not sufficient on all architectures (if the
> > intention is to maintain the program order).
> >
> 
> The intention is what is described in the comment (i.e., to make it likely,
> but not guaranteed, that the stores will be globally visible in program
> order).
> 
> The reason I didn't put in a release memory barrier was that it seemed a
> little intrusive.
> 
> Maybe I should remove these compiler barriers. They are also intrusive in
> the sense that they may prevent some compiler optimizations that could
> otherwise expose a seqlock bug. Or I could have two variants of the test.
> I don't know.
I would suggest removing the compiler barriers and leaving it to the CPU to
do what it can do.
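
For reference, this is roughly what the reader-side protocol under discussion
looks like when open-coded (a hypothetical sketch, not the patch's read-side
API, which is snipped from this quote; it reaches into the sn field directly,
and omits the test reader's compiler barriers between the loads, purely for
illustration):

	static void
	read_abc(struct data *data, uint64_t *a, uint64_t *b, uint64_t *c)
	{
		uint32_t begin_sn;
		uint32_t end_sn;

		do {
			/* pairs with the store-release in
			 * rte_seqlock_write_unlock()
			 */
			begin_sn = __atomic_load_n(&data->lock.sn,
						   __ATOMIC_ACQUIRE);

			*a = data->a;
			*b = data->b;
			*c = data->c;

			/* keep the data loads before the sn re-check */
			rte_atomic_thread_fence(__ATOMIC_ACQUIRE);

			end_sn = __atomic_load_n(&data->lock.sn,
						 __ATOMIC_RELAXED);

			/* retry on an odd sn (writer active) or a changed
			 * sn (possibly torn snapshot)
			 */
		} while ((begin_sn & 1) || begin_sn != end_sn);
	}

A test reader then only has to verify that a, b and c hold the same value,
since the writer stores the same new_value into all three fields.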

> 
> >> +          data->b = new_value;
> >> +
> >> +          if (interrupted)
> >> +                  rte_delay_us_block(WRITER_INTERRUPT_TIME);
> >> +
> >> +          rte_compiler_barrier();
> >> +          data->a = new_value;
> >> +
> >> +          rte_seqlock_write_unlock(&data->lock);
> >> +
> >> +          delay = rte_rand_max(WRITER_MAX_DELAY);
> >> +
> >> +          rte_delay_us_block(delay);
> >> +  }
> >> +
> >> +  return 0;
> >> +}
> >> +

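(Not part of the patch: since writer_run() has the lcore function signature,
the snipped test body presumably launches it along these lines. This is a
hedged sketch; rte_seqlock_init() and the launch details are assumptions,
not quoted code.)

	/* hypothetical launch code, for illustration only */
	struct data *data = rte_zmalloc(NULL, sizeof(struct data), 0);
	unsigned int worker_id = rte_get_next_lcore(-1, 1, 0);

	rte_seqlock_init(&data->lock); /* init API assumed, not shown above */

	rte_eal_remote_launch(writer_run, data, worker_id);

	/* ... run the test readers on other lcores ... */

	rte_eal_wait_lcore(worker_id);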
<snip>

> >> +
> >> +/**
> >> + * Begin a write-side critical section.
> >> + *
> >> + * A call to this function acquires the write lock associated with @p
> >> + * seqlock, and marks the beginning of a write-side critical section.
> >> + *
> >> + * After having called this function, the caller may go on to modify
> >> + * (both read and write) the protected data, in an atomic or
> >> + * non-atomic manner.
> >> + *
> >> + * After the necessary updates have been performed, the application
> >> + * calls rte_seqlock_write_unlock().
> >> + *
> >> + * This function is not preemption-safe in the sense that preemption
> >> + * of the calling thread may block reader progress until the writer
> >> + * thread is rescheduled.
> >> + *
> >> + * Unlike rte_seqlock_read_lock(), each call made to
> >> + * rte_seqlock_write_lock() must be matched with an unlock call.
> >> + *
> >> + * @param seqlock
> >> + *   A pointer to the seqlock.
> >> + *
> >> + * @see rte_seqlock_write_unlock()
> >> + */
> >> +__rte_experimental
> >> +static inline void
> >> +rte_seqlock_write_lock(rte_seqlock_t *seqlock)
> >> +{
> >> +  uint32_t sn;
> >> +
> >> +  /* to synchronize with other writers */
> >> +  rte_spinlock_lock(&seqlock->lock);
> >> +
> >> +  sn = seqlock->sn + 1;
> > The load of seqlock->sn could use __atomic_load_n to be consistent.
> >
> 
> But why? I know it doesn't have any cost (these loads are going to be atomic
> anyways), but why use a construct with stronger guarantees than you have
> to?
Using __atomic_xxx ensures that the operation is always atomic. I believe (I
am not sure) that, when not using __atomic_xxx, the compiler is allowed to
use non-atomic operations.
The other reason is that we do not qualify 'sn' as volatile. Use of
__atomic_xxx inherently indicates to the compiler not to cache 'sn' in a
register. I do not know the compiler behavior if some operations on 'sn' use
__atomic_xxx and some do not.
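
Concretely, the suggestion would amount to something like this in
rte_seqlock_write_lock() (a sketch of the proposal, not committed code):

	/* load sn through __atomic_load_n as well, so every access to the
	 * shared counter uses the __atomic_xxx builtins; __ATOMIC_RELAXED
	 * suffices for the load, as the spinlock excludes other writers
	 */
	sn = __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED) + 1;

	__atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELAXED);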

> 
> >> +
> >> +  __atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELAXED);
> >> +
> >> +  /* __ATOMIC_RELEASE to prevent stores later in program order
> >> +   * from being reordered before the sn store.
> >> +   */
> >> +  rte_atomic_thread_fence(__ATOMIC_RELEASE);
> >> +}
> >> +
> >> +/**
> >> + * End a write-side critical section.
> >> + *
> >> + * A call to this function marks the end of the write-side critical
> >> + * section, for @p seqlock. After this call has been made, the
> >> + * protected data may no longer be modified.
> >> + *
> >> + * @param seqlock
> >> + *   A pointer to the seqlock.
> >> + *
> >> + * @see rte_seqlock_write_lock()
> >> + */
> >> +__rte_experimental
> >> +static inline void
> >> +rte_seqlock_write_unlock(rte_seqlock_t *seqlock)
> >> +{
> >> +  uint32_t sn;
> >> +
> >> +  sn = seqlock->sn + 1;
> > Same here, the load of seqlock->sn could use __atomic_load_n
> >
> >> +
> >> +  /* synchronizes-with the load acquire in rte_seqlock_read_lock() */
> >> +  __atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELEASE);
> >> +
> >> +  rte_spinlock_unlock(&seqlock->lock);
> >> +}
> >> +
> >> +#ifdef __cplusplus
> >> +}
> >> +#endif
> >> +
> >> +#endif  /* _RTE_SEQLOCK_H_ */
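
For reference, the documented write-side contract boils down to the pattern
below (a usage sketch based only on the doc comments above; the config
struct and its fields are invented for illustration):

	#include <stdint.h>

	#include <rte_seqlock.h>

	struct config {
		rte_seqlock_t lock;
		uint64_t param_a; /* fields invented for illustration */
		uint64_t param_b;
	};

	static void
	config_update(struct config *cfg, uint64_t a, uint64_t b)
	{
		/* serializes with other writers; sn becomes odd
		 * (writer active)
		 */
		rte_seqlock_write_lock(&cfg->lock);

		/* the protected data may be read and written,
		 * atomically or not
		 */
		cfg->param_a = a;
		cfg->param_b = b;

		/* store-release of an even sn; readers can now validate */
		rte_seqlock_write_unlock(&cfg->lock);
	}

As the doc comment notes, the critical section is not preemption-safe, so it
should be kept short: a preempted writer stalls all readers.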
<snip>
