tools/include/linux/ring_buffer.h

   1 #ifndef _TOOLS_LINUX_RING_BUFFER_H_
   2 #define _TOOLS_LINUX_RING_BUFFER_H_
   3
   4 #include <asm/barrier.h>
   5
   6 /*
   7  * Contract with kernel for walking the perf ring buffer from
   8  * user space requires the following barrier pairing (quote
   9  * from kernel/events/ring_buffer.c):
  10  *
  11  *   Since the mmap() consumer (userspace) can run on a
  12  *   different CPU:
  13  *
  14  *   kernel                             user
  15  *
  16  *   if (LOAD ->data_tail) {            LOAD ->data_head
  17  *                      (A)             smp_rmb()       (C)
  18  *      STORE $data                     LOAD $data
  19  *      smp_wmb()       (B)             smp_mb()        (D)
  20  *      STORE ->data_head               STORE ->data_tail
  21  *   }
  22  *
  23  *   Where A pairs with D, and B pairs with C.
  24  *
  25  *   In our case A is a control dependency that separates the
  26  *   load of the ->data_tail and the stores of $data. In case
  27  *   ->data_tail indicates there is no room in the buffer to
  28  *   store $data we do not.
  29  *
  30  *   D needs to be a full barrier since it separates the data
  31  *   READ from the tail WRITE.
  32  *
  33  *   For B a WMB is sufficient since it separates two WRITEs,
  34  *   and for C an RMB is sufficient since it separates two READs.
  35  *
  36  * Note, instead of B, C, D we could also use smp_store_release()
  37  * in B and D as well as smp_load_acquire() in C.
  38  *
  39  * However, this optimization does not make sense for all kernel
  40  * supported architectures since for a fair number it would
  41  * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
  42  * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
  43  *
  44  * Thus, for those architectures, smp_wmb() in B and smp_rmb() in C
  45  * would still be less expensive. For D, smp_store_release() has
  46  * either the same cost or is less expensive; for example, due to
  47  * TSO, x86 can avoid the CPU barrier entirely.
  48  */
  49
  50 static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
  51 {
  52 /*
  53  * Architectures where smp_load_acquire() does not fallback to
  54  * READ_ONCE() + smp_mb() pair.
  55  */
  56 #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
  57     defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
  58         return smp_load_acquire(&base->data_head);
  59 #else
  60         u64 head = READ_ONCE(base->data_head);
  61
  62         smp_rmb();
  63         return head;
  64 #endif
  65 }
  66
  67 static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
  68                                           u64 tail)
  69 {
  70         smp_store_release(&base->data_tail, tail);
  71 }
  72
  73 #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */