from:"Brendan Jackman"

[PATCH 0/2] sched/fair: remote load updates for idle CPUs

2017-10-24 Thread Brendan Jackman

Until a NOHZ idle balance takes place on behalf of a CPU (which may
never happen), the blocked load and shares of its root cfs_rq are
updated only by that CPU. That means if a CPU goes suddenly from
being busy to totally idle, its load and shares may not be updated
for a long time.

Schedutil works around this problem by ignoring the util of CPUs
that were last updated more than a tick ago. However the stale
load does impact task placement: elements that look at load and
util (in particular the slow-path of select_task_rq_fair) can
leave the idle CPUs un-used while other CPUs go unnecessarily
overloaded. Furthermore the stale shares can impact CPU time
allotment.

Two complementary solutions are proposed here:
1. When a task wakes up, if necessary an idle CPU is woken as if to
   perform a NOHZ idle balance, which is then aborted once the load
   of NOHZ idle CPUs has been updated. This solves the problem but
   brings with it extra CPU wakeups, which have an energy cost.
2. During newly-idle load balancing, the load of remote nohz-idle
   CPUs in the sched_domain is updated. When all of the idle CPUs
   were updated in that step, the nohz.next_update field
   is pushed further into the future. This field is used to determine
   the need for triggering the newly-added NOHZ kick. So if such
   newly-idle balances are happening often enough, no additional CPU
   wakeups are required to keep all the CPUs' loads updated.


Brendan Jackman (1):
  sched/fair: Update blocked load from newly idle balance

Vincent Guittot (1):
  sched: force update of blocked load of idle cpus

 kernel/sched/core.c  |   1 +
 kernel/sched/fair.c  | 109 ---
 kernel/sched/sched.h |   2 +
 3 files changed, 98 insertions(+), 14 deletions(-)

-- 
2.14.1

[PATCH 1/2] sched: force update of blocked load of idle cpus

2017-10-24 Thread Brendan Jackman

From: Vincent Guittot 

When idle, the blocked load of CPUs will be updated only when an idle
load balance is triggered which may never happen. Because of this
uncertainty on the execution of idle load balance, the utilization,
the load and the shares of idle cfs_rq can stay artificially high and
steal shares and running time from busy cfs_rqs of the task group.
Add a new light idle load balance state which ensures that blocked loads
are periodically updated and decayed but does not perform any task
migration.

The remote load updates are rate-limited, so that they are not
performed with a shorter period than LOAD_AVG_PERIOD (i.e. PELT
half-life). This is the period after which we have a known 50% error
in stale load.

Cc: Dietmar Eggemann 
Cc: Vincent Guittot 
Cc: Ingo Molnar 
Cc: Morten Rasmussen 
Cc: Peter Zijlstra 
Signed-off-by: Vincent Guittot 
[Switched remote update interval to use PELT half life]
[Moved update_blocked_averages call outside rebalance_domains
 to simplify code]
Signed-off-by: Brendan Jackman 
---
 kernel/sched/fair.c  | 71 +---
 kernel/sched/sched.h |  1 +
 2 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 85d1ec1c3b39..9085caf49c76 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5976,6 +5976,9 @@ static int wake_cap(struct task_struct *p, int cpu, int 
prev_cpu)
return min_cap * 1024 < task_util(p) * capacity_margin;
 }
 
+static inline bool nohz_kick_needed(struct rq *rq, bool only_update);
+static void nohz_balancer_kick(bool only_update);
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -6074,6 +6077,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, 
int sd_flag, int wake_f
}
rcu_read_unlock();
 
+#ifdef CONFIG_NO_HZ_COMMON
+   if (nohz_kick_needed(cpu_rq(new_cpu), true))
+   nohz_balancer_kick(true);
+#endif
+
return new_cpu;
 }
 
@@ -8653,6 +8661,7 @@ static struct {
cpumask_var_t idle_cpus_mask;
atomic_t nr_cpus;
unsigned long next_balance; /* in jiffy units */
+   unsigned long next_update; /* in jiffy units */
 } nohz cacheline_aligned;
 
 static inline int find_new_ilb(void)
@@ -8670,7 +8679,7 @@ static inline int find_new_ilb(void)
  * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
  * CPU (if there is one).
  */
-static void nohz_balancer_kick(void)
+static void nohz_balancer_kick(bool only_update)
 {
int ilb_cpu;
 
@@ -8683,6 +8692,10 @@ static void nohz_balancer_kick(void)
 
if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu)))
return;
+
+   if (only_update)
+   set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));
+
/*
 * Use smp_send_reschedule() instead of resched_cpu().
 * This way we generate a sched IPI on the target cpu which
@@ -8770,6 +8783,8 @@ void nohz_balance_enter_idle(int cpu)
atomic_inc(&nohz.nr_cpus);
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
+#else
+static inline void nohz_balancer_kick(bool only_update) {}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -8801,8 +8816,6 @@ static void rebalance_domains(struct rq *rq, enum 
cpu_idle_type idle)
int need_serialize, need_decay = 0;
u64 max_cost = 0;
 
-   update_blocked_averages(cpu);
-
rcu_read_lock();
for_each_domain(cpu, sd) {
/*
@@ -8901,6 +8914,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum 
cpu_idle_type idle)
 {
int this_cpu = this_rq->cpu;
struct rq *rq;
+   struct sched_domain *sd;
int balance_cpu;
/* Earliest time when we have to do rebalance again */
unsigned long next_balance = jiffies + 60*HZ;
@@ -8910,6 +8924,23 @@ static void nohz_idle_balance(struct rq *this_rq, enum 
cpu_idle_type idle)
!test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
goto end;
 
+   /*
+* This cpu is going to update the blocked load of idle CPUs either
+* before doing a rebalancing or just to keep metrics up to date. we
+* can safely update the next update timestamp
+*/
+   rcu_read_lock();
+   sd = rcu_dereference(this_rq->sd);
+   /*
+* Check whether there is a sched_domain available for this cpu.
+* The last other cpu can have been unplugged since the ILB has been
+* triggered and the sched_domain can now be null. The idle balance
+* sequence will quickly be aborted as there is no more idle CPUs
+*/
+   if (sd)
+   nohz.next_update = jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD);
+   rcu_read_unlock();
+
for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
if (balance_cpu =

[PATCH 2/2] sched/fair: Update blocked load from newly idle balance

2017-10-24 Thread Brendan Jackman

We now have a NOHZ kick to avoid the load of idle CPUs becoming stale. This is
good, but it brings about CPU wakeups, which have an energy cost. As an
alternative to waking CPUs up to decay blocked load, we can sometimes do it
from newly idle balance. If the newly idle balance is on a domain that covers
all the currently nohz-idle CPUs, we push the value of nohz.next_update into the
future. That means that if such newly idle balances happen often enough, we
never need to wake up a CPU just to update load.

Since we're doing this new update inside a for_each_domain, we need to do
something to avoid doing multiple updates on the same CPU in the same
idle_balance. A tick stamp is set on the rq in update_blocked_averages as a
simple way to do this. Using a simple jiffies-based timestamp, as opposed to the
last_update_time of the root cfs_rq's sched_avg, means we can do this without
taking the rq lock.

Cc: Dietmar Eggemann 
Cc: Vincent Guittot 
Cc: Ingo Molnar 
Cc: Morten Rasmussen 
Cc: Peter Zijlstra 
Signed-off-by: Brendan Jackman 
---
 kernel/sched/core.c  |  1 +
 kernel/sched/fair.c  | 41 +++--
 kernel/sched/sched.h |  1 +
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d17c5da523a0..d8e71fd27806 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5923,6 +5923,7 @@ void __init sched_init(void)
rq_attach_root(rq, &def_root_domain);
 #ifdef CONFIG_NO_HZ_COMMON
rq->last_load_update_tick = jiffies;
+   rq->last_blocked_load_update_tick = jiffies;
rq->nohz_flags = 0;
 #endif
 #ifdef CONFIG_NO_HZ_FULL
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9085caf49c76..45e9c8056161 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7062,6 +7062,7 @@ static void update_blocked_averages(int cpu)
if (cfs_rq_is_decayed(cfs_rq))
list_del_leaf_cfs_rq(cfs_rq);
}
+   rq->last_blocked_load_update_tick = jiffies;
rq_unlock_irqrestore(rq, &rf);
 }
 
@@ -7121,6 +7122,7 @@ static inline void update_blocked_averages(int cpu)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+   rq->last_blocked_load_update_tick = jiffies;
rq_unlock_irqrestore(rq, &rf);
 }
 
@@ -7615,6 +7617,15 @@ static inline enum fbq_type fbq_classify_rq(struct rq 
*rq)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#ifdef CONFIG_NO_HZ_COMMON
+static struct {
+   cpumask_var_t idle_cpus_mask;
+   atomic_t nr_cpus;
+   unsigned long next_balance; /* in jiffy units */
+   unsigned long next_update; /* in jiffy units */
+} nohz cacheline_aligned;
+#endif
+
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
@@ -7633,6 +7644,30 @@ static inline void update_sd_lb_stats(struct lb_env 
*env, struct sd_lb_stats *sd
if (child && child->flags & SD_PREFER_SIBLING)
prefer_sibling = 1;
 
+#ifdef CONFIG_NO_HZ_COMMON
+   if (env->idle == CPU_NEWLY_IDLE) {
+   int cpu;
+
+   /* Update the stats of NOHZ idle CPUs in the sd */
+   for_each_cpu_and(cpu, sched_domain_span(env->sd),
+nohz.idle_cpus_mask) {
+   struct rq *rq = cpu_rq(cpu);
+
+   /* ... Unless we've already done since the last tick */
+   if (time_after(jiffies,
+   rq->last_blocked_load_update_tick))
+   update_blocked_averages(cpu);
+   }
+   }
+   /*
+* If we've just updated all of the NOHZ idle CPUs, then we can push
+* back the next nohz.next_update, which will prevent an unnecessary
+* wakeup for the nohz stats kick
+*/
+   if (cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd)))
+   nohz.next_update = jiffies + LOAD_AVG_PERIOD;
+#endif
+
load_idx = get_sd_load_idx(env->sd, env->idle);
 
do {
@@ -8657,12 +8692,6 @@ static inline int on_null_domain(struct rq *rq)
  *   needed, they will kick the idle load balancer, which then does idle
  *   load balancing for all the idle CPUs.
  */
-static struct {
-   cpumask_var_t idle_cpus_mask;
-   atomic_t nr_cpus;
-   unsigned long next_balance; /* in jiffy units */
-   unsigned long next_update; /* in jiffy units */
-} nohz cacheline_aligned;
 
 static inline int find_new_ilb(void)
 {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6f95ef653f73..6be8938bb977 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -681,6 +681,7 @@ struct rq {
 #ifdef CONFIG_NO_HZ_COM

Re: [PATCH RFC 5/5] sched/fair: remove impossible condition from find_idlest_group_cpu

2017-10-30 Thread Brendan Jackman


On Sat, Oct 28 2017 at 09:59, Joel Fernandes wrote:
> find_idlest_group_cpu goes through CPUs of a group previous selected by
> find_idlest_group. find_idlest_group returns NULL if the local group is the
> selected one and doesn't execute find_idlest_group_cpu if the group to which
> 'cpu' belongs to is chosen. So we're always guaranteed to call
> find_idlest_group_cpu with a group to which cpu is non-local. This makes one 
> of
> the conditions in find_idlest_group_cpu an impossible one, which we can get 
> rid
> off.
>
> Cc: Ingo Molnar 
> Cc: Peter Zijlstra 
> Cc: Brendan Jackman 
> Cc: Dietmar 
> Signed-off-by: Joel Fernandes 

FWIW:

Reviewed-by: Brendan Jackman 

> ---
>  kernel/sched/fair.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5c49fdb4c508..740602ce799f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5922,7 +5922,7 @@ find_idlest_group_cpu(struct sched_group *group, struct 
> task_struct *p, int this
>   }
>   } else if (shallowest_idle_cpu == -1) {
>   load = weighted_cpuload(cpu_rq(i));
> - if (load < min_load || (load == min_load && i == 
> this_cpu)) {
> + if (load < min_load) {
>   min_load = load;
>   least_loaded_cpu = i;
>   }

[PATCH bpf-next v3] bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH

2021-02-02 Thread Brendan Jackman

When BPF_FETCH is set, atomic instructions load a value from memory
into a register. The current verifier code first checks via
check_mem_access whether we can access the memory, and then checks
via check_reg_arg whether we can write into the register.

For loads, check_reg_arg has the side-effect of marking the
register's value as unknown, and check_mem_access has the side effect
of propagating bounds from memory to the register. This currently only
takes effect for stack memory.

Therefore with the current order, bounds information is thrown away,
but by simply reversing the order of check_reg_arg
vs. check_mem_access, we can instead propagate bounds smartly.

A simple test is added with an infinite loop that can only be proved
unreachable if this propagation is present. This is implemented both
with C and directly in test_verifier using assembly.

Suggested-by: John Fastabend 
Signed-off-by: Brendan Jackman 
---

Difference from v2->v3 [1]:

 * Fixed missing ENABLE_ATOMICS_TESTS check.

Difference from v1->v2:

 * Reworked commit message to clarify this only affects stack memory
 * Added the Suggested-by
 * Added a C-based test.

[1]: 
https://lore.kernel.org/bpf/ca+i-1c2zwubgxwj8kaxbri9rbboyumavj_bbhg+2zf_su9b...@mail.gmail.com/T/#t

 kernel/bpf/verifier.c | 32 +++
 .../selftests/bpf/prog_tests/atomic_bounds.c  | 15 +
 .../selftests/bpf/progs/atomic_bounds.c   | 24 ++
 .../selftests/bpf/verifier/atomic_bounds.c| 27 
 4 files changed, 84 insertions(+), 14 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomic_bounds.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_bounds.c

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 972fc38eb62d..5e09632efddb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3665,9 +3665,26 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
return -EACCES;
}

+   if (insn->imm & BPF_FETCH) {
+   if (insn->imm == BPF_CMPXCHG)
+   load_reg = BPF_REG_0;
+   else
+   load_reg = insn->src_reg;
+
+   /* check and record load of old value */
+   err = check_reg_arg(env, load_reg, DST_OP);
+   if (err)
+   return err;
+   } else {
+   /* This instruction accesses a memory location but doesn't
+* actually load it into a register.
+*/
+   load_reg = -1;
+   }
+
/* check whether we can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-  BPF_SIZE(insn->code), BPF_READ, -1, true);
+  BPF_SIZE(insn->code), BPF_READ, load_reg, true);
if (err)
return err;

@@ -3677,19 +3694,6 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
if (err)
return err;

-   if (!(insn->imm & BPF_FETCH))
-   return 0;
-
-   if (insn->imm == BPF_CMPXCHG)
-   load_reg = BPF_REG_0;
-   else
-   load_reg = insn->src_reg;
-
-   /* check and record load of old value */
-   err = check_reg_arg(env, load_reg, DST_OP);
-   if (err)
-   return err;
-
return 0;
 }

diff --git a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c 
b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
new file mode 100644
index ..addf127068e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomic_bounds.skel.h"
+
+void test_atomic_bounds(void)
+{
+   struct atomic_bounds *skel;
+   __u32 duration = 0;
+
+   skel = atomic_bounds__open_and_load();
+   if (CHECK(!skel, "skel_load", "couldn't load program\n"))
+   return;
+}
diff --git a/tools/testing/selftests/bpf/progs/atomic_bounds.c 
b/tools/testing/selftests/bpf/progs/atomic_bounds.c
new file mode 100644
index ..e5fff7fc7f8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/atomic_bounds.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+
+#ifdef ENABLE_ATOMICS_TESTS
+bool skip_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_tests = true;
+#endif
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+   int a = 0;
+   int b = __sync_fetch_and_add(&a, 1);
+   /* b is certainly 0 here. Can the verifier tell? */
+   while (b)
+   continue;
+#endif
+   retur

[PATCH bpf-next v2] bpf: Propagate memory bounds to registers in atomics w/ BPF_FETCH

2021-02-01 Thread Brendan Jackman

When BPF_FETCH is set, atomic instructions load a value from memory
into a register. The current verifier code first checks via
check_mem_access whether we can access the memory, and then checks
via check_reg_arg whether we can write into the register.

For loads, check_reg_arg has the side-effect of marking the
register's value as unknown, and check_mem_access has the side effect
of propagating bounds from memory to the register.

Therefore with the current order, bounds information is thrown away,
but by simply reversing the order of check_reg_arg
vs. check_mem_access, we can instead propagate bounds smartly.

A simple test is added with an infinite loop that can only be proved
unreachable if this propagation is present. This is implemented both
with C and directly in test_verifier using assembly.

Suggested-by: John Fastabend 
Signed-off-by: Brendan Jackman 

---

Difference from v1->v2:

 * Reworked commit message to clarify this only affects stack memory
 * Added the Suggested-by
 * Added a C-based test.

 kernel/bpf/verifier.c | 32 +++
 .../selftests/bpf/prog_tests/atomic_bounds.c  | 15 +
 .../selftests/bpf/progs/atomic_bounds.c   | 15 +
 .../selftests/bpf/verifier/atomic_bounds.c| 27 
 4 files changed, 75 insertions(+), 14 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomic_bounds.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_bounds.c

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 972fc38eb62d..5e09632efddb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3665,9 +3665,26 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
return -EACCES;
}

+   if (insn->imm & BPF_FETCH) {
+   if (insn->imm == BPF_CMPXCHG)
+   load_reg = BPF_REG_0;
+   else
+   load_reg = insn->src_reg;
+
+   /* check and record load of old value */
+   err = check_reg_arg(env, load_reg, DST_OP);
+   if (err)
+   return err;
+   } else {
+   /* This instruction accesses a memory location but doesn't
+* actually load it into a register.
+*/
+   load_reg = -1;
+   }
+
/* check whether we can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-  BPF_SIZE(insn->code), BPF_READ, -1, true);
+  BPF_SIZE(insn->code), BPF_READ, load_reg, true);
if (err)
return err;

@@ -3677,19 +3694,6 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
if (err)
return err;

-   if (!(insn->imm & BPF_FETCH))
-   return 0;
-
-   if (insn->imm == BPF_CMPXCHG)
-   load_reg = BPF_REG_0;
-   else
-   load_reg = insn->src_reg;
-
-   /* check and record load of old value */
-   err = check_reg_arg(env, load_reg, DST_OP);
-   if (err)
-   return err;
-
return 0;
 }

diff --git a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c 
b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
new file mode 100644
index ..addf127068e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomic_bounds.skel.h"
+
+void test_atomic_bounds(void)
+{
+   struct atomic_bounds *skel;
+   __u32 duration = 0;
+
+   skel = atomic_bounds__open_and_load();
+   if (CHECK(!skel, "skel_load", "couldn't load program\n"))
+   return;
+}
diff --git a/tools/testing/selftests/bpf/progs/atomic_bounds.c 
b/tools/testing/selftests/bpf/progs/atomic_bounds.c
new file mode 100644
index ..ea2e982c7f3f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/atomic_bounds.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+   int a = 0;
+   int b = __sync_fetch_and_add(&a, 1);
+   /* b is certainly 0 here. Can the verifier tell? */
+   while (b)
+   continue;
+   return 0;
+}
diff --git a/tools/testing/selftests/bpf/verifier/atomic_bounds.c 
b/tools/testing/selftests/bpf/verifier/atomic_bounds.c
new file mode 100644
index ..e82183e4914f
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_bounds.c
@@ -0,0 +1,27 @@
+{
+   "BPF_ATOMIC bounds propagation, mem->reg",
+   .insns = {
+   /* a = 0; */
+   /*
+* No

Re: [PATCH bpf-next v2] bpf: Propagate memory bounds to registers in atomics w/ BPF_FETCH

2021-02-02 Thread Brendan Jackman

On Tue, 2 Feb 2021 at 00:27, Alexei Starovoitov
 wrote:
>
> On Mon, Feb 1, 2021 at 7:00 AM Brendan Jackman  wrote:
> > +
> > +SEC("fentry/bpf_fentry_test1")
> > +int BPF_PROG(sub, int x)
> > +{
> > +   int a = 0;
> > +   int b = __sync_fetch_and_add(&a, 1);
>
> It probably needs ENABLE_ATOMICS_TESTS ?
>
> Otherwise clang without -mcpu=v3 will complain:
> "fatal error: error in backend: Invalid usage of the XADD return value"

Ah yep of course, thanks for spotting. v3 incoming...

[RFC] security: replace indirect calls with static calls

2020-08-20 Thread Brendan Jackman

  MACRO(2, x, y)
MACRO(3, x, y)
MACRO(4, x, y)

This is used in conjunction with LSM_HOOK definitions in
linux/lsm_hook_defs.h to execute a macro for each static slot of each LSM
hook.

The patches for static calls [6] are not upstreamed yet.

The number of available slots for each LSM hook is currently fixed at
11 (the number of LSMs in the kernel). Ideally, it should automatically
adapt to the number of LSMs compiled into the kernel.

If there’s no practical way to implement such automatic adaptation, an
option instead would be to remove the panic call by falling-back to the old
linked-list mechanism, which is still present anyway (see below).

A few special cases of LSM don't use the macro call_[int/void]_hook but
have their own calling logic. The linked-lists are kept as a possible slow
path fallback for them.

Before:

https://gist.githubusercontent.com/PaulRenauld/fe3ee7b51121556e03c181432c8b3dd5/raw/62437b1416829ca0e8a0ed9101530bc90fd42d69/lsm-performance.png

After:

https://gist.githubusercontent.com/PaulRenauld/fe3ee7b51121556e03c181432c8b3dd5/raw/00e414b73e0c38c2eae8f05d5363a745179ba285/faster-lsm-results.png

With this implementation, any overhead of the indirect call in the LSM
framework is completely mitigated (performance results: [7]). This
facilitates the adoption of "bpf" LSM on production machines and also
benefits all other LSMs.

[1]: https://lwn.net/ml/linux-kernel/20200710133831.943894...@infradead.org/
[2]: https://lwn.net/Articles/798157/
[3] measurements: 
https://gist.githubusercontent.com/PaulRenauld/fe3ee7b51121556e03c181432c8b3dd5/raw/62437b1416829ca0e8a0ed9101530bc90fd42d69/lsm-performance.png
protocol: 
https://gist.github.com/PaulRenauld/fe3ee7b51121556e03c181432c8b3dd5#file-measurement-protocol-md
[4]: https://lwn.net/Articles/813261/
[5]: git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git 
x86/static_call
[6]: https://lwn.net/ml/linux-kernel/20200710133831.943894...@infradead.org/#t
[7]: 
https://gist.githubusercontent.com/PaulRenauld/fe3ee7b51121556e03c181432c8b3dd5/raw/00e414b73e0c38c2eae8f05d5363a745179ba285/faster-lsm-results.png

Cc: Alexei Starovoitov 
Cc: Daniel Borkmann 
Cc: James Morris 
Cc: p...@google.com
Cc: ja...@google.com
Cc: pet...@infradead.org
Cc: rafael.j.wyso...@intel.com
Cc: keesc...@chromium.org
Cc: thgar...@chromium.org
Cc: kpsi...@google.com
Cc: paul.renauld.e...@gmail.com

Signed-off-by: Paul Renauld 
Signed-off-by: KP Singh 
Signed-off-by: Brendan Jackman 
---
 include/linux/lsm_hooks.h   |   1 +
 include/linux/lsm_static_call.h | 134 
 security/security.c | 217 
 3 files changed, 331 insertions(+), 21 deletions(-)
 create mode 100644 include/linux/lsm_static_call.h

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 95b7c1d32062..d11e116b588e 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1524,6 +1524,7 @@ union security_list_options {
#define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__);
#include "lsm_hook_defs.h"
#undef LSM_HOOK
+   void *generic_func;
 };
 
 struct security_hook_heads {
diff --git a/include/linux/lsm_static_call.h b/include/linux/lsm_static_call.h
new file mode 100644
index ..f5f5698292e0
--- /dev/null
+++ b/include/linux/lsm_static_call.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#ifndef __LINUX_LSM_STATIC_CALL_H
+#define __LINUX_LSM_STATIC_CALL_H
+
+/*
+ * Static slots are used in security/security.c to avoid costly
+ * indirect calls by replacing them with static calls.
+ * The number of static calls for each LSM hook is fixed.
+ */
+#define SECURITY_STATIC_SLOT_COUNT 11
+
+/*
+ * Identifier for the LSM static slots.
+ * HOOK is an LSM hook as defined in linux/lsm_hookdefs.h
+ * IDX is the index of the slot. 0 <= NUM < SECURITY_STATIC_SLOT_COUNT
+ */
+#define STATIC_SLOT(HOOK, IDX) security_static_slot_##HOOK##_##IDX
+
+/*
+ * Call the macro M for each LSM hook slot.
+ * M should take as first argument the index and then
+ * the same __VA_ARGS__
+ * Essentially, this will expand to:
+ * M(0, ...)
+ * M(1, ...)
+ * M(2, ...)
+ * ...
+ * Note that no trailing semicolon is placed so M should be defined
+ * accordingly.
+ * This adapts to a change to SECURITY_STATIC_SLOT_COUNT.
+ */
+#define SECURITY_FOREACH_STATIC_SLOT(M, ...)   \
+   UNROLL_MACRO_LOOP(SECURITY_STATIC_SLOT_COUNT, M, __VA_ARGS__)
+
+/*
+ * Intermediate macros to expand SECURITY_STATIC_SLOT_COUNT
+ */
+#define UNROLL_MACRO_LOOP(N, MACRO, ...)   \
+   _UNROLL_MACRO_LOOP(N, MACRO, __VA_ARGS__)
+
+#define _UNROLL_MACRO_LOOP(N, MACRO, ...)  \
+   __UNROLL_MACRO_LOOP(N, MACRO, __VA_ARGS__)
+
+#define __UNROLL_MACRO_LOOP(N, MACRO, ...) \
+   __UNROLL_MACRO_LOOP_##N(MACRO, __VA_ARGS__)
+
+#defin

[PATCH bpf-next] bpf: Clarify return value of probe str helpers

2021-01-12 Thread Brendan Jackman

When the buffer is too small to contain the input string, these
helpers return the length of the buffer, not the length of the
original string. This tries to make the docs totally clear about
that, since "the length of the [copied ]string" could also refer to
the length of the input.

Signed-off-by: Brendan Jackman 
---
 include/uapi/linux/bpf.h   | 10 +-
 tools/include/uapi/linux/bpf.h | 10 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77d7c1bb2923..a1ad32456f89 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2993,10 +2993,10 @@ union bpf_attr {
  * string length is larger than *size*, just *size*-1 bytes are
  * copied and the last byte is set to NUL.
  *
- * On success, the length of the copied string is returned. This
- * makes this helper useful in tracing programs for reading
- * strings, and more importantly to get its length at runtime. See
- * the following snippet:
+ * On success, returns the number of bytes that were written,
+ * including the terminal NUL. This makes this helper useful in
+ * tracing programs for reading strings, and more importantly to
+ * get its length at runtime. See the following snippet:
  *
  * ::
  *
@@ -3024,7 +3024,7 @@ union bpf_attr {
  * **->mm->env_start**: using this helper and the return value,
  * one can quickly iterate at the right offset of the memory area.
  * Return
- * On success, the strictly positive length of the string,
+ * On success, the strictly positive length of the output string,
  * including the trailing NUL character. On error, a negative
  * value.
  *
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77d7c1bb2923..a1ad32456f89 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2993,10 +2993,10 @@ union bpf_attr {
  * string length is larger than *size*, just *size*-1 bytes are
  * copied and the last byte is set to NUL.
  *
- * On success, the length of the copied string is returned. This
- * makes this helper useful in tracing programs for reading
- * strings, and more importantly to get its length at runtime. See
- * the following snippet:
+ * On success, returns the number of bytes that were written,
+ * including the terminal NUL. This makes this helper useful in
+ * tracing programs for reading strings, and more importantly to
+ * get its length at runtime. See the following snippet:
  *
  * ::
  *
@@ -3024,7 +3024,7 @@ union bpf_attr {
  * **->mm->env_start**: using this helper and the return value,
  * one can quickly iterate at the right offset of the memory area.
  * Return
- * On success, the strictly positive length of the string,
+ * On success, the strictly positive length of the output string,
  * including the trailing NUL character. On error, a negative
  * value.
  *

base-commit: e22d7f05e445165e58feddb4e40cc9c0f94453bc
--
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next] bpf: Fix a verifier message for alloc size helper arg

2021-01-12 Thread Brendan Jackman

The error message here is misleading: the argument will be rejected
unless it is a known constant.

Signed-off-by: Brendan Jackman 
---
 kernel/bpf/verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 17270b8404f1..5534e667bdb1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4319,7 +4319,7 @@ static int check_func_arg(struct bpf_verifier_env *env, 
u32 arg,
err = mark_chain_precision(env, regno);
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
-   verbose(env, "R%d unbounded size, use 'var &= const' or 
'if (var < const)'\n",
+   verbose(env, "R%d is not a known constant'\n",
regno);
return -EACCES;
}

base-commit: e22d7f05e445165e58feddb4e40cc9c0f94453bc
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next] libbpf: Expose libbpf ringbuffer epoll_fd

2020-12-11 Thread Brendan Jackman

This allows the user to do their own manual polling in more
complicated setups.

Signed-off-by: Brendan Jackman 
---
 tools/lib/bpf/libbpf.h  | 1 +
 tools/lib/bpf/ringbuf.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6909ee81113a..cde07f64771e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -536,6 +536,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int 
map_fd,
ring_buffer_sample_fn sample_cb, void *ctx);
 LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
 LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__epoll_fd(struct ring_buffer *rb);
 
 /* Perf buffer APIs */
 struct perf_buffer;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 5c6522c89af1..45a36648b403 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -282,3 +282,9 @@ int ring_buffer__poll(struct ring_buffer *rb, int 
timeout_ms)
}
return cnt < 0 ? -errno : res;
 }
+
+/* Get an fd that can be used to sleep until data is available in the ring(s) 
*/
+int ring_buffer__epoll_fd(struct ring_buffer *rb)
+{
+   return rb->epoll_fd;
+}

base-commit: b4fe9fec51ef48011f11c2da4099f0b530449c92
-- 
2.29.2.576.ga3fc446d84-goog

Re: [PATCH bpf-next v3 12/14] bpf: Pull tools/build/feature biz into selftests Makefile

2020-12-07 Thread Brendan Jackman

On Fri, Dec 04, 2020 at 11:00:24AM -0800, Andrii Nakryiko wrote:
> On Fri, Dec 4, 2020 at 1:41 AM Brendan Jackman  wrote:
> >
> > On Thu, Dec 03, 2020 at 01:01:27PM -0800, Andrii Nakryiko wrote:
> > > On Thu, Dec 3, 2020 at 8:07 AM Brendan Jackman  
> > > wrote:
> > > >
> > > > This is somewhat cargo-culted from the libbpf build. It will be used
> > > > in a subsequent patch to query for Clang BPF atomics support.
> > > >
> > > > Change-Id: I9318a1702170eb752acced35acbb33f45126c44c
> > >
> > > Haven't seen this before. What's this Change-Id business?
> >
> > Argh, apologies. Looks like it's time for me to adopt a less error-prone
> > workflow for sending patches.
> >
> > (This is noise from Gerrit, which we sometimes use for internal reviews)
> >
> > > > Signed-off-by: Brendan Jackman 
> > > > ---
> > > >  tools/testing/selftests/bpf/.gitignore |  1 +
> > > >  tools/testing/selftests/bpf/Makefile   | 38 ++
> > > >  2 files changed, 39 insertions(+)
> > >
> > > All this just to detect the support for clang atomics?... Let's not
> > > pull in the entire feature-detection framework unnecessarily,
> > > selftests Makefile is complicated enough without that.
> >
> > Then the test build would break for people who haven't updated Clang.
> > Is that acceptable?
> >
> > I'm aware of cases where you need to be on a pretty fresh Clang for
> > tests to _pass_ so maybe it's fine.
> 
> I didn't mean to drop any detection of this new feature. I just didn't
> want a new dependency on tools' feature probing framework. See
> IS_LITTLE_ENDIAN and get_sys_includes, we already have various feature
> detection-like stuff in there. So we can do this with a one-liner. I
> just want to keep it simple. Thanks.

Ah right gotcha. Then yeah I think we can do this:

 BPF_ATOMICS_SUPPORTED = $(shell \
echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(&x, 
1, 2); }" \
| $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
1 || echo 0)

Re: [PATCH bpf-next v3 10/14] bpf: Add bitwise atomic instructions

2020-12-07 Thread Brendan Jackman

On Fri, Dec 04, 2020 at 07:21:22AM -0800, Yonghong Song wrote:
> 
> 
> On 12/4/20 1:36 AM, Brendan Jackman wrote:
> > On Thu, Dec 03, 2020 at 10:42:19PM -0800, Yonghong Song wrote:
> > > 
> > > 
> > > On 12/3/20 8:02 AM, Brendan Jackman wrote:
> > > > This adds instructions for
> > > > 
> > > > atomic[64]_[fetch_]and
> > > > atomic[64]_[fetch_]or
> > > > atomic[64]_[fetch_]xor
> > > > 
> > > > All these operations are isomorphic enough to implement with the same
> > > > verifier, interpreter, and x86 JIT code, hence being a single commit.
> > > > 
> > > > The main interesting thing here is that x86 doesn't directly support
> > > > > the fetch_ version of these operations, so we need to generate a CMPXCHG
> > > > loop in the JIT. This requires the use of two temporary registers,
> > > > IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.
> > > > 
> > > > Change-Id: I340b10cecebea8cb8a52e3606010cde547a10ed4
> > > > Signed-off-by: Brendan Jackman 
> > > > ---
> > > >arch/x86/net/bpf_jit_comp.c  | 50 +-
> > > >include/linux/filter.h   | 60 
> > > > 
> > > >kernel/bpf/core.c|  5 ++-
> > > >kernel/bpf/disasm.c  | 21 ++---
> > > >kernel/bpf/verifier.c|  6 
> > > >tools/include/linux/filter.h | 60 
> > > > 
> > > >6 files changed, 196 insertions(+), 6 deletions(-)
> > > > 
> > [...]
> > > > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > > > index 6186280715ed..698f82897b0d 100644
> > > > --- a/include/linux/filter.h
> > > > +++ b/include/linux/filter.h
> > > > @@ -280,6 +280,66 @@ static inline bool insn_is_zext(const struct 
> > > > bpf_insn *insn)
> > [...]
> > > > +#define BPF_ATOMIC_FETCH_XOR(SIZE, DST, SRC, OFF)  \
> > > > +   ((struct bpf_insn) {\
> > > > +   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
> > > > +   .dst_reg = DST, \
> > > > +   .src_reg = SRC, \
> > > > +   .off   = OFF,   \
> > > > +   .imm   = BPF_XOR | BPF_FETCH })
> > > > +
> > > >/* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) 
> > > > */
> > > 
> > > Looks like BPF_ATOMIC_XOR/OR/AND/... all similar to each other.
> > > The same is for BPF_ATOMIC_FETCH_XOR/OR/AND/...
> > > 
> > > > I am wondering whether it makes sense to have
> > > > BPF_ATOMIC_BOP(BOP, SIZE, DST, SRC, OFF) and
> > > > BPF_ATOMIC_FETCH_BOP(BOP, SIZE, DST, SRC, OFF)
> > > > so that we can have fewer macros?
> > 
> > Hmm yeah I think that's probably a good idea, it would be consistent
> > with the macros for non-atomic ALU ops.
> > 
> > I don't think 'BOP' would be very clear though, 'ALU' might be more
> > obvious.
> 
> BPF_ATOMIC_ALU and BPF_ATOMIC_FETCH_ALU indeed better.

On second thoughts I think it feels right (i.e. it would be roughly
consistent with the level of abstraction of the rest of this macro API)
to go further and just have two macros BPF_ATOMIC64 and BPF_ATOMIC32:

/*
 * Atomic ALU ops:
 *
 *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
 *   BPF_AND  *(uint *) (dst_reg + off16) &= src_reg
 *   BPF_OR   *(uint *) (dst_reg + off16) |= src_reg
 *   BPF_XOR  *(uint *) (dst_reg + off16) ^= src_reg
 *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + 
off16, src_reg);
 *   BPF_AND | BPF_FETCH  src_reg = atomic_fetch_and(dst_reg + 
off16, src_reg);
 *   BPF_OR | BPF_FETCH   src_reg = atomic_fetch_or(dst_reg + 
off16, src_reg);
 *   BPF_XOR | BPF_FETCH  src_reg = atomic_fetch_xor(dst_reg + 
off16, src_reg);
 *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, 
src_reg)
 *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, 
src_reg)
 */

#define BPF_ATOMIC64(OP, DST, SRC, OFF) \
((struct bpf_insn) {\

Re: [PATCH bpf-next v3 13/14] bpf: Add tests for new BPF atomic operations

2020-12-07 Thread Brendan Jackman

On Fri, Dec 04, 2020 at 11:49:22AM -0800, Andrii Nakryiko wrote:
> On Fri, Dec 4, 2020 at 7:29 AM Yonghong Song  wrote:
> >
> >
> >
> > On 12/4/20 1:45 AM, Brendan Jackman wrote:
> > > On Thu, Dec 03, 2020 at 11:06:31PM -0800, Yonghong Song wrote:
> > >> On 12/3/20 8:02 AM, Brendan Jackman wrote:
> > > [...]
> > >>> diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c 
> > >>> b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
> > >>> new file mode 100644
> > >>> index ..66f0ccf4f4ec
> > >>> --- /dev/null
> > >>> +++ b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
> > >>> @@ -0,0 +1,262 @@
> > >>> +// SPDX-License-Identifier: GPL-2.0
> > >>> +
> > >>> +#include 
> > >>> +
> > >>> +
> > >>> +#include "atomics_test.skel.h"
> > >>> +
> > >>> +static struct atomics_test *setup(void)
> > >>> +{
> > >>> +   struct atomics_test *atomics_skel;
> > >>> +   __u32 duration = 0, err;
> > >>> +
> > >>> +   atomics_skel = atomics_test__open_and_load();
> > >>> +   if (CHECK(!atomics_skel, "atomics_skel_load", "atomics skeleton 
> > >>> failed\n"))
> > >>> +   return NULL;
> > >>> +
> > >>> +   if (atomics_skel->data->skip_tests) {
> > >>> +   printf("%s:SKIP:no ENABLE_ATOMICS_TEST (missing Clang BPF 
> > >>> atomics support)",
> > >>> +  __func__);
> > >>> +   test__skip();
> > >>> +   goto err;
> > >>> +   }
> > >>> +
> > >>> +   err = atomics_test__attach(atomics_skel);
> > >>> +   if (CHECK(err, "atomics_attach", "atomics attach failed: %d\n", 
> > >>> err))
> > >>> +   goto err;
> > >>> +
> > >>> +   return atomics_skel;
> > >>> +
> > >>> +err:
> > >>> +   atomics_test__destroy(atomics_skel);
> > >>> +   return NULL;
> > >>> +}
> > >>> +
> > >>> +static void test_add(void)
> > >>> +{
> > >>> +   struct atomics_test *atomics_skel;
> > >>> +   int err, prog_fd;
> > >>> +   __u32 duration = 0, retval;
> > >>> +
> > >>> +   atomics_skel = setup();
> > >>
> > >> When running the test, I observed a noticeable delay between skel load 
> > >> and
> > >> skel attach. The reason is the bpf program object file contains
> > >> multiple programs and the above setup() tries to do attachment
> > >> for ALL programs but actually below only "add" program is tested.
> > >> This will unnecessarily increase test_progs running time.
> > >>
> > >> The best is for setup() here only load and attach program "add".
> > >> The libbpf API bpf_program__set_autoload() can set a particular
> > >> program not autoload. You can call attach function explicitly
> > >> for one specific program. This should be able to reduce test
> > >> running time.
> > >
> > > Interesting, thanks a lot - I'll try this out next week. Maybe we can
> > > actually load all the progs once at the beginning (i.e. in
> >
> > If you have subtests, people expect each subtest to be individually runnable.
> > This will complicate your logic.
> >
> > > test_atomics_test) then attach/detach each prog individually as needed...
> > > Sorry, I haven't got much of a grip on libbpf yet.
> >
> > One alternative is not to do subtests. There is nothing wrong with having
> > just one bpf program instead of many. This way, you load all and attach
> > once, then do all the test verification.
> 
> I think subtests are good for debuggability, at least. But in this
> case it's very easy to achieve everything you've discussed:
> 
> 1. do open() right there in test_atomics_test()  (btw, consider naming
> the test just "atomics" or "atomic_insns" or something, no need for
> test-test tautology)
> 2. check if needs skipping, skip entire test
> 3. if not skipping, load
> 4. then pass the same instance of the skeleton to each subtest
> 5. each subtest will
>   5a. bpf_prog__attach(skel->prog.my_specific_subtest_prog);
>   5b. trigger and do checks
>   5c. bpf_link__destroy();

Thanks, this seems like the way forward to me.

[PATCH bpf-next v4 00/11] Atomics for eBPF

2020-12-07 Thread Brendan Jackman

Status of the patches
=====================

Thanks for the reviews! Differences from v3->v4 [1]:

* Added one Ack from Yonghong. He acked some other patches but those
  have now changed non-trivially so I didn't add those acks.

* Fixups to commit messages.

* Fixed disassembly and comments: first arg to atomic_fetch_* is a
  pointer.

* Improved prog_test efficiency. BPF progs are now all loaded in a
  single call, then the skeleton is re-used for each subtest.

* Dropped use of tools/build/feature in favour of a one-liner in the
  Makefile.

* Dropped the commit that created an emit_neg helper in the x86
  JIT. It's not used any more (it wasn't used in v3 either).

* Combined all the different filter.h macros (used to be
  BPF_ATOMIC_ADD, BPF_ATOMIC_FETCH_ADD, BPF_ATOMIC_AND, etc) into
  just BPF_ATOMIC32 and BPF_ATOMIC64.

* Removed some references to BPF_STX_XADD from tools/, samples/ and
  lib/ that I missed before.

Differences from v2->v3 [1]:

* More minor fixes and naming/comment changes

* Dropped atomic subtract: compilers can implement this by preceding
  an atomic add with a NEG instruction (which is what the x86 JIT did
  under the hood anyway).

* Dropped the use of -mcpu=v4 in the Clang BPF command-line; there is
  no longer an architecture version bump. Instead a feature test is
  added to Kbuild - it builds a source file to check if Clang
  supports BPF atomics.

* Fixed the prog_test so it no longer breaks
  test_progs-no_alu32. This requires some ifdef acrobatics to avoid
  complicating the prog_tests model where the same userspace code
  exercises both the normal and no_alu32 BPF test objects, using the
  same skeleton header.

Differences from v1->v2 [1]:

* Fixed mistakes in the netronome driver

* Added sub, add, or, xor operations

* The above led to some refactors to keep things readable. (Maybe I
  should have just waited until I'd implemented these before starting
  the review...)

* Replaced BPF_[CMP]SET | BPF_FETCH with just BPF_[CMP]XCHG, which
  include the BPF_FETCH flag

* Added a bit of documentation. Suggestions welcome for more places
  to dump this info...

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was
https://reviews.llvm.org/D72184).

This only includes a JIT implementation for x86_64 - I don't plan to
implement JIT support myself for other architectures.

Operations
==========

This patchset adds atomic operations to the eBPF instruction set. The
use-case that motivated this work was a trivial and efficient way to
generate globally-unique cookies in BPF progs, but I think it's
obvious that these features are pretty widely applicable.  The
instructions that are added here can be summarised with this list of
kernel operations:

* atomic[64]_[fetch_]add
* atomic[64]_[fetch_]and
* atomic[64]_[fetch_]or
* atomic[64]_xchg
* atomic[64]_cmpxchg

The following are left out of scope for this effort:

* 16 and 8 bit operations
* Explicit memory barriers

Encoding
========

I originally planned to add new values for bpf_insn.opcode. This was
rather unpleasant: the opcode space has holes in it but no entire
instruction classes[2]. Yonghong Song had a better idea: use the
immediate field of the existing STX XADD instruction to encode the
operation. This works nicely, without breaking existing programs,
because the immediate field is currently reserved-must-be-zero, and
extra-nicely because BPF_ADD happens to be zero.

Note that this of course makes immediate-source atomic operations
impossible. It's hard to imagine a measurable speedup from such
instructions, and if it existed it would certainly not benefit x86,
which has no support for them.

The BPF_OP opcode fields are re-used in the immediate, and an
additional flag BPF_FETCH is used to mark instructions that should
fetch a pre-modification value from memory.

So, BPF_XADD is now called BPF_ATOMIC (the old name is kept to avoid
breaking userspace builds), and where we previously had .imm = 0, we
now have .imm = BPF_ADD (which is 0).

Operands
========

Reg-source eBPF instructions only have two operands, while these
atomic operations have up to four. To avoid needing to encode
additional operands, then:

- One of the input registers is re-used as an output register
  (e.g. atomic_fetch_add both reads from and writes to the source
  register).

- Where necessary (i.e. for cmpxchg), R0 is "hard-coded" as one of
  the operands.

This approach also allows the new eBPF instructions to map directly
to single x86 instructions.

[1] Previous iterations:
v1: 
https://lore.kernel.org/bpf/20201123173202.1335708-1-jackm...@google.com/
v2: 
https://lore.kernel.org/bpf/20201127175738.1085417-1-jackm...@google.com/
v3: https://lore.kernel.org/bpf/x8kn7na7bjc7a...@google.com/

[2] Visualisation of eBPF opcode space:
https://gist.github.com/bjackman/00fdad2d5dfff601c1918bc29b16e778

Brendan Jackman (11):
  bpf

[PATCH bpf-next v4 01/11] bpf: x86: Factor out emission of ModR/M for *(reg + off)

2020-12-07 Thread Brendan Jackman

The case for JITing atomics is about to get more complicated. Let's
factor out some common code to make the review and result more
readable.

NB the atomics code doesn't yet use the new helper - a subsequent
patch will add its use as a side-effect of other changes.

Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c | 42 +
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 796506dcfc42..cc818ed7c2b9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -681,6 +681,27 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 
dst_reg, u32 src_reg)
*pprog = prog;
 }
 
+/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
+static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is_imm8(off)) {
+   /* 1-byte signed displacement.
+*
+* If off == 0 we could skip this and save one extra byte, but
+* special case of x86 R13 which always needs an offset is not
+* worth the hassle
+*/
+   EMIT2(add_2reg(0x40, ptr_reg, val_reg), off);
+   } else {
+   /* 4-byte signed displacement */
+   EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off);
+   }
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -708,15 +729,7 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
break;
}
-   /*
-* If insn->off == 0 we can save one extra byte, but
-* special case of x86 R13 which always needs an offset
-* is not worth the hassle
-*/
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+   emit_insn_suffix(&prog, src_reg, dst_reg, off);
*pprog = prog;
 }
 
@@ -751,10 +764,7 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
break;
}
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+   emit_insn_suffix(&prog, dst_reg, src_reg, off);
*pprog = prog;
 }
 
@@ -1240,11 +1250,7 @@ st:  if (is_imm8(insn->off))
goto xadd;
case BPF_STX | BPF_XADD | BPF_DW:
EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
-xadd:  if (is_imm8(insn->off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), 
insn->off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
-   insn->off);
+xadd:  emit_modrm_dstoff(&prog, dst_reg, src_reg, insn->off);
break;
 
/* call */
-- 
2.29.2.576.ga3fc446d84-goog

[PATCH bpf-next v4 05/11] bpf: Move BPF_STX reserved field check into BPF_STX verifier code

2020-12-07 Thread Brendan Jackman

I can't find a reason why this code is in resolve_pseudo_ldimm64;
since I'll be modifying it in a subsequent commit, tidy it up.

Signed-off-by: Brendan Jackman 
---
 kernel/bpf/verifier.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 615be10abd71..745c53df0485 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9527,6 +9527,12 @@ static int do_check(struct bpf_verifier_env *env)
} else if (class == BPF_STX) {
enum bpf_reg_type *prev_dst_type, dst_reg_type;
 
+   if (((BPF_MODE(insn->code) != BPF_MEM &&
+ BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm 
!= 0)) {
+   verbose(env, "BPF_STX uses reserved fields\n");
+   return -EINVAL;
+   }
+
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
err = check_atomic(env, env->insn_idx, insn);
if (err)
@@ -9939,13 +9945,6 @@ static int resolve_pseudo_ldimm64(struct 
bpf_verifier_env *env)
return -EINVAL;
}
 
-   if (BPF_CLASS(insn->code) == BPF_STX &&
-   ((BPF_MODE(insn->code) != BPF_MEM &&
- BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) {
-   verbose(env, "BPF_STX uses reserved fields\n");
-   return -EINVAL;
-   }
-
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
-- 
2.29.2.576.ga3fc446d84-goog

[PATCH bpf-next v4 08/11] bpf: Pull out a macro for interpreting atomic ALU operations

2020-12-07 Thread Brendan Jackman

Since the atomic operations that are added in subsequent commits are
all isomorphic with BPF_ADD, pull out a macro to avoid the
interpreter becoming dominated by lines of atomic-related code.

Note that this sacrifices interpreter performance (combining
STX_ATOMIC_W and STX_ATOMIC_DW into a single switch case means that we
need an extra conditional branch to differentiate them) in favour of
compact and (relatively!) simple C code.

Acked-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 kernel/bpf/core.c | 80 +++
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 28f960bc2e30..1d9e5dcde03a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1618,55 +1618,53 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
LDX_PROBE(DW, 8)
 #undef LDX_PROBE
 
-   STX_ATOMIC_W:
-   switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
-   atomic_add((u32) SRC, (atomic_t *)(unsigned long)
-  (DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u32) atomic_fetch_add(
-   (u32) SRC,
-   (atomic_t *)(unsigned long) (DST + insn->off));
-   break;
-   case BPF_XCHG:
-   SRC = (u32) atomic_xchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) SRC);
-   break;
-   case BPF_CMPXCHG:
-   BPF_R0 = (u32) atomic_cmpxchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) BPF_R0, (u32) SRC);
+#define ATOMIC_ALU_OP(BOP, KOP)
\
+   case BOP:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   atomic_##KOP((u32) SRC, (atomic_t *)(unsigned 
long) \
+(DST + insn->off));\
+   else\
+   atomic64_##KOP((u64) SRC, (atomic64_t 
*)(unsigned long) \
+  (DST + insn->off));  \
+   break;  \
+   case BOP | BPF_FETCH:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   SRC = (u32) atomic_fetch_##KOP( \
+   (u32) SRC,  \
+   (atomic_t *)(unsigned long) (DST + 
insn->off)); \
+   else\
+   SRC = (u64) atomic64_fetch_##KOP(   \
+   (u64) SRC,  \
+   (atomic64_t *)(s64) (DST + insn->off)); 
\
break;
-   default:
-   goto default_label;
-   }
-   CONT;
 
STX_ATOMIC_DW:
+   STX_ATOMIC_W:
switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
-   atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
-(DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u64) atomic64_fetch_add(
-   (u64) SRC,
-   (atomic64_t *)(s64) (DST + insn->off));
-   break;
+   ATOMIC_ALU_OP(BPF_ADD, add)
+#undef ATOMIC_ALU_OP
+
case BPF_XCHG:
-   SRC = (u64) atomic64_xchg(
-   (atomic64_t *)(u64) (DST + insn->off),
-   (u64) SRC);
+   if (BPF_SIZE(insn->code) == BPF_W)
+   SRC = (u32) atomic_xchg(
+   (atomic_t *)(unsigned long) (DST + 
insn->off),
+   (u32) SRC);
+   else
+   SRC = (u64) atomic64_xchg(
+   (atomic64_t *)(u64) (DST + insn->off),
+   (u64) SRC);
break;
case BPF_CMPXCHG:
-   BPF_R0 = (u64) atomic64_cmpxchg(
-

[PATCH bpf-next v4 02/11] bpf: x86: Factor out emission of REX byte

2020-12-07 Thread Brendan Jackman

The JIT case for encoding atomic ops is about to get more
complicated. In order to make the review & resulting code easier,
let's factor out some shared helpers.

Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c | 39 ++---
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index cc818ed7c2b9..7106cfd10ba6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -702,6 +702,21 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 
val_reg, int off)
*pprog = prog;
 }
 
+/*
+ * Emit a REX byte if it will be necessary to address these registers
+ */
+static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is64)
+   EMIT1(add_2mod(0x48, dst_reg, src_reg));
+   else if (is_ereg(dst_reg) || is_ereg(src_reg))
+   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -854,10 +869,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_OR: b2 = 0x09; break;
case BPF_XOR: b2 = 0x31; break;
}
-   if (BPF_CLASS(insn->code) == BPF_ALU64)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_ALU64);
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1301,20 +1314,16 @@ xadd:   emit_modrm_dstoff(&prog, 
dst_reg, src_reg, insn->off);
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* cmp dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
/* test dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
@@ -1350,10 +1359,8 @@ xadd:emit_modrm_dstoff(&prog, 
dst_reg, src_reg, insn->off);
case BPF_JMP32 | BPF_JSLE | BPF_K:
/* test dst_reg, dst_reg to save one extra byte */
if (imm32 == 0) {
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, dst_reg));
-   else if (is_ereg(dst_reg))
-   EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+   maybe_emit_mod(&prog, dst_reg, dst_reg,
+  BPF_CLASS(insn->code) == 
BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
goto emit_cond_jmp;
}
-- 
2.29.2.576.ga3fc446d84-goog

[PATCH bpf-next v4 03/11] bpf: x86: Factor out a lookup table for some ALU opcodes

2020-12-07 Thread Brendan Jackman

A later commit will need to look up a subset of these opcodes. To
avoid duplicating code, pull out a table.

The shift opcodes won't be needed by that later commit, but they're
already duplicated, so fold them into the table anyway.

Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7106cfd10ba6..f0c98fd275e5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -205,6 +205,18 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
 }
 
+/* Some 1-byte opcodes for binary ALU operations */
+static u8 simple_alu_opcodes[] = {
+   [BPF_ADD] = 0x01,
+   [BPF_SUB] = 0x29,
+   [BPF_AND] = 0x21,
+   [BPF_OR] = 0x09,
+   [BPF_XOR] = 0x31,
+   [BPF_LSH] = 0xE0,
+   [BPF_RSH] = 0xE8,
+   [BPF_ARSH] = 0xF8,
+};
+
 static void jit_fill_hole(void *area, unsigned int size)
 {
/* Fill whole space with INT3 instructions */
@@ -862,15 +874,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_ALU64 | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
-   switch (BPF_OP(insn->code)) {
-   case BPF_ADD: b2 = 0x01; break;
-   case BPF_SUB: b2 = 0x29; break;
-   case BPF_AND: b2 = 0x21; break;
-   case BPF_OR: b2 = 0x09; break;
-   case BPF_XOR: b2 = 0x31; break;
-   }
maybe_emit_mod(&prog, dst_reg, src_reg,
   BPF_CLASS(insn->code) == BPF_ALU64);
+   b2 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1050,12 +1056,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
-
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
if (imm32 == 1)
EMIT2(0xD1, add_1reg(b3, dst_reg));
else
@@ -1089,11 +1090,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(0xD3, add_1reg(b3, dst_reg));
 
if (src_reg != BPF_REG_4)
-- 
2.29.2.576.ga3fc446d84-goog

[PATCH bpf-next v4 07/11] bpf: Add instructions for atomic_[cmp]xchg

2020-12-07 Thread Brendan Jackman

This adds two atomic opcodes, both of which include the BPF_FETCH
flag. XCHG without the BPF_FETCH flag would naturally encode
atomic_set. This is not supported because it would be of limited
value to userspace (it doesn't imply any barriers). CMPXCHG without
BPF_FETCH would be an atomic compare-and-write. We don't have such
an operation in the kernel so it isn't provided to BPF either.

There are two significant design decisions made for the CMPXCHG
instruction:

 - The issue is that this operation fundamentally has 3
   operands, but we only have two register fields. Therefore the
   operand we compare against (the kernel's API calls it 'old') is
   hard-coded to be R0. x86 has a similar design (and A64 doesn't
   have this problem).

   A potential alternative might be to encode the other operand's
   register number in the immediate field.

 - The kernel's atomic_cmpxchg returns the old value, while the C11
   userspace APIs return a boolean indicating the comparison
   result. Which should BPF do? A64 returns the old value. x86 returns
   the old value in the hard-coded register (and also sets a
   flag). That means return-old-value is easier to JIT.

Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c|  8 
 include/linux/filter.h | 22 ++
 include/uapi/linux/bpf.h   |  4 +++-
 kernel/bpf/core.c  | 20 
 kernel/bpf/disasm.c| 15 +++
 kernel/bpf/verifier.c  | 19 +--
 tools/include/linux/filter.h   | 22 ++
 tools/include/uapi/linux/bpf.h |  4 +++-
 8 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index eea7d8b0bb12..308241187582 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -815,6 +815,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
EMIT2(0x0F, 0xC1);
break;
+   case BPF_XCHG:
+   /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+   EMIT1(0x87);
+   break;
+   case BPF_CMPXCHG:
+   /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+   EMIT2(0x0F, 0xB1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index b5258bca10d2..e1e1fc946a7c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
  *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
+ *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
  */
 
 #define BPF_ATOMIC64(OP, DST, SRC, OFF)\
@@ -293,6 +295,26 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
.off   = OFF,   \
.imm   = BPF_ADD })
 
+/* Atomic exchange, src_reg = atomic_xchg(dst_reg + off, src_reg) */
+
+#define BPF_ATOMIC_XCHG(SIZE, DST, SRC, OFF)   \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_XCHG  })
+
+/* Atomic compare-exchange, r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg) */
+
+#define BPF_ATOMIC_CMPXCHG(SIZE, DST, SRC, OFF)\
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_CMPXCHG })
+
 /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
 
 #define BPF_ST_MEM(SIZE, DST, OFF, IMM)\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d5389119291e..b733af50a5b9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -45,7 +45,9 @@
 #define BPF_EXIT   0x90/* function return */
 
 /* atomic op type fields (stored in immediate) */
-#define BPF_FETCH  0x01/* fetch previous value into src reg */
+#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* atomic exchange */
+#define BPF_CMPXCHG

[PATCH bpf-next v4 06/11] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2020-12-07 Thread Brendan Jackman

The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC
instructions, in order to have the previous value of the
atomically-modified memory location loaded into the src register
after an atomic op is carried out.

Suggested-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c|  4 
 include/linux/filter.h |  1 +
 include/uapi/linux/bpf.h   |  3 +++
 kernel/bpf/core.c  | 13 +
 kernel/bpf/disasm.c|  7 +++
 kernel/bpf/verifier.c  | 33 -
 tools/include/linux/filter.h   | 11 +++
 tools/include/uapi/linux/bpf.h |  3 +++
 8 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b1829a534da1..eea7d8b0bb12 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -811,6 +811,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
+   case BPF_ADD | BPF_FETCH:
+   /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
+   EMIT2(0x0F, 0xC1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 45be19408f68..b5258bca10d2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -264,6 +264,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
  * Atomic operations:
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
  */
 
 #define BPF_ATOMIC64(OP, DST, SRC, OFF)\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 98161e2d389f..d5389119291e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -44,6 +44,9 @@
 #define BPF_CALL   0x80/* function call */
 #define BPF_EXIT   0x90/* function return */
 
+/* atomic op type fields (stored in immediate) */
+#define BPF_FETCH  0x01/* fetch previous value into src reg */
+
 /* Register numbers */
 enum {
BPF_REG_0 = 0,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3abc6b250b18..61e93eb7d363 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1624,16 +1624,29 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
/* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
atomic_add((u32) SRC, (atomic_t *)(unsigned long)
   (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u32) atomic_fetch_add(
+   (u32) SRC,
+   (atomic_t *)(unsigned long) (DST + insn->off));
+   break;
default:
goto default_label;
}
CONT;
+
STX_ATOMIC_DW:
switch (IMM) {
case BPF_ADD:
/* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
 (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u64) atomic64_fetch_add(
+   (u64) SRC,
+   (atomic64_t *)(s64) (DST + insn->off));
+   break;
default:
goto default_label;
}
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 37c8d6e9b4cc..d2e20f6d0516 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off,
insn->src_reg);
+   } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+  insn->imm == (BPF_ADD | BPF_FETCH)) {
+   verbose(cbs->private_data, "(%02x) r%d = 
atomic%s_fetch_add((%s *)(r%d %+d), r%d)\n",
+   insn->code, insn->src_reg,
+   BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
+   bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+   insn->dst_reg, insn->off, insn->src_reg);
} else {
verbose(cbs->privat

[PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-07 Thread Brendan Jackman

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

 - Avoid breaking the build for people on old versions of Clang
 - Avoid needing separate lists of test objects for no_alu32, where
   atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Signed-off-by: Brendan Jackman 
---
 tools/testing/selftests/bpf/Makefile  |  10 +
 .../selftests/bpf/prog_tests/atomics.c| 246 ++
 tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
 .../selftests/bpf/verifier/atomic_and.c   |  77 ++
 .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
 .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
 .../selftests/bpf/verifier/atomic_or.c|  77 ++
 .../selftests/bpf/verifier/atomic_xchg.c  |  46 
 .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
 9 files changed, 889 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..13bc1d736164 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  
\
 -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
 -I$(abspath $(OUTPUT)/../usr/include)
 
+# BPF atomics support was added to Clang in llvm-project commit 286daafd6512
+# (release 12.0.0).
+BPF_ATOMICS_SUPPORTED = $(shell \
+   echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(&x, 
1, 2); }" \
+   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
1 || echo 0)
+
 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
   -Wno-compare-distinct-pointer-types
 
@@ -399,11 +405,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
$(OUTPUT)/bpf_testmod.ko\
   $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(BPF_ATOMICS_SUPPORTED),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
 TRUNNER_BPF_LDFLAGS := -mattr=+alu32
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 
 # Define test_progs-no_alu32 test runner.
 TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
 TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
 
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index ..c841a3abc2f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+   return;
+
+   prog_fd = bpf_program__fd(skel->progs.add);
+   err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+   NULL, NULL, &retval, &duration);
+   if (CHECK(err || retval, "test_run add",
+ "err %d errno %d retval %d duration %d\n", err, errno, 
retval, duration))
+   goto cleanup;
+
+   ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
+   ASSERT_EQ(skel->bss->add64_result, 1, "add64_result");
+
+   ASSERT_EQ(skel->data->add32_value, 3,

[PATCH bpf-next v4 11/11] bpf: Document new atomic instructions

2020-12-07 Thread Brendan Jackman

Document new atomic instructions.

Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 1583d59d806d..26d508a5e038 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1053,6 +1053,32 @@ encoding.
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
 
+The basic atomic operations supported (from architecture v4 onwards) are:
+
+BPF_ADD
+BPF_AND
+BPF_OR
+BPF_XOR
+
+Each has semantics equivalent to the ``BPF_ADD`` example, that is: the
+memory location addressed by ``dst_reg + off`` is atomically modified, with
+``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
+immediate, then these operations also overwrite ``src_reg`` with the
+value that was in memory before it was modified.
+
+The more special operations are:
+
+BPF_XCHG
+
+This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
+off``.
+
+BPF_CMPXCHG
+
+This atomically compares the value addressed by ``dst_reg + off`` with
+``R0``. If they match it is replaced with ``src_reg``. The value that was there
+before is loaded back to ``R0``.
+
 Note that 1 and 2 byte atomic operations are not supported.
 
 You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
-- 
2.29.2.576.ga3fc446d84-goog

[PATCH bpf-next v4 09/11] bpf: Add bitwise atomic instructions

2020-12-07 Thread Brendan Jackman

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ versions of these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers,
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c  | 50 ++-
 include/linux/filter.h   | 66 
 kernel/bpf/core.c|  3 ++
 kernel/bpf/disasm.c  | 21 +---
 kernel/bpf/verifier.c|  6 
 tools/include/linux/filter.h | 66 
 6 files changed, 207 insertions(+), 5 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 308241187582..1d4d50199293 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -808,6 +808,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* emit opcode */
switch (atomic_op) {
case BPF_ADD:
+   case BPF_SUB:
+   case BPF_AND:
+   case BPF_OR:
+   case BPF_XOR:
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
@@ -1292,8 +1296,52 @@ st:  if (is_imm8(insn->off))
 
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+   if (insn->imm == (BPF_AND | BPF_FETCH) ||
+   insn->imm == (BPF_OR | BPF_FETCH) ||
+   insn->imm == (BPF_XOR | BPF_FETCH)) {
+   u8 *branch_target;
+   bool is64 = BPF_SIZE(insn->code) == BPF_DW;
+
+   /*
+* Can't be implemented with a single x86 insn.
+* Need to do a CMPXCHG loop.
+*/
+
+   /* Will need RAX as a CMPXCHG operand so save 
R0 */
+   emit_mov_reg(&prog, true, BPF_REG_AX, 
BPF_REG_0);
+   branch_target = prog;
+   /* Load old value */
+   emit_ldx(&prog, BPF_SIZE(insn->code),
+BPF_REG_0, dst_reg, insn->off);
+   /*
+* Perform the (commutative) operation locally,
+* put the result in the AUX_REG.
+*/
+   emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0);
+   maybe_emit_mod(&prog, AUX_REG, src_reg, is64);
+   EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
+ add_2reg(0xC0, AUX_REG, src_reg));
+   /* Attempt to swap in new value */
+   err = emit_atomic(&prog, BPF_CMPXCHG,
+ dst_reg, AUX_REG, insn->off,
+ BPF_SIZE(insn->code));
+   if (WARN_ON(err))
+   return err;
+   /*
+* ZF tells us whether we won the race. If it's
+* cleared we need to try again.
+*/
+   EMIT2(X86_JNE, -(prog - branch_target) - 2);
+   /* Return the pre-modification value */
+   emit_mov_reg(&prog, is64, src_reg, BPF_REG_0);
+   /* Restore R0 after clobbering RAX */
+   emit_mov_reg(&prog, true, BPF_REG_0, 
BPF_REG_AX);
+   break;
+
+   }
+
err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ insn->off, 
BPF_SIZE(insn->code));
if (err)
return err;
break;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index e1e1fc946a7c..e100c71555a4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -264,7 +264,13 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
  * Atomic operations:
  *
  *   BPF_ADD  *(uint *) (dst_reg

[PATCH bpf-next v4 04/11] bpf: Rename BPF_XADD and prepare to encode other atomics in .imm

2020-12-07 Thread Brendan Jackman

A subsequent patch will add additional atomic operations. These new
operations will use the same opcode field as the existing XADD, with
the immediate discriminating different operations.

In preparation, rename the instruction mode to BPF_ATOMIC and start
calling the zero immediate BPF_ADD.

This is possible (doesn't break existing valid BPF progs) because the
immediate field is currently reserved MBZ and BPF_ADD is zero.

All uses are removed from the tree but the BPF_XADD definition is
kept around to avoid breaking builds for people including kernel
headers.

Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst   | 30 -
 arch/arm/net/bpf_jit_32.c |  7 ++-
 arch/arm64/net/bpf_jit_comp.c | 16 +--
 arch/mips/net/ebpf_jit.c  | 11 +++--
 arch/powerpc/net/bpf_jit_comp64.c | 25 ---
 arch/riscv/net/bpf_jit_comp32.c   | 20 +++--
 arch/riscv/net/bpf_jit_comp64.c   | 16 +--
 arch/s390/net/bpf_jit_comp.c  | 27 ++-
 arch/sparc/net/bpf_jit_comp_64.c  | 17 +--
 arch/x86/net/bpf_jit_comp.c   | 45 ++-
 arch/x86/net/bpf_jit_comp32.c |  6 +--
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 14 --
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 +-
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 15 ---
 include/linux/filter.h| 29 ++--
 include/uapi/linux/bpf.h  |  5 ++-
 kernel/bpf/core.c | 31 +
 kernel/bpf/disasm.c   |  6 ++-
 kernel/bpf/verifier.c | 24 +-
 lib/test_bpf.c| 14 +++---
 samples/bpf/bpf_insn.h|  4 +-
 samples/bpf/cookie_uid_helper_example.c   |  6 +--
 samples/bpf/sock_example.c|  2 +-
 samples/bpf/test_cgrp2_attach.c   |  5 ++-
 tools/include/linux/filter.h  | 28 ++--
 tools/include/uapi/linux/bpf.h|  5 ++-
 .../bpf/prog_tests/cgroup_attach_multi.c  |  4 +-
 .../selftests/bpf/test_cgroup_storage.c   |  2 +-
 tools/testing/selftests/bpf/verifier/ctx.c|  7 ++-
 .../bpf/verifier/direct_packet_access.c   |  4 +-
 .../testing/selftests/bpf/verifier/leak_ptr.c | 10 ++---
 .../selftests/bpf/verifier/meta_access.c  |  4 +-
 tools/testing/selftests/bpf/verifier/unpriv.c |  3 +-
 .../bpf/verifier/value_illegal_alu.c  |  2 +-
 tools/testing/selftests/bpf/verifier/xadd.c   | 18 
 35 files changed, 317 insertions(+), 149 deletions(-)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index debb59e374de..1583d59d806d 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1006,13 +1006,13 @@ Size modifier is one of ...
 
 Mode modifier is one of::
 
-  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
-  BPF_ABS  0x20
-  BPF_IND  0x40
-  BPF_MEM  0x60
-  BPF_LEN  0x80  /* classic BPF only, reserved in eBPF */
-  BPF_MSH  0xa0  /* classic BPF only, reserved in eBPF */
-  BPF_XADD 0xc0  /* eBPF only, exclusive add */
+  BPF_IMM 0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
+  BPF_ABS 0x20
+  BPF_IND 0x40
+  BPF_MEM 0x60
+  BPF_LEN 0x80  /* classic BPF only, reserved in eBPF */
+  BPF_MSH 0xa0  /* classic BPF only, reserved in eBPF */
+  BPF_ATOMIC  0xc0  /* eBPF only, atomic operations */
 
 eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
 (BPF_IND | <size> | BPF_LD) which are used to access packet data.
@@ -1044,11 +1044,19 @@ Unlike classic BPF instruction set, eBPF has generic 
load/store operations::
 BPF_MEM | <size> | BPF_STX:  *(size *) (dst_reg + off) = src_reg
 BPF_MEM | <size> | BPF_ST:   *(size *) (dst_reg + off) = imm32
 BPF_MEM | <size> | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
-BPF_XADD | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
-BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
 
-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
-2 byte atomic increments are not supported.
+Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
+
+It also includes atomic operations, which use the immediate field for extra
+encoding.
+
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
+
+Note that 1 and 2 byte atomic operations are not supported.
+
+You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
+the exclusive-add operation encoded when the immediate field is zero.
 
 eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which con

Re: [PATCH bpf-next v3 10/14] bpf: Add bitwise atomic instructions

2020-12-07 Thread Brendan Jackman

On Mon, Dec 07, 2020 at 07:58:09AM -0800, Yonghong Song wrote:
> 
> 
> On 12/7/20 3:28 AM, Brendan Jackman wrote:
> > On Fri, Dec 04, 2020 at 07:21:22AM -0800, Yonghong Song wrote:
> > > 
> > > 
> > > On 12/4/20 1:36 AM, Brendan Jackman wrote:
> > > > On Thu, Dec 03, 2020 at 10:42:19PM -0800, Yonghong Song wrote:
> > > > > 
> > > > > 
> > > > > On 12/3/20 8:02 AM, Brendan Jackman wrote:
> > > > > > This adds instructions for
> > > > > > 
> > > > > > atomic[64]_[fetch_]and
> > > > > > atomic[64]_[fetch_]or
> > > > > > atomic[64]_[fetch_]xor
> > > > > > 
> > > > > > All these operations are isomorphic enough to implement with the 
> > > > > > same
> > > > > > verifier, interpreter, and x86 JIT code, hence being a single 
> > > > > > commit.
> > > > > > 
> > > > > > The main interesting thing here is that x86 doesn't directly support
> > > > > > the fetch_ versions of these operations, so we need to generate a 
> > > > > > CMPXCHG
> > > > > > loop in the JIT. This requires the use of two temporary registers,
> > > > > > IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.
> > > > > > 
> > > > > > Change-Id: I340b10cecebea8cb8a52e3606010cde547a10ed4
> > > > > > Signed-off-by: Brendan Jackman 
> > > > > > ---
> > > > > > arch/x86/net/bpf_jit_comp.c  | 50 +-
> > > > > > include/linux/filter.h   | 60 
> > > > > > 
> > > > > > kernel/bpf/core.c|  5 ++-
> > > > > > kernel/bpf/disasm.c  | 21 ++---
> > > > > > kernel/bpf/verifier.c|  6 
> > > > > > tools/include/linux/filter.h | 60 
> > > > > > 
> > > > > > 6 files changed, 196 insertions(+), 6 deletions(-)
> > > > > > 
> > > > [...]
> > > > > > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > > > > > index 6186280715ed..698f82897b0d 100644
> > > > > > --- a/include/linux/filter.h
> > > > > > +++ b/include/linux/filter.h
> > > > > > @@ -280,6 +280,66 @@ static inline bool insn_is_zext(const struct 
> > > > > > bpf_insn *insn)
> > > > [...]
> > > > > > +#define BPF_ATOMIC_FETCH_XOR(SIZE, DST, SRC, OFF)  \
> > > > > > +   ((struct bpf_insn) {\
> > > > > > +   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
> > > > > > +   .dst_reg = DST, \
> > > > > > +   .src_reg = SRC, \
> > > > > > +   .off   = OFF,   \
> > > > > > +   .imm   = BPF_XOR | BPF_FETCH })
> > > > > > +
> > > > > > /* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), 
> > > > > > src_reg) */
> > > > > 
> > > > > Looks like BPF_ATOMIC_XOR/OR/AND/... all similar to each other.
> > > > > The same is for BPF_ATOMIC_FETCH_XOR/OR/AND/...
> > > > > 
> > > > > I am wondering whether it makes sense to have
> > > > > BPF_ATOMIC_BOP(BOP, SIZE, DST, SRC, OFF) and
> > > > > BPF_ATOMIC_FETCH_BOP(BOP, SIZE, DST, SRC, OFF)
> > > > > can have less number of macros?
> > > > 
> > > > Hmm yeah I think that's probably a good idea, it would be consistent
> > > > with the macros for non-atomic ALU ops.
> > > > 
> > > > I don't think 'BOP' would be very clear though, 'ALU' might be more
> > > > obvious.
> > > 
> > > BPF_ATOMIC_ALU and BPF_ATOMIC_FETCH_ALU indeed better.
> > 
> > On second thoughts I think it feels right (i.e. it would be roughly
> > consistent with the level of abstraction of the rest of this macro API)
> > to go further and just have two macros BPF_ATOMIC64 and BPF_ATOMIC32:
> > 
> > /*
> >  * Atomic ALU ops:
> >  *
> >  *   BPF_ADD

Re: [PATCH bpf-next v4 04/11] bpf: Rename BPF_XADD and prepare to encode other atomics in .imm

2020-12-08 Thread Brendan Jackman

Hi John, thanks a lot for the reviews!

On Mon, Dec 07, 2020 at 01:56:53PM -0800, John Fastabend wrote:
> Brendan Jackman wrote:
> > A subsequent patch will add additional atomic operations. These new
> > operations will use the same opcode field as the existing XADD, with
> > the immediate discriminating different operations.
> > 
> > In preparation, rename the instruction mode BPF_ATOMIC and start
> > calling the zero immediate BPF_ADD.
> > 
> > This is possible (doesn't break existing valid BPF progs) because the
> > immediate field is currently reserved MBZ and BPF_ADD is zero.
> > 
> > All uses are removed from the tree but the BPF_XADD definition is
> > kept around to avoid breaking builds for people including kernel
> > headers.
> > 
> > Signed-off-by: Brendan Jackman 
> > ---
> >  Documentation/networking/filter.rst   | 30 -
> >  arch/arm/net/bpf_jit_32.c |  7 ++-
> >  arch/arm64/net/bpf_jit_comp.c | 16 +--
> >  arch/mips/net/ebpf_jit.c  | 11 +++--
> >  arch/powerpc/net/bpf_jit_comp64.c | 25 ---
> >  arch/riscv/net/bpf_jit_comp32.c   | 20 +++--
> >  arch/riscv/net/bpf_jit_comp64.c   | 16 +--
> >  arch/s390/net/bpf_jit_comp.c  | 27 ++-
> >  arch/sparc/net/bpf_jit_comp_64.c  | 17 +--
> >  arch/x86/net/bpf_jit_comp.c   | 45 ++-
> >  arch/x86/net/bpf_jit_comp32.c |  6 +--
> >  drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 14 --
> >  drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 +-
> >  .../net/ethernet/netronome/nfp/bpf/verifier.c | 15 ---
> >  include/linux/filter.h| 29 ++--
> >  include/uapi/linux/bpf.h  |  5 ++-
> >  kernel/bpf/core.c | 31 +
> >  kernel/bpf/disasm.c   |  6 ++-
> >  kernel/bpf/verifier.c | 24 +-
> >  lib/test_bpf.c| 14 +++---
> >  samples/bpf/bpf_insn.h|  4 +-
> >  samples/bpf/cookie_uid_helper_example.c   |  6 +--
> >  samples/bpf/sock_example.c|  2 +-
> >  samples/bpf/test_cgrp2_attach.c   |  5 ++-
> >  tools/include/linux/filter.h  | 28 ++--
> >  tools/include/uapi/linux/bpf.h|  5 ++-
> >  .../bpf/prog_tests/cgroup_attach_multi.c  |  4 +-
> >  .../selftests/bpf/test_cgroup_storage.c   |  2 +-
> >  tools/testing/selftests/bpf/verifier/ctx.c|  7 ++-
> >  .../bpf/verifier/direct_packet_access.c   |  4 +-
> >  .../testing/selftests/bpf/verifier/leak_ptr.c | 10 ++---
> >  .../selftests/bpf/verifier/meta_access.c  |  4 +-
> >  tools/testing/selftests/bpf/verifier/unpriv.c |  3 +-
> >  .../bpf/verifier/value_illegal_alu.c  |  2 +-
> >  tools/testing/selftests/bpf/verifier/xadd.c   | 18 
> >  35 files changed, 317 insertions(+), 149 deletions(-)
> > 
> 
> [...]
> 
> > +++ a/arch/mips/net/ebpf_jit.c
> 
> [...]
> 
> > -   if (BPF_MODE(insn->code) == BPF_XADD) {
> > +   if (BPF_MODE(insn->code) == BPF_ATOMIC) {
> > +   if (insn->imm != BPF_ADD) {
> > +   pr_err("ATOMIC OP %02x NOT HANDLED\n", 
> > insn->imm);
> > +   return -EINVAL;
> > +   }
> > +
> > /*
> [...]
> > +++ b/arch/powerpc/net/bpf_jit_comp64.c
> 
> > -   case BPF_STX | BPF_XADD | BPF_W:
> > +   case BPF_STX | BPF_ATOMIC | BPF_W:
> > +   if (insn->imm != BPF_ADD) {
> > +   pr_err_ratelimited(
> > +   "eBPF filter atomic op code %02x (@%d) 
> > unsupported\n",
> > +   code, i);
> > +   return -ENOTSUPP;
> > +   }
> [...]
> > @@ -699,8 +707,15 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 
> > *image,
> > -   case BPF_STX | BPF_XADD | BPF_DW:
> > +   case BPF_STX | BPF_ATOMIC | BPF_DW:
> > +   if (insn->imm != BPF_ADD) {
> > +   pr_err_ratelimited(
> > +   "eBPF filter atomic op code %02x (@%d) 
> > unsupported\n",
> > +

Re: [PATCH bpf-next v4 06/11] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2020-12-08 Thread Brendan Jackman

On Mon, Dec 07, 2020 at 05:41:05PM -0800, Yonghong Song wrote:
> 
> 
> On 12/7/20 8:07 AM, Brendan Jackman wrote:
> > The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC
> > instructions, in order to have the previous value of the
> > atomically-modified memory location loaded into the src register
> > after an atomic op is carried out.
> > 
> > Suggested-by: Yonghong Song 
> > Signed-off-by: Brendan Jackman 
> > ---
> >   arch/x86/net/bpf_jit_comp.c|  4 
> >   include/linux/filter.h |  1 +
> >   include/uapi/linux/bpf.h   |  3 +++
> >   kernel/bpf/core.c  | 13 +
> >   kernel/bpf/disasm.c|  7 +++
> >   kernel/bpf/verifier.c  | 33 -
> >   tools/include/linux/filter.h   | 11 +++
> >   tools/include/uapi/linux/bpf.h |  3 +++
> >   8 files changed, 66 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> [...]
> 
> > index f345f12c1ff8..4e0100ba52c2 100644
> > --- a/tools/include/linux/filter.h
> > +++ b/tools/include/linux/filter.h
> > @@ -173,6 +173,7 @@
> >* Atomic operations:
> >*
> >*   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
> > + *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
> > src_reg);
> >*/
> >   #define BPF_ATOMIC64(OP, DST, SRC, OFF)   \
> > @@ -201,6 +202,16 @@
> > .off   = OFF,   \
> > .imm   = BPF_ADD })
> > +/* Atomic memory add with fetch, src_reg = atomic_fetch_add(dst_reg + off, 
> > src_reg); */
> > +
> > +#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF)  \
> > +   ((struct bpf_insn) {\
> > +   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
> > +   .dst_reg = DST, \
> > +   .src_reg = SRC, \
> > +   .off   = OFF,   \
> > +   .imm   = BPF_ADD | BPF_FETCH })
> 
> Not sure whether it is a good idea or not to fold this into BPF_ATOMIC
> macro. At least you can define BPF_ATOMIC macro and
> #define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF) \
> BPF_ATOMIC(SIZE, DST, SRC, OFF, BPF_ADD | BPF_FETCH)
> 
> to avoid too many code duplications?

Oops.. I intended to totally get rid of these and fold them into
BPF_ATOMIC{64,32}! OK, let's combine all of them into a single macro.
It will have to be called something slightly awkward like
BPF_ATOMIC_INSN because BPF_ATOMIC is the name of the BPF_OP.

> 
> > +
> >   /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
> >   #define BPF_ST_MEM(SIZE, DST, OFF, IMM)   \
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index 98161e2d389f..d5389119291e 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -44,6 +44,9 @@
> >   #define BPF_CALL  0x80/* function call */
> >   #define BPF_EXIT  0x90/* function return */
> > +/* atomic op type fields (stored in immediate) */
> > +#define BPF_FETCH  0x01/* fetch previous value into src reg */
> > +
> >   /* Register numbers */
> >   enum {
> > BPF_REG_0 = 0,
> >

Re: [PATCH bpf-next v4 06/11] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2020-12-08 Thread Brendan Jackman

On Mon, Dec 07, 2020 at 09:31:40PM -0800, John Fastabend wrote:
> Brendan Jackman wrote:
> > The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC
> > instructions, in order to have the previous value of the
> > atomically-modified memory location loaded into the src register
> > after an atomic op is carried out.
> > 
> > Suggested-by: Yonghong Song 
> > Signed-off-by: Brendan Jackman 
> > ---
> 
> I like Yonghong suggestion 
> 
>  #define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF)   \
>  BPF_ATOMIC(SIZE, DST, SRC, OFF, BPF_ADD | BPF_FETCH)
> 
> otherwise LGTM. One observation to consider below.
> 
> Acked-by: John Fastabend 
> 
> >  arch/x86/net/bpf_jit_comp.c|  4 
> >  include/linux/filter.h |  1 +
> >  include/uapi/linux/bpf.h   |  3 +++
> >  kernel/bpf/core.c  | 13 +
> >  kernel/bpf/disasm.c|  7 +++
> >  kernel/bpf/verifier.c  | 33 -
> >  tools/include/linux/filter.h   | 11 +++
> >  tools/include/uapi/linux/bpf.h |  3 +++
> >  8 files changed, 66 insertions(+), 9 deletions(-)
> 
> [...]
> 
> > @@ -3652,8 +3656,20 @@ static int check_atomic(struct bpf_verifier_env 
> > *env, int insn_idx, struct bpf_i
> > return err;
> >  
> > /* check whether we can write into the same memory */
> > -   return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> > -   BPF_SIZE(insn->code), BPF_WRITE, -1, true);
> > +   err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> > +  BPF_SIZE(insn->code), BPF_WRITE, -1, true);
> > +   if (err)
> > +   return err;
> > +
> > +   if (!(insn->imm & BPF_FETCH))
> > +   return 0;
> > +
> > +   /* check and record load of old value into src reg  */
> > +   err = check_reg_arg(env, insn->src_reg, DST_OP);
> 
> This will mark the reg unknown. I think this is fine here. Might be nice
> to carry bounds through though if possible

Ah, I hadn't thought of this. I think if I move this check_reg_arg to be
before the first check_mem_access, and then (when BPF_FETCH) set the
val_regno arg to load_reg, then the bounds from memory would get
propagated back to the register:

if (insn->imm & BPF_FETCH) {
if (insn->imm == BPF_CMPXCHG)
load_reg = BPF_REG_0;
else
load_reg = insn->src_reg;
err = check_reg_arg(env, load_reg, DST_OP);
if (err)
return err;
} else {
load_reg = -1;
}
/* check whether we can read the memory */
err = check_mem_access(env, insn_index, insn->dst_reg, insn->off
   BPF_SIZE(insn->code), BPF_READ,
   load_reg, // <--
   true);

Is that the kind of thing you had in mind?

> > +   if (err)
> > +   return err;
> > +
> > +   return 0;
> >  }
> >

Re: [PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-08 Thread Brendan Jackman

On Mon, Dec 07, 2020 at 07:18:57PM -0800, Yonghong Song wrote:
> 
> 
> On 12/7/20 8:07 AM, Brendan Jackman wrote:
> > The prog_test that's added depends on Clang/LLVM features added by
> > Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184 ).
> > 
> > Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
> > to:
> > 
> >   - Avoid breaking the build for people on old versions of Clang
> >   - Avoid needing separate lists of test objects for no_alu32, where
> > atomics are not supported even if Clang has the feature.
> > 
> > The atomics_test.o BPF object is built unconditionally both for
> > test_progs and test_progs-no_alu32. For test_progs, if Clang supports
> > atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
> > test code. Otherwise, progs and global vars are defined anyway, as
> > stubs; this means that the skeleton user code still builds.
> > 
> > The atomics_test.o userspace object is built once and used for both
> > test_progs and test_progs-no_alu32. A variable called skip_tests is
> > defined in the BPF object's data section, which tells the userspace
> > object whether to skip the atomics test.
> > 
> > Signed-off-by: Brendan Jackman 
> 
> Ack with minor comments below.
> 
> Acked-by: Yonghong Song 
> 
> > ---
> >   tools/testing/selftests/bpf/Makefile  |  10 +
> >   .../selftests/bpf/prog_tests/atomics.c| 246 ++
> >   tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
> >   .../selftests/bpf/verifier/atomic_and.c   |  77 ++
> >   .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
> >   .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
> >   .../selftests/bpf/verifier/atomic_or.c|  77 ++
> >   .../selftests/bpf/verifier/atomic_xchg.c  |  46 
> >   .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
> >   9 files changed, 889 insertions(+)
> >   create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
> >   create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
> >   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
> >   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
> >   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
> >   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
> >   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
> >   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c
> > 
> > diff --git a/tools/testing/selftests/bpf/Makefile 
> > b/tools/testing/selftests/bpf/Makefile
> > index ac25ba5d0d6c..13bc1d736164 100644
> > --- a/tools/testing/selftests/bpf/Makefile
> > +++ b/tools/testing/selftests/bpf/Makefile
> > @@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  
> > \
> >  -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
> >  -I$(abspath $(OUTPUT)/../usr/include)
> > +# BPF atomics support was added to Clang in llvm-project commit 
> > 286daafd6512
> > +# (release 12.0.0).
> > +BPF_ATOMICS_SUPPORTED = $(shell \
> > +   echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(&x, 
> > 1, 2); }" \
> > +   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
> > 1 || echo 0)
> 
> '-x c' here more intuitive?
> 
> > +
> >   CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
> >-Wno-compare-distinct-pointer-types
> > @@ -399,11 +405,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
> > $(OUTPUT)/bpf_testmod.ko\
> >$(wildcard progs/btf_dump_test_case_*.c)
> >   TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
> >   TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
> > +ifeq ($(BPF_ATOMICS_SUPPORTED),1)
> > +  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
> > +endif
> >   TRUNNER_BPF_LDFLAGS := -mattr=+alu32
> >   $(eval $(call DEFINE_TEST_RUNNER,test_progs))
> >   # Define test_progs-no_alu32 test runner.
> >   TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
> > +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
> >   TRUNNER_BPF_LDFLAGS :=
> >   $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
> > diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
> > b/tools/testing/selftests/bpf/prog_tests/atomics.c
> > new file mode 100644
> > index ..c841a3abc2f7
> > --- /dev/n

Re: [PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-08 Thread Brendan Jackman

On Tue, Dec 08, 2020 at 08:38:04AM -0800, Yonghong Song wrote:
> 
> 
> On 12/8/20 4:41 AM, Brendan Jackman wrote:
> > On Mon, Dec 07, 2020 at 07:18:57PM -0800, Yonghong Song wrote:
> > > 
> > > 
> > > On 12/7/20 8:07 AM, Brendan Jackman wrote:
> > > > The prog_test that's added depends on Clang/LLVM features added by
> > > > Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184  ).
> > > > 
> > > > Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
> > > > to:
> > > > 
> > > >- Avoid breaking the build for people on old versions of Clang
> > > >- Avoid needing separate lists of test objects for no_alu32, where
> > > >  atomics are not supported even if Clang has the feature.
> > > > 
> > > > The atomics_test.o BPF object is built unconditionally both for
> > > > test_progs and test_progs-no_alu32. For test_progs, if Clang supports
> > > > atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
> > > > test code. Otherwise, progs and global vars are defined anyway, as
> > > > stubs; this means that the skeleton user code still builds.
> > > > 
> > > > The atomics_test.o userspace object is built once and used for both
> > > > test_progs and test_progs-no_alu32. A variable called skip_tests is
> > > > defined in the BPF object's data section, which tells the userspace
> > > > object whether to skip the atomics test.
> > > > 
> > > > Signed-off-by: Brendan Jackman 
> > > 
> > > Ack with minor comments below.
> > > 
> > > Acked-by: Yonghong Song 
> > > 
> > > > ---
> > > >tools/testing/selftests/bpf/Makefile  |  10 +
> > > >.../selftests/bpf/prog_tests/atomics.c| 246 
> > > > ++
> > > >tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
> > > >.../selftests/bpf/verifier/atomic_and.c   |  77 ++
> > > >.../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
> > > >.../selftests/bpf/verifier/atomic_fetch_add.c | 106 
> > > >.../selftests/bpf/verifier/atomic_or.c|  77 ++
> > > >.../selftests/bpf/verifier/atomic_xchg.c  |  46 
> > > >.../selftests/bpf/verifier/atomic_xor.c   |  77 ++
> > > >9 files changed, 889 insertions(+)
> > > >create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
> > > >create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
> > > >create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
> > > >create mode 100644 
> > > > tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
> > > >create mode 100644 
> > > > tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
> > > >create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
> > > >create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
> > > >create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c
> > > > 
> > > > diff --git a/tools/testing/selftests/bpf/Makefile 
> > > > b/tools/testing/selftests/bpf/Makefile
> > > > index ac25ba5d0d6c..13bc1d736164 100644
> > > > --- a/tools/testing/selftests/bpf/Makefile
> > > > +++ b/tools/testing/selftests/bpf/Makefile
> > > > @@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) 
> > > > $(MENDIAN)  \
> > > >  -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   
> > > > \
> > > >  -I$(abspath $(OUTPUT)/../usr/include)
> > > > +# BPF atomics support was added to Clang in llvm-project commit 
> > > > 286daafd6512
> > > > +# (release 12.0.0).
> > > > +BPF_ATOMICS_SUPPORTED = $(shell \
> > > > +   echo "int x = 0; int foo(void) { return 
> > > > __sync_val_compare_and_swap(&x, 1, 2); }" \
> > > > +   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null 
> > > > && echo 1 || echo 0)
> > > 
> > > '-x c' here more intuitive?
> > > 
> > > > +
> > > >CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
> > > >-Wno-compare-distinct-pointer-types
> > > > @@ -399,11 +405,15 @@ TRUNNER_EXTRA_FI

Re: [PATCH bpf-next v3 12/14] bpf: Pull tools/build/feature biz into selftests Makefile

2020-12-08 Thread Brendan Jackman

On Mon, Dec 07, 2020 at 06:19:12PM -0800, Andrii Nakryiko wrote:
> On Mon, Dec 7, 2020 at 3:00 AM Brendan Jackman  wrote:
> >
> > On Fri, Dec 04, 2020 at 11:00:24AM -0800, Andrii Nakryiko wrote:
> > > On Fri, Dec 4, 2020 at 1:41 AM Brendan Jackman  
> > > wrote:
> > > >
> > > > On Thu, Dec 03, 2020 at 01:01:27PM -0800, Andrii Nakryiko wrote:
> > > > > On Thu, Dec 3, 2020 at 8:07 AM Brendan Jackman  
> > > > > wrote:
> > > > > >
[...]
> >
> > Ah right gotcha. Then yeah I think we can do this:
> >
> >  BPF_ATOMICS_SUPPORTED = $(shell \
> > echo "int x = 0; int foo(void) { return 
> > __sync_val_compare_and_swap(&x, 1, 2); }" \
> > | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && 
> > echo 1 || echo 0)
> 
> Looks like it would work, yes.
> 
> Curious what "-x cpp-output" does?

That's just to tell Clang what language to expect, since it can't infer
it from a file extension:

  $ echo foo | clang -S -
  clang-10: error: -E or -x required when input is from standard input

Yonghong pointed out that we can actually just use `-x c`.

Re: [PATCH bpf-next] bpf: Fix a verifier message for alloc size helper arg

2021-01-12 Thread Brendan Jackman

Sorry, duplicate - seems I had my mail client in HTML mode the first
time around.

On Tue, 12 Jan 2021 at 14:14, KP Singh  wrote:
>
> On Tue, Jan 12, 2021 at 1:39 PM Brendan Jackman  wrote:
> >
> > The error message here is misleading, the argument will be rejected
> > unless it is a known constant.
> >
> > Signed-off-by: Brendan Jackman 
> > ---
> >  kernel/bpf/verifier.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 17270b8404f1..5534e667bdb1 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -4319,7 +4319,7 @@ static int check_func_arg(struct bpf_verifier_env 
> > *env, u32 arg,
> > err = mark_chain_precision(env, regno);
> > } else if (arg_type_is_alloc_size(arg_type)) {
> > if (!tnum_is_const(reg->var_off)) {
> > -   verbose(env, "R%d unbounded size, use 'var &= 
> > const' or 'if (var < const)'\n",
>
> Can you check if:
>
> int var = 1000;
> var += 1;
>
> if (var < 2000)
>// call helper
>
> and then using var in the argument works? If so, the existing error
> message would be correct.

I think that would work because var is already a known constant before
the conditional.. but the error message is still wrong, the `if (var <
2000)` is irrelevant. If var was not already a known constant (e.g.
came from the return value of a bpf_probe_read_kernel_str) it would
fail verification.

[PATCH bpf-next v6 00/11] Atomics for eBPF

2021-01-12 Thread Brendan Jackman

nal flag BPF_FETCH is used to mark instructions that should
fetch a pre-modification value from memory.

So, BPF_XADD is now called BPF_ATOMIC (the old name is kept to avoid
breaking userspace builds), and where we previously had .imm = 0, we
now have .imm = BPF_ADD (which is 0).

Operands


Reg-source eBPF instructions only have two operands, while these
atomic operations have up to four. To avoid needing to encode
additional operands, then:

- One of the input registers is re-used as an output register
  (e.g. atomic_fetch_add both reads from and writes to the source
  register).

- Where necessary (i.e. for cmpxchg), R0 is "hard-coded" as one of
  the operands.

This approach also allows the new eBPF instructions to map directly
to single x86 instructions.

[1] Previous iterations:
v1: 
https://lore.kernel.org/bpf/20201123173202.1335708-1-jackm...@google.com/
v2: 
https://lore.kernel.org/bpf/20201127175738.1085417-1-jackm...@google.com/
v3: https://lore.kernel.org/bpf/x8kn7na7bjc7a...@google.com/
v4: 
https://lore.kernel.org/bpf/20201207160734.2345502-1-jackm...@google.com/
v5: 
https://lore.kernel.org/bpf/20201215121816.1048557-1-jackm...@google.com/

[2] Visualisation of eBPF opcode space:
https://gist.github.com/bjackman/00fdad2d5dfff601c1918bc29b16e778

[3] Comment from John about propagating bounds in verifier:

https://lore.kernel.org/bpf/5fcf0fbcc8aa8_9ab320853@john-XPS-13-9370.notmuch/

[4] Mail from Andrii about not supporting old Clang in selftests:
    
https://lore.kernel.org/bpf/CAEf4BzYBddPaEzRUs=jaWSo5kbf=lzdb7geauvj85gxlqzt...@mail.gmail.com/

Brendan Jackman (11):
  bpf: x86: Factor out emission of ModR/M for *(reg + off)
  bpf: x86: Factor out emission of REX byte
  bpf: x86: Factor out a lookup table for some ALU opcodes
  bpf: Rename BPF_XADD and prepare to encode other atomics in .imm
  bpf: Move BPF_STX reserved field check into BPF_STX verifier code
  bpf: Add BPF_FETCH field / create atomic_fetch_add instruction
  bpf: Add instructions for atomic_[cmp]xchg
  bpf: Pull out a macro for interpreting atomic ALU operations
  bpf: Add bitwise atomic instructions
  bpf: Add tests for new BPF atomic operations
  bpf: Document new atomic instructions

 Documentation/networking/filter.rst   |  61 -
 arch/arm/net/bpf_jit_32.c |   7 +-
 arch/arm64/net/bpf_jit_comp.c |  16 +-
 arch/mips/net/ebpf_jit.c  |  11 +-
 arch/powerpc/net/bpf_jit_comp64.c |  25 +-
 arch/riscv/net/bpf_jit_comp32.c   |  20 +-
 arch/riscv/net/bpf_jit_comp64.c   |  16 +-
 arch/s390/net/bpf_jit_comp.c  |  27 +-
 arch/sparc/net/bpf_jit_comp_64.c  |  17 +-
 arch/x86/net/bpf_jit_comp.c   | 217 ++-
 arch/x86/net/bpf_jit_comp32.c |   6 +-
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  |  14 +-
 drivers/net/ethernet/netronome/nfp/bpf/main.h |   4 +-
 .../net/ethernet/netronome/nfp/bpf/verifier.c |  15 +-
 include/linux/filter.h|  25 +-
 include/uapi/linux/bpf.h  |  10 +-
 kernel/bpf/core.c |  67 -
 kernel/bpf/disasm.c   |  43 ++-
 kernel/bpf/verifier.c |  75 --
 lib/test_bpf.c|  14 +-
 samples/bpf/bpf_insn.h|   4 +-
 samples/bpf/cookie_uid_helper_example.c   |   8 +-
 samples/bpf/sock_example.c|   2 +-
 samples/bpf/test_cgrp2_attach.c   |   5 +-
 tools/include/linux/filter.h  |  24 +-
 tools/include/uapi/linux/bpf.h|  10 +-
 tools/testing/selftests/bpf/Makefile  |   2 +
 .../selftests/bpf/prog_tests/atomics.c| 246 ++
 .../bpf/prog_tests/cgroup_attach_multi.c  |   4 +-
 tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
 .../selftests/bpf/test_cgroup_storage.c   |   2 +-
 .../selftests/bpf/verifier/atomic_and.c   |  77 ++
 .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
 .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
 .../selftests/bpf/verifier/atomic_or.c|  77 ++
 .../selftests/bpf/verifier/atomic_xchg.c  |  46 
 .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
 tools/testing/selftests/bpf/verifier/ctx.c|   7 +-
 .../bpf/verifier/direct_packet_access.c   |   4 +-
 .../testing/selftests/bpf/verifier/leak_ptr.c |  10 +-
 .../selftests/bpf/verifier/meta_access.c  |   4 +-
 tools/testing/selftests/bpf/verifier/unpriv.c |   3 +-
 .../bpf/verifier/value_illegal_alu.c  |   2 +-
 tools/testing/selftests/bpf/verifier/xadd.c   |  18 +-
 44 files changed, 1466 insertions(+), 212 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
 create mode 1006

[PATCH bpf-next v6 01/11] bpf: x86: Factor out emission of ModR/M for *(reg + off)

2021-01-12 Thread Brendan Jackman

The case for JITing atomics is about to get more complicated. Let's
factor out some common code to make the review and result more
readable.

NB the atomics code doesn't yet use the new helper - a subsequent
patch will add its use as a side-effect of other changes.

Acked-by: John Fastabend 
Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c | 43 +
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 796506dcfc42..30526776fa78 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -681,6 +681,27 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 
dst_reg, u32 src_reg)
*pprog = prog;
 }
 
+/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
+static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is_imm8(off)) {
+   /* 1-byte signed displacement.
+*
+* If off == 0 we could skip this and save one extra byte, but
+* special case of x86 R13 which always needs an offset is not
+* worth the hassle
+*/
+   EMIT2(add_2reg(0x40, ptr_reg, val_reg), off);
+   } else {
+   /* 4-byte signed displacement */
+   EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off);
+   }
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -708,15 +729,7 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
break;
}
-   /*
-* If insn->off == 0 we can save one extra byte, but
-* special case of x86 R13 which always needs an offset
-* is not worth the hassle
-*/
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+   emit_insn_suffix(&prog, src_reg, dst_reg, off);
*pprog = prog;
 }
 
@@ -751,10 +764,7 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
break;
}
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+   emit_insn_suffix(&prog, dst_reg, src_reg, off);
*pprog = prog;
 }
 
@@ -1240,11 +1250,8 @@ st:  if (is_imm8(insn->off))
goto xadd;
case BPF_STX | BPF_XADD | BPF_DW:
EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
-xadd:  if (is_imm8(insn->off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), 
insn->off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
-   insn->off);
+xadd:
+   emit_insn_suffix(&prog, dst_reg, src_reg, insn->off);
break;
 
/* call */
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v6 02/11] bpf: x86: Factor out emission of REX byte

2021-01-12 Thread Brendan Jackman

The JIT case for encoding atomic ops is about to get more
complicated. In order to make the review & resulting code easier,
let's factor out some shared helpers.

Signed-off-by: Brendan Jackman 
Acked-by: John Fastabend 
---
 arch/x86/net/bpf_jit_comp.c | 39 ++---
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 30526776fa78..f15c93275a18 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -702,6 +702,21 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 
val_reg, int off)
*pprog = prog;
 }
 
+/*
+ * Emit a REX byte if it will be necessary to address these registers
+ */
+static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is64)
+   EMIT1(add_2mod(0x48, dst_reg, src_reg));
+   else if (is_ereg(dst_reg) || is_ereg(src_reg))
+   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -854,10 +869,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_OR: b2 = 0x09; break;
case BPF_XOR: b2 = 0x31; break;
}
-   if (BPF_CLASS(insn->code) == BPF_ALU64)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_ALU64);
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1302,20 +1315,16 @@ st: if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* cmp dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
/* test dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
@@ -1351,10 +1360,8 @@ st:  if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSLE | BPF_K:
/* test dst_reg, dst_reg to save one extra byte */
if (imm32 == 0) {
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, dst_reg));
-   else if (is_ereg(dst_reg))
-   EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+   maybe_emit_mod(&prog, dst_reg, dst_reg,
+  BPF_CLASS(insn->code) == 
BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
goto emit_cond_jmp;
}
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v6 03/11] bpf: x86: Factor out a lookup table for some ALU opcodes

2021-01-12 Thread Brendan Jackman

A later commit will need to lookup a subset of these opcodes. To
avoid duplicating code, pull out a table.

The shift opcodes won't be needed by that later commit, but they're
already duplicated, so fold them into the table anyway.

Signed-off-by: Brendan Jackman 
Acked-by: John Fastabend 
---
 arch/x86/net/bpf_jit_comp.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f15c93275a18..93f32e0ba0ef 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -205,6 +205,18 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
 }
 
+/* Some 1-byte opcodes for binary ALU operations */
+static u8 simple_alu_opcodes[] = {
+   [BPF_ADD] = 0x01,
+   [BPF_SUB] = 0x29,
+   [BPF_AND] = 0x21,
+   [BPF_OR] = 0x09,
+   [BPF_XOR] = 0x31,
+   [BPF_LSH] = 0xE0,
+   [BPF_RSH] = 0xE8,
+   [BPF_ARSH] = 0xF8,
+};
+
 static void jit_fill_hole(void *area, unsigned int size)
 {
/* Fill whole space with INT3 instructions */
@@ -862,15 +874,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_ALU64 | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
-   switch (BPF_OP(insn->code)) {
-   case BPF_ADD: b2 = 0x01; break;
-   case BPF_SUB: b2 = 0x29; break;
-   case BPF_AND: b2 = 0x21; break;
-   case BPF_OR: b2 = 0x09; break;
-   case BPF_XOR: b2 = 0x31; break;
-   }
maybe_emit_mod(&prog, dst_reg, src_reg,
   BPF_CLASS(insn->code) == BPF_ALU64);
+   b2 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1050,12 +1056,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
-
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
if (imm32 == 1)
EMIT2(0xD1, add_1reg(b3, dst_reg));
else
@@ -1089,11 +1090,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(0xD3, add_1reg(b3, dst_reg));
 
if (src_reg != BPF_REG_4)
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v6 07/11] bpf: Add instructions for atomic_[cmp]xchg

2021-01-12 Thread Brendan Jackman

This adds two atomic opcodes, both of which include the BPF_FETCH
flag. XCHG without the BPF_FETCH flag would naturally encode
atomic_set. This is not supported because it would be of limited
value to userspace (it doesn't imply any barriers). CMPXCHG without
BPF_FETCH would be an atomic compare-and-write. We don't have such
an operation in the kernel so it isn't provided to BPF either.

There are two significant design decisions made for the CMPXCHG
instruction:

 - This operation fundamentally has 3 operands, but we only have
   two register fields. Therefore the
   operand we compare against (the kernel's API calls it 'old') is
   hard-coded to be R0. x86 has similar design (and A64 doesn't
   have this problem).

   A potential alternative might be to encode the other operand's
   register number in the immediate field.

 - The kernel's atomic_cmpxchg returns the old value, while the C11
   userspace APIs return a boolean indicating the comparison
   result. Which should BPF do? A64 returns the old value. x86 returns
   the old value in the hard-coded register (and also sets a
   flag). That means return-old-value is easier to JIT, so that's
   what we use.

Signed-off-by: Brendan Jackman 
Acked-by: Yonghong Song 
---
 arch/x86/net/bpf_jit_comp.c|  8 
 include/linux/filter.h |  2 ++
 include/uapi/linux/bpf.h   |  4 +++-
 kernel/bpf/core.c  | 20 
 kernel/bpf/disasm.c| 15 +++
 kernel/bpf/verifier.c  | 19 +--
 tools/include/linux/filter.h   |  2 ++
 tools/include/uapi/linux/bpf.h |  4 +++-
 8 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index eea7d8b0bb12..308241187582 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -815,6 +815,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
EMIT2(0x0F, 0xC1);
break;
+   case BPF_XCHG:
+   /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+   EMIT1(0x87);
+   break;
+   case BPF_CMPXCHG:
+   /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+   EMIT2(0x0F, 0xB1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3501f1fe36bb..ab2b2185f2d7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
  *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
+ *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
  */
 
 #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 760ae333a5ed..0403c38bac52 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -45,7 +45,9 @@
 #define BPF_EXIT   0x90/* function return */
 
 /* atomic op type fields (stored in immediate) */
-#define BPF_FETCH  0x01/* fetch previous value into src reg */
+#define BPF_FETCH  0x01/* not an opcode on its own, used to build 
others */
+#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* atomic exchange */
+#define BPF_CMPXCHG (0xf0 | BPF_FETCH)  /* atomic compare-and-write */
 
 /* Register numbers */
 enum {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 2b1d3fea03fc..4f22cff4491e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1630,6 +1630,16 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
(u32) SRC,
(atomic_t *)(unsigned long) (DST + insn->off));
break;
+   case BPF_XCHG:
+   SRC = (u32) atomic_xchg(
+   (atomic_t *)(unsigned long) (DST + insn->off),
+   (u32) SRC);
+   break;
+   case BPF_CMPXCHG:
+   BPF_R0 = (u32) atomic_cmpxchg(
+   (atomic_t *)(unsigned long) (DST + insn->off),
+   (u32) BPF_R0, (u32) SRC);
+   break;
default:
goto default_label;
}
@@ -1647,6 +1657,16 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
(u64) SRC,

[PATCH bpf-next v6 06/11] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2021-01-12 Thread Brendan Jackman

The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC
instructions, in order to have the previous value of the
atomically-modified memory location loaded into the src register
after an atomic op is carried out.

Suggested-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
Acked-by: John Fastabend 
---
 arch/x86/net/bpf_jit_comp.c|  4 
 include/linux/filter.h |  1 +
 include/uapi/linux/bpf.h   |  3 +++
 kernel/bpf/core.c  | 13 +
 kernel/bpf/disasm.c|  7 +++
 kernel/bpf/verifier.c  | 33 -
 tools/include/linux/filter.h   |  1 +
 tools/include/uapi/linux/bpf.h |  3 +++
 8 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b1829a534da1..eea7d8b0bb12 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -811,6 +811,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
+   case BPF_ADD | BPF_FETCH:
+   /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
+   EMIT2(0x0F, 0xC1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index b65a57d3558a..3501f1fe36bb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -264,6 +264,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
  * Atomic operations:
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
  */
 
 #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7bd3671bff20..760ae333a5ed 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -44,6 +44,9 @@
 #define BPF_CALL   0x80/* function call */
 #define BPF_EXIT   0x90/* function return */
 
+/* atomic op type fields (stored in immediate) */
+#define BPF_FETCH  0x01/* fetch previous value into src reg */
+
 /* Register numbers */
 enum {
BPF_REG_0 = 0,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3abc6b250b18..2b1d3fea03fc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1624,16 +1624,29 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
/* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
atomic_add((u32) SRC, (atomic_t *)(unsigned long)
   (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u32) atomic_fetch_add(
+   (u32) SRC,
+   (atomic_t *)(unsigned long) (DST + insn->off));
+   break;
default:
goto default_label;
}
CONT;
+
STX_ATOMIC_DW:
switch (IMM) {
case BPF_ADD:
/* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
 (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u64) atomic64_fetch_add(
+   (u64) SRC,
+   (atomic64_t *)(unsigned long) (DST + 
insn->off));
+   break;
default:
goto default_label;
}
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 37c8d6e9b4cc..d2e20f6d0516 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off,
insn->src_reg);
+   } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+  insn->imm == (BPF_ADD | BPF_FETCH)) {
+   verbose(cbs->private_data, "(%02x) r%d = 
atomic%s_fetch_add((%s *)(r%d %+d), r%d)\n",
+   insn->code, insn->src_reg,
+   BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
+   bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+   insn->dst_reg, insn->off, insn->src_reg);
} else {
verbo

[PATCH bpf-next v6 11/11] bpf: Document new atomic instructions

2021-01-12 Thread Brendan Jackman

Document new atomic instructions.

Acked-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst | 31 +
 1 file changed, 31 insertions(+)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 1583d59d806d..f6d8f90e9a56 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1053,8 +1053,39 @@ encoding.
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
 
+The basic atomic operations supported are:
+
+BPF_ADD
+BPF_AND
+BPF_OR
+BPF_XOR
+
+Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
+memory location addresed by ``dst_reg + off`` is atomically modified, with
+``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
+immediate, then these operations also overwrite ``src_reg`` with the
+value that was in memory before it was modified.
+
+The more special operations are:
+
+BPF_XCHG
+
+This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
+off``.
+
+BPF_CMPXCHG
+
+This atomically compares the value addressed by ``dst_reg + off`` with
+``R0``. If they match it is replaced with ``src_reg``, The value that was there
+before is loaded back to ``R0``.
+
 Note that 1 and 2 byte atomic operations are not supported.
 
+Except ``BPF_ADD`` _without_ ``BPF_FETCH`` (for legacy reasons), all 4 byte
+atomic operations require alu32 mode. Clang enables this mode by default in
+architecture v3 (``-mcpu=v3``). For older versions it can be enabled with
+``-Xclang -target-feature -Xclang +alu32``.
+
 You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
 the exclusive-add operation encoded when the immediate field is zero.
 
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v6 10/11] bpf: Add tests for new BPF atomic operations

2021-01-12 Thread Brendan Jackman

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

 - Avoid breaking the build for people on old versions of Clang
 - Avoid needing separate lists of test objects for no_alu32, where
   atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Acked-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 tools/testing/selftests/bpf/Makefile  |   2 +
 .../selftests/bpf/prog_tests/atomics.c| 246 ++
 tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
 .../selftests/bpf/verifier/atomic_and.c   |  77 ++
 .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
 .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
 .../selftests/bpf/verifier/atomic_or.c|  77 ++
 .../selftests/bpf/verifier/atomic_xchg.c  |  46 
 .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
 9 files changed, 881 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index c51df6b91bef..e89b9bb28de3 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -390,10 +390,12 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
$(OUTPUT)/bpf_testmod.ko\
   $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 
 # Define test_progs-no_alu32 test runner.
 TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
 
 # Define test_progs BPF-GCC-flavored test runner.
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index ..21efe7bbf10d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+   return;
+
+   prog_fd = bpf_program__fd(skel->progs.add);
+   err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+   NULL, NULL, &retval, &duration);
+   if (CHECK(err || retval, "test_run add",
+ "err %d errno %d retval %d duration %d\n", err, errno, 
retval, duration))
+   goto cleanup;
+
+   ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
+   ASSERT_EQ(skel->bss->add64_result, 1, "add64_result");
+
+   ASSERT_EQ(skel->data->add32_value, 3, "add32_value");
+   ASSERT_EQ(skel->bss->add32_result, 1, "add32_result");
+
+   ASSERT_EQ(skel->bss->add_stack_value_copy, 3, "add_stack_value");
+   ASSERT_EQ(skel->bss->add_stack_result, 1, "add_stack_result");
+
+   ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
+
+cleanup:
+   bpf_link__destroy(link);
+}
+
+static void test_sub(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.sub);
+   if (CHECK(IS_ERR(link), "attach(sub)", "err: %l

[PATCH bpf-next v6 08/11] bpf: Pull out a macro for interpreting atomic ALU operations

2021-01-12 Thread Brendan Jackman

Since the atomic operations that are added in subsequent commits are
all isomorphic with BPF_ADD, pull out a macro to avoid the
interpreter becoming dominated by lines of atomic-related code.

Note that this sacrifices interpreter performance (combining
STX_ATOMIC_W and STX_ATOMIC_DW into a single switch case means that we
need an extra conditional branch to differentiate them) in favour of
compact and (relatively!) simple C code.

Acked-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 kernel/bpf/core.c | 80 +++
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4f22cff4491e..7b52affc5bd8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1618,55 +1618,53 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
LDX_PROBE(DW, 8)
 #undef LDX_PROBE
 
-   STX_ATOMIC_W:
-   switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
-   atomic_add((u32) SRC, (atomic_t *)(unsigned long)
-  (DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u32) atomic_fetch_add(
-   (u32) SRC,
-   (atomic_t *)(unsigned long) (DST + insn->off));
-   break;
-   case BPF_XCHG:
-   SRC = (u32) atomic_xchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) SRC);
-   break;
-   case BPF_CMPXCHG:
-   BPF_R0 = (u32) atomic_cmpxchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) BPF_R0, (u32) SRC);
+#define ATOMIC_ALU_OP(BOP, KOP)
\
+   case BOP:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   atomic_##KOP((u32) SRC, (atomic_t *)(unsigned 
long) \
+(DST + insn->off));\
+   else\
+   atomic64_##KOP((u64) SRC, (atomic64_t 
*)(unsigned long) \
+  (DST + insn->off));  \
+   break;  \
+   case BOP | BPF_FETCH:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   SRC = (u32) atomic_fetch_##KOP( \
+   (u32) SRC,  \
+   (atomic_t *)(unsigned long) (DST + 
insn->off)); \
+   else\
+   SRC = (u64) atomic64_fetch_##KOP(   \
+   (u64) SRC,  \
+   (atomic64_t *)(unsigned long) (DST + 
insn->off)); \
break;
-   default:
-   goto default_label;
-   }
-   CONT;
 
STX_ATOMIC_DW:
+   STX_ATOMIC_W:
switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
-   atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
-(DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u64) atomic64_fetch_add(
-   (u64) SRC,
-   (atomic64_t *)(unsigned long) (DST + 
insn->off));
-   break;
+   ATOMIC_ALU_OP(BPF_ADD, add)
+#undef ATOMIC_ALU_OP
+
case BPF_XCHG:
-   SRC = (u64) atomic64_xchg(
-   (atomic64_t *)(unsigned long) (DST + insn->off),
-   (u64) SRC);
+   if (BPF_SIZE(insn->code) == BPF_W)
+   SRC = (u32) atomic_xchg(
+   (atomic_t *)(unsigned long) (DST + 
insn->off),
+   (u32) SRC);
+   else
+   SRC = (u64) atomic64_xchg(
+   (atomic64_t *)(unsigned long) (DST + 
insn->off),
+   (u64) SRC);
break;
case BPF_CMPXCHG:
-

[PATCH bpf-next v6 05/11] bpf: Move BPF_STX reserved field check into BPF_STX verifier code

2021-01-12 Thread Brendan Jackman

I can't find a reason why this code is in resolve_pseudo_ldimm64;
since I'll be modifying it in a subsequent commit, tidy it up.

Signed-off-by: Brendan Jackman 
Acked-by: Yonghong Song 
Acked-by: John Fastabend 
---
 kernel/bpf/verifier.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d562268c1fd1..48e398667a69 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9528,6 +9528,12 @@ static int do_check(struct bpf_verifier_env *env)
} else if (class == BPF_STX) {
enum bpf_reg_type *prev_dst_type, dst_reg_type;
 
+   if (((BPF_MODE(insn->code) != BPF_MEM &&
+ BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm 
!= 0)) {
+   verbose(env, "BPF_STX uses reserved fields\n");
+   return -EINVAL;
+   }
+
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
err = check_atomic(env, env->insn_idx, insn);
if (err)
@@ -9940,13 +9946,6 @@ static int resolve_pseudo_ldimm64(struct 
bpf_verifier_env *env)
return -EINVAL;
}
 
-   if (BPF_CLASS(insn->code) == BPF_STX &&
-   ((BPF_MODE(insn->code) != BPF_MEM &&
- BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) {
-   verbose(env, "BPF_STX uses reserved fields\n");
-   return -EINVAL;
-   }
-
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v6 04/11] bpf: Rename BPF_XADD and prepare to encode other atomics in .imm

2021-01-12 Thread Brendan Jackman

A subsequent patch will add additional atomic operations. These new
operations will use the same opcode field as the existing XADD, with
the immediate discriminating different operations.

In preparation, rename the instruction mode BPF_ATOMIC and start
calling the zero immediate BPF_ADD.

This is possible (doesn't break existing valid BPF progs) because the
immediate field is currently reserved MBZ and BPF_ADD is zero.

All uses are removed from the tree but the BPF_XADD definition is
kept around to avoid breaking builds for people including kernel
headers.

Signed-off-by: Brendan Jackman 
Acked-by: Björn Töpel 
---
 Documentation/networking/filter.rst   | 30 +++-
 arch/arm/net/bpf_jit_32.c |  7 ++-
 arch/arm64/net/bpf_jit_comp.c | 16 +--
 arch/mips/net/ebpf_jit.c  | 11 +++--
 arch/powerpc/net/bpf_jit_comp64.c | 25 --
 arch/riscv/net/bpf_jit_comp32.c   | 20 ++--
 arch/riscv/net/bpf_jit_comp64.c   | 16 +--
 arch/s390/net/bpf_jit_comp.c  | 27 ++-
 arch/sparc/net/bpf_jit_comp_64.c  | 17 +--
 arch/x86/net/bpf_jit_comp.c   | 46 ++-
 arch/x86/net/bpf_jit_comp32.c |  6 +--
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 14 --
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 +-
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 15 --
 include/linux/filter.h| 16 +--
 include/uapi/linux/bpf.h  |  5 +-
 kernel/bpf/core.c | 31 +
 kernel/bpf/disasm.c   |  6 ++-
 kernel/bpf/verifier.c | 24 ++
 lib/test_bpf.c| 14 +++---
 samples/bpf/bpf_insn.h|  4 +-
 samples/bpf/cookie_uid_helper_example.c   |  8 ++--
 samples/bpf/sock_example.c|  2 +-
 samples/bpf/test_cgrp2_attach.c   |  5 +-
 tools/include/linux/filter.h  | 15 --
 tools/include/uapi/linux/bpf.h|  5 +-
 .../bpf/prog_tests/cgroup_attach_multi.c  |  4 +-
 .../selftests/bpf/test_cgroup_storage.c   |  2 +-
 tools/testing/selftests/bpf/verifier/ctx.c|  7 ++-
 .../bpf/verifier/direct_packet_access.c   |  4 +-
 .../testing/selftests/bpf/verifier/leak_ptr.c | 10 ++--
 .../selftests/bpf/verifier/meta_access.c  |  4 +-
 tools/testing/selftests/bpf/verifier/unpriv.c |  3 +-
 .../bpf/verifier/value_illegal_alu.c  |  2 +-
 tools/testing/selftests/bpf/verifier/xadd.c   | 18 
 35 files changed, 291 insertions(+), 152 deletions(-)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index debb59e374de..1583d59d806d 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1006,13 +1006,13 @@ Size modifier is one of ...
 
 Mode modifier is one of::
 
-  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
-  BPF_ABS  0x20
-  BPF_IND  0x40
-  BPF_MEM  0x60
-  BPF_LEN  0x80  /* classic BPF only, reserved in eBPF */
-  BPF_MSH  0xa0  /* classic BPF only, reserved in eBPF */
-  BPF_XADD 0xc0  /* eBPF only, exclusive add */
+  BPF_IMM 0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
+  BPF_ABS 0x20
+  BPF_IND 0x40
+  BPF_MEM 0x60
+  BPF_LEN 0x80  /* classic BPF only, reserved in eBPF */
+  BPF_MSH 0xa0  /* classic BPF only, reserved in eBPF */
+  BPF_ATOMIC  0xc0  /* eBPF only, atomic operations */
 
 eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
 (BPF_IND | <size> | BPF_LD) which are used to access packet data.
@@ -1044,11 +1044,19 @@ Unlike classic BPF instruction set, eBPF has generic 
load/store operations::
 BPF_MEM | <size> | BPF_STX:  *(size *) (dst_reg + off) = src_reg
 BPF_MEM | <size> | BPF_ST:   *(size *) (dst_reg + off) = imm32
 BPF_MEM | <size> | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
-BPF_XADD | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
-BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
 
-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
-2 byte atomic increments are not supported.
+Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
+
+It also includes atomic operations, which use the immediate field for extra
+encoding.
+
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
+
+Note that 1 and 2 byte atomic operations are not supported.
+
+You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
+the exclusive-add operation encoded when the immediate field is zero.
 
 eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM

[PATCH bpf-next v6 09/11] bpf: Add bitwise atomic instructions

2021-01-12 Thread Brendan Jackman

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ versions of these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers;
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Signed-off-by: Brendan Jackman 
Acked-by: Yonghong Song 
---
 arch/x86/net/bpf_jit_comp.c  | 50 +++-
 include/linux/filter.h   |  6 +
 kernel/bpf/core.c|  3 +++
 kernel/bpf/disasm.c  | 21 ---
 kernel/bpf/verifier.c|  6 +
 tools/include/linux/filter.h |  6 +
 6 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 308241187582..1d4d50199293 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -808,6 +808,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* emit opcode */
switch (atomic_op) {
case BPF_ADD:
+   case BPF_SUB:
+   case BPF_AND:
+   case BPF_OR:
+   case BPF_XOR:
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
@@ -1292,8 +1296,52 @@ st:  if (is_imm8(insn->off))
 
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+   if (insn->imm == (BPF_AND | BPF_FETCH) ||
+   insn->imm == (BPF_OR | BPF_FETCH) ||
+   insn->imm == (BPF_XOR | BPF_FETCH)) {
+   u8 *branch_target;
+   bool is64 = BPF_SIZE(insn->code) == BPF_DW;
+
+   /*
+* Can't be implemented with a single x86 insn.
+* Need to do a CMPXCHG loop.
+*/
+
+   /* Will need RAX as a CMPXCHG operand so save 
R0 */
+   emit_mov_reg(&prog, true, BPF_REG_AX, 
BPF_REG_0);
+   branch_target = prog;
+   /* Load old value */
+   emit_ldx(&prog, BPF_SIZE(insn->code),
+BPF_REG_0, dst_reg, insn->off);
+   /*
+* Perform the (commutative) operation locally,
+* put the result in the AUX_REG.
+*/
+   emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0);
+   maybe_emit_mod(&prog, AUX_REG, src_reg, is64);
+   EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
+ add_2reg(0xC0, AUX_REG, src_reg));
+   /* Attempt to swap in new value */
+   err = emit_atomic(&prog, BPF_CMPXCHG,
+ dst_reg, AUX_REG, insn->off,
+ BPF_SIZE(insn->code));
+   if (WARN_ON(err))
+   return err;
+   /*
+* ZF tells us whether we won the race. If it's
+* cleared we need to try again.
+*/
+   EMIT2(X86_JNE, -(prog - branch_target) - 2);
+   /* Return the pre-modification value */
+   emit_mov_reg(&prog, is64, src_reg, BPF_REG_0);
+   /* Restore R0 after clobbering RAX */
+   emit_mov_reg(&prog, true, BPF_REG_0, 
BPF_REG_AX);
+   break;
+
+   }
+
err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ insn->off, 
BPF_SIZE(insn->code));
if (err)
return err;
break;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ab2b2185f2d7..e05702282e73 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -264,7 +264,13 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
  * Atomic operations:
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_AND

Re: [PATCH bpf-next] docs: bpf: Fixup atomics documentation

2021-01-18 Thread Brendan Jackman

Thanks for the review :)

On Mon, 18 Jan 2021 at 16:33, Jonathan Corbet  wrote:
>
> On Mon, 18 Jan 2021 11:36:43 +0000
> Brendan Jackman  wrote:
>
> > This fixues up the markup to fix a warning, be more consistent with
> > use of monospace, and use the correct .rst syntax for  (* instead
> > of _). It also clarifies the explanation of Clang's -mcpu
> > requirements for this feature, Alexei pointed out that use of the
> > word "version" was confusing here.
>
> This starts to sound like material for more than one patch...?

Good point, I'll split the markup fixups and actual content change
into separate patches.

> > NB this conflicts with Lukas' patch at [1], here where I've added
> > `::` to fix the warning, I also kept the original ':' which appears
> > in the output text.
>
> And why did you do that?

Hmm, indeed looks like that isn't necessary as long as there are no
spaces between the previous character and the '::'.

v2 incoming...

> > [1] 
> > https://lore.kernel.org/bpf/CA+i-1C3cEXqxcXfD4sibQfx+dtmmzvOzruhk8J5pAw3g5v=k...@mail.gmail.com/T/#t
> >
> > Signed-off-by: Brendan Jackman 
> > ---
> >  Documentation/networking/filter.rst | 30 +++--
> >  1 file changed, 16 insertions(+), 14 deletions(-)
> >
> > diff --git a/Documentation/networking/filter.rst 
> > b/Documentation/networking/filter.rst
> > index f6d8f90e9a56..ba03e90a9163 100644
> > --- a/Documentation/networking/filter.rst
> > +++ b/Documentation/networking/filter.rst
> > @@ -1048,12 +1048,12 @@ Unlike classic BPF instruction set, eBPF has 
> > generic load/store operations::
> >  Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
> >
> >  It also includes atomic operations, which use the immediate field for extra
> > -encoding.
> > +encoding: ::
>
> Things like this read really strangely.  Just say "encoding::" and be done
> with it, please.
>
> Thanks,
>
> jon

[PATCH bpf-next v2 0/2] BPF docs fixups

2021-01-18 Thread Brendan Jackman

Difference from v1->v2 [1]:

 * Split into 2 patches

 * Avoided unnecessary ': ::' in .rst source

 * Tweaked wording of the -mcpu=v3 bit a little more

[1] 
https://lore.kernel.org/bpf/ca+i-1c1lvkjfqlbyk6siiqhxfy0jcr7ubcamj4jced0a9aw...@mail.gmail.com/T/#t

Brendan Jackman (2):
  docs: bpf: Fixup atomics markup
  docs: bpf: Clarify -mcpu=v3 requirement for atomic ops

 Documentation/networking/filter.rst | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)


base-commit: 232164e041e925a920bfd28e63d5233cfad90b73
--
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v2 1/2] docs: bpf: Fixup atomics markup

2021-01-18 Thread Brendan Jackman

This fixes up the markup to fix a warning, be more consistent with
use of monospace, and use the correct .rst syntax for emphasis (* instead
of _).

NB this conflicts with Lukas' patch at [1], which just fixes the
warning. The scope of this one is a little broader.

[1] 
https://lore.kernel.org/bpf/CA+i-1C3cEXqxcXfD4sibQfx+dtmmzvOzruhk8J5pAw3g5v=k...@mail.gmail.com/T/#t

Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index f6d8f90e9a56..4c2bb4c6364d 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1048,12 +1048,12 @@ Unlike classic BPF instruction set, eBPF has generic 
load/store operations::
 Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
 
 It also includes atomic operations, which use the immediate field for extra
-encoding.
+encoding::
 
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
 
-The basic atomic operations supported are:
+The basic atomic operations supported are::
 
 BPF_ADD
 BPF_AND
@@ -1066,12 +1066,12 @@ memory location addresed by ``dst_reg + off`` is 
atomically modified, with
 immediate, then these operations also overwrite ``src_reg`` with the
 value that was in memory before it was modified.
 
-The more special operations are:
+The more special operations are::
 
 BPF_XCHG
 
 This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
-off``.
+off``. ::
 
 BPF_CMPXCHG
 
@@ -1081,18 +1081,19 @@ before is loaded back to ``R0``.
 
 Note that 1 and 2 byte atomic operations are not supported.
 
-Except ``BPF_ADD`` _without_ ``BPF_FETCH`` (for legacy reasons), all 4 byte
+Except ``BPF_ADD`` *without* ``BPF_FETCH`` (for legacy reasons), all 4 byte
 atomic operations require alu32 mode. Clang enables this mode by default in
 architecture v3 (``-mcpu=v3``). For older versions it can be enabled with
 ``-Xclang -target-feature -Xclang +alu32``.
 
-You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
-the exclusive-add operation encoded when the immediate field is zero.
+You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
+referring to the exclusive-add operation encoded when the immediate field is
+zero.
 
-eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
+eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
 of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
 instruction that loads 64-bit immediate value into a dst_reg.
-Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
+Classic BPF has similar instruction: ``BPF_LD | BPF_W | BPF_IMM`` which loads
 32-bit immediate value into a register.
 
 eBPF verifier
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next] bpf: Propagate memory bounds to registers in atomics w/ BPF_FETCH

2021-01-18 Thread Brendan Jackman

When BPF_FETCH is set, atomic instructions load a value from memory
into a register. The current verifier code first checks via
check_mem_access whether we can access the memory, and then checks
via check_reg_arg whether we can write into the register.

For loads, check_reg_arg has the side-effect of marking the
register's value as unknown, and check_mem_access has the side effect
of propagating bounds from memory to the register.

Therefore with the current order, bounds information is thrown away,
but by simply reversing the order of check_reg_arg
vs. check_mem_access, we can instead propagate bounds smartly.

A simple test is added with an infinite loop that can only be proved
unreachable if this propagation is present.

Note that in the test, the memory value has to be written with two
instructions:

BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),

instead of one:

BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),

Because BPF_ST_MEM doesn't seem to set the stack slot type to 0 when
storing an immediate.

Signed-off-by: Brendan Jackman 
---
 kernel/bpf/verifier.c | 32 +++
 .../selftests/bpf/verifier/atomic_bounds.c| 18 +++
 2 files changed, 36 insertions(+), 14 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_bounds.c

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0f82d5d46e2c..0512695c70f4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3663,9 +3663,26 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
return -EACCES;
}
 
+   if (insn->imm & BPF_FETCH) {
+   if (insn->imm == BPF_CMPXCHG)
+   load_reg = BPF_REG_0;
+   else
+   load_reg = insn->src_reg;
+
+   /* check and record load of old value */
+   err = check_reg_arg(env, load_reg, DST_OP);
+   if (err)
+   return err;
+   } else {
+   /* This instruction accesses a memory location but doesn't
+* actually load it into a register.
+*/
+   load_reg = -1;
+   }
+
/* check whether we can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-  BPF_SIZE(insn->code), BPF_READ, -1, true);
+  BPF_SIZE(insn->code), BPF_READ, load_reg, true);
if (err)
return err;
 
@@ -3675,19 +3692,6 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
if (err)
return err;
 
-   if (!(insn->imm & BPF_FETCH))
-   return 0;
-
-   if (insn->imm == BPF_CMPXCHG)
-   load_reg = BPF_REG_0;
-   else
-   load_reg = insn->src_reg;
-
-   /* check and record load of old value */
-   err = check_reg_arg(env, load_reg, DST_OP);
-   if (err)
-   return err;
-
return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/verifier/atomic_bounds.c 
b/tools/testing/selftests/bpf/verifier/atomic_bounds.c
new file mode 100644
index ..45030165ed63
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_bounds.c
@@ -0,0 +1,18 @@
+{
+   "BPF_ATOMIC bounds propagation, mem->reg",
+   .insns = {
+   /* a = 0; */
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+   /* b = atomic_fetch_add(&a, 1); */
+   BPF_MOV64_IMM(BPF_REG_1, 1),
+   BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, 
BPF_REG_1, -8),
+   /* Verifier should be able to tell that this infinite loop 
isn't reachable. */
+   /* if (b) while (true) continue; */
+   BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -1),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .result_unpriv = REJECT,
+   .errstr_unpriv = "back-edge",
+},

base-commit: 232164e041e925a920bfd28e63d5233cfad90b73
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v2 2/2] docs: bpf: Clarify -mcpu=v3 requirement for atomic ops

2021-01-18 Thread Brendan Jackman

Alexei pointed out [1] that this wording is pretty confusing. Here's
an attempt to be more explicit and clear.

[1] 
https://lore.kernel.org/bpf/CAADnVQJVvwoZsE1K+6qRxzF7+6CvZNzygnoBW9tZNWJELk5c=q...@mail.gmail.com/T/#m07264fc18fdc43af02fc1320968afefcc73d96f4

Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 4c2bb4c6364d..b3f457802836 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1081,9 +1081,10 @@ before is loaded back to ``R0``.
 
 Note that 1 and 2 byte atomic operations are not supported.
 
-Except ``BPF_ADD`` *without* ``BPF_FETCH`` (for legacy reasons), all 4 byte
-atomic operations require alu32 mode. Clang enables this mode by default in
-architecture v3 (``-mcpu=v3``). For older versions it can be enabled with
+Clang can generate atomic instructions by default when ``-mcpu=v3`` is
+enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
+Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to 
enable
+the atomics features, while keeping a lower ``-mcpu`` version, you can use
 ``-Xclang -target-feature -Xclang +alu32``.
 
 You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next] docs: bpf: Fixup atomics documentation

2021-01-18 Thread Brendan Jackman

This fixes up the markup to fix a warning, be more consistent with
use of monospace, and use the correct .rst syntax for emphasis (* instead
of _). It also clarifies the explanation of Clang's -mcpu
requirements for this feature; Alexei pointed out that use of the
word "version" was confusing here.

NB this conflicts with Lukas' patch at [1], here where I've added
`::` to fix the warning, I also kept the original ':' which appears
in the output text.

[1] 
https://lore.kernel.org/bpf/CA+i-1C3cEXqxcXfD4sibQfx+dtmmzvOzruhk8J5pAw3g5v=k...@mail.gmail.com/T/#t

Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst | 30 +++--
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index f6d8f90e9a56..ba03e90a9163 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1048,12 +1048,12 @@ Unlike classic BPF instruction set, eBPF has generic 
load/store operations::
 Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
 
 It also includes atomic operations, which use the immediate field for extra
-encoding.
+encoding: ::
 
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
 
-The basic atomic operations supported are:
+The basic atomic operations supported are: ::
 
 BPF_ADD
 BPF_AND
@@ -1066,12 +1066,12 @@ memory location addresed by ``dst_reg + off`` is 
atomically modified, with
 immediate, then these operations also overwrite ``src_reg`` with the
 value that was in memory before it was modified.
 
-The more special operations are:
+The more special operations are: ::
 
 BPF_XCHG
 
 This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
-off``.
+off``. ::
 
 BPF_CMPXCHG
 
@@ -1081,19 +1081,21 @@ before is loaded back to ``R0``.
 
 Note that 1 and 2 byte atomic operations are not supported.
 
-Except ``BPF_ADD`` _without_ ``BPF_FETCH`` (for legacy reasons), all 4 byte
-atomic operations require alu32 mode. Clang enables this mode by default in
-architecture v3 (``-mcpu=v3``). For older versions it can be enabled with
-``-Xclang -target-feature -Xclang +alu32``.
+Clang can generate atomic instructions when ``-mcpu=v3`` is enabled (this is 
the
+default). If a lower version for ``-mcpu`` is set, the only atomic instruction
+Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to
+enable the atomics features, while keeping a lower ``-mcpu`` version, you can
+use ``-Xclang -target-feature -Xclang +alu32``.
 
-You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
-the exclusive-add operation encoded when the immediate field is zero.
+You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
+referring to the exclusive-add operation encoded when the immediate field is
+zero.
 
-eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
+eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
 of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
-instruction that loads 64-bit immediate value into a dst_reg.
-Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
-32-bit immediate value into a register.
+instruction that loads 64-bit immediate value into a dst_reg.  Classic BPF has
+similar instruction: ``BPF_LD | BPF_W | BPF_IMM`` which loads 32-bit immediate
+value into a register.
 
 eBPF verifier
 -

base-commit: 232164e041e925a920bfd28e63d5233cfad90b73
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v7 00/11] Atomics for eBPF

2021-01-14 Thread Brendan Jackman

that should
fetch a pre-modification value from memory.

So, BPF_XADD is now called BPF_ATOMIC (the old name is kept to avoid
breaking userspace builds), and where we previously had .imm = 0, we
now have .imm = BPF_ADD (which is 0).

Operands


Reg-source eBPF instructions only have two operands, while these
atomic operations have up to four. To avoid needing to encode
additional operands, then:

- One of the input registers is re-used as an output register
  (e.g. atomic_fetch_add both reads from and writes to the source
  register).

- Where necessary (i.e. for cmpxchg), R0 is "hard-coded" as one of
  the operands.

This approach also allows the new eBPF instructions to map directly
to single x86 instructions.

[1] Previous iterations:
v1: 
https://lore.kernel.org/bpf/20201123173202.1335708-1-jackm...@google.com/
v2: 
https://lore.kernel.org/bpf/20201127175738.1085417-1-jackm...@google.com/
v3: https://lore.kernel.org/bpf/x8kn7na7bjc7a...@google.com/
v4: 
https://lore.kernel.org/bpf/20201207160734.2345502-1-jackm...@google.com/
v5: 
https://lore.kernel.org/bpf/20201215121816.1048557-1-jackm...@google.com/
v6: 
https://lore.kernel.org/bpf/20210112154235.2192781-1-jackm...@google.com/

[2] Visualisation of eBPF opcode space:
https://gist.github.com/bjackman/00fdad2d5dfff601c1918bc29b16e778

[3] Comment from John about propagating bounds in verifier:

https://lore.kernel.org/bpf/5fcf0fbcc8aa8_9ab320853@john-XPS-13-9370.notmuch/

[4] Mail from Andrii about not supporting old Clang in selftests:

https://lore.kernel.org/bpf/CAEf4BzYBddPaEzRUs=jaWSo5kbf=lzdb7geauvj85gxlqzt...@mail.gmail.com/

Brendan Jackman (11):
  bpf: x86: Factor out emission of ModR/M for *(reg + off)
  bpf: x86: Factor out emission of REX byte
  bpf: x86: Factor out a lookup table for some ALU opcodes
  bpf: Rename BPF_XADD and prepare to encode other atomics in .imm
  bpf: Move BPF_STX reserved field check into BPF_STX verifier code
  bpf: Add BPF_FETCH field / create atomic_fetch_add instruction
  bpf: Add instructions for atomic_[cmp]xchg
  bpf: Pull out a macro for interpreting atomic ALU operations
  bpf: Add bitwise atomic instructions
  bpf: Add tests for new BPF atomic operations
  bpf: Document new atomic instructions

 Documentation/networking/filter.rst   |  61 -
 arch/arm/net/bpf_jit_32.c |   7 +-
 arch/arm64/net/bpf_jit_comp.c |  16 +-
 arch/mips/net/ebpf_jit.c  |  11 +-
 arch/powerpc/net/bpf_jit_comp64.c |  25 +-
 arch/riscv/net/bpf_jit_comp32.c   |  20 +-
 arch/riscv/net/bpf_jit_comp64.c   |  16 +-
 arch/s390/net/bpf_jit_comp.c  |  27 +-
 arch/sparc/net/bpf_jit_comp_64.c  |  17 +-
 arch/x86/net/bpf_jit_comp.c   | 217 ++-
 arch/x86/net/bpf_jit_comp32.c |   6 +-
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  |  14 +-
 drivers/net/ethernet/netronome/nfp/bpf/main.h |   4 +-
 .../net/ethernet/netronome/nfp/bpf/verifier.c |  15 +-
 include/linux/filter.h|  25 +-
 include/uapi/linux/bpf.h  |  10 +-
 kernel/bpf/core.c |  67 -
 kernel/bpf/disasm.c   |  43 ++-
 kernel/bpf/verifier.c |  75 --
 lib/test_bpf.c|  14 +-
 samples/bpf/bpf_insn.h|   4 +-
 samples/bpf/cookie_uid_helper_example.c   |   8 +-
 samples/bpf/sock_example.c|   2 +-
 samples/bpf/test_cgrp2_attach.c   |   5 +-
 tools/include/linux/filter.h  |  24 +-
 tools/include/uapi/linux/bpf.h|  10 +-
 tools/testing/selftests/bpf/Makefile  |   2 +
 .../selftests/bpf/prog_tests/atomics.c| 246 ++
 .../bpf/prog_tests/cgroup_attach_multi.c  |   4 +-
 tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
 .../selftests/bpf/test_cgroup_storage.c   |   2 +-
 .../selftests/bpf/verifier/atomic_and.c   |  77 ++
 .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
 .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
 .../selftests/bpf/verifier/atomic_or.c|  77 ++
 .../selftests/bpf/verifier/atomic_xchg.c  |  46 
 .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
 tools/testing/selftests/bpf/verifier/ctx.c|   7 +-
 .../bpf/verifier/direct_packet_access.c   |   4 +-
 .../testing/selftests/bpf/verifier/leak_ptr.c |  10 +-
 .../selftests/bpf/verifier/meta_access.c  |   4 +-
 tools/testing/selftests/bpf/verifier/unpriv.c |   3 +-
 .../bpf/verifier/value_illegal_alu.c  |   2 +-
 tools/testing/selftests/bpf/verifier/xadd.c   |  18 +-
 44 files changed, 1466 insertions(+), 212 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
 create mode 100644 tools/testing/selftests/bpf/progs

[PATCH bpf-next v7 01/11] bpf: x86: Factor out emission of ModR/M for *(reg + off)

2021-01-14 Thread Brendan Jackman

The case for JITing atomics is about to get more complicated. Let's
factor out some common code to make the review and result more
readable.

NB the atomics code doesn't yet use the new helper - a subsequent
patch will add its use as a side-effect of other changes.

Acked-by: John Fastabend 
Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c | 43 +
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 796506dcfc42..30526776fa78 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -681,6 +681,27 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 
dst_reg, u32 src_reg)
*pprog = prog;
 }
 
+/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
+static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is_imm8(off)) {
+   /* 1-byte signed displacement.
+*
+* If off == 0 we could skip this and save one extra byte, but
+* special case of x86 R13 which always needs an offset is not
+* worth the hassle
+*/
+   EMIT2(add_2reg(0x40, ptr_reg, val_reg), off);
+   } else {
+   /* 4-byte signed displacement */
+   EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off);
+   }
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -708,15 +729,7 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
break;
}
-   /*
-* If insn->off == 0 we can save one extra byte, but
-* special case of x86 R13 which always needs an offset
-* is not worth the hassle
-*/
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+   emit_insn_suffix(&prog, src_reg, dst_reg, off);
*pprog = prog;
 }
 
@@ -751,10 +764,7 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
break;
}
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+   emit_insn_suffix(&prog, dst_reg, src_reg, off);
*pprog = prog;
 }
 
@@ -1240,11 +1250,8 @@ st:  if (is_imm8(insn->off))
goto xadd;
case BPF_STX | BPF_XADD | BPF_DW:
EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
-xadd:  if (is_imm8(insn->off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), 
insn->off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
-   insn->off);
+xadd:
+   emit_modrm_dstoff(&prog, dst_reg, src_reg, insn->off);
break;
 
/* call */
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v7 02/11] bpf: x86: Factor out emission of REX byte

2021-01-14 Thread Brendan Jackman

The JIT case for encoding atomic ops is about to get more
complicated. In order to make the review & resulting code easier,
let's factor out some shared helpers.

Signed-off-by: Brendan Jackman 
Acked-by: John Fastabend 
---
 arch/x86/net/bpf_jit_comp.c | 39 ++---
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 30526776fa78..f15c93275a18 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -702,6 +702,21 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 
val_reg, int off)
*pprog = prog;
 }
 
+/*
+ * Emit a REX byte if it will be necessary to address these registers
+ */
+static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is64)
+   EMIT1(add_2mod(0x48, dst_reg, src_reg));
+   else if (is_ereg(dst_reg) || is_ereg(src_reg))
+   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -854,10 +869,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_OR: b2 = 0x09; break;
case BPF_XOR: b2 = 0x31; break;
}
-   if (BPF_CLASS(insn->code) == BPF_ALU64)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_ALU64);
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1302,20 +1315,16 @@ st: if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* cmp dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
/* test dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
@@ -1351,10 +1360,8 @@ st:  if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSLE | BPF_K:
/* test dst_reg, dst_reg to save one extra byte */
if (imm32 == 0) {
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, dst_reg));
-   else if (is_ereg(dst_reg))
-   EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+   maybe_emit_mod(&prog, dst_reg, dst_reg,
+  BPF_CLASS(insn->code) == 
BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
goto emit_cond_jmp;
}
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v7 07/11] bpf: Add instructions for atomic_[cmp]xchg

2021-01-14 Thread Brendan Jackman

This adds two atomic opcodes, both of which include the BPF_FETCH
flag. XCHG without the BPF_FETCH flag would naturally encode
atomic_set. This is not supported because it would be of limited
value to userspace (it doesn't imply any barriers). CMPXCHG without
BPF_FETCH would be an atomic compare-and-write. We don't have such
an operation in the kernel so it isn't provided to BPF either.

There are two significant design decisions made for the CMPXCHG
instruction:

 - This operation fundamentally has 3
   operands, but we only have two register fields. Therefore the
   operand we compare against (the kernel's API calls it 'old') is
   hard-coded to be R0. x86 has similar design (and A64 doesn't
   have this problem).

   A potential alternative might be to encode the other operand's
   register number in the immediate field.

 - The kernel's atomic_cmpxchg returns the old value, while the C11
   userspace APIs return a boolean indicating the comparison
   result. Which should BPF do? A64 returns the old value. x86 returns
   the old value in the hard-coded register (and also sets a
   flag). That means return-old-value is easier to JIT, so that's
   what we use.

Signed-off-by: Brendan Jackman 
Acked-by: Yonghong Song 
---
 arch/x86/net/bpf_jit_comp.c|  8 
 include/linux/filter.h |  2 ++
 include/uapi/linux/bpf.h   |  4 +++-
 kernel/bpf/core.c  | 20 
 kernel/bpf/disasm.c| 15 +++
 kernel/bpf/verifier.c  | 19 +--
 tools/include/linux/filter.h   |  2 ++
 tools/include/uapi/linux/bpf.h |  4 +++-
 8 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index eea7d8b0bb12..308241187582 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -815,6 +815,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
EMIT2(0x0F, 0xC1);
break;
+   case BPF_XCHG:
+   /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+   EMIT1(0x87);
+   break;
+   case BPF_CMPXCHG:
+   /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+   EMIT2(0x0F, 0xB1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 23fca41b8540..d563820f197d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
  *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
+ *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
  */
 
 #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ea262b009049..c001766adcbc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -45,7 +45,9 @@
 #define BPF_EXIT   0x90/* function return */
 
 /* atomic op type fields (stored in immediate) */
-#define BPF_FETCH  0x01/* fetch previous value into src reg */
+#define BPF_FETCH  0x01/* not an opcode on its own, used to build 
others */
+#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* atomic exchange */
+#define BPF_CMPXCHG(0xf0 | BPF_FETCH)  /* atomic compare-and-write */
 
 /* Register numbers */
 enum {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 28d6000463e4..4df6daba43ef 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1630,6 +1630,16 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
(u32) SRC,
(atomic_t *)(unsigned long) (DST + insn->off));
break;
+   case BPF_XCHG:
+   SRC = (u32) atomic_xchg(
+   (atomic_t *)(unsigned long) (DST + insn->off),
+   (u32) SRC);
+   break;
+   case BPF_CMPXCHG:
+   BPF_R0 = (u32) atomic_cmpxchg(
+   (atomic_t *)(unsigned long) (DST + insn->off),
+   (u32) BPF_R0, (u32) SRC);
+   break;
default:
goto default_label;
}
@@ -1647,6 +1657,16 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
(u64) SRC,

[PATCH bpf-next v7 03/11] bpf: x86: Factor out a lookup table for some ALU opcodes

2021-01-14 Thread Brendan Jackman

A later commit will need to look up a subset of these opcodes. To
avoid duplicating code, pull out a table.

The shift opcodes won't be needed by that later commit, but they're
already duplicated, so fold them into the table anyway.

Signed-off-by: Brendan Jackman 
Acked-by: John Fastabend 
---
 arch/x86/net/bpf_jit_comp.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f15c93275a18..93f32e0ba0ef 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -205,6 +205,18 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
 }
 
+/* Some 1-byte opcodes for binary ALU operations */
+static u8 simple_alu_opcodes[] = {
+   [BPF_ADD] = 0x01,
+   [BPF_SUB] = 0x29,
+   [BPF_AND] = 0x21,
+   [BPF_OR] = 0x09,
+   [BPF_XOR] = 0x31,
+   [BPF_LSH] = 0xE0,
+   [BPF_RSH] = 0xE8,
+   [BPF_ARSH] = 0xF8,
+};
+
 static void jit_fill_hole(void *area, unsigned int size)
 {
/* Fill whole space with INT3 instructions */
@@ -862,15 +874,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_ALU64 | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
-   switch (BPF_OP(insn->code)) {
-   case BPF_ADD: b2 = 0x01; break;
-   case BPF_SUB: b2 = 0x29; break;
-   case BPF_AND: b2 = 0x21; break;
-   case BPF_OR: b2 = 0x09; break;
-   case BPF_XOR: b2 = 0x31; break;
-   }
maybe_emit_mod(&prog, dst_reg, src_reg,
   BPF_CLASS(insn->code) == BPF_ALU64);
+   b2 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1050,12 +1056,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
-
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
if (imm32 == 1)
EMIT2(0xD1, add_1reg(b3, dst_reg));
else
@@ -1089,11 +1090,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(0xD3, add_1reg(b3, dst_reg));
 
if (src_reg != BPF_REG_4)
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v7 04/11] bpf: Rename BPF_XADD and prepare to encode other atomics in .imm

2021-01-14 Thread Brendan Jackman

A subsequent patch will add additional atomic operations. These new
operations will use the same opcode field as the existing XADD, with
the immediate discriminating different operations.

In preparation, rename the instruction mode BPF_ATOMIC and start
calling the zero immediate BPF_ADD.

This is possible (doesn't break existing valid BPF progs) because the
immediate field is currently reserved MBZ and BPF_ADD is zero.

All uses are removed from the tree but the BPF_XADD definition is
kept around to avoid breaking builds for people including kernel
headers.

Signed-off-by: Brendan Jackman 
Acked-by: Björn Töpel 
---
 Documentation/networking/filter.rst   | 30 +++-
 arch/arm/net/bpf_jit_32.c |  7 ++-
 arch/arm64/net/bpf_jit_comp.c | 16 +--
 arch/mips/net/ebpf_jit.c  | 11 +++--
 arch/powerpc/net/bpf_jit_comp64.c | 25 --
 arch/riscv/net/bpf_jit_comp32.c   | 20 ++--
 arch/riscv/net/bpf_jit_comp64.c   | 16 +--
 arch/s390/net/bpf_jit_comp.c  | 27 ++-
 arch/sparc/net/bpf_jit_comp_64.c  | 17 +--
 arch/x86/net/bpf_jit_comp.c   | 46 ++-
 arch/x86/net/bpf_jit_comp32.c |  6 +--
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 14 --
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 +-
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 15 --
 include/linux/filter.h| 16 +--
 include/uapi/linux/bpf.h  |  5 +-
 kernel/bpf/core.c | 31 +
 kernel/bpf/disasm.c   |  6 ++-
 kernel/bpf/verifier.c | 24 ++
 lib/test_bpf.c| 14 +++---
 samples/bpf/bpf_insn.h|  4 +-
 samples/bpf/cookie_uid_helper_example.c   |  8 ++--
 samples/bpf/sock_example.c|  2 +-
 samples/bpf/test_cgrp2_attach.c   |  5 +-
 tools/include/linux/filter.h  | 15 --
 tools/include/uapi/linux/bpf.h|  5 +-
 .../bpf/prog_tests/cgroup_attach_multi.c  |  4 +-
 .../selftests/bpf/test_cgroup_storage.c   |  2 +-
 tools/testing/selftests/bpf/verifier/ctx.c|  7 ++-
 .../bpf/verifier/direct_packet_access.c   |  4 +-
 .../testing/selftests/bpf/verifier/leak_ptr.c | 10 ++--
 .../selftests/bpf/verifier/meta_access.c  |  4 +-
 tools/testing/selftests/bpf/verifier/unpriv.c |  3 +-
 .../bpf/verifier/value_illegal_alu.c  |  2 +-
 tools/testing/selftests/bpf/verifier/xadd.c   | 18 
 35 files changed, 291 insertions(+), 152 deletions(-)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index debb59e374de..1583d59d806d 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1006,13 +1006,13 @@ Size modifier is one of ...
 
 Mode modifier is one of::
 
-  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
-  BPF_ABS  0x20
-  BPF_IND  0x40
-  BPF_MEM  0x60
-  BPF_LEN  0x80  /* classic BPF only, reserved in eBPF */
-  BPF_MSH  0xa0  /* classic BPF only, reserved in eBPF */
-  BPF_XADD 0xc0  /* eBPF only, exclusive add */
+  BPF_IMM 0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
+  BPF_ABS 0x20
+  BPF_IND 0x40
+  BPF_MEM 0x60
+  BPF_LEN 0x80  /* classic BPF only, reserved in eBPF */
+  BPF_MSH 0xa0  /* classic BPF only, reserved in eBPF */
+  BPF_ATOMIC  0xc0  /* eBPF only, atomic operations */
 
 eBPF has two non-generic instructions: (BPF_ABS |  | BPF_LD) and
 (BPF_IND |  | BPF_LD) which are used to access packet data.
@@ -1044,11 +1044,19 @@ Unlike classic BPF instruction set, eBPF has generic 
load/store operations::
 BPF_MEM |  | BPF_STX:  *(size *) (dst_reg + off) = src_reg
 BPF_MEM |  | BPF_ST:   *(size *) (dst_reg + off) = imm32
 BPF_MEM |  | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
-BPF_XADD | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
-BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
 
-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
-2 byte atomic increments are not supported.
+Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
+
+It also includes atomic operations, which use the immediate field for extra
+encoding.
+
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
+
+Note that 1 and 2 byte atomic operations are not supported.
+
+You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
+the exclusive-add operation encoded when the immediate field is zero.
 
 eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM

[PATCH bpf-next v7 08/11] bpf: Pull out a macro for interpreting atomic ALU operations

2021-01-14 Thread Brendan Jackman

Since the atomic operations that are added in subsequent commits are
all isomorphic with BPF_ADD, pull out a macro to avoid the
interpreter becoming dominated by lines of atomic-related code.

Note that this sacrifices interpreter performance (combining
STX_ATOMIC_W and STX_ATOMIC_DW into single switch case means that we
need an extra conditional branch to differentiate them) in favour of
compact and (relatively!) simple C code.

Acked-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 kernel/bpf/core.c | 80 +++
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4df6daba43ef..8669e685825f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1618,55 +1618,53 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
LDX_PROBE(DW, 8)
 #undef LDX_PROBE
 
-   STX_ATOMIC_W:
-   switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
-   atomic_add((u32) SRC, (atomic_t *)(unsigned long)
-  (DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u32) atomic_fetch_add(
-   (u32) SRC,
-   (atomic_t *)(unsigned long) (DST + insn->off));
-   break;
-   case BPF_XCHG:
-   SRC = (u32) atomic_xchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) SRC);
-   break;
-   case BPF_CMPXCHG:
-   BPF_R0 = (u32) atomic_cmpxchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) BPF_R0, (u32) SRC);
+#define ATOMIC_ALU_OP(BOP, KOP)
\
+   case BOP:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   atomic_##KOP((u32) SRC, (atomic_t *)(unsigned 
long) \
+(DST + insn->off));\
+   else\
+   atomic64_##KOP((u64) SRC, (atomic64_t 
*)(unsigned long) \
+  (DST + insn->off));  \
+   break;  \
+   case BOP | BPF_FETCH:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   SRC = (u32) atomic_fetch_##KOP( \
+   (u32) SRC,  \
+   (atomic_t *)(unsigned long) (DST + 
insn->off)); \
+   else\
+   SRC = (u64) atomic64_fetch_##KOP(   \
+   (u64) SRC,  \
+   (atomic64_t *)(unsigned long) (DST + 
insn->off)); \
break;
-   default:
-   goto default_label;
-   }
-   CONT;
 
STX_ATOMIC_DW:
+   STX_ATOMIC_W:
switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
-   atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
-(DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u64) atomic64_fetch_add(
-   (u64) SRC,
-   (atomic64_t *)(unsigned long) (DST + 
insn->off));
-   break;
+   ATOMIC_ALU_OP(BPF_ADD, add)
+#undef ATOMIC_ALU_OP
+
case BPF_XCHG:
-   SRC = (u64) atomic64_xchg(
-   (atomic64_t *)(unsigned long) (DST + insn->off),
-   (u64) SRC);
+   if (BPF_SIZE(insn->code) == BPF_W)
+   SRC = (u32) atomic_xchg(
+   (atomic_t *)(unsigned long) (DST + 
insn->off),
+   (u32) SRC);
+   else
+   SRC = (u64) atomic64_xchg(
+   (atomic64_t *)(unsigned long) (DST + 
insn->off),
+   (u64) SRC);
break;
case BPF_CMPXCHG:
-

[PATCH bpf-next v7 11/11] bpf: Document new atomic instructions

2021-01-14 Thread Brendan Jackman

Document new atomic instructions.

Acked-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst | 31 +
 1 file changed, 31 insertions(+)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 1583d59d806d..f6d8f90e9a56 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1053,8 +1053,39 @@ encoding.
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
 
+The basic atomic operations supported are:
+
+BPF_ADD
+BPF_AND
+BPF_OR
+BPF_XOR
+
+Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
+memory location addresed by ``dst_reg + off`` is atomically modified, with
+``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
+immediate, then these operations also overwrite ``src_reg`` with the
+value that was in memory before it was modified.
+
+The more special operations are:
+
+BPF_XCHG
+
+This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
+off``.
+
+BPF_CMPXCHG
+
+This atomically compares the value addressed by ``dst_reg + off`` with
+``R0``. If they match it is replaced with ``src_reg``, The value that was there
+before is loaded back to ``R0``.
+
 Note that 1 and 2 byte atomic operations are not supported.
 
+Except ``BPF_ADD`` _without_ ``BPF_FETCH`` (for legacy reasons), all 4 byte
+atomic operations require alu32 mode. Clang enables this mode by default in
+architecture v3 (``-mcpu=v3``). For older versions it can be enabled with
+``-Xclang -target-feature -Xclang +alu32``.
+
 You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
 the exclusive-add operation encoded when the immediate field is zero.
 
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v7 05/11] bpf: Move BPF_STX reserved field check into BPF_STX verifier code

2021-01-14 Thread Brendan Jackman

I can't find a reason why this code is in resolve_pseudo_ldimm64;
since I'll be modifying it in a subsequent commit, tidy it up.

Signed-off-by: Brendan Jackman 
Acked-by: Yonghong Song 
Acked-by: John Fastabend 
---
 kernel/bpf/verifier.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cfc137b81ac6..d8a85f4e5b95 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9528,6 +9528,12 @@ static int do_check(struct bpf_verifier_env *env)
} else if (class == BPF_STX) {
enum bpf_reg_type *prev_dst_type, dst_reg_type;
 
+   if (((BPF_MODE(insn->code) != BPF_MEM &&
+ BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm 
!= 0)) {
+   verbose(env, "BPF_STX uses reserved fields\n");
+   return -EINVAL;
+   }
+
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
err = check_atomic(env, env->insn_idx, insn);
if (err)
@@ -10012,13 +10018,6 @@ static int resolve_pseudo_ldimm64(struct 
bpf_verifier_env *env)
return -EINVAL;
}
 
-   if (BPF_CLASS(insn->code) == BPF_STX &&
-   ((BPF_MODE(insn->code) != BPF_MEM &&
- BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) {
-   verbose(env, "BPF_STX uses reserved fields\n");
-   return -EINVAL;
-   }
-
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
-- 
2.30.0.284.gd98b1dd5eaa7-goog

[PATCH bpf-next v7 10/11] bpf: Add tests for new BPF atomic operations

2021-01-14 Thread Brendan Jackman

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

 - Avoid breaking the build for people on old versions of Clang
 - Avoid needing separate lists of test objects for no_alu32, where
   atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Acked-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
 tools/testing/selftests/bpf/Makefile  |   2 +
 .../selftests/bpf/prog_tests/atomics.c| 246 ++
 tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
 .../selftests/bpf/verifier/atomic_and.c   |  77 ++
 .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
 .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
 .../selftests/bpf/verifier/atomic_or.c|  77 ++
 .../selftests/bpf/verifier/atomic_xchg.c  |  46 
 .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
 9 files changed, 881 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index 7f8667ad113e..0552b07717b6 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -414,10 +414,12 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
$(OUTPUT)/bpf_testmod.ko\
   $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 
 # Define test_progs-no_alu32 test runner.
 TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
 
 # Define test_progs BPF-GCC-flavored test runner.
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index ..21efe7bbf10d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+   return;
+
+   prog_fd = bpf_program__fd(skel->progs.add);
+   err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+   NULL, NULL, &retval, &duration);
+   if (CHECK(err || retval, "test_run add",
+ "err %d errno %d retval %d duration %d\n", err, errno, 
retval, duration))
+   goto cleanup;
+
+   ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
+   ASSERT_EQ(skel->bss->add64_result, 1, "add64_result");
+
+   ASSERT_EQ(skel->data->add32_value, 3, "add32_value");
+   ASSERT_EQ(skel->bss->add32_result, 1, "add32_result");
+
+   ASSERT_EQ(skel->bss->add_stack_value_copy, 3, "add_stack_value");
+   ASSERT_EQ(skel->bss->add_stack_result, 1, "add_stack_result");
+
+   ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
+
+cleanup:
+   bpf_link__destroy(link);
+}
+
+static void test_sub(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.sub);
+   if (CHECK(IS_ERR(link), "attach(sub)", "err: %l

[PATCH bpf-next v7 09/11] bpf: Add bitwise atomic instructions

2021-01-14 Thread Brendan Jackman

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ version of these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers,
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Signed-off-by: Brendan Jackman 
Acked-by: Yonghong Song 
---
 arch/x86/net/bpf_jit_comp.c  | 50 +++-
 include/linux/filter.h   |  6 +
 kernel/bpf/core.c|  3 +++
 kernel/bpf/disasm.c  | 21 ---
 kernel/bpf/verifier.c|  6 +
 tools/include/linux/filter.h |  6 +
 6 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 308241187582..1d4d50199293 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -808,6 +808,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* emit opcode */
switch (atomic_op) {
case BPF_ADD:
+   case BPF_SUB:
+   case BPF_AND:
+   case BPF_OR:
+   case BPF_XOR:
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
@@ -1292,8 +1296,52 @@ st:  if (is_imm8(insn->off))
 
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+   if (insn->imm == (BPF_AND | BPF_FETCH) ||
+   insn->imm == (BPF_OR | BPF_FETCH) ||
+   insn->imm == (BPF_XOR | BPF_FETCH)) {
+   u8 *branch_target;
+   bool is64 = BPF_SIZE(insn->code) == BPF_DW;
+
+   /*
+* Can't be implemented with a single x86 insn.
+* Need to do a CMPXCHG loop.
+*/
+
+   /* Will need RAX as a CMPXCHG operand so save 
R0 */
+   emit_mov_reg(&prog, true, BPF_REG_AX, 
BPF_REG_0);
+   branch_target = prog;
+   /* Load old value */
+   emit_ldx(&prog, BPF_SIZE(insn->code),
+BPF_REG_0, dst_reg, insn->off);
+   /*
+* Perform the (commutative) operation locally,
+* put the result in the AUX_REG.
+*/
+   emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0);
+   maybe_emit_mod(&prog, AUX_REG, src_reg, is64);
+   EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
+ add_2reg(0xC0, AUX_REG, src_reg));
+   /* Attempt to swap in new value */
+   err = emit_atomic(&prog, BPF_CMPXCHG,
+ dst_reg, AUX_REG, insn->off,
+ BPF_SIZE(insn->code));
+   if (WARN_ON(err))
+   return err;
+   /*
+* ZF tells us whether we won the race. If it's
+* cleared we need to try again.
+*/
+   EMIT2(X86_JNE, -(prog - branch_target) - 2);
+   /* Return the pre-modification value */
+   emit_mov_reg(&prog, is64, src_reg, BPF_REG_0);
+   /* Restore R0 after clobbering RAX */
+   emit_mov_reg(&prog, true, BPF_REG_0, 
BPF_REG_AX);
+   break;
+
+   }
+
err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ insn->off, 
BPF_SIZE(insn->code));
if (err)
return err;
break;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d563820f197d..7fdce5407214 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -264,7 +264,13 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
  * Atomic operations:
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_AND

[PATCH bpf-next v7 06/11] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2021-01-14 Thread Brendan Jackman

The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC
instructions, in order to have the previous value of the
atomically-modified memory location loaded into the src register
after an atomic op is carried out.

Suggested-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
Acked-by: John Fastabend 
---
 arch/x86/net/bpf_jit_comp.c|  4 
 include/linux/filter.h |  1 +
 include/uapi/linux/bpf.h   |  3 +++
 kernel/bpf/core.c  | 13 +
 kernel/bpf/disasm.c|  7 +++
 kernel/bpf/verifier.c  | 33 -
 tools/include/linux/filter.h   |  1 +
 tools/include/uapi/linux/bpf.h |  3 +++
 8 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b1829a534da1..eea7d8b0bb12 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -811,6 +811,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
+   case BPF_ADD | BPF_FETCH:
+   /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
+   EMIT2(0x0F, 0xC1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 392e94b79668..23fca41b8540 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -264,6 +264,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
  * Atomic operations:
  *
  *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
  */
 
 #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6b3996343e63..ea262b009049 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -44,6 +44,9 @@
 #define BPF_CALL   0x80/* function call */
 #define BPF_EXIT   0x90/* function return */
 
+/* atomic op type fields (stored in immediate) */
+#define BPF_FETCH  0x01/* fetch previous value into src reg */
+
 /* Register numbers */
 enum {
BPF_REG_0 = 0,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4836ebf459cf..28d6000463e4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1624,16 +1624,29 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
/* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
atomic_add((u32) SRC, (atomic_t *)(unsigned long)
   (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u32) atomic_fetch_add(
+   (u32) SRC,
+   (atomic_t *)(unsigned long) (DST + insn->off));
+   break;
default:
goto default_label;
}
CONT;
+
STX_ATOMIC_DW:
switch (IMM) {
case BPF_ADD:
/* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
 (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u64) atomic64_fetch_add(
+   (u64) SRC,
+   (atomic64_t *)(unsigned long) (DST + 
insn->off));
+   break;
default:
goto default_label;
}
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 37c8d6e9b4cc..d2e20f6d0516 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off,
insn->src_reg);
+   } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+  insn->imm == (BPF_ADD | BPF_FETCH)) {
+   verbose(cbs->private_data, "(%02x) r%d = 
atomic%s_fetch_add((%s *)(r%d %+d), r%d)\n",
+   insn->code, insn->src_reg,
+   BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
+   bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+   insn->dst_reg, insn->off, insn->src_reg);
} else {
verbo

Re: [PATCH bpf-next v3 07/14] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2020-12-04 Thread Brendan Jackman

On Thu, Dec 03, 2020 at 09:27:04PM -0800, Yonghong Song wrote:
> On 12/3/20 8:02 AM, Brendan Jackman wrote:
[...]
> > diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
> > index 37c8d6e9b4cc..3ee2246a52ef 100644
> > --- a/kernel/bpf/disasm.c
> > +++ b/kernel/bpf/disasm.c
> > @@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
> > bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> > insn->dst_reg, insn->off,
> > insn->src_reg);
> > +   } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> > +  insn->imm == (BPF_ADD | BPF_FETCH)) {
> > +   verbose(cbs->private_data, "(%02x) r%d = 
> > atomic%s_fetch_add(*(%s *)(r%d %+d), r%d)\n",
> 
> We should not do dereference here (without first *), right? since the input
> is actually an address. something like below?
>r2 = atomic[64]_fetch_add((u64/u32 *)(r3 +40), r2)

Ah yep  - thanks!

[...]
> > diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
> > index 95ff51d97f25..ac7701678e1a 100644
> > --- a/tools/include/linux/filter.h
> > +++ b/tools/include/linux/filter.h
> > @@ -180,6 +180,16 @@
> > .imm   = BPF_ADD })
> >   #define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */
> > +/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + 
> > off), src_reg); */
> 
> Maybe src_reg = atomic_fetch_add(dst_reg + off, src_reg)?

Yep - and the same for the bitwise ops in the later patch.

Re: [PATCH bpf-next v3 08/14] bpf: Add instructions for atomic_[cmp]xchg

2020-12-04 Thread Brendan Jackman

On Thu, Dec 03, 2020 at 09:34:23PM -0800, Yonghong Song wrote:
> On 12/3/20 8:02 AM, Brendan Jackman wrote:
> > This adds two atomic opcodes, both of which include the BPF_FETCH
> > flag. XCHG without the BPF_FETCh flag would naturally encode
> 
> BPF_FETCh => BPF_FETCH

Thanks, sorry I think you've already pointed that one out and I didn't fix it!

> > atomic_set. This is not supported because it would be of limited
> > value to userspace (it doesn't imply any barriers). CMPXCHG without
> > BPF_FETCH would be an atomic compare-and-write. We don't have such
> > an operation in the kernel so it isn't provided to BPF either.
> > 
> > There are two significant design decisions made for the CMPXCHG
> > instruction:
> > 
> >   - To solve the issue that this operation fundamentally has 3
> > operands, but we only have two register fields. Therefore the
> > operand we compare against (the kernel's API calls it 'old') is
> > hard-coded to be R0. x86 has similar design (and A64 doesn't
> > have this problem).
> > 
> > A potential alternative might be to encode the other operand's
> > register number in the immediate field.
> > 
> >   - The kernel's atomic_cmpxchg returns the old value, while the C11
> > userspace APIs return a boolean indicating the comparison
> > result. Which should BPF do? A64 returns the old value. x86 returns
> > the old value in the hard-coded register (and also sets a
> > flag). That means return-old-value is easier to JIT.
> > 
> > Signed-off-by: Brendan Jackman 
> 
> Ack with minor comments in the above and below.

Thanks, ack to all the comments.

Have run a `grep -r "atomic_.*(\*" *.patch` - hopefully we're now free
of this mistake where the first arg is dereferenced in the
comments/disasm...

> Acked-by: Yonghong Song 
> 
> > Change-Id: I3f19ad867dfd08515eecf72674e6fdefe28424bb
> > ---
> >   arch/x86/net/bpf_jit_comp.c|  8 
> >   include/linux/filter.h | 20 
> >   include/uapi/linux/bpf.h   |  4 +++-
> >   kernel/bpf/core.c  | 20 
> >   kernel/bpf/disasm.c| 15 +++
> >   kernel/bpf/verifier.c  | 19 +--
> >   tools/include/linux/filter.h   | 20 
> >   tools/include/uapi/linux/bpf.h |  4 +++-
> >   8 files changed, 106 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index 88cb09fa3bfb..7d29bc3bb4ff 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -831,6 +831,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
> > /* src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */
> > EMIT2(0x0F, 0xC1);
> > break;
> > +   case BPF_XCHG:
> > +   /* src_reg = atomic_xchg(*(u32/u64*)(dst_reg + off), src_reg); 
> > */
> 
> src_reg = atomic_xchg((u32/u64*)(dst_reg + off), src_reg)?
> 
> > +   EMIT1(0x87);
> > +   break;
> > +   case BPF_CMPXCHG:
> > +   /* r0 = atomic_cmpxchg(*(u32/u64*)(dst_reg + off), r0, 
> > src_reg); */
> 
> r0 = atomic_cmpxchg((u32/u64*)(dst_reg + off), r0, src_reg)?
> 
> > +   EMIT2(0x0F, 0xB1);
> > +   break;
> > default:
> > pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
> > return -EFAULT;
> > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > index 4e04d0fc454f..6186280715ed 100644
> > --- a/include/linux/filter.h
> > +++ b/include/linux/filter.h
> > @@ -280,6 +280,26 @@ static inline bool insn_is_zext(const struct bpf_insn 
> > *insn)
> > .off   = OFF,   \
> > .imm   = BPF_ADD | BPF_FETCH })
> > +/* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */
> 
> src_reg = atomic_xchg(dst_reg + off, src_reg)?
> 
> > +
> > +#define BPF_ATOMIC_XCHG(SIZE, DST, SRC, OFF)   \
> > +   ((struct bpf_insn) {\
> > +   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
> > +   .dst_reg = DST, \
> > +   .src_reg = SRC, \
> > +   .off   = OFF,   \
> > +   .imm   = BPF_XCHG  })
> > +
> > +/* Atomic compare-exchange, r

Re: [PATCH bpf-next v3 09/14] bpf: Pull out a macro for interpreting atomic ALU operations

2020-12-04 Thread Brendan Jackman

On Thu, Dec 03, 2020 at 10:30:18PM -0800, Yonghong Song wrote:
> 
> 
> On 12/3/20 8:02 AM, Brendan Jackman wrote:
> > Since the atomic operations that are added in subsequent commits are
> > all isomorphic with BPF_ADD, pull out a macro to avoid the
> > interpreter becoming dominated by lines of atomic-related code.
> > 
> > Note that this sacrifices interpreter performance (combining
> > STX_ATOMIC_W and STX_ATOMIC_DW into single switch case means that we
> > need an extra conditional branch to differentiate them) in favour of
> > compact and (relatively!) simple C code.
> > 
> > Change-Id: I8cae5b66e75f34393de6063b91c05a8006fdd9e6
> > Signed-off-by: Brendan Jackman 
> 
> Ack with a minor suggestion below.
> 
> Acked-by: Yonghong Song 
> 
> > ---
> >   kernel/bpf/core.c | 79 +++
> >   1 file changed, 38 insertions(+), 41 deletions(-)
> > 
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index 28f960bc2e30..498d3f067be7 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -1618,55 +1618,52 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
> > bpf_insn *insn, u64 *stack)
> > LDX_PROBE(DW, 8)
> >   #undef LDX_PROBE
> > -   STX_ATOMIC_W:
> > -   switch (IMM) {
> > -   case BPF_ADD:
> > -   /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
> > -   atomic_add((u32) SRC, (atomic_t *)(unsigned long)
> > -  (DST + insn->off));
> > -   break;
> > -   case BPF_ADD | BPF_FETCH:
> > -   SRC = (u32) atomic_fetch_add(
> > -   (u32) SRC,
> > -   (atomic_t *)(unsigned long) (DST + insn->off));
> > -   break;
> > -   case BPF_XCHG:
> > -   SRC = (u32) atomic_xchg(
> > -   (atomic_t *)(unsigned long) (DST + insn->off),
> > -   (u32) SRC);
> > -   break;
> > -   case BPF_CMPXCHG:
> > -   BPF_R0 = (u32) atomic_cmpxchg(
> > -   (atomic_t *)(unsigned long) (DST + insn->off),
> > -   (u32) BPF_R0, (u32) SRC);
> > +#define ATOMIC(BOP, KOP)   \
> 
> ATOMIC a little bit generic. Maybe ATOMIC_FETCH_BOP?

Well it doesn't fetch in all cases and "BOP" is intended to
differentiate from KOP i.e. BOP = BPF operation KOP = Kernel operation.

Could go with ATOMIC_ALU_OP?

> > +   case BOP:   \
> > +   if (BPF_SIZE(insn->code) == BPF_W)  \
> > +   atomic_##KOP((u32) SRC, (atomic_t *)(unsigned 
> > long) \
> > +(DST + insn->off));\
> > +   else\
> > +   atomic64_##KOP((u64) SRC, (atomic64_t 
> > *)(unsigned long) \
> > +  (DST + insn->off));  \
> > +   break;  \
> > +   case BOP | BPF_FETCH:   \
> > +   if (BPF_SIZE(insn->code) == BPF_W)  \
> > +   SRC = (u32) atomic_fetch_##KOP( \
> > +   (u32) SRC,  \
> > +   (atomic_t *)(unsigned long) (DST + 
> > insn->off)); \
> > +   else\
> > +   SRC = (u64) atomic64_fetch_##KOP(   \
> > +   (u64) SRC,  \
> > +   (atomic64_t *)(s64) (DST + insn->off)); 
> > \
> > break;
> > -   default:
> > -   goto default_label;
> > -   }
> > -   CONT;
> > STX_ATOMIC_DW:
> > +   STX_ATOMIC_W:
> > switch (IMM) {
> > -   case BPF_ADD:
> > -   /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
> > -   atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
> > -(DST + insn->off));
> > -   break;
> > -   case BPF_ADD | BPF_FETCH

Re: [PATCH bpf-next v3 10/14] bpf: Add bitwise atomic instructions

2020-12-04 Thread Brendan Jackman

On Thu, Dec 03, 2020 at 10:42:19PM -0800, Yonghong Song wrote:
> 
> 
> On 12/3/20 8:02 AM, Brendan Jackman wrote:
> > This adds instructions for
> > 
> > atomic[64]_[fetch_]and
> > atomic[64]_[fetch_]or
> > atomic[64]_[fetch_]xor
> > 
> > All these operations are isomorphic enough to implement with the same
> > verifier, interpreter, and x86 JIT code, hence being a single commit.
> > 
> > The main interesting thing here is that x86 doesn't directly support
> > the fetch_ version of these operations, so we need to generate a CMPXCHG
> > loop in the JIT. This requires the use of two temporary registers,
> > IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.
> > 
> > Change-Id: I340b10cecebea8cb8a52e3606010cde547a10ed4
> > Signed-off-by: Brendan Jackman 
> > ---
> >   arch/x86/net/bpf_jit_comp.c  | 50 +-
> >   include/linux/filter.h   | 60 
> >   kernel/bpf/core.c|  5 ++-
> >   kernel/bpf/disasm.c  | 21 ++---
> >   kernel/bpf/verifier.c|  6 
> >   tools/include/linux/filter.h | 60 
> >   6 files changed, 196 insertions(+), 6 deletions(-)
> > 
[...]
> > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > index 6186280715ed..698f82897b0d 100644
> > --- a/include/linux/filter.h
> > +++ b/include/linux/filter.h
> > @@ -280,6 +280,66 @@ static inline bool insn_is_zext(const struct bpf_insn 
> > *insn)
[...]
> > +#define BPF_ATOMIC_FETCH_XOR(SIZE, DST, SRC, OFF)  \
> > +   ((struct bpf_insn) {\
> > +   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
> > +   .dst_reg = DST, \
> > +   .src_reg = SRC, \
> > +   .off   = OFF,   \
> > +   .imm   = BPF_XOR | BPF_FETCH })
> > +
> >   /* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */
> 
> Looks like BPF_ATOMIC_XOR/OR/AND/... all similar to each other.
> The same is for BPF_ATOMIC_FETCH_XOR/OR/AND/...
> 
> I am wondering whether it makes sense to have
> BPF_ATOMIC_BOP(BOP, SIZE, DST, SRC, OFF) and
> BPF_ATOMIC_FETCH_BOP(BOP, SIZE, DST, SRC, OFF)
> so that we can have fewer macros?

Hmm yeah I think that's probably a good idea, it would be consistent
with the macros for non-atomic ALU ops.

I don't think 'BOP' would be very clear though, 'ALU' might be more
obvious.

Re: [PATCH bpf-next v3 12/14] bpf: Pull tools/build/feature biz into selftests Makefile

2020-12-04 Thread Brendan Jackman

On Thu, Dec 03, 2020 at 01:01:27PM -0800, Andrii Nakryiko wrote:
> On Thu, Dec 3, 2020 at 8:07 AM Brendan Jackman  wrote:
> >
> > This is somewhat cargo-culted from the libbpf build. It will be used
> > in a subsequent patch to query for Clang BPF atomics support.
> >
> > Change-Id: I9318a1702170eb752acced35acbb33f45126c44c
> 
> Haven't seen this before. What's this Change-Id business?

Argh, apologies. Looks like it's time for me to adopt a less error-prone
workflow for sending patches.

(This is noise from Gerrit, which we sometimes use for internal reviews)

> > Signed-off-by: Brendan Jackman 
> > ---
> >  tools/testing/selftests/bpf/.gitignore |  1 +
> >  tools/testing/selftests/bpf/Makefile   | 38 ++
> >  2 files changed, 39 insertions(+)
> 
> All this just to detect the support for clang atomics?... Let's not
> pull in the entire feature-detection framework unnecessarily,
> selftests Makefile is complicated enough without that.

Then the test build would break for people who haven't updated Clang.
Is that acceptable?

I'm aware of cases where you need to be on a pretty fresh Clang for
tests to _pass_ so maybe it's fine.

Re: [PATCH bpf-next v3 13/14] bpf: Add tests for new BPF atomic operations

2020-12-04 Thread Brendan Jackman

On Thu, Dec 03, 2020 at 11:06:31PM -0800, Yonghong Song wrote:
> On 12/3/20 8:02 AM, Brendan Jackman wrote:
[...]
> > diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c 
> > b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
> > new file mode 100644
> > index ..66f0ccf4f4ec
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
> > @@ -0,0 +1,262 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#include 
> > +
> > +
> > +#include "atomics_test.skel.h"
> > +
> > +static struct atomics_test *setup(void)
> > +{
> > +   struct atomics_test *atomics_skel;
> > +   __u32 duration = 0, err;
> > +
> > +   atomics_skel = atomics_test__open_and_load();
> > +   if (CHECK(!atomics_skel, "atomics_skel_load", "atomics skeleton 
> > failed\n"))
> > +   return NULL;
> > +
> > +   if (atomics_skel->data->skip_tests) {
> > +   printf("%s:SKIP:no ENABLE_ATOMICS_TEST (missing Clang BPF 
> > atomics support)",
> > +  __func__);
> > +   test__skip();
> > +   goto err;
> > +   }
> > +
> > +   err = atomics_test__attach(atomics_skel);
> > +   if (CHECK(err, "atomics_attach", "atomics attach failed: %d\n", err))
> > +   goto err;
> > +
> > +   return atomics_skel;
> > +
> > +err:
> > +   atomics_test__destroy(atomics_skel);
> > +   return NULL;
> > +}
> > +
> > +static void test_add(void)
> > +{
> > +   struct atomics_test *atomics_skel;
> > +   int err, prog_fd;
> > +   __u32 duration = 0, retval;
> > +
> > +   atomics_skel = setup();
> 
> When running the test, I observed a noticeable delay between skel load and
> skel attach. The reason is the bpf program object file contains
> multiple programs and the above setup() tries to do attachment
> for ALL programs but actually below only "add" program is tested.
> This will unnecessarily increase test_progs running time.
> 
> The best is for setup() here only load and attach program "add".
> The libbpf API bpf_program__set_autoload() can set a particular
> program not autoload. You can call attach function explicitly
> for one specific program. This should be able to reduce test
> running time.

Interesting, thanks a lot - I'll try this out next week. Maybe we can
actually load all the progs once at the beginning (i.e. in
test_atomics_test) then attach/detach each prog individually as needed...
Sorry, I haven't got much of a grip on libbpf yet.

Re: [PATCH v2 bpf-next 02/13] bpf: x86: Factor out emission of REX byte

2020-12-01 Thread Brendan Jackman

On Sat, Nov 28, 2020 at 05:14:05PM -0800, Alexei Starovoitov wrote:
> On Fri, Nov 27, 2020 at 05:57:27PM +0000, Brendan Jackman wrote:
> > The JIT case for encoding atomic ops is about to get more
> > complicated. In order to make the review & resulting code easier,
> > let's factor out some shared helpers.
> > 
> > Signed-off-by: Brendan Jackman 
> > ---
> >  arch/x86/net/bpf_jit_comp.c | 39 ++---
> >  1 file changed, 23 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index 94b17bd30e00..a839c1a54276 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -702,6 +702,21 @@ static void emit_modrm_dstoff(u8 **pprog, u32 r1, u32 
> > r2, int off)
> > *pprog = prog;
> >  }
> >  
> > +/*
> > + * Emit a REX byte if it will be necessary to address these registers
> 
> What is "REX byte" ?
> Maybe rename it to maybe_emit_mod() ?

Er, this is the REX prefix as described in
https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix

Would maybe_emit_mod be accurate? In my mind "mod" is a field in the
ModR/M byte which comes _after_ the opcode. Before developing this
patchset I knew almost nothing about x86, so maybe I'm missing something
about the general terminology?

> > + */
> > +static void maybe_emit_rex(u8 **pprog, u32 reg_rm, u32 reg_reg, bool wide)
> 
> could you please keep original names as dst_reg/src_reg instead of 
> reg_rm/reg_reg ?
> reg_reg reads really odd and reg_rm is equally puzzling unless the reader 
> studied
> intel's manual. I didn't. All these new abbreviations are challenging for me.

OK. I originally changed it to use the x86 names because in theory you
could do:

  maybe_emit_rex(&prog, src_reg, dst_reg);

so the names would look backwards when you jump into the function
implementation.

> > +{
> > +   u8 *prog = *pprog;
> > +   int cnt = 0;
> > +
> > +   if (wide)
> 
> what is 'wide' ? Why not to call it 'bool is_alu64' ?

Ack - there's precedent in the file for 'is64' so I'll go with that.

Re: [PATCH v2 bpf-next 01/13] bpf: x86: Factor out emission of ModR/M for *(reg + off)

2020-12-01 Thread Brendan Jackman

On Sat, Nov 28, 2020 at 05:15:52PM -0800, Alexei Starovoitov wrote:
> On Fri, Nov 27, 2020 at 05:57:26PM +0000, Brendan Jackman wrote:
> > +/* Emit the ModR/M byte for addressing *(r1 + off) and r2 */
> > +static void emit_modrm_dstoff(u8 **pprog, u32 r1, u32 r2, int off)
> 
> same concern as in the other patch. If you could avoid intel's puzzling 
> names
> like above it will make reviewing the patch easier.

In this case there is actually a call like

  emit_modrm_dstoff(&prog, src_reg, dst_reg)

So calling the function args dst_reg, src_reg would be misleading.

I could call them ptr_reg and val_reg or something?

Re: [PATCH v2 bpf-next 05/13] bpf: Rename BPF_XADD and prepare to encode other atomics in .imm

2020-12-01 Thread Brendan Jackman

On Fri, Nov 27, 2020 at 07:43:46PM -0800, Yonghong Song wrote:
> 
> 
> On 11/27/20 9:57 AM, Brendan Jackman wrote:
> > diff --git a/Documentation/networking/filter.rst 
> > b/Documentation/networking/filter.rst
> [...]
> > diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c 
> > b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> > index 0a721f6e8676..1c9efc74edfc 100644
> > --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> > +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> > @@ -3109,13 +3109,19 @@ mem_xadd(struct nfp_prog *nfp_prog, struct 
> > nfp_insn_meta *meta, bool is64)
> > return 0;
> >   }
> > -static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
> > +static int mem_atomic4(struct nfp_prog *nfp_prog, struct nfp_insn_meta 
> > *meta)
> >   {
> > +   if (meta->insn.off != BPF_ADD)
> > +   return -EOPNOTSUPP;
> 
> You probably missed this change. it should be meta->insn.imm != BPF_ADD.
> 
> > +
> > return mem_xadd(nfp_prog, meta, false);
> >   }
> > -static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
> > +static int mem_atomic8(struct nfp_prog *nfp_prog, struct nfp_insn_meta 
> > *meta)
> >   {
> > +   if (meta->insn.off != BPF_ADD)
> 
> same as above.

Dang. Many thanks for the careful review!

Re: [PATCH v2 bpf-next 07/13] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2020-12-01 Thread Brendan Jackman

On Fri, Nov 27, 2020 at 08:15:49PM -0800, Yonghong Song wrote:
> 
> 
> On 11/27/20 9:57 AM, Brendan Jackman wrote:
[...]
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index e8b41ccdfb90..cd4c03b25573 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -3602,7 +3602,11 @@ static int check_atomic(struct bpf_verifier_env 
> > *env, int insn_idx, struct bpf_i
> >   {
> > int err;
> > -   if (insn->imm != BPF_ADD) {
> > +   switch (insn->imm) {
> > +   case BPF_ADD:
> > +   case BPF_ADD | BPF_FETCH:
> > +   break;
> > +   default:
> > verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", 
> > insn->imm);
> > return -EINVAL;
> > }
> > @@ -3631,7 +3635,7 @@ static int check_atomic(struct bpf_verifier_env *env, 
> > int insn_idx, struct bpf_i
> > is_pkt_reg(env, insn->dst_reg) ||
> > is_flow_key_reg(env, insn->dst_reg) ||
> > is_sk_reg(env, insn->dst_reg)) {
> > -   verbose(env, "atomic stores into R%d %s is not allowed\n",
> > +   verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
> > insn->dst_reg,
> > reg_type_str[reg_state(env, insn->dst_reg)->type]);
> > return -EACCES;
> > @@ -3644,8 +3648,20 @@ static int check_atomic(struct bpf_verifier_env 
> > *env, int insn_idx, struct bpf_i
> > return err;
> > /* check whether we can write into the same memory */
> > -   return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> > -   BPF_SIZE(insn->code), BPF_WRITE, -1, true);
> > +   err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> > +  BPF_SIZE(insn->code), BPF_WRITE, -1, true);
> > +   if (err)
> > +   return err;
> > +
> > +   if (!(insn->imm & BPF_FETCH))
> > +   return 0;
> > +
> > +   /* check and record load of old value into src reg  */
> > +   err = check_reg_arg(env, insn->src_reg, DST_OP);
> > +   if (err)
> > +   return err;
> > +
> > +   return 0;
> >   }
> >   static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
> > @@ -9501,12 +9517,6 @@ static int do_check(struct bpf_verifier_env *env)
> > } else if (class == BPF_STX) {
> > enum bpf_reg_type *prev_dst_type, dst_reg_type;
> > -   if (((BPF_MODE(insn->code) != BPF_MEM &&
> > - BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm 
> > != 0)) {
> > -   verbose(env, "BPF_STX uses reserved fields\n");
> > -   return -EINVAL;
> > -   }
> > -
> > if (BPF_MODE(insn->code) == BPF_ATOMIC) {
> > err = check_atomic(env, env->insn_idx, insn);
> > if (err)
> > @@ -9515,6 +9525,11 @@ static int do_check(struct bpf_verifier_env *env)
> > continue;
> > }
> > +   if (BPF_MODE(insn->code) != BPF_MEM && insn->imm != 0) {
> 
> "||" here instead of "&&"?

Right - thanks again!

Re: [PATCH v2 bpf-next 08/13] bpf: Add instructions for atomic_[cmp]xchg

2020-12-01 Thread Brendan Jackman

On Fri, Nov 27, 2020 at 09:25:53PM -0800, Yonghong Song wrote:
> 
> 
> On 11/27/20 9:57 AM, Brendan Jackman wrote:
> > This adds two atomic opcodes, both of which include the BPF_FETCH
> > flag. XCHG without the BPF_FETCh flag would naturally encode
> 
> BPF_FETCH

Ack, thanks

> > atomic_set. This is not supported because it would be of limited
> > value to userspace (it doesn't imply any barriers). CMPXCHG without
> > BPF_FETCH would be an atomic compare-and-write. We don't have such
> > an operation in the kernel so it isn't provided to BPF either.
> > 
> > There are two significant design decisions made for the CMPXCHG
> > instruction:
> > 
> >   - To solve the issue that this operation fundamentally has 3
> > operands, but we only have two register fields. Therefore the
> > operand we compare against (the kernel's API calls it 'old') is
> > hard-coded to be R0. x86 has similar design (and A64 doesn't
> > have this problem).
> > 
> > A potential alternative might be to encode the other operand's
> > register number in the immediate field.
> > 
> >   - The kernel's atomic_cmpxchg returns the old value, while the C11
> > userspace APIs return a boolean indicating the comparison
> > result. Which should BPF do? A64 returns the old value. x86 returns
> > the old value in the hard-coded register (and also sets a
> > flag). That means return-old-value is easier to JIT.
> > 
> > Signed-off-by: Brendan Jackman 
> > ---
> >   arch/x86/net/bpf_jit_comp.c|  8 
> >   include/linux/filter.h | 20 
> >   include/uapi/linux/bpf.h   |  4 +++-
> >   kernel/bpf/core.c  | 20 
> >   kernel/bpf/disasm.c| 15 +++
> >   kernel/bpf/verifier.c  | 19 +--
> >   tools/include/linux/filter.h   | 20 
> >   tools/include/uapi/linux/bpf.h |  4 +++-
> >   8 files changed, 106 insertions(+), 4 deletions(-)
> > 
> [...]
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index cd4c03b25573..c8311cc114ec 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -3601,10 +3601,13 @@ static int check_mem_access(struct bpf_verifier_env 
> > *env, int insn_idx, u32 regn
> >   static int check_atomic(struct bpf_verifier_env *env, int insn_idx, 
> > struct bpf_insn *insn)
> >   {
> > int err;
> > +   int load_reg;
> > switch (insn->imm) {
> > case BPF_ADD:
> > case BPF_ADD | BPF_FETCH:
> > +   case BPF_XCHG:
> > +   case BPF_CMPXCHG:
> > break;
> > default:
> > verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", 
> > insn->imm);
> > @@ -3626,6 +3629,13 @@ static int check_atomic(struct bpf_verifier_env 
> > *env, int insn_idx, struct bpf_i
> > if (err)
> > return err;
> > +   if (insn->imm == BPF_CMPXCHG) {
> > +   /* check src3 operand */
> 
> better comment about what src3 means here?

Ack,  adding "Check comparison of R0 with memory location"

Re: [PATCH v2 bpf-next 08/13] bpf: Add instructions for atomic_[cmp]xchg

2020-12-01 Thread Brendan Jackman

On Sat, Nov 28, 2020 at 05:27:48PM -0800, Alexei Starovoitov wrote:
> On Fri, Nov 27, 2020 at 05:57:33PM +0000, Brendan Jackman wrote:
> >  
> >  /* atomic op type fields (stored in immediate) */
> > -#define BPF_FETCH  0x01/* fetch previous value into src reg */
> > +#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* atomic exchange */
> > +#define BPF_CMPXCHG(0xf0 | BPF_FETCH)  /* atomic 
> > compare-and-write */
> > +#define BPF_FETCH  0x01/* fetch previous value into src reg or r0*/
> 
> I think such comment is more confusing than helpful.
> I'd just say that the fetch bit is not valid on its own.
> It's used to build other instructions like cmpxchg and atomic_fetch_add.

OK sounds good.

> > +   } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> > +  insn->imm == (BPF_CMPXCHG)) {
> 
> redundant ().

Ack, thanks

> > +   verbose(cbs->private_data, "(%02x) r0 = 
> > atomic%s_cmpxchg(*(%s *)(r%d %+d), r0, r%d)\n",
> > +   insn->code,
> > +   BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
> > +   bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> > +   insn->dst_reg, insn->off,
> > +   insn->src_reg);
> > +   } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> > +  insn->imm == (BPF_XCHG)) {
> 
> redundant ().

Ack, thanks

Re: [PATCH v2 bpf-next 10/13] bpf: Add instructions for atomic[64]_[fetch_]sub

2020-12-01 Thread Brendan Jackman

On Mon, Nov 30, 2020 at 09:18:09AM -0800, Yonghong Song wrote:
> On 11/28/20 5:34 PM, Alexei Starovoitov wrote:
> > On Fri, Nov 27, 2020 at 09:35:07PM -0800, Yonghong Song wrote:
> > > On 11/27/20 9:57 AM, Brendan Jackman wrote:
[...]
> > > > +#define BPF_ATOMIC_SUB(SIZE, DST, SRC, OFF)\
> > > > +   ((struct bpf_insn) {\
> > > > +   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
> > > > +   .dst_reg = DST, \
> > > > +   .src_reg = SRC, \
> > > > +   .off   = OFF,   \
> > > > +   .imm   = BPF_SUB })
> > > 
> > > Currently, llvm does not support XSUB, should we support it in llvm?
> > > At source code, as implemented in JIT, user can just do a negate
> > > followed by xadd.
> > 
> > I forgot we have BPF_NEG insn :)
> > Indeed it's probably easier to handle atomic_fetch_sub() builtin
> > completely on llvm side. It can generate bpf_neg followed by 
> > atomic_fetch_add.
> 
> Just tried. llvm selectiondag won't be able to automatically
> convert atomic_fetch_sub to neg + atomic_fetch_add. So there
> will be a need in BPFInstrInfo.td to match atomic_fetch_sub IR
> pattern. I will experiment this together with xsub.
> 
> > No need to burden verifier, interpreter and JITs with it.
> > 

I guess it's also worth remembering other archs might have an atomic
subtract.

Re: [PATCH v2 bpf-next 12/13] bpf: Add tests for new BPF atomic operations

2020-12-01 Thread Brendan Jackman

On Mon, Nov 30, 2020 at 07:55:02PM -0800, Yonghong Song wrote:
> On 11/27/20 9:57 AM, Brendan Jackman wrote:
[...]
> > diff --git a/tools/testing/selftests/bpf/Makefile 
> > b/tools/testing/selftests/bpf/Makefile
> > index 3d5940cd110d..5eadfd09037d 100644
> > --- a/tools/testing/selftests/bpf/Makefile
> > +++ b/tools/testing/selftests/bpf/Makefile
> > @@ -228,6 +228,12 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E -  > \
> > grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
> >   MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
> > +# Determine if Clang supports BPF arch v4, and therefore atomics.
> > +CLANG_SUPPORTS_V4=$(if $(findstring v4,$(shell $(CLANG) --target=bpf 
> > -mcpu=? 2>&1)),true,)
> > +ifeq ($(CLANG_SUPPORTS_V4),true)
> > +   CFLAGS += -DENABLE_ATOMICS_TESTS
> > +endif
> > +
> >   CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
> >   BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) 
> > \
> >  -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
> > @@ -250,7 +256,9 @@ define CLANG_BPF_BUILD_RULE
> > $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
> > $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
> > -c $1 -o - || echo "BPF obj compilation failed") |  \
> > -   $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
> > +   $(LLC) -mattr=dwarfris -march=bpf   \
> > +   -mcpu=$(if $(CLANG_SUPPORTS_V4),v4,v3)  \
> > +   $4 -filetype=obj -o $2
> >   endef
> >   # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
> >   define CLANG_NOALU32_BPF_BUILD_RULE
> > @@ -391,7 +399,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c 
> > trace_helpers.c  \
> >   TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
> > \
> >$(wildcard progs/btf_dump_test_case_*.c)
> >   TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
> > -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
> > +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(if 
> > $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,)
> 
> If the compiler indeed supports cpu v4 (i.e., atomic insns),
> -DENABLE_ATOMICS_TESTS will be added to TRUNNER_BPF_FLAGS and
> eventually -DENABLE_ATOMICS_TESTS is also available for
> no-alu32 test and this will cause compilation error.
> 
> I did the following hack to workaround the issue, i.e., only adds
> the definition to default (alu32) test run.
> 
> index 5eadfd09037d..3d1320fd93eb 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -230,9 +230,6 @@ MENDIAN=$(if
> $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
> 
>  # Determine if Clang supports BPF arch v4, and therefore atomics.
>  CLANG_SUPPORTS_V4=$(if $(findstring v4,$(shell $(CLANG) --target=bpf
> -mcpu=? 2>&1)),true,)
> -ifeq ($(CLANG_SUPPORTS_V4),true)
> -   CFLAGS += -DENABLE_ATOMICS_TESTS
> -endif
> 
>  CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
>  BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  \
> @@ -255,6 +252,7 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
>  define CLANG_BPF_BUILD_RULE
> $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
> $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
> +   $(if $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,)  \
> -c $1 -o - || echo "BPF obj compilation failed") |  \
> $(LLC) -mattr=dwarfris -march=bpf   \
> -mcpu=$(if $(CLANG_SUPPORTS_V4),v4,v3)  \
> @@ -399,7 +397,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c
> trace_helpers.c  \
>  TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read  \
>$(wildcard progs/btf_dump_test_case_*.c)
>  TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
> -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(if
> $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,)
> +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
>  TRUNNER_BPF_LDFLAGS := -mattr=+alu32
>  $(eval $(call DEFINE_TEST_RUNNER,test_progs))

Ah, good point. I think your "hack" actually improves the overall result
anyway since it avoids the awkward global mutation of CFLAGS. Thanks!

I wonder if we should actually have Clang define a built-in macro to say
that the atomics are supported?

> > diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c 
> &

Re: [PATCH v2 bpf-next 01/13] bpf: x86: Factor out emission of ModR/M for *(reg + off)

2020-12-02 Thread Brendan Jackman

On Tue, Dec 01, 2020 at 09:50:00PM -0800, Alexei Starovoitov wrote:
> On Tue, Dec 1, 2020 at 4:14 AM Brendan Jackman  wrote:
> >
> > On Sat, Nov 28, 2020 at 05:15:52PM -0800, Alexei Starovoitov wrote:
> > > > On Fri, Nov 27, 2020 at 05:57:26PM +0000, Brendan Jackman wrote:
> > > > +/* Emit the ModR/M byte for addressing *(r1 + off) and r2 */
> > > > +static void emit_modrm_dstoff(u8 **pprog, u32 r1, u32 r2, int off)
> > >
> > > same concern as in the another patch. If you could avoid intel's puzzling 
> > > names
> > > like above it will make reviewing the patch easier.
> >
> > In this case there is actually a call like
> >
> >   emit_modrm_dstoff(&prog, src_reg, dst_reg)
> 
> emit_insn_prefix() ?

Ah sorry, I thought you were talking about the _arg_ names.

This isn't a prefix, but emit_insn_suffix sounds good.

Re: [PATCH v2 bpf-next 02/13] bpf: x86: Factor out emission of REX byte

2020-12-02 Thread Brendan Jackman

On Tue, Dec 01, 2020 at 09:48:36PM -0800, Alexei Starovoitov wrote:
> On Tue, Dec 1, 2020 at 4:12 AM Brendan Jackman  wrote:
> >
> > On Sat, Nov 28, 2020 at 05:14:05PM -0800, Alexei Starovoitov wrote:
> > > > On Fri, Nov 27, 2020 at 05:57:27PM +0000, Brendan Jackman wrote:
> > > > The JIT case for encoding atomic ops is about to get more
> > > > complicated. In order to make the review & resulting code easier,
> > > > let's factor out some shared helpers.
> > > >
> > > > Signed-off-by: Brendan Jackman 
> > > > ---
> > > >  arch/x86/net/bpf_jit_comp.c | 39 ++---
> > > >  1 file changed, 23 insertions(+), 16 deletions(-)
> > > >
> > > > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > > > index 94b17bd30e00..a839c1a54276 100644
> > > > --- a/arch/x86/net/bpf_jit_comp.c
> > > > +++ b/arch/x86/net/bpf_jit_comp.c
> > > > @@ -702,6 +702,21 @@ static void emit_modrm_dstoff(u8 **pprog, u32 r1, 
> > > > u32 r2, int off)
> > > > *pprog = prog;
> > > >  }
> > > >
> > > > +/*
> > > > + * Emit a REX byte if it will be necessary to address these registers
> > >
> > > What is "REX byte" ?
> > > May be rename it to maybe_emit_mod() ?
> >
> > Er, this is the REX prefix as described in
> > https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix
> >
> > Would maybe_emit_mod be accurate? In my mind "mod" is a field in the
> > ModR/M byte which comes _after_ the opcode. Before developing this
> > patchset I knew almost nothing about x86, so maybe I'm missing something
> > about the general terminology?
> 
> I wrote the JIT without looking into the manual and without studying
> the terminology.
> Why? Because it was not necessary. I still don't see a reason why
> that obscure terminology needs to be brought in into the code.
> 'mod' to me is a 'modifier'. Nothing to do with intel's modrm thing.

OK, calling it maybe_emit_mod(pprog, dst_reg, src_reg)

Re: [PATCH v2 bpf-next 10/13] bpf: Add instructions for atomic[64]_[fetch_]sub

2020-12-02 Thread Brendan Jackman

On Tue, Dec 01, 2020 at 09:55:22PM -0800, Alexei Starovoitov wrote:
> On Tue, Dec 1, 2020 at 4:38 AM Brendan Jackman  wrote:
> >
> > I guess it's also worth remembering other archs might have an atomic
> > subtract.
> 
> which one?
> arm64 LSE implements atomic_fetch_sub as neg+ldadd.
> imo x64 and arm64 example outweighs choices by other archs if there are such.
> Even without LSE it will be neg+llsc loop.
> The reason I proposed bpf xsub insn earlier is that I thought that llvm
> won't be able to emit it so easily and JIT/verifier would struggle.

Ack, I'll drop the atomic subtract instruction.

Re: [PATCH v2 bpf-next 12/13] bpf: Add tests for new BPF atomic operations

2020-12-02 Thread Brendan Jackman

On Tue, Dec 01, 2020 at 06:22:50PM -0800, Andrii Nakryiko wrote:
> On Fri, Nov 27, 2020 at 10:01 AM Brendan Jackman  wrote:
[...]
> > +
> > +static void test_xchg(void)
> > +{
> > +   struct atomics_test *atomics_skel = NULL;
> 
> nit: = NULL is unnecessary
[...]
> > +   CHECK(atomics_skel->data->xchg32_value != 2, "xchg32_value",
> > + "32bit xchg left unexpected value (got %d want 2)\n",
> > + atomics_skel->data->xchg32_value);
> > +   CHECK(atomics_skel->bss->xchg32_result != 1, "xchg_result",
> > + "32bit xchg returned bad result (got %d want 1)\n",
> > + atomics_skel->bss->xchg32_result);
> 
> ASSERT_EQ() is less verbose.
> 
> > +
> > +cleanup:
> > +   atomics_test__destroy(atomics_skel);
> > +}
> > +
> > +void test_atomics_test(void)
> > +{
> 
> why the gigantic #ifdef/#else block if you could do the check here,
> skip and exit?
> 
> > +   test_add();
> > +   test_sub();
> > +   test_and();
> > +   test_or();
> > +   test_xor();
> > +   test_cmpxchg();
> > +   test_xchg();
> 
> 
> please model these as sub-tests, it will be easier to debug, if anything
> 
> > +}
> > +
> > +#else /* ENABLE_ATOMICS_TESTS */
> > +
> > +void test_atomics_test(void)
> > +{
> > +   printf("%s:SKIP:no ENABLE_ATOMICS_TEST (missing Clang BPF atomics 
> > support)",
> > +  __func__);
> > +   test__skip();
> > +}
> > +
> > +#endif /* ENABLE_ATOMICS_TESTS */
> > diff --git a/tools/testing/selftests/bpf/progs/atomics_test.c 
> > b/tools/testing/selftests/bpf/progs/atomics_test.c
> > new file mode 100644
> > index ..3139b00937e5
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/atomics_test.c
> > @@ -0,0 +1,124 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#ifdef ENABLE_ATOMICS_TESTS
> > +
> > +__u64 add64_value = 1;
> > +__u64 add64_result = 0;
> > +__u32 add32_value = 1;
> > +__u32 add32_result = 0;
> > +__u64 add_stack_value_copy = 0;
> > +__u64 add_stack_result = 0;
> 
> empty line here
> 
> > +SEC("fentry/bpf_fentry_test1")
> > +int BPF_PROG(add, int a)
> > +{
> > +   __u64 add_stack_value = 1;
> > +
> > +   add64_result = __sync_fetch_and_add(&add64_value, 2);
> > +   add32_result = __sync_fetch_and_add(&add32_value, 2);
> > +   add_stack_result = __sync_fetch_and_add(&add_stack_value, 2);
> > +   add_stack_value_copy = add_stack_value;
> > +
> > +   return 0;
> > +}
> > +
> > +__s64 sub64_value = 1;
> > +__s64 sub64_result = 0;
> > +__s32 sub32_value = 1;
> > +__s32 sub32_result = 0;
> > +__s64 sub_stack_value_copy = 0;
> > +__s64 sub_stack_result = 0;
> 
> same
> 
> > +SEC("fentry/bpf_fentry_test1")
> > +int BPF_PROG(sub, int a)
> > +{
> > +   __u64 sub_stack_value = 1;
> > +
> > +   sub64_result = __sync_fetch_and_sub(&sub64_value, 2);
> > +   sub32_result = __sync_fetch_and_sub(&sub32_value, 2);
> > +   sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2);
> > +   sub_stack_value_copy = sub_stack_value;
> > +
> > +   return 0;
> > +}
> > +
> > +__u64 and64_value = (0x110ull << 32);
> > +__u64 and64_result = 0;
> > +__u32 and32_value = 0x110;
> > +__u32 and32_result = 0;
> 
> yep
> 
> > +SEC("fentry/bpf_fentry_test1")
> > +int BPF_PROG(and, int a)
> > +{
> > +
> > +   and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32);
> > +   and32_result = __sync_fetch_and_and(&and32_value, 0x011);
> > +
> > +   return 0;
> > +}
> > +
> > +__u64 or64_value = (0x110ull << 32);
> > +__u64 or64_result = 0;
> > +__u32 or32_value = 0x110;
> > +__u32 or32_result = 0;
> 
> here too
> 
> > +SEC("fentry/bpf_fentry_test1")
> > +int BPF_PROG(or, int a)
> > +{
> > +   or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32);
> > +   or32_result = __sync_fetch_and_or(&or32_value, 0x011);
> > +
> > +   return 0;
> > +}
> > +
> > +__u64 xor64_value = (0x110ull << 32);
> > +__u64 xor64_result = 0;
> > +__u32 xor32_value = 0x110;
> > +__u32 xor32_result = 0;
> 
> you get the idea... How often do you define global variables in
> user-space code right next to the function without an extra line
> between them?..
> 
[...]
> > +   cmpxchg64_result_succeed = __sync_val_compare_and_swap(
> > +   &cmpxchg64_value, 1, 2);
> > +
> > +   cmpxchg32_result_fail = __sync_val_compare_and_swap(
> > +   &cmpxchg32_value, 0, 3);
> > +   cmpxchg32_result_succeed = __sync_val_compare_and_swap(
> > +   &cmpxchg32_value, 1, 2);
> 
> single lines are fine here and much more readable

Thanks, ack to all comments.

[PATCH bpf-next] tools/resolve_btfids: Fix some error messages

2020-12-03 Thread Brendan Jackman

Add missing newlines and fix polarity of strerror argument.

Signed-off-by: Brendan Jackman 
---
 tools/bpf/resolve_btfids/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index dfa540d8a02d..e3ea569ee125 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -454,7 +454,7 @@ static int symbols_collect(struct object *obj)
return -ENOMEM;
 
if (id->addr_cnt >= ADDR_CNT) {
-   pr_err("FAILED symbol %s crossed the number of allowed 
lists",
+   pr_err("FAILED symbol %s crossed the number of allowed 
lists\n",
id->name);
return -1;
}
@@ -477,8 +477,8 @@ static int symbols_resolve(struct object *obj)
btf = btf__parse(obj->btf ?: obj->path, NULL);
err = libbpf_get_error(btf);
if (err) {
-   pr_err("FAILED: load BTF from %s: %s",
-   obj->path, strerror(err));
+   pr_err("FAILED: load BTF from %s: %s\n",
+   obj->path, strerror(-err));
return -1;
}
 

base-commit: 97306be45fbe7a02461c3c2a57e666cf662b1aaf
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next] bpf: Fix cold build of test_progs-no_alu32

2020-12-03 Thread Brendan Jackman

This object lives inside the trunner output dir,
i.e. tools/testing/selftests/bpf/no_alu32/btf_data.o

At some point it gets copied into the parent directory during another
part of the build, but that doesn't happen when building
test_progs-no_alu32 from clean.

Signed-off-by: Brendan Jackman 
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index 894192c319fb..371b022d932c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -378,7 +378,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)   
\
 | $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
-   $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
+   $(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
 
 endef
 

base-commit: 97306be45fbe7a02461c3c2a57e666cf662b1aaf
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 00/14] Atomics for eBPF

2020-12-03 Thread Brendan Jackman

Status of the patches
=====================

Thanks for the reviews! Differences from v2->v3 [1]:

* More minor fixes and naming/comment changes

* Dropped atomic subtract: compilers can implement this by preceding
  an atomic add with a NEG instruction (which is what the x86 JIT did
  under the hood anyway).

* Dropped the use of -mcpu=v4 in the Clang BPF command-line; there is
  no longer an architecture version bump. Instead a feature test is
  added to Kbuild - it builds a source file to check if Clang
  supports BPF atomics.

* Fixed the prog_test so it no longer breaks
  test_progs-no_alu32. This requires some ifdef acrobatics to avoid
  complicating the prog_tests model where the same userspace code
  exercises both the normal and no_alu32 BPF test objects, using the
  same skeleton header.

Differences from v1->v2 [1]:

* Fixed mistakes in the netronome driver

* Added sub, and, or, xor operations

* The above led to some refactors to keep things readable. (Maybe I
  should have just waited until I'd implemented these before starting
  the review...)

* Replaced BPF_[CMP]SET | BPF_FETCH with just BPF_[CMP]XCHG, which
  include the BPF_FETCH flag

* Added a bit of documentation. Suggestions welcome for more places
  to dump this info...

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in https://reviews.llvm.org/D72184

This only includes a JIT implementation for x86_64 - I don't plan to
implement JIT support myself for other architectures.

Operations
==========

This patchset adds atomic operations to the eBPF instruction set. The
use-case that motivated this work was a trivial and efficient way to
generate globally-unique cookies in BPF progs, but I think it's
obvious that these features are pretty widely applicable.  The
instructions that are added here can be summarised with this list of
kernel operations:

* atomic[64]_[fetch_]add
* atomic[64]_[fetch_]and
* atomic[64]_[fetch_]or
* atomic[64]_xchg
* atomic[64]_cmpxchg

The following are left out of scope for this effort:

* 16 and 8 bit operations
* Explicit memory barriers

Encoding
--------

I originally planned to add new values for bpf_insn.opcode. This was
rather unpleasant: the opcode space has holes in it but no entire
instruction classes[2]. Yonghong Song had a better idea: use the
immediate field of the existing STX XADD instruction to encode the
operation. This works nicely, without breaking existing programs,
because the immediate field is currently reserved-must-be-zero, and
extra-nicely because BPF_ADD happens to be zero.

Note that this of course makes immediate-source atomic operations
impossible. It's hard to imagine a measurable speedup from such
instructions, and if it existed it would certainly not benefit x86,
which has no support for them.

The BPF_OP opcode fields are re-used in the immediate, and an
additional flag BPF_FETCH is used to mark instructions that should
fetch a pre-modification value from memory.

So, BPF_XADD is now called BPF_ATOMIC (the old name is kept to avoid
breaking userspace builds), and where we previously had .imm = 0, we
now have .imm = BPF_ADD (which is 0).

Operands
--------

Reg-source eBPF instructions only have two operands, while these
atomic operations have up to four. To avoid needing to encode
additional operands:

- One of the input registers is re-used as an output register
  (e.g. atomic_fetch_add both reads from and writes to the source
  register).

- Where necessary (i.e. for cmpxchg), R0 is "hard-coded" as one of
  the operands.

This approach also allows the new eBPF instructions to map directly
to single x86 instructions.

[1] Previous patchset:
https://lore.kernel.org/bpf/20201123173202.1335708-1-jackm...@google.com/

[2] Visualisation of eBPF opcode space:
https://gist.github.com/bjackman/00fdad2d5dfff601c1918bc29b16e778


Brendan Jackman (14):
  bpf: x86: Factor out emission of ModR/M for *(reg + off)
  bpf: x86: Factor out emission of REX byte
  bpf: x86: Factor out function to emit NEG
  bpf: x86: Factor out a lookup table for some ALU opcodes
  bpf: Rename BPF_XADD and prepare to encode other atomics in .imm
  bpf: Move BPF_STX reserved field check into BPF_STX verifier code
  bpf: Add BPF_FETCH field / create atomic_fetch_add instruction
  bpf: Add instructions for atomic_[cmp]xchg
  bpf: Pull out a macro for interpreting atomic ALU operations
  bpf: Add bitwise atomic instructions
  tools build: Implement feature check for BPF atomics in Clang
  bpf: Pull tools/build/feature biz into selftests Makefile
  bpf: Add tests for new BPF atomic operations
  bpf: Document new atomic instructions

 Documentation/networking/filter.rst   |  56 +++-
 arch/arm/net/bpf_jit_32.c |   7 +-
 arch/arm64/net/bpf_jit_comp.c |  16 +-
 arch/mips/net/ebpf_jit.c  |  11 +-
 arch/powerpc/net/bpf_jit_comp64.c |  25 +-
 arch/riscv/net/bpf_

[PATCH bpf-next v3 02/14] bpf: x86: Factor out emission of REX byte

2020-12-03 Thread Brendan Jackman

The JIT case for encoding atomic ops is about to get more
complicated. In order to make the review & resulting code easier,
let's factor out some shared helpers.

Signed-off-by: Brendan Jackman 
Change-Id: I66dbd5ad0bf6f820901fb73d6b2c6a63e00483b1
---
 arch/x86/net/bpf_jit_comp.c | 39 ++---
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index cc818ed7c2b9..7106cfd10ba6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -702,6 +702,21 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 
val_reg, int off)
*pprog = prog;
 }
 
+/*
+ * Emit a REX byte if it will be necessary to address these registers
+ */
+static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is64)
+   EMIT1(add_2mod(0x48, dst_reg, src_reg));
+   else if (is_ereg(dst_reg) || is_ereg(src_reg))
+   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -854,10 +869,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_OR: b2 = 0x09; break;
case BPF_XOR: b2 = 0x31; break;
}
-   if (BPF_CLASS(insn->code) == BPF_ALU64)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_ALU64);
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1301,20 +1314,16 @@ xadd:   emit_modrm_dstoff(&prog, 
dst_reg, src_reg, insn->off);
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* cmp dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
/* test dst_reg, src_reg */
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, src_reg));
-   else if (is_ereg(dst_reg) || is_ereg(src_reg))
-   EMIT1(add_2mod(0x40, dst_reg, src_reg));
+   maybe_emit_mod(&prog, dst_reg, src_reg,
+  BPF_CLASS(insn->code) == BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
goto emit_cond_jmp;
 
@@ -1350,10 +1359,8 @@ xadd:emit_modrm_dstoff(&prog, 
dst_reg, src_reg, insn->off);
case BPF_JMP32 | BPF_JSLE | BPF_K:
/* test dst_reg, dst_reg to save one extra byte */
if (imm32 == 0) {
-   if (BPF_CLASS(insn->code) == BPF_JMP)
-   EMIT1(add_2mod(0x48, dst_reg, dst_reg));
-   else if (is_ereg(dst_reg))
-   EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+   maybe_emit_mod(&prog, dst_reg, dst_reg,
+  BPF_CLASS(insn->code) == 
BPF_JMP);
EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
goto emit_cond_jmp;
}
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 01/14] bpf: x86: Factor out emission of ModR/M for *(reg + off)

2020-12-03 Thread Brendan Jackman

The case for JITing atomics is about to get more complicated. Let's
factor out some common code to make the review and result more
readable.

NB the atomics code doesn't yet use the new helper - a subsequent
patch will add its use as a side-effect of other changes.

Signed-off-by: Brendan Jackman 
Change-Id: I1510c7eb0132ff9262fea92ce1839243b6d33372
---
 arch/x86/net/bpf_jit_comp.c | 42 +
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 796506dcfc42..cc818ed7c2b9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -681,6 +681,27 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 
dst_reg, u32 src_reg)
*pprog = prog;
 }
 
+/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
+static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   if (is_imm8(off)) {
+   /* 1-byte signed displacement.
+*
+* If off == 0 we could skip this and save one extra byte, but
+* special case of x86 R13 which always needs an offset is not
+* worth the hassle
+*/
+   EMIT2(add_2reg(0x40, ptr_reg, val_reg), off);
+   } else {
+   /* 4-byte signed displacement */
+   EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off);
+   }
+   *pprog = prog;
+}
+
 /* LDX: dst_reg = *(u8*)(src_reg + off) */
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -708,15 +729,7 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
break;
}
-   /*
-* If insn->off == 0 we can save one extra byte, but
-* special case of x86 R13 which always needs an offset
-* is not worth the hassle
-*/
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+   emit_insn_suffix(&prog, src_reg, dst_reg, off);
*pprog = prog;
 }
 
@@ -751,10 +764,7 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
break;
}
-   if (is_imm8(off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+   emit_insn_suffix(&prog, dst_reg, src_reg, off);
*pprog = prog;
 }
 
@@ -1240,11 +1250,7 @@ st:  if (is_imm8(insn->off))
goto xadd;
case BPF_STX | BPF_XADD | BPF_DW:
EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
-xadd:  if (is_imm8(insn->off))
-   EMIT2(add_2reg(0x40, dst_reg, src_reg), 
insn->off);
-   else
-   EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
-   insn->off);
+xadd:  emit_modrm_dstoff(&prog, dst_reg, src_reg, insn->off);
break;
 
/* call */
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 07/14] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2020-12-03 Thread Brendan Jackman

This value can be set in bpf_insn.imm, for BPF_ATOMIC instructions,
in order to have the previous value of the atomically-modified memory
location loaded into the src register after an atomic op is carried
out.

Suggested-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
Change-Id: I649ad48edb565a32ccdf72924ffe96a8c8da57ad
---
 arch/x86/net/bpf_jit_comp.c|  4 
 include/linux/filter.h |  9 +
 include/uapi/linux/bpf.h   |  3 +++
 kernel/bpf/core.c  | 13 +
 kernel/bpf/disasm.c|  7 +++
 kernel/bpf/verifier.c  | 35 --
 tools/include/linux/filter.h   | 10 ++
 tools/include/uapi/linux/bpf.h |  3 +++
 8 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5e5a132b3d52..88cb09fa3bfb 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -827,6 +827,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
+   case BPF_ADD | BPF_FETCH:
+   /* src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */
+   EMIT2(0x0F, 0xC1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ce19988fb312..4e04d0fc454f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -270,6 +270,15 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
.imm   = BPF_ADD })
 #define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */
 
+/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + off), 
src_reg); */
+
+#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF)  \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_ADD | BPF_FETCH })
 
 /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d0adc48db43c..025e377e7229 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -44,6 +44,9 @@
 #define BPF_CALL   0x80/* function call */
 #define BPF_EXIT   0x90/* function return */
 
+/* atomic op type fields (stored in immediate) */
+#define BPF_FETCH  0x01/* fetch previous value into src reg */
+
 /* Register numbers */
 enum {
BPF_REG_0 = 0,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3abc6b250b18..61e93eb7d363 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1624,16 +1624,29 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
/* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
atomic_add((u32) SRC, (atomic_t *)(unsigned long)
   (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u32) atomic_fetch_add(
+   (u32) SRC,
+   (atomic_t *)(unsigned long) (DST + insn->off));
+   break;
default:
goto default_label;
}
CONT;
+
STX_ATOMIC_DW:
switch (IMM) {
case BPF_ADD:
/* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
 (DST + insn->off));
+   break;
+   case BPF_ADD | BPF_FETCH:
+   SRC = (u64) atomic64_fetch_add(
+   (u64) SRC,
+   (atomic64_t *)(s64) (DST + insn->off));
+   break;
default:
goto default_label;
}
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 37c8d6e9b4cc..3ee2246a52ef 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off,
insn->src_reg);
+   } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+  insn->imm == (BPF_ADD | BPF_FETCH)) {
+   verbose(cbs->private_data, &

[PATCH bpf-next v3 04/14] bpf: x86: Factor out a lookup table for some ALU opcodes

2020-12-03 Thread Brendan Jackman

A later commit will need to lookup a subset of these opcodes. To
avoid duplicating code, pull out a table.

The shift opcodes won't be needed by that later commit, but they're
already duplicated, so fold them into the table anyway.

Change-Id: Ia6888f9fa65da6225c33b530ea16911bf2f70750
Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 171ce539f6b9..ee7905051ee9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -205,6 +205,18 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
 }
 
+/* Some 1-byte opcodes for binary ALU operations */
+static u8 simple_alu_opcodes[] = {
+   [BPF_ADD] = 0x01,
+   [BPF_SUB] = 0x29,
+   [BPF_AND] = 0x21,
+   [BPF_OR] = 0x09,
+   [BPF_XOR] = 0x31,
+   [BPF_LSH] = 0xE0,
+   [BPF_RSH] = 0xE8,
+   [BPF_ARSH] = 0xF8,
+};
+
 static void jit_fill_hole(void *area, unsigned int size)
 {
/* Fill whole space with INT3 instructions */
@@ -878,15 +890,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
case BPF_ALU64 | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
-   switch (BPF_OP(insn->code)) {
-   case BPF_ADD: b2 = 0x01; break;
-   case BPF_SUB: b2 = 0x29; break;
-   case BPF_AND: b2 = 0x21; break;
-   case BPF_OR: b2 = 0x09; break;
-   case BPF_XOR: b2 = 0x31; break;
-   }
maybe_emit_mod(&prog, dst_reg, src_reg,
   BPF_CLASS(insn->code) == BPF_ALU64);
+   b2 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
 
@@ -1063,12 +1069,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
-
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
if (imm32 == 1)
EMIT2(0xD1, add_1reg(b3, dst_reg));
else
@@ -1102,11 +1103,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
 
-   switch (BPF_OP(insn->code)) {
-   case BPF_LSH: b3 = 0xE0; break;
-   case BPF_RSH: b3 = 0xE8; break;
-   case BPF_ARSH: b3 = 0xF8; break;
-   }
+   b3 = simple_alu_opcodes[BPF_OP(insn->code)];
EMIT2(0xD3, add_1reg(b3, dst_reg));
 
if (src_reg != BPF_REG_4)
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 08/14] bpf: Add instructions for atomic_[cmp]xchg

2020-12-03 Thread Brendan Jackman

This adds two atomic opcodes, both of which include the BPF_FETCH
flag. XCHG without the BPF_FETCH flag would naturally encode
atomic_set. This is not supported because it would be of limited
value to userspace (it doesn't imply any barriers). CMPXCHG without
BPF_FETCH would be an atomic compare-and-write. We don't have such
an operation in the kernel so it isn't provided to BPF either.

There are two significant design decisions made for the CMPXCHG
instruction:

 - This operation fundamentally has 3 operands, but we only have
   two register fields. Therefore the operand we compare against
   (the kernel's API calls it 'old') is hard-coded to be R0. x86
   has a similar design (and A64 doesn't have this problem).

   A potential alternative might be to encode the other operand's
   register number in the immediate field.

 - The kernel's atomic_cmpxchg returns the old value, while the C11
   userspace APIs return a boolean indicating the comparison
   result. Which should BPF do? A64 returns the old value. x86 returns
   the old value in the hard-coded register (and also sets a
   flag). That means return-old-value is easier to JIT.

Signed-off-by: Brendan Jackman 
Change-Id: I3f19ad867dfd08515eecf72674e6fdefe28424bb
---
 arch/x86/net/bpf_jit_comp.c|  8 
 include/linux/filter.h | 20 
 include/uapi/linux/bpf.h   |  4 +++-
 kernel/bpf/core.c  | 20 
 kernel/bpf/disasm.c| 15 +++
 kernel/bpf/verifier.c  | 19 +--
 tools/include/linux/filter.h   | 20 
 tools/include/uapi/linux/bpf.h |  4 +++-
 8 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 88cb09fa3bfb..7d29bc3bb4ff 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -831,6 +831,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */
EMIT2(0x0F, 0xC1);
break;
+   case BPF_XCHG:
+   /* src_reg = atomic_xchg(*(u32/u64*)(dst_reg + off), src_reg); 
*/
+   EMIT1(0x87);
+   break;
+   case BPF_CMPXCHG:
+   /* r0 = atomic_cmpxchg(*(u32/u64*)(dst_reg + off), r0, 
src_reg); */
+   EMIT2(0x0F, 0xB1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4e04d0fc454f..6186280715ed 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -280,6 +280,26 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
.off   = OFF,   \
.imm   = BPF_ADD | BPF_FETCH })
 
+/* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */
+
+#define BPF_ATOMIC_XCHG(SIZE, DST, SRC, OFF)   \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_XCHG  })
+
+/* Atomic compare-exchange, r0 = atomic_cmpxchg((dst_reg + off), r0, src_reg) 
*/
+
+#define BPF_ATOMIC_CMPXCHG(SIZE, DST, SRC, OFF)\
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_CMPXCHG })
+
 /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
 
 #define BPF_ST_MEM(SIZE, DST, OFF, IMM)\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 025e377e7229..53334530cc81 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -45,7 +45,9 @@
 #define BPF_EXIT   0x90/* function return */
 
 /* atomic op type fields (stored in immediate) */
-#define BPF_FETCH  0x01/* fetch previous value into src reg */
+#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* atomic exchange */
+#define BPF_CMPXCHG(0xf0 | BPF_FETCH)  /* atomic compare-and-write */
+#define BPF_FETCH  0x01/* not an opcode on its own, used to build 
others */
 
 /* Register numbers */
 enum {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 61e93eb7d363..28f960bc2e30 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1630,6 +1630,16 @@ static u64 ___bpf_prog_

[PATCH bpf-next v3 06/14] bpf: Move BPF_STX reserved field check into BPF_STX verifier code

2020-12-03 Thread Brendan Jackman

I can't find a reason why this code is in resolve_pseudo_ldimm64;
since I'll be modifying it in a subsequent commit, tidy it up.

Change-Id: I3410469270f4889a3af67612bd6c2e7979ab4da1
Signed-off-by: Brendan Jackman 
---
 kernel/bpf/verifier.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1947da617b03..e8b41ccdfb90 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9501,6 +9501,12 @@ static int do_check(struct bpf_verifier_env *env)
} else if (class == BPF_STX) {
enum bpf_reg_type *prev_dst_type, dst_reg_type;
 
+   if (((BPF_MODE(insn->code) != BPF_MEM &&
+ BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm 
!= 0)) {
+   verbose(env, "BPF_STX uses reserved fields\n");
+   return -EINVAL;
+   }
+
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
err = check_atomic(env, env->insn_idx, insn);
if (err)
@@ -9910,13 +9916,6 @@ static int resolve_pseudo_ldimm64(struct 
bpf_verifier_env *env)
return -EINVAL;
}
 
-   if (BPF_CLASS(insn->code) == BPF_STX &&
-   ((BPF_MODE(insn->code) != BPF_MEM &&
- BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) {
-   verbose(env, "BPF_STX uses reserved fields\n");
-   return -EINVAL;
-   }
-
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 03/14] bpf: x86: Factor out function to emit NEG

2020-12-03 Thread Brendan Jackman

There's currently only one usage of this but the implementation of
atomic_sub will add another.

Change-Id: Ia56743ec26ff5e7bcde8ae94fa17fef92d418d2b
Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c | 23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7106cfd10ba6..171ce539f6b9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -783,6 +783,22 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, 
u32 src_reg, int off)
*pprog = prog;
 }
 
+
+static void emit_neg(u8 **pprog, u32 reg, bool is64)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   /* Emit REX byte if necessary */
+   if (is64)
+   EMIT1(add_1mod(0x48, reg));
+   else if (is_ereg(reg))
+   EMIT1(add_1mod(0x40, reg));
+
+   EMIT2(0xF7, add_1reg(0xD8, reg)); /* x86 NEG */
+   *pprog = prog;
+}
+
 static bool ex_handler_bpf(const struct exception_table_entry *x,
   struct pt_regs *regs, int trapnr,
   unsigned long error_code, unsigned long fault_addr)
@@ -884,11 +900,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
/* neg dst */
case BPF_ALU | BPF_NEG:
case BPF_ALU64 | BPF_NEG:
-   if (BPF_CLASS(insn->code) == BPF_ALU64)
-   EMIT1(add_1mod(0x48, dst_reg));
-   else if (is_ereg(dst_reg))
-   EMIT1(add_1mod(0x40, dst_reg));
-   EMIT2(0xF7, add_1reg(0xD8, dst_reg));
+   emit_neg(&prog, dst_reg,
+BPF_CLASS(insn->code) == BPF_ALU64);
break;
 
case BPF_ALU | BPF_ADD | BPF_K:
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 09/14] bpf: Pull out a macro for interpreting atomic ALU operations

2020-12-03 Thread Brendan Jackman

Since the atomic operations that are added in subsequent commits are
all isomorphic with BPF_ADD, pull out a macro to avoid the
interpreter becoming dominated by lines of atomic-related code.

Note that this sacrifices interpreter performance (combining
STX_ATOMIC_W and STX_ATOMIC_DW into single switch case means that we
need an extra conditional branch to differentiate them) in favour of
compact and (relatively!) simple C code.

Change-Id: I8cae5b66e75f34393de6063b91c05a8006fdd9e6
Signed-off-by: Brendan Jackman 
---
 kernel/bpf/core.c | 79 +++
 1 file changed, 38 insertions(+), 41 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 28f960bc2e30..498d3f067be7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1618,55 +1618,52 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
LDX_PROBE(DW, 8)
 #undef LDX_PROBE
 
-   STX_ATOMIC_W:
-   switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
-   atomic_add((u32) SRC, (atomic_t *)(unsigned long)
-  (DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u32) atomic_fetch_add(
-   (u32) SRC,
-   (atomic_t *)(unsigned long) (DST + insn->off));
-   break;
-   case BPF_XCHG:
-   SRC = (u32) atomic_xchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) SRC);
-   break;
-   case BPF_CMPXCHG:
-   BPF_R0 = (u32) atomic_cmpxchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) BPF_R0, (u32) SRC);
+#define ATOMIC(BOP, KOP)   \
+   case BOP:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   atomic_##KOP((u32) SRC, (atomic_t *)(unsigned 
long) \
+(DST + insn->off));\
+   else\
+   atomic64_##KOP((u64) SRC, (atomic64_t 
*)(unsigned long) \
+  (DST + insn->off));  \
+   break;  \
+   case BOP | BPF_FETCH:   \
+   if (BPF_SIZE(insn->code) == BPF_W)  \
+   SRC = (u32) atomic_fetch_##KOP( \
+   (u32) SRC,  \
+   (atomic_t *)(unsigned long) (DST + 
insn->off)); \
+   else\
+   SRC = (u64) atomic64_fetch_##KOP(   \
+   (u64) SRC,  \
+   (atomic64_t *)(s64) (DST + insn->off)); 
\
break;
-   default:
-   goto default_label;
-   }
-   CONT;
 
STX_ATOMIC_DW:
+   STX_ATOMIC_W:
switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
-   atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
-(DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u64) atomic64_fetch_add(
-   (u64) SRC,
-   (atomic64_t *)(s64) (DST + insn->off));
-   break;
+   ATOMIC(BPF_ADD, add)
+
case BPF_XCHG:
-   SRC = (u64) atomic64_xchg(
-   (atomic64_t *)(u64) (DST + insn->off),
-   (u64) SRC);
+   if (BPF_SIZE(insn->code) == BPF_W)
+   SRC = (u32) atomic_xchg(
+   (atomic_t *)(unsigned long) (DST + 
insn->off),
+   (u32) SRC);
+   else
+   SRC = (u64) atomic64_xchg(
+   (atomic64_t *)(u64) (DST + insn->off),
+   (u64) SRC);
break;
case BPF_CMPXCHG:
-   BPF_R0 = (u64) atomic64_cmpxchg(
-

[PATCH bpf-next v3 13/14] bpf: Add tests for new BPF atomic operations

2020-12-03 Thread Brendan Jackman

This relies on the work done by Yonghong Song in
https://reviews.llvm.org/D72184

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

 - Avoid breaking the build for people on old versions of Clang
 - Avoid needing separate lists of test objects for no_alu32, where
   atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Change-Id: Iecc12f35f0ded4a1dd805cce1be576e7b27917ef
Signed-off-by: Brendan Jackman 
---
 tools/testing/selftests/bpf/Makefile  |   4 +
 .../selftests/bpf/prog_tests/atomics_test.c   | 262 ++
 .../selftests/bpf/progs/atomics_test.c| 154 ++
 .../selftests/bpf/verifier/atomic_and.c   |  77 +
 .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
 .../selftests/bpf/verifier/atomic_fetch_add.c | 106 +++
 .../selftests/bpf/verifier/atomic_or.c|  77 +
 .../selftests/bpf/verifier/atomic_xchg.c  |  46 +++
 .../selftests/bpf/verifier/atomic_xor.c   |  77 +
 9 files changed, 899 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics_test.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomics_test.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index f21c4841a612..448a9eb1a56c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -431,11 +431,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
\
   $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(feature-clang-bpf-atomics),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
 TRUNNER_BPF_LDFLAGS := -mattr=+alu32
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 
 # Define test_progs-no_alu32 test runner.
 TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
 TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
 
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c 
b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
new file mode 100644
index ..66f0ccf4f4ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+
+#include "atomics_test.skel.h"
+
+static struct atomics_test *setup(void)
+{
+   struct atomics_test *atomics_skel;
+   __u32 duration = 0, err;
+
+   atomics_skel = atomics_test__open_and_load();
+   if (CHECK(!atomics_skel, "atomics_skel_load", "atomics skeleton 
failed\n"))
+   return NULL;
+
+   if (atomics_skel->data->skip_tests) {
+   printf("%s:SKIP:no ENABLE_ATOMICS_TEST (missing Clang BPF 
atomics support)",
+  __func__);
+   test__skip();
+   goto err;
+   }
+
+   err = atomics_test__attach(atomics_skel);
+   if (CHECK(err, "atomics_attach", "atomics attach failed: %d\n", err))
+   goto err;
+
+   return atomics_skel;
+
+err:
+   atomics_test__destroy(atomics_skel);
+   return NULL;
+}
+
+static void test_add(void)
+{
+   struct atomics_test *atomics_skel;
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+
+   atomics_skel = setup();
+   if (!atomics_skel)
+   return;
+
+   prog_fd = bpf_program__fd(atomics_skel->progs.add);
+   err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+   NULL, NULL, &retval, &duration);
+   if (CHECK(err || retval, "test_run add",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration))
+   goto cleanup;
+
+   ASSERT_EQ(atomics_skel->data->add64_value, 3, "add64_value

[PATCH bpf-next v3 05/14] bpf: Rename BPF_XADD and prepare to encode other atomics in .imm

2020-12-03 Thread Brendan Jackman

A subsequent patch will add additional atomic operations. These new
operations will use the same opcode field as the existing XADD, with
the immediate discriminating different operations.

In preparation, rename the instruction mode to BPF_ATOMIC and start
calling the zero immediate BPF_ADD.

This is possible (doesn't break existing valid BPF progs) because the
immediate field is currently reserved MBZ and BPF_ADD is zero.

All uses are removed from the tree but the BPF_XADD definition is
kept around to avoid breaking builds for people including kernel
headers.

Signed-off-by: Brendan Jackman 
Change-Id: Ib78f54acba37f7196cbf6c35ffa1c40805cb0d87
---
 Documentation/networking/filter.rst   | 30 +++-
 arch/arm/net/bpf_jit_32.c |  7 ++-
 arch/arm64/net/bpf_jit_comp.c | 16 +--
 arch/mips/net/ebpf_jit.c  | 11 +++--
 arch/powerpc/net/bpf_jit_comp64.c | 25 --
 arch/riscv/net/bpf_jit_comp32.c   | 20 ++--
 arch/riscv/net/bpf_jit_comp64.c   | 16 +--
 arch/s390/net/bpf_jit_comp.c  | 27 ++-
 arch/sparc/net/bpf_jit_comp_64.c  | 17 +--
 arch/x86/net/bpf_jit_comp.c   | 46 ++-
 arch/x86/net/bpf_jit_comp32.c |  6 +--
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 14 --
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 +-
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 15 --
 include/linux/filter.h|  8 ++--
 include/uapi/linux/bpf.h  |  3 +-
 kernel/bpf/core.c | 31 +
 kernel/bpf/disasm.c   |  6 ++-
 kernel/bpf/verifier.c | 24 ++
 lib/test_bpf.c|  2 +-
 samples/bpf/bpf_insn.h|  4 +-
 samples/bpf/sock_example.c|  2 +-
 samples/bpf/test_cgrp2_attach.c   |  4 +-
 tools/include/linux/filter.h  |  7 +--
 tools/include/uapi/linux/bpf.h|  3 +-
 .../bpf/prog_tests/cgroup_attach_multi.c  |  4 +-
 tools/testing/selftests/bpf/verifier/ctx.c|  7 ++-
 .../testing/selftests/bpf/verifier/leak_ptr.c |  4 +-
 tools/testing/selftests/bpf/verifier/unpriv.c |  3 +-
 tools/testing/selftests/bpf/verifier/xadd.c   |  2 +-
 30 files changed, 248 insertions(+), 120 deletions(-)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index debb59e374de..1583d59d806d 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1006,13 +1006,13 @@ Size modifier is one of ...
 
 Mode modifier is one of::
 
-  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
-  BPF_ABS  0x20
-  BPF_IND  0x40
-  BPF_MEM  0x60
-  BPF_LEN  0x80  /* classic BPF only, reserved in eBPF */
-  BPF_MSH  0xa0  /* classic BPF only, reserved in eBPF */
-  BPF_XADD 0xc0  /* eBPF only, exclusive add */
+  BPF_IMM 0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
+  BPF_ABS 0x20
+  BPF_IND 0x40
+  BPF_MEM 0x60
+  BPF_LEN 0x80  /* classic BPF only, reserved in eBPF */
+  BPF_MSH 0xa0  /* classic BPF only, reserved in eBPF */
+  BPF_ATOMIC  0xc0  /* eBPF only, atomic operations */
 
 eBPF has two non-generic instructions: (BPF_ABS |  | BPF_LD) and
 (BPF_IND |  | BPF_LD) which are used to access packet data.
@@ -1044,11 +1044,19 @@ Unlike classic BPF instruction set, eBPF has generic 
load/store operations::
 BPF_MEM |  | BPF_STX:  *(size *) (dst_reg + off) = src_reg
 BPF_MEM |  | BPF_ST:   *(size *) (dst_reg + off) = imm32
 BPF_MEM |  | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
-BPF_XADD | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
-BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
 
-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
-2 byte atomic increments are not supported.
+Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
+
+It also includes atomic operations, which use the immediate field for extra
+encoding.
+
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
+   .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
+
+Note that 1 and 2 byte atomic operations are not supported.
+
+You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
+the exclusive-add operation encoded when the immediate field is zero.
 
 eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
 of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 0207b6ea6e8a..897634d0a67c 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit

[PATCH bpf-next v3 10/14] bpf: Add bitwise atomic instructions

2020-12-03 Thread Brendan Jackman

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ version of these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers,
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Change-Id: I340b10cecebea8cb8a52e3606010cde547a10ed4
Signed-off-by: Brendan Jackman 
---
 arch/x86/net/bpf_jit_comp.c  | 50 +-
 include/linux/filter.h   | 60 
 kernel/bpf/core.c|  5 ++-
 kernel/bpf/disasm.c  | 21 ++---
 kernel/bpf/verifier.c|  6 
 tools/include/linux/filter.h | 60 
 6 files changed, 196 insertions(+), 6 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7d29bc3bb4ff..4ab0f821326c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -824,6 +824,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* emit opcode */
switch (atomic_op) {
case BPF_ADD:
+   case BPF_SUB:
+   case BPF_AND:
+   case BPF_OR:
+   case BPF_XOR:
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
@@ -1306,8 +1310,52 @@ st:  if (is_imm8(insn->off))
 
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+   if (insn->imm == (BPF_AND | BPF_FETCH) ||
+   insn->imm == (BPF_OR | BPF_FETCH) ||
+   insn->imm == (BPF_XOR | BPF_FETCH)) {
+   u8 *branch_target;
+   bool is64 = BPF_SIZE(insn->code) == BPF_DW;
+
+   /*
+* Can't be implemented with a single x86 insn.
+* Need to do a CMPXCHG loop.
+*/
+
+   /* Will need RAX as a CMPXCHG operand so save 
R0 */
+   emit_mov_reg(&prog, true, BPF_REG_AX, 
BPF_REG_0);
+   branch_target = prog;
+   /* Load old value */
+   emit_ldx(&prog, BPF_SIZE(insn->code),
+BPF_REG_0, dst_reg, insn->off);
+   /*
+* Perform the (commutative) operation locally,
+* put the result in the AUX_REG.
+*/
+   emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0);
+   maybe_emit_mod(&prog, AUX_REG, src_reg, is64);
+   EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
+ add_2reg(0xC0, AUX_REG, src_reg));
+   /* Attempt to swap in new value */
+   err = emit_atomic(&prog, BPF_CMPXCHG,
+ dst_reg, AUX_REG, insn->off,
+ BPF_SIZE(insn->code));
+   if (WARN_ON(err))
+   return err;
+   /*
+* ZF tells us whether we won the race. If it's
+* cleared we need to try again.
+*/
+   EMIT2(X86_JNE, -(prog - branch_target) - 2);
+   /* Return the pre-modification value */
+   emit_mov_reg(&prog, is64, src_reg, BPF_REG_0);
+   /* Restore R0 after clobbering RAX */
+   emit_mov_reg(&prog, true, BPF_REG_0, 
BPF_REG_AX);
+   break;
+
+   }
+
err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ insn->off, 
BPF_SIZE(insn->code));
if (err)
return err;
break;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6186280715ed..698f82897b0d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -280,6 +280,66 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
.off   = OFF,

[PATCH bpf-next v3 14/14] bpf: Document new atomic instructions

2020-12-03 Thread Brendan Jackman

Change-Id: Ic70fe9e3cb4403df4eb3be2ea5ae5af53156559e
Signed-off-by: Brendan Jackman 
---
 Documentation/networking/filter.rst | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 1583d59d806d..26d508a5e038 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1053,6 +1053,32 @@ encoding.
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
 
+The basic atomic operations supported (from architecture v4 onwards) are:
+
+BPF_ADD
+BPF_AND
+BPF_OR
+BPF_XOR
+
+Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
+memory location addressed by ``dst_reg + off`` is atomically modified, with
+``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
+immediate, then these operations also overwrite ``src_reg`` with the
+value that was in memory before it was modified.
+
+The more special operations are:
+
+BPF_XCHG
+
+This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
+off``.
+
+BPF_CMPXCHG
+
+This atomically compares the value addressed by ``dst_reg + off`` with
+``R0``. If they match it is replaced with ``src_reg``. The value that was there
+before is loaded back to ``R0``.
+
 Note that 1 and 2 byte atomic operations are not supported.
 
 You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 11/14] tools build: Implement feature check for BPF atomics in Clang

2020-12-03 Thread Brendan Jackman

Change-Id: Ia15bb76f7152fff2974e38242d7430ce2987a71e

Cc: Arnaldo Carvalho de Melo 
Cc: Jiri Olsa 
Cc: Quentin Monnet 
Cc: "Frank Ch. Eigler" 
Cc: Stephane Eranian 
Cc: Namhyung Kim 
Cc: Thomas Hebb 
Change-Id: Ie2c3832eaf050d627764071d1927c7546e7c4b4b
Signed-off-by: Brendan Jackman 
---
 tools/build/feature/Makefile | 4 
 tools/build/feature/test-clang-bpf-atomics.c | 9 +
 2 files changed, 13 insertions(+)
 create mode 100644 tools/build/feature/test-clang-bpf-atomics.c

diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index cdde783f3018..81370d7fa193 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -70,6 +70,7 @@ FILES=  \
  test-libaio.bin   \
  test-libzstd.bin  \
  test-clang-bpf-co-re.bin  \
+ test-clang-bpf-atomics.bin\
  test-file-handle.bin  \
  test-libpfm4.bin
 
@@ -331,6 +332,9 @@ $(OUTPUT)test-clang-bpf-co-re.bin:
$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) |   \
grep BTF_KIND_VAR
 
+$(OUTPUT)test-clang-bpf-atomics.bin:
+   $(CLANG) -S -g -target bpf -mcpu=v3 
-Werror=implicit-function-declaration -o - $(patsubst %.bin,%.c,$(@F)) 2>&1
+
 $(OUTPUT)test-file-handle.bin:
$(BUILD)
 
diff --git a/tools/build/feature/test-clang-bpf-atomics.c 
b/tools/build/feature/test-clang-bpf-atomics.c
new file mode 100644
index ..8b5fcdd4ba6f
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-atomics.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Google
+
+int x = 0;
+
+int foo(void)
+{
+   return __sync_val_compare_and_swap(&x, 1, 2);
+}
-- 
2.29.2.454.gaff20da3a2-goog

[PATCH bpf-next v3 12/14] bpf: Pull tools/build/feature biz into selftests Makefile

2020-12-03 Thread Brendan Jackman

This is somewhat cargo-culted from the libbpf build. It will be used
in a subsequent patch to query for Clang BPF atomics support.

Change-Id: I9318a1702170eb752acced35acbb33f45126c44c
Signed-off-by: Brendan Jackman 
---
 tools/testing/selftests/bpf/.gitignore |  1 +
 tools/testing/selftests/bpf/Makefile   | 38 ++
 2 files changed, 39 insertions(+)

diff --git a/tools/testing/selftests/bpf/.gitignore 
b/tools/testing/selftests/bpf/.gitignore
index 395ae040ce1f..3c604dff1e20 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -35,3 +35,4 @@ test_cpp
 /tools
 /runqslower
 /bench
+/FEATURE-DUMP.selftests.bpf
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 894192c319fb..f21c4841a612 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -104,8 +104,46 @@ OVERRIDE_TARGETS := 1
 override define CLEAN
$(call msg,CLEAN)
$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+   $(Q)$(RM) $(OUTPUT)/FEATURE-DUMP.selftests.bpf
 endef
 
+# This will work when bpf is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifeq ($(srctree),)
+update_srctree := 1
+endif
+ifdef building_out_of_srctree
+update_srctree := 1
+endif
+ifeq ($(update_srctree),1)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+FEATURE_USER = .selftests.bpf
+FEATURE_TESTS = clang-bpf-atomics
+FEATURE_DISPLAY = clang-bpf-atomics
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+  check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
 include ../lib.mk
 
 SCRATCH_DIR := $(OUTPUT)/tools
-- 
2.29.2.454.gaff20da3a2-goog

Re: [PATCH bpf-next v3 00/14] Atomics for eBPF

2020-12-03 Thread Brendan Jackman

On Thu, Dec 03, 2020 at 04:02:31PM +0000, Brendan Jackman wrote:
[...]
> [1] Previous patchset:
> https://lore.kernel.org/bpf/20201123173202.1335708-1-jackm...@google.com/

Sorry, bogus link. That's v1, here's v2:
https://lore.kernel.org/bpf/20201127175738.1085417-1-jackm...@google.com/

1 2 3 >

1 - 100 of 287 matches

Mail list logo