On Wed, Apr 26, 2023 at 12:39:00AM +0200, Alexandr Nedvedicky wrote:
> Hello,
> 
> This is the second diff. It introduces a transaction (pf_trans).
> It's more or less diff with dead code.
> 
> It's still worth to note those two chunks in this diff:
> 
> @@ -1142,10 +1172,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int 
> flags, struct proc *p)
>                       return (EACCES);
>               }
>  
> -     if (flags & FWRITE)
> -             rw_enter_write(&pfioctl_rw);
> -     else
> -             rw_enter_read(&pfioctl_rw);
> +     rw_enter_write(&pfioctl_rw);
>  
>       switch (cmd) {
>  
> @@ -3022,10 +3049,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int 
> flags, struct proc *p)
>               break;
>       }
>  fail:
> -     if (flags & FWRITE)
> -             rw_exit_write(&pfioctl_rw);
> -     else
> -             rw_exit_read(&pfioctl_rw);
> +     rw_exit_write(&pfioctl_rw);

i dont think having the open mode flags affect whether you take a shared
or exclusive lock makes sense anyway, so im happy with these bits.

> `pfioctl_rw` serializes processes which perform ioctl(2) to pf(4).
> I'd like to also this lock to serialize access to table of transactions.
> To keep things simple for now I'd like to make every process to perform
> ioctl(2) on pf(4) exclusively. I plan to revisit this later when I'll
> be pushing operation on pfioctl_rw further down to individual ioctl
> operations.
> 
> the operations pf_open_trans(), pf_find_trans(), pf_rollback_trans()
> introduced in this diff are unused. They will be brought alive in
> the 3rd diff.

i think there's two problems with this diff.

the first is related to this chunk in close:

+       if (minor(dev) >= 1)
+               return (ENXIO);

pf is now a clonable device, so each open of /dev/pf can end up
with a different minor number. the "minor allocator" likely prefers
to reuse minor 0, but that's not guaranteed and we shouldnt rely
on it. if you're wanting to check that the non-clone bits are 0, then
you need to copy the check from bpf, which is like this:

        int unit = minor(dev);

        if (unit & ((1 << CLONE_SHIFT) - 1))
                return (ENXIO);

this has some ties into the second issue, which is that we shouldn't
use the pid as an identifier for state across syscalls like this
in the kernel. the reason is that userland could be weird or buggy
(or hostile) and fork in between creating a transaction and ending
a transaction, but it will still have the fd it from from open(/dev/pf).
instead, we should be using the whole dev_t or the minor number,
as long as it includes the bits that the cloning infrastructure
uses, as the identifier.

i would also check that the process^Wminor number that created a
transaction is the same when looking up a transaction in pf_find_trans.

transactions like this is a great step forward though.

cheers,
dlg

> 
> thanks and
> regards
> sashan
> 
> --------8<---------------8<---------------8<------------------8<--------
> diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
> index 7ea22050506..7e4c7d2a2ab 100644
> --- a/sys/net/pf_ioctl.c
> +++ b/sys/net/pf_ioctl.c
> @@ -117,6 +117,11 @@ void                      pf_qid_unref(u_int16_t);
>  int                   pf_states_clr(struct pfioc_state_kill *);
>  int                   pf_states_get(struct pfioc_states *);
>  
> +struct pf_trans              *pf_open_trans(pid_t);
> +struct pf_trans              *pf_find_trans(uint64_t);
> +void                  pf_free_trans(struct pf_trans *);
> +void                  pf_rollback_trans(struct pf_trans *);
> +
>  struct pf_rule                pf_default_rule, pf_default_rule_new;
>  
>  struct {
> @@ -168,6 +173,8 @@ int                        pf_rtlabel_add(struct 
> pf_addr_wrap *);
>  void                  pf_rtlabel_remove(struct pf_addr_wrap *);
>  void                  pf_rtlabel_copyout(struct pf_addr_wrap *);
>  
> +uint64_t trans_ticket = 1;
> +LIST_HEAD(, pf_trans)        pf_ioctl_trans = 
> LIST_HEAD_INITIALIZER(pf_trans);
>  
>  void
>  pfattach(int num)
> @@ -293,6 +300,29 @@ pfopen(dev_t dev, int flags, int fmt, struct proc *p)
>  int
>  pfclose(dev_t dev, int flags, int fmt, struct proc *p)
>  {
> +     struct pf_trans *w, *s;
> +     LIST_HEAD(, pf_trans)   tmp_list;
> +
> +     if (minor(dev) >= 1)
> +             return (ENXIO);
> +
> +     if (flags & FWRITE) {
> +             LIST_INIT(&tmp_list);
> +             rw_enter_write(&pfioctl_rw);
> +             LIST_FOREACH_SAFE(w, &pf_ioctl_trans, pft_entry, s) {
> +                     if (w->pft_pid == p->p_p->ps_pid) {
> +                             LIST_REMOVE(w, pft_entry);
> +                             LIST_INSERT_HEAD(&tmp_list, w, pft_entry);
> +                     }
> +             }
> +             rw_exit_write(&pfioctl_rw);
> +
> +             while ((w = LIST_FIRST(&tmp_list)) != NULL) {
> +                     LIST_REMOVE(w, pft_entry);
> +                     pf_free_trans(w);
> +             }
> +     }
> +
>       return (0);
>  }
>  
> @@ -1142,10 +1172,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int 
> flags, struct proc *p)
>                       return (EACCES);
>               }
>  
> -     if (flags & FWRITE)
> -             rw_enter_write(&pfioctl_rw);
> -     else
> -             rw_enter_read(&pfioctl_rw);
> +     rw_enter_write(&pfioctl_rw);
>  
>       switch (cmd) {
>  
> @@ -3022,10 +3049,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int 
> flags, struct proc *p)
>               break;
>       }
>  fail:
> -     if (flags & FWRITE)
> -             rw_exit_write(&pfioctl_rw);
> -     else
> -             rw_exit_read(&pfioctl_rw);
> +     rw_exit_write(&pfioctl_rw);
>  
>       return (error);
>  }
> @@ -3244,3 +3268,52 @@ pf_sysctl(void *oldp, size_t *oldlenp, void *newp, 
> size_t newlen)
>  
>       return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs));
>  }
> +
> +struct pf_trans *
> +pf_open_trans(pid_t pid)
> +{
> +     static uint64_t ticket = 1;
> +     struct pf_trans *t;
> +
> +     rw_assert_wrlock(&pfioctl_rw);
> +
> +     t = malloc(sizeof(*t), M_TEMP, M_WAITOK);
> +     memset(t, 0, sizeof(struct pf_trans));
> +     t->pft_pid = pid;
> +     t->pft_ticket = ticket++;
> +
> +     LIST_INSERT_HEAD(&pf_ioctl_trans, t, pft_entry);
> +
> +     return (t);
> +}
> +
> +struct pf_trans *
> +pf_find_trans(uint64_t ticket)
> +{
> +     struct pf_trans *t;
> +
> +     rw_assert_anylock(&pfioctl_rw);
> +
> +     LIST_FOREACH(t, &pf_ioctl_trans, pft_entry) {
> +             if (t->pft_ticket == ticket)
> +                     break;
> +     }
> +
> +     return (t);
> +}
> +
> +void
> +pf_free_trans(struct pf_trans *t)
> +{
> +     free(t, M_TEMP, sizeof(*t));
> +}
> +
> +void
> +pf_rollback_trans(struct pf_trans *t)
> +{
> +     if (t != NULL) {
> +             rw_assert_wrlock(&pfioctl_rw);
> +             LIST_REMOVE(t, pft_entry);
> +             pf_free_trans(t);
> +     }
> +}
> diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
> index 38fff6473aa..c180b22df5c 100644
> --- a/sys/net/pfvar_priv.h
> +++ b/sys/net/pfvar_priv.h
> @@ -322,6 +322,17 @@ enum {
>  
>  extern struct cpumem *pf_anchor_stack;
>  
> +enum pf_trans_type {
> +     PF_TRANS_NONE,
> +     PF_TRANS_MAX
> +};
> +
> +struct pf_trans {
> +     LIST_ENTRY(pf_trans)    pft_entry;
> +     pid_t                   pft_pid;                /* process id */
> +     uint64_t                pft_ticket;
> +     enum pf_trans_type      pft_type;
> +};
>  extern struct task   pf_purge_task;
>  extern struct timeout        pf_purge_to;
>  
> 

Reply via email to