On Wed, Apr 26, 2023 at 12:39:00AM +0200, Alexandr Nedvedicky wrote: > Hello, > > This is the second diff. It introduces a transaction (pf_trans). > It's more or less diff with dead code. > > It's still worth to note those two chunks in this diff: > > @@ -1142,10 +1172,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int > flags, struct proc *p) > return (EACCES); > } > > - if (flags & FWRITE) > - rw_enter_write(&pfioctl_rw); > - else > - rw_enter_read(&pfioctl_rw); > + rw_enter_write(&pfioctl_rw); > > switch (cmd) { > > @@ -3022,10 +3049,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int > flags, struct proc *p) > break; > } > fail: > - if (flags & FWRITE) > - rw_exit_write(&pfioctl_rw); > - else > - rw_exit_read(&pfioctl_rw); > + rw_exit_write(&pfioctl_rw);
i dont think having the open mode flags affect whether you take a shared or exclusive lock makes sense anyway, so im happy with these bits. > `pfioctl_rw` serializes processes which perform ioctl(2) to pf(4). > I'd like to also this lock to serialize access to table of transactions. > To keep things simple for now I'd like to make every process to perform > ioctl(2) on pf(4) exclusively. I plan to revisit this later when I'll > be pushing operation on pfioctl_rw further down to individual ioctl > operations. > > the operations pf_open_trans(), pf_find_trans(), pf_rollback_trans() > introduced in this diff are unused. They will be brought alive in > the 3rd diff. i think there's two problems with this diff. the first is related to this chunk in close: + if (minor(dev) >= 1) + return (ENXIO); pf is now a clonable device, so each open of /dev/pf can end up with a different minor number. the "minor allocator" likely prefers to reuse minor 0, but that's not guaranteed and we shouldnt rely on it. if you're wanting to check that the non-clone bits are 0, then you need to copy the check from bpf, which is like this: int unit = minor(dev); if (unit & ((1 << CLONE_SHIFT) - 1)) return (ENXIO); this has some ties into the second issue, which is that we shouldn't use the pid as an identifier for state across syscalls like this in the kernel. the reason is that userland could be weird or buggy (or hostile) and fork in between creating a transaction and ending a transaction, but it will still have the fd it from from open(/dev/pf). instead, we should be using the whole dev_t or the minor number, as long as it includes the bits that the cloning infrastructure uses, as the identifier. i would also check that the process^Wminor number that created a transaction is the same when looking up a transaction in pf_find_trans. transactions like this is a great step forward though. cheers, dlg > > thanks and > regards > sashan > > --------8<---------------8<---------------8<------------------8<-------- > diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c > index 7ea22050506..7e4c7d2a2ab 100644 > --- a/sys/net/pf_ioctl.c > +++ b/sys/net/pf_ioctl.c > @@ -117,6 +117,11 @@ void pf_qid_unref(u_int16_t); > int pf_states_clr(struct pfioc_state_kill *); > int pf_states_get(struct pfioc_states *); > > +struct pf_trans *pf_open_trans(pid_t); > +struct pf_trans *pf_find_trans(uint64_t); > +void pf_free_trans(struct pf_trans *); > +void pf_rollback_trans(struct pf_trans *); > + > struct pf_rule pf_default_rule, pf_default_rule_new; > > struct { > @@ -168,6 +173,8 @@ int pf_rtlabel_add(struct > pf_addr_wrap *); > void pf_rtlabel_remove(struct pf_addr_wrap *); > void pf_rtlabel_copyout(struct pf_addr_wrap *); > > +uint64_t trans_ticket = 1; > +LIST_HEAD(, pf_trans) pf_ioctl_trans = > LIST_HEAD_INITIALIZER(pf_trans); > > void > pfattach(int num) > @@ -293,6 +300,29 @@ pfopen(dev_t dev, int flags, int fmt, struct proc *p) > int > pfclose(dev_t dev, int flags, int fmt, struct proc *p) > { > + struct pf_trans *w, *s; > + LIST_HEAD(, pf_trans) tmp_list; > + > + if (minor(dev) >= 1) > + return (ENXIO); > + > + if (flags & FWRITE) { > + LIST_INIT(&tmp_list); > + rw_enter_write(&pfioctl_rw); > + LIST_FOREACH_SAFE(w, &pf_ioctl_trans, pft_entry, s) { > + if (w->pft_pid == p->p_p->ps_pid) { > + LIST_REMOVE(w, pft_entry); > + LIST_INSERT_HEAD(&tmp_list, w, pft_entry); > + } > + } > + rw_exit_write(&pfioctl_rw); > + > + while ((w = LIST_FIRST(&tmp_list)) != NULL) { > + LIST_REMOVE(w, pft_entry); > + pf_free_trans(w); > + } > + } > + > return (0); > } > > @@ -1142,10 +1172,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int > flags, struct proc *p) > return (EACCES); > } > > - if (flags & FWRITE) > - rw_enter_write(&pfioctl_rw); > - else > - rw_enter_read(&pfioctl_rw); > + rw_enter_write(&pfioctl_rw); > > switch (cmd) { > > @@ -3022,10 +3049,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int > flags, struct proc *p) > break; > } > fail: > - if (flags & FWRITE) > - rw_exit_write(&pfioctl_rw); > - else > - rw_exit_read(&pfioctl_rw); > + rw_exit_write(&pfioctl_rw); > > return (error); > } > @@ -3244,3 +3268,52 @@ pf_sysctl(void *oldp, size_t *oldlenp, void *newp, > size_t newlen) > > return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs)); > } > + > +struct pf_trans * > +pf_open_trans(pid_t pid) > +{ > + static uint64_t ticket = 1; > + struct pf_trans *t; > + > + rw_assert_wrlock(&pfioctl_rw); > + > + t = malloc(sizeof(*t), M_TEMP, M_WAITOK); > + memset(t, 0, sizeof(struct pf_trans)); > + t->pft_pid = pid; > + t->pft_ticket = ticket++; > + > + LIST_INSERT_HEAD(&pf_ioctl_trans, t, pft_entry); > + > + return (t); > +} > + > +struct pf_trans * > +pf_find_trans(uint64_t ticket) > +{ > + struct pf_trans *t; > + > + rw_assert_anylock(&pfioctl_rw); > + > + LIST_FOREACH(t, &pf_ioctl_trans, pft_entry) { > + if (t->pft_ticket == ticket) > + break; > + } > + > + return (t); > +} > + > +void > +pf_free_trans(struct pf_trans *t) > +{ > + free(t, M_TEMP, sizeof(*t)); > +} > + > +void > +pf_rollback_trans(struct pf_trans *t) > +{ > + if (t != NULL) { > + rw_assert_wrlock(&pfioctl_rw); > + LIST_REMOVE(t, pft_entry); > + pf_free_trans(t); > + } > +} > diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h > index 38fff6473aa..c180b22df5c 100644 > --- a/sys/net/pfvar_priv.h > +++ b/sys/net/pfvar_priv.h > @@ -322,6 +322,17 @@ enum { > > extern struct cpumem *pf_anchor_stack; > > +enum pf_trans_type { > + PF_TRANS_NONE, > + PF_TRANS_MAX > +}; > + > +struct pf_trans { > + LIST_ENTRY(pf_trans) pft_entry; > + pid_t pft_pid; /* process id */ > + uint64_t pft_ticket; > + enum pf_trans_type pft_type; > +}; > extern struct task pf_purge_task; > extern struct timeout pf_purge_to; > >