On Thu, Dec 27, 2012 at 10:06 AM, Luigi Rizzo <ri...@iet.unipi.it> wrote: > Before submitting a proper patch, I'd like to hear feedback on the > following proposed change to hw/e1000.c to properly implement > interrupt mitigation. > This is joint work with Vincenzo Maffione and Giuseppe Lettieri (in Cc), > and is a followup to a similar patch i posted in july > > https://lists.gnu.org/archive/html/qemu-devel/2012-07/msg03195.html > > > The patch models three of the five (sic!) e1000 register that control > moderation, and uses qemu timers for that (the minimum delay for > these timers affects the fidelity of the emulation). > > Right now there is a static variable that controls whether the > feature is enabled. I would probably like to make it a parameter > accessible from the command line in qemu, possibly extending it to > override the mitigation delay set by the device driver. > > Right now we reached transmit speeds in the order of 2-300Kpps > (if matched with a proper guest device driver optimization, see > https://groups.google.com/forum/?fromgroups=#!topic/mailing.freebsd.emulator/xQHR_hleCuc > ) > > Some performance data using a FreeBSD guest, for udp transmissions: > > KVM QEMU > standard KVM, standard driver 20 Kpps 6.3 Kpps > modified KVM, standard driver 37 Kpps 28 Kpps > modified KVM, modified driver 200 Kpps 34 Kpps > > As you can see, on kvm this change gives a 5x speedup to the tx path, > which combines nicely with the 2x speedup that comes from supporting > interrupt mitigation alone in the hypervisor. Without kvm (or kqemu ?) > the benefits are much lower, as the guest becomes too slow. > > cheers > luigi > > diff -urp qemu-1.3.0-orig/hw/e1000.c qemu-1.3.0/hw/e1000.c > --- qemu-1.3.0-orig/hw/e1000.c 2012-12-03 20:37:05.000000000 +0100 > +++ qemu-1.3.0/hw/e1000.c 2012-12-27 09:47:16.000000000 +0100 > @@ -35,6 +35,8 @@ > > #include "e1000_hw.h" > > +static int mit_on = 1; /* interrupt mitigation enable */ > + > #define E1000_DEBUG > > #ifdef E1000_DEBUG > @@ -129,6 +131,9 @@ typedef struct E1000State_st { > } eecd_state; > > QEMUTimer *autoneg_timer; > + QEMUTimer *mit_timer; // handle for the timer > + uint32_t mit_timer_on; // mitigation timer active > + uint32_t mit_cause; // pending interrupt cause > } E1000State; > > #define defreg(x) x = (E1000_##x>>2) > @@ -144,6 +149,7 @@ enum { > defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), > defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA), > defreg(VET), > + defreg(RDTR), defreg(RADV), defreg(TADV), defreg(ITR), > }; > > static void > @@ -639,6 +645,68 @@ static uint64_t tx_desc_base(E1000State > return (bah << 32) + bal; > } > > +/* helper function, 0 means the value is not set */ > +static inline void > +mit_update_delay(uint32_t *curr, uint32_t value) > +{ > + if (value && (*curr == 0 || value < *curr))
Missing braces, please read CODING_STYLE and check the patches with scripts/checkpatch.pl. Also in several places below. > + *curr = value; > +} > + > +/* > + * If necessary, rearm the timer and post an interrupt. > + * Called at the end of tx/rx routines (mit_timer_on == 0), > + * and when the timer fires (mit_timer_on == 1). > + * We provide a partial implementation of interrupt mitigation, > + * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for > + * RADV and TADV, 256ns units for ITR). RDTR is only used to enable RADV; > + * relative timers based on TIDV and RDTR are not implemented. > + */ > +static void > +mit_rearm_and_int(void *opaque) > +{ > + E1000State *s = opaque; > + uint32_t mit_delay = 0; > + > + /* > + * Clear the flag. It is only set when the callback fires, > + * and we need to clear it anyways. > + */ > + s->mit_timer_on = 0; > + if (s->mit_cause == 0) /* no events pending, we are done */ > + return; > + /* > + * Compute the next mitigation delay according to pending interrupts > + * and the current values of RADV (provided RDTR!=0), TADV and ITR. > + * Then rearm the timer. > + */ > + if (s->mit_cause & (E1000_ICR_TXQE | E1000_ICR_TXDW)) > + mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4); > + if (s->mac_reg[RDTR] && (s->mit_cause & E1000_ICS_RXT0)) > + mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4); > + mit_update_delay(&mit_delay, s->mac_reg[ITR]); > + > + if (mit_delay) { > + s->mit_timer_on = 1; > + qemu_mod_timer(s->mit_timer, > + qemu_get_clock_ns(vm_clock) + mit_delay * 256); > + } > + set_ics(s, 0, s->mit_cause); > + s->mit_cause = 0; > +} > + > +static void > +mit_set_ics(E1000State *s, uint32_t cause) > +{ > + if (mit_on == 0) { > + set_ics(s, 0, cause); > + return; > + } > + s->mit_cause |= cause; > + if (!s->mit_timer_on) > + mit_rearm_and_int(s); > +} > + > static void > start_xmit(E1000State *s) > { > @@ -676,7 +744,7 @@ start_xmit(E1000State *s) > break; > } > } > - set_ics(s, 0, cause); > + mit_set_ics(s, cause); > } > > static int > @@ -894,7 +962,7 @@ e1000_receive(NetClientState *nc, const > s->rxbuf_min_shift) > n |= E1000_ICS_RXDMT0; > > - set_ics(s, 0, n); > + mit_set_ics(s, n); > > return size; > } > @@ -999,6 +1067,7 @@ static uint32_t (*macreg_readops[])(E100 > getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), > getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), > getreg(TDLEN), getreg(RDLEN), > + getreg(RDTR), getreg(RADV), getreg(TADV), getreg(ITR), > > [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = > mac_read_clr4, > [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4, > @@ -1015,6 +1084,8 @@ static void (*macreg_writeops[])(E1000St > putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC), > putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH), > putreg(RDBAL), putreg(LEDCTL), putreg(VET), > + [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, > + [ITR] = set_16bit, > [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl, > [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics, > [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, > @@ -1286,6 +1357,9 @@ static int pci_e1000_init(PCIDevice *pci > add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, > "/ethernet-phy@0"); > > d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d); > + d->mit_cause = 0; > + d->mit_timer_on = 0; > + d->mit_timer = qemu_new_timer_ns(vm_clock, mit_rearm_and_int, d); > > return 0; > } >