> On 30 May 2016, at 18:10 PM, Michael S. Tsirkin <m...@redhat.com> wrote: > > On Mon, May 30, 2016 at 12:14:41PM +0300, Leonid Bloch wrote: >> diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c >> new file mode 100644 >> index 0000000..4da6bb1 >> --- /dev/null >> +++ b/hw/net/e1000e.c > > Here are minor style issues that can be fixed after this is upstream. > See below.
I’d prefer to fix this and add comments into the first patch of the series before this goes upstream. I will resent the new version today. > >> @@ -0,0 +1,739 @@ >> +/* >> +* QEMU INTEL 82574 GbE NIC emulation >> +* >> +* Software developer's manuals: >> +* >> http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf >> +* >> +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) >> +* Developed by Daynix Computing LTD (http://www.daynix.com) >> +* >> +* Authors: >> +* Dmitry Fleytman <dmi...@daynix.com> >> +* Leonid Bloch <leo...@daynix.com> >> +* Yan Vugenfirer <y...@daynix.com> >> +* >> +* Based on work done by: >> +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. >> +* Copyright (c) 2008 Qumranet >> +* Based on work done by: >> +* Copyright (c) 2007 Dan Aloni >> +* Copyright (c) 2004 Antony T Curtis >> +* >> +* This library is free software; you can redistribute it and/or >> +* modify it under the terms of the GNU Lesser General Public >> +* License as published by the Free Software Foundation; either >> +* version 2 of the License, or (at your option) any later version. >> +* >> +* This library is distributed in the hope that it will be useful, >> +* but WITHOUT ANY WARRANTY; without even the implied warranty of >> +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> +* Lesser General Public License for more details. >> +* >> +* You should have received a copy of the GNU Lesser General Public >> +* License along with this library; if not, see >> <http://www.gnu.org/licenses/>. >> +*/ >> + >> +#include "qemu/osdep.h" >> +#include "net/net.h" >> +#include "net/tap.h" >> +#include "qemu/range.h" >> +#include "sysemu/sysemu.h" >> +#include "hw/pci/msi.h" >> +#include "hw/pci/msix.h" >> + >> +#include "hw/net/e1000_regs.h" >> + >> +#include "e1000x_common.h" >> +#include "e1000e_core.h" >> + >> +#include "trace.h" >> + >> +#define TYPE_E1000E "e1000e" >> +#define E1000E(obj) OBJECT_CHECK(E1000EState, (obj), TYPE_E1000E) >> + >> +typedef struct { >> + PCIDevice parent_obj; >> + NICState *nic; >> + NICConf conf; >> + >> + MemoryRegion mmio; >> + MemoryRegion flash; >> + MemoryRegion io; >> + MemoryRegion msix; >> + >> + uint32_t ioaddr; >> + >> + uint16_t subsys_ven; >> + uint16_t subsys; >> + >> + uint16_t subsys_ven_used; >> + uint16_t subsys_used; >> + >> + uint32_t intr_state; >> + bool disable_vnet; >> + >> + E1000ECore core; >> + >> +} E1000EState; > > typedef struct E1000EState is preferably because older > gdb versions do not always see typedefs. > >> + >> +#define E1000E_MMIO_IDX 0 >> +#define E1000E_FLASH_IDX 1 >> +#define E1000E_IO_IDX 2 >> +#define E1000E_MSIX_IDX 3 >> + >> +#define E1000E_MMIO_SIZE (128 * 1024) >> +#define E1000E_FLASH_SIZE (128 * 1024) >> +#define E1000E_IO_SIZE (32) >> +#define E1000E_MSIX_SIZE (16 * 1024) >> + >> +#define E1000E_MSIX_TABLE (0x0000) >> +#define E1000E_MSIX_PBA (0x2000) >> + >> +#define E1000E_USE_MSI BIT(0) >> +#define E1000E_USE_MSIX BIT(1) >> + >> +static uint64_t >> +e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) >> +{ >> + E1000EState *s = opaque; >> + return e1000e_core_read(&s->core, addr, size); >> +} >> + >> +static void >> +e1000e_mmio_write(void *opaque, hwaddr addr, >> + uint64_t val, unsigned size) >> +{ >> + E1000EState *s = opaque; >> + e1000e_core_write(&s->core, addr, val, size); >> +} >> + >> +static bool >> +e1000e_io_get_reg_index(E1000EState *s, uint32_t *idx) >> +{ >> + if (s->ioaddr < 0x1FFFF) { >> + *idx = s->ioaddr; >> + return true; >> + } >> + >> + if (s->ioaddr < 0x7FFFF) { >> + trace_e1000e_wrn_io_addr_undefined(s->ioaddr); >> + return false; >> + } >> + >> + if (s->ioaddr < 0xFFFFF) { >> + trace_e1000e_wrn_io_addr_flash(s->ioaddr); >> + return false; >> + } >> + >> + trace_e1000e_wrn_io_addr_unknown(s->ioaddr); >> + return false; >> +} >> + >> +static uint64_t >> +e1000e_io_read(void *opaque, hwaddr addr, unsigned size) >> +{ >> + E1000EState *s = opaque; >> + uint32_t idx; >> + uint64_t val; >> + >> + switch (addr) { >> + case E1000_IOADDR: >> + trace_e1000e_io_read_addr(s->ioaddr); >> + return s->ioaddr; >> + case E1000_IODATA: >> + if (e1000e_io_get_reg_index(s, &idx)) { >> + val = e1000e_core_read(&s->core, idx, sizeof(val)); >> + trace_e1000e_io_read_data(idx, val); >> + return val; >> + } >> + return 0; >> + default: >> + trace_e1000e_wrn_io_read_unknown(addr); >> + return 0; >> + } >> +} >> + >> +static void >> +e1000e_io_write(void *opaque, hwaddr addr, >> + uint64_t val, unsigned size) >> +{ >> + E1000EState *s = opaque; >> + uint32_t idx; >> + >> + switch (addr) { >> + case E1000_IOADDR: >> + trace_e1000e_io_write_addr(val); >> + s->ioaddr = (uint32_t) val; >> + return; >> + case E1000_IODATA: >> + if (e1000e_io_get_reg_index(s, &idx)) { >> + trace_e1000e_io_write_data(idx, val); >> + e1000e_core_write(&s->core, idx, val, sizeof(val)); >> + } >> + return; >> + default: >> + trace_e1000e_wrn_io_write_unknown(addr); >> + return; >> + } >> +} >> + >> +static const MemoryRegionOps mmio_ops = { >> + .read = e1000e_mmio_read, >> + .write = e1000e_mmio_write, >> + .endianness = DEVICE_LITTLE_ENDIAN, >> + .impl = { >> + .min_access_size = 4, >> + .max_access_size = 4, >> + }, >> +}; >> + >> +static const MemoryRegionOps io_ops = { >> + .read = e1000e_io_read, >> + .write = e1000e_io_write, >> + .endianness = DEVICE_LITTLE_ENDIAN, >> + .impl = { >> + .min_access_size = 4, >> + .max_access_size = 4, >> + }, >> +}; >> + >> +static int >> +e1000e_nc_can_receive(NetClientState *nc) >> +{ >> + E1000EState *s = qemu_get_nic_opaque(nc); >> + return e1000e_can_receive(&s->core); >> +} >> + >> +static ssize_t >> +e1000e_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int >> iovcnt) >> +{ >> + E1000EState *s = qemu_get_nic_opaque(nc); >> + return e1000e_receive_iov(&s->core, iov, iovcnt); >> +} >> + >> +static ssize_t >> +e1000e_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size) >> +{ >> + E1000EState *s = qemu_get_nic_opaque(nc); >> + return e1000e_receive(&s->core, buf, size); >> +} >> + >> +static void >> +e1000e_set_link_status(NetClientState *nc) >> +{ >> + E1000EState *s = qemu_get_nic_opaque(nc); >> + e1000e_core_set_link_status(&s->core); >> +} >> + >> +static NetClientInfo net_e1000e_info = { >> + .type = NET_CLIENT_OPTIONS_KIND_NIC, >> + .size = sizeof(NICState), >> + .can_receive = e1000e_nc_can_receive, >> + .receive = e1000e_nc_receive, >> + .receive_iov = e1000e_nc_receive_iov, >> + .link_status_changed = e1000e_set_link_status, >> +}; >> + >> +/* >> +* EEPROM (NVM) contents documented in Table 36, section 6.1. >> +*/ > > and generally 6.1.2 Software accessed words. > >> +static const uint16_t e1000e_eeprom_template[64] = { >> + /* Address | Compat. | ImVer | Compat. */ >> + 0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff, >> + /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */ >> + 0x0000, 0x0000, 0x026b, 0x0000, 0x8086, 0x0000, 0x0000, 0x8058, >> + /* NVM words 1,2,3 |-------------------------------|PCI-EID*/ >> + 0x0000, 0x2001, 0x7e7c, 0xffff, 0x1000, 0x00c8, 0x0000, 0x2704, >> + /* PCIe Init. Conf 1,2,3 |PCICtrl|PHY|LD1|-------| RevID | LD0,2 */ >> + 0x6cc9, 0x3150, 0x070e, 0x460b, 0x2d84, 0x0100, 0xf000, 0x0706, >> + /* FLPAR |FLANADD|LAN-PWR|FlVndr |ICtrl3 |APTSMBA|APTRxEP|APTSMBC*/ >> + 0x6000, 0x0080, 0x0f04, 0x7fff, 0x4f01, 0xc600, 0x0000, 0x20ff, >> + /* APTIF | APTMC |APTuCP |LSWFWID|MSWFWID|NC-SIMC|NC-SIC | VPDP */ >> + 0x0028, 0x0003, 0x0000, 0x0000, 0x0000, 0x0003, 0x0000, 0xffff, >> + /* SW Section */ >> + 0x0100, 0xc000, 0x121c, 0xc007, 0xffff, 0xffff, 0xffff, 0xffff, >> + /* SW Section |CHKSUM */ >> + 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0120, 0xffff, 0x0000, >> +}; > > this could be written in a clearer way, for example > I don't see why would we say "ImVer" rather than > "Image Version Information 1". > Compat can be "Compatibility Bytes" etc. > >> + >> +static void e1000e_core_realize(E1000EState *s) >> +{ >> + s->core.owner = &s->parent_obj; >> + s->core.owner_nic = s->nic; >> +} >> + >> +static void >> +e1000e_init_msi(E1000EState *s) >> +{ >> + int res; >> + >> + res = msi_init(PCI_DEVICE(s), >> + 0xD0, /* MSI capability offset */ >> + 1, /* MAC MSI interrupts */ >> + true, /* 64-bit message addresses supported */ >> + false); /* Per vector mask supported */ >> + >> + if (res > 0) { >> + s->intr_state |= E1000E_USE_MSI; >> + } else { >> + trace_e1000e_msi_init_fail(res); >> + } >> +} >> + >> +static void >> +e1000e_cleanup_msi(E1000EState *s) >> +{ >> + if (s->intr_state & E1000E_USE_MSI) { >> + msi_uninit(PCI_DEVICE(s)); >> + } >> +} >> + >> +static void >> +e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors) >> +{ >> + int i; >> + for (i = 0; i < num_vectors; i++) { >> + msix_vector_unuse(PCI_DEVICE(s), i); >> + } >> +} >> + >> +static bool >> +e1000e_use_msix_vectors(E1000EState *s, int num_vectors) >> +{ >> + int i; >> + for (i = 0; i < num_vectors; i++) { >> + int res = msix_vector_use(PCI_DEVICE(s), i); >> + if (res < 0) { >> + trace_e1000e_msix_use_vector_fail(i, res); >> + e1000e_unuse_msix_vectors(s, i); >> + return false; >> + } >> + } >> + return true; >> +} >> + >> +static void >> +e1000e_init_msix(E1000EState *s) >> +{ >> + PCIDevice *d = PCI_DEVICE(s); >> + int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM, >> + &s->msix, >> + E1000E_MSIX_IDX, E1000E_MSIX_TABLE, >> + &s->msix, >> + E1000E_MSIX_IDX, E1000E_MSIX_PBA, >> + 0xA0); >> + >> + if (res < 0) { >> + trace_e1000e_msix_init_fail(res); >> + } else { >> + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { >> + msix_uninit(d, &s->msix, &s->msix); >> + } else { >> + s->intr_state |= E1000E_USE_MSIX; >> + } >> + } >> +} >> + >> +static void >> +e1000e_cleanup_msix(E1000EState *s) >> +{ >> + if (s->intr_state & E1000E_USE_MSIX) { >> + e1000e_unuse_msix_vectors(s, E1000E_MSIX_VEC_NUM); >> + msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix); >> + } >> +} >> + >> +static void >> +e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr) >> +{ >> + DeviceState *dev = DEVICE(pci_dev); >> + NetClientState *nc; >> + int i; >> + >> + s->nic = qemu_new_nic(&net_e1000e_info, &s->conf, >> + object_get_typename(OBJECT(s)), dev->id, s); >> + >> + s->core.max_queue_num = s->conf.peers.queues - 1; >> + >> + trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); >> + memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); >> + >> + qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr); >> + >> + /* Setup virtio headers */ >> + if (s->disable_vnet) { >> + s->core.has_vnet = false; >> + trace_e1000e_cfg_support_virtio(false); >> + return; >> + } else { >> + s->core.has_vnet = true; >> + } >> + >> + for (i = 0; i < s->conf.peers.queues; i++) { >> + nc = qemu_get_subqueue(s->nic, i); >> + if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) { >> + s->core.has_vnet = false; >> + trace_e1000e_cfg_support_virtio(false); >> + return; >> + } >> + } >> + >> + trace_e1000e_cfg_support_virtio(true); >> + >> + for (i = 0; i < s->conf.peers.queues; i++) { >> + nc = qemu_get_subqueue(s->nic, i); >> + qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr)); >> + qemu_using_vnet_hdr(nc->peer, true); >> + } >> +} >> + >> +static inline uint64_t >> +e1000e_gen_dsn(uint8_t *mac) >> +{ >> + return (uint64_t)(mac[5]) | >> + (uint64_t)(mac[4]) << 8 | >> + (uint64_t)(mac[3]) << 16 | >> + (uint64_t)(0x00FF) << 24 | >> + (uint64_t)(0x00FF) << 32 | >> + (uint64_t)(mac[2]) << 40 | >> + (uint64_t)(mac[1]) << 48 | >> + (uint64_t)(mac[0]) << 56; >> +} >> + >> +static int >> +e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) >> +{ >> + int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, >> PCI_PM_SIZEOF); >> + >> + if (ret >= 0) { >> + pci_set_word(pdev->config + offset + PCI_PM_PMC, >> + PCI_PM_CAP_VER_1_1 | >> + pmc); >> + >> + pci_set_word(pdev->wmask + offset + PCI_PM_CTRL, >> + PCI_PM_CTRL_STATE_MASK | >> + PCI_PM_CTRL_PME_ENABLE | >> + PCI_PM_CTRL_DATA_SEL_MASK); >> + >> + pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL, >> + PCI_PM_CTRL_PME_STATUS); >> + } >> + >> + return ret; >> +} >> + >> +static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address, >> + uint32_t val, int len) >> +{ >> + E1000EState *s = E1000E(pci_dev); >> + >> + pci_default_write_config(pci_dev, address, val, len); >> + >> + if (range_covers_byte(address, len, PCI_COMMAND) && >> + (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { >> + qemu_flush_queued_packets(qemu_get_queue(s->nic)); >> + } >> +} >> + >> +static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) >> +{ >> + static const uint16_t E1000E_PMRB_OFFSET = 0x0C8; >> + static const uint16_t E1000E_PCIE_OFFSET = 0x0E0; >> + static const uint16_t E1000E_AER_OFFSET = 0x100; >> + static const uint16_t E1000E_DSN_OFFSET = 0x140; > > Let's make these lower case pls. > >> + > > and drop this empty line. > >> + E1000EState *s = E1000E(pci_dev); >> + uint8_t *macaddr; >> + >> + trace_e1000e_cb_pci_realize(); >> + >> + pci_dev->config_write = e1000e_write_config; >> + >> + pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10; >> + pci_dev->config[PCI_INTERRUPT_PIN] = 1; >> + >> + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, s->subsys_ven); >> + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, s->subsys); >> + >> + s->subsys_ven_used = s->subsys_ven; >> + s->subsys_used = s->subsys; >> + >> + /* Define IO/MMIO regions */ >> + memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s, >> + "e1000e-mmio", E1000E_MMIO_SIZE); >> + pci_register_bar(pci_dev, E1000E_MMIO_IDX, >> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio); >> + >> + /* >> + * We provide a dummy implementation for the flash BAR >> + * for drivers that may theoretically check for its presence. >> + */ >> + memory_region_init(&s->flash, OBJECT(s), >> + "e1000e-flash", E1000E_FLASH_SIZE); >> + pci_register_bar(pci_dev, E1000E_FLASH_IDX, >> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash); > > This is a weird kind of reason. We don't really implement flash > so how likely are such drivers to work? > If some drivers do probe for this BAR, let's say so in the comment. >