On Mon, Apr 02, 2018 at 11:52:08AM +0200, Stefan Fritsch wrote:
> Hi,
>
> We have seen problems with em on i219V and i219LM. For example, "Hardware
> Initialization Failed" if no cable is plugged in during boot, or watchdog
> timeouts / hangs until next boot if the cable is removed while data is
> transmitted.
>
> This patch adds a bunch of quirks and fixes from freebsd.
Can you split this into a few patches? It would be easier to review/backout
individual parts if needed.
Ie I wonder if the sw_flag parts could be done differently but other
parts could go in without that.
printf format string could change from ", mac_type %#x phy_type %#x"
to ", mac %#x phy %#x" though I'm not sold on the value of printing
it to begin with. The PCH ems all have a single PHY, MAC can be
inferred from pcidump.
>
> It would be nice if people could give it a try on various em(4) hardware
> to check if there are any regressions.
>
> Comments on the patch are of course welcome, too.
>
> Cheers,
> Stefan
>
> * Some em chips have a semaphore ("software flag") to synchronize access
> to certain registers between OS and firmware (ME/AMT).
>
> Make the logic to get the flag match the logic in freebsd. This includes
> higher timeouts and waiting for a previous unlock to complete before
> trying a lock again.
>
> * Another problem was that openbsd em driver calls em_get_software_flag
> recursively, which causes the semaphore to be unlocked too early. Make
> em_get_software_flag/em_release_software_flag handle this correctly.
> Freebsd does not do this, but they have a mutex that probably allows
> them to detect recursive calls to e1000_acquire_swflag_ich8lan().
> Reworking the openbsd driver to not recursively get the semaphore would
> be very invasive.
>
> * Port the logic from freebsd to em_check_phy_reset_block(). A single
> read does not seem to be reliable.
>
> * Also, increase delay after reset to 20ms, which is the value in freebsd
> for ich8lan.
>
> * Add another magic 1ms delay that seems to help with some remaining issues
> on an HP elitebook 820 G3. A printf() at the same place helps, too.
>
> * Port a silicon errata workaround from FreeBSD.
>
>
> https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/i218-i219-ethernet-connection-spec-update.pdf?asset=9561
>
> * While there, print mac+phy type in em_attach(). This makes it easier to
> determine which quirks are hit/missing when comparing to freebsd.
>
> * Also print em_init_hw() error code if something goes wrong.
>
>
> diff --git sys/dev/pci/if_em.c sys/dev/pci/if_em.c
> index ec8e35245ef..f6a1f1c3894 100644
> --- sys/dev/pci/if_em.c
> +++ sys/dev/pci/if_em.c
> @@ -557,6 +557,8 @@ em_attach(struct device *parent, struct device *self,
> void *aux)
> if (!defer)
> em_update_link_status(sc);
>
> + printf(", mac_type %#x phy_type %#x", sc->hw.mac_type,
> + sc->hw.phy_type);
> printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
>
> /* Indicate SOL/IDER usage */
> @@ -1860,8 +1862,8 @@ em_hardware_init(struct em_softc *sc)
> INIT_DEBUGOUT("\nHardware Initialization Deferred ");
> return (EAGAIN);
> }
> - printf("\n%s: Hardware Initialization Failed\n",
> - DEVNAME(sc));
> + printf("\n%s: Hardware Initialization Failed: %d\n",
> + DEVNAME(sc), ret_val);
> return (EIO);
> }
>
> @@ -2265,7 +2267,9 @@ em_initialize_transmit_unit(struct em_softc *sc)
> EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
>
> reg_val = E1000_READ_REG(&sc->hw, TARC0);
> - reg_val |= E1000_TARC0_CB_MULTIQ_3_REQ;
> + /* i218-i219 Specification Update 1.5.4.5 */
> + reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
> + reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
> E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
> }
> }
> diff --git sys/dev/pci/if_em_hw.c sys/dev/pci/if_em_hw.c
> index df0fa571736..d122e727875 100644
> --- sys/dev/pci/if_em_hw.c
> +++ sys/dev/pci/if_em_hw.c
> @@ -945,7 +945,9 @@ em_reset_hw(struct em_hw *hw)
> }
> em_get_software_flag(hw);
> E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST));
> - msec_delay(5);
> + /* HW reset releases software_flag */
> + hw->sw_flag = 0;
> + msec_delay(20);
>
> /* Ungate automatic PHY configuration on non-managed 82579 */
> if (hw->mac_type == em_pch2lan && !hw->phy_reset_disable &&
> @@ -1491,6 +1493,8 @@ em_init_hw(struct em_hw *hw)
> /* Set the media type and TBI compatibility */
> em_set_media_type(hw);
>
> + /* Magic delay that improves problems with i219LM on HP Elitebook */
> + msec_delay(1);
> /* Must be called after em_set_media_type because media_type is used */
> em_initialize_hardware_bits(hw);
>
> @@ -9539,9 +9543,18 @@ em_check_phy_reset_block(struct em_hw *hw)
> DEBUGFUNC("em_check_phy_reset_block\n");
>
> if (IS_ICH8(hw->mac_type)) {
> - fwsm = E1000_READ_REG(hw, FWSM);
> - return (fwsm & E1000_FWSM_RSPCIPHY) ? E1000_SUCCESS :
> - E1000_BLK_PHY_RESET;
> + int i = 0;
> + int blocked = 0;
> + do {
> + fwsm = E1000_READ_REG(hw, FWSM);
> + if (!(fwsm & E1000_FWSM_RSPCIPHY)) {
> + blocked = 1;
> + msec_delay(10);
> + continue;
> + }
> + blocked = 0;
> + } while (blocked && (i++ < 30));
> + return blocked ? E1000_BLK_PHY_RESET : E1000_SUCCESS;
> }
> if (hw->mac_type > em_82547_rev_2)
> manc = E1000_READ_REG(hw, MANC);
> @@ -9602,11 +9615,27 @@ em_get_software_flag(struct em_hw *hw)
> DEBUGFUNC("em_get_software_flag");
>
> if (IS_ICH8(hw->mac_type)) {
> + if (hw->sw_flag) {
> + hw->sw_flag++;
> + return E1000_SUCCESS;
> + }
> while (timeout) {
> extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
> - extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG;
> - E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
> + if (!(extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG))
> + break;
> + msec_delay_irq(1);
> + timeout--;
> + }
> + if (!timeout) {
> + printf("%s: SW has already locked the resource?\n",
> + __func__);
> + return -E1000_ERR_CONFIG;
> + }
> + timeout = SW_FLAG_TIMEOUT;
> + extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG;
> + E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
>
> + while (timeout) {
> extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
> if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG)
> break;
> @@ -9615,10 +9644,15 @@ em_get_software_flag(struct em_hw *hw)
> }
>
> if (!timeout) {
> - DEBUGOUT("FW or HW locks the resource too long.\n");
> + printf("Failed to acquire the semaphore, FW or HW "
> + "has it: FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n",
> + E1000_READ_REG(hw, FWSM), extcnf_ctrl);
> + extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG;
> + E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
> return -E1000_ERR_CONFIG;
> }
> }
> + hw->sw_flag++;
> return E1000_SUCCESS;
> }
>
> @@ -9638,6 +9672,13 @@ em_release_software_flag(struct em_hw *hw)
> DEBUGFUNC("em_release_software_flag");
>
> if (IS_ICH8(hw->mac_type)) {
> + if (hw->sw_flag <= 0) {
> + printf("%s: not locked!\n", __func__);
> + return;
> + }
> + hw->sw_flag--;
> + if (hw->sw_flag > 0)
> + return;
> extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL);
> extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG;
> E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl);
> diff --git sys/dev/pci/if_em_hw.h sys/dev/pci/if_em_hw.h
> index 5e415e60a34..9c2cfe97569 100644
> --- sys/dev/pci/if_em_hw.h
> +++ sys/dev/pci/if_em_hw.h
> @@ -1634,6 +1634,7 @@ struct em_hw {
> uint8_t bus_func;
> uint16_t swfw;
> boolean_t eee_enable;
> + int sw_flag;
> };
>
> #define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */
> @@ -2295,6 +2296,7 @@ struct em_hw {
> #define E1000_WUS_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters
> */
>
> /* TRAC0 bits */
> +#define E1000_TARC0_CB_MULTIQ_2_REQ (1 << 29)
> #define E1000_TARC0_CB_MULTIQ_3_REQ (1 << 28 | 1 << 29)
>
> /* Management Control */
> @@ -2755,6 +2757,8 @@ struct em_host_command_info {
> #define AUTO_READ_DONE_TIMEOUT 10
> /* Number of milliseconds we wait for PHY configuration done after MAC reset
> */
> #define PHY_CFG_TIMEOUT 100
> +/* SW Semaphore flag timeout in ms */
> +#define SW_FLAG_TIMEOUT 1000
>
> #define E1000_TX_BUFFER_SIZE ((uint32_t)1514)
>
>