[PATCH 4/4] Drivers: hv: balloon: replace ha_region_mutex with spinlock
lockdep reports a possible circular locking dependency when udev is used
for memory onlining:

 systemd-udevd/3996 is trying to acquire lock:
  ((memory_chain).rwsem){.+}, at: [] __blocking_notifier_call_chain+0x4e/0xc0

 but task is already holding lock:
  (&dm_device.ha_region_mutex){+.+.+.}, at: [] hv_memory_notifier+0x5e/0xc0 [hv_balloon]
 ...

which is probably a false positive: whether we take and release
ha_region_mutex from the memory notifier chain depends on the arg. No
real deadlocks were reported so far (though I'm not really sure about
preemptible kernels...) but we don't really need to hold the mutex for
so long. We use it to protect ha_region_list (and its members) and the
num_pages_onlined counter. None of these operations require us to sleep
and nothing is slow, so switch to a spinlock with interrupts disabled.

While at it, replace list_for_each with list_for_each_entry as we
actually need the entries in all these cases, and drop the meaningless
list_empty() checks.

Signed-off-by: Vitaly Kuznetsov
---
 drivers/hv/hv_balloon.c | 98 ++---
 1 file changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index c6252b9..1e6b54a 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -542,7 +542,11 @@ struct hv_dynmem_device {
 	 */
 	struct task_struct *thread;
 
-	struct mutex ha_region_mutex;
+	/*
+	 * Protects ha_region_list, num_pages_onlined counter and individual
+	 * regions from ha_region_list.
+	 */
+	spinlock_t ha_lock;
 
 	/*
 	 * A list of hot-add regions.
@@ -566,25 +570,22 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
			      void *v)
 {
 	struct memory_notify *mem = (struct memory_notify *)v;
+	unsigned long flags;
 
 	switch (val) {
-	case MEM_GOING_ONLINE:
-		mutex_lock(&dm_device.ha_region_mutex);
-		break;
-
 	case MEM_ONLINE:
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		dm_device.num_pages_onlined += mem->nr_pages;
-	case MEM_CANCEL_ONLINE:
-		if (val == MEM_ONLINE ||
-		    mutex_is_locked(&dm_device.ha_region_mutex))
-			mutex_unlock(&dm_device.ha_region_mutex);
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 		break;
 
 	case MEM_OFFLINE:
-		mutex_lock(&dm_device.ha_region_mutex);
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		dm_device.num_pages_onlined -= mem->nr_pages;
-		mutex_unlock(&dm_device.ha_region_mutex);
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 		break;
+	case MEM_GOING_ONLINE:
+	case MEM_CANCEL_ONLINE:
 	case MEM_GOING_OFFLINE:
 	case MEM_CANCEL_OFFLINE:
 		break;
@@ -620,9 +621,12 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 	unsigned long start_pfn;
 	unsigned long processed_pfn;
 	unsigned long total_pfn = pfn_count;
+	unsigned long flags;
 
 	for (i = 0; i < (size/HA_CHUNK); i++) {
 		start_pfn = start + (i * HA_CHUNK);
+
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		has->ha_end_pfn += HA_CHUNK;
 
 		if (total_pfn > HA_CHUNK) {
@@ -634,8 +638,8 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 		}
 
 		has->covered_end_pfn += processed_pfn;
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
-		mutex_unlock(&dm_device.ha_region_mutex);
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
				(HA_CHUNK << PAGE_SHIFT));
@@ -652,13 +656,13 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 			 */
 			do_hot_add = false;
 		}
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		has->ha_end_pfn -= HA_CHUNK;
 		has->covered_end_pfn -= processed_pfn;
-		mutex_lock(&dm_device.ha_region_mutex);
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 		break;
 	}
 
-		mutex_lock(&dm_device.ha_region_mutex);
 		post_status(&dm_device);
 	}
 
@@ -672,9 +676,10 @@ static void hv_online_page(struct page *pg)
 	unsigned long cur_start_pgp;
 	unsigned long cur_end_pgp;
 	bool is_gap = false;
+	unsigned long flags;
 
-	list_for_each(cur, &dm_device.ha_region_list) {
-		has = list_entry(cur, struct hv_hotadd_state, list);
+	spin_lock_irqsave(&
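The locking rule the conversion establishes is easy to state: every path
that touches ha_region_list or num_pages_onlined brackets the access
with a symmetric spin_lock_irqsave()/spin_unlock_irqrestore() pair and
never sleeps or re-enters the notifier chain while holding the lock. A
minimal sketch of that invariant (illustrative only, not the driver
code; the helper name is made up):

	static DEFINE_SPINLOCK(ha_lock);
	static unsigned long num_pages_onlined;

	/* Called from the memory notifier; short, non-sleeping,
	 * taken and released within the same event. */
	static void account_onlined_pages(unsigned long nr_pages)
	{
		unsigned long flags;

		spin_lock_irqsave(&ha_lock, flags);
		num_pages_onlined += nr_pages;
		spin_unlock_irqrestore(&ha_lock, flags);
	}

Because the lock is never held across __blocking_notifier_call_chain(),
the ordering lockdep complained about can no longer occur.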
[PATCH 3/4] Drivers: hv: balloon: get rid of ol_waitevent
With the recently introduced in-kernel memory onlining
(MEMORY_HOTPLUG_DEFAULT_ONLINE) there is no point in waiting for pages
to come online in the driver, and in case the feature is disabled the
5 second wait won't help. Get rid of the waiting.

Signed-off-by: Vitaly Kuznetsov
---
 drivers/hv/hv_balloon.c | 19 ---
 1 file changed, 19 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 064ef3d..c6252b9 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -535,11 +535,6 @@ struct hv_dynmem_device {
 	bool host_specified_ha_region;
 
 	/*
-	 * State to synchronize hot-add.
-	 */
-	struct completion ol_waitevent;
-	bool ha_waiting;
-	/*
 	 * This thread handles hot-add
 	 * requests from the host as well as notifying
 	 * the host with regards to memory pressure in
@@ -583,10 +578,6 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
 		if (val == MEM_ONLINE ||
 		    mutex_is_locked(&dm_device.ha_region_mutex))
 			mutex_unlock(&dm_device.ha_region_mutex);
-		if (dm_device.ha_waiting) {
-			dm_device.ha_waiting = false;
-			complete(&dm_device.ol_waitevent);
-		}
 		break;
 
 	case MEM_OFFLINE:
@@ -644,9 +635,6 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 
 		has->covered_end_pfn += processed_pfn;
 
-		init_completion(&dm_device.ol_waitevent);
-		dm_device.ha_waiting = true;
-
 		mutex_unlock(&dm_device.ha_region_mutex);
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
@@ -670,13 +658,6 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 			break;
 		}
 
-		/*
-		 * Wait for the memory block to be onlined.
-		 * Since the hot add has succeeded, it is ok to
-		 * proceed even if the pages in the hot added region
-		 * have not been "onlined" within the allowed time.
-		 */
-		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
 		mutex_lock(&dm_device.ha_region_mutex);
 		post_status(&dm_device);
 	}
--
2.7.4
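For context: in-kernel onlining is governed by
CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE and can be toggled at runtime via
/sys/devices/system/memory/auto_online_blocks. When it is enabled,
add_memory() onlines the new block before returning, so there is
nothing for the driver to wait on; when it is disabled, onlining is up
to udev and a fixed 5 second timeout is just a guess. A rough sketch of
the resulting per-chunk flow in hv_mem_hot_add() (illustrative, error
handling elided):

	ret = add_memory(nid, PFN_PHYS(start_pfn),
			 HA_CHUNK << PAGE_SHIFT);
	/*
	 * With auto-onlining the pages are already online here;
	 * without it, waiting would not help anyway, so report
	 * status to the host immediately either way.
	 */
	post_status(&dm_device);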
[PATCH 0/4] Drivers: hv: balloon: fix WS2012 memory hotplug issues and do some cleanup
Crashes with the Hyper-V balloon driver are reported with WS2012
(non-R2) hosts. I was able to identify two issues, which I fix with the
first two patches of this series. Patch 3 removes ol_waitevent, which
is not needed since we have in-kernel memory onlining; patch 4 gets rid
of ha_region_mutex by making the locking fine-grained with a spinlock.

Vitaly Kuznetsov (4):
  Drivers: hv: balloon: keep track of where ha_region starts
  Drivers: hv: balloon: account for gaps in hot add regions
  Drivers: hv: balloon: get rid of ol_waitevent
  Drivers: hv: balloon: replace ha_region_mutex with spinlock

 drivers/hv/hv_balloon.c | 223 +---
 1 file changed, 137 insertions(+), 86 deletions(-)

--
2.7.4
[PATCH 2/4] Drivers: hv: balloon: account for gaps in hot add regions
I'm observing the following hot add requests from the WS2012 host:

hot_add_req: start_pfn = 0x108200 count = 330752
hot_add_req: start_pfn = 0x158e00 count = 193536
hot_add_req: start_pfn = 0x188400 count = 239616

As the host doesn't specify hot add regions, we try to create a
128Mb-aligned region covering the first request: we create the
0x108000 - 0x160000 region and we add 0x108000 - 0x158e00 memory. The
second request passes the pfn_covered() check, we enlarge the region to
0x108000 - 0x190000 and add 0x158e00 - 0x188200 memory. The problem
emerges with the third request as it starts at 0x188400, so there is a
0x200 gap which is not covered. As the end of our region is 0x190000
now, it again passes the pfn_covered() check where we just adjust
covered_end_pfn and make it 0x188400 instead of 0x188200, which means
that we'll try to online the 0x188200-0x188400 pages, but these pages
were never assigned to us and we crash.

We can't react to such requests by creating new hot add regions, as it
may happen that the whole suggested range falls into a previously
identified 128Mb-aligned area, so we'd end up adding nothing or
creating intersecting regions, and our current logic doesn't allow
that. Instead, create a list of such 'gaps' and check for them in the
page online callback.

Signed-off-by: Vitaly Kuznetsov
---
 drivers/hv/hv_balloon.c | 107 +---
 1 file changed, 83 insertions(+), 24 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 4ae26d6..064ef3d 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -441,6 +441,16 @@ struct hv_hotadd_state {
 	unsigned long covered_end_pfn;
 	unsigned long ha_end_pfn;
 	unsigned long end_pfn;
+	/*
+	 * A list of gaps.
+	 */
+	struct list_head gap_list;
+};
+
+struct hv_hotadd_gap {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
 };
 
 struct balloon_state {
@@ -676,35 +686,63 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 
 static void hv_online_page(struct page *pg)
 {
-	struct list_head *cur;
 	struct hv_hotadd_state *has;
+	struct hv_hotadd_gap *gap;
 	unsigned long cur_start_pgp;
 	unsigned long cur_end_pgp;
+	bool is_gap = false;
 
 	list_for_each(cur, &dm_device.ha_region_list) {
 		has = list_entry(cur, struct hv_hotadd_state, list);
 		cur_start_pgp = (unsigned long)
+			pfn_to_page(has->start_pfn);
+		cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
+
+		/* The page belongs to a different HAS. */
+		if (((unsigned long)pg < cur_start_pgp) ||
+		    ((unsigned long)pg >= cur_end_pgp))
+			continue;
+
+		cur_start_pgp = (unsigned long)
 			pfn_to_page(has->covered_start_pfn);
 		cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
 
-		if (((unsigned long)pg >= cur_start_pgp) &&
-		    ((unsigned long)pg < cur_end_pgp)) {
-			/*
-			 * This frame is currently backed; online the
-			 * page.
-			 */
-			__online_page_set_limits(pg);
-			__online_page_increment_counters(pg);
-			__online_page_free(pg);
+		/* The page is not backed. */
+		if (((unsigned long)pg < cur_start_pgp) ||
+		    ((unsigned long)pg >= cur_end_pgp))
+			continue;
+
+		/* Check for gaps. */
+		list_for_each_entry(gap, &has->gap_list, list) {
+			cur_start_pgp = (unsigned long)
+				pfn_to_page(gap->start_pfn);
+			cur_end_pgp = (unsigned long)
+				pfn_to_page(gap->end_pfn);
+			if (((unsigned long)pg >= cur_start_pgp) &&
+			    ((unsigned long)pg < cur_end_pgp)) {
+				is_gap = true;
+				break;
+			}
 		}
+
+		if (is_gap)
+			break;
+
+		/* This frame is currently backed; online the page. */
+		__online_page_set_limits(pg);
+		__online_page_increment_counters(pg);
+		__online_page_free(pg);
+		break;
 	}
 }
 
-static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
 {
 	struct list_head *cur;
 	struct hv_hotadd_state *has;
+	struct hv_hotadd_gap *gap;
 	unsigned long residual, new_inc;
+	int ret = 0;
 
 	if (list_empty(&dm_device.ha_region_list))
 		return false
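The posting is cut off before the pfn_covered() changes, but the gap
bookkeeping it describes amounts to something like the following sketch
(a hypothetical helper, not the patch itself; GFP_ATOMIC is assumed so
the allocation stays safe once patch 4 moves this under a spinlock):

	static int record_gap(struct hv_hotadd_state *has,
			      unsigned long start_pfn)
	{
		struct hv_hotadd_gap *gap;

		/* Contiguous with what we already cover: no gap. */
		if (start_pfn == has->covered_end_pfn)
			return 0;

		gap = kzalloc(sizeof(*gap), GFP_ATOMIC);
		if (!gap)
			return -ENOMEM;

		INIT_LIST_HEAD(&gap->list);
		gap->start_pfn = has->covered_end_pfn;
		gap->end_pfn = start_pfn;
		list_add_tail(&gap->list, &has->gap_list);

		has->covered_end_pfn = start_pfn;
		return 0;
	}

hv_online_page() then skips any page that falls inside a recorded gap,
as the hunk above shows.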
[PATCH 1/4] Drivers: hv: balloon: keep track of where ha_region starts
Windows 2012 (non-R2) does not specify a hot add region in its hot add
requests, so the logic in hot_add_req() tries to find a 128Mb-aligned
region covering the request. It may also happen that the host's
requests are not 128Mb aligned, in which case the created ha_region
will start before the first specified PFN. We can't online these
non-present pages, but we don't remember the real start of the region.

This is a regression introduced by commit 5a75d733 ("Drivers: hv:
hv_balloon: don't lose memory when onlining order is not natural").
While the idea of keeping a 'moving window' was wrong (there is no
guarantee that hot add requests come ordered), we should still keep
track of covered_start_pfn. This is not a revert; the logic is
different.

Signed-off-by: Vitaly Kuznetsov
---
 drivers/hv/hv_balloon.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index df35fb7..4ae26d6 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -430,13 +430,14 @@ struct dm_info_msg {
  * currently hot added. We hot add in multiples of 128M
  * chunks; it is possible that we may not be able to bring
  * online all the pages in the region. The range
- * covered_end_pfn defines the pages that can
+ * covered_start_pfn:covered_end_pfn defines the pages that can
  * be brough online.
  */
 
 struct hv_hotadd_state {
 	struct list_head list;
 	unsigned long start_pfn;
+	unsigned long covered_start_pfn;
 	unsigned long covered_end_pfn;
 	unsigned long ha_end_pfn;
 	unsigned long end_pfn;
@@ -682,7 +683,8 @@ static void hv_online_page(struct page *pg)
 	list_for_each(cur, &dm_device.ha_region_list) {
 		has = list_entry(cur, struct hv_hotadd_state, list);
 
-		cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
+		cur_start_pgp = (unsigned long)
+			pfn_to_page(has->covered_start_pfn);
 		cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
 
 		if (((unsigned long)pg >= cur_start_pgp) &&
@@ -854,6 +856,7 @@ static unsigned long process_hot_add(unsigned long pg_start,
 		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
 		ha_region->start_pfn = rg_start;
 		ha_region->ha_end_pfn = rg_start;
+		ha_region->covered_start_pfn = pg_start;
 		ha_region->covered_end_pfn = pg_start;
 		ha_region->end_pfn = rg_start + rg_size;
 	}
--
2.7.4
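A worked example of why covered_start_pfn matters, using HA_CHUNK as
defined in hv_balloon.c (32768 pfns, i.e. 128Mb of 4k pages) and the
first WS2012 request from patch 2 (numbers illustrative):

	#define HA_CHUNK	(32 * 1024)	/* pfns per 128Mb chunk */

	unsigned long pg_start = 0x108200;	/* first pfn the host granted */
	unsigned long rg_start = (pg_start / HA_CHUNK) * HA_CHUNK;
						/* 0x108000: aligned region start */

	/*
	 * pfns 0x108000 - 0x108200 lie inside the aligned region but were
	 * never granted by the host, so hv_online_page() must not online
	 * them: covered_start_pfn records pg_start, not rg_start.
	 */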
[PATCH] Staging: rtl8723au: rtw_ieee80211: Fixed operator spacing style issues
Fixed spacing around operators to resolve coding style issues.

Signed-off-by: Shiva Kerdel
---
 drivers/staging/rtl8723au/core/rtw_ieee80211.c | 80 +-
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/staging/rtl8723au/core/rtw_ieee80211.c b/drivers/staging/rtl8723au/core/rtw_ieee80211.c
index 07a6490..9fa0ef1 100644
--- a/drivers/staging/rtl8723au/core/rtw_ieee80211.c
+++ b/drivers/staging/rtl8723au/core/rtw_ieee80211.c
@@ -65,7 +65,7 @@ static u8 WIFI_OFDMRATES[] = {
 
 int rtw_get_bit_value_from_ieee_value23a(u8 val)
 {
-	unsigned char dot11_rate_table[]=
+	unsigned char dot11_rate_table[] =
 		{2, 4, 11, 22, 12, 18, 24, 36, 48, 72, 96, 108, 0};
 	int i = 0;
@@ -140,7 +140,7 @@ u8 *rtw_set_ie23a(u8 *pbuf, int index, uint len, const u8 *source, uint *frlen)
 	return pbuf + len + 2;
 }
 
-inline u8 *rtw_set_ie23a_ch_switch (u8 *buf, u32 *buf_len, u8 ch_switch_mode,
+inline u8 *rtw_set_ie23a_ch_switch(u8 *buf, u32 *buf_len, u8 ch_switch_mode,
				    u8 new_ch, u8 ch_switch_cnt)
 {
 	u8 ie_data[3];
@@ -230,14 +230,14 @@ u8 *rtw_get_ie23a_ex(u8 *in_ie, uint in_len, u8 eid, u8 *oui, u8 oui_len,
 
 	while (cnt < in_len) {
 		if (eid == in_ie[cnt] &&
-		    (!oui || !memcmp(&in_ie[cnt+2], oui, oui_len))) {
+		    (!oui || !memcmp(&in_ie[cnt + 2], oui, oui_len))) {
 			target_ie = &in_ie[cnt];
 
 			if (ie)
-				memcpy(ie, &in_ie[cnt], in_ie[cnt+1]+2);
+				memcpy(ie, &in_ie[cnt], in_ie[cnt + 1] + 2);
 
 			if (ielen)
-				*ielen = in_ie[cnt+1]+2;
+				*ielen = in_ie[cnt + 1] + 2;
 			break;
 		} else {
 			cnt += in_ie[cnt + 1] + 2; /* goto next */
@@ -331,7 +331,7 @@ uint rtw_get_rateset_len23a(u8 *rateset)
 {
 	uint i = 0;
 
-	while(1) {
+	while (1) {
 		if (rateset[i] == 0)
 			break;
@@ -378,7 +378,7 @@ int rtw_generate_ie23a(struct registry_priv *pregistrypriv)
 		wireless_mode = pregistrypriv->wireless_mode;
 	}
 
-	rtw_set_supported_rate23a(pdev_network->SupportedRates, wireless_mode) ;
+	rtw_set_supported_rate23a(pdev_network->SupportedRates, wireless_mode);
 
 	rateLen = rtw_get_rateset_len23a(pdev_network->SupportedRates);
@@ -533,7 +533,7 @@ int rtw_parse_wpa2_ie23a(const u8 *rsn_ie, int rsn_ie_len, int *group_cipher,
 		return _FAIL;
 	}
 
-	if (*rsn_ie != WLAN_EID_RSN || *(rsn_ie+1) != (u8)(rsn_ie_len - 2)) {
+	if (*rsn_ie != WLAN_EID_RSN || *(rsn_ie + 1) != (u8)(rsn_ie_len - 2)) {
 		return _FAIL;
 	}
@@ -791,64 +791,64 @@ u16 rtw_mcs_rate23a(u8 rf_type, u8 bw_40MHz, u8 short_GI_20, u8 short_GI_40,
 
 	if (rf_type == RF_1T1R) {
 		if (mcs->rx_mask[0] & BIT(7))
-			max_rate = (bw_40MHz) ? ((short_GI_40)?1500:1350):
-				((short_GI_20)?722:650);
+			max_rate = (bw_40MHz) ? ((short_GI_40) ? 1500 : 1350) :
+				((short_GI_20) ? 722 : 650);
 		else if (mcs->rx_mask[0] & BIT(6))
-			max_rate = (bw_40MHz) ? ((short_GI_40)?1350:1215):
-				((short_GI_20)?650:585);
+			max_rate = (bw_40MHz) ? ((short_GI_40) ? 1350 : 1215) :
+				((short_GI_20) ? 650 : 585);
 		else if (mcs->rx_mask[0] & BIT(5))
-			max_rate = (bw_40MHz) ? ((short_GI_40)?1200:1080):
-				((short_GI_20)?578:520);
+			max_rate = (bw_40MHz) ? ((short_GI_40) ? 1200 : 1080) :
+				((short_GI_20) ? 578 : 520);
 		else if (mcs->rx_mask[0] & BIT(4))
-			max_rate = (bw_40MHz) ? ((short_GI_40)?900:810):
-				((short_GI_20)?433:390);
+			max_rate = (bw_40MHz) ? ((short_GI_40) ? 900 : 810) :
+				((short_GI_20) ? 433 : 390);
 		else if (mcs->rx_mask[0] & BIT(3))
-			max_rate = (bw_40MHz) ? ((short_GI_40)?600:540):
-				((short_GI_20)?289:260);
+			max_rate = (bw_40MHz) ? ((short_GI_40) ? 600 : 540) :
+				((short_GI_20) ? 289 : 260);
 		else if (mcs->rx_mask[0] & BIT(2))
-			max_rate = (bw_40MHz) ? ((short_GI_40)?450:405):
-				((short_GI_20)?217:195);
+			max_rate = (bw_40MHz) ? ((short_GI_40) ? 450 : 405) :
+				((short_GI_20) ? 217 : 195);
 		else if (mcs->rx_mask[0] & BIT(1))
RE: [PATCH 2/4] Drivers: hv: balloon: account for gaps in hot add regions
> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> Sent: Friday, August 5, 2016 3:49 AM
> To: de...@linuxdriverproject.org
> Cc: linux-ker...@vger.kernel.org; Haiyang Zhang ;
> KY Srinivasan ; Alex Ng (LIS)
> Subject: [PATCH 2/4] Drivers: hv: balloon: account for gaps in hot add
> regions
>
> I'm observing the following hot add requests from the WS2012 host:
>
> hot_add_req: start_pfn = 0x108200 count = 330752
> hot_add_req: start_pfn = 0x158e00 count = 193536
> hot_add_req: start_pfn = 0x188400 count = 239616
>
> As the host doesn't specify hot add regions we're trying to create 128Mb-
> aligned region covering the first request, we create the 0x108000 -
> 0x160000 region and we add 0x108000 - 0x158e00 memory. The second
> request passes the pfn_covered() check, we enlarge the region to 0x108000 -
> 0x190000 and add 0x158e00 - 0x188200 memory. The problem emerges with
> the third request as it starts at 0x188400 so there is a 0x200 gap which is
> not covered. As the end of our region is 0x190000 now it again passes the
> pfn_covered() check where we just adjust the covered_end_pfn and make it
> 0x188400 instead of 0x188200 which means that we'll try to online
> 0x188200-0x188400 pages but these pages were never assigned to us and we
> crash.

The fact that the host sent a request that's non-contiguous with the
previous request is unexpected. Could we check the number of pages we
returned in our response after each request? I'm wondering if we may
have given a wrong response, causing the host to follow up with a
gapped request.