Hi John, I love your patch! Perhaps something to improve:
[auto build test WARNING on iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use as documented in
https://git-scm.com/docs/git-format-patch]

url:      https://github.com/0day-ci/linux/commits/John-Garry/iommu-arm-smmu-v3-Improve-cmdq-lock-efficiency/20200623-013438
base:     https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
config:   arm64-randconfig-c024-20200622 (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <l...@intel.com>

All warnings (new ones prefixed by >>, old ones prefixed by <<):

   In file included from include/linux/bits.h:23,
                    from include/linux/ioport.h:15,
                    from include/linux/acpi.h:12,
                    from drivers/iommu/arm-smmu-v3.c:12:
   drivers/iommu/arm-smmu-v3.c: In function 'arm_smmu_cmdq_issue_cmdlist':
   include/linux/bits.h:26:28: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
      26 |   __builtin_constant_p((l) > (h)), (l) > (h), 0)))
         |                            ^
   include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
      16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
         |                                                              ^
   include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
      39 |  (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
         |   ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
    1404 |  u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
         |                  ^~~~~~~
   include/linux/bits.h:26:40: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
      26 |   __builtin_constant_p((l) > (h)), (l) > (h), 0)))
         |                                        ^
   include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
      16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
         |                                                              ^
   include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
      39 |  (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
         |   ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
    1404 |  u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
         |                  ^~~~~~~

vim +/GENMASK +1404 drivers/iommu/arm-smmu-v3.c

  1369	
  1370	/*
  1371	 * This is the actual insertion function, and provides the following
  1372	 * ordering guarantees to callers:
  1373	 *
  1374	 * - There is a dma_wmb() before publishing any commands to the queue.
  1375	 *   This can be relied upon to order prior writes to data structures
  1376	 *   in memory (such as a CD or an STE) before the command.
  1377	 *
  1378	 * - On completion of a CMD_SYNC, there is a control dependency.
  1379	 *   This can be relied upon to order subsequent writes to memory (e.g.
  1380	 *   freeing an IOVA) after completion of the CMD_SYNC.
  1381	 *
  1382	 * - Command insertion is totally ordered, so if two CPUs each race to
  1383	 *   insert their own list of commands then all of the commands from one
  1384	 *   CPU will appear before any of the commands from the other CPU.
  1385	 *
  1386	 * - A CMD_SYNC is always inserted, which ensures we limit the prod pointer
  1387	 *   for when the cmdq is full, such that we don't wrap more than twice.
  1388	 *   It also makes it easy for the owner to know by how many to increment the
  1389	 *   cmdq lock.
  1390	 */
  1391	static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
  1392					       u64 *cmds, int n)
  1393	{
  1394		u64 cmd_sync[CMDQ_ENT_DWORDS];
  1395		const int sync = 1;
  1396		u32 prod;
  1397		unsigned long flags;
  1398		bool owner;
  1399		struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
  1400		struct arm_smmu_ll_queue llq = {
  1401			.max_n_shift = cmdq->q.llq.max_n_shift,
  1402		}, head = llq, space = llq;
  1403		u32 owner_val = 1 << cmdq->q.llq.owner_count_shift;
> 1404		u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
  1405		u32 owner_mask = GENMASK(30, cmdq->q.llq.owner_count_shift);
  1406		int ret = 0;
  1407	
  1408		/* 1. Allocate some space in the queue */
  1409		local_irq_save(flags);
  1410	
  1411		prod = atomic_fetch_add(n + sync + owner_val,
  1412					&cmdq->q.llq.atomic.prod);
  1413	
  1414		owner = !(prod & owner_mask);
  1415		llq.prod = prod_mask & prod;
  1416		head.prod = queue_inc_prod_n(&llq, n + sync);
  1417	
  1418		/*
  1419		 * Ensure it's safe to write the entries. For this, we need to ensure
  1420		 * that there is space in the queue from our prod pointer.
  1421		 */
  1422		space.cons = READ_ONCE(cmdq->q.llq.cons);
  1423		space.prod = llq.prod;
  1424		while (!queue_has_space(&space, n + sync)) {
  1425			if (arm_smmu_cmdq_poll_until_not_full(smmu, &space))
  1426				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
  1427	
  1428			space.prod = llq.prod;
  1429		}
  1430	
  1431		/*
  1432		 * 2. Write our commands into the queue
  1433		 * Dependency ordering from the space-checking while loop, above.
  1434		 */
  1435		arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
  1436	
  1437		prod = queue_inc_prod_n(&llq, n);
  1438		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
  1439		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
  1440	
  1441		/* 3. Mark our slots as valid, ensuring commands are visible first */
  1442		dma_wmb();
  1443		arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
  1444	
  1445		/* 4. If we are the owner, take control of the SMMU hardware */
  1446		if (owner) {
  1447			int owner_count;
  1448			u32 prod_tmp;
  1449	
  1450			/* a. Wait for previous owner to finish */
  1451			atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
  1452	
  1453			/* b. Stop gathering work by clearing the owned mask */
  1454			prod_tmp = atomic_fetch_andnot_relaxed(~prod_mask,
  1455							       &cmdq->q.llq.atomic.prod);
  1456			prod = prod_tmp & prod_mask;
  1457			owner_count = prod_tmp & owner_mask;
  1458			owner_count >>= cmdq->q.llq.owner_count_shift;
  1459	
  1460			/*
  1461			 * c. Wait for any gathered work to be written to the queue.
  1462			 * Note that we read our own entries so that we have the control
  1463			 * dependency required by (d).
  1464			 */
  1465			arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
  1466	
  1467			/*
  1468			 * In order to determine completion of the CMD_SYNCs, we must
  1469			 * ensure that the queue can't wrap twice without us noticing.
  1470			 * We achieve that by taking the cmdq lock as shared before
  1471			 * progressing the prod pointer.
  1472			 * The owner does this for all the non-owners it has gathered.
  1473			 * Otherwise, some non-owner(s) may lock the cmdq, blocking
  1474			 * cons from being updated. This could happen when the cmdq has just
  1475			 * become full. In this case, other sibling non-owners could be
  1476			 * blocked from progressing, leading to deadlock.
  1477			 */
  1478			arm_smmu_cmdq_shared_lock(cmdq, owner_count);
  1479	
  1480			/*
  1481			 * d. Advance the hardware prod pointer
  1482			 * Control dependency ordering from the entries becoming valid.
  1483			 */
  1484			writel_relaxed(prod, cmdq->q.prod_reg);
  1485	
  1486			/*
  1487			 * e. Tell the next owner we're done
  1488			 * Make sure we've updated the hardware first, so that we don't
  1489			 * race to update prod and potentially move it backwards.
  1490			 */
  1491			atomic_set_release(&cmdq->owner_prod, prod);
  1492		}
  1493	
  1494		/* 5. Since we always insert a CMD_SYNC, we must wait for it to complete */
  1495		llq.prod = queue_inc_prod_n(&llq, n);
  1496		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
  1497		if (ret)
  1498			dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
  1499					    llq.prod, readl_relaxed(cmdq->q.prod_reg),
  1500					    readl_relaxed(cmdq->q.cons_reg));
  1501	
  1502		/*
  1503		 * Try to unlock the cmdq lock. This will fail if we're the last reader,
  1504		 * in which case we can safely update cmdq->q.llq.cons
  1505		 */
  1506		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
  1507			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
  1508			arm_smmu_cmdq_shared_unlock(cmdq);
  1509		}
  1510	
  1511		local_irq_restore(flags);
  1512		return ret;
  1513	}
  1514	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
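For reference, below is a minimal, self-contained sketch of why this warning fires: GENMASK()'s input check compares (l) > (h), and with l == 0 and a runtime unsigned value such as max_n_shift for h, GCC sees an always-false unsigned comparison under -Wtype-limits. The macros and names here are simplified, illustrative stand-ins modelled on include/linux/bits.h and include/linux/build_bug.h, not the kernel headers themselves:

/*
 * repro.c - simplified stand-in for GENMASK()/GENMASK_INPUT_CHECK()/
 * BUILD_BUG_ON_ZERO(); all names and values here are illustrative only.
 * Assumes a 64-bit unsigned long (e.g. aarch64).
 */
#include <stdio.h>

/* BUILD_BUG_ON_ZERO()-alike: zero-width unnamed bitfield when @e is
 * false, negative width (a hard compile error) when @e is true.
 */
#define CHECK_ZERO(e)	((int)(sizeof(struct { int : (-!!(e)); })))

/*
 * GENMASK_INPUT_CHECK()-alike: when (l) > (h) is not a compile-time
 * constant, fall back to 0 so the bitfield width stays constant.
 * GCC still type-checks the unchosen "(l) > (h)", and with l == 0 and
 * an unsigned h it reports the comparison as always false.
 */
#define INPUT_CHECK(h, l) \
	CHECK_ZERO(__builtin_choose_expr( \
		__builtin_constant_p((l) > (h)), (l) > (h), 0))

/* __GENMASK()-alike: set bits l..h inclusive (64-bit long assumed) */
#define MASK(h, l) \
	(INPUT_CHECK(h, l) + (((~0UL) << (l)) & (~0UL >> (63 - (h)))))

int main(void)
{
	/* runtime value, analogous to cmdq->q.llq.max_n_shift */
	unsigned int max_n_shift = 8;

	/* -Wtype-limits warns here: "comparison of unsigned expression
	 * < 0 is always false", matching the report above.
	 */
	unsigned long prod_mask = MASK(max_n_shift, 0);

	printf("prod_mask = 0x%lx\n", prod_mask);
	return 0;
}

Building this with "gcc -Wtype-limits repro.c" should reproduce a diagnostic of the same shape as the one quoted above; it only illustrates the mechanism and is not meant to suggest a particular fix for the patch.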