[PATCH v2] mm: don't expose page to fast gup prematurely

2019-09-14 Thread Yu Zhao
We don't want to expose page to fast gup running on a remote CPU
before all local non-atomic ops on page flags are visible first.

For anon page that isn't in swap cache, we need to make sure all
prior non-atomic ops, especially __SetPageSwapBacked() in
page_add_new_anon_rmap(), are ordered before set_pte_at() to prevent
the following race:

CPU 1                           CPU 2
set_pte_at()                    get_user_pages_fast()
page_add_new_anon_rmap()        gup_pte_range()
__SetPageSwapBacked()           SetPageReferenced()

This demonstrates a non-fatal scenario. Though I haven't directly
observed any fatal ones, they can exist, e.g., PG_lock set by fast
gup caller and then overwritten by __SetPageSwapBacked().

For anon page that is in swap cache and file page including tmpfs,
we don't need smp_wmb() before set_pte_at(). We've already exposed
them after adding them to swap and file caches. xas_lock_irq() and
xas_unlock_irq() are used during the process, which guarantees
__SetPageUptodate() and other non-atomic ops are ordered before
set_pte_at(). (Using non-atomic ops thereafter is a bug, obviously).

The smp_wmb() is open-coded rather than inserted at the bottom of
page_add_new_anon_rmap() because one caller of the function,
do_huge_pmd_wp_page_fallback(), doesn't need the barrier.

Alternatively, we can use atomic ops instead. There seem to be at least
as many __SetPageUptodate() and __SetPageSwapBacked() calls to change.

Signed-off-by: Yu Zhao 
---
 kernel/events/uprobes.c |  2 ++
 mm/huge_memory.c|  4 
 mm/khugepaged.c |  2 ++
 mm/memory.c | 10 +-
 mm/migrate.c|  2 ++
 mm/swapfile.c   |  6 --
 mm/userfaultfd.c|  2 ++
 7 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 84fa00497c49..7069785e2e52 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -194,6 +194,8 @@ static int __replace_page(struct vm_area_struct *vma, 
unsigned long addr,
 
flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
ptep_clear_flush_notify(vma, addr, pvmw.pte);
+   /* commit non-atomic ops before exposing to fast gup */
+   smp_wmb();
set_pte_at_notify(mm, addr, pvmw.pte,
mk_pte(new_page, vma->vm_page_prot));
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de1f15969e27..0be8cee94a5b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -616,6 +616,8 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct 
vm_fault *vmf,
mem_cgroup_commit_charge(page, memcg, false, true);
lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+   /* commit non-atomic ops before exposing to fast gup */
+   smp_wmb();
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm_inc_nr_ptes(vma->vm_mm);
@@ -1423,6 +1425,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, 
pmd_t orig_pmd)
page_add_new_anon_rmap(new_page, vma, haddr, true);
mem_cgroup_commit_charge(new_page, memcg, false, true);
lru_cache_add_active_or_unevictable(new_page, vma);
+   /* commit non-atomic ops before exposing to fast gup */
+   smp_wmb();
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
if (!page) {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index eaaa21b23215..c703e4b7c9be 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1081,6 +1081,8 @@ static void collapse_huge_page(struct mm_struct *mm,
count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
lru_cache_add_active_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
+   /* commit non-atomic ops before exposing to fast gup */
+   smp_wmb();
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
spin_unlock(pmd_ptl);
diff --git a/mm/memory.c b/mm/memory.c
index ea3c74855b23..e56d7df0a206 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2363,6 +2363,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 * mmu page tables (such as kvm shadow page tables), we want the
 * new page to be mapped directly into the secondary page table.
 */
+   /* commit non-atomic ops before exposing to fast gup */
+   smp_wmb();
set_pte_at_notify(mm, vmf->address, vmf->pte, entry);
update_mmu_cache(vma, vmf->address, vmf->pte);
if (old_page) {
@@ -2873,7 +2875,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
flush_icache_page(vma, page);
if (pte_swp_sof

[PATCH 1/5] tools/power/x86/intel-speed-select: Fix high priority core mask over count

2019-09-14 Thread Srinivas Pandruvada
From: Youquan Song 

If the CPU package has fewer logical CPUs than topo_max_cpus, the
punit_cpu_core of each non-present CPU is still initialized to 0, so those
CPUs are wrongly counted as belonging to core 0.

For example, below there are only 10 high priority cores (20 logical CPUs)
in the CPU package, but 27 logical CPUs are counted in the mask.

./intel-speed-select base-freq info -l 0 | grep mask
high-priority-cpu-mask:7f000179,f000179f

With the fix patch:
./intel-speed-select base-freq info -l 0
high-priority-cpu-mask:0179,f000179f

Signed-off-by: Youquan Song 
Signed-off-by: Srinivas Pandruvada 
---
 tools/power/x86/intel-speed-select/isst-config.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c 
b/tools/power/x86/intel-speed-select/isst-config.c
index 59753b3917bb..83ac72902b36 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -402,6 +402,9 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned 
long long core_mask,
int j;
 
for (j = 0; j < topo_max_cpus; ++j) {
+   if (!CPU_ISSET_S(j, present_cpumask_size, 
present_cpumask))
+   continue;
+
if (cpu_map[j].pkg_id == pkg_id &&
cpu_map[j].die_id == die_id &&
cpu_map[j].punit_cpu_core == i) {
-- 
2.17.2



[PATCH 4/5] tools/power/x86/intel-speed-select: Fix some debug prints

2019-09-14 Thread Srinivas Pandruvada
Fix the debug print that wrongly labels the cpu number as CLOS. Also
avoid printing the clos id when the user specifies clos as a parameter.

Signed-off-by: Srinivas Pandruvada 
---
 tools/power/x86/intel-speed-select/isst-config.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c 
b/tools/power/x86/intel-speed-select/isst-config.c
index 4da7ffca2484..15c098e3a512 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -508,7 +508,7 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char 
command,
int write = 0;
int clos_id, core_id, ret = 0;
 
-   debug_printf("CLOS %d\n", cpu);
+   debug_printf("CPU %d\n", cpu);
 
if (parameter & BIT(MBOX_CMD_WRITE_BIT)) {
value = req_data;
@@ -1417,7 +1417,6 @@ static void parse_cmd_args(int argc, int start, char 
**argv)
/* CLOS related */
case 'c':
current_clos = atoi(optarg);
-   printf("clos %d\n", current_clos);
break;
case 'd':
clos_desired = atoi(optarg);
-- 
2.17.2



[PATCH 3/5] tools/power/x86/intel-speed-select: Format get-assoc information

2019-09-14 Thread Srinivas Pandruvada
Format the get-assoc command output to be consistent with other commands.
For example:

Intel(R) Speed Select Technology
Executing on CPU model:142[0x8e]
 package-0
  die-0
cpu-0
  get-assoc
clos:0

Signed-off-by: Srinivas Pandruvada 
---
 .../x86/intel-speed-select/isst-config.c  | 14 +++
 .../x86/intel-speed-select/isst-display.c | 23 +++
 tools/power/x86/intel-speed-select/isst.h |  2 +-
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c 
b/tools/power/x86/intel-speed-select/isst-config.c
index b44f5b822348..4da7ffca2484 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -1245,7 +1245,7 @@ static void get_clos_assoc_for_cpu(int cpu, void *arg1, 
void *arg2, void *arg3,
if (ret)
perror("isst_clos_get_assoc_status");
else
-   isst_display_result(cpu, outf, "core-power", "get-assoc", clos);
+   isst_clos_display_assoc_information(cpu, outf, clos);
 }
 
 static void get_clos_assoc(void)
@@ -1255,13 +1255,17 @@ static void get_clos_assoc(void)
fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
exit(0);
}
-   if (max_target_cpus)
-   for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
- NULL, NULL, NULL);
-   else {
+
+   if (!max_target_cpus) {
fprintf(stderr,
"Invalid target cpu. Specify with [-c|--cpu]\n");
+   exit(0);
}
+
+   isst_ctdp_display_information_start(outf);
+   for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
+ NULL, NULL, NULL);
+   isst_ctdp_display_information_end(outf);
 }
 
 static struct process_cmd_struct isst_cmds[] = {
diff --git a/tools/power/x86/intel-speed-select/isst-display.c 
b/tools/power/x86/intel-speed-select/isst-display.c
index df4aa99c4e92..bd7aaf27e4de 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -503,6 +503,29 @@ void isst_clos_display_information(int cpu, FILE *outf, 
int clos,
format_and_print(outf, 1, NULL, NULL);
 }
 
+void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos)
+{
+   char header[256];
+   char value[256];
+
+   snprintf(header, sizeof(header), "package-%d",
+get_physical_package_id(cpu));
+   format_and_print(outf, 1, header, NULL);
+   snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+   format_and_print(outf, 2, header, NULL);
+   snprintf(header, sizeof(header), "cpu-%d", cpu);
+   format_and_print(outf, 3, header, NULL);
+
+   snprintf(header, sizeof(header), "get-assoc");
+   format_and_print(outf, 4, header, NULL);
+
+   snprintf(header, sizeof(header), "clos");
+   snprintf(value, sizeof(value), "%d", clos);
+   format_and_print(outf, 5, header, value);
+
+   format_and_print(outf, 1, NULL, NULL);
+}
+
 void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
 int result)
 {
diff --git a/tools/power/x86/intel-speed-select/isst.h 
b/tools/power/x86/intel-speed-select/isst.h
index 668f914d077f..48655d0dee2d 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -225,7 +225,7 @@ extern int isst_clos_associate(int cpu, int clos);
 extern int isst_clos_get_assoc_status(int cpu, int *clos_id);
 extern void isst_clos_display_information(int cpu, FILE *outf, int clos,
  struct isst_clos_config *clos_config);
-
+extern void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos);
 extern int isst_read_reg(unsigned short reg, unsigned int *val);
 extern int isst_write_reg(int reg, unsigned int val);
 
-- 
2.17.2



[PATCH 0/5] tools/power/x86/intel-speed-select: New command and

2019-09-14 Thread Srinivas Pandruvada
This series contains some minor fixes for the case where the firmware mask
includes invalid CPUs in the perf-profile mask. It also adds some commands
to better manage the core-power feature.

Srinivas Pandruvada (4):
  tools/power/x86/intel-speed-select: Allow online/offline based on tdp
  tools/power/x86/intel-speed-select: Format get-assoc information
  tools/power/x86/intel-speed-select: Fix some debug prints
  tools/power/x86/intel-speed-select: Extend core-power command set

Youquan Song (1):
  tools/power/x86/intel-speed-select: Fix high priority core mask over
count

 .../x86/intel-speed-select/isst-config.c  | 108 --
 .../power/x86/intel-speed-select/isst-core.c  |  25 
 .../x86/intel-speed-select/isst-display.c |  51 +
 tools/power/x86/intel-speed-select/isst.h |   9 +-
 4 files changed, 182 insertions(+), 11 deletions(-)

-- 
2.17.2



[PATCH 2/5] tools/power/x86/intel-speed-select: Allow online/offline based on tdp

2019-09-14 Thread Srinivas Pandruvada
Use the enabled core mask to online/offline CPUs. A new option,
--online|-o, is added to set-config-level for this purpose.

Signed-off-by: Srinivas Pandruvada 
---
 .../x86/intel-speed-select/isst-config.c  | 52 ++-
 tools/power/x86/intel-speed-select/isst.h |  2 +
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c 
b/tools/power/x86/intel-speed-select/isst-config.c
index 83ac72902b36..b44f5b822348 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -38,6 +38,7 @@ static int fact_avx = 0xFF;
 static unsigned long long fact_trl;
 static int out_format_json;
 static int cmd_help;
+static int force_online_offline;
 
 /* clos related */
 static int current_clos = -1;
@@ -165,6 +166,26 @@ int get_topo_max_cpus(void)
return topo_max_cpus;
 }
 
+static void set_cpu_online_offline(int cpu, int state)
+{
+   char buffer[128];
+   int fd;
+
+   snprintf(buffer, sizeof(buffer),
+"/sys/devices/system/cpu/cpu%d/online", cpu);
+
+   fd = open(buffer, O_WRONLY);
+   if (fd < 0)
+   err(-1, "%s open failed", buffer);
+
+   if (state)
+   write(fd, "1\n", 2);
+   else
+   write(fd, "0\n", 2);
+
+   close(fd);
+}
+
 #define MAX_PACKAGE_COUNT 8
 #define MAX_DIE_PER_PACKAGE 2
 static void for_each_online_package_in_set(void (*callback)(int, void *, void 
*,
@@ -736,9 +757,30 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, 
void *arg2, void *arg3,
ret = isst_set_tdp_level(cpu, tdp_level);
if (ret)
perror("set_tdp_level_for_cpu");
-   else
+   else {
isst_display_result(cpu, outf, "perf-profile", "set_tdp_level",
ret);
+   if (force_online_offline) {
+   struct isst_pkg_ctdp_level_info ctdp_level;
+
+   fprintf(stderr, "Option is set to online/offline\n");
+   ctdp_level.core_cpumask_size =
+   alloc_cpu_set(&ctdp_level.core_cpumask);
+   isst_get_coremask_info(cpu, tdp_level, &ctdp_level);
+   if (ctdp_level.cpu_count) {
+   int i, max_cpus = get_topo_max_cpus();
+   for (i = 0; i < max_cpus; ++i) {
+   if (CPU_ISSET_S(i, 
ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+   fprintf(stderr, "online cpu 
%d\n", i);
+   set_cpu_online_offline(i, 1);
+   } else {
+   fprintf(stderr, "offline cpu 
%d\n", i);
+   set_cpu_online_offline(i, 0);
+   }
+   }
+   }
+   }
+   }
 }
 
 static void set_tdp_level(void)
@@ -747,6 +789,8 @@ static void set_tdp_level(void)
fprintf(stderr, "Set Config TDP level\n");
fprintf(stderr,
"\t Arguments: -l|--level : Specify tdp level\n");
+   fprintf(stderr,
+   "\t Optional Arguments: -o | online : online/offline 
for the tdp level\n");
exit(0);
}
 
@@ -1319,6 +1363,7 @@ static void parse_cmd_args(int argc, int start, char 
**argv)
static struct option long_options[] = {
{ "bucket", required_argument, 0, 'b' },
{ "level", required_argument, 0, 'l' },
+   { "online", required_argument, 0, 'o' },
{ "trl-type", required_argument, 0, 'r' },
{ "trl", required_argument, 0, 't' },
{ "help", no_argument, 0, 'h' },
@@ -1335,7 +1380,7 @@ static void parse_cmd_args(int argc, int start, char 
**argv)
option_index = start;
 
optind = start + 1;
-   while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:h",
+   while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:ho",
  long_options, &option_index)) != -1) {
switch (opt) {
case 'b':
@@ -1347,6 +1392,9 @@ static void parse_cmd_args(int argc, int start, char 
**argv)
case 'l':
tdp_level = atoi(optarg);
break;
+   case 'o':
+   force_online_offline = 1;
+   break;
case 't':
sscanf(optarg, "0x%llx", &fact_trl);
break;
diff --git a/tools/power/x86/intel-speed-select/isst.h 
b/tools/power/x86/intel-speed-select/isst.h
index 2f7f62765eb6..668f914d077f 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x8

[PATCH 5/5] tools/power/x86/intel-speed-select: Extend core-power command set

2019-09-14 Thread Srinivas Pandruvada
Add an additional command to get the clos enable status and priority type.
The current info option actually dumps the per-clos QOS config, so rename
that command to get-config.

Signed-off-by: Srinivas Pandruvada 
---
 .../x86/intel-speed-select/isst-config.c  | 36 ++-
 .../power/x86/intel-speed-select/isst-core.c  | 25 +
 .../x86/intel-speed-select/isst-display.c | 28 +++
 tools/power/x86/intel-speed-select/isst.h |  5 +++
 4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c 
b/tools/power/x86/intel-speed-select/isst-config.c
index 15c098e3a512..671239333f98 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -1129,6 +1129,38 @@ static void dump_clos_config(void)
isst_ctdp_display_information_end(outf);
 }
 
+static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
+ void *arg4)
+{
+   int enable, ret, prio_type;
+
+   ret = isst_clos_get_clos_information(cpu, &enable, &prio_type);
+   if (ret)
+   perror("isst_clos_get_info");
+   else
+   isst_clos_display_clos_information(cpu, outf, enable, 
prio_type);
+}
+
+static void dump_clos_info(void)
+{
+   if (cmd_help) {
+   fprintf(stderr,
+   "Print Intel Speed Select Technology core power 
information\n");
+   fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
+   exit(0);
+   }
+
+   if (!max_target_cpus)
+   fprintf(stderr,
+   "Invalid target cpu. Specify with [-c|--cpu]\n");
+
+   isst_ctdp_display_information_start(outf);
+   for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL,
+ NULL, NULL, NULL);
+   isst_ctdp_display_information_end(outf);
+
+}
+
 static void set_clos_config_for_cpu(int cpu, void *arg1, void *arg2, void 
*arg3,
void *arg4)
 {
@@ -1282,10 +1314,11 @@ static struct process_cmd_struct isst_cmds[] = {
{ "turbo-freq", "info", dump_fact_config },
{ "turbo-freq", "enable", set_fact_enable },
{ "turbo-freq", "disable", set_fact_disable },
-   { "core-power", "info", dump_clos_config },
+   { "core-power", "info", dump_clos_info },
{ "core-power", "enable", set_clos_enable },
{ "core-power", "disable", set_clos_disable },
{ "core-power", "config", set_clos_config },
+   { "core-power", "get-config", dump_clos_config },
{ "core-power", "assoc", set_clos_assoc },
{ "core-power", "get-assoc", get_clos_assoc },
{ NULL, NULL, NULL }
@@ -1487,6 +1520,7 @@ static void core_power_help(void)
printf("\tenable\n");
printf("\tdisable\n");
printf("\tconfig\n");
+   printf("\tget-config\n");
printf("\tassoc\n");
printf("\tget-assoc\n");
 }
diff --git a/tools/power/x86/intel-speed-select/isst-core.c 
b/tools/power/x86/intel-speed-select/isst-core.c
index 0bf341ad9697..6dee5332c9d3 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -619,6 +619,31 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct 
isst_pkg_ctdp *pkg_dev)
return 0;
 }
 
+int isst_clos_get_clos_information(int cpu, int *enable, int *type)
+{
+   unsigned int resp;
+   int ret;
+
+   ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
+&resp);
+   if (ret)
+   return ret;
+
+   debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", cpu, resp);
+
+   if (resp & BIT(1))
+   *enable = 1;
+   else
+   *enable = 0;
+
+   if (resp & BIT(2))
+   *type = 1;
+   else
+   *type = 0;
+
+   return 0;
+}
+
 int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
 {
unsigned int req, resp;
diff --git a/tools/power/x86/intel-speed-select/isst-display.c 
b/tools/power/x86/intel-speed-select/isst-display.c
index bd7aaf27e4de..2e6e5fcdbd7c 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -503,6 +503,34 @@ void isst_clos_display_information(int cpu, FILE *outf, 
int clos,
format_and_print(outf, 1, NULL, NULL);
 }
 
+void isst_clos_display_clos_information(int cpu, FILE *outf,
+   int clos_enable, int type)
+{
+   char header[256];
+   char value[256];
+
+   snprintf(header, sizeof(header), "package-%d",
+get_physical_package_id(cpu));
+   format_and_print(outf, 1, header, NULL);
+   snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+   format_and_print(outf, 2, header, NULL);

Re: [PATCH 5.2 36/37] vhost: block speculation of translated descriptors

2019-09-14 Thread Stefan Lippers-Hollmann
Hi

On 2019-09-14, Greg Kroah-Hartman wrote:
> On Sat, Sep 14, 2019 at 02:54:11AM +0200, Stefan Lippers-Hollmann wrote:
> > On 2019-09-13, Greg Kroah-Hartman wrote:
> > > From: Michael S. Tsirkin 
> > >
> > > commit a89db445fbd7f1f8457b03759aa7343fa530ef6b upstream.
> > >
> > > iovec addresses coming from vhost are assumed to be
> > > pre-validated, but in fact can be speculated to a value
> > > out of range.
> > >
> > > Userspace address are later validated with array_index_nospec so we can
> > > be sure kernel info does not leak through these addresses, but vhost
> > > must also not leak userspace info outside the allowed memory table to
> > > guests.
> > >
> > > Following the defence in depth principle, make sure
> > > the address is not validated out of node range.
[...]
> Do you have the same problem with Linus's tree right now?

Actually, yes I do (I had not tested i386 for 5.3~ within the last ~2
weeks, only amd64). Very similar kernel config, same compiler versions
but built in a slightly different environment (built directly on the bare
iron, in an amd64 multilib userspace, rather than a pure-i386 chroot on an
amd64 kernel).

$ git describe
v5.3-rc8-36-ga7f89616b737

$ LANG= make ARCH=x86 -j1 bzImage modules
  CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  CHK include/generated/compile.h
  CHK kernel/kheaders_data.tar.xz
  CC [M]  drivers/vhost/vhost.o
In file included from ./include/linux/export.h:45,
 from ./include/linux/linkage.h:7,
 from ./include/linux/kernel.h:8,
 from ./include/linux/list.h:9,
 from ./include/linux/wait.h:7,
 from ./include/linux/eventfd.h:13,
 from drivers/vhost/vhost.c:13:
drivers/vhost/vhost.c: In function 'translate_desc':
./include/linux/compiler.h:350:38: error: call to '__compiletime_assert_2076' 
declared with attribute error: BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)
  350 |  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
  |  ^
./include/linux/compiler.h:331:4: note: in definition of macro 
'__compiletime_assert'
  331 |prefix ## suffix();\
  |^~
./include/linux/compiler.h:350:2: note: in expansion of macro 
'_compiletime_assert'
  350 |  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
  |  ^~~
./include/linux/build_bug.h:39:37: note: in expansion of macro 
'compiletime_assert'
   39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
  | ^~
./include/linux/build_bug.h:50:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
   50 |  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
  |  ^~~~
./include/linux/nospec.h:56:2: note: in expansion of macro 'BUILD_BUG_ON'
   56 |  BUILD_BUG_ON(sizeof(_s) > sizeof(long));   \
  |  ^~~~
drivers/vhost/vhost.c:2076:5: note: in expansion of macro 'array_index_nospec'
 2076 | array_index_nospec((unsigned long)(addr - node->start),
  | ^~
make[2]: *** [scripts/Makefile.build:281: drivers/vhost/vhost.o] Error 1
make[1]: *** [scripts/Makefile.build:497: drivers/vhost] Error 2
make: *** [Makefile:1083: drivers] Error 2

$ git revert a89db445fbd7f1f8457b03759aa7343fa530ef6b

$ LANG= make ARCH=x86 -j16 bzImage modules
  CALLscripts/atomic/check-atomics.sh
  CALLscripts/checksyscalls.sh
  CHK include/generated/compile.h
  CHK kernel/kheaders_data.tar.xz
  Building modules, stage 2.
Kernel: arch/x86/boot/bzImage is ready  (#1)
  MODPOST 3464 modules

$ echo $?
0

$ find . -name vhost\\.ko
./drivers/vhost/vhost.ko

I've attached the affected kernel config for v5.3~/ i386.

Regards
Stefan Lippers-Hollmann


config-5.3-i386.xz
Description: application/xz


Re: [PATCH v2 1/6] powerpc: Allow flush_icache_range to work across ranges >4GB

2019-09-14 Thread Christophe Leroy




Le 03/09/2019 à 07:23, Alastair D'Silva a écrit :

From: Alastair D'Silva 

When calling flush_icache_range with a size >4GB, we were masking
off the upper 32 bits, so we would incorrectly flush a range smaller
than intended.

This patch replaces the 32 bit shifts with 64 bit ones, so that
the full size is accounted for.


Isn't there the same issue in arch/powerpc/kernel/vdso64/cacheflush.S ?

Christophe



Signed-off-by: Alastair D'Silva 
Cc: sta...@vger.kernel.org
---
  arch/powerpc/kernel/misc_64.S | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..9bc0aa9aeb65 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -82,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subfr8,r6,r4/* compute length */
add r8,r8,r5/* ensure we get enough */
lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of cache block 
size */
-   srw.r8,r8,r9/* compute line count */
+   srd.r8,r8,r9/* compute line count */
beqlr   /* nothing to do? */
mtctr   r8
  1:dcbst   0,r6
@@ -98,7 +98,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
subfr8,r6,r4/* compute length */
add r8,r8,r5
lwz r9,ICACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of Icache block 
size */
-   srw.r8,r8,r9/* compute line count */
+   srd.r8,r8,r9/* compute line count */
beqlr   /* nothing to do? */
mtctr   r8
  2:icbi0,r6



Re: [PATCH 5.2 00/37] 5.2.15-stable review

2019-09-14 Thread Greg Kroah-Hartman
On Sat, Sep 14, 2019 at 12:26:40AM -0400, Naresh Kamboju wrote:
> On Fri, 13 Sep 2019 at 09:21, Greg Kroah-Hartman
>  wrote:
> >
> > This is the start of the stable review cycle for the 5.2.15 release.
> > There are 37 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> >
> > Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
> > Anything received after that time might be too late.
> >
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.2.15-rc1.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-5.2.y
> > and the diffstat can be found below.
> >
> > thanks,
> >
> > greg k-h
> >
> 
> Results from Linaro’s test farm.
> No regressions on arm64, arm, x86_64, and i386.

Thanks for testing all of these and letting me know.

greg k-h


Re: [PATCH for vm-scalability] usemem: Add new option -Z|--read-again

2019-09-14 Thread Fengguang Wu

Applied, thanks Teawater!

On Sat, Sep 14, 2019 at 11:07:18AM +0800, Hui Zhu wrote:

With this option, usemem reads the memory again after accessing it.
This helps test how fast pages are loaded from swap back into memory.

Signed-off-by: Hui Zhu 
---
usemem.c | 46 --
1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/usemem.c b/usemem.c
index 264d52a..2d31946 100644
--- a/usemem.c
+++ b/usemem.c
@@ -94,6 +94,7 @@ int opt_sync_rw = 0;
int opt_sync_free = 0;
int opt_bind_interval = 0;
unsigned long opt_delay = 0;
+int opt_read_again = 0;
int nr_task;
int nr_thread;
int nr_cpu;
@@ -151,6 +152,7 @@ void usage(int ok)
"-e|--delay  delay for each page in ns\n"
"-O|--anonymous  mmap with MAP_ANONYMOUS\n"
"-U|--hugetlballocate hugetlbfs page\n"
+   "-Z|--read-again read memory again after access the memory\n"
"-h|--help   show this message\n"
,   ourname);

@@ -188,6 +190,7 @@ static const struct option opts[] = {
{ "sync-rw"   , 0, NULL, 'y' },
{ "delay" , 1, NULL, 'e' },
{ "hugetlb"   , 0, NULL, 'U' },
+   { "read-again", 0, NULL, 'Z' },
{ "help"  , 0, NULL, 'h' },
{ NULL  , 0, NULL, 0 }
};
@@ -616,7 +619,7 @@ unsigned long do_unit(unsigned long bytes, struct 
drand48_data *rand_data,
return rw_bytes;
}

-static void output_statistics(unsigned long unit_bytes)
+static void output_statistics(unsigned long unit_bytes, const char *intro)
{
struct timeval stop;
char buf[1024];
@@ -629,8 +632,8 @@ static void output_statistics(unsigned long unit_bytes)
(stop.tv_usec - start_time.tv_usec);
throughput = ((unit_bytes * 100ULL) >> 10) / delta_us;
len = snprintf(buf, sizeof(buf),
-   "%lu bytes / %lu usecs = %lu KB/s\n",
-   unit_bytes, delta_us, throughput);
+   "%s%lu bytes / %lu usecs = %lu KB/s\n",
+   intro, unit_bytes, delta_us, throughput);
fflush(stdout);
write(1, buf, len);
}
@@ -690,7 +693,34 @@ long do_units(void)
} while (bytes);

if (!opt_write_signal_read && unit_bytes)
-   output_statistics(unit_bytes);
+   output_statistics(unit_bytes, "");
+
+   if (opt_read_again && unit_bytes) {
+   unsigned long rw_bytes = 0;
+
+   gettimeofday(&start_time, NULL);
+   for (i = 0; i < nptr; i++) {
+   int rep;
+
+   for (rep = 0; rep < reps; rep++) {
+   if (rep > 0 && !quiet) {
+   printf(".");
+   fflush(stdout);
+   }
+
+   rw_bytes += do_rw_once(ptrs[i], lens[i], 
&rand_data, 1, &rep, reps);
+
+   if (msync_mode) {
+   if ((msync(ptrs[i], lens[i], 
msync_mode)) == -1) {
+   fprintf(stderr, "msync failed with 
error %s \n", strerror(errno));
+   exit(1);
+   }
+   }
+   }
+   }
+
+   output_statistics(rw_bytes, "read again ");
+   }

if (opt_write_signal_read) {
struct sigaction act;
@@ -731,7 +761,7 @@ long do_units(void)
sigsuspend(&set);
gettimeofday(&start_time, NULL);
unit_bytes = do_rw_once(buffer, opt_bytes, &rand_data, 1, NULL, 
0);
-   output_statistics(unit_bytes);
+   output_statistics(unit_bytes, "");
}

if (opt_sync_free)
@@ -879,7 +909,7 @@ int main(int argc, char *argv[])
pagesize = getpagesize();

while ((c = getopt_long(argc, argv,
-   
"aAB:f:FPp:gqowRMm:n:t:b:ds:T:Sr:u:j:e:EHDNLWyxOUh", opts, NULL)) != -1)
+   
"aAB:f:FPp:gqowRMm:n:t:b:ds:T:Sr:u:j:e:EHDNLWyxOUZh", opts, NULL)) != -1)
{
switch (c) {
case 'a':
@@ -1005,6 +1035,10 @@ int main(int argc, char *argv[])
map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
break;

+   case 'Z':
+   opt_read_again = 1;
+   break;
+
default:
usage(1);
}
--
2.7.4



Re: [PATCH 5.2 36/37] vhost: block speculation of translated descriptors

2019-09-14 Thread Greg Kroah-Hartman
On Sat, Sep 14, 2019 at 09:15:48AM +0200, Stefan Lippers-Hollmann wrote:
> Hi
> 
> On 2019-09-14, Greg Kroah-Hartman wrote:
> > On Sat, Sep 14, 2019 at 02:54:11AM +0200, Stefan Lippers-Hollmann wrote:
> > > On 2019-09-13, Greg Kroah-Hartman wrote:
> > > > From: Michael S. Tsirkin 
> > > >
> > > > commit a89db445fbd7f1f8457b03759aa7343fa530ef6b upstream.
> > > >
> > > > iovec addresses coming from vhost are assumed to be
> > > > pre-validated, but in fact can be speculated to a value
> > > > out of range.
> > > >
> > > > Userspace address are later validated with array_index_nospec so we can
> > > > be sure kernel info does not leak through these addresses, but vhost
> > > > must also not leak userspace info outside the allowed memory table to
> > > > guests.
> > > >
> > > > Following the defence in depth principle, make sure
> > > > the address is not validated out of node range.
> [...]
> > Do you have the same problem with Linus's tree right now?
> 
> Actually, yes I do (I had not tested i386 for 5.3~ within the last ~2
> weeks, only amd64). Very similar kernel config, same compiler versions
> but built in a slightly different environment (built directly on the bare
> iron, in a amd64 multilib userspace, rather than a pure-i386 chroot on an
> amd64 kernel).
> 
> $ git describe
> v5.3-rc8-36-ga7f89616b737
> 
> $ LANG= make ARCH=x86 -j1 bzImage modules
>   CALLscripts/checksyscalls.sh
>   CALLscripts/atomic/check-atomics.sh
>   CHK include/generated/compile.h
>   CHK kernel/kheaders_data.tar.xz
>   CC [M]  drivers/vhost/vhost.o
> In file included from ./include/linux/export.h:45,
>  from ./include/linux/linkage.h:7,
>  from ./include/linux/kernel.h:8,
>  from ./include/linux/list.h:9,
>  from ./include/linux/wait.h:7,
>  from ./include/linux/eventfd.h:13,
>  from drivers/vhost/vhost.c:13:
> drivers/vhost/vhost.c: In function 'translate_desc':
> ./include/linux/compiler.h:350:38: error: call to '__compiletime_assert_2076' 
> declared with attribute error: BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)
>   350 |  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
>   |  ^
> ./include/linux/compiler.h:331:4: note: in definition of macro 
> '__compiletime_assert'
>   331 |prefix ## suffix();\
>   |^~
> ./include/linux/compiler.h:350:2: note: in expansion of macro 
> '_compiletime_assert'
>   350 |  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
>   |  ^~~
> ./include/linux/build_bug.h:39:37: note: in expansion of macro 
> 'compiletime_assert'
>39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
>   | ^~
> ./include/linux/build_bug.h:50:2: note: in expansion of macro 
> 'BUILD_BUG_ON_MSG'
>50 |  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
>   |  ^~~~
> ./include/linux/nospec.h:56:2: note: in expansion of macro 'BUILD_BUG_ON'
>56 |  BUILD_BUG_ON(sizeof(_s) > sizeof(long));   \
>   |  ^~~~
> drivers/vhost/vhost.c:2076:5: note: in expansion of macro 'array_index_nospec'
>  2076 | array_index_nospec((unsigned long)(addr - node->start),
>   | ^~
> make[2]: *** [scripts/Makefile.build:281: drivers/vhost/vhost.o] Error 1
> make[1]: *** [scripts/Makefile.build:497: drivers/vhost] Error 2
> make: *** [Makefile:1083: drivers] Error 2
> 
> $ git revert a89db445fbd7f1f8457b03759aa7343fa530ef6b
> 
> $ LANG= make ARCH=x86 -j16 bzImage modules
>   CALLscripts/atomic/check-atomics.sh
>   CALLscripts/checksyscalls.sh
>   CHK include/generated/compile.h
>   CHK kernel/kheaders_data.tar.xz
>   Building modules, stage 2.
> Kernel: arch/x86/boot/bzImage is ready  (#1)
>   MODPOST 3464 modules
> 
> $ echo $?
> 0
> 
> $ find . -name vhost\\.ko
> ./drivers/vhost/vhost.ko
> 
> I've attached the affected kernel config for v5.3~/ i386.

Ok, good, we will be "bug compatible" at the very least now :)

When this gets fixed in Linus's tree we can backport the fix here as
well.  The number of users of that compiler version/configuration is
probably pretty low at the moment to want to hold off on this fix.

thanks,

greg k-h


Re: [PATCH 4.9 00/14] 4.9.193-stable review

2019-09-14 Thread Guenter Roeck

On 9/13/19 6:06 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.9.193 release.
There are 14 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
Anything received after that time might be too late.



Is it really only me seeing this ?

drivers/vhost/vhost.c: In function 'translate_desc':
include/linux/compiler.h:549:38: error: call to '__compiletime_assert_1879' 
declared with attribute error: BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)

i386:allyesconfig, mips:allmodconfig and others, everywhere including
mainline. Culprit is commit a89db445fbd7f1 ("vhost: block speculation
of translated descriptors").

Guenter


Re: [PATCH 4.9 00/14] 4.9.193-stable review

2019-09-14 Thread Greg Kroah-Hartman
On Sat, Sep 14, 2019 at 01:28:39AM -0700, Guenter Roeck wrote:
> On 9/13/19 6:06 AM, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 4.9.193 release.
> > There are 14 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
> > Anything received after that time might be too late.
> > 
> 
> Is it really only me seeing this ?
> 
> drivers/vhost/vhost.c: In function 'translate_desc':
> include/linux/compiler.h:549:38: error: call to '__compiletime_assert_1879' 
> declared with attribute error: BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)
> 
> i386:allyesconfig, mips:allmodconfig and others, everywhere including
> mainline. Culprit is commit a89db445fbd7f1 ("vhost: block speculation
> of translated descriptors").

Nope, I just got another report of this on 5.2.y.  Problem is also in
Linus's tree :(


Re: [alsa-devel] [PATCH] ASoC: Intel: kbl_rt5663_rt5514_max98927: Add dmic format constraint

2019-09-14 Thread Yu-Hsuan Hsu
Pierre-Louis Bossart  於
2019年9月14日 週六 上午1:28寫道:
>
> please don't top-post on public mailing lists, thanks.
>
> On 9/13/19 9:45 AM, Yu-Hsuan Hsu wrote:
> > Thanks for the review! If I'm not mistaken, the microphone is attached
> > to external codec(rt5514) instead of PCH on Kabylake platform. So there
> > should be a TDM between DMICs and PCH. We can see in the
> > kabylake_ssp0_hw_params function, there are some operations about
> > setting tdm slot_width to 16 bits. Therefore, I think it only supports
> > S16_LE format for DMICs. Is it correct?
>
> Ah yes, ok. we have other machine drivers where dmic refers to the PCH
> attached case, thanks for the precision.
>
> I am still not clear on the problem, you are adding this constraint to a
> front-end, so in theory the copier element in the firmware should take
> care of converting from 16-bits recorded on the TDM link to the 24 bits
> used by the application. Is this not the case? is this patch based on an
> actual error and if yes can you share more information to help check
> where the problem happens, topology maybe?

If we record in 24-bit format on that device, the audio samples it
returns are still 16 bits wide. So the rate we measured is only half
of the expected rate. It's a real problem. Apart from the rate, the
audio samples are also wrong if we still decode them as 24-bit
samples. Therefore, the better fix is to add a constraint to remove
24-bit support.

By the way, we found this issue by "ALSA conformance test", which is a
new tool to verify audio drivers.
(https://chromium.googlesource.com/chromiumos/platform/audiotest/+/master/alsa_conformance_test.md)

>
> >
> > Pierre-Louis Bossart 於 2019年9月12日 週四 下午9:02寫道:
> >
> > On 9/11/19 9:27 PM, Yu-Hsuan Hsu wrote:
> >  > 24 bits recording from DMIC is not supported for KBL platform because
> >  > the TDM slot between PCH and codec is 16 bits only. We should add a
> >  > constraint to remove that unsupported format.
> >
> > Humm, when you use DMICs they are directly connected to the PCH with a
> > standard 1-bit PDM. There is no notion of TDM or slot.
> >
> > It could very well be that the firmware/topology only support 16 bit (I
> > vaguely recall another case where 24 bits was added), but the
> > description in the commit message would need to be modified to make the
> > reason for this change clearer.
> >
> >  >
> >  > Signed-off-by: Yu-Hsuan Hsu  > >
> >  > ---
> >  >   sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 3 +++
> >  >   1 file changed, 3 insertions(+)
> >  >
> >  > diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
> > b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
> >  > index 74dda8784f1a01..67b276a65a8d2d 100644
> >  > --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
> >  > +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
> >  > @@ -400,6 +400,9 @@ static int kabylake_dmic_startup(struct
> > snd_pcm_substream *substream)
> >  >   snd_pcm_hw_constraint_list(runtime, 0,
> > SNDRV_PCM_HW_PARAM_CHANNELS,
> >  >   dmic_constraints);
> >  >
> >  > + runtime->hw.formats = SNDRV_PCM_FMTBIT_S16_LE;
> >  > + snd_pcm_hw_constraint_msbits(runtime, 0, 16, 16);
> >  > +
> >  >   return snd_pcm_hw_constraint_list(substream->runtime, 0,
> >  >   SNDRV_PCM_HW_PARAM_RATE, &constraints_rates);
> >  >   }
> >  >
> >
>


Re: [PATCH 4.9 00/14] 4.9.193-stable review

2019-09-14 Thread Guenter Roeck

On 9/14/19 1:31 AM, Greg Kroah-Hartman wrote:

On Sat, Sep 14, 2019 at 01:28:39AM -0700, Guenter Roeck wrote:

On 9/13/19 6:06 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.9.193 release.
There are 14 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
Anything received after that time might be too late.



Is it really only me seeing this ?

drivers/vhost/vhost.c: In function 'translate_desc':
include/linux/compiler.h:549:38: error: call to '__compiletime_assert_1879' 
declared with attribute error: BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)

i386:allyesconfig, mips:allmodconfig and others, everywhere including
mainline. Culprit is commit a89db445fbd7f1 ("vhost: block speculation
of translated descriptors").


Nope, I just got another report of this on 5.2.y.  Problem is also in
Linus's tree :(



Sending a fix in a minute. I'll copy you and Linus.

Guenter


[PATCH] vhost: Fix compile time error

2019-09-14 Thread Guenter Roeck
Building vhost on 32-bit targets results in the following error.

drivers/vhost/vhost.c: In function 'translate_desc':
include/linux/compiler.h:549:38: error:
call to '__compiletime_assert_1879' declared with attribute error:
BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)

Fixes: a89db445fbd7 ("vhost: block speculation of translated descriptors")
Cc: Michael S. Tsirkin 
Cc: Jason Wang 
Signed-off-by: Guenter Roeck 
---
 drivers/vhost/vhost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index acabf20b069e..102a0c877007 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2074,7 +2074,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 
addr, u32 len,
_iov->iov_base = (void __user *)
((unsigned long)node->userspace_addr +
 array_index_nospec((unsigned long)(addr - node->start),
-   node->size));
+   (unsigned long)node->size));
s += size;
addr += size;
++ret;
-- 
2.7.4



Re: [RFC v2 1/2] ARM: dts: omap3: Add cpu trips and cooling map for omap3 family

2019-09-14 Thread H. Nikolaus Schaller


> Am 13.09.2019 um 17:37 schrieb Adam Ford :
> 
> The OMAP3530, AM3517 and DM3730 all show thresholds of 90C and 105C
> depending on commercial or industrial temperature ratings.  This
> patch expands the thermal information to the limits of 90 and 105
> for alert and critical.
> 
> For boards who never use industrial temperatures, these can be
> changed on their respective device trees with something like:
> 
> &cpu_alert0 {
>   temperature = <85000>; /* millicelsius */
> };
> 
> &cpu_crit {
>   temperature = <90000>; /* millicelsius */
> };
> 
> Signed-off-by: Adam Ford 
> ---
> V2:  Change the CPU reference to &cpu instead of &cpu0
> 
> diff --git a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi 
> b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> index 235ecfd61e2d..dfbd0cb0b00b 100644
> --- a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> +++ b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> @@ -17,4 +17,25 @@ cpu_thermal: cpu_thermal {
> 
>   /* sensor   ID */
>   thermal-sensors = <&bandgap 0>;
> +
> + cpu_trips: trips {
> + cpu_alert0: cpu_alert {
> + temperature = <90000>; /* millicelsius */
> + hysteresis = <2000>; /* millicelsius */
> + type = "passive";
> + };
> + cpu_crit: cpu_crit {
> + temperature = <105000>; /* millicelsius */
> + hysteresis = <2000>; /* millicelsius */
> + type = "critical";
> + };
> + };
> +
> + cpu_cooling_maps: cooling-maps {
> + map0 {
> + trip = <&cpu_alert0>;
> + cooling-device =
> + <&cpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> + };
> + };
> };
> -- 
> 2.17.1
> 

Here is my test log (GTA04A5 with DM3730CBP100).
"high-load" script is driving the NEON to full power
and would report calculation errors.

There is no noise visible in the bandgap sensor data
induced by power supply fluctuations (log shows system
voltage while charging).

root@letux:~# ./high-load -n2
100% load stress test for 1 cores running ./neon_loop2
Sat Sep 14 09:05:50 UTC 2019 65° 4111mV 1000MHz
Sat Sep 14 09:05:50 UTC 2019 67° 4005mV 1000MHz
Sat Sep 14 09:05:52 UTC 2019 68° 4000mV 1000MHz
Sat Sep 14 09:05:53 UTC 2019 68° 4000mV 1000MHz
Sat Sep 14 09:05:55 UTC 2019 72° 3976mV 1000MHz
Sat Sep 14 09:05:56 UTC 2019 72° 4023mV 1000MHz
Sat Sep 14 09:05:57 UTC 2019 72° 3900mV 1000MHz
Sat Sep 14 09:05:59 UTC 2019 73° 4029mV 1000MHz
Sat Sep 14 09:06:00 UTC 2019 73° 3988mV 1000MHz
Sat Sep 14 09:06:01 UTC 2019 73° 4005mV 1000MHz
Sat Sep 14 09:06:03 UTC 2019 73° 4011mV 1000MHz
Sat Sep 14 09:06:04 UTC 2019 73° 4117mV 1000MHz
Sat Sep 14 09:06:06 UTC 2019 73° 4005mV 1000MHz
Sat Sep 14 09:06:07 UTC 2019 75° 3994mV 1000MHz
Sat Sep 14 09:06:08 UTC 2019 75° 3970mV 1000MHz
Sat Sep 14 09:06:09 UTC 2019 75° 4046mV 1000MHz
Sat Sep 14 09:06:11 UTC 2019 75° 4005mV 1000MHz
Sat Sep 14 09:06:12 UTC 2019 75° 4023mV 1000MHz
Sat Sep 14 09:06:14 UTC 2019 75° 3970mV 1000MHz
Sat Sep 14 09:06:15 UTC 2019 75° 4011mV 1000MHz
Sat Sep 14 09:06:16 UTC 2019 77° 4017mV 1000MHz
Sat Sep 14 09:06:18 UTC 2019 77° 3994mV 1000MHz
Sat Sep 14 09:06:19 UTC 2019 77° 3994mV 1000MHz
Sat Sep 14 09:06:20 UTC 2019 77° 3988mV 1000MHz
Sat Sep 14 09:06:22 UTC 2019 77° 4023mV 1000MHz
Sat Sep 14 09:06:23 UTC 2019 77° 4023mV 1000MHz
Sat Sep 14 09:06:24 UTC 2019 78° 4005mV 1000MHz
Sat Sep 14 09:06:26 UTC 2019 78° 4105mV 1000MHz
Sat Sep 14 09:06:27 UTC 2019 78° 4011mV 1000MHz
Sat Sep 14 09:06:28 UTC 2019 78° 3994mV 1000MHz
Sat Sep 14 09:06:30 UTC 2019 78° 4123mV 1000MHz
...
Sat Sep 14 09:09:57 UTC 2019 88° 4082mV 1000MHz
Sat Sep 14 09:09:59 UTC 2019 88° 4164mV 1000MHz
Sat Sep 14 09:10:00 UTC 2019 88° 4058mV 1000MHz
Sat Sep 14 09:10:01 UTC 2019 88° 4058mV 1000MHz
Sat Sep 14 09:10:03 UTC 2019 88° 4082mV 1000MHz
Sat Sep 14 09:10:04 UTC 2019 88° 4058mV 1000MHz
Sat Sep 14 09:10:06 UTC 2019 88° 4146mV 1000MHz
Sat Sep 14 09:10:07 UTC 2019 88° 4041mV 1000MHz
Sat Sep 14 09:10:08 UTC 2019 88° 4035mV 1000MHz
Sat Sep 14 09:10:10 UTC 2019 88° 4052mV 1000MHz
Sat Sep 14 09:10:11 UTC 2019 88° 4087mV 1000MHz
Sat Sep 14 09:10:12 UTC 2019 88° 4152mV 1000MHz
Sat Sep 14 09:10:14 UTC 2019 88° 4070mV 1000MHz
Sat Sep 14 09:10:15 UTC 2019 88° 4064mV 1000MHz
Sat Sep 14 09:10:17 UTC 2019 88° 4170mV 1000MHz
Sat Sep 14 09:10:18 UTC 2019 88° 4058mV 1000MHz
Sat Sep 14 09:10:19 UTC 2019 88° 4187mV 1000MHz
Sat Sep 14 09:10:21 UTC 2019 88° 4093mV 1000MHz
Sat Sep 14 09:10:22 UTC 2019 88° 4087mV 1000MHz
Sat Sep 14 09:10:23 UTC 2019 90° 4070mV 1000MHz
Sat Sep 14 09:10:25 UTC 2019 88° 4123mV 800MHz
Sat Sep 14 09:10:26 UTC 2019 88° 4064mV 1000MHz
Sat Sep 14 09:10:28 UTC 2019 90° 4058mV 1000MHz
Sat Sep 14 09:10:29 UTC 2019 88° 4076mV 1000MHz
Sat Sep 14 09:10:30 UTC 2019 88° 4064mV 1000MHz
Sat Sep 14 09:10:32 UTC 2019 88° 4117mV 1000MHz
Sat Sep 14 09:10:33 UTC 2019 88° 4105mV 800MHz
Sat Sep 14 09:10:34 UTC 2019 

Re: Linux 5.3-rc8

2019-09-14 Thread Ahmed S. Darwish
On Thu, Sep 12, 2019 at 04:25:30AM -0400, Theodore Y. Ts'o wrote:
> On Thu, Sep 12, 2019 at 05:44:21AM +0200, Ahmed S. Darwish wrote:
[...]
> 
> > 1. Cutting down the number of bits needed to initialize the CRNG
> >to 256 bits (CHACHA20 cipher)
> 
> Does the attach patch (see below) help?
>
[...]
> 
> diff --git a/drivers/char/random.c b/drivers/char/random.c
> index 5d5ea4ce1442..b9b3a5a82abf 100644
> --- a/drivers/char/random.c
> +++ b/drivers/char/random.c
> @@ -500,7 +500,7 @@ static int crng_init = 0;
>  #define crng_ready() (likely(crng_init > 1))
>  static int crng_init_cnt = 0;
>  static unsigned long crng_global_init_time = 0;
> -#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE)
> +#define CRNG_INIT_CNT_THRESH CHACHA_KEY_SIZE
>  static void _extract_crng(struct crng_state *crng, __u8 
> out[CHACHA_BLOCK_SIZE]);
>  static void _crng_backtrack_protect(struct crng_state *crng,
>   __u8 tmp[CHACHA_BLOCK_SIZE], int used);

Unfortunately, it only made the early fast init faster, but didn't fix
the normal crng init blockage :-(

Here's a trace log, obtained by applying the patch at [1]. Boot only
continued once I typed some random keys, after ~30s:

#
# entries-in-buffer/entries-written: 22/22   #P:8
#
#  _-=> irqs-off
# / _=> need-resched
#| / _---=> hardirq/softirq
#|| / _--=> preempt-depth
#||| / delay
#   TASK-PID   CPU#  TIMESTAMP  FUNCTION
#  | |   |      | |
  -0 [001] dNh. 0.687088: crng_fast_load: crng threshold 
= 32
  -0 [001] dNh. 0.687089: crng_fast_load: crng_init_cnt = 0
  -0 [001] dNh. 0.687090: crng_fast_load: crng_init_cnt, 
now set to 16
  -0 [001] dNh. 0.705208: crng_fast_load: crng threshold 
= 32
  -0 [001] dNh. 0.705209: crng_fast_load: crng_init_cnt = 
16
  -0 [001] dNh. 0.705209: crng_fast_load: crng_init_cnt, 
now set to 32
  -0 [001] dNh. 0.708048: crng_fast_load: random: fast 
init done
 lvm-165   [001] d... 2.417971: urandom_read: random: 
crng_init_cnt, now set to 0
 systemd-random--179   [003]  2.495669: wait_for_random_bytes.part.0: 
wait for randomness
 dbus-daemon-274   [006] dN.. 3.294331: urandom_read: random: 
crng_init_cnt, now set to 0
 dbus-daemon-274   [006] dN.. 3.316618: urandom_read: random: 
crng_init_cnt, now set to 0
 polkitd-286   [007] dN.. 3.873918: urandom_read: random: 
crng_init_cnt, now set to 0
 polkitd-286   [007] dN.. 3.874303: urandom_read: random: 
crng_init_cnt, now set to 0
 polkitd-286   [007] dN.. 3.874375: urandom_read: random: 
crng_init_cnt, now set to 0
 polkitd-286   [007] d... 3.886204: urandom_read: random: 
crng_init_cnt, now set to 0
 polkitd-286   [007] d... 3.886217: urandom_read: random: 
crng_init_cnt, now set to 0
 polkitd-286   [007] d... 3.888519: urandom_read: random: 
crng_init_cnt, now set to 0
 polkitd-286   [007] d... 3.888529: urandom_read: random: 
crng_init_cnt, now set to 0
 gnome-session-b-321   [006]  4.292034: wait_for_random_bytes.part.0: 
wait for randomness
  -0 [002] dNh.36.784001: crng_reseed: random: crng init 
done
 gnome-session-b-321   [006] 36.784019: wait_for_random_bytes.part.0: 
wait done
 systemd-random--179   [003] 36.784051: wait_for_random_bytes.part.0: 
wait done

[1] patch:

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 5d5ea4ce1442..4a50ee2c230d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -500,7 +500,7 @@ static int crng_init = 0;
 #define crng_ready() (likely(crng_init > 1))
 static int crng_init_cnt = 0;
 static unsigned long crng_global_init_time = 0;
-#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE)
+#define CRNG_INIT_CNT_THRESH (CHACHA_KEY_SIZE)
 static void _extract_crng(struct crng_state *crng, __u8 
out[CHACHA_BLOCK_SIZE]);
 static void _crng_backtrack_protect(struct crng_state *crng,
__u8 tmp[CHACHA_BLOCK_SIZE], int used);
@@ -931,6 +931,9 @@ static int crng_fast_load(const char *cp, size_t len)
unsigned long flags;
char *p;
 
+   trace_printk("crng threshold = %d\n", CRNG_INIT_CNT_THRESH);
+   trace_printk("crng_init_cnt = %d\n", crng_init_cnt);
+
if (!spin_trylock_irqsave(&primary_crng.lock, flags))
return 0;
if (crng_init != 0) {
@@ -943,11 +946,15 @@ static int crng_fast_load(const char *cp, size_t len)
cp++; crng_init_cnt++; len--;
}
spin_unlock_irqrestore(&primary_crng.lock, flags);
+
+   trace_printk("crng_init_cnt, now set to %d\n", crng_init_cnt);
+
if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {

Re: [RFC] Improve memset

2019-09-14 Thread Alexey Dobriyan
> Instead of calling memset:
> 
> 8100cd8d:   e8 0e 15 7a 00  callq  817ae2a0 
> <__memset>
> 
> and having a JMP inside it depending on the feature supported, let's simply
> have the REP; STOSB directly in the code:
> 
> ...
> 81000442:   4c 89 d7mov%r10,%rdi
> 81000445:   b9 00 10 00 00  mov$0x1000,%ecx
> 
> < new memset
> 8100044a:   f3 aa   rep stos %al,%es:(%rdi)
> 8100044c:   90  nop
> 8100044d:   90  nop
> 8100044e:   90  nop

You can fit the entire "xor eax, eax; rep stosb" inside the call instruction's 5 bytes.

> /* clobbers used by memset_orig() and memset_rep_good() */
> : "rsi", "rdx", "r8", "r9", "memory");

eh... I'd just drop it. These registers screw up everything.

Time to rebase memset0().


Re: [RFC] ARM: dts: omap36xx: Enable thermal throttling

2019-09-14 Thread Daniel Lezcano



Hi Nikolaus,

On 13/09/2019 22:34, H. Nikolaus Schaller wrote:

[ ... ]

>> The governor continues to read the temperature and see the temperature
>> decrease, it does nothing.
> 
> Ah, I think our misunderstanding is that the govenor "enables" and
> "disables" a set of OPPs. Rather it goes down or up in the list if
> above or below a trip point.

Right.

>> The governor continues to read the temperature, see the temperature
>> decreases and is below 75°C, it decrease the state (state=>1), the OPP
>> change to 2.36GHz.
>>
>> The temperature then increases, etc ...
>>
>> Actually the governors do more than that but it is for the example.
>>
>> So it is a bad idea to set boundaries for the cooling device state as
>> that may prevent the governor to take the right decision for the cooling
>> effect. Imagine in the example above, we set the max state to 1 for the
>> cooling device, that would mean the governor won't be able to stop the
>> temperature increasing, thus ending up to a hard reboot.
> 
> Well, the data sheet only requires that the high speed OPPs are only
> used below 90°C. If I understand correctly if we set the trip point to
> 90°C it will simply go down through the full list of OPPs. This will
> clearly avoid the high speed OPPs (and potentially some low-speed
> ones, but that does not harm).

Yes, right.

> So our approach "how to make it disable these two OPPs" seems to be
> wrong. Rather, we have to think "make sure the temperature
> stays below 90°C".

Your approach is not wrong, it proves there is a limitation in the
thermal/cpufreq framework.

There is the 'turbo mode' [1] which describes exactly what you want but
I'm not sure it is fully implemented.

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/opp/opp.txt#n138

> And is it true that we do not have to define mapping for the "critical"
> trip points?

Right, you don't have to, it is optional. But the critical trip point
will make your system shut down in case something goes wrong, for
example an external heating source, like the sun or whatever. It is good
to set a high temperature to force the shutdown.
Usually it is below the hardware reset temperature.


 Now the different combinations:

 <&cpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT> the governor will use the state
 0 to 7.

 <&cpu THERMAL_NO_LIMIT 2> the governor will use the state 0 to 2
>>>
>>> What would be the difference between  <&cpu THERMAL_NO_LIMIT 2>  and
>>> <&cpu 0 2> ?
>>> (if there is any)
>>
>> There is no difference.
>>
>>
 <&cpu 1 2> the governor will use the state 1 and 2. That means there is
 always the cooling effect as the governor won't set it to zero thus
 stopping the mitigation.
>>>
>>> For the purposes of the board in question, we have 4 operating points,
>>> 300MHz, 600MHz, 800MHz and 1GHz.  Once the board reaches 90C, we need
>>> them to cease operation at 800MHz and 1GHz and only permit operation
>>> at 300MHz and 600MHz.  I am going under the assumption that the cpu
>>> index[0] would be for 300MHz, index[1] = 600MHz, etc.
>>>
>>> If I am interpreting your comment correctly, I should set <&cpu
>>> THERMAL_NO_LIMIT 2> which would allow it to either not cool and run up
>>> to 600MHz and not exceed, is that correct?
>>
>> Nope, it will mean the cooling device can only reduce to 800MHz and to
>> 600MHz to mitigate.
>>
>> Actually the thermal framework neither the kernel are designed to handle
>> this case. They assume the OPPs are stable whatever the thermal situation.
>>
>> That is the reason why I think it is a very interesting use case because
>> it introduces a temperature constraint in addition to a duration for a
>> certain OPP. IMO, that could be an extension of the turbo-mode.
>>
>> With what we have now, I doubt it is feasible.
>>
>> The best we can do is preventing to reach the 90°C, so we remove the OPP
>> temperature constraint. I suppose 85°C is a safe temperature to stick on.
>>
>> And in order to let the governor have free hand.
>>
>> <&cpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>
>>
>> I don't think that will have a significant impact on performances
>> compared to be able to run at a higher temperature with less OPPs.
>>
>>
 Does it clarify the DT spec?

>>>
>>> I think your reply to my inquiry might.  If possible, it would be nice
>>> to get this documented into the bindings doc for others in the future.
>>> I can do it, but someone with a better understanding of the concept
>>> maybe more qualified.  I can totally understand why some may want to
>>> integrate this into their SoC device trees to slow the processor when
>>> hot.
>>>
>>> Thank you for taking the time to review this.  I appreciate it.
>>>
>>> adam
> 
> BR,
> Nikolaus
> 


-- 
  Linaro.org │ Open source software for ARM SoCs

Follow Linaro:   Facebook |
 Twitter |


Re: [Patch 3/6] media: dt-bindings: ov2659: add powerdown-gpios optional property

2019-09-14 Thread Lad, Prabhakar
Hi Benoit,

Thank you for the patch.

On Thu, Sep 12, 2019 at 1:58 PM Benoit Parrot  wrote:
>
> Add powerdown-gpios to the list of optional properties for the OV2659
> camera sensor.
>
> Signed-off-by: Benoit Parrot 
> ---
>  Documentation/devicetree/bindings/media/i2c/ov2659.txt | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/Documentation/devicetree/bindings/media/i2c/ov2659.txt 
> b/Documentation/devicetree/bindings/media/i2c/ov2659.txt
> index cabc7d827dfb..f55204cce0cd 100644
> --- a/Documentation/devicetree/bindings/media/i2c/ov2659.txt
> +++ b/Documentation/devicetree/bindings/media/i2c/ov2659.txt
> @@ -12,6 +12,10 @@ Required Properties:
>  - clock-names: should be "xvclk".
>  - link-frequencies: target pixel clock frequency.
>
> +Optional Properties:
> +- powerdown-gpios: reference to the GPIO connected to the pwdn pin, if any.
> +  Active is low.
> +
As per the datasheet, this should be active high with a pull-down resistor.

Cheers,
--Prabhakar Lad


pull-request: wireless-drivers-next 2019-09-14

2019-09-14 Thread Kalle Valo
Hi Dave,

here's a pull request to net-next tree for v5.4, more info below. Please
let me know if there are any problems.

Kalle

The following changes since commit 172ca8308b0517ca2522a8c885755fd5c20294e7:

  cxgb4: Fix spelling typos (2019-09-12 12:50:56 +0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git 
tags/wireless-drivers-next-for-davem-2019-09-14

for you to fetch changes up to f9e568754562e0f506e12aa899c378b4155080e9:

  Merge ath-next from 
git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git (2019-09-13 
18:15:58 +0300)


wireless-drivers-next patches for 5.4

Last set of patches for 5.4. wil6210 and rtw88 being most active this
time, but ath9k also having a new module to load devices without
EEPROM.

Major changes:

wil6210

* add support for Enhanced Directional Multi-Gigabit (EDMG) channels 9-11

* add debugfs file to show PMC ring content

* report boottime_ns in scan results

ath9k

* add a separate loader for AR92XX (and older) pci(e) without eeprom

brcmfmac

* use the same wiphy after PCIe reset to not confuse the user space

rtw88

* enable interrupt migration

* enable AMSDU in AMPDU aggregation

* report RX power for each antenna

* enable DPK and IQK calibration methods to improve performance


Ahmad Masri (1):
  wil6210: fix PTK re-key race

Alexei Avshalom Lazar (2):
  wil6210: Add EDMG channel support
  wil6210: verify cid value is valid

Arnd Bergmann (1):
  wcn36xx: use dynamic allocation for large variables

Ben Greear (1):
  ath10k: free beacon buf later in vdev teardown

Chin-Yen Lee (1):
  rtw88: 8822c: update pwr_seq to v13

Christian Lamparter (1):
  ath9k: add loader for AR92XX (and older) pci(e)

Colin Ian King (4):
  wil6210: fix wil_cid_valid with negative cid values
  rtlwifi: rtl8821ae: make array static const and remove redundant 
assignment
  bcma: make arrays pwr_info_offset and sprom_sizes static const, shrinks 
object size
  ssb: make array pwr_info_offset static const, makes object smaller

Dedy Lansky (4):
  wil6210: add wil_netif_rx() helper function
  wil6210: add debugfs to show PMC ring content
  wil6210: make sure DR bit is read before rest of the status message
  wil6210: properly initialize discovery_expired_work

Hui Peng (1):
  ath6kl: fix a NULL-ptr-deref bug in ath6kl_usb_alloc_urb_from_pipe()

Jia-Ju Bai (1):
  ath6kl: Fix a possible null-pointer dereference in 
ath6kl_htc_mbox_create()

Kalle Valo (1):
  Merge ath-next from git://git.kernel.org/.../kvalo/ath.git

Larry Finger (9):
  rtlwifi: rtl8723ae: Remove unused GET_XXX and SET_XXX macros
  rtlwifi: rtl8723ae: Replace local bit manipulation macros
  rtlwifi: rtl8723ae: Convert macros that set descriptor
  rtlwifi: rtl8723ae: Convert inline routines to little-endian words
  rtlwifi: rtl8723be: Remove unused SET_XXX and GET_XXX macros
  rtlwifi: rtl8723be: Replace local bit manipulation macros
  rtlwifi: rtl8723be: Convert macros that set descriptor
  rtlwifi: rtl8723be: Convert inline routines to little-endian words
  rtlwifi: rtl8188ee: rtl8192ce: rtl8192de: rtl8723ae: rtl8821ae: Remove 
some unused bit manipulation macros

Lior David (3):
  wil6210: use writel_relaxed in wil_debugfs_iomem_x32_set
  wil6210: fix RX short frame check
  wil6210: ignore reset errors for FW during probe

Lorenzo Bianconi (5):
  ath9k: dynack: fix possible deadlock in ath_dynack_node_{de}init
  ath9k: dyanck: introduce ath_dynack_set_timeout routine
  ath9k: dynack: properly set last timeout timestamp in ath_dynack_reset
  ath9k: dynack: set max timeout according to channel width
  ath9k: dynack: set ackto to max timeout in ath_dynack_reset

Lubomir Rintel (1):
  libertas: use mesh_wdev->ssid instead of priv->mesh_ssid

Luis Correia (1):
  CREDITS: Update email address

Markus Elfring (1):
  wil6210: Delete an unnecessary kfree() call in wil_tid_ampdu_rx_alloc()

Maya Erez (1):
  wil6210: report boottime_ns in scan results

Michael Straube (3):
  rtlwifi: rtl8192ce: replace _rtl92c_evm_db_to_percentage with generic 
version
  rtlwifi: rtl8192cu: replace _rtl92c_evm_db_to_percentage with generic 
version
  rtlwifi: rtl8192de: replace _rtl92d_evm_db_to_percentage with generic 
version

Navid Emamdoost (2):
  ath9k_htc: release allocated buffer if timed out
  ath9k: release allocated buffer if timed out

Nicolas Boichat (1):
  ath10k: adjust skb length in ath10k_sdio_mbox_rx_packet

Rafał Miłecki (3):
  brcmfmac: move "cfg80211_ops" pointer to another struct
  brcmfmac: split brcmf_attach() and brcmf_detach() functions
  brcmfmac: don't realloc wiphy during PCIe reset

Rakesh Pillai (1):
  ath10k: fix channel info

Re: [PATCH] s390: remove pointless drivers-y in drivers/s390/Makefile

2019-09-14 Thread Heiko Carstens
On Thu, Sep 12, 2019 at 02:23:54PM +0900, Masahiro Yamada wrote:
> This is unused.
> 
> Signed-off-by: Masahiro Yamada 
> ---
> 
>  drivers/s390/Makefile | 3 ---
>  1 file changed, 3 deletions(-)

Applied, thanks.



Re: [PATCH v7 5/6] powerpc/64: Make COMPAT user-selectable disabled on littleendian by default.

2019-09-14 Thread Michal Suchánek
On Tue, 03 Sep 2019 10:00:57 +1000
Michael Ellerman  wrote:

> Michal Suchánek  writes:
> > On Mon, 02 Sep 2019 12:03:12 +1000
> > Michael Ellerman  wrote:
> >  
> >> Michal Suchanek  writes:  
> >> > On bigendian ppc64 it is common to have 32bit legacy binaries but much
> >> > less so on littleendian.
> >> 
> >> I think the toolchain people will tell you that there is no 32-bit
> >> little endian ABI defined at all, if anything works it's by accident.  
> >
> > I have seen a piece of software that workarounds code issues on 64bit
> > by always compiling 32bit code. So it does work in some way.  
> 
> What software is that?

The only one I have seen is stockfish (v9)

> 
> > Also it has been pointed out that you can still switch to BE even with
> > the 'fast-switch' removed.  
> 
> Yes we have a proper syscall for endian switching, sys_switch_endian(),
> which is definitely supported.
> 
> But that *only* switches the endian-ness of the process, it does nothing
> to the syscall layer. So any process that switches to the other endian
> must endian flip syscall arguments (that aren't in registers), or flip
> back to the native endian before calling syscalls.

In other words, just installing a chroot of binaries built for the other
endian won't work. You need something like qemu to do the syscall
translation, or to run a full VM with a kernel that has the swapped-endian
syscall ABI.

Thanks

Michal


[PATCH] mm, tracing: print symbol name for call_site

2019-09-14 Thread Changbin Du
To improve the readability of raw slab trace points, print the call_site ip
using '%pS'. Then we can grep events with function names.

[002]    808.188897: kmem_cache_free: call_site=putname+0x47/0x50 
ptr=cef40c80
[002]    808.188898: kfree: call_site=security_cred_free+0x42/0x50 
ptr=62400820
[002]    808.188904: kmem_cache_free: call_site=put_cred_rcu+0x88/0xa0 
ptr=58d74ef8
[002]    808.188913: kmem_cache_alloc: call_site=prepare_creds+0x26/0x100 
ptr=58d74ef8 bytes_req=168 bytes_alloc=576 gfp_flags=GFP_KERNEL
[002]    808.188917: kmalloc: call_site=security_prepare_creds+0x77/0xa0 
ptr=62400820 bytes_req=8 bytes_alloc=336 gfp_flags=GFP_KERNEL|__GFP_ZERO
[002]    808.188920: kmem_cache_alloc: call_site=getname_flags+0x4f/0x1e0 
ptr=cef40c80 bytes_req=4096 bytes_alloc=4480 gfp_flags=GFP_KERNEL
[002]    808.188925: kmem_cache_free: call_site=putname+0x47/0x50 
ptr=cef40c80
[002]    808.188926: kfree: call_site=security_cred_free+0x42/0x50 
ptr=62400820
[002]    808.188931: kmem_cache_free: call_site=put_cred_rcu+0x88/0xa0 
ptr=58d74ef8

Signed-off-by: Changbin Du 
---
 include/trace/events/kmem.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eb57e3037deb..69e8bb8963db 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -35,8 +35,8 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__entry->gfp_flags  = gfp_flags;
),
 
-   TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu 
gfp_flags=%s",
-   __entry->call_site,
+   TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu 
gfp_flags=%s",
+   (void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
@@ -131,7 +131,8 @@ DECLARE_EVENT_CLASS(kmem_free,
__entry->ptr= ptr;
),
 
-   TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+   TP_printk("call_site=%pS ptr=%p",
+ (void *)__entry->call_site, __entry->ptr)
 );
 
 DEFINE_EVENT(kmem_free, kfree,
-- 
2.20.1



Re: [Patch 4/6] media: i2c: ov2659: Add optional powerdown gpio handling

2019-09-14 Thread Lad, Prabhakar
Hi Benoit,

On Thu, Sep 12, 2019 at 1:58 PM Benoit Parrot  wrote:
>
> On some boards it is possible that the sensor 'powerdown'
> pin is controlled by a gpio instead of being
> tied to always powered.
>
> This patch adds support for specifying an optional gpio
> which will be set at probe time and remain on.
>
> Signed-off-by: Benoit Parrot 
> ---
>  drivers/media/i2c/Kconfig  |  2 +-
>  drivers/media/i2c/ov2659.c | 13 +
>  2 files changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
> index 7eee1812bba3..315c1d8bdb7b 100644
> --- a/drivers/media/i2c/Kconfig
> +++ b/drivers/media/i2c/Kconfig
> @@ -634,7 +634,7 @@ config VIDEO_OV2640
>  config VIDEO_OV2659
> tristate "OmniVision OV2659 sensor support"
> depends on VIDEO_V4L2 && I2C
> -   depends on MEDIA_CAMERA_SUPPORT
> +   depends on MEDIA_CAMERA_SUPPORT && GPIOLIB
> select V4L2_FWNODE
> help
>   This is a Video4Linux2 sensor driver for the OmniVision
> diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c
> index efbe6dc720e2..c64f73bef336 100644
> --- a/drivers/media/i2c/ov2659.c
> +++ b/drivers/media/i2c/ov2659.c
> @@ -32,6 +32,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -232,6 +234,8 @@ struct ov2659 {
> struct sensor_register *format_ctrl_regs;
> struct ov2659_pll_ctrl pll;
> int streaming;
> +   /* used to control the sensor powerdownN pin */
> +   struct gpio_desc *pwrdn_gpio;
>  };
>
>  static const struct sensor_register ov2659_init_regs[] = {
> @@ -1391,6 +1395,7 @@ static int ov2659_probe(struct i2c_client *client)
> struct v4l2_subdev *sd;
> struct ov2659 *ov2659;
> struct clk *clk;
> +   struct gpio_desc *gpio;

You don't need the local variable here; you can assign the result directly to ov2659->pwrdn_gpio.

> int ret;
>
> if (!pdata) {
> @@ -1414,6 +1419,14 @@ static int ov2659_probe(struct i2c_client *client)
> ov2659->xvclk_frequency > 2700)
> return -EINVAL;
>
> +   /* Optional gpio don't fail if not present */
> +   gpio = devm_gpiod_get_optional(&client->dev, "powerdown",
> +  GPIOD_OUT_HIGH);
> +   if (IS_ERR(gpio))
> +   return PTR_ERR(gpio);
> +
> +   ov2659->pwrdn_gpio = gpio;
> +
Apart from assigning it, you don't actually use it.

You will also have to read the reset gpio pin, implement
ov2659_set_power(), and
call it in the appropriate places (s_power?).

Cheers,
--Prabhakar Lad


[PATCH] x86_64: new and improved memset()

2019-09-14 Thread Alexey Dobriyan
Current memset() implementation does silly things:
* multiplication to get register-wide constant:
waste of cycles if filler is known at compile time,

* REP STOSQ followed by REP STOSB:
REP STOSB setup overhead is very high because trailing length
is very low (< 8)

* suboptimal calling convention:
REP STOSB/STOSQ favours (rdi, rcx), ABI gives (rdi, rsi, rdx).
While shuffling registers is free, rcx and rdx are equivalent
code generation wise.

* memset_orig():
memset(..., 0, ...) could be done within 3 registers,
memset(..., != 0, ...) -- within 4 registers, anything else is
a waste. CPUs which required unrolling are hopefully gone by now.

New implementation is based on the following observations:
* c == 0 is the most common form,
filler can be done with "xor eax, eax" and pushed into memset()
saving 2 bytes per call and multiplication

* "len" divisible by 8 is the most common form:
all it takes is one pointer or unsigned long inside structure,
dispatch at compile time to code without those ugly "lets fill
at most 7 bytes" tails,

* multiplication to get wider filler value can be done at compile time
  for "c != 0" with 1 insn/10 bytes at most saving multiplication.

Note: "memset0" name is chosen because "bzero" is officially deprecated.

Note: memset(,0,) form is interleaved into memset(,c,) form to save space.

TODO:
CONFIG_FORTIFY_SOURCE is enabled by distros
inline "xor eax, eax; rep stosb"
benchmarks
testing

Signed-off-by: Alexey Dobriyan 
---

 arch/x86/boot/compressed/Makefile |1 
 arch/x86/include/asm/string_64.h  |  104 ++
 arch/x86/lib/Makefile |1 
 arch/x86/lib/memset0_64.S |   86 
 drivers/firmware/efi/libstub/Makefile |2 
 5 files changed, 193 insertions(+), 1 deletion(-)

--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -38,6 +38,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
 KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
 KBUILD_CFLAGS += -Wno-pointer-sign
+KBUILD_CFLAGS += -D_ARCH_X86_BOOT
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -15,7 +15,111 @@ extern void *memcpy(void *to, const void *from, size_t len);
 extern void *__memcpy(void *to, const void *from, size_t len);
 
 #define __HAVE_ARCH_MEMSET
+#if defined(_ARCH_X86_BOOT) || defined(CONFIG_FORTIFY_SOURCE)
 void *memset(void *s, int c, size_t n);
+#else
+#include 
+#include 
+
+/* Internal, do not use. */
+static __always_inline void memset0(void *s, size_t n)
+{
+   /* Internal, do not use. */
+   void _memset0_mov(void);
+   void _memset0_rep_stosq(void);
+   void memset0_mov(void);
+   void memset0_rep_stosq(void);
+   void memset0_rep_stosb(void);
+
+   if (__builtin_constant_p(n) && n == 0) {
+   } else if (__builtin_constant_p(n) && n == 1) {
+   *(uint8_t *)s = 0;
+   } else if (__builtin_constant_p(n) && n == 2) {
+   *(uint16_t *)s = 0;
+   } else if (__builtin_constant_p(n) && n == 4) {
+   *(uint32_t *)s = 0;
+   } else if (__builtin_constant_p(n) && n == 6) {
+   *(uint32_t *)s = 0;
+   *(uint16_t *)(s + 4) = 0;
+   } else if (__builtin_constant_p(n) && n == 8) {
+   *(uint64_t *)s = 0;
+   } else if (__builtin_constant_p(n) && (n & 7) == 0) {
+   alternative_call_2(
+   _memset0_mov,
+   _memset0_rep_stosq, X86_FEATURE_REP_GOOD,
+   memset0_rep_stosb, X86_FEATURE_ERMS,
+   ASM_OUTPUT2("=D" (s), "=c" (n)),
+   "D" (s), "c" (n)
+   : "rax", "cc", "memory"
+   );
+   } else {
+   alternative_call_2(
+   memset0_mov,
+   memset0_rep_stosq, X86_FEATURE_REP_GOOD,
+   memset0_rep_stosb, X86_FEATURE_ERMS,
+   ASM_OUTPUT2("=D" (s), "=c" (n)),
+   "D" (s), "c" (n)
+   : "rax", "rsi", "cc", "memory"
+   );
+   }
+}
+
+/* Internal, do not use. */
+static __always_inline void memsetx(void *s, int c, size_t n)
+{
+   /* Internal, do not use. */
+   void _memsetx_mov(void);
+   void _memsetx_rep_stosq(void);
+   void memsetx_mov(void);
+   void memsetx_rep_stosq(void);
+   void memsetx_rep_stosb(void);
+
+   const uint64_t ccc = (uint8_t)c * 0x0101010101010101ULL;
+
+   if (__builtin_constant_p(n) && n == 0) {
+   } else if (__builtin_constant_p(n) && n == 1) {
+   *(uint8_t *)s = ccc;
+   } el

Re: [PATCH] mips: sgi-ip27: switch from DISCONTIGMEM to SPARSEMEM

2019-09-14 Thread Mike Rapoport
Hi Thomas,

On Thu, Sep 12, 2019 at 04:09:12PM +0200, Thomas Bogendoerfer wrote:
> On Thu, Sep 12, 2019 at 03:55:39PM +0200, Thomas Bogendoerfer wrote:
> > - reserved[0xd] [0x00035bff8000-0x00035bff], 
> > 0x8000 bytes flags: 0x0
> > 
> > I have no idea which reservation this is, but it's not from one of the
> > node data.
> 
> that's sparsemem's mem_section. And 
> 
>  free_bootmem_with_active_regions(node, end_pfn);

It seems that the call to free_bootmem_with_active_regions() should have
been removed along with bootmem and it's not needed now.

Can you please test the below version of the patch?

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d50fafd..e4b02b5 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -669,6 +669,7 @@ config SGI_IP22
 config SGI_IP27
bool "SGI IP27 (Origin200/2000)"
select ARCH_HAS_PHYS_TO_DMA
+   select ARCH_SPARSEMEM_ENABLE
select FW_ARC
select FW_ARC64
select BOOT_ELF64
@@ -2633,18 +2634,9 @@ config ARCH_FLATMEM_ENABLE
def_bool y
depends on !NUMA && !CPU_LOONGSON2
 
-config ARCH_DISCONTIGMEM_ENABLE
-   bool
-   default y if SGI_IP27
-   help
- Say Y to support efficient handling of discontiguous physical memory,
- for architectures which are either NUMA (Non-Uniform Memory Access)
- or have huge holes in the physical address space for other reasons.
- See  for more.
-
 config ARCH_SPARSEMEM_ENABLE
bool
-   select SPARSEMEM_STATIC
+   select SPARSEMEM_STATIC if !SGI_IP27
 
 config NUMA
bool "NUMA Support"
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index fb077a9..9db8692 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -406,12 +406,8 @@ static void __init node_mem_init(cnodeid_t node)
slot_freepfn += PFN_UP(sizeof(struct pglist_data) +
   sizeof(struct hub_data));
 
-   free_bootmem_with_active_regions(node, end_pfn);
-
memblock_reserve(slot_firstpfn << PAGE_SHIFT,
 ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT));
-
-   sparse_memory_present_with_active_regions(node);
 }
 
 /*
@@ -444,6 +440,8 @@ void __init prom_meminit(void)
}
__node_data[node] = &null_node;
}
+
+   memblocks_present();
 }
 
 void __init prom_free_prom_memory(void)
 
> on the last node will free this reserved memory, when memory
> is added node by node. This explains it.
> 
> So when resending the patch add my
> 
> Tested-by: Thomas Bogendoerfer 
> 
> Thomas.
> 
> -- 
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.[ RFC1925, 2.3 ]

-- 
Sincerely yours,
Mike.


Re: [PATCH 4/5] thermal: Add generic power domain warming device driver.

2019-09-14 Thread Thara Gopinath
On 09/13/2019 03:54 AM, Ulf Hansson wrote:
> On Thu, 12 Sep 2019 at 22:18, Thara Gopinath  
> wrote:
>>
>> On 09/12/2019 11:04 AM, Ulf Hansson wrote:
>>
>> Hi Ulf,
>>
>> Thanks for the review.
>>> On Tue, 10 Sep 2019 at 19:14, Thara Gopinath  
>>> wrote:

 Resources modeled as power domains in the Linux kernel
 can be used to warm the SoC (e.g. the mx power domain on sdm845).
 To support this feature, introduce a generic power domain
 warming device driver that can be plugged into the thermal framework.
 (The thermal framework itself requires further modification to
 support a warming device in place of a cooling device.
 Those extensions are not introduced in this patch series.)

 Signed-off-by: Thara Gopinath 
 ---
 v1->v2:
 - Make power domain based warming device driver a generic
 driver in the thermal framework. v1 implemented this as a
 Qualcomm specific driver.
 - Rename certain variables as per review suggestions on the
 mailing list.

  drivers/thermal/Kconfig  |  11 +++
  drivers/thermal/Makefile |   2 +
  drivers/thermal/pwr_domain_warming.c | 174 
 +++
  3 files changed, 187 insertions(+)
  create mode 100644 drivers/thermal/pwr_domain_warming.c

 diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
 index 9966364..eeb6018 100644
 --- a/drivers/thermal/Kconfig
 +++ b/drivers/thermal/Kconfig
 @@ -187,6 +187,17 @@ config DEVFREQ_THERMAL

   If you want this support, you should say Y here.

 +config PWR_DOMAIN_WARMING_THERMAL
 +   bool "Power Domain based warming device"
 +   depends on PM_GENERIC_DOMAINS
 +   depends on PM_GENERIC_DOMAINS_OF
>>>
>>> PM_GENERIC_DOMAINS_OF can't be set unless PM_GENERIC_DOMAINS is set too.
>>>
>>> So I assume it's sufficient to depend on PM_GENERIC_DOMAINS_OF?
>>
>> Yes, you are right. I will change it.
>>>
 +   help
 + This implements the generic power domain based warming
 + mechanism through increasing the performance state of
 + a power domain.
 +
 + If you want this support, you should say Y here.
 +
  config THERMAL_EMULATION
 bool "Thermal emulation mode support"
 help
 diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
 index 74a37c7..382c64a 100644
 --- a/drivers/thermal/Makefile
 +++ b/drivers/thermal/Makefile
 @@ -27,6 +27,8 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)   += clock_cooling.o
  # devfreq cooling
  thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o

 +thermal_sys-$(CONFIG_PWR_DOMAIN_WARMING_THERMAL)   += 
 pwr_domain_warming.o
 +
  # platform thermal drivers
  obj-y  += broadcom/
  obj-$(CONFIG_THERMAL_MMIO) += thermal_mmio.o
 diff --git a/drivers/thermal/pwr_domain_warming.c 
 b/drivers/thermal/pwr_domain_warming.c
 new file mode 100644
 index 000..3dd792b
 --- /dev/null
 +++ b/drivers/thermal/pwr_domain_warming.c
 @@ -0,0 +1,174 @@
 +// SPDX-License-Identifier: GPL-2.0
 +/*
 + * Copyright (c) 2019, Linaro Ltd
 + */
 +#include 
 +#include 
 +#include 
 +#include 
 +#include 
 +#include 
 +#include 
 +#include 
 +#include 
 +
 +struct pd_warming_device {
 +   struct thermal_cooling_device *cdev;
 +   struct device *dev;
 +   int max_state;
 +   int cur_state;
 +   bool runtime_resumed;
 +};
 +
 +static const struct of_device_id pd_wdev_match_table[] = {
 +   { .compatible = "thermal-power-domain-wdev", .data = NULL },
 +   { }
 +};
 +MODULE_DEVICE_TABLE(of, pd_wdev_match_table);
 +
 +static int pd_wdev_get_max_state(struct thermal_cooling_device *cdev,
 +unsigned long *state)
 +{
 +   struct pd_warming_device *pd_wdev = cdev->devdata;
 +
 +   *state = pd_wdev->max_state;
 +   return 0;
 +}
 +
 +static int pd_wdev_get_cur_state(struct thermal_cooling_device *cdev,
 +unsigned long *state)
 +{
 +   struct pd_warming_device *pd_wdev = cdev->devdata;
 +
 +   *state = dev_pm_genpd_get_performance_state(pd_wdev->dev);
 +
 +   return 0;
 +}
 +
 +static int pd_wdev_set_cur_state(struct thermal_cooling_device *cdev,
 +unsigned long state)
 +{
 +   struct pd_warming_device *pd_wdev = cdev->devdata;
 +   struct device *dev = pd_wdev->dev;
 +   int ret;
 +
 +   ret = dev_pm_genpd_set_performance_state(dev, state);
 +
 +   if (ret)
 +   return ret;
 +
 +

[PATCH] qcom: ssbi-gpio: convert to hierarchical IRQ helpers in gpio core

2019-09-14 Thread Brian Masney
Now that the GPIO core has support for hierarchical IRQ chips, convert
Qualcomm's ssbi-gpio over to use these new helpers to reduce duplicated
code across drivers.

Signed-off-by: Brian Masney 
---
Linus: I've only compile tested this driver. Hopefully you have time to
test this on your DragonBoard.

 drivers/pinctrl/qcom/Kconfig |   1 +
 drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 121 +++
 2 files changed, 34 insertions(+), 88 deletions(-)

diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index 32fc2458b8eb..41fab621de1b 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -152,6 +152,7 @@ config PINCTRL_QCOM_SSBI_PMIC
select PINMUX
select PINCONF
select GENERIC_PINCONF
+   select GPIOLIB_IRQCHIP
select IRQ_DOMAIN_HIERARCHY
help
  This is the pinctrl, pinmux, pinconf and gpiolib driver for the
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c 
b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index c1f7d0799ebe..dca86886b1f9 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -56,7 +56,6 @@
 /**
  * struct pm8xxx_pin_data - dynamic configuration for a pin
  * @reg:   address of the control register
- * @irq:   IRQ from the PMIC interrupt controller
  * @power_source:  logical selected voltage source, mapping in static data
  * is used translate to register values
  * @mode:  operating mode for the pin (input/output)
@@ -72,7 +71,6 @@
  */
 struct pm8xxx_pin_data {
unsigned reg;
-   int irq;
u8 power_source;
u8 mode;
bool open_drain;
@@ -93,9 +91,6 @@ struct pm8xxx_gpio {
 
struct pinctrl_desc desc;
unsigned npins;
-
-   struct fwnode_handle *fwnode;
-   struct irq_domain *domain;
 };
 
 static const struct pinconf_generic_params pm8xxx_gpio_bindings[] = {
@@ -491,13 +486,16 @@ static int pm8xxx_gpio_get(struct gpio_chip *chip, 
unsigned offset)
 {
struct pm8xxx_gpio *pctrl = gpiochip_get_data(chip);
struct pm8xxx_pin_data *pin = pctrl->desc.pins[offset].drv_data;
+   int ret, irq;
bool state;
-   int ret;
 
-   if (pin->mode == PM8XXX_GPIO_MODE_OUTPUT) {
-   ret = pin->output_value;
-   } else if (pin->irq >= 0) {
-   ret = irq_get_irqchip_state(pin->irq, IRQCHIP_STATE_LINE_LEVEL, 
&state);
+   if (pin->mode == PM8XXX_GPIO_MODE_OUTPUT)
+   return pin->output_value;
+
+   irq = chip->to_irq(chip, offset);
+   if (irq >= 0) {
+   ret = irq_get_irqchip_state(irq, IRQCHIP_STATE_LINE_LEVEL,
+   &state);
if (!ret)
ret = !!state;
} else
@@ -535,37 +533,6 @@ static int pm8xxx_gpio_of_xlate(struct gpio_chip *chip,
 }
 
 
-static int pm8xxx_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
-{
-   struct pm8xxx_gpio *pctrl = gpiochip_get_data(chip);
-   struct pm8xxx_pin_data *pin = pctrl->desc.pins[offset].drv_data;
-   struct irq_fwspec fwspec;
-   int ret;
-
-   fwspec.fwnode = pctrl->fwnode;
-   fwspec.param_count = 2;
-   fwspec.param[0] = offset + PM8XXX_GPIO_PHYSICAL_OFFSET;
-   fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
-
-   ret = irq_create_fwspec_mapping(&fwspec);
-
-   /*
-* Cache the IRQ since pm8xxx_gpio_get() needs this to get determine the
-* line level.
-*/
-   pin->irq = ret;
-
-   return ret;
-}
-
-static void pm8xxx_gpio_free(struct gpio_chip *chip, unsigned int offset)
-{
-   struct pm8xxx_gpio *pctrl = gpiochip_get_data(chip);
-   struct pm8xxx_pin_data *pin = pctrl->desc.pins[offset].drv_data;
-
-   pin->irq = -1;
-}
-
 #ifdef CONFIG_DEBUG_FS
 #include 
 
@@ -624,13 +591,11 @@ static void pm8xxx_gpio_dbg_show(struct seq_file *s, 
struct gpio_chip *chip)
 #endif
 
 static const struct gpio_chip pm8xxx_gpio_template = {
-   .free = pm8xxx_gpio_free,
.direction_input = pm8xxx_gpio_direction_input,
.direction_output = pm8xxx_gpio_direction_output,
.get = pm8xxx_gpio_get,
.set = pm8xxx_gpio_set,
.of_xlate = pm8xxx_gpio_of_xlate,
-   .to_irq = pm8xxx_gpio_to_irq,
.dbg_show = pm8xxx_gpio_dbg_show,
.owner = THIS_MODULE,
 };
@@ -712,43 +677,24 @@ static int pm8xxx_domain_translate(struct irq_domain 
*domain,
return 0;
 }
 
-static int pm8xxx_domain_alloc(struct irq_domain *domain, unsigned int virq,
-  unsigned int nr_irqs, void *data)
+static unsigned int pm8xxx_child_offset_to_irq(struct gpio_chip *chip,
+  unsigned int offset)
 {
-   struct pm8xxx_gpio *pctrl = container_of(domain->host_data,
-struct pm8xxx_gpio, chip);
-   struct irq_fwsp

Re: [PATCH] x86_64: new and improved memset()

2019-09-14 Thread Borislav Petkov
On Sat, Sep 14, 2019 at 01:33:45PM +0300, Alexey Dobriyan wrote:
> --- a/arch/x86/include/asm/string_64.h
> +++ b/arch/x86/include/asm/string_64.h
> @@ -15,7 +15,111 @@ extern void *memcpy(void *to, const void *from, size_t 
> len);
>  extern void *__memcpy(void *to, const void *from, size_t len);
>  
>  #define __HAVE_ARCH_MEMSET
> +#if defined(_ARCH_X86_BOOT) || defined(CONFIG_FORTIFY_SOURCE)
>  void *memset(void *s, int c, size_t n);
> +#else
> +#include 
> +#include 
> +
> +/* Internal, do not use. */
> +static __always_inline void memset0(void *s, size_t n)
> +{
> + /* Internal, do not use. */
> + void _memset0_mov(void);
> + void _memset0_rep_stosq(void);
> + void memset0_mov(void);
> + void memset0_rep_stosq(void);
> + void memset0_rep_stosb(void);
> +
> + if (__builtin_constant_p(n) && n == 0) {
> + } else if (__builtin_constant_p(n) && n == 1) {
> + *(uint8_t *)s = 0;
> + } else if (__builtin_constant_p(n) && n == 2) {
> + *(uint16_t *)s = 0;
> + } else if (__builtin_constant_p(n) && n == 4) {
> + *(uint32_t *)s = 0;
> + } else if (__builtin_constant_p(n) && n == 6) {
> + *(uint32_t *)s = 0;
> + *(uint16_t *)(s + 4) = 0;
> + } else if (__builtin_constant_p(n) && n == 8) {
> + *(uint64_t *)s = 0;
> + } else if (__builtin_constant_p(n) && (n & 7) == 0) {
> + alternative_call_2(
> + _memset0_mov,
> + _memset0_rep_stosq, X86_FEATURE_REP_GOOD,
> + memset0_rep_stosb, X86_FEATURE_ERMS,
> + ASM_OUTPUT2("=D" (s), "=c" (n)),
> + "D" (s), "c" (n)
> + : "rax", "cc", "memory"
> + );
> + } else {
> + alternative_call_2(
> + memset0_mov,
> + memset0_rep_stosq, X86_FEATURE_REP_GOOD,
> + memset0_rep_stosb, X86_FEATURE_ERMS,
> + ASM_OUTPUT2("=D" (s), "=c" (n)),
> + "D" (s), "c" (n)
> + : "rax", "rsi", "cc", "memory"
> + );
> + }
> +}
> +
> +/* Internal, do not use. */
> +static __always_inline void memsetx(void *s, int c, size_t n)
> +{
> + /* Internal, do not use. */
> + void _memsetx_mov(void);
> + void _memsetx_rep_stosq(void);
> + void memsetx_mov(void);
> + void memsetx_rep_stosq(void);
> + void memsetx_rep_stosb(void);
> +
> + const uint64_t ccc = (uint8_t)c * 0x0101010101010101ULL;
> +
> + if (__builtin_constant_p(n) && n == 0) {
> + } else if (__builtin_constant_p(n) && n == 1) {
> + *(uint8_t *)s = ccc;
> + } else if (__builtin_constant_p(n) && n == 2) {
> + *(uint16_t *)s = ccc;
> + } else if (__builtin_constant_p(n) && n == 4) {
> + *(uint32_t *)s = ccc;
> + } else if (__builtin_constant_p(n) && n == 8) {
> + *(uint64_t *)s = ccc;
> + } else if (__builtin_constant_p(n) && (n & 7) == 0) {
> + alternative_call_2(
> + _memsetx_mov,
> + _memsetx_rep_stosq, X86_FEATURE_REP_GOOD,
> + memsetx_rep_stosb, X86_FEATURE_ERMS,
> + ASM_OUTPUT2("=D" (s), "=c" (n)),
> + "D" (s), "c" (n), "a" (ccc)
> + : "cc", "memory"
> + );
> + } else {
> + alternative_call_2(
> + memsetx_mov,
> + memsetx_rep_stosq, X86_FEATURE_REP_GOOD,
> + memsetx_rep_stosb, X86_FEATURE_ERMS,
> + ASM_OUTPUT2("=D" (s), "=c" (n)),
> + "D" (s), "c" (n), "a" (ccc)
> + : "rsi", "cc", "memory"
> + );
> + }
> +}
> +
> +static __always_inline void *memset(void *s, int c, size_t n)
> +{
> + if (__builtin_constant_p(c)) {
> + if (c == 0) {
> + memset0(s, n);
> + } else {
> + memsetx(s, c, n);
> + }
> + return s;
> + } else {
> + return __builtin_memset(s, c, n);
> + }
> +}

I'm willing to take something like that only when such complexity is
justified by numbers. I.e., I'm much more inclined to capping it under
32 and 64 byte sizes and keeping it simple.

...

> +ENTRY(_memset0_mov)
> + xor eax, eax
> +.globl _memsetx_mov
> +_memsetx_mov:
> + add rcx, rdi
> + cmp rdi, rcx
> + je  1f
> +2:
> + mov [rdi], rax
> + add rdi, 8
> + cmp rdi, rcx
> + jne 2b
> +1:
> + ret
> +ENDPROC(_memset0_mov)
> +ENDPROC(_memsetx_mov)
> +EXPORT_SYMBOL(_memset0_mov)
> +EXPORT_SYMBOL(_memsetx_mov)
> +
> +ENTRY(memset0_mov)
> + xor eax, eax
> +.globl memsetx_mov
> +memsetx_mov:
> + lea rsi, [rdi + rcx]
> + cmp rdi, rsi
> + je  1f
> +2:
> + mov [rdi], al
> + add 

Re: [RFC] Improve memset

2019-09-14 Thread Borislav Petkov
On Sat, Sep 14, 2019 at 12:29:15PM +0300, Alexey Dobriyan wrote:
> eh... I'd just drop it. These registers screw up everything.

The intent is to not touch memset_orig and let it die with its users. It
is irrelevant now anyway.

If it can be shown that the extended list of clobbered registers hurts
performance, then we can improve it for the sake of keeping register
pressure low.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


[PATCH] staging: rtl8723bs: remove return statements from void functions

2019-09-14 Thread Michael Straube
Remove unnecessary return statements from void functions reported by
checkpatch.

WARNING: void function return statements are not generally useful

Signed-off-by: Michael Straube 
---
 drivers/staging/rtl8723bs/core/rtw_mlme.c |  5 -
 drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 10 --
 drivers/staging/rtl8723bs/core/rtw_pwrctrl.c  |  1 -
 drivers/staging/rtl8723bs/core/rtw_security.c |  1 -
 drivers/staging/rtl8723bs/core/rtw_wlan_util.c|  2 --
 drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c|  2 --
 drivers/staging/rtl8723bs/hal/odm_DIG.c   |  1 -
 drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c |  1 -
 drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c|  2 --
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c |  1 -
 drivers/staging/rtl8723bs/os_dep/sdio_intf.c  |  1 -
 11 files changed, 27 deletions(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c 
b/drivers/staging/rtl8723bs/core/rtw_mlme.c
index 34adf5789c98..4000125054c3 100644
--- a/drivers/staging/rtl8723bs/core/rtw_mlme.c
+++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c
@@ -329,7 +329,6 @@ void rtw_generate_random_ibss(u8 *pibss)
pibss[3] = (u8)(curtime & 0xff) ;/* p[0]; */
pibss[4] = (u8)((curtime>>8) & 0xff) ;/* p[1]; */
pibss[5] = (u8)((curtime>>16) & 0xff) ;/* p[2]; */
-   return;
 }
 
 u8 *rtw_get_capability_from_ie(u8 *ie)
@@ -832,8 +831,6 @@ void rtw_survey_event_callback(struct adapter   
*adapter, u8 *pbuf)
 exit:
 
spin_unlock_bh(&pmlmepriv->lock);
-
-   return;
 }
 
 
@@ -1840,8 +1837,6 @@ void rtw_mlme_reset_auto_scan_int(struct adapter *adapter)
mlme->auto_scan_int_ms = mlme->roam_scan_int_ms;
} else
mlme->auto_scan_int_ms = 0; /* disabled */
-
-   return;
 }
 
 static void rtw_auto_scan_handler(struct adapter *padapter)
diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c 
b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
index 2128886c9924..4f812cd19b31 100644
--- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
+++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
@@ -3086,8 +3086,6 @@ void issue_auth(struct adapter *padapter, struct sta_info 
*psta, unsigned short
rtw_wep_encrypt(padapter, (u8 *)pmgntframe);
DBG_871X("%s\n", __func__);
dump_mgntframe(padapter, pmgntframe);
-
-   return;
 }
 
 
@@ -3405,8 +3403,6 @@ void issue_assocreq(struct adapter *padapter)
rtw_buf_update(&pmlmepriv->assoc_req, 
&pmlmepriv->assoc_req_len, (u8 *)pwlanhdr, pattrib->pktlen);
else
rtw_buf_free(&pmlmepriv->assoc_req, &pmlmepriv->assoc_req_len);
-
-   return;
 }
 
 /* when wait_ack is ture, this function shoule be called at process context */
@@ -5260,8 +5256,6 @@ void report_del_sta_event(struct adapter *padapter, 
unsigned char *MacAddr, unsi
DBG_871X("report_del_sta_event: delete STA, mac_id =%d\n", mac_id);
 
rtw_enqueue_cmd(pcmdpriv, pcmd_obj);
-
-   return;
 }
 
 void report_add_sta_event(struct adapter *padapter, unsigned char *MacAddr, 
int cam_idx)
@@ -5306,8 +5300,6 @@ void report_add_sta_event(struct adapter *padapter, 
unsigned char *MacAddr, int
DBG_871X("report_add_sta_event: add STA\n");
 
rtw_enqueue_cmd(pcmdpriv, pcmd_obj);
-
-   return;
 }
 
 /
@@ -5869,8 +5861,6 @@ void link_timer_hdl(struct timer_list *t)
issue_assocreq(padapter);
set_link_timer(pmlmeext, REASSOC_TO);
}
-
-   return;
 }
 
 void addba_timer_hdl(struct timer_list *t)
diff --git a/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c 
b/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c
index 4075de07e0a9..30137f0bd984 100644
--- a/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c
+++ b/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c
@@ -190,7 +190,6 @@ void rtw_ps_processor(struct adapter *padapter)
}
 exit:
pwrpriv->ps_processing = false;
-   return;
 }
 
 static void pwr_state_check_handler(struct timer_list *t)
diff --git a/drivers/staging/rtl8723bs/core/rtw_security.c 
b/drivers/staging/rtl8723bs/core/rtw_security.c
index 57cfe06d7d73..5ffaf9bfa6e8 100644
--- a/drivers/staging/rtl8723bs/core/rtw_security.c
+++ b/drivers/staging/rtl8723bs/core/rtw_security.c
@@ -309,7 +309,6 @@ void rtw_wep_decrypt(struct adapter  *padapter, u8 
*precvframe)
 
WEP_SW_DEC_CNT_INC(psecuritypriv, prxattrib->ra);
}
-   return;
 }
 
 /* 3   =TKIP related = */
diff --git a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c 
b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c
index ea3ea2a6b314..5ab98f3e722e 100644
--- a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c
+++ b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c
@@ -1170,8 +1170,6 @@ void HT_info_handler(struct adapter *padapter, struct 
ndis_80211_var_ie *pIE)
 
pmlmeinfo->HT_info_enable = 1;
   

Re: [PATCH] riscv: modify the Image header to improve compatibility with the ARM64 header

2019-09-14 Thread Palmer Dabbelt

On Fri, 13 Sep 2019 20:08:14 PDT (-0700), Paul Walmsley wrote:


Part of the intention during the definition of the RISC-V kernel image
header was to lay the groundwork for a future merge with the ARM64
image header.  One error during my original review was not noticing
that the RISC-V header's "magic" field was at a different size and
position than the ARM64's "magic" field.  If the existing ARM64 Image
header parsing code were to attempt to parse an existing RISC-V kernel
image header format, it would see a magic number 0.  This is
undesirable, since it's our intention to align as closely as possible
with the ARM64 header format.  Another problem was that the original
"res3" field was not being initialized correctly to zero.

Address these issues by creating a 32-bit "magic2" field in the RISC-V
header which matches the ARM64 "magic" field.  RISC-V binaries will
store "RSC\x05" in this field.  The intention is that the use of the
existing 64-bit "magic" field in the RISC-V header will be deprecated
over time.  Increment the minor version number of the file format to
indicate this change, and update the documentation accordingly.  Fix
the assembler directives in head.S to ensure that reserved fields are
properly zero-initialized.

Signed-off-by: Paul Walmsley 
Reported-by: Palmer Dabbelt 
Cc: Atish Patra 
Cc: Karsten Merker 
---
Will try to get this merged before v5.3, to minimize the delta with the
ARM64 header in the final release.

 Documentation/riscv/boot-image-header.txt | 13 +++--
 arch/riscv/include/asm/image.h| 12 ++--
 arch/riscv/kernel/head.S  |  4 ++--
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/Documentation/riscv/boot-image-header.txt 
b/Documentation/riscv/boot-image-header.txt
index 1b73fea23b39..14b1492f689b 100644
--- a/Documentation/riscv/boot-image-header.txt
+++ b/Documentation/riscv/boot-image-header.txt
@@ -18,7 +18,7 @@ The following 64-byte header is present in decompressed Linux 
kernel image.
u32 res1  = 0;/* Reserved */
u64 res2  = 0;/* Reserved */
u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */
-   u32 res3; /* Reserved for additional RISC-V specific 
header */
+   u32 magic2 = 0x56534905;  /* Magic number 2, little endian, "RSC\x05" */
u32 res4; /* Reserved for PE COFF offset */

 This header format is compliant with PE/COFF header and largely inspired from
@@ -37,13 +37,14 @@ Notes:
Bits 16:31 - Major version

   This preserves compatibility across newer and older version of the header.
-  The current version is defined as 0.1.
+  The current version is defined as 0.2.

-- res3 is reserved for offset to any other additional fields. This makes the
-  header extendible in future. One example would be to accommodate ISA
-  extension for RISC-V in future. For current version, it is set to be zero.
+- The "magic" field is deprecated as of version 0.2.  In a future
+  release, it may be removed.  This originally should have matched up
+  with the ARM64 header "magic" field, but unfortunately does not.
+  The "magic2" field replaces it, matching up with the ARM64 header.

-- In current header, the flag field has only one field.
+- In current header, the flags field has only one field.
Bit 0: Kernel endianness. 1 if BE, 0 if LE.

 - Image size is mandatory for boot loader to load kernel image. Booting will
diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h
index ef28e106f247..344db5244547 100644
--- a/arch/riscv/include/asm/image.h
+++ b/arch/riscv/include/asm/image.h
@@ -3,7 +3,8 @@
 #ifndef __ASM_IMAGE_H
 #define __ASM_IMAGE_H

-#define RISCV_IMAGE_MAGIC  "RISCV"
+#define RISCV_IMAGE_MAGIC  "RISCV\0\0\0"
+#define RISCV_IMAGE_MAGIC2 "RSC\x05"

 #define RISCV_IMAGE_FLAG_BE_SHIFT  0
 #define RISCV_IMAGE_FLAG_BE_MASK   0x1
@@ -23,7 +24,7 @@
 #define __HEAD_FLAGS   (__HEAD_FLAG(BE))

 #define RISCV_HEADER_VERSION_MAJOR 0
-#define RISCV_HEADER_VERSION_MINOR 1
+#define RISCV_HEADER_VERSION_MINOR 2

 #define RISCV_HEADER_VERSION (RISCV_HEADER_VERSION_MAJOR << 16 | \
  RISCV_HEADER_VERSION_MINOR)
@@ -39,9 +40,8 @@
  * @version:   version
  * @res1:  reserved
  * @res2:  reserved
- * @magic: Magic number
- * @res3:  reserved (will be used for additional RISC-V specific
- * header)
+ * @magic: Magic number (RISC-V specific; deprecated)
+ * @magic2:Magic number 2 (to match the ARM64 'magic' field pos)
  * @res4:  reserved (will be used for PE COFF offset)
  *
  * The intention is for this header format to be shared between multiple
@@ -58,7 +58,7 @@ struct riscv_image_header {
u32 res1;
u64 res2;
u64 magic;
-   u32 res3;
+   u32 magic2;
u32 res4;
 };
 #endif /* __ASS

[PATCH RFC] random: getrandom(2): don't block on non-initialized entropy pool

2019-09-14 Thread Ahmed S. Darwish
getrandom() has been created as a new and more secure interface for
pseudorandom data requests.  Unlike /dev/urandom, it unconditionally
blocks until the entropy pool has been properly initialized.

While getrandom() has no guaranteed upper bound for its waiting time,
user-space has been abusing it by issuing the syscall, from shared
libraries no less, during the main system boot sequence.

Thus, on certain setups where there is no hwrng (embedded), or the
hwrng is not trusted by some users (intel RDRAND), or sometimes it's
just broken (amd RDRAND), the system boot can be *reliably* blocked.

The issue is further exacerbated by recent file-system optimizations,
e.g. b03755ad6f33 (ext4: make __ext4_get_inode_loc plug), which
merges directory lookup code inode table IO, and thus minimizes the
number of disk interrupts and entropy during boot. After that commit,
a blocked boot can be reliably reproduced on a Thinkpad E480 laptop
with standard ArchLinux user-space.

Thus, don't trust user-space on calling getrandom() from the right
context. Just never block, and return -EINVAL if entropy is not yet
available.

Link: 
https://lkml.kernel.org/r/CAHk-=wjyH910+JRBdZf_Y9G54c1M=lbf8nkxb6vjcm9xjln...@mail.gmail.com
Link: https://lkml.kernel.org/r/20190912034421.GA2085@darwi-home-pc
Link: https://lkml.kernel.org/r/20190911173624.gi2...@mit.edu
Link: https://lkml.kernel.org/r/20180514003034.gi14...@thunk.org

Suggested-by: Linus Torvalds 
Signed-off-by: Ahmed S. Darwish 
---

Notes:
This feels very risky at the very end of -rc8, so only sending
this as an RFC. The system of course reliably boots with this,
and the log, as expected, powerfully warns all callers:

$ dmesg | grep random
[0.236472] random: get_random_bytes called from start_kernel+0x30f/0x4d7 
with crng_init=0
[0.680263] random: fast init done
[2.500346] random: lvm: uninitialized urandom read (4 bytes read)
[2.595125] random: systemd-random-: invalid getrandom request (512 bytes): 
crng not ready
[2.595126] random: systemd-random-: uninitialized urandom read (512 bytes 
read)
[3.427699] random: dbus-daemon: uninitialized urandom read (12 bytes read)
[3.979425] urandom_read: 1 callbacks suppressed
[3.979426] random: polkitd: uninitialized urandom read (8 bytes read)
[3.979726] random: polkitd: uninitialized urandom read (8 bytes read)
[3.979752] random: polkitd: uninitialized urandom read (8 bytes read)
[4.473398] random: gnome-session-b: invalid getrandom request (16 bytes): 
crng not ready
[4.473404] random: gnome-session-b: invalid getrandom request (16 bytes): 
crng not ready
[4.473409] random: gnome-session-b: invalid getrandom request (16 bytes): 
crng not ready
[5.265636] random: crng init done
[5.265649] random: 3 urandom warning(s) missed due to ratelimiting
[5.265652] random: 1 getrandom warning(s) missed due to ratelimiting

 drivers/char/random.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 4a50ee2c230d..309dc5ddf370 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -511,6 +511,8 @@ static struct ratelimit_state unseeded_warning =
RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
 static struct ratelimit_state urandom_warning =
RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
+static struct ratelimit_state getrandom_warning =
+   RATELIMIT_STATE_INIT("warn_getrandom_notavail", HZ, 3);

 static int ratelimit_disable __read_mostly;

@@ -1053,6 +1055,12 @@ static void crng_reseed(struct crng_state *crng, struct 
entropy_store *r)
  urandom_warning.missed);
urandom_warning.missed = 0;
}
+   if (getrandom_warning.missed) {
+   pr_notice("random: %d getrandom warning(s) missed "
+ "due to ratelimiting\n",
+ getrandom_warning.missed);
+   getrandom_warning.missed = 0;
+   }
}
 }

@@ -1915,6 +1923,7 @@ int __init rand_initialize(void)
crng_global_init_time = jiffies;
if (ratelimit_disable) {
urandom_warning.interval = 0;
+   getrandom_warning.interval = 0;
unseeded_warning.interval = 0;
}
return 0;
@@ -2138,8 +2147,6 @@ const struct file_operations urandom_fops = {
 SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
unsigned int, flags)
 {
-   int ret;
-
if (flags & ~(GRND_NONBLOCK|GRND_RANDOM))
return -EINVAL;

@@ -2152,9 +2159,13 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, 
count,
if (!crng_ready()) {
if (flags & GRND_NONBLOCK)
return -EAGAIN;
-   ret = wait_for_random_bytes();
-   if (unlikely(ret))
-

[PATCH v2 1/4] task: Add a count of task rcu users

2019-09-14 Thread Eric W. Biederman


Add a count of the number of rcu users (currently 1) of the task
struct so that we can later add the scheduler case and get rid of the
very subtle task_rcu_dereference, and just use rcu_dereference.

As suggested by Oleg have the count overlap rcu_head so that no
additional space in task_struct is required.

Inspired-by: Linus Torvalds 
Inspired-by: Oleg Nesterov 
Signed-off-by: "Eric W. Biederman" 
---
 include/linux/sched.h  | 5 -
 include/linux/sched/task.h | 1 +
 kernel/exit.c  | 7 ++-
 kernel/fork.c  | 7 +++
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..99a4518b9b17 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1142,7 +1142,10 @@ struct task_struct {
 
struct tlbflush_unmap_batch tlb_ubc;
 
-   struct rcu_head rcu;
+   union {
+   refcount_t  rcu_users;
+   struct rcu_head rcu;
+   };
 
/* Cache last used pipe for splice(): */
struct pipe_inode_info  *splice_pipe;
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0497091e40c1..4c44c37236b2 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -116,6 +116,7 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b4a5dcce8f8..2e259286f4e7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
put_task_struct(tsk);
 }
 
+void put_task_struct_rcu_user(struct task_struct *task)
+{
+   if (refcount_dec_and_test(&task->rcu_users))
+   call_rcu(&task->rcu, delayed_put_task_struct);
+}
 
 void release_task(struct task_struct *p)
 {
@@ -222,7 +227,7 @@ void release_task(struct task_struct *p)
 
write_unlock_irq(&tasklist_lock);
release_thread(p);
-   call_rcu(&p->rcu, delayed_put_task_struct);
+   put_task_struct_rcu_user(p);
 
p = leader;
if (unlikely(zap_leader))
diff --git a/kernel/fork.c b/kernel/fork.c
index 2852d0e76ea3..9f04741d5c70 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -900,10 +900,9 @@ static struct task_struct *dup_task_struct(struct 
task_struct *orig, int node)
if (orig->cpus_ptr == &orig->cpus_mask)
tsk->cpus_ptr = &tsk->cpus_mask;
 
-   /*
-* One for us, one for whoever does the "release_task()" (usually
-* parent)
-*/
+   /* One for the user space visible state that goes away when reaped. */
+   refcount_set(&tsk->rcu_users, 1);
+   /* One for the rcu users, and one for the scheduler */
refcount_set(&tsk->usage, 2);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
-- 
2.21.0.dirty



[PATCH v2 2/4] task: Ensure tasks are available for a grace period after leaving the runqueue

2019-09-14 Thread Eric W. Biederman


In the ordinary case today the rcu grace period for a task_struct is
triggered when another process waits for its zombie and causes the
kernel to call release_task().  As the waiting task has to receive a
signal and then act upon it before this happens, typically this will
occur after the original task has been removed from the runqueue.

Unfortunately in some cases such as self reaping tasks it can be shown
that release_task() will be called starting the grace period for
task_struct long before the task leaves the runqueue.

Therefore use put_task_struct_rcu_user in finish_task_switch to
guarantee that there is an rcu lifetime after the task
leaves the runqueue.

Besides the change in the start of the rcu grace period for the
task_struct this change may delay perf_event_delayed_put and
trace_sched_process_free.  The function perf_event_delayed_put boils
down to just a WARN_ON for cases that I assume should never happen.  So
I don't see any problem with delaying it.

The function trace_sched_process_free is a trace point and thus
visible to user space.  Occasionally userspace has the strangest
dependencies so this has a minuscule chance of causing a regression.
This change only changes the timing of when the tracepoint is called.
The change in timing arguably gives userspace a more accurate picture
of what is going on.  So I don't expect there to be a regression.

In the case where a task self reaps we are pretty much guaranteed that
the rcu grace period is delayed.  So we should get quite a bit of
coverage of this worst case for the change in a normal threaded
workload.  So I expect any issues to turn up quickly or not at all.

I have lightly tested this change and everything appears to work
fine.

Inspired-by: Linus Torvalds 
Inspired-by: Oleg Nesterov 
Signed-off-by: "Eric W. Biederman" 
---
 kernel/fork.c   | 11 +++
 kernel/sched/core.c |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 9f04741d5c70..7a74ade4e7d6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -900,10 +900,13 @@ static struct task_struct *dup_task_struct(struct 
task_struct *orig, int node)
if (orig->cpus_ptr == &orig->cpus_mask)
tsk->cpus_ptr = &tsk->cpus_mask;
 
-   /* One for the user space visible state that goes away when reaped. */
-   refcount_set(&tsk->rcu_users, 1);
-   /* One for the rcu users, and one for the scheduler */
-   refcount_set(&tsk->usage, 2);
+   /*
+* One for the user space visible state that goes away when reaped.
+* One for the scheduler.
+*/
+   refcount_set(&tsk->rcu_users, 2);
+   /* One for the rcu users */
+   refcount_set(&tsk->usage, 1);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..69015b7c28da 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3135,7 +3135,7 @@ static struct rq *finish_task_switch(struct task_struct 
*prev)
/* Task is done with its stack. */
put_task_stack(prev);
 
-   put_task_struct(prev);
+   put_task_struct_rcu_user(prev);
}
 
tick_nohz_task_switch();
-- 
2.21.0.dirty



[PATCH v2 3/4] task: With a grace period after finish_task_switch, remove unnecessary code

2019-09-14 Thread Eric W. Biederman


Remove work arounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch.

In particular now that tasks exiting the runqueue experience
an rcu grace period none of the work performed by task_rcu_dereference
except the rcu_dereference is necessary so replace task_rcu_dereference
with rcu_dereference.

Remove the code in rcuwait_wait_event that checks to ensure the current
task has not exited.  It is no longer necessary as it is guaranteed
that any running task will experience a rcu grace period after it
leaves the run queue.

Remove the comment in rcuwait_wake_up as it is no longer relevant.

Cc: Davidlohr Bueso 
Cc: Peter Zijlstra (Intel) 
Cc: Oleg Nesterov 
Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and 
try_get_task_struct()")
Signed-off-by: "Eric W. Biederman" 
---
 include/linux/rcuwait.h| 20 +++-
 include/linux/sched/task.h |  1 -
 kernel/exit.c  | 67 --
 kernel/sched/fair.c|  2 +-
 kernel/sched/membarrier.c  |  4 +--
 5 files changed, 7 insertions(+), 87 deletions(-)

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)   \
 ({ \
-   /*  \
-* Complain if we are called after do_exit()/exit_notify(), \
-* as we cannot rely on the rcu critical region for the \
-* wakeup side. \
-*/ \
-   WARN_ON(current->exit_state);   \
-   \
rcu_assign_pointer((w)->task, current); \
for (;;) {  \
/*  \
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 4c44c37236b2..8bd51af44bf8 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -115,7 +115,6 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 2e259286f4e7..f943773622fc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -234,69 +234,6 @@ void release_task(struct task_struct *p)
goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-   struct sighand_struct *sighand;
-   struct task_struct *task;
-
-   /*
-* We need to verify that release_task() was not called and thus
-* delayed_put_task_struct() can't run and drop the last reference
-* before rcu_read_unlock(). We check task->sighand != NULL,
-* but we can read the already freed and reused memory.
-*/
-retry:
-   task = rcu_dereference(*ptask);
-   if (!task)
-   return NULL;
-
-   probe_kernel_address(&task->sighand, sighand);
-
-   /*
-* Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-* was already freed we can not miss the preceding update of this
-* pointer.
-*/
-   smp_rmb();
-   if (unlikely(task != READ_ONCE(*ptask)))
-   goto retry;
-
-   /*
-

[PATCH v2 1/4] task: Add a count of task rcu users

2019-09-14 Thread Eric W. Biederman


Add a count of the number of rcu users (currently 1) of the task
struct so that we can later add the scheduler case and get rid of the
very subtle task_rcu_dereference, and just use rcu_dereference.

As suggested by Oleg have the count overlap rcu_head so that no
additional space in task_struct is required.

Inspired-by: Linus Torvalds 
Inspired-by: Oleg Nesterov 
Signed-off-by: "Eric W. Biederman" 
---
 include/linux/sched.h  | 5 -
 include/linux/sched/task.h | 1 +
 kernel/exit.c  | 7 ++-
 kernel/fork.c  | 7 +++
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..99a4518b9b17 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1142,7 +1142,10 @@ struct task_struct {
 
struct tlbflush_unmap_batch tlb_ubc;
 
-   struct rcu_head rcu;
+   union {
+   refcount_t  rcu_users;
+   struct rcu_head rcu;
+   };
 
/* Cache last used pipe for splice(): */
struct pipe_inode_info  *splice_pipe;
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0497091e40c1..4c44c37236b2 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -116,6 +116,7 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b4a5dcce8f8..2e259286f4e7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
put_task_struct(tsk);
 }
 
+void put_task_struct_rcu_user(struct task_struct *task)
+{
+   if (refcount_dec_and_test(&task->rcu_users))
+   call_rcu(&task->rcu, delayed_put_task_struct);
+}
 
 void release_task(struct task_struct *p)
 {
@@ -222,7 +227,7 @@ void release_task(struct task_struct *p)
 
write_unlock_irq(&tasklist_lock);
release_thread(p);
-   call_rcu(&p->rcu, delayed_put_task_struct);
+   put_task_struct_rcu_user(p);
 
p = leader;
if (unlikely(zap_leader))
diff --git a/kernel/fork.c b/kernel/fork.c
index 2852d0e76ea3..9f04741d5c70 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -900,10 +900,9 @@ static struct task_struct *dup_task_struct(struct 
task_struct *orig, int node)
if (orig->cpus_ptr == &orig->cpus_mask)
tsk->cpus_ptr = &tsk->cpus_mask;
 
-   /*
-* One for us, one for whoever does the "release_task()" (usually
-* parent)
-*/
+   /* One for the user space visible state that goes away when reaped. */
+   refcount_set(&tsk->rcu_users, 1);
+   /* One for the rcu users, and one for the scheduler */
refcount_set(&tsk->usage, 2);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
-- 
2.21.0.dirty



[PATCH v2 2/4] task: Ensure tasks are available for a grace period after leaving the runqueue

2019-09-14 Thread Eric W. Biederman


In the ordinary case today the rcu grace period for a task_struct is
triggered when another process waits for its zombie and causes the
kernel to call release_task().  As the waiting task has to receive a
signal and then act upon it before this happens, typically this will
occur after the original task has been removed from the runqueue.

Unfortunately in some cases such as self reaping tasks it can be shown
that release_task() will be called starting the grace period for
task_struct long before the task leaves the runqueue.

Therefore use put_task_struct_rcu_user in finish_task_switch to
guarantee that there is an rcu lifetime after the task
leaves the runqueue.

Besides the change in the start of the rcu grace period for the
task_struct this change may delay perf_event_delayed_put and
trace_sched_process_free.  The function perf_event_delayed_put boils
down to just a WARN_ON for cases that I assume should never happen.  So
I don't see any problem with delaying it.

The function trace_sched_process_free is a trace point and thus
visible to user space.  Occasionally userspace has the strangest
dependencies so this has a minuscule chance of causing a regression.
This change only changes the timing of when the tracepoint is called.
The change in timing arguably gives userspace a more accurate picture
of what is going on.  So I don't expect there to be a regression.

In the case where a task self reaps we are pretty much guaranteed that
the rcu grace period is delayed.  So we should get quite a bit of
coverage of this worst case for the change in a normal threaded
workload.  So I expect any issues to turn up quickly or not at all.

I have lightly tested this change and everything appears to work
fine.

Inspired-by: Linus Torvalds 
Inspired-by: Oleg Nesterov 
Signed-off-by: "Eric W. Biederman" 
---
 kernel/fork.c   | 11 +++
 kernel/sched/core.c |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 9f04741d5c70..7a74ade4e7d6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -900,10 +900,13 @@ static struct task_struct *dup_task_struct(struct 
task_struct *orig, int node)
if (orig->cpus_ptr == &orig->cpus_mask)
tsk->cpus_ptr = &tsk->cpus_mask;
 
-   /* One for the user space visible state that goes away when reaped. */
-   refcount_set(&tsk->rcu_users, 1);
-   /* One for the rcu users, and one for the scheduler */
-   refcount_set(&tsk->usage, 2);
+   /*
+* One for the user space visible state that goes away when reaped.
+* One for the scheduler.
+*/
+   refcount_set(&tsk->rcu_users, 2);
+   /* One for the rcu users */
+   refcount_set(&tsk->usage, 1);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..69015b7c28da 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3135,7 +3135,7 @@ static struct rq *finish_task_switch(struct task_struct 
*prev)
/* Task is done with its stack. */
put_task_stack(prev);
 
-   put_task_struct(prev);
+   put_task_struct_rcu_user(prev);
}
 
tick_nohz_task_switch();
-- 
2.21.0.dirty



[PATCH v2 3/4] task: With a grace period after finish_task_switch, remove unnecessary code

2019-09-14 Thread Eric W. Biederman


Remove work arounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch.

In particular now that tasks exiting the runqueue experience
an rcu grace period none of the work performed by task_rcu_dereference
except the rcu_dereference is necessary so replace task_rcu_dereference
with rcu_dereference.

Remove the code in rcuwait_wait_event that checks to ensure the current
task has not exited.  It is no longer necessary as it is guaranteed
that any running task will experience a rcu grace period after it
leaves the run queue.

Remove the comment in rcuwait_wake_up as it is no longer relevant.

Cc: Davidlohr Bueso 
Cc: Peter Zijlstra (Intel) 
Cc: Oleg Nesterov 
Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and 
try_get_task_struct()")
Signed-off-by: "Eric W. Biederman" 
---
 include/linux/rcuwait.h| 20 +++-
 include/linux/sched/task.h |  1 -
 kernel/exit.c  | 67 --
 kernel/sched/fair.c|  2 +-
 kernel/sched/membarrier.c  |  4 +--
 5 files changed, 7 insertions(+), 87 deletions(-)

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)   \
 ({ \
-   /*  \
-* Complain if we are called after do_exit()/exit_notify(), \
-* as we cannot rely on the rcu critical region for the \
-* wakeup side. \
-*/ \
-   WARN_ON(current->exit_state);   \
-   \
rcu_assign_pointer((w)->task, current); \
for (;;) {  \
/*  \
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 4c44c37236b2..8bd51af44bf8 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -115,7 +115,6 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 2e259286f4e7..f943773622fc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -234,69 +234,6 @@ void release_task(struct task_struct *p)
goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-   struct sighand_struct *sighand;
-   struct task_struct *task;
-
-   /*
-* We need to verify that release_task() was not called and thus
-* delayed_put_task_struct() can't run and drop the last reference
-* before rcu_read_unlock(). We check task->sighand != NULL,
-* but we can read the already freed and reused memory.
-*/
-retry:
-   task = rcu_dereference(*ptask);
-   if (!task)
-   return NULL;
-
-   probe_kernel_address(&task->sighand, sighand);
-
-   /*
-* Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-* was already freed we can not miss the preceding update of this
-* pointer.
-*/
-   smp_rmb();
-   if (unlikely(task != READ_ONCE(*ptask)))
-   goto retry;
-
-   /*
-

[PATCH v2 4/4] task: RCUify the assignment of rq->curr

2019-09-14 Thread Eric W. Biederman


The current task on the runqueue is currently read with rcu_dereference().

To obtain ordinary rcu semantics for an rcu_dereference of rq->curr it needs
to be paired with rcu_assign_pointer of rq->curr.  Which provides the
memory barrier necessary to order assignments to the task_struct
and the assignment to rq->curr.

Unfortunately the assignment of rq->curr in __schedule is a hot path,
and it has already been shown that additional barriers in that code
will reduce the performance of the scheduler.  So I will attempt to
describe below why you can effectively have ordinary rcu semantics
without any additional barriers.

The assignment of rq->curr in init_idle is a slow path called once
per cpu and that can use rcu_assign_pointer() without any concerns.

As I write this there are effectively two users of rcu_dereference on
rq->curr.  There is the membarrier code in kernel/sched/membarrier.c
that only looks at "->mm" after the rcu_dereference.  Then there is
task_numa_compare() in kernel/sched/fair.c.  My best reading of the
code shows that task_numa_compare only accesses: "->flags",
"->cpus_ptr", "->numa_group", "->numa_faults[]",
"->total_numa_faults", and "->se.cfs_rq".

The code in __schedule() essentially does:
rq_lock(...);
smp_mb__after_spinlock();

next = pick_next_task(...);
rq->curr = next;

context_switch(prev, next);

At the start of the function the rq_lock/smp_mb__after_spinlock
pair provides a full memory barrier.  Further there is a full memory barrier
in context_switch().

This means that any task that has already run and modified itself (the
common case) has already seen two memory barriers before __schedule()
runs and begins executing.  A task that modifies itself then sees a
third full memory barrier pair with the rq_lock();

For a brand new task that is enqueued with wake_up_new_task() there
are the memory barriers present from taking and releasing the
pi_lock and the rq_lock as the process is enqueued as well as the
full memory barrier at the start of __schedule() assuming __schedule()
happens on the same cpu.

This means that by the time we reach the assignment of rq->curr
except for values on the task struct modified in pick_next_task
the code has the same guarantees as if it used rcu_assign_pointer.

Reading through all of the implementations of pick_next_task it
appears pick_next_task is limited to modifying the task_struct fields
"->se", "->rt", "->dl".  These fields are the sched_entity structures
of the various schedulers.

Further "->se.cfs_rq" is only changed in cgroup attach/move operations
initialized by userspace.

Unless I have missed something this means that in practice the
users of "rcu_dereference(rq->curr)" get normal rcu semantics of
rcu_dereference() for the fields they care about, despite the
assignment of rq->curr in __schedule() not using rcu_assign_pointer.

Link: 
https://lore.kernel.org/r/20190903200603.gw2...@hirez.programming.kicks-ass.net
Signed-off-by: "Eric W. Biederman" 
---
 kernel/sched/core.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 69015b7c28da..668262806942 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3857,7 +3857,11 @@ static void __sched notrace __schedule(bool preempt)
 
if (likely(prev != next)) {
rq->nr_switches++;
-   rq->curr = next;
+   /*
+* RCU users of rcu_dereference(rq->curr) may not see
+* changes to task_struct made by pick_next_task().
+*/
+   RCU_INIT_POINTER(rq->curr, next);
/*
 * The membarrier system call requires each architecture
 * to have a full memory barrier after updating
@@ -5863,7 +5867,8 @@ void init_idle(struct task_struct *idle, int cpu)
__set_task_cpu(idle, cpu);
rcu_read_unlock();
 
-   rq->curr = rq->idle = idle;
+   rq->idle = idle;
+   rcu_assign_pointer(rq->curr, idle);
idle->on_rq = TASK_ON_RQ_QUEUED;
 #ifdef CONFIG_SMP
idle->on_cpu = 1;
-- 
2.21.0.dirty



warning: objtool: mce_panic()+0x11b: unreachable instruction

2019-09-14 Thread Borislav Petkov
Hey Josh,

I'm seeing

arch/x86/kernel/cpu/mce/core.o: warning: objtool: mce_panic()+0x11b: 
unreachable instruction

on a brand new debian install here with gcc9: gcc (Debian 9.2.1-4) 9.2.1 
20190821

and thought I should run it by you; you might've seen it already.

So mce_panic is at 8102f390, which makes the offset
0x8102f4ab and at that I have:

  8102f497:   48 83 c4 08 add$0x8,%rsp
  8102f49b:   5b  pop%rbx
  8102f49c:   5d  pop%rbp
  8102f49d:   41 5c   pop%r12
  8102f49f:   41 5d   pop%r13
  8102f4a1:   41 5e   pop%r14
  8102f4a3:   41 5f   pop%r15
  8102f4a5:   c3  retq

<---

  8102f4a6:   e8 b5 fe ff ff  callq  8102f360 

  8102f4ab:   e9 23 ff ff ff  jmpq   8102f3d3 


  8102f4b0 :
  8102f4b0:   e8 eb 21 7d 00  callq  818016a0 
<__fentry__>
  8102f4b5:   55  push   %rbp
  8102f4b6:   48 89 f5mov%rsi,%rbp
  ...

which is two instructions which gcc has put after the RET. Looking at
gcc8 output, I have that too:

  8102e39b:   0f 8e 72 ff ff ff   jle8102e313 

  8102e3a1:   48 8b 5c 24 10  mov0x10(%rsp),%rbx
  8102e3a6:   48 8b 6c 24 18  mov0x18(%rsp),%rbp
  8102e3ab:   4c 8b 64 24 20  mov0x20(%rsp),%r12
  8102e3b0:   4c 8b 6c 24 28  mov0x28(%rsp),%r13
  8102e3b5:   4c 8b 74 24 30  mov0x30(%rsp),%r14
  8102e3ba:   4c 8b 7c 24 38  mov0x38(%rsp),%r15
  8102e3bf:   48 83 c4 40 add$0x40,%rsp
  8102e3c3:   c3  retq

<---

  8102e3c4:   49 3b 16cmp(%r14),%rdx
  8102e3c7:   75 9e   jne8102e367 

  8102e3c9:   49 8b 4e 10 mov0x10(%r14),%rcx
  8102e3cd:   48 39 4d 18 cmp%rcx,0x18(%rbp)
  8102e3d1:   75 94   jne8102e367 

  8102e3d3:   49 8b 4e 08 mov0x8(%r14),%rcx
  8102e3d7:   48 39 4d 10 cmp%rcx,0x10(%rbp)
  8102e3db:   75 8a   jne8102e367 

  8102e3dd:   eb a2   jmp8102e381 

  8102e3df:   e8 8c fe ff ff  callq  8102e270 

  8102e3e4:   4d 85 f6test   %r14,%r14
  8102e3e7:   0f 85 4c 26 00 00   jne81030a39 

  8102e3ed:   0f 1f 00nopl   (%rax)
  8102e3f0:   e9 b2 25 00 00  jmpq   810309a7 

  8102e3f5:   66 66 2e 0f 1f 84 00data16 nopw 
%cs:0x0(%rax,%rax,1)
  8102e3fc:   00 00 00 00

  8102e400 <__start_timer>:
  8102e400:   e8 eb 2f 7d 00  callq  818013f0 
<__fentry__>
  8102e405:   48 83 ec 10 sub$0x10,%rsp
  8102e409:   48 8b 05 f0 6b fd 00mov0xfd6bf0(%rip),%rax
# 82005000 
  ...

but objtool doesn't complain there, for some reason.

Thoughts?

Btw, I have a couple of those warnings on gcc9:

arch/x86/kernel/cpu/mce/core.o: warning: objtool: mce_panic()+0x11b: 
unreachable instruction
kernel/exit.o: warning: objtool: __x64_sys_exit_group()+0x14: unreachable 
instruction
fs/btrfs/extent_io.o: warning: objtool: __set_extent_bit.cold()+0xd: 
unreachable instruction
fs/btrfs/relocation.o: warning: objtool: add_tree_block.isra.0.cold()+0xc: 
unreachable instruction
net/core/skbuff.o: warning: objtool: skb_push.cold()+0x15: unreachable 
instruction

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH] media: uvcvideo: Fix a typo in UVC_METATADA_BUF_SIZE

2019-09-14 Thread Laurent Pinchart
Hi Christophe,

Thank you for the patch.

On Wed, Jul 24, 2019 at 06:56:12AM +0200, Christophe JAILLET wrote:
> It is likely that it should be UVC_METADATA_BUF_SIZE instead.
> Fix it and use it.
> 
> Signed-off-by: Christophe JAILLET 

Oops indeed. Applied to my tree for v5.5.

> ---
>  drivers/media/usb/uvc/uvc_metadata.c | 4 ++--
>  drivers/media/usb/uvc/uvc_queue.c| 2 +-
>  drivers/media/usb/uvc/uvcvideo.h | 2 +-
>  3 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/media/usb/uvc/uvc_metadata.c 
> b/drivers/media/usb/uvc/uvc_metadata.c
> index 99bb71b47117..b6279ad7ac84 100644
> --- a/drivers/media/usb/uvc/uvc_metadata.c
> +++ b/drivers/media/usb/uvc/uvc_metadata.c
> @@ -51,7 +51,7 @@ static int uvc_meta_v4l2_get_format(struct file *file, void 
> *fh,
>   memset(fmt, 0, sizeof(*fmt));
>  
>   fmt->dataformat = stream->meta.format;
> - fmt->buffersize = UVC_METATADA_BUF_SIZE;
> + fmt->buffersize = UVC_METADATA_BUF_SIZE;
>  
>   return 0;
>  }
> @@ -72,7 +72,7 @@ static int uvc_meta_v4l2_try_format(struct file *file, void 
> *fh,
>  
>   fmt->dataformat = fmeta == dev->info->meta_format
>   ? fmeta : V4L2_META_FMT_UVC;
> - fmt->buffersize = UVC_METATADA_BUF_SIZE;
> + fmt->buffersize = UVC_METADATA_BUF_SIZE;
>  
>   return 0;
>  }
> diff --git a/drivers/media/usb/uvc/uvc_queue.c 
> b/drivers/media/usb/uvc/uvc_queue.c
> index da72577c2998..cd60c6c1749e 100644
> --- a/drivers/media/usb/uvc/uvc_queue.c
> +++ b/drivers/media/usb/uvc/uvc_queue.c
> @@ -79,7 +79,7 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>  
>   switch (vq->type) {
>   case V4L2_BUF_TYPE_META_CAPTURE:
> - size = UVC_METATADA_BUF_SIZE;
> + size = UVC_METADATA_BUF_SIZE;
>   break;
>  
>   default:
> diff --git a/drivers/media/usb/uvc/uvcvideo.h 
> b/drivers/media/usb/uvc/uvcvideo.h
> index c7c1baa90dea..f773dc5d802c 100644
> --- a/drivers/media/usb/uvc/uvcvideo.h
> +++ b/drivers/media/usb/uvc/uvcvideo.h
> @@ -491,7 +491,7 @@ struct uvc_stats_stream {
>   unsigned int max_sof;   /* Maximum STC.SOF value */
>  };
>  
> -#define UVC_METATADA_BUF_SIZE 1024
> +#define UVC_METADATA_BUF_SIZE 1024
>  
>  /**
>   * struct uvc_copy_op: Context structure to schedule asynchronous memcpy

-- 
Regards,

Laurent Pinchart


Re: [PATCH] MAINTAINERS: Add a git and a maintainer entry to keyring subsystems

2019-09-14 Thread Jarkko Sakkinen
On Fri, Sep 13, 2019 at 04:03:53PM +0100, David Howells wrote:
> Jarkko Sakkinen  wrote:
> 
> > Subject: [PATCH] MAINTAINERS: Add a git and a maintainer entry to keyring 
> > subsystems
> 
> I would recommend splitting the patch in two and putting something like:
> 
>   keys: Add Jarkko Sakkinen as co-maintainer
> 
> as the subject of the keyrings maintainership one.

Thanks, I'll refine accordingly.

/Jarkko


Re: [PATCH] MAINTAINERS: Add a git and a maintainer entry to keyring subsystems

2019-09-14 Thread Jarkko Sakkinen
On Fri, Sep 13, 2019 at 10:36:15AM -0700, Joe Perches wrote:
> On Fri, 2019-09-13 at 16:03 +0100, David Howells wrote:
> > Jarkko Sakkinen  wrote:
> > 
> > > Subject: [PATCH] MAINTAINERS: Add a git and a maintainer entry to keyring 
> > > subsystems
> > 
> > I would recommend splitting the patch in two and putting something like:
> > 
> > keys: Add Jarkko Sakkinen as co-maintainer
> > 
> > as the subject of the keyrings maintainership one.
> 
> Why is there utility in micro splitting such a trivial patch?

I kind of get this for the MAINTAINERS file so that the changes can
be agreed/disagreed separately.

/Jarkko


Re: [RFC] buildtar: add case for riscv architecture

2019-09-14 Thread Palmer Dabbelt

On Wed, 11 Sep 2019 05:54:07 PDT (-0700), m...@aurabindo.in wrote:




None of the available RiscV platforms that I’m aware of use compressed images, 
unless there are some new bootloaders I haven’t seen yet.



I noticed that default build image is Image.gz, which is why I thought it's a 
good idea to copy it into the tarball. Does such a copy not make sense at this 
point ?


Image.gz can't be booted directly: it's just Image that's been compressed with 
the standard gzip command.  A bootloader would have to decompress that image 
before loading it into memory, which requires extra bootloader support.  
Contrast that with the zImage style images (which are vmlinuz on x86), which 
are self-extracting and therefore require no bootloader support.  The examples 
for u-boot all use the "booti" command, which expects uncompressed images.  
Poking around I couldn't figure out a way to have u-boot decompress the images, 
but that applies to arm64 as well so I'm not sure if I'm missing something.


If I was doing this, I'd copy over arch/riscv/boot/Image and call it 
"/boot/image-${KERNELRELEASE}", as calling it vmlinuz is a bit confusing to me 
because I'd expect vmlinuz to be a self-extracting compressed executable and 
not a raw gzip file.


Re: [PATCH 3/6] hwrng: omap3-rom - Call clk_prepare() on init and exit only

2019-09-14 Thread Sebastian Reichel
Hi,

On Fri, Sep 13, 2019 at 03:09:19PM -0700, Tony Lindgren wrote:
> Also, we should not call prepare and unprepare except during init, and
> only call enable and disable during use.

Why? Usually clk_(un)prepare() is the part saving most power, so I
would expect the runtime resume handlers to call clk_prepare_enable
and vice versa in the suspend handler.

-- Sebastian


signature.asc
Description: PGP signature


Re: [PATCH v4] KEYS: trusted: correctly initialize digests and fix locking issue

2019-09-14 Thread Jarkko Sakkinen
On Fri, Sep 13, 2019 at 08:51:36PM +0200, Roberto Sassu wrote:
> Commit 0b6cf6b97b7e ("tpm: pass an array of tpm_extend_digest structures to
> tpm_pcr_extend()") modifies tpm_pcr_extend() to accept a digest for each
> PCR bank. After modification, tpm_pcr_extend() expects that digests are
> passed in the same order as the algorithms set in chip->allocated_banks.
> 
> This patch fixes two issues introduced in the last iterations of the patch
> set: missing initialization of the TPM algorithm ID in the tpm_digest
> structures passed to tpm_pcr_extend() by the trusted key module, and
> unreleased locks in the TPM driver due to returning from tpm_pcr_extend()
> without calling tpm_put_ops().
> 
> Signed-off-by: Roberto Sassu 
> Suggested-by: Jarkko Sakkinen 
> Reviewed-by: Jerry Snitselaar 
> Fixes: 0b6cf6b97b7e ("tpm: pass an array of tpm_extend_digest structures to 
> tpm_pcr_extend()")

Reviewed-by:  

I picked up this patch to my tree.

/Jarkko


RE: [RFC] buildtar: add case for riscv architecture

2019-09-14 Thread Anup Patel


> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org  ow...@vger.kernel.org> On Behalf Of Palmer Dabbelt
> Sent: Saturday, September 14, 2019 6:30 PM
> To: m...@aurabindo.in
> Cc: Troy Benjegerdes ; Paul Walmsley
> ; a...@eecs.berkeley.edu; linux-
> ri...@lists.infradead.org; linux-kernel@vger.kernel.org; linux-
> kbu...@vger.kernel.org
> Subject: Re: [RFC] buildtar: add case for riscv architecture
> 
> On Wed, 11 Sep 2019 05:54:07 PDT (-0700), m...@aurabindo.in wrote:
> >
> >
> >> None of the available RiscV platforms that I’m aware of use compressed
> images, unless there are some new bootloaders I haven’t seen yet.
> >>
> >
> > I noticed that default build image is Image.gz, which is why I thought it's a
> good idea to copy it into the tarball. Does such a copy not make sense at this
> point ?
> 
> Image.gz can't be booted directly: it's just Image that's been compressed
> with the standard gzip command.  A bootloader would have to decompress
> that image before loading it into memory, which requires extra bootloader
> support.
> Contrast that with the zImage style images (which are vmlinuz on x86), which
> are self-extracting and therefore require no bootloader support.  The
> examples for u-boot all use the "booti" command, which expects
> uncompressed images.
> Poking around I couldn't figure out a way to have u-boot decompress the
> images, but that applies to arm64 as well so I'm not sure if I'm missing
> something.
> 
> If I was doing this, I'd copy over arch/riscv/boot/Image and call it
> "/boot/image-${KERNELRELEASE}", as calling it vmlinuz is a bit confusing to
> me because I'd expect vmlinuz to be a self-extracting compressed
> executable and not a raw gzip file.

On the contrary, it is indeed possible to boot Image.gz directly using
U-Boot booti command so this patch would be useful.

Atish had got it working on U-Boot but he has deferred booti Image.gz
support due to a few more dependent changes. Maybe he can share
more info.

Regards,
Anup


Re: pull-request: wireless-drivers-next 2019-09-14

2019-09-14 Thread David Miller
From: Kalle Valo 
Date: Sat, 14 Sep 2019 13:14:40 +0300

> here's a pull request to net-next tree for v5.4, more info below. Please
> let me know if there are any problems.

Pulled, thanks Kalle.


Re: [PATCH] staging: exfat: add exfat filesystem code to

2019-09-14 Thread Park Ju Hyung
Hi.

I just noticed that these exfat-staging drivers are based on 
Samsung's old 1.x exFAT drivers.

I've been working to get Samsung's newer driver (now named "sdFAT") 
to fit better for general Linux users, and I believe it can provide a 
better base for the community to work on (and hopefully complies better 
with the mainline coding standard).

GitHub link
https://github.com/arter97/exfat-linux

I also included some rudimentary benchmark results.

I encourage mainline developers to explore this driver base and see if 
it's worth switching, since it's the early days of exfat-staging.

To others watching this thread:
It's more than likely that you can start using exFAT reliably right 
away by following the link above. It's tested on all major LTS kernels 
ranging from 3.4 to 4.19 and the ones Canonical uses for Ubuntu: 3.4, 
3.10, 3.18, 4.1, 4.4, 4.9, 4.14, 4.19 and 4.15, 5.0, 5.2, and 5.3-rc.

Thanks.


Re: [PATCH v22 00/24] Intel SGX foundations

2019-09-14 Thread Jarkko Sakkinen
On Fri, Sep 13, 2019 at 01:38:18PM -0700, Dave Hansen wrote:
> On 9/3/19 7:26 AM, Jarkko Sakkinen wrote:
> > Not having LSM hooks does not cause any risk to other parts of the
> > kernel as the device can still be controlled by using DAC permissions.
> > The hooks just provide more granularity than DAC in access decisions.
> 
> Could we translate the security-speak to english, please? :)
> 
> Is this it:
> 
>   LSMs can (try to) enforce things like "all executable code must
>   be verified".  The implementation in these patches has the
>   potential to subvert policies like that since it has its own
>   unique mechanisms for loading and mapping executable code.  This
>   will be fixed by future LSM enhancements on top of this set.
>   For now, permissions on the SGX device file should be used to
>   prevent untrusted users from using SGX to subvert LSM policies.

I'm not sure what "security-speak" is but let's try plain English and
see where we get from there.

The proposed LSM hooks give the granularity to make a yes/no decision
based on:

* The origin of the source for the enclave.
* The requested permissions for the added or mapped page.

The hooks to do these checks are provided for mmap() and EADD
operations.

With just file permissions you can still limit mmap() by having a
privileged process build the enclaves and pass the file descriptor
to the enclave user who can mmap() the enclave within the constraints
set by the enclave pages (their permissions refine the roof that you
can mmap() any memory range within an enclave).

/Jarkko


Re: [RFC v2 1/2] ARM: dts: omap3: Add cpu trips and cooling map for omap3 family

2019-09-14 Thread Adam Ford
On Sat, Sep 14, 2019 at 4:20 AM H. Nikolaus Schaller  wrote:
>
>
> > Am 13.09.2019 um 17:37 schrieb Adam Ford :
> >
> > The OMAP3530, AM3517 and DM3730 all show thresholds of 90C and 105C
> > depending on commercial or industrial temperature ratings.  This
> > patch expands the thermal information to the limits of 90 and 105
> > for alert and critical.
> >
> > For boards who never use industrial temperatures, these can be
> > changed on their respective device trees with something like:
> >
> > &cpu_alert0 {
> >   temperature = <85000>; /* millicelsius */
> > };
> >
> > &cpu_crit {
> >   temperature = <90000>; /* millicelsius */
> > };
> >
> > Signed-off-by: Adam Ford 
> > ---
> > V2:  Change the CPU reference to &cpu instead of &cpu0
> >
> > diff --git a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi 
> > b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> > index 235ecfd61e2d..dfbd0cb0b00b 100644
> > --- a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> > +++ b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> > @@ -17,4 +17,25 @@ cpu_thermal: cpu_thermal {
> >
> >   /* sensor   ID */
> >   thermal-sensors = <&bandgap 0>;
> > +
> > + cpu_trips: trips {
> > + cpu_alert0: cpu_alert {
> > + temperature = <9>; /* millicelsius */
> > + hysteresis = <2000>; /* millicelsius */
> > + type = "passive";
> > + };
> > + cpu_crit: cpu_crit {
> > + temperature = <105000>; /* millicelsius */
> > + hysteresis = <2000>; /* millicelsius */
> > + type = "critical";
> > + };
> > + };
> > +
> > + cpu_cooling_maps: cooling-maps {
> > + map0 {
> > + trip = <&cpu_alert0>;
> > + cooling-device =
> > + <&cpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> > + };
> > + };
> > };
> > --
> > 2.17.1
> >
>
> Here is my test log (GTA04A5 with DM3730CBP100).
> "high-load" script is driving the NEON to full power
> and would report calculation errors.
>
> There is no noise visible in the bandgap sensor data
> induced by power supply fluctuations (log shows system
> voltage while charging).
>

Great data!

> root@letux:~# ./high-load -n2
> 100% load stress test for 1 cores running ./neon_loop2
> Sat Sep 14 09:05:50 UTC 2019 65° 4111mV 1000MHz
> Sat Sep 14 09:05:50 UTC 2019 67° 4005mV 1000MHz
> Sat Sep 14 09:05:52 UTC 2019 68° 4000mV 1000MHz
> Sat Sep 14 09:05:53 UTC 2019 68° 4000mV 1000MHz
> Sat Sep 14 09:05:55 UTC 2019 72° 3976mV 1000MHz
> Sat Sep 14 09:05:56 UTC 2019 72° 4023mV 1000MHz
> Sat Sep 14 09:05:57 UTC 2019 72° 3900mV 1000MHz
> Sat Sep 14 09:05:59 UTC 2019 73° 4029mV 1000MHz
> Sat Sep 14 09:06:00 UTC 2019 73° 3988mV 1000MHz
> Sat Sep 14 09:06:01 UTC 2019 73° 4005mV 1000MHz
> Sat Sep 14 09:06:03 UTC 2019 73° 4011mV 1000MHz
> Sat Sep 14 09:06:04 UTC 2019 73° 4117mV 1000MHz
> Sat Sep 14 09:06:06 UTC 2019 73° 4005mV 1000MHz
> Sat Sep 14 09:06:07 UTC 2019 75° 3994mV 1000MHz
> Sat Sep 14 09:06:08 UTC 2019 75° 3970mV 1000MHz
> Sat Sep 14 09:06:09 UTC 2019 75° 4046mV 1000MHz
> Sat Sep 14 09:06:11 UTC 2019 75° 4005mV 1000MHz
> Sat Sep 14 09:06:12 UTC 2019 75° 4023mV 1000MHz
> Sat Sep 14 09:06:14 UTC 2019 75° 3970mV 1000MHz
> Sat Sep 14 09:06:15 UTC 2019 75° 4011mV 1000MHz
> Sat Sep 14 09:06:16 UTC 2019 77° 4017mV 1000MHz
> Sat Sep 14 09:06:18 UTC 2019 77° 3994mV 1000MHz
> Sat Sep 14 09:06:19 UTC 2019 77° 3994mV 1000MHz
> Sat Sep 14 09:06:20 UTC 2019 77° 3988mV 1000MHz
> Sat Sep 14 09:06:22 UTC 2019 77° 4023mV 1000MHz
> Sat Sep 14 09:06:23 UTC 2019 77° 4023mV 1000MHz
> Sat Sep 14 09:06:24 UTC 2019 78° 4005mV 1000MHz
> Sat Sep 14 09:06:26 UTC 2019 78° 4105mV 1000MHz
> Sat Sep 14 09:06:27 UTC 2019 78° 4011mV 1000MHz
> Sat Sep 14 09:06:28 UTC 2019 78° 3994mV 1000MHz
> Sat Sep 14 09:06:30 UTC 2019 78° 4123mV 1000MHz
> ...
> Sat Sep 14 09:09:57 UTC 2019 88° 4082mV 1000MHz
> Sat Sep 14 09:09:59 UTC 2019 88° 4164mV 1000MHz
> Sat Sep 14 09:10:00 UTC 2019 88° 4058mV 1000MHz
> Sat Sep 14 09:10:01 UTC 2019 88° 4058mV 1000MHz
> Sat Sep 14 09:10:03 UTC 2019 88° 4082mV 1000MHz
> Sat Sep 14 09:10:04 UTC 2019 88° 4058mV 1000MHz
> Sat Sep 14 09:10:06 UTC 2019 88° 4146mV 1000MHz
> Sat Sep 14 09:10:07 UTC 2019 88° 4041mV 1000MHz
> Sat Sep 14 09:10:08 UTC 2019 88° 4035mV 1000MHz
> Sat Sep 14 09:10:10 UTC 2019 88° 4052mV 1000MHz
> Sat Sep 14 09:10:11 UTC 2019 88° 4087mV 1000MHz
> Sat Sep 14 09:10:12 UTC 2019 88° 4152mV 1000MHz
> Sat Sep 14 09:10:14 UTC 2019 88° 4070mV 1000MHz
> Sat Sep 14 09:10:15 UTC 2019 88° 4064mV 1000MHz
> Sat Sep 14 09:10:17 UTC 2019 88° 4170mV 1000MHz
> Sat Sep 14 09:10:18 UTC 2019 88° 4058mV 1000MHz
> Sat Sep 14 09:10:19 UTC 2019 88° 4187mV 1000MHz
> Sat Sep 14 09:10:21 UTC 2019 88° 4093mV 1000MHz
> Sat Sep 14 09:10:22 UTC 2019 88° 4087mV 1000MHz
> Sat Sep 14 09:10:23 UTC 2019 90° 4070mV 1000MHz

Should we be a little more conservative?  Without knowing t

Re: [RFC v2 2/2] ARM: omap3: Consolidate thermal references to common omap3

2019-09-14 Thread Adam Ford
On Sat, Sep 14, 2019 at 4:25 AM H. Nikolaus Schaller  wrote:
>
>
> > Am 13.09.2019 um 17:37 schrieb Adam Ford :
> >
> > Because the omap34xx, omap36xx and am3517 SoC's have the same
> > thermal junction limits, there is no need to duplicate the entry
> > multiple times.
> >
> > This patch removes the thermal references from omap36xx and
> > omap34xx and pushes it into the common omap3.dtsi file with
> > the added benefit of enabling the thermal info on the AM3517.
> >
> > Signed-off-by: Adam Ford 

Disregard this patch.  I'll drop it based on Nikolaus' comments below.

> > ---
> > V2:   Add node name for cpu and add cooling-cells entry
> >
> > diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
> > index 4043ecb38016..84704eb3b604 100644
> > --- a/arch/arm/boot/dts/omap3.dtsi
> > +++ b/arch/arm/boot/dts/omap3.dtsi
> > @@ -32,7 +32,7 @@
> >   #address-cells = <1>;
> >   #size-cells = <0>;
> >
> > - cpu@0 {
> > + cpu: cpu@0 {
> >   compatible = "arm,cortex-a8";
> >   device_type = "cpu";
> >   reg = <0x0>;
> > @@ -41,9 +41,14 @@
> >   clock-names = "cpu";
> >
> >   clock-latency = <30>; /* From omap-cpufreq driver 
> > */
> > + #cooling-cells = <2>;
> >   };
> >   };
>
> Looks ok.
>
> >
> > + thermal_zones: thermal-zones {
> > + #include "omap3-cpu-thermal.dtsi"
> > + };
> > +
>
> I have observed one compile issue: we also include this indirectly by 
> am3517.dtsi
> and the included code refers to <&bandgap 0> but there is no bandgap 
> definition in am3517.dtsi
>
> Therefore I studied the am35x TRM (SPRUGR0C) and compared to the am/dm37x TRM 
> (SPRUGN4M).
>
> But I can't find a bandgap temperature sensor with ADC like it is described in
> "13.4.6 Band Gap Voltage and Temperature Sensor" for the am/dm37x. Only
> "BANDGAP Logic" exists in both and both have the CM_FCLKEN3_CORE but with
> different meaning of bit 0.

I didn't read the technical details, I just read there was a bandgap
logic, so I assumed it existed.

>
> There is also no description of an CONTROL_TEMP_SENSOR (0x48002524) register 
> for am35x.
> (note: the register is also documented for omap3530).

Thanks for looking into this.

>
> So this might mean that the am35x does not have this feature unless TI simply
> did not document it because the chip is specified for a single OPP only where 
> it
> make no sense to monitor the temperature.
>
> We can find out only by looking at 0x48002524 if there is an undocumented
> bandgap converter.

I will try to read this register when I have some time, but I have to
watch Chelsea FC play in 15 minutes.  ;-)

>
> Which means we probably can't make thermal throttling work for it. And even
> if the bandgap sensor exists we are lacking an value -> celsius table.

I think it's probably best to abandon this patch, per my comment based
on all your comments.

>
>
> >   pmu@5400 {
> >   compatible = "arm,cortex-a8-pmu";
> >   reg = <0x5400 0x80>;
> > diff --git a/arch/arm/boot/dts/omap34xx.dtsi 
> > b/arch/arm/boot/dts/omap34xx.dtsi
> > index f572a477f74c..b80378d6e5c1 100644
> > --- a/arch/arm/boot/dts/omap34xx.dtsi
> > +++ b/arch/arm/boot/dts/omap34xx.dtsi
> > @@ -101,10 +101,6 @@
> >   };
> >   };
> >   };
> > -
> > - thermal_zones: thermal-zones {
> > - #include "omap3-cpu-thermal.dtsi"
> > - };
> > };
> >
> > &ssi {
> > diff --git a/arch/arm/boot/dts/omap36xx.dtsi 
> > b/arch/arm/boot/dts/omap36xx.dtsi
> > index 6fb23ada1f64..ff2dca63a04e 100644
> > --- a/arch/arm/boot/dts/omap36xx.dtsi
> > +++ b/arch/arm/boot/dts/omap36xx.dtsi
> > @@ -140,10 +140,6 @@
> >   };
> >   };
> >   };
> > -
> > - thermal_zones: thermal-zones {
> > - #include "omap3-cpu-thermal.dtsi"
> > - };
> > };
>
> So if we have to exclude the am3517 we can not apply the rearrangement part
> of this patch.
>
> I'd suggest to move the cpu: cpu@0 and #cooling-cells into 1/2 (also to make 
> it
> compile stand-alone). And have the consolidation separately - if we can fix 
> the
> am3517 bandgap sensor issue.

I'll drop this, and leave everything in the omap3-cpu-thermal file and
let omap34xx and omap36xx point to them as we do now.

>
> >
> > /* OMAP3630 needs dss_96m_fck for VENC */
> > --
> > 2.17.1
> >
>
> Tested-by: H. Nikolaus Schaller  # on GTA04A5 with 
> dm3730cbp100
>


Re: [RESEND PATCH] MAINTAINERS: keys: Update path to trusted.h

2019-09-14 Thread Jarkko Sakkinen
On Tue, Sep 10, 2019 at 02:50:39PM +0300, Denis Efremov wrote:
> Hi,
> 
> On 8/16/19 9:58 PM, Jarkko Sakkinen wrote:
> > On Fri, Aug 16, 2019 at 01:12:00AM +0300, Denis Efremov wrote:
> >> Update MAINTAINERS record to reflect that trusted.h
> >> was moved to a different directory in commit 22447981fc05
> >> ("KEYS: Move trusted.h to include/keys [ver #2]").
> >>
> >> Cc: Denis Kenzior 
> >> Cc: James Bottomley 
> >> Cc: Jarkko Sakkinen 
> >> Cc: Mimi Zohar 
> >> Cc: linux-integr...@vger.kernel.org
> >> Signed-off-by: Denis Efremov 
> > 
> > Acked-by: Jarkko Sakkinen 
> > 
> > /Jarkko
> > 
> 
> Could someone take this fix through his tree?

I picked this up now to the tpmdd tree.

/Jarkko


[PATCH] clk: sunxi-ng: h6: Use sigma-delta modulation for audio PLL

2019-09-14 Thread Jernej Skrabec
Audio devices need exact clock rates in order to correctly reproduce
the sound. Until now, only integer factors were used to configure the H6
audio PLL, which resulted in inexact rates. Fix that by adding support
for fractional factors using a sigma-delta modulation look-up table. It
contains values for the two most commonly used audio base frequencies.

Signed-off-by: Jernej Skrabec 
---
 drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c 
b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
index d89353a3cdec..ed6338d74474 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
@@ -203,12 +203,21 @@ static struct ccu_nkmp pll_hsic_clk = {
  * hardcode it to match with the clock names.
  */
 #define SUN50I_H6_PLL_AUDIO_REG0x078
+
+static struct ccu_sdm_setting pll_audio_sdm_table[] = {
+   { .rate = 541900800, .pattern = 0xc001288d, .m = 1, .n = 22 },
+   { .rate = 589824000, .pattern = 0xc00126e9, .m = 1, .n = 24 },
+};
+
 static struct ccu_nm pll_audio_base_clk = {
.enable = BIT(31),
.lock   = BIT(28),
.n  = _SUNXI_CCU_MULT_MIN(8, 8, 12),
.m  = _SUNXI_CCU_DIV(1, 1), /* input divider */
+   .sdm= _SUNXI_CCU_SDM(pll_audio_sdm_table,
+BIT(24), 0x178, BIT(31)),
.common = {
+   .features   = CCU_FEATURE_SIGMA_DELTA_MOD,
.reg= 0x078,
.hw.init= CLK_HW_INIT("pll-audio-base", "osc24M",
  &ccu_nm_ops,
@@ -753,12 +762,12 @@ static const struct clk_hw *clk_parent_pll_audio[] = {
 };
 
 /*
- * The divider of pll-audio is fixed to 8 now, as pll-audio-4x has a
- * fixed post-divider 2.
+ * The divider of pll-audio is fixed to 24 for now, so 24576000 and 22579200
+ * rates can be set exactly in conjunction with sigma-delta modulation.
  */
 static CLK_FIXED_FACTOR_HWS(pll_audio_clk, "pll-audio",
clk_parent_pll_audio,
-   8, 1, CLK_SET_RATE_PARENT);
+   24, 1, CLK_SET_RATE_PARENT);
 static CLK_FIXED_FACTOR_HWS(pll_audio_2x_clk, "pll-audio-2x",
clk_parent_pll_audio,
4, 1, CLK_SET_RATE_PARENT);
@@ -1215,12 +1224,12 @@ static int sun50i_h6_ccu_probe(struct platform_device 
*pdev)
}
 
/*
-* Force the post-divider of pll-audio to 8 and the output divider
-* of it to 1, to make the clock name represents the real frequency.
+* Force the post-divider of pll-audio to 12 and the output divider
+* of it to 2, so 24576000 and 22579200 rates can be set exactly.
 */
val = readl(reg + SUN50I_H6_PLL_AUDIO_REG);
val &= ~(GENMASK(21, 16) | BIT(0));
-   writel(val | (7 << 16), reg + SUN50I_H6_PLL_AUDIO_REG);
+   writel(val | (11 << 16) | BIT(0), reg + SUN50I_H6_PLL_AUDIO_REG);
 
/*
 * First clock parent (osc32K) is unusable for CEC. But since there
-- 
2.23.0



[GIT PULL] Urgent RISC-V fix for v5.3

2019-09-14 Thread Paul Walmsley
Linus,

The following changes since commit f74c2bb98776e2de508f4d607cd519873065118e:

  Linux 5.3-rc8 (2019-09-08 13:33:15 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git 
tags/riscv/for-v5.3

for you to fetch changes up to 474efecb65dceb15f793b6e2f2b226e952f0f8e9:

  riscv: modify the Image header to improve compatibility with the ARM64 header 
(2019-09-13 19:03:52 -0700)


Urgent RISC-V fix for v5.3

Last week, Palmer and I learned that there was an error in the RISC-V
kernel image header format that could make it less compatible with the
ARM64 kernel image header format.  I had missed this error during my
original reviews of the patch.

The kernel image header format is an interface that impacts
bootloaders, QEMU, and other user tools.  Those packages must be
updated to align with whatever is merged in the kernel.  We would like
to avoid proliferating these image formats by keeping the RISC-V
header as close as possible to the existing ARM64 header.  Since the
arch/riscv patch that adds support for the image header was merged
with our v5.3-rc1 pull request as commit 0f327f2aaad6a ("RISC-V: Add
an Image header that boot loader can parse."), we think it wise to try
to fix this error before v5.3 is released.

The fix itself should be backwards-compatible with any project that
has already merged support for premature versions of this interface.
It primarily involves ensuring that the RISC-V image header has
something useful in the same field as the ARM64 image header.


Paul Walmsley (1):
  riscv: modify the Image header to improve compatibility with the ARM64 
header

 Documentation/riscv/boot-image-header.txt | 13 +++--
 arch/riscv/include/asm/image.h| 12 ++--
 arch/riscv/kernel/head.S  |  4 ++--
 3 files changed, 15 insertions(+), 14 deletions(-)


Re: [PATCH 4.4 0/9] 4.4.193-stable review

2019-09-14 Thread Guenter Roeck

On 9/13/19 6:06 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.4.193 release.
There are 9 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
Anything received after that time might be too late.



Build results:
total: 170 pass: 170 fail: 0
Qemu test results:
total: 324 pass: 324 fail: 0

Guenter


Re: [PATCH] riscv: modify the Image header to improve compatibility with the ARM64 header

2019-09-14 Thread Paul Walmsley
On Sat, 14 Sep 2019, Atish Patra wrote:

> Thanks for the quick fix. Is there a planned timeline when we can
> remove the deprecated magic ?

If Linus merges this patch, we should probably start the transition in the 
bootloaders, QEMU, and user tools as quickly as possible.  Probably the 
key element in the timeline is when we remove support for the old 64-bit 
magic number location in the kernel.  I'm told that U-Boot and QEMU have 
already issued releases with support for the v0.1 image header format, so 
dropping the old magic number from the kernel is probably at least a few 
years away.  (This is to increase the likelihood that anyone using the old 
software has had the chance to update them.)

> I was planning to send a U-boot header documentation patch to match
> Linux one anyways. Do you want me to rebase that on this patch or
> are you planning to send a U-boot patch as well ?

Once v5.3 comes out, please go ahead.


- Paul


Re: [PATCH 2/9] perf/core: Add PERF_SAMPLE_CGROUP feature

2019-09-14 Thread Song Liu
Hi Tejun,

On Sat, Aug 31, 2019 at 6:01 AM Tejun Heo  wrote:
>
> Hello,
>
> On Sat, Aug 31, 2019 at 12:03:26PM +0900, Namhyung Kim wrote:
> > Hmm.. it looks hard to use fhandle as the identifier since perf
> > sampling is done in NMI context.  AFAICS the encode_fh part seems ok
> > but getting dentry/inode from a kernfs_node seems not.
> >
> > I assume kernfs_node_id's ino and gen are same to its inode's.  Then
> > we might use kernfs_node for encoding but not sure you like it ;-)
>
> Oh yeah, the whole cgroup id situation is kinda shitty and it's likely
> that it needs to be cleaned up a bit for this to be used widely.  The
> issues are...

Here are my 2 cents about this.

I think we don't need a perfect identifier in this case. IIUC, the goal of
this patchset is to map each sample with a cgroup name (or full path).
To achieve this, we need

1. PERF_RECORD_CGROUP, that maps
   "64-bit number" => cgroup name/path
2. PERF_SAMPLE_CGROUP, that adds "64-bit number" to each sample.

I call the id a "64-bit number" because it is not required to be a globally
unique id. As long as it is consistent within the same perf-record session,
we won't get any confusion. Since we add PERF_RECORD_CGROUP
for each cgroup creation, we will map most samples correctly even
when the "64-bit number" is recycled within the same perf-record session.

At the moment, I think ino is good enough for the "64-bit number" even
for 32-bit systems. If we don't call it "ino" (just call it "cgroup_tag" or
"cgroup_id"), we can change it when kernfs provides a better 64-bit id.

About full path name: The user names the full path here. If the user gives
two different workloads the same name/path, we really cannot change that.
Reasonable users would be able to make sense from the full path.

Thanks,
Song


Re: [PATCH 4.14 00/21] 4.14.144-stable review

2019-09-14 Thread Guenter Roeck

On 9/13/19 6:06 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.14.144 release.
There are 21 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
Anything received after that time might be too late.



Build results:
total: 172 pass: 168 fail: 4
Failed builds:
arm:allmodconfig
i386:allyesconfig
i386:allmodconfig
mips:allmodconfig
Qemu test results:
total: 372 pass: 372 fail: 0

drivers/vhost/vhost.c: In function 'translate_desc':
include/linux/compiler.h:549:38: error:
call to '__compiletime_assert_1879' declared with attribute error: 
BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)

Affects all 32 bit builds with vhost enabled in v4.9.y and all more recent
branches. Caused by commit a89db445fbd7f1 ("vhost: block speculation of
translated descriptors").

Guenter


Re: [PATCH 4.19 000/190] 4.19.73-stable review

2019-09-14 Thread Guenter Roeck

On 9/13/19 6:04 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.19.73 release.
There are 190 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
Anything received after that time might be too late.



Build results:
total: 156 pass: 152 fail: 4
Failed builds:
arm:allmodconfig
i386:allyesconfig
i386:allmodconfig
mips:allmodconfig
Qemu test results:
total: 390 pass: 390 fail: 0

Guenter


Re: [PATCH 5.2 00/37] 5.2.15-stable review

2019-09-14 Thread Guenter Roeck

On 9/13/19 6:07 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 5.2.15 release.
There are 37 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
Anything received after that time might be too late.



Build results:
total: 159 pass: 155 fail: 4
Failed builds:
arm:allmodconfig
i386:allyesconfig
i386:allmodconfig
mips:allmodconfig
Qemu test results:
total: 390 pass: 390 fail: 0

Guenter


Re: [PATCH RFC] random: getrandom(2): don't block on non-initialized entropy pool

2019-09-14 Thread Alexander E. Patrakov

(resending without HTML this time, sorry for the duplicate)
14.09.2019 17:25, Ahmed S. Darwish пишет:

getrandom() has been created as a new and more secure interface for
pseudorandom data requests.  Unlike /dev/urandom, it unconditionally
blocks until the entropy pool has been properly initialized.

While getrandom() has no guaranteed upper bound for its waiting time,
user-space has been abusing it by issuing the syscall, from shared
libraries no less, during the main system boot sequence.

Thus, on certain setups where there is no hwrng (embedded), or the
hwrng is not trusted by some users (intel RDRAND), or sometimes it's
just broken (amd RDRAND), the system boot can be *reliably* blocked.

The issue is further exaggerated by recent file-system optimizations,
e.g. b03755ad6f33 (ext4: make __ext4_get_inode_loc plug), which
merges directory lookup code inode table IO, and thus minimizes the
number of disk interrupts and entropy during boot. After that commit,
a blocked boot can be reliably reproduced on a Thinkpad E480 laptop
with standard ArchLinux user-space.

Thus, don't trust user-space on calling getrandom() from the right
context. Just never block, and return -EINVAL if entropy is not yet
available.

Link: 
https://lkml.kernel.org/r/CAHk-=wjyH910+JRBdZf_Y9G54c1M=lbf8nkxb6vjcm9xjln...@mail.gmail.com
Link: https://lkml.kernel.org/r/20190912034421.GA2085@darwi-home-pc
Link: https://lkml.kernel.org/r/20190911173624.gi2...@mit.edu
Link: https://lkml.kernel.org/r/20180514003034.gi14...@thunk.org


Let me reword the commit message for a hopefully better historical 
perspective.


===
getrandom() has been created as a new and more secure interface for 
pseudorandom data requests. It attempted to solve two problems, as 
compared to /dev/{u,}random: the need to open a file descriptor (which 
can fail) and possibility to get not-so-random data from the 
incompletely initialized entropy pool. It has succeeded in the first 
improvement, but failed horribly in the second one: it blocks until the 
entropy pool has been properly initialized, if called without 
GRND_NONBLOCK, while none of these behaviors are suitable for the early 
boot stage.


The issue is further exaggerated by recent file-system optimizations, 
e.g. b03755ad6f33 (ext4: make __ext4_get_inode_loc plug), which merges 
directory lookup code inode table IO, and thus minimizes the number of 
disk interrupts and entropy during boot. After that commit, a blocked 
boot can be reliably reproduced on a Thinkpad E480 laptop with standard 
ArchLinux user-space.


Thus, on certain setups where there is no hwrng (embedded systems or 
non-KVM virtual machines), or the hwrng is not trusted by some users 
(intel RDRAND), or sometimes it's just broken (amd RDRAND), the system 
boot can be *reliably* blocked. It can be therefore argued that there is 
no way to use getrandom() on Linux correctly, especially from shared 
libraries: GRND_NONBLOCK has to be used, and a fallback to some other 
interface like /dev/urandom is required, thus making the net result no 
better than just using /dev/urandom unconditionally.


While getrandom() has no guaranteed upper bound for its waiting time, 
user-space has been using it incorrectly by issuing the syscall, from 
shared libraries no less, during the main system boot sequence, without 
GRND_NONBLOCK.


We can't trust user-space on calling getrandom() from the right context. 
Therefore, just never block, and return -EINVAL (with some entropy still 
in the buffer) if the requested amount of entropy is not yet available.


Link: 
https://github.com/openbsd/src/commit/edb2eeb7da8494998d0073f8aaeb8478cee5e00b
Link: 
https://lkml.kernel.org/r/CAHk-=wjyH910+JRBdZf_Y9G54c1M=lbf8nkxb6vjcm9xjln...@mail.gmail.com

Link: https://lkml.kernel.org/r/20190912034421.GA2085@darwi-home-pc
Link: https://lkml.kernel.org/r/20190911173624.gi2...@mit.edu
Link: https://lkml.kernel.org/r/20180514003034.gi14...@thunk.org
===

That said, I have an issue with the -EINVAL return code here: it is also 
returned in cases where the parameters passed are genuinely not 
understood by the kernel, and no entropy has been written to the buffer. 
Therefore, the caller has to assume that the call has failed, waste all 
the bytes in the buffer, and try some fallback strategy. Can we think of 
some other error code?


The other part of me thinks that triggering a fallback, by returning an 
error code, is never the right thing to do. If the "uninitialized" state 
exists at all, applications and libraries have to care (and I would 
expect their authors who don't pass GRND_RANDOM to just fall back to 
/dev/urandom). Therefore, we are back to square one, except that the 
fallback code in the application is something that is only rarely 
exercised, and thus has higher chances to accumulate bugs. Because the 
only expected/reasonable fallback is to read from /dev/urandom, the 
whole result looks like shifting the responsibility/blame without 
achieving anything useful. As

Re: [PATCH net-next v8 2/3] net: phy: add support for clause 37 auto-negotiation

2019-09-14 Thread Andrew Lunn
On Mon, Sep 09, 2019 at 01:49:06PM -0700, Tao Ren wrote:
> From: Heiner Kallweit 
> 
> This patch adds support for clause 37 1000Base-X auto-negotiation.
> 
> Signed-off-by: Heiner Kallweit 
> Signed-off-by: Tao Ren 
> Tested-by: René van Dorst 

Reviewed-by: Andrew Lunn 

Andrew


Re: [PATCH 4.9 00/14] 4.9.193-stable review

2019-09-14 Thread Guenter Roeck

On 9/13/19 6:06 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.9.193 release.
There are 14 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun 15 Sep 2019 01:03:32 PM UTC.
Anything received after that time might be too late.



Build results:
total: 172 pass: 168 fail: 4
Failed builds:
arm:allmodconfig
i386:allyesconfig
i386:allmodconfig
mips:allmodconfig
Qemu test results:
total: 356 pass: 356 fail: 0

Guenter


Re: [PATCH for 5.3 2/3] rseq: Fix: Unregister rseq for CLONE_SETTLS

2019-09-14 Thread Mathieu Desnoyers
There is an ongoing discussion on the choice of flag we want to care
about here. Therefore, please don't pull this patch until we reach an
agreement.

Thanks,

Mathieu

- On Sep 13, 2019, at 11:12 AM, Mathieu Desnoyers 
mathieu.desnoy...@efficios.com wrote:

> It has been reported by Google that rseq is not behaving properly
> with respect to clone when CLONE_VM is used without CLONE_THREAD.
> It keeps the prior thread's rseq TLS registered when the TLS of the
> thread has moved, so the kernel deals with the wrong TLS.
> 
> The approach of clearing the per task-struct rseq registration
> on clone with CLONE_THREAD flag is incomplete. It does not cover
> the use-case of clone with CLONE_VM set, but without CLONE_THREAD.
> 
> Looking more closely at each of the clone flags:
> 
> - CLONE_THREAD,
> - CLONE_VM,
> - CLONE_SETTLS.
> 
> It appears that the flag we really want to track is CLONE_SETTLS, which
> moves the location of the TLS for the child, making the rseq
> registration point to the wrong TLS.
> 
> Suggested-by: "H . Peter Anvin" 
> Signed-off-by: Mathieu Desnoyers 
> Cc: Thomas Gleixner 
> Cc: Peter Zijlstra (Intel) 
> Cc: "Paul E. McKenney" 
> Cc: Boqun Feng 
> Cc: "H . Peter Anvin" 
> Cc: Paul Turner 
> Cc: Dmitry Vyukov 
> Cc: linux-...@vger.kernel.org
> Cc: 
> ---
> include/linux/sched.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 9f51932bd543..76bf55b5cccf 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1919,11 +1919,11 @@ static inline void rseq_migrate(struct task_struct *t)
> 
> /*
>  * If parent process has a registered restartable sequences area, the
> - * child inherits. Only applies when forking a process, not a thread.
> + * child inherits. Unregister rseq for a clone with CLONE_SETTLS set.
>  */
> static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
> {
> - if (clone_flags & CLONE_THREAD) {
> + if (clone_flags & CLONE_SETTLS) {
>   t->rseq = NULL;
>   t->rseq_sig = 0;
>   t->rseq_event_mask = 0;
> --
> 2.17.1

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


Re: [PATCH v6 00/12] implement KASLR for powerpc/fsl_booke/32

2019-09-14 Thread Scott Wood
On Tue, 2019-09-10 at 13:34 +0800, Jason Yan wrote:
> Hi Scott,
> 
> On 2019/8/28 12:05, Scott Wood wrote:
> > On Fri, 2019-08-09 at 18:07 +0800, Jason Yan wrote:
> > > This series implements KASLR for powerpc/fsl_booke/32, as a security
> > > feature that deters exploit attempts relying on knowledge of the
> > > location
> > > of kernel internals.
> > > 
> > > Since CONFIG_RELOCATABLE has already supported, what we need to do is
> > > map or copy kernel to a proper place and relocate.
> > 
> > Have you tested this with a kernel that was loaded at a non-zero
> > address?  I
> > > tried loading a kernel at 0x04000000 (by changing the address in the
> > > uImage,
> > > and setting bootm_low to 04000000 in U-Boot), and it works without
> > CONFIG_RANDOMIZE and fails with.
> > 
> 
> How did you change the load address of the uImage, by changing the
> kernel config CONFIG_PHYSICAL_START or the "-a/-e" parameter of mkimage?
> I tried both, but it did not work with or without CONFIG_RANDOMIZE.

With mkimage.  Did you set bootm_low in U-Boot as described above?  Was
CONFIG_RELOCATABLE set in the non-CONFIG_RANDOMIZE kernel?

-Scott




Re: [PATCH 2/2] powerpc/83xx: map IMMR with a BAT.

2019-09-14 Thread Scott Wood
On Fri, 2019-08-23 at 12:50 +0000, Christophe Leroy wrote:
> On mpc83xx with a QE, IMMR is 2Mbytes.
> On mpc83xx without a QE, IMMR is 1Mbytes.
> Each driver will map a part of it to access the registers it needs.
> Some driver will map the same part of IMMR as other drivers.
> 
> In order to reduce TLB misses, map the full IMMR with a BAT.
> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/platforms/83xx/misc.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/83xx/misc.c
> b/arch/powerpc/platforms/83xx/misc.c
> index f46d7bf3b140..1e395b01c535 100644
> --- a/arch/powerpc/platforms/83xx/misc.c
> +++ b/arch/powerpc/platforms/83xx/misc.c
> @@ -18,6 +18,8 @@
>  #include 
>  #include 
>  
> +#include 
> +
>  #include "mpc83xx.h"
>  
>  static __be32 __iomem *restart_reg_base;
> @@ -145,6 +147,14 @@ void __init mpc83xx_setup_arch(void)
>   if (ppc_md.progress)
>   ppc_md.progress("mpc83xx_setup_arch()", 0);
>  
> + if (!__map_without_bats) {
> + int immrsize = IS_ENABLED(CONFIG_QUICC_ENGINE) ? SZ_2M :
> SZ_1M;

Any reason not to unconditionally make it 2M?  After all, the kernel being
built with CONFIG_QUICC_ENGINE doesn't mean that the hardware you're running
on has it...

-Scott




Re: [RFC v2 1/2] ARM: dts: omap3: Add cpu trips and cooling map for omap3 family

2019-09-14 Thread H. Nikolaus Schaller


> Am 14.09.2019 um 15:42 schrieb Adam Ford :
> 
> On Sat, Sep 14, 2019 at 4:20 AM H. Nikolaus Schaller  
> wrote:
>> 
>> 
>>> Am 13.09.2019 um 17:37 schrieb Adam Ford :
>>> 
>>> The OMAP3530, AM3517 and DM3730 all show thresholds of 90C and 105C
>>> depending on commercial or industrial temperature ratings.  This
>>> patch expands the thermal information to the limits of 90 and 105
>>> for alert and critical.
>>> 
>>> For boards who never use industrial temperatures, these can be
>>> changed on their respective device trees with something like:
>>> 
>>> &cpu_alert0 {
>>>  temperature = <85000>; /* millicelsius */
>>> };
>>> 
>>> &cpu_crit {
>>>  temperature = <90000>; /* millicelsius */
>>> };
>>> 
>>> Signed-off-by: Adam Ford 
>>> ---
>>> V2:  Change the CPU reference to &cpu instead of &cpu0
>>> 
>>> diff --git a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi 
>>> b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
>>> index 235ecfd61e2d..dfbd0cb0b00b 100644
>>> --- a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
>>> +++ b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
>>> @@ -17,4 +17,25 @@ cpu_thermal: cpu_thermal {
>>> 
>>>  /* sensor   ID */
>>>  thermal-sensors = <&bandgap 0>;
>>> +
>>> + cpu_trips: trips {
>>> + cpu_alert0: cpu_alert {
>>> + temperature = <90000>; /* millicelsius */
>>> + hysteresis = <2000>; /* millicelsius */
>>> + type = "passive";
>>> + };
>>> + cpu_crit: cpu_crit {
>>> + temperature = <105000>; /* millicelsius */
>>> + hysteresis = <2000>; /* millicelsius */
>>> + type = "critical";
>>> + };
>>> + };
>>> +
>>> + cpu_cooling_maps: cooling-maps {
>>> + map0 {
>>> + trip = <&cpu_alert0>;
>>> + cooling-device =
>>> + <&cpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
>>> + };
>>> + };
>>> };
>>> --
>>> 2.17.1
>>> 
>> 
>> Here is my test log (GTA04A5 with DM3730CBP100).
>> "high-load" script is driving the NEON to full power
>> and would report calculation errors.
>> 
>> There is no noise visible in the bandgap sensor data
>> induced by power supply fluctuations (log shows system
>> voltage while charging).
>> 
> 
> Great data!
> 
>> root@letux:~# ./high-load -n2
>> 100% load stress test for 1 cores running ./neon_loop2
>> Sat Sep 14 09:05:50 UTC 2019 65° 4111mV 1000MHz
>> Sat Sep 14 09:05:50 UTC 2019 67° 4005mV 1000MHz
>> Sat Sep 14 09:05:52 UTC 2019 68° 4000mV 1000MHz
>> Sat Sep 14 09:05:53 UTC 2019 68° 4000mV 1000MHz
>> Sat Sep 14 09:05:55 UTC 2019 72° 3976mV 1000MHz
>> Sat Sep 14 09:05:56 UTC 2019 72° 4023mV 1000MHz
>> Sat Sep 14 09:05:57 UTC 2019 72° 3900mV 1000MHz
>> Sat Sep 14 09:05:59 UTC 2019 73° 4029mV 1000MHz
>> Sat Sep 14 09:06:00 UTC 2019 73° 3988mV 1000MHz
>> Sat Sep 14 09:06:01 UTC 2019 73° 4005mV 1000MHz
>> Sat Sep 14 09:06:03 UTC 2019 73° 4011mV 1000MHz
>> Sat Sep 14 09:06:04 UTC 2019 73° 4117mV 1000MHz
>> Sat Sep 14 09:06:06 UTC 2019 73° 4005mV 1000MHz
>> Sat Sep 14 09:06:07 UTC 2019 75° 3994mV 1000MHz
>> Sat Sep 14 09:06:08 UTC 2019 75° 3970mV 1000MHz
>> Sat Sep 14 09:06:09 UTC 2019 75° 4046mV 1000MHz
>> Sat Sep 14 09:06:11 UTC 2019 75° 4005mV 1000MHz
>> Sat Sep 14 09:06:12 UTC 2019 75° 4023mV 1000MHz
>> Sat Sep 14 09:06:14 UTC 2019 75° 3970mV 1000MHz
>> Sat Sep 14 09:06:15 UTC 2019 75° 4011mV 1000MHz
>> Sat Sep 14 09:06:16 UTC 2019 77° 4017mV 1000MHz
>> Sat Sep 14 09:06:18 UTC 2019 77° 3994mV 1000MHz
>> Sat Sep 14 09:06:19 UTC 2019 77° 3994mV 1000MHz
>> Sat Sep 14 09:06:20 UTC 2019 77° 3988mV 1000MHz
>> Sat Sep 14 09:06:22 UTC 2019 77° 4023mV 1000MHz
>> Sat Sep 14 09:06:23 UTC 2019 77° 4023mV 1000MHz
>> Sat Sep 14 09:06:24 UTC 2019 78° 4005mV 1000MHz
>> Sat Sep 14 09:06:26 UTC 2019 78° 4105mV 1000MHz
>> Sat Sep 14 09:06:27 UTC 2019 78° 4011mV 1000MHz
>> Sat Sep 14 09:06:28 UTC 2019 78° 3994mV 1000MHz
>> Sat Sep 14 09:06:30 UTC 2019 78° 4123mV 1000MHz
>> ...
>> Sat Sep 14 09:09:57 UTC 2019 88° 4082mV 1000MHz
>> Sat Sep 14 09:09:59 UTC 2019 88° 4164mV 1000MHz
>> Sat Sep 14 09:10:00 UTC 2019 88° 4058mV 1000MHz
>> Sat Sep 14 09:10:01 UTC 2019 88° 4058mV 1000MHz
>> Sat Sep 14 09:10:03 UTC 2019 88° 4082mV 1000MHz
>> Sat Sep 14 09:10:04 UTC 2019 88° 4058mV 1000MHz
>> Sat Sep 14 09:10:06 UTC 2019 88° 4146mV 1000MHz
>> Sat Sep 14 09:10:07 UTC 2019 88° 4041mV 1000MHz
>> Sat Sep 14 09:10:08 UTC 2019 88° 4035mV 1000MHz
>> Sat Sep 14 09:10:10 UTC 2019 88° 4052mV 1000MHz
>> Sat Sep 14 09:10:11 UTC 2019 88° 4087mV 1000MHz
>> Sat Sep 14 09:10:12 UTC 2019 88° 4152mV 1000MHz
>> Sat Sep 14 09:10:14 UTC 2019 88° 4070mV 1000MHz
>> Sat Sep 14 09:10:15 UTC 2019 88° 4064mV 1000MHz
>> Sat Sep 14 09:10:17 UTC 2019 88° 4170mV 1000MHz
>> Sat Sep 14 09:10:18 UTC 2019 88° 4058mV 1000MHz
>> Sat Sep 14 09:10:19 UTC 2019 88° 4187mV 1000MHz
>> Sat Sep 14 09:10:21 UTC 2019 88° 4093mV 1000MHz
>> Sat Sep 14 09:10:22 UTC 

Re: Linux 5.3-rc8

2019-09-14 Thread Ahmed S. Darwish
On Thu, Sep 12, 2019 at 12:34:45PM +0100, Linus Torvalds wrote:
> On Thu, Sep 12, 2019 at 9:25 AM Theodore Y. Ts'o  wrote:
> >
> > Hmm, one thought might be GRND_FAILSAFE, which will wait up to two
> > minutes before returning "best efforts" randomness and issuing a huge
> > massive warning if it is triggered?
> 
> Yeah, based on (by now) _years_ of experience with people mis-using
> "get me random numbers", I think the sense of a new flag needs to be
> "yeah, I'm willing to wait for it".
>
> Because most people just don't want to wait for it, and most people
> don't think about it, and we need to make the default be for that
> "don't think about it" crowd, with the people who ask for randomness
> sources for a secure key having to very clearly and very explicitly
> say "Yes, I understand that this can take minutes and can only be done
> long after boot".
> 
> Even then people will screw that up because they copy code, or some
> less than gifted rodent writes a library and decides "my library is so
> important that I need that waiting sooper-sekrit-secure random
> number", and then people use that broken library by mistake without
> realizing that it's not going to be reliable at boot time.
> 
> An alternative might be to make getrandom() just return an error
> instead of waiting. Sure, fill the buffer with "as random as we can"
> stuff, but then return -EINVAL because you called us too early.
>

ACK, that's probably _the_ most sensible approach. Only caveat is
the slight change in user-space API semantics though...

For example, this breaks the just released systemd-random-seed(8)
as it _explicitly_ requests blocking behavior from getrandom() here:

=> src/random-seed/random-seed.c:
/*
 * Let's make this whole job asynchronous, i.e. let's make
 * ourselves a barrier for proper initialization of the
 * random pool.
 */
 k = getrandom(buf, buf_size, GRND_NONBLOCK);
 if (k < 0 && errno == EAGAIN && synchronous) {
 log_notice("Kernel entropy pool is not initialized yet, "
"waiting until it is.");

 k = getrandom(buf, buf_size, 0); /* retry synchronously */
 }
 if (k < 0) {
 log_debug_errno(errno, "Failed to read random data with "
 "getrandom(), falling back to "
 "/dev/urandom: %m");
 } else if ((size_t) k < buf_size) {
 log_debug("Short read from getrandom(), falling back to "
   "/dev/urandom: %m");
 } else {
 getrandom_worked = true;
 }

Nonetheless, a slightly broken systemd-random-seed, that was just
released only 11 days ago (v243), is honestly much better than a
*non-booting system*...

I've sent an RFC patch at [1].

To handle the systemd case, I'll add the discussed "yeah, I'm
willing to wait for it" flag (GRND_BLOCK) in v2.

If this whole approach is going to be merged, and the slight ABI
breakage is to be tolerated (hm?), I wonder how will systemd
random-seed handle the semantics change though without doing
ugly kernel version checks..

thanks,

[1] https://lkml.kernel.org/r/20190914122500.GA1425@darwi-home-pc

--
darwi
http://darwish.chasingpointers.com


Re: [PATCH] staging: r8188eu: replace rtw_malloc() with it's definition

2019-09-14 Thread Ivan Safonov

On 9/10/19 2:59 PM, Dan Carpenter wrote:

On Sun, Sep 08, 2019 at 12:00:26PM +0300, Ivan Safonov wrote:

rtw_malloc prevents the use of kmemdup/kzalloc and others.


Signed-off-by: Ivan Safonov 
---
  drivers/staging/rtl8188eu/core/rtw_ap.c|  4 ++--
  drivers/staging/rtl8188eu/core/rtw_mlme_ext.c  |  2 +-
  .../staging/rtl8188eu/include/osdep_service.h  |  3 ---
  drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 18 +-
  drivers/staging/rtl8188eu/os_dep/mlme_linux.c  |  2 +-
  .../staging/rtl8188eu/os_dep/osdep_service.c   |  7 +--
  6 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/drivers/staging/rtl8188eu/core/rtw_ap.c 
b/drivers/staging/rtl8188eu/core/rtw_ap.c
index 51a5b71f8c25..c9c57379b7a2 100644
--- a/drivers/staging/rtl8188eu/core/rtw_ap.c
+++ b/drivers/staging/rtl8188eu/core/rtw_ap.c
@@ -104,7 +104,7 @@ static void update_BCNTIM(struct adapter *padapter)
}
  
  	if (remainder_ielen > 0) {

-   pbackup_remainder_ie = rtw_malloc(remainder_ielen);
+   pbackup_remainder_ie = kmalloc(remainder_ielen, in_interrupt() 
? GFP_ATOMIC : GFP_KERNEL);

 ^
This stuff isn't right.  It really should be checking if spinlocks are
held or IRQs are disabled.  And the only way to do that is by auditing
the callers.

I hope to make these changes later as separate independent patches.
This patch does only one thing - remove rtw_malloc().



(The original rtw_malloc() implementation is buggy nonsense).

regards,
dan carpenter



Ivan Safonov.


Re: [PATCH] x86_64: new and improved memset()

2019-09-14 Thread Alexey Dobriyan
On Sat, Sep 14, 2019 at 01:37:17PM +0200, Borislav Petkov wrote:
> On Sat, Sep 14, 2019 at 01:33:45PM +0300, Alexey Dobriyan wrote:
> > --- a/arch/x86/include/asm/string_64.h
> > +++ b/arch/x86/include/asm/string_64.h
> > @@ -15,7 +15,111 @@ extern void *memcpy(void *to, const void *from, size_t 
> > len);
> >  extern void *__memcpy(void *to, const void *from, size_t len);
> >  
> >  #define __HAVE_ARCH_MEMSET
> > +#if defined(_ARCH_X86_BOOT) || defined(CONFIG_FORTIFY_SOURCE)
> >  void *memset(void *s, int c, size_t n);
> > +#else
> > +#include 
> > +#include 
> > +
> > +/* Internal, do not use. */
> > +static __always_inline void memset0(void *s, size_t n)
> > +{
> > +   /* Internal, do not use. */
> > +   void _memset0_mov(void);
> > +   void _memset0_rep_stosq(void);
> > +   void memset0_mov(void);
> > +   void memset0_rep_stosq(void);
> > +   void memset0_rep_stosb(void);
> > +
> > +   if (__builtin_constant_p(n) && n == 0) {
> > +   } else if (__builtin_constant_p(n) && n == 1) {
> > +   *(uint8_t *)s = 0;
> > +   } else if (__builtin_constant_p(n) && n == 2) {
> > +   *(uint16_t *)s = 0;
> > +   } else if (__builtin_constant_p(n) && n == 4) {
> > +   *(uint32_t *)s = 0;
> > +   } else if (__builtin_constant_p(n) && n == 6) {
> > +   *(uint32_t *)s = 0;
> > +   *(uint16_t *)(s + 4) = 0;
> > +   } else if (__builtin_constant_p(n) && n == 8) {
> > +   *(uint64_t *)s = 0;
> > +   } else if (__builtin_constant_p(n) && (n & 7) == 0) {
> > +   alternative_call_2(
> > +   _memset0_mov,
> > +   _memset0_rep_stosq, X86_FEATURE_REP_GOOD,
> > +   memset0_rep_stosb, X86_FEATURE_ERMS,
> > +   ASM_OUTPUT2("=D" (s), "=c" (n)),
> > +   "D" (s), "c" (n)
> > +   : "rax", "cc", "memory"
> > +   );
> > +   } else {
> > +   alternative_call_2(
> > +   memset0_mov,
> > +   memset0_rep_stosq, X86_FEATURE_REP_GOOD,
> > +   memset0_rep_stosb, X86_FEATURE_ERMS,
> > +   ASM_OUTPUT2("=D" (s), "=c" (n)),
> > +   "D" (s), "c" (n)
> > +   : "rax", "rsi", "cc", "memory"
> > +   );
> > +   }
> > +}
> > +
> > +/* Internal, do not use. */
> > +static __always_inline void memsetx(void *s, int c, size_t n)
> > +{
> > +   /* Internal, do not use. */
> > +   void _memsetx_mov(void);
> > +   void _memsetx_rep_stosq(void);
> > +   void memsetx_mov(void);
> > +   void memsetx_rep_stosq(void);
> > +   void memsetx_rep_stosb(void);
> > +
> > +   const uint64_t ccc = (uint8_t)c * 0x0101010101010101ULL;
> > +
> > +   if (__builtin_constant_p(n) && n == 0) {
> > +   } else if (__builtin_constant_p(n) && n == 1) {
> > +   *(uint8_t *)s = ccc;
> > +   } else if (__builtin_constant_p(n) && n == 2) {
> > +   *(uint16_t *)s = ccc;
> > +   } else if (__builtin_constant_p(n) && n == 4) {
> > +   *(uint32_t *)s = ccc;
> > +   } else if (__builtin_constant_p(n) && n == 8) {
> > +   *(uint64_t *)s = ccc;
> > +   } else if (__builtin_constant_p(n) && (n & 7) == 0) {
> > +   alternative_call_2(
> > +   _memsetx_mov,
> > +   _memsetx_rep_stosq, X86_FEATURE_REP_GOOD,
> > +   memsetx_rep_stosb, X86_FEATURE_ERMS,
> > +   ASM_OUTPUT2("=D" (s), "=c" (n)),
> > +   "D" (s), "c" (n), "a" (ccc)
> > +   : "cc", "memory"
> > +   );
> > +   } else {
> > +   alternative_call_2(
> > +   memsetx_mov,
> > +   memsetx_rep_stosq, X86_FEATURE_REP_GOOD,
> > +   memsetx_rep_stosb, X86_FEATURE_ERMS,
> > +   ASM_OUTPUT2("=D" (s), "=c" (n)),
> > +   "D" (s), "c" (n), "a" (ccc)
> > +   : "rsi", "cc", "memory"
> > +   );
> > +   }
> > +}
> > +
> > +static __always_inline void *memset(void *s, int c, size_t n)
> > +{
> > +   if (__builtin_constant_p(c)) {
> > +   if (c == 0) {
> > +   memset0(s, n);
> > +   } else {
> > +   memsetx(s, c, n);
> > +   }
> > +   return s;
> > +   } else {
> > +   return __builtin_memset(s, c, n);
> > +   }
> > +}
> 
> I'm willing to take something like that only when such complexity is
> justified by numbers. I.e., I'm much more inclined to capping it under
> 32 and 64 byte sizes and keeping it simple.

OK. Those small lengths were indeed annoying.

> > +ENTRY(_memset0_mov)
> > +   xor eax, eax
> > +.globl _memsetx_mov
> > +_memsetx_mov:
> > +   add rcx, rdi
> > +   cmp rdi, rcx
> > +   je  1f
> > +2:
> > +   mov [rdi], rax
> > +   add rdi, 8
> > +   cmp rdi, rcx
> > +   jne 2b
> > +1:
> > +   ret
> > +ENDPROC(_memset0_mov)
> > +ENDPROC(_memsetx_mov)
> > +EXPORT_SYMBOL(_memset0_mov)
> > +EXPORT_SYMBOL(_memsetx_mov)
> > +
> > +ENTRY(mems

Re: [PATCH v2] net: mdio: switch to using gpiod_get_optional()

2019-09-14 Thread Andrew Lunn
On Fri, Sep 13, 2019 at 03:55:47PM -0700, Dmitry Torokhov wrote:
> The MDIO device reset line is optional and now that gpiod_get_optional()
> returns proper value when GPIO support is compiled out, there is no
> reason to use fwnode_get_named_gpiod() that I plan to hide away.
> 
> Let's switch to using more standard gpiod_get_optional() and
> gpiod_set_consumer_name() to keep the nice "PHY reset" label.
> 
> Also there is no reason to only try to fetch the reset GPIO when we have
> OF node, gpiolib can fetch GPIO data from firmwares as well.
> 
> Signed-off-by: Dmitry Torokhov 

Reviewed-by: Andrew Lunn 

Andrew


Re: [PATCH net-next] net: dsa: b53: Add support for port_egress_floods callback

2019-09-14 Thread Andrew Lunn
On Thu, Sep 12, 2019 at 08:28:39PM -0700, Florian Fainelli wrote:
> Add support for configuring the per-port egress flooding control for
> both Unicast and Multicast traffic.
> 
> Signed-off-by: Florian Fainelli 
> ---
> Beneditk,
> 
> Do you mind re-testing, or confirming that this patch that I sent much
> earlier does work correctly for you? Thanks!
> 
>  drivers/net/dsa/b53/b53_common.c | 33 
>  drivers/net/dsa/b53/b53_priv.h   |  2 ++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/drivers/net/dsa/b53/b53_common.c 
> b/drivers/net/dsa/b53/b53_common.c
> index 7d328a5f0161..ac2ec08a652b 100644
> --- a/drivers/net/dsa/b53/b53_common.c
> +++ b/drivers/net/dsa/b53/b53_common.c
> @@ -342,6 +342,13 @@ static void b53_set_forwarding(struct b53_device *dev, 
> int enable)
>   b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt);
>   mgmt |= B53_MII_DUMB_FWDG_EN;
>   b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt);
> +
> + /* Look at B53_UC_FWD_EN and B53_MC_FWD_EN to decide whether
> +  * frames should be flooed or not.

Hi Florian

s/flooed/flooded 

Reviewed-by: Andrew Lunn 

Andrew


[PATCH] staging: bcm2835-audio: Fix draining behavior regression

2019-09-14 Thread Takashi Iwai
The PCM draining behavior got broken since the recent refactoring, and
this turned out to be the incorrect expectation of the firmware
behavior regarding "draining".  While I expected the "drain" flag at
the stop operation would do processing the queued samples, it seems
rather dropping the samples.

As a quick fix, just drop the SNDRV_PCM_INFO_DRAIN_TRIGGER flag, so
that the driver uses the normal PCM draining procedure.  Also, put
some caution comment to the function for future readers not to fall
into the same pitfall.

Fixes: d7ca3a71545b ("staging: bcm2835-audio: Operate non-atomic PCM ops")
BugLink: https://github.com/raspberrypi/linux/issues/2983
Cc: sta...@vger.kernel.org
Signed-off-by: Takashi Iwai 
---
 drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c   | 4 ++--
 drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c 
b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c
index bc1eaa3a0773..826016c3431a 100644
--- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c
+++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c
@@ -12,7 +12,7 @@
 static const struct snd_pcm_hardware snd_bcm2835_playback_hw = {
.info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
 SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
-SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR),
+SNDRV_PCM_INFO_SYNC_APPLPTR),
.formats = SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE,
.rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000,
.rate_min = 8000,
@@ -29,7 +29,7 @@ static const struct snd_pcm_hardware snd_bcm2835_playback_hw 
= {
 static const struct snd_pcm_hardware snd_bcm2835_playback_spdif_hw = {
.info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
 SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
-SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR),
+SNDRV_PCM_INFO_SYNC_APPLPTR),
.formats = SNDRV_PCM_FMTBIT_S16_LE,
.rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_44100 |
SNDRV_PCM_RATE_48000,
diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c 
b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c
index 23fba01107b9..c6f9cf1913d2 100644
--- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c
+++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c
@@ -289,6 +289,7 @@ int bcm2835_audio_stop(struct bcm2835_alsa_stream 
*alsa_stream)
 VC_AUDIO_MSG_TYPE_STOP, false);
 }
 
+/* FIXME: this doesn't seem working as expected for "draining" */
 int bcm2835_audio_drain(struct bcm2835_alsa_stream *alsa_stream)
 {
struct vc_audio_msg m = {
-- 
2.16.4



Re: [PATCH 0/6] ARM, arm64: Remove arm_pm_restart()

2019-09-14 Thread Guenter Roeck
On Mon, Jan 30, 2017 at 12:05:06PM +0100, Thierry Reding wrote:
> From: Thierry Reding 
> 
> Hi everyone,
> 
> This small series is preparatory work for a series that I'm working on
> which attempts to establish a formal framework for system restart and
> power off.
> 
> Guenter has done a lot of good work in this area, but it never got
> merged. I think this set is a valuable addition to the kernel because
> it converts all odd providers to the established mechanism for restart.
> 
> Since this is stretched across both 32-bit and 64-bit ARM, as well as
> PSCI, and given the SoC/board level of functionality, I think it might
> make sense to take this through the ARM SoC tree in order to simplify
> the interdependencies. But it should also be possible to take patches
> 1-4 via their respective trees this cycle and patches 5-6 through the
> ARM and arm64 trees for the next cycle, if that's preferred.
> 

We tried this twice now, and it seems to go nowhere. What does it take
to get it applied ?

Guenter

> Thanks,
> Thierry
> 
> Guenter Roeck (6):
>   ARM: prima2: Register with kernel restart handler
>   ARM: xen: Register with kernel restart handler
>   drivers: firmware: psci: Register with kernel restart handler
>   ARM: Register with kernel restart handler
>   ARM64: Remove arm_pm_restart()
>   ARM: Remove arm_pm_restart()
> 
>  arch/arm/include/asm/system_misc.h   |  1 -
>  arch/arm/kernel/reboot.c |  6 +-
>  arch/arm/kernel/setup.c  | 20 ++--
>  arch/arm/mach-prima2/rstc.c  | 11 +--
>  arch/arm/xen/enlighten.c | 13 +++--
>  arch/arm64/include/asm/system_misc.h |  2 --
>  arch/arm64/kernel/process.c  |  7 +--
>  drivers/firmware/psci.c  | 11 +--
>  8 files changed, 49 insertions(+), 22 deletions(-)
> 
> -- 
> 2.11.0


Re: [PATCH v4 1/2] ethtool: implement Energy Detect Powerdown support via phy-tunable

2019-09-14 Thread Andrew Lunn
On Thu, Sep 12, 2019 at 07:28:11PM +0300, Alexandru Ardelean wrote:
> The `phy_tunable_id` has been named `ETHTOOL_PHY_EDPD` since it looks like
> this feature is common across other PHYs (like EEE), and defining
> `ETHTOOL_PHY_ENERGY_DETECT_POWER_DOWN` seems too long.
> 
> The way EDPD works, is that the RX block is put to a lower power mode,
> except for link-pulse detection circuits. The TX block is also put to low
> power mode, but the PHY wakes-up periodically to send link pulses, to avoid
> lock-ups in case the other side is also in EDPD mode.
> 
> Currently, there are 2 PHY drivers that look like they could use this new
> PHY tunable feature: the `adin` && `micrel` PHYs.
> 
> The ADIN's datasheet mentions that TX pulses are at intervals of 1 second
> default each, and they can be disabled. For the Micrel KSZ9031 PHY, the
> datasheet does not mention whether they can be disabled, but mentions that
> they can be modified.
> 
> The way this change is structured, is similar to the PHY tunable downshift
> control:
> * a `ETHTOOL_PHY_EDPD_DFLT_TX_MSECS` value is exposed to cover a default
>   TX interval; some PHYs could specify a certain value that makes sense
> * `ETHTOOL_PHY_EDPD_NO_TX` would disable TX when EDPD is enabled
> * `ETHTOOL_PHY_EDPD_DISABLE` will disable EDPD
> 
> As noted by the `ETHTOOL_PHY_EDPD_DFLT_TX_MSECS` the interval unit is 1
> millisecond, which should cover a reasonable range of intervals:
>  - from 1 millisecond, which does not sound like much of a power-saver
>  - to ~65 seconds which is quite a lot to wait for a link to come up when
>plugging a cable
> 
> Signed-off-by: Alexandru Ardelean 

Reviewed-by: Andrew Lunn 

Andrew


Re: [PATCH v4 2/2] net: phy: adin: implement Energy Detect Powerdown mode via phy-tunable

2019-09-14 Thread Andrew Lunn
On Thu, Sep 12, 2019 at 07:28:12PM +0300, Alexandru Ardelean wrote:

> +static int adin_set_edpd(struct phy_device *phydev, u16 tx_interval)
> +{
> + u16 val;
> +
> + if (tx_interval == ETHTOOL_PHY_EDPD_DISABLE)
> + return phy_clear_bits(phydev, ADIN1300_PHY_CTRL_STATUS2,
> + (ADIN1300_NRG_PD_EN | ADIN1300_NRG_PD_TX_EN));
> +
> + val = ADIN1300_NRG_PD_EN;
> +
> + switch (tx_interval) {
> + case 1000: /* 1 second */
> + /* fallthrough */
> + case ETHTOOL_PHY_EDPD_DFLT_TX_MSECS:
> + val |= ADIN1300_NRG_PD_TX_EN;
> + /* fallthrough */
> + case ETHTOOL_PHY_EDPD_NO_TX:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + return phy_modify(phydev, ADIN1300_PHY_CTRL_STATUS2,
> +   (ADIN1300_NRG_PD_EN | ADIN1300_NRG_PD_TX_EN),
> +   val);
> +}
> +

>  
> + rc = adin_set_edpd(phydev, 1);
> + if (rc < 0)
> + return rc;

Hi Alexandru

Shouldn't this be adin_set_edpd(phydev, 1000);

Andrew


Re: [PATCH v22 00/24] Intel SGX foundations

2019-09-14 Thread Dave Hansen
On 9/14/19 6:41 AM, Jarkko Sakkinen wrote:
> 
> The proposed LSM hooks give the granularity to make yes/no decision
> based on the
> 
> * The origin of the source for the enclave.
> * The requested permissions for the added or mapped page.
> 
> The hooks to do these checks are provided for mmap() and EADD
> operations.
> 
> With just file permissions you can still limit mmap() by having a
> privileged process to build the enclaves and pass the file descriptor
> to the enclave user who can mmap() the enclave within the constraints
> set by the enclave pages (their permissions refine the roof that you
> can mmap() any memory range within an enclave).

The LSM hooks are presumably fixing a problem that these patches
introduce.  What's that problem?


Re: [RFC v1 04/14] krsi: Add support in libbpf for BPF_PROG_TYPE_KRSI

2019-09-14 Thread Yonghong Song


On 9/10/19 12:55 PM, KP Singh wrote:
> From: KP Singh 
> 
> Update the libbpf library with functionality to load and
> attach a program type BPF_PROG_TYPE_KRSI.
> 
> Since the bpf_prog_load does not allow the specification of an
> expected attach type, it's recommended to use bpf_prog_load_xattr and
> set the expected attach type as KRSI.
> 
> Signed-off-by: KP Singh 
> ---
>   tools/lib/bpf/libbpf.c| 4 
>   tools/lib/bpf/libbpf.h| 2 ++
>   tools/lib/bpf/libbpf.map  | 2 ++
>   tools/lib/bpf/libbpf_probes.c | 1 +
>   4 files changed, 9 insertions(+)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 2b57d7ea7836..3cc86bbc68cd 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -2676,6 +2676,7 @@ static bool bpf_prog_type__needs_kver(enum 
> bpf_prog_type type)
>   case BPF_PROG_TYPE_PERF_EVENT:
>   case BPF_PROG_TYPE_CGROUP_SYSCTL:
>   case BPF_PROG_TYPE_CGROUP_SOCKOPT:
> + case BPF_PROG_TYPE_KRSI:
>   return false;
>   case BPF_PROG_TYPE_KPROBE:
>   default:
> @@ -3536,6 +3537,7 @@ bool bpf_program__is_##NAME(const struct bpf_program 
> *prog) \
>   }   \
>   
>   BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
> +BPF_PROG_TYPE_FNS(krsi, BPF_PROG_TYPE_KRSI);
>   BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
>   BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
>   BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
> @@ -3590,6 +3592,8 @@ static const struct {
>   BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
>   BPF_PROG_SEC("lwt_xmit",BPF_PROG_TYPE_LWT_XMIT),
>   BPF_PROG_SEC("lwt_seg6local",   BPF_PROG_TYPE_LWT_SEG6LOCAL),
> + BPF_APROG_SEC("krsi",   BPF_PROG_TYPE_KRSI,
> + BPF_KRSI),
>   BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB,
>   BPF_CGROUP_INET_INGRESS),
>   BPF_APROG_SEC("cgroup_skb/egress",  BPF_PROG_TYPE_CGROUP_SKB,
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 5cbf459ece0b..8781d29b4035 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -261,6 +261,7 @@ LIBBPF_API int bpf_program__set_sched_cls(struct 
> bpf_program *prog);
>   LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
>   LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
>   LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
> +LIBBPF_API int bpf_program__set_krsi(struct bpf_program *prog);
>   LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
> enum bpf_prog_type type);
>   LIBBPF_API void
> @@ -275,6 +276,7 @@ LIBBPF_API bool bpf_program__is_sched_cls(const struct 
> bpf_program *prog);
>   LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
>   LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
>   LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
> +LIBBPF_API bool bpf_program__is_krsi(const struct bpf_program *prog);
>   
>   /*
>* No need for __attribute__((packed)), all members of 'bpf_map_def'
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index f9d316e873d8..75b8fe419c11 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -68,6 +68,7 @@ LIBBPF_0.0.1 {
>   bpf_prog_test_run_xattr;
>   bpf_program__fd;
>   bpf_program__is_kprobe;
> + bpf_program__is_krsi;
>   bpf_program__is_perf_event;
>   bpf_program__is_raw_tracepoint;
>   bpf_program__is_sched_act;
> @@ -85,6 +86,7 @@ LIBBPF_0.0.1 {
>   bpf_program__set_expected_attach_type;
>   bpf_program__set_ifindex;
>   bpf_program__set_kprobe;
> + bpf_program__set_krsi;
>   bpf_program__set_perf_event;
>   bpf_program__set_prep;
>   bpf_program__set_priv;

Please put the above two new API functions in version LIBBPF_0.0.5.

> diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
> index ace1a0708d99..cc515a36794d 100644
> --- a/tools/lib/bpf/libbpf_probes.c
> +++ b/tools/lib/bpf/libbpf_probes.c
> @@ -102,6 +102,7 @@ probe_load(enum bpf_prog_type prog_type, const struct 
> bpf_insn *insns,
>   case BPF_PROG_TYPE_FLOW_DISSECTOR:
>   case BPF_PROG_TYPE_CGROUP_SYSCTL:
>   case BPF_PROG_TYPE_CGROUP_SOCKOPT:
> + case BPF_PROG_TYPE_KRSI:
>   default:
>   break;
>   }
> 


Re: [RFC v2 1/2] ARM: dts: omap3: Add cpu trips and cooling map for omap3 family

2019-09-14 Thread Adam Ford
On Sat, Sep 14, 2019 at 9:38 AM H. Nikolaus Schaller  wrote:
>
>
> > Am 14.09.2019 um 15:42 schrieb Adam Ford :
> >
> > On Sat, Sep 14, 2019 at 4:20 AM H. Nikolaus Schaller  
> > wrote:
> >>
> >>
> >>> Am 13.09.2019 um 17:37 schrieb Adam Ford :
> >>>
> >>> The OMAP3530, AM3517 and DM3730 all show thresholds of 90C and 105C
> >>> depending on commercial or industrial temperature ratings.  This
> >>> patch expands the thermal information to the limits of 90 and 105
> >>> for alert and critical.
> >>>
> >>> For boards who never use industrial temperatures, these can be
> >>> changed on their respective device trees with something like:
> >>>
> >>> &cpu_alert0 {
> >>>  temperature = <85000>; /* millicelsius */
> >>> };
> >>>
> >>> &cpu_crit {
> >>>  temperature = <90000>; /* millicelsius */
> >>> };
> >>>
> >>> Signed-off-by: Adam Ford 
> >>> ---
> >>> V2:  Change the CPU reference to &cpu instead of &cpu0
> >>>
> >>> diff --git a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi 
> >>> b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> >>> index 235ecfd61e2d..dfbd0cb0b00b 100644
> >>> --- a/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> >>> +++ b/arch/arm/boot/dts/omap3-cpu-thermal.dtsi
> >>> @@ -17,4 +17,25 @@ cpu_thermal: cpu_thermal {
> >>>
> >>>  /* sensor   ID */
> >>>  thermal-sensors = <&bandgap 0>;
> >>> +
> >>> + cpu_trips: trips {
> >>> + cpu_alert0: cpu_alert {
> >>> + temperature = <90000>; /* millicelsius */
> >>> + hysteresis = <2000>; /* millicelsius */
> >>> + type = "passive";
> >>> + };
> >>> + cpu_crit: cpu_crit {
> >>> + temperature = <105000>; /* millicelsius */
> >>> + hysteresis = <2000>; /* millicelsius */
> >>> + type = "critical";
> >>> + };
> >>> + };
> >>> +
> >>> + cpu_cooling_maps: cooling-maps {
> >>> + map0 {
> >>> + trip = <&cpu_alert0>;
> >>> + cooling-device =
> >>> + <&cpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> >>> + };
> >>> + };
> >>> };
> >>> --
> >>> 2.17.1
> >>>
> >>
> >> Here is my test log (GTA04A5 with DM3730CBP100).
> >> "high-load" script is driving the NEON to full power
> >> and would report calculation errors.
> >>
> >> There is no noise visible in the bandgap sensor data
> >> induced by power supply fluctuations (log shows system
> >> voltage while charging).
> >>
> >
> > Great data!
> >
> >> root@letux:~# ./high-load -n2
> >> 100% load stress test for 1 cores running ./neon_loop2
> >> Sat Sep 14 09:05:50 UTC 2019 65° 4111mV 1000MHz
> >> Sat Sep 14 09:05:50 UTC 2019 67° 4005mV 1000MHz
> >> Sat Sep 14 09:05:52 UTC 2019 68° 4000mV 1000MHz
> >> Sat Sep 14 09:05:53 UTC 2019 68° 4000mV 1000MHz
> >> Sat Sep 14 09:05:55 UTC 2019 72° 3976mV 1000MHz
> >> Sat Sep 14 09:05:56 UTC 2019 72° 4023mV 1000MHz
> >> Sat Sep 14 09:05:57 UTC 2019 72° 3900mV 1000MHz
> >> Sat Sep 14 09:05:59 UTC 2019 73° 4029mV 1000MHz
> >> Sat Sep 14 09:06:00 UTC 2019 73° 3988mV 1000MHz
> >> Sat Sep 14 09:06:01 UTC 2019 73° 4005mV 1000MHz
> >> Sat Sep 14 09:06:03 UTC 2019 73° 4011mV 1000MHz
> >> Sat Sep 14 09:06:04 UTC 2019 73° 4117mV 1000MHz
> >> Sat Sep 14 09:06:06 UTC 2019 73° 4005mV 1000MHz
> >> Sat Sep 14 09:06:07 UTC 2019 75° 3994mV 1000MHz
> >> Sat Sep 14 09:06:08 UTC 2019 75° 3970mV 1000MHz
> >> Sat Sep 14 09:06:09 UTC 2019 75° 4046mV 1000MHz
> >> Sat Sep 14 09:06:11 UTC 2019 75° 4005mV 1000MHz
> >> Sat Sep 14 09:06:12 UTC 2019 75° 4023mV 1000MHz
> >> Sat Sep 14 09:06:14 UTC 2019 75° 3970mV 1000MHz
> >> Sat Sep 14 09:06:15 UTC 2019 75° 4011mV 1000MHz
> >> Sat Sep 14 09:06:16 UTC 2019 77° 4017mV 1000MHz
> >> Sat Sep 14 09:06:18 UTC 2019 77° 3994mV 1000MHz
> >> Sat Sep 14 09:06:19 UTC 2019 77° 3994mV 1000MHz
> >> Sat Sep 14 09:06:20 UTC 2019 77° 3988mV 1000MHz
> >> Sat Sep 14 09:06:22 UTC 2019 77° 4023mV 1000MHz
> >> Sat Sep 14 09:06:23 UTC 2019 77° 4023mV 1000MHz
> >> Sat Sep 14 09:06:24 UTC 2019 78° 4005mV 1000MHz
> >> Sat Sep 14 09:06:26 UTC 2019 78° 4105mV 1000MHz
> >> Sat Sep 14 09:06:27 UTC 2019 78° 4011mV 1000MHz
> >> Sat Sep 14 09:06:28 UTC 2019 78° 3994mV 1000MHz
> >> Sat Sep 14 09:06:30 UTC 2019 78° 4123mV 1000MHz
> >> ...
> >> Sat Sep 14 09:09:57 UTC 2019 88° 4082mV 1000MHz
> >> Sat Sep 14 09:09:59 UTC 2019 88° 4164mV 1000MHz
> >> Sat Sep 14 09:10:00 UTC 2019 88° 4058mV 1000MHz
> >> Sat Sep 14 09:10:01 UTC 2019 88° 4058mV 1000MHz
> >> Sat Sep 14 09:10:03 UTC 2019 88° 4082mV 1000MHz
> >> Sat Sep 14 09:10:04 UTC 2019 88° 4058mV 1000MHz
> >> Sat Sep 14 09:10:06 UTC 2019 88° 4146mV 1000MHz
> >> Sat Sep 14 09:10:07 UTC 2019 88° 4041mV 1000MHz
> >> Sat Sep 14 09:10:08 UTC 2019 88° 4035mV 1000MHz
> >> Sat Sep 14 09:10:10 UTC 2019 88° 4052mV 1000MHz
> >> Sat Sep 14 09:10:11 UTC 2019 88° 4087mV 1000MHz
> >> Sat Sep 14 09:10:12 UTC 2019 88° 4152mV 1000MHz
> >> Sat Sep 14 09:10:14 UTC 2019 88° 4070mV 1

Re: [RFC v1 05/14] krsi: Initialize KRSI hooks and create files in securityfs

2019-09-14 Thread Yonghong Song


On 9/10/19 12:55 PM, KP Singh wrote:
> From: KP Singh 
> 
> The LSM creates files in securityfs for each hook registered with the
> LSM.
> 
>  /sys/kernel/security/bpf/
> 
> The initialization of the hooks is done collectively in an internal
> header "hooks.h" which results in:
> 
> * Creation of a file for the hook in the securityfs.
> * Allocation of a krsi_hook data structure which stores a pointer to the
>dentry of the newly created file in securityfs.
> * A pointer to the krsi_hook data structure is stored in the private
>d_fsdata of dentry of the file created in securityFS.
> 
> These files will later be used to specify an attachment target during
> BPF_PROG_LOAD.
> 
> Signed-off-by: KP Singh 
> ---
>   security/krsi/Makefile|  4 +-
>   security/krsi/include/hooks.h | 21 
>   security/krsi/include/krsi_fs.h   | 19 +++
>   security/krsi/include/krsi_init.h | 45 
>   security/krsi/krsi.c  | 16 +-
>   security/krsi/krsi_fs.c   | 88 +++
>   6 files changed, 191 insertions(+), 2 deletions(-)
>   create mode 100644 security/krsi/include/hooks.h
>   create mode 100644 security/krsi/include/krsi_fs.h
>   create mode 100644 security/krsi/include/krsi_init.h
>   create mode 100644 security/krsi/krsi_fs.c
> 
> diff --git a/security/krsi/Makefile b/security/krsi/Makefile
> index 660cc1f422fd..4586241f16e1 100644
> --- a/security/krsi/Makefile
> +++ b/security/krsi/Makefile
> @@ -1 +1,3 @@
> -obj-$(CONFIG_SECURITY_KRSI) := krsi.o ops.o
> +obj-$(CONFIG_SECURITY_KRSI) := krsi.o krsi_fs.o ops.o
> +
> +ccflags-y := -I$(srctree)/security/krsi -I$(srctree)/security/krsi/include
> diff --git a/security/krsi/include/hooks.h b/security/krsi/include/hooks.h
> new file mode 100644
> index 000000000000..e070c452b5de
> --- /dev/null
> +++ b/security/krsi/include/hooks.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * The hooks for the KRSI LSM are declared in this file.
> + *
> + * This header MUST NOT be included directly and should
> + * be only used to initialize the hooks lists.
> + *
> + * Format:
> + *
> + *   KRSI_HOOK_INIT(TYPE, NAME, LSM_HOOK, KRSI_HOOK_FN)
> + *
> + * KRSI adds one layer of indirection between the name of the hook and the 
> name
> + * it exposes to the userspace in Security FS to prevent the userspace from
> + * breaking in case the name of the hook changes in the kernel or if there's
> + * another LSM hook that maps better to the represented security behaviour.
> + */
> +KRSI_HOOK_INIT(PROCESS_EXECUTION,
> +process_execution,
> +bprm_check_security,
> +krsi_process_execution)
> diff --git a/security/krsi/include/krsi_fs.h b/security/krsi/include/krsi_fs.h
> new file mode 100644
> index 000000000000..38134661d8d6
> --- /dev/null
> +++ b/security/krsi/include/krsi_fs.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _KRSI_FS_H
> +#define _KRSI_FS_H
> +
> +#include 
> +#include 
> +#include 
> +
> +bool is_krsi_hook_file(struct file *f);
> +
> +/*
> + * The name of the directory created in securityfs
> + *
> + *   /sys/kernel/security/
> + */
> +#define KRSI_SFS_NAME "krsi"
> +
> +#endif /* _KRSI_FS_H */
> diff --git a/security/krsi/include/krsi_init.h 
> b/security/krsi/include/krsi_init.h
> new file mode 100644
> index 000000000000..68755182a031
> --- /dev/null
> +++ b/security/krsi/include/krsi_init.h
> @@ -0,0 +1,45 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _KRSI_INIT_H
> +#define _KRSI_INIT_H
> +
> +#include "krsi_fs.h"
> +
> +enum krsi_hook_type {
> + PROCESS_EXECUTION,
> + __MAX_KRSI_HOOK_TYPE, /* delimiter */
> +};
> +
> +extern int krsi_fs_initialized;
> +/*
> + * The LSM creates one file per hook.
> + *
> + * A pointer to krsi_hook data structure is stored in the
> + * private fsdata of the dentry of the per-hook file created
> + * in securityfs.
> + */
> +struct krsi_hook {
> + /*
> +  * The name of the security hook, a file with this name will be created
> +  * in the securityfs.
> +  */
> + const char *name;
> + /*
> +  * The type of the LSM hook, the LSM uses this to index the list of the
> +  * hooks to run the eBPF programs that may have been attached.
> +  */
> + enum krsi_hook_type h_type;
> + /*
> +  * The dentry of the file created in securityfs.
> +  */
> + struct dentry *h_dentry;
> +};
> +
> +extern struct krsi_hook krsi_hooks_list[];
> +
> +#define krsi_for_each_hook(hook) \
> + for ((hook) = &krsi_hooks_list[0]; \
> +  (hook) < &krsi_hooks_list[__MAX_KRSI_HOOK_TYPE]; \
> +  (hook)++)
> +
> +#endif /* _KRSI_INIT_H */
> diff --git a/security/krsi/krsi.c b/security/krsi/krsi.c
> index 9ce4f56fb78d..77d7e2f91172 100644
> --- a/security/krsi/krsi.c
> +++ b/security/krsi/krsi.c
> @@ -2,13 +2,27 @@
>   
>   #include 
>   
> +#include "krsi_init.h"
> +
> +struct krsi_hook krsi_hooks_l

Re: Linux 5.3-rc8

2019-09-14 Thread Theodore Y. Ts'o
On Sat, Sep 14, 2019 at 11:25:09AM +0200, Ahmed S. Darwish wrote:
> Unfortunately, it only made the early fast init faster, but didn't fix
> the normal crng init blockage :-(

Yeah, I see why; the original goal was to do the fast init so that
using /dev/urandom, even before we were fully initialized, wouldn't be
deadly.  But then we still wanted 128 bits of estimated entropy the
old fashioned way before we declare the CRNG initialized.

There are a bunch of things that I think I want to do long-term, such
as make CONFIG_RANDOM_TRUST_CPU the default, trying to get random
entropy from the bootloader, etc.  But none of this is something we
should do in a hurry, especially this close before 5.4 drops.  So I
think I want to fix things this way, which is a bit of a hack, but I
think it's better than simply reverting commit b03755ad6f33.

Ahmed, Linus, what do you think?

- Ted

>From f1a111bff3b996258410e51a3760fc39bbd7058f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o 
Date: Sat, 14 Sep 2019 12:21:39 -0400
Subject: [PATCH] ext4: don't plug in __ext4_get_inode_loc if the CRNG is not
 initialized

Unfortunately commit b03755ad6f33 ("ext4: make __ext4_get_inode_loc
plug") is so effective that on some systems, where RDRAND is not
trusted, and the GNOME display manager is using getrandom(2) to get
randomness for MIT Magic Cookie (which isn't really secure so using
getrandom(2) is a bit of waste) in early boot on an Arch system is
causing the boot to hang.

Since this is causing problems, although arguably this is userspace's
fault, let's not do it if the CRNG is not yet initialized.  This is
better than trying to tweak the random number generator right before
5.4 is released (I'm afraid we'll accidentally make it _too_ weak),
and it's also better than simply completely reverting b03755ad6f33.

We're effectively reverting it while the RNG is not yet initialized,
to slow down the boot and make it less efficient, just to work around
broken init setups.

Fixes: b03755ad6f33 ("ext4: make __ext4_get_inode_loc plug")
Signed-off-by: Theodore Ts'o 
---
 fs/ext4/inode.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4e271b509af1..41ad93f11b6d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4534,6 +4534,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
struct buffer_head  *bh;
struct super_block  *sb = inode->i_sb;
ext4_fsblk_tblock;
+   int be_inefficient = !rng_is_initialized();
struct blk_plug plug;
int inodes_per_block, inode_offset;
 
@@ -4541,7 +4542,6 @@ static int __ext4_get_inode_loc(struct inode *inode,
if (inode->i_ino < EXT4_ROOT_INO ||
inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
return -EFSCORRUPTED;
-
iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
if (!gdp)
@@ -4623,7 +4623,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 * If we need to do any I/O, try to pre-readahead extra
 * blocks from the inode table.
 */
-   blk_start_plug(&plug);
+   if (likely(!be_inefficient))
+   blk_start_plug(&plug);
if (EXT4_SB(sb)->s_inode_readahead_blks) {
ext4_fsblk_t b, end, table;
unsigned num;
@@ -4654,7 +4655,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
-   blk_finish_plug(&plug);
+   if (likely(!be_inefficient))
+   blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
-- 
2.23.0



Re: Linux 5.3-rc8

2019-09-14 Thread Linus Torvalds
On Sat, Sep 14, 2019 at 8:02 AM Ahmed S. Darwish  wrote:
>
> On Thu, Sep 12, 2019 at 12:34:45PM +0100, Linus Torvalds wrote:
> >
> > An alternative might be to make getrandom() just return an error
> > instead of waiting. Sure, fill the buffer with "as random as we can"
> > stuff, but then return -EINVAL because you called us too early.
>
> ACK, that's probably _the_ most sensible approach. Only caveat is
> the slight change in user-space API semantics though...
>
> For example, this breaks the just released systemd-random-seed(8)
> as it _explicitly_ requests blocking behavior from getrandom() here:
>

Actually, I would argue that the "don't ever block, instead fill
buffer and return error instead" fixes this broken case.

> => src/random-seed/random-seed.c:
> /*
>  * Let's make this whole job asynchronous, i.e. let's make
>  * ourselves a barrier for proper initialization of the
>  * random pool.
>  */
>  k = getrandom(buf, buf_size, GRND_NONBLOCK);
>  if (k < 0 && errno == EAGAIN && synchronous) {
>  log_notice("Kernel entropy pool is not initialized yet, "
> "waiting until it is.");
>
>  k = getrandom(buf, buf_size, 0); /* retry synchronously */
>  }

Yeah, the above is yet another example of completely broken garbage.

You can't just wait and block at boot. That is simply 100%
unacceptable, and always has been, exactly because that may
potentially mean waiting forever since you didn't do anything that
actually is likely to add any entropy.

>  if (k < 0) {
>  log_debug_errno(errno, "Failed to read random data with "
>  "getrandom(), falling back to "
>  "/dev/urandom: %m");

At least it gets a log message.

So I think the right thing to do is to just make getrandom() return
-EINVAL, and refuse to block.

As mentioned, this has already historically been a huge issue on
embedded devices, and with disks turning not just to NVMe but to
actual polling nvdimm/xpoint/flash, the amount of true "entropy"
randomness we can give at boot is very questionable.

We can (and will) continue to do a best-effort thing (including very
much using rdrand and friends), but the whole "wait for entropy"
simply *must* stop.

> I've sent an RFC patch at [1].
>
> [1] https://lkml.kernel.org/r/20190914122500.GA1425@darwi-home-pc

Looks reasonable to me. Except I'd just make it simpler and make it a
big WARN_ON_ONCE(), which is a lot harder to miss than pr_notice().
Make it clear that it is a *bug* if user space thinks it should wait
at boot time.

Also, we might even want to just fill the buffer and return 0 at that
point, to make sure that even more broken user space doesn't then try
to sleep manually and turn it into a "I'll wait myself" loop.

 Linus


Re: Linux 5.3-rc8

2019-09-14 Thread Alexander E. Patrakov

14.09.2019 21:30, Linus Torvalds пишет:

On Sat, Sep 14, 2019 at 8:02 AM Ahmed S. Darwish  wrote:


On Thu, Sep 12, 2019 at 12:34:45PM +0100, Linus Torvalds wrote:


An alternative might be to make getrandom() just return an error
instead of waiting. Sure, fill the buffer with "as random as we can"
stuff, but then return -EINVAL because you called us too early.


ACK, that's probably _the_ most sensible approach. Only caveat is
the slight change in user-space API semantics though...

For example, this breaks the just released systemd-random-seed(8)
as it _explicitly_ requests blocking behavior from getrandom() here:



Actually, I would argue that the "don't ever block, instead fill
buffer and return error instead" fixes this broken case.


 => src/random-seed/random-seed.c:
 /*
  * Let's make this whole job asynchronous, i.e. let's make
  * ourselves a barrier for proper initialization of the
  * random pool.
  */
  k = getrandom(buf, buf_size, GRND_NONBLOCK);
  if (k < 0 && errno == EAGAIN && synchronous) {
  log_notice("Kernel entropy pool is not initialized yet, "
 "waiting until it is.");

  k = getrandom(buf, buf_size, 0); /* retry synchronously */
  }


Yeah, the above is yet another example of completely broken garbage.

You can't just wait and block at boot. That is simply 100%
unacceptable, and always has been, exactly because that may
potentially mean waiting forever since you didn't do anything that
actually is likely to add any entropy.


  if (k < 0) {
  log_debug_errno(errno, "Failed to read random data with "
  "getrandom(), falling back to "
  "/dev/urandom: %m");


At least it gets a log message.

So I think the right thing to do is to just make getrandom() return
-EINVAL, and refuse to block.


Let me repeat: not -EINVAL, please. Please find some other error code, 
so that the application could sensibly distinguish between this case 
(low quality entropy is in the buffer) and the "kernel is too dumb" case 
(and no entropy is in the buffer).



--
Alexander E. Patrakov



smime.p7s
Description: Криптографическая подпись S/MIME


Re: [PATCH 2/2] powerpc/83xx: map IMMR with a BAT.

2019-09-14 Thread Christophe Leroy




Le 14/09/2019 à 16:34, Scott Wood a écrit :

On Fri, 2019-08-23 at 12:50 +, Christophe Leroy wrote:

On mpc83xx with a QE, IMMR is 2Mbytes.
On mpc83xx without a QE, IMMR is 1Mbytes.
Each driver will map a part of it to access the registers it needs.
Some driver will map the same part of IMMR as other drivers.

In order to reduce TLB misses, map the full IMMR with a BAT.

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/platforms/83xx/misc.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/platforms/83xx/misc.c
b/arch/powerpc/platforms/83xx/misc.c
index f46d7bf3b140..1e395b01c535 100644
--- a/arch/powerpc/platforms/83xx/misc.c
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -18,6 +18,8 @@
  #include 
  #include 
  
+#include 

+
  #include "mpc83xx.h"
  
  static __be32 __iomem *restart_reg_base;

@@ -145,6 +147,14 @@ void __init mpc83xx_setup_arch(void)
if (ppc_md.progress)
ppc_md.progress("mpc83xx_setup_arch()", 0);
  
+	if (!__map_without_bats) {

+   int immrsize = IS_ENABLED(CONFIG_QUICC_ENGINE) ? SZ_2M :
SZ_1M;


Any reason not to unconditionally make it 2M?  After all, the kernel being
built with CONFIG_QUICC_ENGINE doesn't mean that the hardware you're running
on has it...



Euh .. ok. I didn't see it that way, but you are right.

Do you think it is not a problem to map 2M even when the quicc engine is 
not there ? Or should it check device tree instead ?


Christophe


Re: Linux 5.3-rc8

2019-09-14 Thread Linus Torvalds
On Sat, Sep 14, 2019 at 9:35 AM Alexander E. Patrakov
 wrote:
>
> Let me repeat: not -EINVAL, please. Please find some other error code,
> so that the application could sensibly distinguish between this case
> (low quality entropy is in the buffer) and the "kernel is too dumb" case
> (and no entropy is in the buffer).

I'm not convinced we want applications to see that difference.

The fact is, every time an application thinks it cares, it has caused
problems. I can just see systemd saying "ok, the kernel didn't block,
so I'll just do

   while (getrandom(x) == -ENOENTROPY)
   sleep(1);

instead. Which is still completely buggy garbage.

The fact is, we can't guarantee entropy in general. It's probably
there in practice, particularly with user space saving randomness from
last boot etc, but that kind of data may be real entropy, but the
kernel cannot *guarantee* that it is.

And people don't like us guaranteeing that rdrand/rdseed is "real
entropy" either, since they don't trust the CPU hw either.

Which means that we're all kinds of screwed. The whole "we guarantee
entropy" model is broken.

   Linus


Re: [PATCH 1/2] x86,sched: Add support for frequency invariance

2019-09-14 Thread Quentin Perret
Hi Giovanni

On Monday 09 Sep 2019 at 04:42:15 (+0200), Giovanni Gherdovich wrote:
> +static inline long arch_scale_freq_capacity(int cpu)
> +{
> + if (static_cpu_has(X86_FEATURE_APERFMPERF))
> + return per_cpu(arch_cpu_freq, cpu);

So, if this is conditional, perhaps you could also add this check in an
x86-specific implementation of arch_scale_freq_invariant() ? That would
guide sugov in the right path (see get_next_freq()) if APERF/MPERF are
unavailable.

> + return 1024 /* SCHED_CAPACITY_SCALE */;
> +}

Thanks,
Quentin


Re: [RFC v1 06/14] krsi: Implement eBPF operations, attachment and execution

2019-09-14 Thread Yonghong Song


On 9/10/19 12:55 PM, KP Singh wrote:
> From: KP Singh 
> 
> A user space program can attach an eBPF program by:
> 
>hook_fd = open("/sys/kernel/security/krsi/process_execution", O_RDWR)
>prog_fd = bpf(BPF_PROG_LOAD, ...)
>bpf(BPF_PROG_ATTACH, hook_fd, prog_fd)
> 
> When such an attach call is received, the attachment logic looks up the
> dentry and appends the program to the bpf_prog_array.
> 
> The BPF programs are stored in a bpf_prog_array and writes to the array
> are guarded by a mutex. The eBPF programs are executed as a part of the
> LSM hook they are attached to. If any of the eBPF programs return
> an error (-ENOPERM) the action represented by the hook is denied.
> 
> Signed-off-by: KP Singh 
> ---
>   include/linux/krsi.h  |  18 ++
>   kernel/bpf/syscall.c  |   3 +-
>   security/krsi/include/krsi_init.h |  51 +++
>   security/krsi/krsi.c  |  13 +++-
>   security/krsi/krsi_fs.c   |  28 
>   security/krsi/ops.c   | 102 ++
>   6 files changed, 213 insertions(+), 2 deletions(-)
>   create mode 100644 include/linux/krsi.h
> 
> diff --git a/include/linux/krsi.h b/include/linux/krsi.h
> new file mode 100644
> index ..c7d1790d0c1f
> --- /dev/null
> +++ b/include/linux/krsi.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _KRSI_H
> +#define _KRSI_H
> +
> +#include 
> +
> +#ifdef CONFIG_SECURITY_KRSI
> +int krsi_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> +#else
> +static inline int krsi_prog_attach(const union bpf_attr *attr,
> +struct bpf_prog *prog)
> +{
> + return -EINVAL;
> +}
> +#endif /* CONFIG_SECURITY_KRSI */
> +
> +#endif /* _KRSI_H */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index f38a539f7e67..ab063ed84258 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -4,6 +4,7 @@
>   #include 
>   #include 
>   #include 
> +#include 
>   #include 
>   #include 
>   #include 
> @@ -1950,7 +1951,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
>   ret = lirc_prog_attach(attr, prog);
>   break;
>   case BPF_PROG_TYPE_KRSI:
> - ret = -EINVAL;
> + ret = krsi_prog_attach(attr, prog);
>   break;
>   case BPF_PROG_TYPE_FLOW_DISSECTOR:
>   ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
> diff --git a/security/krsi/include/krsi_init.h 
> b/security/krsi/include/krsi_init.h
> index 68755182a031..4e17ecacd4ed 100644
> --- a/security/krsi/include/krsi_init.h
> +++ b/security/krsi/include/krsi_init.h
> @@ -5,12 +5,29 @@
>   
>   #include "krsi_fs.h"
>   
> +#include 
> +
>   enum krsi_hook_type {
>   PROCESS_EXECUTION,
>   __MAX_KRSI_HOOK_TYPE, /* delimiter */
>   };
>   
>   extern int krsi_fs_initialized;
> +
> +struct krsi_bprm_ctx {
> + struct linux_binprm *bprm;
> +};
> +
> +/*
> + * krsi_ctx is the context that is passed to all KRSI eBPF
> + * programs.
> + */
> +struct krsi_ctx {
> + union {
> + struct krsi_bprm_ctx bprm_ctx;
> + };
> +};
> +
>   /*
>* The LSM creates one file per hook.
>*
> @@ -33,10 +50,44 @@ struct krsi_hook {
>* The dentry of the file created in securityfs.
>*/
>   struct dentry *h_dentry;
> + /*
> +  * The mutex must be held when updating the progs attached to the hook.
> +  */
> + struct mutex mutex;
> + /*
> +  * The eBPF programs that are attached to this hook.
> +  */
> + struct bpf_prog_array __rcu *progs;
>   };
>   
>   extern struct krsi_hook krsi_hooks_list[];
>   
> +static inline int krsi_run_progs(enum krsi_hook_type t, struct krsi_ctx *ctx)
> +{
> + struct bpf_prog_array_item *item;
> + struct bpf_prog *prog;
> + struct krsi_hook *h = &krsi_hooks_list[t];
> + int ret, retval = 0;

Reverse christmas tree style?

> +
> + preempt_disable();

Do we need preempt_disable() here?

> + rcu_read_lock();
> +
> + item = rcu_dereference(h->progs)->items;
> + while ((prog = READ_ONCE(item->prog))) {
> + ret = BPF_PROG_RUN(prog, ctx);
> + if (ret < 0) {
> + retval = ret;
> + goto out;
> + }
> + item++;
> + }
> +
> +out:
> + rcu_read_unlock();
> + preempt_enable();
> + return IS_ENABLED(CONFIG_SECURITY_KRSI_ENFORCE) ? retval : 0;
> +}
> +
>   #define krsi_for_each_hook(hook) \
>   for ((hook) = &krsi_hooks_list[0]; \
>(hook) < &krsi_hooks_list[__MAX_KRSI_HOOK_TYPE]; \
> diff --git a/security/krsi/krsi.c b/security/krsi/krsi.c
> index 77d7e2f91172..d3a4a361c192 100644
> --- a/security/krsi/krsi.c
> +++ b/security/krsi/krsi.c
> @@ -1,6 +1,9 @@
>   // SPDX-License-Identifier: GPL-2.0
>   
>   #include 
> +#include 
> +#include 
> +#include 
>   
>   #include "krsi_init.h"
>   
> @@ -16,7 +19,15 @@ str

Re: Linux 5.3-rc8

2019-09-14 Thread Alexander E. Patrakov

14.09.2019 21:52, Linus Torvalds пишет:

On Sat, Sep 14, 2019 at 9:35 AM Alexander E. Patrakov
 wrote:


Let me repeat: not -EINVAL, please. Please find some other error code,
so that the application could sensibly distinguish between this case
(low quality entropy is in the buffer) and the "kernel is too dumb" case
(and no entropy is in the buffer).


I'm not convinced we want applications to see that difference.

The fact is, every time an application thinks it cares, it has caused
problems. I can just see systemd saying "ok, the kernel didn't block,
so I'll just do

while (getrandom(x) == -ENOENTROPY)
sleep(1);

instead. Which is still completely buggy garbage.


OK, I understand this viewpoint. But then still, -EINVAL is not the 
answer, because a hypothetical evil version of systemd will use -EINVAL 
as -ENOENTROPY (with flags == 0 and a reasonable buffer size, there is 
simply no other reason for the kernel to return -EINVAL). Yes I 
understand that this is a complete reverse of my previous argument.



The fact is, we can't guarantee entropy in general. It's probably
there in practice, particularly with user space saving randomness from
last boot etc, but that kind of data may be real entropy, but the
kernel cannot *guarantee* that it is.

And people don't like us guaranteeing that rdrand/rdseed is "real
entropy" either, since they don't trust the CPU hw either.

Which means that we're all kinds of screwed. The whole "we guarantee
entropy" model is broken.


I agree here. Given that you suggested "to just fill the buffer and 
return 0" in the previous mail (well, I think you really meant "return 
buflen", otherwise ENOENTROPY == 0 and your previous objection applies), 
let's do just that. As a bonus, it saves applications from the complex 
dance with retrying via /dev/urandom and finally brings a reliable API 
(modulo old and broken kernels) to get random numbers (well, as random 
as possible right now) without needing a file descriptor.


--
Alexander E. Patrakov



smime.p7s
Description: Криптографическая подпись S/MIME


Re: [PATCH v2] net: mdio: switch to using gpiod_get_optional()

2019-09-14 Thread Andy Shevchenko
On Fri, Sep 13, 2019 at 03:55:47PM -0700, Dmitry Torokhov wrote:
> The MDIO device reset line is optional and now that gpiod_get_optional()
> returns proper value when GPIO support is compiled out, there is no
> reason to use fwnode_get_named_gpiod() that I plan to hide away.
> 
> Let's switch to using more standard gpiod_get_optional() and
> gpiod_set_consumer_name() to keep the nice "PHY reset" label.
> 
> Also there is no reason to only try to fetch the reset GPIO when we have
> OF node, gpiolib can fetch GPIO data from firmwares as well.
> 

Reviewed-by: Andy Shevchenko 

But see comment below.

> Signed-off-by: Dmitry Torokhov 
> ---
> 
> Note this is an update to a patch titled "[PATCH 05/11] net: mdio:
> switch to using fwnode_gpiod_get_index()" that no longer uses the new
> proposed API and instead works with already existing ones.
> 
>  drivers/net/phy/mdio_bus.c | 22 +-
>  1 file changed, 9 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
> index ce940871331e..2e29ab841b4d 100644
> --- a/drivers/net/phy/mdio_bus.c
> +++ b/drivers/net/phy/mdio_bus.c
> @@ -42,21 +42,17 @@
>  
>  static int mdiobus_register_gpiod(struct mdio_device *mdiodev)
>  {
> - struct gpio_desc *gpiod = NULL;
> + int error;
>  
>   /* Deassert the optional reset signal */
> - if (mdiodev->dev.of_node)
> - gpiod = fwnode_get_named_gpiod(&mdiodev->dev.of_node->fwnode,
> -"reset-gpios", 0, GPIOD_OUT_LOW,
> -"PHY reset");
> - if (IS_ERR(gpiod)) {
> - if (PTR_ERR(gpiod) == -ENOENT || PTR_ERR(gpiod) == -ENOSYS)
> - gpiod = NULL;
> - else
> - return PTR_ERR(gpiod);
> - }
> -
> - mdiodev->reset_gpio = gpiod;
> + mdiodev->reset_gpio = gpiod_get_optional(&mdiodev->dev,
> +  "reset", GPIOD_OUT_LOW);
> + error = PTR_ERR_OR_ZERO(mdiodev->reset_gpio);
> + if (error)
> + return error;
> +

> + if (mdiodev->reset_gpio)

This is a redundant check.

> + gpiod_set_consumer_name(mdiodev->reset_gpio, "PHY reset");

>   return 0;
>  }
> -- 
> 2.23.0.237.gc6a4ce50a0-goog
> 
> 
> -- 
> Dmitry

-- 
With Best Regards,
Andy Shevchenko




Re: [PATCH 0/5] tools/power/x86/intel-speed-select: New command and

2019-09-14 Thread Andy Shevchenko
On Sat, Sep 14, 2019 at 12:05:08AM -0700, Srinivas Pandruvada wrote:
> This series contains some minor fixes, when firmware mask is including
> invalid CPU in the perf-profile mask. Also add some commands to
> better manage core-power feature.

Hmm... 150+ LOCs doesn't count to me as minor fixes.
So, are you considering this a material for v5.4?


> Srinivas Pandruvada (4):
>   tools/power/x86/intel-speed-select: Allow online/offline based on tdp
>   tools/power/x86/intel-speed-select: Format get-assoc information
>   tools/power/x86/intel-speed-select: Fix some debug prints
>   tools/power/x86/intel-speed-select: Extend core-power command set
> 
> Youquan Song (1):
>   tools/power/x86/intel-speed-select: Fix high priority core mask over
> count
> 
>  .../x86/intel-speed-select/isst-config.c  | 108 --
>  .../power/x86/intel-speed-select/isst-core.c  |  25 
>  .../x86/intel-speed-select/isst-display.c |  51 +
>  tools/power/x86/intel-speed-select/isst.h |   9 +-
>  4 files changed, 182 insertions(+), 11 deletions(-)
> 
> -- 
> 2.17.2
> 

-- 
With Best Regards,
Andy Shevchenko




[PATCH v4] tpm_crb: fix fTPM on AMD Zen+ CPUs

2019-09-14 Thread ivan . lazeev
From: Ivan Lazeev 

Bug link: https://bugzilla.kernel.org/show_bug.cgi?id=195657

cmd/rsp buffers are expected to be in the same ACPI region.
For Zen+ CPUs BIOS's might report two different regions, some of
them also report region sizes inconsistent with values from TPM
registers.

Memory configuration on ASRock x470 ITX:

db0a0000-dc59efff : Reserved
dc57e000-dc57efff : MSFT0101:00
dc582000-dc582fff : MSFT0101:00

Work around the issue by storing ACPI regions declared for the
device in a list of struct crb_resource. Each entry holds a
copy of the corresponding ACPI resource (iores field) and a pointer
to a possibly allocated with devm_ioremap_resource memory region
(iobase field). This data was previously held for a single resource
in struct crb_priv (iobase field) and local variable io_res in
crb_map_io function. The list is used to find corresponding
region for each buffer with crb_containing_resource, make
the buffer size consistent with its length and map it at most
once, storing the pointer to allocated resource in iobase field
of the entry.

Signed-off-by: Ivan Lazeev 
---

Changes in v3:
- make crb_containing_resource search for address only,
  because buffer sizes aren't trusted anyway
- improve commit message

Changes in v4:
- rename struct crb_resource fields (style change)
- improve commit message

I believe that storing the data in a list of
struct crb_resource makes tracking of the resource allocation
state explicit, aiding clarity.
Whilst everything that worked before seems not to be broken,
there is a possibility of allocating with crb_map_resource a resource
that is not from ACPI table, and state of such resource is not
tracked in the current solution. It might be good to track allocation
of all resources, not just ones declared by ACPI, for complete
correctness. However, as I see it now, it will complicate the
code a bit more. Do you think the change should be made, or
such situation is completely hypothetical?

 drivers/char/tpm/tpm_crb.c | 137 +++--
 1 file changed, 101 insertions(+), 36 deletions(-)

diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index e59f1f91d7f3..b301f7fc4a73 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -91,7 +91,6 @@ enum crb_status {
 struct crb_priv {
u32 sm;
const char *hid;
-   void __iomem *iobase;
struct crb_regs_head __iomem *regs_h;
struct crb_regs_tail __iomem *regs_t;
u8 __iomem *cmd;
@@ -108,6 +107,12 @@ struct tpm2_crb_smc {
u32 smc_func_id;
 };
 
+struct crb_resource {
+   struct resource iores;
+   void __iomem *iobase;
+   struct list_head list;
+};
+
 static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
unsigned long timeout)
 {
@@ -432,23 +437,57 @@ static const struct tpm_class_ops tpm_crb = {
.req_complete_val = CRB_DRV_STS_COMPLETE,
 };
 
+static void crb_free_resource_list(struct list_head *resources)
+{
+   struct crb_resource *cres, *tmp;
+
+   list_for_each_entry_safe(cres, tmp, resources, list)
+   kfree(cres);
+}
+
+static inline bool crb_resource_contains(const struct resource *io_res,
+u64 address)
+{
+   return address >= io_res->start && address <= io_res->end;
+}
+
+static struct crb_resource *crb_containing_resource(
+   const struct list_head *resources, u64 start)
+{
+   struct crb_resource *cres;
+
+   list_for_each_entry(cres, resources, list) {
+   if (crb_resource_contains(&cres->iores, start))
+   return cres;
+   }
+
+   return NULL;
+}
+
 static int crb_check_resource(struct acpi_resource *ares, void *data)
 {
-   struct resource *io_res = data;
+   struct list_head *list = data;
+   struct crb_resource *cres;
struct resource_win win;
struct resource *res = &(win.res);
 
if (acpi_dev_resource_memory(ares, res) ||
acpi_dev_resource_address_space(ares, &win)) {
-   *io_res = *res;
-   io_res->name = NULL;
+   cres = kzalloc(sizeof(*cres), GFP_KERNEL);
+   if (!cres)
+   return -ENOMEM;
+
+   cres->iores = *res;
+   cres->iores.name = NULL;
+
+   list_add_tail(&cres->list, list);
}
 
return 1;
 }
 
-static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
-struct resource *io_res, u64 start, u32 size)
+static void __iomem *crb_map_res(struct device *dev, struct crb_resource *cres,
+u64 start, u32 size)
 {
struct resource new_res = {
.start  = start,
@@ -460,10 +499,16 @@ static void __iomem *crb_map_res(struct device *dev, 
struct crb_priv *priv,
if (start != new_res

Re: [PATCH v4 0/4] arm64: Add basic support for Amlogic A1 SoC Family

2019-09-14 Thread Martin Blumenstingl
Hi Jianxin,

On Thu, Sep 12, 2019 at 10:20 AM Jianxin Pan  wrote:
>
> A1 is an application processor designed for smart audio and IoT applications,
> with Dual core ARM Cortex-A35 CPU. Unlike the previous GXL and G12 series,
> there is no Cortex-M3 AO CPU in it.
it will be interesting to see which devices will use this SoC

[...]
> Jianxin Pan (4):
>   soc: amlogic: meson-gx-socinfo: Add A1 and A113L IDs
>   dt-bindings: arm: amlogic: add A1 bindings
>   dt-bindings: arm: amlogic: add Amlogic AD401 bindings
>   arm64: dts: add support for A1 based Amlogic AD401
for the whole series:
Reviewed-by: Martin Blumenstingl 


Re: [RESEND PATCH v3 3/3] arm64: dts: meson-g12b-ugoos-am6: add initial device-tree

2019-09-14 Thread Martin Blumenstingl
Hi Christian,

my nit-picks below

On Fri, Sep 6, 2019 at 4:34 PM Christian Hewitt
 wrote:
[...]
> +   spdif_dit: audio-codec-1 {
> +   #sound-dai-cells = <0>;
> +   compatible = "linux,spdif-dit";
> +   status = "okay";
> +   sound-name-prefix = "DIT";
> +   };
please move it below sdio_pwrseq (or at least somewhere below the memory node)

[...]
> +   vcc_3v3: regulator-vcc_3v3 {
> +   compatible = "regulator-fixed";
> +   regulator-name = "VCC_3V3";
> +   regulator-min-microvolt = <3300000>;
> +   regulator-max-microvolt = <3300000>;
> +   vin-supply = <&vddao_3v3>;
> +   regulator-always-on;
> +   /* FIXME: actually controlled by VDDCPU_B_EN */
can we add the enable GPIO here now that we know how to describe the
VDDCPU_B regulator?

[...]
> +   usb1_pow: regulator-usb1_pow {
for consistency with the regulators above: regulator-usb1-pow

[...]
> +   usb_pwr_en: regulator-usb_pwr_en {
for consistency with the regulators above: regulator-usb-pwr-en

[...]
> +   vddao_1v8: regulator-vddao_1v8 {
for consistency with the regulators above: regulator-vddao-1v8

[...]
> +   vddao_3v3: regulator-vddao_3v3 {
for consistency with the regulators above: regulator-vddao-3v3

[...]
> +&cpu0 {
> +   cpu-supply = <&vddcpu_b>;
> +   operating-points-v2 = <&cpu_opp_table_0>;
> +   clocks = <&clkc CLKID_CPU_CLK>;
> +   clock-latency = <5>;
> +};
> +
> +&cpu1 {
> +   cpu-supply = <&vddcpu_b>;
> +   operating-points-v2 = <&cpu_opp_table_0>;
> +   clocks = <&clkc CLKID_CPU_CLK>;
> +   clock-latency = <5>;
> +};
> +
> +&cpu100 {
> +   cpu-supply = <&vddcpu_a>;
> +   operating-points-v2 = <&cpub_opp_table_1>;
> +   clocks = <&clkc CLKID_CPUB_CLK>;
> +   clock-latency = <5>;
> +};
> +
> +&cpu101 {
> +   cpu-supply = <&vddcpu_a>;
> +   operating-points-v2 = <&cpub_opp_table_1>;
> +   clocks = <&clkc CLKID_CPUB_CLK>;
> +   clock-latency = <5>;
> +};
> +
> +&cpu102 {
> +   cpu-supply = <&vddcpu_a>;
> +   operating-points-v2 = <&cpub_opp_table_1>;
> +   clocks = <&clkc CLKID_CPUB_CLK>;
> +   clock-latency = <5>;
> +};
> +
> +&cpu103 {
> +   cpu-supply = <&vddcpu_a>;
> +   operating-points-v2 = <&cpub_opp_table_1>;
> +   clocks = <&clkc CLKID_CPUB_CLK>;
> +   clock-latency = <5>;
> +};
(not limited to this patch: there's a lot of redundancy with the CPU
nodes across the G12B .dts)

[...]
> +&sd_emmc_a {
all nodes starting here should use alphabetical sorting


Martin


Re: [PATCH v2 0/4] task: Making tasks on the runqueue rcu protected

2019-09-14 Thread Linus Torvalds
On Sat, Sep 14, 2019 at 5:30 AM Eric W. Biederman  wrote:
>
> I have reworked these patches one more time to make it clear that the
> first 3 patches only fix task_struct so that it experiences a rcu grace
> period after it leaves the runqueue for the last time.

I remain a fan of these patches, and the added comment on the last one
is I think a sufficient clarification of the issue.

But it's patch 3 that makes me go "yeah, this is the right approach",
because it just removes subtle code in favor of something that is
understandable.

Yes, most of the lines removed may be comments, and so it doesn't
actually remove a lot of _code_, but I think the comments are a result
of just how subtle and fragile our current approach is, and the new
model not needing them as much is I think a real issue (rather than
just Eric being less verbose in the new comments and removing lines of
code that way).

Can anybody see anything wrong with the series? Because I'd love to
have it for 5.4,

 Linus


Re: [RFC v1 09/14] krsi: Add a helper function for bpf_perf_event_output

2019-09-14 Thread Yonghong Song


On 9/10/19 12:55 PM, KP Singh wrote:
> From: KP Singh 
> 
> This helper is mapped to the existing operation
> BPF_FUNC_perf_event_output.
> 
> An example usage of this function would be:
> 
> #define BUF_SIZE 64;
> 
> struct bpf_map_def SEC("maps") perf_map = {
>  .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
>  .key_size = sizeof(int),
>  .value_size = sizeof(u32),
>  .max_entries = MAX_CPUS,
> };

could you use a map definition similar to
tools/testing/selftests/bpf/progs/test_perf_buffer.c?

struct {
 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 __uint(key_size, sizeof(int));
 __uint(value_size, sizeof(u32));
} perf_map SEC(".maps");

> 
> SEC("krsi")
> int bpf_prog1(void *ctx)
> {
>   char buf[BUF_SIZE];
>   int len;
>   u64 flags = BPF_F_CURRENT_CPU;
> 
>   /* some logic that fills up buf with len data*/
>   len = fill_up_buf(buf);
>   if (len < 0)
>   return len;
>   if (len > BU)
BUF_SIZE?
>   return 0;
> 
>   bpf_perf_event_output(ctx, &perf_map, flags, buf len);
buf, len?
>   return 0;
> }
> 
> A sample program that showcases the use of bpf_perf_event_output is
> added later.
> 
> Signed-off-by: KP Singh 
> ---
>   security/krsi/ops.c | 22 ++
>   1 file changed, 22 insertions(+)
> 
> diff --git a/security/krsi/ops.c b/security/krsi/ops.c
> index a61508b7018f..57bd304a03f4 100644
> --- a/security/krsi/ops.c
> +++ b/security/krsi/ops.c
> @@ -111,6 +111,26 @@ static bool krsi_prog_is_valid_access(int off, int size,
>   return false;
>   }
>   
> +BPF_CALL_5(krsi_event_output, void *, log,

Maybe name the first argument as 'ctx' to follow typical helper convention?

> +struct bpf_map *, map, u64, flags, void *, data, u64, size)
> +{
> + if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
> + return -EINVAL;
> +
> + return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
> +}
> +
> +static const struct bpf_func_proto krsi_event_output_proto =  {
> + .func   = krsi_event_output,
> + .gpl_only   = true,
> + .ret_type   = RET_INTEGER,
> + .arg1_type  = ARG_PTR_TO_CTX,
> + .arg2_type  = ARG_CONST_MAP_PTR,
> + .arg3_type  = ARG_ANYTHING,
> + .arg4_type  = ARG_PTR_TO_MEM,
> + .arg5_type  = ARG_CONST_SIZE_OR_ZERO,
> +};
> +
>   static const struct bpf_func_proto *krsi_prog_func_proto(enum bpf_func_id
>func_id,
>const struct bpf_prog
> @@ -121,6 +141,8 @@ static const struct bpf_func_proto 
> *krsi_prog_func_proto(enum bpf_func_id
>   return &bpf_map_lookup_elem_proto;
>   case BPF_FUNC_get_current_pid_tgid:
>   return &bpf_get_current_pid_tgid_proto;
> + case BPF_FUNC_perf_event_output:
> + return &krsi_event_output_proto;
>   default:
>   return NULL;
>   }
> 


  1   2   >