This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 2af924d4e IMPALA-12516: Set HDFS limit based on memlock
2af924d4e is described below

commit 2af924d4e5f9191a275e2ddeea6ba866329b0b1d
Author: Michael Smith <[email protected]>
AuthorDate: Tue Oct 24 16:06:38 2023 -0700

    IMPALA-12516: Set HDFS limit based on memlock
    
    With RHEL 8 on AWS Graviton instances,
    dfs.datanode.max.locked.memory=64000 is insufficient to run
    query_test/test_hdfs_caching.py::TestHdfsCaching::test_table_is_cached.
    
    Sets dfs.datanode.max.locked.memory based on 'ulimit -l', and sets
    memlock to 64MB in bootstrap_system.sh to match modern defaults and
    provide space for future HDFS caching tests.
    
    New setting can be seen in admin output like
    
      node-1 will use ports DATANODE_PORT=31002, DATANODE_HTTP_PORT=31012,
      DATANODE_IPC_PORT=31022, DATANODE_HTTPS_PORT=31032,
      DATANODE_CLIENT_PORT=31042, NODEMANAGER_PORT=31102,
      NODEMANAGER_LOCALIZER_PORT=31122, NODEMANAGER_WEBUI_PORT=31142,
      KUDU_TS_RPC_PORT=31202, and KUDU_TS_WEBUI_PORT=31302;
      DATANODE_LOCKED_MEM=65536000
    
    Change-Id: I7722ddd0c7fbd9bbd1979503952b7522b808194a
    Reviewed-on: http://gerrit.cloudera.org:8080/20623
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Joe McDonnell <[email protected]>
---
 bin/bootstrap_system.sh                                      |  6 ++++++
 testdata/cluster/admin                                       | 12 ++++++++++--
 .../node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl |  5 +++--
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index ee1427277..bf1e7a211 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -471,6 +471,12 @@ sudo chown $(whoami) /var/lib/hadoop-hdfs/
 # TODO: restrict this to only the users it is needed for
 echo -e "\n* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
 
+# Increase memlock for HDFS caching. On RedHat systems this defaults to 64 (KB). Ubuntu
+# uses systemd, which sets its own default. With Ubuntu 18.04 that default is 16 KB,
+# 20.04+ defaults to 64 MB. Set all to 64 MB for the current user; Impala test systems
+# require 10s of GBs of memory, so this setting should not be a problem.
+echo -e "$USER - memlock 65536" | sudo tee /etc/security/limits.d/10-memlock.conf
+
 # Default on CentOS limits a user to 1024 or 4096 processes (threads) , which isn't
 # enough for minicluster with all of its friends.
 redhat7 sudo sed -i 's,\*\s*soft\s*nproc\s*[0-9]*$,* soft nproc unlimited,' \
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 8766a89b1..179bb11ab 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -244,6 +244,13 @@ function create_cluster {
     DATANODE_IPC_PORT=$((DATANODE_FREE_IPC_PORT_START++))
     DATANODE_HTTPS_PORT=$((DATANODE_FREE_HTTPS_PORT_START++))
     DATANODE_CLIENT_PORT=$((DATANODE_FREE_CLIENT_PORT_START++))
+    ULIMIT_LOCKED_MEM="$(ulimit -l)"
+    if [[ "${ULIMIT_LOCKED_MEM}" == "unlimited" ]]; then
+      # Use a default of 64MB for HDFS caching. Should match memlock in bootstrap_system.
+      ULIMIT_LOCKED_MEM=65536
+    fi
+    # Allocate slightly less than memlock to each datanode.
+    DATANODE_LOCKED_MEM=$((ULIMIT_LOCKED_MEM*1000))
     NODEMANAGER_PORT=$((NODEMANAGER_FREE_PORT_START++))
     NODEMANAGER_LOCALIZER_PORT=$((NODEMANAGER_FREE_LOCALIZER_PORT_START++))
     NODEMANAGER_WEBUI_PORT=$((NODEMANAGER_FREE_WEBUI_PORT_START++))
@@ -258,11 +265,12 @@ function create_cluster {
         "NODEMANAGER_LOCALIZER_PORT=$NODEMANAGER_LOCALIZER_PORT," \
         "NODEMANAGER_WEBUI_PORT=$NODEMANAGER_WEBUI_PORT," \
         "KUDU_TS_RPC_PORT=$KUDU_TS_RPC_PORT," \
-        "and KUDU_TS_WEBUI_PORT=$KUDU_TS_WEBUI_PORT"
+        "and KUDU_TS_WEBUI_PORT=$KUDU_TS_WEBUI_PORT;" \
+        "DATANODE_LOCKED_MEM=$DATANODE_LOCKED_MEM"
 
     export NODE NODE_DIR
     export DATANODE_PORT DATANODE_HTTP_PORT DATANODE_IPC_PORT DATANODE_HTTPS_PORT
-    export DATANODE_CLIENT_PORT
+    export DATANODE_CLIENT_PORT DATANODE_LOCKED_MEM
     export NODEMANAGER_PORT NODEMANAGER_LOCALIZER_PORT NODEMANAGER_WEBUI_PORT
     export KUDU_TS_RPC_PORT KUDU_TS_WEBUI_PORT
     for TEMPLATE_PATH in $(find "$NODE_DIR" -name "*$TEMPLATE_SUFFIX"); do
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
index 641559dc4..06289387d 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/hdfs-site.xml.tmpl
@@ -99,10 +99,11 @@
     <value>1024</value>
   </property>
 
-  <!-- Set the max cached memory to ~64kb. This must be less than ulimit -l -->
+  <!-- Set the max cached memory as configured in the admin script.
+    This must be less than ulimit -l -->
   <property>
     <name>dfs.datanode.max.locked.memory</name>
-    <value>64000</value>
+    <value>${DATANODE_LOCKED_MEM}</value>
   </property>
 
   <!-- Increase the frequency the NN talks to the DNs to update the caching policy.

Reply via email to