From d464bd9e55516b0a7443a9ef3327f782d01e78d3 Mon Sep 17 00:00:00 2001
From: Grisha Levit <grishalevit@gmail.com>
Date: Thu, 16 Nov 2023 21:46:57 -0500
Subject: [PATCH] tests/unicode1.sub: fixup

Many of the tests in unicode1.sub don't actually run because the arrays
containing codepoints to test are sparse and the TestCodePage function
assumes that they are not.

If that's fixed, the zh_TW.BIG5 tests run but fail. I'm not sure what
the original intent was; they seem to expect U+00F6..U+00FE to be
encoded as 0xF6..0xFE, which is not the case. I replaced these with a
different set of codepoints in this patch, including one whose encoding
contains a backslash, which I'm assuming is the reason BIG5 is included
in this script.
---
 tests/intl.right   |  2 +-
 tests/unicode1.sub | 39 +++++++++------------------------------
 2 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/tests/intl.right b/tests/intl.right
index d5d7b929..7e25d29f 100644
--- a/tests/intl.right
+++ b/tests/intl.right
@@ -21,7 +21,7 @@ aéb
 bytematch
 0000000 254 012
 0000002
-Passed all 1378 Unicode tests
+Passed all 1770 Unicode tests
 0000000 303 277 012
 0000003
 0000000 303 277 012
diff --git a/tests/unicode1.sub b/tests/unicode1.sub
index 713ab40f..1a9c2882 100644
--- a/tests/unicode1.sub
+++ b/tests/unicode1.sub
@@ -16,40 +16,19 @@ unset LC_ALL
 ErrorCnt=0
 TestCnt=0
 
-  function check_valid_var_name {
-    case "${1:?Missing Variable Name}" in
-      [!a-zA-Z_]* | *[!a-zA-Z_0-9]* ) return 3;;
-    esac
-  }
-  # get_array_element VariableName ArrayName ArrayElement
-  function get_array_element {
-    check_valid_var_name "${1:?Missing Variable Name}" || return $?
-    check_valid_var_name "${2:?Missing Array Name}" || return $?
-    eval "${1}"'="${'"${2}"'["${3:?Missing Array Index}"]}"'
-  }
-  # unset_array_element VarName ArrayName
-  function get_array_element_cnt {
-    check_valid_var_name "${1:?Missing Variable Name}" || return $?
-    check_valid_var_name "${2:?Missing Array Name}" || return $?
-    eval "${1}"'="${#'"${2}"'[@]}"'
-  }
-
-
 function TestCodePage {
     local TargetCharset="${1:?Missing Test charset}"
-    local EChar RChar TCnt
-    get_array_element_cnt TCnt "${2:?Missing Array Name}"
-    for (( x=1 ; x<${TCnt} ; x++ )); do
-      get_array_element EChar "${2}"  ${x}
-      if [ -n "${EChar}" ]; then
+    local EChar RChar UVal x  # UVal holds the \UXXXXXXXX escape built for codepoint x
+    local -n Array=${2:?Missing Array Name}  # nameref to the array named by $2
+    for x in "${!Array[@]}"; do  # iterate over the indices that are set; the arrays are sparse
 	let TestCnt+=1
+	EChar=${Array[$x]}
 	printf -v UVal '\\U%08x' "${x}"
 	LC_CTYPE=${TargetCharset} printf -v RChar "${UVal}" 2>/dev/null
 	if [ "${EChar}" != "${RChar}" ]; then
 	  let ErrorCnt+=1
-	  printf "${TargetCharset}: Error Encoding U+%08X to ${TL} [ \"%q\" != \"%q\" ]\n" "${x}" "${EChar}" "${RChar}"
+	  printf "%s: Error Encoding U+%08X [ \"%q\" != \"%q\" ]\n" "${TargetCharset}" "${x}" "${EChar}" "${RChar}"
 	fi
-      fi
     done
 }
 
@@ -118,8 +97,8 @@ else
 fi
 
 zh_TW_BIG5=(
- [0x00f6]=$'\366' [0x00f7]=$'\367' [0x00f8]=$'\370' [0x00f9]=$'\371' [0x00fa]=$'\372'
- [0x00fb]=$'\373' [0x00fc]=$'\374' [0x00fd]=$'\375' [0x00fe]=$'\376'	
+ [0x03A8]=$'\243Z' [0x03A9]=$'\243[' [0x03B1]=$'\243\\' [0x03B2]=$'\243]'  # U+03B1: second byte is 0x5C, a backslash
+ [0x03B3]=$'\243^' [0x03B4]=$'\243_' [0x03B5]=$'\243`'  [0x03B6]=$'\243a'
 )
 TestCodePage zh_TW.BIG5 zh_TW_BIG5
 
@@ -324,10 +303,10 @@ else
 	echo "unicode1.sub: that will cause some of these tests to be skipped." >&2
 fi
 
-#for ((x=1;x<1000;x++)); do printf ' [0x%04x]=%-11q' "$x" "$(printf "$(printf '\\U%08x' $x)")" ; [ $(($x%5)) = 0 ] && echo; done
+#for ((x=1;x<1000;x++)); do printf -v u '\\U%08x' "$x"; printf ' [0x%04x]=%-11q' "$x" "${u@E}"; [ $(($x%5)) = 0 ] && echo; done
 C_UTF_8=(
  [0x0001]=$'\001'     [0x0002]=$'\002'     [0x0003]=$'\003'     [0x0004]=$'\004'     [0x0005]=$'\005'    
- [0x0006]=$'\006'     [0x0007]=$'\a'       [0x0008]=$'\b'       [0x0009]=$'\t'       [0x000a]=''         
+ [0x0006]=$'\006'     [0x0007]=$'\a'       [0x0008]=$'\b'       [0x0009]=$'\t'       [0x000a]=$'\n'      
  [0x000b]=$'\v'       [0x000c]=$'\f'       [0x000d]=$'\r'       [0x000e]=$'\016'     [0x000f]=$'\017'    
  [0x0010]=$'\020'     [0x0011]=$'\021'     [0x0012]=$'\022'     [0x0013]=$'\023'     [0x0014]=$'\024'    
  [0x0015]=$'\025'     [0x0016]=$'\026'     [0x0017]=$'\027'     [0x0018]=$'\030'     [0x0019]=$'\031'    
-- 
2.42.1

