Hi,

> On Mon, Dec 9, 2024 at 3:07 AM Tomas Vondra <to...@vondra.me> wrote:
>
>> Thanks for a nice patch. I did a quick review today, seems almost there,
>> I only have a couple minor comments:
>>
>> 1) Template Patterns for Numeric Formatting
>>
>> Why the wording change? "input between 1 and 3999" sounds fine to me.
>>
> input... seemed to refer to numeric input for to_char roman format. But,
> after roman support in to_number function shouldn't we modify it? It is the
> reason I changed it to "valid for numbers 1 to 3999".
>
> 2) The docs say "standard numbers" but I'm not sure what that is? We
>> don't use that term anywhere else, I think. Same for "standard roman
>> numeral".
>>
> I will change "standard numbers" to "numbers".
> Note that we are following the standard form. There are other forms too.
> For example, 4 can be represented as IV (standard) or IIII (non standard).
> https://en.wikipedia.org/wiki/Roman_numerals#Standard_form
>
>
>> 3) A couple comments need a bit of formatting. Multi-line comments
>> should start with an empty line, some lines are a bit too long.
>>
> I will fix the multi-line formatting.
>
>
>> 4) I was wondering if the regression tests check all interesting cases,
>> and it seems none of the tests hit these two cases:
>>
>>     if (vCount > 1 || lCount > 1 || dCount > 1)
>>         return -1;
>>
> SELECT to_number('viv', 'RN') test covers this if statement for the
> subtraction part. But, I noticed that no test covers simple counts of V, L,
> D.
> I will add SELECT to_number('VV', 'RN') for that.
>
>
>> and
>>
>>     if (!IS_VALID_SUB_COMB(currChar, nextChar))
>>         return -1;
>>
> Noted. SELECT to_number('IL', 'RN') test can be added.
>

I’ve attached a new patch that addresses comments 2, 3, and 4 from Tomas.

Looking forward to your feedback.

Regards,
Hunaid Sohail
From 585dc7a04179be9260ddbdf01d8c7caa48c3037d Mon Sep 17 00:00:00 2001
From: Hunaid2000 <hunaid2000@gmail.com>
Date: Fri, 13 Dec 2024 10:19:20 +0500
Subject: [PATCH v6] Add roman support for to_number function

---
 doc/src/sgml/func.sgml                |  13 ++-
 src/backend/utils/adt/formatting.c    | 159 +++++++++++++++++++++++++-
 src/test/regress/expected/numeric.out |  45 ++++++++
 src/test/regress/sql/numeric.sql      |  23 ++++
 4 files changed, 235 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 47370e581a..5a07e741d7 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -8670,7 +8670,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
        </row>
        <row>
         <entry><literal>RN</literal></entry>
-        <entry>Roman numeral (input between 1 and 3999)</entry>
+        <entry>Roman numeral (valid for numbers 1 to 3999)</entry>
        </row>
        <row>
         <entry><literal>TH</literal> or <literal>th</literal></entry>
@@ -8798,6 +8798,17 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
        (e.g., <literal>9.99EEEE</literal> is a valid pattern).
       </para>
      </listitem>
+
+    <listitem>
+      <para>
+        In <function>to_number</function>, <literal>RN</literal> pattern converts roman numerals
+        (standard form) to numbers. It is case-insensitive (e.g., <literal>'XIV'</literal>,
+        <literal>'xiv'</literal>, and <literal>'Xiv'</literal> are all seen as <literal>14</literal>).
+        <literal>RN</literal> cannot be used in combination with any other formatting patterns
+        or modifiers, with the exception of <literal>FM</literal>, which is applicable only in
+        <function>to_char()</function> and is ignored in <function>to_number()</function>.
+      </para>
+    </listitem>
     </itemizedlist>
    </para>
 
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 0dcb551511..22de73f2e6 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -49,7 +49,6 @@
  *	- better number building (formatting) / parsing, now it isn't
  *		  ideal code
  *	- use Assert()
- *	- add support for roman number to standard number conversion
  *	- add support for number spelling
  *	- add support for string to string formatting (we must be better
  *	  than Oracle :-),
@@ -271,6 +270,31 @@ static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "D
 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
 
+/* ----------
+ * MACRO: Check if the current and next characters
+ * form a valid subtraction combination for roman numerals
+ * ----------
+ */
+#define IS_VALID_SUB_COMB(curr, next) \
+	(((curr) == 'I' && ((next) == 'V' || (next) == 'X')) || \
+	 ((curr) == 'X' && ((next) == 'L' || (next) == 'C')) || \
+	 ((curr) == 'C' && ((next) == 'D' || (next) == 'M')))
+
+/* ----------
+ * MACRO: Roman number value
+ * ----------
+ */
+#define ROMAN_VAL(r) \
+	((r) == 'I' ? 1 : \
+	(r) == 'V' ? 5 : \
+	(r) == 'X' ? 10 : \
+	(r) == 'L' ? 50 : \
+	(r) == 'C' ? 100 : \
+	(r) == 'D' ? 500 : \
+	(r) == 'M' ? 1000 : 0)
+
+#define MAX_ROMAN_LEN	15
+
 /* ----------
  * Flags & Options:
  * ----------
@@ -1075,6 +1099,7 @@ static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
 static char *fill_str(char *str, int c, int max);
 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
 static char *int_to_roman(int number);
+static int roman_to_int(char* s, int len);
 static void NUM_prepare_locale(NUMProc *Np);
 static char *get_last_relevant_decnum(char *num);
 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
@@ -1285,6 +1310,15 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n)
 
 		case NUM_rn:
 		case NUM_RN:
+			if (IS_ROMAN(num))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("cannot use \"RN\" twice")));
+			if (IS_BLANK(num) || IS_LSIGN(num) || IS_BRACKET(num) ||
+				IS_MINUS(num) || IS_PLUS(num) || IS_MULTI(num))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("\"RN\" is incompatible with other formats")));
 			num->flag |= NUM_F_ROMAN;
 			break;
 
@@ -5351,7 +5385,114 @@ int_to_roman(int number)
 	return result;
 }
 
+/*
+ * Convert a roman numeral (standard form) to an integer.
+ * Result is an integer between 1 and 3999.
+ *
+ * If input is invalid, return -1.
+ */
+static int
+roman_to_int(char* s, int len)
+{
+	int repeatCount = 1;
+	int vCount = 0, lCount = 0, dCount = 0;
+	bool subtractionEncountered = false;
+	char lastSubtractedChar = 0;
+	int total = 0;
+
+	/*
+	 * Ensure the input is not too long or empty.
+	 * 'MMMDCCCLXXXVIII' (3888) is the longest
+	 * valid roman numeral (15 characters).
+	 */
+	if (len == 0 || len > MAX_ROMAN_LEN)
+		return -1;
+
+	for (int i = 0; i < len; ++i)
+	{
+		char currChar = pg_ascii_toupper(s[i]);
+		int currValue = ROMAN_VAL(currChar);
+
+		if (currValue == 0)
+			return -1;
+
+		/*
+		 * Ensure no character greater than or equal to the
+		 * subtracted character appears after the subtraction.
+		 */
+		if (subtractionEncountered && (currValue >= ROMAN_VAL(lastSubtractedChar)))
+			return -1;
+
+		/* Check for invalid repetitions of characters V, L, or D. */
+		if (currChar == 'V') vCount++;
+		if (currChar == 'L') lCount++;
+		if (currChar == 'D') dCount++;
+		if (vCount > 1 || lCount > 1 || dCount > 1)
+			return -1;
 
+		if (i < len - 1)
+		{
+			char nextChar = pg_ascii_toupper(s[i + 1]);
+			int nextValue = ROMAN_VAL(nextChar);
+
+			if (nextValue == 0)
+				return -1;
+
+			/*
+			 * If the current value is less than the next value,
+			 * handle subtraction. Verify valid subtractive
+			 * combinations and update the total accordingly.
+			 */
+			if (currValue < nextValue)
+			{
+				/* Check for invalid repetitions of characters V, L, or D. */
+				if (nextChar == 'V') vCount++;
+				if (nextChar == 'L') lCount++;
+				if (nextChar == 'D') dCount++;
+				if (vCount > 1 || lCount > 1 || dCount > 1)
+					return -1;
+
+				/*
+				 * For cases where same character is repeated
+				 * with subtraction (e.g. 'MCCM' or 'DCCCD').
+				 */
+				if (repeatCount > 1)
+					return -1;
+
+				if (!IS_VALID_SUB_COMB(currChar, nextChar))
+					return -1;
+
+				/*
+				 * Skip the next character as it is part of
+				 * the subtractive combination.
+				 */
+				i++;
+				repeatCount = 1;
+				subtractionEncountered = true;
+				lastSubtractedChar = currChar;
+				total += (nextValue - currValue);
+			}
+			else
+			{
+				/* For same characters, check for repetition. */
+				if (currChar == nextChar)
+				{
+					repeatCount++;
+					if (repeatCount > 3)
+						return -1;
+				}
+				else
+					repeatCount = 1;
+				total += currValue;
+			}
+		}
+		/* Add the value of the last character. */
+		else
+			total += currValue;
+	}
+
+	return total;
+}
 
 /* ----------
  * Locale
@@ -5933,9 +6074,19 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
 	if (IS_ROMAN(Np->Num))
 	{
 		if (!Np->is_to_char)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("\"RN\" not supported for input")));
+		{
+			int roman_result = roman_to_int(inout, input_len);
+			if (roman_result == -1)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("invalid roman numeral")));
+			else
+			{
+				Np->Num->pre = sprintf(number, "%d", roman_result);
+				Np->Num->post = 0;
+				return number;
+			}
+		}
 
 		Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
 			Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index 0898107ec3..ac023d1a24 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -2384,6 +2384,51 @@ SELECT to_number('123456', '99999V99');
  1234.560000000000000000
 (1 row)
 
+-- Test for correct conversion between numbers and Roman numerals
+WITH rows AS
+  (SELECT i, to_char(i, 'FMRN') AS roman FROM generate_series(1, 3999) AS i)
+SELECT
+  bool_and(to_number(roman, 'RN') = i) as valid
+FROM rows;
+ valid 
+-------
+ t
+(1 row)
+
+SELECT to_number('CvIiI', 'rn');
+ to_number 
+-----------
+       108
+(1 row)
+
+SELECT to_number('viv', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('DCCCD', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('XIXL', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('MCCM', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('MMMM', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('VV', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('IL', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('CM', 'MIRN');
+ERROR:  "RN" is incompatible with other formats
+SELECT to_number('CM', 'RNRN');
+ERROR:  cannot use "RN" twice
+SELECT to_number('  XIV  ', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('MMXX  ', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('M CC', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number('', 'RN');
+ERROR:  invalid roman numeral
+SELECT to_number(' ', 'RN');
+ERROR:  invalid roman numeral
 RESET lc_numeric;
 --
 -- Input syntax
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index 9da12c6b9e..379a596c31 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -1085,6 +1085,29 @@ SELECT to_number('1234.56','L99,999.99');
 SELECT to_number('1,234.56','L99,999.99');
 SELECT to_number('42nd', '99th');
 SELECT to_number('123456', '99999V99');
+
+-- Test for correct conversion between numbers and Roman numerals
+WITH rows AS
+  (SELECT i, to_char(i, 'FMRN') AS roman FROM generate_series(1, 3999) AS i)
+SELECT
+  bool_and(to_number(roman, 'RN') = i) as valid
+FROM rows;
+
+SELECT to_number('CvIiI', 'rn');
+SELECT to_number('viv', 'RN');
+SELECT to_number('DCCCD', 'RN');
+SELECT to_number('XIXL', 'RN');
+SELECT to_number('MCCM', 'RN');
+SELECT to_number('MMMM', 'RN');
+SELECT to_number('VV', 'RN');
+SELECT to_number('IL', 'RN');
+SELECT to_number('CM', 'MIRN');
+SELECT to_number('CM', 'RNRN');
+SELECT to_number('  XIV  ', 'RN');
+SELECT to_number('MMXX  ', 'RN');
+SELECT to_number('M CC', 'RN');
+SELECT to_number('', 'RN');
+SELECT to_number(' ', 'RN');
 RESET lc_numeric;
 
 --
-- 
2.34.1

Reply via email to