Author: tmunro
Date: Sun Nov  8 02:50:34 2020
New Revision: 367476
URL: https://svnweb.freebsd.org/changeset/base/367476

Log:
  Add collation version support to querylocale(3).
  
  Provide a way to ask for an opaque version string for a locale_t, so
  that potential changes in sort order can be detected.  Similar to
  ICU's ucol_getVersion() and Windows' GetNLSVersionEx(), this API is
  intended to allow databases to detect when text order-based indexes
  might need to be rebuilt.
  
  The CLDR version is extracted from CLDR source data by the Makefile
  under tools/tools/locale, written into the machine-generated Makefile
  under share/colldef, passed to localedef -V, and then written into
  LC_COLLATE file headers.  The initial version is 34.0.
  tools/tools/locale was recently updated to pull down 35.0, but the
  output hasn't been committed under share/colldef yet, so that will
  provide the first observable change when it happens.  Other versioning
  schemes are possible in future, because the format is unspecified.
  
  Reviewed by:  bapt, 0mp, kib, yuripv (albeit a long time ago)
  Differential Revision:        https://reviews.freebsd.org/D17166

Modified:
  head/include/xlocale/_locale.h
  head/lib/libc/locale/collate.c
  head/lib/libc/locale/collate.h
  head/lib/libc/locale/querylocale.3
  head/lib/libc/locale/xlocale.c
  head/lib/libc/locale/xlocale_private.h
  head/share/colldef/Makefile
  head/tools/tools/locale/Makefile
  head/tools/tools/locale/tools/cldr2def.pl
  head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h
  head/usr.bin/localedef/collate.c
  head/usr.bin/localedef/localedef.1
  head/usr.bin/localedef/localedef.c
  head/usr.bin/localedef/localedef.h

Modified: head/include/xlocale/_locale.h
==============================================================================
--- head/include/xlocale/_locale.h      Sun Nov  8 02:46:04 2020        (r367475)
+++ head/include/xlocale/_locale.h      Sun Nov  8 02:50:34 2020        (r367476)
@@ -43,6 +43,7 @@
 #define LC_MESSAGES_MASK (1<<5)
 #define LC_ALL_MASK      (LC_COLLATE_MASK | LC_CTYPE_MASK | LC_MESSAGES_MASK | 
\
                          LC_MONETARY_MASK | LC_NUMERIC_MASK | LC_TIME_MASK)
+#define LC_VERSION_MASK  (1<<6)
 #define LC_GLOBAL_LOCALE ((locale_t)-1)
 
 #ifndef _LOCALE_T_DEFINED

Modified: head/lib/libc/locale/collate.c
==============================================================================
--- head/lib/libc/locale/collate.c      Sun Nov  8 02:46:04 2020        (r367475)
+++ head/lib/libc/locale/collate.c      Sun Nov  8 02:50:34 2020        (r367476)
@@ -140,7 +140,9 @@ __collate_load_tables_l(const char *encoding, struct x
                (void) _close(fd);
                return (_LDP_ERROR);
        }
-       if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
+       if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN +
+                           XLOCALE_DEF_VERSION_LEN +
+                           sizeof (info))) {
                (void) _close(fd);
                errno = EINVAL;
                return (_LDP_ERROR);
@@ -151,12 +153,14 @@ __collate_load_tables_l(const char *encoding, struct x
                return (_LDP_ERROR);
        }
 
-       if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
+       if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) {
                (void) munmap(map, sbuf.st_size);
                errno = EINVAL;
                return (_LDP_ERROR);
        }
-       TMP += COLLATE_STR_LEN;
+       TMP += COLLATE_FMT_VERSION_LEN;
+       strlcat(table->header.version, TMP, sizeof (table->header.version));
+       TMP += XLOCALE_DEF_VERSION_LEN;
 
        info = (void *)TMP;
        TMP += sizeof (*info);

Modified: head/lib/libc/locale/collate.h
==============================================================================
--- head/lib/libc/locale/collate.h      Sun Nov  8 02:46:04 2020        (r367475)
+++ head/lib/libc/locale/collate.h      Sun Nov  8 02:50:34 2020        (r367476)
@@ -53,8 +53,10 @@
 #endif
 
 #define        COLLATE_STR_LEN         24              /* should be 64-bit 
multiple */
-#define        COLLATE_VERSION         "BSD 1.0\n"
 
+#define        COLLATE_FMT_VERSION_LEN 12
+#define        COLLATE_FMT_VERSION     "BSD 1.0\n"
+
 #define        COLLATE_MAX_PRIORITY    (0x7fffffff)    /* max signed value */
 #define        COLLATE_SUBST_PRIORITY  (0x40000000)    /* bit indicates subst 
table */
 
@@ -69,7 +71,8 @@
 /*
  * The collate file format is as follows:
  *
- * char                version[COLLATE_STR_LEN];       // must be COLLATE_VERSION
+ * char        fmt_version[COLLATE_FMT_VERSION_LEN];   // must be COLLATE_FMT_VERSION
+ * char        def_version[XLOCALE_DEF_VERSION_LEN];   // NUL-terminated, may be empty
  * collate_info_t      info;                   // see below, includes padding
  * collate_char_pri_t  char_data[256];         // 8 bit char values
  * collate_subst_t     subst[*];               // 0 or more substitutions

Modified: head/lib/libc/locale/querylocale.3
==============================================================================
--- head/lib/libc/locale/querylocale.3  Sun Nov  8 02:46:04 2020        (r367475)
+++ head/lib/libc/locale/querylocale.3  Sun Nov  8 02:50:34 2020        (r367476)
@@ -27,12 +27,12 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 3, 2013
+.Dd November 8, 2020
 .Dt QUERYLOCALE 3
 .Os
 .Sh NAME
 .Nm querylocale
-.Nd Look up the locale name for a specified category
+.Nd Look up the locale name or version for a specified category
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
@@ -40,11 +40,22 @@
 .Ft const char *
 .Fn querylocale "int mask" "locale_t locale"
 .Sh DESCRIPTION
-Returns the name of the locale for the category specified by
+Returns the name or version of the locale for the category specified by
 .Fa mask .
-This possible values for the mask are the same as those in
-.Xr newlocale 3 .
-If more than one bit in the mask is set, the returned value is undefined.
+The possible values for the mask are the same as those in
+.Xr newlocale 3 ,
+when requesting the locale name.
+Specify the bitwise OR of
+.Fa LC_VERSION_MASK
+and another mask value to request a version string.
+Version strings can be compared to detect changes to the locale's definition.
+The structure of the version string is unspecified.
+Currently, version information is only available for
+.Fa LC_COLLATE_MASK ,
+and an empty string is returned for other categories.
+If more than one bit in the mask is set, not counting
+.Fa LC_VERSION_MASK ,
+the returned value is undefined.
 .Sh SEE ALSO
 .Xr duplocale 3 ,
 .Xr freelocale 3 ,
@@ -52,3 +63,12 @@ If more than one bit in the mask is set, the returned 
 .Xr newlocale 3 ,
 .Xr uselocale 3 ,
 .Xr xlocale 3
+.Sh HISTORY
+The
+.Fn querylocale
+function first appeared in
+.Fx 9.1 ,
+and is based on the function of the same name in Darwin.
+.Fa LC_VERSION_MASK
+first appeared in
+.Fx 13.0 .

Modified: head/lib/libc/locale/xlocale.c
==============================================================================
--- head/lib/libc/locale/xlocale.c      Sun Nov  8 02:46:04 2020        (r367475)
+++ head/lib/libc/locale/xlocale.c      Sun Nov  8 02:50:34 2020        (r367476)
@@ -231,6 +231,8 @@ static int dupcomponent(int type, locale_t base, local
                if (new->components[type]) {
                        strncpy(new->components[type]->locale, src->locale,
                            ENCODING_LEN);
+                       strncpy(new->components[type]->version, src->version,
+                           XLOCALE_DEF_VERSION_LEN);
                }
        } else if (base->components[type]) {
                new->components[type] = xlocale_retain(base->components[type]);
@@ -346,17 +348,24 @@ freelocale(locale_t loc)
 }
 
 /*
- * Returns the name of the locale for a particular component of a locale_t.
+ * Returns the name or version of the locale for a particular component of a
+ * locale_t.
  */
 const char *querylocale(int mask, locale_t loc)
 {
-       int type = ffs(mask) - 1;
+       int type = ffs(mask & ~LC_VERSION_MASK) - 1;
        FIX_LOCALE(loc);
        if (type >= XLC_LAST)
                return (NULL);
-       if (loc->components[type])
-               return (loc->components[type]->locale);
-       return ("C");
+       if (mask & LC_VERSION_MASK) {
+               if (loc->components[type])
+                       return (loc->components[type]->version);
+               return ("");
+       } else {
+               if (loc->components[type])
+                       return (loc->components[type]->locale);
+               return ("C");
+       }
 }
 
 /*

Modified: head/lib/libc/locale/xlocale_private.h
==============================================================================
--- head/lib/libc/locale/xlocale_private.h      Sun Nov  8 02:46:04 2020        (r367475)
+++ head/lib/libc/locale/xlocale_private.h      Sun Nov  8 02:50:34 2020        (r367476)
@@ -91,6 +91,9 @@ struct xlocale_refcounted {
        /** Function used to destroy this component, if one is required*/
        void(*destructor)(void*);
 };
+
+#define XLOCALE_DEF_VERSION_LEN 12
+
 /**
  * Header for a locale component.  All locale components must begin with this
  * header.
@@ -99,6 +102,8 @@ struct xlocale_component {
        struct xlocale_refcounted header;
        /** Name of the locale used for this component. */
        char locale[ENCODING_LEN+1];
+       /** Version of the definition for this component. */
+       char version[XLOCALE_DEF_VERSION_LEN];
 };
 
 /**

Modified: head/share/colldef/Makefile
==============================================================================
--- head/share/colldef/Makefile Sun Nov  8 02:46:04 2020        (r367475)
+++ head/share/colldef/Makefile Sun Nov  8 02:50:34 2020        (r367476)
@@ -7,10 +7,13 @@ FILESNAME=    LC_COLLATE
 .SUFFIXES:     .src .LC_COLLATE
 MAPLOC=                ${.CURDIR}/../../tools/tools/locale/etc/final-maps
 
+CLDR_VERSION=  "34.0"
+
 .include <bsd.endian.mk>
 
 .src.LC_COLLATE:
        localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.IMPSRC} \
+       -V ${CLDR_VERSION} \
        -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} ${.OBJDIR}/${.IMPSRC:T:R}
 
 LOCALES+=      af_ZA.UTF-8
@@ -227,6 +230,7 @@ FILES+=     $t.LC_COLLATE
 FILESDIR_$t.LC_COLLATE=        ${LOCALEDIR}/$t
 $t.LC_COLLATE: ${.CURDIR}/$f.src
        localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.ALLSRC} \
+       -V ${CLDR_VERSION} \
                -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} \
                ${.OBJDIR}/${.TARGET:T:R}
 .endfor

Modified: head/tools/tools/locale/Makefile
==============================================================================
--- head/tools/tools/locale/Makefile    Sun Nov  8 02:46:04 2020        (r367475)
+++ head/tools/tools/locale/Makefile    Sun Nov  8 02:50:34 2020        (r367476)
@@ -187,6 +187,8 @@ extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UN
        cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T}
 extract: extract-${CLDRFILES_${N}:T}
 .endfor
+       grep 'name="version"' ${UNIDIR}/tools/build.xml | \
+               sed 's/.* value="//;s/".*//' > ${UNIDIR}/cldr-version
 patch::
 .if exists(${PATCHDIR})
        cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch

Modified: head/tools/tools/locale/tools/cldr2def.pl
==============================================================================
--- head/tools/tools/locale/tools/cldr2def.pl   Sun Nov  8 02:46:04 2020        (r367475)
+++ head/tools/tools/locale/tools/cldr2def.pl   Sun Nov  8 02:50:34 2020        (r367476)
@@ -50,6 +50,8 @@ my $UNIDIR = undef;
 my $ETCDIR = undef;
 my $TYPE = undef;
 
+my $CLDR_VERSION = undef;
+
 my $result = GetOptions (
                "unidir=s"      => \$UNIDIR,
                "etc=s"         => \$ETCDIR,
@@ -500,6 +502,12 @@ EOF
 
 
 sub transform_collation {
+       # Read the CLDR version
+       open(FIN, "$UNIDIR/cldr-version") or die "Cannot open cldr-version";
+       read FIN, $CLDR_VERSION, -s FIN;
+       close(FIN);
+       $CLDR_VERSION =~ s/\s*$//;
+
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
@@ -861,8 +869,11 @@ sub make_makefile {
        my $SRCOUT4 = "";
        my $MAPLOC;
        if ($TYPE eq "colldef") {
+               # In future, we might want to try to put the CLDR version into
+               # the .src files with some new syntax, instead of the makefile.
                $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
                        "-i \${.IMPSRC} \\\n" .
+                       "\t-V \${CLDR_VERSION} \\\n" .
                        "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
                        "\${.OBJDIR}/\${.IMPSRC:T:R}";
                $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
@@ -875,6 +886,7 @@ sub make_makefile {
                        "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
                        "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
                        "-i \${.ALLSRC} \\\n" .
+                       "\t-V \${CLDR_VERSION} \\\n" .
                        "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
                        "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
                        ".endfor\n\n";
@@ -916,6 +928,13 @@ FILESNAME= $FILESNAMES{$TYPE}
 .SUFFIXES:     .src .${SRCOUT2}
 ${MAPLOC}
 EOF
+
+       if ($TYPE eq "colldef") {
+               print FOUT <<EOF;
+CLDR_VERSION=  "${CLDR_VERSION}"
+
+EOF
+       }
 
        if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
                print FOUT <<EOF;

Modified: head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h
==============================================================================
--- head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h        Sun Nov  8 02:46:04 2020        (r367475)
+++ head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h        Sun Nov  8 02:50:34 2020        (r367476)
@@ -51,4 +51,7 @@ struct localedef_bootstrap_xlocale_component {
        char unused;
 };
 
+/* This must agree with the definition in xlocale_private.h. */
+#define XLOCALE_DEF_VERSION_LEN 12
+
 #endif /* _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H */

Modified: head/usr.bin/localedef/collate.c
==============================================================================
--- head/usr.bin/localedef/collate.c    Sun Nov  8 02:46:04 2020        (r367475)
+++ head/usr.bin/localedef/collate.c    Sun Nov  8 02:50:34 2020        (r367476)
@@ -1119,7 +1119,8 @@ dump_collate(void)
        collelem_t              *ce;
        collchar_t              *cc;
        subst_t                 *sb;
-       char                    vers[COLLATE_STR_LEN];
+       char                    fmt_version[COLLATE_FMT_VERSION_LEN];
+       char                    def_version[XLOCALE_DEF_VERSION_LEN];
        collate_char_t          chars[UCHAR_MAX + 1];
        collate_large_t         *large;
        collate_subst_t         *subst[COLL_WEIGHTS_MAX];
@@ -1160,8 +1161,11 @@ dump_collate(void)
        }
 
        (void) memset(&chars, 0, sizeof (chars));
-       (void) memset(vers, 0, COLLATE_STR_LEN);
-       (void) strlcpy(vers, COLLATE_VERSION, sizeof (vers));
+       (void) memset(fmt_version, 0, COLLATE_FMT_VERSION_LEN);
+       (void) strlcpy(fmt_version, COLLATE_FMT_VERSION, sizeof (fmt_version));
+       (void) memset(def_version, 0, XLOCALE_DEF_VERSION_LEN);
+       if (version)
+               (void) strlcpy(def_version, version, sizeof (def_version));
 
        /*
         * We need to make sure we arrange for the UNDEFINED field
@@ -1301,7 +1305,8 @@ dump_collate(void)
        collinfo.chain_count = htote(chain_count);
        collinfo.large_count = htote(large_count);
 
-       if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) ||
+       if ((wr_category(fmt_version, COLLATE_FMT_VERSION_LEN, f) < 0) ||
+           (wr_category(def_version, XLOCALE_DEF_VERSION_LEN, f) < 0) ||
            (wr_category(&collinfo, sizeof (collinfo), f) < 0) ||
            (wr_category(&chars, sizeof (chars), f) < 0)) {
                return;

Modified: head/usr.bin/localedef/localedef.1
==============================================================================
--- head/usr.bin/localedef/localedef.1  Sun Nov  8 02:46:04 2020        (r367475)
+++ head/usr.bin/localedef/localedef.1  Sun Nov  8 02:50:34 2020        (r367476)
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 18, 2018
+.Dd November 8, 2020
 .Dt LOCALEDEF 1
 .Os
 .Sh NAME
@@ -135,6 +135,14 @@ If not supplied, then default screen widths will be as
 generally not account for East Asian encodings requiring more than a single
 character cell to display, nor for combining or accent marks that occupy
 no additional screen width.
+.It Fl V Ar version
+Specifies a version string describing the version of the locale definition.
+This string can be retrieved with
+.Xr querylocale 3 ,
+and is intended to allow applications to detect locale definition changes.
+Currently it is stored only for the
+.Sy LC_COLLATE
+category.
 .El
 .Pp
 The following operands are required:
@@ -198,6 +206,7 @@ If an error is detected, no permanent output will be c
 .Xr locale 1 ,
 .Xr iconv_open 3 ,
 .Xr nl_langinfo 3 ,
+.Xr querylocale 3 ,
 .Xr strftime 3 ,
 .Xr environ 7
 .Sh WARNINGS

Modified: head/usr.bin/localedef/localedef.c
==============================================================================
--- head/usr.bin/localedef/localedef.c  Sun Nov  8 02:46:04 2020        (r367475)
+++ head/usr.bin/localedef/localedef.c  Sun Nov  8 02:50:34 2020        (r367476)
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <limits.h>
 #include <locale.h>
 #include <dirent.h>
+#include "collate.h"
 #include "localedef.h"
 #include "parser.h"
 
@@ -62,6 +63,7 @@ int undefok = 0;
 int warnok = 0;
 static char *locname = NULL;
 static char locpath[PATH_MAX];
+char *version = NULL;
 
 const char *
 category_name(void)
@@ -253,6 +255,7 @@ usage(void)
        (void) fprintf(stderr, "  -u encoding : assume encoding\n");
        (void) fprintf(stderr, "  -w widths   : use screen widths file\n");
        (void) fprintf(stderr, "  -i locsrc   : source file for locale\n");
+       (void) fprintf(stderr, "  -V version  : version string for locale\n");
        exit(4);
 }
 
@@ -279,7 +282,7 @@ main(int argc, char **argv)
 
        (void) setlocale(LC_ALL, "");
 
-       while ((c = getopt(argc, argv, "blw:i:cf:u:vUD")) != -1) {
+       while ((c = getopt(argc, argv, "blw:i:cf:u:vUDV:")) != -1) {
                switch (c) {
                case 'D':
                        bsd = 1;
@@ -314,6 +317,9 @@ main(int argc, char **argv)
                case '?':
                        usage();
                        break;
+               case 'V':
+                       version = optarg;
+                       break;
                }
        }
 
@@ -323,6 +329,11 @@ main(int argc, char **argv)
        locname = argv[argc - 1];
        if (verbose) {
                (void) printf("Processing locale %s.\n", locname);
+       }
+
+       if (version && strlen(version) >= XLOCALE_DEF_VERSION_LEN) {
+               (void) fprintf(stderr, "Version string too long.\n");
+               exit(1);
        }
 
        if (cfname) {

Modified: head/usr.bin/localedef/localedef.h
==============================================================================
--- head/usr.bin/localedef/localedef.h  Sun Nov  8 02:46:04 2020        (r367475)
+++ head/usr.bin/localedef/localedef.h  Sun Nov  8 02:50:34 2020        (r367476)
@@ -55,6 +55,8 @@ extern int undefok;   /* mostly ignore undefined symbols
 extern int warnok;
 extern int warnings;
 
+extern char *version;
+
 int yylex(void);
 void yyerror(const char *);
 _Noreturn void errf(const char *, ...) __printflike(1, 2);
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to