Author: tmunro Date: Sun Nov 8 02:50:34 2020 New Revision: 367476 URL: https://svnweb.freebsd.org/changeset/base/367476
Log: Add collation version support to querylocale(3). Provide a way to ask for an opaque version string for a locale_t, so that potential changes in sort order can be detected. Similar to ICU's ucol_getVersion() and Windows' GetNLSVersionEx(), this API is intended to allow databases to detect when text order-based indexes might need to be rebuilt. The CLDR version is extracted from CLDR source data by the Makefile under tools/tools/locale, written into the machine-generated Makefile under shared/colldef, passed to localedef -V, and then written into LC_COLLATE file headers. The initial version is 34.0. tools/tools/locale was recently updated to pull down 35.0, but the output hasn't been committed under share/colldef yet, so that will provide the first observable change when it happens. Other versioning schemes are possible in future, because the format is unspecified. Reviewed by: bapt, 0mp, kib, yuripv (albeit a long time ago) Differential Revision: https://reviews.freebsd.org/D17166 Modified: head/include/xlocale/_locale.h head/lib/libc/locale/collate.c head/lib/libc/locale/collate.h head/lib/libc/locale/querylocale.3 head/lib/libc/locale/xlocale.c head/lib/libc/locale/xlocale_private.h head/share/colldef/Makefile head/tools/tools/locale/Makefile head/tools/tools/locale/tools/cldr2def.pl head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h head/usr.bin/localedef/collate.c head/usr.bin/localedef/localedef.1 head/usr.bin/localedef/localedef.c head/usr.bin/localedef/localedef.h Modified: head/include/xlocale/_locale.h ============================================================================== --- head/include/xlocale/_locale.h Sun Nov 8 02:46:04 2020 (r367475) +++ head/include/xlocale/_locale.h Sun Nov 8 02:50:34 2020 (r367476) @@ -43,6 +43,7 @@ #define LC_MESSAGES_MASK (1<<5) #define LC_ALL_MASK (LC_COLLATE_MASK | LC_CTYPE_MASK | LC_MESSAGES_MASK | \ LC_MONETARY_MASK | LC_NUMERIC_MASK | LC_TIME_MASK) +#define LC_VERSION_MASK (1<<6) #define LC_GLOBAL_LOCALE ((locale_t)-1) #ifndef _LOCALE_T_DEFINED Modified: head/lib/libc/locale/collate.c ============================================================================== --- head/lib/libc/locale/collate.c Sun Nov 8 02:46:04 2020 (r367475) +++ head/lib/libc/locale/collate.c Sun Nov 8 02:50:34 2020 (r367476) @@ -140,7 +140,9 @@ __collate_load_tables_l(const char *encoding, struct x (void) _close(fd); return (_LDP_ERROR); } - if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { + if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN + + XLOCALE_DEF_VERSION_LEN + + sizeof (info))) { (void) _close(fd); errno = EINVAL; return (_LDP_ERROR); @@ -151,12 +153,14 @@ __collate_load_tables_l(const char *encoding, struct x return (_LDP_ERROR); } - if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { + if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } - TMP += COLLATE_STR_LEN; + TMP += COLLATE_FMT_VERSION_LEN; + strlcat(table->header.version, TMP, sizeof (table->header.version)); + TMP += XLOCALE_DEF_VERSION_LEN; info = (void *)TMP; TMP += sizeof (*info); Modified: head/lib/libc/locale/collate.h ============================================================================== --- head/lib/libc/locale/collate.h Sun Nov 8 02:46:04 2020 (r367475) +++ head/lib/libc/locale/collate.h Sun Nov 8 02:50:34 2020 (r367476) @@ -53,8 +53,10 @@ #endif #define COLLATE_STR_LEN 24 /* should be 64-bit multiple */ -#define COLLATE_VERSION "BSD 1.0\n" +#define COLLATE_FMT_VERSION_LEN 12 +#define COLLATE_FMT_VERSION "BSD 1.0\n" + #define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */ #define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */ @@ -69,7 +71,8 @@ /* * The collate file format is as follows: * - * char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION + * char fmt_version[COLLATE_FMT_VERSION_LEN]; // must be COLLATE_FMT_VERSION + * char def_version[XLOCALE_DEF_VERSION_LEN]; // NUL-terminated, may be empty * collate_info_t info; // see below, includes padding * collate_char_pri_t char_data[256]; // 8 bit char values * collate_subst_t subst[*]; // 0 or more substitutions Modified: head/lib/libc/locale/querylocale.3 ============================================================================== --- head/lib/libc/locale/querylocale.3 Sun Nov 8 02:46:04 2020 (r367475) +++ head/lib/libc/locale/querylocale.3 Sun Nov 8 02:50:34 2020 (r367476) @@ -27,12 +27,12 @@ .\" .\" $FreeBSD$ .\" -.Dd May 3, 2013 +.Dd November 8, 2020 .Dt QUERYLOCALE 3 .Os .Sh NAME .Nm querylocale -.Nd Look up the locale name for a specified category +.Nd Look up the locale name or version for a specified category .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -40,11 +40,22 @@ .Ft const char * .Fn querylocale "int mask" "locale_t locale" .Sh DESCRIPTION -Returns the name of the locale for the category specified by +Returns the name or version of the locale for the category specified by .Fa mask . -This possible values for the mask are the same as those in -.Xr newlocale 3 . -If more than one bit in the mask is set, the returned value is undefined. +The possible values for the mask are the same as those in +.Xr newlocale 3 , +when requesting the locale name. +Specify the bitwise OR of +.Fa LC_VERSION_MASK +and another mask value to request a version string. +Version strings can be compared to detect changes to the locale's definition. +The structure of the version string is unspecified. +Currently, version information is only available for +.Fa LC_COLLATE_MASK , +and an empty string is returned for other categories. +If more than one bit in the mask is set, not counting +.Fa LC_VERSION_MASK , +the returned value is undefined. .Sh SEE ALSO .Xr duplocale 3 , .Xr freelocale 3 , @@ -52,3 +63,12 @@ If more than one bit in the mask is set, the returned .Xr newlocale 3 , .Xr uselocale 3 , .Xr xlocale 3 +.Sh HISTORY +The +.Fn querylocale +function first appeared in +.Fx 9.1 , +and is based on the function of the same name in Darwin. +.Fa LC_VERSION_MASK +first appeared in +.Fx 13.0 . Modified: head/lib/libc/locale/xlocale.c ============================================================================== --- head/lib/libc/locale/xlocale.c Sun Nov 8 02:46:04 2020 (r367475) +++ head/lib/libc/locale/xlocale.c Sun Nov 8 02:50:34 2020 (r367476) @@ -231,6 +231,8 @@ static int dupcomponent(int type, locale_t base, local if (new->components[type]) { strncpy(new->components[type]->locale, src->locale, ENCODING_LEN); + strncpy(new->components[type]->version, src->version, + XLOCALE_DEF_VERSION_LEN); } } else if (base->components[type]) { new->components[type] = xlocale_retain(base->components[type]); @@ -346,17 +348,24 @@ freelocale(locale_t loc) } /* - * Returns the name of the locale for a particular component of a locale_t. + * Returns the name or version of the locale for a particular component of a + * locale_t. */ const char *querylocale(int mask, locale_t loc) { - int type = ffs(mask) - 1; + int type = ffs(mask & ~LC_VERSION_MASK) - 1; FIX_LOCALE(loc); if (type >= XLC_LAST) return (NULL); - if (loc->components[type]) - return (loc->components[type]->locale); - return ("C"); + if (mask & LC_VERSION_MASK) { + if (loc->components[type]) + return (loc->components[type]->version); + return (""); + } else { + if (loc->components[type]) + return (loc->components[type]->locale); + return ("C"); + } } /* Modified: head/lib/libc/locale/xlocale_private.h ============================================================================== --- head/lib/libc/locale/xlocale_private.h Sun Nov 8 02:46:04 2020 (r367475) +++ head/lib/libc/locale/xlocale_private.h Sun Nov 8 02:50:34 2020 (r367476) @@ -91,6 +91,9 @@ struct xlocale_refcounted { /** Function used to destroy this component, if one is required*/ void(*destructor)(void*); }; + +#define XLOCALE_DEF_VERSION_LEN 12 + /** * Header for a locale component. All locale components must begin with this * header. @@ -99,6 +102,8 @@ struct xlocale_component { struct xlocale_refcounted header; /** Name of the locale used for this component. */ char locale[ENCODING_LEN+1]; + /** Version of the definition for this component. */ + char version[XLOCALE_DEF_VERSION_LEN]; }; /** Modified: head/share/colldef/Makefile ============================================================================== --- head/share/colldef/Makefile Sun Nov 8 02:46:04 2020 (r367475) +++ head/share/colldef/Makefile Sun Nov 8 02:50:34 2020 (r367476) @@ -7,10 +7,13 @@ FILESNAME= LC_COLLATE .SUFFIXES: .src .LC_COLLATE MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps +CLDR_VERSION= "34.0" + .include <bsd.endian.mk> .src.LC_COLLATE: localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.IMPSRC} \ + -V ${CLDR_VERSION} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} ${.OBJDIR}/${.IMPSRC:T:R} LOCALES+= af_ZA.UTF-8 @@ -227,6 +230,7 @@ FILES+= $t.LC_COLLATE FILESDIR_$t.LC_COLLATE= ${LOCALEDIR}/$t $t.LC_COLLATE: ${.CURDIR}/$f.src localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.ALLSRC} \ + -V ${CLDR_VERSION} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} \ ${.OBJDIR}/${.TARGET:T:R} .endfor Modified: head/tools/tools/locale/Makefile ============================================================================== --- head/tools/tools/locale/Makefile Sun Nov 8 02:46:04 2020 (r367475) +++ head/tools/tools/locale/Makefile Sun Nov 8 02:50:34 2020 (r367476) @@ -187,6 +187,8 @@ extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UN cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T} extract: extract-${CLDRFILES_${N}:T} .endfor + grep 'name="version"' ${UNIDIR}/tools/build.xml | \ + sed 's/.* value="//;s/".*//' > ${UNIDIR}/cldr-version patch:: .if exists(${PATCHDIR}) cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch Modified: head/tools/tools/locale/tools/cldr2def.pl ============================================================================== --- head/tools/tools/locale/tools/cldr2def.pl Sun Nov 8 02:46:04 2020 (r367475) +++ head/tools/tools/locale/tools/cldr2def.pl Sun Nov 8 02:50:34 2020 (r367476) @@ -50,6 +50,8 @@ my $UNIDIR = undef; my $ETCDIR = undef; my $TYPE = undef; +my $CLDR_VERSION = undef; + my $result = GetOptions ( "unidir=s" => \$UNIDIR, "etc=s" => \$ETCDIR, @@ -500,6 +502,12 @@ EOF sub transform_collation { + # Read the CLDR version + open(FIN, "$UNIDIR/cldr-version") or die "Cannot open cldr-version"; + read FIN, $CLDR_VERSION, -s FIN; + close(FIN); + $CLDR_VERSION =~ s/\s*$//; + foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { @@ -861,8 +869,11 @@ sub make_makefile { my $SRCOUT4 = ""; my $MAPLOC; if ($TYPE eq "colldef") { + # In future, we might want to try to put the CLDR version into + # the .src files with some new syntax, instead of the makefile. $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.IMPSRC} \\\n" . + "\t-V \${CLDR_VERSION} \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " . "\${.OBJDIR}/\${.IMPSRC:T:R}"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . @@ -875,6 +886,7 @@ sub make_makefile { "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" . "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.ALLSRC} \\\n" . + "\t-V \${CLDR_VERSION} \\\n" . "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" . "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" . ".endfor\n\n"; @@ -916,6 +928,13 @@ FILESNAME= $FILESNAMES{$TYPE} .SUFFIXES: .src .${SRCOUT2} ${MAPLOC} EOF + + if ($TYPE eq "colldef") { + print FOUT <<EOF; +CLDR_VERSION= "${CLDR_VERSION}" + +EOF + } if ($TYPE eq "colldef" || $TYPE eq "ctypedef") { print FOUT <<EOF; Modified: head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h ============================================================================== --- head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h Sun Nov 8 02:46:04 2020 (r367475) +++ head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h Sun Nov 8 02:50:34 2020 (r367476) @@ -51,4 +51,7 @@ struct localedef_bootstrap_xlocale_component { char unused; }; +/* This must agree with the definition in xlocale_private.h. */ +#define XLOCALE_DEF_VERSION_LEN 12 + #endif /* _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H */ Modified: head/usr.bin/localedef/collate.c ============================================================================== --- head/usr.bin/localedef/collate.c Sun Nov 8 02:46:04 2020 (r367475) +++ head/usr.bin/localedef/collate.c Sun Nov 8 02:50:34 2020 (r367476) @@ -1119,7 +1119,8 @@ dump_collate(void) collelem_t *ce; collchar_t *cc; subst_t *sb; - char vers[COLLATE_STR_LEN]; + char fmt_version[COLLATE_FMT_VERSION_LEN]; + char def_version[XLOCALE_DEF_VERSION_LEN]; collate_char_t chars[UCHAR_MAX + 1]; collate_large_t *large; collate_subst_t *subst[COLL_WEIGHTS_MAX]; @@ -1160,8 +1161,11 @@ dump_collate(void) } (void) memset(&chars, 0, sizeof (chars)); - (void) memset(vers, 0, COLLATE_STR_LEN); - (void) strlcpy(vers, COLLATE_VERSION, sizeof (vers)); + (void) memset(fmt_version, 0, COLLATE_FMT_VERSION_LEN); + (void) strlcpy(fmt_version, COLLATE_FMT_VERSION, sizeof (fmt_version)); + (void) memset(def_version, 0, XLOCALE_DEF_VERSION_LEN); + if (version) + (void) strlcpy(def_version, version, sizeof (def_version)); /* * We need to make sure we arrange for the UNDEFINED field @@ -1301,7 +1305,8 @@ dump_collate(void) collinfo.chain_count = htote(chain_count); collinfo.large_count = htote(large_count); - if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) || + if ((wr_category(fmt_version, COLLATE_FMT_VERSION_LEN, f) < 0) || + (wr_category(def_version, XLOCALE_DEF_VERSION_LEN, f) < 0) || (wr_category(&collinfo, sizeof (collinfo), f) < 0) || (wr_category(&chars, sizeof (chars), f) < 0)) { return; Modified: head/usr.bin/localedef/localedef.1 ============================================================================== --- head/usr.bin/localedef/localedef.1 Sun Nov 8 02:46:04 2020 (r367475) +++ head/usr.bin/localedef/localedef.1 Sun Nov 8 02:50:34 2020 (r367476) @@ -33,7 +33,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 18, 2018 +.Dd November 8, 2020 .Dt LOCALEDEF 1 .Os .Sh NAME @@ -135,6 +135,14 @@ If not supplied, then default screen widths will be as generally not account for East Asian encodings requiring more than a single character cell to display, nor for combining or accent marks that occupy no additional screen width. +.It Fl V Ar version +Specifies a version string describing the version of the locale definition. +This string can be retrieved with +.Xr querylocale 3 , +and is intended to allow applications to detect locale definition changes. +Currently it is stored only for the +.Sy LC_COLLATE +category. .El .Pp The following operands are required: @@ -198,6 +206,7 @@ If an error is detected, no permanent output will be c .Xr locale 1 , .Xr iconv_open 3 , .Xr nl_langinfo 3 , +.Xr querylocale 3 , .Xr strftime 3 , .Xr environ 7 .Sh WARNINGS Modified: head/usr.bin/localedef/localedef.c ============================================================================== --- head/usr.bin/localedef/localedef.c Sun Nov 8 02:46:04 2020 (r367475) +++ head/usr.bin/localedef/localedef.c Sun Nov 8 02:50:34 2020 (r367476) @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include <limits.h> #include <locale.h> #include <dirent.h> +#include "collate.h" #include "localedef.h" #include "parser.h" @@ -62,6 +63,7 @@ int undefok = 0; int warnok = 0; static char *locname = NULL; static char locpath[PATH_MAX]; +char *version = NULL; const char * category_name(void) @@ -253,6 +255,7 @@ usage(void) (void) fprintf(stderr, " -u encoding : assume encoding\n"); (void) fprintf(stderr, " -w widths : use screen widths file\n"); (void) fprintf(stderr, " -i locsrc : source file for locale\n"); + (void) fprintf(stderr, " -V version : version string for locale\n"); exit(4); } @@ -279,7 +282,7 @@ main(int argc, char **argv) (void) setlocale(LC_ALL, ""); - while ((c = getopt(argc, argv, "blw:i:cf:u:vUD")) != -1) { + while ((c = getopt(argc, argv, "blw:i:cf:u:vUDV:")) != -1) { switch (c) { case 'D': bsd = 1; @@ -314,6 +317,9 @@ main(int argc, char **argv) case '?': usage(); break; + case 'V': + version = optarg; + break; } } @@ -323,6 +329,11 @@ main(int argc, char **argv) locname = argv[argc - 1]; if (verbose) { (void) printf("Processing locale %s.\n", locname); + } + + if (version && strlen(version) >= XLOCALE_DEF_VERSION_LEN) { + (void) fprintf(stderr, "Version string too long.\n"); + exit(1); } if (cfname) { Modified: head/usr.bin/localedef/localedef.h ============================================================================== --- head/usr.bin/localedef/localedef.h Sun Nov 8 02:46:04 2020 (r367475) +++ head/usr.bin/localedef/localedef.h Sun Nov 8 02:50:34 2020 (r367476) @@ -55,6 +55,8 @@ extern int undefok; /* mostly ignore undefined symbols extern int warnok; extern int warnings; +extern char *version; + int yylex(void); void yyerror(const char *); _Noreturn void errf(const char *, ...) __printflike(1, 2); _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"