With this patch unzip will select OEM CP for zip files created on
Windows based on system locale (just as Windows itself does). Used
locale to OEM CP matching from Wine. OEM CP selection can be overrided
by OEMCP environment variable.
Patch proposal in original infozip repo:
https://sourceforge.net/p/infozip/patches/29/
@@ -1958,13 +1958,16 @@
#ifdef USE_ICONV_MAPPING
+/*
typedef struct {
char *local_charset;
char *archive_charset;
} CHARSET_MAP;
+*/
/* A mapping of local <-> archive charsets used by default to convert filenames
* of DOS/Windows Zip archives. Currently very basic. */
+/*
static CHARSET_MAP dos_charset_map[] = {
{ "ANSI_X3.4-1968", "CP850" },
{ "ISO-8859-1", "CP850" },
@@ -1974,18 +1977,79 @@
{ "KOI8-U", "CP866" },
{ "ISO-8859-5", "CP866" }
};
+*/
char OEM_CP[MAX_CP_NAME] = "";
char ISO_CP[MAX_CP_NAME] = "";
+char *lc_to_oem_cp(char *lc) {
+ static char *lc_to_cp_table[] = {
+ "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720",
+ "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720",
+ "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720",
+ "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720",
+ "ar_YE", "CP720","ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857",
+ "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850",
+ "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852",
+ "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850",
+ "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737",
+ "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850",
+ "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437",
+ "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850",
+ "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850",
+ "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850",
+ "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850",
+ "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850",
+ "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850",
+ "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850",
+ "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850",
+ "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437",
+ "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862",
+ "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850",
+ "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932",
+ "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775",
+ "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850",
+ "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850",
+ "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850",
+ "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866",
+ "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855",
+ "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437",
+ "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866",
+ "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258",
+ "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"};
+ int table_len = sizeof(lc_to_cp_table) / sizeof(char *);
+ int lc_len, i;
+
+ if (lc && lc[0]) {
+ // Compare up to the dot, if it exists, e.g. en_US.UTF-8
+ for (lc_len = 0; lc[lc_len] != '.' && lc[lc_len] != '\0'; ++lc_len)
+ ;
+ for (i = 0; i < table_len; i += 2)
+ if (strncmp(lc, lc_to_cp_table[i], lc_len) == 0)
+ return lc_to_cp_table[i + 1];
+ }
+
+ return "CP437";
+}
+
/* Try to guess the default value of OEM_CP based on the current locale.
* ISO_CP is left alone for now. */
void init_conversion_charsets()
{
+ char *oemcp;
+ oemcp = getenv("OEMCP");
+ if (!oemcp) {
+ oemcp = lc_to_oem_cp(setlocale(LC_CTYPE, ""));
+ }
+ strncpy(OEM_CP, oemcp, strlen(oemcp));
+
+ /*
const char *local_charset;
int i;
+ */
/* Make a guess only if OEM_CP not already set. */
+ /*
if(*OEM_CP == '\0') {
local_charset = nl_langinfo(CODESET);
for(i = 0; i < sizeof(dos_charset_map)/sizeof(CHARSET_MAP); i++)
@@ -1995,6 +2059,7 @@
break;
}
}
+ */
}
/* Convert a string from one encoding to the current locale using iconv().