aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Adler <git@madler.net>2026-01-28 12:41:23 -0800
committerMark Adler <git@madler.net>2026-01-28 12:44:54 -0800
commit2f752cea32bc30143bfbc714bc43919f8ca4079c (patch)
tree4f13ff8f5953ba605abe09b570a312e73d035616
parent8236296e42776f1a2317eb596a6b9753bbe76159 (diff)
downloadzlib-2f752cea32bc30143bfbc714bc43919f8ca4079c.tar.gz
zlib-2f752cea32bc30143bfbc714bc43919f8ca4079c.tar.bz2
zlib-2f752cea32bc30143bfbc714bc43919f8ca4079c.zip
Set bit 11 of the zip header flags in minizip if UTF-8.
The bit is set if the file name is valid UTF-8 and contains at least one character encoded in two or more bytes. If there is a comment, then the comment has to pass the same test for bit 11 to be set.
-rw-r--r-- contrib/minizip/zip.c 43
1 file changed, 43 insertions, 0 deletions
diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c
index 103f325d..37cafbf8 100644
--- a/contrib/minizip/zip.c
+++ b/contrib/minizip/zip.c
@@ -1247,6 +1247,46 @@ local int Write_LocalFileHeader(zip64_internal* zi, const char* filename, uInt s
1247 return err; 1247 return err;
1248} 1248}
1249 1249
// Return the length in bytes, in [1..4], of the UTF-8 code that starts at
// str[0], looking at no more than the len bytes str[0..len-1]. If there is no
// valid UTF-8 code there, return a negative value instead: minus the number of
// bytes that were examined in order to determine that it was bad. If minus the
// return value is one more than len, then the input ran out before the code
// could be completed, i.e. at least one more byte than provided would be
// needed (the bytes seen so far are not necessarily a valid prefix).
//
// Rejected as invalid: an empty input, a stray continuation byte, a missing or
// malformed continuation byte, overlong encodings, the UTF-16 surrogates
// U+D800..U+DFFF (which RFC 3629 forbids in UTF-8), and codes above U+10FFFF.
local inline int utf8len(unsigned char const *str, size_t len) {
    return
        len == 0 ? -1 :                         // empty input
        str[0] < 0x80 ? 1 :                     // good one-byte
        str[0] < 0xc0 ? -1 :                    // bad first byte
        len < 2 || (str[1] >> 6) != 2 ? -2 :    // missing or bad second byte
        str[0] < 0xc2 ? -2 :                    // overlong code
        str[0] < 0xe0 ? 2 :                     // good two-byte
        len < 3 || (str[2] >> 6) != 2 ? -3 :    // missing or bad third byte
        str[0] == 0xe0 && str[1] < 0xa0 ? -3 :  // overlong code
        str[0] == 0xed && str[1] >= 0xa0 ? -3 : // surrogate 0xd800..0xdfff
        str[0] < 0xf0 ? 3 :                     // good three-byte
        len < 4 || (str[3] >> 6) != 2 ? -4 :    // missing or bad fourth byte
        str[0] == 0xf0 && str[1] < 0x90 ? -4 :  // overlong code
        str[0] < 0xf4 ||
        (str[0] == 0xf4 && str[1] < 0x90) ? 4 : // good four-byte
        -4;                                     // code > 0x10ffff
}
1272
// Decide whether bit 11 of the zip header flags should be set for the len
// bytes at str. Returns 1 only when every byte of str[0..len-1] belongs to a
// valid UTF-8 code *and* at least one of those codes is two or more bytes
// long. Returns 0 for invalid UTF-8, for an empty string, and for a string
// made up solely of one-byte codes.
local int isutf8(char const *str, size_t len) {
    unsigned char const *cur = (unsigned char const *)str;
    int multibyte = 0;

    for (size_t left = len; left != 0; ) {
        int n = utf8len(cur, left);

        if (n < 0)
            return 0;               // invalid code: not UTF-8 at all
        if (n > 1)
            multibyte = 1;          // saw a code of two or more bytes
        cur += n;
        left -= n;
    }
    return multibyte;
}
1289
1250/* 1290/*
1251 NOTE. 1291 NOTE.
1252 When writing RAW the ZIP64 extended information in extrafield_local and extrafield_global needs to be stripped 1292 When writing RAW the ZIP64 extended information in extrafield_local and extrafield_global needs to be stripped
@@ -1333,6 +1373,9 @@ extern int ZEXPORT zipOpenNewFileInZip4_64(zipFile file, const char* filename, c
1333 zi->ci.flag |= 6; 1373 zi->ci.flag |= 6;
1334 if (password != NULL) 1374 if (password != NULL)
1335 zi->ci.flag |= 1; 1375 zi->ci.flag |= 1;
1376 if (isutf8(filename, size_filename) &&
1377 (size_comment == 0 || isutf8(comment, size_comment)))
1378 zi->ci.flag |= (1 << 11);
1336 1379
1337 zi->ci.crc32 = 0; 1380 zi->ci.crc32 = 0;
1338 zi->ci.method = method; 1381 zi->ci.method = method;