From 2f752cea32bc30143bfbc714bc43919f8ca4079c Mon Sep 17 00:00:00 2001 From: Mark Adler Date: Wed, 28 Jan 2026 12:41:23 -0800 Subject: Set bit 11 of the zip header flags in minizip if UTF-8. The bit is set if the file name contains valid UTF-8 and there is at least one code of two or more bytes. If there is a comment, then the comment has to pass the same test for bit 11 to be set. --- contrib/minizip/zip.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c index 103f325d..37cafbf8 100644 --- a/contrib/minizip/zip.c +++ b/contrib/minizip/zip.c @@ -1247,6 +1247,46 @@ local int Write_LocalFileHeader(zip64_internal* zi, const char* filename, uInt s return err; } +// Return the length of the UTF-8 code at str[0..len-1] in [1..4], or negative +// if there is no valid UTF-8 code there. If negative, it is minus the number +// of bytes examined in order to determine it was bad. Or if minus the return +// code is one less than len, then at least one more byte than provided would +// be needed to complete the code. +local inline int utf8len(unsigned char const *str, size_t len) { + return + len == 0 ? -1 : // empty input + str[0] < 0x80 ? 1 : // good one-byte + str[0] < 0xc0 ? -1 : // bad first byte + len < 2 || (str[1] >> 6) != 2 ? -2 : // missing or bad second byte + str[0] < 0xc2 ? -2 : // overlong code + str[0] < 0xe0 ? 2 : // good two-byte + len < 3 || (str[2] >> 6) != 2 ? -3 : // missing or bad third byte + str[0] == 0xe0 && str[1] < 0xa0 ? -3 : // overlong code + str[0] < 0xf0 ? 3 : // good three-byte + len < 4 || (str[3] >> 6) != 2 ? -4 : // missing or bad fourth byte + str[0] == 0xf0 && str[1] < 0x90 ? -4 : // overlong code + str[0] < 0xf4 || + (str[0] == 0xf4 && str[1] < 0x90) ? 4 : // good four-byte + -4; // code > 0x10ffff +} + +// Return true if str[0..len-1] is valid UTF-8 *and* it contains at least one +// code of two or more bytes. This is used to determine whether or not to set +// bit 11 in the zip header flags. +local int isutf8(char const *str, size_t len) { + int utf8 = 0; + while (len) { + int code = utf8len((unsigned char const *)str, len); + if (code < 0) + return 0; + if (code > 1) + utf8 = 1; + str += code; + len -= code; + } + return utf8; +} + /* NOTE. When writing RAW the ZIP64 extended information in extrafield_local and extrafield_global needs to be stripped @@ -1333,6 +1373,9 @@ extern int ZEXPORT zipOpenNewFileInZip4_64(zipFile file, const char* filename, c zi->ci.flag |= 6; if (password != NULL) zi->ci.flag |= 1; + if (isutf8(filename, size_filename) && + (size_comment == 0 || isutf8(comment, size_comment))) + zi->ci.flag |= (1 << 11); zi->ci.crc32 = 0; zi->ci.method = method; -- cgit v1.2.3-55-g6feb