/* $OpenBSD: string.c,v 1.2 2020/07/14 16:40:04 kettenis Exp $ */
/*
 * Copyright (c) 2020 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Test the %c, %lc, %s, and %ls conversion specifiers with all their
 * modifiers, in particular with the minus flag, width, and maxbytes.
 * Also verify that other flags do nothing useful.
 */
#include <err.h>
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>

void	 tc(const char *, int, const char *);
void	 tlc(const char *, wint_t, const char *);
void	 tlc_expect_fail(const char *, wint_t);
void	 ts(const char *, const char *, const char *);
void	 tls(const char *, const wchar_t *, const char *);
void	 tls_expect_fail(const char *, const wchar_t *);

static int	 badret, badlen, badout;	/* Error counters. */
static int	 verbose;			/* For debugging. */


/*
 * Print the single-byte character c with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 */
void
tc(const char *fmt, int c, const char *want)
{
	char		 buf[32];
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	irc = snprintf(buf, sizeof(buf), fmt, c);
	len = strlen(want);
	if (irc < 0) {
		warn("printf(\"%s\", %d) returned %d", fmt, c, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("printf(\"%s\", %d) returned %d (expected %zu)",
		    fmt, c, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("printf(\"%s\", %d) wrote \"%s\" (expected \"%s\")",
		    fmt, c, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("printf(\"%s\", %d) wrote \"%s\" length %d (OK)",
		    fmt, c, buf, irc);
}

/*
 * Print the wide character wc with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 */
void
tlc(const char *fmt, wint_t wc, const char *want)
{
	char		 buf[32];
	const char	*charset;
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, wc);
	len = strlen(want);
	if (irc < 0) {
		warn("%s printf(\"%s\", U+%.4X) returned %d",
		    charset, fmt, (unsigned int)wc, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("%s printf(\"%s\", U+%.4X) returned %d (expected %zu)",
		    charset, fmt, (unsigned int)wc, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("%s printf(\"%s\", U+%.4X) "
		    "wrote \"%s\" (expected \"%s\")",
		    charset, fmt, (unsigned int)wc, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("%s printf(\"%s\", U+%.4X) wrote \"%s\" length %d (OK)",
		    charset, fmt, (unsigned int)wc, buf, irc);
}

/*
 * Try to print the invalid wide character wc with the format fmt,
 * check that it fails as it should, and report and count if it doesn't.
 */
void
tlc_expect_fail(const char *fmt, wint_t wc)
{
	char		 buf[32];
	const char	*charset;
	int		 irc;

	errno = 0;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, wc);
	if (irc != -1) {
		warn("%s printf(\"%s\", U+%.4X) returned %d",
		    charset, fmt, (unsigned int)wc, irc);
		badret++;
	} else if (errno != EILSEQ) {
		warnx("%s printf(\"%s\", U+%.4X) errno %d (expected %d)",
		    charset, fmt, (unsigned int)wc, errno, EILSEQ);
		badret++;
	} else if (verbose)
		warnx("%s printf(\"%s\", U+%.4X) returned %d errno %d (OK)",
		    charset, fmt, (unsigned int)wc, irc, errno);
}

/*
 * Print the string s with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 */
void
ts(const char *fmt, const char *s, const char *want)
{
	char		 buf[32];
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	irc = snprintf(buf, sizeof(buf), fmt, s);
	len = strlen(want);
	if (irc < 0) {
		warn("printf(\"%s\", \"%s\") returned %d", fmt, s, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("printf(\"%s\", \"%s\") returned %d (expected %zu)",
		    fmt, s, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("printf(\"%s\", \"%s\") wrote \"%s\" (expected \"%s\")",
		    fmt, s, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("printf(\"%s\", \"%s\") wrote \"%s\" length %d (OK)",
		    fmt, s, buf, irc);
}

/*
 * Print the wide character string ws with the format fmt,
 * check that the result matches what we want,
 * and report and count the error on failure.
 */
void
tls(const char *fmt, const wchar_t *ws, const char *want)
{
	char		 buf[32];
	const char	*charset;
	size_t		 len;
	int		 irc, happy;

	happy = 1;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, ws);
	len = strlen(want);
	if (irc < 0) {
		warn("%s printf(\"%s\", \"%ls\") returned %d",
		    charset, fmt, ws, irc);
		badret++;
		return;
	}
	if ((unsigned long long)irc != len) {
		warnx("%s printf(\"%s\", \"%ls\") returned %d (expected %zu)",
		    charset, fmt, ws, irc, len);
		badlen++;
		happy = 0;
	}
	if (strcmp(buf, want) != 0) {
		warnx("%s printf(\"%s\", \"%ls\") "
		    "wrote \"%s\" (expected \"%s\")",
		    charset, fmt, ws, buf, want);
		badout++;
		happy = 0;
	}
	if (verbose && happy)
		warnx("%s printf(\"%s\", \"%ls\") wrote \"%s\" length %d (OK)",
		    charset, fmt, ws, buf, irc);
}

/*
 * Try to print the invalid wide character string ws with the format fmt,
 * check that it fails as it should, and report and count if it doesn't.
 */
void
tls_expect_fail(const char *fmt, const wchar_t *ws)
{
	char		 buf[32];
	const char	*charset;
	int		 irc;

	errno = 0;
	charset = MB_CUR_MAX > 1 ? "UTF-8" : "ASCII";
	irc = snprintf(buf, sizeof(buf), fmt, ws);
	if (irc != -1) {
		warn("%s printf(\"%s\", U+%.4X, ...) returned %d",
		    charset, fmt, (unsigned int)*ws, irc);
		badret++;
	} else if (errno != EILSEQ) {
		warnx("%s printf(\"%s\", U+%.4X, ...) errno %d (expected %d)",
		    charset, fmt, (unsigned int)*ws, errno, EILSEQ);
		badret++;
	} else if (verbose)
		warnx("%s printf(\"%s\", U+%.4X, ...) "
		    "returned %d errno %d (OK)",
		    charset, fmt, (unsigned int)*ws, irc, errno);
}

int
main(int argc, char *argv[])
{
	const wchar_t	 ws[] = { 0x0421, 0x043e, 0x0444, 0x044f, 0 };
	const wchar_t	 wsbad[] = { 0x0391, 0xdeef, 0x3c9, 0 };
	int		 badarg, picky;
	int		 ch;

	badarg = picky = 0;
	while ((ch = getopt(argc, argv, "pv")) != -1) {
		switch (ch) {
		case 'p':
			picky = 1;
			break;
		case 'v':
			verbose = 1;
			break;
		default:
			badarg = 1;
			break;
		}
	}
	argc -= optind;
	argv += optind;
	if (argc > 0) {
		warnx("unexpected argument \"%s\"", *argv);
		badarg = 1;
	}
	if (badarg) {
		fputs("usage: string [-pv]\n", stderr);
		return 1;
	}

	/*
	 * Valid use cases of %c and %s.
	 */

	tc("<%c>", '=', "<=>");
	tc("<%c>", '\t', "<\t>");
	tc("<%c>", 0xfe, "<\xfe>");
	tc("<%-c>", '=', "<=>");
	tc("<%2c>", '=', "< =>");
	tc("<%-2c>", '=', "<= >");

	ts("<%s>", "text", "<text>");
	ts("<%-s>", "text", "<text>");
	ts("<%6s>", "text", "<  text>");
	ts("<%-6s>", "text", "<text  >");
	ts("<%.2s>", "text", "<te>");
	ts("<%4.2s>", "text", "<  te>");
	ts("<%-4.2s>", "text", "<te  >");

	/*
	 * Undefined behaviour of %c and %s.
	 * Do not test by default to avoid noise.
	 * But provide the tests anyway to help track down
	 * unintended changes of behaviour when needed.
	 */

	if (picky) {
		tc("<%#c>", '=', "<=>");
		tc("<% -3c>", '=', "<=  >");
		tc("<%+-3c>", '=', "<=  >");
		tc("<%03c>", '=', "<00=>");
		tc("<%-03c>", '=', "<=  >");
		tc("<%3.2c>", '=', "<  =>");
		tc("<%hc>", '=', "<=>");

		ts("<%#s>", "text", "<text>");
		ts("<% -6s>", "text", "<text  >");
		ts("<%+-6s>", "text", "<text  >");
		ts("<%06s>", "text", "<00text>");
		ts("<%-06s>", "text", "<text  >");
		ts("<%hs>", "text", "<text>");
	}

	/*
	 * Valid use cases of %lc and %ls in the POSIX locale.
	 */

	tlc("<%lc>", L'=', "<=>");
	tlc("<%lc>", L'\t', "<\t>");
	tlc_expect_fail("<%lc>", 0x03c0);
	tlc("<%-lc>", L'=', "<=>");
	tlc("<%2lc>", L'=', "< =>");
	tlc("<%-2lc>", L'=', "<= >");

	tls("<%ls>", L"text", "<text>");
	tls_expect_fail("<%ls>", ws);
	tls_expect_fail("<%ls>", wsbad);
	tls("<%-ls>", L"text", "<text>");
	tls("<%6ls>", L"text", "<  text>");
	tls("<%-6ls>", L"text", "<text  >");
	tls("<%.2ls>", L"text", "<te>");
	tls("<%4.2ls>", L"text", "<  te>");
	tls("<%-4.2ls>", L"text", "<te  >");

	/*
	 * Undefined behaviour of %lc and %ls in the POSIX locale.
	 */

	if (picky) {
		tlc("<%lc>", 0x00fe, "<\xfe>");
		tlc("<%#lc>", L'=', "<=>");
		tlc("<% -3lc>", L'=', "<=  >");
		tlc("<%+-3lc>", L'=', "<=  >");
		tlc("<%03lc>", L'=', "<00=>");
		tlc("<%-03lc>", L'=', "<=  >");
		tlc("<%3.2lc>", L'=', "<  =>");
		tc("<%llc>", '=', "<=>");

		tls("<%#ls>", L"text", "<text>");
		tls("<% -6ls>", L"text", "<text  >");
		tls("<%+-6ls>", L"text", "<text  >");
		tls("<%06ls>", L"text", "<00text>");
		tls("<%-06ls>", L"text", "<text  >");
		ts("<%lls>", "text", "<text>");
	}

	/*
	 * Valid use cases of %lc and %ls in a UTF-8 locale.
	 */

	if (setlocale(LC_CTYPE, "C.UTF-8") == NULL)
		err(1, "setlocale");

	tlc("<%lc>", L'=', "<=>");
	tlc("<%lc>", L'\t', "<\t>");
	tlc("<%lc>", 0x00fe, "<\xc3\xbe>");
	tlc("<%lc>", 0x03c0, "<\xcf\x80>");
	tlc_expect_fail("<%lc>", 0x123456);
	tlc("<%-lc>", L'=', "<=>");
	tlc("<%-lc>", 0x03c0, "<\xcf\x80>");
	tlc("<%2lc>", L'=', "< =>");
	tlc("<%3lc>", 0x03c0, "< \xcf\x80>");
	tlc("<%-2lc>", L'=', "<= >");
	tlc("<%-3lc>", 0x03c0, "<\xcf\x80 >");

	tls("<%ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
	tls_expect_fail("<%ls>", wsbad);
	tls("<%-ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
	tls("<%9ls>", ws, "< \xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f>");
	tls("<%-9ls>", ws, "<\xd0\xa1\xd0\xbe\xd1\x84\xd1\x8f >");
	tls("<%.4ls>", ws, "<\xd0\xa1\xd0\xbe>");
	tls("<%.3ls>", ws, "<\xd0\xa1>");
	tls("<%6.4ls>", ws, "<  \xd0\xa1\xd0\xbe>");
	tls("<%3.3ls>", ws, "< \xd0\xa1>");
	tls("<%-6.4ls>", ws, "<\xd0\xa1\xd0\xbe  >");
	tls("<%-3.3ls>", ws, "<\xd0\xa1 >");

	/*
	 * Undefined behaviour of %lc and %ls in a UTF-8 locale.
	 */

	if (picky) {
		tlc("<%#lc>", 0x03c0, "<\xcf\x80>");
		tlc("<% -4lc>", 0x03c0, "<\xcf\x80  >");
		tlc("<%+-4lc>", 0x03c0, "<\xcf\x80  >");
		tlc("<%04lc>", 0x03c0, "<00\xcf\x80>");
		tlc("<%-04lc>", 0x03c0, "<\xcf\x80  >");
		tlc("<%4.5lc>", 0x03c0, "<  \xcf\x80>");
		tlc("<%4.3lc>", 0x03c0, "<  \xcf\x80>");
		tlc("<%4.1lc>", 0x03c0, "<  \xcf\x80>");
		tc("<%llc>", 0xfe, "<\xfe>");

		tls("<%#ls>", ws + 2, "<\xd1\x84\xd1\x8f>");
		tls("<% -6ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
		tls("<%+-6ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
		tls("<%06ls>", ws + 2, "<00\xd1\x84\xd1\x8f>");
		tls("<%-06ls>", ws + 2, "<\xd1\x84\xd1\x8f  >");
		ts("<%lls>", "text", "<text>");
	}

	/*
	 * Summarize the results.
	 */

	if (badret + badlen + badout)
		errx(1, "ERRORS: %d fail + %d mismatch (incl. %d bad length)",
		    badret, badout, badlen);
	else if (verbose)
		warnx("SUCCESS");
	return 0;
}