about | summary | refs | log | tree | commit | diff
path: root/libbb/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbb/unicode.c')
-rw-r--r-- libbb/unicode.c 38
1 files changed, 29 insertions, 9 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 99dc1dfa6..9c4da50d3 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -28,19 +28,37 @@ void FAST_FUNC reinit_unicode(const char *LANG)
28 static const char unicode_0x394[] = { 0xce, 0x94, 0 }; 28 static const char unicode_0x394[] = { 0xce, 0x94, 0 };
29 size_t width; 29 size_t width;
30 30
31 /* We pass "" instead of "C" because some libc's have
32 * non-ASCII default locale for setlocale("") call
33 * (this allows users of such libc to have Unicoded
34 * system without having to mess with env).
35 *
36 * We set LC_CTYPE because (a) we may be called with $LC_CTYPE
37 * value in LANG, not with $LC_ALL, (b) internationalized
38 * LC_NUMERIC and LC_TIME are more PITA than benefit
39 * (for one, some utilities have hard time with comma
40 * used as a fractional separator).
41 */
31//TODO: avoid repeated calls by caching last string? 42//TODO: avoid repeated calls by caching last string?
32 setlocale(LC_ALL, (LANG && LANG[0]) ? LANG : "C"); 43 setlocale(LC_CTYPE, LANG ? LANG : "");
33 44
34 /* In unicode, this is a one character string */ 45 /* In unicode, this is a one character string */
35// can use unicode_strlen(string) too, but otherwise unicode_strlen() is unused 46 width = unicode_strlen(unicode_0x394);
36 width = mbstowcs(NULL, unicode_0x394, INT_MAX);
37 unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF); 47 unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF);
38} 48}
39 49
40void FAST_FUNC init_unicode(void) 50void FAST_FUNC init_unicode(void)
41{ 51{
42 if (unicode_status == UNICODE_UNKNOWN) 52 /* Some people set only $LC_CTYPE, not $LC_ALL, because they want
43 reinit_unicode(getenv("LANG")); 53 * only Unicode to be activated on their system, not the whole
54 * shebang of wrong decimal points, strange date formats and so on.
55 */
56 if (unicode_status == UNICODE_UNKNOWN) {
57 char *s = getenv("LC_ALL");
58 if (!s) s = getenv("LC_CTYPE");
59 if (!s) s = getenv("LANG");
60 reinit_unicode(s);
61 }
44} 62}
45 63
46#else 64#else
@@ -58,8 +76,12 @@ void FAST_FUNC reinit_unicode(const char *LANG)
58 76
59void FAST_FUNC init_unicode(void) 77void FAST_FUNC init_unicode(void)
60{ 78{
61 if (unicode_status == UNICODE_UNKNOWN) 79 if (unicode_status == UNICODE_UNKNOWN) {
62 reinit_unicode(getenv("LANG")); 80 char *s = getenv("LC_ALL");
81 if (!s) s = getenv("LC_CTYPE");
82 if (!s) s = getenv("LANG");
83 reinit_unicode(s);
84 }
63} 85}
64# endif 86# endif
65 87
@@ -963,7 +985,6 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc)
963 985
964/* The rest is mostly same for libc and for "homegrown" support */ 986/* The rest is mostly same for libc and for "homegrown" support */
965 987
966#if 0 // UNUSED
967size_t FAST_FUNC unicode_strlen(const char *string) 988size_t FAST_FUNC unicode_strlen(const char *string)
968{ 989{
969 size_t width = mbstowcs(NULL, string, INT_MAX); 990 size_t width = mbstowcs(NULL, string, INT_MAX);
@@ -971,7 +992,6 @@ size_t FAST_FUNC unicode_strlen(const char *string)
971 return strlen(string); 992 return strlen(string);
972 return width; 993 return width;
973} 994}
974#endif
975 995
976size_t FAST_FUNC unicode_strwidth(const char *string) 996size_t FAST_FUNC unicode_strwidth(const char *string)
977{ 997{