diff options
Diffstat (limited to 'libbb/unicode.c')
-rw-r--r-- | libbb/unicode.c | 38 |
1 files changed, 29 insertions, 9 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c index 99dc1dfa6..9c4da50d3 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -28,19 +28,37 @@ void FAST_FUNC reinit_unicode(const char *LANG) | |||
28 | static const char unicode_0x394[] = { 0xce, 0x94, 0 }; | 28 | static const char unicode_0x394[] = { 0xce, 0x94, 0 }; |
29 | size_t width; | 29 | size_t width; |
30 | 30 | ||
31 | /* We pass "" instead of "C" because some libcs have a | ||
32 | * non-ASCII default locale for the setlocale("") call | ||
33 | * (this allows users of such a libc to have a Unicode | ||
34 | * system without having to mess with env). | ||
35 | * | ||
36 | * We set LC_CTYPE because (a) we may be called with $LC_CTYPE | ||
37 | * value in LANG, not with $LC_ALL, (b) internationalized | ||
38 | * LC_NUMERIC and LC_TIME are more PITA than benefit | ||
39 | * (for one, some utilities have a hard time with a comma | ||
40 | * used as a fractional separator). | ||
41 | */ | ||
31 | //TODO: avoid repeated calls by caching last string? | 42 | //TODO: avoid repeated calls by caching last string? |
32 | setlocale(LC_ALL, (LANG && LANG[0]) ? LANG : "C"); | 43 | setlocale(LC_CTYPE, LANG ? LANG : ""); |
33 | 44 | ||
34 | /* In unicode, this is a one character string */ | 45 | /* In unicode, this is a one character string */ |
35 | // can use unicode_strlen(string) too, but otherwise unicode_strlen() is unused | 46 | width = unicode_strlen(unicode_0x394); |
36 | width = mbstowcs(NULL, unicode_0x394, INT_MAX); | ||
37 | unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF); | 47 | unicode_status = (width == 1 ? UNICODE_ON : UNICODE_OFF); |
38 | } | 48 | } |
39 | 49 | ||
40 | void FAST_FUNC init_unicode(void) | 50 | void FAST_FUNC init_unicode(void) |
41 | { | 51 | { |
42 | if (unicode_status == UNICODE_UNKNOWN) | 52 | /* Some people set only $LC_CTYPE, not $LC_ALL, because they want |
43 | reinit_unicode(getenv("LANG")); | 53 | * only Unicode to be activated on their system, not the whole |
54 | * shebang of wrong decimal points, strange date formats and so on. | ||
55 | */ | ||
56 | if (unicode_status == UNICODE_UNKNOWN) { | ||
57 | char *s = getenv("LC_ALL"); | ||
58 | if (!s) s = getenv("LC_CTYPE"); | ||
59 | if (!s) s = getenv("LANG"); | ||
60 | reinit_unicode(s); | ||
61 | } | ||
44 | } | 62 | } |
45 | 63 | ||
46 | #else | 64 | #else |
@@ -58,8 +76,12 @@ void FAST_FUNC reinit_unicode(const char *LANG) | |||
58 | 76 | ||
59 | void FAST_FUNC init_unicode(void) | 77 | void FAST_FUNC init_unicode(void) |
60 | { | 78 | { |
61 | if (unicode_status == UNICODE_UNKNOWN) | 79 | if (unicode_status == UNICODE_UNKNOWN) { |
62 | reinit_unicode(getenv("LANG")); | 80 | char *s = getenv("LC_ALL"); |
81 | if (!s) s = getenv("LC_CTYPE"); | ||
82 | if (!s) s = getenv("LANG"); | ||
83 | reinit_unicode(s); | ||
84 | } | ||
63 | } | 85 | } |
64 | # endif | 86 | # endif |
65 | 87 | ||
@@ -963,7 +985,6 @@ int FAST_FUNC unicode_bidi_is_neutral_wchar(wint_t wc) | |||
963 | 985 | ||
964 | /* The rest is mostly same for libc and for "homegrown" support */ | 986 | /* The rest is mostly same for libc and for "homegrown" support */ |
965 | 987 | ||
966 | #if 0 // UNUSED | ||
967 | size_t FAST_FUNC unicode_strlen(const char *string) | 988 | size_t FAST_FUNC unicode_strlen(const char *string) |
968 | { | 989 | { |
969 | size_t width = mbstowcs(NULL, string, INT_MAX); | 990 | size_t width = mbstowcs(NULL, string, INT_MAX); |
@@ -971,7 +992,6 @@ size_t FAST_FUNC unicode_strlen(const char *string) | |||
971 | return strlen(string); | 992 | return strlen(string); |
972 | return width; | 993 | return width; |
973 | } | 994 | } |
974 | #endif | ||
975 | 995 | ||
976 | size_t FAST_FUNC unicode_strwidth(const char *string) | 996 | size_t FAST_FUNC unicode_strwidth(const char *string) |
977 | { | 997 | { |