diff options
| author | Mark Adler <madler@alumni.caltech.edu> | 2021-12-31 16:57:07 -0800 |
|---|---|---|
| committer | Mark Adler <madler@alumni.caltech.edu> | 2021-12-31 16:57:07 -0800 |
| commit | 8678871f18f4dd51101a9db1e37791f975969079 (patch) | |
| tree | 4db677c163317d56fefa7f52aaa440271fe4c7eb | |
| parent | c3f3043f7aa80750245f8166a338c4877020b589 (diff) | |
| download | zlib-8678871f18f4dd51101a9db1e37791f975969079.tar.gz zlib-8678871f18f4dd51101a9db1e37791f975969079.tar.bz2 zlib-8678871f18f4dd51101a9db1e37791f975969079.zip | |
Replace black/white with allow/block. (theresa-m)
| -rw-r--r-- | doc/txtvsbin.txt | 12 | ||||
| -rw-r--r-- | trees.c | 18 |
2 files changed, 15 insertions, 15 deletions
diff --git a/doc/txtvsbin.txt b/doc/txtvsbin.txt
index 3d0f063..2a901ea 100644
--- a/doc/txtvsbin.txt
+++ b/doc/txtvsbin.txt
| @@ -38,15 +38,15 @@ The Algorithm | |||
| 38 | 38 | ||
| 39 | The algorithm works by dividing the set of bytecodes [0..255] into three | 39 | The algorithm works by dividing the set of bytecodes [0..255] into three |
| 40 | categories: | 40 | categories: |
| 41 | - The white list of textual bytecodes: | 41 | - The allow list of textual bytecodes: |
| 42 | 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255. | 42 | 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255. |
| 43 | - The gray list of tolerated bytecodes: | 43 | - The gray list of tolerated bytecodes: |
| 44 | 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC). | 44 | 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC). |
| 45 | - The black list of undesired, non-textual bytecodes: | 45 | - The block list of undesired, non-textual bytecodes: |
| 46 | 0 (NUL) to 6, 14 to 31. | 46 | 0 (NUL) to 6, 14 to 31. |
| 47 | 47 | ||
| 48 | If a file contains at least one byte that belongs to the white list and | 48 | If a file contains at least one byte that belongs to the allow list and |
| 49 | no byte that belongs to the black list, then the file is categorized as | 49 | no byte that belongs to the block list, then the file is categorized as |
| 50 | plain text; otherwise, it is categorized as binary. (The boundary case, | 50 | plain text; otherwise, it is categorized as binary. (The boundary case, |
| 51 | when the file is empty, automatically falls into the latter category.) | 51 | when the file is empty, automatically falls into the latter category.) |
| 52 | 52 | ||
| @@ -84,9 +84,9 @@ consistent results, regardless what alphabet encoding is being used. | |||
| 84 | results on a text encoded, say, using ISO-8859-16 versus UTF-8.) | 84 | results on a text encoded, say, using ISO-8859-16 versus UTF-8.) |
| 85 | 85 | ||
| 86 | There is an extra category of plain text files that are "polluted" with | 86 | There is an extra category of plain text files that are "polluted" with |
| 87 | one or more black-listed codes, either by mistake or by peculiar design | 87 | one or more block-listed codes, either by mistake or by peculiar design |
| 88 | considerations. In such cases, a scheme that tolerates a small fraction | 88 | considerations. In such cases, a scheme that tolerates a small fraction |
| 89 | of black-listed codes would provide an increased recall (i.e. more true | 89 | of block-listed codes would provide an increased recall (i.e. more true |
| 90 | positives). This, however, incurs a reduced precision overall, since | 90 | positives). This, however, incurs a reduced precision overall, since |
| 91 | false positives are more likely to appear in binary files that contain | 91 | false positives are more likely to appear in binary files that contain |
| 92 | large chunks of textual data. Furthermore, "polluted" plain text should | 92 | large chunks of textual data. Furthermore, "polluted" plain text should |
diff --git a/trees.c b/trees.c
--- a/trees.c
+++ b/trees.c
| @@ -1091,9 +1091,9 @@ local void compress_block(s, ltree, dtree) | |||
| 1091 | * Check if the data type is TEXT or BINARY, using the following algorithm: | 1091 | * Check if the data type is TEXT or BINARY, using the following algorithm: |
| 1092 | * - TEXT if the two conditions below are satisfied: | 1092 | * - TEXT if the two conditions below are satisfied: |
| 1093 | * a) There are no non-portable control characters belonging to the | 1093 | * a) There are no non-portable control characters belonging to the |
| 1094 | * "black list" (0..6, 14..25, 28..31). | 1094 | * "block list" (0..6, 14..25, 28..31). |
| 1095 | * b) There is at least one printable character belonging to the | 1095 | * b) There is at least one printable character belonging to the |
| 1096 | * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). | 1096 | * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). |
| 1097 | * - BINARY otherwise. | 1097 | * - BINARY otherwise. |
| 1098 | * - The following partially-portable control characters form a | 1098 | * - The following partially-portable control characters form a |
| 1099 | * "gray list" that is ignored in this detection algorithm: | 1099 | * "gray list" that is ignored in this detection algorithm: |
| @@ -1103,19 +1103,19 @@ local void compress_block(s, ltree, dtree) | |||
| 1103 | local int detect_data_type(s) | 1103 | local int detect_data_type(s) |
| 1104 | deflate_state *s; | 1104 | deflate_state *s; |
| 1105 | { | 1105 | { |
| 1106 | /* black_mask is the bit mask of black-listed bytes | 1106 | /* block_mask is the bit mask of block-listed bytes |
| 1107 | * set bits 0..6, 14..25, and 28..31 | 1107 | * set bits 0..6, 14..25, and 28..31 |
| 1108 | * 0xf3ffc07f = binary 11110011111111111100000001111111 | 1108 | * 0xf3ffc07f = binary 11110011111111111100000001111111 |
| 1109 | */ | 1109 | */ |
| 1110 | unsigned long black_mask = 0xf3ffc07fUL; | 1110 | unsigned long block_mask = 0xf3ffc07fUL; |
| 1111 | int n; | 1111 | int n; |
| 1112 | 1112 | ||
| 1113 | /* Check for non-textual ("black-listed") bytes. */ | 1113 | /* Check for non-textual ("block-listed") bytes. */ |
| 1114 | for (n = 0; n <= 31; n++, black_mask >>= 1) | 1114 | for (n = 0; n <= 31; n++, block_mask >>= 1) |
| 1115 | if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) | 1115 | if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) |
| 1116 | return Z_BINARY; | 1116 | return Z_BINARY; |
| 1117 | 1117 | ||
| 1118 | /* Check for textual ("white-listed") bytes. */ | 1118 | /* Check for textual ("allow-listed") bytes. */ |
| 1119 | if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 | 1119 | if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 |
| 1120 | || s->dyn_ltree[13].Freq != 0) | 1120 | || s->dyn_ltree[13].Freq != 0) |
| 1121 | return Z_TEXT; | 1121 | return Z_TEXT; |
| @@ -1123,7 +1123,7 @@ local int detect_data_type(s) | |||
| 1123 | if (s->dyn_ltree[n].Freq != 0) | 1123 | if (s->dyn_ltree[n].Freq != 0) |
| 1124 | return Z_TEXT; | 1124 | return Z_TEXT; |
| 1125 | 1125 | ||
| 1126 | /* There are no "black-listed" or "white-listed" bytes: | 1126 | /* There are no "block-listed" or "allow-listed" bytes: |
| 1127 | * this stream either is empty or has tolerated ("gray-listed") bytes only. | 1127 | * this stream either is empty or has tolerated ("gray-listed") bytes only. |
| 1128 | */ | 1128 | */ |
| 1129 | return Z_BINARY; | 1129 | return Z_BINARY; |
