001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import org.apache.commons.codec.CodecPolicy; 021 022/** 023 * Provides Base16 encoding and decoding. 024 * 025 * <p> 026 * This class is thread-safe. 027 * </p> 028 * <p> 029 * This implementation strictly follows RFC 4648, and as such unlike 030 * the {@link Base32} and {@link Base64} implementations, 031 * it does not ignore invalid alphabet characters or whitespace, 032 * neither does it offer chunking or padding characters. 033 * </p> 034 * <p> 035 * The only additional feature above those specified in RFC 4648 036 * is support for working with a lower-case alphabet in addition 037 * to the default upper-case alphabet. 038 * </p> 039 * 040 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a> 041 * 042 * @since 1.15 043 */ 044public class Base16 extends BaseNCodec { 045 046 /** 047 * BASE16 characters are 4 bits in length. 048 * They are formed by taking an 8-bit group, 049 * which is converted into two BASE16 characters. 050 */ 051 private static final int BITS_PER_ENCODED_BYTE = 4; 052 private static final int BYTES_PER_ENCODED_BLOCK = 2; 053 private static final int BYTES_PER_UNENCODED_BLOCK = 1; 054 055 /** 056 * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified 057 * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16 058 * alphabet but fall within the bounds of the array are translated to -1. 059 */ 060 private static final byte[] UPPER_CASE_DECODE_TABLE = { 061 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 062 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 063 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 064 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 065 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 066 -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F 067 }; 068 069 /** 070 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" 071 * equivalents as specified in Table 5 of RFC 4648. 072 */ 073 private static final byte[] UPPER_CASE_ENCODE_TABLE = { 074 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 075 'A', 'B', 'C', 'D', 'E', 'F' 076 }; 077 078 /** 079 * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" 080 * into their 4-bit positive integer equivalents. Characters that are not in the Base16 081 * alphabet but fall within the bounds of the array are translated to -1. 082 */ 083 private static final byte[] LOWER_CASE_DECODE_TABLE = { 084 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 085 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 086 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 087 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 088 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 089 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f 090 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f 091 -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f 092 }; 093 094 /** 095 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" 096 * lower-case equivalents. 097 */ 098 private static final byte[] LOWER_CASE_ENCODE_TABLE = { 099 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 100 'a', 'b', 'c', 'd', 'e', 'f' 101 }; 102 103 /** Mask used to extract 4 bits, used when decoding character. */ 104 private static final int MASK_4BITS = 0x0f; 105 106 /** 107 * Decode table to use. 108 */ 109 private final byte[] decodeTable; 110 111 /** 112 * Encode table to use. 113 */ 114 private final byte[] encodeTable; 115 116 /** 117 * Creates a Base16 codec used for decoding and encoding. 118 */ 119 public Base16() { 120 this(false); 121 } 122 123 /** 124 * Creates a Base16 codec used for decoding and encoding. 125 * 126 * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. 127 */ 128 public Base16(final boolean lowerCase) { 129 this(lowerCase, DECODING_POLICY_DEFAULT); 130 } 131 132 /** 133 * Creates a Base16 codec used for decoding and encoding. 134 * 135 * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. 136 * @param decodingPolicy Decoding policy. 137 */ 138 public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) { 139 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, 140 PAD_DEFAULT, decodingPolicy); 141 if (lowerCase) { 142 this.encodeTable = LOWER_CASE_ENCODE_TABLE; 143 this.decodeTable = LOWER_CASE_DECODE_TABLE; 144 } else { 145 this.encodeTable = UPPER_CASE_ENCODE_TABLE; 146 this.decodeTable = UPPER_CASE_DECODE_TABLE; 147 } 148 } 149 150 @Override 151 void decode(final byte[] data, int offset, final int length, final Context context) { 152 if (context.eof || length < 0) { 153 context.eof = true; 154 if (context.ibitWorkArea != 0) { 155 validateTrailingCharacter(); 156 } 157 return; 158 } 159 160 final int dataLen = Math.min(data.length - offset, length); 161 final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen; 162 163 // small optimisation to short-cut the rest of this method when it is fed byte-by-byte 164 if (availableChars == 1 && availableChars == dataLen) { 165 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 166 context.ibitWorkArea = decodeOctet(data[offset]) + 1; 167 return; 168 } 169 170 // we must have an even number of chars to decode 171 final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1; 172 final int end = offset + dataLen; 173 174 final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context); 175 176 int result; 177 if (dataLen < availableChars) { 178 // we have 1/2 byte from previous invocation to decode 179 result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE; 180 result |= decodeOctet(data[offset++]); 181 182 buffer[context.pos++] = (byte)result; 183 184 // reset to empty-value for next invocation! 185 context.ibitWorkArea = 0; 186 } 187 188 final int loopEnd = end - 1; 189 while (offset < loopEnd) { 190 result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE; 191 result |= decodeOctet(data[offset++]); 192 buffer[context.pos++] = (byte)result; 193 } 194 195 // we have one char of a hex-pair left over 196 if (offset < end) { 197 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 198 context.ibitWorkArea = decodeOctet(data[offset]) + 1; 199 } 200 } 201 202 private int decodeOctet(final byte octet) { 203 int decoded = -1; 204 if ((octet & 0xff) < decodeTable.length) { 205 decoded = decodeTable[octet]; 206 } 207 208 if (decoded == -1) { 209 throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet); 210 } 211 212 return decoded; 213 } 214 215 @Override 216 void encode(final byte[] data, final int offset, final int length, final Context context) { 217 if (context.eof) { 218 return; 219 } 220 221 if (length < 0) { 222 context.eof = true; 223 return; 224 } 225 226 final int size = length * BYTES_PER_ENCODED_BLOCK; 227 if (size < 0) { 228 throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length); 229 } 230 231 final byte[] buffer = ensureBufferSize(size, context); 232 233 final int end = offset + length; 234 for (int i = offset; i < end; i++) { 235 final int value = data[i]; 236 final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS; 237 final int low = value & MASK_4BITS; 238 buffer[context.pos++] = encodeTable[high]; 239 buffer[context.pos++] = encodeTable[low]; 240 } 241 } 242 243 /** 244 * Returns whether or not the {@code octet} is in the Base16 alphabet. 245 * 246 * @param octet The value to test. 247 * 248 * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise. 249 */ 250 @Override 251 public boolean isInAlphabet(final byte octet) { 252 return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1; 253 } 254 255 /** 256 * Validates whether decoding allows an entire final trailing character that cannot be 257 * used for a complete byte. 258 * 259 * @throws IllegalArgumentException if strict decoding is enabled 260 */ 261 private void validateTrailingCharacter() { 262 if (isStrictDecoding()) { 263 throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet" + 264 "character but not a possible encoding. " + 265 "Decoding requires at least two characters to create one byte."); 266 } 267 } 268}