001 // Copyright 2006-2007 Regents of the University of California. May be used
002 // under the terms of the revised BSD license. See LICENSING for details.
003 package org.joe_e.charset;
004
005 import java.nio.charset.Charset;
006 import java.nio.ByteBuffer;
007 import java.io.InputStream;
008 import java.io.InputStreamReader;
009 import java.io.OutputStream;
010 import java.io.OutputStreamWriter;
011 import java.io.Reader;
012 import java.io.Writer;
013
/**
 * UTF-8 I/O.
 * <p>
 * Static helpers for converting between Java strings and UTF-8 bytes, and
 * for wrapping binary streams in UTF-8 readers and writers. This class
 * cannot be instantiated.
 * </p>
 */
public final class UTF8 {

    /** The UTF-8 charset shared by every method in this class. */
    private static final Charset charset = Charset.forName("UTF-8");

    /** Not instantiable: this class only hosts static helpers. */
    private UTF8() {}

    /**
     * Encodes a string in UTF-8.
     * @param text The text to encode.
     * @return The UTF-8 bytes, in an array whose length is exactly the
     *         number of encoded bytes.
     */
    public static byte[] encode(final String text) {
        final ByteBuffer bytes = charset.encode(text);
        final int len = bytes.remaining();

        // Fast path: hand back the backing array without copying, but only
        // when it is accessible and holds exactly the encoded content (no
        // leading offset, no consumed prefix, no trailing slack).
        if (bytes.hasArray() && 0 == bytes.arrayOffset()
                && 0 == bytes.position()) {
            final byte[] v = bytes.array();
            if (len == v.length) { return v; }
        }

        // General path: a relative bulk get honours position, limit and
        // array offset, and also works for buffers with no accessible array.
        final byte[] r = new byte[len];
        bytes.get(r);
        return r;
    }

    /**
     * Decodes a UTF-8 string. Each byte not corresponding to a UTF-8
     * character decodes to the Unicode replacement character U+FFFD.
     * Note that an initial byte-order mark is not stripped. This method is
     * equivalent to <code>decode(buffer, 0, buffer.length)</code>.
     * @param buffer the UTF-8-encoded string to decode
     * @return The corresponding string
     * @throws java.lang.NullPointerException if <code>buffer</code> is null
     */
    public static String decode(byte[] buffer) {
        return decode(buffer, 0, buffer.length);
    }

    /**
     * Decodes a UTF-8 string. Each byte not corresponding to a UTF-8
     * character decodes to the Unicode replacement character U+FFFD.
     * Note that an initial byte-order mark is not stripped.
     * @param buffer the UTF-8-encoded string to decode
     * @param off where to start decoding
     * @param len how many bytes to decode
     * @return The corresponding string
     * @throws java.lang.IndexOutOfBoundsException if <code>off</code> and
     *         <code>len</code> do not describe a range inside
     *         <code>buffer</code>
     */
    public static String decode(byte[] buffer, int off, int len) {
        return charset.decode(ByteBuffer.wrap(buffer, off, len)).toString();
    }

    /**
     * Constructs a UTF-8 reader.
     * @param in The binary input stream.
     * @return A reader that decodes the bytes of <code>in</code> as UTF-8.
     */
    public static Reader input(final InputStream in) {
        return new InputStreamReader(in, charset);
    }

    /**
     * Constructs a UTF-8 writer.
     * @param out The binary output stream.
     * @return A writer that encodes characters as UTF-8 onto
     *         <code>out</code>.
     */
    public static Writer output(final OutputStream out) {
        return new OutputStreamWriter(out, charset);
    }
}