001 // Copyright 2006-2007 Regents of the University of California. May be used 002 // under the terms of the revised BSD license. See LICENSING for details. 003 package org.joe_e.charset; 004 005 import java.nio.charset.Charset; 006 import java.nio.ByteBuffer; 007 import java.io.InputStream; 008 import java.io.InputStreamReader; 009 import java.io.OutputStream; 010 import java.io.OutputStreamWriter; 011 import java.io.Reader; 012 import java.io.Writer; 013 014 /** 015 * UTF-8 I/O. 016 */ 017 public final class UTF8 { 018 private static final Charset charset = Charset.forName("UTF-8"); 019 020 private UTF8() {} 021 022 /** 023 * Encodes a string in UTF-8. 024 * @param text The text to encode. 025 * @return The UTF-8 bytes. 026 */ 027 static public byte[] encode(final String text) { 028 final ByteBuffer bytes = charset.encode(text); 029 final int len = bytes.limit(); 030 final byte[] v = bytes.array(); 031 if (len == v.length) { return v; } 032 final byte[] r = new byte[len]; 033 System.arraycopy(v, bytes.arrayOffset(), r, 0, len); 034 return r; 035 } 036 037 /** 038 * Decodes a UTF-8 string. Each byte not corresponding to a UTF-8 039 * character decodes to the Unicode replacement character U+FFFD. 040 * Note that an initial byte-order mark is not stripped. This method is 041 * equivalent to <code>decode(buffer, 0, buffer.length)</code>. 042 * @param buffer the ASCII-encoded string to decode 043 * @return The corresponding string 044 * @throws java.lang.IndexOutOfBoundsException 045 */ 046 static public String decode(byte[] buffer) { 047 return decode(buffer, 0, buffer.length); 048 } 049 050 /** 051 * Decodes a UTF-8 string. Each byte not corresponding to a UTF-8 052 * character decodes to the Unicode replacement character U+FFFD. 053 * Note that an initial byte-order mark is not stripped. 054 * @param buffer the ASCII-encoded string to decode 055 * @param off where to start decoding 056 * @param len how many bytes to decode 057 * @return The corresponding string 058 * @throws java.lang.IndexOutOfBoundsException 059 */ 060 static public String decode(byte[] buffer, int off, int len) { 061 return charset.decode(ByteBuffer.wrap(buffer, off, len)).toString(); 062 } 063 064 /** 065 * Constructs a UTF-8 reader. 066 * @param in The binary input stream. 067 */ 068 static public Reader input(final InputStream in) { 069 return new InputStreamReader(in, charset); 070 } 071 072 /** 073 * Constructs a UTF-8 writer. 074 * @param out The binary output stream. 075 */ 076 static public Writer output(final OutputStream out) { 077 return new OutputStreamWriter(out, charset); 078 } 079 }