001    // Copyright 2008 Waterken Inc. under the terms of the MIT X license
002    // found at http://www.opensource.org/licenses/mit-license.html
003    package org.waterken.syntax.json;
004    
005    import java.io.EOFException;
006    import java.io.IOException;
007    import java.io.Reader;
008    
009    import org.waterken.syntax.SourceReader;
010    
011    /**
012     * A JSON token reader.
013     */
014    public final class
015    JSONLexer {
016        private final SourceReader s;
017        private       int line;
018        private       int column;
019        private       String head;
020        
021        /**
022         * Constructs an instance.
023         * @param in    UTF-8 input stream
024         */
025        public
026        JSONLexer(final Reader in) {
027            s = new SourceReader(in);
028            line = s.getLine();
029            column = s.getColumn();
030            head = "";              // empty token indicates start of token stream
031        }
032        
033        // org.waterken.syntax.json.JSONLexer interface
034        
035        /**
036         * Gets the line number of the {@linkplain #getHead head} token.
037         */
038        public int
039        getLine() { return line; }
040        
041        /**
042         * Gets the column number of the {@linkplain #getHead head} token.
043         */
044        public int
045        getColumn() { return column; }
046        
047        /**
048         * Gets the most recently {@linkplain #next read} token.
049         * @return most recent token, or {@code null} if EOF
050         */
051        public String
052        getHead() { return head; }
053        
054        /**
055         * Move to the next token in the input stream.
056         * @return newly read token
057         * @throws EOFException EOF
058         * @throws IOException  any I/O error
059         * @throws Exception    invalid character escape
060         */
061        public String
062        next() throws EOFException, IOException, Exception {
063            final int c = skipWhitespace(s);
064            line = s.getLine();
065            column = s.getColumn();
066            switch (c) {
067            case -1:
068                head = null;
069                throw new EOFException();
070            case ',':
071                head = ",";
072                s.read();
073                break;
074            case '{':
075                head = "{";
076                s.read();
077                break;
078            case ':':
079                head = ":";
080                s.read();
081                break;
082            case '}':
083                head = "}";
084                s.read();
085                break;
086            case '[':
087                head = "[";
088                s.read();
089                break;
090            case ']':
091                head = "]";
092                s.read();
093                break;
094            case '\"':
095                head = readString(s);
096                break;
097            default:
098                head = readKeyword(s);
099            }
100            return head;
101        }
102        
103        public void
104        close() throws IOException {
105            final int c = skipWhitespace(s);
106            line = s.getLine();
107            column = s.getColumn();
108            s.close();
109            if (-1 != c) { throw new IllegalStateException(); }
110        }
111        
112        // rest of implementation consists of static helper functions
113        
114        static private final String whitespace = " \t" + SourceReader.newLine;
115        
116        static private int
117        skipWhitespace(final SourceReader s) throws IOException {
118            int c = s.getHead();
119            while (whitespace.indexOf(c) != -1) {
120                c = s.read();
121            }
122            return c;
123        }
124        
125        static private final String delimiter = whitespace + ",{:}[]\"";
126        
127        static private String
128        readKeyword(final SourceReader s) throws IOException {
129            final StringBuilder r = new StringBuilder();
130            int c = s.getHead();
131            do {
132                r.append((char)c);
133                c = s.read();
134            } while (-1 != c && delimiter.indexOf(c) == -1);
135            return r.toString();
136        }
137        
138        static private String
139        readString(final SourceReader s) throws Exception {
140            final StringBuilder r = new StringBuilder();
141            r.append((char)s.getHead());
142            while (true) {
143                final int c = s.read();
144                if ('\\' == c) {
145                    r.append(readEscape(s));
146                } else {
147                    r.append((char)c);
148                    if ('\"' == c) {
149                        s.read();
150                        return r.toString();
151                    }
152                }
153            }
154        }
155        
156        static private char
157        readEscape(final SourceReader s) throws Exception {
158            switch (s.read()) {
159            case '\"': return '\"';
160            case '\\': return '\\';
161            case '/': return '/';
162            case 'b': return '\b';
163            case 'f': return '\f';
164            case 'n': return '\n';
165            case 'r': return '\r';
166            case 't': return '\t';
167            case 'u': return readUnicodeEscape(s);
168            default: throw new Exception("0x" + Integer.toHexString(s.getHead()));
169            }
170        }
171        
172        static private char
173        readUnicodeEscape(final SourceReader s) throws Exception {
174            return (char)((hex(s.read()) << 12) |
175                          (hex(s.read()) <<  8) |
176                          (hex(s.read()) <<  4) |
177                          (hex(s.read())      ) );
178        }
179        
180        static private int
181        hex(final int c) throws Exception {
182            if ('0' <= c && '9' >= c) {        return c - '0'     ; }
183            else if ('A' <= c && 'F' >= c) {   return c - 'A' + 10; }
184            else if ('a' <= c && 'f' >= c) {   return c - 'a' + 10; }
185            else { throw new Exception("0x" + Integer.toHexString(c)); }
186        }
187    }