001 // Copyright 2008 Waterken Inc. under the terms of the MIT X license
002 // found at http://www.opensource.org/licenses/mit-license.html
003 package org.waterken.syntax.json;
004
005 import java.io.EOFException;
006 import java.io.IOException;
007 import java.io.Reader;
008
009 import org.waterken.syntax.SourceReader;
010
011 /**
012 * A JSON token reader.
013 */
014 public final class
015 JSONLexer {
016 private final SourceReader s;
017 private int line;
018 private int column;
019 private String head;
020
021 /**
022 * Constructs an instance.
023 * @param in UTF-8 input stream
024 */
025 public
026 JSONLexer(final Reader in) {
027 s = new SourceReader(in);
028 line = s.getLine();
029 column = s.getColumn();
030 head = ""; // empty token indicates start of token stream
031 }
032
033 // org.waterken.syntax.json.JSONLexer interface
034
035 /**
036 * Gets the line number of the {@linkplain #getHead head} token.
037 */
038 public int
039 getLine() { return line; }
040
041 /**
042 * Gets the column number of the {@linkplain #getHead head} token.
043 */
044 public int
045 getColumn() { return column; }
046
047 /**
048 * Gets the most recently {@linkplain #next read} token.
049 * @return most recent token, or {@code null} if EOF
050 */
051 public String
052 getHead() { return head; }
053
054 /**
055 * Move to the next token in the input stream.
056 * @return newly read token
057 * @throws EOFException EOF
058 * @throws IOException any I/O error
059 * @throws Exception invalid character escape
060 */
061 public String
062 next() throws EOFException, IOException, Exception {
063 final int c = skipWhitespace(s);
064 line = s.getLine();
065 column = s.getColumn();
066 switch (c) {
067 case -1:
068 head = null;
069 throw new EOFException();
070 case ',':
071 head = ",";
072 s.read();
073 break;
074 case '{':
075 head = "{";
076 s.read();
077 break;
078 case ':':
079 head = ":";
080 s.read();
081 break;
082 case '}':
083 head = "}";
084 s.read();
085 break;
086 case '[':
087 head = "[";
088 s.read();
089 break;
090 case ']':
091 head = "]";
092 s.read();
093 break;
094 case '\"':
095 head = readString(s);
096 break;
097 default:
098 head = readKeyword(s);
099 }
100 return head;
101 }
102
103 public void
104 close() throws IOException {
105 final int c = skipWhitespace(s);
106 line = s.getLine();
107 column = s.getColumn();
108 s.close();
109 if (-1 != c) { throw new IllegalStateException(); }
110 }
111
112 // rest of implementation consists of static helper functions
113
114 static private final String whitespace = " \t" + SourceReader.newLine;
115
116 static private int
117 skipWhitespace(final SourceReader s) throws IOException {
118 int c = s.getHead();
119 while (whitespace.indexOf(c) != -1) {
120 c = s.read();
121 }
122 return c;
123 }
124
125 static private final String delimiter = whitespace + ",{:}[]\"";
126
127 static private String
128 readKeyword(final SourceReader s) throws IOException {
129 final StringBuilder r = new StringBuilder();
130 int c = s.getHead();
131 do {
132 r.append((char)c);
133 c = s.read();
134 } while (-1 != c && delimiter.indexOf(c) == -1);
135 return r.toString();
136 }
137
138 static private String
139 readString(final SourceReader s) throws Exception {
140 final StringBuilder r = new StringBuilder();
141 r.append((char)s.getHead());
142 while (true) {
143 final int c = s.read();
144 if ('\\' == c) {
145 r.append(readEscape(s));
146 } else {
147 r.append((char)c);
148 if ('\"' == c) {
149 s.read();
150 return r.toString();
151 }
152 }
153 }
154 }
155
156 static private char
157 readEscape(final SourceReader s) throws Exception {
158 switch (s.read()) {
159 case '\"': return '\"';
160 case '\\': return '\\';
161 case '/': return '/';
162 case 'b': return '\b';
163 case 'f': return '\f';
164 case 'n': return '\n';
165 case 'r': return '\r';
166 case 't': return '\t';
167 case 'u': return readUnicodeEscape(s);
168 default: throw new Exception("0x" + Integer.toHexString(s.getHead()));
169 }
170 }
171
172 static private char
173 readUnicodeEscape(final SourceReader s) throws Exception {
174 return (char)((hex(s.read()) << 12) |
175 (hex(s.read()) << 8) |
176 (hex(s.read()) << 4) |
177 (hex(s.read()) ) );
178 }
179
180 static private int
181 hex(final int c) throws Exception {
182 if ('0' <= c && '9' >= c) { return c - '0' ; }
183 else if ('A' <= c && 'F' >= c) { return c - 'A' + 10; }
184 else if ('a' <= c && 'f' >= c) { return c - 'a' + 10; }
185 else { throw new Exception("0x" + Integer.toHexString(c)); }
186 }
187 }