// Copyright 2008 Waterken Inc. under the terms of the MIT X license
// found at http://www.opensource.org/licenses/mit-license.html
package org.waterken.syntax.json;

import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;

import org.waterken.syntax.SourceReader;

/**
 * A JSON token reader.
 * <p>
 * Tokens are produced one at a time by {@link #next}. A token is one of:
 * </p>
 * <ul>
 * <li>a single punctuation character: {@code , { : } [ ]}</li>
 * <li>a string, returned <em>including</em> its surrounding double quotes,
 *     with escape sequences already decoded</li>
 * <li>a keyword: any other run of characters up to the next delimiter
 *     (for example a number, {@code true}, {@code false} or
 *     {@code null}); the lexer does not validate keywords</li>
 * </ul>
 */
public final class
JSONLexer {
    private final SourceReader s;   // underlying character source
    private       int line;         // line of the head token
    private       int column;       // column of the head token
    private       String head;      // most recently read token

    /**
     * Constructs an instance.
     * @param in    character input stream containing the JSON text
     */
    public
    JSONLexer(final Reader in) {
        s = new SourceReader(in);
        line = s.getLine();
        column = s.getColumn();
        head = "";  // empty token indicates start of token stream
    }

    // org.waterken.syntax.json.JSONLexer interface

    /**
     * Gets the line number of the {@linkplain #getHead head} token.
     */
    public int
    getLine() { return line; }

    /**
     * Gets the column number of the {@linkplain #getHead head} token.
     */
    public int
    getColumn() { return column; }

    /**
     * Gets the most recently {@linkplain #next read} token.
     * @return most recent token, the empty string if {@link #next} has not
     *         been called yet, or {@code null} if EOF
     */
    public String
    getHead() { return head; }

    /**
     * Move to the next token in the input stream.
     * <p>
     * Leading whitespace is skipped, and the position reported by
     * {@link #getLine} and {@link #getColumn} is updated before the token
     * is read.
     * </p>
     * @return newly read token
     * @throws EOFException EOF, including EOF inside a string token
     * @throws IOException  any I/O error
     * @throws Exception    invalid character escape
     */
    public String
    next() throws EOFException, IOException, Exception {
        final int c = skipWhitespace(s);
        line = s.getLine();
        column = s.getColumn();
        // Punctuation tokens are kept as string literals, rather than built
        // from the character, so each one is always the same instance.
        switch (c) {
        case -1:
            head = null;
            throw new EOFException();
        case ',':
            head = ",";
            s.read();
            break;
        case '{':
            head = "{";
            s.read();
            break;
        case ':':
            head = ":";
            s.read();
            break;
        case '}':
            head = "}";
            s.read();
            break;
        case '[':
            head = "[";
            s.read();
            break;
        case ']':
            head = "]";
            s.read();
            break;
        case '\"':
            head = readString(s);
            break;
        default:
            head = readKeyword(s);
        }
        return head;
    }

    /**
     * Finishes reading and closes the underlying character source.
     * <p>
     * Trailing whitespace is skipped first. The source is closed even if
     * skipping fails or unread content remains.
     * </p>
     * @throws IOException              any I/O error
     * @throws IllegalStateException    non-whitespace content remains
     */
    public void
    close() throws IOException {
        try {
            final int c = skipWhitespace(s);
            line = s.getLine();
            column = s.getColumn();
            if (-1 != c) { throw new IllegalStateException(); }
        } finally {
            // always release the source, even when the checks above fail
            s.close();
        }
    }

    // rest of implementation consists of static helper functions

    /**
     * Characters skipped between tokens.
     */
    static private final String whitespace = " \t" + SourceReader.newLine;

    /**
     * Advances past any whitespace, starting at the current head character.
     * @param s character source
     * @return first non-whitespace character, left as the head of
     *         {@code s}, or {@code -1} if EOF
     * @throws IOException  any I/O error
     */
    static private int
    skipWhitespace(final SourceReader s) throws IOException {
        int c = s.getHead();
        // indexOf() yields -1 for the EOF marker, which ends the loop
        while (whitespace.indexOf(c) != -1) {
            c = s.read();
        }
        return c;
    }

    /**
     * Characters that terminate a keyword token.
     */
    static private final String delimiter = whitespace + ",{:}[]\"";

    /**
     * Reads a keyword token, starting at the current head character.
     * <p>
     * The head character is always consumed; reading then continues up to,
     * but not including, the next delimiter or EOF.
     * </p>
     * @param s character source
     * @return keyword text
     * @throws IOException  any I/O error
     */
    static private String
    readKeyword(final SourceReader s) throws IOException {
        final StringBuilder r = new StringBuilder();
        int c = s.getHead();
        do {
            r.append((char)c);
            c = s.read();
        } while (-1 != c && delimiter.indexOf(c) == -1);
        return r.toString();
    }

    /**
     * Reads a string token, starting at the opening quote that is the
     * current head character.
     * @param s character source
     * @return string token, including both surrounding quotes, with escape
     *         sequences decoded
     * @throws EOFException EOF before the closing quote
     * @throws IOException  any I/O error
     * @throws Exception    invalid character escape
     */
    static private String
    readString(final SourceReader s) throws Exception {
        final StringBuilder r = new StringBuilder();
        r.append((char)s.getHead());    // opening quote
        while (true) {
            final int c = s.read();
            if (-1 == c) {
                // Unterminated string: fail now instead of appending the
                // EOF marker as a character and continuing to read.
                throw new EOFException();
            }
            if ('\\' == c) {
                r.append(readEscape(s));
            } else {
                r.append((char)c);
                if ('\"' == c) {
                    s.read();   // step past the closing quote
                    return r.toString();
                }
            }
        }
    }

    /**
     * Decodes the escape sequence following a backslash.
     * @param s character source, positioned on the backslash
     * @return decoded character
     * @throws Exception    unrecognized escape; the message is the
     *                      offending character code in hexadecimal
     */
    static private char
    readEscape(final SourceReader s) throws Exception {
        switch (s.read()) {
        case '\"': return '\"';
        case '\\': return '\\';
        case '/': return '/';
        case 'b': return '\b';
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'u': return readUnicodeEscape(s);
        default: throw new Exception("0x" + Integer.toHexString(s.getHead()));
        }
    }

    /**
     * Decodes the four hexadecimal digits of a {@code \}{@code u} escape.
     * @param s character source, positioned on the {@code u}
     * @return decoded UTF-16 code unit
     * @throws Exception    a non-hexadecimal character was read
     */
    static private char
    readUnicodeEscape(final SourceReader s) throws Exception {
        // operands are evaluated left to right, most significant digit first
        return (char)((hex(s.read()) << 12) |
                      (hex(s.read()) <<  8) |
                      (hex(s.read()) <<  4) |
                      (hex(s.read())      ) );
    }

    /**
     * Converts a hexadecimal digit to its numeric value.
     * @param c character code, upper or lower case digit accepted
     * @return value in the range 0 to 15
     * @throws Exception    {@code c} is not a hexadecimal digit; the
     *                      message is the character code in hexadecimal
     */
    static private int
    hex(final int c) throws Exception {
        if      ('0' <= c && '9' >= c) { return  c - '0'      ; }
        else if ('A' <= c && 'F' >= c) { return (c - 'A') + 10; }
        else if ('a' <= c && 'f' >= c) { return (c - 'a') + 10; }
        else { throw new Exception("0x" + Integer.toHexString(c)); }
    }
}