blob: 36131667506a60f7f1d74e74ee3f628053e299d8 [file] [log] [blame]
Till Westmannea8ab392013-06-05 15:17:08 -07001/*
Ian Maxon928bbd12015-09-14 17:12:48 -07002 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
Ian Maxon032a1782015-06-30 17:10:51 -07009 *
Ian Maxon928bbd12015-09-14 17:12:48 -070010 * http://www.apache.org/licenses/LICENSE-2.0
Ian Maxon032a1782015-06-30 17:10:51 -070011 *
Ian Maxon928bbd12015-09-14 17:12:48 -070012 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
Till Westmannea8ab392013-06-05 15:17:08 -070018 */
Till Westmanne2439b42016-01-28 17:06:21 -080019package [PACKAGE];
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +000020
21import java.io.IOException;
22import [PACKAGE].[LEXER_NAME]Exception;
23
24public class [LEXER_NAME] {
25
26 public static final int
27 TOKEN_EOF = 0, TOKEN_AUX_NOT_FOUND = 1 [TOKENS_CONSTANTS];
28
29 // Human representation of tokens. Useful for debug.
30 // Is possible to convert a TOKEN_CONSTANT in its image through
Till Westmanne2439b42016-01-28 17:06:21 -080031 // [LEXER_NAME].tokenKindToString(TOKEN_CONSTANT);
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +000032 private static final String[] tokenImage = {
33 "<EOF>", "<AUX_NOT_FOUND>" [TOKENS_IMAGES]
34 };
Till Westmanne2439b42016-01-28 17:06:21 -080035
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +000036 private static final char EOF_CHAR = 4;
37 protected java.io.Reader inputStream;
38 protected int column;
39 protected int line;
40 protected boolean prevCharIsCR;
41 protected boolean prevCharIsLF;
Till Westmanne2439b42016-01-28 17:06:21 -080042 protected boolean containsEscapes;
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +000043 protected char[] buffer;
44 protected int bufsize;
45 protected int bufpos;
46 protected int tokenBegin;
47 protected int endOf_USED_Buffer;
48 protected int endOf_UNUSED_Buffer;
49 protected int maxUnusedBufferSize;
50
51// ================================================================================
52// Auxiliary functions. Can parse the tokens used in the grammar as partial/auxiliary
53// ================================================================================
54
55 [LEXER_AUXFUNCTIONS]
56
57// ================================================================================
58// Main method. Return a TOKEN_CONSTANT
Till Westmanne2439b42016-01-28 17:06:21 -080059// ================================================================================
60
/**
 * Skips blanks (space, tab, LF, CR), marks the start of the next token and scans it.
 *
 * @return TOKEN_EOF when the end-of-input sentinel is reached; otherwise whatever the
 *         generated logic substituted for the [LEXER_LOGIC] placeholder returns
 * @throws [LEXER_NAME]Exception declared for the generated logic (presumably raised through parseError)
 * @throws IOException if refilling the buffer from the reader fails
 */
public int next() throws [LEXER_NAME]Exception, IOException {
    // The local must keep the name currentChar: the [LEXER_LOGIC] placeholder below may refer to it.
    char currentChar = buffer[bufpos];
    while (true) {
        if (currentChar != ' ' && currentChar != '\t' && currentChar != '\n' && currentChar != '\r') {
            break;
        }
        currentChar = readNextChar();
    }

    // A new token starts at the first non-blank character.
    containsEscapes = false;
    tokenBegin = bufpos;
    if (currentChar == EOF_CHAR) {
        return TOKEN_EOF;
    }

    [LEXER_LOGIC]
}
71
72// ================================================================================
73// Public interface
74// ================================================================================
Abdullah Alamoudi284590e2016-01-03 15:42:18 +030075
/**
 * Creates a lexer that reads from the given reader; all set-up is delegated to
 * reInit(java.io.Reader).
 *
 * @param stream the character source to scan
 * @throws IOException if reading the first character from the reader fails
 */
public [LEXER_NAME](java.io.Reader stream) throws IOException {
    this.reInit(stream);
}
79
/**
 * Creates a lexer with no reader attached; state is initialised by reInit().
 * No buffer is allocated here, so one presumably has to be supplied through
 * setBuffer(char[]) before scanning.
 */
public [LEXER_NAME]() {
    this.reInit();
}
83
Till Westmann19d30852016-07-28 23:27:58 -070084 public void setBuffer(char[] buffer) {
Abdullah Alamoudi284590e2016-01-03 15:42:18 +030085 this.buffer = buffer;
86 tokenBegin = bufpos = 0;
87 containsEscapes = false;
Abdullah Alamoudi284590e2016-01-03 15:42:18 +030088 tokenBegin = -1;
89 }
90
Till Westmann19d30852016-07-28 23:27:58 -070091 public void reInit() {
Abdullah Alamoudi284590e2016-01-03 15:42:18 +030092 bufsize = Integer.MAX_VALUE;
93 endOf_UNUSED_Buffer = bufsize;
94 endOf_USED_Buffer = bufsize;
95 line = 0;
96 prevCharIsCR = false;
97 prevCharIsLF = false;
98 tokenBegin = -1;
99 maxUnusedBufferSize = bufsize;
100 }
101
Till Westmann19d30852016-07-28 23:27:58 -0700102 public void reInit(java.io.Reader stream) throws IOException {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000103 done();
104 inputStream = stream;
105 bufsize = 4096;
106 line = 1;
107 column = 0;
108 bufpos = -1;
109 endOf_UNUSED_Buffer = bufsize;
110 endOf_USED_Buffer = 0;
111 prevCharIsCR = false;
112 prevCharIsLF = false;
113 buffer = new char[bufsize];
114 tokenBegin = -1;
115 maxUnusedBufferSize = 4096/2;
116 readNextChar();
117 }
118
119 public String getLastTokenImage() {
Till Westmann19d30852016-07-28 23:27:58 -0700120 if (bufpos >= tokenBegin) {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000121 return new String(buffer, tokenBegin, bufpos - tokenBegin);
Till Westmann19d30852016-07-28 23:27:58 -0700122 } else {
123 return new String(buffer, tokenBegin, bufsize - tokenBegin) + new String(buffer, 0, bufpos);
124 }
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000125 }
Till Westmanne2439b42016-01-28 17:06:21 -0800126
Till Westmannab40b092014-03-28 15:44:01 -0700127 public int getColumn() {
128 return column;
129 }
Till Westmanne2439b42016-01-28 17:06:21 -0800130
Till Westmannab40b092014-03-28 15:44:01 -0700131 public int getLine() {
132 return line;
133 }
Till Westmanne2439b42016-01-28 17:06:21 -0800134
Till Westmann8cdbd392014-04-04 18:31:08 -0700135 public boolean containsEscapes() {
136 return containsEscapes;
137 }
138
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000139 public static String tokenKindToString(int token) {
Till Westmanne2439b42016-01-28 17:06:21 -0800140 return tokenImage[token];
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000141 }
142
143 public void done(){
144 buffer = null;
145 }
146
147// ================================================================================
148// Parse error management
Till Westmanne2439b42016-01-28 17:06:21 -0800149// ================================================================================
150
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000151 protected int parseError(int ... tokens) throws [LEXER_NAME]Exception {
152 StringBuilder message = new StringBuilder();
Till Westmannab40b092014-03-28 15:44:01 -0700153 message.append("Parse error at (").append(line).append(", ").append(column).append(")");
154 if (tokens.length > 0) {
155 message.append(" expecting:");
156 for (int tokenId : tokens){
157 message.append(" ").append([LEXER_NAME].tokenKindToString(tokenId));
158 }
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000159 }
160 throw new [LEXER_NAME]Exception(message.toString());
161 }
Till Westmanne2439b42016-01-28 17:06:21 -0800162
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000163 protected void updateLineColumn(char c){
164 column++;
Till Westmanne2439b42016-01-28 17:06:21 -0800165
Till Westmann19d30852016-07-28 23:27:58 -0700166 if (prevCharIsLF) {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000167 prevCharIsLF = false;
168 line += (column = 1);
Till Westmann19d30852016-07-28 23:27:58 -0700169 } else if (prevCharIsCR) {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000170 prevCharIsCR = false;
Till Westmann19d30852016-07-28 23:27:58 -0700171 if (c == '\n') {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000172 prevCharIsLF = true;
Till Westmann19d30852016-07-28 23:27:58 -0700173 } else {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000174 line += (column = 1);
175 }
176 }
Till Westmanne2439b42016-01-28 17:06:21 -0800177
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000178 if (c=='\r') {
179 prevCharIsCR = true;
Till Westmann19d30852016-07-28 23:27:58 -0700180 } else if (c == '\n') {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000181 prevCharIsLF = true;
182 }
183 }
Till Westmanne2439b42016-01-28 17:06:21 -0800184
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000185// ================================================================================
186// Read data, buffer management. It uses a circular (and expandable) buffer
Till Westmanne2439b42016-01-28 17:06:21 -0800187// ================================================================================
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000188
189 protected char readNextChar() throws IOException {
Till Westmann19d30852016-07-28 23:27:58 -0700190 if (++bufpos >= endOf_USED_Buffer) {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000191 fillBuff();
Till Westmann19d30852016-07-28 23:27:58 -0700192 }
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000193 char c = buffer[bufpos];
194 updateLineColumn(c);
195 return c;
196 }
197
198 protected boolean fillBuff() throws IOException {
Till Westmann19d30852016-07-28 23:27:58 -0700199 if (endOf_UNUSED_Buffer == endOf_USED_Buffer) {
200 // If no more unused buffer space
201 if (endOf_UNUSED_Buffer == bufsize) {
202 // If the previous unused space was at the end of the buffer
203 if (tokenBegin > maxUnusedBufferSize) {
204 // If the first N bytes before the current token are enough
205 // setup buffer to use that fragment
206 bufpos = endOf_USED_Buffer = 0;
207 endOf_UNUSED_Buffer = tokenBegin;
208 } else if (tokenBegin < 0){
209 // If no token yet
210 // reuse the whole buffer
211 bufpos = endOf_USED_Buffer = 0;
212 } else {
213 // Otherwise expand buffer after its end
214 ExpandBuff(false);
215 }
216 } else if (endOf_UNUSED_Buffer > tokenBegin){
217 // If the endOf_UNUSED_Buffer is after the token
218 // set endOf_UNUSED_Buffer to the end of the buffer
219 endOf_UNUSED_Buffer = bufsize;
220 } else if ((tokenBegin - endOf_UNUSED_Buffer) < maxUnusedBufferSize) {
221 // If between endOf_UNUSED_Buffer and the token there is NOT enough space expand the buffer
222 // reorganizing it
223 ExpandBuff(true);
224 } else {
225 // Otherwise there is enough space at the start
226 // so we set the buffer to use that fragment
227 endOf_UNUSED_Buffer = tokenBegin;
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000228 }
Till Westmann19d30852016-07-28 23:27:58 -0700229 }
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000230 int i;
Till Westmann19d30852016-07-28 23:27:58 -0700231 if ((i = inputStream.read(buffer, endOf_USED_Buffer, endOf_UNUSED_Buffer - endOf_USED_Buffer)) == -1) {
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000232 inputStream.close();
Till Westmann19d30852016-07-28 23:27:58 -0700233 buffer[endOf_USED_Buffer] = (char)EOF_CHAR;
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000234 endOf_USED_Buffer++;
235 return false;
Till Westmann19d30852016-07-28 23:27:58 -0700236 } else {
237 endOf_USED_Buffer += i;
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000238 }
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000239 return true;
240 }
241
242
Till Westmann19d30852016-07-28 23:27:58 -0700243 protected void ExpandBuff(boolean wrapAround) {
244 char[] newbuffer = new char[bufsize + maxUnusedBufferSize];
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000245
Till Westmann19d30852016-07-28 23:27:58 -0700246 try {
247 if (wrapAround) {
248 System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
249 System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos);
250 buffer = newbuffer;
251 endOf_USED_Buffer = (bufpos += (bufsize - tokenBegin));
252 } else {
253 System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
254 buffer = newbuffer;
255 endOf_USED_Buffer = (bufpos -= tokenBegin);
256 }
257 } catch (Throwable t) {
258 throw new Error(t.getMessage());
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000259 }
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000260
Till Westmann19d30852016-07-28 23:27:58 -0700261 bufsize += maxUnusedBufferSize;
262 endOf_UNUSED_Buffer = bufsize;
263 tokenBegin = 0;
Abdullah Alamoudi284590e2016-01-03 15:42:18 +0300264 }
diegogiorgini@gmail.com2de6d342013-02-16 02:41:45 +0000265}