1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.BufferedReader;
7 import java.io.CharArrayReader;
8 import java.io.IOException;
9
10 import org.apache.commons.io.IOUtils;
11
12
13
14
15
16
17 public class CsTokenizer implements Tokenizer {
18
19 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
20 BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString().toCharArray()));
21 try {
22 int ic = reader.read(), line=1;
23 char c;
24 StringBuilder b;
25 while(ic!=-1)
26 {
27 c = (char)ic;
28 switch(c)
29 {
30
31 case '\n':
32 line++;
33 ic = reader.read();
34 break;
35
36
37 case ' ':
38 case '\t':
39 case '\r':
40 ic = reader.read();
41 break;
42
43
44 case ';':
45 ic = reader.read();
46 break;
47
48
49 case '<':
50 case '>':
51 ic = reader.read();
52 if(ic == '=')
53 {
54 tokenEntries.add(new TokenEntry(String.valueOf(c)+"=", sourceCode.getFileName(), line));
55 ic = reader.read();
56 }
57 else if(ic == c)
58 {
59 ic = reader.read();
60 if(ic == '=')
61 {
62 tokenEntries.add(new TokenEntry(String.valueOf(c)+String.valueOf(c)+"=", sourceCode.getFileName(), line));
63 ic = reader.read();
64 }
65 else
66 {
67 tokenEntries.add(new TokenEntry(String.valueOf(c)+String.valueOf(c), sourceCode.getFileName(), line));
68 }
69 }
70 else
71 {
72 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
73 }
74 break;
75
76
77 case '=':
78 case '&':
79 case '|':
80 case '+':
81 case '-':
82 ic = reader.read();
83 if(ic == '=' || ic == c)
84 {
85 tokenEntries.add(new TokenEntry(String.valueOf(c)+String.valueOf((char)ic), sourceCode.getFileName(), line));
86 ic = reader.read();
87 }
88 else
89 {
90 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
91 }
92 break;
93
94
95 case '!':
96 case '*':
97 case '%':
98 case '^':
99 case '~':
100 ic = reader.read();
101 if(ic == '=')
102 {
103 tokenEntries.add(new TokenEntry(String.valueOf(c)+"=", sourceCode.getFileName(), line));
104 ic = reader.read();
105 }
106 else
107 {
108 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
109 }
110 break;
111
112
113 case '"':
114 case '\'':
115 b = new StringBuilder();
116 b.append(c);
117 while((ic = reader.read()) != c)
118 {
119 if(ic == -1)
120 break;
121 b.append((char)ic);
122 if(ic == '\\') {
123 int next = reader.read();
124 if (next != -1) b.append((char)next);
125 }
126 }
127 if (ic != -1) b.append((char)ic);
128 tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
129 ic = reader.read();
130 break;
131
132
133 case '/':
134 switch(c = (char)(ic = reader.read()))
135 {
136 case '*':
137 int state = 1;
138 b = new StringBuilder();
139 b.append("/*");
140
141 while((ic = reader.read()) != -1)
142 {
143 c = (char)ic;
144 b.append(c);
145
146 if(state==1)
147 {
148 if(c == '*')
149 state = 2;
150 }
151 else
152 {
153 if(c == '/') {
154 ic = reader.read();
155 break;
156 } else if(c != '*') {
157 state = 1;
158 }
159 }
160 }
161
162
163 break;
164
165 case '/':
166 b = new StringBuilder();
167 b.append("//");
168 while((ic = reader.read()) != '\n')
169 {
170 if(ic==-1)
171 break;
172 b.append((char)ic);
173 }
174
175
176 break;
177
178 case '=':
179 tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
180 ic = reader.read();
181 break;
182
183 default:
184 tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
185 break;
186 }
187 break;
188
189
190
191 default:
192
193 if(Character.isJavaIdentifierStart(c))
194 {
195 b = new StringBuilder();
196 do
197 {
198 b.append(c);
199 c = (char)(ic = reader.read());
200 } while(Character.isJavaIdentifierPart(c));
201 tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
202 }
203
204 else if(Character.isDigit(c) || c == '.')
205 {
206 b = new StringBuilder();
207 do
208 {
209 b.append(c);
210 if(c == 'e' || c == 'E')
211 {
212 c = (char)(ic = reader.read());
213 if("1234567890-".indexOf(c)==-1)
214 break;
215 b.append(c);
216 }
217 c = (char)(ic = reader.read());
218 } while("1234567890.iIlLfFdDsSuUeExX".indexOf(c)!=-1);
219
220 tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
221 }
222
223 else
224 {
225 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
226 ic = reader.read();
227 break;
228 }
229 }
230 }
231 } catch (IOException e) {
232 e.printStackTrace();
233 } finally {
234 IOUtils.closeQuietly(reader);
235 tokenEntries.add(TokenEntry.getEOF());
236 }
237 }
238 }