Re: How to strip comments out of code
silviocortes@yahoo.com wrote:
I need to write a class that will take a java file as input, strip all
the comments out, and save the result in a different file....
Assuming the use of correct Java sources as an input, the code below
should do the trick. (Warning: not tested intensively!)
Note that it tries to preserve as much of the original code as possible.
That is, the line numbers, positions, and escape sequences of the code
in output should be the same as in input (that may help in debugging).
piotr
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayDeque;
import java.util.Deque;
/**
 * Removes end-of-line and block comments from Java source text.
 *
 * <p>The output keeps the line count of the input. A block comment that is
 * followed by more text on the same line is replaced by as many spaces as the
 * comment occupied in the source, so the text after it keeps its column.
 * String and character literals are copied verbatim, and Unicode escapes are
 * decoded only to recognise tokens; they are written back exactly as they
 * appeared in the input.
 *
 * <p>Limitations: every line is written with the platform line separator,
 * lines that end up containing only whitespace are written as empty lines,
 * and multi-line text blocks (triple-quoted strings) are not recognised and
 * are reported as an unterminated literal.
 */
public class CommentStripper {

    /** Input file read by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT = "CommentStripper.java";

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the input file (defaults to
     *             {@code CommentStripper.java}); {@code args[1]} is the output
     *             file (defaults to standard output)
     * @throws IllegalArgumentException if input and output name the same file
     * @throws Exception if reading, parsing or writing fails
     */
    public static void main(String[] args) throws Exception {
        String inputName = args.length > 0 ? args[0] : DEFAULT_INPUT;
        boolean toFile = args.length > 1;
        // Opening the output truncates it, which would destroy the input
        // before it has been read if both paths denote the same file.
        if (toFile && new java.io.File(inputName).getCanonicalFile()
                .equals(new java.io.File(args[1]).getCanonicalFile())) {
            throw new IllegalArgumentException(
                    "Output file must differ from input file: " + args[1]);
        }
        InputStream in = new BufferedInputStream(new FileInputStream(inputName));
        try {
            // Decodes with the platform default charset.
            Reader source = new InputStreamReader(in);
            PrintWriter out = toFile
                    ? new PrintWriter(args[1])
                    : new PrintWriter(System.out, true);
            try {
                stripComments(source, out);
            } finally {
                if (toFile) {
                    out.close();
                } else {
                    // System.out is not ours to close.
                    out.flush();
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Copies {@code source} to {@code out} with all comments removed.
     *
     * @param source Java source text; it is read to the end but not closed
     * @param out    receives the stripped text, one {@code println} per input
     *               line; it is flushed but not closed
     * @throws IOException if reading fails, a Unicode escape is malformed, or
     *                     a string/character literal is not closed on its line
     */
    public static void stripComments(
            Reader source, PrintWriter out) throws IOException {
        SourceReader reader = new SourceReader(source);
        StringBuilder outbf = new StringBuilder();
        boolean inComment = false;
        // One iteration per input line; 'next' is always one character ahead.
        for (Char next; (next = reader.next()) != Char.EOF;) {
            // Source width of the comment text seen so far on this line.
            int commentCharsInLine = 0;
            for (Char sc; !(sc = next).isEOL();) {
                next = reader.next();
                if (inComment) {
                    if (sc.codePoint == '*' && next.codePoint == '/') {
                        // End of block comment: pad only when something
                        // follows, to avoid creating trailing whitespace.
                        int width = commentCharsInLine
                                + sc.sourceLength() + next.sourceLength();
                        next = reader.next();
                        if (!next.isEOL()) {
                            appendSpaces(outbf, width);
                        }
                        commentCharsInLine = 0;
                        inComment = false;
                    } else {
                        commentCharsInLine += sc.sourceLength();
                    }
                } else if (sc.codePoint == '/' && next.codePoint == '*') {
                    // Start of block comment; step past the '*' so that it
                    // cannot also be taken as the start of the terminator.
                    inComment = true;
                    commentCharsInLine = sc.sourceLength() + next.sourceLength();
                    next = reader.next();
                } else if (sc.codePoint == '/' && next.codePoint == '/') {
                    // End-of-line comment: drop the rest of the line.
                    while (!next.isEOL()) {
                        next = reader.next();
                    }
                } else if (sc.codePoint == '"' || sc.codePoint == '\'') {
                    next = copyLiteral(reader, sc, next, outbf);
                } else {
                    // Ordinary code: write the source text unchanged.
                    sc.appendSource(outbf);
                }
            }
            // Flush the buffered line.
            String outLine = outbf.toString();
            if (outLine.trim().length() == 0) {
                out.println();
            } else {
                out.println(outLine);
            }
            outbf.setLength(0);
        }
        out.flush();
    }

    /**
     * Copies a string or character literal verbatim into {@code outbf}.
     *
     * @param reader character source positioned after {@code next}
     * @param quote  the opening quote character (already consumed)
     * @param next   the character following the opening quote
     * @param outbf  buffer for the current output line
     * @return the first character after the closing quote
     * @throws IOException if the line ends before the closing quote is found
     */
    private static Char copyLiteral(SourceReader reader, Char quote, Char next,
            StringBuilder outbf) throws IOException {
        quote.appendSource(outbf);
        while (!next.isEOL()) {
            next.appendSource(outbf);
            if (next.codePoint == '\\') {
                // The escaped character can never close the literal.
                next = reader.next();
                if (next.isEOL()) {
                    // Backslash at end of line: the literal is unterminated;
                    // do not read on into the following line.
                    break;
                }
                next.appendSource(outbf);
            } else if (next.codePoint == quote.codePoint) {
                return reader.next();
            }
            next = reader.next();
        }
        // Syntax error in input.
        throw new IOException("End of text literal not found");
    }

    /** Appends {@code count} space characters to {@code sb}. */
    private static void appendSpaces(StringBuilder sb, int count) {
        for (int i = 0; i < count; i++) {
            sb.append(' ');
        }
    }

    /**
     * A logical source character: one code point (or a line terminator,
     * normalised to {@code '\n'}) together with the exact source text that
     * produced it.
     */
    private static abstract class Char {
        /** Decoded code point; {@code '\n'} for any line terminator, -1 for EOF. */
        final int codePoint;

        Char(int codePoint) {
            this.codePoint = codePoint;
        }

        /** Returns true if this character ends the current line. */
        boolean isEOL() {
            return codePoint == '\n';
        }

        /** Appends the original source text of this character to {@code sb}. */
        abstract void appendSource(StringBuilder sb);

        /** Returns the number of {@code char}s this character occupies in the source. */
        int sourceLength() {
            StringBuilder sb = new StringBuilder();
            appendSource(sb);
            return sb.length();
        }

        /** End-of-input sentinel; it also counts as an end of line. */
        static final Char EOF = new Char(-1) {
            @Override
            public void appendSource(StringBuilder sb) {
                // write nothing
            }

            @Override
            boolean isEOL() {
                return true;
            }
        };

        /** Creates a character whose code point is the value of {@code c}. */
        static Char newInstance(final InputChar c) {
            return new Char(c.value) {
                @Override
                void appendSource(StringBuilder sb) {
                    c.appendSource(sb);
                }
            };
        }

        /** Creates a character with an explicit code point backed by one input unit. */
        static Char newInstance(int codePoint, final InputChar c) {
            return new Char(codePoint) {
                @Override
                void appendSource(StringBuilder sb) {
                    c.appendSource(sb);
                }
            };
        }

        /** Creates a character with an explicit code point backed by several input units. */
        static Char newInstance(int codePoint, final InputChar... chars) {
            return new Char(codePoint) {
                @Override
                void appendSource(StringBuilder sb) {
                    for (InputChar c : chars) {
                        c.appendSource(sb);
                    }
                }
            };
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            appendSource(sb);
            return "[" + codePoint + "]=" + sb.toString();
        }
    }

    /**
     * A single UTF-16 unit of input: either a raw character or a decoded
     * Unicode escape that remembers its original spelling.
     */
    private static abstract class InputChar {
        /** UTF-16 unit value, or -1 for {@link #EOF}. */
        final int value;

        /** End-of-input sentinel, compared by identity. */
        static final InputChar EOF = new InputChar(-1) {
            @Override
            void appendSource(StringBuilder sb) {
                // write nothing
            }
        };

        InputChar(int value) {
            this.value = value;
        }

        /** Appends the original source text of this unit to {@code sb}. */
        abstract void appendSource(StringBuilder sb);

        /** Creates a unit for a character that was read directly. */
        static InputChar newCharInstance(int value) {
            return new InputChar(value) {
                @Override
                void appendSource(StringBuilder sb) {
                    sb.append((char) this.value);
                }
            };
        }

        /** Creates a unit for a Unicode escape; {@code seq} is its source text. */
        static InputChar newEscapeSequenceInstance(int value, final
                CharSequence seq) {
            return new InputChar(value) {
                @Override
                void appendSource(StringBuilder sb) {
                    sb.append(seq);
                }
            };
        }
    }

    /**
     * Turns a {@link Reader} into a stream of {@link Char}s: decodes Unicode
     * escapes, joins surrogate pairs, and folds CR, LF and CR LF into a
     * single end-of-line character.
     */
    private static class SourceReader {
        private Reader in;
        /** Units that were read ahead and must be delivered before reading again. */
        private final Deque<InputChar> pending = new ArrayDeque<InputChar>();
        /** Set once the underlying reader has reported end of input. */
        private boolean exhausted;

        SourceReader(Reader in) {
            this.in = in;
        }

        /**
         * Returns the next logical character, or {@link Char#EOF}.
         *
         * @throws IOException if reading fails or a Unicode escape is malformed
         */
        Char next() throws IOException {
            InputChar nc = nextInputChar();
            if (nc == InputChar.EOF) {
                return Char.EOF;
            }
            // One unit of look-ahead is needed for CR LF and surrogate pairs.
            InputChar fc = nextInputChar();
            if (nc.value == '\r' && fc.value == '\n') {
                return Char.newInstance('\n', nc, fc);
            }
            if (nc.value == '\r' || nc.value == '\n') {
                unread(fc);
                return Char.newInstance('\n', nc);
            }
            if (Character.isSurrogatePair((char) nc.value, (char) fc.value)) {
                return Char.newInstance(
                        Character.toCodePoint((char) nc.value, (char) fc.value),
                        nc, fc);
            }
            unread(fc);
            return Char.newInstance(nc);
        }

        /** Pushes a looked-ahead unit back; the EOF sentinel is never queued. */
        private void unread(InputChar c) {
            if (c != InputChar.EOF) {
                pending.addFirst(c);
            }
        }

        /** Returns the next UTF-16 unit, decoding Unicode escapes. */
        private InputChar nextInputChar() throws IOException {
            if (!pending.isEmpty()) {
                return pending.removeFirst();
            }
            if (exhausted) {
                return InputChar.EOF;
            }
            int r0 = in.read();
            if (r0 == -1) {
                exhausted = true;
                return InputChar.EOF;
            }
            if (r0 != '\\') {
                return InputChar.newCharInstance(r0);
            }
            int r1 = in.read();
            if (r1 == -1) {
                // Lone backslash at end of input: deliver it and remember
                // the EOF instead of queueing a fake character.
                exhausted = true;
                return InputChar.newCharInstance(r0);
            }
            if (r1 == 'u') {
                return readUnicodeEscape();
            }
            // Any other pair, including a double backslash: deliver both
            // units as plain characters. Queueing the second one ensures a
            // following 'u' is not mistaken for the start of an escape.
            pending.add(InputChar.newCharInstance(r1));
            return InputChar.newCharInstance(r0);
        }

        /**
         * Reads the remainder of a Unicode escape after the backslash and the
         * first 'u' have been consumed.
         *
         * @throws IOException if the escape is truncated or contains a
         *                     character that is not an ASCII hex digit
         */
        private InputChar readUnicodeEscape() throws IOException {
            StringBuilder seqbf = new StringBuilder("\\u");
            int r = in.read();
            // Any number of additional 'u's may precede the digits.
            while (r == 'u') {
                seqbf.append('u');
                r = in.read();
            }
            int val = 0;
            boolean valid = true;
            for (int i = 0; i < 4; i++) {
                if (i > 0) {
                    r = in.read();
                }
                if (r == -1) {
                    exhausted = true;
                    valid = false;
                    break;
                }
                // Keep collecting so the error message shows the whole sequence.
                seqbf.append((char) r);
                int digit = hexValue(r);
                if (digit < 0) {
                    valid = false;
                } else {
                    val = (val << 4) | digit;
                }
            }
            if (!valid) {
                throw new IOException("Incorrect escape sequence: '" + seqbf
                        + "'");
            }
            return InputChar.newEscapeSequenceInstance(val, seqbf.toString());
        }

        /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
        private static int hexValue(int c) {
            if (c >= '0' && c <= '9') {
                return c - '0';
            }
            if (c >= 'a' && c <= 'f') {
                return c - 'a' + 10;
            }
            if (c >= 'A' && c <= 'F') {
                return c - 'A' + 10;
            }
            return -1;
        }

        /** Closes the underlying reader; further reads report end of input. */
        void close() throws IOException {
            if (in != null) {
                in.close();
            }
            in = null;
            pending.clear();
            exhausted = true;
        }
    }
}