001package org.javasimon.jdbc4;
002
003import java.util.List;
004
005import org.javasimon.utils.Replacer;
006
007/**
008 * SqlNormalizer takes SQL statement and replaces parameters with question marks. It is
009 * important to realize, that normalizer IS NOT SQL analyzer. It makes as simple replacement
010 * as possible (with my coding skill ;-)) to still truly represent the original statement.
011 * Normalized statement is merely used to recognize the original one and also to merge
012 * the same statements with various arguments. Its primary purpose is to limit count of
013 * distinct per-statement Simons. It doesn't suppose to be perfect or proof to all dialects.
014 * <p/>
015 * Usage is simple, you create normalizer with SQL statement and than you can ask the
016 * object for normalizedSql and type via respective getters.
017 *
018 * @author Radovan Sninsky
019 * @author <a href="mailto:virgo47@gmail.com">Richard "Virgo" Richter</a>
020 * @since 2.4
021 */
022public final class SqlNormalizer {
023        private static final Replacer[] FIRST_REPLACERS;
024        private static final Replacer[] SECOND_REPLACERS;
025
026        private static final Replacer FUNCTION_REPLACER = new Replacer("([-(=<>!+*/,]+\\s?)\\w+\\([^()]*\\)", "$1?", Replacer.Modificator.REPEAT_UNTIL_UNCHANGED);
027
028        private static final Replacer TYPE_SELECTOR = new Replacer("^\\W*(\\w+)\\W.*", "$1");
029
030        static {
031                FIRST_REPLACERS = new Replacer[]{
032                        new Replacer("''", "?"), // replace empty strings and '' inside other strings
033                        new Replacer(" *([-=<>!+*/,]+) *", "$1"), // remove spaces around various operators and commas
034                        new Replacer("([-=<>!+*/]+)", " $1 "), // put spaces back (results in one space everywhere
035                        new Replacer("\\s+", " "), // normalize white spaces
036                        new Replacer("(create|alter|drop) (\\S+) ([^ (]+).*$", "$1 $2 $3"), // shrink DLL to first three tokens
037                        new Replacer("([-=<>!+*/,.(]+\\s?)(?:(?:'[^']+')|(?:[0-9.]+))", "$1?"), // replace arguments after =, ( and , with ?
038                        new Replacer("like '[^']+'", "like ?"), // replace like arguments
039                        new Replacer("between \\S+ and \\S+", "between ? and ?"), // replace between arguments
040                        new Replacer(" in\\(", " in ("), // put space before ( in "in("
041                        new Replacer("^\\{|\\}$", ""), // remove { and } at the start/end
042                        new Replacer("^\\s*begin", "call"), // replace begin with call
043                        new Replacer(";?\\s*end;?$", ""), // remove final end
044                };
045                SECOND_REPLACERS = new Replacer[]{
046                        new Replacer(",", ", "), // put spaces after ,
047                        new Replacer(" in \\(\\?(?:, \\?)*\\)", " in (?)"), // shrink more ? in "in" to one
048                };
049        }
050
051        private final String sql;
052        private String normalizedSql;
053        private String type;
054
055        /**
056         * Creates SQL normalizer and performs the normalization.
057         *
058         * @param sql SQL to normalize
059         */
060        public SqlNormalizer(String sql) {
061                this.sql = sql;
062                if (sql != null) {
063                        normalize(sql);
064                }
065        }
066
067        /**
068         * Constructor for batch normalization. Type of the "statement" will be "batch".
069         *
070         * @param batch list of statements
071         */
072        public SqlNormalizer(List<String> batch) {
073                sql = "batch";
074                StringBuilder sqlBuilder = new StringBuilder();
075                String lastStmt = null;
076                int stmtCounter = 0;
077                for (String statement : batch) {
078                        normalize(statement);
079                        if (lastStmt == null) {
080                                lastStmt = normalizedSql;
081                        }
082                        if (!lastStmt.equalsIgnoreCase(normalizedSql)) {
083                                sqlBuilder.append(stmtCounter == 1 ? "" : stmtCounter + "x ").append(lastStmt).append("; ");
084                                lastStmt = normalizedSql;
085                                stmtCounter = 1;
086                        } else {
087                                stmtCounter++;
088                        }
089                }
090                sqlBuilder.append(stmtCounter == 1 ? "" : stmtCounter + "x ").append(lastStmt);
091                type = "batch";
092                this.normalizedSql = sqlBuilder.toString();
093        }
094
095        private void normalize(String sql) {
096                normalizedSql = sql.toLowerCase().trim();
097                applyReplacers(FIRST_REPLACERS);
098                type = TYPE_SELECTOR.process(normalizedSql);
099
100                // phase two - complications ;-)
101                if (type.equals("select")) {
102                        String[] sa = normalizedSql.split(" from ", 2);
103                        if (sa.length == 2) {
104                                normalizedSql = sa[0] + " from " + FUNCTION_REPLACER.process(sa[1]);
105                        }
106                } else {
107                        normalizedSql = FUNCTION_REPLACER.process(normalizedSql);
108                }
109                applyReplacers(SECOND_REPLACERS);
110        }
111
112        private void applyReplacers(Replacer[] replacers) {
113                for (Replacer replacer : replacers) {
114                        normalizedSql = replacer.process(normalizedSql);
115                }
116        }
117
118        /**
119         * Returns the original SQL.
120         *
121         * @return original SQL
122         */
123        public String getSql() {
124                return sql;
125        }
126
127        /**
128         * Returns the normalized SQL.
129         *
130         * @return normalized SQL
131         */
132        public String getNormalizedSql() {
133                return normalizedSql;
134        }
135
136        /**
137         * Returns SQL type which is typically first word of the SQL (insert, select, etc). Returns batch for batches.
138         *
139         * @return SQL statement type or "batch"
140         */
141        public String getType() {
142                return type;
143        }
144
145        /**
146         * Returns human readable string describing this SQL normalizer.
147         *
148         * @return original SQL, normalized SQL, SQL type
149         */
150        @Override
151        public String toString() {
152                return "SqlNormalizer{" +
153                        "\n  sql='" + sql + '\'' +
154                        ",\n  normalizedSql='" + normalizedSql + '\'' +
155                        ",\n  type='" + type + '\'' +
156                        '}';
157        }
158}