001/* $Id: ExtendedBaseRules.java 992060 2010-09-02 19:09:47Z simonetripodi $
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one or more
004 * contributor license agreements.  See the NOTICE file distributed with
005 * this work for additional information regarding copyright ownership.
006 * The ASF licenses this file to You under the Apache License, Version 2.0
007 * (the "License"); you may not use this file except in compliance with
008 * the License.  You may obtain a copy of the License at
009 *
010 *      http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019
020package org.apache.commons.digester;
021
022
023import java.util.ArrayList;
024import java.util.Collections;
025import java.util.Comparator;
026import java.util.HashMap;
027import java.util.Iterator;
028import java.util.List;
029import java.util.Map;
030
031
032/**
033 * <p>Extension of {@link RulesBase} for complex schema.</p>
034 *
035 * <p>This is an extension of the basic pattern matching scheme
036 * intended to improve support for mapping complex xml-schema.
037 * It is intended to be a minimal extension of the standard rules
038 * big enough to support complex schema but without the full generality
039 * offered by more exotic matching pattern rules.</p>
040 *
041 * <h4>When should you use this rather than the original?</h4>
042 *
043 * <p>
044 * This pattern-matching engine is complex and slower than the basic
045 * default RulesBase class, but offers more functionality:
046 * <ul>
047 * <li>Universal patterns allow patterns to be specified which will match
048 *  regardless of whether there are "better matching" patterns available.</li>
049 * <li>Parent-match patterns (eg "a/b/?") allow matching for all direct
050 *  children of a specified element.</li>
051 * <li>Ancestor-match patterns (eg "a/b/*") allow matching all elements
052 *  nested within a specified element to any nesting depth.</li>
053 * <li>Completely-wild patterns ("*" or "!*") allow matching all elements.</li>
054 * </ul>
055 * </p>
056 *
057 * <h4>Universal Match Patterns</h4>
058 * 
059 * <p>The default RulesBase pattern-matching engine always attempts to find
060 * the "best matching pattern", and will ignore rules associated with other
061 * patterns that match but are not "as good". As an example, if the pattern
062 * "a/b/c" is associated with rules 1 and 2, and "*&#47;c" is associated with
063 * rules 3 and 4 then element "a/b/c" will cause only rules 1 and 2 to execute.
064 * Rules 3 and 4 do have matching patterns, but because the patterns are shorter
065 * and include wildcard characters they are regarded as being "not as good" as
066 * a direct match. In general, exact patterns are better than wildcard patterns,
067 * and among multiple patterns with wildcards, the longest is preferred.
068 * See the RulesBase class for more information.</p>
069 *
070 * <p>This feature of preferring "better" patterns can be a powerful tool.
071 * However it also means that patterns can interact in unexpected ways.</p>
072 *
073 * <p>When using the ExtendedBaseRules, any pattern prefixed with '!' bypasses 
074 * the "best match" feature. Even if there is an exact match or a longer 
075 * wildcard match, patterns prefixed by '!' will still be tested to see if 
076 * they match, and if so their associated Rule objects will be included in
077 * the set of rules to be executed in the normal manner.</p>
078 *
079 * <ul>
080 * <li>Pattern <code>"!*&#47;a/b"</code> matches whenever an 'b' element
081 *     is inside an 'a'.</li>
082 * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent
083 *     matching <code>"a/b"</code> (see "Parent Match Patterns").</li>
084 * <li>Pattern <code>"!*&#47;a/b/?"</code> matches any child of a parent
085 *     matching <code>"!*&#47;a/b"</code> (see "Parent Match Patterns").</li>
086 * <li>Pattern <code>"!a/b/*"</code> matches any element whose path
087 *     starts with "a" then "b" (see "Ancestor Match Patterns").</li>
088 * <li>Pattern <code>"!*&#47;a/b/*"</code> matches any elements whose path
089 *     contains 'a/b' (see "Ancestor Match Patterns").</li>
090 * </ul>
091 *
092 * <h4>Parent Match Patterns</h4>
093 *
094 * <p>
095 * These will match direct child elements of a particular parent element.
096 * <ul>
097 * <li>
098 *  <code>"a/b/c/?"</code> matches any child whose parent matches
099 *  <code>"a/b/c"</code>.  Exact parent rules take precedence over Ancestor
100 *  Match patterns.
101 * </li>
102 * <li>
103 *  <code>"*&#47;a/b/c/?"</code> matches any child whose parent matches
104 *  <code>"*&#47;a/b/c"</code>.  The longest matching still applies to parent
105 *  matches but the length excludes the '?', which effectively means
106 *  that standard wildcard matches with the same level of depth are
107 *  chosen in preference.
108 * </li>
109 * </ul>
110 * </p>
111 *
112 * <h4>Ancestor Match Patterns</h4>
113 *
114 * <p>
115 * These will match elements whose parentage includes a particular sequence
116 * of elements.
117 * <ul>
118 * <li>
119 *  <code>"a/b/*"</code> matches any element whose path starts with
120 *  'a' then 'b'. Exact parent and parent match rules take precedence. 
121 *  The longest ancestor match will take precedence.
122 * </li>
123 * <li>
124 *  <code>"*&#47;a/b/*"</code> matches any elements whose path contains
125 *  an element 'a' followed by an element 'b'. The longest matching still 
126 *  applies but the length excludes the '*' at the end.
127 * </li>
128 * </ul>
129 * </p>
130 *
131 * <h4>Completely Wild Patterns</h4>
132 *
133 * <p>Pattern <code>"*"</code> matches every pattern that isn't matched by
134 * any other basic rule.</p>
135 *
136 * <p>Pattern <code>"!*"</code> matches every pattern.</p>
137 *
138 * <h4>Using The Extended Rules</h4>
139 *
140 * <p>By default, a Digester instance uses a {@link RulesBase} instance as
141 * its pattern matching engine. To use an ExtendedBaseRules instance, call
142 * the Digester.setRules method before adding any Rule objects to the digester
143 * instance:
144 * <pre>
145 *     Digester digester = new Digester();
146 *     digester.setRules( new ExtendedBaseRules() );
147 * </pre></p>
148 *
149 * <p>The most important thing to remember when using the extended rules is 
150 * that universal and non-universal patterns are completely independent.
151 * Universal patterns are never affected by the addition of new patterns
152 * or the removal of existing ones. Non-universal patterns are never affected
153 * by the addition of new <em>universal</em> patterns or the removal of 
154 * existing <em>universal</em> patterns. As in the basic matching rules, 
155 * non-universal (basic) patterns <strong>can</strong> be affected by the 
156 * addition of new <em>non-universal</em> patterns or the removal of existing 
157 * <em>non-universal</em> patterns, because only rules associated with the
158 * "best matching" pattern for each xml element are executed.
159 *
160 * <p> This means that you can use universal patterns to build up the simple 
161 * parts of your structure - for example defining universal creation and 
162 * property setting rules. More sophisticated and complex mapping will require 
163 * non-universal patterns and this might mean that some of the universal rules 
164 * will need to be replaced by a series of special cases using non-universal 
165 * rules. But by using universal rules as your backbone, these additions 
166 * should not break your existing rules.</p>
167 */
168
169
170public class ExtendedBaseRules extends RulesBase {
171
172
173    // ----------------------------------------------------- Instance Variables
174
175    /**
176     * Counts the entry number for the rules.
177     */
178    private int counter = 0;
179
180
181    /**
182     * The decision algorithm used (unfortunately) doesn't preserve the entry
183     * order.
184     * This map is used by a comparator which orders the list of matches
185     * before it's returned.
186     * This map stores the entry number keyed by the rule.
187     */
188    private Map<Rule, Integer> order = new HashMap<Rule, Integer>();
189
190
191    // --------------------------------------------------------- Public Methods
192
193
194    /**
195     * Register a new Rule instance matching the specified pattern.
196     *
197     * @param pattern Nesting pattern to be matched for this Rule
198     * @param rule Rule instance to be registered
199     */
200    @Override
201    public void add(String pattern, Rule rule) {
202        super.add(pattern, rule);
203        counter++;
204        order.put(rule, counter);
205    }
206
207
208    /**
209     * Return a List of all registered Rule instances that match the specified
210     * nesting pattern, or a zero-length List if there are no matches.  If more
211     * than one Rule instance matches, they <strong>must</strong> be returned
212     * in the order originally registered through the <code>add()</code>
213     * method.
214     *
215     * @param pattern Nesting pattern to be matched
216     */
217    @Override
218    public List<Rule> match(String namespace, String pattern) {
219        // calculate the pattern of the parent
220        // (if the element has one)
221        String parentPattern = "";
222        int lastIndex = pattern.lastIndexOf('/');
223
224        boolean hasParent = true;
225        if (lastIndex == -1) {
226            // element has no parent
227            hasParent = false;
228
229        } else {
230            // calculate the pattern of the parent
231            parentPattern = pattern.substring(0, lastIndex);
232
233        }
234
235
236        // we keep the list of universal matches separate
237        List<Rule> universalList = new ArrayList<Rule>(counter);
238
239        // Universal all wildards ('!*')
240        // These are always matched so always add them
241        List<Rule> tempList = this.cache.get("!*");
242        if (tempList != null) {
243            universalList.addAll(tempList);
244        }
245
246        // Universal exact parent match
247        // need to get this now since only wildcards are considered later
248        tempList = this.cache.get("!" + parentPattern + "/?");
249        if (tempList != null) {
250            universalList.addAll(tempList);
251        }
252
253
254        // base behaviour means that if we certain matches, we don't continue
255        // but we just have a single combined loop and so we have to set
256        // a variable
257        boolean ignoreBasicMatches = false;
258
259
260        // see if we have an exact basic pattern match
261        List<Rule> rulesList = this.cache.get(pattern);
262        if (rulesList != null) {
263            // we have a match!
264            // so ignore all basic matches from now on
265            ignoreBasicMatches = true;
266
267        } else {
268
269            // see if we have an exact child match
270            if (hasParent) {
271                // matching children takes preference
272                rulesList = this.cache.get(parentPattern + "/?");
273                if (rulesList != null) {
274                    // we have a match!
275                    // so ignore all basic matches from now on
276                    ignoreBasicMatches = true;
277                    
278                } else {
279                    // we don't have a match yet - so try exact ancester
280                    //
281                    rulesList = findExactAncesterMatch(pattern);
282                    if (rulesList != null) {
283                        // we have a match!
284                        // so ignore all basic matches from now on
285                        ignoreBasicMatches = true;
286                    }
287                }
288            }
289        }
290
291
292        // OK - we're ready for the big loop!
293        // Unlike the basic rules case,
294        // we have to go through for all those universal rules in all cases.
295
296        // Find the longest key, ie more discriminant
297        String longKey = "";
298        int longKeyLength = 0;
299        
300        for (String key : this.cache.keySet()) {
301
302            // find out if it's a univeral pattern
303            // set a flag
304            boolean isUniversal = key.startsWith("!");
305            if (isUniversal) {
306                // and find the underlying key
307                key = key.substring(1, key.length());
308            }
309
310                    
311            // don't need to check exact matches
312            boolean wildcardMatchStart = key.startsWith("*/");
313            boolean wildcardMatchEnd = key.endsWith("/*");
314            if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) {
315
316                boolean parentMatched = false;
317                boolean basicMatched = false;
318                boolean ancesterMatched = false;
319                
320                boolean parentMatchEnd = key.endsWith("/?");
321                if (parentMatchEnd) {
322                    // try for a parent match
323                    parentMatched = parentMatch(key, pattern, parentPattern);
324
325                } else if (wildcardMatchEnd) {
326                    // check for ancester match
327                    if (wildcardMatchStart) {
328                        String patternBody = key.substring(2, key.length() - 2);
329                        if (pattern.endsWith(patternBody)) {
330                            ancesterMatched = true;
331                        } else {
332                            ancesterMatched = (pattern.indexOf(patternBody + "/") > -1);
333                        }
334                    } else {
335                        String bodyPattern = key.substring(0, key.length() - 2);
336                        if (pattern.startsWith(bodyPattern))
337                        {
338                            if (pattern.length() == bodyPattern.length()) {
339                                // exact match
340                                ancesterMatched = true;
341                            } else {
342                                ancesterMatched = ( pattern.charAt(bodyPattern.length()) == '/' );
343                            }
344                        } else {
345                            ancesterMatched = false;  
346                        } 
347                    }               
348                } else {
349                    // try for a base match
350                    basicMatched = basicMatch(key, pattern);
351                }
352
353                if (parentMatched || basicMatched || ancesterMatched) {
354                    if (isUniversal) {
355                        // universal rules go straight in
356                        // (no longest matching rule)
357                        tempList = this.cache.get("!" + key);
358                        if (tempList != null) {
359                            universalList.addAll(tempList);
360                        }
361
362                    } else {
363                        if (!ignoreBasicMatches) {
364                            // ensure that all parent matches are SHORTER
365                            // than rules with same level of matching.
366                            //
367                            // the calculations below don't work for universal
368                            // matching, but we don't care because in that case
369                            // this if-stmt is not entered.
370                            int keyLength = key.length();
371                            if (wildcardMatchStart) {
372                                --keyLength;
373                            }
374                            if (wildcardMatchEnd) {
375                                --keyLength;
376                            } else if (parentMatchEnd) {
377                                --keyLength;
378                            }
379
380                            if (keyLength > longKeyLength) {
381                                rulesList = this.cache.get(key);
382                                longKey = key;
383                                longKeyLength = keyLength;
384                            }
385                        }
386                    }
387                }
388            }
389        }
390
391
392        // '*' works in practice as a default matching
393        // (this is because anything is a deeper match!)
394        if (rulesList == null) {
395            rulesList = this.cache.get("*");
396        }
397
398        // if we've matched a basic pattern, then add to the universal list
399        if (rulesList != null) {
400            universalList.addAll(rulesList);
401        }
402
403
404        // don't filter if namespace is null
405        if (namespace != null) {
406            // remove invalid namespaces
407            Iterator<Rule> it = universalList.iterator();
408            while (it.hasNext()) {
409                Rule rule = it.next();
410                String ns_uri = rule.getNamespaceURI();
411                if (ns_uri != null && !ns_uri.equals(namespace)) {
412                    it.remove();
413                }
414            }
415        }
416
417
418        // need to make sure that the collection is sort in the order
419        // of addition.  We use a custom comparator for this
420        Collections.sort(
421                universalList,
422                new Comparator<Rule>() {
423
424                    public int compare(Rule r1, Rule r2) throws ClassCastException {
425                        // Get the entry order from the map
426                        Integer i1 = order.get(r1);
427                        Integer i2 = order.get(r2);
428
429                        // and use that to perform the comparison
430                        if (i1 == null) {
431                            if (i2 == null) {
432
433                                return 0;
434
435                            } else {
436
437                                return -1;
438
439                            }
440                        } else if (i2 == null) {
441                            return 1;
442                        }
443
444                        return (i1.intValue() - i2.intValue());
445                    }
446                });
447
448        return universalList;
449    }
450
451    /**
452     * Matching parent.
453     */
454    private boolean parentMatch(String key, String pattern, String parentPattern) {
455        return parentPattern.endsWith(key.substring(1, key.length() - 2));
456    }
457
458    /**
459     * Standard match.
460     * Matches the end of the pattern to the key.
461     */
462    private boolean basicMatch(String key, String pattern) {
463        return (pattern.equals(key.substring(2)) ||
464                pattern.endsWith(key.substring(1)));
465    }
466    
467    /**
468     * Finds an exact ancester match for given pattern
469     */
470    private List<Rule> findExactAncesterMatch(String parentPattern) {
471        List<Rule> matchingRules = null;
472        int lastIndex = parentPattern.length();
473        while (lastIndex-- > 0) {
474            lastIndex = parentPattern.lastIndexOf('/', lastIndex);
475            if (lastIndex > 0) {
476                matchingRules = this.cache.get(parentPattern.substring(0, lastIndex) + "/*");
477                if (matchingRules != null) {
478                    return matchingRules;
479                }
480            }
481        }
482        return null;
483    }
484}