001    /**
002     * JDBM LICENSE v1.00
003     *
004     * Redistribution and use of this software and associated documentation
005     * ("Software"), with or without modification, are permitted provided
006     * that the following conditions are met:
007     *
008     * 1. Redistributions of source code must retain copyright
009     *    statements and notices.  Redistributions must also contain a
010     *    copy of this document.
011     *
012     * 2. Redistributions in binary form must reproduce the
013     *    above copyright notice, this list of conditions and the
014     *    following disclaimer in the documentation and/or other
015     *    materials provided with the distribution.
016     *
017     * 3. The name "JDBM" must not be used to endorse or promote
018     *    products derived from this Software without prior written
019     *    permission of Cees de Groot.  For written permission,
020     *    please contact cg@cdegroot.com.
021     *
022     * 4. Products derived from this Software may not be called "JDBM"
023     *    nor may "JDBM" appear in their names without prior written
024     *    permission of Cees de Groot.
025     *
026     * 5. Due credit should be given to the JDBM Project
027     *    (http://jdbm.sourceforge.net/).
028     *
029     * THIS SOFTWARE IS PROVIDED BY THE JDBM PROJECT AND CONTRIBUTORS
030     * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
031     * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
032     * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
033     * CEES DE GROOT OR ANY CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
034     * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
035     * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
036     * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
037     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
038     * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
039     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
040     * OF THE POSSIBILITY OF SUCH DAMAGE.
041     *
042     * Copyright 2001 (C) Alex Boisvert. All Rights Reserved.
043     * Contributions are Copyright (C) 2001 by their associated contributors.
044     *
045     */
046    
047    package jdbm.btree;
048    
049    import jdbm.RecordManager;
050    
051    import jdbm.helper.Serializer;
052    import jdbm.helper.Tuple;
053    import jdbm.helper.TupleBrowser;
054    
055    import java.io.Externalizable;
056    import java.io.IOException;
057    import java.io.ObjectInput;
058    import java.io.ObjectOutput;
059    import java.io.Serializable;
060    
061    import java.util.Comparator;
062    
063    /**
064     * B+Tree persistent indexing data structure.  B+Trees are optimized for
065     * block-based, random I/O storage because they store multiple keys on
066     * one tree node (called <code>BPage</code>).  In addition, the leaf nodes
067     * directly contain (inline) the values associated with the keys, allowing a
068     * single (or sequential) disk read of all the values on the page.
069     * <p>
070     * B+Trees are n-airy, yeilding log(N) search cost.  They are self-balancing,
071     * preventing search performance degradation when the size of the tree grows.
072     * <p>
073     * Keys and associated values must be <code>Serializable</code> objects. The
074     * user is responsible to supply a serializable <code>Comparator</code> object
075     * to be used for the ordering of entries, which are also called <code>Tuple</code>.
076     * The B+Tree allows traversing the keys in forward and reverse order using a
077     * TupleBrowser obtained from the browse() methods.
078     * <p>
079     * This implementation does not directly support duplicate keys, but it is
080     * possible to handle duplicates by inlining or referencing an object collection
081     * as a value.
082     * <p>
083     * There is no limit on key size or value size, but it is recommended to keep
084     * both as small as possible to reduce disk I/O.   This is especially true for
085     * the key size, which impacts all non-leaf <code>BPage</code> objects.
086     *
087     * @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
088     * @version $Id: BTree.java,v 1.6 2005/06/25 23:12:31 doomdark Exp $
089     */
090    public class BTree
091        implements Externalizable
092    {
093    
094        private static final boolean DEBUG = false;
095    
096        /**
097         * Version id for serialization.
098         */
099        final static long serialVersionUID = 1L;
100    
101    
102        /**
103         * Default page size (number of entries per node)
104         */
105        public static final int DEFAULT_SIZE = 16;
106    
107    
108        /**
109         * Page manager used to persist changes in BPages
110         */
111        protected transient RecordManager _recman;
112    
113    
114        /**
115         * This BTree's record ID in the PageManager.
116         */
117        private transient long _recid;
118    
119    
120        /**
121         * Comparator used to index entries.
122         */
123        protected Comparator _comparator;
124    
125    
126        /**
127         * Serializer used to serialize index keys (optional)
128         */
129        protected Serializer _keySerializer;
130    
131    
132        /**
133         * Serializer used to serialize index values (optional)
134         */
135        protected Serializer _valueSerializer;
136    
137    
138        /**
139         * Height of the B+Tree.  This is the number of BPages you have to traverse
140         * to get to a leaf BPage, starting from the root.
141         */
142        private int _height;
143    
144    
145        /**
146         * Recid of the root BPage
147         */
148        private transient long _root;
149    
150    
151        /**
152         * Number of entries in each BPage.
153         */
154        protected int _pageSize;
155    
156    
157        /**
158         * Total number of entries in the BTree
159         */
160        protected int _entries;
161    
162        
163        /**
164         * Serializer used for BPages of this tree
165         */
166        private transient BPage _bpageSerializer;
167        
168    
169        /**
170         * No-argument constructor used by serialization.
171         */
172        public BTree()
173        {
174            // empty
175        }
176    
177    
178        /**
179         * Create a new persistent BTree, with 16 entries per node.
180         *
181         * @param recman Record manager used for persistence.
182         * @param comparator Comparator used to order index entries
183         */
184        public static BTree createInstance( RecordManager recman,
185                                            Comparator comparator )
186            throws IOException
187        {
188            return createInstance( recman, comparator, null, null, DEFAULT_SIZE );
189        }
190    
191    
192        /**
193         * Create a new persistent BTree, with 16 entries per node.
194         *
195         * @param recman Record manager used for persistence.
196         * @param keySerializer Serializer used to serialize index keys (optional)
197         * @param valueSerializer Serializer used to serialize index values (optional)
198         * @param comparator Comparator used to order index entries
199         */
200        public static BTree createInstance( RecordManager recman,
201                                            Comparator comparator,
202                                            Serializer keySerializer,
203                                            Serializer valueSerializer )
204            throws IOException
205        {
206            return createInstance( recman, comparator, keySerializer, 
207                                   valueSerializer, DEFAULT_SIZE );
208        }
209    
210    
211        /**
212         * Create a new persistent BTree with the given number of entries per node.
213         *
214         * @param recman Record manager used for persistence.
215         * @param comparator Comparator used to order index entries
216         * @param keySerializer Serializer used to serialize index keys (optional)
217         * @param valueSerializer Serializer used to serialize index values (optional)
218         * @param pageSize Number of entries per page (must be even).
219         */
220        public static BTree createInstance( RecordManager recman,
221                                            Comparator comparator,
222                                            Serializer keySerializer,
223                                            Serializer valueSerializer,
224                                            int pageSize )
225            throws IOException
226        {
227            BTree btree;
228    
229            if ( recman == null ) {
230                throw new IllegalArgumentException( "Argument 'recman' is null" );
231            }
232    
233            if ( comparator == null ) {
234                throw new IllegalArgumentException( "Argument 'comparator' is null" );
235            }
236    
237            if ( ! ( comparator instanceof Serializable ) ) {
238                throw new IllegalArgumentException( "Argument 'comparator' must be serializable" );
239            }
240    
241            if ( keySerializer != null && ! ( keySerializer instanceof Serializable ) ) {
242                throw new IllegalArgumentException( "Argument 'keySerializer' must be serializable" );
243            }
244    
245            if ( valueSerializer != null && ! ( valueSerializer instanceof Serializable ) ) {
246                throw new IllegalArgumentException( "Argument 'valueSerializer' must be serializable" );
247            }
248    
249            // make sure there's an even number of entries per BPage
250            if ( ( pageSize & 1 ) != 0 ) {
251                throw new IllegalArgumentException( "Argument 'pageSize' must be even" );
252            }
253    
254            btree = new BTree();
255            btree._recman = recman;
256            btree._comparator = comparator;
257            btree._keySerializer = keySerializer;
258            btree._valueSerializer = valueSerializer;
259            btree._pageSize = pageSize;
260            btree._bpageSerializer = new BPage();
261            btree._bpageSerializer._btree = btree;
262            btree._recid = recman.insert( btree );
263            return btree;
264        }
265    
266    
267        /**
268         * Load a persistent BTree.
269         *
270         * @param recman RecordManager used to store the persistent btree
271         * @param recid Record id of the BTree
272         */
273        public static BTree load( RecordManager recman, long recid )
274            throws IOException
275        {
276            BTree btree = (BTree) recman.fetch( recid );
277            btree._recid = recid;
278            btree._recman = recman;
279            btree._bpageSerializer = new BPage();
280            btree._bpageSerializer._btree = btree;
281            return btree;
282        }
283    
284    
285        /**
286         * Insert an entry in the BTree.
287         * <p>
288         * The BTree cannot store duplicate entries.  An existing entry can be
289         * replaced using the <code>replace</code> flag.   If an entry with the
290         * same key already exists in the BTree, its value is returned.
291         *
292         * @param key Insert key
293         * @param value Insert value
294         * @param replace Set to true to replace an existing key-value pair.
295         * @return Existing value, if any.
296         */
297        public synchronized Object insert( Object key, Object value,
298                                           boolean replace )
299            throws IOException
300        {
301            if ( key == null ) {
302                throw new IllegalArgumentException( "Argument 'key' is null" );
303            }
304            if ( value == null ) {
305                throw new IllegalArgumentException( "Argument 'value' is null" );
306            }
307    
308            BPage rootPage = getRoot();
309    
310            if ( rootPage == null ) {
311                // BTree is currently empty, create a new root BPage
312                if (DEBUG) {
313                    System.out.println( "BTree.insert() new root BPage" );
314                }
315                rootPage = new BPage( this, key, value );
316                _root = rootPage._recid;
317                _height = 1;
318                _entries = 1;
319                _recman.update( _recid, this );
320                return null;
321            } else {
322                BPage.InsertResult insert = rootPage.insert( _height, key, value, replace );
323                boolean dirty = false;
324                if ( insert._overflow != null ) {
325                    // current root page overflowed, we replace with a new root page
326                    if ( DEBUG ) {
327                        System.out.println( "BTree.insert() replace root BPage due to overflow" );
328                    }
329                    rootPage = new BPage( this, rootPage, insert._overflow );
330                    _root = rootPage._recid;
331                    _height += 1;
332                    dirty = true;
333                }
334                if ( insert._existing == null ) {
335                    _entries++;
336                    dirty = true;
337                }
338                if ( dirty ) {
339                    _recman.update( _recid, this );
340                }
341                // insert might have returned an existing value
342                return insert._existing;
343            }
344        }
345    
346    
347        /**
348         * Remove an entry with the given key from the BTree.
349         *
350         * @param key Removal key
351         * @return Value associated with the key, or null if no entry with given
352         *         key existed in the BTree.
353         */
354        public synchronized Object remove( Object key )
355            throws IOException
356        {
357            if ( key == null ) {
358                throw new IllegalArgumentException( "Argument 'key' is null" );
359            }
360    
361            BPage rootPage = getRoot();
362            if ( rootPage == null ) {
363                return null;
364            }
365            boolean dirty = false;
366            BPage.RemoveResult remove = rootPage.remove( _height, key );
367            if ( remove._underflow && rootPage.isEmpty() ) {
368                _height -= 1;
369                dirty = true;
370    
371                _recman.delete(_root);
372                if ( _height == 0 ) {
373                    _root = 0;
374                } else {
375                    _root = rootPage.childBPage( _pageSize-1 )._recid;
376                }
377            }
378            if ( remove._value != null ) {
379                _entries--;
380                dirty = true;
381            }
382            if ( dirty ) {
383                _recman.update( _recid, this );
384            }
385            return remove._value;
386        }
387    
388    
389        /**
390         * Find the value associated with the given key.
391         *
392         * @param key Lookup key.
393         * @return Value associated with the key, or null if not found.
394         */
395        public synchronized Object find( Object key )
396            throws IOException
397        {
398            if ( key == null ) {
399                throw new IllegalArgumentException( "Argument 'key' is null" );
400            }
401            BPage rootPage = getRoot();
402            if ( rootPage == null ) {
403                return null;
404            }
405    
406            Tuple tuple = new Tuple( null, null );
407            TupleBrowser browser = rootPage.find( _height, key );
408    
409            if ( browser.getNext( tuple ) ) {
410                // find returns the matching key or the next ordered key, so we must
411                // check if we have an exact match
412                if ( _comparator.compare( key, tuple.getKey() ) != 0 ) {
413                    return null;
414                } else {
415                    return tuple.getValue();
416                }
417            } else {
418                return null;
419            }
420        }
421    
422    
423        /**
424         * Find the value associated with the given key, or the entry immediately
425         * following this key in the ordered BTree.
426         *
427         * @param key Lookup key.
428         * @return Value associated with the key, or a greater entry, or null if no
429         *         greater entry was found.
430         */
431        public synchronized Tuple findGreaterOrEqual( Object key )
432            throws IOException
433        {
434            Tuple         tuple;
435            TupleBrowser  browser;
436    
437            if ( key == null ) {
438                // there can't be a key greater than or equal to "null"
439                // because null is considered an infinite key.
440                return null;
441            }
442    
443            tuple = new Tuple( null, null );
444            browser = browse( key );
445            if ( browser.getNext( tuple ) ) {
446                return tuple;
447            } else {
448                return null;
449            }
450        }
451    
452    
453        /**
454         * Get a browser initially positioned at the beginning of the BTree.
455         * <p><b>
456         * WARNING: �If you make structural modifications to the BTree during
457         * browsing, you will get inconsistent browing results.
458         * </b>
459         *
460         * @return Browser positionned at the beginning of the BTree.
461         */
462        public synchronized TupleBrowser browse()
463            throws IOException
464        {
465            BPage rootPage = getRoot();
466            if ( rootPage == null ) {
467                return EmptyBrowser.INSTANCE;
468            }
469            TupleBrowser browser = rootPage.findFirst();
470            return browser;
471        }
472    
473    
474        /**
475         * Get a browser initially positioned just before the given key.
476         * <p><b>
477         * WARNING: �If you make structural modifications to the BTree during
478         * browsing, you will get inconsistent browing results.
479         * </b>
480         *
481         * @param key Key used to position the browser.  If null, the browser
482         *            will be positionned after the last entry of the BTree.
483         *            (Null is considered to be an "infinite" key)
484         * @return Browser positionned just before the given key.
485         */
486        public synchronized TupleBrowser browse( Object key )
487            throws IOException
488        {
489            BPage rootPage = getRoot();
490            if ( rootPage == null ) {
491                return EmptyBrowser.INSTANCE;
492            }
493            TupleBrowser browser = rootPage.find( _height, key );
494            return browser;
495        }
496    
497    
498        /**
499         * Return the number of entries (size) of the BTree.
500         */
501        public synchronized int size()
502        {
503            return _entries;
504        }
505    
506    
507        /**
508         * Return the persistent record identifier of the BTree.
509         */
510        public long getRecid()
511        {
512            return _recid;
513        }
514    
515    
516        /**
517         * Return the root BPage, or null if it doesn't exist.
518         */
519        private BPage getRoot()
520            throws IOException
521        {
522            if ( _root == 0 ) {
523                return null;
524            }
525            BPage root = (BPage) _recman.fetch( _root, _bpageSerializer );
526            root._recid = _root;
527            root._btree = this;
528            return root;
529        }
530    
531        /**
532         * Implement Externalizable interface.
533         */
534        public void readExternal( ObjectInput in )
535            throws IOException, ClassNotFoundException
536        {
537            _comparator = (Comparator) in.readObject();
538            _keySerializer = (Serializer) in.readObject();
539            _valueSerializer = (Serializer) in.readObject();
540            _height = in.readInt();
541            _root = in.readLong();
542            _pageSize = in.readInt();
543            _entries = in.readInt();
544        }
545    
546    
547        /**
548         * Implement Externalizable interface.
549         */
550        public void writeExternal( ObjectOutput out )
551            throws IOException
552        {
553            out.writeObject( _comparator );
554            out.writeObject( _keySerializer );
555            out.writeObject( _valueSerializer );
556            out.writeInt( _height );
557            out.writeLong( _root );
558            out.writeInt( _pageSize );
559            out.writeInt( _entries );
560        }
561    
562    
563        public void setValueSerializer( Serializer valueSerializer )
564        {
565            _valueSerializer = valueSerializer;
566        }
567        
568        
569        /*
570        public void assert() throws IOException {
571            BPage root = getRoot();
572            if ( root != null ) {
573                root.assertRecursive( _height );
574            }
575        }
576        */
577    
578    
579        /*
580        public void dump() throws IOException {
581            BPage root = getRoot();
582            if ( root != null ) {
583                root.dumpRecursive( _height, 0 );
584            }
585        }
586        */
587    
588    
589        /** PRIVATE INNER CLASS
590         *  Browser returning no element.
591         */
592        static class EmptyBrowser
593            extends TupleBrowser
594        {
595    
596            static TupleBrowser INSTANCE = new EmptyBrowser();
597    
598            public boolean getNext( Tuple tuple )
599            {
600                return false;
601            }
602    
603            public boolean getPrevious( Tuple tuple )
604            {
605                return false;
606            }
607        }
608    }
609