001 /**
002 * JDBM LICENSE v1.00
003 *
004 * Redistribution and use of this software and associated documentation
005 * ("Software"), with or without modification, are permitted provided
006 * that the following conditions are met:
007 *
008 * 1. Redistributions of source code must retain copyright
009 * statements and notices. Redistributions must also contain a
010 * copy of this document.
011 *
012 * 2. Redistributions in binary form must reproduce the
013 * above copyright notice, this list of conditions and the
014 * following disclaimer in the documentation and/or other
015 * materials provided with the distribution.
016 *
017 * 3. The name "JDBM" must not be used to endorse or promote
018 * products derived from this Software without prior written
019 * permission of Cees de Groot. For written permission,
020 * please contact cg@cdegroot.com.
021 *
022 * 4. Products derived from this Software may not be called "JDBM"
023 * nor may "JDBM" appear in their names without prior written
024 * permission of Cees de Groot.
025 *
026 * 5. Due credit should be given to the JDBM Project
027 * (http://jdbm.sourceforge.net/).
028 *
029 * THIS SOFTWARE IS PROVIDED BY THE JDBM PROJECT AND CONTRIBUTORS
030 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
031 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
032 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
033 * CEES DE GROOT OR ANY CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
034 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
035 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
036 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
037 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
038 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
039 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
040 * OF THE POSSIBILITY OF SUCH DAMAGE.
041 *
042 * Copyright 2001 (C) Alex Boisvert. All Rights Reserved.
043 * Contributions are Copyright (C) 2001 by their associated contributors.
044 *
045 */
046
047 package jdbm.btree;
048
049 import jdbm.RecordManager;
050
051 import jdbm.helper.Serializer;
052 import jdbm.helper.Tuple;
053 import jdbm.helper.TupleBrowser;
054
055 import java.io.Externalizable;
056 import java.io.IOException;
057 import java.io.ObjectInput;
058 import java.io.ObjectOutput;
059 import java.io.Serializable;
060
061 import java.util.Comparator;
062
063 /**
064 * B+Tree persistent indexing data structure. B+Trees are optimized for
065 * block-based, random I/O storage because they store multiple keys on
066 * one tree node (called <code>BPage</code>). In addition, the leaf nodes
067 * directly contain (inline) the values associated with the keys, allowing a
068 * single (or sequential) disk read of all the values on the page.
069 * <p>
070 * B+Trees are n-airy, yeilding log(N) search cost. They are self-balancing,
071 * preventing search performance degradation when the size of the tree grows.
072 * <p>
073 * Keys and associated values must be <code>Serializable</code> objects. The
074 * user is responsible to supply a serializable <code>Comparator</code> object
075 * to be used for the ordering of entries, which are also called <code>Tuple</code>.
076 * The B+Tree allows traversing the keys in forward and reverse order using a
077 * TupleBrowser obtained from the browse() methods.
078 * <p>
079 * This implementation does not directly support duplicate keys, but it is
080 * possible to handle duplicates by inlining or referencing an object collection
081 * as a value.
082 * <p>
083 * There is no limit on key size or value size, but it is recommended to keep
084 * both as small as possible to reduce disk I/O. This is especially true for
085 * the key size, which impacts all non-leaf <code>BPage</code> objects.
086 *
087 * @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
088 * @version $Id: BTree.java,v 1.6 2005/06/25 23:12:31 doomdark Exp $
089 */
090 public class BTree
091 implements Externalizable
092 {
093
094 private static final boolean DEBUG = false;
095
096 /**
097 * Version id for serialization.
098 */
099 final static long serialVersionUID = 1L;
100
101
102 /**
103 * Default page size (number of entries per node)
104 */
105 public static final int DEFAULT_SIZE = 16;
106
107
108 /**
109 * Page manager used to persist changes in BPages
110 */
111 protected transient RecordManager _recman;
112
113
114 /**
115 * This BTree's record ID in the PageManager.
116 */
117 private transient long _recid;
118
119
120 /**
121 * Comparator used to index entries.
122 */
123 protected Comparator _comparator;
124
125
126 /**
127 * Serializer used to serialize index keys (optional)
128 */
129 protected Serializer _keySerializer;
130
131
132 /**
133 * Serializer used to serialize index values (optional)
134 */
135 protected Serializer _valueSerializer;
136
137
138 /**
139 * Height of the B+Tree. This is the number of BPages you have to traverse
140 * to get to a leaf BPage, starting from the root.
141 */
142 private int _height;
143
144
145 /**
146 * Recid of the root BPage
147 */
148 private transient long _root;
149
150
151 /**
152 * Number of entries in each BPage.
153 */
154 protected int _pageSize;
155
156
157 /**
158 * Total number of entries in the BTree
159 */
160 protected int _entries;
161
162
163 /**
164 * Serializer used for BPages of this tree
165 */
166 private transient BPage _bpageSerializer;
167
168
169 /**
170 * No-argument constructor used by serialization.
171 */
172 public BTree()
173 {
174 // empty
175 }
176
177
178 /**
179 * Create a new persistent BTree, with 16 entries per node.
180 *
181 * @param recman Record manager used for persistence.
182 * @param comparator Comparator used to order index entries
183 */
184 public static BTree createInstance( RecordManager recman,
185 Comparator comparator )
186 throws IOException
187 {
188 return createInstance( recman, comparator, null, null, DEFAULT_SIZE );
189 }
190
191
192 /**
193 * Create a new persistent BTree, with 16 entries per node.
194 *
195 * @param recman Record manager used for persistence.
196 * @param keySerializer Serializer used to serialize index keys (optional)
197 * @param valueSerializer Serializer used to serialize index values (optional)
198 * @param comparator Comparator used to order index entries
199 */
200 public static BTree createInstance( RecordManager recman,
201 Comparator comparator,
202 Serializer keySerializer,
203 Serializer valueSerializer )
204 throws IOException
205 {
206 return createInstance( recman, comparator, keySerializer,
207 valueSerializer, DEFAULT_SIZE );
208 }
209
210
211 /**
212 * Create a new persistent BTree with the given number of entries per node.
213 *
214 * @param recman Record manager used for persistence.
215 * @param comparator Comparator used to order index entries
216 * @param keySerializer Serializer used to serialize index keys (optional)
217 * @param valueSerializer Serializer used to serialize index values (optional)
218 * @param pageSize Number of entries per page (must be even).
219 */
220 public static BTree createInstance( RecordManager recman,
221 Comparator comparator,
222 Serializer keySerializer,
223 Serializer valueSerializer,
224 int pageSize )
225 throws IOException
226 {
227 BTree btree;
228
229 if ( recman == null ) {
230 throw new IllegalArgumentException( "Argument 'recman' is null" );
231 }
232
233 if ( comparator == null ) {
234 throw new IllegalArgumentException( "Argument 'comparator' is null" );
235 }
236
237 if ( ! ( comparator instanceof Serializable ) ) {
238 throw new IllegalArgumentException( "Argument 'comparator' must be serializable" );
239 }
240
241 if ( keySerializer != null && ! ( keySerializer instanceof Serializable ) ) {
242 throw new IllegalArgumentException( "Argument 'keySerializer' must be serializable" );
243 }
244
245 if ( valueSerializer != null && ! ( valueSerializer instanceof Serializable ) ) {
246 throw new IllegalArgumentException( "Argument 'valueSerializer' must be serializable" );
247 }
248
249 // make sure there's an even number of entries per BPage
250 if ( ( pageSize & 1 ) != 0 ) {
251 throw new IllegalArgumentException( "Argument 'pageSize' must be even" );
252 }
253
254 btree = new BTree();
255 btree._recman = recman;
256 btree._comparator = comparator;
257 btree._keySerializer = keySerializer;
258 btree._valueSerializer = valueSerializer;
259 btree._pageSize = pageSize;
260 btree._bpageSerializer = new BPage();
261 btree._bpageSerializer._btree = btree;
262 btree._recid = recman.insert( btree );
263 return btree;
264 }
265
266
267 /**
268 * Load a persistent BTree.
269 *
270 * @param recman RecordManager used to store the persistent btree
271 * @param recid Record id of the BTree
272 */
273 public static BTree load( RecordManager recman, long recid )
274 throws IOException
275 {
276 BTree btree = (BTree) recman.fetch( recid );
277 btree._recid = recid;
278 btree._recman = recman;
279 btree._bpageSerializer = new BPage();
280 btree._bpageSerializer._btree = btree;
281 return btree;
282 }
283
284
285 /**
286 * Insert an entry in the BTree.
287 * <p>
288 * The BTree cannot store duplicate entries. An existing entry can be
289 * replaced using the <code>replace</code> flag. If an entry with the
290 * same key already exists in the BTree, its value is returned.
291 *
292 * @param key Insert key
293 * @param value Insert value
294 * @param replace Set to true to replace an existing key-value pair.
295 * @return Existing value, if any.
296 */
297 public synchronized Object insert( Object key, Object value,
298 boolean replace )
299 throws IOException
300 {
301 if ( key == null ) {
302 throw new IllegalArgumentException( "Argument 'key' is null" );
303 }
304 if ( value == null ) {
305 throw new IllegalArgumentException( "Argument 'value' is null" );
306 }
307
308 BPage rootPage = getRoot();
309
310 if ( rootPage == null ) {
311 // BTree is currently empty, create a new root BPage
312 if (DEBUG) {
313 System.out.println( "BTree.insert() new root BPage" );
314 }
315 rootPage = new BPage( this, key, value );
316 _root = rootPage._recid;
317 _height = 1;
318 _entries = 1;
319 _recman.update( _recid, this );
320 return null;
321 } else {
322 BPage.InsertResult insert = rootPage.insert( _height, key, value, replace );
323 boolean dirty = false;
324 if ( insert._overflow != null ) {
325 // current root page overflowed, we replace with a new root page
326 if ( DEBUG ) {
327 System.out.println( "BTree.insert() replace root BPage due to overflow" );
328 }
329 rootPage = new BPage( this, rootPage, insert._overflow );
330 _root = rootPage._recid;
331 _height += 1;
332 dirty = true;
333 }
334 if ( insert._existing == null ) {
335 _entries++;
336 dirty = true;
337 }
338 if ( dirty ) {
339 _recman.update( _recid, this );
340 }
341 // insert might have returned an existing value
342 return insert._existing;
343 }
344 }
345
346
347 /**
348 * Remove an entry with the given key from the BTree.
349 *
350 * @param key Removal key
351 * @return Value associated with the key, or null if no entry with given
352 * key existed in the BTree.
353 */
354 public synchronized Object remove( Object key )
355 throws IOException
356 {
357 if ( key == null ) {
358 throw new IllegalArgumentException( "Argument 'key' is null" );
359 }
360
361 BPage rootPage = getRoot();
362 if ( rootPage == null ) {
363 return null;
364 }
365 boolean dirty = false;
366 BPage.RemoveResult remove = rootPage.remove( _height, key );
367 if ( remove._underflow && rootPage.isEmpty() ) {
368 _height -= 1;
369 dirty = true;
370
371 _recman.delete(_root);
372 if ( _height == 0 ) {
373 _root = 0;
374 } else {
375 _root = rootPage.childBPage( _pageSize-1 )._recid;
376 }
377 }
378 if ( remove._value != null ) {
379 _entries--;
380 dirty = true;
381 }
382 if ( dirty ) {
383 _recman.update( _recid, this );
384 }
385 return remove._value;
386 }
387
388
389 /**
390 * Find the value associated with the given key.
391 *
392 * @param key Lookup key.
393 * @return Value associated with the key, or null if not found.
394 */
395 public synchronized Object find( Object key )
396 throws IOException
397 {
398 if ( key == null ) {
399 throw new IllegalArgumentException( "Argument 'key' is null" );
400 }
401 BPage rootPage = getRoot();
402 if ( rootPage == null ) {
403 return null;
404 }
405
406 Tuple tuple = new Tuple( null, null );
407 TupleBrowser browser = rootPage.find( _height, key );
408
409 if ( browser.getNext( tuple ) ) {
410 // find returns the matching key or the next ordered key, so we must
411 // check if we have an exact match
412 if ( _comparator.compare( key, tuple.getKey() ) != 0 ) {
413 return null;
414 } else {
415 return tuple.getValue();
416 }
417 } else {
418 return null;
419 }
420 }
421
422
423 /**
424 * Find the value associated with the given key, or the entry immediately
425 * following this key in the ordered BTree.
426 *
427 * @param key Lookup key.
428 * @return Value associated with the key, or a greater entry, or null if no
429 * greater entry was found.
430 */
431 public synchronized Tuple findGreaterOrEqual( Object key )
432 throws IOException
433 {
434 Tuple tuple;
435 TupleBrowser browser;
436
437 if ( key == null ) {
438 // there can't be a key greater than or equal to "null"
439 // because null is considered an infinite key.
440 return null;
441 }
442
443 tuple = new Tuple( null, null );
444 browser = browse( key );
445 if ( browser.getNext( tuple ) ) {
446 return tuple;
447 } else {
448 return null;
449 }
450 }
451
452
453 /**
454 * Get a browser initially positioned at the beginning of the BTree.
455 * <p><b>
456 * WARNING: �If you make structural modifications to the BTree during
457 * browsing, you will get inconsistent browing results.
458 * </b>
459 *
460 * @return Browser positionned at the beginning of the BTree.
461 */
462 public synchronized TupleBrowser browse()
463 throws IOException
464 {
465 BPage rootPage = getRoot();
466 if ( rootPage == null ) {
467 return EmptyBrowser.INSTANCE;
468 }
469 TupleBrowser browser = rootPage.findFirst();
470 return browser;
471 }
472
473
474 /**
475 * Get a browser initially positioned just before the given key.
476 * <p><b>
477 * WARNING: �If you make structural modifications to the BTree during
478 * browsing, you will get inconsistent browing results.
479 * </b>
480 *
481 * @param key Key used to position the browser. If null, the browser
482 * will be positionned after the last entry of the BTree.
483 * (Null is considered to be an "infinite" key)
484 * @return Browser positionned just before the given key.
485 */
486 public synchronized TupleBrowser browse( Object key )
487 throws IOException
488 {
489 BPage rootPage = getRoot();
490 if ( rootPage == null ) {
491 return EmptyBrowser.INSTANCE;
492 }
493 TupleBrowser browser = rootPage.find( _height, key );
494 return browser;
495 }
496
497
498 /**
499 * Return the number of entries (size) of the BTree.
500 */
501 public synchronized int size()
502 {
503 return _entries;
504 }
505
506
507 /**
508 * Return the persistent record identifier of the BTree.
509 */
510 public long getRecid()
511 {
512 return _recid;
513 }
514
515
516 /**
517 * Return the root BPage, or null if it doesn't exist.
518 */
519 private BPage getRoot()
520 throws IOException
521 {
522 if ( _root == 0 ) {
523 return null;
524 }
525 BPage root = (BPage) _recman.fetch( _root, _bpageSerializer );
526 root._recid = _root;
527 root._btree = this;
528 return root;
529 }
530
531 /**
532 * Implement Externalizable interface.
533 */
534 public void readExternal( ObjectInput in )
535 throws IOException, ClassNotFoundException
536 {
537 _comparator = (Comparator) in.readObject();
538 _keySerializer = (Serializer) in.readObject();
539 _valueSerializer = (Serializer) in.readObject();
540 _height = in.readInt();
541 _root = in.readLong();
542 _pageSize = in.readInt();
543 _entries = in.readInt();
544 }
545
546
547 /**
548 * Implement Externalizable interface.
549 */
550 public void writeExternal( ObjectOutput out )
551 throws IOException
552 {
553 out.writeObject( _comparator );
554 out.writeObject( _keySerializer );
555 out.writeObject( _valueSerializer );
556 out.writeInt( _height );
557 out.writeLong( _root );
558 out.writeInt( _pageSize );
559 out.writeInt( _entries );
560 }
561
562
563 public void setValueSerializer( Serializer valueSerializer )
564 {
565 _valueSerializer = valueSerializer;
566 }
567
568
569 /*
570 public void assert() throws IOException {
571 BPage root = getRoot();
572 if ( root != null ) {
573 root.assertRecursive( _height );
574 }
575 }
576 */
577
578
579 /*
580 public void dump() throws IOException {
581 BPage root = getRoot();
582 if ( root != null ) {
583 root.dumpRecursive( _height, 0 );
584 }
585 }
586 */
587
588
589 /** PRIVATE INNER CLASS
590 * Browser returning no element.
591 */
592 static class EmptyBrowser
593 extends TupleBrowser
594 {
595
596 static TupleBrowser INSTANCE = new EmptyBrowser();
597
598 public boolean getNext( Tuple tuple )
599 {
600 return false;
601 }
602
603 public boolean getPrevious( Tuple tuple )
604 {
605 return false;
606 }
607 }
608 }
609