diff options
| author | Marc Horowitz <marc@mit.edu> | 1996-07-22 20:49:46 +0000 |
|---|---|---|
| committer | Marc Horowitz <marc@mit.edu> | 1996-07-22 20:49:46 +0000 |
| commit | edf8b4d8a6a665c2aa150993cd813ea6c5cf12e1 (patch) | |
| tree | 6c2974a97b448c040fa4a31708ec5e02f187526c /src/util | |
| parent | 013bb1391582ed9e653ae706e398ddb8d08cfcc9 (diff) | |
| download | krb5-edf8b4d8a6a665c2aa150993cd813ea6c5cf12e1.tar.gz krb5-edf8b4d8a6a665c2aa150993cd813ea6c5cf12e1.tar.xz krb5-edf8b4d8a6a665c2aa150993cd813ea6c5cf12e1.zip | |
this commit includes all the changes on the OV_9510_INTEGRATION and
OV_MERGE branches. This includes, but is not limited to, the new openvision
admin system, and major changes to gssapi to add functionality, and bring
the implementation in line with rfc1964. before committing, the
code was built and tested for netbsd and solaris.
git-svn-id: svn://anonsvn.mit.edu/krb5/trunk@8774 dc483132-0cff-0310-8789-dd5450dbe970
Diffstat (limited to 'src/util')
126 files changed, 49884 insertions, 172 deletions
diff --git a/src/util/ChangeLog b/src/util/ChangeLog index 8540d06a50..ab39db25da 100644 --- a/src/util/ChangeLog +++ b/src/util/ChangeLog @@ -1,3 +1,15 @@ +Wed Jul 10 00:52:28 1996 Marc Horowitz <marc@mit.edu> + + * Makefile.in (all-unix): "install" the db2 headers and libs with + symlinks. db2 has its own self-contained autoconf setup, so this + is necessary here. + (all-unix): before installing db2, remove the links, so ln won't + fail the second time. + +Tue Jul 9 19:29:12 1996 Marc Horowitz <marc@mit.edu> + + * configure.in (CONFIG_DIRS): always build db2 + Mon May 20 11:05:49 1996 Tom Yu <tlyu@dragons-lair.MIT.EDU> * libupdate.sh: allow to deal with multiple directories at once diff --git a/src/util/Makefile.in b/src/util/Makefile.in index 1d0a278d2f..0faedfa1e8 100644 --- a/src/util/Makefile.in +++ b/src/util/Makefile.in @@ -11,6 +11,20 @@ all-unix:: libupdate makeshlib all-mac:: all-windows: libupdate +# this is necessary because the db2 build, which is intended to be +# standalone, of course does not know to make symlinks in the right +# magic places in the krb5 build tree so other parts of the tree can +# find db2. So we make those links here. this can't use CopyHeader, +# because there's no way in this make setup to do stuff after the +# subdirectory recursion. fortunately, one does not need a +# destination to make a link... + +all-unix:: + $(RM) ../include/db.h ../include/db-config.h ../lib/libdb.a + ln -s ../util/db2/obj/db.h ../include + ln -s ../util/db2/obj/db-config.h ../include + ln -s ../util/db2/obj/libdb.a ../lib + unixmac: libupdate libupdate: $(srcdir)/libupdate.sh diff --git a/src/util/autoconf/ChangeLog b/src/util/autoconf/ChangeLog index 67827c8e11..e317fe91e4 100644 --- a/src/util/autoconf/ChangeLog +++ b/src/util/autoconf/ChangeLog @@ -1,3 +1,12 @@ +Tue Jul 9 18:24:12 1996 Marc Horowitz <marc@mit.edu> + + * autoheader.sh (TEMPLATES): add ./acconfig.h to the list of + header templates used. This is necessary when a subpackage is + being autoreconf'd. + + * acgeneral.m4 (AC_CACHE_CHECK): added, from autoconf 2.7. db2 + needs it. + Wed Jun 12 18:17:17 1996 Tom Yu <tlyu@voltage-multiplier.mit.edu> * autoconf.texi (Output): document changes to AC_OUTPUT diff --git a/src/util/autoconf/README.krb5 b/src/util/autoconf/README.krb5 new file mode 100644 index 0000000000..d4b7346583 --- /dev/null +++ b/src/util/autoconf/README.krb5 @@ -0,0 +1,2 @@ +There have been a few local changes and bug-fixes made to this tree. +Check out the ChangeLog file for details. diff --git a/src/util/autoconf/autoconf.info b/src/util/autoconf/autoconf.info index 050a253b8f..472783f0bc 100644 --- a/src/util/autoconf/autoconf.info +++ b/src/util/autoconf/autoconf.info @@ -1,5 +1,5 @@ -This is Info file ../autoconf.info, produced by Makeinfo-1.63 from the -input file ../autoconf.texi. +This is Info file autoconf.info, produced by Makeinfo-1.55 from the +input file ./autoconf.texi. START-INFO-DIR-ENTRY * Autoconf: (autoconf). Create source code configuration scripts. @@ -722,6 +722,10 @@ macro is `AC_INIT' (*note Input::.). separated by a colon. For example, AC_OUTPUT(Makefile:templates/top.mk lib/Makefile:templates/lib.mk) + You can also generate an output file from multiple input files by + separating the input files by a plus sign. For example, + AC_OUTPUT(Makefile:templates/pre.in+Makefile.in+templates/post.in) + If you pass EXTRA-CMDS, those commands will be inserted into `config.status' to be run after all its other processing. If INIT-CMDS are given, they are inserted just before EXTRA-CMDS, @@ -3024,7 +3028,7 @@ automatically. Notify the user of an error that prevents `configure' from completing. This macro prints an error message on the standard error output and exits `configure' with a nonzero status. - ERROR-DESCRIPTION should be something like `invalid value $HOME + eRROR-DESCRIPTION should be something like `invalid value $HOME for \$HOME'. - Macro: AC_MSG_WARN (PROBLEM-DESCRIPTION) @@ -3261,7 +3265,7 @@ it is considered obsolete. - Macro: AC_PROVIDE (THIS-MACRO-NAME) Record the fact that THIS-MACRO-NAME has been called. - THIS-MACRO-NAME should be the name of the macro that is calling + tHIS-MACRO-NAME should be the name of the macro that is calling `AC_PROVIDE'. An easy way to get it is from the `m4' builtin variable `$0', like this: @@ -4953,11 +4957,11 @@ Autoconf checks. * Menu: -* AC_MACRODIR <1>: Invoking autoupdate. -* AC_MACRODIR <1>: Invoking autoheader. -* AC_MACRODIR <1>: Invoking autoreconf. -* AC_MACRODIR <1>: Invoking autoconf. -* AC_MACRODIR <1>: Invoking ifnames. +* AC_MACRODIR: Invoking autoupdate. +* AC_MACRODIR: Invoking autoheader. +* AC_MACRODIR: Invoking autoreconf. +* AC_MACRODIR: Invoking autoconf. +* AC_MACRODIR: Invoking ifnames. * AC_MACRODIR: Invoking autoscan. * CONFIG_FILES: Invoking config.status. * CONFIG_HEADERS: Invoking config.status. @@ -4987,17 +4991,18 @@ how this is done. * build_cpu: System Type Variables. * build_os: System Type Variables. * build_vendor: System Type Variables. -* CC <1>: UNIX Variants. +* CC: UNIX Variants. +* CC: Particular Programs. * CC: Particular Programs. -* CFLAGS <1>: Particular Programs. * CFLAGS: Preset Output Variables. +* CFLAGS: Particular Programs. * configure_input: Preset Output Variables. * CPP: Particular Programs. * CPPFLAGS: Preset Output Variables. * CXX: Particular Programs. * CXXCPP: Particular Programs. -* CXXFLAGS <1>: Particular Programs. * CXXFLAGS: Preset Output Variables. +* CXXFLAGS: Particular Programs. * datadir: Preset Output Variables. * DEFS: Preset Output Variables. * exec_prefix: Preset Output Variables. @@ -5014,15 +5019,17 @@ how this is done. * KMEM_GROUP: Particular Functions. * LDFLAGS: Preset Output Variables. * LEX: Particular Programs. -* LEX_OUTPUT_ROOT: Particular Programs. * LEXLIB: Particular Programs. +* LEX_OUTPUT_ROOT: Particular Programs. * libdir: Preset Output Variables. * libexecdir: Preset Output Variables. -* LIBOBJS <1>: Structures. -* LIBOBJS <1>: Generic Functions. * LIBOBJS: Particular Functions. -* LIBS <1>: UNIX Variants. +* LIBOBJS: Particular Functions. +* LIBOBJS: Generic Functions. +* LIBOBJS: Structures. * LIBS: Preset Output Variables. +* LIBS: UNIX Variants. +* LIBS: UNIX Variants. * LN_S: Particular Programs. * localstatedir: Preset Output Variables. * mandir: Preset Output Variables. @@ -5061,15 +5068,9 @@ use these names in `#if' directives. * Menu: -* __CHAR_UNSIGNED__: Compiler Characteristics. -* _ALL_SOURCE: UNIX Variants. -* _MINIX: UNIX Variants. -* _POSIX_1_SOURCE: UNIX Variants. -* _POSIX_SOURCE: UNIX Variants. -* _POSIX_VERSION: Particular Headers. -* C_ALLOCA: Particular Functions. * CLOSEDIR_VOID: Particular Functions. * const: Compiler Characteristics. +* C_ALLOCA: Particular Functions. * DGUX: Particular Functions. * DIRENT: Particular Headers. * GETGROUPS_T: Particular Typedefs. @@ -5088,11 +5089,11 @@ use these names in `#if' directives. * HAVE_MMAP: Particular Functions. * HAVE_NDIR_H: Particular Headers. * HAVE_RESTARTABLE_SYSCALLS: System Services. +* HAVE_STRCOLL: Particular Functions. +* HAVE_STRFTIME: Particular Functions. * HAVE_ST_BLKSIZE: Structures. * HAVE_ST_BLOCKS: Structures. * HAVE_ST_RDEV: Structures. -* HAVE_STRCOLL: Particular Functions. -* HAVE_STRFTIME: Particular Functions. * HAVE_SYS_DIR_H: Particular Headers. * HAVE_SYS_NDIR_H: Particular Headers. * HAVE_SYS_WAIT_H: Particular Headers. @@ -5122,9 +5123,9 @@ use these names in `#if' directives. * size_t: Particular Typedefs. * STDC_HEADERS: Particular Headers. * SVR4: Particular Functions. -* SYS_SIGLIST_DECLARED: Particular Headers. * SYSDIR: Particular Headers. * SYSNDIR: Particular Headers. +* SYS_SIGLIST_DECLARED: Particular Headers. * TIME_WITH_SYS_TIME: Structures. * TM_IN_SYS_TIME: Structures. * uid_t: Particular Typedefs. @@ -5135,6 +5136,13 @@ use these names in `#if' directives. * VOID_CLOSEDIR: Particular Headers. * WORDS_BIGENDIAN: Compiler Characteristics. * YYTEXT_POINTER: Particular Programs. +* _ALL_SOURCE: UNIX Variants. +* _MINIX: UNIX Variants. +* _POSIX_1_SOURCE: UNIX Variants. +* _POSIX_SOURCE: UNIX Variants. +* _POSIX_SOURCE: UNIX Variants. +* _POSIX_VERSION: Particular Headers. +* __CHAR_UNSIGNED__: Compiler Characteristics. File: autoconf.info, Node: Macro Index, Prev: Preprocessor Symbol Index, Up: Top @@ -5154,17 +5162,12 @@ list easier to use, the macros are listed without their preceding `AC_'. * ARG_PROGRAM: Transforming Names. * ARG_WITH: External Software. * BEFORE: Suggested Ordering. -* C_BIGENDIAN: Compiler Characteristics. -* C_CHAR_UNSIGNED: Compiler Characteristics. -* C_CONST: Compiler Characteristics. -* C_CROSS: Test Programs. -* C_INLINE: Compiler Characteristics. -* C_LONG_DOUBLE: Compiler Characteristics. * CACHE_CHECK: Caching Results. * CACHE_VAL: Caching Results. * CANONICAL_HOST: Canonicalizing. * CANONICAL_SYSTEM: Canonicalizing. * CHAR_UNSIGNED: Old Macro Names. +* CHECKING: Printing Messages. * CHECK_FUNC: Generic Functions. * CHECK_FUNCS: Generic Functions. * CHECK_HEADER: Generic Headers. @@ -5175,13 +5178,18 @@ list easier to use, the macros are listed without their preceding `AC_'. * CHECK_SIZEOF: Compiler Characteristics. * CHECK_TOOL: Generic Programs. * CHECK_TYPE: Generic Typedefs. -* CHECKING: Printing Messages. * COMPILE_CHECK: Examining Libraries. * CONFIG_AUX_DIR: Input. * CONFIG_HEADER: Configuration Headers. * CONFIG_SUBDIRS: Subdirectories. * CONST: Old Macro Names. * CROSS_CHECK: Old Macro Names. +* C_BIGENDIAN: Compiler Characteristics. +* C_CHAR_UNSIGNED: Compiler Characteristics. +* C_CONST: Compiler Characteristics. +* C_CROSS: Test Programs. +* C_INLINE: Compiler Characteristics. +* C_LONG_DOUBLE: Compiler Characteristics. * DECL_SYS_SIGLIST: Particular Headers. * DECL_YYTEXT: Particular Programs. * DEFINE: Defining Symbols. @@ -5260,6 +5268,11 @@ list easier to use, the macros are listed without their preceding `AC_'. * PREFIX: Old Macro Names. * PREFIX_PROGRAM: Default Prefix. * PREREQ: Versions. +* PROGRAMS_CHECK: Old Macro Names. +* PROGRAMS_PATH: Old Macro Names. +* PROGRAM_CHECK: Old Macro Names. +* PROGRAM_EGREP: Old Macro Names. +* PROGRAM_PATH: Old Macro Names. * PROG_AWK: Particular Programs. * PROG_CC: Particular Programs. * PROG_CC_C_O: Particular Programs. @@ -5273,11 +5286,6 @@ list easier to use, the macros are listed without their preceding `AC_'. * PROG_MAKE_SET: Output. * PROG_RANLIB: Particular Programs. * PROG_YACC: Particular Programs. -* PROGRAM_CHECK: Old Macro Names. -* PROGRAM_EGREP: Old Macro Names. -* PROGRAM_PATH: Old Macro Names. -* PROGRAMS_CHECK: Old Macro Names. -* PROGRAMS_PATH: Old Macro Names. * PROVIDE: Prerequisite Macros. * REMOTE_TAPE: Old Macro Names. * REPLACE_FUNCS: Generic Functions. @@ -5288,15 +5296,12 @@ list easier to use, the macros are listed without their preceding `AC_'. * REVISION: Versions. * RSH: Old Macro Names. * SCO_INTL: UNIX Variants. -* SET_MAKE: Old Macro Names. * SETVBUF_REVERSED: Old Macro Names. -* SIZE_T: Old Macro Names. +* SET_MAKE: Old Macro Names. * SIZEOF_TYPE: Old Macro Names. -* ST_BLKSIZE: Old Macro Names. -* ST_BLOCKS: Old Macro Names. -* ST_RDEV: Old Macro Names. -* STAT_MACROS_BROKEN <1>: Old Macro Names. +* SIZE_T: Old Macro Names. * STAT_MACROS_BROKEN: Structures. +* STAT_MACROS_BROKEN: Old Macro Names. * STDC_HEADERS: Old Macro Names. * STRCOLL: Old Macro Names. * STRUCT_ST_BLKSIZE: Structures. @@ -5304,6 +5309,9 @@ list easier to use, the macros are listed without their preceding `AC_'. * STRUCT_ST_RDEV: Structures. * STRUCT_TIMEZONE: Structures. * STRUCT_TM: Structures. +* ST_BLKSIZE: Old Macro Names. +* ST_BLOCKS: Old Macro Names. +* ST_RDEV: Old Macro Names. * SUBST: Setting Output Variables. * SUBST_FILE: Setting Output Variables. * SYS_INTERPRETER: System Services. @@ -5312,8 +5320,8 @@ list easier to use, the macros are listed without their preceding `AC_'. * SYS_SIGLIST_DECLARED: Old Macro Names. * TEST_CPP: Old Macro Names. * TEST_PROGRAM: Old Macro Names. -* TIME_WITH_SYS_TIME: Old Macro Names. * TIMEZONE: Old Macro Names. +* TIME_WITH_SYS_TIME: Old Macro Names. * TRY_COMPILE: Examining Syntax. * TRY_CPP: Examining Declarations. * TRY_LINK: Examining Libraries. @@ -5342,118 +5350,118 @@ list easier to use, the macros are listed without their preceding `AC_'. Tag Table: -Node: Top1177 -Node: Introduction9316 -Node: Making configure Scripts13156 -Node: Writing configure.in16221 -Node: Invoking autoscan19921 -Node: Invoking ifnames22226 -Node: Invoking autoconf23716 -Node: Invoking autoreconf25554 -Node: Setup27874 -Node: Input28760 -Node: Output30483 -Node: Makefile Substitutions33233 -Node: Preset Output Variables34817 -Node: Build Directories39319 -Node: Automatic Remaking40952 -Node: Configuration Headers43038 -Node: Header Templates45405 -Node: Invoking autoheader46584 -Node: Subdirectories49732 -Node: Default Prefix51075 -Node: Versions52479 -Node: Existing Tests54383 -Node: Alternative Programs55848 -Node: Particular Programs56512 -Node: Generic Programs62362 -Node: Libraries65188 -Node: Library Functions67017 -Node: Particular Functions67575 -Node: Generic Functions73878 -Node: Header Files75721 -Node: Particular Headers76280 -Node: Generic Headers83251 -Node: Structures84553 -Node: Typedefs86790 -Node: Particular Typedefs87294 -Node: Generic Typedefs88494 -Node: Compiler Characteristics88937 -Node: System Services91562 -Node: UNIX Variants93911 -Node: Writing Tests95930 -Node: Examining Declarations97896 -Node: Examining Syntax100366 -Node: Examining Libraries101423 -Node: Run Time103982 -Node: Test Programs104943 -Node: Guidelines107480 -Node: Test Functions108669 -Node: Portable Shell110212 -Node: Testing Values and Files112144 -Node: Multiple Cases113799 -Node: Language Choice114997 -Node: Results116555 -Node: Defining Symbols117314 -Node: Setting Output Variables120323 -Node: Caching Results122169 -Node: Cache Variable Names124450 -Node: Cache Files125999 -Node: Printing Messages128097 -Node: Writing Macros131384 -Node: Macro Definitions132003 -Node: Macro Names133108 -Node: Quoting135559 -Node: Dependencies Between Macros137461 -Node: Prerequisite Macros138093 -Node: Suggested Ordering139548 -Node: Obsolete Macros141078 -Node: Manual Configuration142302 -Node: Specifying Names143201 -Node: Canonicalizing145073 -Node: System Type Variables146387 -Node: Using System Type147134 -Node: Site Configuration148581 -Node: External Software149354 -Node: Package Options152052 -Node: Site Details154319 -Node: Transforming Names155542 -Node: Transformation Options156720 -Node: Transformation Examples157186 -Node: Transformation Rules158754 -Node: Site Defaults160163 -Node: Invoking configure163947 -Node: Basic Installation164896 -Node: Compilers and Options167476 -Node: Multiple Architectures168125 -Node: Installation Names169111 -Node: Optional Features170295 -Node: System Type171065 -Node: Sharing Defaults172087 -Node: Operation Controls172711 -Node: Invoking config.status173586 -Node: Questions176974 -Node: Distributing177506 -Node: Why GNU m4178632 -Node: Bootstrapping179445 -Node: Why Not Imake180061 -Node: Upgrading184470 -Node: Changed File Names185991 -Node: Changed Makefiles186727 -Node: Changed Macros187823 -Node: Invoking autoupdate189070 -Node: Changed Results190661 -Node: Changed Macro Writing192763 -Node: History194026 -Node: Genesis194733 -Node: Exodus195906 -Node: Leviticus198955 -Node: Numbers200478 -Node: Deuteronomy202394 -Node: Old Macro Names205058 -Node: Environment Variable Index208107 -Node: Output Variable Index209109 -Node: Preprocessor Symbol Index213792 -Node: Macro Index218607 +Node: Top1173 +Node: Introduction9312 +Node: Making configure Scripts13152 +Node: Writing configure.in16217 +Node: Invoking autoscan19917 +Node: Invoking ifnames22222 +Node: Invoking autoconf23712 +Node: Invoking autoreconf25550 +Node: Setup27870 +Node: Input28756 +Node: Output30479 +Node: Makefile Substitutions33440 +Node: Preset Output Variables35024 +Node: Build Directories39526 +Node: Automatic Remaking41159 +Node: Configuration Headers43245 +Node: Header Templates45612 +Node: Invoking autoheader46791 +Node: Subdirectories49939 +Node: Default Prefix51282 +Node: Versions52686 +Node: Existing Tests54590 +Node: Alternative Programs56055 +Node: Particular Programs56719 +Node: Generic Programs62569 +Node: Libraries65395 +Node: Library Functions67224 +Node: Particular Functions67782 +Node: Generic Functions74085 +Node: Header Files75928 +Node: Particular Headers76487 +Node: Generic Headers83458 +Node: Structures84760 +Node: Typedefs86997 +Node: Particular Typedefs87501 +Node: Generic Typedefs88701 +Node: Compiler Characteristics89144 +Node: System Services91769 +Node: UNIX Variants94118 +Node: Writing Tests96137 +Node: Examining Declarations98103 +Node: Examining Syntax100573 +Node: Examining Libraries101630 +Node: Run Time104189 +Node: Test Programs105150 +Node: Guidelines107687 +Node: Test Functions108876 +Node: Portable Shell110419 +Node: Testing Values and Files112351 +Node: Multiple Cases114006 +Node: Language Choice115204 +Node: Results116762 +Node: Defining Symbols117521 +Node: Setting Output Variables120530 +Node: Caching Results122376 +Node: Cache Variable Names124657 +Node: Cache Files126206 +Node: Printing Messages128304 +Node: Writing Macros131591 +Node: Macro Definitions132210 +Node: Macro Names133315 +Node: Quoting135766 +Node: Dependencies Between Macros137668 +Node: Prerequisite Macros138300 +Node: Suggested Ordering139755 +Node: Obsolete Macros141285 +Node: Manual Configuration142509 +Node: Specifying Names143408 +Node: Canonicalizing145280 +Node: System Type Variables146594 +Node: Using System Type147341 +Node: Site Configuration148788 +Node: External Software149561 +Node: Package Options152259 +Node: Site Details154526 +Node: Transforming Names155749 +Node: Transformation Options156927 +Node: Transformation Examples157393 +Node: Transformation Rules158961 +Node: Site Defaults160370 +Node: Invoking configure164154 +Node: Basic Installation165103 +Node: Compilers and Options167683 +Node: Multiple Architectures168332 +Node: Installation Names169318 +Node: Optional Features170502 +Node: System Type171272 +Node: Sharing Defaults172294 +Node: Operation Controls172918 +Node: Invoking config.status173793 +Node: Questions177181 +Node: Distributing177713 +Node: Why GNU m4178839 +Node: Bootstrapping179652 +Node: Why Not Imake180268 +Node: Upgrading184677 +Node: Changed File Names186198 +Node: Changed Makefiles186934 +Node: Changed Macros188030 +Node: Invoking autoupdate189277 +Node: Changed Results190868 +Node: Changed Macro Writing192970 +Node: History194233 +Node: Genesis194940 +Node: Exodus196113 +Node: Leviticus199162 +Node: Numbers200685 +Node: Deuteronomy202601 +Node: Old Macro Names205265 +Node: Environment Variable Index208314 +Node: Output Variable Index209316 +Node: Preprocessor Symbol Index214177 +Node: Macro Index219047 End Tag Table diff --git a/src/util/autoconf/autoheader.sh b/src/util/autoconf/autoheader.sh index abbc983500..a9aa446934 100644 --- a/src/util/autoconf/autoheader.sh +++ b/src/util/autoconf/autoheader.sh @@ -85,6 +85,7 @@ if test $show_version = yes; then fi TEMPLATES="${AC_MACRODIR}/acconfig.h" +test -r ./acconfig.h && TEMPLATES="${TEMPLATES} ./acconfig.h" test -r $localdir/acconfig.h && TEMPLATES="${TEMPLATES} $localdir/acconfig.h" case $# in diff --git a/src/util/autoconf/configure b/src/util/autoconf/configure index 65ce229d57..d5f3ea864a 100644 --- a/src/util/autoconf/configure +++ b/src/util/autoconf/configure @@ -906,7 +906,19 @@ for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then esac echo creating "$ac_file" rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." + # allow for outfile[:infile1[+infile2[+infile3...]]] syntax + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}+" + ac_files_in= + for ac_file_name in $ac_file_in + do + ac_files_in="$ac_files_in $ac_given_srcdir/$ac_file_name" + done + IFS="$ac_save_ifs" + configure_input=`echo $ac_files_in | sed 's%/./%/%g +s% *./%%g +s% *%+%g +s%^%Generated automatically from % +s%$% by configure.%'` case "$ac_file" in *Makefile*) ac_comsub="1i\\ # $configure_input" ;; @@ -917,7 +929,7 @@ s%@configure_input@%$configure_input%g s%@srcdir@%$srcdir%g s%@top_srcdir@%$top_srcdir%g s%@INSTALL@%$INSTALL%g -" -f conftest.subs $ac_given_srcdir/$ac_file_in > $ac_file +" -f conftest.subs $ac_files_in > $ac_file fi; done rm -f conftest.subs diff --git a/src/util/autoconf/standards.info b/src/util/autoconf/standards.info index c673f9261f..a51f1e2d1a 100644 --- a/src/util/autoconf/standards.info +++ b/src/util/autoconf/standards.info @@ -1,4 +1,4 @@ -This is Info file standards.info, produced by Makeinfo-1.63 from the +This is Info file standards.info, produced by Makeinfo-1.55 from the input file ./standards.texi. START-INFO-DIR-ENTRY diff --git a/src/util/configure.in b/src/util/configure.in index 77abe3b78b..ef2f3635e0 100644 --- a/src/util/configure.in +++ b/src/util/configure.in @@ -18,15 +18,6 @@ esac SHLIB_TAIL_COMP=$krb5_cv_shlibs_tail_comp AC_SUBST(SHLIB_TAIL_COMP) -dbdir= -WITH_ANAME_DB -if test "$dbval" = db; then - db=berk_db -else - WITH_KDB_DB - if test "$dbval" = db; then db=berk_db; fi -fi - -CONFIG_DIRS(et ss profile $db pty) +CONFIG_DIRS(et ss profile pty dyn db2) DO_SUBDIRS V5_AC_OUTPUT_MAKEFILE diff --git a/src/util/db2/CHANGELOG b/src/util/db2/CHANGELOG new file mode 100644 index 0000000000..abd05f95d4 --- /dev/null +++ b/src/util/db2/CHANGELOG @@ -0,0 +1,123 @@ +db2-alpha.0 -> db2-alpha.1 + This fixes a number of bugs in the alpha release. + 1. 64-bit functionality. The test suite now runs on alphas. + Memory leak fixed. + Pairs no longer disappear when pages are exactly full. + Flush meta-data correctly on sync. +1.86 -> db2-alpha.0 + This is an interim release. We are in the process of + adding logging, locking, and transaction support to all + the db access methods. This will necessitate database + format changes, interface changes, and major upheaval. + In the meantime, this PRELIMINARY release is to correct + some known bugs in the hash pacakge. This release uses + a different page format from 1.86, so you will need to + dump and reload any existing databases if you upgrade + to this (and may have to do it again when 2.0 becomes + available. +1.85 -> 1.86 + btree: Fix to split code for single large record at the end of a + page. +1.84 -> 1.85 + recno: #ifdef out use of mmap, it's not portable enough. + +1.83 -> 1.84 Thu Aug 18 15:46:07 EDT 1994 + recno: Rework fixed-length records so that closing and reopening + the file now works. Pad short records on input. Never do + signed comparison in recno input reading functions. + +1.82 -> 1.83 Tue Jul 26 15:33:44 EDT 1994 + btree: Rework cursor deletion code yet again; bugs with + deleting empty pages that only contained the cursor + record. + +1.81 -> 1.82 Sat Jul 16 11:01:50 EDT 1994 + btree: Fix bugs introduced by new cursor/deletion code. + Replace return kbuf/dbuf with real DBT's. + +1.80 -> 1.81 + btree: Fix bugs introduced by new cursor/deletion code. + all: Add #defines for Purify. + +1.79 -> 1.80 Wed Jul 13 22:41:54 EDT 1994 + btree Change deletion to coalesce empty pages. This is a major + change, cursors and duplicate pages all had to be reworked. + Return to a fixed stack. + recno: Affected by cursor changes. New cursor structures should + permit multiple cursors in the future. + +1.78 -> 1.79 Mon Jun 20 17:36:47 EDT 1994 + all: Minor cleanups of 1.78 for porting reasons; only + major change was inlining check of NULL pointer + so that __fix_realloc goes away. + +1.77 -> 1.78 Thu Jun 16 19:06:43 EDT 1994 + all: Move "standard" size typedef's into db.h. + +1.76 -> 1.77 Thu Jun 16 16:48:38 EDT 1994 + hash: Delete __init_ routine, has special meaning to OSF 2.0. + +1.74 -> 1.76 + all: Finish up the port to the Alpha. + +1.73 -> 1.74 + recno: Don't put the record if rec_search fails, in rec_rdelete. + Create fixed-length intermediate records past "end" of DB + correctly. + Realloc bug when reading in fixed records. + all: First cut at port to Alpha (64-bit architecture) using + 4.4BSD basic integral types typedef's. + Cast allocation pointers to shut up old compilers. + Rework PORT directory into OS/machine directories. + +1.72 -> 1.73 + btree: If enough duplicate records were inserted and then deleted + that internal pages had references to empty pages of the + duplicate keys, the search function ended up on the wrong + page. + +1.7 -> 1.72 12 Oct 1993 + hash: Support NET/2 hash formats. + +1.7 -> 1.71 16 Sep 1993 + btree/recno: + Fix bug in internal search routines that caused + return of invalid pointers. + +1.6 -> 1.7 07 Sep 1993 + hash: Fixed big key overflow bugs. + test: Portability hacks, rewrite test script, Makefile. + btree/recno: + Stop copying non-overflow key/data pairs. + PORT: Break PORT directory up into per architecture/OS + subdirectories. + +1.5 -> 1.6 06 Jun 1993 + hash: In PAIRFITS, the first comparison should look at (P)[2]. + The hash_realloc function was walking off the end of memory. + The overflow page number was wrong when bumping splitpoint. + +1.4 -> 1.5 23 May 1993 + hash: Set hash default fill factor dynamically. + recno: Fixed bug in sorted page splits. + Add page size parameter support. + Allow recno to specify the name of the underlying btree; + used for vi recovery. + btree/recno: + Support 64K pages. + btree/hash/recno: + Provide access to an underlying file descriptor. + Change sync routines to take a flag argument, recno + uses this to sync out the underlying btree. + +1.3 -> 1.4 10 May 1993 + recno: Delete the R_CURSORLOG flag from the recno interface. + Zero-length record fix for non-mmap reads. + Try and make SIZE_T_MAX test in open portable. + +1.2 -> 1.3 01 May 1993 + btree: Ignore user byte-order setting when reading already + existing database. Fixes to byte-order conversions. + +1.1 -> 1.2 15 Apr 1993 + No bug fixes, only compatibility hacks. diff --git a/src/util/db2/ChangeLog b/src/util/db2/ChangeLog new file mode 100644 index 0000000000..9d240ec940 --- /dev/null +++ b/src/util/db2/ChangeLog @@ -0,0 +1,77 @@ +Fri Jun 21 00:07:57 1996 Marc Horowitz <marc@mit.edu> + + * hash/dbm.c (delete, store): dbm_rdonly() doesn't exist on some + systems. In addition, the handle is really a DB handle, so it + would break if it did exist. Remove calls to it. + +Wed Apr 10 21:39:54 1996 Marc Horowitz <marc@mit.edu> + + * hash/hash_page.c (__addel): It is possible to damage a page if a + bigpair is added and there's not enough room. Check to make sure + there's enough room before adding anything. + + * hash/hash.c (hdestroy, cursor_delete): there were still a few + things in the hashp which weren't being freed, causing a small + memory leak. + +Sun Apr 7 01:40:54 1996 Marc Horowitz <marc@mit.edu> + + * clib/mk{,s}temp.c: renamed to accurately reflect the function + being provided (ultrix 4.2 has one, but not the other). + + * [way too many files to list here]: rename pgno_t to db_pgno_t, + since this symbol is defined in <sys/types> on at least one OS to + a non-compatible type (irix 5.2 defines it as long; db wants it to + be u_int32_t). + + * hash/dbm.c, include/db-ndbm.h: use and reference the compat + ndbm.h file + + * btree/bt_open.c, hash/hash.c, hash/hash_page.c, + include/db-int.h, include/db.h: build fixes - use configure to set + db internal cpp symbols for endianness stuff, move __P definition + from db-int.h to db.h. + + * configure.in, acconfig.h, Makefile.in, obj/configure.in, + obj/acconfig.in, obj/Makefile.in: rearrange the configure inputs + to deal properly with configure at the top level, and with a + multiarchitecture build using VPATH + +Sat Apr 6 16:43:26 1996 Marc Horowitz <marc@mit.edu> + + * obj/Makefile.in: random cleanup + + * btree/*.c db/db.c hash/*.c mpool/mpool.c recno/*.c + test/SEQ_TEST/t.c test/dbtest.c test/*/*.c: use "db-int.h" instead + of "db.h". + + * include/db.h, include/db-int.h: rototilled to be portable and + sensible, using configure whenever possible. + + * btree/*.c db/db.c hash/*.c mpool/mpool.c recno/*.c + test/SEQ_TEST/t.c test/dbtest.c test/*/*.c: use "db.h" instead of + <db.h>. + + * hash/hash.h, btree/btree.h, mpool/mpool.c: #include "mpool.h" + instead of <mpool.h>. + + * test/hash1.tests/thash4.c: remove unused and nonportable + <sys/timeb.h> + + * test/hash2.tests/bigtest.c: replace <malloc.h> with <stdlib.h> + + * clib/memmove.c: remove <sys/cdefs.h> + + * mpool/mpool.c, mpool/mpool.h, hash/hash.h, include/db-queue.h: + include "db-queue.h" instead of <sys/queue.h>, since it's not part + of any OS standard. + + * obj/*: first attempt at autoconfiscation + + * test/hash1.tests/driver2.c (main), test/hash1.tests/tseq.c + (main): replace berkeley memoryisms with ansi ones. + + * btree/bt_open.c (tmp): use sprintf instead of snprintf(). + conditionalize signal stuff on SIG_BLOCK instead of using special + magic in a header file. + diff --git a/src/util/db2/Makefile.in b/src/util/db2/Makefile.in new file mode 100644 index 0000000000..3e6344baa2 --- /dev/null +++ b/src/util/db2/Makefile.in @@ -0,0 +1,12 @@ +SHELL = /bin/sh + +SUBDIRS = obj + +all clean distclean realclean check:: + for i in ${SUBDIRS}; do (cd $$i; ${MAKE} $@) || exit 1; done + +distclean:: + rm -rf config.status config.log config.cache db-config.h + +realclean:: + rm -rf configure db-config.h.in diff --git a/src/util/db2/Makefile.inc b/src/util/db2/Makefile.inc new file mode 100644 index 0000000000..77af9c5123 --- /dev/null +++ b/src/util/db2/Makefile.inc @@ -0,0 +1,10 @@ +# @(#)Makefile.inc 8.2 (Berkeley) 2/21/94 +# +CFLAGS+=-D__DBINTERFACE_PRIVATE + +.include "${.CURDIR}/db/btree/Makefile.inc" +.include "${.CURDIR}/db/db/Makefile.inc" +.include "${.CURDIR}/db/hash/Makefile.inc" +.include "${.CURDIR}/db/man/Makefile.inc" +.include "${.CURDIR}/db/mpool/Makefile.inc" +.include "${.CURDIR}/db/recno/Makefile.inc" diff --git a/src/util/db2/README b/src/util/db2/README new file mode 100644 index 0000000000..5700b73932 --- /dev/null +++ b/src/util/db2/README @@ -0,0 +1,41 @@ +# @(#)README 8.28 (Berkeley) 11/2/95 + +This is version 2.0-ALPHA of the Berkeley DB code. +THIS IS A PRELIMINARY RELEASE. + +For information on compiling and installing this software, see the file +PORT/README. + +Newer versions of this software will periodically be made available by +anonymous ftp from ftp.cs.berkeley.edu:ucb/4bsd/db.tar.{Z,gz} and from +ftp.harvard.edu:margo/db.tar.{Z,gz}. If you want to receive announcements +of future releases of this software, send email to the contact address +below. + +Email questions may be addressed to dbinfo@eecs.harvard.edu. + +============================================ +Distribution contents: + +README This file. +CHANGELOG List of changes, per version. +btree B+tree access method. +db The db_open interface routine. +docs Various USENIX papers, and the formatted manual pages. +hash Extended linear hashing access method. +lock Lock manager. +log Log manager. +man The unformatted manual pages. +mpool The buffer manager support. +mutex Mutex support. +recno The fixed/variable length record access method. +test Test package. +txn Transaction support. + +============================================ +Debugging: + +If you're running a memory checker (e.g. Purify) on DB, make sure that +you recompile it with "-DPURIFY" in the CFLAGS, first. By default, +allocated pages are not initialized by the DB code, and they will show +up as reads of uninitialized memory in the buffer write routines. diff --git a/src/util/db2/acconfig.h b/src/util/db2/acconfig.h new file mode 100644 index 0000000000..d36e23fe28 --- /dev/null +++ b/src/util/db2/acconfig.h @@ -0,0 +1,17 @@ +#undef ssize_t + +/* BSD4.3, non-posix types */ + +#undef u_char +#undef u_short +#undef u_int +#undef u_long + +/* sized types used by db internals */ + +#undef int8_t +#undef u_int8_t +#undef int16_t +#undef u_int16_t +#undef int32_t +#undef u_int32_t diff --git a/src/util/db2/btree/Makefile.inc b/src/util/db2/btree/Makefile.inc new file mode 100644 index 0000000000..8ed76494aa --- /dev/null +++ b/src/util/db2/btree/Makefile.inc @@ -0,0 +1,7 @@ +# @(#)Makefile.inc 8.2 (Berkeley) 7/14/94 + +.PATH: ${.CURDIR}/db/btree + +SRCS+= bt_close.c bt_conv.c bt_debug.c bt_delete.c bt_get.c bt_open.c \ + bt_overflow.c bt_page.c bt_put.c bt_search.c bt_seq.c bt_split.c \ + bt_utils.c diff --git a/src/util/db2/btree/bt_close.c b/src/util/db2/btree/bt_close.c new file mode 100644 index 0000000000..b731dcb5d5 --- /dev/null +++ b/src/util/db2/btree/bt_close.c @@ -0,0 +1,182 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_close.c 8.7 (Berkeley) 8/17/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "db-int.h" +#include "btree.h" + +static int bt_meta __P((BTREE *)); + +/* + * BT_CLOSE -- Close a btree. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__bt_close(dbp) + DB *dbp; +{ + BTREE *t; + int fd; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Sync the tree. */ + if (__bt_sync(dbp, 0) == RET_ERROR) + return (RET_ERROR); + + /* Close the memory pool. */ + if (mpool_close(t->bt_mp) == RET_ERROR) + return (RET_ERROR); + + /* Free random memory. */ + if (t->bt_cursor.key.data != NULL) { + free(t->bt_cursor.key.data); + t->bt_cursor.key.size = 0; + t->bt_cursor.key.data = NULL; + } + if (t->bt_rkey.data) { + free(t->bt_rkey.data); + t->bt_rkey.size = 0; + t->bt_rkey.data = NULL; + } + if (t->bt_rdata.data) { + free(t->bt_rdata.data); + t->bt_rdata.size = 0; + t->bt_rdata.data = NULL; + } + + fd = t->bt_fd; + free(t); + free(dbp); + return (close(fd) ? RET_ERROR : RET_SUCCESS); +} + +/* + * BT_SYNC -- sync the btree to disk. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__bt_sync(dbp, flags) + const DB *dbp; + u_int flags; +{ + BTREE *t; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Sync doesn't currently take any flags. */ + if (flags != 0) { + errno = EINVAL; + return (RET_ERROR); + } + + if (F_ISSET(t, B_INMEM | B_RDONLY) || !F_ISSET(t, B_MODIFIED)) + return (RET_SUCCESS); + + if (F_ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) + return (RET_ERROR); + + if ((status = mpool_sync(t->bt_mp)) == RET_SUCCESS) + F_CLR(t, B_MODIFIED); + + return (status); +} + +/* + * BT_META -- write the tree meta data to disk. + * + * Parameters: + * t: tree + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +bt_meta(t) + BTREE *t; +{ + BTMETA m; + void *p; + + if ((p = mpool_get(t->bt_mp, P_META, 0)) == NULL) + return (RET_ERROR); + + /* Fill in metadata. */ + m.magic = BTREEMAGIC; + m.version = BTREEVERSION; + m.psize = t->bt_psize; + m.free = t->bt_free; + m.nrecs = t->bt_nrecs; + m.flags = F_ISSET(t, SAVEMETA); + + memmove(p, &m, sizeof(BTMETA)); + mpool_put(t->bt_mp, p, MPOOL_DIRTY); + return (RET_SUCCESS); +} diff --git a/src/util/db2/btree/bt_conv.c b/src/util/db2/btree/bt_conv.c new file mode 100644 index 0000000000..6cfa216ca8 --- /dev/null +++ b/src/util/db2/btree/bt_conv.c @@ -0,0 +1,221 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_conv.c 8.5 (Berkeley) 8/17/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> + +#include "db-int.h" +#include "btree.h" + +static void mswap __P((PAGE *)); + +/* + * __BT_BPGIN, __BT_BPGOUT -- + * Convert host-specific number layout to/from the host-independent + * format stored on disk. + * + * Parameters: + * t: tree + * pg: page number + * h: page to convert + */ +void +__bt_pgin(t, pg, pp) + void *t; + db_pgno_t pg; + void *pp; +{ + PAGE *h; + indx_t i, top; + u_char flags; + char *p; + + if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) + return; + if (pg == P_META) { + mswap(pp); + return; + } + + h = pp; + M_32_SWAP(h->pgno); + M_32_SWAP(h->prevpg); + M_32_SWAP(h->nextpg); + M_32_SWAP(h->flags); + M_16_SWAP(h->lower); + M_16_SWAP(h->upper); + + top = NEXTINDEX(h); + if ((h->flags & P_TYPE) == P_BINTERNAL) + for (i = 0; i < top; i++) { + M_16_SWAP(h->linp[i]); + p = (char *)GETBINTERNAL(h, i); + P_32_SWAP(p); + p += sizeof(u_int32_t); + P_32_SWAP(p); + p += sizeof(db_pgno_t); + if (*(u_char *)p & P_BIGKEY) { + p += sizeof(u_char); + P_32_SWAP(p); + p += sizeof(db_pgno_t); + P_32_SWAP(p); + } + } + else if ((h->flags & P_TYPE) == P_BLEAF) + for (i = 0; i < top; i++) { + M_16_SWAP(h->linp[i]); + p = (char *)GETBLEAF(h, i); + P_32_SWAP(p); + p += sizeof(u_int32_t); + P_32_SWAP(p); + p += sizeof(u_int32_t); + flags = *(u_char *)p; + if (flags & (P_BIGKEY | P_BIGDATA)) { + p += sizeof(u_char); + if (flags & P_BIGKEY) { + P_32_SWAP(p); + p += sizeof(db_pgno_t); + P_32_SWAP(p); + } + if (flags & P_BIGDATA) { + p += sizeof(u_int32_t); + P_32_SWAP(p); + p += sizeof(db_pgno_t); + P_32_SWAP(p); + } + } + } +} + +void +__bt_pgout(t, pg, pp) + void *t; + db_pgno_t pg; + void *pp; +{ + PAGE *h; + indx_t i, top; + u_char flags; + char *p; + + if (!F_ISSET(((BTREE *)t), B_NEEDSWAP)) + return; + if (pg == P_META) { + mswap(pp); + return; + } + + h = pp; + top = NEXTINDEX(h); + if ((h->flags & P_TYPE) == P_BINTERNAL) + for (i = 0; i < top; i++) { + p = (char *)GETBINTERNAL(h, i); + P_32_SWAP(p); + p += sizeof(u_int32_t); + P_32_SWAP(p); + p += sizeof(db_pgno_t); + if (*(u_char *)p & P_BIGKEY) { + p += sizeof(u_char); + P_32_SWAP(p); + p += sizeof(db_pgno_t); + P_32_SWAP(p); + } + M_16_SWAP(h->linp[i]); + } + else if ((h->flags & P_TYPE) == P_BLEAF) + for (i = 0; i < top; i++) { + p = (char *)GETBLEAF(h, i); + P_32_SWAP(p); + p += sizeof(u_int32_t); + P_32_SWAP(p); + p += sizeof(u_int32_t); + flags = *(u_char *)p; + if (flags & (P_BIGKEY | P_BIGDATA)) { + p += sizeof(u_char); + if (flags & P_BIGKEY) { + P_32_SWAP(p); + p += sizeof(db_pgno_t); + P_32_SWAP(p); + } + if (flags & P_BIGDATA) { + p += sizeof(u_int32_t); + P_32_SWAP(p); + p += sizeof(db_pgno_t); + P_32_SWAP(p); + } + } + M_16_SWAP(h->linp[i]); + } + + M_32_SWAP(h->pgno); + M_32_SWAP(h->prevpg); + M_32_SWAP(h->nextpg); + M_32_SWAP(h->flags); + M_16_SWAP(h->lower); + M_16_SWAP(h->upper); +} + +/* + * MSWAP -- Actually swap the bytes on the meta page. + * + * Parameters: + * p: page to convert + */ +static void +mswap(pg) + PAGE *pg; +{ + char *p; + + p = (char *)pg; + P_32_SWAP(p); /* magic */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* version */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* psize */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* free */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* nrecs */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* flags */ + p += sizeof(u_int32_t); +} diff --git a/src/util/db2/btree/bt_debug.c b/src/util/db2/btree/bt_debug.c new file mode 100644 index 0000000000..8cf1cdaf53 --- /dev/null +++ b/src/util/db2/btree/bt_debug.c @@ -0,0 +1,372 @@ +/*- + * Copyright (c) 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_debug.c 8.6 (Berkeley) 1/9/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "db-int.h" +#include "btree.h" + +#if defined(DEBUG) || defined(STATISTICS) + +static FILE *tracefp; + +/* + * __bt_dinit -- + * initialize debugging. + */ +static void +__bt_dinit() +{ + static int first = 1; + char buf[1024]; + + if (!first) + return; + first = 0; + +#ifndef TRACE_TO_STDERR + if ((tracefp = fopen("/tmp/__bt_debug", "w")) != NULL) + return; +#endif + tracefp = stderr; +} +#endif + +#ifdef DEBUG +/* + * __bt_dump -- + * dump the tree + * + * Parameters: + * dbp: pointer to the DB + */ +int +__bt_dump(dbp) + DB *dbp; +{ + BTREE *t; + PAGE *h; + db_pgno_t i; + char *sep; + + __bt_dinit(); + + t = dbp->internal; + (void)fprintf(tracefp, "%s: pgsz %d", + F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); + if (F_ISSET(t, R_RECNO)) + (void)fprintf(tracefp, " keys %lu", t->bt_nrecs); +#undef X +#define X(flag, name) \ + if (F_ISSET(t, flag)) { \ + (void)fprintf(tracefp, "%s%s", sep, name); \ + sep = ", "; \ + } + if (t->flags != 0) { + sep = " flags ("; + X(R_FIXLEN, "FIXLEN"); + X(B_INMEM, "INMEM"); + X(B_NODUPS, "NODUPS"); + X(B_RDONLY, "RDONLY"); + X(R_RECNO, "RECNO"); + X(B_METADIRTY,"METADIRTY"); + (void)fprintf(tracefp, ")\n"); + } +#undef X + + for (i = P_ROOT; + (h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i) + __bt_dpage(h); + (void)fflush(tracefp); + return (0); +} + +/* + * BT_DMPAGE -- Dump the meta page + * + * Parameters: + * h: pointer to the PAGE + */ +int +__bt_dmpage(h) + PAGE *h; +{ + BTMETA *m; + char *sep; + + __bt_dinit(); + + m = (BTMETA *)h; + (void)fprintf(tracefp, "magic %lx\n", m->magic); + (void)fprintf(tracefp, "version %lu\n", m->version); + (void)fprintf(tracefp, "psize %lu\n", m->psize); + (void)fprintf(tracefp, "free %lu\n", m->free); + (void)fprintf(tracefp, "nrecs %lu\n", m->nrecs); + (void)fprintf(tracefp, "flags %lu", m->flags); +#undef X +#define X(flag, name) \ + if (m->flags & flag) { \ + (void)fprintf(tracefp, "%s%s", sep, name); \ + sep = ", "; \ + } + if (m->flags) { + sep = " ("; + X(B_NODUPS, "NODUPS"); + X(R_RECNO, "RECNO"); + (void)fprintf(tracefp, ")"); + } + (void)fflush(tracefp); + return (0); +} + +/* + * BT_DNPAGE -- Dump the page + * + * Parameters: + * n: page number to dump. + */ +int +__bt_dnpage(dbp, pgno) + DB *dbp; + db_pgno_t pgno; +{ + BTREE *t; + PAGE *h; + + __bt_dinit(); + + t = dbp->internal; + if ((h = mpool_get(t->bt_mp, pgno, MPOOL_IGNOREPIN)) != NULL) + __bt_dpage(h); + (void)fflush(tracefp); + return (0); +} + +/* + * BT_DPAGE -- Dump the page + * + * Parameters: + * h: pointer to the PAGE + */ +int +__bt_dpage(h) + PAGE *h; +{ + BINTERNAL *bi; + BLEAF *bl; + RINTERNAL *ri; + RLEAF *rl; + indx_t cur, top; + char *sep; + + __bt_dinit(); + + (void)fprintf(tracefp, " page %d: (", h->pgno); +#undef X +#define X(flag, name) \ + if (h->flags & flag) { \ + (void)fprintf(tracefp, "%s%s", sep, name); \ + sep = ", "; \ + } + sep = ""; + X(P_BINTERNAL, "BINTERNAL") /* types */ + X(P_BLEAF, "BLEAF") + X(P_RINTERNAL, "RINTERNAL") /* types */ + X(P_RLEAF, "RLEAF") + X(P_OVERFLOW, "OVERFLOW") + X(P_PRESERVE, "PRESERVE"); + (void)fprintf(tracefp, ")\n"); +#undef X + + (void)fprintf(tracefp, "\tprev %2d next %2d", h->prevpg, h->nextpg); + if (h->flags & P_OVERFLOW) + return; + + top = NEXTINDEX(h); + (void)fprintf(tracefp, " lower %3d upper %3d nextind %d\n", + h->lower, h->upper, top); + for (cur = 0; cur < top; cur++) { + (void)fprintf(tracefp, "\t[%03d] %4d ", cur, h->linp[cur]); + switch (h->flags & P_TYPE) { + case P_BINTERNAL: + bi = GETBINTERNAL(h, cur); + (void)fprintf(tracefp, + "size %03d pgno %03d", bi->ksize, bi->pgno); + if (bi->flags & P_BIGKEY) + (void)fprintf(tracefp, " (indirect)"); + else if (bi->ksize) + (void)fprintf(tracefp, + " {%.*s}", (int)bi->ksize, bi->bytes); + break; + case P_RINTERNAL: + ri = GETRINTERNAL(h, cur); + (void)fprintf(tracefp, "entries %03d pgno %03d", + ri->nrecs, ri->pgno); + break; + case P_BLEAF: + bl = GETBLEAF(h, cur); + if (bl->flags & P_BIGKEY) + (void)fprintf(tracefp, + "big key page %lu size %u/", + *(db_pgno_t *)bl->bytes, + *(u_int32_t *)(bl->bytes + sizeof(db_pgno_t))); + else if (bl->ksize) + (void)fprintf(tracefp, "%s/", bl->bytes); + if (bl->flags & P_BIGDATA) + (void)fprintf(tracefp, + "big data page %lu size %u", + *(db_pgno_t *)(bl->bytes + bl->ksize), + *(u_int32_t *)(bl->bytes + bl->ksize + + sizeof(db_pgno_t))); + else if (bl->dsize) + (void)fprintf(tracefp, "%.*s", + (int)bl->dsize, bl->bytes + bl->ksize); + break; + case P_RLEAF: + rl = GETRLEAF(h, cur); + if (rl->flags & P_BIGDATA) + (void)fprintf(tracefp, + "big data page %lu size %u", + *(db_pgno_t *)rl->bytes, + *(u_int32_t *)(rl->bytes + sizeof(db_pgno_t))); + else if (rl->dsize) + (void)fprintf(tracefp, + "%.*s", (int)rl->dsize, rl->bytes); + break; + } + (void)fprintf(tracefp, "\n"); + } + (void)fflush(tracefp); + return (0); +} +#endif + +#ifdef STATISTICS +/* + * bt_stat -- + * Gather/print the tree statistics + * + * Parameters: + * dbp: pointer to the DB + */ +int +__bt_stat(dbp) + DB *dbp; +{ + extern u_long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit; + extern u_long bt_sortsplit, bt_split; + BTREE *t; + PAGE *h; + db_pgno_t i, pcont, pinternal, pleaf; + u_long ifree, lfree, nkeys; + int levels; + + __bt_dinit(); + + t = dbp->internal; + pcont = pinternal = pleaf = 0; + nkeys = ifree = lfree = 0; + for (i = P_ROOT; + (h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i) + switch (h->flags & P_TYPE) { + case P_BINTERNAL: + case P_RINTERNAL: + ++pinternal; + ifree += h->upper - h->lower; + break; + case P_BLEAF: + case P_RLEAF: + ++pleaf; + lfree += h->upper - h->lower; + nkeys += NEXTINDEX(h); + break; + case P_OVERFLOW: + ++pcont; + break; + } + + /* Count the levels of the tree. */ + for (i = P_ROOT, levels = 0 ;; ++levels) { + h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN); + if (h->flags & (P_BLEAF|P_RLEAF)) { + if (levels == 0) + levels = 1; + break; + } + i = F_ISSET(t, R_RECNO) ? + GETRINTERNAL(h, 0)->pgno : + GETBINTERNAL(h, 0)->pgno; + } + + (void)fprintf(tracefp, "%d level%s with %ld keys", + levels, levels == 1 ? "" : "s", nkeys); + if (F_ISSET(t, R_RECNO)) + (void)fprintf(tracefp, " (%ld header count)", t->bt_nrecs); + (void)fprintf(tracefp, + "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n", + pinternal + pleaf + pcont, pleaf, pinternal, pcont); + (void)fprintf(tracefp, "%ld cache hits, %ld cache misses\n", + bt_cache_hit, bt_cache_miss); + (void)fprintf(tracefp, + "%ld splits (%ld root splits, %ld sort splits)\n", + bt_split, bt_rootsplit, bt_sortsplit); + pleaf *= t->bt_psize - BTDATAOFF; + if (pleaf) + (void)fprintf(tracefp, + "%.0f%% leaf fill (%ld bytes used, %ld bytes free)\n", + ((double)(pleaf - lfree) / pleaf) * 100, + pleaf - lfree, lfree); + pinternal *= t->bt_psize - BTDATAOFF; + if (pinternal) + (void)fprintf(tracefp, + "%.0f%% internal fill (%ld bytes used, %ld bytes free\n", + ((double)(pinternal - ifree) / pinternal) * 100, + pinternal - ifree, ifree); + if (bt_pfxsaved) + (void)fprintf(tracefp, "prefix checking removed %lu bytes.\n", + bt_pfxsaved); + (void)fflush(tracefp); + return (0); +} +#endif diff --git a/src/util/db2/btree/bt_delete.c b/src/util/db2/btree/bt_delete.c new file mode 100644 index 0000000000..078981c58b --- /dev/null +++ b/src/util/db2/btree/bt_delete.c @@ -0,0 +1,657 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include "db-int.h" +#include "btree.h" + +static int __bt_bdelete __P((BTREE *, const DBT *)); +static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int)); +static int __bt_pdelete __P((BTREE *, PAGE *)); +static int __bt_relink __P((BTREE *, PAGE *)); +static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *)); + +/* + * __bt_delete + * Delete the item(s) referenced by a key. + * + * Return RET_SPECIAL if the key is not found. + */ +int +__bt_delete(dbp, key, flags) + const DB *dbp; + const DBT *key; + u_int flags; +{ + BTREE *t; + CURSOR *c; + PAGE *h; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Check for change to a read-only tree. */ + if (F_ISSET(t, B_RDONLY)) { + errno = EPERM; + return (RET_ERROR); + } + + switch (flags) { + case 0: + status = __bt_bdelete(t, key); + break; + case R_CURSOR: + /* + * If flags is R_CURSOR, delete the cursor. Must already + * have started a scan and not have already deleted it. + */ + c = &t->bt_cursor; + if (F_ISSET(c, CURS_INIT)) { + if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) + return (RET_ERROR); + + /* + * If the page is about to be emptied, we'll need to + * delete it, which means we have to acquire a stack. + */ + if (NEXTINDEX(h) == 1) + if (__bt_stkacq(t, &h, &t->bt_cursor)) + return (RET_ERROR); + + status = __bt_dleaf(t, NULL, h, c->pg.index); + + if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) { + if (__bt_pdelete(t, h)) + return (RET_ERROR); + } else + mpool_put(t->bt_mp, + h, status == RET_SUCCESS ? MPOOL_DIRTY : 0); + break; + } + /* FALLTHROUGH */ + default: + errno = EINVAL; + return (RET_ERROR); + } + if (status == RET_SUCCESS) + F_SET(t, B_MODIFIED); + return (status); +} + +/* + * __bt_stkacq -- + * Acquire a stack so we can delete a cursor entry. + * + * Parameters: + * t: tree + * hp: pointer to current, pinned PAGE pointer + * c: pointer to the cursor + * + * Returns: + * 0 on success, 1 on failure + */ +static int +__bt_stkacq(t, hp, c) + BTREE *t; + PAGE **hp; + CURSOR *c; +{ + BINTERNAL *bi; + EPG *e; + EPGNO *parent; + PAGE *h; + indx_t index; + db_pgno_t pgno; + recno_t nextpg, prevpg; + int exact, level; + + /* + * Find the first occurrence of the key in the tree. Toss the + * currently locked page so we don't hit an already-locked page. + */ + h = *hp; + mpool_put(t->bt_mp, h, 0); + if ((e = __bt_search(t, &c->key, &exact)) == NULL) + return (1); + h = e->page; + + /* See if we got it in one shot. */ + if (h->pgno == c->pg.pgno) + goto ret; + + /* + * Move right, looking for the page. At each move we have to move + * up the stack until we don't have to move to the next page. If + * we have to change pages at an internal level, we have to fix the + * stack back up. + */ + while (h->pgno != c->pg.pgno) { + if ((nextpg = h->nextpg) == P_INVALID) + break; + mpool_put(t->bt_mp, h, 0); + + /* Move up the stack. */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (1); + + /* Move to the next index. */ + if (parent->index != NEXTINDEX(h) - 1) { + index = parent->index + 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + BT_PUSH(t, pgno, 0); + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (1); + index = 0; + } + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL) + return (1); + } + + if (h->pgno == c->pg.pgno) + goto ret; + + /* Reacquire the original stack. */ + mpool_put(t->bt_mp, h, 0); + if ((e = __bt_search(t, &c->key, &exact)) == NULL) + return (1); + h = e->page; + + /* + * Move left, looking for the page. At each move we have to move + * up the stack until we don't have to change pages to move to the + * next page. If we have to change pages at an internal level, we + * have to fix the stack back up. + */ + while (h->pgno != c->pg.pgno) { + if ((prevpg = h->prevpg) == P_INVALID) + break; + mpool_put(t->bt_mp, h, 0); + + /* Move up the stack. */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (1); + + /* Move to the next index. */ + if (parent->index != 0) { + index = parent->index - 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (1); + + index = NEXTINDEX(h) - 1; + BT_PUSH(t, pgno, index); + } + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL) + return (1); + } + + +ret: mpool_put(t->bt_mp, h, 0); + return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL); +} + +/* + * __bt_bdelete -- + * Delete all key/data pairs matching the specified key. + * + * Parameters: + * t: tree + * key: key to delete + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +static int +__bt_bdelete(t, key) + BTREE *t; + const DBT *key; +{ + EPG *e; + PAGE *h; + int deleted, exact, redo; + + deleted = 0; + + /* Find any matching record; __bt_search pins the page. */ +loop: if ((e = __bt_search(t, key, &exact)) == NULL) + return (deleted ? RET_SUCCESS : RET_ERROR); + if (!exact) { + mpool_put(t->bt_mp, e->page, 0); + return (deleted ? RET_SUCCESS : RET_SPECIAL); + } + + /* + * Delete forward, then delete backward, from the found key. If + * there are duplicates and we reach either side of the page, do + * the key search again, so that we get them all. + */ + redo = 0; + h = e->page; + do { + if (__bt_dleaf(t, key, h, e->index)) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + if (F_ISSET(t, B_NODUPS)) { + if (NEXTINDEX(h) == 0) { + if (__bt_pdelete(t, h)) + return (RET_ERROR); + } else + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); + } + deleted = 1; + } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); + + /* Check for right-hand edge of the page. */ + if (e->index == NEXTINDEX(h)) + redo = 1; + + /* Delete from the key to the beginning of the page. */ + while (e->index-- > 0) { + if (__bt_cmp(t, key, e) != 0) + break; + if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + if (e->index == 0) + redo = 1; + } + + /* Check for an empty page. */ + if (NEXTINDEX(h) == 0) { + if (__bt_pdelete(t, h)) + return (RET_ERROR); + goto loop; + } + + /* Put the page. */ + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + if (redo) + goto loop; + return (RET_SUCCESS); +} + +/* + * __bt_pdelete -- + * Delete a single page from the tree. + * + * Parameters: + * t: tree + * h: leaf page + * + * Returns: + * RET_SUCCESS, RET_ERROR. + * + * Side-effects: + * mpool_put's the page + */ +static int +__bt_pdelete(t, h) + BTREE *t; + PAGE *h; +{ + BINTERNAL *bi; + PAGE *pg; + EPGNO *parent; + indx_t cnt, index, *ip, offset; + u_int32_t nksize; + char *from; + + /* + * Walk the parent page stack -- a LIFO stack of the pages that were + * traversed when we searched for the page where the delete occurred. + * Each stack entry is a page number and a page index offset. The + * offset is for the page traversed on the search. We've just deleted + * a page, so we have to delete the key from the parent page. + * + * If the delete from the parent page makes it empty, this process may + * continue all the way up the tree. We stop if we reach the root page + * (which is never deleted, it's just not worth the effort) or if the + * delete does not empty the page. + */ + while ((parent = BT_POP(t)) != NULL) { + /* Get the parent page. */ + if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (RET_ERROR); + + index = parent->index; + bi = GETBINTERNAL(pg, index); + + /* Free any overflow pages. */ + if (bi->flags & P_BIGKEY && + __ovfl_delete(t, bi->bytes) == RET_ERROR) { + mpool_put(t->bt_mp, pg, 0); + return (RET_ERROR); + } + + /* + * Free the parent if it has only the one key and it's not the + * root page. If it's the rootpage, turn it back into an empty + * leaf page. + */ + if (NEXTINDEX(pg) == 1) + if (pg->pgno == P_ROOT) { + pg->lower = BTDATAOFF; + pg->upper = t->bt_psize; + pg->flags = P_BLEAF; + } else { + if (__bt_relink(t, pg) || __bt_free(t, pg)) + return (RET_ERROR); + continue; + } + else { + /* Pack remaining key items at the end of the page. */ + nksize = NBINTERNAL(bi->ksize); + from = (char *)pg + pg->upper; + memmove(from + nksize, from, (char *)bi - from); + pg->upper += nksize; + + /* Adjust indices' offsets, shift the indices down. */ + offset = pg->linp[index]; + for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip) + if (ip[0] < offset) + ip[0] += nksize; + for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip) + ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1]; + pg->lower -= sizeof(indx_t); + } + + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + break; + } + + /* Free the leaf page, as long as it wasn't the root. */ + if (h->pgno == P_ROOT) { + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); + } + return (__bt_relink(t, h) || __bt_free(t, h)); +} + +/* + * __bt_dleaf -- + * Delete a single record from a leaf page. + * + * Parameters: + * t: tree + * key: referenced key + * h: page + * index: index on page to delete + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__bt_dleaf(t, key, h, index) + BTREE *t; + const DBT *key; + PAGE *h; + u_int index; +{ + BLEAF *bl; + indx_t cnt, *ip, offset; + u_int32_t nbytes; + void *to; + char *from; + + /* If this record is referenced by the cursor, delete the cursor. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index && + __bt_curdel(t, key, h, index)) + return (RET_ERROR); + + /* If the entry uses overflow pages, make them available for reuse. */ + to = bl = GETBLEAF(h, index); + if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) + return (RET_ERROR); + if (bl->flags & P_BIGDATA && + __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) + return (RET_ERROR); + + /* Pack the remaining key/data items at the end of the page. */ + nbytes = NBLEAF(bl); + from = (char *)h + h->upper; + memmove(from + nbytes, from, (char *)to - from); + h->upper += nbytes; + + /* Adjust the indices' offsets, shift the indices down. */ + offset = h->linp[index]; + for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) + if (ip[0] < offset) + ip[0] += nbytes; + for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) + ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; + h->lower -= sizeof(indx_t); + + /* If the cursor is on this page, adjust it as necessary. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index) + --t->bt_cursor.pg.index; + + return (RET_SUCCESS); +} + +/* + * __bt_curdel -- + * Delete the cursor. + * + * Parameters: + * t: tree + * key: referenced key (or NULL) + * h: page + * index: index on page to delete + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +static int +__bt_curdel(t, key, h, index) + BTREE *t; + const DBT *key; + PAGE *h; + u_int index; +{ + CURSOR *c; + EPG e; + PAGE *pg; + int curcopy, status; + + /* + * If there are duplicates, move forward or backward to one. + * Otherwise, copy the key into the cursor area. + */ + c = &t->bt_cursor; + F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE); + + curcopy = 0; + if (!F_ISSET(t, B_NODUPS)) { + /* + * We're going to have to do comparisons. If we weren't + * provided a copy of the key, i.e. the user is deleting + * the current cursor position, get one. + */ + if (key == NULL) { + e.page = h; + e.index = index; + if ((status = __bt_ret(t, &e, + &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS) + return (status); + curcopy = 1; + key = &c->key; + } + /* Check previous key, if not at the beginning of the page. */ + if (index > 0) { + e.page = h; + e.index = index - 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_BEFORE); + goto dup2; + } + } + /* Check next key, if not at the end of the page. */ + if (index < NEXTINDEX(h) - 1) { + e.page = h; + e.index = index + 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_AFTER); + goto dup2; + } + } + /* Check previous key if at the beginning of the page. */ + if (index == 0 && h->prevpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (RET_ERROR); + e.page = pg; + e.index = NEXTINDEX(pg) - 1; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_BEFORE); + goto dup1; + } + mpool_put(t->bt_mp, pg, 0); + } + /* Check next key if at the end of the page. */ + if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (RET_ERROR); + e.page = pg; + e.index = 0; + if (__bt_cmp(t, key, &e) == 0) { + F_SET(c, CURS_AFTER); +dup1: mpool_put(t->bt_mp, pg, 0); +dup2: c->pg.pgno = e.page->pgno; + c->pg.index = e.index; + return (RET_SUCCESS); + } + mpool_put(t->bt_mp, pg, 0); + } + } + e.page = h; + e.index = index; + if (curcopy || (status = + __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) { + F_SET(c, CURS_ACQUIRE); + return (RET_SUCCESS); + } + return (status); +} + +/* + * __bt_relink -- + * Link around a deleted page. + * + * Parameters: + * t: tree + * h: page to be deleted + */ +static int +__bt_relink(t, h) + BTREE *t; + PAGE *h; +{ + PAGE *pg; + + if (h->nextpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (RET_ERROR); + pg->prevpg = h->prevpg; + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + } + if (h->prevpg != P_INVALID) { + if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (RET_ERROR); + pg->nextpg = h->nextpg; + mpool_put(t->bt_mp, pg, MPOOL_DIRTY); + } + return (0); +} diff --git a/src/util/db2/btree/bt_get.c b/src/util/db2/btree/bt_get.c new file mode 100644 index 0000000000..b6318211a1 --- /dev/null +++ b/src/util/db2/btree/bt_get.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_get.c 8.6 (Berkeley) 7/20/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> + +#include "db-int.h" +#include "btree.h" + +/* + * __BT_GET -- Get a record from the btree. + * + * Parameters: + * dbp: pointer to access method + * key: key to find + * data: data to return + * flag: currently unused + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +int +__bt_get(dbp, key, data, flags) + const DB *dbp; + const DBT *key; + DBT *data; + u_int flags; +{ + BTREE *t; + EPG *e; + int exact, status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Get currently doesn't take any flags. */ + if (flags) { + errno = EINVAL; + return (RET_ERROR); + } + + if ((e = __bt_search(t, key, &exact)) == NULL) + return (RET_ERROR); + if (!exact) { + mpool_put(t->bt_mp, e->page, 0); + return (RET_SPECIAL); + } + + status = __bt_ret(t, e, NULL, NULL, data, &t->bt_rdata, 0); + + /* + * If the user is doing concurrent access, we copied the + * key/data, toss the page. + */ + if (F_ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e->page, 0); + else + t->bt_pinned = e->page; + return (status); +} diff --git a/src/util/db2/btree/bt_open.c b/src/util/db2/btree/bt_open.c new file mode 100644 index 0000000000..3a7aa94627 --- /dev/null +++ b/src/util/db2/btree/bt_open.c @@ -0,0 +1,471 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_open.c 8.11 (Berkeley) 11/2/95"; +#endif /* LIBC_SCCS and not lint */ + +/* + * Implementation of btree access method for 4.4BSD. + * + * The design here was originally based on that of the btree access method + * used in the Postgres database system at UC Berkeley. This implementation + * is wholly independent of the Postgres code. + */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "db-int.h" +#include "btree.h" + +#ifdef DEBUG +#undef MINPSIZE +#define MINPSIZE 128 +#endif + +static int byteorder __P((void)); +static int nroot __P((BTREE *)); +static int tmp __P((void)); + +/* + * __BT_OPEN -- Open a btree. + * + * Creates and fills a DB struct, and calls the routine that actually + * opens the btree. + * + * Parameters: + * fname: filename (NULL for in-memory trees) + * flags: open flag bits + * mode: open permission bits + * b: BTREEINFO pointer + * + * Returns: + * NULL on failure, pointer to DB on success. + * + */ +DB * +__bt_open(fname, flags, mode, openinfo, dflags) + const char *fname; + int flags, mode, dflags; + const BTREEINFO *openinfo; +{ + struct stat sb; + BTMETA m; + BTREE *t; + BTREEINFO b; + DB *dbp; + db_pgno_t ncache; + ssize_t nr; + int machine_lorder; + + t = NULL; + + /* + * Intention is to make sure all of the user's selections are okay + * here and then use them without checking. Can't be complete, since + * we don't know the right page size, lorder or flags until the backing + * file is opened. Also, the file's page size can cause the cachesize + * to change. + */ + machine_lorder = byteorder(); + if (openinfo) { + b = *openinfo; + + /* Flags: R_DUP. */ + if (b.flags & ~(R_DUP)) + goto einval; + + /* + * Page size must be indx_t aligned and >= MINPSIZE. Default + * page size is set farther on, based on the underlying file + * transfer size. + */ + if (b.psize && + (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 || + b.psize & sizeof(indx_t) - 1)) + goto einval; + + /* Minimum number of keys per page; absolute minimum is 2. */ + if (b.minkeypage) { + if (b.minkeypage < 2) + goto einval; + } else + b.minkeypage = DEFMINKEYPAGE; + + /* If no comparison, use default comparison and prefix. */ + if (b.compare == NULL) { + b.compare = __bt_defcmp; + if (b.prefix == NULL) + b.prefix = __bt_defpfx; + } + + if (b.lorder == 0) + b.lorder = machine_lorder; + } else { + b.compare = __bt_defcmp; + b.cachesize = 0; + b.flags = 0; + b.lorder = machine_lorder; + b.minkeypage = DEFMINKEYPAGE; + b.prefix = __bt_defpfx; + b.psize = 0; + } + + /* Check for the ubiquitous PDP-11. */ + if (b.lorder != DB_BIG_ENDIAN && b.lorder != DB_LITTLE_ENDIAN) + goto einval; + + /* Allocate and initialize DB and BTREE structures. */ + if ((t = (BTREE *)malloc(sizeof(BTREE))) == NULL) + goto err; + memset(t, 0, sizeof(BTREE)); + t->bt_fd = -1; /* Don't close unopened fd on error. */ + t->bt_lorder = b.lorder; + t->bt_order = NOT; + t->bt_cmp = b.compare; + t->bt_pfx = b.prefix; + t->bt_rfd = -1; + + if ((t->bt_dbp = dbp = (DB *)malloc(sizeof(DB))) == NULL) + goto err; + memset(t->bt_dbp, 0, sizeof(DB)); + if (t->bt_lorder != machine_lorder) + F_SET(t, B_NEEDSWAP); + + dbp->type = DB_BTREE; + dbp->internal = t; + dbp->close = __bt_close; + dbp->del = __bt_delete; + dbp->fd = __bt_fd; + dbp->get = __bt_get; + dbp->put = __bt_put; + dbp->seq = __bt_seq; + dbp->sync = __bt_sync; + + /* + * If no file name was supplied, this is an in-memory btree and we + * open a backing temporary file. Otherwise, it's a disk-based tree. + */ + if (fname) { + switch (flags & O_ACCMODE) { + case O_RDONLY: + F_SET(t, B_RDONLY); + break; + case O_RDWR: + break; + case O_WRONLY: + default: + goto einval; + } + + if ((t->bt_fd = open(fname, flags, mode)) < 0) + goto err; + + } else { + if ((flags & O_ACCMODE) != O_RDWR) + goto einval; + if ((t->bt_fd = tmp()) == -1) + goto err; + F_SET(t, B_INMEM); + } + + if (fcntl(t->bt_fd, F_SETFD, 1) == -1) + goto err; + + if (fstat(t->bt_fd, &sb)) + goto err; + if (sb.st_size) { + if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0) + goto err; + if (nr != sizeof(BTMETA)) + goto eftype; + + /* + * Read in the meta-data. This can change the notion of what + * the lorder, page size and flags are, and, when the page size + * changes, the cachesize value can change too. If the user + * specified the wrong byte order for an existing database, we + * don't bother to return an error, we just clear the NEEDSWAP + * bit. + */ + if (m.magic == BTREEMAGIC) + F_CLR(t, B_NEEDSWAP); + else { + F_SET(t, B_NEEDSWAP); + M_32_SWAP(m.magic); + M_32_SWAP(m.version); + M_32_SWAP(m.psize); + M_32_SWAP(m.free); + M_32_SWAP(m.nrecs); + M_32_SWAP(m.flags); + } + if (m.magic != BTREEMAGIC || m.version != BTREEVERSION) + goto eftype; + if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 || + m.psize & sizeof(indx_t) - 1) + goto eftype; + if (m.flags & ~SAVEMETA) + goto eftype; + b.psize = m.psize; + F_SET(t, m.flags); + t->bt_free = m.free; + t->bt_nrecs = m.nrecs; + } else { + /* + * Set the page size to the best value for I/O to this file. + * Don't overflow the page offset type. + */ + if (b.psize == 0) { + b.psize = sb.st_blksize; + if (b.psize < MINPSIZE) + b.psize = MINPSIZE; + if (b.psize > MAX_PAGE_OFFSET + 1) + b.psize = MAX_PAGE_OFFSET + 1; + } + + /* Set flag if duplicates permitted. */ + if (!(b.flags & R_DUP)) + F_SET(t, B_NODUPS); + + t->bt_free = P_INVALID; + t->bt_nrecs = 0; + F_SET(t, B_METADIRTY); + } + + t->bt_psize = b.psize; + + /* Set the cache size; must be a multiple of the page size. */ + if (b.cachesize && b.cachesize & b.psize - 1) + b.cachesize += (~b.cachesize & b.psize - 1) + 1; + if (b.cachesize < b.psize * MINCACHE) + b.cachesize = b.psize * MINCACHE; + + /* Calculate number of pages to cache. */ + ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize; + + /* + * The btree data structure requires that at least two keys can fit on + * a page, but other than that there's no fixed requirement. The user + * specified a minimum number per page, and we translated that into the + * number of bytes a key/data pair can use before being placed on an + * overflow page. This calculation includes the page header, the size + * of the index referencing the leaf item and the size of the leaf item + * structure. Also, don't let the user specify a minkeypage such that + * a key/data pair won't fit even if both key and data are on overflow + * pages. + */ + t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage - + (sizeof(indx_t) + NBLEAFDBT(0, 0)); + if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t)) + t->bt_ovflsize = + NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t); + + /* Initialize the buffer pool. */ + if ((t->bt_mp = + mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL) + goto err; + if (!F_ISSET(t, B_INMEM)) + mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t); + + /* Create a root page if new tree. */ + if (nroot(t) == RET_ERROR) + goto err; + + /* Global flags. */ + if (dflags & DB_LOCK) + F_SET(t, B_DB_LOCK); + if (dflags & DB_SHMEM) + F_SET(t, B_DB_SHMEM); + if (dflags & DB_TXN) + F_SET(t, B_DB_TXN); + + return (dbp); + +einval: errno = EINVAL; + goto err; + +eftype: errno = EFTYPE; + goto err; + +err: if (t) { + if (t->bt_dbp) + free(t->bt_dbp); + if (t->bt_fd != -1) + (void)close(t->bt_fd); + free(t); + } + return (NULL); +} + +/* + * NROOT -- Create the root of a new tree. + * + * Parameters: + * t: tree + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +nroot(t) + BTREE *t; +{ + PAGE *meta, *root; + db_pgno_t npg; + + if ((root = mpool_get(t->bt_mp, 1, 0)) != NULL) { + if (root->lower == 0 && + root->pgno == 0 && + root->linp[0] == 0) { + mpool_delete(t->bt_mp, root); + errno = EINVAL; + } else { + mpool_put(t->bt_mp, root, 0); + return (RET_SUCCESS); + } + } + if (errno != EINVAL) /* It's OK to not exist. */ + return (RET_ERROR); + errno = 0; + + if ((meta = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL) + return (RET_ERROR); + + if ((root = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL) + return (RET_ERROR); + + if (npg != P_ROOT) + return (RET_ERROR); + root->pgno = npg; + root->prevpg = root->nextpg = P_INVALID; + root->lower = BTDATAOFF; + root->upper = t->bt_psize; + root->flags = P_BLEAF; + memset(meta, 0, t->bt_psize); + mpool_put(t->bt_mp, meta, MPOOL_DIRTY); + mpool_put(t->bt_mp, root, MPOOL_DIRTY); + return (RET_SUCCESS); +} + +static int +tmp() +{ +#ifdef SIG_BLOCK + sigset_t set, oset; +#else + int oset; +#endif + int fd; + char *envtmp; + char path[MAXPATHLEN]; + static char fn[] = "/bt.XXXXXX"; + + envtmp = getenv("TMPDIR"); + + /* this used to be done with snprintf(), but since snprintf + isn't in most operating systems, and overflow checking in + this case is easy, this is what is done */ + + if (envtmp && ((strlen(envtmp)+sizeof(fn)+1) > sizeof(path))) + return(-1); + + (void)sprintf(path, "%s%s", (envtmp ? envtmp : "/tmp"), fn); + +#ifdef SIG_BLOCK + (void)sigfillset(&set); + (void)sigprocmask(SIG_BLOCK, &set, &oset); +#else + oset = sigblock(~0); +#endif + if ((fd = mkstemp(path)) != -1) + (void)unlink(path); +#ifdef SIG_BLOCK + (void)sigprocmask(SIG_SETMASK, &oset, NULL); +#else + sigsetmask(oset); +#endif + return(fd); +} + +static int +byteorder() +{ + u_int32_t x; + u_char *p; + + x = 0x01020304; + p = (u_char *)&x; + switch (*p) { + case 1: + return (DB_BIG_ENDIAN); + case 4: + return (DB_LITTLE_ENDIAN); + default: + return (0); + } +} + +int +__bt_fd(dbp) + const DB *dbp; +{ + BTREE *t; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* In-memory database can't have a file descriptor. */ + if (F_ISSET(t, B_INMEM)) { + errno = ENOENT; + return (-1); + } + return (t->bt_fd); +} diff --git a/src/util/db2/btree/bt_overflow.c b/src/util/db2/btree/bt_overflow.c new file mode 100644 index 0000000000..8b1f597912 --- /dev/null +++ b/src/util/db2/btree/bt_overflow.c @@ -0,0 +1,228 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "db-int.h" +#include "btree.h" + +/* + * Big key/data code. + * + * Big key and data entries are stored on linked lists of pages. The initial + * reference is byte string stored with the key or data and is the page number + * and size. The actual record is stored in a chain of pages linked by the + * nextpg field of the PAGE header. + * + * The first page of the chain has a special property. If the record is used + * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set + * in the header. + * + * XXX + * A single DBT is written to each chain, so a lot of space on the last page + * is wasted. This is a fairly major bug for some data sets. + */ + +/* + * __OVFL_GET -- Get an overflow key/data item. + * + * Parameters: + * t: tree + * p: pointer to { db_pgno_t, u_int32_t } + * buf: storage address + * bufsz: storage size + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__ovfl_get(t, p, ssz, buf, bufsz) + BTREE *t; + void *p; + size_t *ssz; + void **buf; + size_t *bufsz; +{ + PAGE *h; + db_pgno_t pg; + size_t nb, plen; + u_int32_t sz; + + memmove(&pg, p, sizeof(db_pgno_t)); + memmove(&sz, (char *)p + sizeof(db_pgno_t), sizeof(u_int32_t)); + *ssz = sz; + +#ifdef DEBUG + if (pg == P_INVALID || sz == 0) + abort(); +#endif + /* Make the buffer bigger as necessary. */ + if (*bufsz < sz) { + *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz)); + if (*buf == NULL) + return (RET_ERROR); + *bufsz = sz; + } + + /* + * Step through the linked list of pages, copying the data on each one + * into the buffer. Never copy more than the data's length. + */ + plen = t->bt_psize - BTDATAOFF; + for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + nb = MIN(sz, plen); + memmove(p, (char *)h + BTDATAOFF, nb); + mpool_put(t->bt_mp, h, 0); + + if ((sz -= nb) == 0) + break; + } + return (RET_SUCCESS); +} + +/* + * __OVFL_PUT -- Store an overflow key/data item. + * + * Parameters: + * t: tree + * data: DBT to store + * pgno: storage page number + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__ovfl_put(t, dbt, pg) + BTREE *t; + const DBT *dbt; + db_pgno_t *pg; +{ + PAGE *h, *last; + void *p; + db_pgno_t npg; + size_t nb, plen; + u_int32_t sz; + + /* + * Allocate pages and copy the key/data record into them. Store the + * number of the first page in the chain. + */ + plen = t->bt_psize - BTDATAOFF; + for (last = NULL, p = dbt->data, sz = dbt->size;; + p = (char *)p + plen, last = h) { + if ((h = __bt_new(t, &npg)) == NULL) + return (RET_ERROR); + + h->pgno = npg; + h->nextpg = h->prevpg = P_INVALID; + h->flags = P_OVERFLOW; + h->lower = h->upper = 0; + + nb = MIN(sz, plen); + memmove((char *)h + BTDATAOFF, p, nb); + + if (last) { + last->nextpg = h->pgno; + mpool_put(t->bt_mp, last, MPOOL_DIRTY); + } else + *pg = h->pgno; + + if ((sz -= nb) == 0) { + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + } + } + return (RET_SUCCESS); +} + +/* + * __OVFL_DELETE -- Delete an overflow chain. + * + * Parameters: + * t: tree + * p: pointer to { db_pgno_t, u_int32_t } + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__ovfl_delete(t, p) + BTREE *t; + void *p; +{ + PAGE *h; + db_pgno_t pg; + size_t plen; + u_int32_t sz; + + memmove(&pg, p, sizeof(db_pgno_t)); + memmove(&sz, (char *)p + sizeof(db_pgno_t), sizeof(u_int32_t)); + +#ifdef DEBUG + if (pg == P_INVALID || sz == 0) + abort(); +#endif + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + /* Don't delete chains used by internal pages. */ + if (h->flags & P_PRESERVE) { + mpool_put(t->bt_mp, h, 0); + return (RET_SUCCESS); + } + + /* Step through the chain, calling the free routine for each page. */ + for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) { + pg = h->nextpg; + __bt_free(t, h); + if (sz <= plen) + break; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } + return (RET_SUCCESS); +} diff --git a/src/util/db2/btree/bt_page.c b/src/util/db2/btree/bt_page.c new file mode 100644 index 0000000000..cb65040a73 --- /dev/null +++ b/src/util/db2/btree/bt_page.c @@ -0,0 +1,98 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_page.c 8.4 (Berkeley) 11/2/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <stdio.h> + +#include "db-int.h" +#include "btree.h" + +/* + * __bt_free -- + * Put a page on the freelist. + * + * Parameters: + * t: tree + * h: page to free + * + * Returns: + * RET_ERROR, RET_SUCCESS + * + * Side-effect: + * mpool_put's the page. + */ +int +__bt_free(t, h) + BTREE *t; + PAGE *h; +{ + /* Insert the page at the head of the free list. */ + h->prevpg = P_INVALID; + h->nextpg = t->bt_free; + t->bt_free = h->pgno; + + /* Make sure the page gets written back. */ + return (mpool_put(t->bt_mp, h, MPOOL_DIRTY)); +} + +/* + * __bt_new -- + * Get a new page, preferably from the freelist. + * + * Parameters: + * t: tree + * npg: storage for page number. + * + * Returns: + * Pointer to a page, NULL on error. + */ +PAGE * +__bt_new(t, npg) + BTREE *t; + db_pgno_t *npg; +{ + PAGE *h; + + if (t->bt_free != P_INVALID && + (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) { + *npg = t->bt_free; + t->bt_free = h->nextpg; + return (h); + } + return (mpool_new(t->bt_mp, npg, MPOOL_PAGE_NEXT)); +} diff --git a/src/util/db2/btree/bt_put.c b/src/util/db2/btree/bt_put.c new file mode 100644 index 0000000000..abdb864294 --- /dev/null +++ b/src/util/db2/btree/bt_put.c @@ -0,0 +1,320 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "db-int.h" +#include "btree.h" + +static EPG *bt_fast __P((BTREE *, const DBT *, const DBT *, int *)); + +/* + * __BT_PUT -- Add a btree item to the tree. + * + * Parameters: + * dbp: pointer to access method + * key: key + * data: data + * flag: R_NOOVERWRITE + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the + * tree and R_NOOVERWRITE specified. + */ +int +__bt_put(dbp, key, data, flags) + const DB *dbp; + DBT *key; + const DBT *data; + u_int flags; +{ + BTREE *t; + DBT tkey, tdata; + EPG *e; + PAGE *h; + indx_t index, nxtindex; + db_pgno_t pg; + u_int32_t nbytes; + int dflags, exact, status; + char *dest, db[NOVFLSIZE], kb[NOVFLSIZE]; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Check for change to a read-only tree. */ + if (F_ISSET(t, B_RDONLY)) { + errno = EPERM; + return (RET_ERROR); + } + + switch (flags) { + case 0: + case R_NOOVERWRITE: + break; + case R_CURSOR: + /* + * If flags is R_CURSOR, put the cursor. Must already + * have started a scan and not have already deleted it. + */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, + CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE)) + break; + /* FALLTHROUGH */ + default: + errno = EINVAL; + return (RET_ERROR); + } + + /* + * If the key/data pair won't fit on a page, store it on overflow + * pages. Only put the key on the overflow page if the pair are + * still too big after moving the data to an overflow page. + * + * XXX + * If the insert fails later on, the overflow pages aren't recovered. + */ + dflags = 0; + if (key->size + data->size > t->bt_ovflsize) { + if (key->size > t->bt_ovflsize) { +storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) + return (RET_ERROR); + tkey.data = kb; + tkey.size = NOVFLSIZE; + memmove(kb, &pg, sizeof(db_pgno_t)); + memmove(kb + sizeof(db_pgno_t), + &key->size, sizeof(u_int32_t)); + dflags |= P_BIGKEY; + key = &tkey; + } + if (key->size + data->size > t->bt_ovflsize) { + if (__ovfl_put(t, data, &pg) == RET_ERROR) + return (RET_ERROR); + tdata.data = db; + tdata.size = NOVFLSIZE; + memmove(db, &pg, sizeof(db_pgno_t)); + memmove(db + sizeof(db_pgno_t), + &data->size, sizeof(u_int32_t)); + dflags |= P_BIGDATA; + data = &tdata; + } + if (key->size + data->size > t->bt_ovflsize) + goto storekey; + } + + /* Replace the cursor. */ + if (flags == R_CURSOR) { + if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL) + return (RET_ERROR); + index = t->bt_cursor.pg.index; + goto delete; + } + + /* + * Find the key to delete, or, the location at which to insert. + * Bt_fast and __bt_search both pin the returned page. + */ + if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL) + if ((e = __bt_search(t, key, &exact)) == NULL) + return (RET_ERROR); + h = e->page; + index = e->index; + + /* + * Add the key/data pair to the tree. If an identical key is already + * in the tree, and R_NOOVERWRITE is set, an error is returned. If + * R_NOOVERWRITE is not set, the key is either added (if duplicates are + * permitted) or an error is returned. + */ + switch (flags) { + case R_NOOVERWRITE: + if (!exact) + break; + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + default: + if (!exact || !F_ISSET(t, B_NODUPS)) + break; + /* + * !!! + * Note, the delete may empty the page, so we need to put a + * new entry into the page immediately. + */ +delete: if (__bt_dleaf(t, key, h, index) == RET_ERROR) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + break; + } + + /* + * If not enough room, or the user has put a ceiling on the number of + * keys permitted in the page, split the page. The split code will + * insert the key and data and unpin the current page. If inserting + * into the offset array, shift the pointers up. + */ + nbytes = NBLEAFDBT(key->size, data->size); + if (h->upper - h->lower < nbytes + sizeof(indx_t)) { + if ((status = __bt_split(t, h, key, + data, dflags, nbytes, index)) != RET_SUCCESS) + return (status); + goto success; + } + + if (index < (nxtindex = NEXTINDEX(h))) + memmove(h->linp + index + 1, h->linp + index, + (nxtindex - index) * sizeof(indx_t)); + h->lower += sizeof(indx_t); + + h->linp[index] = h->upper -= nbytes; + dest = (char *)h + h->upper; + WR_BLEAF(dest, key, data, dflags); + + /* If the cursor is on this page, adjust it as necessary. */ + if (F_ISSET(&t->bt_cursor, CURS_INIT) && + !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) && + t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= index) + ++t->bt_cursor.pg.index; + + if (t->bt_order == NOT) + if (h->nextpg == P_INVALID) { + if (index == NEXTINDEX(h) - 1) { + t->bt_order = FORWARD; + t->bt_last.index = index; + t->bt_last.pgno = h->pgno; + } + } else if (h->prevpg == P_INVALID) { + if (index == 0) { + t->bt_order = BACK; + t->bt_last.index = 0; + t->bt_last.pgno = h->pgno; + } + } + + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + +success: + if (flags == R_SETCURSOR) + __bt_setcur(t, e->page->pgno, e->index); + + F_SET(t, B_MODIFIED); + return (RET_SUCCESS); +} + +#ifdef STATISTICS +u_long bt_cache_hit, bt_cache_miss; +#endif + +/* + * BT_FAST -- Do a quick check for sorted data. + * + * Parameters: + * t: tree + * key: key to insert + * + * Returns: + * EPG for new record or NULL if not found. + */ +static EPG * +bt_fast(t, key, data, exactp) + BTREE *t; + const DBT *key, *data; + int *exactp; +{ + PAGE *h; + u_int32_t nbytes; + int cmp; + + if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) { + t->bt_order = NOT; + return (NULL); + } + t->bt_cur.page = h; + t->bt_cur.index = t->bt_last.index; + + /* + * If won't fit in this page or have too many keys in this page, + * have to search to get split stack. + */ + nbytes = NBLEAFDBT(key->size, data->size); + if (h->upper - h->lower < nbytes + sizeof(indx_t)) + goto miss; + + if (t->bt_order == FORWARD) { + if (t->bt_cur.page->nextpg != P_INVALID) + goto miss; + if (t->bt_cur.index != NEXTINDEX(h) - 1) + goto miss; + if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0) + goto miss; + t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index; + } else { + if (t->bt_cur.page->prevpg != P_INVALID) + goto miss; + if (t->bt_cur.index != 0) + goto miss; + if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0) + goto miss; + t->bt_last.index = 0; + } + *exactp = cmp == 0; +#ifdef STATISTICS + ++bt_cache_hit; +#endif + return (&t->bt_cur); + +miss: +#ifdef STATISTICS + ++bt_cache_miss; +#endif + t->bt_order = NOT; + mpool_put(t->bt_mp, h, 0); + return (NULL); +} diff --git a/src/util/db2/btree/bt_search.c b/src/util/db2/btree/bt_search.c new file mode 100644 index 0000000000..869f1ef3f4 --- /dev/null +++ b/src/util/db2/btree/bt_search.c @@ -0,0 +1,297 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_search.c 8.9 (Berkeley) 10/26/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <stdio.h> + +#include "db-int.h" +#include "btree.h" + +static int __bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); +static int __bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); + +/* + * __bt_search -- + * Search a btree for a key. + * + * Parameters: + * t: tree to search + * key: key to find + * exactp: pointer to exact match flag + * + * Returns: + * The EPG for matching record, if any, or the EPG for the location + * of the key, if it were inserted into the tree, is entered into + * the bt_cur field of the tree. A pointer to the field is returned. + */ +EPG * +__bt_search(t, key, exactp) + BTREE *t; + const DBT *key; + int *exactp; +{ + PAGE *h; + indx_t base, index, lim; + db_pgno_t pg; + int cmp; + + BT_CLR(t); + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (NULL); + + /* Do a binary search on the current page. */ + t->bt_cur.page = h; + for (base = 0, lim = NEXTINDEX(h); lim; lim >>= 1) { + t->bt_cur.index = index = base + (lim >> 1); + if ((cmp = __bt_cmp(t, key, &t->bt_cur)) == 0) { + if (h->flags & P_BLEAF) { + *exactp = 1; + return (&t->bt_cur); + } + goto next; + } + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * If it's a leaf page, we're almost done. If no duplicates + * are allowed, or we have an exact match, we're done. Else, + * it's possible that there were matching keys on this page, + * which later deleted, and we're on a page with no matches + * while there are matches on other pages. If at the start or + * end of a page, check the adjacent page. + */ + if (h->flags & P_BLEAF) { + if (!F_ISSET(t, B_NODUPS)) { + if (base == 0 && + h->prevpg != P_INVALID && + __bt_sprev(t, h, key, exactp)) + return (&t->bt_cur); + if (base == NEXTINDEX(h) && + h->nextpg != P_INVALID && + __bt_snext(t, h, key, exactp)) + return (&t->bt_cur); + } + *exactp = 0; + t->bt_cur.index = base; + return (&t->bt_cur); + } + + /* + * No match found. Base is the smallest index greater than + * key and may be zero or a last + 1 index. If it's non-zero, + * decrement by one, and record the internal page which should + * be a parent page for the key. If a split later occurs, the + * inserted page will be to the right of the saved page. + */ + index = base ? base - 1 : base; + +next: BT_PUSH(t, h->pgno, index); + pg = GETBINTERNAL(h, index)->pgno; + mpool_put(t->bt_mp, h, 0); + } +} + +/* + * __bt_snext -- + * Check for an exact match after the key. + * + * Parameters: + * t: tree + * h: current page + * key: key + * exactp: pointer to exact match flag + * + * Returns: + * If an exact match found. + */ +static int +__bt_snext(t, h, key, exactp) + BTREE *t; + PAGE *h; + const DBT *key; + int *exactp; +{ + BINTERNAL *bi; + EPG e; + EPGNO *parent; + indx_t index; + db_pgno_t pgno; + int level; + + /* + * Get the next page. The key is either an exact + * match, or not as good as the one we already have. + */ + if ((e.page = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) + return (0); + e.index = 0; + if (__bt_cmp(t, key, &e) != 0) { + mpool_put(t->bt_mp, e.page, 0); + return (0); + } + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + + /* + * Adjust the stack for the movement. + * + * Move up the stack. + */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (0); + + /* Move to the next index. */ + if (parent->index != NEXTINDEX(h) - 1) { + index = parent->index + 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + BT_PUSH(t, pgno, 0); + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (0); + index = 0; + } + mpool_put(t->bt_mp, h, 0); + return (1); +} + +/* + * __bt_sprev -- + * Check for an exact match before the key. + * + * Parameters: + * t: tree + * h: current page + * key: key + * exactp: pointer to exact match flag + * + * Returns: + * If an exact match found. + */ +static int +__bt_sprev(t, h, key, exactp) + BTREE *t; + PAGE *h; + const DBT *key; + int *exactp; +{ + BINTERNAL *bi; + EPG e; + EPGNO *parent; + indx_t index; + db_pgno_t pgno; + int level; + + /* + * Get the previous page. The key is either an exact + * match, or not as good as the one we already have. + */ + if ((e.page = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) + return (0); + e.index = NEXTINDEX(e.page) - 1; + if (__bt_cmp(t, key, &e) != 0) { + mpool_put(t->bt_mp, e.page, 0); + return (0); + } + + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + + /* + * Adjust the stack for the movement. + * + * Move up the stack. + */ + for (level = 0; (parent = BT_POP(t)) != NULL; ++level) { + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + return (1); + + /* Move to the next index. */ + if (parent->index != 0) { + index = parent->index - 1; + BT_PUSH(t, h->pgno, index); + break; + } + mpool_put(t->bt_mp, h, 0); + } + + /* Restore the stack. */ + while (level--) { + /* Push the next level down onto the stack. */ + bi = GETBINTERNAL(h, index); + pgno = bi->pgno; + + /* Lose the currently pinned page. */ + mpool_put(t->bt_mp, h, 0); + + /* Get the next level down. */ + if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL) + return (1); + + index = NEXTINDEX(h) - 1; + BT_PUSH(t, pgno, index); + } + mpool_put(t->bt_mp, h, 0); + return (1); +} diff --git a/src/util/db2/btree/bt_seq.c b/src/util/db2/btree/bt_seq.c new file mode 100644 index 0000000000..3e68c66a9c --- /dev/null +++ b/src/util/db2/btree/bt_seq.c @@ -0,0 +1,490 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_seq.c 8.9 (Berkeley) 6/20/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> + +#include "db-int.h" +#include "btree.h" + +static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); +static int __bt_seqadv __P((BTREE *, EPG *, int)); +static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); + +/* + * Sequential scan support. + * + * The tree can be scanned sequentially, starting from either end of the + * tree or from any specific key. A scan request before any scanning is + * done is initialized as starting from the least node. + */ + +/* + * __bt_seq -- + * Btree sequential scan interface. + * + * Parameters: + * dbp: pointer to access method + * key: key for positioning and return value + * data: data return value + * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +int +__bt_seq(dbp, key, data, flags) + const DB *dbp; + DBT *key, *data; + u_int flags; +{ + BTREE *t; + EPG e; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* + * If scan unitialized as yet, or starting at a specific record, set + * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin + * the page the cursor references if they're successful. + */ + switch (flags) { + case R_NEXT: + case R_PREV: + if (F_ISSET(&t->bt_cursor, CURS_INIT)) { + status = __bt_seqadv(t, &e, flags); + break; + } + /* FALLTHROUGH */ + case R_FIRST: + case R_LAST: + case R_CURSOR: + status = __bt_seqset(t, &e, key, flags); + break; + default: + errno = EINVAL; + return (RET_ERROR); + } + + if (status == RET_SUCCESS) { + __bt_setcur(t, e.page->pgno, e.index); + + status = + __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); + + /* + * If the user is doing concurrent access, we copied the + * key/data, toss the page. + */ + if (F_ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e.page, 0); + else + t->bt_pinned = e.page; + } + return (status); +} + +/* + * __bt_seqset -- + * Set the sequential scan to a specific key. + * + * Parameters: + * t: tree + * ep: storage for returned key + * key: key for initial scan position + * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV + * + * Side effects: + * Pins the page the cursor references. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +static int +__bt_seqset(t, ep, key, flags) + BTREE *t; + EPG *ep; + DBT *key; + int flags; +{ + PAGE *h; + db_pgno_t pg; + int exact; + + /* + * Find the first, last or specific key in the tree and point the + * cursor at it. The cursor may not be moved until a new key has + * been found. + */ + switch (flags) { + case R_CURSOR: /* Keyed scan. */ + /* + * Find the first instance of the key or the smallest key + * which is greater than or equal to the specified key. + */ + if (key->data == NULL || key->size == 0) { + errno = EINVAL; + return (RET_ERROR); + } + return (__bt_first(t, key, ep, &exact)); + case R_FIRST: /* First record. */ + case R_NEXT: + /* Walk down the left-hand side of the tree. */ + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + + if (h->flags & (P_BLEAF | P_RLEAF)) + break; + pg = GETBINTERNAL(h, 0)->pgno; + mpool_put(t->bt_mp, h, 0); + } + ep->page = h; + ep->index = 0; + break; + case R_LAST: /* Last record. */ + case R_PREV: + /* Walk down the right-hand side of the tree. */ + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + /* Check for an empty tree. */ + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + + if (h->flags & (P_BLEAF | P_RLEAF)) + break; + pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; + mpool_put(t->bt_mp, h, 0); + } + + ep->page = h; + ep->index = NEXTINDEX(h) - 1; + break; + } + return (RET_SUCCESS); +} + +/* + * __bt_seqadvance -- + * Advance the sequential scan. + * + * Parameters: + * t: tree + * flags: R_NEXT, R_PREV + * + * Side effects: + * Pins the page the new key/data record is on. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +static int +__bt_seqadv(t, ep, flags) + BTREE *t; + EPG *ep; + int flags; +{ + CURSOR *c; + PAGE *h; + indx_t index; + db_pgno_t pg; + int exact, rval; + + /* + * There are a couple of states that we can be in. The cursor has + * been initialized by the time we get here, but that's all we know. + */ + c = &t->bt_cursor; + + /* + * The cursor was deleted and there weren't any duplicate records, + * so the cursor's key was saved. Find out where that key would + * be in the current tree. If the returned key is an exact match, + * it means that a key/data pair was inserted into the tree after + * the delete. We could reasonably return the key, but the problem + * is that this is the access pattern we'll see if the user is + * doing seq(..., R_NEXT)/put(..., 0) pairs, i.e. the put deletes + * the cursor record and then replaces it, so the cursor was saved, + * and we'll simply return the same "new" record until the user + * notices and doesn't do a put() of it. Since the key is an exact + * match, we could as easily put the new record before the cursor, + * and we've made no guarantee to return it. So, move forward or + * back a record if it's an exact match. + * + * XXX + * In the current implementation, put's to the cursor are done with + * delete/add pairs. This has two consequences. First, it means + * that seq(..., R_NEXT)/put(..., R_CURSOR) pairs are going to exhibit + * the same behavior as above. Second, you can return the same key + * twice if you have duplicate records. The scenario is that the + * cursor record is deleted, moving the cursor forward or backward + * to a duplicate. The add then inserts the new record at a location + * ahead of the cursor because duplicates aren't sorted in any way, + * and the new record is later returned. This has to be fixed at some + * point. + */ + if (F_ISSET(c, CURS_ACQUIRE)) { + if ((rval = __bt_first(t, &c->key, ep, &exact)) == RET_ERROR) + return (RET_ERROR); + if (!exact) + return (rval); + /* + * XXX + * Kluge -- get, release, get the page. + */ + c->pg.pgno = ep->page->pgno; + c->pg.index = ep->index; + mpool_put(t->bt_mp, ep->page, 0); + } + + /* Get the page referenced by the cursor. */ + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) + return (RET_ERROR); + + /* + * Find the next/previous record in the tree and point the cursor at + * it. The cursor may not be moved until a new key has been found. + */ + switch (flags) { + case R_NEXT: /* Next record. */ + /* + * The cursor was deleted in duplicate records, and moved + * forward to a record that has yet to be returned. Clear + * that flag, and return the record. + */ + if (F_ISSET(c, CURS_AFTER)) + goto usecurrent; + index = c->pg.index; + if (++index == NEXTINDEX(h)) { + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + index = 0; + } + break; + case R_PREV: /* Previous record. */ + /* + * The cursor was deleted in duplicate records, and moved + * backward to a record that has yet to be returned. Clear + * that flag, and return the record. + */ + if (F_ISSET(c, CURS_BEFORE)) { +usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); + ep->page = h; + ep->index = c->pg.index; + return (RET_SUCCESS); + } + index = c->pg.index; + if (index == 0) { + pg = h->prevpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + index = NEXTINDEX(h) - 1; + } else + --index; + break; + } + + ep->page = h; + ep->index = index; + return (RET_SUCCESS); +} + +/* + * __bt_first -- + * Find the first entry. + * + * Parameters: + * t: the tree + * key: the key + * erval: return EPG + * exactp: pointer to exact match flag + * + * Returns: + * The first entry in the tree greater than or equal to key, + * or RET_SPECIAL if no such key exists. + */ +static int +__bt_first(t, key, erval, exactp) + BTREE *t; + const DBT *key; + EPG *erval; + int *exactp; +{ + PAGE *h; + EPG *ep, save; + db_pgno_t pg; + + /* + * Find any matching record; __bt_search pins the page. + * + * If it's an exact match and duplicates are possible, walk backwards + * in the tree until we find the first one. Otherwise, make sure it's + * a valid key (__bt_search may return an index just past the end of a + * page) and return it. + */ + if ((ep = __bt_search(t, key, exactp)) == NULL) + return (RET_SPECIAL); + if (*exactp) { + if (F_ISSET(t, B_NODUPS)) { + *erval = *ep; + return (RET_SUCCESS); + } + + /* + * Walk backwards, as long as the entry matches and there are + * keys left in the tree. Save a copy of each match in case + * we go too far. + */ + save = *ep; + h = ep->page; + do { + if (save.page->pgno != ep->page->pgno) { + mpool_put(t->bt_mp, save.page, 0); + save = *ep; + } else + save.index = ep->index; + + /* + * Don't unpin the page the last (or original) match + * was on, but make sure it's unpinned if an error + * occurs. + */ + if (ep->index == 0) { + if (h->prevpg == P_INVALID) + break; + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, + h->prevpg, 0)) == NULL) { + if (h->pgno == save.page->pgno) + mpool_put(t->bt_mp, + save.page, 0); + return (RET_ERROR); + } + ep->page = h; + ep->index = NEXTINDEX(h); + } + --ep->index; + } while (__bt_cmp(t, key, ep) == 0); + + /* + * Reach here with the last page that was looked at pinned, + * which may or may not be the same as the last (or original) + * match page. If it's not useful, release it. + */ + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + + *erval = save; + return (RET_SUCCESS); + } + + /* If at the end of a page, find the next entry. */ + if (ep->index == NEXTINDEX(ep->page)) { + h = ep->page; + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + ep->index = 0; + ep->page = h; + } + *erval = *ep; + return (RET_SUCCESS); +} + +/* + * __bt_setcur -- + * Set the cursor to an entry in the tree. + * + * Parameters: + * t: the tree + * pgno: page number + * index: page index + */ +void +__bt_setcur(t, pgno, index) + BTREE *t; + db_pgno_t pgno; + u_int index; +{ + /* Lose any already deleted key. */ + if (t->bt_cursor.key.data != NULL) { + free(t->bt_cursor.key.data); + t->bt_cursor.key.size = 0; + t->bt_cursor.key.data = NULL; + } + F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); + + /* Update the cursor. */ + t->bt_cursor.pg.pgno = pgno; + t->bt_cursor.pg.index = index; + F_SET(&t->bt_cursor, CURS_INIT); +} diff --git a/src/util/db2/btree/bt_split.c b/src/util/db2/btree/bt_split.c new file mode 100644 index 0000000000..0fc95baf3e --- /dev/null +++ b/src/util/db2/btree/bt_split.c @@ -0,0 +1,827 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_split.c 8.10 (Berkeley) 1/9/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "db-int.h" +#include "btree.h" + +static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); +static PAGE *bt_page + __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); +static int bt_preserve __P((BTREE *, db_pgno_t)); +static PAGE *bt_psplit + __P((BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t)); +static PAGE *bt_root + __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); +static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); +static recno_t rec_total __P((PAGE *)); + +#ifdef STATISTICS +u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; +#endif + +/* + * __BT_SPLIT -- Split the tree. + * + * Parameters: + * t: tree + * sp: page to split + * key: key to insert + * data: data to insert + * flags: BIGKEY/BIGDATA flags + * ilen: insert length + * skip: index to leave open + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__bt_split(t, sp, key, data, flags, ilen, argskip) + BTREE *t; + PAGE *sp; + const DBT *key, *data; + int flags; + size_t ilen; + u_int32_t argskip; +{ + BINTERNAL *bi; + BLEAF *bl, *tbl; + DBT a, b; + EPGNO *parent; + PAGE *h, *l, *r, *lchild, *rchild; + indx_t nxtindex; + u_int16_t skip; + u_int32_t n, nbytes, nksize; + int parentsplit; + char *dest; + + /* + * Split the page into two pages, l and r. The split routines return + * a pointer to the page into which the key should be inserted and with + * skip set to the offset which should be used. Additionally, l and r + * are pinned. + */ + skip = argskip; + h = sp->pgno == P_ROOT ? + bt_root(t, sp, &l, &r, &skip, ilen) : + bt_page(t, sp, &l, &r, &skip, ilen); + if (h == NULL) + return (RET_ERROR); + + /* + * Insert the new key/data pair into the leaf page. (Key inserts + * always cause a leaf page to split first.) + */ + h->linp[skip] = h->upper -= ilen; + dest = (char *)h + h->upper; + if (F_ISSET(t, R_RECNO)) + WR_RLEAF(dest, data, flags) + else + WR_BLEAF(dest, key, data, flags) + + /* If the root page was split, make it look right. */ + if (sp->pgno == P_ROOT && + (F_ISSET(t, R_RECNO) ? + bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) + goto err2; + + /* + * Now we walk the parent page stack -- a LIFO stack of the pages that + * were traversed when we searched for the page that split. Each stack + * entry is a page number and a page index offset. The offset is for + * the page traversed on the search. We've just split a page, so we + * have to insert a new key into the parent page. + * + * If the insert into the parent page causes it to split, may have to + * continue splitting all the way up the tree. We stop if the root + * splits or the page inserted into didn't have to split to hold the + * new key. Some algorithms replace the key for the old page as well + * as the new page. We don't, as there's no reason to believe that the + * first key on the old page is any better than the key we have, and, + * in the case of a key being placed at index 0 causing the split, the + * key is unavailable. + * + * There are a maximum of 5 pages pinned at any time. We keep the left + * and right pages pinned while working on the parent. The 5 are the + * two children, left parent and right parent (when the parent splits) + * and the root page or the overflow key page when calling bt_preserve. + * This code must make sure that all pins are released other than the + * root page or overflow page which is unlocked elsewhere. + */ + while ((parent = BT_POP(t)) != NULL) { + lchild = l; + rchild = r; + + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + goto err2; + + /* + * The new key goes ONE AFTER the index, because the split + * was to the right. + */ + skip = parent->index + 1; + + /* + * Calculate the space needed on the parent page. + * + * Prefix trees: space hack when inserting into BINTERNAL + * pages. Retain only what's needed to distinguish between + * the new entry and the LAST entry on the page to its left. + * If the keys compare equal, retain the entire key. Note, + * we don't touch overflow keys, and the entire key must be + * retained for the next-to-left most key on the leftmost + * page of each level, or the search will fail. Applicable + * ONLY to internal pages that have leaf pages as children. + * Further reduction of the key between pairs of internal + * pages loses too much information. + */ + switch (rchild->flags & P_TYPE) { + case P_BINTERNAL: + bi = GETBINTERNAL(rchild, 0); + nbytes = NBINTERNAL(bi->ksize); + break; + case P_BLEAF: + bl = GETBLEAF(rchild, 0); + nbytes = NBINTERNAL(bl->ksize); + if (t->bt_pfx && !(bl->flags & P_BIGKEY) && + (h->prevpg != P_INVALID || skip > 1)) { + tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); + a.size = tbl->ksize; + a.data = tbl->bytes; + b.size = bl->ksize; + b.data = bl->bytes; + nksize = t->bt_pfx(&a, &b); + n = NBINTERNAL(nksize); + if (n < nbytes) { +#ifdef STATISTICS + bt_pfxsaved += nbytes - n; +#endif + nbytes = n; + } else + nksize = 0; + } else + nksize = 0; + break; + case P_RINTERNAL: + case P_RLEAF: + nbytes = NRINTERNAL; + break; + default: + abort(); + } + + /* Split the parent page if necessary or shift the indices. */ + if (h->upper - h->lower < nbytes + sizeof(indx_t)) { + sp = h; + h = h->pgno == P_ROOT ? + bt_root(t, h, &l, &r, &skip, nbytes) : + bt_page(t, h, &l, &r, &skip, nbytes); + if (h == NULL) + goto err1; + parentsplit = 1; + } else { + if (skip < (nxtindex = NEXTINDEX(h))) + memmove(h->linp + skip + 1, h->linp + skip, + (nxtindex - skip) * sizeof(indx_t)); + h->lower += sizeof(indx_t); + parentsplit = 0; + } + + /* Insert the key into the parent page. */ + switch (rchild->flags & P_TYPE) { + case P_BINTERNAL: + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + memmove(dest, bi, nbytes); + ((BINTERNAL *)dest)->pgno = rchild->pgno; + break; + case P_BLEAF: + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, + rchild->pgno, bl->flags & P_BIGKEY); + memmove(dest, bl->bytes, nksize ? nksize : bl->ksize); + if (bl->flags & P_BIGKEY && + bt_preserve(t, *(db_pgno_t *)bl->bytes) == RET_ERROR) + goto err1; + break; + case P_RINTERNAL: + /* + * Update the left page count. If split + * added at index 0, fix the correct page. + */ + if (skip > 0) + dest = (char *)h + h->linp[skip - 1]; + else + dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; + ((RINTERNAL *)dest)->nrecs = rec_total(lchild); + ((RINTERNAL *)dest)->pgno = lchild->pgno; + + /* Update the right page count. */ + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + ((RINTERNAL *)dest)->nrecs = rec_total(rchild); + ((RINTERNAL *)dest)->pgno = rchild->pgno; + break; + case P_RLEAF: + /* + * Update the left page count. If split + * added at index 0, fix the correct page. + */ + if (skip > 0) + dest = (char *)h + h->linp[skip - 1]; + else + dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; + ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); + ((RINTERNAL *)dest)->pgno = lchild->pgno; + + /* Update the right page count. */ + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); + ((RINTERNAL *)dest)->pgno = rchild->pgno; + break; + default: + abort(); + } + + /* Unpin the held pages. */ + if (!parentsplit) { + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + } + + /* If the root page was split, make it look right. */ + if (sp->pgno == P_ROOT && + (F_ISSET(t, R_RECNO) ? + bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) + goto err1; + + mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); + mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); + } + + /* Unpin the held pages. */ + mpool_put(t->bt_mp, l, MPOOL_DIRTY); + mpool_put(t->bt_mp, r, MPOOL_DIRTY); + + /* Clear any pages left on the stack. */ + return (RET_SUCCESS); + + /* + * If something fails in the above loop we were already walking back + * up the tree and the tree is now inconsistent. Nothing much we can + * do about it but release any memory we're holding. + */ +err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); + mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); + +err2: mpool_put(t->bt_mp, l, 0); + mpool_put(t->bt_mp, r, 0); + __dbpanic(t->bt_dbp); + return (RET_ERROR); +} + +/* + * BT_PAGE -- Split a non-root page of a btree. + * + * Parameters: + * t: tree + * h: root page + * lp: pointer to left page pointer + * rp: pointer to right page pointer + * skip: pointer to index to leave open + * ilen: insert length + * + * Returns: + * Pointer to page in which to insert or NULL on error. + */ +static PAGE * +bt_page(t, h, lp, rp, skip, ilen) + BTREE *t; + PAGE *h, **lp, **rp; + indx_t *skip; + size_t ilen; +{ + PAGE *l, *r, *tp; + db_pgno_t npg; + +#ifdef STATISTICS + ++bt_split; +#endif + /* Put the new right page for the split into place. */ + if ((r = __bt_new(t, &npg)) == NULL) + return (NULL); + r->pgno = npg; + r->lower = BTDATAOFF; + r->upper = t->bt_psize; + r->nextpg = h->nextpg; + r->prevpg = h->pgno; + r->flags = h->flags & P_TYPE; + + /* + * If we're splitting the last page on a level because we're appending + * a key to it (skip is NEXTINDEX()), it's likely that the data is + * sorted. Adding an empty page on the side of the level is less work + * and can push the fill factor much higher than normal. If we're + * wrong it's no big deal, we'll just do the split the right way next + * time. It may look like it's equally easy to do a similar hack for + * reverse sorted data, that is, split the tree left, but it's not. + * Don't even try. + */ + if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { +#ifdef STATISTICS + ++bt_sortsplit; +#endif + h->nextpg = r->pgno; + r->lower = BTDATAOFF + sizeof(indx_t); + *skip = 0; + *lp = h; + *rp = r; + return (r); + } + + /* Put the new left page for the split into place. */ + if ((l = (PAGE *)malloc(t->bt_psize)) == NULL) { + mpool_put(t->bt_mp, r, 0); + return (NULL); + } +#ifdef PURIFY + memset(l, 0xff, t->bt_psize); +#endif + l->pgno = h->pgno; + l->nextpg = r->pgno; + l->prevpg = h->prevpg; + l->lower = BTDATAOFF; + l->upper = t->bt_psize; + l->flags = h->flags & P_TYPE; + + /* Fix up the previous pointer of the page after the split page. */ + if (h->nextpg != P_INVALID) { + if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { + free(l); + /* XXX mpool_free(t->bt_mp, r->pgno); */ + return (NULL); + } + tp->prevpg = r->pgno; + mpool_put(t->bt_mp, tp, MPOOL_DIRTY); + } + + /* + * Split right. The key/data pairs aren't sorted in the btree page so + * it's simpler to copy the data from the split page onto two new pages + * instead of copying half the data to the right page and compacting + * the left page in place. Since the left page can't change, we have + * to swap the original and the allocated left page after the split. + */ + tp = bt_psplit(t, h, l, r, skip, ilen); + + /* Move the new left page onto the old left page. */ + memmove(h, l, t->bt_psize); + if (tp == l) + tp = h; + free(l); + + *lp = h; + *rp = r; + return (tp); +} + +/* + * BT_ROOT -- Split the root page of a btree. + * + * Parameters: + * t: tree + * h: root page + * lp: pointer to left page pointer + * rp: pointer to right page pointer + * skip: pointer to index to leave open + * ilen: insert length + * + * Returns: + * Pointer to page in which to insert or NULL on error. + */ +static PAGE * +bt_root(t, h, lp, rp, skip, ilen) + BTREE *t; + PAGE *h, **lp, **rp; + indx_t *skip; + size_t ilen; +{ + PAGE *l, *r, *tp; + db_pgno_t lnpg, rnpg; + +#ifdef STATISTICS + ++bt_split; + ++bt_rootsplit; +#endif + /* Put the new left and right pages for the split into place. */ + if ((l = __bt_new(t, &lnpg)) == NULL || + (r = __bt_new(t, &rnpg)) == NULL) + return (NULL); + l->pgno = lnpg; + r->pgno = rnpg; + l->nextpg = r->pgno; + r->prevpg = l->pgno; + l->prevpg = r->nextpg = P_INVALID; + l->lower = r->lower = BTDATAOFF; + l->upper = r->upper = t->bt_psize; + l->flags = r->flags = h->flags & P_TYPE; + + /* Split the root page. */ + tp = bt_psplit(t, h, l, r, skip, ilen); + + *lp = l; + *rp = r; + return (tp); +} + +/* + * BT_RROOT -- Fix up the recno root page after it has been split. + * + * Parameters: + * t: tree + * h: root page + * l: left page + * r: right page + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +bt_rroot(t, h, l, r) + BTREE *t; + PAGE *h, *l, *r; +{ + char *dest; + + /* Insert the left and right keys, set the header information. */ + h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; + dest = (char *)h + h->upper; + WR_RINTERNAL(dest, + l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); + + h->linp[1] = h->upper -= NRINTERNAL; + dest = (char *)h + h->upper; + WR_RINTERNAL(dest, + r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); + + h->lower = BTDATAOFF + 2 * sizeof(indx_t); + + /* Unpin the root page, set to recno internal page. */ + h->flags &= ~P_TYPE; + h->flags |= P_RINTERNAL; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + return (RET_SUCCESS); +} + +/* + * BT_BROOT -- Fix up the btree root page after it has been split. + * + * Parameters: + * t: tree + * h: root page + * l: left page + * r: right page + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +bt_broot(t, h, l, r) + BTREE *t; + PAGE *h, *l, *r; +{ + BINTERNAL *bi; + BLEAF *bl; + u_int32_t nbytes; + char *dest; + + /* + * If the root page was a leaf page, change it into an internal page. + * We copy the key we split on (but not the key's data, in the case of + * a leaf page) to the new root page. + * + * The btree comparison code guarantees that the left-most key on any + * level of the tree is never used, so it doesn't need to be filled in. + */ + nbytes = NBINTERNAL(0); + h->linp[0] = h->upper = t->bt_psize - nbytes; + dest = (char *)h + h->upper; + WR_BINTERNAL(dest, 0, l->pgno, 0); + + switch (h->flags & P_TYPE) { + case P_BLEAF: + bl = GETBLEAF(r, 0); + nbytes = NBINTERNAL(bl->ksize); + h->linp[1] = h->upper -= nbytes; + dest = (char *)h + h->upper; + WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); + memmove(dest, bl->bytes, bl->ksize); + + /* + * If the key is on an overflow page, mark the overflow chain + * so it isn't deleted when the leaf copy of the key is deleted. + */ + if (bl->flags & P_BIGKEY && + bt_preserve(t, *(db_pgno_t *)bl->bytes) == RET_ERROR) + return (RET_ERROR); + break; + case P_BINTERNAL: + bi = GETBINTERNAL(r, 0); + nbytes = NBINTERNAL(bi->ksize); + h->linp[1] = h->upper -= nbytes; + dest = (char *)h + h->upper; + memmove(dest, bi, nbytes); + ((BINTERNAL *)dest)->pgno = r->pgno; + break; + default: + abort(); + } + + /* There are two keys on the page. */ + h->lower = BTDATAOFF + 2 * sizeof(indx_t); + + /* Unpin the root page, set to btree internal page. */ + h->flags &= ~P_TYPE; + h->flags |= P_BINTERNAL; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + return (RET_SUCCESS); +} + +/* + * BT_PSPLIT -- Do the real work of splitting the page. + * + * Parameters: + * t: tree + * h: page to be split + * l: page to put lower half of data + * r: page to put upper half of data + * pskip: pointer to index to leave open + * ilen: insert length + * + * Returns: + * Pointer to page in which to insert. + */ +static PAGE * +bt_psplit(t, h, l, r, pskip, ilen) + BTREE *t; + PAGE *h, *l, *r; + indx_t *pskip; + size_t ilen; +{ + BINTERNAL *bi; + BLEAF *bl; + CURSOR *c; + RLEAF *rl; + PAGE *rval; + void *src; + indx_t full, half, nxt, off, skip, top, used; + u_int32_t nbytes; + int bigkeycnt, isbigkey; + + /* + * Split the data to the left and right pages. Leave the skip index + * open. Additionally, make some effort not to split on an overflow + * key. This makes internal page processing faster and can save + * space as overflow keys used by internal pages are never deleted. + */ + bigkeycnt = 0; + skip = *pskip; + full = t->bt_psize - BTDATAOFF; + half = full / 2; + used = 0; + for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { + if (skip == off) { + nbytes = ilen; + isbigkey = 0; /* XXX: not really known. */ + } else + switch (h->flags & P_TYPE) { + case P_BINTERNAL: + src = bi = GETBINTERNAL(h, nxt); + nbytes = NBINTERNAL(bi->ksize); + isbigkey = bi->flags & P_BIGKEY; + break; + case P_BLEAF: + src = bl = GETBLEAF(h, nxt); + nbytes = NBLEAF(bl); + isbigkey = bl->flags & P_BIGKEY; + break; + case P_RINTERNAL: + src = GETRINTERNAL(h, nxt); + nbytes = NRINTERNAL; + isbigkey = 0; + break; + case P_RLEAF: + src = rl = GETRLEAF(h, nxt); + nbytes = NRLEAF(rl); + isbigkey = 0; + break; + default: + abort(); + } + + /* + * If the key/data pairs are substantial fractions of the max + * possible size for the page, it's possible to get situations + * where we decide to try and copy too much onto the left page. + * Make sure that doesn't happen. + */ + if (skip <= off && used + nbytes >= full || nxt == top - 1) { + --off; + break; + } + + /* Copy the key/data pair, if not the skipped index. */ + if (skip != off) { + ++nxt; + + l->linp[off] = l->upper -= nbytes; + memmove((char *)l + l->upper, src, nbytes); + } + + used += nbytes; + if (used >= half) { + if (!isbigkey || bigkeycnt == 3) + break; + else + ++bigkeycnt; + } + } + + /* + * Off is the last offset that's valid for the left page. + * Nxt is the first offset to be placed on the right page. + */ + l->lower += (off + 1) * sizeof(indx_t); + + /* + * If splitting the page that the cursor was on, the cursor has to be + * adjusted to point to the same record as before the split. If the + * cursor is at or past the skipped slot, the cursor is incremented by + * one. If the cursor is on the right page, it is decremented by the + * number of records split to the left page. + */ + c = &t->bt_cursor; + if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) { + if (c->pg.index >= skip) + ++c->pg.index; + if (c->pg.index < nxt) /* Left page. */ + c->pg.pgno = l->pgno; + else { /* Right page. */ + c->pg.pgno = r->pgno; + c->pg.index -= nxt; + } + } + + /* + * If the skipped index was on the left page, just return that page. + * Otherwise, adjust the skip index to reflect the new position on + * the right page. + */ + if (skip <= off) { + skip = 0; + rval = l; + } else { + rval = r; + *pskip -= nxt; + } + + for (off = 0; nxt < top; ++off) { + if (skip == nxt) { + ++off; + skip = 0; + } + switch (h->flags & P_TYPE) { + case P_BINTERNAL: + src = bi = GETBINTERNAL(h, nxt); + nbytes = NBINTERNAL(bi->ksize); + break; + case P_BLEAF: + src = bl = GETBLEAF(h, nxt); + nbytes = NBLEAF(bl); + break; + case P_RINTERNAL: + src = GETRINTERNAL(h, nxt); + nbytes = NRINTERNAL; + break; + case P_RLEAF: + src = rl = GETRLEAF(h, nxt); + nbytes = NRLEAF(rl); + break; + default: + abort(); + } + ++nxt; + r->linp[off] = r->upper -= nbytes; + memmove((char *)r + r->upper, src, nbytes); + } + r->lower += off * sizeof(indx_t); + + /* If the key is being appended to the page, adjust the index. */ + if (skip == top) + r->lower += sizeof(indx_t); + + return (rval); +} + +/* + * BT_PRESERVE -- Mark a chain of pages as used by an internal node. + * + * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the + * record that references them gets deleted. Chains pointed to by internal + * pages never get deleted. This routine marks a chain as pointed to by an + * internal page. + * + * Parameters: + * t: tree + * pg: page number of first page in the chain. + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +static int +bt_preserve(t, pg) + BTREE *t; + db_pgno_t pg; +{ + PAGE *h; + + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + h->flags |= P_PRESERVE; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); +} + +/* + * REC_TOTAL -- Return the number of recno entries below a page. + * + * Parameters: + * h: page + * + * Returns: + * The number of recno entries below a page. + * + * XXX + * These values could be set by the bt_psplit routine. The problem is that the + * entry has to be popped off of the stack etc. or the values have to be passed + * all the way back to bt_split/bt_rroot and it's not very clean. + */ +static recno_t +rec_total(h) + PAGE *h; +{ + recno_t recs; + indx_t nxt, top; + + for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) + recs += GETRINTERNAL(h, nxt)->nrecs; + return (recs); +} diff --git a/src/util/db2/btree/bt_utils.c b/src/util/db2/btree/bt_utils.c new file mode 100644 index 0000000000..1a34598ad6 --- /dev/null +++ b/src/util/db2/btree/bt_utils.c @@ -0,0 +1,260 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_utils.c 8.8 (Berkeley) 7/20/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "db-int.h" +#include "btree.h" + +/* + * __bt_ret -- + * Build return key/data pair. + * + * Parameters: + * t: tree + * e: key/data pair to be returned + * key: user's key structure (NULL if not to be filled in) + * rkey: memory area to hold key + * data: user's data structure (NULL if not to be filled in) + * rdata: memory area to hold data + * copy: always copy the key/data item + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__bt_ret(t, e, key, rkey, data, rdata, copy) + BTREE *t; + EPG *e; + DBT *key, *rkey, *data, *rdata; + int copy; +{ + BLEAF *bl; + void *p; + + bl = GETBLEAF(e->page, e->index); + + /* + * We must copy big keys/data to make them contigous. Otherwise, + * leave the page pinned and don't copy unless the user specified + * concurrent access. + */ + if (key == NULL) + goto dataonly; + + if (bl->flags & P_BIGKEY) { + if (__ovfl_get(t, bl->bytes, + &key->size, &rkey->data, &rkey->size)) + return (RET_ERROR); + key->data = rkey->data; + } else if (copy || F_ISSET(t, B_DB_LOCK)) { + if (bl->ksize > rkey->size) { + p = (void *)(rkey->data == NULL ? + malloc(bl->ksize) : realloc(rkey->data, bl->ksize)); + if (p == NULL) + return (RET_ERROR); + rkey->data = p; + rkey->size = bl->ksize; + } + memmove(rkey->data, bl->bytes, bl->ksize); + key->size = bl->ksize; + key->data = rkey->data; + } else { + key->size = bl->ksize; + key->data = bl->bytes; + } + +dataonly: + if (data == NULL) + return (RET_SUCCESS); + + if (bl->flags & P_BIGDATA) { + if (__ovfl_get(t, bl->bytes + bl->ksize, + &data->size, &rdata->data, &rdata->size)) + return (RET_ERROR); + data->data = rdata->data; + } else if (copy || F_ISSET(t, B_DB_LOCK)) { + /* Use +1 in case the first record retrieved is 0 length. */ + if (bl->dsize + 1 > rdata->size) { + p = (void *)(rdata->data == NULL ? + malloc(bl->dsize + 1) : + realloc(rdata->data, bl->dsize + 1)); + if (p == NULL) + return (RET_ERROR); + rdata->data = p; + rdata->size = bl->dsize + 1; + } + memmove(rdata->data, bl->bytes + bl->ksize, bl->dsize); + data->size = bl->dsize; + data->data = rdata->data; + } else { + data->size = bl->dsize; + data->data = bl->bytes + bl->ksize; + } + + return (RET_SUCCESS); +} + +/* + * __BT_CMP -- Compare a key to a given record. + * + * Parameters: + * t: tree + * k1: DBT pointer of first arg to comparison + * e: pointer to EPG for comparison + * + * Returns: + * < 0 if k1 is < record + * = 0 if k1 is = record + * > 0 if k1 is > record + */ +int +__bt_cmp(t, k1, e) + BTREE *t; + const DBT *k1; + EPG *e; +{ + BINTERNAL *bi; + BLEAF *bl; + DBT k2; + PAGE *h; + void *bigkey; + + /* + * The left-most key on internal pages, at any level of the tree, is + * guaranteed by the following code to be less than any user key. + * This saves us from having to update the leftmost key on an internal + * page when the user inserts a new key in the tree smaller than + * anything we've yet seen. + */ + h = e->page; + if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & P_BLEAF)) + return (1); + + bigkey = NULL; + if (h->flags & P_BLEAF) { + bl = GETBLEAF(h, e->index); + if (bl->flags & P_BIGKEY) + bigkey = bl->bytes; + else { + k2.data = bl->bytes; + k2.size = bl->ksize; + } + } else { + bi = GETBINTERNAL(h, e->index); + if (bi->flags & P_BIGKEY) + bigkey = bi->bytes; + else { + k2.data = bi->bytes; + k2.size = bi->ksize; + } + } + + if (bigkey) { + if (__ovfl_get(t, bigkey, + &k2.size, &t->bt_rdata.data, &t->bt_rdata.size)) + return (RET_ERROR); + k2.data = t->bt_rdata.data; + } + return ((*t->bt_cmp)(k1, &k2)); +} + +/* + * __BT_DEFCMP -- Default comparison routine. + * + * Parameters: + * a: DBT #1 + * b: DBT #2 + * + * Returns: + * < 0 if a is < b + * = 0 if a is = b + * > 0 if a is > b + */ +int +__bt_defcmp(a, b) + const DBT *a, *b; +{ + register size_t len; + register u_char *p1, *p2; + + /* + * XXX + * If a size_t doesn't fit in an int, this routine can lose. + * What we need is a integral type which is guaranteed to be + * larger than a size_t, and there is no such thing. + */ + len = MIN(a->size, b->size); + for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2) + if (*p1 != *p2) + return ((int)*p1 - (int)*p2); + return ((int)a->size - (int)b->size); +} + +/* + * __BT_DEFPFX -- Default prefix routine. + * + * Parameters: + * a: DBT #1 + * b: DBT #2 + * + * Returns: + * Number of bytes needed to distinguish b from a. + */ +size_t +__bt_defpfx(a, b) + const DBT *a, *b; +{ + register u_char *p1, *p2; + register size_t cnt, len; + + cnt = 1; + len = MIN(a->size, b->size); + for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt) + if (*p1 != *p2) + return (cnt); + + /* a->size must be <= b->size, or they wouldn't be in this order. */ + return (a->size < b->size ? a->size + 1 : a->size); +} diff --git a/src/util/db2/btree/btree.h b/src/util/db2/btree/btree.h new file mode 100644 index 0000000000..171712749f --- /dev/null +++ b/src/util/db2/btree/btree.h @@ -0,0 +1,383 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)btree.h 8.11 (Berkeley) 8/17/94 + */ + +/* Macros to set/clear/test flags. */ +#define F_SET(p, f) (p)->flags |= (f) +#define F_CLR(p, f) (p)->flags &= ~(f) +#define F_ISSET(p, f) ((p)->flags & (f)) + +#include "mpool.h" + +#define DEFMINKEYPAGE (2) /* Minimum keys per page */ +#define MINCACHE (5) /* Minimum cached pages */ +#define MINPSIZE (512) /* Minimum page size */ + +/* + * Page 0 of a btree file contains a copy of the meta-data. This page is also + * used as an out-of-band page, i.e. page pointers that point to nowhere point + * to page 0. Page 1 is the root of the btree. + */ +#define P_INVALID 0 /* Invalid tree page number. */ +#define P_META 0 /* Tree metadata page number. */ +#define P_ROOT 1 /* Tree root page number. */ + +/* + * There are five page layouts in the btree: btree internal pages (BINTERNAL), + * btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages + * (RLEAF) and overflow pages. All five page types have a page header (PAGE). + * This implementation requires that values within structures NOT be padded. + * (ANSI C permits random padding.) If your compiler pads randomly you'll have + * to do some work to get this package to run. + */ +typedef struct _page { + db_pgno_t pgno; /* this page's page number */ + db_pgno_t prevpg; /* left sibling */ + db_pgno_t nextpg; /* right sibling */ + +#define P_BINTERNAL 0x01 /* btree internal page */ +#define P_BLEAF 0x02 /* leaf page */ +#define P_OVERFLOW 0x04 /* overflow page */ +#define P_RINTERNAL 0x08 /* recno internal page */ +#define P_RLEAF 0x10 /* leaf page */ +#define P_TYPE 0x1f /* type mask */ +#define P_PRESERVE 0x20 /* never delete this chain of pages */ + u_int32_t flags; + + indx_t lower; /* lower bound of free space on page */ + indx_t upper; /* upper bound of free space on page */ + indx_t linp[1]; /* indx_t-aligned VAR. LENGTH DATA */ +} PAGE; + +/* First and next index. */ +#define BTDATAOFF \ + (sizeof(db_pgno_t) + sizeof(db_pgno_t) + sizeof(db_pgno_t) + \ + sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) +#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t)) + +/* + * For pages other than overflow pages, there is an array of offsets into the + * rest of the page immediately following the page header. Each offset is to + * an item which is unique to the type of page. The h_lower offset is just + * past the last filled-in index. The h_upper offset is the first item on the + * page. Offsets are from the beginning of the page. + * + * If an item is too big to store on a single page, a flag is set and the item + * is a { page, size } pair such that the page is the first page of an overflow + * chain with size bytes of item. Overflow pages are simply bytes without any + * external structure. + * + * The page number and size fields in the items are db_pgno_t-aligned so they can + * be manipulated without copying. (This presumes that 32 bit items can be + * manipulated on this system.) + */ +#define LALIGN(n) (((n) + sizeof(db_pgno_t) - 1) & ~(sizeof(db_pgno_t) - 1)) +#define NOVFLSIZE (sizeof(db_pgno_t) + sizeof(u_int32_t)) + +/* + * For the btree internal pages, the item is a key. BINTERNALs are {key, pgno} + * pairs, such that the key compares less than or equal to all of the records + * on that page. For a tree without duplicate keys, an internal page with two + * consecutive keys, a and b, will have all records greater than or equal to a + * and less than b stored on the page associated with a. Duplicate keys are + * somewhat special and can cause duplicate internal and leaf page records and + * some minor modifications of the above rule. + */ +typedef struct _binternal { + u_int32_t ksize; /* key size */ + db_pgno_t pgno; /* page number stored on */ +#define P_BIGDATA 0x01 /* overflow data */ +#define P_BIGKEY 0x02 /* overflow key */ + u_char flags; + char bytes[1]; /* data */ +} BINTERNAL; + +/* Get the page's BINTERNAL structure at index indx. */ +#define GETBINTERNAL(pg, indx) \ + ((BINTERNAL *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NBINTERNAL(len) \ + LALIGN(sizeof(u_int32_t) + sizeof(db_pgno_t) + sizeof(u_char) + (len)) + +/* Copy a BINTERNAL entry to the page. */ +#define WR_BINTERNAL(p, size, pgno, flags) { \ + *(u_int32_t *)p = size; \ + p += sizeof(u_int32_t); \ + *(db_pgno_t *)p = pgno; \ + p += sizeof(db_pgno_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ +} + +/* + * For the recno internal pages, the item is a page number with the number of + * keys found on that page and below. + */ +typedef struct _rinternal { + recno_t nrecs; /* number of records */ + db_pgno_t pgno; /* page number stored below */ +} RINTERNAL; + +/* Get the page's RINTERNAL structure at index indx. */ +#define GETRINTERNAL(pg, indx) \ + ((RINTERNAL *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NRINTERNAL \ + LALIGN(sizeof(recno_t) + sizeof(db_pgno_t)) + +/* Copy a RINTERAL entry to the page. */ +#define WR_RINTERNAL(p, nrecs, pgno) { \ + *(recno_t *)p = nrecs; \ + p += sizeof(recno_t); \ + *(db_pgno_t *)p = pgno; \ +} + +/* For the btree leaf pages, the item is a key and data pair. */ +typedef struct _bleaf { + u_int32_t ksize; /* size of key */ + u_int32_t dsize; /* size of data */ + u_char flags; /* P_BIGDATA, P_BIGKEY */ + char bytes[1]; /* data */ +} BLEAF; + +/* Get the page's BLEAF structure at index indx. */ +#define GETBLEAF(pg, indx) \ + ((BLEAF *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize) + +/* Get the number of bytes in the user's key/data pair. */ +#define NBLEAFDBT(ksize, dsize) \ + LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \ + (ksize) + (dsize)) + +/* Copy a BLEAF entry to the page. */ +#define WR_BLEAF(p, key, data, flags) { \ + *(u_int32_t *)p = key->size; \ + p += sizeof(u_int32_t); \ + *(u_int32_t *)p = data->size; \ + p += sizeof(u_int32_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, key->data, key->size); \ + p += key->size; \ + memmove(p, data->data, data->size); \ +} + +/* For the recno leaf pages, the item is a data entry. */ +typedef struct _rleaf { + u_int32_t dsize; /* size of data */ + u_char flags; /* P_BIGDATA */ + char bytes[1]; +} RLEAF; + +/* Get the page's RLEAF structure at index indx. */ +#define GETRLEAF(pg, indx) \ + ((RLEAF *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NRLEAF(p) NRLEAFDBT((p)->dsize) + +/* Get the number of bytes from the user's data. */ +#define NRLEAFDBT(dsize) \ + LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize)) + +/* Copy a RLEAF entry to the page. */ +#define WR_RLEAF(p, data, flags) { \ + *(u_int32_t *)p = data->size; \ + p += sizeof(u_int32_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, data->data, data->size); \ +} + +/* + * A record in the tree is either a pointer to a page and an index in the page + * or a page number and an index. These structures are used as a cursor, stack + * entry and search returns as well as to pass records to other routines. + * + * One comment about searches. Internal page searches must find the largest + * record less than key in the tree so that descents work. Leaf page searches + * must find the smallest record greater than key so that the returned index + * is the record's correct position for insertion. + */ +typedef struct _epgno { + db_pgno_t pgno; /* the page number */ + indx_t index; /* the index on the page */ +} EPGNO; + +typedef struct _epg { + PAGE *page; /* the (pinned) page */ + indx_t index; /* the index on the page */ +} EPG; + +/* + * About cursors. The cursor (and the page that contained the key/data pair + * that it referenced) can be deleted, which makes things a bit tricky. If + * there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set + * or there simply aren't any duplicates of the key) we copy the key that it + * referenced when it's deleted, and reacquire a new cursor key if the cursor + * is used again. If there are duplicates keys, we move to the next/previous + * key, and set a flag so that we know what happened. NOTE: if duplicate (to + * the cursor) keys are added to the tree during this process, it is undefined + * if they will be returned or not in a cursor scan. + * + * The flags determine the possible states of the cursor: + * + * CURS_INIT The cursor references *something*. + * CURS_ACQUIRE The cursor was deleted, and a key has been saved so that + * we can reacquire the right position in the tree. + * CURS_AFTER, CURS_BEFORE + * The cursor was deleted, and now references a key/data pair + * that has not yet been returned, either before or after the + * deleted key/data pair. + * XXX + * This structure is broken out so that we can eventually offer multiple + * cursors as part of the DB interface. + */ +typedef struct _cursor { + EPGNO pg; /* B: Saved tree reference. */ + DBT key; /* B: Saved key, or key.data == NULL. */ + recno_t rcursor; /* R: recno cursor (1-based) */ + +#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */ +#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */ +#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */ +#define CURS_INIT 0x08 /* RB: Cursor initialized. */ + u_int8_t flags; +} CURSOR; + +/* + * The metadata of the tree. The nrecs field is used only by the RECNO code. + * This is because the btree doesn't really need it and it requires that every + * put or delete call modify the metadata. + */ +typedef struct _btmeta { + u_int32_t magic; /* magic number */ + u_int32_t version; /* version */ + u_int32_t psize; /* page size */ + u_int32_t free; /* page number of first free page */ + u_int32_t nrecs; /* R: number of records */ + +#define SAVEMETA (B_NODUPS | R_RECNO) + u_int32_t flags; /* bt_flags & SAVEMETA */ +} BTMETA; + +/* The in-memory btree/recno data structure. */ +typedef struct _btree { + MPOOL *bt_mp; /* memory pool cookie */ + + DB *bt_dbp; /* pointer to enclosing DB */ + + EPG bt_cur; /* current (pinned) page */ + PAGE *bt_pinned; /* page pinned across calls */ + + CURSOR bt_cursor; /* cursor */ + +#define BT_PUSH(t, p, i) { \ + t->bt_sp->pgno = p; \ + t->bt_sp->index = i; \ + ++t->bt_sp; \ +} +#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp) +#define BT_CLR(t) (t->bt_sp = t->bt_stack) + EPGNO bt_stack[50]; /* stack of parent pages */ + EPGNO *bt_sp; /* current stack pointer */ + + DBT bt_rkey; /* returned key */ + DBT bt_rdata; /* returned data */ + + int bt_fd; /* tree file descriptor */ + + db_pgno_t bt_free; /* next free page */ + u_int32_t bt_psize; /* page size */ + indx_t bt_ovflsize; /* cut-off for key/data overflow */ + int bt_lorder; /* byte order */ + /* sorted order */ + enum { NOT, BACK, FORWARD } bt_order; + EPGNO bt_last; /* last insert */ + + /* B: key comparison function */ + int (*bt_cmp) __P((const DBT *, const DBT *)); + /* B: prefix comparison function */ + size_t (*bt_pfx) __P((const DBT *, const DBT *)); + /* R: recno input function */ + int (*bt_irec) __P((struct _btree *, recno_t)); + + FILE *bt_rfp; /* R: record FILE pointer */ + int bt_rfd; /* R: record file descriptor */ + + caddr_t bt_cmap; /* R: current point in mapped space */ + caddr_t bt_smap; /* R: start of mapped space */ + caddr_t bt_emap; /* R: end of mapped space */ + size_t bt_msize; /* R: size of mapped region. */ + + recno_t bt_nrecs; /* R: number of records */ + size_t bt_reclen; /* R: fixed record length */ + u_char bt_bval; /* R: delimiting byte/pad character */ + +/* + * NB: + * B_NODUPS and R_RECNO are stored on disk, and may not be changed. + */ +#define B_INMEM 0x00001 /* in-memory tree */ +#define B_METADIRTY 0x00002 /* need to write metadata */ +#define B_MODIFIED 0x00004 /* tree modified */ +#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */ +#define B_RDONLY 0x00010 /* read-only tree */ + +#define B_NODUPS 0x00020 /* no duplicate keys permitted */ +#define R_RECNO 0x00080 /* record oriented tree */ + +#define R_CLOSEFP 0x00040 /* opened a file pointer */ +#define R_EOF 0x00100 /* end of input file reached. */ +#define R_FIXLEN 0x00200 /* fixed length records */ +#define R_MEMMAPPED 0x00400 /* memory mapped file. */ +#define R_INMEM 0x00800 /* in-memory file */ +#define R_MODIFIED 0x01000 /* modified file */ +#define R_RDONLY 0x02000 /* read-only file */ + +#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */ +#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */ +#define B_DB_TXN 0x10000 /* DB_TXN specified. */ + u_int32_t flags; +} BTREE; + +#include "extern.h" diff --git a/src/util/db2/btree/extern.h b/src/util/db2/btree/extern.h new file mode 100644 index 0000000000..9da8486e3a --- /dev/null +++ b/src/util/db2/btree/extern.h @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.11 (Berkeley) 1/9/95 + */ + +int __bt_close __P((DB *)); +int __bt_cmp __P((BTREE *, const DBT *, EPG *)); +int __bt_crsrdel __P((BTREE *, EPGNO *)); +int __bt_defcmp __P((const DBT *, const DBT *)); +size_t __bt_defpfx __P((const DBT *, const DBT *)); +int __bt_delete __P((const DB *, const DBT *, u_int)); +int __bt_dleaf __P((BTREE *, const DBT *, PAGE *, u_int)); +int __bt_fd __P((const DB *)); +int __bt_free __P((BTREE *, PAGE *)); +int __bt_get __P((const DB *, const DBT *, DBT *, u_int)); +PAGE *__bt_new __P((BTREE *, db_pgno_t *)); +void __bt_pgin __P((void *, db_pgno_t, void *)); +void __bt_pgout __P((void *, db_pgno_t, void *)); +int __bt_push __P((BTREE *, db_pgno_t, int)); +int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int)); +int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int)); +EPG *__bt_search __P((BTREE *, const DBT *, int *)); +int __bt_seq __P((const DB *, DBT *, DBT *, u_int)); +void __bt_setcur __P((BTREE *, db_pgno_t, u_int)); +int __bt_split __P((BTREE *, PAGE *, + const DBT *, const DBT *, int, size_t, u_int32_t)); +int __bt_sync __P((const DB *, u_int)); + +int __ovfl_delete __P((BTREE *, void *)); +int __ovfl_get __P((BTREE *, void *, size_t *, void **, size_t *)); +int __ovfl_put __P((BTREE *, const DBT *, db_pgno_t *)); + +#ifdef DEBUG +int __bt_dnpage __P((DB *, db_pgno_t)); +int __bt_dpage __P((PAGE *)); +int __bt_dump __P((DB *)); +#endif +#ifdef STATISTICS +int __bt_stat __P((DB *)); +#endif diff --git a/src/util/db2/clib/memmove.c b/src/util/db2/clib/memmove.c new file mode 100644 index 0000000000..e8384f7c81 --- /dev/null +++ b/src/util/db2/clib/memmove.c @@ -0,0 +1,138 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bcopy.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <string.h> + +/* + * sizeof(word) MUST BE A POWER OF TWO + * SO THAT wmask BELOW IS ALL ONES + */ +typedef int word; /* "word" used for optimal copy speed */ + +#define wsize sizeof(word) +#define wmask (wsize - 1) + +/* + * Copy a block of memory, handling overlap. + * This is the routine that actually implements + * (the portable versions of) bcopy, memcpy, and memmove. + */ +#ifdef MEMCOPY +void * +memcpy(dst0, src0, length) +#else +#ifdef MEMMOVE +void * +memmove(dst0, src0, length) +#else +void +bcopy(src0, dst0, length) +#endif +#endif + void *dst0; + const void *src0; + register size_t length; +{ + register char *dst = dst0; + register const char *src = src0; + register size_t t; + + if (length == 0 || dst == src) /* nothing to do */ + goto done; + + /* + * Macros: loop-t-times; and loop-t-times, t>0 + */ +#define TLOOP(s) if (t) TLOOP1(s) +#define TLOOP1(s) do { s; } while (--t) + + if ((unsigned long)dst < (unsigned long)src) { + /* + * Copy forward. + */ + t = (int)src; /* only need low bits */ + if ((t | (int)dst) & wmask) { + /* + * Try to align operands. This cannot be done + * unless the low bits match. + */ + if ((t ^ (int)dst) & wmask || length < wsize) + t = length; + else + t = wsize - (t & wmask); + length -= t; + TLOOP1(*dst++ = *src++); + } + /* + * Copy whole words, then mop up any trailing bytes. + */ + t = length / wsize; + TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize); + t = length & wmask; + TLOOP(*dst++ = *src++); + } else { + /* + * Copy backwards. Otherwise essentially the same. + * Alignment works as before, except that it takes + * (t&wmask) bytes to align, not wsize-(t&wmask). + */ + src += length; + dst += length; + t = (int)src; + if ((t | (int)dst) & wmask) { + if ((t ^ (int)dst) & wmask || length <= wsize) + t = length; + else + t &= wmask; + length -= t; + TLOOP1(*--dst = *--src); + } + t = length / wsize; + TLOOP(src -= wsize; dst -= wsize; *(word *)dst = *(word *)src); + t = length & wmask; + TLOOP(*--dst = *--src); + } +done: +#if defined(MEMCOPY) || defined(MEMMOVE) + return (dst0); +#else + return; +#endif +} diff --git a/src/util/db2/clib/mkstemp.c b/src/util/db2/clib/mkstemp.c new file mode 100644 index 0000000000..07d4186a9d --- /dev/null +++ b/src/util/db2/clib/mkstemp.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 1987, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)mktemp.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <ctype.h> + +static int _gettemp(); + +mkstemp(path) + char *path; +{ + int fd; + + return (_gettemp(path, &fd) ? fd : -1); +} + +static +_gettemp(path, doopen) + char *path; + register int *doopen; +{ + extern int errno; + register char *start, *trv; + struct stat sbuf; + u_int pid; + + pid = getpid(); + for (trv = path; *trv; ++trv); /* extra X's get set to 0's */ + while (*--trv == 'X') { + *trv = (pid % 10) + '0'; + pid /= 10; + } + + /* + * check the target directory; if you have six X's and it + * doesn't exist this runs for a *very* long time. + */ + for (start = trv + 1;; --trv) { + if (trv <= path) + break; + if (*trv == '/') { + *trv = '\0'; + if (stat(path, &sbuf)) + return(0); + if (!S_ISDIR(sbuf.st_mode)) { + errno = ENOTDIR; + return(0); + } + *trv = '/'; + break; + } + } + + for (;;) { + if (doopen) { + if ((*doopen = + open(path, O_CREAT|O_EXCL|O_RDWR, 0600)) >= 0) + return(1); + if (errno != EEXIST) + return(0); + } + else if (stat(path, &sbuf)) + return(errno == ENOENT ? 1 : 0); + + /* tricky little algorithm for backward compatibility */ + for (trv = start;;) { + if (!*trv) + return(0); + if (*trv == 'z') + *trv++ = 'a'; + else { + if (isdigit(*trv)) + *trv = 'a'; + else + ++*trv; + break; + } + } + } + /*NOTREACHED*/ +} diff --git a/src/util/db2/clib/strerror.c b/src/util/db2/clib/strerror.c new file mode 100644 index 0000000000..53f374bdf7 --- /dev/null +++ b/src/util/db2/clib/strerror.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)strerror.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <string.h> + +char * +strerror(num) + int num; +{ + extern int sys_nerr; + extern char *sys_errlist[]; +#define UPREFIX "Unknown error: " + static char ebuf[40] = UPREFIX; /* 64-bit number + slop */ + register unsigned int errnum; + register char *p, *t; + char tmp[40]; + + errnum = num; /* convert to unsigned */ + if (errnum < sys_nerr) + return(sys_errlist[errnum]); + + /* Do this by hand, so we don't include stdio(3). */ + t = tmp; + do { + *t++ = "0123456789"[errnum % 10]; + } while (errnum /= 10); + for (p = ebuf + sizeof(UPREFIX) - 1;;) { + *p++ = *--t; + if (t <= tmp) + break; + } + return(ebuf); +} diff --git a/src/util/db2/configure.in b/src/util/db2/configure.in new file mode 100644 index 0000000000..3d256c40b7 --- /dev/null +++ b/src/util/db2/configure.in @@ -0,0 +1,63 @@ +dnl Process this file with autoconf to produce a configure script. +AC_INIT(db/db.c) +AC_CONFIG_HEADER(obj/db-config.h) +dnl checks for programs +AC_PROG_CC +AC_PROG_RANLIB + +AC_PATH_PROG(FALSE,false,:) +AC_PATH_PROG(SH,sh,$FALSE) +AC_PATH_PROG(SH5,sh5,$FALSE) +AC_PATH_PROG(BASH,bash,$FALSE) + +AC_CACHE_CHECK([checking for shell with functions],local_cv_program_fctsh, +[if $SH -c 'foo() { true; }; foo' > /dev/null 2>&1; then + local_cv_program_fctsh=$SH +else + if $SH5 -c 'foo() { true; }; foo' > /dev/null 2>&1; then + local_cv_program_fctsh=$SH5 + else + if $BASH -c 'foo() { true; }; foo' > /dev/null 2>&1; then + local_cv_program_fctsh=$BASH + else + local_cv_program_fctsh=$FALSE + fi + fi +fi]) + +FCTSH=$local_cv_program_fctsh +AC_SUBST(FCTSH) + +dnl checks for libraries +dnl checks for header files +AC_CHECK_HEADERS(unistd.h) +dnl checks for typedefs +AC_TYPE_SIZE_T + +AC_CHECK_TYPE(ssize_t, int) + +AC_CHECK_TYPE(u_char, unsigned char) +AC_CHECK_TYPE(u_short, unsigned short) +AC_CHECK_TYPE(u_int, unsigned int) +AC_CHECK_TYPE(u_long, unsigned long) + +AC_CHECK_TYPE(int8_t, signed char) +AC_CHECK_TYPE(u_int8_t, unsigned char) +AC_CHECK_TYPE(int16_t, short) +AC_CHECK_TYPE(u_int16_t, unsigned short) +AC_CHECK_TYPE(int32_t, int) +AC_CHECK_TYPE(u_int32_t, unsigned int) +dnl checks for structures +dnl checks for compiler characteristics +AC_C_BIGENDIAN +AC_C_CONST +dnl checks for library functions +AC_CHECK_FUNC(memmove,,MEMMOVE_OBJ=memmove.o) +AC_CHECK_FUNC(mkstemp,,MKSTEMP_OBJ=mkstemp.o) +AC_CHECK_FUNC(strerror,,STRERROR_OBJ=strerror.o) +AC_SUBST(MEMMOVE_OBJ) +AC_SUBST(MKSTEMP_OBJ) +AC_SUBST(STRERROR_OBJ) +AC_CHECK_FUNCS(memmove mkstemp strerror) +dnl checks for system services +AC_OUTPUT(Makefile obj/Makefile) diff --git a/src/util/db2/db/Makefile.inc b/src/util/db2/db/Makefile.inc new file mode 100644 index 0000000000..59478ba198 --- /dev/null +++ b/src/util/db2/db/Makefile.inc @@ -0,0 +1,5 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/db + +SRCS+= db.c diff --git a/src/util/db2/db/db.c b/src/util/db2/db/db.c new file mode 100644 index 0000000000..dc38abdc4e --- /dev/null +++ b/src/util/db2/db/db.c @@ -0,0 +1,99 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)db.c 8.4 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> + +#include "db-int.h" + +DB * +dbopen(fname, flags, mode, type, openinfo) + const char *fname; + int flags, mode; + DBTYPE type; + const void *openinfo; +{ + +#define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN) +#define USE_OPEN_FLAGS \ + (O_CREAT | O_EXCL | O_EXLOCK | O_NONBLOCK | O_RDONLY | \ + O_RDWR | O_SHLOCK | O_TRUNC) + + if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0) + switch (type) { + case DB_BTREE: + return (__bt_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + case DB_HASH: + return (__hash_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + case DB_RECNO: + return (__rec_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + } + errno = EINVAL; + return (NULL); +} + +static int +__dberr() +{ + return (RET_ERROR); +} + +/* + * __DBPANIC -- Stop. + * + * Parameters: + * dbp: pointer to the DB structure. + */ +void +__dbpanic(dbp) + DB *dbp; +{ + /* The only thing that can succeed is a close. */ + dbp->del = (int (*)())__dberr; + dbp->fd = (int (*)())__dberr; + dbp->get = (int (*)())__dberr; + dbp->put = (int (*)())__dberr; + dbp->seq = (int (*)())__dberr; + dbp->sync = (int (*)())__dberr; +} diff --git a/src/util/db2/docs/btree.3.ps b/src/util/db2/docs/btree.3.ps new file mode 100644 index 0000000000..c79c97232c --- /dev/null +++ b/src/util/db2/docs/btree.3.ps @@ -0,0 +1,366 @@ +%!PS-Adobe-3.0 +%%Creator: groff version 1.08 +%%DocumentNeededResources: font Times-Roman +%%+ font Times-Bold +%%+ font Times-Italic +%%DocumentSuppliedResources: procset grops 1.08 0 +%%Pages: 2 +%%PageOrder: Ascend +%%Orientation: Portrait +%%EndComments +%%BeginProlog +%%BeginResource: procset grops 1.08 0 +/setpacking where{ +pop +currentpacking +true setpacking +}if +/grops 120 dict dup begin +/SC 32 def +/A/show load def +/B{0 SC 3 -1 roll widthshow}bind def +/C{0 exch ashow}bind def +/D{0 exch 0 SC 5 2 roll awidthshow}bind def +/E{0 rmoveto show}bind def +/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def +/G{0 rmoveto 0 exch ashow}bind def +/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/I{0 exch rmoveto show}bind def +/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def +/K{0 exch rmoveto 0 exch ashow}bind def +/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/M{rmoveto show}bind def +/N{rmoveto 0 SC 3 -1 roll widthshow}bind def +/O{rmoveto 0 exch ashow}bind def +/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/Q{moveto show}bind def +/R{moveto 0 SC 3 -1 roll widthshow}bind def +/S{moveto 0 exch ashow}bind def +/T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/SF{ +findfont exch +[exch dup 0 exch 0 exch neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/MF{ +findfont +[5 2 roll +0 3 1 roll +neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/level0 0 def +/RES 0 def +/PL 0 def +/LS 0 def +/PLG{ +gsave newpath clippath pathbbox grestore +exch pop add exch pop +}bind def +/BP{ +/level0 save def +1 setlinecap +1 setlinejoin +72 RES div dup scale +LS{ +90 rotate +}{ +0 PL translate +}ifelse +1 -1 scale +}bind def +/EP{ +level0 restore +showpage +}bind def +/DA{ +newpath arcn stroke +}bind def +/SN{ +transform +.25 sub exch .25 sub exch +round .25 add exch round .25 add exch +itransform +}bind def +/DL{ +SN +moveto +SN +lineto stroke +}bind def +/DC{ +newpath 0 360 arc closepath +}bind def +/TM matrix def +/DE{ +TM currentmatrix pop +translate scale newpath 0 0 .5 0 360 arc closepath +TM setmatrix +}bind def +/RC/rcurveto load def +/RL/rlineto load def +/ST/stroke load def +/MT/moveto load def +/CL/closepath load def +/FL{ +currentgray exch setgray fill setgray +}bind def +/BL/fill load def +/LW/setlinewidth load def +/RE{ +findfont +dup maxlength 1 index/FontName known not{1 add}if dict begin +{ +1 index/FID ne{def}{pop pop}ifelse +}forall +/Encoding exch def +dup/FontName exch def +currentdict end definefont pop +}bind def +/DEFS 0 def +/EBEGIN{ +moveto +DEFS begin +}bind def +/EEND/end load def +/CNT 0 def +/level1 0 def +/PBEGIN{ +/level1 save def +translate +div 3 1 roll div exch scale +neg exch neg exch translate +0 setgray +0 setlinecap +1 setlinewidth +0 setlinejoin +10 setmiterlimit +[]0 setdash +/setstrokeadjust where{ +pop +false setstrokeadjust +}if +/setoverprint where{ +pop +false setoverprint +}if +newpath +/CNT countdictstack def +userdict begin +/showpage{}def +}bind def +/PEND{ +clear +countdictstack CNT sub{end}repeat +level1 restore +}bind def +end def +/setpacking where{ +pop +setpacking +}if +%%EndResource +%%IncludeResource: font Times-Roman +%%IncludeResource: font Times-Bold +%%IncludeResource: font Times-Italic +grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 def/PL +792 def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron/scaron/zcaron +/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/space +/exclam/quotedbl/numbersign/dollar/percent/ampersand/quoteright/parenleft +/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four +/five/six/seven/eight/nine/colon/semicolon/less/equal/greater/question/at/A/B/C +/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash +/bracketright/circumflex/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q +/r/s/t/u/v/w/x/y/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase +/guillemotleft/guillemotright/bullet/florin/fraction/perthousand/dagger +/daggerdbl/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut +/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash +/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen/brokenbar +/section/dieresis/copyright/ordfeminine/guilsinglleft/logicalnot/minus +/registered/macron/degree/plusminus/twosuperior/threesuperior/acute/mu +/paragraph/periodcentered/cedilla/onesuperior/ordmasculine/guilsinglright +/onequarter/onehalf/threequarters/questiondown/Agrave/Aacute/Acircumflex/Atilde +/Adieresis/Aring/AE/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute +/Icircumflex/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis +/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn/germandbls +/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla/egrave/eacute +/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis/eth/ntilde/ograve +/oacute/ocircumflex/otilde/odieresis/divide/oslash/ugrave/uacute/ucircumflex +/udieresis/yacute/thorn/ydieresis]def/Times-Italic@0 ENC0/Times-Italic RE +/Times-Bold@0 ENC0/Times-Bold RE/Times-Roman@0 ENC0/Times-Roman RE +%%EndProlog +%%Page: 1 1 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 132.34(BTREE\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 132.34(anual BTREE\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 84 S +(ME).18 E F0(btree \255 btree database access method)108 96 Q F1(SYNOPSIS)72 +112.8 Q/F2 10/Times-Bold@0 SF(#include <sys/types.h>)108 124.8 Q(#include <db) +108 136.8 Q(.h>)-.4 E F1(DESCRIPTION)72 153.6 Q F0 .198(The routine)108 165.6 R +/F3 10/Times-Italic@0 SF(dbopen)2.698 E F0 .198(is the library interf)2.698 F +.198(ace to database \214les.)-.1 F .198 +(One of the supported \214le formats is btree \214les.)5.198 F .974 +(The general description of the database access methods is in)108 177.6 R F3 +(dbopen)3.475 E F0 .975(\(3\), this manual page describes only).24 F +(the btree speci\214c information.)108 189.6 Q(The btree data structure is a s\ +orted, balanced tree structure storing associated k)108 206.4 Q -.15(ey)-.1 G +(/data pairs.).15 E .504(The btree access method speci\214c data structure pro) +108 223.2 R .504(vided to)-.15 F F3(dbopen)3.004 E F0 .503 +(is de\214ned in the <db)3.003 F .503(.h> include \214le as)-.4 F(follo)108 +235.2 Q(ws:)-.25 E(typedef struct {)108 252 Q(u_long \215ags;)144 264 Q +(u_int cachesize;)144 276 Q(inde)144 288 Q(x_t psize;)-.15 E(int lorder;)144 +300 Q(int mink)144 312 Q -.15(ey)-.1 G(page;).15 E +(int \(*compare\)\(const DBT *k)144 324 Q -.15(ey)-.1 G(1, const DBT *k).15 E +-.15(ey)-.1 G(2\);).15 E(int \(*pre\214x\)\(const DBT *k)144 336 Q -.15(ey)-.1 +G(1, const DBT *k).15 E -.15(ey)-.1 G(2\);).15 E 2.5(}B)108 348 S(TREEINFO;) +121.97 348 Q(The elements of this structure are as follo)108 364.8 Q(ws:)-.25 E +14.61(\215ags The)108 381.6 R(\215ag v)2.5 E(alue is speci\214ed by)-.25 E F3 +(or)2.5 E F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)313.2 381.6 S(he follo)321.81 +381.6 Q(wing v)-.25 E(alues:)-.25 E(R_DUP)144 398.4 Q 1.296(Permit duplicate k) +180 410.4 R -.15(ey)-.1 G 3.796(si).15 G 3.796(nt)275.578 410.4 S 1.296 +(he tree, i.e. permit insertion if the k)287.154 410.4 R 1.596 -.15(ey t)-.1 H +3.796(ob).15 G 3.796(ei)466.878 410.4 S 1.296(nserted already)477.894 410.4 R +-.15(ex)180 422.4 S 1.935(ists in the tree.).15 F 1.935(The def)6.935 F 1.935 +(ault beha)-.1 F(vior)-.2 E 4.435(,a)-.4 G 4.435(sd)358.215 422.4 S 1.935 +(escribed in)371.54 422.4 R F3(dbopen)4.435 E F0 1.935(\(3\), is to o).24 F +-.15(ve)-.15 G 1.935(rwrite a).15 F .148(matching k)180 434.4 R .448 -.15(ey w) +-.1 H .148(hen inserting a ne).15 F 2.649(wk)-.25 G .449 -.15(ey o)329.709 +434.4 T 2.649(rt).15 G 2.649(of)355.407 434.4 S .149(ail if the R_NOO)366.286 +434.4 R(VER)-.5 E .149(WRITE \215ag is speci-)-.55 F 5.972(\214ed. The)180 +446.4 R 3.472(R_DUP \215ag is o)5.972 F -.15(ve)-.15 G 3.472 +(rridden by the R_NOO).15 F(VER)-.5 E 3.471(WRITE \215ag, and if the)-.55 F +(R_NOO)180 458.4 Q(VER)-.5 E .781 +(WRITE \215ag is speci\214ed, attempts to insert duplicate k)-.55 F -.15(ey)-.1 +G 3.282(si).15 G .782(nto the tree will)474.604 458.4 R -.1(fa)180 470.4 S(il.) +.1 E 1.13(If the database contains duplicate k)180 487.2 R -.15(ey)-.1 G 1.129 +(s, the order of retrie).15 F -.25(va)-.25 G 3.629(lo).25 G 3.629(fk)439.644 +487.2 S -.15(ey)451.503 487.2 S 1.129(/data pairs is unde-).15 F .837 +(\214ned if the)180 499.2 R F3 -.1(ge)3.337 G(t).1 E F0 .837 +(routine is used, ho)3.337 F(we)-.25 E -.15(ve)-.25 G -.4(r,).15 G F3(seq)3.737 +E F0 .838(routine calls with the R_CURSOR \215ag set)3.337 F(will al)180 511.2 +Q -.1(wa)-.1 G(ys return the logical `).1 E(`\214rst')-.74 E 2.5('o)-.74 G 2.5 +(fa)333.85 511.2 S .3 -.15(ny g)344.12 511.2 T(roup of duplicate k).15 E -.15 +(ey)-.1 G(s.).15 E(cachesize)108 528 Q 3.056(As)144 540 S .556 +(uggested maximum size \(in bytes\) of the memory cache.)158.166 540 R .555 +(This v)5.556 F .555(alue is)-.25 F F2(only)3.055 E F0(advisory)3.055 E 3.055 +(,a)-.65 G .555(nd the)514.725 540 R .759 +(access method will allocate more memory rather than f)144 552 R 3.259 +(ail. Since)-.1 F -2.15 -.25(ev e)3.259 H .76(ry search e).25 F .76 +(xamines the root)-.15 F .055 +(page of the tree, caching the most recently used pages substantially impro)144 +564 R -.15(ve)-.15 G 2.554(sa).15 G .054(ccess time.)459.578 564 R .054 +(In addi-)5.054 F .661(tion, ph)144 576 R .662(ysical writes are delayed as lo\ +ng as possible, so a moderate cache can reduce the number)-.05 F .601 +(of I/O operations signi\214cantly)144 588 R 5.601(.O)-.65 G -.15(bv)280.744 +588 S(iously).15 E 3.101(,u)-.65 G .601(sing a cache increases \(b)324.995 588 +R .6(ut only increases\) the lik)-.2 F(eli-)-.1 E .19(hood of corruption or lo\ +st data if the system crashes while a tree is being modi\214ed.)144 600 R(If) +5.191 E F3(cac)2.691 E(hesize)-.15 E F0(is)2.691 E 2.5(0\()144 612 S +(no size is speci\214ed\) a def)154.83 612 Q(ault cache is used.)-.1 E 12.95 +(psize P)108 628.8 R .45 +(age size is the size \(in bytes\) of the pages used for nodes in the tree.) +-.15 F .449(The minimum page size is)5.449 F .442 +(512 bytes and the maximum page size is 64K.)144 640.8 R(If)5.442 E F3(psize) +2.942 E F0 .442(is 0 \(no page size is speci\214ed\) a page size)2.942 F +(is chosen based on the underlying \214le system I/O block size.)144 652.8 Q +9.62(lorder The)108 669.6 R 1.597(byte order for inte)4.097 F 1.596 +(gers in the stored database metadata.)-.15 F 1.596 +(The number should represent the)6.596 F .688(order as an inte)144 681.6 R .689 +(ger; for e)-.15 F .689(xample, big endian order w)-.15 F .689 +(ould be the number 4,321.)-.1 F(If)5.689 E F3(lor)3.189 E(der)-.37 E F0 .689 +(is 0 \(no)3.189 F(order is speci\214ed\) the current host order is used.)144 +693.6 Q 174.135(4.4BSD June)72 732 R(4, 1993)2.5 E(1)535 732 Q EP +%%Page: 2 2 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 132.34(BTREE\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 132.34(anual BTREE\(3\))340.17 48 R(mink)108 84 Q -.15(ey)-.1 G(page).15 +E 1.423(The minimum number of k)144 96 R -.15(ey)-.1 G 3.923(sw).15 G 1.422 +(hich will be stored on an)282.245 96 R 3.922(ys)-.15 G 1.422(ingle page.) +400.618 96 R 1.422(This v)6.422 F 1.422(alue is used to)-.25 F .257 +(determine which k)144 108 R -.15(ey)-.1 G 2.757(sw).15 G .257 +(ill be stored on o)242.001 108 R -.15(ve)-.15 G(r\215o).15 E 2.757(wp)-.25 G +.257(ages, i.e. if a k)348.006 108 R .558 -.15(ey o)-.1 H 2.758(rd).15 G .258 +(ata item is longer than the)435.11 108 R 1.102(pagesize di)144 120 R 1.102 +(vided by the mink)-.25 F -.15(ey)-.1 G 1.102(page v).15 F 1.102 +(alue, it will be stored on o)-.25 F -.15(ve)-.15 G(r\215o).15 E 3.602(wp)-.25 +G 1.102(ages instead of in the)451.164 120 R(page itself.)144 132 Q(If)5 E/F1 +10/Times-Italic@0 SF(mink)2.5 E -.3(ey)-.1 G(pa).3 E -.1(ge)-.1 G F0 +(is 0 \(no minimum number of k)2.6 E -.15(ey)-.1 G 2.5(si).15 G 2.5(ss)392.84 +132 S(peci\214ed\) a v)403.12 132 Q(alue of 2 is used.)-.25 E(compare)108 148.8 +Q .751(Compare is the k)144 160.8 R 1.051 -.15(ey c)-.1 H .751 +(omparison function.).15 F .751(It must return an inte)5.751 F .752 +(ger less than, equal to, or greater)-.15 F .913(than zero if the \214rst k)144 +172.8 R 1.213 -.15(ey a)-.1 H -.18(rg).15 G .913 +(ument is considered to be respecti).18 F -.15(ve)-.25 G .913 +(ly less than, equal to, or greater).15 F .352(than the second k)144 184.8 R +.652 -.15(ey a)-.1 H -.18(rg).15 G 2.852(ument. The).18 F .353 +(same comparison function must be used on a gi)2.852 F -.15(ve)-.25 G 2.853(nt) +.15 G .353(ree e)503.127 184.8 R -.15(ve)-.25 G(ry).15 E .817 +(time it is opened.)144 196.8 R(If)5.817 E F1(compar)3.317 E(e)-.37 E F0 .817 +(is NULL \(no comparison function is speci\214ed\), the k)3.317 F -.15(ey)-.1 G +3.316(sa).15 G .816(re com-)508.364 196.8 R(pared le)144 208.8 Q(xically)-.15 E +2.5(,w)-.65 G(ith shorter k)214.57 208.8 Q -.15(ey)-.1 G 2.5(sc).15 G +(onsidered less than longer k)282.92 208.8 Q -.15(ey)-.1 G(s.).15 E 10.17 +(pre\214x Pre\214x)108 225.6 R .291(is the pre\214x comparison function.)2.791 +F .292(If speci\214ed, this routine must return the number of bytes)5.291 F +.937(of the second k)144 237.6 R 1.237 -.15(ey a)-.1 H -.18(rg).15 G .937 +(ument which are necessary to determine that it is greater than the \214rst k) +.18 F -.15(ey)-.1 G(ar)144 249.6 Q 3.477(gument. If)-.18 F .977(the k)3.477 F +-.15(ey)-.1 G 3.477(sa).15 G .977(re equal, the k)241.898 249.6 R 1.277 -.15 +(ey l)-.1 H .978(ength should be returned.).15 F .978 +(Note, the usefulness of this)5.978 F .558(routine is v)144 261.6 R .558 +(ery data dependent, b)-.15 F .558 +(ut, in some data sets can produce signi\214cantly reduced tree sizes)-.2 F +.354(and search times.)144 273.6 R(If)5.354 E F1(pr)2.854 E(e\214x)-.37 E F0 +.354(is NULL \(no pre\214x function is speci\214ed\),)2.854 F/F2 10 +/Times-Bold@0 SF(and)2.854 E F0 .354(no comparison function)2.854 F .193 +(is speci\214ed, a def)144 285.6 R .193(ault le)-.1 F .193 +(xical comparison routine is used.)-.15 F(If)5.192 E F1(pr)2.692 E(e\214x)-.37 +E F0 .192(is NULL and a comparison rou-)2.692 F +(tine is speci\214ed, no pre\214x comparison is done.)144 297.6 Q .79 +(If the \214le already e)108 314.4 R .79(xists \(and the O_TR)-.15 F .79 +(UNC \215ag is not speci\214ed\), the v)-.4 F .79 +(alues speci\214ed for the parameters)-.25 F +(\215ags, lorder and psize are ignored in f)108 326.4 Q -.2(avo)-.1 G 2.5(ro).2 +G 2.5(ft)284.4 326.4 S(he v)293.01 326.4 Q(alues used when the tree w)-.25 E +(as created.)-.1 E -.15(Fo)108 343.2 S(rw).15 E +(ard sequential scans of a tree are from the least k)-.1 E .3 -.15(ey t)-.1 H +2.5(ot).15 G(he greatest.)348.55 343.2 Q 1.043(Space freed up by deleting k)108 +360 R -.15(ey)-.1 G 1.043(/data pairs from the tree is ne).15 F -.15(ve)-.25 G +3.543(rr).15 G 1.043(eclaimed, although it is normally made)378.686 360 R -.2 +(av)108 372 S 1.394(ailable for reuse.)-.05 F 1.394 +(This means that the btree storage structure is gro)6.394 F(w-only)-.25 E 6.395 +(.T)-.65 G 1.395(he only solutions are to)441.09 372 R -.2(avo)108 384 S(id e) +.2 E(xcessi)-.15 E .3 -.15(ve d)-.25 H +(eletions, or to create a fresh tree periodically from a scan of an e).15 E +(xisting one.)-.15 E .344(Searches, insertions, and deletions in a btree will \ +all complete in O lg base N where base is the a)108 400.8 R -.15(ve)-.2 G .343 +(rage \214ll).15 F -.1(fa)108 412.8 S(ctor).1 E 5.798(.O)-.55 G .799 +(ften, inserting ordered data into btrees results in a lo)146.188 412.8 R 3.299 +<778c>-.25 G .799(ll f)377.505 412.8 R(actor)-.1 E 5.799(.T)-.55 G .799 +(his implementation has been)423.443 412.8 R(modi\214ed to mak)108 424.8 Q 2.5 +(eo)-.1 G(rdered insertion the best case, resulting in a much better than norm\ +al page \214ll f)185.4 424.8 Q(actor)-.1 E(.)-.55 E/F3 9/Times-Bold@0 SF +(SEE ALSO)72 441.6 Q F1(dbopen)108 453.6 Q F0(\(3\),).24 E F1(hash)2.5 E F0 +(\(3\),).28 E F1(mpool)2.5 E F0(\(3\),).51 E F1 -.37(re)2.5 G(cno).37 E F0 +(\(3\)).18 E F1(The Ubiquitous B-tr)108 477.6 Q(ee)-.37 E F0 2.5(,D).18 G +(ouglas Comer)209.47 477.6 Q 2.5(,A)-.4 G(CM Comput. Surv)276.72 477.6 Q 2.5 +(.1)-.65 G(1, 2 \(June 1979\), 121-138.)360.25 477.6 Q F1(Pr)108 501.6 Q 1.588 +(e\214x B-tr)-.37 F(ees)-.37 E F0 4.088(,B).27 G 1.587(ayer and Unterauer) +177.636 501.6 R 4.087(,A)-.4 G 1.587(CM T)270.447 501.6 R 1.587 +(ransactions on Database Systems, V)-.35 F 1.587(ol. 2, 1 \(March 1977\),)-1.29 +F(11-26.)108 513.6 Q F1(The Art of Computer Pr)108 537.6 Q -.1(og)-.45 G -.15 +(ra).1 G(mming V).15 E(ol. 3: Sorting and Sear)-1.11 E -.15(ch)-.37 G(ing).15 E +F0 2.5(,D).22 G(.E. Knuth, 1968, pp 471-480.)382 537.6 Q F3 -.09(BU)72 554.4 S +(GS).09 E F0(Only big and little endian byte order is supported.)108 566.4 Q +174.135(4.4BSD June)72 732 R(4, 1993)2.5 E(2)535 732 Q EP +%%Trailer +end +%%EOF diff --git a/src/util/db2/docs/dbopen.3.ps b/src/util/db2/docs/dbopen.3.ps new file mode 100644 index 0000000000..c621bef97d --- /dev/null +++ b/src/util/db2/docs/dbopen.3.ps @@ -0,0 +1,508 @@ +%!PS-Adobe-3.0 +%%Creator: groff version 1.08 +%%DocumentNeededResources: font Times-Roman +%%+ font Times-Bold +%%+ font Times-Italic +%%DocumentSuppliedResources: procset grops 1.08 0 +%%Pages: 4 +%%PageOrder: Ascend +%%Orientation: Portrait +%%EndComments +%%BeginProlog +%%BeginResource: procset grops 1.08 0 +/setpacking where{ +pop +currentpacking +true setpacking +}if +/grops 120 dict dup begin +/SC 32 def +/A/show load def +/B{0 SC 3 -1 roll widthshow}bind def +/C{0 exch ashow}bind def +/D{0 exch 0 SC 5 2 roll awidthshow}bind def +/E{0 rmoveto show}bind def +/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def +/G{0 rmoveto 0 exch ashow}bind def +/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/I{0 exch rmoveto show}bind def +/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def +/K{0 exch rmoveto 0 exch ashow}bind def +/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/M{rmoveto show}bind def +/N{rmoveto 0 SC 3 -1 roll widthshow}bind def +/O{rmoveto 0 exch ashow}bind def +/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/Q{moveto show}bind def +/R{moveto 0 SC 3 -1 roll widthshow}bind def +/S{moveto 0 exch ashow}bind def +/T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/SF{ +findfont exch +[exch dup 0 exch 0 exch neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/MF{ +findfont +[5 2 roll +0 3 1 roll +neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/level0 0 def +/RES 0 def +/PL 0 def +/LS 0 def +/PLG{ +gsave newpath clippath pathbbox grestore +exch pop add exch pop +}bind def +/BP{ +/level0 save def +1 setlinecap +1 setlinejoin +72 RES div dup scale +LS{ +90 rotate +}{ +0 PL translate +}ifelse +1 -1 scale +}bind def +/EP{ +level0 restore +showpage +}bind def +/DA{ +newpath arcn stroke +}bind def +/SN{ +transform +.25 sub exch .25 sub exch +round .25 add exch round .25 add exch +itransform +}bind def +/DL{ +SN +moveto +SN +lineto stroke +}bind def +/DC{ +newpath 0 360 arc closepath +}bind def +/TM matrix def +/DE{ +TM currentmatrix pop +translate scale newpath 0 0 .5 0 360 arc closepath +TM setmatrix +}bind def +/RC/rcurveto load def +/RL/rlineto load def +/ST/stroke load def +/MT/moveto load def +/CL/closepath load def +/FL{ +currentgray exch setgray fill setgray +}bind def +/BL/fill load def +/LW/setlinewidth load def +/RE{ +findfont +dup maxlength 1 index/FontName known not{1 add}if dict begin +{ +1 index/FID ne{def}{pop pop}ifelse +}forall +/Encoding exch def +dup/FontName exch def +currentdict end definefont pop +}bind def +/DEFS 0 def +/EBEGIN{ +moveto +DEFS begin +}bind def +/EEND/end load def +/CNT 0 def +/level1 0 def +/PBEGIN{ +/level1 save def +translate +div 3 1 roll div exch scale +neg exch neg exch translate +0 setgray +0 setlinecap +1 setlinewidth +0 setlinejoin +10 setmiterlimit +[]0 setdash +/setstrokeadjust where{ +pop +false setstrokeadjust +}if +/setoverprint where{ +pop +false setoverprint +}if +newpath +/CNT countdictstack def +userdict begin +/showpage{}def +}bind def +/PEND{ +clear +countdictstack CNT sub{end}repeat +level1 restore +}bind def +end def +/setpacking where{ +pop +setpacking +}if +%%EndResource +%%IncludeResource: font Times-Roman +%%IncludeResource: font Times-Bold +%%IncludeResource: font Times-Italic +grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 def/PL +792 def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron/scaron/zcaron +/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/space +/exclam/quotedbl/numbersign/dollar/percent/ampersand/quoteright/parenleft +/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four +/five/six/seven/eight/nine/colon/semicolon/less/equal/greater/question/at/A/B/C +/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash +/bracketright/circumflex/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q +/r/s/t/u/v/w/x/y/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase +/guillemotleft/guillemotright/bullet/florin/fraction/perthousand/dagger +/daggerdbl/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut +/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash +/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen/brokenbar +/section/dieresis/copyright/ordfeminine/guilsinglleft/logicalnot/minus +/registered/macron/degree/plusminus/twosuperior/threesuperior/acute/mu +/paragraph/periodcentered/cedilla/onesuperior/ordmasculine/guilsinglright +/onequarter/onehalf/threequarters/questiondown/Agrave/Aacute/Acircumflex/Atilde +/Adieresis/Aring/AE/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute +/Icircumflex/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis +/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn/germandbls +/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla/egrave/eacute +/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis/eth/ntilde/ograve +/oacute/ocircumflex/otilde/odieresis/divide/oslash/ugrave/uacute/ucircumflex +/udieresis/yacute/thorn/ydieresis]def/Times-Italic@0 ENC0/Times-Italic RE +/Times-Bold@0 ENC0/Times-Bold RE/Times-Roman@0 ENC0/Times-Roman RE +%%EndProlog +%%Page: 1 1 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.01(DBOPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.01(anual DBOPEN\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 +84 S(ME).18 E F0(dbopen \255 database access methods)108 96 Q F1(SYNOPSIS)72 +112.8 Q/F2 10/Times-Bold@0 SF(#include <sys/types.h>)108 124.8 Q +(#include <limits.h>)108 136.8 Q(#include <db)108 148.8 Q(.h>)-.4 E(DB *)108 +172.8 Q(dbopen\(const char *\214le, int \215ags, int mode, DBTYPE type,)108 +184.8 Q(const v)158 196.8 Q(oid *openinf)-.1 E(o\);)-.25 E F1(DESCRIPTION)72 +213.6 Q/F3 10/Times-Italic@0 SF(Dbopen)108 225.6 Q F0 .032 +(is the library interf)2.532 F .031(ace to database \214les.)-.1 F .031 +(The supported \214le formats are btree, hashed and UNIX \214le)5.031 F 2.82 +(oriented. The)108 237.6 R .32 +(btree format is a representation of a sorted, balanced tree structure.)2.82 F +.321(The hashed format is an)5.321 F -.15(ex)108 249.6 S .424 +(tensible, dynamic hashing scheme.).15 F .423 +(The \215at-\214le format is a byte stream \214le with \214x)5.423 F .423 +(ed or v)-.15 F .423(ariable length)-.25 F 2.906(records. The)108 261.6 R .407 +(formats and \214le format speci\214c information are described in detail in t\ +heir respecti)2.906 F .707 -.15(ve m)-.25 H(anual).15 E(pages)108 273.6 Q F3 +(btr)2.5 E(ee)-.37 E F0(\(3\),).18 E F3(hash)2.5 E F0(\(3\) and).28 E F3 -.37 +(re)2.5 G(cno).37 E F0(\(3\).).18 E .433(Dbopen opens)108 290.4 R F3(\214le) +2.933 E F0 .433(for reading and/or writing.)2.933 F .433(Files ne)5.433 F -.15 +(ve)-.25 G 2.933(ri).15 G .433(ntended to be preserv)346.737 290.4 R .433 +(ed on disk may be created)-.15 F(by setting the \214le parameter to NULL.)108 +302.4 Q(The)108 319.2 Q F3<8d61>4.661 E(gs)-.1 E F0(and)4.661 E F3 2.161 +(mode ar)4.661 F(guments)-.37 E F0 2.161(are as speci\214ed to the)4.661 F F3 +(open)4.661 E F0 2.162(\(2\) routine, ho).24 F(we)-.25 E -.15(ve)-.25 G 2.962 +-.4(r, o).15 H 2.162(nly the O_CREA).4 F -.74(T,)-1.11 G .128 +(O_EXCL, O_EXLOCK, O_NONBLOCK, O_RDONL)108 331.2 R 2.708 -1.29(Y, O)-1 H(_RD) +1.29 E .128(WR, O_SHLOCK and O_TR)-.3 F .127(UNC \215ags are)-.4 F 2.5 +(meaningful. \(Note,)108 343.2 R(opening a database \214le O_WR)2.5 E(ONL)-.4 E +2.5(Yi)-1 G 2.5(sn)342.67 343.2 S(ot possible.\))354.06 343.2 Q(The)108 360 Q +F3(type)5.337 E F0(ar)5.337 E 2.837 +(gument is of type DBTYPE \(as de\214ned in the <db)-.18 F 2.838 +(.h> include \214le\) and may be set to)-.4 F(DB_BTREE, DB_HASH or DB_RECNO.) +108 372 Q(The)108 388.8 Q F3(openinfo)2.85 E F0(ar)2.85 E .349(gument is a poi\ +nter to an access method speci\214c structure described in the access method') +-.18 F(s)-.55 E .03(manual page.)108 400.8 R(If)5.03 E F3(openinfo)2.53 E F0 +.031(is NULL, each access method will use def)2.53 F .031 +(aults appropriate for the system and the)-.1 F(access method.)108 412.8 Q F3 +(Dbopen)108 429.6 Q F0 .416 +(returns a pointer to a DB structure on success and NULL on error)2.917 F 5.416 +(.T)-.55 G .416(he DB structure is de\214ned in)423.21 429.6 R(the <db)108 +441.6 Q(.h> include \214le, and contains at least the follo)-.4 E +(wing \214elds:)-.25 E(typedef struct {)108 465.6 Q(DBTYPE type;)144 477.6 Q +(int \(*close\)\(const DB *db\);)144 489.6 Q +(int \(*del\)\(const DB *db, const DBT *k)144 501.6 Q -.15(ey)-.1 G 2.5(,u)-.5 +G(_int \215ags\);)318.92 501.6 Q(int \(*fd\)\(const DB *db\);)144 513.6 Q +(int \(*get\)\(const DB *db, DBT *k)144 525.6 Q -.15(ey)-.1 G 2.5(,D)-.5 G +(BT *data, u_int \215ags\);)297.53 525.6 Q(int \(*put\)\(const DB *db, DBT *k) +144 537.6 Q -.15(ey)-.1 G 2.5(,c)-.5 G(onst DBT *data,)295.31 537.6 Q +(u_int \215ags\);)194 549.6 Q(int \(*sync\)\(const DB *db, u_int \215ags\);)144 +561.6 Q(int \(*seq\)\(const DB *db, DBT *k)144 573.6 Q -.15(ey)-.1 G 2.5(,D)-.5 +G(BT *data, u_int \215ags\);)298.64 573.6 Q 2.5(}D)108 585.6 S(B;)122.52 585.6 +Q .101 +(These elements describe a database type and a set of functions performing v) +108 602.4 R .101(arious actions.)-.25 F .101(These functions)5.101 F(tak)108 +614.4 Q 3.039(eap)-.1 G .539(ointer to a structure as returned by)140.078 614.4 +R F3(dbopen)3.038 E F0 3.038(,a).24 G .538 +(nd sometimes one or more pointers to k)323.196 614.4 R -.15(ey)-.1 G .538 +(/data struc-).15 F(tures and a \215ag v)108 626.4 Q(alue.)-.25 E 16.28 +(type The)108 643.2 R +(type of the underlying access method \(and \214le format\).)2.5 E 12.95 +(close A)108 660 R .988(pointer to a routine to \215ush an)3.488 F 3.489(yc) +-.15 G .989(ached information to disk, free an)293.968 660 R 3.489(ya)-.15 G +.989(llocated resources, and)446.662 660 R .112 +(close the underlying \214le\(s\).)144 672 R .111(Since k)5.112 F -.15(ey)-.1 G +.111(/data pairs may be cached in memory).15 F 2.611(,f)-.65 G .111 +(ailing to sync the \214le)455.666 672 R .494(with a)144 684 R F3(close)2.994 E +F0(or)2.994 E F3(sync)2.994 E F0 .495 +(function may result in inconsistent or lost information.)2.994 F F3(Close) +5.495 E F0 .495(routines return)2.995 F(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 +G(istrib)132.57 732 Q 89.875(ution September)-.2 F(13, 1993)2.5 E(1)535 732 Q +EP +%%Page: 2 2 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.01(DBOPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.01(anual DBOPEN\(3\))340.17 48 R(-1 on error \(setting)144 84 Q +/F1 10/Times-Italic@0 SF(errno)2.5 E F0 2.5(\)a).18 G(nd 0 on success.)254.43 +84 Q 21.28(del A)108 100.8 R(pointer to a routine to remo)2.5 E .3 -.15(ve k) +-.15 H -.15(ey).05 G(/data pairs from the database.).15 E(The parameter)144 +117.6 Q F1<8d61>2.5 E(g)-.1 E F0(may be set to the follo)2.5 E(wing v)-.25 E +(alue:)-.25 E(R_CURSOR)144 134.4 Q .289 +(Delete the record referenced by the cursor)180 146.4 R 5.288(.T)-.55 G .288 +(he cursor must ha)363.342 146.4 R .588 -.15(ve p)-.2 H(re).15 E .288 +(viously been initial-)-.25 F(ized.)180 158.4 Q F1(Delete)144 175.2 Q F0 .03 +(routines return -1 on error \(setting)2.53 F F1(errno)2.53 E F0 .03 +(\), 0 on success, and 1 if the speci\214ed).18 F F1 -.1(ke)2.53 G(y)-.2 E F0 +-.1(wa)2.53 G 2.53(sn).1 G .03(ot in)521.91 175.2 R(the \214le.)144 187.2 Q +25.17(fd A)108 204 R .451 +(pointer to a routine which returns a \214le descriptor representati)2.951 F +.75 -.15(ve o)-.25 H 2.95(ft).15 G .45(he underlying database.)431.73 204 R(A) +5.45 E .942(\214le descriptor referencing the same \214le will be returned to \ +all processes which call)144 216 R F1(dbopen)3.442 E F0(with)3.442 E 1.629 +(the same)144 228 R F1(\214le)4.129 E F0 4.129(name. This)4.129 F 1.628 +(\214le descriptor may be safely used as a ar)4.128 F 1.628(gument to the)-.18 +F F1(fcntl)4.128 E F0 1.628(\(2\) and).51 F F1(\215oc)144 240 Q(k)-.2 E F0 .425 +(\(2\) locking functions.).67 F .425 +(The \214le descriptor is not necessarily associated with an)5.425 F 2.925(yo) +-.15 G 2.925(ft)492.7 240 S .425(he under)501.735 240 R(-)-.2 E .198 +(lying \214les used by the access method.)144 252 R .198 +(No \214le descriptor is a)5.198 F -.25(va)-.2 G .198 +(ilable for in memory databases.).25 F F1(Fd)5.198 E F0 +(routines return -1 on error \(setting)144 264 Q F1(errno)2.5 E F0 +(\), and the \214le descriptor on success.).18 E 21.28(get A)108 280.8 R +(pointer to a routine which is the interf)2.5 E .001(ace for k)-.1 F -.15(ey) +-.1 G .001(ed retrie).15 F -.25(va)-.25 G 2.501(lf).25 G .001 +(rom the database.)399.755 280.8 R .001(The address and)5.001 F .061 +(length of the data associated with the speci\214ed)144 292.8 R F1 -.1(ke)2.561 +G(y)-.2 E F0 .06(are returned in the structure referenced by)2.561 F F1(data) +2.56 E F0(.).26 E F1(Get)144 304.8 Q F0(routines return -1 on error \(setting) +2.5 E F1(errno)2.5 E F0(\), 0 on success, and 1 if the).18 E F1 -.1(ke)2.5 G(y) +-.2 E F0 -.1(wa)2.5 G 2.5(sn).1 G(ot in the \214le.)471.66 304.8 Q 20.72(put A) +108 321.6 R(pointer to a routine to store k)2.5 E -.15(ey)-.1 G +(/data pairs in the database.).15 E(The parameter)144 338.4 Q F1<8d61>2.5 E(g) +-.1 E F0(may be set to one of the follo)2.5 E(wing v)-.25 E(alues:)-.25 E +(R_CURSOR)144 355.2 Q .051(Replace the k)180 367.2 R -.15(ey)-.1 G .051 +(/data pair referenced by the cursor).15 F 5.052(.T)-.55 G .052 +(he cursor must ha)393.98 367.2 R .352 -.15(ve p)-.2 H(re).15 E .052 +(viously been)-.25 F(initialized.)180 379.2 Q(R_IAFTER)144 396 Q 1.165 +(Append the data immediately after the data referenced by)180 408 R F1 -.1(ke) +3.664 G(y)-.2 E F0 3.664(,c).32 G 1.164(reating a ne)446.758 408 R 3.664(wk) +-.25 G -.15(ey)511.27 408 S(/data).15 E(pair)180 420 Q 6.065(.T)-.55 G 1.065 +(he record number of the appended k)209.675 420 R -.15(ey)-.1 G 1.065 +(/data pair is returned in the).15 F F1 -.1(ke)3.565 G(y)-.2 E F0(structure.) +3.565 E(\(Applicable only to the DB_RECNO access method.\))180 432 Q(R_IBEFORE) +144 448.8 Q 1.293(Insert the data immediately before the data referenced by)180 +460.8 R F1 -.1(ke)3.793 G(y)-.2 E F0 3.793(,c).32 G 1.293(reating a ne)446.371 +460.8 R 3.793(wk)-.25 G -.15(ey)511.27 460.8 S(/data).15 E(pair)180 472.8 Q +6.54(.T)-.55 G 1.54(he record number of the inserted k)210.15 472.8 R -.15(ey) +-.1 G 1.541(/data pair is returned in the).15 F F1 -.1(ke)4.041 G(y)-.2 E F0 +(structure.)4.041 E(\(Applicable only to the DB_RECNO access method.\))180 +484.8 Q(R_NOO)144 501.6 Q(VER)-.5 E(WRITE)-.55 E(Enter the ne)180 513.6 Q 2.5 +(wk)-.25 G -.15(ey)242.69 513.6 S(/data pair only if the k).15 E .3 -.15(ey d) +-.1 H(oes not pre).15 E(viously e)-.25 E(xist.)-.15 E(R_SETCURSOR)144 530.4 Q +1.36(Store the k)180 542.4 R -.15(ey)-.1 G 1.36(/data pair).15 F 3.86(,s)-.4 G +1.359(etting or initializing the position of the cursor to reference it.)283.94 +542.4 R(\(Applicable only to the DB_BTREE and DB_RECNO access methods.\))180 +554.4 Q .563(R_SETCURSOR is a)144 571.2 R -.25(va)-.2 G .564 +(ilable only for the DB_BTREE and DB_RECNO access methods because).25 F +(it implies that the k)144 583.2 Q -.15(ey)-.1 G 2.5(sh).15 G -2.25 -.2(av e) +241.81 583.2 T(an inherent order which does not change.)2.7 E .416 +(R_IAFTER and R_IBEFORE are a)144 600 R -.25(va)-.2 G .416 +(ilable only for the DB_RECNO access method because the).25 F(y)-.15 E 1.221 +(each imply that the access method is able to create ne)144 612 R 3.722(wk)-.25 +G -.15(ey)385.644 612 S 3.722(s. This).15 F 1.222(is only true if the k)3.722 F +-.15(ey)-.1 G 3.722(sa).15 G(re)532.23 612 Q +(ordered and independent, record numbers for e)144 624 Q(xample.)-.15 E .289 +(The def)144 640.8 R .289(ault beha)-.1 F .289(vior of the)-.2 F F1(put)2.789 E +F0 .289(routines is to enter the ne)2.789 F 2.789(wk)-.25 G -.15(ey)388.998 +640.8 S .288(/data pair).15 F 2.788(,r)-.4 G .288(eplacing an)444.284 640.8 R +2.788(yp)-.15 G(re)503.03 640.8 Q(viously)-.25 E -.15(ex)144 652.8 S(isting k) +.15 E -.15(ey)-.1 G(.)-.5 E F1(Put)144 669.6 Q F0 .37 +(routines return -1 on error \(setting)2.87 F F1(errno)2.87 E F0 .37 +(\), 0 on success, and 1 if the R_NOO).18 F(VER)-.5 E(WRITE)-.55 E F1<8d61>2.87 +E(g)-.1 E F0 -.1(wa)144 681.6 S 2.5(ss).1 G(et and the k)165.84 681.6 Q .3 -.15 +(ey a)-.1 H(lready e).15 E(xists in the \214le.)-.15 E(4.4 Berk)72 732 Q(ele) +-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 89.875(ution September)-.2 F(13, 1993) +2.5 E(2)535 732 Q EP +%%Page: 3 3 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.01(DBOPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.01(anual DBOPEN\(3\))340.17 48 R 20.17(seq A)108 84 R .002 +(pointer to a routine which is the interf)2.502 F .002 +(ace for sequential retrie)-.1 F -.25(va)-.25 G 2.502(lf).25 G .002 +(rom the database.)416.694 84 R .001(The address)5.001 F .219 +(and length of the k)144 96 R .519 -.15(ey a)-.1 H .219 +(re returned in the structure referenced by).15 F/F1 10/Times-Italic@0 SF -.1 +(ke)2.72 G(y)-.2 E F0 2.72(,a).32 G .22(nd the address and length of)426.42 96 +R(the data are returned in the structure referenced by)144 108 Q F1(data)2.5 E +F0(.).26 E .937(Sequential k)144 124.8 R -.15(ey)-.1 G .937(/data pair retrie) +.15 F -.25(va)-.25 G 3.437(lm).25 G .936(ay be)289.748 124.8 R .936(gin at an) +-.15 F 3.436(yt)-.15 G .936(ime, and the position of the `)359.292 124.8 R +(`cursor')-.74 E 3.436('i)-.74 G 3.436(sn)519.894 124.8 S(ot)532.22 124.8 Q(af) +144 136.8 Q 1.585(fected by calls to the)-.25 F F1(del)4.085 E F0(,).51 E F1 +-.1(ge)4.085 G(t).1 E F0(,).68 E F1(put)4.086 E F0 4.086(,o).68 G(r)308.452 +136.8 Q F1(sync)4.086 E F0 4.086(routines. Modi\214cations)4.086 F 1.586 +(to the database during a)4.086 F 1.404(sequential scan will be re\215ected in\ + the scan, i.e. records inserted behind the cursor will not be)144 148.8 R +(returned while records inserted in front of the cursor will be returned.)144 +160.8 Q(The \215ag v)144 177.6 Q(alue)-.25 E/F2 10/Times-Bold@0 SF(must)2.5 E +F0(be set to one of the follo)2.5 E(wing v)-.25 E(alues:)-.25 E(R_CURSOR)144 +194.4 Q .523(The data associated with the speci\214ed k)180 206.4 R .824 -.15 +(ey i)-.1 H 3.024(sr).15 G 3.024(eturned. This)367.236 206.4 R(dif)3.024 E .524 +(fers from the)-.25 F F1 -.1(ge)3.024 G(t).1 E F0(routines)3.024 E 1.143 +(in that it sets or initializes the cursor to the location of the k)180 218.4 R +1.443 -.15(ey a)-.1 H 3.642(sw).15 G 3.642(ell. \(Note,)464.924 218.4 R 1.142 +(for the)3.642 F 1.275(DB_BTREE access method, the returned k)180 230.4 R 1.575 +-.15(ey i)-.1 H 3.775(sn).15 G 1.276(ot necessarily an e)386.425 230.4 R 1.276 +(xact match for the)-.15 F .598(speci\214ed k)180 242.4 R -.15(ey)-.1 G 5.598 +(.T)-.5 G .598(he returned k)246.396 242.4 R .898 -.15(ey i)-.1 H 3.098(st).15 +G .598(he smallest k)325.188 242.4 R .898 -.15(ey g)-.1 H .598 +(reater than or equal to the speci\214ed).15 F -.1(ke)180 254.4 S 1.3 -.65 +(y, p)-.05 H(ermitting partial k).65 E .3 -.15(ey m)-.1 H +(atches and range searches.\)).15 E(R_FIRST)144 271.2 Q 1.043(The \214rst k)180 +283.2 R -.15(ey)-.1 G 1.044(/data pair of the database is returned, and the cu\ +rsor is set or initialized to).15 F(reference it.)180 295.2 Q(R_LAST)144 312 Q +.085(The last k)180 324 R -.15(ey)-.1 G .085(/data pair of the database is ret\ +urned, and the cursor is set or initialized to ref-).15 F(erence it.)180 336 Q +(\(Applicable only to the DB_BTREE and DB_RECNO access methods.\))5 E(R_NEXT) +144 352.8 Q(Retrie)180 364.8 Q .604 -.15(ve t)-.25 H .304(he k).15 F -.15(ey) +-.1 G .304(/data pair immediately after the cursor).15 F 5.304(.I)-.55 G 2.804 +(ft)410.622 364.8 S .305(he cursor is not yet set, this is)419.536 364.8 R +(the same as the R_FIRST \215ag.)180 376.8 Q(R_PREV)144 393.6 Q(Retrie)180 +405.6 Q .755 -.15(ve t)-.25 H .455(he k).15 F -.15(ey)-.1 G .455 +(/data pair immediately before the cursor).15 F 5.455(.I)-.55 G 2.955(ft)419.05 +405.6 S .454(he cursor is not yet set, this)428.115 405.6 R .62 +(is the same as the R_LAST \215ag.)180 417.6 R .621 +(\(Applicable only to the DB_BTREE and DB_RECNO)5.621 F(access methods.\))180 +429.6 Q .911(R_LAST and R_PREV are a)144 446.4 R -.25(va)-.2 G .911 +(ilable only for the DB_BTREE and DB_RECNO access methods).25 F(because the)144 +458.4 Q 2.5(ye)-.15 G(ach imply that the k)202.16 458.4 Q -.15(ey)-.1 G 2.5(sh) +.15 G -2.25 -.2(av e)302.18 458.4 T(an inherent order which does not change.) +2.7 E F1(Seq)144 475.2 Q F0 .061(routines return -1 on error \(setting)2.561 F +F1(errno)2.561 E F0 .061(\), 0 on success and 1 if there are no k).18 F -.15 +(ey)-.1 G .061(/data pairs less).15 F .35 +(than or greater than the speci\214ed or current k)144 487.2 R -.15(ey)-.1 G +5.349(.I)-.5 G 2.849(ft)346.467 487.2 S .349 +(he DB_RECNO access method is being used,)355.426 487.2 R .025 +(and if the database \214le is a character special \214le and no complete k)144 +499.2 R -.15(ey)-.1 G .025(/data pairs are currently a).15 F -.25(va)-.2 G(il-) +.25 E(able, the)144 511.2 Q F1(seq)2.5 E F0(routines return 2.)2.5 E 15.17 +(sync A)108 528 R .458(pointer to a routine to \215ush an)2.958 F 2.957(yc)-.15 +G .457(ached information to disk.)289.72 528 R .457 +(If the database is in memory only)5.457 F(,)-.65 E(the)144 540 Q F1(sync)2.5 E +F0(routine has no ef)2.5 E(fect and will al)-.25 E -.1(wa)-.1 G(ys succeed.).1 +E(The \215ag v)144 556.8 Q(alue may be set to the follo)-.25 E(wing v)-.25 E +(alue:)-.25 E(R_RECNOSYNC)144 573.6 Q .077(If the DB_RECNO access method is be\ +ing used, this \215ag causes the sync routine to apply)180 585.6 R .75(to the \ +btree \214le which underlies the recno \214le, not the recno \214le itself.)180 +597.6 R .75(\(See the)5.75 F F1(bfname)3.25 E F0(\214eld of the)180 609.6 Q F1 +-.37(re)2.5 G(cno).37 E F0(\(3\) manual page for more information.\)).18 E F1 +(Sync)144 626.4 Q F0(routines return -1 on error \(setting)2.5 E F1(errno)2.5 E +F0 2.5(\)a).18 G(nd 0 on success.)336.91 626.4 Q/F3 9/Times-Bold@0 SF(KEY/D)72 +643.2 Q -1.35 -.855(AT A)-.315 H -.666(PA)3.105 G(IRS).666 E F0 .134 +(Access to all \214le types is based on k)108 655.2 R -.15(ey)-.1 G .134 +(/data pairs.).15 F .134(Both k)5.134 F -.15(ey)-.1 G 2.634(sa).15 G .134 +(nd data are represented by the follo)359.078 655.2 R .135(wing data)-.25 F +(structure:)108 667.2 Q(typedef struct {)108 684 Q(4.4 Berk)72 732 Q(ele)-.1 E +2.5(yD)-.15 G(istrib)132.57 732 Q 89.875(ution September)-.2 F(13, 1993)2.5 E +(3)535 732 Q EP +%%Page: 4 4 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.01(DBOPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.01(anual DBOPEN\(3\))340.17 48 R -.2(vo)144 84 S(id *data;).2 E +(size_t size;)144 96 Q 2.5(}D)108 108 S(BT)122.52 108 Q(;)-.55 E +(The elements of the DBT structure are de\214ned as follo)108 124.8 Q(ws:)-.25 +E 16.84(data A)108 141.6 R(pointer to a byte string.)2.5 E 17.95(size The)108 +158.4 R(length of the byte string.)2.5 E -2.15 -.25(Ke y)108 175.2 T .829(and \ +data byte strings may reference strings of essentially unlimited length althou\ +gh an)3.579 F 3.328(yt)-.15 G 1.028 -.1(wo o)492.894 175.2 T 3.328(ft).1 G(hem) +522.78 175.2 Q 1.133(must \214t into a)108 187.2 R -.25(va)-.2 G 1.134 +(ilable memory at the same time.).25 F 1.134 +(It should be noted that the access methods pro)6.134 F 1.134(vide no)-.15 F +(guarantees about byte string alignment.)108 199.2 Q/F1 9/Times-Bold@0 SF(ERR) +72 216 Q(ORS)-.27 E F0(The)108 228 Q/F2 10/Times-Italic@0 SF(dbopen)3.389 E F0 +.889(routine may f)3.389 F .889(ail and set)-.1 F F2(errno)3.388 E F0 .888 +(for an)3.388 F 3.388(yo)-.15 G 3.388(ft)324.376 228 S .888 +(he errors speci\214ed for the library routines)333.874 228 R F2(open)3.388 E +F0(\(2\)).24 E(and)108 240 Q F2(malloc)2.5 E F0(\(3\) or the follo).31 E(wing:) +-.25 E([EFTYPE])108 256.8 Q 2.5<418c>144 268.8 S(le is incorrectly formatted.) +159.28 268.8 Q([EINV)108 285.6 Q(AL])-1.35 E 2.812(Ap)144 297.6 S .313(aramete\ +r has been speci\214ed \(hash function, pad byte etc.\) that is incompatible w\ +ith the current)159.032 297.6 R .406 +(\214le speci\214cation or which is not meaningful for the function \(for e)144 +309.6 R .405(xample, use of the cursor with-)-.15 F .099 +(out prior initialization\) or there is a mismatch between the v)144 321.6 R .1 +(ersion number of \214le and the softw)-.15 F(are.)-.1 E(The)108 338.4 Q F2 +(close)3.469 E F0 .969(routines may f)3.469 F .969(ail and set)-.1 F F2(errno) +3.469 E F0 .969(for an)3.469 F 3.469(yo)-.15 G 3.469(ft)320.18 338.4 S .969 +(he errors speci\214ed for the library routines)329.759 338.4 R F2(close)3.468 +E F0(\(2\),).18 E F2 -.37(re)108 350.4 S(ad).37 E F0(\(2\),).77 E F2(write)2.5 +E F0(\(2\),).18 E F2(fr)2.5 E(ee)-.37 E F0(\(3\), or).18 E F2(fsync)2.5 E F0 +(\(2\).).31 E(The)108 367.2 Q F2(del)2.969 E F0(,).51 E F2 -.1(ge)2.969 G(t).1 +E F0(,).68 E F2(put)2.969 E F0(and)2.969 E F2(seq)2.969 E F0 .469 +(routines may f)2.969 F .469(ail and set)-.1 F F2(errno)2.97 E F0 .47(for an) +2.97 F 2.97(yo)-.15 G 2.97(ft)377.59 367.2 S .47 +(he errors speci\214ed for the library rou-)386.67 367.2 R(tines)108 379.2 Q F2 +-.37(re)2.5 G(ad).37 E F0(\(2\),).77 E F2(write)2.5 E F0(\(2\),).18 E F2(fr)2.5 +E(ee)-.37 E F0(\(3\) or).18 E F2(malloc)2.5 E F0(\(3\).).31 E(The)108 396 Q F2 +(fd)2.5 E F0(routines will f)2.5 E(ail and set)-.1 E F2(errno)2.5 E F0 +(to ENOENT for in memory databases.)2.5 E(The)108 412.8 Q F2(sync)2.5 E F0 +(routines may f)2.5 E(ail and set)-.1 E F2(errno)2.5 E F0(for an)2.5 E 2.5(yo) +-.15 G 2.5(ft)307.71 412.8 S(he errors speci\214ed for the library routine) +316.32 412.8 Q F2(fsync)2.5 E F0(\(2\).).31 E F1(SEE ALSO)72 429.6 Q F2(btr)108 +441.6 Q(ee)-.37 E F0(\(3\),).18 E F2(hash)2.5 E F0(\(3\),).28 E F2(mpool)2.5 E +F0(\(3\),).51 E F2 -.37(re)2.5 G(cno).37 E F0(\(3\)).18 E F2 .904(LIBTP: P)108 +465.6 R(ortable)-.8 E 3.404(,M)-.1 G .904(odular T)189.738 465.6 R -.15(ra)-.55 +G .904(nsactions for UNIX).15 F F0 3.404(,M).94 G(ar)328.884 465.6 Q .904 +(go Seltzer)-.18 F 3.403(,M)-.4 G .903(ichael Olson, USENIX proceedings,) +392.041 465.6 R -.4(Wi)108 477.6 S(nter 1992.).4 E F1 -.09(BU)72 494.4 S(GS).09 +E F0 .399(The typedef DBT is a mnemonic for `)108 506.4 R .399 +(`data base thang')-.74 F .399(', and w)-.74 F .399 +(as used because noone could think of a rea-)-.1 F(sonable name that w)108 +518.4 Q(asn')-.1 E 2.5(ta)-.18 G(lready used.)216.03 518.4 Q +(The \214le descriptor interf)108 535.2 Q +(ace is a kluge and will be deleted in a future v)-.1 E(ersion of the interf) +-.15 E(ace.)-.1 E(None of the access methods pro)108 552 Q(vide an)-.15 E 2.5 +(yf)-.15 G(orm of concurrent access, locking, or transactions.)275.16 552 Q +(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 89.875 +(ution September)-.2 F(13, 1993)2.5 E(4)535 732 Q EP +%%Trailer +end +%%EOF diff --git a/src/util/db2/docs/hash.3.ps b/src/util/db2/docs/hash.3.ps new file mode 100644 index 0000000000..18303cfb7c --- /dev/null +++ b/src/util/db2/docs/hash.3.ps @@ -0,0 +1,292 @@ +%!PS-Adobe-3.0 +%%Creator: groff version 1.08 +%%DocumentNeededResources: font Times-Roman +%%+ font Times-Bold +%%+ font Times-Italic +%%DocumentSuppliedResources: procset grops 1.08 0 +%%Pages: 2 +%%PageOrder: Ascend +%%Orientation: Portrait +%%EndComments +%%BeginProlog +%%BeginResource: procset grops 1.08 0 +/setpacking where{ +pop +currentpacking +true setpacking +}if +/grops 120 dict dup begin +/SC 32 def +/A/show load def +/B{0 SC 3 -1 roll widthshow}bind def +/C{0 exch ashow}bind def +/D{0 exch 0 SC 5 2 roll awidthshow}bind def +/E{0 rmoveto show}bind def +/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def +/G{0 rmoveto 0 exch ashow}bind def +/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/I{0 exch rmoveto show}bind def +/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def +/K{0 exch rmoveto 0 exch ashow}bind def +/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/M{rmoveto show}bind def +/N{rmoveto 0 SC 3 -1 roll widthshow}bind def +/O{rmoveto 0 exch ashow}bind def +/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/Q{moveto show}bind def +/R{moveto 0 SC 3 -1 roll widthshow}bind def +/S{moveto 0 exch ashow}bind def +/T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/SF{ +findfont exch +[exch dup 0 exch 0 exch neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/MF{ +findfont +[5 2 roll +0 3 1 roll +neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/level0 0 def +/RES 0 def +/PL 0 def +/LS 0 def +/PLG{ +gsave newpath clippath pathbbox grestore +exch pop add exch pop +}bind def +/BP{ +/level0 save def +1 setlinecap +1 setlinejoin +72 RES div dup scale +LS{ +90 rotate +}{ +0 PL translate +}ifelse +1 -1 scale +}bind def +/EP{ +level0 restore +showpage +}bind def +/DA{ +newpath arcn stroke +}bind def +/SN{ +transform +.25 sub exch .25 sub exch +round .25 add exch round .25 add exch +itransform +}bind def +/DL{ +SN +moveto +SN +lineto stroke +}bind def +/DC{ +newpath 0 360 arc closepath +}bind def +/TM matrix def +/DE{ +TM currentmatrix pop +translate scale newpath 0 0 .5 0 360 arc closepath +TM setmatrix +}bind def +/RC/rcurveto load def +/RL/rlineto load def +/ST/stroke load def +/MT/moveto load def +/CL/closepath load def +/FL{ +currentgray exch setgray fill setgray +}bind def +/BL/fill load def +/LW/setlinewidth load def +/RE{ +findfont +dup maxlength 1 index/FontName known not{1 add}if dict begin +{ +1 index/FID ne{def}{pop pop}ifelse +}forall +/Encoding exch def +dup/FontName exch def +currentdict end definefont pop +}bind def +/DEFS 0 def +/EBEGIN{ +moveto +DEFS begin +}bind def +/EEND/end load def +/CNT 0 def +/level1 0 def +/PBEGIN{ +/level1 save def +translate +div 3 1 roll div exch scale +neg exch neg exch translate +0 setgray +0 setlinecap +1 setlinewidth +0 setlinejoin +10 setmiterlimit +[]0 setdash +/setstrokeadjust where{ +pop +false setstrokeadjust +}if +/setoverprint where{ +pop +false setoverprint +}if +newpath +/CNT countdictstack def +userdict begin +/showpage{}def +}bind def +/PEND{ +clear +countdictstack CNT sub{end}repeat +level1 restore +}bind def +end def +/setpacking where{ +pop +setpacking +}if +%%EndResource +%%IncludeResource: font Times-Roman +%%IncludeResource: font Times-Bold +%%IncludeResource: font Times-Italic +grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 def/PL +792 def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron/scaron/zcaron +/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/space +/exclam/quotedbl/numbersign/dollar/percent/ampersand/quoteright/parenleft +/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four +/five/six/seven/eight/nine/colon/semicolon/less/equal/greater/question/at/A/B/C +/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash +/bracketright/circumflex/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q +/r/s/t/u/v/w/x/y/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase +/guillemotleft/guillemotright/bullet/florin/fraction/perthousand/dagger +/daggerdbl/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut +/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash +/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen/brokenbar +/section/dieresis/copyright/ordfeminine/guilsinglleft/logicalnot/minus +/registered/macron/degree/plusminus/twosuperior/threesuperior/acute/mu +/paragraph/periodcentered/cedilla/onesuperior/ordmasculine/guilsinglright +/onequarter/onehalf/threequarters/questiondown/Agrave/Aacute/Acircumflex/Atilde +/Adieresis/Aring/AE/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute +/Icircumflex/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis +/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn/germandbls +/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla/egrave/eacute +/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis/eth/ntilde/ograve +/oacute/ocircumflex/otilde/odieresis/divide/oslash/ugrave/uacute/ucircumflex +/udieresis/yacute/thorn/ydieresis]def/Times-Italic@0 ENC0/Times-Italic RE +/Times-Bold@0 ENC0/Times-Bold RE/Times-Roman@0 ENC0/Times-Roman RE +%%EndProlog +%%Page: 1 1 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 136.79(HASH\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 136.79(anual HASH\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 84 S +(ME).18 E F0(hash \255 hash database access method)108 96 Q F1(SYNOPSIS)72 +112.8 Q/F2 10/Times-Bold@0 SF(#include <sys/types.h>)108 124.8 Q(#include <db) +108 136.8 Q(.h>)-.4 E F1(DESCRIPTION)72 153.6 Q F0 .29(The routine)108 165.6 R +/F3 10/Times-Italic@0 SF(dbopen)2.79 E F0 .29(is the library interf)2.79 F .29 +(ace to database \214les.)-.1 F .29 +(One of the supported \214le formats is hash \214les.)5.29 F .974 +(The general description of the database access methods is in)108 177.6 R F3 +(dbopen)3.475 E F0 .975(\(3\), this manual page describes only).24 F +(the hash speci\214c information.)108 189.6 Q(The hash data structure is an e) +108 206.4 Q(xtensible, dynamic hashing scheme.)-.15 E .83 +(The access method speci\214c data structure pro)108 223.2 R .83(vided to)-.15 +F F3(dbopen)3.33 E F0 .83(is de\214ned in the <db)3.33 F .83 +(.h> include \214le as fol-)-.4 F(lo)108 235.2 Q(ws:)-.25 E(typedef struct {) +108 259.2 Q(int bsize;)144 271.2 Q(int cachesize;)144 283.2 Q(int f)144 295.2 Q +-.1(fa)-.25 G(ctor;).1 E(u_long \(*hash\)\(const v)144 307.2 Q +(oid *, size_t\);)-.2 E(int lorder;)144 319.2 Q(int nelem;)144 331.2 Q 2.5(}H) +108 343.2 S(ASHINFO;)122.52 343.2 Q +(The elements of this structure are as follo)108 360 Q(ws:)-.25 E(bsize)108 +376.8 Q F3(Bsize)144 376.8 Q F0 1.393(de\214nes the hash table b)3.893 F(uck) +-.2 E 1.393(et size, and is, by def)-.1 F 1.394(ault, 256 bytes.)-.1 F 1.394 +(It may be preferable to)6.394 F +(increase the page size for disk-resident tables and tables with lar)144 388.8 +Q(ge data items.)-.18 E(cachesize)108 405.6 Q 3.16(As)144 417.6 S .66 +(uggested maximum size, in bytes, of the memory cache.)158.27 417.6 R .659 +(This v)5.659 F .659(alue is)-.25 F F2(only)3.159 E F0(advisory)3.159 E 3.159 +(,a)-.65 G .659(nd the)514.621 417.6 R +(access method will allocate more memory rather than f)144 429.6 Q(ail.)-.1 E +-2.1 -.25(ff a)108 446.4 T(ctor).25 E F3(Ffactor)9.7 E F0 .482 +(indicates a desired density within the hash table.)2.981 F .482 +(It is an approximation of the number of)5.482 F -.1(ke)144 458.4 S .429 +(ys allo)-.05 F .429(wed to accumulate in an)-.25 F 2.929(yo)-.15 G .429(ne b) +291.454 458.4 R(uck)-.2 E .429(et, determining when the hash table gro)-.1 F +.428(ws or shrinks.)-.25 F(The def)144 470.4 Q(ault v)-.1 E(alue is 8.)-.25 E +(hash)108 487.2 Q F3(Hash)144 487.2 Q F0 .1(is a user de\214ned hash function.) +2.6 F .1(Since no hash function performs equally well on all possible)5.1 F +.924(data, the user may \214nd that the b)144 499.2 R .923 +(uilt-in hash function does poorly on a particular data set.)-.2 F(User)5.923 E +1.408(speci\214ed hash functions must tak)144 511.2 R 3.909(et)-.1 G 1.609 -.1 +(wo a)293.431 511.2 T -.18(rg).1 G 1.409 +(uments \(a pointer to a byte string and a length\) and).18 F +(return an u_long to be used as the hash v)144 523.2 Q(alue.)-.25 E 9.62 +(lorder The)108 540 R 1.597(byte order for inte)4.097 F 1.596 +(gers in the stored database metadata.)-.15 F 1.596 +(The number should represent the)6.596 F .688(order as an inte)144 552 R .689 +(ger; for e)-.15 F .689(xample, big endian order w)-.15 F .689 +(ould be the number 4,321.)-.1 F(If)5.689 E F3(lor)3.189 E(der)-.37 E F0 .689 +(is 0 \(no)3.189 F .822(order is speci\214ed\) the current host order is used.) +144 564 R .822(If the)5.822 F .822(\214le already e)5.822 F .821 +(xists, the speci\214ed v)-.15 F .821(alue is)-.25 F(ignored and the v)144 576 +Q(alue speci\214ed when the tree w)-.25 E(as created is used.)-.1 E(nelem)108 +592.8 Q F3(Nelem)144 592.8 Q F0 .701 +(is an estimate of the \214nal size of the hash table.)3.2 F .701 +(If not set or set too lo)5.701 F 2.001 -.65(w, h)-.25 H .701(ash tables will) +.65 F -.15(ex)144 604.8 S .448(pand gracefully as k).15 F -.15(ey)-.1 G 2.948 +(sa).15 G .448(re entered, although a slight performance de)255.912 604.8 R +.447(gradation may be noticed.)-.15 F(The def)144 616.8 Q(ault v)-.1 E +(alue is 1.)-.25 E .79(If the \214le already e)108 633.6 R .79 +(xists \(and the O_TR)-.15 F .79(UNC \215ag is not speci\214ed\), the v)-.4 F +.79(alues speci\214ed for the parameters)-.25 F(bsize, f)108 645.6 Q -.1(fa) +-.25 G(ctor).1 E 2.5(,l)-.4 G(order and nelem are ignored and the v)167.23 +645.6 Q(alues speci\214ed when the tree w)-.25 E(as created are used.)-.1 E +1.232(If a hash function is speci\214ed,)108 662.4 R F3(hash_open)3.731 E F0 +1.231(will attempt to determine if the hash function speci\214ed is the)3.731 F +(same as the one with which the database w)108 674.4 Q(as created, and will f) +-.1 E(ail if it is not.)-.1 E(Backw)108 691.2 Q .861(ard compatible interf)-.1 +F .861(aces to the routines described in)-.1 F F3(dbm)3.362 E F0 .862 +(\(3\), and).32 F F3(ndbm)3.362 E F0 .862(\(3\) are pro).32 F .862(vided, ho) +-.15 F(we)-.25 E -.15(ve)-.25 G -.4(r,).15 G(4.4 Berk)72 732 Q(ele)-.1 E 2.5 +(yD)-.15 G(istrib)132.57 732 Q 96.815(ution August)-.2 F(17, 1993)2.5 E(1)535 +732 Q EP +%%Page: 2 2 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 136.79(HASH\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 136.79(anual HASH\(3\))340.17 48 R(these interf)108 84 Q +(aces are not compatible with pre)-.1 E(vious \214le formats.)-.25 E/F1 9 +/Times-Bold@0 SF(SEE ALSO)72 100.8 Q/F2 10/Times-Italic@0 SF(btr)108 112.8 Q +(ee)-.37 E F0(\(3\),).18 E F2(dbopen)2.5 E F0(\(3\),).24 E F2(mpool)2.5 E F0 +(\(3\),).51 E F2 -.37(re)2.5 G(cno).37 E F0(\(3\)).18 E F2(Dynamic Hash T)108 +136.8 Q(ables)-.92 E F0 2.5(,P).27 G(er)206.79 136.8 Q(-Ak)-.2 E 2.5(eL)-.1 G +(arson, Communications of the A)242.86 136.8 Q(CM, April 1988.)-.4 E F2 2.5(AN) +108 160.8 S .3 -.15(ew H)123.28 160.8 T(ash P).15 E(ac)-.8 E(ka)-.2 E .2 -.1 +(ge f)-.1 H(or UNIX).1 E F0 2.5(,M).94 G(ar)248.41 160.8 Q(go Seltzer)-.18 E +2.5(,U)-.4 G(SENIX Proceedings, W)308.09 160.8 Q(inter 1991.)-.4 E F1 -.09(BU) +72 177.6 S(GS).09 E F0(Only big and little endian byte order is supported.)108 +189.6 Q(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 96.815 +(ution August)-.2 F(17, 1993)2.5 E(2)535 732 Q EP +%%Trailer +end +%%EOF diff --git a/src/util/db2/docs/hash.usenix.ps b/src/util/db2/docs/hash.usenix.ps new file mode 100644 index 0000000000..acdea09926 --- /dev/null +++ b/src/util/db2/docs/hash.usenix.ps @@ -0,0 +1,12209 @@ +%!PS-Adobe-1.0 +%%Creator: utopia:margo (& Seltzer,608-13E,8072,) +%%Title: stdin (ditroff) +%%CreationDate: Tue Dec 11 15:06:45 1990 +%%EndComments +% @(#)psdit.pro 1.3 4/15/88 +% lib/psdit.pro -- prolog for psdit (ditroff) files +% Copyright (c) 1984, 1985 Adobe Systems Incorporated. All Rights Reserved. +% last edit: shore Sat Nov 23 20:28:03 1985 +% RCSID: $Header$ + +% Changed by Edward Wang (edward@ucbarpa.berkeley.edu) to handle graphics, +% 17 Feb, 87. + +/$DITroff 140 dict def $DITroff begin +/fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def +/xi{0 72 11 mul translate 72 resolution div dup neg scale 0 0 moveto + /fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def F + /pagesave save def}def +/PB{save /psv exch def currentpoint translate + resolution 72 div dup neg scale 0 0 moveto}def +/PE{psv restore}def +/arctoobig 90 def /arctoosmall .05 def +/m1 matrix def /m2 matrix def /m3 matrix def /oldmat matrix def +/tan{dup sin exch cos div}def +/point{resolution 72 div mul}def +/dround {transform round exch round exch itransform}def +/xT{/devname exch def}def +/xr{/mh exch def /my exch def /resolution exch def}def +/xp{}def +/xs{docsave restore end}def +/xt{}def +/xf{/fontname exch def /slotno exch def fontnames slotno get fontname eq not + {fonts slotno fontname findfont put fontnames slotno fontname put}if}def +/xH{/fontheight exch def F}def +/xS{/fontslant exch def F}def +/s{/fontsize exch def /fontheight fontsize def F}def +/f{/fontnum exch def F}def +/F{fontheight 0 le{/fontheight fontsize def}if + fonts fontnum get fontsize point 0 0 fontheight point neg 0 0 m1 astore + fontslant 0 ne{1 0 fontslant tan 1 0 0 m2 astore m3 concatmatrix}if + makefont setfont .04 fontsize point mul 0 dround pop setlinewidth}def +/X{exch currentpoint exch pop moveto show}def +/N{3 1 roll moveto show}def +/Y{exch currentpoint pop exch moveto show}def +/S{show}def +/ditpush{}def/ditpop{}def +/AX{3 -1 roll currentpoint exch pop moveto 0 exch ashow}def +/AN{4 2 roll moveto 0 exch ashow}def +/AY{3 -1 roll currentpoint pop exch moveto 0 exch ashow}def +/AS{0 exch ashow}def +/MX{currentpoint exch pop moveto}def +/MY{currentpoint pop exch moveto}def +/MXY{moveto}def +/cb{pop}def % action on unknown char -- nothing for now +/n{}def/w{}def +/p{pop showpage pagesave restore /pagesave save def}def +/Dt{/Dlinewidth exch def}def 1 Dt +/Ds{/Ddash exch def}def -1 Ds +/Di{/Dstipple exch def}def 1 Di +/Dsetlinewidth{2 Dlinewidth mul setlinewidth}def +/Dsetdash{Ddash 4 eq{[8 12]}{Ddash 16 eq{[32 36]} + {Ddash 20 eq{[32 12 8 12]}{[]}ifelse}ifelse}ifelse 0 setdash}def +/Dstroke{gsave Dsetlinewidth Dsetdash 1 setlinecap stroke grestore + currentpoint newpath moveto}def +/Dl{rlineto Dstroke}def +/arcellipse{/diamv exch def /diamh exch def oldmat currentmatrix pop + currentpoint translate 1 diamv diamh div scale /rad diamh 2 div def + currentpoint exch rad add exch rad -180 180 arc oldmat setmatrix}def +/Dc{dup arcellipse Dstroke}def +/De{arcellipse Dstroke}def +/Da{/endv exch def /endh exch def /centerv exch def /centerh exch def + /cradius centerv centerv mul centerh centerh mul add sqrt def + /eradius endv endv mul endh endh mul add sqrt def + /endang endv endh atan def + /startang centerv neg centerh neg atan def + /sweep startang endang sub dup 0 lt{360 add}if def + sweep arctoobig gt + {/midang startang sweep 2 div sub def /midrad cradius eradius add 2 div def + /midh midang cos midrad mul def /midv midang sin midrad mul def + midh neg midv neg endh endv centerh centerv midh midv Da + Da} + {sweep arctoosmall ge + {/controldelt 1 sweep 2 div cos sub 3 sweep 2 div sin mul div 4 mul def + centerv neg controldelt mul centerh controldelt mul + endv neg controldelt mul centerh add endh add + endh controldelt mul centerv add endv add + centerh endh add centerv endv add rcurveto Dstroke} + {centerh endh add centerv endv add rlineto Dstroke} + ifelse} + ifelse}def +/Dpatterns[ +[%cf[widthbits] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000103810000000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000001038100000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0042660000246600>] +[8<0000990000990000>] +[8<0804020180402010>] +[8<2418814242811824>] +[8<6699996666999966>] +[8<8000000008000000>] +[8<00001c3e363e1c00>] +[8<0000000000000000>] +[32<00000040000000c00000004000000040000000e0000000000000000000000000>] +[32<00000000000060000000900000002000000040000000f0000000000000000000>] +[32<000000000000000000e0000000100000006000000010000000e0000000000000>] +[32<00000000000000002000000060000000a0000000f00000002000000000000000>] +[32<0000000e0000000000000000000000000000000f000000080000000e00000001>] +[32<0000090000000600000000000000000000000000000007000000080000000e00>] +[32<00010000000200000004000000040000000000000000000000000000000f0000>] +[32<0900000006000000090000000600000000000000000000000000000006000000>]] +[%ug +[8<0000020000000000>] +[8<0000020000002000>] +[8<0004020000002000>] +[8<0004020000402000>] +[8<0004060000402000>] +[8<0004060000406000>] +[8<0006060000406000>] +[8<0006060000606000>] +[8<00060e0000606000>] +[8<00060e000060e000>] +[8<00070e000060e000>] +[8<00070e000070e000>] +[8<00070e020070e000>] +[8<00070e020070e020>] +[8<04070e020070e020>] +[8<04070e024070e020>] +[8<04070e064070e020>] +[8<04070e064070e060>] +[8<06070e064070e060>] +[8<06070e066070e060>] +[8<06070f066070e060>] +[8<06070f066070f060>] +[8<060f0f066070f060>] +[8<060f0f0660f0f060>] +[8<060f0f0760f0f060>] +[8<060f0f0760f0f070>] +[8<0e0f0f0760f0f070>] +[8<0e0f0f07e0f0f070>] +[8<0e0f0f0fe0f0f070>] +[8<0e0f0f0fe0f0f0f0>] +[8<0f0f0f0fe0f0f0f0>] +[8<0f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f1f9>] +[8<bf8f1f9ff9f8f1f9>] +[8<bf8f1f9ffbf8f1f9>] +[8<bf8f1fdffbf8f1f9>] +[8<bf8f1fdffbf8f1fd>] +[8<ff8f1fdffbf8f1fd>] +[8<ff8f1fdffff8f1fd>] +[8<ff8f1ffffff8f1fd>] +[8<ff8f1ffffff8f1ff>] +[8<ff9f1ffffff8f1ff>] +[8<ff9f1ffffff9f1ff>] +[8<ff9f9ffffff9f1ff>] +[8<ff9f9ffffff9f9ff>] +[8<ffbf9ffffff9f9ff>] +[8<ffbf9ffffffbf9ff>] +[8<ffbfdffffffbf9ff>] +[8<ffbfdffffffbfdff>] +[8<ffffdffffffbfdff>] +[8<ffffdffffffffdff>] +[8<fffffffffffffdff>] +[8<ffffffffffffffff>]] +[%mg +[8<8000000000000000>] +[8<0822080080228000>] +[8<0204081020408001>] +[8<40e0400000000000>] +[8<66999966>] +[8<8001000010080000>] +[8<81c36666c3810000>] +[8<f0e0c08000000000>] +[16<07c00f801f003e007c00f800f001e003c007800f001f003e007c00f801f003e0>] +[16<1f000f8007c003e001f000f8007c003e001f800fc007e003f001f8007c003e00>] +[8<c3c300000000c3c3>] +[16<0040008001000200040008001000200040008000000100020004000800100020>] +[16<0040002000100008000400020001800040002000100008000400020001000080>] +[16<1fc03fe07df0f8f8f07de03fc01f800fc01fe03ff07df8f87df03fe01fc00f80>] +[8<80>] +[8<8040201000000000>] +[8<84cc000048cc0000>] +[8<9900009900000000>] +[8<08040201804020100800020180002010>] +[8<2418814242811824>] +[8<66999966>] +[8<8000000008000000>] +[8<70f8d8f870000000>] +[8<0814224180402010>] +[8<aa00440a11a04400>] +[8<018245aa45820100>] +[8<221c224180808041>] +[8<88000000>] +[8<0855800080550800>] +[8<2844004482440044>] +[8<0810204080412214>] +[8<00>]]]def +/Dfill{ + transform /maxy exch def /maxx exch def + transform /miny exch def /minx exch def + minx maxx gt{/minx maxx /maxx minx def def}if + miny maxy gt{/miny maxy /maxy miny def def}if + Dpatterns Dstipple 1 sub get exch 1 sub get + aload pop /stip exch def /stipw exch def /stiph 128 def + /imatrix[stipw 0 0 stiph 0 0]def + /tmatrix[stipw 0 0 stiph 0 0]def + /minx minx cvi stiph idiv stiph mul def + /miny miny cvi stipw idiv stipw mul def + gsave eoclip 0 setgray + miny stiph maxy{ + tmatrix exch 5 exch put + minx stipw maxx{ + tmatrix exch 4 exch put tmatrix setmatrix + stipw stiph true imatrix {stip} imagemask + }for + }for + grestore +}def +/Dp{Dfill Dstroke}def +/DP{Dfill currentpoint newpath moveto}def +end + +/ditstart{$DITroff begin + /nfonts 60 def % NFONTS makedev/ditroff dependent! + /fonts[nfonts{0}repeat]def + /fontnames[nfonts{()}repeat]def +/docsave save def +}def + +% character outcalls +/oc{ + /pswid exch def /cc exch def /name exch def + /ditwid pswid fontsize mul resolution mul 72000 div def + /ditsiz fontsize resolution mul 72 div def + ocprocs name known{ocprocs name get exec}{name cb}ifelse +}def +/fractm [.65 0 0 .6 0 0] def +/fraction{ + /fden exch def /fnum exch def gsave /cf currentfont def + cf fractm makefont setfont 0 .3 dm 2 copy neg rmoveto + fnum show rmoveto currentfont cf setfont(\244)show setfont fden show + grestore ditwid 0 rmoveto +}def +/oce{grestore ditwid 0 rmoveto}def +/dm{ditsiz mul}def +/ocprocs 50 dict def ocprocs begin +(14){(1)(4)fraction}def +(12){(1)(2)fraction}def +(34){(3)(4)fraction}def +(13){(1)(3)fraction}def +(23){(2)(3)fraction}def +(18){(1)(8)fraction}def +(38){(3)(8)fraction}def +(58){(5)(8)fraction}def +(78){(7)(8)fraction}def +(sr){gsave 0 .06 dm rmoveto(\326)show oce}def +(is){gsave 0 .15 dm rmoveto(\362)show oce}def +(->){gsave 0 .02 dm rmoveto(\256)show oce}def +(<-){gsave 0 .02 dm rmoveto(\254)show oce}def +(==){gsave 0 .05 dm rmoveto(\272)show oce}def +(uc){gsave currentpoint 400 .009 dm mul add translate + 8 -8 scale ucseal oce}def +end + +% an attempt at a PostScript FONT to implement ditroff special chars +% this will enable us to +% cache the little buggers +% generate faster, more compact PS out of psdit +% confuse everyone (including myself)! +50 dict dup begin +/FontType 3 def +/FontName /DIThacks def +/FontMatrix [.001 0 0 .001 0 0] def +/FontBBox [-260 -260 900 900] def% a lie but ... +/Encoding 256 array def +0 1 255{Encoding exch /.notdef put}for +Encoding + dup 8#040/space put %space + dup 8#110/rc put %right ceil + dup 8#111/lt put %left top curl + dup 8#112/bv put %bold vert + dup 8#113/lk put %left mid curl + dup 8#114/lb put %left bot curl + dup 8#115/rt put %right top curl + dup 8#116/rk put %right mid curl + dup 8#117/rb put %right bot curl + dup 8#120/rf put %right floor + dup 8#121/lf put %left floor + dup 8#122/lc put %left ceil + dup 8#140/sq put %square + dup 8#141/bx put %box + dup 8#142/ci put %circle + dup 8#143/br put %box rule + dup 8#144/rn put %root extender + dup 8#145/vr put %vertical rule + dup 8#146/ob put %outline bullet + dup 8#147/bu put %bullet + dup 8#150/ru put %rule + dup 8#151/ul put %underline + pop +/DITfd 100 dict def +/BuildChar{0 begin + /cc exch def /fd exch def + /charname fd /Encoding get cc get def + /charwid fd /Metrics get charname get def + /charproc fd /CharProcs get charname get def + charwid 0 fd /FontBBox get aload pop setcachedevice + 2 setlinejoin 40 setlinewidth + newpath 0 0 moveto gsave charproc grestore + end}def +/BuildChar load 0 DITfd put +/CharProcs 50 dict def +CharProcs begin +/space{}def +/.notdef{}def +/ru{500 0 rls}def +/rn{0 840 moveto 500 0 rls}def +/vr{0 800 moveto 0 -770 rls}def +/bv{0 800 moveto 0 -1000 rls}def +/br{0 840 moveto 0 -1000 rls}def +/ul{0 -140 moveto 500 0 rls}def +/ob{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath stroke}def +/bu{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath fill}def +/sq{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath stroke}def +/bx{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath fill}def +/ci{500 360 rmoveto currentpoint newpath 333 0 360 arc + 50 setlinewidth stroke}def + +/lt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 add exch s4 a4p stroke}def +/lb{0 800 moveto 0 -550 rlineto currx -200 2cx s4 add exch s4 a4p stroke}def +/rt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 sub exch s4 a4p stroke}def +/rb{0 800 moveto 0 -500 rlineto currx -200 2cx s4 sub exch s4 a4p stroke}def +/lk{0 800 moveto 0 300 -300 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/rk{0 800 moveto 0 300 s2 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/lf{0 800 moveto 0 -1000 rlineto s4 0 rls}def +/rf{0 800 moveto 0 -1000 rlineto s4 neg 0 rls}def +/lc{0 -200 moveto 0 1000 rlineto s4 0 rls}def +/rc{0 -200 moveto 0 1000 rlineto s4 neg 0 rls}def +end + +/Metrics 50 dict def Metrics begin +/.notdef 0 def +/space 500 def +/ru 500 def +/br 0 def +/lt 416 def +/lb 416 def +/rt 416 def +/rb 416 def +/lk 416 def +/rk 416 def +/rc 416 def +/lc 416 def +/rf 416 def +/lf 416 def +/bv 416 def +/ob 350 def +/bu 350 def +/ci 750 def +/bx 750 def +/sq 750 def +/rn 500 def +/ul 500 def +/vr 0 def +end + +DITfd begin +/s2 500 def /s4 250 def /s3 333 def +/a4p{arcto pop pop pop pop}def +/2cx{2 copy exch}def +/rls{rlineto stroke}def +/currx{currentpoint pop}def +/dround{transform round exch round exch itransform} def +end +end +/DIThacks exch definefont pop +ditstart +(psc)xT +576 1 1 xr +1(Times-Roman)xf 1 f +2(Times-Italic)xf 2 f +3(Times-Bold)xf 3 f +4(Times-BoldItalic)xf 4 f +5(Helvetica)xf 5 f +6(Helvetica-Bold)xf 6 f +7(Courier)xf 7 f +8(Courier-Bold)xf 8 f +9(Symbol)xf 9 f +10(DIThacks)xf 10 f +10 s +1 f +xi +%%EndProlog + +%%Page: 1 1 +10 s 10 xH 0 xS 1 f +3 f +22 s +1249 626(A)N +1420(N)X +1547(ew)X +1796(H)X +1933(ashing)X +2467(P)X +2574(ackage)X +3136(for)X +3405(U)X +3532(N)X +3659(IX)X +2 f +20 s +3855 562(1)N +1 f +12 s +1607 779(Margo)N +1887(Seltzer)X +9 f +2179(-)X +1 f +2256(University)X +2686(of)X +2790(California,)X +3229(Berkeley)X +2015 875(Ozan)N +2242(Yigit)X +9 f +2464(-)X +1 f +2541(York)X +2762(University)X +3 f +2331 1086(ABSTRACT)N +1 f +10 s +1152 1222(UNIX)N +1385(support)X +1657(of)X +1756(disk)X +1921(oriented)X +2216(hashing)X +2497(was)X +2654(originally)X +2997(provided)X +3314(by)X +2 f +3426(dbm)X +1 f +3595([ATT79])X +3916(and)X +1152 1310(subsequently)N +1595(improved)X +1927(upon)X +2112(in)X +2 f +2199(ndbm)X +1 f +2402([BSD86].)X +2735(In)X +2826(AT&T)X +3068(System)X +3327(V,)X +3429(in-memory)X +3809(hashed)X +1152 1398(storage)N +1420(and)X +1572(access)X +1814(support)X +2090(was)X +2251(added)X +2479(in)X +2577(the)X +2 f +2711(hsearch)X +1 f +3000(library)X +3249(routines)X +3542([ATT85].)X +3907(The)X +1152 1486(result)N +1367(is)X +1457(a)X +1530(system)X +1789(with)X +1968(two)X +2125(incompatible)X +2580(hashing)X +2865(schemes,)X +3193(each)X +3377(with)X +3555(its)X +3666(own)X +3840(set)X +3965(of)X +1152 1574(shortcomings.)N +1152 1688(This)N +1316(paper)X +1517(presents)X +1802(the)X +1922(design)X +2152(and)X +2289(performance)X +2717(characteristics)X +3198(of)X +3286(a)X +3343(new)X +3498(hashing)X +3768(package)X +1152 1776(providing)N +1483(a)X +1539(superset)X +1822(of)X +1909(the)X +2027(functionality)X +2456(provided)X +2761(by)X +2 f +2861(dbm)X +1 f +3019(and)X +2 f +3155(hsearch)X +1 f +3409(.)X +3469(The)X +3614(new)X +3768(package)X +1152 1864(uses)N +1322(linear)X +1537(hashing)X +1818(to)X +1912(provide)X +2189(ef\256cient)X +2484(support)X +2755(of)X +2853(both)X +3026(memory)X +3324(based)X +3538(and)X +3685(disk)X +3849(based)X +1152 1952(hash)N +1319(tables)X +1526(with)X +1688(performance)X +2115(superior)X +2398(to)X +2480(both)X +2 f +2642(dbm)X +1 f +2800(and)X +2 f +2936(hsearch)X +1 f +3210(under)X +3413(most)X +3588(conditions.)X +3 f +1380 2128(Introduction)N +1 f +892 2260(Current)N +1196(UNIX)X +1456(systems)X +1768(offer)X +1984(two)X +2163(forms)X +2409(of)X +720 2348(hashed)N +973(data)X +1137(access.)X +2 f +1413(Dbm)X +1 f +1599(and)X +1745(its)X +1850(derivatives)X +2231(provide)X +720 2436(keyed)N +939(access)X +1171(to)X +1259(disk)X +1418(resident)X +1698(data)X +1858(while)X +2 f +2062(hsearch)X +1 f +2342(pro-)X +720 2524(vides)N +929(access)X +1175(for)X +1309(memory)X +1616(resident)X +1910(data.)X +2124(These)X +2356(two)X +720 2612(access)N +979(methods)X +1302(are)X +1453(incompatible)X +1923(in)X +2037(that)X +2209(memory)X +720 2700(resident)N +1011(hash)X +1195(tables)X +1419(may)X +1593(not)X +1731(be)X +1843(stored)X +2075(on)X +2191(disk)X +2360(and)X +720 2788(disk)N +884(resident)X +1169(tables)X +1387(cannot)X +1632(be)X +1739(read)X +1909(into)X +2063(memory)X +2360(and)X +720 2876(accessed)N +1022(using)X +1215(the)X +1333(in-memory)X +1709(routines.)X +2 f +892 2990(Dbm)N +1 f +1091(has)X +1241(several)X +1512(shortcomings.)X +2026(Since)X +2247(data)X +2423(is)X +720 3078(assumed)N +1032(to)X +1130(be)X +1242(disk)X +1411(resident,)X +1721(each)X +1905(access)X +2146(requires)X +2440(a)X +720 3166(system)N +963(call,)X +1120(and)X +1257(almost)X +1491(certainly,)X +1813(a)X +1869(disk)X +2022(operation.)X +2365(For)X +720 3254(extremely)N +1072(large)X +1264(databases,)X +1623(where)X +1851(caching)X +2131(is)X +2214(unlikely)X +720 3342(to)N +810(be)X +914(effective,)X +1244(this)X +1386(is)X +1466(acceptable,)X +1853(however,)X +2177(when)X +2378(the)X +720 3430(database)N +1022(is)X +1100(small)X +1298(\(i.e.)X +1447(the)X +1569(password)X +1896(\256le\),)X +2069(performance)X +720 3518(improvements)N +1204(can)X +1342(be)X +1443(obtained)X +1744(through)X +2018(caching)X +2293(pages)X +720 3606(of)N +818(the)X +947(database)X +1255(in)X +1348(memory.)X +1685(In)X +1782(addition,)X +2 f +2094(dbm)X +1 f +2262(cannot)X +720 3694(store)N +902(data)X +1062(items)X +1261(whose)X +1492(total)X +1660(key)X +1802(and)X +1943(data)X +2102(size)X +2252(exceed)X +720 3782(the)N +850(page)X +1034(size)X +1191(of)X +1290(the)X +1420(hash)X +1599(table.)X +1827(Similarly,)X +2176(if)X +2257(two)X +2409(or)X +720 3870(more)N +907(keys)X +1076(produce)X +1357(the)X +1477(same)X +1664(hash)X +1833(value)X +2029(and)X +2166(their)X +2334(total)X +720 3958(size)N +876(exceeds)X +1162(the)X +1291(page)X +1474(size,)X +1650(the)X +1779(table)X +1966(cannot)X +2210(store)X +2396(all)X +720 4046(the)N +838(colliding)X +1142(keys.)X +892 4160(The)N +1050(in-memory)X +2 f +1439(hsearch)X +1 f +1725(routines)X +2015(have)X +2199(different)X +720 4248(shortcomings.)N +1219(First,)X +1413(the)X +1539(notion)X +1771(of)X +1865(a)X +1928(single)X +2146(hash)X +2320(table)X +720 4336(is)N +807(embedded)X +1171(in)X +1266(the)X +1397(interface,)X +1732(preventing)X +2108(an)X +2217(applica-)X +720 4424(tion)N +902(from)X +1116(accessing)X +1482(multiple)X +1806(tables)X +2050(concurrently.)X +720 4512(Secondly,)N +1063(the)X +1186(routine)X +1438(to)X +1525(create)X +1743(a)X +1804(hash)X +1976(table)X +2157(requires)X +2440(a)X +720 4600(parameter)N +1066(which)X +1286(declares)X +1573(the)X +1694(size)X +1842(of)X +1932(the)X +2053(hash)X +2223(table.)X +2422(If)X +720 4688(this)N +856(size)X +1001(is)X +1074(set)X +1183(too)X +1305(low,)X +1465(performance)X +1892(degradation)X +2291(or)X +2378(the)X +720 4776(inability)N +1008(to)X +1092(add)X +1230(items)X +1425(to)X +1509(the)X +1628(table)X +1805(may)X +1964(result.)X +2223(In)X +2311(addi-)X +720 4864(tion,)N +2 f +910(hsearch)X +1 f +1210(requires)X +1515(that)X +1681(the)X +1825(application)X +2226(allocate)X +720 4952(memory)N +1037(for)X +1181(the)X +1329(key)X +1495(and)X +1661(data)X +1845(items.)X +2108(Lastly,)X +2378(the)X +2 f +720 5040(hsearch)N +1 f +1013(routines)X +1310(provide)X +1594(no)X +1713(interface)X +2034(to)X +2135(store)X +2329(hash)X +720 5128(tables)N +927(on)X +1027(disk.)X +16 s +720 5593 MXY +864 0 Dl +2 f +8 s +760 5648(1)N +1 f +9 s +5673(UNIX)Y +990(is)X +1056(a)X +1106(registered)X +1408(trademark)X +1718(of)X +1796(AT&T.)X +10 s +2878 2128(The)N +3032(goal)X +3199(of)X +3295(our)X +3431(work)X +3625(was)X +3779(to)X +3870(design)X +4108(and)X +4253(imple-)X +2706 2216(ment)N +2900(a)X +2970(new)X +3138(package)X +3436(that)X +3590(provides)X +3899(a)X +3968(superset)X +4264(of)X +4364(the)X +2706 2304(functionality)N +3144(of)X +3240(both)X +2 f +3411(dbm)X +1 f +3578(and)X +2 f +3723(hsearch)X +1 f +3977(.)X +4045(The)X +4198(package)X +2706 2392(had)N +2871(to)X +2982(overcome)X +3348(the)X +3495(interface)X +3826(shortcomings)X +4306(cited)X +2706 2480(above)N +2930(and)X +3078(its)X +3185(implementation)X +3719(had)X +3867(to)X +3961(provide)X +4238(perfor-)X +2706 2568(mance)N +2942(equal)X +3142(or)X +3235(superior)X +3524(to)X +3612(that)X +3758(of)X +3851(the)X +3975(existing)X +4253(imple-)X +2706 2656(mentations.)N +3152(In)X +3274(order)X +3498(to)X +3614(provide)X +3913(a)X +4003(compact)X +4329(disk)X +2706 2744(representation,)N +3224(graceful)X +3531(table)X +3729(growth,)X +4018(and)X +4176(expected)X +2706 2832(constant)N +3033(time)X +3234(performance,)X +3720(we)X +3873(selected)X +4191(Litwin's)X +2706 2920(linear)N +2923(hashing)X +3206(algorithm)X +3551([LAR88,)X +3872(LIT80].)X +4178(We)X +4324(then)X +2706 3008(enhanced)N +3037(the)X +3161(algorithm)X +3498(to)X +3586(handle)X +3826(page)X +4004(over\257ows)X +4346(and)X +2706 3096(large)N +2900(key)X +3049(handling)X +3362(with)X +3537(a)X +3606(single)X +3830(mechanism,)X +4248(named)X +2706 3184(buddy-in-waiting.)N +3 f +2975 3338(Existing)N +3274(UNIX)X +3499(Hashing)X +3802(Techniques)X +1 f +2878 3470(Over)N +3076(the)X +3210(last)X +3357(decade,)X +3637(several)X +3901(dynamic)X +4213(hashing)X +2706 3558(schemes)N +3000(have)X +3174(been)X +3348(developed)X +3700(for)X +3816(the)X +3936(UNIX)X +4159(timeshar-)X +2706 3646(ing)N +2856(system,)X +3146(starting)X +3433(with)X +3622(the)X +3767(inclusion)X +4107(of)X +2 f +4221(dbm)X +1 f +4359(,)X +4426(a)X +2706 3734(minimal)N +3008(database)X +3321(library)X +3571(written)X +3834(by)X +3950(Ken)X +4120(Thompson)X +2706 3822([THOM90],)N +3141(in)X +3248(the)X +3391(Seventh)X +3694(Edition)X +3974(UNIX)X +4220(system.)X +2706 3910(Since)N +2916(then,)X +3106(an)X +3214(extended)X +3536(version)X +3804(of)X +3903(the)X +4032(same)X +4228(library,)X +2 f +2706 3998(ndbm)N +1 f +2884(,)X +2933(and)X +3078(a)X +3142(public-domain)X +3637(clone)X +3839(of)X +3934(the)X +4060(latter,)X +2 f +4273(sdbm)X +1 f +4442(,)X +2706 4086(have)N +2902(been)X +3098(developed.)X +3491(Another)X +3797 0.1645(interface-compatible)AX +2706 4174(library)N +2 f +2950(gdbm)X +1 f +3128(,)X +3178(was)X +3333(recently)X +3622(made)X +3826(available)X +4145(as)X +4241(part)X +4395(of)X +2706 4262(the)N +2829(Free)X +2997(Software)X +3312(Foundation's)X +3759(\(FSF\))X +3970(software)X +4271(distri-)X +2706 4350(bution.)N +2878 4464(All)N +3017(of)X +3121(these)X +3323(implementations)X +3893(are)X +4029(based)X +4248(on)X +4364(the)X +2706 4552(idea)N +2871(of)X +2969(revealing)X +3299(just)X +3445(enough)X +3711(bits)X +3856(of)X +3953(a)X +4019(hash)X +4196(value)X +4400(to)X +2706 4640(locate)N +2920(a)X +2978(page)X +3151(in)X +3234(a)X +3291(single)X +3503(access.)X +3770(While)X +2 f +3987(dbm/ndbm)X +1 f +4346(and)X +2 f +2706 4728(sdbm)N +1 f +2908(map)X +3079(the)X +3210(hash)X +3390(value)X +3597(directly)X +3874(to)X +3968(a)X +4036(disk)X +4201(address,)X +2 f +2706 4816(gdbm)N +1 f +2921(uses)X +3096(the)X +3231(hash)X +3414(value)X +3624(to)X +3722(index)X +3936(into)X +4096(a)X +2 f +4168(directory)X +1 f +2706 4904([ENB88])N +3020(containing)X +3378(disk)X +3531(addresses.)X +2878 5018(The)N +2 f +3033(hsearch)X +1 f +3317(routines)X +3605(in)X +3697(System)X +3962(V)X +4049(are)X +4177(designed)X +2706 5106(to)N +2804(provide)X +3085(memory-resident)X +3669(hash)X +3852(tables.)X +4115(Since)X +4328(data)X +2706 5194(access)N +2948(does)X +3131(not)X +3269(require)X +3533(disk)X +3702(access,)X +3964(simple)X +4213(hashing)X +2706 5282(schemes)N +3010(which)X +3238(may)X +3408(require)X +3667(multiple)X +3964(probes)X +4209(into)X +4364(the)X +2706 5370(table)N +2889(are)X +3015(used.)X +3209(A)X +3294(more)X +3486(interesting)X +3851(version)X +4114(of)X +2 f +4208(hsearch)X +1 f +2706 5458(is)N +2784(a)X +2845(public)X +3070(domain)X +3335(library,)X +2 f +3594(dynahash)X +1 f +3901(,)X +3945(that)X +4089(implements)X +2706 5546(Larson's)N +3036(in-memory)X +3440(adaptation)X +3822([LAR88])X +4164(of)X +4279(linear)X +2706 5634(hashing)N +2975([LIT80].)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +1 f +4424(1)X + +2 p +%%Page: 2 2 +10 s 10 xH 0 xS 1 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +2 f +1074 538(dbm)N +1 f +1232(and)X +2 f +1368(ndbm)X +1 f +604 670(The)N +2 f +760(dbm)X +1 f +928(and)X +2 f +1074(ndbm)X +1 f +1282(library)X +1526(implementations)X +2089(are)X +432 758(based)N +667(on)X +799(the)X +949(same)X +1166(algorithm)X +1529(by)X +1661(Ken)X +1846(Thompson)X +432 846([THOM90,)N +824(TOR88,)X +1113(WAL84],)X +1452(but)X +1582(differ)X +1789(in)X +1879(their)X +2054(pro-)X +432 934(grammatic)N +801(interfaces.)X +1160(The)X +1311(latter)X +1502(is)X +1581(a)X +1643(modi\256ed)X +1952(version)X +432 1022(of)N +533(the)X +665(former)X +918(which)X +1148(adds)X +1328(support)X +1601(for)X +1728(multiple)X +2027(data-)X +432 1110(bases)N +634(to)X +724(be)X +828(open)X +1011(concurrently.)X +1484(The)X +1636(discussion)X +1996(of)X +2090(the)X +432 1198(algorithm)N +774(that)X +925(follows)X +1196(is)X +1280(applicable)X +1640(to)X +1732(both)X +2 f +1904(dbm)X +1 f +2072(and)X +2 f +432 1286(ndbm)N +1 f +610(.)X +604 1400(The)N +760(basic)X +956(structure)X +1268(of)X +2 f +1366(dbm)X +1 f +1535(calls)X +1712(for)X +1836(\256xed-sized)X +432 1488(disk)N +612(blocks)X +868(\(buckets\))X +1214(and)X +1377(an)X +2 f +1499(access)X +1 f +1755(function)X +2068(that)X +432 1576(maps)N +623(a)X +681(key)X +819(to)X +902(a)X +959(bucket.)X +1234(The)X +1380(interface)X +1683(routines)X +1962(use)X +2090(the)X +2 f +432 1664(access)N +1 f +673(function)X +970(to)X +1062(obtain)X +1292(the)X +1420(appropriate)X +1816(bucket)X +2060(in)X +2152(a)X +432 1752(single)N +643(disk)X +796(access.)X +604 1866(Within)N +869(the)X +2 f +1010(access)X +1 f +1263(function,)X +1593(a)X +1672(bit-randomizing)X +432 1954(hash)N +610(function)X +2 f +8 s +877 1929(2)N +1 f +10 s +940 1954(is)N +1024(used)X +1202(to)X +1294(convert)X +1565(a)X +1631(key)X +1777(into)X +1931(a)X +1997(32-bit)X +432 2042(hash)N +605(value.)X +825(Out)X +971(of)X +1064(these)X +1254(32)X +1359(bits,)X +1519(only)X +1686(as)X +1778(many)X +1981(bits)X +2121(as)X +432 2130(necessary)N +773(are)X +900(used)X +1075(to)X +1165(determine)X +1514(the)X +1639(particular)X +1974(bucket)X +432 2218(on)N +533(which)X +750(a)X +807(key)X +944(resides.)X +1228(An)X +1347(in-memory)X +1724(bitmap)X +1967(is)X +2041(used)X +432 2306(to)N +533(determine)X +893(how)X +1070(many)X +1287(bits)X +1441(are)X +1579(required.)X +1905(Each)X +2104(bit)X +432 2394(indicates)N +746(whether)X +1033(its)X +1136(associated)X +1494(bucket)X +1736(has)X +1871(been)X +2051(split)X +432 2482(yet)N +562(\(a)X +657(0)X +728(indicating)X +1079(that)X +1230(the)X +1359(bucket)X +1604(has)X +1742(not)X +1875(yet)X +2004(split\).)X +432 2570(The)N +590(use)X +730(of)X +830(the)X +961(hash)X +1141(function)X +1441(and)X +1590(the)X +1720(bitmap)X +1974(is)X +2059(best)X +432 2658(described)N +769(by)X +878(stepping)X +1177(through)X +1454(database)X +1759(creation)X +2046(with)X +432 2746(multiple)N +718(invocations)X +1107(of)X +1194(a)X +2 f +1250(store)X +1 f +1430(operation.)X +604 2860(Initially,)N +906(the)X +1033(hash)X +1209(table)X +1394(contains)X +1690(a)X +1755(single)X +1974(bucket)X +432 2948(\(bucket)N +711(0\),)X +836(the)X +972(bit)X +1094(map)X +1270(contains)X +1575(a)X +1649(single)X +1878(bit)X +2000(\(bit)X +2148(0)X +432 3036(corresponding)N +913(to)X +997(bucket)X +1233(0\),)X +1342(and)X +1480(0)X +1542(bits)X +1699(of)X +1788(a)X +1846(hash)X +2014(value)X +432 3124(are)N +560(examined)X +901(to)X +992(determine)X +1342(where)X +1568(a)X +1633(key)X +1778(is)X +1860(placed)X +2099(\(in)X +432 3212(bucket)N +670(0\).)X +801(When)X +1017(bucket)X +1255(0)X +1319(is)X +1396(full,)X +1551(its)X +1650(bit)X +1758(in)X +1844(the)X +1966(bitmap)X +432 3300(\(bit)N +564(0\))X +652(is)X +726(set,)X +856(and)X +993(its)X +1089(contents)X +1377(are)X +1497(split)X +1655(between)X +1943(buckets)X +432 3388(0)N +499(and)X +641(1,)X +727(by)X +833(considering)X +1233(the)X +1357(0)X +2 f +7 s +3356(th)Y +10 s +1 f +1480 3388(bit)N +1590(\(the)X +1741(lowest)X +1976(bit)X +2086(not)X +432 3476(previously)N +800(examined\))X +1169(of)X +1266(the)X +1393(hash)X +1569(value)X +1772(for)X +1895(each)X +2072(key)X +432 3564(within)N +668(the)X +798(bucket.)X +1064(Given)X +1292(a)X +1359(well-designed)X +1840(hash)X +2018(func-)X +432 3652(tion,)N +613(approximately)X +1112(half)X +1273(of)X +1376(the)X +1510(keys)X +1693(will)X +1853(have)X +2041(hash)X +432 3740(values)N +666(with)X +837(the)X +964(0)X +2 f +7 s +3708(th)Y +10 s +1 f +1090 3740(bit)N +1203(set.)X +1341(All)X +1471(such)X +1646(keys)X +1821(and)X +1965(associ-)X +432 3828(ated)N +586(data)X +740(are)X +859(moved)X +1097(to)X +1179(bucket)X +1413(1,)X +1493(and)X +1629(the)X +1747(rest)X +1883(remain)X +2126(in)X +432 3916(bucket)N +666(0.)X +604 4030(After)N +804(this)X +949(split,)X +1135(the)X +1262(\256le)X +1393(now)X +1560(contains)X +1856(two)X +2005(buck-)X +432 4118(ets,)N +562(and)X +699(the)X +818(bitmap)X +1061(contains)X +1349(three)X +1530(bits:)X +1687(the)X +1805(0)X +2 f +7 s +4086(th)Y +10 s +1 f +1922 4118(bit)N +2026(is)X +2099(set)X +432 4206(to)N +525(indicate)X +810(a)X +876(bucket)X +1120(0)X +1190(split)X +1357(when)X +1561(no)X +1671(bits)X +1816(of)X +1913(the)X +2041(hash)X +432 4294(value)N +648(are)X +789(considered,)X +1199(and)X +1357(two)X +1519(more)X +1726(unset)X +1937(bits)X +2094(for)X +432 4382(buckets)N +706(0)X +775(and)X +920(1.)X +1029(The)X +1183(placement)X +1542(of)X +1638(an)X +1742(incoming)X +2072(key)X +432 4470(now)N +604(requires)X +897(examination)X +1327(of)X +1428(the)X +1560(0)X +2 f +7 s +4438(th)Y +10 s +1 f +1691 4470(bit)N +1809(of)X +1910(the)X +2041(hash)X +432 4558(value,)N +667(and)X +824(the)X +963(key)X +1119(is)X +1212(placed)X +1462(either)X +1685(in)X +1787(bucket)X +2041(0)X +2121(or)X +432 4646(bucket)N +674(1.)X +782(If)X +864(either)X +1075(bucket)X +1317(0)X +1385(or)X +1480(bucket)X +1722(1)X +1790(\256lls)X +1937(up,)X +2064(it)X +2135(is)X +432 4734(split)N +598(as)X +693(before,)X +947(its)X +1050(bit)X +1162(is)X +1243(set)X +1360(in)X +1450(the)X +1576(bitmap,)X +1846(and)X +1990(a)X +2054(new)X +432 4822(set)N +541(of)X +628(unset)X +817(bits)X +952(are)X +1071(added)X +1283(to)X +1365(the)X +1483(bitmap.)X +604 4936(Each)N +791(time)X +959(we)X +1079(consider)X +1376(a)X +1437(new)X +1596(bit)X +1705(\(bit)X +1841(n\),)X +1953(we)X +2072(add)X +432 5024(2)N +2 f +7 s +4992(n)Y +9 f +509(+)X +1 f +540(1)X +10 s +595 5024(bits)N +737(to)X +826(the)X +951(bitmap)X +1199(and)X +1341(obtain)X +1567(2)X +2 f +7 s +4992(n)Y +9 f +1644(+)X +1 f +1675(1)X +10 s +1729 5024(more)N +1920(address-)X +432 5112(able)N +595(buckets)X +869(in)X +960(the)X +1087(\256le.)X +1258(As)X +1376(a)X +1441(result,)X +1668(the)X +1795(bitmap)X +2045(con-)X +432 5200(tains)N +618(the)X +751(previous)X +1062(2)X +2 f +7 s +5168(n)Y +9 f +1139(+)X +1 f +1170(1)X +2 f +10 s +9 f +5200(-)Y +1 f +1242(1)X +1317(bits)X +1467(\(1)X +2 f +9 f +1534(+)X +1 f +1578(2)X +2 f +9 f +(+)S +1 f +1662(4)X +2 f +9 f +(+)S +1 f +1746(...)X +2 f +9 f +(+)S +1 f +1850(2)X +2 f +7 s +5168(n)Y +10 s +1 f +1931 5200(\))N +1992(which)X +432 5288(trace)N +649(the)X +807(entire)X +2 f +1050(split)X +1247(history)X +1 f +1529(of)X +1656(the)X +1813(addressable)X +16 s +432 5433 MXY +864 0 Dl +2 f +8 s +472 5488(2)N +1 f +9 s +523 5513(This)N +670(bit-randomizing)X +1153(property)X +1416(is)X +1482(important)X +1780(to)X +1854(obtain)X +2052(radi-)X +432 5593(cally)N +599(different)X +874(hash)X +1033(values)X +1244(for)X +1355(nearly)X +1562(identical)X +1836(keys,)X +2012(which)X +432 5673(in)N +506(turn)X +640(avoids)X +846(clustering)X +1148(of)X +1226(such)X +1376(keys)X +1526(in)X +1600(a)X +1650(single)X +1840(bucket.)X +10 s +2418 538(buckets.)N +2590 652(Given)N +2809(a)X +2868(key)X +3007(and)X +3146(the)X +3267(bitmap)X +3512(created)X +3768(by)X +3871(this)X +4009(algo-)X +2418 740(rithm,)N +2638(we)X +2759(\256rst)X +2910(examine)X +3209(bit)X +3320(0)X +3386(of)X +3479(the)X +3603(bitmap)X +3851(\(the)X +4002(bit)X +4112(to)X +2418 828(consult)N +2673(when)X +2871(0)X +2934(bits)X +3072(of)X +3162(the)X +3283(hash)X +3453(value)X +3650(are)X +3772(being)X +3973(exam-)X +2418 916(ined\).)N +2631(If)X +2713(it)X +2785(is)X +2866(set)X +2982(\(indicating)X +3356(that)X +3503(the)X +3628(bucket)X +3869(split\),)X +4080(we)X +2418 1004(begin)N +2617(considering)X +3012(the)X +3131(bits)X +3267(of)X +3355(the)X +3473(32-bit)X +3684(hash)X +3851(value.)X +4085(As)X +2418 1092(bit)N +2525(n)X +2587(is)X +2662(revealed,)X +2977(a)X +3035(mask)X +3226(equal)X +3422(to)X +3506(2)X +2 f +7 s +1060(n)Y +9 f +3583(+)X +1 f +3614(1)X +2 f +10 s +9 f +1092(-)Y +1 f +3686(1)X +3748(will)X +3894(yield)X +4076(the)X +2418 1180(current)N +2675(bucket)X +2918(address.)X +3228(Adding)X +3496(2)X +2 f +7 s +1148(n)Y +9 f +3573(+)X +1 f +3604(1)X +2 f +10 s +9 f +1180(-)Y +1 f +3676(1)X +3744(to)X +3834(the)X +3960(bucket)X +2418 1268(address)N +2701(identi\256es)X +3035(which)X +3272(bit)X +3397(in)X +3500(the)X +3639(bitmap)X +3902(must)X +4098(be)X +2418 1356(checked.)N +2743(We)X +2876(continue)X +3173(revealing)X +3493(bits)X +3628(of)X +3715(the)X +3833(hash)X +4000(value)X +2418 1444(until)N +2591(all)X +2698(set)X +2814(bits)X +2955(in)X +3043(the)X +3167(bitmap)X +3415(are)X +3540(exhausted.)X +3907(The)X +4058(fol-)X +2418 1532(lowing)N +2682(algorithm,)X +3055(a)X +3133(simpli\256cation)X +3614(of)X +3723(the)X +3863(algorithm)X +2418 1620(due)N +2565(to)X +2658(Ken)X +2823(Thompson)X +3196([THOM90,)X +3590(TOR88],)X +3908(uses)X +4076(the)X +2418 1708(hash)N +2625(value)X +2839(and)X +2995(the)X +3133(bitmap)X +3395(to)X +3497(calculate)X +3823(the)X +3960(bucket)X +2418 1796(address)N +2679(as)X +2766(discussed)X +3093(above.)X +0(Courier)xf 0 f +1 f +0 f +8 s +2418 2095(hash)N +2608(=)X +2684 -0.4038(calchash\(key\);)AX +2418 2183(mask)N +2608(=)X +2684(0;)X +2418 2271(while)N +2646 -0.4018(\(isbitset\(\(hash)AX +3254(&)X +3330(mask\))X +3558(+)X +3634(mask\)\))X +2706 2359(mask)N +2896(=)X +2972(\(mask)X +3200(<<)X +3314(1\))X +3428(+)X +3504(1;)X +2418 2447(bucket)N +2684(=)X +2760(hash)X +2950(&)X +3026(mask;)X +2 f +10 s +3211 2812(sdbm)N +1 f +2590 2944(The)N +2 f +2738(sdbm)X +1 f +2930(library)X +3167(is)X +3243(a)X +3302(public-domain)X +3791(clone)X +3987(of)X +4076(the)X +2 f +2418 3032(ndbm)N +1 f +2638(library,)X +2914(developed)X +3286(by)X +3408(Ozan)X +3620(Yigit)X +3826(to)X +3929(provide)X +2 f +2418 3120(ndbm)N +1 f +2596('s)X +2692(functionality)X +3139(under)X +3359(some)X +3565(versions)X +3869(of)X +3973(UNIX)X +2418 3208(that)N +2559(exclude)X +2830(it)X +2894(for)X +3008(licensing)X +3317(reasons)X +3578([YIG89].)X +3895(The)X +4040(pro-)X +2418 3296(grammer)N +2735(interface,)X +3064(and)X +3207(the)X +3332(basic)X +3524(structure)X +3832(of)X +2 f +3926(sdbm)X +1 f +4121(is)X +2418 3384(identical)N +2733(to)X +2 f +2834(ndbm)X +1 f +3051(but)X +3192(internal)X +3476(details)X +3723(of)X +3828(the)X +2 f +3964(access)X +1 f +2418 3472(function,)N +2726(such)X +2894(as)X +2982(the)X +3101(calculation)X +3474(of)X +3561(the)X +3679(bucket)X +3913(address,)X +2418 3560(and)N +2563(the)X +2690(use)X +2825(of)X +2920(different)X +3225(hash)X +3400(functions)X +3726(make)X +3928(the)X +4054(two)X +2418 3648(incompatible)N +2856(at)X +2934(the)X +3052(database)X +3349(level.)X +2590 3762(The)N +2 f +2740(sdbm)X +1 f +2934(library)X +3173(is)X +3251(based)X +3458(on)X +3562(a)X +3622(simpli\256ed)X +3965(imple-)X +2418 3850(mentation)N +2778(of)X +2885(Larson's)X +3206(1978)X +2 f +3406(dynamic)X +3717(hashing)X +1 f +4009(algo-)X +2418 3938(rithm)N +2616(including)X +2943(the)X +2 f +3066(re\256nements)X +3461(and)X +3605(variations)X +1 f +3953(of)X +4044(sec-)X +2418 4026(tion)N +2562(5)X +2622([LAR78].)X +2956(Larson's)X +3257(original)X +3526(algorithm)X +3857(calls)X +4024(for)X +4138(a)X +2418 4114(forest)N +2635(of)X +2736(binary)X +2975(hash)X +3156(trees)X +3341(that)X +3494(are)X +3626(accessed)X +3941(by)X +4054(two)X +2418 4202(hash)N +2586(functions.)X +2925(The)X +3071(\256rst)X +3216(hash)X +3384(function)X +3672(selects)X +3907(a)X +3964(partic-)X +2418 4290(ular)N +2571(tree)X +2720(within)X +2952(the)X +3078(forest.)X +3309(The)X +3462(second)X +3713(hash)X +3887(function,)X +2418 4378(which)N +2659(is)X +2757(required)X +3070(to)X +3177(be)X +3297(a)X +3377(boolean)X +3675(pseudo-random)X +2418 4466(number)N +2687(generator)X +3015(that)X +3159(is)X +3236(seeded)X +3479(by)X +3583(the)X +3705(key,)X +3865(is)X +3942(used)X +4112(to)X +2418 4554(traverse)N +2733(the)X +2890(tree)X +3070(until)X +3275(internal)X +3579(\(split\))X +3829(nodes)X +4075(are)X +2418 4642(exhausted)N +2763(and)X +2903(an)X +3003(external)X +3286(\(non-split\))X +3648(node)X +3827(is)X +3903(reached.)X +2418 4730(The)N +2571(bucket)X +2813(addresses)X +3149(are)X +3276(stored)X +3500(directly)X +3772(in)X +3861(the)X +3986(exter-)X +2418 4818(nal)N +2536(nodes.)X +2590 4932(Larson's)N +2903(re\256nements)X +3309(are)X +3440(based)X +3655(on)X +3767(the)X +3897(observa-)X +2418 5020(tion)N +2570(that)X +2718(the)X +2844(nodes)X +3059(can)X +3199(be)X +3303(represented)X +3702(by)X +3809(a)X +3872(single)X +4090(bit)X +2418 5108(that)N +2569(is)X +2653(set)X +2773(for)X +2898(internal)X +3174(nodes)X +3392(and)X +3539(not)X +3672(set)X +3791(for)X +3915(external)X +2418 5196(nodes,)N +2652(resulting)X +2959(in)X +3048(a)X +3111(radix)X +3303(search)X +3536(trie.)X +3709(Figure)X +3944(1)X +4010(illus-)X +2418 5284(trates)N +2621(this.)X +2804(Nodes)X +3037(A)X +3123(and)X +3267(B)X +3348(are)X +3475(internal)X +3748(\(split\))X +3967(nodes,)X +2418 5372(thus)N +2573(having)X +2813(no)X +2915(bucket)X +3151(addresses)X +3480(associated)X +3831(with)X +3994(them.)X +2418 5460(Instead,)N +2693(the)X +2814(external)X +3096(nodes)X +3306(\(C,)X +3429(D,)X +3530(and)X +3669(E\))X +3768(each)X +3938(need)X +4112(to)X +2418 5548(refer)N +2594(to)X +2679(a)X +2738(bucket)X +2975(address.)X +3279(These)X +3494(bucket)X +3731(addresses)X +4062(can)X +2418 5636(be)N +2529(stored)X +2760(in)X +2857(the)X +2990(trie)X +3132(itself)X +3327(where)X +3559(the)X +3691(subtries)X +3974(would)X +3 f +432 5960(2)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +3 p +%%Page: 3 3 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(live)N +862(if)X +933(they)X +1092(existed)X +1340([KNU68].)X +1709(For)X +1841(example,)X +2154(if)X +2224(nodes)X +2432(F)X +720 626(and)N +858(G)X +938(were)X +1117(the)X +1237(children)X +1522(of)X +1610(node)X +1787(C,)X +1881(the)X +2000(bucket)X +2235(address)X +720 714(L00)N +886(could)X +1101(reside)X +1330(in)X +1429(the)X +1563(bits)X +1714(that)X +1870(will)X +2030(eventually)X +2400(be)X +720 802(used)N +887(to)X +969(store)X +1145(nodes)X +1352(F)X +1416(and)X +1552(G)X +1630(and)X +1766(all)X +1866(their)X +2033(children.)X +10 f +720 890 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1894 2247(L1)N +784 1925(A)N +1431(E)X +1106 2247(D)N +1428 1281(C)N +1109 1603(B)N +1884 1930(L01)N +1879 1286(L00)N +1221 1814(1)N +903 2131(1)N +1221 1402(0)N +903 1714(0)N +1 Dt +1397 1821 MXY +-8 -32 Dl +-5 19 Dl +-20 6 Dl +33 7 Dl +-187 -182 Dl +1397 1322 MXY +-33 7 Dl +20 6 Dl +5 19 Dl +8 -32 Dl +-187 182 Dl +1069 1639 MXY +-32 7 Dl +20 6 Dl +5 19 Dl +7 -32 Dl +-186 182 Dl +1374 1891 MXY +185 Dc +1779 2133 MXY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1811 MY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1166 MY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1052 2213 MXY +185 Dc +1569 MY +185 Dc +720 1881 MXY +185 Dc +1779 2213 MXY +-28 -17 Dl +10 17 Dl +-10 18 Dl +28 -18 Dl +-543 0 Dl +1769 1891 MXY +-28 -18 Dl +10 18 Dl +-10 18 Dl +28 -18 Dl +-201 0 Dl +1364 1247 MXY +185 Dc +1769 MX +-28 -18 Dl +10 18 Dl +-10 18 Dl +28 -18 Dl +-201 0 Dl +1064 2143 MXY +-7 -32 Dl +-5 19 Dl +-20 6 Dl +32 7 Dl +-181 -181 Dl +3 Dt +-1 Ds +8 s +720 2482(Figure)N +925(1:)X +1 f +1002(Radix)X +1179(search)X +1365(trie)X +1474(with)X +1612(internal)X +1831(nodes)X +2004(A)X +2074(and)X +2189(B,)X +2271(external)X +720 2570(nodes)N +891(C,)X +972(D,)X +1056(and)X +1170(E,)X +1247(and)X +1361(bucket)X +1553(addresses)X +1819(stored)X +1997(in)X +2069(the)X +2168(unused)X +2370(por-)X +720 2658(tion)N +836(of)X +905(the)X +999(trie.)X +10 s +10 f +720 2922 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +892 3124(Further)N +1153(simpli\256cations)X +1647(of)X +1738(the)X +1860(above)X +2076([YIG89])X +2377(are)X +720 3212(possible.)N +1038(Using)X +1265(a)X +1337(single)X +1564(radix)X +1765(trie)X +1908(to)X +2006(avoid)X +2219(the)X +2352(\256rst)X +720 3300(hash)N +904(function,)X +1227(replacing)X +1562(the)X +1696(pseudo-random)X +2231(number)X +720 3388(generator)N +1052(with)X +1222(a)X +1286(well)X +1452(designed,)X +1785(bit-randomizing)X +2329(hash)X +720 3476(function,)N +1053(and)X +1215(using)X +1434(the)X +1578(portion)X +1855(of)X +1967(the)X +2110(hash)X +2302(value)X +720 3564(exposed)N +1021(during)X +1268(the)X +1404(trie)X +1549(traversal)X +1864(as)X +1969(a)X +2042(direct)X +2262(bucket)X +720 3652(address)N +990(results)X +1228(in)X +1319(an)X +2 f +1424(access)X +1 f +1663(function)X +1959(that)X +2108(works)X +2333(very)X +720 3740(similar)N +974(to)X +1068(Thompson's)X +1499(algorithm)X +1841(above.)X +2084(The)X +2240(follow-)X +720 3828(ing)N +847(algorithm)X +1183(uses)X +1346(the)X +1469(hash)X +1641(value)X +1840(to)X +1927(traverse)X +2206(a)X +2266(linear-)X +720 3916(ized)N +874(radix)X +1059(trie)X +2 f +8 s +1166 3891(3)N +1 f +10 s +1218 3916(starting)N +1478(at)X +1556(the)X +1674(0)X +2 f +7 s +3884(th)Y +10 s +1 f +1791 3916(bit.)N +0 f +8 s +720 4215(tbit)N +910(=)X +986(0;)X +1296(/*)X +1410(radix)X +1638(trie)X +1828(index)X +2056(*/)X +720 4303(hbit)N +910(=)X +986(0;)X +1296(/*)X +1410(hash)X +1600(bit)X +1752(index)X +2056(*/)X +720 4391(mask)N +910(=)X +986(0;)X +720 4479(hash)N +910(=)X +986 -0.4038(calchash\(key\);)AX +720 4655(for)N +872(\(mask)X +1100(=)X +1176(0;)X +910 4743 -0.4018(isbitset\(tbit\);)AN +910 4831(mask)N +1100(=)X +1176(\(mask)X +1404(<<)X +1518(1\))X +1632(+)X +1708(1\))X +1008 4919(if)N +1122(\(hash)X +1350(&)X +1426(\(1)X +1540(<<)X +1654 -0.4219(hbit++\)\)\))AX +1160 5007(/*)N +1274(right)X +1502(son)X +1692(*/)X +1160 5095(tbit)N +1350(=)X +1426(2)X +1502(*)X +1578(tbit)X +1768(+)X +1844(2;)X +1008 5183(else)N +1 f +16 s +720 5353 MXY +864 0 Dl +2 f +8 s +760 5408(3)N +1 f +9 s +818 5433(A)N +896(linearized)X +1206(radix)X +1380(trie)X +1502(is)X +1576(merely)X +1802(an)X +1895(array)X +2068(representation)X +720 5513(of)N +800(the)X +908(radix)X +1076(search)X +1280(trie)X +1396(described)X +1692(above.)X +1920(The)X +2052(children)X +2308(of)X +2388(the)X +720 5593(node)N +885(with)X +1038(index)X +1223(i)X +1267(can)X +1391(be)X +1483(found)X +1675(at)X +1751(the)X +1863(nodes)X +2055(indexed)X +2307(2*i+1)X +720 5673(and)N +842(2*i+2.)X +0 f +8 s +3146 538(/*)N +3260(left)X +3450(son)X +3678(*/)X +3146 626(tbit)N +3336(=)X +3412(2)X +3488(*)X +3564(tbit)X +3754(+)X +3830(1;)X +2706 802(bucket)N +2972(=)X +3048(hash)X +3238(&)X +3314(mask;)X +2 f +10 s +3495 1167(gdbm)N +1 f +2878 1299(The)N +3027(gdbm)X +3233(\(GNU)X +3458(data)X +3616(base)X +3783(manager\))X +4111(library)X +4349(is)X +4426(a)X +2706 1387(UNIX)N +2933(database)X +3236(manager)X +3539(written)X +3792(by)X +3897(Philip)X +4112(A.)X +4215(Nelson,)X +2706 1475(and)N +2848(made)X +3048(available)X +3364(as)X +3457(a)X +3518(part)X +3668(of)X +3760(the)X +3883(FSF)X +4040(software)X +4342(dis-)X +2706 1563(tribution.)N +3052(The)X +3207(gdbm)X +3419(library)X +3663(provides)X +3969(the)X +4097(same)X +4292(func-)X +2706 1651(tionality)N +3028(of)X +3151(the)X +2 f +3304(dbm)X +1 f +3442(/)X +2 f +3464(ndbm)X +1 f +3697(libraries)X +4015([NEL90])X +4360(but)X +2706 1739(attempts)N +3018(to)X +3121(avoid)X +3340(some)X +3550(of)X +3658(their)X +3846(shortcomings.)X +4337(The)X +2706 1827(gdbm)N +2918(library)X +3162(allows)X +3401(for)X +3525(arbitrary-length)X +4059(data,)X +4242(and)X +4387(its)X +2706 1915(database)N +3027(is)X +3124(a)X +3203(singular,)X +3524(non-sparse)X +2 f +8 s +3872 1890(4)N +1 f +10 s +3947 1915(\256le.)N +4112(The)X +4280(gdbm)X +2706 2003(library)N +2947(also)X +3103(includes)X +2 f +3396(dbm)X +1 f +3560(and)X +2 f +3702(ndbm)X +1 f +3906(compatible)X +4288(inter-)X +2706 2091(faces.)N +2878 2205(The)N +3025(gdbm)X +3229(library)X +3465(is)X +3540(based)X +3745(on)X +2 f +3847(extensible)X +4189(hashing)X +1 f +4442(,)X +2706 2293(a)N +2766(dynamic)X +3066(hashing)X +3339(algorithm)X +3674(by)X +3778(Fagin)X +3984(et)X +4066(al)X +4148([FAG79].)X +2706 2381(This)N +2881(algorithm)X +3225(differs)X +3467(from)X +3655(the)X +3785(previously)X +4155(discussed)X +2706 2469(algorithms)N +3069(in)X +3152(that)X +3293(it)X +3358(uses)X +3517(a)X +2 f +3574(directory)X +1 f +3889(that)X +4030(is)X +4103(a)X +4159(collapsed)X +2706 2557(representation)N +3192([ENB88])X +3517(of)X +3615(the)X +3744(radix)X +3940(search)X +4177(trie)X +4315(used)X +2706 2645(by)N +2 f +2806(sdbm)X +1 f +2975(.)X +10 f +2706 2733 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +7 s +3572 3761(L1)N +1 Dt +3485 3738 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-400 0 Dl +3180 3027 MXY +136 Dc +2706 3494 MXY +136 Dc +2950 3264 MXY +136 Dc +3738 MY +136 Dc +3485 2968 MXY +0 118 Dl +238 0 Dl +0 -118 Dl +-238 0 Dl +3442 MY +0 119 Dl +238 0 Dl +0 -119 Dl +-238 0 Dl +3679 MY +0 119 Dl +238 0 Dl +0 -119 Dl +-238 0 Dl +3187 3501 MXY +136 Dc +2963 3316 MXY +-24 5 Dl +15 4 Dl +4 15 Dl +5 -24 Dl +-137 134 Dl +3204 3083 MXY +-24 5 Dl +15 4 Dl +3 14 Dl +6 -23 Dl +-137 133 Dl +3204 3450 MXY +-6 -24 Dl +-3 14 Dl +-15 5 Dl +24 5 Dl +-137 -134 Dl +2842 3369(0)N +3075 3139(0)N +2842 3676(1)N +3075 3443(1)N +3562 3054(L00)N +3565 3528(L01)N +4197 2968 MXY +0 118 Dl +237 0 Dl +0 -118 Dl +-237 0 Dl +3205 MY +0 119 Dl +237 0 Dl +0 -119 Dl +-237 0 Dl +3561 MY +0 118 Dl +237 0 Dl +0 -118 Dl +-237 0 Dl +3960 2909 MXY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3146 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3383 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3620 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +4197 3027 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-119 0 Dl +4197 3264 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-119 0 Dl +3501 MY +59 0 Dl +0 89 Dl +4078 3738 MXY +59 0 Dl +0 -88 Dl +4197 3590 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-60 0 Dl +4197 3650 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-60 0 Dl +3991 3050(00)N +3991 3287(01)N +3991 3524(10)N +3991 3761(11)N +4269 3050(L00)N +4269 3287(L01)N +4283 3643(L1)N +3485 3501 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-155 0 Dl +3485 3027 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-163 0 Dl +2967 3687 MXY +-5 -24 Dl +-4 14 Dl +-15 4 Dl +24 6 Dl +-141 -141 Dl +3 Dt +-1 Ds +8 s +2706 4033(Figure)N +2903(2:)X +1 f +2972(A)X +3034(radix)X +3181(search)X +3359(trie)X +3460(and)X +3568(a)X +3612(directory)X +3858(representing)X +4189(the)X +4283(trie.)X +10 s +10 f +2706 4209 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +2878 4411(In)N +2968(this)X +3106(algorithm,)X +3460(a)X +3519(directory)X +3832(consists)X +4108(of)X +4198(a)X +4256(search)X +2706 4499(trie)N +2847(of)X +2947(depth)X +2 f +3158(n)X +1 f +3211(,)X +3264(containing)X +3635(2)X +2 f +7 s +4467(n)Y +10 s +1 f +3749 4499(bucket)N +3996(addresses)X +4337(\(i.e.)X +2706 4587(each)N +2897(element)X +3194(of)X +3304(the)X +3445(trie)X +3594(is)X +3689(a)X +3767(bucket)X +4023(address\).)X +4373(To)X +2706 4675(access)N +2935(the)X +3056(hash)X +3226(table,)X +3425(a)X +3483(32-bit)X +3696(hash)X +3865(value)X +4061(is)X +4136(calculated)X +2706 4763(and)N +2 f +2861(n)X +1 f +2953(bits)X +3107(of)X +3213(the)X +3350(value)X +3563(are)X +3701(used)X +3886(to)X +3986(index)X +4202(into)X +4364(the)X +2706 4851(directory)N +3018(to)X +3102(obtain)X +3324(a)X +3382(bucket)X +3618(address.)X +3921(It)X +3992(is)X +4067(important)X +4400(to)X +2706 4939(note)N +2866(that)X +3008(multiple)X +3296(entries)X +3532(of)X +3620(this)X +3756(directory)X +4067(may)X +4226(contain)X +2706 5027(the)N +2833(same)X +3026(bucket)X +3268(address)X +3537(as)X +3632(a)X +3696(result)X +3902(of)X +3997(directory)X +4315(dou-)X +2706 5115(bling)N +2903(during)X +3145(bucket)X +3392(splitting.)X +3706(Figure)X +3948(2)X +4021(illustrates)X +4364(the)X +2706 5203(relationship)N +3126(between)X +3436(a)X +3513(typical)X +3772(\(skewed\))X +4108(search)X +4355(trie)X +2706 5291(and)N +2850(its)X +2953(directory)X +3271(representation.)X +3774(The)X +3927(formation)X +4270(of)X +4364(the)X +2706 5379(directory)N +3016(shown)X +3245(in)X +3327(the)X +3445(\256gure)X +3652(is)X +3725(as)X +3812(follows.)X +16 s +2706 5593 MXY +864 0 Dl +2 f +8 s +2746 5648(4)N +1 f +9 s +2796 5673(It)N +2858(does)X +3008(not)X +3118(contain)X +3348(holes.)X +3 f +10 s +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(3)X + +4 p +%%Page: 4 4 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +604 538(Initially,)N +937(there)X +1158(is)X +1271(one)X +1446(slot)X +1620(in)X +1741(the)X +1898(directory)X +432 626(addressing)N +802(a)X +865(single)X +1083(bucket.)X +1364(The)X +1515(depth)X +1719(of)X +1812(the)X +1936(trie)X +2069(is)X +2148(0)X +432 714(and)N +577(0)X +646(bits)X +790(of)X +886(each)X +1063(hash)X +1239(value)X +1442(are)X +1570(examined)X +1910(to)X +2000(deter-)X +432 802(mine)N +624(in)X +718(which)X +946(bucket)X +1192(to)X +1286(place)X +1488(a)X +1556(key;)X +1726(all)X +1837(keys)X +2015(go)X +2126(in)X +432 890(bucket)N +682(0.)X +797(When)X +1024(this)X +1174(bucket)X +1423(is)X +1511(full,)X +1677(its)X +1787(contents)X +2089(are)X +432 978(divided)N +698(between)X +992(L0)X +1107(and)X +1249(L1)X +1363(as)X +1455(was)X +1605(done)X +1786(in)X +1873(the)X +1996(previ-)X +432 1066(ously)N +664(discussed)X +1030(algorithms.)X +1471(After)X +1700(this)X +1874(split,)X +2090(the)X +432 1154(address)N +710(of)X +814(the)X +948(second)X +1207(bucket)X +1457(must)X +1648(be)X +1760(stored)X +1992(in)X +2090(the)X +432 1242(directory.)N +796(To)X +939(accommodate)X +1438(the)X +1589(new)X +1776(address,)X +2090(the)X +432 1330(directory)N +752(is)X +835(split)X +2 f +8 s +972 1305(5)N +1 f +10 s +1330(,)Y +1054(by)X +1163(doubling)X +1476(it,)X +1569(thus)X +1731(increasing)X +2090(the)X +432 1418(depth)N +630(of)X +717(the)X +835(directory)X +1145(by)X +1245(one.)X +604 1532(After)N +813(this)X +967(split,)X +1163(a)X +1237(single)X +1466(bit)X +1588(of)X +1693(the)X +1829(hash)X +2014(value)X +432 1620(needs)N +663(to)X +773(be)X +896(examined)X +1255(to)X +1364(decide)X +1621(whether)X +1927(the)X +2072(key)X +432 1708(belongs)N +711(to)X +803(L0)X +922(or)X +1019(L1.)X +1158(Once)X +1358(one)X +1504(of)X +1601(these)X +1795(buckets)X +2069(\256lls)X +432 1796(\(L0)N +578(for)X +702(example\),)X +1051(it)X +1125(is)X +1208(split)X +1375(as)X +1472(before,)X +1728(and)X +1873(the)X +2000(direc-)X +432 1884(tory)N +585(is)X +662(split)X +823(again)X +1021(to)X +1107(make)X +1305(room)X +1498(for)X +1615(the)X +1736(address)X +2000(of)X +2090(the)X +432 1972(third)N +618(bucket.)X +927(This)X +1104(splitting)X +1400(causes)X +1645(the)X +1778(addresses)X +2121(of)X +432 2060(the)N +567(non-splitting)X +1012(bucket)X +1263(\(L1\))X +1443(to)X +1541(be)X +1653(duplicated.)X +2063(The)X +432 2148(directory)N +766(now)X +948(has)X +1099(four)X +1277(entries,)X +1555(a)X +1635(depth)X +1857(of)X +1968(2,)X +2072(and)X +432 2236(indexes)N +700(the)X +821(buckets)X +1089(L00,)X +1261(L01)X +1413(and)X +1552(L1,)X +1684(as)X +1774(shown)X +2006(in)X +2090(the)X +432 2324(Figure)N +661(2.)X +604 2438(The)N +756(crucial)X +1002(part)X +1154(of)X +1247(the)X +1371(algorithm)X +1708(is)X +1787(the)X +1911(observa-)X +432 2526(tion)N +580(that)X +724(L1)X +837(is)X +914(addressed)X +1255(twice)X +1453(in)X +1539(the)X +1661(directory.)X +1995(If)X +2073(this)X +432 2614(bucket)N +679(were)X +869(to)X +964(split)X +1134(now,)X +1324(the)X +1454(directory)X +1776(already)X +2045(con-)X +432 2702(tains)N +611(room)X +808(to)X +898(hold)X +1067(the)X +1192(address)X +1460(of)X +1554(the)X +1679(new)X +1840(bucket.)X +2121(In)X +432 2790(general,)N +711(the)X +831(relationship)X +1231(between)X +1521(the)X +1641(directory)X +1953(and)X +2090(the)X +432 2878(number)N +704(of)X +798(bucket)X +1039(addresses)X +1374(contained)X +1713(therein)X +1962(is)X +2041(used)X +432 2966(to)N +517(decide)X +750(when)X +947(to)X +1031(split)X +1190(the)X +1310(directory.)X +1662(Each)X +1845(bucket)X +2081(has)X +432 3054(a)N +505(depth,)X +740(\()X +2 f +767(n)X +7 s +3070(b)Y +10 s +1 f +848 3054(\),)N +932(associated)X +1299(with)X +1478(it)X +1558(and)X +1710(appears)X +1992(in)X +2090(the)X +432 3142(directory)N +744(exactly)X +998(2)X +2 f +7 s +3106(n)Y +9 f +1075(-)X +2 f +1106(n)X +4 s +3110(b)Y +7 s +1 f +10 s +1181 3142(times.)N +1396(When)X +1610(a)X +1668(bucket)X +1904(splits,)X +2113(its)X +432 3230(depth)N +638(increases)X +961(by)X +1069(one.)X +1253(The)X +1406(directory)X +1724(must)X +1907(split)X +2072(any)X +432 3318(time)N +602(a)X +665(bucket's)X +964(depth)X +1169(exceeds)X +1451(the)X +1576(depth)X +1781(of)X +1875(the)X +2000(direc-)X +432 3406(tory.)N +630(The)X +784(following)X +1123(code)X +1303(fragment)X +1621(helps)X +1818(to)X +1908(illustrate)X +432 3494(the)N +554(extendible)X +912(hashing)X +1185(algorithm)X +1520([FAG79])X +1838(for)X +1955(access-)X +432 3582(ing)N +554(individual)X +898(buckets)X +1163(and)X +1299(maintaining)X +1701(the)X +1819(directory.)X +0 f +8 s +432 3881(hash)N +622(=)X +698 -0.4038(calchash\(key\);)AX +432 3969(mask)N +622(=)X +698 -0.4018(maskvec[depth];)AX +432 4145(bucket)N +698(=)X +774 -0.4038(directory[hash)AX +1344(&)X +1420(mask];)X +432 4321(/*)N +546(Key)X +698 -0.4219(Insertion)AX +1078(*/)X +432 4409(if)N +546 -0.4038(\(store\(bucket,)AX +1116(key,)X +1306(data\))X +1534(==)X +1648(FAIL\))X +1876({)X +720 4497(newbl)N +948(=)X +1024 -0.4167(getpage\(\);)AX +720 4585 -0.4000(bucket->depth++;)AN +720 4673 -0.4091(newbl->depth)AN +1214(=)X +1290 -0.4038(bucket->depth;)AX +720 4761(if)N +834 -0.4038(\(bucket->depth)AX +1404(>)X +1480(depth\))X +1746({)X +1008 4849(/*)N +1122(double)X +1388 -0.4219(directory)AX +1768(*/)X +1008 4937(depth++;)N +1 f +16 s +432 5033 MXY +864 0 Dl +2 f +8 s +472 5088(5)N +1 f +9 s +534 5113(This)N +692(decision)X +962(to)X +1048(split)X +1202(the)X +1319(directory)X +1608(is)X +1685(based)X +1878(on)X +1979(a)X +2040(com-)X +432 5193(parison)N +666(of)X +748(the)X +858(depth)X +1040(of)X +1121(the)X +1230(page)X +1387(being)X +1568(split)X +1713(and)X +1838(the)X +1947(depth)X +2128(of)X +432 5273(the)N +543(trie.)X +698(In)X +781(Figure)X +992(2,)X +1069(the)X +1180(depths)X +1390(of)X +1472(both)X +1622(L00)X +1760(and)X +1886(L01)X +2024(are)X +2134(2,)X +432 5353(whereas)N +689(the)X +798(depth)X +979(of)X +1060(L1)X +1161(is)X +1230(1.)X +1323(Therefore,)X +1646(if)X +1710(L1)X +1810(were)X +1970(to)X +2046(split,)X +432 5433(the)N +543(directory)X +826(would)X +1029(not)X +1144(need)X +1303(to)X +1382(split.)X +1565(In)X +1648(reality,)X +1872(a)X +1926(bucket)X +2140(is)X +432 5513(allocated)N +727(for)X +846(the)X +969(directory)X +1264(at)X +1351(the)X +1474(time)X +1637(of)X +1732(\256le)X +1858(creation)X +2124(so)X +432 5593(although)N +707(the)X +818(directory)X +1100(splits)X +1274(logically,)X +1566(physical)X +1828(splits)X +2002(do)X +2096(not)X +432 5673(occur)N +610(until)X +760(the)X +866(\256le)X +976(becomes)X +1246(quite)X +1408(large.)X +0 f +8 s +2994 538 -0.4219(directory)AN +3374(=)X +3450 -0.3971(double\(directory\);)AX +2706 626(})N +2706 714 -0.3958(splitbucket\(bucket,)AN +3466(newbl\))X +2706 802(...)N +2418 890(})N +2 f +10 s +3169 1255(hsearch)N +1 f +2590 1387(Since)N +2 f +2807(hsearch)X +1 f +3100(does)X +3286(not)X +3427(have)X +3617(to)X +3717(translate)X +4027(hash)X +2418 1475(values)N +2659(into)X +2819(disk)X +2988(addresses,)X +3352(it)X +3432(can)X +3579(use)X +3721(much)X +3934(simpler)X +2418 1563(algorithms)N +2808(than)X +2994(those)X +3211(de\256ned)X +3495(above.)X +3775(System)X +4058(V's)X +2 f +2418 1651(hsearch)N +1 f +2708(constructs)X +3069(a)X +3141(\256xed-size)X +3489(hash)X +3671(table)X +3862(\(speci\256ed)X +2418 1739(by)N +2519(the)X +2637(user)X +2791(at)X +2869(table)X +3045(creation\).)X +3391(By)X +3504(default,)X +3767(a)X +3823(multiplica-)X +2418 1827(tive)N +2570(hash)X +2748(function)X +3046(based)X +3260(on)X +3371(that)X +3522(described)X +3861(in)X +3954(Knuth,)X +2418 1915(Volume)N +2710(3,)X +2804(section)X +3065(6.4)X +3199([KNU68])X +3541(is)X +3628(used)X +3809(to)X +3905(obtain)X +4138(a)X +2418 2003(primary)N +2694(bucket)X +2930(address.)X +3233(If)X +3309(this)X +3446(bucket)X +3681(is)X +3755(full,)X +3907(a)X +3964(secon-)X +2418 2091(dary)N +2593(multiplicative)X +3069(hash)X +3248(value)X +3454(is)X +3538(computed)X +3885(to)X +3978(de\256ne)X +2418 2179(the)N +2542(probe)X +2751(interval.)X +3062(The)X +3213(probe)X +3422(interval)X +3693(is)X +3772(added)X +3989(to)X +4076(the)X +2418 2267(original)N +2712(bucket)X +2971(address)X +3257(\(modulo)X +3573(the)X +3716(table)X +3916(size\))X +4112(to)X +2418 2355(obtain)N +2658(a)X +2734(new)X +2908(bucket)X +3162(address.)X +3483(This)X +3665(process)X +3946(repeats)X +2418 2443(until)N +2588(an)X +2688(empty)X +2911(bucket)X +3148(is)X +3224(found.)X +3474(If)X +3551(no)X +3654(bucket)X +3891(is)X +3967(found,)X +2418 2531(an)N +2514(insertion)X +2814(fails)X +2972(with)X +3134(a)X +3190(``table)X +3420(full'')X +3605(condition.)X +2590 2645(The)N +2768(basic)X +2986(algorithm)X +3350(may)X +3541(be)X +3670(modi\256ed)X +4006(by)X +4138(a)X +2418 2733(number)N +2705(of)X +2813(compile)X +3112(time)X +3295(options)X +3571(available)X +3902(to)X +4005(those)X +2418 2821(users)N +2604(with)X +2767(AT&T)X +3006(source)X +3237(code.)X +3450(First,)X +3637(the)X +3756(package)X +4040(pro-)X +2418 2909(vides)N +2638(two)X +2809(options)X +3094(for)X +3238(hash)X +3435(functions.)X +3803(Users)X +4036(may)X +2418 2997(specify)N +2690(their)X +2877(own)X +3055(hash)X +3242(function)X +3549(by)X +3669(compiling)X +4032(with)X +2418 3085(``USCR'')N +2757(de\256ned)X +3016(and)X +3155(declaring)X +3477(and)X +3616(de\256ning)X +3901(the)X +4022(vari-)X +2418 3173(able)N +2 f +2578(hcompar)X +1 f +2863(,)X +2909(a)X +2971(function)X +3263(taking)X +3488(two)X +3633(string)X +3840(arguments)X +2418 3261(and)N +2560(returning)X +2880(an)X +2982(integer.)X +3271(Users)X +3480(may)X +3643(also)X +3797(request)X +4054(that)X +2418 3349(hash)N +2587(values)X +2814(be)X +2912(computed)X +3250(simply)X +3489(by)X +3590(taking)X +3811(the)X +3930(modulo)X +2418 3437(of)N +2521(key)X +2673(\(using)X +2909(division)X +3201(rather)X +3424(than)X +3597(multiplication)X +4080(for)X +2418 3525(hash)N +2589(value)X +2787(calculation\).)X +3230(If)X +3308(this)X +3447(technique)X +3783(is)X +3859(used,)X +4049(col-)X +2418 3613(lisions)N +2651(are)X +2775(resolved)X +3072(by)X +3176(scanning)X +3485(sequentially)X +3896(from)X +4076(the)X +2418 3701(selected)N +2702(bucket)X +2941(\(linear)X +3176(probing\).)X +3517(This)X +3684(option)X +3913(is)X +3991(avail-)X +2418 3789(able)N +2572(by)X +2672(de\256ning)X +2954(the)X +3072(variable)X +3351(``DIV'')X +3622(at)X +3700(compile)X +3978(time.)X +2590 3903(A)N +2720(second)X +3015(option,)X +3311(based)X +3565(on)X +3716(an)X +3863(algorithm)X +2418 3991(discovered)N +2787(by)X +2888(Richard)X +3163(P.)X +3248(Brent,)X +3466(rearranges)X +3822(the)X +3940(table)X +4116(at)X +2418 4079(the)N +2549(time)X +2724(of)X +2824(insertion)X +3137(in)X +3232(order)X +3434(to)X +3528(speed)X +3743(up)X +3855(retrievals.)X +2418 4167(The)N +2571(basic)X +2764(idea)X +2926(is)X +3007(to)X +3097(shorten)X +3361(long)X +3531(probe)X +3741(sequences)X +4094(by)X +2418 4255(lengthening)N +2833(short)X +3030(probe)X +3249(sequences.)X +3651(Once)X +3857(the)X +3991(probe)X +2418 4343(chain)N +2613(has)X +2741(exceeded)X +3062(some)X +3252(threshold)X +3571(\(Brent)X +3796(suggests)X +4087(2\),)X +2418 4431(we)N +2541(attempt)X +2809(to)X +2899(shuf\257e)X +3145(any)X +3289(colliding)X +3601(keys)X +3776(\(keys)X +3978(which)X +2418 4519(appeared)N +2734(in)X +2821(the)X +2944(probe)X +3152(sequence)X +3471(of)X +3562(the)X +3684(new)X +3842(key\).)X +4049(The)X +2418 4607(details)N +2652(of)X +2744(this)X +2884(key)X +3025(shuf\257ing)X +3333(can)X +3469(be)X +3569(found)X +3780(in)X +3866([KNU68])X +2418 4695(and)N +2576([BRE73].)X +2946(This)X +3129(algorithm)X +3481(may)X +3660(be)X +3777(obtained)X +4094(by)X +2418 4783(de\256ning)N +2700(the)X +2818(variable)X +3097(``BRENT'')X +3487(at)X +3565(compile)X +3843(time.)X +2590 4897(A)N +2698(third)X +2899(set)X +3038(of)X +3154(options,)X +3458(obtained)X +3783(by)X +3912(de\256ning)X +2418 4985(``CHAINED'',)N +2943(use)X +3086(linked)X +3321(lists)X +3484(to)X +3581(resolve)X +3848(collisions.)X +2418 5073(Either)N +2647(of)X +2747(the)X +2878(primary)X +3164(hash)X +3343(function)X +3642(described)X +3982(above)X +2418 5161(may)N +2584(be)X +2688(used,)X +2882(but)X +3011(all)X +3118(collisions)X +3451(are)X +3577(resolved)X +3876(by)X +3983(build-)X +2418 5249(ing)N +2554(a)X +2623(linked)X +2856(list)X +2986(of)X +3086(entries)X +3333(from)X +3522(the)X +3653(primary)X +3940(bucket.)X +2418 5337(By)N +2542(default,)X +2816(new)X +2981(entries)X +3226(will)X +3381(be)X +3488(added)X +3711(to)X +3804(a)X +3871(bucket)X +4116(at)X +2418 5425(the)N +2541(beginning)X +2886(of)X +2978(the)X +3101(bucket)X +3339(chain.)X +3577(However,)X +3916(compile)X +2418 5513(options)N +2706(``SORTUP'')X +3173(or)X +3293(``SORTDOWN'')X +3908(may)X +4098(be)X +2418 5601(speci\256ed)N +2723(to)X +2805(order)X +2995(the)X +3113(hash)X +3280(chains)X +3505(within)X +3729(each)X +3897(bucket.)X +3 f +432 5960(4)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +5 p +%%Page: 5 5 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +2 f +1444 538(dynahash)N +1 f +892 670(The)N +2 f +1054(dynahash)X +1 f +1398(library,)X +1669(written)X +1932(by)X +2048(Esmond)X +2346(Pitt,)X +720 758(implements)N +1183(Larson's)X +1554(linear)X +1827(hashing)X +2165(algorithm)X +720 846([LAR88])N +1097(with)X +1302(an)X +2 f +1440(hsearch)X +1 f +1756(compatible)X +2174(interface.)X +720 934(Intuitively,)N +1099(a)X +1161(hash)X +1334(table)X +1516(begins)X +1751(as)X +1844(a)X +1905(single)X +2121(bucket)X +2360(and)X +720 1022(grows)N +941(in)X +1028(generations,)X +1443(where)X +1665(a)X +1725(generation)X +2088(corresponds)X +720 1110(to)N +815(a)X +884(doubling)X +1201(in)X +1296(the)X +1427(size)X +1585(of)X +1685(the)X +1815(hash)X +1994(table.)X +2222(The)X +2379(0)X +2 f +7 s +1078(th)Y +10 s +1 f +720 1198(generation)N +1085(occurs)X +1321(as)X +1414(the)X +1538(table)X +1719(grows)X +1940(from)X +2121(one)X +2262(bucket)X +720 1286(to)N +814(two.)X +1006(In)X +1105(the)X +1235(next)X +1405(generation)X +1776(the)X +1906(table)X +2093(grows)X +2320(from)X +720 1374(two)N +862(to)X +946(four.)X +1122(During)X +1371(each)X +1541(generation,)X +1921(every)X +2121(bucket)X +2356(that)X +720 1462(existed)N +967(at)X +1045(the)X +1163(beginning)X +1503(of)X +1590(the)X +1708(generation)X +2067(is)X +2140(split.)X +892 1576(The)N +1041(table)X +1221(starts)X +1414(as)X +1505(a)X +1565(single)X +1780(bucket)X +2018(\(numbered)X +2389(0\),)X +720 1664(the)N +839(current)X +1088(split)X +1245(bucket)X +1479(is)X +1552(set)X +1661(to)X +1743(bucket)X +1977(0,)X +2057(and)X +2193(the)X +2311(max-)X +720 1752(imum)N +933(split)X +1097(point)X +1288(is)X +1368(set)X +1483(to)X +1571(twice)X +1771(the)X +1895(current)X +2149(split)X +2312(point)X +720 1840(\(0\).)N +863(When)X +1084(it)X +1157(is)X +1239(time)X +1410(for)X +1532(a)X +1596(bucket)X +1838(to)X +1928(split,)X +2113(the)X +2239(keys)X +2414(in)X +720 1928(the)N +872(current)X +1154(split)X +1345(bucket)X +1612(are)X +1764(divided)X +2057(between)X +2378(the)X +720 2016(current)N +981(split)X +1151(bucket)X +1397(and)X +1545(a)X +1613(new)X +1779(bucket)X +2025(whose)X +2262(bucket)X +720 2104(number)N +1000(is)X +1088(equal)X +1297(to)X +1394(1)X +1469(+)X +1549(current)X +1812(split)X +1984(bucket)X +2232(+)X +2311(max-)X +720 2192(imum)N +927(split)X +1085(point.)X +1310(We)X +1442(can)X +1574(determine)X +1915(which)X +2131(keys)X +2298(move)X +720 2280(to)N +807(the)X +929(new)X +1087(bucket)X +1325(by)X +1429(examining)X +1791(the)X +2 f +1913(n)X +7 s +1962 2248(th)N +10 s +1 f +2043 2280(bit)N +2151(of)X +2242(a)X +2302(key's)X +720 2368(hash)N +899(value)X +1105(where)X +1334(n)X +1406(is)X +1491(the)X +1620(generation)X +1990(number.)X +2306(After)X +720 2456(the)N +846(bucket)X +1088(at)X +1174(the)X +1300(maximum)X +1651(split)X +1815(point)X +2006(has)X +2140(been)X +2319(split,)X +720 2544(the)N +839(generation)X +1198(number)X +1463(is)X +1536(incremented,)X +1973(the)X +2091(current)X +2339(split)X +720 2632(point)N +908(is)X +985(set)X +1098(back)X +1274(to)X +1360(zero,)X +1543(and)X +1683(the)X +1805(maximum)X +2152(split)X +2312(point)X +720 2720(is)N +815(set)X +946(to)X +1050(the)X +1190(number)X +1477(of)X +1586(the)X +1725(last)X +1877(bucket)X +2132(in)X +2235(the)X +2374(\256le)X +720 2808(\(which)N +971(is)X +1052(equal)X +1253(to)X +1342(twice)X +1543(the)X +1668(old)X +1797(maximum)X +2148(split)X +2312(point)X +720 2896(plus)N +873(1\).)X +892 3010(To)N +1031(facilitate)X +1361(locating)X +1668(keys,)X +1884(we)X +2027(maintain)X +2356(two)X +720 3098(masks.)N +989(The)X +1143(low)X +1291(mask)X +1488(is)X +1569(equal)X +1771(to)X +1861(the)X +1987(maximum)X +2339(split)X +720 3186(bucket)N +967(and)X +1116(the)X +1247(high)X +1422(mask)X +1624(is)X +1710(equal)X +1917(to)X +2011(the)X +2141(next)X +2311(max-)X +720 3274(imum)N +931(split)X +1093(bucket.)X +1372(To)X +1486(locate)X +1703(a)X +1764(speci\256c)X +2033(key,)X +2193(we)X +2311(com-)X +720 3362(pute)N +881(a)X +940(32-bit)X +1154(hash)X +1324(value)X +1520(using)X +1715(a)X +1773(bit-randomizing)X +2311(algo-)X +720 3450(rithm)N +932(such)X +1118(as)X +1224(the)X +1361(one)X +1516(described)X +1862(in)X +1962([LAR88].)X +2334(This)X +720 3538(hash)N +893(value)X +1093(is)X +1172(then)X +1336(masked)X +1607(with)X +1775(the)X +1898(high)X +2065(mask.)X +2299(If)X +2378(the)X +720 3626(resulting)N +1026(number)X +1297(is)X +1376(greater)X +1626(than)X +1790(the)X +1913(maximum)X +2262(bucket)X +720 3714(in)N +823(the)X +962(table)X +1159(\(current)X +1455(split)X +1633(bucket)X +1888(+)X +1974(maximum)X +2339(split)X +720 3802(point\),)N +962(the)X +1091(hash)X +1269(value)X +1474(is)X +1558(masked)X +1834(with)X +2007(the)X +2136(low)X +2287(mask.)X +720 3890(In)N +825(either)X +1046(case,)X +1242(the)X +1377(result)X +1592(of)X +1696(the)X +1831(mask)X +2037(is)X +2127(the)X +2262(bucket)X +720 3978(number)N +989(for)X +1107(the)X +1229(given)X +1431(key.)X +1611(The)X +1759(algorithm)X +2093(below)X +2312(illus-)X +720 4066(trates)N +914(this)X +1049(process.)X +0 f +8 s +720 4365(h)N +796(=)X +872 -0.4038(calchash\(key\);)AX +720 4453(bucket)N +986(=)X +1062(h)X +1138(&)X +1214 -0.4167(high_mask;)AX +720 4541(if)N +834(\()X +910(bucket)X +1176(>)X +1252 -0.4167(max_bucket)AX +1670(\))X +1008 4629(bucket)N +1274(=)X +1350(h)X +1426(&)X +1502 -0.4219(low_mask;)AX +720 4717 -0.4018(return\(bucket\);)AN +1 f +10 s +892 5042(In)N +1013(order)X +1237(to)X +1353(decide)X +1617(when)X +1845(to)X +1961(split)X +2152(a)X +2242(bucket,)X +2 f +720 5130(dynahash)N +1 f +1050(uses)X +2 f +1210(controlled)X +1561(splitting)X +1 f +1822(.)X +1884(A)X +1964(hash)X +2133(table)X +2311(has)X +2440(a)X +720 5218(\256ll)N +837(factor)X +1054(which)X +1279(is)X +1361(expressed)X +1707(in)X +1798(terms)X +2004(of)X +2099(the)X +2225(average)X +720 5306(number)N +990(of)X +1082(keys)X +1253(in)X +1339(each)X +1511(bucket.)X +1789(Each)X +1974(time)X +2140(the)X +2262(table's)X +720 5394(total)N +885(number)X +1153(of)X +1243(keys)X +1413(divided)X +1676(by)X +1778(its)X +1875(number)X +2142(of)X +2231(buckets)X +720 5482(exceeds)N +995(this)X +1130(\256ll)X +1238(factor,)X +1466(a)X +1522(bucket)X +1756(is)X +1829(split.)X +2878 538(Since)N +3079(the)X +2 f +3200(hsearch)X +1 f +3477(create)X +3693(interface)X +3998(\()X +2 f +4025(hcreate)X +1 f +4266(\))X +4315(calls)X +2706 626(for)N +2842(an)X +2960(estimate)X +3269(of)X +3378(the)X +3518(\256nal)X +3702(size)X +3869(of)X +3978(the)X +4118(hash)X +4306(table)X +2706 714(\()N +2 f +2733(nelem)X +1 f +2925(\),)X +2 f +3007(dynahash)X +1 f +3349(uses)X +3522(this)X +3672(information)X +4085(to)X +4182(initialize)X +2706 802(the)N +2848(table.)X +3088(The)X +3257(initial)X +3486(number)X +3774(of)X +3884(buckets)X +4172(is)X +4268(set)X +4400(to)X +2 f +2706 890(nelem)N +1 f +2926(rounded)X +3217(to)X +3306(the)X +3431(next)X +3596(higher)X +3828(power)X +4056(of)X +4150(two.)X +4337(The)X +2706 978(current)N +2958(split)X +3118(point)X +3305(is)X +3381(set)X +3493(to)X +3578(0)X +3641(and)X +3780(the)X +3901(maximum)X +4248(bucket)X +2706 1066(and)N +2842(maximum)X +3186(split)X +3343(point)X +3527(are)X +3646(set)X +3755(to)X +3837(this)X +3972(rounded)X +4255(value.)X +3 f +3148 1220(The)N +3301(New)X +3473(Implementation)X +1 f +2878 1352(Our)N +3042(implementation)X +3583(is)X +3675(also)X +3842(based)X +4063(on)X +4181(Larson's)X +2706 1440(linear)N +2939(hashing)X +3238([LAR88])X +3582(algorithm)X +3943(as)X +4060(well)X +4248(as)X +4364(the)X +2 f +2706 1528(dynahash)N +1 f +3047(implementation.)X +3623(The)X +2 f +3782(dbm)X +1 f +3954(family)X +4197(of)X +4297(algo-)X +2706 1616(rithms)N +2942(decide)X +3184(dynamically)X +3612(which)X +3840(bucket)X +4085(to)X +4178(split)X +4346(and)X +2706 1704(when)N +2914(to)X +3010(split)X +3180(it)X +3257(\(when)X +3491(it)X +3568(over\257ows\))X +3944(while)X +2 f +4155(dynahash)X +1 f +2706 1792(splits)N +2933(in)X +3054(a)X +3149(prede\256ned)X +3547(order)X +3776(\(linearly\))X +4134(and)X +4309(at)X +4426(a)X +2706 1880(prede\256ned)N +3116(time)X +3328(\(when)X +3599(the)X +3767(table)X +3993(\256ll)X +4151(factor)X +4409(is)X +2706 1968(exceeded\).)N +3121(We)X +3280(use)X +3434(a)X +3517(hybrid)X +3773(of)X +3887(these)X +4099(techniques.)X +2706 2056(Splits)N +2913(occur)X +3118(in)X +3206(the)X +3330(prede\256ned)X +3695(order)X +3891(of)X +3984(linear)X +4193(hashing,)X +2706 2144(but)N +2845(the)X +2980(time)X +3159(at)X +3253(which)X +3485(pages)X +3704(are)X +3839(split)X +4012(is)X +4101(determined)X +2706 2232(both)N +2869(by)X +2970(page)X +3143(over\257ows)X +3480(\()X +2 f +3507(uncontrolled)X +3937(splitting)X +1 f +4198(\))X +4246(and)X +4382(by)X +2706 2320(exceeding)N +3052(the)X +3170(\256ll)X +3278(factor)X +3486(\()X +2 f +3513(controlled)X +3862(splitting)X +1 f +4123(\))X +2878 2434(A)N +2962(hash)X +3135(table)X +3317(is)X +3395(parameterized)X +3876(by)X +3981(both)X +4148(its)X +4248(bucket)X +2706 2522(size)N +2904(\()X +2 f +2931(bsize)X +1 f +(\))S +3191(and)X +3380(\256ll)X +3541(factor)X +3801(\()X +2 f +3828(ffactor)X +1 f +4041(\).)X +4180(Whereas)X +2 f +2706 2610(dynahash's)N +1 f +3095(buckets)X +3364(can)X +3500(be)X +3599(represented)X +3993(as)X +4083(a)X +4142(linked)X +4365(list)X +2706 2698(of)N +2798(elements)X +3108(in)X +3195(memory,)X +3507(our)X +3639(package)X +3928(needs)X +4136(to)X +4222(support)X +2706 2786(disk)N +2874(access,)X +3135(and)X +3286(must)X +3476(represent)X +3806(buckets)X +4086(in)X +4183(terms)X +4395(of)X +2706 2874(pages.)N +2955(The)X +2 f +3106(bsize)X +1 f +3291(is)X +3369(the)X +3492(size)X +3642(\(in)X +3756(bytes\))X +3977(of)X +4069(these)X +4259(pages.)X +2706 2962(As)N +2833(in)X +2933(linear)X +3154(hashing,)X +3461(the)X +3597(number)X +3879(of)X +3983(buckets)X +4265(in)X +4364(the)X +2706 3050(table)N +2906(is)X +3003(equal)X +3221(to)X +3327(the)X +3469(number)X +3758(of)X +3869(keys)X +4060(in)X +4165(the)X +4306(table)X +2706 3138(divided)N +2988(by)X +2 f +3110(ffactor)X +1 f +3323(.)X +2 f +8 s +3113(6)Y +1 f +10 s +3417 3138(The)N +3584(controlled)X +3950(splitting)X +4252(occurs)X +2706 3226(each)N +2878(time)X +3044(the)X +3166(number)X +3435(of)X +3526(keys)X +3697(in)X +3783(the)X +3905(table)X +4085(exceeds)X +4364(the)X +2706 3314(\256ll)N +2814(factor)X +3022(multiplied)X +3370(by)X +3470(the)X +3588(number)X +3853(of)X +3940(buckets.)X +2878 3428(Inserting)N +3187(keys)X +3358(and)X +3498(splitting)X +3783(buckets)X +4051(is)X +4127(performed)X +2706 3516(precisely)N +3018(as)X +3107(described)X +3437(previously)X +3796(for)X +2 f +3911(dynahash)X +1 f +4218(.)X +4279(How-)X +2706 3604(ever,)N +2897(since)X +3094(buckets)X +3371(are)X +3502(now)X +3671(comprised)X +4036(of)X +4134(pages,)X +4368(we)X +2706 3692(must)N +2883(be)X +2981(prepared)X +3284(to)X +3367(handle)X +3602(cases)X +3793(where)X +4011(the)X +4130(size)X +4276(of)X +4364(the)X +2706 3780(keys)N +2873(and)X +3009(data)X +3163(in)X +3245(a)X +3301(bucket)X +3535(exceed)X +3779(the)X +3897(bucket)X +4131(size.)X +3 f +3318 3934(Over\257ow)N +3654(Pages)X +1 f +2878 4066(There)N +3095(are)X +3223(two)X +3372(cases)X +3571(where)X +3797(a)X +3862(key)X +4007(may)X +4174(not)X +4305(\256t)X +4400(in)X +2706 4154(its)N +2802(designated)X +3166(bucket.)X +3441(In)X +3529(the)X +3647(\256rst)X +3791(case,)X +3970(the)X +4088(total)X +4250(size)X +4395(of)X +2706 4242(the)N +2833(key)X +2978(and)X +3123(data)X +3286(may)X +3453(exceed)X +3706(the)X +3833(bucket)X +4076(size.)X +4269(In)X +4364(the)X +2706 4330(second,)N +3008(addition)X +3328(of)X +3453(a)X +3547(new)X +3739(key)X +3913(could)X +4149(cause)X +4386(an)X +2706 4418(over\257ow,)N +3068(but)X +3227(the)X +3382(bucket)X +3652(in)X +3770(question)X +4097(is)X +4206(not)X +4364(yet)X +2706 4506(scheduled)N +3049(to)X +3133(be)X +3230(split.)X +3428(In)X +3516(existing)X +3790(implementations,)X +4364(the)X +2706 4594(second)N +2953(case)X +3115(never)X +3317(arises)X +3523(\(since)X +3738(buckets)X +4006(are)X +4128(split)X +4288(when)X +2706 4682(they)N +2871(over\257ow\))X +3210(and)X +3352(the)X +3476(\256rst)X +3626(case)X +3791(is)X +3870(not)X +3998(handled)X +4278(at)X +4362(all.)X +2706 4770(Although)N +3036(large)X +3225(key/data)X +3525(pair)X +3678(handling)X +3986(is)X +4066(dif\256cult)X +4346(and)X +2706 4858(expensive,)N +3083(it)X +3163(is)X +3252(essential.)X +3604(In)X +3706(a)X +3777(linear)X +3995(hashed)X +4253(imple-)X +2706 4946(mentation,)N +3087(over\257ow)X +3413(pages)X +3636(are)X +3775(required)X +4083(for)X +4217(buckets)X +2706 5034(which)N +2935(over\257ow)X +3253(before)X +3492(they)X +3662(are)X +3793(split,)X +3982(so)X +4085(we)X +4211(can)X +4355(use)X +2706 5122(the)N +2833(same)X +3027(mechanism)X +3421(for)X +3544(large)X +3734(key/data)X +4035(pairs)X +4220(that)X +4368(we)X +2706 5210(use)N +2837(for)X +2955(over\257ow)X +3264(pages.)X +3511(Logically,)X +3862(we)X +3980(chain)X +4177(over\257ow)X +16 s +2706 5353 MXY +864 0 Dl +2 f +8 s +2746 5408(6)N +1 f +9 s +2801 5433(This)N +2952(is)X +3023(not)X +3138(strictly)X +3361(true.)X +3532(The)X +3667(\256le)X +3782(does)X +3937(not)X +4052(contract)X +4306(when)X +2706 5513(keys)N +2861(are)X +2972(deleted,)X +3221(so)X +3308(the)X +3419(number)X +3662(of)X +3744(buckets)X +3986(is)X +4056(actually)X +4306(equal)X +2706 5593(to)N +2782(the)X +2890(maximum)X +3202(number)X +3441(of)X +3520(keys)X +3671(ever)X +3814(present)X +4041(in)X +4116(the)X +4223(table)X +4382(di-)X +2706 5673(vided)N +2884(by)X +2974(the)X +3080(\256ll)X +3178(factor.)X +3 f +10 s +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(5)X + +6 p +%%Page: 6 6 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538(pages)N +639(to)X +725(the)X +847(buckets)X +1116(\(also)X +1296(called)X +1512(primary)X +1789(pages\).)X +2062(In)X +2152(a)X +432 626(memory)N +730(based)X +943(representation,)X +1448(over\257ow)X +1763(pages)X +1976(do)X +2086(not)X +432 714(pose)N +628(any)X +792(special)X +1063(problems)X +1409(because)X +1712(we)X +1854(can)X +2014(chain)X +432 802(over\257ow)N +776(pages)X +1017(to)X +1137(primary)X +1449(pages)X +1690(using)X +1921(memory)X +432 890(pointers.)N +776(However,)X +1137(mapping)X +1463(these)X +1674(over\257ow)X +2005(pages)X +432 978(into)N +584(a)X +648(disk)X +809(\256le)X +939(is)X +1019(more)X +1211(of)X +1305(a)X +1368(challenge,)X +1723(since)X +1915(we)X +2036(need)X +432 1066(to)N +547(be)X +675(able)X +861(to)X +975(address)X +1268(both)X +1462(bucket)X +1728(pages,)X +1983(whose)X +432 1154(numbers)N +729(are)X +849(growing)X +1137(linearly,)X +1422(and)X +1558(some)X +1747(indeterminate)X +432 1242(number)N +715(of)X +820(over\257ow)X +1143(pages)X +1364(without)X +1646(reorganizing)X +2090(the)X +432 1330(\256le.)N +604 1444(One)N +789(simple)X +1053(solution)X +1361(would)X +1612(be)X +1739(to)X +1852(allocate)X +2152(a)X +432 1532(separate)N +737(\256le)X +880(for)X +1015(over\257ow)X +1341(pages.)X +1604(The)X +1769(disadvantage)X +432 1620(with)N +605(such)X +783(a)X +850(technique)X +1193(is)X +1276(that)X +1426(it)X +1500(requires)X +1789(an)X +1895(extra)X +2086(\256le)X +432 1708(descriptor,)N +794(an)X +891(extra)X +1073(system)X +1316(call)X +1453(on)X +1554(open)X +1731(and)X +1867(close,)X +2072(and)X +432 1796(logically)N +739(associating)X +1122(two)X +1269(independent)X +1687(\256les.)X +1886(For)X +2023(these)X +432 1884(reasons,)N +728(we)X +857(wanted)X +1123(to)X +1219(map)X +1391(both)X +1567(primary)X +1855(pages)X +2072(and)X +432 1972(over\257ow)N +737(pages)X +940(into)X +1084(the)X +1202(same)X +1387(\256le)X +1509(space.)X +604 2086(The)N +799(buddy-in-waiting)X +1425(algorithm)X +1806(provides)X +2152(a)X +432 2174(mechanism)N +851(to)X +966(support)X +1259(multiple)X +1578(pages)X +1814(per)X +1970(logical)X +432 2262(bucket)N +685(while)X +902(retaining)X +1226(the)X +1362(simple)X +1613(split)X +1788(sequence)X +2121(of)X +432 2350(linear)N +681(hashing.)X +1015(Over\257ow)X +1383(pages)X +1631(are)X +1795(preallocated)X +432 2438(between)N +781(generations)X +1232(of)X +1379(primary)X +1713(pages.)X +1996(These)X +432 2526(over\257ow)N +759(pages)X +984(are)X +1125(used)X +1314(by)X +1436(any)X +1594(bucket)X +1850(containing)X +432 2614(more)N +646(keys)X +842(than)X +1029(\256t)X +1144(on)X +1273(the)X +1420(primary)X +1723(page)X +1924(and)X +2089(are)X +432 2702(reclaimed,)N +808(if)X +896(possible,)X +1217(when)X +1430(the)X +1567(bucket)X +1819(later)X +2000(splits.)X +432 2790(Figure)N +687(3)X +773(depicts)X +1045(the)X +1188(layout)X +1433(of)X +1545(primary)X +1844(pages)X +2072(and)X +432 2878(over\257ow)N +752(pages)X +970(within)X +1209(the)X +1342(same)X +1542(\256le.)X +1699(Over\257ow)X +2036(page)X +432 2966(use)N +586(information)X +1011(is)X +1111(recorded)X +1440(in)X +1548(bitmaps)X +1847(which)X +2089(are)X +432 3054(themselves)N +819(stored)X +1046(on)X +1157(over\257ow)X +1472(pages.)X +1725(The)X +1880(addresses)X +432 3142(of)N +520(the)X +639(bitmap)X +882(pages)X +1086(and)X +1223(the)X +1342(number)X +1608(of)X +1695(pages)X +1898(allocated)X +432 3230(at)N +515(each)X +688(split)X +850(point)X +1039(are)X +1163(stored)X +1384(in)X +1470(the)X +1592(\256le)X +1718(header.)X +1997(Using)X +432 3318(this)N +577(information,)X +1005(both)X +1177(over\257ow)X +1492(addresses)X +1829(and)X +1974(bucket)X +432 3406(addresses)N +764(can)X +900(be)X +999(mapped)X +1276(to)X +1361(disk)X +1517(addresses)X +1848(by)X +1951(the)X +2072(fol-)X +432 3494(lowing)N +674(calculation:)X +0 f +8 s +432 3793(int)N +736(bucket;)X +1192(/*)X +1306(bucket)X +1572(address)X +1876(*/)X +432 3881(u_short)N +736(oaddr;)X +1192(/*)X +1306(OVERFLOW)X +1648(address)X +1952(*/)X +432 3969(int)N +736 -0.4125(nhdr_pages;)AX +1192(/*)X +1306(npages)X +1572(in)X +1686 -112.4062(\256le)AX +1838(header)X +2104(*/)X +432 4057(int)N +736 -0.4125(spares[32];)AX +1192(/*)X +1306(npages)X +1572(at)X +1686(each)X +1876(split)X +2104(*/)X +432 4145(int)N +736(log2\(\);)X +1198(/*)X +1312(ceil\(log)X +1654(base)X +1844(2\))X +1958(*/)X +432 4321(#DEFINE)N +736 -0.3929(BUCKET_TO_PAGE\(bucket\))AX +1610(\\)X +584 4409(bucket)N +850(+)X +926 -0.4167(nhdr_pages)AX +1344(+)X +1420(\\)X +584 4497 -0.3894(\(bucket?spares[logs2\(bucket)AN +1648(+)X +1724(1\)-1]:0\))X +432 4673(#DEFINE)N +736 -0.3947(OADDR_TO_PAGE\(oaddr\))AX +1534(\\)X +584 4761 -0.3984(BUCKET_TO_PAGE\(\(1)AN +1268(<<)X +1382 -0.4091(\(oaddr>>11\)\))AX +1876(-)X +1952(1\))X +2066(+)X +2142(\\)X +584 4849(oaddr)N +812(&)X +888(0x7ff;)X +1 f +10 s +604 5262(An)N +728(over\257ow)X +1039(page)X +1217(is)X +1295(addressed)X +1637(by)X +1742(its)X +1842(split)X +2004(point,)X +432 5350(identifying)N +858(the)X +1031(generations)X +1476(between)X +1819(which)X +2090(the)X +432 5438(over\257ow)N +740(page)X +915(is)X +991(allocated,)X +1324(and)X +1463(its)X +1561(page)X +1736(number,)X +2023(iden-)X +432 5526(tifying)N +665(the)X +783(particular)X +1111(page)X +1283(within)X +1507(the)X +1625(split)X +1782(point.)X +1986(In)X +2073(this)X +432 5614(implementation,)N +983(offsets)X +1225(within)X +1457(pages)X +1668(are)X +1795(16)X +1903(bits)X +2046(long)X +432 5702(\(limiting)N +732(the)X +851(maximum)X +1196(page)X +1368(size)X +1513(to)X +1595(32K\),)X +1800(so)X +1891(we)X +2005(select)X +2418 538(an)N +2535(over\257ow)X +2860(page)X +3052(addressing)X +3435(algorithm)X +3786(that)X +3946(can)X +4098(be)X +2418 626(expressed)N +2760(in)X +2847(16)X +2952(bits)X +3091(and)X +3231(which)X +3451(allows)X +3684(quick)X +3886(retrieval.)X +2418 714(The)N +2568(top)X +2695(\256ve)X +2840(bits)X +2980(indicate)X +3258(the)X +3380(split)X +3541(point)X +3729(and)X +3869(the)X +3991(lower)X +2418 802(eleven)N +2650(indicate)X +2926(the)X +3046(page)X +3220(number)X +3487(within)X +3713(the)X +3832(split)X +3990(point.)X +2418 890(Since)N +2633(\256ve)X +2789(bits)X +2940(are)X +3075(reserved)X +3384(for)X +3514(the)X +3648(split)X +3821(point,)X +4041(\256les)X +2418 978(may)N +2578(split)X +2737(32)X +2839(times)X +3034(yielding)X +3318(a)X +3376(maximum)X +3721(\256le)X +3844(size)X +3990(of)X +4078(2)X +7 s +946(32)Y +10 s +2418 1066(buckets)N +2698(and)X +2849(32)X +2 f +(*)S +1 f +2982(2)X +7 s +1034(11)Y +10 s +3113 1066(over\257ow)N +3433(pages.)X +3691(The)X +3850(maximum)X +2418 1154(page)N +2597(size)X +2749(is)X +2829(2)X +7 s +1122(15)Y +10 s +1154(,)Y +2971(yielding)X +3259(a)X +3321(maximum)X +3671(\256le)X +3799(size)X +3950(greater)X +2418 1242(than)N +2601(131,000)X +2906(GB)X +3061(\(on)X +3212(\256le)X +3358(systems)X +3655(supporting)X +4041(\256les)X +2418 1330(larger)N +2626(than)X +2784(4GB\).)X +10 f +2418 1418 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 Dt +4014 2275 MXY +0 133 Dl +3881 2275 MXY +0 133 Dl +3748 2275 MXY +0 133 Dl +3083 2275 MXY +0 133 Dl +5 s +1 f +3523 2475(2/3)N +3390(2/2)X +3257(2/1)X +2859(1/2)X +2726(1/1)X +5 Dt +3814 1743 MXY +0 133 Dl +3282 1743 MXY +0 133 Dl +3017 1743 MXY +0 133 Dl +2884 1743 MXY +0 133 Dl +1 Dt +3681 1743 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3548 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3415 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3282 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3150 MX +0 133 Dl +132 0 Dl +0 -133 Dl +-132 0 Dl +3017 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +2884 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3 f +8 s +3017 2601(Over\257ow)N +3285(Addresses)X +3515 2833(Over\257ow)N +3783(Pages)X +2850(Buckets)X +1 Di +3349 2740 MXY + 3349 2740 lineto + 3482 2740 lineto + 3482 2873 lineto + 3349 2873 lineto + 3349 2740 lineto +closepath 3 3349 2740 3482 2873 Dp +2684 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +5 Dt +4146 2275 MXY +0 133 Dl +3216 2275 MXY +0 133 Dl +2684 2275 MXY +0 133 Dl +2551 2275 MXY +0 133 Dl +1 f +3798 1963(3)N +3266 1980(2)N +3001(1)X +2868(0)X +1 Dt +2751 1743 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3548 2275 MXY +-15 -22 Dl +2 16 Dl +-13 11 Dl +26 -5 Dl +-282 -117 Dl +3432 2275 MXY +-10 -25 Dl +-2 16 Dl +-15 8 Dl +27 1 Dl +-166 -117 Dl +3282 2275 MXY +12 -25 Dl +-14 10 Dl +-15 -6 Dl +17 21 Dl +-16 -117 Dl +2884 2275 MXY +26 7 Dl +-12 -12 Dl +3 -16 Dl +-17 21 Dl +382 -117 Dl +2751 2275 MXY +25 9 Dl +-11 -12 Dl +5 -17 Dl +-19 20 Dl +515 -117 Dl +3 f +3070 2152(Over\257ow)N +3338(Pages)X +3482 2275 MXY + 3482 2275 lineto + 3615 2275 lineto + 3615 2408 lineto + 3482 2408 lineto + 3482 2275 lineto +closepath 3 3482 2275 3615 2408 Dp +3349 MX + 3349 2275 lineto + 3482 2275 lineto + 3482 2408 lineto + 3349 2408 lineto + 3349 2275 lineto +closepath 3 3349 2275 3482 2408 Dp +3216 MX + 3216 2275 lineto + 3349 2275 lineto + 3349 2408 lineto + 3216 2408 lineto + 3216 2275 lineto +closepath 3 3216 2275 3349 2408 Dp +2817 MX + 2817 2275 lineto + 2950 2275 lineto + 2950 2408 lineto + 2817 2408 lineto + 2817 2275 lineto +closepath 3 2817 2275 2950 2408 Dp +2684 MX + 2684 2275 lineto + 2817 2275 lineto + 2817 2408 lineto + 2684 2408 lineto + 2684 2275 lineto +closepath 3 2684 2275 2817 2408 Dp +3615 MX +0 133 Dl +531 0 Dl +0 -133 Dl +-531 0 Dl +2950 MX +0 133 Dl +266 0 Dl +0 -133 Dl +-266 0 Dl +2551 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3798 1726 MXY +-21 -18 Dl +6 16 Dl +-10 13 Dl +25 -11 Dl +-599 -99 Dl +3266 1726 MXY +-1 -27 Dl +-7 15 Dl +-17 1 Dl +25 11 Dl +-67 -99 Dl +3033 1726 MXY +27 1 Dl +-14 -8 Dl +-1 -17 Dl +-12 24 Dl +166 -99 Dl +2900 1726 MXY +27 7 Dl +-13 -11 Dl +3 -17 Dl +-17 21 Dl +299 -99 Dl +3058 1621(Split)N +3203(Points)X +2418 2275 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3 Dt +-1 Ds +3137(Figure)Y +2619(3:)X +1 f +2691(Split)X +2832(points)X +3008(occur)X +3168(between)X +3399(generations)X +3712(and)X +3823(are)X +3919(numbered)X +2418 3225(from)N +2560(0.)X +2642(In)X +2713(this)X +2824(\256gure)X +2991(there)X +3136(are)X +3231(two)X +3345(over\257ow)X +3590(pages)X +3753(allocated)X +4000(at)X +4063(split)X +2418 3313(point)N +2566(1)X +2614(and)X +2722(three)X +2865(allocated)X +3111(at)X +3173(split)X +3300(point)X +3448(2.)X +10 s +10 f +2418 3489 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +2949 3731(Buffer)N +3192(Management)X +1 f +2590 3863(The)N +2744(hash)X +2920(table)X +3105(is)X +3187(stored)X +3412(in)X +3502(memory)X +3797(as)X +3892(a)X +3956(logical)X +2418 3951(array)N +2633(of)X +2749(bucket)X +3012(pointers.)X +3359(Physically,)X +3761(the)X +3907(array)X +4121(is)X +2418 4039(arranged)N +2728(in)X +2818(segments)X +3144(of)X +3239(256)X +3387(pointers.)X +3713(Initially,)X +4013(there)X +2418 4127(is)N +2530(space)X +2767(to)X +2887(allocate)X +3195(256)X +3373(segments.)X +3769(Reallocation)X +2418 4215(occurs)N +2651(when)X +2847(the)X +2967(number)X +3234(of)X +3323(buckets)X +3590(exceeds)X +3867(32K)X +4027(\(256)X +2418 4303(*)N +2508(256\).)X +2745(Primary)X +3053(pages)X +3286(may)X +3473(be)X +3598(accessed)X +3929(directly)X +2418 4391(through)N +2711(the)X +2853(array)X +3062(by)X +3185(bucket)X +3442(number)X +3730(and)X +3889(over\257ow)X +2418 4479(pages)N +2628(are)X +2754 0.4028(referenced)AX +3122(logically)X +3429(by)X +3536(their)X +3710(over\257ow)X +4022(page)X +2418 4567(address.)N +2726(For)X +2864(small)X +3063(hash)X +3236(tables,)X +3469(it)X +3539(is)X +3618(desirable)X +3934(to)X +4022(keep)X +2418 4655(all)N +2525(pages)X +2735(in)X +2823(main)X +3009(memory)X +3302(while)X +3506(on)X +3612(larger)X +3826(tables,)X +4059(this)X +2418 4743(is)N +2523(probably)X +2860(impossible.)X +3298(To)X +3438(satisfy)X +3698(both)X +3891(of)X +4009(these)X +2418 4831(requirements,)N +2900(the)X +3041(package)X +3348(includes)X +3658(buffer)X +3897(manage-)X +2418 4919(ment)N +2598(with)X +2760(LRU)X +2940(\(least)X +3134(recently)X +3413(used\))X +3607(replacement.)X +2590 5033(By)N +2730(default,)X +3020(the)X +3165(package)X +3475(allocates)X +3802(up)X +3928(to)X +4036(64K)X +2418 5121(bytes)N +2616(of)X +2712(buffered)X +3014(pages.)X +3246(All)X +3377(pages)X +3589(in)X +3680(the)X +3807(buffer)X +4032(pool)X +2418 5209(are)N +2542(linked)X +2766(in)X +2852(LRU)X +3036(order)X +3230(to)X +3316(facilitate)X +3621(fast)X +3761(replacement.)X +2418 5297(Whereas)N +2724(ef\256cient)X +3011(access)X +3241(to)X +3327(primary)X +3605(pages)X +3812(is)X +3889(provided)X +2418 5385(by)N +2521(the)X +2642(bucket)X +2879(array,)X +3087(ef\256cient)X +3372(access)X +3600(to)X +3684(over\257ow)X +3991(pages)X +2418 5473(is)N +2501(provided)X +2816(by)X +2926(linking)X +3182(over\257ow)X +3497(page)X +3679(buffers)X +3936(to)X +4027(their)X +2418 5561(predecessor)N +2827(page)X +3008(\(either)X +3247(the)X +3374(primary)X +3657(page)X +3838(or)X +3933(another)X +2418 5649(over\257ow)N +2742(page\).)X +3000(This)X +3181(means)X +3425(that)X +3584(an)X +3699(over\257ow)X +4022(page)X +3 f +432 5960(6)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +7 p +%%Page: 7 7 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(cannot)N +955(be)X +1052(present)X +1305(in)X +1388(the)X +1507(buffer)X +1724(pool)X +1886(if)X +1955(its)X +2050(primary)X +2324(page)X +720 626(is)N +804(not)X +937(present.)X +1240(This)X +1413(does)X +1591(not)X +1724(impact)X +1972(performance)X +2409(or)X +720 714(functionality,)N +1209(because)X +1524(an)X +1660(over\257ow)X +2005(page)X +2217(will)X +2400(be)X +720 802(accessed)N +1048(only)X +1236(after)X +1430(its)X +1550(predecessor)X +1975(page)X +2172(has)X +2324(been)X +720 890(accessed.)N +1068(Figure)X +1303(4)X +1369(depicts)X +1622(the)X +1746(data)X +1905(structures)X +2242(used)X +2414(to)X +720 978(manage)N +990(the)X +1108(buffer)X +1325(pool.)X +892 1092(The)N +1040(in-memory)X +1419(bucket)X +1656(array)X +1845(contains)X +2134(pointers)X +2414(to)X +720 1180(buffer)N +975(header)X +1248(structures)X +1617(which)X +1870(represent)X +2222(primary)X +720 1268(pages.)N +968(Buffer)X +1203(headers)X +1474(contain)X +1735(modi\256ed)X +2043(bits,)X +2202(the)X +2324(page)X +720 1356(address)N +995(of)X +1096(the)X +1228(buffer,)X +1479(a)X +1548(pointer)X +1808(to)X +1903(the)X +2034(actual)X +2259(buffer,)X +720 1444(and)N +875(a)X +950(pointer)X +1216(to)X +1317(the)X +1454(buffer)X +1690(header)X +1944(for)X +2077(an)X +2191(over\257ow)X +720 1532(page)N +901(if)X +979(it)X +1052(exists,)X +1283(in)X +1374(addition)X +1665(to)X +1756(the)X +1883(LRU)X +2072(links.)X +2296(If)X +2378(the)X +720 1620(buffer)N +950(corresponding)X +1442(to)X +1537(a)X +1606(particular)X +1947(bucket)X +2194(is)X +2280(not)X +2414(in)X +720 1708(memory,)N +1048(its)X +1164(pointer)X +1432(is)X +1526(NULL.)X +1801(In)X +1909(effect,)X +2154(pages)X +2377(are)X +720 1796(linked)N +950(in)X +1042(three)X +1233(ways.)X +1468(Using)X +1689(the)X +1817(buffer)X +2043(headers,)X +2338(they)X +720 1884(are)N +851(linked)X +1083(physically)X +1444(through)X +1725(the)X +1854(LRU)X +2045(links)X +2231(and)X +2378(the)X +720 1972(over\257ow)N +1036(links.)X +1241(Using)X +1462(the)X +1590(pages)X +1803(themselves,)X +2209(they)X +2377(are)X +720 2060(linked)N +943(logically)X +1246(through)X +1518(the)X +1639(over\257ow)X +1946(addresses)X +2276(on)X +2378(the)X +720 2148(page.)N +948(Since)X +1162(over\257ow)X +1482(pages)X +1700(are)X +1834(accessed)X +2151(only)X +2328(after)X +720 2236(their)N +904(predecessor)X +1321(pages,)X +1560(they)X +1734(are)X +1869(removed)X +2186(from)X +2378(the)X +720 2324(buffer)N +937(pool)X +1099(when)X +1293(their)X +1460(primary)X +1734(is)X +1807(removed.)X +10 f +720 2412 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 Dt +2309 3177 MXY +24 15 Dl +-8 -15 Dl +8 -15 Dl +-24 15 Dl +52 0 Dl +789 3160 MXY +-35 0 Dl +0 -156 Dl +1607 0 Dl +0 173 Dl +789 3091 MXY +-24 -15 Dl +9 15 Dl +-9 15 Dl +24 -15 Dl +-69 0 Dl +2309 3125 MXY +104 0 Dl +0 -155 Dl +-1693 0 Dl +0 121 Dl +927 3160 MXY +24 15 Dl +-9 -15 Dl +9 -15 Dl +-24 15 Dl +553 0 Dl +1618 3177 MXY +8 27 Dl +4 -17 Dl +16 -6 Dl +-28 -4 Dl +138 121 Dl +1895 3315 MXY +28 3 Dl +-15 -9 Dl +1 -18 Dl +-14 24 Dl +276 -138 Dl +3108 MY +-28 -3 Dl +15 10 Dl +-1 17 Dl +14 -24 Dl +-276 138 Dl +1756 3229 MXY +-8 -27 Dl +-3 17 Dl +-16 6 Dl +27 4 Dl +-138 -121 Dl +1480 MX +-24 -15 Dl +9 15 Dl +-9 15 Dl +24 -15 Dl +-553 0 Dl +3 f +5 s +1083 3073(LRU)N +1178(chain)X +4 Ds +1402 3851 MXY + 1402 3851 lineto + 1471 3851 lineto + 1471 3920 lineto + 1402 3920 lineto + 1402 3851 lineto +closepath 19 1402 3851 1471 3920 Dp +1445 3747(Over\257ow)N +1613(Address)X +1549 3609 MXY +0 69 Dl +1756 MX +-23 -15 Dl +8 15 Dl +-8 15 Dl +23 -15 Dl +-207 0 Dl +-1 Ds +3 Dt +1756 3419 MXY +-6 -28 Dl +-4 17 Dl +-17 5 Dl +27 6 Dl +-138 -138 Dl +2240 3471 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1826 3609 MXY +15 -24 Dl +-15 9 Dl +-16 -9 Dl +16 24 Dl +0 -138 Dl +1549 MX +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +858 3471 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +2240 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1549 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +858 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1 Dt +2171 3471 MXY + 2171 3471 lineto + 2448 3471 lineto + 2448 3609 lineto + 2171 3609 lineto + 2171 3471 lineto +closepath 19 2171 3471 2448 3609 Dp +1756 3609 MXY + 1756 3609 lineto + 2033 3609 lineto + 2033 3747 lineto + 1756 3747 lineto + 1756 3609 lineto +closepath 3 1756 3609 2033 3747 Dp +1480 3471 MXY + 1480 3471 lineto + 1756 3471 lineto + 1756 3609 lineto + 1480 3609 lineto + 1480 3471 lineto +closepath 19 1480 3471 1756 3609 Dp +789 MX + 789 3471 lineto + 1065 3471 lineto + 1065 3609 lineto + 789 3609 lineto + 789 3471 lineto +closepath 19 789 3471 1065 3609 Dp +962 3903(Buffer)N +1083(Header)X +849 3851 MXY + 849 3851 lineto + 918 3851 lineto + 918 3920 lineto + 849 3920 lineto + 849 3851 lineto +closepath 14 849 3851 918 3920 Dp +1756 3194 MXY + 1756 3194 lineto + 1895 3194 lineto + 1895 3471 lineto + 1756 3471 lineto + 1756 3194 lineto +closepath 14 1756 3194 1895 3471 Dp +2171 3056 MXY + 2171 3056 lineto + 2309 3056 lineto + 2309 3333 lineto + 2171 3333 lineto + 2171 3056 lineto +closepath 14 2171 3056 2309 3333 Dp +1480 MX + 1480 3056 lineto + 1618 3056 lineto + 1618 3333 lineto + 1480 3333 lineto + 1480 3056 lineto +closepath 14 1480 3056 1618 3333 Dp +789 MX + 789 3056 lineto + 927 3056 lineto + 927 3333 lineto + 789 3333 lineto + 789 3056 lineto +closepath 14 789 3056 927 3333 Dp +2780 MY +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +927 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1065 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1203 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +1342 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1480 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1618 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1756 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +1895 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2033 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2171 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2309 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +13 s +1048 2720(In)N +1173(Memory)X +1580(Bucket)X +1918(Array)X +867 3584(B0)N +1558(B5)X +2223(B10)X +1788 3722(O1/1)N +5 s +1515 3903(Primay)N +1651(Buffer)X +4 Ds +1990 3851 MXY + 1990 3851 lineto + 2059 3851 lineto + 2059 3920 lineto + 1990 3920 lineto + 1990 3851 lineto +closepath 3 1990 3851 2059 3920 Dp +2102 3903(Over\257ow)N +2270(Buffer)X +3 Dt +-1 Ds +8 s +720 4184(Figure)N +922(4:)X +1 f +996(Three)X +1164(primary)X +1386(pages)X +1551(\(B0,)X +1683(B5,)X +1794(B10\))X +1942(are)X +2039(accessed)X +2281(directly)X +720 4272(from)N +862(the)X +958(bucket)X +1146(array.)X +1326(The)X +1443(one)X +1553(over\257ow)X +1798(page)X +1935(\(O1/1\))X +2122(is)X +2182(linked)X +2359(phy-)X +720 4360(sically)N +915(from)X +1067(its)X +1155(primary)X +1384(page's)X +1577(buffer)X +1759(header)X +1955(as)X +2035(well)X +2172(as)X +2252(logically)X +720 4448(from)N +860(its)X +937(predecessor)X +1253(page)X +1389(buffer)X +1560(\(B5\).)X +10 s +10 f +720 4624 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1191 4954(Table)N +1406(Parameterization)X +1 f +892 5086(When)N +1107(a)X +1166(hash)X +1336(table)X +1515(is)X +1590(created,)X +1865(the)X +1985(bucket)X +2221(size,)X +2388(\256ll)X +720 5174(factor,)N +953(initial)X +1164(number)X +1434(of)X +1526(elements,)X +1856(number)X +2125(of)X +2216(bytes)X +2409(of)X +720 5262(main)N +919(memory)X +1225(used)X +1411(for)X +1543(caching,)X +1851(and)X +2005(a)X +2079(user-de\256ned)X +720 5350(hash)N +892(function)X +1184(may)X +1347(be)X +1448(speci\256ed.)X +1797(The)X +1946(bucket)X +2184(size)X +2333(\(and)X +720 5438(page)N +906(size)X +1064(for)X +1191(over\257ow)X +1509(pages\))X +1752(defaults)X +2039(to)X +2134(256)X +2287(bytes.)X +720 5526(For)N +858(tables)X +1072(with)X +1241(large)X +1429(data)X +1590(items,)X +1810(it)X +1881(may)X +2046(be)X +2149(preferable)X +720 5614(to)N +803(increase)X +1088(the)X +1207(page)X +1380(size,)X +1545(and,)X +1701(conversely,)X +2089(applications)X +720 5702(storing)N +1002(small)X +1235(items)X +1467(exclusively)X +1891(in)X +2012(memory)X +2338(may)X +2706 538(bene\256t)N +2966(from)X +3164(a)X +3242(smaller)X +3520(bucket)X +3776(size.)X +3983(A)X +4082(bucket)X +4337(size)X +2706 626(smaller)N +2962(than)X +3120(64)X +3220(bytes)X +3409(is)X +3482(not)X +3604(recommended.)X +2878 740(The)N +3031(\256ll)X +3147(factor)X +3363(indicates)X +3676(a)X +3740(desired)X +4000(density)X +4258(within)X +2706 828(the)N +2833(hash)X +3009(table.)X +3234(It)X +3312(is)X +3394(an)X +3499(approximation)X +3995(of)X +4091(the)X +4217(number)X +2706 916(of)N +2815(keys)X +3004(allowed)X +3300(to)X +3404(accumulate)X +3811(in)X +3914(any)X +4071(one)X +4228(bucket,)X +2706 1004(determining)N +3119(when)X +3319(the)X +3442(hash)X +3614(table)X +3795(grows.)X +4056(Its)X +4161(default)X +4409(is)X +2706 1092(eight.)N +2953(If)X +3054(the)X +3199(user)X +3380(knows)X +3636(the)X +3781(average)X +4079(size)X +4251(of)X +4364(the)X +2706 1180(key/data)N +3008(pairs)X +3194(being)X +3402(stored)X +3627(in)X +3718(the)X +3845(table,)X +4050(near)X +4218(optimal)X +2706 1268(bucket)N +2943(sizes)X +3122(and)X +3261(\256ll)X +3372(factors)X +3614(may)X +3775(be)X +3874(selected)X +4155(by)X +4257(apply-)X +2706 1356(ing)N +2828(the)X +2946(equation:)X +0 f +8 s +2706 1655(\(1\))N +2994 -0.3938(\(\(average_pair_length)AX +3830(+)X +3906(4\))X +4020(*)X +3032 1743(ffactor\))N +3374(>=)X +3488(bsize)X +1 f +10 s +2706 2042(For)N +2859(highly)X +3104(time)X +3287(critical)X +3551(applications,)X +3999(experimenting)X +2706 2130(with)N +2919(different)X +3266(bucket)X +3550(sizes)X +3776(and)X +3962(\256ll)X +4120(factors)X +4409(is)X +2706 2218(encouraged.)N +2878 2332(Figures)N +3144(5a,b,)X +3326(and)X +3468(c)X +3530(illustrate)X +3836(the)X +3960(effects)X +4200(of)X +4292(vary-)X +2706 2420(ing)N +2841(page)X +3026(sizes)X +3215(and)X +3363(\256ll)X +3483(factors)X +3734(for)X +3860(the)X +3990(same)X +4187(data)X +4353(set.)X +2706 2508(The)N +2864(data)X +3031(set)X +3152(consisted)X +3482(of)X +3581(24474)X +3813(keys)X +3992(taken)X +4198(from)X +4386(an)X +2706 2596(online)N +2931(dictionary.)X +3301(The)X +3451(data)X +3609(value)X +3807(for)X +3925(each)X +4097(key)X +4237(was)X +4386(an)X +2706 2684(ASCII)N +2938(string)X +3143(for)X +3260(an)X +3359(integer)X +3605(from)X +3784(1)X +3847(to)X +3931(24474)X +4153(inclusive.)X +2706 2772(The)N +2867(test)X +3013(run)X +3155(consisted)X +3488(of)X +3590(creating)X +3884(a)X +3955(new)X +4124(hash)X +4306(table)X +2706 2860(\(where)N +2966(the)X +3100(ultimate)X +3398(size)X +3559(of)X +3662(the)X +3796(table)X +3987(was)X +4147(known)X +4400(in)X +2706 2948(advance\),)N +3054(entering)X +3354(each)X +3539(key/data)X +3848(pair)X +4010(into)X +4171(the)X +4306(table)X +2706 3036(and)N +2849(then)X +3014(retrieving)X +3353(each)X +3528(key/data)X +3827(pair)X +3979(from)X +4162(the)X +4286(table.)X +2706 3124(Each)N +2898(of)X +2996(the)X +3125(graphs)X +3369(shows)X +3599(the)X +3727(timings)X +3996(resulting)X +4306(from)X +2706 3212(varying)N +2973(the)X +3093(pagesize)X +3392(from)X +3570(128)X +3712(bytes)X +3903(to)X +3986(1M)X +4118(and)X +4255(the)X +4374(\256ll)X +2706 3300(factor)N +2929(from)X +3120(1)X +3195(to)X +3292(128.)X +3486(For)X +3631(each)X +3813(run,)X +3974(the)X +4106(buffer)X +4337(size)X +2706 3388(was)N +2874(set)X +3006(at)X +3106(1M.)X +3299(The)X +3466(tests)X +3650(were)X +3849(all)X +3971(run)X +4120(on)X +4242(an)X +4360(HP)X +2706 3476(9000/370)N +3077(\(33.3)X +3312(Mhz)X +3527(MC68030\),)X +3966(with)X +4176(16M)X +4395(of)X +2706 3564(memory,)N +3042(64K)X +3228(physically)X +3605(addressed)X +3970(cache,)X +4222(and)X +4386(an)X +2706 3652(HP7959S)N +3055(disk)X +3231(drive,)X +3459(running)X +3751(4.3BSD-Reno)X +4244(single-)X +2706 3740(user.)N +2878 3854(Both)N +3066(system)X +3321(time)X +3496(\(Figure)X +3764(5a\))X +3899(and)X +4047(elapsed)X +4320(time)X +2706 3942(\(Figure)N +2966(5b\))X +3097(show)X +3290(that)X +3434(for)X +3552(all)X +3655(bucket)X +3892(sizes,)X +4091(the)X +4212(greatest)X +2706 4030(performance)N +3137(gains)X +3329(are)X +3451(made)X +3648(by)X +3751(increasing)X +4104(the)X +4225(\256ll)X +4336(fac-)X +2706 4118(tor)N +2822(until)X +2995(equation)X +3298(1)X +3365(is)X +3445(satis\256ed.)X +3774(The)X +3925(user)X +4085(time)X +4253(shown)X +2706 4206(in)N +2791(Figure)X +3023(5c)X +3122(gives)X +3314(a)X +3373(more)X +3561(detailed)X +3838(picture)X +4083(of)X +4172(how)X +4332(per-)X +2706 4294(formance)N +3054(varies.)X +3330(The)X +3499(smaller)X +3778(bucket)X +4035(sizes)X +4234(require)X +2706 4382(fewer)N +2921(keys)X +3099(per)X +3233(page)X +3416(to)X +3509(satisfy)X +3749(equation)X +4056(1)X +4127(and)X +4274(there-)X +2706 4470(fore)N +2860(incur)X +3049(fewer)X +3257(collisions.)X +3607(However,)X +3946(when)X +4144(the)X +4265(buffer)X +2706 4558(pool)N +2884(size)X +3045(is)X +3134(\256xed,)X +3349(smaller)X +3620(pages)X +3838(imply)X +4059(more)X +4259(pages.)X +2706 4646(An)N +2830(increased)X +3160(number)X +3430(of)X +3522(pages)X +3730(means)X +3960(more)X +2 f +4150(malloc\(3\))X +1 f +2706 4734(calls)N +2879(and)X +3021(more)X +3212(overhead)X +3533(in)X +3621(the)X +3745(hash)X +3918(package's)X +4265(buffer)X +2706 4822(manager)N +3003(to)X +3085(manage)X +3355(the)X +3473(additional)X +3813(pages.)X +2878 4936(The)N +3028(tradeoff)X +3308(works)X +3529(out)X +3655(most)X +3834(favorably)X +4166(when)X +4364(the)X +2706 5024(page)N +2886(size)X +3039(is)X +3120(256)X +3268(and)X +3412(the)X +3538(\256ll)X +3654(factor)X +3870(is)X +3950(8.)X +4057(Similar)X +4319(con-)X +2706 5112(clusions)N +3009(were)X +3207(obtained)X +3524(if)X +3614(the)X +3753(test)X +3905(was)X +4071(run)X +4218(without)X +2706 5200(knowing)N +3007(the)X +3126(\256nal)X +3289(table)X +3466(size)X +3612(in)X +3695(advance.)X +4020(If)X +4095(the)X +4214(\256le)X +4337(was)X +2706 5288(closed)N +2942(and)X +3088(written)X +3345(to)X +3437(disk,)X +3620(the)X +3748(conclusions)X +4156(were)X +4343(still)X +2706 5376(the)N +2832(same.)X +3065(However,)X +3408(rereading)X +3740(the)X +3865(\256le)X +3994(from)X +4177(disk)X +4337(was)X +2706 5464(slightly)N +2983(faster)X +3199(if)X +3285(a)X +3358(larger)X +3583(bucket)X +3834(size)X +3996(and)X +4149(\256ll)X +4274(factor)X +2706 5552(were)N +2898(used)X +3079(\(1K)X +3238(bucket)X +3486(size)X +3645(and)X +3795(32)X +3909(\256ll)X +4031(factor\).)X +4320(This)X +2706 5640(follows)N +2987(intuitively)X +3356(from)X +3553(the)X +3691(improved)X +4038(ef\256ciency)X +4395(of)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(7)X + +8 p +%%Page: 8 8 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538(performing)N +830(1K)X +965(reads)X +1172(from)X +1365(the)X +1500(disk)X +1670(rather)X +1894(than)X +2068(256)X +432 626(byte)N +609(reads.)X +857(In)X +962(general,)X +1257(performance)X +1702(for)X +1834(disk)X +2005(based)X +432 714(tables)N +639(is)X +712(best)X +861(when)X +1055(the)X +1173(page)X +1345(size)X +1490(is)X +1563(approximately)X +2046(1K.)X +10 f +432 802 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +619 2380 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +629 2437 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +648 2504 MXY +-12 25 Dl +24 0 Dl +-12 -25 Dl +686 2515 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +762 2516 MXY +-12 24 Dl +25 0 Dl +-13 -24 Dl +916 2515 MXY +-13 24 Dl +25 0 Dl +-12 -24 Dl +1222 2516 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1834 2515 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1 Dt +619 2392 MXY +10 57 Dl +19 67 Dl +38 11 Dl +76 1 Dl +154 -1 Dl +306 1 Dl +612 -1 Dl +8 s +1 f +1628 2522(128)N +3 Dt +607 2245 MXY +24 Dc +617 2375 MXY +23 Dc +635 2442 MXY +24 Dc +674 2525 MXY +23 Dc +750 2529 MXY +24 Dc +904 2527 MXY +23 Dc +1210 MX +23 Dc +1822 2528 MXY +23 Dc +20 Ds +1 Dt +619 2245 MXY +10 130 Dl +19 67 Dl +38 83 Dl +76 4 Dl +154 -2 Dl +306 0 Dl +612 1 Dl +678 2482(256)N +-1 Ds +3 Dt +619 2127 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +629 2191 MXY +0 25 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +648 2334 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +686 2409 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +762 2516 MXY +0 25 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +916 2516 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-25 0 Dl +1222 2515 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1834 2515 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +5 Dt +619 2139 MXY +10 65 Dl +19 142 Dl +38 75 Dl +76 108 Dl +154 -1 Dl +306 -1 Dl +612 0 Dl +694 2401(512)N +3 Dt +631 2064 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +641 2077 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +660 2132 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +698 2292 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +775 2382 MXY +-25 24 Dl +12 -12 Dl +-12 -12 Dl +25 24 Dl +928 2516 MXY +-25 24 Dl +13 -12 Dl +-13 -12 Dl +25 24 Dl +1234 2516 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +1846 2516 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +16 Ds +1 Dt +619 2076 MXY +10 14 Dl +19 54 Dl +38 160 Dl +76 90 Dl +154 134 Dl +306 1 Dl +612 -1 Dl +694 2257(1024)N +-1 Ds +3 Dt +619 1877 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +629 1855 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +648 1838 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +686 1860 MXY +12 -25 Dl +-24 0 Dl +12 25 Dl +762 1923 MXY +13 -24 Dl +-25 0 Dl +12 24 Dl +916 2087 MXY +12 -24 Dl +-25 0 Dl +13 24 Dl +1222 2256 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +1834 2541 MXY +12 -25 Dl +-24 0 Dl +12 25 Dl +619 1865 MXY +10 -22 Dl +19 -17 Dl +38 21 Dl +76 64 Dl +154 164 Dl +306 169 Dl +612 285 Dl +1645 2427(4096)N +619 1243 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +629 1196 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +648 1146 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +686 1174 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +762 1249 MXY +0 24 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +916 1371 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-25 0 Dl +1222 1680 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1834 1999 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +619 1255 MXY +10 -47 Dl +19 -50 Dl +38 28 Dl +76 75 Dl +154 122 Dl +306 309 Dl +612 319 Dl +1741 1934(8192)N +5 Dt +609 2531 MXY +1225 0 Dl +609 MX +0 -1553 Dl +2531 MY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +593 2625(0)N +-1 Ds +5 Dt +916 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +884 2625(32)N +-1 Ds +5 Dt +1222 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1190 2625(64)N +-1 Ds +5 Dt +1528 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1496 2625(96)N +-1 Ds +5 Dt +1834 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1786 2625(128)N +-1 Ds +5 Dt +609 2531 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +545 2558(0)N +-1 Ds +5 Dt +609 2013 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 2040(100)N +-1 Ds +5 Dt +609 1496 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 1523(200)N +-1 Ds +5 Dt +609 978 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 1005(300)N +1088 2724(Fill)N +1194(Factor)X +422 1611(S)N +426 1667(e)N +426 1724(c)N +424 1780(o)N +424 1837(n)N +424 1893(d)N +428 1949(s)N +3 Dt +-1 Ds +3 f +432 2882(Figure)N +636(5a:)X +1 f +744(System)X +956(Time)X +1113(for)X +1209(dictionary)X +1490(data)X +1618(set)X +1711(with)X +1847(1M)X +1958(of)X +2033(buffer)X +432 2970(space)N +594(and)X +707(varying)X +923(bucket)X +1114(sizes)X +1259(and)X +1372(\256ll)X +1465(factors.)X +1675(Each)X +1823(line)X +1940(is)X +2004(labeled)X +432 3058(with)N +562(its)X +639(bucket)X +825(size.)X +10 s +10 f +432 3234 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +8 s +1 f +428 4381(s)N +424 4325(d)N +424 4269(n)N +424 4212(o)N +426 4156(c)N +426 4099(e)N +422 4043(S)N +1116 5156(Fill)N +1222(Factor)X +506 3437(3200)N +4 Ds +1 Dt +666 3410 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +506 3825(2400)N +4 Ds +1 Dt +666 3799 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +506 4214(1600)N +4 Ds +1 Dt +666 4186 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +538 4602(800)N +4 Ds +1 Dt +666 4575 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +602 4990(0)N +4 Ds +1 Dt +666 4963 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +1786 5057(128)N +4 Ds +1 Dt +1834 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +1510 5057(96)N +4 Ds +1 Dt +1542 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +1218 5057(64)N +4 Ds +1 Dt +1250 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +926 5057(32)N +4 Ds +1 Dt +958 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +650 5057(0)N +4 Ds +1 Dt +666 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +4963 MY +0 -1553 Dl +4963 MY +1168 0 Dl +1741 4752(8192)N +3 Dt +675 3732 MXY +9 -172 Dl +18 -118 Dl +37 128 Dl +73 -121 Dl +146 623 Dl +292 497 Dl +584 245 Dl +4802 MY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1250 4557 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +958 4060 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +812 3437 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +739 3558 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +702 3430 MXY +0 25 Dl +0 -13 Dl +13 0 Dl +-25 0 Dl +684 3548 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +675 3720 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1637 4912(4096)N +675 4307 MXY +9 -58 Dl +18 30 Dl +37 89 Dl +73 144 Dl +146 235 Dl +292 122 Dl +584 89 Dl +4970 MY +12 -24 Dl +-24 0 Dl +12 24 Dl +1250 4881 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +958 4759 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +812 4524 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +739 4380 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +702 4291 MXY +13 -24 Dl +-25 0 Dl +12 24 Dl +684 4261 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +675 4319 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +734 4662(1024)N +16 Ds +1 Dt +675 4352 MXY +9 60 Dl +18 134 Dl +37 266 Dl +73 117 Dl +146 30 Dl +292 0 Dl +584 -1 Dl +-1 Ds +3 Dt +1846 4946 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +1262 4946 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +970 4947 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +824 4917 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +751 4800 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +715 4534 MXY +-25 25 Dl +12 -13 Dl +-12 -12 Dl +25 25 Dl +696 4400 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +687 4339 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +718 4792(512)N +5 Dt +675 4422 MXY +9 137 Dl +18 278 Dl +37 105 Dl +73 18 Dl +146 -1 Dl +292 0 Dl +584 -1 Dl +3 Dt +4946 MY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1250 4946 MXY +0 25 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +958 4947 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +812 4948 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +739 4930 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +702 4824 MXY +0 25 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +684 4547 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +675 4410 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +750 4921(256)N +20 Ds +1 Dt +675 4597 MXY +9 246 Dl +18 106 Dl +37 10 Dl +73 0 Dl +146 0 Dl +292 0 Dl +584 -1 Dl +-1 Ds +3 Dt +1822 MX +23 Dc +1238 4959 MXY +23 Dc +946 MX +23 Dc +800 MX +23 Dc +727 MX +23 Dc +691 4949 MXY +23 Dc +672 4843 MXY +24 Dc +663 4597 MXY +24 Dc +1395 4961(128)N +1 Dt +675 4855 MXY +9 93 Dl +18 10 Dl +37 1 Dl +73 0 Dl +146 -1 Dl +292 0 Dl +584 0 Dl +3 Dt +4946 MY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1250 MX +-12 24 Dl +24 0 Dl +-12 -24 Dl +958 MX +-12 24 Dl +24 0 Dl +-12 -24 Dl +812 MX +-12 25 Dl +24 0 Dl +-12 -25 Dl +739 4947 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +702 4946 MXY +-12 24 Dl +25 0 Dl +-13 -24 Dl +684 4936 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +675 4843 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +3 Dt +-1 Ds +3 f +432 5314(Figure)N +634(5b:)X +1 f +744(Elapsed)X +967(Time)X +1123(for)X +1218(dictionary)X +1498(data)X +1625(set)X +1717(with)X +1851(1M)X +1960(of)X +2033(buffer)X +432 5402(space)N +593(and)X +705(varying)X +920(bucket)X +1110(sizes)X +1254(and)X +1366(\256ll)X +1457(factors.)X +1681(Each)X +1827(line)X +1942(is)X +2004(labeled)X +432 5490(with)N +562(its)X +639(bucket)X +825(size.)X +10 s +2590 538(If)N +2677(an)X +2785(approximation)X +3284(of)X +3383(the)X +3513(number)X +3790(of)X +3889(elements)X +2418 626(ultimately)N +2773(to)X +2866(be)X +2973(stored)X +3200(in)X +3293(the)X +3422(hash)X +3599(table)X +3785(is)X +3868(known)X +4116(at)X +2418 714(the)N +2564(time)X +2754(of)X +2869(creation,)X +3196(the)X +3342(hash)X +3536(package)X +3847(takes)X +4059(this)X +2418 802(number)N +2688(as)X +2779(a)X +2839(parameter)X +3185(and)X +3325(uses)X +3487(it)X +3555(to)X +3641(hash)X +3812(entries)X +4050(into)X +2418 890(the)N +2541(full)X +2677(sized)X +2867(table)X +3048(rather)X +3261(than)X +3424(growing)X +3716(the)X +3838(table)X +4018(from)X +2418 978(a)N +2477(single)X +2691(bucket.)X +2968(If)X +3044(this)X +3181(number)X +3448(is)X +3523(not)X +3647(known,)X +3907(the)X +4027(hash)X +2418 1066(table)N +2632(starts)X +2859(with)X +3059(a)X +3153(single)X +3402(bucket)X +3674(and)X +3848(gracefully)X +2418 1154(expands)N +2707(as)X +2800(elements)X +3111(are)X +3236(added,)X +3474(although)X +3780(a)X +3842(slight)X +4044(per-)X +2418 1242(formance)N +2747(degradation)X +3151(may)X +3313(be)X +3413(noticed.)X +3713(Figure)X +3946(6)X +4010(illus-)X +2418 1330(trates)N +2625(the)X +2756(difference)X +3116(in)X +3211(performance)X +3651(between)X +3952(storing)X +2418 1418(keys)N +2588(in)X +2673(a)X +2732(\256le)X +2857(when)X +3054(the)X +3174(ultimate)X +3458(size)X +3605(is)X +3680(known)X +3920(\(the)X +4067(left)X +2418 1506(bars)N +2581(in)X +2672(each)X +2849(set\),)X +3014(compared)X +3360(to)X +3450(building)X +3744(the)X +3870(\256le)X +4000(when)X +2418 1594(the)N +2550(ultimate)X +2846(size)X +3005(is)X +3091(unknown)X +3422(\(the)X +3580(right)X +3764(bars)X +3931(in)X +4026(each)X +2418 1682(set\).)N +2609(Once)X +2814(the)X +2947(\256ll)X +3069(factor)X +3291(is)X +3378(suf\256ciently)X +3772(high)X +3948(for)X +4076(the)X +2418 1770(page)N +2596(size)X +2747(\(8\),)X +2887(growing)X +3180(the)X +3304(table)X +3486(dynamically)X +3908(does)X +4081(lit-)X +2418 1858(tle)N +2518(to)X +2600(degrade)X +2875(performance.)X +10 f +2418 1946 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +9 s +1 f +2413 3238(s)N +2409 3173(d)N +2409 3108(n)N +2409 3043(o)N +2411 2979(c)N +2411 2914(e)N +2407 2849(S)N +3143 4129(Fill)N +3261(Factor)X +2448 2152(15)N +4 Ds +1 Dt +2557 2122 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2448 2747(10)N +4 Ds +1 Dt +2557 2717 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2484 3343(5)N +4 Ds +1 Dt +2557 3313 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2484 3938(0)N +4 Ds +1 Dt +2557 3908 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +3976 4015(128)N +4 Ds +1 Dt +4030 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3626 4015(96)N +4 Ds +1 Dt +3662 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3258 4015(64)N +4 Ds +1 Dt +3294 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +2889 4015(32)N +4 Ds +1 Dt +2925 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +2539 4015(0)N +4 Ds +1 Dt +2557 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3908 MY +0 -1786 Dl +3908 MY +1473 0 Dl +4053 2378(8192)N +3 Dt +2569 2277 MXY +11 0 Dl +23 48 Dl +46 -167 Dl +92 35 Dl +184 12 Dl +369 143 Dl +736 0 Dl +2334 MY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3294 2334 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2925 2192 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2741 2180 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2649 2144 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2603 2311 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +2580 2263 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2569 2263 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +4053 2591(4096)N +2569 2348 MXY +11 -11 Dl +23 -96 Dl +46 71 Dl +92 72 Dl +184 226 Dl +369 48 Dl +736 -60 Dl +2612 MY +14 -28 Dl +-28 0 Dl +14 28 Dl +3294 2672 MXY +13 -28 Dl +-27 0 Dl +14 28 Dl +2925 2624 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2741 2398 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2649 2326 MXY +14 -27 Dl +-28 0 Dl +14 27 Dl +2603 2255 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2580 2350 MXY +14 -27 Dl +-28 0 Dl +14 27 Dl +2569 2362 MXY +13 -28 Dl +-27 0 Dl +14 28 Dl +4053 2681(1024)N +16 Ds +1 Dt +2569 2300 MXY +11 48 Dl +23 96 Dl +46 95 Dl +92 274 Dl +184 202 Dl +369 -155 Dl +736 -190 Dl +-1 Ds +3 Dt +4044 2656 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +3307 2846 MXY +-27 28 Dl +14 -14 Dl +-14 -14 Dl +27 28 Dl +2939 3001 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2755 2799 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2663 2525 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2617 2430 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2594 2334 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2582 2287 MXY +-27 27 Dl +14 -14 Dl +-14 -13 Dl +27 27 Dl +4053 2851(512)N +5 Dt +2569 2372 MXY +11 -24 Dl +23 405 Dl +46 83 Dl +92 227 Dl +184 -72 Dl +369 -119 Dl +736 -107 Dl +3 Dt +2751 MY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3294 2858 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2925 2977 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2741 3049 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +2649 2823 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2603 2739 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2580 2334 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2569 2358 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +4053 2795(256)N +20 Ds +1 Dt +2569 2456 MXY +11 285 Dl +23 95 Dl +46 251 Dl +92 -60 Dl +184 -84 Dl +369 -107 Dl +736 -71 Dl +-1 Ds +3 Dt +4016 MX +27 Dc +3280 2836 MXY +27 Dc +2912 2943 MXY +27 Dc +2728 3027 MXY +27 Dc +2635 3087 MXY +28 Dc +2589 2836 MXY +28 Dc +2566 2741 MXY +27 Dc +2554 2456 MXY +28 Dc +4053 2741(128)N +1 Dt +2569 2729 MXY +11 203 Dl +23 131 Dl +46 -60 Dl +92 -119 Dl +184 -60 Dl +369 -83 Dl +736 -12 Dl +3 Dt +2716 MY +-14 27 Dl +28 0 Dl +-14 -27 Dl +3294 2727 MXY +-14 28 Dl +27 0 Dl +-13 -28 Dl +2925 2811 MXY +-14 27 Dl +28 0 Dl +-14 -27 Dl +2741 2870 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2649 2989 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2603 3049 MXY +-14 27 Dl +28 0 Dl +-14 -27 Dl +2580 2918 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2569 2716 MXY +-14 27 Dl +27 0 Dl +-13 -27 Dl +3 Dt +-1 Ds +3 f +8 s +2418 4286(Figure)N +2628(5c:)X +1 f +2738(User)X +2887(Time)X +3051(for)X +3154(dictionary)X +3442(data)X +3577(set)X +3677(with)X +3820(1M)X +3938(of)X +4019(buffer)X +2418 4374(space)N +2579(and)X +2691(varying)X +2906(bucket)X +3096(sizes)X +3240(and)X +3352(\256ll)X +3443(factors.)X +3667(Each)X +3813(line)X +3928(is)X +3990(labeled)X +2418 4462(with)N +2548(its)X +2625(bucket)X +2811(size.)X +10 s +10 f +2418 4638 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +2590 4840(Since)N +2796(no)X +2904(known)X +3150(hash)X +3325(function)X +3620(performs)X +3938(equally)X +2418 4928(well)N +2589(on)X +2702(all)X +2815(possible)X +3110(data,)X +3297(the)X +3428(user)X +3595(may)X +3766(\256nd)X +3923(that)X +4076(the)X +2418 5016(built-in)N +2678(hash)X +2849(function)X +3140(does)X +3311(poorly)X +3544(on)X +3648(a)X +3708(particular)X +4040(data)X +2418 5104(set.)N +2548(In)X +2636(this)X +2771(case,)X +2950(a)X +3006(hash)X +3173(function,)X +3480(taking)X +3700(two)X +3840(arguments)X +2418 5192(\(a)N +2507(pointer)X +2760(to)X +2848(a)X +2910(byte)X +3074(string)X +3282(and)X +3424(a)X +3486(length\))X +3739(and)X +3880(returning)X +2418 5280(an)N +2517(unsigned)X +2829(long)X +2993(to)X +3077(be)X +3175(used)X +3344(as)X +3433(the)X +3553(hash)X +3722(value,)X +3938(may)X +4098(be)X +2418 5368(speci\256ed)N +2731(at)X +2817(hash)X +2992(table)X +3176(creation)X +3463(time.)X +3673(When)X +3893(an)X +3996(exist-)X +2418 5456(ing)N +2570(hash)X +2767(table)X +2973(is)X +3076(opened)X +3358(and)X +3524(a)X +3609(hash)X +3805(function)X +4121(is)X +2418 5544(speci\256ed,)N +2752(the)X +2879(hash)X +3054(package)X +3346(will)X +3498(try)X +3615(to)X +3705(determine)X +4054(that)X +2418 5632(the)N +2546(hash)X +2723(function)X +3020(supplied)X +3321(is)X +3404(the)X +3532(one)X +3678(with)X +3850(which)X +4076(the)X +2418 5720(table)N +2630(was)X +2811(created.)X +3139(There)X +3382(are)X +3536(a)X +3627(variety)X +3905(of)X +4027(hash)X +3 f +432 5960(8)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +9 p +%%Page: 9 9 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(functions)N +1065(provided)X +1397(with)X +1586(the)X +1731(package.)X +2082(The)X +2253(default)X +720 626(function)N +1014(for)X +1135(the)X +1260(package)X +1551(is)X +1631(the)X +1755(one)X +1897(which)X +2119(offered)X +2378(the)X +720 714(best)N +875(performance)X +1308(in)X +1396(terms)X +1600(of)X +1693(cycles)X +1920(executed)X +2232(per)X +2360(call)X +720 802(\(it)N +827(did)X +965(not)X +1103(produce)X +1398(the)X +1531(fewest)X +1776(collisions)X +2117(although)X +2432(it)X +720 890(was)N +866(within)X +1091(a)X +1148(small)X +1341(percentage)X +1710(of)X +1797(the)X +1915(function)X +2202(that)X +2342(pro-)X +720 978(duced)N +947(the)X +1080(fewest)X +1324(collisions\).)X +1731(Again,)X +1981(in)X +2077(time)X +2253(critical)X +720 1066(applications,)N +1152(users)X +1342(are)X +1466(encouraged)X +1862(to)X +1949(experiment)X +2334(with)X +720 1154(a)N +783(variety)X +1032(of)X +1125(hash)X +1298(functions)X +1622(to)X +1710(achieve)X +1982(optimal)X +2252(perfor-)X +720 1242(mance.)N +10 f +720 1330 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +7 s +1038 2925(Full)N +1149(size)X +1251(table)X +1384(\(left\))X +1547 2718(Fill)N +1643(Factor)X +2268 2662(64)N +1964(32)X +1674(16)X +1384(8)X +1093(4)X +4 Ds +1 Dt +900 2280 MXY +1548 0 Dl +900 1879 MXY +1548 0 Dl +900 1506 MXY +1548 0 Dl +1563 2902 MXY +111 0 Dl +-1 Ds +900 MX +110 0 Dl +1425 2828(System)N +983(User)X +1895 2778 MXY + 1895 2778 lineto + 1950 2778 lineto + 1950 2833 lineto + 1895 2833 lineto + 1895 2778 lineto +closepath 21 1895 2778 1950 2833 Dp +1342 MX + 1342 2778 lineto + 1397 2778 lineto + 1397 2833 lineto + 1342 2833 lineto + 1342 2778 lineto +closepath 14 1342 2778 1397 2833 Dp +900 MX + 900 2778 lineto + 955 2778 lineto + 955 2833 lineto + 900 2833 lineto + 900 2778 lineto +closepath 3 900 2778 955 2833 Dp +5 Dt +2283 2211 MXY +96 0 Dl +1992 MX +97 0 Dl +1702 MX +97 0 Dl +1411 2252 MXY +97 0 Dl +4 Ds +1 Dt +2283 2211 MXY + 2283 2211 lineto + 2379 2211 lineto + 2379 2252 lineto + 2283 2252 lineto + 2283 2211 lineto +closepath 14 2283 2211 2379 2252 Dp +1992 MX + 1992 2211 lineto + 2089 2211 lineto + 2089 2252 lineto + 1992 2252 lineto + 1992 2211 lineto +closepath 14 1992 2211 2089 2252 Dp +1702 MX + 1702 2211 lineto + 1799 2211 lineto + 1799 2252 lineto + 1702 2252 lineto + 1702 2211 lineto +closepath 14 1702 2211 1799 2252 Dp +1411 2252 MXY + 1411 2252 lineto + 1508 2252 lineto + 1508 2294 lineto + 1411 2294 lineto + 1411 2252 lineto +closepath 14 1411 2252 1508 2294 Dp +2283 MX + 2283 2252 lineto + 2379 2252 lineto + 2379 2612 lineto + 2283 2612 lineto + 2283 2252 lineto +closepath 3 2283 2252 2379 2612 Dp +1992 MX + 1992 2252 lineto + 2089 2252 lineto + 2089 2612 lineto + 1992 2612 lineto + 1992 2252 lineto +closepath 3 1992 2252 2089 2612 Dp +1702 MX + 1702 2252 lineto + 1799 2252 lineto + 1799 2612 lineto + 1702 2612 lineto + 1702 2252 lineto +closepath 3 1702 2252 1799 2612 Dp +1411 2294 MXY + 1411 2294 lineto + 1508 2294 lineto + 1508 2612 lineto + 1411 2612 lineto + 1411 2294 lineto +closepath 3 1411 2294 1508 2612 Dp +-1 Ds +2158 2238 MXY + 2158 2238 lineto + 2255 2238 lineto + 2255 2252 lineto + 2158 2252 lineto + 2158 2238 lineto +closepath 21 2158 2238 2255 2252 Dp +1868 MX + 1868 2238 lineto + 1965 2238 lineto + 1965 2280 lineto + 1868 2280 lineto + 1868 2238 lineto +closepath 21 1868 2238 1965 2280 Dp +1577 MX + 1577 2238 lineto + 1674 2238 lineto + 1674 2308 lineto + 1577 2308 lineto + 1577 2238 lineto +closepath 21 1577 2238 1674 2308 Dp +1287 2308 MXY + 1287 2308 lineto + 1287 2280 lineto + 1384 2280 lineto + 1384 2308 lineto + 1287 2308 lineto +closepath 21 1287 2280 1384 2308 Dp +2158 2280 MXY + 2158 2280 lineto + 2158 2252 lineto + 2255 2252 lineto + 2255 2280 lineto + 2158 2280 lineto +closepath 14 2158 2252 2255 2280 Dp +1868 2308 MXY + 1868 2308 lineto + 1868 2280 lineto + 1965 2280 lineto + 1965 2308 lineto + 1868 2308 lineto +closepath 14 1868 2280 1965 2308 Dp +1577 2335 MXY + 1577 2335 lineto + 1577 2308 lineto + 1674 2308 lineto + 1674 2335 lineto + 1577 2335 lineto +closepath 14 1577 2308 1674 2335 Dp +1287 2363 MXY + 1287 2363 lineto + 1287 2308 lineto + 1384 2308 lineto + 1384 2363 lineto + 1287 2363 lineto +closepath 14 1287 2308 1384 2363 Dp +2158 2280 MXY + 2158 2280 lineto + 2255 2280 lineto + 2255 2612 lineto + 2158 2612 lineto + 2158 2280 lineto +closepath 3 2158 2280 2255 2612 Dp +1868 2308 MXY + 1868 2308 lineto + 1965 2308 lineto + 1965 2612 lineto + 1868 2612 lineto + 1868 2308 lineto +closepath 3 1868 2308 1965 2612 Dp +1577 2335 MXY + 1577 2335 lineto + 1674 2335 lineto + 1674 2612 lineto + 1577 2612 lineto + 1577 2335 lineto +closepath 3 1577 2335 1674 2612 Dp +1287 2363 MXY + 1287 2363 lineto + 1384 2363 lineto + 1384 2612 lineto + 1287 2612 lineto + 1287 2363 lineto +closepath 3 1287 2363 1384 2612 Dp +4 Ds +1121 2066 MXY + 1121 2066 lineto + 1218 2066 lineto + 1224 2080 lineto + 1127 2080 lineto + 1121 2066 lineto +closepath 21 1121 2066 1224 2080 Dp +2080 MY + 1121 2080 lineto + 1218 2080 lineto + 1218 2273 lineto + 1121 2273 lineto + 1121 2080 lineto +closepath 14 1121 2080 1218 2273 Dp +2273 MY + 1121 2273 lineto + 1218 2273 lineto + 1218 2612 lineto + 1121 2612 lineto + 1121 2273 lineto +closepath 3 1121 2273 1218 2612 Dp +-1 Ds +997 1589 MXY + 997 1589 lineto + 1093 1589 lineto + 1093 1644 lineto + 997 1644 lineto + 997 1589 lineto +closepath 21 997 1589 1093 1644 Dp +1644 MY + 997 1644 lineto + 1093 1644 lineto + 1093 2280 lineto + 997 2280 lineto + 997 1644 lineto +closepath 14 997 1644 1093 2280 Dp +2280 MY + 997 2280 lineto + 1093 2280 lineto + 1093 2612 lineto + 997 2612 lineto + 997 2280 lineto +closepath 3 997 2280 1093 2612 Dp +10 s +719 2093(s)N +712 2037(d)N +712 1982(n)N +714 1927(o)N +716 1872(c)N +716 1816(e)N +712 1761(S)N +804 2286(10)N +804 1899(20)N +804 1540(30)N +3 Dt +900 1506 MXY +0 1106 Dl +1548 0 Dl +7 s +1978 2828(Elapsed)N +1701 2925(Dynamically)N +2018(grown)X +2184(table)X +2317(\(right\))X +3 Dt +-1 Ds +8 s +720 3180(Figure)N +934(6:)X +1 f +1020(The)X +1152(total)X +1299(regions)X +1520(indicate)X +1755(the)X +1865(difference)X +2154(between)X +2398(the)X +720 3268(elapsed)N +931(time)X +1065(and)X +1177(the)X +1275(sum)X +1402(of)X +1475(the)X +1573(system)X +1771(and)X +1883(user)X +2008(time.)X +2173(The)X +2291(left)X +2395(bar)X +720 3356(of)N +798(each)X +939(set)X +1035(depicts)X +1241(the)X +1344(timing)X +1537(of)X +1615(the)X +1718(test)X +1831(run)X +1940(when)X +2102(the)X +2204(number)X +2423(of)X +720 3444(entries)N +910(is)X +973(known)X +1167(in)X +1237(advance.)X +1496(The)X +1614(right)X +1754(bars)X +1879(depict)X +2054(the)X +2151(timing)X +2338(when)X +720 3532(the)N +814(\256le)X +912(is)X +971(grown)X +1150(from)X +1290(a)X +1334(single)X +1503(bucket.)X +10 s +10 f +720 3708 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +892 3910(Since)N +1131(this)X +1307(hashing)X +1617(package)X +1942(provides)X +2279(buffer)X +720 3998(management,)N +1188(the)X +1323(amount)X +1600(of)X +1704(space)X +1920(allocated)X +2247(for)X +2378(the)X +720 4086(buffer)N +948(pool)X +1121(may)X +1290(be)X +1397(speci\256ed)X +1713(by)X +1824(the)X +1953(user.)X +2157(Using)X +2378(the)X +720 4174(same)N +910(data)X +1069(set)X +1183(and)X +1324(test)X +1459(procedure)X +1805(as)X +1896(used)X +2067(to)X +2153(derive)X +2378(the)X +720 4262(graphs)N +962(in)X +1052(Figures)X +1320(5a-c,)X +1507(Figure)X +1744(7)X +1812(shows)X +2039(the)X +2164(impact)X +2409(of)X +720 4350(varying)N +997(the)X +1126(size)X +1282(of)X +1380(the)X +1509(buffer)X +1737(pool.)X +1950(The)X +2106(bucket)X +2351(size)X +720 4438(was)N +873(set)X +989(to)X +1078(256)X +1225(bytes)X +1421(and)X +1564(the)X +1689(\256ll)X +1804(factor)X +2019(was)X +2171(set)X +2287(to)X +2376(16.)X +720 4526(The)N +869(buffer)X +1090(pool)X +1256(size)X +1404(was)X +1552(varied)X +1776(from)X +1955(0)X +2018(\(the)X +2166(minimum)X +720 4614(number)N +986(of)X +1074(pages)X +1277(required)X +1565(to)X +1647(be)X +1743(buffered\))X +2063(to)X +2145(1M.)X +2316(With)X +720 4702(1M)N +854(of)X +944(buffer)X +1164(space,)X +1386(the)X +1507(package)X +1794(performed)X +2151(no)X +2253(I/O)X +2382(for)X +720 4790(this)N +871(data)X +1040(set.)X +1204(As)X +1328(Figure)X +1572(7)X +1647(illustrates,)X +2013(increasing)X +2378(the)X +720 4878(buffer)N +944(pool)X +1113(size)X +1265(can)X +1404(have)X +1583(a)X +1646(dramatic)X +1954(affect)X +2165(on)X +2271(result-)X +720 4966(ing)N +842(performance.)X +2 f +8 s +1269 4941(7)N +1 f +16 s +720 5353 MXY +864 0 Dl +2 f +8 s +760 5408(7)N +1 f +9 s +826 5433(Some)N +1024(allocators)X +1338(are)X +1460(extremely)X +1782(inef\256cient)X +2107(at)X +2192(allocating)X +720 5513(memory.)N +1029(If)X +1110(you)X +1251(\256nd)X +1396(that)X +1536(applications)X +1916(are)X +2036(running)X +2292(out)X +2416(of)X +720 5593(memory)N +1005(before)X +1234(you)X +1386(think)X +1578(they)X +1746(should,)X +2000(try)X +2124(varying)X +2388(the)X +720 5673(pagesize)N +986(to)X +1060(get)X +1166(better)X +1348(utilization)X +1658(from)X +1816(the)X +1922(memory)X +2180(allocator.)X +10 s +2830 1975 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +2853 2004 MXY +0 -27 Dl +28 0 Dl +0 27 Dl +-28 0 Dl +2876 2016 MXY +0 -27 Dl +27 0 Dl +0 27 Dl +-27 0 Dl +2922 1998 MXY +0 -27 Dl +27 0 Dl +0 27 Dl +-27 0 Dl +2967 2025 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +3013 2031 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +3059 MX +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +3196 2052 MXY +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +3561 2102 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +4292 2105 MXY +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +4 Ds +1 Dt +2844 1961 MXY +23 30 Dl +23 12 Dl +45 -18 Dl +46 26 Dl +46 6 Dl +45 0 Dl +137 21 Dl +366 50 Dl +730 3 Dl +9 s +4227 2158(User)N +-1 Ds +3 Dt +2830 1211 MXY +27 Dc +2853 1261 MXY +27 Dc +2876 1267 MXY +27 Dc +2921 1341 MXY +27 Dc +2967 1385 MXY +27 Dc +3013 1450 MXY +27 Dc +3059 1497 MXY +27 Dc +3196 1686 MXY +27 Dc +3561 2109 MXY +27 Dc +4292 2295 MXY +27 Dc +20 Ds +1 Dt +2844 1211 MXY +23 50 Dl +23 6 Dl +45 74 Dl +46 44 Dl +46 65 Dl +45 47 Dl +137 189 Dl +366 423 Dl +730 186 Dl +4181 2270(System)N +-1 Ds +3 Dt +2844 583 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2867 672 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2890 701 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2935 819 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +2981 849 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3027 908 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +3072 1026 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-27 0 Dl +3209 1292 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +3575 1823 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +4305 2059 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +5 Dt +2844 597 MXY +23 88 Dl +23 30 Dl +45 118 Dl +46 30 Dl +46 59 Dl +45 118 Dl +137 265 Dl +366 532 Dl +730 236 Dl +4328 2103(Total)N +2844 2310 MXY +1461 0 Dl +2844 MX +0 -1772 Dl +2310 MY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +2826 2416(0)N +-1 Ds +5 Dt +3209 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3155 2416(256)N +-1 Ds +5 Dt +3575 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3521 2416(512)N +-1 Ds +5 Dt +3940 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3886 2416(768)N +-1 Ds +5 Dt +4305 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +4233 2416(1024)N +-1 Ds +5 Dt +2844 2310 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2771 2340(0)N +-1 Ds +5 Dt +2844 2014 MXY +-18 0 Dl +2844 1719 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 1749(20)N +-1 Ds +5 Dt +2844 1423 MXY +-18 0 Dl +2844 1128 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 1158(40)N +-1 Ds +5 Dt +2844 833 MXY +-18 0 Dl +2844 538 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 568(60)N +3239 2529(Buffer)N +3445(Pool)X +3595(Size)X +3737(\(in)X +3835(K\))X +2695 1259(S)N +2699 1324(e)N +2699 1388(c)N +2697 1452(o)N +2697 1517(n)N +2697 1581(d)N +2701 1645(s)N +3 Dt +-1 Ds +3 f +8 s +2706 2773(Figure)N +2908(7:)X +1 f +2982(User)X +3123(time)X +3258(is)X +3322(virtually)X +3560(insensitive)X +3854(to)X +3924(the)X +4022(amount)X +4234(of)X +4307(buffer)X +2706 2861(pool)N +2852(available,)X +3130(however,)X +3396(both)X +3541(system)X +3750(time)X +3895(and)X +4018(elapsed)X +4240(time)X +4385(are)X +2706 2949(inversely)N +2960(proportional)X +3296(to)X +3366(the)X +3464(size)X +3583(of)X +3656(the)X +3753(buffer)X +3927(pool.)X +4092(Even)X +4242(for)X +4335(large)X +2706 3037(data)N +2831(sets)X +2946(where)X +3120(one)X +3230(expects)X +3439(few)X +3552(collisions,)X +3832(specifying)X +4116(a)X +4162(large)X +4307(buffer)X +2706 3125(pool)N +2836(dramatically)X +3171(improves)X +3425(performance.)X +10 s +10 f +2706 3301 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +3175 3543(Enhanced)N +3536(Functionality)X +1 f +2878 3675(This)N +3046(hashing)X +3320(package)X +3609(provides)X +3910(a)X +3971(set)X +4085(of)X +4177(compati-)X +2706 3763(bility)N +2895(routines)X +3174(to)X +3257(implement)X +3620(the)X +2 f +3739(ndbm)X +1 f +3937(interface.)X +4279(How-)X +2706 3851(ever,)N +2893(when)X +3095(the)X +3220(native)X +3443(interface)X +3752(is)X +3832(used,)X +4026(the)X +4151(following)X +2706 3939(additional)N +3046(functionality)X +3475(is)X +3548(provided:)X +10 f +2798 4071(g)N +1 f +2946(Inserts)X +3197(never)X +3413(fail)X +3556(because)X +3847(too)X +3985(many)X +4199(keys)X +2946 4159(hash)N +3113(to)X +3195(the)X +3313(same)X +3498(value.)X +10 f +2798 4247(g)N +1 f +2946(Inserts)X +3187(never)X +3393(fail)X +3527(because)X +3808(key)X +3950(and/or)X +4181(asso-)X +2946 4335(ciated)N +3158(data)X +3312(is)X +3385(too)X +3507(large)X +10 f +2798 4423(g)N +1 f +2946(Hash)X +3131(functions)X +3449(may)X +3607(be)X +3703(user-speci\256ed.)X +10 f +2798 4511(g)N +1 f +2946(Multiple)X +3268(pages)X +3498(may)X +3683(be)X +3806(cached)X +4077(in)X +4186(main)X +2946 4599(memory.)N +2706 4731(It)N +2801(also)X +2976(provides)X +3298(a)X +3380(set)X +3514(of)X +3626(compatibility)X +4097(routines)X +4400(to)X +2706 4819(implement)N +3087(the)X +2 f +3224(hsearch)X +1 f +3516(interface.)X +3876(Again,)X +4130(the)X +4266(native)X +2706 4907(interface)N +3008(offers)X +3216(enhanced)X +3540(functionality:)X +10 f +2798 5039(g)N +1 f +2946(Files)X +3121(may)X +3279(grow)X +3464(beyond)X +2 f +3720(nelem)X +1 f +3932(elements.)X +10 f +2798 5127(g)N +1 f +2946(Multiple)X +3247(hash)X +3420(tables)X +3632(may)X +3795(be)X +3896(accessed)X +4203(con-)X +2946 5215(currently.)N +10 f +2798 5303(g)N +1 f +2946(Hash)X +3134(tables)X +3344(may)X +3505(be)X +3604(stored)X +3823(and)X +3962(accessed)X +4266(on)X +2946 5391(disk.)N +10 f +2798 5479(g)N +1 f +2946(Hash)X +3155(functions)X +3497(may)X +3679(be)X +3799(user-speci\256ed)X +4288(at)X +2946 5567(runtime.)N +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(9)X + +10 p +%%Page: 10 10 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +459 538(Relative)N +760(Performance)X +1227(of)X +1314(the)X +1441(New)X +1613(Implementation)X +1 f +604 670(The)N +761(performance)X +1200(testing)X +1445(of)X +1544(the)X +1674(new)X +1840(package)X +2135(is)X +432 758(divided)N +711(into)X +874(two)X +1033(test)X +1183(suites.)X +1424(The)X +1588(\256rst)X +1751(suite)X +1941(of)X +2046(tests)X +432 846(requires)N +727(that)X +882(the)X +1015(tables)X +1237(be)X +1348(read)X +1522(from)X +1713(and)X +1864(written)X +2126(to)X +432 934(disk.)N +640(In)X +742(these)X +942(tests,)X +1139(the)X +1272(basis)X +1467(for)X +1595(comparison)X +2003(is)X +2090(the)X +432 1022(4.3BSD-Reno)N +908(version)X +1169(of)X +2 f +1260(ndbm)X +1 f +1438(.)X +1502(Based)X +1722(on)X +1826(the)X +1948(designs)X +432 1110(of)N +2 f +521(sdbm)X +1 f +712(and)X +2 f +850(gdbm)X +1 f +1028(,)X +1070(they)X +1230(are)X +1351(expected)X +1659(to)X +1743(perform)X +2024(simi-)X +432 1198(larly)N +605(to)X +2 f +693(ndbm)X +1 f +871(,)X +917(and)X +1059(we)X +1179(do)X +1285(not)X +1413(show)X +1608(their)X +1781(performance)X +432 1286(numbers.)N +800(The)X +977(second)X +1252(suite)X +1454(contains)X +1772(the)X +1921(memory)X +432 1374(resident)N +712(test)X +849(which)X +1071(does)X +1243(not)X +1370(require)X +1623(that)X +1768(the)X +1891(\256les)X +2049(ever)X +432 1462(be)N +533(written)X +784(to)X +870(disk,)X +1047(only)X +1213(that)X +1357(hash)X +1528(tables)X +1739(may)X +1901(be)X +2001(mani-)X +432 1550(pulated)N +692(in)X +778(main)X +961(memory.)X +1291(In)X +1381(this)X +1519(test,)X +1673(we)X +1790(compare)X +2090(the)X +432 1638(performance)N +859(to)X +941(that)X +1081(of)X +1168(the)X +2 f +1286(hsearch)X +1 f +1560(routines.)X +604 1752(For)N +760(both)X +947(suites,)X +1194(two)X +1358(different)X +1679(databases)X +2031(were)X +432 1840(used.)N +656(The)X +818(\256rst)X +979(is)X +1069(the)X +1204(dictionary)X +1566(database)X +1880(described)X +432 1928(previously.)N +836(The)X +987(second)X +1236(was)X +1386(constructed)X +1781(from)X +1962(a)X +2023(pass-)X +432 2016(word)N +647(\256le)X +799(with)X +990(approximately)X +1502(300)X +1671(accounts.)X +2041(Two)X +432 2104(records)N +700(were)X +887(constructed)X +1287(for)X +1411(each)X +1589(account.)X +1909(The)X +2064(\256rst)X +432 2192(used)N +604(the)X +727(logname)X +1028(as)X +1120(the)X +1243(key)X +1384(and)X +1525(the)X +1648(remainder)X +1999(of)X +2090(the)X +432 2280(password)N +768(entry)X +965(for)X +1091(the)X +1221(data.)X +1427(The)X +1584(second)X +1839(was)X +1996(keyed)X +432 2368(by)N +541(uid)X +672(and)X +817(contained)X +1157(the)X +1283(entire)X +1494(password)X +1825(entry)X +2018(as)X +2113(its)X +432 2456(data)N +589(\256eld.)X +794(The)X +942(tests)X +1107(were)X +1287(all)X +1389(run)X +1518(on)X +1620(the)X +1740(HP)X +1864(9000)X +2046(with)X +432 2544(the)N +574(same)X +783(con\256guration)X +1254(previously)X +1636(described.)X +2027(Each)X +432 2632(test)N +576(was)X +734(run)X +874(\256ve)X +1027(times)X +1232(and)X +1380(the)X +1510(timing)X +1750(results)X +1991(of)X +2090(the)X +432 2720(runs)N +602(were)X +791(averaged.)X +1154(The)X +1311(variance)X +1616(across)X +1849(the)X +1979(5)X +2050(runs)X +432 2808(was)N +591(approximately)X +1088(1%)X +1229(of)X +1330(the)X +1462(average)X +1746(yielding)X +2041(95%)X +432 2896(con\256dence)N +800(intervals)X +1096(of)X +1183(approximately)X +1666(2%.)X +3 f +1021 3050(Disk)N +1196(Based)X +1420(Tests)X +1 f +604 3182(In)N +693(these)X +880(tests,)X +1064(we)X +1180(use)X +1308(a)X +1365(bucket)X +1600(size)X +1746(of)X +1834(1024)X +2015(and)X +2152(a)X +432 3270(\256ll)N +540(factor)X +748(of)X +835(32.)X +3 f +432 3384(create)N +663(test)X +1 f +547 3498(The)N +703(keys)X +881(are)X +1011(entered)X +1279(into)X +1433(the)X +1561(hash)X +1738(table,)X +1944(and)X +2090(the)X +547 3586(\256le)N +669(is)X +742(\257ushed)X +993(to)X +1075(disk.)X +3 f +432 3700(read)N +608(test)X +1 f +547 3814(A)N +640(lookup)X +897(is)X +984(performed)X +1353(for)X +1481(each)X +1663(key)X +1813(in)X +1909(the)X +2041(hash)X +547 3902(table.)N +3 f +432 4016(verify)N +653(test)X +1 f +547 4130(A)N +640(lookup)X +897(is)X +984(performed)X +1353(for)X +1481(each)X +1663(key)X +1813(in)X +1909(the)X +2041(hash)X +547 4218(table,)N +759(and)X +911(the)X +1045(data)X +1215(returned)X +1519(is)X +1608(compared)X +1961(against)X +547 4306(that)N +687(originally)X +1018(stored)X +1234(in)X +1316(the)X +1434(hash)X +1601(table.)X +3 f +432 4420(sequential)N +798(retrieve)X +1 f +547 4534(All)N +674(keys)X +846(are)X +970(retrieved)X +1281(in)X +1367(sequential)X +1716(order)X +1910(from)X +2090(the)X +547 4622(hash)N +724(table.)X +950(The)X +2 f +1105(ndbm)X +1 f +1313(interface)X +1625(allows)X +1863(sequential)X +547 4710(retrieval)N +848(of)X +948(the)X +1079(keys)X +1259(from)X +1448(the)X +1578(database,)X +1907(but)X +2041(does)X +547 4798(not)N +701(return)X +945(the)X +1094(data)X +1279(associated)X +1660(with)X +1853(each)X +2052(key.)X +547 4886(Therefore,)N +929(we)X +1067(compare)X +1388(the)X +1530(performance)X +1980(of)X +2090(the)X +547 4974(new)N +703(package)X +989(to)X +1073(two)X +1215(different)X +1514(runs)X +1674(of)X +2 f +1763(ndbm)X +1 f +1941(.)X +2002(In)X +2090(the)X +547 5062(\256rst)N +697(case,)X +2 f +882(ndbm)X +1 f +1086(returns)X +1335(only)X +1503(the)X +1627(keys)X +1800(while)X +2003(in)X +2090(the)X +547 5150(second,)N +2 f +823(ndbm)X +1 f +1034(returns)X +1290(both)X +1465(the)X +1596(keys)X +1776(and)X +1924(the)X +2054(data)X +547 5238(\(requiring)N +894(a)X +956(second)X +1204(call)X +1345(to)X +1432(the)X +1555(library\).)X +1861(There)X +2074(is)X +2152(a)X +547 5326(single)N +764(run)X +897(for)X +1017(the)X +1141(new)X +1300(library)X +1539(since)X +1729(it)X +1798(returns)X +2046(both)X +547 5414(the)N +665(key)X +801(and)X +937(the)X +1055(data.)X +3 f +3014 538(In-Memory)N +3431(Test)X +1 f +2590 670(This)N +2757(test)X +2892(uses)X +3054(a)X +3114(bucket)X +3352(size)X +3501(of)X +3592(256)X +3736(and)X +3876(a)X +3936(\256ll)X +4048(fac-)X +2418 758(tor)N +2527(of)X +2614(8.)X +3 f +2418 872(create/read)N +2827(test)X +1 f +2533 986(In)N +2627(this)X +2769(test,)X +2927(a)X +2989(hash)X +3162(table)X +3344(is)X +3423(created)X +3682(by)X +3788(inserting)X +4094(all)X +2533 1074(the)N +2660(key/data)X +2961(pairs.)X +3186(Then)X +3380(a)X +3445(keyed)X +3666(retrieval)X +3963(is)X +4044(per-)X +2533 1162(formed)N +2801(for)X +2931(each)X +3115(pair,)X +3295(and)X +3446(the)X +3579(hash)X +3761(table)X +3952(is)X +4040(des-)X +2533 1250(troyed.)N +3 f +2938 1404(Performance)N +3405(Results)X +1 f +2590 1536(Figures)N +2866(8a)X +2978(and)X +3130(8b)X +3246(show)X +3451(the)X +3585(user)X +3755(time,)X +3952(system)X +2418 1624(time,)N +2608(and)X +2752(elapsed)X +3021(time)X +3191(for)X +3312(each)X +3487(test)X +3625(for)X +3746(both)X +3915(the)X +4040(new)X +2418 1712(implementation)N +2951(and)X +3098(the)X +3227(old)X +3360(implementation)X +3893(\()X +2 f +3920(hsearch)X +1 f +2418 1800(or)N +2 f +2528(ndbm)X +1 f +2706(,)X +2769(whichever)X +3147(is)X +3243(appropriate\))X +3678(as)X +3787(well)X +3967(as)X +4076(the)X +2418 1888(improvement.)N +2929(The)X +3098(improvement)X +3569(is)X +3666(expressed)X +4027(as)X +4138(a)X +2418 1976(percentage)N +2787(of)X +2874(the)X +2992(old)X +3114(running)X +3383(time:)X +0 f +8 s +2418 2275(%)N +2494(=)X +2570(100)X +2722(*)X +2798 -0.4219(\(old_time)AX +3178(-)X +3254 -0.4219(new_time\))AX +3634(/)X +3710(old_time)X +1 f +10 s +2590 2600(In)N +2700(nearly)X +2944(all)X +3067(cases,)X +3299(the)X +3439(new)X +3615(routines)X +3915(perform)X +2418 2688(better)N +2628(than)X +2793(the)X +2918(old)X +3047(routines)X +3332(\(both)X +2 f +3527(hsearch)X +1 f +3807(and)X +2 f +3949(ndbm)X +1 f +4127(\).)X +2418 2776(Although)N +2755(the)X +3 f +2888(create)X +1 f +3134(tests)X +3311(exhibit)X +3567(superior)X +3864(user)X +4032(time)X +2418 2864(performance,)N +2869(the)X +2991(test)X +3126(time)X +3292(is)X +3369(dominated)X +3731(by)X +3834(the)X +3955(cost)X +4107(of)X +2418 2952(writing)N +2677(the)X +2803(actual)X +3023(\256le)X +3153(to)X +3243(disk.)X +3444(For)X +3583(the)X +3709(large)X +3897(database)X +2418 3040(\(the)N +2564(dictionary\),)X +2957(this)X +3093(completely)X +3470(overwhelmed)X +3927(the)X +4045(sys-)X +2418 3128(tem)N +2570(time.)X +2783(However,)X +3129(for)X +3254(the)X +3383(small)X +3587(data)X +3752(base,)X +3946(we)X +4071(see)X +2418 3216(that)N +2569(differences)X +2958(in)X +3051(both)X +3224(user)X +3389(and)X +3536(system)X +3788(time)X +3960(contri-)X +2418 3304(bute)N +2576(to)X +2658(the)X +2776(superior)X +3059(performance)X +3486(of)X +3573(the)X +3691(new)X +3845(package.)X +2590 3418(The)N +3 f +2764(read)X +1 f +2920(,)X +3 f +2989(verify)X +1 f +3190(,)X +3259(and)X +3 f +3424(sequential)X +1 f +3818(results)X +4075(are)X +2418 3506(deceptive)N +2758(for)X +2883(the)X +3012(small)X +3216(database)X +3524(since)X +3720(the)X +3849(entire)X +4063(test)X +2418 3594(ran)N +2551(in)X +2643(under)X +2856(a)X +2922(second.)X +3215(However,)X +3560(on)X +3669(the)X +3796(larger)X +4013(data-)X +2418 3682(base)N +2590(the)X +3 f +2716(read)X +1 f +2900(and)X +3 f +3044(verify)X +1 f +3273(tests)X +3443(bene\256t)X +3689(from)X +3873(the)X +3999(cach-)X +2418 3770(ing)N +2546(of)X +2639(buckets)X +2910(in)X +2998(the)X +3122(new)X +3282(package)X +3571(to)X +3658(improve)X +3950(perfor-)X +2418 3858(mance)N +2666(by)X +2784(over)X +2965(80%.)X +3169(Since)X +3384(the)X +3519(\256rst)X +3 f +3680(sequential)X +1 f +4063(test)X +2418 3946(does)N +2598(not)X +2733(require)X +2 f +2994(ndbm)X +1 f +3205(to)X +3299(return)X +3523(the)X +3653(data)X +3819(values,)X +4076(the)X +2418 4034(user)N +2573(time)X +2735(is)X +2808(lower)X +3011(than)X +3169(for)X +3283(the)X +3401(new)X +3555(package.)X +3879(However)X +2418 4122(when)N +2613(we)X +2728(require)X +2977(both)X +3139(packages)X +3454(to)X +3536(return)X +3748(data,)X +3922(the)X +4040(new)X +2418 4210(package)N +2702(excels)X +2923(in)X +3005(all)X +3105(three)X +3286(timings.)X +2590 4324(The)N +2773(small)X +3003(database)X +3337(runs)X +3532(so)X +3660(quickly)X +3957(in)X +4076(the)X +2418 4412(memory-resident)N +3000(case)X +3173(that)X +3326(the)X +3457(results)X +3699(are)X +3831(uninterest-)X +2418 4500(ing.)N +2589(However,)X +2933(for)X +3056(the)X +3183(larger)X +3400(database)X +3706(the)X +3833(new)X +3995(pack-)X +2418 4588(age)N +2567(pays)X +2751(a)X +2824(small)X +3033(penalty)X +3305(in)X +3403(system)X +3661(time)X +3839(because)X +4130(it)X +2418 4676(limits)N +2636(its)X +2748(main)X +2944(memory)X +3247(utilization)X +3607(and)X +3759(swaps)X +3991(pages)X +2418 4764(out)N +2550(to)X +2642(temporary)X +3002(storage)X +3264(in)X +3356(the)X +3484(\256le)X +3616(system)X +3868(while)X +4076(the)X +2 f +2418 4852(hsearch)N +1 f +2698(package)X +2988(requires)X +3273(that)X +3419(the)X +3543(application)X +3924(allocate)X +2418 4940(enough)N +2692(space)X +2909(for)X +3041(all)X +3159(key/data)X +3468(pair.)X +3670(However,)X +4022(even)X +2418 5028(with)N +2600(the)X +2738(system)X +3000(time)X +3182(penalty,)X +3477(the)X +3614(resulting)X +3933(elapsed)X +2418 5116(time)N +2580(improves)X +2898(by)X +2998(over)X +3161(50%.)X +3 f +432 5960(10)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +11 p +%%Page: 11 11 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +10 f +908 454(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +1379 546(hash)N +1652(ndbm)X +1950(%change)X +1 f +10 f +908 550(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 642(CREATE)N +10 f +908 646(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 738(user)N +1424(6.4)X +1671(12.2)X +2073(48)X +1157 826(sys)N +1384(32.5)X +1671(34.7)X +2113(6)X +3 f +1006 914(elapsed)N +10 f +1310 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +1384 914(90.4)N +10 f +1581 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +1671 914(99.6)N +10 f +1883 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +2113 914(9)N +1 f +10 f +908 910(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 926(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1010(READ)N +10 f +908 1014(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1106(user)N +1424(3.4)X +1711(6.1)X +2073(44)X +1157 1194(sys)N +1424(1.2)X +1671(15.3)X +2073(92)X +3 f +1006 1282(elapsed)N +10 f +1310 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +1424 1282(4.0)N +10 f +1581 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +1671 1282(21.2)N +10 f +1883 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +2073 1282(81)N +1 f +10 f +908 1278(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 1294(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1378(VERIFY)N +10 f +908 1382(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1474(user)N +1424(3.5)X +1711(6.3)X +2073(44)X +1157 1562(sys)N +1424(1.2)X +1671(15.3)X +2073(92)X +3 f +1006 1650(elapsed)N +10 f +1310 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +1424 1650(4.0)N +10 f +1581 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +1671 1650(21.2)N +10 f +1883 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +2073 1650(81)N +1 f +10 f +908 1646(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 1662(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1746(SEQUENTIAL)N +10 f +908 1750(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1842(user)N +1424(2.7)X +1711(1.9)X +2046(-42)X +1157 1930(sys)N +1424(0.7)X +1711(3.9)X +2073(82)X +3 f +1006 2018(elapsed)N +10 f +1310 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +1424 2018(3.0)N +10 f +1581 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +1711 2018(5.0)N +10 f +1883 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +2073 2018(40)N +1 f +10 f +908 2014(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 2030(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 2114(SEQUENTIAL)N +1467(\(with)X +1656(data)X +1810(retrieval\))X +10 f +908 2118(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 2210(user)N +1424(2.7)X +1711(8.2)X +2073(67)X +1157 2298(sys)N +1424(0.7)X +1711(4.3)X +2073(84)X +3 f +1006 2386(elapsed)N +1424(3.0)X +1671(12.0)X +2073(75)X +1 f +10 f +908 2390(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +899 2394(c)N +2378(c)Y +2298(c)Y +2218(c)Y +2138(c)Y +2058(c)Y +1978(c)Y +1898(c)Y +1818(c)Y +1738(c)Y +1658(c)Y +1578(c)Y +1498(c)Y +1418(c)Y +1338(c)Y +1258(c)Y +1178(c)Y +1098(c)Y +1018(c)Y +938(c)Y +858(c)Y +778(c)Y +698(c)Y +618(c)Y +538(c)Y +1310 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +1581 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +1883 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +2278 2394(c)N +2378(c)Y +2298(c)Y +2218(c)Y +2138(c)Y +2058(c)Y +1978(c)Y +1898(c)Y +1818(c)Y +1738(c)Y +1658(c)Y +1578(c)Y +1498(c)Y +1418(c)Y +1338(c)Y +1258(c)Y +1178(c)Y +1098(c)Y +1018(c)Y +938(c)Y +858(c)Y +778(c)Y +698(c)Y +618(c)Y +538(c)Y +905 2574(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +1318 2666(hash)N +1585(hsearch)X +1953(%change)X +1 f +10 f +905 2670(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +945 2762(CREATE/READ)N +10 f +905 2766(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1064 2858(user)N +1343(6.6)X +1642(17.2)X +2096(62)X +1096 2946(sys)N +1343(1.1)X +1682(0.3)X +2029(-266)X +3 f +945 3034(elapsed)N +1343(7.8)X +1642(17.0)X +2096(54)X +1 f +10 f +905 3038(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +896 3050(c)N +2978(c)Y +2898(c)Y +2818(c)Y +2738(c)Y +2658(c)Y +1249 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +1520 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +1886 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +2281 3050(c)N +2978(c)Y +2898(c)Y +2818(c)Y +2738(c)Y +2658(c)Y +3 f +720 3174(Figure)N +967(8a:)X +1 f +1094(Timing)X +1349(results)X +1578(for)X +1692(the)X +1810(dictionary)X +2155(database.)X +10 f +720 3262 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1407 3504(Conclusion)N +1 f +892 3636(This)N +1063(paper)X +1271(has)X +1407(presented)X +1744(the)X +1871(design,)X +2129(implemen-)X +720 3724(tation)N +928(and)X +1070(performance)X +1503(of)X +1596(a)X +1658(new)X +1818(hashing)X +2093(package)X +2382(for)X +720 3812(UNIX.)N +993(The)X +1150(new)X +1316(package)X +1612(provides)X +1919(a)X +1986(superset)X +2280(of)X +2378(the)X +720 3900(functionality)N +1159(of)X +1255(existing)X +1537(hashing)X +1815(packages)X +2139(and)X +2284(incor-)X +720 3988(porates)N +975(additional)X +1318(features)X +1596(such)X +1766(as)X +1855(large)X +2038(key)X +2176(handling,)X +720 4076(user)N +876(de\256ned)X +1134(hash)X +1302(functions,)X +1641(multiple)X +1928(hash)X +2096(tables,)X +2324(vari-)X +720 4164(able)N +894(sized)X +1099(pages,)X +1342(and)X +1498(linear)X +1721(hashing.)X +2050(In)X +2156(nearly)X +2396(all)X +720 4252(cases,)N +954(the)X +1096(new)X +1274(package)X +1582(provides)X +1902(improved)X +2252(perfor-)X +720 4340(mance)N +974(on)X +1098(the)X +1240(order)X +1454(of)X +1565(50-80%)X +1863(for)X +2001(the)X +2142(workloads)X +720 4428(shown.)N +990(Applications)X +1420(such)X +1588(as)X +1676(the)X +1794(loader,)X +2035(compiler,)X +2360(and)X +720 4516(mail,)N +921(which)X +1156(currently)X +1485(implement)X +1866(their)X +2051(own)X +2227(hashing)X +720 4604(routines,)N +1032(should)X +1279(be)X +1389(modi\256ed)X +1706(to)X +1801(use)X +1941(the)X +2072(generic)X +2342(rou-)X +720 4692(tines.)N +892 4806(This)N +1087(hashing)X +1389(package)X +1705(is)X +1810(one)X +1978(access)X +2236(method)X +720 4894(which)N +953(is)X +1043(part)X +1205(of)X +1309(a)X +1382(generic)X +1656(database)X +1970(access)X +2212(package)X +720 4982(being)N +955(developed)X +1342(at)X +1457(the)X +1612(University)X +2007(of)X +2131(California,)X +720 5070(Berkeley.)N +1089(It)X +1177(will)X +1340(include)X +1614(a)X +1688(btree)X +1887(access)X +2131(method)X +2409(as)X +720 5158(well)N +916(as)X +1041(\256xed)X +1259(and)X +1433(variable)X +1750(length)X +2007(record)X +2270(access)X +720 5246(methods)N +1024(in)X +1119(addition)X +1414(to)X +1509(the)X +1640(hashed)X +1896(support)X +2168(presented)X +720 5334(here.)N +948(All)X +1099(of)X +1215(the)X +1361(access)X +1615(methods)X +1934(are)X +2081(based)X +2312(on)X +2440(a)X +720 5422(key/data)N +1037(pair)X +1207(interface)X +1533(and)X +1693(appear)X +1952(identical)X +2272(to)X +2378(the)X +720 5510(application)N +1121(layer,)X +1347(allowing)X +1671(application)X +2071(implementa-)X +720 5598(tions)N +906(to)X +999(be)X +1106(largely)X +1360(independent)X +1783(of)X +1881(the)X +2010(database)X +2318(type.)X +720 5686(The)N +873(package)X +1165(is)X +1246(expected)X +1560(to)X +1650(be)X +1754(an)X +1858(integral)X +2131(part)X +2284(of)X +2378(the)X +2706 538(4.4BSD)N +3006(system,)X +3293(with)X +3479(various)X +3759(standard)X +4075(applications)X +2706 626(such)N +2879(as)X +2972(more\(1\),)X +3277(sort\(1\))X +3517(and)X +3659(vi\(1\))X +3841(based)X +4050(on)X +4156(it.)X +4266(While)X +2706 714(the)N +2833(current)X +3089(design)X +3326(does)X +3501(not)X +3631(support)X +3899(multi-user)X +4256(access)X +2706 802(or)N +2804(transactions,)X +3238(they)X +3407(could)X +3616(be)X +3723(incorporated)X +4159(relatively)X +2706 890(easily.)N +10 f +2894 938(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +3365 1030(hash)N +3638(ndbm)X +3936(%change)X +1 f +10 f +2894 1034(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1126(CREATE)N +10 f +2894 1130(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1222(user)N +3390(0.2)X +3677(0.4)X +4079(50)X +3143 1310(sys)N +3390(0.1)X +3677(1.0)X +4079(90)X +3 f +2992 1398(elapsed)N +10 f +3296 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +3390 1398(0)N +10 f +3567 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +3677 1398(3.2)N +10 f +3869 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +4039 1398(100)N +1 f +10 f +2894 1394(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 1410(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1494(READ)N +10 f +2894 1498(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1590(user)N +3390(0.1)X +3677(0.1)X +4119(0)X +3143 1678(sys)N +3390(0.1)X +3677(0.4)X +4079(75)X +3 f +2992 1766(elapsed)N +10 f +3296 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +3390 1766(0.0)N +10 f +3567 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +3677 1766(0.0)N +10 f +3869 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +4119 1766(0)N +1 f +10 f +2894 1762(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 1778(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1862(VERIFY)N +10 f +2894 1866(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1958(user)N +3390(0.1)X +3677(0.2)X +4079(50)X +3143 2046(sys)N +3390(0.1)X +3677(0.3)X +4079(67)X +3 f +2992 2134(elapsed)N +10 f +3296 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +3390 2134(0.0)N +10 f +3567 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +3677 2134(0.0)N +10 f +3869 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +4119 2134(0)N +1 f +10 f +2894 2130(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 2146(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 2230(SEQUENTIAL)N +10 f +2894 2234(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 2326(user)N +3390(0.1)X +3677(0.0)X +4012(-100)X +3143 2414(sys)N +3390(0.1)X +3677(0.1)X +4119(0)X +3 f +2992 2502(elapsed)N +10 f +3296 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +3390 2502(0.0)N +10 f +3567 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +3677 2502(0.0)N +10 f +3869 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +4119 2502(0)N +1 f +10 f +2894 2498(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 2514(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 2598(SEQUENTIAL)N +3453(\(with)X +3642(data)X +3796(retrieval\))X +10 f +2894 2602(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 2694(user)N +3390(0.1)X +3677(0.1)X +4119(0)X +3143 2782(sys)N +3390(0.1)X +3677(0.1)X +4119(0)X +3 f +2992 2870(elapsed)N +3390(0.0)X +3677(0.0)X +4119(0)X +1 f +10 f +2894 2874(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2885 2878(c)N +2862(c)Y +2782(c)Y +2702(c)Y +2622(c)Y +2542(c)Y +2462(c)Y +2382(c)Y +2302(c)Y +2222(c)Y +2142(c)Y +2062(c)Y +1982(c)Y +1902(c)Y +1822(c)Y +1742(c)Y +1662(c)Y +1582(c)Y +1502(c)Y +1422(c)Y +1342(c)Y +1262(c)Y +1182(c)Y +1102(c)Y +1022(c)Y +3296 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +3567 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +3869 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +4264 2878(c)N +2862(c)Y +2782(c)Y +2702(c)Y +2622(c)Y +2542(c)Y +2462(c)Y +2382(c)Y +2302(c)Y +2222(c)Y +2142(c)Y +2062(c)Y +1982(c)Y +1902(c)Y +1822(c)Y +1742(c)Y +1662(c)Y +1582(c)Y +1502(c)Y +1422(c)Y +1342(c)Y +1262(c)Y +1182(c)Y +1102(c)Y +1022(c)Y +2891 3058(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +3304 3150(hash)N +3571(hsearch)X +3939(%change)X +1 f +10 f +2891 3154(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2931 3246(CREATE/READ)N +10 f +2891 3250(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3050 3342(user)N +3329(0.3)X +3648(0.4)X +4048(25)X +3082 3430(sys)N +3329(0.0)X +3648(0.0)X +4088(0)X +3 f +2931 3518(elapsed)N +3329(0.0)X +3648(0.0)X +4088(0)X +1 f +10 f +2891 3522(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2882 3534(c)N +3462(c)Y +3382(c)Y +3302(c)Y +3222(c)Y +3142(c)Y +3235 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +3506 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +3872 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +4267 3534(c)N +3462(c)Y +3382(c)Y +3302(c)Y +3222(c)Y +3142(c)Y +3 f +2706 3658(Figure)N +2953(8b:)X +1 f +3084(Timing)X +3339(results)X +3568(for)X +3682(the)X +3800(password)X +4123(database.)X +10 f +2706 3746 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +3396 3988(References)N +1 f +2706 4120([ATT79])N +3058(AT&T,)X +3358(DBM\(3X\),)X +2 f +3773(Unix)X +3990(Programmer's)X +2878 4208(Manual,)N +3194(Seventh)X +3491(Edition,)X +3793(Volume)X +4085(1)X +1 f +(,)S +4192(January,)X +2878 4296(1979.)N +2706 4472([ATT85])N +3027(AT&T,)X +3296(HSEARCH\(BA_LIB\),)X +2 f +4053(Unix)X +4239(System)X +2878 4560(User's)N +3112(Manual,)X +3401(System)X +3644(V.3)X +1 f +3753(,)X +3793(pp.)X +3913(506-508,)X +4220(1985.)X +2706 4736([BRE73])N +3025(Brent,)X +3253(Richard)X +3537(P.,)X +3651(``Reducing)X +4041(the)X +4168(Retrieval)X +2878 4824(Time)N +3071(of)X +3162(Scatter)X +3409(Storage)X +3678(Techniques'',)X +2 f +4146(Commun-)X +2878 4912(ications)N +3175(of)X +3281(the)X +3422(ACM)X +1 f +3591(,)X +3654(Volume)X +3955(16,)X +4098(No.)X +4259(2,)X +4362(pp.)X +2878 5000(105-109,)N +3185(February,)X +3515(1973.)X +2706 5176([BSD86])N +3055(NDBM\(3\),)X +2 f +3469(4.3BSD)X +3775(Unix)X +3990(Programmer's)X +2878 5264(Manual)N +3155(Reference)X +3505(Guide)X +1 f +3701(,)X +3749(University)X +4114(of)X +4208(Califor-)X +2878 5352(nia,)N +3016(Berkeley,)X +3346(1986.)X +2706 5528([ENB88])N +3025(Enbody,)X +3319(R.)X +3417(J.,)X +3533(Du,)X +3676(H.)X +3779(C.,)X +3897(``Dynamic)X +4270(Hash-)X +2878 5616(ing)N +3034(Schemes'',)X +2 f +3427(ACM)X +3630(Computing)X +4019(Surveys)X +1 f +4269(,)X +4322(Vol.)X +2878 5704(20,)N +2998(No.)X +3136(2,)X +3216(pp.)X +3336(85-113,)X +3603(June)X +3770(1988.)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4384(11)X + +12 p +%%Page: 12 12 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538([FAG79])N +776(Ronald)X +1057(Fagin,)X +1308(Jurg)X +1495(Nievergelt,)X +1903(Nicholas)X +604 626(Pippenger,)N +1003(H.)X +1135(Raymond)X +1500(Strong,)X +1787(``Extendible)X +604 714(Hashing)N +901(--)X +985(A)X +1073(Fast)X +1236(Access)X +1493(Method)X +1771(for)X +1894(Dynamic)X +604 802(Files'',)N +2 f +855(ACM)X +1046(Transactions)X +1485(on)X +1586(Database)X +1914(Systems)X +1 f +2168(,)X +604 890(Volume)N +882(4,)X +962(No.)X +1100(3.,)X +1200(September)X +1563(1979,)X +1763(pp)X +1863(315-34)X +432 1066([KNU68],)N +802(Knuth,)X +1064(D.E.,)X +2 f +1273(The)X +1434(Art)X +1577(of)X +1680(Computer)X +2041(Pro-)X +604 1154(gramming)N +971(Vol.)X +1140(3:)X +1245(Sorting)X +1518(and)X +1676(Searching)X +1 f +2001(,)X +2058(sec-)X +604 1242(tions)N +779(6.3-6.4,)X +1046(pp)X +1146(481-550.)X +432 1418([LAR78])N +747(Larson,)X +1011(Per-Ake,)X +1319(``Dynamic)X +1687(Hashing'',)X +2 f +2048(BIT)X +1 f +(,)S +604 1506(Vol.)N +764(18,)X +884(1978,)X +1084(pp.)X +1204(184-201.)X +432 1682([LAR88])N +752(Larson,)X +1021(Per-Ake,)X +1335(``Dynamic)X +1709(Hash)X +1900(Tables'',)X +2 f +604 1770(Communications)N +1183(of)X +1281(the)X +1415(ACM)X +1 f +1584(,)X +1640(Volume)X +1934(31,)X +2070(No.)X +604 1858(4.,)N +704(April)X +893(1988,)X +1093(pp)X +1193(446-457.)X +432 2034([LIT80])N +731(Witold,)X +1013(Litwin,)X +1286(``Linear)X +1590(Hashing:)X +1939(A)X +2036(New)X +604 2122(Tool)N +786(for)X +911(File)X +1065(and)X +1211(Table)X +1424(Addressing'',)X +2 f +1893(Proceed-)X +604 2210(ings)N +761(of)X +847(the)X +969(6th)X +1095(International)X +1540(Conference)X +1933(on)X +2036(Very)X +604 2298(Large)N +815(Databases)X +1 f +1153(,)X +1193(1980.)X +432 2474([NEL90])N +743(Nelson,)X +1011(Philip)X +1222(A.,)X +2 f +1341(Gdbm)X +1558(1.4)X +1679(source)X +1913(distribu-)X +604 2562(tion)N +748(and)X +888(README)X +1 f +1209(,)X +1249(August)X +1500(1990.)X +432 2738([THOM90])N +840(Ken)X +1011(Thompson,)X +1410(private)X +1670(communication,)X +604 2826(Nov.)N +782(1990.)X +432 3002([TOR87])N +790(Torek,)X +1066(C.,)X +1222(``Re:)X +1470(dbm.a)X +1751(and)X +1950(ndbm.a)X +604 3090(archives'',)N +2 f +966(USENET)X +1279(newsgroup)X +1650(comp.unix)X +1 f +2002(1987.)X +432 3266([TOR88])N +760(Torek,)X +1006(C.,)X +1133(``Re:)X +1351(questions)X +1686(regarding)X +2027(data-)X +604 3354(bases)N +826(created)X +1106(with)X +1295(dbm)X +1484(and)X +1647(ndbm)X +1876(routines'')X +2 f +604 3442(USENET)N +937(newsgroup)X +1328(comp.unix.questions)X +1 f +1982(,)X +2041(June)X +604 3530(1988.)N +432 3706([WAL84])N +773(Wales,)X +1018(R.,)X +1135(``Discussion)X +1564(of)X +1655("dbm")X +1887(data)X +2045(base)X +604 3794(system'',)N +2 f +973(USENET)X +1339(newsgroup)X +1762(unix.wizards)X +1 f +2168(,)X +604 3882(January,)N +894(1984.)X +432 4058([YIG89])N +751(Ozan)X +963(S.)X +1069(Yigit,)X +1294(``How)X +1545(to)X +1648(Roll)X +1826(Your)X +2032(Own)X +604 4146(Dbm/Ndbm'',)N +2 f +1087(unpublished)X +1504(manuscript)X +1 f +(,)S +1910(Toronto,)X +604 4234(July,)N +777(1989)X +3 f +432 5960(12)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +13 p +%%Page: 13 13 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(Margo)N +960(I.)X +1033(Seltzer)X +1282(is)X +1361(a)X +1423(Ph.D.)X +1631(student)X +1887(in)X +1974(the)X +2097(Department)X +720 626(of)N +823(Electrical)X +1167(Engineering)X +1595(and)X +1747(Computer)X +2102(Sciences)X +2418(at)X +720 714(the)N +850(University)X +1220(of)X +1318(California,)X +1694(Berkeley.)X +2055(Her)X +2207(research)X +720 802(interests)N +1017(include)X +1283(\256le)X +1415(systems,)X +1718(databases,)X +2076(and)X +2221(transac-)X +720 890(tion)N +896(processing)X +1291(systems.)X +1636(She)X +1807(spent)X +2027(several)X +2306(years)X +720 978(working)N +1026(at)X +1123(startup)X +1380(companies)X +1762(designing)X +2112(and)X +2267(imple-)X +720 1066(menting)N +1048(\256le)X +1216(systems)X +1535(and)X +1716(transaction)X +2133(processing)X +720 1154(software)N +1026(and)X +1170(designing)X +1509(microprocessors.)X +2103(Ms.)X +2253(Seltzer)X +720 1242(received)N +1057(her)X +1223(AB)X +1397(in)X +1522(Applied)X +1843(Mathematics)X +2320(from)X +720 1330 0.1953(Harvard/Radcliffe)AN +1325(College)X +1594(in)X +1676(1983.)X +720 1444(In)N +810(her)X +936(spare)X +1129(time,)X +1313(Margo)X +1549(can)X +1683(usually)X +1936(be)X +2034(found)X +2243(prepar-)X +720 1532(ing)N +868(massive)X +1171(quantities)X +1527(of)X +1639(food)X +1831(for)X +1970(hungry)X +2242(hoards,)X +720 1620(studying)N +1022(Japanese,)X +1355(or)X +1449(playing)X +1716(soccer)X +1948(with)X +2116(an)X +2218(exciting)X +720 1708(Bay)N +912(Area)X +1132(Women's)X +1507(Soccer)X +1788(team,)X +2026(the)X +2186(Berkeley)X +720 1796(Bruisers.)N +720 1910(Ozan)N +915(\()X +3 f +942(Oz)X +1 f +1040(\))X +1092(Yigit)X +1281(is)X +1358(currently)X +1672(a)X +1732(software)X +2033(engineer)X +2334(with)X +720 1998(the)N +886(Communications)X +1499(Research)X +1861(and)X +2044(Development)X +720 2086(group,)N +948(Computing)X +1328(Services,)X +1641(York)X +1826(University.)X +2224(His)X +2355(for-)X +720 2174(mative)N +967(years)X +1166(were)X +1352(also)X +1510(spent)X +1708(at)X +1795(York,)X +2009(where)X +2234(he)X +2338(held)X +720 2262(system)N +985(programmer)X +1425(and)X +1583(administrator)X +2052(positions)X +2382(for)X +720 2350(various)N +995(mixtures)X +1314(of)X +1420(of)X +1526(UNIX)X +1765(systems)X +2056(starting)X +2334(with)X +720 2438(Berkeley)N +1031(4.1)X +1151(in)X +1233(1982,)X +1433(while)X +1631(at)X +1709(the)X +1827(same)X +2012(time)X +2174(obtaining)X +720 2526(a)N +776(degree)X +1011(in)X +1093(Computer)X +1433(Science.)X +720 2640(In)N +813(his)X +931(copious)X +1205(free)X +1356(time,)X +1543(Oz)X +1662(enjoys)X +1896(working)X +2188(on)X +2293(what-)X +720 2728(ever)N +890(software)X +1197(looks)X +1400(interesting,)X +1788(which)X +2014(often)X +2209(includes)X +720 2816(language)N +1044(interpreters,)X +1464(preprocessors,)X +1960(and)X +2110(lately,)X +2342(pro-)X +720 2904(gram)N +905(generators)X +1260(and)X +1396(expert)X +1617(systems.)X +720 3018(Oz)N +836(has)X +964(authored)X +1266(several)X +1515(public-domain)X +2003(software)X +2301(tools,)X +720 3106(including)N +1069(an)X +1191(nroff-like)X +1545(text)X +1711(formatter)X +2 f +2056(proff)X +1 f +2257(that)X +2423(is)X +720 3194(apparently)N +1083(still)X +1226(used)X +1397(in)X +1483(some)X +1676(basement)X +2002(PCs.)X +2173(His)X +2307(latest)X +720 3282(obsessions)N +1143(include)X +1460(the)X +1639(incredible)X +2040(programming)X +720 3370(language)N +1030(Scheme,)X +1324(and)X +1460(Chinese)X +1738(Brush)X +1949(painting.)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4384(13)X + +14 p +%%Page: 14 14 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 5960(14)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +14 p +%%Trailer +xt + +xs diff --git a/src/util/db2/docs/libtp.usenix.ps b/src/util/db2/docs/libtp.usenix.ps new file mode 100644 index 0000000000..5b5ba6e1b8 --- /dev/null +++ b/src/util/db2/docs/libtp.usenix.ps @@ -0,0 +1,12340 @@ +%!PS-Adobe-1.0 +%%Creator: utopia:margo (& Seltzer,608-13E,8072,) +%%Title: stdin (ditroff) +%%CreationDate: Thu Dec 12 15:32:11 1991 +%%EndComments +% @(#)psdit.pro 1.3 4/15/88 +% lib/psdit.pro -- prolog for psdit (ditroff) files +% Copyright (c) 1984, 1985 Adobe Systems Incorporated. All Rights Reserved. +% last edit: shore Sat Nov 23 20:28:03 1985 +% RCSID: $Header$ + +% Changed by Edward Wang (edward@ucbarpa.berkeley.edu) to handle graphics, +% 17 Feb, 87. + +/$DITroff 140 dict def $DITroff begin +/fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def +/xi{0 72 11 mul translate 72 resolution div dup neg scale 0 0 moveto + /fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def F + /pagesave save def}def +/PB{save /psv exch def currentpoint translate + resolution 72 div dup neg scale 0 0 moveto}def +/PE{psv restore}def +/arctoobig 90 def /arctoosmall .05 def +/m1 matrix def /m2 matrix def /m3 matrix def /oldmat matrix def +/tan{dup sin exch cos div}def +/point{resolution 72 div mul}def +/dround {transform round exch round exch itransform}def +/xT{/devname exch def}def +/xr{/mh exch def /my exch def /resolution exch def}def +/xp{}def +/xs{docsave restore end}def +/xt{}def +/xf{/fontname exch def /slotno exch def fontnames slotno get fontname eq not + {fonts slotno fontname findfont put fontnames slotno fontname put}if}def +/xH{/fontheight exch def F}def +/xS{/fontslant exch def F}def +/s{/fontsize exch def /fontheight fontsize def F}def +/f{/fontnum exch def F}def +/F{fontheight 0 le{/fontheight fontsize def}if + fonts fontnum get fontsize point 0 0 fontheight point neg 0 0 m1 astore + fontslant 0 ne{1 0 fontslant tan 1 0 0 m2 astore m3 concatmatrix}if + makefont setfont .04 fontsize point mul 0 dround pop setlinewidth}def +/X{exch currentpoint exch pop moveto show}def +/N{3 1 roll moveto show}def +/Y{exch currentpoint pop exch moveto show}def +/S{show}def +/ditpush{}def/ditpop{}def +/AX{3 -1 roll currentpoint exch pop moveto 0 exch ashow}def +/AN{4 2 roll moveto 0 exch ashow}def +/AY{3 -1 roll currentpoint pop exch moveto 0 exch ashow}def +/AS{0 exch ashow}def +/MX{currentpoint exch pop moveto}def +/MY{currentpoint pop exch moveto}def +/MXY{moveto}def +/cb{pop}def % action on unknown char -- nothing for now +/n{}def/w{}def +/p{pop showpage pagesave restore /pagesave save def}def +/Dt{/Dlinewidth exch def}def 1 Dt +/Ds{/Ddash exch def}def -1 Ds +/Di{/Dstipple exch def}def 1 Di +/Dsetlinewidth{2 Dlinewidth mul setlinewidth}def +/Dsetdash{Ddash 4 eq{[8 12]}{Ddash 16 eq{[32 36]} + {Ddash 20 eq{[32 12 8 12]}{[]}ifelse}ifelse}ifelse 0 setdash}def +/Dstroke{gsave Dsetlinewidth Dsetdash 1 setlinecap stroke grestore + currentpoint newpath moveto}def +/Dl{rlineto Dstroke}def +/arcellipse{/diamv exch def /diamh exch def oldmat currentmatrix pop + currentpoint translate 1 diamv diamh div scale /rad diamh 2 div def + currentpoint exch rad add exch rad -180 180 arc oldmat setmatrix}def +/Dc{dup arcellipse Dstroke}def +/De{arcellipse Dstroke}def +/Da{/endv exch def /endh exch def /centerv exch def /centerh exch def + /cradius centerv centerv mul centerh centerh mul add sqrt def + /eradius endv endv mul endh endh mul add sqrt def + /endang endv endh atan def + /startang centerv neg centerh neg atan def + /sweep startang endang sub dup 0 lt{360 add}if def + sweep arctoobig gt + {/midang startang sweep 2 div sub def /midrad cradius eradius add 2 div def + /midh midang cos midrad mul def /midv midang sin midrad mul def + midh neg midv neg endh endv centerh centerv midh midv Da + Da} + {sweep arctoosmall ge + {/controldelt 1 sweep 2 div cos sub 3 sweep 2 div sin mul div 4 mul def + centerv neg controldelt mul centerh controldelt mul + endv neg controldelt mul centerh add endh add + endh controldelt mul centerv add endv add + centerh endh add centerv endv add rcurveto Dstroke} + {centerh endh add centerv endv add rlineto Dstroke} + ifelse} + ifelse}def +/Dpatterns[ +[%cf[widthbits] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000103810000000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000001038100000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0042660000246600>] +[8<0000990000990000>] +[8<0804020180402010>] +[8<2418814242811824>] +[8<6699996666999966>] +[8<8000000008000000>] +[8<00001c3e363e1c00>] +[8<0000000000000000>] +[32<00000040000000c00000004000000040000000e0000000000000000000000000>] +[32<00000000000060000000900000002000000040000000f0000000000000000000>] +[32<000000000000000000e0000000100000006000000010000000e0000000000000>] +[32<00000000000000002000000060000000a0000000f00000002000000000000000>] +[32<0000000e0000000000000000000000000000000f000000080000000e00000001>] +[32<0000090000000600000000000000000000000000000007000000080000000e00>] +[32<00010000000200000004000000040000000000000000000000000000000f0000>] +[32<0900000006000000090000000600000000000000000000000000000006000000>]] +[%ug +[8<0000020000000000>] +[8<0000020000002000>] +[8<0004020000002000>] +[8<0004020000402000>] +[8<0004060000402000>] +[8<0004060000406000>] +[8<0006060000406000>] +[8<0006060000606000>] +[8<00060e0000606000>] +[8<00060e000060e000>] +[8<00070e000060e000>] +[8<00070e000070e000>] +[8<00070e020070e000>] +[8<00070e020070e020>] +[8<04070e020070e020>] +[8<04070e024070e020>] +[8<04070e064070e020>] +[8<04070e064070e060>] +[8<06070e064070e060>] +[8<06070e066070e060>] +[8<06070f066070e060>] +[8<06070f066070f060>] +[8<060f0f066070f060>] +[8<060f0f0660f0f060>] +[8<060f0f0760f0f060>] +[8<060f0f0760f0f070>] +[8<0e0f0f0760f0f070>] +[8<0e0f0f07e0f0f070>] +[8<0e0f0f0fe0f0f070>] +[8<0e0f0f0fe0f0f0f0>] +[8<0f0f0f0fe0f0f0f0>] +[8<0f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f1f9>] +[8<bf8f1f9ff9f8f1f9>] +[8<bf8f1f9ffbf8f1f9>] +[8<bf8f1fdffbf8f1f9>] +[8<bf8f1fdffbf8f1fd>] +[8<ff8f1fdffbf8f1fd>] +[8<ff8f1fdffff8f1fd>] +[8<ff8f1ffffff8f1fd>] +[8<ff8f1ffffff8f1ff>] +[8<ff9f1ffffff8f1ff>] +[8<ff9f1ffffff9f1ff>] +[8<ff9f9ffffff9f1ff>] +[8<ff9f9ffffff9f9ff>] +[8<ffbf9ffffff9f9ff>] +[8<ffbf9ffffffbf9ff>] +[8<ffbfdffffffbf9ff>] +[8<ffbfdffffffbfdff>] +[8<ffffdffffffbfdff>] +[8<ffffdffffffffdff>] +[8<fffffffffffffdff>] +[8<ffffffffffffffff>]] +[%mg +[8<8000000000000000>] +[8<0822080080228000>] +[8<0204081020408001>] +[8<40e0400000000000>] +[8<66999966>] +[8<8001000010080000>] +[8<81c36666c3810000>] +[8<f0e0c08000000000>] +[16<07c00f801f003e007c00f800f001e003c007800f001f003e007c00f801f003e0>] +[16<1f000f8007c003e001f000f8007c003e001f800fc007e003f001f8007c003e00>] +[8<c3c300000000c3c3>] +[16<0040008001000200040008001000200040008000000100020004000800100020>] +[16<0040002000100008000400020001800040002000100008000400020001000080>] +[16<1fc03fe07df0f8f8f07de03fc01f800fc01fe03ff07df8f87df03fe01fc00f80>] +[8<80>] +[8<8040201000000000>] +[8<84cc000048cc0000>] +[8<9900009900000000>] +[8<08040201804020100800020180002010>] +[8<2418814242811824>] +[8<66999966>] +[8<8000000008000000>] +[8<70f8d8f870000000>] +[8<0814224180402010>] +[8<aa00440a11a04400>] +[8<018245aa45820100>] +[8<221c224180808041>] +[8<88000000>] +[8<0855800080550800>] +[8<2844004482440044>] +[8<0810204080412214>] +[8<00>]]]def +/Dfill{ + transform /maxy exch def /maxx exch def + transform /miny exch def /minx exch def + minx maxx gt{/minx maxx /maxx minx def def}if + miny maxy gt{/miny maxy /maxy miny def def}if + Dpatterns Dstipple 1 sub get exch 1 sub get + aload pop /stip exch def /stipw exch def /stiph 128 def + /imatrix[stipw 0 0 stiph 0 0]def + /tmatrix[stipw 0 0 stiph 0 0]def + /minx minx cvi stiph idiv stiph mul def + /miny miny cvi stipw idiv stipw mul def + gsave eoclip 0 setgray + miny stiph maxy{ + tmatrix exch 5 exch put + minx stipw maxx{ + tmatrix exch 4 exch put tmatrix setmatrix + stipw stiph true imatrix {stip} imagemask + }for + }for + grestore +}def +/Dp{Dfill Dstroke}def +/DP{Dfill currentpoint newpath moveto}def +end + +/ditstart{$DITroff begin + /nfonts 60 def % NFONTS makedev/ditroff dependent! + /fonts[nfonts{0}repeat]def + /fontnames[nfonts{()}repeat]def +/docsave save def +}def + +% character outcalls +/oc{ + /pswid exch def /cc exch def /name exch def + /ditwid pswid fontsize mul resolution mul 72000 div def + /ditsiz fontsize resolution mul 72 div def + ocprocs name known{ocprocs name get exec}{name cb}ifelse +}def +/fractm [.65 0 0 .6 0 0] def +/fraction{ + /fden exch def /fnum exch def gsave /cf currentfont def + cf fractm makefont setfont 0 .3 dm 2 copy neg rmoveto + fnum show rmoveto currentfont cf setfont(\244)show setfont fden show + grestore ditwid 0 rmoveto +}def +/oce{grestore ditwid 0 rmoveto}def +/dm{ditsiz mul}def +/ocprocs 50 dict def ocprocs begin +(14){(1)(4)fraction}def +(12){(1)(2)fraction}def +(34){(3)(4)fraction}def +(13){(1)(3)fraction}def +(23){(2)(3)fraction}def +(18){(1)(8)fraction}def +(38){(3)(8)fraction}def +(58){(5)(8)fraction}def +(78){(7)(8)fraction}def +(sr){gsave 0 .06 dm rmoveto(\326)show oce}def +(is){gsave 0 .15 dm rmoveto(\362)show oce}def +(->){gsave 0 .02 dm rmoveto(\256)show oce}def +(<-){gsave 0 .02 dm rmoveto(\254)show oce}def +(==){gsave 0 .05 dm rmoveto(\272)show oce}def +(uc){gsave currentpoint 400 .009 dm mul add translate + 8 -8 scale ucseal oce}def +end + +% an attempt at a PostScript FONT to implement ditroff special chars +% this will enable us to +% cache the little buggers +% generate faster, more compact PS out of psdit +% confuse everyone (including myself)! +50 dict dup begin +/FontType 3 def +/FontName /DIThacks def +/FontMatrix [.001 0 0 .001 0 0] def +/FontBBox [-260 -260 900 900] def% a lie but ... +/Encoding 256 array def +0 1 255{Encoding exch /.notdef put}for +Encoding + dup 8#040/space put %space + dup 8#110/rc put %right ceil + dup 8#111/lt put %left top curl + dup 8#112/bv put %bold vert + dup 8#113/lk put %left mid curl + dup 8#114/lb put %left bot curl + dup 8#115/rt put %right top curl + dup 8#116/rk put %right mid curl + dup 8#117/rb put %right bot curl + dup 8#120/rf put %right floor + dup 8#121/lf put %left floor + dup 8#122/lc put %left ceil + dup 8#140/sq put %square + dup 8#141/bx put %box + dup 8#142/ci put %circle + dup 8#143/br put %box rule + dup 8#144/rn put %root extender + dup 8#145/vr put %vertical rule + dup 8#146/ob put %outline bullet + dup 8#147/bu put %bullet + dup 8#150/ru put %rule + dup 8#151/ul put %underline + pop +/DITfd 100 dict def +/BuildChar{0 begin + /cc exch def /fd exch def + /charname fd /Encoding get cc get def + /charwid fd /Metrics get charname get def + /charproc fd /CharProcs get charname get def + charwid 0 fd /FontBBox get aload pop setcachedevice + 2 setlinejoin 40 setlinewidth + newpath 0 0 moveto gsave charproc grestore + end}def +/BuildChar load 0 DITfd put +/CharProcs 50 dict def +CharProcs begin +/space{}def +/.notdef{}def +/ru{500 0 rls}def +/rn{0 840 moveto 500 0 rls}def +/vr{0 800 moveto 0 -770 rls}def +/bv{0 800 moveto 0 -1000 rls}def +/br{0 840 moveto 0 -1000 rls}def +/ul{0 -140 moveto 500 0 rls}def +/ob{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath stroke}def +/bu{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath fill}def +/sq{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath stroke}def +/bx{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath fill}def +/ci{500 360 rmoveto currentpoint newpath 333 0 360 arc + 50 setlinewidth stroke}def + +/lt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 add exch s4 a4p stroke}def +/lb{0 800 moveto 0 -550 rlineto currx -200 2cx s4 add exch s4 a4p stroke}def +/rt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 sub exch s4 a4p stroke}def +/rb{0 800 moveto 0 -500 rlineto currx -200 2cx s4 sub exch s4 a4p stroke}def +/lk{0 800 moveto 0 300 -300 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/rk{0 800 moveto 0 300 s2 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/lf{0 800 moveto 0 -1000 rlineto s4 0 rls}def +/rf{0 800 moveto 0 -1000 rlineto s4 neg 0 rls}def +/lc{0 -200 moveto 0 1000 rlineto s4 0 rls}def +/rc{0 -200 moveto 0 1000 rlineto s4 neg 0 rls}def +end + +/Metrics 50 dict def Metrics begin +/.notdef 0 def +/space 500 def +/ru 500 def +/br 0 def +/lt 416 def +/lb 416 def +/rt 416 def +/rb 416 def +/lk 416 def +/rk 416 def +/rc 416 def +/lc 416 def +/rf 416 def +/lf 416 def +/bv 416 def +/ob 350 def +/bu 350 def +/ci 750 def +/bx 750 def +/sq 750 def +/rn 500 def +/ul 500 def +/vr 0 def +end + +DITfd begin +/s2 500 def /s4 250 def /s3 333 def +/a4p{arcto pop pop pop pop}def +/2cx{2 copy exch}def +/rls{rlineto stroke}def +/currx{currentpoint pop}def +/dround{transform round exch round exch itransform} def +end +end +/DIThacks exch definefont pop +ditstart +(psc)xT +576 1 1 xr +1(Times-Roman)xf 1 f +2(Times-Italic)xf 2 f +3(Times-Bold)xf 3 f +4(Times-BoldItalic)xf 4 f +5(Helvetica)xf 5 f +6(Helvetica-Bold)xf 6 f +7(Courier)xf 7 f +8(Courier-Bold)xf 8 f +9(Symbol)xf 9 f +10(DIThacks)xf 10 f +10 s +1 f +xi +%%EndProlog + +%%Page: 1 1 +10 s 10 xH 0 xS 1 f +3 f +14 s +1205 1206(LIBTP:)N +1633(Portable,)X +2100(M)X +2206(odular)X +2551(Transactions)X +3202(for)X +3374(UNIX)X +1 f +11 s +3661 1162(1)N +2 f +12 s +2182 1398(Margo)N +2467(Seltzer)X +2171 1494(Michael)N +2511(Olson)X +1800 1590(University)N +2225(of)X +2324(California,)X +2773(Berkeley)X +3 f +2277 1878(Abstract)N +1 f +10 s +755 2001(Transactions)N +1198(provide)X +1475(a)X +1543(useful)X +1771(programming)X +2239(paradigm)X +2574(for)X +2700(maintaining)X +3114(logical)X +3364(consistency,)X +3790(arbitrating)X +4156(con-)X +555 2091(current)N +808(access,)X +1059(and)X +1200(managing)X +1540(recovery.)X +1886(In)X +1977(traditional)X +2330(UNIX)X +2555(systems,)X +2852(the)X +2974(only)X +3140(easy)X +3307(way)X +3465(of)X +3556(using)X +3753(transactions)X +4160(is)X +4237(to)X +555 2181(purchase)N +876(a)X +947(database)X +1258(system.)X +1554(Such)X +1748(systems)X +2035(are)X +2168(often)X +2367(slow,)X +2572(costly,)X +2817(and)X +2967(may)X +3139(not)X +3275(provide)X +3554(the)X +3686(exact)X +3890(functionality)X +555 2271(desired.)N +848(This)X +1011(paper)X +1210(presents)X +1493(the)X +1611(design,)X +1860(implementation,)X +2402(and)X +2538(performance)X +2965(of)X +3052(LIBTP,)X +3314(a)X +3370(simple,)X +3623(non-proprietary)X +4147(tran-)X +555 2361(saction)N +809(library)X +1050(using)X +1249(the)X +1373(4.4BSD)X +1654(database)X +1957(access)X +2189(routines)X +2473(\()X +3 f +2500(db)X +1 f +2588(\(3\)\).)X +2775(On)X +2899(a)X +2961(conventional)X +3401(transaction)X +3779(processing)X +4148(style)X +555 2451(benchmark,)N +959(its)X +1061(performance)X +1495(is)X +1575(approximately)X +2065(85%)X +2239(that)X +2386(of)X +2480(the)X +2604(database)X +2907(access)X +3139(routines)X +3423(without)X +3693(transaction)X +4071(protec-)X +555 2541(tion,)N +725(200%)X +938(that)X +1084(of)X +1177(using)X +3 f +1376(fsync)X +1 f +1554(\(2\))X +1674(to)X +1761(commit)X +2030(modi\256cations)X +2490(to)X +2577(disk,)X +2755(and)X +2896(125%)X +3108(that)X +3253(of)X +3345(a)X +3406(commercial)X +3810(relational)X +4138(data-)X +555 2631(base)N +718(system.)X +3 f +555 2817(1.)N +655(Introduction)X +1 f +755 2940(Transactions)N +1186(are)X +1306(used)X +1474(in)X +1557(database)X +1855(systems)X +2129(to)X +2212(enable)X +2443(concurrent)X +2807(users)X +2992(to)X +3074(apply)X +3272(multi-operation)X +3790(updates)X +4055(without)X +555 3030(violating)N +863(the)X +985(integrity)X +1280(of)X +1371(the)X +1493(database.)X +1814(They)X +2003(provide)X +2271(the)X +2392(properties)X +2736(of)X +2826(atomicity,)X +3171(consistency,)X +3588(isolation,)X +3906(and)X +4045(durabil-)X +555 3120(ity.)N +701(By)X +816(atomicity,)X +1160(we)X +1276(mean)X +1472(that)X +1614(the)X +1734(set)X +1845(of)X +1934(updates)X +2200(comprising)X +2581(a)X +2638(transaction)X +3011(must)X +3187(be)X +3284(applied)X +3541(as)X +3629(a)X +3686(single)X +3898(unit;)X +4085(that)X +4226(is,)X +555 3210(they)N +714(must)X +890(either)X +1094(all)X +1195(be)X +1292(applied)X +1549(to)X +1632(the)X +1751(database)X +2049(or)X +2137(all)X +2238(be)X +2335(absent.)X +2601(Consistency)X +3013(requires)X +3293(that)X +3434(a)X +3491(transaction)X +3864(take)X +4019(the)X +4138(data-)X +555 3300(base)N +725(from)X +908(one)X +1051(logically)X +1358(consistent)X +1704(state)X +1877(to)X +1965(another.)X +2272(The)X +2423(property)X +2721(of)X +2814(isolation)X +3115(requires)X +3400(that)X +3546(concurrent)X +3916(transactions)X +555 3390(yield)N +750(results)X +994(which)X +1225(are)X +1358(indistinguishable)X +1938(from)X +2128(the)X +2260(results)X +2503(which)X +2733(would)X +2967(be)X +3077(obtained)X +3387(by)X +3501(running)X +3784(the)X +3916(transactions)X +555 3480(sequentially.)N +1002(Finally,)X +1268(durability)X +1599(requires)X +1878(that)X +2018(once)X +2190(transactions)X +2593(have)X +2765(been)X +2937(committed,)X +3319(their)X +3486(results)X +3715(must)X +3890(be)X +3986(preserved)X +555 3570(across)N +776(system)X +1018(failures)X +1279([TPCB90].)X +755 3693(Although)N +1080(these)X +1268(properties)X +1612(are)X +1734(most)X +1912(frequently)X +2265(discussed)X +2595(in)X +2680(the)X +2801(context)X +3060(of)X +3150(databases,)X +3501(they)X +3661(are)X +3782(useful)X +4000(program-)X +555 3783(ming)N +750(paradigms)X +1114(for)X +1238(more)X +1433(general)X +1700(purpose)X +1984(applications.)X +2441(There)X +2659(are)X +2788(several)X +3046(different)X +3353(situations)X +3689(where)X +3916(transactions)X +555 3873(can)N +687(be)X +783(used)X +950(to)X +1032(replace)X +1285(current)X +1533(ad-hoc)X +1772(mechanisms.)X +755 3996(One)N +910(situation)X +1206(is)X +1280(when)X +1475(multiple)X +1762(\256les)X +1916(or)X +2004(parts)X +2181(of)X +2269(\256les)X +2422(need)X +2594(to)X +2676(be)X +2772(updated)X +3046(in)X +3128(an)X +3224(atomic)X +3462(fashion.)X +3758(For)X +3889(example,)X +4201(the)X +555 4086(traditional)N +907(UNIX)X +1131(\256le)X +1256(system)X +1501(uses)X +1661(ordering)X +1955(constraints)X +2324(to)X +2408(achieve)X +2676(recoverability)X +3144(in)X +3228(the)X +3348(face)X +3505(of)X +3594(crashes.)X +3893(When)X +4107(a)X +4165(new)X +555 4176(\256le)N +678(is)X +752(created,)X +1026(its)X +1122(inode)X +1321(is)X +1395(written)X +1642(to)X +1724(disk)X +1877(before)X +2103(the)X +2221(new)X +2375(\256le)X +2497(is)X +2570(added)X +2782(to)X +2864(the)X +2982(directory)X +3292(structure.)X +3633(This)X +3795(guarantees)X +4159(that,)X +555 4266(if)N +627(the)X +748(system)X +993(crashes)X +1253(between)X +1544(the)X +1665(two)X +1808(I/O's,)X +2016(the)X +2137(directory)X +2450(does)X +2620(not)X +2744(contain)X +3002(a)X +3060 0.4531(reference)AX +3383(to)X +3467(an)X +3565(invalid)X +3809(inode.)X +4049(In)X +4138(actu-)X +555 4356(ality,)N +741(the)X +863(desired)X +1119(effect)X +1326(is)X +1402(that)X +1545(these)X +1733(two)X +1876(updates)X +2144(have)X +2319(the)X +2440(transactional)X +2873(property)X +3168(of)X +3258(atomicity)X +3583(\(either)X +3816(both)X +3981(writes)X +4200(are)X +555 4446(visible)N +790(or)X +879(neither)X +1124(is\).)X +1266(Rather)X +1501(than)X +1660(building)X +1947(special)X +2191(purpose)X +2466(recovery)X +2769(mechanisms)X +3186(into)X +3331(the)X +3450(\256le)X +3573(system)X +3816(or)X +3904(related)X +4144(tools)X +555 4536(\()N +2 f +582(e.g.)X +3 f +726(fsck)X +1 f +864(\(8\)\),)X +1033(one)X +1177(could)X +1383(use)X +1518(general)X +1783(purpose)X +2064(transaction)X +2443(recovery)X +2752(protocols)X +3077(after)X +3252(system)X +3501(failure.)X +3778(Any)X +3943(application)X +555 4626(that)N +705(needs)X +918(to)X +1010(keep)X +1192(multiple,)X +1508(related)X +1757(\256les)X +1920(\(or)X +2044(directories\))X +2440(consistent)X +2790(should)X +3032(do)X +3141(so)X +3241(using)X +3443(transactions.)X +3895(Source)X +4147(code)X +555 4716(control)N +805(systems,)X +1101(such)X +1271(as)X +1361(RCS)X +1534(and)X +1673(SCCS,)X +1910(should)X +2146(use)X +2276(transaction)X +2651(semantics)X +2990(to)X +3075(allow)X +3276(the)X +3397(``checking)X +3764(in'')X +3903(of)X +3992(groups)X +4232(of)X +555 4806(related)N +801(\256les.)X +1001(In)X +1095(this)X +1237(way,)X +1418(if)X +1493(the)X +1617 0.2841(``check-in'')AX +2028(fails,)X +2212(the)X +2336(transaction)X +2714(may)X +2878(be)X +2980(aborted,)X +3267(backing)X +3547(out)X +3675(the)X +3799(partial)X +4030(``check-)X +555 4896(in'')N +691(leaving)X +947(the)X +1065(source)X +1295(repository)X +1640(in)X +1722(a)X +1778(consistent)X +2118(state.)X +755 5019(A)N +842(second)X +1094(situation)X +1398(where)X +1624(transactions)X +2036(can)X +2177(be)X +2282(used)X +2458(to)X +2549(replace)X +2811(current)X +3068(ad-hoc)X +3316(mechanisms)X +3741(is)X +3822(in)X +3912(applications)X +555 5109(where)N +776(concurrent)X +1144(updates)X +1413(to)X +1499(a)X +1559(shared)X +1793(\256le)X +1919(are)X +2042(desired,)X +2318(but)X +2444(there)X +2629(is)X +2706(logical)X +2948(consistency)X +3345(of)X +3435(the)X +3556(data)X +3713(which)X +3932(needs)X +4138(to)X +4223(be)X +555 5199(preserved.)N +928(For)X +1059(example,)X +1371(when)X +1565(the)X +1683(password)X +2006(\256le)X +2128(is)X +2201(updated,)X +2495(\256le)X +2617(locking)X +2877(is)X +2950(used)X +3117(to)X +3199(disallow)X +3490(concurrent)X +3854(access.)X +4120(Tran-)X +555 5289(saction)N +804(semantics)X +1142(on)X +1244(the)X +1364(password)X +1689(\256les)X +1844(would)X +2066(allow)X +2266(concurrent)X +2632(updates,)X +2919(while)X +3119(preserving)X +3479(the)X +3598(logical)X +3837(consistency)X +4232(of)X +555 5379(the)N +681(password)X +1012(database.)X +1357(Similarly,)X +1702(UNIX)X +1930(utilities)X +2196(which)X +2419(rewrite)X +2674(\256les)X +2834(face)X +2996(a)X +3059(potential)X +3366(race)X +3528(condition)X +3857(between)X +4152(their)X +555 5469(rewriting)N +871(a)X +929(\256le)X +1053(and)X +1191(another)X +1453(process)X +1715(reading)X +1977(the)X +2096(\256le.)X +2259(For)X +2391(example,)X +2704(the)X +2823(compiler)X +3129(\(more)X +3342(precisely,)X +3673(the)X +3792(assembler\))X +4161(may)X +8 s +10 f +555 5541(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5619(1)N +8 s +763 5644(To)N +850(appear)X +1035(in)X +1101(the)X +2 f +1195(Proceedings)X +1530(of)X +1596(the)X +1690(1992)X +1834(Winter)X +2024(Usenix)X +1 f +2201(,)X +2233(San)X +2345(Francisco,)X +2625(CA,)X +2746(January)X +2960(1992.)X + +2 p +%%Page: 2 2 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(have)N +737(to)X +829(rewrite)X +1087(a)X +1152(\256le)X +1283(to)X +1374(which)X +1599(it)X +1672(has)X +1808(write)X +2002(permission)X +2382(in)X +2473(a)X +2538(directory)X +2857(to)X +2948(which)X +3173(it)X +3246(does)X +3422(not)X +3553(have)X +3734(write)X +3928(permission.)X +555 720(While)N +779(the)X +904(``.o'')X +1099(\256le)X +1228(is)X +1308(being)X +1513(written,)X +1787(another)X +2055(utility)X +2272(such)X +2446(as)X +3 f +2540(nm)X +1 f +2651(\(1\))X +2772(or)X +3 f +2866(ar)X +1 f +2942(\(1\))X +3063(may)X +3228(read)X +3394(the)X +3519(\256le)X +3648(and)X +3791(produce)X +4077(invalid)X +555 810(results)N +790(since)X +981(the)X +1105(\256le)X +1233(has)X +1366(not)X +1494(been)X +1672(completely)X +2054(written.)X +2347(Currently,)X +2700(some)X +2895(utilities)X +3160(use)X +3293(special)X +3542(purpose)X +3821(code)X +3998(to)X +4085(handle)X +555 900(such)N +722(cases)X +912(while)X +1110(others)X +1326(ignore)X +1551(the)X +1669(problem)X +1956(and)X +2092(force)X +2278(users)X +2463(to)X +2545(live)X +2685(with)X +2847(the)X +2965(consequences.)X +755 1023(In)N +845(this)X +983(paper,)X +1205(we)X +1322(present)X +1577(a)X +1635(simple)X +1870(library)X +2106(which)X +2324(provides)X +2622(transaction)X +2996(semantics)X +3334(\(atomicity,)X +3705(consistency,)X +4121(isola-)X +555 1113(tion,)N +720(and)X +857(durability\).)X +1236(The)X +1382(4.4BSD)X +1658(database)X +1956(access)X +2182(methods)X +2473(have)X +2645(been)X +2817(modi\256ed)X +3121(to)X +3203(use)X +3330(this)X +3465(library,)X +3719(optionally)X +4063(provid-)X +555 1203(ing)N +682(shared)X +917(buffer)X +1139(management)X +1574(between)X +1867(applications,)X +2298(locking,)X +2582(and)X +2722(transaction)X +3098(semantics.)X +3478(Any)X +3640(UNIX)X +3865(program)X +4161(may)X +555 1293(transaction)N +930(protect)X +1176(its)X +1274(data)X +1430(by)X +1532(requesting)X +1888(transaction)X +2262(protection)X +2609(with)X +2773(the)X +3 f +2893(db)X +1 f +2981(\(3\))X +3097(library)X +3333(or)X +3422(by)X +3524(adding)X +3764(appropriate)X +4152(calls)X +555 1383(to)N +646(the)X +773(transaction)X +1154(manager,)X +1480(buffer)X +1706(manager,)X +2032(lock)X +2199(manager,)X +2525(and)X +2670(log)X +2801(manager.)X +3147(The)X +3301(library)X +3543(routines)X +3829(may)X +3995(be)X +4099(linked)X +555 1473(into)N +708(the)X +834(host)X +995(application)X +1379(and)X +1523(called)X +1743(by)X +1851(subroutine)X +2217(interface,)X +2547(or)X +2642(they)X +2808(may)X +2974(reside)X +3194(in)X +3284(a)X +3348(separate)X +3640(server)X +3865(process.)X +4174(The)X +555 1563(server)N +772(architecture)X +1172(provides)X +1468(for)X +1582(network)X +1865(access)X +2091(and)X +2227(better)X +2430(protection)X +2775(mechanisms.)X +3 f +555 1749(2.)N +655(Related)X +938(Work)X +1 f +755 1872(There)N +1000(has)X +1164(been)X +1373(much)X +1608(discussion)X +1998(in)X +2117(recent)X +2371(years)X +2597(about)X +2831(new)X +3021(transaction)X +3429(models)X +3716(and)X +3888(architectures)X +555 1962 0.1172([SPEC88][NODI90][CHEN91][MOHA91].)AN +2009(Much)X +2220(of)X +2310(this)X +2448(work)X +2636(focuses)X +2900(on)X +3003(new)X +3160(ways)X +3348(to)X +3433(model)X +3656(transactions)X +4062(and)X +4201(the)X +555 2052(interactions)N +953(between)X +1245(them,)X +1449(while)X +1651(the)X +1772(work)X +1960(presented)X +2291(here)X +2453(focuses)X +2717(on)X +2820(the)X +2941(implementation)X +3466(and)X +3605(performance)X +4035(of)X +4125(tradi-)X +555 2142(tional)N +757(transaction)X +1129(techniques)X +1492(\(write-ahead)X +1919(logging)X +2183(and)X +2319(two-phase)X +2669(locking\))X +2956(on)X +3056(a)X +3112(standard)X +3404(operating)X +3727(system)X +3969(\(UNIX\).)X +755 2265(Such)N +947(traditional)X +1308(operating)X +1643(systems)X +1928(are)X +2059(often)X +2256(criticized)X +2587(for)X +2713(their)X +2892(inability)X +3190(to)X +3283(perform)X +3573(transaction)X +3956(processing)X +555 2355(adequately.)N +971([STON81])X +1342(cites)X +1517(three)X +1706(main)X +1894(areas)X +2088(of)X +2183(inadequate)X +2559(support:)X +2849(buffer)X +3074(management,)X +3532(the)X +3658(\256le)X +3788(system,)X +4058(and)X +4201(the)X +555 2445(process)N +823(structure.)X +1191(These)X +1410(arguments)X +1771(are)X +1897(summarized)X +2316(in)X +2405(table)X +2587(one.)X +2769(Fortunately,)X +3184(much)X +3388(has)X +3521(changed)X +3815(since)X +4006(1981.)X +4232(In)X +555 2535(the)N +683(area)X +848(of)X +945(buffer)X +1172(management,)X +1632(most)X +1817(UNIX)X +2048(systems)X +2331(provide)X +2606(the)X +2734(ability)X +2968(to)X +3060(memory)X +3357(map)X +3525(\256les,)X +3708(thus)X +3870(obviating)X +4201(the)X +555 2625(need)N +734(for)X +855(a)X +918(copy)X +1101(between)X +1396(kernel)X +1624(and)X +1766(user)X +1926(space.)X +2171(If)X +2251(a)X +2313(database)X +2616(system)X +2864(is)X +2943(going)X +3151(to)X +3239(use)X +3372(the)X +3496(\256le)X +3624(system)X +3872(buffer)X +4095(cache,)X +555 2715(then)N +719(a)X +781(system)X +1029(call)X +1171(is)X +1250(required.)X +1584(However,)X +1924(if)X +1998(buffering)X +2322(is)X +2400(provided)X +2710(at)X +2793(user)X +2952(level)X +3133(using)X +3331(shared)X +3566(memory,)X +3878(as)X +3970(in)X +4057(LIBTP,)X +555 2805(buffer)N +776(management)X +1210(is)X +1287(only)X +1452(as)X +1542(slow)X +1716(as)X +1806(access)X +2035(to)X +2120(shared)X +2353(memory)X +2643(and)X +2782(any)X +2921(replacement)X +3337(algorithm)X +3671(may)X +3832(be)X +3931(used.)X +4121(Since)X +555 2895(multiple)N +849(processes)X +1185(can)X +1325(access)X +1559(the)X +1685(shared)X +1923(data,)X +2105(prefetching)X +2499(may)X +2665(be)X +2769(accomplished)X +3238(by)X +3346(separate)X +3638(processes)X +3973(or)X +4067(threads)X +555 2985(whose)N +782(sole)X +932(purpose)X +1207(is)X +1281(to)X +1364(prefetch)X +1649(pages)X +1853(and)X +1990(wait)X +2149(on)X +2250(them.)X +2471(There)X +2680(is)X +2754(still)X +2894(no)X +2995(way)X +3150(to)X +3233(enforce)X +3496(write)X +3682(ordering)X +3975(other)X +4161(than)X +555 3075(keeping)N +829(pages)X +1032(in)X +1114(user)X +1268(memory)X +1555(and)X +1691(using)X +1884(the)X +3 f +2002(fsync)X +1 f +2180(\(3\))X +2294(system)X +2536(call)X +2672(to)X +2754(perform)X +3033(synchronous)X +3458(writes.)X +755 3198(In)N +845(the)X +966(area)X +1124(of)X +1214(\256le)X +1339(systems,)X +1635(the)X +1756(fast)X +1895(\256le)X +2020(system)X +2265(\(FFS\))X +2474([MCKU84])X +2871(allows)X +3103(allocation)X +3442(in)X +3527(units)X +3704(up)X +3806(to)X +3890(64KBytes)X +4232(as)X +555 3288(opposed)N +846(to)X +932(the)X +1054(4KByte)X +1327(and)X +1466(8KByte)X +1738(\256gures)X +1979(quoted)X +2220(in)X +2305([STON81].)X +2711(The)X +2859(measurements)X +3341(in)X +3426(this)X +3564(paper)X +3766(were)X +3946(taken)X +4143(from)X +555 3378(an)N +655(8KByte)X +928(FFS,)X +1104(but)X +1230(as)X +1320(LIBTP)X +1565(runs)X +1726(exclusively)X +2114(in)X +2199(user)X +2356(space,)X +2578(there)X +2762(is)X +2838(nothing)X +3105(to)X +3190(prevent)X +3454(it)X +3521(from)X +3700(being)X +3901(run)X +4031(on)X +4134(other)X +555 3468(UNIX)N +776(compatible)X +1152(\256le)X +1274(systems)X +1547(\(e.g.)X +1710(log-structured)X +2180([ROSE91],)X +2558(extent-based,)X +3004(or)X +3091(multi-block)X +3484([SELT91]\).)X +755 3591(Finally,)N +1029(with)X +1199(regard)X +1433(to)X +1523(the)X +1648(process)X +1916(structure,)X +2244(neither)X +2494(context)X +2757(switch)X +2993(time)X +3162(nor)X +3296(scheduling)X +3670(around)X +3920(semaphores)X +555 3681(seems)N +785(to)X +881(affect)X +1099(the)X +1231(system)X +1487(performance.)X +1968(However,)X +2317(the)X +2449(implementation)X +2984(of)X +3084(semaphores)X +3496(can)X +3641(impact)X +3892(performance)X +555 3771(tremendously.)N +1051(This)X +1213(is)X +1286(discussed)X +1613(in)X +1695(more)X +1880(detail)X +2078(in)X +2160(section)X +2407(4.3.)X +755 3894(The)N +908(Tuxedo)X +1181(system)X +1431(from)X +1615(AT&T)X +1861(is)X +1941(a)X +2004(transaction)X +2383(manager)X +2687(which)X +2910(coordinates)X +3307(distributed)X +3676(transaction)X +4055(commit)X +555 3984(from)N +738(a)X +801(variety)X +1051(of)X +1145(different)X +1449(local)X +1632(transaction)X +2011(managers.)X +2386(At)X +2493(this)X +2634(time,)X +2822(LIBTP)X +3070(does)X +3243(not)X +3371(have)X +3549(its)X +3650(own)X +3814(mechanism)X +4205(for)X +555 4074(distributed)N +942(commit)X +1231(processing,)X +1639(but)X +1786(could)X +2009(be)X +2130(used)X +2322(as)X +2434(a)X +2515(local)X +2716(transaction)X +3113(agent)X +3331(by)X +3455(systems)X +3752(such)X +3943(as)X +4054(Tuxedo)X +555 4164([ANDR89].)N +10 f +863 4393(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 4483(Buffer)N +1133(Management)X +10 f +1672(g)X +1 f +1720(Data)X +1892(must)X +2067(be)X +2163(copied)X +2397(between)X +2685(kernel)X +2906(space)X +3105(and)X +3241(user)X +3395(space.)X +10 f +1672 4573(g)N +1 f +1720(Buffer)X +1950(pool)X +2112(access)X +2338(is)X +2411(too)X +2533(slow.)X +10 f +1672 4663(g)N +1 f +1720(There)X +1928(is)X +2001(no)X +2101(way)X +2255(to)X +2337(request)X +2589(prefetch.)X +10 f +1672 4753(g)N +1 f +1720(Replacement)X +2159(is)X +2232(usually)X +2483(LRU)X +2663(which)X +2879(may)X +3037(be)X +3133(suboptimal)X +3508(for)X +3622(databases.)X +10 f +1672 4843(g)N +1 f +1720(There)X +1928(is)X +2001(no)X +2101(way)X +2255(to)X +2337(guarantee)X +2670(write)X +2855(ordering.)X +10 f +863 4853(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 4943(File)N +1047(System)X +10 f +1672(g)X +1 f +1720(Allocation)X +2078(is)X +2151(done)X +2327(in)X +2409(small)X +2602(blocks)X +2831(\(usually)X +3109(4K)X +3227(or)X +3314(8K\).)X +10 f +1672 5033(g)N +1 f +1720(Logical)X +1985(organization)X +2406(of)X +2493(\256les)X +2646(is)X +2719(redundantly)X +3122(expressed.)X +10 f +863 5043(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 5133(Process)N +1168(Structure)X +10 f +1672(g)X +1 f +1720(Context)X +1993(switching)X +2324(and)X +2460(message)X +2752(passing)X +3012(are)X +3131(too)X +3253(slow.)X +10 f +1672 5223(g)N +1 f +1720(A)X +1798(process)X +2059(may)X +2217(be)X +2313(descheduled)X +2730(while)X +2928(holding)X +3192(a)X +3248(semaphore.)X +10 f +863 5233(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +863(c)X +5193(c)Y +5113(c)Y +5033(c)Y +4953(c)Y +4873(c)Y +4793(c)Y +4713(c)Y +4633(c)Y +4553(c)Y +4473(c)Y +3990 5233(c)N +5193(c)Y +5113(c)Y +5033(c)Y +4953(c)Y +4873(c)Y +4793(c)Y +4713(c)Y +4633(c)Y +4553(c)Y +4473(c)Y +3 f +1156 5446(Table)N +1371(One:)X +1560(Shortcomings)X +2051(of)X +2138(UNIX)X +2363(transaction)X +2770(support)X +3056(cited)X +3241(in)X +3327([STON81].)X + +3 p +%%Page: 3 3 +10 s 10 xH 0 xS 3 f +1 f +755 630(The)N +901(transaction)X +1274(architecture)X +1675(presented)X +2004(in)X +2087([YOUN91])X +2474(is)X +2548(very)X +2712(similar)X +2955(to)X +3038(that)X +3179(implemented)X +3618(in)X +3701(the)X +3820(LIBTP.)X +4103(While)X +555 720([YOUN91])N +947(presents)X +1236(a)X +1298(model)X +1524(for)X +1644(providing)X +1981(transaction)X +2359(services,)X +2663(this)X +2803(paper)X +3007(focuses)X +3273(on)X +3378(the)X +3501(implementation)X +4028(and)X +4169(per-)X +555 810(formance)N +881(of)X +970(a)X +1028(particular)X +1358(system.)X +1642(In)X +1731(addition,)X +2034(we)X +2149(provide)X +2415(detailed)X +2690(comparisons)X +3116(with)X +3279(alternative)X +3639(solutions:)X +3970(traditional)X +555 900(UNIX)N +776(services)X +1055(and)X +1191(commercial)X +1590(database)X +1887(management)X +2317(systems.)X +3 f +555 1086(3.)N +655(Architecture)X +1 f +755 1209(The)N +906(library)X +1146(is)X +1224(designed)X +1534(to)X +1621(provide)X +1891(well)X +2054(de\256ned)X +2315(interfaces)X +2653(to)X +2740(the)X +2863(services)X +3147(required)X +3440(for)X +3559(transaction)X +3936(processing.)X +555 1299(These)N +777(services)X +1066(are)X +1195(recovery,)X +1527(concurrency)X +1955(control,)X +2232(and)X +2378(the)X +2506(management)X +2946(of)X +3043(shared)X +3283(data.)X +3487(First)X +3663(we)X +3787(will)X +3941(discuss)X +4201(the)X +555 1389(design)N +795(tradeoffs)X +1112(in)X +1205(the)X +1334(selection)X +1650(of)X +1748(recovery,)X +2081(concurrency)X +2510(control,)X +2787(and)X +2933(buffer)X +3160(management)X +3600(implementations,)X +4183(and)X +555 1479(then)N +713(we)X +827(will)X +971(present)X +1223(the)X +1341(overall)X +1584(library)X +1818(architecture)X +2218(and)X +2354(module)X +2614(descriptions.)X +3 f +555 1665(3.1.)N +715(Design)X +966(Tradeoffs)X +1 f +3 f +555 1851(3.1.1.)N +775(Crash)X +1004(Recovery)X +1 f +755 1974(The)N +909(recovery)X +1220(protocol)X +1516(is)X +1598(responsible)X +1992(for)X +2115(providing)X +2455(the)X +2582(transaction)X +2963(semantics)X +3308(discussed)X +3644(earlier.)X +3919(There)X +4136(are)X +4263(a)X +555 2064(wide)N +739(range)X +946(of)X +1041(recovery)X +1351(protocols)X +1677(available)X +1995([HAER83],)X +2395(but)X +2525(we)X +2647(can)X +2786(crudely)X +3054(divide)X +3281(them)X +3468(into)X +3619(two)X +3766(main)X +3953(categories.)X +555 2154(The)N +706(\256rst)X +856(category)X +1159(records)X +1422(all)X +1528(modi\256cations)X +1989(to)X +2077(the)X +2201(database)X +2504(in)X +2592(a)X +2653(separate)X +2942(\256le,)X +3089(and)X +3230(uses)X +3393(this)X +3533(\256le)X +3660(\(log\))X +3841(to)X +3928(back)X +4105(out)X +4232(or)X +555 2244(reapply)N +825(these)X +1019(modi\256cations)X +1483(if)X +1561(a)X +1626(transaction)X +2007(aborts)X +2232(or)X +2328(the)X +2455(system)X +2706(crashes.)X +3012(We)X +3153(call)X +3298(this)X +3442(set)X +3560(the)X +3 f +3687(logging)X +3963(protocols)X +1 f +4279(.)X +555 2334(The)N +703(second)X +949(category)X +1249(avoids)X +1481(the)X +1602(use)X +1732(of)X +1822(a)X +1881(log)X +2006(by)X +2109(carefully)X +2418(controlling)X +2792(when)X +2989(data)X +3146(are)X +3268(written)X +3518(to)X +3603(disk.)X +3799(We)X +3934(call)X +4073(this)X +4210(set)X +555 2424(the)N +3 f +673(non-logging)X +1096(protocols)X +1 f +1412(.)X +755 2547(Non-logging)N +1185(protocols)X +1504(hold)X +1666(dirty)X +1837(buffers)X +2085(in)X +2167(main)X +2347(memory)X +2634(or)X +2721(temporary)X +3071(\256les)X +3224(until)X +3390(commit)X +3654(and)X +3790(then)X +3948(force)X +4134(these)X +555 2637(pages)N +769(to)X +862(disk)X +1026(at)X +1115(transaction)X +1498(commit.)X +1813(While)X +2040(we)X +2165(can)X +2308(use)X +2446(temporary)X +2807(\256les)X +2971(to)X +3064(hold)X +3237(dirty)X +3418(pages)X +3631(that)X +3781(may)X +3949(need)X +4131(to)X +4223(be)X +555 2727(evicted)N +810(from)X +988(memory)X +1277(during)X +1508(a)X +1566(long-running)X +2006(transaction,)X +2400(the)X +2520(only)X +2684(user-level)X +3023(mechanism)X +3410(to)X +3494(force)X +3682(pages)X +3887(to)X +3971(disk)X +4126(is)X +4201(the)X +3 f +555 2817(fsync)N +1 f +733(\(2\))X +850(system)X +1095(call.)X +1274(Unfortunately,)X +3 f +1767(fsync)X +1 f +1945(\(2\))X +2062(is)X +2138(an)X +2237(expensive)X +2581(system)X +2826(call)X +2965(in)X +3050(that)X +3193(it)X +3260(forces)X +3480(all)X +3583(pages)X +3789(of)X +3879(a)X +3938(\256le)X +4062(to)X +4146(disk,)X +555 2907(and)N +691(transactions)X +1094(that)X +1234(manage)X +1504(more)X +1689(than)X +1847(one)X +1983(\256le)X +2105(must)X +2280(issue)X +2460(one)X +2596(call)X +2732(per)X +2855(\256le.)X +755 3030(In)N +853(addition,)X +3 f +1166(fsync)X +1 f +1344(\(2\))X +1469(provides)X +1776(no)X +1887(way)X +2051(to)X +2143(control)X +2400(the)X +2528(order)X +2728(in)X +2820(which)X +3046(dirty)X +3227(pages)X +3440(are)X +3569(written)X +3826(to)X +3918(disk.)X +4121(Since)X +555 3120(non-logging)N +976(protocols)X +1304(must)X +1489(sometimes)X +1861(order)X +2061(writes)X +2287(carefully)X +2603([SULL92],)X +2987(they)X +3155(are)X +3284(dif\256cult)X +3567(to)X +3659(implement)X +4030(on)X +4139(Unix)X +555 3210(systems.)N +868(As)X +977(a)X +1033(result,)X +1251(we)X +1365(have)X +1537(chosen)X +1780(to)X +1862(implement)X +2224(a)X +2280(logging)X +2544(protocol.)X +755 3333(Logging)N +1050(protocols)X +1372(may)X +1534(be)X +1634(categorized)X +2029(based)X +2236(on)X +2340(how)X +2502(information)X +2904(is)X +2981(logged)X +3223(\(physically)X +3602(or)X +3692(logically\))X +4022(and)X +4161(how)X +555 3423(much)N +767(is)X +854(logged)X +1106(\(before)X +1373(images,)X +1654(after)X +1836(images)X +2097(or)X +2198(both\).)X +2441(In)X +3 f +2542(physical)X +2855(logging)X +1 f +3103(,)X +3157(images)X +3417(of)X +3517(complete)X +3844(physical)X +4144(units)X +555 3513(\(pages)N +786(or)X +874(buffers\))X +1150(are)X +1270(recorded,)X +1593(while)X +1792(in)X +3 f +1875(logical)X +2118(logging)X +1 f +2387(a)X +2444(description)X +2820(of)X +2907(the)X +3025(operation)X +3348(is)X +3421(recorded.)X +3763(Therefore,)X +4121(while)X +555 3603(we)N +675(may)X +839(record)X +1071(entire)X +1280(pages)X +1489(in)X +1577(a)X +1639(physical)X +1932(log,)X +2080(we)X +2200(need)X +2378(only)X +2546(record)X +2777(the)X +2900(records)X +3162(being)X +3365(modi\256ed)X +3674(in)X +3761(a)X +3822(logical)X +4065(log.)X +4232(In)X +555 3693(fact,)N +718(physical)X +1006(logging)X +1271(can)X +1404(be)X +1501(thought)X +1766(of)X +1854(as)X +1942(a)X +1999(special)X +2243(case)X +2403(of)X +2491(logical)X +2730(logging,)X +3015(since)X +3201(the)X +3320 0.3125(``records'')AX +3686(that)X +3827(we)X +3942(log)X +4065(in)X +4148(logi-)X +555 3783(cal)N +673(logging)X +941(might)X +1151(be)X +1251(physical)X +1542(pages.)X +1789(Since)X +1991(logical)X +2233(logging)X +2501(is)X +2578(both)X +2743(more)X +2931(space-ef\256cient)X +3423(and)X +3562(more)X +3750(general,)X +4030(we)X +4147(have)X +555 3873(chosen)N +798(it)X +862(for)X +976(our)X +1103(logging)X +1367(protocol.)X +755 3996(In)N +3 f +843(before-image)X +1315(logging)X +1 f +1563(,)X +1604(we)X +1719(log)X +1842(a)X +1899(copy)X +2076(of)X +2164(the)X +2283(data)X +2438(before)X +2665(the)X +2784(update,)X +3039(while)X +3238(in)X +3 f +3321(after-image)X +3739(logging)X +1 f +3987(,)X +4027(we)X +4141(log)X +4263(a)X +555 4086(copy)N +740(of)X +836(the)X +963(data)X +1126(after)X +1303(the)X +1429(update.)X +1711(If)X +1793(we)X +1915(log)X +2045(only)X +2215(before-images,)X +2723(then)X +2889(there)X +3078(is)X +3159(suf\256cient)X +3485(information)X +3891(in)X +3981(the)X +4107(log)X +4237(to)X +555 4176(allow)N +761(us)X +860(to)X +3 f +950(undo)X +1 f +1150(the)X +1276(transaction)X +1656(\(go)X +1791(back)X +1971(to)X +2061(the)X +2187(state)X +2361(represented)X +2759(by)X +2866(the)X +2991(before-image\).)X +3514(However,)X +3876(if)X +3952(the)X +4077(system)X +555 4266(crashes)N +814(and)X +952(a)X +1010(committed)X +1374(transaction's)X +1806(changes)X +2087(have)X +2261(not)X +2385(reached)X +2658(the)X +2778(disk,)X +2953(we)X +3068(have)X +3241(no)X +3342(means)X +3568(to)X +3 f +3651(redo)X +1 f +3828(the)X +3947(transaction)X +555 4356(\(reapply)N +849(the)X +973(updates\).)X +1311(Therefore,)X +1675(logging)X +1945(only)X +2113(before-images)X +2599(necessitates)X +3004(forcing)X +3262(dirty)X +3439(pages)X +3648(at)X +3732(commit)X +4002(time.)X +4210(As)X +555 4446(mentioned)N +913(above,)X +1145(forcing)X +1397(pages)X +1600(at)X +1678(commit)X +1942(is)X +2015(considered)X +2383(too)X +2505(costly.)X +755 4569(If)N +834(we)X +953(log)X +1080(only)X +1247(after-images,)X +1694(then)X +1857(there)X +2043(is)X +2121(suf\256cient)X +2444(information)X +2847(in)X +2934(the)X +3057(log)X +3184(to)X +3271(allow)X +3474(us)X +3570(to)X +3657(redo)X +3825(the)X +3947(transaction)X +555 4659(\(go)N +687(forward)X +967(to)X +1054(the)X +1177(state)X +1348(represented)X +1743(by)X +1847(the)X +1969(after-image\),)X +2411(but)X +2537(we)X +2655(do)X +2759(not)X +2885(have)X +3061(the)X +3183(information)X +3585(required)X +3877(to)X +3963(undo)X +4147(tran-)X +555 4749(sactions)N +845(which)X +1073(aborted)X +1346(after)X +1526(dirty)X +1709(pages)X +1924(were)X +2113(written)X +2372(to)X +2466(disk.)X +2670(Therefore,)X +3039(logging)X +3314(only)X +3487(after-images)X +3920(necessitates)X +555 4839(holding)N +819(all)X +919(dirty)X +1090(buffers)X +1338(in)X +1420(main)X +1600(memory)X +1887(until)X +2053(commit)X +2317(or)X +2404(writing)X +2655(them)X +2835(to)X +2917(a)X +2973(temporary)X +3323(\256le.)X +755 4962(Since)N +956(neither)X +1202(constraint)X +1541(\(forcing)X +1823(pages)X +2029(on)X +2132(commit)X +2399(or)X +2489(buffering)X +2811(pages)X +3016(until)X +3184(commit\))X +3477(was)X +3624(feasible,)X +3916(we)X +4032(chose)X +4237(to)X +555 5052(log)N +683(both)X +851(before)X +1083(and)X +1225(after)X +1399(images.)X +1672(The)X +1823(only)X +1991(remaining)X +2342(consideration)X +2800(is)X +2879(when)X +3079(changes)X +3363(get)X +3486(written)X +3738(to)X +3825(disk.)X +4023(Changes)X +555 5142(affect)N +764(both)X +931(data)X +1090(pages)X +1298(and)X +1438(the)X +1560(log.)X +1726(If)X +1804(the)X +1926(changed)X +2218(data)X +2376(page)X +2552(is)X +2629(written)X +2880(before)X +3110(the)X +3232(log)X +3358(page,)X +3554(and)X +3694(the)X +3816(system)X +4062(crashes)X +555 5232(before)N +787(the)X +911(log)X +1039(page)X +1217(is)X +1296(written,)X +1569(the)X +1693(log)X +1820(will)X +1969(contain)X +2230(insuf\256cient)X +2615(information)X +3018(to)X +3105(undo)X +3290(the)X +3413(change.)X +3706(This)X +3873(violates)X +4147(tran-)X +555 5322(saction)N +803(semantics,)X +1160(since)X +1346(some)X +1536(changed)X +1825(data)X +1980(pages)X +2184(may)X +2343(not)X +2466(have)X +2638(been)X +2810(written,)X +3077(and)X +3213(the)X +3331(database)X +3628(cannot)X +3862(be)X +3958(restored)X +4237(to)X +555 5412(its)N +650(pre-transaction)X +1152(state.)X +755 5535(The)N +914(log)X +1050(record)X +1290(describing)X +1658(an)X +1768(update)X +2016(must)X +2205(be)X +2315(written)X +2576(to)X +2672(stable)X +2893(storage)X +3159(before)X +3398(the)X +3529(modi\256ed)X +3846(page.)X +4071(This)X +4246(is)X +3 f +555 5625(write-ahead)N +992(logging)X +1 f +1240(.)X +1307(If)X +1388(log)X +1517(records)X +1781(are)X +1907(safely)X +2126(written)X +2380(to)X +2469(disk,)X +2649(data)X +2810(pages)X +3020(may)X +3185(be)X +3288(written)X +3542(at)X +3627(any)X +3770(time)X +3939(afterwards.)X +555 5715(This)N +721(means)X +950(that)X +1094(the)X +1216(only)X +1382(\256le)X +1508(that)X +1652(ever)X +1815(needs)X +2022(to)X +2108(be)X +2208(forced)X +2438(to)X +2524(disk)X +2681(is)X +2758(the)X +2880(log.)X +3046(Since)X +3248(the)X +3370(log)X +3495(is)X +3571(append-only,)X +4015(modi\256ed)X + +4 p +%%Page: 4 4 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630(pages)N +760(always)X +1005(appear)X +1242(at)X +1322(the)X +1442(end)X +1580(and)X +1718(may)X +1878(be)X +1976(written)X +2224(to)X +2307(disk)X +2461(ef\256ciently)X +2807(in)X +2890(any)X +3027(\256le)X +3150(system)X +3393(that)X +3534(favors)X +3756(sequential)X +4102(order-)X +555 720(ing)N +677(\()X +2 f +704(e.g.)X +1 f +820(,)X +860(FFS,)X +1032(log-structured)X +1502(\256le)X +1624(system,)X +1886(or)X +1973(an)X +2069(extent-based)X +2495(system\).)X +3 f +555 906(3.1.2.)N +775(Concurrency)X +1245(Control)X +1 f +755 1029(The)N +918(concurrency)X +1354(control)X +1619(protocol)X +1923(is)X +2013(responsible)X +2415(for)X +2546(maintaining)X +2965(consistency)X +3376(in)X +3475(the)X +3610(presence)X +3929(of)X +4033(multiple)X +555 1119(accesses.)N +897(There)X +1114(are)X +1242(several)X +1499(alternative)X +1867(solutions)X +2183(such)X +2358(as)X +2453(locking,)X +2741(optimistic)X +3088(concurrency)X +3514(control)X +3769([KUNG81],)X +4183(and)X +555 1209(timestamp)N +912(ordering)X +1208([BERN80].)X +1619(Since)X +1821(optimistic)X +2164(methods)X +2459(and)X +2599(timestamp)X +2956(ordering)X +3252(are)X +3374(generally)X +3696(more)X +3884(complex)X +4183(and)X +555 1299(restrict)N +804(concurrency)X +1228(without)X +1498(eliminating)X +1888(starvation)X +2230(or)X +2323(deadlocks,)X +2690(we)X +2810(chose)X +3018(two-phase)X +3373(locking)X +3638(\(2PL\).)X +3890(Strict)X +4088(2PL)X +4246(is)X +555 1389(suboptimal)N +935(for)X +1054(certain)X +1297(data)X +1455(structures)X +1791(such)X +1962(as)X +2053(B-trees)X +2309(because)X +2588(it)X +2656(can)X +2792(limit)X +2966(concurrency,)X +3408(so)X +3503(we)X +3621(use)X +3752(a)X +3812(special)X +4059(locking)X +555 1479(protocol)N +842(based)X +1045(on)X +1145(one)X +1281(described)X +1609(in)X +1691([LEHM81].)X +755 1602(The)N +901(B-tree)X +1123(locking)X +1384(protocol)X +1672(we)X +1787(implemented)X +2226(releases)X +2502(locks)X +2691(at)X +2769(internal)X +3034(nodes)X +3241(in)X +3323(the)X +3441(tree)X +3582(as)X +3669(it)X +3733(descends.)X +4083(A)X +4161(lock)X +555 1692(on)N +658(an)X +757(internal)X +1025(page)X +1200(is)X +1276(always)X +1522(released)X +1808(before)X +2036(a)X +2094(lock)X +2254(on)X +2356(its)X +2453(child)X +2635(is)X +2710(obtained)X +3008(\(that)X +3177(is,)X +3272(locks)X +3463(are)X +3584(not)X +3 f +3708(coupled)X +1 f +3996([BAY77])X +555 1782(during)N +786(descent\).)X +1116(When)X +1330(a)X +1388(leaf)X +1531(\(or)X +1647(internal\))X +1941(page)X +2115(is)X +2190(split,)X +2369(a)X +2427(write)X +2614(lock)X +2774(is)X +2849(acquired)X +3148(on)X +3250(the)X +3370(parent)X +3593(before)X +3821(the)X +3941(lock)X +4100(on)X +4201(the)X +555 1872(just-split)N +855(page)X +1028(is)X +1102(released)X +1387(\(locks)X +1604(are)X +3 f +1724(coupled)X +1 f +2011(during)X +2241(ascent\).)X +2530(Write)X +2734(locks)X +2924(on)X +3025(internal)X +3291(pages)X +3495(are)X +3615(released)X +3899(immediately)X +555 1962(after)N +723(the)X +841(page)X +1013(is)X +1086(updated,)X +1380(but)X +1502(locks)X +1691(on)X +1791(leaf)X +1932(pages)X +2135(are)X +2254(held)X +2412(until)X +2578(the)X +2696(end)X +2832(of)X +2919(the)X +3037(transaction.)X +755 2085(Since)N +964(locks)X +1164(are)X +1294(released)X +1589(during)X +1828(descent,)X +2119(the)X +2247(structure)X +2558(of)X +2655(the)X +2783(tree)X +2934(may)X +3102(change)X +3360(above)X +3582(a)X +3648(node)X +3834(being)X +4042(used)X +4219(by)X +555 2175(some)N +752(process.)X +1061(If)X +1143(that)X +1291(process)X +1560(must)X +1743(later)X +1914(ascend)X +2161(the)X +2287(tree)X +2435(because)X +2717(of)X +2811(a)X +2874(page)X +3053(split,)X +3237(any)X +3380(such)X +3554(change)X +3809(must)X +3991(not)X +4120(cause)X +555 2265(confusion.)N +938(We)X +1077(use)X +1211(the)X +1336(technique)X +1675(described)X +2010(in)X +2099([LEHM81])X +2487(which)X +2710(exploits)X +2989(the)X +3113(ordering)X +3411(of)X +3504(data)X +3664(on)X +3770(a)X +3832(B-tree)X +4059(page)X +4237(to)X +555 2355(guarantee)N +888(that)X +1028(no)X +1128(process)X +1389(ever)X +1548(gets)X +1697(lost)X +1832(as)X +1919(a)X +1975(result)X +2173(of)X +2260(internal)X +2525(page)X +2697(updates)X +2962(made)X +3156(by)X +3256(other)X +3441(processes.)X +755 2478(If)N +836(a)X +899(transaction)X +1278(that)X +1425(updates)X +1697(a)X +1760(B-tree)X +1988(aborts,)X +2231(the)X +2356(user-visible)X +2757(changes)X +3043(to)X +3131(the)X +3255(tree)X +3402(must)X +3583(be)X +3685(rolled)X +3898(back.)X +4116(How-)X +555 2568(ever,)N +735(changes)X +1015(to)X +1097(the)X +1215(internal)X +1480(nodes)X +1687(of)X +1774(the)X +1892(tree)X +2033(need)X +2205(not)X +2327(be)X +2423(rolled)X +2630(back,)X +2822(since)X +3007(these)X +3192(pages)X +3395(contain)X +3651(no)X +3751(user-visible)X +4145(data.)X +555 2658(When)N +771(rolling)X +1008(back)X +1184(a)X +1244(transaction,)X +1640(we)X +1758(roll)X +1893(back)X +2069(all)X +2173(leaf)X +2318(page)X +2494(updates,)X +2783(but)X +2909(no)X +3013(internal)X +3281(insertions)X +3615(or)X +3705(page)X +3880(splits.)X +4111(In)X +4201(the)X +555 2748(worst)N +759(case,)X +944(this)X +1085(will)X +1235(leave)X +1431(a)X +1493(leaf)X +1640(page)X +1818(less)X +1964(than)X +2128(half)X +2279(full.)X +2456(This)X +2624(may)X +2788(cause)X +2993(poor)X +3166(space)X +3371(utilization,)X +3741(but)X +3869(does)X +4042(not)X +4170(lose)X +555 2838(user)N +709(data.)X +755 2961(Holding)N +1038(locks)X +1228(on)X +1329(leaf)X +1471(pages)X +1675(until)X +1842(transaction)X +2215(commit)X +2480(guarantees)X +2845(that)X +2986(no)X +3087(other)X +3273(process)X +3535(can)X +3668(insert)X +3866(or)X +3953(delete)X +4165(data)X +555 3051(that)N +711(has)X +854(been)X +1042(touched)X +1332(by)X +1448(this)X +1598(process.)X +1914(Rolling)X +2188(back)X +2375(insertions)X +2721(and)X +2872(deletions)X +3196(on)X +3311(leaf)X +3467(pages)X +3685(guarantees)X +4064(that)X +4219(no)X +555 3141(aborted)N +819(updates)X +1087(are)X +1209(ever)X +1371(visible)X +1607(to)X +1692(other)X +1880(transactions.)X +2326(Leaving)X +2612(page)X +2787(splits)X +2978(intact)X +3179(permits)X +3442(us)X +3536(to)X +3621(release)X +3867(internal)X +4134(write)X +555 3231(locks)N +744(early.)X +965(Thus)X +1145(transaction)X +1517(semantics)X +1853(are)X +1972(preserved,)X +2325(and)X +2461(locks)X +2650(are)X +2769(held)X +2927(for)X +3041(shorter)X +3284(periods.)X +755 3354(The)N +901(extra)X +1083(complexity)X +1464(introduced)X +1828(by)X +1929(this)X +2065(locking)X +2326(protocol)X +2614(appears)X +2881(substantial,)X +3264(but)X +3387(it)X +3452(is)X +3525(important)X +3856(for)X +3970(multi-user)X +555 3444(execution.)N +950(The)X +1118(bene\256ts)X +1410(of)X +1520(non-two-phase)X +2040(locking)X +2323(on)X +2446(B-trees)X +2721(are)X +2863(well)X +3044(established)X +3443(in)X +3548(the)X +3689(database)X +4009(literature)X +555 3534([BAY77],)N +899([LEHM81].)X +1320(If)X +1394(a)X +1450(process)X +1711(held)X +1869(locks)X +2058(until)X +2224(it)X +2288(committed,)X +2670(then)X +2828(a)X +2884(long-running)X +3322(update)X +3556(could)X +3754(lock)X +3912(out)X +4034(all)X +4134(other)X +555 3624(transactions)N +967(by)X +1076(preventing)X +1448(any)X +1593(other)X +1787(process)X +2057(from)X +2241(locking)X +2509(the)X +2635(root)X +2792(page)X +2972(of)X +3067(the)X +3193(tree.)X +3382(The)X +3535(B-tree)X +3764(locking)X +4032(protocol)X +555 3714(described)N +884(above)X +1096(guarantees)X +1460(that)X +1600(locks)X +1789(on)X +1889(internal)X +2154(pages)X +2357(are)X +2476(held)X +2634(for)X +2748(extremely)X +3089(short)X +3269(periods,)X +3545(thereby)X +3806(increasing)X +4156(con-)X +555 3804(currency.)N +3 f +555 3990(3.1.3.)N +775(Management)X +1245(of)X +1332(Shared)X +1596(Data)X +1 f +755 4113(Database)N +1075(systems)X +1353(permit)X +1587(many)X +1790(users)X +1980(to)X +2067(examine)X +2364(and)X +2505(update)X +2744(the)X +2866(same)X +3055(data)X +3213(concurrently.)X +3683(In)X +3774(order)X +3968(to)X +4054(provide)X +555 4203(this)N +702(concurrent)X +1078(access)X +1316(and)X +1464(enforce)X +1738(the)X +1868(write-ahead)X +2280(logging)X +2556(protocol)X +2855(described)X +3195(in)X +3289(section)X +3548(3.1.1,)X +3759(we)X +3884(use)X +4022(a)X +4089(shared)X +555 4293(memory)N +848(buffer)X +1071(manager.)X +1414(Not)X +1559(only)X +1726(does)X +1898(this)X +2038(provide)X +2308(the)X +2431(guarantees)X +2800(we)X +2919(require,)X +3192(but)X +3319(a)X +3380(user-level)X +3722(buffer)X +3944(manager)X +4246(is)X +555 4383(frequently)N +916(faster)X +1126(than)X +1295(using)X +1498(the)X +1626(\256le)X +1758(system)X +2010(buffer)X +2237(cache.)X +2491(Reads)X +2717(or)X +2814(writes)X +3040(involving)X +3376(the)X +3504(\256le)X +3636(system)X +3888(buffer)X +4115(cache)X +555 4473(often)N +746(require)X +1000(copying)X +1284(data)X +1444(between)X +1738(user)X +1898(and)X +2040(kernel)X +2266(space)X +2470(while)X +2673(a)X +2734(user-level)X +3076(buffer)X +3298(manager)X +3600(can)X +3737(return)X +3954(pointers)X +4237(to)X +555 4563(data)N +709(pages)X +912(directly.)X +1217(Additionally,)X +1661(if)X +1730(more)X +1915(than)X +2073(one)X +2209(process)X +2470(uses)X +2628(the)X +2746(same)X +2931(page,)X +3123(then)X +3281(fewer)X +3485(copies)X +3710(may)X +3868(be)X +3964(required.)X +3 f +555 4749(3.2.)N +715(Module)X +997(Architecture)X +1 f +755 4872(The)N +913(preceding)X +1262(sections)X +1552(described)X +1892(modules)X +2195(for)X +2321(managing)X +2669(the)X +2799(transaction)X +3183(log,)X +3337(locks,)X +3558(and)X +3706(a)X +3774(cache)X +3990(of)X +4089(shared)X +555 4962(buffers.)N +847(In)X +938(addition,)X +1244(we)X +1362(need)X +1538(to)X +1624(provide)X +1893(functionality)X +2326(for)X +2444(transaction)X +2 f +2819(begin)X +1 f +2997(,)X +2 f +3040(commit)X +1 f +3276(,)X +3319(and)X +2 f +3458(abort)X +1 f +3654(processing,)X +4040(necessi-)X +555 5052(tating)N +769(a)X +837(transaction)X +1221(manager.)X +1570(In)X +1669(order)X +1871(to)X +1965(arbitrate)X +2265(concurrent)X +2641(access)X +2879(to)X +2973(locks)X +3173(and)X +3320(buffers,)X +3599(we)X +3724(include)X +3991(a)X +4058(process)X +555 5142(management)N +995(module)X +1264(which)X +1489(manages)X +1799(a)X +1864(collection)X +2209(of)X +2305(semaphores)X +2713(used)X +2889(to)X +2980(block)X +3187(and)X +3332(release)X +3585(processes.)X +3962(Finally,)X +4237(in)X +555 5232(order)N +752(to)X +841(provide)X +1113(a)X +1176(simple,)X +1436(standard)X +1735(interface)X +2044(we)X +2165(have)X +2344(modi\256ed)X +2655(the)X +2780(database)X +3084(access)X +3317(routines)X +3602(\()X +3 f +3629(db)X +1 f +3717(\(3\)\).)X +3904(For)X +4041(the)X +4165(pur-)X +555 5322(poses)N +758(of)X +850(this)X +990(paper)X +1194(we)X +1313(call)X +1453(the)X +1575(modi\256ed)X +1883(package)X +2171(the)X +3 f +2293(Record)X +2567(Manager)X +1 f +2879(.)X +2943(Figure)X +3176(one)X +3316(shows)X +3540(the)X +3662(main)X +3846(interfaces)X +4183(and)X +555 5412(architecture)N +955(of)X +1042(LIBTP.)X + +5 p +%%Page: 5 5 +10 s 10 xH 0 xS 1 f +3 f +1 f +11 s +1851 1520(log_commit)N +2764 2077(buf_unpin)N +2764 1987(buf_get)N +3633 1408(buf_unpin)N +3633 1319(buf_pin)N +3633 1230(buf_get)N +3 f +17 s +1163 960(Txn)N +1430(M)X +1559(anager)X +2582(Record)X +3040(M)X +3169(anager)X +1 Dt +2363 726 MXY +0 355 Dl +1426 0 Dl +0 -355 Dl +-1426 0 Dl +3255 1616 MXY +0 535 Dl +534 0 Dl +0 -535 Dl +-534 0 Dl +2185 MX +0 535 Dl +535 0 Dl +0 -535 Dl +-535 0 Dl +1116 MX +0 535 Dl +534 0 Dl +0 -535 Dl +-534 0 Dl +726 MY +0 355 Dl +891 0 Dl +0 -355 Dl +-891 0 Dl +1 f +11 s +2207 1297(lock)N +2564 1386(log)N +865(unlock_all)X +1851 1609(log_unroll)N +1650 2508 MXY +0 178 Dl +1605 0 Dl +0 -178 Dl +-1605 0 Dl +1294 1616 MXY +19 -30 Dl +-19 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +2319 2508 MXY +-22 -30 Dl +4 23 Dl +-18 14 Dl +36 -7 Dl +-936 -357 Dl +3277 2455(sleep_on)N +1405 1616 MXY +36 4 Dl +-18 -13 Dl +1 -22 Dl +-19 31 Dl +1070 -535 Dl +2631 2508 MXY +36 6 Dl +-18 -14 Dl +3 -22 Dl +-21 30 Dl +891 -357 Dl +1426 2455(sleep_on)N +3255 1884 MXY +-31 -20 Dl +11 20 Dl +-11 19 Dl +31 -19 Dl +-535 0 Dl +1554 2366(wake)N +3277(wake)X +2185 1884 MXY +-31 -20 Dl +12 20 Dl +-12 19 Dl +31 -19 Dl +-356 0 Dl +0 -803 Dl +3 f +17 s +1236 1851(Lock)N +1118 2030(M)N +1247(anager)X +2339 1851(Log)N +2187 2030(M)N +2316(anager)X +3333 1851(Buffer)N +3257 2030(M)N +3386(anager)X +3522 1616 MXY +20 -30 Dl +-20 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +1950 2654(Process)N +2424(M)X +2553(anager)X +2542 1616 MXY +19 -30 Dl +-19 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +1 f +11 s +2207 1364(unlock)N +2452 2508 MXY +20 -31 Dl +-20 11 Dl +-19 -11 Dl +19 31 Dl +0 -357 Dl +2497 2322(sleep_on)N +2497 2233(wake)N +3 Dt +-1 Ds +3 f +10 s +1790 2830(Figure)N +2037(1:)X +2144(Library)X +2435(module)X +2708(interfaces.)X +1 f +10 f +555 3010(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 3286(3.2.1.)N +775(The)X +928(Log)X +1081(Manager)X +1 f +755 3409(The)N +3 f +907(Log)X +1067(Manager)X +1 f +1406(enforces)X +1706(the)X +1831(write-ahead)X +2238(logging)X +2509(protocol.)X +2843(Its)X +2949(primitive)X +3268(operations)X +3628(are)X +2 f +3753(log)X +1 f +3855(,)X +2 f +3901(log_commit)X +1 f +4279(,)X +2 f +555 3499(log_read)N +1 f +844(,)X +2 f +889(log_roll)X +1 f +1171(and)X +2 f +1312(log_unroll)X +1 f +1649(.)X +1714(The)X +2 f +1864(log)X +1 f +1991(call)X +2132(performs)X +2447(a)X +2508(buffered)X +2806(write)X +2996(of)X +3088(the)X +3211(speci\256ed)X +3520(log)X +3646(record)X +3876(and)X +4016(returns)X +4263(a)X +555 3589(unique)N +809(log)X +947(sequence)X +1278(number)X +1559(\(LSN\).)X +1840(This)X +2017(LSN)X +2203(may)X +2376(then)X +2549(be)X +2660(used)X +2842(to)X +2939(retrieve)X +3220(a)X +3291(record)X +3532(from)X +3723(the)X +3856(log)X +3993(using)X +4201(the)X +2 f +555 3679(log_read)N +1 f +865(call.)X +1042(The)X +2 f +1188(log)X +1 f +1311(interface)X +1614(knows)X +1844(very)X +2008(little)X +2175(about)X +2374(the)X +2493(internal)X +2759(format)X +2993(of)X +3080(the)X +3198(log)X +3320(records)X +3577(it)X +3641(receives.)X +3965(Rather,)X +4219(all)X +555 3769(log)N +681(records)X +942(are)X +1065 0.4028(referenced)AX +1430(by)X +1534(a)X +1594(header)X +1833(structure,)X +2158(a)X +2218(log)X +2344(record)X +2574(type,)X +2756(and)X +2896(a)X +2956(character)X +3276(buffer)X +3497(containing)X +3859(the)X +3981(data)X +4138(to)X +4223(be)X +555 3859(logged.)N +834(The)X +980(log)X +1103(record)X +1330(type)X +1489(is)X +1563(used)X +1731(to)X +1814(call)X +1951(the)X +2070(appropriate)X +2457(redo)X +2621(and)X +2758(undo)X +2939(routines)X +3217(during)X +2 f +3446(abort)X +1 f +3639(and)X +2 f +3775(commit)X +1 f +4031(process-)X +555 3949(ing.)N +721(While)X +941(we)X +1059(have)X +1235(used)X +1406(the)X +3 f +1528(Log)X +1684(Manager)X +1 f +2019(to)X +2104(provide)X +2372(before)X +2601(and)X +2740(after)X +2911(image)X +3130(logging,)X +3417(it)X +3484(may)X +3645(also)X +3797(be)X +3896(used)X +4066(for)X +4183(any)X +555 4039(of)N +642(the)X +760(logging)X +1024(algorithms)X +1386(discussed.)X +755 4162(The)N +2 f +905(log_commit)X +1 f +1308(operation)X +1636(behaves)X +1920(exactly)X +2177(like)X +2322(the)X +2 f +2445(log)X +1 f +2572(operation)X +2900(but)X +3026(guarantees)X +3394(that)X +3538(the)X +3660(log)X +3786(has)X +3917(been)X +4093(forced)X +555 4252(to)N +643(disk)X +802(before)X +1034(returning.)X +1394(A)X +1478(discussion)X +1837(of)X +1930(our)X +2063(commit)X +2333(strategy)X +2613(appears)X +2884(in)X +2971(the)X +3094(implementation)X +3621(section)X +3873(\(section)X +4152(4.2\).)X +2 f +555 4342(Log_unroll)N +1 f +935(reads)X +1126(log)X +1249(records)X +1507(from)X +1684(the)X +1803(log,)X +1946(following)X +2278(backward)X +2611(transaction)X +2983(pointers)X +3261(and)X +3397(calling)X +3635(the)X +3753(appropriate)X +4139(undo)X +555 4432(routines)N +839(to)X +927(implement)X +1295(transaction)X +1673(abort.)X +1904(In)X +1997(a)X +2059(similar)X +2307(manner,)X +2 f +2594(log_roll)X +1 f +2877(reads)X +3073(log)X +3201(records)X +3464(sequentially)X +3877(forward,)X +4178(cal-)X +555 4522(ling)N +699(the)X +817(appropriate)X +1203(redo)X +1366(routines)X +1644(to)X +1726(recover)X +1988(committed)X +2350(transactions)X +2753(after)X +2921(a)X +2977(system)X +3219(crash.)X +3 f +555 4708(3.2.2.)N +775(The)X +928(Buffer)X +1171(Manager)X +1 f +755 4831(The)N +3 f +912(Buffer)X +1167(Manager)X +1 f +1511(uses)X +1681(a)X +1749(pool)X +1923(of)X +2022(shared)X +2264(memory)X +2563(to)X +2657(provide)X +2934(a)X +3002(least-recently-used)X +3641(\(LRU\))X +3886(block)X +4095(cache.)X +555 4921(Although)N +886(the)X +1013(current)X +1270(library)X +1513(provides)X +1818(an)X +1923(LRU)X +2112(cache,)X +2345(it)X +2418(would)X +2647(be)X +2752(simple)X +2994(to)X +3085(add)X +3229(alternate)X +3534(replacement)X +3955(policies)X +4232(as)X +555 5011(suggested)N +903(by)X +1015([CHOU85])X +1408(or)X +1507(to)X +1601(provide)X +1878(multiple)X +2176(buffer)X +2405(pools)X +2610(with)X +2784(different)X +3092(policies.)X +3412(Transactions)X +3853(request)X +4116(pages)X +555 5101(from)N +736(the)X +859(buffer)X +1081(manager)X +1383(and)X +1524(keep)X +1701(them)X +3 f +1886(pinned)X +1 f +2145(to)X +2232(ensure)X +2466(that)X +2610(they)X +2772(are)X +2895(not)X +3021(written)X +3272(to)X +3358(disk)X +3515(while)X +3717(they)X +3879(are)X +4002(in)X +4088(a)X +4148(logi-)X +555 5191(cally)N +732(inconsistent)X +1135(state.)X +1343(When)X +1556(page)X +1729(replacement)X +2143(is)X +2217(necessary,)X +2571(the)X +3 f +2689(Buffer)X +2932(Manager)X +1 f +3264(\256nds)X +3439(an)X +3535(unpinned)X +3853(page)X +4025(and)X +4161(then)X +555 5281(checks)N +794(with)X +956(the)X +3 f +1074(Log)X +1227(Manager)X +1 f +1559(to)X +1641(ensure)X +1871(that)X +2011(the)X +2129(write-ahead)X +2529(protocol)X +2816(is)X +2889(enforced.)X +3 f +555 5467(3.2.3.)N +775(The)X +928(Lock)X +1121(Manager)X +1 f +755 5590(The)N +3 f +901(Lock)X +1095(Manager)X +1 f +1428(supports)X +1720(general)X +1978(purpose)X +2253(locking)X +2514(\(single)X +2753(writer,)X +2986(multiple)X +3273(readers\))X +3553(which)X +3769(is)X +3842(currently)X +4152(used)X +555 5680(to)N +638(provide)X +904(two-phase)X +1254(locking)X +1514(and)X +1650(high)X +1812(concurrency)X +2230(B-tree)X +2451(locking.)X +2751(However,)X +3086(the)X +3204(general)X +3461(purpose)X +3735(nature)X +3956(of)X +4043(the)X +4161(lock)X + +6 p +%%Page: 6 6 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630(manager)N +857(provides)X +1158(the)X +1281(ability)X +1510(to)X +1597(support)X +1862(a)X +1923(variety)X +2171(of)X +2263(locking)X +2528(protocols.)X +2890(Currently,)X +3241(all)X +3345(locks)X +3538(are)X +3661(issued)X +3885(at)X +3967(the)X +4089(granu-)X +555 720(larity)N +747(of)X +837(a)X +896(page)X +1071(\(the)X +1219(size)X +1367(of)X +1457(a)X +1516(buffer)X +1736(in)X +1821(the)X +1942(buffer)X +2161(pool\))X +2352(which)X +2570(is)X +2645(identi\256ed)X +2969(by)X +3071(two)X +3213(4-byte)X +3440(integers)X +3716(\(a)X +3801(\256le)X +3925(id)X +4009(and)X +4147(page)X +555 810(number\).)N +898(This)X +1071(provides)X +1378(the)X +1507(necessary)X +1851(information)X +2259(to)X +2351(extend)X +2595(the)X +3 f +2723(Lock)X +2926(Manager)X +1 f +3268(to)X +3360(perform)X +3649(hierarchical)X +4059(locking)X +555 900([GRAY76].)N +982(The)X +1133(current)X +1387(implementation)X +1915(does)X +2088(not)X +2216(support)X +2482(locks)X +2677(at)X +2760(other)X +2950(granularities)X +3376(and)X +3517(does)X +3689(not)X +3816(promote)X +4108(locks;)X +555 990(these)N +740(are)X +859(obvious)X +1132(future)X +1344(additions)X +1657(to)X +1739(the)X +1857(system.)X +755 1113(If)N +831(an)X +929(incoming)X +1253(lock)X +1413(request)X +1667(cannot)X +1903(be)X +2001(granted,)X +2284(the)X +2404(requesting)X +2760(process)X +3023(is)X +3098(queued)X +3352(for)X +3467(the)X +3586(lock)X +3745(and)X +3882(descheduled.)X +555 1203(When)N +769(a)X +827(lock)X +987(is)X +1062(released,)X +1368(the)X +1488(wait)X +1647(queue)X +1860(is)X +1934(traversed)X +2250(and)X +2387(any)X +2524(newly)X +2741(compatible)X +3118(locks)X +3308(are)X +3428(granted.)X +3730(Locks)X +3947(are)X +4067(located)X +555 1293(via)N +680(a)X +743(\256le)X +872(and)X +1015(page)X +1194(hash)X +1368(table)X +1551(and)X +1694(are)X +1820(chained)X +2097(both)X +2266(by)X +2373(object)X +2595(and)X +2737(by)X +2843(transaction,)X +3241(facilitating)X +3614(rapid)X +3805(traversal)X +4108(of)X +4201(the)X +555 1383(lock)N +713(table)X +889(during)X +1118(transaction)X +1490(commit)X +1754(and)X +1890(abort.)X +755 1506(The)N +907(primary)X +1188(interfaces)X +1528(to)X +1617(the)X +1742(lock)X +1907(manager)X +2211(are)X +2 f +2337(lock)X +1 f +2471(,)X +2 f +2518(unlock)X +1 f +2732(,)X +2779(and)X +2 f +2922(lock_unlock_all)X +1 f +3434(.)X +2 f +3500(Lock)X +1 f +3682(obtains)X +3939(a)X +4001(new)X +4161(lock)X +555 1596(for)N +680(a)X +747(speci\256c)X +1023(object.)X +1290(There)X +1509(are)X +1638(also)X +1797(two)X +1947(variants)X +2231(of)X +2328(the)X +2 f +2456(lock)X +1 f +2620(request,)X +2 f +2902(lock_upgrade)X +1 f +3373(and)X +2 f +3519(lock_downgrade)X +1 f +4053(,)X +4103(which)X +555 1686(allow)N +755(the)X +875(caller)X +1076(to)X +1160(atomically)X +1519(trade)X +1701(a)X +1758(lock)X +1917(of)X +2005(one)X +2142(type)X +2301(for)X +2416(a)X +2473(lock)X +2632(of)X +2720(another.)X +2 f +3022(Unlock)X +1 f +3275(releases)X +3551(a)X +3608(speci\256c)X +3874(mode)X +4073(of)X +4161(lock)X +555 1776(on)N +655(a)X +711(speci\256c)X +976(object.)X +2 f +1232(Lock_unlock_all)X +1 f +1786(releases)X +2061(all)X +2161(the)X +2279(locks)X +2468(associated)X +2818(with)X +2980(a)X +3036(speci\256c)X +3301(transaction.)X +3 f +555 1962(3.2.4.)N +775(The)X +928(Process)X +1207(Manager)X +1 f +755 2085(The)N +3 f +900(Process)X +1179(Manager)X +1 f +1511(acts)X +1656(as)X +1743(a)X +1799(user-level)X +2136(scheduler)X +2464(to)X +2546(make)X +2740(processes)X +3068(wait)X +3226(on)X +3326(unavailable)X +3716(locks)X +3905(and)X +4041(pending)X +555 2175(buffer)N +778(cache)X +988(I/O.)X +1161(For)X +1297(each)X +1470(process,)X +1756(a)X +1817(semaphore)X +2190(is)X +2268(maintained)X +2649(upon)X +2834(which)X +3055(that)X +3200(process)X +3466(waits)X +3660(when)X +3859(it)X +3928(needs)X +4136(to)X +4223(be)X +555 2265(descheduled.)N +1014(When)X +1228(a)X +1286(process)X +1549(needs)X +1754(to)X +1838(be)X +1936(run,)X +2084(its)X +2180(semaphore)X +2549(is)X +2623(cleared,)X +2897(and)X +3034(the)X +3153(operating)X +3477(system)X +3720(reschedules)X +4116(it.)X +4201(No)X +555 2355(sophisticated)N +1002(scheduling)X +1378(algorithm)X +1718(is)X +1799(applied;)X +2085(if)X +2162(the)X +2288(lock)X +2454(for)X +2576(which)X +2800(a)X +2864(process)X +3133(was)X +3286(waiting)X +3554(becomes)X +3863(available,)X +4201(the)X +555 2445(process)N +824(is)X +905(made)X +1107(runnable.)X +1456(It)X +1533(would)X +1761(have)X +1941(been)X +2121(possible)X +2411(to)X +2501(change)X +2757(the)X +2883(kernel's)X +3170(process)X +3439(scheduler)X +3775(to)X +3865(interact)X +4134(more)X +555 2535(ef\256ciently)N +900(with)X +1062(the)X +1180(lock)X +1338(manager,)X +1655(but)X +1777(doing)X +1979(so)X +2070(would)X +2290(have)X +2462(compromised)X +2918(our)X +3045(commitment)X +3469(to)X +3551(a)X +3607(user-level)X +3944(package.)X +3 f +555 2721(3.2.5.)N +775(The)X +928(Transaction)X +1361(Manager)X +1 f +755 2844(The)N +3 f +901(Transaction)X +1335(Manager)X +1 f +1668(provides)X +1965(the)X +2084(standard)X +2377(interface)X +2680(of)X +2 f +2768(txn_begin)X +1 f +3084(,)X +2 f +3125(txn_commit)X +1 f +3499(,)X +3540(and)X +2 f +3676(txn_abort)X +1 f +3987(.)X +4047(It)X +4116(keeps)X +555 2934(track)N +742(of)X +835(all)X +941(active)X +1159(transactions,)X +1588(assigns)X +1845(unique)X +2089(transaction)X +2467(identi\256ers,)X +2833(and)X +2974(directs)X +3213(the)X +3336(abort)X +3526(and)X +3667(commit)X +3936(processing.)X +555 3024(When)N +772(a)X +2 f +833(txn_begin)X +1 f +1174(is)X +1252(issued,)X +1497(the)X +3 f +1620(Transaction)X +2058(Manager)X +1 f +2395(assigns)X +2651(the)X +2773(next)X +2935(available)X +3249(transaction)X +3625(identi\256er,)X +3958(allocates)X +4263(a)X +555 3114(per-process)N +948(transaction)X +1322(structure)X +1625(in)X +1709(shared)X +1941(memory,)X +2249(increments)X +2622(the)X +2741(count)X +2940(of)X +3028(active)X +3241(transactions,)X +3665(and)X +3802(returns)X +4046(the)X +4165(new)X +555 3204(transaction)N +937(identi\256er)X +1256(to)X +1348(the)X +1476(calling)X +1724(process.)X +2034(The)X +2188(in-memory)X +2573(transaction)X +2954(structure)X +3264(contains)X +3560(a)X +3625(pointer)X +3881(into)X +4034(the)X +4161(lock)X +555 3294(table)N +734(for)X +851(locks)X +1043(held)X +1204(by)X +1307(this)X +1445(transaction,)X +1840(the)X +1961(last)X +2095(log)X +2220(sequence)X +2538(number,)X +2826(a)X +2885(transaction)X +3260(state)X +3430(\()X +2 f +3457(idle)X +1 f +(,)S +2 f +3620(running)X +1 f +3873(,)X +2 f +3915(aborting)X +1 f +4190(,)X +4232(or)X +2 f +555 3384(committing\))N +1 f +942(,)X +982(an)X +1078(error)X +1255(code,)X +1447(and)X +1583(a)X +1639(semaphore)X +2007(identi\256er.)X +755 3507(At)N +859(commit,)X +1147(the)X +3 f +1269(Transaction)X +1706(Manager)X +1 f +2042(calls)X +2 f +2213(log_commit)X +1 f +2615(to)X +2700(record)X +2929(the)X +3050(end)X +3189(of)X +3279(transaction)X +3654(and)X +3793(to)X +3878(\257ush)X +4056(the)X +4177(log.)X +555 3597(Then)N +743(it)X +810(directs)X +1047(the)X +3 f +1168(Lock)X +1364(Manager)X +1 f +1699(to)X +1784(release)X +2031(all)X +2134(locks)X +2325(associated)X +2677(with)X +2841(the)X +2961(given)X +3161(transaction.)X +3575(If)X +3651(a)X +3709(transaction)X +4083(aborts,)X +555 3687(the)N +3 f +680(Transaction)X +1120(Manager)X +1 f +1459(calls)X +1633(on)X +2 f +1739(log_unroll)X +1 f +2102(to)X +2190(read)X +2355(the)X +2479(transaction's)X +2915(log)X +3043(records)X +3306(and)X +3448(undo)X +3634(any)X +3776(modi\256cations)X +4237(to)X +555 3777(the)N +673(database.)X +1010(As)X +1119(in)X +1201(the)X +1319(commit)X +1583(case,)X +1762(it)X +1826(then)X +1984(calls)X +2 f +2151(lock_unlock_all)X +1 f +2683(to)X +2765(release)X +3009(the)X +3127(transaction's)X +3557(locks.)X +3 f +555 3963(3.2.6.)N +775(The)X +928(Record)X +1198(Manager)X +1 f +755 4086(The)N +3 f +919(Record)X +1208(Manager)X +1 f +1559(supports)X +1869(the)X +2006(abstraction)X +2397(of)X +2503(reading)X +2783(and)X +2938(writing)X +3208(records)X +3484(to)X +3585(a)X +3660(database.)X +3996(We)X +4147(have)X +555 4176(modi\256ed)N +861(the)X +981(the)X +1101(database)X +1399(access)X +1626(routines)X +3 f +1905(db)X +1 f +1993(\(3\))X +2108([BSD91])X +2418(to)X +2501(call)X +2638(the)X +2757(log,)X +2900(lock,)X +3079(and)X +3216(buffer)X +3434(managers.)X +3803(In)X +3891(order)X +4082(to)X +4165(pro-)X +555 4266(vide)N +718(functionality)X +1152(to)X +1239(perform)X +1523(undo)X +1708(and)X +1849(redo,)X +2037(the)X +3 f +2160(Record)X +2434(Manager)X +1 f +2770(de\256nes)X +3021(a)X +3081(collection)X +3421(of)X +3512(log)X +3638(record)X +3868(types)X +4061(and)X +4201(the)X +555 4356(associated)N +920(undo)X +1115(and)X +1266(redo)X +1444(routines.)X +1777(The)X +3 f +1937(Log)X +2105(Manager)X +1 f +2452(performs)X +2777(a)X +2848(table)X +3039(lookup)X +3296(on)X +3411(the)X +3543(record)X +3783(type)X +3955(to)X +4051(call)X +4201(the)X +555 4446(appropriate)N +951(routines.)X +1299(For)X +1440(example,)X +1762(the)X +1890(B-tree)X +2121(access)X +2356(method)X +2625(requires)X +2913(two)X +3062(log)X +3193(record)X +3428(types:)X +3648(insert)X +3855(and)X +4000(delete.)X +4241(A)X +555 4536(replace)N +808(operation)X +1131(is)X +1204(implemented)X +1642(as)X +1729(a)X +1785(delete)X +1997(followed)X +2302(by)X +2402(an)X +2498(insert)X +2696(and)X +2832(is)X +2905(logged)X +3143(accordingly.)X +3 f +555 4722(3.3.)N +715(Application)X +1134(Architectures)X +1 f +755 4845(The)N +907(structure)X +1215(of)X +1309(LIBTP)X +1558(allows)X +1794(application)X +2177(designers)X +2507(to)X +2596(trade)X +2784(off)X +2905(performance)X +3339(and)X +3481(protection.)X +3872(Since)X +4076(a)X +4138(large)X +555 4935(portion)N +810(of)X +901(LIBTP's)X +1205(functionality)X +1638(is)X +1715(provided)X +2024(by)X +2128(managing)X +2468(structures)X +2804(in)X +2889(shared)X +3122(memory,)X +3432(its)X +3530(structures)X +3865(are)X +3987(subject)X +4237(to)X +555 5025(corruption)N +926(by)X +1043(applications)X +1467(when)X +1678(the)X +1813(library)X +2064(is)X +2154(linked)X +2391(directly)X +2673(with)X +2852(the)X +2987(application.)X +3420(For)X +3568(this)X +3720(reason,)X +3987(LIBTP)X +4246(is)X +555 5115(designed)N +864(to)X +950(allow)X +1152(compilation)X +1558(into)X +1706(a)X +1766(separate)X +2053(server)X +2273(process)X +2537(which)X +2756(may)X +2917(be)X +3016(accessed)X +3321(via)X +3442(a)X +3501(socket)X +3729(interface.)X +4094(In)X +4184(this)X +555 5205(way)N +712(LIBTP's)X +1015(data)X +1172(structures)X +1507(are)X +1629(protected)X +1951(from)X +2130(application)X +2509(code,)X +2704(but)X +2829(communication)X +3349(overhead)X +3666(is)X +3741(increased.)X +4107(When)X +555 5295(applications)N +975(are)X +1107(trusted,)X +1377(LIBTP)X +1631(may)X +1801(be)X +1909(compiled)X +2239(directly)X +2516(into)X +2672(the)X +2802(application)X +3190(providing)X +3533(improved)X +3872(performance.)X +555 5385(Figures)N +815(two)X +955(and)X +1091(three)X +1272(show)X +1461(the)X +1579(two)X +1719(alternate)X +2016(application)X +2392(architectures.)X +755 5508(There)N +964(are)X +1084(potentially)X +1447(two)X +1588(modes)X +1818(in)X +1901(which)X +2118(one)X +2255(might)X +2462(use)X +2590(LIBTP)X +2833(in)X +2916(a)X +2972(server)X +3189(based)X +3392(architecture.)X +3832(In)X +3919(the)X +4037(\256rst,)X +4201(the)X +555 5598(server)N +778(would)X +1004(provide)X +1275(the)X +1399(capability)X +1741(to)X +1829(respond)X +2109(to)X +2197(requests)X +2486(to)X +2574(each)X +2747(of)X +2839(the)X +2962(low)X +3107(level)X +3288(modules)X +3584(\(lock,)X +3794(log,)X +3941(buffer,)X +4183(and)X +555 5688(transaction)N +944(managers\).)X +1356(Unfortunately,)X +1863(the)X +1998(performance)X +2442(of)X +2546(such)X +2730(a)X +2803(system)X +3062(is)X +3152(likely)X +3371(to)X +3470(be)X +3583(blindingly)X +3947(slow)X +4134(since)X + +7 p +%%Page: 7 7 +10 s 10 xH 0 xS 1 f +3 f +1 f +1 Dt +1864 1125 MXY +15 -26 Dl +-15 10 Dl +-14 -10 Dl +14 26 Dl +0 -266 Dl +1315 1125 MXY +15 -26 Dl +-15 10 Dl +-14 -10 Dl +14 26 Dl +0 -266 Dl +3 Dt +1133 1125 MXY +0 798 Dl +931 0 Dl +0 -798 Dl +-931 0 Dl +1 Dt +1266 1257 MXY +0 133 Dl +665 0 Dl +0 -133 Dl +-665 0 Dl +3 f +8 s +1513 1351(driver)N +1502 1617(LIBTP)N +1266 1390 MXY +0 400 Dl +665 0 Dl +0 -400 Dl +-665 0 Dl +3 Dt +1133 726 MXY +0 133 Dl +931 0 Dl +0 -133 Dl +-931 0 Dl +1 f +1029 1098(txn_abort)N +964 1015(txn_commit)N +1018 932(txn_begin)N +1910 1015(db_ops)N +3 f +1308 820(Application)N +1645(Program)X +1398 1218(Server)N +1594(Process)X +1 f +1390 986(socket)N +1569(interface)X +1 Dt +1848 967 MXY +-23 -14 Dl +8 14 Dl +-8 15 Dl +23 -15 Dl +-50 0 Dl +1324 MX +23 15 Dl +-9 -15 Dl +9 -14 Dl +-23 14 Dl +50 0 Dl +3 Dt +2862 859 MXY +0 1064 Dl +932 0 Dl +0 -1064 Dl +-932 0 Dl +1 Dt +3178 1390 MXY +24 -12 Dl +-17 0 Dl +-8 -15 Dl +1 27 Dl +150 -265 Dl +3494 1390 MXY +0 -27 Dl +-8 15 Dl +-16 1 Dl +24 11 Dl +-166 -265 Dl +3 f +3232 1617(LIBTP)N +2995 1390 MXY +0 400 Dl +666 0 Dl +0 -400 Dl +-666 0 Dl +992 MY +0 133 Dl +666 0 Dl +0 -133 Dl +-666 0 Dl +3168 1086(Application)N +1 f +2939 1201(txn_begin)N +2885 1284(txn_commit)N +2950 1368(txn_abort)N +3465 1284(db_ops)N +3 f +3155 766(Single)N +3339(Process)X +3 Dt +-1 Ds +811 2100(Figure)N +1023(2:)X +1107(Server)X +1318(Architecture.)X +1 f +1727(In)X +1811(this)X +1934(con\256guration,)X +811 2190(the)N +916(library)X +1113(is)X +1183(loaded)X +1380(into)X +1507(a)X +1562(server)X +1744(process)X +1962(which)X +2145(is)X +2214(ac-)X +811 2280(cessed)N +993(via)X +1087(a)X +1131(socket)X +1310(interface.)X +3 f +2563 2100(Figure)N +2803(3:)X +2914(Single)X +3140(Process)X +3403(Architecture.)X +1 f +3839(In)X +3950(this)X +2563 2190(con\256guration,)N +2948(the)X +3053(library)X +3250(routines)X +3483(are)X +3587(loaded)X +3784(as)X +3864(part)X +3990(of)X +2563 2280(the)N +2657(application)X +2957(and)X +3065(accessed)X +3303(via)X +3397(a)X +3441(subroutine)X +3727(interface.)X +10 s +10 f +555 2403(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 2679(modifying)N +909(a)X +966(piece)X +1157(of)X +1245(data)X +1400(would)X +1621(require)X +1870(three)X +2051(or)X +2138(possibly)X +2424(four)X +2578(separate)X +2862(communications:)X +3433(one)X +3569(to)X +3651(lock)X +3809(the)X +3927(data,)X +4101(one)X +4237(to)X +555 2769(obtain)N +781(the)X +905(data,)X +1085(one)X +1227(to)X +1315(log)X +1443(the)X +1567(modi\256cation,)X +2017(and)X +2159(possibly)X +2451(one)X +2593(to)X +2681(transmit)X +2969(the)X +3093(modi\256ed)X +3403(data.)X +3583(Figure)X +3817(four)X +3976(shows)X +4201(the)X +555 2859(relative)N +826(performance)X +1263(for)X +1387(retrieving)X +1728(a)X +1793(single)X +2013(record)X +2248(using)X +2450(the)X +2577(record)X +2812(level)X +2997(call)X +3142(versus)X +3376(using)X +3578(the)X +3705(lower)X +3917(level)X +4102(buffer)X +555 2949(management)N +987(and)X +1125(locking)X +1387(calls.)X +1616(The)X +1763(2:1)X +1887(ratio)X +2056(observed)X +2367(in)X +2450(the)X +2569(single)X +2781(process)X +3043(case)X +3203(re\257ects)X +3456(the)X +3575(additional)X +3916(overhead)X +4232(of)X +555 3039(parsing)N +819(eight)X +1006(commands)X +1380(rather)X +1595(than)X +1760(one)X +1903(while)X +2108(the)X +2233(3:1)X +2362(ratio)X +2536(observed)X +2853(in)X +2942(the)X +3067(client/server)X +3491(architecture)X +3898(re\257ects)X +4157(both)X +555 3129(the)N +679(parsing)X +941(and)X +1083(the)X +1207(communication)X +1731(overheard.)X +2118(Although)X +2445(there)X +2631(may)X +2794(be)X +2895(applications)X +3307(which)X +3528(could)X +3731(tolerate)X +3997(such)X +4169(per-)X +555 3219(formance,)N +904(it)X +973(seems)X +1194(far)X +1309(more)X +1499(feasible)X +1774(to)X +1861(support)X +2126(a)X +2187(higher)X +2417(level)X +2597(interface,)X +2923(such)X +3094(as)X +3185(that)X +3329(provided)X +3638(by)X +3742(a)X +3802(query)X +4009(language)X +555 3309(\()N +2 f +582(e.g.)X +1 f +718(SQL)X +889([SQL86]\).)X +755 3432(Although)N +1081(LIBTP)X +1327(does)X +1498(not)X +1624(have)X +1800(an)X +1900(SQL)X +2075(parser,)X +2316(we)X +2433(have)X +2608(built)X +2777(a)X +2836(server)X +3056(application)X +3435(using)X +3631(the)X +3752(toolkit)X +3983(command)X +555 3522(language)N +882(\(TCL\))X +1124([OUST90].)X +1544(The)X +1706(server)X +1940(supports)X +2248(a)X +2321(command)X +2674(line)X +2831(interface)X +3150(similar)X +3409(to)X +3508(the)X +3643(subroutine)X +4017(interface)X +555 3612(de\256ned)N +811(in)X +3 f +893(db)X +1 f +981(\(3\).)X +1135(Since)X +1333(it)X +1397(is)X +1470(based)X +1673(on)X +1773(TCL,)X +1964(it)X +2028(provides)X +2324(control)X +2571(structures)X +2903(as)X +2990(well.)X +3 f +555 3798(4.)N +655(Implementation)X +1 f +3 f +555 3984(4.1.)N +715(Locking)X +1014(and)X +1162(Deadlock)X +1502(Detection)X +1 f +755 4107(LIBTP)N +1007(uses)X +1175(two-phase)X +1535(locking)X +1805(for)X +1929(user)X +2093(data.)X +2297(Strictly)X +2562(speaking,)X +2897(the)X +3024(two)X +3173(phases)X +3416(in)X +3507(two-phase)X +3866(locking)X +4135(are)X +4263(a)X +3 f +555 4197(grow)N +1 f +756(phase,)X +986(during)X +1221(which)X +1443(locks)X +1638(are)X +1763(acquired,)X +2086(and)X +2228(a)X +3 f +2290(shrink)X +1 f +2537(phase,)X +2766(during)X +3001(which)X +3223(locks)X +3418(are)X +3543(released.)X +3873(No)X +3997(lock)X +4161(may)X +555 4287(ever)N +720(be)X +822(acquired)X +1124(during)X +1358(the)X +1481(shrink)X +1706(phase.)X +1954(The)X +2104(grow)X +2294(phase)X +2502(lasts)X +2669(until)X +2840(the)X +2963(\256rst)X +3112(release,)X +3381(which)X +3602(marks)X +3823(the)X +3946(start)X +4109(of)X +4201(the)X +555 4377(shrink)N +780(phase.)X +1028(In)X +1120(practice,)X +1420(the)X +1543(grow)X +1733(phase)X +1941(lasts)X +2108(for)X +2227(the)X +2350(duration)X +2642(of)X +2734(a)X +2795(transaction)X +3172(in)X +3259(LIBTP)X +3506(and)X +3647(in)X +3734(commercial)X +4138(data-)X +555 4467(base)N +721(systems.)X +1037(The)X +1184(shrink)X +1406(phase)X +1611(takes)X +1798(place)X +1990(during)X +2221(transaction)X +2595(commit)X +2861(or)X +2950(abort.)X +3177(This)X +3341(means)X +3568(that)X +3710(locks)X +3901(are)X +4022(acquired)X +555 4557(on)N +655(demand)X +929(during)X +1158(the)X +1276(lifetime)X +1545(of)X +1632(a)X +1688(transaction,)X +2080(and)X +2216(held)X +2374(until)X +2540(commit)X +2804(time,)X +2986(at)X +3064(which)X +3280(point)X +3464(all)X +3564(locks)X +3753(are)X +3872(released.)X +755 4680(If)N +832(multiple)X +1121(transactions)X +1527(are)X +1649(active)X +1864(concurrently,)X +2313(deadlocks)X +2657(can)X +2792(occur)X +2994(and)X +3133(must)X +3311(be)X +3410(detected)X +3701(and)X +3840(resolved.)X +4174(The)X +555 4770(lock)N +715(table)X +893(can)X +1027(be)X +1125(thought)X +1391(of)X +1480(as)X +1569(a)X +1627(representation)X +2104(of)X +2193(a)X +2251(directed)X +2532(graph.)X +2777(The)X +2924(nodes)X +3133(in)X +3216(the)X +3335(graph)X +3539(are)X +3659(transactions.)X +4103(Edges)X +555 4860(represent)N +878(the)X +3 f +1004(waits-for)X +1 f +1340(relation)X +1613(between)X +1909(transactions;)X +2342(if)X +2419(transaction)X +2 f +2799(A)X +1 f +2876(is)X +2957(waiting)X +3225(for)X +3347(a)X +3411(lock)X +3577(held)X +3743(by)X +3851(transaction)X +2 f +4230(B)X +1 f +4279(,)X +555 4950(then)N +716(a)X +775(directed)X +1057(edge)X +1232(exists)X +1437(from)X +2 f +1616(A)X +1 f +1687(to)X +2 f +1771(B)X +1 f +1842(in)X +1926(the)X +2046(graph.)X +2291(A)X +2371(deadlock)X +2683(exists)X +2887(if)X +2958(a)X +3016(cycle)X +3208(appears)X +3476(in)X +3560(the)X +3680(graph.)X +3925(By)X +4040(conven-)X +555 5040(tion,)N +719(no)X +819(transaction)X +1191(ever)X +1350(waits)X +1539(for)X +1653(a)X +1709(lock)X +1867(it)X +1931(already)X +2188(holds,)X +2401(so)X +2492(re\257exive)X +2793(edges)X +2996(are)X +3115(impossible.)X +755 5163(A)N +836(distinguished)X +1285(process)X +1549(monitors)X +1856(the)X +1977(lock)X +2138(table,)X +2337(searching)X +2668(for)X +2785(cycles.)X +3048(The)X +3195(frequency)X +3539(with)X +3703(which)X +3921(this)X +4058(process)X +555 5253(runs)N +716(is)X +792(user-settable;)X +1243(for)X +1360(the)X +1481(multi-user)X +1833(tests)X +1998(discussed)X +2328(in)X +2413(section)X +2663(5.1.2,)X +2866(it)X +2933(has)X +3063(been)X +3238(set)X +3350(to)X +3435(wake)X +3628(up)X +3731(every)X +3932(second,)X +4197(but)X +555 5343(more)N +742(sophisticated)X +1182(schedules)X +1516(are)X +1636(certainly)X +1938(possible.)X +2261(When)X +2474(a)X +2531(cycle)X +2722(is)X +2796(detected,)X +3105(one)X +3242(of)X +3330(the)X +3449(transactions)X +3853(in)X +3936(the)X +4055(cycle)X +4246(is)X +555 5433(nominated)N +917(and)X +1057(aborted.)X +1362(When)X +1578(the)X +1700(transaction)X +2076(aborts,)X +2315(it)X +2382(rolls)X +2547(back)X +2722(its)X +2820(changes)X +3102(and)X +3241(releases)X +3519(its)X +3617(locks,)X +3829(thereby)X +4093(break-)X +555 5523(ing)N +677(the)X +795(cycle)X +985(in)X +1067(the)X +1185(graph.)X + +8 p +%%Page: 8 8 +10 s 10 xH 0 xS 1 f +3 f +1 f +4 Ds +1 Dt +1866 865 MXY +1338 0 Dl +1866 1031 MXY +1338 0 Dl +1866 1199 MXY +1338 0 Dl +1866 1366 MXY +1338 0 Dl +1866 1533 MXY +1338 0 Dl +1866 1701 MXY +1338 0 Dl +-1 Ds +5 Dt +1866 1868 MXY +1338 0 Dl +1 Dt +1 Di +2981 MX + 2981 1868 lineto + 2981 1575 lineto + 3092 1575 lineto + 3092 1868 lineto + 2981 1868 lineto +closepath 21 2981 1575 3092 1868 Dp +2646 MX + 2646 1868 lineto + 2646 949 lineto + 2758 949 lineto + 2758 1868 lineto + 2646 1868 lineto +closepath 14 2646 949 2758 1868 Dp +2312 MX + 2312 1868 lineto + 2312 1701 lineto + 2423 1701 lineto + 2423 1868 lineto + 2312 1868 lineto +closepath 3 2312 1701 2423 1868 Dp +1977 MX + 1977 1868 lineto + 1977 1512 lineto + 2089 1512 lineto + 2089 1868 lineto + 1977 1868 lineto +closepath 19 1977 1512 2089 1868 Dp +3 f +2640 2047(Client/Server)N +1957(Single)X +2185(Process)X +7 s +2957 1957(record)N +2570(component)X +2289(record)X +1890(components)X +1733 1724(.1)N +1733 1556(.2)N +1733 1389(.3)N +1733 1222(.4)N +1733 1055(.5)N +1733 889(.6)N +1590 726(Elapsed)N +1794(Time)X +1613 782(\(in)N +1693(seconds\))X +3 Dt +-1 Ds +8 s +555 2255(Figure)N +756(4:)X +829(Comparison)X +1187(of)X +1260(High)X +1416(and)X +1540(Low)X +1681(Level)X +1850(Interfaces.)X +1 f +2174(Elapsed)X +2395(time)X +2528(in)X +2597(seconds)X +2818(to)X +2887(perform)X +3111(a)X +3158(single)X +3330(record)X +3511(retrieval)X +3742(from)X +3885(a)X +3932(command)X +4203(line)X +555 2345(\(rather)N +751(than)X +888(a)X +943(procedural)X +1241(interface\))X +1510(is)X +1579(shown)X +1772(on)X +1862(the)X +1966(y)X +2024(axis.)X +2185(The)X +2310(``component'')X +2704(numbers)X +2950(re\257ect)X +3135(the)X +3239(timings)X +3458(when)X +3622(the)X +3726(record)X +3914(is)X +3983(retrieved)X +4235(by)X +555 2435(separate)N +785(calls)X +924(to)X +996(the)X +1096(lock)X +1228(manager)X +1469(and)X +1583(buffer)X +1760(manager)X +2001(while)X +2165(the)X +2264(``record'')X +2531(timings)X +2745(were)X +2889(obtained)X +3130(by)X +3215(using)X +3375(a)X +3424(single)X +3598(call)X +3711(to)X +3782(the)X +3881(record)X +4064(manager.)X +555 2525(The)N +674(2:1)X +776(ratio)X +913(observed)X +1163(for)X +1257(the)X +1355(single)X +1528(process)X +1739(case)X +1868(is)X +1930(a)X +1977(re\257ection)X +2237(of)X +2309(the)X +2406(parsing)X +2613(overhead)X +2865(for)X +2958(executing)X +3225(eight)X +3372(separate)X +3599(commands)X +3895(rather)X +4062(than)X +4191(one.)X +555 2615(The)N +673(additional)X +948(factor)X +1115(of)X +1187(one)X +1298(re\257ected)X +1536(in)X +1605(the)X +1702(3:1)X +1803(ratio)X +1939(for)X +2031(the)X +2127(client/server)X +2460(architecture)X +2794(is)X +2855(due)X +2965(to)X +3033(the)X +3129(communication)X +3545(overhead.)X +3828(The)X +3945(true)X +4062(ratios)X +4222(are)X +555 2705(actually)N +775(worse)X +945(since)X +1094(the)X +1190(component)X +1492(timings)X +1703(do)X +1785(not)X +1884(re\257ect)X +2060(the)X +2155(search)X +2334(times)X +2490(within)X +2671(each)X +2804(page)X +2941(or)X +3011(the)X +3106(time)X +3237(required)X +3466(to)X +3533(transmit)X +3760(the)X +3855(page)X +3992(between)X +4221(the)X +555 2795(two)N +667(processes.)X +10 s +10 f +555 2885(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 3161(4.2.)N +715(Group)X +961(Commit)X +1 f +755 3284(Since)N +959(the)X +1083(log)X +1211(must)X +1392(be)X +1494(\257ushed)X +1751(to)X +1839(disk)X +1997(at)X +2080(commit)X +2349(time,)X +2536(disk)X +2694(bandwidth)X +3057(fundamentally)X +3545(limits)X +3751(the)X +3874(rate)X +4020(at)X +4103(which)X +555 3374(transactions)N +959(complete.)X +1314(Since)X +1513(most)X +1688(transactions)X +2091(write)X +2276(only)X +2438(a)X +2494(few)X +2635(small)X +2828(records)X +3085(to)X +3167(the)X +3285(log,)X +3427(the)X +3545(last)X +3676(page)X +3848(of)X +3935(the)X +4053(log)X +4175(will)X +555 3464(be)N +658(\257ushed)X +916(once)X +1095(by)X +1202(every)X +1408(transaction)X +1787(which)X +2010(writes)X +2233(to)X +2322(it.)X +2433(In)X +2527(the)X +2652(naive)X +2853(implementation,)X +3402(these)X +3593(\257ushes)X +3841(would)X +4067(happen)X +555 3554(serially.)N +755 3677(LIBTP)N +1008(uses)X +3 f +1177(group)X +1412(commit)X +1 f +1702([DEWI84])X +2077(in)X +2170(order)X +2371(to)X +2464(amortize)X +2775(the)X +2903(cost)X +3062(of)X +3159(one)X +3305(synchronous)X +3740(disk)X +3903(write)X +4098(across)X +555 3767(multiple)N +851(transactions.)X +1304(Group)X +1539(commit)X +1812(provides)X +2117(a)X +2182(way)X +2345(for)X +2468(a)X +2533(group)X +2749(of)X +2845(transactions)X +3257(to)X +3348(commit)X +3621(simultaneously.)X +4174(The)X +555 3857(\256rst)N +709(several)X +967(transactions)X +1380(to)X +1472(commit)X +1745(write)X +1939(their)X +2115(changes)X +2403(to)X +2494(the)X +2621(in-memory)X +3006(log)X +3137(page,)X +3338(then)X +3505(sleep)X +3699(on)X +3808(a)X +3873(distinguished)X +555 3947(semaphore.)N +966(Later,)X +1179(a)X +1238(committing)X +1629(transaction)X +2004(\257ushes)X +2249(the)X +2370(page)X +2545(to)X +2630(disk,)X +2805(and)X +2943(wakes)X +3166(up)X +3268(all)X +3370(its)X +3467(sleeping)X +3756(peers.)X +3988(The)X +4135(point)X +555 4037(at)N +635(which)X +853(changes)X +1134(are)X +1255(actually)X +1531(written)X +1780(is)X +1855(determined)X +2238(by)X +2340(three)X +2523(thresholds.)X +2914(The)X +3061(\256rst)X +3207(is)X +3281(the)X +2 f +3400(group)X +3612(threshold)X +1 f +3935(and)X +4072(de\256nes)X +555 4127(the)N +674(minimum)X +1005(number)X +1271(of)X +1359(transactions)X +1763(which)X +1979(must)X +2154(be)X +2250(active)X +2462(in)X +2544(the)X +2662(system)X +2904(before)X +3130(transactions)X +3533(are)X +3652(forced)X +3878(to)X +3960(participate)X +555 4217(in)N +646(a)X +711(group)X +927(commit.)X +1240(The)X +1394(second)X +1646(is)X +1728(the)X +2 f +1855(wait)X +2021(threshold)X +1 f +2352(which)X +2577(is)X +2658(expressed)X +3003(as)X +3098(the)X +3224(percentage)X +3601(of)X +3696(active)X +3916(transactions)X +555 4307(waiting)N +826(to)X +919(be)X +1026(committed.)X +1439(The)X +1595(last)X +1737(is)X +1821(the)X +2 f +1950(logdelay)X +2257(threshold)X +1 f +2590(which)X +2816(indicates)X +3131(how)X +3299(much)X +3507(un\257ushed)X +3848(log)X +3980(should)X +4223(be)X +555 4397(allowed)N +829(to)X +911(accumulate)X +1297(before)X +1523(a)X +1579(waiting)X +1839(transaction's)X +2289(commit)X +2553(record)X +2779(is)X +2852(\257ushed.)X +755 4520(Group)N +981(commit)X +1246(can)X +1379(substantially)X +1803(improve)X +2090(performance)X +2517(for)X +2631(high-concurrency)X +3218(environments.)X +3714(If)X +3788(only)X +3950(a)X +4006(few)X +4147(tran-)X +555 4610(sactions)N +836(are)X +957(running,)X +1248(it)X +1314(is)X +1389(unlikely)X +1673(to)X +1757(improve)X +2046(things)X +2263(at)X +2343(all.)X +2485(The)X +2632(crossover)X +2962(point)X +3148(is)X +3223(the)X +3343(point)X +3529(at)X +3609(which)X +3827(the)X +3947(transaction)X +555 4700(commit)N +823(rate)X +968(is)X +1045(limited)X +1295(by)X +1399(the)X +1521(bandwidth)X +1883(of)X +1974(the)X +2096(device)X +2330(on)X +2434(which)X +2654(the)X +2776(log)X +2902(resides.)X +3189(If)X +3267(processes)X +3599(are)X +3722(trying)X +3937(to)X +4023(\257ush)X +4201(the)X +555 4790(log)N +677(faster)X +876(than)X +1034(the)X +1152(log)X +1274(disk)X +1427(can)X +1559(accept)X +1785(data,)X +1959(then)X +2117(group)X +2324(commit)X +2588(will)X +2732(increase)X +3016(the)X +3134(commit)X +3398(rate.)X +3 f +555 4976(4.3.)N +715(Kernel)X +971(Intervention)X +1418(for)X +1541(Synchronization)X +1 f +755 5099(Since)N +954(LIBTP)X +1197(uses)X +1356(data)X +1511(in)X +1594(shared)X +1825(memory)X +2113(\()X +2 f +2140(e.g.)X +1 f +2277(the)X +2395(lock)X +2553(table)X +2729(and)X +2865(buffer)X +3082(pool\))X +3271(it)X +3335(must)X +3510(be)X +3606(possible)X +3888(for)X +4002(a)X +4058(process)X +555 5189(to)N +640(acquire)X +900(exclusive)X +1226(access)X +1454(to)X +1538(shared)X +1770(data)X +1926(in)X +2010(order)X +2202(to)X +2286(prevent)X +2549(corruption.)X +2945(In)X +3034(addition,)X +3338(the)X +3458(process)X +3721(manager)X +4020(must)X +4197(put)X +555 5279(processes)N +886(to)X +971(sleep)X +1159(when)X +1356(the)X +1477(lock)X +1638(or)X +1728(buffer)X +1948(they)X +2109(request)X +2364(is)X +2440(in)X +2525(use)X +2655(by)X +2758(some)X +2950(other)X +3138(process.)X +3441(In)X +3530(the)X +3650(LIBTP)X +3894(implementa-)X +555 5385(tion)N +705(under)X +914(Ultrix)X +1131(4.0)X +7 s +5353(2)Y +10 s +5385(,)Y +1305(we)X +1424(use)X +1556(System)X +1816(V)X +1899(semaphores)X +2303(to)X +2390(provide)X +2660(this)X +2800(synchronization.)X +3377(Semaphores)X +3794(implemented)X +4237(in)X +555 5475(this)N +701(fashion)X +968(turn)X +1128(out)X +1261(to)X +1354(be)X +1461(an)X +1568(expensive)X +1920(choice)X +2161(for)X +2285(synchronization,)X +2847(because)X +3132(each)X +3310(access)X +3546(traps)X +3732(to)X +3824(the)X +3952(kernel)X +4183(and)X +8 s +10 f +555 5547(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5625(2)N +8 s +763 5650(Ultrix)N +932(and)X +1040(DEC)X +1184(are)X +1277(trademarks)X +1576(of)X +1645(Digital)X +1839(Equipment)X +2136(Corporation.)X + +9 p +%%Page: 9 9 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(executes)N +852(atomically)X +1210(there.)X +755 753(On)N +878(architectures)X +1314(that)X +1459(support)X +1724(atomic)X +1967(test-and-set,)X +2382(a)X +2443(much)X +2646(better)X +2854(choice)X +3089(would)X +3314(be)X +3415(to)X +3502(attempt)X +3767(to)X +3854(obtain)X +4079(a)X +4139(spin-)X +555 843(lock)N +714(with)X +877(a)X +934(test-and-set,)X +1345(and)X +1482(issue)X +1663(a)X +1720(system)X +1963(call)X +2100(only)X +2263(if)X +2333(the)X +2452(spinlock)X +2744(is)X +2818(unavailable.)X +3249(Since)X +3447(virtually)X +3738(all)X +3838(semaphores)X +4237(in)X +555 933(LIBTP)N +801(are)X +924(uncontested)X +1330(and)X +1469(are)X +1591(held)X +1752(for)X +1869(very)X +2035(short)X +2218(periods)X +2477(of)X +2567(time,)X +2752(this)X +2890(would)X +3113(improve)X +3403(performance.)X +3873(For)X +4007(example,)X +555 1023(processes)N +885(must)X +1062(acquire)X +1321(exclusive)X +1646(access)X +1874(to)X +1958(buffer)X +2177(pool)X +2341(metadata)X +2653(in)X +2737(order)X +2929(to)X +3013(\256nd)X +3159(and)X +3297(pin)X +3421(a)X +3479(buffer)X +3698(in)X +3781(shared)X +4012(memory.)X +555 1113(This)N +721(semaphore)X +1093(is)X +1170(requested)X +1502(most)X +1681(frequently)X +2034(in)X +2119(LIBTP.)X +2404(However,)X +2742(once)X +2917(it)X +2984(is)X +3060(acquired,)X +3380(only)X +3545(a)X +3604(few)X +3748(instructions)X +4144(must)X +555 1203(be)N +656(executed)X +966(before)X +1196(it)X +1264(is)X +1341(released.)X +1669(On)X +1791(one)X +1931(architecture)X +2335(for)X +2453(which)X +2673(we)X +2791(were)X +2972(able)X +3130(to)X +3216(gather)X +3441(detailed)X +3719(pro\256ling)X +4018(informa-)X +555 1293(tion,)N +729(the)X +857(cost)X +1015(of)X +1111(the)X +1238(semaphore)X +1615(calls)X +1791(accounted)X +2146(for)X +2269(25%)X +2445(of)X +2541(the)X +2668(total)X +2839(time)X +3010(spent)X +3208(updating)X +3517(the)X +3644(metadata.)X +4003(This)X +4174(was)X +555 1383(fairly)N +749(consistent)X +1089(across)X +1310(most)X +1485(of)X +1572(the)X +1690(critical)X +1933(sections.)X +755 1506(In)N +848(an)X +950(attempt)X +1216(to)X +1304(quantify)X +1597(the)X +1720(overhead)X +2040(of)X +2132(kernel)X +2358(synchronization,)X +2915(we)X +3034(ran)X +3162(tests)X +3329(on)X +3434(a)X +3495(version)X +3756(of)X +3848(4.3BSD-Reno)X +555 1596(which)N +786(had)X +937(been)X +1123(modi\256ed)X +1441(to)X +1537(support)X +1811(binary)X +2050(semaphore)X +2432(facilities)X +2742(similar)X +2998(to)X +3094(those)X +3297(described)X +3639(in)X +3735([POSIX91].)X +4174(The)X +555 1686(hardware)N +880(platform)X +1181(consisted)X +1504(of)X +1595(an)X +1695(HP300)X +1941(\(33MHz)X +2237(MC68030\))X +2612(workstation)X +3014(with)X +3180(16MBytes)X +3537(of)X +3628(main)X +3812(memory,)X +4123(and)X +4263(a)X +555 1776(600MByte)N +920(HP7959)X +1205(SCSI)X +1396(disk)X +1552(\(17)X +1682(ms)X +1798(average)X +2072(seek)X +2237(time\).)X +2468(We)X +2602(ran)X +2727(three)X +2910(sets)X +3052(of)X +3141(comparisons)X +3568(which)X +3786(are)X +3907(summarized)X +555 1866(in)N +645(\256gure)X +860(\256ve.)X +1028(In)X +1123(each)X +1299(comparison)X +1701(we)X +1823(ran)X +1954(two)X +2102(tests,)X +2292(one)X +2436(using)X +2637(hardware)X +2965(spinlocks)X +3295(and)X +3438(the)X +3563(other)X +3755(using)X +3955(kernel)X +4183(call)X +555 1956(synchronization.)N +1135(Since)X +1341(the)X +1467(test)X +1606(was)X +1758(run)X +1892(single-user,)X +2291(none)X +2474(of)X +2568(the)X +2693(the)X +2818(locks)X +3014(were)X +3198(contested.)X +3568(In)X +3662(the)X +3787(\256rst)X +3938(two)X +4085(sets)X +4232(of)X +555 2046(tests,)N +743(we)X +863(ran)X +992(the)X +1116(full)X +1253(transaction)X +1631(processing)X +2000(benchmark)X +2383(described)X +2717(in)X +2805(section)X +3058(5.1.)X +3223(In)X +3315(one)X +3456(case)X +3620(we)X +3739(ran)X +3867(with)X +4034(both)X +4201(the)X +555 2136(database)N +854(and)X +992(log)X +1116(on)X +1218(the)X +1338(same)X +1525(disk)X +1680(\(1)X +1769(Disk\))X +1969(and)X +2107(in)X +2191(the)X +2311(second,)X +2576(we)X +2692(ran)X +2817(with)X +2981(the)X +3101(database)X +3400(and)X +3538(log)X +3661(on)X +3762(separate)X +4047(disks)X +4232(\(2)X +555 2226(Disk\).)N +800(In)X +894(the)X +1019(last)X +1157(test,)X +1315(we)X +1436(wanted)X +1695(to)X +1784(create)X +2004(a)X +2067(CPU)X +2249(bound)X +2476(environment,)X +2928(so)X +3026(we)X +3146(used)X +3319(a)X +3381(database)X +3684(small)X +3883(enough)X +4145(to)X +4233(\256t)X +555 2316(completely)N +941(in)X +1033(the)X +1161(cache)X +1375(and)X +1521(issued)X +1751(read-only)X +2089(transactions.)X +2541(The)X +2695(results)X +2933(in)X +3024(\256gure)X +3240(\256ve)X +3389(express)X +3659(the)X +3786(kernel)X +4016(call)X +4161(syn-)X +555 2406(chronization)N +980(performance)X +1411(as)X +1502(a)X +1562(percentage)X +1935(of)X +2026(the)X +2148(spinlock)X +2443(performance.)X +2914(For)X +3049(example,)X +3365(in)X +3451(the)X +3573(1)X +3637(disk)X +3794(case,)X +3977(the)X +4098(kernel)X +555 2496(call)N +697(implementation)X +1225(achieved)X +1537(4.4)X +1662(TPS)X +1824(\(transactions)X +2259(per)X +2387(second\))X +2662(while)X +2865(the)X +2988(semaphore)X +3361(implementation)X +3888(achieved)X +4199(4.6)X +555 2586(TPS,)N +735(and)X +874(the)X +995(relative)X +1259(performance)X +1689(of)X +1779(the)X +1900(kernel)X +2123(synchronization)X +2657(is)X +2732(96%)X +2901(that)X +3043(of)X +3132(the)X +3252(spinlock)X +3545(\(100)X +3714(*)X +3776(4.4)X +3898(/)X +3942(4.6\).)X +4111(There)X +555 2676(are)N +674(two)X +814(striking)X +1078(observations)X +1503(from)X +1679(these)X +1864(results:)X +10 f +635 2799(g)N +1 f +755(even)X +927(when)X +1121(the)X +1239(system)X +1481(is)X +1554(disk)X +1707(bound,)X +1947(the)X +2065(CPU)X +2240(cost)X +2389(of)X +2476(synchronization)X +3008(is)X +3081(noticeable,)X +3451(and)X +10 f +635 2922(g)N +1 f +755(when)X +949(we)X +1063(are)X +1182(CPU)X +1357(bound,)X +1597(the)X +1715(difference)X +2062(is)X +2135(dramatic)X +2436(\(67%\).)X +3 f +555 3108(4.4.)N +715(Transaction)X +1148(Protected)X +1499(Access)X +1747(Methods)X +1 f +755 3231(The)N +903(B-tree)X +1127(and)X +1266(\256xed)X +1449(length)X +1671(recno)X +1872(\(record)X +2127(number\))X +2421(access)X +2649(methods)X +2942(have)X +3116(been)X +3290(modi\256ed)X +3596(to)X +3680(provide)X +3947(transaction)X +555 3321(protection.)N +941(Whereas)X +1244(the)X +1363(previously)X +1722(published)X +2054(interface)X +2357(to)X +2440(the)X +2559(access)X +2786(routines)X +3065(had)X +3202(separate)X +3487(open)X +3664(calls)X +3832(for)X +3946(each)X +4114(of)X +4201(the)X +10 f +555 3507(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 Dt +2978 5036 MXY + 2978 5036 lineto + 2978 4662 lineto + 3093 4662 lineto + 3093 5036 lineto + 2978 5036 lineto +closepath 21 2978 4662 3093 5036 Dp +2518 MX + 2518 5036 lineto + 2518 3960 lineto + 2633 3960 lineto + 2633 5036 lineto + 2518 5036 lineto +closepath 3 2518 3960 2633 5036 Dp +2059 MX + 2059 5036 lineto + 2059 3946 lineto + 2174 3946 lineto + 2174 5036 lineto + 2059 5036 lineto +closepath 1 2059 3946 2174 5036 Dp +3 f +7 s +2912 5141(Read-only)N +1426 3767(of)N +1487(Spinlock)X +1710(Throughput)X +1480 3710(Throughput)N +1786(as)X +1850(a)X +1892(%)X +11 s +1670 4843(20)N +1670 4614(40)N +1670 4384(60)N +1670 4155(80)N +1648 3925(100)N +7 s +2041 5141(1)N +2083(Disk)X +2490(2)X +2532(Disks)X +5 Dt +1829 5036 MXY +1494 0 Dl +4 Ds +1 Dt +1829 4806 MXY +1494 0 Dl +1829 4577 MXY +1494 0 Dl +1829 4347 MXY +1494 0 Dl +1829 4118 MXY +1494 0 Dl +1829 3888 MXY +1494 0 Dl +3 Dt +-1 Ds +8 s +555 5360(Figure)N +753(5:)X +823(Kernel)X +1028(Overhead)X +1315(for)X +1413(System)X +1625(Call)X +1756(Synchronization.)X +1 f +2254(The)X +2370(performance)X +2708(of)X +2778(the)X +2873(kernel)X +3049(call)X +3158(synchronization)X +3583(is)X +3643(expressed)X +3911(as)X +3980(a)X +4024(percentage)X +555 5450(of)N +625(the)X +720(spinlock)X +954(synchronization)X +1379(performance.)X +1749(In)X +1819(disk)X +1943(bound)X +2120(cases)X +2271(\(1)X +2341(Disk)X +2479(and)X +2588(2)X +2637(Disks\),)X +2837(we)X +2928(see)X +3026(that)X +3139(4-6%)X +3294(of)X +3364(the)X +3459(performance)X +3797(is)X +3857(lost)X +3966(due)X +4074(to)X +4140(kernel)X +555 5540(calls)N +688(while)X +846(in)X +912(the)X +1006(CPU)X +1147(bound)X +1323(case,)X +1464(we)X +1554(have)X +1690(lost)X +1799(67%)X +1932(of)X +2001(the)X +2095(performance)X +2432(due)X +2540(to)X +2606(kernel)X +2781(calls.)X + +10 p +%%Page: 10 10 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(access)N +781(methods,)X +1092(we)X +1206(now)X +1364(have)X +1536(an)X +1632(integrated)X +1973(open)X +2149(call)X +2285(with)X +2447(the)X +2565(following)X +2896(calling)X +3134(conventions:)X +7 f +715 753(DB)N +859(*dbopen)X +1243(\(const)X +1579(char)X +1819(*file,)X +2155(int)X +2347(flags,)X +2683(int)X +2875(mode,)X +3163(DBTYPE)X +3499(type,)X +1291 843(int)N +1483(dbflags,)X +1915(const)X +2203(void)X +2443(*openinfo\))X +1 f +555 966(where)N +2 f +774(\256le)X +1 f +894(is)X +969(the)X +1089(name)X +1285(of)X +1374(the)X +1494(\256le)X +1618(being)X +1818(opened,)X +2 f +2092(\257ags)X +1 f +2265(and)X +2 f +2402(mode)X +1 f +2597(are)X +2717(the)X +2836(standard)X +3129(arguments)X +3484(to)X +3 f +3567(open)X +1 f +3731(\(2\),)X +2 f +3866(type)X +1 f +4021(is)X +4095(one)X +4232(of)X +555 1056(the)N +680(access)X +913(method)X +1180(types,)X +2 f +1396(db\257ags)X +1 f +1654(indicates)X +1966(the)X +2091(mode)X +2296(of)X +2390(the)X +2515(buffer)X +2739(pool)X +2907(and)X +3049(transaction)X +3427(protection,)X +3798(and)X +2 f +3940(openinfo)X +1 f +4246(is)X +555 1146(the)N +681(access)X +915(method)X +1183(speci\256c)X +1456(information.)X +1902(Currently,)X +2257(the)X +2383(possible)X +2673(values)X +2906(for)X +2 f +3028(db\257ags)X +1 f +3287(are)X +3414(DB_SHARED)X +3912(and)X +4055(DB_TP)X +555 1236(indicating)N +895(that)X +1035(buffers)X +1283(should)X +1516(be)X +1612(kept)X +1770(in)X +1852(a)X +1908(shared)X +2138(buffer)X +2355(pool)X +2517(and)X +2653(that)X +2793(the)X +2911(\256le)X +3033(should)X +3266(be)X +3362(transaction)X +3734(protected.)X +755 1359(The)N +900(modi\256cations)X +1355(required)X +1643(to)X +1725(add)X +1861(transaction)X +2233(protection)X +2578(to)X +2660(an)X +2756(access)X +2982(method)X +3242(are)X +3361(quite)X +3541(simple)X +3774(and)X +3910(localized.)X +715 1482(1.)N +795(Replace)X +1074(\256le)X +2 f +1196(open)X +1 f +1372(with)X +2 f +1534(buf_open)X +1 f +1832(.)X +715 1572(2.)N +795(Replace)X +1074(\256le)X +2 f +1196(read)X +1 f +1363(and)X +2 f +1499(write)X +1 f +1683(calls)X +1850(with)X +2012(buffer)X +2229(manager)X +2526(calls)X +2693(\()X +2 f +2720(buf_get)X +1 f +(,)S +2 f +3000(buf_unpin)X +1 f +3324(\).)X +715 1662(3.)N +795(Precede)X +1070(buffer)X +1287(manager)X +1584(calls)X +1751(with)X +1913(an)X +2009(appropriate)X +2395(\(read)X +2581(or)X +2668(write\))X +2880(lock)X +3038(call.)X +715 1752(4.)N +795(Before)X +1034(updates,)X +1319(issue)X +1499(a)X +1555(logging)X +1819(operation.)X +715 1842(5.)N +795(After)X +985(data)X +1139(have)X +1311(been)X +1483(accessed,)X +1805(release)X +2049(the)X +2167(buffer)X +2384(manager)X +2681(pin.)X +715 1932(6.)N +795(Provide)X +1064(undo/redo)X +1409(code)X +1581(for)X +1695(each)X +1863(type)X +2021(of)X +2108(log)X +2230(record)X +2456(de\256ned.)X +555 2071(The)N +702(following)X +1035(code)X +1209(fragments)X +1552(show)X +1743(how)X +1903(to)X +1987(transaction)X +2361(protect)X +2606(several)X +2856(updates)X +3123(to)X +3206(a)X +3263(B-tree.)X +7 s +3484 2039(3)N +10 s +3533 2071(In)N +3621(the)X +3740(unprotected)X +4140(case,)X +555 2161(an)N +652(open)X +829(call)X +966(is)X +1040(followed)X +1346(by)X +1447(a)X +1504(read)X +1664(call)X +1801(to)X +1884(obtain)X +2105(the)X +2224(meta-data)X +2562(for)X +2677(the)X +2796(B-tree.)X +3058(Instead,)X +3331(we)X +3446(issue)X +3627(an)X +3724(open)X +3901(to)X +3984(the)X +4102(buffer)X +555 2251(manager)N +852(to)X +934(obtain)X +1154(a)X +1210(\256le)X +1332(id)X +1414(and)X +1550(a)X +1606(buffer)X +1823(request)X +2075(to)X +2157(obtain)X +2377(the)X +2495(meta-data)X +2832(as)X +2919(shown)X +3148(below.)X +7 f +715 2374(char)N +955(*path;)X +715 2464(int)N +907(fid,)X +1147(flags,)X +1483(len,)X +1723(mode;)X +715 2644(/*)N +859(Obtain)X +1195(a)X +1291(file)X +1531(id)X +1675(with)X +1915(which)X +2203(to)X +2347(access)X +2683(the)X +2875(buffer)X +3211(pool)X +3451(*/)X +715 2734(fid)N +907(=)X +1003(buf_open\(path,)X +1723(flags,)X +2059(mode\);)X +715 2914(/*)N +859(Read)X +1099(the)X +1291(meta)X +1531(data)X +1771(\(page)X +2059(0\))X +2203(for)X +2395(the)X +2587(B-tree)X +2923(*/)X +715 3004(if)N +859(\(tp_lock\(fid,)X +1531(0,)X +1675(READ_LOCK\)\))X +1003 3094(return)N +1339(error;)X +715 3184(meta_data_ptr)N +1387(=)X +1483(buf_get\(fid,)X +2107(0,)X +2251(BF_PIN,)X +2635(&len\);)X +1 f +555 3307(The)N +714(BF_PIN)X +1014(argument)X +1350(to)X +2 f +1445(buf_get)X +1 f +1718(indicates)X +2036(that)X +2189(we)X +2316(wish)X +2500(to)X +2595(leave)X +2798(this)X +2946(page)X +3131(pinned)X +3382(in)X +3477(memory)X +3777(so)X +3881(that)X +4034(it)X +4111(is)X +4197(not)X +555 3397(swapped)N +862(out)X +990(while)X +1194(we)X +1314(are)X +1439(accessing)X +1772(it.)X +1881(The)X +2031(last)X +2167(argument)X +2495(to)X +2 f +2582(buf_get)X +1 f +2847(returns)X +3095(the)X +3218(number)X +3488(of)X +3580(bytes)X +3774(on)X +3879(the)X +4002(page)X +4179(that)X +555 3487(were)N +732(valid)X +912(so)X +1003(that)X +1143(the)X +1261(access)X +1487(method)X +1747(may)X +1905(initialize)X +2205(the)X +2323(page)X +2495(if)X +2564(necessary.)X +755 3610(Next,)N +955(consider)X +1251(inserting)X +1555(a)X +1615(record)X +1845(on)X +1949(a)X +2009(particular)X +2341(page)X +2517(of)X +2608(a)X +2668(B-tree.)X +2932(In)X +3022(the)X +3143(unprotected)X +3545(case,)X +3727(we)X +3844(read)X +4006(the)X +4127(page,)X +555 3700(call)N +2 f +693(_bt_insertat)X +1 f +1079(,)X +1121(and)X +1258(write)X +1444(the)X +1563(page.)X +1776(Instead,)X +2049(we)X +2164(lock)X +2323(the)X +2442(page,)X +2635(request)X +2888(the)X +3007(buffer,)X +3245(log)X +3368(the)X +3487(change,)X +3756(modify)X +4008(the)X +4127(page,)X +555 3790(and)N +691(release)X +935(the)X +1053(buffer.)X +7 f +715 3913(int)N +907(fid,)X +1147(len,)X +1387(pageno;)X +1867(/*)X +2011(Identifies)X +2539(the)X +2731(buffer)X +3067(*/)X +715 4003(int)N +907(index;)X +1867(/*)X +2011(Location)X +2443(at)X +2587(which)X +2875(to)X +3019(insert)X +3355(the)X +3547(new)X +3739(pair)X +3979(*/)X +715 4093(DBT)N +907(*keyp,)X +1243(*datap;)X +1867(/*)X +2011(Key/Data)X +2443(pair)X +2683(to)X +2827(be)X +2971(inserted)X +3403(*/)X +715 4183(DATUM)N +1003(*d;)X +1867(/*)X +2011(Key/data)X +2443(structure)X +2923(to)X +3067(insert)X +3403(*/)X +715 4363(/*)N +859(Lock)X +1099(and)X +1291(request)X +1675(the)X +1867(buffer)X +2203(*/)X +715 4453(if)N +859(\(tp_lock\(fid,)X +1531(pageno,)X +1915(WRITE_LOCK\)\))X +1003 4543(return)N +1339(error;)X +715 4633(buffer_ptr)N +1243(=)X +1339(buf_get\(fid,)X +1963(pageno,)X +2347(BF_PIN,)X +2731(&len\);)X +715 4813(/*)N +859(Log)X +1051(and)X +1243(perform)X +1627(the)X +1819(update)X +2155(*/)X +715 4903(log_insdel\(BTREE_INSERT,)N +1915(fid,)X +2155(pageno,)X +2539(keyp,)X +2827(datap\);)X +715 4993(_bt_insertat\(buffer_ptr,)N +1915(d,)X +2059(index\);)X +715 5083(buf_unpin\(buffer_ptr\);)N +1 f +555 5206(Succinctly,)N +942(the)X +1068(algorithm)X +1407(for)X +1529(turning)X +1788(unprotected)X +2195(code)X +2375(into)X +2527(protected)X +2854(code)X +3034(is)X +3115(to)X +3205(replace)X +3466(read)X +3633(operations)X +3995(with)X +2 f +4165(lock)X +1 f +555 5296(and)N +2 f +691(buf_get)X +1 f +951(operations)X +1305(and)X +1441(write)X +1626(operations)X +1980(with)X +2 f +2142(log)X +1 f +2264(and)X +2 f +2400(buf_unpin)X +1 f +2744(operations.)X +8 s +10 f +555 5458(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5536(3)N +8 s +766 5561(The)N +884(following)X +1152(code)X +1291(fragments)X +1565(are)X +1661(examples,)X +1937(but)X +2038(do)X +2120(not)X +2220(de\256ne)X +2394(the)X +2490(\256nal)X +2622(interface.)X +2894(The)X +3011(\256nal)X +3143(interface)X +3383(will)X +3501(be)X +3579(determined)X +3884(after)X +4018(LIBTP)X +4214(has)X +555 5633(been)N +691(fully)X +828(integrated)X +1099(with)X +1229(the)X +1323(most)X +1464(recent)X +3 f +1635(db)X +1 f +1707(\(3\))X +1797(release)X +1989(from)X +2129(the)X +2223(Computer)X +2495(Systems)X +2725(Research)X +2974(Group)X +3153(at)X +3215(University)X +3501(of)X +3570(California,)X +3861(Berkeley.)X + +11 p +%%Page: 11 11 +8 s 8 xH 0 xS 1 f +10 s +3 f +555 630(5.)N +655(Performance)X +1 f +755 753(In)N +845(this)X +983(section,)X +1253(we)X +1370(present)X +1625(the)X +1746(results)X +1978(of)X +2067(two)X +2209(very)X +2374(different)X +2673(benchmarks.)X +3103(The)X +3250(\256rst)X +3396(is)X +3471(an)X +3569(online)X +3791(transaction)X +4165(pro-)X +555 843(cessing)N +824(benchmark,)X +1234(similar)X +1489(to)X +1584(the)X +1715(standard)X +2020(TPCB,)X +2272(but)X +2407(has)X +2547(been)X +2732(adapted)X +3015(to)X +3110(run)X +3250(in)X +3345(a)X +3414(desktop)X +3696(environment.)X +4174(The)X +555 933(second)N +798(emulates)X +1103(a)X +1159(computer-aided)X +1683(design)X +1912(environment)X +2337(and)X +2473(provides)X +2769(more)X +2954(complex)X +3250(query)X +3453(processing.)X +3 f +555 1119(5.1.)N +715(Transaction)X +1148(Processing)X +1533(Benchmark)X +1 f +755 1242(For)N +887(this)X +1023(section,)X +1291(all)X +1392(performance)X +1820(numbers)X +2117(shown)X +2346(except)X +2576(for)X +2690(the)X +2808(commercial)X +3207(database)X +3504(system)X +3746(were)X +3923(obtained)X +4219(on)X +555 1332(a)N +614(DECstation)X +1009(5000/200)X +1333(with)X +1497(32MBytes)X +1852(of)X +1941(memory)X +2230(running)X +2501(Ultrix)X +2714(V4.0,)X +2914(accessing)X +3244(a)X +3302(DEC)X +3484(RZ57)X +3688(1GByte)X +3959(disk)X +4114(drive.)X +555 1422(The)N +720(commercial)X +1139(relational)X +1482(database)X +1799(system)X +2061(tests)X +2242(were)X +2438(run)X +2584(on)X +2703(a)X +2778(comparable)X +3192(machine,)X +3523(a)X +3598(Sparcstation)X +4033(1+)X +4157(with)X +555 1512(32MBytes)N +915(memory)X +1209(and)X +1352(a)X +1415(1GByte)X +1691(external)X +1976(disk)X +2135(drive.)X +2366(The)X +2517(database,)X +2840(binaries)X +3120(and)X +3262(log)X +3390(resided)X +3648(on)X +3754(the)X +3878(same)X +4069(device.)X +555 1602(Reported)N +869(times)X +1062(are)X +1181(the)X +1299(means)X +1524(of)X +1611(\256ve)X +1751(tests)X +1913(and)X +2049(have)X +2221(standard)X +2513(deviations)X +2862(within)X +3086(two)X +3226(percent)X +3483(of)X +3570(the)X +3688(mean.)X +755 1725(The)N +905(test)X +1041(database)X +1343(was)X +1493(con\256gured)X +1861(according)X +2203(to)X +2290(the)X +2413(TPCB)X +2637(scaling)X +2889(rules)X +3070(for)X +3189(a)X +3250(10)X +3355(transaction)X +3732(per)X +3860(second)X +4108(\(TPS\))X +555 1815(system)N +817(with)X +999(1,000,000)X +1359(account)X +1649(records,)X +1946(100)X +2106(teller)X +2311(records,)X +2607(and)X +2762(10)X +2881(branch)X +3139(records.)X +3455(Where)X +3709(TPS)X +3885(numbers)X +4200(are)X +555 1905(reported,)N +865(we)X +981(are)X +1102(running)X +1373(a)X +1431(modi\256ed)X +1737(version)X +1995(of)X +2084(the)X +2203(industry)X +2486(standard)X +2779(transaction)X +3152(processing)X +3516(benchmark,)X +3914(TPCB.)X +4174(The)X +555 1995(TPCB)N +780(benchmark)X +1163(simulates)X +1491(a)X +1553(withdrawal)X +1940(performed)X +2301(by)X +2407(a)X +2469(hypothetical)X +2891(teller)X +3082(at)X +3166(a)X +3228(hypothetical)X +3650(bank.)X +3872(The)X +4022(database)X +555 2085(consists)N +831(of)X +921(relations)X +1220(\(\256les\))X +1430(for)X +1547(accounts,)X +1871(branches,)X +2200(tellers,)X +2439(and)X +2578(history.)X +2863(For)X +2997(each)X +3168(transaction,)X +3563(the)X +3684(account,)X +3976(teller,)X +4183(and)X +555 2175(branch)N +795(balances)X +1093(must)X +1269(be)X +1366(updated)X +1641(to)X +1724(re\257ect)X +1946(the)X +2065(withdrawal)X +2447(and)X +2584(a)X +2640(history)X +2882(record)X +3108(is)X +3181(written)X +3428(which)X +3644(contains)X +3931(the)X +4049(account)X +555 2265(id,)N +657(branch)X +896(id,)X +998(teller)X +1183(id,)X +1285(and)X +1421(the)X +1539(amount)X +1799(of)X +1886(the)X +2004(withdrawal)X +2385([TPCB90].)X +755 2388(Our)N +914(implementation)X +1450(of)X +1551(the)X +1683(benchmark)X +2074(differs)X +2317(from)X +2506(the)X +2637(speci\256cation)X +3075(in)X +3170(several)X +3431(aspects.)X +3736(The)X +3894(speci\256cation)X +555 2478(requires)N +840(that)X +985(the)X +1108(database)X +1410(keep)X +1587(redundant)X +1933(logs)X +2091(on)X +2196(different)X +2498(devices,)X +2784(but)X +2911(we)X +3030(use)X +3162(a)X +3223(single)X +3439(log.)X +3606(Furthermore,)X +4052(all)X +4157(tests)X +555 2568(were)N +734(run)X +863(on)X +965(a)X +1023(single,)X +1256(centralized)X +1631(system)X +1875(so)X +1968(there)X +2151(is)X +2226(no)X +2328(notion)X +2553(of)X +2641(remote)X +2885(accesses.)X +3219(Finally,)X +3486(we)X +3601(calculated)X +3948(throughput)X +555 2658(by)N +662(dividing)X +955(the)X +1080(total)X +1249(elapsed)X +1517(time)X +1686(by)X +1793(the)X +1918(number)X +2190(of)X +2284(transactions)X +2694(processed)X +3038(rather)X +3253(than)X +3418(by)X +3525(computing)X +3894(the)X +4018(response)X +555 2748(time)N +717(for)X +831(each)X +999(transaction.)X +755 2871(The)N +912(performance)X +1351(comparisons)X +1788(focus)X +1993(on)X +2104(traditional)X +2464(Unix)X +2655(techniques)X +3029(\(unprotected,)X +3486(using)X +3 f +3690(\257ock)X +1 f +3854(\(2\))X +3979(and)X +4126(using)X +3 f +555 2961(fsync)N +1 f +733(\(2\)\))X +884(and)X +1030(a)X +1096(commercial)X +1504(relational)X +1836(database)X +2142(system.)X +2433(Well-behaved)X +2913(applications)X +3329(using)X +3 f +3531(\257ock)X +1 f +3695(\(2\))X +3818(are)X +3946(guaranteed)X +555 3051(that)N +704(concurrent)X +1077(processes')X +1441(updates)X +1715(do)X +1824(not)X +1955(interact)X +2225(with)X +2396(one)X +2541(another,)X +2831(but)X +2962(no)X +3070(guarantees)X +3442(about)X +3648(atomicity)X +3978(are)X +4105(made.)X +555 3141(That)N +731(is,)X +833(if)X +911(the)X +1038(system)X +1289(crashes)X +1555(in)X +1646(mid-transaction,)X +2198(only)X +2369(parts)X +2554(of)X +2649(that)X +2797(transaction)X +3177(will)X +3329(be)X +3433(re\257ected)X +3738(in)X +3828(the)X +3954 0.3125(after-crash)AX +555 3231(state)N +725(of)X +815(the)X +936(database.)X +1276(The)X +1424(use)X +1554(of)X +3 f +1643(fsync)X +1 f +1821(\(2\))X +1937(at)X +2017(transaction)X +2391(commit)X +2657(time)X +2821(provides)X +3119(guarantees)X +3485(of)X +3574(durability)X +3907(after)X +4077(system)X +555 3321(failure.)N +825(However,)X +1160(there)X +1341(is)X +1414(no)X +1514(mechanism)X +1899(to)X +1981(perform)X +2260(transaction)X +2632(abort.)X +3 f +555 3507(5.1.1.)N +775(Single-User)X +1191(Tests)X +1 f +755 3630(These)N +978(tests)X +1151(compare)X +1459(LIBTP)X +1712(in)X +1804(a)X +1870(variety)X +2123(of)X +2220(con\256gurations)X +2708(to)X +2800(traditional)X +3159(UNIX)X +3390(solutions)X +3708(and)X +3854(a)X +3920(commercial)X +555 3720(relational)N +884(database)X +1187(system)X +1435(\(RDBMS\).)X +1814(To)X +1929(demonstrate)X +2347(the)X +2471(server)X +2694(architecture)X +3100(we)X +3220(built)X +3392(a)X +3454(front)X +3636(end)X +3777(test)X +3913(process)X +4179(that)X +555 3810(uses)N +732(TCL)X +922([OUST90])X +1304(to)X +1405(parse)X +1614(database)X +1930(access)X +2175(commands)X +2561(and)X +2716(call)X +2870(the)X +3006(database)X +3321(access)X +3565(routines.)X +3901(In)X +4006(one)X +4160(case)X +555 3900(\(SERVER\),)N +956(frontend)X +1249(and)X +1386(backend)X +1675(processes)X +2004(were)X +2181(created)X +2434(which)X +2650(communicated)X +3142(via)X +3260(an)X +3356(IP)X +3447(socket.)X +3712(In)X +3799(the)X +3917(second)X +4160(case)X +555 3990(\(TCL\),)N +802(a)X +860(single)X +1073(process)X +1336(read)X +1497(queries)X +1751(from)X +1929(standard)X +2223(input,)X +2429(parsed)X +2660(them,)X +2861(and)X +2998(called)X +3211(the)X +3330(database)X +3628(access)X +3855(routines.)X +4174(The)X +555 4080(performance)N +987(difference)X +1338(between)X +1630(the)X +1752(TCL)X +1927(and)X +2067(SERVER)X +2397(tests)X +2563(quanti\256es)X +2898(the)X +3020(communication)X +3542(overhead)X +3861(of)X +3952(the)X +4074(socket.)X +555 4170(The)N +732(RDBMS)X +1063(implementation)X +1617(used)X +1816(embedded)X +2198(SQL)X +2401(in)X +2515(C)X +2620(with)X +2814(stored)X +3062(database)X +3391(procedures.)X +3835(Therefore,)X +4224(its)X +555 4260(con\256guration)N +1003(is)X +1076(a)X +1132(hybrid)X +1361(of)X +1448(the)X +1566(single)X +1777(process)X +2038(architecture)X +2438(and)X +2574(the)X +2692(server)X +2909(architecture.)X +3349(The)X +3494(graph)X +3697(in)X +3779(\256gure)X +3986(six)X +4099(shows)X +555 4350(a)N +611(comparison)X +1005(of)X +1092(the)X +1210(following)X +1541(six)X +1654(con\256gurations:)X +1126 4506(LIBTP)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(single)X +2671(application.)X +1126 4596(TCL)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(single)X +2671(application,)X +3067(requires)X +3346(query)X +3549(parsing.)X +1126 4686(SERVER)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(server)X +2677(con\256guration,)X +3144(requires)X +3423(query)X +3626(parsing.)X +1126 4776(NOTP)N +1552(Uses)X +1728(no)X +1828(locking,)X +2108(logging,)X +2392(or)X +2479(concurrency)X +2897(control.)X +1126 4866(FLOCK)N +1552(Uses)X +3 f +1728(\257ock)X +1 f +1892(\(2\))X +2006(for)X +2120(concurrency)X +2538(control)X +2785(and)X +2921(nothing)X +3185(for)X +3299(durability.)X +1126 4956(FSYNC)N +1552(Uses)X +3 f +1728(fsync)X +1 f +1906(\(2\))X +2020(for)X +2134(durability)X +2465(and)X +2601(nothing)X +2865(for)X +2979(concurrency)X +3397(control.)X +1126 5046(RDBMS)N +1552(Uses)X +1728(a)X +1784(commercial)X +2183(relational)X +2506(database)X +2803(system.)X +755 5235(The)N +902(results)X +1133(show)X +1324(that)X +1466(LIBTP,)X +1730(both)X +1894(in)X +1978(the)X +2098(procedural)X +2464(and)X +2602(parsed)X +2834(environments,)X +3312(is)X +3387(competitive)X +3787(with)X +3951(a)X +4009(commer-)X +555 5325(cial)N +692(system)X +935(\(comparing)X +1326(LIBTP,)X +1589(TCL,)X +1781(and)X +1917(RDBMS\).)X +2263(Compared)X +2617(to)X +2699(existing)X +2972(UNIX)X +3193(solutions,)X +3521(LIBTP)X +3763(is)X +3836(approximately)X +555 5415(15%)N +738(slower)X +988(than)X +1162(using)X +3 f +1371(\257ock)X +1 f +1535(\(2\))X +1665(or)X +1768(no)X +1884(protection)X +2245(but)X +2383(over)X +2562(80%)X +2745(better)X +2964(than)X +3137(using)X +3 f +3345(fsync)X +1 f +3523(\(2\))X +3652(\(comparing)X +4057(LIBTP,)X +555 5505(FLOCK,)N +857(NOTP,)X +1106(and)X +1242(FSYNC\).)X + +12 p +%%Page: 12 12 +10 s 10 xH 0 xS 1 f +3 f +8 s +3500 2184(RDBMS)N +1 Dt +3553 2085 MXY + 3553 2085 lineto + 3676 2085 lineto + 3676 1351 lineto + 3553 1351 lineto + 3553 2085 lineto +closepath 16 3553 1351 3676 2085 Dp +2018 2184(SERVER)N +1720 1168 MXY +0 917 Dl +122 0 Dl +0 -917 Dl +-122 0 Dl +1715 2184(TCL)N +2087 1534 MXY + 2087 1534 lineto + 2209 1534 lineto + 2209 2085 lineto + 2087 2085 lineto + 2087 1534 lineto +closepath 12 2087 1534 2209 2085 Dp +3187 MX + 3187 1534 lineto + 3309 1534 lineto + 3309 2085 lineto + 3187 2085 lineto + 3187 1534 lineto +closepath 19 3187 1534 3309 2085 Dp +3142 2184(FSYNC)N +2425(NOTP)X +2453 955 MXY + 2453 955 lineto + 2576 955 lineto + 2576 2085 lineto + 2453 2085 lineto + 2453 955 lineto +closepath 21 2453 955 2576 2085 Dp +2820 1000 MXY + 2820 1000 lineto + 2942 1000 lineto + 2942 2085 lineto + 2820 2085 lineto + 2820 1000 lineto +closepath 14 2820 1000 2942 2085 Dp +5 Dt +1231 2085 MXY +2567 0 Dl +4 Ds +1 Dt +1231 1840 MXY +2567 0 Dl +1231 1596 MXY +2567 0 Dl +1231 1351 MXY +2567 0 Dl +1231 1108 MXY +2567 0 Dl +1231 863 MXY +2567 0 Dl +11 s +1087 1877(2)N +1087 1633(4)N +1087 1388(6)N +1087 1145(8)N +1065 900(10)N +1028 763(TPS)N +-1 Ds +1353 2085 MXY + 1353 2085 lineto + 1353 1151 lineto + 1476 1151 lineto + 1476 2085 lineto + 1353 2085 lineto +closepath 3 1353 1151 1476 2085 Dp +8 s +1318 2184(LIBTP)N +2767(FLOCK)X +3 Dt +-1 Ds +10 s +1597 2399(Figure)N +1844(6:)X +1931(Single-User)X +2347(Performance)X +2814(Comparison.)X +1 f +10 f +555 2579(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 2855(5.1.2.)N +775(Multi-User)X +1174(Tests)X +1 f +755 2978(While)N +975(the)X +1097(single-user)X +1473(tests)X +1639(form)X +1819(a)X +1878(basis)X +2061(for)X +2178(comparing)X +2544(LIBTP)X +2789(to)X +2874(other)X +3062(systems,)X +3358(our)X +3488(goal)X +3649(in)X +3734(multi-user)X +4086(testing)X +555 3068(was)N +714(to)X +810(analyze)X +1089(its)X +1197(scalability.)X +1579(To)X +1701(this)X +1849(end,)X +2018(we)X +2145(have)X +2330(run)X +2470(the)X +2601(benchmark)X +2991(in)X +3086(three)X +3280(modes,)X +3542(the)X +3673(normal)X +3933(disk)X +4099(bound)X +555 3158(con\256guration)N +1010(\(\256gure)X +1252(seven\),)X +1510(a)X +1573(CPU)X +1755(bound)X +1982(con\256guration)X +2436(\(\256gure)X +2677(eight,)X +2884(READ-ONLY\),)X +3426(and)X +3569(lock)X +3734(contention)X +4099(bound)X +555 3248(\(\256gure)N +796(eight,)X +1003(NO_FSYNC\).)X +1510(Since)X +1715(the)X +1840(normal)X +2094(con\256guration)X +2548(is)X +2628(completely)X +3011(disk)X +3171(bound)X +3398(\(each)X +3600(transaction)X +3978(requires)X +4263(a)X +555 3354(random)N +823(read,)X +1005(a)X +1064(random)X +1332(write,)X +1540(and)X +1679(a)X +1738(sequential)X +2086(write)X +7 s +2251 3322(4)N +10 s +3354(\))Y +2329(we)X +2446(expect)X +2679(to)X +2764(see)X +2890(little)X +3059(performance)X +3489(improvement)X +3939(as)X +4028(the)X +4148(mul-)X +555 3444(tiprogramming)N +1064(level)X +1249(increases.)X +1613(In)X +1709(fact,)X +1879(\256gure)X +2095(seven)X +2307(reveals)X +2564(that)X +2713(we)X +2836(are)X +2964(able)X +3127(to)X +3218(overlap)X +3487(CPU)X +3670(and)X +3814(disk)X +3975(utilization)X +555 3534(slightly)N +825(producing)X +1181(approximately)X +1674(a)X +1740(10%)X +1917(performance)X +2354(improvement)X +2811(with)X +2983(two)X +3133(processes.)X +3511(After)X +3711(that)X +3861(point,)X +4075(perfor-)X +555 3624(mance)N +785(drops)X +983(off,)X +1117(and)X +1253(at)X +1331(a)X +1387(multi-programming)X +2038(level)X +2214(of)X +2301(4,)X +2381(we)X +2495(are)X +2614(performing)X +2995(worse)X +3207(than)X +3365(in)X +3447(the)X +3565(single)X +3776(process)X +4037(case.)X +755 3747(Similar)N +1021(behavior)X +1333(was)X +1489(reported)X +1787(on)X +1897(the)X +2025(commercial)X +2434(relational)X +2767(database)X +3074(system)X +3326(using)X +3529(the)X +3657(same)X +3852(con\256guration.)X +555 3837(The)N +707(important)X +1045(conclusion)X +1419(to)X +1508(draw)X +1696(from)X +1879(this)X +2021(is)X +2101(that)X +2248(you)X +2395(cannot)X +2636(attain)X +2841(good)X +3028(multi-user)X +3384(scaling)X +3638(on)X +3745(a)X +3808(badly)X +4013(balanced)X +555 3927(system.)N +839(If)X +915(multi-user)X +1266(performance)X +1695(on)X +1797(applications)X +2205(of)X +2293(this)X +2429(sort)X +2570(is)X +2644(important,)X +2996(one)X +3133(must)X +3309(have)X +3482(a)X +3539(separate)X +3824(logging)X +4089(device)X +555 4017(and)N +697(horizontally)X +1110(partition)X +1407(the)X +1531(database)X +1834(to)X +1921(allow)X +2124(a)X +2185(suf\256ciently)X +2570(high)X +2737(degree)X +2977(of)X +3069(multiprogramming)X +3698(that)X +3843(group)X +4055(commit)X +555 4107(can)N +687(amortize)X +988(the)X +1106(cost)X +1255(of)X +1342(log)X +1464(\257ushing.)X +755 4230(By)N +871(using)X +1067(a)X +1126(very)X +1292(small)X +1488(database)X +1788(\(one)X +1954(that)X +2097(can)X +2232(be)X +2331(entirely)X +2599(cached)X +2846(in)X +2930(main)X +3112(memory\))X +3428(and)X +3566(read-only)X +3896(transactions,)X +555 4320(we)N +670(generated)X +1004(a)X +1061(CPU)X +1236(bound)X +1456(environment.)X +1921(By)X +2034(using)X +2227(the)X +2345(same)X +2530(small)X +2723(database,)X +3040(the)X +3158(complete)X +3472(TPCB)X +3691(transaction,)X +4083(and)X +4219(no)X +3 f +555 4410(fsync)N +1 f +733(\(2\))X +862(on)X +977(the)X +1110(log)X +1247(at)X +1340(commit,)X +1639(we)X +1768(created)X +2036(a)X +2107(lock)X +2280(contention)X +2652(bound)X +2886(environment.)X +3365(The)X +3524(small)X +3731(database)X +4042(used)X +4223(an)X +555 4500(account)N +828(\256le)X +953(containing)X +1314(only)X +1479(1000)X +1662(records)X +1922(rather)X +2133(than)X +2294(the)X +2415(full)X +2549(1,000,000)X +2891(records)X +3150(and)X +3288(ran)X +3413(enough)X +3671(transactions)X +4076(to)X +4160(read)X +555 4590(the)N +677(entire)X +883(database)X +1183(into)X +1330(the)X +1451(buffer)X +1671(pool)X +1836(\(2000\))X +2073(before)X +2302(beginning)X +2645(measurements.)X +3147(The)X +3295(read-only)X +3626(transaction)X +4001(consisted)X +555 4680(of)N +646(three)X +831(database)X +1132(reads)X +1326(\(from)X +1533(the)X +1655(1000)X +1839(record)X +2069(account)X +2343(\256le,)X +2489(the)X +2611(100)X +2754(record)X +2983(teller)X +3171(\256le,)X +3316(and)X +3455(the)X +3576(10)X +3679(record)X +3908(branch)X +4150(\256le\).)X +555 4770(Since)N +759(no)X +865(data)X +1025(were)X +1208(modi\256ed)X +1518(and)X +1660(no)X +1766(history)X +2014(records)X +2277(were)X +2460(written,)X +2733(no)X +2839(log)X +2966(records)X +3228(were)X +3410(written.)X +3702(For)X +3838(the)X +3961(contention)X +555 4860(bound)N +780(con\256guration,)X +1252(we)X +1371(used)X +1543(the)X +1666(normal)X +1918(TPCB)X +2142(transaction)X +2519(\(against)X +2798(the)X +2920(small)X +3117(database\))X +3445(and)X +3585(disabled)X +3876(the)X +3998(log)X +4124(\257ush.)X +555 4950(Figure)N +784(eight)X +964(shows)X +1184(both)X +1346(of)X +1433(these)X +1618(results.)X +755 5073(The)N +902(read-only)X +1231(test)X +1363(indicates)X +1669(that)X +1810(we)X +1925(barely)X +2147(scale)X +2329(at)X +2408(all)X +2509(in)X +2592(the)X +2711(CPU)X +2887(bound)X +3108(case.)X +3308(The)X +3454(explanation)X +3849(for)X +3964(that)X +4105(is)X +4179(that)X +555 5163(even)N +735(with)X +905(a)X +969(single)X +1188(process,)X +1477(we)X +1599(are)X +1726(able)X +1888(to)X +1978(drive)X +2171(the)X +2297(CPU)X +2480(utilization)X +2832(to)X +2922(96%.)X +3137(As)X +3254(a)X +3317(result,)X +3542(that)X +3689(gives)X +3885(us)X +3983(very)X +4153(little)X +555 5253(room)N +753(for)X +876(improvement,)X +1352(and)X +1497(it)X +1570(takes)X +1764(a)X +1829(multiprogramming)X +2462(level)X +2647(of)X +2743(four)X +2906(to)X +2997(approach)X +3321(100%)X +3537(CPU)X +3721(saturation.)X +4106(In)X +4201(the)X +555 5343(case)N +718(where)X +939(we)X +1057(do)X +1161(perform)X +1444(writes,)X +1684(we)X +1802(are)X +1925(interested)X +2261(in)X +2347(detecting)X +2665(when)X +2863(lock)X +3025(contention)X +3387(becomes)X +3691(a)X +3750(dominant)X +4075(perfor-)X +555 5433(mance)N +787(factor.)X +1037(Contention)X +1414(will)X +1560(cause)X +1761(two)X +1903(phenomena;)X +2317(we)X +2433(will)X +2579(see)X +2704(transactions)X +3109(queueing)X +3425(behind)X +3665(frequently)X +4017(accessed)X +555 5523(data,)N +731(and)X +869(we)X +985(will)X +1131(see)X +1256(transaction)X +1629(abort)X +1815(rates)X +1988(increasing)X +2339(due)X +2476(to)X +2559(deadlock.)X +2910(Given)X +3127(that)X +3268(the)X +3387(branch)X +3627(\256le)X +3750(contains)X +4038(only)X +4201(ten)X +8 s +10 f +555 5595(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5673(4)N +8 s +763 5698(Although)N +1021(the)X +1115(log)X +1213(is)X +1272(written)X +1469(sequentially,)X +1810(we)X +1900(do)X +1980(not)X +2078(get)X +2172(the)X +2266(bene\256t)X +2456(of)X +2525(sequentiality)X +2868(since)X +3015(the)X +3109(log)X +3207(and)X +3315(database)X +3550(reside)X +3718(on)X +3798(the)X +3892(same)X +4039(disk.)X + +13 p +%%Page: 13 13 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +3187 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3286 2028 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3384 1926 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3483 1910 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3581 1910 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3680 1832 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3778 1909 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3877 1883 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3975 1679 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +4074 1487 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +5 Dt +3187 2060 MXY +99 -24 Dl +98 -101 Dl +99 -16 Dl +98 0 Dl +99 -78 Dl +98 77 Dl +99 -26 Dl +98 -204 Dl +99 -192 Dl +3 f +6 s +4088 1516(SMALL)N +3 Dt +3187 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3286 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3384 2041 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3483 1990 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3581 1843 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +3680 1578 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3778 1496 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3877 1430 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3975 1269 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +4074 1070 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1 Dt +3187 2060 MXY +99 0 Dl +98 -10 Dl +99 -51 Dl +98 -147 Dl +99 -265 Dl +98 -82 Dl +99 -66 Dl +98 -161 Dl +99 -199 Dl +4088 1099(LARGE)N +5 Dt +3089 2060 MXY +985 0 Dl +3089 MX +0 -1174 Dl +4 Ds +1 Dt +3581 2060 MXY +0 -1174 Dl +4074 2060 MXY +0 -1174 Dl +3089 1825 MXY +985 0 Dl +9 s +2993 1855(25)N +3089 1591 MXY +985 0 Dl +2993 1621(50)N +3089 1356 MXY +985 0 Dl +2993 1386(75)N +3089 1121 MXY +985 0 Dl +2957 1151(100)N +3089 886 MXY +985 0 Dl +2957 916(125)N +3281 2199(Multiprogramming)N +3071 2152(0)N +3569(5)X +4038(10)X +2859 787(Aborts)N +3089(per)X +3211(500)X +2901 847(transactions)N +-1 Ds +3 Dt +2037 1342 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2125 1358 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2213 1341 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2301 1191 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2388 1124 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-17 0 Dl +2476 1157 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2564 1157 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2652 1161 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2740 1153 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2828 1150 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +5 Dt +2037 1351 MXY +88 16 Dl +88 -17 Dl +88 -150 Dl +87 -67 Dl +88 33 Dl +88 0 Dl +88 4 Dl +88 -8 Dl +88 -3 Dl +6 s +2685 1234(READ-ONLY)N +3 Dt +2037 1464 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2125 1640 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2213 1854 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2301 1872 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2388 1871 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-17 0 Dl +2476 1933 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2564 1914 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2652 1903 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2740 1980 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2828 2004 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +1 Dt +2037 1473 MXY +88 176 Dl +88 214 Dl +88 18 Dl +87 -2 Dl +88 63 Dl +88 -19 Dl +88 -11 Dl +88 77 Dl +88 24 Dl +2759 1997(NO-FSYNC)N +5 Dt +1949 2060 MXY +879 0 Dl +1949 MX +0 -1174 Dl +4 Ds +1 Dt +2388 2060 MXY +0 -1174 Dl +2828 2060 MXY +0 -1174 Dl +1949 1825 MXY +879 0 Dl +9 s +1842 1855(40)N +1949 1591 MXY +879 0 Dl +1842 1621(80)N +1949 1356 MXY +879 0 Dl +1806 1386(120)N +1949 1121 MXY +879 0 Dl +1806 1151(160)N +1949 886 MXY +879 0 Dl +1806 916(200)N +2088 2199(Multiprogramming)N +1844 863(in)N +1922(TPS)X +1761 792(Throughput)N +1931 2121(0)N +2370 2133(5)N +2792(10)X +6 s +1679 1833(LIBTP)N +-1 Ds +3 Dt +837 1019 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +929 878 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1021 939 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1113 1043 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1205 1314 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1297 1567 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1389 1665 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1481 1699 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1573 1828 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1665 1804 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +5 Dt +837 1027 MXY +92 -141 Dl +92 62 Dl +92 104 Dl +92 271 Dl +92 253 Dl +92 98 Dl +92 34 Dl +92 129 Dl +92 -24 Dl +745 2060 MXY +920 0 Dl +745 MX +0 -1174 Dl +4 Ds +1 Dt +1205 2060 MXY +0 -1174 Dl +1665 2060 MXY +0 -1174 Dl +745 1766 MXY +920 0 Dl +9 s +673 1796(3)N +745 1473 MXY +920 0 Dl +673 1503(5)N +745 1180 MXY +920 0 Dl +673 1210(8)N +745 886 MXY +920 0 Dl +637 916(10)N +905 2199(Multiprogramming)N +622 851(in)N +700(TPS)X +575 792(Throughput)N +733 2152(0)N +1196(5)X +1629(10)X +3 Dt +-1 Ds +8 s +655 2441(Figure)N +872(7:)X +960(Multi-user)X +1286(Performance.)X +1 f +655 2531(Since)N +825(the)X +931(con\256guration)X +1300(is)X +1371(completely)X +655 2621(disk)N +790(bound,)X +994(we)X +1096(see)X +1204(only)X +1345(a)X +1400(small)X +1566(im-)X +655 2711(provement)N +964(by)X +1064(adding)X +1274(a)X +1337(second)X +1549(pro-)X +655 2801(cess.)N +849(Adding)X +1081(any)X +1213(more)X +1383(concurrent)X +655 2891(processes)N +935(causes)X +1137(performance)X +1493(degra-)X +655 2981(dation.)N +3 f +1927 2441(Figure)N +2149(8:)X +2243(Multi-user)X +2574(Performance)X +1927 2531(on)N +2021(a)X +2079(small)X +2251(database.)X +1 f +2551(With)X +2704(one)X +2821(pro-)X +1927 2621(cess,)N +2075(we)X +2174(are)X +2276(driving)X +2486(the)X +2589(CPU)X +2739(at)X +2810(96%)X +1927 2711(utilization)N +2215(leaving)X +2430(little)X +2575(room)X +2737(for)X +2838(im-)X +1927 2801(provement)N +2238(as)X +2328(the)X +2443(multiprogramming)X +1927 2891(level)N +2091(increases.)X +2396(In)X +2489(the)X +2607(NO-FSYNC)X +1927 2981(case,)N +2076(lock)X +2209(contention)X +2502(degrades)X +2751(perfor-)X +1927 3071(mance)N +2117(as)X +2194(soon)X +2339(as)X +2416(a)X +2468(second)X +2669(process)X +2884(is)X +1927 3161(added.)N +3 f +3199 2441(Figure)N +3405(9:)X +3482(Abort)X +3669(rates)X +3827(on)X +3919(the)X +4028(TPCB)X +3199 2531(Benchmark.)N +1 f +3589(The)X +3726(abort)X +3895(rate)X +4028(climbs)X +3199 2621(more)N +3366(quickly)X +3594(for)X +3704(the)X +3818(large)X +3980(database)X +3199 2711(test)N +3324(since)X +3491(processes)X +3771(are)X +3884(descheduled)X +3199 2801(more)N +3409(frequently,)X +3766(allowing)X +4068(more)X +3199 2891(processes)N +3459(to)X +3525(vie)X +3619(for)X +3709(the)X +3803(same)X +3950(locks.)X +10 s +10 f +555 3284(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 3560(records,)N +835(we)X +952(expect)X +1185(contention)X +1546(to)X +1631(become)X +1904(a)X +1963(factor)X +2174(quickly)X +2437(and)X +2576(the)X +2697(NO-FSYNC)X +3120(line)X +3263(in)X +3348(\256gure)X +3557(eight)X +3739(demonstrates)X +4184(this)X +555 3650(dramatically.)N +1022(Each)X +1209(additional)X +1555(process)X +1822(causes)X +2058(both)X +2226(more)X +2417(waiting)X +2682(and)X +2823(more)X +3013(deadlocking.)X +3470(Figure)X +3704(nine)X +3867(shows)X +4092(that)X +4237(in)X +555 3740(the)N +681(small)X +882(database)X +1187(case)X +1353(\(SMALL\),)X +1725(waiting)X +1992(is)X +2072(the)X +2197(dominant)X +2526(cause)X +2732(of)X +2826(declining)X +3151(performance)X +3585(\(the)X +3737(number)X +4009(of)X +4103(aborts)X +555 3830(increases)N +878(less)X +1026(steeply)X +1281(than)X +1447(the)X +1573(performance)X +2008(drops)X +2214(off)X +2336(in)X +2426(\256gure)X +2641(eight\),)X +2876(while)X +3082(in)X +3172(the)X +3298(large)X +3487(database)X +3792(case)X +3958(\(LARGE\),)X +555 3920(deadlocking)N +967(contributes)X +1343(more)X +1528(to)X +1610(the)X +1728(declining)X +2046(performance.)X +755 4043(Deadlocks)N +1116(are)X +1237(more)X +1424(likely)X +1628(to)X +1712(occur)X +1913(in)X +1997(the)X +2116(LARGE)X +2404(test)X +2536(than)X +2695(in)X +2778(the)X +2897(SMALL)X +3189(test)X +3321(because)X +3597(there)X +3779(are)X +3899(more)X +4085(oppor-)X +555 4133(tunities)N +814(to)X +900(wait.)X +1082(In)X +1173(the)X +1295(SMALL)X +1590(case,)X +1773(processes)X +2105(never)X +2307(do)X +2410(I/O)X +2540(and)X +2679(are)X +2801(less)X +2944(likely)X +3149(to)X +3234(be)X +3333(descheduled)X +3753(during)X +3985(a)X +4044(transac-)X +555 4223(tion.)N +740(In)X +828(the)X +947(LARGE)X +1235(case,)X +1415(processes)X +1744(will)X +1889(frequently)X +2240(be)X +2337(descheduled)X +2755(since)X +2941(they)X +3100(have)X +3273(to)X +3356(perform)X +3636(I/O.)X +3804(This)X +3967(provides)X +4263(a)X +555 4313(window)N +837(where)X +1058(a)X +1118(second)X +1365(process)X +1630(can)X +1766(request)X +2022(locks)X +2215(on)X +2318(already)X +2578(locked)X +2815(pages,)X +3041(thus)X +3197(increasing)X +3550(the)X +3671(likelihood)X +4018(of)X +4108(build-)X +555 4403(ing)N +677(up)X +777(long)X +939(chains)X +1164(of)X +1251(waiting)X +1511(processes.)X +1879(Eventually,)X +2266(this)X +2401(leads)X +2586(to)X +2668(deadlock.)X +3 f +555 4589(5.2.)N +715(The)X +868(OO1)X +1052(Benchmark)X +1 f +755 4712(The)N +903(TPCB)X +1125(benchmark)X +1505(described)X +1836(in)X +1921(the)X +2042(previous)X +2341(section)X +2591(measures)X +2913(performance)X +3343(under)X +3549(a)X +3608(conventional)X +4044(transac-)X +555 4802(tion)N +706(processing)X +1076(workload.)X +1446(Other)X +1656(application)X +2039(domains,)X +2357(such)X +2531(as)X +2625(computer-aided)X +3156(design,)X +3412(have)X +3591(substantially)X +4022(different)X +555 4892(access)N +786(patterns.)X +1105(In)X +1197(order)X +1392(to)X +1479(measure)X +1772(the)X +1895(performance)X +2327(of)X +2418(LIBTP)X +2664(under)X +2871(workloads)X +3229(of)X +3320(this)X +3459(type,)X +3641(we)X +3759(implemented)X +4201(the)X +555 4982(OO1)N +731(benchmark)X +1108(described)X +1436(in)X +1518([CATT91].)X +755 5105(The)N +908(database)X +1213(models)X +1472(a)X +1535(set)X +1651(of)X +1745(electronics)X +2120(components)X +2534(with)X +2703(connections)X +3113(among)X +3358(them.)X +3585(One)X +3746(table)X +3929(stores)X +4143(parts)X +555 5195(and)N +696(another)X +962(stores)X +1174(connections.)X +1622(There)X +1835(are)X +1959(three)X +2145(connections)X +2552(originating)X +2927(at)X +3009(any)X +3149(given)X +3351(part.)X +3540(Ninety)X +3782(percent)X +4043(of)X +4134(these)X +555 5285(connections)N +960(are)X +1081(to)X +1165(nearby)X +1406(parts)X +1584(\(those)X +1802(with)X +1966(nearby)X +2 f +2207(ids)X +1 f +2300(\))X +2348(to)X +2431(model)X +2652(the)X +2771(spatial)X +3001(locality)X +3262(often)X +3448(exhibited)X +3767(in)X +3850(CAD)X +4040(applica-)X +555 5375(tions.)N +779(Ten)X +933(percent)X +1198(of)X +1293(the)X +1419(connections)X +1830(are)X +1957(randomly)X +2292(distributed)X +2662(among)X +2908(all)X +3016(other)X +3209(parts)X +3393(in)X +3483(the)X +3609(database.)X +3954(Every)X +4174(part)X +555 5465(appears)N +829(exactly)X +1089(three)X +1278(times)X +1479(in)X +1569(the)X +2 f +1695(from)X +1 f +1874(\256eld)X +2043(of)X +2137(a)X +2200(connection)X +2579(record,)X +2832(and)X +2975(zero)X +3141(or)X +3235(more)X +3427(times)X +3627(in)X +3716(the)X +2 f +3841(to)X +1 f +3930(\256eld.)X +4139(Parts)X +555 5555(have)N +2 f +727(x)X +1 f +783(and)X +2 f +919(y)X +1 f +975(locations)X +1284(set)X +1393(randomly)X +1720(in)X +1802(an)X +1898(appropriate)X +2284(range.)X + +14 p +%%Page: 14 14 +10 s 10 xH 0 xS 1 f +3 f +1 f +755 630(The)N +900(intent)X +1102(of)X +1189(OO1)X +1365(is)X +1438(to)X +1520(measure)X +1808(the)X +1926(overall)X +2169(cost)X +2318(of)X +2405(a)X +2461(query)X +2664(mix)X +2808(characteristic)X +3257(of)X +3344(engineering)X +3743(database)X +4040(applica-)X +555 720(tions.)N +770(There)X +978(are)X +1097(three)X +1278(tests:)X +10 f +635 843(g)N +2 f +755(Lookup)X +1 f +1022(generates)X +1353(1,000)X +1560(random)X +1832(part)X +2 f +1984(ids)X +1 f +2077(,)X +2124(fetches)X +2378(the)X +2502(corresponding)X +2987(parts)X +3169(from)X +3351(the)X +3475(database,)X +3798(and)X +3940(calls)X +4113(a)X +4175(null)X +755 933(procedure)N +1097(in)X +1179(the)X +1297(host)X +1450(programming)X +1906(language)X +2216(with)X +2378(the)X +2496(parts')X +2 f +2699(x)X +1 f +2755(and)X +2 f +2891(y)X +1 f +2947(positions.)X +10 f +635 1056(g)N +2 f +755(Traverse)X +1 f +1067(retrieves)X +1371(a)X +1434(random)X +1706(part)X +1858(from)X +2041(the)X +2166(database)X +2470(and)X +2613(follows)X +2880(connections)X +3290(from)X +3473(it)X +3544(to)X +3632(other)X +3823(parts.)X +4045(Each)X +4232(of)X +755 1146(those)N +947(parts)X +1126(is)X +1202(retrieved,)X +1531(and)X +1670(all)X +1773(connections)X +2179(from)X +2358(it)X +2424(followed.)X +2771(This)X +2935(procedure)X +3279(is)X +3354(repeated)X +3649(depth-\256rst)X +4000(for)X +4116(seven)X +755 1236(hops)N +930(from)X +1110(the)X +1232(original)X +1505(part,)X +1674(for)X +1792(a)X +1852(total)X +2018(of)X +2109(3280)X +2293(parts.)X +2513(Backward)X +2862(traversal)X +3162(also)X +3314(exists,)X +3539(and)X +3678(follows)X +3941(all)X +4044(connec-)X +755 1326(tions)N +930(into)X +1074(a)X +1130(given)X +1328(part)X +1473(to)X +1555(their)X +1722(origin.)X +10 f +635 1449(g)N +2 f +755(Insert)X +1 f +962(adds)X +1129(100)X +1269(new)X +1423(parts)X +1599(and)X +1735(their)X +1902(connections.)X +755 1572(The)N +913(benchmark)X +1303(is)X +1389(single-user,)X +1794(but)X +1929(multi-user)X +2291(access)X +2530(controls)X +2821(\(locking)X +3120(and)X +3268(transaction)X +3652(protection\))X +4036(must)X +4223(be)X +555 1662(enforced.)N +898(It)X +968(is)X +1042(designed)X +1348(to)X +1431(be)X +1528(run)X +1656(on)X +1757(a)X +1814(database)X +2112(with)X +2275(20,000)X +2516(parts,)X +2713(and)X +2850(on)X +2951(one)X +3087(with)X +3249(200,000)X +3529(parts.)X +3745(Because)X +4033(we)X +4147(have)X +555 1752(insuf\256cient)N +935(disk)X +1088(space)X +1287(for)X +1401(the)X +1519(larger)X +1727(database,)X +2044(we)X +2158(report)X +2370(results)X +2599(only)X +2761(for)X +2875(the)X +2993(20,000)X +3233(part)X +3378(database.)X +3 f +555 1938(5.2.1.)N +775(Implementation)X +1 f +755 2061(The)N +920(LIBTP)X +1182(implementation)X +1724(of)X +1831(OO1)X +2027(uses)X +2205(the)X +2342(TCL)X +2532([OUST90])X +2914(interface)X +3235(described)X +3582(earlier.)X +3867(The)X +4031(backend)X +555 2151(accepts)N +813(commands)X +1181(over)X +1345(an)X +1442(IP)X +1534(socket)X +1760(and)X +1897(performs)X +2208(the)X +2327(requested)X +2656(database)X +2954(actions.)X +3242(The)X +3387(frontend)X +3679(opens)X +3886(and)X +4022(executes)X +555 2241(a)N +618(TCL)X +796(script.)X +1041(This)X +1210(script)X +1415(contains)X +1709(database)X +2013(accesses)X +2313(interleaved)X +2697(with)X +2866(ordinary)X +3165(program)X +3463(control)X +3716(statements.)X +4120(Data-)X +555 2331(base)N +718(commands)X +1085(are)X +1204(submitted)X +1539(to)X +1621(the)X +1739(backend)X +2027(and)X +2163(results)X +2392(are)X +2511(bound)X +2731(to)X +2813(program)X +3105(variables.)X +755 2454(The)N +903(parts)X +1082(table)X +1261(was)X +1409(stored)X +1628(as)X +1718(a)X +1776(B-tree)X +1999(indexed)X +2275(by)X +2 f +2377(id)X +1 f +2439(.)X +2501(The)X +2648(connection)X +3022(table)X +3200(was)X +3347(stored)X +3565(as)X +3654(a)X +3712(set)X +3823(of)X +3912(\256xed-length)X +555 2544(records)N +824(using)X +1029(the)X +1159(4.4BSD)X +1446(recno)X +1657(access)X +1895(method.)X +2207(In)X +2306(addition,)X +2620(two)X +2771(B-tree)X +3003(indices)X +3261(were)X +3449(maintained)X +3836(on)X +3947(connection)X +555 2634(table)N +732(entries.)X +1007(One)X +1162(index)X +1360(mapped)X +1634(the)X +2 f +1752(from)X +1 f +1923(\256eld)X +2085(to)X +2167(a)X +2223(connection)X +2595(record)X +2821(number,)X +3106(and)X +3242(the)X +3360(other)X +3545(mapped)X +3819(the)X +2 f +3937(to)X +1 f +4019(\256eld)X +4181(to)X +4263(a)X +555 2724(connection)N +932(record)X +1163(number.)X +1473(These)X +1690(indices)X +1941(support)X +2205(fast)X +2345(lookups)X +2622(on)X +2726(connections)X +3133(in)X +3219(both)X +3385(directions.)X +3765(For)X +3900(the)X +4022(traversal)X +555 2814(tests,)N +743(the)X +867(frontend)X +1165(does)X +1338(an)X +1439(index)X +1642(lookup)X +1889(to)X +1976(discover)X +2273(the)X +2396(connected)X +2747(part's)X +2 f +2955(id)X +1 f +3017(,)X +3062(and)X +3203(then)X +3366(does)X +3538(another)X +3804(lookup)X +4051(to)X +4138(fetch)X +555 2904(the)N +673(part)X +818(itself.)X +3 f +555 3090(5.2.2.)N +775(Performance)X +1242(Measurements)X +1766(for)X +1889(OO1)X +1 f +755 3213(We)N +888(compare)X +1186(LIBTP's)X +1487(OO1)X +1664(performance)X +2092(to)X +2174(that)X +2314(reported)X +2602(in)X +2684([CATT91].)X +3087(Those)X +3303(results)X +3532(were)X +3709(collected)X +4019(on)X +4119(a)X +4175(Sun)X +555 3303(3/280)N +759(\(25)X +888(MHz)X +1075(MC68020\))X +1448(with)X +1612(16)X +1714(MBytes)X +1989(of)X +2078(memory)X +2367(and)X +2505(two)X +2647(Hitachi)X +2904(892MByte)X +3267(disks)X +3452(\(15)X +3580(ms)X +3694(average)X +3966(seek)X +4130(time\))X +555 3393(behind)N +793(an)X +889(SMD-4)X +1149(controller.)X +1521(Frontends)X +1861(ran)X +1984(on)X +2084(an)X +2180(8MByte)X +2462(Sun)X +2606(3/260.)X +755 3516(In)N +844(order)X +1036(to)X +1120(measure)X +1410(performance)X +1839(on)X +1941(a)X +1999(machine)X +2293(of)X +2382(roughly)X +2653(equivalent)X +3009(processor)X +3339(power,)X +3582(we)X +3698(ran)X +3822(one)X +3959(set)X +4069(of)X +4157(tests)X +555 3606(on)N +666(a)X +733(standalone)X +1107(MC68030-based)X +1671(HP300)X +1923(\(33MHz)X +2225(MC68030\).)X +2646(The)X +2801(database)X +3108(was)X +3263(stored)X +3489(on)X +3599(a)X +3665(300MByte)X +4037(HP7959)X +555 3696(SCSI)N +744(disk)X +898(\(17)X +1026(ms)X +1139(average)X +1410(seek)X +1573(time\).)X +1802(Since)X +2000(this)X +2135(machine)X +2427(is)X +2500(not)X +2622(connected)X +2968(to)X +3050(a)X +3106(network,)X +3409(we)X +3523(ran)X +3646(local)X +3822(tests)X +3984(where)X +4201(the)X +555 3786(frontend)N +855(and)X +999(backend)X +1295(run)X +1430(on)X +1538(the)X +1664(same)X +1856(machine.)X +2195(We)X +2334(compare)X +2638(these)X +2830(measurements)X +3316(with)X +3485(Cattell's)X +3783(local)X +3966(Sun)X +4117(3/280)X +555 3876(numbers.)N +755 3999(Because)N +1051(the)X +1177(benchmark)X +1562(requires)X +1849(remote)X +2100(access,)X +2354(we)X +2476(ran)X +2607(another)X +2876(set)X +2993(of)X +3088(tests)X +3258(on)X +3365(a)X +3428(DECstation)X +3828(5000/200)X +4157(with)X +555 4089(32M)N +732(of)X +825(memory)X +1118(running)X +1393(Ultrix)X +1610(V4.0)X +1794(and)X +1936(a)X +1998(DEC)X +2184(1GByte)X +2459(RZ57)X +2666(SCSI)X +2859(disk.)X +3057(We)X +3194(compare)X +3496(the)X +3619(local)X +3800(performance)X +4232(of)X +555 4179(OO1)N +734(on)X +837(the)X +958(DECstation)X +1354(to)X +1439(its)X +1536(remote)X +1781(performance.)X +2250(For)X +2383(the)X +2503(remote)X +2748(case,)X +2929(we)X +3045(ran)X +3170(the)X +3290(frontend)X +3584(on)X +3686(a)X +3744(DECstation)X +4139(3100)X +555 4269(with)N +717(16)X +817(MBytes)X +1090(of)X +1177(main)X +1357(memory.)X +755 4392(The)N +900(databases)X +1228(tested)X +1435(in)X +1517([CATT91])X +1880(are)X +10 f +635 4515(g)N +1 f +755(INDEX,)X +1045(a)X +1101(highly-optimized)X +1672(access)X +1898(method)X +2158(package)X +2442(developed)X +2792(at)X +2870(Sun)X +3014(Microsystems.)X +10 f +635 4638(g)N +1 f +755(OODBMS,)X +1137(a)X +1193(beta)X +1347(release)X +1591(of)X +1678(a)X +1734(commercial)X +2133(object-oriented)X +2639(database)X +2936(management)X +3366(system.)X +10 f +635 4761(g)N +1 f +755(RDBMS,)X +1076(a)X +1133(UNIX-based)X +1565(commercial)X +1965(relational)X +2289(data)X +2444(manager)X +2742(at)X +2821(production)X +3189(release.)X +3474(The)X +3620(OO1)X +3797(implementation)X +755 4851(used)N +922(embedded)X +1272(SQL)X +1443(in)X +1525(C.)X +1638(Stored)X +1867(procedures)X +2240(were)X +2417(de\256ned)X +2673(to)X +2755(reduce)X +2990(client-server)X +3412(traf\256c.)X +755 4974(Table)N +974(two)X +1130(shows)X +1366(the)X +1500(measurements)X +1995(from)X +2187([CATT91])X +2566(and)X +2718(LIBTP)X +2976(for)X +3106(a)X +3178(local)X +3370(test)X +3517(on)X +3632(the)X +3765(MC680x0-based)X +555 5064(hardware.)N +915(All)X +1037(caches)X +1272(are)X +1391(cleared)X +1644(before)X +1870(each)X +2038(test.)X +2209(All)X +2331(times)X +2524(are)X +2643(in)X +2725(seconds.)X +755 5187(Table)N +960(two)X +1102(shows)X +1324(that)X +1466(LIBTP)X +1710(outperforms)X +2123(the)X +2242(commercial)X +2642(relational)X +2966(system,)X +3229(but)X +3352(is)X +3426(slower)X +3661(than)X +3820(OODBMS)X +4183(and)X +555 5277(INDEX.)N +872(Since)X +1077(the)X +1202(caches)X +1444(were)X +1628(cleared)X +1888(at)X +1973(the)X +2098(start)X +2263(of)X +2356(each)X +2530(test,)X +2687(disk)X +2846(throughput)X +3223(is)X +3302(critical)X +3551(in)X +3639(this)X +3780(test.)X +3957(The)X +4108(single)X +555 5367(SCSI)N +749(HP)X +877(drive)X +1068(used)X +1241(by)X +1347(LIBTP)X +1595(is)X +1674(approximately)X +2163(13%)X +2336(slower)X +2576(than)X +2739(the)X +2862(disks)X +3051(used)X +3223(in)X +3310([CATT91])X +3678(which)X +3899(accounts)X +4205(for)X +555 5457(part)N +700(of)X +787(the)X +905(difference.)X +755 5580(OODBMS)N +1118(and)X +1255(INDEX)X +1525(outperform)X +1906(LIBTP)X +2148(most)X +2323(dramatically)X +2744(on)X +2844(traversal.)X +3181(This)X +3343(is)X +3416(because)X +3691(we)X +3805(use)X +3932(index)X +4130(look-)X +555 5670(ups)N +689(to)X +774(\256nd)X +921(connections,)X +1347(whereas)X +1634(the)X +1755(other)X +1942(two)X +2084(systems)X +2359(use)X +2488(a)X +2546(link)X +2692(access)X +2920(method.)X +3222(The)X +3369(index)X +3569(requires)X +3850(us)X +3943(to)X +4027(examine)X + +15 p +%%Page: 15 15 +10 s 10 xH 0 xS 1 f +3 f +1 f +10 f +555 679(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +2 f +606 769(Measure)N +1 f +1019(INDEX)X +1389(OODBMS)X +1851(RDBMS)X +2250(LIBTP)X +10 f +555 771(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +555 787(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +1 f +595 869(Lookup)N +1114(5.4)X +1490(12.9)X +1950(27)X +2291(27.2)X +595 959(Traversal)N +1074(13)X +1530(9.8)X +1950(90)X +2291(47.3)X +595 1049(Insert)N +1114(7.4)X +1530(1.5)X +1950(22)X +2331(9.7)X +10 f +555 1059(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +555(c)X +999(c)Y +919(c)Y +839(c)Y +759(c)Y +959 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +1329 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +1791 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2190 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2512 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2618 679(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +2829 769(Measure)N +3401(Cache)X +3726(Local)X +4028(Remote)X +1 f +10 f +2618 771(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2618 787(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 869(Lookup)N +3401(cold)X +3747(15.7)X +4078(20.6)X +3401 959(warm)N +3787(7.8)X +4078(12.4)X +10 f +2618 969(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1059(Forward)N +2950(traversal)X +3401(cold)X +3747(28.4)X +4078(52.6)X +3401 1149(warm)N +3747(23.5)X +4078(47.4)X +10 f +2618 1159(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1249(Backward)N +3004(traversal)X +3401(cold)X +3747(24.2)X +4078(47.4)X +3401 1339(warm)N +3747(24.3)X +4078(47.6)X +10 f +2618 1349(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1439(Insert)N +3401(cold)X +3787(7.5)X +4078(10.3)X +3401 1529(warm)N +3787(6.7)X +4078(10.9)X +10 f +2618 1539(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2618(c)X +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3341 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3666 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3968 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +4309 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3 f +587 1785(Table)N +823(2:)X +931(Local)X +1163(MC680x0)X +1538(Performance)X +2026(of)X +2133(Several)X +587 1875(Systems)N +883(on)X +987(OO1.)X +2667 1785(Table)N +2909(3:)X +3023(Local)X +3260(vs.)X +3397(Remote)X +3707(Performance)X +4200(of)X +2667 1875(LIBTP)N +2926(on)X +3030(OO1.)X +1 f +10 f +555 1998(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 2274(two)N +696(disk)X +850(pages,)X +1074(but)X +1197(the)X +1316(links)X +1492(require)X +1741(only)X +1904(one,)X +2061(regardless)X +2408(of)X +2496(database)X +2794(size.)X +2980(Cattell)X +3214(reports)X +3458(that)X +3599(lookups)X +3873(using)X +4067(B-trees)X +555 2364(instead)N +808(of)X +901(links)X +1082(makes)X +1313(traversal)X +1616(take)X +1776(twice)X +1976(as)X +2069(long)X +2237(in)X +2325(INDEX.)X +2641(Adding)X +2907(a)X +2969(link)X +3119(access)X +3351(method)X +3617(to)X +3 f +3704(db)X +1 f +3792(\(3\))X +3911(or)X +4003(using)X +4201(the)X +555 2454(existing)N +828(hash)X +995(method)X +1255(would)X +1475(apparently)X +1834(be)X +1930(a)X +1986(good)X +2166(idea.)X +755 2577(Both)N +936(OODBMS)X +1304(and)X +1446(INDEX)X +1722(issue)X +1908 0.1944(coarser-granularity)AX +2545(locks)X +2739(than)X +2902(LIBTP.)X +3189(This)X +3356(limits)X +3562(concurrency)X +3985(for)X +4104(multi-)X +555 2667(user)N +711(applications,)X +1140(but)X +1264(helps)X +1455(single-user)X +1829(applications.)X +2278(In)X +2367(addition,)X +2671(the)X +2791(fact)X +2934(that)X +3076(LIBTP)X +3319(releases)X +3595(B-tree)X +3817(locks)X +4007(early)X +4189(is)X +4263(a)X +555 2757(drawback)N +896(in)X +986(OO1.)X +1210(Since)X +1416(there)X +1605(is)X +1686(no)X +1793(concurrency)X +2218(in)X +2307(the)X +2432(benchmark,)X +2836(high-concurrency)X +3430(strategies)X +3760(only)X +3929(show)X +4125(up)X +4232(as)X +555 2847(increased)N +882(locking)X +1145(overhead.)X +1503(Finally,)X +1772(the)X +1892(architecture)X +2294(of)X +2383(the)X +2503(LIBTP)X +2747(implementation)X +3271(was)X +3418(substantially)X +3844(different)X +4143(from)X +555 2937(that)N +702(of)X +796(either)X +1006(OODBMS)X +1375(or)X +1469(INDEX.)X +1786(Both)X +1968(of)X +2062(those)X +2258(systems)X +2538(do)X +2645(the)X +2770(searches)X +3070(in)X +3159(the)X +3284(user's)X +3503(address)X +3771(space,)X +3997(and)X +4139(issue)X +555 3027(requests)N +844(for)X +964(pages)X +1173(to)X +1260(the)X +1383(server)X +1605(process.)X +1911(Pages)X +2123(are)X +2247(cached)X +2496(in)X +2583(the)X +2706(client,)X +2929(and)X +3070(many)X +3273(queries)X +3530(can)X +3667(be)X +3768(satis\256ed)X +4055(without)X +555 3117(contacting)N +910(the)X +1029(server)X +1247(at)X +1326(all.)X +1467(LIBTP)X +1710(submits)X +1979(all)X +2080(the)X +2199(queries)X +2452(to)X +2535(the)X +2653(server)X +2870(process,)X +3151(and)X +3287(receives)X +3571(database)X +3868(records)X +4125(back;)X +555 3207(it)N +619(does)X +786(no)X +886(client)X +1084(caching.)X +755 3330(The)N +911(RDBMS)X +1221(architecture)X +1632(is)X +1716(much)X +1925(closer)X +2148(to)X +2241(that)X +2392(of)X +2490(LIBTP.)X +2783(A)X +2872(server)X +3100(process)X +3372(receives)X +3667(queries)X +3930(and)X +4076(returns)X +555 3420(results)N +786(to)X +870(a)X +928(client.)X +1168(The)X +1315(timing)X +1545(results)X +1776(in)X +1860(table)X +2038(two)X +2180(clearly)X +2421(show)X +2612(that)X +2754(the)X +2874(conventional)X +3309(database)X +3607(client/server)X +4025(model)X +4246(is)X +555 3510(expensive.)N +941(LIBTP)X +1188(outperforms)X +1605(the)X +1728(RDBMS)X +2032(on)X +2136(traversal)X +2437(and)X +2577(insertion.)X +2921(We)X +3057(speculate)X +3380(that)X +3524(this)X +3663(is)X +3740(due)X +3880(in)X +3966(part)X +4115(to)X +4201(the)X +555 3600(overhead)N +870(of)X +957(query)X +1160(parsing,)X +1436(optimization,)X +1880(and)X +2016(repeated)X +2309(interpretation)X +2761(of)X +2848(the)X +2966(plan)X +3124(tree)X +3265(in)X +3347(the)X +3465(RDBMS')X +3791(query)X +3994(executor.)X +755 3723(Table)N +962(three)X +1147(shows)X +1371(the)X +1492(differences)X +1873(between)X +2164(local)X +2343(and)X +2482(remote)X +2728(execution)X +3063(of)X +3153(LIBTP's)X +3456(OO1)X +3635(implementation)X +4160(on)X +4263(a)X +555 3813(DECstation.)N +989(We)X +1122(measured)X +1451(performance)X +1879(with)X +2042(a)X +2099(populated)X +2436(\(warm\))X +2694(cache)X +2899(and)X +3036(an)X +3133(empty)X +3354(\(cold\))X +3567(cache.)X +3812(Reported)X +4126(times)X +555 3903(are)N +681(the)X +806(means)X +1037(of)X +1130(twenty)X +1374(tests,)X +1562(and)X +1704(are)X +1829(in)X +1917(seconds.)X +2237(Standard)X +2548(deviations)X +2903(were)X +3086(within)X +3316(seven)X +3525(percent)X +3788(of)X +3881(the)X +4005(mean)X +4205(for)X +555 3993(remote,)N +818(and)X +954(two)X +1094(percent)X +1351(of)X +1438(the)X +1556(mean)X +1750(for)X +1864(local.)X +755 4116(The)N +914(20ms)X +1121(overhead)X +1450(of)X +1551(TCP/IP)X +1824(on)X +1938(an)X +2048(Ethernet)X +2354(entirely)X +2633(accounts)X +2948(for)X +3076(the)X +3207(difference)X +3567(in)X +3662(speed.)X +3918(The)X +4076(remote)X +555 4206(traversal)N +857(times)X +1055(are)X +1179(nearly)X +1405(double)X +1648(the)X +1771(local)X +1952(times)X +2150(because)X +2430(we)X +2549(do)X +2653(index)X +2855(lookups)X +3132(and)X +3272(part)X +3421(fetches)X +3673(in)X +3759(separate)X +4047(queries.)X +555 4296(It)N +629(would)X +854(make)X +1053(sense)X +1252(to)X +1339(do)X +1444(indexed)X +1723(searches)X +2021(on)X +2126(the)X +2248(server,)X +2489(but)X +2615(we)X +2733(were)X +2914(unwilling)X +3244(to)X +3330(hard-code)X +3676(knowledge)X +4052(of)X +4143(OO1)X +555 4386(indices)N +803(into)X +948(our)X +1075(LIBTP)X +1317(TCL)X +1488(server.)X +1745(Cold)X +1920(and)X +2056(warm)X +2259(insertion)X +2559(times)X +2752(are)X +2871(identical)X +3167(since)X +3352(insertions)X +3683(do)X +3783(not)X +3905(bene\256t)X +4143(from)X +555 4476(caching.)N +755 4599(One)N +915(interesting)X +1279(difference)X +1632(shown)X +1867(by)X +1973(table)X +2155(three)X +2342(is)X +2421(the)X +2545(cost)X +2700(of)X +2793(forward)X +3074(versus)X +3305(backward)X +3644(traversal.)X +3987(When)X +4205(we)X +555 4689(built)N +725(the)X +847(database,)X +1168(we)X +1285(inserted)X +1562(parts)X +1741(in)X +1826(part)X +2 f +1974(id)X +1 f +2059(order.)X +2292(We)X +2427(built)X +2596(the)X +2717(indices)X +2967(at)X +3048(the)X +3169(same)X +3357(time.)X +3562(Therefore,)X +3923(the)X +4044(forward)X +555 4779(index)N +757(had)X +897(keys)X +1068(inserted)X +1346(in)X +1432(order,)X +1646(while)X +1848(the)X +1970(backward)X +2307(index)X +2509(had)X +2649(keys)X +2820(inserted)X +3098(more)X +3286(randomly.)X +3656(In-order)X +3943(insertion)X +4246(is)X +555 4885(pessimal)N +858(for)X +975(B-tree)X +1199(indices,)X +1469(so)X +1563(the)X +1684(forward)X +1962(index)X +2163(is)X +2239(much)X +2440(larger)X +2651(than)X +2812(the)X +2933(backward)X +3269(one)X +7 s +3385 4853(5)N +10 s +4885(.)Y +3476(This)X +3640(larger)X +3850(size)X +3997(shows)X +4219(up)X +555 4975(as)N +642(extra)X +823(disk)X +976(reads)X +1166(in)X +1248(the)X +1366(cold)X +1524(benchmark.)X +3 f +555 5161(6.)N +655(Conclusions)X +1 f +755 5284(LIBTP)N +1006(provides)X +1311(the)X +1438(basic)X +1632(building)X +1927(blocks)X +2165(to)X +2256(support)X +2525(transaction)X +2906(protection.)X +3300(In)X +3396(comparison)X +3799(with)X +3970(traditional)X +555 5374(Unix)N +746(libraries)X +1040(and)X +1187(commercial)X +1597(systems,)X +1900(it)X +1974(offers)X +2192(a)X +2258(variety)X +2511(of)X +2608(tradeoffs.)X +2964(Using)X +3185(complete)X +3509(transaction)X +3891(protection)X +4246(is)X +555 5464(more)N +747(complicated)X +1166(than)X +1331(simply)X +1575(adding)X +3 f +1820(fsync)X +1 f +1998(\(2\))X +2119(and)X +3 f +2262(\257ock)X +1 f +2426(\(2\))X +2547(calls)X +2721(to)X +2810(code,)X +3008(but)X +3136(it)X +3206(is)X +3285(faster)X +3490(in)X +3578(some)X +3773(cases)X +3969(and)X +4111(offers)X +8 s +10 f +555 5536(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5614(5)N +8 s +763 5639(The)N +878(next)X +1004(release)X +1196(of)X +1265(the)X +1359(4.4BSD)X +1580(access)X +1758(method)X +1966(will)X +2082(automatically)X +2446(detect)X +2614(and)X +2722(compensate)X +3039(for)X +3129(in-order)X +3350(insertion,)X +3606(eliminating)X +3914(this)X +4023(problem.)X + +16 p +%%Page: 16 16 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(stricter)N +801(guarantees)X +1168(\(atomicity,)X +1540(consistency,)X +1957(isolation,)X +2275(and)X +2414(durability\).)X +2815(If)X +2892(the)X +3013(data)X +3170(to)X +3255(be)X +3354(protected)X +3676(are)X +3798(already)X +4058(format-)X +555 720(ted)N +675(\()X +2 f +702(i.e.)X +1 f +821(use)X +949(one)X +1086(of)X +1174(the)X +1293(database)X +1591(access)X +1818(methods\),)X +2157(then)X +2316(adding)X +2555(transaction)X +2928(protection)X +3274(requires)X +3554(no)X +3655(additional)X +3996(complex-)X +555 810(ity,)N +679(but)X +801(incurs)X +1017(a)X +1073(performance)X +1500(penalty)X +1756(of)X +1843(approximately)X +2326(15%.)X +755 933(In)N +844(comparison)X +1240(with)X +1404(commercial)X +1805(database)X +2104(systems,)X +2399(the)X +2519(tradeoffs)X +2827(are)X +2948(more)X +3135(complex.)X +3473(LIBTP)X +3717(does)X +3886(not)X +4009(currently)X +555 1023(support)N +825(a)X +891(standard)X +1193(query)X +1406(language.)X +1766(The)X +1921(TCL-based)X +2312(server)X +2539(process)X +2810(allows)X +3049(a)X +3115(certain)X +3364(ease)X +3533(of)X +3630(use)X +3767(which)X +3993(would)X +4223(be)X +555 1113(enhanced)N +882(with)X +1047(a)X +1106(more)X +1294(user-friendly)X +1732(interface)X +2037(\()X +2 f +2064(e.g.)X +1 f +2203(a)X +2261(windows)X +2572(based)X +2777(query-by-form)X +3272(application\),)X +3697(for)X +3813(which)X +4031(we)X +4147(have)X +555 1203(a)N +620(working)X +916(prototype.)X +1292(When)X +1513(accesses)X +1815(do)X +1924(not)X +2055(require)X +2312(sophisticated)X +2758(query)X +2969(processing,)X +3360(the)X +3486(TCL)X +3665(interface)X +3975(is)X +4056(an)X +4160(ade-)X +555 1293(quate)N +756(solution.)X +1080(What)X +1281(LIBTP)X +1529(fails)X +1693(to)X +1781(provide)X +2052(in)X +2140(functionality,)X +2595(it)X +2665(makes)X +2896(up)X +3002(for)X +3122(in)X +3210(performance)X +3643(and)X +3785(\257exibility.)X +4161(Any)X +555 1383(application)N +931(may)X +1089(make)X +1283(use)X +1410(of)X +1497(its)X +1592(record)X +1818(interface)X +2120(or)X +2207(the)X +2325(more)X +2510(primitive)X +2823(log,)X +2965(lock,)X +3143(and)X +3279(buffer)X +3496(calls.)X +755 1506(Future)N +987(work)X +1175(will)X +1322(focus)X +1519(on)X +1621(overcoming)X +2026(some)X +2217(of)X +2306(the)X +2426(areas)X +2614(in)X +2698(which)X +2916(LIBTP)X +3160(is)X +3235(currently)X +3547(de\256cient)X +3845(and)X +3983(extending)X +555 1596(its)N +652(transaction)X +1026(model.)X +1288(The)X +1435(addition)X +1719(of)X +1808(an)X +1905(SQL)X +2077(parser)X +2295(and)X +2432(forms)X +2640(front)X +2817(end)X +2954(will)X +3099(improve)X +3387(the)X +3506(system's)X +3807(ease)X +3967(of)X +4055(use)X +4183(and)X +555 1686(make)N +750(it)X +815(more)X +1001(competitive)X +1400(with)X +1563(commercial)X +1963(systems.)X +2277(In)X +2365(the)X +2484(long)X +2647(term,)X +2835(we)X +2950(would)X +3170(like)X +3310(to)X +3392(add)X +3528(generalized)X +3919(hierarchical)X +555 1776(locking,)N +836(nested)X +1062(transactions,)X +1486(parallel)X +1748(transactions,)X +2171(passing)X +2431(of)X +2518(transactions)X +2921(between)X +3209(processes,)X +3557(and)X +3693(distributed)X +4055(commit)X +555 1866(handling.)N +900(In)X +992(the)X +1115(short)X +1300(term,)X +1492(the)X +1614(next)X +1776(step)X +1929(is)X +2006(to)X +2092(integrate)X +2397(LIBTP)X +2643(with)X +2809(the)X +2931(most)X +3110(recent)X +3331(release)X +3579(of)X +3670(the)X +3792(database)X +4093(access)X +555 1956(routines)N +833(and)X +969(make)X +1163(it)X +1227(freely)X +1435(available)X +1745(via)X +1863(anonymous)X +2252(ftp.)X +3 f +555 2142(7.)N +655(Acknowledgements)X +1 f +755 2265(We)N +888(would)X +1109(like)X +1250(to)X +1332(thank)X +1530(John)X +1701(Wilkes)X +1948(and)X +2084(Carl)X +2242(Staelin)X +2484(of)X +2571(Hewlett-Packard)X +3131(Laboratories)X +3557(and)X +3693(Jon)X +3824(Krueger.)X +4148(John)X +555 2355(and)N +694(Carl)X +855(provided)X +1162(us)X +1255(with)X +1419(an)X +1517(extra)X +1700(disk)X +1855(for)X +1971(the)X +2091(HP)X +2215(testbed)X +2464(less)X +2606(than)X +2766(24)X +2868(hours)X +3068(after)X +3238(we)X +3354(requested)X +3684(it.)X +3770(Jon)X +3903(spent)X +4094(count-)X +555 2445(less)N +699(hours)X +901(helping)X +1164(us)X +1258(understand)X +1633(the)X +1754(intricacies)X +2107(of)X +2197(commercial)X +2599(database)X +2899(products)X +3198(and)X +3337(their)X +3507(behavior)X +3811(under)X +4017(a)X +4076(variety)X +555 2535(of)N +642(system)X +884(con\256gurations.)X +3 f +555 2721(8.)N +655(References)X +1 f +555 2901([ANDR89])N +942(Andrade,)X +1265(J.,)X +1361(Carges,)X +1629(M.,)X +1765(Kovach,)X +2060(K.,)X +2183(``Building)X +2541(an)X +2642(On-Line)X +2939(Transaction)X +3343(Processing)X +3715(System)X +3975(On)X +4098(UNIX)X +727 2991(System)N +982(V'',)X +2 f +1134(CommUNIXations)X +1 f +1725(,)X +1765 0.2188(November/December)AX +2477(1989.)X +555 3171([BAY77])N +878(Bayer,)X +1110(R.,)X +1223(Schkolnick,)X +1623(M.,)X +1754(``Concurrency)X +2243(of)X +2330(Operations)X +2702(on)X +2802(B-Trees'',)X +2 f +3155(Acta)X +3322(Informatica)X +1 f +3700(,)X +3740(1977.)X +555 3351([BERN80])N +936(Bernstein,)X +1297(P.,)X +1415(Goodman,)X +1785(N.,)X +1917(``Timestamp)X +2365(Based)X +2595(Algorithms)X +2992(for)X +3119(Concurrency)X +3567(Control)X +3844(in)X +3939(Distributed)X +727 3441(Database)N +1042(Systems'',)X +2 f +1402(Proceedings)X +1823(6th)X +1945(International)X +2387(Conference)X +2777(on)X +2877(Very)X +3049(Large)X +3260(Data)X +3440(Bases)X +1 f +3627(,)X +3667(October)X +3946(1980.)X +555 3621([BSD91])N +864(DB\(3\),)X +2 f +1109(4.4BSD)X +1376(Unix)X +1552(Programmer's)X +2044(Manual)X +2313(Reference)X +2655(Guide)X +1 f +2851(,)X +2891(University)X +3249(of)X +3336(California,)X +3701(Berkeley,)X +4031(1991.)X +555 3801([CATT91])N +923(Cattell,)X +1181(R.G.G.,)X +1455(``An)X +1632(Engineering)X +2049(Database)X +2369(Benchmark'',)X +2 f +2838(The)X +2983(Benchmark)X +3373(Handbook)X +3731(for)X +3848(Database)X +4179(and)X +727 3891(Transaction)N +1133(Processing)X +1509(Systems)X +1 f +1763(,)X +1803(J.)X +1874(Gray,)X +2075(editor,)X +2302(Morgan)X +2576(Kaufman)X +2895(1991.)X +555 4071([CHEN91])N +929(Cheng,)X +1180(E.,)X +1291(Chang,)X +1542(E.,)X +1653(Klein,)X +1872(J.,)X +1964(Lee,)X +2126(D.,)X +2245(Lu,)X +2375(E.,)X +2485(Lutgardo,)X +2820(A.,)X +2939(Obermarck,)X +3342(R.,)X +3456(``An)X +3629(Open)X +3824(and)X +3961(Extensible)X +727 4161(Event-Based)N +1157(Transaction)X +1556(Manager'',)X +2 f +1936(Proceedings)X +2357(1991)X +2537(Summer)X +2820(Usenix)X +1 f +3043(,)X +3083(Nashville,)X +3430(TN,)X +3577(June)X +3744(1991.)X +555 4341([CHOU85])N +943(Chou,)X +1163(H.,)X +1288(DeWitt,)X +1570(D.,)X +1694(``An)X +1872(Evaluation)X +2245(of)X +2338(Buffer)X +2574(Management)X +3019(Strategies)X +3361(for)X +3481(Relational)X +3836(Database)X +4157(Sys-)X +727 4431(tems'',)N +2 f +972(Proceedings)X +1393(of)X +1475(the)X +1593(11th)X +1755(International)X +2197(Conference)X +2587(on)X +2687(Very)X +2859(Large)X +3070(Databases)X +1 f +3408(,)X +3448(1985.)X +555 4611([DEWI84])N +925(DeWitt,)X +1207(D.,)X +1331(Katz,)X +1529(R.,)X +1648(Olken,)X +1890(F.,)X +2000(Shapiro,)X +2295(L.,)X +2410(Stonebraker,)X +2843(M.,)X +2979(Wood,)X +3220(D.,)X +3343(``Implementation)X +3929(Techniques)X +727 4701(for)N +841(Main)X +1030(Memory)X +1326(Database)X +1641(Systems'',)X +2 f +2001(Proceedings)X +2422(of)X +2504(SIGMOD)X +1 f +2812(,)X +2852(pp.)X +2972(1-8,)X +3119(June)X +3286(1984.)X +555 4881([GRAY76])N +944(Gray,)X +1153(J.,)X +1252(Lorie,)X +1474(R.,)X +1595(Putzolu,)X +1887(F.,)X +1999(and)X +2143(Traiger,)X +2428(I.,)X +2522(``Granularity)X +2973(of)X +3067(locks)X +3263(and)X +3406(degrees)X +3679(of)X +3773(consistency)X +4174(in)X +4263(a)X +727 4971(large)N +909(shared)X +1140(data)X +1295(base'',)X +2 f +1533(Modeling)X +1861(in)X +1944(Data)X +2125(Base)X +2301(Management)X +2740(Systems)X +1 f +2994(,)X +3034(Elsevier)X +3317(North)X +3524(Holland,)X +3822(New)X +3994(York,)X +4199(pp.)X +727 5061(365-394.)N +555 5241([HAER83])N +931(Haerder,)X +1235(T.)X +1348(Reuter,)X +1606(A.)X +1728(``Principles)X +2126(of)X +2217(Transaction-Oriented)X +2928(Database)X +3246(Recovery'',)X +2 f +3651(Computing)X +4029(Surveys)X +1 f +4279(,)X +727 5331(15\(4\);)N +943(237-318,)X +1250(1983.)X +555 5511([KUNG81])N +943(Kung,)X +1162(H.)X +1261(T.,)X +1371(Richardson,)X +1777(J.,)X +1869(``On)X +2042(Optimistic)X +2400(Methods)X +2701(for)X +2816(Concurrency)X +3252(Control'',)X +2 f +3591(ACM)X +3781(Transactions)X +4219(on)X +727 5601(Database)N +1054(Systems)X +1 f +1328(6\(2\);)X +1504(213-226,)X +1811(1981.)X + +17 p +%%Page: 17 17 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630([LEHM81])N +939(Lehman,)X +1245(P.,)X +1352(Yao,)X +1529(S.,)X +1636(``Ef\256cient)X +1989(Locking)X +2279(for)X +2396(Concurrent)X +2780(Operations)X +3155(on)X +3258(B-trees'',)X +2 f +3587(ACM)X +3779(Transactions)X +4219(on)X +727 720(Database)N +1054(Systems)X +1 f +1308(,)X +1348(6\(4\),)X +1522(December)X +1873(1981.)X +555 900([MOHA91])N +964(Mohan,)X +1241(C.,)X +1364(Pirahesh,)X +1690(H.,)X +1818(``ARIES-RRH:)X +2366(Restricted)X +2721(Repeating)X +3076(of)X +3173(History)X +3442(in)X +3533(the)X +3660(ARIES)X +3920(Transaction)X +727 990(Recovery)N +1055(Method'',)X +2 f +1398(Proceedings)X +1819(7th)X +1941(International)X +2383(Conference)X +2773(on)X +2873(Data)X +3053(Engineering)X +1 f +3449(,)X +3489(Kobe,)X +3703(Japan,)X +3926(April)X +4115(1991.)X +555 1170([NODI90])N +914(Nodine,)X +1194(M.,)X +1328(Zdonik,)X +1602(S.,)X +1709(``Cooperative)X +2178(Transaction)X +2580(Hierarchies:)X +2996(A)X +3077(Transaction)X +3479(Model)X +3711(to)X +3796(Support)X +4072(Design)X +727 1260(Applications'',)N +2 f +1242(Proceedings)X +1675(16th)X +1849(International)X +2303(Conference)X +2704(on)X +2815(Very)X +2998(Large)X +3220(Data)X +3411(Bases)X +1 f +3598(,)X +3649(Brisbane,)X +3985(Australia,)X +727 1350(August)N +978(1990.)X +555 1530([OUST90])N +923(Ousterhout,)X +1324(J.,)X +1420(``Tcl:)X +1648(An)X +1771(Embeddable)X +2197(Command)X +2555(Language'',)X +2 f +2971(Proceedings)X +3396(1990)X +3580(Winter)X +3822(Usenix)X +1 f +4045(,)X +4089(Wash-)X +727 1620(ington,)N +971(D.C.,)X +1162(January)X +1432(1990.)X +555 1800([POSIX91])N +955(``Unapproved)X +1441(Draft)X +1645(for)X +1773(Realtime)X +2096(Extension)X +2450(for)X +2578(Portable)X +2879(Operating)X +3234(Systems'',)X +3608(Draft)X +3812(11,)X +3946(October)X +4239(7,)X +727 1890(1991,)N +927(IEEE)X +1121(Computer)X +1461(Society.)X +555 2070([ROSE91])N +925(Rosenblum,)X +1341(M.,)X +1484(Ousterhout,)X +1892(J.,)X +1995(``The)X +2206(Design)X +2464(and)X +2611(Implementation)X +3149(of)X +3247(a)X +3314(Log-Structured)X +3835(File)X +3990(System'',)X +2 f +727 2160(Proceedings)N +1148(of)X +1230(the)X +1348(13th)X +1510(Symposium)X +1895(on)X +1995(Operating)X +2344(Systems)X +2618(Principles)X +1 f +2947(,)X +2987(1991.)X +555 2340([SELT91])N +904(Seltzer,)X +1171(M.,)X +1306(Stonebraker,)X +1738(M.,)X +1873(``Read)X +2116(Optimized)X +2478(File)X +2626(Systems:)X +2938(A)X +3020(Performance)X +3454(Evaluation'',)X +2 f +3898(Proceedings)X +727 2430(7th)N +849(Annual)X +1100(International)X +1542(Conference)X +1932(on)X +2032(Data)X +2212(Engineering)X +1 f +2608(,)X +2648(Kobe,)X +2862(Japan,)X +3085(April)X +3274(1991.)X +555 2610([SPEC88])N +907(Spector,)X +1200(Rausch,)X +1484(Bruell,)X +1732(``Camelot:)X +2107(A)X +2192(Flexible,)X +2501(Distributed)X +2888(Transaction)X +3294(Processing)X +3668(System'',)X +2 f +4004(Proceed-)X +727 2700(ings)N +880(of)X +962(Spring)X +1195(COMPCON)X +1606(1988)X +1 f +(,)S +1806(February)X +2116(1988.)X +555 2880([SQL86])N +862(American)X +1201(National)X +1499(Standards)X +1836(Institute,)X +2139(``Database)X +2509(Language)X +2847(SQL'',)X +3093(ANSI)X +3301(X3.135-1986)X +3747(\(ISO)X +3924(9075\),)X +4152(May)X +727 2970(1986.)N +555 3150([STON81])N +919(Stonebraker,)X +1348(M.,)X +1480(``Operating)X +1876(System)X +2132(Support)X +2406(for)X +2520(Database)X +2835(Management'',)X +2 f +3348(Communications)X +3910(of)X +3992(the)X +4110(ACM)X +1 f +4279(,)X +727 3240(1981.)N +555 3420([SULL92])N +925(Sullivan,)X +1247(M.,)X +1394(Olson,)X +1641(M.,)X +1788(``An)X +1976(Index)X +2195(Implementation)X +2737(Supporting)X +3127(Fast)X +3295(Recovery)X +3638(for)X +3767(the)X +3900(POSTGRES)X +727 3510(Storage)N +1014(System'',)X +1365(to)X +1469(appear)X +1726(in)X +2 f +1830(Proceedings)X +2272(8th)X +2415(Annual)X +2687(International)X +3150(Conference)X +3561(on)X +3682(Data)X +3883(Engineering)X +1 f +4279(,)X +727 3600(Tempe,)N +990(Arizona,)X +1289(February)X +1599(1992.)X +555 3780([TPCB90])N +914(Transaction)X +1319(Processing)X +1692(Performance)X +2129(Council,)X +2428(``TPC)X +2653(Benchmark)X +3048(B'',)X +3200(Standard)X +3510(Speci\256cation,)X +3973(Waterside)X +727 3870(Associates,)N +1110(Fremont,)X +1421(CA.,)X +1592(1990.)X +555 4050([YOUN91])N +947(Young,)X +1211(M.)X +1328(W.,)X +1470(Thompson,)X +1858(D.)X +1962(S.,)X +2072(Jaffe,)X +2274(E.,)X +2388(``A)X +2525(Modular)X +2826(Architecture)X +3253(for)X +3372(Distributed)X +3757(Transaction)X +4161(Pro-)X +727 4140(cessing'',)N +2 f +1057(Proceedings)X +1478(1991)X +1658(Winter)X +1896(Usenix)X +1 f +2119(,)X +2159(Dallas,)X +2404(TX,)X +2551(January)X +2821(1991.)X +3 f +755 4263(Margo)N +1008(I.)X +1080(Seltzer)X +1 f +1338(is)X +1411(a)X +1467(Ph.D.)X +1669(student)X +1920(in)X +2002(the)X +2120(Department)X +2519(of)X +2606(Electrical)X +2934(Engineering)X +3346(and)X +3482(Computer)X +3822(Sciences)X +4123(at)X +4201(the)X +555 4353(University)N +919(of)X +1012(California,)X +1383(Berkeley.)X +1739(Her)X +1886(research)X +2181(interests)X +2474(include)X +2735(\256le)X +2862(systems,)X +3160(databases,)X +3513(and)X +3654(transaction)X +4031(process-)X +555 4443(ing)N +686(systems.)X +1008(She)X +1157(spent)X +1355(several)X +1612(years)X +1811(working)X +2107(at)X +2194(startup)X +2441(companies)X +2813(designing)X +3153(and)X +3298(implementing)X +3771(\256le)X +3902(systems)X +4183(and)X +555 4533(transaction)N +929(processing)X +1294(software)X +1592(and)X +1729(designing)X +2061(microprocessors.)X +2648(Ms.)X +2791(Seltzer)X +3035(received)X +3329(her)X +3453(AB)X +3585(in)X +3668(Applied)X +3947(Mathemat-)X +555 4623(ics)N +664(from)X +840 0.1953(Harvard/Radcliffe)AX +1445(College)X +1714(in)X +1796(1983.)X +755 4746(In)N +845(her)X +971(spare)X +1163(time,)X +1347(Margo)X +1583(can)X +1717(usually)X +1970(be)X +2068(found)X +2277(preparing)X +2607(massive)X +2887(quantities)X +3220(of)X +3309(food)X +3478(for)X +3594(hungry)X +3843(hordes,)X +4099(study-)X +555 4836(ing)N +677(Japanese,)X +1003(or)X +1090(playing)X +1350(soccer)X +1576(with)X +1738(an)X +1834(exciting)X +2112(Bay)X +2261(Area)X +2438(Women's)X +2770(Soccer)X +3009(team,)X +3205(the)X +3323(Berkeley)X +3633(Bruisers.)X +3 f +755 5049(Michael)N +1056(A.)X +1159(Olson)X +1 f +1383(is)X +1461(a)X +1522(Master's)X +1828(student)X +2084(in)X +2170(the)X +2292(Department)X +2695(of)X +2786(Electrical)X +3118(Engineering)X +3534(and)X +3674(Computer)X +4018(Sciences)X +555 5139(at)N +645(the)X +774(University)X +1143(of)X +1241(California,)X +1617(Berkeley.)X +1978(His)X +2120(primary)X +2405(interests)X +2703(are)X +2833(database)X +3141(systems)X +3425(and)X +3572(mass)X +3763(storage)X +4026(systems.)X +555 5229(Mike)N +759(spent)X +963(two)X +1118(years)X +1323(working)X +1625(for)X +1754(a)X +1825(commercial)X +2239(database)X +2551(system)X +2808(vendor)X +3066(before)X +3307(joining)X +3567(the)X +3699(Postgres)X +4004(Research)X +555 5319(Group)N +780(at)X +858(Berkeley)X +1168(in)X +1250(1988.)X +1470(He)X +1584(received)X +1877(his)X +1990(B.A.)X +2161(in)X +2243(Computer)X +2583(Science)X +2853(from)X +3029(Berkeley)X +3339(in)X +3421(May)X +3588(1991.)X +755 5442(Mike)N +945(only)X +1108(recently)X +1388(transferred)X +1758(into)X +1903(Sin)X +2030(City,)X +2208(but)X +2330(is)X +2403(rapidly)X +2650(adopting)X +2950(local)X +3126(customs)X +3408(and)X +3544(coloration.)X +3929(In)X +4016(his)X +4129(spare)X +555 5532(time,)N +742(he)X +843(organizes)X +1176(informal)X +1477(Friday)X +1711(afternoon)X +2043(study)X +2240(groups)X +2482(to)X +2568(discuss)X +2823(recent)X +3044(technical)X +3358(and)X +3498(economic)X +3834(developments.)X +555 5622(Among)N +815(his)X +928(hobbies)X +1197(are)X +1316(Charles)X +1581(Dickens,)X +1884(Red)X +2033(Rock,)X +2242(and)X +2378(speaking)X +2683(Dutch)X +2899(to)X +2981(anyone)X +3233(who)X +3391(will)X +3535(permit)X +3764(it.)X + +17 p +%%Trailer +xt + +xs + diff --git a/src/util/db2/docs/mpool.3.ps b/src/util/db2/docs/mpool.3.ps new file mode 100644 index 0000000000..816c9243c8 --- /dev/null +++ b/src/util/db2/docs/mpool.3.ps @@ -0,0 +1,320 @@ +%!PS-Adobe-3.0 +%%Creator: groff version 1.08 +%%DocumentNeededResources: font Times-Roman +%%+ font Times-Bold +%%+ font Times-Italic +%%DocumentSuppliedResources: procset grops 1.08 0 +%%Pages: 2 +%%PageOrder: Ascend +%%Orientation: Portrait +%%EndComments +%%BeginProlog +%%BeginResource: procset grops 1.08 0 +/setpacking where{ +pop +currentpacking +true setpacking +}if +/grops 120 dict dup begin +/SC 32 def +/A/show load def +/B{0 SC 3 -1 roll widthshow}bind def +/C{0 exch ashow}bind def +/D{0 exch 0 SC 5 2 roll awidthshow}bind def +/E{0 rmoveto show}bind def +/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def +/G{0 rmoveto 0 exch ashow}bind def +/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/I{0 exch rmoveto show}bind def +/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def +/K{0 exch rmoveto 0 exch ashow}bind def +/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/M{rmoveto show}bind def +/N{rmoveto 0 SC 3 -1 roll widthshow}bind def +/O{rmoveto 0 exch ashow}bind def +/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/Q{moveto show}bind def +/R{moveto 0 SC 3 -1 roll widthshow}bind def +/S{moveto 0 exch ashow}bind def +/T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/SF{ +findfont exch +[exch dup 0 exch 0 exch neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/MF{ +findfont +[5 2 roll +0 3 1 roll +neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/level0 0 def +/RES 0 def +/PL 0 def +/LS 0 def +/PLG{ +gsave newpath clippath pathbbox grestore +exch pop add exch pop +}bind def +/BP{ +/level0 save def +1 setlinecap +1 setlinejoin +72 RES div dup scale +LS{ +90 rotate +}{ +0 PL translate +}ifelse +1 -1 scale +}bind def +/EP{ +level0 restore +showpage +}bind def +/DA{ +newpath arcn stroke +}bind def +/SN{ +transform +.25 sub exch .25 sub exch +round .25 add exch round .25 add exch +itransform +}bind def +/DL{ +SN +moveto +SN +lineto stroke +}bind def +/DC{ +newpath 0 360 arc closepath +}bind def +/TM matrix def +/DE{ +TM currentmatrix pop +translate scale newpath 0 0 .5 0 360 arc closepath +TM setmatrix +}bind def +/RC/rcurveto load def +/RL/rlineto load def +/ST/stroke load def +/MT/moveto load def +/CL/closepath load def +/FL{ +currentgray exch setgray fill setgray +}bind def +/BL/fill load def +/LW/setlinewidth load def +/RE{ +findfont +dup maxlength 1 index/FontName known not{1 add}if dict begin +{ +1 index/FID ne{def}{pop pop}ifelse +}forall +/Encoding exch def +dup/FontName exch def +currentdict end definefont pop +}bind def +/DEFS 0 def +/EBEGIN{ +moveto +DEFS begin +}bind def +/EEND/end load def +/CNT 0 def +/level1 0 def +/PBEGIN{ +/level1 save def +translate +div 3 1 roll div exch scale +neg exch neg exch translate +0 setgray +0 setlinecap +1 setlinewidth +0 setlinejoin +10 setmiterlimit +[]0 setdash +/setstrokeadjust where{ +pop +false setstrokeadjust +}if +/setoverprint where{ +pop +false setoverprint +}if +newpath +/CNT countdictstack def +userdict begin +/showpage{}def +}bind def +/PEND{ +clear +countdictstack CNT sub{end}repeat +level1 restore +}bind def +end def +/setpacking where{ +pop +setpacking +}if +%%EndResource +%%IncludeResource: font Times-Roman +%%IncludeResource: font Times-Bold +%%IncludeResource: font Times-Italic +grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 def/PL +792 def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron/scaron/zcaron +/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/space +/exclam/quotedbl/numbersign/dollar/percent/ampersand/quoteright/parenleft +/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four +/five/six/seven/eight/nine/colon/semicolon/less/equal/greater/question/at/A/B/C +/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash +/bracketright/circumflex/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q +/r/s/t/u/v/w/x/y/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase +/guillemotleft/guillemotright/bullet/florin/fraction/perthousand/dagger +/daggerdbl/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut +/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash +/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen/brokenbar +/section/dieresis/copyright/ordfeminine/guilsinglleft/logicalnot/minus +/registered/macron/degree/plusminus/twosuperior/threesuperior/acute/mu +/paragraph/periodcentered/cedilla/onesuperior/ordmasculine/guilsinglright +/onequarter/onehalf/threequarters/questiondown/Agrave/Aacute/Acircumflex/Atilde +/Adieresis/Aring/AE/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute +/Icircumflex/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis +/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn/germandbls +/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla/egrave/eacute +/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis/eth/ntilde/ograve +/oacute/ocircumflex/otilde/odieresis/divide/oslash/ugrave/uacute/ucircumflex +/udieresis/yacute/thorn/ydieresis]def/Times-Italic@0 ENC0/Times-Italic RE +/Times-Bold@0 ENC0/Times-Bold RE/Times-Roman@0 ENC0/Times-Roman RE +%%EndProlog +%%Page: 1 1 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 129.01(MPOOL\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 129.01(anual MPOOL\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 84 S +(ME).18 E F0(mpool \255 shared memory b)108 96 Q(uf)-.2 E(fer pool)-.25 E F1 +(SYNOPSIS)72 112.8 Q/F2 10/Times-Bold@0 SF(#include <db)108 124.8 Q(.h>)-.4 E +(#include <mpool.h>)108 136.8 Q(MPOOL *)108 160.8 Q(mpool_open \(DBT *k)108 +172.8 Q(ey)-.1 E 2.5(,i)-.55 G(nt fd, pgno_t pagesize, pgno_t maxcache\);) +216.25 172.8 Q -.1(vo)108 196.8 S(id).1 E(mpool_\214lter \(MPOOL *mp, v)108 +208.8 Q(oid \(*pgin\)\(v)-.1 E(oid *, pgno_t, v)-.1 E(oid *\),)-.1 E -.1(vo)158 +220.8 S(id \(*pgout\)\(v).1 E(oid *, pgno_t, v)-.1 E(oid *\), v)-.1 E +(oid *pgcookie\);)-.1 E -.1(vo)108 244.8 S(id *).1 E +(mpool_new \(MPOOL *mp, pgno_t *pgnoaddr\);)108 256.8 Q -.1(vo)108 280.8 S +(id *).1 E(mpool_get \(MPOOL *mp, pgno_t pgno, u_int \215ags\);)108 292.8 Q +(int)108 316.8 Q(mpool_put \(MPOOL *mp, v)108 328.8 Q(oid *pgaddr)-.1 E 2.5(,u) +-.92 G(_int \215ags\);)290.62 328.8 Q(int)108 352.8 Q +(mpool_sync \(MPOOL *mp\);)108 364.8 Q(int)108 388.8 Q +(mpool_close \(MPOOL *mp\);)108 400.8 Q F1(DESCRIPTION)72 417.6 Q/F3 10 +/Times-Italic@0 SF(Mpool)108 429.6 Q F0 1.013(is the library interf)3.513 F +1.013(ace intended to pro)-.1 F 1.013(vide page oriented b)-.15 F(uf)-.2 E +1.012(fer management of \214les.)-.25 F 1.012(The b)6.012 F(uf)-.2 E(fers)-.25 +E(may be shared between processes.)108 441.6 Q .416(The function)108 458.4 R F3 +(mpool_open)2.916 E F0 .417(initializes a memory pool.)2.917 F(The)5.417 E F3 +-.1(ke)2.917 G(y)-.2 E F0(ar)2.917 E .417(gument is the byte string used to ne) +-.18 F(gotiate)-.15 E .697(between multiple processes wishing to share b)108 +470.4 R(uf)-.2 E 3.196(fers. If)-.25 F .696(the \214le b)3.196 F(uf)-.2 E .696 +(fers are mapped in shared memory)-.25 F 3.196(,a)-.65 G(ll)534.44 470.4 Q .894 +(processes using the same k)108 482.4 R 1.194 -.15(ey w)-.1 H .894 +(ill share the b).15 F(uf)-.2 E 3.394(fers. If)-.25 F F3 -.1(ke)3.394 G(y)-.2 E +F0 .895(is NULL, the b)3.395 F(uf)-.2 E .895(fers are mapped into pri)-.25 F +-.25(va)-.25 G(te).25 E(memory)108 494.4 Q 5.116(.T)-.65 G(he)154.406 494.4 Q +F3(fd)2.616 E F0(ar)2.616 E .115(gument is a \214le descriptor for the underly\ +ing \214le, which must be seekable.)-.18 F(If)5.115 E F3 -.1(ke)2.615 G(y)-.2 E +F0 .115(is non-)2.615 F(NULL and matches a \214le already being mapped, the)108 +506.4 Q F3(fd)2.5 E F0(ar)2.5 E(gument is ignored.)-.18 E(The)108 523.2 Q F3 +(pa)3.328 E -.1(ge)-.1 G(size).1 E F0(ar)3.329 E .829 +(gument is the size, in bytes, of the pages into which the \214le is brok)-.18 +F .829(en up.)-.1 F(The)5.829 E F3(maxcac)3.329 E(he)-.15 E F0(ar)108 535.2 Q +.153(gument is the maximum number of pages from the underlying \214le to cache\ + at an)-.18 F 2.653(yo)-.15 G .153(ne time.)451.308 535.2 R .153(This v)5.153 F +.153(alue is)-.25 F .099(not relati)108 547.2 R .399 -.15(ve t)-.25 H 2.599(ot) +.15 G .099(he number of processes which share a \214le')168.727 547.2 R 2.6(sb) +-.55 G(uf)350.39 547.2 Q .1(fers, b)-.25 F .1(ut will be the lar)-.2 F .1 +(gest v)-.18 F .1(alue speci\214ed by)-.25 F(an)108 559.2 Q 2.5(yo)-.15 G 2.5 +(ft)129.79 559.2 S(he processes sharing the \214le.)138.4 559.2 Q(The)108 576 Q +F3(mpool_\214lter)3.254 E F0 .754(function is intended to mak)3.254 F 3.254(et) +-.1 G .754(ransparent input and output processing of the pages possi-)301.778 +576 R 3.095(ble. If)108 588 R(the)3.095 E F3(pgin)3.095 E F0 .596 +(function is speci\214ed, it is called each time a b)3.095 F(uf)-.2 E .596 +(fer is read into the memory pool from the)-.25 F .125(backing \214le.)108 600 +R .125(If the)5.125 F F3(pgout)2.625 E F0 .125 +(function is speci\214ed, it is called each time a b)2.625 F(uf)-.2 E .125 +(fer is written into the backing \214le.)-.25 F .276 +(Both functions are are called with the)108 612 R F3(pgcookie)2.777 E F0 +(pointer)2.777 E 2.777(,t)-.4 G .277 +(he page number and a pointer to the page to being)337.27 612 R +(read or written.)108 624 Q .124(The function)108 640.8 R F3(mpool_ne)2.624 E +(w)-.15 E F0(tak)2.624 E .123(es an MPOOL pointer and an address as ar)-.1 F +2.623(guments. If)-.18 F 2.623(an)2.623 G .623 -.25(ew p)457.568 640.8 T .123 +(age can be allo-).25 F .944(cated, a pointer to the page is returned and the \ +page number is stored into the)108 652.8 R F3(pgnoaddr)3.445 E F0 3.445 +(address. Other)3.445 F(-)-.2 E(wise, NULL is returned and errno is set.)108 +664.8 Q 1.167(The function)108 681.6 R F3(mpool_g)3.667 E(et)-.1 E F0(tak)3.667 +E 1.167(es a MPOOL pointer and a page number as ar)-.1 F 3.666(guments. If)-.18 +F 1.166(the page e)3.666 F 1.166(xists, a)-.15 F .686 +(pointer to the page is returned.)108 693.6 R .687 +(Otherwise, NULL is returned and errno is set.)5.686 F .687 +(The \215ags parameter is not)5.687 F(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G +(istrib)132.57 732 Q 104.595(ution June)-.2 F(4, 1993)2.5 E(1)535 732 Q EP +%%Page: 2 2 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 129.01(MPOOL\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 129.01(anual MPOOL\(3\))340.17 48 R(currently used.)108 84 Q 1.463 +(The function)108 100.8 R/F1 10/Times-Italic@0 SF(mpool_put)3.963 E F0 1.462 +(unpins the page referenced by)3.962 F F1(pgaddr)3.962 E F0(.).73 E F1(Pgaddr) +6.462 E F0 1.462(must be an address pre)3.962 F(viously)-.25 E(returned by)108 +112.8 Q F1(mpool_g)2.5 E(et)-.1 E F0(or)2.5 E F1(mpool_ne)2.5 E(w)-.15 E F0 5 +(.T).31 G(he \215ag v)271.65 112.8 Q(alue is speci\214ed by)-.25 E F1(or)2.5 E +F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)434.74 112.8 S(he follo)443.35 112.8 Q +(wing v)-.25 E(alues:)-.25 E(MPOOL_DIR)108 129.6 Q(TY)-.6 E +(The page has been modi\214ed and needs to be written to the backing \214le.) +144 141.6 Q F1(Mpool_put)108 158.4 Q F0 +(returns 0 on success and -1 if an error occurs.)2.5 E .247(The function)108 +175.2 R F1(mpool_sync)2.747 E F0 .247(writes all modi\214ed pages associated w\ +ith the MPOOL pointer to the backing \214le.)2.747 F F1(Mpool_sync)108 187.2 Q +F0(returns 0 on success and -1 if an error occurs.)2.5 E(The)108 204 Q F1 +(mpool_close)2.698 E F0 .198(function free')2.698 F 2.698(su)-.55 G 2.698(pa) +245.432 204 S .498 -.15(ny a)257.57 204 T .198 +(llocated memory associated with the memory pool cookie.).15 F(Modi-)5.197 E +(\214ed pages are)108 216 Q/F2 10/Times-Bold@0 SF(not)2.5 E F0 +(written to the backing \214le.)2.5 E F1(Mpool_close)5 E F0 +(returns 0 on success and -1 if an error occurs.)2.5 E/F3 9/Times-Bold@0 SF +(ERR)72 232.8 Q(ORS)-.27 E F0(The)108 244.8 Q F1(mpool_open)2.938 E F0 .438 +(function may f)2.938 F .438(ail and set)-.1 F F1(errno)2.938 E F0 .438(for an) +2.938 F 2.938(yo)-.15 G 2.938(ft)344.87 244.8 S .439 +(he errors speci\214ed for the library routine)353.918 244.8 R F1(mal-)2.939 E +(loc)108 256.8 Q F0(\(3\).).31 E(The)108 273.6 Q F1(mpool_g)2.5 E(et)-.1 E F0 +(function may f)2.5 E(ail and set)-.1 E F1(errno)2.5 E F0(for the follo)2.5 E +(wing:)-.25 E([EINV)108 290.4 Q 29.98(AL] The)-1.35 F(requested record doesn') +2.5 E 2.5(te)-.18 G(xist.)305.96 290.4 Q(The)108 307.2 Q F1(mpool_ne)4.073 E(w) +-.15 E F0(and)4.073 E F1(mpool_g)4.073 E(et)-.1 E F0 1.573(functions may f) +4.073 F 1.573(ail and set)-.1 F F1(errno)4.073 E F0 1.573(for an)4.073 F 4.073 +(yo)-.15 G 4.073(ft)421.336 307.2 S 1.573(he errors speci\214ed for the)431.519 +307.2 R(library routines)108 319.2 Q F1 -.37(re)2.5 G(ad).37 E F0(\(2\)).77 E +F1 2.5(,w).54 G(rite)214.48 319.2 Q F0(\(2\)).18 E F1(,).54 E F0(and)2.5 E F1 +(malloc)2.5 E F0(\(3\).).31 E(The)108 336 Q F1(mpool_sync)4.287 E F0 1.787 +(function may f)4.287 F 1.787(ail and set)-.1 F F1(errno)4.288 E F0 1.788 +(for an)4.288 F 4.288(yo)-.15 G 4.288(ft)356.694 336 S 1.788 +(he errors speci\214ed for the library routine)367.092 336 R F1(write)108 348 Q +F0(\(2\).).18 E(The)108 364.8 Q F1(mpool_close)4.125 E F0 1.624(function may f) +4.125 F 1.624(ail and set)-.1 F F1(errno)4.124 E F0 1.624(for an)4.124 F 4.124 +(yo)-.15 G 4.124(ft)357.842 364.8 S 1.624 +(he errors speci\214ed for the library routine)368.076 364.8 R F1(fr)108 376.8 +Q(ee)-.37 E F0(\(3\).).18 E F3(SEE ALSO)72 393.6 Q F1(dbopen)108 405.6 Q F0 +(\(3\),).24 E F1(btr)2.5 E(ee)-.37 E F0(\(3\),).18 E F1(hash)2.5 E F0(\(3\),) +.28 E F1 -.37(re)2.5 G(cno).37 E F0(\(3\)).18 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5 +(yD)-.15 G(istrib)132.57 732 Q 104.595(ution June)-.2 F(4, 1993)2.5 E(2)535 732 +Q EP +%%Trailer +end +%%EOF diff --git a/src/util/db2/docs/recno.3.ps b/src/util/db2/docs/recno.3.ps new file mode 100644 index 0000000000..8ffccfca90 --- /dev/null +++ b/src/util/db2/docs/recno.3.ps @@ -0,0 +1,341 @@ +%!PS-Adobe-3.0 +%%Creator: groff version 1.08 +%%DocumentNeededResources: font Times-Roman +%%+ font Times-Bold +%%+ font Times-Italic +%%DocumentSuppliedResources: procset grops 1.08 0 +%%Pages: 2 +%%PageOrder: Ascend +%%Orientation: Portrait +%%EndComments +%%BeginProlog +%%BeginResource: procset grops 1.08 0 +/setpacking where{ +pop +currentpacking +true setpacking +}if +/grops 120 dict dup begin +/SC 32 def +/A/show load def +/B{0 SC 3 -1 roll widthshow}bind def +/C{0 exch ashow}bind def +/D{0 exch 0 SC 5 2 roll awidthshow}bind def +/E{0 rmoveto show}bind def +/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def +/G{0 rmoveto 0 exch ashow}bind def +/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/I{0 exch rmoveto show}bind def +/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def +/K{0 exch rmoveto 0 exch ashow}bind def +/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/M{rmoveto show}bind def +/N{rmoveto 0 SC 3 -1 roll widthshow}bind def +/O{rmoveto 0 exch ashow}bind def +/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/Q{moveto show}bind def +/R{moveto 0 SC 3 -1 roll widthshow}bind def +/S{moveto 0 exch ashow}bind def +/T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/SF{ +findfont exch +[exch dup 0 exch 0 exch neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/MF{ +findfont +[5 2 roll +0 3 1 roll +neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/level0 0 def +/RES 0 def +/PL 0 def +/LS 0 def +/PLG{ +gsave newpath clippath pathbbox grestore +exch pop add exch pop +}bind def +/BP{ +/level0 save def +1 setlinecap +1 setlinejoin +72 RES div dup scale +LS{ +90 rotate +}{ +0 PL translate +}ifelse +1 -1 scale +}bind def +/EP{ +level0 restore +showpage +}bind def +/DA{ +newpath arcn stroke +}bind def +/SN{ +transform +.25 sub exch .25 sub exch +round .25 add exch round .25 add exch +itransform +}bind def +/DL{ +SN +moveto +SN +lineto stroke +}bind def +/DC{ +newpath 0 360 arc closepath +}bind def +/TM matrix def +/DE{ +TM currentmatrix pop +translate scale newpath 0 0 .5 0 360 arc closepath +TM setmatrix +}bind def +/RC/rcurveto load def +/RL/rlineto load def +/ST/stroke load def +/MT/moveto load def +/CL/closepath load def +/FL{ +currentgray exch setgray fill setgray +}bind def +/BL/fill load def +/LW/setlinewidth load def +/RE{ +findfont +dup maxlength 1 index/FontName known not{1 add}if dict begin +{ +1 index/FID ne{def}{pop pop}ifelse +}forall +/Encoding exch def +dup/FontName exch def +currentdict end definefont pop +}bind def +/DEFS 0 def +/EBEGIN{ +moveto +DEFS begin +}bind def +/EEND/end load def +/CNT 0 def +/level1 0 def +/PBEGIN{ +/level1 save def +translate +div 3 1 roll div exch scale +neg exch neg exch translate +0 setgray +0 setlinecap +1 setlinewidth +0 setlinejoin +10 setmiterlimit +[]0 setdash +/setstrokeadjust where{ +pop +false setstrokeadjust +}if +/setoverprint where{ +pop +false setoverprint +}if +newpath +/CNT countdictstack def +userdict begin +/showpage{}def +}bind def +/PEND{ +clear +countdictstack CNT sub{end}repeat +level1 restore +}bind def +end def +/setpacking where{ +pop +setpacking +}if +%%EndResource +%%IncludeResource: font Times-Roman +%%IncludeResource: font Times-Bold +%%IncludeResource: font Times-Italic +grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 def/PL +792 def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron/scaron/zcaron +/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/space +/exclam/quotedbl/numbersign/dollar/percent/ampersand/quoteright/parenleft +/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four +/five/six/seven/eight/nine/colon/semicolon/less/equal/greater/question/at/A/B/C +/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash +/bracketright/circumflex/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q +/r/s/t/u/v/w/x/y/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase +/guillemotleft/guillemotright/bullet/florin/fraction/perthousand/dagger +/daggerdbl/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut +/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash +/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen/brokenbar +/section/dieresis/copyright/ordfeminine/guilsinglleft/logicalnot/minus +/registered/macron/degree/plusminus/twosuperior/threesuperior/acute/mu +/paragraph/periodcentered/cedilla/onesuperior/ordmasculine/guilsinglright +/onequarter/onehalf/threequarters/questiondown/Agrave/Aacute/Acircumflex/Atilde +/Adieresis/Aring/AE/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute +/Icircumflex/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis +/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn/germandbls +/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla/egrave/eacute +/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis/eth/ntilde/ograve +/oacute/ocircumflex/otilde/odieresis/divide/oslash/ugrave/uacute/ucircumflex +/udieresis/yacute/thorn/ydieresis]def/Times-Italic@0 ENC0/Times-Italic RE +/Times-Bold@0 ENC0/Times-Bold RE/Times-Roman@0 ENC0/Times-Roman RE +%%EndProlog +%%Page: 1 1 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 130.12(RECNO\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 130.12(anual RECNO\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 84 S +(ME).18 E F0(recno \255 record number database access method)108 96 Q F1 +(SYNOPSIS)72 112.8 Q/F2 10/Times-Bold@0 SF(#include <sys/types.h>)108 124.8 Q +(#include <db)108 136.8 Q(.h>)-.4 E F1(DESCRIPTION)72 153.6 Q F0 1.158 +(The routine)108 165.6 R/F3 10/Times-Italic@0 SF(dbopen)3.658 E F0 1.158 +(is the library interf)3.658 F 1.158(ace to database \214les.)-.1 F 1.157 +(One of the supported \214le formats is record)6.158 F 1.159(number \214les.) +108 177.6 R 1.159(The general description of the database access methods is in) +6.159 F F3(dbopen)3.66 E F0 1.16(\(3\), this manual page).24 F +(describes only the recno speci\214c information.)108 189.6 Q 1.944 +(The record number data structure is either v)108 206.4 R 1.944 +(ariable or \214x)-.25 F 1.944 +(ed-length records stored in a \215at-\214le format,)-.15 F 2.04 +(accessed by the logical record number)108 218.4 R 7.04(.T)-.55 G 2.04(he e) +286.31 218.4 R 2.04(xistence of record number \214v)-.15 F 4.54(ei)-.15 G 2.04 +(mplies the e)442.1 218.4 R 2.04(xistence of)-.15 F .876 +(records one through four)108 230.4 R 3.376(,a)-.4 G .875 +(nd the deletion of record number one causes record number \214v)219.684 230.4 +R 3.375(et)-.15 G 3.375(ob)489.93 230.4 S 3.375(er)503.305 230.4 S(enum-)514.45 +230.4 Q .282(bered to record number four)108 242.4 R 2.782(,a)-.4 G 2.782(sw) +231.19 242.4 S .283(ell as the cursor)245.082 242.4 R 2.783(,i)-.4 G 2.783(fp) +316.633 242.4 S .283(ositioned after record number one, to shift do)327.746 +242.4 R .283(wn one)-.25 F(record.)108 254.4 Q .373 +(The recno access method speci\214c data structure pro)108 271.2 R .373 +(vided to)-.15 F F3(dbopen)2.873 E F0 .373(is de\214ned in the <db)2.873 F .373 +(.h> include \214le as)-.4 F(follo)108 283.2 Q(ws:)-.25 E(typedef struct {)108 +300 Q(u_long \215ags;)144 312 Q(u_int cachesize;)144 324 Q(u_int psize;)144 336 +Q(int lorder;)144 348 Q(size_t reclen;)144 360 Q(u_char b)144 372 Q -.25(va) +-.15 G(l;).25 E(char *bfname;)144 384 Q 2.5(}R)108 396 S(ECNOINFO;)121.97 396 Q +(The elements of this structure are de\214ned as follo)108 412.8 Q(ws:)-.25 E +14.61(\215ags The)108 429.6 R(\215ag v)2.5 E(alue is speci\214ed by)-.25 E F3 +(or)2.5 E F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)313.2 429.6 S(he follo)321.81 +429.6 Q(wing v)-.25 E(alues:)-.25 E(R_FIXEDLEN)144 446.4 Q .962 +(The records are \214x)180 458.4 R .963(ed-length, not byte delimited.)-.15 F +.963(The structure element)5.963 F F3 -.37(re)3.463 G(clen).37 E F0 +(speci\214es)3.463 E .345(the length of the record, and the structure element) +180 470.4 R F3(bval)2.844 E F0 .344(is used as the pad character)2.844 F 5.344 +(.A)-.55 G -.15(ny)530.15 470.4 S .739 +(records, inserted into the database, that are less than)180 482.4 R F3 -.37 +(re)3.239 G(clen).37 E F0 .74(bytes long are automatically)3.239 F(padded.)180 +494.4 Q(R_NOKEY)144 511.2 Q 2.34(In the interf)180 523.2 R 2.34 +(ace speci\214ed by)-.1 F F3(dbopen)4.84 E F0 4.84(,t).24 G 2.34 +(he sequential record retrie)344.98 523.2 R -.25(va)-.25 G 4.84<6c8c>.25 G 2.34 +(lls in both the)478.25 523.2 R(caller')180 535.2 Q 3.556(sk)-.55 G 1.357 -.15 +(ey a)217.336 535.2 T 1.057(nd data structures.).15 F 1.057 +(If the R_NOKEY \215ag is speci\214ed, the)6.057 F F3(cur)3.557 E(sor)-.1 E F0 +(routines)3.557 E .029(are not required to \214ll in the k)180 547.2 R .329 +-.15(ey s)-.1 H 2.529(tructure. This).15 F .028(permits applications to retrie) +2.529 F .328 -.15(ve r)-.25 H .028(ecords at).15 F +(the end of \214les without reading all of the interv)180 559.2 Q +(ening records.)-.15 E(R_SN)144 576 Q(APSHO)-.35 E(T)-.4 E .964 +(This \215ag requires that a snapshot of the \214le be tak)180 588 R .965 +(en when)-.1 F F3(dbopen)3.465 E F0 .965(is called, instead of)3.465 F +(permitting an)180 600 Q 2.5(yu)-.15 G +(nmodi\214ed records to be read from the original \214le.)245.96 600 Q +(cachesize)108 616.8 Q 3.16(As)144 628.8 S .66 +(uggested maximum size, in bytes, of the memory cache.)158.27 628.8 R .659 +(This v)5.659 F .659(alue is)-.25 F F2(only)3.159 E F0(advisory)3.159 E 3.159 +(,a)-.65 G .659(nd the)514.621 628.8 R .046 +(access method will allocate more memory rather than f)144 640.8 R 2.546 +(ail. If)-.1 F F3(cac)2.546 E(hesize)-.15 E F0 2.546(is 0)2.546 F .046 +(\(no size is speci\214ed\) a)2.546 F(def)144 652.8 Q(ault cache is used.)-.1 E +12.95(psize The)108 669.6 R .715 +(recno access method stores the in-memory copies of its records in a btree.) +3.216 F .715(This v)5.715 F .715(alue is the)-.25 F .805 +(size \(in bytes\) of the pages used for nodes in that tree.)144 681.6 R(If) +5.805 E F3(psize)3.305 E F0 .806(is 0 \(no page size is speci\214ed\) a)3.305 F +1.468 +(page size is chosen based on the underlying \214le system I/O block size.)144 +693.6 R(See)6.467 E F3(btr)3.967 E(ee)-.37 E F0 1.467(\(3\) for more).18 F +(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 96.815 +(ution August)-.2 F(18, 1994)2.5 E(1)535 732 Q EP +%%Page: 2 2 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 130.12(RECNO\(3\) BSD)72 48 R(Programmer')2.5 E 2.5(sM) +-.55 G 130.12(anual RECNO\(3\))340.17 48 R(information.)144 84 Q 9.62 +(lorder The)108 100.8 R 1.596(byte order for inte)4.096 F 1.596 +(gers in the stored database metadata.)-.15 F 1.597 +(The number should represent the)6.597 F .689(order as an inte)144 112.8 R .689 +(ger; for e)-.15 F .689(xample, big endian order w)-.15 F .689 +(ould be the number 4,321.)-.1 F(If)5.689 E/F1 10/Times-Italic@0 SF(lor)3.189 E +(der)-.37 E F0 .688(is 0 \(no)3.189 F +(order is speci\214ed\) the current host order is used.)144 124.8 Q 9.07 +(reclen The)108 141.6 R(length of a \214x)2.5 E(ed-length record.)-.15 E -.15 +(bv)108 158.4 S 16.68(al The)-.1 F .182 +(delimiting byte to be used to mark the end of a record for v)2.682 F .183 +(ariable-length records, and the pad)-.25 F .809(character for \214x)144 170.4 +R .809(ed-length records.)-.15 F .809(If no v)5.809 F .809 +(alue is speci\214ed, ne)-.25 F .809(wlines \(`)-.25 F(`\\n')-.74 E .808 +('\) are used to mark the)-.74 F(end of v)144 182.4 Q +(ariable-length records and \214x)-.25 E +(ed-length records are padded with spaces.)-.15 E 3.51(bfname The)108 199.2 R +.505 +(recno access method stores the in-memory copies of its records in a btree.) +3.005 F .506(If bfname is non-)5.506 F .065(NULL, it speci\214es the name of t\ +he btree \214le, as if speci\214ed as the \214le name for a dbopen of a btree) +144 211.2 R(\214le.)144 223.2 Q .971(The data part of the k)108 240 R -.15(ey) +-.1 G .972(/data pair used by the recno access method is the same as other acc\ +ess methods.).15 F .199(The k)108 252 R .499 -.15(ey i)-.1 H 2.699(sd).15 G(if) +157.507 252 Q 2.699(ferent. The)-.25 F F1(data)2.699 E F0 .199 +(\214eld of the k)2.699 F .499 -.15(ey s)-.1 H .198 +(hould be a pointer to a memory location of type).15 F F1 -.37(re)2.698 G +(cno_t).37 E F0 2.698(,a).68 G(s)536.11 252 Q .505(de\214ned in the <db)108 264 +R .506(.h> include \214le.)-.4 F .506(This type is normally the lar)5.506 F +.506(gest unsigned inte)-.18 F .506(gral type a)-.15 F -.25(va)-.2 G .506 +(ilable to the).25 F 2.5(implementation. The)108 276 R F1(size)2.5 E F0 +(\214eld of the k)2.5 E .3 -.15(ey s)-.1 H(hould be the size of that type.).15 +E .706(Because there can be no meta-data associated with the underlying recno \ +access method \214les, an)108 292.8 R 3.206(yc)-.15 G(hanges)512.23 292.8 Q +1.262(made to the def)108 304.8 R 1.262(ault v)-.1 F 1.262(alues \(e.g. \214x) +-.25 F 1.263(ed record length or byte separator v)-.15 F 1.263 +(alue\) must be e)-.25 F 1.263(xplicitly speci\214ed)-.15 F +(each time the \214le is opened.)108 316.8 Q .065(In the interf)108 333.6 R +.065(ace speci\214ed by)-.1 F F1(dbopen)2.564 E F0 2.564(,u).24 G .064 +(sing the)261.548 333.6 R F1(put)2.564 E F0(interf)2.564 E .064 +(ace to create a ne)-.1 F 2.564(wr)-.25 G .064 +(ecord will cause the creation of)414.44 333.6 R .755(multiple, empty records \ +if the record number is more than one greater than the lar)108 345.6 R .755 +(gest record currently in)-.18 F(the database.)108 357.6 Q/F2 9/Times-Bold@0 SF +(ERR)72 374.4 Q(ORS)-.27 E F0(The)108 386.4 Q F1 -.37(re)2.922 G(cno).37 E F0 +.421(access method routines may f)2.921 F .421(ail and set)-.1 F F1(errno)2.921 +E F0 .421(for an)2.921 F 2.921(yo)-.15 G 2.921(ft)377.933 386.4 S .421 +(he errors speci\214ed for the library rou-)386.964 386.4 R(tine)108 398.4 Q F1 +(dbopen)2.5 E F0(\(3\) or the follo).24 E(wing:)-.25 E([EINV)108 415.2 Q(AL]) +-1.35 E(An attempt w)144 427.2 Q(as made to add a record to a \214x)-.1 E +(ed-length database that w)-.15 E(as too lar)-.1 E(ge to \214t.)-.18 E F2 +(SEE ALSO)72 444 Q F1(btr)108 456 Q(ee)-.37 E F0(\(3\)).18 E F1(dbopen)2.5 E F0 +(\(3\),).24 E F1(hash)2.5 E F0(\(3\),).28 E F1(mpool)2.5 E F0(\(3\),).51 E F1 +2.754(Document Pr)108 480 R 2.754(ocessing in a Relational Database System)-.45 +F F0 5.255(,M).32 G 2.755(ichael Stonebrak)362.13 480 R(er)-.1 E 5.255(,H)-.4 G +2.755(eidi Stettner)454.06 480 R 5.255(,J)-.4 G(oseph)516.67 480 Q +(Kalash, Antonin Guttman, Nadene L)108 492 Q +(ynn, Memorandum No. UCB/ERL M82/32, May 1982.)-.55 E F2 -.09(BU)72 508.8 S(GS) +.09 E F0(Only big and little endian byte order is supported.)108 520.8 Q +(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 96.815 +(ution August)-.2 F(18, 1994)2.5 E(2)535 732 Q EP +%%Trailer +end +%%EOF diff --git a/src/util/db2/hash/Makefile.inc b/src/util/db2/hash/Makefile.inc new file mode 100644 index 0000000000..87746f721e --- /dev/null +++ b/src/util/db2/hash/Makefile.inc @@ -0,0 +1,6 @@ +# @(#)Makefile.inc 8.2 (Berkeley) 11/7/95 + +.PATH: ${.CURDIR}/db/hash + +SRCS+= hash.c hash_bigkey.c hash_buf.c hash_func.c hash_log2.c \ + hash_page.c hsearch.c dbm.c diff --git a/src/util/db2/hash/dbm.c b/src/util/db2/hash/dbm.c new file mode 100644 index 0000000000..a5a31f45bc --- /dev/null +++ b/src/util/db2/hash/dbm.c @@ -0,0 +1,297 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)dbm.c 8.6 (Berkeley) 11/7/95"; +#endif /* LIBC_SCCS and not lint */ + +#include "db-int.h" + +#include <sys/param.h> + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> + +#include "db-ndbm.h" +#include "hash.h" + +/* + * + * This package provides dbm and ndbm compatible interfaces to DB. + * First are the DBM routines, which call the NDBM routines, and + * the NDBM routines, which call the DB routines. + */ +static DBM *__cur_db; + +static void no_open_db __P((void)); + +int +dbminit(file) + char *file; +{ + if (__cur_db != NULL) + (void)dbm_close(__cur_db); + if ((__cur_db = dbm_open(file, O_RDWR, 0)) != NULL) + return (0); + if ((__cur_db = dbm_open(file, O_RDONLY, 0)) != NULL) + return (0); + return (-1); +} + +datum +fetch(key) + datum key; +{ + datum item; + + if (__cur_db == NULL) { + no_open_db(); + item.dptr = 0; + return (item); + } + return (dbm_fetch(__cur_db, key)); +} + +datum +firstkey() +{ + datum item; + + if (__cur_db == NULL) { + no_open_db(); + item.dptr = 0; + return (item); + } + return (dbm_firstkey(__cur_db)); +} + +datum +nextkey(key) + datum key; +{ + datum item; + + if (__cur_db == NULL) { + no_open_db(); + item.dptr = 0; + return (item); + } + return (dbm_nextkey(__cur_db)); +} + +int +delete(key) + datum key; +{ + if (__cur_db == NULL) { + no_open_db(); + return (-1); + } + return (dbm_delete(__cur_db, key)); +} + +int +store(key, dat) + datum key, dat; +{ + if (__cur_db == NULL) { + no_open_db(); + return (-1); + } + return (dbm_store(__cur_db, key, dat, DBM_REPLACE)); +} + +static void +no_open_db() +{ + (void)fprintf(stderr, "dbm: no open database.\n"); +} + +/* + * Returns: + * *DBM on success + * NULL on failure + */ +DBM * +dbm_open(file, flags, mode) + const char *file; + int flags, mode; +{ + HASHINFO info; + char path[MAXPATHLEN]; + + info.bsize = 4096; + info.ffactor = 40; + info.nelem = 1; + info.cachesize = 0; + info.hash = NULL; + info.lorder = 0; + (void)strcpy(path, file); + (void)strcat(path, DBM_SUFFIX); + return ((DBM *)__hash_open(path, flags, mode, &info, 0)); +} + +/* + * Returns: + * Nothing. + */ +void +dbm_close(db) + DBM *db; +{ + (void)(db->close)(db); +} + +/* + * Returns: + * DATUM on success + * NULL on failure + */ +datum +dbm_fetch(db, key) + DBM *db; + datum key; +{ + datum retval; + int status; + + status = (db->get)(db, (DBT *)&key, (DBT *)&retval, 0); + if (status) { + retval.dptr = NULL; + retval.dsize = 0; + } + return (retval); +} + +/* + * Returns: + * DATUM on success + * NULL on failure + */ +datum +dbm_firstkey(db) + DBM *db; +{ + int status; + datum retdata, retkey; + + status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_FIRST); + if (status) + retkey.dptr = NULL; + return (retkey); +} + +/* + * Returns: + * DATUM on success + * NULL on failure + */ +datum +dbm_nextkey(db) + DBM *db; +{ + int status; + datum retdata, retkey; + + status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_NEXT); + if (status) + retkey.dptr = NULL; + return (retkey); +} + +/* + * Returns: + * 0 on success + * <0 failure + */ +int +dbm_delete(db, key) + DBM *db; + datum key; +{ + int status; + + status = (db->del)(db, (DBT *)&key, 0); + if (status) + return (-1); + else + return (0); +} + +/* + * Returns: + * 0 on success + * <0 failure + * 1 if DBM_INSERT and entry exists + */ +int +dbm_store(db, key, content, flags) + DBM *db; + datum key, content; + int flags; +{ + return ((db->put)(db, (DBT *)&key, (DBT *)&content, + (flags == DBM_INSERT) ? R_NOOVERWRITE : 0)); +} + +int +dbm_error(db) + DBM *db; +{ + HTAB *hp; + + hp = (HTAB *)db->internal; + return (hp->errno); +} + +int +dbm_clearerr(db) + DBM *db; +{ + HTAB *hp; + + hp = (HTAB *)db->internal; + hp->errno = 0; + return (0); +} + +int +dbm_dirfno(db) + DBM *db; +{ + return(((HTAB *)db->internal)->fp); +} diff --git a/src/util/db2/hash/extern.h b/src/util/db2/hash/extern.h new file mode 100644 index 0000000000..dd8a34e8ef --- /dev/null +++ b/src/util/db2/hash/extern.h @@ -0,0 +1,76 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.8 (Berkeley) 11/7/95 + */ + +PAGE16 *__add_bigpage __P((HTAB *, PAGE16 *, indx_t, const u_int8_t)); +PAGE16 *__add_ovflpage __P((HTAB *, PAGE16 *)); +int32_t __addel __P((HTAB *, ITEM_INFO *, + const DBT *, const DBT *, u_int32_t, const u_int8_t)); +u_int32_t __alloc_tmp __P((HTAB*)); +int32_t __big_delete __P((HTAB *, PAGE16 *, indx_t)); +int32_t __big_insert __P((HTAB *, PAGE16 *, const DBT *, const DBT *)); +int32_t __big_keydata __P((HTAB *, PAGE16 *, DBT *, DBT *, int32_t)); +int32_t __big_return __P((HTAB *, ITEM_INFO *, DBT *, int32_t)); +u_int32_t __call_hash __P((HTAB *, int8_t *, int32_t)); +CURSOR *__cursor_creat __P((const DB *)); +int32_t __delete_page __P((HTAB *, PAGE16 *, int32_t)); +int32_t __delpair __P((HTAB *, CURSOR *, ITEM_INFO *)); +int32_t __expand_table __P((HTAB *)); +int32_t __find_bigpair __P((HTAB *, CURSOR *, int8_t *, int32_t)); +void __free_ovflpage __P((HTAB *, PAGE16 *)); +int32_t __get_bigkey __P((HTAB *, PAGE16 *, indx_t, DBT *)); +PAGE16 *__get_buf __P((HTAB *, u_int32_t, int32_t)); +u_int32_t __get_item __P((HTAB *, CURSOR *, DBT *, DBT *, ITEM_INFO *)); +u_int32_t __get_item_done __P((HTAB *, CURSOR *)); +u_int32_t __get_item_first __P((HTAB *, CURSOR *, DBT *, DBT *, ITEM_INFO *)); +u_int32_t __get_item_next __P((HTAB *, CURSOR *, DBT *, DBT *, ITEM_INFO *)); +u_int32_t __get_item_reset __P((HTAB *, CURSOR *)); +PAGE16 *__get_page __P((HTAB *, u_int32_t, int32_t)); +int32_t __ibitmap __P((HTAB *, int32_t, int32_t, int32_t)); +u_int32_t __log2 __P((u_int32_t)); +int32_t __new_page __P((HTAB *, u_int32_t, int32_t)); +void __pgin_routine __P((void *, db_pgno_t, void *)); +void __pgout_routine __P((void *, db_pgno_t, void *)); +u_int32_t __put_buf __P((HTAB *, PAGE16 *, u_int32_t)); +int32_t __put_page __P((HTAB *, PAGE16 *, int32_t, int32_t)); +void __reclaim_tmp __P((HTAB *)); +int32_t __split_page __P((HTAB *, u_int32_t, u_int32_t)); + +/* Default hash routine. */ +extern u_int32_t (*__default_hash) __P((const void *, size_t)); + +#ifdef HASH_STATISTICS +extern long hash_accesses, hash_bigpages, hash_collisions, hash_expansions; +extern long hash_overflow; +#endif diff --git a/src/util/db2/hash/hash.c b/src/util/db2/hash/hash.c new file mode 100644 index 0000000000..017138606a --- /dev/null +++ b/src/util/db2/hash/hash.c @@ -0,0 +1,1068 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash.c 8.12 (Berkeley) 11/7/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifdef DEBUG +#include <assert.h> +#endif + +#include "db-int.h" +#include "hash.h" +#include "page.h" +#include "extern.h" + +static int32_t flush_meta __P((HTAB *)); +static int32_t hash_access __P((HTAB *, ACTION, DBT *, DBT *)); +static int32_t hash_close __P((DB *)); +static int32_t hash_delete __P((const DB *, const DBT *, u_int32_t)); +static int32_t hash_fd __P((const DB *)); +static int32_t hash_get __P((const DB *, const DBT *, DBT *, u_int32_t)); +static int32_t hash_put __P((const DB *, DBT *, const DBT *, u_int32_t)); +static int32_t hash_seq __P((const DB *, DBT *, DBT *, u_int32_t)); +static int32_t hash_sync __P((const DB *, u_int32_t)); +static int32_t hdestroy __P((HTAB *)); +static int32_t cursor_get __P((const DB *, CURSOR *, DBT *, DBT *, \ + u_int32_t)); +static int32_t cursor_delete __P((const DB *, CURSOR *, u_int32_t)); +static HTAB *init_hash __P((HTAB *, const char *, HASHINFO *)); +static int32_t init_htab __P((HTAB *, int32_t)); +#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN +static void swap_header __P((HTAB *)); +static void swap_header_copy __P((HASHHDR *, HASHHDR *)); +#endif +static u_int32_t hget_header __P((HTAB *, u_int32_t)); +static void hput_header __P((HTAB *)); + +#define RETURN_ERROR(ERR, LOC) { save_errno = ERR; goto LOC; } + +/* Return values */ +#define SUCCESS (0) +#define ERROR (-1) +#define ABNORMAL (1) + +#ifdef HASH_STATISTICS +u_int32_t hash_accesses, hash_collisions, hash_expansions, hash_overflows, + hash_bigpages; +#endif + +/************************** INTERFACE ROUTINES ***************************/ +/* OPEN/CLOSE */ + +extern DB * +__hash_open(file, flags, mode, info, dflags) + const char *file; + int32_t flags, mode, dflags; + const HASHINFO *info; /* Special directives for create */ +{ + struct stat statbuf; + DB *dbp; + DBT mpool_key; + HTAB *hashp; + int32_t bpages, csize, new_table, save_errno, specified_file; + + if ((flags & O_ACCMODE) == O_WRONLY) { + errno = EINVAL; + return (NULL); + } + if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB)))) + return (NULL); + hashp->fp = -1; + + /* set this now, before file goes away... */ + specified_file = (file != NULL); + if (!file) { + file = tmpnam(NULL); + /* store the file name so that we can unlink it later */ + hashp->fname = (char *)file; +#ifdef DEBUG + fprintf(stderr, "Using file name %s.\n", file); +#endif + } + /* + * Even if user wants write only, we need to be able to read + * the actual file, so we need to open it read/write. But, the + * field in the hashp structure needs to be accurate so that + * we can check accesses. + */ + hashp->flags = flags; + hashp->save_file = specified_file && (hashp->flags & O_RDWR); + + new_table = 0; + if (!file || (flags & O_TRUNC) || + (stat(file, &statbuf) && (errno == ENOENT))) { + if (errno == ENOENT) + errno = 0; /* In case someone looks at errno. */ + new_table = 1; + } + if (file) { + if ((hashp->fp = open(file, flags, mode)) == -1) + RETURN_ERROR(errno, error0); + (void)fcntl(hashp->fp, F_SETFD, 1); + } + + /* Process arguments to set up hash table header. */ + if (new_table) { + if (!(hashp = init_hash(hashp, file, (HASHINFO *)info))) + RETURN_ERROR(errno, error1); + } else { + /* Table already exists */ + if (info && info->hash) + hashp->hash = info->hash; + else + hashp->hash = __default_hash; + + /* copy metadata from page into header */ + if (hget_header(hashp, + (info && info->bsize ? info->bsize : DEF_BUCKET_SIZE)) != + sizeof(HASHHDR)) + RETURN_ERROR(EFTYPE, error1); + + /* Verify file type, versions and hash function */ + if (hashp->hdr.magic != HASHMAGIC) + RETURN_ERROR(EFTYPE, error1); +#define OLDHASHVERSION 1 + if (hashp->hdr.version != HASHVERSION && + hashp->hdr.version != OLDHASHVERSION) + RETURN_ERROR(EFTYPE, error1); + if (hashp->hash(CHARKEY, sizeof(CHARKEY)) + != hashp->hdr.h_charkey) + RETURN_ERROR(EFTYPE, error1); + /* + * Figure out how many segments we need. Max_Bucket is the + * maximum bucket number, so the number of buckets is + * max_bucket + 1. + */ + + /* Read in bitmaps */ + bpages = (hashp->hdr.spares[hashp->hdr.ovfl_point] + + (hashp->hdr.bsize << BYTE_SHIFT) - 1) >> + (hashp->hdr.bshift + BYTE_SHIFT); + + hashp->nmaps = bpages; + (void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *)); + } + + /* start up mpool */ + mpool_key.data = (u_int8_t *)file; + mpool_key.size = strlen(file); + + if (info && info->cachesize) + csize = info->cachesize / hashp->hdr.bsize; + else + csize = DEF_CACHESIZE / hashp->hdr.bsize; + hashp->mp = mpool_open(&mpool_key, hashp->fp, hashp->hdr.bsize, csize); + + if (!hashp->mp) + RETURN_ERROR(errno, error1); + mpool_filter(hashp->mp, __pgin_routine, __pgout_routine, hashp); + + /* + * For a new table, set up the bitmaps. + */ + if (new_table && + init_htab(hashp, info && info->nelem ? info->nelem : 1)) + goto error2; + + /* initialize the cursor queue */ + TAILQ_INIT(&hashp->curs_queue); + hashp->seq_cursor = NULL; + + + /* get a chunk of memory for our split buffer */ + hashp->split_buf = (PAGE16 *)malloc(hashp->hdr.bsize); + if (!hashp->split_buf) + goto error2; + + hashp->new_file = new_table; + + if (!(dbp = (DB *)malloc(sizeof(DB)))) + goto error2; + + dbp->internal = hashp; + dbp->close = hash_close; + dbp->del = hash_delete; + dbp->fd = hash_fd; + dbp->get = hash_get; + dbp->put = hash_put; + dbp->seq = hash_seq; + dbp->sync = hash_sync; + dbp->type = DB_HASH; + +#ifdef DEBUG + (void)fprintf(stderr, + "%s\n%s%lx\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n", + "init_htab:", + "TABLE POINTER ", (void *)hashp, + "BUCKET SIZE ", hashp->hdr.bsize, + "BUCKET SHIFT ", hashp->hdr.bshift, + "FILL FACTOR ", hashp->hdr.ffactor, + "MAX BUCKET ", hashp->hdr.max_bucket, + "OVFL POINT ", hashp->hdr.ovfl_point, + "LAST FREED ", hashp->hdr.last_freed, + "HIGH MASK ", hashp->hdr.high_mask, + "LOW MASK ", hashp->hdr.low_mask, + "NKEYS ", hashp->hdr.nkeys); +#endif +#ifdef HASH_STATISTICS + hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0; + hash_bigpages = 0; +#endif + return (dbp); + +error2: + save_errno = errno; + hdestroy(hashp); + errno = save_errno; + return (NULL); + +error1: + if (hashp != NULL) + (void)close(hashp->fp); + +error0: + free(hashp); + errno = save_errno; + return (NULL); +} + +static int32_t +hash_close(dbp) + DB *dbp; +{ + HTAB *hashp; + int32_t retval; + + if (!dbp) + return (ERROR); + + hashp = (HTAB *)dbp->internal; + retval = hdestroy(hashp); + free(dbp); + return (retval); +} + +static int32_t +hash_fd(dbp) + const DB *dbp; +{ + HTAB *hashp; + + if (!dbp) + return (ERROR); + + hashp = (HTAB *)dbp->internal; + if (hashp->fp == -1) { + errno = ENOENT; + return (-1); + } + return (hashp->fp); +} + +/************************** LOCAL CREATION ROUTINES **********************/ +static HTAB * +init_hash(hashp, file, info) + HTAB *hashp; + const char *file; + HASHINFO *info; +{ + struct stat statbuf; + int32_t nelem; + + nelem = 1; + hashp->hdr.nkeys = 0; + hashp->hdr.lorder = DB_BYTE_ORDER; + hashp->hdr.bsize = DEF_BUCKET_SIZE; + hashp->hdr.bshift = DEF_BUCKET_SHIFT; + hashp->hdr.ffactor = DEF_FFACTOR; + hashp->hash = __default_hash; + memset(hashp->hdr.spares, 0, sizeof(hashp->hdr.spares)); + memset(hashp->hdr.bitmaps, 0, sizeof(hashp->hdr.bitmaps)); + + /* Fix bucket size to be optimal for file system */ + if (file != NULL) { + if (stat(file, &statbuf)) + return (NULL); + hashp->hdr.bsize = statbuf.st_blksize; + hashp->hdr.bshift = __log2(hashp->hdr.bsize); + } + if (info) { + if (info->bsize) { + /* Round pagesize up to power of 2 */ + hashp->hdr.bshift = __log2(info->bsize); + hashp->hdr.bsize = 1 << hashp->hdr.bshift; + if (hashp->hdr.bsize > MAX_BSIZE) { + errno = EINVAL; + return (NULL); + } + } + if (info->ffactor) + hashp->hdr.ffactor = info->ffactor; + if (info->hash) + hashp->hash = info->hash; + if (info->lorder) { + if ((info->lorder != DB_BIG_ENDIAN) && + (info->lorder != DB_LITTLE_ENDIAN)) { + errno = EINVAL; + return (NULL); + } + hashp->hdr.lorder = info->lorder; + } + } + return (hashp); +} + +/* + * Returns 0 on No Error + */ +static int32_t +init_htab(hashp, nelem) + HTAB *hashp; + int32_t nelem; +{ + int32_t l2, nbuckets; + db_pgno_t i; + + /* + * Divide number of elements by the fill factor and determine a + * desired number of buckets. Allocate space for the next greater + * power of two number of buckets. + */ + nelem = (nelem - 1) / hashp->hdr.ffactor + 1; + + l2 = __log2(MAX(nelem, 2)); + nbuckets = 1 << l2; + + hashp->hdr.spares[l2] = l2 + 1; + hashp->hdr.spares[l2 + 1] = l2 + 1; + hashp->hdr.ovfl_point = l2; + hashp->hdr.last_freed = 2; + + hashp->hdr.max_bucket = hashp->hdr.low_mask = nbuckets - 1; + hashp->hdr.high_mask = (nbuckets << 1) - 1; + + /* + * The number of header pages is the size of the header divided by + * the amount of freespace on header pages (the page size - the + * size of 1 integer where the length of the header info on that + * page is stored) plus another page if it didn't divide evenly. + */ + hashp->hdr.hdrpages = + (sizeof(HASHHDR) / (hashp->hdr.bsize - HEADER_OVERHEAD)) + + (((sizeof(HASHHDR) % (hashp->hdr.bsize - HEADER_OVERHEAD)) == 0) + ? 0 : 1); + + /* Create pages for these buckets */ + /* + for (i = 0; i <= hashp->hdr.max_bucket; i++) { + if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0) + return (-1); + } + */ + + /* First bitmap page is at: splitpoint l2 page offset 1 */ + if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0)) + return (-1); + + return (0); +} + +/* + * Functions to get/put hash header. We access the file directly. + */ +u_int32_t +hget_header(hashp, page_size) + HTAB *hashp; + u_int32_t page_size; +{ + u_int32_t num_copied, i; + u_int8_t *hdr_dest; + + num_copied = 0; + i = 0; + + hdr_dest = (u_int8_t *)&hashp->hdr; + + /* + * XXX + * This should not be printing to stderr on a "normal" error case. + */ + lseek(hashp->fp, 0, SEEK_SET); + num_copied = read(hashp->fp, hdr_dest, sizeof(HASHHDR)); + if (num_copied != sizeof(HASHHDR)) { + fprintf(stderr, "hash: could not retrieve header"); + return (0); + } +#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN + swap_header(hashp); +#endif + return (num_copied); +} + +void +hput_header(hashp) + HTAB *hashp; +{ + HASHHDR *whdrp; +#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN + HASHHDR whdr; +#endif + u_int32_t num_copied, i; + + num_copied = i = 0; + + whdrp = &hashp->hdr; +#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN + whdrp = &whdr; + swap_header_copy(&hashp->hdr, whdrp); +#endif + + lseek(hashp->fp, 0, SEEK_SET); + num_copied = write(hashp->fp, whdrp, sizeof(HASHHDR)); + if (num_copied != sizeof(HASHHDR)) + (void)fprintf(stderr, "hash: could not write hash header"); + return; +} + +/********************** DESTROY/CLOSE ROUTINES ************************/ + +/* + * Flushes any changes to the file if necessary and destroys the hashp + * structure, freeing all allocated space. + */ +static int32_t +hdestroy(hashp) + HTAB *hashp; +{ + int32_t save_errno; + + save_errno = 0; + +#ifdef HASH_STATISTICS + { int i; + (void)fprintf(stderr, "hdestroy: accesses %ld collisions %ld\n", + hash_accesses, hash_collisions); + (void)fprintf(stderr, + "hdestroy: expansions %ld\n", hash_expansions); + (void)fprintf(stderr, + "hdestroy: overflows %ld\n", hash_overflows); + (void)fprintf(stderr, + "hdestroy: big key/data pages %ld\n", hash_bigpages); + (void)fprintf(stderr, + "keys %ld maxp %d\n", hashp->hdr.nkeys, hashp->hdr.max_bucket); + + for (i = 0; i < NCACHED; i++) + (void)fprintf(stderr, + "spares[%d] = %d\n", i, hashp->hdr.spares[i]); + } +#endif + + if (flush_meta(hashp) && !save_errno) + save_errno = errno; + + /* Free the split page */ + if (hashp->split_buf) + free(hashp->split_buf); + + /* Free the big key and big data returns */ + if (hashp->bigkey_buf) + free(hashp->bigkey_buf); + if (hashp->bigdata_buf) + free(hashp->bigdata_buf); + + /* XXX This should really iterate over the cursor queue, but + it's not clear how to do that, and the only cursor a hash + table ever creates is the one used by hash_seq(). Passing + NULL as the first arg is also a kludge, but I know that + it's never used, so I do it. The intent is to plug the + memory leak. Correctness can come later. */ + + if (hashp->seq_cursor) + hashp->seq_cursor->delete(NULL, hashp->seq_cursor, 0); + + /* shut down mpool */ + mpool_sync(hashp->mp); + mpool_close(hashp->mp); + + if (hashp->fp != -1) + (void)close(hashp->fp); + + /* + * *** This may cause problems if hashp->fname is set in any case + * other than the case that we are generating a temporary file name. + * Note that the new version of mpool should support temporary + * files within mpool itself. + */ + if (hashp->fname && !hashp->save_file) { +#ifdef DEBUG + fprintf(stderr, "Unlinking file %s.\n", hashp->fname); +#endif + /* we need to chmod the file to allow it to be deleted... */ + chmod(hashp->fname, 0700); + unlink(hashp->fname); + /* destroy the temporary name */ + tmpnam(NULL); + } + free(hashp); + + if (save_errno) { + errno = save_errno; + return (ERROR); + } + return (SUCCESS); +} + +/* + * Write modified pages to disk + * + * Returns: + * 0 == OK + * -1 ERROR + */ +static int32_t +hash_sync(dbp, flags) + const DB *dbp; + u_int32_t flags; +{ + HTAB *hashp; + + hashp = (HTAB *)dbp->internal; + + /* + * XXX + * Check success/failure conditions. + */ + return (flush_meta(hashp) || mpool_sync(hashp->mp)); +} + +/* + * Returns: + * 0 == OK + * -1 indicates that errno should be set + */ +static int32_t +flush_meta(hashp) + HTAB *hashp; +{ + int32_t i; + + if (!hashp->save_file) + return (0); + hashp->hdr.magic = HASHMAGIC; + hashp->hdr.version = HASHVERSION; + hashp->hdr.h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY)); + + /* write out metadata */ + hput_header(hashp); + + for (i = 0; i < NCACHED; i++) + if (hashp->mapp[i]) { + if (__put_page(hashp, + (PAGE16 *)hashp->mapp[i], A_BITMAP, 1)) + return (-1); + hashp->mapp[i] = NULL; + } + return (0); +} + +/*******************************SEARCH ROUTINES *****************************/ +/* + * All the access routines return + * + * Returns: + * 0 on SUCCESS + * 1 to indicate an external ERROR (i.e. key not found, etc) + * -1 to indicate an internal ERROR (i.e. out of memory, etc) + */ + +/* *** make sure this is true! */ + +static int32_t +hash_get(dbp, key, data, flag) + const DB *dbp; + const DBT *key; + DBT *data; + u_int32_t flag; +{ + HTAB *hashp; + + hashp = (HTAB *)dbp->internal; + if (flag) { + hashp->errno = errno = EINVAL; + return (ERROR); + } + return (hash_access(hashp, HASH_GET, (DBT *)key, data)); +} + +static int32_t +hash_put(dbp, key, data, flag) + const DB *dbp; + DBT *key; + const DBT *data; + u_int32_t flag; +{ + HTAB *hashp; + + hashp = (HTAB *)dbp->internal; + if (flag && flag != R_NOOVERWRITE) { + hashp->errno = errno = EINVAL; + return (ERROR); + } + if ((hashp->flags & O_ACCMODE) == O_RDONLY) { + hashp->errno = errno = EPERM; + return (ERROR); + } + return (hash_access(hashp, flag == R_NOOVERWRITE ? + HASH_PUTNEW : HASH_PUT, (DBT *)key, (DBT *)data)); +} + +static int32_t +hash_delete(dbp, key, flag) + const DB *dbp; + const DBT *key; + u_int32_t flag; /* Ignored */ +{ + HTAB *hashp; + + hashp = (HTAB *)dbp->internal; + if (flag) { + hashp->errno = errno = EINVAL; + return (ERROR); + } + if ((hashp->flags & O_ACCMODE) == O_RDONLY) { + hashp->errno = errno = EPERM; + return (ERROR); + } + + return (hash_access(hashp, HASH_DELETE, (DBT *)key, NULL)); +} + +/* + * Assume that hashp has been set in wrapper routine. + */ +static int32_t +hash_access(hashp, action, key, val) + HTAB *hashp; + ACTION action; + DBT *key, *val; +{ + DBT page_key, page_val; + CURSOR cursor; + ITEM_INFO item_info; + u_int32_t bucket; + u_int32_t num_items; + +#ifdef HASH_STATISTICS + hash_accesses++; +#endif + + num_items = 0; + + /* + * Set up item_info so that we're looking for space to add an item + * as we cycle through the pages looking for the key. + */ + if (action == HASH_PUT || action == HASH_PUTNEW) { + if (ISBIG(key->size + val->size, hashp)) + item_info.seek_size = PAIR_OVERHEAD; + else + item_info.seek_size = key->size + val->size; + } else + item_info.seek_size = 0; + item_info.seek_found_page = 0; + + bucket = __call_hash(hashp, (int8_t *)key->data, key->size); + + cursor.pagep = NULL; + __get_item_reset(hashp, &cursor); + + cursor.bucket = bucket; + while (1) { + __get_item_next(hashp, &cursor, &page_key, &page_val, &item_info); + if (item_info.status == ITEM_ERROR) + return (ABNORMAL); + if (item_info.status == ITEM_NO_MORE) + break; + num_items++; + if (item_info.key_off == BIGPAIR) { + /* + * !!! + * 0 is a valid index. + */ + if (__find_bigpair(hashp, &cursor, (int8_t *)key->data, + key->size) > 0) + goto found; + } else if (key->size == page_key.size && + !memcmp(key->data, page_key.data, key->size)) + goto found; + } +#ifdef HASH_STATISTICS + hash_collisions++; +#endif + __get_item_done(hashp, &cursor); + + /* + * At this point, item_info will list either the last page in + * the chain, or the last page in the chain plus a pgno for where + * to find the first page in the chain with space for the + * item we wish to add. + */ + + /* Not found */ + switch (action) { + case HASH_PUT: + case HASH_PUTNEW: + if (__addel(hashp, &item_info, key, val, num_items, 0)) + return (ERROR); + break; + case HASH_GET: + case HASH_DELETE: + default: + return (ABNORMAL); + } + + if (item_info.caused_expand) + __expand_table(hashp); + return (SUCCESS); + +found: __get_item_done(hashp, &cursor); + + switch (action) { + case HASH_PUTNEW: + /* mpool_put(hashp->mp, pagep, 0); */ + return (ABNORMAL); + case HASH_GET: + if (item_info.key_off == BIGPAIR) { + if (__big_return(hashp, &item_info, val, 0)) + return (ERROR); + } else { + val->data = page_val.data; + val->size = page_val.size; + } + /* *** data may not be available! */ + break; + case HASH_PUT: + if (__delpair(hashp, &cursor, &item_info) || + __addel(hashp, &item_info, key, val, UNKNOWN, 0)) + return (ERROR); + __get_item_done(hashp, &cursor); + if (item_info.caused_expand) + __expand_table(hashp); + break; + case HASH_DELETE: + if (__delpair(hashp, &cursor, &item_info)) + return (ERROR); + break; + default: + abort(); + } + return (SUCCESS); +} + +/* ****************** CURSORS ********************************** */ +CURSOR * +__cursor_creat(dbp) + const DB *dbp; +{ + CURSOR *new_curs; + HTAB *hashp; + + new_curs = (CURSOR *)malloc(sizeof(struct cursor_t)); + if (!new_curs) + return NULL; + new_curs->internal = + (struct item_info *)malloc(sizeof(struct item_info)); + if (!new_curs->internal) { + free(new_curs); + return NULL; + } + new_curs->get = cursor_get; + new_curs->delete = cursor_delete; + + new_curs->bucket = 0; + new_curs->pgno = INVALID_PGNO; + new_curs->ndx = 0; + new_curs->pgndx = 0; + new_curs->pagep = NULL; + + /* place onto queue of cursors */ + hashp = (HTAB *)dbp->internal; + TAILQ_INSERT_TAIL(&hashp->curs_queue, new_curs, queue); + + return new_curs; +} + +int32_t +cursor_get(dbp, cursorp, key, val, flags) + const DB *dbp; + CURSOR *cursorp; + DBT *key, *val; + u_int32_t flags; +{ + HTAB *hashp; + ITEM_INFO item_info; + + hashp = (HTAB *)dbp->internal; + + if (flags && flags != R_FIRST && flags != R_NEXT) { + hashp->errno = errno = EINVAL; + return (ERROR); + } +#ifdef HASH_STATISTICS + hash_accesses++; +#endif + + item_info.seek_size = 0; + + if (flags == R_FIRST) + __get_item_first(hashp, cursorp, key, val, &item_info); + else + __get_item_next(hashp, cursorp, key, val, &item_info); + + /* + * This needs to be changed around. As is, get_item_next advances + * the pointers on the page but this function actually advances + * bucket pointers. This works, since the only other place we + * use get_item_next is in hash_access which only deals with one + * bucket at a time. However, there is the problem that certain other + * functions (such as find_bigpair and delpair) depend on the + * pgndx member of the cursor. Right now, they are using pngdx - 1 + * since indices refer to the __next__ item that is to be fetched + * from the page. This is ugly, as you may have noticed, whoever + * you are. The best solution would be to depend on item_infos to + * deal with _current_ information, and have the cursors only + * deal with _next_ information. In that scheme, get_item_next + * would also advance buckets. Version 3... + */ + + + /* + * Must always enter this loop to do error handling and + * check for big key/data pair. + */ + while (1) { + if (item_info.status == ITEM_OK) { + if (item_info.key_off == BIGPAIR && + __big_keydata(hashp, cursorp->pagep, key, val, + item_info.pgndx)) + return (ABNORMAL); + + break; + } else if (item_info.status != ITEM_NO_MORE) + return (ABNORMAL); + + __put_page(hashp, cursorp->pagep, A_RAW, 0); + cursorp->ndx = cursorp->pgndx = 0; + cursorp->bucket++; + cursorp->pgno = INVALID_PGNO; + cursorp->pagep = NULL; + if (cursorp->bucket > hashp->hdr.max_bucket) + return (ABNORMAL); + __get_item_next(hashp, cursorp, key, val, &item_info); + } + + __get_item_done(hashp, cursorp); + return (0); +} + +int32_t +cursor_delete(dbp, cursor, flags) + const DB *dbp; + CURSOR *cursor; + u_int32_t flags; +{ + /* XXX this is empirically determined, so it might not be completely + correct, but it seems to work. At the very least it fixes + a memory leak */ + + free(cursor->internal); + free(cursor); + + return (0); +} + +static int32_t +hash_seq(dbp, key, val, flag) + const DB *dbp; + DBT *key, *val; + u_int32_t flag; +{ + HTAB *hashp; + + /* + * Seq just uses the default cursor to go sequecing through the + * database. Note that the default cursor is the first in the list. + */ + + hashp = (HTAB *)dbp->internal; + if (!hashp->seq_cursor) + hashp->seq_cursor = __cursor_creat(dbp); + + return (hashp->seq_cursor->get(dbp, hashp->seq_cursor, key, val, flag)); +} + +/********************************* UTILITIES ************************/ + +/* + * Returns: + * 0 ==> OK + * -1 ==> Error + */ +int32_t +__expand_table(hashp) + HTAB *hashp; +{ + u_int32_t old_bucket, new_bucket; + int32_t spare_ndx; + +#ifdef HASH_STATISTICS + hash_expansions++; +#endif + new_bucket = ++hashp->hdr.max_bucket; + old_bucket = (hashp->hdr.max_bucket & hashp->hdr.low_mask); + + /* Get a page for this new bucket */ + if (__new_page(hashp, new_bucket, A_BUCKET) != 0) + return (-1); + + /* + * If the split point is increasing (hdr.max_bucket's log base 2 + * increases), we need to copy the current contents of the spare + * split bucket to the next bucket. + */ + spare_ndx = __log2(hashp->hdr.max_bucket + 1); + if (spare_ndx > hashp->hdr.ovfl_point) { + hashp->hdr.spares[spare_ndx] = hashp->hdr.spares[hashp->hdr.ovfl_point]; + hashp->hdr.ovfl_point = spare_ndx; + } + if (new_bucket > hashp->hdr.high_mask) { + /* Starting a new doubling */ + hashp->hdr.low_mask = hashp->hdr.high_mask; + hashp->hdr.high_mask = new_bucket | hashp->hdr.low_mask; + } + if (BUCKET_TO_PAGE(new_bucket) > MAX_PAGES(hashp)) { + fprintf(stderr, "hash: Cannot allocate new bucket. Pages exhausted.\n"); + return (-1); + } + /* Relocate records to the new bucket */ + return (__split_page(hashp, old_bucket, new_bucket)); +} + +u_int32_t +__call_hash(hashp, k, len) + HTAB *hashp; + int8_t *k; + int32_t len; +{ + int32_t n, bucket; + + n = hashp->hash(k, len); + bucket = n & hashp->hdr.high_mask; + if (bucket > hashp->hdr.max_bucket) + bucket = bucket & hashp->hdr.low_mask; + return (bucket); +} + +#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN +/* + * Hashp->hdr needs to be byteswapped. + */ +static void +swap_header_copy(srcp, destp) + HASHHDR *srcp, *destp; +{ + int32_t i; + + P_32_COPY(srcp->magic, destp->magic); + P_32_COPY(srcp->version, destp->version); + P_32_COPY(srcp->lorder, destp->lorder); + P_32_COPY(srcp->bsize, destp->bsize); + P_32_COPY(srcp->bshift, destp->bshift); + P_32_COPY(srcp->ovfl_point, destp->ovfl_point); + P_32_COPY(srcp->last_freed, destp->last_freed); + P_32_COPY(srcp->max_bucket, destp->max_bucket); + P_32_COPY(srcp->high_mask, destp->high_mask); + P_32_COPY(srcp->low_mask, destp->low_mask); + P_32_COPY(srcp->ffactor, destp->ffactor); + P_32_COPY(srcp->nkeys, destp->nkeys); + P_32_COPY(srcp->hdrpages, destp->hdrpages); + P_32_COPY(srcp->h_charkey, destp->h_charkey); + for (i = 0; i < NCACHED; i++) { + P_32_COPY(srcp->spares[i], destp->spares[i]); + P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]); + } +} + +static void +swap_header(hashp) + HTAB *hashp; +{ + HASHHDR *hdrp; + int32_t i; + + hdrp = &hashp->hdr; + + M_32_SWAP(hdrp->magic); + M_32_SWAP(hdrp->version); + M_32_SWAP(hdrp->lorder); + M_32_SWAP(hdrp->bsize); + M_32_SWAP(hdrp->bshift); + M_32_SWAP(hdrp->ovfl_point); + M_32_SWAP(hdrp->last_freed); + M_32_SWAP(hdrp->max_bucket); + M_32_SWAP(hdrp->high_mask); + M_32_SWAP(hdrp->low_mask); + M_32_SWAP(hdrp->ffactor); + M_32_SWAP(hdrp->nkeys); + M_32_SWAP(hdrp->hdrpages); + M_32_SWAP(hdrp->h_charkey); + for (i = 0; i < NCACHED; i++) { + M_32_SWAP(hdrp->spares[i]); + M_16_SWAP(hdrp->bitmaps[i]); + } +} +#endif /* DB_BYTE_ORDER == DB_LITTLE_ENDIAN */ diff --git a/src/util/db2/hash/hash.c.patch b/src/util/db2/hash/hash.c.patch new file mode 100644 index 0000000000..b72cc0d26d --- /dev/null +++ b/src/util/db2/hash/hash.c.patch @@ -0,0 +1,109 @@ +*** /tmp/,RCSt1a21714 Wed Apr 3 11:49:15 1996 +--- hash.c Wed Apr 3 08:43:04 1996 +*************** +*** 399,405 + /* Create pages for these buckets */ + /* + for (i = 0; i <= hashp->hdr.max_bucket; i++) { +! if (__new_page(hashp, i, A_BUCKET) != 0) + return (-1); + } + */ + +--- 399,405 ----- + /* Create pages for these buckets */ + /* + for (i = 0; i <= hashp->hdr.max_bucket; i++) { +! if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0) + return (-1); + } + */ +*************** +*** 560,567 + * XXX + * Check success/failure conditions. + */ +! mpool_sync(hashp->mp); +! return (0); + } + + /* + +--- 560,566 ----- + * XXX + * Check success/failure conditions. + */ +! return (flush_meta(hashp) || mpool_sync(hashp->mp)); + } + + /* +*************** +*** 585,591 + hput_header(hashp); + + for (i = 0; i < NCACHED; i++) +! if (hashp->mapp[i]) + if (__put_page(hashp, + (PAGE16 *)hashp->mapp[i], A_BITMAP, 1)) + return (-1); + +--- 584,590 ----- + hput_header(hashp); + + for (i = 0; i < NCACHED; i++) +! if (hashp->mapp[i]) { + if (__put_page(hashp, + (PAGE16 *)hashp->mapp[i], A_BITMAP, 1)) + return (-1); +*************** +*** 589,594 + if (__put_page(hashp, + (PAGE16 *)hashp->mapp[i], A_BITMAP, 1)) + return (-1); + return (0); + } + + +--- 588,595 ----- + if (__put_page(hashp, + (PAGE16 *)hashp->mapp[i], A_BITMAP, 1)) + return (-1); ++ hashp->mapp[i] = NULL; ++ } + return (0); + } + +*************** +*** 726,732 + #ifdef HASH_STATISTICS + hash_collisions++; + #endif +- + __get_item_done(hashp, &cursor); + + /* + +--- 727,732 ----- + #ifdef HASH_STATISTICS + hash_collisions++; + #endif + __get_item_done(hashp, &cursor); + + /* +*************** +*** 773,778 + if (__delpair(hashp, &cursor, &item_info) || + __addel(hashp, &item_info, key, val, UNKNOWN, 0)) + return (ERROR); + if (item_info.caused_expand) + __expand_table(hashp); + break; + +--- 773,779 ----- + if (__delpair(hashp, &cursor, &item_info) || + __addel(hashp, &item_info, key, val, UNKNOWN, 0)) + return (ERROR); ++ __get_item_done(hashp, &cursor); + if (item_info.caused_expand) + __expand_table(hashp); + break; diff --git a/src/util/db2/hash/hash.h b/src/util/db2/hash/hash.h new file mode 100644 index 0000000000..973d543a85 --- /dev/null +++ b/src/util/db2/hash/hash.h @@ -0,0 +1,196 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)hash.h 8.4 (Berkeley) 11/2/95 + */ + +#include "mpool.h" +#include "db-queue.h" + +/* Operations */ +typedef enum { + HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT +} ACTION; + +/* cursor structure */ +typedef struct cursor_t { + TAILQ_ENTRY(cursor_t) queue; + int (*get) __P((const DB *, struct cursor_t *, DBT *, DBT *, \ + u_int32_t)); + int (*delete) __P((const DB *, struct cursor_t *, u_int32_t)); + db_pgno_t bucket; + db_pgno_t pgno; + indx_t ndx; + indx_t pgndx; + u_int16_t *pagep; + void *internal; +} CURSOR; + + +#define IS_BUCKET(X) ((X) & BUF_BUCKET) +#define IS_VALID(X) (!((X) & BUF_INVALID)) + +/* Hash Table Information */ +typedef struct hashhdr { /* Disk resident portion */ + int32_t magic; /* Magic NO for hash tables */ + int32_t version; /* Version ID */ + int32_t lorder; /* Byte Order */ + int32_t bsize; /* Bucket/Page Size */ + int32_t bshift; /* Bucket shift */ + int32_t ovfl_point; /* Where overflow pages are being allocated */ + int32_t last_freed; /* Last overflow page freed */ + int32_t max_bucket; /* ID of Maximum bucket in use */ + int32_t high_mask; /* Mask to modulo into entire table */ + int32_t low_mask; /* Mask to modulo into lower half of table */ + int32_t ffactor; /* Fill factor */ + int32_t nkeys; /* Number of keys in hash table */ + int32_t hdrpages; /* Size of table header */ + int32_t h_charkey; /* value of hash(CHARKEY) */ +#define NCACHED 32 /* number of bit maps and spare points */ + int32_t spares[NCACHED];/* spare pages for overflow */ + u_int16_t bitmaps[NCACHED]; /* address of overflow page bitmaps */ +} HASHHDR; + +typedef struct htab { /* Memory resident data structure */ + TAILQ_HEAD(_cursor_queue, cursor_t) curs_queue; + HASHHDR hdr; /* Header */ + u_int32_t (*hash) __P((const void *, size_t)); /* Hash Function */ + int32_t flags; /* Flag values */ + int32_t fp; /* File pointer */ + char *fname; /* File path */ + u_int8_t *bigdata_buf; /* Temporary Buffer for BIG data */ + u_int8_t *bigkey_buf; /* Temporary Buffer for BIG keys */ + u_int16_t *split_buf; /* Temporary buffer for splits */ + CURSOR *seq_cursor; /* Cursor used for hash_seq */ + int32_t errno; /* Error Number -- for DBM compatability */ + int32_t new_file; /* Indicates if fd is backing store or no */ + int32_t save_file; /* Indicates whether we need to flush file at + * exit */ + u_int32_t *mapp[NCACHED];/* Pointers to page maps */ + int32_t nmaps; /* Initial number of bitmaps */ + MPOOL *mp; /* mpool for buffer management */ +} HTAB; + +/* + * Constants + */ +#define MAX_BSIZE 65536 /* 2^16 */ +#define MIN_BUFFERS 6 +#define MINHDRSIZE 512 +#define DEF_CACHESIZE 65536 +#define DEF_BUCKET_SIZE 4096 +#define DEF_BUCKET_SHIFT 12 /* log2(BUCKET) */ +#define DEF_SEGSIZE 256 +#define DEF_SEGSIZE_SHIFT 8 /* log2(SEGSIZE) */ +#define DEF_DIRSIZE 256 +#define DEF_FFACTOR 65536 +#define MIN_FFACTOR 4 +#define SPLTMAX 8 +#define CHARKEY "%$sniglet^&" +#define NUMKEY 1038583 +#define BYTE_SHIFT 3 +#define INT32_T_TO_BYTE 2 +#define INT32_T_BYTE_SHIFT 5 +#define ALL_SET ((u_int32_t)0xFFFFFFFF) +#define ALL_CLEAR 0 + +#define PTROF(X) ((BUFHEAD *)((ptr_t)(X)&~0x3)) +#define ISMOD(X) ((ptr_t)(X)&0x1) +#define DOMOD(X) ((X) = (int8_t *)((ptr_t)(X)|0x1)) +#define ISDISK(X) ((ptr_t)(X)&0x2) +#define DODISK(X) ((X) = (int8_t *)((ptr_t)(X)|0x2)) + +#define BITS_PER_MAP 32 + +/* Given the address of the beginning of a big map, clear/set the nth bit */ +#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP))) +#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP))) +#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP))) + +/* Overflow management */ +/* + * Overflow page numbers are allocated per split point. At each doubling of + * the table, we can allocate extra pages. So, an overflow page number has + * the top 5 bits indicate which split point and the lower 11 bits indicate + * which page at that split point is indicated (pages within split points are + * numberered starting with 1). + */ + +#define SPLITSHIFT 11 +#define SPLITMASK 0x7FF +#define SPLITNUM(N) (((u_int32_t)(N)) >> SPLITSHIFT) +#define OPAGENUM(N) ((N) & SPLITMASK) +#define OADDR_OF(S,O) ((u_int32_t)((u_int32_t)(S) << SPLITSHIFT) + (O)) + +#define BUCKET_TO_PAGE(B) \ + ((B) + hashp->hdr.hdrpages + ((B) \ + ? hashp->hdr.spares[__log2((B)+1)-1] : 0)) +#define OADDR_TO_PAGE(B) \ + (BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B))) + +#define POW2(N) (1 << (N)) + +#define MAX_PAGES(H) (DB_OFF_T_MAX / (H)->hdr.bsize) + +/* Shorthands for accessing structure */ +#define METADATA_PGNO 0 +#define SPLIT_PGNO 0xFFFF + +typedef struct item_info { + db_pgno_t pgno; + db_pgno_t bucket; + indx_t ndx; + indx_t pgndx; + u_int8_t status; + int32_t seek_size; + db_pgno_t seek_found_page; + indx_t key_off; + indx_t data_off; + u_int8_t caused_expand; +} ITEM_INFO; + + +#define ITEM_ERROR 0 +#define ITEM_OK 1 +#define ITEM_NO_MORE 2 + +#define ITEM_GET_FIRST 0 +#define ITEM_GET_NEXT 1 +#define ITEM_GET_RESET 2 +#define ITEM_GET_DONE 3 +#define ITEM_GET_N 4 + +#define UNKNOWN 0xffffffff /* for num_items */ +#define NO_EXPAND 0xfffffffe diff --git a/src/util/db2/hash/hash_bigkey.c b/src/util/db2/hash/hash_bigkey.c new file mode 100644 index 0000000000..689dc3db64 --- /dev/null +++ b/src/util/db2/hash/hash_bigkey.c @@ -0,0 +1,483 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_bigkey.c 8.5 (Berkeley) 11/2/95"; +#endif /* LIBC_SCCS and not lint */ + +/* + * PACKAGE: hash + * DESCRIPTION: + * Big key/data handling for the hashing package. + * + * ROUTINES: + * External + * __big_keydata + * __big_split + * __big_insert + * __big_return + * __big_delete + * __find_last_page + * Internal + * collect_key + * collect_data + */ +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> + +#ifdef DEBUG +#include <assert.h> +#endif + +#include "db-int.h" +#include "hash.h" +#include "page.h" +#include "extern.h" + +static int32_t collect_key __P((HTAB *, PAGE16 *, int32_t, db_pgno_t *)); +static int32_t collect_data __P((HTAB *, PAGE16 *, int32_t)); + +/* + * Big_insert + * + * You need to do an insert and the key/data pair is greater than + * MINFILL * the bucket size + * + * Returns: + * 0 ==> OK + * -1 ==> ERROR + */ +int32_t +__big_insert(hashp, pagep, key, val) + HTAB *hashp; + PAGE16 *pagep; + const DBT *key, *val; +{ + size_t key_size, val_size; + indx_t key_move_bytes, val_move_bytes; + int8_t *key_data, *val_data, base_page; + + key_data = (int8_t *)key->data; + key_size = key->size; + val_data = (int8_t *)val->data; + val_size = val->size; + + NUM_ENT(pagep) = NUM_ENT(pagep) + 1; + + for (base_page = 1; key_size + val_size;) { + /* Add a page! */ + pagep = + __add_bigpage(hashp, pagep, NUM_ENT(pagep) - 1, base_page); + if (!pagep) + return (-1); + + /* There's just going to be one entry on this page. */ + NUM_ENT(pagep) = 1; + + /* Move the key's data. */ + key_move_bytes = MIN(FREESPACE(pagep), key_size); + /* Mark the page as to how much key & data is on this page. */ + BIGKEYLEN(pagep) = key_move_bytes; + val_move_bytes = + MIN(FREESPACE(pagep) - key_move_bytes, val_size); + BIGDATALEN(pagep) = val_move_bytes; + + /* Note big pages build beginning --> end, not vice versa. */ + if (key_move_bytes) + memmove(BIGKEY(pagep), key_data, key_move_bytes); + if (val_move_bytes) + memmove(BIGDATA(pagep), val_data, val_move_bytes); + + key_size -= key_move_bytes; + key_data += key_move_bytes; + val_size -= val_move_bytes; + val_data += val_move_bytes; + + base_page = 0; + } + __put_page(hashp, pagep, A_RAW, 1); + return (0); +} + +/* + * Called when we need to delete a big pair. + * + * Returns: + * 0 => OK + * -1 => ERROR + */ +int32_t +#ifdef __STDC__ +__big_delete(HTAB *hashp, PAGE16 *pagep, indx_t ndx) +#else +__big_delete(hashp, pagep, ndx) + HTAB *hashp; + PAGE16 *pagep; + u_int32_t ndx; /* Index of big pair on base page. */ +#endif +{ + PAGE16 *last_pagep; + + /* Get first page with big key/data. */ + pagep = __get_page(hashp, OADDR_TO_PAGE(DATA_OFF(pagep, ndx)), A_RAW); + if (!pagep) + return (-1); + + /* + * Traverse through the pages, freeing the previous one (except + * the first) at each new page. + */ + while (NEXT_PGNO(pagep) != INVALID_PGNO) { + last_pagep = pagep; + pagep = __get_page(hashp, NEXT_PGNO(pagep), A_RAW); + if (!pagep) + return (-1); + __delete_page(hashp, last_pagep, A_OVFL); + } + + /* Free the last page in the chain. */ + __delete_page(hashp, pagep, A_OVFL); + return (0); +} + +/* + * Given a key, indicates whether the big key at cursorp matches the + * given key. + * + * Returns: + * 1 = Found! + * 0 = Key not found + * -1 error + */ +int32_t +__find_bigpair(hashp, cursorp, key, size) + HTAB *hashp; + CURSOR *cursorp; + int8_t *key; + int32_t size; +{ + PAGE16 *pagep, *hold_pagep; + db_pgno_t next_pgno; + int32_t ksize; + u_int16_t bytes; + int8_t *kkey; + + ksize = size; + kkey = key; + bytes = 0; + + hold_pagep = NULL; + /* Chances are, hashp->cpage is the base page. */ + if (cursorp->pagep) + pagep = hold_pagep = cursorp->pagep; + else { + pagep = __get_page(hashp, cursorp->pgno, A_RAW); + if (!pagep) + return (-1); + } + + /* + * Now, get the first page with the big stuff on it. + * + * XXX + * KLUDGE: we know that cursor is looking at the _next_ item, so + * we have to look at pgndx - 1. + */ + next_pgno = OADDR_TO_PAGE(DATA_OFF(pagep, (cursorp->pgndx - 1))); + if (!hold_pagep) + __put_page(hashp, pagep, A_RAW, 0); + pagep = __get_page(hashp, next_pgno, A_RAW); + if (!pagep) + return (-1); + + /* While there are both keys to compare. */ + while ((ksize > 0) && (BIGKEYLEN(pagep))) { + if (ksize < KEY_OFF(pagep, 0) || + memcmp(BIGKEY(pagep), kkey, BIGKEYLEN(pagep))) { + __put_page(hashp, pagep, A_RAW, 0); + return (0); + } + kkey += BIGKEYLEN(pagep); + ksize -= BIGKEYLEN(pagep); + if (NEXT_PGNO(pagep) != INVALID_PGNO) { + next_pgno = NEXT_PGNO(pagep); + __put_page(hashp, pagep, A_RAW, 0); + pagep = __get_page(hashp, next_pgno, A_RAW); + if (!pagep) + return (-1); + } + } + __put_page(hashp, pagep, A_RAW, 0); +#ifdef DEBUG + assert(ksize >= 0); +#endif + if (ksize != 0) { +#ifdef HASH_STATISTICS + ++hash_collisions; +#endif + return (0); + } else + return (1); +} + +/* + * Fill in the key and data for this big pair. + */ +int32_t +__big_keydata(hashp, pagep, key, val, ndx) + HTAB *hashp; + PAGE16 *pagep; + DBT *key, *val; + int32_t ndx; +{ + ITEM_INFO ii; + PAGE16 *key_pagep; + db_pgno_t last_page; + + key_pagep = + __get_page(hashp, OADDR_TO_PAGE(DATA_OFF(pagep, ndx)), A_RAW); + if (!key_pagep) + return (-1); + key->size = collect_key(hashp, key_pagep, 0, &last_page); + key->data = hashp->bigkey_buf; + __put_page(hashp, key_pagep, A_RAW, 0); + + if (key->size == -1) + return (-1); + + /* Create an item_info to direct __big_return to the beginning pgno. */ + ii.pgno = last_page; + return (__big_return(hashp, &ii, val, 1)); +} + +/* + * Return the big key on page, ndx. + */ +int32_t +#ifdef __STDC__ +__get_bigkey(HTAB *hashp, PAGE16 *pagep, indx_t ndx, DBT *key) +#else +__get_bigkey(hashp, pagep, ndx, key) + HTAB *hashp; + PAGE16 *pagep; + u_int32_t ndx; + DBT *key; +#endif +{ + PAGE16 *key_pagep; + + key_pagep = + __get_page(hashp, OADDR_TO_PAGE(DATA_OFF(pagep, ndx)), A_RAW); + if (!pagep) + return (-1); + key->size = collect_key(hashp, key_pagep, 0, NULL); + key->data = hashp->bigkey_buf; + + __put_page(hashp, key_pagep, A_RAW, 0); + + return (0); +} + +/* + * Return the big key and data indicated in item_info. + */ +int32_t +__big_return(hashp, item_info, val, on_bigkey_page) + HTAB *hashp; + ITEM_INFO *item_info; + DBT *val; + int32_t on_bigkey_page; +{ + PAGE16 *pagep; + db_pgno_t next_pgno; + + if (!on_bigkey_page) { + /* Get first page with big pair on it. */ + pagep = __get_page(hashp, + OADDR_TO_PAGE(item_info->data_off), A_RAW); + if (!pagep) + return (-1); + } else { + pagep = __get_page(hashp, item_info->pgno, A_RAW); + if (!pagep) + return (-1); + } + + /* Traverse through the bigkey pages until a page with data is found. */ + while (!BIGDATALEN(pagep)) { + next_pgno = NEXT_PGNO(pagep); + __put_page(hashp, pagep, A_RAW, 0); + pagep = __get_page(hashp, next_pgno, A_RAW); + if (!pagep) + return (-1); + } + + val->size = collect_data(hashp, pagep, 0); + if (val->size < 1) + return (-1); + val->data = (void *)hashp->bigdata_buf; + + __put_page(hashp, pagep, A_RAW, 0); + return (0); +} + +/* + * Given a page with a big key on it, traverse through the pages counting data + * length, and collect all of the data on the way up. Store the key in + * hashp->bigkey_buf. last_page indicates to the calling function what the + * last page with key on it is; this will help if you later want to retrieve + * the data portion. + * + * Does the work for __get_bigkey. + * + * Return total length of data; -1 if error. + */ +static int32_t +collect_key(hashp, pagep, len, last_page) + HTAB *hashp; + PAGE16 *pagep; + int32_t len; + db_pgno_t *last_page; +{ + PAGE16 *next_pagep; + int32_t totlen, retval; + db_pgno_t next_pgno; +#ifdef DEBUG + db_pgno_t save_addr; +#endif + + /* If this is the last page with key. */ + if (BIGDATALEN(pagep)) { + totlen = len + BIGKEYLEN(pagep); + if (hashp->bigkey_buf) + free(hashp->bigkey_buf); + hashp->bigkey_buf = (char *)malloc(totlen); + if (!hashp->bigkey_buf) + return (-1); + memcpy(hashp->bigkey_buf + len, + BIGKEY(pagep), BIGKEYLEN(pagep)); + if (last_page) + *last_page = ADDR(pagep); + return (totlen); + } + + /* Key filled up all of last key page, so we've gone 1 too far. */ + if (BIGKEYLEN(pagep) == 0) { + if (hashp->bigkey_buf) + free(hashp->bigkey_buf); + hashp->bigkey_buf = (char *)malloc(len); + return (hashp->bigkey_buf ? len : -1); + } + totlen = len + BIGKEYLEN(pagep); + + /* Set pagep to the next page in the chain. */ + if (last_page) + *last_page = ADDR(pagep); + next_pgno = NEXT_PGNO(pagep); + next_pagep = __get_page(hashp, next_pgno, A_RAW); + if (!next_pagep) + return (-1); +#ifdef DEBUG + save_addr = ADDR(pagep); +#endif + retval = collect_key(hashp, next_pagep, totlen, last_page); + +#ifdef DEBUG + assert(save_addr == ADDR(pagep)); +#endif + memcpy(hashp->bigkey_buf + len, BIGKEY(pagep), BIGKEYLEN(pagep)); + __put_page(hashp, next_pagep, A_RAW, 0); + + return (retval); +} + +/* + * Given a page with big data on it, recur through the pages counting data + * length, and collect all of the data on the way up. Store the data in + * hashp->bigdata_buf. + * + * Does the work for __big_return. + * + * Return total length of data; -1 if error. + */ +static int32_t +collect_data(hashp, pagep, len) + HTAB *hashp; + PAGE16 *pagep; + int32_t len; +{ + PAGE16 *next_pagep; + int32_t totlen, retval; + db_pgno_t next_pgno; +#ifdef DEBUG + db_pgno_t save_addr; +#endif + + /* If there is no next page. */ + if (NEXT_PGNO(pagep) == INVALID_PGNO) { + if (hashp->bigdata_buf) + free(hashp->bigdata_buf); + totlen = len + BIGDATALEN(pagep); + hashp->bigdata_buf = (char *)malloc(totlen); + if (!hashp->bigdata_buf) + return (-1); + memcpy(hashp->bigdata_buf + totlen - BIGDATALEN(pagep), + BIGDATA(pagep), BIGDATALEN(pagep)); + return (totlen); + } + totlen = len + BIGDATALEN(pagep); + + /* Set pagep to the next page in the chain. */ + next_pgno = NEXT_PGNO(pagep); + next_pagep = __get_page(hashp, next_pgno, A_RAW); + if (!next_pagep) + return (-1); + +#ifdef DEBUG + save_addr = ADDR(pagep); +#endif + retval = collect_data(hashp, next_pagep, totlen); +#ifdef DEBUG + assert(save_addr == ADDR(pagep)); +#endif + memcpy(hashp->bigdata_buf + totlen - BIGDATALEN(pagep), + BIGDATA(pagep), BIGDATALEN(pagep)); + __put_page(hashp, next_pagep, A_RAW, 0); + + return (retval); +} diff --git a/src/util/db2/hash/hash_debug.c b/src/util/db2/hash/hash_debug.c new file mode 100644 index 0000000000..ed99c69320 --- /dev/null +++ b/src/util/db2/hash/hash_debug.c @@ -0,0 +1,106 @@ +/*- + * Copyright (c) 1995 + * The President and Fellows of Harvard University + * + * This code is derived from software contributed to Harvard by + * Jeremy Rassen. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_debug.c 8.4 (Berkeley) 11/7/95"; +#endif /* LIBC_SCCS and not lint */ + +#ifdef DEBUG +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Debug routines. + * + * ROUTINES: + * + * External + * __dump_bucket + */ +#include <stdio.h> + +#include "db-int.h" +#include "hash.h" +#include "page.h" +#include "extern.h" +#include "compat.h" + +void +__dump_bucket(hashp, bucket) + HTAB *hashp; + u_int32_t bucket; +{ + CURSOR cursor; + DBT key, val; + ITEM_INFO item_info; + int var; + char *cp; + + cursor.pagep = NULL; + item_info.seek_size = 0; + item_info.seek_found_page = 0; + + __get_item_reset(hashp, &cursor); + + cursor.bucket = bucket; + for (;;) { + __get_item_next(hashp, &cursor, &key, &val, &item_info); + if (item_info.status == ITEM_ERROR) { + (void)printf("get_item_next returned error\n"); + break; + } else if (item_info.status == ITEM_NO_MORE) + break; + + if (item_info.key_off == BIGPAIR) { + if (__big_keydata(hashp, cursor.pagep, &key, &val, + item_info.pgndx)) { + (void)printf("__big_keydata returned error\n"); + break; + } + } + + if (key.size == sizeof(int)) { + memcpy(&var, key.data, sizeof(int)); + (void)printf("%d\n", var); + } else { + for (cp = (char *)key.data; key.size--; cp++) + (void)printf("%c", *cp); + (void)printf("\n"); + } + } + __get_item_done(hashp, &cursor); +} +#endif /* DEBUG */ diff --git a/src/util/db2/hash/hash_func.c b/src/util/db2/hash/hash_func.c new file mode 100644 index 0000000000..efa2a2843b --- /dev/null +++ b/src/util/db2/hash/hash_func.c @@ -0,0 +1,196 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_func.c 8.4 (Berkeley) 11/7/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include "db-int.h" +#include "hash.h" +#include "page.h" +#include "extern.h" + +static u_int32_t hash1 __P((const void *, size_t)); +static u_int32_t hash2 __P((const void *, size_t)); +static u_int32_t hash3 __P((const void *, size_t)); +static u_int32_t hash4 __P((const void *, size_t)); + +/* Default hash function. */ +u_int32_t (*__default_hash) __P((const void *, size_t)) = hash4; + +/* + * Assume that we've already split the bucket to which this key hashes, + * calculate that bucket, and check that in fact we did already split it. + * + * EJB's original hsearch hash. + */ +#define PRIME1 37 +#define PRIME2 1048583 + +u_int32_t +hash1(key, len) + const void *key; + size_t len; +{ + u_int32_t h; + u_int8_t *k; + + h = 0; + k = (u_int8_t *)key; + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*k++ - ' '); + h %= PRIME2; + return (h); +} + +/* + * Phong Vo's linear congruential hash + */ +#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) + +u_int32_t +hash2(key, len) + const void *key; + size_t len; +{ + u_int32_t h; + u_int8_t *e, c, *k; + + k = (u_int8_t *)key; + e = k + len; + for (h = 0; k != e;) { + c = *k++; + if (!c && k > e) + break; + dcharhash(h, c); + } + return (h); +} + +/* + * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte + * units. On the first time through the loop we get the "leftover bytes" + * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle + * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If + * this routine is heavily used enough, it's worth the ugly coding. + * + * Ozan Yigit's original sdbm hash. + */ +u_int32_t +hash3(key, len) + const void *key; + size_t len; +{ + u_int32_t n, loop; + u_int8_t *k; + +#define HASHC n = *k++ + 65599 * n + + n = 0; + k = (u_int8_t *)key; + if (len > 0) { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { /* All fall throughs */ + HASHC; + case 7: + HASHC; + case 6: + HASHC; + case 5: + HASHC; + case 4: + HASHC; + case 3: + HASHC; + case 2: + HASHC; + case 1: + HASHC; + } while (--loop); + } + + } + return (n); +} + +/* Chris Torek's hash function. */ +u_int32_t +hash4(key, len) + const void *key; + size_t len; +{ + u_int32_t h, loop; + u_int8_t *k; + +#define HASH4a h = (h << 5) - h + *k++; +#define HASH4b h = (h << 5) + h + *k++; +#define HASH4 HASH4b + + h = 0; + k = (u_int8_t *)key; + if (len > 0) { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { /* All fall throughs */ + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } + + } + return (h); +} diff --git a/src/util/db2/hash/hash_log2.c b/src/util/db2/hash/hash_log2.c new file mode 100644 index 0000000000..8604945e80 --- /dev/null +++ b/src/util/db2/hash/hash_log2.c @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_log2.c 8.4 (Berkeley) 11/7/95"; +#endif /* LIBC_SCCS and not lint */ + +#include "db-int.h" + +u_int32_t +__log2(num) + u_int32_t num; +{ + u_int32_t i, limit; + + limit = 1; + for (i = 0; limit < num; limit = limit << 1, i++); + return (i); +} diff --git a/src/util/db2/hash/hash_page.c b/src/util/db2/hash/hash_page.c new file mode 100644 index 0000000000..8622075d17 --- /dev/null +++ b/src/util/db2/hash/hash_page.c @@ -0,0 +1,1380 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_page.c 8.11 (Berkeley) 11/7/95"; +#endif /* LIBC_SCCS and not lint */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Page manipulation for hashing package. + * + * ROUTINES: + * + * External + * __get_page + * __add_ovflpage + * Internal + * overflow_page + * open_temp + */ + +#include <sys/types.h> + +#ifdef DEBUG +#include <assert.h> +#endif +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "db-int.h" +#include "hash.h" +#include "page.h" +#include "extern.h" + +static int32_t add_bigptr __P((HTAB *, ITEM_INFO *, indx_t)); +static u_int32_t *fetch_bitmap __P((HTAB *, int32_t)); +static u_int32_t first_free __P((u_int32_t)); +static indx_t next_realkey __P((PAGE16 *, indx_t)); +static u_int16_t overflow_page __P((HTAB *)); +static void page_init __P((HTAB *, PAGE16 *, db_pgno_t, u_int8_t)); +static indx_t prev_realkey __P((PAGE16 *, indx_t)); +static void putpair __P((PAGE8 *, const DBT *, const DBT *)); +static void swap_page_header_in __P((PAGE16 *)); +static void swap_page_header_out __P((PAGE16 *)); + +#ifdef DEBUG_SLOW +static void account_page(HTAB *, db_pgno_t, int); +#endif + +u_int32_t +__get_item(hashp, cursorp, key, val, item_info) + HTAB *hashp; + CURSOR *cursorp; + DBT *key, *val; + ITEM_INFO *item_info; +{ + db_pgno_t next_pgno; + int32_t i; + + /* Check if we need to get a page. */ + if (!cursorp->pagep) { + if (cursorp->pgno == INVALID_PGNO) { + cursorp->pagep = + __get_page(hashp, cursorp->bucket, A_BUCKET); + cursorp->pgno = ADDR(cursorp->pagep); + cursorp->ndx = 0; + cursorp->pgndx = 0; + } else + cursorp->pagep = + __get_page(hashp, cursorp->pgno, A_RAW); + if (!cursorp->pagep) { + item_info->status = ITEM_ERROR; + return (-1); + } + } + if (item_info->seek_size && + FREESPACE(cursorp->pagep) > item_info->seek_size) + item_info->seek_found_page = cursorp->pgno; + + if (cursorp->pgndx == NUM_ENT(cursorp->pagep)) { + /* Fetch next page. */ + if (NEXT_PGNO(cursorp->pagep) == INVALID_PGNO) { + item_info->status = ITEM_NO_MORE; + return (-1); + } + next_pgno = NEXT_PGNO(cursorp->pagep); + cursorp->pgndx = 0; + __put_page(hashp, cursorp->pagep, A_RAW, 0); + cursorp->pagep = __get_page(hashp, next_pgno, A_RAW); + if (!cursorp->pagep) { + item_info->status = ITEM_ERROR; + return (-1); + } + cursorp->pgno = next_pgno; + } + if (KEY_OFF(cursorp->pagep, cursorp->pgndx) != BIGPAIR) { + if ((i = prev_realkey(cursorp->pagep, cursorp->pgndx)) == + cursorp->pgndx) + key->size = hashp->hdr.bsize - + KEY_OFF(cursorp->pagep, cursorp->pgndx); + else + key->size = DATA_OFF(cursorp->pagep, i) - + KEY_OFF(cursorp->pagep, cursorp->pgndx); + } + + /* + * All of this information will be set incorrectly for big keys, but + * it will be ignored anyway. + */ + val->size = KEY_OFF(cursorp->pagep, cursorp->pgndx) - + DATA_OFF(cursorp->pagep, cursorp->pgndx); + key->data = KEY(cursorp->pagep, cursorp->pgndx); + val->data = DATA(cursorp->pagep, cursorp->pgndx); + item_info->pgno = cursorp->pgno; + item_info->bucket = cursorp->bucket; + item_info->ndx = cursorp->ndx; + item_info->pgndx = cursorp->pgndx; + item_info->key_off = KEY_OFF(cursorp->pagep, cursorp->pgndx); + item_info->data_off = DATA_OFF(cursorp->pagep, cursorp->pgndx); + item_info->status = ITEM_OK; + + return (0); +} + +u_int32_t +__get_item_reset(hashp, cursorp) + HTAB *hashp; + CURSOR *cursorp; +{ + if (cursorp->pagep) + __put_page(hashp, cursorp->pagep, A_RAW, 0); + cursorp->pagep = NULL; + cursorp->bucket = -1; + cursorp->ndx = 0; + cursorp->pgndx = 0; + cursorp->pgno = INVALID_PGNO; + return (0); +} + +u_int32_t +__get_item_done(hashp, cursorp) + HTAB *hashp; + CURSOR *cursorp; +{ + if (cursorp->pagep) + __put_page(hashp, cursorp->pagep, A_RAW, 0); + cursorp->pagep = NULL; + + /* + * We don't throw out the page number since we might want to + * continue getting on this page. + */ + return (0); +} + +u_int32_t +__get_item_first(hashp, cursorp, key, val, item_info) + HTAB *hashp; + CURSOR *cursorp; + DBT *key, *val; + ITEM_INFO *item_info; +{ + __get_item_reset(hashp, cursorp); + cursorp->bucket = 0; + return (__get_item_next(hashp, cursorp, key, val, item_info)); +} + +/* + * Returns a pointer to key/data pair on a page. In the case of bigkeys, + * just returns the page number and index of the bigkey pointer pair. + */ +u_int32_t +__get_item_next(hashp, cursorp, key, val, item_info) + HTAB *hashp; + CURSOR *cursorp; + DBT *key, *val; + ITEM_INFO *item_info; +{ + int stat; + + stat = __get_item(hashp, cursorp, key, val, item_info); + cursorp->ndx++; + cursorp->pgndx++; + return (stat); +} + +/* + * Put a non-big pair on a page. + */ +static void +putpair(p, key, val) + PAGE8 *p; + const DBT *key, *val; +{ + u_int16_t *pagep, n, off; + + pagep = (PAGE16 *)p; + + /* Items on the page are 0-indexed. */ + n = NUM_ENT(pagep); + off = OFFSET(pagep) - key->size + 1; + memmove(p + off, key->data, key->size); + KEY_OFF(pagep, n) = off; + + off -= val->size; + memmove(p + off, val->data, val->size); + DATA_OFF(pagep, n) = off; + + /* Adjust page info. */ + NUM_ENT(pagep) = n + 1; + OFFSET(pagep) = off - 1; +} + +/* + * Returns the index of the next non-bigkey pair after n on the page. + * Returns -1 if there are no more non-big things on the page. + */ +indx_t +#ifdef __STDC__ +next_realkey(PAGE16 * pagep, indx_t n) +#else +next_realkey(pagep, n) + PAGE16 *pagep; + u_int32_t n; +#endif +{ + indx_t i; + + for (i = n + 1; i < NUM_ENT(pagep); i++) + if (KEY_OFF(pagep, i) != BIGPAIR) + return (i); + return (-1); +} + +/* + * Returns the index of the previous non-bigkey pair after n on the page. + * Returns n if there are no previous non-big things on the page. + */ +static indx_t +#ifdef __STDC__ +prev_realkey(PAGE16 * pagep, indx_t n) +#else +prev_realkey(pagep, n) + PAGE16 *pagep; + u_int32_t n; +#endif +{ + int32_t i; + + /* Need a signed value to do the compare properly. */ + for (i = n - 1; i > -1; i--) + if (KEY_OFF(pagep, i) != BIGPAIR) + return (i); + return (n); +} + +/* + * Returns: + * 0 OK + * -1 error + */ +extern int32_t +__delpair(hashp, cursorp, item_info) + HTAB *hashp; + CURSOR *cursorp; + ITEM_INFO *item_info; +{ + PAGE16 *pagep; + indx_t ndx; + short check_ndx; + int16_t delta, len, next_key; + int32_t n; + u_int8_t *src, *dest; + + ndx = cursorp->pgndx; + if (!cursorp->pagep) { + pagep = __get_page(hashp, cursorp->pgno, A_RAW); + if (!pagep) + return (-1); + /* + * KLUGE: pgndx has gone one too far, because cursor points + * to the _next_ item. Use pgndx - 1. + */ + --ndx; + } else + pagep = cursorp->pagep; +#ifdef DEBUG + assert(ADDR(pagep) == cursorp->pgno); +#endif + + if (KEY_OFF(pagep, ndx) == BIGPAIR) { + delta = 0; + __big_delete(hashp, pagep, ndx); + } else { + /* + * Compute "delta", the amount we have to shift all of the + * offsets. To find the delta, we need to make sure that + * we aren't looking at the DATA_OFF of a big/keydata pair. + */ + for (check_ndx = (short)(ndx - 1); + check_ndx >= 0 && KEY_OFF(pagep, check_ndx) == BIGPAIR; + check_ndx--); + if (check_ndx < 0) + delta = hashp->hdr.bsize - DATA_OFF(pagep, ndx); + else + delta = + DATA_OFF(pagep, check_ndx) - DATA_OFF(pagep, ndx); + + /* + * The hard case: we want to remove something other than + * the last item on the page. We need to shift data and + * offsets down. + */ + if (ndx != NUM_ENT(pagep) - 1) { + /* + * Move the data: src is the address of the last data + * item on the page. + */ + src = (u_int8_t *)pagep + OFFSET(pagep) + 1; + /* + * Length is the distance between where to start + * deleting and end of the data on the page. + */ + len = DATA_OFF(pagep, ndx) - (OFFSET(pagep) + 1); + /* + * Dest is the location of the to-be-deleted item + * occupied - length. + */ + if (check_ndx < 0) + dest = + (u_int8_t *)pagep + hashp->hdr.bsize - len; + else + dest = (u_int8_t *)pagep + + DATA_OFF(pagep, (check_ndx)) - len; + memmove(dest, src, len); + } + } + + /* Adjust the offsets. */ + for (n = ndx; n < NUM_ENT(pagep) - 1; n++) + if (KEY_OFF(pagep, (n + 1)) != BIGPAIR) { + next_key = next_realkey(pagep, n); +#ifdef DEBUG + assert(next_key != -1); +#endif + KEY_OFF(pagep, n) = KEY_OFF(pagep, (n + 1)) + delta; + DATA_OFF(pagep, n) = DATA_OFF(pagep, (n + 1)) + delta; + } else { + KEY_OFF(pagep, n) = KEY_OFF(pagep, (n + 1)); + DATA_OFF(pagep, n) = DATA_OFF(pagep, (n + 1)); + } + + /* Adjust page metadata. */ + OFFSET(pagep) = OFFSET(pagep) + delta; + NUM_ENT(pagep) = NUM_ENT(pagep) - 1; + + --hashp->hdr.nkeys; + + /* Is this page now an empty overflow page? If so, free it. */ + if (TYPE(pagep) == HASH_OVFLPAGE && NUM_ENT(pagep) == 0) { + PAGE16 *empty_page; + db_pgno_t to_find, next_pgno, link_page; + + /* + * We need to go back to the first page in the chain and + * look for this page so that we can update the previous + * page's NEXT_PGNO field. + */ + to_find = ADDR(pagep); + empty_page = pagep; + link_page = NEXT_PGNO(empty_page); + pagep = __get_page(hashp, item_info->bucket, A_BUCKET); + if (!pagep) + return (-1); + while (NEXT_PGNO(pagep) != to_find) { + next_pgno = NEXT_PGNO(pagep); +#ifdef DEBUG + assert(next_pgno != INVALID_PGNO); +#endif + __put_page(hashp, pagep, A_RAW, 0); + pagep = __get_page(hashp, next_pgno, A_RAW); + if (!pagep) + return (-1); + } + + /* + * At this point, pagep should point to the page before the + * page to be deleted. + */ + NEXT_PGNO(pagep) = link_page; + if (item_info->pgno == to_find) { + item_info->pgno = ADDR(pagep); + item_info->pgndx = NUM_ENT(pagep); + item_info->seek_found_page = ADDR(pagep); + } + __delete_page(hashp, empty_page, A_OVFL); + } + __put_page(hashp, pagep, A_RAW, 1); + + return (0); +} + +extern int32_t +__split_page(hashp, obucket, nbucket) + HTAB *hashp; + u_int32_t obucket, nbucket; +{ + DBT key, val; + ITEM_INFO old_ii, new_ii; + PAGE16 *old_pagep, *temp_pagep; + db_pgno_t next_pgno; + int32_t off; + u_int16_t n; + int8_t base_page; + + off = hashp->hdr.bsize; + old_pagep = __get_page(hashp, obucket, A_BUCKET); + + base_page = 1; + + temp_pagep = hashp->split_buf; + memcpy(temp_pagep, old_pagep, hashp->hdr.bsize); + + page_init(hashp, old_pagep, ADDR(old_pagep), HASH_PAGE); + __put_page(hashp, old_pagep, A_RAW, 1); + + old_ii.pgno = BUCKET_TO_PAGE(obucket); + new_ii.pgno = BUCKET_TO_PAGE(nbucket); + old_ii.bucket = obucket; + new_ii.bucket = nbucket; + old_ii.seek_found_page = new_ii.seek_found_page = 0; + + while (temp_pagep != 0) { + off = hashp->hdr.bsize; + for (n = 0; n < NUM_ENT(temp_pagep); n++) { + if (KEY_OFF(temp_pagep, n) == BIGPAIR) { + __get_bigkey(hashp, temp_pagep, n, &key); + if (__call_hash(hashp, + key.data, key.size) == obucket) + add_bigptr(hashp, &old_ii, + DATA_OFF(temp_pagep, n)); + else + add_bigptr(hashp, &new_ii, + DATA_OFF(temp_pagep, n)); + } else { + key.size = off - KEY_OFF(temp_pagep, n); + key.data = KEY(temp_pagep, n); + off = KEY_OFF(temp_pagep, n); + val.size = off - DATA_OFF(temp_pagep, n); + val.data = DATA(temp_pagep, n); + if (__call_hash(hashp, + key.data, key.size) == obucket) + __addel(hashp, &old_ii, &key, &val, + NO_EXPAND, 1); + else + __addel(hashp, &new_ii, &key, &val, + NO_EXPAND, 1); + off = DATA_OFF(temp_pagep, n); + } + } + next_pgno = NEXT_PGNO(temp_pagep); + + /* Clear temp_page; if it's an overflow page, free it. */ + if (!base_page) + __delete_page(hashp, temp_pagep, A_OVFL); + else + base_page = 0; + if (next_pgno != INVALID_PGNO) + temp_pagep = __get_page(hashp, next_pgno, A_RAW); + else + break; + } + return (0); +} + +/* + * Add the given pair to the page. + * + * + * Returns: + * 0 ==> OK + * -1 ==> failure + */ +extern int32_t +#ifdef __STDC__ +__addel(HTAB *hashp, ITEM_INFO *item_info, const DBT *key, const DBT *val, + u_int32_t num_items, const u_int8_t expanding) +#else +__addel(hashp, item_info, key, val, num_items, expanding) + HTAB *hashp; + ITEM_INFO *item_info; + const DBT *key, *val; + u_int32_t num_items; + const u_int32_t expanding; +#endif +{ + PAGE16 *pagep; + int32_t do_expand; + db_pgno_t next_pgno; + + do_expand = 0; + + pagep = __get_page(hashp, + item_info->seek_found_page != 0 ? + item_info->seek_found_page : item_info->pgno, A_RAW); + if (!pagep) + return (-1); + + /* Advance to first page in chain with room for item. */ + while (NUM_ENT(pagep) && NEXT_PGNO(pagep) != INVALID_PGNO) { + /* + * This may not be the end of the chain, but the pair may fit + * anyway. + */ + if (ISBIG(PAIRSIZE(key, val), hashp) && BIGPAIRFITS(pagep)) + break; + if (PAIRFITS(pagep, key, val)) + break; + next_pgno = NEXT_PGNO(pagep); + __put_page(hashp, pagep, A_RAW, 0); + pagep = (PAGE16 *)__get_page(hashp, next_pgno, A_RAW); + if (!pagep) + return (-1); + } + + if ((ISBIG(PAIRSIZE(key, val), hashp) && + !BIGPAIRFITS(pagep)) || + (!ISBIG(PAIRSIZE(key, val), hashp) && + !PAIRFITS(pagep, key, val))) { + do_expand = 1; + pagep = __add_ovflpage(hashp, pagep); + if (!pagep) + return (-1); + + if ((ISBIG(PAIRSIZE(key, val), hashp) && + !BIGPAIRFITS(pagep)) || + (!ISBIG(PAIRSIZE(key, val), hashp) && + !PAIRFITS(pagep, key, val))) { + __put_page(hashp, pagep, A_RAW, 0); + return (-1); + } + } + + /* At this point, we know the page fits, so we just add it */ + + if (ISBIG(PAIRSIZE(key, val), hashp)) { + if (__big_insert(hashp, pagep, key, val)) + return (-1); + } else { + putpair((PAGE8 *)pagep, key, val); + } + + /* + * For splits, we are going to update item_info's page number + * field, so that we can easily return to the same page the + * next time we come in here. For other operations, this shouldn't + * matter, since adds are the last thing that happens before we + * return to the user program. + */ + item_info->pgno = ADDR(pagep); + + if (!expanding) + hashp->hdr.nkeys++; + + /* Kludge: if this is a big page, then it's already been put. */ + if (!ISBIG(PAIRSIZE(key, val), hashp)) + __put_page(hashp, pagep, A_RAW, 1); + + if (expanding) + item_info->caused_expand = 0; + else + switch (num_items) { + case NO_EXPAND: + item_info->caused_expand = 0; + break; + case UNKNOWN: + item_info->caused_expand |= + (hashp->hdr.nkeys / hashp->hdr.max_bucket) > + hashp->hdr.ffactor || + item_info->pgndx > hashp->hdr.ffactor; + break; + default: + item_info->caused_expand = + num_items > hashp->hdr.ffactor ? 1 : do_expand; + break; + } + return (0); +} + +/* + * Special __addel used in big splitting; this one just puts the pointer + * to an already-allocated big page in the appropriate bucket. + */ +int32_t +#ifdef __STDC__ +add_bigptr(HTAB * hashp, ITEM_INFO * item_info, indx_t big_pgno) +#else +add_bigptr(hashp, item_info, big_pgno) + HTAB *hashp; + ITEM_INFO *item_info; + u_int32_t big_pgno; +#endif +{ + PAGE16 *pagep; + db_pgno_t next_pgno; + + pagep = __get_page(hashp, item_info->bucket, A_BUCKET); + if (!pagep) + return (-1); + + /* + * Note: in __addel(), we used item_info->pgno for the beginning of + * our search for space. Now, we use item_info->bucket, since we + * know that the space required by a big pair on the base page is + * quite small, and we may very well find that space early in the + * chain. + */ + + /* Find first page in chain that has space for a big pair. */ + while (NUM_ENT(pagep) && (NEXT_PGNO(pagep) != INVALID_PGNO)) { + if (BIGPAIRFITS(pagep)) + break; + next_pgno = NEXT_PGNO(pagep); + __put_page(hashp, pagep, A_RAW, 0); + pagep = __get_page(hashp, next_pgno, A_RAW); + if (!pagep) + return (-1); + } + if (!BIGPAIRFITS(pagep)) { + pagep = __add_ovflpage(hashp, pagep); + if (!pagep) + return (-1); +#ifdef DEBUG + assert(BIGPAIRFITS(pagep)); +#endif + } + KEY_OFF(pagep, NUM_ENT(pagep)) = BIGPAIR; + DATA_OFF(pagep, NUM_ENT(pagep)) = big_pgno; + NUM_ENT(pagep) = NUM_ENT(pagep) + 1; + + __put_page(hashp, pagep, A_RAW, 1); + + return (0); +} + +/* + * + * Returns: + * pointer on success + * NULL on error + */ +extern PAGE16 * +__add_ovflpage(hashp, pagep) + HTAB *hashp; + PAGE16 *pagep; +{ + PAGE16 *new_pagep; + u_int16_t ovfl_num; + + /* Check if we are dynamically determining the fill factor. */ + if (hashp->hdr.ffactor == DEF_FFACTOR) { + hashp->hdr.ffactor = NUM_ENT(pagep) >> 1; + if (hashp->hdr.ffactor < MIN_FFACTOR) + hashp->hdr.ffactor = MIN_FFACTOR; + } + ovfl_num = overflow_page(hashp); + + if (__new_page(hashp, (u_int32_t)ovfl_num, A_OVFL) != 0) + return (NULL); + + if (!ovfl_num || !(new_pagep = __get_page(hashp, ovfl_num, A_OVFL))) + return (NULL); + + NEXT_PGNO(pagep) = (db_pgno_t)OADDR_TO_PAGE(ovfl_num); + TYPE(new_pagep) = HASH_OVFLPAGE; + + __put_page(hashp, pagep, A_RAW, 1); + +#ifdef HASH_STATISTICS + hash_overflows++; +#endif + return (new_pagep); +} + +/* + * + * Returns: + * pointer on success + * NULL on error + */ +extern PAGE16 * +#ifdef __STDC__ +__add_bigpage(HTAB * hashp, PAGE16 * pagep, indx_t ndx, const u_int8_t + is_basepage) +#else +__add_bigpage(hashp, pagep, ndx, is_basepage) + HTAB *hashp; + PAGE16 *pagep; + u_int32_t ndx; + const u_int32_t is_basepage; +#endif +{ + PAGE16 *new_pagep; + u_int16_t ovfl_num; + + ovfl_num = overflow_page(hashp); + + if (__new_page(hashp, (u_int32_t)ovfl_num, A_OVFL) != 0) + return (NULL); + + if (!ovfl_num || !(new_pagep = __get_page(hashp, ovfl_num, A_OVFL))) + return (NULL); + + if (is_basepage) { + KEY_OFF(pagep, ndx) = BIGPAIR; + DATA_OFF(pagep, ndx) = (indx_t)ovfl_num; + } else + NEXT_PGNO(pagep) = ADDR(new_pagep); + + __put_page(hashp, pagep, A_RAW, 1); + + TYPE(new_pagep) = HASH_BIGPAGE; + +#ifdef HASH_STATISTICS + hash_bigpages++; +#endif + return (new_pagep); +} + +static void +#ifdef __STDC__ +page_init(HTAB * hashp, PAGE16 * pagep, db_pgno_t pgno, u_int8_t type) +#else +page_init(hashp, pagep, pgno, type) + HTAB *hashp; + PAGE16 *pagep; + db_pgno_t pgno; + u_int32_t type; +#endif +{ + NUM_ENT(pagep) = 0; + PREV_PGNO(pagep) = NEXT_PGNO(pagep) = INVALID_PGNO; + TYPE(pagep) = type; + OFFSET(pagep) = hashp->hdr.bsize - 1; + /* + * Note: since in the current version ADDR(pagep) == PREV_PGNO(pagep), + * make sure that ADDR(pagep) is set after resetting PREV_PGNO(pagep). + * We reset PREV_PGNO(pagep) just in case the macros are changed. + */ + ADDR(pagep) = pgno; + + return; +} + +int32_t +__new_page(hashp, addr, addr_type) + HTAB *hashp; + u_int32_t addr; + int32_t addr_type; +{ + db_pgno_t paddr; + PAGE16 *pagep; + + switch (addr_type) { /* Convert page number. */ + case A_BUCKET: + paddr = BUCKET_TO_PAGE(addr); + break; + case A_OVFL: + case A_BITMAP: + paddr = OADDR_TO_PAGE(addr); + break; + default: + paddr = addr; + break; + } + pagep = mpool_new(hashp->mp, &paddr, MPOOL_PAGE_REQUEST); + if (!pagep) + return (-1); +#if DEBUG_SLOW + account_page(hashp, paddr, 1); +#endif + + if (addr_type != A_BITMAP) + page_init(hashp, pagep, paddr, HASH_PAGE); + + __put_page(hashp, pagep, addr_type, 1); + + return (0); +} + +int32_t +__delete_page(hashp, pagep, page_type) + HTAB *hashp; + PAGE16 *pagep; + int32_t page_type; +{ + if (page_type == A_OVFL) + __free_ovflpage(hashp, pagep); + return (mpool_delete(hashp->mp, pagep)); +} + +u_int8_t +is_bitmap_pgno(hashp, pgno) + HTAB *hashp; + db_pgno_t pgno; +{ + int32_t i; + + for (i = 0; i < hashp->nmaps; i++) + if (OADDR_TO_PAGE(hashp->hdr.bitmaps[i]) == pgno) + return (1); + return (0); +} + +void +__pgin_routine(pg_cookie, pgno, page) + void *pg_cookie; + db_pgno_t pgno; + void *page; +{ + HTAB *hashp; + PAGE16 *pagep; + int32_t max, i; + + pagep = (PAGE16 *)page; + hashp = (HTAB *)pg_cookie; + + /* + * There are the following cases for swapping: + * 0) New page that may be unitialized. + * 1) Bucket page or overflow page. Either swap + * the header or initialize the page. + * 2) Bitmap page. Swap the whole page! + * 3) Header pages. Not handled here; these are written directly + * to the file. + */ + + if (NUM_ENT(pagep) == 0 && NEXT_PGNO(pagep) == 0 && + !is_bitmap_pgno(hashp, pgno)) { + /* XXX check for !0 LSN */ + page_init(hashp, pagep, pgno, HASH_PAGE); + return; + } + + if (hashp->hdr.lorder == DB_BYTE_ORDER) + return; + if (is_bitmap_pgno(hashp, pgno)) { + max = hashp->hdr.bsize >> 2; /* divide by 4 bytes */ + for (i = 0; i < max; i++) + M_32_SWAP(((int32_t *)pagep)[i]); + } else + swap_page_header_in(pagep); +} + +void +__pgout_routine(pg_cookie, pgno, page) + void *pg_cookie; + db_pgno_t pgno; + void *page; +{ + HTAB *hashp; + PAGE16 *pagep; + int32_t i, max; + + pagep = (PAGE16 *)page; + hashp = (HTAB *)pg_cookie; + + /* + * There are the following cases for swapping: + * 1) Bucket page or overflow page. Just swap the header. + * 2) Bitmap page. Swap the whole page! + * 3) Header pages. Not handled here; these are written directly + * to the file. + */ + + if (hashp->hdr.lorder == DB_BYTE_ORDER) + return; + if (is_bitmap_pgno(hashp, pgno)) { + max = hashp->hdr.bsize >> 2; /* divide by 4 bytes */ + for (i = 0; i < max; i++) + M_32_SWAP(((int32_t *)pagep)[i]); + } else + swap_page_header_out(pagep); +} + +/* + * + * Returns: + * 0 ==> OK + * -1 ==>failure + */ +extern int32_t +__put_page(hashp, pagep, addr_type, is_dirty) + HTAB *hashp; + PAGE16 *pagep; + int32_t addr_type, is_dirty; +{ +#if DEBUG_SLOW + account_page(hashp, + ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1); +#endif + + return (mpool_put(hashp->mp, pagep, (is_dirty ? MPOOL_DIRTY : 0))); +} + +/* + * Returns: + * 0 indicates SUCCESS + * -1 indicates FAILURE + */ +extern PAGE16 * +__get_page(hashp, addr, addr_type) + HTAB *hashp; + u_int32_t addr; + int32_t addr_type; +{ + PAGE16 *pagep; + db_pgno_t paddr; + + switch (addr_type) { /* Convert page number. */ + case A_BUCKET: + paddr = BUCKET_TO_PAGE(addr); + break; + case A_OVFL: + case A_BITMAP: + paddr = OADDR_TO_PAGE(addr); + break; + default: + paddr = addr; + break; + } + pagep = (PAGE16 *)mpool_get(hashp->mp, paddr, 0); + +#if DEBUG_SLOW + account_page(hashp, paddr, 1); +#endif +#ifdef DEBUG + assert(ADDR(pagep) == paddr || ADDR(pagep) == 0 || + addr_type == A_BITMAP || addr_type == A_HEADER); +#endif + + return (pagep); +} + +void +swap_page_header_in(pagep) + PAGE16 *pagep; +{ + u_int32_t i; + + /* can leave type and filler alone, since they're 1-byte quantities */ + + M_32_SWAP(PREV_PGNO(pagep)); + M_32_SWAP(NEXT_PGNO(pagep)); + M_16_SWAP(NUM_ENT(pagep)); + M_16_SWAP(OFFSET(pagep)); + + for (i = 0; i < NUM_ENT(pagep); i++) { + M_16_SWAP(KEY_OFF(pagep, i)); + M_16_SWAP(DATA_OFF(pagep, i)); + } +} + +void +swap_page_header_out(pagep) + PAGE16 *pagep; +{ + u_int32_t i; + + for (i = 0; i < NUM_ENT(pagep); i++) { + M_16_SWAP(KEY_OFF(pagep, i)); + M_16_SWAP(DATA_OFF(pagep, i)) + } + + /* can leave type and filler alone, since they're 1-byte quantities */ + + M_32_SWAP(PREV_PGNO(pagep)); + M_32_SWAP(NEXT_PGNO(pagep)); + M_16_SWAP(NUM_ENT(pagep)); + M_16_SWAP(OFFSET(pagep)); +} + +#define BYTE_MASK ((1 << INT32_T_BYTE_SHIFT) -1) +/* + * Initialize a new bitmap page. Bitmap pages are left in memory + * once they are read in. + */ +extern int32_t +__ibitmap(hashp, pnum, nbits, ndx) + HTAB *hashp; + int32_t pnum, nbits, ndx; +{ + u_int32_t *ip; + int32_t clearbytes, clearints; + + /* make a new bitmap page */ + if (__new_page(hashp, pnum, A_BITMAP) != 0) + return (1); + if (!(ip = (u_int32_t *)__get_page(hashp, pnum, A_BITMAP))) + return (1); + hashp->nmaps++; + clearints = ((nbits - 1) >> INT32_T_BYTE_SHIFT) + 1; + clearbytes = clearints << INT32_T_TO_BYTE; + (void)memset((int8_t *)ip, 0, clearbytes); + (void)memset((int8_t *)ip + clearbytes, + 0xFF, hashp->hdr.bsize - clearbytes); + ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK); + SETBIT(ip, 0); + hashp->hdr.bitmaps[ndx] = (u_int16_t)pnum; + hashp->mapp[ndx] = ip; + return (0); +} + +static u_int32_t +first_free(map) + u_int32_t map; +{ + u_int32_t i, mask; + + for (mask = 0x1, i = 0; i < BITS_PER_MAP; i++) { + if (!(mask & map)) + return (i); + mask = mask << 1; + } + return (i); +} + +static u_int16_t +overflow_page(hashp) + HTAB *hashp; +{ + u_int32_t *freep; + int32_t bit, first_page, free_bit, free_page, i, in_use_bits, j; + int32_t max_free, offset, splitnum; + u_int16_t addr; +#ifdef DEBUG2 + int32_t tmp1, tmp2; +#endif + + splitnum = hashp->hdr.ovfl_point; + max_free = hashp->hdr.spares[splitnum]; + + free_page = (max_free - 1) >> (hashp->hdr.bshift + BYTE_SHIFT); + free_bit = (max_free - 1) & ((hashp->hdr.bsize << BYTE_SHIFT) - 1); + + /* + * Look through all the free maps to find the first free block. + * The compiler under -Wall will complain that freep may be used + * before being set, however, this loop will ALWAYS get executed + * at least once, so freep is guaranteed to be set. + */ + first_page = hashp->hdr.last_freed >> (hashp->hdr.bshift + BYTE_SHIFT); + for (i = first_page; i <= free_page; i++) { + if (!(freep = fetch_bitmap(hashp, i))) + return (0); + if (i == free_page) + in_use_bits = free_bit; + else + in_use_bits = (hashp->hdr.bsize << BYTE_SHIFT) - 1; + + if (i == first_page) { + bit = hashp->hdr.last_freed & + ((hashp->hdr.bsize << BYTE_SHIFT) - 1); + j = bit / BITS_PER_MAP; + bit = bit & ~(BITS_PER_MAP - 1); + } else { + bit = 0; + j = 0; + } + for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) + if (freep[j] != ALL_SET) + goto found; + } + + /* No Free Page Found */ + hashp->hdr.last_freed = hashp->hdr.spares[splitnum]; + hashp->hdr.spares[splitnum]++; + offset = hashp->hdr.spares[splitnum] - + (splitnum ? hashp->hdr.spares[splitnum - 1] : 0); + +#define OVMSG "HASH: Out of overflow pages. Increase page size\n" + + if (offset > SPLITMASK) { + if (++splitnum >= NCACHED) { + (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); + return (0); + } + hashp->hdr.ovfl_point = splitnum; + hashp->hdr.spares[splitnum] = hashp->hdr.spares[splitnum - 1]; + hashp->hdr.spares[splitnum - 1]--; + offset = 1; + } + /* Check if we need to allocate a new bitmap page. */ + if (free_bit == (hashp->hdr.bsize << BYTE_SHIFT) - 1) { + free_page++; + if (free_page >= NCACHED) { + (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); + return (0); + } + /* + * This is tricky. The 1 indicates that you want the new page + * allocated with 1 clear bit. Actually, you are going to + * allocate 2 pages from this map. The first is going to be + * the map page, the second is the overflow page we were + * looking for. The __ibitmap routine automatically, sets + * the first bit of itself to indicate that the bitmap itself + * is in use. We would explicitly set the second bit, but + * don't have to if we tell __ibitmap not to leave it clear + * in the first place. + */ + if (__ibitmap(hashp, + (int32_t)OADDR_OF(splitnum, offset), 1, free_page)) + return (0); + hashp->hdr.spares[splitnum]++; +#ifdef DEBUG2 + free_bit = 2; +#endif + offset++; + if (offset > SPLITMASK) { + if (++splitnum >= NCACHED) { + (void)write(STDERR_FILENO, + OVMSG, sizeof(OVMSG) - 1); + return (0); + } + hashp->hdr.ovfl_point = splitnum; + hashp->hdr.spares[splitnum] = + hashp->hdr.spares[splitnum - 1]; + hashp->hdr.spares[splitnum - 1]--; + offset = 0; + } + } else { + /* + * Free_bit addresses the last used bit. Bump it to address + * the first available bit. + */ + free_bit++; + SETBIT(freep, free_bit); + } + + /* Calculate address of the new overflow page */ + addr = OADDR_OF(splitnum, offset); +#ifdef DEBUG2 + (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", + addr, free_bit, free_page); +#endif + + if (OADDR_TO_PAGE(addr) > MAX_PAGES(hashp)) { + (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); + return (0); + } + return (addr); + +found: + bit = bit + first_free(freep[j]); + SETBIT(freep, bit); +#ifdef DEBUG2 + tmp1 = bit; + tmp2 = i; +#endif + /* + * Bits are addressed starting with 0, but overflow pages are addressed + * beginning at 1. Bit is a bit address number, so we need to increment + * it to convert it to a page number. + */ + bit = 1 + bit + (i * (hashp->hdr.bsize << BYTE_SHIFT)); + if (bit >= hashp->hdr.last_freed) + hashp->hdr.last_freed = bit - 1; + + /* Calculate the split number for this page */ + for (i = 0; i < splitnum && (bit > hashp->hdr.spares[i]); i++); + offset = (i ? bit - hashp->hdr.spares[i - 1] : bit); + if (offset >= SPLITMASK) + return (0); /* Out of overflow pages */ + addr = OADDR_OF(i, offset); +#ifdef DEBUG2 + (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", + addr, tmp1, tmp2); +#endif + + if (OADDR_TO_PAGE(addr) > MAX_PAGES(hashp)) { + (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); + return (0); + } + /* Allocate and return the overflow page */ + return (addr); +} + +#ifdef DEBUG +int +bucket_to_page(hashp, n) + HTAB *hashp; + int n; +{ + int ret_val; + + ret_val = n + hashp->hdr.hdrpages; + if (n != 0) + ret_val += hashp->hdr.spares[__log2(n + 1) - 1]; + return (ret_val); +} + +int32_t +oaddr_to_page(hashp, n) + HTAB *hashp; + int n; +{ + int ret_val, temp; + + temp = (1 << SPLITNUM(n)) - 1; + ret_val = bucket_to_page(hashp, temp); + ret_val += (n & SPLITMASK); + + return (ret_val); +} +#endif /* DEBUG */ + +indx_t +page_to_oaddr(hashp, pgno) + HTAB *hashp; + db_pgno_t pgno; +{ + int32_t sp, ret_val; + + /* + * To convert page number to overflow address: + * + * 1. Find a starting split point -- use 0 since there are only + * 32 split points. + * 2. Find the split point s.t. 2^sp + hdr.spares[sp] < pgno and + * 2^(sp+1) = hdr.spares[sp+1] > pgno. The overflow address will + * be located at sp. + * 3. return... + */ + pgno -= hashp->hdr.hdrpages; + for (sp = 0; sp < NCACHED; sp++) + if (POW2(sp) + hashp->hdr.spares[sp] < pgno && + (POW2(sp + 1) + hashp->hdr.spares[sp + 1]) > pgno) + break; + + ret_val = OADDR_OF(sp + 1, + pgno - ((POW2(sp + 1) - 1) + hashp->hdr.spares[sp])); +#ifdef DEBUG + assert(OADDR_TO_PAGE(ret_val) == (pgno + hashp->hdr.hdrpages)); +#endif + return (ret_val); +} + +/* + * Mark this overflow page as free. + */ +extern void +__free_ovflpage(hashp, pagep) + HTAB *hashp; + PAGE16 *pagep; +{ + u_int32_t *freep; + int32_t bit_address, free_page, free_bit; + u_int16_t addr, ndx; + + addr = page_to_oaddr(hashp, ADDR(pagep)); + +#ifdef DEBUG2 + (void)fprintf(stderr, "Freeing %d\n", addr); +#endif + ndx = ((u_int16_t)addr) >> SPLITSHIFT; + bit_address = + (ndx ? hashp->hdr.spares[ndx - 1] : 0) + (addr & SPLITMASK) - 1; + if (bit_address < hashp->hdr.last_freed) + hashp->hdr.last_freed = bit_address; + free_page = (bit_address >> (hashp->hdr.bshift + BYTE_SHIFT)); + free_bit = bit_address & ((hashp->hdr.bsize << BYTE_SHIFT) - 1); + + freep = fetch_bitmap(hashp, free_page); +#ifdef DEBUG + /* + * This had better never happen. It means we tried to read a bitmap + * that has already had overflow pages allocated off it, and we + * failed to read it from the file. + */ + if (!freep) + assert(0); +#endif + CLRBIT(freep, free_bit); +#ifdef DEBUG2 + (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n", + obufp->addr, free_bit, free_page); +#endif +} + +static u_int32_t * +fetch_bitmap(hashp, ndx) + HTAB *hashp; + int32_t ndx; +{ + if (ndx >= hashp->nmaps) + return (NULL); + if (!hashp->mapp[ndx]) + hashp->mapp[ndx] = (u_int32_t *)__get_page(hashp, + hashp->hdr.bitmaps[ndx], A_BITMAP); + + return (hashp->mapp[ndx]); +} + +#ifdef DEBUG_SLOW +static void +account_page(hashp, pgno, inout) + HTAB *hashp; + db_pgno_t pgno; + int inout; +{ + static struct { + db_pgno_t pgno; + int times; + } list[100]; + static int last; + int i, j; + + if (inout == -1) /* XXX: Kluge */ + inout = 0; + + /* Find page in list. */ + for (i = 0; i < last; i++) + if (list[i].pgno == pgno) + break; + /* Not found. */ + if (i == last) { + list[last].times = inout; + list[last].pgno = pgno; + last++; + } + list[i].times = inout; + if (list[i].times == 0) { + for (j = i; j < last; j++) + list[j] = list[j + 1]; + last--; + } + for (i = 0; i < last; i++, list[i].times++) + if (list[i].times > 20 && !is_bitmap_pgno(hashp, list[i].pgno)) + (void)fprintf(stderr, + "Warning: pg %d has been out for %d times\n", + list[i].pgno, list[i].times); +} +#endif /* DEBUG_SLOW */ diff --git a/src/util/db2/hash/hsearch.c b/src/util/db2/hash/hsearch.c new file mode 100644 index 0000000000..f257cd6eaa --- /dev/null +++ b/src/util/db2/hash/hsearch.c @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hsearch.c 8.5 (Berkeley) 9/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <fcntl.h> +#include <string.h> + +#include "db-int.h" +#include "search.h" + +static DB *dbp = NULL; +static ENTRY retval; + +extern int +hcreate(nel) + u_int nel; +{ + HASHINFO info; + + info.nelem = nel; + info.bsize = 256; + info.ffactor = 8; + info.cachesize = 0; + info.hash = NULL; + info.lorder = 0; + dbp = (DB *)__hash_open(NULL, O_CREAT | O_RDWR, 0600, &info, 0); + return (dbp != NULL); +} + +extern ENTRY * +hsearch(item, action) + ENTRY item; + ACTION action; +{ + DBT key, val; + int status; + + if (!dbp) + return (NULL); + key.data = (u_char *)item.key; + key.size = strlen(item.key) + 1; + + if (action == ENTER) { + val.data = (u_char *)item.data; + val.size = strlen(item.data) + 1; + status = (dbp->put)(dbp, &key, &val, R_NOOVERWRITE); + if (status) + return (NULL); + } else { + /* FIND */ + status = (dbp->get)(dbp, &key, &val, 0); + if (status) + return (NULL); + else + item.data = (char *)val.data; + } + retval.key = item.key; + retval.data = item.data; + return (&retval); +} + +extern void +hdestroy() +{ + if (dbp) { + (void)(dbp->close)(dbp); + dbp = NULL; + } +} diff --git a/src/util/db2/hash/page.h b/src/util/db2/hash/page.h new file mode 100644 index 0000000000..8ef8a2e294 --- /dev/null +++ b/src/util/db2/hash/page.h @@ -0,0 +1,178 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)page.h 8.4 (Berkeley) 11/7/95 + */ + +#define HI_MASK 0xFFFF0000 +#define LO_MASK (~HI_MASK) + +#define HI(N) ((u_int16_t)(((N) & HI_MASK) >> 16)) +#define LO(N) ((u_int16_t)((N) & LO_MASK)) + +/* Constants for big key page overhead information. */ +#define NUMSHORTS 0 +#define KEYLEN 1 +#define DATALEN 2 +#define NEXTPAGE 3 + +/* + * Hash pages store meta-data beginning at the top of the page (offset 0) + * and key/data values beginning at the bottom of the page (offset pagesize). + * Fields are always accessed via macros so that we can change the page + * format without too much pain. The only changes that will require massive + * code changes are if we no longer store key/data offsets next to each + * other (since we use that fact to compute key lengths). In the accessor + * macros below, P means a pointer to the page, I means an index of the + * particular entry being accessed. + * + * Hash base page format + * BYTE ITEM NBYTES TYPE ACCESSOR MACRO + * ---- ------------------ ------ -------- -------------- + * 0 previous page number 4 db_pgno_t PREV_PGNO(P) + * 4 next page number 4 db_pgno_t NEXT_PGNO(P) + * 8 # pairs on page 2 indx_t NUM_ENT(P) + * 10 page type 1 u_int8_t TYPE(P) + * 11 padding 1 u_int8_t none + * 12 highest free byte 2 indx_t OFFSET(P) + * 14 key offset 0 2 indx_t KEY_OFF(P, I) + * 16 data offset 0 2 indx_t DATA_OFF(P, I) + * 18 key offset 1 2 indx_t KEY_OFF(P, I) + * 20 data offset 1 2 indx_t DATA_OFF(P, I) + * ...etc... + */ + +/* Indices (in bytes) of the beginning of each of these entries */ +#define I_PREV_PGNO 0 +#define I_NEXT_PGNO 4 +#define I_ENTRIES 8 +#define I_TYPE 10 +#define I_HF_OFFSET 12 + +/* Overhead is everything prior to the first key/data pair. */ +#define PAGE_OVERHEAD (I_HF_OFFSET + sizeof(indx_t)) + +/* To allocate a pair, we need room for one key offset and one data offset. */ +#define PAIR_OVERHEAD ((sizeof(indx_t) << 1)) + +/* Use this macro to extract a value of type T from page P at offset O. */ +#define REFERENCE(P, T, O) (((T *)((u_int8_t *)(P) + O))[0]) + +/* + * Use these macros to access fields on a page; P is a PAGE16 *. + */ +#define NUM_ENT(P) (REFERENCE((P), indx_t, I_ENTRIES)) +#define PREV_PGNO(P) (REFERENCE((P), db_pgno_t, I_PREV_PGNO)) +#define NEXT_PGNO(P) (REFERENCE((P), db_pgno_t, I_NEXT_PGNO)) +#define TYPE(P) (REFERENCE((P), u_int8_t, I_TYPE)) +#define OFFSET(P) (REFERENCE((P), indx_t, I_HF_OFFSET)) +/* + * We need to store a page's own address on each page (unlike the Btree + * access method which needs the previous page). We use the PREV_PGNO + * field to store our own page number. + */ +#define ADDR(P) (PREV_PGNO((P))) + +/* Extract key/data offsets and data for a given index. */ +#define DATA_OFF(P, N) \ + REFERENCE(P, indx_t, PAGE_OVERHEAD + N * PAIR_OVERHEAD + sizeof(indx_t)) +#define KEY_OFF(P, N) \ + REFERENCE(P, indx_t, PAGE_OVERHEAD + N * PAIR_OVERHEAD) + +#define KEY(P, N) (((PAGE8 *)(P)) + KEY_OFF((P), (N))) +#define DATA(P, N) (((PAGE8 *)(P)) + DATA_OFF((P), (N))) + +/* + * Macros used to compute various sizes on a page. + */ +#define PAIRSIZE(K, D) (PAIR_OVERHEAD + (K)->size + (D)->size) +#define BIGOVERHEAD (4 * sizeof(u_int16_t)) +#define KEYSIZE(K) (4 * sizeof(u_int16_t) + (K)->size); +#define OVFLSIZE (2 * sizeof(u_int16_t)) +#define BIGPAGEOVERHEAD (4 * sizeof(u_int16_t)) +#define BIGPAGEOFFSET 4 +#define BIGPAGESIZE(P) ((P)->BSIZE - BIGPAGEOVERHEAD) + +#define PAGE_META(N) (((N) + 3) * sizeof(u_int16_t)) +#define MINFILL 0.75 +#define ISBIG(N, P) (((N) > ((P)->hdr.bsize * MINFILL)) ? 1 : 0) + +#define ITEMSIZE(I) (sizeof(u_int16_t) + (I)->size) + +/* + * Big key/data pages use a different page format. They have a single + * key/data "pair" containing the length of the key and data instead + * of offsets. + */ +#define BIGKEYLEN(P) (KEY_OFF((P), 0)) +#define BIGDATALEN(P) (DATA_OFF((P), 0)) +#define BIGKEY(P) (((PAGE8 *)(P)) + PAGE_OVERHEAD + PAIR_OVERHEAD) +#define BIGDATA(P) \ + (((PAGE8 *)(P)) + PAGE_OVERHEAD + PAIR_OVERHEAD + KEY_OFF((P), 0)) + + +#define OVFLPAGE 0 +#define BIGPAIR 0 +#define INVALID_PGNO 0xFFFFFFFF + +typedef unsigned short PAGE16; +typedef unsigned char PAGE8; + +#define A_BUCKET 0 +#define A_OVFL 1 +#define A_BITMAP 2 +#define A_RAW 4 +#define A_HEADER 5 + +#define PAIRFITS(P,K,D) ((PAIRSIZE((K),(D))) <= FREESPACE((P))) +#define BIGPAIRFITS(P) ((FREESPACE((P)) >= PAIR_OVERHEAD)) +/* + * Since these are all unsigned, we need to guarantee that we never go + * negative. Offset values are 0-based and overheads are one based (i.e. + * one byte of overhead is 1, not 0), so we need to convert OFFSETs to + * 1-based counting before subtraction. + */ +#define FREESPACE(P) \ + ((OFFSET((P)) + 1 - PAGE_OVERHEAD - (NUM_ENT((P)) * PAIR_OVERHEAD))) + +/* + * Overhead on header pages is just one word -- the length of the + * header info stored on that page. + */ +#define HEADER_OVERHEAD 4 + +#define HASH_PAGE 2 +#define HASH_BIGPAGE 3 +#define HASH_OVFLPAGE 4 diff --git a/src/util/db2/hash/page.h.patch b/src/util/db2/hash/page.h.patch new file mode 100644 index 0000000000..4a0311fea3 --- /dev/null +++ b/src/util/db2/hash/page.h.patch @@ -0,0 +1,42 @@ +*** /tmp/,RCSt1a21720 Wed Apr 3 11:49:55 1996 +--- page.h Wed Apr 3 08:42:25 1996 +*************** +*** 158,163 + + #define PAIRFITS(P,K,D) ((PAIRSIZE((K),(D))) <= FREESPACE((P))) + #define BIGPAIRFITS(P) ((FREESPACE((P)) >= PAIR_OVERHEAD)) + #define FREESPACE(P) \ + ((OFFSET((P)) - PAGE_OVERHEAD - (NUM_ENT((P)) * PAIR_OVERHEAD))) + + +--- 158,169 ----- + + #define PAIRFITS(P,K,D) ((PAIRSIZE((K),(D))) <= FREESPACE((P))) + #define BIGPAIRFITS(P) ((FREESPACE((P)) >= PAIR_OVERHEAD)) ++ /* ++ * Since these are all unsigned, we need to guarantee that we never go ++ * negative. Offset values are 0-based and overheads are one based (i.e. ++ * one byte of overhead is 1, not 0), so we need to convert OFFSETs to ++ * 1-based counting before subtraction. ++ */ + #define FREESPACE(P) \ + ((OFFSET((P)) + 1 - PAGE_OVERHEAD - (NUM_ENT((P)) * PAIR_OVERHEAD))) + +*************** +*** 159,165 + #define PAIRFITS(P,K,D) ((PAIRSIZE((K),(D))) <= FREESPACE((P))) + #define BIGPAIRFITS(P) ((FREESPACE((P)) >= PAIR_OVERHEAD)) + #define FREESPACE(P) \ +! ((OFFSET((P)) - PAGE_OVERHEAD - (NUM_ENT((P)) * PAIR_OVERHEAD))) + + /* + * Overhead on header pages is just one word -- the length of the + +--- 165,171 ----- + * 1-based counting before subtraction. + */ + #define FREESPACE(P) \ +! ((OFFSET((P)) + 1 - PAGE_OVERHEAD - (NUM_ENT((P)) * PAIR_OVERHEAD))) + + /* + * Overhead on header pages is just one word -- the length of the diff --git a/src/util/db2/hash/search.h b/src/util/db2/hash/search.h new file mode 100644 index 0000000000..4d3b9143e7 --- /dev/null +++ b/src/util/db2/hash/search.h @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)search.h 8.1 (Berkeley) 6/4/93 + */ + +/* Backward compatibility to hsearch interface. */ +typedef struct entry { + char *key; + char *data; +} ENTRY; + +typedef enum { + FIND, ENTER +} ACTION; + +int hcreate __P((unsigned int)); +void hdestroy __P((void)); +ENTRY *hsearch __P((ENTRY, ACTION)); diff --git a/src/util/db2/include/db-int.h b/src/util/db2/include/db-int.h new file mode 100644 index 0000000000..78be4c8a36 --- /dev/null +++ b/src/util/db2/include/db-int.h @@ -0,0 +1,208 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)compat.h 8.13 (Berkeley) 2/21/94 + */ + +#ifndef _DB_INT_H_ +#define _DB_INT_H_ + +#include "db.h" + +/* deal with autoconf-based stuff (db.h includes db-config.h) */ + +#ifndef HAVE_MEMMOVE +#define memmove my_memmove +#endif + +#ifndef HAVE_MKSTEMP +#define mkstemp my_mkstemp +#endif + +#ifndef HAVE_STRERROR +#define strerror my_strerror +#endif + +#define DB_LITTLE_ENDIAN 1234 +#define DB_BIG_ENDIAN 4321 + +#ifdef WORDS_BIGENDIAN +#define DB_BYTE_ORDER DB_BIG_ENDIAN +#else +#define DB_BYTE_ORDER DB_LITTLE_ENDIAN +#endif + +/* end autoconf-based stuff */ + +/* include necessary system header files */ + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <limits.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> + +/* types and constants used for database structure */ + +#define MAX_PAGE_NUMBER 0xffffffff /* >= # of pages in a file */ +typedef u_int32_t db_pgno_t; +#define MAX_PAGE_OFFSET 65535 /* >= # of bytes in a page */ +typedef u_int16_t indx_t; +#define MAX_REC_NUMBER 0xffffffff /* >= # of records in a tree */ +typedef u_int32_t recno_t; + +/* + * Little endian <==> big endian 32-bit swap macros. + * M_32_SWAP swap a memory location + * P_32_SWAP swap a referenced memory location + * P_32_COPY swap from one location to another + */ +#define M_32_SWAP(a) { \ + u_int32_t _tmp = a; \ + ((char *)&a)[0] = ((char *)&_tmp)[3]; \ + ((char *)&a)[1] = ((char *)&_tmp)[2]; \ + ((char *)&a)[2] = ((char *)&_tmp)[1]; \ + ((char *)&a)[3] = ((char *)&_tmp)[0]; \ +} +#define P_32_SWAP(a) { \ + u_int32_t _tmp = *(u_int32_t *)a; \ + ((char *)a)[0] = ((char *)&_tmp)[3]; \ + ((char *)a)[1] = ((char *)&_tmp)[2]; \ + ((char *)a)[2] = ((char *)&_tmp)[1]; \ + ((char *)a)[3] = ((char *)&_tmp)[0]; \ +} +#define P_32_COPY(a, b) { \ + ((char *)&(b))[0] = ((char *)&(a))[3]; \ + ((char *)&(b))[1] = ((char *)&(a))[2]; \ + ((char *)&(b))[2] = ((char *)&(a))[1]; \ + ((char *)&(b))[3] = ((char *)&(a))[0]; \ +} + +/* + * Little endian <==> big endian 16-bit swap macros. + * M_16_SWAP swap a memory location + * P_16_SWAP swap a referenced memory location + * P_16_COPY swap from one location to another + */ +#define M_16_SWAP(a) { \ + u_int16_t _tmp = a; \ + ((char *)&a)[0] = ((char *)&_tmp)[1]; \ + ((char *)&a)[1] = ((char *)&_tmp)[0]; \ +} +#define P_16_SWAP(a) { \ + u_int16_t _tmp = *(u_int16_t *)a; \ + ((char *)a)[0] = ((char *)&_tmp)[1]; \ + ((char *)a)[1] = ((char *)&_tmp)[0]; \ +} +#define P_16_COPY(a, b) { \ + ((char *)&(b))[0] = ((char *)&(a))[1]; \ + ((char *)&(b))[1] = ((char *)&(a))[0]; \ +} + +/* open functions for each database type, used in dbopen() */ + +DB *__bt_open __P((const char *, int, int, const BTREEINFO *, int)); +DB *__hash_open __P((const char *, int, int, const HASHINFO *, int)); +DB *__rec_open __P((const char *, int, int, const RECNOINFO *, int)); +void __dbpanic __P((DB *dbp)); + +/* + * There is no portable way to figure out the maximum value of a file + * offset, so we put it here. + */ +#ifdef OFF_T_MAX +#define DB_OFF_T_MAX OFF_T_MAX +#else +#define DB_OFF_T_MAX LONG_MAX +#endif + +#ifndef O_ACCMODE /* POSIX 1003.1 access mode mask. */ +#define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR) +#endif + +/* + * If you can't provide lock values in the open(2) call. Note, this + * allows races to happen. + */ +#ifndef O_EXLOCK /* 4.4BSD extension. */ +#define O_EXLOCK 0 +#endif + +#ifndef O_SHLOCK /* 4.4BSD extension. */ +#define O_SHLOCK 0 +#endif + +#ifndef EFTYPE +#define EFTYPE EINVAL /* POSIX 1003.1 format errno. */ +#endif + +#ifndef STDERR_FILENO +#define STDIN_FILENO 0 /* ANSI C #defines */ +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 +#endif + +#ifndef SEEK_END +#define SEEK_SET 0 /* POSIX 1003.1 seek values */ +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + +#ifndef NULL /* ANSI C #defines NULL everywhere. */ +#define NULL 0 +#endif + +#ifndef MAX /* Usually found in <sys/param.h>. */ +#define MAX(_a,_b) ((_a)<(_b)?(_b):(_a)) +#endif +#ifndef MIN /* Usually found in <sys/param.h>. */ +#define MIN(_a,_b) ((_a)<(_b)?(_a):(_b)) +#endif + +#ifndef S_ISDIR /* POSIX 1003.1 file type tests. */ +#define S_ISDIR(m) ((m & 0170000) == 0040000) /* directory */ +#define S_ISCHR(m) ((m & 0170000) == 0020000) /* char special */ +#define S_ISBLK(m) ((m & 0170000) == 0060000) /* block special */ +#define S_ISREG(m) ((m & 0170000) == 0100000) /* regular file */ +#define S_ISFIFO(m) ((m & 0170000) == 0010000) /* fifo */ +#endif +#ifndef S_ISLNK /* BSD POSIX 1003.1 extensions */ +#define S_ISLNK(m) ((m & 0170000) == 0120000) /* symbolic link */ +#define S_ISSOCK(m) ((m & 0170000) == 0140000) /* socket */ +#endif + +#endif /* _DB_INT_H_ */ diff --git a/src/util/db2/include/db-ndbm.h b/src/util/db2/include/db-ndbm.h new file mode 100644 index 0000000000..1bafb6c4bf --- /dev/null +++ b/src/util/db2/include/db-ndbm.h @@ -0,0 +1,77 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ndbm.h 8.1 (Berkeley) 6/2/93 + */ + +#ifndef _NDBM_H_ +#define _NDBM_H_ + +#include "db.h" + +/* Map dbm interface onto db(3). */ +#define DBM_RDONLY O_RDONLY + +/* Flags to dbm_store(). */ +#define DBM_INSERT 0 +#define DBM_REPLACE 1 + +/* + * The db(3) support for ndbm(3) always appends this suffix to the + * file name to avoid overwriting the user's original database. + */ +#define DBM_SUFFIX ".db" + +typedef struct { + char *dptr; + int dsize; +} datum; + +typedef DB DBM; +#define dbm_pagfno(a) DBM_PAGFNO_NOT_AVAILABLE + +__BEGIN_DECLS +void dbm_close __P((DBM *)); +int dbm_delete __P((DBM *, datum)); +datum dbm_fetch __P((DBM *, datum)); +datum dbm_firstkey __P((DBM *)); +long dbm_forder __P((DBM *, datum)); +datum dbm_nextkey __P((DBM *)); +DBM *dbm_open __P((const char *, int, int)); +int dbm_store __P((DBM *, datum, datum, int)); +int dbm_dirfno __P((DBM *)); +__END_DECLS + +#endif /* !_NDBM_H_ */ diff --git a/src/util/db2/include/db-queue.h b/src/util/db2/include/db-queue.h new file mode 100644 index 0000000000..40d32ccb6e --- /dev/null +++ b/src/util/db2/include/db-queue.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.3 (Berkeley) 12/13/93 + */ + +#ifndef _QUEUE_H_ +#define _QUEUE_H_ + +/* + * This file defines three types of data structures: lists, tail queues, + * and circular queues. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list after + * an existing element or at the head of the list. A list may only be + * traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list after + * an existing element, at the head of the list, or at the end of the + * list. A tail queue may only be traversed in the forward direction. + * + * A circle queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or after + * an existing element, at the head of the list, or at the end of the list. + * A circle queue may be traversed in either direction, but has a more + * complex end of list detection. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +/* + * List definitions. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. + */ +#define LIST_INIT(head) { \ + (head)->lh_first = NULL; \ +} + +#define LIST_INSERT_AFTER(listelm, elm, field) { \ + if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ + (listelm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + (listelm)->field.le_next = (elm); \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ +} + +#define LIST_INSERT_HEAD(head, elm, field) { \ + if (((elm)->field.le_next = (head)->lh_first) != NULL) \ + (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ + (head)->lh_first = (elm); \ + (elm)->field.le_prev = &(head)->lh_first; \ +} + +#define LIST_REMOVE(elm, field) { \ + if ((elm)->field.le_next != NULL) \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = (elm)->field.le_next; \ +} + +/* + * Tail queue definitions. + */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ +} + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ +} + +/* + * Tail queue functions. + */ +#define TAILQ_INIT(head) { \ + (head)->tqh_first = NULL; \ + (head)->tqh_last = &(head)->tqh_first; \ +} + +#define TAILQ_INSERT_HEAD(head, elm, field) { \ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ + (elm)->field.tqe_next->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (head)->tqh_first = (elm); \ + (elm)->field.tqe_prev = &(head)->tqh_first; \ +} + +#define TAILQ_INSERT_TAIL(head, elm, field) { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &(elm)->field.tqe_next; \ +} + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) { \ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ + (elm)->field.tqe_next->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (listelm)->field.tqe_next = (elm); \ + (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ +} + +#define TAILQ_REMOVE(head, elm, field) { \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ +} + +/* + * Circular queue definitions. + */ +#define CIRCLEQ_HEAD(name, type) \ +struct name { \ + struct type *cqh_first; /* first element */ \ + struct type *cqh_last; /* last element */ \ +} + +#define CIRCLEQ_ENTRY(type) \ +struct { \ + struct type *cqe_next; /* next element */ \ + struct type *cqe_prev; /* previous element */ \ +} + +/* + * Circular queue functions. + */ +#define CIRCLEQ_INIT(head) { \ + (head)->cqh_first = (void *)(head); \ + (head)->cqh_last = (void *)(head); \ +} + +#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) { \ + (elm)->field.cqe_next = (listelm)->field.cqe_next; \ + (elm)->field.cqe_prev = (listelm); \ + if ((listelm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (listelm)->field.cqe_next->field.cqe_prev = (elm); \ + (listelm)->field.cqe_next = (elm); \ +} + +#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) { \ + (elm)->field.cqe_next = (listelm); \ + (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \ + if ((listelm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (listelm)->field.cqe_prev->field.cqe_next = (elm); \ + (listelm)->field.cqe_prev = (elm); \ +} + +#define CIRCLEQ_INSERT_HEAD(head, elm, field) { \ + (elm)->field.cqe_next = (head)->cqh_first; \ + (elm)->field.cqe_prev = (void *)(head); \ + if ((head)->cqh_last == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (head)->cqh_first->field.cqe_prev = (elm); \ + (head)->cqh_first = (elm); \ +} + +#define CIRCLEQ_INSERT_TAIL(head, elm, field) { \ + (elm)->field.cqe_next = (void *)(head); \ + (elm)->field.cqe_prev = (head)->cqh_last; \ + if ((head)->cqh_first == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (head)->cqh_last->field.cqe_next = (elm); \ + (head)->cqh_last = (elm); \ +} + +#define CIRCLEQ_REMOVE(head, elm, field) { \ + if ((elm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm)->field.cqe_prev; \ + else \ + (elm)->field.cqe_next->field.cqe_prev = \ + (elm)->field.cqe_prev; \ + if ((elm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm)->field.cqe_next; \ + else \ + (elm)->field.cqe_prev->field.cqe_next = \ + (elm)->field.cqe_next; \ +} +#endif /* !_QUEUE_H_ */ diff --git a/src/util/db2/include/db.h b/src/util/db2/include/db.h new file mode 100644 index 0000000000..6b5c04e6b9 --- /dev/null +++ b/src/util/db2/include/db.h @@ -0,0 +1,170 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)db.h 8.8 (Berkeley) 11/2/95 + */ + +#ifndef _DB_H_ +#define _DB_H_ + +#include <db-config.h> + +#include <sys/types.h> + +#define RET_ERROR -1 /* Return values. */ +#define RET_SUCCESS 0 +#define RET_SPECIAL 1 + +/* Key/data structure -- a Data-Base Thang. */ +typedef struct { + void *data; /* data */ + size_t size; /* data length */ +} DBT; + +/* Routine flags. */ +#define R_CURSOR 1 /* del, put, seq */ +#define __R_UNUSED 2 /* UNUSED */ +#define R_FIRST 3 /* seq */ +#define R_IAFTER 4 /* put (RECNO) */ +#define R_IBEFORE 5 /* put (RECNO) */ +#define R_LAST 6 /* seq (BTREE, RECNO) */ +#define R_NEXT 7 /* seq */ +#define R_NOOVERWRITE 8 /* put */ +#define R_PREV 9 /* seq (BTREE, RECNO) */ +#define R_SETCURSOR 10 /* put (RECNO) */ +#define R_RECNOSYNC 11 /* sync (RECNO) */ + +typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; + +/* + * !!! + * The following flags are included in the dbopen(3) call as part of the + * open(2) flags. In order to avoid conflicts with the open flags, start + * at the top of the 16 or 32-bit number space and work our way down. If + * the open flags were significantly expanded in the future, it could be + * a problem. Wish I'd left another flags word in the dbopen call. + * + * !!! + * None of this stuff is implemented yet. The only reason that it's here + * is so that the access methods can skip copying the key/data pair when + * the DB_LOCK flag isn't set. + */ +#if UINT_MAX > 65535 +#define DB_LOCK 0x20000000 /* Do locking. */ +#define DB_SHMEM 0x40000000 /* Use shared memory. */ +#define DB_TXN 0x80000000 /* Do transactions. */ +#else +#define DB_LOCK 0x2000 /* Do locking. */ +#define DB_SHMEM 0x4000 /* Use shared memory. */ +#define DB_TXN 0x8000 /* Do transactions. */ +#endif + +/* deal with turning prototypes on and off */ + +#if defined(__STDC__) || defined(__cplusplus) +#define __P(protos) protos /* full-blown ANSI C */ +#else /* !(__STDC__ || __cplusplus) */ +#define __P(protos) () /* traditional C preprocessor */ +#endif + +/* Access method description structure. */ +typedef struct __db { + DBTYPE type; /* Underlying db type. */ + int (*close) __P((struct __db *)); + int (*del) __P((const struct __db *, const DBT *, u_int)); + int (*get) __P((const struct __db *, const DBT *, DBT *, u_int)); + int (*put) __P((const struct __db *, DBT *, const DBT *, u_int)); + int (*seq) __P((const struct __db *, DBT *, DBT *, u_int)); + int (*sync) __P((const struct __db *, u_int)); + void *internal; /* Access method private. */ + int (*fd) __P((const struct __db *)); +} DB; + +#define BTREEMAGIC 0x053162 +#define BTREEVERSION 3 + +/* Structure used to pass parameters to the btree routines. */ +typedef struct { +#define R_DUP 0x01 /* duplicate keys */ + u_long flags; + u_int cachesize; /* bytes to cache */ + int maxkeypage; /* maximum keys per page */ + int minkeypage; /* minimum keys per page */ + u_int psize; /* page size */ + int (*compare) /* comparison function */ + __P((const DBT *, const DBT *)); + size_t (*prefix) /* prefix function */ + __P((const DBT *, const DBT *)); + int lorder; /* byte order */ +} BTREEINFO; + +#define HASHMAGIC 0x061561 +#define HASHVERSION 3 + +/* Structure used to pass parameters to the hashing routines. */ +typedef struct { + u_int bsize; /* bucket size */ + u_int ffactor; /* fill factor */ + u_int nelem; /* number of elements */ + u_int cachesize; /* bytes to cache */ + u_int32_t /* hash function */ + (*hash) __P((const void *, size_t)); + int lorder; /* byte order */ +} HASHINFO; + +/* Structure used to pass parameters to the record routines. */ +typedef struct { +#define R_FIXEDLEN 0x01 /* fixed-length records */ +#define R_NOKEY 0x02 /* key not required */ +#define R_SNAPSHOT 0x04 /* snapshot the input */ + u_long flags; + u_int cachesize; /* bytes to cache */ + u_int psize; /* page size */ + int lorder; /* byte order */ + size_t reclen; /* record length (fixed-length records) */ + u_char bval; /* delimiting byte (variable-length records */ + char *bfname; /* btree file name */ +} RECNOINFO; + +#if defined(__cplusplus) +#define __BEGIN_DECLS extern "C" { +#define __END_DECLS }; +#else +#define __BEGIN_DECLS +#define __END_DECLS +#endif + +__BEGIN_DECLS +DB *dbopen __P((const char *, int, int, DBTYPE, const void *)); +__END_DECLS + +#endif /* !_DB_H_ */ diff --git a/src/util/db2/man/Makefile.inc b/src/util/db2/man/Makefile.inc new file mode 100644 index 0000000000..f85cba1f81 --- /dev/null +++ b/src/util/db2/man/Makefile.inc @@ -0,0 +1,7 @@ +# @(#)Makefile.inc 8.2 (Berkeley) 11/14/94 + +.PATH: ${.CURDIR}/db/man + +MAN3+= db_btree.0 db_hash.0 db_lock.0 db_log.0 db_mpool.0 db_open.0 \ + db_recno.0 +MLINKS+=db_open.3 db.3 db_open.3 dbopen.3 diff --git a/src/util/db2/man/db.man.ps b/src/util/db2/man/db.man.ps new file mode 100644 index 0000000000..23feea6e5c --- /dev/null +++ b/src/util/db2/man/db.man.ps @@ -0,0 +1,2295 @@ +%!PS-Adobe-3.0 +%%Creator: groff version 1.08 +%%DocumentNeededResources: font Times-Roman +%%+ font Times-Bold +%%+ font Times-Italic +%%DocumentSuppliedResources: procset grops 1.08 0 +%%Pages: 28 +%%PageOrder: Ascend +%%Orientation: Portrait +%%EndComments +%%BeginProlog +%%BeginResource: procset grops 1.08 0 +/setpacking where{ +pop +currentpacking +true setpacking +}if +/grops 120 dict dup begin +/SC 32 def +/A/show load def +/B{0 SC 3 -1 roll widthshow}bind def +/C{0 exch ashow}bind def +/D{0 exch 0 SC 5 2 roll awidthshow}bind def +/E{0 rmoveto show}bind def +/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def +/G{0 rmoveto 0 exch ashow}bind def +/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/I{0 exch rmoveto show}bind def +/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def +/K{0 exch rmoveto 0 exch ashow}bind def +/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/M{rmoveto show}bind def +/N{rmoveto 0 SC 3 -1 roll widthshow}bind def +/O{rmoveto 0 exch ashow}bind def +/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/Q{moveto show}bind def +/R{moveto 0 SC 3 -1 roll widthshow}bind def +/S{moveto 0 exch ashow}bind def +/T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/SF{ +findfont exch +[exch dup 0 exch 0 exch neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/MF{ +findfont +[5 2 roll +0 3 1 roll +neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/level0 0 def +/RES 0 def +/PL 0 def +/LS 0 def +/PLG{ +gsave newpath clippath pathbbox grestore +exch pop add exch pop +}bind def +/BP{ +/level0 save def +1 setlinecap +1 setlinejoin +72 RES div dup scale +LS{ +90 rotate +}{ +0 PL translate +}ifelse +1 -1 scale +}bind def +/EP{ +level0 restore +showpage +}bind def +/DA{ +newpath arcn stroke +}bind def +/SN{ +transform +.25 sub exch .25 sub exch +round .25 add exch round .25 add exch +itransform +}bind def +/DL{ +SN +moveto +SN +lineto stroke +}bind def +/DC{ +newpath 0 360 arc closepath +}bind def +/TM matrix def +/DE{ +TM currentmatrix pop +translate scale newpath 0 0 .5 0 360 arc closepath +TM setmatrix +}bind def +/RC/rcurveto load def +/RL/rlineto load def +/ST/stroke load def +/MT/moveto load def +/CL/closepath load def +/FL{ +currentgray exch setgray fill setgray +}bind def +/BL/fill load def +/LW/setlinewidth load def +/RE{ +findfont +dup maxlength 1 index/FontName known not{1 add}if dict begin +{ +1 index/FID ne{def}{pop pop}ifelse +}forall +/Encoding exch def +dup/FontName exch def +currentdict end definefont pop +}bind def +/DEFS 0 def +/EBEGIN{ +moveto +DEFS begin +}bind def +/EEND/end load def +/CNT 0 def +/level1 0 def +/PBEGIN{ +/level1 save def +translate +div 3 1 roll div exch scale +neg exch neg exch translate +0 setgray +0 setlinecap +1 setlinewidth +0 setlinejoin +10 setmiterlimit +[]0 setdash +/setstrokeadjust where{ +pop +false setstrokeadjust +}if +/setoverprint where{ +pop +false setoverprint +}if +newpath +/CNT countdictstack def +userdict begin +/showpage{}def +}bind def +/PEND{ +clear +countdictstack CNT sub{end}repeat +level1 restore +}bind def +end def +/setpacking where{ +pop +setpacking +}if +%%EndResource +%%IncludeResource: font Times-Roman +%%IncludeResource: font Times-Bold +%%IncludeResource: font Times-Italic +grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 def/PL +792 def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron/scaron/zcaron +/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/space +/exclam/quotedbl/numbersign/dollar/percent/ampersand/quoteright/parenleft +/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four +/five/six/seven/eight/nine/colon/semicolon/less/equal/greater/question/at/A/B/C +/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash +/bracketright/circumflex/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q +/r/s/t/u/v/w/x/y/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase +/guillemotleft/guillemotright/bullet/florin/fraction/perthousand/dagger +/daggerdbl/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut +/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash +/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen/brokenbar +/section/dieresis/copyright/ordfeminine/guilsinglleft/logicalnot/minus +/registered/macron/degree/plusminus/twosuperior/threesuperior/acute/mu +/paragraph/periodcentered/cedilla/onesuperior/ordmasculine/guilsinglright +/onequarter/onehalf/threequarters/questiondown/Agrave/Aacute/Acircumflex/Atilde +/Adieresis/Aring/AE/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute +/Icircumflex/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis +/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn/germandbls +/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla/egrave/eacute +/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis/eth/ntilde/ograve +/oacute/ocircumflex/otilde/odieresis/divide/oslash/ugrave/uacute/ucircumflex +/udieresis/yacute/thorn/ydieresis]def/Times-Italic@0 ENC0/Times-Italic RE +/Times-Bold@0 ENC0/Times-Bold RE/Times-Roman@0 ENC0/Times-Roman RE +%%EndProlog +%%Page: 1 1 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 113.45(DB_BTREE\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 113.45(anual DB_BTREE\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA) +72 84 S(ME).18 E F0(db_btree \255 btree database access method)108 96 Q F1 +(DESCRIPTION)72 112.8 Q F0 .486(The DB library is a f)108 124.8 R .485 +(amily of groups of functions that pro)-.1 F .485 +(vides a modular programming interf)-.15 F .485(ace to trans-)-.1 F .822 +(actions and record-oriented \214le access.)108 136.8 R .822 +(The library includes support for transaction, locking, logging and)5.822 F +.258(\214le b)108 148.8 R(uf)-.2 E .258(fering functionality)-.25 F 2.758(,a) +-.65 G 2.758(sw)223.214 148.8 S .258(ell as v)237.082 148.8 R .258(arious inde) +-.25 F -.15(xe)-.15 G 2.758(da).15 G .258(ccess methods.)331.434 148.8 R(Man) +5.258 E 2.758(yo)-.15 G 2.758(ft)427.878 148.8 S .258 +(he functional groups \(e.g.)436.746 148.8 R .528(the memory pool functions\) \ +are useful independently of the rest of the DB functions, although some func-) +108 160.8 R .306(tional groups are e)108 172.8 R .306 +(xplicitly based on other functional groups \(e.g.)-.15 F .306 +(transactions and logging\).)5.306 F -.15(Fo)5.306 G 2.806(rag).15 G(eneral) +515.57 172.8 Q .245(description of transactions, see)108 184.8 R/F2 10 +/Times-Italic@0 SF(db_txn)2.745 E F0 2.745(\(3\). F).24 F .245 +(or a general description of the access methods, see)-.15 F F2(db_open)2.745 E +F0(\(3\)).24 E .308(and then the indi)108 196.8 R .308 +(vidual access method manual pages:)-.25 F F2(db_btr)2.807 E(ee)-.37 E F0 +(\(3\),).18 E F2(db_hash)2.807 E F0(\(3\),).28 E F2(db_lo)2.807 E(g)-.1 E F0 +.307(\(3\) and).22 F F2(db_r)2.807 E(ecno)-.37 E F0(\(3\).).18 E -.15(Fo)108 +208.8 S 3.635(rag).15 G 1.135(eneral description of the lock manager)138.45 +208.8 R 3.635(,s)-.4 G(ee)307.32 208.8 Q F2(db_loc)3.635 E(k)-.2 E F0 3.635 +(\(3\). F).67 F 1.135(or a general description of the memory)-.15 F +(pool manager)108 220.8 Q 2.5(,s)-.4 G(ee)171.2 220.8 Q F2(db_mpool)2.5 E F0 +(\(3\).).51 E +(This manual page describes speci\214c details of the btree access method.)108 +237.6 Q 1.518(The btree data structure is a sorted, balanced tree structure st\ +oring associated k)108 254.4 R -.15(ey)-.1 G 1.517(/data pairs.).15 F +(Searches,)6.517 E .598(insertions, and deletions in the btree will all comple\ +te in O lg base N where base is the a)108 266.4 R -.15(ve)-.2 G .598 +(rage \214ll f).15 F(actor)-.1 E(.)-.55 E .306 +(Often, inserting ordered data into btrees results in a lo)108 278.4 R 2.806 +<778c>-.25 G .305(ll f)341.61 278.4 R(actor)-.1 E 5.305(.T)-.55 G .305 +(his implementation has been modi\214ed)386.56 278.4 R(to mak)108 290.4 Q 2.5 +(eo)-.1 G(rdered insertion the best case, resulting in a much better than norm\ +al page \214ll f)147.34 290.4 Q(actor)-.1 E(.)-.55 E F1 -.495(AC)72 307.2 S +(CESS METHOD SPECIFIC INFORMA).495 E(TION)-.855 E F0 .175 +(The btree access method speci\214c data structure pro)108 319.2 R .176 +(vided to)-.15 F F2(db_open)2.676 E F0 .176(is typedef)2.676 F 1.176 -.5('d a) +.55 H .176(nd named BTREEINFO.).5 F 2.638(AB)108 331.2 S .138 +(TREEINFO structure has at least the follo)124.528 331.2 R .137 +(wing \214elds, which may be initialized before calling)-.25 F F2(db_open)2.637 +E F0(:).24 E(u_int cachesize;)108 348 Q 3.743(As)133 360 S 1.243 +(uggested maximum size \(in bytes\) of the memory cache.)147.853 360 R 1.243 +(This v)6.243 F 1.243(alue is)-.25 F/F3 10/Times-Bold@0 SF(only)3.743 E F0 +(advisory)3.743 E 3.744(,a)-.65 G 1.244(nd the)514.036 360 R .017 +(access method will allocate more memory rather than f)133 372 R 2.517 +(ail. Since)-.1 F -2.15 -.25(ev e)2.517 H .016(ry search e).25 F .016 +(xamines the root page)-.15 F 1.319 +(of the tree, caching the most recently used pages substantially impro)133 384 +R -.15(ve)-.15 G 3.82(sa).15 G 1.32(ccess time.)441.05 384 R 1.32(In addition,) +6.32 F(ph)133 396 Q .911(ysical writes are delayed as long as possible, so a m\ +oderate cache can reduce the number of I/O)-.05 F 1.497 +(operations signi\214cantly)133 408 R 6.497(.O)-.65 G -.15(bv)243.674 408 S +(iously).15 E 3.997(,u)-.65 G 1.497(sing a cache increases \(b)288.821 408 R +1.498(ut only increases\) the lik)-.2 F 1.498(elihood of)-.1 F .336(corruption\ + or lost data if the system crashes while a tree is being modi\214ed.)133 420 R +(If)5.336 E F2(cac)2.836 E(hesize)-.15 E F0 .335(is 0 \(no size)2.835 F +(is speci\214ed\) a def)133 432 Q(ault cache is used.)-.1 E +(int \(*compare\)\(const DBT *, const DBT *\);)108 448.8 Q .194 +(Compare is the k)133 460.8 R .494 -.15(ey c)-.1 H .194(omparison function.).15 +F .194(It must return an inte)5.194 F .194 +(ger less than, equal to, or greater than)-.15 F .656(zero if the \214rst k)133 +472.8 R .956 -.15(ey a)-.1 H -.18(rg).15 G .656 +(ument is considered to be respecti).18 F -.15(ve)-.25 G .655 +(ly less than, equal to, or greater than the).15 F .798(second k)133 484.8 R +1.098 -.15(ey a)-.1 H -.18(rg).15 G 3.298(ument. The).18 F .798 +(same comparison function must be used on a gi)3.298 F -.15(ve)-.25 G 3.298(nt) +.15 G .799(ree e)462.774 484.8 R -.15(ve)-.25 G .799(ry time it is).15 F 2.79 +(opened. If)133 496.8 R F2(compar)2.79 E(e)-.37 E F0 .29 +(is NULL \(no comparison function is speci\214ed\), the k)2.79 F -.15(ey)-.1 G +2.79(sa).15 G .29(re compared le)451.08 496.8 R(xically)-.15 E(,)-.65 E +(with shorter k)133 508.8 Q -.15(ey)-.1 G 2.5(sc).15 G +(onsidered less than longer k)208.57 508.8 Q -.15(ey)-.1 G(s.).15 E +(u_long \215ags;)108 525.6 Q(The \215ag v)133 537.6 Q(alue is speci\214ed by) +-.25 E F2(or)2.5 E F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)302.2 537.6 S +(he follo)310.81 537.6 Q(wing v)-.25 E(alues:)-.25 E(R_DUP)133 554.4 Q .354 +(Permit duplicate k)158 566.4 R -.15(ey)-.1 G 2.854(si).15 G 2.854(nt)250.752 +566.4 S .355(he tree, i.e. permit insertion if the k)261.386 566.4 R .655 -.15 +(ey t)-.1 H 2.855(ob).15 G 2.855(ei)432.64 566.4 S .355(nserted already e) +442.715 566.4 R .355(xists in)-.15 F 1.65(the tree.)158 578.4 R 1.65(The def) +6.65 F 1.65(ault beha)-.1 F(vior)-.2 E 4.149(,a)-.4 G 4.149(sd)295.509 578.4 S +1.649(escribed in)308.548 578.4 R F2(db_open)4.149 E F0 1.649(\(3\), is to o) +.24 F -.15(ve)-.15 G 1.649(rwrite a matching k).15 F -.15(ey)-.1 G .783 +(when inserting a ne)158 590.4 R 3.283(wk)-.25 G 1.083 -.15(ey o)253.542 590.4 +T 3.283(rt).15 G 3.283(of)280.508 590.4 S .783(ail if the R_NOO)292.021 590.4 R +(VER)-.5 E .784(WRITE \215ag is speci\214ed.)-.55 F .784(The R_DUP)5.784 F .129 +(\215ag is o)158 602.4 R -.15(ve)-.15 G .129(rridden by the R_NOO).15 F(VER)-.5 +E .128(WRITE \215ag, and if the R_NOO)-.55 F(VER)-.5 E .128 +(WRITE \215ag is spec-)-.55 F(i\214ed, attempts to insert duplicate k)158 614.4 +Q -.15(ey)-.1 G 2.5(si).15 G(nto the tree will f)314.69 614.4 Q(ail.)-.1 E .835 +(If the database contains duplicate k)158 631.2 R -.15(ey)-.1 G .835 +(s, the order of retrie).15 F -.25(va)-.25 G 3.335(lo).25 G 3.336(fk)414.7 +631.2 S -.15(ey)426.266 631.2 S .836(/data pairs is unde\214ned if).15 F(the) +158 643.2 Q F2 -.1(ge)3.003 G(t).1 E F0 .503(function is used, ho)3.003 F(we) +-.25 E -.15(ve)-.25 G -.4(r,).15 G F2(seq)3.403 E F0 .502 +(function calls with the R_CURSOR \215ag set will al)3.003 F -.1(wa)-.1 G(ys).1 +E(return the logical `)158 655.2 Q(`\214rst')-.74 E 2.5('o)-.74 G 2.5(fa)263.72 +655.2 S .3 -.15(ny g)273.99 655.2 T(roup of duplicate k).15 E -.15(ey)-.1 G(s.) +.15 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315 +(ution August)-.2 F(1, 1995)2.5 E(1)535 732 Q EP +%%Page: 2 2 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 113.45(DB_BTREE\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 113.45(anual DB_BTREE\(3\))340.17 48 R(int lorder;)108 84 Q .65 +(The byte order for inte)133 96 R .65(gers in the stored database metadata.) +-.15 F .65(The number should represent the order)5.65 F .749(as an inte)133 108 +R .749(ger; for e)-.15 F .749(xample, big endian order w)-.15 F .749 +(ould be the number 4,321.)-.1 F(If)5.749 E/F1 10/Times-Italic@0 SF(lor)3.249 E +(der)-.37 E F0 .749(is 0 \(no order is)3.249 F +(speci\214ed\) the current host order is used.)133 120 Q(int maxk)108 136.8 Q +-.15(ey)-.1 G(page;).15 E .073(The maximum number of k)133 148.8 R -.15(ey)-.1 +G 2.573(sw).15 G .073(hich will be stored on an)266.155 148.8 R 2.574(ys)-.15 G +.074(ingle page.)376.436 148.8 R .074(This functionality is not cur)5.074 F(-) +-.2 E(rently implemented.)133 160.8 Q(int mink)108 177.6 Q -.15(ey)-.1 G(page;) +.15 E .532(The minimum number of k)133 189.6 R -.15(ey)-.1 G 3.031(sw).15 G +.531(hich will be stored on an)266.787 189.6 R 3.031(ys)-.15 G .531 +(ingle page.)379.813 189.6 R .531(This v)5.531 F .531(alue is used to deter) +-.25 F(-)-.2 E .558(mine which k)133 201.6 R -.15(ey)-.1 G 3.058(sw).15 G .558 +(ill be stored on o)211.914 201.6 R -.15(ve)-.15 G(r\215o).15 E 3.058(wp)-.25 G +.558(ages, i.e. if a k)319.424 201.6 R .859 -.15(ey o)-.1 H 3.059(rd).15 G .559 +(ata item is longer than the page-)408.336 201.6 R .063(size di)133 213.6 R +.063(vided by the mink)-.25 F -.15(ey)-.1 G .063(page v).15 F .063 +(alue, it will be stored on o)-.25 F -.15(ve)-.15 G(r\215o).15 E 2.563(wp)-.25 +G .062(ages instead of in the page itself.)408.816 213.6 R(If)133 225.6 Q F1 +(mink)2.5 E -.3(ey)-.1 G(pa).3 E -.1(ge)-.1 G F0(is 0 \(no minimum number of k) +2.6 E -.15(ey)-.1 G 2.5(si).15 G 2.5(ss)332.96 225.6 S(peci\214ed\) a v)343.24 +225.6 Q(alue of 2 is used.)-.25 E +(size_t \(*pre\214x\)\(const DBT *, const DBT *\);)108 242.4 Q .691 +(Pre\214x is the pre\214x comparison function.)133 254.4 R .692 +(If speci\214ed, this function must return the number of bytes)5.691 F .195 +(of the second k)133 266.4 R .495 -.15(ey a)-.1 H -.18(rg).15 G .195 +(ument which are necessary to determine that it is greater than the \214rst k) +.18 F .495 -.15(ey a)-.1 H -.18(rg).15 G(u-).18 E 2.994(ment. If)133 278.4 R +.494(the k)2.994 F -.15(ey)-.1 G 2.994(sa).15 G .494(re equal, the k)211.376 +278.4 R .794 -.15(ey l)-.1 H .494(ength should be returned.).15 F .494 +(Note, the usefulness of this function)5.494 F .327(is v)133 290.4 R .327 +(ery data dependent, b)-.15 F .326(ut, in some data sets can produce signi\214\ +cantly reduced tree sizes and search)-.2 F 2.789(times. If)133 302.4 R F1(pr) +2.789 E(e\214x)-.37 E F0 .289(is NULL \(no pre\214x function is speci\214ed\),) +2.789 F/F2 10/Times-Bold@0 SF(and)2.789 E F0 .29 +(no comparison function is speci\214ed, a)2.79 F(def)133 314.4 Q .902(ault le) +-.1 F .902(xical comparison function is used.)-.15 F(If)5.901 E F1(pr)3.401 E +(e\214x)-.37 E F0 .901(is NULL and a comparison function is speci-)3.401 F +(\214ed, no pre\214x comparison is done.)133 326.4 Q(u_int psize;)108 343.2 Q +-.15(Pa)133 355.2 S .118 +(ge size is the size \(in bytes\) of the pages used for nodes in the tree.).15 +F .119(The minimum page size is 512)5.119 F .377 +(bytes and the maximum page size is 64K.)133 367.2 R(If)5.376 E F1(psize)2.876 +E F0 .376(is 0 \(no page size is speci\214ed\) a page size is cho-)2.876 F +(sen based on the underlying \214le system I/O block size.)133 379.2 Q .79 +(If the \214le already e)108 396 R .79(xists \(and the O_TR)-.15 F .79 +(UNC \215ag is not speci\214ed\), the v)-.4 F .79 +(alues speci\214ed for the parameters)-.25 F +(\215ags, lorder and psize are ignored in f)108 408 Q -.2(avo)-.1 G 2.5(ro).2 G +2.5(ft)284.4 408 S(he v)293.01 408 Q(alues used when the tree w)-.25 E +(as created.)-.1 E/F3 9/Times-Bold@0 SF(DB OPERA)72 424.8 Q(TIONS)-.855 E F0 +1.037(The functions returned by)108 436.8 R F1(db_open)3.537 E F0 1.036 +(for the btree access method are as described in)3.536 F F1(db_open)3.536 E F0 +1.036(\(3\), with the).24 F(follo)108 448.8 Q(wing e)-.25 E +(xceptions and additions:)-.15 E 5.28(type The)108 465.6 R(type is DB_BTREE.) +2.5 E 10.28(del Space)108 482.4 R 1.681(freed up by deleting k)4.181 F -.15(ey) +-.1 G 1.681(/data pairs from the tree is ne).15 F -.15(ve)-.25 G 4.181(rr).15 G +1.682(eclaimed, although it is reused)411.342 482.4 R .734(where possible.)133 +494.4 R .734(This means that the btree storage structure is gro)5.734 F(w-only) +-.25 E 5.734(.T)-.65 G .734(he only solutions are to)443.734 494.4 R -.2(avo) +133 506.4 S(id e).2 E(xcessi)-.15 E .3 -.15(ve d)-.25 H +(eletions, or to create a fresh tree periodically from a scan of an e).15 E +(xisting one.)-.15 E 9.72(put The)108 523.2 R F1(put)2.5 E F0(function tak)2.5 +E(es the follo)-.1 E(wing additional \215ags:)-.25 E(R_SETCURSOR)133 540 Q +(Store the k)158 552 Q -.15(ey)-.1 G(/data pair).15 E 2.5(,s)-.4 G +(etting or initializing the position of the cursor to reference it.)256.5 552 Q +9.17(seq F)108 568.8 R(orw)-.15 E +(ard sequential scans of a tree are from the least k)-.1 E .3 -.15(ey t)-.1 H +2.5(ot).15 G(he greatest.)373.55 568.8 Q .892(The returned k)133 585.6 R 1.192 +-.15(ey f)-.1 H .892(or the).15 F F1(seq)3.393 E F0 .893 +(function is not necessarily an e)3.393 F .893 +(xact match for the speci\214ed k)-.15 F 1.193 -.15(ey i)-.1 H 3.393(nt).15 G +(he)530.56 585.6 Q .5(btree access method.)133 597.6 R .5(The returned k)5.5 F +.8 -.15(ey i)-.1 H 3(st).15 G .499(he smallest k)307.04 597.6 R .799 -.15(ey g) +-.1 H .499(reater than or equal to the speci\214ed k).15 F -.15(ey)-.1 G(,)-.5 +E(permitting partial k)133 609.6 Q .3 -.15(ey m)-.1 H +(atches and range searches.).15 E(The)133 626.4 Q F1(seq)2.5 E F0(function tak) +2.5 E(es the follo)-.1 E(wing additional \215ags:)-.25 E(R_LAST)133 643.2 Q .04 +(The last k)158 655.2 R -.15(ey)-.1 G .04(/data pair of the database is return\ +ed, and the cursor is set or initialized to reference).15 F(it.)158 667.2 Q +(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315 +(ution August)-.2 F(1, 1995)2.5 E(2)535 732 Q EP +%%Page: 3 3 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 113.45(DB_BTREE\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 113.45(anual DB_BTREE\(3\))340.17 48 R(R_PREV)133 84 Q(Retrie)158 96 +Q .59 -.15(ve t)-.25 H .29(he k).15 F -.15(ey)-.1 G .29 +(/data pair immediately before the cursor).15 F 5.29(.I)-.55 G 2.79(ft)395.73 +96 S .29(he cursor is not yet set, this is the)404.63 96 R +(same as the R_LAST \215ag.)158 108 Q/F1 9/Times-Bold@0 SF(ERR)72 124.8 Q(ORS) +-.27 E F0(The)108 136.8 Q/F2 10/Times-Italic@0 SF(btr)2.541 E(ee)-.37 E F0 .041 +(access method functions may f)2.541 F .041(ail and set)-.1 F F2(errno)2.541 E +F0 .041(for an)2.541 F 2.541(yo)-.15 G 2.541(ft)376.152 136.8 S .041 +(he errors speci\214ed for the library func-)384.803 136.8 R(tion)108 148.8 Q +F2(db_open)2.5 E F0(\(3\).).24 E F1(SEE ALSO)72 165.6 Q F2(db_hash)108 177.6 Q +F0(\(3\),).28 E F2(db_loc)2.5 E(k)-.2 E F0(\(3\),).67 E F2(db_lo)2.5 E(g)-.1 E +F0(\(3\),).22 E F2(db_mpool)2.5 E F0(\(3\),).51 E F2(db_open)2.5 E F0(\(3\),) +.24 E F2(db_r)2.5 E(ecno)-.37 E F0(\(3\),).18 E F2(db_txn)2.5 E F0(\(3\)).24 E +F2(The Ubiquitous B-tr)108 201.6 Q(ee)-.37 E F0 2.5(,D).18 G(ouglas Comer) +209.47 201.6 Q 2.5(,A)-.4 G(CM Comput. Surv)276.72 201.6 Q 2.5(.1)-.65 G +(1, 2 \(June 1979\), 121-138.)360.25 201.6 Q F2(Pr)108 225.6 Q 1.588 +(e\214x B-tr)-.37 F(ees)-.37 E F0 4.088(,B).27 G 1.587(ayer and Unterauer) +177.636 225.6 R 4.087(,A)-.4 G 1.587(CM T)270.447 225.6 R 1.587 +(ransactions on Database Systems, V)-.35 F 1.587(ol. 2, 1 \(March 1977\),)-1.29 +F(11-26.)108 237.6 Q F2(The Art of Computer Pr)108 261.6 Q -.1(og)-.45 G -.15 +(ra).1 G(mming V).15 E(ol. 3: Sorting and Sear)-1.11 E -.15(ch)-.37 G(ing).15 E +F0 2.5(,D).22 G(.E. Knuth, 1968, pp 471-480.)382 261.6 Q(4.4 Berk)72 732 Q(ele) +-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E +(3)535 732 Q EP +%%Page: 1 4 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 117.9(DB_HASH\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 117.9(anual DB_HASH\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 +84 S(ME).18 E F0(db_hash \255 hash database access method)108 96 Q F1 +(DESCRIPTION)72 112.8 Q F0 .485(The DB library is a f)108 124.8 R .485 +(amily of groups of functions that pro)-.1 F .486 +(vides a modular programming interf)-.15 F .486(ace to trans-)-.1 F .823 +(actions and record-oriented \214le access.)108 136.8 R .822 +(The library includes support for transaction, locking, logging and)5.822 F +.258(\214le b)108 148.8 R(uf)-.2 E .258(fering functionality)-.25 F 2.758(,a) +-.65 G 2.758(sw)223.214 148.8 S .258(ell as v)237.082 148.8 R .258(arious inde) +-.25 F -.15(xe)-.15 G 2.758(da).15 G .258(ccess methods.)331.434 148.8 R(Man) +5.258 E 2.758(yo)-.15 G 2.758(ft)427.878 148.8 S .258 +(he functional groups \(e.g.)436.746 148.8 R .528(the memory pool functions\) \ +are useful independently of the rest of the DB functions, although some func-) +108 160.8 R .306(tional groups are e)108 172.8 R .306 +(xplicitly based on other functional groups \(e.g.)-.15 F .306 +(transactions and logging\).)5.306 F -.15(Fo)5.306 G 2.806(rag).15 G(eneral) +515.57 172.8 Q .245(description of transactions, see)108 184.8 R/F2 10 +/Times-Italic@0 SF(db_txn)2.745 E F0 2.745(\(3\). F).24 F .245 +(or a general description of the access methods, see)-.15 F F2(db_open)2.745 E +F0(\(3\)).24 E .307(and then the indi)108 196.8 R .307 +(vidual access method manual pages:)-.25 F F2(db_btr)2.808 E(ee)-.37 E F0 +(\(3\),).18 E F2(db_hash)2.808 E F0(\(3\),).28 E F2(db_lo)2.808 E(g)-.1 E F0 +.308(\(3\) and).22 F F2(db_r)2.808 E(ecno)-.37 E F0(\(3\).).18 E -.15(Fo)108 +208.8 S 3.635(rag).15 G 1.135(eneral description of the lock manager)138.45 +208.8 R 3.635(,s)-.4 G(ee)307.32 208.8 Q F2(db_loc)3.635 E(k)-.2 E F0 3.635 +(\(3\). F).67 F 1.135(or a general description of the memory)-.15 F +(pool manager)108 220.8 Q 2.5(,s)-.4 G(ee)171.2 220.8 Q F2(db_mpool)2.5 E F0 +(\(3\).).51 E +(This manual page describes speci\214c details of the hashing access method.) +108 237.6 Q .59(The hash data structure is an e)108 254.4 R .591 +(xtensible, dynamic hashing scheme.)-.15 F(Backw)5.591 E .591 +(ard compatible interf)-.1 F .591(aces to the)-.1 F .209 +(functions described in)108 266.4 R F2(dbm)2.709 E F0 .209(\(3\), and).32 F F2 +(ndbm)2.709 E F0 .209(\(3\) are pro).32 F .209(vided, ho)-.15 F(we)-.25 E -.15 +(ve)-.25 G 2.708(rt).15 G .208(hese interf)382.71 266.4 R .208 +(aces are not compatible with)-.1 F(pre)108 278.4 Q(vious \214le formats.)-.25 +E F1 -.495(AC)72 295.2 S(CESS METHOD SPECIFIC INFORMA).495 E(TION)-.855 E F0 +.612(The hash access method speci\214c data structure pro)108 307.2 R .612 +(vided to)-.15 F F2(db_open)3.112 E F0 .612(is typedef)3.112 F 1.612 -.5('d a) +.55 H .613(nd named HASHINFO.).5 F 2.5(AH)108 319.2 S +(ASHINFO structure has at least the follo)124.94 319.2 Q +(wing \214elds, which may be initialized before calling)-.25 E F2(db_open)2.5 E +F0(:).24 E(u_int bsize;)108 336 Q F2(Bsize)133 348 Q F0 2.041 +(de\214nes the hash table b)4.541 F(uck)-.2 E 2.041(et size, and is, by def)-.1 +F 2.04(ault, 256 bytes.)-.1 F 2.04(It may be preferable to)7.04 F +(increase the page size for disk-resident tables and tables with lar)133 360 Q +(ge data items.)-.18 E(u_int cachesize;)108 376.8 Q 3.846(As)133 388.8 S 1.347 +(uggested maximum size, in bytes, of the memory cache.)147.956 388.8 R 1.347 +(This v)6.347 F 1.347(alue is)-.25 F/F3 10/Times-Bold@0 SF(only)3.847 E F0 +(advisory)3.847 E 3.847(,a)-.65 G 1.347(nd the)513.933 388.8 R +(access method will allocate more memory rather than f)133 400.8 Q(ail.)-.1 E +(u_int f)108 417.6 Q -.1(fa)-.25 G(ctor;).1 E F2(Ffactor)133 429.6 Q F0 1.17 +(indicates a desired density within the hash table.)3.67 F 1.169 +(It is an approximation of the number of)6.169 F -.1(ke)133 441.6 S 1.162 +(ys allo)-.05 F 1.162(wed to accumulate in an)-.25 F 3.662(yo)-.15 G 1.162 +(ne b)284.852 441.6 R(uck)-.2 E 1.162(et, determining when the hash table gro) +-.1 F 1.162(ws or shrinks.)-.25 F(The def)133 453.6 Q(ault v)-.1 E(alue is 8.) +-.25 E(u_int32_t \(*hash\)\(const v)108 470.4 Q(oid *, size_t\);)-.2 E F2(Hash) +133 482.4 Q F0 .788(is a user de\214ned hash function.)3.288 F .787 +(Since no hash function performs equally well on all possible)5.788 F .017 +(data, the user may \214nd that the b)133 494.4 R .018 +(uilt-in hash function does poorly on a particular data set.)-.2 F .018 +(User speci-)5.018 F 1.154(\214ed hash functions must tak)133 506.4 R 3.654(et) +-.1 G 1.354 -.1(wo a)260.61 506.4 T -.18(rg).1 G 1.154 +(uments \(a pointer to a byte string and a length\) and return a).18 F +(32-bit quantity to be used as the hash v)133 518.4 Q(alue.)-.25 E .665 +(If a hash function is speci\214ed,)133 535.2 R F2(hash_open)3.165 E F0 .666 +(will attempt to determine if the hash function speci\214ed is)3.166 F +(the same as the one with which the database w)133 547.2 Q +(as created, and will f)-.1 E(ail if it is not.)-.1 E(int lorder;)108 564 Q .65 +(The byte order for inte)133 576 R .65(gers in the stored database metadata.) +-.15 F .65(The number should represent the order)5.65 F .748(as an inte)133 588 +R .749(ger; for e)-.15 F .749(xample, big endian order w)-.15 F .749 +(ould be the number 4,321.)-.1 F(If)5.749 E F2(lor)3.249 E(der)-.37 E F0 .749 +(is 0 \(no order is)3.249 F .456(speci\214ed\) the current host order is used.) +133 600 R .456(If the)5.456 F .456(\214le already e)5.456 F .456 +(xists, the speci\214ed v)-.15 F .455(alue is ignored and)-.25 F(the v)133 612 +Q(alue speci\214ed when the tree w)-.25 E(as created is used.)-.1 E +(u_int nelem;)108 628.8 Q F2(Nelem)133 640.8 Q F0 1.225 +(is an estimate of the \214nal size of the hash table.)3.724 F 1.225 +(If not set or set too lo)6.225 F 2.525 -.65(w, h)-.25 H 1.225(ash tables will) +.65 F -.15(ex)133 652.8 S 1.294(pand gracefully as k).15 F -.15(ey)-.1 G 3.794 +(sa).15 G 1.294(re entered, although a slight performance de)248.296 652.8 R +1.293(gradation may be noticed.)-.15 F(The def)133 664.8 Q(ault v)-.1 E +(alue is 1.)-.25 E .79(If the \214le already e)108 681.6 R .79 +(xists \(and the O_TR)-.15 F .79(UNC \215ag is not speci\214ed\), the v)-.4 F +.79(alues speci\214ed for the parameters)-.25 F(bsize, f)108 693.6 Q -.1(fa) +-.25 G(ctor).1 E 2.5(,l)-.4 G(order and nelem are ignored and the v)167.23 +693.6 Q(alues speci\214ed when the tree w)-.25 E(as created are used.)-.1 E +(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315 +(ution August)-.2 F(1, 1995)2.5 E(1)535 732 Q EP +%%Page: 2 5 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 117.9(DB_HASH\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 117.9(anual DB_HASH\(3\))340.17 48 R/F1 9/Times-Bold@0 SF(DB OPERA) +72 84 Q(TIONS)-.855 E F0(The functions returned by)108 96 Q/F2 10 +/Times-Italic@0 SF(db_open)2.5 E F0 +(for the hash access method are as described in)2.5 E F2(db_open)2.5 E F0 +(\(3\).).24 E F1(ERR)72 112.8 Q(ORS)-.27 E F0(The)108 124.8 Q F2(hash)2.609 E +F0 .109(access method functions may f)2.609 F .109(ail and set)-.1 F F2(errno) +2.609 E F0 .109(for an)2.609 F 2.609(yo)-.15 G 2.609(ft)375.678 124.8 S .109 +(he errors speci\214ed for the library func-)384.397 124.8 R(tion)108 136.8 Q +F2(db_open)2.5 E F0(\(3\).).24 E F1(SEE ALSO)72 153.6 Q F2(db_btr)108 165.6 Q +(ee)-.37 E F0(\(3\),).18 E F2(db_loc)2.5 E(k)-.2 E F0(\(3\),).67 E F2(db_lo)2.5 +E(g)-.1 E F0(\(3\),).22 E F2(db_mpool)2.5 E F0(\(3\),).51 E F2(db_open)2.5 E F0 +(\(3\),).24 E F2(db_r)2.5 E(ecno)-.37 E F0(\(3\),).18 E F2(db_txn)2.5 E F0 +(\(3\)).24 E F2(Dynamic Hash T)108 189.6 Q(ables)-.92 E F0 2.5(,P).27 G(er) +206.79 189.6 Q(-Ak)-.2 E 2.5(eL)-.1 G(arson, Communications of the A)242.86 +189.6 Q(CM, April 1988.)-.4 E F2 2.5(AN)108 213.6 S .3 -.15(ew H)123.28 213.6 T +(ash P).15 E(ac)-.8 E(ka)-.2 E .2 -.1(ge f)-.1 H(or UNIX).1 E F0 2.5(,M).94 G +(ar)248.41 213.6 Q(go Seltzer)-.18 E 2.5(,U)-.4 G(SENIX Proceedings, W)308.09 +213.6 Q(inter 1991.)-.4 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib) +132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(2)535 732 Q EP +%%Page: 1 6 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 117.9(DB_LOCK\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 117.9(anual DB_LOCK\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 +84 S(ME).18 E F0(db_lock \255 general purpose lock manager)108 96 Q F1 +(SYNOPSIS)72 112.8 Q/F2 10/Times-Bold@0 SF(#include <db_lock.h>)108 124.8 Q +(int)108 148.8 Q(lock_cr)108 160.8 Q(eate\(const char *path, mode_t mode,)-.18 +E(int lock_modes, const int8_t con\215icts[][], u_int maxlocks\);)158 172.8 Q +(LOCK_T)108 196.8 Q(ABLE_T *)-.9 E(lock_open\(const char *path\);)108 208.8 Q +(int)108 232.8 Q(lock_v)108 244.8 Q(ec\(LOCK_T)-.1 E(ABLE_T *lt, DBT *lock)-.9 +E(er)-.1 E 2.5(,s)-.92 G(truct timespec *timeout,)308.21 244.8 Q +(LOCK_REQ_T list[], int nlist, LOCK_REQ_T **elistp, DBT *con\215ict\);)158 +256.8 Q(int)108 280.8 Q(lock_get\(LOCK_T)108 292.8 Q +(ABLE_T *lt, const DBT *lock)-.9 E(er)-.1 E(,)-.92 E +(const DBT *obj, const lock_mode_t lock_mode, LOCK_T **lockp\);)158 304.8 Q +(int)108 328.8 Q(lock_put\(LOCK_T *lockp\);)108 340.8 Q(int)108 364.8 Q +(lock_close\(LOCK_T)108 376.8 Q(ABLE_T *lt\);)-.9 E(int)108 400.8 Q +(lock_unlink\(const char *path, int f)108 412.8 Q(or)-.25 E(ce\);)-.18 E F1 +(DESCRIPTION)72 429.6 Q F0 .485(The DB library is a f)108 441.6 R .485 +(amily of groups of functions that pro)-.1 F .486 +(vides a modular programming interf)-.15 F .486(ace to trans-)-.1 F .823 +(actions and record-oriented \214le access.)108 453.6 R .822 +(The library includes support for transaction, locking, logging and)5.822 F +.258(\214le b)108 465.6 R(uf)-.2 E .258(fering functionality)-.25 F 2.758(,a) +-.65 G 2.758(sw)223.214 465.6 S .258(ell as v)237.082 465.6 R .258(arious inde) +-.25 F -.15(xe)-.15 G 2.758(da).15 G .258(ccess methods.)331.434 465.6 R(Man) +5.258 E 2.758(yo)-.15 G 2.758(ft)427.878 465.6 S .258 +(he functional groups \(e.g.)436.746 465.6 R .528(the memory pool functions\) \ +are useful independently of the rest of the DB functions, although some func-) +108 477.6 R .306(tional groups are e)108 489.6 R .306 +(xplicitly based on other functional groups \(e.g.)-.15 F .306 +(transactions and logging\).)5.306 F -.15(Fo)5.306 G 2.806(rag).15 G(eneral) +515.57 489.6 Q .245(description of transactions, see)108 501.6 R/F3 10 +/Times-Italic@0 SF(db_txn)2.745 E F0 2.745(\(3\). F).24 F .245 +(or a general description of the access methods, see)-.15 F F3(db_open)2.745 E +F0(\(3\)).24 E .307(and then the indi)108 513.6 R .307 +(vidual access method manual pages:)-.25 F F3(db_btr)2.808 E(ee)-.37 E F0 +(\(3\),).18 E F3(db_hash)2.808 E F0(\(3\),).28 E F3(db_lo)2.808 E(g)-.1 E F0 +.308(\(3\) and).22 F F3(db_r)2.808 E(ecno)-.37 E F0(\(3\).).18 E -.15(Fo)108 +525.6 S 3.635(rag).15 G 1.135(eneral description of the lock manager)138.45 +525.6 R 3.635(,s)-.4 G(ee)307.32 525.6 Q F3(db_loc)3.635 E(k)-.2 E F0 3.635 +(\(3\). F).67 F 1.135(or a general description of the memory)-.15 F +(pool manager)108 537.6 Q 2.5(,s)-.4 G(ee)171.2 537.6 Q F3(db_mpool)2.5 E F0 +(\(3\).).51 E +(This manual page describes speci\214c details of the locking interf)108 554.4 +Q(ace.)-.1 E F3(Db_loc)108 571.2 Q(k)-.2 E F0 .346(is the library interf)2.846 +F .346(ace intended to pro)-.1 F .346(vide general-purpose locking.)-.15 F .347 +(While designed to w)5.347 F .347(ork with)-.1 F .946(the other DB functions, \ +these functions are also useful for more general locking purposes.)108 583.2 R +.946(Locks can be)5.946 F(shared between processes.)108 595.2 Q .682 +(The function)108 612 R F3(loc)3.182 E(k_cr)-.2 E(eate)-.37 E F0 .683 +(creates and initializes the lock table identi\214ed by the)3.182 F F3(path) +3.183 E F0(directory)3.183 E 5.683(.T)-.65 G .683(his direc-)501.827 612 R .565 +(tory must already e)108 624 R .565(xist when)-.15 F F3(loc)3.065 E(k_cr)-.2 E +(eate)-.37 E F0 .565(is called.)3.065 F .565(If the lock table identi\214ed by) +5.565 F F3(path)3.064 E F0 .564(already e)3.064 F .564(xists, then)-.15 F F3 +(loc)108 636 Q(k_cr)-.2 E(eate)-.37 E F0 .974 +(returns success without further action.)3.474 F .974 +(The \214les associated with the lock table are created in)5.974 F 2.017 +(the directory speci\214ed by)108 648 R F3(path)4.517 E F0 7.017(.\().28 G +2.017(The group of the created \214les is based on the system and directory) +250.846 648 R(def)108 660 Q .076(aults, and is not further speci\214ed by)-.1 F +F3(loc)2.576 E(k_cr)-.2 E(eate)-.37 E F0 2.576(.\) All).18 F .076 +(\214les created by)2.576 F F3(loc)2.576 E(k_cr)-.2 E(eate)-.37 E F0 .077 +(are created with mode)2.577 F F3(mode)108 672 Q F0(\(as described in)2.5 E F3 +-.15(ch)2.5 G(mod).15 E F0(\(2\)\) and modi\214ed by the process' umask v).77 E +(alue \(see)-.25 E F3(umask)2.5 E F0(\(2\)\).).67 E .739(The parameter)108 +688.8 R F3(loc)3.239 E(k_modes)-.2 E F0 .739(is the number of lock modes to be\ + recognized by the lock table \(including the)3.239 F(4.4 Berk)72 732 Q(ele)-.1 +E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(1) +535 732 Q EP +%%Page: 2 7 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 117.9(DB_LOCK\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 117.9(anual DB_LOCK\(3\))340.17 48 R -.74(``)108 84 S(not-granted') +.74 E 2.5('m)-.74 G 2.5(ode\). The)176.22 84 R(parameter)2.5 E/F1 10 +/Times-Italic@0 SF(con\215icts)2.5 E F0(is an)2.5 E F1(loc)2.5 E(k_modes)-.2 E +F0(by)2.5 E F1(loc)2.5 E(k_modes)-.2 E F0(array)2.5 E 5(.A)-.65 G(non-0 v) +467.59 84 Q(alue for:)-.25 E(con\215icts[requested_mode][held_mode])158 108 Q +.174(indicates that)108 132 R F1 -.37(re)2.674 G(quested_mode).37 E F0(and) +2.674 E F1(held_mode)2.674 E F0 2.675(con\215ict. The)2.674 F -.74(``)2.675 G +(not-granted').74 E 2.675('m)-.74 G .175(ode must be represented by 0.)419.705 +132 R(The include \214le <db_lock.h> declares tw)108 148.8 Q 2.5(oc)-.1 G +(ommonly used con\215ict arrays:)283.87 148.8 Q(int lock_sx_n;)108 165.6 Q +(const int8_t lock_sx_c[lock_sx_n][lock_sx_n];)108 177.6 Q(These v)133 189.6 Q +(ariables specify a con\215ict array for a simple scheme using shared and e) +-.25 E(xclusi)-.15 E .3 -.15(ve l)-.25 H(ock modes.).15 E(int lock_g_n;)108 +206.4 Q(const int8_t lock_g_c[lock_g_n][lock_g_n];)108 218.4 Q 1.071(These v) +133 230.4 R 1.071(ariables specify a con\215ict array that in)-.25 F -.2(vo)-.4 +G(lv).2 E 1.071(es v)-.15 F 1.07 +(arious intent lock modes \(e.g. intent shared\))-.25 F +(that are used for multigranularity locking.)133 242.4 Q 1.53 +(In addition, <db_lock.h> de\214nes the follo)108 259.2 R 1.531 +(wing macros that name lock modes for use with the standard)-.25 F(tables abo) +108 271.2 Q -.15(ve)-.15 G(:).15 E(LOCK_IS)144 288 Q(intent shared)169 300 Q +(LOCK_IX)144 312 Q(intent e)169 324 Q(xclusi)-.15 E -.15(ve)-.25 G(LOCK_NG)144 +336 Q(not granted \(al)169 348 Q -.1(wa)-.1 G(ys 0\)).1 E(LOCK_S)144 360 Q +(shared)169 372 Q(LOCK_SIX)144 384 Q(shared/intent e)169 396 Q(xclusi)-.15 E +-.15(ve)-.25 G(LOCK_X)144 408 Q -.15(ex)169 420 S(clusi).15 E -.15(ve)-.25 G F1 +(Maxloc)108 436.8 Q(ks)-.2 E F0 .442(is the maximum number of locks to be held\ + or requested in the table, and is used by)2.942 F F1(loc)2.941 E(k_cr)-.2 E +(eate)-.37 E F0(to estimate ho)108 448.8 Q 2.5(wm)-.25 G +(uch space to allocate for v)181.36 448.8 Q(arious lock-table data structures.) +-.25 E(The function)108 465.6 Q F1(loc)2.5 E(k_cr)-.2 E(eate)-.37 E F0 +(returns -1 on f)2.5 E(ailure, setting)-.1 E F1(errno)2.5 E F0 2.5(,a).18 G +(nd 0 on success.)356.07 465.6 Q .202(The function)108 482.4 R F1(loc)2.703 E +(k_open)-.2 E F0 .203(returns a pointer to the lock table identi\214ed by)2.703 +F F1(path)2.703 E F0 2.703(,w).28 G .203(hich must ha)425.678 482.4 R .503 -.15 +(ve a)-.2 H .203(lready been).15 F 1.162(created by a call to)108 494.4 R F1 +(loc)3.661 E(k_cr)-.2 E(eate)-.37 E F0 6.161(.T).18 G 1.161(he process must ha) +252.869 494.4 R 1.461 -.15(ve p)-.2 H 1.161 +(ermission to read and write \214les with o).15 F(wners,)-.25 E .06 +(groups and permissions as described for)108 506.4 R F1(loc)2.56 E(k_cr)-.2 E +(eate)-.37 E F0 5.06(.T).18 G(he)331.04 506.4 Q F1(loc)2.56 E(k_open)-.2 E F0 +.06(function returns NULL on f)2.56 F .06(ailure, set-)-.1 F(ting)108 518.4 Q +F1(errno)2.5 E F0(.).18 E .986(The function)108 535.2 R F1(loc)3.486 E(k_vec) +-.2 E F0 .986 +(atomically obtains and releases one or more locks from the designated table.) +3.486 F(The)5.986 E(function)108 547.2 Q F1(loc)4.52 E(k_vec)-.2 E F0 2.02(is \ +intended to support acquisition or trading of multiple locks under one lock ta\ +ble)4.52 F(semaphore, as is needed for lock coupling or in multigranularity lo\ +cking for lock escalation.)108 559.2 Q .746(If an)108 576 R 3.246(yo)-.15 G +3.246(ft)140.442 576 S .746(he requested locks cannot be acquired or an)149.798 +576 R 3.246(yo)-.15 G 3.246(ft)342.786 576 S .746 +(he locks to be released cannot be released, no)352.142 576 R .117 +(locks are acquired and no locks are released, and)108 588 R F1(loc)2.617 E +(k_vec)-.2 E F0 .117(returns an error)2.617 F 5.117(.T)-.55 G .117(he function) +419.211 588 R F1(loc)2.617 E(k_vec)-.2 E F0 .118(returns 0)2.617 F 1.143 +(on success.)108 600 R 1.143(If an error occurs,)6.143 F F1(loc)3.642 E(k_vec) +-.2 E F0 1.142(returns one of the follo)3.642 F 1.142(wing v)-.25 F 3.642 +(alues. In)-.25 F 1.142(addition, if)3.642 F F1(elistp)3.642 E F0 1.142(is not) +3.642 F(NULL, it is set to point to the LOCK_REQ_T entry which w)108 612 Q +(as being processed when the error occurred.)-.1 E(LOCK_GET_DEADLOCK)108 628.8 +Q .431(The speci\214ed)133 640.8 R F1(loc)2.931 E -.1(ke)-.2 G(r).1 E F0 -.1 +(wa)2.931 G 2.931(ss).1 G .431(elected as a victim in order to resolv)239.854 +640.8 R 2.932(ead)-.15 G 2.932(eadlock. In)407.718 640.8 R .432 +(this case, if the)2.932 F F1(con-)2.932 E(\215ict)133 652.8 Q F0(ar)2.901 E +.401(gument is non-NULL, it is set to reference the identity of a lock)-.18 F +.4(er holding the lock referenced)-.1 F(by)133 664.8 Q F1(elistp)2.585 E F0 +.085(at the time the request w)2.585 F .085(as denied.)-.1 F .086 +(\(This identity resides in static memory and may be o)5.086 F -.15(ve)-.15 G +-.2(r-).15 G(written by subsequent calls to)133 676.8 Q F1(loc)2.5 E(k_vec)-.2 +E F0(\).).31 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q +99.315(ution August)-.2 F(1, 1995)2.5 E(2)535 732 Q EP +%%Page: 3 8 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 117.9(DB_LOCK\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 117.9(anual DB_LOCK\(3\))340.17 48 R(LOCK_GET_ERR)108 84 Q(OR)-.4 E +(An error occurred and the e)133 96 Q(xternal v)-.15 E(ariable)-.25 E/F1 10 +/Times-Italic@0 SF(errno)2.5 E F0(has been set to indicate the error)2.5 E(.) +-.55 E(LOCK_GET_NO)108 112.8 Q(THELD)-.4 E +(The lock cannot be released, as it w)133 124.8 Q(as not held by the)-.1 E F1 +(loc)2.5 E -.1(ke)-.2 G(r).1 E F0(.).73 E(LOCK_GET_RESOURCE)108 141.6 Q 2.311(\ +The lock manager is unable to grant the requested locks because of limited int\ +ernal resources.)133 153.6 R(\(Releasing locks may allo)133 165.6 Q 2.5(wf)-.25 +G(uture calls to)249.4 165.6 Q F1(loc)2.5 E(k_vec)-.2 E F0(to succeed.\))2.5 E +(LOCK_GET_TIMEOUT)108 182.4 Q 3.204(At)133 194.4 S .704(imeout ar)146.204 194.4 +R .704(gument w)-.18 F .705(as speci\214ed, and the requested locks were not a) +-.1 F -.25(va)-.2 G .705(ilable soon enough.).25 F .705(In this)5.705 F .625 +(case, if the)133 206.4 R F1(con\215ict)3.125 E F0(ar)3.125 E .624 +(gument is non-NULL, it is set to reference the identity of a lock)-.18 F .624 +(er holding the)-.1 F .551(lock referenced by)133 218.4 R F1(elistp)3.052 E F0 +.552(at the time the request w)3.052 F .552(as denied.)-.1 F .552 +(\(This identity resides in static memory)5.552 F(and may be o)133 230.4 Q -.15 +(ve)-.15 G(rwritten by subsequent calls to).15 E F1(loc)2.5 E(k_vec)-.2 E F0 +(\).).31 E(The)108 247.2 Q F1(loc)3.005 E -.1(ke)-.2 G(r).1 E F0(ar)3.005 E +.504(gument speci\214ed to)-.18 F F1(loc)3.004 E(k_vec)-.2 E F0 .504 +(is a pointer to an untyped byte string which identi\214es the entity)3.004 F +(requesting or releasing the lock.)108 259.2 Q(If)5 E F1(loc)2.5 E -.1(ke)-.2 G +(r).1 E F0(is NULL, the calling process' pid is used instead.)2.5 E(The)108 276 +Q F1(timeout)4.628 E F0(ar)4.628 E 2.128(gument pro)-.18 F 2.128(vided to)-.15 +F F1(loc)4.628 E(k_vec)-.2 E F0 2.128(speci\214es a maximum interv)4.628 F +2.128(al to w)-.25 F 2.128(ait for the locks to be)-.1 F 2.642(granted. If)108 +288 R F1(timeout)2.642 E F0 .142(is NULL, it is ignored, and)2.642 F F1(loc) +2.642 E(k_vec)-.2 E F0 .141 +(will not return until all of the locks are acquired or)2.642 F +(an error has occurred.)108 300 Q(The)108 316.8 Q F1(list)4.263 E F0 1.764 +(array pro)4.263 F 1.764(vided to)-.15 F F1(loc)4.264 E(k_vec)-.2 E F0 1.764 +(is typedef)4.264 F 2.764 -.5('d i).55 H 4.264(n<).5 G 1.764 +(db_lock.h> as LOCK_REQ_T)331.114 316.8 R 6.764(.A)-.74 G(LOCK_REQ_T)476.67 +316.8 Q(structure has at least the follo)108 328.8 Q +(wing \214elds, which must be initialized before calling)-.25 E F1(loc)2.5 E +(k_vec)-.2 E F0(:).31 E(enum lock)108 345.6 Q(op op;)-.1 E +(The operation to be performed, which must be set to one of the follo)133 357.6 +Q(wing v)-.25 E(alues:)-.25 E(LOCK_GET)133 374.4 Q .201 +(Get a lock, as de\214ned by the v)158 386.4 R .201(alues of)-.25 F F1(loc) +2.701 E -.1(ke)-.2 G(r).1 E F0(,).73 E F1(obj)2.701 E F0(and)2.7 E F1(loc)2.7 E +(k_mode)-.2 E F0 5.2(.U).18 G .2(pon return from)435.99 386.4 R F1(loc)2.7 E +(k_vec)-.2 E F0(,).31 E .161(if the)158 398.4 R F1(loc)2.661 E(kp)-.2 E F0 .162 +(\214eld is non-NULL, a reference to the acquired lock is stored there.)2.662 F +.162(\(This reference)5.162 F(is in)158 410.4 Q -.25(va)-.4 G(lidated by an).25 +E 2.5(yc)-.15 G(all to)247.19 410.4 Q F1(loc)2.5 E(k_vec)-.2 E F0(or)2.5 E F1 +(loc)2.5 E(k_put)-.2 E F0(which releases the lock.\))2.5 E(LOCK_PUT)133 427.2 Q +(The lock referenced by the contents of the)158 439.2 Q F1(loc)2.5 E(kp)-.2 E +F0(\214eld is released.)2.5 E(LOCK_PUT_ALL)133 456 Q .759 +(All locks held by the)158 468 R F1(loc)3.259 E -.1(ke)-.2 G(r).1 E F0 .759 +(are released.)3.259 F(\(An)5.759 E 3.259(yl)-.15 G .759 +(ocks acquired as a part of the current call to)358.501 468 R F1(loc)158 480 Q +(k_vec)-.2 E F0(are not considered for this operation\).)2.5 E(LOCK_PUT_OBJ)133 +496.8 Q 1.409(All locks held by the)158 508.8 R F1(loc)3.909 E -.1(ke)-.2 G(r) +.1 E F0 3.909(,o).73 G 3.909(nt)287.704 508.8 S 1.409(he object)299.393 508.8 R +F1(obj)3.909 E F0 3.909(,w).48 G 1.41(ith the mode speci\214ed by)367.98 508.8 +R F1(loc)3.91 E(k_mode)-.2 E F0 3.91(,a).18 G(re)532.23 508.8 Q 2.802 +(released. A)158 520.8 R F1(loc)2.802 E(k_mode)-.2 E F0 .301 +(of LOCK_NG indicates that all locks on the object should be released.)2.802 F +(\(An)158 532.8 Q 3.053(yl)-.15 G .553 +(ocks acquired as a part of the current call to)184.233 532.8 R F1(loc)3.054 E +(k_vec)-.2 E F0 .554(are not considered for this opera-)3.054 F(tion\).)158 +544.8 Q(const DBT obj;)108 561.6 Q +(An untyped byte string which speci\214es the object to be lock)133 573.6 Q +(ed or released.)-.1 E(const lock_mode_t lock_mode;)108 590.4 Q +(The lock mode, used as an inde)133 602.4 Q 2.5(xi)-.15 G(nto)268.94 602.4 Q F1 +(lt)2.5 E F0 1.1 -.55('s c).68 H(on\215ict array).55 E(.)-.65 E +(LOCK_T **lockp;)108 619.2 Q 2.5(Ap)133 631.2 S +(ointer to a pointer to a lock reference.)147.72 631.2 Q(The)108 648 Q F1 +(nlist)2.5 E F0(ar)2.5 E(gument speci\214es the number of elements in the)-.18 +E F1(list)2.5 E F0(array)2.5 E(.)-.65 E 1.229(The function)108 664.8 R F1(loc) +3.729 E(k_g)-.2 E(et)-.1 E F0 1.228(is a simple interf)3.728 F 1.228 +(ace to the)-.1 F F1(loc)3.728 E(k_vec)-.2 E F0(functionality)3.728 E 3.728(,a) +-.65 G 1.228(nd is equi)416.31 664.8 R -.25(va)-.25 G 1.228 +(lent to calling the).25 F F1(loc)108 676.8 Q(k_vec)-.2 E F0 .123 +(function with the)2.623 F F1(lt)2.623 E F0(and)2.623 E F1(loc)2.623 E -.1(ke) +-.2 G(r).1 E F0(ar)2.623 E .123(guments, NULL)-.18 F F1(timeout)2.623 E F0(,) +.68 E F1(elistp)2.623 E F0(and)2.623 E F1(con\215ict)2.623 E F0(ar)2.623 E .124 +(guments, and a sin-)-.18 F .944(gle element)108 688.8 R F1(list)3.444 E F0 +(array)3.444 E 3.444(,f)-.65 G .944(or which the)203.606 688.8 R F1(op)3.444 E +F0 .944(\214eld is LOCK_GET)3.444 F 3.444(,a)-.74 G .944(nd the)365.014 688.8 R +F1(obj)3.444 E F0(,).48 E F1(loc)3.444 E(k_mode)-.2 E F0(and)3.444 E F1(loc) +3.444 E(kp)-.2 E F0 .943(\214elds are)3.443 F(4.4 Berk)72 732 Q(ele)-.1 E 2.5 +(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(3)535 +732 Q EP +%%Page: 4 9 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 117.9(DB_LOCK\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 117.9(anual DB_LOCK\(3\))340.17 48 R .509(represented by the ar)108 +84 R .509(guments of the same name.)-.18 F .51(Note that the type of the)5.509 +F/F1 10/Times-Italic@0 SF(obj)3.01 E F0(ar)3.01 E .51(gument to)-.18 F F1(loc) +3.01 E(k_g)-.2 E(et)-.1 E F0 .51(is dif-)3.01 F .765(ferent from the)108 96 R +F1(obj)3.265 E F0 .765(element found in the LOCK_REQ_T structure.)3.265 F(The) +5.765 E F1(loc)3.265 E(k_g)-.2 E(et)-.1 E F0 .765(function returns success) +3.265 F(and f)108 108 Q(ailure as described for the)-.1 E F1(loc)2.5 E(k_vec) +-.2 E F0(function.)2.5 E 1.186(The function)108 124.8 R F1(loc)3.686 E(k_put) +-.2 E F0 1.187(is a simple interf)3.687 F 1.187(ace to the)-.1 F F1(loc)3.687 E +(k_vec)-.2 E F0(functionality)3.687 E 3.687(,a)-.65 G 1.187(nd is equi)416.515 +124.8 R -.25(va)-.25 G 1.187(lent to calling the).25 F F1(loc)108 136.8 Q +(k_vec)-.2 E F0 .374(function with a single element)2.874 F F1(list)2.874 E F0 +(array)2.873 E 2.873(,f)-.65 G .373(or which the)314.82 136.8 R F1(op)2.873 E +F0 .373(\214eld is LOCK_PUT and the)2.873 F F1(loc)2.873 E(kp)-.2 E F0(\214eld) +2.873 E .631(is represented by the ar)108 148.8 R .631 +(gument of the same name.)-.18 F .632(Note that the type of the)5.632 F F1(loc) +3.132 E(kp)-.2 E F0(ar)3.132 E .632(gument to)-.18 F F1(loc)3.132 E(k_put)-.2 E +F0(is)3.132 E(dif)108 160.8 Q .275(ferent from the)-.25 F F1(loc)2.775 E(kp)-.2 +E F0 .274(element found in the LOCK_REQ_T structure.)2.775 F(The)5.274 E F1 +(loc)2.774 E(k_put)-.2 E F0 .274(function returns suc-)2.774 F(cess and f)108 +172.8 Q(ailure as described for the)-.1 E F1(loc)2.5 E(k_vec)-.2 E F0 +(function.)2.5 E .013(The function)108 189.6 R F1(loc)2.513 E(k_close)-.2 E F0 +.013(disassociates the calling process from the lock table)2.513 F F1(lt)2.513 +E F0 2.513(,a).68 G .013(fter releasing all locks held)431.636 189.6 R .228 +(or requested by that process.)108 201.6 R .228(The function)5.228 F F1(loc) +2.728 E(k_close)-.2 E F0 .228(returns -1 on f)2.728 F .227(ailure, setting)-.1 +F F1(errno)2.727 E F0 2.727(,a).18 G .227(nd 0 on success.)474.329 201.6 R .433 +(The function)108 218.4 R F1(loc)2.933 E(k_unlink)-.2 E F0(destro)2.933 E .433 +(ys the lock table identi\214ed by the directory)-.1 F F1(path)2.933 E F0 2.933 +(,r).28 G(emo)440.636 218.4 Q .433(ving all \214les used to)-.15 F 1.005 +(implement the lock table.)108 230.4 R 1.005(\(The directory)6.005 F F1(path) +3.505 E F0 1.005(is not remo)3.505 F -.15(ve)-.15 G 3.505(d.\) If).15 F 1.005 +(there are processes which ha)3.505 F 1.305 -.15(ve c)-.2 H(alled).15 E F1(loc) +108 242.4 Q(k_open)-.2 E F0 .869(without calling)3.369 F F1(loc)3.369 E +(k_close)-.2 E F0 .869 +(\(i.e., there are processes currently using the lock table\),)3.369 F F1(loc) +3.37 E(k_unlink)-.2 E F0 .409(will f)108 254.4 R .408 +(ail without further action, unless the force \215ag is set, in which case)-.1 +F F1(loc)2.908 E(k_unlink)-.2 E F0 .408(will attempt to delete)2.908 F .807 +(the lock table \214les re)108 266.4 R -.05(ga)-.15 G .808(rdless of an).05 F +3.308(yp)-.15 G .808(rocesses still using the lock table.)264.662 266.4 R(An) +5.808 E 3.308(ya)-.15 G .808(ccesses to a remo)433.208 266.4 R -.15(ve)-.15 G +3.308(dl).15 G(ock)525.56 266.4 Q .046(table will lik)108 278.4 R .046 +(ely result in une)-.1 F .045(xpected beha)-.15 F(vior)-.2 E 5.045(.T)-.55 G +.045(he function)304.24 278.4 R F1(loc)2.545 E(k_unlink)-.2 E F0 .045 +(returns -1 on f)2.545 F .045(ailure, setting)-.1 F F1(errno)2.545 E F0(,).18 E +(and 0 on success.)108 290.4 Q .798(In the case of catastrophic or system f)108 +307.2 R .798(ailure, it is possible to clean up a lock table by remo)-.1 F .799 +(ving all of the)-.15 F .38(\214les in the directory speci\214ed to the)108 +319.2 R F1(loc)2.88 E(k_cr)-.2 E(eate)-.37 E F0 .379 +(function, as lock table \214les are ne)2.88 F -.15(ve)-.25 G 2.879(rc).15 G +.379(reated in an)461.543 319.2 R 2.879(yd)-.15 G(irec-)521.68 319.2 Q +(tory other than the one speci\214ed to)108 331.2 Q F1(loc)2.5 E(k_cr)-.2 E +(eate)-.37 E F0(.).18 E/F2 9/Times-Bold@0 SF(ERR)72 348 Q(ORS)-.27 E F0(The)108 +360 Q F1(loc)4.158 E(k_cr)-.2 E(eate)-.37 E F0 1.658(function may f)4.158 F +1.658(ail and set)-.1 F F1(errno)4.158 E F0 1.658(for an)4.158 F 4.158(yo)-.15 +G 4.158(ft)353.71 360 S 1.659(he errors speci\214ed for the library routines) +363.978 360 R F1(mmap)108 372 Q F0(\(2\),).19 E F1(open)2.5 E F0(\(2\) and).24 +E F1(malloc)2.5 E F0(\(3\).).31 E(The)108 388.8 Q F1(loc)4.692 E(k_open)-.2 E +F0 2.192(function may f)4.692 F 2.192(ail and set)-.1 F F1(errno)4.692 E F0 +2.192(for an)4.692 F 4.692(yo)-.15 G 4.692(ft)353.87 388.8 S 2.191 +(he errors speci\214ed for the library routine)364.672 388.8 R F1(mmap)108 +400.8 Q F0(\(2\) and).19 E F1(open)2.5 E F0(\(2\).).24 E(The)108 417.6 Q F1 +(loc)2.57 E(k_close)-.2 E F0 .07(function may f)2.57 F .07(ail and set)-.1 F F1 +(errno)2.57 E F0 .07(for an)2.57 F 2.57(yo)-.15 G 2.57(ft)333.76 417.6 S .07 +(he errors speci\214ed for the library routine)342.44 417.6 R F1(close)2.57 E +F0(\(2\)).18 E(and)108 429.6 Q F1(munmap)2.5 E F0(\(2\).).19 E(The)108 446.4 Q +F1(loc)4.071 E(k_unlink)-.2 E F0 1.571(function may f)4.071 F 1.571 +(ail and set)-.1 F F1(errno)4.071 E F0 1.571(for an)4.071 F 4.071(yo)-.15 G +4.07(ft)353.22 446.4 S 1.57(he errors speci\214ed for the library function) +363.4 446.4 R F1(unlink)108 458.4 Q F0(\(2\) or the follo).67 E(wing:)-.25 E +([EB)108 475.2 Q(USY])-.1 E(The lock table w)133 487.2 Q +(as in use and the force \215ag w)-.1 E(as not set.)-.1 E F2(SEE ALSO)72 504 Q +F1(db_btr)108 516 Q(ee)-.37 E F0(\(3\),).18 E F1(db_hash)2.5 E F0(\(3\),).28 E +F1(db_lo)2.5 E(g)-.1 E F0(\(3\),).22 E F1(db_mpool)2.5 E F0(\(3\),).51 E F1 +(db_open)2.5 E F0(\(3\),).24 E F1(db_r)2.5 E(ecno)-.37 E F0(\(3\),).18 E F1 +(db_txn)2.5 E F0(\(3\)).24 E F2 -.09(BU)72 532.8 S(GS).09 E F0(The)108 544.8 Q +F1(maxloc)2.656 E(ks)-.2 E F0 .156 +(parameter is a kluge, and should be deleted in f)2.656 F -.2(avo)-.1 G 2.657 +(ro).2 G 2.657(fd)381.055 544.8 S .157(ynamically e)392.042 544.8 R .157 +(xpanding the lock table.)-.15 F(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G +(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(4)535 732 Q EP +%%Page: 1 10 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.57(DB_LOG\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.57(anual DB_LOG\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 +84 S(ME).18 E F0(db_log \255 log-manager access method)108 96 Q F1(DESCRIPTION) +72 112.8 Q F0 .486(The DB library is a f)108 124.8 R .485 +(amily of groups of functions that pro)-.1 F .485 +(vides a modular programming interf)-.15 F .485(ace to trans-)-.1 F .822 +(actions and record-oriented \214le access.)108 136.8 R .822 +(The library includes support for transaction, locking, logging and)5.822 F +.258(\214le b)108 148.8 R(uf)-.2 E .258(fering functionality)-.25 F 2.758(,a) +-.65 G 2.758(sw)223.214 148.8 S .258(ell as v)237.082 148.8 R .258(arious inde) +-.25 F -.15(xe)-.15 G 2.758(da).15 G .258(ccess methods.)331.434 148.8 R(Man) +5.258 E 2.758(yo)-.15 G 2.758(ft)427.878 148.8 S .258 +(he functional groups \(e.g.)436.746 148.8 R .528(the memory pool functions\) \ +are useful independently of the rest of the DB functions, although some func-) +108 160.8 R .306(tional groups are e)108 172.8 R .306 +(xplicitly based on other functional groups \(e.g.)-.15 F .306 +(transactions and logging\).)5.306 F -.15(Fo)5.306 G 2.806(rag).15 G(eneral) +515.57 172.8 Q .245(description of transactions, see)108 184.8 R/F2 10 +/Times-Italic@0 SF(db_txn)2.745 E F0 2.745(\(3\). F).24 F .245 +(or a general description of the access methods, see)-.15 F F2(db_open)2.745 E +F0(\(3\)).24 E .308(and then the indi)108 196.8 R .308 +(vidual access method manual pages:)-.25 F F2(db_btr)2.807 E(ee)-.37 E F0 +(\(3\),).18 E F2(db_hash)2.807 E F0(\(3\),).28 E F2(db_lo)2.807 E(g)-.1 E F0 +.307(\(3\) and).22 F F2(db_r)2.807 E(ecno)-.37 E F0(\(3\).).18 E -.15(Fo)108 +208.8 S 3.635(rag).15 G 1.135(eneral description of the lock manager)138.45 +208.8 R 3.635(,s)-.4 G(ee)307.32 208.8 Q F2(db_loc)3.635 E(k)-.2 E F0 3.635 +(\(3\). F).67 F 1.135(or a general description of the memory)-.15 F +(pool manager)108 220.8 Q 2.5(,s)-.4 G(ee)171.2 220.8 Q F2(db_mpool)2.5 E F0 +(\(3\).).51 E +(This manual page describes speci\214c details of the logging access method.) +108 237.6 Q .03(These functions pro)108 254.4 R .03 +(vide a general-purpose logging f)-.15 F .03(acility suf)-.1 F .03 +(\214cient for transaction management.)-.25 F .03(Logs can)5.03 F +(be shared by multiple processes.)108 266.4 Q 3.717(Al)108 283.2 S 1.217 +(og is represented by the directory)121.717 283.2 R(,)-.65 E F2 1.217 +(not the \214le)3.717 F F0 3.717(,n).18 G 1.217(amed by the \214rst ar)323 +283.2 R 1.218(gument to)-.18 F F2(db_open)3.718 E F0 3.718(\(3\). The).24 F +(\214rst)3.718 E(ar)108 295.2 Q .26 +(gument must be non-NULL, and the directory must already e)-.18 F(xist)-.15 E +F2(db_open)2.76 E F0 .26(is called.)2.76 F .26(In that directory)5.26 F 2.76 +(,t)-.65 G(he)530.56 295.2 Q 3.448 +(log is stored in one or more \214les named in the format `)108 307.2 R +(`log.YYYY)-.74 E(.MM.DD.HH.MM.SS')-1.29 E 3.448(', where)-.74 F -.74(``)108 +319.2 S(YYYY).74 E(.MM.DD.HH.SS')-1.29 E 2.507('i)-.74 G 2.507(st)220.497 319.2 +S .007(he approximate creation time of the log \214le, and is guaranteed to be\ + unique in)229.674 319.2 R(the directory)108 331.2 Q(.)-.65 E .465 +(The group of the created \214les is based on the system and directory def)108 +348 R .466(aults, and is not further speci\214ed by)-.1 F .073 +(the log access method.)108 360 R .072(All \214les are created with the)5.073 F +F2(mode)2.572 E F0 .072(speci\214ed to)2.572 F F2(db_open)2.572 E F0 2.572(,\() +.24 G .072(as described in)435.584 360 R F2 -.15(ch)2.572 G(mod).15 E F0 +(\(2\)\)).77 E(and modi\214ed by the process' umask v)108 372 Q(alue \(see)-.25 +E F2(umask)2.5 E F0(\(2\)\).).67 E(The)108 388.8 Q F2<8d61>2.5 E(gs)-.1 E F0 +(ar)2.5 E(gument to)-.18 E F2(db_open)2.5 E F0(must be 0 for the)2.5 E F2 +(db_lo)2.5 E(g)-.1 E F0(access method.)2.5 E F1 -.495(AC)72 405.6 S +(CESS METHOD SPECIFIC INFORMA).495 E(TION)-.855 E F0 .571 +(The log access method speci\214c data structure pro)108 417.6 R .571(vided to) +-.15 F F2(db_open)3.071 E F0 .572(is typedef)3.071 F 1.572 -.5('d a).55 H .572 +(nd named LOGINFO.).5 F(A)5.572 E(LOGINFO structure has at least the follo)108 +429.6 Q(wing \214elds, which may be initialized before calling)-.25 E F2 +(db_open)2.5 E F0(:).24 E(of)108 446.4 Q(f_t max_\214le_size;)-.25 E 1.585 +(The maximum size of a single \214le in the log.)133 458.4 R 1.584 +(If not speci\214ed, the maximum size def)6.584 F 1.584(aults to an)-.1 F +(implementation-speci\214c v)133 470.4 Q(alue.)-.25 E(int lorder;)108 487.2 Q +.65(The byte order for inte)133 499.2 R .65 +(gers in the stored database metadata.)-.15 F .65 +(The number should represent the order)5.65 F .749(as an inte)133 511.2 R .749 +(ger; for e)-.15 F .749(xample, big endian order w)-.15 F .749 +(ould be the number 4,321.)-.1 F(If)5.749 E F2(lor)3.249 E(der)-.37 E F0 .749 +(is 0 \(no order is)3.249 F(speci\214ed\) the current host order is used.)133 +523.2 Q 1.284(If the log already e)108 540 R 1.284(xists, the v)-.15 F 1.285(a\ +lues speci\214ed for the parameters max_\214le_size and lorder are ignored in) +-.25 F -.1(fa)108 552 S -.2(vo)-.1 G 2.5(ro).2 G 2.5(ft)136.1 552 S(he v)144.71 +552 Q(alues used when the log w)-.25 E(as created.)-.1 E F1(DB OPERA)72 568.8 Q +(TIONS)-.855 E F0 .687(The data part of the k)108 580.8 R -.15(ey)-.1 G .686(/\ +data pair used by the log access method is the same as for other access method\ +s.).15 F .837(The k)108 592.8 R 1.137 -.15(ey i)-.1 H 3.337(sd).15 G(if)159.421 +592.8 Q 3.337(ferent. Each)-.25 F .837(log record is identi\214ed by a log seq\ +uence number \(LSN\), which is stored in a)3.337 F(DBT)108 604.8 Q 2.702(,a) +-.74 G .202(nd which is used as the)136.902 604.8 R F2 -.1(ke)2.702 G(y)-.2 E +F0 .202(for all log functions that tak)2.702 F(e)-.1 E F2 -.1(ke)2.701 G(y)-.2 +E F0(ar)2.701 E 2.701(guments. Applications)-.18 F .201(cannot create)2.701 F +(LSN')108 616.8 Q .539(s, and all LSN')-.55 F 3.039(sp)-.55 G(ro)203.216 616.8 +Q .539(vided to functions as ar)-.15 F .539(guments must \214rst be retrie)-.18 +F -.15(ve)-.25 G 3.04(du).15 G .54(sing the)440.37 616.8 R F2(put)3.04 E F0(or) +3.04 E F2(seq)3.04 E F0(func-)3.04 E 2.783(tions. T)108 628.8 R 2.783(op)-.8 G +(ro)153.326 628.8 Q .283(vide a distinguished v)-.15 F .282 +(alue for applications, it is guaranteed that no v)-.25 F .282(alid LSN will e) +-.25 F -.15(ve)-.25 G 2.782(rh).15 G -2.25 -.2(av e)519.248 628.8 T(a)2.982 E +(size of 0.)108 640.8 Q(Applications can compare LSN')108 657.6 Q 2.5(su)-.55 G +(sing the)247.98 657.6 Q F2(lo)2.5 E(g_lsn_compar)-.1 E(e)-.37 E F0 +(function \(see belo)2.5 E(w\).)-.25 E .429(Applications can associate LSN')108 +674.4 R 2.929(sw)-.55 G .429(ith speci\214c log \214les.)253.586 674.4 R .429 +(The function)5.429 F F2(lo)2.929 E(g_lsn_\214le)-.1 E F0 .43(\(see belo)2.93 F +.43(w\), returns the)-.25 F .214 +(name of the log \214le containing the record with a speci\214ed LSN.)108 686.4 +R .214(\(The mapping of LSN to \214le is needed for)5.214 F(4.4 Berk)72 732 Q +(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(3, 1995) +2.5 E(1)535 732 Q EP +%%Page: 2 11 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.57(DB_LOG\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.57(anual DB_LOG\(3\))340.17 48 R .397(database administration.) +108 84 R -.15(Fo)5.397 G 2.897(re).15 G .398 +(xample, a transaction manager typically records the earliest LSN needed for) +231.931 84 R .519(restart, and the database administrator may w)108 96 R .519 +(ant to archi)-.1 F .819 -.15(ve l)-.25 H .519(og \214les to tape when the).15 +F 3.018(yc)-.15 G .518(ontain only LSN')465.624 96 R(s)-.55 E +(before the earliest one needed for restart.\))108 108 Q +(Applications can truncate the log \214le up to a speci\214c LSN using the)108 +124.8 Q/F1 10/Times-Italic@0 SF(lo)2.5 E(g_trunc)-.1 E F0(function \(see belo) +2.5 E(w\).)-.25 E .221(The functions returned by)108 141.6 R F1(db_open)2.721 E +F0 .221(for the log access method are as described in)2.721 F F1(db_open)2.721 +E F0 2.722(,w).24 G .222(ith the follo)482.586 141.6 R(w-)-.25 E(ing e)108 +153.6 Q(xceptions and additions:)-.15 E 5.28(type The)108 170.4 R +(type is DB_LOG.)2.5 E 10.28(del The)108 187.2 R F1(del)3.505 E F0 1.005 +(function al)3.505 F -.1(wa)-.1 G 1.005 +(ys returns an error for the log-manager access method, setting).1 F F1(errno) +3.504 E F0 1.004(to EIN-)3.504 F -1.35(VA)133 199.2 S(L.)1.35 E +(int \(*log_\215ush\)\(const DB *db, const DBT *lsn\);)108 216 Q(The)133 228 Q +F1(lo)2.866 E(g_\215ush)-.1 E F0 .367 +(function \215ushes the log up to and including the log record)2.866 F F1(lsn) +2.867 E F0 5.367(.T).24 G .367(he function)454.926 228 R F1(lo)2.867 E +(g_\215ush)-.1 E F0(returns -1 on f)133 240 Q(ailure, setting)-.1 E F1(errno) +2.5 E F0 2.5(,a).18 G(nd 0 on success.)278.61 240 Q +(int \(*log_lsn_compare\)\(const DB *,)108 256.8 Q .255 +(const DBT *lsn1, const DBT *lsn2\); A pointer to a function which is pro)183 +268.8 R .255(vided to permit)-.15 F .312(applications to compare LSN')133 280.8 +R 2.812(s. The)-.55 F F1(lo)2.812 E(g_lsn_compar)-.1 E(e)-.37 E F0 .312 +(function returns an inte)2.812 F .313(ger less than, equal to,)-.15 F .058 +(or greater than zero if the \214rst LSN is considered to be respecti)133 292.8 +R -.15(ve)-.25 G .058(ly less than, equal to, or greater than).15 F +(the second LSN.)133 304.8 Q(int \(*log_lsn_\214le\)\(const DB *db,)108 321.6 Q +(const DBT *lsn, char *name\);)183 333.6 Q(The)133 345.6 Q F1(lo)3.21 E +(g_lsn_\214le)-.1 E F0 .71 +(function stores a pointer to the name of the \214le containing)3.21 F F1(lsn) +3.211 E F0 .711(in the address refer)3.211 F(-)-.2 E .293(enced by)133 357.6 R +F1(name)2.793 E(.)-.15 E F0 .293(This pointer is to an internal static object,\ + and subsequent calls to the same function)5.293 F +(will modify the same object.)133 369.6 Q(The function)133 386.4 Q F1(lo)2.5 E +(g_lsn_\214le)-.1 E F0(returns -1 on f)2.5 E(ailure, setting)-.1 E F1(errno)2.5 +E F0 2.5(,a).18 G(nd 0 on success.)381.56 386.4 Q +(int \(*log_unlink\)\(const char *path, int force\);)108 403.2 Q(The)133 415.2 +Q F1(lo)3.275 E(g_unlink)-.1 E F0 .775(function destro)3.275 F .775 +(ys the log represented by)-.1 F F1(path)3.275 E F0 5.775(.I).28 G 3.275(ft) +394.745 415.2 S(he)404.13 415.2 Q F1(for)3.275 E(ce)-.37 E F0 .776 +(parameter is not set to 1)3.275 F .725 +(and there are other processes using the log, then)133 427.2 R F1(lo)3.224 E +(g_unlink)-.1 E F0 .724(will return -1, setting)3.224 F F1(errno)3.224 E F0 +.724(to EB)3.224 F(USY)-.1 E(.)-1.29 E(If)133 439.2 Q F1(for)2.831 E .331 +(ce is not set or ther)-.37 F 2.831(ea)-.37 G 1.071 -.37(re n)244.287 439.2 T +2.831(op).37 G -.45(ro)272.909 439.2 S .331(cesses using the lo).45 F .532 -.1 +(g, t)-.1 H .332(hen all \214les).1 F F0 .332(used by the log are destro)2.832 +F(yed.)-.1 E F1(lo)133 451.2 Q(g_unlink)-.1 E F0(will return -1 on f)2.5 E +(ailure, setting)-.1 E F1(errno)2.5 E F0 2.5(,a).18 G(nd 0 on success.)337.96 +451.2 Q(int \(*log_trunc\)\(const DB *db, const DBT *lsn\);)108 468 Q(The)133 +480 Q F1(lo)2.601 E(g_trunc)-.1 E F0 .101 +(function truncates the log up to an LSN which is less than)2.601 F F1(lsn)2.6 +E F0 5.1(.T).24 G .1(he function)453.24 480 R F1(lo)2.6 E(g_trunc)-.1 E F0 +(returns -1 on f)133 492 Q(ailure, setting)-.1 E F1(errno)2.5 E F0 2.5(,a).18 G +(nd 0 on success.)278.61 492 Q 9.72(put A)108 508.8 R .339 +(log record containing)2.839 F F1(data)2.839 E F0 .339(is appended to the log.) +2.839 F(Unlik)5.339 E 2.84(et)-.1 G(he)382.44 508.8 Q F1(put)2.84 E F0 .34 +(functions for other access meth-)2.84 F .789(ods, the k)133 520.8 R 1.089 -.15 +(ey p)-.1 H .788(arameter is not initialized by the application, instead, the \ +LSN assigned to the data is).15 F(returned in the)133 532.8 Q F1 -.1(ke)2.5 G +(y)-.2 E F0(parameter)2.5 E(.)-.55 E 1.157(The caller is responsible for pro) +133 549.6 R 1.157(viding an)-.15 F 3.657(yn)-.15 G 1.157(ecessary structure to) +318.267 549.6 R F1 1.157(data .)3.657 F F0(\(F)6.157 E 1.157(or e)-.15 F 1.157 +(xample, in a write-)-.15 F .267 +(ahead logging protocol, the application must understand what part of)133 561.6 +R F1(data)2.767 E F0 .266(is an operation code, what)2.766 F .622 +(part is redo information, and what part is undo information.)133 573.6 R .622 +(In addition, most transaction managers)5.622 F .985(will store in)133 585.6 R +F1(data)3.485 E F0 .985(the LSN of the pre)3.485 F .984 +(vious log record for the same transaction, to support chaining)-.25 F +(back through the transaction')133 597.6 Q 2.5(sl)-.55 G +(og records during undo.\))258.54 597.6 Q(The parameter)133 614.4 Q F1<8d61>2.5 +E(g)-.1 E F0(must be set to 0 or e)2.5 E(xactly one of the follo)-.15 E(wing v) +-.25 E(alues:)-.25 E(R_CHECKPOINT)133 631.2 Q .5(Specify the k)158 643.2 R -.15 +(ey)-.1 G .5(/data pair of the current call as the one to be returned when the) +.15 F F1(seq)3 E F0 .5(function is)3 F(ne)158 655.2 Q +(xt called with the R_CHECKPOINT \215ag.)-.15 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5 +(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(3, 1995)2.5 E(2)535 +732 Q EP +%%Page: 3 12 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.57(DB_LOG\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.57(anual DB_LOG\(3\))340.17 48 R(R_FLUSH)133 84 Q +(Flush immediately \(ignoring an)158 96 Q 2.5(yp)-.15 G +(ossibility for group commit\).)296.74 96 Q 9.17(seq The)108 112.8 R/F1 10 +/Times-Italic@0 SF(seq)2.5 E F0(function tak)2.5 E(es the follo)-.1 E +(wing additional \215ag:)-.25 E(R_CHECKPOINT)133 129.6 Q .184(The last k)158 +141.6 R -.15(ey)-.1 G .184(/data pair stored by the).15 F F1(put)2.684 E F0 +.183(function \(using the R_CHECKPOINT \215ag\) is returned,)2.684 F .216 +(and the cursor is set or initialized to reference it.)158 153.6 R .216(The e) +5.216 F .216(xpected use of this \215ag is during restart)-.15 F .801 +(and to determine what part of the log must be a)158 165.6 R -.25(va)-.2 G .801 +(ilable for restart.).25 F .801(Therefore, the log record)5.801 F(retrie)158 +177.6 Q -.15(ve)-.25 G 3.352(dw).15 G .853 +(ith R_CHECKPOINT should contain all the information that the transaction man-) +203.712 177.6 R(ager will need for this purpose.)158 189.6 Q 4.17(sync The)108 +206.4 R F1(sync)3.135 E F0 .635(function al)3.135 F -.1(wa)-.1 G .635 +(ys returns an error for the log-manager access method, setting).1 F F1(errno) +3.134 E F0 .634(to EIN-)3.134 F -1.35(VA)133 218.4 S(L.)1.35 E/F2 9 +/Times-Bold@0 SF(SEE ALSO)72 235.2 Q F1(db_btr)108 247.2 Q(ee)-.37 E F0(\(3\),) +.18 E F1(db_hash)2.5 E F0(\(3\),).28 E F1(db_loc)2.5 E(k)-.2 E F0(\(3\),).67 E +F1(db_mpool)2.5 E F0(\(3\),).51 E F1(db_open)2.5 E F0(\(3\),).24 E F1(db_r)2.5 +E(ecno)-.37 E F0(\(3\),).18 E F1(db_txn)2.5 E F0(\(3\)).24 E(4.4 Berk)72 732 Q +(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(3, 1995) +2.5 E(3)535 732 Q EP +%%Page: 1 13 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 110.12(DB_MPOOL\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 110.12(anual DB_MPOOL\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA) +72 84 S(ME).18 E F0(db_mpool \255 general purpose shared memory b)108 96 Q(uf) +-.2 E(fer pool)-.25 E F1(SYNOPSIS)72 112.8 Q/F2 10/Times-Bold@0 SF +(#include <db)108 124.8 Q(.h>)-.4 E(#include <mpool.h>)108 136.8 Q(int)108 +160.8 Q(mpool_cr)108 172.8 Q +(eate\(char *path, mode_t mode, size_t cachesize, u_long \215ags\);)-.18 E +(MPOOL *)108 196.8 Q(mpool_open\(char *path\);)108 208.8 Q(int)108 232.8 Q +(mpool_close\(MPOOL *mp\);)108 244.8 Q(MPOOLFILE *)108 268.8 Q(mpool_f)108 +280.8 Q(open\(MPOOL *mp, char *path, size_t pagesize, v)-.25 E(oid *pgcookie,) +-.1 E(int \(*pgin\)\(MPOOLFILE *mpf)158 292.8 Q(,)-.15 E(pgno_t pgno, v)188 +304.8 Q(oid *pgaddr)-.1 E 2.5(,v)-.92 G(oid *pgcookie\),)311.91 304.8 Q +(int \(*pgout\)\(MPOOLFILE *mpf)158 316.8 Q(,)-.15 E(pgno_t pgno, v)188 328.8 Q +(oid *pgaddr)-.1 E 2.5(,v)-.92 G(oid *pgcookie\);)311.91 328.8 Q(int)108 352.8 +Q(mpool_fclose\(MPOOLFILE *mpf\);)108 364.8 Q -.1(vo)108 388.8 S(id *).1 E +(mpool_get\(MPOOLFILE *mpf)108 400.8 Q 2.5(,p)-.15 G(gno_t *pgnoaddr)252.02 +400.8 Q 2.5(,u)-.92 G(_long \215ags,)334.73 400.8 Q +(int \(*callback\)\(MPOOLFILE *mpf)158 412.8 Q 2.5(,p)-.15 G(gno_t pgno\)\);) +318.97 412.8 Q(int)108 436.8 Q(mpool_put\(MPOOLFILE *mpf)108 448.8 Q 2.5(,v) +-.15 G(oid *pgaddr)253.04 448.8 Q 2.5(,u)-.92 G(_long \215ags\);)314.64 448.8 Q +(int)108 472.8 Q(mpool_sync\(MPOOLFILE *mpf\);)108 484.8 Q(int)108 508.8 Q +(mpool_unlink\(const char *path, int f)108 520.8 Q(or)-.25 E(ce\);)-.18 E -.1 +(vo)108 544.8 S(id).1 E(mpool_stat\(MPOOL *mp, FILE *fp\);)108 556.8 Q F1 +(DESCRIPTION)72 573.6 Q F0 .485(The DB library is a f)108 585.6 R .485 +(amily of groups of functions that pro)-.1 F .486 +(vides a modular programming interf)-.15 F .486(ace to trans-)-.1 F .823 +(actions and record-oriented \214le access.)108 597.6 R .822 +(The library includes support for transaction, locking, logging and)5.822 F +.258(\214le b)108 609.6 R(uf)-.2 E .258(fering functionality)-.25 F 2.758(,a) +-.65 G 2.758(sw)223.214 609.6 S .258(ell as v)237.082 609.6 R .258(arious inde) +-.25 F -.15(xe)-.15 G 2.758(da).15 G .258(ccess methods.)331.434 609.6 R(Man) +5.258 E 2.758(yo)-.15 G 2.758(ft)427.878 609.6 S .258 +(he functional groups \(e.g.)436.746 609.6 R .528(the memory pool functions\) \ +are useful independently of the rest of the DB functions, although some func-) +108 621.6 R .306(tional groups are e)108 633.6 R .306 +(xplicitly based on other functional groups \(e.g.)-.15 F .306 +(transactions and logging\).)5.306 F -.15(Fo)5.306 G 2.806(rag).15 G(eneral) +515.57 633.6 Q .245(description of transactions, see)108 645.6 R/F3 10 +/Times-Italic@0 SF(db_txn)2.745 E F0 2.745(\(3\). F).24 F .245 +(or a general description of the access methods, see)-.15 F F3(db_open)2.745 E +F0(\(3\)).24 E .307(and then the indi)108 657.6 R .307 +(vidual access method manual pages:)-.25 F F3(db_btr)2.808 E(ee)-.37 E F0 +(\(3\),).18 E F3(db_hash)2.808 E F0(\(3\),).28 E F3(db_lo)2.808 E(g)-.1 E F0 +.308(\(3\) and).22 F F3(db_r)2.808 E(ecno)-.37 E F0(\(3\).).18 E -.15(Fo)108 +669.6 S 3.635(rag).15 G 1.135(eneral description of the lock manager)138.45 +669.6 R 3.635(,s)-.4 G(ee)307.32 669.6 Q F3(db_loc)3.635 E(k)-.2 E F0 3.635 +(\(3\). F).67 F 1.135(or a general description of the memory)-.15 F +(pool manager)108 681.6 Q 2.5(,s)-.4 G(ee)171.2 681.6 Q F3(db_mpool)2.5 E F0 +(\(3\).).51 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q +99.315(ution August)-.2 F(1, 1995)2.5 E(1)535 732 Q EP +%%Page: 2 14 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 110.12(DB_MPOOL\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 110.12(anual DB_MPOOL\(3\))340.17 48 R +(This manual page describes speci\214c details of the memory pool interf)108 84 +Q(ace.)-.1 E(The)108 100.8 Q/F1 10/Times-Italic@0 SF(db_mpool)3.682 E F0 1.182 +(function is the library interf)3.682 F 1.183(ace intended to pro)-.1 F 1.183 +(vide general-purpose, page-oriented b)-.15 F(uf)-.2 E(fer)-.25 E .16 +(management of one or more \214les.)108 112.8 R .16(While designed to w)5.16 F +.16(ork with the other DB functions, these functions are)-.1 F .604 +(also useful for more general purposes.)108 124.8 R .604 +(The memory pools \(MPOOL)5.604 F -.55('s)-.92 G 3.104(\)a).55 G .605 +(re referred to in this document as)404.18 124.8 R .985(simply `)108 136.8 R +(`pools')-.74 E 3.485('. Pools)-.74 F .985(may be shared between processes.) +3.485 F .985(Pools are usually \214lled by pages from one or)5.985 F .673 +(more \214les \(MPOOLFILE')108 148.8 R 3.173(s\). P)-.55 F .674 +(ages in the pool are replaced in LR)-.15 F 3.174(U\()-.4 G .674 +(least-recently-used\) order)392.318 148.8 R 3.174(,w)-.4 G .674(ith each) +507.946 148.8 R(ne)108 160.8 Q 4.243(wp)-.25 G 1.743 +(age replacing the page which has been unused the longest.)133.653 160.8 R -.15 +(Pa)6.742 G 1.742(ges retrie).15 F -.15(ve)-.25 G 4.242(df).15 G 1.742 +(rom the pool using)459.494 160.8 R F1(mpool_g)108 172.8 Q(et)-.1 E F0 1.255 +(are `)3.755 F(`pinned')-.74 E 3.755('i)-.74 G 3.755(nm)215.435 172.8 S(emory) +231.97 172.8 Q 3.755(,b)-.65 G 3.755(yd)268.125 172.8 S(ef)281.88 172.8 Q 1.256 +(ault, until the)-.1 F 3.756(ya)-.15 G 1.256(re returned to the pool using the) +358.168 172.8 R F1(mpool_put)3.756 E F0(function.)108 184.8 Q .934 +(The function)108 201.6 R F1(mpool_cr)3.434 E(eate)-.37 E F0 .934 +(creates and initializes the memory pool identi\214ed by the)3.434 F F1(path) +3.433 E F0(directory)3.433 E 5.933(.T)-.65 G(his)528.33 201.6 Q .931 +(directory must already e)108 213.6 R .931(xist when)-.15 F F1(mpool_cr)3.431 E +(eate)-.37 E F0 .931(is called.)3.431 F .932 +(If the memory pool identi\214ed by)5.931 F F1(path)3.432 E F0(already)3.432 E +-.15(ex)108 225.6 S .045(ists, then).15 F F1(mpool_cr)2.545 E(eate)-.37 E F0 +.045(returns success without further action.)2.545 F .045 +(The \214les associated with the memory pool)5.045 F .87 +(are created in the directory speci\214ed by)108 237.6 R F1(path)3.37 E F0 5.87 +(.\().28 G .87(The group of the created \214les is based on the system and) +304.08 237.6 R .258(directory def)108 249.6 R .258 +(aults, and is not further speci\214ed by)-.1 F F1(mpool_cr)2.758 E(eate)-.37 E +F0 2.758(.\) All).18 F .258(\214les created by)2.758 F F1(mpool_cr)2.758 E +(eate)-.37 E F0 .258(are cre-)2.758 F .048(ated with mode)108 261.6 R F1(mode) +2.548 E F0 .049(\(as described in)2.548 F F1 -.15(ch)2.549 G(mod).15 E F0 .049 +(\(2\)\) and modi\214ed by the process' umask v).77 F .049(alue \(see)-.25 F F1 +(umask)2.549 E F0(\(2\)\).).67 E(The)108 278.4 Q F1(cac)2.544 E(hesize)-.15 E +F0(ar)2.544 E .044(gument speci\214es the size of the pool in bytes, and shoul\ +d be the size of the normal w)-.18 F(orking)-.1 E .509(set of the application \ +with some small amount of additional memory for unusual situations.)108 290.4 R +.509(If the number)5.509 F .362(of bytes currently `)108 302.4 R(`pinned')-.74 +E 2.862('i)-.74 G 2.862(nm)226.828 302.4 S .362(emory e)242.47 302.4 R(xceeds) +-.15 E F1(cac)2.861 E(hesize)-.15 E F0 2.861(,t).18 G(he)351.734 302.4 Q F1 +(db_mpool)2.861 E F0 .361(functions will attempt to allocate)2.861 F +(more memory and do not necessarily f)108 314.4 Q(ail, although the)-.1 E 2.5 +(ym)-.15 G(ay suf)341.61 314.4 Q(fer performance de)-.25 E(gradation.)-.15 E +(The)108 331.2 Q F1<8d61>2.5 E(gs)-.1 E F0(ar)2.5 E(gument is set by)-.18 E F1 +(or)2.5 E F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)272.73 331.2 S(he follo)281.34 +331.2 Q(wing v)-.25 E(alues:)-.25 E(MPOOL_PRIV)108 348 Q -1.11(AT)-1.35 G(E) +1.11 E(The pool is not shared by other processes or threads, so no locking of \ +pool resources is required.)144 360 Q .115(The function)108 376.8 R F1 +(mpool_open)2.615 E F0 .115 +(returns a pointer to the memory pool identi\214ed by)2.615 F F1(path)2.615 E +F0 2.615(,w).28 G .115(hich must ha)447.525 376.8 R .415 -.15(ve a)-.2 H +(lready).15 E .036(been created by a call to)108 388.8 R F1(mpool_cr)2.536 E +(eate)-.37 E F0 5.036(.T).18 G .036(he process must ha)276.074 388.8 R .336 +-.15(ve p)-.2 H .036(ermission to read and write \214les with o).15 F(wn-)-.25 +E 1.157(ers, groups and permissions as described for)108 400.8 R F1(mpool_cr) +3.657 E(eate)-.37 E F0 6.157(.T).18 G(he)365.075 400.8 Q F1(mpool_open)3.657 E +F0 1.157(function returns NULL on)3.657 F -.1(fa)108 412.8 S(ilure, setting).1 +E F1(errno)2.5 E F0(.).18 E(The)108 429.6 Q F1(mpool_close)6.383 E F0 3.883 +(function closes the pool indicated by the MPOOL pointer)6.383 F F1(mp)6.383 E +F0 6.383(,a).19 G 6.382(sr)480.026 429.6 S 3.882(eturned by)493.628 429.6 R F1 +(mpool_open)108 441.6 Q F0 5.047(.T).24 G .047(his function does)171.337 441.6 +R/F2 10/Times-Bold@0 SF(not)2.547 E F0 .047(imply a call to)2.547 F F1 +(mpool_sync)2.547 E F0 .047(\(or to)2.547 F F1(mpool_fclose)2.547 E F0 2.547 +(\)i).18 G .047(.e. no pages are writ-)455.951 441.6 R .404 +(ten to the source \214le as as a result of calling)108 453.6 R F1(mpool_close) +2.904 E F0 5.404(.T).18 G .404(he function)354.658 453.6 R F1(mpool_close)2.904 +E F0 .403(returns -1 on f)2.904 F(ailure,)-.1 E(setting)108 465.6 Q F1(errno) +2.5 E F0 2.5(,a).18 G(nd 0 on success.)169.01 465.6 Q .827(The function)108 +482.4 R F1(mpool_fopen)3.327 E F0 .827(opens a \214le for b)3.327 F(uf)-.2 E +.828(fering in the pool speci\214ed by the MPOOL ar)-.25 F 3.328(gument. The) +-.18 F F1(path)108 494.4 Q F0(ar)2.85 E .349 +(gument is the name of the \214le to be opened.)-.18 F(The)5.349 E F1(pa)2.849 +E -.1(ge)-.1 G(size).1 E F0(ar)2.849 E .349 +(gument is the size, in bytes, of the unit)-.18 F .738(of transfer between the\ + application and the pool, although not necessarily the unit of transfer betwe\ +en the)108 506.4 R .12(pool and the source \214le.)108 518.4 R .12 +(Applications not kno)5.12 F .12 +(wing the page size of the source \214le should retrie)-.25 F .42 -.15(ve t) +-.25 H .12(he meta-).15 F .234(data from the \214le using a page size that is \ +correct for the metadata, then close and reopen the \214le, or)108 530.4 R +2.735(,o)-.4 G(ther)521.32 530.4 Q(-)-.2 E +(wise determine the page size before calling)108 542.4 Q F1(mpool_fopen)2.5 E +F0(.).24 E .416(If the)108 559.2 R F1(pgin)2.916 E F0 .416(function is speci\ +\214ed, it is called each time a page is read into the memory pool from the so\ +urce)2.916 F 2.835(\214le. If)108 571.2 R(the)2.835 E F1(pgout)2.835 E F0 .336 +(function is speci\214ed, it is called each time a page is written to the sour\ +ce \214le.)2.835 F .336(Both func-)5.336 F .834 +(tions are called with the MPOOLFILE pointer returned from)108 583.2 R F1 +(mpool_fopen)3.333 E F0 3.333(,t).24 G .833(he page number)421.815 583.2 R +3.333(,ap)-.4 G .833(ointer to)505.557 583.2 R .014 +(the page being read or written, and the ar)108 595.2 R(gument)-.18 E F1 +(pgcookie)2.515 E F0 5.015(.I).18 G 2.515(fe)351.695 595.2 S .015 +(ither function f)361.98 595.2 R .015(ails, it should return non-zero)-.1 F +(and set)108 607.2 Q F1(errno)2.5 E F0 2.5(,i).18 G 2.5(nw)168.73 607.2 S +(hich case the)183.45 607.2 Q F1(db_mpool)2.5 E F0 +(function calling it will also f)2.5 E(ail, lea)-.1 E(ving)-.2 E F1(errno)2.5 E +F0(intact.)2.5 E(The)108 624 Q F1(mpool_fclose)2.705 E F0 .204 +(function closes the source \214le indicated by the MPOOLFILE pointer)2.705 F +F1(mpf)2.704 E F0 5.204(.T)1.96 G .204(his function)492.296 624 R(does)108 636 +Q F2(not)3.615 E F0 1.115(imply a call to)3.615 F F1(mpool_sync)3.615 E F0 +3.615(,i).31 G 1.115 +(.e. no pages are written to the source \214le as as a result of calling) +268.885 636 R F1(mpool_fclose)108 648 Q F0 5(.T).18 G(he function)175.12 648 Q +F1(mpool_fclose)2.5 E F0(returns -1 on f)2.5 E(ailure, setting)-.1 E F1(errno) +2.5 E F0 2.5(,a).18 G(nd 0 on success.)424.33 648 Q .019(The function)108 664.8 +R F1(mpool_g)2.519 E(et)-.1 E F0 .019 +(returns a pointer to the page with the page number speci\214ed by)2.519 F F1 +(pgnoaddr)2.518 E F0 2.518(,f).73 G .018(rom the)509.152 664.8 R .986 +(source \214le speci\214ed by the MPOOLFILE pointer)108 676.8 R F1(mpf)3.486 E +F0 5.986(.I)1.96 G 3.486(ft)342.268 676.8 S .987(he page does not e)351.864 +676.8 R .987(xist or cannot be retrie)-.15 F -.15(ve)-.25 G(d,).15 E F1 +(mpool_g)108 688.8 Q(et)-.1 E F0(returns NULL and sets errno.)2.5 E(4.4 Berk)72 +732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F +(1, 1995)2.5 E(2)535 732 Q EP +%%Page: 3 15 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 110.12(DB_MPOOL\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 110.12(anual DB_MPOOL\(3\))340.17 48 R(The)108 84 Q/F1 10 +/Times-Italic@0 SF<8d61>2.5 E(gs)-.1 E F0(ar)2.5 E(gument is set by)-.18 E F1 +(or)2.5 E F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)272.73 84 S(he follo)281.34 84 +Q(wing v)-.25 E(alues:)-.25 E(MPOOL_CALLB)108 100.8 Q -.4(AC)-.35 G(K).4 E 1.04 +(After the page number has been determined, b)133 112.8 R 1.04(ut before an)-.2 +F 3.54(yo)-.15 G 1.04(ther process or thread can access the)388.26 112.8 R .471 +(page, the function speci\214ed by the)133 124.8 R F1(callbac)2.971 E(k)-.2 E +F0(ar)2.971 E .471(gument is called.)-.18 F .471(If the function f)5.471 F .472 +(ails, it should return)-.1 F 1.11(non-zero and set)133 136.8 R F1(errno)3.61 E +F0 3.61(,i).18 G 3.61(nw)236.21 136.8 S 1.11(hich case)252.04 136.8 R F1 +(mpool_g)3.61 E(et)-.1 E F0 1.11(will also f)3.61 F 1.11(ail, lea)-.1 F(ving) +-.2 E F1(errno)3.61 E F0 3.61(intact. The)3.61 F F1(callbac)3.61 E(k)-.2 E F0 +1.012(function is called with the MPOOLFILE pointer returned from)133 148.8 R +F1(mpool_fopen)3.512 E F0 1.013(and the page number)3.513 F(.)-.55 E .228 +(This functionality is commonly used when page locking is required, b)133 160.8 +R .227(ut the page number of the page)-.2 F(being retrie)133 172.8 Q -.15(ve) +-.25 G 2.5(di).15 G 2.5(sn)198.14 172.8 S(ot kno)209.53 172.8 Q(wn.)-.25 E +(MPOOL_CREA)108 189.6 Q(TE)-1.11 E(If the speci\214ed page does not e)133 201.6 +Q(xist, create it.)-.15 E(MPOOL_LAST)108 218.4 Q 2.105 +(Return the last page of the source \214le and cop)133 230.4 R 4.605(yi)-.1 G +2.106(ts page number to the location referenced by)347.25 230.4 R F1(pgnoaddr) +133 242.4 Q F0(.).73 E(MPOOL_NEW)108 259.2 Q(Create a ne)133 271.2 Q 2.5(wp) +-.25 G(age in the \214le and cop)192.45 271.2 Q 2.5(yi)-.1 G +(ts page number to the location referenced by)290.67 271.2 Q F1(pgnoaddr)2.5 E +F0(.).73 E(MPOOL_NOPIN)108 288 Q(Don')133 300 Q 2.918(tp)-.18 G .418 +(in the page into memory)164.068 300 R 5.418(.\()-.65 G .417 +(This \215ag is intended for deb)274.108 300 R .417(ugging purposes, when it') +-.2 F 2.917(so)-.55 G .417(ften use-)504.873 300 R .972(ful to e)133 312 R .972 +(xamine pages which are currently held by other parts of the application.)-.15 +F -.15(Pa)5.973 G .973(ges retrie).15 F -.15(ve)-.25 G 3.473(di).15 G(n)535 312 +Q .529(this manner don')133 324 R 3.029(tn)-.18 G .528 +(eed to be returned to the memory pool, i.e. the)212.457 324 R 3.028(ys)-.15 G +(hould)413.95 324 Q/F2 10/Times-Bold@0 SF(not)3.028 E F0 .528 +(be speci\214ed as ar)3.028 F(gu-)-.18 E(ments to the)133 336 Q F1(mpool_put) +2.5 E F0(routine.\))2.5 E(Created pages ha)108 352.8 Q .3 -.15(ve a)-.2 H +(ll their bytes set to 0.).15 E 2.078(All pages returned by)108 369.6 R F1 +(mpool_g)4.578 E(et)-.1 E F0 2.079 +(\(unless the MPOOL_NOPIN \215ag is speci\214ed\), will be retained \(i.e.) +4.578 F -.74(``)108 381.6 S(pinned').74 E +('\) in the pool until a subsequent call to)-.74 E F1(mpool_put)2.5 E F0(.).68 +E .077(The function)108 398.4 R F1(mpool_put)2.577 E F0 .076 +(indicates that the page referenced by)2.577 F F1(pgaddr)2.576 E F0 .076 +(can be e)2.576 F .076(victed from the pool.)-.25 F F1(Pgaddr)5.076 E F0 +(must be an address pre)108 410.4 Q(viously returned by)-.25 E F1(mpool_g)2.5 E +(et)-.1 E F0(.).68 E(The \215ag v)108 427.2 Q(alue is speci\214ed by)-.25 E F1 +(or)2.5 E F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)277.2 427.2 S(he follo)285.81 +427.2 Q(wing v)-.25 E(alues:)-.25 E(MPOOL_DIR)108 444 Q(TY)-.6 E .052(The page\ + has been modi\214ed and must be written to the source \214le before being e) +133 456 R .052(victed from the pool.)-.25 F(MPOOL_DISCARD)108 472.8 Q .145 +(The page is unlik)133 484.8 R .144(ely to be useful in the near future, and s\ +hould be discarded before other pages in the)-.1 F(pool.)133 496.8 Q +(The function)108 513.6 Q F1(mpool_put)2.5 E F0(returns -1 on f)2.5 E +(ailure, setting)-.1 E F1(errno)2.5 E F0 2.5(,a).18 G(nd 0 on success.)352.77 +513.6 Q .027(The function)108 530.4 R F1(mpool_sync)2.527 E F0 .028 +(writes all pages associated with the MPOOLFILE pointer)2.528 F F1(mpf)2.528 E +F0 2.528(,w)1.96 G .028(hich were speci-)474.414 530.4 R .431(\214ed as ar)108 +542.4 R .431(guments to the)-.18 F F1(mpool_put)2.931 E F0 .431 +(function with an associated \215ag of MPOOL_DIR)2.931 F(TY)-.6 E 2.93(,t)-1.29 +G 2.93(ot)472.61 542.4 S .43(he source \214le.)483.32 542.4 R(The function)108 +554.4 Q F1(mpool_sync)2.5 E F0(returns -1 on f)2.5 E(ailure, setting)-.1 E F1 +(errno)2.5 E F0 2.5(,a).18 G(nd 0 on success.)357.76 554.4 Q 1.075 +(The function)108 571.2 R F1(mpool_unlink)3.575 E F0(destro)3.575 E 1.075 +(ys the memory pool identi\214ed by the directory)-.1 F F1(path)3.575 E F0 +3.575(,r).28 G(emo)471.33 571.2 Q 1.075(ving all \214les)-.15 F 1.121 +(used to implement the memory pool.)108 583.2 R 1.121(\(The directory)6.121 F +F1(path)3.621 E F0 1.121(is not remo)3.621 F -.15(ve)-.15 G 3.62(d.\) If).15 F +1.12(there are processes which)3.62 F(ha)108 595.2 Q .871 -.15(ve c)-.2 H +(alled).15 E F1(mpool_open)3.071 E F0 .571(without calling)3.071 F F1 +(mpool_close)3.071 E F0 .572 +(\(i.e., there are processes currently using the memory)3.071 F(pool\),)108 +607.2 Q F1(mpool_unlink)2.652 E F0 .152(will f)2.652 F .151 +(ail without further action, unless the force \215ag is set, in which case)-.1 +F F1(mpool_unlink)2.651 E F0 .524 +(will attempt to delete the memory pool \214les re)108 619.2 R -.05(ga)-.15 G +.525(rdless of an).05 F 3.025(yp)-.15 G .525 +(rocesses still using the memory pool.)366.45 619.2 R(An)5.525 E(y)-.15 E .598 +(accesses to a remo)108 631.2 R -.15(ve)-.15 G 3.097(dm).15 G .597 +(emory pool will lik)208.95 631.2 R .597(ely result in une)-.1 F .597 +(xpected beha)-.15 F(vior)-.2 E 5.597(.T)-.55 G .597(he function)436.036 631.2 +R F1(mpool_unlink)3.097 E F0(returns -1 on f)108 643.2 Q(ailure, setting)-.1 E +F1(errno)2.5 E F0 2.5(,a).18 G(nd 0 on success.)253.61 643.2 Q .11 +(In the case of catastrophic or system f)108 660 R .11 +(ailure, it is possible to clean up a memory pool by remo)-.1 F .11 +(ving all of the)-.15 F .569(\214les in the directory speci\214ed to the)108 +672 R F1(mpool_cr)3.068 E(eate)-.37 E F0 .568 +(function, as memory pool \214les are ne)3.068 F -.15(ve)-.25 G 3.068(rc).15 G +.568(reated in an)487.364 672 R(y)-.15 E +(directory other than the one speci\214ed to)108 684 Q F1(mpool_cr)2.5 E(eate) +-.37 E F0(.).18 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q +99.315(ution August)-.2 F(1, 1995)2.5 E(3)535 732 Q EP +%%Page: 4 16 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 110.12(DB_MPOOL\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 110.12(anual DB_MPOOL\(3\))340.17 48 R .025(The function)108 84 R/F1 +10/Times-Italic@0 SF(mpool_stat)2.525 E F0 .026 +(writes statistics for the memory pool)2.526 F F1(mp)2.526 E F0 .026 +(to the \214le speci\214ed by)2.526 F F1(fp)2.526 E F0 5.026(.T).19 G .026 +(hese statistics)485.254 84 R .829 +(include the number of \214les participating in the pool, the acti)108 96 R +1.129 -.15(ve p)-.25 H .829(ages in the pool, and numbers as to ho).15 F(w)-.25 +E(ef)108 108 Q(fecti)-.25 E .3 -.15(ve t)-.25 H(he cache has been.).15 E/F2 9 +/Times-Bold@0 SF(ERR)72 124.8 Q(ORS)-.27 E F0(The)108 136.8 Q F1(mpool_cr)3.852 +E(eate)-.37 E F0(,).18 E F1(mpool_open)3.852 E F0(and)3.852 E F1(mpool_fopen) +3.852 E F0 1.353(functions may f)3.852 F 1.353(ail and set)-.1 F F1(errno)3.853 +E F0 1.353(for an)3.853 F 3.853(yo)-.15 G 3.853(ft)493.424 136.8 S 1.353 +(he errors)503.387 136.8 R(speci\214ed for the library functions)108 148.8 Q F1 +(open)2.5 E F0(\(2\),).24 E F1 -.37(re)2.5 G(ad).37 E F0(\(2\), and).77 E F1 +(malloc)2.5 E F0(\(3\).).31 E(The)108 165.6 Q F1(mpool_close)3.144 E F0(and) +3.144 E F1(mpool_fclose)3.144 E F0 .644(functions may f)3.144 F .644 +(ail and set)-.1 F F1(errno)3.144 E F0 .643(for an)3.143 F 3.143(yo)-.15 G +3.143(ft)425.985 165.6 S .643(he errors speci\214ed for the)435.238 165.6 R +(library functions)108 177.6 Q F1(close)2.5 E F0(\(2\) and).18 E F1(fr)2.5 E +(ee)-.37 E F0(\(3\).).18 E(The)108 194.4 Q F1(mpool_g)4.097 E(et)-.1 E F0 1.597 +(function may f)4.097 F 1.597(ail and set)-.1 F F1(errno)4.097 E F0 1.597 +(for an)4.097 F 4.097(yo)-.15 G 4.097(ft)349.14 194.4 S 1.597 +(he errors speci\214ed for the library functions)359.347 194.4 R F1 -.37(re)108 +206.4 S(ad).37 E F0(\(2\),).77 E F1(write)2.5 E F0(\(2\), and).18 E F1(malloc) +2.5 E F0(\(3\) or the follo).31 E(wing:)-.25 E([EINV)108 223.2 Q(AL])-1.35 E +(The requested page does not e)133 235.2 Q(xist and MPOOL_CREA)-.15 E(TE w) +-1.11 E(as not set.)-.1 E(The)108 252 Q F1(mpool_put)4.288 E F0 1.787 +(function may f)4.287 F 1.787(ail and set)-.1 F F1(errno)4.287 E F0 1.787 +(for an)4.287 F 4.287(yo)-.15 G 4.287(ft)351.701 252 S 1.787 +(he errors speci\214ed for the library function)362.098 252 R F1(write)108 264 +Q F0(\(2\) or the follo).18 E(wing:)-.25 E([EA)108 280.8 Q(CCES])-.4 E +(The source \214le w)133 292.8 Q(as not opened for writing.)-.1 E(The)108 309.6 +Q F1(mpool_sync)3.993 E F0 1.493(function may f)3.993 F 1.493(ail and set)-.1 F +F1(errno)3.993 E F0 1.494(for an)3.993 F 3.994(yo)-.15 G 3.994(ft)353.752 309.6 +S 1.494(he errors speci\214ed for the library function)363.856 309.6 R F1 +(write)108 321.6 Q F0(\(2\).).18 E(The)108 338.4 Q F1(mpool_unlink)3.569 E F0 +1.069(function may f)3.569 F 1.068(ail and set)-.1 F F1(errno)3.568 E F0 1.068 +(for an)3.568 F 3.568(yo)-.15 G 3.568(ft)356.734 338.4 S 1.068 +(he errors speci\214ed for the library function)366.412 338.4 R F1(unlink)108 +350.4 Q F0(\(2\) or the follo).67 E(wing:)-.25 E([EB)108 367.2 Q(USY])-.1 E +(The memory pool w)133 379.2 Q(as in use and the force \215ag w)-.1 E +(as not set.)-.1 E F2(SEE ALSO)72 396 Q F1(db_btr)108 408 Q(ee)-.37 E F0 +(\(3\),).18 E F1(db_hash)2.5 E F0(\(3\),).28 E F1(db_loc)2.5 E(k)-.2 E F0 +(\(3\),).67 E F1(db_lo)2.5 E(g)-.1 E F0(\(3\),).22 E F1(db_open)2.5 E F0 +(\(3\),).24 E F1(db_r)2.5 E(ecno)-.37 E F0(\(3\),).18 E F1(db_txn)2.5 E F0 +(\(3\)).24 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q +99.315(ution August)-.2 F(1, 1995)2.5 E(4)535 732 Q EP +%%Page: 1 17 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 119.01(DB_OPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 119.01(anual DB_OPEN\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA) +72 84 S(ME).18 E F0(db_open \255 database access methods)108 96 Q F1(SYNOPSIS) +72 112.8 Q/F2 10/Times-Bold@0 SF(#include <db)108 124.8 Q(.h>)-.4 E(DB *)108 +148.8 Q(db_open\(const char *\214le, int \215ags, int mode,)108 160.8 Q +(DBTYPE type, DBINFO *dbinf)158 172.8 Q(o, const v)-.25 E(oid *openinf)-.1 E +(o\);)-.25 E F1(DESCRIPTION)72 189.6 Q F0 .485(The DB library is a f)108 201.6 +R .485(amily of groups of functions that pro)-.1 F .486 +(vides a modular programming interf)-.15 F .486(ace to trans-)-.1 F .823 +(actions and record-oriented \214le access.)108 213.6 R .822 +(The library includes support for transaction, locking, logging and)5.822 F +.258(\214le b)108 225.6 R(uf)-.2 E .258(fering functionality)-.25 F 2.758(,a) +-.65 G 2.758(sw)223.214 225.6 S .258(ell as v)237.082 225.6 R .258(arious inde) +-.25 F -.15(xe)-.15 G 2.758(da).15 G .258(ccess methods.)331.434 225.6 R(Man) +5.258 E 2.758(yo)-.15 G 2.758(ft)427.878 225.6 S .258 +(he functional groups \(e.g.)436.746 225.6 R .528(the memory pool functions\) \ +are useful independently of the rest of the DB functions, although some func-) +108 237.6 R .306(tional groups are e)108 249.6 R .306 +(xplicitly based on other functional groups \(e.g.)-.15 F .306 +(transactions and logging\).)5.306 F -.15(Fo)5.306 G 2.806(rag).15 G(eneral) +515.57 249.6 Q .245(description of transactions, see)108 261.6 R/F3 10 +/Times-Italic@0 SF(db_txn)2.745 E F0 2.745(\(3\). F).24 F .245 +(or a general description of the access methods, see)-.15 F F3(db_open)2.745 E +F0(\(3\)).24 E .307(and then the indi)108 273.6 R .307 +(vidual access method manual pages:)-.25 F F3(db_btr)2.808 E(ee)-.37 E F0 +(\(3\),).18 E F3(db_hash)2.808 E F0(\(3\),).28 E F3(db_lo)2.808 E(g)-.1 E F0 +.308(\(3\) and).22 F F3(db_r)2.808 E(ecno)-.37 E F0(\(3\).).18 E -.15(Fo)108 +285.6 S 3.635(rag).15 G 1.135(eneral description of the lock manager)138.45 +285.6 R 3.635(,s)-.4 G(ee)307.32 285.6 Q F3(db_loc)3.635 E(k)-.2 E F0 3.635 +(\(3\). F).67 F 1.135(or a general description of the memory)-.15 F +(pool manager)108 297.6 Q 2.5(,s)-.4 G(ee)171.2 297.6 Q F3(db_mpool)2.5 E F0 +(\(3\).).51 E(This manual page describes the o)108 314.4 Q -.15(ve)-.15 G +(rall structure of the a).15 E -.25(va)-.2 G(ilable access methods.).25 E .457 +(The currently supported \214le formats are btree, hashed, log and recno \(i.e\ +. \215at-\214le oriented\).)108 331.2 R .457(The btree for)5.457 F(-)-.2 E .974 +(mat is a representation of a sorted, balanced tree structure.)108 343.2 R .973 +(The hashed format is an e)5.974 F .973(xtensible, dynamic)-.15 F .801 +(hashing scheme.)108 355.2 R .802 +(The log format is a general-purpose logging f)5.801 F(acility)-.1 E 5.802(.T) +-.65 G .802(he recno format is a byte stream)406.888 355.2 R .415 +(\214le with \214x)108 367.2 R .415(ed or v)-.15 F .415 +(ariable length records.)-.25 F .415(The formats and other)5.415 F 2.914(,f)-.4 +G .414(ormat speci\214c information are described)376.714 367.2 R +(in detail in their respecti)108 379.2 Q .3 -.15(ve m)-.25 H(anual pages:).15 E +F3(db_btr)2.5 E(ee)-.37 E F0(\(3\),).18 E F3(db_hash)2.5 E F0(\(3\),).28 E F3 +(db_lo)2.5 E(g)-.1 E F0(\(3\), and).22 E F3(db_r)2.5 E(ecno)-.37 E F0(\(3\).) +.18 E .138(Db_open opens)108 396 R F3(\214le)2.638 E F0 .139 +(for reading and/or writing.)2.638 F .139(Files ne)5.139 F -.15(ve)-.25 G 2.639 +(ri).15 G .139(ntended to be preserv)349.088 396 R .139 +(ed on disk may be created)-.15 F .423 +(by setting the \214le parameter to NULL.)108 408 R .423 +(\(Note, while most of the access methods use)5.423 F F3(\214le)2.923 E F0 .423 +(as the name of an)2.923 F .429 +(underlying \214le on disk, this is not guaranteed.)108 420 R .43 +(See the manual pages for the indi)5.429 F .43(vidual access methods for)-.25 F +(more information.\))108 432 Q(The)108 448.8 Q F3<8d61>4.328 E(gs)-.1 E F0(and) +4.328 E F3 1.828(mode ar)4.328 F(guments)-.37 E F0 1.828 +(are as speci\214ed to the)4.328 F F3(open)4.328 E F0 1.828(\(2\) function, ho) +.24 F(we)-.25 E -.15(ve)-.25 G 2.628 -.4(r, o).15 H 1.828(nly the O_CREA).4 F +-.74(T,)-1.11 G .127(O_EXCL, O_EXLOCK, O_NONBLOCK, O_RDONL)108 460.8 R 2.708 +-1.29(Y, O)-1 H(_RD)1.29 E .128(WR, O_SHLOCK and O_TR)-.3 F .128 +(UNC \215ags are)-.4 F 2.5(meaningful. \(Note,)108 472.8 R +(opening a database \214le O_WR)2.5 E(ONL)-.4 E 2.5(Yi)-1 G 2.5(sn)342.67 472.8 +S(ot possible.\))354.06 472.8 Q(The)108 489.6 Q F3(type)5.338 E F0(ar)5.338 E +2.837(gument is of type DBTYPE \(as de\214ned in the <db)-.18 F 2.837 +(.h> include \214le\) and may be set to)-.4 F +(DB_BTREE, DB_HASH, DB_LOG or DB_RECNO.)108 501.6 Q(The)108 518.4 Q F3(dbinfo) +3.279 E F0(ar)3.279 E .779(gument is a pointer to a structure containing refer\ +ences to locking, logging, transaction, and)-.18 F 1.242(shared-memory b)108 +530.4 R(uf)-.2 E 1.242(fer pool information.)-.25 F(If)6.242 E F3(dbinfo)3.742 +E F0 1.241(is NULL, then the access method may still use these)3.741 F .667 +(subsystems, b)108 542.4 R .667(ut the usage will be pri)-.2 F -.25(va)-.25 G +.668(te to the application and managed by DB.).25 F(If)5.668 E F3(dbinfo)3.168 +E F0 .668(is non-NULL,)3.168 F .481(then the module referenced by each of the \ +non-NULL \214elds is used by DB as necessary)108 554.4 R 5.48(.T)-.65 G .48 +(he \214elds of the)479.4 554.4 R(DBINFO structure are de\214ned as follo)108 +566.4 Q(ws:)-.25 E(const char *errpfx;)108 583.2 Q 2.5(Ap)133 595.2 S +(re\214x to prepend to error messages; used only if)147.72 595.2 Q F3 +(err\214le)2.5 E F0(is non-NULL.)2.5 E(FILE *err\214le;)108 612 Q(The)133 624 Q +F3(stdio)2.5 E F0(\(3\) \214le stream to which error messages are logged.).18 E +.147(When an)133 648 R 2.647(ye)-.15 G .147(rror occurs in the)180.904 648 R F3 +(db_open)2.648 E F0 .148(function, or in an)2.648 F 2.648(yf)-.15 G .148 +(unction called using a \214eld of the returned)369.824 648 R .234 +(DB structure, an error v)133 660 R .234 +(alue is returned by the function, and the global v)-.25 F(ariable)-.25 E F3 +(errno)2.733 E F0 .233(is set appropri-)2.733 F(ately)133 672 Q 5.415(.I)-.65 G +2.915(ns)163.035 672 S .416(ome cases, ho)174.84 672 R(we)-.25 E -.15(ve)-.25 G +1.216 -.4(r, t).15 H(he).4 E F3(errno)2.916 E F0 -.25(va)2.916 G .416 +(lue may be insuf).25 F .416(\214cient to describe the cause of the error)-.25 +F(.)-.55 E .137(In these cases, if)133 684 R F3(err\214le)2.637 E F0 .137(is n\ +on-NULL, additional error information will be written to the \214le stream it) +2.637 F(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315 +(ution August)-.2 F(1, 1995)2.5 E(1)535 732 Q EP +%%Page: 2 18 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 119.01(DB_OPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 119.01(anual DB_OPEN\(3\))340.17 48 R .643 +(represents, preceded by the string, if an)133 84 R 1.943 -.65(y, s)-.15 H .643 +(peci\214ed by).65 F/F1 10/Times-Italic@0 SF(errpfx)3.143 E F0 5.643(.T).53 G +.644(his error logging f)394.94 84 R .644(acility should not)-.1 F +(be required for normal operation, b)133 96 Q(ut may be useful in deb)-.2 E +(ugging applications.)-.2 E(char *errb)108 112.8 Q(uf;)-.2 E .03(The b)133 +124.8 R(uf)-.2 E .03(fer to which error messages are copied.)-.25 F .029 +(If non-NULL,)5.029 F F1(errb)2.529 E(uf)-.2 E F0(beha)2.529 E -.15(ve)-.2 G +2.529(sa).15 G 2.529(sd)451.423 124.8 S .029(escribed for)462.842 124.8 R F1 +(err\214le)2.529 E F0(,).18 E -.15(ex)133 136.8 S .173(cept that the).15 F F1 +(errpfx)2.673 E F0 .174 +(\214eld is ignored and the error message is copied into the speci\214ed b) +2.673 F(uf)-.2 E .174(fer instead)-.25 F 1.014 +(of being written to the FILE stream.)133 148.8 R 1.013 +(The DB routines assume that the associated b)6.014 F(uf)-.2 E 1.013 +(fer is at least)-.25 F(1024 bytes in length.)133 160.8 Q(LOCK_T)108 177.6 Q +(ABLE_T *lockinfo;)-.93 E .265 +(If locking is required for the \214le being opened \(as in the case of b)133 +189.6 R(uf)-.2 E .266(fers being maintained in a shared)-.25 F 1.794(memory b) +133 201.6 R(uf)-.2 E 1.794(fer pool\), the)-.25 F F1(loc)4.294 E(kinfo)-.2 E F0 +1.794(\214eld contains a return v)4.294 F 1.793(alue from the function)-.25 F +F1(loc)4.293 E(k_open)-.2 E F0(that)4.293 E(should be used \(see)133 213.6 Q F1 +(db_loc)2.5 E(k)-.2 E F0 2.5(\(3\)\). If).67 F F1(loc)2.5 E(kinfo)-.2 E F0 +(is NULL, no locking is done.)2.5 E(DB *loginfo;)108 230.4 Q .93 +(If modi\214cations to the \214le being opened should be logged, the)133 242.4 +R F1(lo)3.43 E(ginfo)-.1 E F0 .93(\214eld contains a return v)3.43 F(alue)-.25 +E .063(from the function)133 254.4 R F1(dbopen)2.563 E F0 2.563(,w).24 G .062 +(hen opening a DB \214le of type DB_LOG.)247.642 254.4 R(If)5.062 E F1(lo)2.562 +E(ginfo)-.1 E F0 .062(is NULL, no logging)2.562 F(is done.)133 266.4 Q +(MPOOL *mpoolinfo;)108 283.2 Q 1.129 +(If the cache for the \214le being opened should be maintained in a shared b) +133 295.2 R(uf)-.2 E 1.129(fer pool, the)-.25 F F1(mpoolinfo)3.629 E F0 .102 +(\214eld contains a return v)133 307.2 R .102(alue from the function)-.25 F F1 +(mpool_open)2.602 E F0 .102(that should be used \(see)2.602 F F1(db_mpool)2.602 +E F0 2.602(\(3\)\). If).51 F F1(mpoolinfo)133 319.2 Q F0 .429 +(is NULL, a memory pool may still be created, b)2.929 F .43(ut it will be pri) +-.2 F -.25(va)-.25 G .43(te to the application and).25 F(managed by DB.)133 +331.2 Q(TXNMGR *txninfo;)108 348 Q 1.161 +(If the accesses to the \214le being opened should tak)133 360 R 3.661(ep)-.1 G +1.161(lace in the conte)354.474 360 R 1.161(xt of transactions \(pro)-.15 F +(viding)-.15 E 1.239(atomicity and complete error reco)133 372 R -.15(ve)-.15 G +1.239(ry\), the).15 F F1(txninfo)3.739 E F0 1.239(\214eld contains a return v) +3.739 F 1.24(alue from the function)-.25 F F1(txn_open)133 384 Q F0(\(see)2.599 +E F1(db_txn)2.599 E F0 2.599(\(3\)\). If).24 F .098 +(transactions are speci\214ed, the application is responsible for making suit-) +2.599 F 1.27(able calls to)133 396 R F1(txn_be)3.77 E(gin)-.4 E F0(,).24 E F1 +(txn_abort)3.77 E F0 3.77(,a).68 G(nd)282.91 396 Q F1(txn_commit)3.77 E F0 6.27 +(.I).68 G(f)356.12 396 Q F1(txninfo)3.77 E F0 1.27 +(is NULL, no transaction support is)3.77 F(done.)133 408 Q(The)108 424.8 Q F1 +(openinfo)2.85 E F0(ar)2.85 E .349(gument is a pointer to an access method spe\ +ci\214c structure described in the access method')-.18 F(s)-.55 E .03 +(manual page.)108 436.8 R(If)5.03 E F1(openinfo)2.53 E F0 .031 +(is NULL, each access method will use def)2.53 F .031 +(aults appropriate for the system and the)-.1 F(access method.)108 448.8 Q/F2 9 +/Times-Bold@0 SF(KEY/D)72 465.6 Q -1.35 -.855(AT A)-.315 H -.666(PA)3.105 G +(IRS).666 E F0 .313(Access to all access methods is based on k)108 477.6 R -.15 +(ey)-.1 G .312(/data pairs.).15 F .312(Both k)5.312 F -.15(ey)-.1 G 2.812(sa) +.15 G .312(nd data are represented by the follo)386.758 477.6 R(w-)-.25 E +(ing data structure:)108 489.6 Q(typedef struct {)108 506.4 Q -.2(vo)144 518.4 +S(id *data;).2 E(size_t size;)144 530.4 Q 2.5(}D)108 542.4 S(BT)122.52 542.4 Q +(;)-.55 E(The elements of the DBT structure are de\214ned as follo)108 559.2 Q +(ws:)-.25 E 5.84(data A)108 576 R(pointer to a byte string.)2.5 E 6.95 +(size The)108 592.8 R(length of)2.5 E F1(data)2.5 E F0 2.5(,i).26 G 2.5(nb) +215.2 592.8 S(ytes.)227.7 592.8 Q -2.15 -.25(Ke y)108 609.6 T .672(and data by\ +te strings may reference strings of essentially unlimited length, although an) +3.422 F 3.173(yt)-.15 G .873 -.1(wo o)493.204 609.6 T 3.173(ft).1 G(hem)522.78 +609.6 Q(must \214t into a)108 621.6 Q -.25(va)-.2 G +(ilable memory at the same time.).25 E .14(The access methods pro)108 638.4 R +.139(vide no guarantees about byte string alignment, and applications are resp\ +onsible for)-.15 F(maintaining an)108 650.4 Q 2.5(yn)-.15 G +(ecessary alignment.)180.07 650.4 Q F2(DB OPERA)72 667.2 Q(TIONS)-.855 E F1 +(Db_open)108 679.2 Q F0 .56 +(returns a pointer to a DB structure \(as de\214ned in the <db)3.06 F .56 +(.h> include \214le\) on success, and NULL)-.4 F 1.02(on error)108 691.2 R 6.02 +(.T)-.55 G 1.02(he DB structure describes a database type, and includes a set \ +of functions to perform v)155.03 691.2 R(arious)-.25 E(4.4 Berk)72 732 Q(ele) +-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E +(2)535 732 Q EP +%%Page: 3 19 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 119.01(DB_OPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 119.01(anual DB_OPEN\(3\))340.17 48 R .241 +(actions, as described belo)108 84 R 4.041 -.65(w. E)-.25 H .241 +(ach of these functions tak).65 F .241 +(es a pointer to a DB structure, and may tak)-.1 F 2.741(eo)-.1 G .242(ne or) +519.488 84 R .889(more DBT *')108 96 R 3.389(sa)-.55 G .889(nd a \215ag v) +174.827 96 R .889(alue as well.)-.25 F(Indi)5.889 E .888 +(vidual access methods specify additional functions and \215ags)-.25 F +(which are speci\214c to the method.)108 108 Q +(The \214elds of the DB structure are as follo)5 E(ws:)-.25 E(DBTYPE type;)108 +124.8 Q(The type of the underlying access method \(and \214le format\).)133 +136.8 Q(int \(*close\)\(const DB *db\);)108 153.6 Q 3.863(Ap)133 165.6 S 1.363 +(ointer to a function to \215ush an)149.083 165.6 R 3.864(yc)-.15 G 1.364 +(ached information to disk, free an)290.968 165.6 R 3.864(ya)-.15 G 1.364 +(llocated resources, and)445.912 165.6 R .878(close an)133 177.6 R 3.378(yu) +-.15 G .878(nderlying \214les.)179.596 177.6 R .878(Since k)5.878 F -.15(ey)-.1 +G .878(/data pairs are cached in memory).15 F 3.377(,f)-.65 G .877 +(ailing to sync the \214le with)431.445 177.6 R(the)133 189.6 Q/F1 10 +/Times-Italic@0 SF(close)2.5 E F0(or)2.5 E F1(sync)2.5 E F0 +(function may result in inconsistent or lost information.)2.5 E(The)133 206.4 Q +F1(close)2.5 E F0(functions return -1 on f)2.5 E(ailure, setting)-.1 E F1 +(errno)2.5 E F0 2.5(,a).18 G(nd 0 on success.)355.54 206.4 Q +(int \(*del\)\(const DB *db, TXN *txnid,)108 223.2 Q(const DBT *k)183 235.2 Q +-.15(ey)-.1 G 2.5(,u)-.5 G(_int \215ags\);)257.65 235.2 Q 2.541(Ap)133 247.2 S +.041(ointer to a function to remo)147.761 247.2 R .341 -.15(ve k)-.15 H -.15 +(ey).05 G .041(/data pairs from the database.).15 F .042(The k)5.041 F -.15(ey) +-.1 G .042(/data pair associated with).15 F(the speci\214ed)133 259.2 Q F1 -.1 +(ke)2.5 G(y)-.2 E F0(are discarded from the database.)2.5 E(The)133 276 Q F1 +(txnid)3.317 E F0 .817(parameter contains a transaction ID returned from)3.317 +F F1(txn_be)3.317 E(gin)-.4 E F0 3.317(,i).24 G 3.316(ft)431.22 276 S .816 +(he \214le is being accessed)440.646 276 R +(under transaction protection, or NULL if transactions are not in ef)133 288 Q +(fect.)-.25 E(The parameter)133 304.8 Q F1<8d61>2.5 E(g)-.1 E F0 +(must be set to 0 or e)2.5 E(xactly one of the follo)-.15 E(wing v)-.25 E +(alues:)-.25 E(R_CURSOR)133 321.6 Q(Delete the record referenced by the cursor) +158 333.6 Q 5(.T)-.55 G(he cursor must ha)339.32 333.6 Q .3 -.15(ve p)-.2 H(re) +.15 E(viously been initialized.)-.25 E(The)133 350.4 Q F1(delete)2.934 E F0 +.434(functions return -1 on error)2.934 F 2.934(,s)-.4 G(etting)297.818 350.4 Q +F1(errno)2.934 E F0 2.934(,0o).18 G 2.934(ns)364.3 350.4 S .434 +(uccess, and 1 if the speci\214ed)376.124 350.4 R F1 -.1(ke)2.935 G(y)-.2 E F0 +.435(did not)2.935 F -.15(ex)133 362.4 S(ist in the \214le.).15 E +(int \(*fd\)\(const DB *db\);)108 379.2 Q 3.351(Ap)133 391.2 S .851 +(ointer to a function which returns a \214le descriptor representati)148.571 +391.2 R 1.15 -.15(ve o)-.25 H 3.35(ft).15 G .85(he underlying database.)430.53 +391.2 R(A)5.85 E .338(\214le descriptor referencing the same \214le will be re\ +turned to all processes which call)133 403.2 R F1(db_open)2.838 E F0 .339 +(with the)2.839 F(same)133 415.2 Q F1(\214le)3.376 E F0 3.376(name. This)3.376 +F .876(\214le descriptor may be safely used as an ar)3.376 F .876 +(gument to the)-.18 F F1(fcntl)3.376 E F0 .875(\(2\) and).51 F F1(\215oc)3.375 +E(k)-.2 E F0(\(2\)).67 E .99(locking functions.)133 427.2 R .99 +(The \214le descriptor is not necessarily associated with an)5.99 F 3.49(yo) +-.15 G 3.49(ft)453.98 427.2 S .99(he underlying \214les)463.58 427.2 R +(used by the access method.)133 439.2 Q(No \214le descriptor is a)5 E -.25(va) +-.2 G(ilable for in memory databases.).25 E(The)133 456 Q F1(fd)2.5 E F0 +(functions return -1 on error)2.5 E 2.5(,s)-.4 G(etting)278.68 456 Q F1(errno) +2.5 E F0 2.5(,a).18 G(nd the \214le descriptor on success.)335.8 456 Q +(int \(*get\)\(const DB *db, TXN *txnid,)108 472.8 Q(const DBT *k)183 484.8 Q +-.15(ey)-.1 G 2.5(,D)-.5 G(BT *data, u_int \215ags\);)259.87 484.8 Q 2.854(Ap) +133 496.8 S .354(ointer to a function which is the interf)148.074 496.8 R .354 +(ace for k)-.1 F -.15(ey)-.1 G .353(ed retrie).15 F -.25(va)-.25 G 2.853(lf).25 +G .353(rom the database.)397.995 496.8 R .353(The address and)5.353 F +(length of the data associated with the speci\214ed)133 508.8 Q F1 -.1(ke)2.5 G +(y)-.2 E F0(are returned in the structure referenced by)2.5 E F1(data)2.5 E F0 +(.).26 E(The)133 525.6 Q F1(txnid)3.316 E F0 .816 +(parameter contains a transaction ID returned from)3.316 F F1(txn_be)3.317 E +(gin)-.4 E F0 3.317(,i).24 G 3.317(ft)431.215 525.6 S .817 +(he \214le is being accessed)440.642 525.6 R +(under transaction protection, or NULL if transactions are not in ef)133 537.6 +Q(fect.)-.25 E(The)133 554.4 Q F1 -.1(ge)2.5 G(t).1 E F0 +(functions return -1 on error)2.5 E 2.5(,s)-.4 G(etting)283.02 554.4 Q F1 +(errno)2.5 E F0 2.5(,0o).18 G 2.5(ns)348.2 554.4 S(uccess, and 1 if the)359.59 +554.4 Q F1 -.1(ke)2.5 G(y)-.2 E F0 -.1(wa)2.5 G 2.5(sn).1 G(ot found.)476.83 +554.4 Q(int \(*put\)\(const DB *db, TXN *txnid,)108 571.2 Q(DBT *k)183 583.2 Q +-.15(ey)-.1 G 2.5(,c)-.5 G(onst DBT *data, u_int \215ags\);)233.48 583.2 Q 2.5 +(Ap)133 595.2 S(ointer to a function to store k)147.72 595.2 Q -.15(ey)-.1 G +(/data pairs in the database.).15 E(The)133 612 Q F1(txnid)3.317 E F0 .817 +(parameter contains a transaction ID returned from)3.317 F F1(txn_be)3.317 E +(gin)-.4 E F0 3.317(,i).24 G 3.316(ft)431.22 612 S .816 +(he \214le is being accessed)440.646 612 R +(under transaction protection, or NULL if transactions are not in ef)133 624 Q +(fect.)-.25 E(The parameter)133 640.8 Q F1<8d61>2.5 E(g)-.1 E F0 +(must be set to 0 or e)2.5 E(xactly one of the follo)-.15 E(wing v)-.25 E +(alues:)-.25 E(R_CURSOR)133 657.6 Q .448(Replace the k)158 669.6 R -.15(ey)-.1 +G .448(/data pair referenced by the cursor).15 F 5.449(.T)-.55 G .449 +(he cursor must ha)375.156 669.6 R .749 -.15(ve p)-.2 H(re).15 E .449 +(viously been ini-)-.25 F(tialized.)158 681.6 Q(4.4 Berk)72 732 Q(ele)-.1 E 2.5 +(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(3)535 +732 Q EP +%%Page: 4 20 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 119.01(DB_OPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 119.01(anual DB_OPEN\(3\))340.17 48 R(R_NOO)133 84 Q(VER)-.5 E +(WRITE)-.55 E(Enter the ne)158 96 Q 2.5(wk)-.25 G -.15(ey)220.69 96 S +(/data pair only if the k).15 E .3 -.15(ey d)-.1 H(oes not pre).15 E(viously e) +-.25 E(xist.)-.15 E .664(The def)133 112.8 R .664(ault beha)-.1 F .664 +(vior of the)-.2 F/F1 10/Times-Italic@0 SF(put)3.164 E F0 .664 +(functions is to enter the ne)3.164 F 3.164(wk)-.25 G -.15(ey)387.498 112.8 S +.663(/data pair).15 F 3.163(,r)-.4 G .663(eplacing an)443.534 112.8 R 3.163(yp) +-.15 G(re)503.03 112.8 Q(viously)-.25 E -.15(ex)133 124.8 S(isting k).15 E -.15 +(ey)-.1 G(.)-.5 E(The)133 141.6 Q F1(put)3.558 E F0 1.058 +(functions return -1 on error)3.558 F 3.559(,s)-.4 G(etting)291.089 141.6 Q F1 +(errno)3.559 E F0 3.559(,0o).18 G 3.559(ns)359.446 141.6 S 1.059 +(uccess, and 1 if the R_NOO)371.895 141.6 R(VER)-.5 E(WRITE)-.55 E F1<8d61>133 +153.6 Q(g)-.1 E F0 -.1(wa)2.5 G 2.5(ss).1 G(et and the k)172.24 153.6 Q .3 -.15 +(ey a)-.1 H(lready e).15 E(xists in the \214le.)-.15 E +(int \(*seq\)\(const DB *db, TXN *txnid,)108 170.4 Q(DBT *k)183 182.4 Q -.15 +(ey)-.1 G 2.5(,D)-.5 G(BT *data, u_int \215ags\);)236.26 182.4 Q 2.877(Ap)133 +194.4 S .377(ointer to a function which is the interf)148.097 194.4 R .377 +(ace for sequential retrie)-.1 F -.25(va)-.25 G 2.877(lf).25 G .377 +(rom the database.)415.194 194.4 R .376(The address)5.376 F .012 +(and length of the k)133 206.4 R .312 -.15(ey a)-.1 H .012 +(re returned in the structure referenced by).15 F F1 -.1(ke)2.512 G(y)-.2 E F0 +2.512(,a).32 G .012(nd the address and length of the)412.726 206.4 R +(data are returned in the structure referenced by)133 218.4 Q F1(data)2.5 E F0 +(.).26 E(The)133 235.2 Q F1(txnid)3.317 E F0 .817 +(parameter contains a transaction ID returned from)3.317 F F1(txn_be)3.317 E +(gin)-.4 E F0 3.317(,i).24 G 3.316(ft)431.22 235.2 S .816 +(he \214le is being accessed)440.646 235.2 R +(under transaction protection, or NULL if transactions are not in ef)133 247.2 +Q(fect.)-.25 E .721(Sequential k)133 264 R -.15(ey)-.1 G .721 +(/data pair retrie).15 F -.25(va)-.25 G 3.221(lm).25 G .721(ay be)277.884 264 R +.721(gin at an)-.15 F 3.221(yt)-.15 G .721 +(ime, and the logical position of the `)346.568 264 R(`cursor')-.74 E 3.222('i) +-.74 G(s)536.11 264 Q .947(not af)133 276 R .947(fected by calls to the)-.25 F +F1(del)3.447 E F0(,).51 E F1 -.1(ge)3.447 G(t).1 E F0(,).68 E F1(put)3.447 E F0 +3.446(,o).68 G(r)308.572 276 Q F1(sync)3.446 E F0 3.446 +(functions. Modi\214cations)3.446 F .946(to the database during a)3.446 F 2.091 +(sequential scan will be re\215ected in the scan, i.e. records inserted behind\ + the cursor will not be)133 288 R +(returned while records inserted in front of the cursor will be returned.)133 +300 Q(The parameter)133 316.8 Q F1<8d61>2.5 E(g)-.1 E F0(must be set to 0 or e) +2.5 E(xactly one of the follo)-.15 E(wing v)-.25 E(alues:)-.25 E(R_CURSOR)133 +333.6 Q .937(The data associated with the speci\214ed k)158 345.6 R 1.237 -.15 +(ey i)-.1 H 3.437(sr).15 G 3.437(eturned. This)348.546 345.6 R(dif)3.437 E .936 +(fers from the)-.25 F F1 -.1(ge)3.436 G(t).1 E F0 .936(functions in)3.436 F +(that it sets or initializes the cursor to the location of the k)158 357.6 Q .3 +-.15(ey a)-.1 H 2.5(sw).15 G(ell.)415.5 357.6 Q(R_FIRST)133 374.4 Q .835 +(The \214rst k)158 386.4 R -.15(ey)-.1 G .835(/data pair of the database is re\ +turned, and the cursor is set or initialized to refer).15 F(-)-.2 E(ence it.) +158 398.4 Q(R_NEXT)133 415.2 Q(Retrie)158 427.2 Q 1.015 -.15(ve t)-.25 H .715 +(he k).15 F -.15(ey)-.1 G .715(/data pair immediately after the cursor).15 F +5.715(.I)-.55 G 3.215(ft)391.91 427.2 S .714 +(he cursor is not yet set, this is the)401.235 427.2 R +(same as the R_FIRST \215ag.)158 439.2 Q(The)133 456 Q F1(seq)3.014 E F0 .514 +(functions return -1 on error)3.014 F 3.015(,s)-.4 G(etting)287.83 456 Q F1 +(errno)3.015 E F0 3.015(,0o).18 G 3.015(ns)354.555 456 S .515 +(uccess, and 1 if there are no k)366.46 456 R -.15(ey)-.1 G .515(/data pairs) +.15 F(less than or greater than the speci\214ed or current k)133 468 Q -.15(ey) +-.1 G(.)-.5 E(int \(*sync\)\(const DB *db, u_int \215ags\);)108 484.8 Q 3.291 +(Ap)133 496.8 S .791(ointer to a function to \215ush an)148.511 496.8 R 3.291 +(yc)-.15 G .791(ached information to disk.)286.388 496.8 R .79 +(If the database is in memory only)5.79 F(,)-.65 E(the)133 508.8 Q F1(sync)2.5 +E F0(function has no ef)2.5 E(fect and will al)-.25 E -.1(wa)-.1 G(ys succeed.) +.1 E(The parameter)133 525.6 Q F1<8d61>2.5 E(g)-.1 E F0 +(must be set to 0 or a v)2.5 E +(alue speci\214ed by an access method speci\214c manual page.)-.25 E(The)133 +542.4 Q F1(sync)2.5 E F0(functions return -1 on f)2.5 E(ailure, setting)-.1 E +F1(errno)2.5 E F0 2.5(,a).18 G(nd 0 on success.)352.76 542.4 Q/F2 9 +/Times-Bold@0 SF(ERR)72 559.2 Q(ORS)-.27 E F0(The)108 571.2 Q F1(db_open)4.548 +E F0 2.048(function may f)4.548 F 2.049(ail and set)-.1 F F1(errno)4.549 E F0 +2.049(for an)4.549 F 4.549(yo)-.15 G 4.549(ft)345.977 571.2 S 2.049 +(he errors speci\214ed for the library functions)356.636 571.2 R F1(open)108 +583.2 Q F0(\(2\),).24 E F1(malloc)2.5 E F0(\(3\) or the follo).31 E(wing:)-.25 +E([EFTYPE])108 600 Q 2.5<418c>133 612 S(le is incorrectly formatted.)148.28 612 +Q([EINV)108 628.8 Q(AL])-1.35 E 2.557(Ap)133 640.8 S .056 +(arameter has been speci\214ed \(hash function, recno pad byte etc.\))147.777 +640.8 R .056(that is incompatible with the cur)5.056 F(-)-.2 E .725 +(rent \214le speci\214cation or)133 652.8 R 3.225(,a\215)-.4 G .725 +(ag to a function which is not meaningful for the function \(for e)248.435 +652.8 R(xample,)-.15 E .763(use of the cursor without prior initialization\) o\ +r there is a mismatch between the v)133 664.8 R .763(ersion number of)-.15 F +(\214le and the softw)133 676.8 Q(are.)-.1 E(The)108 693.6 Q F1(close)2.913 E +F0 .413(functions may f)2.913 F .413(ail and set)-.1 F F1(errno)2.913 E F0 .413 +(for an)2.913 F 2.913(yo)-.15 G 2.913(ft)319.62 693.6 S .414 +(he errors speci\214ed for the library functions)328.643 693.6 R F1(close)2.914 +E F0(\(2\),).18 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q +99.315(ution August)-.2 F(1, 1995)2.5 E(4)535 732 Q EP +%%Page: 5 21 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 119.01(DB_OPEN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 119.01(anual DB_OPEN\(3\))340.17 48 R/F1 10/Times-Italic@0 SF -.37 +(re)108 84 S(ad).37 E F0(\(2\),).77 E F1(write)2.5 E F0(\(2\),).18 E F1(fr)2.5 +E(ee)-.37 E F0(\(3\), or).18 E F1(fsync)2.5 E F0(\(2\).).31 E(The)108 100.8 Q +F1(del)2.52 E F0(,).51 E F1 -.1(ge)2.52 G(t).1 E F0(,).68 E F1(put)2.52 E F0 +(and)2.52 E F1(seq)2.52 E F0 .02(functions may f)2.52 F .02(ail and set)-.1 F +F1(errno)2.52 E F0 .02(for an)2.52 F 2.52(yo)-.15 G 2.52(ft)376.3 100.8 S .02 +(he errors speci\214ed for the library func-)384.93 100.8 R(tions)108 112.8 Q +F1 -.37(re)2.5 G(ad).37 E F0(\(2\),).77 E F1(write)2.5 E F0(\(2\),).18 E F1(fr) +2.5 E(ee)-.37 E F0(\(3\) or).18 E F1(malloc)2.5 E F0(\(3\).).31 E(The)108 129.6 +Q F1(fd)2.5 E F0(functions will f)2.5 E(ail and set)-.1 E F1(errno)2.5 E F0 +(to ENOENT for in memory databases.)2.5 E(The)108 146.4 Q F1(sync)2.5 E F0 +(functions may f)2.5 E(ail and set)-.1 E F1(errno)2.5 E F0(for an)2.5 E 2.5(yo) +-.15 G 2.5(ft)312.71 146.4 S(he errors speci\214ed for the library function) +321.32 146.4 Q F1(fsync)2.5 E F0(\(2\).).31 E/F2 9/Times-Bold@0 SF(SEE ALSO)72 +163.2 Q F1(db_btr)108 175.2 Q(ee)-.37 E F0(\(3\),).18 E F1(db_hash)2.5 E F0 +(\(3\),).28 E F1(db_loc)2.5 E(k)-.2 E F0(\(3\),).67 E F1(db_lo)2.5 E(g)-.1 E F0 +(\(3\),).22 E F1(db_mpool)2.5 E F0(\(3\),).51 E F1(db_r)2.5 E(ecno)-.37 E F0 +(\(3\),).18 E F1(db_txn)2.5 E F0(\(3\)).24 E F2 -.09(BU)72 192 S(GS).09 E F0 +.106(The name DBT is a mnemonic for `)108 204 R .106(`data base thang')-.74 F +.106(', and w)-.74 F .107(as used because noone could think of a reason-)-.1 F +(able name that w)108 216 Q(asn')-.1 E 2.5(ta)-.18 G(lready in use some)202.14 +216 Q(where else.)-.25 E(The)108 232.8 Q F1(fd)2.5 E F0(function interf)2.5 E +(ace is a kluge, and will be deleted in a future v)-.1 E(ersion of the interf) +-.15 E(ace.)-.1 E(Only big and little endian byte order is supported.)108 249.6 +Q(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315 +(ution August)-.2 F(1, 1995)2.5 E(5)535 732 Q EP +%%Page: 1 22 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 111.23(DB_RECNO\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 111.23(anual DB_RECNO\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA) +72 84 S(ME).18 E F0(db_recno \255 record number database access method)108 96 Q +F1(DESCRIPTION)72 112.8 Q F0(speci\214c details of the recno access method.)108 +124.8 Q F1 -.495(AC)72 141.6 S(CESS METHOD SPECIFIC INFORMA).495 E(TION)-.855 E +F0 2.497(The recno access method speci\214c data structure pro)108 153.6 R +2.497(vided to)-.15 F/F2 10/Times-Italic@0 SF(db_open)4.997 E F0 2.497 +(is typedef)4.997 F 3.497 -.5('d a).55 H 2.497(nd named REC-).5 F 2.765 +(NOINFO. A)108 165.6 R .265(RECNOINFO structure has at least the follo)2.765 F +.266(wing \214elds, which may be initialized before call-)-.25 F(ing)108 177.6 +Q F2(db_open)2.5 E F0(:).24 E(u_int8_t b)108 194.4 Q -.25(va)-.15 G(l;).25 E +.793(The delimiting byte to be used to mark the end of a record for v)133 206.4 +R .793(ariable-length records, and the pad)-.25 F .386(character for \214x)133 +218.4 R .387(ed-length records.)-.15 F .387(If no v)5.387 F .387 +(alue is speci\214ed, ne)-.25 F .387(wlines \(`)-.25 F(`\\n')-.74 E .387 +('\) are used to mark the end)-.74 F(of v)133 230.4 Q +(ariable-length records and \214x)-.25 E +(ed-length records are padded with spaces.)-.15 E(char *bfname;)108 247.2 Q +1.152(The recno access method stores the in-memory copies of its records in a \ +btree.)133 259.2 R 1.152(If bfname is non-)6.152 F .35(NULL, it speci\214es th\ +e name of the btree \214le, as if speci\214ed as the \214le name for a)133 +271.2 R F2(db_open)2.851 E F0 .351(of a btree)2.851 F(\214le.)133 283.2 Q +(u_int cachesize;)108 300 Q 3.847(As)133 312 S 1.347 +(uggested maximum size, in bytes, of the memory cache.)147.957 312 R 1.347 +(This v)6.347 F 1.347(alue is)-.25 F/F3 10/Times-Bold@0 SF(only)3.847 E F0 +(advisory)3.847 E 3.846(,a)-.65 G 1.346(nd the)513.934 312 R .693 +(access method will allocate more memory rather than f)133 324 R 3.193(ail. If) +-.1 F F2(cac)3.193 E(hesize)-.15 E F0 3.193(is 0)3.193 F .693 +(\(no size is speci\214ed\) a)3.193 F(def)133 336 Q(ault size is used.)-.1 E +(u_long \215ags;)108 352.8 Q(The \215ag v)133 364.8 Q(alue is speci\214ed by) +-.25 E F2(or)2.5 E F0('ing an).73 E 2.5(yo)-.15 G 2.5(ft)302.2 364.8 S +(he follo)310.81 364.8 Q(wing v)-.25 E(alues:)-.25 E(R_FIXEDLEN)133 381.6 Q +1.49(The records are \214x)158 393.6 R 1.489(ed-length, not byte delimited.) +-.15 F 1.489(The structure element)6.489 F F2 -.37(re)3.989 G(clen).37 E F0 +1.489(speci\214es the)3.989 F .487 +(length of the record, and the structure element)158 405.6 R F2(bval)2.987 E F0 +.488(is used as the pad character)2.988 F 5.488(.A)-.55 G .788 -.15(ny r) +495.232 405.6 T(ecords,).15 E(inserted into the database, that are less than) +158 417.6 Q F2 -.37(re)2.5 G(clen).37 E F0 +(bytes long are automatically padded.)2.5 E(R_NOKEY)133 434.4 Q 1.146 +(In the interf)158 446.4 R 1.146(ace speci\214ed by)-.1 F F2(db_open)3.646 E F0 +3.646(,t).24 G 1.146(he sequential record retrie)320.816 446.4 R -.25(va)-.25 G +3.646<6c8c>.25 G 1.145(lls in both the caller')449.31 446.4 R(s)-.55 E -.1(ke) +158 458.4 S 4.795(ya)-.05 G 2.295(nd data structures.)181.425 458.4 R 2.295 +(If the R_NOKEY \215ag is speci\214ed, the)7.295 F F2(cur)4.795 E(sor)-.1 E F0 +2.295(functions are not)4.795 F .621(required to \214ll in the k)158 470.4 R +.921 -.15(ey s)-.1 H 3.121(tructure. This).15 F .621 +(permits applications to retrie)3.121 F .92 -.15(ve r)-.25 H .62 +(ecords at the end of).15 F(\214les without reading all of the interv)158 482.4 +Q(ening records.)-.15 E(R_SN)133 499.2 Q(APSHO)-.35 E(T)-.4 E .029 +(This \215ag requires that a snapshot of the \214le be tak)158 511.2 R .029 +(en when)-.1 F F2(db_open)2.529 E F0 .029(is called, instead of permit-)2.529 F +(ting an)158 523.2 Q 2.5(yu)-.15 G +(nmodi\214ed records to be read from the original \214le.)197.85 523.2 Q +(int lorder;)108 540 Q .65(The byte order for inte)133 552 R .65 +(gers in the stored database metadata.)-.15 F .65 +(The number should represent the order)5.65 F .748(as an inte)133 564 R .749 +(ger; for e)-.15 F .749(xample, big endian order w)-.15 F .749 +(ould be the number 4,321.)-.1 F(If)5.749 E F2(lor)3.249 E(der)-.37 E F0 .749 +(is 0 \(no order is)3.249 F(speci\214ed\) the current host order is used.)133 +576 Q(u_int psize;)108 592.8 Q .284(The recno access method stores the in-memo\ +ry copies of its records in a btree.)133 604.8 R .284(This v)5.284 F .283 +(alue is the size)-.25 F .297 +(\(in bytes\) of the pages used for nodes in that tree.)133 616.8 R(If)5.297 E +F2(psize)2.797 E F0 .297(is 0 \(no page size is speci\214ed\) a page size)2.797 +F(is chosen based on the underlying \214le system I/O block size.)133 628.8 Q +(See)5 E F2(btr)2.5 E(ee)-.37 E F0(\(3\) for more information.).18 E +(size_t reclen;)108 645.6 Q(The length of a \214x)133 657.6 Q +(ed-length record.)-.15 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib) +132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(1)535 732 Q EP +%%Page: 2 23 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 111.23(DB_RECNO\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 111.23(anual DB_RECNO\(3\))340.17 48 R/F1 9/Times-Bold@0 SF +(DB OPERA)72 84 Q(TIONS)-.855 E F0 .972(The data part of the k)108 96 R -.15 +(ey)-.1 G .971(/data pair used by the recno access method is the same as other\ + access methods.).15 F .198(The k)108 108 R .498 -.15(ey i)-.1 H 2.698(sd).15 G +(if)157.504 108 Q 2.698(ferent. The)-.25 F/F2 10/Times-Italic@0 SF(data)2.698 E +F0 .198(\214eld of the k)2.698 F .499 -.15(ey s)-.1 H .199 +(hould be a pointer to a memory location of type).15 F F2 -.37(re)2.699 G +(cno_t).37 E F0 2.699(,a).68 G(s)536.11 108 Q .506(de\214ned in the <db)108 120 +R .506(.h> include \214le.)-.4 F .506(This type is normally the lar)5.506 F +.506(gest unsigned inte)-.18 F .506(gral type a)-.15 F -.25(va)-.2 G .505 +(ilable to the).25 F 2.5(implementation. The)108 132 R F2(size)2.5 E F0 +(\214eld of the k)2.5 E .3 -.15(ey s)-.1 H(hould be the size of that type.).15 +E 1.944(The record number data structure is either v)108 148.8 R 1.944 +(ariable or \214x)-.25 F 1.944 +(ed-length records stored in a \215at-\214le format,)-.15 F 1.856 +(accessed by the logical record number)108 160.8 R 6.856(.T)-.55 G 1.856(he e) +285.206 160.8 R 1.856(xistence of record number \214v)-.15 F 4.356(er)-.15 G +1.856(equires the e)440.442 160.8 R 1.856(xistence of)-.15 F .875 +(records one through four)108 172.8 R 3.375(,a)-.4 G .875 +(nd the deletion of record number one causes record number \214v)219.68 172.8 R +3.376(et)-.15 G 3.376(ob)489.928 172.8 S 3.376(er)503.304 172.8 S(enum-)514.45 +172.8 Q .283(bered to record number four)108 184.8 R 2.783(,a)-.4 G 2.783(sw) +231.195 184.8 S .283(ell as the cursor)245.088 184.8 R 2.783(,i)-.4 G 2.783(fp) +316.64 184.8 S .282(ositioned after record number one, to shift do)327.753 +184.8 R .282(wn one)-.25 F 3.18(record. The)108 196.8 R .68 +(creation of record number \214v)3.18 F 3.18(ew)-.15 G .681 +(hen records one through four do not e)295.05 196.8 R .681 +(xist causes the logical)-.15 F(creation of them with zero-length data.)108 +208.8 Q .372(Because there is no meta-data associated with the underlying recn\ +o access method \214les, an)108 225.6 R 2.872(yc)-.15 G .372(hanges made) +487.698 225.6 R .191(to the def)108 237.6 R .191(ault v)-.1 F .191 +(alues \(e.g. \214x)-.25 F .192(ed record length or byte separator v)-.15 F +.192(alue\) must be e)-.25 F .192(xplicitly speci\214ed each time)-.15 F +(the \214le is opened.)108 249.6 Q 1.037(The functions returned by)108 266.4 R +F2(db_open)3.537 E F0 1.036(for the btree access method are as described in) +3.536 F F2(db_open)3.536 E F0 1.036(\(3\), with the).24 F(follo)108 278.4 Q +(wing e)-.25 E(xceptions and additions:)-.15 E 5.28(type The)108 295.2 R +(type is DB_RECNO.)2.5 E 9.72(put Using)108 312 R(the)2.558 E F2(put)2.558 E F0 +(interf)2.559 E .059(ace to create a ne)-.1 F 2.559(wr)-.25 G .059 +(ecord will cause the creation of multiple, empty records if the)293.07 312 R +(record number is more than one greater than the lar)133 324 Q +(gest record currently in the database.)-.18 E(The)133 340.8 Q F2(put)2.5 E F0 +(function tak)2.5 E(es the follo)-.1 E(wing additional \215ags:)-.25 E +(R_IAFTER)133 357.6 Q 1.225 +(Append the data immediately after the data referenced by)158 369.6 R F2 -.1 +(ke)3.724 G(y)-.2 E F0 3.724(,c).32 G 1.224(reating a ne)425.354 369.6 R 3.724 +(wk)-.25 G -.15(ey)490.046 369.6 S 1.224(/data pair).15 F(.)-.55 E +(The record number of the appended k)158 381.6 Q -.15(ey)-.1 G +(/data pair is returned in the).15 E F2 -.1(ke)2.5 G(y)-.2 E F0(structure.)2.5 +E(R_IBEFORE)133 398.4 Q 1.343 +(Insert the data immediately before the data referenced by)158 410.4 R F2 -.1 +(ke)3.844 G(y)-.2 E F0 3.844(,c).32 G 1.344(reating a ne)424.874 410.4 R 3.844 +(wk)-.25 G -.15(ey)489.926 410.4 S 1.344(/data pair).15 F(.)-.55 E +(The record number of the inserted k)158 422.4 Q -.15(ey)-.1 G +(/data pair is returned in the).15 E F2 -.1(ke)2.5 G(y)-.2 E F0(structure.)2.5 +E(R_SETCURSOR)133 439.2 Q(Store the k)158 451.2 Q -.15(ey)-.1 G(/data pair).15 +E 2.5(,s)-.4 G +(etting or initializing the position of the cursor to reference it.)256.5 451.2 +Q 9.17(seq The)108 468 R F2(seq)2.5 E F0(function tak)2.5 E(es the follo)-.1 E +(wing additional \215ags:)-.25 E(R_LAST)133 484.8 Q .04(The last k)158 496.8 R +-.15(ey)-.1 G .04(/data pair of the database is returned, and the cursor is se\ +t or initialized to reference).15 F(it.)158 508.8 Q(R_PREV)133 525.6 Q(Retrie) +158 537.6 Q .59 -.15(ve t)-.25 H .29(he k).15 F -.15(ey)-.1 G .29 +(/data pair immediately before the cursor).15 F 5.29(.I)-.55 G 2.79(ft)395.73 +537.6 S .29(he cursor is not yet set, this is the)404.63 537.6 R +(same as the R_LAST \215ag.)158 549.6 Q .749 +(If the database \214le is a character special \214le and no complete k)133 +566.4 R -.15(ey)-.1 G .748(/data pairs are currently a).15 F -.25(va)-.2 G +(ilable,).25 E(the)133 578.4 Q F2(seq)2.5 E F0(function returns 2.)2.5 E 4.17 +(sync The)108 595.2 R F2(sync)2.5 E F0(function tak)2.5 E(es the follo)-.1 E +(wing additional \215ag:)-.25 E(R_RECNOSYNC)133 612 Q .643 +(This \215ag causes the)158 624 R F2(sync)3.143 E F0 .644 +(function to apply to the btree \214le which underlies the recno \214le, not) +3.143 F .09(the recno \214le itself.)158 636 R .09(\(See the)5.09 F F2(bfname) +2.59 E F0 .09(\214eld of RECNOINFO structure, abo)2.59 F -.15(ve)-.15 G 2.59 +(,f).15 G .09(or more informa-)470.95 636 R(tion.\))158 648 Q F1(ERR)72 664.8 Q +(ORS)-.27 E F0(The)108 676.8 Q F2 -.37(re)3.731 G(cno).37 E F0 1.231 +(access method functions may f)3.731 F 1.231(ail and set)-.1 F F2(errno)3.731 E +F0 1.231(for an)3.731 F 3.731(yo)-.15 G 3.731(ft)392.652 676.8 S 1.231 +(he errors speci\214ed for the library)402.493 676.8 R(function)108 688.8 Q F2 +(db_open)2.5 E F0(\(3\) or the follo).24 E(wing:)-.25 E(4.4 Berk)72 732 Q(ele) +-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E +(2)535 732 Q EP +%%Page: 3 24 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 111.23(DB_RECNO\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 111.23(anual DB_RECNO\(3\))340.17 48 R([EINV)108 84 Q(AL])-1.35 E +(An attempt w)133 96 Q(as made to add a record to a \214x)-.1 E +(ed-length database that w)-.15 E(as too lar)-.1 E(ge to \214t.)-.18 E/F1 9 +/Times-Bold@0 SF(SEE ALSO)72 112.8 Q/F2 10/Times-Italic@0 SF(db_btr)108 124.8 Q +(ee)-.37 E F0(\(3\),).18 E F2(db_hash)2.5 E F0(\(3\),).28 E F2(db_loc)2.5 E(k) +-.2 E F0(\(3\),).67 E F2(db_lo)2.5 E(g)-.1 E F0(\(3\),).22 E F2(db_mpool)2.5 E +F0(\(3\),).51 E F2(db_open)2.5 E F0(\(3\),).24 E F2(db_txn)2.5 E F0(\(3\)).24 E +F2 2.755(Document Pr)108 148.8 R 2.755 +(ocessing in a Relational Database System)-.45 F F0 5.254(,M).32 G 2.754 +(ichael Stonebrak)362.134 148.8 R(er)-.1 E 5.254(,H)-.4 G 2.754(eidi Stettner) +454.062 148.8 R 5.254(,J)-.4 G(oseph)516.67 148.8 Q +(Kalash, Antonin Guttman, Nadene L)108 160.8 Q +(ynn, Memorandum No. UCB/ERL M82/32, May 1982.)-.55 E F1 -.09(BU)72 177.6 S(GS) +.09 E F0(The)108 189.6 Q F2(sync)3.616 E F0(function')3.616 E 3.616(sR)-.55 G +1.116(_RECNOSYNC interf)198.838 189.6 R 1.117 +(ace is a kluge, and will be deleted in a future v)-.1 F 1.117(ersion of the) +-.15 F(interf)108 201.6 Q(ace.)-.1 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G +(istrib)132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(3)535 732 Q EP +%%Page: 1 25 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.57(DB_TXN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.57(anual DB_TXN\(3\))340.17 48 R/F1 9/Times-Bold@0 SF -.18(NA)72 +84 S(ME).18 E F0(db_txn \255 transaction management functions)108 96 Q F1 +(SYNOPSIS)72 112.8 Q/F2 10/Times-Bold@0 SF(#include <db)108 124.8 Q(.h>)-.4 E +(#include <db_lock.h>)108 136.8 Q(int)108 160.8 Q(txn_cr)108 172.8 Q +(eate\(const char *path, mode_t mode, u_int maxtxns, u_int \215ags\);)-.18 E +(TXNMGR *)108 196.8 Q(txn_open\(const char *path, DBT *logp, LOCK_T)108 208.8 Q +(ABLE_T *lockp,)-.9 E(int \(*r)158 220.8 Q(eco)-.18 E -.1(ve)-.1 G +(r\)\(DBT *lsn, DBT *log_entry).1 E 2.5(,i)-.55 G(nt isundo\)\);)340.11 220.8 Q +(TXN *)108 244.8 Q(txn_begin\(TXNMGR *txnp\);)108 256.8 Q(int)108 280.8 Q +(txn_commit\(TXN *tid\);)108 292.8 Q(int)108 316.8 Q(txn_pr)108 328.8 Q(epar) +-.18 E(e\(TXN *tid\);)-.18 E(int)108 352.8 Q(txn_abort\(TXN *tid\);)108 364.8 Q +(int)108 388.8 Q(txn_close\(TXNMGR *txnp\);)108 400.8 Q(int)108 424.8 Q +(txn_unlink\(const char *path, int f)108 436.8 Q(or)-.25 E(ce\);)-.18 E F1 +(DESCRIPTION)72 453.6 Q F0(speci\214c details of the transaction support.)108 +465.6 Q/F3 10/Times-Italic@0 SF(Db_txn)108 482.4 Q F0 .034 +(is the library interf)2.534 F .034(ace that pro)-.1 F .034 +(vides transaction semantics.)-.15 F .034(Full transaction support is pro)5.034 +F .034(vided by a)-.15 F .722(collection of modules that pro)108 494.4 R .723 +(vide well de\214ned interf)-.15 F .723 +(aces to the services required for transaction process-)-.1 F 3.488(ing. These) +108 506.4 R .988(services are reco)3.488 F -.15(ve)-.15 G .988(ry \(see).15 F +F3(db_lo)3.488 E(g)-.1 E F0 .988(\(3\)\), concurrenc).22 F 3.488(yc)-.15 G .988 +(ontrol \(see)371.864 506.4 R F3(db_loc)3.487 E(k)-.2 E F0 .987 +(\(3\)\), and the manage-).67 F 2.201(ment of shared data \(see)108 518.4 R F3 +(db_mpool)4.701 E F0 4.701(\(3\)\). T).51 F 2.202 +(ransaction semantics can be applied to the access methods)-.35 F(described in) +108 530.4 Q F3(db)2.5 E F0(\(3\) through function call parameters.).23 E .629(\ +The model intended for transactional use \(and that is used by the access meth\ +ods\) is that write-ahead log-)108 547.2 R .047(ging is pro)108 559.2 R .047 +(vided by)-.15 F F3(db_lo)2.547 E(g)-.1 E F0 .047 +(\(3\) to record both before- and after).22 F .048(-image logging.)-.2 F .048 +(Locking follo)5.048 F .048(ws a tw)-.25 F(o-phase)-.1 E +(protocol and is implemented by)108 571.2 Q F3(db_loc)2.5 E(k)-.2 E F0(\(3\).) +.67 E .549(The function)108 588 R F3(txn_cr)3.049 E(eate)-.37 E F0 .549 +(creates and initializes the transaction re)3.049 F .548 +(gion identi\214ed by the)-.15 F F3(path)3.048 E F0(directory)3.048 E 5.548(.T) +-.65 G(his)528.33 588 Q .572(directory must already e)108 600 R .572(xist when) +-.15 F F3(txn_cr)3.072 E(eate)-.37 E F0 .572(is called.)3.072 F .572 +(If the transaction re)5.572 F .572(gion identi\214ed by)-.15 F F3(path)3.072 E +F0(already)3.072 E -.15(ex)108 612 S 1.78(ists, then).15 F F3(txn_cr)4.28 E +(eate)-.37 E F0 1.78(returns success without further action.)4.28 F 1.78 +(The \214les associated with the transaction)6.78 F(re)108 624 Q .293 +(gion are created in the directory speci\214ed by)-.15 F F3(path)2.793 E F0 +5.293(.\().28 G .293(The group of the created \214les is based on the system) +327.657 624 R .781(and directory def)108 636 R .781 +(aults, and is not further speci\214ed by)-.1 F F3(txn_cr)3.281 E(eate)-.37 E +F0 3.281(.\) All).18 F .78(\214les created by)3.28 F F3(txn_cr)3.28 E(eate)-.37 +E F0 .78(are cre-)3.28 F .048(ated with mode)108 648 R F3(mode)2.548 E F0 .049 +(\(as described in)2.548 F F3 -.15(ch)2.549 G(mod).15 E F0 .049 +(\(2\)\) and modi\214ed by the process' umask v).77 F .049(alue \(see)-.25 F F3 +(umask)2.549 E F0(\(2\)\).).67 E(An)108 660 Q 2.5(yn)-.15 G(ecessary)132.57 660 +Q 2.5(,a)-.65 G(ssociated log and lock re)175.23 660 Q +(gions are created as well \(see)-.15 E F3(db_lo)2.5 E(g)-.1 E F0(\(3\) and).22 +E F3(db_loc)2.5 E(k)-.2 E F0(\(3\)\).).67 E(The)108 676.8 Q F3(maxtxns)4.191 E +F0(ar)4.191 E 1.691(gument speci\214es the maximum number of simultaneous tran\ +sactions that are supported.)-.18 F .229 +(This bounds the size of backing \214les and is used to deri)108 688.8 R .529 +-.15(ve l)-.25 H .229(imits for the size of the lock re).15 F .229 +(gion and log\214les.)-.15 F(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib) +132.57 732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(1)535 732 Q EP +%%Page: 2 26 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.57(DB_TXN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.57(anual DB_TXN\(3\))340.17 48 R(When there are more than)108 84 +Q/F1 10/Times-Italic@0 SF(maxtxns)2.5 E F0(concurrent transactions, calls to) +2.5 E F1(txn_be)2.5 E(gin)-.4 E F0(may f)2.5 E(ail.)-.1 E(Def)108 100.8 Q .847 +(ault locking and logging protocols are pro)-.1 F .846 +(vided only if the backing \214les e)-.15 F 3.346(xist. If)-.15 F .846 +(the backing \214les do)3.346 F 1.433(not e)108 112.8 R 1.433(xist, the)-.15 F +F1<8d61>3.933 E(gs)-.1 E F0 1.434 +(parameter must indicate both a logging mode and locking mode speci\214ed by) +3.933 F F1(or)3.934 E F0('ing).73 E(together at most one \215ag from each of t\ +he TXN_LOCK and TXN_LOG classes as follo)108 124.8 Q(ws:)-.25 E(TXN_LOCK_2PL) +108 141.6 Q(Use tw)133 153.6 Q(o-phase locking.)-.1 E(TXN_LOCK_OPTIMISTIC)108 +170.4 Q(Use optimistic locking \(not currently implemented\).)133 182.4 Q +(TXN_LOG_REDO)108 199.2 Q(Pro)133 211.2 Q +(vide redo-only logging \(not currently implemented\).)-.15 E(TXN_LOG_UNDO)108 +228 Q(Pro)133 240 Q(vide undo-only logging \(not currently implemented\).)-.15 +E(TXN_LOG_UNDOREDO)108 256.8 Q(Pro)133 268.8 Q +(vide undo/redo write-ahead logging.)-.15 E(The function)108 285.6 Q F1(txn_cr) +2.5 E(eate)-.37 E F0(returns -1 on f)2.5 E(ailure, setting)-.1 E F1(errno)2.5 E +F0 2.5(,a).18 G(nd 0 on success.)351.83 285.6 Q 1.892(The function)108 302.4 R +F1(txn_open)4.392 E F0 1.892(returns a pointer to the transaction re)4.392 F +1.892(gion identi\214ed by)-.15 F F1(path)4.392 E F0 4.392(,w).28 G 1.892 +(hich must ha)476.016 302.4 R -.15(ve)-.2 G .239 +(already been created by a call to)108 314.4 R F1(txn_cr)2.739 E(eate)-.37 E F0 +5.239(.T).18 G .239(he process must ha)296.88 314.4 R .539 -.15(ve p)-.2 H .239 +(ermission to read and write \214les with).15 F -.25(ow)108 326.4 S .327 +(ners, groups and permissions as described for).25 F F1(txn_cr)2.826 E(eate) +-.37 E F0 5.326(.T).18 G(he)362.624 326.4 Q F1(txn_open)2.826 E F0 .326 +(function returns NULL on f)2.826 F(ail-)-.1 E(ure, setting)108 338.4 Q F1 +(errno)2.5 E F0(.).18 E(The)108 355.2 Q F1 -.37(re)3.181 G(co).37 E(ver)-.1 E +F0(ar)3.181 E .681(gument speci\214es a function that is called by)-.18 F F1 +(txn_abort)3.181 E F0 .682(during transaction abort.)3.182 F .682(This func-) +5.682 F(tion tak)108 367.2 Q(es three ar)-.1 E(guments:)-.18 E 10.83(lsn A)108 +384 R(log sequence number \(LSN\).)2.5 E(log_entry)108 400.8 Q 2.5(Al)133 412.8 +S(og record.)145.5 412.8 Q(isundo)108 429.6 Q(An undo \215ag set to 0 if the o\ +peration is a redo and set to 1 if the operation an undo.)133 441.6 Q 1.498 +(As discussed in the)108 458.4 R F1(db_lo)3.998 E 3.998(g\()-.1 G(3\))228.44 +458.4 Q F0 1.497(manual page, the application is responsible for pro)3.997 F +1.497(viding an)-.15 F 3.997(yn)-.15 G(ecessary)506.13 458.4 Q .486 +(structure to the log record.)108 470.4 R -.15(Fo)5.486 G 2.986(re).15 G .487 +(xample, the application must understand what part of the log record is an) +242.256 470.4 R(operation code, what part is redo information, and what part i\ +s undo information.)108 482.4 Q(The)108 499.2 Q F1(txn_be)2.785 E(gin)-.4 E F0 +.285(function creates a ne)2.785 F 2.784(wt)-.25 G .284 +(ransaction in the designated transaction manager)264.018 499.2 R 2.784(,r)-.4 +G .284(eturning a pointer)468.332 499.2 R +(to a TXN that uniquely identi\214es it.)108 511.2 Q(The)108 528 Q F1 +(txn_commit)2.615 E F0 .115(function ends the transaction speci\214ed by the) +2.615 F F1(tid)2.615 E F0(ar)2.615 E 2.615(gument. An)-.18 F 2.615(yl)-.15 G +.115(ocks held by the transac-)440.12 528 R(tion are released.)108 540 Q +(If logging is enabled, a commit log record is written and \215ushed to disk.)5 +E(The)108 556.8 Q F1(txn_abort)2.889 E F0 .389 +(function causes an abnormal termination of the transaction.)2.889 F .388 +(If logging is enabled, the log is)5.389 F 2.312(played backw)108 568.8 R 2.312 +(ards and an)-.1 F 4.812(yr)-.15 G(eco)228.628 568.8 Q -.15(ve)-.15 G 2.312 +(ry operations are initiated through the).15 F F1 -.37(re)4.813 G(co).37 E(ver) +-.1 E F0 2.313(function speci\214ed to)4.813 F F1(txn_open)108 580.8 Q F0 5(.A) +.24 G(fter reco)159.62 580.8 Q -.15(ve)-.15 G +(ry is completed, all locks held by the transaction are released.).15 E(The)108 +597.6 Q F1(txn_close)3.824 E F0 1.323 +(function detaches a process from the transaction en)3.823 F 1.323 +(vironment speci\214ed by the TXNMGR)-.4 F(pointer)108 609.6 Q 5.261(.A)-.55 G +.261(ll mapped re)150.761 609.6 R .261(gions are unmapped and an)-.15 F 2.761 +(ya)-.15 G .262(llocated resources are freed.)323.638 609.6 R(An)5.262 E 2.762 +(yu)-.15 G .262(ncommitted trans-)466.688 609.6 R(actions are aborted.)108 +621.6 Q .69(The function)108 638.4 R F1(txn_unlink)3.19 E F0(destro)3.19 E .69 +(ys the transaction re)-.1 F .69(gion identi\214ed by the directory)-.15 F F1 +(path)3.19 E F0 3.19(,r).28 G(emo)472.1 638.4 Q .69(ving all \214les)-.15 F +1.78(used to implement the transaction re)108 650.4 R 4.28(gion. \(The)-.15 F +(directory)4.28 E F1(path)4.28 E F0 1.78(is not remo)4.28 F -.15(ve)-.15 G 4.28 +(d.\) If).15 F 1.78(there are processes)4.28 F .553(which ha)108 662.4 R .853 +-.15(ve c)-.2 H(alled).15 E F1(txn_open)3.052 E F0 .552(without calling)3.052 F +F1(txn_close)3.052 E F0 .552 +(\(i.e., there are processes currently using the transac-)3.052 F .135(tion re) +108 674.4 R(gion\),)-.15 E F1(txn_unlink)2.635 E F0 .135(will f)2.635 F .135 +(ail without further action, unless the force \215ag is set, in which case)-.1 +F F1(txn_unlink)2.636 E F0 1.67(will attempt to delete the transaction re)108 +686.4 R 1.67(gion \214les re)-.15 F -.05(ga)-.15 G 1.67(rdless of an).05 F 4.17 +(yp)-.15 G 1.67(rocesses still using the transaction)397.22 686.4 R(4.4 Berk)72 +732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315(ution August)-.2 F +(1, 1995)2.5 E(2)535 732 Q EP +%%Page: 3 27 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.57(DB_TXN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.57(anual DB_TXN\(3\))340.17 48 R(re)108 84 Q 2.996(gion. An)-.15 +F 2.996(ya)-.15 G .496(ccesses to a remo)165.902 84 R -.15(ve)-.15 G 2.997(dt) +.15 G .497(ransaction re)257.007 84 R .497(gion will lik)-.15 F .497 +(ely result in une)-.1 F .497(xpected beha)-.15 F(vior)-.2 E 5.497(.T)-.55 G +.497(he func-)506.463 84 R(tion)108 96 Q/F1 10/Times-Italic@0 SF(txn_unlink)2.5 +E F0(returns -1 on f)2.5 E(ailure, setting)-.1 E F1(errno)2.5 E F0 2.5(,a).18 G +(nd 0 on success.)316.39 96 Q .51(In the case of catastrophic or system f)108 +112.8 R .51(ailure, it is possible to clean up a transaction re)-.1 F .51 +(gion by remo)-.15 F .51(ving all)-.15 F .34 +(of the \214les in the directory speci\214ed to the)108 124.8 R F1(txn_cr)2.841 +E(eate)-.37 E F0 .341(function, as transaction re)2.841 F .341 +(gion \214les are ne)-.15 F -.15(ve)-.25 G 2.841(rc).15 G(reated)515.57 124.8 Q +(in an)108 136.8 Q 2.5(yd)-.15 G(irectory other than the one speci\214ed to) +140.07 136.8 Q F1(txn_cr)2.5 E(eate)-.37 E F0(.).18 E(The)108 153.6 Q F1 +(txn_pr)3.42 E(epar)-.37 E(e)-.37 E F0 .92(function initiates the be)3.42 F +.919(ginning of a tw)-.15 F 3.419(op)-.1 G .919(hase commit.)352.206 153.6 R +.919(In a distrib)5.919 F .919(uted transaction, the)-.2 F .185 +(prepare directi)108 165.6 R .485 -.15(ve s)-.25 H .185 +(hould be issued to all participating transaction managers.).15 F .185 +(Each manager must tak)5.185 F 2.685(ew)-.1 G(hat-)524.45 165.6 Q -2.15 -.25 +(ev e)108 177.6 T 2.5(ra).25 G +(ction is necessary to guarantee that a future call to)131.75 177.6 Q F1 +(txn_commit)2.5 E F0(on the speci\214ed)2.5 E F1(tid)2.5 E F0(will succeed.)2.5 +E/F2 9/Times-Bold@0 SF(SYSTEM INTEGRA)72 194.4 Q(TION)-.855 E F0 .28 +(This model can be applied to data bases other than the pro)108 206.4 R .279 +(vided access methods.)-.15 F -.15(Fo)5.279 G 2.779(re).15 G .279 +(xample, consider an)459.182 206.4 R .15(application that pro)108 218.4 R .15 +(vides transaction semantics to data stored in re)-.15 F .15 +(gular \214les accessed using the)-.15 F F1 -.37(re)2.65 G(ad).37 E F0 .15 +(\(2\) and).77 F F1(write)108 230.4 Q F0 .708(\(2\) system calls.).18 F .707 +(The operations for which transaction protection is desired are brack)5.708 F +.707(eted by calls to)-.1 F F1(txn_be)108 242.4 Q(gin)-.4 E F0(and)2.5 E F1 +(txn_commit)2.5 E F0(.).68 E .606 +(Before data are referenced, a call is made to the lock manager)108 259.2 R(,) +-.4 E F1(db_loc)3.106 E(k)-.2 E F0 3.106(,f).67 G .606 +(or a lock of the appropriate type)408.064 259.2 R .719 +(\(e.g. read\) on the object being lock)108 271.2 R 3.218(ed. The)-.1 F .718 +(object might be a page in the \214le, a byte, a range of bytes, or)3.218 F +.572(some k)108 283.2 R -.15(ey)-.1 G 5.572(.B)-.5 G .573 +(efore a write is performed, the application mak)160.464 283.2 R .573 +(es a call to the log manager)-.1 F(,)-.4 E F1(db_lo)3.073 E(g)-.1 E F0 3.073 +(,t).22 G 3.073(or)506.387 283.2 S(ecord)517.79 283.2 Q .522 +(enough information to redo the operation in case of f)108 295.2 R .522 +(ailure after commit and to undo the operation in case)-.1 F .609(of abort.)108 +307.2 R .609 +(After the log message is written, the write system calls are issued.)5.609 F +.61(After all requests are issued,)5.61 F .518(the application calls)108 319.2 +R F1(txn_commit)3.017 E F0 5.517(.W).68 G(hen)256.84 319.2 Q F1(txn_commit) +3.017 E F0 .517(returns, the caller is guaranteed that all necessary log)3.017 +F(writes ha)108 331.2 Q .3 -.15(ve b)-.2 H(een written to disk.).15 E 1.081 +(At an)108 348 R 3.581(yt)-.15 G 1.081(ime, the application may call)142.232 +348 R F1(txn_abort)3.581 E F0 3.581(,w).68 G 1.081 +(hich will result in the appropriate calls to the)318.828 348 R F1 -.37(re) +3.582 G(co).37 E(ver)-.1 E F0 .278(routine to restore the `)108 360 R +(`database')-.74 E 2.778('t)-.74 G 2.778(oac)246.48 360 S .278 +(onsistent pre-transaction state.)265.916 360 R .277(\(The reco)5.277 F -.15 +(ve)-.15 G 2.777(rr).15 G .277(outine must be able to)450.562 360 R +(either reapply or undo the update depending on the conte)108 372 Q +(xt, for each dif)-.15 E(ferent type of log record.\))-.25 E .746 +(If the application should crash, the reco)108 388.8 R -.15(ve)-.15 G .746 +(ry process uses the).15 F F1(db_lo)3.246 E(g)-.1 E F0(interf)3.246 E .746 +(ace to read the log and call the)-.1 F F1 -.37(re)108 400.8 S(co).37 E(ver)-.1 +E F0(routine to restore the database to a consistent state.)2.5 E(The)108 417.6 +Q F1(txn_pr)3.098 E(epar)-.37 E(e)-.37 E F0 .598(function pro)3.098 F .598 +(vides the core functionality to implement distrib)-.15 F .597 +(uted transactions, b)-.2 F .597(ut it does)-.2 F .36 +(not actually manage the noti\214cation of distrib)108 429.6 R .36 +(uted transaction managers.)-.2 F .36(The caller is responsible for issu-)5.36 +F(ing)108 441.6 Q F1(txn_pr)2.82 E(epar)-.37 E(e)-.37 E F0 .32 +(calls to all sites participating in the transaction.)2.82 F .319 +(If all responses are positi)5.319 F -.15(ve)-.25 G 2.819(,t).15 G .319 +(he caller can)488.832 441.6 R .822(issue a)108 453.6 R F1(txn_commit)3.322 E +F0 5.822(.I).68 G 3.322(fa)198.076 453.6 S 1.122 -.15(ny o)209.168 453.6 T +3.322(ft).15 G .822(he responses are ne)236.772 453.6 R -.05(ga)-.15 G(ti).05 E +-.15(ve)-.25 G 3.322(,t).15 G .823(he caller should issue a)349.15 453.6 R F1 +(txn_abort)3.323 E F0 5.823(.I).68 G 3.323(ng)499.747 453.6 S(eneral,)513.07 +453.6 Q(the)108 465.6 Q F1(txn_pr)2.5 E(epar)-.37 E(e)-.37 E F0 +(call requires that the transaction log be \215ushed to disk.)2.5 E .821 +(The structure of the transaction support allo)108 482.4 R .821 +(ws application designers to trade of)-.25 F 3.32(fp)-.25 G .82 +(erformance and protec-)445.07 482.4 R 3.948(tion. Since)108 494.4 R 1.448 +(DB manages man)3.948 F 3.948(ys)-.15 G 1.448(tructures in shared memory)245.36 +494.4 R 3.948(,i)-.65 G 1.448(ts information is subject to corruption by) +367.982 494.4 R 1.306(applications when the library is link)108 506.4 R 1.306 +(ed directly with the application.)-.1 F -.15(Fo)6.306 G 3.805(rt).15 G 1.305 +(his reason, DB is designed to)416.815 506.4 R(allo)108 518.4 Q 3.367(wc)-.25 G +.867(ompilation into a separate serv)137.777 518.4 R .868 +(er process that may be accessed via a sock)-.15 F .868(et interf)-.1 F 3.368 +(ace. In)-.1 F .868(this w)3.368 F(ay)-.1 E(DB')108 530.4 Q 2.828(sd)-.55 G +.328(ata structures are protected from application code, b)136.388 530.4 R .328 +(ut communication o)-.2 F -.15(ve)-.15 G .327(rhead is increased.).15 F(When) +5.327 E(applications are trusted, DB may be compiled directly into the applica\ +tion for increased performance.)108 542.4 Q F2(ERR)72 559.2 Q(ORS)-.27 E F0 +(The)108 571.2 Q F1(txn_cr)4.113 E(eate)-.37 E F0 1.613(function may f)4.113 F +1.613(ail and set)-.1 F F1(errno)4.113 E F0 1.614(for an)4.113 F 4.114(yo)-.15 +G 4.114(ft)349.022 571.2 S 1.614 +(he errors speci\214ed for the library functions)359.246 571.2 R F1(open)108 +583.2 Q F0(\(2\),).24 E F1(write)2.5 E F0(\(2\),).18 E F1(malloc)2.5 E F0 +(\(3\),).31 E F1(loc)2.5 E(k_cr)-.2 E(eate)-.37 E F0(\(3\), and).18 E F1(lo)2.5 +E(g_cr)-.1 E(eate)-.37 E F0(\(3\).).18 E(The)108 600 Q F1(txn_open)2.509 E F0 +.009(function may f)2.509 F .009(ail and set)-.1 F F1(errno)2.508 E F0 .008 +(to an)2.508 F 2.508(yo)-.15 G 2.508(ft)323.916 600 S .008 +(he errors speci\214ed for the library functions)332.534 600 R F1(open)2.508 E +F0(\(2\),).24 E F1(write)108 612 Q F0(\(2\),).18 E F1(malloc)2.5 E F0(\(3\),) +.31 E F1(loc)2.5 E(k_open)-.2 E F0(\(3\), and).24 E F1(lo)2.5 E(g_open)-.1 E F0 +(\(3\).).24 E(The)108 628.8 Q F1(txn_be)2.671 E(gin)-.4 E F0 .171 +(function may f)2.671 F .171(ail and set)-.1 F F1(errno)2.671 E F0 .171 +(to ENOSPC indicating that the maximum number of concur)2.671 F(-)-.2 E +(rent transactions has been reached.)108 640.8 Q(The)108 657.6 Q F1(txn_commit) +2.5 E F0(function may f)2.5 E(ail and set)-.1 E F1(errno)2.5 E F0(to EINV)2.5 E +(AL indicating that the transaction w)-1.35 E(as aborted.)-.1 E(The)108 674.4 Q +F1(txn_close)4.582 E F0 2.082(function may f)4.582 F 2.081(ail and set)-.1 F F1 +(errno)4.581 E F0 2.081(to an)4.581 F 4.581(yo)-.15 G 4.581(ft)345.753 674.4 S +2.081(he errors speci\214ed for the library functions)356.444 674.4 R F1(close) +108 686.4 Q F0(\(2\),).18 E F1 -.37(re)2.5 G(ad).37 E F0(\(2\),).77 E F1(write) +2.5 E F0(\(2\),).18 E F1(fr)2.5 E(ee)-.37 E F0(\(3\),).18 E F1(fsync)2.5 E F0 +(\(2\),).31 E F1(loc)2.5 E(k_close)-.2 E F0(\(3\) or).18 E F1(lo)2.5 E(g_close) +-.1 E F0(\(3\).).18 E(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 +732 Q 99.315(ution August)-.2 F(1, 1995)2.5 E(3)535 732 Q EP +%%Page: 4 28 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 124.57(DB_TXN\(3\) BSD)72 48 R(Programmer')2.5 E 2.5 +(sM)-.55 G 124.57(anual DB_TXN\(3\))340.17 48 R(The)108 84 Q/F1 10 +/Times-Italic@0 SF(txn_unlink)4.319 E F0 1.819(function may f)4.319 F 1.819 +(ail and set)-.1 F F1(errno)4.319 E F0 1.819(to an)4.319 F 4.319(yo)-.15 G 4.32 +(ft)347.58 84 S 1.82(he errors speci\214ed for the library functions)358.01 84 +R F1(unlink)108 96 Q F0(\(2\),).67 E F1(loc)2.5 E(k_unlink)-.2 E F0(\(3\), and) +.67 E F1(lo)2.5 E(g_unlink)-.1 E F0(\(3\), or the follo).67 E(wing:)-.25 E([EB) +108 112.8 Q(USY])-.1 E(The transaction re)133 124.8 Q(gion w)-.15 E +(as in use and the force \215ag w)-.1 E(as not set.)-.1 E/F2 9/Times-Bold@0 SF +(SEE ALSO)72 141.6 Q F1(db_btr)108 153.6 Q(ee)-.37 E F0(\(3\),).18 E F1 +(db_hash)2.5 E F0(\(3\),).28 E F1(db_loc)2.5 E(k)-.2 E F0(\(3\),).67 E F1 +(db_lo)2.5 E(g)-.1 E F0(\(3\),).22 E F1(db_mpool)2.5 E F0(\(3\),).51 E F1 +(db_open)2.5 E F0(\(3\),).24 E F1(db_r)2.5 E(ecno)-.37 E F0(\(3\)).18 E F1 .904 +(LIBTP: P)108 177.6 R(ortable)-.8 E 3.404(,M)-.1 G .904(odular T)189.738 177.6 +R -.15(ra)-.55 G .904(nsactions for UNIX).15 F F0 3.404(,M).94 G(ar)328.884 +177.6 Q .904(go Seltzer)-.18 F 3.403(,M)-.4 G .903 +(ichael Olson, USENIX proceedings,)392.041 177.6 R -.4(Wi)108 189.6 S +(nter 1992.).4 E F2 -.09(BU)72 206.4 S(GS).09 E F0(The)108 218.4 Q F1(maxtxns) +2.792 E F0 .292(parameter is a kluge, and should be deleted in f)2.792 F -.2 +(avo)-.1 G 2.793(ro).2 G 2.793(fd)378.448 218.4 S .293(ynamically e)389.571 +218.4 R .293(xpanding the transaction)-.15 F(re)108 230.4 Q(gion.)-.15 E +(4.4 Berk)72 732 Q(ele)-.1 E 2.5(yD)-.15 G(istrib)132.57 732 Q 99.315 +(ution August)-.2 F(1, 1995)2.5 E(4)535 732 Q EP +%%Trailer +end +%%EOF diff --git a/src/util/db2/man/db_btree.3 b/src/util/db2/man/db_btree.3 new file mode 100644 index 0000000000..25e289f3cd --- /dev/null +++ b/src/util/db2/man/db_btree.3 @@ -0,0 +1,246 @@ +.\" Copyright (c) 1990, 1993, 1994, 1995 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_btree.3 8.11 (Berkeley) 8/1/95 +.\" +.TH DB_BTREE 3 "August 1, 1995" +.UC 7 +.SH NAME +db_btree \- btree database access method +.SH DESCRIPTION +.so db.so +.GN +specific details of the btree access method. +.PP +The btree data structure is a sorted, balanced tree structure storing +associated key/data pairs. +Searches, insertions, and deletions in the btree will all complete in +O lg base N where base is the average fill factor. +Often, inserting ordered data into btrees results in a low fill factor. +This implementation has been modified to make ordered insertion the best +case, resulting in a much better than normal page fill factor. +.SH "ACCESS METHOD SPECIFIC INFORMATION" +The btree access method specific data structure provided to +.I db_open +is typedef'd and named BTREEINFO. +A BTREEINFO structure has at least the following fields, +which may be initialized before calling +.IR db_open : +.TP 5 +u_int cachesize; +A suggested maximum size (in bytes) of the memory cache. +This value is +.B only +advisory, and the access method will allocate more memory rather than fail. +Since every search examines the root page of the tree, caching the most +recently used pages substantially improves access time. +In addition, physical writes are delayed as long as possible, so a moderate +cache can reduce the number of I/O operations significantly. +Obviously, using a cache increases (but only increases) the likelihood of +corruption or lost data if the system crashes while a tree is being modified. +If +.I cachesize +is 0 (no size is specified) a default cache is used. +.TP 5 +int (*compare)(const DBT *, const DBT *); +Compare is the key comparison function. +It must return an integer less than, equal to, or greater than zero if the +first key argument is considered to be respectively less than, equal to, +or greater than the second key argument. +The same comparison function must be used on a given tree every time it +is opened. +If +.I compare +is NULL (no comparison function is specified), the keys are compared +lexically, with shorter keys considered less than longer keys. +.TP 5 +u_long flags; +The flag value is specified by +.IR or 'ing +any of the following values: +.RS +.TP 5 +R_DUP +Permit duplicate keys in the tree, i.e. permit insertion if the key to be +inserted already exists in the tree. +The default behavior, as described in +.IR db_open (3), +is to overwrite a matching key when inserting a new key or to fail if +the R_NOOVERWRITE flag is specified. +The R_DUP flag is overridden by the R_NOOVERWRITE flag, and if the +R_NOOVERWRITE flag is specified, attempts to insert duplicate keys into +the tree will fail. +.IP +If the database contains duplicate keys, the order of retrieval of +key/data pairs is undefined if the +.I get +function is used, however, +.I seq +function calls with the R_CURSOR flag set will always return the logical +``first'' of any group of duplicate keys. +.RE +.TP 5 +int lorder; +The byte order for integers in the stored database metadata. +The number should represent the order as an integer; for example, +big endian order would be the number 4,321. +If +.I lorder +is 0 (no order is specified) the current host order is used. +.TP 5 +int maxkeypage; +The maximum number of keys which will be stored on any single page. +This functionality is not currently implemented. +.\" The maximum number of keys which will be stored on any single page. +.\" Because of the way the btree data structure works, +.\" .I maxkeypage +.\" must always be greater than or equal to 2. +.\" If +.\" .I maxkeypage +.\" is 0 (no maximum number of keys is specified) the page fill factor is +.\" made as large as possible (which is almost invariably what is wanted). +.TP 5 +int minkeypage; +The minimum number of keys which will be stored on any single page. +This value is used to determine which keys will be stored on overflow +pages, i.e. if a key or data item is longer than the pagesize divided +by the minkeypage value, it will be stored on overflow pages instead +of in the page itself. +If +.I minkeypage +is 0 (no minimum number of keys is specified) a value of 2 is used. +.TP 5 +size_t (*prefix)(const DBT *, const DBT *); +Prefix is the prefix comparison function. +If specified, this function must return the number of bytes of the second key +argument which are necessary to determine that it is greater than the first +key argument. +If the keys are equal, the key length should be returned. +Note, the usefulness of this function is very data dependent, but, in some +data sets can produce significantly reduced tree sizes and search times. +If +.I prefix +is NULL (no prefix function is specified), +.B and +no comparison function is specified, a default lexical comparison function +is used. +If +.I prefix +is NULL and a comparison function is specified, no prefix comparison is +done. +.TP 5 +u_int psize; +Page size is the size (in bytes) of the pages used for nodes in the tree. +The minimum page size is 512 bytes and the maximum page size is 64K. +If +.I psize +is 0 (no page size is specified) a page size is chosen based on the +underlying file system I/O block size. +.PP +If the file already exists (and the O_TRUNC flag is not specified), the +values specified for the parameters flags, lorder and psize are ignored +in favor of the values used when the tree was created. +.SH "DB OPERATIONS" +The functions returned by +.I db_open +for the btree access method are as described in +.IR db_open (3), +with the following exceptions and additions: +.TP 5 +type +The type is DB_BTREE. +.TP 5 +del +Space freed up by deleting key/data pairs from the tree is never reclaimed, +although it is reused where possible. +This means that the btree storage structure is grow-only. +The only solutions are to avoid excessive deletions, or to create a fresh +tree periodically from a scan of an existing one. +.TP 5 +put +The +.I put +function takes the following additional flags: +.RS +.TP 5 +R_SETCURSOR +Store the key/data pair, setting or initializing the position of the +cursor to reference it. +.RE +.TP 5 +seq +Forward sequential scans of a tree are from the least key to the greatest. +.IP +The returned key for the +.I seq +function is not necessarily an exact match for the specified key in +the btree access method. +The returned key is the smallest key greater than or equal to the +specified key, permitting partial key matches and range searches. +.IP +The +.I seq +function takes the following additional flags: +.RS +.TP 5 +R_LAST +The last key/data pair of the database is returned, and the cursor +is set or initialized to reference it. +.TP 5 +R_PREV +Retrieve the key/data pair immediately before the cursor. +If the cursor is not yet set, this is the same as the R_LAST flag. +.RE +.SH ERRORS +The +.I btree +access method functions may fail and set +.I errno +for any of the errors specified for the library function +.IR db_open (3). +.SH "SEE ALSO" +.IR db_hash (3), +.IR db_lock (3), +.IR db_log (3), +.IR db_mpool (3), +.IR db_open (3), +.IR db_recno (3), +.IR db_txn (3) +.sp +.IR "The Ubiquitous B-tree" , +Douglas Comer, ACM Comput. Surv. 11, 2 (June 1979), 121-138. +.sp +.IR "Prefix B-trees" , +Bayer and Unterauer, ACM Transactions on Database Systems, Vol. 2, 1 +(March 1977), 11-26. +.sp +.IR "The Art of Computer Programming Vol. 3: Sorting and Searching" , +D.E. Knuth, 1968, pp 471-480. diff --git a/src/util/db2/man/db_hash.3 b/src/util/db2/man/db_hash.3 new file mode 100644 index 0000000000..adb88fca7c --- /dev/null +++ b/src/util/db2/man/db_hash.3 @@ -0,0 +1,138 @@ +.\" Copyright (c) 1990, 1993, 1994, 1995 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_hash.3 8.13 (Berkeley) 8/1/95 +.\" +.TH DB_HASH 3 "August 1, 1995" +.UC 7 +.SH NAME +db_hash \- hash database access method +.SH DESCRIPTION +.so db.so +.GN +specific details of the hashing access method. +.PP +The hash data structure is an extensible, dynamic hashing scheme. +Backward compatible interfaces to the functions described in +.IR dbm (3), +and +.IR ndbm (3) +are provided, however these interfaces are not compatible with +previous file formats. +.SH "ACCESS METHOD SPECIFIC INFORMATION" +The hash access method specific data structure provided to +.I db_open +is typedef'd and named HASHINFO. +A HASHINFO structure has at least the following fields, +which may be initialized before calling +.IR db_open : +.TP 5 +u_int bsize; +.I Bsize +defines the hash table bucket size, and is, by default, 256 bytes. +It may be preferable to increase the page size for disk-resident tables +and tables with large data items. +.TP 5 +u_int cachesize; +A suggested maximum size, in bytes, of the memory cache. +This value is +.B only +advisory, and the access method will allocate more memory rather +than fail. +.TP 5 +u_int ffactor; +.I Ffactor +indicates a desired density within the hash table. +It is an approximation of the number of keys allowed to accumulate in any +one bucket, determining when the hash table grows or shrinks. +The default value is 8. +.TP 5 +u_int32_t (*hash)(const void *, size_t); +.I Hash +is a user defined hash function. +Since no hash function performs equally well on all possible data, the +user may find that the built-in hash function does poorly on a particular +data set. +User specified hash functions must take two arguments (a pointer to a byte +string and a length) and return a 32-bit quantity to be used as the hash +value. +.IP +If a hash function is specified, +.I hash_open +will attempt to determine if the hash function specified is the same as +the one with which the database was created, and will fail if it is not. +.TP 5 +int lorder; +The byte order for integers in the stored database metadata. +The number should represent the order as an integer; for example, +big endian order would be the number 4,321. +If +.I lorder +is 0 (no order is specified) the current host order is used. +If the file already exists, the specified value is ignored and the +value specified when the tree was created is used. +.TP 5 +u_int nelem; +.I Nelem +is an estimate of the final size of the hash table. +If not set or set too low, hash tables will expand gracefully as keys +are entered, although a slight performance degradation may be noticed. +The default value is 1. +.PP +If the file already exists (and the O_TRUNC flag is not specified), the +values specified for the parameters bsize, ffactor, lorder and nelem are +ignored and the values specified when the tree was created are used. +.SH "DB OPERATIONS" +The functions returned by +.I db_open +for the hash access method are as described in +.IR db_open (3). +.SH ERRORS +The +.I hash +access method functions may fail and set +.I errno +for any of the errors specified for the library function +.IR db_open (3). +.SH "SEE ALSO" +.IR db_btree (3), +.IR db_lock (3), +.IR db_log (3), +.IR db_mpool (3), +.IR db_open (3), +.IR db_recno (3), +.IR db_txn (3) +.sp +.IR "Dynamic Hash Tables" , +Per-Ake Larson, Communications of the ACM, April 1988. +.sp +.IR "A New Hash Package for UNIX" , +Margo Seltzer, USENIX Proceedings, Winter 1991. diff --git a/src/util/db2/man/db_lock.3 b/src/util/db2/man/db_lock.3 new file mode 100644 index 0000000000..b18a38c60c --- /dev/null +++ b/src/util/db2/man/db_lock.3 @@ -0,0 +1,462 @@ +.\" Copyright (c) 1994, 1995 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_lock.3 8.14 (Berkeley) 8/1/95 +.\" +.TH DB_LOCK 3 "August 1, 1995" +.UC 7 +.SH NAME +db_lock \- general purpose lock manager +.SH SYNOPSIS +.nf +.ft B +#include <db_lock.h> + +int +lock_create(const char *path, mode_t mode, +.ti +5 +int lock_modes, const int8_t conflicts[][], u_int maxlocks); + +LOCK_TABLE_T * +lock_open(const char *path); + +int +lock_vec(LOCK_TABLE_T *lt, DBT *locker, struct timespec *timeout, +.ti +5 +LOCK_REQ_T list[], int nlist, LOCK_REQ_T **elistp, DBT *conflict); + +int +lock_get(LOCK_TABLE_T *lt, const DBT *locker, +.ti +5 +const DBT *obj, const lock_mode_t lock_mode, LOCK_T **lockp); + +int +lock_put(LOCK_T *lockp); + +int +lock_close(LOCK_TABLE_T *lt); + +int +lock_unlink(const char *path, int force); +.ft R +.fi +.SH DESCRIPTION +.so db.so +.GN +specific details of the locking interface. +.PP +.I Db_lock +is the library interface intended to provide general-purpose locking. +While designed to work with the other DB functions, these functions are +also useful for more general locking purposes. +Locks can be shared between processes. +.PP +.CR "lock table" lock +.PP +The parameter +.I lock_modes +is the number of lock modes to be recognized by the lock table +(including the ``not-granted'' mode). +The parameter +.I conflicts +is an +.I lock_modes +by +.I lock_modes +array. +A non-0 value for: +.sp +.ti +5 +conflicts[requested_mode][held_mode] +.sp +indicates that +.I requested_mode +and +.I held_mode +conflict. +The ``not-granted'' mode must be represented by 0. +.PP +The include file <db_lock.h> declares two commonly used conflict arrays: +.TP 5 +int lock_sx_n; +.br +.ns +.TP 5 +const int8_t lock_sx_c[lock_sx_n][lock_sx_n]; +These variables specify a conflict array +for a simple scheme using shared and exclusive lock modes. +.TP 5 +int lock_g_n; +.br +.ns +.TP 5 +const int8_t lock_g_c[lock_g_n][lock_g_n]; +These variables specify a conflict array that involves various intent +lock modes (e.g. intent shared) that are used for multigranularity locking. +.PP +In addition, +<db_lock.h> defines the following macros that name lock modes for use with +the standard tables above: +.RS +.TP 5 +LOCK_IS +intent shared +.br +.ns +.TP 5 +LOCK_IX +intent exclusive +.br +.ns +.TP 5 +LOCK_NG +not granted (always 0) +.br +.ns +.TP 5 +LOCK_S +shared +.br +.ns +.TP 5 +LOCK_SIX +shared/intent exclusive +.br +.ns +.TP 5 +LOCK_X +exclusive +.RE +.PP +.I Maxlocks +is the maximum number of locks to be held or requested in the table, +and is used by +.I lock_create +to estimate how much space to allocate for various lock-table data +structures. +.PP +.RT lock_create +.PP +.OP "lock table" lock +.PP +The function +.I lock_vec +atomically obtains and releases one or more locks from the designated +table. +The function +.I lock_vec +is intended to support acquisition or trading of multiple locks +under one lock table semaphore, as is needed for lock coupling or +in multigranularity locking for lock escalation. +.PP +If any of the requested locks cannot be acquired +or any of the locks to be released cannot be released, +no locks are acquired and no locks are released, and +.I lock_vec +returns an error. +The function +.I lock_vec +returns 0 on success. +If an error occurs, +.I lock_vec +returns one of the following values. +In addition, if +.I elistp +is not NULL, it is set to point to the LOCK_REQ_T entry which +was being processed when the error occurred. +.TP 5 +LOCK_GET_DEADLOCK +The specified +.I locker +was selected as a victim in order to resolve a deadlock. +In this case, if the +.I conflict +argument is non-NULL, it is set to reference the identity of a +locker holding the lock referenced by +.I elistp +at the time the request was denied. +(This identity resides in static memory and may be overwritten by +subsequent calls to +.IR lock_vec ). +.TP 5 +LOCK_GET_ERROR +An error occurred and the external variable +.I errno +has been set to indicate the error. +.TP 5 +LOCK_GET_NOTHELD +The lock cannot be released, as it was not held by the +.IR locker . +.TP 5 +LOCK_GET_RESOURCE +The lock manager is unable to grant the requested locks because of +limited internal resources. +(Releasing locks may allow future calls to +.I lock_vec +to succeed.) +.TP 5 +LOCK_GET_TIMEOUT +A timeout argument was specified, and the requested locks were not +available soon enough. +In this case, if the +.I conflict +argument is non-NULL, it is set to reference the identity of a +locker holding the lock referenced by +.I elistp +at the time the request was denied. +(This identity resides in static memory and may be overwritten by +subsequent calls to +.IR lock_vec ). +.PP +The +.I locker +argument specified to +.I lock_vec +is a pointer to an untyped byte string which identifies the entity +requesting or releasing the lock. +If +.I locker +is NULL, the calling process' pid is used instead. +.PP +The +.I timeout +argument provided to +.I lock_vec +specifies a maximum interval to wait for the locks to be granted. +If +.I timeout +is NULL, it is ignored, and +.I lock_vec +will not return until all of the locks are acquired or an error has +occurred. +.PP +The +.I list +array provided to +.I lock_vec +is typedef'd in <db_lock.h> as LOCK_REQ_T. +A LOCK_REQ_T structure has at least the following fields, +which must be initialized before calling +.IR lock_vec : +.TP 5 +enum lockop op; +The operation to be performed, which must be set to one of the +following values: +.RS +.TP 5 +LOCK_GET +Get a lock, as defined by the values of +.IR locker , +.I obj +and +.IR lock_mode . +Upon return from +.IR lock_vec , +if the +.I lockp +field is non-NULL, a reference to the acquired lock is stored there. +(This reference is invalidated by any call to +.I lock_vec +or +.I lock_put +which releases the lock.) +.TP 5 +LOCK_PUT +The lock referenced by the contents of the +.I lockp +field is released. +.TP 5 +LOCK_PUT_ALL +All locks held by the +.I locker +are released. +(Any locks acquired as a part of the current call to +.I lock_vec +are not considered for this operation). +.TP 5 +LOCK_PUT_OBJ +All locks held by the +.IR locker , +on the object +.IR obj , +with the mode specified by +.IR lock_mode , +are released. +A +.I lock_mode +of LOCK_NG indicates that all locks on the object should be released. +(Any locks acquired as a part of the current call to +.I lock_vec +are not considered for this operation). +.RE +.TP 5 +const DBT obj; +An untyped byte string which specifies the object to be locked or +released. +.TP 5 +const lock_mode_t lock_mode; +The lock mode, used as an index into +.IR lt 's +conflict array. +.TP 5 +LOCK_T **lockp; +A pointer to a pointer to a lock reference. +.PP +The +.I nlist +argument specifies the number of elements in the +.I list +array. +.PP +The function +.I lock_get +is a simple interface to the +.I lock_vec +functionality, and is equivalent to calling the +.I lock_vec +function with the +.I lt +and +.I locker +arguments, NULL +.IR timeout , +.I elistp +and +.I conflict +arguments, and a single element +.I list +array, for which the +.I op +field is LOCK_GET, and the +.IR obj , +.I lock_mode +and +.I lockp +fields are represented by the arguments of the same name. +Note that the type of the +.I obj +argument to +.I lock_get +is different from the +.I obj +element found in the LOCK_REQ_T structure. +The +.I lock_get +function returns success and failure as described for the +.I lock_vec +function. +.PP +The function +.I lock_put +is a simple interface to the +.I lock_vec +functionality, and is equivalent to calling the +.I lock_vec +function with a single element +.I list +array, for which the +.I op +field is LOCK_PUT and the +.I lockp +field is represented by the argument of the same name. +Note that the type of the +.I lockp +argument to +.I lock_put +is different from the +.I lockp +element found in the LOCK_REQ_T structure. +The +.I lock_put +function returns success and failure as described for the +.I lock_vec +function. +.PP +The function +.I lock_close +disassociates the calling process from the lock table +.IR lt , +after releasing all locks held or requested by that process. +.RT lock_close +.PP +.UN "lock table" lock +.SH "ERRORS" +The +.I lock_create +function may fail and set +.I errno +for any of the errors specified for the library routines +.IR mmap (2), +.IR open (2) +and +.IR malloc (3). +.PP +The +.I lock_open +function may fail and set +.I errno +for any of the errors specified for the library routine +.IR mmap (2) +and +.IR open (2). +.PP +The +.I lock_close +function may fail and set +.I errno +for any of the errors specified for the library routine +.IR close (2) +and +.IR munmap (2). +.PP +The +.I lock_unlink +function may fail and set +.I errno +for any of the errors specified for the library function +.IR unlink (2) +or the following: +.TP 5 +[EBUSY] +The lock table was in use and the force flag was not set. +.SH "SEE ALSO" +.IR db_btree (3), +.IR db_hash (3), +.IR db_log (3), +.IR db_mpool (3), +.IR db_open (3), +.IR db_recno (3), +.IR db_txn (3) +.SH BUGS +The +.I maxlocks +parameter is a kluge, and should be deleted in favor of dynamically +expanding the lock table. diff --git a/src/util/db2/man/db_log.3 b/src/util/db2/man/db_log.3 new file mode 100644 index 0000000000..34c4d1f5d1 --- /dev/null +++ b/src/util/db2/man/db_log.3 @@ -0,0 +1,290 @@ +.\" Copyright (c) 1990, 1993, 1994, 1995 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_log.3 8.15 (Berkeley) 8/3/95 +.\" +.TH DB_LOG 3 "August 3, 1995" +.UC 7 +.SH NAME +db_log \- log-manager access method +.SH DESCRIPTION +.so db.so +.GN +specific details of the logging access method. +.PP +These functions provide a general-purpose logging facility sufficient +for transaction management. +Logs can be shared by multiple processes. +.PP +A log is represented by the directory, +.IR "not the file" , +named by the first argument to +.IR db_open (3). +The first argument must be non-NULL, +and the directory must already exist +.I db_open +is called. +In that directory, the log is stored in one or more files named +in the format ``log.YYYY.MM.DD.HH.MM.SS'', where ``YYYY.MM.DD.HH.SS'' +is the approximate creation time of the log file, and is guaranteed +to be unique in the directory. +.PP +The group of the created files is based on the system and directory +defaults, and is not further specified by the log access method. +All files are created with the +.I mode +specified to +.IR db_open , +(as described in +.IR chmod (2)) +and modified by the process' umask value (see +.IR umask (2)). +.PP +The +.I flags +argument to +.I db_open +must be 0 for the +.I db_log +access method. +.SH "ACCESS METHOD SPECIFIC INFORMATION" +The log access method specific data structure provided to +.I db_open +is typedef'd and named LOGINFO. +A LOGINFO structure has at least the following fields, +which may be initialized before calling +.IR db_open : +.TP 5 +off_t max_file_size; +The maximum size of a single file in the log. +If not specified, the maximum size defaults to an implementation-specific +value. +.TP 5 +int lorder; +The byte order for integers in the stored database metadata. +The number should represent the order as an integer; for example, +big endian order would be the number 4,321. +If +.I lorder +is 0 (no order is specified) the current host order is used. +.PP +If the log already exists, the values specified for the parameters +max_file_size and lorder are ignored in favor of the values used +when the log was created. +.SH "DB OPERATIONS" +The data part of the key/data pair used by the log access method +is the same as for other access methods. +The key is different. +Each log record is identified by a log sequence number (LSN), +which is stored in a DBT, and which is used as the +.I key +for all log functions that take +.I key +arguments. +Applications cannot create LSN's, and all LSN's provided to functions +as arguments must first be retrieved using the +.I put +or +.I seq +functions. +To provide a distinguished value for applications, it is guaranteed that +no valid LSN will ever have a size of 0. +.PP +Applications can compare LSN's using the +.I log_lsn_compare +function (see below). +.PP +Applications can associate LSN's with specific log files. +The function +.I log_lsn_file +(see below), returns the name of the log file containing the +record with a specified LSN. +(The mapping of LSN to file is needed for database administration. +For example, a transaction manager typically records the earliest LSN +needed for restart, and the database administrator may want to archive +log files to tape when they contain only LSN's before the earliest one +needed for restart.) +.PP +Applications can truncate the log file up to a specific LSN using the +.I log_trunc +function (see below). +.PP +The functions returned by +.I db_open +for the log access method are as described in +.IR db_open , +with the following exceptions and additions: +.TP 5 +type +The type is DB_LOG. +.TP 5 +del +The +.I del +function always returns an error for the log-manager access method, +setting +.I errno +to EINVAL. +.TP 5 +int (*log_flush)(const DB *db, const DBT *lsn); +The +.I log_flush +function flushes the log up to and including the log record +.IR lsn . +.RT log_flush +.TP 5 +int (*log_lsn_compare)(const DB *, +.ti +5 +const DBT *lsn1, const DBT *lsn2); +A pointer to a function which is provided to permit applications to +compare LSN's. +The +.I log_lsn_compare +function returns an integer less than, equal to, or greater than zero +if the first LSN is considered to be respectively less than, equal to, +or greater than the second LSN. +.TP 5 +int (*log_lsn_file)(const DB *db, +.ti +5 +const DBT *lsn, char *name); +.br +The +.I log_lsn_file +function stores a pointer to the name of the file containing +.I lsn +in the address referenced by +.IR name. +This pointer is to an internal static object, and subsequent calls to +the same function will modify the same object. +.IP +.RT log_lsn_file +.TP 5 +int (*log_unlink)(const char *path, int force); +The +.I log_unlink +function destroys the log represented by +.IR path . +If the +.I force +parameter is not set to 1 and there are other processes using the +log, then +.I log_unlink +will return -1, setting +.IR errno +to EBUSY. +If +.I force is not set or there are no processes using the log, then all files +used by the log are destroyed. +.I log_unlink +will return -1 on failure, setting +.IR errno , +and 0 on success. +.TP 5 +int (*log_trunc)(const DB *db, const DBT *lsn); +The +.I log_trunc +function truncates the log up to an LSN which is less than +.IR lsn . +.RT log_trunc +.TP 5 +put +A log record containing +.I data +is appended to the log. +Unlike the +.I put +functions for other access methods, the key parameter is not initialized +by the application, instead, the LSN assigned to the data is returned in +the +.I key +parameter. +.IP +The caller is responsible for providing any necessary structure to +.I data . +(For example, in a write-ahead logging protocol, the application must +understand what part of +.I data +is an operation code, what part is redo information, and what part is +undo information. +In addition, most transaction managers will store in +.I data +the LSN of the previous log record for the same transaction, +to support chaining back through the transaction's log records +during undo.) +.IP +The parameter +.I flag +must be set to 0 or exactly one of the following values: +.RS +.TP 5 +R_CHECKPOINT +Specify the key/data pair of the current call as the one to be returned +when the +.I seq +function is next called with the R_CHECKPOINT flag. +.TP 5 +R_FLUSH +Flush immediately (ignoring any possibility for group commit). +.RE +.TP 5 +seq +The +.I seq +function takes the following additional flag: +.RS +.TP 5 +R_CHECKPOINT +The last key/data pair stored by the +.I put +function (using the R_CHECKPOINT flag) is returned, +and the cursor is set or initialized to reference it. +The expected use of this flag is during restart and to determine what +part of the log must be available for restart. +Therefore, the log record retrieved with R_CHECKPOINT should contain +all the information that the transaction manager will need for this +purpose. +.RE +.TP 5 +sync +The +.I sync +function always returns an error for the log-manager access method, +setting +.I errno +to EINVAL. +.SH "SEE ALSO" +.IR db_btree (3), +.IR db_hash (3), +.IR db_lock (3), +.IR db_mpool (3), +.IR db_open (3), +.IR db_recno (3), +.IR db_txn (3) diff --git a/src/util/db2/man/db_mpool.3 b/src/util/db2/man/db_mpool.3 new file mode 100644 index 0000000000..4b683b6185 --- /dev/null +++ b/src/util/db2/man/db_mpool.3 @@ -0,0 +1,403 @@ +.\" Copyright (c) 1990, 1993, 1994, 1995 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_mpool.3 8.14 (Berkeley) 8/1/95 +.\" +.TH DB_MPOOL 3 "August 1, 1995" +.UC 7 +.SH NAME +db_mpool \- general purpose shared memory buffer pool +.SH SYNOPSIS +.nf +.ft B +#include <db.h> +#include <mpool.h> + +int +mpool_create(char *path, mode_t mode, size_t cachesize, u_long flags); + +MPOOL * +mpool_open(char *path); + +int +mpool_close(MPOOL *mp); + +MPOOLFILE * +mpool_fopen(MPOOL *mp, char *path, size_t pagesize, void *pgcookie, +.ti +5 +int (*pgin)(MPOOLFILE *mpf, +.ti +8 +pgno_t pgno, void *pgaddr, void *pgcookie), +.ti +5 +int (*pgout)(MPOOLFILE *mpf, +.ti +8 +pgno_t pgno, void *pgaddr, void *pgcookie); + +int +mpool_fclose(MPOOLFILE *mpf); + +void * +mpool_get(MPOOLFILE *mpf, pgno_t *pgnoaddr, u_long flags, +.ti +5 +int (*callback)(MPOOLFILE *mpf, pgno_t pgno)); + +int +mpool_put(MPOOLFILE *mpf, void *pgaddr, u_long flags); + +int +mpool_sync(MPOOLFILE *mpf); + +int +mpool_unlink(const char *path, int force); + +void +mpool_stat(MPOOL *mp, FILE *fp); +.ft R +.fi +.SH DESCRIPTION +.so db.so +.GN +specific details of the memory pool interface. +.PP +The +.I db_mpool +function is the library interface intended to provide general-purpose, +page-oriented buffer management of one or more files. +While designed to work with the other DB functions, these functions are +also useful for more general purposes. +The memory pools (MPOOL's) are referred to in this document as +simply ``pools''. +Pools may be shared between processes. +Pools are usually filled by pages from one or more files (MPOOLFILE's). +Pages in the pool are replaced in LRU (least-recently-used) order, +with each new page replacing the page which has been unused the longest. +Pages retrieved from the pool using +.I mpool_get +are ``pinned'' in memory, by default, +until they are returned to the pool using the +.I mpool_put +function. +.PP +.CR "memory pool" mpool +.PP +The +.I cachesize +argument specifies the size of the pool in bytes, +and should be the size of the normal working set of the application with +some small amount of additional memory for unusual situations. +If the number of bytes currently ``pinned'' in memory exceeds +.IR cachesize , +the +.I db_mpool +functions will attempt to allocate more memory and do not necessarily fail, +although they may suffer performance degradation. +.PP +The +.I flags +argument is set by +.IR or 'ing +any of the following values: +.TP +MPOOL_PRIVATE +The pool is not shared by other processes or threads, +so no locking of pool resources is required. +.PP +.OP "memory pool" mpool +.PP +The +.I mpool_close +function closes the pool indicated by the MPOOL pointer +.IR mp , +as returned by +.IR mpool_open . +This function does +.B not +imply a call to +.I mpool_sync +(or to +.IR mpool_fclose ) +i.e. no pages are written to the source file as as a result of calling +.IR mpool_close . +.RT mpool_close +.PP +The function +.I mpool_fopen +opens a file for buffering in the pool specified by the MPOOL +argument. +The +.I path +argument is the name of the file to be opened. +The +.I pagesize +argument is the size, in bytes, of the unit of transfer between the +application and the pool, although not necessarily the unit of transfer +between the pool and the source file. +Applications not knowing the page size of the source file should +retrieve the metadata from the file using a page size that is correct +for the metadata, then close and reopen the file, or, +otherwise determine the page size before calling +.IR mpool_fopen . +.PP +If the +.I pgin +function is specified, it is called each time a page is read into +the memory pool from the source file. +If the +.I pgout +function is specified, it is called each time a page is written +to the source file. +Both functions are called with the MPOOLFILE pointer returned from +.IR mpool_fopen , +the page number, a pointer to the page being read or written, and +the argument +.IR pgcookie . +If either function fails, it should return non-zero and set +.IR errno , +in which case the +.I db_mpool +function calling it will also fail, leaving +.I errno +intact. +.PP +The +.I mpool_fclose +function closes the source file indicated by the MPOOLFILE pointer +.IR mpf . +This function does +.B not +imply a call to +.IR mpool_sync , +i.e. no pages are written to the source file as as a result of calling +.IR mpool_fclose . +.RT mpool_fclose +.\" +.\".PP +.\"int +.\"mpool_fd (MPOOLFILE *mpf); +.\" +.\".PP +.\"The function +.\".I mpool_fd +.\"takes an MPOOLFILE pointer and returns the file descriptor being +.\"used to read/write that file +.\"to/from the pool. +.PP +The function +.I mpool_get +returns a pointer to the page with the page number specified by +.IR pgnoaddr , +from the source file specified by the MPOOLFILE pointer +.IR mpf . +If the page does not exist or cannot be retrieved, +.I mpool_get +returns NULL and sets errno. +.PP +The +.I flags +argument is set by +.IR or 'ing +any of the following values: +.TP 5 +MPOOL_CALLBACK +After the page number has been determined, but before any other +process or thread can access the page, the function specified by +the +.I callback +argument is called. +If the function fails, it should return non-zero and set +.IR errno , +in which case +.I mpool_get +will also fail, leaving +.I errno +intact. +The +.I callback +function is called with the MPOOLFILE pointer returned from +.I mpool_fopen +and the page number. +This functionality is commonly used when page locking is required, +but the page number of the page being retrieved is not known. +.TP 5 +MPOOL_CREATE +If the specified page does not exist, create it. +.TP 5 +MPOOL_LAST +Return the last page of the source file and copy its page number +to the location referenced by +.IR pgnoaddr . +.TP 5 +MPOOL_NEW +Create a new page in the file and copy its page number to the location +referenced by +.IR pgnoaddr . +.TP 5 +MPOOL_NOPIN +Don't pin the page into memory. +(This flag is intended for debugging purposes, when it's often useful +to examine pages which are currently held by other parts of the +application. +Pages retrieved in this manner don't need to be returned to the +memory pool, i.e. they should +.B not +be specified as arguments to the +.IR mpool_put +routine.) +.PP +Created pages have all their bytes set to 0. +.PP +All pages returned by +.I mpool_get +(unless the MPOOL_NOPIN flag is specified), +will be retained (i.e. ``pinned'') in the pool until a subsequent +call to +.IR mpool_put . +.PP +The function +.I mpool_put +indicates that the page referenced by +.I pgaddr +can be evicted from the pool. +.I Pgaddr +must be an address previously returned by +.IR mpool_get . +.PP +The flag value is specified by +.IR or 'ing +any of the following values: +.TP 5 +MPOOL_DIRTY +The page has been modified and must be written to the source file +before being evicted from the pool. +.TP 5 +MPOOL_DISCARD +The page is unlikely to be useful in the near future, and should be +discarded before other pages in the pool. +.PP +.RT mpool_put +.PP +The function +.I mpool_sync +writes all pages associated with the MPOOLFILE pointer +.IR mpf , +which were specified as arguments to the +.I mpool_put +function with an associated flag of +MPOOL_DIRTY, +to the source file. +.RT mpool_sync +.PP +.UN "memory pool" mpool +.PP +The function +.I mpool_stat +writes statistics for the memory pool +.I mp +to the file specified by +.IR fp . +These statistics include the number of files participating in the pool, +the active pages in the pool, and numbers as to how effective the cache +has been. +.SH ERRORS +The +.IR mpool_create , +.I mpool_open +and +.I mpool_fopen +functions may fail and set +.I errno +for any of the errors specified for the library functions +.IR open (2), +.IR read (2), +and +.IR malloc (3). +.PP +The +.I mpool_close +and +.I mpool_fclose +functions may fail and set +.I errno +for any of the errors specified for the library functions +.IR close (2) +and +.IR free (3). +.PP +The +.I mpool_get +function may fail and set +.I errno +for any of the errors specified for the library functions +.IR read (2), +.IR write (2), +and +.IR malloc (3) +or the following: +.TP 5 +[EINVAL] +The requested page does not exist and MPOOL_CREATE was not set. +.PP +The +.I mpool_put +function may fail and set +.I errno +for any of the errors specified for the library function +.IR write (2) +or the following: +.TP 5 +[EACCES] +The source file was not opened for writing. +.PP +The +.I mpool_sync +function may fail and set +.I errno +for any of the errors specified for the library function +.IR write (2). +.PP +The +.I mpool_unlink +function may fail and set +.I errno +for any of the errors specified for the library function +.IR unlink (2) +or the following: +.TP 5 +[EBUSY] +The memory pool was in use and the force flag was not set. +.SH "SEE ALSO" +.IR db_btree (3), +.IR db_hash (3), +.IR db_lock (3), +.IR db_log (3), +.IR db_open (3), +.IR db_recno (3), +.IR db_txn (3) diff --git a/src/util/db2/man/db_open.3 b/src/util/db2/man/db_open.3 new file mode 100644 index 0000000000..f988ef9245 --- /dev/null +++ b/src/util/db2/man/db_open.3 @@ -0,0 +1,574 @@ +.\" Copyright (c) 1990, 1993, 1994, 1995 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_open.3 8.15 (Berkeley) 8/1/95 +.\" +.TH DB_OPEN 3 "August 1, 1995" +.UC 7 +.SH NAME +db_open \- database access methods +.SH SYNOPSIS +.nf +.ft B +#include <db.h> + +DB * +db_open(const char *file, int flags, int mode, +.ti +5 +DBTYPE type, DBINFO *dbinfo, const void *openinfo); +.ft R +.fi +.SH DESCRIPTION +.so db.so +.GN +the overall structure of the available access methods. +.PP +The currently supported file formats are btree, hashed, log and recno +(i.e. flat-file oriented). +The btree format is a representation of a sorted, balanced tree structure. +The hashed format is an extensible, dynamic hashing scheme. +The log format is a general-purpose logging facility. +The recno format is a byte stream file with fixed or variable length +records. +The formats and other, format specific information are described in detail +in their respective manual pages: +.IR db_btree (3), +.IR db_hash (3), +.IR db_log (3), +and +.IR db_recno (3). +.PP +Db_open opens +.I file +for reading and/or writing. +Files never intended to be preserved on disk may be created by setting +the file parameter to NULL. +(Note, while most of the access methods use +.I file +as the name of an underlying file on disk, this is not guaranteed. +See the manual pages for the individual access methods for more +information.) +.PP +The +.I flags +and +.I mode arguments +are as specified to the +.IR open (2) +function, however, only the O_CREAT, O_EXCL, O_EXLOCK, O_NONBLOCK, +O_RDONLY, O_RDWR, O_SHLOCK and O_TRUNC flags are meaningful. +(Note, opening a database file O_WRONLY is not possible.) +.\"Three additional options may be specified by +.\".IR or 'ing +.\"them into the +.\".I flags +.\"argument. +.\".TP +.\"DB_LOCK +.\"Do the necessary locking in the database to support concurrent access. +.\"If concurrent access isn't needed or the database is read-only this +.\"flag should not be set, as it tends to have an associated performance +.\"penalty. +.\".TP +.\"DB_SHMEM +.\"Place the underlying memory pool used by the database in shared +.\"memory. +.\"Necessary for concurrent access. +.\".TP +.\"DB_TXN +.\"Support transactions in the database. +.\"The DB_LOCK and DB_SHMEM flags must be set as well. +.PP +The +.I type +argument is of type DBTYPE (as defined in the <db.h> include file) and +may be set to DB_BTREE, DB_HASH, DB_LOG or DB_RECNO. +.PP +The +.I dbinfo +argument is a pointer to a structure containing references to locking, +logging, transaction, and shared-memory buffer pool information. +If +.I dbinfo +is NULL, then the access method may still use these subsystems, +but the usage will be private to the application and managed by DB. +If +.I dbinfo +is non-NULL, +then the module referenced by each of the non-NULL fields is used by DB +as necessary. +The fields of the DBINFO structure are defined as follows: +.TP 5 +const char *errpfx; +A prefix to prepend to error messages; used only if +.I errfile +is non-NULL. +.TP 5 +FILE *errfile; +The +.IR stdio (3) +file stream to which error messages are logged. +.sp +When any error occurs in the +.I db_open +function, or in any function called using a field of the returned DB +structure, an error value is returned by the function, +and the global variable +.I errno +is set appropriately. +In some cases, however, the +.I errno +value may be insufficient to describe the cause of the error. +In these cases, if +.I errfile +is non-NULL, additional error information will be written to the file +stream it represents, preceded by the string, if any, specified by +.IR errpfx . +This error logging facility should not be required for normal operation, +but may be useful in debugging applications. +.TP 5 +char *errbuf; +The buffer to which error messages are copied. +If non-NULL, +.I errbuf +behaves as described for +.IR errfile , +except that the +.I errpfx +field is ignored and the error message is copied into the specified +buffer instead of being written to the FILE stream. +The DB routines assume that the associated buffer is at least 1024 bytes +in length. +.TP 5 +LOCK_TABLE_T *lockinfo; +If locking is required for the file being opened (as in the case of +buffers being maintained in a shared memory buffer pool), +the +.I lockinfo +field contains a return value from the function +.I lock_open +that should be used +(see +.IR db_lock (3)). +If +.I lockinfo +is NULL, no locking is done. +.TP 5 +DB *loginfo; +If modifications to the file being opened should be logged, the +.I loginfo +field contains a return value from the function +.IR dbopen , +when opening a DB file of type DB_LOG. +If +.I loginfo +is NULL, no logging is done. +.TP 5 +MPOOL *mpoolinfo; +If the cache for the file being opened should be maintained in a shared +buffer pool, the +.I mpoolinfo +field contains a return value from the function +.I mpool_open +that should be used +(see +.IR db_mpool (3)). +If +.I mpoolinfo +is NULL, a memory pool may still be created, +but it will be private to the application and managed by DB. +.TP 5 +TXNMGR *txninfo; +If the accesses to the file being opened should take place in the context +of transactions (providing atomicity and complete error recovery), the +.I txninfo +field contains a return value from the function +.I txn_open +(see +.IR db_txn (3)). +If transactions are specified, +the application is responsible for making suitable calls to +.IR txn_begin , +.IR txn_abort , +and +.IR txn_commit . +If +.I txninfo +is NULL, no transaction support is done. +.PP +The +.I openinfo +argument is a pointer to an access method specific structure described +in the access method's manual page. +If +.I openinfo +is NULL, each access method will use defaults appropriate for the system +and the access method. +.SH "KEY/DATA PAIRS" +Access to all access methods is based on key/data pairs. +Both keys and data are represented by the following data structure: +.PP +typedef struct { +.RS +void *data; +.br +size_t size; +.RE +} DBT; +.PP +The elements of the DBT structure are defined as follows: +.TP 5 +data +A pointer to a byte string. +.ns +.br +.TP 5 +size +The length of +.IR data , +in bytes. +.PP +Key and data byte strings may reference strings of essentially unlimited +length, although any two of them must fit into available memory at the +same time. +.PP +The access methods provide no guarantees about byte string alignment, +and applications are responsible for maintaining any necessary alignment. +.SH "DB OPERATIONS" +.I Db_open +returns a pointer to a DB structure (as defined in the <db.h> include file) +on success, and NULL on error. +The DB structure describes a database type, and includes a set of functions +to perform various actions, as described below. +Each of these functions takes a pointer to a DB structure, and may take +one or more DBT *'s and a flag value as well. +Individual access methods specify additional functions and flags which +are specific to the method. +The fields of the DB structure are as follows: +.TP 5 +DBTYPE type; +The type of the underlying access method (and file format). +.TP 5 +int (*close)(const DB *db); +A pointer to a function to flush any cached information to disk, +free any allocated resources, and close any underlying files. +Since key/data pairs are cached in memory, failing to sync the +file with the +.I close +or +.I sync +function may result in inconsistent or lost information. +.IP +The +.I close +functions return -1 on failure, setting +.IR errno , +and 0 on success. +.TP 5 +int (*del)(const DB *db, TXN *txnid, +.ti +5 +const DBT *key, u_int flags); +.br +A pointer to a function to remove key/data pairs from the database. +The key/data pair associated with the specified +.I key +are discarded from the database. +.IP +The +.I txnid +parameter contains a transaction ID returned from +.IR txn_begin , +if the file is being accessed under transaction protection, +or NULL if transactions are not in effect. +.IP +The parameter +.I flag +must be set to 0 or exactly one of the following values: +.RS +.TP 5 +R_CURSOR +Delete the record referenced by the cursor. +The cursor must have previously been initialized. +.RE +.IP +The +.I delete +functions return -1 on error, setting +.IR errno , +0 on success, and 1 if the specified +.I key +did not exist in the file. +.TP 5 +int (*fd)(const DB *db); +A pointer to a function which returns a file descriptor representative +of the underlying database. +A file descriptor referencing the same file will be returned to all +processes which call +.I db_open +with the same +.I file +name. +This file descriptor may be safely used as an argument to the +.IR fcntl (2) +and +.IR flock (2) +locking functions. +The file descriptor is not necessarily associated with any of the +underlying files used by the access method. +No file descriptor is available for in memory databases. +.IP +The +.I fd +functions return -1 on error, setting +.IR errno , +and the file descriptor on success. +.TP 5 +int (*get)(const DB *db, TXN *txnid, +.ti +5 +const DBT *key, DBT *data, u_int flags); +.br +A pointer to a function which is the interface for keyed retrieval from +the database. +The address and length of the data associated with the specified +.I key +are returned in the structure referenced by +.IR data . +.IP +The +.I txnid +parameter contains a transaction ID returned from +.IR txn_begin , +if the file is being accessed under transaction protection, +or NULL if transactions are not in effect. +.IP +The +.I get +functions return -1 on error, setting +.IR errno , +0 on success, and 1 if the +.I key +was not found. +.TP 5 +int (*put)(const DB *db, TXN *txnid, +.ti +5 +DBT *key, const DBT *data, u_int flags); +.br +A pointer to a function to store key/data pairs in the database. +.IP +The +.I txnid +parameter contains a transaction ID returned from +.IR txn_begin , +if the file is being accessed under transaction protection, +or NULL if transactions are not in effect. +.IP +The parameter +.I flag +must be set to 0 or exactly one of the following values: +.RS +.TP 5 +R_CURSOR +Replace the key/data pair referenced by the cursor. +The cursor must have previously been initialized. +.TP 5 +R_NOOVERWRITE +Enter the new key/data pair only if the key does not previously exist. +.RE +.IP +The default behavior of the +.I put +functions is to enter the new key/data pair, replacing any previously +existing key. +.IP +The +.I put +functions return -1 on error, setting +.IR errno , +0 on success, and 1 if the R_NOOVERWRITE +.I flag +was set and the key already exists in the file. +.TP 5 +int (*seq)(const DB *db, TXN *txnid, +.ti +5 +DBT *key, DBT *data, u_int flags); +.br +A pointer to a function which is the interface for sequential +retrieval from the database. +The address and length of the key are returned in the structure +referenced by +.IR key , +and the address and length of the data are returned in the +structure referenced +by +.IR data . +.IP +The +.I txnid +parameter contains a transaction ID returned from +.IR txn_begin , +if the file is being accessed under transaction protection, +or NULL if transactions are not in effect. +.IP +Sequential key/data pair retrieval may begin at any time, and the +logical position of the ``cursor'' is not affected by calls to the +.IR del , +.IR get , +.IR put , +or +.I sync +functions. +Modifications to the database during a sequential scan will be reflected +in the scan, i.e. records inserted behind the cursor will not be returned +while records inserted in front of the cursor will be returned. +.IP +The parameter +.I flag +must be set to 0 or exactly one of the following values: +.RS +.TP 5 +R_CURSOR +The data associated with the specified key is returned. +This differs from the +.I get +functions in that it sets or initializes the cursor to the location of +the key as well. +.TP 5 +R_FIRST +The first key/data pair of the database is returned, and the cursor +is set or initialized to reference it. +.TP 5 +R_NEXT +Retrieve the key/data pair immediately after the cursor. +If the cursor is not yet set, this is the same as the R_FIRST flag. +.RE +.IP +The +.I seq +functions return -1 on error, setting +.IR errno , +0 on success, +and 1 if there are no key/data pairs less than or greater than the +specified or current key. +.TP 5 +int (*sync)(const DB *db, u_int flags); +A pointer to a function to flush any cached information to disk. +If the database is in memory only, the +.I sync +function has no effect and will always succeed. +.IP +The parameter +.I flag +must be set to 0 or a value specified by an access method specific +manual page. +.IP +The +.I sync +functions return -1 on failure, setting +.IR errno , +and 0 on success. +.SH ERRORS +The +.I db_open +function may fail and set +.I errno +for any of the errors specified for the library functions +.IR open (2), +.IR malloc (3) +or the following: +.TP 5 +[EFTYPE] +A file is incorrectly formatted. +.TP 5 +[EINVAL] +A parameter has been specified (hash function, recno pad byte etc.) +that is incompatible with the current file specification or, a flag +to a function which is not meaningful for the function (for example, +use of the cursor without prior initialization) or there is a mismatch +between the version number of file and the software. +.PP +The +.I close +functions may fail and set +.I errno +for any of the errors specified for the library functions +.IR close (2), +.IR read (2), +.IR write (2), +.IR free (3), +or +.IR fsync (2). +.PP +The +.IR del , +.IR get , +.I put +and +.I seq +functions may fail and set +.I errno +for any of the errors specified for the library functions +.IR read (2), +.IR write (2), +.IR free (3) +or +.IR malloc (3). +.PP +The +.I fd +functions will fail and set +.I errno +to ENOENT for in memory databases. +.PP +The +.I sync +functions may fail and set +.I errno +for any of the errors specified for the library function +.IR fsync (2). +.SH "SEE ALSO" +.IR db_btree (3), +.IR db_hash (3), +.IR db_lock (3), +.IR db_log (3), +.IR db_mpool (3), +.IR db_recno (3), +.IR db_txn (3) +.SH BUGS +The name DBT is a mnemonic for ``data base thang'', and was used +because noone could think of a reasonable name that wasn't already +in use somewhere else. +.PP +The +.I fd +function interface is a kluge, +and will be deleted in a future version of the interface. +.PP +Only big and little endian byte order is supported. diff --git a/src/util/db2/man/db_recno.3 b/src/util/db2/man/db_recno.3 new file mode 100644 index 0000000000..6b93b3f5a1 --- /dev/null +++ b/src/util/db2/man/db_recno.3 @@ -0,0 +1,268 @@ +.\" Copyright (c) 1990, 1993, 1994, 1995 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_recno.3 8.12 (Berkeley) 8/1/95 +.\" +.TH DB_RECNO 3 "August 1, 1995" +.UC 7 +.SH NAME +db_recno \- record number database access method +.SH DESCRIPTION +.so db.so +specific details of the recno access method. +.SH "ACCESS METHOD SPECIFIC INFORMATION" +The recno access method specific data structure provided to +.I db_open +is typedef'd and named RECNOINFO. +A RECNOINFO structure has at least the following fields, +which may be initialized before calling +.IR db_open : +.TP 5 +u_int8_t bval; +The delimiting byte to be used to mark the end of a record for +variable-length records, and the pad character for fixed-length +records. +If no value is specified, newlines (``\en'') are used to mark the end +of variable-length records and fixed-length records are padded with +spaces. +.TP 5 +char *bfname; +The recno access method stores the in-memory copies of its records +in a btree. +If bfname is non-NULL, it specifies the name of the btree file, +as if specified as the file name for a +.I db_open +of a btree file. +.TP 5 +u_int cachesize; +A suggested maximum size, in bytes, of the memory cache. +This value is +.B only +advisory, and the access method will allocate more memory rather than fail. +If +.I cachesize +is 0 (no size is specified) a default size is used. +.TP 5 +u_long flags; +The flag value is specified by +.IR or 'ing +any of the following values: +.RS +.TP 5 +R_FIXEDLEN +The records are fixed-length, not byte delimited. +The structure element +.I reclen +specifies the length of the record, and the structure element +.I bval +is used as the pad character. +Any records, inserted into the database, that are less than +.I reclen +bytes long are automatically padded. +.TP 5 +R_NOKEY +In the interface specified by +.IR db_open , +the sequential record retrieval fills in both the caller's key and +data structures. +If the R_NOKEY flag is specified, the +.I cursor +functions are not required to fill in the key structure. +This permits applications to retrieve records at the end of files without +reading all of the intervening records. +.TP 5 +R_SNAPSHOT +This flag requires that a snapshot of the file be taken when +.I db_open +is called, instead of permitting any unmodified records to be read from +the original file. +.RE +.TP 5 +int lorder; +The byte order for integers in the stored database metadata. +The number should represent the order as an integer; for example, +big endian order would be the number 4,321. +If +.I lorder +is 0 (no order is specified) the current host order is used. +.TP 5 +u_int psize; +The recno access method stores the in-memory copies of its records +in a btree. +This value is the size (in bytes) of the pages used for nodes in that tree. +If +.I psize +is 0 (no page size is specified) a page size is chosen based on the +underlying file system I/O block size. +See +.IR btree (3) +for more information. +.TP 5 +size_t reclen; +The length of a fixed-length record. +.SH "DB OPERATIONS" +The data part of the key/data pair used by the recno access method +is the same as other access methods. +The key is different. +The +.I data +field of the key should be a pointer to a memory location of type +.IR recno_t , +as defined in the <db.h> include file. +This type is normally the largest unsigned integral type available to +the implementation. +The +.I size +field of the key should be the size of that type. +.PP +The record number data structure is either variable or fixed-length +records stored in a flat-file format, accessed by the logical record +number. +The existence of record number five requires the existence of records +one through four, and the deletion of record number one causes +record number five to be renumbered to record number four, as well +as the cursor, if positioned after record number one, to shift down +one record. +The creation of record number five when records one through four do +not exist causes the logical creation of them with zero-length data. +.PP +Because there is no meta-data associated with the underlying recno access +method files, any changes made to the default values (e.g. fixed record +length or byte separator value) must be explicitly specified each time the +file is opened. +.PP +The functions returned by +.I db_open +for the btree access method are as described in +.IR db_open (3), +with the following exceptions and additions: +.TP 5 +type +The type is DB_RECNO. +.TP 5 +put +Using the +.I put +interface to create a new record will cause the creation of multiple, +empty records if the record number is more than one greater than the +largest record currently in the database. +.IP +The +.I put +function takes the following additional flags: +.RS +.TP 5 +R_IAFTER +Append the data immediately after the data referenced by +.IR key , +creating a new key/data pair. +The record number of the appended key/data pair is returned in the +.I key +structure. +.TP 5 +R_IBEFORE +Insert the data immediately before the data referenced by +.IR key , +creating a new key/data pair. +The record number of the inserted key/data pair is returned in the +.I key +structure. +.TP 5 +R_SETCURSOR +Store the key/data pair, setting or initializing the position of the +cursor to reference it. +.RE +.TP 5 +seq +The +.I seq +function takes the following additional flags: +.RS +.TP 5 +R_LAST +The last key/data pair of the database is returned, and the cursor +is set or initialized to reference it. +.TP 5 +R_PREV +Retrieve the key/data pair immediately before the cursor. +If the cursor is not yet set, this is the same as the R_LAST flag. +.RE +.IP +If the database file is a character special file and no complete +key/data pairs are currently available, the +.I seq +function returns 2. +.TP 5 +sync +The +.I sync +function takes the following additional flag: +.RS +.TP 5 +R_RECNOSYNC +This flag causes the +.I sync +function to apply to the btree file which underlies the recno file, +not the recno file itself. +(See the +.I bfname +field of RECNOINFO +structure, above, for more information.) +.RE +.SH ERRORS +The +.I recno +access method functions may fail and set +.I errno +for any of the errors specified for the library function +.IR db_open (3) +or the following: +.TP 5 +[EINVAL] +An attempt was made to add a record to a fixed-length database that +was too large to fit. +.SH "SEE ALSO" +.IR db_btree (3), +.IR db_hash (3), +.IR db_lock (3), +.IR db_log (3), +.IR db_mpool (3), +.IR db_open (3), +.IR db_txn (3) +.sp +.IR "Document Processing in a Relational Database System" , +Michael Stonebraker, Heidi Stettner, Joseph Kalash, Antonin Guttman, +Nadene Lynn, Memorandum No. UCB/ERL M82/32, May 1982. +.SH BUGS +The +.I sync +function's R_RECNOSYNC interface is a kluge, +and will be deleted in a future version of the interface. diff --git a/src/util/db2/man/db_txn.3 b/src/util/db2/man/db_txn.3 new file mode 100644 index 0000000000..18ad64692e --- /dev/null +++ b/src/util/db2/man/db_txn.3 @@ -0,0 +1,373 @@ +.\" Copyright (c) 1994, 1995 +.\" The President and Fellows of Harvard University. All rights reserved. +.\" Copyright (c) 1994, 1995 +.\" Margo I. Selzer. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)db_txn.3 8.8 (Harvard) 8/1/95 +.\" +.TH DB_TXN 3 "August 1, 1995" +.UC 7 +.SH NAME +db_txn \- transaction management functions +.SH SYNOPSIS +.nf +.ft B +#include <db.h> +#include <db_lock.h> + +int +txn_create(const char *path, mode_t mode, u_int maxtxns, u_int flags); + +TXNMGR * +txn_open(const char *path, DBT *logp, LOCK_TABLE_T *lockp, +.ti +5 +int (*recover)(DBT *lsn, DBT *log_entry, int isundo)); + +TXN * +txn_begin(TXNMGR *txnp); + +int +txn_commit(TXN *tid); + +int +txn_prepare(TXN *tid); + +int +txn_abort(TXN *tid); + +int +txn_close(TXNMGR *txnp); + +int +txn_unlink(const char *path, int force); +.ft R +.fi +.SH DESCRIPTION +.so db.so +specific details of the transaction support. +.PP +.I Db_txn +is the library interface that provides transaction semantics. +Full transaction support is provided by a collection of modules +that provide well defined interfaces to the services required for +transaction processing. +These services are recovery (see +.IR db_log (3)), +concurrency control (see +.IR db_lock (3)), +and the management of shared data (see +.IR db_mpool (3)). +Transaction semantics can be applied to the access methods described in +.IR db (3) +through function call parameters. +.PP +The model intended for transactional use (and that is used by the +access methods) is that write-ahead logging is provided by +.IR db_log (3) +to record both before- and after-image logging. +Locking follows a two-phase protocol and is implemented by +.IR db_lock (3). +.PP +.CR "transaction region" txn +Any necessary, +associated log and lock regions are created as well (see +.IR db_log (3) +and +.IR db_lock (3)). +.PP +The +.I maxtxns +argument specifies the maximum number of simultaneous transactions that +are supported. +This bounds the size of backing files and is used to derive limits for +the size of the lock region and logfiles. +When there are more than +.I maxtxns +concurrent transactions, calls to +.I txn_begin +may fail. +.PP +Default locking and logging protocols are provided only if the +backing files exist. +If the backing files do not exist, the +.I flags +parameter must indicate both a logging mode and locking mode specified by +.IR or 'ing +together at most one flag from each of the TXN_LOCK and TXN_LOG classes +as follows: +.TP 5 +TXN_LOCK_2PL +Use two-phase locking. +.TP 5 +TXN_LOCK_OPTIMISTIC +Use optimistic locking (not currently implemented). +.TP 5 +TXN_LOG_REDO +Provide redo-only logging (not currently implemented). +.TP 5 +TXN_LOG_UNDO +Provide undo-only logging (not currently implemented). +.TP 5 +TXN_LOG_UNDOREDO +Provide undo/redo write-ahead logging. +.PP +.RT txn_create +.PP +.OP "transaction region" txn +.PP +The +.I recover +argument specifies a function that is called by +.I txn_abort +during transaction abort. +This function takes three arguments: +.TP 5 +lsn +A log sequence number (LSN). +.TP 5 +log_entry +A log record. +.TP 5 +isundo +An undo flag set to 0 if the operation is a redo and set to 1 if the +operation an undo. +.PP +As discussed in the +.I db_log (3) +manual page, +the application is responsible for providing any necessary structure +to the log record. +For example, the application must understand what part of the log +record is an operation code, what part is redo information, and what +part is undo information. +.PP +The +.I txn_begin +function creates a new transaction in the designated transaction +manager, returning a pointer to a TXN that uniquely identifies it. +.PP +The +.I txn_commit +function ends the transaction specified by the +.I tid +argument. +Any locks held by the transaction are released. +If logging is enabled, a commit log record is written and flushed to disk. +.PP +The +.I txn_abort +function causes an abnormal termination of the transaction. +If logging is enabled, the log is played backwards and any recovery +operations are initiated through the +.I recover +function specified to +.IR txn_open . +After recovery is completed, all locks held by the transaction are released. +.PP +The +.I txn_close +function detaches a process from the transaction environment specified +by the TXNMGR pointer. +All mapped regions are unmapped and any allocated resources are freed. +Any uncommitted transactions are aborted. +.PP +.UN "transaction region" txn +.PP +The +.I txn_prepare +function initiates the beginning of a two phase commit. +In a distributed transaction, +the prepare directive should be issued to all participating +transaction managers. +Each manager must take whatever action is necessary to guarantee +that a future call to +.I txn_commit +on the specified +.I tid +will succeed. +.SH "SYSTEM INTEGRATION" +This model can be applied to data bases other than the provided access +methods. +For example, consider an application that provides transaction semantics +to data stored in regular files accessed using the +.IR read (2) +and +.IR write (2) +system calls. +The operations for which transaction protection is desired are bracketed +by calls to +.I txn_begin +and +.IR txn_commit . +.PP +Before data are referenced, a call is made to the lock manager, +.IR db_lock , +for a lock of the appropriate type (e.g. read) +on the object being locked. +The object might be a page in the file, a byte, a range of bytes, +or some key. +Before a write is performed, the application makes a call to the +log manager, +.IR db_log , +to record enough information to redo the operation in case of +failure after commit and to undo the operation in case of abort. +After the log message is written, the write system calls are issued. +After all requests are issued, the application calls +.IR txn_commit . +When +.I txn_commit +returns, the caller is guaranteed that all necessary log writes have +been written to disk. +.PP +At any time, the application may call +.IR txn_abort , +which will result in the appropriate calls to the +.I recover +routine to restore the ``database'' to a consistent pre-transaction +state. +(The recover routine must be able to either reapply or undo the update +depending on the context, for each different type of log record.) +.PP +If the application should crash, the recovery process uses the +.I db_log +interface to read the log and call the +.I recover +routine to restore the database to a consistent state. +.PP +The +.I txn_prepare +function provides the core functionality to implement distributed +transactions, +but it does not actually manage the notification of distributed +transaction managers. +The caller is responsible for issuing +.I txn_prepare +calls to all sites participating in the transaction. +If all responses are positive, the caller can issue a +.IR txn_commit . +If any of the responses are negative, the caller should issue a +.IR txn_abort . +In general, the +.I txn_prepare +call requires that the transaction log be flushed to disk. +.PP +The structure of the transaction support allows application designers +to trade off performance and protection. +Since DB manages many structures in shared memory, +its information is subject to corruption by applications when the library +is linked directly with the application. +For this reason, DB is designed to allow compilation into a separate +server process that may be accessed via a socket interface. +In this way DB's data structures are protected from application code, +but communication overhead is increased. +When applications are trusted, DB may be compiled directly into the +application for increased performance. +.SH ERRORS +The +.I txn_create +function may fail and set +.I errno +for any of the errors specified for the library functions +.IR open (2), +.IR write (2), +.IR malloc (3), +.IR lock_create (3), +and +.IR log_create (3). +.PP +The +.I txn_open +function may fail and set +.I errno +to any of the errors specified for the library functions +.IR open (2), +.IR write (2), +.IR malloc (3), +.IR lock_open (3), +and +.IR log_open (3). +.PP +The +.I txn_begin +function may fail and set +.I errno +to ENOSPC indicating that the maximum number of concurrent +transactions has been reached. +.PP +The +.I txn_commit +function may fail and set +.I errno +to EINVAL indicating that the transaction was aborted. +.PP +The +.I txn_close +function may fail and set +.I errno +to any of the errors specified for the library functions +.IR close (2), +.IR read (2), +.IR write (2), +.IR free (3), +.IR fsync (2), +.IR lock_close (3) +or +.IR log_close (3). +.PP +The +.I txn_unlink +function may fail and set +.I errno +to any of the errors specified for the library functions +.IR unlink (2), +.IR lock_unlink (3), +and +.IR log_unlink (3), +or the following: +.TP 5 +[EBUSY] +The transaction region was in use and the force flag was not set. +.SH "SEE ALSO" +.IR db_btree (3), +.IR db_hash (3), +.IR db_lock (3), +.IR db_log (3), +.IR db_mpool (3), +.IR db_open (3), +.IR db_recno (3) +.sp +.IR "LIBTP: Portable, Modular Transactions for UNIX" , +Margo Seltzer, Michael Olson, USENIX proceedings, Winter 1992. +.SH BUGS +The +.I maxtxns +parameter is a kluge, and should be deleted in favor of dynamically +expanding the transaction region. diff --git a/src/util/db2/man/spell.ok b/src/util/db2/man/spell.ok new file mode 100644 index 0000000000..794b00bf87 --- /dev/null +++ b/src/util/db2/man/spell.ok @@ -0,0 +1,170 @@ +Ake +Antonin +BTREE +BTREEINFO +Bsize +CALLBACK +Comput +D.E +DB +DB's +DBINFO +DBT +DBTYPE +Db +EACCES +EBUSY +EFTYPE +EINVAL +ENOENT +ENOSPC +ERL +EXCL +EXLOCK +FIXEDLEN +Ffactor +Guttman +HASHINFO +Heidi +IAFTER +IBEFORE +Kalash +Knuth +LIBTP +LOGINFO +LRU +LSN +LSN's +MPOOL +MPOOL's +MPOOLFILE +MPOOLFILE's +Maxlocks +Mpool +NG +NOKEY +NOOVERWRITE +NOPIN +NOTHELD +Nadene +Nelem +Nelems +OBJ +Pgaddr +RDONLY +RDWR +RECNO +RECNOINFO +RECNOSYNC +REQ +SETCURSOR +SHLOCK +Stettner +Stonebraker +Surv +TMPDIR +TRUNC +TXN +TXNMGR +Txn +UCB +UNDOREDO +USENIX +Unterauer +Vol +WAL +WRONLY +XACT +YYYY.MM.DD.HH.SS +al +bfname +bsize +btree +btrees +bval +cachesize +callback +const +db +db.h +dbinfo +dbopen +del +elistp +endian +enum +errbuf +errfile +errno +errpfx +fd +ffactor +getv +ing +int +int32 +int8 +isundo +kluge +lastlsn +lg +lock.h +lockinfo +lockop +lockp +log.YYYY.MM.DD.HH.MM.SS +logfiles +loginfo +logp +lreq +lsn +lsn1 +lsn2 +lt +maxcache +maxkeypage +maxlocks +maxtxns +meta +minkeypage +mmap +mpf +mpool +mpool.h +mpoolinfo +munmap +nacquire +nelem +nelems +nmodes +noone +nrelease +obj +op +openinfo +pathname +pgaddr +pgcookie +pgin +pgno +pgnoaddr +pgout +pid +pp +psize +queue.h +reclen +recno +sx +thang +timespec +tmp +trunc +txn +txnid +txninfo +txnp +typedef +typedef'd +vec +writeable diff --git a/src/util/db2/mpool/Makefile.inc b/src/util/db2/mpool/Makefile.inc new file mode 100644 index 0000000000..93210c89e2 --- /dev/null +++ b/src/util/db2/mpool/Makefile.inc @@ -0,0 +1,5 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/mpool + +SRCS+= mpool.c diff --git a/src/util/db2/mpool/README b/src/util/db2/mpool/README new file mode 100644 index 0000000000..0f01fbcdb4 --- /dev/null +++ b/src/util/db2/mpool/README @@ -0,0 +1,7 @@ +# @(#)README 8.1 (Berkeley) 6/4/93 + +These are the current memory pool routines. +They aren't ready for prime time, yet, and +the interface is expected to change. + +--keith diff --git a/src/util/db2/mpool/mpool.c b/src/util/db2/mpool/mpool.c new file mode 100644 index 0000000000..12e557d031 --- /dev/null +++ b/src/util/db2/mpool/mpool.c @@ -0,0 +1,502 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)mpool.c 8.7 (Berkeley) 11/2/95"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "db-int.h" +#include "mpool.h" + +static BKT *mpool_bkt __P((MPOOL *)); +static BKT *mpool_look __P((MPOOL *, db_pgno_t)); +static int mpool_write __P((MPOOL *, BKT *)); + +/* + * mpool_open -- + * Initialize a memory pool. + */ +MPOOL * +mpool_open(key, fd, pagesize, maxcache) + void *key; + int fd; + db_pgno_t pagesize, maxcache; +{ + struct stat sb; + MPOOL *mp; + int entry; + + /* + * Get information about the file. + * + * XXX + * We don't currently handle pipes, although we should. + */ + if (fstat(fd, &sb)) + return (NULL); + if (!S_ISREG(sb.st_mode)) { + errno = ESPIPE; + return (NULL); + } + + /* Allocate and initialize the MPOOL cookie. */ + if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL) + return (NULL); + CIRCLEQ_INIT(&mp->lqh); + for (entry = 0; entry < HASHSIZE; ++entry) + CIRCLEQ_INIT(&mp->hqh[entry]); + mp->maxcache = maxcache; + mp->npages = sb.st_size / pagesize; + mp->pagesize = pagesize; + mp->fd = fd; + return (mp); +} + +/* + * mpool_filter -- + * Initialize input/output filters. + */ +void +mpool_filter(mp, pgin, pgout, pgcookie) + MPOOL *mp; + void (*pgin) __P((void *, db_pgno_t, void *)); + void (*pgout) __P((void *, db_pgno_t, void *)); + void *pgcookie; +{ + mp->pgin = pgin; + mp->pgout = pgout; + mp->pgcookie = pgcookie; +} + +/* + * mpool_new -- + * Get a new page of memory. + */ +void * +mpool_new(mp, pgnoaddr, flags) + MPOOL *mp; + db_pgno_t *pgnoaddr; + u_int flags; +{ + struct _hqh *head; + BKT *bp; + + if (mp->npages == MAX_PAGE_NUMBER) { + (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); + abort(); + } +#ifdef STATISTICS + ++mp->pagenew; +#endif + /* + * Get a BKT from the cache. Assign a new page number, attach + * it to the head of the hash chain, the tail of the lru chain, + * and return. + */ + if ((bp = mpool_bkt(mp)) == NULL) + return (NULL); + if (flags == MPOOL_PAGE_REQUEST) { + mp->npages++; + bp->pgno = *pgnoaddr; + } else + bp->pgno = *pgnoaddr = mp->npages++; + + bp->flags = MPOOL_PINNED | MPOOL_INUSE; + + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_INSERT_HEAD(head, bp, hq); + CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); + return (bp->page); +} + +int +mpool_delete(mp, page) + MPOOL *mp; + void *page; +{ + struct _hqh *head; + BKT *bp; + + bp = (BKT *)((char *)page - sizeof(BKT)); + +#ifdef DEBUG + if (!(bp->flags & MPOOL_PINNED)) { + (void)fprintf(stderr, + "mpool_delete: page %d not pinned\n", bp->pgno); + abort(); + } +#endif + + /* Remove from the hash and lru queues. */ + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_REMOVE(head, bp, hq); + CIRCLEQ_REMOVE(&mp->lqh, bp, q); + + free(bp); + return (RET_SUCCESS); +} + +/* + * mpool_get + * Get a page. + */ +void * +mpool_get(mp, pgno, flags) + MPOOL *mp; + db_pgno_t pgno; + u_int flags; /* XXX not used? */ +{ + struct _hqh *head; + BKT *bp; + off_t off; + int nr; + +#ifdef STATISTICS + ++mp->pageget; +#endif + + /* Check for a page that is cached. */ + if ((bp = mpool_look(mp, pgno)) != NULL) { +#ifdef DEBUG + if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) { + (void)fprintf(stderr, + "mpool_get: page %d already pinned\n", bp->pgno); + abort(); + } +#endif + /* + * Move the page to the head of the hash chain and the tail + * of the lru chain. + */ + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_REMOVE(head, bp, hq); + CIRCLEQ_INSERT_HEAD(head, bp, hq); + CIRCLEQ_REMOVE(&mp->lqh, bp, q); + CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); + + /* Return a pinned page. */ + bp->flags |= MPOOL_PINNED; + return (bp->page); + } + + /* Get a page from the cache. */ + if ((bp = mpool_bkt(mp)) == NULL) + return (NULL); + + /* Read in the contents. */ +#ifdef STATISTICS + ++mp->pageread; +#endif + off = mp->pagesize * pgno; + if (lseek(mp->fd, off, SEEK_SET) != off) + return (NULL); + + if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) { + if (nr > 0) { + /* A partial read is definitely bad. */ + errno = EINVAL; + return (NULL); + } else { + /* + * A zero-length reads, means you need to create a + * new page. + */ + memset(bp->page, 0, mp->pagesize); + } + } + + /* Set the page number, pin the page. */ + bp->pgno = pgno; + if (!(flags & MPOOL_IGNOREPIN)) + bp->flags = MPOOL_PINNED; + bp->flags |= MPOOL_INUSE; + + /* + * Add the page to the head of the hash chain and the tail + * of the lru chain. + */ + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_INSERT_HEAD(head, bp, hq); + CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); + + /* Run through the user's filter. */ + if (mp->pgin != NULL) + (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); + + return (bp->page); +} + +/* + * mpool_put + * Return a page. + */ +int +mpool_put(mp, page, flags) + MPOOL *mp; + void *page; + u_int flags; +{ + BKT *bp; + +#ifdef STATISTICS + ++mp->pageput; +#endif + bp = (BKT *)((char *)page - sizeof(BKT)); +#ifdef DEBUG + if (!(bp->flags & MPOOL_PINNED)) { + (void)fprintf(stderr, + "mpool_put: page %d not pinned\n", bp->pgno); + abort(); + } +#endif + bp->flags &= ~MPOOL_PINNED; + if (flags & MPOOL_DIRTY) + bp->flags |= flags & MPOOL_DIRTY; + return (RET_SUCCESS); +} + +/* + * mpool_close + * Close the buffer pool. + */ +int +mpool_close(mp) + MPOOL *mp; +{ + BKT *bp; + + /* Free up any space allocated to the lru pages. */ + while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) { + CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q); + free(bp); + } + + /* Free the MPOOL cookie. */ + free(mp); + return (RET_SUCCESS); +} + +/* + * mpool_sync + * Sync the pool to disk. + */ +int +mpool_sync(mp) + MPOOL *mp; +{ + BKT *bp; + + /* Walk the lru chain, flushing any dirty pages to disk. */ + for (bp = mp->lqh.cqh_first; + bp != (void *)&mp->lqh; bp = bp->q.cqe_next) + if (bp->flags & MPOOL_DIRTY && + mpool_write(mp, bp) == RET_ERROR) + return (RET_ERROR); + + /* Sync the file descriptor. */ + return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); +} + +/* + * mpool_bkt + * Get a page from the cache (or create one). + */ +static BKT * +mpool_bkt(mp) + MPOOL *mp; +{ + struct _hqh *head; + BKT *bp; + + /* If under the max cached, always create a new page. */ + if (mp->curcache < mp->maxcache) + goto new; + + /* + * If the cache is max'd out, walk the lru list for a buffer we + * can flush. If we find one, write it (if necessary) and take it + * off any lists. If we don't find anything we grow the cache anyway. + * The cache never shrinks. + */ + for (bp = mp->lqh.cqh_first; + bp != (void *)&mp->lqh; bp = bp->q.cqe_next) + if (!(bp->flags & MPOOL_PINNED)) { + /* Flush if dirty. */ + if (bp->flags & MPOOL_DIRTY && + mpool_write(mp, bp) == RET_ERROR) + return (NULL); +#ifdef STATISTICS + ++mp->pageflush; +#endif + /* Remove from the hash and lru queues. */ + head = &mp->hqh[HASHKEY(bp->pgno)]; + CIRCLEQ_REMOVE(head, bp, hq); + CIRCLEQ_REMOVE(&mp->lqh, bp, q); +#ifdef DEBUG + { void *spage; + spage = bp->page; + memset(bp, 0xff, sizeof(BKT) + mp->pagesize); + bp->page = spage; + } +#endif + bp->flags = 0; + return (bp); + } + +new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) + return (NULL); +#ifdef STATISTICS + ++mp->pagealloc; +#endif +#if defined(DEBUG) || defined(PURIFY) + memset(bp, 0xff, sizeof(BKT) + mp->pagesize); +#endif + bp->page = (char *)bp + sizeof(BKT); + bp->flags = 0; + ++mp->curcache; + return (bp); +} + +/* + * mpool_write + * Write a page to disk. + */ +static int +mpool_write(mp, bp) + MPOOL *mp; + BKT *bp; +{ + off_t off; + +#ifdef STATISTICS + ++mp->pagewrite; +#endif + + /* Run through the user's filter. */ + if (mp->pgout) + (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); + + off = mp->pagesize * bp->pgno; + if (lseek(mp->fd, off, SEEK_SET) != off) + return (RET_ERROR); + if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize) + return (RET_ERROR); + + bp->flags &= ~MPOOL_DIRTY; + return (RET_SUCCESS); +} + +/* + * mpool_look + * Lookup a page in the cache. + */ +static BKT * +mpool_look(mp, pgno) + MPOOL *mp; + db_pgno_t pgno; +{ + struct _hqh *head; + BKT *bp; + + head = &mp->hqh[HASHKEY(pgno)]; + for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next) + if ((bp->pgno == pgno) && + (bp->flags & MPOOL_INUSE == MPOOL_INUSE)) { +#ifdef STATISTICS + ++mp->cachehit; +#endif + return (bp); + } +#ifdef STATISTICS + ++mp->cachemiss; +#endif + return (NULL); +} + +#ifdef STATISTICS +/* + * mpool_stat + * Print out cache statistics. + */ +void +mpool_stat(mp) + MPOOL *mp; +{ + BKT *bp; + int cnt; + char *sep; + + (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); + (void)fprintf(stderr, + "page size %lu, cacheing %lu pages of %lu page max cache\n", + mp->pagesize, mp->curcache, mp->maxcache); + (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", + mp->pageput, mp->pageget, mp->pagenew); + (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", + mp->pagealloc, mp->pageflush); + if (mp->cachehit + mp->cachemiss) + (void)fprintf(stderr, + "%.0f%% cache hit rate (%lu hits, %lu misses)\n", + ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) + * 100, mp->cachehit, mp->cachemiss); + (void)fprintf(stderr, "%lu page reads, %lu page writes\n", + mp->pageread, mp->pagewrite); + + sep = ""; + cnt = 0; + for (bp = mp->lqh.cqh_first; + bp != (void *)&mp->lqh; bp = bp->q.cqe_next) { + (void)fprintf(stderr, "%s%d", sep, bp->pgno); + if (bp->flags & MPOOL_DIRTY) + (void)fprintf(stderr, "d"); + if (bp->flags & MPOOL_PINNED) + (void)fprintf(stderr, "P"); + if (++cnt == 10) { + sep = "\n"; + cnt = 0; + } else + sep = ", "; + + } + (void)fprintf(stderr, "\n"); +} +#endif diff --git a/src/util/db2/mpool/mpool.h b/src/util/db2/mpool/mpool.h new file mode 100644 index 0000000000..92bf7541d3 --- /dev/null +++ b/src/util/db2/mpool/mpool.h @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mpool.h 8.4 (Berkeley) 11/2/95 + */ + +#include "db-queue.h" + +/* + * The memory pool scheme is a simple one. Each in-memory page is referenced + * by a bucket which is threaded in up to two of three ways. All active pages + * are threaded on a hash chain (hashed by page number) and an lru chain. + * Inactive pages are threaded on a free chain. Each reference to a memory + * pool is handed an opaque MPOOL cookie which stores all of this information. + */ +#define HASHSIZE 128 +#define HASHKEY(pgno) ((pgno - 1) % HASHSIZE) + +/* The BKT structures are the elements of the queues. */ +typedef struct _bkt { + CIRCLEQ_ENTRY(_bkt) hq; /* hash queue */ + CIRCLEQ_ENTRY(_bkt) q; /* lru queue */ + void *page; /* page */ + db_pgno_t pgno; /* page number */ + +#define MPOOL_DIRTY 0x01 /* page needs to be written */ +#define MPOOL_PINNED 0x02 /* page is pinned into memory */ +#define MPOOL_INUSE 0x04 /* page address is valid */ + u_int8_t flags; /* flags */ +} BKT; + +typedef struct MPOOL { + CIRCLEQ_HEAD(_lqh, _bkt) lqh; /* lru queue head */ + /* hash queue array */ + CIRCLEQ_HEAD(_hqh, _bkt) hqh[HASHSIZE]; + db_pgno_t curcache; /* current number of cached pages */ + db_pgno_t maxcache; /* max number of cached pages */ + db_pgno_t npages; /* number of pages in the file */ + u_long pagesize; /* file page size */ + int fd; /* file descriptor */ + /* page in conversion routine */ + void (*pgin) __P((void *, db_pgno_t, void *)); + /* page out conversion routine */ + void (*pgout) __P((void *, db_pgno_t, void *)); + void *pgcookie; /* cookie for page in/out routines */ +#ifdef STATISTICS + u_long cachehit; + u_long cachemiss; + u_long pagealloc; + u_long pageflush; + u_long pageget; + u_long pagenew; + u_long pageput; + u_long pageread; + u_long pagewrite; +#endif +} MPOOL; + +#define MPOOL_IGNOREPIN 0x01 /* Ignore if the page is pinned. */ +#define MPOOL_PAGE_REQUEST 0x01 /* Allocate a new page with a + specific page number. */ +#define MPOOL_PAGE_NEXT 0x02 /* Allocate a new page with the next + page number. */ + +__BEGIN_DECLS +MPOOL *mpool_open __P((void *, int, db_pgno_t, db_pgno_t)); +void mpool_filter __P((MPOOL *, void (*)(void *, db_pgno_t, void *), + void (*)(void *, db_pgno_t, void *), void *)); +void *mpool_new __P((MPOOL *, db_pgno_t *, u_int)); +void *mpool_get __P((MPOOL *, db_pgno_t, u_int)); +int mpool_delete __P((MPOOL *, void *)); +int mpool_put __P((MPOOL *, void *, u_int)); +int mpool_sync __P((MPOOL *)); +int mpool_close __P((MPOOL *)); +#ifdef STATISTICS +void mpool_stat __P((MPOOL *)); +#endif +__END_DECLS diff --git a/src/util/db2/obj/Makefile.in b/src/util/db2/obj/Makefile.in new file mode 100644 index 0000000000..5e60ef7961 --- /dev/null +++ b/src/util/db2/obj/Makefile.in @@ -0,0 +1,157 @@ +SHELL = /bin/sh + +LIBDB = libdb.a + +HASH_OBJS = hash.o hash_bigkey.o hash_debug.o hash_func.o hash_log2.o \ + hash_page.o hsearch.o dbm.o +BT_OBJS = bt_close.o bt_conv.o bt_debug.o bt_delete.o bt_get.o \ + bt_open.o bt_overflow.o bt_page.o bt_put.o bt_search.o \ + bt_seq.o bt_split.o bt_utils.o +DB_OBJS = db.o +MPOOL_OBJS = mpool.o +REC_OBJS = rec_close.o rec_delete.o rec_get.o rec_open.o rec_put.o \ + rec_search.o rec_seq.o rec_utils.o + +MEMMOVE_OBJ = @MEMMOVE_OBJ@ +MKSTEMP_OBJ = @MKSTEMP_OBJ@ +STRERROR_OBJ = @STRERROR_OBJ@ + +MISC_OBJS = $(MEMMOVE_OBJ) $(MKSTEMP_OBJ) + +ALL_OBJS = $(HASH_OBJS) $(BT_OBJS) $(DB_OBJS) $(MPOOL_OBJS) \ + $(REC_OBJS) $(MISC_OBJS) + +TMPDIR = /tmp + +AR = ar +CC = @CC@ +RANLIB = @RANLIB@ +FCTSH = @FCTSH@ +top_srcdir = @top_srcdir@ +srcdir = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ +includedir = @includedir@ +libdir = @libdir@ + +CDEBUGFLAGS = @CFLAGS@ + +CFLAGS = $(CDEBUGFLAGS) @CPPFLAGS@ @DEFS@ \ + -I. -I$(top_srcdir)/include -I$(top_srcdir)/mpool -I$(top_srcdir)/db \ + -I$(top_srcdir)/hash -I$(top_srcdir)/btree -I$(top_srcdir)/recno + +all:: $(LIBDB) db.h + +$(LIBDB): $(ALL_OBJS) + $(AR) cru $@ $(ALL_OBJS) + $(RANLIB) $@ + +db.h: $(top_srcdir)/include/db.h + ln -s $(top_srcdir)/include/db.h . + +dbtest: dbtest.o $(STRERROR_OBJ) $(LIBDB) + $(CC) -o $@ dbtest.o $(STRERROR_OBJ) $(LIBDB) + +check:: dbtest + TMPDIR=$(TMPDIR) $(FCTSH) $(top_srcdir)/test/run.test + +install:: + cp $(LIBDB) $(libdir) + $(RANLIB) $(libdir)/$(LIBDB) + cp $(top_srcdir)/include/db.h $(includedir) + cp ../db-config.h $(includedir) + +clean:: + rm -f $(ALL_OBJS) $(LIBDB) \ + dbtest.o $(STRERROR_OBJ) dbtest \ + core t1 t2 t3 *~ + +## if VPATH worked everywhere, I could do this: +## +## VPATH = @top_srcdir@/db @top_srcdir@/mpool \ +## @top_srcdir@/hash @top_srcdir@/btree @top_srcdir@/recno \ +## @top_srcdir@/clib +## +## but it doesn't, so I do this instead: + +db.o: $(top_srcdir)/db/db.c + $(CC) $(CFLAGS) -c -o db.o $(top_srcdir)/db/db.c + +mpool.o: $(top_srcdir)/mpool/mpool.c + $(CC) $(CFLAGS) -c -o mpool.o $(top_srcdir)/mpool/mpool.c + +hash.o: $(top_srcdir)/hash/hash.c + $(CC) $(CFLAGS) -c -o hash.o $(top_srcdir)/hash/hash.c +hash_bigkey.o: $(top_srcdir)/hash/hash_bigkey.c + $(CC) $(CFLAGS) -c -o hash_bigkey.o $(top_srcdir)/hash/hash_bigkey.c +hash_debug.o: $(top_srcdir)/hash/hash_debug.c + $(CC) $(CFLAGS) -c -o hash_debug.o $(top_srcdir)/hash/hash_debug.c +hash_func.o: $(top_srcdir)/hash/hash_func.c + $(CC) $(CFLAGS) -c -o hash_func.o $(top_srcdir)/hash/hash_func.c +hash_log2.o: $(top_srcdir)/hash/hash_log2.c + $(CC) $(CFLAGS) -c -o hash_log2.o $(top_srcdir)/hash/hash_log2.c +hash_page.o: $(top_srcdir)/hash/hash_page.c + $(CC) $(CFLAGS) -c -o hash_page.o $(top_srcdir)/hash/hash_page.c +hsearch.o: $(top_srcdir)/hash/hsearch.c + $(CC) $(CFLAGS) -c -o hsearch.o $(top_srcdir)/hash/hsearch.c +dbm.o: $(top_srcdir)/hash/dbm.c + $(CC) $(CFLAGS) -c -o dbm.o $(top_srcdir)/hash/dbm.c + +bt_close.o: $(top_srcdir)/btree/bt_close.c + $(CC) $(CFLAGS) -c -o bt_close.o $(top_srcdir)/btree/bt_close.c +bt_conv.o: $(top_srcdir)/btree/bt_conv.c + $(CC) $(CFLAGS) -c -o bt_conv.o $(top_srcdir)/btree/bt_conv.c +bt_debug.o: $(top_srcdir)/btree/bt_debug.c + $(CC) $(CFLAGS) -c -o bt_debug.o $(top_srcdir)/btree/bt_debug.c +bt_delete.o: $(top_srcdir)/btree/bt_delete.c + $(CC) $(CFLAGS) -c -o bt_delete.o $(top_srcdir)/btree/bt_delete.c +bt_get.o: $(top_srcdir)/btree/bt_get.c + $(CC) $(CFLAGS) -c -o bt_get.o $(top_srcdir)/btree/bt_get.c +bt_open.o: $(top_srcdir)/btree/bt_open.c + $(CC) $(CFLAGS) -c -o bt_open.o $(top_srcdir)/btree/bt_open.c +bt_overflow.o: $(top_srcdir)/btree/bt_overflow.c + $(CC) $(CFLAGS) -c -o bt_overflow.o $(top_srcdir)/btree/bt_overflow.c +bt_page.o: $(top_srcdir)/btree/bt_page.c + $(CC) $(CFLAGS) -c -o bt_page.o $(top_srcdir)/btree/bt_page.c +bt_put.o: $(top_srcdir)/btree/bt_put.c + $(CC) $(CFLAGS) -c -o bt_put.o $(top_srcdir)/btree/bt_put.c +bt_search.o: $(top_srcdir)/btree/bt_search.c + $(CC) $(CFLAGS) -c -o bt_search.o $(top_srcdir)/btree/bt_search.c +bt_seq.o: $(top_srcdir)/btree/bt_seq.c + $(CC) $(CFLAGS) -c -o bt_seq.o $(top_srcdir)/btree/bt_seq.c +bt_split.o: $(top_srcdir)/btree/bt_split.c + $(CC) $(CFLAGS) -c -o bt_split.o $(top_srcdir)/btree/bt_split.c +bt_stack.o: $(top_srcdir)/btree/bt_stack.c + $(CC) $(CFLAGS) -c -o bt_stack.o $(top_srcdir)/btree/bt_stack.c +bt_utils.o: $(top_srcdir)/btree/bt_utils.c + $(CC) $(CFLAGS) -c -o bt_utils.o $(top_srcdir)/btree/bt_utils.c + +rec_close.o: $(top_srcdir)/recno/rec_close.c + $(CC) $(CFLAGS) -c -o rec_close.o $(top_srcdir)/recno/rec_close.c +rec_delete.o: $(top_srcdir)/recno/rec_delete.c + $(CC) $(CFLAGS) -c -o rec_delete.o $(top_srcdir)/recno/rec_delete.c +rec_get.o: $(top_srcdir)/recno/rec_get.c + $(CC) $(CFLAGS) -c -o rec_get.o $(top_srcdir)/recno/rec_get.c +rec_open.o: $(top_srcdir)/recno/rec_open.c + $(CC) $(CFLAGS) -c -o rec_open.o $(top_srcdir)/recno/rec_open.c +rec_put.o: $(top_srcdir)/recno/rec_put.c + $(CC) $(CFLAGS) -c -o rec_put.o $(top_srcdir)/recno/rec_put.c +rec_search.o: $(top_srcdir)/recno/rec_search.c + $(CC) $(CFLAGS) -c -o rec_search.o $(top_srcdir)/recno/rec_search.c +rec_seq.o: $(top_srcdir)/recno/rec_seq.c + $(CC) $(CFLAGS) -c -o rec_seq.o $(top_srcdir)/recno/rec_seq.c +rec_utils.o: $(top_srcdir)/recno/rec_utils.c + $(CC) $(CFLAGS) -c -o rec_utils.o $(top_srcdir)/recno/rec_utils.c + +dbtest.o: $(top_srcdir)/test/dbtest.c + $(CC) $(CFLAGS) -c -o dbtest.o $(top_srcdir)/test/dbtest.c + +memmove.o: $(top_srcdir)/clib/memmove.c + $(CC) $(CFLAGS) -c -o memmove.o $(top_srcdir)/clib/memmove.c +mkstemp.o: $(top_srcdir)/clib/mkstemp.c + $(CC) $(CFLAGS) -c -o mkstemp.o $(top_srcdir)/clib/mkstemp.c +strerror.o: $(top_srcdir)/clib/strerror.c + $(CC) $(CFLAGS) -c -o strerror.o $(top_srcdir)/clib/strerror.c + +distclean:: clean + rm -f Makefile diff --git a/src/util/db2/recno/Makefile.inc b/src/util/db2/recno/Makefile.inc new file mode 100644 index 0000000000..e49e225522 --- /dev/null +++ b/src/util/db2/recno/Makefile.inc @@ -0,0 +1,6 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/recno + +SRCS+= rec_close.c rec_delete.c rec_get.c rec_open.c rec_put.c rec_search.c \ + rec_seq.c rec_utils.c diff --git a/src/util/db2/recno/extern.h b/src/util/db2/recno/extern.h new file mode 100644 index 0000000000..feed434453 --- /dev/null +++ b/src/util/db2/recno/extern.h @@ -0,0 +1,54 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.3 (Berkeley) 6/4/94 + */ + +#include "../btree/extern.h" + +int __rec_close __P((DB *)); +int __rec_delete __P((const DB *, const DBT *, u_int)); +int __rec_dleaf __P((BTREE *, PAGE *, u_int32_t)); +int __rec_fd __P((const DB *)); +int __rec_fmap __P((BTREE *, recno_t)); +int __rec_fout __P((BTREE *)); +int __rec_fpipe __P((BTREE *, recno_t)); +int __rec_get __P((const DB *, const DBT *, DBT *, u_int)); +int __rec_iput __P((BTREE *, recno_t, const DBT *, u_int)); +int __rec_put __P((const DB *dbp, DBT *, const DBT *, u_int)); +int __rec_ret __P((BTREE *, EPG *, recno_t, DBT *, DBT *)); +EPG *__rec_search __P((BTREE *, recno_t, enum SRCHOP)); +int __rec_seq __P((const DB *, DBT *, DBT *, u_int)); +int __rec_sync __P((const DB *, u_int)); +int __rec_vmap __P((BTREE *, recno_t)); +int __rec_vout __P((BTREE *)); +int __rec_vpipe __P((BTREE *, recno_t)); diff --git a/src/util/db2/recno/rec_close.c b/src/util/db2/recno/rec_close.c new file mode 100644 index 0000000000..2e923070bd --- /dev/null +++ b/src/util/db2/recno/rec_close.c @@ -0,0 +1,186 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_close.c 8.9 (Berkeley) 11/18/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#include <sys/uio.h> +#ifdef RECNO_USE_MMAP +#include <sys/mman.h> +#endif + +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <unistd.h> + +#include "db-int.h" +#include "recno.h" + +/* + * __REC_CLOSE -- Close a recno tree. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_close(dbp) + DB *dbp; +{ + BTREE *t; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + if (__rec_sync(dbp, 0) == RET_ERROR) + return (RET_ERROR); + + /* Committed to closing. */ + status = RET_SUCCESS; +#ifdef RECNO_USE_MMAP + if (F_ISSET(t, R_MEMMAPPED) && munmap(t->bt_smap, t->bt_msize)) + status = RET_ERROR; +#endif + + if (!F_ISSET(t, R_INMEM)) + if (F_ISSET(t, R_CLOSEFP)) { + if (fclose(t->bt_rfp)) + status = RET_ERROR; + } else + if (close(t->bt_rfd)) + status = RET_ERROR; + + if (__bt_close(dbp) == RET_ERROR) + status = RET_ERROR; + + return (status); +} + +/* + * __REC_SYNC -- sync the recno tree to disk. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__rec_sync(dbp, flags) + const DB *dbp; + u_int flags; +{ + struct iovec iov[2]; + BTREE *t; + DBT data, key; + off_t off; + recno_t scursor, trec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + if (flags == R_RECNOSYNC) + return (__bt_sync(dbp, 0)); + + if (F_ISSET(t, R_RDONLY | R_INMEM) || !F_ISSET(t, R_MODIFIED)) + return (RET_SUCCESS); + + /* Read any remaining records into the tree. */ + if (!F_ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) + return (RET_ERROR); + + /* Rewind the file descriptor. */ + if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0) + return (RET_ERROR); + + /* Save the cursor. */ + scursor = t->bt_cursor.rcursor; + + key.size = sizeof(recno_t); + key.data = &trec; + + if (F_ISSET(t, R_FIXLEN)) { + /* + * We assume that fixed length records are all fixed length. + * Any that aren't are either EINVAL'd or corrected by the + * record put code. + */ + status = (dbp->seq)(dbp, &key, &data, R_FIRST); + while (status == RET_SUCCESS) { + if (write(t->bt_rfd, data.data, data.size) != data.size) + return (RET_ERROR); + status = (dbp->seq)(dbp, &key, &data, R_NEXT); + } + } else { + iov[1].iov_base = &t->bt_bval; + iov[1].iov_len = 1; + + status = (dbp->seq)(dbp, &key, &data, R_FIRST); + while (status == RET_SUCCESS) { + iov[0].iov_base = data.data; + iov[0].iov_len = data.size; + if (writev(t->bt_rfd, iov, 2) != data.size + 1) + return (RET_ERROR); + status = (dbp->seq)(dbp, &key, &data, R_NEXT); + } + } + + /* Restore the cursor. */ + t->bt_cursor.rcursor = scursor; + + if (status == RET_ERROR) + return (RET_ERROR); + if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1) + return (RET_ERROR); + if (ftruncate(t->bt_rfd, off)) + return (RET_ERROR); + F_CLR(t, R_MODIFIED); + return (RET_SUCCESS); +} diff --git a/src/util/db2/recno/rec_delete.c b/src/util/db2/recno/rec_delete.c new file mode 100644 index 0000000000..35ec7769ac --- /dev/null +++ b/src/util/db2/recno/rec_delete.c @@ -0,0 +1,197 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_delete.c 8.7 (Berkeley) 7/14/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include "db-int.h" +#include "recno.h" + +static int rec_rdelete __P((BTREE *, recno_t)); + +/* + * __REC_DELETE -- Delete the item(s) referenced by a key. + * + * Parameters: + * dbp: pointer to access method + * key: key to delete + * flags: R_CURSOR if deleting what the cursor references + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +int +__rec_delete(dbp, key, flags) + const DB *dbp; + const DBT *key; + u_int flags; +{ + BTREE *t; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + switch(flags) { + case 0: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + if (nrec > t->bt_nrecs) + return (RET_SPECIAL); + --nrec; + status = rec_rdelete(t, nrec); + break; + case R_CURSOR: + if (!F_ISSET(&t->bt_cursor, CURS_INIT)) + goto einval; + if (t->bt_nrecs == 0) + return (RET_SPECIAL); + status = rec_rdelete(t, t->bt_cursor.rcursor - 1); + if (status == RET_SUCCESS) + --t->bt_cursor.rcursor; + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + + if (status == RET_SUCCESS) + F_SET(t, B_MODIFIED | R_MODIFIED); + return (status); +} + +/* + * REC_RDELETE -- Delete the data matching the specified key. + * + * Parameters: + * tree: tree + * nrec: record to delete + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +static int +rec_rdelete(t, nrec) + BTREE *t; + recno_t nrec; +{ + EPG *e; + PAGE *h; + int status; + + /* Find the record; __rec_search pins the page. */ + if ((e = __rec_search(t, nrec, SDELETE)) == NULL) + return (RET_ERROR); + + /* Delete the record. */ + h = e->page; + status = __rec_dleaf(t, h, e->index); + if (status != RET_SUCCESS) { + mpool_put(t->bt_mp, h, 0); + return (status); + } + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); +} + +/* + * __REC_DLEAF -- Delete a single record from a recno leaf page. + * + * Parameters: + * t: tree + * index: index on current page to delete + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__rec_dleaf(t, h, index) + BTREE *t; + PAGE *h; + u_int32_t index; +{ + RLEAF *rl; + indx_t *ip, cnt, offset; + u_int32_t nbytes; + char *from; + void *to; + + /* + * Delete a record from a recno leaf page. Internal records are never + * deleted from internal pages, regardless of the records that caused + * them to be added being deleted. Pages made empty by deletion are + * not reclaimed. They are, however, made available for reuse. + * + * Pack the remaining entries at the end of the page, shift the indices + * down, overwriting the deleted record and its index. If the record + * uses overflow pages, make them available for reuse. + */ + to = rl = GETRLEAF(h, index); + if (rl->flags & P_BIGDATA && __ovfl_delete(t, rl->bytes) == RET_ERROR) + return (RET_ERROR); + nbytes = NRLEAF(rl); + + /* + * Compress the key/data pairs. Compress and adjust the [BR]LEAF + * offsets. Reset the headers. + */ + from = (char *)h + h->upper; + memmove(from + nbytes, from, (char *)to - from); + h->upper += nbytes; + + offset = h->linp[index]; + for (cnt = &h->linp[index] - (ip = &h->linp[0]); cnt--; ++ip) + if (ip[0] < offset) + ip[0] += nbytes; + for (cnt = &h->linp[NEXTINDEX(h)] - ip; --cnt; ++ip) + ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; + h->lower -= sizeof(indx_t); + --t->bt_nrecs; + return (RET_SUCCESS); +} diff --git a/src/util/db2/recno/rec_get.c b/src/util/db2/recno/rec_get.c new file mode 100644 index 0000000000..230b2d4f54 --- /dev/null +++ b/src/util/db2/recno/rec_get.c @@ -0,0 +1,311 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_get.c 8.9 (Berkeley) 8/18/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "db-int.h" +#include "recno.h" + +/* + * __REC_GET -- Get a record from the btree. + * + * Parameters: + * dbp: pointer to access method + * key: key to find + * data: data to return + * flag: currently unused + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +int +__rec_get(dbp, key, data, flags) + const DB *dbp; + const DBT *key; + DBT *data; + u_int flags; +{ + BTREE *t; + EPG *e; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Get currently doesn't take any flags, and keys of 0 are illegal. */ + if (flags || (nrec = *(recno_t *)key->data) == 0) { + errno = EINVAL; + return (RET_ERROR); + } + + /* + * If we haven't seen this record yet, try to find it in the + * original file. + */ + if (nrec > t->bt_nrecs) { + if (F_ISSET(t, R_EOF | R_INMEM)) + return (RET_SPECIAL); + if ((status = t->bt_irec(t, nrec)) != RET_SUCCESS) + return (status); + } + + --nrec; + if ((e = __rec_search(t, nrec, SEARCH)) == NULL) + return (RET_ERROR); + + status = __rec_ret(t, e, 0, NULL, data); + if (F_ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e->page, 0); + else + t->bt_pinned = e->page; + return (status); +} + +/* + * __REC_FPIPE -- Get fixed length records from a pipe. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_fpipe(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + recno_t nrec; + size_t len; + int ch; + u_char *p; + + if (t->bt_rdata.size < t->bt_reclen) { + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_reclen) : + realloc(t->bt_rdata.data, t->bt_reclen); + if (t->bt_rdata.data == NULL) + return (RET_ERROR); + t->bt_rdata.size = t->bt_reclen; + } + data.data = t->bt_rdata.data; + data.size = t->bt_reclen; + + for (nrec = t->bt_nrecs; nrec < top;) { + len = t->bt_reclen; + for (p = t->bt_rdata.data;; *p++ = ch) + if ((ch = getc(t->bt_rfp)) == EOF || !--len) { + if (ch != EOF) + *p = ch; + if (len != 0) + memset(p, t->bt_bval, len); + if (__rec_iput(t, + nrec, &data, 0) != RET_SUCCESS) + return (RET_ERROR); + ++nrec; + break; + } + if (ch == EOF) + break; + } + if (nrec < top) { + F_SET(t, R_EOF); + return (RET_SPECIAL); + } + return (RET_SUCCESS); +} + +/* + * __REC_VPIPE -- Get variable length records from a pipe. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_vpipe(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + recno_t nrec; + indx_t len; + size_t sz; + int bval, ch; + u_char *p; + + bval = t->bt_bval; + for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + for (p = t->bt_rdata.data, + sz = t->bt_rdata.size;; *p++ = ch, --sz) { + if ((ch = getc(t->bt_rfp)) == EOF || ch == bval) { + data.data = t->bt_rdata.data; + data.size = p - (u_char *)t->bt_rdata.data; + if (ch == EOF && data.size == 0) + break; + if (__rec_iput(t, nrec, &data, 0) + != RET_SUCCESS) + return (RET_ERROR); + break; + } + if (sz == 0) { + len = p - (u_char *)t->bt_rdata.data; + t->bt_rdata.size += (sz = 256); + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_rdata.size) : + realloc(t->bt_rdata.data, t->bt_rdata.size); + if (t->bt_rdata.data == NULL) + return (RET_ERROR); + p = (u_char *)t->bt_rdata.data + len; + } + } + if (ch == EOF) + break; + } + if (nrec < top) { + F_SET(t, R_EOF); + return (RET_SPECIAL); + } + return (RET_SUCCESS); +} + +/* + * __REC_FMAP -- Get fixed length records from a file. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_fmap(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + recno_t nrec; + u_char *sp, *ep, *p; + size_t len; + + if (t->bt_rdata.size < t->bt_reclen) { + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_reclen) : + realloc(t->bt_rdata.data, t->bt_reclen); + if (t->bt_rdata.data == NULL) + return (RET_ERROR); + t->bt_rdata.size = t->bt_reclen; + } + data.data = t->bt_rdata.data; + data.size = t->bt_reclen; + + sp = (u_char *)t->bt_cmap; + ep = (u_char *)t->bt_emap; + for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + if (sp >= ep) { + F_SET(t, R_EOF); + return (RET_SPECIAL); + } + len = t->bt_reclen; + for (p = t->bt_rdata.data; + sp < ep && len > 0; *p++ = *sp++, --len); + if (len != 0) + memset(p, t->bt_bval, len); + if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) + return (RET_ERROR); + } + t->bt_cmap = (caddr_t)sp; + return (RET_SUCCESS); +} + +/* + * __REC_VMAP -- Get variable length records from a file. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_vmap(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + u_char *sp, *ep; + recno_t nrec; + int bval; + + sp = (u_char *)t->bt_cmap; + ep = (u_char *)t->bt_emap; + bval = t->bt_bval; + + for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + if (sp >= ep) { + F_SET(t, R_EOF); + return (RET_SPECIAL); + } + for (data.data = sp; sp < ep && *sp != bval; ++sp); + data.size = sp - (u_char *)data.data; + if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) + return (RET_ERROR); + ++sp; + } + t->bt_cmap = (caddr_t)sp; + return (RET_SUCCESS); +} diff --git a/src/util/db2/recno/rec_open.c b/src/util/db2/recno/rec_open.c new file mode 100644 index 0000000000..fccaacc90d --- /dev/null +++ b/src/util/db2/recno/rec_open.c @@ -0,0 +1,243 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_open.c 8.12 (Berkeley) 11/18/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#ifdef RECNO_USE_MMAP +#include <sys/mman.h> +#endif +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <unistd.h> + +#include "db-int.h" +#include "recno.h" + +DB * +__rec_open(fname, flags, mode, openinfo, dflags) + const char *fname; + int flags, mode, dflags; + const RECNOINFO *openinfo; +{ + BTREE *t; + BTREEINFO btopeninfo; + DB *dbp; + PAGE *h; + struct stat sb; + int rfd, sverrno; + + /* Open the user's file -- if this fails, we're done. */ + if (fname != NULL && (rfd = open(fname, flags, mode)) < 0) + return (NULL); + + /* Create a btree in memory (backed by disk). */ + dbp = NULL; + if (openinfo) { + if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT)) + goto einval; + btopeninfo.flags = 0; + btopeninfo.cachesize = openinfo->cachesize; + btopeninfo.maxkeypage = 0; + btopeninfo.minkeypage = 0; + btopeninfo.psize = openinfo->psize; + btopeninfo.compare = NULL; + btopeninfo.prefix = NULL; + btopeninfo.lorder = openinfo->lorder; + dbp = __bt_open(openinfo->bfname, + O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags); + } else + dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags); + if (dbp == NULL) + goto err; + + /* + * Some fields in the tree structure are recno specific. Fill them + * in and make the btree structure look like a recno structure. We + * don't change the bt_ovflsize value, it's close enough and slightly + * bigger. + */ + t = dbp->internal; + if (openinfo) { + if (openinfo->flags & R_FIXEDLEN) { + F_SET(t, R_FIXLEN); + t->bt_reclen = openinfo->reclen; + if (t->bt_reclen == 0) + goto einval; + } + t->bt_bval = openinfo->bval; + } else + t->bt_bval = '\n'; + + F_SET(t, R_RECNO); + if (fname == NULL) + F_SET(t, R_EOF | R_INMEM); + else + t->bt_rfd = rfd; + + if (fname != NULL) { + /* + * In 4.4BSD, stat(2) returns true for ISSOCK on pipes. + * Unfortunately, that's not portable, so we use lseek + * and check the errno values. + */ + errno = 0; + if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) { + switch (flags & O_ACCMODE) { + case O_RDONLY: + F_SET(t, R_RDONLY); + break; + default: + goto einval; + } +slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) + goto err; + F_SET(t, R_CLOSEFP); + t->bt_irec = + F_ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe; + } else { + switch (flags & O_ACCMODE) { + case O_RDONLY: + F_SET(t, R_RDONLY); + break; + case O_RDWR: + break; + default: + goto einval; + } + + if (fstat(rfd, &sb)) + goto err; + /* + * Kluge -- we'd like to test to see if the file is too + * big to mmap. Since, we don't know what size or type + * off_t's or size_t's are, what the largest unsigned + * integral type is, or what random insanity the local + * C compiler will perpetrate, doing the comparison in + * a portable way is flatly impossible. Hope that mmap + * fails if the file is too large. + */ + if (sb.st_size == 0) + F_SET(t, R_EOF); + else { +#ifdef RECNO_USE_MMAP + /* + * XXX + * Mmap doesn't work correctly on many current + * systems. In particular, it can fail subtly, + * with cache coherency problems. Don't use it + * for now. + */ + t->bt_msize = sb.st_size; + if ((t->bt_smap = mmap(NULL, t->bt_msize, + PROT_READ, MAP_PRIVATE, rfd, + (off_t)0)) == (caddr_t)-1) + goto slow; + t->bt_cmap = t->bt_smap; + t->bt_emap = t->bt_smap + sb.st_size; + t->bt_irec = F_ISSET(t, R_FIXLEN) ? + __rec_fmap : __rec_vmap; + F_SET(t, R_MEMMAPPED); +#else + goto slow; +#endif + } + } + } + + /* Use the recno routines. */ + dbp->close = __rec_close; + dbp->del = __rec_delete; + dbp->fd = __rec_fd; + dbp->get = __rec_get; + dbp->put = __rec_put; + dbp->seq = __rec_seq; + dbp->sync = __rec_sync; + + /* If the root page was created, reset the flags. */ + if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL) + goto err; + if ((h->flags & P_TYPE) == P_BLEAF) { + F_CLR(h, P_TYPE); + F_SET(h, P_RLEAF); + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + } else + mpool_put(t->bt_mp, h, 0); + + if (openinfo && openinfo->flags & R_SNAPSHOT && + !F_ISSET(t, R_EOF | R_INMEM) && + t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) + goto err; + return (dbp); + +einval: errno = EINVAL; +err: sverrno = errno; + if (dbp != NULL) + (void)__bt_close(dbp); + if (fname != NULL) + (void)close(rfd); + errno = sverrno; + return (NULL); +} + +int +__rec_fd(dbp) + const DB *dbp; +{ + BTREE *t; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* In-memory database can't have a file descriptor. */ + if (F_ISSET(t, R_INMEM)) { + errno = ENOENT; + return (-1); + } + return (t->bt_rfd); +} diff --git a/src/util/db2/recno/rec_put.c b/src/util/db2/recno/rec_put.c new file mode 100644 index 0000000000..25452f15f8 --- /dev/null +++ b/src/util/db2/recno/rec_put.c @@ -0,0 +1,280 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_put.c 8.7 (Berkeley) 8/18/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "db-int.h" +#include "recno.h" + +/* + * __REC_PUT -- Add a recno item to the tree. + * + * Parameters: + * dbp: pointer to access method + * key: key + * data: data + * flag: R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is + * already in the tree and R_NOOVERWRITE specified. + */ +int +__rec_put(dbp, key, data, flags) + const DB *dbp; + DBT *key; + const DBT *data; + u_int flags; +{ + BTREE *t; + DBT fdata, tdata; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* + * If using fixed-length records, and the record is long, return + * EINVAL. If it's short, pad it out. Use the record data return + * memory, it's only short-term. + */ + if (F_ISSET(t, R_FIXLEN) && data->size != t->bt_reclen) { + if (data->size > t->bt_reclen) + goto einval; + + if (t->bt_rdata.size < t->bt_reclen) { + t->bt_rdata.data = t->bt_rdata.data == NULL ? + malloc(t->bt_reclen) : + realloc(t->bt_rdata.data, t->bt_reclen); + if (t->bt_rdata.data == NULL) + return (RET_ERROR); + t->bt_rdata.size = t->bt_reclen; + } + memmove(t->bt_rdata.data, data->data, data->size); + memset((char *)t->bt_rdata.data + data->size, + t->bt_bval, t->bt_reclen - data->size); + fdata.data = t->bt_rdata.data; + fdata.size = t->bt_reclen; + } else { + fdata.data = data->data; + fdata.size = data->size; + } + + switch (flags) { + case R_CURSOR: + if (!F_ISSET(&t->bt_cursor, CURS_INIT)) + goto einval; + nrec = t->bt_cursor.rcursor; + break; + case R_SETCURSOR: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + break; + case R_IAFTER: + if ((nrec = *(recno_t *)key->data) == 0) { + nrec = 1; + flags = R_IBEFORE; + } + break; + case 0: + case R_IBEFORE: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + break; + case R_NOOVERWRITE: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + if (nrec <= t->bt_nrecs) + return (RET_SPECIAL); + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + + /* + * Make sure that records up to and including the put record are + * already in the database. If skipping records, create empty ones. + */ + if (nrec > t->bt_nrecs) { + if (!F_ISSET(t, R_EOF | R_INMEM) && + t->bt_irec(t, nrec) == RET_ERROR) + return (RET_ERROR); + if (nrec > t->bt_nrecs + 1) { + if (F_ISSET(t, R_FIXLEN)) { + if ((tdata.data = + (void *)malloc(t->bt_reclen)) == NULL) + return (RET_ERROR); + tdata.size = t->bt_reclen; + memset(tdata.data, t->bt_bval, tdata.size); + } else { + tdata.data = NULL; + tdata.size = 0; + } + while (nrec > t->bt_nrecs + 1) + if (__rec_iput(t, + t->bt_nrecs, &tdata, 0) != RET_SUCCESS) + return (RET_ERROR); + if (F_ISSET(t, R_FIXLEN)) + free(tdata.data); + } + } + + if ((status = __rec_iput(t, nrec - 1, &fdata, flags)) != RET_SUCCESS) + return (status); + + if (flags == R_SETCURSOR) + t->bt_cursor.rcursor = nrec; + + F_SET(t, R_MODIFIED); + return (__rec_ret(t, NULL, nrec, key, NULL)); +} + +/* + * __REC_IPUT -- Add a recno item to the tree. + * + * Parameters: + * t: tree + * nrec: record number + * data: data + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_iput(t, nrec, data, flags) + BTREE *t; + recno_t nrec; + const DBT *data; + u_int flags; +{ + DBT tdata; + EPG *e; + PAGE *h; + indx_t index, nxtindex; + db_pgno_t pg; + u_int32_t nbytes; + int dflags, status; + char *dest, db[NOVFLSIZE]; + + /* + * If the data won't fit on a page, store it on indirect pages. + * + * XXX + * If the insert fails later on, these pages aren't recovered. + */ + if (data->size > t->bt_ovflsize) { + if (__ovfl_put(t, data, &pg) == RET_ERROR) + return (RET_ERROR); + tdata.data = db; + tdata.size = NOVFLSIZE; + *(db_pgno_t *)db = pg; + *(u_int32_t *)(db + sizeof(db_pgno_t)) = data->size; + dflags = P_BIGDATA; + data = &tdata; + } else + dflags = 0; + + /* __rec_search pins the returned page. */ + if ((e = __rec_search(t, nrec, + nrec > t->bt_nrecs || flags == R_IAFTER || flags == R_IBEFORE ? + SINSERT : SEARCH)) == NULL) + return (RET_ERROR); + + h = e->page; + index = e->index; + + /* + * Add the specified key/data pair to the tree. The R_IAFTER and + * R_IBEFORE flags insert the key after/before the specified key. + * + * Pages are split as required. + */ + switch (flags) { + case R_IAFTER: + ++index; + break; + case R_IBEFORE: + break; + default: + if (nrec < t->bt_nrecs && + __rec_dleaf(t, h, index) == RET_ERROR) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + break; + } + + /* + * If not enough room, split the page. The split code will insert + * the key and data and unpin the current page. If inserting into + * the offset array, shift the pointers up. + */ + nbytes = NRLEAFDBT(data->size); + if (h->upper - h->lower < nbytes + sizeof(indx_t)) { + status = __bt_split(t, h, NULL, data, dflags, nbytes, index); + if (status == RET_SUCCESS) + ++t->bt_nrecs; + return (status); + } + + if (index < (nxtindex = NEXTINDEX(h))) + memmove(h->linp + index + 1, h->linp + index, + (nxtindex - index) * sizeof(indx_t)); + h->lower += sizeof(indx_t); + + h->linp[index] = h->upper -= nbytes; + dest = (char *)h + h->upper; + WR_RLEAF(dest, data, dflags); + + ++t->bt_nrecs; + F_SET(t, B_MODIFIED); + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + return (RET_SUCCESS); +} diff --git a/src/util/db2/recno/rec_search.c b/src/util/db2/recno/rec_search.c new file mode 100644 index 0000000000..0be9563b8f --- /dev/null +++ b/src/util/db2/recno/rec_search.c @@ -0,0 +1,126 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_search.c 8.4 (Berkeley) 7/14/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> + +#include "db-int.h" +#include "recno.h" + +/* + * __REC_SEARCH -- Search a btree for a key. + * + * Parameters: + * t: tree to search + * recno: key to find + * op: search operation + * + * Returns: + * EPG for matching record, if any, or the EPG for the location of the + * key, if it were inserted into the tree. + * + * Returns: + * The EPG for matching record, if any, or the EPG for the location + * of the key, if it were inserted into the tree, is entered into + * the bt_cur field of the tree. A pointer to the field is returned. + */ +EPG * +__rec_search(t, recno, op) + BTREE *t; + recno_t recno; + enum SRCHOP op; +{ + register indx_t index; + register PAGE *h; + EPGNO *parent; + RINTERNAL *r; + db_pgno_t pg; + indx_t top; + recno_t total; + int sverrno; + + BT_CLR(t); + for (pg = P_ROOT, total = 0;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + goto err; + if (h->flags & P_RLEAF) { + t->bt_cur.page = h; + t->bt_cur.index = recno - total; + return (&t->bt_cur); + } + for (index = 0, top = NEXTINDEX(h);;) { + r = GETRINTERNAL(h, index); + if (++index == top || total + r->nrecs > recno) + break; + total += r->nrecs; + } + + BT_PUSH(t, pg, index - 1); + + pg = r->pgno; + switch (op) { + case SDELETE: + --GETRINTERNAL(h, (index - 1))->nrecs; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + case SINSERT: + ++GETRINTERNAL(h, (index - 1))->nrecs; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + case SEARCH: + mpool_put(t->bt_mp, h, 0); + break; + } + + } + /* Try and recover the tree. */ +err: sverrno = errno; + if (op != SEARCH) + while ((parent = BT_POP(t)) != NULL) { + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + break; + if (op == SINSERT) + --GETRINTERNAL(h, parent->index)->nrecs; + else + ++GETRINTERNAL(h, parent->index)->nrecs; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + } + errno = sverrno; + return (NULL); +} diff --git a/src/util/db2/recno/rec_seq.c b/src/util/db2/recno/rec_seq.c new file mode 100644 index 0000000000..e150333cb1 --- /dev/null +++ b/src/util/db2/recno/rec_seq.c @@ -0,0 +1,131 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)rec_seq.c 8.3 (Berkeley) 7/14/94"; +#endif /* not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> + +#include "db-int.h" +#include "recno.h" + +/* + * __REC_SEQ -- Recno sequential scan interface. + * + * Parameters: + * dbp: pointer to access method + * key: key for positioning and return value + * data: data return value + * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +int +__rec_seq(dbp, key, data, flags) + const DB *dbp; + DBT *key, *data; + u_int flags; +{ + BTREE *t; + EPG *e; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + switch(flags) { + case R_CURSOR: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + break; + case R_NEXT: + if (F_ISSET(&t->bt_cursor, CURS_INIT)) { + nrec = t->bt_cursor.rcursor + 1; + break; + } + /* FALLTHROUGH */ + case R_FIRST: + nrec = 1; + break; + case R_PREV: + if (F_ISSET(&t->bt_cursor, CURS_INIT)) { + if ((nrec = t->bt_cursor.rcursor - 1) == 0) + return (RET_SPECIAL); + break; + } + /* FALLTHROUGH */ + case R_LAST: + if (!F_ISSET(t, R_EOF | R_INMEM) && + t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) + return (RET_ERROR); + nrec = t->bt_nrecs; + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + + if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) { + if (!F_ISSET(t, R_EOF | R_INMEM) && + (status = t->bt_irec(t, nrec)) != RET_SUCCESS) + return (status); + if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) + return (RET_SPECIAL); + } + + if ((e = __rec_search(t, nrec - 1, SEARCH)) == NULL) + return (RET_ERROR); + + F_SET(&t->bt_cursor, CURS_INIT); + t->bt_cursor.rcursor = nrec; + + status = __rec_ret(t, e, nrec, key, data); + if (F_ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e->page, 0); + else + t->bt_pinned = e->page; + return (status); +} diff --git a/src/util/db2/recno/rec_utils.c b/src/util/db2/recno/rec_utils.c new file mode 100644 index 0000000000..f757a724f5 --- /dev/null +++ b/src/util/db2/recno/rec_utils.c @@ -0,0 +1,122 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_utils.c 8.6 (Berkeley) 7/16/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "db-int.h" +#include "recno.h" + +/* + * __rec_ret -- + * Build return data. + * + * Parameters: + * t: tree + * e: key/data pair to be returned + * nrec: record number + * key: user's key structure + * data: user's data structure + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__rec_ret(t, e, nrec, key, data) + BTREE *t; + EPG *e; + recno_t nrec; + DBT *key, *data; +{ + RLEAF *rl; + void *p; + + if (key == NULL) + goto dataonly; + + /* We have to copy the key, it's not on the page. */ + if (sizeof(recno_t) > t->bt_rkey.size) { + p = (void *)(t->bt_rkey.data == NULL ? + malloc(sizeof(recno_t)) : + realloc(t->bt_rkey.data, sizeof(recno_t))); + if (p == NULL) + return (RET_ERROR); + t->bt_rkey.data = p; + t->bt_rkey.size = sizeof(recno_t); + } + memmove(t->bt_rkey.data, &nrec, sizeof(recno_t)); + key->size = sizeof(recno_t); + key->data = t->bt_rkey.data; + +dataonly: + if (data == NULL) + return (RET_SUCCESS); + + /* + * We must copy big keys/data to make them contigous. Otherwise, + * leave the page pinned and don't copy unless the user specified + * concurrent access. + */ + rl = GETRLEAF(e->page, e->index); + if (rl->flags & P_BIGDATA) { + if (__ovfl_get(t, rl->bytes, + &data->size, &t->bt_rdata.data, &t->bt_rdata.size)) + return (RET_ERROR); + data->data = t->bt_rdata.data; + } else if (F_ISSET(t, B_DB_LOCK)) { + /* Use +1 in case the first record retrieved is 0 length. */ + if (rl->dsize + 1 > t->bt_rdata.size) { + p = (void *)(t->bt_rdata.data == NULL ? + malloc(rl->dsize + 1) : + realloc(t->bt_rdata.data, rl->dsize + 1)); + if (p == NULL) + return (RET_ERROR); + t->bt_rdata.data = p; + t->bt_rdata.size = rl->dsize + 1; + } + memmove(t->bt_rdata.data, rl->bytes, rl->dsize); + data->size = rl->dsize; + data->data = t->bt_rdata.data; + } else { + data->size = rl->dsize; + data->data = rl->bytes; + } + return (RET_SUCCESS); +} diff --git a/src/util/db2/recno/recno.h b/src/util/db2/recno/recno.h new file mode 100644 index 0000000000..bec772c2fa --- /dev/null +++ b/src/util/db2/recno/recno.h @@ -0,0 +1,39 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)recno.h 8.1 (Berkeley) 6/4/93 + */ + +enum SRCHOP { SDELETE, SINSERT, SEARCH}; /* Rec_search operation. */ + +#include "../btree/btree.h" +#include "extern.h" diff --git a/src/util/db2/test/Makefile b/src/util/db2/test/Makefile new file mode 100644 index 0000000000..a5dd08ae58 --- /dev/null +++ b/src/util/db2/test/Makefile @@ -0,0 +1,23 @@ +# @(#)Makefile 8.15 (Berkeley) 7/28/94 + +PROG= dbtest +OBJS= dbtest.o strerror.o + +# Uncomment the STAT line get hash and btree statistical use info. This +# also forces ld to load the btree debug functions for use by gdb, which +# is useful. The db library has to be compiled with -DSTATISTICS as well. +INC= -I${PORTDIR}/include -I${PORTDIR} +OORG= -g +#STAT= -DSTATISTICS +CFLAGS= -D__DBINTERFACE_PRIVATE -DDEBUG ${STAT} ${OORG} ${INC} + +dbtest: ${OBJS} ${PORTDIR}/libdb.a + ${CC} -o $@ ${OBJS} ${PORTDIR}/libdb.a + +strerror.o: ${PORTDIR}/clib/strerror.c + ${CC} -c ${PORTDIR}/clib/strerror.c + +clean: + rm -f dbtest.core gmon.out ${OBJS} ${PROG} t1 t2 t3 + +${OBJS}: Makefile diff --git a/src/util/db2/test/README b/src/util/db2/test/README new file mode 100644 index 0000000000..0c0cd13d8f --- /dev/null +++ b/src/util/db2/test/README @@ -0,0 +1,74 @@ +# @(#)README 8.8 (Berkeley) 7/31/94 + +To build this portably, try something like: + + make PORTDIR="../PORT/MACH" + +where MACH is the machine, i.e. "sunos.4.1.1". + +To run the tests, enter "sh run.test". If your system dictionary isn't +in /usr/share/dict/words, edit run.test to reflect the correct place. + +Fairly large files (the command files) are built in this directory during +the test runs, and even larger files (the database files) are created in +"/var/tmp". If the latter directory doesn't exist, set the environmental +variable TMPDIR to a directory where the files can be built. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +The script file consists of lines with an initial character which is +the command for that line, or an initial character indicating a key +or data entry for a previous command. + +Legal command characters are as follows: + +c: compare a record + + must be followed by [kK][dD]; the data value in the database + associated with the specified key is compared to the specified + data value. +e: echo a string + + writes out the rest of the line into the output file; if the + last character is not a carriage-return, a newline is appended. +f: set the flags for the next command + + no value zero's the flags +g: do a get command + + must be followed by [kK] + + writes out the retrieved data DBT. +o [r]: dump [reverse] + + dump the database out, if 'r' is set, in reverse order. +p: do a put command + + must be followed by [kK][dD] +r: do a del command + + must be followed by [kK] unless R_CURSOR flag set. +S: sync the database +s: do a seq command + + must be followed by [kK] if R_CURSOR flag set. + + writes out the retrieved data DBT. + +Legal key/data characters are as follows: + +D [file]: data file + + set the current data value to the contents of the file +d [data]: + + set the current key value to the contents of the line. +K [file]: key file + + set the current key value to the contents of the file +k [data]: + + set the current key value to the contents of the line. + +Blank lines, lines with leading white space, and lines with leading +hash marks (#) are ignored. + +Options to dbtest are as follows: + + -d: Set the DB_LOCK flag. + -f: Use the file argument as the database file. + -i: Use the rest of the argument to set elements in the info + structure. If the type is btree, then "-i cachesize=10240" + will set BTREEINFO.cachesize to 10240. + -o: The rest of the argument is the output file instead of + using stdout. + -s: Don't delete the database file before opening it, i.e. + use the database file from a previous run. + +Dbtest requires two arguments, the type of access "hash", "recno" +or "btree", and the script name or "-" to indicate stdin. diff --git a/src/util/db2/test/SEQ_TEST/data b/src/util/db2/test/SEQ_TEST/data new file mode 100644 index 0000000000..37a518537e --- /dev/null +++ b/src/util/db2/test/SEQ_TEST/data @@ -0,0 +1,8 @@ +A000027875A000135891 +A000059165A000130168 +A000060256A000133490 +A040025906A000136770 +A040027881A000135829 +A040028611A000137873 +A040032413A000056974 +A040050163A000126233 diff --git a/src/util/db2/test/SEQ_TEST/mbox b/src/util/db2/test/SEQ_TEST/mbox new file mode 100644 index 0000000000..9d5d49d073 --- /dev/null +++ b/src/util/db2/test/SEQ_TEST/mbox @@ -0,0 +1,399 @@ +From wiggans@aipl.arsusda.gov Mon Sep 12 11:05:58 1994 +Received: from vangogh.CS.Berkeley.EDU by python.bostic.com (8.6.9.Beta4/2.6) + id OAA16853; Mon, 12 Sep 1994 14:05:42 -0400 +From: wiggans@aipl.arsusda.gov +Received: from hofmann.CS.Berkeley.EDU (hofmann.CS.Berkeley.EDU [128.32.34.35]) by vangogh.CS.Berkeley.EDU (8.7.Alpha.1/8.6.9.Beta0) with ESMTP id LAA15825 for <bostic@vangogh.CS.Berkeley.EDU>; Mon, 12 Sep 1994 11:05:20 -0700 (PDT) +Received: from uu7.psi.com (uu7.psi.com [38.145.204.6]) by hofmann.CS.Berkeley.EDU (8.6.9/8.6.6.Beta11) with SMTP id LAA25681 for <bostic@cs.berkeley.edu>; Mon, 12 Sep 1994 11:05:44 -0700 +Received: from AIPL.ARSUSDA.GOV by uu7.psi.com (5.65b/4.0.071791-PSI/PSINet) via SMTP; + id AA00699 for bostic@cs.berkeley.edu; Mon, 12 Sep 94 14:06:15 -0400 +Received: by aipl.arsusda.gov (AIX 3.2/UCB 5.64/4.03) + id AA14802; Mon, 12 Sep 1994 14:05:48 -0400 +Message-Id: <9409121805.AA14802@aipl.arsusda.gov> +Subject: db 1.85 problem +To: bostic@cs.berkeley.edu (Keith Bostic) +Date: Mon, 12 Sep 1994 14:05:47 -0400 (EDT) +X-Mailer: ELM [version 2.4 PL22] +Content-Type: text +Content-Length: 2553 +Status: RO + +In using the btree option to sequentially read and then write a file, we +are having a problem with 1.85. When compiled with 1.73 there is no +problem. The problem is that the seq call keeps reading the same record. +The code follows: + +/* chkseq.c Check sequential read and write */ + +#include <stdio.h> +#include <sys/stat.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> /* O_CREAT, O_RDWR */ +#include <errno.h> /* Error numbers */ +#include <db.h> + +extern int errno; +extern char *sys_errlist[]; + +typedef struct idst { + char id1[7]; +} id; + +void cvtid(char *, char *); + +void main() { + char anim10[11], datastor[212],keystor[10], *pc; + int i; + long in = 0L; + DB *dbp, *dbpo; + DBT key, data, keyo, datao; + + if ((dbp = dbopen("bullxrf.db", O_RDWR, 0664 + , DB_BTREE, NULL )) == NULL) { + printf("\n Error on dbopen %d %s\n",errno,strerror(errno)); + exit(61); + } + key.size = 7; + keyo.size = 7; + + while (dbp->seq(dbp, &key, &data,R_NEXT) == 0) { + in++; + if (in > 20) break; +/* pc = (char *) key.data; +for (i=0;i<key.size;i++) printf("%02x",pc[i]); printf("\n"); */ + cvtid(anim10,key.data); printf("%s\n",anim10); + memcpy(keystor,key.data,key.size); +/* for (i=0;i<key.size;i++) printf("%02x",keystor[i]); printf("\n"); */ + memcpy(datastor,data.data,data.size); +/* for (i=0;i<8;i++) printf("%02x",datastor[i]); printf("\n"); */ + keyo.data = keystor; + datao.data = datastor; + datao.size = data.size; +/* + if (in % 1000 == 1) { + cvtid(anim10,key.data); printf("%5d %s\n",in,anim10); */ + if (dbp->put(dbp, &keyo, &datao,0) != 0) { + printf("Write failed at %d\n",in); + exit(85); + } +/* } + */ + } + printf("%d Records copied\n",in); + dbp->close(dbp); +} + +I am running on an RS/6000 AIX 3.2.5. The section of the make file +follows: + +# Make file +all: chkseq + +chkseq: chkseq.c + cc -gO3 -lm -o chkseq\ + -L /data6/hash/include/sys/lib -l db -I /data6/hash/include \ + chkseq.c cvtid.o ascii.o +# -L /data12/db.1.85 -l db -I /data12/db.1.85/include \ + +We would appreciate your help. +Thanks, + +-- +George Wiggans I================================================I + |Animal Improvement Programs Laboratory | +Phone: 301-504-8407 |Bldg 263 Beltsville Agricultural Research Center| +FAX: 301-504-8092 |Beltsville, MD 20705-2350 USA | +wiggans@aipl.arsusda.gov | | +=========================I================================================I + +From wiggans@aipl.arsusda.gov Fri Sep 16 20:27:22 1994 +Received: from vangogh.CS.Berkeley.EDU by python.bostic.com (8.6.9.Beta4/2.6) + id XAA09260; Fri, 16 Sep 1994 23:27:09 -0400 +From: wiggans@aipl.arsusda.gov +Received: from hofmann.CS.Berkeley.EDU (hofmann.CS.Berkeley.EDU [128.32.34.35]) by vangogh.CS.Berkeley.EDU (8.7.Alpha.1/8.6.9.Beta0) with ESMTP id UAA25674 for <bostic@vangogh.CS.Berkeley.EDU>; Fri, 16 Sep 1994 20:27:03 -0700 (PDT) +Received: from uu7.psi.com (uu7.psi.com [38.145.204.6]) by hofmann.CS.Berkeley.EDU (8.6.9/8.6.6.Beta11) with SMTP id UAA15043 for <bostic@cs.berkeley.edu>; Fri, 16 Sep 1994 20:27:16 -0700 +Received: from AIPL.ARSUSDA.GOV by uu7.psi.com (5.65b/4.0.071791-PSI/PSINet) via SMTP; + id AA18737 for bostic@cs.berkeley.edu; Fri, 16 Sep 94 23:27:14 -0400 +Received: by aipl.arsusda.gov (AIX 3.2/UCB 5.64/4.03) + id AA10907; Fri, 16 Sep 1994 23:26:18 -0400 +Message-Id: <9409170326.AA10907@aipl.arsusda.gov> +Subject: Test case +To: bostic@cs.berkeley.edu (Keith Bostic) +Date: Fri, 16 Sep 1994 23:26:16 -0400 (EDT) +X-Mailer: ELM [version 2.4 PL22] +Content-Type: text +Content-Length: 3713 +Status: RO + +The following program loads 2 10 character animal ID which are used to +change an animal's ID. After loading, it closes, then opens and +sequentially reads and rewrites the file changing the first character of +the 2nd ID to U. Failure is observed when the update part gets stuck on +the first record, rereading it. The last step displays the updated file. +The name of the data file is a command line argument. + +The data: +A000027875A000135891 +A000059165A000130168 +A000060256A000133490 +A040025906A000136770 +A040027881A000135829 +A040028611A000137873 +A040032413A000056974 +A040050163A000126233 +A040050329A000126177 +A040050411A000119017 +A040050995A000116767 +A040051022A000126669 +A040051276A000127444 +A040051514A000120563 +A040051597A000127287 +A040051627A000127284 +A040051700A000126914 +A040051810A000127286 +A040051964A000118834 +A040052164A000135104 +A040052165A000127688 +A040052186A000126926 +A040052530A000126287 +A040052560A000119160 +A040052892A000125334 +A040053004A000127684 +A040053359A000128628 +A040053378A000137680 +A040053416A000128825 +A040053589A000120369 +A040053620A000128460 +A040053751A000123525 +A040053754A000126736 +A040054191A000126286 +A040054251A000121745 +A040054253A000127848 +A040054596A000130931 +A040054981A000128731 +A040055000A000127689 + +The program: +/* chkseq.c Check sequential read and write */ + +#include <stdio.h> +#include <sys/stat.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> /* O_CREAT, O_RDWR */ +#include <errno.h> /* Error numbers */ +#include <db.h> + +extern int errno; +extern char *sys_errlist[]; + + +void main(int argc, char *argv[]) { + char id1[] = {" "}, id2[] = {" "}; + int i; + long in = 0L, out = 0L; + DB *dbp, *dbpo; + DBT key, data, keyo, datao; + FILE *fopen(), *fin; + + if ((fin = fopen(argv[1],"r")) == NULL) { + printf("Unable to open %s\n",argv[1]); + exit(25); + } + if ((dbp = dbopen("test.db",O_RDWR | O_CREAT, 0664 + , DB_BTREE, NULL )) == NULL) { + printf("\n Open error on test.db %d %s\n",errno,strerror(errno)); + exit(25); + } + + while (fscanf(fin," %10s%10s",id1,id2) > 0) { + key.size = 11; + data.size = 11; + key.data = id1; + data.data = id2; + printf("%10s %10s\n",key.data,data.data); + if (dbp->put(dbp, &key, &data,R_NOOVERWRITE) != 0) { + printf("Error writing output\n"); + } + out++; + } + printf("%d Records in\n",out); + dbp->close(dbp); + + if ((dbp = dbopen("test.db", O_RDWR, 0664 + , DB_BTREE, NULL )) == NULL) { + printf("\n Error on dbopen %d %s\n",errno,strerror(errno)); + exit(61); + } + + while (dbp->seq(dbp, &key, &data,R_NEXT) == 0) { + strcpy(id1,key.data); + keyo.size = 11; + datao.size = 11; + keyo.data = id1; + strcpy(id2,data.data); + id2[0] = 'U'; + datao.data=id2; + printf("%10s %10s\n",key.data,data.data); + in++; + if (in > 50) break; + if (dbp->put(dbp, &keyo, &datao,0) != 0) { + printf("Write failed at %d\n",in); + exit(85); + } + } + printf("%d Records copied\n",in); + in = 0; + dbp->seq(dbp, &key, &data,R_FIRST); + printf("%10s %10s\n",key.data,data.data); + in++; + while (dbp->seq(dbp, &key, &data,R_NEXT) == 0) { + in++; + printf("%10s %10s\n",key.data,data.data); + } + printf("%d Records read\n",in); + dbp->close(dbp); +} + + +-- +George Wiggans I================================================I + |Animal Improvement Programs Laboratory | +Phone: 301-504-8407 |Bldg 263 Beltsville Agricultural Research Center| +FAX: 301-504-8092 |Beltsville, MD 20705-2350 USA | +wiggans@aipl.arsusda.gov | | +=========================I================================================I + +From bostic Fri Sep 23 08:44:56 1994 +To: wiggans@aipl.arsusda.gov /usr/src/local/db/test/SEQ_TEST/mbox +Subject: Re: Test case + + +OK, I've attached a tentative patch for the bug, that appears +to fix it on my local system. Please let me know if you have +any further problems with this. + +There are a couple of issues here. The first, is that to some +extent, the btree code is correct. You aren't replacing the +cursor in your test program, you're adding a new key, which +just happens to be where the cursor was. So, the btree code +is doing you a favor by returning the new key as part of the +cursor walk, and it's not its fault. ;-} + +However, because a put to the cursor record is done using a +delete/add pair, doing it the "right" way will result in the +exact same behavior as you saw doing it "wrong". + +Thinking about this further, there's another bug that's going to +hit eventually -- if you have duplicate records, the current +scheme of doing delete/add to replace the cursor record can result +in a record being returned twice, which is tacky at best, if not +actually wrong. + +I think I may have to revisit how duplicate records are stored. +Which does not make me happy. ;-{ + +--keith + +*** db/btree/bt_seq.c.orig Fri Sep 23 08:35:06 1994 +--- db/btree/bt_seq.c Fri Sep 23 08:34:58 1994 +*************** +*** 35,41 **** + */ + + #if defined(LIBC_SCCS) && !defined(lint) +! static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; + #endif /* LIBC_SCCS and not lint */ + + #include <sys/types.h> +--- 35,41 ---- + */ + + #if defined(LIBC_SCCS) && !defined(lint) +! static char sccsid[] = "@(#)bt_seq.c 8.8 (Berkeley) 9/23/94"; + #endif /* LIBC_SCCS and not lint */ + + #include <sys/types.h> +*************** +*** 246,252 **** + PAGE *h; + indx_t index; + pgno_t pg; +! int exact; + + /* + * There are a couple of states that we can be in. The cursor has +--- 246,252 ---- + PAGE *h; + indx_t index; + pgno_t pg; +! int exact, rval; + + /* + * There are a couple of states that we can be in. The cursor has +*************** +*** 255,269 **** + c = &t->bt_cursor; + + /* +! * The cursor was deleted where there weren't any duplicate records, +! * so the key was saved. Find out where that key would go in the +! * current tree. It doesn't matter if the returned key is an exact +! * match or not -- if it's an exact match, the record was added after +! * the delete so we can just return it. If not, as long as there's +! * a record there, return it. + */ +! if (F_ISSET(c, CURS_ACQUIRE)) +! return (__bt_first(t, &c->key, ep, &exact)); + + /* Get the page referenced by the cursor. */ + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) +--- 255,299 ---- + c = &t->bt_cursor; + + /* +! * The cursor was deleted and there weren't any duplicate records, +! * so the cursor's key was saved. Find out where that key would +! * be in the current tree. If the returned key is an exact match, +! * it means that a key/data pair was inserted into the tree after +! * the delete. We could reasonably return the key, but the problem +! * is that this is the access pattern we'll see if the user is +! * doing seq(..., R_NEXT)/put(..., 0) pairs, i.e. the put deletes +! * the cursor record and then replaces it, so the cursor was saved, +! * and we'll simply return the same "new" record until the user +! * notices and doesn't do a put() of it. Since the key is an exact +! * match, we could as easily put the new record before the cursor, +! * and we've made no guarantee to return it. So, move forward or +! * back a record if it's an exact match. +! * +! * XXX +! * In the current implementation, put's to the cursor are done with +! * delete/add pairs. This has two consequences. First, it means +! * that seq(..., R_NEXT)/put(..., R_CURSOR) pairs are going to exhibit +! * the same behavior as above. Second, you can return the same key +! * twice if you have duplicate records. The scenario is that the +! * cursor record is deleted, moving the cursor forward or backward +! * to a duplicate. The add then inserts the new record at a location +! * ahead of the cursor because duplicates aren't sorted in any way, +! * and the new record is later returned. This has to be fixed at some +! * point. + */ +! if (F_ISSET(c, CURS_ACQUIRE)) { +! if (rval = __bt_first(t, &c->key, ep, &exact)) +! return (RET_ERROR); +! if (!exact) +! return (rval); +! /* +! * XXX +! * Kluge -- get, release, get the page. +! */ +! c->pg.pgno = ep->page->pgno; +! c->pg.index = ep->index; +! mpool_put(t->bt_mp, ep->page, 0); +! } + + /* Get the page referenced by the cursor. */ + if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) + + + diff --git a/src/util/db2/test/SEQ_TEST/t.c b/src/util/db2/test/SEQ_TEST/t.c new file mode 100644 index 0000000000..ac0fef71ec --- /dev/null +++ b/src/util/db2/test/SEQ_TEST/t.c @@ -0,0 +1,86 @@ +/* chkseq.c Check sequential read and write */ + +#include <sys/stat.h> +#include "db-int.h" +#include <errno.h> /* Error numbers */ +#include <fcntl.h> /* O_CREAT, O_RDWR */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +extern int errno; + +void main(int argc, char *argv[]) { + char id1[] = {" "}, id2[] = {" "}; + int i; + long in = 0L, out = 0L; + DB *dbp, *dbpo; + DBT key, data, keyo, datao; + FILE *fopen(), *fin; + + unlink("test.db"); + if ((fin = fopen("data","r")) == NULL) { + printf("Unable to open %s\n","data"); + exit(25); + } + if ((dbp = dbopen("test.db",O_RDWR | O_CREAT, 0664 + , DB_BTREE, NULL )) == NULL) { + printf("\n Open error on test.db %d %s\n",errno,strerror(errno)); + exit(25); + } + + while (fscanf(fin," %10s%10s",id1,id2) > 0) { + key.size = 11; + data.size = 11; + key.data = id1; + data.data = id2; + printf("%10s %10s\n",key.data,data.data); + if (dbp->put(dbp, &key, &data,R_NOOVERWRITE) != 0) { + printf("Error writing output\n"); + } + out++; + } + printf("%d Records in\n",out); + dbp->close(dbp); + + if ((dbp = dbopen("test.db", O_RDWR, 0664 + , DB_BTREE, NULL )) == NULL) { + printf("\n Error on dbopen %d %s\n",errno,strerror(errno)); + exit(61); + } + + while (dbp->seq(dbp, &key, &data,R_NEXT) == 0) { + strcpy(id1,key.data); + keyo.size = 11; + datao.size = 11; + keyo.data = id1; + strcpy(id2,data.data); + id2[0] = 'U'; + datao.data=id2; + printf("%10s %10s\n",key.data,data.data); + in++; + if (in > 10) break; +#ifdef notdef + if (dbp->put(dbp, &keyo, &datao,0) != 0) { + printf("Write failed at %d\n",in); + exit(85); + } +#else + if (dbp->put(dbp, &keyo, &datao,R_CURSOR) != 0) { + printf("Write failed at %d\n",in); + exit(85); + } +#endif + } + printf("%d Records copied\n",in); + in = 0; + dbp->seq(dbp, &key, &data,R_FIRST); + printf("%10s %10s\n",key.data,data.data); + in++; + while (dbp->seq(dbp, &key, &data,R_NEXT) == 0) { + in++; + printf("%10s %10s\n",key.data,data.data); + } + printf("%d Records read\n",in); + dbp->close(dbp); +} diff --git a/src/util/db2/test/btree.tests/main.c b/src/util/db2/test/btree.tests/main.c new file mode 100644 index 0000000000..02f1b96286 --- /dev/null +++ b/src/util/db2/test/btree.tests/main.c @@ -0,0 +1,765 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)main.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> +#include <fcntl.h> +#include "db-int.h" +#include <errno.h> +#include <stdio.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include "btree.h" + +typedef struct cmd_table { + char *cmd; + int nargs; + int rconv; + void (*func) __P((DB *, char **)); + char *usage, *descrip; +} cmd_table; + +int stopstop; +DB *globaldb; + +void append __P((DB *, char **)); +void bstat __P((DB *, char **)); +void cursor __P((DB *, char **)); +void delcur __P((DB *, char **)); +void delete __P((DB *, char **)); +void dump __P((DB *, char **)); +void first __P((DB *, char **)); +void get __P((DB *, char **)); +void help __P((DB *, char **)); +void iafter __P((DB *, char **)); +void ibefore __P((DB *, char **)); +void icursor __P((DB *, char **)); +void insert __P((DB *, char **)); +void keydata __P((DBT *, DBT *)); +void last __P((DB *, char **)); +void list __P((DB *, char **)); +void load __P((DB *, char **)); +void mstat __P((DB *, char **)); +void next __P((DB *, char **)); +int parse __P((char *, char **, int)); +void previous __P((DB *, char **)); +void show __P((DB *, char **)); +void usage __P((void)); +void user __P((DB *)); + +cmd_table commands[] = { + "?", 0, 0, help, "help", NULL, + "a", 2, 1, append, "append key def", "append key with data def", + "b", 0, 0, bstat, "bstat", "stat btree", + "c", 1, 1, cursor, "cursor word", "move cursor to word", + "delc", 0, 0, delcur, "delcur", "delete key the cursor references", + "dele", 1, 1, delete, "delete word", "delete word", + "d", 0, 0, dump, "dump", "dump database", + "f", 0, 0, first, "first", "move cursor to first record", + "g", 1, 1, get, "get key", "locate key", + "h", 0, 0, help, "help", "print command summary", + "ia", 2, 1, iafter, "iafter key data", "insert data after key", + "ib", 2, 1, ibefore, "ibefore key data", "insert data before key", + "ic", 2, 1, icursor, "icursor key data", "replace cursor", + "in", 2, 1, insert, "insert key def", "insert key with data def", + "la", 0, 0, last, "last", "move cursor to last record", + "li", 1, 1, list, "list file", "list to a file", + "loa", 1, 0, load, "load file", NULL, + "loc", 1, 1, get, "get key", NULL, + "m", 0, 0, mstat, "mstat", "stat memory pool", + "n", 0, 0, next, "next", "move cursor forward one record", + "p", 0, 0, previous, "previous", "move cursor back one record", + "q", 0, 0, NULL, "quit", "quit", + "sh", 1, 0, show, "show page", "dump a page", + { NULL }, +}; + +int recno; /* use record numbers */ +char *dict = "words"; /* default dictionary */ +char *progname; + +int +main(argc, argv) + int argc; + char **argv; +{ + int c; + DB *db; + BTREEINFO b; + + progname = *argv; + + b.flags = 0; + b.cachesize = 0; + b.maxkeypage = 0; + b.minkeypage = 0; + b.psize = 0; + b.compare = NULL; + b.prefix = NULL; + b.lorder = 0; + + while ((c = getopt(argc, argv, "bc:di:lp:ru")) != EOF) { + switch (c) { + case 'b': + b.lorder = BIG_ENDIAN; + break; + case 'c': + b.cachesize = atoi(optarg); + break; + case 'd': + b.flags |= R_DUP; + break; + case 'i': + dict = optarg; + break; + case 'l': + b.lorder = LITTLE_ENDIAN; + break; + case 'p': + b.psize = atoi(optarg); + break; + case 'r': + recno = 1; + break; + case 'u': + b.flags = 0; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (recno) + db = dbopen(*argv == NULL ? NULL : *argv, O_RDWR, + 0, DB_RECNO, NULL); + else + db = dbopen(*argv == NULL ? NULL : *argv, O_CREAT|O_RDWR, + 0600, DB_BTREE, &b); + + if (db == NULL) { + (void)fprintf(stderr, "dbopen: %s\n", strerror(errno)); + exit(1); + } + globaldb = db; + user(db); + exit(0); + /* NOTREACHED */ +} + +void +user(db) + DB *db; +{ + FILE *ifp; + int argc, i, last; + char *lbuf, *argv[4], buf[512]; + + if ((ifp = fopen("/dev/tty", "r")) == NULL) { + (void)fprintf(stderr, + "/dev/tty: %s\n", strerror(errno)); + exit(1); + } + for (last = 0;;) { + (void)printf("> "); + (void)fflush(stdout); + if ((lbuf = fgets(&buf[0], 512, ifp)) == NULL) + break; + if (lbuf[0] == '\n') { + i = last; + goto uselast; + } + lbuf[strlen(lbuf) - 1] = '\0'; + + if (lbuf[0] == 'q') + break; + + argc = parse(lbuf, &argv[0], 3); + if (argc == 0) + continue; + + for (i = 0; commands[i].cmd != NULL; i++) + if (strncmp(commands[i].cmd, argv[0], + strlen(commands[i].cmd)) == 0) + break; + + if (commands[i].cmd == NULL) { + (void)fprintf(stderr, + "%s: command unknown ('help' for help)\n", lbuf); + continue; + } + + if (commands[i].nargs != argc - 1) { + (void)fprintf(stderr, "usage: %s\n", commands[i].usage); + continue; + } + + if (recno && commands[i].rconv) { + static recno_t nlong; + nlong = atoi(argv[1]); + argv[1] = (char *)&nlong; + } +uselast: last = i; + (*commands[i].func)(db, argv); + } + if ((db->sync)(db) == RET_ERROR) + perror("dbsync"); + else if ((db->close)(db) == RET_ERROR) + perror("dbclose"); +} + +int +parse(lbuf, argv, maxargc) + char *lbuf, **argv; + int maxargc; +{ + int argc = 0; + char *c; + + c = lbuf; + while (isspace(*c)) + c++; + while (*c != '\0' && argc < maxargc) { + *argv++ = c; + argc++; + while (!isspace(*c) && *c != '\0') { + c++; + } + while (isspace(*c)) + *c++ = '\0'; + } + return (argc); +} + +void +append(db, argv) + DB *db; + char **argv; +{ + DBT key, data; + int status; + + if (!recno) { + (void)fprintf(stderr, + "append only available for recno db's.\n"); + return; + } + key.data = argv[1]; + key.size = sizeof(recno_t); + data.data = argv[2]; + data.size = strlen(data.data); + status = (db->put)(db, &key, &data, R_APPEND); + switch (status) { + case RET_ERROR: + perror("append/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +cursor(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + status = (*db->seq)(db, &key, &data, R_CURSOR); + switch (status) { + case RET_ERROR: + perror("cursor/seq"); + break; + case RET_SPECIAL: + (void)printf("key not found\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +delcur(db, argv) + DB *db; + char **argv; +{ + int status; + + status = (*db->del)(db, NULL, R_CURSOR); + + if (status == RET_ERROR) + perror("delcur/del"); +} + +void +delete(db, argv) + DB *db; + char **argv; +{ + DBT key; + int status; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + + status = (*db->del)(db, &key, 0); + switch (status) { + case RET_ERROR: + perror("delete/del"); + break; + case RET_SPECIAL: + (void)printf("key not found\n"); + break; + case RET_SUCCESS: + break; + } +} + +void +dump(db, argv) + DB *db; + char **argv; +{ + __bt_dump(db); +} + +void +first(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_FIRST); + + switch (status) { + case RET_ERROR: + perror("first/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +get(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + + status = (*db->get)(db, &key, &data, 0); + + switch (status) { + case RET_ERROR: + perror("get/get"); + break; + case RET_SPECIAL: + (void)printf("key not found\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +help(db, argv) + DB *db; + char **argv; +{ + int i; + + for (i = 0; commands[i].cmd; i++) + if (commands[i].descrip) + (void)printf("%s: %s\n", + commands[i].usage, commands[i].descrip); +} + +void +iafter(db, argv) + DB *db; + char **argv; +{ + DBT key, data; + int status; + + if (!recno) { + (void)fprintf(stderr, + "iafter only available for recno db's.\n"); + return; + } + key.data = argv[1]; + key.size = sizeof(recno_t); + data.data = argv[2]; + data.size = strlen(data.data); + status = (db->put)(db, &key, &data, R_IAFTER); + switch (status) { + case RET_ERROR: + perror("iafter/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +ibefore(db, argv) + DB *db; + char **argv; +{ + DBT key, data; + int status; + + if (!recno) { + (void)fprintf(stderr, + "ibefore only available for recno db's.\n"); + return; + } + key.data = argv[1]; + key.size = sizeof(recno_t); + data.data = argv[2]; + data.size = strlen(data.data); + status = (db->put)(db, &key, &data, R_IBEFORE); + switch (status) { + case RET_ERROR: + perror("ibefore/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +icursor(db, argv) + DB *db; + char **argv; +{ + int status; + DBT data, key; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + data.data = argv[2]; + data.size = strlen(argv[2]) + 1; + + status = (*db->put)(db, &key, &data, R_CURSOR); + switch (status) { + case RET_ERROR: + perror("icursor/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +insert(db, argv) + DB *db; + char **argv; +{ + int status; + DBT data, key; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + data.data = argv[2]; + data.size = strlen(argv[2]) + 1; + + status = (*db->put)(db, &key, &data, R_NOOVERWRITE); + switch (status) { + case RET_ERROR: + perror("insert/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +last(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_LAST); + + switch (status) { + case RET_ERROR: + perror("last/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +list(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + FILE *fp; + int status; + + if ((fp = fopen(argv[1], "w")) == NULL) { + (void)fprintf(stderr, "%s: %s\n", argv[1], strerror(errno)); + return; + } + status = (*db->seq)(db, &key, &data, R_FIRST); + while (status == RET_SUCCESS) { + (void)fprintf(fp, "%s\n", key.data); + status = (*db->seq)(db, &key, &data, R_NEXT); + } + if (status == RET_ERROR) + perror("list/seq"); +} + +DB *BUGdb; +void +load(db, argv) + DB *db; + char **argv; +{ + register char *p, *t; + FILE *fp; + DBT data, key; + recno_t cnt; + size_t len; + int status; + char *lp, buf[16 * 1024]; + + BUGdb = db; + if ((fp = fopen(argv[1], "r")) == NULL) { + (void)fprintf(stderr, "%s: %s\n", argv[1], strerror(errno)); + return; + } + (void)printf("loading %s...\n", argv[1]); + + for (cnt = 1; (lp = fgetline(fp, &len)) != NULL; ++cnt) { + if (recno) { + key.data = &cnt; + key.size = sizeof(recno_t); + data.data = lp; + data.size = len + 1; + } else { + key.data = lp; + key.size = len + 1; + for (p = lp + len - 1, t = buf; p >= lp; *t++ = *p--); + *t = '\0'; + data.data = buf; + data.size = len + 1; + } + + status = (*db->put)(db, &key, &data, R_NOOVERWRITE); + switch (status) { + case RET_ERROR: + perror("load/put"); + exit(1); + case RET_SPECIAL: + if (recno) + (void)fprintf(stderr, + "duplicate: %ld {%s}\n", cnt, data.data); + else + (void)fprintf(stderr, + "duplicate: %ld {%s}\n", cnt, key.data); + exit(1); + case RET_SUCCESS: + break; + } + } + (void)fclose(fp); +} + +void +next(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_NEXT); + + switch (status) { + case RET_ERROR: + perror("next/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +previous(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_PREV); + + switch (status) { + case RET_ERROR: + perror("previous/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +show(db, argv) + DB *db; + char **argv; +{ + BTREE *t; + PAGE *h; + db_pgno_t pg; + + pg = atoi(argv[1]); + t = db->internal; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { + (void)printf("getpage of %ld failed\n", pg); + return; + } + if (pg == 0) + __bt_dmpage(h); + else + __bt_dpage(h); + mpool_put(t->bt_mp, h, 0); +} + +void +bstat(db, argv) + DB *db; + char **argv; +{ + (void)printf("BTREE\n"); + __bt_stat(db); +} + +void +mstat(db, argv) + DB *db; + char **argv; +{ + (void)printf("MPOOL\n"); + mpool_stat(((BTREE *)db->internal)->bt_mp); +} + +void +keydata(key, data) + DBT *key, *data; +{ + if (!recno && key->size > 0) + (void)printf("%s/", key->data); + if (data->size > 0) + (void)printf("%s", data->data); + (void)printf("\n"); +} + +void +usage() +{ + (void)fprintf(stderr, + "usage: %s [-bdlu] [-c cache] [-i file] [-p page] [file]\n", + progname); + exit (1); +} diff --git a/src/util/db2/test/dbtest.c b/src/util/db2/test/dbtest.c new file mode 100644 index 0000000000..f49228165b --- /dev/null +++ b/src/util/db2/test/dbtest.c @@ -0,0 +1,753 @@ +/*- + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1992, 1993, 1994\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)dbtest.c 8.17 (Berkeley) 9/1/94"; +#endif /* not lint */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "db-int.h" + +enum S { COMMAND, COMPARE, GET, PUT, REMOVE, SEQ, SEQFLAG, KEY, DATA }; + +void compare __P((DBT *, DBT *)); +DBTYPE dbtype __P((char *)); +void dump __P((DB *, int)); +void err __P((const char *, ...)); +void get __P((DB *, DBT *)); +void getdata __P((DB *, DBT *, DBT *)); +void put __P((DB *, DBT *, DBT *)); +void rem __P((DB *, DBT *)); +char *sflags __P((int)); +void synk __P((DB *)); +void *rfile __P((char *, size_t *)); +void seq __P((DB *, DBT *)); +u_int setflags __P((char *)); +void *setinfo __P((DBTYPE, char *)); +void usage __P((void)); +void *xmalloc __P((char *, size_t)); + +DBTYPE type; /* Database type. */ +void *infop; /* Iflags. */ +u_long lineno; /* Current line in test script. */ +u_int flags; /* Current DB flags. */ +int ofd = STDOUT_FILENO; /* Standard output fd. */ + +DB *XXdbp; /* Global for gdb. */ +int XXlineno; /* Fast breakpoint for gdb. */ + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern int optind; + extern char *optarg; + enum S command, state; + DB *dbp; + DBT data, key, keydata; + size_t len; + int ch, oflags, sflag; + char *fname, *infoarg, *p, *t, buf[8 * 1024]; + + infoarg = NULL; + fname = NULL; + oflags = O_CREAT | O_RDWR; + sflag = 0; + while ((ch = getopt(argc, argv, "f:i:lo:s")) != EOF) + switch (ch) { + case 'f': + fname = optarg; + break; + case 'i': + infoarg = optarg; + break; + case 'l': + oflags |= DB_LOCK; + break; + case 'o': + if ((ofd = open(optarg, + O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) + err("%s: %s", optarg, strerror(errno)); + break; + case 's': + sflag = 1; + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (argc != 2) + usage(); + + /* Set the type. */ + type = dbtype(*argv++); + + /* Open the descriptor file. */ + if (strcmp(*argv, "-") && freopen(*argv, "r", stdin) == NULL) + err("%s: %s", *argv, strerror(errno)); + + /* Set up the db structure as necessary. */ + if (infoarg == NULL) + infop = NULL; + else + for (p = strtok(infoarg, ",\t "); p != NULL; + p = strtok(0, ",\t ")) + if (*p != '\0') + infop = setinfo(type, p); + + /* + * Open the DB. Delete any preexisting copy, you almost never + * want it around, and it often screws up tests. + */ + if (fname == NULL) { + p = getenv("TMPDIR"); + if (p == NULL) + p = "/var/tmp"; + (void)sprintf(buf, "%s/__dbtest", p); + fname = buf; + (void)unlink(buf); + } else if (!sflag) + (void)unlink(fname); + + if ((dbp = dbopen(fname, + oflags, S_IRUSR | S_IWUSR, type, infop)) == NULL) + err("dbopen: %s", strerror(errno)); + XXdbp = dbp; + + state = COMMAND; + for (lineno = 1; + (p = fgets(buf, sizeof(buf), stdin)) != NULL; ++lineno) { + /* Delete the newline, displaying the key/data is easier. */ + if (ofd == STDOUT_FILENO && (t = strchr(p, '\n')) != NULL) + *t = '\0'; + if ((len = strlen(buf)) == 0 || isspace(*p) || *p == '#') + continue; + + /* Convenient gdb break point. */ + if (XXlineno == lineno) + XXlineno = 1; + switch (*p) { + case 'c': /* compare */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + state = KEY; + command = COMPARE; + break; + case 'e': /* echo */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + /* Don't display the newline, if CR at EOL. */ + if (p[len - 2] == '\r') + --len; + if (write(ofd, p + 1, len - 1) != len - 1 || + write(ofd, "\n", 1) != 1) + err("write: %s", strerror(errno)); + break; + case 'g': /* get */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + state = KEY; + command = GET; + break; + case 'p': /* put */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + state = KEY; + command = PUT; + break; + case 'r': /* remove */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + if (flags == R_CURSOR) { + rem(dbp, &key); + state = COMMAND; + } else { + state = KEY; + command = REMOVE; + } + break; + case 'S': /* sync */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + synk(dbp); + state = COMMAND; + break; + case 's': /* seq */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + if (flags == R_CURSOR) { + state = KEY; + command = SEQ; + } else + seq(dbp, &key); + break; + case 'f': + flags = setflags(p + 1); + break; + case 'D': /* data file */ + if (state != DATA) + err("line %lu: not expecting data", lineno); + data.data = rfile(p + 1, &data.size); + goto ldata; + case 'd': /* data */ + if (state != DATA) + err("line %lu: not expecting data", lineno); + data.data = xmalloc(p + 1, len - 1); + data.size = len - 1; +ldata: switch (command) { + case COMPARE: + compare(&keydata, &data); + break; + case PUT: + put(dbp, &key, &data); + break; + default: + err("line %lu: command doesn't take data", + lineno); + } + if (type != DB_RECNO) + free(key.data); + free(data.data); + state = COMMAND; + break; + case 'K': /* key file */ + if (state != KEY) + err("line %lu: not expecting a key", lineno); + if (type == DB_RECNO) + err("line %lu: 'K' not available for recno", + lineno); + key.data = rfile(p + 1, &key.size); + goto lkey; + case 'k': /* key */ + if (state != KEY) + err("line %lu: not expecting a key", lineno); + if (type == DB_RECNO) { + static recno_t recno; + recno = atoi(p + 1); + key.data = &recno; + key.size = sizeof(recno); + } else { + key.data = xmalloc(p + 1, len - 1); + key.size = len - 1; + } +lkey: switch (command) { + case COMPARE: + getdata(dbp, &key, &keydata); + state = DATA; + break; + case GET: + get(dbp, &key); + if (type != DB_RECNO) + free(key.data); + state = COMMAND; + break; + case PUT: + state = DATA; + break; + case REMOVE: + rem(dbp, &key); + if ((type != DB_RECNO) && (flags != R_CURSOR)) + free(key.data); + state = COMMAND; + break; + case SEQ: + seq(dbp, &key); + if ((type != DB_RECNO) && (flags != R_CURSOR)) + free(key.data); + state = COMMAND; + break; + default: + err("line %lu: command doesn't take a key", + lineno); + } + break; + case 'o': + dump(dbp, p[1] == 'r'); + break; + default: + err("line %lu: %s: unknown command character", + lineno, p); + } + } +#ifdef STATISTICS + /* + * -l must be used (DB_LOCK must be set) for this to be + * used, otherwise a page will be locked and it will fail. + */ + if (type == DB_BTREE && oflags & DB_LOCK) + __bt_stat(dbp); +#endif + if (dbp->close(dbp)) + err("db->close: %s", strerror(errno)); + (void)close(ofd); + exit(0); +} + +#define NOOVERWRITE "put failed, would overwrite key\n" + +void +compare(db1, db2) + DBT *db1, *db2; +{ + register size_t len; + register u_char *p1, *p2; + + if (db1->size != db2->size) + printf("compare failed: key->data len %lu != data len %lu\n", + db1->size, db2->size); + + len = MIN(db1->size, db2->size); + for (p1 = db1->data, p2 = db2->data; len--;) + if (*p1++ != *p2++) { + printf("compare failed at offset %d\n", + p1 - (u_char *)db1->data); + break; + } +} + +void +get(dbp, kp) + DB *dbp; + DBT *kp; +{ + DBT data; + + switch (dbp->get(dbp, kp, &data, flags)) { + case 0: + (void)write(ofd, data.data, data.size); + if (ofd == STDOUT_FILENO) + (void)write(ofd, "\n", 1); + break; + case -1: + err("line %lu: get: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: +#define NOSUCHKEY "get failed, no such key\n" + if (ofd != STDOUT_FILENO) + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + else + (void)fprintf(stderr, "%d: %.*s: %s", + lineno, MIN(kp->size, 20), kp->data, NOSUCHKEY); +#undef NOSUCHKEY + break; + } +} + +void +getdata(dbp, kp, dp) + DB *dbp; + DBT *kp, *dp; +{ + switch (dbp->get(dbp, kp, dp, flags)) { + case 0: + return; + case -1: + err("line %lu: getdata: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: + err("line %lu: getdata failed, no such key", lineno); + /* NOTREACHED */ + } +} + +void +put(dbp, kp, dp) + DB *dbp; + DBT *kp, *dp; +{ + switch (dbp->put(dbp, kp, dp, flags)) { + case 0: + break; + case -1: + err("line %lu: put: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: + (void)write(ofd, NOOVERWRITE, sizeof(NOOVERWRITE) - 1); + break; + } +} + +void +rem(dbp, kp) + DB *dbp; + DBT *kp; +{ + switch (dbp->del(dbp, kp, flags)) { + case 0: + break; + case -1: + err("line %lu: rem: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: +#define NOSUCHKEY "rem failed, no such key\n" + if (ofd != STDOUT_FILENO) + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + else if (flags != R_CURSOR) + (void)fprintf(stderr, "%d: %.*s: %s", + lineno, MIN(kp->size, 20), kp->data, NOSUCHKEY); + else + (void)fprintf(stderr, + "%d: rem of cursor failed\n", lineno); +#undef NOSUCHKEY + break; + } +} + +void +synk(dbp) + DB *dbp; +{ + switch (dbp->sync(dbp, flags)) { + case 0: + break; + case -1: + err("line %lu: synk: %s", lineno, strerror(errno)); + /* NOTREACHED */ + } +} + +void +seq(dbp, kp) + DB *dbp; + DBT *kp; +{ + DBT data; + + switch (dbp->seq(dbp, kp, &data, flags)) { + case 0: + (void)write(ofd, data.data, data.size); + if (ofd == STDOUT_FILENO) + (void)write(ofd, "\n", 1); + break; + case -1: + err("line %lu: seq: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: +#define NOSUCHKEY "seq failed, no such key\n" + if (ofd != STDOUT_FILENO) + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + else if (flags == R_CURSOR) + (void)fprintf(stderr, "%d: %.*s: %s", + lineno, MIN(kp->size, 20), kp->data, NOSUCHKEY); + else + (void)fprintf(stderr, + "%d: seq (%s) failed\n", lineno, sflags(flags)); +#undef NOSUCHKEY + break; + } +} + +void +dump(dbp, rev) + DB *dbp; + int rev; +{ + DBT key, data; + int flags, nflags; + + if (rev) { + flags = R_LAST; + nflags = R_PREV; + } else { + flags = R_FIRST; + nflags = R_NEXT; + } + for (;; flags = nflags) + switch (dbp->seq(dbp, &key, &data, flags)) { + case 0: + (void)write(ofd, data.data, data.size); + if (ofd == STDOUT_FILENO) + (void)write(ofd, "\n", 1); + break; + case 1: + goto done; + case -1: + err("line %lu: (dump) seq: %s", + lineno, strerror(errno)); + /* NOTREACHED */ + } +done: return; +} + +u_int +setflags(s) + char *s; +{ + char *p; + + for (; isspace(*s); ++s); + if (*s == '\n' || *s == '\0') + return (0); + if ((p = strchr(s, '\n')) != NULL) + *p = '\0'; + if (!strcmp(s, "R_CURSOR")) return (R_CURSOR); + if (!strcmp(s, "R_FIRST")) return (R_FIRST); + if (!strcmp(s, "R_IAFTER")) return (R_IAFTER); + if (!strcmp(s, "R_IBEFORE")) return (R_IBEFORE); + if (!strcmp(s, "R_LAST")) return (R_LAST); + if (!strcmp(s, "R_NEXT")) return (R_NEXT); + if (!strcmp(s, "R_NOOVERWRITE")) return (R_NOOVERWRITE); + if (!strcmp(s, "R_PREV")) return (R_PREV); + if (!strcmp(s, "R_SETCURSOR")) return (R_SETCURSOR); + + err("line %lu: %s: unknown flag", lineno, s); + /* NOTREACHED */ +} + +char * +sflags(flags) + int flags; +{ + switch (flags) { + case R_CURSOR: return ("R_CURSOR"); + case R_FIRST: return ("R_FIRST"); + case R_IAFTER: return ("R_IAFTER"); + case R_IBEFORE: return ("R_IBEFORE"); + case R_LAST: return ("R_LAST"); + case R_NEXT: return ("R_NEXT"); + case R_NOOVERWRITE: return ("R_NOOVERWRITE"); + case R_PREV: return ("R_PREV"); + case R_SETCURSOR: return ("R_SETCURSOR"); + } + + return ("UNKNOWN!"); +} + +DBTYPE +dbtype(s) + char *s; +{ + if (!strcmp(s, "btree")) + return (DB_BTREE); + if (!strcmp(s, "hash")) + return (DB_HASH); + if (!strcmp(s, "recno")) + return (DB_RECNO); + err("%s: unknown type (use btree, hash or recno)", s); + /* NOTREACHED */ +} + +void * +setinfo(type, s) + DBTYPE type; + char *s; +{ + static BTREEINFO ib; + static HASHINFO ih; + static RECNOINFO rh; + char *eq; + + if ((eq = strchr(s, '=')) == NULL) + err("%s: illegal structure set statement", s); + *eq++ = '\0'; + if (!isdigit(*eq)) + err("%s: structure set statement must be a number", s); + + switch (type) { + case DB_BTREE: + if (!strcmp("flags", s)) { + ib.flags = atoi(eq); + return (&ib); + } + if (!strcmp("cachesize", s)) { + ib.cachesize = atoi(eq); + return (&ib); + } + if (!strcmp("maxkeypage", s)) { + ib.maxkeypage = atoi(eq); + return (&ib); + } + if (!strcmp("minkeypage", s)) { + ib.minkeypage = atoi(eq); + return (&ib); + } + if (!strcmp("lorder", s)) { + ib.lorder = atoi(eq); + return (&ib); + } + if (!strcmp("psize", s)) { + ib.psize = atoi(eq); + return (&ib); + } + break; + case DB_HASH: + if (!strcmp("bsize", s)) { + ih.bsize = atoi(eq); + return (&ih); + } + if (!strcmp("ffactor", s)) { + ih.ffactor = atoi(eq); + return (&ih); + } + if (!strcmp("nelem", s)) { + ih.nelem = atoi(eq); + return (&ih); + } + if (!strcmp("cachesize", s)) { + ih.cachesize = atoi(eq); + return (&ih); + } + if (!strcmp("lorder", s)) { + ih.lorder = atoi(eq); + return (&ih); + } + break; + case DB_RECNO: + if (!strcmp("flags", s)) { + rh.flags = atoi(eq); + return (&rh); + } + if (!strcmp("cachesize", s)) { + rh.cachesize = atoi(eq); + return (&rh); + } + if (!strcmp("lorder", s)) { + rh.lorder = atoi(eq); + return (&rh); + } + if (!strcmp("reclen", s)) { + rh.reclen = atoi(eq); + return (&rh); + } + if (!strcmp("bval", s)) { + rh.bval = atoi(eq); + return (&rh); + } + if (!strcmp("psize", s)) { + rh.psize = atoi(eq); + return (&rh); + } + break; + } + err("%s: unknown structure value", s); + /* NOTREACHED */ +} + +void * +rfile(name, lenp) + char *name; + size_t *lenp; +{ + struct stat sb; + void *p; + int fd; + char *np; + + for (; isspace(*name); ++name); + if ((np = strchr(name, '\n')) != NULL) + *np = '\0'; + if ((fd = open(name, O_RDONLY, 0)) < 0 || + fstat(fd, &sb)) + err("%s: %s\n", name, strerror(errno)); +#ifdef NOT_PORTABLE + if (sb.st_size > (off_t)SIZE_T_MAX) + err("%s: %s\n", name, strerror(E2BIG)); +#endif + if ((p = (void *)malloc((u_int)sb.st_size)) == NULL) + err("%s", strerror(errno)); + (void)read(fd, p, (int)sb.st_size); + *lenp = sb.st_size; + (void)close(fd); + return (p); +} + +void * +xmalloc(text, len) + char *text; + size_t len; +{ + void *p; + + if ((p = (void *)malloc(len)) == NULL) + err("%s", strerror(errno)); + memmove(p, text, len); + return (p); +} + +void +usage() +{ + (void)fprintf(stderr, + "usage: dbtest [-l] [-f file] [-i info] [-o file] type script\n"); + exit(1); +} + +#if __STDC__ +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +void +#if __STDC__ +err(const char *fmt, ...) +#else +err(fmt, va_alist) + char *fmt; + va_dcl +#endif +{ + va_list ap; +#if __STDC__ + va_start(ap, fmt); +#else + va_start(ap); +#endif + (void)fprintf(stderr, "dbtest: "); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fprintf(stderr, "\n"); + exit(1); + /* NOTREACHED */ +} diff --git a/src/util/db2/test/hash1.tests/Makefile b/src/util/db2/test/hash1.tests/Makefile new file mode 100644 index 0000000000..348a8f818b --- /dev/null +++ b/src/util/db2/test/hash1.tests/Makefile @@ -0,0 +1,43 @@ +# @(#)Makefile 8.16 (Berkeley) 11/20/95 + +ALL = driver2 tcreat3 tdel thash4 tread2 tseq tverify +OBJ = driver2.o tcreat3.o tdel.o thash4.o tread2.o tseq.o tverify.o +CC = gcc + +all: ${ALL} + +# Uncomment the STAT line get hash and btree statistical use info. This +# also forces ld to load the btree debug functions for use by gdb, which +# is useful. The db library has to be compiled with -DSTATISTICS as well. +INC= -I${PORTDIR}/include -I${PORTDIR} +OORG= -g +#STAT= -DSTATISTICS +CFLAGS= -D__DBINTERFACE_PRIVATE -DDEBUG ${STAT} ${OORG} ${INC} + +tcreat3: tcreat3.o ${PORTDIR}/libdb.a + ${CC} -o $@ tcreat3.o ${PORTDIR}/libdb.a + +tdel: tdel.o ${PORTDIR}/libdb.a + ${CC} -o $@ tdel.o ${PORTDIR}/libdb.a + +thash4: thash4.o ${PORTDIR}/libdb.a + ${CC} -o $@ thash4.o ${PORTDIR}/libdb.a + +tread2: tread2.o ${PORTDIR}/libdb.a + ${CC} -o $@ tread2.o ${PORTDIR}/libdb.a + +tseq: tseq.o ${PORTDIR}/libdb.a + ${CC} -o $@ tseq.o ${PORTDIR}/libdb.a + +tverify: tverify.o ${PORTDIR}/libdb.a + ${CC} -o $@ tverify.o ${PORTDIR}/libdb.a + +driver2: driver2.o ${PORTDIR}/libdb.a + ${CC} -o $@ driver2.o ${PORTDIR}/libdb.a + +strerror.o: ${PORTDIR}/clib/strerror.c + ${CC} -c ${PORTDIR}/clib/strerror.c + +clean: + rm -f *.core gmon.out ${ALL} ${OBJ} t1 t2 t3 + diff --git a/src/util/db2/test/hash1.tests/driver2.c b/src/util/db2/test/hash1.tests/driver2.c new file mode 100644 index 0000000000..074f9eae06 --- /dev/null +++ b/src/util/db2/test/hash1.tests/driver2.c @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)driver2.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +/* + * Test driver, to try to tackle the large ugly-split problem. + */ + +#include <sys/file.h> +#include <stdio.h> +#include "ndbm.h" + +int my_hash(key, len) + char *key; + int len; +{ + return(17); /* So I'm cruel... */ +} + +main(argc, argv) + int argc; +{ + DB *db; + DBT key, content; + char keybuf[2049]; + char contentbuf[2049]; + char buf[256]; + int i; + HASHINFO info; + + info.bsize = 1024; + info.ffactor = 5; + info.nelem = 1; + info.cachesize = NULL; +#ifdef HASH_ID_PROGRAM_SPECIFIED + info.hash_id = HASH_ID_PROGRAM_SPECIFIED; + info.hash_func = my_hash; +#else + info.hash = my_hash; +#endif + info.lorder = 0; + if (!(db = dbopen("bigtest", O_RDWR | O_CREAT, 0644, DB_HASH, &info))) { + sprintf(buf, "dbopen: failed on file bigtest"); + perror(buf); + exit(1); + } + srand(17); + key.data = keybuf; + content.data = contentbuf; + memset(keybuf, '\0', sizeof(keybuf)); + memset(contentbuf, '\0', sizeof(contentbuf)); + for (i=1; i <= 500; i++) { + key.size = 128 + (rand()&1023); + content.size = 128 + (rand()&1023); +/* printf("%d: Key size %d, data size %d\n", i, key.size, + content.size); */ + sprintf(keybuf, "Key #%d", i); + sprintf(contentbuf, "Contents #%d", i); + if ((db->put)(db, &key, &content, R_NOOVERWRITE)) { + sprintf(buf, "dbm_store #%d", i); + perror(buf); + } + } + if ((db->close)(db)) { + perror("closing hash file"); + exit(1); + } + exit(0); +} + + + diff --git a/src/util/db2/test/hash1.tests/makedb.sh b/src/util/db2/test/hash1.tests/makedb.sh new file mode 100644 index 0000000000..15901de193 --- /dev/null +++ b/src/util/db2/test/hash1.tests/makedb.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# +# @(#)makedb.sh 8.1 (Berkeley) 6/4/93 + +awk '{i++; print $0; print i;}' /usr/local/lib/dict/words > WORDS +ls /bin /usr/bin /usr/ucb /etc | egrep '^(...|....|.....|......)$' | \ +sort | uniq | \ +awk '{ + printf "%s\n", $0 + for (i = 0; i < 1000; i++) + printf "%s+", $0 + printf "\n" +}' > LONG.DATA diff --git a/src/util/db2/test/hash1.tests/tcreat3.c b/src/util/db2/test/hash1.tests/tcreat3.c new file mode 100644 index 0000000000..3aaab99cbf --- /dev/null +++ b/src/util/db2/test/hash1.tests/tcreat3.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tcreat3.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include "db-int.h" + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key; + DB *dbp; + HASHINFO ctl; + FILE *fp; + int trash; + + int i = 0; + + argv++; + ctl.hash = NULL; + ctl.bsize = atoi(*argv++); + ctl.ffactor = atoi(*argv++); + ctl.nelem = atoi(*argv++); + ctl.lorder = 0; + if (!(dbp = dbopen( "hashtest", + O_CREAT|O_TRUNC|O_RDWR, 0600, DB_HASH, &ctl))){ + /* create table */ + fprintf(stderr, "cannot create: hash table (size %d)\n", + INITIAL); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + +/* + * enter key/data pair into the table + */ + if ((dbp->put)(dbp, &key, &item, R_NOOVERWRITE) != NULL) { + fprintf(stderr, "cannot enter: key %s\n", + item.data); + exit(1); + } + } + + (dbp->close)(dbp); + exit(0); +} diff --git a/src/util/db2/test/hash1.tests/tdel.c b/src/util/db2/test/hash1.tests/tdel.c new file mode 100644 index 0000000000..b542d8ef3d --- /dev/null +++ b/src/util/db2/test/hash1.tests/tdel.c @@ -0,0 +1,122 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tdel.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include "db-int.h" +#include <stdio.h> + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +/* Usage: thash pagesize fillfactor file */ +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key; + DB *dbp; + HASHINFO ctl; + FILE *fp; + int stat; + + int i = 0; + + argv++; + ctl.nelem = INITIAL; + ctl.hash = NULL; + ctl.bsize = atoi(*argv++); + ctl.ffactor = atoi(*argv++); + ctl.cachesize = 1024 * 1024; /* 1 MEG */ + ctl.lorder = 0; + argc -= 2; + if (!(dbp = dbopen( NULL, O_CREAT|O_RDWR, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot create: hash table size %d)\n", + INITIAL); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + +/* + * enter key/data pair into the table + */ + if ((dbp->put)(dbp, &key, &item, R_NOOVERWRITE) != NULL) { + fprintf(stderr, "cannot enter: key %s\n", + item.data); + exit(1); + } + } + + if ( --argc ) { + fp = fopen ( argv[0], "r"); + i = 0; + while ( fgets(wp1, 8192, fp) && + fgets(wp2, 8192, fp) && + i++ < MAXWORDS) { + key.size = strlen(wp1); + stat = (dbp->del)(dbp, &key, 0); + if (stat) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + exit(1); + } + } + fclose(fp); + } + (dbp->close)(dbp); + exit(0); +} diff --git a/src/util/db2/test/hash1.tests/testit b/src/util/db2/test/hash1.tests/testit new file mode 100644 index 0000000000..c80dc4e69b --- /dev/null +++ b/src/util/db2/test/hash1.tests/testit @@ -0,0 +1,154 @@ +#!/bin/csh -f +# +# @(#)testit 8.1 (Berkeley) 6/4/93 +# + +echo "" +echo "PAGE FILL " +set name=WORDS + set i = 256 + foreach j ( 11 14 21 ) + echo "thash4 $i $j" + ./thash4 $i $j 25000 65536 $name < $name + end + set i = 512 + foreach j ( 21 28 43 ) + echo "thash4 $i $j" + ./thash4 $i $j 25000 65536 $name < $name + end + set i = 1024 + foreach j ( 43 57 85 ) + echo "thash4 $i $j" + ./thash4 $i $j 25000 65536 $name < $name + end + set i = 2048 + foreach j ( 85 114 171 ) + echo "thash4 $i $j" + ./thash4 $i $j 25000 65536 $name < $name + end + set i = 4096 + foreach j ( 171 228 341 ) + echo "thash4 $i $j" + ./thash4 $i $j 25000 65536 $name < $name + end + set i = 8192 + foreach j ( 341 455 683 ) + echo "thash4 $i $j" + ./thash4 $i $j 25000 65536 $name < $name + end + echo "PAGE FILL " + set i = 256 + foreach j ( 11 14 21 ) + echo "$i"_"$j" + ./tcreat3 $i $j 25000 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 512 + foreach j ( 21 28 43 ) + echo "$i"_"$j" + ./tcreat3 $i $j 25000 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 1024 + foreach j ( 43 57 85 ) + echo "$i"_"$j" + ./tcreat3 $i $j 25000 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 2048 + foreach j ( 85 114 171 ) + echo "$i"_"$j" + ./tcreat3 $i $j 25000 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 4096 + foreach j ( 171 228 341 ) + echo "$i"_"$j" + ./tcreat3 $i $j 25000 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 8192 + foreach j ( 341 455 683 ) + echo "$i"_"$j" + ./tcreat3 $i $j 25000 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end +set name=LONG.DATA + set i = 1024 + foreach j ( 1 2 4 ) + echo ./thash4 $i $j 600 65536 $name + ./thash4 $i $j 600 65536 $name < $name + end + + set i = 2048 + foreach j ( 1 2 4 ) + echo ./thash4 $i $j 600 65536 $name + ./thash4 $i $j 600 65536 $name < $name + end + set i = 4096 + foreach j ( 1 2 4 ) + echo ./thash4 $i $j 600 65536 $name + ./thash4 $i $j 600 65536 $name < $name + end + set i = 8192 + foreach j ( 2 4 8 ) + echo ./thash4 $i $j 600 65536 $name + ./thash4 $i $j 600 65536 $name < $name + end + echo "PAGE FILL " + set i = 1024 + foreach j ( 1 2 4 ) + echo "$i"_"$j" + ./tcreat3 $i $j 600 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 2048 + foreach j ( 1 2 4 ) + echo "$i"_"$j" + ./tcreat3 $i $j 600 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 4096 + foreach j ( 1 2 4 ) + echo "$i"_"$j" + ./tcreat3 $i $j 600 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + set i = 8192 + foreach j ( 2 4 8 ) + echo "$i"_"$j" + ./tcreat3 $i $j 600 $name < $name + ./tread2 65536 < $name + ./tverify $name < $name + ./tseq > /dev/null + ./tdel $i $j $name < $name + end + +./driver2 diff --git a/src/util/db2/test/hash1.tests/thash4.c b/src/util/db2/test/hash1.tests/thash4.c new file mode 100644 index 0000000000..b0d4b42e36 --- /dev/null +++ b/src/util/db2/test/hash1.tests/thash4.c @@ -0,0 +1,131 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)thash4.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include <errno.h> +#include "db-int.h" + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +/* Usage: thash pagesize fillfactor file */ +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key, res; + DB *dbp; + HASHINFO ctl; + FILE *fp; + int stat; + time_t t; + + int i = 0; + + argv++; + ctl.hash = NULL; + ctl.bsize = atoi(*argv++); + ctl.ffactor = atoi(*argv++); + ctl.nelem = atoi(*argv++); + ctl.cachesize = atoi(*argv++); + ctl.lorder = 0; + if (!(dbp = dbopen( NULL, O_CREAT|O_RDWR, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot create: hash table size %d)\n", + INITIAL); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + +/* + * enter key/data pair into the table + */ + if ((dbp->put)(dbp, &key, &item, R_NOOVERWRITE) != NULL) { + fprintf(stderr, "cannot enter: key %s\n", + item.data); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } + } + + if ( --argc ) { + fp = fopen ( argv[0], "r"); + i = 0; + while ( fgets(wp1, 256, fp) && + fgets(wp2, 8192, fp) && + i++ < MAXWORDS) { + + key.size = strlen(wp1); + stat = (dbp->get)(dbp, &key, &res, 0); + if (stat < 0 ) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } else if ( stat > 0 ) { + fprintf ( stderr, "%s not found\n", key.data ); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } + } + fclose(fp); + } + dbp->close(dbp); + exit(0); +} diff --git a/src/util/db2/test/hash1.tests/tread2.c b/src/util/db2/test/hash1.tests/tread2.c new file mode 100644 index 0000000000..44acaa93dd --- /dev/null +++ b/src/util/db2/test/hash1.tests/tread2.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tread2.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include "db-int.h" + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +typedef struct { /* info to be stored */ + int num, siz; +} info; + +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key, res; + DB *dbp; + HASHINFO ctl; + int stat; + + int i = 0; + + ctl.nelem = INITIAL; + ctl.hash = NULL; + ctl.bsize = 64; + ctl.ffactor = 1; + ctl.cachesize = atoi(*argv++); + ctl.lorder = 0; + if (!(dbp = dbopen( "hashtest", O_RDONLY, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot open: hash table\n" ); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + + stat = (dbp->get)(dbp, &key, &res,0); + if (stat < 0) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + exit(1); + } else if ( stat > 0 ) { + fprintf ( stderr, "%s not found\n", key.data ); + exit(1); + } + } + (dbp->close)(dbp); + exit(0); +} diff --git a/src/util/db2/test/hash1.tests/tseq.c b/src/util/db2/test/hash1.tests/tseq.c new file mode 100644 index 0000000000..5eee5de61c --- /dev/null +++ b/src/util/db2/test/hash1.tests/tseq.c @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tseq.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include "db-int.h" + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + + +char wp[8192]; +char cp[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key, res; + DB *dbp; + FILE *fp; + int stat; + + if (!(dbp = dbopen( "hashtest", O_RDONLY, 0400, DB_HASH, NULL))) { + /* create table */ + fprintf(stderr, "cannot open: hash table\n" ); + exit(1); + } + +/* +* put info in structure, and structure in the item +*/ + for ( stat = (dbp->seq) (dbp, &res, &item, 1 ); + stat == 0; + stat = (dbp->seq) (dbp, &res, &item, 0 ) ) { + + memcpy ( wp, res.data, res.size ); + wp[res.size] = 0; + memcpy ( cp, item.data, item.size ); + cp[item.size] = 0; + + printf ( "%s %s\n", wp, cp ); + } + (dbp->close)(dbp); + exit(0); +} diff --git a/src/util/db2/test/hash1.tests/tverify.c b/src/util/db2/test/hash1.tests/tverify.c new file mode 100644 index 0000000000..a5a5dfd183 --- /dev/null +++ b/src/util/db2/test/hash1.tests/tverify.c @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tverify.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include "db-int.h" + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +typedef struct { /* info to be stored */ + int num, siz; +} info; + +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT key, res; + DB *dbp; + HASHINFO ctl; + int trash; + int stat; + + int i = 0; + + ctl.nelem = INITIAL; + ctl.hash = NULL; + ctl.bsize = 64; + ctl.ffactor = 1; + ctl.cachesize = 1024 * 1024; /* 1 MEG */ + ctl.lorder = 0; + if (!(dbp = dbopen( "hashtest", O_RDONLY, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot open: hash table\n" ); + exit(1); + } + + key.data = wp1; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + + stat = (dbp->get)(dbp, &key, &res,0); + if (stat < 0) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + exit(1); + } else if ( stat > 0 ) { + fprintf ( stderr, "%s not found\n", key.data ); + exit(1); + } + if ( memcmp ( res.data, wp2, res.size ) ) { + fprintf ( stderr, "data for %s is incorrect. Data was %s. Should have been %s\n", key.data, res.data, wp2 ); + } + } + (dbp->close)(dbp); + exit(0); +} diff --git a/src/util/db2/test/hash2.tests/README b/src/util/db2/test/hash2.tests/README new file mode 100644 index 0000000000..f29ccf7e1b --- /dev/null +++ b/src/util/db2/test/hash2.tests/README @@ -0,0 +1,72 @@ +# @(#)README 8.1 (Berkeley) 6/4/93 + +This package implements a superset of the hsearch and dbm/ndbm libraries. + +Test Programs: + All test programs which need key/data pairs expect them entered + with key and data on separate lines + + tcreat3.c + Takes + bucketsize (bsize), + fill factor (ffactor), and + initial number of elements (nelem). + Creates a hash table named hashtest containing the + keys/data pairs entered from standard in. + thash4.c + Takes + bucketsize (bsize), + fill factor (ffactor), + initial number of elements (nelem) + bytes of cache (ncached), and + file from which to read data (fname) + Creates a table from the key/data pairs on standard in and + then does a read of each key/data in fname + tdel.c + Takes + bucketsize (bsize), and + fill factor (ffactor). + file from which to read data (fname) + Reads each key/data pair from fname and deletes the + key from the hash table hashtest + tseq.c + Reads the key/data pairs in the file hashtest and writes them + to standard out. + tread2.c + Takes + butes of cache (ncached). + Reads key/data pairs from standard in and looks them up + in the file hashtest. + tverify.c + Reads key/data pairs from standard in, looks them up + in the file hashtest, and verifies that the data is + correct. + +NOTES: + +The file search.h is provided for using the hsearch compatible interface +on BSD systems. On System V derived systems, search.h should appear in +/usr/include. + +The man page ../man/db.3 explains the interface to the hashing system. +The file hash.ps is a postscript copy of a paper explaining +the history, implementation, and performance of the hash package. + +"bugs" or idiosyncracies + +If you have a lot of overflows, it is possible to run out of overflow +pages. Currently, this will cause a message to be printed on stderr. +Eventually, this will be indicated by a return error code. + +If you are using the ndbm interface and exit without flushing or closing the +file, you may lose updates since the package buffers all writes. Also, +the db interface only creates a single database file. To avoid overwriting +the user's original file, the suffix ".db" is appended to the file name +passed to dbm_open. Additionally, if your code "knows" about the historic +.dir and .pag files, it will break. + +There is a fundamental difference between this package and the old hsearch. +Hsearch requires the user to maintain the keys and data in the application's +allocated memory while hash takes care of all storage management. The down +side is that the byte strings passed in the ENTRY structure must be null +terminated (both the keys and the data). diff --git a/src/util/db2/test/hash2.tests/bigtest.c b/src/util/db2/test/hash2.tests/bigtest.c new file mode 100644 index 0000000000..e6d4108b72 --- /dev/null +++ b/src/util/db2/test/hash2.tests/bigtest.c @@ -0,0 +1,76 @@ +#include "db-int.h" +#include <stdio.h> +#include <fcntl.h> +#include <assert.h> +#include <stdlib.h> + +int +main(void) +{ + HASHINFO info; + DB *db; + DBT key, value, returned; + int *data; + int n, i; + + info.bsize = 512; + info.cachesize = 500; + info.lorder = 0; + info.ffactor = 4; + info.nelem = 0; + info.hash = NULL; + + db = dbopen("big2.db", O_RDWR|O_CREAT|O_TRUNC, 0664, DB_HASH, &info); + data = malloc(800 * sizeof(int)); + for (n = 0; n < 800; n++) + data[n] = 0xDEADBEEF; + key.size = sizeof(int); + key.data = &n; + value.size = 800 * sizeof(int); + value.data = (void *)data; + + for (n = 0; n < 200000; n++) { + returned.data = NULL; + if (n == 4627) + printf(""); + if (n % 50 == 0) + printf("put n = %d\n", n); + if (db->put(db, &key, &value, 0) != 0) + printf("put error, n = %d\n", n); + if (db->get(db, &key, &returned, 0) != 0) + printf("Immediate get error, n = %d\n", n); + assert (returned.size == 3200); + for (i = 0; i < 800; i++) + if (((int *)returned.data)[i] != 0xDEADBEEF) + printf("ERRORRRRRR!!!\n"); + + } + + for (n = 0; n < 200000; n++) { + if (n % 50 == 0) + printf("seq n = %d\n", n); + if ((db->seq(db, &key, &returned, 0)) != 0) + printf("Seq error, n = %d\n", n); + + assert(returned.size == 3200); + + for (i = 0; i < 800; i++) + if (((int *)returned.data)[i] != 0xDEADBEEF) + printf("ERRORRRRRR!!! seq %d\n", n); + } + + for (n = 0; n < 2000; n++) { + if (n % 50 == 0) + printf("get n = %d\n", n); + if (db->get(db, &key, &returned, 0) != 0) + printf("Late get error, n = %d\n", n); + assert(returned.size == 1200); + for (i = 0; i < 300; i++) + if (((int *)returned.data)[i] != 0xDEADBEEF) + printf("ERRORRRRRR!!!, get %d\n", n); + } + db->close(db); + free(value.data); + return(0); +} + diff --git a/src/util/db2/test/hash2.tests/passtest.c b/src/util/db2/test/hash2.tests/passtest.c new file mode 100644 index 0000000000..d6277ca52d --- /dev/null +++ b/src/util/db2/test/hash2.tests/passtest.c @@ -0,0 +1,184 @@ +#include "db-int.h" +#include <fcntl.h> +#include <stdio.h> +#include <string.h> + +extern int hash_expansions; + +int +main(void) +{ + FILE *keys, *vals; + DB *db; + DBT key, val; + char *key_line, *val_line, *get_key, *get_val, *old, *key2; + HASHINFO passwd; + int n = 0, i = 0, expected; + + key_line = (char *)malloc(100); + val_line = (char *)malloc(300); + old = (char *)malloc(300); + + keys = fopen("yp.keys", "rt"); + vals = fopen("yp.total", "rt"); + + passwd.bsize = 1024; + passwd.cachesize = 1024 * 1024; + passwd.ffactor = 10; + passwd.hash = NULL; + passwd.nelem = 0; + passwd.lorder = 4321; + + + db = dbopen("/usr/tmp/passwd.db", O_RDWR|O_CREAT|O_TRUNC, 0664, DB_HASH, + &passwd); + if (!db) { + fprintf(stderr, "create_db: couldn't create database file\n"); + exit(1); + } + + while ((key_line = fgets(key_line, 100, keys)) != NULL) { + if (n % 1000 == 0) + fprintf(stderr, "Putting #%d.\n", n); + n++; + fgets(val_line, 300, vals); + key.size = strlen(key_line); + key.data = (void *)key_line; + val.size = strlen(val_line); + val.data = (void *)val_line; + if (db->put(db, &key, &val, 0) != 0) + fprintf(stderr, "Put error, n = %d\n", n); + if (db->get(db, &key, &val, 0) != 0) + fprintf(stderr, "Immediate get error, n = %d\n", n); + } + fprintf(stderr, "Done with put!\n"); + free(key_line); + free(val_line); + fclose(keys); + fclose(vals); + db->close(db); + + + + + keys = fopen("yp.keys", "rt"); + vals = fopen("yp.total", "rt"); + get_key = (char *)malloc(100); + get_val = (char *)malloc(300); + + db = dbopen("/usr/tmp/passwd.db", O_RDWR, 0664, DB_HASH, &passwd); + if (!db) + fprintf(stderr, "Could not open db!\n"); + n = 0; + while ((get_key = fgets(get_key, 100, keys)) != NULL) { + n++; + if (n % 1000 == 0) + fprintf(stderr, "Getting #%d.\n", n); + key.size = strlen(get_key); + key.data = (void *)get_key; + if (db->get(db, &key, &val, 0) != 0) + fprintf(stderr, "Retrieval error on %s\n", get_key); + fgets(get_val, 300, vals); + if (memcmp(val.data, (void *)get_val, val.size)) { + fprintf(stderr, "Unmatched get on %s.\n", get_key); + fprintf(stderr, "Input = %s\nOutput = %s\n", get_val, + (char *)val.data); + } + } + expected = n; + fclose(vals); + fclose(keys); + free(get_key); + free(get_val); + db->close(db); + + + + + get_key = (char *)malloc(100); + get_val = (char *)malloc(300); + + db = dbopen("/usr/tmp/passwd.db", O_RDWR, 0664, DB_HASH, &passwd); + if (!db) + fprintf(stderr, "Could not open db!\n"); + n = 0; + for (;;) { + n++; + if (n % 1000 == 0) + fprintf(stderr, "Sequence getting #%d.\n", n); + if (db->seq(db, &key, &val, 0) != 0) { + fprintf(stderr, + "Exiting sequence retrieve; n = %d, expected = %d\n", + n - 1 , expected); + break; + } + } + free(get_key); + free(get_val); + db->close(db); + + get_key = (char *)malloc(100); + key2 = (char *)malloc(100); + + keys = fopen("yp.keys", "rt"); + vals = fopen("yp.total", "rt"); + + db = dbopen("/usr/tmp/passwd.db", O_RDWR, 0664, DB_HASH, &passwd); + n = 0; + while ((get_key = fgets(get_key, 100, keys)) != NULL) { + if (n % 1000 == 0) + fprintf(stderr, "Deleting #%d.\n", n); + n+=2; + key2 = fgets(get_key, 100, keys); + if (!key2) + break; + key.data = (void *)key2; + key.size = strlen(key2); + if (db->del(db, &key, 0) != 0) + fprintf(stderr, "Delete error on %d", n); + } + + db->close(db); + free(get_key); + free(key2); + fclose(keys); + fclose(vals); + + get_key = (char *)malloc(100); + key2 = (char *)malloc(100); + get_val = (char *)malloc(300); + + keys = fopen("yp.keys", "rt"); + vals = fopen("yp.total", "rt"); + + db = dbopen("/usr/tmp/passwd.db", O_RDWR, 0664, DB_HASH, &passwd); + n = 0; + while ((get_key = fgets(get_key, 100, keys)) != NULL) { + n += 2; + if (n % 1000 == 0) + fprintf(stderr, "Re-retrieving #%d.\n", n); + key2 = fgets(key2, 100, keys); + if (!key2) + break; + key.data = (void *)get_key; + key.size = strlen(get_key); + if (db->get(db, &key, &val, 0) != 0) + fprintf(stderr, "Retrieval after delete error on %d\n", n); + fgets(get_val, 300, vals); + if (memcmp(val.data, (void *)get_val, val.size)) { + fprintf(stderr, "Unmatched get after delete on %s.\n", get_key); + fprintf(stderr, "Input = %s\nOutput = %s\n", get_val, + (char *)val.data); + } + fgets(get_val, 300, vals); + } + + db->close(db); + free(get_key); + free(key2); + free(get_val); + fclose(keys); + fclose(vals); + + exit(0); +} diff --git a/src/util/db2/test/hash2.tests/passwd/genpass.c b/src/util/db2/test/hash2.tests/passwd/genpass.c new file mode 100644 index 0000000000..da37676878 --- /dev/null +++ b/src/util/db2/test/hash2.tests/passwd/genpass.c @@ -0,0 +1,23 @@ +#include <stdio.h> +#include <stdlib.h> + +void +main(int argc, char **argv) +{ + int i,j,n; + char *pass[8], r; + + n = atoi(argv[1]); + + srandom(101173); + for (i = 0; i < n; i++) { + for (j = 0; j < 8; j++) { + r = random() % 122; + while (r < 48) + r += random() % (122 - r); + printf("%c", r); + } + printf("\n"); + } +} + diff --git a/src/util/db2/test/run.test b/src/util/db2/test/run.test new file mode 100644 index 0000000000..393aedcad2 --- /dev/null +++ b/src/util/db2/test/run.test @@ -0,0 +1,717 @@ +#!/bin/sh - +# +# @(#)run.test 8.13 (Berkeley) 11/2/95 +# + +# db regression tests +main() +{ + + PROG=./dbtest + TMP1=${TMPDIR-.}/t1 + TMP2=${TMPDIR-.}/t2 + TMP3=${TMPDIR-.}/t3 + + if [ \! -z "$WORDLIST" -a -f "$WORDLIST" ]; then + DICT=$WORDLIST + elif [ -f /usr/local/lib/dict/words ]; then + DICT=/usr/local/lib/dict/words + elif [ -f /usr/share/dict/words ]; then + DICT=/usr/share/dict/words + elif [ -f /usr/dict/words ]; then + DICT=/usr/dict/words + elif [ -f /usr/share/lib/dict/words ]; then + DICT=/usr/share/lib/dict/words + else + echo 'run.test: no dictionary' + exit 1 + fi + + if [ $# -eq 0 ]; then + for t in 1 2 3 4 5 6 7 8 9 10 11 12 13 20; do + test$t + done + else + while [ $# -gt 0 ] + do case "$1" in + test*) + $1;; + [0-9]*) + test$1;; + btree) + for t in 1 2 3 7 8 9 10 12 13; do + test$t + done;; + hash) + for t in 1 2 3 8 13 20; do + test$t + done;; + recno) + for t in 1 2 3 4 5 6 7 10 11; do + test$t + done;; + *) + echo "run.test: unknown test $1" + echo "usage: run.test test# | type" + exit 1 + esac + shift + done + fi + rm -f $TMP1 $TMP2 $TMP3 + exit 0 +} + +# Take the first hundred entries in the dictionary, and make them +# be key/data pairs. +test1() +{ + echo "Test 1: btree, hash: small key, small data pairs" + sed 200q $DICT > $TMP1 + for type in btree hash; do + rm -f $TMP2 $TMP3 + for i in `sed 200q $DICT`; do + echo p + echo k$i + echo d$i + echo g + echo k$i + done > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test1: type $type: failed" + exit 1 + fi + done + echo "Test 1: recno: small key, small data pairs" + rm -f $TMP2 $TMP3 + sed 200q $DICT | + awk '{ + ++i; + printf("p\nk%d\nd%s\ng\nk%d\n", i, $0, i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test1: type recno: failed" + exit 1 + fi +} + +# Take the first 200 entries in the dictionary, and give them +# each a medium size data entry. +test2() +{ + echo "Test 2: btree, hash: small key, medium data pairs" + mdata=abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz + echo $mdata | + awk '{ for (i = 1; i < 201; ++i) print $0 }' > $TMP1 + for type in hash btree; do + rm -f $TMP2 $TMP3 + for i in `sed 200q $DICT`; do + echo p + echo k$i + echo d$mdata + echo g + echo k$i + done > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test2: type $type: failed" + exit 1 + fi + done + echo "Test 2: recno: small key, medium data pairs" + rm -f $TMP2 $TMP3 + echo $mdata | + awk '{ for (i = 1; i < 201; ++i) + printf("p\nk%d\nd%s\ng\nk%d\n", i, $0, i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test2: type recno: failed" + exit 1 + fi +} + +# Insert the programs in /bin with their paths as their keys. +test3() +{ + echo "Test 3: hash: small key, big data pairs" + rm -f $TMP1 + (find /bin -type f -exec test -r {} \; -print | xargs cat) > $TMP1 + for type in hash; do + rm -f $TMP2 $TMP3 + for i in `find /bin -type f -exec test -r {} \; -print`; do + echo p + echo k$i + echo D$i + echo g + echo k$i + done > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test3: $type: failed" + exit 1 + fi + done + echo "Test 3: btree: small key, big data pairs" + for psize in 512 16384 65536; do + echo " page size $psize" + for type in btree; do + rm -f $TMP2 $TMP3 + for i in `find /bin -type f -exec test -r {} \; -print`; do + echo p + echo k$i + echo D$i + echo g + echo k$i + done > $TMP2 + $PROG -i psize=$psize -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test3: $type: page size $psize: failed" + exit 1 + fi + done + done + echo "Test 3: recno: big data pairs" + rm -f $TMP2 $TMP3 + find /bin -type f -exec test -r {} \; -print | + awk '{ + ++i; + printf("p\nk%d\nD%s\ng\nk%d\n", i, $0, i); + }' > $TMP2 + for psize in 512 16384 65536; do + echo " page size $psize" + $PROG -i psize=$psize -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test3: recno: page size $psize: failed" + exit 1 + fi + done +} + +# Do random recno entries. +test4() +{ + echo "Test 4: recno: random entries" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 37; i <= 37 + 88 * 17; i += 17) { + if (i % 41) + s = substr($0, 1, i % 41); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + for (i = 1; i <= 15; ++i) { + if (i % 41) + s = substr($0, 1, i % 41); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + for (i = 19234; i <= 19234 + 61 * 27; i += 27) { + if (i % 41) + s = substr($0, 1, i % 41); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + exit + }' > $TMP1 + rm -f $TMP2 $TMP3 + cat $TMP1 | + awk 'BEGIN { + i = 37; + incr = 17; + } + { + printf("p\nk%d\nd%s\n", i, $0); + if (i == 19234 + 61 * 27) + exit; + if (i == 37 + 88 * 17) { + i = 1; + incr = 1; + } else if (i == 15) { + i = 19234; + incr = 27; + } else + i += incr; + } + END { + for (i = 37; i <= 37 + 88 * 17; i += 17) + printf("g\nk%d\n", i); + for (i = 1; i <= 15; ++i) + printf("g\nk%d\n", i); + for (i = 19234; i <= 19234 + 61 * 27; i += 27) + printf("g\nk%d\n", i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test4: type recno: failed" + exit 1 + fi +} + +# Do reverse order recno entries. +test5() +{ + echo "Test 5: recno: reverse order entries" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk ' { + for (i = 1500; i; --i) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + cat $TMP1 | + awk 'BEGIN { + i = 1500; + } + { + printf("p\nk%d\nd%s\n", i, $0); + --i; + } + END { + for (i = 1500; i; --i) + printf("g\nk%d\n", i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test5: type recno: failed" + exit 1 + fi +} + +# Do alternating order recno entries. +test6() +{ + echo "Test 6: recno: alternating order entries" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk ' { + for (i = 1; i < 1200; i += 2) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + for (i = 2; i < 1200; i += 2) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + cat $TMP1 | + awk 'BEGIN { + i = 1; + even = 0; + } + { + printf("p\nk%d\nd%s\n", i, $0); + i += 2; + if (i >= 1200) { + if (even == 1) + exit; + even = 1; + i = 2; + } + } + END { + for (i = 1; i < 1200; ++i) + printf("g\nk%d\n", i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + sort -o $TMP1 $TMP1 + sort -o $TMP3 $TMP3 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test6: type recno: failed" + exit 1 + fi +} + +# Delete cursor record +test7() +{ + echo "Test 7: btree, recno: delete cursor record" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 120; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + printf("%05d: input key %d: %s\n", 120, 120, $0); + printf("seq failed, no such key\n"); + printf("%05d: input key %d: %s\n", 1, 1, $0); + printf("%05d: input key %d: %s\n", 2, 2, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + for type in btree recno; do + cat $TMP1 | + awk '{ + if (i == 120) + exit; + printf("p\nk%d\nd%s\n", ++i, $0); + } + END { + printf("fR_NEXT\n"); + for (i = 1; i <= 120; ++i) + printf("s\n"); + printf("fR_CURSOR\ns\nk120\n"); + printf("r\n"); + printf("fR_NEXT\ns\n"); + printf("fR_CURSOR\ns\nk1\n"); + printf("r\n"); + printf("fR_FIRST\ns\n"); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test7: type $type: failed" + exit 1 + fi + done +} + +# Make sure that overflow pages are reused. +test8() +{ + echo "Test 8: btree, hash: repeated small key, big data pairs" + rm -f $TMP1 + echo "" | + awk 'BEGIN { + for (i = 1; i <= 10; ++i) { + printf("p\nkkey1\nD/bin/sh\n"); + printf("p\nkkey2\nD/bin/csh\n"); + if (i % 8 == 0) { + printf("c\nkkey2\nD/bin/csh\n"); + printf("c\nkkey1\nD/bin/sh\n"); + printf("e\t%d of 10 (comparison)\n", i); + } else + printf("e\t%d of 10 \n", i); + printf("r\nkkey1\nr\nkkey2\n"); + } + }' > $TMP1 + $PROG btree $TMP1 +# $PROG hash $TMP1 + # No explicit test for success. +} + +# Test btree duplicate keys +test9() +{ + echo "Test 9: btree: duplicate keys" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 543; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + for type in btree; do + cat $TMP1 | + awk '{ + if (i++ % 2) + printf("p\nkduplicatekey\nd%s\n", $0); + else + printf("p\nkunique%dkey\nd%s\n", i, $0); + } + END { + printf("o\n"); + }' > $TMP2 + $PROG -iflags=1 -o $TMP3 $type $TMP2 + sort -o $TMP3 $TMP3 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test9: type $type: failed" + exit 1 + fi + done +} + +# Test use of cursor flags without initialization +test10() +{ + echo "Test 10: btree, recno: test cursor flag use" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 20; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + # Test that R_CURSOR doesn't succeed before cursor initialized + for type in btree recno; do + cat $TMP1 | + awk '{ + if (i == 10) + exit; + printf("p\nk%d\nd%s\n", ++i, $0); + } + END { + printf("fR_CURSOR\nr\n"); + printf("eR_CURSOR SHOULD HAVE FAILED\n"); + }' > $TMP2 + $PROG -o $TMP3 $type $TMP2 > /dev/null 2>&1 + if [ -s $TMP3 ] ; then + echo "Test 10: delete: R_CURSOR SHOULD HAVE FAILED" + exit 1 + fi + done + for type in btree recno; do + cat $TMP1 | + awk '{ + if (i == 10) + exit; + printf("p\nk%d\nd%s\n", ++i, $0); + } + END { + printf("fR_CURSOR\np\nk1\ndsome data\n"); + printf("eR_CURSOR SHOULD HAVE FAILED\n"); + }' > $TMP2 + $PROG -o $TMP3 $type $TMP2 > /dev/null 2>&1 + if [ -s $TMP3 ] ; then + echo "Test 10: put: R_CURSOR SHOULD HAVE FAILED" + exit 1 + fi + done +} + +# Test insert in reverse order. +test11() +{ + echo "Test 11: recno: reverse order insert" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 779; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + for type in recno; do + cat $TMP1 | + awk '{ + if (i == 0) { + i = 1; + printf("p\nk1\nd%s\n", $0); + printf("%s\n", "fR_IBEFORE"); + } else + printf("p\nk1\nd%s\n", $0); + } + END { + printf("or\n"); + }' > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test11: type $type: failed" + exit 1 + fi + done +} + +# Take the first 20000 entries in the dictionary, reverse them, and give +# them each a small size data entry. Use a small page size to make sure +# the btree split code gets hammered. +test12() +{ + if ( rev < /dev/null ) > /dev/null 2>&1 ; then + : + else + echo "Test 12: skipped, rev not found" + return + fi + echo "Test 12: btree: lots of keys, small page size" + mdata=abcdefghijklmnopqrstuvwxy + echo $mdata | + awk '{ for (i = 1; i < 20001; ++i) print $0 }' > $TMP1 + for type in btree; do + rm -f $TMP2 $TMP3 + for i in `sed 20000q $DICT | rev`; do + echo p + echo k$i + echo d$mdata + echo g + echo k$i + done > $TMP2 + $PROG -i psize=512 -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test12: type $type: failed" + exit 1 + fi + done +} + +# Test different byte orders. +test13() +{ + echo "Test 13: btree, hash: differing byte orders" + sed 50q $DICT > $TMP1 + for order in 1234 4321; do + for type in btree hash; do + rm -f byte.file $TMP2 $TMP3 + for i in `sed 50q $DICT`; do + echo p + echo k$i + echo d$i + echo g + echo k$i + done > $TMP2 + $PROG -ilorder=$order -f byte.file -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test13: $type/$order put failed" + exit 1 + fi + for i in `sed 50q $DICT`; do + echo g + echo k$i + done > $TMP2 + $PROG -s \ + -ilorder=$order -f byte.file -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test13: $type/$order get failed" + exit 1 + fi + done + done + rm -f byte.file +} + +# Try a variety of bucketsizes and fill factors for hashing +test20() +{ + echo\ + "Test 20: hash: bucketsize, fill factor; nelem 25000 cachesize 65536" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 10000; ++i) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("%s\n", s); + } + exit; + }' > $TMP1 + sed 10000q $DICT | + awk 'BEGIN { + ds="abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" + } + { + if (++i % 34) + s = substr(ds, 1, i % 34); + else + s = substr(ds, 1); + printf("p\nk%s\nd%s\n", $0, s); + }' > $TMP2 + sed 10000q $DICT | + awk '{ + ++i; + printf("g\nk%s\n", $0); + }' >> $TMP2 + bsize=256 + for ffactor in 11 14 21; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=512 + for ffactor in 21 28 43; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=1024 + for ffactor in 43 57 85; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=2048 + for ffactor in 85 114 171; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=4096 + for ffactor in 171 228 341; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=8192 + for ffactor in 341 455 683; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done +} + +main $* diff --git a/src/util/dyn/ChangeLog b/src/util/dyn/ChangeLog new file mode 100644 index 0000000000..9fac926656 --- /dev/null +++ b/src/util/dyn/ChangeLog @@ -0,0 +1,13 @@ +Mon Jul 22 04:20:48 1996 Marc Horowitz <marc@mit.edu> + + * dyn_insert.c (DynInsert): what used to be #ifdef POSIX, should + be #ifdef HAVE_MEMMOVE + +Tue Jul 9 19:30:40 1996 Marc Horowitz <marc@mit.edu> + + * configure.in (DEPLIBS): AC_SUBST() it, to hack around an + incorrect assumption in aclocal.m4 + * Makefile.in (DONE): add rules and macros to support shared + libraries + + diff --git a/src/util/dyn/Imakefile b/src/util/dyn/Imakefile new file mode 100644 index 0000000000..471cf5bc81 --- /dev/null +++ b/src/util/dyn/Imakefile @@ -0,0 +1,26 @@ +# This file is part of libdyn.a, the C Dynamic Object library. It +# contains the Imakefile. +# +# There are no restrictions on this code; however, if you make any +# changes, I request that you document them so that I do not get +# credit or blame for your modifications. +# +# Written by Barr3y Jaspan, Student Information Processing Board (SIPB) +# and MIT-Project Athena, 1989. + +SRCS = dyn_create.c dyn_put.c dyn_debug.c dyn_delete.c dyn_size.c \ + dyn_append.c dyn_realloc.c dyn_paranoid.c dyn_insert.c \ + dyn_initzero.c +OBJS = dyn_create.o dyn_put.o dyn_debug.o dyn_delete.o dyn_size.o \ + dyn_append.o dyn_realloc.o dyn_paranoid.o dyn_insert.o \ + dyn_initzero.o +HDRS = dyn.h dynP.h + +DEST = libdyn.a + +StageLibrary($(DEST), $(OBJS)) +StageIncludes(dyn.h,) + +Program(test, test.o, $(DEST), $(DEST)) + +Depend(,, $(SRCS) $(HDRS)) diff --git a/src/util/dyn/Makefile.in b/src/util/dyn/Makefile.in new file mode 100644 index 0000000000..e2c4255bd8 --- /dev/null +++ b/src/util/dyn/Makefile.in @@ -0,0 +1,65 @@ +CFLAGS = $(CCOPTS) $(DEFS) + +##DOSBUILDTOP = ..\.. +##DOSLIBNAME=libdyn.lib + +.c.o: + $(CC) $(CFLAGS) -c $(srcdir)/$*.c +@SHARED_RULE@ + +SRCS = $(srcdir)/dyn_create.c \ + $(srcdir)/dyn_put.c \ + $(srcdir)/dyn_debug.c \ + $(srcdir)/dyn_delete.c \ + $(srcdir)/dyn_size.c \ + $(srcdir)/dyn_append.c \ + $(srcdir)/dyn_realloc.c \ + $(srcdir)/dyn_paranoid.c \ + $(srcdir)/dyn_insert.c \ + $(srcdir)/dyn_initzero.c + +OBJS = dyn_create.$(OBJEXT) \ + dyn_put.$(OBJEXT) \ + dyn_debug.$(OBJEXT) \ + dyn_delete.$(OBJEXT) \ + dyn_size.$(OBJEXT) \ + dyn_append.$(OBJEXT) \ + dyn_realloc.$(OBJEXT) \ + dyn_paranoid.$(OBJEXT) \ + dyn_insert.$(OBJEXT) \ + dyn_initzero.$(OBJEXT) + +LIB_SUBDIRS= . +LIBDONE= DONE + +all-unix:: shared includes $(OBJS) +all-mac:: $(OBJS) +all-windows:: $(OBJS) + +shared: + mkdir shared + +check-windows:: + +clean-unix:: + $(RM) shared/* + +clean-mac:: +clean-windows:: + +DONE:: $(OBJS) + $(RM) DONE + echo $(OBJS) >DONE + +libdyn.$(STEXT): $(OBJS) + $(RM) $@ + $(ARADD) $@ $(OBJS) + $(RANLIB) $@ + +install:: libdyn.a + $(INSTALL_DATA) libdyn.a $(DESTDIR)$(KRB5_LIBDIR)/libdyn.a + $(RANLIB) $(DESTDIR)$(KRB5_LIBDIR)/libdyn.a + +clean:: + $(RM) libdyn.$(LIBEXT) libdyn.bak DONE + diff --git a/src/util/dyn/README b/src/util/dyn/README new file mode 100644 index 0000000000..0c08ac5c62 --- /dev/null +++ b/src/util/dyn/README @@ -0,0 +1,32 @@ +libdyn.a -- Release 1.0 + +A C Dynamic Object is an array that takes care of resizing itself as +elements are added and deleted from it. It can be of any type for +which sizeof is defined and for which an address of a variable of that +type can be passed to a function. + +To build libdyn.a, simply type "make depend all" (if you don't have +the program makedepend, of course, leave out the "depend" part). If +your system's bcopy() cannot handle overlapping regions, you'll need +to write one that can. (Left as an excercise for the reader..) + +The library should compile and work without modification on a vast +number of systems. It only uses 5 external functions: malloc, +realloc, free, bcopy, and fprintf (to stderr). Of these, only bcopy +should need to be changed for other systems (such as MS-DOS) and it +could probably be done with a -D flag to the compiler. + +The test/demo program is built by "make all". This program produces +the library's debugging output (to stderr) as well as some of its own +output (to stdout). + +The library has been tested (with test.c) on a VAX VSII, VAXstation +3100, DECstation 3100, and IBM RT all running BSD4.3 (except for the +DECstation, which was running Ultrix V2.1). + +An earlier version of this library was posted to alt.sources. This +version contains one new function (DynInsert) and slightly cleaner +code, but no bugfixes (no bugs were found). + +Author: Barr3y Jaspan, Student Information Processing Board (SIPB) and +MIT-Project Athena, bjaspan@athena.mit.edu, 1990 diff --git a/src/util/dyn/TODO b/src/util/dyn/TODO new file mode 100644 index 0000000000..d5a242b981 --- /dev/null +++ b/src/util/dyn/TODO @@ -0,0 +1,3 @@ +o be able to get obj->size +o be able to get array without DynAdd (so you can just use DynPut) +o be able to specify bzero on realloc diff --git a/src/util/dyn/configure.in b/src/util/dyn/configure.in new file mode 100644 index 0000000000..6ba8e6d24a --- /dev/null +++ b/src/util/dyn/configure.in @@ -0,0 +1,14 @@ +AC_INIT(dyn.h) +CONFIG_RULES +AC_PROG_ARCHIVE +AC_PROG_ARCHIVE_ADD +AC_PROG_RANLIB +AC_PROG_INSTALL +AC_CHECK_FUNCS(memmove) +V5_SHARED_LIB_OBJS +V5_MAKE_SHARED_LIB(libdyn,1.0,../../lib, ../util/dyn) +dnl DEPLIBS is normally set by KRB5_LIBRARIES, but that makes assumptions +dnl which fail for libdyn. +AC_SUBST(DEPLIBS) +CopySrcHeader(dyn.h,[$](BUILDTOP)/include) +V5_AC_OUTPUT_MAKEFILE diff --git a/src/util/dyn/dyn.3m b/src/util/dyn/dyn.3m new file mode 100644 index 0000000000..21ae70bb2f --- /dev/null +++ b/src/util/dyn/dyn.3m @@ -0,0 +1,198 @@ + + + +15 March 1990 DYN(3M) + + + +NAME + dyn - the C Dynamic Object library + + +DESCRIPTION + A C Dynamic Object is an array that takes care of resizing + itself as you add and delete elements from it. It can be of + any type for which sizeof is defined and for which an + address of a variable of that type can be passed to a func- + tion. The library containing the functions described below + is called _l_i_b_d_y_n._a, and the necessary declarations to use + them are in <_d_y_n._h>. + + A DynObject is actually a structure that contains an array + and a couple of integers to maintain necessary state infor- + mation. When a Dyn function is said to operate on "the + object" or "the array", it is operating on the array stored + in the structure while at the same time updating internal + state information. + + +LIST OF FUNCTIONS + DynObject DynCreate(size, increment) + int size, increment; + + _R_e_q_u_i_r_e_s: _s_i_z_e and _i_n_c_r_e_m_e_n_t are greater than zero. + + _E_f_f_e_c_t_s: Creates a new DynObject that will store elements of + size _s_i_z_e and will allocate memory in blocks large enough to + hold exactly _i_n_c_r_e_m_e_n_t elements. For example, if you are + storing 8-byte double precision numbers and _i_n_c_r_e_m_e_n_t is 5, + each 5th element you add to the object will cause it to + request 40 more bytes (8 * 5) from the operating system. If + _i_n_c_r_e_m_e_n_t is zero, a default value is used (currently 100). + This is the only time the programmer deals with a dynamic + object's memory allocation. + + _R_e_t_u_r_n_s: DynCreate returns the new DynObject, or NULL if + there is insufficient memory. + + int DynDestroy(obj) + DynObject obj; + + _M_o_d_i_f_i_e_s: obj + + _E_f_f_e_c_t_s: Frees all memory associated with _o_b_j. The results + of calling any Dyn function on a destroyed object are unde- + fined (except for DynCreate, which resets the object). + + _R_e_t_u_r_n_s: DynDestroy returns DYN_OK. + + + + + + 1 + + + + + + +DYN(3M) 15 March 1990 + + + + int DynAdd(obj, el) + DynObject obj; + DynPtr el; + + _M_o_d_i_f_i_e_s: obj + + _E_f_f_e_c_t_s: Adds the element pointed to by _e_l to the object + _o_b_j, resizing the object if necessary. The new element + becomes the last element in obj's array. + + _R_e_t_u_r_n_s: DynAdd returns DYN_OK on success or DYN_NOMEM if + there is insufficient memory. + + int DynInsert(obj, index, els, num) + DynObject obj; + DynPtr els; + int index, num; + + _M_o_d_i_f_i_e_s: obj + + _E_f_f_e_c_t_s: Inserts the array of _n_u_m elements, pointed to by + _e_l_s, into the object _o_b_j starting at the array location + _i_n_d_e_x, resizing the object if necessary. Order is + preserved; if you have the array "1 2 3 4 5" and insert "10 + 11 12" at the third position, you will have the array "1 2 + 10 11 12 3 4 5". + + _R_e_t_u_r_n_s: DynInsert returns DYN_BADINDEX if _i_n_d_e_x is not + between 0 and DynSize(obj); DYN_BADVALUE if _n_u_m is less than + 1; DYN_NOMEM if there is insufficient memory. + + int DynGet(obj, index) + DynObject obj; + int index; + + _E_f_f_e_c_t_s: Returns the address of the element _i_n_d_e_x in the + array of _o_b_j. This pointer can be treated as a normal array + of the type specified to DynCreate. The order of elements + in this array is the order in which they were added to the + object. The returned pointer is guaranteed to be valid only + until obj is modified. + + _R_e_t_u_r_n_s: DynGet returns NULL if _i_n_d_e_x is larger than the + number of elements in the array of less than zero. + + int DynDelete(obj, index) + DynObject obj; + int index; + + _M_o_d_i_f_i_e_s: obj + + + + + +2 + + + + + + +15 March 1990 DYN(3M) + + + + _E_f_f_e_c_t_s: The element _i_n_d_e_x is deleted from the object _o_b_j. + Note that the element is actually removed permanently from + the array. If you have the array "1 2 3 4 5" and delete the + third element, you will have the array "1 2 4 5". The order + of elements in not affected. + + _R_e_t_u_r_n_s: DynDelete will return DYN_OK on success or + DYN_BADINDEX if the element _i_n_d_e_x does not exist in the + array or is less than zero. + + int DynSize(obj) + DynObject obj; + + _E_f_f_e_c_t_s: Returns the number of elements in the object _o_b_j. + + int DynHigh(obj) + DynObject obj; + + _E_f_f_e_c_t_s: Returns the index of the highest element in the + object _o_b_j. In this version, DynHigh is macro that expands + to DynSize - 1. + + int DynLow(obj) + DynObject obj; + + _E_f_f_e_c_t_s: Returns the index of the lowest element in the + object _o_b_j. In this version, DynLow is macro that expands + to 0. + + int DynDebug(obj, state) + DynObject obj; + int state; + + _M_o_d_i_f_i_e_s: obj + + _E_f_f_e_c_t_s: Sets the debugging state of _o_b_j to _s_t_a_t_e and prints + a message on stderr saying what state debugging was set to. + Any non-zero value for _s_t_a_t_e turns debugging ``on''. When + debugging is on, all Dyn functions will produce (hopefully + useful) output to stderr describing what is going on. + + _R_e_t_u_r_n_s: DynDebug returns DYN_OK. + +AUTHOR + Barr3y Jaspan, Student Information Processing Board (SIPB) + and MIT-Project Athena, bjaspan@athena.mit.edu + + + + + + + + + + 3 + + + diff --git a/src/util/dyn/dyn_append.c b/src/util/dyn/dyn_append.c new file mode 100644 index 0000000000..81403ece70 --- /dev/null +++ b/src/util/dyn/dyn_append.c @@ -0,0 +1,26 @@ +/* + * This file is part of libdyn.a, the C Dynamic Object library. It + * contains the source code for the function DynAppend(). + * + * There are no restrictions on this code; however, if you make any + * changes, I request that you document them so that I do not get + * credit or blame for your modifications. + * + * Written by Barr3y Jaspan, Student Information Processing Board (SIPB) + * and MIT-Project Athena, 1989. + */ + +#include <stdio.h> + +#include "dynP.h" + +/* + * Made obsolete by DynInsert, now just a convenience function. + */ +int DynAppend(obj, els, num) + DynObjectP obj; + DynPtr els; + int num; +{ + return DynInsert(obj, DynSize(obj), els, num); +} diff --git a/src/util/dyn/dyn_initzero.c b/src/util/dyn/dyn_initzero.c new file mode 100644 index 0000000000..3949f30ec4 --- /dev/null +++ b/src/util/dyn/dyn_initzero.c @@ -0,0 +1,26 @@ +/* + * This file is part of libdyn.a, the C Dynamic Object library. It + * contains the source code for the function DynInitZero(). + * + * There are no restrictions on this code; however, if you make any + * changes, I request that you document them so that I do not get + * credit or blame for your modifications. + * + * Written by Barr3y Jaspan, Student Information Processing Board (SIPB) + * and MIT-Project Athena, 1989. + */ + +#include <stdio.h> + +#include "dynP.h" + +int DynInitzero(obj, state) + DynObjectP obj; + int state; +{ + obj->initzero = state; + + if (obj->debug) + fprintf(stderr, "dyn: initzero: initzero set to %d.\n", state); + return DYN_OK; +} diff --git a/src/util/dyn/dyn_insert.c b/src/util/dyn/dyn_insert.c new file mode 100644 index 0000000000..5654e935d1 --- /dev/null +++ b/src/util/dyn/dyn_insert.c @@ -0,0 +1,70 @@ +/* + * This file is part of libdyn.a, the C Dynamic Object library. It + * contains the source code for the function DynInsert(). + * + * There are no restrictions on this code; however, if you make any + * changes, I request that you document them so that I do not get + * credit or blame for your modifications. + * + * Written by Barr3y Jaspan, Student Information Processing Board (SIPB) + * and MIT-Project Athena, 1989. + */ + +#include <stdio.h> +#include "dynP.h" + +int DynInsert(obj, idx, els_in, num) + DynObjectP obj; + void *els_in; + int idx, num; +{ + DynPtr els = (DynPtr) els_in; + int ret; + + if (idx < 0 || idx > obj->num_el) { + if (obj->debug) + fprintf(stderr, "dyn: insert: index %d is not in [0,%d]\n", + idx, obj->num_el); + return DYN_BADINDEX; + } + + if (num < 1) { + if (obj->debug) + fprintf(stderr, "dyn: insert: cannot insert %d elements\n", + num); + return DYN_BADVALUE; + } + + if (obj->debug) + fprintf(stderr,"dyn: insert: Moving %d bytes from %d + %d to + %d\n", + (obj->num_el-idx)*obj->el_size, obj->array, + obj->el_size*idx, obj->el_size*(idx+num)); + + if ((ret = _DynResize(obj, obj->num_el + num)) != DYN_OK) + return ret; +#ifdef HAVE_MEMMOVE + memmove(obj->array + obj->el_size*(idx + num), + obj->array + obj->el_size*idx, + (obj->num_el-idx)*obj->el_size); +#else + bcopy(obj->array + obj->el_size*idx, + obj->array + obj->el_size*(idx + num), + (obj->num_el-idx)*obj->el_size); +#endif + + if (obj->debug) + fprintf(stderr, "dyn: insert: Copying %d bytes from %d to %d + %d\n", + obj->el_size*num, els, obj->array, obj->el_size*idx); + +#ifdef HAVE_MEMMOVE + memmove(obj->array + obj->el_size*idx, els, obj->el_size*num); +#else + bcopy(els, obj->array + obj->el_size*idx, obj->el_size*num); +#endif + obj->num_el += num; + + if (obj->debug) + fprintf(stderr, "dyn: insert: done.\n"); + + return DYN_OK; +} diff --git a/src/util/dyn/dyn_paranoid.c b/src/util/dyn/dyn_paranoid.c new file mode 100644 index 0000000000..7eb750a21a --- /dev/null +++ b/src/util/dyn/dyn_paranoid.c @@ -0,0 +1,26 @@ +/* + * This file is part of libdyn.a, the C Dynamic Object library. It + * contains the source code for the function DynDebug(). + * + * There are no restrictions on this code; however, if you make any + * changes, I request that you document them so that I do not get + * credit or blame for your modifications. + * + * Written by Barr3y Jaspan, Student Information Processing Board (SIPB) + * and MIT-Project Athena, 1989. + */ + +#include <stdio.h> + +#include "dynP.h" + +int DynParanoid(obj, state) + DynObjectP obj; + int state; +{ + obj->paranoid = state; + + if (obj->debug) + fprintf(stderr, "dyn: paranoid: Paranoia set to %d.\n", state); + return DYN_OK; +} diff --git a/src/util/dyn/dyn_realloc.c b/src/util/dyn/dyn_realloc.c new file mode 100644 index 0000000000..31e3975b57 --- /dev/null +++ b/src/util/dyn/dyn_realloc.c @@ -0,0 +1,88 @@ +/* + * This file is part of libdyn.a, the C Dynamic Object library. It + * contains the source code for the internal function _DynRealloc(). + * + * There are no restrictions on this code; however, if you make any + * changes, I request that you document them so that I do not get + * credit or blame for your modifications. + * + * Written by Barr3y Jaspan, Student Information Processing Board (SIPB) + * and MIT-Project Athena, 1989. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "dynP.h" + +/* + * Resize the array so that element req exists. + */ +int _DynResize(obj, req) + DynObjectP obj; + int req; +{ + int cnt, size; + + if (obj->size > req) + return DYN_OK; + else if (obj->inc > 0) + return _DynRealloc(obj, (req - obj->size) / obj->inc + 1); + else { + if (obj->size == 0) + size = -obj->inc; + else + size = obj->size; + + while (size <= req) + size <<= 1; + + return _DynRealloc(obj, size); + } +} + +/* + * Resize the array by num_incs units. If obj->inc is positive, this + * means make it obj->inc*num_incs elements larger. If obj->inc is + * negative, this means make the array num_incs elements long. + * + * Ideally, this function should not be called from outside the + * library. However, nothing will break if it is. + */ +int _DynRealloc(obj, num_incs) + DynObjectP obj; + int num_incs; +{ + DynPtr temp; + int new_size_in_bytes; + + if (obj->inc > 0) + new_size_in_bytes = obj->el_size*(obj->size + obj->inc*num_incs); + else + new_size_in_bytes = obj->el_size*num_incs; + + if (obj->debug) + fprintf(stderr, + "dyn: alloc: Increasing object by %d bytes (%d incs).\n", + new_size_in_bytes - obj->el_size*obj->size, + num_incs); + + temp = (DynPtr) realloc(obj->array, new_size_in_bytes); + if (temp == NULL) { + if (obj->debug) + fprintf(stderr, "dyn: alloc: Out of memory.\n"); + return DYN_NOMEM; + } + else { + obj->array = temp; + if (obj->inc > 0) + obj->size += obj->inc*num_incs; + else + obj->size = num_incs; + } + + if (obj->debug) + fprintf(stderr, "dyn: alloc: done.\n"); + + return DYN_OK; +} diff --git a/src/util/et/ChangeLog b/src/util/et/ChangeLog index 34cc96338a..233b5b9d7a 100644 --- a/src/util/et/ChangeLog +++ b/src/util/et/ChangeLog @@ -14,6 +14,10 @@ Mon Jun 10 21:54:09 1996 Theodore Ts'o <tytso@rsts-11.mit.edu> * vfprintf.c, internal.h, compile_et.c, et_c.awk, com_err.c: Change _WINDOWS to _MSDOS, and add check for _WIN32. +Sun May 12 01:13:02 1996 Marc Horowitz <marc@mit.edu> + + * et_c.awk: deal with continuations in the input .et file. + Wed Mar 20 00:19:08 1996 Theodore Y. Ts'o <tytso@dcl> * Makefile.in (SRCS): Fix SRCS definition so that it doesn't fool diff --git a/src/util/et/et_c.awk b/src/util/et/et_c.awk index 8067e71905..ac1321cd06 100644 --- a/src/util/et/et_c.awk +++ b/src/util/et/et_c.awk @@ -120,7 +120,18 @@ c2n["_"]=63 table_item_count = 0 } +(continuation == 1) && ($0 ~ /\\[ \t]*$/) { + text=substr($0,1,length($0)-1); + printf "\t\t\"%s\"\n", text > outfile +} + +(continuation == 1) && ($0 ~ /"[ \t]*$/) { + printf "\t\t\"%s,\n", $0 > outfile + continuation = 0; +} + /^[ \t]*(error_code|ec)[ \t]+[A-Z_0-9]+,[ \t]*$/ { + table_item_count++ skipone=1 next } @@ -135,10 +146,28 @@ c2n["_"]=63 table_item_count++ } +/^[ \t]*(error_code|ec)[ \t]+[A-Z_0-9]+,[ \t]*".*\\[ \t]*$/ { + text="" + for (i=3; i<=NF; i++) { + text = text FS $i + } + text=substr(text,2,length(text)-2); + printf "\t%s\"\n", text > outfile + continuation++; +} + +/^[ \t]*".*\\[ \t]*$/ { + if (skipone) { + text=substr($0,1,length($0)-1); + printf "\t%s\"\n", text > outfile + continuation++; + } + skipone=0 +} + { if (skipone) { printf "\t%s,\n", $0 > outfile - table_item_count++ } skipone=0 } |
