From 65e3812ed0f8d579d442d6115ec4eed7c9650f94 Mon Sep 17 00:00:00 2001
From: William Brown <firstyear@redhat.com>
Date: Fri, 23 Jun 2017 11:06:00 +1000
Subject: [PATCH 1/2] Ticket 49290 - improve idl handling in complex searches

Bug Description:  Previously we performed iterative set manipulations
during searches. For example, (&(a)(b)(c)(d)) would result in:

t1 = intersect(a, b)
t2 = intersect(t1, c)
t3 = intersect(t2, d)

This is similar for or with union. The issue is that if the candidate
sets were large (until d), then we would allocate and duplicate a large
set of ID's repeatedly, until the set was reduced in the third step.

The same is true for unions, but the set would continue to grow. As well
due to the sorted nature of the IDList, we would perform an O(m + n)
merge of the arrays, potentially many times. This would be amplified
the more terms added.

Especially in the and case, this could result in significant time
differences if you move smaller candidate sets to the start of the
query, and for the or case, would result in exponential time
increases by adding extra terms.

Fix Description:  To resolve this, rather than processing each
set in an interative fashion, we store each subfilters result in
the new idl_set structure. When we have collected each candidate
set we can then perform better intersections. In the case of the
and query, we can and the *smallest* candidate to each term first,
which significantly reduces the number of comparisons and copies.

A benefit of this change, is that future improvements to not/or
queries are easier to make because we now have "perfect" knowledge
of all the IDLists involved, rather than just pairs at a time.

https://pagure.io/389-ds-base/issue/49290

Author: wibrown

Review by: ???
---
 Makefile.am                                        |   1 +
 .../tests/suites/dynamic_plugins/plugin_tests.py   |   2 +-
 dirsrvtests/tests/suites/filter/filter_logic.py    | 153 +++++++++++
 .../tests/suites/replication/cleanallruv_test.py   |   2 +-
 dirsrvtests/tests/tickets/ticket48252_test.py      |  53 ++--
 ldap/servers/slapd/back-ldbm/back-ldbm.h           |  39 ++-
 ldap/servers/slapd/back-ldbm/filterindex.c         | 162 +++++++-----
 ldap/servers/slapd/back-ldbm/idl_common.c          | 124 ++++++---
 ldap/servers/slapd/back-ldbm/idl_new.c             |  26 +-
 ldap/servers/slapd/back-ldbm/idl_set.c             | 282 +++++++++++++++++++++
 ldap/servers/slapd/back-ldbm/index.c               |  44 +++-
 ldap/servers/slapd/back-ldbm/proto-back-ldbm.h     |  14 +
 12 files changed, 746 insertions(+), 156 deletions(-)
 create mode 100644 dirsrvtests/tests/suites/filter/filter_logic.py
 create mode 100644 ldap/servers/slapd/back-ldbm/idl_set.c

diff --git a/Makefile.am b/Makefile.am
index f075a47..6afa3fd 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1262,6 +1262,7 @@ libback_ldbm_la_SOURCES = ldap/servers/slapd/back-ldbm/ancestorid.c \
 	ldap/servers/slapd/back-ldbm/idl.c \
 	ldap/servers/slapd/back-ldbm/idl_shim.c \
 	ldap/servers/slapd/back-ldbm/idl_new.c \
+	ldap/servers/slapd/back-ldbm/idl_set.c \
 	ldap/servers/slapd/back-ldbm/idl_common.c \
 	ldap/servers/slapd/back-ldbm/import.c \
 	ldap/servers/slapd/back-ldbm/import-merge.c \
diff --git a/dirsrvtests/tests/suites/dynamic_plugins/plugin_tests.py b/dirsrvtests/tests/suites/dynamic_plugins/plugin_tests.py
index f17ec53..15c4c0a 100644
--- a/dirsrvtests/tests/suites/dynamic_plugins/plugin_tests.py
+++ b/dirsrvtests/tests/suites/dynamic_plugins/plugin_tests.py
@@ -22,7 +22,7 @@ from lib389._constants import (DEFAULT_SUFFIX, PLUGIN_ACCT_USABILITY, PLUGIN_ACC
                               PLUGIN_LINKED_ATTRS, PLUGIN_MEMBER_OF,
                               PLUGIN_MANAGED_ENTRY, PLUGIN_PASSTHRU, PLUGIN_REFER_INTEGRITY,
                               PLUGIN_RETRO_CHANGELOG, PLUGIN_ROOTDN_ACCESS, DN_DM, PASSWORD,
-                              LOCALHOST)
+                              LOCALHOST, args_instance)
 
 log = logging.getLogger(__name__)
 
diff --git a/dirsrvtests/tests/suites/filter/filter_logic.py b/dirsrvtests/tests/suites/filter/filter_logic.py
new file mode 100644
index 0000000..ab3d632
--- /dev/null
+++ b/dirsrvtests/tests/suites/filter/filter_logic.py
@@ -0,0 +1,153 @@
+# --- BEGIN COPYRIGHT BLOCK ---
+# Copyright (C) 2017 Red Hat, Inc.
+# All rights reserved.
+#
+# License: GPL (version 3 or any later version).
+# See LICENSE for details.
+# --- END COPYRIGHT BLOCK ---
+#
+
+import pytest
+import ldap
+
+from lib389.topologies import topology_st
+from lib389._constants import DEFAULT_SUFFIX
+
+from lib389.idm.user import UserAccounts
+
+"""
+This test case asserts that various logical filters apply correctly and as expected.
+This is to assert that we have correct and working search operations, especially related
+to indexed content from filterindex.c and idl_sets.
+"""
+
+USER0_DN = 'uid=user0,ou=People,%s' % DEFAULT_SUFFIX
+USER1_DN = 'uid=user1,ou=People,%s' % DEFAULT_SUFFIX
+USER2_DN = 'uid=user2,ou=People,%s' % DEFAULT_SUFFIX
+USER3_DN = 'uid=user3,ou=People,%s' % DEFAULT_SUFFIX
+USER4_DN = 'uid=user4,ou=People,%s' % DEFAULT_SUFFIX
+USER5_DN = 'uid=user5,ou=People,%s' % DEFAULT_SUFFIX
+USER6_DN = 'uid=user6,ou=People,%s' % DEFAULT_SUFFIX
+USER7_DN = 'uid=user7,ou=People,%s' % DEFAULT_SUFFIX
+USER8_DN = 'uid=user8,ou=People,%s' % DEFAULT_SUFFIX
+USER9_DN = 'uid=user9,ou=People,%s' % DEFAULT_SUFFIX
+
+@pytest.fixture(scope="module")
+def topology_st_f(topology_st):
+    # Add our users to the topology_st
+    users = UserAccounts(topology_st.standalone, DEFAULT_SUFFIX)
+    for i in range(0, 10):
+        users.create(properties={
+            'uid': 'user%s' % i,
+            'cn': 'user%s' % i,
+            'sn': '%s' % i,
+            'uidNumber': '%s' % i,
+            'gidNumber': '%s' % i,
+            'homeDirectory': '/home/user%s' % i
+        })
+    # return it
+    # print("ATTACH NOW")
+    # import time
+    # time.sleep(30)
+    return topology_st.standalone
+
+def _check_filter(topology_st_f, filt, expect_len, expect_dns):
+    # print("checking %s" % filt)
+    results = topology_st_f.search_s("ou=People,%s" % DEFAULT_SUFFIX, ldap.SCOPE_ONELEVEL, filt, ['uid',])
+    assert len(results) == expect_len
+    result_dns = [result.dn for result in results]
+    assert set(expect_dns) == set(result_dns)
+
+def test_filter_logic_eq(topology_st_f):
+    _check_filter(topology_st_f, '(uid=user0)', 1, [USER0_DN])
+
+def test_filter_logic_sub(topology_st_f):
+    _check_filter(topology_st_f, '(uid=user*)', 10, [
+            USER0_DN, USER1_DN, USER2_DN, USER3_DN, USER4_DN,
+            USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN
+        ])
+
+def test_filter_logic_not_eq(topology_st_f):
+    _check_filter(topology_st_f, '(!(uid=user0))', 9, [
+            USER1_DN, USER2_DN, USER3_DN, USER4_DN, USER5_DN,
+            USER6_DN, USER7_DN, USER8_DN, USER9_DN
+        ])
+
+# More not cases?
+
+def test_filter_logic_range(topology_st_f):
+    _check_filter(topology_st_f, '(uid>=user5)', 5, [
+            USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN
+        ])
+    _check_filter(topology_st_f, '(uid<=user4)', 5, [
+            USER0_DN, USER1_DN, USER2_DN, USER3_DN, USER4_DN
+        ])
+    _check_filter(topology_st_f, '(uid>=ZZZZ)', 0, [])
+    _check_filter(topology_st_f, '(uid<=aaaa)', 0, [])
+
+def test_filter_logic_and_eq(topology_st_f):
+    _check_filter(topology_st_f, '(&(uid=user0)(cn=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(&(uid=user0)(cn=user1))', 0, [])
+
+def test_filter_logic_and_range(topology_st_f):
+    _check_filter(topology_st_f, '(&(uid>=user5)(cn<=user7))', 3, [
+        USER5_DN, USER6_DN, USER7_DN
+        ])
+
+def test_filter_logic_or_eq(topology_st_f):
+    _check_filter(topology_st_f, '(|(uid=user0)(cn=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(|(uid=user0)(uid=user1))', 2, [USER0_DN, USER1_DN])
+
+def test_filter_logic_and_not_eq(topology_st_f):
+    _check_filter(topology_st_f, '(&(uid=user0)(!(cn=user0)))', 0, [])
+    _check_filter(topology_st_f, '(&(uid=*)(!(uid=user0)))', 9, [
+            USER1_DN, USER2_DN, USER3_DN, USER4_DN, USER5_DN,
+            USER6_DN, USER7_DN, USER8_DN, USER9_DN
+        ])
+
+def test_filter_logic_or_not_eq(topology_st_f):
+    _check_filter(topology_st_f, '(|(!(uid=user0))(!(uid=user1)))', 10, [
+            USER0_DN, USER1_DN, USER2_DN, USER3_DN, USER4_DN,
+            USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN
+        ])
+
+def test_filter_logic_and_range(topology_st_f):
+    _check_filter(topology_st_f, '(&(uid>=user5)(uid=user6))', 1, [USER6_DN])
+    _check_filter(topology_st_f, '(&(uid>=user5)(uid=user0))', 0, [])
+
+def test_filter_logic_or_range(topology_st_f):
+    _check_filter(topology_st_f, '(|(uid>=user5)(uid=user6))', 5, [
+            USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN
+        ])
+    _check_filter(topology_st_f, '(|(uid>=user5)(uid=user0))', 6, [
+            USER0_DN,
+            USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN
+        ])
+
+def test_filter_logic_and_and_eq(topology_st_f):
+    _check_filter(topology_st_f, '(&(&(uid=user0)(sn=0))(cn=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(&(&(uid=user1)(sn=0))(cn=user0))', 0, [])
+    _check_filter(topology_st_f, '(&(&(uid=user0)(sn=1))(cn=user0))', 0, [])
+    _check_filter(topology_st_f, '(&(&(uid=user0)(sn=0))(cn=user1))', 0, [])
+
+def test_filter_logic_or_or_eq(topology_st_f):
+    _check_filter(topology_st_f, '(|(|(uid=user0)(sn=0))(cn=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(|(|(uid=user1)(sn=0))(cn=user0))', 2, [USER0_DN, USER1_DN])
+    _check_filter(topology_st_f, '(|(|(uid=user0)(sn=1))(cn=user0))', 2, [USER0_DN, USER1_DN])
+    _check_filter(topology_st_f, '(|(|(uid=user0)(sn=0))(cn=user1))', 2, [USER0_DN, USER1_DN])
+    _check_filter(topology_st_f, '(|(|(uid=user0)(sn=1))(cn=user2))', 3, [USER0_DN, USER1_DN, USER2_DN])
+
+def test_filter_logic_and_or_eq(topology_st_f):
+    _check_filter(topology_st_f, '(&(|(uid=user0)(sn=0))(cn=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(&(|(uid=user1)(sn=0))(cn=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(&(|(uid=user0)(sn=1))(cn=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(&(|(uid=user0)(sn=0))(cn=user1))', 0, [])
+    _check_filter(topology_st_f, '(&(|(uid=user0)(sn=1))(cn=*))', 2, [USER0_DN, USER1_DN])
+
+def test_filter_logic_or_and_eq(topology_st_f):
+    _check_filter(topology_st_f, '(|(&(uid=user0)(sn=0))(uid=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(|(&(uid=user1)(sn=2))(uid=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(|(&(uid=user0)(sn=1))(uid=user0))', 1, [USER0_DN])
+    _check_filter(topology_st_f, '(|(&(uid=user1)(sn=1))(uid=user0))', 2, [USER0_DN, USER1_DN])
+
+
diff --git a/dirsrvtests/tests/suites/replication/cleanallruv_test.py b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
index cd7aa21..a5a3673 100644
--- a/dirsrvtests/tests/suites/replication/cleanallruv_test.py
+++ b/dirsrvtests/tests/suites/replication/cleanallruv_test.py
@@ -18,7 +18,7 @@ from lib389._constants import (DEFAULT_SUFFIX, REPLICA_RUV_FILTER, REPLICAROLE_M
                               REPLICAID_MASTER_1, REPLICATION_BIND_DN, REPLICATION_BIND_PW,
                               REPLICATION_BIND_METHOD, REPLICATION_TRANSPORT, SUFFIX,
                               RA_NAME, RA_BINDDN, RA_BINDPW, RA_METHOD, RA_TRANSPORT_PROT,
-                              defaultProperties)
+                              defaultProperties, args_instance)
 
 from lib389 import DirSrv
 
diff --git a/dirsrvtests/tests/tickets/ticket48252_test.py b/dirsrvtests/tests/tickets/ticket48252_test.py
index ac88d37..d8d25e8 100644
--- a/dirsrvtests/tests/tickets/ticket48252_test.py
+++ b/dirsrvtests/tests/tickets/ticket48252_test.py
@@ -11,6 +11,7 @@ import logging
 import pytest
 from lib389.tasks import *
 from lib389.topologies import topology_st
+from lib389.idm.user import UserAccounts
 
 from lib389._constants import DEFAULT_SUFFIX, SUFFIX, DEFAULT_BENAME, PLUGIN_USN
 
@@ -21,6 +22,7 @@ USER_NUM = 10
 TEST_USER = "test_user"
 
 
+
 def test_ticket48252_setup(topology_st):
     """
     Enable USN plug-in for enabling tombstones
@@ -35,12 +37,16 @@ def test_ticket48252_setup(topology_st):
         assert False
 
     log.info("Adding test entries...")
-    for id in range(USER_NUM):
-        name = "%s%d" % (TEST_USER, id)
-        topology_st.standalone.add_s(Entry(("cn=%s,%s" % (name, SUFFIX), {
-            'objectclass': "top person".split(),
-            'sn': name,
-            'cn': name})))
+    ua = UserAccounts(topology_st.standalone, DEFAULT_SUFFIX)
+    for i in range(USER_NUM):
+        ua.create(properties={
+                'uid': "%s%d" % (TEST_USER, i),
+                'cn' : "%s%d" % (TEST_USER, i),
+                'sn' : 'user',
+                'uidNumber' : '1000',
+                'gidNumber' : '2000',
+                'homeDirectory' : '/home/testuser'
+            })
 
 
 def in_index_file(topology_st, id, index):
@@ -64,10 +70,11 @@ def test_ticket48252_run_0(topology_st):
     Check it is not in the 'cn' index file
     """
     log.info("Case 1 - Check deleted entry is not in the 'cn' index file")
+    uas = UserAccounts(topology_st.standalone, DEFAULT_SUFFIX)
     del_rdn = "cn=%s0" % TEST_USER
-    del_entry = "%s,%s" % (del_rdn, SUFFIX)
+    del_entry = uas.get('%s0' % TEST_USER)
     log.info("	Deleting a test entry %s..." % del_entry)
-    topology_st.standalone.delete_s(del_entry)
+    del_entry.delete()
 
     assert in_index_file(topology_st, 0, 'cn') == False
 
@@ -75,7 +82,7 @@ def test_ticket48252_run_0(topology_st):
     assert topology_st.standalone.db2index(DEFAULT_BENAME, 'cn')
 
     assert in_index_file(topology_st, 0, 'cn') == False
-    log.info("	entry %s is not in the cn index file after reindexed." % del_entry)
+    log.info("	entry %s is not in the cn index file after reindexed." % del_rdn)
     log.info('Case 1 - PASSED')
 
 
@@ -85,21 +92,35 @@ def test_ticket48252_run_1(topology_st):
     Check it is in the 'objectclass' index file as a tombstone entry
     """
     log.info("Case 2 - Check deleted entry is in the 'objectclass' index file as a tombstone entry")
+    uas = UserAccounts(topology_st.standalone, DEFAULT_SUFFIX)
     del_rdn = "cn=%s1" % TEST_USER
-    del_entry = "%s,%s" % (del_rdn, SUFFIX)
-    log.info("	Deleting a test entry %s..." % del_entry)
-    topology_st.standalone.delete_s(del_entry)
+    del_entry = uas.get('%s1' % TEST_USER)
+    log.info("	Deleting a test entry %s..." % del_rdn)
 
-    entry = topology_st.standalone.search_s(SUFFIX, ldap.SCOPE_SUBTREE, '(&(objectclass=nstombstone)(%s))' % del_rdn)
+    del_uniqueid = del_entry.get_attr_val_utf8('nsuniqueid')
+
+    del_entry.delete()
+
+    ## Note: you can't search by nstombstone and other indexed types - it won't work!
+    # need to use the nsuniqueid.
+
+    entry = topology_st.standalone.search_s(SUFFIX, ldap.SCOPE_SUBTREE, '(&(objectclass=nstombstone)(nsuniqueid=%s))' % del_uniqueid)
     assert len(entry) == 1
-    log.info("	entry %s is in the objectclass index file." % del_entry)
+    log.info("	entry %s is in the objectclass index file." % del_rdn)
+    entry = topology_st.standalone.search_s(SUFFIX, ldap.SCOPE_SUBTREE, '(&(objectclass=nstombstone)(%s))' % del_rdn)
+    assert len(entry) == 0
+    log.info("	entry %s is correctly not in the cn index file before reindexing." % del_rdn)
 
     log.info("	db2index - reindexing %s ..." % 'objectclass')
     assert topology_st.standalone.db2index(DEFAULT_BENAME, 'objectclass')
 
-    entry = topology_st.standalone.search_s(SUFFIX, ldap.SCOPE_SUBTREE, '(&(objectclass=nstombstone)(%s))' % del_rdn)
+    entry = topology_st.standalone.search_s(SUFFIX, ldap.SCOPE_SUBTREE, '(&(objectclass=nstombstone)(nsuniqueid=%s))' % del_uniqueid)
     assert len(entry) == 1
-    log.info("	entry %s is in the objectclass index file after reindexed." % del_entry)
+    log.info("	entry %s is in the objectclass index file after reindexing." % del_rdn)
+    entry = topology_st.standalone.search_s(SUFFIX, ldap.SCOPE_SUBTREE, '(&(objectclass=nstombstone)(%s))' % del_rdn)
+    assert len(entry) == 0
+    log.info("	entry %s is correctly not in the cn index file after reindexing." % del_rdn)
+
     log.info('Case 2 - PASSED')
 
 
diff --git a/ldap/servers/slapd/back-ldbm/back-ldbm.h b/ldap/servers/slapd/back-ldbm/back-ldbm.h
index 88a91ed..acf5aa6 100644
--- a/ldap/servers/slapd/back-ldbm/back-ldbm.h
+++ b/ldap/servers/slapd/back-ldbm/back-ldbm.h
@@ -237,7 +237,7 @@ typedef u_int32_t	ID;
 /*
  * Use this to count and index into an array of ID.
  */
-typedef u_int32_t	NIDS;
+typedef uint32_t NIDS;
 
 /*
  * This structure represents an id block on disk and an id list
@@ -245,22 +245,35 @@ typedef u_int32_t	NIDS;
  *
  * The fields have the following meanings:
  *
- *	b_nmax	maximum number of ids in this block. if this is == ALLIDSBLOCK,
- *		then this block represents all ids.
- *	b_nids	current number of ids in use in this block.  if this
- *		is == INDBLOCK, then this block is an indirect block
- *		containing a list of other blocks containing actual ids.
- *		the list is terminated by an id of NOID.
- *	b_ids	a list of the actual ids themselves
+ *  b_nmax  maximum number of ids in this block. if this is == ALLIDSBLOCK,
+ *      then this block represents all ids.
+ *  b_nids  current number of ids in use in this block.  if this
+ *      is == INDBLOCK, then this block is an indirect block
+ *      containing a list of other blocks containing actual ids.
+ *      the list is terminated by an id of NOID.
+ *  b_ids   a list of the actual ids themselves
  */
+#define ALLIDSBLOCK 0       /* == 0 => this is an allid block  */
+#define INDBLOCK    0       /* == 0 => this is an indirect blk */
+
 typedef struct block {
-	NIDS		b_nmax;		/* max number of ids in this list  */
-#define ALLIDSBLOCK	0		/* == 0 => this is an allid block  */
-	NIDS		b_nids;		/* current number of ids used	   */
-#define INDBLOCK	0		/* == 0 => this is an indirect blk */
-	ID		b_ids[1];	/* the ids - actually bigger 	   */
+    NIDS        b_nmax;     /* max number of ids in this list  */
+    NIDS        b_nids;     /* current number of ids used      */
+    struct  block *next; /* Pointer to the next block in the list
+                         * used by idl_set
+                         */
+    ID      b_ids[1];   /* the ids - actually bigger       */
 } Block, IDList;
 
+typedef struct _idlist_set {
+    int64_t count;
+    int64_t allids;
+    int64_t emptyset;
+    IDList *head;
+    IDList *tail;
+    IDList *complement_head;
+} IDListSet;
+
 #define ALLIDS( idl )		((idl)->b_nmax == ALLIDSBLOCK)
 #define INDIRECT_BLOCK( idl )	((idl)->b_nids == INDBLOCK)
 #define IDL_NIDS(idl)           (idl ? (idl)->b_nids : (NIDS)0)
diff --git a/ldap/servers/slapd/back-ldbm/filterindex.c b/ldap/servers/slapd/back-ldbm/filterindex.c
index 6af8e09..991923a 100644
--- a/ldap/servers/slapd/back-ldbm/filterindex.c
+++ b/ldap/servers/slapd/back-ldbm/filterindex.c
@@ -652,7 +652,8 @@ list_candidates(
     int           allidslimit
 )
 {
-    IDList        *idl, *tmp, *tmp2;
+    IDList        *idl;
+    IDList        *tmp;
     Slapi_Filter  *f, *nextf, *f_head;
     int           range = 0;
     int           isnot;
@@ -663,6 +664,7 @@ list_candidates(
     char          *tpairs[2] = {NULL, NULL};
     struct berval *vpairs[2] = {NULL, NULL};
     int is_and = 0;
+    IDListSet *idl_set = NULL;
 
     slapi_log_err(SLAPI_LOG_TRACE, "list_candidates", "=> 0x%x\n", ftype);
 
@@ -766,6 +768,10 @@ list_candidates(
         goto out;
     }
 
+    if  (ftype == LDAP_FILTER_OR || ftype == LDAP_FILTER_AND) {
+        idl_set = idl_set_create();
+    }
+
     idl = NULL;
     nextf = NULL;
     isnot = 0;
@@ -778,15 +784,21 @@ list_candidates(
                 (LDAP_FILTER_EQUALITY == slapi_filter_get_choice(slapi_filter_list_first(f))));
 
         if (isnot) {
-            /* if this is the first filter we have an allid search anyway, so bail */
-            if(f == f_head)
-            {
+            /*
+             * If this is the first filter, make sure we have something to
+             * subtract from.
+             */
+            if (f == f_head) {
                 idl = idl_allids( be );
-                break;
+                idl_set_insert_idl(idl_set, idl);
             }
+            /*
+             * Not the first filter - good! Get the indexed type out.
+             * if this is unindexed, we'll get allids. During the intersection
+             * in notin, we'll mark this as don't skip filter, because else we might
+             * get the wrong results.
+             */
 
-            /* Fetch the IDL for foo */
-            /* Later we'll remember to call idl_notin() */
             slapi_log_err(SLAPI_LOG_TRACE, "list_candidates" ,"NOT filter\n");
             if (filter_is_subtype(slapi_filter_list_first(f))) {
                 /*
@@ -832,64 +844,79 @@ list_candidates(
             }
         }
 
-        tmp2 = idl;
-        if ( idl == NULL ) {
-            idl = tmp;
-            if ( (ftype == LDAP_FILTER_AND) && ((idl == NULL) ||
-                (idl_length(idl) <= FILTER_TEST_THRESHOLD))) {
-                break; /* We can exit the loop now, since the candidate list is small already */
-            }
-        } else if ( ftype == LDAP_FILTER_AND ) {
-            if (isnot) {
-                /*
-                 * If tmp is NULL or ALLID, idl_notin just duplicates idl.
-                 * We don't have to do it.
-                 */
-                if (!tmp && !idl_is_allids(tmp)) {
-                    IDList *new_idl = NULL;
-                    int notin_result = 0;
-                    notin_result = idl_notin( be, idl, tmp, &new_idl );
-                    if (notin_result) {
-                        idl_free(&idl);
-                        idl = new_idl;
-                    }
-                }
-            } else {
-                idl = idl_intersection(be, idl, tmp);
-                idl_free( &tmp2 );
-            }
-            idl_free( &tmp );
-            /* stop if the list has gotten too small */
-            if ((idl == NULL) ||
-                (idl_length(idl) <= FILTER_TEST_THRESHOLD))
-                break;
-        } else {
-            Slapi_Operation *operation;
-            slapi_pblock_get( pb, SLAPI_OPERATION, &operation );
+        /*
+         * Assert we recieved a valid idl. If it was NULL, it means somewhere we failed
+         * during the dblayer interactions.
+         *
+         * idl_set requires a valid idl structure to generate the linked list of
+         * idls that we insert.
+         */
+        if (tmp == NULL) {
+            slapi_log_err(SLAPI_LOG_CRIT, "list_candidates", "NULL idl was recieved from filter_candidates_ext.");
+            slapi_log_err(SLAPI_LOG_CRIT, "list_candidates", "Falling back to empty IDL set. This may affect your search results.");
+            PR_ASSERT(tmp);
+            tmp = idl_alloc(0);
+        }
 
-            idl = idl_union( be, idl, tmp );
-            idl_free( &tmp );
-            idl_free( &tmp2 );
-            /* stop if we're already committed to an exhaustive
-             * search. :(
+        /*
+         * At this point we have the idl set from the subfilter. In idl_set,
+         * we stash this for later ....
+         */
+
+        if  (ftype == LDAP_FILTER_OR ||
+            (ftype == LDAP_FILTER_AND && !isnot)) {
+            idl_set_insert_idl(idl_set, tmp);
+        } else if (ftype == LDAP_FILTER_AND && isnot) {
+            idl_set_insert_complement_idl(idl_set, tmp);
+        }
+
+        if (ftype == LDAP_FILTER_OR && idl_set_union_shortcut(idl_set) != 0) {
+            /*
+             * If we encounter an allids idl, this means that union will return
+             * and allids - we should not process anymore, and fallback to full
+             * table scan at this point.
              */
-            /* PAGED RESULTS: we strictly limit the idlist size by the allids (aka idlistscan) limit.
+            goto apply_set_op;
+        }
+
+        if (ftype == LDAP_FILTER_AND && idl_set_intersection_shortcut(idl_set) != 0) {
+            /*
+             * If we encounter a zero length idl, we bail now because this can never
+             * result in a meaningful result besides zero.
              */
+            goto apply_set_op;
+        }
+
+    }
+
+    /*
+     * Do the idl_set operation if required.
+     * these are far more efficient than the iterative union and
+     * intersections we previously used.
+     */
+apply_set_op:
+
+    if (ftype == LDAP_FILTER_OR) {
+        /* If one of the idl_set is allids, this shortcuts :) */
+        idl = idl_set_union(idl_set, be);
+        size_t nids = IDL_NIDS(idl);
+        if ( allidslimit > 0 && nids > allidslimit ) {
+            Slapi_Operation *operation;
+            slapi_pblock_get( pb, SLAPI_OPERATION, &operation );
+            /* PAGED RESULTS: we strictly limit the idlist size by the allids (aka idlistscan) limit. */
             if (op_is_pagedresults(operation)) {
-                int nids = IDL_NIDS(idl);
-                if ( allidslimit > 0 && nids > allidslimit ) {
-                    idl_free( &idl );
-                    idl = idl_allids( be );
-                }
+                idl_free( &idl );
+                idl = idl_allids( be );
             }
-            if (idl_is_allids(idl))
-                break;
         }
+    } else if (ftype == LDAP_FILTER_AND) {
+        idl = idl_set_intersect(idl_set, be);
     }
 
     slapi_log_err(SLAPI_LOG_TRACE, "list_candidates", "<= %lu\n",
                    (u_long)IDL_NIDS(idl));
 out:
+    idl_set_destroy(idl_set);
     if (is_and) {
         /*
          * Sets IS_AND back to 0 only when this function set 1.
@@ -989,14 +1016,11 @@ keys2idl(
     int         allidslimit
 )
 {
-    IDList    *idl;
-    int    i;
+    IDList    *idl = NULL;
 
-    slapi_log_err(SLAPI_LOG_TRACE, "keys2idl", "=> type %s indextype %s\n",
-        type, indextype);
-    idl = NULL;
-    for ( i = 0; ivals[i] != NULL; i++ ) {
-        IDList    *idl2;
+    slapi_log_err(SLAPI_LOG_TRACE, "keys2idl", "=> type %s indextype %s\n", type, indextype);
+    for ( size_t i = 0; ivals[i] != NULL; i++ ) {
+        IDList    *idl2 = NULL;
 
         idl2 = index_read_ext_allids( pb, be, type, indextype, slapi_value_get_berval(ivals[i]), txn, err, unindexed, allidslimit );
 
@@ -1011,23 +1035,25 @@ keys2idl(
         }
 #endif
         if ( idl2 == NULL ) {
-            idl_free( &idl );
-            idl = NULL;
-            break;
+            slapi_log_err(SLAPI_LOG_WARNING, "keys2idl", "recieved NULL idl from index_read_ext_allids, treating as empty set\n");
+            slapi_log_err(SLAPI_LOG_WARNING, "keys2idl", "this is probably a bug that should be reported\n");
+            idl2 = idl_alloc(0);
         }
 
+        /* First iteration of the ivals, stash idl2. */
         if (idl == NULL) {
             idl = idl2;
         } else {
-            IDList    *tmp;
+            /*
+             * second iteration of the ivals - do an intersection and free
+             * the intermediates.
+             */
+            IDList    *tmp = NULL;
 
             tmp = idl;
             idl = idl_intersection(be, idl, idl2);
             idl_free( &idl2 );
             idl_free( &tmp );
-            if ( idl == NULL ) {
-                break;
-            }
         }
     }
 
diff --git a/ldap/servers/slapd/back-ldbm/idl_common.c b/ldap/servers/slapd/back-ldbm/idl_common.c
index cd3458e..bda0084 100644
--- a/ldap/servers/slapd/back-ldbm/idl_common.c
+++ b/ldap/servers/slapd/back-ldbm/idl_common.c
@@ -15,12 +15,15 @@
 
 #include "back-ldbm.h"
 
+/* Default to holding 512 ids for idl_fetch_ext */
+#define IDLIST_MIN_BLOCK_SIZE 512
+
 size_t idl_sizeof(IDList *idl)
 {
 	if (NULL == idl) {
 		return 0;
 	}
-	return (2 + idl->b_nmax) * sizeof(ID);
+	return sizeof(IDList) + (idl->b_nmax * sizeof(ID));
 }
 
 NIDS idl_length(IDList *idl)
@@ -44,10 +47,19 @@ idl_alloc( NIDS nids )
 {
 	IDList	*new;
 
+    if (nids == 0) {
+        /*
+         * Because we use allids in b_nmax as 0, when we request an
+         * empty set, this *looks* like an empty set. Instead, we make
+         * a minimum b_nmax to trick it.
+         */
+        nids = 1;
+    }
+
 	/* nmax + nids + space for the ids */
-	new = (IDList *) slapi_ch_calloc( (2 + nids), sizeof(ID) );
+	new = (IDList *) slapi_ch_calloc( 1, sizeof(IDList) + (sizeof(ID) * nids) );
 	new->b_nmax = nids;
-	new->b_nids = 0;
+	/* new->b_nids = 0; */
 
 	return( new );
 }
@@ -116,7 +128,7 @@ idl_append_extend(IDList **orig_idl, ID id)
 	IDList *idl = *orig_idl;
 
 	if (idl == NULL) {
-		idl = idl_alloc(32); /* used to be 0 */
+		idl = idl_alloc(IDLIST_MIN_BLOCK_SIZE); /* used to be 0 */
 		idl_append(idl, id);
 
 		*orig_idl = idl;
@@ -125,17 +137,11 @@ idl_append_extend(IDList **orig_idl, ID id)
 
 	if ( idl->b_nids == idl->b_nmax ) {
 		/* No more room, need to extend */
-		/* Allocate new IDL with twice the space of this one */
-		IDList *idl_new = NULL;
-		idl_new = idl_alloc(idl->b_nmax * 2);
-		if (NULL == idl_new) {
+		idl->b_nmax = idl->b_nmax * 2;
+		idl = slapi_ch_realloc(idl, sizeof(IDList) + (sizeof(ID) * idl->b_nmax));
+		if (idl == NULL) {
 			return ENOMEM;
 		}
-		/* copy over the existing contents */
-		idl_new->b_nids = idl->b_nids;
-		memcpy(idl_new->b_ids, idl->b_ids, sizeof(ID) * idl->b_nids);
-		idl_free(&idl);
-		idl = idl_new;
 	}
 
 	idl->b_ids[idl->b_nids] = id;
@@ -155,8 +161,7 @@ idl_dup( IDList *idl )
 	}
 
 	new = idl_alloc( idl->b_nmax );
-	SAFEMEMCPY( (char *) new, (char *) idl, (idl->b_nmax + 2)
-	    * sizeof(ID) );
+	memcpy(new, idl, idl_sizeof(idl) );
 
 	return( new );
 }
@@ -170,14 +175,14 @@ idl_min( IDList *a, IDList *b )
 int
 idl_id_is_in_idlist(IDList *idl, ID id)
 {
-    NIDS i;
     if (NULL == idl || NOID == id) {
         return 0; /* not in the list */
     }
     if (ALLIDS(idl)) {
         return 1; /* in the list */
     }
-    for (i = 0; i < idl->b_nids; i++) {
+
+    for (NIDS i = 0; i < idl->b_nids; i++) {
         if (id == idl->b_ids[i]) {
             return 1; /* in the list */
         }
@@ -186,6 +191,39 @@ idl_id_is_in_idlist(IDList *idl, ID id)
 }
 
 /*
+ * idl_compare - compare idl a and b for value equality and ordering.
+ */
+int64_t
+idl_compare(IDList *a, IDList *b) {
+    /* Assert they are not none. */
+    if (a == NULL || b == NULL) {
+        return 1;
+    }
+    /* Are they the same pointer? */
+    if (a == b) {
+        return 0;
+    }
+    /* Do they have the same number of IDs? */
+    if (a->b_nids != b->b_nids) {
+        return 1;
+    }
+
+    /* Are they both allid blocks? */
+    if ( ALLIDS( a ) && ALLIDS( b ) ) {
+        return 0;
+    }
+
+    /* Same size, and not the same array. Lets check! */
+    for (size_t i = 0; i < a->b_nids; i++) {
+        if (a->b_ids[i] != b->b_ids[i]) {
+            return 1;
+        }
+    }
+    /* Must be the same! */
+    return 0;
+}
+
+/*
  * idl_intersection - return a intersection b
  */
 IDList *
@@ -198,9 +236,14 @@ idl_intersection(
 	NIDS	ai, bi, ni;
 	IDList	*n;
 
-	if ( a == NULL || b == NULL ) {
-		return( NULL );
+	if ( a == NULL || a->b_nids == 0 ) {
+		return idl_dup(a);
+	}
+
+	if ( b == NULL || b->b_nids == 0) {
+		return idl_dup(b);
 	}
+
 	if ( ALLIDS( a ) ) {
 		slapi_be_set_flag(be, SLAPI_BE_FLAG_DONT_BYPASS_FILTERTEST);
 		return( idl_dup( b ) );
@@ -225,10 +268,6 @@ idl_intersection(
 		}
 	}
 
-	if ( ni == 0 ) {
-		idl_free( &n );
-		return( NULL );
-	}
 	n->b_nids = ni;
 
 	return( n );
@@ -248,10 +287,10 @@ idl_union(
 	NIDS	ai, bi, ni;
 	IDList	*n;
 
-	if ( a == NULL ) {
+	if ( a == NULL || a->b_nids == 0 ) {
 		return( idl_dup( b ) );
 	}
-	if ( b == NULL ) {
+	if ( b == NULL || b->b_nids == 0 ) {
 		return( idl_dup( a ) );
 	}
 	if ( ALLIDS( a ) || ALLIDS( b ) ) {
@@ -311,12 +350,25 @@ idl_notin(
 	IDList	*n;
 	*new_result = NULL;
 
-	if ( a == NULL ) {
-		return( 0 );
+	/* Nothing in a, so nothing to subtract from. */
+	if ( a == NULL || a->b_nids == 0 ) {
+		*new_result = idl_alloc(0);
+		return 1;
 	}
-	if ( b == NULL || ALLIDS( b ) ) {
-		*new_result = idl_dup( a );
-		return( 1 );
+	/* b is empty, so nothing to remove from a. */
+	if ( b == NULL || b->b_nids == 0 ) {
+		// *new_result = idl_dup( a );
+		return 0;
+	}
+
+	/* b is allIDS, so a - b, should be the empty set.
+	 * but if the type in unindexed, we don't know. Instead we have to
+	 * return a, and mark that we can't skip the filter test.
+	 */
+	if ( ALLIDS(b) ) {
+		// *new_result = idl_alloc(0);
+		slapi_be_set_flag(be, SLAPI_BE_FLAG_DONT_BYPASS_FILTERTEST);
+		return 0;
 	}
 
 	if ( ALLIDS( a ) ) { /* Not convinced that this code is really worth it */
@@ -418,7 +470,7 @@ idl_nextid( IDList *idl, ID id )
 {
 	NIDS	i;
 
-	if (NULL == idl) {
+	if (NULL == idl || idl->b_nids == 0) {
 		return NOID;
 	}
 	if ( ALLIDS( idl ) ) {
@@ -463,10 +515,18 @@ idl_iterator idl_iterator_decrement(idl_iterator *i)
 
 ID idl_iterator_dereference(idl_iterator i, const IDList *idl)
 {
+	/*
+	 * NOID is used to terminate iteration. When we get an allIDS
+	 * idl->b_nids == number of entries in id2entry. That's how we
+	 * know to stop returning ids.
+	 */
 	if ( (NULL == idl) || (i >= idl->b_nids)) {
 		return NOID;
 	}
 	if (ALLIDS(idl)) {
+		/*
+		 * entries in id2entry start at 1, not 0, so we have off by one here.
+		 */
 		return (ID) i + 1;
 	} else {
 		return idl->b_ids[i];
@@ -484,5 +544,5 @@ ID idl_iterator_dereference_decrement(idl_iterator *i, const IDList *idl)
 {
 	idl_iterator_decrement(i);
 	return idl_iterator_dereference(*i,idl);
-
 }
+
diff --git a/ldap/servers/slapd/back-ldbm/idl_new.c b/ldap/servers/slapd/back-ldbm/idl_new.c
index 1ffdc03..246653a 100644
--- a/ldap/servers/slapd/back-ldbm/idl_new.c
+++ b/ldap/servers/slapd/back-ldbm/idl_new.c
@@ -406,6 +406,9 @@ idl_new_range_fetch(
         goto error; /* Not found is OK, return NULL IDL */
     }
 
+    /* Allocate an empty set to populate into */
+    idl = idl_alloc(0);
+
     /* Iterate over the duplicates, amassing them into an IDL */
     while (cur_key.data &&
            (upperkey && upperkey->data ?
@@ -600,21 +603,16 @@ error:
         qsort((void *)&idl->b_ids[0], idl->b_nids, (size_t)sizeof(ID), idl_sort_cmp);
     }
     if (operator & SLAPI_OP_RANGE_NO_IDL_SORT) {
-        int i;
-        int left = leftovercnt;
-        while (left) {
-            for (i = 0; i < leftovercnt; i++) {
-                if (leftover[i].key && idl_id_is_in_idlist(idl, leftover[i].key)) {
-                    idl_rc = idl_append_extend(&idl, leftover[i].id);
-                    if (idl_rc) {
-                        slapi_log_err(SLAPI_LOG_ERR, "idl_new_range_fetch",
-                            "Unable to extend id list (err=%d)\n", idl_rc);
-                        idl_free(&idl);
-                        return NULL;
-                    }
-                    leftover[i].key = 0;
-                    left--;
+        for (size_t i = 0; i < leftovercnt; i++) {
+            if (leftover[i].key && idl_id_is_in_idlist(idl, leftover[i].key) == 0) {
+                idl_rc = idl_append_extend(&idl, leftover[i].id);
+                if (idl_rc) {
+                    slapi_log_err(SLAPI_LOG_ERR, "idl_new_range_fetch",
+                        "Unable to extend id list (err=%d)\n", idl_rc);
+                    idl_free(&idl);
+                    return NULL;
                 }
+                leftover[i].key = 0;
             }
         }
         slapi_ch_free((void **)&leftover);
diff --git a/ldap/servers/slapd/back-ldbm/idl_set.c b/ldap/servers/slapd/back-ldbm/idl_set.c
new file mode 100644
index 0000000..949e635
--- /dev/null
+++ b/ldap/servers/slapd/back-ldbm/idl_set.c
@@ -0,0 +1,282 @@
+/** BEGIN COPYRIGHT BLOCK
+ * Copyright (C) 2017 Red Hat, Inc.
+ * All rights reserved.
+ *
+ * License: GPL (version 3 or any later version).
+ * See LICENSE for details.
+ * END COPYRIGHT BLOCK **/
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include "back-ldbm.h"
+
+#define IDL_SET_SMALL_THRESHOLD 8
+
+/*
+ * In filterindex, rather than calling idl_union multiple times over
+ * this idl_set provides better apis to do efficent set manipulations
+ * for idl results.
+ *
+ * previously or results were calculated as:
+ * |(a)(b)(c)(d)
+ *
+ * t1 = union(a, b)
+ * t2 = union(t1, c)
+ * t3 = union(t2, d)
+ * As you can see, this results in n-1 union operations. Depending on
+ * the size of the IDL, this could signifigantly affect performance.
+ * It actually let to a situation where re-arranging a query would be
+ * faster.
+ *
+ * Instead, if we maintain a set of IDL results, then perform a more
+ * effective union we can improve our results. For example.
+ *
+ * |(a)(b)(c)(d)
+ * |(10,000)(1)(1)(1)
+ *
+ * t1 = union(b,c)
+ * t2 = union(t1,d)
+ * t3 = union(t2, a)
+ *
+ * We still perform the same number of unions but we did it with 10,005
+ * copies instead of 30,005 copies.
+ *
+ */
+
+IDListSet *
+idl_set_create() {
+    IDListSet *idl_set = (IDListSet *)slapi_ch_calloc(1, sizeof(IDListSet));
+    /* all other fields are 0 thanks to calloc */
+    return idl_set;
+}
+
+static void
+idl_set_free_idls(IDListSet *idl_set) {
+    /* Free idlists */
+    IDList *idl = idl_set->head;
+    IDList *next_idl = NULL;
+    while (idl != NULL) {
+        next_idl = idl->next;
+        idl_free(&idl);
+        idl = next_idl;
+    }
+
+    /* Free complements if any */
+    idl = idl_set->complement_head;
+    while (idl != NULL) {
+        next_idl = idl->next;
+        idl_free(&idl);
+        idl = next_idl;
+    }
+}
+
+void
+idl_set_destroy(IDListSet *idl_set) {
+    slapi_ch_free((void **)&(idl_set));
+}
+
+/*
+ * This function attempts to "loosley" arrange the incomming idls
+ * by size. It's not perfect, but the idea is to keep smaller
+ * idls early to reduce the amount of copies performed later.
+ */
+void
+idl_set_insert_idl(IDListSet *idl_set, IDList *idl) {
+    PR_ASSERT(idl);
+
+    int32_t is_allids = idl_is_allids(idl);
+    /* Is this an allids? */
+    if (is_allids) {
+        idl_set->allids = 1;
+    }
+
+    if (idl->b_nids == 0) {
+        idl_set->emptyset = 1;
+    }
+
+    if (idl_set->count == 0) {
+        idl->next = NULL;
+        idl_set->head = idl;
+        idl_set->tail = idl;
+    } else {
+        /*
+         * If it's small enough, prepend it.
+         *
+         * We check head size here because we at least want to know if 250 < 500. It means
+         * if we get some initially large terms, they get some benefit.
+         * Saying this, if we are given 1, 10,000, 250, 250, we'll have to do them in
+         * that order, which isn't exactly optimal. However, the  alternative
+         * is fully sorting this list and makeing idlist be doubly linked
+         *
+         * really, we are trying to optimise the many 1 result set + objectclass=type filter.
+         */
+        if (!is_allids && (idl->b_nids <= IDL_SET_SMALL_THRESHOLD || idl->b_nids <= idl_set->head->b_nids)) {
+            idl->next = idl_set->head;
+            idl_set->head = idl;
+        } else {
+            /* else it exceeds the threshold and head size, just append. */
+            idl_set->tail->next = idl;
+            idl_set->tail = idl;
+        }
+    }
+    idl_set->count += 1;
+
+    return;
+}
+
+/*
+ * The difference between this and insert is that complement implies
+ * a future intersection, and that we plan to use a "not" query.
+ *
+ * As a result, the order of operations is to:
+ * * intersect all sets
+ * * apply complements.
+ */
+void
+idl_set_insert_complement_idl(IDListSet *idl_set, IDList *idl) {
+    PR_ASSERT(idl);
+    /*
+     * If we complement to ALLIDS, the result is empty set.
+     * so we can put empty set into the main list. This will cause
+     * -- no change to  union (but this should never be called during OR / union)
+     * -- shortcut of the AND to trigger immediately
+     */
+    idl->next = idl_set->complement_head;
+    idl_set->complement_head = idl;
+    idl_set->count += 1;
+}
+
+int64_t
+idl_set_union_shortcut(IDListSet *idl_set) {
+    /* Are we able to shortcut the union process? */
+    /* This generally indicates the presence of allids */
+    return idl_set->allids;
+}
+
+int64_t
+idl_set_intersection_shortcut(IDListSet *idl_set) {
+    /* If we have a 0 length idl, we can never create an intersection. */
+    return idl_set->emptyset;
+}
+
+IDList *
+idl_set_union(IDListSet *idl_set, backend *be) {
+    if (idl_set->count == 0) {
+        return idl_alloc(0);
+    }
+    if (idl_set->count == 1) {
+        return idl_set->head;
+    }
+    if (idl_set->allids != 0) {
+        idl_set_free_idls(idl_set);
+        return idl_allids(be);
+    }
+    /*
+     * For now do a dumb union ....
+     * because idl_set is partially ordered, this should be
+     * relatively faster now.
+     */
+    IDList *result_list = idl_set->head;
+    IDList *new_result_list = NULL;
+    IDList *idl = idl_set->head->next;
+    IDList *next_idl = NULL;
+    while (idl != NULL) {
+        next_idl = idl->next;
+        new_result_list = idl_union(be, result_list, idl);
+
+        idl_free(&result_list);
+        idl_free(&idl);
+
+        result_list = new_result_list;
+        idl = next_idl;
+    }
+
+    return result_list;
+
+    /*
+     * To make this faster:
+     * track the max size idlist on the way in that is not
+     * allids. If we hav an all ids immedatel free all idls and return
+     *
+     * If idls count > 2 ...
+     *
+     * Then turn the max into a b+tree
+     * now do an insert sort of the rest of the sets in.
+     *
+     * Finally, dump the b+tree back to an idlist and return it.
+     */
+
+}
+
+IDList *
+idl_set_intersect(IDListSet *idl_set, backend *be) {
+    if (idl_set->count == 0) {
+        return idl_alloc(0);
+    }
+    if (idl_set->count == 1) {
+        return idl_set->head;
+    }
+
+    /* Okay, actually do a intersection then! */
+
+    /*
+     * Start with the *minimum* intersection, because it
+     * will reduce our candidate set the quickest.
+     */
+
+    IDList *result_list = idl_set->head;
+    IDList *new_result_list = NULL;
+    IDList *idl = idl_set->head->next;
+    IDList *next_idl = NULL;
+
+    while (idl != NULL) {
+        next_idl = idl->next;
+        /*
+         * Only both to intersect results if we have a non 0
+         * It's important to construct it like this, because it
+         * means if we have an empty idl, we never intersect it,
+         * and we free the idl's remaining in the idl_set.
+         *
+         * another aspect is that if idl_set->min is 0, because it
+         * starts as the result_list, we never do any intersections
+         * and we just free the remaining sets.
+         * hint: idl_intersection returns NULL for empty set.
+         */
+
+        if (result_list && result_list->b_nids != 0) {
+            new_result_list = idl_intersection(be, result_list, idl);
+            idl_free(&result_list);
+            result_list = new_result_list;
+        }
+        idl_free(&idl);
+        idl = next_idl;
+    }
+
+    /* Now, that we have the "smallest" intersection possible, we need to subtract
+     * elements as required.
+     */
+
+    idl = idl_set->complement_head;
+    while (idl != NULL) {
+        next_idl = idl->next;
+        if (idl_notin(be, result_list, idl, &new_result_list)) {
+            /*
+             * idl_notin returns 1 on new alloc, so free result_list and idl
+             */
+            idl_free(&idl);
+            idl_free(&result_list);
+            result_list = new_result_list;
+        } else {
+            /*
+             * idl_notin returns 0 when it "does nothing", so just free idl.
+             */
+            idl_free(&idl);
+        }
+        idl = next_idl;
+    }
+
+    return result_list;
+}
+
diff --git a/ldap/servers/slapd/back-ldbm/index.c b/ldap/servers/slapd/back-ldbm/index.c
index 4d74484..22c2e39 100644
--- a/ldap/servers/slapd/back-ldbm/index.c
+++ b/ldap/servers/slapd/back-ldbm/index.c
@@ -362,6 +362,7 @@ index_addordel_entry(
     {
         const CSN *tombstone_csn = NULL;
         char deletion_csn_str[CSN_STRSIZE];
+        char *entryusn_str;
         Slapi_DN parent;
         Slapi_DN *sdn = slapi_entry_get_sdn(e->ep_entry);
 
@@ -371,6 +372,8 @@ index_addordel_entry(
          * Just index the "nstombstone" attribute value from the objectclass
          * attribute, and the nsuniqueid attribute value, and the 
          * nscpEntryDN value of the deleted entry.
+         *
+         * 2017 - add entryusn to this to improve entryusn tombstone tasks.
          */
         result = index_addordel_string(be, SLAPI_ATTR_OBJECTCLASS, SLAPI_ATTR_VALUE_TOMBSTONE, e->ep_id, flags, txn);
         if ( result != 0 ) {
@@ -397,6 +400,19 @@ index_addordel_entry(
             }
         }
 
+        /*
+         * add the entryusn to the list of indexed attributes, even if it's a tombstone
+         */
+        entryusn_str = slapi_entry_attr_get_charptr(e->ep_entry, SLAPI_ATTR_ENTRYUSN);
+        if (entryusn_str != NULL) {
+            result = index_addordel_string(be, SLAPI_ATTR_ENTRYUSN, entryusn_str, e->ep_id, flags, txn);
+            slapi_ch_free_string(&entryusn_str);
+            if (result != 0) {
+                ldbm_nasty("index_addordel_entry",errmsg, 1021, result);
+                return result;
+            }
+        }
+
         slapi_sdn_done(&parent);
         if (entryrdn_get_switch()) { /* subtree-rename: on */
             Slapi_Attr* attr;
@@ -1033,6 +1049,12 @@ index_read_ext_allids(
 		interval = PR_MillisecondsToInterval(slapi_rand() % 100);
 		DS_Sleep(interval);
 	    continue;
+	  } else if (*err != 0 || idl == NULL) {
+	    /* The database might not exist. We have to assume it means empty set */
+	    slapi_log_err(SLAPI_LOG_TRACE, "index_read_ext_allids", "Failed to access idl index for %s\n", basetype);
+	    slapi_log_err(SLAPI_LOG_TRACE, "index_read_ext_allids", "Assuming %s has no index values\n", basetype);
+	    idl = idl_alloc(0);
+	    break;
 	  } else {
 	    break;
 	  }
@@ -1436,19 +1458,19 @@ index_range_read_ext(
     if (0 != *err) { 
         /* Free the key we just read above */
             DBT_FREE_PAYLOAD(lowerkey);
-        if (DB_NOTFOUND == *err) { 
-            *err = 0; 
-            idl = NULL; 
-        } else { 
-            idl = idl_allids( be ); 
+        if (DB_NOTFOUND == *err) {
+            *err = 0;
+            idl = idl_alloc(0);
+        } else {
+            idl = idl_allids( be );
             ldbm_nasty("index_range_read_ext", errmsg, 1080, *err);
-            slapi_log_err(SLAPI_LOG_ERR, 
-                "index_range_read_ext", "(%s,%s) allids (seek to lower key in index file err %i)\n", 
-                type, prefix, *err ); 
-        } 
-        dbc->c_close(dbc); 
+            slapi_log_err(SLAPI_LOG_ERR,
+                "index_range_read_ext", "(%s,%s) allids (seek to lower key in index file err %i)\n",
+                type, prefix, *err );
+        }
+        dbc->c_close(dbc);
         goto error;
-    }         
+    }
     /* We now close the cursor, since we're about to iterate over many keys */
     *err = dbc->c_close(dbc);
 
diff --git a/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h b/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h
index ca2ff7b..46b67cc 100644
--- a/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h
+++ b/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h
@@ -281,6 +281,20 @@ IDList *idl_new_range_fetch(backend *be, DB* db, DBT *lowerkey, DBT *upperkey,
                             int allidslimit, int sizelimit, time_t stoptime,
                             int lookthrough_limit, int operator);
 
+
+int64_t idl_compare(IDList *a, IDList *b);
+
+/*
+ * idl_set.c
+ */
+IDListSet * idl_set_create();
+void idl_set_destroy(IDListSet *idl_set);
+void idl_set_insert_idl(IDListSet *idl_set, IDList *idl);
+int64_t idl_set_union_shortcut(IDListSet *idl_set);
+int64_t idl_set_intersection_shortcut(IDListSet *idl_set);
+IDList * idl_set_union(IDListSet *idl_set, backend *be);
+IDList * idl_set_intersect(IDListSet *idl_set, backend *be);
+
 /*
  * index.c
  */
-- 
1.8.3.1