CREATE TABLE employeeexistsexists (emp_no INT NOT NULL,
emp_fname varCHAR(20) NOT NULL,
emp_lname varCHAR(20) NOT NULL,
dept_no varCHAR(4) NULL)
insert into employeeexistsexists values(1, 'Matthew', 'Smith', 'd3')
insert into employeeexistsexists values(2, 'Ann', 'Jones', 'd3')
insert into employeeexistsexists values(3, 'John', 'Barrimore','d1')
insert into employeeexistsexists values(4, 'James', 'James', 'd2')
insert into employeeexistsexists values(5, 'Elsa', 'Bertoni', 'd2')
insert into employeeexistsexists values(6, 'Elke', 'Hansel', 'd2')
insert into employeeexistsexists values(7, 'Sybill', 'Moser', 'd1')
select * from employeeexistsexists
CREATE TABLE department(dept_no CHAR(4) NOT NULL,
dept_name CHAR(25) NOT NULL,
location CHAR(30) NULL)
insert into department values ('d1', 'developer', 'Dallas')
insert into department values ('d2', 'tester', 'Seattle')
insert into department values ('d3', 'marketing', 'Dallas')
select * from department
select * from employeeexistsexists
--NOT IN
SELECT emp_lname FROM employeeexistsexists WHERE emp_lname in
(SELECT emp_lname FROM department WHERE employeeexistsexists.dept_no = department.dept_no
AND location = 'Seattle')
--NOT EXISTS
select emp_lname from employeeexistsexists where exists
(select * from department where employeeexistsexists.dept_no=department.dept_no and location= 'seattle')
select * from department
select * from employeeexistsexists
The difference between NOT IN and NOT EXISTS
There is a popular misconception that NOT IN and NOT EXISTS are two ways to filter out rows present in one table and not in another table, with both methods being usable interchangeably. Popular wisdom is that the difference between the two is only in terms of performance (being on the basis of whether the larger table is the "outer" or the "inner" in the query) but that the results are the same.
However, we must remember that such an operation being an Anti-Join fails when NULLs are involved. In Oracle a NULL cannot be compared to any other value, not even another NULL. Therefore, a NOT IN operation would fail if the result set being probed returns a NULL. In such a case, the results of a NOT IN query is 0 rows while a NOT EXISTS query would still show the rows present in the one table but not in the other table.
Here is a simple demonstration :
You can see that if there is a single row with a NULL value on the query predicate (the OWNER column in QUERY_TABLE), the NOT IN query actually fails.
Tom Kyte has explained this in the section on Anti-Joins in his "Effective Oracle By Design" book. However, I prefer the much better explanation by Roger Schrag
However, we must remember that such an operation being an Anti-Join fails when NULLs are involved. In Oracle a NULL cannot be compared to any other value, not even another NULL. Therefore, a NOT IN operation would fail if the result set being probed returns a NULL. In such a case, the results of a NOT IN query is 0 rows while a NOT EXISTS query would still show the rows present in the one table but not in the other table.
Here is a simple demonstration :
SQL> -- create the two test tables SQL> SQL> drop table results_table purge; Table dropped. SQL> drop table query_table purge; Table dropped. SQL> SQL> create table results_table 2 as select owner,object_name,object_type from dba_objects where owner in ('HEMANT','DBSNMP','OUTLN'); Table created. SQL> create table query_table 2 as select owner,object_name,object_type from dba_objects where owner in ('DBSNMP','OUTLN'); Table created. SQL> SQL> -- a NOT IN query SQL> select r.owner, r.object_name from results_table r 2 where r.owner not in (select q.owner from query_table q) 3 order by 1,2; OWNER OBJECT_NAME ------------------------------ ------------------------------ HEMANT DUPDB HEMANT MY_T_A HEMANT RESULTS_TABLE HEMANT SOURCE_TABLE HEMANT TEST_APPEND HEMANT TRACE_USER_SESSIONS HEMANT TRACE_USER_SESSIONS_BEGIN HEMANT TRACE_USER_SESSIONS_END 8 rows selected. SQL> SQL> -- a NOT EXISTS query SQL> select r.owner, r.object_name from results_table r 2 where not exists (select '1' from query_table q where r.owner=q.owner) 3 order by 1,2; OWNER OBJECT_NAME ------------------------------ ------------------------------ HEMANT DUPDB HEMANT MY_T_A HEMANT RESULTS_TABLE HEMANT SOURCE_TABLE HEMANT TEST_APPEND HEMANT TRACE_USER_SESSIONS HEMANT TRACE_USER_SESSIONS_BEGIN HEMANT TRACE_USER_SESSIONS_END 8 rows selected. SQL> SQL> --- SQL> REM So far, NOT IN and NOT EXISTS have presented the same results SQL> SQL> REM What happens if there is a row with a NULL value ? SQL> SQL> insert into query_table values (NULL,'ABCDEFGH','TABLE'); 1 row created. SQL> commit; Commit complete. SQL> SQL> -- retry the NOT IN query SQL> select r.owner, r.object_name from results_table r 2 where r.owner not in (select q.owner from query_table q) 3 order by 1,2; no rows selected SQL> SQL> -- retry the NOT EXISTS query SQL> select r.owner, r.object_name from results_table r 2 where not exists (select '1' from query_table q where r.owner=q.owner) 3 order by 1,2; OWNER OBJECT_NAME ------------------------------ ------------------------------ HEMANT DUPDB HEMANT MY_T_A HEMANT RESULTS_TABLE HEMANT SOURCE_TABLE HEMANT TEST_APPEND HEMANT TRACE_USER_SESSIONS HEMANT TRACE_USER_SESSIONS_BEGIN HEMANT TRACE_USER_SESSIONS_END 8 rows selected. SQL> SQL> --- SQL> REM Surprise ?! The NOT IN returned 0 rows ! SQL> REM Why ? Because of the presence of a NULL in the query_table ! SQL> REM SQL> REM REMEMBER : A "NOT IN" anti-join fails because a NULL returned cannot be compared ! SQL> SQL> -- SQL> REM One "workaround" is to filter out rows which contain NULLs SQL> REM .... but think carefully before you do so. Are you sure you want to exclude them ? SQL> SQL> REM In the ideal world, such columns should be defined as NOT NULL columns ! SQL> REM That would be the right schema design ! SQL> SQL> -- test the suggested workaround SQL> select r.owner, r.object_name from results_table r 2 where r.owner not in (select q.owner from query_table q WHERE OWNER IS NOT NULL) 3 order by 1,2; OWNER OBJECT_NAME ------------------------------ ------------------------------ HEMANT DUPDB HEMANT MY_T_A HEMANT RESULTS_TABLE HEMANT SOURCE_TABLE HEMANT TEST_APPEND HEMANT TRACE_USER_SESSIONS HEMANT TRACE_USER_SESSIONS_BEGIN HEMANT TRACE_USER_SESSIONS_END 8 rows selected. SQL>
You can see that if there is a single row with a NULL value on the query predicate (the OWNER column in QUERY_TABLE), the NOT IN query actually fails.
Tom Kyte has explained this in the section on Anti-Joins in his "Effective Oracle By Design" book. However, I prefer the much better explanation by Roger Schrag
No comments:
Post a Comment