# abcd has no explicit primary key, so it gets a hidden rowid primary key.
# The secondary index on (a, b) does not contain c, so it is non-covering for
# queries that read c.
exec-ddl
CREATE TABLE abcd (a INT, b INT, c INT, INDEX (a,b))
----
TABLE abcd
 ├── a int
 ├── b int
 ├── c int
 ├── rowid int not null (hidden)
 ├── INDEX primary
 │    └── rowid int not null (hidden)
 └── INDEX secondary
      ├── a int
      ├── b int
      └── rowid int not null (hidden)

# small has no secondary indexes; its only index is the primary index on the
# hidden rowid column.
exec-ddl
CREATE TABLE small (m INT, n INT)
----
TABLE small
 ├── m int
 ├── n int
 ├── rowid int not null (hidden)
 └── INDEX primary
      └── rowid int not null (hidden)

# Inject statistics for small: 10 rows, with 10 distinct values of m.
exec-ddl
ALTER TABLE small INJECT STATISTICS '[
  {
    "columns": ["m"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10,
    "distinct_count": 10
  }
]'
----

# We can only test lookup stat generation when using non-covering indexes
# (that's when we create a group with LookupJoin). We can compare the
# statistics with the top-level join; they should be in the same ballpark.
#
# In the first plan below, the inner lookup join uses abcd@secondary and the
# outer lookup join fetches the remaining column c from the primary index,
# keyed on rowid.

opt
SELECT * FROM small JOIN abcd ON a=m
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) c:6(int)
 ├── key columns: [7] = [7]
 ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0]
 ├── fd: (1)==(4), (4)==(1)
 ├── inner-join (lookup abcd@secondary)
 │    ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) abcd.rowid:7(int!null)
 │    ├── key columns: [1] = [4]
 │    ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0, distinct(7)=95.617925, null(7)=0]
 │    ├── fd: (7)-->(4,5), (1)==(4), (4)==(1)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0]
 │    └── filters (true)
 └── filters (true)

# Filter on a column of the left input (small). In the expected plan, n > 2 is
# applied as a select on top of the scan of small, and neither lookup join has
# an extra ON filter.
opt
SELECT * FROM small JOIN abcd ON a=m WHERE n > 2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int) c:6(int)
 ├── key columns: [7] = [7]
 ├── stats: [rows=33, distinct(1)=3.3, null(1)=0, distinct(2)=0.98265847, null(2)=0, distinct(4)=3.3, null(4)=0]
 ├── fd: (1)==(4), (4)==(1)
 ├── inner-join (lookup abcd@secondary)
 │    ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int) abcd.rowid:7(int!null)
 │    ├── key columns: [1] = [4]
 │    ├── stats: [rows=33, distinct(1)=3.3, null(1)=0, distinct(2)=0.98265847, null(2)=0, distinct(4)=3.3, null(4)=0, distinct(7)=32.6221433, null(7)=0]
 │    ├── fd: (7)-->(4,5), (1)==(4), (4)==(1)
 │    ├── select
 │    │    ├── columns: m:1(int) n:2(int!null)
 │    │    ├── stats: [rows=3.3, distinct(1)=3.3, null(1)=0, distinct(2)=0.98265847, null(2)=0]
 │    │    ├── scan small
 │    │    │    ├── columns: m:1(int) n:2(int)
 │    │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(2)=1, null(2)=0.1]
 │    │    └── filters
 │    │         └── n > 2 [type=bool, outer=(2), constraints=(/2: [/3 - ]; tight)]
 │    └── filters (true)
 └── filters (true)

# Filter that applies to the right side and gets pulled back into the ON
# condition. Column b is part of the secondary index, so b > 2 is evaluated by
# the lookup join on abcd@secondary.
opt
SELECT * FROM small JOIN abcd ON a=m WHERE b > 2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int!null) c:6(int)
 ├── key columns: [7] = [7]
 ├── stats: [rows=33.6127051, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0, distinct(5)=28.5893063, null(5)=0]
 ├── fd: (1)==(4), (4)==(1)
 ├── inner-join (lookup abcd@secondary)
 │    ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int!null) abcd.rowid:7(int!null)
 │    ├── key columns: [1] = [4]
 │    ├── stats: [rows=33.3333333, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0, distinct(5)=28.3867538, null(5)=0, distinct(7)=32.837752, null(7)=0]
 │    ├── fd: (7)-->(4,5), (1)==(4), (4)==(1)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0]
 │    └── filters
 │         └── b > 2 [type=bool, outer=(5), constraints=(/5: [/3 - ]; tight)]
 └── filters (true)

# Filter that can only be applied after the primary index join. Column c is
# not in the secondary index, so c > 2 is evaluated by the outer lookup join
# into the primary index.
opt
SELECT * FROM small JOIN abcd ON a=m WHERE c>2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) c:6(int!null)
 ├── key columns: [7] = [7]
 ├── stats: [rows=33.6127051, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0, distinct(6)=28.5893063, null(6)=0]
 ├── fd: (1)==(4), (4)==(1)
 ├── inner-join (lookup abcd@secondary)
 │    ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) abcd.rowid:7(int!null)
 │    ├── key columns: [1] = [4]
 │    ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0, distinct(7)=95.617925, null(7)=0]
 │    ├── fd: (7)-->(4,5), (1)==(4), (4)==(1)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0]
 │    └── filters (true)
 └── filters
      └── c > 2 [type=bool, outer=(6), constraints=(/6: [/3 - ]; tight)]

# Multiple equalities: both a=m and b=n are used as lookup key columns on
# abcd@secondary, while c > 2 is still applied by the outer lookup join into
# the primary index.
opt
SELECT * FROM small JOIN abcd ON a=m AND b=n WHERE c>2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int!null) c:6(int!null)
 ├── key columns: [7] = [7]
 ├── stats: [rows=0.342367862, distinct(1)=0.342367862, null(1)=0, distinct(2)=0.342367862, null(2)=0, distinct(4)=0.342367862, null(4)=0, distinct(5)=0.342367862, null(5)=0, distinct(6)=0.341789832, null(6)=0]
 ├── fd: (1)==(4), (4)==(1), (2)==(5), (5)==(2)
 ├── inner-join (lookup abcd@secondary)
 │    ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int!null) abcd.rowid:7(int!null)
 │    ├── key columns: [1 2] = [4 5]
 │    ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=1, null(4)=0, distinct(5)=1, null(5)=0, distinct(7)=0.99955012, null(7)=0]
 │    ├── fd: (7)-->(4,5), (1)==(4), (4)==(1), (2)==(5), (5)==(2)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(2)=1, null(2)=0.1]
 │    └── filters (true)
 └── filters
      └── c > 2 [type=bool, outer=(6), constraints=(/6: [/3 - ]; tight)]

# abc has a composite primary key (a, c); b is its only nullable column.
exec-ddl
CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, c))
----
TABLE abc
 ├── a int not null
 ├── b int
 ├── c int not null
 └── INDEX primary
      ├── a int not null
      └── c int not null

# def has primary key (f, e) and a secondary index e_idx on e that stores d,
# so e_idx contains all three columns of the table.
exec-ddl
CREATE TABLE def (d INT, e INT, f INT, PRIMARY KEY (f, e), INDEX e_idx (e) STORING (d))
----
TABLE def
 ├── d int
 ├── e int not null
 ├── f int not null
 ├── INDEX primary
 │    ├── f int not null
 │    └── e int not null
 └── INDEX e_idx
      ├── e int not null
      ├── f int not null
      └── d int (storing)

# Set up the statistics as if the first table is much smaller than the second.
# abc: 100 rows, with 100 distinct values of a.
exec-ddl
ALTER TABLE abc INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 100
  }
]'
----

# def: 10000 rows; e has 100 distinct values and f has 10000 distinct values.
exec-ddl
ALTER TABLE def INJECT STATISTICS '[
  {
    "columns": ["e"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 100
  },
  {
    "columns": ["f"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'
----

# The filter a=f is selective (f has 10000 distinct values over 10000 rows),
# so we expect a lookup join. The lookup goes into def's primary index, keyed
# on f.
opt
SELECT * FROM abc JOIN def ON a = f
----
inner-join (lookup def)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int!null) f:6(int!null)
 ├── key columns: [1] = [6]
 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=9.99954623, null(3)=0, distinct(5)=63.2138954, null(5)=0, distinct(6)=100, null(6)=0]
 ├── key: (3,5,6)
 ├── fd: (1,3)-->(2), (5,6)-->(4), (1)==(6), (6)==(1)
 ├── scan abc
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0]
 │    ├── key: (1,3)
 │    └── fd: (1,3)-->(2)
 └── filters (true)

# The filter a=e is not very selective (e has only 100 distinct values over
# 10000 rows), so we do not expect a lookup join. The expected plan is a merge
# join between abc and def@e_idx, ordered on a and e respectively.
opt
SELECT * FROM abc JOIN def ON a = e
----
inner-join (merge)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int!null) f:6(int!null)
 ├── left ordering: +1
 ├── right ordering: +5
 ├── stats: [rows=10000, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0, distinct(5)=100, null(5)=0, distinct(6)=6339.67659, null(6)=0]
 ├── key: (3,5,6)
 ├── fd: (1,3)-->(2), (5,6)-->(4), (1)==(5), (5)==(1)
 ├── scan abc
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0]
 │    ├── key: (1,3)
 │    ├── fd: (1,3)-->(2)
 │    └── ordering: +1
 ├── scan def@e_idx
 │    ├── columns: d:4(int) e:5(int!null) f:6(int!null)
 │    ├── stats: [rows=10000, distinct(5)=100, null(5)=0, distinct(6)=10000, null(6)=0]
 │    ├── key: (5,6)
 │    ├── fd: (5,6)-->(4)
 │    └── ordering: +5
 └── filters (true)

# Check column statistics for lookup join. The colstat options request
# statistics for columns 1 through 6 individually and for the column set
# (2,5,6); the stats line of the lookup join lists each of them.
opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=5 colstat=6 colstat=(2,5,6)
SELECT * FROM abc JOIN DEF ON a = f
----
inner-join (lookup def)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int!null) f:6(int!null)
 ├── key columns: [1] = [6]
 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=9.99954623, null(2)=1, distinct(3)=9.99954623, null(3)=0, distinct(4)=95.1671064, null(4)=1, distinct(5)=63.2138954, null(5)=0, distinct(6)=100, null(6)=0, distinct(2,5,6)=100, null(2,5,6)=1]
 ├── key: (3,5,6)
 ├── fd: (1,3)-->(2), (5,6)-->(4), (1)==(6), (6)==(1)
 ├── scan abc
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1, distinct(3)=10, null(3)=0]
 │    ├── key: (1,3)
 │    └── fd: (1,3)-->(2)
 └── filters (true)
