# Test table a: x is the primary key, y and s are nullable, d is NOT NULL, and
# there is a unique secondary index on (s DESC, d).
exec-ddl
CREATE TABLE a (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d))
----
TABLE a
 ├── x int not null
 ├── y int
 ├── s string
 ├── d decimal not null
 ├── INDEX primary
 │    └── x int not null
 └── INDEX secondary
      ├── s string desc
      ├── d decimal not null
      └── x int not null (storing)

# Inject statistics for a, all with row_count 2000: distinct counts of 2000
# for (x) and (x,y), 10 for (s), and 100 for (s,y). No single-column statistic
# is injected for y or d, and no null_count is specified.
exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000
  },
  {
    "columns": ["x","y"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000
  },
  {
    "columns": ["s"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 10
  },
  {
    "columns": ["s","y"],
    "created_at": "2018-01-01 1:40:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 100
  }
]'
----

# Projecting only the key column x: the project keeps the scan's row count
# (2000) and its key (1).
build
SELECT x FROM a
----
project
 ├── columns: x:1(int!null)
 ├── stats: [rows=2000]
 ├── key: (1)
 └── scan a
      ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
      ├── stats: [rows=2000]
      ├── key: (1)
      └── fd: (1)-->(2-4), (3,4)~~>(1,2)

# Projecting only the non-key columns y and s: the row count is still 2000,
# but the project output lists no key and no fd.
build
SELECT y, s FROM a
----
project
 ├── columns: y:2(int) s:3(string)
 ├── stats: [rows=2000]
 └── scan a
      ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
      ├── stats: [rows=2000]
      ├── key: (1)
      └── fd: (1)-->(2-4), (3,4)~~>(1,2)

# Grouping on (x, y) over a projection of those two columns. The multi-column
# stat distinct(1,2)=2000 appears on the group-by, the project, and the scan;
# it equals the distinct count injected for (x,y).
build
SELECT count(*) FROM (SELECT x, y FROM a) GROUP BY x, y
----
project
 ├── columns: count:5(int)
 ├── stats: [rows=2000]
 └── group-by
      ├── columns: x:1(int!null) y:2(int) count_rows:5(int)
      ├── grouping columns: x:1(int!null) y:2(int)
      ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0]
      ├── key: (1)
      ├── fd: (1)-->(2,5)
      ├── project
      │    ├── columns: x:1(int!null) y:2(int)
      │    ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0]
      │    ├── key: (1)
      │    ├── fd: (1)-->(2)
      │    └── scan a
      │         ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
      │         ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0]
      │         ├── key: (1)
      │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── aggregations
           └── count-rows [type=int]

# Test that the stats are calculated correctly for synthesized columns.
# The projected concat(s, y::string) column (5) reads columns 2 and 3, and its
# distinct(5)=100 matches distinct(2,3)=100 on the scan, which in turn equals
# the distinct count injected for (s,y). The equality filter then estimates
# rows=20 (2000 rows / 100 distinct values).
build
SELECT * FROM (SELECT concat(s, y::string) FROM a) AS q(v) WHERE v = 'foo'
----
select
 ├── columns: v:5(string!null)
 ├── stats: [rows=20, distinct(5)=1, null(5)=0]
 ├── fd: ()-->(5)
 ├── project
 │    ├── columns: concat:5(string)
 │    ├── stats: [rows=2000, distinct(5)=100, null(5)=0]
 │    ├── scan a
 │    │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    │    ├── stats: [rows=2000, distinct(2,3)=100, null(2,3)=0]
 │    │    ├── key: (1)
 │    │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 │    └── projections
 │         └── concat(s, y::STRING) [type=string, outer=(2,3)]
 └── filters
      └── concat = 'foo' [type=bool, outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)]

# Test that stats for synthesized and non-synthesized columns are combined.
# Grouping on x (1) and the synthesized concat column (5) shows
# distinct(1,5)=2000, the same value as distinct(1-3) on the scan, i.e. the
# stat over x plus the concat expression's input columns y (2) and s (3).
build
SELECT * FROM (SELECT concat(s, y::string), x FROM a) AS q(v, x) GROUP BY v, x
----
group-by
 ├── columns: v:5(string) x:1(int!null)
 ├── grouping columns: x:1(int!null) concat:5(string)
 ├── stats: [rows=2000, distinct(1,5)=2000, null(1,5)=0]
 ├── key: (1)
 ├── fd: (1)-->(5)
 └── project
      ├── columns: concat:5(string) x:1(int!null)
      ├── stats: [rows=2000, distinct(1,5)=2000, null(1,5)=0]
      ├── key: (1)
      ├── fd: (1)-->(5)
      ├── scan a
      │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
      │    ├── stats: [rows=2000, distinct(1-3)=2000, null(1-3)=0]
      │    ├── key: (1)
      │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── projections
           └── concat(s, y::STRING) [type=string, outer=(2,3)]

# No available stats for column y.
# The statistics injected above for a have no entry on y alone, yet the scan
# shows distinct(2)=200 and null(2)=20 (10% and 1% of the 2000 rows) --
# presumably optimizer defaults for an unknown column; TODO confirm. The
# synthesized column v = y + 3 (5) carries the same distinct and null counts.
build
SELECT * FROM (SELECT y + 3 AS v FROM a) WHERE v >= 1 AND v <= 100
----
select
 ├── columns: v:5(int!null)
 ├── stats: [rows=990, distinct(5)=100, null(5)=0]
 ├── project
 │    ├── columns: v:5(int)
 │    ├── stats: [rows=2000, distinct(5)=200, null(5)=20]
 │    ├── scan a
 │    │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    │    ├── stats: [rows=2000, distinct(2)=200, null(2)=20]
 │    │    ├── key: (1)
 │    │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 │    └── projections
 │         └── y + 3 [type=int, outer=(2)]
 └── filters
      └── (v >= 1) AND (v <= 100) [type=bool, outer=(5), constraints=(/5: [/1 - /100]; tight)]

# Second test table kuv: k is the primary key; u and v are nullable and there
# are no secondary indexes.
exec-ddl
CREATE TABLE kuv (k INT PRIMARY KEY, u FLOAT, v STRING)
----
TABLE kuv
 ├── k int not null
 ├── u float
 ├── v string
 └── INDEX primary
      └── k int not null

# Inject statistics for kuv with row_count 2000: 2000 distinct values of k and
# 10 distinct values of v. No statistic is injected for u.
exec-ddl
ALTER TABLE kuv INJECT STATISTICS '[
  {
    "columns": ["k"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000
  },
  {
    "columns": ["v"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 10
  }
]'
----

# Correlated subquery.
# The projection s < v inside EXISTS references column s (3) of the outer
# table a, so the inner project and group-by both list outer: (3). The
# projected column's distinct(8)=10 equals distinct(7)=10 for v on the kuv
# scan, and the group-by on that column is estimated at 10 rows.
build
SELECT * FROM a WHERE EXISTS (SELECT s < v FROM kuv GROUP BY s < v)
----
select
 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 ├── stats: [rows=666.666667, distinct(1)=666.666667, null(1)=0, distinct(4)=196.531694, null(4)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4), (3,4)~~>(1,2)
 ├── scan a
 │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(4)=200, null(4)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 └── filters
      └── exists [type=bool, outer=(3), correlated-subquery]
           └── group-by
                ├── columns: column8:8(bool)
                ├── grouping columns: column8:8(bool)
                ├── outer: (3)
                ├── stats: [rows=10, distinct(8)=10, null(8)=0]
                ├── key: (8)
                └── project
                     ├── columns: column8:8(bool)
                     ├── outer: (3)
                     ├── stats: [rows=2000, distinct(8)=10, null(8)=0]
                     ├── scan kuv
                     │    ├── columns: k:5(int!null) u:6(float) v:7(string)
                     │    ├── stats: [rows=2000, distinct(7)=10, null(7)=0]
                     │    ├── key: (5)
                     │    └── fd: (5)-->(6,7)
                     └── projections
                          └── s < v [type=bool, outer=(3,7)]

# Bump up null counts.
# Inject a new set of statistics for a, with later created_at timestamps
# (2:00 versus 1:00-1:40 earlier in this file): x has 2000 distinct values,
# y has 500 distinct values and 1000 nulls, s has 500 distinct values and 500
# nulls. No multi-column statistics are injected this time.
exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 500,
    "null_count": 1000
  },
  {
    "columns": ["s"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 500,
    "null_count": 500
  }
]'
----

# The colstat arguments name columns 2 (y), 3 (s), and the set (2,3); the
# output shows stats for exactly those. The single-column values match the
# statistics injected just above. The multi-column distinct(2,3)=2000 and
# null(2,3)=1250 have no injected counterpart; 1250 is consistent with
# 2000 * (1 - (1 - 1000/2000) * (1 - 500/2000)), i.e. presumably derived by
# treating the two null fractions as independent -- TODO confirm.
build colstat=2 colstat=3 colstat=(2,3)
SELECT y, s FROM a
----
project
 ├── columns: y:2(int) s:3(string)
 ├── stats: [rows=2000, distinct(2)=500, null(2)=1000, distinct(3)=500, null(3)=500, distinct(2,3)=2000, null(2,3)=1250]
 └── scan a
      ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
      ├── stats: [rows=2000, distinct(2)=500, null(2)=1000, distinct(3)=500, null(3)=500, distinct(2,3)=2000, null(2,3)=1250]
      ├── key: (1)
      └── fd: (1)-->(2-4), (3,4)~~>(1,2)

# Test that the stats are calculated correctly for synthesized columns, this
# time with the statistics that include null counts. Same query as the earlier
# synthesized-column test: the concat column (5) now carries distinct(5)=2000
# and null(5)=1250, matching distinct(2,3)/null(2,3) on the scan, and the
# equality filter's estimate is rows=0.375 instead of 20. 0.375 is consistent
# with (2000 - 1250) non-null rows / 2000 distinct values -- presumably how
# it is derived; TODO confirm.
build
SELECT * FROM (SELECT concat(s, y::string) FROM a) AS q(v) WHERE v = 'foo'
----
select
 ├── columns: v:5(string!null)
 ├── stats: [rows=0.375, distinct(5)=0.375, null(5)=0]
 ├── fd: ()-->(5)
 ├── project
 │    ├── columns: concat:5(string)
 │    ├── stats: [rows=2000, distinct(5)=2000, null(5)=1250]
 │    ├── scan a
 │    │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    │    ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=1250]
 │    │    ├── key: (1)
 │    │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 │    └── projections
 │         └── concat(s, y::STRING) [type=string, outer=(2,3)]
 └── filters
      └── concat = 'foo' [type=bool, outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)]

# Stats for a constant NULL projection and for NULLIF. Column 5 (the NULL
# literal) is reported as entirely null: distinct(5)=0, null(5)=2000.
# NULLIF(x,y) is built as CASE x WHEN y THEN NULL ELSE x END over columns 1
# and 2, and column 6 shows distinct(6)=1000, null(6)=1000, the same values as
# distinct(1,2)/null(1,2) on the scan.
# NOTE(review): distinct(1,2)=1000 on the scan is lower than the 2000 distinct
# values injected for x alone -- confirm this is the intended estimate.
build colstat=5 colstat=6 colstat=(5,6)
SELECT NULL, NULLIF(x,y) FROM a
----
project
 ├── columns: "?column?":5(unknown) nullif:6(int)
 ├── stats: [rows=2000, distinct(5)=0, null(5)=2000, distinct(6)=1000, null(6)=1000, distinct(5,6)=1000, null(5,6)=2000]
 ├── fd: ()-->(5)
 ├── scan a
 │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    ├── stats: [rows=2000, distinct(1,2)=1000, null(1,2)=1000]
 │    ├── key: (1)
 │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 └── projections
      ├── null [type=unknown]
      └── CASE x WHEN y THEN NULL ELSE x END [type=int, outer=(1,2)]
