# Test fixture: table a with primary key x, nullable y, non-null float z and
# nullable string s. The UNIQUE constraint creates a secondary index on
# (s DESC, z) that also stores the primary key column x.
exec-ddl
CREATE TABLE a (x INT PRIMARY KEY, y INT, z FLOAT NOT NULL, s STRING, UNIQUE (s DESC, z))
----
TABLE a
 ├── x int not null
 ├── y int
 ├── z float not null
 ├── s string
 ├── INDEX primary
 │    └── x int not null
 └── INDEX secondary
      ├── s string desc
      ├── z float not null
      └── x int not null (storing)

# Inject table statistics without any null_count: 2000 rows, with distinct
# counts of 2000 for x, 400 for y, 10 for s, and 600 for the multi-column
# statistic on (s, y, z). No statistic is injected for z alone.
exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 400
  },
  {
    "columns": ["s"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 10
  },
  {
    "columns": ["s","y","z"],
    "created_at": "2018-01-01 1:40:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 600
  }
]'
----

# No aggregate. The grouping columns (x, y) include the key column x; the
# expected group-by row estimate equals the input row count (2000).
build
SELECT x FROM a GROUP BY x, y
----
project
 ├── columns: x:1(int!null)
 ├── stats: [rows=2000]
 ├── key: (1)
 └── group-by
      ├── columns: x:1(int!null) y:2(int)
      ├── grouping columns: x:1(int!null) y:2(int)
      ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0]
      ├── key: (1)
      ├── fd: (1)-->(2)
      └── project
           ├── columns: x:1(int!null) y:2(int)
           ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0]
           ├── key: (1)
           ├── fd: (1)-->(2)
           └── scan a
                ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
                ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0]
                ├── key: (1)
                └── fd: (1)-->(2-4), (3,4)~~>(1,2)

# Group by single column key. The expected group-by row estimate matches the
# scan's row count (2000), which is also the injected distinct count of x.
build
SELECT max(y) FROM a GROUP BY x
----
project
 ├── columns: max:5(int)
 ├── stats: [rows=2000]
 └── group-by
      ├── columns: x:1(int!null) max:5(int)
      ├── grouping columns: x:1(int!null)
      ├── stats: [rows=2000, distinct(1)=2000, null(1)=0]
      ├── key: (1)
      ├── fd: (1)-->(5)
      ├── project
      │    ├── columns: x:1(int!null) y:2(int)
      │    ├── stats: [rows=2000, distinct(1)=2000, null(1)=0]
      │    ├── key: (1)
      │    ├── fd: (1)-->(2)
      │    └── scan a
      │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │         ├── stats: [rows=2000, distinct(1)=2000, null(1)=0]
      │         ├── key: (1)
      │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── aggregations
           └── max [type=int, outer=(2)]
                └── variable: y [type=int]

# Group by non-key. The expected group-by row estimate equals the injected
# distinct count of y (400), down from 2000 input rows.
build
SELECT y, sum(z) FROM a GROUP BY y
----
group-by
 ├── columns: y:2(int) sum:5(float)
 ├── grouping columns: y:2(int)
 ├── stats: [rows=400, distinct(2)=400, null(2)=0]
 ├── key: (2)
 ├── fd: (2)-->(5)
 ├── project
 │    ├── columns: y:2(int) z:3(float!null)
 │    ├── stats: [rows=2000, distinct(2)=400, null(2)=0]
 │    └── scan a
 │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
 │         ├── stats: [rows=2000, distinct(2)=400, null(2)=0]
 │         ├── key: (1)
 │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 └── aggregations
      └── sum [type=float, outer=(3)]
           └── variable: z [type=float]

# Group by exactly the columns covered by the injected multi-column statistic
# (s, y, z). The expected group-by row estimate equals its distinct count (600).
build
SELECT max(x) FROM a GROUP BY y, z, s
----
project
 ├── columns: max:5(int)
 ├── stats: [rows=600]
 └── group-by
      ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int)
      ├── grouping columns: y:2(int) z:3(float!null) s:4(string)
      ├── stats: [rows=600, distinct(2-4)=600, null(2-4)=0]
      ├── key: (2-4)
      ├── fd: (3,4)~~>(2), (2-4)-->(5)
      ├── scan a
      │    ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │    ├── stats: [rows=2000, distinct(2-4)=600, null(2-4)=0]
      │    ├── key: (1)
      │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── aggregations
           └── max [type=int, outer=(1)]
                └── variable: x [type=int]

# Group by (y, z). No statistic was injected for z or for (y, z); the expected
# estimate is distinct(2,3)=2000, i.e. the full input row count.
build
SELECT min(x) FROM a GROUP BY y, z
----
project
 ├── columns: min:5(int)
 ├── stats: [rows=2000]
 └── group-by
      ├── columns: y:2(int) z:3(float!null) min:5(int)
      ├── grouping columns: y:2(int) z:3(float!null)
      ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0]
      ├── key: (2,3)
      ├── fd: (2,3)-->(5)
      ├── project
      │    ├── columns: x:1(int!null) y:2(int) z:3(float!null)
      │    ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0]
      │    ├── key: (1)
      │    ├── fd: (1)-->(2,3)
      │    └── scan a
      │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │         ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0]
      │         ├── key: (1)
      │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── aggregations
           └── min [type=int, outer=(1)]
                └── variable: x [type=int]

# HAVING filter on the grouping column s, applied as a select above the
# group-by. distinct(4) goes from 10 to 2 and the expected row estimate goes
# from 600 to 120.
build
SELECT max(x) FROM a GROUP BY y, z, s HAVING s IN ('a', 'b')
----
project
 ├── columns: max:5(int)
 ├── stats: [rows=120]
 └── select
      ├── columns: y:2(int) z:3(float!null) s:4(string!null) max:5(int)
      ├── stats: [rows=120, distinct(3)=97.6, null(3)=0, distinct(4)=2, null(4)=0]
      ├── key: (3,4)
      ├── fd: (3,4)-->(2), (2-4)-->(5)
      ├── group-by
      │    ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int)
      │    ├── grouping columns: y:2(int) z:3(float!null) s:4(string)
      │    ├── stats: [rows=600, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=0, distinct(2-4)=600, null(2-4)=0]
      │    ├── key: (2-4)
      │    ├── fd: (3,4)~~>(2), (2-4)-->(5)
      │    ├── scan a
      │    │    ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │    │    ├── stats: [rows=2000, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=0, distinct(2-4)=600, null(2-4)=0]
      │    │    ├── key: (1)
      │    │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      │    └── aggregations
      │         └── max [type=int, outer=(1)]
      │              └── variable: x [type=int]
      └── filters
           └── s IN ('a', 'b') [type=bool, outer=(4), constraints=(/4: [/'a' - /'a'] [/'b' - /'b']; tight)]

# Estimate the distinct count for an aggregate column. The group-by reports
# distinct(5)=10 for sum(x), equal to its row count; the equality filter in
# the HAVING clause reduces both rows and distinct(5) to 1.
build
SELECT sum(x), s FROM a GROUP BY s HAVING sum(x) = 5
----
select
 ├── columns: sum:5(decimal!null) s:4(string)
 ├── stats: [rows=1, distinct(5)=1, null(5)=0]
 ├── key: (4)
 ├── fd: ()-->(5)
 ├── group-by
 │    ├── columns: s:4(string) sum:5(decimal)
 │    ├── grouping columns: s:4(string)
 │    ├── stats: [rows=10, distinct(4)=10, null(4)=0, distinct(5)=10, null(5)=0]
 │    ├── key: (4)
 │    ├── fd: (4)-->(5)
 │    ├── project
 │    │    ├── columns: x:1(int!null) s:4(string)
 │    │    ├── stats: [rows=2000, distinct(4)=10, null(4)=0]
 │    │    ├── key: (1)
 │    │    ├── fd: (1)-->(4)
 │    │    └── scan a
 │    │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
 │    │         ├── stats: [rows=2000, distinct(4)=10, null(4)=0]
 │    │         ├── key: (1)
 │    │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 │    └── aggregations
 │         └── sum [type=decimal, outer=(1)]
 │              └── variable: x [type=int]
 └── filters
      └── sum = 5 [type=bool, outer=(5), constraints=(/5: [/5 - /5]; tight), fd=()-->(5)]

# Scalar GroupBy. With no GROUP BY clause the plan uses scalar-group-by, which
# has cardinality [1 - 1] and rows=1; the HAVING select above it has
# cardinality [0 - 1].
build
SELECT max(y), sum(z) FROM a HAVING sum(z) = 5.0
----
select
 ├── columns: max:5(int) sum:6(float!null)
 ├── cardinality: [0 - 1]
 ├── stats: [rows=1, distinct(6)=1, null(6)=0]
 ├── key: ()
 ├── fd: ()-->(5,6)
 ├── scalar-group-by
 │    ├── columns: max:5(int) sum:6(float)
 │    ├── cardinality: [1 - 1]
 │    ├── stats: [rows=1, distinct(6)=1, null(6)=0]
 │    ├── key: ()
 │    ├── fd: ()-->(5,6)
 │    ├── project
 │    │    ├── columns: y:2(int) z:3(float!null)
 │    │    ├── stats: [rows=2000]
 │    │    └── scan a
 │    │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
 │    │         ├── stats: [rows=2000]
 │    │         ├── key: (1)
 │    │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 │    └── aggregations
 │         ├── max [type=int, outer=(2)]
 │         │    └── variable: y [type=int]
 │         └── sum [type=float, outer=(3)]
 │              └── variable: z [type=float]
 └── filters
      └── sum = 5.0 [type=bool, outer=(6), constraints=(/6: [/5.0 - /5.0]; tight), fd=()-->(6)]

# Bump up null counts. Re-inject the statistics with later created_at
# timestamps, adding null_count=1000 for y and for s, and null_count=1100 for
# (s, y, z). Row counts and distinct counts are the same as in the first
# injection. The queries below repeat the earlier ones against these stats.
exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 400,
    "null_count": 1000
  },
  {
    "columns": ["s"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 10,
    "null_count": 1000
  },
  {
    "columns": ["s","y","z"],
    "created_at": "2018-01-01 2:10:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 600,
    "null_count": 1100
  }
]'
----

# No aggregate. With null counts injected, the expected stats for (x, y) are
# distinct(1,2)=1000 and null(1,2)=1000, and the group-by row estimate is 1000
# even though the key is still (1).
build
SELECT x FROM a GROUP BY x, y
----
project
 ├── columns: x:1(int!null)
 ├── stats: [rows=1000]
 ├── key: (1)
 └── group-by
      ├── columns: x:1(int!null) y:2(int)
      ├── grouping columns: x:1(int!null) y:2(int)
      ├── stats: [rows=1000, distinct(1,2)=1000, null(1,2)=1000]
      ├── key: (1)
      ├── fd: (1)-->(2)
      └── project
           ├── columns: x:1(int!null) y:2(int)
           ├── stats: [rows=2000, distinct(1,2)=1000, null(1,2)=1000]
           ├── key: (1)
           ├── fd: (1)-->(2)
           └── scan a
                ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
                ├── stats: [rows=2000, distinct(1,2)=1000, null(1,2)=1000]
                ├── key: (1)
                └── fd: (1)-->(2-4), (3,4)~~>(1,2)

# Group by single column key. No null_count was injected for x, and the
# expected stats are the same as before the null counts were added
# (rows=2000, distinct(1)=2000, null(1)=0).
build
SELECT max(y) FROM a GROUP BY x
----
project
 ├── columns: max:5(int)
 ├── stats: [rows=2000]
 └── group-by
      ├── columns: x:1(int!null) max:5(int)
      ├── grouping columns: x:1(int!null)
      ├── stats: [rows=2000, distinct(1)=2000, null(1)=0]
      ├── key: (1)
      ├── fd: (1)-->(5)
      ├── project
      │    ├── columns: x:1(int!null) y:2(int)
      │    ├── stats: [rows=2000, distinct(1)=2000, null(1)=0]
      │    ├── key: (1)
      │    ├── fd: (1)-->(2)
      │    └── scan a
      │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │         ├── stats: [rows=2000, distinct(1)=2000, null(1)=0]
      │         ├── key: (1)
      │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── aggregations
           └── max [type=int, outer=(2)]
                └── variable: y [type=int]

# Group by non-key. The expected group-by row estimate stays at 400; null(2)
# is 1000 in the input and 400 in the group-by output.
build
SELECT y, sum(z) FROM a GROUP BY y
----
group-by
 ├── columns: y:2(int) sum:5(float)
 ├── grouping columns: y:2(int)
 ├── stats: [rows=400, distinct(2)=400, null(2)=400]
 ├── key: (2)
 ├── fd: (2)-->(5)
 ├── project
 │    ├── columns: y:2(int) z:3(float!null)
 │    ├── stats: [rows=2000, distinct(2)=400, null(2)=1000]
 │    └── scan a
 │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
 │         ├── stats: [rows=2000, distinct(2)=400, null(2)=1000]
 │         ├── key: (1)
 │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 └── aggregations
      └── sum [type=float, outer=(3)]
           └── variable: z [type=float]

# Group by the columns of the multi-column statistic (s, y, z), now with a
# null count. The expected row estimate stays at 600; null(2-4) is 1100 in the
# scan and 600 in the group-by output.
build
SELECT max(x) FROM a GROUP BY y, z, s
----
project
 ├── columns: max:5(int)
 ├── stats: [rows=600]
 └── group-by
      ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int)
      ├── grouping columns: y:2(int) z:3(float!null) s:4(string)
      ├── stats: [rows=600, distinct(2-4)=600, null(2-4)=600]
      ├── key: (2-4)
      ├── fd: (3,4)~~>(2), (2-4)-->(5)
      ├── scan a
      │    ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │    ├── stats: [rows=2000, distinct(2-4)=600, null(2-4)=1100]
      │    ├── key: (1)
      │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── aggregations
           └── max [type=int, outer=(1)]
                └── variable: x [type=int]

# Group by (y, z), for which no statistic was injected. The expected estimate
# remains distinct(2,3)=2000, with null(2,3)=1000 at every level.
build
SELECT min(x) FROM a GROUP BY y, z
----
project
 ├── columns: min:5(int)
 ├── stats: [rows=2000]
 └── group-by
      ├── columns: y:2(int) z:3(float!null) min:5(int)
      ├── grouping columns: y:2(int) z:3(float!null)
      ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=1000]
      ├── key: (2,3)
      ├── fd: (2,3)-->(5)
      ├── project
      │    ├── columns: x:1(int!null) y:2(int) z:3(float!null)
      │    ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=1000]
      │    ├── key: (1)
      │    ├── fd: (1)-->(2,3)
      │    └── scan a
      │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │         ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=1000]
      │         ├── key: (1)
      │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      └── aggregations
           └── min [type=int, outer=(1)]
                └── variable: x [type=int]

# HAVING filter on the grouping column s with null counts injected. null(4) is
# 1000 in the scan, 5.5 in the group-by, and 0 in the select, where s is
# non-null (string!null). The expected row estimate after the filter is 118.9.
build
SELECT max(x) FROM a GROUP BY y, z, s HAVING s IN ('a', 'b')
----
project
 ├── columns: max:5(int)
 ├── stats: [rows=118.9]
 └── select
      ├── columns: y:2(int) z:3(float!null) s:4(string!null) max:5(int)
      ├── stats: [rows=118.9, distinct(3)=97.6, null(3)=0, distinct(4)=2, null(4)=0]
      ├── key: (3,4)
      ├── fd: (3,4)-->(2), (2-4)-->(5)
      ├── group-by
      │    ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int)
      │    ├── grouping columns: y:2(int) z:3(float!null) s:4(string)
      │    ├── stats: [rows=600, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=5.5, distinct(2-4)=600, null(2-4)=600]
      │    ├── key: (2-4)
      │    ├── fd: (3,4)~~>(2), (2-4)-->(5)
      │    ├── scan a
      │    │    ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
      │    │    ├── stats: [rows=2000, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=1000, distinct(2-4)=600, null(2-4)=1100]
      │    │    ├── key: (1)
      │    │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
      │    └── aggregations
      │         └── max [type=int, outer=(1)]
      │              └── variable: x [type=int]
      └── filters
           └── s IN ('a', 'b') [type=bool, outer=(4), constraints=(/4: [/'a' - /'a'] [/'b' - /'b']; tight)]

# Estimate the null count for an aggregate column. The group-by reports
# null(5)=1 for sum(x); after the equality filter the select has null(5)=0
# and rows=0.9.
build
SELECT sum(x), s FROM a GROUP BY s HAVING sum(x) = 5
----
select
 ├── columns: sum:5(decimal!null) s:4(string)
 ├── stats: [rows=0.9, distinct(5)=0.9, null(5)=0]
 ├── key: (4)
 ├── fd: ()-->(5)
 ├── group-by
 │    ├── columns: s:4(string) sum:5(decimal)
 │    ├── grouping columns: s:4(string)
 │    ├── stats: [rows=10, distinct(4)=10, null(4)=10, distinct(5)=10, null(5)=1]
 │    ├── key: (4)
 │    ├── fd: (4)-->(5)
 │    ├── project
 │    │    ├── columns: x:1(int!null) s:4(string)
 │    │    ├── stats: [rows=2000, distinct(4)=10, null(4)=1000]
 │    │    ├── key: (1)
 │    │    ├── fd: (1)-->(4)
 │    │    └── scan a
 │    │         ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string)
 │    │         ├── stats: [rows=2000, distinct(4)=10, null(4)=1000]
 │    │         ├── key: (1)
 │    │         └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 │    └── aggregations
 │         └── sum [type=decimal, outer=(1)]
 │              └── variable: x [type=int]
 └── filters
      └── sum = 5 [type=bool, outer=(5), constraints=(/5: [/5 - /5]; tight), fd=()-->(5)]

# Regression test for #36442.
# The input is a three-row VALUES clause whose second column is NULL in two
# rows. It is filtered on the first column, grouped by the second, and then
# filtered again by HAVING bool_or. The expected group-by row estimate is
# fractional (0.875), below one row.
# NOTE(review): the original failure in #36442 is not described in this file;
# confirm against the issue before relying on this summary.
norm
WITH q (a, b) AS (SELECT * FROM (VALUES (true, NULL), (false, NULL), (true, 5)))
  SELECT 1
    FROM q
   WHERE q.a
GROUP BY q.b
  HAVING bool_or(q.a)
----
project
 ├── columns: "?column?":4(int!null)
 ├── cardinality: [0 - 3]
 ├── stats: [rows=0]
 ├── fd: ()-->(4)
 ├── select
 │    ├── columns: column2:2(int) bool_or:3(bool!null)
 │    ├── cardinality: [0 - 3]
 │    ├── stats: [rows=0, distinct(3)=0, null(3)=0]
 │    ├── key: (2)
 │    ├── fd: ()-->(3)
 │    ├── group-by
 │    │    ├── columns: column2:2(int) bool_or:3(bool)
 │    │    ├── grouping columns: column2:2(int)
 │    │    ├── cardinality: [0 - 3]
 │    │    ├── stats: [rows=0.875, distinct(2)=0.875, null(2)=0.875, distinct(3)=0.875, null(3)=0.875]
 │    │    ├── key: (2)
 │    │    ├── fd: (2)-->(3)
 │    │    ├── select
 │    │    │    ├── columns: column1:1(bool!null) column2:2(int)
 │    │    │    ├── cardinality: [0 - 3]
 │    │    │    ├── stats: [rows=1.5, distinct(1)=1, null(1)=0, distinct(2)=0.875, null(2)=1]
 │    │    │    ├── fd: ()-->(1)
 │    │    │    ├── values
 │    │    │    │    ├── columns: column1:1(bool) column2:2(int)
 │    │    │    │    ├── cardinality: [3 - 3]
 │    │    │    │    ├── stats: [rows=3, distinct(1)=2, null(1)=0, distinct(2)=1, null(2)=2]
 │    │    │    │    ├── (true, NULL) [type=tuple{bool, int}]
 │    │    │    │    ├── (false, NULL) [type=tuple{bool, int}]
 │    │    │    │    └── (true, 5) [type=tuple{bool, int}]
 │    │    │    └── filters
 │    │    │         └── variable: column1 [type=bool, outer=(1), constraints=(/1: [/true - /true]; tight), fd=()-->(1)]
 │    │    └── aggregations
 │    │         └── bool-or [type=bool, outer=(1)]
 │    │              └── variable: column1 [type=bool]
 │    └── filters
 │         └── variable: bool_or [type=bool, outer=(3), constraints=(/3: [/true - /true]; tight), fd=()-->(3)]
 └── projections
      └── const: 1 [type=int]
