# LogicTest: 5node-dist 5node-dist-opt 5node-dist-metadata

# Disable automatic stats.
statement ok
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false

statement ok
CREATE TABLE data (a INT, b INT, c FLOAT, d DECIMAL, PRIMARY KEY (a, b, c, d), INDEX c_idx (c, d))

# Prevent the merge queue from immediately discarding our splits.
statement ok
SET CLUSTER SETTING kv.range_merge.queue_enabled = false;

# Split into ten parts.
statement ok
ALTER TABLE data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)

# Relocate the ten parts to the five nodes.
statement ok
ALTER TABLE data EXPERIMENTAL_RELOCATE
  SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)

# Generate all combinations of values 1 to 10.
statement ok
INSERT INTO data SELECT a, b, c::FLOAT, d::DECIMAL FROM
   generate_series(1, 10) AS a(a),
   generate_series(1, 10) AS b(b),
   generate_series(1, 10) AS c(c),
   generate_series(1, 10) AS d(d)

# Verify data placement.
query TTTI colnames
SELECT start_key, end_key, replicas, lease_holder FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE data]
----
start_key  end_key  replicas  lease_holder
NULL       /1       {1}       1
/1         /2       {2}       2
/2         /3       {3}       3
/3         /4       {4}       4
/4         /5       {5}       5
/5         /6       {1}       1
/6         /7       {2}       2
/7         /8       {3}       3
/8         /9       {4}       4
/9         NULL     {5}       5

statement ok
CREATE STATISTICS s1 ON a FROM data

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
----
statistics_name  column_names  row_count  distinct_count  null_count
s1               {a}           10000      10              0

# TODO(radu): reenable when we support histograms.
#
# let $hist_id_1
# SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE data] WHERE statistics_name = 's1'
# 
# query TII colnames
# SHOW HISTOGRAM $hist_id_1
# ----
# upper_bound  range_rows  equal_rows
# 1            0           1000
# 2            0           1000
# 3            0           1000
# 4            0           1000
# 5            0           1000
# 6            0           1000
# 7            0           1000
# 8            0           1000
# 9            0           1000
# 10           0           1000

statement ok
CREATE STATISTICS "" ON b FROM data

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
----
statistics_name  column_names  row_count  distinct_count  null_count
s1               {a}           10000      10              0
NULL             {b}           10000      10              0

# Verify that we can package statistics into a json object and later restore them.
let $json_stats
SHOW STATISTICS USING JSON FOR TABLE data

statement ok
DELETE FROM system.table_statistics

statement ok
ALTER TABLE data INJECT STATISTICS '$json_stats'

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
----
statistics_name  column_names  row_count  distinct_count  null_count
s1               {a}           10000      10              0
NULL             {b}           10000      10              0

# Verify that any other statistics are blown away when we INJECT.
statement ok
CREATE STATISTICS s3 ON c FROM data

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
----
statistics_name  column_names  row_count  distinct_count  null_count
s1               {a}           10000      10              0
NULL             {b}           10000      10              0
s3               {c}           10000      10              0

statement ok
ALTER TABLE data INJECT STATISTICS '$json_stats'

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
----
statistics_name  column_names  row_count  distinct_count  null_count
s1               {a}           10000      10              0
NULL             {b}           10000      10              0

# Test AS OF SYSTEM TIME

statement error pgcode 42P01 relation "data" does not exist
CREATE STATISTICS s2 ON a FROM data AS OF SYSTEM TIME '2017'

statement ok
CREATE STATISTICS s2 ON a FROM data AS OF SYSTEM TIME '-1us'

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
----
statistics_name  column_names  row_count  distinct_count  null_count
NULL             {b}           10000      10              0
s2               {a}           10000      10              0

#
# Test default column statistics
#

statement ok
CREATE STATISTICS s3 FROM data

query TIII colnames
SELECT column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE data]
WHERE statistics_name = 's3'
----
column_names  row_count  distinct_count  null_count
{a}           10000      10              0
{c}           10000      10              0
{b}           10000      10              0
{d}           10000      10              0

# Add indexes, including duplicate index on column c.
statement ok
CREATE INDEX ON data (c DESC, b ASC); CREATE INDEX ON data (b DESC)

statement ok
CREATE STATISTICS s4 FROM data

# Check that stats are only collected once per column.
query TIII colnames
SELECT column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE data]
WHERE statistics_name = 's4'
----
column_names  row_count  distinct_count  null_count
{a}           10000      10              0
{c}           10000      10              0
{b}           10000      10              0
{d}           10000      10              0

statement ok
DROP INDEX data@c_idx; DROP INDEX data@data_c_b_idx

statement ok
CREATE STATISTICS s5 FROM [53]

# We should still get stats for column c, but now column c is added later as a
# non-index column, resulting in a different ordering of the rows.
query TIII colnames
SELECT column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE data]
WHERE statistics_name = 's5'
----
column_names  row_count  distinct_count  null_count
{a}           10000      10              0
{b}           10000      10              0
{c}           10000      10              0
{d}           10000      10              0

# Table with a hidden primary key and no other indexes.
statement ok
CREATE TABLE simple (x INT, y INT)

statement ok
CREATE STATISTICS default_stat1 FROM simple

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE simple]
----
statistics_name  column_names  row_count  distinct_count  null_count
default_stat1    {rowid}       0          0               0
default_stat1    {x}           0          0               0
default_stat1    {y}           0          0               0

# Add one null row.
statement ok
INSERT INTO simple VALUES (DEFAULT, DEFAULT)

# Add an index.
statement ok
CREATE UNIQUE INDEX ON simple (y) STORING (x)

statement ok
CREATE STATISTICS default_stat2 FROM simple

# Now stats are collected on the index column y before column x.
query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE simple]
----
statistics_name  column_names  row_count  distinct_count  null_count
default_stat2    {rowid}       1          1               0
default_stat2    {y}           1          0               1
default_stat2    {x}           1          0               1

#
# Test numeric references
#

statement ok
CREATE STATISTICS s6 ON a FROM [53]

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE data]
----
statistics_name  column_names  row_count  distinct_count  null_count
s5               {b}           10000      10              0
s5               {c}           10000      10              0
s5               {d}           10000      10              0
s6               {a}           10000      10              0

# Combine default columns and numeric reference.
statement ok
CREATE STATISTICS __auto__ FROM [53]

query TIII colnames
SELECT column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE data]
WHERE statistics_name = '__auto__'
----
column_names  row_count  distinct_count  null_count
{a}           10000      10              0
{b}           10000      10              0
{c}           10000      10              0
{d}           10000      10              0

#
# Test delete stats
#

statement ok
DROP INDEX data@data_b_idx

statement ok
CREATE STATISTICS __auto__ FROM [53];
CREATE STATISTICS __auto__ FROM [53];
CREATE STATISTICS __auto__ FROM [53];
CREATE STATISTICS __auto__ FROM [53];
CREATE STATISTICS __auto__ FROM [53];
CREATE STATISTICS __auto__ FROM [53];

# Only the last 4-5 automatic stats should remain for each column.
query TT colnames
SELECT statistics_name, column_names
FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING
----
statistics_name  column_names
__auto__         {a}
__auto__         {a}
__auto__         {a}
__auto__         {a}
__auto__         {a}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {d}
__auto__         {d}
__auto__         {d}
__auto__         {d}
__auto__         {d}

statement ok
CREATE STATISTICS s7 ON a FROM [53]

query TT colnames
SELECT statistics_name, column_names
FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING
----
statistics_name  column_names
__auto__         {a}
__auto__         {a}
__auto__         {a}
__auto__         {a}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {d}
__auto__         {d}
__auto__         {d}
__auto__         {d}
__auto__         {d}
s7               {a}

statement ok
CREATE STATISTICS s8 ON a FROM [53]

# s7 is deleted but the automatic stats remain.
query TT colnames
SELECT statistics_name, column_names
FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING
----
statistics_name  column_names
__auto__         {a}
__auto__         {a}
__auto__         {a}
__auto__         {a}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {b}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {c}
__auto__         {d}
__auto__         {d}
__auto__         {d}
__auto__         {d}
__auto__         {d}
s8               {a}

# Regression test for #33195.
statement ok
CREATE TABLE t (x int); INSERT INTO t VALUES (1); ALTER TABLE t DROP COLUMN x

# Ensure that creating stats on a table with no columns does not cause a panic.
statement ok
CREATE STATISTICS s FROM t

# Regression test for #35150.
statement ok
CREATE TABLE groups (data JSON); INSERT INTO groups VALUES ('{"data": {"domain": "github.com"}}')

# Ensure that trying to create statistics on a JSON column gives an appropriate error.
statement error pq: CREATE STATISTICS is not supported for JSON columns
CREATE STATISTICS s ON data FROM groups

# The json column is not included in the default columns.
statement ok
CREATE STATISTICS s FROM groups

query TT colnames
SELECT statistics_name, column_names
FROM [SHOW STATISTICS FOR TABLE groups] ORDER BY statistics_name, column_names::STRING
----
statistics_name  column_names
s                {rowid}

# Regression test for #35764.
statement ok
CREATE TABLE users (
  profile_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  last_updated TIMESTAMP DEFAULT now(),
  user_profile JSONB,
  INVERTED INDEX user_details (user_profile)
)

statement ok
INSERT INTO users (user_profile) VALUES
  ('{"first_name": "Lola", "last_name": "Dog", "location": "NYC", "online" : true, "friends" : 547}'),
  ('{"first_name": "Ernie", "status": "Looking for treats", "location" : "Brooklyn"}'),
  ('{"first_name": "Carl", "last_name": "Kimball", "location": "NYC", "breed": "Boston Terrier"}'
)

# Ensure that trying to create statistics with default columns does not fail
# when there is an inverted index.
statement ok
CREATE STATISTICS s FROM users

query TTI colnames
SELECT statistics_name, column_names, row_count
FROM [SHOW STATISTICS FOR TABLE users] ORDER BY statistics_name, column_names::STRING
----
statistics_name  column_names    row_count
s                {last_updated}  3
s                {profile_id}    3

# Arrays are supported.
statement ok
CREATE TABLE arr (x INT[])

statement ok
INSERT INTO arr VALUES (ARRAY[1,2]), (ARRAY[1,2]), (ARRAY[3,4]), (NULL)

statement ok
CREATE STATISTICS arr_stats FROM arr

query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE arr] ORDER BY statistics_name, column_names::STRING
----
statistics_name  column_names  row_count  distinct_count  null_count
arr_stats        {rowid}       4          4               0
arr_stats        {x}           4          2               1
