# LogicTest: local local-opt fakedist fakedist-opt

# The following tables form the interleaved hierarchy:
#   name:             primary key:                # rows:   'a' = id mod X :
#   parent1           (pid1)                      40        8
#     child1          (pid1, cid1)                150       66
#       grandchild1   (pid1, cid1, gcid1)         410       201
#     child2          (pid1, cid2, cid3)          15        7
#       grandchild2   (pid1, cid2, cid3, gcid2)   51        13
#   parent2           (pid2)                      5         2
# Additional rows in child1, child2, and grandchild1 with no corresponding
# parent row are also inserted.
#
# All IDs belonging to a table (pid1 --> parent1, cid1 --> child1, cid2,cid3
# --> child2, etc.) start from 1 up to (# rows).
# Foreign keys are modded by their ancestor's (# rows). For example, for child1
# row with cid1=100, we take ((cid1-1) % 40 + 1) = 20 as pid1.
# One exception is cid3, which is taken as cid2 % 14.
# There's a column 'a' that's modded by a factor.
#
# This allows us to test the following edge cases (in order of tests):
#   - one-to-many (parent1 - child1)
#   - one-to-one and one-to-none (parent1 - child2)
#   - parent-grandchild (parent1 - grandchild1)
#   - multiple interleaved columns (child2 - grandchild2)
#   - additional ancestor above (child2 - grandchild2)
#   - no interleaved relationship (parent1 - parent2, parent2 - child1)
#   - TODO(richardwu): sibling-sibling (child1 - child2)

#################
# Create tables #
#################

# Top-level tables: neither one is interleaved into another table.
statement ok
CREATE TABLE parent1 (
  pid1 INT PRIMARY KEY,
  pa1 INT
)

statement ok
CREATE TABLE parent2 (
  pid2 INT PRIMARY KEY,
  pa2 INT
)

# Both child tables are interleaved directly into parent1 on pid1.
statement ok
CREATE TABLE child1 (
  pid1 INT, cid1 INT,
  ca1 INT,
  PRIMARY KEY (pid1, cid1)
) INTERLEAVE IN PARENT parent1 (pid1)

statement ok
CREATE TABLE child2 (
  pid1 INT, cid2 INT, cid3 INT,
  ca2 INT,
  PRIMARY KEY (pid1, cid2, cid3)
) INTERLEAVE IN PARENT parent1 (pid1)

# grandchild1 is interleaved into child1 on the full child1 primary key.
statement ok
CREATE TABLE grandchild1 (
  pid1 INT, cid1 INT, gcid1 INT,
  gca1 INT,
  PRIMARY KEY (pid1, cid1, gcid1)
) INTERLEAVE IN PARENT child1 (pid1, cid1)

# grandchild2 deliberately has no foreign key: its rows are permitted to
# overflow out of child2 for pid1 > 15.
statement ok
CREATE TABLE grandchild2 (
  pid1 INT, cid2 INT, cid3 INT, gcid2 INT,
  gca2 INT,
  PRIMARY KEY (pid1, cid2, cid3, gcid2)
) INTERLEAVE IN PARENT child2 (pid1, cid2, cid3)

####################
# Insert some rows #
####################

# parent1: pid1 = 1..40, pa1 = pid1 % 8.
statement ok
INSERT INTO parent1 (pid1, pa1)
  SELECT pid, mod(pid, 8)
  FROM generate_series(1, 40) AS g(pid)

# parent2: pid2 = 1..5, pa2 = pid2 % 2.
statement ok
INSERT INTO parent2 (pid2, pa2)
  SELECT pid, mod(pid, 2)
  FROM generate_series(1, 5) AS g(pid)

# child1 has more rows than parent1.
statement ok
INSERT INTO child1 (pid1, cid1, ca1)
  SELECT mod(cid - 1, 40) + 1, cid, mod(cid, 66)
  FROM generate_series(1, 150) AS g(cid)

# Insert additional rows with no corresponding parent rows to check for correctness.
statement ok
INSERT INTO child1 (pid1, cid1, ca1) VALUES
  (-1, -1, -1), (0, 0, 0), (41, 41, 41), (151, 151, 19), (160, 160, 28)

# child2 has fewer rows than parent1.
statement ok
INSERT INTO child2 (pid1, cid2, cid3, ca2)
  SELECT mod(cid - 1, 40) + 1, cid, mod(cid, 14), mod(cid, 7)
  FROM generate_series(1, 15) AS g(cid)

# Extra child2 rows: pid1 = -1 and 0 have no parent1 row, while pid1 = 16 and
# 20 do (parent1 holds pid1 = 1..40).
statement ok
INSERT INTO child2 (pid1, cid2, cid3, ca2) VALUES
  (-1, -1, -1, -1), (0, 0, 0, 0), (16, 16, 2, 2), (20, 20, 6, 6)

# Generated grandchild1 rows: cid1 wraps around child1's 150 rows and pid1
# wraps around parent1's 40 rows.
statement ok
INSERT INTO grandchild1 (pid1, cid1, gcid1, gca1)
  SELECT
    mod(mod(gcid - 1, 150), 40) + 1,
    mod(gcid - 1, 150) + 1,
    gcid,
    mod(gcid, 201)
  FROM generate_series(1, 410) AS g(gcid)

# Additional hand-written grandchild1 rows.
statement ok
INSERT INTO grandchild1 (pid1, cid1, gcid1, gca1) VALUES
  (-1, -1, -1, -1), (0, 0, 0, 0), (200, 200, 200, 200), (411, 411, 411, 9)


# We let grandchild2.pid1 exceed child2.pid1 (one of the interleaved keys).
# So instead of
#     (gcid2 - 1) % 15 % 40 + 1
# we choose to only mod by 40 (nrows of parent1) instead of first modding
# by 15 (nrows of child2).
statement ok
INSERT INTO grandchild2 (pid1, cid2, cid3, gcid2, gca2)
  SELECT
    mod(gcid - 1, 40) + 1,
    mod(gcid - 1, 15) + 1,
    mod(mod(gcid - 1, 15) + 1, 14),
    gcid,
    mod(gcid, 13)
  FROM generate_series(1, 51) AS g(gcid)

#####################
# Interleaved joins #
#####################

# Select over two ranges for parent/child with split at children key.
# Returns:
#   pid1    pa1         cid1      ca1
#           (pid1 % 8)           (cid1 % 66)
query IIII rowsort,colnames
SELECT *
FROM parent1 INNER JOIN child1 USING (pid1)
WHERE pid1 BETWEEN 3 AND 5
----
pid1  pa1  cid1  ca1
3     3    3     3
3     3    43    43
3     3    83    17
3     3    123   57
4     4    4     4
4     4    44    44
4     4    84    18
4     4    124   58
5     5    5     5
5     5    45    45
5     5    85    19
5     5    125   59

# Swap parent1 and child1 tables.

query IIII rowsort,colnames
SELECT *
FROM child1 INNER JOIN parent1 USING (pid1)
WHERE pid1 BETWEEN 3 AND 5
----
pid1  cid1  ca1  pa1
3     3     3    3
3     43    43   3
3     83    17   3
3     123   57   3
4     4     4    4
4     44    44   4
4     84    18   4
4     124   58   4
5     5     5    5
5     45    45   5
5     85    19   5
5     125   59   5

# Select over two ranges for parent/child with split at grandchild key.
# Also, rows with pid1 <= 30 should have 4 rows whereas pid1 > 30 should
# have 3 rows.
# This record has no rowsort, so the expected rows are compared in order. The
# ORDER BY therefore includes child1.cid1 as a tie-breaker: ordering on
# parent1.pid1 alone leaves the order of the child rows within each parent
# unspecified.
# Returns:
#   parent1.pid1 pa1          child1.pid1     cid1        ca1
#                (pid1 % 8)                             (cid1 % 66)
query IIIII colnames
SELECT *
FROM parent1
JOIN child1 ON parent1.pid1 = child1.pid1
WHERE parent1.pid1 >= 29 AND parent1.pid1 <= 31
ORDER BY parent1.pid1, child1.cid1
----
pid1  pa1  pid1  cid1  ca1
29    5    29    29    29
29    5    29    69    3
29    5    29    109   43
29    5    29    149   17
30    6    30    30    30
30    6    30    70    4
30    6    30    110   44
30    6    30    150   18
31    7    31    31    31
31    7    31    71    5
31    7    31    111   45

# parent-child where pid1 <= 15 have one joined row and pid1 > 15 have no
# joined rows (since the generated child2 rows only go up to pid1 = 15), except
# for pid1 = 16 and pid1 = 20, which match the additionally inserted child2 rows.
# Returns:
#   pid1    pa1           cid2      cid3              ca2
#           (pid1 % 8)              (cid2 % 14)       (cid2 % 7)
query IIIII rowsort,colnames
SELECT *
FROM parent1 INNER JOIN child2 USING (pid1)
WHERE pid1 >= 12
----
pid1  pa1  cid2  cid3  ca2
12    4    12    12    5
13    5    13    13    6
14    6    14    0     0
15    7    15    1     1
16    0    16    2     2
20    4    20    6     6

# Single gateway node query (node 1).
# Returns:
#   pid1    pa1           cid2      cid3              ca2
#           (pid1 % 8)              (cid2 % 14)       (cid2 % 7)
# Note pid1 = 21 and 31 have no joined rows since child2 has no rows with
# those pid1 values.
query IIIII rowsort,colnames
SELECT *
FROM parent1 INNER JOIN child2 USING (pid1)
WHERE pid1 IN (1, 11, 21, 31)
----
pid1  pa1  cid2  cid3  ca2
1     1    1     1     1
11    3    11    11    4

# Parent-grandchild.
# Returns:
#   pid1    pa1           cid2    cid3          gcid2       gca2
#           (pid1 % 8)            (cid2 % 14)               (gcid2 % 13)
# Where clause is on ranges that overlap children, grandchildren, and parent
# key splits, respectively.
# Rows with pid1 > 11 have only one joined row since there are only 51
# grandchild2 rows (pid1 = 11 still has two: gcid2 = 11 and gcid2 = 51).
query IIIIII rowsort,colnames
SELECT *
FROM parent1 INNER JOIN grandchild2 USING (pid1)
WHERE
  pid1 BETWEEN 11 AND 13
  OR pid1 BETWEEN 19 AND 21
  OR pid1 BETWEEN 31 AND 33
----
pid1  pa1  cid2  cid3  gcid2  gca2
11    3    6     6     51     12
11    3    11    11    11     11
12    4    12    12    12     12
13    5    13    13    13     0
19    3    4     4     19     6
20    4    5     5     20     7
21    5    6     6     21     8
31    7    1     1     31     5
32    0    2     2     32     6
33    1    3     3     33     7

# Swap parent1 and grandchild2 positions.
query IIIIII rowsort,colnames
SELECT *
FROM grandchild2 INNER JOIN parent1 USING (pid1)
WHERE
  pid1 BETWEEN 11 AND 13
  OR pid1 BETWEEN 19 AND 21
  OR pid1 BETWEEN 31 AND 33
----
pid1  cid2  cid3  gcid2  gca2  pa1
11    6     6     51     12    3
11    11    11    11     11    3
12    12    12    12     12    4
13    13    13    13     0     5
19    4     4     19     6     3
20    5     5     20     7     4
21    6     6     21     8     5
31    1     1     31     5     7
32    2     2     32     6     0
33    3     3     33     7     1

# Join on multiple interleaved columns with an overarching ancestor (parent1).
# Returns:
#   child2.pid1   gc2.pid1        child2.cid2   gc2.cid2  child2.cid3   gc2.cid3      child2.ca2        gcid2         gca2
#                 (gcid2 % 40)                            (cid2 % 14)                                                 (gcid2 % 13)
query IIIIIIIII colnames,rowsort
SELECT *
FROM child2
INNER JOIN grandchild2
  ON child2.pid1 = grandchild2.pid1
  AND child2.cid2 = grandchild2.cid2
  AND child2.cid3 = grandchild2.cid3
WHERE
  child2.pid1 BETWEEN 5 AND 7
  OR child2.cid2 BETWEEN 12 AND 14
  OR gcid2 BETWEEN 49 AND 51
----
pid1  cid2  cid3  ca2  pid1  cid2  cid3  gcid2  gca2
5     5     5     5    5     5     5     5      5
6     6     6     6    6     6     6     6      6
7     7     7     0    7     7     7     7      7
12    12    12    5    12    12    12    12     12
13    13    13    6    13    13    13    13     0
14    14    0     0    14    14    0     14     1

# Aggregation over parent and child keys.
# There are 4 rows for each 10 <= pid1 <= 30 and 3 rows for each 30 < pid1 <=
# 39.
# We thus have 3 arithmetic series of 10 + ... + 39 and 1 arithmetic series
# of 10 + ... + 30 or
#     sum(pid1) = 3 * (39 - 10 + 1) * (10 + 39)/2 + (30 - 10 + 1) * (10 + 30)/2 = 2625
# For sum(cid1), we notice that pid1 = cid1 % 40, thus for every additional
# round of rows under a pid1, cid1 is increased by 40.
# For each additional round up to the 3rd (2 rounds after the first where 50 <= cid1 <= 79,
# 90 <= cid1 <= 119) , we have an additional
#     40 * (1 + 2) * (39 - 10 + 1) = 3600
# For the 4th round, we have 150 - 130 + 1 = 21 rows (130 <= cid1 <= 150) each
# additional row adds 120, thus
#     sum(cid1) = sum(pid1) + 3600 + 21 * 120 = 8745
query RR
SELECT sum(parent1.pid1), sum(child1.cid1)
FROM parent1 INNER JOIN child1 USING (pid1)
WHERE pid1 BETWEEN 10 AND 39
----
2625 8745

###############
# Outer joins #
###############

# The schema/values for each table are as follows:
# Table:        pkey:                     pkey values (same):   values:
# outer_p1      (pid1)                    {1, 2, 3, ... 20}     100 + pkey
# outer_c1      (pid1, cid1, cid2)        {-2, 0, 2, ... 28}    200 + pkey
# outer_gc1     (pid1, cid1, cid2, gcid1) {-4, 0, 4, ... 36}    300 + pkey

# Split between 4 nodes based on pkey value (p):
# node 1:       p - 1 mod 20 ∈ [0...5)
# node 2:       p - 1 mod 20 ∈ [5...10)
# node 3:       p - 1 mod 20 ∈ [10...15)
# node 4:       p - 1 mod 20 ∈ [15...20)

# Three-level interleaved hierarchy used by the outer join tests:
# outer_p1 <- outer_c1 <- outer_gc1.
statement ok
CREATE TABLE outer_p1 (pid1 INT PRIMARY KEY, pa1 INT)

statement ok
CREATE TABLE outer_c1 (
  pid1 INT, cid1 INT, cid2 INT,
  ca1 INT,
  PRIMARY KEY (pid1, cid1, cid2)
)
INTERLEAVE IN PARENT outer_p1 (pid1)

statement ok
CREATE TABLE outer_gc1 (
  pid1 INT, cid1 INT, cid2 INT, gcid1 INT,
  gca1 INT,
  PRIMARY KEY (pid1, cid1, cid2, gcid1)
)
INTERLEAVE IN PARENT outer_c1 (pid1, cid1, cid2)

# Every key column of a row holds the same value k; the non-key column is
# k + 100, k + 200, or k + 300 depending on the table.
statement ok
INSERT INTO outer_p1 (pid1, pa1)
  SELECT k, k + 100
  FROM generate_series(1, 20) AS s(k)

statement ok
INSERT INTO outer_c1 (pid1, cid1, cid2, ca1)
  SELECT k, k, k, k + 200
  FROM generate_series(-2, 28, 2) AS s(k)

statement ok
INSERT INTO outer_gc1 (pid1, cid1, cid2, gcid1, gca1)
  SELECT k, k, k, k, k + 300
  FROM generate_series(-4, 36, 4) AS s(k)

### Begin OUTER queries

# Parents without a child and children without a parent are NULL-extended.
query IIIII rowsort,colnames
SELECT *
FROM outer_p1 FULL JOIN outer_c1 USING (pid1)
----
pid1  pa1   cid1  cid2  ca1
-2    NULL  -2    -2    198
0     NULL  0     0     200
1     101   NULL  NULL  NULL
2     102   2     2     202
3     103   NULL  NULL  NULL
4     104   4     4     204
5     105   NULL  NULL  NULL
6     106   6     6     206
7     107   NULL  NULL  NULL
8     108   8     8     208
9     109   NULL  NULL  NULL
10    110   10    10    210
11    111   NULL  NULL  NULL
12    112   12    12    212
13    113   NULL  NULL  NULL
14    114   14    14    214
15    115   NULL  NULL  NULL
16    116   16    16    216
17    117   NULL  NULL  NULL
18    118   18    18    218
19    119   NULL  NULL  NULL
20    120   20    20    220
22    NULL  22    22    222
24    NULL  24    24    224
26    NULL  26    26    226
28    NULL  28    28    228

# Full outer join on the complete outer_c1 primary key.
query IIIIII rowsort,colnames
SELECT *
FROM outer_gc1 FULL JOIN outer_c1 USING (pid1, cid1, cid2)
----
pid1  cid1  cid2  gcid1  gca1  ca1
-4    -4    -4    -4     296   NULL
-2    -2    -2    NULL   NULL  198
0     0     0     0      300   200
2     2     2     NULL   NULL  202
4     4     4     4      304   204
6     6     6     NULL   NULL  206
8     8     8     8      308   208
10    10    10    NULL   NULL  210
12    12    12    12     312   212
14    14    14    NULL   NULL  214
16    16    16    16     316   216
18    18    18    NULL   NULL  218
20    20    20    20     320   220
22    22    22    NULL   NULL  222
24    24    24    24     324   224
26    26    26    NULL   NULL  226
28    28    28    28     328   228
32    32    32    32     332   NULL
36    36    36    36     336   NULL

# Children in [0, 40) keep their row even when outer_p1 has no matching pid1.
query IIIII rowsort,colnames
SELECT *
FROM outer_c1 LEFT JOIN outer_p1 USING (pid1)
WHERE pid1 >= 0 AND pid1 < 40
----
pid1  cid1  cid2  ca1  pa1
0     0     0     200  NULL
2     2     2     202  102
4     4     4     204  104
6     6     6     206  106
8     8     8     208  108
10    10    10    210  110
12    12    12    212  112
14    14    14    214  114
16    16    16    216  116
18    18    18    218  118
20    20    20    220  120
22    22    22    222  NULL
24    24    24    224  NULL
26    26    26    226  NULL
28    28    28    228  NULL

# Parent-grandchild right outer join restricted to the pid1 range of outer_p1.
query IIIIII rowsort,colnames
SELECT *
FROM outer_p1 RIGHT JOIN outer_gc1 USING (pid1)
WHERE pid1 BETWEEN 1 AND 20
----
pid1  pa1  cid1  cid2  gcid1  gca1
4     104  4     4     4      304
8     108  8     8     8      308
12    112  12    12    12     312
16    116  16    16    16     316
20    120  20    20    20     320

# Regression test for #22655.

statement ok
CREATE TABLE a (
  a STRING,
  b STRING,
  PRIMARY KEY (a, b)
)

statement ok
CREATE TABLE b (
  a STRING,
  b STRING,
  PRIMARY KEY (a, b)
) INTERLEAVE IN PARENT a (a, b)

# The join only has to execute successfully; no result rows are checked.
statement ok
SELECT *
FROM a INNER JOIN b ON a.a = b.a AND a.b = b.b
WHERE a.a = 'foo'

subtest ParentChildDifferentSize
# Regression test for #22647. Test when child is a few columns larger than parent.
statement ok
CREATE TABLE small_parent (
  a STRING PRIMARY KEY,
  b STRING
);
INSERT INTO small_parent (a, b) VALUES ('first', 'second')

statement ok
CREATE TABLE large_child (
  a STRING PRIMARY KEY,
  c STRING,
  d STRING,
  e STRING,
  f STRING
) INTERLEAVE IN PARENT small_parent (a)

statement ok
INSERT INTO large_child (a, c, d, e, f)
  VALUES ('first', 'second_child', 'third_child', 'fourth_child', 'fifth_child')

query TTTTTT
SELECT *
FROM large_child INNER JOIN small_parent USING (a)
----
first  second_child  third_child  fourth_child  fifth_child  second

# Test with composite keys.
statement ok
CREATE TABLE small_parent_ck (
  a STRING,
  b STRING,
  c STRING,
  PRIMARY KEY (a, b)
);
INSERT INTO small_parent_ck (a, b, c) VALUES ('first', 'second', 'third')

statement ok
CREATE TABLE large_child_ck (
  a STRING,
  b STRING,
  d STRING,
  e STRING,
  f STRING,
  PRIMARY KEY (a, b, d)
) INTERLEAVE IN PARENT small_parent_ck (a, b)

statement ok
INSERT INTO large_child_ck (a, b, d, e, f)
  VALUES ('first', 'second', 'third_child', 'fourth_child', 'fifth_child')

# The join is on the first key column only, so b appears once per table.
query TTTTTTT
SELECT *
FROM large_child_ck INNER JOIN small_parent_ck USING (a)
----
first  second  third_child  fourth_child  fifth_child  second  third


# Test with families.
statement ok
CREATE TABLE small_parent_fam (
  a STRING,
  b STRING,
  c STRING,
  PRIMARY KEY (a, b)
);
INSERT INTO small_parent_fam (a, b, c) VALUES ('first', 'second', 'third')

# The child splits its columns across two column families.
statement ok
CREATE TABLE large_child_fam (
  a STRING, b STRING, d STRING, e STRING, f STRING,
  PRIMARY KEY (a, b, d),
  FAMILY f1 (a, b, d, e),
  FAMILY f2 (f)
)
INTERLEAVE IN PARENT small_parent_fam (a, b)

statement ok
INSERT INTO large_child_fam (a, b, d, e, f)
  VALUES ('first', 'second', 'third_child', 'fourth_child', 'fifth_child')

query TTTTTTT
SELECT *
FROM large_child_fam INNER JOIN small_parent_fam USING (a)
----
first  second  third_child  fourth_child  fifth_child  second  third


# Test with parent being much larger than child.
# Here the parent is the wide table, with two column families.
statement ok
CREATE TABLE large_parent_fam (
  a STRING, b STRING, c STRING, d STRING, e STRING, f STRING,
  PRIMARY KEY (a, b),
  FAMILY f1 (a, b, c, d),
  FAMILY f2 (e, f)
)

statement ok
INSERT INTO large_parent_fam (a, b, c, d, e, f)
  VALUES ('first', 'second', 'third', 'fourth', 'fifth', 'sixth')

statement ok
CREATE TABLE small_child_fam (
  a STRING, b STRING, g STRING,
  PRIMARY KEY (a, b)
)
INTERLEAVE IN PARENT large_parent_fam (a, b)

statement ok
INSERT INTO small_child_fam (a, b, g)
  VALUES ('first', 'second', 'third_child')

query TTTTTTTT
SELECT *
FROM small_child_fam INNER JOIN large_parent_fam USING (a)
----
first  second  third_child  second  third  fourth  fifth  sixth
