# int8
# Forward-only inner product runs over the int8 configurations, combining an
# output scale with several post-op chains.
--reset
--dir=FWD_B,FWD_D
--cfg=s8s8s32,s8s8s8,s8s8u8,u8s8s32,u8s8s8,u8s8u8
--attr-oscale=per_oc:2.25
# Within one quoted chain, post-ops are separated by ';' while ':' only
# separates the arguments of a single post-op. The last chain is therefore
# three post-ops: add, then sum, then relu.
--attr-post-ops='sum:0.5','add:s8','linear:0.5:1.5:2', \
                'add:f32:per_oc;sum:0.25;relu:0.5'
--mb=2 --batch=set_gpu
--mb=0 --batch=shapes_0d_gpu

# Second pass with a common output scale and a different post-op list.
# --mb=0 set on the line above is still in effect for this batch.
--attr-oscale=common:2.25
--attr-post-ops='sum:0.5','add:u8','linear:-0.5:0.5:0.5', \
                'add:f32:per_oc;abs;sum:0.5'
--batch=shapes_1d

# f32: forward and both backward directions, minibatch overridden to 2
--reset
--dir=FWD_B,BWD_D,BWD_WB
--cfg=f32
--mb=2 --batch=set_gpu

# f32 + post_ops
# Forward-only f32 runs with post-op chains attached.
--reset
--dir=FWD_B,FWD_D
--cfg=f32
--attr-post-ops='sum:2;relu:3'
--mb=2
--batch=set_gpu
--mb=0
--batch=shapes_0d_gpu

# Wider post-op list for the shapes_1d batch; --mb=0 from above still applies.
--attr-post-ops='sum:2','linear:3:2','add:f32','sum;add:s8:per_oc;linear:0.5:1.5:2'
--batch=shapes_1d

# bf16: forward and both backward directions on the shapes_1d batch
--reset
--dir=FWD_B,BWD_D,BWD_WB
--cfg=bf16bf16bf16
--batch=shapes_1d

# f16: forward-only runs with post-ops, for two --mb settings (0 and 2)
--reset
--dir=FWD_B,FWD_D
--cfg=f16
--mb=0,2
--attr-post-ops='linear:1:2','add:f16', \
                'add:s8:per_oc;linear:2:3:0.5'
--batch=shapes_1d

# bf16 + f32
# There is no --reset here: settings from the f16 section above (such as
# --mb=0,2) stay active, and only the post-ops list is emptied explicitly.
--attr-post-ops=

# One configuration per direction, each run on the shapes_1d batch.
--cfg=bf16bf16f32 --dir=FWD_B
--batch=shapes_1d

--cfg=f32bf16bf16 --dir=BWD_D
--batch=shapes_1d

--cfg=bf16f32bf16 --dir=BWD_WB
--batch=shapes_1d

# bf16 + post_ops
# FWD_B only, for both bf16 destination and f32 destination configurations.
# No --reset precedes this section, so every option it relies on is set here.
--dir=FWD_B
--cfg=bf16bf16bf16,bf16bf16f32
--mb=0
--attr-post-ops='sum:0.5','tanh','add:bf16','add:f32:per_oc;relu:0.5'
--batch=shapes_1d

# different memory tags
# Hands off to the harness_ip_tag_gpu batch file. No --reset is issued before
# it, so the options set by the preceding section are still active on entry.
# NOTE(review): presumably the harness resets or overrides them itself - confirm.
--batch=harness_ip_tag_gpu
