#!/bin/sh

#
# This script creates the ZFS pools and datasets compatible with zsys
#

# Layout:
# bpool/BOOT/ubuntu_${UUID}
# rpool/ROOT/ubuntu_${UUID}
# rpool/ROOT/ubuntu_${UUID}/var -o canmount=off
# rpool/ROOT/ubuntu_${UUID}/var/games
# rpool/ROOT/ubuntu_${UUID}/var/lib
# rpool/ROOT/ubuntu_${UUID}/var/lib/AccountsService
# rpool/ROOT/ubuntu_${UUID}/var/lib/apt
# rpool/ROOT/ubuntu_${UUID}/var/lib/dpkg
# rpool/ROOT/ubuntu_${UUID}/var/log
# rpool/ROOT/ubuntu_${UUID}/var/mail
# rpool/ROOT/ubuntu_${UUID}/var/snap
# rpool/ROOT/ubuntu_${UUID}/var/spool
# rpool/ROOT/ubuntu_${UUID}/var/www
# rpool/ROOT/ubuntu_${UUID}/var/lib/NetworkManager
# rpool/ROOT/ubuntu_${UUID}/srv
# rpool/ROOT/ubuntu_${UUID}/usr -o canmount=off
# rpool/ROOT/ubuntu_${UUID}/usr/local
# rpool/USERDATA/$user_$UUID2
# rpool/USERDATA/root_$UUID2
#
# Steps:
# - Verify that /target is mounted
# - Retrieve fstab
# - unmount /target
# - delete all the partitions but the ESP
# - Create p1 ext4 size 100MB
# - Create p2 zfs bpool 1GB
# - Create p3 zfs rpool 100% remaining
# - Create datasets
# - Create /swapfile on /target
#
# After setup is done leave it mounted to let Ubiquity proceed with installation

# Abort on the first failing command and on any use of an unset variable.
set -eu

# Packages that must be installed for this script to run.
REQUIREDPKGS="zfsutils-linux"
# Root of the system being installed and mountpoint of its ESP.
TARGET="/target"
ESP="${TARGET}/boot/efi"
# Working directory named after this script. The location is stable on
# purpose: "finalize" looks for the flag file written by an earlier "init".
# "$0" is quoted so that a script path containing blanks is handled properly.
ZSYSTMP="/tmp/$(basename "$0")"
INIT_FLAG="${ZSYSTMP}/init.done"
FSTAB_PARTMAN="${ZSYSTMP}/fstab.partman"
PARTITION_LAYOUT="${ZSYSTMP}/layout"

mkdir -p "${ZSYSTMP}"

usage() {
    # Print the command line help on stdout, then terminate the script
    # with the exit status of the print command.
    printf '%s\n' \
        "Usage: $(basename "$0") [COMMAND] [OPTIONS...]" \
        "    Prepares a zsys compatible ZFS system." \
        "" \
        "Commands:" \
        "    layout      Get layout to display before formatting to ubiquity. Give the chosen disk as argument" \
        "    init        Initialize the pools and datasets" \
        "    finalize    Finalize the installation after the system has been installed" \
        "Options:" \
        "    -h, --help      This help" \
        "    -d, --debug     Enable debug mode"
    exit
}

SHORTOPTS="hd"
LONGOPTS="help,debug"

# Let getopt(1) normalise the command line: the recognised options come
# first, followed by "--" and then the positional arguments.
TEMP=$(getopt -o "${SHORTOPTS}" --long "${LONGOPTS}" -- "$@")
eval set -- "$TEMP"

while true ; do
    case "$1" in
        -h|--help)
            usage;;
        -d|--debug)
            set -x
            shift;;
        --)
            shift;
            break;;
        *)
            usage;;
    esac
done

# The first positional argument is the command (matched case-insensitively),
# the second one is an optional extra argument (the disk for "layout").
# "${1:-}" keeps a missing command from tripping "set -u": COMMAND is then
# empty and the check performed before dispatching the command reports it.
COMMAND=$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')
EXTRAARG=""
if [ $# -gt 1 ]; then
    EXTRAARG="${2}"
fi

check_prerequisites() {
    # Check the requirements to run this script
    #
    # Verifies that the script runs as root and that every required package
    # is installed. If any requirement is missing the program exits with
    # an error.
    #
    # Args:
    #   $@: List of required packages
    #
    # Returns
    #   Exits the program with status 1 if a requirement is not met
    echo "I: Checking system requirements"

    if [ "$(id -u)" -ne 0 ]; then
        echo "E: Script must be executed as root. Exiting!"
        exit 1
    fi

    for pkg in "$@"; do
        # dpkg-query complains on stderr about packages it does not know.
        # Silence it: the error printed below already covers that case.
        if ! dpkg-query -W -f'${Status}' "${pkg}" 2>/dev/null | grep -q "install ok installed"; then
            echo "E: $pkg is required and not installed on this system. Exiting!"
            exit 1
        fi
    done
}

prepare_target() {
	# Save the fstab generated by partman, then unmount the ESP and the target
	#
	# Args:
	#   $1: Mountpoint of the target system
	#
	# Globals:
	#   ESP (read): mountpoint of the EFI system partition, unmounted first
	#   FSTAB_PARTMAN (read): file receiving the copy of the target's fstab
	#
	# Returns
	#   Exits the program with status 1 if the target is not mounted or if a
	#   mountpoint still cannot be unmounted after the maximum number of tries
	target="$1"

	if ! grep -qE "\s${target}\s" /proc/mounts; then
		echo "E: $target is not mounted. Exiting!"
		exit 1
	fi

	# Save fstab generated by partman
	if [ -f "${target}/etc/fstab" ]; then
		echo "I: Saving existing fstab"
		cp "${target}/etc/fstab" "${FSTAB_PARTMAN}"
	else
		echo "W: ${target}/etc/fstab doesn't exist"
	fi

	# umount /target
	# It may fail to umount because the swap is being created by partman and not finished when we reach this point.
	# Give it some time and retry with a sleep between tries.
	maxiter=10

	for mountpoint in "${ESP}" "${target}"; do
		if [ ! -d "${mountpoint}" ]; then
			continue
		fi

		echo "I: umounting ${mountpoint}"
		# Each mountpoint gets its own retry budget: tries spent on the ESP
		# must not reduce the number of tries left for the target.
		iter=0
		# Do not make umount quiet. We want to know why it failed.
		until sudo umount "${mountpoint}"; do
			iter=$(( iter + 1 ))
			echo "W: Try ${iter}. Failed to umount ${mountpoint}."
			if [ "${iter}" -eq "${maxiter}" ]; then
				echo "E: Failed to umount ${mountpoint}. Exiting!"
				exit 1
			fi
			sleep 3
		done
	done
}

get_layout() {
	# Print the disk, the base name of its partitions and the numbers of the
	# partitions to create
	#
	# Args:
	#   $1: Mountpoint of the target system
	#   $2: Disk to use. When empty, the disk is deduced from the partition
	#       currently mounted on the target
	#
	# Outputs:
	#   On success a single line "OK|disk|partbase|esp|swap|bpool|rpool".
	#   On failure a message starting with "E:" followed by "exit 1"; callers
	#   run this function in a command substitution and inspect the first field.
	target="$1"
	disk="$2"

	if [ -z "${disk}" ]; then
		# The entire disk has been formatted with use_device
		# There is either one ext4 partition or one ext4 and one ESP
		part="$(grep -E "\s${target}\s" /proc/mounts | awk '{print $1}')"
		if [ -z "${part}" ]; then
			# Without a mounted partition there is no way to find the disk;
			# fail here instead of reporting a layout with an empty disk.
			echo "E: Couldn't identify the partition mounted on ${target}. Exiting!"
			exit 1
		fi

		disk="$(lsblk -lns -o TYPE,PATH "${part}" | awk '$1 == "disk" {print $2}')"
		if [ -z "${disk}" ]; then
			echo "E: Couldn't identify disk for partition ${part}. Exiting!"
			exit 1
		fi
		# Some disks have letters in the partition number like /dev/nvme0n1p1
		# In this case we want to retrieve 'p' so we deal only with partition number
		# in the rest of the script and prepend the base.
		partbase="$(printf '%s\n' "${part}" | sed -e 's/[0-9]*$//' -e "s#${disk}##")"
	else
		# The only purpose of this code is to display a friendly message in ubiquity to show the user
		# what partitioning will be performed. However, on first call, the disk is not yet partitioned
		# and collecting the information about disk partitioning would require to query partman. But we
		# don't want to add this extra complexity just to display a message. Instead we hardcode the
		# extension of the partition name depending on the type of disk, basically it's 'p' for anything
		# else than standard drives (eg nvme01pX)
		case "${disk}" in
			/dev/sd*|/dev/hd*|/dev/vd*)
				partbase=""
				;;
			*)
				partbase="p"
				;;
		esac
	fi

	partesp=1
	if is_gpt "${disk}"; then
		# No extended partition on EFI + GPT
		# The layout is
		# 1: ESP
		# 2: swap
		# 3: bpool
		# 4: rpool
		partswap=2
		partbpool=3
		partrpool=4
	else
		# MBR pools are on extended partition
		# The layout is:
		# 1: ESP
		# 2: Extended
		# 5: swap
		# 6: bpool
		# 7: rpool
		partswap=5
		partbpool=6
		partrpool=7
	fi

	echo "OK|${disk}|${partbase}|${partesp}|${partswap}|${partbpool}|${partrpool}"
}

format_disk() {
	# Repartition the disk with the swap, bpool and rpool partitions
	#
	# Args:
	#   $1: Disk to partition
	#   $2: Base name of the partitions (text between disk name and number)
	#   $3: Number of the ESP partition
	#   $4: Number of the bpool partition
	#   $5: Number of the rpool partition
	#   $6: Size of the swap partition in MiB
	#
	# Globals:
	#   ZSYSTMP (read): directory in which the sfdisk script is generated
	disk="$1"
	partbase="$2"
	partesp="$3"
	partbpool="$4"
	partrpool="$5"
	ss="$6"
	# The swap partition always sits right before bpool
	partswap=$(( partbpool - 1 ))
	# Only used in the message below
	partext=$(( partesp + 1 ))
	partprefix="${disk}${partbase}"

	sfdisktmp="${ZSYSTMP}/sfdisk.cfg"
	rm -f "${sfdisktmp}"

	echo "I: Formatting disk $disk with partitions ESP:${partesp} ext:${partext} swap:${partswap} bpool:${partbpool} rpool:${partrpool}"

	# bpool size: 500M < 5% of ZFS allocated space < 2G
	# partswap is partition 2 on GPT systems and first extended partition on MBR.
	# Shell arithmetic is used rather than expr(1): expr exits with status 1
	# when the result is 0, which would abort the script under "set -e".
	size_percent=$(( $(blockdev --getsize64 "${partprefix}${partswap}") / 1024 / 1024 * 5 / 100 ))
	bpool_size=500
	if [ "${size_percent}" -gt "${bpool_size}" ]; then
		bpool_size=${size_percent}
	fi
	if [ "${bpool_size}" -gt 2048 ]; then
		bpool_size=2048
	fi

	if is_gpt "${disk}"; then
		# Improvement: Delete all the partitions but the ESP
		# There should be only 1 or 2 partitions but it can be made generic
		if ! esp_exists "${disk}"; then
			# No ESP yet: describe one starting where partition ${partesp} starts
			start=$(sfdisk -l "${disk}"|grep "^${partprefix}${partesp}"|awk '{print $2}')
			cat > "${sfdisktmp}" <<EOF
${partprefix}${partesp}   : start= ${start}, size=    512M,        type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B, bootable
EOF
		else
			# Keep the ESP and drop the partition that follows it
			sfdisk --delete "${disk}" "${partswap}"
		fi

		cat >> "${sfdisktmp}" <<EOF
${partprefix}${partswap}  :   size= ${ss}M,         type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F
${partprefix}${partbpool} :   size= ${bpool_size}M, type=6A82CB45-1DD2-11B2-99A6-080020736631
${partprefix}${partrpool} :                         type=6A85CF4D-1DD2-11B2-99A6-080020736631
EOF
	else
		if ! esp_exists "${disk}"; then
			start=$(sfdisk -l "${disk}"|grep "^${partprefix}${partesp}"|awk '{print $2}')
			cat > "${sfdisktmp}" <<EOF
${partprefix}${partesp}   : start= ${start}, size=    512M,        type=ef, bootable
EOF
		else
			sfdisk --delete "${disk}" "${partswap}"
		fi

		cat >> "${sfdisktmp}" <<EOF
${partprefix}${partswap}  :                size= ${ss}M,         type=82
${partprefix}${partbpool} :                size= ${bpool_size}M, type=a5
${partprefix}${partrpool} :                                      type=a5
EOF

	fi
	sfdisk --append "${disk}" < "${sfdisktmp}"

	# Force a re-read of the partition table
	echo "I: Re-reading partition table"
	# Best effort: partx fails for partitions the kernel already knows about
	partx --add "${disk}" 2>/dev/null || true
	partx --show "${disk}"
}

init_zfs() {
	# Create the rpool and bpool pools and the dataset layout used by zsys
	#
	# Args:
	#   $1: Alternate root (-R) given to both pools
	#   $2: Device on which bpool is created
	#   $3: Device on which rpool is created
	target="$1"
	partbpool="$2"
	partrpool="$3"

	echo "I: Initializing ZFS"
	# Random 6 character identifier shared by the root and the boot dataset
	UUID_ORIG=$(head -100 /dev/urandom | tr -dc 'a-z0-9' |head -c6)
	rootds="rpool/ROOT/ubuntu_${UUID_ORIG}"
	bootds="bpool/BOOT/ubuntu_${UUID_ORIG}"

	# rpool
	zpool create -f \
		-o ashift=12 \
		-O compression=lz4 \
		-O acltype=posixacl \
		-O xattr=sa \
		-O relatime=on \
		-O normalization=formD \
		-O mountpoint=/ \
		-O canmount=off \
		-O dnodesize=auto \
		-O sync=disabled \
		-O mountpoint=/ -R "${target}" rpool "${partrpool}"

	# bpool
	# The version of bpool is set to the default version to prevent users from upgrading
	# Then only features supported by grub are enabled.
	zpool create -f \
		-o ashift=12 \
		-d \
		-o feature@async_destroy=enabled \
		-o feature@bookmarks=enabled \
		-o feature@embedded_data=enabled \
		-o feature@empty_bpobj=enabled \
		-o feature@enabled_txg=enabled \
		-o feature@extensible_dataset=enabled \
		-o feature@filesystem_limits=enabled \
		-o feature@hole_birth=enabled \
		-o feature@large_blocks=enabled \
		-o feature@lz4_compress=enabled \
		-o feature@spacemap_histogram=enabled \
		-O compression=lz4 \
		-O acltype=posixacl \
		-O xattr=sa \
		-O relatime=on \
		-O normalization=formD \
		-O canmount=off \
		-O devices=off \
		-O mountpoint=/boot -R "${target}" bpool "${partbpool}"

	# Containers, then the root and boot datasets themselves
	zfs create rpool/ROOT -o canmount=off -o mountpoint=none
	zfs create "${rootds}" -o mountpoint=/
	zfs create bpool/BOOT -o canmount=off -o mountpoint=none
	zfs create "${bootds}" -o mountpoint=/boot

	# System datasets. The parents come first so the children can be created.
	zfs create "${rootds}/var" -o canmount=off
	for sub in lib lib/AccountsService lib/apt lib/dpkg lib/NetworkManager; do
		zfs create "${rootds}/var/${sub}"
	done

	# Desktop specific system datasets
	zfs create "${rootds}/srv"
	zfs create "${rootds}/usr" -o canmount=off
	for sub in usr/local var/games var/log var/mail var/snap var/spool var/www; do
		zfs create "${rootds}/${sub}"
	done

	# USERDATA container
	# Datasets associated to the users are created by the installer.
	zfs create rpool/USERDATA -o canmount=off -o mountpoint=/

	# zsys properties: the root dataset is flagged as bootfs and stamped with
	# the current time, srv, usr and var are explicitly flagged as not bootfs
	zfs set com.ubuntu.zsys:bootfs='yes' "${rootds}"
	zfs set "com.ubuntu.zsys:last-used=$(date +%s)" "${rootds}"
	for sub in srv usr var; do
		zfs set com.ubuntu.zsys:bootfs='no' "${rootds}/${sub}"
	done
}

move_user () {
	# Move the home directory of a user to its own dataset under rpool/USERDATA
	#
	# Args:
	#   $1: Mountpoint of the target system
	#   $2: Name of the user
	#   $3: Home directory of the user, as a path inside the target
	#   $4: Identifier appended to the name of the user's dataset
	target="$1"
	user="$2"
	userhome="$3"
	uuid="$4"
	# Built from the uuid argument rather than from a caller's global, so that
	# the dataset created and the dataset tagged below are always the same one
	userdataset="rpool/USERDATA/${user}_${uuid}"

	echo "I: Creating user $user with home $userhome"
	# Park the existing content, it is copied back once the dataset exists
	mv "${target}/${userhome}" "${target}/tmp/home/${user}"
	zfs create "${userdataset}" -o canmount=on -o "mountpoint=${userhome}"
	# uid and gid are resolved inside the target, where the account is defined
	chown "$(chroot "${target}" id -u "${user}"):$(chroot "${target}" id -g "${user}")" "${target}/${userhome}"
	rsync -a "${target}/tmp/home/${user}/" "${target}/${userhome}"
	# Associate the user dataset with the dataset mounted on the target
	bootfsdataset=$(grep "\s${target}\s" /proc/mounts | awk '{ print $1 }')
	zfs set com.ubuntu.zsys:bootfs-datasets="${bootfsdataset}" "${userdataset}"
}

init_system_partitions() {
	# Mount the ESP in the target and bind mount its grub directory on boot/grub
	#
	# Args:
	#   $1: Mountpoint of the target system
	#   $2: Device of the EFI system partition
	#   $3: Stored in partgrub but not used by this function
	target="$1"
	partefi="$2"
	partgrub="$3"
	espdir="${target}/boot/efi"
	grubdir="${target}/boot/grub"

	# ESP, with a grub directory on it
	mkdir -p "${espdir}"
	mount -t vfat "${partefi}" "${espdir}"
	mkdir -p "${espdir}/grub"

	echo "I: Mount grub directory"
	# The grub directory of the target is the one located on the ESP
	mkdir -p "${grubdir}"
	mount -o bind "${espdir}/grub" "${grubdir}"
}

esp_exists() {
	# Tell whether the partition table of a disk contains an ESP
	#
	# Args:
	#   $1: Disk to inspect
	#
	# Returns
	#   0 when the sfdisk dump of the disk contains the partition type
	#   expected for an ESP, non-zero otherwise
	#
	# FIXME: Currently partman-auto set the type of EFI on MBR as W95 (b) instead of EFI (ef)
	parttype="b"
	if is_gpt "${1}"; then
		parttype="C12A7328-F81F-11D2-BA4B-00A0C93EC93B"
	fi
	sfdisk -d "${1}" | grep -q "type=${parttype}"
}

is_gpt() {
	# Tell whether the partition table of disk $1 is a GPT
	#
	# Returns 0 when the sfdisk dump of the disk has a "label:" line whose
	# second field contains "gpt", 1 otherwise
	sfdisk -d "${1}" | awk '/^label:/ && $2 ~ /gpt/ { found = 1 } END { exit !found }'
}

# REQUIREDPKGS is deliberately left unquoted: it is a space separated list
# and every package must reach the function as a separate argument.
check_prerequisites ${REQUIREDPKGS}

echo "I: Running $(basename "$0") ${COMMAND}"

if [ -z "${COMMAND}" ]; then
	echo "E: COMMAND is mandatory. Exiting!"
	exit 1
elif [ "${COMMAND}" = "layout" ]; then
	# Just displays the layout that will be created without any change to the disk.
	# At this stage we don't know yet the size of the partitions that will be created.
	IFS="|" read -r ERR DISK PARTBASE PARTESP PARTSWAP PARTBPOOL PARTRPOOL<<EOF
$(get_layout "${TARGET}" "${EXTRAARG}")
EOF

	# On failure the first field holds the error message instead of "OK"
	if [ "${ERR}" != "OK" ]; then
		echo "${ERR}"
		exit 1
	fi

	cat > "${PARTITION_LAYOUT}" <<EOF
disk:${DISK}
EOF
	# The ESP is only listed when it has to be created
	if ! esp_exists "${DISK}"; then
		cat >> "${PARTITION_LAYOUT}" <<EOF
part:vfat:ESP:${DISK}${PARTBASE}${PARTESP}
EOF
	fi

	cat >> "${PARTITION_LAYOUT}" <<EOF
part:swap:swap:${DISK}${PARTBASE}${PARTSWAP}
part:zfs:bpool:${DISK}${PARTBASE}${PARTBPOOL}
part:zfs:rpool:${DISK}${PARTBASE}${PARTRPOOL}
EOF

elif [ "${COMMAND}" = "init" ]; then
	# The flag is only written back once the whole initialization succeeded
	rm -f "${INIT_FLAG}"

	IFS="|" read -r ERR DISK PARTBASE PARTESP PARTSWAP PARTBPOOL PARTRPOOL<<EOF
$(get_layout "${TARGET}" "")
EOF

	if [ "${ERR}" != "OK" ]; then
		echo "${ERR}"
		exit 1
	fi

	echo "I: Partition table before init of ZFS"
	partx --show "${DISK}"

	# Swap files are not supported on ZFS, we use a swap partition instead:
	SWAPFILE="$(grep "^${TARGET}" /proc/swaps |awk '{print $1}')"
	# Give us a minimum swap partition size of 4MB in case we decide on
	# no swap, just to keep the partition layout stable:
	SWAPSIZE=4194304

	# Disable swap and get the swap volume size:
	if [ -n "${SWAPFILE}" ]; then
		SWAPSIZE=$(stat -c%s "${SWAPFILE}")
		echo "I: Found swapfile with size ${SWAPSIZE}. Disabling"
		swapoff "${SWAPFILE}"
	fi
	# Convert to MiB to align the size on the size of a block
	SWAPVOLSIZE=$(( SWAPSIZE / 1024 / 1024 ))

	prepare_target "${TARGET}"
	format_disk "${DISK}" "${PARTBASE}" "${PARTESP}" "${PARTBPOOL}" "${PARTRPOOL}" "${SWAPVOLSIZE}"
	init_zfs "${TARGET}" "${DISK}${PARTBASE}${PARTBPOOL}" "${DISK}${PARTBASE}${PARTRPOOL}"
	init_system_partitions "${TARGET}" "${DISK}${PARTBASE}${PARTESP}" "${DISK}${PARTBASE}${PARTESP}"

	# Generate fstab
	# $TARGET/etc has been destroyed by the creation of the zfs partition
	# Recreate it
	mkdir -p "${TARGET}/etc"
	if [ -f "${FSTAB_PARTMAN}" ]; then
		echo "I: Creating fstab"
		# Drop the entries of the former root filesystem and of the swap file
		grep -Ev '\s/\s|/swapfile' "${FSTAB_PARTMAN}" > "${TARGET}/etc/fstab"
	fi

	# The fstab entries are written with printf: whether echo turns "\t"
	# into a tab depends on the shell implementing /bin/sh.
	if ! grep -q "boot/efi" "${TARGET}/etc/fstab"; then
		espuuid=$(blkid -s UUID -o value "${DISK}${PARTBASE}${PARTESP}")
		printf 'UUID=%s\t/boot/efi\tvfat\tumask=0022,fmask=0022,dmask=0022\t0\t1\n' "${espuuid}" >> "${TARGET}/etc/fstab"
	fi

	# Bind mount grub from ESP to the expected location
	printf '/boot/efi/grub\t/boot/grub\tnone\tdefaults,bind\t0\t0\n' >> "${TARGET}/etc/fstab"

	if [ -n "${SWAPFILE}" ]; then
		SWAPDEVICE="${DISK}${PARTBASE}${PARTSWAP}"
		mkswap -f "${SWAPDEVICE}"
		SWAPID=$(blkid -s UUID -o value "${SWAPDEVICE}")
		printf 'UUID=%s\tnone\tswap\tdiscard\t0\t0\n' "${SWAPID}" >> "${TARGET}/etc/fstab"
		swapon -v "${SWAPDEVICE}"
	fi
	# Make /boot/{grub,efi} world readable
	sed -i 's#\(.*boot/efi.*\)umask=0077\(.*\)#\1umask=0022,fmask=0022,dmask=0022\2#' "${TARGET}/etc/fstab"

	echo "I: Marking ZFS utilities to be kept in the target system"
	apt-install zfsutils-linux 2>/dev/null
	apt-install zfs-initramfs 2>/dev/null
	apt-install zsys 2>/dev/null

	touch "$INIT_FLAG"
elif [ "${COMMAND}" = "finalize" ]; then
	if [ ! -f "$INIT_FLAG" ]; then
		echo "W: zsys init didn't succeed. Not proceeding with command: ${COMMAND}. Aborting!"
		exit 1
	fi

	# Activate zfs generator.
	# After enabling the generator we should run zfs set canmount=on DATASET
	# in the chroot for one dataset of each pool to refresh the zfs cache.
	echo "I: Activating zfs generator"
	ln -s /usr/lib/zfs-linux/zed.d/history_event-zfs-list-cacher.sh "${TARGET}/etc/zfs/zed.d"

	# Create zpool cache
	zpool set cachefile= bpool
	zpool set cachefile= rpool
	cp /etc/zfs/zpool.cache "${TARGET}/etc/zfs/"
	mkdir -p "${TARGET}/etc/zfs/zfs-list.cache"
	touch "${TARGET}/etc/zfs/zfs-list.cache/bpool" "${TARGET}/etc/zfs/zfs-list.cache/rpool"

	# Handle userdata
	# A single random identifier is shared by all the user datasets
	UUID_ORIG=$(head -100 /dev/urandom | tr -dc 'a-z0-9' |head -c6)
	mkdir -p "${TARGET}/tmp/home"
	for user in "${TARGET}"/home/*; do
		# Also skips the unexpanded pattern when /home is empty
		if [ -d "${user}" ]; then
			user="$(basename "${user}")"
			move_user "${TARGET}" "${user}" "/home/${user}" "${UUID_ORIG}"
		fi
	done

	move_user "${TARGET}" root /root "${UUID_ORIG}"

	echo "I: Changing sync mode of rpool to standard"
	zfs set sync=standard rpool

	echo "I: ZFS setup complete"
else
	echo "E: Unknown command: $COMMAND"
	exit 1
fi
