#! /bin/sh
set -e

. /usr/share/debconf/confmodule

# We need to duplicate this code because localechooser has the right to
# assume that its later scripts are only run once, but oem-config doesn't
# have that luxury.

# Fetch the answers recorded by the installer's locale questions.
db_get debian-installer/locale
LOCALE="$RET"
# LOCALE may be re-pointed at the selected location further down; remember
# the locale as originally answered, since LANG is written from this copy.
LOCALE_TRANSLATIONS="$LOCALE"

db_get debian-installer/language
LANGLIST="$RET"

db_get debian-installer/country
COUNTRY="$RET"

# Supported locales with the commas stripped out.
# NOTE(review): EXTRAS is not referenced again in this script - confirm
# whether it is still needed.
db_get localechooser/supported-locales
EXTRAS="$(echo "$RET" | tr -d ',')"

# Portuguese and Chinese are special: choosing a different location may
# imply a different dialect of the language.  For those two languages, keep
# LANG on the selected language (messages, character types, collation) and
# let the remaining locale categories follow the selected location.
primary_lang="${LOCALE%%_*}"
if [ "$primary_lang" = pt ] || [ "$primary_lang" = zh ]; then
	db_get localechooser/languagelist
	ORIG_LANGUAGE="$RET"
	langlist_file=/usr/lib/ubiquity/localechooser/languagelist

	if grep -q "^$ORIG_LANGUAGE;" "$langlist_file"; then
		# Same locale, but with the territory replaced by the
		# selected country.
		newlocale="$(echo "$LOCALE" | sed "s/_[A-Z][A-Z]*/_$COUNTRY/")"
		# Only switch when that really is a different locale and
		# it is listed in SUPPORTED.
		if [ "$newlocale" != "$LOCALE" ] && \
		   grep -q "^${newlocale%%[.@]*}[.@ ]" /usr/share/i18n/SUPPORTED; then
			LOCALE="$newlocale"
			# The language list is taken from the sixth
			# ';'-separated field of the matching row.
			LANGLIST="$(grep "^$ORIG_LANGUAGE;" "$langlist_file" | cut -d';' -f6)"
		fi
	fi
fi

# It is relatively common for the combination of language and location (as
# selected on the timezone page) not to identify a supported combined
# locale.  For example, this happens when the user is a migrant, or when
# they prefer to use a different language to interact with their computer
# because that language is better-supported.
#
# In such cases, we would like to be able to use a locale reflecting the
# selected language in LANG for messages, character types, and collation,
# and to make the other locale categories reflect the selected location.
# This means that we have to guess at a suitable locale for the selected
# location, and we do not want to ask yet another locale-related question.
# Nevertheless, some cases are ambiguous: a user who has asked for the
# English language and identifies their location as Switzerland will get
# different numeric representation depending on which Swiss locale we pick.
#
# The goal of identifying a reasonable default for migrants makes things
# easier: it is reasonable to default to French for France despite the
# existence of several minority languages there, because anyone who prefers
# those languages will probably already have selected them and won't arrive
# here.  However, in some cases we're unsure, and in some cases we actively
# don't want to pick a "preferred" language: selecting either Greek or
# Turkish as the default language for migrants to Cyprus would probably
# offend somebody!  In such cases we simply punt to the old behaviour of not
# setting up a locale reflecting the location, which is suboptimal but is at
# least unlikely to give offence.
#
# Our best shot at general criteria for selecting a default language in
# these circumstances are as follows:
#
#  * Exclude special-purpose (e.g. en_DK) and artificial (e.g. la_AU,
#    tlh_GB) locales.
#  * If there is a language specific to or very strongly associated with the
#    country in question, prefer it unless it has rather few native
#    speakers.
#  * Exclude minority languages that are relatively unlikely to be spoken by
#    migrants who have not already selected them as their preferred language
#    earlier in the installer.
#  * If there is an official national language likely to be seen in print
#    media, road signs, etc., then prefer that.
#  * In cases of doubt, selecting no default language is safe.
#
# This code is currently duplicated between here and
# ubiquity/d-i/patches/localechooser-post-base-installer.patch.  Please keep
# them in sync until the duplication can be removed.

# guess_country_locale COUNTRY [SUPPORTED_FILE]
#
# Print the UTF-8 locale that should represent COUNTRY (an upper-case
# territory code such as "FR"), or nothing if no safe guess can be made.
# SUPPORTED_FILE defaults to /usr/share/i18n/SUPPORTED and only needs to be
# given when testing.
#
#  * If at most one UTF-8 locale exists for COUNTRY, that one (or nothing)
#    is printed.
#  * Otherwise the table below supplies a default language; the first UTF-8
#    locale for that language and COUNTRY is printed.
#  * If COUNTRY is not in the table, nothing is printed: in cases of doubt
#    we must not pick a language.  (Previously the lookup was still run with
#    an empty language, which matched *any* locale of the country and so
#    silently picked the first one listed, e.g. Greek for Cyprus.)
#
# Remember to mirror changes here in
# ubiquity/d-i/patches/localechooser-post-base-installer.patch.
guess_country_locale () {
	local country supported available deflang
	country="$1"
	supported="${2:-/usr/share/i18n/SUPPORTED}"

	available="$(grep ' UTF-8' "$supported" | \
		grep "_$country[.@ ]" | cut -d' ' -f1 | sort -u)"
	if [ "$(echo "$available" | wc -l)" = 1 ]; then
		# Zero or one candidate (an empty list also counts as one
		# line here): nothing to disambiguate.
		echo "$available"
		return 0
	fi

	case "$country" in
	    AU) deflang=en ;;
	    CN) deflang=zh ;;
	    DE) deflang=de ;;
	    DK) deflang=da ;;
	    DZ) deflang=ar ;;
	    ES) deflang=es ;;
	    # Somewhat unclear: Oromo has the greatest number of
	    # native speakers; English is the most widely spoken
	    # language and taught in secondary schools; Amharic is
	    # the official language and was taught in primary
	    # schools.
	    ET) deflang=am ;;
	    FI) deflang=fi ;;
	    FR) deflang=fr ;;
	    GB) deflang=en ;;
	    # Irish (Gaelic) is strongly associated with Ireland,
	    # but nearly all its native speakers also speak English,
	    # and migrants are likely to use English.
	    IE) deflang=en ;;
	    IL) deflang=he ;;
	    IT) deflang=it ;;
	    MA) deflang=ar ;;
	    MK) deflang=mk ;;
	    NG) deflang=en ;;
	    NL) deflang=nl ;;
	    NZ) deflang=en ;;
	    # Filipino is a de facto version of Tagalog, which is
	    # also spoken; English is also an official language.
	    PH) deflang=fil ;;
	    PK) deflang=ur ;;
	    PL) deflang=pl ;;
	    RU) deflang=ru ;;
	    # Chinese has more speakers, but English is the "common
	    # language of the nation" (Wikipedia) and official
	    # documents must be translated into English to be
	    # accepted.
	    SG) deflang=en ;;
	    SN) deflang=wo ;;
	    TR) deflang=tr ;;
	    TW) deflang=zh ;;
	    UA) deflang=uk ;;
	    US) deflang=en ;;
	    ZM) deflang=en ;;
	    *) deflang= ;;
	esac

	# No default language for this country: deliberately make no guess.
	if [ -z "$deflang" ]; then
		return 0
	fi

	# Anchor the match so that the language code has to be the whole
	# language part of the locale name, not merely its tail.
	grep ' UTF-8' "$supported" | \
		grep "^${deflang}_$country[.@ ]" | head -n1 | cut -d' ' -f1
}

# If simply swapping the territory of LOCALE for COUNTRY does not give a
# supported locale (and LOCALE was not already adjusted above), try to guess
# a locale for the country and use it for the location-dependent categories.
combined="$(echo "$LOCALE" | sed "s/_[A-Z][A-Z]*/_$COUNTRY/")"
if [ "$LOCALE" = "$LOCALE_TRANSLATIONS" ] && \
   ! grep -q "^${combined%%[.@]*}[.@ ]" /usr/share/i18n/SUPPORTED; then
	combined="$(guess_country_locale "$COUNTRY")"

	# An empty result means "no guess"; leave LOCALE untouched then.
	if [ "$combined" ]; then
		LOCALE="$combined"
	fi
fi

# set_field FILE CATEGORY VALUE
#
# Ensure FILE carries the assignment CATEGORY="VALUE".  Any line that
# consists of optional '#' characters and spaces followed by CATEGORY= is
# rewritten in place (which also uncomments it); if there is no such line,
# or FILE cannot be read, the assignment is appended to FILE instead.
set_field () {
	local target key assignment
	target="$1"
	key="$2"
	assignment="$2=\"$3\""

	# Nothing to rewrite: just add the assignment at the end.
	if ! grep -qs "^#* *$key=" "$target"; then
		echo "$assignment" >> "$target"
		return
	fi

	sed -i "s,^#* *$key=.*,$assignment," "$target"
}

# LANG always follows the originally selected locale (LOCALE_TRANSLATIONS).
# In /etc/environment only an already present LANG line is rewritten; in
# /etc/default/locale a LANG line is added if there is none yet.
if [ -e /etc/environment ]; then
	sed -i "s,^LANG=.*,LANG=\"$LOCALE_TRANSLATIONS\"," /etc/environment
fi
if grep -qs "^LANG=" /etc/default/locale; then
	sed -i "s,^LANG=.*,LANG=\"$LOCALE_TRANSLATIONS\"," /etc/default/locale
else
	echo "LANG=\"$LOCALE_TRANSLATIONS\"" >> /etc/default/locale
fi
# We set LANGUAGE only if the languagelist is a list of
# languages with alternatives. Otherwise, setting it is useless
if echo "$LANGLIST" | grep -q ":"; then
	# Adjust /etc/environment if LANGUAGE is already set, but otherwise
	# leave it alone.  /etc/environment may legitimately be missing (see
	# the -e guards around the other accesses), so grep runs with -s to
	# treat that as "not set" without printing an error.
	if grep -qs "^LANGUAGE=" /etc/environment; then
		sed -i "s,^LANGUAGE=.*,LANGUAGE=\"$LANGLIST\"," /etc/environment
	fi
	if grep -qs "^LANGUAGE=" /etc/default/locale; then
		sed -i "s,^LANGUAGE=.*,LANGUAGE=\"$LANGLIST\"," /etc/default/locale
	else
		echo "LANGUAGE=\"$LANGLIST\"" >> /etc/default/locale
	fi
else
	# Single language: drop any stale LANGUAGE setting from both files.
	if [ -e /etc/environment ]; then
		sed -i "/^LANGUAGE=/d" /etc/environment
	fi
	if [ -e /etc/default/locale ]; then
		sed -i "/^LANGUAGE=/d" /etc/default/locale
	fi
fi
# When LOCALE was pointed at the selected location, it differs from the
# locale used for LANG.  In that case store it for the location-dependent
# categories in /etc/default/locale and generate it.
if [ "$LOCALE" != "$LOCALE_TRANSLATIONS" ]; then
	for lc_name in \
		LC_NUMERIC \
		LC_TIME \
		LC_MONETARY \
		LC_PAPER \
		LC_NAME \
		LC_ADDRESS \
		LC_TELEPHONE \
		LC_MEASUREMENT \
		LC_IDENTIFICATION
	do
		set_field /etc/default/locale "$lc_name" "$LOCALE"
	done
	mkdir -p "/usr/share/locale-langpack/${LOCALE%_*}"
	# Best effort: a failure to generate the locale is ignored.
	/usr/sbin/locale-gen --no-purge "$LOCALE" || true
fi

# Rewrite any LANG line (commented out or not) in gdm's defaults file, if
# that file exists.
gdm_defaults=/etc/default/gdm
if [ -e "$gdm_defaults" ]; then
	sed -i "s,^#*LANG=.*,LANG=$LOCALE_TRANSLATIONS,g" "$gdm_defaults"
fi

# Bare language code: everything before the first '_', '.' or '@'.
LANGCODE="${LOCALE_TRANSLATIONS%%[_.@]*}"
mkdir -p "/usr/share/locale-langpack/$LANGCODE"
# Best effort: a failure to install the language pack is ignored.
/usr/share/locales/install-language-pack "$LANGCODE" '' || true

# TODO: kbd/cyr handling?
