diff --git a/flake.nix b/flake.nix index 8bef9c1..aa4cead 100644 --- a/flake.nix +++ b/flake.nix @@ -59,14 +59,24 @@ syndicate-py = python3Packages.syndicate; }) self.legacyPackages; - nixosModules.default = self.nixosModules.syndicate-server; - nixosModules.syndicate-server = - # A little hack to apply our overlay to this module only. - { config, lib, pkgs, ... }: - (import ./nixos/syndicate-server.nix) { - inherit config lib; - pkgs = pkgs.extend self.overlays.default; - }; + nixosModules = { + default = self.nixosModules.syndicate-server; + + syndicate-server = + # A little hack to apply our overlay to this module only. + { config, lib, pkgs, ... }: + (import ./nixos/syndicate-server.nix) { + inherit config lib; + pkgs = pkgs.extend self.overlays.default; + }; + + synit.imports = [ + ./nixos/modules/system/boot/stage-1.nix + ./nixos/modules/system/boot/stage-2.nix + ./nixos/modules/system/boot/synit.nix + ]; + + }; devShells = self.lib.mapAttrs (system: pkgs: with pkgs; { @@ -79,6 +89,7 @@ inherit system; pkgs = pkgs'; }; { + http = simpleTest { nodes.machine = { config, pkgs, ... }: { imports = [ self.nixosModules.syndicate-server ]; @@ -119,6 +130,21 @@ machine.succeed("curl --fail http://localhost:8888/greeting") ''; }; + + synit = simpleTest { + nodes.machine = { + imports = [ + self.nixosModules.synit + ]; + + nixpkgs.pkgs = pkgs'; + }; + testScript = '' + start_all() + machine.wait_for_unit("foo.service") + ''; + }; + }) { inherit (self.legacyPackages) x86_64-linux; }; }; } diff --git a/nixos/modules/system/boot/stage-1-init.sh b/nixos/modules/system/boot/stage-1-init.sh new file mode 100644 index 0000000..8baca35 --- /dev/null +++ b/nixos/modules/system/boot/stage-1-init.sh @@ -0,0 +1,626 @@ +#! @shell@ + +targetRoot=/mnt-root +console=tty1 +verbose="@verbose@" + +info() { + if [[ -n "$verbose" ]]; then + echo "$@" + fi +} + +extraUtils="@extraUtils@" +export LD_LIBRARY_PATH=@extraUtils@/lib +export PATH=@extraUtils@/bin +ln -s @extraUtils@/bin /bin + +# Copy the secrets to their needed location +if [ -d "@extraUtils@/secrets" ]; then + for secret in $(cd "@extraUtils@/secrets"; find . -type f); do + mkdir -p $(dirname "/$secret") + ln -s "@extraUtils@/secrets/$secret" "$secret" + done +fi + + +fail() { + if [ -n "$panicOnFail" ]; then exit 1; fi + + @preFailCommands@ + + # If starting stage 2 failed, allow the user to repair the problem + # in an interactive shell. + cat </dev/$console 2>/dev/$console" + elif [ -n "$allowShell" -a "$reply" = i ]; then + echo "Starting interactive shell..." + setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" || fail + elif [ "$reply" = r ]; then + echo "Rebooting..." + reboot -f + else + info "Continuing..." + fi +} + +trap 'fail' 0 + + +# Print a greeting. +info +info "<<< NixOS Stage 1 >>>" +info + +# Make several required directories. +mkdir -p /etc/udev +touch /etc/fstab # to shut up mount +ln -s /proc/mounts /etc/mtab # to shut up mke2fs +touch /etc/udev/hwdb.bin # to shut up udev +touch /etc/initrd-release + +# Function for waiting for device(s) to appear. +waitDevice() { + local device="$1" + # Split device string using ':' as a delimiter as bcachefs + # uses this for multi-device filesystems, i.e. /dev/sda1:/dev/sda2:/dev/sda3 + local IFS=':' + + # USB storage devices tend to appear with some delay. It would be + # great if we had a way to synchronously wait for them, but + # alas... So just wait for a few seconds for the device to + # appear. + for dev in $device; do + if test ! -e $dev; then + echo -n "waiting for device $dev to appear..." + try=20 + while [ $try -gt 0 ]; do + sleep 1 + udevadm trigger --action=add + if test -e $dev; then break; fi + echo -n "." + try=$((try - 1)) + done + echo + [ $try -ne 0 ] + fi + done +} + +# Mount special file systems. +specialMount() { + local device="$1" + local mountPoint="$2" + local options="$3" + local fsType="$4" + + mkdir -m 0755 -p "$mountPoint" + mount -n -t "$fsType" -o "$options" "$device" "$mountPoint" +} +source @earlyMountScript@ + +# Copy initrd secrets from /.initrd-secrets to their actual destinations +if [ -d "/.initrd-secrets" ]; then + # + # Secrets are named by their full destination pathname and stored + # under /.initrd-secrets/ + # + for secret in $(cd "/.initrd-secrets"; find . -type f); do + mkdir -p $(dirname "/$secret") + cp "/.initrd-secrets/$secret" "$secret" + done +fi + +# Log the script output to /dev/kmsg or /run/log/stage-1-init.log. +mkdir -p /tmp +mkfifo /tmp/stage-1-init.log.fifo +logOutFd=8 && logErrFd=9 +eval "exec $logOutFd>&1 $logErrFd>&2" +if test -w /dev/kmsg; then + tee -i < /tmp/stage-1-init.log.fifo /proc/self/fd/"$logOutFd" | while read -r line; do + if test -n "$line"; then + echo "<7>stage-1-init: [$(date)] $line" > /dev/kmsg + fi + done & +else + mkdir -p /run/log + tee -i < /tmp/stage-1-init.log.fifo /run/log/stage-1-init.log & +fi +exec > /tmp/stage-1-init.log.fifo 2>&1 + + +# Process the kernel command line. +export stage2Init=/init +for o in $(cat /proc/cmdline); do + case $o in + console=*) + set -- $(IFS==; echo $o) + params=$2 + set -- $(IFS=,; echo $params) + console=$1 + ;; + init=*) + set -- $(IFS==; echo $o) + stage2Init=$2 + ;; + boot.persistence=*) + set -- $(IFS==; echo $o) + persistence=$2 + ;; + boot.persistence.opt=*) + set -- $(IFS==; echo $o) + persistence_opt=$2 + ;; + boot.trace|debugtrace) + # Show each command. + set -x + ;; + boot.shell_on_fail) + allowShell=1 + ;; + boot.debug1|debug1) # stop right away + allowShell=1 + fail + ;; + boot.debug1devices) # stop after loading modules and creating device nodes + allowShell=1 + debug1devices=1 + ;; + boot.debug1mounts) # stop after mounting file systems + allowShell=1 + debug1mounts=1 + ;; + boot.panic_on_fail|stage1panic=1) + panicOnFail=1 + ;; + root=*) + # If a root device is specified on the kernel command + # line, make it available through the symlink /dev/root. + # Recognise LABEL= and UUID= to support UNetbootin. + set -- $(IFS==; echo $o) + if [ $2 = "LABEL" ]; then + root="/dev/disk/by-label/$3" + elif [ $2 = "UUID" ]; then + root="/dev/disk/by-uuid/$3" + else + root=$2 + fi + ln -s "$root" /dev/root + ;; + copytoram) + copytoram=1 + ;; + findiso=*) + # if an iso name is supplied, try to find the device where + # the iso resides on + set -- $(IFS==; echo $o) + isoPath=$2 + ;; + esac +done + +# Set hostid before modules are loaded. +# This is needed by the spl/zfs modules. +@setHostId@ + +# Load the required kernel modules. +mkdir -p /lib +ln -s @modulesClosure@/lib/modules /lib/modules +ln -s @modulesClosure@/lib/firmware /lib/firmware +# see comment in stage-1.nix for explanation +echo @extraUtils@/bin/modprobe-kernel > /proc/sys/kernel/modprobe +for i in @kernelModules@; do + info "loading module $(basename $i)..." + modprobe $i +done + + +# Create device nodes in /dev. +@preDeviceCommands@ +info "running udev..." +ln -sfn /proc/self/fd /dev/fd +ln -sfn /proc/self/fd/0 /dev/stdin +ln -sfn /proc/self/fd/1 /dev/stdout +ln -sfn /proc/self/fd/2 /dev/stderr +mkdir -p /etc/udev +ln -sfn @udevRules@ /etc/udev/rules.d +mkdir -p /dev/.mdadm +udevd --daemon +udevadm trigger --action=add +udevadm settle + + +if test -n "$debug1devices"; then fail; fi + + +@postDeviceCommands@ + + +# Check the specified file system, if appropriate. +checkFS() { + local device="$1" + local fsType="$2" + + # Only check block devices. + if [ ! -b "$device" ]; then return 0; fi + + # Don't check ROM filesystems. + if [ "$fsType" = iso9660 -o "$fsType" = udf ]; then return 0; fi + + # Don't check resilient COWs as they validate the fs structures at mount time + if [ "$fsType" = btrfs -o "$fsType" = zfs -o "$fsType" = bcachefs ]; then return 0; fi + + # Skip fsck for apfs as the fsck utility does not support repairing the filesystem (no -a option) + if [ "$fsType" = apfs ]; then return 0; fi + + # Skip fsck for nilfs2 - not needed by design and no fsck tool for this filesystem. + if [ "$fsType" = nilfs2 ]; then return 0; fi + + # Skip fsck for inherently readonly filesystems. + if [ "$fsType" = squashfs ]; then return 0; fi + + # If we couldn't figure out the FS type, then skip fsck. + if [ "$fsType" = auto ]; then + echo 'cannot check filesystem with type "auto"!' + return 0 + fi + + # Device might be already mounted manually + # e.g. NBD-device or the host filesystem of the file which contains encrypted root fs + if mount | grep -q "^$device on "; then + echo "skip checking already mounted $device" + return 0 + fi + + # Optionally, skip fsck on journaling filesystems. This option is + # a hack - it's mostly because e2fsck on ext3 takes much longer to + # recover the journal than the ext3 implementation in the kernel + # does (minutes versus seconds). + if test -z "@checkJournalingFS@" -a \ + \( "$fsType" = ext3 -o "$fsType" = ext4 -o "$fsType" = reiserfs \ + -o "$fsType" = xfs -o "$fsType" = jfs -o "$fsType" = f2fs \) + then + return 0 + fi + + echo "checking $device..." + + fsck -V -a "$device" + fsckResult=$? + + if test $(($fsckResult | 2)) = $fsckResult; then + echo "fsck finished, rebooting..." + sleep 3 + reboot -f + fi + + if test $(($fsckResult | 4)) = $fsckResult; then + echo "$device has unrepaired errors, please fix them manually." + fail + fi + + if test $fsckResult -ge 8; then + echo "fsck on $device failed." + fail + fi + + return 0 +} + + +# Function for mounting a file system. +mountFS() { + local device="$1" + local mountPoint="$2" + local options="$3" + local fsType="$4" + + if [ "$fsType" = auto ]; then + fsType=$(blkid -o value -s TYPE "$device") + if [ -z "$fsType" ]; then fsType=auto; fi + fi + + # Filter out x- options, which busybox doesn't do yet. + local optionsFiltered="$(IFS=,; for i in $options; do if [ "${i:0:2}" != "x-" ]; then echo -n $i,; fi; done)" + # Prefix (lower|upper|work)dir with /mnt-root (overlayfs) + local optionsPrefixed="$( echo "$optionsFiltered" | sed -E 's#\<(lowerdir|upperdir|workdir)=#\1=/mnt-root#g' )" + + echo "$device /mnt-root$mountPoint $fsType $optionsPrefixed" >> /etc/fstab + + checkFS "$device" "$fsType" + + # Optionally resize the filesystem. + case $options in + *x-nixos.autoresize*) + if [ "$fsType" = ext2 -o "$fsType" = ext3 -o "$fsType" = ext4 ]; then + modprobe "$fsType" + echo "resizing $device..." + e2fsck -fp "$device" + resize2fs "$device" + elif [ "$fsType" = f2fs ]; then + echo "resizing $device..." + fsck.f2fs -fp "$device" + resize.f2fs "$device" + fi + ;; + esac + + # Create backing directories for overlayfs + if [ "$fsType" = overlay ]; then + for i in upper work; do + dir="$( echo "$optionsPrefixed" | grep -o "${i}dir=[^,]*" )" + mkdir -m 0700 -p "${dir##*=}" + done + fi + + info "mounting $device on $mountPoint..." + + mkdir -p "/mnt-root$mountPoint" + + # For ZFS and CIFS mounts, retry a few times before giving up. + # We do this for ZFS as a workaround for issue NixOS/nixpkgs#25383. + local n=0 + while true; do + mount "/mnt-root$mountPoint" && break + if [ \( "$fsType" != cifs -a "$fsType" != zfs \) -o "$n" -ge 10 ]; then fail; break; fi + echo "retrying..." + sleep 1 + n=$((n + 1)) + done + + [ "$mountPoint" == "/" ] && + [ -f "/mnt-root/etc/NIXOS_LUSTRATE" ] && + lustrateRoot "/mnt-root" + + true +} + +lustrateRoot () { + local root="$1" + + echo + echo -e "\e[1;33m<<< NixOS is now lustrating the root filesystem (cruft goes to /old-root) >>>\e[0m" + echo + + mkdir -m 0755 -p "$root/old-root.tmp" + + echo + echo "Moving impurities out of the way:" + for d in "$root"/* + do + [ "$d" == "$root/nix" ] && continue + [ "$d" == "$root/boot" ] && continue # Don't render the system unbootable + [ "$d" == "$root/old-root.tmp" ] && continue + + mv -v "$d" "$root/old-root.tmp" + done + + # Use .tmp to make sure subsequent invokations don't clash + mv -v "$root/old-root.tmp" "$root/old-root" + + mkdir -m 0755 -p "$root/etc" + touch "$root/etc/NIXOS" + + exec 4< "$root/old-root/etc/NIXOS_LUSTRATE" + + echo + echo "Restoring selected impurities:" + while read -u 4 keeper; do + dirname="$(dirname "$keeper")" + mkdir -m 0755 -p "$root/$dirname" + cp -av "$root/old-root/$keeper" "$root/$keeper" + done + + exec 4>&- +} + + + +if test -e /sys/power/resume -a -e /sys/power/disk; then + if test -n "@resumeDevice@" && waitDevice "@resumeDevice@"; then + resumeDev="@resumeDevice@" + resumeInfo="$(udevadm info -q property "$resumeDev" )" + else + for sd in @resumeDevices@; do + # Try to detect resume device. According to Ubuntu bug: + # https://bugs.launchpad.net/ubuntu/+source/pm-utils/+bug/923326/comments/1 + # when there are multiple swap devices, we can't know where the hibernate + # image will reside. We can check all of them for swsuspend blkid. + if waitDevice "$sd"; then + resumeInfo="$(udevadm info -q property "$sd")" + if [ "$(echo "$resumeInfo" | sed -n 's/^ID_FS_TYPE=//p')" = "swsuspend" ]; then + resumeDev="$sd" + break + fi + fi + done + fi + if test -n "$resumeDev"; then + resumeMajor="$(echo "$resumeInfo" | sed -n 's/^MAJOR=//p')" + resumeMinor="$(echo "$resumeInfo" | sed -n 's/^MINOR=//p')" + echo "$resumeMajor:$resumeMinor" > /sys/power/resume 2> /dev/null || echo "failed to resume..." + fi +fi + +# If we have a path to an iso file, find the iso and link it to /dev/root +if [ -n "$isoPath" ]; then + mkdir -p /findiso + + for delay in 5 10; do + blkid | while read -r line; do + device=$(echo "$line" | sed 's/:.*//') + type=$(echo "$line" | sed 's/.*TYPE="\([^"]*\)".*/\1/') + + mount -t "$type" "$device" /findiso + if [ -e "/findiso$isoPath" ]; then + ln -sf "/findiso$isoPath" /dev/root + break 2 + else + umount /findiso + fi + done + + sleep "$delay" + done +fi + +# Try to find and mount the root device. +mkdir -p $targetRoot + +exec 3< @fsInfo@ + +while read -u 3 mountPoint; do + read -u 3 device + read -u 3 fsType + read -u 3 options + + # !!! Really quick hack to support bind mounts, i.e., where the + # "device" should be taken relative to /mnt-root, not /. Assume + # that every device that starts with / but doesn't start with /dev + # is a bind mount. + pseudoDevice= + case $device in + /dev/*) + ;; + //*) + # Don't touch SMB/CIFS paths. + pseudoDevice=1 + ;; + /*) + device=/mnt-root$device + ;; + *) + # Not an absolute path; assume that it's a pseudo-device + # like an NFS path (e.g. "server:/path"). + pseudoDevice=1 + ;; + esac + + if test -z "$pseudoDevice" && ! waitDevice "$device"; then + # If it doesn't appear, try to mount it anyway (and + # probably fail). This is a fallback for non-device "devices" + # that we don't properly recognise. + echo "Timed out waiting for device $device, trying to mount anyway." + fi + + # Wait once more for the udev queue to empty, just in case it's + # doing something with $device right now. + udevadm settle + + # If copytoram is enabled: skip mounting the ISO and copy its content to a tmpfs. + if [ -n "$copytoram" ] && [ "$device" = /dev/root ] && [ "$mountPoint" = /iso ]; then + fsType=$(blkid -o value -s TYPE "$device") + fsSize=$(blockdev --getsize64 "$device" || stat -Lc '%s' "$device") + + mkdir -p /tmp-iso + mount -t "$fsType" /dev/root /tmp-iso + mountFS tmpfs /iso size="$fsSize" tmpfs + + cp -r /tmp-iso/* /mnt-root/iso/ + + umount /tmp-iso + rmdir /tmp-iso + continue + fi + + if [ "$mountPoint" = / ] && [ "$device" = tmpfs ] && [ ! -z "$persistence" ]; then + echo persistence... + waitDevice "$persistence" + echo enabling persistence... + mountFS "$persistence" "$mountPoint" "$persistence_opt" "auto" + continue + fi + + mountFS "$device" "$mountPoint" "$options" "$fsType" +done + +exec 3>&- + + +@postMountCommands@ + + +# Emit a udev rule for /dev/root to prevent systemd from complaining. +if [ -e /mnt-root/iso ]; then + eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=/mnt-root/iso) +else + eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=$targetRoot) +fi +if [ "$ROOT_MAJOR" -a "$ROOT_MINOR" -a "$ROOT_MAJOR" != 0 ]; then + mkdir -p /run/udev/rules.d + echo 'ACTION=="add|change", SUBSYSTEM=="block", ENV{MAJOR}=="'$ROOT_MAJOR'", ENV{MINOR}=="'$ROOT_MINOR'", SYMLINK+="root"' > /run/udev/rules.d/61-dev-root-link.rules +fi + + +# Stop udevd. +udevadm control --exit + +# Reset the logging file descriptors. +# Do this just before pkill, which will kill the tee process. +exec 1>&$logOutFd 2>&$logErrFd +eval "exec $logOutFd>&- $logErrFd>&-" + +# Kill any remaining processes, just to be sure we're not taking any +# with us into stage 2. But keep storage daemons like unionfs-fuse. +# +# Storage daemons are distinguished by an @ in front of their command line: +# https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/ +for pid in $(pgrep -v -f '^@'); do + # Make sure we don't kill kernel processes, see #15226 and: + # http://stackoverflow.com/questions/12213445/identifying-kernel-threads + readlink "/proc/$pid/exe" &> /dev/null || continue + # Try to avoid killing ourselves. + [ $pid -eq $$ ] && continue + kill -9 "$pid" +done + +if test -n "$debug1mounts"; then fail; fi + + +# Restore /proc/sys/kernel/modprobe to its original value. +echo /sbin/modprobe > /proc/sys/kernel/modprobe + + +# Start stage 2. `switch_root' deletes all files in the ramfs on the +# current root. The path has to be valid in the chroot not outside. +if [ ! -e "$targetRoot/$stage2Init" ]; then + stage2Check=${stage2Init} + while [ "$stage2Check" != "${stage2Check%/*}" ] && [ ! -L "$targetRoot/$stage2Check" ]; do + stage2Check=${stage2Check%/*} + done + if [ ! -L "$targetRoot/$stage2Check" ]; then + echo "stage 2 init script ($targetRoot/$stage2Init) not found" + fail + fi +fi + +mkdir -m 0755 -p $targetRoot/proc $targetRoot/sys $targetRoot/dev $targetRoot/run + +mount --move /proc $targetRoot/proc +mount --move /sys $targetRoot/sys +mount --move /dev $targetRoot/dev +mount --move /run $targetRoot/run + +exec env -i $(type -P switch_root) "$targetRoot" "$stage2Init" + +fail # should never be reached diff --git a/nixos/modules/system/boot/stage-1.nix b/nixos/modules/system/boot/stage-1.nix new file mode 100644 index 0000000..e058b2c --- /dev/null +++ b/nixos/modules/system/boot/stage-1.nix @@ -0,0 +1,427 @@ +# This module builds the initial ramdisk, which contains an init +# script that performs the first stage of booting the system: it loads +# the modules necessary to mount the root file system, then calls the +# init in the root file system to start the second boot stage. + +{ config, lib, utils, pkgs, ... }: + +with lib; + +let + + udev = pkgs.eudev; + + kernel-name = config.boot.kernelPackages.kernel.name or "kernel"; + + modulesTree = config.system.modulesTree.override { name = kernel-name + "-modules"; }; + firmware = config.hardware.firmware; + + + # Determine the set of modules that we need to mount the root FS. + modulesClosure = pkgs.makeModulesClosure { + rootModules = config.boot.initrd.availableKernelModules ++ config.boot.initrd.kernelModules; + kernel = modulesTree; + firmware = firmware; + allowMissing = false; + }; + + + # The initrd only has to mount `/` or any FS marked as necessary for + # booting (such as the FS containing `/nix/store`, or an FS needed for + # mounting `/`, like `/` on a loopback). + fileSystems = filter utils.fsNeededForBoot config.system.build.fileSystems; + + # A utility for enumerating the shared-library dependencies of a program + findLibs = pkgs.buildPackages.writeShellScriptBin "find-libs" '' + set -euo pipefail + + declare -A seen + left=() + + patchelf="${pkgs.buildPackages.patchelf}/bin/patchelf" + + function add_needed { + rpath="$($patchelf --print-rpath $1)" + dir="$(dirname $1)" + for lib in $($patchelf --print-needed $1); do + left+=("$lib" "$rpath" "$dir") + done + } + + add_needed "$1" + + while [ ''${#left[@]} -ne 0 ]; do + next=''${left[0]} + rpath=''${left[1]} + ORIGIN=''${left[2]} + left=("''${left[@]:3}") + if [ -z ''${seen[$next]+x} ]; then + seen[$next]=1 + + # Ignore the dynamic linker which for some reason appears as a DT_NEEDED of glibc but isn't in glibc's RPATH. + case "$next" in + ld*.so.?) continue;; + esac + + IFS=: read -ra paths <<< $rpath + res= + for path in "''${paths[@]}"; do + path=$(eval "echo $path") + if [ -f "$path/$next" ]; then + res="$path/$next" + echo "$res" + add_needed "$res" + break + fi + done + if [ -z "$res" ]; then + echo "Couldn't satisfy dependency $next" >&2 + exit 1 + fi + fi + done + ''; + + # Some additional utilities needed in stage 1, like mount, fsck + # etc. We don't want to bring in all of those packages, so we just + # copy what we need. Instead of using statically linked binaries, + # we just copy what we need from Glibc and use patchelf to make it + # work. + extraUtils = pkgs.runCommandCC "extra-utils" + { nativeBuildInputs = [pkgs.buildPackages.nukeReferences]; + allowedReferences = [ "out" ]; # prevent accidents like glibc being included in the initrd + } + '' + set +o pipefail + + mkdir -p $out/bin $out/lib + ln -s $out/bin $out/sbin + + copy_bin_and_libs () { + [ -f "$out/bin/$(basename $1)" ] && rm "$out/bin/$(basename $1)" + cp -pdv $1 $out/bin + } + + # Copy BusyBox. + for BIN in ${pkgs.busybox}/{s,}bin/*; do + copy_bin_and_libs $BIN + done + + # Copy some util-linux stuff. + copy_bin_and_libs ${pkgs.util-linux}/sbin/blkid + + # Add RAID mdadm tool. + copy_bin_and_libs ${pkgs.mdadm}/sbin/mdadm + copy_bin_and_libs ${pkgs.mdadm}/sbin/mdmon + + # Copy udev. + copy_bin_and_libs ${udev}/bin/udevd + copy_bin_and_libs ${udev}/bin/udevadm + for BIN in ${udev}/lib/udev/*_id; do + copy_bin_and_libs $BIN + done + + # Copy modprobe. + copy_bin_and_libs ${pkgs.kmod}/bin/kmod + ln -sf kmod $out/bin/modprobe + + # Dirty hack to make sure the kernel properly loads modules + # such as ext4 on demand (e.g. on a `mount(2)` syscall). This is necessary + # because `kmod` isn't linked against `libpthread.so.0` anymore (since + # it was merged into `libc.so.6` since version `2.34`), but still needs + # to access it for some reason. This is not an issue in stage-1 itself + # because of the `LD_LIBRARY_PATH`-variable and anytime later because the rpath of + # kmod/modprobe points to glibc's `$out/lib` where `libpthread.so.6` exists. + # However, this is a problem when the kernel calls `modprobe` inside + # the initial ramdisk because it doesn't know about the + # `LD_LIBRARY_PATH` and the rpath was nuked. + # + # Also, we can't use `makeWrapper` here because `kmod` only does + # `modprobe` functionality if `argv[0] == "modprobe"`. + cat >$out/bin/modprobe-kernel <&1 | grep -q "BusyBox" + $out/bin/blkid -V 2>&1 | grep -q 'libblkid' + $out/bin/udevadm --version + $out/bin/mdadm --version + ${optionalString config.services.multipath.enable '' + ($out/bin/multipath || true) 2>&1 | grep -q 'need to be root' + ($out/bin/multipathd || true) 2>&1 | grep -q 'need to be root' + ''} + + ${config.boot.initrd.extraUtilsCommandsTest} + fi + ''; # */ + + udevRules = pkgs.runCommand "udev-rules" { + allowedReferences = [ extraUtils ]; + preferLocalBuild = true; + } '' + mkdir -p $out + + echo 'ENV{LD_LIBRARY_PATH}="${extraUtils}/lib"' > $out/00-env.rules + + cp -v ${udev}/var/lib/udev/rules.d/60-cdrom_id.rules $out/ + cp -v ${udev}/var/lib/udev/rules.d/60-persistent-storage.rules $out/ + cp -v ${udev}/var/lib/udev/rules.d/75-net-description.rules $out/ + cp -v ${udev}/var/lib/udev/rules.d/80-drivers.rules $out/ + cp -v ${udev}/var/lib/udev/rules.d/80-net-name-slot.rules $out/ + ${config.boot.initrd.extraUdevRulesCommands} + + for i in $out/*.rules; do + substituteInPlace $i \ + --replace ata_id ${extraUtils}/bin/ata_id \ + --replace scsi_id ${extraUtils}/bin/scsi_id \ + --replace cdrom_id ${extraUtils}/bin/cdrom_id \ + --replace ${pkgs.coreutils}/bin/basename ${extraUtils}/bin/basename \ + --replace ${pkgs.util-linux}/bin/blkid ${extraUtils}/bin/blkid \ + --replace ${pkgs.mdadm}/sbin ${extraUtils}/sbin \ + --replace ${pkgs.bash}/bin/sh ${extraUtils}/bin/sh \ + --replace ${udev} ${extraUtils} + done + + # Work around a bug in QEMU, which doesn't implement the "READ + # DISC INFORMATION" SCSI command: + # https://bugzilla.redhat.com/show_bug.cgi?id=609049 + # As a result, `cdrom_id' doesn't print + # ID_CDROM_MEDIA_TRACK_COUNT_DATA, which in turn prevents the + # /dev/disk/by-label symlinks from being created. We need these + # in the NixOS installation CD, so use ID_CDROM_MEDIA in the + # corresponding udev rules for now. This was the behaviour in + # udev <= 154. See also + # http://www.spinics.net/lists/hotplug/msg03935.html + substituteInPlace $out/60-persistent-storage.rules \ + --replace ID_CDROM_MEDIA_TRACK_COUNT_DATA ID_CDROM_MEDIA + ''; # */ + + + # The init script of boot stage 1 (loading kernel modules for + # mounting the root FS). + bootStage1 = pkgs.substituteAll { + src = ./stage-1-init.sh; + + shell = "${extraUtils}/bin/ash"; + + isExecutable = true; + + postInstall = '' + echo checking syntax + # check both with bash + ${pkgs.buildPackages.bash}/bin/sh -n $target + # and with ash shell, just in case + ${pkgs.buildPackages.busybox}/bin/ash -n $target + ''; + + inherit udevRules extraUtils modulesClosure; + + inherit (config.boot) resumeDevice; + + inherit (config.system.build) earlyMountScript; + + inherit (config.boot.initrd) checkJournalingFS verbose + preDeviceCommands postDeviceCommands postMountCommands preFailCommands kernelModules; + + resumeDevices = map (sd: if sd ? device then sd.device else "/dev/disk/by-label/${sd.label}") + (filter (sd: hasPrefix "/dev/" sd.device && !sd.randomEncryption.enable + # Don't include zram devices + && !(hasPrefix "/dev/zram" sd.device) + ) config.swapDevices); + + fsInfo = + let f = fs: [ fs.mountPoint (if fs.device != null then fs.device else "/dev/disk/by-label/${fs.label}") fs.fsType (builtins.concatStringsSep "," fs.options) ]; + in pkgs.writeText "initrd-fsinfo" (concatStringsSep "\n" (concatMap f fileSystems)); + + setHostId = optionalString (config.networking.hostId != null) '' + hi="${config.networking.hostId}" + ${if pkgs.stdenv.isBigEndian then '' + echo -ne "\x''${hi:0:2}\x''${hi:2:2}\x''${hi:4:2}\x''${hi:6:2}" > /etc/hostid + '' else '' + echo -ne "\x''${hi:6:2}\x''${hi:4:2}\x''${hi:2:2}\x''${hi:0:2}" > /etc/hostid + ''} + ''; + }; + + + # The closure of the init script of boot stage 1 is what we put in + # the initial RAM disk. + initialRamdisk = pkgs.makeInitrd { + name = "initrd-${kernel-name}"; + inherit (config.boot.initrd) compressor compressorArgs prepend; + + contents = + [ { object = bootStage1; + symlink = "/init"; + } + { object = pkgs.writeText "mdadm.conf" config.boot.initrd.services.swraid.mdadmConf; + symlink = "/etc/mdadm.conf"; + } + { object = pkgs.runCommand "initrd-kmod-blacklist-ubuntu" { + src = "${pkgs.kmod-blacklist-ubuntu}/modprobe.conf"; + preferLocalBuild = true; + } '' + target=$out + ${pkgs.buildPackages.perl}/bin/perl -0pe 's/## file: iwlwifi.conf(.+?)##/##/s;' $src > $out + ''; + symlink = "/etc/modprobe.d/ubuntu.conf"; + } + { object = config.environment.etc."modprobe.d/nixos.conf".source; + symlink = "/etc/modprobe.d/nixos.conf"; + } + { object = pkgs.kmod-debian-aliases; + symlink = "/etc/modprobe.d/debian.conf"; + } + ] ++ lib.optionals config.services.multipath.enable [ + { object = pkgs.runCommand "multipath.conf" { + src = config.environment.etc."multipath.conf".text; + preferLocalBuild = true; + } '' + target=$out + printf "$src" > $out + substituteInPlace $out \ + --replace ${config.services.multipath.package}/lib ${extraUtils}/lib + ''; + symlink = "/etc/multipath.conf"; + } + ] ++ (lib.mapAttrsToList + (symlink: options: + { + inherit symlink; + object = options.source; + } + ) + config.boot.initrd.extraFiles); + }; + + # Script to add secret files to the initrd at bootloader update time + initialRamdiskSecretAppender = + let + compressorExe = initialRamdisk.compressorExecutableFunction pkgs; + in pkgs.writeScriptBin "append-initrd-secrets" + '' + #!${pkgs.bash}/bin/bash -e + function usage { + echo "USAGE: $0 INITRD_FILE" >&2 + echo "Appends this configuration's secrets to INITRD_FILE" >&2 + } + + if [ $# -ne 1 ]; then + usage + exit 1 + fi + + if [ "$1"x = "--helpx" ]; then + usage + exit 0 + fi + + ${lib.optionalString (config.boot.initrd.secrets == {}) + "exit 0"} + + export PATH=${pkgs.coreutils}/bin:${pkgs.libarchive}/bin:${pkgs.gzip}/bin:${pkgs.findutils}/bin + + function cleanup { + if [ -n "$tmp" -a -d "$tmp" ]; then + rm -fR "$tmp" + fi + } + trap cleanup EXIT + + tmp=$(mktemp -d ''${TMPDIR:-/tmp}/initrd-secrets.XXXXXXXXXX) + + ${lib.concatStringsSep "\n" (mapAttrsToList (dest: source: + let source' = if source == null then dest else toString source; in + '' + mkdir -p $(dirname "$tmp/.initrd-secrets/${dest}") + cp -a ${source'} "$tmp/.initrd-secrets/${dest}" + '' + ) config.boot.initrd.secrets) + } + + (cd "$tmp" && find . -print0 | sort -z | bsdtar --uid 0 --gid 0 -cnf - -T - | bsdtar --null -cf - --format=newc @-) | \ + ${compressorExe} ${lib.escapeShellArgs initialRamdisk.compressorArgs} >> "$1" + ''; + +in + +{ + config = mkIf config.boot.initrd.enable { + + system.build = lib.mapAttrs (_: lib.mkForce) { inherit bootStage1 initialRamdisk initialRamdiskSecretAppender extraUtils; }; + + }; +} diff --git a/nixos/modules/system/boot/stage-2-init.sh b/nixos/modules/system/boot/stage-2-init.sh new file mode 100755 index 0000000..cdd1147 --- /dev/null +++ b/nixos/modules/system/boot/stage-2-init.sh @@ -0,0 +1,125 @@ +#! @shell@ + +systemConfig=@systemConfig@ + +export HOME=/root PATH="@path@" + + +if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" != true ]; then + # Process the kernel command line. + for o in $(>>\e[0m" + echo + + + # Normally, stage 1 mounts the root filesystem read/writable. + # However, in some environments, stage 2 is executed directly, and the + # root is read-only. So make it writable here. + if [ -z "$container" ]; then + mount -n -o remount,rw none / + fi +fi + + +# Likewise, stage 1 mounts /proc, /dev and /sys, so if we don't have a +# stage 1, we need to do that here. +if [ ! -e /proc/1 ]; then + specialMount() { + local device="$1" + local mountPoint="$2" + local options="$3" + local fsType="$4" + + # We must not overwrite this mount because it's bind-mounted + # from stage 1's /run + if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" = true ] && [ "${mountPoint}" = /run ]; then + return + fi + + install -m 0755 -d "$mountPoint" + mount -n -t "$fsType" -o "$options" "$device" "$mountPoint" + } + source @earlyMountScript@ +fi + + +if [ "${IN_NIXOS_SYSTEMD_STAGE1:-}" = true ]; then + echo "booting system configuration ${systemConfig}" +else + echo "booting system configuration $systemConfig" > /dev/kmsg +fi + + +# Make /nix/store a read-only bind mount to enforce immutability of +# the Nix store. Note that we can't use "chown root:nixbld" here +# because users/groups might not exist yet. +# Silence chown/chmod to fail gracefully on a readonly filesystem +# like squashfs. +chown -f 0:30000 /nix/store +chmod -f 1775 /nix/store +if [ -n "@readOnlyStore@" ]; then + if ! [[ "$(findmnt --noheadings --output OPTIONS /nix/store)" =~ ro(,|$) ]]; then + if [ -z "$container" ]; then + mount --bind /nix/store /nix/store + else + mount --rbind /nix/store /nix/store + fi + mount -o remount,ro,bind /nix/store + fi +fi + + + +# Log the script output to /dev/kmsg or /run/log/stage-2-init.log. +# Only at this point are all the necessary prerequisites ready for these commands. +exec {logOutFd}>&1 {logErrFd}>&2 +if test -w /dev/kmsg; then +exec > >(tee -i /proc/self/fd/"$logOutFd" | while read -r line; do +if test -n "$line"; then +echo "<7>stage-2-init: $line" > /dev/kmsg +fi +done) 2>&1 +else +mkdir -p /run/log +exec > >(tee -i /run/log/stage-2-init.log) 2>&1 +fi + + +# Required by the activation script +install -m 0755 -d /etc /etc/nixos +install -m 01777 -d /tmp + + +# Run the script that performs all configuration activation that does +# not have to be done at boot time. +echo "running activation script..." +$systemConfig/activate + + +# Record the boot configuration. +ln -sfn "$systemConfig" /run/booted-system + + +# Run any user-specified commands. +@shell@ @postBootCommands@ + + +# Reset the logging file descriptors. +exec 1>&$logOutFd 2>&$logErrFd +exec {logOutFd}>&- {logErrFd}>&- + + +# Start Synit. +echo "starting Synit..." +exec synit-pid1 diff --git a/nixos/modules/system/boot/stage-2.nix b/nixos/modules/system/boot/stage-2.nix new file mode 100644 index 0000000..2cd9450 --- /dev/null +++ b/nixos/modules/system/boot/stage-2.nix @@ -0,0 +1,26 @@ +{ config, lib, pkgs, ... }: + +{ + config = { + + system.build.bootStage2 = lib.mkForce (pkgs.substituteAll { + src = ./stage-2-init.sh; + shellDebug = "${pkgs.bashInteractive}/bin/bash"; + shell = "${pkgs.bash}/bin/bash"; + isExecutable = true; + inherit (config.nix) readOnlyStore; + inherit (config.system.build) earlyMountScript; + path = lib.makeBinPath [ + pkgs.coreutils + pkgs.utillinux + pkgs.synit-pid1 + ]; + postBootCommands = pkgs.writeText "local-cmds" '' + ${config.boot.postBootCommands} + ${config.powerManagement.powerUpCommands} + ''; + + }); + + }; +} diff --git a/nixos/modules/system/boot/synit.nix b/nixos/modules/system/boot/synit.nix new file mode 100644 index 0000000..6f1335b --- /dev/null +++ b/nixos/modules/system/boot/synit.nix @@ -0,0 +1,146 @@ +{ config, lib, pkgs, ... }: + +with lib; + +{ + config = { + + boot.initrd = { + enable = true; + network.enable = false; + systemd.enable = false; + verbose = true; + }; + + environment.etc = { + + "syndicate/boot/001-console-getty.pr".text = '' + > + + ''; + + "syndicate/boot/020-load-core-layer.pr".text = '' + ; Attenuate `$config` by rewriting plain `require-service` assertions to `require-core-service` + ; assertions. Allow all other assertions through. + ; + let ?sys = <* $config [ + > + + ]> + + ; Give meaning to `require-core-service`: it is an ordinary `require-service`, plus a + ; declaration that the `core` milestone depends on the service. + ; + ? [ + > + + ] + + ; Load config in the `core` directory, using the wrapped `config` so that all plain services + ; required are changed to be *core* services. + ; + > + + ; In addition, require the `core` milestone explicitly. + ; + > + ''; + + "syndicate/boot/030-load-services.pr".text = '' + ; Attenuate `$config` by rewriting plain `require-service` assertions to + ; `require-basic-service` assertions. Allow all other assertions through. + ; + let ?basic = <* $config [ + > + + ]> + + ; Give meaning to `require-basic-service`: it is an ordinary `require-service`, plus a + ; declaration that the service depends on the `core` milestone. + ; + ? [ + up>> + + ] + + ; Once we see that the `core` milestone is ready, start processing the `services` + ; directory. + ; + ? up> [ + > + ] + ''; + + "syndicate/core/configdirs.pr".text = '' + > + > + ''; + + "syndicate/core/hostname.pr".text = '' + > + > + ''; + + "syndicate/core/eudev.pr".text = let + initialScanScript = pkgs.writeScript "eudev-initial-scan.sh" '' + #!${pkgs.runtimeShell} + set -e + echo "" > /proc/sys/kernel/hotplug + udevadm trigger --type=subsystems --action=add + udevadm trigger --type=devices --action=add + udevadm settle --timeout=30 + ''; + in '' + > + + + > + up>> + > + ''; + + "syndicate/core/machine-dataspace.pr".text = '' + let ?ds = dataspace + + + $ds ? ?r [ + $log ! + ?- $log ! + ] + ''; + + "syndicate/services/configdirs.pr".text = '' + > + > + ''; + + } // (builtins.listToAttrs (map (file: { + name = "syndicate/boot/${file}"; + value.source = + "${pkgs.synit-pid1.src}packaging/packages/synit-config/files/etc/syndicate/boot/${file}"; + }) [ "010-exec.pr" "010-milestone.pr" "010-service-state-up.pr" ])); + + environment.systemPackages = with pkgs; [ + synit-pid1 + syndicate-server + (writeScriptBin "synit-log" '' + #! ${lib.getBin bash}/bin/bash + ${lib.getBin coreutils}/bin/mkdir -p /var/log/synit + exec ${lib.getBin s6}/bin/s6-log t /var/log/synit + '') + ]; + + systemd.package = pkgs.systemd // { meta.broken = true; }; + + }; +}