make-initrd-ng: init

This commit is contained in:
Will Fancher 2021-11-20 19:47:28 -05:00
parent 52c98fc3e9
commit d193ef8a57
7 changed files with 392 additions and 0 deletions

View file

@ -0,0 +1,9 @@
# Packages the `make-initrd-ng` Rust tool from the source directory that sits
# next to this file, pinning dependencies with the checked-in Cargo.lock.
{ rustPlatform }:

let
  sourceDir = ./make-initrd-ng;
in
rustPlatform.buildRustPackage {
  pname = "make-initrd-ng";
  version = "0.1.0";
  src = sourceDir;
  cargoLock = {
    lockFile = ./make-initrd-ng/Cargo.lock;
  };
}

View file

@ -0,0 +1,79 @@
# Builds an initrd: runs make-initrd-ng over `contents` to populate a root
# tree, then packs that tree with cpio and pipes it through the chosen
# compressor into $out/initrd.
let
# Some metadata on various compression programs, relevant to naming
# the initramfs file and, if applicable, generating a u-boot image
# from it.
compressors = import ./initrd-compressor-meta.nix;
# Get the basename of the actual compression program from the whole
# compression command, for the purpose of guessing the u-boot
# compression type and filename extension.
compressorName = fullCommand: builtins.elemAt (builtins.match "([^ ]*/)?([^ ]+).*" fullCommand) 1;
in
{ stdenvNoCC, perl, cpio, ubootTools, lib, pkgsBuildHost, makeInitrdNGTool, patchelf, runCommand, glibc
# Name of the derivation (not of the resulting file!)
, name ? "initrd"
# Program used to compress the cpio archive; use "cat" for no compression.
# This can also be a function which takes a package set and returns the path to the compressor,
# such as `pkgs: "${pkgs.lzop}/bin/lzop"`.
, compressor ? "gzip"
# Internal: normalises `compressor` into a function from a package set to
# an executable. A function is used as-is; a context-free string that names
# an entry of `compressors` uses that entry's `executable`; anything else is
# wrapped in a function that returns the string unchanged.
, _compressorFunction ?
if lib.isFunction compressor then compressor
else if ! builtins.hasContext compressor && builtins.hasAttr compressor compressors then compressors.${compressor}.executable
else _: compressor
# Internal: the compressor executable, resolved against pkgsBuildHost.
, _compressorExecutable ? _compressorFunction pkgsBuildHost
# Internal: basename of the compressor program, used as the metadata key.
, _compressorName ? compressorName _compressorExecutable
# Internal: metadata entry for the compressor, or {} when it is unknown.
, _compressorMeta ? compressors.${_compressorName} or {}
# List of arguments to pass to the compressor program, or null to use its defaults
, compressorArgs ? null
# Internal: the arguments actually used: `compressorArgs` when given,
# otherwise the metadata's `defaultArgs`, otherwise none.
, _compressorArgsReal ? if compressorArgs == null then _compressorMeta.defaultArgs or [] else compressorArgs
# Filename extension to use for the compressed initramfs. This is
# included for clarity, but $out/initrd will always be a symlink to
# the final image.
# If this isn't guessed, you may want to complete the metadata above and send a PR :)
, extension ? _compressorMeta.extension or
(throw "Unrecognised compressor ${_compressorName}, please specify filename extension")
# List of { object = path_or_derivation; symlink = "/path"; }
# The paths are copied into the initramfs in their nix store path
# form, then linked at the root according to `symlink`.
, contents
# List of uncompressed cpio files to prepend to the initramfs. This
# can be used to add files in specified paths without them becoming
# symlinks to store paths.
, prepend ? []
# Whether to wrap the initramfs in a u-boot image.
, makeUInitrd ? stdenvNoCC.hostPlatform.linux-kernel.target == "uImage"
# If generating a u-boot image, the architecture to use. The default
# guess may not align with u-boot's nomenclature correctly, so it can
# be overridden.
# See https://gitlab.denx.de/u-boot/u-boot/-/blob/9bfb567e5f1bfe7de8eb41f8c6d00f49d2b9a426/common/image.c#L81-106 for a list.
, uInitrdArch ? stdenvNoCC.hostPlatform.linuxArch
# The name of the compression, as recognised by u-boot.
# See https://gitlab.denx.de/u-boot/u-boot/-/blob/9bfb567e5f1bfe7de8eb41f8c6d00f49d2b9a426/common/image.c#L195-204 for a list.
# If this isn't guessed, you may want to complete the metadata above and send a PR :)
, uInitrdCompression ? _compressorMeta.ubootName or
(throw "Unrecognised compressor ${_compressorName}, please specify uInitrdCompression")
# NOTE(review): `perl`, `ubootTools`, `extension`, `prepend`, `makeUInitrd`,
# `uInitrdArch` and `uInitrdCompression` are accepted above but are not
# referenced in the derivation below; the build script writes $out/initrd
# directly. Confirm whether they are kept only for interface parity.
}: runCommand name {
# Full compressor command line; the build script runs it through `eval`.
compress = "${_compressorExecutable} ${lib.escapeShellArgs _compressorArgsReal}";
passthru = {
compressorExecutableFunction = _compressorFunction;
compressorArgs = _compressorArgsReal;
};
# `contents` is handed to the builder as a file; the script reads its
# location from $contentsPath.
passAsFile = ["contents"];
# Two lines per entry: the object path, then the symlink path (an empty
# line when `symlink` is null).
contents = lib.concatMapStringsSep "\n" ({ object, symlink, ... }: "${object}\n${if symlink == null then "" else symlink}") contents + "\n";
nativeBuildInputs = [makeInitrdNGTool patchelf glibc cpio];
# Build script: populate ./root with make-initrd-ng, set every entry's
# timestamp to '@1' with touch, then feed the sorted, NUL-separated file
# list to cpio (newc format, owner +0:+0) and append the compressor's
# output to $out/initrd.
} ''
mkdir ./root
make-initrd-ng "$contentsPath" ./root
mkdir "$out"
(cd root && find * .[^.*] -exec touch -h -d '@1' '{}' +)
(cd root && find * .[^.*] -print0 | sort -z | cpio -o -H newc -R +0:+0 --reproducible --null | eval -- $compress >> "$out/initrd")
''

View file

@ -0,0 +1,5 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "make-initrd-ng"
version = "0.1.0"

View file

@ -0,0 +1,9 @@
[package]
name = "make-initrd-ng"
version = "0.1.0"
authors = ["Will Fancher <elvishjerricco@gmail.com>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View file

@ -0,0 +1,79 @@
# What is this for?
NixOS's traditional initrd is generated by listing the paths that
should be included in initrd and copying the full runtime closure of
those paths into the archive. For most things, like almost any
executable, this involves copying the entirety of huge packages like
glibc, when only things like the shared library files are needed. To
solve this, NixOS does a variety of patchwork to edit the files being
copied in so they only refer to small, patched up paths. For instance,
executables and their shared library dependencies are copied into an
`extraUtils` derivation, and every ELF file is patched to refer to
files in that output.
The problem with this is that it is often difficult to correctly patch
some things. For instance, systemd bakes the path to the `mount`
command into the binary, so patchelf is no help. Instead, it's very
often easier to simply copy the desired files to their original store
locations in initrd and not copy their entire runtime closure. This
does mean that it is the burden of the developer to ensure that all
necessary dependencies are copied in, as closures won't be
consulted. However, it is rare that full closures are actually
desirable, so in the traditional initrd, the developer was likely to
do manual work on patching the dependencies explicitly anyway.
# How it works
This program is similar to its inspiration (`find-libs` from the
traditional initrd), except that it also handles symlinks and
directories according to certain rules. As input, it receives a
sequence of pairs of paths. The first path is an object to copy into
initrd. The second path (if not empty) is the path to a symlink that
should be placed in the initrd, pointing to that object. How that
object is copied depends on its type.
1. A regular file is copied directly to the same absolute path in the
initrd.
- If it is *also* an ELF file, then all of its direct shared
library dependencies are also listed as objects to be copied.
2. A directory's direct children are listed as objects to be copied,
and a directory at the same absolute path in the initrd is created.
3. A symlink's target is listed as an object to be copied.
There are a couple of quirks to mention here. First, the term "object"
refers to the final file path that the developer intends to have
copied into initrd. This means any parent directory is not considered
an object just because its child was listed as an object in the
program input; instead those intermediate directories are simply
created in support of the target object. Second, shared libraries,
directory children, and symlink targets aren't immediately recursed,
because they simply get listed as objects themselves, and are
therefore traversed when they themselves are processed. Finally,
symlinks in the intermediate directories leading to an object are
preserved, meaning an input object `/a/symlink/b` (where `/a/symlink`
points to `/target`) will just result in initrd containing
`/a/symlink -> /target` and `/target/b`, even if `/target` has other
children. Preserving symlinks in this manner is important for things
like systemd.
These rules automate the most important and obviously necessary
copying that needs to be done in most cases, allowing programs and
configuration files to go unpatched, while keeping the content of the
initrd to a minimum.
# Why Rust?
- A prototype of this logic was written in Bash, in an attempt to stay
consistent with its `find-libs` ancestor, but that program was
difficult to write, and ended up taking several minutes to run. This
program runs in less than a second, and the code is substantially
easier to work with.
- This will not require end users to install a Rust toolchain to use
NixOS, as long as this tool is cached by Hydra. And if you're
bootstrapping NixOS from source, rustc is already required anyway.
- Rust was favored over Python for its type system, and because if you
want to go fast, why not go *really fast*?

View file

@ -0,0 +1,208 @@
use std::collections::{HashSet, VecDeque};
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::hash::Hash;
use std::io::{BufReader, BufRead, Error, ErrorKind};
use std::os::unix;
use std::path::{Component, Path, PathBuf};
use std::process::{Command, Stdio};
/// A FIFO queue that accepts each distinct value at most once.
///
/// Every value that has ever been pushed is remembered in `seen`, so a value
/// is rejected on later pushes even after it has been popped.
struct NonRepeatingQueue<T> {
    /// Values waiting to be popped, in insertion order.
    queue: VecDeque<T>,
    /// Every value that has been accepted so far.
    seen: HashSet<T>,
}

impl<T> NonRepeatingQueue<T> {
    /// Creates an empty queue that has not seen any value yet.
    fn new() -> NonRepeatingQueue<T> {
        let queue = VecDeque::new();
        let seen = HashSet::new();
        NonRepeatingQueue { queue, seen }
    }
}

impl<T: Clone + Eq + Hash> NonRepeatingQueue<T> {
    /// Enqueues `value` unless it has been pushed before.
    ///
    /// Returns `true` when the value was newly enqueued and `false` when it
    /// was rejected as a repeat.
    fn push_back(&mut self, value: T) -> bool {
        let already_seen = self.seen.contains(&value);
        if already_seen {
            return false;
        }
        // Keep one copy for the membership set and move the other into the queue.
        self.seen.insert(value.clone());
        self.queue.push_back(value);
        true
    }

    /// Removes and returns the oldest queued value, or `None` when empty.
    fn pop_front(&mut self) -> Option<T> {
        self.queue.pop_front()
    }
}
/// Runs `patchelf <mode> <path>` and returns what it printed on stdout.
///
/// The child's stderr is inherited, so patchelf diagnostics appear directly
/// in this process's stderr.
///
/// # Errors
///
/// - Any error from spawning or waiting on `patchelf`.
/// - `ErrorKind::Other` when `patchelf` exits unsuccessfully.
/// - `ErrorKind::InvalidData` when the captured stdout is not valid UTF-8
///   (reported as an error rather than a panic).
fn patch_elf<S: AsRef<OsStr>, P: AsRef<OsStr>>(mode: S, path: P) -> Result<String, Error> {
    let output = Command::new("patchelf")
        .arg(&mode)
        .arg(&path)
        .stderr(Stdio::inherit())
        .output()?;

    if !output.status.success() {
        return Err(Error::new(
            ErrorKind::Other,
            format!("failed: patchelf {:?} {:?}", OsStr::new(&mode), OsStr::new(&path)),
        ));
    }

    String::from_utf8(output.stdout).map_err(|err| {
        Error::new(
            ErrorKind::InvalidData,
            format!(
                "patchelf {:?} {:?} produced non-UTF-8 output: {}",
                OsStr::new(&mode),
                OsStr::new(&path),
                err
            ),
        )
    })
}
/// Copies the regular file `source` to `target` and queues the objects it
/// needs at run time.
///
/// After copying, `ldd` is run on `source`; if it exits unsuccessfully the
/// file is reported as not dynamically linked and nothing else is queued.
/// Otherwise `patchelf` is asked for the rpath, the needed libraries and the
/// interpreter. The interpreter (when there is one) is queued, and each
/// needed library is queued from the first rpath directory that contains it.
/// A library found in no rpath directory only produces a warning.
///
/// # Errors
///
/// Returns any I/O error from the copy, from running `ldd`, or from the
/// `--print-rpath` / `--print-needed` patchelf calls.
fn copy_file<P: AsRef<Path> + AsRef<OsStr>, S: AsRef<Path>>(
    source: P,
    target: S,
    queue: &mut NonRepeatingQueue<Box<Path>>,
) -> Result<(), Error> {
    fs::copy(&source, target)?;

    if !Command::new("ldd").arg(&source).output()?.status.success() {
        println!("{:?} is not dynamically linked. Not recursing.", OsStr::new(&source));
        return Ok(());
    }

    let rpath_string = patch_elf("--print-rpath", &source)?;
    let needed_string = patch_elf("--print-needed", &source)?;
    // Shared libraries don't have an interpreter
    if let Ok(interpreter_string) = patch_elf("--print-interpreter", &source) {
        queue.push_back(Box::from(Path::new(interpreter_string.trim())));
    }

    // An empty rpath would otherwise yield a single "" entry, and joining a
    // library name onto "" produces a path relative to the current
    // directory. Drop empty entries so only real rpath directories are used.
    let rpath: Vec<&Path> = rpath_string
        .trim()
        .split(':')
        .filter(|dir| !dir.is_empty())
        .map(Path::new)
        .collect();

    for line in needed_string.lines() {
        let resolved = rpath
            .iter()
            .map(|dir| dir.join(line))
            .find(|lib| lib.exists());
        match resolved {
            Some(lib) => {
                // No need to recurse. The queue will bring it back round.
                queue.push_back(lib.into_boxed_path());
            }
            None => {
                // glibc makes it tricky to make this an error because
                // none of the files have a useful rpath.
                println!("Warning: Couldn't satisfy dependency {} for {:?}", line, OsStr::new(&source));
            }
        }
    }

    Ok(())
}
/// Queues every direct child of the directory `source`.
///
/// Children are not descended into here; each one is handled when it is
/// later popped from the queue.
///
/// # Errors
///
/// Returns the first I/O error from opening or reading the directory.
fn queue_dir<P: AsRef<Path>>(
    source: P,
    queue: &mut NonRepeatingQueue<Box<Path>>,
) -> Result<(), Error> {
    fs::read_dir(source)?.try_for_each(|child| -> Result<(), Error> {
        // No need to recurse. The queue will bring us back round here on its own.
        queue.push_back(child?.path().into_boxed_path());
        Ok(())
    })
}
/// Reproduces the object `p` underneath `root`, one path component at a time.
///
/// `source` tracks the path being inspected on the real filesystem and
/// `target` the matching path below `root`. For each normal component:
///
/// - a regular file is copied with `copy_file` unless the target exists;
/// - a symlink is recreated with the same link text, then the link's
///   destination (plus the unprocessed rest of `p`) is queued if it exists,
///   and processing of `p` stops;
/// - a directory is created, and its children are queued only when it is the
///   last component of `p`.
///
/// # Errors
///
/// Returns any I/O error from inspecting `source` or writing below `root`.
///
/// # Panics
///
/// Panics if `p` contains a Windows path prefix.
fn handle_path(
    root: &Path,
    p: &Path,
    queue: &mut NonRepeatingQueue<Box<Path>>,
) -> Result<(), Error> {
    let mut source = PathBuf::new();
    let mut target = root.to_path_buf();
    let mut iter = p.components().peekable();
    while let Some(comp) = iter.next() {
        match comp {
            Component::Prefix(_) => panic!("This tool is not meant for Windows"),
            Component::RootDir => {
                target.clear();
                target.push(root);
                source.clear();
                source.push("/");
            }
            Component::CurDir => {}
            Component::ParentDir => {
                // Don't over-pop the target if the path has too many ParentDirs
                if source.pop() {
                    target.pop();
                }
            }
            Component::Normal(name) => {
                target.push(name);
                source.push(name);
                let typ = fs::symlink_metadata(&source)?.file_type();
                if typ.is_file() && !target.exists() {
                    copy_file(&source, &target, queue)?;
                } else if typ.is_symlink() {
                    let link_target = fs::read_link(&source)?;

                    // Create the link, then push its target to the queue.
                    // `exists()` follows symlinks, so a link created earlier
                    // whose destination is not (yet) present under `root`
                    // would look absent and re-creating it would fail with
                    // "already exists". Check the link itself instead.
                    if fs::symlink_metadata(&target).is_err() {
                        unix::fs::symlink(&link_target, &target)?;
                    }

                    // Resolve the link on the source side and carry over the
                    // components of `p` that have not been processed yet.
                    source.pop();
                    source.push(link_target);
                    for rest in iter.by_ref() {
                        source.push(rest);
                    }
                    let link_target_path = source.as_path();
                    if link_target_path.exists() {
                        queue.push_back(Box::from(link_target_path));
                    }
                    break;
                } else if typ.is_dir() {
                    if !target.exists() {
                        fs::create_dir(&target)?;
                    }

                    // Only recursively copy if the directory is the target object
                    if iter.peek().is_none() {
                        queue_dir(&source, queue)?;
                    }
                }
            }
        }
    }

    Ok(())
}
fn main() -> Result<(), Error> {
let args: Vec<String> = env::args().collect();
let input = fs::File::open(&args[1])?;
let output = &args[2];
let out_path = Path::new(output);
let mut queue = NonRepeatingQueue::<Box<Path>>::new();
let mut lines = BufReader::new(input).lines();
while let Some(obj) = lines.next() {
// Lines should always come in pairs
let obj = obj?;
let sym = lines.next().unwrap()?;
let obj_path = Path::new(&obj);
queue.push_back(Box::from(obj_path));
if !sym.is_empty() {
println!("{} -> {}", &sym, &obj);
// We don't care about preserving symlink structure here
// nearly as much as for the actual objects.
let link_string = format!("{}/{}", output, sym);
let link_path = Path::new(&link_string);
let mut link_parent = link_path.to_path_buf();
link_parent.pop();
fs::create_dir_all(link_parent)?;
unix::fs::symlink(obj_path, link_path)?;
}
}
while let Some(obj) = queue.pop_front() {
println!("{:?}", obj);
handle_path(out_path, &*obj, &mut queue)?;
}
Ok(())
}

View file

@ -732,6 +732,9 @@ with pkgs;
makeInitrd = callPackage ../build-support/kernel/make-initrd.nix; # Args intentionally left out
# Args left out here as well, as for makeInitrd; callers pass them (make-initrd-ng.nix declares `contents` without a default).
makeInitrdNG = callPackage ../build-support/kernel/make-initrd-ng.nix;
makeInitrdNGTool = callPackage ../build-support/kernel/make-initrd-ng-tool.nix {};
makeWrapper = makeSetupHook
{ deps = [ dieHook ];
substitutions = {