Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

std.zig.system: use both PATH and hardcoded locations to find env (2 version) #22814

Open
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

BratishkaErik
Copy link
Contributor

@BratishkaErik BratishkaErik commented Feb 8, 2025

This is second attempt after #21540 was reverted before. In this version relative paths are handled too.

Diff compare:

diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig
index 2376334c5c..e00d2e5dfe 100644
--- a/lib/std/zig/system.zig
+++ b/lib/std/zig/system.zig
@@ -1004,8 +1004,15 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
 ///
 /// If it finds ELF magic sequence, file is considered an ELF file and function returns.
 fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
-    var current_path = start_path;
+    var current_path: std.BoundedArray(u8, fs.max_path_bytes) = .{};
+    current_path.appendSliceAssumeCapacity(start_path);
 
+    // Needed for storing `fs.path.resolve` result.
+    var buf: [fs.max_path_bytes + 1]u8 = undefined;
+    var fbs: std.heap.FixedBufferAllocator = .init(&buf);
+    const allocator = fbs.allocator();
+
+    // Needed for storing file content.
     // According to `man 2 execve`:
     //
     // The kernel imposes a maximum length on the text
@@ -1018,11 +1025,8 @@ fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableTo
     // *including* "#!" characters and ignoring newline.
     // For safety, we set max length as 255 + \n (1).
     var buffer: [255 + 1]u8 = undefined;
-    while (true) {
-        // Interpreter path can be relative on Linux, but
-        // for simplicity we are asserting it is an absolute path.
-        assert(std.fs.path.isAbsolute(current_path));
-        const file = cwd.openFile(current_path, .{}) catch |err| switch (err) {
+    while (true) : (fbs.reset()) {
+        const file = cwd.openFile(current_path.constSlice(), .{}) catch |err| switch (err) {
             error.NoSpaceLeft => unreachable,
             error.NameTooLong => unreachable,
             error.PathAlreadyExists => unreachable,
@@ -1056,37 +1060,67 @@ fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableTo
         var is_elf_file = false;
         defer if (is_elf_file == false) file.close();
 
-        // Shortest working interpreter path is "#!/i" (4)
-        // (interpreter is "/i", assuming all paths are absolute, like in above comment).
-        // ELF magic number length is also 4.
-        //
-        // If file is shorter than that, it is definitely not ELF file
-        // nor file with "shebang" line.
-        const min_len = 4;
-
-        const len = preadAtLeast(file, &buffer, 0, min_len) catch return error.UnableToFindElfFile;
+        // Shortest working interpreter path is "#!i" (3)
+        // (interpreter is "i", relative to file itself).
+        // ELF magic number length is 4.
+        const len = preadAtLeast(file, &buffer, 0, 4) catch |err| switch (err) {
+            error.UnexpectedEndOfFile => preadAtLeast(file, &buffer, 0, 3) catch
+            // If file is shorter than that, it is definitely not ELF file
+            // nor file with "shebang" line.
+                return error.UnableToFindElfFile,
+            else => return error.UnableToFindElfFile,
+        };
         const content = buffer[0..len];
 
-        if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
+        if (len > 4 and mem.eql(u8, content[0..4], std.elf.MAGIC)) {
             // It is very likely ELF file!
             is_elf_file = true;
             return file;
         } else if (mem.eql(u8, content[0..2], "#!")) {
             // We detected shebang, now parse entire line.
+            const interpreter_path = interpreter_path: {
+                // Trim leading "#!" and separate line from others.
+                const first_line = content[2 .. mem.indexOfScalar(u8, content, '\n') orelse content.len];
 
-            // Trim leading "#!", spaces and tabs.
-            const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
+                // Trim leading spaces and tabs.
+                const trimmed_line = mem.trimLeft(u8, first_line, &.{ ' ', '\t' });
 
-            // This line can have:
-            // * Interpreter path only,
-            // * Interpreter path and arguments, all separated by space, tab or NUL character.
-            // And optionally newline at the end.
-            const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
+                // This line can have:
+                // * Interpreter path only,
+                // * Interpreter path and arguments, all separated by space, tab or NUL character.
+                // And optionally newline at the end.
+                const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
 
-            // Separate path and args.
-            const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
+                // Separate path and args.
+                const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
 
-            current_path = path_maybe_args[0..path_end];
+                break :interpreter_path path_maybe_args[0..path_end];
+            };
+
+            // We want these scenarios to work without using `realpath`:
+            // * Interpreter is absolute/relative path and real file.
+            // * Interpreter is absolute/relative path and absolute/relative symlink.
+            const interpreter_real_path = interpreter_real_path: {
+                var readlink_buffer: [std.fs.max_path_bytes]u8 = undefined;
+
+                const interpreter_real_path = cwd.readLink(interpreter_path, &readlink_buffer) catch |err| switch (err) {
+                    error.NotLink => interpreter_path,
+                    else => return error.UnableToFindElfFile,
+                };
+
+                const next_path = fs.path.resolve(allocator, &.{
+                    // `dirname` can return `null` in two situations:
+                    // * When path is '/': impossible since it always contain file path.
+                    // * When path is "some_current_dir_file": use ".".
+                    fs.path.dirname(interpreter_path) orelse ".",
+                    interpreter_real_path,
+                }) catch return error.UnableToFindElfFile;
+
+                break :interpreter_real_path next_path;
+            };
+
+            current_path.clear();
+            current_path.appendSliceAssumeCapacity(interpreter_real_path);
             continue;
         } else {
             // Not a ELF file, not a shell script with "shebang line", invalid duck.
@@ -1178,7 +1212,7 @@ fn detectAbiAndDynamicLinker(
 
     const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];
 
-    const cwd = std.fs.cwd();
+    const cwd = fs.cwd();
 
     // Algorithm is:
     // 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
@@ -1190,12 +1224,12 @@ fn detectAbiAndDynamicLinker(
         const PATH = std.posix.getenv("PATH") orelse break :try_path null;
         var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);
 
-        var buf: [std.fs.max_path_bytes + 1]u8 = undefined;
+        var buf: [fs.max_path_bytes + 1]u8 = undefined;
         var fbs: std.heap.FixedBufferAllocator = .init(&buf);
         const allocator = fbs.allocator();
 
         while (it.next()) |path| : (fbs.reset()) {
-            const start_path = std.fs.path.joinZ(allocator, &.{ path, "env" }) catch |err| switch (err) {
+            const start_path = fs.path.join(allocator, &.{ path, "env" }) catch |err| switch (err) {
                 error.OutOfMemory => continue,
             };
 

I have tested with following files:

$ PATH="./:/usr/sbin" /usr/bin/zig build --zig-lib-dir ~/github.com/zig/lib

# env
#!/home/bratishkaerik/github.com/zig/a

# a
#!../../../../bin/env

# /bin/env
#!/usr/bin/coreutils --coreutils-prog-shebang=env

# /usr/bin/coreutils
ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, stripped

@BratishkaErik
Copy link
Contributor Author

Turns out it did not worked with mix of relative interpreters and symlinks, so I rewrote it a little bit:

info: current_path = ./env
error: shebang read!
error: interpreter (real    ) = a
error: interpreter (readlink) = a
error: next_path = a

info: current_path = a
error: shebang read!
error: interpreter (real    ) = ../../../../bin/env
error: interpreter (readlink) = ../../../../bin/env
error: next_path = ../../../../../../../bin/env

info: current_path = ../../../../../../../bin/env
error: shebang read!
error: interpreter (real    ) = /usr/bin/coreutils
error: interpreter (readlink) = /usr/bin/coreutils
error: next_path = /usr/bin/coreutils

info: current_path = /usr/bin/coreutils
error: target = ...
info: current_path = ./env
error: shebang read!
error: interpreter (real    ) = symlinked/interpreter
error: interpreter (readlink) = ../interpreter_with_relative_interpreter
error: next_path = interpreter_with_relative_interpreter

info: current_path = interpreter_with_relative_interpreter
error: shebang read!
error: interpreter (real    ) = cant_find_this_interpreter
error: interpreter (readlink) = cant_find_this_interpreter
error: next_path = cant_find_this_interpreter

info: current_path = cant_find_this_interpreter
error: shebang read!
error: interpreter (real    ) = /usr/bin/file
error: interpreter (readlink) = /usr/bin/file
error: next_path = /usr/bin/file

info: current_path = /usr/bin/file
error: target = ...
.
├── symlinked
│  └── interpreter -> ../interpreter_with_relative_interpreter
├── cant_find_this_interpreter
├── file_with_relative_symlinked_interpreter
├── interpreter_with_relative_interpreter
└── env -> file_with_relative_symlinked_interpreter

1) env a.k.a file_with_relative_symlinked_interpreter
#!symlinked/interpreter

2) symlinked/interpreter a.k.a interpreter_with_relative_interpreter
#!cant_find_this_interpreter

3) cant_find_this_interpreter
#!/usr/bin/file
echo "Hello?"

# Alternatively:

.
├── symlinked
│  └── 2 -> ../real_2
├── 3
├── env
└── real_2

Should help systems that have main `env` binary in different location
than hardcoded `/usr/bin/env` **during build** (not neccessarily always),
like Nix/Guix, Termux, Gentoo Prefix etc.

Related:
https://www.github.com/ziglang/zig/issues/12156
https://www.github.com/ziglang/zig/issues/14146
https://www.github.com/ziglang/zig/issues/14577
https://www.github.com/ziglang/zig/issues/15898

This is second attempt after https://www.github.com/ziglang/zig/pull/21540
was reverted before. In this version relative paths are handled too.

Signed-off-by: Eric Joldasov <[email protected]>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant