/
BinLinkingShim.zig
310 lines (262 loc) · 10.4 KB
/
BinLinkingShim.zig
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
//! This struct is used by bun.exe to encode `.bunx` files, to be consumed
//! by the shim 'bun_shim_impl.exe'. The latter exe does not include this code.
//!
//! The format is as follows:
//!
//! [WSTR:bin_path][u16'"'][u16:0](shebang?)[flags:u16]
//!
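//! if shebang:
//! [WSTR:launcher (program and args)][u16:' '][u32:bin_path_byte_len][u32:arg_byte_len]
//! - the launcher text is always followed by a single trailing space
//! - arg_byte_len counts the launcher text plus that trailing space, in bytes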
//!
//! See 'bun_shim_impl.zig' for more details on how this file is consumed.
const std = @import("std");
const bun = @import("root").bun;
const simdutf = bun.simdutf;
const lastIndexOfScalar = std.mem.lastIndexOfScalar;
/// Reports whether the runtime slice `a` holds exactly the bytes of the
/// comptime-known string `b`.
fn eqlComptime(a: []const u8, comptime b: []const u8) bool {
    // Equal length plus a matching prefix is the same as full equality.
    if (a.len != b.len) return false;
    return std.mem.startsWith(u8, a, b);
}
/// UTF-16 path written at the very start of the encoded file.
/// Relative to node_modules. Must not start with a '/' (asserted in `encodeInto`).
bin_path: []const u16,
/// Launcher information for the target (see `Shebang.parse`), or null when
/// there is none; `encodeInto` records this as `Flags.has_shebang`.
shebang: ?Shebang,
/// Version tag of the encoded format.
///
/// The discriminants are arbitrary numbers chosen for validation purposes:
/// they will probably not show up in the other fields, so a misplaced
/// (off-by-one) read is unlikely to yield a recognized tag.
pub const VersionFlag = enum(u13) {
    v1 = 5474,
    /// Fix bug where paths were not joined correctly
    v2 = 5475,
    /// Added an error message for when the process is not found
    v3 = 5476,
    /// Added a flag to tell if the shebang is exactly "node" This is used in an
    /// automatic fallback path where if "node" is asked for, but not present,
    /// it will retry the spawn with "bun".
    v4 = 5477,
    /// Fixed bugs where passing arguments did not always work.
    v5 = 5478,
    _,

    /// The version used as the default for `Flags.version_tag`.
    pub const current = .v5;
};
/// 16-bit flags word that `encodeInto` writes as the last two bytes of the
/// encoded file and `looseDecode` reads back from the end.
pub const Flags = packed struct(u16) {
    /// Set if the shebang content is "node" or "bun".
    is_node_or_bun: bool,
    /// Set by `encodeInto` when the launcher is "node", either alone or
    /// followed by a space.
    is_node: bool,
    /// Indicates if a shebang is present.
    has_shebang: bool,
    /// Used to validate that the shim is not corrupt and to detect offset
    /// memory reads.
    version_tag: VersionFlag = VersionFlag.current,

    /// Returns true when the version tag equals `VersionFlag.current`.
    /// The three boolean bits do not take part in the check.
    pub fn isValid(flags: Flags) bool {
        // Equivalent to masking out the boolean bits and comparing the
        // remaining 13 bits against the current version.
        return flags.version_tag == VersionFlag.current;
    }
};
/// Contents of `bun_shim_impl.exe`, embedded at compile time via `@embedFile`.
pub const embedded_executable_data = @embedFile("./bun_shim_impl.exe");
/// Converts the comptime UTF-8 string `s` to a UTF-16LE literal and returns
/// that literal viewed as plain bytes.
fn wU8(comptime s: []const u8) []const u8 {
    const wide = std.unicode.utf8ToUtf16LeStringLiteral(s);
    const raw = std.mem.sliceAsBytes(wide);
    return @alignCast(raw);
}
/// Launcher information for a bin target, either parsed from the `#!` line
/// at the start of the file or chosen from the file extension.
pub const Shebang = struct {
    /// UTF-8 launcher text (program plus any arguments).
    launcher: []const u8,
    /// Length of `launcher` once converted to UTF-16, in code units.
    utf16_len: u32,
    /// True when the launcher program is "bun" or "node".
    is_node_or_bun: bool,

    /// Builds a `Shebang` around `launcher`, precomputing its UTF-16 length.
    /// The `launcher` slice is stored as-is, not copied.
    pub fn init(launcher: []const u8, is_node_or_bun: bool) !Shebang {
        // TODO(@paperdave): what if this is invalid utf8?
        const wide_len = bun.simdutf.length.utf16.from.utf8(launcher);
        return .{
            .launcher = launcher,
            .utf16_len = @intCast(wide_len),
            .is_node_or_bun = is_node_or_bun,
        };
    }

    /// How a file is launched when the choice is made from its extension.
    const ExtensionType = enum {
        run_with_bun,
        run_with_cmd,
        run_with_powershell,
    };

    /// Extension lookup table. Keys are the UTF-16LE bytes of the extension.
    const BunExtensions = std.ComptimeStringMap(ExtensionType, .{
        .{ wU8(".js"), .run_with_bun },
        .{ wU8(".mjs"), .run_with_bun },
        .{ wU8(".cjs"), .run_with_bun },
        .{ wU8(".jsx"), .run_with_bun },
        .{ wU8(".ts"), .run_with_bun },
        .{ wU8(".cts"), .run_with_bun },
        .{ wU8(".mts"), .run_with_bun },
        .{ wU8(".tsx"), .run_with_bun },
        .{ wU8(".sh"), .run_with_bun },
        .{ wU8(".cmd"), .run_with_cmd },
        .{ wU8(".bat"), .run_with_cmd },
        .{ wU8(".ps1"), .run_with_powershell },
    });

    /// std.fs.path.basename but utf16
    fn basenameW(path: []const u16) []const u16 {
        const empty: []const u16 = &[_]u16{};
        if (path.len == 0) return empty;

        // Skip trailing separators; `last` ends on the final unit of the name.
        var last: usize = path.len - 1;
        while (path[last] == '/' or path[last] == '\\') {
            if (last == 0) return empty;
            last -= 1;
        }
        // A ':' at index 1 is a drive designator, so there is no name at all.
        if (path[last] == ':' and last == 1) return empty;

        const end = last + 1;
        var first: usize = last;
        while (true) {
            const unit = path[first];
            const at_boundary = unit == '/' or unit == '\\' or (unit == ':' and first == 1);
            if (at_boundary) return path[first + 1 .. end];
            // Reached the start without meeting a boundary: the name begins at 0.
            if (first == 0) return path[0..end];
            first -= 1;
        }
    }

    /// std.fs.path.extension but utf16
    pub fn extensionW(path: []const u16) []const u16 {
        const none = path[path.len..];
        const name = basenameW(path);
        if (lastIndexOfScalar(u16, name, '.')) |dot| {
            // A leading dot (dotfile) does not count as an extension.
            return if (dot == 0) none else name[dot..];
        }
        return none;
    }

    /// Picks a launcher purely from the extension of `bin_path`.
    /// Returns null when the extension is not in `BunExtensions`.
    pub fn parseFromBinPath(bin_path: []const u16) ?Shebang {
        const ext_bytes = std.mem.sliceAsBytes(extensionW(bin_path));
        const kind = BunExtensions.get(@alignCast(ext_bytes)) orelse return null;
        return switch (kind) {
            .run_with_bun => comptime Shebang.init("bun run", true) catch unreachable,
            .run_with_cmd => comptime Shebang.init("cmd /c", false) catch unreachable,
            .run_with_powershell => comptime Shebang.init("powershell -ExecutionPolicy Bypass -File", false) catch unreachable,
        };
    }

    /// `32766` is taken from `CreateProcessW` docs. One less to account for the null terminator
    /// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessw#parameters
    pub const max_shebang_input_length = (32766) + "#!".len;

    /// Given the start of a file, parse the shebang.
    /// Output contains slices that point into the input buffer.
    ///
    /// Since a command line cannot be longer than 32766 characters, only the
    /// first `max_shebang_input_length` bytes of the input are examined.
    ///
    /// Falls back to `parseFromBinPath` when the input does not start with
    /// `#!`, contains no newline within the examined bytes, has an empty
    /// shebang line, or names `env` without a program after it.
    pub fn parse(contents_maybe_overflow: []const u8, bin_path: []const u16) !?Shebang {
        const limit = @min(contents_maybe_overflow.len, max_shebang_input_length);
        const contents = contents_maybe_overflow[0..limit];

        const has_marker = contents.len >= 3 and contents[0] == '#' and contents[1] == '!';
        if (!has_marker) return parseFromBinPath(bin_path);

        const newline = bun.strings.indexOfCharUsize(contents, '\n') orelse
            return parseFromBinPath(bin_path);
        std.debug.assert(newline >= 1);
        // Drop the '\r' of a CRLF line ending.
        const line_end = if (contents[newline - 1] == '\r') newline - 1 else newline;
        const line = contents[2..line_end];

        var tokens = std.mem.tokenizeScalar(u8, line, ' ');
        const first = tokens.next() orelse return parseFromBinPath(bin_path);

        const is_env = eqlComptime(first, "/usr/bin/env") or eqlComptime(first, "/bin/env");
        if (!is_env) return try Shebang.init(line, false);

        // Everything after `env` is the launcher; its first token is the program.
        const rest = tokens.rest();
        const program = tokens.next() orelse return parseFromBinPath(bin_path);
        const is_node_or_bun = eqlComptime(program, "bun") or eqlComptime(program, "node");
        return try Shebang.init(rest, is_node_or_bun);
    }

    /// Number of bytes the shebang section occupies in the encoded file:
    /// the UTF-16 launcher plus one trailing space, followed by two u32s.
    pub fn encodedLength(shebang: Shebang) usize {
        const wide_units: usize = " ".len + shebang.utf16_len;
        return wide_units * @sizeOf(u16) + @sizeOf(u32) * 2;
    }
};
/// Total number of bytes `encodeInto` writes for `options`: the bin path plus
/// two extra UTF-16 code units, the optional shebang section, and the flags.
pub fn encodedLength(options: @This()) usize {
    // "\" ".len == 2 accounts for the two code units written after bin_path.
    const path_bytes = (options.bin_path.len + "\" ".len) * @sizeOf(u16);
    const shebang_bytes: usize = if (options.shebang) |s| s.encodedLength() else 0;
    const total = path_bytes + @sizeOf(Flags) + shebang_bytes;
    std.debug.assert(total % 2 == 0);
    return total;
}
/// Serializes `options` into `buf`.
/// The buffer must be exactly the correct length given by encodedLength.
///
/// Written in order: bin_path, a '"' code unit, a zero code unit, then
/// (if a shebang is present) the UTF-16 launcher, one space, the bin path
/// byte length (u32) and the launcher-plus-space byte length (u32), and
/// finally the flags word.
pub fn encodeInto(options: @This(), buf: []u8) !void {
    std.debug.assert(buf.len == options.encodedLength());
    std.debug.assert(options.bin_path[0] != '/');

    // View the byte buffer as UTF-16 code units; `cursor` is the next unit to write.
    const words = @as([*]u16, @alignCast(@ptrCast(&buf[0])))[0 .. buf.len / 2];
    var cursor: usize = 0;

    @memcpy(words[0..options.bin_path.len], options.bin_path);
    cursor += options.bin_path.len;

    words[cursor] = '"';
    words[cursor + 1] = 0;
    cursor += 2;

    var flags = Flags{
        .has_shebang = options.shebang != null,
        .is_node_or_bun = if (options.shebang) |s| s.is_node_or_bun else false,
        .is_node = false,
    };

    if (options.shebang) |s| {
        // "node" counts only as a whole word: alone, or followed by a space.
        const starts_with_node = bun.strings.hasPrefixComptime(s.launcher, "node");
        flags.is_node = starts_with_node and
            (s.launcher.len == 4 or s.launcher[4] == ' ');
        if (flags.is_node) std.debug.assert(flags.is_node_or_bun);

        const encoded = bun.strings.convertUTF8toUTF16InBuffer(
            words[cursor..][0..s.utf16_len],
            s.launcher,
        );
        std.debug.assert(encoded.len == s.utf16_len);
        cursor += s.utf16_len;

        words[cursor] = ' ';
        cursor += 1;

        // Two unaligned u32 lengths, both measured in bytes.
        @as(*align(1) u32, @ptrCast(&words[cursor])).* = @intCast(options.bin_path.len * 2);
        @as(*align(1) u32, @ptrCast(&words[cursor + 2])).* = (s.utf16_len) * 2 + 2; // include the spaces!
        cursor += (@sizeOf(u32) * 2) / @sizeOf(u16);
    }

    @as(*align(1) Flags, @ptrCast(&words[cursor])).* = flags;
    cursor += @sizeOf(Flags) / @sizeOf(u16);

    if (@import("builtin").mode == .Debug) {
        // Every code unit of the buffer must have been consumed.
        const remaining = words.len - cursor;
        if (remaining != 0) std.debug.panic("wbuf.len != 0, got {d}", .{remaining});
    }
}
/// Result of `looseDecode`.
const Decoded = struct {
    /// The bin path portion of the decoded input, as UTF-16 code units.
    bin_path: []const u16,
    /// The flags word read from the end of the decoded input.
    flags: Flags,
};
/// Best-effort decode of an encoded shim buffer: extracts the bin path and
/// the trailing flags without validating the rest of the contents.
///
/// Returns null when:
/// - `input` is shorter than the flags, two u32 lengths and 8 more bytes,
/// - the trailing flags do not carry the current version tag,
/// - the bin path byte length is odd, or (shebang case) larger than
///   `input.len - 8`.
///
/// `bin_path` in the result is `bun.reinterpretSlice(u16, ...)` applied to a
/// leading sub-slice of `input`.
pub fn looseDecode(input: []const u8) ?Decoded {
    if (input.len < @sizeOf(Flags) + 2 * @sizeOf(u32) + 8) {
        return null;
    }

    // The flags are always the last word of the file.
    const flags_offset = input.len - @sizeOf(Flags);
    const flags = @as(*align(1) const Flags, @ptrCast(&input[flags_offset])).*;
    if (!flags.isValid()) {
        return null;
    }

    const bin_path_u8 = if (flags.has_shebang) bin_path_u8: {
        // With a shebang, the bin path byte length is stored in the first of
        // the two u32s that immediately precede the flags.
        const bin_path_byte_len = @as(*align(1) const u32, @ptrCast(&input[flags_offset - 2 * @sizeOf(u32)])).*;
        if (bin_path_byte_len % 2 != 0) {
            return null;
        }
        if (bin_path_byte_len > (input.len - 8)) {
            return null;
        }
        break :bin_path_u8 input[0..bin_path_byte_len];
    } else (
        // Without a shebang the layout is [bin_path][u16 '"'][u16 0][flags],
        // so the path ends two code units before the flags. Previously the
        // slice ran up to the flags and included the quote and terminator.
        input[0 .. flags_offset - 2 * @sizeOf(u16)]);

    if (bin_path_u8.len % 2 != 0) {
        return null;
    }

    return .{
        .bin_path = bun.reinterpretSlice(u16, bin_path_u8),
        .flags = flags,
    };
}