diff --git a/lib/std/Uri.zig b/lib/std/Uri.zig index b27a3d70126d..83e7da76494b 100644 --- a/lib/std/Uri.zig +++ b/lib/std/Uri.zig @@ -134,6 +134,7 @@ pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort }; /// original `text`. Each component that is provided, will be non-`null`. pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { var reader = SliceReader{ .slice = text }; + var uri = Uri{ .scheme = "", .user = null, @@ -145,13 +146,14 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri { .fragment = null, }; - if (reader.peekPrefix("//")) { // authority part + if (reader.peekPrefix("//")) a: { // authority part std.debug.assert(reader.get().? == '/'); std.debug.assert(reader.get().? == '/'); const authority = reader.readUntil(isAuthoritySeparator); - if (authority.len == 0) - return error.InvalidFormat; + if (authority.len == 0) { + if (reader.peekPrefix("/")) break :a else return error.InvalidFormat; + } var start_of_host: usize = 0; if (std.mem.indexOf(u8, authority, "@")) |index| { @@ -224,7 +226,6 @@ pub fn format( try writer.writeAll(":"); if (uri.host) |host| { try writer.writeAll("//"); - if (uri.user) |user| { try writer.writeAll(user); if (uri.password) |password| { @@ -486,6 +487,23 @@ test "should fail gracefully" { try std.testing.expectEqual(@as(ParseError!Uri, error.InvalidFormat), parse("foobar://")); } +test "file" { + const parsed = try parse("file:///"); + try std.testing.expectEqualSlices(u8, "file", parsed.scheme); + try std.testing.expectEqual(@as(?[]const u8, null), parsed.host); + try std.testing.expectEqualSlices(u8, "/", parsed.path); + + const parsed2 = try parse("file:///an/absolute/path/to/something"); + try std.testing.expectEqualSlices(u8, "file", parsed2.scheme); + try std.testing.expectEqual(@as(?[]const u8, null), parsed2.host); + try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/something", parsed2.path); + + const parsed3 = try parse("file://localhost/an/absolute/path/to/another/thing/"); + try std.testing.expectEqualSlices(u8, "file", parsed3.scheme); + try std.testing.expectEqualSlices(u8, "localhost", parsed3.host.?); + try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/another/thing/", parsed3.path); +} + test "scheme" { try std.testing.expectEqualSlices(u8, "http", (try parse("http:_")).scheme); try std.testing.expectEqualSlices(u8, "scheme-mee", (try parse("scheme-mee:_")).scheme); @@ -695,3 +713,20 @@ test "URI query escaping" { defer std.testing.allocator.free(formatted_uri); try std.testing.expectEqualStrings("/?response-content-type=application%2Foctet-stream", formatted_uri); } + +test "format" { + const uri = Uri{ + .scheme = "file", + .user = null, + .password = null, + .host = null, + .port = null, + .path = "/foo/bar/baz", + .query = null, + .fragment = null, + }; + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + try uri.format("+/", .{}, buf.writer()); + try std.testing.expectEqualSlices(u8, "file:/foo/bar/baz", buf.items); +} diff --git a/src/Manifest.zig b/src/Manifest.zig index 199663556d4e..2ff54e613266 100644 --- a/src/Manifest.zig +++ b/src/Manifest.zig @@ -2,8 +2,11 @@ pub const basename = "build.zig.zon"; pub const Hash = std.crypto.hash.sha2.Sha256; pub const Dependency = struct { - url: []const u8, - url_tok: Ast.TokenIndex, + location: union(enum) { + url: []const u8, + path: []const u8, + }, + location_tok: Ast.TokenIndex, hash: ?[]const u8, hash_tok: Ast.TokenIndex, }; @@ -218,12 +221,12 @@ const Parse = struct { }; var dep: Dependency = .{ - .url = undefined, - .url_tok = undefined, + .location = undefined, + .location_tok = undefined, .hash = null, .hash_tok = undefined, }; - var have_url = false; + var has_location = false; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -232,12 +235,29 @@ const Parse = struct { // things manually provides an opportunity to do any additional verification // that is desirable on a per-field basis. if (mem.eql(u8, field_name, "url")) { - dep.url = parseString(p, field_init) catch |err| switch (err) { - error.ParseFailure => continue, - else => |e| return e, + if (has_location) { + return fail(p, main_tokens[field_init], "dependency should specify only one of 'url' and 'path' fields.", .{}); + } + dep.location = .{ + .url = parseString(p, field_init) catch |err| switch (err) { + error.ParseFailure => continue, + else => |e| return e, + }, + }; + has_location = true; + dep.location_tok = main_tokens[field_init]; + } else if (mem.eql(u8, field_name, "path")) { + if (has_location) { + return fail(p, main_tokens[field_init], "dependency should specify only one of 'url' and 'path' fields.", .{}); + } + dep.location = .{ + .path = parseString(p, field_init) catch |err| switch (err) { + error.ParseFailure => continue, + else => |e| return e, + }, }; - dep.url_tok = main_tokens[field_init]; - have_url = true; + has_location = true; + dep.location_tok = main_tokens[field_init]; } else if (mem.eql(u8, field_name, "hash")) { dep.hash = parseHash(p, field_init) catch |err| switch (err) { error.ParseFailure => continue, @@ -250,8 +270,8 @@ const Parse = struct { } } - if (!have_url) { - try appendError(p, main_tokens[node], "dependency is missing 'url' field", .{}); + if (!has_location) { + try appendError(p, main_tokens[node], "dependency requires location field, one of 'url' or 'path'.", .{}); } return dep; diff --git a/src/Package.zig b/src/Package.zig index d170baeae509..790ddef253be 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -245,8 +245,6 @@ pub fn fetchAndAddDependencies( error.FileNotFound => { // Handle the same as no dependencies. if (this_hash) |hash| { - const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ hash[0..hex_multihash_len]; - const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path}); try dependencies_source.writer().print( \\ pub const {} = struct {{ \\ pub const build_root = "{}"; @@ -256,7 +254,7 @@ pub fn fetchAndAddDependencies( \\ , .{ std.zig.fmtId(hash), - std.zig.fmtEscapes(build_root), + std.zig.fmtEscapes(pkg.root_src_directory.path.?), std.zig.fmtEscapes(hash), }); } else { @@ -312,66 +310,85 @@ pub fn fetchAndAddDependencies( try dependencies_source.writer().writeAll("pub const packages = struct {\n"); } - const deps_list = manifest.dependencies.values(); - for (manifest.dependencies.keys(), 0..) |name, i| { - const dep = deps_list[i]; - - const sub = try fetchAndUnpack( - thread_pool, - http_client, - global_cache_directory, - dep, - report, - all_modules, - root_prog_node, - name, - ); - - if (sub.mod) |mod| { - if (!sub.found_existing) { - try mod.fetchAndAddDependencies( - deps_pkg, - arena, + for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, *dep| { + var fetch_location = try FetchLocation.init(gpa, dep.*, directory, report); + defer fetch_location.deinit(gpa); + + // Directories do not provide a hash in build.zig.zon. + // Hash the path to the module rather than its contents. + const sub_mod, const found_existing = if (fetch_location == .directory) + try getDirectoryModule(gpa, fetch_location, directory, all_modules, dep, report) + else + try getCachedPackage( + gpa, + global_cache_directory, + dep.*, + all_modules, + root_prog_node, + ) orelse .{ + try fetchAndUnpack( + fetch_location, thread_pool, http_client, - mod.root_src_directory, + directory, global_cache_directory, - local_cache_directory, - dependencies_source, - error_bundle, + dep.*, + report, all_modules, root_prog_node, - dep.hash.?, - ); - } + name, + ), + false, + }; - try pkg.add(gpa, name, mod); - if (deps_pkg.table.get(dep.hash.?)) |other_sub| { - // This should be the same package (and hence module) since it's the same hash - // TODO: dedup multiple versions of the same package - assert(other_sub == mod); - } else { - try deps_pkg.add(gpa, dep.hash.?, mod); - } - } else if (!sub.found_existing) { - const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ (dep.hash.?)[0..hex_multihash_len]; - const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path}); - try dependencies_source.writer().print( - \\ pub const {} = struct {{ - \\ pub const build_root = "{}"; - \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}}; - \\ }}; - \\ - , .{ - std.zig.fmtId(dep.hash.?), - std.zig.fmtEscapes(build_root), - }); + assert(dep.hash != null); + + switch (sub_mod) { + .zig_pkg => |sub_pkg| { + if (!found_existing) { + try sub_pkg.fetchAndAddDependencies( + deps_pkg, + arena, + thread_pool, + http_client, + sub_pkg.root_src_directory, + global_cache_directory, + local_cache_directory, + dependencies_source, + error_bundle, + all_modules, + root_prog_node, + dep.hash.?, + ); + } + + try pkg.add(gpa, name, sub_pkg); + if (deps_pkg.table.get(dep.hash.?)) |other_sub| { + // This should be the same package (and hence module) since it's the same hash + // TODO: dedup multiple versions of the same package + assert(other_sub == sub_pkg); + } else { + try deps_pkg.add(gpa, dep.hash.?, sub_pkg); + } + }, + .non_zig_pkg => |sub_pkg| { + if (!found_existing) { + try dependencies_source.writer().print( + \\ pub const {} = struct {{ + \\ pub const build_root = "{}"; + \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}}; + \\ }}; + \\ + , .{ + std.zig.fmtId(dep.hash.?), + std.zig.fmtEscapes(sub_pkg.root_src_directory.path.?), + }); + } + }, } } if (this_hash) |hash| { - const pkg_dir_sub_path = "p" ++ fs.path.sep_str ++ hash[0..hex_multihash_len]; - const build_root = try global_cache_directory.join(arena, &.{pkg_dir_sub_path}); try dependencies_source.writer().print( \\ pub const {} = struct {{ \\ pub const build_root = "{}"; @@ -380,7 +397,7 @@ pub fn fetchAndAddDependencies( \\ , .{ std.zig.fmtId(hash), - std.zig.fmtEscapes(build_root), + std.zig.fmtEscapes(pkg.root_src_directory.path.?), std.zig.fmtEscapes(hash), }); for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| { @@ -490,15 +507,296 @@ const Report = struct { } }; +const FetchLocation = union(enum) { + /// The relative path to a file or directory. + /// This may be a file that requires unpacking (such as a .tar.gz), + /// or the path to the root directory of a package. + file: []const u8, + directory: []const u8, + http_request: std.Uri, + + pub fn init(gpa: Allocator, dep: Manifest.Dependency, root_dir: Compilation.Directory, report: Report) !FetchLocation { + switch (dep.location) { + .url => |url| { + const uri = std.Uri.parse(url) catch |err| switch (err) { + error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI", .{}), + else => return err, + }; + if (ascii.eqlIgnoreCase(uri.scheme, "file")) { + return report.fail(dep.location_tok, "'file' scheme is not allowed for URLs. Use '.path' instead", .{}); + } + return .{ .http_request = uri }; + }, + .path => |path| { + if (fs.path.isAbsolute(path)) { + return report.fail(dep.location_tok, "Absolute paths are not allowed. Use a relative path instead", .{}); + } + + const is_dir = isDirectory(root_dir, path) catch |err| switch (err) { + error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{path}), + else => return err, + }; + + return if (is_dir) + .{ .directory = try gpa.dupe(u8, path) } + else + .{ .file = try gpa.dupe(u8, path) }; + }, + } + } + + pub fn deinit(f: *FetchLocation, gpa: Allocator) void { + switch (f.*) { + inline .file, .directory => |path| gpa.free(path), + .http_request => {}, + } + f.* = undefined; + } + + pub fn fetch( + f: FetchLocation, + gpa: Allocator, + root_dir: Compilation.Directory, + http_client: *std.http.Client, + dep: Manifest.Dependency, + report: Report, + ) !ReadableResource { + switch (f) { + .file => |file| { + const owned_path = try gpa.dupe(u8, file); + errdefer gpa.free(owned_path); + return .{ + .path = owned_path, + .resource = .{ .file = try root_dir.handle.openFile(file, .{}) }, + }; + }, + .http_request => |uri| { + var h = std.http.Headers{ .allocator = gpa }; + defer h.deinit(); + + var req = try http_client.request(.GET, uri, h, .{}); + errdefer req.deinit(); + + try req.start(.{}); + try req.wait(); + + if (req.response.status != .ok) { + return report.fail(dep.location_tok, "Expected response status '200 OK' got '{} {s}'", .{ + @intFromEnum(req.response.status), + req.response.status.phrase() orelse "", + }); + } + + return .{ + .path = try gpa.dupe(u8, uri.path), + .resource = .{ .http_request = req }, + }; + }, + .directory => unreachable, // Directories do not require fetching + } + } +}; + +const ReadableResource = struct { + path: []const u8, + resource: union(enum) { + file: fs.File, + http_request: std.http.Client.Request, + }, + + /// Unpack the package into the global cache directory. + /// If `ps` does not require unpacking (for example, if it is a directory), then no caching is performed. + /// In either case, the hash is computed and returned along with the path to the package. + pub fn unpack( + rr: *ReadableResource, + allocator: Allocator, + thread_pool: *ThreadPool, + global_cache_directory: Compilation.Directory, + dep: Manifest.Dependency, + report: Report, + pkg_prog_node: *std.Progress.Node, + ) !PackageLocation { + switch (rr.resource) { + inline .file, .http_request => |*r| { + const s = fs.path.sep_str; + const rand_int = std.crypto.random.int(u64); + const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); + + const actual_hash = h: { + var tmp_directory: Compilation.Directory = d: { + const path = try global_cache_directory.join(allocator, &.{tmp_dir_sub_path}); + errdefer allocator.free(path); + + const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}); + errdefer iterable_dir.close(); + + break :d .{ + .path = path, + .handle = iterable_dir.dir, + }; + }; + defer tmp_directory.closeAndFree(allocator); + + const opt_content_length = try rr.getSize(); + + var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{ + .child_reader = r.reader(), + .prog_node = pkg_prog_node, + .unit = if (opt_content_length) |content_length| unit: { + const kib = content_length / 1024; + const mib = kib / 1024; + if (mib > 0) { + pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); + pkg_prog_node.setUnit("MiB"); + break :unit .mib; + } else { + pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); + pkg_prog_node.setUnit("KiB"); + break :unit .kib; + } + } else .any, + }; + pkg_prog_node.context.refresh(); + + switch (try rr.getFileType(dep, report)) { + .@"tar.gz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.gzip), + // I have not checked what buffer sizes the xz decompression implementation uses + // by default, so the same logic applies for buffering the reader as for gzip. + .@"tar.xz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.xz), + } + + // Unpack completed - stop showing amount as progress + pkg_prog_node.setEstimatedTotalItems(0); + pkg_prog_node.setCompletedItems(0); + pkg_prog_node.context.refresh(); + + // TODO: delete files not included in the package prior to computing the package hash. + // for example, if the ini file has directives to include/not include certain files, + // apply those rules directly to the filesystem right here. This ensures that files + // not protected by the hash are not present on the file system. + + break :h try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle }); + }; + + const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash); + const unpacked_path = try global_cache_directory.join(allocator, &.{pkg_dir_sub_path}); + defer allocator.free(unpacked_path); + + const relative_unpacked_path = try fs.path.relative(allocator, global_cache_directory.path.?, unpacked_path); + errdefer allocator.free(relative_unpacked_path); + try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, relative_unpacked_path); + + return .{ + .hash = actual_hash, + .relative_unpacked_path = relative_unpacked_path, + }; + }, + } + } + + const FileType = enum { + @"tar.gz", + @"tar.xz", + }; + + pub fn getSize(rr: ReadableResource) !?u64 { + switch (rr.resource) { + // TODO: Handle case of chunked content-length + .http_request => |req| return req.response.content_length, + .file => |f| return (try f.metadata()).size(), + } + } + + pub fn getFileType(rr: ReadableResource, dep: Manifest.Dependency, report: Report) !FileType { + switch (rr.resource) { + .file => { + return fileTypeFromPath(rr.path) orelse + return report.fail(dep.location_tok, "Unknown file type", .{}); + }, + .http_request => |req| { + const content_type = req.response.headers.getFirstValue("Content-Type") orelse + return report.fail(dep.location_tok, "Missing 'Content-Type' header", .{}); + + // If the response has a different content type than the URI indicates, override + // the previously assumed file type. + return if (ascii.eqlIgnoreCase(content_type, "application/gzip") or + ascii.eqlIgnoreCase(content_type, "application/x-gzip") or + ascii.eqlIgnoreCase(content_type, "application/tar+gzip")) + .@"tar.gz" + else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) + .@"tar.xz" + else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) ty: { + // support gitlab tarball urls such as https://gitlab.com///-/archive//-.tar.gz + // whose content-disposition header is: 'attachment; filename="-.tar.gz"' + const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse + return report.fail(dep.location_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); + break :ty getAttachmentType(content_disposition) orelse + return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); + } else return report.fail(dep.location_tok, "Unrecognized value for 'Content-Type' header: {s}", .{content_type}); + }, + } + } + + fn fileTypeFromPath(file_path: []const u8) ?FileType { + return if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) + .@"tar.gz" + else if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) + .@"tar.xz" + else + null; + } + + fn getAttachmentType(content_disposition: []const u8) ?FileType { + const disposition_type_end = ascii.indexOfIgnoreCase(content_disposition, "attachment;") orelse return null; + + var value_start = ascii.indexOfIgnoreCasePos(content_disposition, disposition_type_end + 1, "filename") orelse return null; + value_start += "filename".len; + if (content_disposition[value_start] == '*') { + value_start += 1; + } + if (content_disposition[value_start] != '=') return null; + value_start += 1; + + var value_end = mem.indexOfPos(u8, content_disposition, value_start, ";") orelse content_disposition.len; + if (content_disposition[value_end - 1] == '\"') { + value_end -= 1; + } + return fileTypeFromPath(content_disposition[value_start..value_end]); + } + + pub fn deinit(rr: *ReadableResource, gpa: Allocator) void { + gpa.free(rr.path); + switch (rr.resource) { + .file => |file| file.close(), + .http_request => |*req| req.deinit(), + } + rr.* = undefined; + } +}; + +pub const PackageLocation = struct { + /// For packages that require unpacking, this is the hash of the package contents. + /// For directories, this is the hash of the absolute file path. + hash: [Manifest.Hash.digest_length]u8, + relative_unpacked_path: []const u8, + + pub fn deinit(pl: *PackageLocation, allocator: Allocator) void { + allocator.free(pl.relative_unpacked_path); + pl.* = undefined; + } +}; + const hex_multihash_len = 2 * Manifest.multihash_len; const MultiHashHexDigest = [hex_multihash_len]u8; + +const DependencyModule = union(enum) { + zig_pkg: *Package, + non_zig_pkg: *Package, +}; /// This is to avoid creating multiple modules for the same build.zig file. /// If the value is `null`, the package is a known dependency, but has not yet /// been fetched. -pub const AllModules = std.AutoHashMapUnmanaged(MultiHashHexDigest, ?union(enum) { - zig_pkg: *Package, - non_zig_pkg: void, -}); +pub const AllModules = std.AutoHashMapUnmanaged(MultiHashHexDigest, ?DependencyModule); fn ProgressReader(comptime ReaderType: type) type { return struct { @@ -542,29 +840,27 @@ fn ProgressReader(comptime ReaderType: type) type { }; } -fn fetchAndUnpack( - thread_pool: *ThreadPool, - http_client: *std.http.Client, +/// Get a cached package if it exists. +/// Returns `null` if the package has not been cached +/// If the package exists in the cache, returns a pointer to the package and a +/// boolean indicating whether this package has already been seen in the build +/// (i.e. whether or not its transitive dependencies have been fetched). +fn getCachedPackage( + gpa: Allocator, global_cache_directory: Compilation.Directory, dep: Manifest.Dependency, - report: Report, all_modules: *AllModules, root_prog_node: *std.Progress.Node, - /// This does not have to be any form of canonical or fully-qualified name: it - /// is only intended to be human-readable for progress reporting. - name_for_prog: []const u8, -) !struct { mod: ?*Package, found_existing: bool } { - const gpa = http_client.allocator; +) !?struct { DependencyModule, bool } { const s = fs.path.sep_str; - // Check if the expected_hash is already present in the global package // cache, and thereby avoid both fetching and unpacking. - if (dep.hash) |h| cached: { + if (dep.hash) |h| { const hex_digest = h[0..hex_multihash_len]; const pkg_dir_sub_path = "p" ++ s ++ hex_digest; var pkg_dir = global_cache_directory.handle.openDir(pkg_dir_sub_path, .{}) catch |err| switch (err) { - error.FileNotFound => break :cached, + error.FileNotFound => return null, else => |e| return e, }; errdefer pkg_dir.close(); @@ -574,162 +870,99 @@ fn fetchAndUnpack( const gop = try all_modules.getOrPut(gpa, hex_digest.*); if (gop.found_existing) { if (gop.value_ptr.*) |mod| { - return switch (mod) { - .zig_pkg => |pkg| .{ - .mod = pkg, - .found_existing = true, - }, - .non_zig_pkg => .{ - .mod = null, - .found_existing = true, - }, - }; + return .{ mod, true }; } } - pkg_dir.access(build_zig_basename, .{}) catch { - gop.value_ptr.* = .non_zig_pkg; - return .{ - .mod = null, - .found_existing = false, - }; - }; - - const build_root = try global_cache_directory.join(gpa, &.{pkg_dir_sub_path}); - errdefer gpa.free(build_root); - root_prog_node.completeOne(); - const ptr = try gpa.create(Package); - errdefer gpa.destroy(ptr); + const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false; + const basename = if (is_zig_mod) build_zig_basename else ""; + const pkg = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, basename); - const owned_src_path = try gpa.dupe(u8, build_zig_basename); - errdefer gpa.free(owned_src_path); + const module: DependencyModule = if (is_zig_mod) + .{ .zig_pkg = pkg } + else + .{ .non_zig_pkg = pkg }; - ptr.* = .{ - .root_src_directory = .{ - .path = build_root, - .handle = pkg_dir, - }, - .root_src_directory_owned = true, - .root_src_path = owned_src_path, - }; - - gop.value_ptr.* = .{ .zig_pkg = ptr }; - return .{ - .mod = ptr, - .found_existing = false, - }; + try all_modules.put(gpa, hex_digest.*, module); + return .{ module, false }; } - var pkg_prog_node = root_prog_node.start(name_for_prog, 0); - defer pkg_prog_node.end(); - pkg_prog_node.activate(); - pkg_prog_node.context.refresh(); - - const uri = try std.Uri.parse(dep.url); - - const rand_int = std.crypto.random.int(u64); - const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); + return null; +} - const actual_hash = a: { - var tmp_directory: Compilation.Directory = d: { - const path = try global_cache_directory.join(gpa, &.{tmp_dir_sub_path}); - errdefer gpa.free(path); +fn getDirectoryModule( + gpa: Allocator, + fetch_location: FetchLocation, + directory: Compilation.Directory, + all_modules: *AllModules, + dep: *Manifest.Dependency, + report: Report, +) !struct { DependencyModule, bool } { + assert(fetch_location == .directory); - const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}); - errdefer iterable_dir.close(); + if (dep.hash != null) { + return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); + } - break :d .{ - .path = path, - .handle = iterable_dir.dir, - }; - }; - defer tmp_directory.closeAndFree(gpa); + const hash = try computePathHash(gpa, directory, fetch_location.directory); + const hex_digest = Manifest.hexDigest(hash); + dep.hash = try gpa.dupe(u8, &hex_digest); - var h = std.http.Headers{ .allocator = gpa }; - defer h.deinit(); + // There is no fixed location to check for directory modules. + // Instead, check whether it is already listed in all_modules. + if (all_modules.get(hex_digest)) |mod| return .{ mod.?, true }; - var req = try http_client.request(.GET, uri, h, .{}); - defer req.deinit(); + var pkg_dir = directory.handle.openDir(fetch_location.directory, .{}) catch |err| switch (err) { + error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{fetch_location.directory}), + else => |e| return e, + }; + defer pkg_dir.close(); - try req.start(.{}); - try req.wait(); + const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false; + const basename = if (is_zig_mod) build_zig_basename else ""; - if (req.response.status != .ok) { - return report.fail(dep.url_tok, "Expected response status '200 OK' got '{} {s}'", .{ - @intFromEnum(req.response.status), - req.response.status.phrase() orelse "", - }); - } + const pkg = try createWithDir(gpa, directory, fetch_location.directory, basename); + const module: DependencyModule = if (is_zig_mod) + .{ .zig_pkg = pkg } + else + .{ .non_zig_pkg = pkg }; - const content_type = req.response.headers.getFirstValue("Content-Type") orelse - return report.fail(dep.url_tok, "Missing 'Content-Type' header", .{}); - - var prog_reader: ProgressReader(std.http.Client.Request.Reader) = .{ - .child_reader = req.reader(), - .prog_node = &pkg_prog_node, - .unit = if (req.response.content_length) |content_length| unit: { - const kib = content_length / 1024; - const mib = kib / 1024; - if (mib > 0) { - pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); - pkg_prog_node.setUnit("MiB"); - break :unit .mib; - } else { - pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); - pkg_prog_node.setUnit("KiB"); - break :unit .kib; - } - } else .any, - }; - pkg_prog_node.context.refresh(); - - if (ascii.eqlIgnoreCase(content_type, "application/gzip") or - ascii.eqlIgnoreCase(content_type, "application/x-gzip") or - ascii.eqlIgnoreCase(content_type, "application/tar+gzip")) - { - // I observed the gzip stream to read 1 byte at a time, so I am using a - // buffered reader on the front of it. - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip); - } else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) { - // I have not checked what buffer sizes the xz decompression implementation uses - // by default, so the same logic applies for buffering the reader as for gzip. - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.xz); - } else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) { - // support gitlab tarball urls such as https://gitlab.com///-/archive//-.tar.gz - // whose content-disposition header is: 'attachment; filename="-.tar.gz"' - const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse - return report.fail(dep.url_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); - if (isTarAttachment(content_disposition)) { - try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip); - } else return report.fail(dep.url_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); - } else { - return report.fail(dep.url_tok, "Unsupported 'Content-Type' header value: '{s}'", .{content_type}); - } + try all_modules.put(gpa, hex_digest, module); + return .{ module, false }; +} - // Download completed - stop showing downloaded amount as progress - pkg_prog_node.setEstimatedTotalItems(0); - pkg_prog_node.setCompletedItems(0); - pkg_prog_node.context.refresh(); +fn fetchAndUnpack( + fetch_location: FetchLocation, + thread_pool: *ThreadPool, + http_client: *std.http.Client, + directory: Compilation.Directory, + global_cache_directory: Compilation.Directory, + dep: Manifest.Dependency, + report: Report, + all_modules: *AllModules, + root_prog_node: *std.Progress.Node, + /// This does not have to be any form of canonical or fully-qualified name: it + /// is only intended to be human-readable for progress reporting. + name_for_prog: []const u8, +) !DependencyModule { + assert(fetch_location == .file or fetch_location == .http_request); - // TODO: delete files not included in the package prior to computing the package hash. - // for example, if the ini file has directives to include/not include certain files, - // apply those rules directly to the filesystem right here. This ensures that files - // not protected by the hash are not present on the file system. + const gpa = http_client.allocator; - // TODO: raise an error for files that have illegal paths on some operating systems. - // For example, on Linux a path with a backslash should raise an error here. - // Of course, if the ignore rules above omit the file from the package, then everything - // is fine and no error should be raised. + var pkg_prog_node = root_prog_node.start(name_for_prog, 0); + defer pkg_prog_node.end(); + pkg_prog_node.activate(); + pkg_prog_node.context.refresh(); - break :a try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle }); - }; + var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report); + defer readable_resource.deinit(gpa); - const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash); - try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, pkg_dir_sub_path); + var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep, report, &pkg_prog_node); + defer package_location.deinit(gpa); - const actual_hex = Manifest.hexDigest(actual_hash); + const actual_hex = Manifest.hexDigest(package_location.hash); if (dep.hash) |h| { if (!mem.eql(u8, h, &actual_hex)) { return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ @@ -743,9 +976,9 @@ fn fetchAndUnpack( const eb = report.error_bundle; const notes_len = 1; try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ - .tok = dep.url_tok, + .tok = dep.location_tok, .off = 0, - .msg = "url field is missing corresponding hash field", + .msg = "dependency is missing hash field", }); const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ @@ -754,35 +987,28 @@ fn fetchAndUnpack( return error.PackageFetchFailed; } - const build_zig_path = try std.fs.path.join(gpa, &.{ pkg_dir_sub_path, build_zig_basename }); + const build_zig_path = try fs.path.join(gpa, &.{ package_location.relative_unpacked_path, build_zig_basename }); defer gpa.free(build_zig_path); - global_cache_directory.handle.access(build_zig_path, .{}) catch |err| switch (err) { - error.FileNotFound => { - try all_modules.put(gpa, actual_hex, .non_zig_pkg); - return .{ - .mod = null, - .found_existing = false, - }; - }, - else => return err, - }; + const is_zig_mod = if (global_cache_directory.handle.access(build_zig_path, .{})) |_| true else |_| false; + const basename = if (is_zig_mod) build_zig_basename else ""; + const pkg = try createWithDir(gpa, global_cache_directory, package_location.relative_unpacked_path, basename); + const module: DependencyModule = if (is_zig_mod) + .{ .zig_pkg = pkg } + else + .{ .non_zig_pkg = pkg }; - const mod = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, build_zig_basename); - try all_modules.put(gpa, actual_hex, .{ .zig_pkg = mod }); - return .{ - .mod = mod, - .found_existing = false, - }; + try all_modules.put(gpa, actual_hex, module); + return module; } fn unpackTarball( gpa: Allocator, - req_reader: anytype, + reader: anytype, out_dir: fs.Dir, comptime compression: type, ) !void { - var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req_reader); + var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var decompress = try compression.decompress(gpa, br.reader()); defer decompress.deinit(); @@ -873,6 +1099,24 @@ fn computePackageHash( return hasher.finalResult(); } +/// Compute the hash of a file path. +fn computePathHash(gpa: Allocator, dir: Compilation.Directory, path: []const u8) ![Manifest.Hash.digest_length]u8 { + const resolved_path = try std.fs.path.resolve(gpa, &.{ dir.path.?, path }); + defer gpa.free(resolved_path); + var hasher = Manifest.Hash.init(.{}); + hasher.update(resolved_path); + return hasher.finalResult(); +} + +fn isDirectory(root_dir: Compilation.Directory, path: []const u8) !bool { + var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) { + error.NotDir => return false, + else => return err, + }; + defer dir.close(); + return true; +} + /// Make a file system path identical independently of operating system path inconsistencies. /// This converts backslashes into forward slashes. fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { @@ -953,36 +1197,18 @@ fn renameTmpIntoCache( } } -fn isTarAttachment(content_disposition: []const u8) bool { - const disposition_type_end = ascii.indexOfIgnoreCase(content_disposition, "attachment;") orelse return false; - - var value_start = ascii.indexOfIgnoreCasePos(content_disposition, disposition_type_end + 1, "filename") orelse return false; - value_start += "filename".len; - if (content_disposition[value_start] == '*') { - value_start += 1; - } - if (content_disposition[value_start] != '=') return false; - value_start += 1; - - var value_end = mem.indexOfPos(u8, content_disposition, value_start, ";") orelse content_disposition.len; - if (content_disposition[value_end - 1] == '\"') { - value_end -= 1; - } - return ascii.endsWithIgnoreCase(content_disposition[value_start..value_end], ".tar.gz"); -} - -test "isTarAttachment" { - try std.testing.expect(isTarAttachment("attaChment; FILENAME=\"stuff.tar.gz\"; size=42")); - try std.testing.expect(isTarAttachment("attachment; filename*=\"stuff.tar.gz\"")); - try std.testing.expect(isTarAttachment("ATTACHMENT; filename=\"stuff.tar.gz\"")); - try std.testing.expect(isTarAttachment("attachment; FileName=\"stuff.tar.gz\"")); - try std.testing.expect(isTarAttachment("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); - - try std.testing.expect(!isTarAttachment("attachment FileName=\"stuff.tar.gz\"")); - try std.testing.expect(!isTarAttachment("attachment; FileName=\"stuff.tar\"")); - try std.testing.expect(!isTarAttachment("attachment; FileName\"stuff.gz\"")); - try std.testing.expect(!isTarAttachment("attachment; size=42")); - try std.testing.expect(!isTarAttachment("inline; size=42")); - try std.testing.expect(!isTarAttachment("FileName=\"stuff.tar.gz\"; attachment;")); - try std.testing.expect(!isTarAttachment("FileName=\"stuff.tar.gz\";")); +test "getAttachmentType" { + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attaChment; FILENAME=\"stuff.tar.gz\"; size=42")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; filename*=\"stuff.tar.gz\"")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("ATTACHMENT; filename=\"stuff.tar.xz\"")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar.xz\"")); + try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); + + try std.testing.expect(ReadableResource.getAttachmentType("attachment FileName=\"stuff.tar.gz\"") == null); + try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar\"") == null); + try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName\"stuff.gz\"") == null); + try std.testing.expect(ReadableResource.getAttachmentType("attachment; size=42") == null); + try std.testing.expect(ReadableResource.getAttachmentType("inline; size=42") == null); + try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\"; attachment;") == null); + try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\";") == null); }