Skip to content

Commit

Permalink
Add amalgamation script
Browse files Browse the repository at this point in the history
  • Loading branch information
glebm committed Aug 24, 2019
1 parent baba790 commit b9b4080
Show file tree
Hide file tree
Showing 8 changed files with 5,492 additions and 2 deletions.
21 changes: 19 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,16 @@ ifneq (Cygwin,$(UNAME))
endif
endif

AMALGAM ?= 0
AMALGAM_SOURCE := build/libsass-amalgam.cpp
include Makefile.conf
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
ifeq (1,$(AMALGAM))
OBJECTS = $(AMALGAM_SOURCE:.cpp=.o)
COBJECTS =
else
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
endif
RCOBJECTS = $(RESOURCES:.rc=.o)

DEBUG_LVL ?= NONE
Expand All @@ -184,6 +191,7 @@ CLEANUPS += $(RCOBJECTS)
CLEANUPS += $(COBJECTS)
CLEANUPS += $(OBJECTS)
CLEANUPS += $(LIBSASS_LIB)
CLEANUPS += $(AMALGAM_SOURCE)

all: $(BUILD)

Expand All @@ -199,6 +207,15 @@ debug-shared: CFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$
debug-shared: CXXFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$(CXXFLAGS))
debug-shared: shared

space := $(null) #
comma := ,

# Build the amalgamation tool by delegating to the Makefile in
# script/amalgamate; rebuilt whenever the tool's source changes.
script/amalgamate/build/amalgamate: script/amalgamate/amalgamate.cpp
$(MAKE) -C script/amalgamate build/amalgamate

# Regenerate the single-file amalgamated source whenever any listed library
# source or the tool itself changes. OUT is given relative to
# script/amalgamate, hence the ../../ prefix.
$(AMALGAM_SOURCE): $(addprefix src/,$(SOURCES)) $(addprefix src/,$(CSOURCES)) script/amalgamate/build/amalgamate
$(MAKE) -C script/amalgamate OUT=../../$(AMALGAM_SOURCE)

lib:
$(MKDIR) lib

Expand Down
1 change: 1 addition & 0 deletions script/amalgamate/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
BasedOnStyle: Google
24 changes: 24 additions & 0 deletions script/amalgamate/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Builds and runs the LibSass amalgamation tool.
#
# Targets:
#   amalgamate        (default) build the tool and run it against ../../
#   build/amalgamate  build the tool only
#   clean             remove the build directory
#
# Variables (all optional, passed through to the tool as flags):
#   EXTS     comma-separated list of file extensions to include
#   EXCLUDE  comma-separated list of src-relative paths to skip
#   OUT      output file; when empty the tool writes to stdout

CXX ?= c++

CXXFLAGS := -std=c++11
CXXFLAGS_OPT := $(CXXFLAGS) -O2 -s
CXXFLAGS_DBG := $(CXXFLAGS) -fsanitize=address -g -O1 -fno-omit-frame-pointer
CXXFLAGS_FASTBUILD := $(CXXFLAGS) -O0 -s

EXTS ?=
EXCLUDE ?=
OUT ?=

amalgamate: build/amalgamate
	build/amalgamate --root=../../ --exts='$(EXTS)' --exclude='$(EXCLUDE)' --out='$(OUT)'

# -p keeps this rule harmless if the directory appears between make's
# existence check and the recipe running (e.g. parallel builds).
build:
	@mkdir -p build

# `build` is order-only: the tool must not be rebuilt merely because the
# directory's timestamp changed.
build/amalgamate: amalgamate.cpp | build
	$(CXX) $(CXXFLAGS_FASTBUILD) -o build/amalgamate amalgamate.cpp

# No prerequisite on `build`: cleaning must not create the directory first.
clean:
	rm -rf build

.PHONY: amalgamate clean
28 changes: 28 additions & 0 deletions script/amalgamate/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# LibSass amalgamation script

This script concatenates LibSass sources into a single file.

Compiling the amalgamated file cuts single-core compilation time by about 50% and the size of the output shared library by about 10% (see the measurements below).

SQLite has a great writeup on amalgamation here:
<https://www.sqlite.org/amalgamation.html>.

With amalgamation:

~~~bash
rm -f script/amalgamate/build/amalgamate && make clean AMALGAM=1 && \
time make lib/libsass.so AMALGAM=1 && du -sh lib/libsass.so
~~~

Compilation time (1 core): 30s
`lib/libsass.so` size: 3.0M

Without amalgamation:

~~~bash
make clean AMALGAM=0 && time make -j`nproc` lib/libsass.so AMALGAM=0 && du -sh lib/libsass.so
~~~

Compilation time (1 core): 60s
Compilation time (8 cores): 16s
`lib/libsass.so` size: 3.3M
276 changes: 276 additions & 0 deletions script/amalgamate/amalgamate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "filesystem-polyfill/ghc/filesystem.hpp"

// #define DEBUG 0

namespace {

namespace fs = ghc::filesystem;

/// Reads the entire file at `path` as raw bytes (binary mode, no newline
/// translation).
///
/// @param path  File to read.
/// @return The file's contents.
/// @throws std::runtime_error if the file cannot be opened. Previously an
///     unreadable file silently yielded an empty string, which would drop
///     that source from the amalgamation without any diagnostic.
std::string ReadFile(const fs::path &path) {
  std::ifstream ifs(path, std::ios::binary);
  if (!ifs) {
    throw std::runtime_error("Cannot open file for reading: " + path.string());
  }
  return std::string(std::istreambuf_iterator<char>(ifs),
                     std::istreambuf_iterator<char>());
}

// One `#include` directive that resolved to another loaded source file.
// Instances are brace-initialised positionally, so the field order matters.
struct IncludeStatement {
// Path of the included file, in the same form used as the key of the
// amalgamator's file map.
std::string relpath;
// Offset into the including file's contents where the directive match
// starts (the match includes any leading spaces/tabs on the line).
std::size_t line_begin;
// Offset one past the end of the match (the match ends with the newline).
std::size_t line_end;
// True when the same file was already included earlier in the same
// including file.
bool duplicate;
};

// Everything the amalgamator keeps about one loaded source file.
struct FileData {
// Full contents of the file as read from disk.
std::string contents;
// Resolved include directives, appended in the order they are found while
// scanning `contents` from start to end.
std::vector<IncludeStatement> includes;
};

class Amalgamator {
public:
Amalgamator(fs::path root_dir, fs::path src_dir,
std::vector<std::string> exts,
std::unordered_set<std::string> exclude, std::ostream &out)
: root_dir_(std::move(root_dir)),
src_dir_(src_dir),
exts_(exts),
exclude_(exclude),
out_(out) {}

void Amalgamate() {
LoadFiles();

#ifdef DEBUG
const auto log_strings = [](const char *name,
const std::vector<std::string> &xs) {
std::cerr << name << " (" << xs.size() << "):";
for (const std::string &x : xs) std::cerr << " " << x;
std::cerr << std::endl;
};

log_strings("Files", files_);
#endif

for (auto &it : files_data_) {
AnalyzeIncludes(it.first, &it.second);
}

#ifdef DEBUG
log_strings("Sorted files", files_);
#endif

std::unordered_set<std::string> written;
for (const auto &file : files_) {
WriteReplaceIncludes(out_, file, /*parent=*/"", &written);
}
out_.flush();
}

private:
void WriteReplaceIncludes(std::ostream &out, const std::string relpath,
const std::string &parent,
std::unordered_set<std::string> *written) {
if (written->find(relpath) != written->end()) return;
written->insert(relpath);
out << "/* AMALGAM: " << relpath;
if (!parent.empty()) out << " included from " << parent;
out << " */ \n";
const auto &data = files_data_.at(relpath);
std::size_t prev = 0;
for (const auto &incl : data.includes) {
out.write(data.contents.data() + prev, incl.line_begin - prev);
WriteReplaceIncludes(out, incl.relpath, relpath, written);
prev = incl.line_end;
}
out.write(data.contents.data() + prev, data.contents.size() - prev);
if (data.contents.empty() || data.contents.back() != '\n') {
out.write("\n", 1);
}
}

void LoadFiles() {
for (const auto &entry : fs::recursive_directory_iterator(src_dir_)) {
if (!entry.is_regular_file()) continue;
const auto ext = entry.path().extension();
if (std::find(exts_.begin(), exts_.end(), ext) == exts_.end()) continue;

std::string relpath = PathToSrcRelativeUnix(entry.path());
if (exclude_.find(relpath) != exclude_.end()) continue;

files_.emplace_back(relpath);
files_data_[std::move(relpath)] = {ReadFile(entry.path())};
}
std::sort(files_.begin(), files_.end());
}

void AnalyzeIncludes(const std::string &relpath, FileData *data) {
static const auto *const kIncludeRegex = new std::regex(
R"([ \t]*#include (<[^"\n>]*>|"[^"\n]*")[^\n]*\n)",
std::regex::optimize);
const char *const s = data->contents.c_str();
std::size_t pos = 0;
std::unordered_set<std::string> found_includes;
for (std::cmatch m; std::regex_search(s + pos, m, *kIncludeRegex);
pos += m.position() + m.length()) {
// Match only beginning of line:
if (!(pos + m.position() == 0 || s[pos + m.position() - 1] == '\n')) {
continue;
}
if (m[1].length() < 3) {
throw std::runtime_error("Invalid include: " + m.str());
}
std::string include(m[1].first + 1, m[1].length() - 2);

auto found = ResolveInclude(relpath, include);
if (found.empty()) continue;
if (found == relpath) {
std::cerr << "WARNING: Self-include in " << relpath << std::endl;
continue;
}
const bool duplicate = found_includes.find(found) != found_includes.end();
if (duplicate) {
std::cerr << "WARNING: Duplicate #include of " << found << " in "
<< relpath << std::endl;
}
found_includes.insert(found);
data->includes.push_back({std::move(found), pos + m.position(),
pos + m.position() + m.length(), duplicate});
}

#ifdef DEBUG
if (data->includes.empty()) return;
std::cerr << "Includes for " << relpath << ":";
for (const auto &x : data->includes)
std::cerr << " " << x.relpath << " (" << x.line_begin << "," << x.line_end
<< ")";
std::cerr << std::endl;
#endif
}

std::string ResolveInclude(std::string from_relpath, std::string include) {
std::string resolved;
if (include[0] == '.' || files_data_.find(resolved) == files_data_.end()) {
resolved = fs::path(from_relpath)
.parent_path()
.append(include)
.lexically_normal();
} else {
resolved = include;
}
if (files_data_.find(resolved) == files_data_.end()) {
resolved.clear();
return resolved;
}
return resolved;
}

std::string PathToSrcRelativeUnix(const fs::path &path) {
auto relpath = fs::relative(path, src_dir_);
if (fs::path::preferred_separator == '/') return relpath.u8string();
std::string result = relpath.u8string();
std::replace(result.begin(), result.end(), fs::path::preferred_separator,
'/');
return result;
}

fs::path root_dir_;
fs::path src_dir_;
std::vector<std::string> exts_;
std::unordered_set<std::string> exclude_;
std::ostream &out_;

std::vector<std::string> files_;
std::unordered_map<std::string, FileData> files_data_;
};

/// Splits `str` on every occurrence of `sep`.
///
/// Empty pieces are kept, so the result always has exactly one more element
/// than the number of separators (an empty input yields one empty string).
std::vector<std::string> StrSplit(const std::string &str, char sep) {
  std::vector<std::string> pieces;
  std::size_t start = 0;
  for (std::size_t hit = str.find(sep); hit != std::string::npos;
       hit = str.find(sep, start)) {
    pieces.emplace_back(str, start, hit - start);
    start = hit + 1;
  }
  // Whatever follows the last separator (possibly nothing).
  pieces.emplace_back(str, start, std::string::npos);
  return pieces;
}

/// Reports whether `prefix` occurs in `str` starting exactly at offset `pos`.
///
/// Returns false when `prefix` would extend past the end of `str`; an empty
/// `prefix` matches at any offset up to and including `str.size()`.
bool StartsWith(const std::string &str, const std::string &prefix,
                std::size_t pos = 0) {
  const std::size_t needed = prefix.size();
  if (pos + needed > str.size()) return false;
  return str.compare(pos, needed, prefix) == 0;
}

/// Tries to parse `arg` as `--<name>=<value>` and records the value.
///
/// @param arg    Command-line argument; the first two characters are assumed
///               to be the `--` prefix and are not inspected.
/// @param name   Flag name without the leading dashes.
/// @param flags  Receives `name -> value`; an existing entry for `name` is
///               left unchanged.
/// @return true if `arg` is this flag, false if the text after the first two
///     characters does not begin with `name`.
/// @throws std::runtime_error if `arg` begins with the flag name but is not
///     followed by `=`. The message previously had a stray character
///     appended (a NUL when the `=` was missing altogether).
bool ParseFlag(const std::string &arg, const std::string &name,
               std::unordered_map<std::string, std::string> *flags) {
  constexpr std::size_t kPrefixLen = 2;  // Length of the leading "--".
  if (arg.size() < kPrefixLen ||
      arg.compare(kPrefixLen, name.size(), name) != 0) {
    return false;
  }
  const std::size_t eq_pos = kPrefixLen + name.size();
  if (eq_pos >= arg.size() || arg[eq_pos] != '=') {
    throw std::runtime_error("Invalid argument: " + arg + " (expected --" +
                             name + "=VALUE)");
  }
  flags->emplace(name, arg.substr(eq_pos + 1));
  return true;
}

} // namespace

int main(int argc, char *argv[]) {
fs::path root_dir;
std::unordered_map<std::string, std::string> flags;
for (int i = 1; i < argc; ++i) {
const std::string &arg = argv[i];
if (!StartsWith(arg, "--")) {
throw std::runtime_error("Invalid argument (must start with --): " + arg);
}
ParseFlag(arg, "root", &flags) || ParseFlag(arg, "exts", &flags) ||
ParseFlag(arg, "out", &flags) || ParseFlag(arg, "exclude", &flags);
}

const auto &root_flag = flags.find("root");
if (root_flag != flags.end() && !root_flag->second.empty()) {
root_dir = root_flag->second;
} else {
root_dir = fs::current_path();
}

std::vector<std::string> exts;
const auto &exts_flag = flags.find("exts");
if (exts_flag != flags.end() && !exts_flag->second.empty()) {
exts = StrSplit(exts_flag->second, ',');
} else {
exts = {".h", ".c", ".hpp", ".cpp"};
}

std::ostream *out;
std::ofstream outfile;
const auto &out_flag = flags.find("out");
if (out_flag != flags.end() && !out_flag->second.empty()) {
outfile = std::ofstream(out_flag->second);
out = &outfile;
} else {
out = &std::cout;
}

std::unordered_set<std::string> exclude;
const auto &exclude_flag = flags.find("exclude");
if (exclude_flag != flags.end()) {
const auto vec = StrSplit(exclude_flag->second, ',');
exclude = {vec.begin(), vec.end()};
}

Amalgamator(root_dir, root_dir.append("src"), std::move(exts),
std::move(exclude), *out)
.Amalgamate();
if (outfile.is_open()) outfile.close();

return 0;
}
1 change: 1 addition & 0 deletions script/amalgamate/filesystem-polyfill/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DisableFormat: true
5 changes: 5 additions & 0 deletions script/amalgamate/filesystem-polyfill/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
C++17 `std::filesystem` polyfill for C++11 from:
https://github.com/gulrak/filesystem/tree/135015f20b6641140a408d3883c9c820948be1c5

Author: Steffen Schümann <[email protected]>
License: BSD 3-Clause
Loading

0 comments on commit b9b4080

Please sign in to comment.