Skip to content

Commit

Permalink
Give the post-index hook ability to tell soupault to completely ignor…
Browse files Browse the repository at this point in the history
…e a page

by setting a special variable `ignore_page`.

Since it needs data to decide, also give that hook access to the complete index entry
rather than just the modifiable fields.
However, the hook still needs to modify the `index_fields` variable,
both for compatibility reasons and because it's a bad idea to let the hook modify
the full index entry, since it also contains data used by soupault internally,
such as the page file path and URL.
  • Loading branch information
dmbaturin committed Nov 28, 2023
1 parent c9a0d44 commit 5ab0478
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 20 deletions.
17 changes: 13 additions & 4 deletions src/hooks.ml
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ let run_pre_process_hook soupault_state hook_config file_name lua_code page_file
It has access to the page element tree and also to extracted index fields
and can modify both.
*)
let run_post_index_hook soupault_state hook_config file_name lua_code env soup fields =
let run_post_index_hook soupault_state hook_config file_name lua_code env soup entry =
let assoc_of_json j =
(* This function handles values projected from Lua,
and Lua doesn't have a distinction between arrays/lists and tables:
Expand All @@ -198,13 +198,15 @@ let run_post_index_hook soupault_state hook_config file_name lua_code env soup f
let lua_str = I.Value.string in
let lua_state = I.mk () in
let settings = soupault_state.soupault_settings in
let () =
let index_entry_json = Utils.json_of_index_entry entry in
let () =
(* Set up the post-index hook environment *)
I.register_globals [
"page", Plugin_api.lua_of_soup (Plugin_api.Html.SoupNode soup);
"page_url", lua_str.embed env.page_url;
"page_file", lua_str.embed env.page_file;
"index_fields", Plugin_api.lua_of_json (`O fields);
"index_entry", Plugin_api.lua_of_json index_entry_json;
"index_fields", Plugin_api.lua_of_json (`O entry.fields);
"config", lua_of_toml hook_config;
"hook_config", lua_of_toml hook_config;
"soupault_config", lua_of_toml soupault_state.soupault_config;
Expand All @@ -213,18 +215,25 @@ let run_post_index_hook soupault_state hook_config file_name lua_code env soup f
"site_dir", lua_str.embed settings.site_dir;
"soupault_pass", I.Value.int.embed soupault_state.soupault_pass;
"global_data", lua_of_json !(soupault_state.global_data);
"ignore_page", I.Value.bool.embed false;
] lua_state
in
let (let*) = Result.bind in
let () = Logs.info @@ fun m -> m "Running the post-index hook on page %s" env.page_file in
let* () = Plugin_api.run_lua lua_state file_name lua_code in
let () = soupault_state.global_data := (Plugin_api.extract_global_data lua_state) in
(* XXX: The assumption is that there's no way to completely unset a global
in the Lua interpreter we are using,
so if we added [ignore_page] to globals, retrieving it will never cause errors,
and that projection to a bool will never fail either.
*)
let ignore_page = I.getglobal lua_state (I.Value.string.embed "ignore_page") |> I.Value.bool.project in
let index_fields = I.getglobal lua_state (I.Value.string.embed "index_fields") in
if not (I.Value.table.is index_fields) then
Error "post-index hook has not assigned a table to the index_fields variable"
else
let* fields = Plugin_api.json_of_lua index_fields in
Ok (assoc_of_json fields)
Ok (ignore_page, (assoc_of_json fields))

(* render hook replaces the normal page rendering process.
Expand Down
46 changes: 30 additions & 16 deletions src/soupault.ml
Original file line number Diff line number Diff line change
Expand Up @@ -434,18 +434,21 @@ let make_page_url settings nav_path orig_path target_dir page_file =
let extract_metadata state hooks env html =
(* Metadata is only extracted from non-index pages *)
let settings = state.soupault_settings in
if not (Autoindex.index_extraction_should_run settings env.page_file) then (Ok None) else
if not (Autoindex.index_extraction_should_run settings env.page_file) then (Ok (false, None)) else
let entry = Autoindex.get_entry settings env html in
let post_index_hook = Hashtbl.find_opt hooks "post-index" in
match post_index_hook with
| Some (file_name, source_code, hook_config) ->
if not (Hooks.hook_should_run settings hook_config "post-index" env.page_file) then (Ok (Some entry)) else
(* Let the post-index hook update the fields *)
let* index_fields =
Hooks.run_post_index_hook state hook_config file_name source_code env html entry.fields
if not (Hooks.hook_should_run settings hook_config "post-index" env.page_file) then (Ok (false, (Some entry))) else
(* Let the post-index hook update the fields.
It can also set a special [ignore_page] variable to tell soupault to exclude the page
from indexing and any further processing.
*)
let* (ignore_page, index_fields) =
Hooks.run_post_index_hook state hook_config file_name source_code env html entry
in
Ok (Some {entry with fields=index_fields})
| None -> Ok (Some entry)
Ok (ignore_page, (Some {entry with fields=index_fields}))
| None -> Ok (false, (Some entry))

let run_pre_process_hook state hooks page_file target_dir target_file content =
let settings = state.soupault_settings in
Expand Down Expand Up @@ -531,15 +534,26 @@ let process_page state page_data index index_hash widgets hooks =
let before_index, after_index, widget_hash = widgets in
let* () = process_widgets state env before_index widget_hash html in
(* Index extraction *)
let* index_entry = extract_metadata state hooks env html in
if settings.index_only then Ok (index_entry, new_pages) else
let* () = process_widgets state env after_index widget_hash html in
let* () = mkdir target_dir in
let* html_str = render_html state hooks env html in
let* () = save_html state hooks env html_str in
(* Finally, run the post-save hook. *)
let* () = run_post_save_hook state hooks env in
Ok (index_entry, new_pages)
let* (ignore_page, index_entry) = extract_metadata state hooks env html in
(* If the post-index hook told us to ignore the page, pretend it did not exist:
return None for the index entry and do not save to disk.
*)
if ignore_page then
begin
let () = Logs.info @@ fun m -> m "Ignoring page %s according to post-index hook instructions" page_file in
Ok (None, [])
end
else
begin
if settings.index_only then Ok (index_entry, new_pages) else
let* () = mkdir target_dir in
let* () = process_widgets state env after_index widget_hash html in
let* html_str = render_html state hooks env html in
let* () = save_html state hooks env html_str in
(* Finally, run the post-save hook. *)
let* () = run_post_save_hook state hooks env in
Ok (index_entry, new_pages)
end

(* Monadic wrapper for process_page that can either return or ignore errors *)
let process_page state index index_hash widgets hooks page_data =
Expand Down

0 comments on commit 5ab0478

Please sign in to comment.