From 775cd770842d01c623ab58b9432d62c728cf6aaa Mon Sep 17 00:00:00 2001
From: Congon4tor <ignacio.dominguez95@gmail.com>
Date: Mon, 6 Jan 2025 18:20:27 +0100
Subject: [PATCH] support # in refs

---
 .gitignore                 |  1 +
 hacktricks-preprocessor.py | 53 ++++++++++++++++++++++++++------------
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/.gitignore b/.gitignore
index 365859fbc18..77edd91a01b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ scripts/*
 book
 book/*
 hacktricks-preprocessor.log
+hacktricks-preprocessor-error.log
diff --git a/hacktricks-preprocessor.py b/hacktricks-preprocessor.py
index 969d5333ed5..af594944943 100644
--- a/hacktricks-preprocessor.py
+++ b/hacktricks-preprocessor.py
@@ -7,7 +7,14 @@
 from urllib.request import urlopen, Request
 
 logger = logging.getLogger(__name__)
-logging.basicConfig(filename='hacktricks-preprocessor.log', filemode='w', encoding='utf-8', level=logging.DEBUG)
+logger.setLevel(logging.DEBUG)
+handler = logging.FileHandler(filename='hacktricks-preprocessor.log', mode='w', encoding='utf-8')
+handler.setLevel(logging.DEBUG)
+logger.addHandler(handler)
+
+handler2 = logging.FileHandler(filename='hacktricks-preprocessor-error.log', mode='w', encoding='utf-8')
+handler2.setLevel(logging.ERROR)
+logger.addHandler(handler2)
 
 
 def findtitle(search ,obj, key, path=(),):
@@ -27,7 +34,7 @@ def findtitle(search ,obj, key, path=(),):
 
 
 def ref(matchobj):
-    logger.debug(f'Match: {matchobj.groups(0)[0].strip()}')
+    logger.debug(f'Ref match: {matchobj.groups(0)[0].strip()}')
     href =  matchobj.groups(0)[0].strip()
     title = href
     if href.startswith("http://") or href.startswith("https://"):
@@ -45,19 +52,29 @@ def ref(matchobj):
         try:
             if href.endswith("/"):
                 href = href+"README.md" # Fix if ref points to a folder
-            chapter, _path = findtitle(href, book, "source_path")
-            logger.debug(f'Recursive title search result: {chapter["name"]}')
-            title = chapter['name']
+            if "#" in  href:
+                chapter, _path = findtitle(href.split("#")[0], book, "source_path")
+                title = " ".join(href.split("#")[1].split("-")).title()
+                logger.debug(f'Ref has # using title: {title}')
+            else:
+                chapter, _path = findtitle(href, book, "source_path")
+                logger.debug(f'Recursive title search result: {chapter["name"]}')
+                title = chapter['name']
         except Exception as e:
             try:
                 dir = path.dirname(current_chapter['source_path'])
                 logger.debug(f'Error getting chapter title: {href} trying with relative path {path.normpath(path.join(dir,href))}')
-                chapter, _path = findtitle(path.normpath(path.join(dir,href)), book, "source_path")
-                logger.debug(f'Recursive title search result: {chapter["name"]}')
-                title = chapter['name']
+                if "#" in  href:
+                    chapter, _path = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path")
+                    title = " ".join(href.split("#")[1].split("-")).title()
+                    logger.debug(f'Ref has # using title: {title}')
+                else:
+                    chapter, _path = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path")
+                    title = chapter["name"]
+                    logger.debug(f'Recursive title search result: {chapter["name"]}')
             except Exception as e:
-                logger.debug(f'Error getting chapter title: {path.normpath(path.join(dir,href))}')
-                print(f'Error getting chapter title: {path.normpath(path.join(dir,href))}')
+                logger.debug(e)
+                logger.error(f'Error getting chapter title: {path.normpath(path.join(dir,href))}')
                 sys.exit(1)
 
 
@@ -69,6 +86,7 @@ def ref(matchobj):
 
     return result
 
+
 def files(matchobj):
     logger.debug(f'Files match: {matchobj.groups(0)[0].strip()}')
     href =  matchobj.groups(0)[0].strip()
@@ -76,19 +94,19 @@ def files(matchobj):
 
     try:
         for root, dirs, files in os.walk(os.getcwd()+'/src/files'):
+            logger.debug(root)
+            logger.debug(files)
             if href in files:
                 title = href
                 logger.debug(f'File search result: {os.path.join(root, href)}')
         
     except Exception as e:
         logger.debug(e)
-        logger.debug(f'Error searching file: {href}')
-        print(f'Error searching file: {href}')
+        logger.error(f'Error searching file: {href}')
         sys.exit(1)
 
         if title=="":
-            logger.debug(f'Error searching file: {href}')
-            print(f'Error searching file: {href}')
+            logger.error(f'Error searching file: {href}')
             sys.exit(1)
 
     template = f"""<a class="content_ref" href="/files/{href}"><span class="content_ref_label">{title}</span></a>"""
@@ -97,6 +115,7 @@ def files(matchobj):
 
     return result
 
+
 def add_read_time(content):
     regex = r'(<\/style>\n# .*(?=\n))'
     new_content = re.sub(regex, lambda x: x.group(0) + "\n\nReading time: {{ #reading_time }}", content)
@@ -126,15 +145,15 @@ def iterate_chapters(sections):
     context, book = json.load(sys.stdin)
 
     logger.debug(f"Context: {context}")
-    logger.debug(f"Env: {context['config']['preprocessor']['hacktricks']['env']}")
 
     for chapter in iterate_chapters(book['sections']):
         logger.debug(f"Chapter: {chapter['path']}")
         current_chapter = chapter
-        regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endref[\s]*}}'
+        # regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endref[\s]*}}'
+        regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n#]*(?:#(.*))?)(?:\n)?{{[\s]*#endref[\s]*}}'
         new_content = re.sub(regex, ref, chapter['content'])
         regex = r'{{[\s]*#file[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endfile[\s]*}}'
-        new_content = re.sub(regex, files, chapter['content'])
+        new_content = re.sub(regex, files, new_content)
         new_content = add_read_time(new_content)
         chapter['content'] = new_content