Profiling of --show revealed a secret #3988

mohemiv · 2024-04-16T15:26:03Z

Hello there,

I've done the profiling of the --show options and I have found out how --show can increase performance by two times or more, and sometimes by a hundred times.

Let's take a look into the potfile_remove_parse function in src/potfile.c:

int potfile_remove_parse (hashcat_ctx_t *hashcat_ctx)
{

  [REDACTED]

  while (!hc_feof (&potfile_ctx->fp))
  {
    size_t line_len = fgetl (&potfile_ctx->fp, line_buf, HCBUFSIZ_LARGE);

    if (line_len == 0) continue;

    char *last_separator = strrchr (line_buf, hashconfig->separator);

    if (last_separator == NULL) continue; // ??

    char *line_pw_buf = last_separator + 1;

    size_t line_pw_len = line_buf + line_len - line_pw_buf;

    char *line_hash_buf = line_buf;

    int line_hash_len = last_separator - line_buf;

    line_hash_buf[line_hash_len] = 0;

    if (line_hash_len == 0) continue;

    if (hash_buf.salt)
    {
      memset (hash_buf.salt, 0, sizeof (salt_t));
    }

    if (hash_buf.esalt)
    {
      memset (hash_buf.esalt, 0, hashconfig->esalt_size);
    }

    if (hash_buf.hook_salt)
    {
      memset (hash_buf.hook_salt, 0, hashconfig->hook_salt_size);
    }

    [REDACTED]

        {
          const int parser_status = module_ctx->module_hash_decode (hashconfig, hash_buf.digest, hash_buf.salt, hash_buf.esalt, hash_buf.hook_salt, hash_buf.hash_info, line_hash_buf, line_hash_len);

          if (parser_status != PARSER_OK) continue;

          if (hashconfig->potfile_keep_all_hashes == true)
          {
            potfile_update_hashes (hashcat_ctx, &hash_buf, line_pw_buf, (u32) line_pw_len, all_hashes_tree);

            continue;
          }

          hash_t *found = (hash_t *) hc_bsearch_r (&hash_buf, hashes_buf, hashes_cnt, sizeof (hash_t), sort_by_hash, (void *) hashconfig);

          potfile_update_hash (hashcat_ctx, found, line_pw_buf, (u32) line_pw_len);
        }

    [REDACTED]

What is going on here is as follows:

Hashcat reads a line from the potfile.
Hashcat memsets buffers.
Hashcat tries to parse a line via module_hash_decode or a similar function.
Hashcat repeats the algorithm.

I've found that more than half of the hashcat’s operating time can be spent on memsetting buffers!

This time depends on the algorithm. Let's take a look into 13400 module, for example:

typedef struct keepass
{
  u32 version;
  u32 algorithm;

  /* key-file handling */
  u32 keyfile_len;
  u32 keyfile[8];

  u32 final_random_seed[8];
  u32 transf_random_seed[8];
  u32 enc_iv[4];
  u32 contents_hash[8];

  /* specific to version 1 */
  u32 contents_len;
  u32 contents[0x200000];

  /* specific to version 2 */
  u32 expected_bytes[8];

} keepass_t;

The length of this structure is more than 2 megabytes, and hashcat memsets it over and over again, even if the line in the potfile doesn't match the hash at all. On my machine, this can go on for an hour.

I did a quick fix, but it's a hack and not a proper solution:

diff --git a/src/modules/module_13400.c b/src/modules/module_13400.c
index 8f019d859..0f838b0e6 100644
--- a/src/modules/module_13400.c
+++ b/src/modules/module_13400.c
@@ -106,6 +106,11 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   if (line_len < 128) return (PARSER_SALT_LENGTH);
 
+  if (line_buf[0] != '$') return (PARSER_SALT_LENGTH);
+  if (line_buf[1] != 'k') return (PARSER_SALT_LENGTH);
+  if (line_buf[2] != 'e') return (PARSER_SALT_LENGTH);
+  if (line_buf[3] != 'e') return (PARSER_SALT_LENGTH);
+
   if ((line_buf[line_len - (64 + 1 + 2 + 1 + 2)] == '*')
    && (line_buf[line_len - (64 + 1 + 2 + 1 + 1)] == '1')
   && (line_buf[line_len - (64 + 1 + 2 + 1 + 0)] == '*')) is_keyfile_present = true;

  hc_token_t token;

  memset (&token, 0, sizeof (hc_token_t));

  token.signatures_cnt    = 1;
  token.signatures_buf[0] = SIGNATURE_KEEPASS;
diff --git a/src/potfile.c b/src/potfile.c
index a912e1f50..f3b170031 100644
--- a/src/potfile.c
+++ b/src/potfile.c
@@ -511,6 +511,22 @@ int potfile_remove_parse (hashcat_ctx_t *hashcat_ctx)
     tmps = hcmalloc (hashconfig->tmp_size);
   }
 
+  if (hash_buf.salt)
+  {
+    memset (hash_buf.salt, 0, sizeof (salt_t));
+  }
+
+  if (hash_buf.esalt)
+  {
+    memset (hash_buf.esalt, 0, hashconfig->esalt_size);
+  }
+
+  if (hash_buf.hook_salt)
+  {
+    memset (hash_buf.hook_salt, 0, hashconfig->hook_salt_size);
+  }
+
+
   char *line_buf = (char *) hcmalloc (HCBUFSIZ_LARGE);
 
   while (!hc_feof (&potfile_ctx->fp))
@@ -535,21 +551,6 @@ int potfile_remove_parse (hashcat_ctx_t *hashcat_ctx)
 
     if (line_hash_len == 0) continue;
 
-    if (hash_buf.salt)
-    {
-      memset (hash_buf.salt, 0, sizeof (salt_t));
-    }
-
-    if (hash_buf.esalt)
-    {
-      memset (hash_buf.esalt, 0, hashconfig->esalt_size);
-    }
-
-    if (hash_buf.hook_salt)
-    {
-      memset (hash_buf.hook_salt, 0, hashconfig->hook_salt_size);
-    }
-
     if (module_ctx->module_hash_decode_potfile != MODULE_DEFAULT)
     {
       if (module_ctx->module_potfile_custom_check != MODULE_DEFAULT)
@@ -568,6 +569,22 @@ int potfile_remove_parse (hashcat_ctx_t *hashcat_ctx)
           }
         }
 
+    if (hash_buf.salt)
+    {
+      memset (hash_buf.salt, 0, sizeof (salt_t));
+    }
+
+    if (hash_buf.esalt)
+    {
+      memset (hash_buf.esalt, 0, hashconfig->esalt_size);
+    }
+
+    if (hash_buf.hook_salt)
+    {
+      memset (hash_buf.hook_salt, 0, hashconfig->hook_salt_size);
+    }
+
+
         continue;
       }
 
@@ -585,6 +602,21 @@ int potfile_remove_parse (hashcat_ctx_t *hashcat_ctx)
       {
         potfile_update_hashes (hashcat_ctx, &hash_buf, line_pw_buf, (u32) line_pw_len, all_hashes_tree);
 
+    if (hash_buf.salt)
+    {
+      memset (hash_buf.salt, 0, sizeof (salt_t));
+    }
+
+    if (hash_buf.esalt)
+    {
+      memset (hash_buf.esalt, 0, hashconfig->esalt_size);
+    }
+
+    if (hash_buf.hook_salt)
+    {
+      memset (hash_buf.hook_salt, 0, hashconfig->hook_salt_size);
+    }
+
         continue;
       }

I would suggest that module_hash_decode functions should take raw buffers and do memsets themselves if necessary. Most of the time, these memsets are not even needed.

Also, some of the module_hash_decode functions do memsets before checking the signature. Let's take a look into 13100 module, for example:

int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
{
  u32 *digest = (u32 *) digest_buf;

  krb5tgs_t *krb5tgs = (krb5tgs_t *) esalt_buf;

  hc_token_t token;

  memset (&token, 0, sizeof (hc_token_t));

  token.signatures_cnt    = 1;
  token.signatures_buf[0] = SIGNATURE_KRB5TGS;

  token.len[0]  = 9;
  token.attr[0] = TOKEN_ATTR_FIXED_LENGTH
                | TOKEN_ATTR_VERIFY_SIGNATURE;

  /**
   * hc
   * format 1: $krb5tgs$23$*user$realm$spn*$checksum$edata2
   * format 2: $krb5tgs$23$checksum$edata2
   *
   * jtr
   * format 3: $krb5tgs$spn:checksum$edata2
   */

  if (line_len < (int) strlen (SIGNATURE_KRB5TGS)) return (PARSER_SALT_LENGTH);

If we move all memset calls into module_hash_decode functions, it will be easier to identify and fix slow module_hash_decode functions, such as above, because they will become the bottleneck. Although, I imagine that it's possible that these functions are already optimized by the compiler.

I can do some PRs, but I just would like to highlight the issue to develop a solution that will be good for everyone.

The text was updated successfully, but these errors were encountered:

mohemiv added the bug label Apr 16, 2024

This was referenced Apr 16, 2024

Add error message when mixing --show and --username #3655

Merged

Improving performance of --show #1 #3989

Open

Improving performance of --show #2 #3990

Open

Annoying errors in hashcat-git #3991

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Profiling of --show revealed a secret #3988

Profiling of --show revealed a secret #3988

mohemiv commented Apr 16, 2024 •

edited

Profiling of --show revealed a secret #3988

Profiling of --show revealed a secret #3988

Comments

mohemiv commented Apr 16, 2024 • edited

mohemiv commented Apr 16, 2024 •

edited