Challenge

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad
from random import randint
from secret import flag


def encrypt(plain,key):
    Ckey=bytearray([key]*16)
    cipher=AES.new(Ckey,AES.MODE_ECB)
    return cipher.encrypt(plain)


flag=pad(flag.encode(),16)
keys=[randint(0,256) for i in range(4)]
for i in keys:
    flag=encrypt(flag,i)
open("output.txt","w").write(flag.hex())


#9378c177d525efede9b36a64f12efd710d966a654ca9694edf2a07493b19863e4ca1b71ad5f5a23e161caa178e52c68cfdc1de62631188d7e94e1805eaeb908c


In this challenge, the flag is encrypted using AES in ECB mode (deterministic), applied four times, where each key is generated from a single byte repeated 16 times, and PKCS#7 padding is added before encryption.

Each AES key is generated as:

\[\text{Key} = \text{bytearray}([x] \times 16), \quad x \in \{0,\dots,255\}\]

Where $x$ is a single byte.

This means that the full 128-bit AES key is generated by repeating a single byte

The encryption order is:

\[CT = E_{k4}(E_{k3}(E_{k2}(E_{k1}(Flag))))\]

Since each key ($k1, k2, k3, k4$) is a single byte ($0-255$), we need to recover these four bytes to decrypt the flag.

\[k_1, k_2, k_3, k_4 \in \{0,\dots,255\}\]

I solved the challenge in two different ways:

Method 1: The Brute Force Approach

The most straightforward approach is to try every possible combination of the four keys. Since the keys are applied sequentially, we can nest four loops to reverse the process.

The idea is to reverse the encryption process and brute-force all possible key combinations.

\[PT = D_{k_1}\big(D_{k_2}(D_{k_3}(D_{k_4}(CT)))\big)\]

Key Space ($O(N^4)$)

Because each key is 1 byte (256 possibilities) and there are 4 independent keys:

\[\text{Total Combinations} = 256 \times 256 \times 256 \times 256\] \[\text{Key Space} = 256^4 = 2^{32} \approx 4{,}294{,}967{,}296\]

import re
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad
from random import randint
from tqdm import *


def d(ct, x):
    Ckey=bytearray([x]*16)
    cipher = AES.new(Ckey,AES.MODE_ECB)
    return cipher.decrypt(ct)

  
ct = bytes.fromhex('9378c177d525efede9b36a64f12efd710d966a654ca9694edf2a07493b19863e4ca1b71ad5f5a23e161caa178e52c68cfdc1de62631188d7e94e1805eaeb908c')


print('looking')
for i in range(0, 256):
    #i = keys[3]
    dec1 = d(ct, i)
    for j in trange(256):
        #j = keys[2]
        dec2 = d(dec1, j)
        for k in range(256):
            #k = keys[1]
            dec3 = d(dec2, k)
            for l in range(256):
                #l = keys[0]
                flag = d(dec3, l)
                try:
                    print(flag.decode())
                except:
                    pass

So we waited … two hours.

python is great, but $256^4$ operations are really slow. I moved the same logic to C++ because it’s much faster than Python.

#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include <iostream>
#include <vector>
#include <cstring>      
#include <string>
#include <cstdlib>      
#include <iomanip>      
#include <openssl/aes.h>
#include <omp.h>
using namespace std;

// --------------------------------------------------------------------------
// Convert Hex String to Byte Vector
// --------------------------------------------------------------------------
vector<unsigned char> hex_to_bytes(const string& hex) {
    vector<unsigned char> bytes;
    for (unsigned int i = 0; i < hex.length(); i += 2) {
        string byte_string = hex.substr(i, 2);
        unsigned char byte = (unsigned char)strtol(byte_string.c_str(), NULL, 16);
        bytes.push_back(byte);
    }
    return bytes;
}

// --------------------------------------------------------------------------
// Decrypt one layer using AES-128-ECB
// --------------------------------------------------------------------------
void decrypt_layer(const unsigned char* input, unsigned char* output, size_t len, int key_byte) {
    AES_KEY decrypt_key_struct;
    unsigned char key[16];
    memset(key, key_byte, 16);
    AES_set_decrypt_key(key, 128, &decrypt_key_struct);
    for (size_t i = 0; i < len; i += 16) {
        AES_decrypt(input + i, output + i, &decrypt_key_struct);
    }
}

// --------------------------------------------------------------------------
//Check if the buffer contains printable ASCII characters
// --------------------------------------------------------------------------
bool is_printable(const unsigned char* data, size_t len) {
    for (size_t i = 0; i < len; ++i) {
        if (data[i] == 0) continue; // Skip null padding
        if (data[i] < 32 || data[i] > 126) return false;
    }
    return true;
}


int main() {
    string hex_ct = "9378c177d525efede9b36a64f12efd710d966a654ca9694edf2a07493b19863e4ca1b71ad5f5a23e161caa178e52c68cfdc1de62631188d7e94e1805eaeb908c";
    
    vector<unsigned char> ciphertext = hex_to_bytes(hex_ct);
    size_t data_len = ciphertext.size();

    cout << "[*] Starting 4-Layer Brute-force Attack..." << endl;
    cout << "[*] Using OpenMP for parallel processing." << endl;

    long long total_combinations = 256 * 256; // Complexity of outer loops
    long long progress_counter = 0;
    
    // Shared flag to stop all threads once the solution is found
    volatile bool solution_found = false; 

    // Parallelize the outer two loops
#pragma omp parallel for collapse(2) schedule(static) shared(solution_found)
    for (int i = 0; i < 256; ++i) {
        for (int j = 0; j < 256; ++j) {
            
            // If another thread found the flag, stop working
            if (solution_found) continue;

            // --- Progress Display Logic ---
            long long current_count;
            #pragma omp atomic capture
            current_count = ++progress_counter;

            if (current_count % 500 == 0 && !solution_found) {
                #pragma omp critical
                {
                    if (!solution_found) {
                        float percentage = (float)current_count / total_combinations * 100.0;
                        cout << "\rProgress: " << fixed << setprecision(2) << percentage << "% " << flush;
                    }
                }
            }
            // ------------------------------

            // Buffers for intermediate layers
            unsigned char layer1_out[64];
            unsigned char layer2_out[64];
            unsigned char layer3_out[64];
            unsigned char potential_flag[64];

            // Perform Layer 1 & 2 Decryption
            decrypt_layer(ciphertext.data(), layer1_out, data_len, i);
            decrypt_layer(layer1_out, layer2_out, data_len, j);

            // Inner loops for Layer 3 & 4
            for (int k = 0; k < 256; ++k) {
                decrypt_layer(layer2_out, layer3_out, data_len, k);

                for (int l = 0; l < 256; ++l) {
                    decrypt_layer(layer3_out, potential_flag, data_len, l);

                    // Check if we found valid text
                    // (data_len - 16) ignores potential padding in the last block
                    if (is_printable(potential_flag, data_len - 16)) {
                        
                        #pragma omp critical
                        {
                            if (!solution_found) {
                                solution_found = true;
                                cout << "\n\n[+] FLAG FOUND!" << endl;
                                cout << "----------------------------------------" << endl;
                                cout << "Keys (i,j,k,l): " << i << ", " << j << ", " << k << ", " << l << endl;
                                cout << "Decrypted Text: ";
                                for(size_t x = 0; x < data_len; x++) {
                                    if(potential_flag[x] >= 32 && potential_flag[x] <= 126) 
                                        cout << potential_flag[x];
                                }
                                cout << "\n----------------------------------------" << endl;
                            }
                        }
                    }
                    if (solution_found) break; 
                }
                if (solution_found) break;
            }
        }
    }

    if (!solution_found) {
        cout << "\n[-] Brute-force finished. No valid flag found." << endl;
    }

    return 0;
}

It worked! C++ finished it in about 10 minutes, but it was still inefficient for me. Now we move to a more optimized method.

Method 2: Meet-in-the-Middle (MitM) Attack

Since the ciphertext length is 64 bytes (a multiple of 16), and the pad() function was used, we can assume the last block is a full block of PKCS#7 padding (\x10 repeated 16 times). Therefore The last plaintext block before encryption is:

\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10\x10


To optimize our solution, we can use a Meet-in-the-Middle attack. This technique exploits the fact that encryption layers are independent (since ECB mode is deterministic).
Instead of attacking the full 4 layer depth, we meet at the 2-layer midpoint. We precompute the state after two encryptions and store it in lookup table. Then, we decrypt the last ciphertext block and look for a match. once a match is found, all four keys are recovered at once.

The Midpoint Concept

\[\text{Plaintext} \xrightarrow{k_1, k_2} \mathbf{[Midpoint]} \xleftarrow{k_3, k_4} \text{Ciphertext}\]

By splitting the encryption into two halves, we create a midpoint in the middle:

  1. From the Left (Forward): We start with the known padding and encrypt it twice using all possible combinations of $k_1$ and $k_2$. We save these results in a lookup table.

    \[Midpoint_{Front} = E_{k_2}(E_{k_1}(\text{Padding}))\]
  2. From the Right (Backward): We take the last ciphertext block and decrypt it twice using all possible combinations of $k_4$ and $k_3$, and check if the result exists in our lookup table.

    \[Midpoint_{Back} = D_{k_3}(D_{k_4}(\text{Ciphertext}))\]

If a match is found:

\[Midpoint_{Front} = Midpoint_{Back}\]

this means both sides reached the same internal AES state:

\[E_{k_2}(E_{k_1}(\text{Padding})) = D_{k_3}(D_{k_4}(\text{Ciphertext}))\]

At this point, we have recovered the correct keys $(k_1, k_2, k_3, k_4)$, which allows us to decrypt the flag.

Key Space Reduction: $O(N^4) \to O(N^2)$

Instead of attacking all four encryption layers at once, we split them into two independent halves $(2 + 2)$. We precompute the first two layers and store the results in a lookup table, then process the remaining two layers and check for a match in that table.

\[256^2 (\text{Front}) + 256^2 (\text{Back}) = 131,072 \text{ attempts}\]

from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad


ct = bytes.fromhex('9378c177d525efede9b36a64f12efd710d966a654ca9694edf2a07493b19863e4ca1b71ad5f5a23e161caa178e52c68cfdc1de62631188d7e94e1805eaeb908c')
# We only care about the last block (ECB + padding)
last_block_ct = ct[-16:]

target_padding = b'\x10' * 16

def get_cipher(k):
    key = bytearray([k]*16)
    return AES.new(key, AES.MODE_ECB)


# ======================================
# Phase 1 – Forward encryption (Padding)
# ======================================
print("[+] Phase 1: Building Lookup Table")

lookup_table = {}
for k1 in range(256):
    c1 = get_cipher(k1).encrypt(target_padding)
    for k2 in range(256):
        c2 = get_cipher(k2).encrypt(c1)
        # Store the result of E_k2(E_k1(padding))
        lookup_table[c2] = (k1, k2)

# ======================================
# Phase 2 – Backward decryption (Ciphertext)
# ======================================
print("[+] Phase 2: Decrypting from ciphertext side")

found = False
for k4 in range(256):
    d1 = get_cipher(k4).decrypt(last_block_ct)
    for k3 in range(256):
        d2 = get_cipher(k3).decrypt(d1)
        
        if d2 in lookup_table:
            k1, k2 = lookup_table[d2]
            print(f"\n[!] Match Found! Keys: k1={k1}, k2={k2}, k3={k3}, k4={k4}")
            
            res = ct
            for key in [k4, k3, k2, k1]:
                res = get_cipher(key).decrypt(res)
            
            try:
                print(f"[+] Flag: {unpad(res, 16).decode()}")
                found = True
                break
            except:
                continue
    if found: break

if not found:
    print("[-] No valid key combination found (try other padding lengths)")
    
#flag{35ba164ed8174be3049e9d1ea155b296}


As a result:

  • Brute-force in Python took ~2 hours
  • Brute-force in C++ finished in under 10 minutes
  • Meet-in-the-Middle finished in seconds