Description:

Show me your wordle skills!

Task author: rixxc|RedRocket

nc scamble.rumble.host 2568

The service running looks like this:

#!/usr/bin/env python3

import random
import string
from secret import FLAG

NUMBER_OF_GAMES = 100
THRESHOLD = 5
SIZE_OF_WORDLIST = 12000

rng = random.SystemRandom()


def game(secret_word, wordlist):
    print("[start game]")
    chars = set(secret_word)

    for guess_count in range(1, 7):
        guess = input("> ")[:5]
        assert(len(guess) == 5)
        assert(guess in wordlist)

        if guess == secret_word:
            print("[guess correct]> ")
            break

        result = ""
        for i in range(5):
            if guess[i] == secret_word[i]:
                result += "+"
            elif guess[i] in chars:
                result += "*"
            else:
                result += "-"

        print(result)
    else:
        print("[number of guesses exceeded]> ")

    return guess_count


def main():
    wordlist = set()
    while len(wordlist) < SIZE_OF_WORDLIST:
        word = ''.join(rng.choices(string.ascii_uppercase, weights=[7.58, 1.96, 3.16, 4.98, 18.93, 1.49, 3.02, 4.98, 8.02, 0.24, 1.32, 3.60, 2.55, 12.53, 2.24, 0.67, 0.02, 6.89, 8.42, 7.79, 3.83, 0.84, 1.78, 0.05, 0.05, 1.21], k=5))
        wordlist.add(word)

    print("Here is your personal wordlist:")
    print("[begin of wordlist]")
    for word in wordlist:
        print(word)
    print("[end of wordlist]")

    guesses = []
    for _ in range(NUMBER_OF_GAMES):
        secret_word = rng.choice(list(wordlist))
        guesses.append(game(secret_word, wordlist))

    average_guesses = sum(guesses) / NUMBER_OF_GAMES
    print(f'avg.: {average_guesses}')
    if average_guesses < THRESHOLD:
        print(FLAG)


if __name__ == '__main__':
    main()

Summarized we need to solve a game of wordle 100 times with an average guess num of less than 5. This is humanely possible, but not in the given timeframe, therefore we need to automize it.

We found an article about entropy-based wordle solving from Aditya Sengupta and an implementation in the Julia programming language.

Then we rewrote his wordle-solver in python:

// wordle_doer.py

from collections import Counter
from scipy.stats import entropy
import string

class WordleDoer:
    def __init__(self, wordlist, wordlength) -> None:
        self.wordlist = wordlist
        self.wordlength = wordlength
        self.corpus = []
        self.first_guess = None

    def reset(self):
        self.corpus = self.wordlist

    def guessToList(self, guess):
        result = [None] * self.wordlength
        conversionMap = {
            '+': 2,
            '*': 1,
            '-': 0
        }
        for i in range(0, 5):
            result[i] = conversionMap[guess[i]]

        return result


    def listToGuess(self, list: list):
        guess = ['-'] * 5
        conversionMap = {
            2: '+',
            1: '*',
            0: '-'
        }
        for i in range(0, 5):
            guess[i] = conversionMap[list[i]]

        return ''.join(guess)


    def constrain(self, query, result):
        self.corpus = [word for word in self.corpus if self.guess(query, word) == result]


    def guess(self, query, word):
        result = [0] * self.wordlength

        for i in range(0, self.wordlength):
            if word[i] == query[i]:
                result[i] = 2

        for i in range(0, self.wordlength):
            if query[i] in word and result[i] != 2:
                result[i] = 1

        return self.listToGuess(result)

    def choose(self):
        print(len(self.corpus))
        if len(self.corpus) == 0:
            return None
        if len(self.corpus) == 1:
            return self.corpus[0]
        if self.first_guess is not None and len(self.corpus) == len(self.wordlist):
            return self.first_guess

        is_known = [all(x[i] == self.corpus[0][i] for x in self.corpus) for i in range(0, self.wordlength)]
        word_so_far = "".join([self.corpus[0][j] if is_known[j] else '0' for j in range(0, self.wordlength)])
        char_occurences = {}
        for char in string.ascii_uppercase:
            char_occurences[char] = [sum([word[j].count(char) for word in self.corpus]) for j in range(0, self.wordlength)]
        best_word = self.corpus[0]
        best_diff_entropy = 0
        for word in self.corpus:
            diff_entropy = 0
            seen_chars = set()
            for (i, char) in enumerate(word):
                greens = char_occurences[char][i]
                if not (char in seen_chars):
                    mask = [ch != char for ch in word_so_far]
                    yellows = sum([char_occurences[char][i] for i in range(0, self.wordlength) if mask[i]]) - greens
                    seen_chars.add(char)
                else:
                    yellows = 0
                greys = len(self.corpus) - yellows - greens
                dist = [greens, yellows, greys]
                sumdist = sum(dist)
                dist = [entry / sumdist for entry in dist]
                diff_entropy = diff_entropy + entropy(dist)
            if diff_entropy >= best_diff_entropy:
                best_diff_entropy = diff_entropy
                best_word = word

        if self.first_guess is None:
            self.first_guess = best_word

        return best_word

Our implementation has the following changes:

  • We are caching the first guess, because
    • the wordlist doesn’t change between games
    • the first guess takes up the most time
  • We dumbed down the guess method to match the given game
    • The game implementation does not Count occurences

Because we need to win 100 games with high reliability, we needed to automize it: doit.py

#!/usr/bin/python

from pwn import *
from wordle_doer import WordleDoer
import re

r = remote("scamble.rumble.host", 2568)
#r = remote("localhost", 2568)

rateMatcher = re.compile("\s?[\*+-]{5}\s?")

r.recvuntil(b"[begin of wordlist]\n")
wordlist = [word.decode() for word in r.recvuntil(b"[end of wordlist]\n", True).splitlines(False)]

solver = WordleDoer(wordlist, 5)

while True:
    start = r.recvuntil([b"avg.:", b"[start game]"])
    if start.startswith(b"avg.:"):
        print(start)
        print(r.recvline())
        break
    print("Game started")
    solver.reset()
    while True:
        guess = solver.choose()
        if len(guess) != 5:
            print(solver.corpus)
        print("My guess: '" + guess + '"')
        command = r.recvuntil(b"> ", True)
        print(b"command: " + command)
        if b"[guess correct]" in command or b"[number of guesses exceeded]" in command:
            break
        r.sendline(guess.encode())
        response = r.recvline().decode().replace("\n", "")
        if not rateMatcher.match(response):
            print("Start anew: " + response)
            break
        print("Response: '" + response + "'")
        solver.constrain(guess, response)

That’s it!