# This script generates the expected number of remaining candidate answers
# for wordle.
# Choose the smallest value for greedy optimization.
# Set process_count to the number of processors you'd like to allocate.
#
# usage:
#   python expectations.py
#   sort -n output??.dat | head
#
# input:
#   see https://www.nytimes.com/games-assets/v2/wordle.a0b43bff9849d49cf1b2.js [find "first"]
#   latest.txt: current set of possible answer words
#   pool.txt: set of all possible answer words
#   extra.txt: set of additional words allowed for guess
#
# output:
#   output##.dat:
#     expected number of remaining candidates if this guess word is used
#     guess word
#     asterisk identifies guess is from answer words

from collections import Counter
from datetime import datetime
from multiprocessing import Pool

import numpy as np

process_count = 24

# Word lists shared by the worker functions below.  They are filled in by
# _install_word_lists(), which runs once in every worker process (and is safe
# to call directly when using these functions from another module).
candidate_list = []
candidate_set = set()
candidate_set_size = 0
guess_list = []
guess_set_size = 0


def _read_words(path):
    """Return the non-blank, whitespace-stripped lines of the file at *path*."""
    with open(path, 'r') as handle:
        stripped = (line.strip() for line in handle)
        return [word for word in stripped if word]


def _install_word_lists(candidates, guesses):
    """Publish the word lists as module globals for get_expectation().

    Used as the Pool initializer so that every worker sees the same lists in
    the same order regardless of the multiprocessing start method; the
    strided split in get_expectation() relies on that shared order.
    """
    global candidate_list, candidate_set, candidate_set_size
    global guess_list, guess_set_size
    candidate_list = list(candidates)
    candidate_set = set(candidate_list)
    candidate_set_size = len(candidate_list)
    guess_list = list(guesses)
    guess_set_size = len(guess_list)


def retrieve_feedback(answer, guess):
    """Return the feedback string for *guess* when the answer is *answer*.

    Each position is 'h' (hit: right letter, right place), 'o' (right letter,
    other place) or 'm' (miss).  Both words are expected to have the same
    length.  Repeated letters are handled by counting: hits consume letters
    of the answer first, then 'o' marks are handed out left to right while
    unconsumed copies of that letter remain.
    """
    remaining = Counter(answer)
    feedback = ['m'] * len(guess)
    # Hits must be resolved before any 'o' so they get first claim on letters.
    for i, (answer_letter, guess_letter) in enumerate(zip(answer, guess)):
        if guess_letter == answer_letter:
            feedback[i] = 'h'
            remaining[guess_letter] -= 1
    for i, guess_letter in enumerate(guess):
        if feedback[i] != 'h' and remaining[guess_letter] > 0:
            feedback[i] = 'o'
            remaining[guess_letter] -= 1
    return ''.join(feedback)


def is_match(candidate, guess, feedback):
    """Return True if *candidate* could be the answer given *guess*/*feedback*.

    A candidate is consistent exactly when playing *guess* against it would
    produce *feedback*.  Comparing the generated feedback directly also
    rejects candidates that would have turned an 'o' or 'm' position into a
    hit, which a letter-count-only check lets through.
    """
    return retrieve_feedback(candidate, guess) == feedback


def apply_feedback(candidate_list, guess, feedback):
    """Count the words in *candidate_list* consistent with *guess*/*feedback*."""
    return sum(1 for candidate in candidate_list
               if is_match(candidate, guess, feedback))


def expected_remaining(candidates, guess):
    """Return the mean number of candidates left after playing *guess*.

    The mean is taken over every word in *candidates* being the answer.
    Answers that share a feedback pattern leave exactly that group behind, so
    a group of size n contributes n answers each leaving n candidates:
    the mean is sum(n * n) / len(candidates).  Returns NaN for an empty list.
    """
    if not candidates:
        return float('nan')
    pattern_counts = Counter(retrieve_feedback(answer, guess)
                             for answer in candidates)
    return sum(n * n for n in pattern_counts.values()) / len(candidates)


def get_expectation(index):
    """Score every process_count-th guess starting at *index*.

    Writes one line per guess to output<index>.dat (index zero-padded to two
    digits): expectation, guess word, and '*' when the guess is itself a
    candidate answer, separated by tabs.  Worker 0 also prints a timestamped
    progress fraction every ten guesses it handles.
    """
    with open('output' + str(index).zfill(2) + '.dat', 'w') as output:
        next_index = index
        while next_index < guess_set_size:
            guess = guess_list[next_index]
            candidate_set_size_mean = expected_remaining(candidate_list, guess)
            marker = '*' if guess in candidate_set else ''
            output.write(str(candidate_set_size_mean) + '\t' + guess
                         + '\t' + marker + '\n')
            next_index += process_count
            if index == 0 and next_index % (10 * process_count) == 0:
                print(str(datetime.now()), next_index / guess_set_size,
                      sep='\t')


def main():
    """Load the word lists and score all guesses across process_count workers."""
    candidates = _read_words('latest.txt')
    guesses = []
    for path in ['pool.txt', 'extra.txt']:
        guesses.extend(_read_words(path))
    np.random.shuffle(candidates)
    # Shuffle the guesses so each worker's stride gets a random mix of words.
    np.random.shuffle(guesses)

    # The lists are handed to the workers explicitly so the script does not
    # depend on the fork start method to share module state.
    with Pool(processes=process_count,
              initializer=_install_word_lists,
              initargs=(candidates, guesses)) as pool:
        pool.map(get_expectation, range(process_count))
    print('done.')


# Guard the entry point: worker processes may re-import this module.
if __name__ == '__main__':
    main()