Files
004_comission/banson_hker/phase1-fix/doc/decrypt.md
louiscklaw 72bacdd6b5 update,
2025-01-31 19:28:21 +08:00

4.5 KiB

decrypt file

5.1 general flow (decrypt_file)

def decrypt_file(file_path, dictionary):
    """Decrypt the text file at *file_path* and print the outcome.

    Two strategies are tried in order:

    1. ``decrypt_by_letter_occurrence`` -- guess the key from the most
       frequent letter of the encrypted text.
    2. ``decrypt_by_bruce_force`` -- fallback, used only when the first
       strategy reports an invalid result.

    Each strategy returns a ``[valid, text]`` pair. The first pair whose
    ``valid`` flag is truthy is printed as the final message; if neither
    strategy succeeds, a failure notice is printed instead.

    Args:
        file_path: path of the encrypted file, opened in text mode.
        dictionary: word collection passed unchanged to both strategies.

    Returns:
        None. The result is reported on stdout only.
    """
    # Read the whole file in one go so the handle is released before the
    # decryption attempts (and their printing) start.
    with open(file_path, 'r') as fi:
        encrypted_text = fi.read()

    print("try decrypt by guessing maximum occurrence ... ")
    decrypted, decrypted_text = decrypt_by_letter_occurrence(
        encrypted_text, dictionary)

    # The letter-frequency guess did not validate: fall back to trying keys.
    if not decrypted:
        print("decrypt by guessing maximum occurence seems doesn't work...")
        decrypted, decrypted_text = decrypt_by_bruce_force(
            encrypted_text, dictionary)

    if decrypted:
        print()
        print("Final decrypted message:")
        print()
        print(decrypted_text)
        print()
    else:
        # no decryption works
        print("Seems neither of them works.")

5.2 (Decrypt by letter occurrence, decrypt_by_letter_occurrence)

def decrypt_by_letter_occurrence(enc_text, dictionary):
    """Attempt decryption with a key guessed from letter frequency.

    Flow:
      1. ``count_most_occurrence_letter`` produces the per-letter counts
         of the encrypted text, which are printed under an A..Z header.
      2. ``find_max_occurrence`` turns those counts into the guessed key
         (per the original notes: the distance between the most frequent
         letter and "E").
      3. ``shift_cipher_decrypt`` decrypts the text with that key.
      4. The result is split on single spaces and handed to
         ``check_words_valid`` with a passing gate of 0.8.

    Returns:
        ``[is_valid, decrypted_text]`` -- the validation verdict and the
        decrypted text (returned even when the verdict is negative).
    """
    print('decrypted by guessed k')
    letter_counts = count_most_occurrence_letter(enc_text)

    # Show the counts underneath an upper-case alphabet header row.
    print('')
    print('population of letters in encrypted text (case insensitive, from a to z)')
    print([chr(ord('A') + offset) for offset in range(26)])
    print([f'{count:0>1}' for count in letter_counts])

    print('')
    shift = find_max_occurrence(letter_counts)
    print(f'try decrypt using guess_k -> guessed k: {shift}')

    plain_text = shift_cipher_decrypt(enc_text, shift)
    is_valid = check_words_valid(plain_text.split(' '), dictionary, 0.8)

    return [is_valid, plain_text]

5.3 (Decrypt by brute force, decrypt_by_bruce_force)

def decrypt_by_bruce_force(encrypted_text, dictionary):
    """Attempt decryption with a key found by ``bruce_force_k``.

    Flow:
      1. ``count_most_occurrence_letter`` produces the per-letter counts
         of the encrypted text.
      2. ``bruce_force_k`` receives those counts, the encrypted text and
         the dictionary, and returns the key to use.
      3. ``shift_cipher_decrypt`` decrypts the text with that key.
      4. The result is split on single spaces and the word list is handed
         to ``check_words_valid`` with a passing gate of 0.8.

    Returns:
        ``[is_valid, decrypted_text]``. When ``bruce_force_k`` returns the
        sentinel ``-999`` the result is ``[False, '']`` and no decryption
        is attempted.
    """
    print()
    print('try decrypt by bruce forcing k ...')
    characters_population = count_most_occurrence_letter(encrypted_text)

    guess_k = bruce_force_k(characters_population, encrypted_text, dictionary)
    # -999 is the value treated here as "no usable key"; presumably
    # bruce_force_k returns it when every candidate failed the dictionary
    # lookup -- confirm against bruce_force_k.
    if guess_k == -999:
        return [False, '']

    decrypted_text = shift_cipher_decrypt(encrypted_text, guess_k)
    # check_words_valid maps over its first argument, so it needs a list of
    # words; a raw string would be scored one character at a time.
    list_texts = decrypted_text.split(' ')
    check_result_using_guess_k = check_words_valid(list_texts, dictionary, 0.8)

    return [check_result_using_guess_k, decrypted_text]

5.4 (Check words valid, check_words_valid)

def check_words_valid(list_decrypted_text, dictionary, passing_gate):
    """Report whether enough of the given words are found in *dictionary*.

    Every item of *list_decrypted_text* is looked up with
    ``dictionary_lookup(item, dictionary)``; the share of truthy lookups
    is then compared against *passing_gate*.

    Args:
        list_decrypted_text: iterable of words to check.
        dictionary: passed unchanged to ``dictionary_lookup``.
        passing_gate: ratio that the share of matches must exceed
            (strictly greater than) for the text to count as valid.

    Returns:
        True when ``matches / total > passing_gate``, otherwise False.
        An empty input has nothing to validate and yields False.
    """
    lookups = [dictionary_lookup(word, dictionary)
               for word in list_decrypted_text]

    # No words at all: avoid dividing by zero and treat it as "not valid".
    if not lookups:
        return False

    matched = sum(1 for found in lookups if found)
    return matched / len(lookups) > passing_gate