Files
004_comission/banson_hker/phase1-fix/deliver/shift_cipher_encrypter.py
louiscklaw 72bacdd6b5 update,
2025-01-31 19:28:21 +08:00

329 lines
10 KiB
Python

#!/usr/bin/env python
import os
import sys
# ASCII code point of 'A' (65): subtracting it from an upper-case letter's
# code point gives that letter's 0-25 position in the alphabet.
ORD_A = ord('A') # 65
def shift_cipher_encrypt(plaintext, key):
    """Encrypt ``plaintext`` with a shift (Caesar) cipher.

    Every ASCII letter is upper-cased (a requirement of the assignment) and
    moved ``key`` positions forward in the alphabet, wrapping around after
    ``Z``.  Every other character (digits, punctuation, whitespace,
    non-ASCII letters) is copied to the output unchanged.

    Worked example with ``key = 8`` and the input ``the``::

        T, H, E          -> distances from 'A': 19, 7, 4
        add the key      -> 27, 15, 12
        modulo 26        -> 1, 15, 12
        back to letters  -> B, P, M

    Args:
        plaintext: Text to encrypt.
        key: Integer shift.  Negative values and values above 25 wrap
            around because of the modulo.

    Returns:
        The encrypted text, with every ASCII letter in upper case.
    """
    ascii_offset = ord('A')
    encrypted_chars = []
    for char in plaintext:
        # Only the 26 ASCII letters take part in the cipher.  str.isalpha()
        # would also accept letters such as 'é' (which would be turned into
        # an unrelated A-Z letter) or 'ß' (whose upper() is the two-character
        # string 'SS', which ord() rejects).
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            distance = ord(char.upper()) - ascii_offset
            shifted_distance = (distance + key) % 26
            encrypted_chars.append(chr(shifted_distance + ascii_offset))
        else:
            # digits, punctuation, whitespace, ...: retain as is
            encrypted_chars.append(char)
    return ''.join(encrypted_chars)
def shift_cipher_decrypt(cipher_text, key):
    """Decrypt ``cipher_text`` that was produced by a shift (Caesar) cipher.

    Every ASCII letter is upper-cased and moved ``key`` positions backward in
    the alphabet, wrapping around before ``A``.  Every other character is
    copied to the output unchanged.

    Args:
        cipher_text: Text to decrypt.
        key: Integer shift that was used for encryption.  Negative values
            and values above 25 wrap around because of the modulo.

    Returns:
        The decrypted text, with every ASCII letter in upper case.
    """
    ascii_offset = ord('A')
    decrypted_chars = []
    for char in cipher_text:
        # Restrict the cipher to ASCII letters; other "alphabetic" characters
        # (accented letters etc.) have no position in the 26-letter alphabet.
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            # Upper-case first so that the distance is measured from 'A'
            # for lower-case input as well.
            distance = ord(char.upper()) - ascii_offset
            shifted_distance = (distance - key) % 26
            decrypted_chars.append(chr(shifted_distance + ascii_offset))
        else:
            # If it is not a letter, retain as is.
            decrypted_chars.append(char)
    return ''.join(decrypted_chars)
def count_most_occurrence_letter(txt_in):
    """Count how often each of the 26 letters occurs in ``txt_in``.

    Counting is case insensitive.  The histogram is used to guess the shift
    key: 'E' is statistically the most frequent letter, and because a shift
    cipher only relabels letters, the most frequent letter of the encrypted
    text is likely to be the encrypted 'E'.

    Args:
        txt_in: Text to analyse.

    Returns:
        A list of 26 integers; index 0 is the count of 'A', index 25 the
        count of 'Z'.  Characters that are not ASCII letters are ignored.
    """
    ascii_offset = ord('A')
    output = [0] * 26  # bucket for 26 letters
    for char in txt_in:
        # Only ASCII letters have a bucket; other alphabetic characters
        # (e.g. 'é') would index outside the 26 slots.
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            output[ord(char.upper()) - ascii_offset] += 1
    return output
def find_max_occurrence(char_occurrences):
    """Guess the shift key from a letter histogram.

    The most frequent letter of the encrypted text is assumed to stand for
    'E', so the guessed key is the distance between that letter and 'E'.

    Args:
        char_occurrences: List of per-letter counts, index 0 being 'A'.

    Returns:
        Index of the first highest count minus 4 (the index of 'E').  The
        value can be negative.
    """
    index_of_e = 4
    highest_count = max(char_occurrences)
    # list.index returns the first position when several letters tie
    most_frequent_index = char_occurrences.index(highest_count)
    return most_frequent_index - index_of_e
def encrypt_file(file_path, key=8):
    """Encrypt the text file at ``file_path`` and save the result beside it.

    The output file name is the input name with ``_e`` inserted before the
    extension, e.g. ``message.txt`` -> ``message_e.txt``.

    Args:
        file_path: Path of the plaintext file (read as UTF-8).
        key: Shift key; anything ``int()`` accepts (default 8).

    Raises:
        ValueError: If ``key`` cannot be converted to an integer.
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Build the output name from the real extension only.  A plain
    # str.replace('.txt', ...) leaves the path untouched when there is no
    # '.txt' in it (the source file would then be overwritten) and also
    # rewrites '.txt' occurring inside directory names.
    root, extension = os.path.splitext(file_path)
    output_file = f'{root}_e{extension}'
    # convert it to integer
    key = int(key)
    # open source file (plaintext)
    with open(file_path, 'r', encoding="utf-8") as fi:
        plaintext = fi.read()
    # Mode 'w' creates the file if missing and empties it otherwise.
    # NOTE(review): the output uses the platform default encoding while the
    # input is read as UTF-8 -- confirm which encoding the decrypt side
    # expects before changing either.
    with open(output_file, 'w') as fo:
        fo.write(shift_cipher_encrypt(plaintext, key))
    print(f'encryption done and file saved to {output_file}')
    return
def decrypt_file(file_path, dictionary):
    """Decrypt an encrypted file without knowing the key and print the result.

    Strategy:
      1. Guess the key from the most frequent letter (assumed to be 'E').
      2. If that result fails the dictionary check, try every possible key.
      3. Print the decrypted message, or report that neither approach worked.

    Args:
        file_path: Path of the encrypted text file.
        dictionary: Collection of known words used to validate a decryption.
    """
    # read the whole file as a single string
    with open(file_path, 'r') as fi:
        encrypted_text = fi.read()

    print("try decrypt by guessing maximum occurrence ... ")
    decrypted, decrypted_text = decrypt_by_letter_occurrence(
        encrypted_text, dictionary)

    # fall back to trying every key when the frequency guess is rejected
    if not decrypted:
        print("decrypt by guessing maximum occurence seems doesn't work...")
        decrypted, decrypted_text = decrypt_by_bruce_force(
            encrypted_text, dictionary)

    if not decrypted:
        # no decryption works
        print("Seems neither of them works.")
        return

    print()
    print("Final decrypted message:")
    print()
    print(decrypted_text)
    print()
def decrypt_by_letter_occurrence(enc_text, dictionary):
    """Try to decrypt ``enc_text`` with a key guessed from letter frequency.

    Steps:
      1. Count the occurrences of every letter in the encrypted message.
      2. Find the most frequent letter.
      3. Take its distance to the letter 'E' as the guessed key.
      4. Decrypt with the guessed key.
      5. Accept the result when more than 80% of its words are found in
         ``dictionary``.

    Args:
        enc_text: Encrypted text.
        dictionary: Collection of known words.

    Returns:
        ``[valid, decrypted_text]`` where ``valid`` tells whether the
        decrypted text passed the dictionary check.
    """
    print('decrypted by guessed k')
    characters_population = count_most_occurrence_letter(enc_text)
    print('')
    print('population of letters in encrypted text (case insensitive, from a to z)')
    print([chr(65 + i) for i in range(0, 26)])
    print(['{:0>1}'.format(i) for i in characters_population])
    print('')
    guess_k = find_max_occurrence(characters_population)
    print(f'try decrypt using guess_k -> guessed k: {guess_k}')
    decrypted_text = shift_cipher_decrypt(enc_text, guess_k)
    # Split on any whitespace: text read from a file contains newlines, and
    # splitting on ' ' alone would glue the words around each line break
    # into one token that can never be found in the dictionary.
    words = decrypted_text.split()
    # A text without any word cannot be confirmed by the dictionary.
    check_result_using_guess_k = bool(words) and check_words_valid(
        words, dictionary, 0.8)
    return [check_result_using_guess_k, decrypted_text]
def decrypt_by_bruce_force(encrypted_text, dictionary):
    """Try to decrypt ``encrypted_text`` by testing every possible key.

    The search itself is delegated to ``bruce_force_k``, which returns the
    first key whose decryption passes the dictionary check, or a negative
    number when no key does.

    Args:
        encrypted_text: Encrypted text.
        dictionary: Collection of known words.

    Returns:
        ``[True, decrypted_text]`` when a key was found, otherwise
        ``[False, '']``.
    """
    print()
    print('try decrypt by bruce forcing k ...')
    characters_population = count_most_occurrence_letter(encrypted_text)
    guess_k = bruce_force_k(characters_population, encrypted_text, dictionary)
    # Valid keys are 0..25; any negative value is the "no key found"
    # sentinel of bruce_force_k.
    if guess_k < 0:
        return [False, '']
    # bruce_force_k only returns a non-negative key after that key's
    # decryption passed the dictionary check, so it is not checked again.
    decrypted_text = shift_cipher_decrypt(encrypted_text, guess_k)
    return [True, decrypted_text]
def check_words_valid(list_decrypted_text, dictionary, passing_gate):
    """Tell whether enough of the given words are found in ``dictionary``.

    Each word is upper-cased and looked up; the fraction of words found is
    compared against ``passing_gate``.

    Args:
        list_decrypted_text: Iterable of words.  A plain string is accepted
            too and is split on whitespace first.
        dictionary: Collection of upper-case known words.
        passing_gate: Fraction (0..1) that the share of known words must
            exceed.

    Returns:
        True when the share of known words is strictly greater than
        ``passing_gate``; False otherwise, including when there are no words.
    """
    if isinstance(list_decrypted_text, str):
        # iterating a string would score single characters, not words
        words = list_decrypted_text.split()
    else:
        words = list(list_decrypted_text)
    if not words:
        # nothing to score; also avoids dividing by zero below
        return False
    # Build a set once so that each lookup is O(1) instead of a scan over
    # the whole word list.
    if isinstance(dictionary, (set, frozenset)):
        known_words = dictionary
    else:
        known_words = set(dictionary)
    true_in_result = sum(
        1 for word in words
        if isinstance(word, str) and word.upper() in known_words)
    return true_in_result / len(words) > passing_gate
def bruce_force_k(characters_population, encrypted_text, dictionary):
    """Find the shift key by trying all 26 candidates in ascending order.

    For each candidate the text is decrypted and accepted when more than
    80% of its words are found in ``dictionary``.

    Args:
        characters_population: Letter histogram of the encrypted text.
            Not used by the search; kept for interface compatibility.
        encrypted_text: Encrypted text.
        dictionary: Collection of known words.

    Returns:
        The first key (0..25) whose decryption passes the dictionary check,
        or -1 when none does.
    """
    for guess_k in range(26):
        decrypted_text = shift_cipher_decrypt(encrypted_text, guess_k)
        # Split on any whitespace so that words around line breaks are
        # separate tokens; skip the check when there is no word at all.
        words = decrypted_text.split()
        if words and check_words_valid(words, dictionary, 0.8):
            print('guessed k matching:', guess_k)
            return guess_k
    return -1
def dictionary_lookup(text_to_lookup, dictionary):
    """Tell whether ``text_to_lookup`` is a known word.

    The lookup is case insensitive on the looked-up side: the text is
    upper-cased and compared with the entries of ``dictionary`` as they are.

    Args:
        text_to_lookup: Word to look up.
        dictionary: Container of upper-case words (list, tuple, set, ...).

    Returns:
        True when the upper-cased word is in ``dictionary``; False when it
        is not, or when either argument has an unusable type.
    """
    try:
        # 'in' works for every container type, unlike list.index()
        return text_to_lookup.upper() in dictionary
    except (AttributeError, TypeError):
        # text is not a string, or dictionary is not a container: report
        # "not found" without hiding KeyboardInterrupt or SystemExit
        return False
def load_dictionary():
    """Load the word list from ``./words.txt`` in the working directory.

    Returns:
        A list with one entry per line of the file, each stripped of
        surrounding whitespace and converted to upper case.
    """
    with open('./words.txt', 'r', encoding="utf-8") as f_dict:
        raw_lines = f_dict.readlines()
    # normalise every entry in a single pass: trim first, then upper-case
    return [line.strip().upper() for line in raw_lines]
# main loop: show the menu until the user chooses to quit
while True:
    # show menu
    print()
    print("1. Encrypt File")
    print("2. Decrypt File")
    print("q. quit")
    print()
    option = input("Select an option (1/2/q): ")
    if option == "1":
        # run if user want to encrypt file
        # keep asking until the user names an existing file
        user_not_enter_file = True
        while user_not_enter_file:
            file_path = input("Enter the path of the file to encrypt: ")
            if len(file_path) > 0:
                if os.path.exists(file_path):
                    user_not_enter_file = False
                else:
                    print('sorry but the file not exist')
            else:
                print('please enter a file path')
        # keep asking until the user enters a usable key
        user_not_enter_key = True
        while user_not_enter_key:
            key = input("Enter the key(k) to encrypt: ")
            if len(key) == 0:
                print('please enter a key(k)')
                continue
            # Validate here: a non-numeric key would otherwise raise
            # ValueError inside encrypt_file and end the program.
            try:
                key = int(key)
            except ValueError:
                print('the key(k) must be a whole number')
                continue
            user_not_enter_key = False
        # the file may have disappeared while the key was being entered
        if os.path.exists(file_path):
            encrypt_file(file_path, key)
            print('encryption done')
        else:
            print("File does not exist.")
    elif option == "2":
        # run if user want to decrypt file
        file_path = input("Enter the path of the file to decrypt: ")
        if os.path.exists(file_path):
            # the word list is needed to validate a decryption; report a
            # missing or unreadable file instead of ending the program
            try:
                dictionary = load_dictionary()
            except OSError as err:
                print(f'cannot load the dictionary file: {err}')
            else:
                decrypt_file(file_path, dictionary)
                print('decryption done')
        else:
            print("File does not exist.")
    elif option.lower() == "q":
        print('quitting bye ...')
        break
    else:
        print('')
        print('ERROR !')
        print('please enter either [1/2/q]')
    # pause so the user can read the output before the menu is shown again
    input("press a key to continue ...")
    print('')
print("Exiting...")