#!/usr/bin/env python
|
|
import os
|
|
import sys
|
|
|
|
# ORD_a = ord('a') # 97
|
|
ORD_A = ord('A') # 65
|
|
|
|
def shift_cipher_encrypt(plaintext, key):
    """Encrypt *plaintext* with a shift (Caesar) cipher.

    Every ASCII letter is upper-cased (a requirement of the exercise) and
    then moved ``key`` positions forward in the alphabet, wrapping around
    after ``Z``.  All other characters (digits, punctuation, whitespace,
    non-ASCII letters) are copied through unchanged.

    Worked example with key 8 and input ``the``:
        distances from 'A'   -> [19, 7, 4]
        add key, modulo 26   -> [1, 15, 12]
        back to letters      -> 'BPM'

    Args:
        plaintext: text to encrypt.
        key: integer shift; any integer is accepted because the shift is
            reduced modulo 26.

    Returns:
        The encrypted text, with all ASCII letters in upper case.
    """
    base = ord('A')
    pieces = []

    for char in plaintext:
        # Only the 26 ASCII letters belong to the cipher alphabet.
        # str.isalpha() is also true for letters such as 'é' or 'ß'; those
        # would be folded into a wrong A-Z letter, and 'ß'.upper() is the
        # two-character 'SS', which ord() rejects.
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            distance = ord(char.upper()) - base
            pieces.append(chr((distance + key) % 26 + base))
        else:
            pieces.append(char)

    return ''.join(pieces)
|
|
|
|
|
|
def shift_cipher_decrypt(cipher_text, key):
    """Decrypt *cipher_text* that was produced by a shift (Caesar) cipher.

    Every ASCII letter is upper-cased and moved ``key`` positions backward
    in the alphabet, wrapping around before ``A``.  All other characters
    are copied through unchanged.

    Args:
        cipher_text: text to decrypt.
        key: integer shift that was used for encryption; any integer is
            accepted because the shift is reduced modulo 26.

    Returns:
        The decrypted text, with all ASCII letters in upper case.
    """
    base = ord('A')
    pieces = []

    for char in cipher_text:
        # Restrict the cipher to ASCII letters and normalise case first:
        # measuring a lowercase letter from 'A' gives a distance of 32+,
        # which decodes to the wrong letter.
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            distance = ord(char.upper()) - base
            pieces.append(chr((distance - key) % 26 + base))
        else:
            # Not part of the cipher alphabet: retain as is.
            pieces.append(char)

    return ''.join(pieces)
|
|
|
|
def count_most_occurrence_letter(txt_in):
    """Count how often each letter A-Z occurs in *txt_in* (case-insensitive).

    In English text the letter E is statistically the most frequent one.
    Because a shift cipher moves every letter by the same amount, the most
    frequent letter of the encrypted text is a good candidate for the
    encrypted E.

    Args:
        txt_in: text to analyse.

    Returns:
        A list of 26 integers; index 0 is the count of A/a, index 25 the
        count of Z/z.  Characters outside the ASCII letters are ignored.
    """
    base = ord('A')
    output = [0] * 26  # one bucket per letter

    for char in txt_in:
        # str.isalpha() also accepts non-ASCII letters, whose offset from
        # 'A' falls outside the 26 buckets, so test the ASCII ranges.
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            output[ord(char.upper()) - base] += 1

    return output
|
|
|
|
|
|
def find_max_occurrence(char_occurrences):
    """Guess the shift key from per-letter counts.

    The most frequent letter is assumed to be the encrypted form of E, so
    the key is the distance from E (index 4) to that letter.

    Args:
        char_occurrences: non-empty list of letter counts, index 0 = A.

    Returns:
        The guessed key in the range 0..25.  On ties the first (lowest)
        letter with the maximum count is used.

    Raises:
        ValueError: if *char_occurrences* is empty.
    """
    # position of the most frequent letter
    max_idx = char_occurrences.index(max(char_occurrences))

    # Distance to E; the modulo keeps the key non-negative when the most
    # frequent letter is A-D, so the reported key is a valid 0..25 shift.
    return (max_idx - 4) % 26
|
|
|
|
|
|
def encrypt_file(file_path, key=8):
    """Encrypt the text file at *file_path* and save the result next to it.

    The output name is the input name with ``_e`` inserted before the
    extension (``notes.txt`` -> ``notes_e.txt``).  The output is created if
    missing and overwritten if present.

    Args:
        file_path: path of the plaintext file, read as UTF-8.
        key: shift to apply; anything ``int()`` accepts.

    Raises:
        ValueError: if *key* cannot be converted to an integer.
        OSError: if the input cannot be read or the output cannot be written.
    """
    # Build the output name from the real extension.  A plain
    # str.replace('.txt', ...) leaves the name unchanged when the path has
    # no '.txt' (so the source would be overwritten) and also rewrites
    # '.txt' occurring inside directory names.
    root, extension = os.path.splitext(file_path)
    output_file = f'{root}_e{extension}'

    # the key may arrive as a string typed by the user
    key = int(key)

    # read the whole source file (plaintext)
    with open(file_path, 'r', encoding="utf-8") as fi:
        plaintext = fi.read()

    # NOTE(review): the output is written with the platform default
    # encoding while the input is read as UTF-8; decrypt_file also reads
    # with the default, so change both together if this is made explicit.
    # Mode 'w' already creates/truncates the target file.
    with open(output_file, 'w') as fo:
        fo.write(shift_cipher_encrypt(plaintext, key))

    print(f'encryption done and file saved to {output_file}')
    return
|
|
|
|
|
|
def decrypt_file(file_path, dictionary):
    """Decrypt the file at *file_path* without knowing the key and print it.

    Strategy:
        1. Guess the key from the most frequent letter (assumed to be E).
        2. If that result does not look like valid text, fall back to
           trying the candidate keys one by one (brute force).
        3. Print the decrypted message, or a failure notice when neither
           approach produced valid text.

    Args:
        file_path: path of the encrypted file.
        dictionary: word collection passed through to the decrypt helpers.
    """
    # read the whole encrypted file in one go
    with open(file_path, 'r') as source:
        ciphertext = source.read()

    print("try decrypt by guessing maximum occurrence ... ")
    succeeded, message = decrypt_by_letter_occurrence(ciphertext, dictionary)

    # letter-frequency guess did not yield valid text: try brute force
    if not succeeded:
        print("decrypt by guessing maximum occurence seems doesn't work...")
        succeeded, message = decrypt_by_bruce_force(ciphertext, dictionary)

    if not succeeded:
        # no decryption works
        print("Seems neither of them works.")
        return

    print()
    print("Final decrypted message:")
    print()
    print(message)
    print()
|
|
|
|
|
|
def decrypt_by_letter_occurrence(enc_text, dictionary):
    """Try to decrypt *enc_text* using a key guessed from letter frequency.

    Steps:
        1. count the occurrences of every letter in the encrypted text
        2. find the most frequent letter
        3. take its distance to the letter E as the "guessed k"
        4. decrypt using the guessed k
        5. look the decrypted words up in the dictionary to validate

    Args:
        enc_text: the encrypted text.
        dictionary: word collection used to validate the result.

    Returns:
        A two-item list ``[is_valid, decrypted_text]``.
    """
    print('decrypted by guessed k')
    characters_population = count_most_occurrence_letter(enc_text)

    print('')
    print('population of letters in encrypted text (case insensitive, from a to z)')
    print([chr(65 + i) for i in range(0, 26)])
    print(['{:0>1}'.format(i) for i in characters_population])

    print('')
    guess_k = find_max_occurrence(characters_population)
    print(f'try decrypt using guess_k -> guessed k: {guess_k}')

    decrypted_text = shift_cipher_decrypt(enc_text, guess_k)

    # Split on any whitespace: the text is a whole file, so splitting on
    # ' ' alone would fuse the last word of a line with the first word of
    # the next one and neither would be found in the dictionary.
    words = decrypted_text.split()

    # text without any word cannot be validated (and must not be scored)
    is_valid = bool(words) and check_words_valid(words, dictionary, 0.8)

    return [is_valid, decrypted_text]
|
|
|
|
|
|
def decrypt_by_bruce_force(encrypted_text, dictionary):
    """Try to decrypt *encrypted_text* by testing the candidate keys in turn.

    Steps:
        1. count the occurrences of every letter in the encrypted text
        2. let bruce_force_k search for a key whose result passes the
           dictionary check
        3. decrypt with that key and validate the words once more

    Args:
        encrypted_text: the encrypted text.
        dictionary: word collection used to validate the result.

    Returns:
        A two-item list ``[is_valid, decrypted_text]``; ``[False, '']``
        when no key was found.
    """
    print()
    print('try decrypt by bruce forcing k ...')
    characters_population = count_most_occurrence_letter(encrypted_text)

    guess_k = bruce_force_k(characters_population, encrypted_text, dictionary)

    # A negative key is the "nothing matched" sentinel.  Testing for any
    # negative value covers the -1 that bruce_force_k returns as well as
    # the -999 this function used to look for.
    if guess_k < 0:
        return [False, '']

    decrypted_text = shift_cipher_decrypt(encrypted_text, guess_k)

    # check_words_valid scores a list of words; handing it the raw string
    # would make it look up single characters instead.
    words = decrypted_text.split()
    is_valid = bool(words) and check_words_valid(words, dictionary, 0.8)

    return [is_valid, decrypted_text]
|
|
|
|
|
|
def check_words_valid(list_decrypted_text, dictionary, passing_gate):
    """Decide whether decrypted text looks like real language.

    Each word is looked up (upper-cased) in *dictionary*; the text is
    accepted when the fraction of known words is strictly greater than
    *passing_gate*.

    Args:
        list_decrypted_text: list of words/tokens; a plain string is also
            accepted and split on whitespace.
        dictionary: collection of upper-case words.
        passing_gate: required match ratio, e.g. ``0.8``.

    Returns:
        True if the match ratio exceeds *passing_gate*, otherwise False
        (including when there is no word to check).
    """
    import string  # local import: used only for the punctuation table

    # a bare string would otherwise be scored character by character
    if isinstance(list_decrypted_text, str):
        list_decrypted_text = [list_decrypted_text]

    # hash lookup instead of scanning the whole word list for every word
    if isinstance(dictionary, (set, frozenset)):
        vocabulary = dictionary
    else:
        vocabulary = set(dictionary)

    words = []
    for token in list_decrypted_text:
        # tokens produced by split(' ') may still contain line breaks
        for piece in token.split():
            # "fox," or "(the" should count as the word itself
            word = piece.strip(string.punctuation)
            if word:
                words.append(word.upper())

    # nothing to score: avoid dividing by zero
    if not words:
        return False

    matches = sum(1 for word in words if word in vocabulary)
    return matches / len(words) > passing_gate
|
|
|
|
|
|
def bruce_force_k(characters_population, encrypted_text, dictionary):
    """Search the keys 0..25 for one that decrypts to valid words.

    Args:
        characters_population: letter counts of the encrypted text; kept
            for interface compatibility, not used by the search.
        encrypted_text: the encrypted text.
        dictionary: word collection used to validate each candidate.

    Returns:
        The first key whose decryption passes the dictionary check, or
        ``-1`` when none of the 26 keys does.
    """
    for guess_k in range(26):
        decrypted_text = shift_cipher_decrypt(encrypted_text, guess_k)

        # Split on any whitespace so that words separated by line breaks
        # are looked up individually.
        words = decrypted_text.split()

        # text without any word can never be validated
        if words and check_words_valid(words, dictionary, 0.8):
            print('guessed k matching:', guess_k)
            return guess_k

    return -1
|
|
|
|
|
|
def dictionary_lookup(text_to_lookup, dictionary):
    """Return True if *text_to_lookup* (upper-cased) is in *dictionary*.

    Args:
        text_to_lookup: the word to look up.
        dictionary: collection of upper-case words (list, tuple, set, ...).

    Returns:
        True when the word is present; False when it is absent or when
        either argument is of an unusable type.
    """
    try:
        # A membership test works for any container and, unlike
        # list.index, does not signal "absent" with an exception.
        return text_to_lookup.upper() in dictionary
    except (AttributeError, TypeError):
        # Best effort: a non-string word or a non-container dictionary
        # counts as "not found".  Catching only these two keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        return False
|
|
|
|
|
|
def load_dictionary():
    """Load the word list from ``./words.txt`` (one word per line, UTF-8).

    Returns:
        A list of the words, stripped of surrounding whitespace and
        upper-cased, in file order.  Blank lines are skipped so that the
        empty string never counts as a known word.

    Raises:
        OSError: if ``./words.txt`` cannot be opened.
    """
    with open('./words.txt', 'r', encoding="utf-8") as f_dict:
        stripped_lines = (line.strip() for line in f_dict)
        return [word.upper() for word in stripped_lines if word]
|
|
|
|
# main loop
|
|
def _prompt_existing_file(prompt):
    """Ask with *prompt* until the user enters the path of an existing file."""
    while True:
        file_path = input(prompt)
        if not file_path:
            print('please enter a file path')
        elif os.path.exists(file_path):
            return file_path
        else:
            print('sorry but the file not exist')


def _prompt_key(prompt):
    """Ask with *prompt* until the user enters a whole number; return it."""
    while True:
        raw_key = input(prompt)
        if not raw_key:
            print('please enter a key(k)')
            continue
        try:
            # validate here: encrypt_file converts the key with int() and
            # a non-numeric entry would otherwise abort the program
            return int(raw_key)
        except ValueError:
            print('the key(k) must be a whole number')


def main():
    """Run the interactive menu until the user chooses to quit."""
    while True:
        # show menu
        print()
        print("1. Encrypt File")
        print("2. Decrypt File")
        print("q. quit")
        print()
        option = input("Select an option (1/2/q): ")

        if option == "1":
            # user wants to encrypt a file
            file_path = _prompt_existing_file(
                "Enter the path of the file to encrypt: ")
            key = _prompt_key("Enter the key(k) to encrypt: ")

            # the file may have disappeared while the key was being typed
            if os.path.exists(file_path):
                encrypt_file(file_path, key)
                print('encryption done')
            else:
                print("File does not exist.")

        elif option == "2":
            # user wants to decrypt a file
            file_path = input("Enter the path of the file to decrypt: ")
            if os.path.exists(file_path):
                decrypt_file(file_path, load_dictionary())
                print('decryption done')
            else:
                print("File does not exist.")

        elif option.lower() == "q":
            print('quitting bye ...')
            break

        else:
            print('')
            print('ERROR !')
            print('please enter either [1/2/q]')
            input("press a key to continue ...")
            print('')

    print("Exiting...")


# Only start the menu when executed as a script, so the cipher functions
# can be imported without blocking on input().
if __name__ == "__main__":
    main()
|