update,

2025-01-31 19:28:21 +08:00
parent ce9a4aa9b3
commit 72bacdd6b5
168 changed files with 939668 additions and 0 deletions
--- a/banson_hker/phase1-fix/deliver/problem2/aaaaa.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/aaaaa.txt
@@ -0,0 +1 @@
+aaaaa
--- a/banson_hker/phase1-fix/deliver/problem2/apple.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/apple.txt
@@ -0,0 +1 @@
+apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple 
--- a/banson_hker/phase1-fix/deliver/problem2/apple_e.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/apple_e.txt
@@ -0,0 +1 @@
+ETTPI ETTPI ETTPI
--- a/banson_hker/phase1-fix/deliver/problem2/people.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/people.txt
@@ -0,0 +1 @@
+people people people
--- a/banson_hker/phase1-fix/deliver/problem2/people_e.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/people_e.txt
@@ -0,0 +1 @@
+BQABXQ BQABXQ BQABXQ
--- a/banson_hker/phase1-fix/deliver/problem2/shift_cipher_encrypter.py
+++ b/banson_hker/phase1-fix/deliver/problem2/shift_cipher_encrypter.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python
+import os,sys
+
+ORD_a = ord('a') # 97
+ORD_A = ord('A') # 65
+
+DICTIONARY = []
+
def shift_cipher_encrypt(plaintext, key):
    """Encrypt ``plaintext`` with a shift (Caesar) cipher.

    Every ASCII letter is upper-cased and moved ``key`` positions forward in
    the alphabet, wrapping from Z back to A.  Every other character (digits,
    punctuation, whitespace, non-ASCII letters) is copied unchanged.

    Worked example for ``'the'`` with ``key=8``::

        distance from 'A':  [19, 7, 4]
        add the key:        [27, 15, 12]
        modulo 26:          [1, 15, 12]   ->  'BPM'

    Args:
        plaintext: text to encrypt.
        key: integer shift.  Any integer is accepted because the shift is
            reduced modulo 26; a negative key shifts backwards.

    Returns:
        The encrypted text.  Letters in the result are always upper case.
    """
    base = ord('A')
    encrypted_chars = []

    for char in plaintext:
        # Only A-Z / a-z are shifted.  str.isalpha() is also true for letters
        # such as 'é' or CJK characters, which the modulo arithmetic would
        # turn into unrelated ASCII letters, and for 'ß', whose upper-case
        # form is two characters long and cannot be passed to ord().
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            distance = ord(char.upper()) - base
            shifted_distance = (distance + key) % 26
            encrypted_chars.append(chr(shifted_distance + base))
        else:
            encrypted_chars.append(char)

    return ''.join(encrypted_chars)
+
+
def shift_cipher_decrypt(ciphertext, key):
    """Decrypt shift-cipher text by moving each letter ``key`` places back.

    ASCII letters are shifted backwards in the alphabet with wrap-around and
    keep their case.  Every other character (digits, punctuation, whitespace,
    non-ASCII letters) is copied unchanged.

    Args:
        ciphertext: text to decrypt.
        key: integer shift that was used to encrypt.  It is reduced modulo
            26, so negative or large values are accepted.

    Returns:
        The decrypted text.
    """
    decrypted_chars = []

    for char in ciphertext:
        # Restrict the arithmetic to ASCII letters: str.isalpha() is also
        # true for accented and CJK letters, which would otherwise be folded
        # into unrelated ASCII letters.
        if 'a' <= char <= 'z':
            base = ord('a')
        elif 'A' <= char <= 'Z':
            base = ord('A')
        else:
            decrypted_chars.append(char)
            continue

        shifted_distance = (ord(char) - base - key) % 26
        decrypted_chars.append(chr(shifted_distance + base))

    return ''.join(decrypted_chars)
+
+
def count_letter_e(txt_in):
    """Return how many characters of ``txt_in`` are the letter e (any case).

    Kept for demonstration purposes only.
    """
    return sum(
        1
        for symbol in txt_in
        if symbol.isalpha() and symbol.lower() == 'e'
    )
+
def count_most_occurrence_letter(txt_in):
    """Count how often each letter a-z occurs in ``txt_in``, ignoring case.

    In English text 'e' is statistically the most frequent letter.  A shift
    cipher moves every letter by the same amount, so the most frequent
    letter of the ciphertext is a good candidate for the encrypted 'e'.

    Args:
        txt_in: text to analyse.

    Returns:
        A list of 26 integers; index 0 is the count of 'a'/'A', index 25 the
        count of 'z'/'Z'.  Characters outside A-Z / a-z are ignored.
    """
    first = ord('a')
    counts = [0] * 26    # one bucket per letter

    for char in txt_in:
        # Test the raw character rather than str.isalpha(): non-ASCII
        # letters would index past the 26 buckets, and a few (e.g. 'İ')
        # lower-case to more than one character, which ord() rejects.
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
            counts[ord(char.lower()) - first] += 1

    return counts
+
def find_max_occurrence(char_occurrences):
    """Guess the shift key from a letter-frequency table.

    The most frequent ciphertext letter is assumed to be the encrypted 'e',
    so the key is the distance from 'e' to that letter.

    Args:
        char_occurrences: list of 26 counts, index 0 for 'a' up to index 25
            for 'z'.

    Returns:
        The guessed key, normalised to the range 0-25.  When several letters
        share the highest count, the one earliest in the alphabet is used.

    Raises:
        ValueError: if ``char_occurrences`` is empty.
    """
    index_of_e = ord('e') - ord('a')    # 4

    # list.index returns the first position holding the maximum.
    max_idx = char_occurrences.index(max(char_occurrences))

    # Reduce modulo 26 so that a most-frequent letter before 'e' yields the
    # real key (e.g. 25) instead of a negative number (-1).
    return (max_idx - index_of_e) % 26
+
def encrypt_file(file_path, key=8):
    """Encrypt a text file and save the result next to it.

    The output name is the input name with ``_e`` inserted before the
    extension (``apple.txt`` -> ``apple_e.txt``).  The name is built from the
    extension rather than by replacing the text ``.txt``, so a file without
    a ``.txt`` extension is never overwritten by its own ciphertext.

    Args:
        file_path: path of the UTF-8 plaintext file.
        key: shift to apply; anything ``int()`` accepts (default 8).

    Raises:
        ValueError: if ``key`` cannot be converted to an integer.
        OSError: if the input cannot be read or the output cannot be written.
    """
    root, extension = os.path.splitext(file_path)
    output_file = f'{root}_e{extension}'

    # Convert before touching any file so a bad key leaves no output behind.
    key = int(key)

    with open(file_path, 'r', encoding="utf-8") as fi:
        plaintext = fi.read()

    # Write with the same encoding the plaintext was read with; characters
    # that are not shifted are copied through and must stay encodable.
    with open(output_file, 'w', encoding="utf-8") as fo:
        fo.write(shift_cipher_encrypt(plaintext, key))

    print(f'encryption done and file saved to {output_file}')
    return
+
def decrypt_file(file_path, dictionary):
    """Decrypt a shift-cipher file without knowing the key and print it.

    Two strategies are tried in order: the key implied by the most frequent
    letter (``decrypt_by_guessed_k``), then a key search
    (``decrypt_by_bruce_force_k``).  The decrypted message is printed when
    one of them reports success, otherwise a failure notice is printed.

    Args:
        file_path: path of the encrypted text file.
        dictionary: collection of upper-case words used to judge whether a
            candidate decryption is readable.

    Raises:
        OSError: if the file cannot be read.
    """
    # Read everything up front so the file is closed before the analysis.
    # Only ASCII letters carry the cipher, so bytes that are not valid UTF-8
    # are replaced instead of aborting the whole decryption.
    with open(file_path, 'r', encoding="utf-8", errors="replace") as fi:
        e_temp = fi.read()

    print("try decrypt by guessing maximum occurrence ... ")
    decrypted, decrypted_text = decrypt_by_guessed_k(e_temp, dictionary)

    if not decrypted:
        print("decrypt by guessing maximum occurence seems doesn't work...")
        decrypted, decrypted_text = decrypt_by_bruce_force_k(e_temp, dictionary)

    if decrypted:
        print()
        print("Final decrypted message:")
        print()
        print(decrypted_text)
        print()
    else:
        print("Seems neither of them works.")
+            
+
def decrypt_by_guessed_k(e_temp, dictionary):
    """Decrypt with the key implied by the most frequent ciphertext letter.

    Prints the letter distribution and the guessed key, decrypts with that
    key and checks the result against the dictionary.

    Args:
        e_temp: the encrypted text.
        dictionary: collection of upper-case words used for validation.

    Returns:
        A two-item list ``[valid, decrypted_text]``.  ``valid`` is True when
        more than 80% of the decrypted words are dictionary words.
    """
    print('decrypted by guessed k')
    characters_distribution = count_most_occurrence_letter(e_temp)

    print('')
    print('distribution of letters in encrypted text (case insensitive, from a to z)')
    print([chr(65 + i) for i in range(26)])
    print([str(count) for count in characters_distribution])

    print('')
    guess_k = find_max_occurrence(characters_distribution)
    print(f'try decrypt using guess_k -> guessed k: {guess_k}')

    decrypted_text = shift_cipher_decrypt(e_temp, guess_k)

    # Split on any whitespace: splitting on a single space leaves newlines
    # glued to words and produces empty tokens for repeated or trailing
    # spaces, both of which wrongly fail the dictionary check.
    words = decrypted_text.split()

    # Text without any word cannot be validated; skip the ratio check.
    is_valid = bool(words) and check_words_valid(words, dictionary, 0.8)

    return [is_valid, decrypted_text]
+
def decrypt_by_bruce_force_k(e_temp, dictionary):
    """Decrypt by searching for a key whose output passes the dictionary check.

    Args:
        e_temp: the encrypted text.
        dictionary: collection of upper-case words used for validation.

    Returns:
        A two-item list ``[valid, decrypted_text]``: ``[True, text]`` when
        ``bruce_force_k`` found a key, ``[False, '']`` when it did not.
    """
    print()
    print('try decrypt by bruce forcing k ...')

    characters_distribution = count_most_occurrence_letter(e_temp)
    guess_k = bruce_force_k(characters_distribution, e_temp, dictionary)

    # bruce_force_k signals "no key matched" with -1; report that as a
    # failure instead of decrypting with the bogus key.
    if guess_k < 0:
        return [False, '']

    return [True, shift_cipher_decrypt(e_temp, guess_k)]
+
def check_words_valid(list_decrypted_text, dictionary, passing_gate):
    """Tell whether enough of the given words are dictionary words.

    Words are compared case-insensitively by upper-casing them, so the
    dictionary is expected to hold upper-case entries.

    Args:
        list_decrypted_text: iterable of candidate words.
        dictionary: collection of upper-case words.
        passing_gate: threshold between 0 and 1.

    Returns:
        True when the fraction of words found in the dictionary is strictly
        greater than ``passing_gate``; False otherwise, including when there
        are no words at all.
    """
    words = list(list_decrypted_text)
    if not words:
        # Nothing to validate; also avoids dividing by zero below.
        return False

    # Build a set once so each word costs one hash lookup instead of a scan
    # over the whole word list.
    if isinstance(dictionary, (set, frozenset)):
        known_words = dictionary
    else:
        known_words = set(dictionary)

    hits = sum(
        1
        for word in words
        if isinstance(word, str) and word.upper() in known_words
    )

    return hits / len(words) > passing_gate
+
def bruce_force_k(characters_distribution, e_temp, dictionary):
    """Search all 26 shift keys for one that yields readable text.

    Keys that would make some ciphertext letter decrypt to 'e' are tried
    first, in ascending order, because readable English almost always
    contains an 'e'.  The remaining keys are tried afterwards so that text
    without any 'e' can still be decrypted.  Every attempt is printed.

    Args:
        characters_distribution: list of 26 letter counts of the ciphertext,
            index 0 for 'a' up to index 25 for 'z'.
        e_temp: the encrypted text.
        dictionary: collection of upper-case words used for validation.

    Returns:
        The first key (0-25) whose decryption has more than 80% dictionary
        words, or -1 when no key qualifies.
    """
    # Shifting only changes letters, so the ciphertext has words exactly
    # when the plaintext does; without words nothing can be validated.
    if not e_temp.split():
        return -1

    index_of_e = ord('e') - ord('a')

    # Under key k, plaintext 'e' becomes the letter at index (4 + k) % 26.
    # If that letter never occurs in the ciphertext, k is an unlikely key.
    likely_keys = [
        k for k in range(26)
        if characters_distribution[(k + index_of_e) % 26] > 0
    ]
    unlikely_keys = [k for k in range(26) if k not in likely_keys]

    for guess_k in likely_keys + unlikely_keys:
        decrypted_text = shift_cipher_decrypt(e_temp, guess_k)

        # Split on any whitespace so newlines and repeated spaces do not
        # produce tokens that can never match a dictionary word.
        list_decrypted_text = decrypted_text.split()
        result = check_words_valid(list_decrypted_text, dictionary, 0.8)
        print(f'trying k={guess_k} -> result "{decrypted_text}"')

        if result:
            print('guessed k matching:', guess_k)
            return guess_k

    return -1
+
+
def dictionary_lookup(text_to_lookup, dictionary):
    """Return True when the word, upper-cased, is contained in ``dictionary``.

    Args:
        text_to_lookup: the word to look for; matching is case-insensitive
            because the word is upper-cased before the lookup.
        dictionary: container of upper-case words (list, tuple, set, ...).

    Returns:
        True if the word is present, False if it is absent or if
        ``text_to_lookup`` is not a string.
    """
    try:
        needle = text_to_lookup.upper()
    except AttributeError:
        # Not a string (e.g. None): it cannot be a dictionary word.
        return False

    return needle in dictionary
+
def load_dictionary(path='./words.txt'):
    """Load the word list used to validate decrypted text.

    Args:
        path: location of a UTF-8 text file with one word per line.
            Defaults to ``./words.txt``, resolved against the current
            working directory.

    Returns:
        A list of the words, stripped of surrounding whitespace and
        upper-cased, in file order.  Blank lines are skipped so the empty
        string never counts as a valid word.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(path, 'r', encoding="utf-8") as f_dict:
        stripped_lines = (line.strip() for line in f_dict)
        return [word.upper() for word in stripped_lines if word]
+
def _prompt_existing_file():
    """Ask for a path until the user names one that exists; return it."""
    while True:
        file_path = input("Enter the path of the file to encrypt: ")
        if not file_path:
            print('please enter a file path')
        elif not os.path.exists(file_path):
            print('sorry but the file not exist')
        else:
            return file_path


def _prompt_integer_key():
    """Ask for the key until the user enters an integer; return it as int."""
    while True:
        key = input("Enter the key(k) to encrypt: ")
        if not key:
            print('please enter a key(k)')
            continue
        try:
            return int(key)
        except ValueError:
            # A non-numeric key would otherwise crash inside encrypt_file.
            print('please enter an integer key(k)')


def main():
    """Run the interactive encrypt / decrypt menu until the user quits."""
    while True:
        # show menu
        print()
        print("1. Encrypt File")
        print("2. Decrypt File")
        print("q. quit")
        print()
        option = input("Select an option (1/2/q): ")

        if option == "1":
            # run if user want to encrypt file
            file_path = _prompt_existing_file()
            key = _prompt_integer_key()

            # The file may have disappeared while the key was being typed.
            if os.path.exists(file_path):
                encrypt_file(file_path, key)
                print('encryption done')
            else:
                print("File does not exist.")

        elif option == "2":
            # run if user want to decrypt file
            file_path = input("Enter the path of the file to decrypt: ")
            if os.path.exists(file_path):
                decrypt_file(file_path, load_dictionary())
                print('decryption done')
            else:
                print("File does not exist.")

        elif option.lower() == "q":
            print('quitting bye ...')
            break

        else:
            print('')
            print('ERROR !')
            print('please enter either [1/2/q]')
            input("press a key to continue ...")
            print('')

    print("Exiting...")


# Only start the menu when executed as a script, so importing this module
# (e.g. to reuse the cipher functions) does not block on input().
if __name__ == "__main__":
    main()
--- a/banson_hker/phase1-fix/deliver/problem2/test.sh
+++ b/banson_hker/phase1-fix/deliver/problem2/test.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+set -ex
+
+python ./shift_cipher_encrypter.py
--- a/banson_hker/phase1-fix/deliver/problem2/wiki.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/wiki.txt
@@ -0,0 +1,8 @@
+Ref: https://en.wikipedia.org/wiki/Hong_Kong
+
+
+Hong Kong was established as a colony of the British Empire after the Qing Empire ceded Hong Kong Island in 1841–1842. The colony expanded to the Kowloon Peninsula in 1860 and was further extended when the United Kingdom obtained a 99-year lease of the New Territories in 1898. Hong Kong was briefly occupied by Japan from 1941 to 1945 during World War II. The whole territory was transferred from the United Kingdom to China in 1997. Hong Kong maintains separate governing and economic systems from that of mainland China under the principle of "one country, two systems".[f] 
+Originally a sparsely populated area of farming and fishing villages,[18][19] the territory is now one of the world's most significant financial centres and commercial ports. Hong Kong is the world's fourth-ranked global financial centre, ninth-largest exporter, and eighth-largest importer. Its currency, the Hong Kong dollar, is the eighth most traded currency in the world. Home to the second-highest number of billionaires of any city in the world, Hong Kong has the largest concentration of ultra high-net-worth individuals. Although the city has one of the highest per capita incomes in the world, severe income inequality exists among the population. Despite having the largest number of skyscrapers of any city in the world, housing in Hong Kong has been well-documented to experience a chronic persistent shortage. 
+Hong Kong is a highly developed territory and has a Human Development Index (HDI) of 0.952, ranking fourth in the world. The city has the second highest life expectancy in the world, and a public transport rate exceeding 90%. 
+
+around 261 words
--- a/banson_hker/phase1-fix/deliver/problem2/wiki_e.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/wiki_e.txt
@@ -0,0 +1,8 @@
+Dqr: tffbe://qz.iuwubqpum.ads/iuwu/Tazs_Wazs
+
+
+Tazs Wazs ime qefmnxuetqp me m oaxazk ar ftq Ndufuet Qybudq mrfqd ftq Cuzs Qybudq oqpqp Tazs Wazs Uexmzp uz 1841V1842. Ftq oaxazk qjbmzpqp fa ftq Waixaaz Bqzuzegxm uz 1860 mzp ime rgdftqd qjfqzpqp itqz ftq Gzufqp Wuzspay anfmuzqp m 99-kqmd xqmeq ar ftq Zqi Fqddufaduqe uz 1898. Tazs Wazs ime nduqrxk aoogbuqp nk Vmbmz rday 1941 fa 1945 pgduzs Iadxp Imd UU. Ftq itaxq fqddufadk ime fdmzerqddqp rday ftq Gzufqp Wuzspay fa Otuzm uz 1997. Tazs Wazs ymuzfmuze eqbmdmfq sahqdzuzs mzp qoazayuo ekefqye rday ftmf ar ymuzxmzp Otuzm gzpqd ftq bduzoubxq ar "azq oagzfdk, fia ekefqye".[r] 
+Adusuzmxxk m ebmdeqxk babgxmfqp mdqm ar rmdyuzs mzp ruetuzs huxxmsqe,[18][19] ftq fqddufadk ue zai azq ar ftq iadxp'e yaef euszuruomzf ruzmzoumx oqzfdqe mzp oayyqdoumx badfe. Tazs Wazs ue ftq iadxp'e ragdft-dmzwqp sxanmx ruzmzoumx oqzfdq, zuzft-xmdsqef qjbadfqd, mzp qustft-xmdsqef uybadfqd. Ufe ogddqzok, ftq Tazs Wazs paxxmd, ue ftq qustft yaef fdmpqp ogddqzok uz ftq iadxp. Tayq fa ftq eqoazp-tustqef zgynqd ar nuxxuazmudqe ar mzk oufk uz ftq iadxp, Tazs Wazs tme ftq xmdsqef oazoqzfdmfuaz ar gxfdm tust-zqf-iadft uzpuhupgmxe. Mxftagst ftq oufk tme azq ar ftq tustqef bqd ombufm uzoayqe uz ftq iadxp, eqhqdq uzoayq uzqcgmxufk qjuefe myazs ftq babgxmfuaz. Pqebufq tmhuzs ftq xmdsqef zgynqd ar ewkeodmbqde ar mzk oufk uz ftq iadxp, tageuzs uz Tazs Wazs tme nqqz iqxx-paogyqzfqp fa qjbqduqzoq m otdazuo bqdeuefqzf etadfmsq. 
+Tazs Wazs ue m tustxk pqhqxabqp fqddufadk mzp tme m Tgymz Pqhqxabyqzf Uzpqj (TPU) ar 0.952, dmzwuzs ragdft uz ftq iadxp. Ftq oufk tme ftq eqoazp tustqef xurq qjbqofmzok uz ftq iadxp, mzp m bgnxuo fdmzebadf dmfq qjoqqpuzs 90%. 
+
+mdagzp 261 iadpe
--- a/banson_hker/phase1-fix/deliver/problem2/words.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/words.txt
--- a/banson_hker/phase1-fix/deliver/problem2/zzzzz.txt
+++ b/banson_hker/phase1-fix/deliver/problem2/zzzzz.txt
@@ -0,0 +1 @@
+zzzzz
				`@@ -0,0 +1 @@`
				`apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple apple`