############################################################ ## ## python 3 code for applied modern algebra (spring 2020) ## kimball martin ## ## ## this is some helper code for some of the lab 4. this code is not ## necessarily meant to be the most efficient or elegant possible, but it ## is meant to be easy to understand given what we did in class. ## please notify me of any bugs/typos spotted. ############################################################ ############################################################ ## ## for lab 4 ## ############################################################ # our 5-bit encoding scheme -- letters A-Z correspond to number 0-25 # and 0-5 correspond to the numerical codes 26-31 code = [chr(65+i) for i in range(26)] + [str(i) for i in range(6)] # given an alphabetic string s (or semi-numeric--digits 0-5 are also allowed) # return the encoding as a string of bits # if the string includes other characters, they will be ignored def encode(s): bits = [] for x in s: for i in range(32): if code[i] == x: for j in range(5): # append the j-th "bit" of i for j < 5 bits.append(((1 << j) & i) >> j) break return bits # given a list of bits as encoded with the encode function, # return the decoded string (i.e., do the inverse to encode) def decode(bits): s = '' for i in range(len(bits)//5): num = 0 # this will be the numerical value for a 5-bit block for j in range(5): num += (1 << j)*bits[5*i+j] s += code[num] return s ############################################################ ## ## some code from lab 3 ## ############################################################ ############################################################ # returns only the alphabetic part of a string # i.e., strips spaces, punctuation, numbers, etc. ############################################################ def onlyalpha(s): t = '' for x in s: if x.isalpha(): t = t + x return t ############################################################ # perform frequency count on string s # return result as list of length 26 whose i-th element is # the number of times the i-th letter appears ############################################################ def freq_count(s): su = s.upper() count = [] for i in range(65,91): freq = 0 for x in su: if x == chr(i): freq = freq+1 count.append(freq) return count ############################################################ # compute frequency distribution for string s # this is the the vector of frequency counts divided by # the length of the sring ############################################################ def freq_dist(s): freq = freq_count(s) c = 1/sum(freq) for i in range(len(freq)): freq[i] = freq[i]*c return freq ############################################################ # letters in the English alphabet ############################################################ # upper-case letters, e.g., Letter[1] = 'B' Letter = [chr(i) for i in range(65,91)] # lower-case letters, e.g., letter[1] = 'b' letter = [chr(i) for i in range(97,123)] ############################################################ # frequencies of letters in English plaintext ############################################################ EF = [.08167, .01492, .02782, .04253, .12702, .02228, .02015, .06094, .06966, \ .00153, .00772, .04025, .02406, .06749, .07507, .01929, .00095, .05987, \ .06327, .09056, .02758, .00978, .02360, .00150, .01974, .00074 ] ############################################################ # sample text for encryption, taken from # https://en.wikipedia.org/wiki/National_Institute_of_Standards_and_Technology ############################################################ text = 'THENATIONALINSTITUTEOFSTANDARDSANDTECHNOLOGYNISTISAPHYSICALSCIENCESLABORATORYANDANONREGULATORYAGENCYOFTHEUNITEDSTATESDEPARTMENTOFCOMMERCEITSMISSIONISTOPROMOTEINNOVATIONANDINDUSTRIALCOMPETITIVENESSNISTSACTIVITIESAREORGANIZEDINTOLABORATORYPROGRAMSTHATINCLUDENANOSCALESCIENCEANDTECHNOLOGYENGINEERINGINFORMATIONTECHNOLOGYNEUTRONRESEARCHMATERIALMEASUREMENTANDPHYSICALMEASUREMENTITWASFORMERLYKNOWNASTHENATIONALBUREAUOFSTANDARDS'