# Section #5 Solutions

October 18th, 2020

Written by Brahm Capoor, Juliette Woodrow, Peter Maldonado, Kara Eng, Tori Qiu and Parth Sarin

## Strings

### String Slicing

1. s[1:6]
2. s[:2] or s[0:2]
3. s[6:9]
4. s[6:] or s[6:10]
5. s[6] or s[6:7]
6. s[:] or s[0:10] (or just s)

### String Construction

        
def only_one_first_char(s):
    """
    Returns s with every later occurrence of its first character removed,
    so that the first character appears exactly once (at the front).

    Arguments:
    s -- The string to process

    >>> only_one_first_char('abba')
    'abb'
    >>> only_one_first_char('aaaa')
    'a'
    >>> only_one_first_char('')
    ''
    """
    if s == "":
        return ""

    # Compare against the first *character*, not the whole string; comparing
    # each character to s itself would never filter anything out.
    first_char = s[0]
    output = first_char
    for ch in s[1:]:
        if ch != first_char:
            output += ch

    return output

def make_gerund(s):
    """
    Returns s with 'ing' appended, unless s already ends in 'ing', in which
    case that trailing 'ing' is replaced by 'ly'.

    Arguments:
    s -- The string to transform

    >>> make_gerund('walk')
    'walking'
    >>> make_gerund('ringing')
    'ringly'
    >>> make_gerund('')
    'ing'
    """
    # endswith is False for strings shorter than the suffix, so no separate
    # length check is needed.
    if s.endswith('ing'):
        return s[:-3] + 'ly'
    return s + 'ing'

def put_in_middle(outer, inner):
    """
    Returns outer with inner inserted at its midpoint. When outer has an odd
    length, the shorter half goes before inner.

    Arguments:
    outer -- The string to split in half
    inner -- The string to insert between the two halves

    >>> put_in_middle('abcd', 'xx')
    'abxxcd'
    >>> put_in_middle('abc', '-')
    'a-bc'
    """
    middle = len(outer) // 2
    return outer[:middle] + inner + outer[middle:]



### Word Puzzles

          
def is_palindrome(str):
    """
    Returns whether the alphabetic characters of str, ignoring case, read the
    same forwards and backwards.

    >>> is_palindrome('racecar')
    True
    >>> is_palindrome('chris')
    False
    >>> is_palindrome('mehran')
    False
    >>> is_palindrome('A man, a plan, a canal: Panama!')
    True
    """
    normalized = normalize(str)  # keeps only letters, lowercased
    # Reverse the *normalized* text: reversing the raw input would bring back
    # the capitals and punctuation that normalize() just removed.
    rev = normalized[::-1]  # oh yeah, feel the power of slices!
    return normalized == rev

def normalize(str):
    """
    This function returns a "normalized" version of the string passed in.
    The normalized string only includes alphabetic characters (in any Unicode
    supported alphabet), converted to lowercase.  For example, whitespace,
    punctuation, and digits would not be included in the normalized string.
    >>> normalize('abc 1!2 def')
    'abcdef'
    >>> normalize('여보, 안경')
    '여보안경'
    """
    return ''.join(ch.lower() for ch in str if ch.isalpha())

def is_tridrome(word):
    """
    Returns whether or not word is a tridrome, i.e., the first three letters
    are the same as the last three letters.

    Arguments:
    word -- The word to check

    >>> is_tridrome('ENTERTAINMENT')
    True
    >>> is_tridrome('UNDERGROUND')
    True
    >>> is_tridrome('DEFENESTRATION')
    False
    >>> is_tridrome('PYTHON')
    False
    >>> is_tridrome('')
    False
    """
    # We need to check that
    #   1. the word is at least six letters, and
    #   2. the first three letters of the word are the same as the last three
    #      letters.
    #
    # To check the first condition, we check if
    #   len(word) >= 6
    #
    # For the second condition, we can extract the first three letters of the
    # word with word[:3] and the last three with word[-3:]. If negative
    # indexing isn't comfortable, the last three letters of the word can also
    # be sliced with word[len(word)-3:]!
    #
    # In Python, we can combine these two conditions using the and keyword,
    # which short-circuits (i.e., if the first condition is false, it doesn't
    # check the second condition).
    #
    # This leads to the rather nice one-line solution:
    return len(word) >= 6 and word[:3] == word[-3:]

ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

def is_peaceful(word):
    """
    Returns whether a word is peaceful, i.e., whether its letters appear in
    strictly increasing alphabetical order (a repeated letter is not
    peaceful).

    Arguments:
    word -- The word to check (expected to be uppercase; characters that are
            not in ALPHABET all get position -1 from .find)
    >>> is_peaceful('ABORT')
    True
    >>> is_peaceful('FIRST')
    True
    >>> is_peaceful('')
    True
    >>> is_peaceful('PYTHON')
    False
    >>> is_peaceful('CHOCOLATE')
    False
    """
    # Python provides a really nice sorted function that can sort collections.
    # It returns a list, and you can turn a list into a string by joining
    # together its elements with a string:
    #   ''.join(lst)
    #
    # We can, therefore, get a nice one-line solution with the line below.
    # (Note: unlike the two loops that follow, this version also accepts words
    # with repeated letters such as 'ABBEY'.)
    #
    # return word == ''.join(sorted(word))

    # Or, you might prefer the iterative solution, using string comparisons to
    # return False as soon as a character is >= the next character (Python
    # lets you compare strings alphabetically!):
    #
    # for i in range(len(word) - 1): # don't check the last letter
    #     if word[i] >= word[i+1]:
    #         return False
    # return True

    # And finally, we can use a creative application of the .find function to
    # obtain a character's position in the alphabet:
    for i in range(len(word) - 1):  # don't check the last letter
        # We search for each character in ALPHABET.
        curr_letter_index = ALPHABET.find(word[i])
        next_letter_index = ALPHABET.find(word[i + 1])
        if curr_letter_index >= next_letter_index:
            return False
    return True

def is_stacatto(word):
    """
    Returns whether a word is a stacatto word, i.e., whether the letters in
    even positions (the 2nd, 4th, 6th, ... letters) are vowels.

    Arguments:
    word -- The word to check (expected to be uppercase)

    >>> is_stacatto('AUTOMATIC')
    True
    >>> is_stacatto('POPULATE')
    True
    >>> is_stacatto('')
    True
    >>> is_stacatto('PYTHON')
    False
    >>> is_stacatto('SPAGHETTI')
    False
    """
    VOWELS = 'AEIOUY'

    # The 2nd, 4th, ... letters live at indices 1, 3, ..., which is exactly
    # what the slice word[1::2] selects. all() stops at the first letter that
    # isn't a vowel, and is True when there are no such letters to check.
    return all(even_letter in VOWELS for even_letter in word[1::2])

def count_tridromes(filename):
    """
    Return the number of tridromes in the file

    Arguments:
    filename -- Path of a text file; each line is stripped of surrounding
                whitespace and uppercased before being checked
    """
    with open(filename, 'r') as f:
        return sum(1 for line in f if is_tridrome(line.strip().upper()))

def count_peaceful(filename):
    """
    Return the number of peaceful words in the file

    Arguments:
    filename -- Path of a text file; each line is stripped of surrounding
                whitespace and uppercased before being checked
    """
    with open(filename, 'r') as f:
        return sum(1 for line in f if is_peaceful(line.strip().upper()))

def count_stacatto(filename):
    """
    Return the number of stacatto words in the file

    Arguments:
    filename -- Path of a text file; each line is stripped of surrounding
                whitespace and uppercased before being checked
    """
    with open(filename, 'r') as f:
        return sum(1 for line in f if is_stacatto(line.strip().upper()))



## String Parsing

#### Introduction to String Parsing

              
def exclaim(s):
    """
    Returns the run of alphabetic characters immediately to the left of the
    first '!' in s, together with that '!'. Returns '' if s has no '!' or if
    there is no letter directly before it.

    Arguments:
    s -- The string to search

    >>> exclaim('xx woot! xx')
    'woot!'
    >>> exclaim('!')
    ''
    """
    mark = s.find('!')
    if mark == -1:
        return ''

    # scan left from the exclamation mark
    i = mark - 1
    while i >= 0 and s[i].isalpha():
        i -= 1

    # i now sits just before the first letter of the word (or at -1).
    word = s[i + 1:mark + 1]
    # A lone '!' has length 1, so require at least one letter in front of it.
    if len(word) >= 2:
        return word
    return ''

def vowels(s):
    """
    Returns the run of vowels (a, e, i, o, u in either case) that immediately
    follows the first ':' in s. Returns '' if s has no ':' or if the
    character after it is not a vowel.

    Arguments:
    s -- The string to search

    >>> vowels('xx :aei xx')
    'aei'
    >>> vowels('no colon')
    ''
    """
    colon = s.find(':')
    if colon == -1:
        return ''

    # scan right from the colon
    i = colon + 1
    while i < len(s) and s[i].lower() in 'aeiou':
        i += 1

    return s[colon + 1:i]



## Finding the smallest unique positive integer



def smallest_uniq_pos_int(filename):
    """
    Returns the smallest positive integer that appears exactly once in the
    file, or 0 if there is no such integer.

    Arguments:
    filename -- Path of a text file with one integer per line; blank lines
                are ignored

    Raises ValueError if a non-blank line is not an integer.
    """
    # Count how many times each positive number occurs. A dict gives constant
    # time lookups, whereas "num in some_list" rescans the list every time.
    counts = {}
    with open(filename, 'r') as f:
        for line in f:
            text = line.strip()
            if text == '':
                continue  # e.g. a trailing blank line; int('') would crash
            num = int(text)
            if num >= 1:
                counts[num] = counts.get(num, 0) + 1

    unique_nums = [num for num, times in counts.items() if times == 1]
    # 0 is never a valid answer, so it doubles as the "nothing found" result.
    return min(unique_nums, default=0)



#### Extracting Email Hostnames

This is one possible approach to decomposing this problem. Think about other ways to structure your solution!

              
def extract_hostname(line):
    """
    Returns the hostname that follows the first '@' in line, i.e., the run of
    letters and dots directly after it. Returns '' if there is no '@', or if
    that run is shorter than four characters or contains no dot.

    Arguments:
    line -- The line of text to search

    >>> extract_hostname('mail me at user@stanford.edu')
    'stanford.edu'
    >>> extract_hostname('no at sign here')
    ''
    """
    at_index = line.find('@')
    if at_index == -1:
        return ''

    # Scan forward while we keep seeing hostname characters. Stopping at the
    # first character that is neither a letter nor a dot (a space, a comma,
    # the newline at the end of the line, ...) is what ends the loop.
    i = at_index + 1
    while i < len(line) and (line[i].isalpha() or line[i] == '.'):
        i += 1

    hostname = line[at_index + 1:i]

    if len(hostname) < 4 or '.' not in hostname:
        return ''

    return hostname

def extract_all_hostnames(filename):
    """
    Returns a sorted list of the distinct hostnames found in the file, taking
    at most one hostname (the one extract_hostname returns) from each line.

    Arguments:
    filename -- Path of the text file to scan
    """
    # A set ignores duplicates for us and checks membership quickly.
    hostnames = set()
    with open(filename, 'r') as f:
        for line in f:
            hostname = extract_hostname(line)
            if hostname != '':
                hostnames.add(hostname)
    return sorted(hostnames)



## A much better email parser

This is the solution to parts 1 and 2 of this problem.

          
import sys

def is_email_char(ch):
    """
    Returns whether ch is a character allowed in an email address here:
    a letter, a digit, or one of '.', '-', '_'.
    """
    return ch.isalnum() or ch in ('.', '-', '_')

def parse_emails(s, max_per_line, permitted_host):
    """
    Returns a list of the email addresses found in s, in order of appearance.

    An address is an '@' with a non-empty run of email characters on its left
    and a run of email characters containing a '.' on its right.

    Arguments:
    s -- The text to search
    max_per_line -- The most addresses to return, or -1 for no limit
    permitted_host -- If not '', only addresses whose host (the part after
                      the '@') equals this string are returned
    """
    emails = []
    search = 0
    while (max_per_line == -1 or len(emails) < max_per_line) and search < len(s):
        at = s.find('@', search)
        if at == -1:
            break

        # Walk left from the '@' to just before the start of the username.
        start = at - 1
        while start >= 0 and is_email_char(s[start]):
            start -= 1

        # Walk right from the '@' to just past the end of the host.
        end = at + 1
        while end < len(s) and is_email_char(s[end]):
            end += 1

        host = s[at + 1:end]
        if permitted_host == '' or host == permitted_host:
            email = s[start + 1:end]
            # start stops one position before the username, so the username
            # is non-empty only when at - start is at least 2.
            if '.' in host and at - start > 1:
                emails.append(email)

        # Always resume after this candidate, even if it was rejected;
        # otherwise the same '@' would be found again forever.
        search = end
    return emails

def parse_all_emails(filename, max_per_line, permitted_host):
    """
    Returns a list of the email addresses found in the file, line by line.

    Arguments:
    filename -- Path of the text file to scan
    max_per_line -- The most addresses to take from any one line, or -1 for
                    no limit
    permitted_host -- If not '', only addresses with exactly this host are
                      returned
    """
    parsed = []
    with open(filename, 'r') as f:
        for line in f:
            parsed.extend(parse_emails(line, max_per_line, permitted_host))
    return parsed

def main():
    """
    Prints the email addresses found in a file named on the command line.

    Accepted argument layouts:
    FILENAME             -- print every address in the file
    -max N FILENAME      -- print at most N addresses per line
    FLAG HOST FILENAME   -- print only addresses whose host is HOST
    """
    # NOTE(review): the argument positions were missing from the original
    # listing; the "<flag> <value> <filename>" layout used here is an
    # assumption -- confirm against the problem statement.
    args = sys.argv[1:]
    if len(args) == 1:
        print(parse_all_emails(args[0], -1, ''))
    elif len(args) == 3:
        if args[0] == '-max':
            max_per_line = int(args[1])
            print(parse_all_emails(args[2], max_per_line, ''))
        else:
            permitted_host = args[1]
            print(parse_all_emails(args[2], -1, permitted_host))
    else:
        # Explain how to call the program rather than crashing on a missing
        # argument.
        print('Expected arguments: FILENAME, or -max N FILENAME, '
              'or FLAG HOST FILENAME')

if __name__ == "__main__":
    main()