Section #5 Solutions

October 18th, 2020

Written by Brahm Capoor, Juliette Woodrow, Peter Maldonado, Kara Eng, Tori Qiu and Parth Sarin

Strings

String Slicing

s[1:6]
s[:2] or s[0:2]
s[6:9]
s[6:] or s[6:10]
s[6] or s[6:7]
s[:] or s[0:10] (or just s)

String Construction

        
def only_one_first_char(s):
    """
    Return s with every later occurrence of its first character removed.

    The first character itself is kept; all other characters keep their
    original order. An empty string is returned unchanged.

    Arguments:
        s -- The string to process
    """
    if s == "":
        return ""

    lead = s[0]
    # Keep everything after position 0 that differs from the leading character.
    kept = [ch for ch in s[1:] if ch != lead]
    return lead + "".join(kept)


def make_gerund(s):
    """
    Return s with 'ing' appended, unless s already ends in 'ing'.

    When s already ends in 'ing', that suffix is replaced by 'ly' instead
    (so 'ing' itself becomes 'ly').

    Arguments:
        s -- The word to transform
    """
    if len(s) >= 3 and s.endswith('ing'):
        # Swap the existing 'ing' suffix for 'ly'.
        return s[:-3] + 'ly'
    return s + 'ing'



def put_in_middle(outer, inner):
    """
    Return outer with inner inserted at its midpoint.

    The split point is len(outer) // 2, so for an odd-length outer the
    left half is the shorter one.

    Arguments:
        outer -- The string to split in half
        inner -- The string to place between the two halves
    """
    split_at = len(outer) // 2
    left, right = outer[:split_at], outer[split_at:]
    return left + inner + right

Word Puzzles

          
def is_palindrome(str):
    """
    Returns whether the given string reads the same forwards and backwards
    once it has been normalized (non-alphabetic characters dropped and
    letters lowercased by normalize).

    Arguments:
        str -- The string to check

    >>> is_palindrome('racecar')
    True
    >>> is_palindrome('chris')
    False
    >>> is_palindrome('mehran')
    False
    >>> is_palindrome('Race car!')
    True
    """
    normalized = normalize(str)  # this removes any non-alphabetic characters
    # Reverse the *normalized* text: comparing against the reversed raw input
    # would reject anything containing capitals, spaces or punctuation.
    rev = normalized[::-1]  # oh yeah, feel the power of slices!
    return normalized == rev


def normalize(str):
    """
    Return a "normalized" copy of the given string.

    Only characters for which isalpha() is true are kept (letters of any
    Unicode-supported alphabet), and each kept character is lowercased.
    For example, whitespace, punctuation, and digits are dropped.

    Arguments:
        str -- The string to normalize

    >>> normalize('abc 1!2 def')
    'abcdef'
    >>> normalize('여보, 안경')
    '여보안경'
    """
    return ''.join(ch.lower() for ch in str if ch.isalpha())


def is_tridrome(word):
    """
    Returns whether or not word is a tridrome, i.e., the first three letters
    are the same as the last three letters.

    Arguments:
        word -- The word to check

    >>> is_tridrome('ENTERTAINMENT')
    True
    >>> is_tridrome('UNDERGROUND')
    True
    >>> is_tridrome('DEFENESTRATION')
    False
    >>> is_tridrome('PYTHON')
    False
    >>> is_tridrome('')
    False
    """
    # Two things must hold:
    #   1. the word is at least six letters long, and
    #   2. its first three letters equal its last three letters.
    # Rule out short words first so the slices below are only compared for
    # words that are long enough.
    if len(word) < 6:
        return False

    # word[-3:] would work just as well for the tail; the explicit
    # len(word) - 3 form avoids negative indexing.
    head = word[:3]
    tail = word[len(word) - 3:]
    return head == tail


ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

def is_peaceful(word):
    """
    Returns whether a word is peaceful, i.e., whether its letters appear in
    sorted order.

    Each letter must come strictly before the following one, so a repeated
    adjacent letter makes the word not peaceful. Letters are ranked by their
    position in ALPHABET; any character not found there (for example a
    lowercase letter) is ranked -1 by `.find`.

    Arguments:
        word -- The word to check
    >>> is_peaceful('ABORT')
    True
    >>> is_peaceful('FIRST')
    True
    >>> is_peaceful('')
    True
    >>> is_peaceful('PYTHON')
    False
    >>> is_peaceful('CHOCOLATE')
    False
    """
    # Other ways to approach this problem:
    #
    #   * Compare the word with its sorted self. `sorted` returns a list,
    #     which ''.join(lst) turns back into a string:
    #         word == ''.join(sorted(word))
    #     (Unlike the code below, this accepts repeated letters.)
    #
    #   * Walk the word and compare neighbouring characters directly, since
    #     Python lets you compare strings alphabetically:
    #         word[i] >= word[i+1]  ->  not peaceful
    #
    # The version used here looks each letter up in ALPHABET with `.find`
    # and compares the positions of every adjacent pair. Pairing word with
    # word[1:] stops one short of the end, so the last letter is never
    # compared with a non-existent neighbour.
    return all(
        ALPHABET.find(current) < ALPHABET.find(following)
        for current, following in zip(word, word[1:])
    )


def is_stacatto(word):
    """
    Returns whether a word is a stacatto word, i.e., whether the letters in
    even positions are vowels.

    "Even positions" counts from one: the 2nd, 4th, 6th, ... letters, which
    sit at indices 1, 3, 5, ... of the string. Y counts as a vowel, and only
    uppercase vowels are recognized.

    Arguments:
        word -- The word to check

    >>> is_stacatto('AUTOMATIC')
    True
    >>> is_stacatto('POPULATE')
    True
    >>> is_stacatto('')
    True
    >>> is_stacatto('PYTHON')
    False
    >>> is_stacatto('SPAGHETTI')
    False
    """
    VOWELS = 'AEIOUY'

    # word[1::2] picks out exactly the letters at indices 1, 3, 5, ...;
    # `all` stops at the first one that is not a vowel.
    return all(letter in VOWELS for letter in word[1::2])


def count_tridromes(filename):
    """
    Return the number of tridromes in the file.

    Each line of the file is treated as one word; it is stripped of
    surrounding whitespace and uppercased before being checked.

    Arguments:
        filename -- Path of the word file to read
    """
    with open(filename, 'r') as f:
        return sum(1 for line in f if is_tridrome(line.strip().upper()))


def count_peaceful(filename):
    """
    Return the number of peaceful words in the file.

    Each line of the file is treated as one word; it is stripped of
    surrounding whitespace and uppercased before being checked.

    Arguments:
        filename -- Path of the word file to read
    """
    with open(filename, 'r') as f:
        return sum(1 for line in f if is_peaceful(line.strip().upper()))


def count_stacatto(filename):
    """
    Return the number of stacatto words in the file.

    Each line of the file is treated as one word; it is stripped of
    surrounding whitespace and uppercased before being checked.

    Arguments:
        filename -- Path of the word file to read
    """
    with open(filename, 'r') as f:
        return sum(1 for line in f if is_stacatto(line.strip().upper()))

String Parsing

Introduction to String Parsing

              
def exclaim(s):
    """
    Return the word that ends at the first '!' in s, including the '!'.

    The word is the run of alphabetic characters immediately to the left of
    the exclamation mark. If s has no '!', or no letters directly precede
    the first one, the empty string is returned.

    Arguments:
        s -- The string to search
    """
    bang = s.find('!')
    if bang < 0:
        return ''

    # Move the start of the word left for as long as the character just
    # before it is a letter.
    begin = bang
    while begin > 0 and s[begin - 1].isalpha():
        begin -= 1

    candidate = s[begin:bang + 1]
    # A lone '!' (length 1) does not count as a word.
    return candidate if len(candidate) >= 2 else ''

def vowels(s):
    """
    Return the run of vowels that immediately follows the first ':' in s.

    Vowels are a, e, i, o, u in either case; the original casing is kept in
    the result. If s has no ':' or no vowel directly follows it, the empty
    string is returned.

    Arguments:
        s -- The string to search
    """
    colon = s.find(':')
    if colon == -1:
        return ''

    collected = []
    # Walk right from just past the colon, stopping at the first non-vowel.
    for ch in s[colon + 1:]:
        if ch.lower() not in 'aeiou':
            break
        collected.append(ch)
    return ''.join(collected)

Finding the smallest unique positive integer

          
          
  def smallest_uniq_pos_int(filename):
    """
    Return the smallest positive integer that appears exactly once in the
    file, or 0 if there is no such integer.

    The file is read one integer per line. Values below 1 are ignored, and
    blank lines are skipped. A non-blank line that is not an integer raises
    ValueError (from int()).

    Arguments:
        filename -- Path of the file of integers to read
    """
    seen = set()      # positive numbers encountered at least once
    repeated = set()  # positive numbers encountered more than once
    with open(filename, 'r') as f:
      for line in f:
        if line.strip() == '':
          continue  # tolerate blank lines, e.g. at the end of the file
        num = int(line)
        if num < 1:
          continue
        # Set membership is O(1), so the whole scan is linear in file size.
        if num in seen:
          repeated.add(num)
        else:
          seen.add(num)

    # Whatever was seen but never repeated is unique; 0 signals "none".
    return min(seen - repeated, default=0)

Extracting Email Hostnames

This is one possible approach to decomposing this problem. Think about other ways to structure your solution!

              
def extract_hostname(line):
    """
    Return the hostname that follows the first '@' in line, or '' if none.

    The hostname is the run of letters and dots immediately after the '@'.
    It is only accepted when it is at least four characters long and
    contains a '.'; otherwise the empty string is returned.

    Arguments:
        line -- The line of text to search
    """
    at_pos = line.find('@')
    if at_pos == -1:
        return ''

    begin = at_pos + 1
    stop = begin
    # Advance while we are still on hostname characters; a space, any other
    # character, or the end of the line ends the scan.
    while stop < len(line) and (line[stop].isalpha() or line[stop] == '.'):
        stop += 1

    host = line[begin:stop]
    if '.' in host and len(host) >= 4:
        return host
    return ''

def extract_all_hostnames(filename):
    """
    Return a sorted list of the distinct hostnames found in the file.

    Each line contributes at most one hostname (whatever extract_hostname
    returns for it); lines yielding '' are ignored.

    Arguments:
        filename -- Path of the text file to scan
    """
    unique = set()
    with open(filename, 'r') as f:
        for line in f:
            found = extract_hostname(line)
            if found != '':
                unique.add(found)
    return sorted(unique)

A much better email parser

This is the solution to parts 1 and 2 of this problem.

          
import sys

def is_email_char(ch):
    """
    Return whether ch may appear in an email username or hostname:
    an alphanumeric character, or one of '.', '-', '_'.

    Arguments:
        ch -- The character to test
    """
    if ch in ('.', '-', '_'):
        return True
    return ch.isalnum()

def parse_emails(s, max_per_line, permitted_host):
    """
    Return the list of email addresses found in s, in order of appearance.

    For each '@', the username is the run of email characters (see
    is_email_char) directly to its left and the host is the run directly to
    its right. An address is kept only if the username is non-empty, the
    host contains a '.', and the host passes the permitted_host filter.

    Arguments:
        s -- The text to scan
        max_per_line -- Maximum number of addresses to return, or -1 for
                        no limit
        permitted_host -- If not '', only addresses whose host equals this
                          string are returned
    """
    emails = []
    search = 0
    while (max_per_line == -1 or len(emails) < max_per_line) and search < len(s):
        at = s.find('@', search)
        if at == -1:
            break

        # Scan left from the '@' to find where the username begins.
        start = at - 1
        while start >= 0 and is_email_char(s[start]):
            start -= 1
        # Scan right from the '@' to find where the host ends.
        end = at + 1
        while end < len(s) and is_email_char(s[end]):
            end += 1

        user = s[start + 1:at]
        host = s[at + 1:end]
        host_allowed = permitted_host == '' or host == permitted_host
        # Require a real username: a bare '@host.com' is not an address.
        if host_allowed and user != '' and '.' in host:
            emails.append(user + '@' + host)

        # Resume after this host; end > at, so the loop always advances.
        search = end
    return emails

def parse_all_emails(filename, max_per_line, permitted_host):
    """
    Return every email address found in the file, in order of appearance.

    Each line is handed to parse_emails with the same max_per_line and
    permitted_host settings, and the per-line results are concatenated.

    Arguments:
        filename -- Path of the text file to scan
        max_per_line -- Passed through to parse_emails for every line
        permitted_host -- Passed through to parse_emails for every line
    """
    collected = []
    with open(filename, 'r') as f:
        for line in f:
            collected += parse_emails(line, max_per_line, permitted_host)
    return collected

def main():
    """
    Command-line entry point: print the emails parsed from a file.

    Accepted argument shapes (after the script name):
        FILENAME                 -- print every email in the file
        -max N FILENAME          -- print at most N emails per line
        <other flag> HOST FILENAME -- print only emails whose host is HOST

    Any first argument other than '-max' is treated as the host-filter flag.
    """
    args = sys.argv[1:]
    if len(args) == 1:
        print(parse_all_emails(args[0], -1, ''))
    else:
        if args[0] == '-max':
            max_per_line = int(args[1])
            # Print the result here too, as the one-argument form does.
            print(parse_all_emails(args[2], max_per_line, ''))
        else:
            permitted_host = args[1]
            print(parse_all_emails(args[2], -1, permitted_host))

if __name__ == "__main__":
    main()