Code Developed in CSCI-1100

Lecture 1

Module: lec1_three_doubles — Finds three consecutive pairs of double letters

Find all words containing three consecutive pairs of double letters in a file of all English words located at: http://thinkpython.com/code/words.txt

Modules used: urllib

Author: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>

Returns: All words matching condition and the count of found words

Pseudo Code:

open the file from the web with all the words in English
 
for each word in the file:
    for all positions l in the word
        if letters at positions (l and l+1) and (l+2 and l+3) and
        (l+4 and l+5) are the same then
            output word and increment the count

Code:

""" Find all words containing three consecutive pairs of double letters 
in a file of all English words located at:

           http://thinkpython.com/code/words.txt

**Modules used:**  :py:mod:`urllib` 

**Author**: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>

**Returns:** All words matching condition and the count of found words

**Pseudo Code**::

   open the file from the web with all the words in English
    
   for each word in the file:
       for all positions l in the word
           if letters at positions (l and l+1) and (l+2 and l+3) and
           (l+4 and l+5) are the same then
               output word and increment the count

"""

__version__ = '1'


import urllib

def three_double(word):
    """ Report whether the word has three back-to-back double letters.

    Every possible starting position is examined; True is returned as
    soon as the six letters starting there form three identical pairs
    in a row (for example the "ookkee" in "bookkeeper"). False is
    returned when no position qualifies, which includes every word
    shorter than six letters.

    """
    pair_offsets = (0, 2, 4)
    for start in range(len(word) - 5):
        if all(word[start + k] == word[start + k + 1] for k in pair_offsets):
            return True
    return False

# Comments that fit in a single line can be put in this format.
# Anything after a single pound sign is ignored.
# Main body of the program starts here

word_url = 'http://thinkpython.com/code/words.txt'
word_file = urllib.urlopen(word_url)

count = 0
for word in word_file:
    word = word.strip().strip('\n')
    if three_double(word):
        print word
        count = count + 1
        
if count == 0:
    print 'No words found'
else:
    print count, 'words are found'

Lecture 3

Module: lec3_function_example — Example functions from geometry

This is a program for computing area and volume of a cylinder

Author: Sibel Adali (adalis)

Code:

"""
    This is a program for computing area and volume
    of a cylinder
    
    Author: Sibel Adali (adalis)

"""


import math

def area_circle(radius):
    """ Return the area of a circle with the given radius (pi * r^2). """
    radius_squared = radius**2
    return math.pi * radius_squared

def volume_cylinder(radius, height):
    """ Return the volume of a cylinder: the area of its circular
    base multiplied by its height.

    """
    return area_circle(radius) * height

def area_cylinder(radius, height):
    """ Return the total surface area of a closed cylinder: the two
    circular end caps plus the curved side (circumference * height).

    """
    side_area = 2 * radius * math.pi * height
    end_cap_area = area_circle(radius)
    return 2 * end_cap_area + side_area

def print_info(radius, height):
    x = area_cylinder(radius,height)
    print
    print "area of a cylinder is with dimensions", radius, height, "is", x
    print "area of a circle with radius", radius, "is", area_circle(radius)
    print "volume of a cylinder with radius", radius, "and height", height,"is", volume_cylinder(radius, height)
    print
    
######################
# Main program here
# Prints the report for two sample cylinders given as (radius, height).
######################

print_info(4,10)
print_info(5,10)
    

Lecture 5

Module: lec5_stringformatting — Example of string formatting

Illustrates the use of formatted strings and functions.

Code:

""" Illustrates the use of formatted strings and functions. """

def thankyou(giftgiver, gift, goodgift):
    """ Build and return a thank-you note as a single string.

    giftgiver and gift are substituted into the greeting and the first
    sentence. An extra sentence about the gift getting good use is
    included only when goodgift is exactly the string 'yes'.

    """
    parts = ["Dear %s:\n",
             " Thank you for your thoughtful gift of %s.\n",
             " It was very nice of you to think of me.\n"]
    if goodgift == 'yes':
        parts.append("It is going to get really good use.\n")
    parts.append("Very sincerely yours, Me.")

    template = "".join(parts)
    return template % (giftgiver, gift)


# One note for a gift that will get good use ('yes') ...
print thankyou('Josh', 'Microsoft swag', 'yes')

# ... and one without the extra sentence.
print thankyou('Grandma', 'sweater', 'no')

Lecture 7

Module: lec7_middle_value — Finds the middle value in a list

Illustrates list indexing. Find the middle value in a list. The median value is the middle value in the sorted version of the list. The median function has a side effect that it changes the input list.

Code:

""" Illustrates list indexing. Find the middle value in a list.
    The median value is the middle value in the sorted version of
    the list. The median function has a side effect that it changes
    the input list.
    
"""


def middlevalue(L):
    """Return the middle value in the list, in its current order.

    If the list is of odd length, the element at the middle index is
    returned. If it is of even length (two or more values), the
    average of the middle two values is returned as a float. For an
    empty list, 0 is returned. The list is not modified.

    """

    # Floor division keeps the index an integer regardless of whether
    # "/" means integer or true division in the running interpreter.
    idx = len(L) // 2
    if len(L) % 2 == 1:  ## odd length list
        return L[idx]
    elif len(L) >= 2:
        return (L[idx-1] + L[idx]) / 2.
    else:
        return 0

def median(L):
    """Return the median of the list.

    Side effect: the list passed in is sorted in place before its
    middle value is taken, so the caller's list is changed.

    """
    L.sort()  ## in-place sort; this is the side effect
    middle = middlevalue(L)
    return middle


### Test code
mylist = []
print mylist, "middle value", middlevalue(mylist)

mylist = [1,2,3]
print mylist, "middle value", middlevalue(mylist)

mylist = [1,2,3,4,5]
print mylist, "middle value", middlevalue(mylist)

mylist = [1,2,3,4]
print mylist, "middle value", middlevalue(mylist)

Lecture 8

Module: lec8_multireturn — Tuple example, function returning multiple values

Illustrates the use of tuples to return two values at once, read the values using multiple assignment.

Code:

""" Illustrates the use of tuples to return two values at once,
    read the values using multiple assignment.
    
"""

def next_years_population(bpop, fpop):
    """ Given this year's bunny and fox populations, return next year's
    populations as the tuple (bunnies, foxes). Both results are
    clamped so they never go below zero and are truncated to integers.

    """
    bunnies = (10*bpop)/(1+0.1*bpop) - 0.05*bpop*fpop
    foxes = 0.4 * fpop + 0.02*fpop*bpop
    return (int(max(0, bunnies)), int(max(0, foxes)))

b = 100
f = 5

bnext, fnext = next_years_population(b,f)
print "Next years population", bnext, fnext

b,f = bnext, fnext
bnext, fnext = next_years_population(b,f)

print "Next years population", bnext, fnext

Lecture 9

Module: lec9_circle_overlap — Checks for overlapping circles and valid input

Program for testing whether two circles intersect. Illustrates how to validate input so that no input can break the program.

Consider adding: 1. Allow floating point values (isdigit() only checks for integers) 2. Return detailed information about why the input was wrong

Code:

""" Program for testing whether two circles intersect.
    Illustrates how to validate input so that no input can
    break the program.
    
    Consider adding: 
    1. Allow floating point values (isdigit() only checks for integers)
    2. Return detailed information about why the input was wrong
    
"""


def distance(p1, p2):
    """ Returns the distance between two points.

    Each argument is an (x, y) pair. The pairs are unpacked inside the
    function body rather than in the parameter list, because tuple
    parameters are not accepted by newer versions of Python.

    """
    (x1, y1), (x2, y2) = p1, p2
    return ((x1-x2)**2 + (y1-y2)**2)**(0.5)


def overlap_circle(circle1, circle2):
    """ Returns True if two circles overlap or one circle is
    within the other one. False is returned otherwise.

    Each argument is an (x, y, r) triple: the centre coordinates and
    the radius. Circles that touch at exactly one point count as
    overlapping.

    """
    x1, y1, r1 = circle1
    x2, y2, r2 = circle2

    # The circles share at least one point exactly when their centres
    # are no farther apart than the sum of the radii.
    return distance( (x1,y1), (x2,y2) ) <= r1+r2
    

def valid_input(m):
    """ Takes as input a list of strings. Returns True if there are
    at least three values, each of the first three consists only of
    digits (surrounding whitespace is ignored, as int() ignores it
    too) and the third value is greater than zero. Returns False
    otherwise.

    Negative and floating point values are rejected because
    isdigit() only accepts digit characters.

    """
    if len(m) < 3:
        return False

    fields = [value.strip() for value in m[:3]]
    if not all(field.isdigit() for field in fields):
        return False

    return int(fields[2]) > 0


if __name__ == "__main__":
    ## this is the main body of the program.
    ## it best to keep the main program part small for debugging 

    line1 = raw_input('Circle 1 x,y,r ==> ')
    line2 = raw_input('Circle 2 x,y,r ==> ')

    m1 = line1.split(',')
    m2 = line2.split(',')

    if valid_input(m1) and valid_input(m2):
        x1,y1,r1 = int(m[0]), int(m[1]), int(m[2])
        x2,y2,r2 = int(m[0]), int(m[1]), int(m[2])

        if overlap_circle( (x1,y1,r1), (x2,y2,r2) ):
            print "Circles are overlapping"
        else:
            print "Circles are not overlapping"
    else:
        print "You entered bad input."

Module: lec9_compare_three — Orders three values in sorted order

Code:

def order_vals(x,y,z):
    """ Return the three values as a tuple in non-decreasing order,
    using nested comparisons (at most three are evaluated).

    """
    if x <= y:
        if y <= z:
            return (x,y,z)
        if x <= z:      ## x <= z < y
            return (x,z,y)
        return (z,x,y)  ## z < x <= y

    ## from here on y < x
    if x <= z:
        return (y,x,z)
    if y <= z:          ## y <= z < x
        return (y,z,x)
    return (z,y,x)
        
        
def order_vals2(x,y,z):
    """ Return the three values as a tuple in non-decreasing order.

    Each branch tests one complete ordering of the three values, so
    every condition must constrain both neighbouring pairs; checking
    only that z <= y, for instance, would wrongly accept z < x.

    """
    if x <= y <= z:
        return (x,y,z)
    elif x <= z <= y:
        return (x,z,y)
    elif z <= x <= y:
        return (z,x,y)
    elif y <= x <= z:
        return (y,x,z)
    elif y <= z <= x:
        return (y,z,x)
    else:
        return (z,y,x)

if __name__ == "__main__":
    ## Read three integers from the user and print them in sorted order.
    ## int() raises an error if the input is not an integer.
    x = int(raw_input("Val 1 ===> "))
    y = int(raw_input("Val 2 ===> "))
    z = int(raw_input("Val 3 ===> "))
    print order_vals2(x,y,z)

Module: lec9_overlap — Checks for overlapping rectangles and ordering of semesters

This module illustrates checking for overlapping rectangles and checking for which semester comes first.

Code:

""" This module illustrates checking for overlapping 
    rectangles and checking for which semester comes first. 

"""

def no_overlap_rectangle(rect1, rect2):
    """ Returns True if two rectangles do not overlap, False if they do.

    Each rectangle is a tuple of four coordinates, (x1,y1,x2,y2) for
    the first and (x3,y3,x4,y4) for the second. The test assumes the
    first pair of each tuple holds the smaller x and y values
    (e.g. lower-left then upper-right corner) -- TODO confirm with
    callers. Rectangles that only share an edge count as overlapping.

    """
    x1, y1, x2, y2 = rect1
    x3, y3, x4, y4 = rect2
    # Disjoint when one rectangle lies entirely to one side of the other.
    return x4 < x1 or x2 < x3 or y4 < y1 or y2 < y3
    
def overlap_rectangle(rect1, rect2):
    """ Returns True if two rectangles overlap, False otherwise.

    Each rectangle is a tuple of four coordinates, (x1,y1,x2,y2) for
    the first and (x3,y3,x4,y4) for the second. The test assumes the
    first pair of each tuple holds the smaller x and y values
    -- TODO confirm with callers. Rectangles that only share an edge
    count as overlapping.

    """
    x1, y1, x2, y2 = rect1
    x3, y3, x4, y4 = rect2
    # Overlapping means: neither rectangle lies entirely to one side
    # of the other.
    return not (x4 < x1 or x2 < x3 or y4 < y1 or y2 < y3)

    
def semester_compare(sem1, sem2):
    """ Returns 1 if the first semester comes before the second,
    2 if the second semester comes before the first,
    0 if the two are equal. Spring semester comes before fall
    in the same year.

    Each argument is a (semester name, year) pair. Semester names are
    compared case-insensitively. Within the same year, any semester
    other than 'Spring' is treated as coming after a different one.

    """
    s1, y1 = sem1
    s2, y2 = sem2
    s1 = s1.capitalize()
    s2 = s2.capitalize()

    # The year decides first; the semester name only breaks ties.
    if y1 < y2:
        return 1
    elif y2 < y1:
        return 2
    else: ## y1 == y2
        if s1 == s2:
            return 0
        elif s1 == 'Spring':
            return 1
        else:
            return 2
    
    

Module: lec9_shortcut_example — Example for shortcutting boolean expressions

Illustrates the use of shortcutting in Boolean expressions. If we converted x to integer before checking if it contains a number, we would get an error.

Due to shortcutting, if x.isdigit() is false, we never run the next part that converts the string to integer.

Code:

""" Illustrates the use of shortcutting in Boolean expressions. 
    If we converted x to integer before checking if it contains a
    number, we would get an error. 
    
    Due to shortcutting, if x.isdigit() is false, we never run
    the next part that converts the string to integer.
    
"""

if __name__ == "__main__":
    x = raw_input("Enter a positive number ==> ")

    ## int(x) is only evaluated when x.isdigit() is True, so a
    ## non-numeric string never reaches the conversion.
    if x.isdigit() and int(x) > 0:
        print "ok"
    else:
        print "bad input"

Lecture 10

Module: lec10_compare_listitems_sidebyside — Printing items in a list

Find all values in the list that are less than the value before.

Code:

""" Find all values in the list that are 
    less than the value before.
    
"""

co2_levels = [ (2001, 320.03), (2003, 322.16), \
               (2004, 328.07),\
               (2006, 323.91), (2008, 341.47), \
               (2009, 348.92),\
               (2010, 357.29), (2011, 363.77), \
               (2012, 361.51),\
               (2013, 300.47) ]

i = 0
while (i < len(co2_levels)-1):
    year1, val1 = co2_levels[i]
    year2, val2 = co2_levels[i+1]
    if val2 < val1:
        print "Went down from", year1, "to", year2
    i += 1

Module: lec10_loopthroughlist — Example for accessing items in a list

Find the number of values in the list that are greater than the average value.

Code:

""" Find the number of values in the list that
    are greater than the average value.
    
"""

co2_levels = [ (2001, 320.03), (2003, 322.16), \
               (2004, 328.07),\
               (2006, 323.91), (2008, 341.47), \
               (2009, 348.92),\
               (2010, 357.29), (2011, 363.77), \
               (2012, 361.51),\
               (2013, 382.47) ]

## Code to find the total/average value
i = 0
sum_levels = 0
while i < len(co2_levels):
    year, val = co2_levels[i]
    sum_levels += val
    i += 1
    
avg_value = sum_levels/len(co2_levels)
print "The average value of co2_level is", avg_value

## Code to find values higher than the average
i = 0
count = 0
while (i < len(co2_levels)):
    year, val = co2_levels[i]
    if val > avg_value:
        count += 1
    i += 1
    
print "The number of values greater than average is", count

Module: lec10_count_down — Accessing items in a list backwards

Printing list items in backwards order.

Code:

""" Printing list items in backwards order.

"""

co2_levels = [ (2001, 320.03), (2003, 322.16), \
               (2004, 328.07),\
               (2006, 323.91), (2008, 341.47), \
               (2009, 348.92),\
               (2010, 357.29), (2011, 363.77), \
               (2012, 361.51),\
               (2013, 382.47) ]

i = len(co2_levels)-1
while (i >= 0):
    print co2_levels[i]
    i -= 1

Module: lec10_convert_to_list — Converting a list of strings to list of ints

Program to convert a string containing a list of integers to a list of integers

Code:

"""Program to convert a string containing a list of
   integers to a list of integers
   
"""

x = '1,2,3,4'

myx = x.split(',')

print myx

i = 0
while (i < len(myx)):
    myx[i] = int( myx[i] )
    i+=1

print myx

## myx = [1,2,3,4]

Module: lec10_xmas_tree — Print small Christmas tree

Print a Christmas tree –I know it is too early

Code:

""" Print a Christmas tree --I know it is too early

"""

i = 1
while (i < 11):
    print " "*(4-i/2) + "*"*i
    i += 2

print "   ***\n" * 3

Lecture 11

Module: lec11_list_aliasing_functions — Lists as function arguments

Example illustrates how function parameters are aliases of lists. In other words, if you pass a list as an argument to a function, and then change the list in the function, the original list is also changed.

Code:

""" Example illustrates how function parameters are aliases
    of lists. In other words, if you pass a list as an argument
    to a function, and then change the list in the function, the
    original list is also changed.
    
"""



def smallest_in_list(L):
    """Return the smallest value in L, or None if L is empty.

    This L will be an alias of the list used as an argument, so
    sorting it here also reorders the caller's list.

    """
    L.sort()
    if len(L)>0:
        return L[0]
    else:
        ## a bare "None" expression does nothing; return it explicitly
        return None

def worse_version_of_smallest_in_list():
    """Return the smallest value of the global list l1, or None if
    it is empty. The global list is sorted in place.

    Bad because it uses a global variable l1.
    Do not write code like this, it is a type of
    hard coding and very hard to debug.

    """

    l1.sort()
    if len(l1)>0:
        return l1[0]
    else:
        ## a bare "None" expression does nothing; return it explicitly
        return None

def fine_version_of_smallest_in_list(l1):
    """Return the smallest value in l1, or None if l1 is empty.
       The list passed in is sorted in place.

       Fine, because l1 now becomes a local variable.
       Function always uses the most local definition of a
       variable.

    """

    l1.sort()
    if len(l1)>0:
        return l1[0]
    else:
        ## a bare "None" expression does nothing; return it explicitly
        return None



l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']

print "before calling the function", l1

## smallest_in_list sorts its argument, so l1 is reordered by this call
min_val = smallest_in_list(l1)
print "The minimum value is", min_val

print "after calling the function", l1
print "the function had a side effect of changing the list."

Module: lec11_loop_example — Different ways to loop through lists

This example illustrates the use of while and
for loops.

Note that the for loop simply assigns the variable item to each element of the list in turn:

l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']

for item in l1:
    print item

This for loop is equivalent to executing:

item= 'cat' 
item= 'dog' 
item= 'zebra' 
item= 'bat' 
item= 'fish'
item= 'kangaroo'
item= 'baluga whale' 

Code:

""" This example illustrates the use of while and 
    for loops.

   Note that the for loop simply assigns the
   variable item to each element of the list in turn::
   
      l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
  
      for item in l1:
          print item

   This for loop is equivalent to executing::

      item= 'cat' 
      item= 'dog' 
      item= 'zebra' 
      item= 'bat' 
      item= 'fish'
      item= 'kangaroo'
      item= 'baluga whale' 


"""

l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']


print "First way: basic for loop"
print "list before the loop", l1
mystr = ""
for item in l1:
    item = item.capitalize()
    print item
print "list after the loop, see items did not change", l1
print

##################################################
## Given the items in the list are strings, the 
## assignment actually copies the items. Hence,
## the above loop will not change the items. To
## accomplish this, you need to use indexing, such
## as l1[i] = l1[i].capitalize()
## There are a number of ways you can get an index.
##################################################

print "First way, with an index that we create"
print "This is not a good way, don't do this."
print "list before the loop", l1
mystr = ""
i = 0
for item in l1:
    l1[i] = l1[i].capitalize()
    print "%d. %s" %(i+1, l1[i])
    i += 1
print "list after the loop, see items changed", l1
print



##################################################
## Since the while loop already creates indices
## we can just use the while loop for the same process
## as in the previous loop
##################################################


l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']

print "Second way, use a while loop to directly create an index"
i = 0
print "list before the loop", l1
while i < len(l1):
    l1[i] = l1[i].capitalize()
    print "%d. %s" %(i+1, l1[i])
    i += 1
print "list after the loop, see items changed", l1
print
    
    
    
##################################################
## The third way is to create the indices using
## the range function first. The range function
## returns a list
## >>> range(len(l1))
## [0, 1, 2, 3, 4, 5, 6]
##
## for i in range(len(l1))
##
## is the same as executing the following assignments
##
## i=0
## i=1
## i=2
## i=3
## i=4
## i=5
## i=6
##
## Now, we can use these indices to access the list items
##################################################

l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']


print "Third way, using a for loop of indices"
print "list before the loop", l1

for i in range(len(l1)):
    l1[i] = l1[i].capitalize()
    print "%d. %s" %(i+1, l1[i])
print "list after the loop, see items changed", l1
print


    
##################################################
## The final way is to create indices and list items
## together. We have not yet seen this, but it is 
## very useful. 
##
## for item in enumerate(l1):
##      print item
## is the same as executing the following assignments
##
##
## item = (0, 'Cat')
## item = (1, 'Dog')
## item = (2, 'Zebra')
## item = (3, 'Bat')
## item = (4, 'Fish')
## item = (5, 'Kangaroo')
## item = (6, 'Baluga whale')
##
## You get a list item and its index together. 
## Given you get a tuple, you can directly access them
## using the following format:
##################################################

l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']


print "Fourth way, using enumerate"
print "list before the loop", l1

for (i, val) in enumerate(l1):
    l1[i] = l1[i].capitalize()
    print "%d. %s" %(i+1, val)
print "list after the loop, see items changed", l1
print

Module: lec11_loop_listoftuples — Looping through lists of tuples with for loop

Example for loop using a list of tuples. Since each element is a tuple, it is a copy of the elements in the list (you cannot change individual components of tuples):

for item in co2_levels:
    print item

is the same as executing:

item=(2001, 320.03)
item=(2003, 322.16)
item=(2004, 328.07)
item=(2006, 323.91)
item=(2008, 341.47)
item=(2009, 348.92)
item=(2010, 357.29)
item=(2011, 363.77)
item=(2012, 361.51)
item=(2013, 300.47)

Code:

""" Example for loop using a list of tuples. Since
    each element is a tuple, it is a copy of the 
    elements in the list (you cannot change 
    individual components of tuples)::

      for item in co2_levels:
          print item
        
    is the same as executing::
    
      item=(2001, 320.03)
      item=(2003, 322.16)
      item=(2004, 328.07)
      item=(2006, 323.91)
      item=(2008, 341.47)
      item=(2009, 348.92)
      item=(2010, 357.29)
      item=(2011, 363.77)
      item=(2012, 361.51)
      item=(2013, 300.47)

"""

co2_levels = [ (2001, 320.03), (2003, 322.16), \
               (2004, 328.07),\
               (2006, 323.91), (2008, 341.47), \
               (2009, 348.92),\
               (2010, 357.29), (2011, 363.77), \
               (2012, 361.51),\
               (2013, 300.47) ]

##print all values greater than 350.

for item in co2_levels:
    if item[1] > 350:
        print item[0], item[1]
    

Module: lec11_loop_listoflists — Looping through lists of lists with for loop

Loop example with a list of lists. This is different because each element in the list is a list itself. Hence the expression:

for item in mylist:
    print item

would execute:

item = [1]
item = [2]
item = [3]

At each point, item is a list. Hence, it is not a copy of the list inside mylist, but an alias.

As a result, if I change item, the original list also changes.

Code:

""" Loop example with a list of lists. This is different
    because each element in the list is a list itself. Hence
    the expression::
    
      for item in mylist:
          print item
        
    would execute::
    
      item = [1]
      item = [2]
      item = [3]
    
    At each point, item is a list. Hence, it is not a copy
    of the list inside mylist, but an alias.
    
    As a result, if I change item, the original list also changes.

"""
mylist = [ [1], [2], [3] ]

print "before the loop", mylist

for item in mylist:
    item[0] *= 4
    
print "after the loop, see the list has changed", mylist

Lecture 12

Module: lec12_loop_comparison — While versus for loops

Simple example to compare the method to use loops to index lists.

Code:

""" Simple example to compare the method to use loops to
    index lists.

"""

animals = ['cat', 'skunk', 'deer', 'chipmunk', 'slugs']

## While loop: the index is created, tested and incremented by hand.
i = 0
while i < len(animals):
    print i, animals[i]
    i += 1
    
    
#####

print "How does the for loop work"

## For loop: range(len(animals)) supplies the same indices 0..4,
## so no manual initialisation or increment is needed.
for i in range(len(animals)):
    print i, animals[i]
         

Module: lec12_double_consecutive — Words with two consecutive double letters

Program to illustrate how to check if a word has two consecutive double letters. We also illustrate how to read user input repeatedly using a while loop:

read from the user a word
go through every position i in the word:
    check if letters i, i+1 are the same
    and if letters i+2 and i+3 are the same

Shows two example functions that implement the same check, to illustrate that any loop is exited as soon as the function executes a return.

Code:

"""
    Program to illustrate how to check if a word has two consecutive
    double letters. We also illustrate how to read user input
    repeatedly using a while loop::

       read from the user a word
       go through every position i in the word:
           check if letters i, i+1 are the same
           and if letters i+2 and i+3 are the same


    Shows two example functions to implement the same function
    to illustrate that any loop is exited as soon as the function
    executes a return.

"""

def check_two_consecutive_double(iword):
    """ Example function shows how to track whether a condition is
        True, and return it.

        If two consecutive double letters are found at any position
        in the word, the variable isdouble is set to True. Otherwise
        its original value of False is returned. The loop always runs
        to the end of the word; there is no early exit.

        Letters are compared case-insensitively, the same way
        check_two_consecutive_double2 compares them, so both functions
        give the same answer for every word.

    """

    isdouble = False
    iword = iword.lower()
    for i in range(0, len(iword)-3):
        if iword[i] == iword[i+1] and\
           iword[i+2] == iword[i+3]:
            isdouble = True
    return isdouble

def check_two_consecutive_double2(iword):
    """ Example shows how to leave a function as soon as the answer
        is known.

        The word is lower-cased, then every position is checked for a
        double letter immediately followed by another double letter.
        True is returned at the first position where that happens.
        The final return is reached only if no position matched, so
        returning False there is safe.

    """

    letters = iword.lower()
    for start in range(len(letters) - 3):
        first_pair = letters[start] == letters[start+1]
        second_pair = letters[start+2] == letters[start+3]
        if first_pair and second_pair:
            return True
    return False


###############
## Testing the program
###############


if __name__ == '__main__':
    iword = raw_input("Please enter a word (stop to end) ==> ")

    while iword.lower() != 'stop':
        isdouble = check_two_consecutive_double2(iword)
        if isdouble:
            print iword, "is a double consecutive letter word"
        else:
            print iword, "is no good"
        iword = raw_input("Please enter a word (stop to end) ==> ")   

Module: lec12_local_maxima — Lists as function arguments

This program shows how to find all the local maxima in a list

Local maxima are all the values that are greater than the values immediately before and after them in the list (the first and last entries cannot be local maxima)

Write a function to return the list of all local maxima

Code:

"""
    This program shows how to find all the local maxima in a list

    Local maxima are all the values that are greater than the values 
    immediately before and after them in the list (first and last entries
    cannot be local maxima)

    Write a function to return the list of all local maxima
"""

def local_maxima(L):
    """ Return a new list with every local maximum of L, in order.

        Only interior positions are considered (the first and last
        elements are skipped); a value qualifies when it is strictly
        greater than both of its neighbours. Lists with fewer than
        three elements give an empty result.

    """

    interior = range(1, len(L) - 1)
    return [L[k] for k in interior if L[k-1] < L[k] > L[k+1]]



## Some reasonable test cases are here

## Each assignment below replaces the previous one, so only the last
## list is actually passed to local_maxima; reorder or comment out
## lines to try a different test case.
L = [0, 2, 3, 6, 4, 8, 2, 3, 5, 1]

L = [0,1,2]

L = []

L = [0]

L = [0,1]

print "Local maxima of", L
print local_maxima(L)

Module: lec12_closest_two — Closest two values in a list

Example program to find the two closest values in a list

We will revisit this program later. We are using this problem to show how to generate all possible pairs of indices from a list without repeating any pair ( so if we generate pair of indices 0,1 we will not generate 1,0 )

We also illustrate how to find the minimum of a given set of values

To find the minimum, we need to initiate a variable first before the loop. However, we must make sure that the minimum distance is actually a real distance. Hence, we use the first pair for this purpose.

This program is not robust, as it fails if the list has fewer than two values in it.

Code:

"""  
    Example program to find the two closest values in a list 

    We will revisit this program later. We are using this problem
    to show how to generate all possible pairs of indices from a list
    without repeating any pair ( so if we generate pair of indices 0,1
    we will not generate 1,0 )

    We also illustrate how to find the minimum of a given set of values

    To find the minimum, we need to initiate a variable first before the
    loop. However, we must make sure that the minimum distance is actually 
    a real distance. Hence, we use the first pair for this purpose.

    This program is not robust, as it fails if the list has fewer than two values in it.

"""


L = [2, 40, 31, 10]

pair = (0,1) ## initial value of a pair
min_dist = abs( L[0]-L[1] ) ##initial value for closest distance between a pair

for i in range(len(L)-1):
    val1 = L[i]
    for j in range(i+1,len(L)):
        val2 = L[j]
        print "(i,j): %d,%d" %(i,j), "values", val1, val2
        new_dist = abs( L[i]-L[j])
        if new_dist < min_dist:
            min_dist = new_dist
            pair = (i,j)
        

print "Closest two values are at indices", pair
x,y = pair
print L[x], L[y]

Module: lec12_image_doubleloop — Image flip example with double loop

Example image manipulation program. It copies all pixels from one image to another new image.

To do this, we must first create an array of pixels for each image. An array is similar to a list of lists.

Code:

"""
   Example image manipulation program. It copies all pixels 
   from one image to another new image. 

   To do this, we must first create an array of pixels for each image. 
   An array is similar to a list of lists.

"""

from PIL import Image

def copy_image():
    """ Open bolt.jpg, copy it pixel by pixel into a new white RGB
    image of the same size (optionally mirrored, see image_flip)
    and display the result.

    """
    source = Image.open("bolt.jpg")
    source_pix = source.load()  ##pixel array of the original image
    w,h = source.size

    newim = Image.new("RGB", (w,h), "white")
    newpix = newim.load()  ##pixel array of the copy

    ## example to flip the image in different ways
    ## change this variable to try different versions:
    ## 'right', 'down', 'down_right'; anything else is a plain copy.
    image_flip = 'down'

    mirror_columns = image_flip in ('right', 'down_right')
    mirror_rows = image_flip in ('down', 'down_right')

    for i in range(w):
        if mirror_columns:
            source_i = w-i-1
        else:
            source_i = i
        for j in range(h):
            if mirror_rows:
                source_j = h-j-1
            else:
                source_j = j
            newpix[i,j] = source_pix[source_i,source_j]

    newim.show()



if __name__ == '__main__':
    ## run the copy only when this file is executed directly
    copy_image()

Module: lec12_control_loop1 — Loop control with break

Example program that shows control of loops using break

Code:

""" Example program that shows control of loops
    using break

"""

def print_val(val):
    print val, val**(0.5)



############### MAIN PROGRAM

if __name__ == "__main__":
    while True: ## the main code will continue to run until break is executed
        val = raw_input("Please enter a number (-1 to stop) ==> ")
        if val == '-1':
            break   
        if not val.isdigit():
            print "Please enter a number"
        else: ## do something, this function is a place holder
            print_val(int(val))
            
    print "Finished the main loop"

Module: lec12_control_loop2 — Break with double loop

Example program for controlling loops using break

Note that the break exits from the inner most loop, but not the upper loop!

Code:

""" Example program for controlling loops using break

    Note that the break exits from the inner most loop,
    but not the upper loop!

"""

if __name__ == "__main__":

    for i in range(4):
        print i
        for j in range(4):
            if i < j:
                print "BREAK"
                break
            print i,j   

Module: lec12_control_loop3 — Break and continue in loops

Example program illustrates the use of break and continue to control how a loop executes

Read input until user types -1, give warning for bad input (not number) Report the average, min, max of all the numbers user entered

Code:

"""  Example program illustrates the use of break and continue to 
     control how a loop executes

     Read input until user types -1, give warning for bad input (not number)
     Report the average, min, max of all the numbers user entered
     
"""

if __name__ == "__main__":
    user_inputs = []
    while True:
        print user_inputs
        val = raw_input("Enter a number (-1 to stop) ==> ")
        if val == '-1':
            break  ## exit the loop when -1 is entered
        if not val.isdigit():
            print "Please enter a number"
            continue    ## skip the remaining part of the loop for this input
        ##val is not -1 and val is digit
        val = int(val)
        user_inputs.append(val)
        
    print "Your statistics"
    print "Min: %d,  Max: %d, Average: %f" \
          %( min(user_inputs), max(user_inputs), \
             float(sum(user_inputs))/len(user_inputs) )

Lecture 13

Module: lec13_filewrite — File write example

Simple program to illustrate writing into a file

Open a file Write multiple lines Close the file

Code:

""" Simple program to illustrate writing into a file

    Open a file
    Write multiple lines
    Close the file

"""

if __name__ == "__main__":
    ## The with statement closes the file exactly once, after all the
    ## lines are written. Closing is what makes sure the file is
    ## properly saved, and it must not happen inside the loop: a closed
    ## file cannot be written to again.
    with open('myfile.txt','w') as f:
        for i in range(1,11):
            f.write(str(i)+ '\n')   ## each line must end with a newline

Module: lec13_parsing1 — File parsing: a regular file

Example of file parsing, reads a file of the form:

lego type, lego number 2x2,1 2x1,5

in which each line is a lego type and a given count, with a header in the first line.

Example file for this program can be found at:

Code:

"""  Example of file parsing, reads a file of the form:

     lego type, lego number
     2x2,1
     2x1,5

     in which each line is a lego type and a given count, with a header
     in the first line.

     Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/

"""

if __name__ == "__main__":
    f = open('legos.txt')

    i = 0 ## used as a counter of the lines in the file
    
    legos = []
    for line in f:  ## iterates over each line executing: line = f.readline()
        i += 1 
        if i== 1:
            continue  ## skip the header line
     
        print i ## debugging code
        lego_line = (line.strip()).split(",")
        lego_type = lego_line[0].strip()
        lego_number = int(lego_line[1])

        legos = legos + [lego_type]*lego_number  ## append the lego information
    
    print legos

Module: lec13_parsing2 — File parsing with unknown number of fields per line

Example of parsing a file that has
  • data on each line (no header)
  • but undetermined length (the last entry, the number of reviews, may vary)

We will process yelp data from Lab 4

Example file for this program can be found at:

Code:

""" Example of parsing a file that has
    - data on each line (no header)
    - but undetermined length (the last entry, the number of reviews, may vary)

   We will process yelp data from Lab 4

   Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/


"""

if __name__ == "__main__":
    count = 0
    for line in open('yelp.txt'):  ##Read each line
        count += 1
        m = line.strip().split("|")
        reviews = m[6:]  ##use split to get the reviews
        
        for i in range(len(reviews)):  ## convert reviews to integer
            reviews[i] = int( reviews[i] )
        
        ## print information for each business
        print "%s: Avg review: %.2f" \
            %(m[0], sum(reviews)/float(len(reviews)))
        ##if count > 10:   ##debugging code to test the first 10 lines
        ##    break
        
    print "Number of businesses", count

Module: lec13_parsing3 — File parsing to match a given condition

Program to parse regular data Each line is a row of data

Each line is delimited by some character

For example: CSV means comma separated values

Read the file line by line
Extract information from it

This program reads a county name and finds the names of all the Farmer’s markets in that county, and also prints the number of markets found

Example file for this program can be found at:

Code:

"""
    Program to parse regular data
    Each line is a row of data
        Each line is delimited by some character
        
    For example: CSV means comma separated values
    
    Read the file line by line
        Extract information from it

    This program reads a county name and finds the names of all the
    Farmer's markets in that county, and also prints the number found

    Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/

"""

def parse_line(line):
    """ Strips the whitespace around a line and returns the list of
        its comma separated fields.

    """
    return line.strip().split(',')
    

def parse_file(fname, county):
    f = open(fname)
    
    header = f.readline()  ## the first line is header, we are skipping it
    i = 0
    cnt = 0
    for line in f:
        i += 1
        info = parse_line(line)
        if info[0] == county:
            print info[1]  
            cnt += 1
        #if i > 2:  ## debugging code to only look at the first few lines
        #    break
    print "Found", cnt, "markets"

######

if __name__ == "__main__":
    county = raw_input("Please enter a county name ==> ")
    
    parse_file('fm.csv', county)   

Module: lec13_parsing4 — File parsing with blocks of data

Program to parse regular data Each line is a row of data

Each line is delimited by some character

For example: CSV means comma separated values

This program shows how to parse consecutive blocks of data The file is sorted by the county, so when we read a new county name we know that we started processing a new county.

Algorithm:

Skip the header Read the first line ##note assumes such a line exists Find the name of the county Read the rest of the file line by line

Extract information from it If it is the same county as the previous line, then

increment the count of markets for this county
Else ## we have a new county
store the current county and
the number of markets in that county in a list

start a new county with count=1

When finished, we have to add the last county we were counting to the list We can now sort this list to find

the top 3 or 10 counties with the highest number of markets

Example file for this program can be found at:

Code:

"""
    Program to parse regular data
    Each line is a row of data
        Each line is delimited by some character
        
    For example: CSV means comma separated values

    This program shows how to parse consecutive blocks of data
    The file is sorted by the county, so when we read a new county name
    we know that we started processing a new county.

    Algorithm:
    
    Skip the header
    Read the first line ##note assumes such a line exists
    Find the name of the county
    Read the rest of the file line by line
        Extract information from it
        If it is the same county as the previous line, then
            increment the count of markets for this county
        Else ## we have a new county
            store the current county and 
              the number of markets in that county in a list
            start a new county with count=1
     When finished, we have to add the last county we were counting to the list
     We can now sort this list to find 
         the top 3 or 10 counties with the highest number of markets

     Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/


"""

def parse_line(line):
    """ Returns the comma separated fields of a line as a list, after
        removing the whitespace at both ends of the line.

    """
    fields = line.strip().split(',')
    return fields
    

def parse_file(fname):
    f = open(fname)
    
    header = f.readline() ##skip the header line
    
    line = f.readline() ## read the line for the first county
    info = parse_line(line)
    cnt = 1   
    county = info[0]
    
    i = 0
    markets = [] ## to be used for sorting by number of markets
    for line in f:
        i += 1
        info = parse_line(line)
        if info[0] == county:  ## same as previous county
            cnt += 1
        else: ## new county found
            markets.append((cnt, county)) ## store the count for the old county
            county = info[0] ## start the new county
            cnt = 1
    markets.append( (cnt, county) ) ## the last county needs to be added
    
    markets.sort(reverse=True) 
    ## sort the list of tuples (by the first value first, 
    ## and then by the second value)

    for i in range(10): ## print the top 10
        print "County: %s, number: %d" %(markets[i][1], markets[i][0])

######
## This is the main program
#####

if __name__ == "__main__":
    parse_file('fm.csv')   

Module: lec13_files_from_web — Reading files from Web

Illustrates the use of HTML files over the web in programs

Files from the web are opened using the urllib library

After this, the file is processed in the same way as a file on your own hard disk.

Note we are using different methods to read the files but they are all equivalent and are used to illustrate the different methods

Function:

lec13_files_from_web.is_palindrome(word)[source]

Returns True if the word is a palindrome, the word is the same when read forward and backwards. It returns False otherwise.

Code:

""" Illustrates the use of HTML files over the web in programs

    Files from the web are opened using the urllib library

    After this, the file is processed in the same way as a file 
    on your own hard disk.

    Note we are using different methods to read the files
    but they are all equivalent and are used to illustrate the
    different methods
"""

import urllib

def is_palindrome(word):
    """ Returns True if the word is a palindrome, the word is the
        same when read forward and backwards.
        It returns False otherwise.

        Whitespace around the word (such as the newline at the end of
        a line read from a file) and letter case are ignored.

    """

    word = word.strip().lower()
    ## word[::-1] is the word reversed; comparing the two replaces the
    ## index loop and its integer division
    return word == word[::-1]



###########

if __name__ == "__main__":
    word_url = 'http://thinkpython.com/code/words.txt'
    word_file = urllib.urlopen(word_url)
    
    i = 0
    while True:
        i += 1
        word = word_file.readline()
        if word == '':  ## this is true when the end of file is reached
            break 
        if is_palindrome(word):
            print word.strip()

Lecture 14

Module: lec14_list_version — List solution for finding unique actors

We are trying to find the number of unique actors in the database This solution is O(N^2) operations

Example file for this program can be found at:

Code:

"""
   We are trying to find the number of unique actors in the database
   This solution is O(N^2) operations

   Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/

    
"""

if __name__ == "__main__":    
    count = 0
    actors = []
    
    
    for line in open('imdb_data.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        movie = m[1].strip()
        if name not in actors:
            actors.append(name)  ## This is an O(N) operation
    
        count += 1
    
    ## Since we repeat the loop O(N) times and each time
    ## we conduct an O(N) operation, the total complexity is
    ## O(N*N=N^2)
        
    print "Total", count, "movies"
    print "Total", len(actors), "actors"
    

Module: lec14_set_version — Set solution for finding unique actors

We are trying to find the number of unique actors in the database This solution is O(N) operations

Example file for this program can be found at:

Code:

"""
   We are trying to find the number of unique actors in the database
   This solution is O(N) operations

   Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/

    
"""

if __name__ == "__main__":    
    
    count = 0
    actors = set([])
    
    for line in open('hanks.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        movie = m[1].strip()
        actors.add(name) ## This is an O(1) operation
        count += 1
    
    ## The above loop is repeated O(N) times, hence    
    ## total complexity of this operation is O(1*N)=O(N)
    
    print "Total", count, "movies"
    print "Total", len(actors), "actors"
    
    
    for actor in sorted(actors):
        print actor
    

Module: lec14_list_to_set_version — Lower complexity list and set solution

We are trying to find the number of unique actors in the database This solution is O(N) operations

Example file for this program can be found at:

Code:

"""
   We are trying to find the number of unique actors in the database
   This solution is O(N) operations

   Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/

    
"""

if __name__ == "__main__":
    actors = []
    
    for line in open('imdb_data.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        movie = m[1].strip()
        actors.append(name) ## each append operation is O(1)
    
    ## so far 0(n)
    
    
    actorset = set(actors) ## this conversion if also O(N)
        
    print "Total", len(actorset), "actors"
    

Module: lec14_common_movies — Movies actors have in common

Find the movies common between George Clooney, Catherine Zeta-Jones and Brad Pitt

Illustrates the use of sets to answer complex queries

Example file for this program can be found at:

Code:

"""
   Find the movies common between George Clooney, 
   Catherine Zeta-Jones and Brad Pitt

   Illustrates the use of sets to answer complex queries

   Example file for this program can be found at:

       http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/


"""

def common_movies(name1, movies1, name2, movies2):
    common = movies1 & movies2

    print "Movies with %s and %s:" %(name1,name2)
    print "Total:", len(common)
    for movie in common:
        print movie

    print

if __name__ == "__main__":    
    
    movies1 = set([]) ## all movies of george clooney
    movies2 = set([]) ## all movies of catherine zeta jones
    movies3 = set([]) ## all movies of brad pitt
    
    for line in open('imdb_data.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        movie = m[1].strip()
        if name == 'Clooney, George':
            movies1.add(movie)
        if name == 'Zeta-Jones, Catherine':
            movies2.add(movie)
        if name == 'Pitt, Brad':
            movies3.add(movie)
    
    print "Number of movies for Clooney", len(movies1)
    print "Number of movies for Zeta", len(movies2)
    print "Number of movies for Pitt", len(movies3)
    
    common_movies("Clooney", movies1, "Zeta", movies2)
    common_movies("Clooney", movies1, "Pitt", movies3)
    common_movies("Pitt", movies3, "Zeta", movies2)
    
        
    print "Movies with all three together:"
    print movies1&movies2&movies3
    print
    
    print "Movies with Clooney and Pitt, but not Zeta"
    print (movies1&movies3) - movies2
    print

Lecture 15

Module: lec15_sudokusolver — Automatic Sudoku solver

Automated solver for easy sudoku puzzles by finding all possible values for a given location using sets

If there is a single possible value for a location, it can be safely placed there

The main algorithm:

while there is an empty location with only a single possible value
add the value to the board

Code:

"""
    Automated solver for easy sudoku puzzles by finding all
    possible values for a given location using sets
 
    If there is a single possible value for a location, it can be safely placed there

    The main algorithm:

    while there is an empty location with only a single possible value
         add the value to the board

"""

import lab06_util

def print_board(board):
    """ Prints a board """
    for i in range(9):
        if i%3 == 0:
            print " " + "-"*29 + " "
        line = ""
        for j in range(9):
            if j%3 == 0:
                line += "|"
            line += " " + board[i][j] + " "
        print line + "|"
    print " " + "-"*29 + " "

def values_present(board, row, col):
    """ All the values present in the board along a row/column
        and 3x3 grid, except for the given location is returned
        as a set.

        board: 9x9 list of lists
        row, col: the location (0-8) whose neighbors are collected

        Note that the returned set holds whatever is stored in the
        neighboring cells, including the marker used for empty cells.

    """

    values = set()
    for i in range(9):
        if i != col:
            values.add ( board[row][i] )  ## rest of the row
        if i != row:
            values.add ( board[i][col] )  ## rest of the column

    ## top left corner of the 3x3 grid containing the location;
    ## // is integer division regardless of the Python version
    gridx = 3*(row//3)
    gridy = 3*(col//3)
    for i in range(gridx,gridx+3):
        for j in range(gridy, gridy+3):
            ## cells sharing the row or column were already added above
            if i != row and j != col:
                values.add( board[i][j] )
    return values

def values_possible(board, row, col):
    """ Returns the set of values that can still be placed at the
        given row/column: the strings '1' through '9' minus the values
        already present in the same row, column and 3x3 grid.

    """

    digits = set( str(d) for d in range(1,10) )
    taken = values_present(board, row, col)
    return digits - taken

def find_possible_location(board):
    """ Looks, row by row, for an empty location ('.') that has exactly
        one possible value. Returns the row, the column and the value
        of the first one found, or None, None, None if there is none.

    """

    ## the empty locations, produced lazily in row by row order
    empty_cells = ( (r, c) for r in range(9) for c in range(9)
                    if board[r][c] == '.' )
    for (r, c) in empty_cells:
        candidates = values_possible(board, r, c)
        if len(candidates) == 1:
            return r, c, candidates.pop()
    return None, None, None


### Main body of the function
### Read the board, and add a value to the board repeatedly
### until no such value exists (either board is solved or 
### or there is no location with a single possible value.
if __name__ == "__main__":
    board = lab06_util.read_sudoku('easy.txt')
    
    while True:
        print_board(board)
        i,j,val = find_possible_location(board)
        if i != None:
            print "Possible at", i,j
            print "Value", val
            board[i][j] = val
            raw_input()
        else:
            break

Module: lec15_nummovies_listsolution1 — List solution 1: number of movies for each actor

Naive solution for finding the number of movies for an actor

Use a list of lists: [ [actorname, num movies], ... ]

For each movie read:
Find the index in this list for this actor Add 1 to the number of movies

This is an O(N^2) solution, so not recommended. Dictionaries are made for this type of problems.

Code:

"""
    Naive solution for finding the number of movies for an actor

    Use a list of lists: [ [actorname, num movies], ... ]

    For each movie read:
         Find the index in this list for this actor
         Add 1 to the number of movies

    This is an O(N^2) solution, so not recommended. Dictionaries
    are made for this type of problems.
"""

def find_actor(actorlist, actor):
    """ Returns the index of the first entry of actorlist (a list of
        [name, num movies] lists) whose name is actor.
        Returns None if there is no such entry.

    """

    for idx, entry in enumerate(actorlist):
        if entry[0] == actor:
            return idx
    return None


if __name__ == "__main__":
    actorlist = []  ## [name, num_movies]

    for line in open('hanks.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        movie = m[1].strip()
        i = find_actor(actorlist, name)

        if i != None: 
            actorlist[i][1] += 1
        else: ## actor is not in the list yet
            actorlist.append( [name, 1] )

        
    print actorlist

Module: lec15_nummovies_listsolution2 — List solution 2: number of movies for each actor

Second naive solution for finding the number of movies for an actor

Use a list of actor names, the actor name is repeated once for each movie they are in:

actors_list For each movie read:

Add the actor name to the list

Sort actors_list ## sorting is O(N log N) as we will see, better than O(N^2)

start with the first actor and count = 1 Go through each name in actors_list (starting at the second location):

if the name is same as the previous one:
add 1 to the number of movies
else:
print the previous count (or add to a list) start a new count for the given actor with count = 1

print the last actor (or add to a list)

This is an O(N log N) solution, better than the previous one, but still dictionary solution (next one) is much faster.

Code:

"""
    Second naive solution for finding the number of movies for an actor

    Use a list of actor names, the actor name is repeated once for each 
    movie they are in:

    actors_list
    For each movie read:
        Add the actor name to the list

    Sort actors_list ## sorting is O(N log N) as we will see, better than O(N^2)

    start with the first actor and count = 1
    Go through each name in actors_list (starting at the second location):
        if the name is same as the previous one: 
              add 1 to the number of movies
        else:
              print the previous count (or add to a list)
              start a new count for the given actor with count = 1
     print the last actor (or add to a list)

    This is an O(N log N) solution, better than the previous one, but
    still dictionary solution (next one) is much faster.
"""


if __name__ == "__main__":
    actors = []
    for line in open('hanks.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        actors.append( name )
        
    actors.sort()  ## N log N

    actorlist = []
    
    current_actor = actors[0]
    num_movies = 1
    for i in range(1, len(actors)):
        if current_actor == actors[i]:
            num_movies += 1
        else:
            actorlist.append( [current_actor, num_movies] )
            current_actor = actors[i]
            num_movies = 1
    actorlist.append( [current_actor, num_movies] )
    
    print actorlist

Module: lec15_dictionary_example — Example of dictionary methods

Simple example of dictionary methods

Code:

""" Simple example of dictionary methods """

## Example dictionary: animal names (strings) as keys, numbers as values
animals = {'tiger': 91.0, 'belgian horse': 162.6, 'indian elephant': 280.0,
 'lion': 97.0}

print animals  ## print the full dictionary

print animals.keys() ## the keys of the dictionary are returned as a list

print sorted(animals.keys()) ## the keys of the dictionary as a sorted list

print sorted(animals) ## identical to sorted(animals.keys())

print animals.values()  ## all the values in the dictionary are put in a list


## print all key/value pairs
print
for key in animals:
    print key, animals[key]

## print all key/value pairs, the result is the same as the above
## the above is the better method for iterating over dictionaries
## as it does not have the additional step of converting the keys to a list
print
for key in animals.keys():
    print key, animals[key]

Module: lec15_nummovies_dictionary_solution — Dictionary solution: number of movies for each actor

Dictionary based solution for finding the number of movies for an actor Then finding the actors with the highest number of movies This is an O(N) solution to compute the number of movies for each actor

actors is a dictionary
key: actor name value: the number of movies the actor is in
For each actor, movie in the file:
if dictionary actors has the actor as a key:
add 1 to the number of movies for this actor
else:
add the actor to the actors dictionary as a key with count 1 as value

## to find the top actors numlist = [] For each actor:

append [number of movies, actor] to numlist

sort numlist in reverse print top 3 values from numlist

Code:

"""
    Dictionary based solution for finding the number of movies for an actor
    Then finding the actors with the highest number of movies
    This is an O(N) solution to compute the number of movies for each actor

    actors is a dictionary
        key: actor name
        value: the number of movies the actor is in

    For each actor, movie in the file:
        if dictionary actors has the actor as a key:
              add 1 to the number of movies for this actor
        else:
              add the actor to the actors dictionary as a key 
              with count 1 as value

    ## to find the top actors
    numlist = []
    For each actor:
         append [number of movies, actor] to numlist
    sort numlist in reverse
    print top 3 values from numlist


"""


if __name__ == "__main__":
    actors = {}  ### key: actor name, value: number of movies
    
    for line in open('imdb_data.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        
        if name in actors:  
            ##checks if name is in the set of keys for this dictionary
            actors[name] += 1
        else: ## a name we have not yet seen
            actors[name] = 1
            

    ## now we will compute the actors with the highest number of movies 
    nummovie_list = [] ## [ [num_movies, actor name], .. ]
    
    for name in actors: ## for each key in actors
        nummovie_list.append ( [actors[name], name] )
    
    nummovie_list.sort(reverse=True)  ## highest value first
    ## though the second value, names are sorted in reverse order also

    ## print the top 20 values
    for i in range(20):
        print "%s (%d)" %(nummovie_list[i][1], \
                          nummovie_list[i][0])

Module: lec15_setofmovies_dictionary — Dictionary solution: storing set of movies for each actor

Final dictionary example for movies

It constructs a dictionary with
key: actor name value: the set of movies the actor is in

The program then asks repeatedly for the name of an actor and prints the set of movies for that actor

Code:

"""
    Final dictionary example for movies

    It constructs a dictionary with
        key: actor name
        value: the set of movies the actor is in

    The program then asks repeatedly for the name of an actor
    and prints the set of movies for that actor 
"""

if __name__ == "__main__":
    actors = {}  ### key: actor name, value: set of movies
    
    for line in open('hanks.txt'):
        m = line.strip().split('|')
        name = m[0].strip()
        movie = m[1].strip()

        if name in actors:  
            ##checks if name is in the set of keys for this dictionary
            actors[name].add( movie )
        else: ## new name, initialize with a set containing the current movie
            actors[name] = set( [movie] )

    while True: ## ask for user input repeatedly
        name = raw_input('Give me an actor (-1 to stop) ==> ')
        if name == '-1':
            break
        if name not in actors:
            print "I do not have this actor"
        else:
            print "Movies for this actor:"
            ## remember: actors[name] is a set, so we can iterate over it
            for movie in actors[name]:
                print "\t", movie

Lecture 16-17

Class: Point2d — Illustrates use of classes for 2d points

Class for creating and manipulating 2-dimensional points

Methods:

Point2d.__init__(x0=0, y0=0)[source]

Method to initialize. x=0,y=0 provides default values. Example calls:

x = Point2d(5,10) 
x = Point2d()  ## same as x = Point2d(0,0)
Point2d.__str__()[source]

Method to print the object

Point2d.scale(c)[source]

Method to scale a point

Point2d.magnitude()[source]

Returns the magnitude of an object

Point2d.distance(other)[source]

Returns the distance of an object to another

Point2d.grid_distance(other)[source]

Returns the grid distance between two points

Point2d.copy()[source]

Returns a new object identical to the current one.

Point2d.__add__(other)[source]

Called when adding two points: pt1 + pt2, returns a new object

Point2d.__sub__(other)[source]

Called when subtracting two points: pt1 - pt2, returns a new object

Point2d.__eq__(other)[source]

Called when checking if two points are equal: pt1 == pt2. Returns a Boolean

Point2d.move(command)[source]

Code:

"""
    Class for creating and manipulating 2-dimensional points
"""

import math

class Point2d(object):
    def __init__(self, x0=0, y0=0):
        """ Method to initialize. x=0,y=0 provides default values.
            Example calls::

                   x = Point2d(5,10) 
                   x = Point2d()  ## same as x = Point2d(0,0)

        """
        self.x = x0
        self.y = y0
        
    def __str__(self): 
        """ Method to print the object. The coordinates are formatted
            with %d, so they are shown as integers.

        """

        return '(%d, %d)' %(self.x, self.y)
    
    def scale(self, c): 
        """ Method to scale a point: multiplies both coordinates by c,
            changing the point itself. Returns nothing.

        """
        self.x *= c
        self.y *= c
        
    def magnitude(self): 
        """ Returns the magnitude of an object (distance to the origin) """
        return math.sqrt( self.x**2 + self.y**2 )
    
    def distance(self, other):
        """ Returns the (straight line) distance of an object to another """
        dx = self.x - other.x
        dy = self.y - other.y
        return math.sqrt ( dx**2 + dy**2 )
    
    def grid_distance(self, other):
        """ Returns the grid distance between two points: the sum of
            the absolute differences of the x and the y values.

        """
        dx = self.x - other.x
        dy = self.y - other.y
        return abs(dx) + abs(dy)

    def copy(self):
        """ Returns a new object identical to the current one. """
        return Point2d(self.x, self.y)
    
    def __add__(self, other): 
        """ Called when adding two points: pt1 + pt2, returns a new object """
        return Point2d( self.x+other.x, \
                        self.y+other.y )

    def __sub__(self, other): 
        """ Called when subtracting two points: pt1 - pt2, returns a new object  """
        
        return Point2d( self.x-other.x, \
                        self.y-other.y )

    def __eq__(self, other):
        """ Called when checking if two points are equal: pt1 == pt2.
            Returns a Boolean. Comparing with something that is not a
            Point2d is left to Python (NotImplemented), which makes
            the comparison False instead of raising an error.

        """
        if not isinstance(other, Point2d):
            return NotImplemented
        return self.x==other.x and self.y==other.y

    def __ne__(self, other):
        """ Called when checking if two points differ: pt1 != pt2.
            Python 2 does not derive != from __eq__, so it is defined
            here as the opposite of __eq__.

        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
    
    
    def move(self, command):
        """ Moves the point one step in the direction given by command
            ('up', 'down', 'left' or 'right', in any letter case).
            Any other command leaves the point unchanged.

        """
        command = command.lower()
        if command == 'up':
            self.y += 1
        elif command == 'down':
            self.y -= 1
        elif command == 'left':
            self.x -= 1
        elif command == 'right':
            self.x += 1
        
if __name__ == '__main__':
    ### first let us tests all the implemented methods 
    pt1 = Point2d(5, 10)  ##cals to __init__
    pt2 = Point2d(10, 20)  ##cals to __init__
    print pt1, pt2 ##cals to __str__
    pt1.scale(10)  ## function that returns no value is called like this
    m = pt1.magnitude()  ## function returns value but takes no arguments
    print m
    d = pt1.distance(pt2)  ## function to find distance between two points
    d2 = pt2.distance(pt1) ## which returns a value
    print d, d2   ## the two different ways to call should be equal

    pt3 = pt1+pt2 ## calls __add__
    print pt3
    pt3 = pt1-pt2 ## calls __sub__
    print pt3
    
    pt4 = pt1.copy()
    print pt4
    print pt4==pt1, '(True if copy works)' ## calls __eq__, they should be the same
    print pt1==pt2, '(should be False)' ## they should be different


    ## Let us use the points to solve a previous homework
    print
    print 'HW solution'
    pt1 = Point2d(5, 10)
    cmd1 = ['up','down','left']
    pt2 = Point2d(15, 3)
    cmd2 = ['right','stay', 'down']

    print "Wallace at:", pt1, "Gromit at:", pt2
    for i in range(len(cmd1)):
        pt1.move( cmd1[i] )
        pt2.move( cmd2[i] )
        print pt1, pt2, pt1.grid_distance(pt2)

Class: Time — Illustrates use of classes for storing Time

Methods:

Time.__init__(h, m, s)[source]

Store time internally as seconds

Time.__str__()[source]

Print time externally as military time

Code:

class Time(object):
    """ A time of day, stored internally as a number of seconds. """

    def __init__(self, h, m, s):
        """Store time internally as seconds """
        self.sec = s + m*60 + h*60*60
        
    def __str__(self):
        """ Print time externally as military time """
        return '%02d:%02d:%02d' % self.convert()
    
    def convert(self):
        """ Convert time to its input form: the tuple (h, m, s) """
        ## floor division (//) keeps whole hours and minutes no matter
        ## which division rules are in effect for /
        h = self.sec // 3600
        m = (self.sec - h*3600) // 60
        s = self.sec - h*3600 - m*60
        return (h,m,s)
        
    def __add__(self, other):
        """ Take in time object, add to self, return new time object.
            A sum that runs past midnight wraps around to the next day.
        """
        ## The wrap-around must change the seconds value; subtracting a
        ## number from the Time object itself would call __sub__ with an
        ## int and fail.
        return Time(0, 0, (self.sec + other.sec) % 86400)
    
    def __sub__(self, other):
        """ Take in time object, subtract from self, return new time object.
            A negative result is clamped to 00:00:00.
        """
        return Time(0, 0, max(self.sec - other.sec, 0))
    
    def am_or_pm(self):
        """ Is Time before or after 12:00:00 """
        if self.sec < 43200:
            return "AM"
        else:
            return "PM"
        
if __name__ == "__main__":
    ## Small demonstration of the Time class
    time1 = Time(5,5,5)
    time2 = Time(12,0,0)
    print str(time1+time2)  ## calls __add__, then __str__
    print str(time2-time1)  ## calls __sub__, then __str__
    print time2.am_or_pm()
    print time1.am_or_pm()

Class: Address — Simple class for storing address info

Address class. Could be expanded to separate out details (such as city, state, zip, etc.) to be returned individually

Methods:

Address.__init__(address)[source]
Address.__str__()[source]

Code:

"""
   Address class. Could be expanded to separate out details (such as city,
   state, zip, etc.) to be returned individually

"""

class Address(object):
    """ Holds a complete address as a single value. """

    def __init__(self, address):
        """ Remember the address exactly as it was given """
        self.address = address

    def __str__(self):
        """ The printable form is the stored address itself """
        stored = self.address
        return stored

Class: Restaurant — Class for storing restaurant info

Restaurant class, uses the address class

Methods:

Restaurant.__init__(name, latitude, longitude, address, url, category, ratings=[])[source]

create new Restaurant object

Restaurant.__str__()[source]

create string of restaurant

Restaurant.average_rating()[source]

return average rating for restaurant

Restaurant.max_rating()[source]

return max rating for restaurant if it exists

Restaurant.min_rating()[source]

return min rating for restaurant if it exists

Code:

"""
    Restaurant class, uses the address class

"""

from Address import *

class Restaurant(object):
    """ Stores the details of one restaurant together with its ratings. """

    def __init__(self,name,latitude,longitude,address,url,category,ratings=None):
        """ create new Restaurant object

            The address is wrapped in an Address object. ratings is the
            list of scores for the restaurant; when it is left out, the
            restaurant starts with its own empty list.
        """
        self.name = name
        self.latitude = latitude
        self.longitude = longitude
        self.address = Address(address)
        self.url = url
        self.category = category
        ## A default of list() in the signature is created only once and
        ## would be shared by every restaurant built without ratings.
        self.ratings = [] if ratings is None else ratings

    def __str__(self):
        """ create string of restaurant """
        return self.name + str(self.address)
    
    def average_rating(self):
        """ return average rating for restaurant, or -1 if it has no ratings """
        if len(self.ratings) == 0:
            return -1
        
        ## float() keeps the fractional part even when every rating is an int
        return float(sum(self.ratings))/len(self.ratings)
        
    def max_rating(self):
        """ return max rating for restaurant if it exists, -1 otherwise """
        if len(self.ratings) == 0:
            return -1
        
        return max(self.ratings)
    
    def min_rating(self):
        """ return min rating for restaurant if it exists, -1 otherwise """
        if len(self.ratings) == 0:
            return -1  
        
        return min(self.ratings)
    
        

Module: lat_long_to_dist — Module for computing distance

Module for computing the Haversine distance between two latitude and longitude coordinates in miles

Code:

""" 
    Module for computing the Haversine distance between two
    latitude and longitude coordinates in miles

"""

import math

def distance_from_lat_long( lat1, long1, lat2, long2 ):
    """ Return the Haversine distance, in miles, between the point
        (lat1, long1) and the point (lat2, long2).

        All four arguments are angles given in degrees.
    """
    #  The trigonometric functions work in radians
    to_radians = math.pi / 180.0
    phi1, lam1 = lat1 * to_radians, long1 * to_radians
    phi2, lam2 = lat2 * to_radians, long2 * to_radians

    #  Haversine formula: hav is the squared half-chord term and
    #  angle is the central angle between the two points
    half_dlat = (phi1 - phi2) / 2
    half_dlong = (lam1 - lam2) / 2
    hav = math.sin(half_dlat)**2 + \
          math.cos(phi1) * math.cos(phi2) * math.sin(half_dlong)**2
    angle = 2*math.atan2( math.sqrt(hav), math.sqrt(1-hav) )

    #  Radius of the sphere: 6371 divided by 1.609
    #  (presumably kilometres converted to miles)
    radius = 6371 / 1.609
    return radius * angle

Module: lec17_search_restaurants — Search for restaurants using the Restaurant class

Shows the use of the Restaurant class in finding restaurants with a given criteria

Code:

"""
    Shows the use of the Restaurant class in finding
    restaurants with a given criteria

"""

from Restaurant import Restaurant
from lat_long_to_dist import *

def convert_input_to_restaurant(line):
    """Builds a Restaurant object from a single line of the yelp file.

    The line holds fields separated by '|'. The first six fields are
    passed to Restaurant as name, latitude, longitude, address, url and
    category; every field after that is an integer rating. Each '+' in
    the address field becomes a line break.
    """

    fields = line.strip('\n').split('|')

    ## everything past the sixth field is a rating
    scores = [ int(s) for s in fields[6:] ]

    name = fields[0]
    latitude = float(fields[1])
    longitude = float(fields[2])
    address = fields[3].replace("+","\n")
    url = fields[4]
    category = fields[5]

    return Restaurant(name, latitude, longitude, address, url, category, scores)

def build_restaurant_list(filename):
    """ Parses the given filename containing yelp data and
    returns a list of restaurants. Each item is the object that
    convert_input_to_restaurant builds from one line of the file.
    """

    restaurants = []
    ## the with statement closes the file even if a line fails to parse
    with open(filename) as yelp_file:
        for line in yelp_file:
            restaurants.append( convert_input_to_restaurant(line) )
    return restaurants

if __name__ == "__main__":
    restaurants = build_restaurant_list("yelp.txt")
    print restaurants
    
    ## Read the search criteria. The position is typed as two numbers
    ## separated by a comma; they are passed below as the first
    ## latitude/longitude pair of the distance function.
    position = raw_input("Position ==> ").split(",")
    for i in range(len(position)):
        position[i] = float(position[i])
    distance = float(raw_input("Distance ==> "))
    rating = float(raw_input("Rating ==> "))
    rType = raw_input("Type ==> ")
    
    ## Print every restaurant that passes all three filters
    for restaurant in restaurants:
        # if restaurant outside distance from given position
        if (distance < distance_from_lat_long(position[0],position[1],restaurant.latitude,restaurant.longitude)):
            continue
        # if average rating for restaurant less than input rating
        if (rating > restaurant.average_rating()):
            continue
        # check restaurant is the same type (ignoring upper/lower case)
        if (rType.lower() != restaurant.category.lower()):
            continue
        
        print restaurant
        print

Lecture 18

Module: lec18_convert_dictionary — Converting key/value for a dictionary

Illustrates how to convert a dictionary of hobbies with person as key and set of hobbies as value to a new dictionary with hobby as key, and the set of people with that hobby as value.

Code:

"""
    Illustrates how to convert a dictionary of hobbies with 
    person as key and set of hobbies as value to a new dictionary
    with hobby as key, and the set of people with that hobby as 
    value.

"""


if __name__ == "__main__":

    ## key: person, value: set of that person's hobbies
    hobbies = {'Gru' : set(['Hiking','Cooking']), \
               'Edith' : set(['Hiking','Board Games'])}
    
    ## key: hobby, value: set of people with that hobby
    new_dict = {}
    
    for person in hobbies:
        for h in hobbies[person]:
            if h in new_dict: ## this hobby already exists in the dictionary
                new_dict[h].add(person)
            else: ## a new hobby we have not yet seen
                new_dict[h] = set([person])
    
    print new_dict

Module: lec18_dictionary_delete — Dictionary delete example

Illustrates the deletion from dictionaries and counting values from a dictionary

Code:

"""
    Illustrates the deletion from dictionaries and counting values from 
    a dictionary

"""

if __name__ == "__main__":
    ## key: person, value: the color stored for that person
    peoples = {"Thomas" : "red", "Ashok" : "green", "Sandy" : "red", \
               "Allison" : "orange", "Fei" : "blue", "Natasha" : "blue", \
               "Brennah" : "blue" }
    
    print peoples
    peoples["Fei"] = "green"  ## assigning to an existing key replaces its value
    print peoples
    
    del peoples["Sandy"]  ## removes the key and its value from the dictionary
    print peoples
    
    colors = {}
    #color: count

    ## find the number of people for each color
    for person in peoples:
        color = peoples[person]
        if color in colors:
            colors[color] += 1
        else:
            colors[color] = 1
    
    ## find the max value in all of the values for the dictionary
    max_value = max(colors.values())
    
    ## find the color for the max value
    ## (every color tied for the max count is printed)
    for color in colors:
        if colors[color] == max_value:
            print color
    

Module: lec18_actor_last_names — Find the first name of all actors with the same last name

Illustrates how to extract the last name from each line of a file and construct a dictionary of last names

Code:

"""
    Illustrates how to extract the last name from each line of a file
    and construct a dictionary of last names

"""

if __name__ == "__main__":
    
    last_names = {} ## key: actor last name, value: set of first names of actors with that last name

    for line in open("imdb_data.txt"):
        ## first, split on "|" for different fields
        words = line.strip().split('|') 

        ## next, split the name at zero position on "," 
        ## to separate out the first and last name
        ## (the part before the comma is used as the last name)
        actor = words[0].strip().split(',') 

        ## remove any additional spaces in all names
        for i in range(len(actor)):  
            actor[i] = actor[i].strip()
        
        ## check if there were multiple names and, if so,
        ## add the first name of this actor to the dictionary
        if len(actor) > 1:
            if actor[0] in last_names:
                last_names[actor[0]].add(actor[1])
            else:
                last_names[actor[0]] = set([actor[1]])

    ## all actors with last name "Bacon"
    ## (this lookup fails with a KeyError if the file has no such actor)
    print last_names["Bacon"]

Module: lec18_movieyears_names — Finding years with max number of movies

Creates a dictionary from the IMDB data in which years for movies is a key and the names of the movies in that year is a value (as movies can be repeated for different actors, using a set is best here).

We can then use this dictionary to:

  • print the movies in a given year
  • find the number of movies in each year

Code:

"""
     Creates a dictionary from the IMDB data in which
     years for movies is a key and the names of the movies in that
     year is a value (as movies can be repeated for different actors,
     using a set is best here).

     We can then use this dictionary to:
 
      -  print the movies in a given year
      -  find the number of movies in each year

"""

def get_year_movies():
    """ Reads imdb_data.txt and returns a dictionary in which each key
        is a year and each value is the set of names of the movies
        listed for that year.

        Every line of the file is split on '|'; the second field is
        the movie name and the third field is the year.
    """
    
    imdb_file = "imdb_data.txt"
    
    years_and_movies = {}
    ## the with statement closes the file once all lines are read
    with open(imdb_file) as movie_data:
        for line in movie_data:
            words = line.strip().split('|')
            movie_name = words[1].strip()
            year = int(words[2])
            ## setdefault creates the empty set the first time a year is seen
            years_and_movies.setdefault(year, set()).add(movie_name)
    
    return years_and_movies

def find_busiest_years1(years_and_movies):
    # one way using lists and sorting to find top year
    # slower than second method as it has complexity of n*log(n)+n
    movie_count = []
    for year in years_and_movies:
        movie_count.append([len(years_and_movies[year]),year])
    
    movie_count.sort(reverse=True)
    print movie_count[0]

def find_busiest_years2(years_and_movies):
    # using just dictionary and storing max as we go long
    # faster method as it's just complexity of n
    max_value = 0
    max_year = 0
    for year in years_and_movies:
        if len(years_and_movies[year]) > max_value:
            max_value = len(years_and_movies[year])
            max_year = year
    
    print max_value, max_year

def print_movies_in_a_year(years_and_movies):
    """ Shows how to construct a string from a set of values
        and print strings of a given length.
    """
    max_length = 60
    year = int(raw_input("Enter a year ==> "))
    if year in years_and_movies:

        mystr = "" ##used for printing multiple movies in a line
        for movie in sorted( years_and_movies[year] ):
            mystr += movie + ", "
            if len(mystr)>max_length:
                print mystr
                mystr = ""
        print mystr.strip().strip(",")
    else:
        print "This year is not found"

if __name__ == "__main__":
    ## key: year, value: set of movie names for that year
    years_and_movies = get_year_movies()

    ## we will use this dictionary to do a number of things

    ## print movies in a given year using the dictionary
    print_movies_in_a_year(years_and_movies)

    ## test the first solution to the busiest years
    find_busiest_years1(years_and_movies)

    ## test the second solution to the busiest years
    find_busiest_years2(years_and_movies)

Module: lec18_bacon_degree — Find actors with Bacon degree 1

Using the IMDB data, computes people with Bacon number 1: all people who starred in a movie with Kevin Bacon

Code:

"""
    Using the IMDB data, computes people with Bacon number 1: 
    all people who starred in a movie with Kevin Bacon

"""

if __name__ == "__main__":
    actors = {}  ## key:actor, value: set of movies for that actor
    movies = {}  ## key:movie, value: set of actors in that movie

    ## first populate the above dictionaries
    ## (each line is split on "|": actor name first, movie name second)
    for line in open('imdb_data.txt'):
        words = line.strip().split('|')
        actor = words[0].strip()
        movie_name = words[1].strip()

        if actor in actors:
            actors[actor].add(movie_name)
        else:
            actors[actor] = set([movie_name])
            
        if movie_name in movies:
            movies[movie_name].add(actor)
        else:
            movies[movie_name] = set([actor])

    ## now find the actors with Bacon degree 1
    ## key: Bacon number, value: set of actors with that number
    bacon_numbers = {0 : set(["Bacon, Kevin"]), \
                     1 : set([])}

    for movie in actors["Bacon, Kevin"]: ## for each of Kevin Bacon's movies
        bacon_numbers[1] |= movies[movie] ## set union all actors for the movie

    ## Remember: Kevin Bacon should not be in degree 1
    bacon_numbers[1] = bacon_numbers[1] - bacon_numbers[0]

    print bacon_numbers[1]
    print len(bacon_numbers[1]), "actors"

Module: lec18_common_movies — Find max number of common movies between pair of actors

Finds the number of common movies for all pairs of actors. Then, finds the actors with the highest number of common movies (returns only one pair of actors even if there are ties).

Code:

"""
    Finds the number of common movies for all pair of actors
    Then, finds the actors with the highest number of common movies
    (returns only one pair of actors even if there are ties)

"""

if __name__ == "__main__":
    actors = {} ## key: actor name, value: set of movies of that actor
    
    ## populate the actor dictionary
    for line in open(imdb_file):
        words = line.strip().split('|')
        actor = words[0].strip()
        movie_name = words[1].strip()
        if actor in actors:
            actors[actor].add(movie_name)
        else:
            actors[actor] = set([movie_name])
    
    
    ## the following finds the max number of common movies for
    ## any pair of actors 
    
    common_count = 0 ## variable to hold the maximum number of common movies
    actor_names = [] ## the names of actors with the current max value
    
    ## double for loop to find all pair of actors 
    for actor in actors:
        for actor2 in actors:
            if actor == actor2:
                continue
            common = len(actors[actor] & actors[actor2])
            if common > common_count: 
                common_count = common
                actor_names = [actor,actor2]
    
    print common_count,actor_names

Lecture 19

Module: lec19_search_smallesttwo — Find smallest two values in a list

Find smallest two values in a list

Code:

"""
     Find smallest two values in a list
     
"""

import time
import random


def search4(L):
    """
        Returns the two smallest values of L as (smallest, second
        smallest), or (None, None) when L has fewer than two values.

        Start from the first two values of the list, then look at
        every remaining value once. Most values are dismissed with a
        single comparison against the current second smallest.

        O(N) solution -- linear, N is length of the list
        
    """
    if len(L)<2:
        return None, None

    remaining = iter(L)
    smallest, second = next(remaining), next(remaining)
    if smallest > second:
        smallest, second = second, smallest

    for value in remaining:
        if not value < second:
            continue   ## too large to matter
        if value < smallest:
            ## new overall minimum, the old one moves to second place
            smallest, second = value, smallest
        else:
            second = value
    return smallest, second


def search1(L):
    """
        Returns the two smallest values of L as (smallest, second
        smallest), or (None, None) when L has fewer than two values.

        Start from the first two values of the list, then compare
        every remaining value first against the smallest and then
        against the second smallest, updating them as needed.

        O(N) solution - linear, N is length of the list
    """
    if len(L)<2:
        return None, None

    values = iter(L)
    low1, low2 = next(values), next(values)
    if low1 > low2:
        low1, low2 = low2, low1

    for candidate in values:
        if candidate < low1:
            ## new minimum, the previous minimum becomes the runner-up
            low1, low2 = candidate, low1
        elif candidate < low2:
            low2 = candidate

    return low1, low2




def search3(L):
    """
         Returns the two smallest values of L as (smallest, second
         smallest), or (None, None) when L has fewer than two values.

         Works on a sorted copy of L, so L itself is not changed;
         the answer is the first two elements of that copy.

         O(N log N) solution due to sorting, N is length of the list
    
    """
    if len(L) <= 1:
        return None, None

    ordered = sorted(L)
    return ordered[0], ordered[1]


def search2(L):
    """
         Returns the two smallest values of L as (smallest, second
         smallest), or (None, None) when L has fewer than two values.

         Works on a copy of L, so L itself is not changed: find the
         smallest value with min, remove one occurrence of it from
         the copy, and find the smallest of what is left.

         O(N) solution, N is length of the list
    
    """
    if len(L) < 2:
        return None, None

    remaining = list(L)           ## O(N) copy
    smallest = min(remaining)     ## O(N) operation
    remaining.remove(smallest)    ## O(N) operation
    return smallest, min(remaining)   ## O(N) operation


if __name__ == "__main__":
    
    maxrange = 10000000
    ## create a random list: the values 0..maxrange-1 in shuffled order
    L = range(maxrange)
    random.shuffle( L )
    
    ## measure the run time of each search function
    start = time.time()
    a,b = search1(L)
    end = time.time()
    print "Search 1 took", end-start
    
    ## the timings of search2 and search3 are switched off by
    ## wrapping them in a string
    """
    start = time.time()
    a,b = search2(L)
    end = time.time()
    print "Search 2 took", end-start
    
    start = time.time()
    a,b = search3(L)
    end = time.time()
    print "Search 3 took", end-start
    """
    
    start = time.time()
    a,b = search4(L)
    end = time.time()
    print "Search 4 took", end-start    

Module: test_19_search_smallesttwo — Test module for lec19_search_smallesttwo

Testing module for finding the two smallest values

Test cases: number of values in the list: 0, 1, 2, 2+ duplicates or not: min two are the same or not ordering of values: min two values are ordered or not location of min two: min two are the first/last values

Nose module: 1. A set of test functions, each function will be executed one -> test module is run properly in the if __name__ == “__main__” component 2. Each function will assert something (what should be true)

Code:

"""   
     Testing module for finding the two smallest values
     
     Test cases:
     number of values in the list: 0, 1, 2, 2+
     duplicates or not: min two are the same or not
     ordering of values: min two values are ordered or not
     location of min two: min two are the first/last values
     
     Nose module:
     1. A set of test functions, each function will be executed one -> test module is run properly in the if __name__ == "__main__" component
     2. Each function will assert something (what should be true)
    

"""
import nose
from lec19_search_smallesttwo import *


def test1():
    """ empty list: there are no two values to return """
    a, b = search1( [] )
    assert (a, b) == (None, None)

def test2():
    """ a single value is not enough either """
    a, b = search1( [1] )
    assert (a, b) == (None, None)
    
def test3():
    """ exactly two values, already in order """
    a, b = search1( [1,2] )
    assert (a, b) == (1, 2)

def test4():
    """ exactly two values, out of order """
    a, b = search1( [2,1] )
    assert (a, b) == (1, 2)
    
def test5():
    """ the two smallest values are the last two of the list """
    a, b = search1( [3,2,1] )
    assert (a, b) == (1, 2)

def test6():
    """ the two smallest values are the first and the last of the list """
    a, b = search1( [1,4,5,3,2] )
    assert (a, b) == (1, 2)
    
def test7():
    """ the smallest value appears twice """
    a, b = search1( [1,4,5,1,2] )
    assert (a, b) == (1, 1)

if __name__ == "__main__":
    ## let nose find and run every test function in this module
    nose.runmodule()

Module: lec19_indexof_smallesttwo — Find the index of the smallest two values in a list

Find smallest two values in a list

Code:

"""
     Find smallest two values in a list
     
"""

import time
import random


def search1(L):
    """
        Returns the positions in L of its two smallest values as
        (index of smallest, index of second smallest), or (None, None)
        when L has fewer than two values.

        Start with the first two positions of the list, then go
        through every remaining element and update the two positions
        so that they always point at the two smallest values seen.
        
        O(N) solution -- linear, N is length of the list
        
    """
    if len(L)<2:
        return None, None

    ## loc1 points at the smaller of the first two values
    if L[0] > L[1]:
        loc1, loc2 = 1, 0
    else:
        loc1, loc2 = 0, 1

    for i, value in enumerate(L):
        if i < 2:
            continue   ## the first two are already accounted for
        if not value < L[loc2]:
            continue   ## too large to matter
        if value < L[loc1]:
            ## new minimum, the old minimum becomes the runner-up
            loc1, loc2 = i, loc1
        else:
            loc2 = i

    return loc1, loc2





def search3(L):
    """
         Returns the positions in L of its two smallest values as
         (index of smallest, index of second smallest), or
         (None, None) when L has fewer than two values.

         The two values are found on a copy of L: take the min,
         remove one occurrence of it, and take the min again.
         Their positions are then looked up in L itself.

         O(N) solution -- linear, N is length of the list
    
    """
    
    if len(L) < 2:
        return None, None

    rest = list(L)             ##O(N) to make a copy
    smallest = min(rest)       ##O(N) to find the min
    rest.remove(smallest)      ##O(N) to remove
    runner_up = min(rest)      ##O(N) to find the min

    loc1 = L.index(smallest)
    ## when both values are the same, the second one has to be
    ## searched for after the position of the first
    start = loc1+1 if smallest == runner_up else 0
    loc2 = L.index(runner_up, start)
    return loc1, loc2

def search2(L):
    """
         Returns the positions in L of its two smallest values as
         (index of smallest, index of second smallest), or
         (None, None) when L has fewer than two values.

         Everything is done on a copy of L: find the min and its
         position, take it out of the copy, then find the min and
         its position in what is left. The second position is
         shifted back to match the original list.

        O(N) solution -- linear, N is length of the list
    
    """
    
    if len(L) < 2:
        return None, None

    rest = L[:]                   ##O(N) to make a copy
    loc1 = rest.index(min(rest))  ##O(N) to find the min and its position

    ## loc1 is the first occurrence of the min, which is the
    ## element that gets taken out here; O(N) as later items shift
    del rest[loc1]
    loc2 = rest.index(min(rest))  ##O(N) to find the min and its position
    if loc2 >= loc1:
        loc2 += 1  ##to account for removed item

    return loc1, loc2

if __name__ == "__main__":
    
    maxrange = 3000000
    ## create a random list: the values 0..maxrange-1 in shuffled order
    L = range(maxrange)
    random.shuffle( L )
    
    ## measure the run time of each of the three search functions
    start = time.time()
    a,b = search1(L)
    end = time.time()
    print "Search 1 took", end-start
    
    start = time.time()
    a,b = search2(L)
    end = time.time()
    print "Search 2 took", end-start
    
    start = time.time()
    a,b = search3(L)
    end = time.time()
    print "Search 3 took", end-start

Module: test19_indexof_smallesttwo — Test module for lec19_indexof_smallesttwo

Testing module for finding index of two smallest values

Test cases: number of values in the list: 0, 1, 2, 2+ duplicates or not: min two are the same or not ordering of values: min two values are ordered or not location of min two: min two are the first/last values

Nose module: 1. A set of test functions, each function will be executed one -> test module is run properly in the if __name__ == “__main__” component 2. Each function will assert something (what should be true)

Code:

"""   
     Testing module for finding index of two smallest values
     
     Test cases:
     number of values in the list: 0, 1, 2, 2+
     duplicates or not: min two are the same or not
     ordering of values: min two values are ordered or not
     location of min two: min two are the first/last values
     
     Nose module:
     1. A set of test functions, each function will be executed one -> test module is run properly in the if __name__ == "__main__" component
     2. Each function will assert something (what should be true)
    

"""

import nose
from lec19_indexof_smallesttwo import *


def test1():
    """ empty list: there are no two positions to return """
    a, b = search1( [] )
    assert (a, b) == (None, None)

def test2():
    """ a single value is not enough either """
    a, b = search1( [1] )
    assert (a, b) == (None, None)
    
def test3():
    """ exactly two values, already in order """
    a, b = search1( [1,2] )
    assert (a, b) == (0, 1)

def test4():
    """ exactly two values, out of order """
    a, b = search1( [2,1] )
    assert (a, b) == (1, 0)
    
def test5():
    """ the two smallest values are the last two of the list """
    a, b = search1( [3,2,1] )
    assert (a, b) == (2, 1)

def test6():
    """ the two smallest values are the first and the last of the list """
    a, b = search1( [1,4,5,3,2] )
    assert (a, b) == (0, 4)
    
def test7():
    """ the smallest value appears twice """
    a, b = search1( [1,4,5,1,2] )
    assert (a, b) == (0, 3)

if __name__ == "__main__":
    ## let nose find and run every test function in this module
    nose.runmodule()

Lecture 20

Module: sort_sol — Sort a list of values

This module implements different sort functions. Each function returns the sorted list, but ins_sort also changes the input list while merge_sort does not.

Merge sort is implemented two ways: 1. Iterative merge sort: merge_sort 2. Recursive merge sort: merge_sort_rec

Code:

"""
    This module implements different sort functions. 
    Each function returns the sorted list, but ins_sort also 
    changes the input list while merge_sort does not. 

    Merge sort is implemented two ways:
    1. Iterative merge sort: merge_sort
    2. Recursive merge sort: merge_sort_rec

"""

import random

def ins_sort(L):
    """ 
        Insertion sort implementation.

        Sorts L in place and also returns it. Each value in turn is
        moved left past every larger value that comes before it.

    """

    for i in range(1,len(L)):
        value = L[i]
        hole = i   ##position where value will eventually be placed
        ##shift larger values one step right to open up the hole
        while hole > 0 and L[hole-1] > value:
            L[hole] = L[hole-1]
            hole -= 1
        L[hole] = value
    return L    


def merge(L1, L2):
    """Assume L1 and L2 are sorted,
       merge and return a single new sorted list.

       L1 and L2 are only read, never changed. When two values
       compare as equal, the one from L1 is placed first.
       
    """
    M = []
    ## i and j are the positions of the next unused value in L1 and L2;
    ## moving an index forward is O(1), unlike popping from the front
    i, j = 0, 0
    while i < len(L1) and j < len(L2):
        if L2[j] < L1[i]:
            M.append( L2[j] )
            j += 1
        else:
            M.append( L1[i] )
            i += 1
    ## at most one of the two lists still has values left
    M.extend( L1[i:] )
    M.extend( L2[j:] )
    return M


def merge_sort(L):
    """ Iterative merge sort solution, uses merge as a subroutine.

        Returns a new sorted list; L itself is not changed.
    """

    if len(L)==0:
        return []
    
    ## start with every item in a sorted list of its own
    tomerge = [ [item] for item in L ]

    ## get pairs of lists from tomerge,
    ## merge and put in a new list
    while len(tomerge) > 1:
        tomerge_new = []
        ## stepping through by index avoids pop(0), which has to
        ## shift every remaining item each time it is called
        for k in range(0, len(tomerge)-1, 2):
            tomerge_new.append( merge(tomerge[k], tomerge[k+1]) )
        ## an odd list at the end has no partner in this round
        if len(tomerge) % 2 == 1:
            tomerge_new.append( tomerge[-1] )
        tomerge = tomerge_new
    return tomerge[0]
    
def merge_sort_rec(L):
    """ Recursive merge sort solution, uses merge as a subroutine.

        Returns a new sorted list; L itself is not changed.
    """

    if len(L) <= 1: ##a list of length 0 or 1 is already sorted
        ## return a copy so that the result is always a new list,
        ## just as it is for longer inputs and for merge_sort
        return L[:]
    else: ## lists 2 or more items: divide in half, sort each sublist and merge
        mid = len(L)//2  ## floor division: a slice position must be an integer
        L1 = merge_sort_rec( L[:mid] )
        L2 = merge_sort_rec( L[mid:] )
        return merge(L1, L2)


if __name__ == "__main__":
    ## sort a shuffled copy of 0..9 and compare with the ordered original
    x = range(10)
    y = x[:]
    random.shuffle(y)
    z = merge_sort_rec(y)
    print z
    print x==z   ## True if the sort worked

Module: test_sort — Test module for sort functions

Testing code for Computer Science 1, Lecture 20 on sorting. This assumes that the sort functions are all in file sort_sol.py, each taking one list as its only argument, and that their names are ins_sort and merge_sort. Assumes that the functions return a sorted list.

All tests are based on random permutations of integers. Also try permutations that are almost sorted by only switching few pairs. We leave that as an exercise.

Code:

'''
    Testing code for Computer Science 1, Lecture 20 on sorting. This
    assumes that the sort functions are all in file sort_sol.py, each taking
    one list as its only argument, and that their names are ins_sort
    and merge_sort. Assumes that the functions return a sorted list.

    All tests are based on random permutations of integers. Also try 
    permutations that are almost sorted by only switching few pairs.
    We leave that as an exercise.

'''

import sort_sol
import time
import random


def run_and_time(name, sort_fcn, v, known_v):
    '''
    Run the function passed as sort_fcn, timing its performance and
    double-checking if it correct.  The correctness check is probably
    not necessary.
    '''
    print "Testing " + name
    t0 = time.time()
    x = sort_fcn(v)
    t1 = time.time()
    print "Time: %.4f seconds" %(t1-t0)
    print "Is correct?", x==known_v
    print


def run_and_time_python_sort(v):
    '''
    Run and time the Python list sort function on the list.
    '''
    print "Running Python's list sort function"
    t0 = time.time()
    v.sort()
    t1 = time.time()
    print "Time: %.4f seconds" %(t1-t0)
    print



####################################################

if __name__ == '__main__':
    n = int(raw_input("Enter the number of values ==> "))
    print "----------"
    print "Running on %d values" %n
    print "----------"

    ## v is the expected sorted result, v1 a shuffled copy of it
    v = range(n)
    v1 = v[:]
    random.shuffle(v1)

    ## a separate copy of the same shuffled list for each sort to work on
    v2 = v1[:]
    v3 = v1[:]
    v4 = v1[:]

    ## NOTE: this disabled call is labelled "selection sort" but would
    ## run sort_sol.ins_sort
    #run_and_time("selection sort", sort_sol.ins_sort, v1, v )
    run_and_time("merge sort", sort_sol.merge_sort, v2, v )
    run_and_time("recursive merge sort", sort_sol.merge_sort_rec, v3, v )
    # the calls above pass a function as an argument to another function
    run_and_time_python_sort(v4 )

Lecture 21

Module: lec21_recursion — Example recursive functions

Example recursive functions

Code:

""" 
    Example recursive functions

"""

def blast(N):
    """ 
        Simple example of recursion to show how the call stack
        works with printing.

    """

    if N > 0:
        blast(N-1)
        print N
    else:
        print "Blast off!"
        
def factorial(N):
    """
        Returns N!, using the idea that N! = N * (N-1)!
        The recursion stops at N <= 1, for which the result is 1.
    """

    if N <= 1:
        return 1
    return N * factorial(N-1)

def fib(N):
    """ 
        Returns the Nth fibonacci number, computed recursively as
        the sum of the two numbers before it. Any N below 2 is
        returned unchanged.
        It is much better to use an iterative solution for this.

    """

    return N if N < 2 else fib(N-1) + fib(N-2)
    

if __name__ == "__main__":
    ## print fib(0) through fib(9) on a single line
    for i in range(10):
        print fib(i),   ## the trailing comma keeps the output on the same line
    print   ## end the line

Module: lec21_sierpinski — Sierpinski recursive fractal drawing

This module draws the Sierpinski triangles up to a given depth using the Tkinter module. It illustrates the use of recursion in drawing self-similar patterns in smaller and smaller regions of the larger triangle.

See:

http://en.wikipedia.org/wiki/Sierpinski_triangle

Code:

"""

     This module draws the Sierpinski triangles up to a given depth
     using the Tkinter module. It illustrates the use of recursion in
     drawing self-similar patterns in smaller and smaller regions of the
     larger triangle.

     See:

     http://en.wikipedia.org/wiki/Sierpinski_triangle

"""

import Tkinter as tk
import math

def sierpinski(chart_1, lowleft, top, lowright, level, maxlevel):
    """Recursive function to draw Sierpinski triangles in chart_1
    within the triangle whose corners are lowleft, top and lowright,
    each given as an (x, y) pair.

    Each call paints its triangle red and the triangle joining the
    midpoints of its sides white, then repeats the process in the
    three corner triangles with level increased by one. The recursion
    ends when level reaches maxlevel.

    """

    if level == maxlevel:
        return  ##Base case to terminate the process.

    def midpoint(p, q):
        """ Point halfway between p and q """
        return (p[0]+q[0])/2, (p[1]+q[1])/2

    chart_1.create_polygon([lowleft, top, lowright], fill="red") 
    leftmid = midpoint(lowleft, top)
    rightmid = midpoint(lowright, top)
    bottommid = midpoint(lowright, lowleft)
    chart_1.create_polygon([leftmid, rightmid, bottommid], fill="white") 
    chart_1.update() 

    ##Recursive calls to redraw triangles in three corners of the 
    ##current triangle area
    deeper = level + 1
    corners = [ (lowleft, leftmid, bottommid), \
                (leftmid, top, rightmid), \
                (bottommid, rightmid, lowright) ]
    for left, peak, right in corners:
        sierpinski(chart_1, left, peak, right, deeper, maxlevel)

def restart(chart):
    """Clear chart and redraw the Sierpinski triangle one level deeper
    than the previous call did.

    The depth to draw is read from the module-level one-element list
    maxlevel_var, which is incremented after drawing so that the next
    call goes one level further.

    """

    ## Clear the canvas that was passed in, not a module-level global,
    ## so the function works for whichever canvas the caller supplies.
    chart.delete(tk.ALL)

    ## Equilateral triangle with side 600 resting on the line y = 600;
    ## its height is 300*sqrt(3).
    sierpinski(chart, (0,600), (300,600-300*math.sqrt(3)), (600,600),
               0, maxlevel_var[0])
    maxlevel_var[0] += 1

if __name__ == "__main__":
    ## Main window with a 600x600 white canvas to draw on.
    root = tk.Tk()
    root.title("Sierpinski Recursion Example")
    chart_1 = tk.Canvas(root, width=600, height=600, background="white")
    chart_1.grid(row=0, column=0)

    ## Depth counter kept in a one-element list so that restart() can
    ## update it in place. Starting at 1 gives a simple triangle with
    ## an inverted triangle inside.
    maxlevel_var = [1]

    ## Draw the Sierpinski triangles once
    restart(chart_1)

    ## Button row under the canvas: one button closes the window, the
    ## other redraws the picture one level deeper.
    root.frame = tk.Frame(root)
    root.frame.button = tk.Button(root.frame, text="quit",
                                  command=root.destroy)
    root.frame.button2 = tk.Button(root.frame, text="draw again!",
                                   command=lambda: restart(chart_1))
    root.frame.button.grid()
    root.frame.button2.grid()
    root.frame.grid()

    root.mainloop()

Lecture 22

Module: lec22_modes — Finding modes of a list

Finding the modes of a list:

  • Given a list L of integers, find the value that occurs most often
  • Should function change the input list? NO
  • Could there be more than one value that is the mode? YES
  • N: number of values in the list
  • M: number of distinct values in the list ( M < N or M << N ?)
  • What other variations of this function might I be interested in?

Algorithm:

  1. Find the frequency of each value
  2. Find the max frequency
  3. All values with that frequency

Method 1: Dictionary.

  1. Dictionary: keys are distinct values, values are counts
  2. Find max of all values

  3. Go through each key, and if the value is the max value, add key to the output

Method 2: Set.
  1. Find the distinct values using set(L)
  2. Count the frequency for each value, put in a list and sort
  3. Return the top elements in the list
Method 3: List.
  1. Sort a copy of the list
  2. Go through and count each item
  3. Keep track of the mode and the corresponding values for the mode (while iterating through the list)

Code:

""" Finding the modes of a list:
    
    - Given a list L of integers, find the 
      value that occurs most often
    
    - Should function change the input list? NO

    - Could there be more than one value that
      is the mode? YES
    
    - N: number of values in the list 
    - M: number of distinct values in the list ( M < N  or M << N ?)
    
    - What other variations of this function I might be interested in?
    
Algorithm:

    1. Find the frequency of each value
    2. Find the max frequency
    3. All values with that frequency
    
Method 1: Dictionary.

    1. Dictionary: keys are distinct values, values are counts
    2. Find max of all values
    3. Go through each key, and if the value
    is the max value, add key to the output
    
Method 2: Set.
    1. Find the distinct values using set(L)
    2. Count the frequency for each value, put in a list and sort
    3. Return the top elements in the list


Method 3: List.
    1. Sort a copy of the list
    2. Go through and count each item
    3. Keep track of the mode and the corresponding values for
       the mode (while iterating through the list)
    
"""

import time
import random

def modes_dict(L):
    """ Find the modes of L with a dictionary of counts.

    Returns a pair (values, count): the list of values that occur
    most often in L and how many times each of them occurs. An empty
    L gives ([], 0). L itself is not modified.

    Overall complexity: O(N+M)

    """

    if len(L) == 0:
        return [], 0

    ## One pass over the N items; every dictionary access is O(1)
    tally = {}
    for value in L:
        tally[value] = tally.get(value, 0) + 1

    ## Largest count among the M distinct values: O(M)
    highest = max(tally.values())

    ## Keep every key whose count equals the largest one: O(M)
    winners = [value for value in tally if tally[value] == highest]
    return winners, highest


def modes_set(L):
    """ Find the modes of L using a set of its distinct values.

    Returns a pair (values, count): the list of values that occur
    most often in L and how many times each of them occurs. The
    values come out largest first. An empty L gives ([], 0).

    Overall complexity: O(N+N*M+Mlog M)

    """

    if len(L) == 0:
        return [], 0

    ## set(L) is O(N) and finds the M distinct values; each
    ## L.count() call is O(N), so building the pairs is O(N*M).
    ## Sorting the M (count, value) pairs is O(M log M).
    ranked = sorted(((L.count(value), value) for value in set(L)),
                    reverse=True)

    ## The largest count is now at the front of the list
    top = ranked[0][0]

    ## Collect values from the front and stop at the first pair
    ## that has a smaller count
    winners = []
    for freq, value in ranked:
        if freq != top:
            break
        winners.append(value)

    return winners, top

def modes_list(L):
    """ Find the modes of L by sorting a copy and counting runs.

    Returns a pair (values, count): the list of values that occur
    most often in L and how many times each of them occurs. The
    values come out in increasing order. An empty L gives ([], 0).
    L itself is not modified.

    Overall complexity: O(N log N + N)

    """

    if len(L) == 0:
        return [], 0

    ## Work on a sorted copy so that equal values sit next to each
    ## other: O(N log N)
    ordered = L[:]
    ordered.sort()

    ## Collapse the sorted copy into [value, run length] pairs: O(N)
    runs = [[ordered[0], 1]]
    for value in ordered[1:]:
        if value == runs[-1][0]:
            runs[-1][1] += 1
        else:
            runs.append([value, 1])

    ## The longest run length is the mode count; keep every value
    ## whose run is that long
    longest = max(length for _, length in runs)
    winners = [value for value, length in runs if length == longest]
    return winners, longest

def time_alg(f, L):
    """ Call f(L) once and print the name of f together with the
    number of seconds the call took, measured with time.time().
    The value returned by f is discarded.

    """
    began = time.time()
    f(L)
    elapsed = time.time() - began
    ## A single parenthesized expression, so the same line is printed
    print("%s: %f seconds" % (f.__name__, elapsed))
    
def random_list(N, divider):
    """ Return a list of N random integers, each drawn with
    random.randint from the range 1 .. N // divider (both ends
    included).

    The number of distinct values (M) is therefore at most
    N // divider. random.randint rejects an empty range, so for
    N > 0 the quotient N // divider must be at least 1.

    """
    ## Explicit floor division keeps the upper bound an integer even
    ## where "/" means true division.
    return [random.randint(1, N // divider) for _ in range(N)]

if __name__ == "__main__":
    L = [1,2,3,4,2,2,4,4,5,6]
    L2 = [1,2,3,4,2,2,2,4,4,5,6]
    
    print "Testing with different N, fixed M"
    print
    N = 1000
    divider = 10
    
    for i in range(3):
        L = random_list(N, divider)
        print "L has", N, "values", N/divider, "distinct values"
        time_alg(modes_dict, L)
        time_alg(modes_set, L)
        time_alg(modes_list, L)
        print        
        N *= 10
        divider *= 10
        
    print "Testing with different M (number of distinct values), fixed N"
    print

    N = 100000
    divider = 100
    
    for i in range(3):
        L = random_list(N, divider)
        print "L has", N, "values", N/divider, "distinct values"
        time_alg(modes_dict, L)
        time_alg(modes_set, L)
        time_alg(modes_list, L)
        print        
        divider *= 10
        

Table Of Contents

Previous topic

Transition to the next class - Data Structures

This Page