Code Developed in CSCI-1100

Lecture 1

Module: lec1_three_doubles — Finds three consecutive pairs of double letters

Find all words containing three consecutive pairs of double letters in a file of all English words located at: http://thinkpython.com/code/words.txt

Modules used: urllib

Author: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>

Returns: All words matching condition and the count of found words

Pseudo Code:

open the file from the web with all the words in English
 
for each word in the file:
    for all positions l in the word
        if letters at positions (l and l+1) and (l+2 and l+3) and
        (l+4 and l+5) are the same then
            output word and increment the count

Code:

""" Find all words containing three consecutive pairs of double letters 
in a file of all English words located at:

           http://thinkpython.com/code/words.txt

**Modules used:**  :py:mod:`urllib` 

**Author**: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>

**Returns:** All words matching condition and the count of found words

**Pseudo Code**::

   open the file from the web with all the words in English
    
   for each word in the file:
       for all positions l in the word
           if letters at positions (l and l+1) and (l+2 and l+3) and
           (l+4 and l+5) are the same then
               output word and increment the count

"""

__version__ = '1'


import urllib

def three_double(word):
    """Report whether ``word`` has three double letters in a row.

    Every start index that leaves room for six characters is examined;
    the word qualifies when those six characters form three
    back-to-back equal pairs (for example the ``ookkee`` in
    ``bookkeeper``).

    Returns True on the first match and False when no position matches,
    which includes every word shorter than six characters.
    """
    return any(
        word[start] == word[start + 1]
        and word[start + 2] == word[start + 3]
        and word[start + 4] == word[start + 5]
        for start in range(len(word) - 5)
    )

# Comments that fit in a single line can be put in this format.
# Anything after a single pound sign is ignored.
# Main body of the program starts here

word_url = 'http://thinkpython.com/code/words.txt'
word_file = urllib.urlopen(word_url)

count = 0
for word in word_file:
    word = word.strip().strip('\n')
    if three_double(word):
        print word
        count = count + 1
        
if count == 0:
    print 'No words found'
else:
    print count, 'words are found'

Lecture 4

Module: lec4_examples — Example programs from Lecture 4

Code:

"""
Write a program to 
   read radius of a circle in float
   compute its area and
   print it.


"""

import math


# Read the radius as text; raw_input always returns a string.
radius = raw_input("Radius ==> ")


# Convert the text to a float so it can be used in arithmetic.
radius = float(radius)
## or
## radius = float(raw_input("Radius ==> "))
# Area of a circle: pi * r^2.
area = math.pi * radius**2


# The first line prints the float as-is; the second rounds to two decimals.
print "The area of a circle is:", area
print "The area of a circle is: %.2f" %area
"""
Write a program to
     read the first name and last name of a person
     print name in two ways:
     firstname lastname
     lastname, firstname

"""

# Read both names, echoing each one back as soon as it is entered.
fname = raw_input("First name ==> ")
print fname

lname = raw_input("Last name ==> ")
print lname

# "firstname lastname" built by explicit concatenation.
print fname + " " + lname

# Same output: a comma in a print statement inserts one space between items.
print fname, lname

# "lastname, firstname" built by explicit concatenation.
print lname + ", " + fname

# The comma form also puts a space before the ",": "lastname , firstname".
print lname, ",", fname

## formatted output
print "%s %s" %(fname, lname)

print "%s, %s" %(lname, fname)
"""
Write a program to read three values,
find and print their 
     average, 
     min and max,
     average of the smallest two values.

"""

# Read the three values, converting each to float as it is entered.
val1 = float(raw_input("Value 1 ==> "))
val2 = float(raw_input("Value 2 ==> "))
val3 = float(raw_input("Value 3 ==> "))

# Mean of all three values.
avg = (val1+val2+val3)/3

print "The average value is: %.2f" %avg

# min() and max() accept any number of arguments.
minval = min(val1, val2, val3)
print "The min value is: %.2f" %minval

maxval = max(val1, val2, val3)
print "The max value is: %.2f" %maxval

# Removing the largest value from the total leaves the sum of the two
# smallest values; dividing by 2 gives their average.
avgmintwo = (val1+val2+val3 - max(val1,val2,val3))/2
print "Average of min two values is: %.2f" %avgmintwo
# Swap every "a" and "e" in the name. "1" is used as a temporary
# placeholder (it does not occur in the string) so that the first
# replacement is not undone by the second.
name2 = "Rensselaer Polytechnic Institute"

##a -> 1,  e->a, 1->e
name2 = name2.replace("a","1")
name2 = name2.replace("e","a")
name2 = name2.replace("1","e")

print name2

# Turn a phrase into a hashtag one step at a time: title-case each
# word, remove the spaces, then put "#" in front.
word = 'Bring back the swarm'

word = word.title()
word = word.replace(" ", "")
word = "#" + word
print word


# Same result as above, written as a single chained expression.
word = 'Bring back the swarm'

word = "#" + (word.title()).replace(" ","")
print word

Lecture 5

Module: lec5_examples — Example programs from Lecture 5

Code:

"""
Write a function that takes the name of a person
and places all vowels outside (in alphabetical order)

For example:

meeseek
eeeemsk

amos
aoms

This illustrates the use of a function that calls a function
and a function with multiple parameters.
"""

def put_outside_letter(word, letter):
    """Return ``word`` with every occurrence of ``letter`` moved to the front.

    The remaining characters keep their original relative order. When
    ``letter`` does not occur, ``word`` comes back unchanged.
    """
    occurrences = word.count(letter)
    remainder = word.replace(letter, "")
    return letter * occurrences + remainder


def put_outside(word):
    """Return ``word`` with all lowercase vowels moved to the front.

    The vowels are pulled out in reverse alphabetical order (u, o, i,
    e, a). Each pass places its letter ahead of everything moved so
    far, so the vowels end up at the front in alphabetical order.
    """
    for vowel in "uoiea":
        word = put_outside_letter(word, vowel)
    return word

######################### Main body of code

# Read a word, move its vowels to the front, and show the result.
inputword = raw_input("Enter a word ==> ")
outword = put_outside(inputword)
print outword
    
"""
Example functions from the exercise
"""

import math

def frame_word(word):
    """Return ``word`` framed by asterisks as a three-line string.

    The top and bottom borders are ``len(word) + 4`` asterisks wide, and
    the middle line is the word padded with ``"* "`` and ``" *"``. The
    result has no trailing newline.
    """
    border = "*" * (len(word) + 4)
    middle = "* " + word + " *"
    return "\n".join([border, middle, border])

def area_circle(radius):
    """Return the area of a circle with the given ``radius`` (pi * r^2)."""
    return math.pi * radius ** 2



#################

# Demo call: print the area of a circle of radius 2.
print area_circle(2)
"""
Write a function that computes the length of
the hypotenuse of a triangle, given the lengths
of its legs (remember: A^2+B^2 = C^2)

Use this function to read the lengths of edges and
compute and print the hypotenuse.

Show the program structure and functions
"""

import math

def hypotenuse(a, b):
    """Return the hypotenuse length of a right triangle with legs ``a`` and ``b``.

    Applies the Pythagorean theorem: c = sqrt(a^2 + b^2).
    """
    sum_of_squares = a**2 + b**2
    return math.sqrt(sum_of_squares)


################## main body of the program

# Read the two leg lengths as floats, compute the hypotenuse, and print
# it rounded to two decimals.
aval = float(raw_input("Enter a ==> "))
bval = float(raw_input("Enter b ==> "))
cval = hypotenuse(aval,bval)
print "C value is %.2f" %cval

"""
Write a function that generates a thank you note
given the person who gave the gift and the gift.


"""


def send_thankyou(person, gift):
    """Return a four-line thank-you note as a single string.

    ``person`` fills the greeting and ``gift`` fills the second line.
    The note has no trailing newline; the caller decides what to do
    with the returned text.
    """
    template = "\n".join([
        "Dear %s,",
        " Thank you for your thoughtful gift of %s.",
        "It was very nice of you to think of me.",
        "My first weeks at Rensselaer have been crazy.",
    ])
    return template % (person, gift)


########### main body of the program    
# send_thankyou returns the note, so it has to be printed by the caller.
print send_thankyou("Uncle", "rocket ship")

Lecture 6

Module: lec6_examples — Example programs from Lecture 6

Code:

"""
Write a function that generates a thank you note
given the person who gave the gift and the gift.

This is an example of a function that returns
nothing. Notice how this function is called differently
than a function that returns a value.

"""


def send_thankyou(person, gift):
    outstr = "Dear %s,\n Thank you for your " \
        "thoughtful gift of %s.\n" \
        "It was very nice of you to think of me.\n" \
        "My first weeks at Rensselaer have been crazy." \
        %(person, gift)
    print outstr



def send_thankyou2(person, gift):
    """Return a four-line thank-you note addressed to ``person`` for ``gift``.

    Nothing is printed here; the note is handed back as one string
    without a trailing newline.
    """
    greeting = "Dear %s,\n" % (person,)
    thanks = " Thank you for your thoughtful gift of %s.\n" % (gift,)
    closing = ("It was very nice of you to think of me.\n"
               "My first weeks at Rensselaer have been crazy.")
    return greeting + thanks + closing


########### main body of the program    

## calling function that returns nothing
send_thankyou("Uncle", "rocket ship")

print

## calling function that returns a string
print send_thankyou("Uncle", "rocket ship")

# Read the workout length in whole minutes.
mins = int(raw_input("How long did you work out today (mins)? "))

print "You worked out", mins, "minutes"

# Three independent if statements with non-overlapping conditions:
# exactly one of them is true for any value of mins.
if mins >= 60:
    print "You are crushing it!"
    print "Keep up the great work!"

if mins >= 20 and mins < 60:   #### if mins between 20 and 60
    print "That is great!"
    print "Great job!"

if mins < 20:
    print "You can benefit more from a longer exercise"

# Not part of any if block, so it is always printed.
print "You can work even harder next time"

######################################
### Alternate equivalent solution  ###
######################################


mins = int(raw_input("How long did you work out today (mins)? "))

print "You worked out", mins, "minutes"

if mins >= 60:
    print "You are crushing it!"
    print "Keep up the great work!"
elif mins >= 20:   #### if mins between 20 and 60
    print "That is great!"
    print "Great job!"
else mins < 20:
    print "You can benefit more from a longer exercise"

print "You can work even harder next time"

Lecture 7

Module: lec7_examples — Example programs from Lecture 7

Code:

"""Example of using tuples to return two things
   or make multiple assignment

"""

def return_two_things():
    """Return the two-element tuple ``('a', 2)``.

    Shows that a function can hand back several values at once by
    packing them into a tuple.
    """
    pair = ('a', 2)
    return pair

########

# A single name on the left receives the whole tuple.
x = return_two_things()
print x


# Two names on the left unpack the tuple, one element per name.
name, cnt = return_two_things()
print name, cnt


## Swap two values using multiple assignment

a = 4
b = 3
print a,b

# The right-hand side is packed into a tuple first, then unpacked, so
# no temporary variable is needed.
a,b = b,a  ## swap values of a and b

print a,b
"""
Purpose: Example image program

Reads an image, crops four equal sizes images
from it, pastes them into another image in 
different order and saves the resulting image

The image used in this program can be found on Piazza under
Resources->Resources
"""


from PIL import Image

# Load the source picture from the current directory.
im = Image.open("swarm1.jpg")

# Report basic properties of the loaded image.
print "Size", im.size
print "Mode", im.mode
print "Format", im.format


# Cut out four 300x400 tiles; each crop box is (left, upper, right, lower).
# NOTE(review): the boxes assume the picture is at least 600x800 -- confirm.
im1 = im.crop((0,0,300,400))       # top-left tile
im2 = im.crop((300,0,600,400))     # top-right tile
im3 = im.crop((0,400,300,800))     # bottom-left tile
im4 = im.crop((300,400,600,800))   # bottom-right tile

# Blank white 600x800 canvas to receive the rearranged tiles.
imnew = Image.new("RGB", (600,800), "White")

# Paste each tile at a new (left, upper) corner; only the bottom-right
# tile keeps its original position.
imnew.paste( im3, (0,0) )
imnew.paste( im1, (300,0))
imnew.paste( im2, (0,400))
imnew.paste( im4, (300,400))

imnew.save("scrambled_swarm.jpg")

"""
This module prints boolean algebra tables
Save this in a file called ``truth_table.py``

"""

def print_and_table():
    """Prints the AND truth table. """
    print "C1 AND C2 (ex:1>2 AND 3>2)"
    print "TRUE AND TRUE = TRUE"
    print "FALSE AND TRUE = FALSE"
    print "TRUE AND FALSE = FALSE"
    print "FALSE AND FALSE = FALSE"
    
def return_or_table():
    """Return the OR truth table as one multi-line string.

    The string holds a heading and the four cases, separated by
    newlines, with no newline after the last row.
    """
    rows = [
        "C1 OR C2 (ex:1>2 OR 3>2)",
        "TRUE OR TRUE = TRUE",
        "FALSE OR TRUE = TRUE",
        "TRUE OR FALSE = TRUE",
        "FALSE OR FALSE = FALSE",
    ]
    return "\n".join(rows)

Lecture 9

Module: lec9_examples — Example programs from Lecture 9

Code:

"""
This is a general program for practicing different loop
methods. The following are examples of:

1. Loops that count up or down, loops that end
  
   Loop block   
   Printing (print all on one line)
   Changing a list (make upper case)

   Accumulate a value
      Count by 1, 3 (print three in a line, check
      for three letters in a word, ending conditions?)
      Is it true that there are two consecutive repeated letters?
      
      Count all farmer's markets in Albany
      
2. Loops that are indefinite (the number of iterations is not known in advance)

   Depends on an external condition 
      (while user does not say stop)
      
------ WE will continue with these the next time
   Depends on a complex condition 
      (while found or end of list, farmer's market)
   

3. Double loops

   Find all possible pairs of agents
   Find pairs of agents with the same first letter in their name


"""

# Sample data used by the loop examples in this program.
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', \
          'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

agents = ['skye','may','fitz','simmons','caulson',\
          'hunter', 'mack', 'morse', 'triplett', \
          'hartley', 'deathlok', 'koenig', \
          'gonzales', 'fury']

word = 'bookkeeper'


# Index-based while loop: i walks 0 .. len(months)-1, and i+1 is the
# human-friendly month number.
i=0
while i< len(months):
    print "Month: %d is %s" %(i+1, months[i].capitalize() )
    i += 1

# capitalize() returned new strings that were only printed, so the list
# itself still holds the lowercase names.
print months

print
print agents

## Capitalize the values in list "agents"

# Assigning back to agents[i] changes the list in place.
i=0
while i < len(agents):
    agents[i] = agents[i].capitalize()
    i += 1


## Check if capitalized
print
print agents


##Count how many agents have first names that start with S
##Accumulation of values

# The comparison with uppercase 'S' relies on the names having been
# capitalized by the loop above.
i = 0
cnt = 0
while i < len(agents):
    if agents[i][0] == 'S':
        cnt += 1
    i += 1

print "%d agents with first name starts with 'S'" %cnt

##Find if there any agents who name starts with F
##Print Yes if any agent with such name, No otherwise

# found starts as False and is only ever switched to True; the loop
# keeps scanning the whole list even after a match.
i = 0
found = False
while i < len(agents):
    if agents[i][0] == 'F':
        found = True
    i += 1

if found:
    print 'Yes'
else:
    print 'No'


print
print "This is a loop that counts up to 7"
i = 0
while i < 7:
    i += 1
    print i
    
print 
print "This is a loop that counts 10 down to 1"
    
i=10
while i > 0:
    print i
    i -= 1
    
print
print "This is loop that counts by 2, starting with 1 and ending with 19"
i = 1
while i <= 20:
    print i
    i += 
    
"""
The Farmer's market CSV file is available on Piazza under 

Resources->Resources->lec9overview.zip

Get list of Farmer's Markets, each market has the 
following fields:

County	Market Name	Location	
Address Line 1	City	State	Zip	Contact	Phone	
Market Link	Operation Hours	Operation Season	
FMNP	Operating Months	
Latitude	Longitude	EBT/SNAP	
Location Points

"""


def get_markets(filename='Farmers_Markets.csv'):
    """Read the farmers' market CSV file into a list of rows.

    Args:
        filename: path of the CSV file to read. Defaults to
            ``'Farmers_Markets.csv'`` in the current directory.

    Returns:
        A list with one entry per line after the header line. Each
        entry is the list of fields obtained by stripping the line and
        splitting it on commas. An empty file gives an empty list.

    Lines may end in "\\r", "\\n" or "\\r\\n". Fields are split on every
    comma, so quoted fields that contain commas are not handled.
    """
    # The with block closes the file as soon as it has been read.
    with open(filename) as market_file:
        lines = market_file.read().splitlines()

    # lines[0] is the column-header row, not a market.
    return [line.strip().split(",") for line in lines[1:]]


## markets is a list of lists
markets = get_markets()
# Show the first two rows as a quick check of what was read.
print markets[0]
print markets[1]

county = raw_input("Input a county ==> ")

# Normalise the input to "Xxxxx" form before comparing it with the
# first field of each row.
county = county.capitalize()
## Find and print all market names and address in 
## Rensselaer County
## Display number of found markets

# Field 0 of a row is compared with the county; field 1 is printed as
# the market name.
i = 0
cnt = 0
while i < len(markets):
    if markets[i][0] == county:
        print markets[i][1]
        cnt += 1
    i += 1
    
print "%d markets found in %s" %(cnt, county)
""" This program checks given a word, whether it has
one or two double consecutive letters. This is an example
of how to iterate over the letters of a word using a while
loop.

"""


word = 'bookkeeper'


##Checks for single double letter
# Stop at len(word)-1 so that word[i+1] is always a valid index.
# found is only ever switched to True, so it reports "at least one
# pair of equal neighbours".
i = 0
found = False
while i < len(word)-1:
    if word[i] == word[i+1]:
        found = True   
    i += 1
    
if found:
    print "The word has a single double letter"
    
# NOTE(review): 'ballon' contains only one double letter ('ll'), so the
# check below prints nothing; 'balloon' may have been intended -- confirm.
word = 'ballon'

##Checks for double double letter
# Stop at len(word)-3 so that word[i+3] is always a valid index.
i = 0
found = False
while i < len(word)-3:
    if word[i] == word[i+1] and \
       word[i+2] == word[i+3] :
        found = True   
    i += 1
    
if found:
    print "The word has a double double letter"
""" This program shows how to write a loop that ends
on a given user input. So, we must make sure that
while loop executes once initially and also that we 
set the conditions that would stop the loop manually
when the correct input is given.

"""




### write a loop that reads user input, until the 
### user types stop

# finished starts as False so the loop body runs at least once; it is
# switched to True only when the user types exactly "stop".
finished = False
while not finished:
    cmd = raw_input("Enter a command (stop to stop) => ")
    if cmd == 'stop':
        finished = True

Lecture 10

Module: lec10_examples — Example programs from Lecture 10

Code:

"""
Overview of Lecture 10, understanding the difference between 
list aliasing and copying

Simple value: int, float, string, Boolean, tuple

Container: list (set and dictionary)

a = b

If b has a simple value, then we make a copy
of its value and assign to a

If b is a list (or any container), a is an
alias to b [they point to the same object]

If variable b is passed as an argument to a
function, if b has a simple value, then we
make a copy of its value

Otherwise, we create an alias to the same
object

The following functions create a new list (they are not aliases):

Concatenation: x+y
Replication:   x*10
Shallow Copy:  list(x)
Slicing: x [i:j]

The copy is shallow! This means that only the first level of 
values are copied in the list.
""" 


## Example 1: Since each animal is a string, the value is copied into
## variable a in the for loop! Hence, capitalizing a does not change
## the value in the list

animals = ['cat','dog','pig','unicorn','dolphin','fish','rabbit']

for a in animals:
    # Rebinds the loop variable only; the list element is untouched.
    a = a.capitalize()
    print a

# Still the original lowercase names.
print animals


## Example 2: Since each value in the list is also a list, the variable a becomes an
## alias for each sublist in the for loop. Changing a in the loop also changes the
## values in the list

values = [ [1,2], [3,4] ]

for a in values:
    # append() modifies the sublist object that a refers to.
    a.append(100)
    print a
    
# Both sublists now end in 100.
print values
""" This program illustrates the use of functions that take
    as input a list and while loops that end on an external condition

"""

def cap_value(mystr):
    """Return a capitalized copy of ``mystr``.

    The caller's string is not changed, so the result has to be used
    through the return value.
    """
    return mystr.capitalize()

def arrange_values(mylist):
    """Sort ``mylist`` in place and return nothing.

    ``mylist`` refers to the very list object the caller passed in, so
    the caller sees the sorted order without needing a return value.
    """
    mylist.sort()  # in-place sort of the caller's list


# Values entered so far, kept sorted after every addition.
myvals = []

finished = False
while not finished:   ## an indefinite while loop, continues until an outside condition is met
    newval = raw_input("Enter a value (stop to end) => ")
    if newval == 'stop':
        finished = True
    else:
        newval = cap_value(newval)  ## pass by value since string is a simple value, a return is needed
        
        myvals.append(newval)
        arrange_values(myvals)  ## pass by alias, the list is modified in the function without a return
        print myvals
        

Lecture 11

Module: lec11_examples — Example programs from Lecture 11

Code:

""" Example functions from Lecture 11 on if statements

"""

import math

def is_intersect(x0,y0,r0,x1,y1,r1):
    """Return True when two circles are close enough to touch or overlap.

    Circle 1 is centred at (x0, y0) with radius r0 and circle 2 at
    (x1, y1) with radius r1. The result is True when the distance
    between the centres is at most r0 + r1, so circles that only touch
    count, and so does a circle lying entirely inside the other.
    """
    centre_gap = math.sqrt( (x0-x1)**2 + (y0-y1)**2 )
    return centre_gap <= r0+r1
    

def find_bigger_semester(sem1, sem2):
    """Return whichever of two semesters comes later in time.

    Each semester is a ``(season, year)`` tuple such as
    ``('Fall', 2013)``. The later year wins; within the same year
    'Fall' is treated as later than 'Spring'.

    Returns one of the two argument tuples. When neither is later
    (same year and no Fall/Spring pair) ``sem1`` is returned.
    """
    s1, y1 = sem1
    s2, y2 = sem2
    if y1 < y2:
        return sem2
    elif y2 < y1:
        return sem1
    elif s1 == 'Fall' and s2 == 'Spring':
        return sem1
    elif s2 == 'Fall' and s1 == 'Spring':
        return sem2
    else:
        # Neither is later: return the whole tuple, like every other
        # branch, rather than just the season string.
        return sem1
    
    
"""
Code to compare three values, a,b,c
"""
    
def find_ordering(a,b,c):
    """ Orders three values using nested if statements """
    ## a is the smallest value
    if a == min(a,b,c):
        if b<= c:
            print "a, b, c"
        else:
            print "a, c, b"
    elif b == min(a,b,c):
        if a <= c:
            print "b, a, c"
        else:
            print "b, c, a"
    else:  ## c is the min value
        if a <= b:
            print "c, a, b"
        else:
            print "c, b, a"
    
def find_ordering2(a,b,c):
    """ Orders three values using if/elif statements,
        all possible orderings are: abc, acb, bac, bca, cab, cba 

    """

    if a <= b <= c: ## this is Python syntax, would not work in all languages
        print "a, b, c"
    elif a <= c <= b:
        print "a, c, b"
    elif b <= a <= c:
        print "b, a, c"
    elif b <= c <= a:
        print "b, c, a"
    elif c <= a <= b:
        print "c, a, b"
    else:
        print "c, b, a"

            
# Demo call: the years differ, so the tuple with the later year is printed.
print find_bigger_semester( ('Fall', 2013), ('Spring', 2014))
""" This program illustrates the use of randomness in programs
    A drunk is in a platform and moves randomly left or right
    (half the time to the left, and half the time to the right)
    We simulate this until the drunk falls off the platform

    We also illustrate program structure

    Pseudo code:

    Read the length of a platform
    Put the drunk in the middle of the platform
    Print the platform with the man in it
    
    while the drunk is still in the platform:
        randomly move the drunk 
        print the platform

"""

### import statements
import random

### function definitions here
def print_platform(iteration, location, length):
    left = location-1
    right = length - location
    print ("%4d " %iteration) +\
          "-"*left + "X" + "-"*right
    #raw_input("   <enter> ")

    
### The part below is not executed when we import this program
### but it will execute when we run the program
if __name__ == "__main__":

    # Platform positions run from 1 to n; the walker starts in the middle.
    n = int(raw_input("Enter length of platform => "))

    loc = n/2
    iteration = 1
    # Keep walking while the walker is still on the platform.
    while 0 < loc <= n:
        print_platform(iteration, loc, n)
        # One random draw per step: above 0.5 moves right, otherwise left.
        if random.random() > 0.5:
            loc += 1
        else:
            loc -= 1
        iteration += 1
            

Lecture 12

Module: lec12_examples — Example programs from Lecture 12

Code:

""" Finds the closest points of a list by finding 
    the pair with the smallest distance
    
"""

import math

def find_distance(point1, point2):
    """Return the straight-line distance between two points.

    ``point1`` and ``point2`` are ``(x, y)`` tuples.
    """
    x1, y1 = point1
    x2, y2 = point2
    dx = x2 - x1
    dy = y2 - y1
    return math.sqrt(dy**2 + dx**2)


if __name__ == '__main__':
    # Collection of coordinate points
    points = [ (1,5), (13.5,9), (10,5), (8,2), (16,3) ]
    
    # Show this list to the user
    print 'All points:', points
    
    # 1. Set a default "minimum" value
    # Best option: the distance between the first two points
    min_distance = find_distance(points[0], points[1])
    
    # 2. Store the points that created the min_distance
    # For now, it is the first 2 points.
    closest_points = (points[0], points[1])
    
    # 3. Compare all points to each other
    
    # For each point...
    for p in points:
        # For every other point...
        for q in points:
            # Skip pairing a point with itself. The test compares
            # values, so two equal points in the list would also be
            # skipped.
            if p != q:
                # Calculate the distance
                dist = find_distance(p,q)
                # Is it smaller than our current min_distance?
                if dist < min_distance:
                    # If so, update the min_distance!
                    min_distance = dist
                    # And update the closest_points!
                    closest_points = (p,q)
                    
        # end inner for (ready to move on to the next point)
    # end outer for (finished comparing all points)
    
    # Now, min_distance and closest_points hold our answer!
    print "\nMinimum distance is: %.2f between points %s" \
          % (min_distance, closest_points)
    
    ## You can modify the for loop to avoid comparing the same
    ## two coordinates more than once by using ranges.  Try it!
""" Practice using ranges to iterate through a list.
    Now you have an index to find your place!

"""


if __name__ == '__main__':
    planets = ['Mercury', 'Venus', 'Earth',
               'Mars', 'Jupiter', 'Saturn', 'Uranus',
               'Neptune', 'Pluto']
    
    # range(len(planets)) yields the indices 0 .. len-1; i+1 is printed
    # so that the numbering starts at 1.
    print "PLANETS GOING FORWARD:"
    for i in range(len(planets)):
        print "%d: %s" % (i+1, planets[i])
        
    # Start at the last index and step by -1; the stop value -1 is
    # excluded, so index 0 is the last one visited.
    print "\nPLANETS GOING BACKWARD:"
    for i in range(len(planets)-1, -1, -1):
        print "%d: %s" % (i+1, planets[i])
        
    # A step of 2 visits indices 0, 2, 4, ...
    print "\nEVERY OTHER PLANET:"
    for i in range(0, len(planets), 2):
        print "%d: %s" % (i+1, planets[i])
        

    ## New example:
    ## Use the indexes to print the planets' neighbors
    # The first and last planets are skipped because they lack a
    # neighbour on one side (i-1 or i+1 would fall outside the list).
    print "\nNEIGHBORS"
    for i in range(1, len(planets)-1):
        print "%s >>>> %s >>>> %s" % (planets[i-1], planets[i], planets[i+1])

        ## Do you see why the range has to be set from 1 to len-1?
""" Compares the weights of two rats and finds the 
    first day on which Rat 1 is heavier than Rat 2

"""

# Weights of two rats
L1 = [3.7, 4.1, 4.9, 5.4, 6.0]
L2 = [4.5, 4.7, 5.1, 5.3, 5.9]

# Walk both lists in step; the double test stops at the end of the
# shorter list if the lengths ever differ.
# NOTE(review): the day number printed is the zero-based index i, so
# the first entry would be reported as day 0 -- confirm whether i+1
# was intended.
i = 0
while i < len(L1) and i < len(L2):
    if L1[i] > L2[i]:
        print "On day %d, Rat 1 (%.1f) is heavier than Rat 2 (%.1f)" \
            %(i, L1[i], L2[i])
        break   # Stops the loop on the first occurrence
    i += 1
    
# Reached both after a break and after the loop ends normally.
print "\nOUTSIDE THE LOOP"

Lecture 13

Module: lec13_examples — Example programs from Lecture 13

Code:

""" Example of reading and writing a file.

Remember: 
1. when reading lines, the newline at the end of the line is also read
2. when writing lines, you must explicitly add a newline

"""

if __name__ == "__main__":
    f = open("census.txt")
    line1 = f.readline()  ## read only one line
    line2 = f.read()      ## read the rest of the file as a single string
    line3 = f.readline()  ## this should return empty string (end of file already)
    f.close()
    f = open("census.txt")
    line4 = f.readline()  ## we are back to the beginning of the file
    
    print "line1", line1
    print "line2", line2
    print "line3", line3
    print "line4", line4
    
    fout = open("census_out.txt", "w")
    fout.write(line1)  ## example writing to file
    fout.write(line2)  
    fout.write(line3)
    fout.write(line4)
    fout.close()
""" Reads each line of the census data, splits 
    and finds all information relevant to Albany.


"""


if __name__ == "__main__":
    
    f = open("census_data.txt")
    i = 0
    for line in f:
        m = line.strip().split("\t")
        i += 1
        if m[0].startswith('Albany'):
            print "%s: Population: %s" %(m[0], m[1])
""" Write a program to read scores.txt
    each line containing a score, and then
    print scores in decreasing order (with
    index on the left.)
    
    Algorithm:
    
    Read the file into a list
    Sort the list (reverse)
    Print the list

"""

if __name__ == "__main__":
    
    nums = []
    f = open("scores.txt")
    for line in f:
        num = int(line.strip())
        nums.append(num)   
    nums.sort(reverse=True)
    for i in range(len(nums)):
        print "%d: %d" %(i, nums[i])
""" This program illustrates how to read a line on the web as a file.
    Simply use the urllib.urlopen method instead of the open method of a file.
    Reading is identical to the one for files.


"""
import urllib

def print_line(line):
        print line


if __name__ == "__main__":
        f = urllib.urlopen("http://www.cs.rpi.edu/~sibel/csci1100/fall2015/_sources/course_notes/lec13_files_web.txt")

        i = 0
        for line in f:
                print line.strip()
                i += 1
                if i==10:
                        break

Lecture 14

Module: lec14_examples — Example programs from Lecture 14

Code:

""" Simple program for reading through a file (in this case imdb file
containing the list of actors, movies and year, and finding all the
movies by an actor. The file can be found under Resources in Piazza.

Note. The file is very large, 200K+ lines.

"""

if __name__ == "__main__":
    name = raw_input("Enter an actor => ")

    for line in open("imdb_data.txt"):
        m = line.strip().split("|")
        actor = m[0].strip()
        
        if actor == name:
            print m[1].strip()
""" This program illustrates the use of sets to find the number of
actors in the data file "imdb_data.txt".  In particular, the use of sets
here is a big efficiency save, as checking  

value in set

is significantly faster than checking value in list.

"""

if __name__ == "__main__":
    
    f = open("imdb_data.txt")
    
    actors = set([])
    
    lineno = 0
    
    for line in f:
        lineno += 1
        ## This is just to illustrate how quickly the program is running
        if lineno %1000 == 0:
            print "Line no", lineno
            
        m = line.strip().split("|")
        actor = m[0].strip()
        movie = m[1].strip()
        year = int(m[2])

        actors.add ( actor ) 
 
    print "%d actors are found" %(len(actors))

Lecture 15

Module: lec15_examples — Example programs from Lecture 15

Code:

"""Use hanks.txt, find for each actor
   how many movies they have starred in.

   This solution does not use dictionaries
   We are looking at complexity of solutions:

   Solution 1: Uses a list of lists for each actor
   nummovies = [ [actor, numberofmovies], ...]

   Algorithm:
   for each line in the movie file:
         find the index of the list that has
         the current actor
         
         add 1 to the number of movies
         
   Analysis:
   Searching if an actor is in the list may require
   checking each entry in the list, (linear time O(n))
   
   This list look up is repeated for each line in the movie
   If m lines in the movie, total time roughly:  O(n*m)

   Note that number of actors is similar to number of lines
   as each movie has few actors, so if n=m, we get O(n^2),
   a quadratic solution.

   Check to see that this is a very slow program.

"""

def parse_line(line):
    """Split one "actor|movie|year" line into its three parts.

    Returns a tuple ``(actor, movie, year)`` in which actor and movie
    are stripped strings and year is an int. Fields after the third
    are ignored.
    """
    fields = line.strip().split("|")
    return fields[0].strip(), fields[1].strip(), int(fields[2])

def find_actor(nummovies, a):
    """Return the index of actor ``a`` in ``nummovies``, adding it if absent.

    ``nummovies`` is a list of ``[actor, count]`` entries and is
    searched from the front. When no entry matches, a new ``[a, 0]``
    entry is appended to the list and its index is returned.
    """
    for position, entry in enumerate(nummovies):
        if entry[0] == a:
            return position
    # Not seen before: register the actor with a count of zero.
    nummovies.append([a, 0])
    return len(nummovies) - 1

if __name__ == "__main__":
    ## list of [actor, number of movies] entries
    nummovies = []
    
    ##open the file
    f = open("hanks.txt")
    
    i = 0
    
    ##read line by line
    for line in f:

        ##debugging code to see the progress of the program
        i += 1
        if i%10000 == 0:
            print "Line", i
            
        ##find actor, movie, year
        a,m,y = parse_line(line)
        
        ## find actor index
        ## (find_actor scans the list from the front and adds the actor
        ## with a count of 0 when it is not there yet)
        idx = find_actor(nummovies, a)

        ## add 1 for movies
        nummovies[idx][1] += 1
        
    name = raw_input("Enter an actor => ")
    ## an unknown name is added by find_actor with a count of 0, so
    ## "0 movies" is reported for it
    idx = find_actor(nummovies, name)
    print "%s starred in %d movies" \
          %(name, nummovies[idx][1])
"""Use hanks.txt, find for each actor
   how many movies they have starred in.

   This is a new solution that uses dictionaries

   actors: dictionary
       key: name of actor
       value: number of movies by this actor
   
   for each line in the movie file:
         if actor is not in keys, add him/her with zero movies
         add 1 to the number of movies for this actor

   Complexity analysis:
   Finding if an actor is in the key of a dictionary (actor in actors)
   is constant time, does not depend on the number of actors in the 
   dictionary. So, O(1)

   We repeat this for each line of the file, so for n lines we get 
   O(1*n) = O(n) algorithm. Compare with the lec15_ex1.py. This is a
   much cheaper algorithm. It will run much faster.

"""

def parse_line(line):
    """Split one "actor|movie|year" line into its three parts.

    Returns a tuple ``(actor, movie, year)`` in which actor and movie
    are stripped strings and year is an int. Fields after the third
    are ignored.
    """
    fields = line.strip().split("|")
    return fields[0].strip(), fields[1].strip(), int(fields[2])


if __name__ == "__main__":
    ## maps actor name -> number of lines seen for that actor
    actors = {}
    
    ##open the file
    f = open("imdb_data.txt")
    
    i = 0    
    ##read line by line; the counter i is debugging code to see the
    ##progress of the program
    for line in f:
        i += 1
        if i%10000 == 0:
            print "Line", i
            
        a,m,y = parse_line(line)
        
        ## update the count for this actor
        if a not in actors: ##if this is the first time we see this actor
            actors[a] = 1
        else:  ## actor key already exists
            actors[a] += 1
        
    name = raw_input("Enter an actor => ")
    ## nothing is printed when the name is not a key of the dictionary
    if name in actors: ## dictionary key look up
        print "%s starred in %d movies" %(name, actors[name])

"""Use hanks.txt, find for each actor
   how many movies they have starred in,
   also print the movies.

   This is a slight variation on lec15_ex2.py
   We will still use a dictionary, but also store
   the set of movies for each actor as value.

   actors: dictionary
       key: name of actor
       value: set of movies by this actor
   
   for each line in the movie file:
         if actor is not in keys, add him/her with an empty set of movies
         add the movie to the set of movies for this actor

   Complexity:
   Finding a key in dictionary: O(1)
   Adding a value to a set: O(1)

   We repeat both for each line of the file, if n lines
   total complexity is still O(n)

"""

def parse_line(line):
    """Parse a line of the form 'actor | movie | year'.

    The actor and movie names are returned with leading and trailing
    white space removed, the year is returned as an integer.

    """
    pieces = line.strip().split("|")
    actor, movie = pieces[0].strip(), pieces[1].strip()
    return actor, movie, int(pieces[2])


if __name__ == "__main__":
    actors = {}
    
    ##open the file
    f = open("imdb_data.txt")
    
    ##read line by line
    for line in f:
        a,m,y = parse_line(line)
        
        ## find actor index
        if a not in actors:
            actors[a] = set()
        actors[a].add(m)
        
    name = raw_input("Enter an actor => ")
    if name in actors: ## set look up
        print "%s starred in %d movies" %(name, len(actors[name]))
        print "Movies"
        for m in actors[name]:
            print m

Lecture 16

Module: lec16_examples — Examples programs from Lecture 16

Code:

"""

Time Complexity review:

Constant time, O(1)
    does not depend on the size of the data
    (list/dictionary/set)
    
    value in set
    key in dictionary
    append to a list
    add a value to a set
    
    
Linear time, O(n)
    depends linearly on the data, twice the
    size of the data, twice the length of time the program will take
    
    value in list
    

Quadratic time, O(n^2)
    Some sort of double loop, scales quadratically, 
    twice the data, 4 times slower for example
    
    Double loops over n items are quadratic. Example:
    
    for i in range(n):
        for j in range(n):
            print i,j
    This is O(n^2) complexity
    
Worse stuff:

    O(n^3): involves triple loop
    Lots of matrix operations are cubed complexity and
    are very costly


Space complexity:

    There is an equivalent notion of space complexity, how much
    data you keep in memory. For fast programs, it is equally important
    not to keep unnecessary data or make multiple copies. You will
    see this in future classes.

"""
""" Examples of more complex dictionaries
    in which value can be different things.

    This program shows how to take one dictionary
    whose values are sets, and create a 
    new dictionary where the keys are the items from the
    sets of the first dictionary (a reverse dictionary).


"""


if __name__ == "__main__":
    ### dictionary where
    ### key: name
    ### value: set of hobbies for that person

    characters = {}

    characters['Gru'] = set(['World domination','Dancing'])
    characters['Minion'] = set(['Floating', 'Dancing'])
    characters['Margo'] = set(['Dancing', 'World domination'])
    

    ## print information from characters in a line
    for person in sorted(characters.keys()):
        line = "%s: " %(person.capitalize())
    
        for hobby in sorted(characters[person]):
            line += "%s, " %hobby
        print line.strip().strip(",")
    
    
    ## we will create the reverse dictionary
    hobbies = {}
    ### key: a hobby from characters dictionary
    ### value: set of names with that hobby

    
    for name in characters:
        for hobby in characters[name]:
            if hobby in hobbies:
                hobbies[hobby].add(name)
            else:
                hobbies[hobby] = set([name])
        
    print hobbies
""" Example of using an API that returns a JSON
    object.

    To load a string containing a json object into a Python object
    use json.loads()

    >>> x = json.loads('{"a": 1, "b": 2}')
    >>> x
    {u'a': 1, u'b': 2}

    Note: u'a' means that 'a' is a string encoded in Unicode

    The reverse operation will take a Python object, and create
    JSON string representation of it.

    >>> json.dumps([ {'a':1, 'b':2}, [1,2] ])
    '[{"a": 1, "b": 2}, [1, 2]]'


"""
import urllib
import json

if __name__ == "__main__":
    ## Find the bounding box of an adddress, in this case Troy, NY
    url = "http://nominatim.openstreetmap.org/search?q=Troy, NY&format=json&"\
        "polygon_geojson=1&addressdetails=0"

    content = (urllib.urlopen(url)).read()
    
    content = json.loads(content)

    print content[0]['boundingbox']

    ### Find photos in Troy, NY given the bounding box of 
    ### latitude and longitude
    url2 = "http://www.panoramio.com/map/get_panoramas.php?set=public&"\
        "from=0&to=5&minx=%s&miny=%s&maxx=%s&maxy=%s&size=medium&mapfilter=true" \
        %('-73.8517851','42.5684117','-73.5317851','42.8884117')

    content = (urllib.urlopen(url2)).read()

    content = json.loads(content)

    for photo in content['photos']:
        print photo['photo_url'] 
""" Create two dictionaries from IMDB for easy look up
    and ask for an actor or movie name repeatedly and
    print relevant info


    Dictionaries: 
        actors: key: name, value: set of movies
        movies: key: movie, value: set of actors in that movie

"""

def read_values(filename="imdb_data.txt"):
    """Read a movie file and build the two look up dictionaries.

    Each line holds fields separated by '|': the actor name comes
    first and the movie name second, any further fields are ignored.
    Blank lines are skipped.

    filename: name of the file to read. The default is "imdb_data.txt"
        so calls without an argument work as before.

    Returns the pair (actors, movies) where
        actors: key: name, value: set of movies
        movies: key: movie, value: set of actors in that movie

    """
    actors = {}
    movies = {}

    ## the with statement closes the file once all lines are read
    with open(filename) as f:
        for line in f:
            if not line.strip():
                continue  ## nothing to split on a blank line

            ## strip space of split values
            m = [field.strip() for field in line.split("|")]
            actor = m[0]
            movie = m[1]

            ## setdefault creates the empty set for a new key
            actors.setdefault(actor, set()).add(movie)
            movies.setdefault(movie, set()).add(actor)
    return actors, movies

if __name__ == "__main__":
    actors, movies = read_values()

    while True:
        cmd = raw_input("1 to search actor, 2 to search movie => ")
        if cmd not in ['1','2']:
            break
        if cmd == '1':
            actor = raw_input("Actor name => ")
            if actor in actors:
                print "Actor %s found" %actor
                for movie in actors[actor]:
                    print movie
        elif cmd == '2':
            movie = raw_input("Movie name => ")
            if movie in movies:
                print "Movie %s found" %movie
                for actor in movies[movie]:
                    print actor
        print

Lecture 17

Module: lec17_examples — Examples programs from Lecture 17

Code:

""" This program illustrates the use of dictionaries
    and sets to compute degree of Kevin Bacon up to 2.

    As a challenge, figure out how to make the degree 
    function work for any degree and loop to find the 
    next degree until no new actors found.

    Step 1:
    Create two dictionaries from IMDB for easy look up
    and ask for an actor or movie name repeatedly and
    print relevant info

    Dictionaries: 
        actors: key: name, value: set of movies
        movies: key: movie, value: set of actors in that movie

    Step 2: Ask for an actor

    1. If the actor is Kevin Bacon, return degree 0
    2. If the actor is in a movie with Kevin Bacon, return 1
    3. If the actor is in a movie with a degree 1 person (but
       is not Kevin Bacon or degree 1 actor), return 2
    Otherwise, return nothing (for now).

"""

def read_values(filename="imdb_data.txt"):
    """Create the two dictionaries used for look up from a movie file.

    Every line of the file has fields separated by '|'. The first
    field is the actor name and the second is the movie name, the
    remaining fields are not used. Blank lines are skipped.

    filename: name of the file to read. It defaults to "imdb_data.txt"
        so existing calls with no argument still work.

    Returns two dictionaries:
        actors: key: name, value: set of movies
        movies: key: movie, value: set of actors in that movie

    """
    actors = {}
    movies = {}

    ## with makes sure the file is closed after the loop
    with open(filename) as f:
        for line in f:
            if not line.strip():
                continue  ## skip blank lines, there is nothing to split

            ## strip space of split values
            m = [value.strip() for value in line.split("|")]
            actor = m[0]
            movie = m[1]

            ## setdefault adds an empty set the first time a key is seen
            actors.setdefault(actor, set()).add(movie)
            movies.setdefault(movie, set()).add(actor)
    return actors, movies



def find_degree(inputactor, actors, movies, source='Bacon, Kevin',
                max_degree=2):
    """Return the degree of separation between inputactor and source.

    The search goes out one degree at a time: the degree 1 actors
    are the ones in a movie with source, the degree 2 actors are the
    ones in a movie with a degree 1 actor (but are not source or a
    degree 1 actor), and so on.

    inputactor: name of the actor to look for
    actors: dictionary, key: name, value: set of movies
    movies: dictionary, key: movie, value: set of actors in that movie
    source: name of the actor with degree 0, Kevin Bacon by default
    max_degree: highest degree to search for, 2 by default. Use None
        to continue until no new actors are found.

    Returns the degree as an integer, or None if inputactor is not
    found within max_degree (or source has no movies in actors).

    """
    if inputactor == source:
        return 0
    if source not in actors:
        return None  ## no movies to start the search from

    found = set([source])     ## all actors with a known degree
    current = set([source])   ## actors at the degree found last
    degree = 0
    while current and (max_degree is None or degree < max_degree):
        degree += 1

        ## find all actors in the movies of the current degree actors
        costars = set()
        for actor in current:
            for movie in actors.get(actor, ()):
                costars |= movies.get(movie, set())

        ## subtract all the lower degrees, what is left is new
        current = costars - found
        if inputactor in current:
            return degree
        found |= current
    return None


if __name__ == "__main__":
    actors, movies = read_values()

    while True:
        actor = raw_input("Actor name (stop to end) => ")
        if actor == 'stop':
            break
        if actor not in actors:
            print "Actor not found"
        else:
            deg = find_degree(actor, actors, movies)
            if deg == None:
                print "Degree is above 2"
            else:
                print "Degree is", deg
""" Class example, a simple class of 2d objects 


"""

class Point2d(object):
    """ A point in two dimensions with attributes x and y. """

    def __init__(self, x0, y0):
        """ Store the starting x, y values of the point. """
        self.x, self.y = x0, y0

    def length(self):
        """ Return the distance of the point from (0, 0). """
        squared = self.x**2 + self.y**2
        return squared**(0.5)

    def __str__(self):
        """ Return the point as the string "(x, y)". This is the
            method used by both of:

            str(x)
            print x

            The values are formatted with %d, so they are shown
            as integers.

        """
        coords = (self.x, self.y)
        return "(%d, %d)" %coords

    def distance(self, other):
        """ Return the straight line distance to the point other. """
        dx = self.x-other.x
        dy = self.y-other.y
        return (dx**2 + dy**2)**(0.5)

    def __add__(self, other):
        """ Return a new point that is the sum of self and other,
            the two input points are not changed. Used by both of:

            pt1.__add__(pt2)
            pt1+pt2

        """
        return Point2d(self.x+other.x, self.y+other.y)

    def __sub__(self, other):
        """ Return a new point that is self minus other, the two
            input points are not changed. Used by both of:

            pt1.__sub__(pt2)
            pt1-pt2

        """
        return Point2d(self.x-other.x, self.y-other.y)

if __name__ == "__main__":
    ##Test code here
    pt1 = Point2d(10, 20)
    pt2 = Point2d(3, 4)
    
    print pt1.x, pt1.y
    print pt2.x, pt2.y

    print pt1 ## calls the __str__ method
    print str(pt1) ## this is identical to the above call
    
    print "Length of pt1 is", pt1.length()
    print "Length of pt2 is", pt2.length()
    
    print "Distance between", pt1, "and", pt2, "is:", pt1.distance(pt2)
    print pt1  ##calls str
     
    pt3 = pt1+pt2
    print pt3

    print "Subtraction:", pt1-pt2
    print "Pt1:", pt1, "Pt2:", pt2
    print "Add/Subtract do not change the input objects"

Lecture 18

Module: lec18_examples — Examples programs from Lecture 18

Code:

""" Class for storing time.

"""

class Time(object):
    """ A time of day, stored as the number of seconds since
        00:00:00. The stored value is always between 0 and 86399.

    """

    def __init__(self, hr, min, sec):
        """Store time internally as seconds.

           Anything that goes past the end of the day wraps around,
           so Time(24,0,0) and Time(25,0,0) are stored as 00:00:00
           and 01:00:00. A negative total is taken to be in the
           previous day.

        """
        ## 24*3600 is the number of seconds in a day; % keeps the
        ## result between 0 and 86399 for negative values as well
        self.seconds = (hr*60*60 + min*60 + sec) % (24*3600)

    def convert(self):
        """Convert seconds to hour, minute and seconds """

        ## divmod returns the whole division and the remainder
        hr, rest = divmod(self.seconds, 3600)
        minute, sec = divmod(rest, 60)
        return hr, minute, sec

    def __str__(self):
        """Print time as military time. """

        return '%02d:%02d:%02d' %self.convert()

    def __add__(self, other):
        """Add two time objects and return a new time object.
           A result past midnight wraps around to the next day.

        """

        ## __init__ does the wrapping, so pass everything as seconds
        return Time(0, 0, self.seconds + other.seconds)

    def __sub__(self, other):
        """ Subtract one time object from another. If negative,
            assume time is in the previous day.

        """

        return Time(0, 0, self.seconds - other.seconds)

    def am_or_pm(self):
        """ Is Time before or after 12:00:00 """
        ## 43200 seconds is 12:00:00
        if self.seconds < 43200:
            return "AM"
        else:
            return "PM"

        
if __name__ == "__main__":
    time1 = Time(5,5,5)
    time2 = Time(12,0,0)
    print str(time1+time2)
    print str(time2-time1)
    print time2.am_or_pm()
    print time1.am_or_pm()
""" Class example, this file contains two classes: 
    Point1d for 1 dimensional objects
    Point2d for 2 dimensional objects 

     Distinguish between a class and a file it is saved in
     
     This file name: points.py
     Class name: Point1d, Point2d

     When importing into another program you can either use:

     1.

     import points
     x = points.Point2d(5,10)

     2.
     
     from points import Point2d
     x = Point2d(5,10)

"""

class Point1d(object):
    """ A named point in one dimension. """

    def __init__(self, x0, name):
        """ Store the x value and the name of the point. """
        self.x, self.name = x0, name

    def __str__(self):
        """ Return the point as "name: x", x is shown as an integer. """
        parts = (self.name, self.x)
        return "%s: %d" %parts

    def __lt__(self, other):
        """ A point is smaller than other if its x value is smaller.
            Used for pt1 < pt2.

        """
        return self.x < other.x

class Point2d(object):
    """ A point in two dimensions with attributes x and y.
        haversine_distance treats x as longitude and y as latitude,
        both in degrees.

    """

    def __init__(self, x0, y0):
        """Initialize to make sure each point has an x, y value. """
        self.x = x0
        self.y = y0
        
    def length(self):
        """ Return the length of a point. """
        return (self.x**2 + self.y**2)**(0.5)
    
    def __str__(self):
        """ Returns the string representation of object. 
            Call as:
 
            str(x)
            print x  ##calls this function and prints the result string

            Note: %d shows the values as integers, so the decimal
            part of a float value is not printed.

        """
        return "(%d, %d)" %(self.x, self.y)
      
    def distance(self, other):
        """ Returns the distance between two points. """
        d = (self.x-other.x)**2 + (self.y-other.y)**2
        return d**(0.5)
      
    def __add__(self, other):
        """ Adds two points and returns a new point with the 
            addition of values. You can call this as:

            pt1.__add__(pt2)
            pt1+pt2
   
        """
        return Point2d( self.x+other.x, self.y+other.y )

    def haversine_distance(self, other):
        """ Haversine distance, in miles between two locations 
        with their latitude and longitude.

        y is used as the latitude and x as the longitude of each
        point, both given in degrees.

        """
        import math

        ## convert degrees to radians
        lat1 = self.y * math.pi / 180.0
        long1 = self.x * math.pi / 180.0
        lat2 = other.y * math.pi / 180.0
        long2 = other.x * math.pi / 180.0

        #  Now the real work.
        dlat = (lat1-lat2)
        dlong = (long1-long2)
        a = math.sin(dlat/2)**2 + \
            math.cos(lat1) * math.cos(lat2) * math.sin(dlong/2)**2
        ## rounding can push a slightly above 1 and sqrt(1-a) would
        ## then fail, so keep it in the valid range
        a = min(1.0, a)
        c = 2*math.atan2( math.sqrt(a), math.sqrt(1-a) )
        ## radius in km divided by km per mile, so the result is in
        ## miles as stated above
        R = 6371 / 1.609
        return R*c

    
    def __sub__(self, other):
        """ Subtracts other from self, and returns a new point 
            containing the result. You can call this as:

            pt1.__sub__(pt2)
            pt1-pt2
   
        """
        return Point2d( self.x-other.x, self.y-other.y )
    
    def __eq__ (self, other):
        """ Two points are equal if both x and y are equal.
            Called for pt1 == pt2. An object without x and y
            values is never equal to a point.

        """
        try:
            return self.x == other.x \
                and self.y == other.y
        except AttributeError:
            ## let Python decide, the result is False by default
            return NotImplemented

    def __ne__ (self, other):
        """ Called for pt1 != pt2. Python 2 does not create this
            from __eq__, so without it != would compare whether the
            two points are the same object.

        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
        

if __name__ == "__main__":
    ##Test code here
    pt1 = Point2d(10, 20)
    pt2 = Point2d(3, 4)
    
    print pt1.x, pt1.y
    print pt2.x, pt2.y

    print pt1 ## calls the __str__ method
    print str(pt1) ## this is identical to the above call
    
    print "Length of pt1 is", pt1.length()
    print "Length of pt2 is", pt2.length()
    
    print "Distance between", pt1, "and", pt2, "is:", pt1.distance(pt2)
    print pt1  ##calls str
     
    pt3 = pt1+pt2
    print pt3

    print "Subtraction:", pt1-pt2
    print "Pt1:", pt1, "Pt2:", pt2
    print "Add/Subtract do not change the input objects"

    pttroy = Point2d(-73.69, 42.73)
    ptalbany = Point2d(-73.72, 42.70)
    print "Distance between Troy and Albany", pttroy.haversine_distance(ptalbany)
"""Illustrates the use of classes both defined in the same file
   as in Business and also imported from a different file, as in
   Point2d.

"""

from points import Point2d

class Business(object):
    """ A business with a name, a location, an address, a url,
        a category and a list of integer scores.

    """

    def __init__(self, name, lat, lon, address, url, category, scores):
        """ Store the values for the business.

            lat, lon: converted to float and stored as the point
                loc, with lon as the x and lat as the y value
            address: each '+' is replaced with a new line and a tab
            scores: list of values that can be converted to integer.
                A new list is stored, the list passed in is not
                changed.

        """
        self.name = name
        self.loc = Point2d(float(lon), float(lat))
        self.address = address.replace('+','\n\t')
        self.url = url
        self.category = category

        ## convert scores into integer, in a new list so that the
        ## list of the caller is left as it is
        self.scores = [int(score) for score in scores]
        
    def avgscore(self):
        """ Return the average score as a float.

            With three or more scores, the lowest and the highest
            score are left out of the average. With no scores at
            all, 0.0 is returned.

        """
        if len(self.scores) == 0:
            return 0.0  ## nothing to average, avoid dividing by zero
        if len(self.scores) < 3:
            return sum(self.scores)/float(len(self.scores))
        else:
            ## sorted returns a new list, we do not want to change
            ## the actual ordering of scores
            s = sorted(self.scores)
            return ( sum(s[1:-1])/float(len(s)-2) )
            
    def __str__(self):
        """ Return the name, address, location and average score
            of the business as a string of multiple lines.

        """
        return "%s\n\t%s\n\tLocation: %s\nAvg Score: %.1f" \
               %(self.name, self.address, self.loc, self.avgscore())
    

if __name__ == "__main__":
    
    f = open('yelp.txt')
    
    ## Create a list of business objects
    businesses = []
    for line in f:
        m = line.strip().split("|")
        b = Business(m[0],m[1],m[2],m[3],m[4],m[5],m[6:])
        businesses.append(b)
        
    
    finished = False
    while not finished:
        name = raw_input("Enter a business (stop to end) => ")
        if name == 'stop':
            finished = True
        else:
            found = False
            for b in businesses:
                if b.name == name:
                    print b
                    found = True
                    break ##no need to continue checking with other businesses
            if not found:
                print "Business is not found"

Lecture 19

Module: lec19_examples — Examples programs from Lecture 19

Code:

""" This module shows different programming styles
    for different basic problems.

    Note: short circuited if statements
    c1 and c2:  if c1 is false, no need to check c2
    c1 or c2:   if c1 is true, no need to check c2
    
    The problem is that an error in c2 may go unnoticed: you
    will not get a runtime error until you are forced to evaluate c2
    a = 1
    b = 'q'
    d = 0

    if a>1 and 1/0 < 2:  ##normally you should get division by zero
        print "Hello"
    
    if d>1 or int(b) < 2:  ##normally you should get an error for int('q')
        print "Hello"
    

"""

## Pattern 1: return as soon as you find out something
## If you reach the end of the function, then it must be True/False?

def find_valid(word):
    """ Return True as soon as a digit between 1 and 5 is found
        in word. Return False if there is no such digit.

    """
    wanted = ['1','2','3','4','5']
    ## any stops at the first letter that is in the wanted list
    return any(letter in wanted for letter in word)


## Pattern 2: keep a Boolean of what value you should return
## Note that this is less efficient because you may not need to
## check the rest of word, but you still do in this version.
       
def find_valid2(word):
    """ Return True if there is at least one digit between 1 and 5
        in word, False otherwise. All letters of the word are
        checked, even after a digit is found.

    """
    wanted = ['1','2','3','4','5']
    isvalid = False
    for letter in word:
        ## once isvalid becomes True, it stays True
        isvalid = isvalid or letter in wanted
    return isvalid


## Pattern 1: Now the problem is different. We cannot tell whether
## all digits in the word are between 1 and 5 by looking at a single
## character, but we can check the reverse: return False as soon as
## we find a digit that is not between 1 and 5.

def find_between(word):
    """ Return True if all digits in word are between 1 and 5,
        False otherwise. Letters that are not digits are ignored,
        so a word with no digits returns True.
       
    """
    for letter in word:
        ## one digit outside of 1-5 is enough to return False
        if letter.isdigit() and letter not in ['1','2','3','4','5']:
            return False
    ###if you arrive here, no digit was outside the 1-5 range
    return True

## Pattern 2 for the same problem.

def find_between2(word):
    """ Return True if all digits in word are between 1 and 5,
        False otherwise. Letters that are not digits are ignored,
        so a word with no digits returns True.

        This version remembers the answer in a Boolean and always
        checks all the letters of the word.
       
    """
    isvalid = True
    for letter in word:
        if letter.isdigit() and letter not in ['1','2','3','4','5']:
            isvalid = False
    return isvalid


if __name__ == "__main__":

    ## This is an example of a while loop that does not use break
    ## but if statements to decide when to end.

    finished = True
    while not finished:
        cmd = raw_input("Enter an integer between 1 and 5 (stop to end) => ")
        
        if cmd == 'stop':
            finished = True
        elif not cmd.isdigit():
            print "Please enter an integer between 1 and 5"
        elif 1<=int(cmd)<=5:
            print "Good value"
""" This program illustrates different patterns for 
    iterating through a list to keep track of values.

"""

def find_min_year(L):
    """Given a list L with pairs of year,value, 
       return the year with the smallest value. If more than
       one year has the smallest value, the first one in the
       list is returned. L must have at least one pair.
       
    """
    
    ##the first pair is the best one so far
    best_year, best_value = L[0]
    for year, val in L[1:]:
        ## only a strictly smaller value replaces the best one,
        ## so the first year wins when values are the same
        if val < best_value:
            best_year, best_value = year, val
    return best_year

def find_min_years(L):
    """Given a list L with pairs of year,value, 
       return the list of all years with the smallest value,
       together with that value. L must have at least one pair.
       
    """
    
    ## the first pair is the starting point
    first_year, smallest = L[0]
    years = [ first_year ]
    for year, val in L[1:]:
        if val == smallest: ##same value, remember this year too
            years.append (year)
        elif val < smallest: ##new smallest value, start a new list
            smallest = val
            years = [year]
    return years, smallest

def find_max_years(L):
    """Given a list L with pairs of year,value, 
       return the list of all years with the largest value,
       together with that value (same as find_min_years,
       but looks for large values). L must have at least one pair.
       
    """
    
    ## the first pair is the starting point
    first_year, largest = L[0]
    years = [ first_year ]
    for year, val in L[1:]:
        if val == largest: ##same value, remember this year too
            years.append (year)
        elif val > largest: ##new largest value, start a new list
            largest = val
            years = [year]
    return years, largest


def find_allpairs(L):
    """Find all unique pairs of values. A value is matched only
       with the values that come after it in the list, so no value
       is matched to itself and no pair is returned twice:

       L=[1,2,3]

       return [(1,2), (1,3), (2,3)]

       do not return: (1,1), (2,2), (3,3), 
                      (2,1)** since (1,2) is already there
                      (3,1)** since (1,3) is already there
                      (3,2)** since (2,3) is already there

    """

    ## for each position, pair the value with everything after it
    return [ (first, second)
             for i, first in enumerate(L)
             for second in L[i+1:] ]


def find_threeletters(word,searchword):
    """Count how many times the sequence of letters searchword
       appears in word. Matches that overlap are all counted.

       See how we iterate only once, and stop at the last position
       where searchword still fits in word. For three letters this
       is position len(word)-3, so range must go up to len(word)-2.

       The function was written for sequences of three letters, but
       it works for a searchword of any length. An empty searchword
       is counted zero times.
    
    """

    size = len(searchword)
    if size == 0:
        return 0

    count = 0
    ## the +1 is needed to include the last possible position
    for i in range(len(word)-size+1):
        if word[i:i+size] == searchword:
            count += 1
    return count
    

if __name__ == "__main__":
    ## Test code here
    
    ## Temperature and snowfall values for Troy in December
    ## Each list has pairs of (year, value), the form expected by
    ## find_min_years and find_max_years. Some years are missing
    ## from the lists.

    snowfall = [(1958, 8.6), (1959, 4.6), (1960, 12.0), (1961, 14.5),
                (1962, 14.0), (1963, 19.9), (1964, 8.5), (1965, 2.8),
                (1966, 24.5), (1967, 14.0), (1968, 16.0), (1969, 48.2),
                (1970, 36.2), (1971, 7.0), (1973, 10.0), (1974, 7.0),
                (1975, 13.0), (1976, 6.5), (1977, 9.1), (1978, 14.8),
                (1980, 5.6), (1982, 2.5), (1983, 3.0), (1984, 10.0),
                (1987, 4.0), (1988, 3.8), (1989, 0.5), (1990, 5.0), 
                (1991, 4.0),
                (1992, 4.3), (1993, 7.5), (1994, 1.5), (1995, 21.0),
                (1996, 6.5), (1997, 0), (1998, 2.5), (1999, 0), 
                (2000, 15.9),
                (2001, 6.5), (2002, 6.2), (2003, 30.1), (2004, 5.0),
                (2005, 6.4), (2006, 1.0), (2007, 17.8), (2008, 16.2),
                (2010, 0.5), (2011, 3.0), (2012, 6.0), (2013, 14.6),
                (2014, 5.8)] 
    
    meantemp = [(1956, 32.4), (1957, 34.1), (1958, 20.1), (1959, 31.2),
                (1960, 22.8), (1961, 28.8), (1962, 24.7), (1963, 20.7),
                (1964, 28.1), (1965, 31.7), (1966, 28.5), (1967, 30.7),
                (1968, 24.9), (1969, 24.4), (1970, 25.6), (1971, 32.1),
                (1972, 29.9), (1973, 30.7), (1974, 31.4), (1975, 25.9),
                (1976, 22.4), (1977, 26.6), (1978, 29.8), (1979, 32.5),
                (1980, 20.7), (1981, 29.5), (1982, 35.4), (1983, 26.0),
                (1984, 34.7), (1985, 25.6), (1986, 30.4), (1987, 31.9),
                (1988, 27.4), (1989, 14.4), (1990, 35.0), (1991, 30.1),
                (1992, 29.6), (1993, 28.4), (1994, 32.5), (1995, 24.4),
                (1996, 35.4), (1997, 31.1), (1998, 36.1), (1999, 31.2),
                (2000, 23.4), (2001, 36.4), (2002, 27.4), (2003, 29.3),
                (2004, 28.6), (2005, 27.8), (2006, 37.3), (2007, 28.1),
                (2008, 30.2), (2010, 26.0), (2011, 35.4), (2012, 33.8),
                (2013, 27.9), (2014, 32.8)] 
    
    ## Example DNA sequence
    ## The string pieces next to each other are joined by Python
    ## into a single string.
    sequence = "ATCACTGTAGTAGTAGCTGGAAAGAGAAATCTGTGACTCCAATTAGCCA" \
        "GTTCCTGCAGACCTTGTGAGGACTAGAGGAAGAATGCTCCTGGCTGTTT" \
        "TGTACTGCCTGCTGTGGAGTTTCCAGACCTCCGCTGGCCATTTCCCTAG" \
        "AGCCTGTGTCTCCTCTAAGAACCTGATGGAGAAGGAATGCTGTCCACCG" \
        "TGGAGCGGGGACAGGAGTCCCTGTGGCCAGCTTTCAGGCAGAGGTTCC" \
        "TGTCAGAATATCCTTCTGTCCAATGCACCACTTGGGCCTCAATTTCCCTT" \
        "CACAGGGGTGGATGACCGGGAGTCGTGGCCTTCCGTCTTTTATAATAGG" \
        "ACCTGCCAGTGCTCTGGCAACTTCATGGGATTCAACTGTGGAAACTGCAA" \
        "GTTTGGCTTTTGGGGACCAAACTGCACAGAGAGACGACTCTTGGTGAGAA" \
        "GAAACATCTTCGATTTGAGTGCCCCAGAGAAGGACAAATTTTTTGCCTACC" \
        "TCACTTTAGCAAAGCATACCATCAGCTCAGACTATGTCATCCCCATAGGGA" \
        "CCATTGGCCAAATGAAAAATGGATCAACACCCATGTTTAACGACATCAATA" \
        "TTTATGACCTCTTTGTCTGGATGCATTATTATGTGTCAATGGATGCACTGC" \
        "TTGGGGGATCTGAAATCTGGAGAGACATTGATTTTGCCCATGAAGCACCA" \
        "GCTTTTCTGCCTTGGCATAGACTCTTCTTGTTGCGGTGGGAACAAGAAATC" \
        "CAGAAGCTGACAGGAGATGAAAACTTCACTATTCCATATTGGGACTGGCG" \
        "GGATGCAGAAAAGTGTGACATTTGCACAGATGAGTACATGGG"
    
    ## each call below returns the pair (list of years, value)
    print "Min"
    print "Snowfall", find_min_years(snowfall)
    print "Mean temperature", find_min_years(meantemp)
    
    print "Max"
    print "Snowfall", find_max_years(snowfall)
    print "Mean temperature", find_max_years(meantemp)    
    print "All pairs for [1,2,3,4]", \
          find_allpairs([1,2,3,4])
    
    ## count how many times ACA is found in the DNA sequence
    print "ACA", find_threeletters(sequence, 'ACA')
from PIL import Image

def copy_image(fname,copytype):
    """ Open the image in file fname, copy its pixels to a new
        image of the same size and return the new image object.
        
        copytype decides where each pixel (i,j) is copied to:
            same:  to (i,j)
            right: to (w-i-1,j), the columns are in reverse order
            down:  to (i,h-j-1), the rows are in reverse order
        For any other copytype no pixel is copied and the new
        image stays white. Add your versions!

    """

    source = Image.open(fname)
    src = source.load()  ##array of pixels of the image read
    w,h = source.size
    
    ## blank image of the same size and its array of pixels
    newim = Image.new("RGB", (w,h), "white")
    dst = newim.load()

    ## choose once the function that gives the new location of
    ## the pixel at (i,j)
    if copytype == 'same':
        place = lambda i, j: (i, j)
    elif copytype == 'right':
        place = lambda i, j: (w-i-1, j)
    elif copytype == 'down':
        place = lambda i, j: (i, h-j-1)
    else:
        place = None

    ##now copy the pixels from one image to the next
    if place is not None:
        for i in range(w):
            for j in range(h):
                dst[place(i, j)] = src[i,j]

    ##return the new image
    return newim


if __name__ == "__main__":
    ## make and show one copy of the image for each copy type
    for copytype in ('same', 'right', 'down'):
        newim = copy_image('bolt.jpg',copytype)
        newim.show()

Lecture 20

Module: lec20_examples — Examples programs from Lecture 20

Code:

"""
Problem: Find the index of the two smallest values
We will also learn how to time running time of algorithms
using the time module

Algorithm:

     Idea 1:
     Make a copy of list
     Sort the copy
     Find the two smallest values (index 0,1)
     Find the index of these values**
     
     
     Idea 2:
     Initialize two smallest values to 0,1
     Then, iterate through list
     and remember the smallest two values
     and their index
     
     Idea 3:
     Make a list of value, index
     Sort the list
     Return the index for the first two
     
     Idea 4: (implement this yourself)
     Make a copy of list
     find min in copy
     find index of min in copy
     remove min from list copy
     find next min in copy
     find index of min in the next copy

"""

import random
import time

def smallest_two1(L):
    """ Return the index of the two smallest values in L, the index
    of the smallest value first. L must have at least two values and
    it is not changed.

    Assume n items in List L. 
    Complexity:  O(nlogn + 3n) =most costly element= O(nlogn)
        
    """
    
    ## O(n) to copy + O(nlogn) to sort: sorted returns a new list
    ordered = sorted(L)
    lowest = ordered[0]   ## O(1)
    second = ordered[1]   ## O(1)

    ### O(n) each: compare against every element in worst case
    where_lowest = L.index(lowest)
    where_second = L.index(second)
    if where_second == where_lowest:
        ## the two values are the same, so look for the second
        ## one after the location of the first one
        where_second = L.index(second, where_lowest+1)
    return where_lowest, where_second

def smallest_two2(L):
    """ Return the index of the two smallest values in L, the index
        of the smallest value first. L must have at least two values.

        Assume n items in List L.
        Complexity: O(n)

    """
    ## start with the first two values, smaller one first
    i1, i2 = (0, 1) if L[0] < L[1] else (1, 0)

    for i in range(2,len(L)):  ### O(n)
        current = L[i]
        if current < L[i1]:
            ## new smallest value, the old smallest becomes second
            i2 = i1
            i1 = i
        elif current < L[i2]:
            i2 = i
    return i1, i2

def smallest_two3(L):
    """ Return the index of the two smallest values in L, the index
        of the smallest value first. L must have at least two values.

        Assume n items in List L.
        Complexity: O(n+nlog n) =most costly element= O(nlogn)
        Note: Compared to smallest_two1, we are sorting a more complex list
              (list of 2-tuples). So efficiency will depend on the implementation
              of that sort

    """

    ## O(n) to make the (value, index) pairs, O(nlogn) to sort them.
    ## Pairs with the same value are ordered by their index.
    pairs = sorted( (val, i) for (i, val) in enumerate(L) )
    return pairs[0][1], pairs[1][1]

if __name__ == "__main__":
    print "Test cases"
    
    L = range(1000)
    random.shuffle(L)

    start = time.time()
    a,b = smallest_two1(L)
    end = time.time()
    print "smallest_two1 took %f seconds" %(end-start)

    start = time.time()
    a,b = smallest_two2(L)
    end = time.time()
    print "smallest_two2 took %f seconds" %(end-start)
   

    start = time.time()
    a,b = smallest_two3(L)
    end = time.time()
    print "smallest_two3 took %f seconds" %(end-start)

""" 

Nose test cases

Normal test cases:

[1,2,3,4] ** smallest values at the beginning
[3,2,1,4] ** smallest values in the middle
[4,3,2,1] ** smallest values at the end
[1,4,3]   ** odd length list
[4,2,1,1,5]  ** duplicate values


Edge cases (unusual cases, decide on what should be returned first!)

Small lists: [] , [1], [2,1]
Lists with things other than numbers: ['a','b'] **note this should work fine 
as long as there is a comparison between values


"""
import nose
from smallest_two import *

## test for the first function
def test_st1_1():
    """ Smallest values at the beginning. """
    found = smallest_two1([1,2,3,4])
    assert found == (0,1)

def test_st1_2():
    """ Smallest values in the middle. """
    found = smallest_two1([3,2,1,4])
    assert found == (2,1)

def test_st1_3():
    """ Smallest values at the end. """
    found = smallest_two1([4,3,2,1])
    assert found == (3,2)

def test_st1_4():
    """ Odd length list. """
    found = smallest_two1([1,4,3])
    assert found == (0,2)

def test_st1_5():
    """ Duplicate values. """
    found = smallest_two1([4,2,1,1,5])
    assert found == (2,3)
    

## test for the second function
def test_st2_1():
    """ Smallest values at the beginning. """
    found = smallest_two2([1,2,3,4])
    assert found == (0,1)

def test_st2_2():
    """ Smallest values in the middle. """
    found = smallest_two2([3,2,1,4])
    assert found == (2,1)

def test_st2_3():
    """ Smallest values at the end. """
    found = smallest_two2([4,3,2,1])
    assert found == (3,2)

def test_st2_4():
    """ Odd length list. """
    found = smallest_two2([1,4,3])
    assert found == (0,2)

def test_st2_5():
    """ Duplicate values. """
    found = smallest_two2([4,2,1,1,5])
    assert found == (2,3)
    
## test for the third function
def test_st3_1():
    assert smallest_two3([1,2,3,4]) == (0,1)
    
def test_st3_2():
    assert smallest_two3([3,2,1,4]) == (2,1)
    
def test_st3_3():
    assert smallest_two3([4,3,2,1]) == (3,2)

def test_st3_4():
    # Odd-length list.
    found = smallest_two3([1, 4, 3])
    assert found == (0, 2)

def test_st3_5():
    # Duplicate values: the two smallest entries are equal.
    found = smallest_two3([4, 2, 1, 1, 5])
    assert found == (2, 3)
    
    
if __name__ == "__main__":
    # Let nose collect and run the test_* functions defined in this module.
    # exit=False is passed so the runner does not terminate the interpreter
    # when the run completes.
    nose.runmodule(exit=False)
"""
Input: a sorted list and a value

Find the index of value if value is in the list,
or, if it is not, return the index where
value would be inserted to keep the list sorted.

"""
import random
import time

def search(L,val):
    """ Linear search for the insertion point of val in a sorted list.

    Walks the list from the front and returns the index of the first
    item that is >= val. If no item qualifies, len(L) is returned.
    Every item may have to be inspected, so the cost is O(n).

    """

    for pos, item in enumerate(L):
        if item >= val:
            return pos
    return len(L)


def binsearch(L, val):
    """ Binary search for the insertion point of val in a sorted list.

    L is assumed to be sorted in increasing order. Returns the index of
    the first item that is >= val, or len(L) when every item is smaller
    than val (val would be appended at the end). An empty list gives 0.

    The search always looks at the middle value of the remaining range
    and discards half of it, which can happen at most O(log n) times,
    where log is base 2.

    """

    # The candidate answers are low..high inclusive. high starts at len(L),
    # not len(L)-1, so that "insert after the last item" can be returned.
    low = 0
    high = len(L)
    while low < high:
        # Floor division keeps mid an integer on every Python version.
        mid = (low + high) // 2
        if L[mid] < val:
            # Everything up to and including mid is too small.
            low = mid + 1
        else:
            # L[mid] >= val, so mid is still a candidate answer.
            high = mid
    return low
    

if __name__ == "__main__":
    
    # Timing driver: compare linear and binary search on one sorted list.
    print "Time tests"
    k = 500000
    L = range(k)

    # k+1 is larger than every item in L, so the linear search has to
    # look at all k items before giving up.
    start = time.time()
    a = search(L, k+1)
    end = time.time()
    print "Linear search took", end-start, "seconds"


    # Same query answered by halving the search range each step.
    start = time.time()
    a = binsearch(L,k+1)
    end = time.time()
    print "Binary search took", end-start, "seconds"

Lecture 21

Module: lec21_examples — Examples programs from Lecture 21

Code:

""" Sorting

Naive: O(n^2) => Insertion sort
Best: O(n logn) => Merge sort
    ** 
Internal sort: O(n logn) 
     ** but much faster as it is written in C and 
        compiled, instead of the interpreted version.

"""

import time
import random


def time_function(L, func):
    """ Illustrates how you can send a function as an argument
    to another function as well.

    Runs func on a private copy of L (so the caller's list is left
    untouched and every timed function sees the same input), prints
    how long the call took, and returns that time in seconds.

    """

    L1 = list(L)
    start = time.time()
    func(L1)
    end = time.time()
    elapsed = end - start
    # Single parenthesized argument: prints the same text on Python 2 and 3.
    print("Method: %s took %f seconds"
          % ((func.__name__).ljust(20), elapsed))
    return elapsed


def ins_sort(L):
    """ Insertion sort: sorts the list L in place and returns None.

    Total complexity:
    1 + 2 + ... + n-1 = n(n-1)/2
    Hence, this algorithm is O(n^2)

    Note: range is used instead of xrange so that the function runs
    on both Python 2 and Python 3 (xrange does not exist in Python 3).
    """

    for i in range(1, len(L)):
        ### i ranges between 1 and n-1; L[0:i] is already sorted
        val = L[i]
        j = i - 1
        ## shift larger items one slot to the right, at most i times.
        ## The strict > keeps equal items in their original order.
        while j >= 0 and L[j] > val:
            L[j + 1] = L[j]
            j -= 1
        L[j + 1] = val


def merge(L1, L2):
    """ Assume L1 and L2 are sorted.
    Create and return a new list that is the merged version of L1&L2.

    This is the efficient version of merge: it walks both lists with
    index counters and never modifies the inputs. Removing items from
    the front with pop(0) would be costly, because the remaining items
    have to be shifted on every call.

    The merge is stable: when two items compare equal, the one from L1
    is placed first. Only the < operator is used on the items.

    """

    merged = []
    i = 0
    j = 0
    while i < len(L1) and j < len(L2):
        if L2[j] < L1[i]:
            ## take from L2 only when it is strictly smaller
            merged.append(L2[j])
            j += 1
        else:
            ## ties and smaller values come from L1, keeping input order
            merged.append(L1[i])
            i += 1
    ## at this point, either L1 or L2 has run out of values
    ## add all the remaining values to the end of the result.
    merged.extend(L1[i:])
    merged.extend(L2[j:])
    return merged


def merge_with_pop(L1, L2):
    """ Assume L1 and L2 are sorted.
    Create and return a new list that is the merged version of L1&L2.

    This is the easy version of merge, using pop. Items are removed
    from the front of L1 and L2 as they are merged, so the input lists
    are modified: whatever has been popped is gone from them when the
    function returns. pop(0) has to shift the remaining items, which
    makes this version slower than an index-based merge.

    The merge is stable: when two items compare equal, the one from L1
    is placed first. Only the < operator is used on the items.

    """

    merged = []
    while len(L1) > 0 and len(L2) > 0:
        if L2[0] < L1[0]:
            ## take from L2 only when it is strictly smaller
            merged.append( L2.pop(0) )
        else:
            ## ties and smaller values come from L1, keeping input order
            merged.append( L1.pop(0) )
    ## one list is exhausted; the other holds the sorted remainder
    merged.extend(L1)
    merged.extend(L2)
    return merged
    
def merge_sort(L):
    """ Bottom-up merge sort. Returns a new sorted list; L itself is
    not modified. An empty input returns an empty list.

    Complexity: Every step of while loop:
        we merge lists which together would make up the
        input list. So, there are about O(n) comparisons
        and list insertions.

        For example, in the beginning, we have n lists,
        so we merge n/2 times lists of 1 items, each requiring
        about 2 insertions. Total cost 2*n/2=n.

        How many times the while loop is executed?
            We start with n lists
            At the next step, we have n/2
            We will keep halving, which we can do at most log n
            times (log base 2 of n).

        Total cost:  O(n* log n)

    """

    ## start with n sorted lists of one item each
    runs = [[val] for val in L]
    if len(runs) == 0:
        ## nothing to merge; without this guard runs[0] below would fail
        return []

    while len(runs) > 1:
        next_runs = []
        ## merge neighbouring pairs: (0,1), (2,3), ...
        for i in range(0, len(runs)-1, 2):
            next_runs.append( merge( runs[i], runs[i+1] ) )

        ## an odd list out is carried over unchanged to the next round
        if len(runs)%2 == 1:
            next_runs.append( runs[-1] )
        runs = next_runs
    return runs[0]


def merge_sort_with_pop(L):
    """ Bottom-up merge sort built on merge_with_pop. Returns a new
    sorted list; L itself is not modified. An empty input returns an
    empty list.

    This is the version that uses the less efficient version of merge.
    See the time comparisons by running this program.

    The number of merge rounds is the same as in merge_sort (about
    log n), but each merge costs more because pop(0) has to shift the
    remaining items of the list on every call.

    """

    ## start with n sorted lists of one item each
    runs = [[val] for val in L]
    if len(runs) == 0:
        ## nothing to merge; without this guard runs[0] below would fail
        return []

    while len(runs) > 1:
        next_runs = []
        ## merge neighbouring pairs: (0,1), (2,3), ...
        for i in range(0, len(runs)-1, 2):
            next_runs.append( merge_with_pop( runs[i], runs[i+1] ) )

        ## an odd list out is carried over unchanged to the next round
        if len(runs)%2 == 1:
            next_runs.append( runs[-1] )
        runs = next_runs
    return runs[0]


if __name__ == "__main__":
    # Timing driver: compare the sorting methods on one shuffled list.
    k = 10000
    
    # Python 2: range() returns a list, which shuffle() reorders in place.
    L = range(k)
    random.shuffle(L)
    
    # Each call times one sorting function on L; list.sort is the
    # built-in sort, included for comparison.
    time_function( L, ins_sort )
    time_function( L, merge_sort )
    time_function( L, merge_sort_with_pop )
    time_function( L, list.sort )
    
    

Lecture 22

Module: lec22_examples — Examples programs from Lecture 22

Code:

"""
Simple Tkinter program to show creation of an interface.
This is not the preferred method for creating a user interface
as it will be difficult to attach functions to the buttons.

"""

from Tkinter import *

# Top-level window of the application.
root = Tk()

# Layout: one main frame holding a top frame (canvas) and a bottom
# frame (buttons).
main_frame = Frame(root)
main_frame.pack()
top_frame = Frame(main_frame)
top_frame.pack(side=TOP)
bottom_frame = Frame(main_frame)
bottom_frame.pack(side=BOTTOM)

# 400x400 drawing area.
canvas = Canvas(top_frame, height=400, width=400)
canvas.pack()

# Oval inscribed in the box (100,100)-(300,300): a circle centered on
# the canvas.
canvas.create_oval(100,100,300,300)

# The buttons are created without a command, so clicking them does nothing.
button3 = Button(bottom_frame, text="Bottom Left")
button3.pack(side=LEFT)
button4 = Button(bottom_frame, text="Bottom Right")
button4.pack(side=RIGHT)

# Hand control to the Tk event loop.
root.mainloop()

# Runs only once mainloop() has returned.
print "Hello"
"""
Sample TkInter program: Illustrates the use of a class to 
encapsulate all interface elements, use of buttons, canvases
and frames.

"""

from Tkinter import *

class MyApp(object):
    """ Tkinter demo application that draws concentric circles.

    All interface elements (frames, canvas, buttons) are stored as
    attributes of the instance, and the button callbacks are methods
    of the class.

    Attributes besides the widgets:
        drawingfinished -- False while draw() is running; Quit is
                           ignored during that time.
        numcircles      -- number of circles drawn by draw(); always
                           an integer and never below 4.

    """

    def __init__(self, parent):
        """ Build the widgets inside parent and set the initial state. """
        ## interface elements
        self.parent = parent
        self.main_frame = Frame(parent)
        self.main_frame.pack()
        self.top_frame = Frame(self.main_frame)
        self.top_frame.pack(side=TOP)
        self.bottom_frame = Frame(self.main_frame)
        self.bottom_frame.pack(side=BOTTOM)
        self.canvas = Canvas(self.top_frame, height=500, width=500)
        self.canvas.pack()
        self.drawbutton = Button(self.bottom_frame, text="Draw",
                                 command=self.draw)
        self.drawbutton.pack(side=LEFT)
        self.clearbutton = Button(self.bottom_frame, text="Clear",
                                  command=self.clear)
        self.clearbutton.pack(side=LEFT)
        self.increasebutton = Button(self.bottom_frame, text="Increase",
                                     command=self.increase)
        self.increasebutton.pack(side=LEFT)
        self.decreasebutton = Button(self.bottom_frame, text="Decrease",
                                     command=self.decrease)
        self.decreasebutton.pack(side=LEFT)
        self.quitbutton = Button(self.bottom_frame, text="Quit",
                                 command=self.terminate)
        self.quitbutton.pack(side=RIGHT)

        ## other class attributes
        self.drawingfinished = True
        self.numcircles = 4

    def increase(self):
        """ Double the number of circles drawn by the next draw(). """
        self.numcircles *= 2

    def decrease(self):
        """ Halve the number of circles, but never go below 4. """
        ## Floor division keeps numcircles an integer on every Python
        ## version; draw() passes it to range().
        self.numcircles = max(self.numcircles // 2, 4)

    def clear(self):
        """ Remove everything that has been drawn on the canvas. """
        self.canvas.delete("all")

    def draw(self):
        """ Draw numcircles concentric circles, one at a time.

        The circles are centered at (250, 250), the middle of the
        500x500 canvas, and their radii grow in equal integer steps of
        200 // numcircles. Once numcircles exceeds 200 the step is 0.

        """
        self.drawingfinished = False
        x, y = 250, 250
        radius = 200 // self.numcircles
        for i in range(self.numcircles):
            rad = radius * (i + 1)
            self.canvas.create_oval(x - rad, y - rad, x + rad, y + rad)
            ## show the new circle right away, then pause for 100 ms
            ## NOTE(review): update() also processes pending events, so
            ## button callbacks may run during the loop -- confirm.
            self.canvas.update()
            self.canvas.after(100)
        self.drawingfinished = True

    def terminate(self):
        """ Close the application, unless a drawing is in progress. """
        if self.drawingfinished:
            self.parent.destroy()

if __name__ == "__main__":
    # Create the top-level window and build the application inside it.
    root = Tk()

    myapp = MyApp(root)

    # Hand control to the Tk event loop; the print below runs only once
    # mainloop() has returned.
    root.mainloop()
    print "Finished the program"

Lecture 23

Module: lec23_examples — Examples programs from Lecture 23

Code:

"""
Recursion: Example functions

Basis/Stopping condition:
    Define when your function should stop 
    calling itself recursively

Inductive/Recursive step:
    Define how the function can compute its
    output by calling itself recursively and then
    use the result.

Example:

Recursive step:

factorial(n) =  factorial(n-1)*n

If you had the correct output for  factorial(n-1), 
you can multiply it with n to find the correct output.

"""

def blast(n):
    """ Recursive countdown: prints n, n-1, ..., 1, then "Blast off!",
    then the same numbers again in increasing order, one per line.

    The second print of n runs after the recursive call returns, which
    shows that each call resumes where it left off.

    """
    if n <= 0: ##basis step
        print("Blast off!")
        return
    ## recursive step
    ## print(x) with a single argument gives the same output on
    ## Python 2 and Python 3.
    print(n)
    blast(n-1)
    print(n)
                

def factorial(n):
    """ Recursive factorial: returns n! for a non-negative integer n.

    Basis step: 0! and 1! are both 1.
    Recursive step: n! = (n-1)! * n.

    Raises ValueError when a negative value is reached (a negative or
    fractional n), because such inputs never hit a basis step and the
    recursion would not stop. Each call adds one level of recursion,
    so very large n can exceed the interpreter's recursion limit.

    """
    if n < 0:
        raise ValueError("factorial() is only defined for non-negative integers")
    if n == 0 or n == 1: ##basis step
        return 1
    ## recursive step
    x = factorial(n-1)
    return x*n

def factorial_iterative(n):
    """ Many recursive functions can be written without recursion.

    Returns n! for a non-negative integer n by multiplying 2, 3, ..., n
    together; 0! and 1! are 1. Raises ValueError for negative n.

    """
    if n < 0:
        raise ValueError("factorial_iterative() is only defined for non-negative integers")
    x = 1
    ## start at 2: a factor of 0 would make the whole product 0,
    ## and a factor of 1 changes nothing
    for i in range(2, n+1):
        x *= i
    return x


def fib(n):
    """ Fibonacci sequence: fib(0) = 0, fib(1) = 1 and
       fib(n) = fib(n-1) + fib(n-2) for larger n.

       Even though it is defined recursively, this function would be
       much more efficient to write without recursion, because the
       same values are recomputed many times.

       Raises ValueError when a negative value is reached (a negative
       or fractional n), because such inputs never hit a basis step
       and the recursion would not stop.

    """
    if n < 0:
        raise ValueError("fib() is only defined for non-negative integers")
    if n == 0:
        return 0
    if n == 1:
        return 1
    return fib(n-1) + fib(n-2)


if __name__ == "__main__":
    ##Testing code here.
    # Read a number from the user (Python 2 raw_input returns a string)
    # and show its factorial.
    val = int(raw_input("Number => "))
    print "Factorial of %d is %d" %(val, factorial(val))

    # Print the first ten Fibonacci numbers, fib(0) through fib(9).
    for i in range(10):
        print "fib(%d) = %d" %(i, fib(i))

Lecture 24

Module: lec24_examples — Examples programs from Lecture 24

Code:

"""
Problem: Find the mode of a list: the value that occurs most frequently

Refine requirements:

1. How do I break ties or should I return all values that occur most
   frequently?

2. What should I return for empty list?

3. What are the different values and is there a limited number of them?

   Expected number of distinct values in the list: m
   Is m constant, or can it vary?
   Do we expect m to be as large as n (#items in the list)?

4. What are the values? Integer or float?

   Which values are considered the same?

--------
Version 1.

Find mode, given:
    A list of integers, (m is not fixed/not a constant)
    I am looking for any value that occurs most frequently 
    (break ties arbitrarily)
    Return None if the list is empty

Possible solutions:
Note: For complexity, we care about the most costly/complex
step.

List solution:  O(n logn)

     Create a copy of the list:   O(n) 
     Sort a copy of the list:     O(n logn)
     Go through the list once, and keep track of 
        #times an item is seen and the most frequent item
        O(n)

Set solution:  O(n * m) 

     Find the set(L):   O(n)  =>   m distinct values

     For each distinct value  ( set(L)):   [repeat loop m times: O(n*m)]
         Count how many times the value occurs in L
         and keep track of the max   O(n)


Dictionary solution: O(n)

     Use a dictionary D: 
         key: a value in list
         value: count of that value
     For each item in list: O(n)
         update counts in D
     For each key in the dictionary: O(m)
         check the count and keep 
         track of the max

"""

import time
import random


def time_algorithm(L, func):
    """ Run func(L), print how long the call took, and return the
    value that func produced so the caller can check the answer.

    """
    start = time.time()
    val = func(L)
    end = time.time()
    ## Single parenthesized argument: prints the same text on
    ## Python 2 and Python 3.
    print("%s took: %.4f seconds" %(func.__name__, end-start))
    return val

def mode_list(L):
    """ List solution for the mode, O(n logn).

    Sorts a copy of L so that equal values sit next to each other,
    then scans it once while counting the length of each run of equal
    values. Returns the value with the longest run, or None if L is
    empty. When several values are tied, the smallest one is returned,
    because runs are visited in sorted order and only a strictly
    longer run replaces the current best. L itself is not modified.

    """
    if len(L) == 0:
        return None

    ordered = sorted(L) ##sorted copy, the input stays as it is

    run_val = ordered[0]   ##value of the run being counted
    run_count = 0
    best_val = None        ##value with the longest run so far
    best_count = 0
    for val in ordered:
        if val == run_val:
            run_count += 1
            continue
        ##a new value starts: close the previous run
        if run_count > best_count:
            best_count = run_count
            best_val = run_val
        run_val = val
        run_count = 1

    ##the last run is not closed inside the loop
    if run_count > best_count:
        best_val = run_val
    return best_val


def mode_set(L):
    """ Set solution for the mode, O(n * m) for m distinct values.

    For every distinct value in L, count how often it occurs in L and
    return the value with the highest count. Returns None if L is
    empty.

    """
    distinct = set(L)
    if not distinct:
        return None
    ##max keeps the first value that reaches the highest count
    return max(distinct, key=L.count)


def mode_dict(L):
    """ Dictionary solution for the mode, O(n).

    Counts the occurrences of every value of L in a dictionary (key:
    a value in the list, value: its count), then returns the key with
    the highest count. Returns None if L is empty.

    """
    counts = {}
    for item in L:  ##one pass over the list to collect the counts
        counts[item] = counts.get(item, 0) + 1
    if not counts:
        return None
    ##max keeps the first key that reaches the highest count
    return max(counts, key=counts.get)

            
if __name__ == "__main__":

    ## test code!, try [], [1,2,2,3], [3,3,2,1]
   
    ## timing of the algorithm
    ## We expect dictionary to be always cheapest
    ## Set solution is fast when m is very small, otherwise
    ## Set solution is similar to n^2 and the most expensive one.
    ## Create a random list given n/m
    n = 4000
    m = n/10   ##Try also n=10
    ## x holds n random integers drawn from 1..m (both ends included)
    x = []
    for i in range(n):
        x.append( random.randint(1,m) )

    ## Test all the algorithms
    print "N: %d, M: %d" %(n,m)
    print
    ## every algorithm is timed on the same list x
    time_algorithm(x, mode_list)
    time_algorithm(x, mode_set)
    time_algorithm(x, mode_dict)