Code Developed in CSCI-1100 Fall 2025

Lecture 1

Module: three_doubles — Finds three consecutive pairs of double letters

Code:

""" Find all words containing three consecutive pairs of double letters 
in a file of all English words located at:

        http://www.greenteapress.com/thinkpython/code/words.txt

**Modules used:**  :py:mod:`urllib` 

**Author**: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <cvstewart@gmail.com>

**Returns:** All words matching condition and the count of found words

**Pseudo Code**::

   open the file from the web with all the words in English
    
   for each word in the file:
       for all positions l in the word
           if the letters at positions (l and l+1) are the same
              and the letters at positions (l+2 and l+3) are the same
              and the letters at positions (l+4 and l+5) are the same then
               output word and increment the count

"""

import urllib.request

def has_three_double(word):
    """
    Report whether word contains three doubled letters back to back,
    such as the "ookkee" in "bookkeeper".

    Returns True as soon as one such run of six characters is found and
    False when no starting position qualifies (including words shorter
    than six characters).
    """
    window_starts = range(len(word) - 5)
    return any(
        word[i] == word[i + 1]
        and word[i + 2] == word[i + 3]
        and word[i + 4] == word[i + 5]
        for i in window_starts
    )

# Comments that fit in a single line can be put in this format.

# The main body of the program starts here

"""
Assign the location of the words file and go get it.
"""
word_url = 'http://www.greenteapress.com/thinkpython/code/words.txt'

# Process each word in the file one by one, testing to see if it has
# three consecutive doubles.  Print it and count it if it does.
# The "with" block closes the HTTP response once every line has been read.
# Each line arrives as bytes, so it is decoded and stripped of its newline
# before being tested.
count = 0
with urllib.request.urlopen(word_url) as word_file:
    for word in word_file:
        word = word.decode().strip()
        if has_three_double(word):
            print(word)
            count = count + 1

# After we've gone through all the words, output a final message based
# on the number of words that were counted.
if count == 0:
    print('No words found')
elif count == 1:
    print("1 word found")
else:
    print(count, 'words were found')

Lecture 3

Module: lec03_surface_and_area — Find the volume and surface area of a cylinder

Code:

# Volume and surface area of a cylinder from a radius and a height typed
# in by the user.
pi = 3.14159
radius = float(input("Radius: "))
height = float(input("Height: "))

base_area = pi * radius ** 2             # one circular end cap
side_area = 2 * pi * radius * height     # the wall, unrolled into a rectangle
volume = base_area * height
surface_area = 2 * base_area + side_area
print("volume is", volume, ", surface area is", surface_area)

Lecture 5

Module: lec05_surface_and_area — Find the surface area and volume of a cylinder

Code:

'''
This is a program to calculate the surface area and
volume of a cylinder given a radius and a height.

Radius and height are in float and are user inputs.

Sample Execution:
Enter radius (float) => 12
Enter height (float) => 10
Surface area is: 1658.76
Volume is: 4523.89

9/18/2023
'''
import math

def area_circle(radius):
    '''
    Compute the area enclosed by a circle.

    radius is the radius of the circle; the value returned is
    pi times the radius squared.
    '''
    return math.pi * radius ** 2

def area_cylinder(h, r):
    '''
    Surface area of a closed cylinder with height h and radius r: the two
    circular end caps plus the side wall unrolled into a rectangle.
    '''
    return 2 * area_circle(r) + math.pi * 2 * r * h

if __name__ == "__main__":
    # Read the two dimensions, then report both results to two decimals.
    r = float(input("Enter radius (float) => "))
    h = float(input("Enter height (float) => "))
    surface = area_cylinder(h, r)
    volume = h * area_circle(r)
    print(f"Surface area is: {surface:.2f}")
    print(f"Volume is: {volume:.2f}")

Lecture 6

Module: lec06_rectangle — Does a given point fall within a rectangle

Code:

'''
Program to demonstrate the use of complex boolean expressions and if/elif/else
clauses. Determine whether a set of coordinates fall within a rectangle given
by the vertices (x0, y0), (x0, y1), (x1, y1), and (x1, y0)

Author: CS1 Staff
Date 9/21/2024
'''

# Initialize the rectangle
x0 = 10
x1 = 16
y0 = 32
y1 = 45

# Get the target point.  The text typed for each coordinate is echoed back
# before it is converted to a float.
x = input("x coordinate ==> ")
print(x)
x = float(x)
y = input("y coordinate ==> ")
print(y)
y = float(y)

# If the x coordinate matches x0 or x1 and we are within the y range, we are
# on the boundary. Similarly, if the y coordinate matches y0 or y1 and we are
# within the x range, we are also on the boundary.  Naming the two cases keeps
# the test from depending on "and" binding tighter than "or".
on_vertical_edge = (x == x0 or x == x1) and (y0 <= y <= y1)
on_horizontal_edge = (y == y0 or y == y1) and (x0 <= x <= x1)

if on_vertical_edge or on_horizontal_edge:
    print("Point ({:.2f},{:.2f}) is on the boundary.".format(x, y))
elif (x0 < x < x1) and (y0 < y < y1):
    # If we are not on the boundary, but we are in range in both x and y,
    # then we are inside the rectangle
    print("Point ({:.2f},{:.2f}) is inside the rectangle.".format(x, y))
else:
    # If we are not on the boundary and we are not inside the rectangle, then
    # we must be outside.
    print("Point ({:.2f},{:.2f}) is outside the rectangle.".format(x, y))

Lecture 7

Module: lec07_area — Set up a module for area calculations

Code:

'''
Lecture 7 - Area Module
Prof. Charles Stewart

We've gathered the code from our area calculations to form a module
that can be used by other programs.
'''

import math

def circle(radius):
    ''' Area of a circle with the given radius '''
    area = math.pi * radius**2
    return area

def cylinder(radius,height):
    ''' Total surface area of a cylinder: two end caps plus the side wall '''
    end_caps = 2 * circle(radius)
    side_wall = 2 * radius * math.pi * height
    return end_caps + side_wall

def sphere(radius):
    ''' Surface area of a sphere with the given radius '''
    surface = 4 * math.pi * radius**2
    return surface

Module: lec07_use_area — Use the area module in a separate main

Code:

'''
Lecture 7 - Demonstrate the use of the area calculations
Prof. Charles Stewart
'''

import lec07_area

# Evaluate each function from the area module for one sample radius and
# height, then report the three results to one decimal place.
r, h = 6, 10
a1 = lec07_area.circle(r)
a2 = lec07_area.cylinder(r,h)
a3 = lec07_area.sphere(r)
print(f"Area circle {a1:.1f}")
print(f"Surface area cylinder {a2:.1f}")
print(f"Surface area sphere {a3:.1f}")

Module: lec07_images_init — Image chipmunk example

Code:

from PIL import Image

# Open the image and report its format, size and mode.
filename = "chipmunk.jpg"
im = Image.open(filename)
print('\n********************')
print("Here's the information about", filename)
print(im.format, im.size, im.mode)

# Make a gray scale copy, shrink it to 128x128, and report again.
target_size = (128, 128)
gray_im = im.convert('L')
scaled = gray_im.resize(target_size)
print("After converting to gray scale and resizing,")
print("the image information has changed to")
print(scaled.format, scaled.size, scaled.mode)

# Display the result and save it next to the original; the new name is the
# full original file name with "_scaled.jpg" appended.
scaled.show()
scaled.save(f"{filename}_scaled.jpg")

Lecture 9

Module: lec09_co2_percentages — CO2 percentages from class examples

Code:

'''
Demonstrate walking through a list calculating values between pairs of
values. In this instance we are calculating the percent change year-to-year
for CO2 concentration.
'''

co2_levels = [
    (2001, 320.03), (2003, 322.16), (2004, 328.07),
    (2006, 323.91), (2008, 341.47), (2009, 348.92),
    (2010, 357.29), (2011, 363.77), (2012, 361.51),
    (2013, 382.47),
]

# Pair every reading with the one before it and record the relative change
# (difference divided by the earlier value) between the two.
percent_change = [
    (later[1] - earlier[1]) / earlier[1]
    for earlier, later in zip(co2_levels, co2_levels[1:])
]

print(percent_change)

Module: lec09_loop_variable_examples — Three examples of manipulating loop variables

Code:

'''
Three examples of manipulating loop variables. The first prints out every 
other element of the list starting from the first element. The second uses the
loop variable to print out an evergreen tree. The third prints an evergreen
tree whose width is chosen by the user.
'''
months=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']

# Stepping the index by 2 visits positions 0, 2, 4, ... so every other
# month is printed, starting with the first.
i = 0
while i < len(months):
    print(months[i])
    i+= 2
    
'''
Now print out the evergreen.
'''
print()
# i takes the odd values 1, 3, 5, 7, 9 and is the number of stars on the
# row; 4 - i//2 leading spaces centre each row under the single-star top.
i = 1
#length = 9
while i < 10:
    print((4 - i//2 )*" " + i*'*')
    i+= 2

# The trunk: two rows of three stars, centred under the 9-star bottom row.
print(3*" "+3*'*')
print(3*" "+3*'*')

'''
Finally, let's let the user pick
an odd value > 3.
'''
print()
field = int(input("Enter an odd number greater than 3: "))
if field % 2 == 1 and field > 3:
    # Same tree, with the widest row set by the user.  The padding and the
    # stars are passed as two print() arguments, so print() puts one extra
    # space between them on every row.
    j = 1
    while j <= field:
        print((field - j) // 2 * ' ', j * "*")
        j += 2
        
    # Trunk: two rows of three stars, padded like the 3-star row above.
    j = 3
    print((field - j) // 2 * ' ', j * "*")
    print((field - j) // 2 * ' ', j * "*")
else:
    # Reject even values and anything that is 3 or smaller.
    print("{} is not an odd integer greater than 3.".format(field))

Module: lec09_nested_loop — Example of doubly nested loop

Code:

'''
Quick code snippet to demonstrate walking through a list operating on 
all pairs of list elements without repeating matches and without operating
on the diagonals.

CS1
'''

L = [2, 21, 12, 8, 5, 31]

# Seed the search with the first pair so there is always a current best.
dist = abs(L[0] - L[1])
indices = 0 ,1

# Visit every pair (i, j) with i < j exactly once.  Using <= means that
# when two pairs tie, the one visited later is the one reported.
for i in range(len(L)):
    for j in range(i + 1, len(L)):
        gap = abs(L[i] - L[j])
        if gap <= dist:
            dist, indices = gap, (i, j)

print("Closest {} at {}.".format(dist, indices))

Lecture 11

Module: Lec11_module — Example of defining test code in a module

Code:

'''
Demonstrate importing and using a 'homegrown' module. In this file
we are defining the Lec11_module code. Note that the code in the

if __name__ == "__main__":

block is executed when this file is run, but not when we import it 
into Lec11_main. Either way the "addto" function remains available.
'''
def addto(val, increment):
    ''' Return val with increment added to it. '''
    result = val + increment
    return result

if __name__ == "__main__":
    # Test code: add up 0, 1, ..., n-1 with addto, printing the loop
    # variable and n once before the loop and again on every pass.
    n = int(input("Enter a positive integer ==> "))
    total = 0
    print(0,n)
    for i in range(n):
        print(i,n)
        total = addto(total, i)
    print('Sum is', total)

Module: Lec11_main — Example of importing a module with test code

Code:

'''
Demonstrate importing and using a 'homegrown' module. In this file
we are importing the Lec11_module code. Note that the code in the

if __name__ == "__main__":

block is not executed, but we can read in and use the "addto" function.
'''
import Lec11_module

# Call the imported function through the module name and print the result so
# that running this file shows the import worked.  (A stray call to an
# undefined function with an undefined argument used to follow this line and
# made the script stop with a NameError; it has been removed.)
print(Lec11_module.addto(5,7))

Module: Lec11_RandomWalk — Example of using the random module

Code:

# -*- coding: utf-8 -*-
"""
Created on Thu Oct  8 14:20:39 2020

@author: westu

This is an example of using a random function in a simulation. Conceptually,
a person randomly takes a step forward or backward on a platform based on 
the value of the random function. To control the behavior of the simulation,
you can uncomment and control the value of the seed.
"""
import time
import random

def print_pos(pos, length):
    '''
    Print one frame of the walk: a platform of the given length with the
    person drawn as '^' at position pos, or as 'v' beyond the left end
    (pos == 0) or the right end (pos > length) once they have stepped off.
    '''
    platform = length * '-'
    if pos == 0:
        frame = 'v' + platform + '_'
    elif pos > length:
        frame = '_' + platform + 'v'
    else:
        left = (pos - 1) * "-"
        right = (length - pos) * "-"
        frame = '_' + left + "^" + right + '_'
    print(frame, flush=True)
    
'''
Main for the random walk.
'''
if __name__ == "__main__":
    length = int(input("Length: "))
    #random.seed(100)
    # Start just past the middle and keep stepping while still on the
    # platform (positions 1 through length).
    pos = length // 2 + 1
    while 0 < pos <= length:
        print_pos(pos, length)
        # Plug in your favorite random function here:
        # random.random() or random.randint work too ...
        step = random.choice([-1, 1])
        pos += step
        time.sleep(.1)
    # One last frame showing which end the person fell off.
    print_pos(pos, length)

Lecture 12

Module: Lec12_dist — Closest point example

Code:

'''
Two implementations of the closest point calculation, one using
an auxiliary list and one not using an auxiliary list.
'''
def distance(p1, p2):
    '''
    Calculate the straight-line distance between two points, each given
    as a sequence whose first two entries are its coordinates.
    '''
    second_diff = p1[1] - p2[1]
    first_diff = p1[0] - p2[0]
    return (second_diff**2 + first_diff**2)**0.5

def closest_points_1(points):
    '''
    Find the closest pair of points by first recording every pairwise
    distance in a list and then taking the minimum.

    Returns [distance, i, j], where i < j are the indices of the two points.
    Entries are compared as lists, so equal distances are broken by the
    smaller index values.
    '''
    how_many = len(points)
    candidates = [
        [distance(points[i], points[j]), i, j]
        for i in range(how_many)
        for j in range(i + 1, how_many)
    ]
    return min(candidates)

def closest_points_2(points):
    '''
    Find the closest pair of points while keeping only the best pair seen
    so far, instead of storing every distance.

    Returns the tuple (distance, i, j), where i < j are the indices of the
    two points.  The first pair found at the smallest distance is kept.
    '''
    # Start from the first two points so there is always a current best.
    best = (distance(points[0], points[1]), 0, 1)
    for i, first in enumerate(points):
        for j in range(i + 1, len(points)):
            gap = distance(first, points[j])
            if gap < best[0]:
                best = (gap, i, j)
    return best

points = [ (1,5), (13.5, 9), (10, 5), (8, 2), (16,3) ]

# Run both implementations on the same data; each result holds the distance
# followed by the indices of the two points.
report = "Closest dist of {:.2f} occurs between {} and {}"
for finder in (closest_points_1, closest_points_2):
    cp = finder(points)
    print(report.format(cp[0], points[cp[1]], points[cp[2]]))

Module: Lec12_Workspace — For and while loop examples

Code:

'''
Calculate the distance between 2 x,y coordinates. This is used
later in the closest points calculation
'''
def dist(x, y):
    ''' Straight-line distance between the points x and y. '''
    first_diff = x[0] - y[0]
    second_diff = x[1] - y[1]
    return (first_diff**2 + second_diff**2)**0.5

'''
Two loops to demonstrate manipulation of
loop variables for "for" and "while" loops.
'''
# Read the upper limit used by both loops; neither loop reaches n itself.
n = int(input("N?: "))

print("For:")
# range(2, n, 2) produces 2, 4, 6, ... and stops before reaching n.
for i in range(2, n, 2):
    print(i)
    
print("\nWhile:")
# The same sequence with the loop variable managed by hand: start at 2,
# continue while it is below n, and step by 2 at the end of each pass.
i = 2
while i < n:
    print(i)
    i += 2

Lecture 13

Module: lec13_avg — File example, reading and calculating scores

Code:

file_name = input("Enter the name of the scores file: ")
file_name = file_name.strip()   # Eliminate extra white space that the user may have typed
print(file_name)

# Read one integer score per line, echoing each one and keeping a running
# total and count.  The "with" block closes the file when the loop ends.
num_scores = 0
sum_scores = 0
with open(file_name) as score_file:
    for s in score_file:
        if not s.strip():
            # A blank line (often the last one in a file) holds no score.
            continue
        score = int(s)
        sum_scores += score
        num_scores += 1
        print(score)

# Guard the division: a file with no scores has no average.
if num_scores > 0:
    print("Average score is {:.1f}".format( sum_scores / num_scores ))
else:
    print("No scores found")

Module: lec13_parse_legos — Parsing Practice from Lecture 13

Code:

'''
Building the list of legos from a file.  Each line of this file 
contains the name of a lego and the number of copies of that 
lego, separated by a comma.  For example,
2x1, 3
2x2, 2
'''
lego_name = input('Enter the name of the legos file: ').strip()
lego_list = []
# The "with" block closes the file when the loop ends.
with open(lego_name) as lego_file:
    for line in lego_file:
        if not line.strip():
            # A blank line has no "name, count" pair to parse.
            continue
        line = line.split(',')
        lego = line[0].strip()   # get rid of extra space
        count = int(line[1])
        # Add count copies of the name.  lego_list = lego_list + [lego]*count
        # gives the same result, but builds a brand-new list on every line;
        # extend grows the existing list in place.
        lego_list.extend( [lego]*count )
print(lego_list)

Module: lec13_parse_yelp — Parsing Practice from Lecture 13

Code:

'''
Lecture 13 Practice Problem: Parse the yelp.txt data file to create a
list of lists of names and averages. This demonstrates parsing an
irregularly formatted file.  We have to know that the 0th entry on
each line is the name and that the entries from the 6th on are the scores.

Prof. Stewart
'''

def yelp_averages( yelp_name ):
    '''
    Read the '|'-separated file named yelp_name and return a list with one
    [name, average] entry per line, in file order.

    On each line, entry 0 is taken as the name and every entry from
    position 6 on is converted to an integer score.  The average is the
    mean of those scores, or -1 when the line has no scores at all.

    Raises ValueError if a score entry is not an integer.
    '''
    averages = []
    # The "with" block closes the file once every line has been read.
    with open(yelp_name) as yelp_file:
        for line in yelp_file:
            fields = line.split('|')
            name = fields[0]
            scores = fields[6:]    # From entry 6 on are the scores

            if not scores:
                # Handle the special case of an empty scores list
                averages.append( [ name, -1 ] )
            else:
                # Compute the average when there is at least one score.
                # int() ignores the newline left on the last entry.
                total = sum(int(s) for s in scores)
                averages.append([name, total / len(scores)])
    return averages

# Parse the data file and show only the first three [name, average] entries.
avgs = yelp_averages('yelp.txt')
first_three = avgs[:3]
print(first_three)

Lecture 14

Code:

'''
Here is a short example to show the calculation of a mode
when there is and is not an "enumerable" mapping between 
values and indices. (For the non-enumerable case, assume the values
are floats or sparsely distributed.)

We assume scores are hockey scores. They cannot be negative.
'''

scores = [(3, 2), (2, 1), (9, 1), (8, 7), (2, 0), (0,4), (1,7), (29, 6), (27, 29), (30, 29), (2, 29)]

# ---------------------------------------
# Case 1: the values map directly onto the indices of a list
# ---------------------------------------

# Find the range for the enumeration: the largest value on either side of
# any score fixes how long the tally list has to be.
high = scores[0][0]
for first, second in scores:
    high = max(high, first, second)

# Tally list: L[v] ends up holding how many times the value v occurs.
L = [0] * (high + 1)
for first, second in scores:
    L[first] += 1
    L[second] += 1

# Report the enumerable case.  If nothing occurs more than once there is
# no mode; otherwise every value that reaches the top tally is reported.
most = max(L)
print("Max occurence: {}".format(most))
if most == 1:
    print("No Mode")
else:
    for index, tally in enumerate(L):
        if tally == most:
            print("Mode is at: {}".format(index))

# ---------------------------------------
# Case 2: do it again assuming the values cannot be used as indices
# ---------------------------------------

# Put every value into a single sorted list so that equal values sit
# next to each other.
L = sorted(value for pair in scores for value in pair)

# Walk through the sorted list.  Each time the value changes, the run that
# just ended is compared against the best run length seen so far.
curr = 0   # Longest run length seen so far
prev = -1  # Value of the run being counted (-1 before the first run)
count = 0  # Length of the run being counted
modes = [] # All the values whose run length equals curr
for value in L:
    if value == prev:
        count += 1
        continue
    if count > curr:
        modes = [prev]
        curr = count
    elif count == curr:
        modes.append(prev)
    prev = value
    count = 1

# The last run never meets a "value changed" step, so score it here.
if count > curr:
    modes = [prev]
    curr = count
elif count == curr:
    modes.append(prev)
print(modes)

Code:

# -*- coding: utf-8 -*-
"""
Solution to practice problem:
In this question you will implement a method for tracing a decreasing path 
through a grid (list of lists) to find a local minimum. The idea is to start at 
a specific entry in the grid and move to the smallest element of the current point 
and its neighbors. The process is repeated until the current element is the 
smallest in its neighborhood. This would be a local minimum. (As an aside, by 
running this algorithm with different randomly selected values of the start 
element, we can obtain some approximation of the globally minimum element.) 
Neighbors are elements that surround a given element, including the diagonals. 
There is no wrapping around grid edges. Indexing starts at 0.

Part a: Write a function smallest_neighbor(grid, start) that takes grid, a 
two-dimensional grid of numbers represented as a list of lists, and start, 
the row and column of the starting element represented as a two-tuple, and 
returns the row and column of the element whose value is the smallest among 
the start element and all of its neighbors, also in the form of a two-tuple. 
For example,

matrix = 	[[19, 11, 7, 8],
[5, 3, -1, -27],
[14, 0, -2, 4],
[12, -18, 10, -11]]
>>> print(smallest_neighbor(matrix, (0,0)))
(1, 1)

since element (1, 1) which is 3, is the smallest of all neighbors of element (0; 0).

Part b: Now write a function local_min(grid, start) that takes grid, a 
two-dimensional grid of numbers represented as a list of lists, and start, 
the row and column of the starting element represented as a two-tuple (this 
is the same arguments as for Part a), and returns the row and column of the 
local minimum element, also in the form of a two-tuple. When searching for the 
local minimum element, begin with start and repeatedly move to the neighbor 
whose value is the smallest among all neighbors of the current element and 
smaller than the current element itself. If all neighbors of the current element 
are not smaller than the current element, then it is the local minimum element. 
Use function smallest_neighbor(grid, start) that you defined earlier. For example,

>>> print(local_min(matrix, (0,0)))
(1, 3)

since element (1; 3) which is -27 is smaller than any of its neighbors.

@author: mushtu
"""

def smallest_neighbor(grid,start):
    """
    Return the (row, col) of the smallest value among the element at start
    and its up-to-eight surrounding neighbors, diagonals included.

    Positions that fall outside the grid are ignored (no wrap-around).  A
    neighbor replaces the current best only when it is strictly smaller, so
    start itself is returned when no neighbor beats it, and ties between
    neighbors go to the one examined first.
    """
    row, col = start[0], start[1]
    best_pos = start
    best_val = grid[row][col]
    height, width = len(grid), len(grid[0])
    offsets = [(0,1),(1,0),(0,-1),(-1,0),(-1,1),(1,-1),(1,1),(-1,-1)]
    for d_row, d_col in offsets:
        r, c = row + d_row, col + d_col
        if not (0 <= r < height and 0 <= c < width):
            continue
        if grid[r][c] < best_val:
            best_val = grid[r][c]
            best_pos = (r, c)
    return best_pos



def local_min(grid,start):
    """
    Follow a decreasing path through grid beginning at start and return the
    (row, col) where it ends.

    Each step moves to the position chosen by smallest_neighbor; the walk
    stops when that position is the current one, i.e. no neighbor is
    smaller than the current element.
    """
    current = start
    while True:
        candidate = smallest_neighbor(grid, current)
        if candidate == current:
            return candidate
        current = candidate
    

matrix = [
    [19, 11, 7, 8],
    [5, 3, -1, -27],
    [14, 0, -2, 4],
    [12, -18, 10, -11],
]

# A single step from (3,2), then a full descent that starts at (0,0).
one_step = smallest_neighbor(matrix, (3,2))
print(one_step)
descent_end = local_min(matrix,(0,0))
print(descent_end)

Code:

"""
@author: uzmam
"""

'''1. Given a grid find the location of every queen (denoted by q or Q)
   2. Given a grid find if a queen is located on the same row or column.
   Return a True if yes, False otherwise.
   3. Find if a queen is located on any of the diagonals of king's location.
   Return a True if yes, False otherwise'''

def find_queen(grid):
    '''
    Return the (row, col) location of every queen ('q' or 'Q') in grid,
    listed row by row and left to right within a row.  The width of the
    first row is used as the number of columns for every row.
    '''
    return [
        (r, c)
        for r in range(len(grid))
        for c in range(len(grid[0]))
        if grid[r][c].lower() == 'q'
    ]
                
def same_row_col(grid):
    '''
    Return True if any queen in grid shares a row or a column with the
    king ('k' or 'K'), and False otherwise.

    If the grid holds more than one king, the last one found when scanning
    row by row is used.  A grid with no king returns False; previously that
    case failed with a NameError because the king's position was never set.
    '''
    king = None
    for r in range(len(grid)):
        for c in range(len(grid[0])):
            if grid[r][c].lower()=='k':
                king = (r, c)
    if king is None:
        return False
    krow, kcol = king
    return any(krow == i or kcol == j for i, j in find_queen(grid))

def same_diagonal(grid):
    '''
    Return True if any queen in grid lies on one of the diagonals through
    the king ('k' or 'K'), and False otherwise.

    A queen is on a diagonal of the king when its row distance and its
    column distance from the king are equal.  If the grid holds more than
    one king, the last one found when scanning row by row is used.  A grid
    with no king returns False; previously that case failed with a
    NameError because the king's position was never set.
    '''
    king = None
    for r in range(len(grid)):
        for c in range(len(grid[0])):
            if grid[r][c].lower()=='k':
                king = (r, c)
    if king is None:
        return False
    krow, kcol = king
    return any(abs(krow - i) == abs(kcol - j) for i, j in find_queen(grid))


if __name__ == '__main__':
    # Start from an empty 8x8 board and place the three pieces: two queens
    # (one lower case, one upper case) and the king.
    grid = [['.'] * 8 for _ in range(8)]
    grid[3][4] = 'q'
    grid[3][6] = 'k'
    grid[4][1] = 'Q'

    # Run each of the three checks on the board and print its answer.
    for check in (find_queen, same_row_col, same_diagonal):
        print(check(grid))
    

Lecture 15

Module: Lec15_find_names_start — Starting point for IMDB example

Code:

'''
This is the start to the solution to the problem of finding all people
named in the internet movie database.  

One important note.  In opening the file we need to specify the
encoding the text.  The default is what's known as utf-8, but this
only handles English characters well.  For the IMDB file, we need to
open with a more language-independent, international standard.  This
is 'ISO-8859-1'.

As we will use the time.time() function to measure how long our
computation takes.  This function tells the seconds since an "epoch",
which is 1/1/1970 on Unix-based systems (including Macs and Linux
machines) and 1/1/1601 on Windows machines.  By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time


imdb_file = input("Enter the name of the IMDB file ==> ").strip()
name_list = []
# Starting point: pull the name (the text before the first '|') out of each
# line.  Nothing is stored in name_list yet.  The "with" block closes the
# file when the loop ends.
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        words = line.strip().split('|')
        name = words[0].strip()

Module: Lec15_find_names_list — Using lists to hold unique names

Code:

'''
This is the list-based solution to the problem of finding all people
named in the internet movie database.  Each line is split and
stripped to get the name and then the name is added to a list, but
only if it is not already there.

One important note.  In opening the file we need to specify the
encoding the text.  The default is what's known as utf-8, but this
only handles English characters well.  For the IMDB file, we need to
open with a more language-independent, international standard.  This
is 'ISO-8859-1'.

As we will use the time.time() function to measure how long our
computation takes.  This function tells the seconds since an "epoch",
which is 1/1/1970 on Unix-based systems (including Macs and Linux
machines) and 1/1/1601 on Windows machines.  By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time

imdb_file = input("Enter the name of the IMDB file ==> ").strip()

start_time = time.time()

name_list = []
# The "with" block closes the file when the loop ends.
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        words = line.strip().split('|')
        name = words[0].strip()

        #  Add the name to the list if it is new.  The "not in" test has to
        #  search the list itself, so it gets slower as the list grows.
        if name not in name_list:
            name_list.append(name)
            # Report how long each batch of 1000 new names took, then
            # restart the clock for the next batch.
            if len(name_list) % 1000 == 0:
                end_time = time.time()
                print('After {} added, the last 1000 took {:.2f} seconds'.format(len(name_list), end_time-start_time))
                start_time = end_time


print("Number of unique names in the IMDB:", len(name_list))
for n in name_list:
    print('\t{}'.format(n))

Module: lec15_find_names_list_sort — Faster list version using sorting

Code:

'''
Here is an alternative list based solution - not covered in lecture -
where each name is added to the list without any checking for
duplicates. The list is then sorted and the number of distinct
individual is counted by scanning through the list and looking for
adjacent pairs of names that are different.

You will see that this solution is almost as fast as the set-based
solution, but the set-based solution is simpler and more natural to
write.

One important note.  In opening the file we need to specify the
encoding the text.  The default is what's known as utf-8, but this
only handles English characters well.  For the IMDB file, we need to
open with a more language-independent, international standard.  This
is 'ISO-8859-1'.

As we will use the time.time() function to measure how long our
computation takes.  This function tells the seconds since an "epoch",
which is 1/1/1970 on Unix-based systems (including Macs and Linux
machines) and 1/1/1601 on Windows machines.  By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time

imdb_file = input("Enter the name of the IMDB file ==> ").strip()

start_time = time.time()

# Add all the names to the list, duplicates included.  The "with" block
# closes the file when the loop ends.
name_list = []
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        words = line.strip().split('|')
        name = words[0].strip()
        name_list.append(name)

# Sort the names.  After this all repeated names will be next to each
# other in the list.
name_list.sort()

# Count the distinct names: the first name counts once, and every adjacent
# pair of names that differ marks the start of another distinct name.  An
# empty list has no names at all, so the count starts at 0 in that case.
count = 1 if name_list else 0
for i in range(1,len(name_list)):
    if name_list[i-1] != name_list[i]:
        count += 1

end_time = time.time()
# {:.2f} prints two decimal places, matching the other timing reports
# (the earlier {:2f} asked for a minimum width of 2, not a precision).
print('Total time required {:.2f} seconds'.format(end_time-start_time))
print("Number of unique names in the IMDB:", count)

Module: Lec15_find_names_sets — Faster versions using sets

Code:

'''
This is the solution to the problem of using sets to count the number
of individuals in the internet movie database.  Each line of input is
split and stripped to get the name and this name is added to the set. 

One important note.  In opening the file we need to specify the
encoding the text.  The default is what's known as utf-8, but this
only handles English characters well.  For the IMDB file, we need to
open with a more language-independent, international standard.  This
is 'ISO-8859-1'.

As we will use the time.time() function to measure how long our
computation takes.  This function tells the seconds since an "epoch",
which is 1/1/1970 on Unix-based systems (including Macs and Linux
machines) and 1/1/1601 on Windows machines.  By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time

imdb_file = input("Enter the name of the IMDB file ==> ").strip()

start_time = time.time()

# Add every name to a set; adding a name that is already present leaves
# the set unchanged, so no explicit duplicate check is needed.  The "with"
# block closes the file when the loop ends.
names = set()
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        words = line.strip().split('|')
        name = words[0].strip()
        names.add(name)

end_time = time.time()

print("Solution took {:.2f} seconds".format(end_time-start_time))

print("Number of unique names in the IMDB:", len(names))

#######
##  The rest of this code was written to test the code and then
##  commented out.
#######
'''
ordered_names = sorted(names)
for i in range(min(len(ordered_names),100)):
    print("{}: {}".format(i, ordered_names[i]))
'''

'''
for n in names:
    print('\t{}'.format(n))
'''

Lecture 16

Module: lec16_imdb — Find how many movies everyone was in

Code:

imdb_file = input("Enter the name of the IMDB file ==> ").strip()

# count_list holds one [name, count] pair per distinct name, in the order
# the names first appear in the file.  The "with" block closes the file
# when the loop ends.
count_list = []
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        words = line.strip().split('|')
        name = words[0].strip()

        # Search the pairs for this name; bump its count if it is there.
        found = False
        for pair in count_list:
            if pair[0] == name:
                pair[1] += 1
                found = True
                break

        # First time this name has been seen: start a new pair for it.
        if not found:
            new_pair = [name, 1]
            count_list.append(new_pair)

for pair in count_list:
    print("{} appeared in {} movies".format(pair[0], pair[1]))
        

Module: lec16_imdb_sorted — Faster version using sorting

Code:

imdb_file = input("Enter the name of the IMDB file ==> ").strip()

# Collect every name, duplicates included.  The "with" block closes the
# file when the loop ends.
count_list = []
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        words = line.strip().split('|')
        name = words[0].strip()
        count_list.append(name)

# After sorting, all copies of a name sit next to each other.
count_list.sort()

# Walk the sorted list one group of equal names at a time.
index = 0
while index < len(count_list):
    name = count_list[index]
    count = 0
    # The bounds check must come first: once the last group has been
    # counted, index equals len(count_list) and indexing would fail.
    while index < len(count_list) and count_list[index] == name:
        count += 1
        index += 1
    print("{} appeared in {} movies".format(name, count), flush=True)
        

Module: lec16_imdb_dict — The fastest version using dictionaries

Code:

imdb_file = input("Enter the name of the IMDB file ==> ").strip()

# counts maps each name to the number of lines it appears on.  The "with"
# block closes the file when the loop ends.
counts = dict()
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        words = line.strip().split('|')
        name = words[0].strip()
        # get() supplies 0 the first time a name is seen.
        counts[name] = counts.get(name, 0) + 1

# Report at most the first 100 names in sorted order.
names = sorted(counts)
limit = min(100, len(names))
for name in names[:limit]:
    print("{} appeared in {} movies".format(name, counts[name]))