Code Developed in CSCI-1100 Fall 2025¶
Lecture 1¶
Module: three_doubles — Finds three consecutive pairs of double letters¶
Code:
""" Find all words containing three consecutive pairs of double letters
in a file of all English words located at:
http://www.greenteapress.com/thinkpython/code/words.txt
**Modules used:** :py:mod:`urllib`
**Author**: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <cvstewart@gmail.com>
**Returns:** All words matching condition and the count of found words
**Pseudo Code**::
open the file from the web with all the words in English
for each word in the file:
for all positions l in the word
if the letters at positions (l and l+1) are the same
and the letters at positions (l+2 and l+3) are the same
and the letters at positions (l+4 and l+5) are the same then
output word and increment the count
"""
import urllib.request
def has_three_double(word):
    """
    Report whether word holds three back-to-back pairs of doubled letters.

    Every window of six consecutive characters is examined; the answer is
    True as soon as one window is made of three equal-letter pairs (for
    example the "ookkee" in "bookkeeper").  Words shorter than six
    characters have no such window, so the answer for them is False.
    """
    return any(
        word[start] == word[start + 1]
        and word[start + 2] == word[start + 3]
        and word[start + 4] == word[start + 5]
        for start in range(len(word) - 5)
    )
# Comments that fit in a single line can be put in this format.
# The main body of the program starts here
# Location of the file holding all of the English words.
word_url = 'http://www.greenteapress.com/thinkpython/code/words.txt'

# Process each word in the file one by one, testing to see if it has
# three consecutive doubles.  Print it and count it if it does.
# The with-statement closes the network response once the scan is done,
# even if decoding one of the lines raises an exception.
count = 0
with urllib.request.urlopen(word_url) as word_file:
    for word in word_file:
        # Lines arrive as bytes; decode them and drop the line ending.
        word = word.decode().strip()
        if has_three_double(word):
            print(word)
            count = count + 1

# After we've gone through all the words, output a final message based
# on the number of words that were counted.
if count == 0:
    print('No words found')
elif count == 1:
    print("1 word found")
else:
    print(count, 'words were found')
Lecture 3¶
Module: lec03_surface_and_area — Find the volume and surface area of a cylinder¶
Code:
# Fixed approximation of pi used by every calculation below.
pi = 3.14159

# Read the two dimensions of the cylinder, converting the typed text
# straight to floats.
radius = float(input("Radius: "))
height = float(input("Height: "))

# Area of one circular end of the cylinder.
base_area = pi * radius ** 2

# The volume is the end area carried along the height; the surface is
# the two ends plus the side wall (circumference times height).
volume = base_area * height
side_area = 2 * pi * radius * height
surface_area = 2 * base_area + side_area

print("volume is", volume, ", surface area is", surface_area)
Lecture 5¶
Module: lec05_surface_and_area — Find the surface area and volume of a cylinder¶
Code:
'''
This is a program to calculate the surface area and
volume of a cylinder given a radius and a height.
Radius and height are in float and are user inputs.
Sample Execution:
Enter radius (float) => 12
Enter height (float) => 10
Surface area is: 1658.76
Volume is: 4523.89
9/18/2023
'''
import math
def area_circle(radius):
    '''
    Return the area of the circle whose radius is given by the single
    parameter, radius.
    '''
    return math.pi * radius ** 2
def area_cylinder(h, r):
    '''
    Return the total surface area of a cylinder of height h and radius r:
    the two circular end caps plus the side wall.
    '''
    # Unrolled, the side wall is a rectangle as wide as the circumference
    # of the cylinder and as tall as its height.
    side_wall = 2 * math.pi * r * h
    return 2 * area_circle(r) + side_wall
if __name__ == "__main__":
    # Gather both dimensions from the user before doing any arithmetic.
    r = float(input("Enter radius (float) => "))
    h = float(input("Enter height (float) => "))

    # Compute both results first, then report them to two decimal places.
    surface = area_cylinder(h, r)
    volume = h * area_circle(r)
    print("Surface area is: {:.2f}".format(surface))
    print("Volume is: {:.2f}".format(volume))
Lecture 6¶
Module: lec06_rectangle — Does a given point fall within a rectangle¶
Code:
'''
Program to demonstrate the use of complex boolean expressions and if/elif/else
clauses. Determine whether a set of coordinates falls within a rectangle given
by the vertices (x0, y0), (x0, y1), (x1, y1), and (x1, y0)
Author: CS1 Staff
Date 9/21/2024
'''
'''
Initialize the rectangle
'''
# Opposite corners of the rectangle: (x0, y0) and (x1, y1).
x0, x1 = 10, 16
y0, y1 = 32, 45

# Get the target point.  Each raw entry is echoed back before it is
# converted to a float.
x = input("x coordinate ==> ")
print(x)
x = float(x)
y = input("y coordinate ==> ")
print(y)
y = float(y)

# The point is on a vertical edge when x matches x0 or x1 and y is within
# the y range; it is on a horizontal edge when y matches y0 or y1 and x is
# within the x range.
on_vertical_edge = (x == x0 or x == x1) and (y0 <= y <= y1)
on_horizontal_edge = (y == y0 or y == y1) and (x0 <= x <= x1)

# Strictly between the edges in both directions.
strictly_inside = (x0 < x < x1) and (y0 < y < y1)

if on_vertical_edge or on_horizontal_edge:
    print("Point ({:.2f},{:.2f}) is on the boundary.".format(x, y))
elif strictly_inside:
    # Not on the boundary, but in range in both x and y.
    print("Point ({:.2f},{:.2f}) is inside the rectangle.".format(x, y))
else:
    # Neither on the boundary nor inside, so the point must be outside.
    print("Point ({:.2f},{:.2f}) is outside the rectangle.".format(x, y))
Lecture 7¶
Module: lec07_area — Set up a module for area calculations¶
Code:
'''
Lecture 7 - Area Module
Prof. Charles Stewart
We've gathered the code from our area calculations to form a module
that can be used by other programs.
'''
import math
def circle(radius):
    ''' Return the area enclosed by a circle of the given radius '''
    area = math.pi * radius ** 2
    return area
def cylinder(radius,height):
    ''' Return the surface area of a cylinder: two end caps plus the side '''
    # The side wall is the circumference of the cylinder times its height.
    side = 2 * radius * math.pi * height
    return 2 * circle(radius) + side
def sphere(radius):
    ''' Return the surface area of a sphere of the given radius '''
    surface = 4 * math.pi * radius ** 2
    return surface
Module: lec07_use_area — Use the area module in a separate main¶
Code:
'''
Lecture 7 - Demonstrate the use of the area calculations
Prof. Charles Stewart
'''
import lec07_area
# One radius and one height are shared by every shape in the demonstration.
r, h = 6, 10

# Each calculation is reached through the module name, lec07_area.
a1 = lec07_area.circle(r)
a2 = lec07_area.cylinder(r, h)
a3 = lec07_area.sphere(r)

# Report every result to one decimal place.
print("Area circle {:.1f}".format(a1))
print("Surface area cylinder {:.1f}".format(a2))
print("Surface area sphere {:.1f}".format(a3))
Module: lec07_images_init — Image chipmunk example¶
Code:
from PIL import Image
# Open the image and report its format, size and mode as loaded.
filename = "chipmunk.jpg"
im = Image.open(filename)
print('\n' '********************')
print("Here's the information about", filename)
print(im.format, im.size, im.mode)

# Convert to gray scale (mode 'L'), then resize to 128 x 128.
gray_im = im.convert('L')
scaled = gray_im.resize((128, 128))

# Report the same three properties for the converted, resized image.
print("After converting to gray scale and resizing,")
print("the image information has changed to")
print(scaled.format, scaled.size, scaled.mode)

# Display the result and save it; the suffix is appended to the complete
# original name, giving "chipmunk.jpg_scaled.jpg".
scaled.show()
scaled.save(filename + "_scaled.jpg")
Lecture 9¶
Module: lec09_co2_percentages — CO2 percentages from class examples¶
Code:
'''
Demonstrate walking through a list calculating values between pairs of
values. In this instance we are calculating the percent change year-to-year
for CO2 concentration.
'''
# (year, CO2 level) readings in increasing year order.
co2_levels = [
    (2001, 320.03), (2003, 322.16), (2004, 328.07),
    (2006, 323.91), (2008, 341.47), (2009, 348.92),
    (2010, 357.29), (2011, 363.77), (2012, 361.51),
    (2013, 382.47),
]

# Pair every reading with the one before it and record the relative
# change in level.  The values are fractions of the earlier level (they
# are not multiplied by 100), so there is one entry fewer than readings.
percent_change = [
    (current[1] - previous[1]) / previous[1]
    for previous, current in zip(co2_levels, co2_levels[1:])
]
print(percent_change)
Module: lec09_loop_variable_examples — Three examples of manipulating loop variables¶
Code:
'''
Three examples of manipulating loop variables. The first prints out every
other element of the list starting from the first element. The second uses the
loop variable to print out an evergreen tree. The third prints an evergreen
tree whose width is chosen by the user.
'''
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
          'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

# Print every other month, starting from the first one.
for month in months[::2]:
    print(month)

# Now print out the evergreen: rows of 1, 3, 5, 7 and 9 stars, each
# centered by its leading blanks, followed by a two-row trunk.
print()
for width in range(1, 10, 2):
    print((4 - width // 2) * " " + width * '*')
print(3 * " " + 3 * '*')
print(3 * " " + 3 * '*')

# Finally, let the user pick an odd value > 3 for the width of the tree.
print()
field = int(input("Enter an odd number greater than 3: "))
if field % 2 == 1 and field > 3:
    # Branches: odd star counts from 1 up to the requested width.
    for j in range(1, field + 1, 2):
        print((field - j) // 2 * ' ', j * "*")
    # Trunk: two rows of three stars, padded the same way as the branches.
    j = 3
    print((field - j) // 2 * ' ', j * "*")
    print((field - j) // 2 * ' ', j * "*")
else:
    print("{} is not an odd integer greater than 3.".format(field))
Module: lec09_nested_loop — Example of doubly nested loop¶
Code:
'''
Quick code snippet to demonstrate walking through a list operating on
all pairs of list elements without repeating matches and without operating
on the diagonals.
CS1
'''
L = [2, 21, 12, 8, 5, 31]

# Start from the first pair so there is always a candidate to beat.
dist = abs(L[0] - L[1])
indices = 0, 1

# Visit every pair (i, j) with i < j exactly once: the inner index always
# starts just past the outer one, so no pair is repeated and no element is
# compared with itself.
for i in range(len(L)):
    for j in range(i + 1, len(L)):
        gap = abs(L[i] - L[j])
        # "<=" means that on a tie the pair visited later is kept.
        if gap <= dist:
            dist, indices = gap, (i, j)

print("Closest {} at {}.".format(dist, indices))
Lecture 11¶
Module: Lec11_module — Example of defining test code in a module¶
Code:
'''
Demonstrate importing and using a 'homegrown' module. In this file
we are defining the Lec11_module code. Note that the code in the
if __name__ == "__main__":
block is executed when this file is run, but not when we import it
into Lec11_main. Either way the "addto" function remains available.
'''
def addto(val, increment):
    ''' Return the result of adding increment to val. '''
    result = val + increment
    return result
if __name__ == "__main__":
    # This test code runs only when the file is executed directly.
    n = int(input("Enter a positive integer ==> "))
    total = 0

    # Show the starting values, then the loop variable and limit again at
    # the top of every pass while 0, 1, ..., n-1 are added to the total.
    print(0, n)
    for i in range(n):
        print(i, n)
        total = addto(total, i)
    print('Sum is', total)
Module: Lec11_main — Example of importing a module with test code¶
Code:
'''
Demonstrate importing and using a 'homegrown' module. In this file
we are importing the Lec11_module code. Note that the code in the
if __name__ == "__main__":
block is not executed, but we can read in and use the "addto" function.
'''
import Lec11_module
# The imported function is reached through the module name.  The result
# of this call is not stored or printed.
Lec11_module.addto(5, 7)
# The call some_other_find(y) that used to follow was removed: neither
# some_other_find nor y is defined or imported in this file, so the line
# raised a NameError as soon as the program reached it.
Module: Lec11_RandomWalk — Example of using the random module¶
Code:
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 8 14:20:39 2020
@author: westu
This is an example of using a random function in a simulation. Conceptually,
a person randomly takes a step forward or backward on a platform based on
the value of the random function. To control the behavior of the simulation,
you can uncomment and control the value of the seed.
"""
import time
import random
def print_pos(pos, length):
    '''
    Print a one-line picture of a platform of the given length with a
    person at position pos.

    pos == 0      : 'v' is drawn in place of the left end marker
    pos > length  : 'v' is drawn in place of the right end marker
    otherwise     : '^' is drawn at position pos on the platform

    The line is flushed immediately so that each step appears as soon as
    it is printed.  Nothing is returned.
    '''
    # The picture is built in "picture" rather than in a variable named
    # "str", which would hide the built-in str type inside this function.
    if pos == 0:
        picture = 'v' + length * '-' + '_'
    elif pos > length:
        picture = '_' + length * '-' + 'v'
    else:
        picture = '_' + (pos - 1) * "-" + "^" + (length - pos) * "-" + '_'
    print(picture, flush=True)
# Main for the random walk.
if __name__ == "__main__":
    length = int(input("Length: "))

    # Uncomment to make every run take the same sequence of steps.
    #random.seed(100)

    # Start just past the middle of the platform and keep stepping while
    # the person is still on it (positions 1 through length).
    pos = length // 2 + 1
    while 0 < pos <= length:
        print_pos(pos, length)
        # Plug in your favorite random function here:
        # random.random() or random.randint work too ...
        pos += random.choice([-1, 1])
        time.sleep(.1)

    # Show the final position, which is off one end of the platform.
    print_pos(pos, length)
Lecture 12¶
Module: Lec12_dist — Closest point example¶
Code:
'''
Two implementations of the closest point calculation, one using
an auxiliary list and one not using an auxiliary list.
'''
def distance(p1, p2):
    '''
    Return the straight-line distance between two points, each given as
    a sequence whose first two entries are its coordinates.
    '''
    d0 = p1[0] - p2[0]
    d1 = p1[1] - p2[1]
    return (d1 ** 2 + d0 ** 2) ** 0.5
def closest_points_1(points):
    '''
    Find the closest pair of points by building an entry for every pair
    and taking the smallest one.

    Returns the list [distance, i, j], where i < j are the indices of the
    two points in points.  Entries are compared distance first, so among
    equally distant pairs the one with the smallest indices is returned.
    '''
    count = len(points)
    return min(
        [distance(points[i], points[j]), i, j]
        for i in range(count)
        for j in range(i + 1, count)
    )
def closest_points_2(points):
    '''
    Find the closest pair of points without storing every distance: only
    the best pair seen so far is remembered.

    Returns the tuple (distance, i, j), where i < j are the indices of the
    two points in points.
    '''
    # The first pair is the initial candidate for every later pair to beat.
    best = (distance(points[0], points[1]), 0, 1)
    for i, first in enumerate(points):
        for j in range(i + 1, len(points)):
            gap = distance(first, points[j])
            # Strict "<" keeps the earliest pair when distances tie.
            if gap < best[0]:
                best = (gap, i, j)
    return best
points = [(1, 5), (13.5, 9), (10, 5), (8, 2), (16, 3)]

# Run both implementations on the same points.  Each result holds the
# distance followed by the indices of the two points.
cp = closest_points_1(points)
print("Closest dist of {:.2f} occurs between {} and {}".format(cp[0], points[cp[1]], points[cp[2]]))

cp = closest_points_2(points)
print("Closest dist of {:.2f} occurs between {} and {}".format(cp[0], points[cp[1]], points[cp[2]]))
Module: Lec12_Workspace — For and while loop examples¶
Code:
'''
Calculate the distance between 2 x,y coordinates. This is used
later in the closest points calculation
'''
def dist(x, y):
    '''
    Return the distance between the two points x and y, each given as a
    sequence whose first two entries are its coordinates.
    '''
    d0 = x[0] - y[0]
    d1 = x[1] - y[1]
    return (d0 ** 2 + d1 ** 2) ** 0.5
# Two loops to demonstrate manipulation of loop variables for "for" and
# "while" loops.  Both print the even numbers from 2 up to, but not
# including, n.
n = int(input("N?: "))

# With "for", the start, stop and step are all given to range.
print("For:")
for even in range(2, n, 2):
    print(even)

# With "while", the same three pieces are written out by hand.
print("\nWhile:")
even = 2
while even < n:
    print(even)
    even += 2
Lecture 13¶
Module: lec13_avg — File example, reading and calculating scores¶
Code:
# Read one integer score per line from a file named by the user, echo
# each score, and report the average.
file_name = input("Enter the name of the scores file: ")
file_name = file_name.strip()   # Eliminate extra white space that the user may have typed
print(file_name)

num_scores = 0
sum_scores = 0
# The with-statement closes the file when the loop finishes, even if one
# of the lines cannot be converted to an integer.
with open(file_name) as scores_file:
    for s in scores_file:
        # A blank line (for example at the end of the file) holds no score.
        if not s.strip():
            continue
        score = int(s)
        sum_scores += score
        num_scores += 1
        print(score)

# Guard the division: a file without any scores has no average.
if num_scores == 0:
    print("No scores found")
else:
    print("Average score is {:.1f}".format( sum_scores / num_scores ))
Module: lec13_parse_legos — Parsing Practice from Lecture 13¶
Code:
'''
Building the list of legos from a file. Each line of this file
contains the name of a lego and the number of copies of that
lego, separated by a comma. For example,
2x1, 3
2x2, 2
'''
lego_name = input('Enter the name of the legos file: ').strip()
lego_list = []
# The with-statement closes the file when the loop finishes, even if one
# of the lines cannot be parsed.
with open(lego_name) as lego_file:
    for line in lego_file:
        # A blank line names no lego, so there is nothing to add for it.
        if not line.strip():
            continue
        fields = line.split(',')
        lego = fields[0].strip()   # get rid of extra space
        count = int(fields[1])
        # extend adds the copies in place; rebuilding the whole list with
        # "+" on every line would copy everything gathered so far.
        lego_list.extend([lego] * count)
print(lego_list)
Module: lec13_parse_yelp — Parsing Practice from Lecture 13¶
Code:
'''
Lecture 13 Practice Problem: Parse the yelp.txt data file to create a
list of lists of names and averages. This demonstrates parsing an
irregularly formatted file. We have to know that the 0th entry on
each line is the name and that the entries from the 6th on are the scores.
Prof. Stewart
'''
def yelp_averages( yelp_name ):
    '''
    Read the '|'-separated file named yelp_name and return a list with one
    [name, average] entry per line, in file order.

    On each line, field 0 is the name and fields 6 onward are integer
    scores.  The average is the mean of those scores, or -1 when the line
    has no scores at all.  A score that is not an integer raises
    ValueError.
    '''
    averages = []
    # The with-statement closes the file even if a score fails to parse.
    with open(yelp_name) as yelp_file:
        for line in yelp_file:
            fields = line.split('|')
            name = fields[0]
            scores = fields[6:]   # From entry 6 on are the scores
            if not scores:
                # Handle the special case of an empty scores list
                averages.append([name, -1])
            else:
                # int() ignores the line ending attached to the last score.
                total = sum(int(s) for s in scores)
                averages.append([name, total / len(scores)])
    return averages
# Compute the averages for the whole file but show only the first three
# entries.
avgs = yelp_averages('yelp.txt')
print(avgs[:3])
Lecture 14¶
Code:
'''
Here is a short example to show the calculation of a mode
when there is and is not an "enumerable" mapping between
values and indices. (For the non-enumerable case, assume the values
are floats or sparsely distributed.)
We assume scores are hockey scores. They cannot be negative.
'''
scores = [(3, 2), (2, 1), (9, 1), (8, 7), (2, 0), (0, 4), (1, 7),
          (29, 6), (27, 29), (30, 29), (2, 29)]

# ---------------------------------------
# First assume the values have an efficient mapping to indices of a list.
# ---------------------------------------

# Find the range for the enumeration: the largest value on either side of
# any score.
high = max(max(pair) for pair in scores)

# Generate a list with one occurrence counter per possible value and
# increment the matching counter each time a value is seen.
L = [0] * (high + 1)
for first, second in scores:
    L[first] += 1
    L[second] += 1

# Report the enumerable case.  If no value occurs more than once there is
# no mode; otherwise every index whose counter equals the maximum is one.
most = max(L)
print("Max occurence: {}".format(most))
if most == 1:
    print("No Mode")
else:
    for index, occurrences in enumerate(L):
        if occurrences == most:
            print("Mode is at: {}".format(index))

# ---------------------------------------
# Now do it again assuming the values are not enumerable.
# ---------------------------------------

# Make a single list of all the values in sorted order, so that equal
# values sit next to each other.
L = sorted(value for pair in scores for value in pair)

# Walk through the list looking for the breaks between runs of equal
# values and use the run lengths to count occurrences.
curr = 0      # Largest run length seen so far
prev = -1     # Value of the run being counted; scores are never negative
count = 0     # Length of the run being counted
modes = []    # All the values whose run length equals curr
for value in L:
    if value == prev:
        count += 1
        continue
    # A new value starts here, so the run of prev is complete.
    if count > curr:
        modes = [prev]
        curr = count
    elif count == curr:
        modes.append(prev)
    prev = value
    count = 1

# The last run is not followed by a break, so it is settled here.
if count > curr:
    modes = [prev]
    curr = count
elif count == curr:
    modes.append(prev)
print(modes)
Code:
# -*- coding: utf-8 -*-
"""
Solution to practice problem:
In this question you will implement a method for tracing a decreasing path
through a grid (list of lists) to find a local minimum. The idea is to start at
a specific entry in the grid and move to the smallest element of the current point
and its neighbors. The process is repeated until the current element is the
smallest in its neighborhood. This would be a local minimum. (As an aside, by
running this algorithm with different randomly selected values of the start
element, we can obtain some approximation of the globally minimum element.)
Neighbors are elements that surround a given element, including the diagonals.
There is no wrapping around grid edges. Indexing starts at 0.
Part a: Write a function smallest_neighbor(grid, start) that takes grid, a
two-dimensional grid of numbers represented as a list of lists, and start,
the row and column of the starting element represented as a two-tuple, and
returns the row and column of the element whose value is the smallest among
the start element and all of its neighbors, also in the form of a two-tuple.
For example,
matrix = [[19, 11, 7, 8],
[5, 3, -1, -27],
[14, 0, -2, 4],
[12, -18, 10, -11]]
>>> print(smallest_neighbor(matrix, (0,0)))
(1, 1)
since element (1, 1), which is 3, is the smallest of all neighbors of element (0, 0).
Part b: Now write a function local_min(grid, start) that takes grid, a
two-dimensional grid of numbers represented as a list of lists, and start,
the row and column of the starting element represented as a two-tuple (this
is the same arguments as for Part a), and returns the row and column of the
local minimum element, also in the form of a two-tuple. When searching for the
local minimum element, begin with start and repeatedly move to the neighbor
whose value is the smallest among all neighbors of the current element and
smaller than the current element itself. If all neighbors of the current element
are not smaller than the current element, then it is the local minimum element.
Use function smallest_neighbor(grid, start) that you defined earlier. For example,
>>> print(local_min(matrix, (0,0)))
(1, 3)
since element (1, 3), which is -27, is smaller than any of its neighbors.
@author: mushtu
"""
def smallest_neighbor(grid,start):
    """
    Return the position of the smallest value among the element at start
    and its neighbors (including diagonals) that lie inside the grid.

    grid is a list of lists; its width is taken from the first row.  start
    is a (row, column) pair.  When several candidates share the smallest
    value, start wins over any neighbor and neighbors are preferred in the
    order of the offsets listed below.
    """
    offsets = [(0,1),(1,0),(0,-1),(-1,0),(-1,1),(1,-1),(1,1),(-1,-1)]
    row, col = start[0], start[1]

    # Collect start followed by every neighbor that is inside the grid.
    candidates = [start]
    for d_row, d_col in offsets:
        r, c = row + d_row, col + d_col
        if 0 <= r < len(grid) and 0 <= c < len(grid[0]):
            candidates.append((r, c))

    # min keeps the first of equally small candidates, which gives the
    # tie-breaking order described above.
    return min(candidates, key=lambda cell: grid[cell[0]][cell[1]])
def local_min(grid,start):
    """
    Follow a decreasing path through grid from start and return the
    (row, column) position of the local minimum where the path ends.

    Each step moves to the position chosen by smallest_neighbor; the walk
    stops when that position is the current one.
    """
    while True:
        best = smallest_neighbor(grid, start)
        if best == start:
            # Nothing around the current element is smaller.
            return best
        start = best
matrix = [
    [19, 11, 7, 8],
    [5, 3, -1, -27],
    [14, 0, -2, 4],
    [12, -18, 10, -11],
]

# One step from (3, 2): the smallest surrounding value is -18.
print(smallest_neighbor(matrix, (3, 2)))
# The full walk from the top-left corner ends at the -27.
print(local_min(matrix, (0, 0)))
Code:
"""
@author: uzmam
"""
'''1. Given a grid find the location of every queen (denoted by q or Q)
2. Given a grid find if a queen is located on the same row or column
as the king (denoted by k or K).
Return a True if yes, False otherwise.
3. Find if a queen is located on any of the diagonals of king's location.
Return a True if yes, False otherwise'''
def find_queen(grid):
    '''
    Return the (row, column) position of every queen in grid, in row-major
    order.  A cell holds a queen when it is 'q' or 'Q'.  The width of the
    grid is taken from its first row.
    '''
    return [
        (r, c)
        for r, row in enumerate(grid)
        for c in range(len(grid[0]))
        if row[c].lower() == 'q'
    ]
def same_row_col(grid):
    '''
    Return True when some queen shares a row or a column with the king,
    and False otherwise.

    The king is the cell holding 'k' or 'K'; if there are several, the
    last one in row-major order is used.  A grid without a king yields
    False, because there is then no king for a queen to line up with.
    The width of the grid is taken from its first row.
    '''
    # Locate the king; king stays None when the grid does not hold one.
    king = None
    for r in range(len(grid)):
        for c in range(len(grid[0])):
            if grid[r][c].lower() == 'k':
                king = (r, c)
    if king is None:
        return False

    krow, kcol = king
    return any(krow == i or kcol == j for i, j in find_queen(grid))
def same_diagonal(grid):
    '''
    Return True when some queen lies on one of the king's diagonals, and
    False otherwise.

    The king is the cell holding 'k' or 'K'; if there are several, the
    last one in row-major order is used.  A grid without a king yields
    False, because there is then no king for a queen to line up with.
    The width of the grid is taken from its first row.
    '''
    # Locate the king; king stays None when the grid does not hold one.
    king = None
    for r in range(len(grid)):
        for c in range(len(grid[0])):
            if grid[r][c].lower() == 'k':
                king = (r, c)
    if king is None:
        return False

    krow, kcol = king
    # Two cells share a diagonal exactly when they are as many rows apart
    # as they are columns apart.
    return any(abs(krow - i) == abs(kcol - j) for i, j in find_queen(grid))
if __name__ == '__main__':
    # Start from an empty 8 x 8 board and then place the pieces: two
    # queens (one in each letter case) and the king.
    grid = [['.'] * 8 for _ in range(8)]
    grid[3][4] = 'q'
    grid[3][6] = 'k'
    grid[4][1] = 'Q'

    print(find_queen(grid))
    print(same_row_col(grid))
    print(same_diagonal(grid))
Lecture 15¶
Module: Lec15_find_names_start — Starting point for IMDB example¶
Code:
'''
This is the start of the solution to the problem of finding all people
named in the internet movie database.
One important note. In opening the file we need to specify the
encoding of the text. The IMDB file is stored in the 'ISO-8859-1'
(Latin-1) encoding, while the default encoding used by open() depends
on the platform and is commonly utf-8. Reading the file with the wrong
encoding can fail or garble names that contain accented characters,
so we pass encoding = "ISO-8859-1" explicitly.
We will use the time.time() function to measure how long our
computation takes. This function tells the seconds since an "epoch",
which for Python's time module is 1/1/1970 (UTC). By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time
imdb_file = input("Enter the name of the IMDB file ==> ").strip()
name_list = []
# The with-statement closes the file as soon as the loop is finished.
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        # The name is the first '|'-separated field of the line.
        words = line.strip().split('|')
        name = words[0].strip()
        # Starting point only: nothing is done with name yet.
Module: Lec15_find_names_list — Using lists to hold unique names¶
Code:
'''
This is the list-based solution to the problem of finding all people
named in the internet movie database. Each line is split and
stripped to get the name and then the name is added to a list, but
only if it is not already there.
One important note. In opening the file we need to specify the
encoding of the text. The IMDB file is stored in the 'ISO-8859-1'
(Latin-1) encoding, while the default encoding used by open() depends
on the platform and is commonly utf-8. Reading the file with the wrong
encoding can fail or garble names that contain accented characters,
so we pass encoding = "ISO-8859-1" explicitly.
We will use the time.time() function to measure how long our
computation takes. This function tells the seconds since an "epoch",
which for Python's time module is 1/1/1970 (UTC). By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time
imdb_file = input("Enter the name of the IMDB file ==> ").strip()
start_time = time.time()
name_list = []
# The with-statement closes the file as soon as the loop is finished.
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        # The name is the first '|'-separated field of the line.
        words = line.strip().split('|')
        name = words[0].strip()
        # Add the name to the list if it is new.  The membership test
        # scans the list, which is the approach this module demonstrates.
        if name not in name_list:
            name_list.append(name)
            # Report how long each batch of 1000 new names took, then
            # restart the clock for the next batch.
            if len(name_list) % 1000 == 0:
                end_time = time.time()
                print('After {} added, the last 1000 took {:.2f} seconds'.format(len(name_list), end_time-start_time))
                start_time = end_time
print("Number of unique names in the IMDB:", len(name_list))
for n in name_list:
    print('\t{}'.format(n))
Module: lec15_find_names_list_sort — Faster list version using sorting¶
Code:
'''
Here is an alternative list based solution - not covered in lecture -
where each name is added to the list without any checking for
duplicates. The list is then sorted and the number of distinct
individual is counted by scanning through the list and looking for
adjacent pairs of names that are different.
You will see that this solution is almost as fast as the set-based
solution, but the set-based solution is simpler and more natural to
write.
One important note. In opening the file we need to specify the
encoding of the text. The IMDB file is stored in the 'ISO-8859-1'
(Latin-1) encoding, while the default encoding used by open() depends
on the platform and is commonly utf-8. Reading the file with the wrong
encoding can fail or garble names that contain accented characters,
so we pass encoding = "ISO-8859-1" explicitly.
We will use the time.time() function to measure how long our
computation takes. This function tells the seconds since an "epoch",
which for Python's time module is 1/1/1970 (UTC). By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time
imdb_file = input("Enter the name of the IMDB file ==> ").strip()
start_time = time.time()

# Add all the names to the list, duplicates included.  The with-statement
# closes the file as soon as the loop is finished.
name_list = []
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        # The name is the first '|'-separated field of the line.
        words = line.strip().split('|')
        name = words[0].strip()
        name_list.append(name)

# Sort the names.  After this all repeated names will be next to each
# other in the list.
name_list.sort()

# Count the distinct names: the first name counts as one, and every
# adjacent pair of names that differ adds one more.  An empty list holds
# no names at all, so its count is 0.
count = 1 if name_list else 0
for i in range(1, len(name_list)):
    if name_list[i-1] != name_list[i]:
        count += 1

end_time = time.time()
# "{:.2f}" gives two decimal places; the earlier "{:2f}" only set a
# minimum width of 2 and printed six decimal places.
print('Total time required {:.2f} seconds'.format(end_time-start_time))
print("Number of unique names in the IMDB:", count)
Module: Lec15_find_names_sets — Faster versions using sets¶
Code:
'''
This is the solution to the problem of using sets to count the number
of individuals in the internet movie database. Each line of input is
split and stripped to get the name and this name is added to the set.
One important note. In opening the file we need to specify the
encoding of the text. The IMDB file is stored in the 'ISO-8859-1'
(Latin-1) encoding, while the default encoding used by open() depends
on the platform and is commonly utf-8. Reading the file with the wrong
encoding can fail or garble names that contain accented characters,
so we pass encoding = "ISO-8859-1" explicitly.
We will use the time.time() function to measure how long our
computation takes. This function tells the seconds since an "epoch",
which for Python's time module is 1/1/1970 (UTC). By recording in the
software the time before the calculations start, the time when the
calculations end, and subtracting we get the elapsed time.
'''
import time
imdb_file = input("Enter the name of the IMDB file ==> ").strip()
start_time = time.time()
names = set()
# The with-statement closes the file as soon as the loop is finished.
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        # The name is the first '|'-separated field of the line.
        words = line.strip().split('|')
        name = words[0].strip()
        # Adding a name that is already in the set leaves it unchanged,
        # so no separate check for duplicates is needed.
        names.add(name)
end_time = time.time()
print("Solution took {:.2f} seconds".format(end_time-start_time))
print("Number of unique names in the IMDB:", len(names))
#######
## The rest of this code was written to test the code and then
## commented out.
#######
'''
ordered_names = sorted(names)
for i in range(min(len(ordered_names),100)):
print("{}: {}".format(i, ordered_names[i]))
'''
'''
for n in names:
print('\t{}'.format(n))
'''
Lecture 16¶
Module: lec16_imdb — Find how many movies everyone was in¶
Code:
imdb_file = input("Enter the name of the IMDB file ==> ").strip()

# count_list holds one [name, count] pair per distinct name, in the
# order in which the names are first seen.
count_list = []
# The with-statement closes the file as soon as the loop is finished.
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        # The name is the first '|'-separated field of the line.
        words = line.strip().split('|')
        name = words[0].strip()

        # Look for an existing pair for this name and bump its count.
        found = False
        for pair in count_list:
            if pair[0] == name:
                pair[1] += 1
                found = True
                break

        # First appearance of this name: start a new pair.
        if not found:
            count_list.append([name, 1])

for pair in count_list:
    print("{} appeared in {} movies".format(pair[0], pair[1]))
Module: lec16_imdb_sorted — Faster version using sorting¶
Code:
imdb_file = input("Enter the name of the IMDB file ==> ").strip()

# Gather every name, duplicates included.  The with-statement closes the
# file as soon as the loop is finished.
count_list = []
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        # The name is the first '|'-separated field of the line.
        words = line.strip().split('|')
        name = words[0].strip()
        count_list.append(name)

# Sorting puts all copies of a name next to each other.
count_list.sort()

# Walk the sorted list one run of equal names at a time.
index = 0
while index < len(count_list):
    name = count_list[index]
    count = 0
    # The bounds check must come first: when the last run reaches the end
    # of the list, index equals len(count_list) and indexing would raise
    # an IndexError.
    while index < len(count_list) and count_list[index] == name:
        count += 1
        index += 1
    print("{} appeared in {} movies".format(name, count), flush=True)
Module: lec16_imdb_dict — The fastest version using dictionaries¶
Code:
imdb_file = input("Enter the name of the IMDB file ==> ").strip()

# counts maps each name to the number of lines on which it appears.
counts = dict()
# The with-statement closes the file as soon as the loop is finished.
with open(imdb_file, encoding = "ISO-8859-1") as imdb:
    for line in imdb:
        # The name is the first '|'-separated field of the line.
        words = line.strip().split('|')
        name = words[0].strip()
        # get supplies 0 the first time a name is seen.
        counts[name] = counts.get(name, 0) + 1

# Report only the first 100 names in sorted order (or all of them when
# there are fewer than 100).
names = sorted(counts)
limit = min(100, len(names))
for name in names[:limit]:
    print("{} appeared in {} movies".format(name, counts[name]))