In [ ]:
!wget https://student.cs.uwaterloo.ca/~cs114/src/a-tale-of-two-cities.txt
In [ ]:
!wget https://student.cs.uwaterloo.ca/~cs114/src/filelib.ipynb
In [4]:
def txt_to_ls(filename: str, encoding: str = "utf-8") -> list[str]:
    r"""
    Split a plain text file given by filename into lines, and return a list of
    those lines. The newline/line break (\n) *is* included in the returned
    strings.

    The file is decoded with the given encoding (UTF-8 by default) instead of
    the platform's locale encoding, so the same file produces the same list
    on every machine. A leading byte-order mark, if present, is kept as the
    single character U+FEFF at the start of the first line.
    """
    # An explicit encoding avoids locale-dependent decoding of non-ASCII text.
    with open(filename, encoding=encoding) as fh:
        # Iterating a text file yields one string per line, newline included.
        return list(fh)
    
# Spot-check two known lines of the downloaded book by their 0-based list
# index; each expected string keeps its trailing "\n". Note that every assert
# re-reads the whole file.
assert txt_to_ls("a-tale-of-two-cities.txt")[10] == "Title: A Tale of Two Cities\n", "Title line"
assert txt_to_ls("a-tale-of-two-cities.txt")[106] == "It was the best of times, it was the worst of times, it was the age of\n", "Famous first line"
In [ ]:
# Read the book as a list of lines and echo every one of them.
cities = txt_to_ls("a-tale-of-two-cities.txt")
for line in cities:
    # strip() drops the line's own trailing newline (and any other leading or
    # trailing whitespace), so print() does not emit a blank line after each.
    print(line.strip())
In [15]:
import typing

def distribution(
    lst: typing.Sequence
) -> dict[typing.Any, int]:
    """
    Count how often each distinct value occurs in lst.

    Returns a dictionary mapping every value seen to its number of
    occurrences. Keys appear in the order in which the values were first
    encountered. An empty input gives an empty dictionary.
    """
    counts: dict[typing.Any, int] = {}
    for item in lst:
        # A value not seen before starts from 0, then is counted once.
        counts[item] = counts.get(item, 0) + 1
    return counts

def distributionChart(
    lst: typing.Sequence
) -> None:
    """
    Print a text bar chart of how often each value occurs in lst.

    One line is printed per distinct value: the value, a space, and one "*"
    per occurrence. Lines are ordered from most to least frequent; values
    with equal counts keep the order in which they first appeared.
    """
    tally = distribution(lst)
    # sorted() is stable, so ties stay in first-seen order even with reverse.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    for value, count in ranked:
        print(value, "*" * count)
In [11]:
def txt_to_str(filename: str, encoding: str = "utf-8") -> str:
    """
    Return the content of a text file filename as a single string.

    The file is decoded with the given encoding (UTF-8 by default) instead of
    the platform's locale encoding, so the same file produces the same string
    on every machine. A leading byte-order mark, if present, is kept as the
    single character U+FEFF at index 0.
    """
    # An explicit encoding avoids locale-dependent decoding of non-ASCII text.
    with open(filename, encoding=encoding) as fh:
        return fh.read()
    
# The first slice starts at index 1, i.e. it skips the file's first character
# (presumably a byte-order mark - confirm against the file). The second check
# compares the final 59 characters of the string.
assert txt_to_str("a-tale-of-two-cities.txt")[1:53] == "The Project Gutenberg eBook of A Tale of Two Cities\n", "Successfully read"
assert txt_to_str("a-tale-of-two-cities.txt")[-59:] == "subscribe to our email newsletter to hear about new eBooks.", "One long string"
In [ ]:
# Note: this rebinds `cities` to one long string (an earlier cell bound it to
# a list of lines).
cities = txt_to_str("a-tale-of-two-cities.txt")
# Lower-case the text and split on runs of whitespace. Punctuation stays
# attached to the words, so e.g. "times," and "times" are counted separately.
# split() with no arguments already ignores leading/trailing whitespace.
words = cities.strip().lower().split()
# Prints one line per distinct word, with one "*" per occurrence.
distributionChart(words)
In [ ]:
!wget https://student.cs.uwaterloo.ca/~cs114/src/nino34.csv
In [18]:
import csv

def csv_to_ld(csv_file: str, delimiter: str=",") -> list[dict[str, str]]:
    """
    Return a list corresponding to the rows in the CSV file specified by
    csv_file, as dictionaries. Optionally, provide a different delimiter from
    the default ",".

    The first row of the file supplies the dictionary keys and is not itself
    included in the returned list.
    """
    # newline="" hands line endings to the csv module untouched, as its
    # documentation requires; without it, line breaks inside quoted fields
    # are rewritten by the file object before the parser sees them.
    with open(csv_file, newline="") as fh:
        return list(csv.DictReader(fh, delimiter=delimiter))

# csv.DictReader uses the header row for the keys, so only the data rows are
# counted here. Cell values are strings, hence "1950" rather than 1950.
assert len(csv_to_ld("nino34.csv")) == 909, "Entire file converted into list"
assert csv_to_ld("nino34.csv")[0]["YR"] == "1950", "First year is as in the original file"

import csv

def csv_to_dl(csv_file: str, delimiter: str=",") -> dict[str, list[str]]:
    """
    Return a dictionary in which each key corresponds to a list of strings, the
    entire column of data for that field in the CSV file specified by csv_file.
    Optionally, provide a different delimiter from the default ",".

    The keys come from the file's first row. A file with a header but no data
    rows yields empty lists; a completely empty file yields an empty
    dictionary.
    """
    ret: dict[str, list[str]] = {}
    # newline="" hands line endings to the csv module untouched, as its
    # documentation requires; without it, line breaks inside quoted fields
    # are rewritten by the file object before the parser sees them.
    with open(csv_file, newline="") as fh:
        rdr = csv.DictReader(fh, delimiter=delimiter)
        # fieldnames is None for an empty file, hence the "or []".
        for field in rdr.fieldnames or []:
            ret[field] = []
        for row in rdr:
            for field in row:
                ret[field].append(row[field])
    return ret

# Each column list holds one string per data row (the header row supplies the
# keys and is not part of any list).
assert len(csv_to_dl("nino34.csv")["TOTAL"]) == 909, "Entire file converted into list"
assert csv_to_dl("nino34.csv")["YR"][0] == "1950", "First year is as in the original file"
In [22]:
# NOTE(review): no cell visible here downloads "nino34_small.csv" - presumably
# a short excerpt of nino34.csv that must already be in the working directory;
# confirm before running on a fresh kernel.
# Row-oriented view: the third data row as a dictionary.
print(csv_to_ld("nino34_small.csv")[2])
# Column-oriented view: every value of the TOTAL column as a list of strings.
print(csv_to_dl("nino34_small.csv")["TOTAL"])
{'YR': '1950', 'MON': '3', 'TOTAL': '25.88', 'ClimAdjust': '26.95', 'ANOM': '-1.07'}
['24.56', '25.07', '25.88', '26.29', '26.19', '26.47', '26.28', '25.88', '25.73', '25.68', '25.46', '25.29']
In [23]:
import csv

def csv_to_ll(csv_file: str, delimiter: str=",") -> list[list[str]]:
    """
    Return a list corresponding to the rows in the CSV file specified by
    csv_file, as lists of cells. Optionally, provide a different delimiter from
    the default ",".

    Every row of the file is returned, including the first (header) row.
    """
    # newline="" hands line endings to the csv module untouched, as its
    # documentation requires; without it, line breaks inside quoted fields
    # are rewritten by the file object before the parser sees them.
    with open(csv_file, newline="") as fh:
        return list(csv.reader(fh, delimiter=delimiter))
    
# csv.reader returns the header as an ordinary row (index 0), so the count is
# one more than the 909 data rows reported by the dictionary-based readers.
assert len(csv_to_ll("nino34.csv")) == 910, "Entire file convert into list"
assert csv_to_ll("nino34.csv")[0] == ["YR", "MON", "TOTAL", "ClimAdjust", "ANOM"], "Header row intact"
In [25]:
# Index 0 is the header row, so index 2 is the second data row.
# NOTE(review): "nino34_small.csv" is not downloaded by any cell visible here;
# confirm it is present before running on a fresh kernel.
print(csv_to_ll("nino34_small.csv")[2])
['1950', '2', '25.07', '26.39', '-1.32']
In [26]:
def averageOf(l: list[float]) -> float:
    """
    Return the arithmetic mean of the numbers in l.

    The values are added one at a time, left to right, starting from 0.0, and
    the total is divided by len(l). An empty list raises ZeroDivisionError.
    """
    total = 0.0
    for number in l:
        total += number
    return total / len(l)
In [30]:
# One empty bucket per month number, 1 through 12.
measurements: dict[int, list[float]] = {month: [] for month in range(1, 13)}

# File each row's TOTAL value (as a float) under that row's MON value.
nino = csv_to_ld("nino34.csv")
for row in nino:
    measurements[int(row["MON"])].append(float(row["TOTAL"]))

# Reduce every month's bucket to its mean and show the resulting mapping.
averages: dict[int, float] = {}
for m in range(1, 13):
    averages[m] = averageOf(measurements[m])
print(averages)
{1: 26.423421052631568, 2: 26.637631578947378, 3: 27.146973684210526, 4: 27.594342105263152, 5: 27.70052631578947, 6: 27.51947368421052, 7: 27.088157894736838, 8: 26.671973684210535, 9: 26.557894736842115, 10: 26.532266666666672, 11: 26.52293333333333, 12: 26.4624}