In [ ]:
# Download filelib.ipynb from the ~cs114 course page into the working
# directory; later cells open this file by name.
!wget https://student.cs.uwaterloo.ca/~cs114/src/filelib.ipynb
In [2]:
import csv

def csv_to_dl(csv_file: str, delimiter: str=",") -> dict[str, list[str]]:
    """
    Return a dictionary in which each key corresponds to a list of strings, the
    entire column of data for that field in the CSV file specified by csv_file.
    Optionally, provide a different delimiter from the default ",".

    The first row of the file supplies the field names. Cells beyond the last
    header column are ignored. A row with fewer cells than the header
    contributes None for each missing field (csv.DictReader's default). An
    empty file yields an empty dictionary.
    """
    # newline="" hands line-ending handling to the csv module, so a line break
    # inside a quoted field is returned exactly as stored in the file.
    with open(csv_file, newline="") as fh:
        rdr = csv.DictReader(fh, delimiter=delimiter)
        ret: dict[str, list[str]] = {field: [] for field in rdr.fieldnames or []}
        for row in rdr:
            # Walk the known columns rather than the row's keys: DictReader
            # files overflow cells under the key None, which is not a column.
            for field, column in ret.items():
                column.append(row[field])
    return ret

# Sanity checks; they require nino34.csv in the working directory. The TOTAL
# column should hold 909 entries and the first YR value should be "1950".
assert len(csv_to_dl("nino34.csv")["TOTAL"]) == 909, "Entire file converted into list"
assert csv_to_dl("nino34.csv")["YR"][0] == "1950", "First year is as in the original file"
In [4]:
def sumEveryColumn(filename: str) -> dict[str, float]:
    """
    Add up each column of the CSV file named by filename and return a
    dictionary mapping every column name to the total of its values.
    Every cell is converted with float(), so a non-numeric cell raises.
    """
    totals = {}
    for name, cells in csv_to_dl(filename).items():
        # Accumulate left to right starting from 0.0.
        running = 0.0
        for cell in cells:
            running += float(cell)
        totals[name] = running
    return totals

# Show the per-column sums for nino34.csv.
print(sumEveryColumn("nino34.csv"))
{'YR': 1806525.0, 'MON': 5895.0, 'TOTAL': 24457.69000000002, 'ClimAdjust': 24447.470000000038, 'ANOM': 10.119999999999987}
In [5]:
def txt_to_ls(filename: str) -> list[str]:
    """
    Read the plain text file named by filename and return its lines as a
    list, in file order. Each string keeps its trailing newline (\n); only a
    final line that lacks one in the file comes back without it.
    """
    with open(filename) as source:
        contents = source.readlines()
    return contents
    
# Sanity checks; they require a-tale-of-two-cities.txt in the working
# directory. Lines are indexed from 0 and keep their trailing newline.
assert txt_to_ls("a-tale-of-two-cities.txt")[10] == "Title: A Tale of Two Cities\n", "Title line"
assert txt_to_ls("a-tale-of-two-cities.txt")[106] == "It was the best of times, it was the worst of times, it was the age of\n", "Famous first line"
In [6]:
def ls_to_txt(filename: str, lines: list[str], add_newline=True) -> list[str]:
    """
    Write the strings in lines, in order, to the text file named by filename,
    replacing any existing content. With add_newline left at its default of
    True, "\n" is appended to every string before writing; pass False when the
    strings already carry their own line endings. Returns the strings exactly
    as they were written.
    """
    if add_newline:
        # Build a new list so the caller's list is left untouched.
        lines = [text + "\n" for text in lines]
    with open(filename, "w") as out:
        out.writelines(lines)
    return lines

# Sanity checks for both add_newline settings; each call overwrites tmp.txt
# in the working directory.
assert ls_to_txt("tmp.txt", ["Hello"])[0] == "Hello\n", "Newline added by default"
assert ls_to_txt("tmp.txt", ["Hello"], add_newline=False)[0] == "Hello", "Newline not added with add_newline=False"
In [11]:
def reverseLines(
    outputFile: str, inputFile: str
) -> None:
    """
    Read the text file named by inputFile and write it to outputFile with the
    characters of every line reversed. The order of the lines is unchanged.
    Leading and trailing whitespace is stripped from each line before it is
    reversed, and every output line ends with a single newline.
    """
    flipped = [text.strip()[::-1] + "\n" for text in txt_to_ls(inputFile)]
    # Each entry already ends in "\n", so no extra newline is requested.
    ls_to_txt(outputFile, flipped, add_newline=False)
    
# Write a copy of the novel with every line reversed character by character.
reverseLines("seitic-owt-fo-elat-a.txt", "a-tale-of-two-cities.txt")
In [13]:
def lineInfo(
    outputFile: str, inputFile: str
) -> None:
    """
    Read the text file named by inputFile and write one summary line per
    input line to outputFile. Each summary gives the line's length and its
    characters reversed. The length is measured on the line as read, so it
    counts the trailing newline when there is one. The reversed text is
    taken after stripping surrounding whitespace.
    """
    report = []
    for text in txt_to_ls(inputFile):
        report.append(f"Line length: {len(text)}. Line reversed: {text.strip()[::-1]}\n")
    # Each entry already ends in "\n", so no extra newline is requested.
    ls_to_txt(outputFile, report, add_newline=False)
    
# Write the per-line report for the novel to info-atotc.txt.
lineInfo("info-atotc.txt", "a-tale-of-two-cities.txt")
In [14]:
import csv

def csv_to_ld(csv_file: str, delimiter: str=",") -> list[dict[str, str]]:
    """
    Return a list corresponding to the rows in the CSV file specified by
    csv_file, as dictionaries. Optionally, provide a different delimiter from
    the default ",".

    The first row of the file supplies the dictionary keys; every following
    non-blank row becomes one dictionary, in file order.
    """
    # newline="" hands line-ending handling to the csv module, so a line break
    # inside a quoted field is returned exactly as stored in the file.
    with open(csv_file, newline="") as fh:
        return list(csv.DictReader(fh, delimiter=delimiter))

# Sanity checks; they require nino34.csv in the working directory. The file
# should yield 909 row dictionaries, the first with YR equal to "1950".
assert len(csv_to_ld("nino34.csv")) == 909, "Entire file converted into list"
assert csv_to_ld("nino34.csv")[0]["YR"] == "1950", "First year is as in the original file"
In [15]:
import csv
import typing

def ld_to_csv(
    csv_file: str, data: list[dict[str, typing.Any]], delimiter: str=","
) -> list[dict[str, typing.Any]]:
    """
    Write the data given by data as a list of dictionaries to the CSV file
    given by csv_file. Returns the same data. Optionally, provide a different
    delimiter from the default ",".

    The header row is taken from the keys of the first dictionary, in order.
    If data is empty there is no header to write, so an empty file is
    created. A later row with a key missing from the first row raises
    ValueError (csv.DictWriter's default behaviour).
    """
    # newline="" stops the text layer from translating the "\r\n" the csv
    # writer emits, which would otherwise produce "\r\r\n" on Windows.
    with open(csv_file, "w", newline="") as fh:
        if data:
            wr = csv.DictWriter(fh, list(data[0]), delimiter=delimiter)
            wr.writeheader()
            wr.writerows(data)
    return data

# Round-trip checks: read nino34.csv, write it back out as tmp.csv (overwritten
# by each call), and confirm the returned rows are unchanged.
assert len(ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))) == 909, "CSV round trip"
assert ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))[0]["YR"] == "1950", "First year preserved"
In [17]:
# Add a "Fahrenheit" field to every row by applying the Celsius-to-Fahrenheit
# formula (value * 9/5 + 32) to TOTAL, stored as a string like the other
# fields, then write the extended rows to nino34f.csv.
nino = csv_to_ld("nino34.csv")
for row in nino:
    row["Fahrenheit"] = str(  float(row["TOTAL"])*9/5+32  )
ld_to_csv("nino34f.csv", nino)
# NOTE(review): presumably here so the cell's last line is not the ld_to_csv
# call, whose returned list the notebook would otherwise display; confirm.
print("Hi!")
Hi!
In [18]:
import json
import typing

def json_to_any(json_file: str) -> typing.Any:
    """
    Return JSON data read from the file given by json_file.

    The file is read as bytes so that the json module determines the encoding
    itself (UTF-8, UTF-16 or UTF-32, with or without a byte-order mark). This
    keeps the result independent of the platform's default text encoding.
    Raises json.JSONDecodeError if the content is not valid JSON.
    """
    with open(json_file, "rb") as fh:
        return json.load(fh)
    
# Sanity checks; they require filelib.ipynb in the working directory. A
# notebook file is itself JSON, so its "cells" list can be inspected directly.
assert json_to_any("filelib.ipynb")["cells"][0]["source"][0] == "# FileLib\n", "This ipynb file is JSON"
assert len(json_to_any("filelib.ipynb")["cells"]) == 17, "Total number of cells in this file"
In [19]:
import json
import typing

def any_to_json(json_file: str, data: typing.Any) -> typing.Any:
    """
    Serialise data as JSON into the file named by json_file, replacing any
    existing content, and hand the same data object back to the caller.
    """
    with open(json_file, "w") as sink:
        json.dump(data, sink)
    return data

# Round-trip checks: write the parsed notebook to tmp.json (overwritten by each
# call) and confirm the returned data equals what was read.
assert any_to_json("tmp.json", json_to_any("filelib.ipynb"))["cells"][0]["source"][0] == "# FileLib\n", "Same data returned"
assert any_to_json("tmp.json", json_to_any("filelib.ipynb")) == json_to_any("filelib.ipynb"), "JSON round trip"
In [26]:
# Load exercise-l15.ipynb as JSON, replace the source of its first cell with a
# single print statement, and save the result as modified.ipynb.
ipynb = json_to_any("exercise-l15.ipynb")
ipynb["cells"][0]["source"] = ["print('Hello!')"]
any_to_json("modified.ipynb", ipynb)
# NOTE(review): presumably here so the cell's last line is not the any_to_json
# call, whose returned data the notebook would otherwise display; confirm.
print("Hi")
Hi
In [28]:
def words(line: str) -> list[str]:
    """
    Break line into lowercase words and return them in order. A word is a
    maximal run of the letters a-z after lowercasing. Every other character
    (digits, punctuation, whitespace, accented letters) acts as a separator.
    """
    # Turn every non-letter into a space, then split on runs of whitespace.
    cleaned = "".join(ch if "a" <= ch <= "z" else " " for ch in line.lower())
    return cleaned.split()

# Example: case and punctuation are dropped, leaving only lowercase words.
print(words("It was the best of times, it was the worst of times."))
['it', 'was', 'the', 'best', 'of', 'times', 'it', 'was', 'the', 'worst', 'of', 'times']
In [29]:
def distribution(lst: typing.Sequence) -> dict[typing.Any, int]:
    """
    Count how often each value occurs in lst. Returns a dictionary mapping
    every distinct value to its number of occurrences, with keys in order of
    first appearance. The values must be hashable.
    """
    tally = {}
    for item in lst:
        tally[item] = tally.get(item, 0) + 1
    return tally
In [30]:
def txt_to_str(filename: str) -> str:
    """
    Read the whole text file named by filename and return its content as one
    string, line breaks included.
    """
    with open(filename) as source:
        contents = source.read()
    return contents
    
# Sanity checks; they require a-tale-of-two-cities.txt in the working
# directory. The first slice starts at index 1, skipping the file's first
# character (presumably a byte-order mark; confirm against the file).
assert txt_to_str("a-tale-of-two-cities.txt")[1:53] == "The Project Gutenberg eBook of A Tale of Two Cities\n", "Successfully read"
assert txt_to_str("a-tale-of-two-cities.txt")[-59:] == "subscribe to our email newsletter to hear about new eBooks.", "One long string"
In [32]:
def wordDistributionCSV(outFilename: str, inFilename: str) -> None:
    """
    Count how many times each word occurs in the text file inFilename and
    write the counts to outFilename as a CSV with the columns "Word" and
    "# of appearances", most frequent word first.
    """
    # Count every word in the input text.
    counts = distribution(words(txt_to_str(inFilename)))

    # Rank by descending count; the stable sort keeps tied words in
    # first-seen order.
    ranked = sorted(counts, key=counts.get, reverse=True)

    # One CSV row per word.
    rows = [
        {"Word": entry, "# of appearances": counts[entry]}
        for entry in ranked
    ]
    ld_to_csv(outFilename, rows)
    
# Write the word-frequency table for the novel to a-tale-of-many-words.csv.
wordDistributionCSV("a-tale-of-many-words.csv", "a-tale-of-two-cities.txt")