FileLib¶

This is a library of functions to operate on files. You may use any of them verbatim in CS114. In other courses, you may need to request permission, depending on their policies. Simply copy any function you need, together with the import lines at the top of its cell, if there are any.

Some sample files are suggested, but this code should work with any file in the formats these functions expect.

Read the docstring of each function to see what it does.

Copyright concerns¶

Each of these functions is far too trivial to copyright. You may consider them to be in the public domain.

Text files¶

In [ ]:
# Suggested sample text file
!wget -c https://student.cs.uwaterloo.ca/~cs114/src/a-tale-of-two-cities.txt
In [ ]:
def txt_to_ls(filename: str, encoding: str = "utf-8") -> list[str]:
    r"""
    Split a plain text file given by filename into lines, and return a list of
    those lines. The newline/line break (\n) *is* included in the returned
    strings; only the last string may lack it, if the file does not end with
    a newline. An empty file gives an empty list.

    The file is decoded using encoding, UTF-8 by default, so the result does
    not depend on the settings of the computer the code runs on.
    """
    # The docstring above is a raw string (r"...") so that its \n is shown
    # literally instead of becoming a real line break in help() output.
    with open(filename, encoding=encoding) as fh:
        return fh.readlines()
    
# Self-checks for txt_to_ls. They need the suggested sample file
# a-tale-of-two-cities.txt in the working directory, and each check re-reads
# the whole file. Indices are zero-based, so [10] is the file's 11th line.
assert txt_to_ls("a-tale-of-two-cities.txt")[10] == "Title: A Tale of Two Cities\n", "Title line"
assert txt_to_ls("a-tale-of-two-cities.txt")[106] == "It was the best of times, it was the worst of times, it was the age of\n", "Famous first line"
In [ ]:
def txt_to_str(filename: str, encoding: str = "utf-8") -> str:
    """
    Return the content of a text file filename as a single string.

    The file is decoded using encoding, UTF-8 by default, so the result does
    not depend on the settings of the computer the code runs on. With the
    default encoding, a byte-order mark at the start of the file is kept as
    the first character of the returned string; pass encoding="utf-8-sig" to
    drop it.
    """
    with open(filename, encoding=encoding) as fh:
        return fh.read()
    
# Self-checks for txt_to_str. They need the suggested sample file
# a-tale-of-two-cities.txt in the working directory.
# The first slice starts at index 1, so it skips the file's first character
# (presumably a byte-order mark -- confirm against the sample file).
assert txt_to_str("a-tale-of-two-cities.txt")[1:53] == "The Project Gutenberg eBook of A Tale of Two Cities\n", "Successfully read"
# The negative slice takes the last 59 characters of the file.
assert txt_to_str("a-tale-of-two-cities.txt")[-59:] == "subscribe to our email newsletter to hear about new eBooks.", "One long string"
In [ ]:
def ls_to_txt(
    filename: str,
    lines: list[str],
    add_newline: bool = True,
    encoding: str = "utf-8",
) -> list[str]:
    """
    Write a list of strings, given by lines, to a text file, given by filename.
    If add_newline is True, the default, then a newline is added after each
    string. Returns the list of strings written, with the newlines if
    applicable.

    The list passed in is never modified: when newlines are added, a new list
    is built and returned. Any existing file called filename is overwritten.
    The text is encoded using encoding, UTF-8 by default, so the file written
    does not depend on the settings of the computer the code runs on.
    """
    if add_newline:
        # Build a new list so that the caller's list is left untouched.
        lines = [line + "\n" for line in lines]
    with open(filename, "w", encoding=encoding) as fh:
        fh.writelines(lines)
    return lines

# Self-checks for ls_to_txt. Each call creates or overwrites tmp.txt in the
# working directory; only the returned list is inspected, not the file.
assert ls_to_txt("tmp.txt", ["Hello"])[0] == "Hello\n", "Newline added by default"
assert ls_to_txt("tmp.txt", ["Hello"], add_newline=False)[0] == "Hello", "Newline not added with add_newline=False"

CSV files¶

In [ ]:
# Suggested sample CSV file
!wget -c https://student.cs.uwaterloo.ca/~cs114/src/nino34.csv
In [ ]:
import csv

def csv_to_ld(
    csv_file: str, delimiter: str = ",", encoding: str = "utf-8"
) -> list[dict[str, str]]:
    """
    Return a list corresponding to the rows in the CSV file specified by
    csv_file, as dictionaries. The first row of the file supplies the keys and
    is not itself included in the list. Optionally, provide a different
    delimiter from the default ",".

    The file is decoded using encoding, UTF-8 by default. For a file that
    starts with a byte-order mark (as some spreadsheet programs write), pass
    encoding="utf-8-sig" so that the mark does not end up in the first key.
    """
    # newline="" is what the csv module asks for: it lets the CSV parser see
    # the original line endings, so line breaks inside quoted cells are
    # returned unchanged.
    with open(csv_file, newline="", encoding=encoding) as fh:
        return list(csv.DictReader(fh, delimiter=delimiter))

# Self-checks for csv_to_ld. They need the suggested sample file nino34.csv in
# the working directory, and each check re-reads the whole file.
# 909 counts data rows only: the header row becomes the dictionary keys.
assert len(csv_to_ld("nino34.csv")) == 909, "Entire file converted into list"
# Cells are always strings, so the year is "1950" and not the number 1950.
assert csv_to_ld("nino34.csv")[0]["YR"] == "1950", "First year is as in the original file"
In [ ]:
import csv

def csv_to_dl(
    csv_file: str, delimiter: str = ",", encoding: str = "utf-8"
) -> dict[str, list[str]]:
    """
    Return a dictionary in which each key corresponds to a list of strings, the
    entire column of data for that field in the CSV file specified by csv_file.
    The first row of the file supplies the keys. Optionally, provide a
    different delimiter from the default ",".

    A file with a header row but no data rows gives a dictionary of empty
    lists; an empty file gives an empty dictionary. A row with fewer cells than
    the header contributes None for its missing cells, and a row with more
    cells than the header raises KeyError.

    The file is decoded using encoding, UTF-8 by default. For a file that
    starts with a byte-order mark, pass encoding="utf-8-sig" so that the mark
    does not end up in the first key.
    """
    # newline="" is what the csv module asks for: it lets the CSV parser see
    # the original line endings, so line breaks inside quoted cells are
    # returned unchanged.
    with open(csv_file, newline="", encoding=encoding) as fh:
        rdr = csv.DictReader(fh, delimiter=delimiter)
        # Start every column off empty, so that columns exist even when the
        # file has no data rows. fieldnames is None for an empty file.
        ret: dict[str, list[str]] = {field: [] for field in rdr.fieldnames or []}
        for row in rdr:
            for field, cell in row.items():
                ret[field].append(cell)
    return ret

# Self-checks for csv_to_dl. They need the suggested sample file nino34.csv in
# the working directory, and each check re-reads the whole file.
# Each column list has one entry per data row; the header row is not counted.
assert len(csv_to_dl("nino34.csv")["TOTAL"]) == 909, "Entire file converted into list"
# Cells are always strings, so the year is "1950" and not the number 1950.
assert csv_to_dl("nino34.csv")["YR"][0] == "1950", "First year is as in the original file"
In [ ]:
import csv

def csv_to_ll(
    csv_file: str, delimiter: str = ",", encoding: str = "utf-8"
) -> list[list[str]]:
    """
    Return a list corresponding to the rows in the CSV file specified by
    csv_file, as lists of cells. Every row is returned, so if the file has a
    header row, it is the first list. Optionally, provide a different
    delimiter from the default ",".

    The file is decoded using encoding, UTF-8 by default. For a file that
    starts with a byte-order mark, pass encoding="utf-8-sig" so that the mark
    does not end up in the first cell.
    """
    # newline="" is what the csv module asks for: it lets the CSV parser see
    # the original line endings, so line breaks inside quoted cells are
    # returned unchanged.
    with open(csv_file, newline="", encoding=encoding) as fh:
        return list(csv.reader(fh, delimiter=delimiter))
    
# Self-checks for csv_to_ll. They need the suggested sample file nino34.csv in
# the working directory, and each check re-reads the whole file.
# The count is one more than the 909 rows that csv_to_ld reports, because
# here the header row is kept as the first list.
assert len(csv_to_ll("nino34.csv")) == 910, "Entire file convert into list"
assert csv_to_ll("nino34.csv")[0] == ["YR", "MON", "TOTAL", "ClimAdjust", "ANOM"], "Header row intact"
In [ ]:
import csv
import typing

def ld_to_csv(
    csv_file: str,
    data: list[dict[str, typing.Any]],
    delimiter: str = ",",
    encoding: str = "utf-8",
) -> list[dict[str, typing.Any]]:
    """
    Write the data given by data as a list of dictionaries to the CSV file
    given by csv_file. Returns the same data. Optionally, provide a different
    delimiter from the default ",".

    The columns, and the header row, are the keys of the first dictionary, in
    order. A later dictionary that lacks one of those keys gets an empty cell
    for it; one that has a key not among them raises ValueError. If data is an
    empty list, an empty file is written.

    Any existing file called csv_file is overwritten. The text is encoded
    using encoding, UTF-8 by default.
    """
    # newline="" is what the csv module asks for: the CSV writer produces the
    # line endings itself, and without it some platforms would write an extra
    # carriage return on every row.
    with open(csv_file, "w", newline="", encoding=encoding) as fh:
        # With no rows there is no first dictionary to take the columns from,
        # so there is nothing to write.
        if data:
            wr = csv.DictWriter(fh, list(data[0]), delimiter=delimiter)
            wr.writeheader()
            wr.writerows(data)
    return data

# Self-checks for ld_to_csv. They need the suggested sample file nino34.csv in
# the working directory. Each check reads nino34.csv and creates or overwrites
# tmp.csv; only the returned data is inspected, not the file written.
assert len(ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))) == 909, "CSV round trip"
assert ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))[0]["YR"] == "1950", "First year preserved"
In [ ]:
import csv
import typing

def dl_to_csv(
    csv_file: str,
    data: dict[str, list[typing.Any]],
    delimiter: str = ",",
    encoding: str = "utf-8",
) -> dict[str, list[typing.Any]]:
    """
    Write the data given by data as a dictionary of strings to lists to the CSV
    file given by csv_file. Returns the same data. Optionally, provide a
    different delimiter from the default ",".

    The keys form the header row, and each list is one column. Every value is
    converted with str() before it is written. The number of rows is the
    length of the first list: a longer list is cut off at that length, and a
    shorter one raises IndexError. If data is an empty dictionary, an empty
    file is written.

    Any existing file called csv_file is overwritten. The text is encoded
    using encoding, UTF-8 by default.
    """
    fieldnames = list(data)
    # newline="" is what the csv module asks for: the CSV writer produces the
    # line endings itself, and without it some platforms would write an extra
    # carriage return on every row.
    with open(csv_file, "w", newline="", encoding=encoding) as fh:
        # With no columns there is no first list to take the row count from,
        # so there is nothing to write.
        if fieldnames:
            wr = csv.DictWriter(fh, fieldnames, delimiter=delimiter)
            wr.writeheader()
            for row_idx in range(len(data[fieldnames[0]])):
                wr.writerow(
                    {field: str(data[field][row_idx]) for field in fieldnames}
                )
    return data

# Self-checks for dl_to_csv. They need the suggested sample file nino34.csv in
# the working directory. Each check reads nino34.csv and creates or overwrites
# tmp.csv; only the returned data is inspected, not the file written.
assert len(dl_to_csv("tmp.csv", csv_to_dl("nino34.csv"))["TOTAL"]) == 909, "CSV round trip"
assert dl_to_csv("tmp.csv", csv_to_dl("nino34.csv"))["YR"][0] == "1950", "First year preserved"
In [ ]:
import csv

def ll_to_csv(
    csv_file: str,
    data: list[list[str]],
    delimiter: str = ",",
    encoding: str = "utf-8",
) -> list[list[str]]:
    """
    Write the data given by data as a list of lists to the CSV file given by
    csv_file. Returns the same data. Optionally, provide a different delimiter
    from the default ",".

    Each inner list becomes one row of the file. No header is added: if the
    file should have a header row, it must be the first list in data. If data
    is an empty list, an empty file is written.

    Any existing file called csv_file is overwritten. The text is encoded
    using encoding, UTF-8 by default.
    """
    # newline="" is what the csv module asks for: the CSV writer produces the
    # line endings itself, and without it some platforms would write an extra
    # carriage return on every row.
    with open(csv_file, "w", newline="", encoding=encoding) as fh:
        # The delimiter must be handed to the writer, or it has no effect.
        wr = csv.writer(fh, delimiter=delimiter)
        wr.writerows(data)
    return data

# Self-checks for ll_to_csv. They need the suggested sample file nino34.csv in
# the working directory. Each check reads nino34.csv and creates or overwrites
# tmp.csv; only the returned data is inspected, not the file written.
assert len(ll_to_csv("tmp.csv", csv_to_ll("nino34.csv"))) == 910, "CSV round trip"
# Row 0 is the header row, so the first year is in row 1.
assert ll_to_csv("tmp.csv", csv_to_ll("nino34.csv"))[1][0] == "1950", "First year preserved"

JSON files¶

Note: ipynb files are JSON files, so this notebook itself (filelib.ipynb) is used as the sample file in the examples below.

In [ ]:
import json
import typing

def json_to_any(json_file: str) -> typing.Any:
    """
    Return JSON data read from the file given by json_file.

    The result is built from dictionaries, lists, strings, numbers, booleans
    and None, mirroring the structure of the JSON document. A file that does
    not contain valid JSON raises json.JSONDecodeError.
    """
    # Open the file in binary mode and let the json module work out the
    # encoding. It recognizes UTF-8 (with or without a byte-order mark),
    # UTF-16 and UTF-32, so the result does not depend on the settings of the
    # computer the code runs on.
    with open(json_file, "rb") as fh:
        return json.load(fh)
    
# Self-checks for json_to_any. They need this notebook saved as filelib.ipynb
# in the working directory, and each check re-reads it.
assert json_to_any("filelib.ipynb")["cells"][0]["source"][0] == "# FileLib\n", "This ipynb file is JSON"
# The expected count is fixed, so it has to be updated whenever a cell is
# added to or removed from the notebook.
assert len(json_to_any("filelib.ipynb")["cells"]) == 17, "Total number of cells in this file"
In [ ]:
import json
import typing

def any_to_json(json_file: str, data: typing.Any) -> typing.Any:
    """
    Save the data given by data to a JSON file given by json_file. Returns the
    same data.

    The data must be made up of values JSON can represent: dictionaries,
    lists, tuples, strings, numbers, booleans and None. Anything else raises
    TypeError, in which case json_file is not created or changed. Otherwise,
    any existing file called json_file is overwritten.
    """
    # Convert to JSON text before opening the file: opening for writing
    # empties the file straight away, so data that cannot be converted must be
    # detected first, or an existing file would be lost.
    text = json.dumps(data)
    with open(json_file, "w", encoding="utf-8") as fh:
        fh.write(text)
    return data

# Self-checks for any_to_json. They need this notebook saved as filelib.ipynb
# in the working directory. Each check creates or overwrites tmp.json.
assert any_to_json("tmp.json", json_to_any("filelib.ipynb"))["cells"][0]["source"][0] == "# FileLib\n", "Same data returned"
# Compares the data returned by any_to_json with a fresh read of the
# notebook; tmp.json itself is not read back.
assert any_to_json("tmp.json", json_to_any("filelib.ipynb")) == json_to_any("filelib.ipynb"), "JSON round trip"