This is a library of functions to operate on files. You may use any of them verbatim in CS114. In other courses, you may need to request permission, depending on their policies. Simply copy any function you need.
Some sample files are suggested, but this code should work with any file in the formats these functions expect.
Read the docstring of each function to see what it does.
Each of these functions is far too trivial to copyright. You may consider them to be in the public domain.
# Suggested sample text file
!wget -c https://student.cs.uwaterloo.ca/~cs114/src/a-tale-of-two-cities.txt
def txt_to_ls(filename: str) -> list[str]:
    """
    Read the plain text file named by filename and return its lines, in file
    order, as a list of strings. Every string keeps its trailing line break,
    so only the last line of the file may come back without one.
    """
    with open(filename) as text_file:
        file_lines = text_file.readlines()
    return file_lines
# Sanity checks for txt_to_ls. They need a-tale-of-two-cities.txt (fetched by
# the wget line above) in the current working directory.
assert txt_to_ls("a-tale-of-two-cities.txt")[10] == "Title: A Tale of Two Cities\n", "Title line"
assert txt_to_ls("a-tale-of-two-cities.txt")[106] == "It was the best of times, it was the worst of times, it was the age of\n", "Famous first line"
def txt_to_str(filename: str) -> str:
    """
    Read the whole text file named by filename and return everything in it as
    one string, line breaks included.
    """
    with open(filename) as text_file:
        content = text_file.read()
    return content
# Sanity checks for txt_to_str against the sample book, which must be in the
# current working directory. The first slice starts at index 1, so the very
# first character of the file is deliberately skipped.
assert txt_to_str("a-tale-of-two-cities.txt")[1:53] == "The Project Gutenberg eBook of A Tale of Two Cities\n", "Successfully read"
assert txt_to_str("a-tale-of-two-cities.txt")[-59:] == "subscribe to our email newsletter to hear about new eBooks.", "One long string"
def ls_to_txt(filename: str, lines: list[str], add_newline=True) -> list[str]:
    """
    Write the strings in lines, in order, to the text file named by filename,
    replacing any existing content. By default (add_newline=True) a newline
    is appended to every string before writing; pass add_newline=False to
    write the strings exactly as given. Returns the strings as they were
    written, i.e. including the appended newlines when add_newline is True.
    """
    if add_newline:
        # Build a new list so the list passed in by the caller is not changed.
        lines = [text + "\n" for text in lines]
    with open(filename, "w") as out_file:
        out_file.writelines(lines)
    return lines
# Sanity checks for ls_to_txt. Each call (over)writes tmp.txt in the current
# working directory.
assert ls_to_txt("tmp.txt", ["Hello"])[0] == "Hello\n", "Newline added by default"
assert ls_to_txt("tmp.txt", ["Hello"], add_newline=False)[0] == "Hello", "Newline not added with add_newline=False"
# Suggested sample CSV file
!wget -c https://student.cs.uwaterloo.ca/~cs114/src/nino34.csv
import csv
def csv_to_ld(csv_file: str, delimiter: str=",") -> list[dict[str, str]]:
    """
    Return the rows of the CSV file specified by csv_file as a list of
    dictionaries, one per data row. The first row of the file supplies the
    keys; it is not itself included in the returned list. Optionally, provide
    a different delimiter from the default ",".
    """
    # newline="" hands line endings to the csv module untouched, as its
    # documentation requires; otherwise a line break inside a quoted cell
    # would be altered before the parser sees it.
    with open(csv_file, newline="") as fh:
        return list(csv.DictReader(fh, delimiter=delimiter))
# Sanity checks for csv_to_ld. They need nino34.csv (fetched by the wget line
# above) in the current working directory.
assert len(csv_to_ld("nino34.csv")) == 909, "Entire file converted into list"
assert csv_to_ld("nino34.csv")[0]["YR"] == "1950", "First year is as in the original file"
import csv
def csv_to_dl(csv_file: str, delimiter: str=",") -> dict[str, list[str]]:
    """
    Return the columns of the CSV file specified by csv_file as a dictionary.
    Each key is a field name taken from the first row of the file, and maps
    to the list of that column's cells, in row order. A file with a header
    but no data rows gives empty lists; an empty file gives an empty
    dictionary. Optionally, provide a different delimiter from the default
    ",".
    """
    ret: dict[str, list[str]] = {}
    # newline="" hands line endings to the csv module untouched, as its
    # documentation requires; otherwise a line break inside a quoted cell
    # would be altered before the parser sees it.
    with open(csv_file, newline="") as fh:
        rdr = csv.DictReader(fh, delimiter=delimiter)
        # fieldnames is None for an empty file, hence the "or []".
        for field in rdr.fieldnames or []:
            ret[field] = []
        for row in rdr:
            for field, cell in row.items():
                ret[field].append(cell)
    return ret
# Sanity checks for csv_to_dl. They need nino34.csv in the current working
# directory.
assert len(csv_to_dl("nino34.csv")["TOTAL"]) == 909, "Entire file converted into list"
assert csv_to_dl("nino34.csv")["YR"][0] == "1950", "First year is as in the original file"
import csv
def csv_to_ll(csv_file: str, delimiter: str=",") -> list[list[str]]:
    """
    Return the rows of the CSV file specified by csv_file as a list of lists
    of cells. Every row of the file is returned, including the header row if
    the file has one. Optionally, provide a different delimiter from the
    default ",".
    """
    # newline="" hands line endings to the csv module untouched, as its
    # documentation requires; otherwise a line break inside a quoted cell
    # would be altered before the parser sees it.
    with open(csv_file, newline="") as fh:
        return list(csv.reader(fh, delimiter=delimiter))
# Sanity checks for csv_to_ll. They need nino34.csv in the current working
# directory. The expected length is 910 rather than 909 because the header
# row is returned as row 0.
assert len(csv_to_ll("nino34.csv")) == 910, "Entire file convert into list"
assert csv_to_ll("nino34.csv")[0] == ["YR", "MON", "TOTAL", "ClimAdjust", "ANOM"], "Header row intact"
import csv
import typing
def ld_to_csv(
    csv_file: str, data: list[dict[str, typing.Any]], delimiter: str=","
) -> list[dict[str, typing.Any]]:
    """
    Write the data given by data, a list of dictionaries (one per row), to
    the CSV file given by csv_file, replacing any existing content. The keys
    of the first dictionary become the header row and fix the column order.
    If data is empty, an empty file is written. Returns the same data.
    Optionally, provide a different delimiter from the default ",".
    """
    # newline="" stops the file object from translating the line endings the
    # csv module writes, which would otherwise add a stray "\r" on platforms
    # whose native line ending is "\r\n".
    with open(csv_file, "w", newline="") as fh:
        # With no rows there is no first dictionary to take the header from,
        # so leave the file empty instead of failing on data[0].
        if data:
            wr = csv.DictWriter(fh, list(data[0]), delimiter=delimiter)
            wr.writeheader()
            wr.writerows(data)
    return data
# Round-trip checks for ld_to_csv: read nino34.csv with csv_to_ld, then write
# it back out. Each call (over)writes tmp.csv in the current working
# directory.
assert len(ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))) == 909, "CSV round trip"
assert ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))[0]["YR"] == "1950", "First year preserved"
import csv
import typing
def dl_to_csv(
    csv_file: str, data: dict[str, list[typing.Any]], delimiter: str=","
) -> dict[str, list[typing.Any]]:
    """
    Write the data given by data, a dictionary mapping each field name to the
    list of that column's values, to the CSV file given by csv_file,
    replacing any existing content. The keys become the header row, in
    dictionary order, and every value is converted with str() before being
    written. The number of rows written is the length of the first column.
    If data has no columns, an empty file is written. Returns the same data.
    Optionally, provide a different delimiter from the default ",".
    """
    fieldnames = list(data)
    # newline="" stops the file object from translating the line endings the
    # csv module writes, which would otherwise add a stray "\r" on platforms
    # whose native line ending is "\r\n".
    with open(csv_file, "w", newline="") as fh:
        # With no columns there is no first column to count rows from, so
        # leave the file empty instead of failing on fieldnames[0].
        if fieldnames:
            wr = csv.DictWriter(fh, fieldnames, delimiter=delimiter)
            wr.writeheader()
            for row_idx in range(len(data[fieldnames[0]])):
                # Take the row_idx-th value of every column to form one row.
                row: dict[str, str] = {
                    field: str(data[field][row_idx]) for field in fieldnames
                }
                wr.writerow(row)
    return data
# Round-trip checks for dl_to_csv: read nino34.csv with csv_to_dl, then write
# it back out. Each call (over)writes tmp.csv in the current working
# directory.
assert len(dl_to_csv("tmp.csv", csv_to_dl("nino34.csv"))["TOTAL"]) == 909, "CSV round trip"
assert dl_to_csv("tmp.csv", csv_to_dl("nino34.csv"))["YR"][0] == "1950", "First year preserved"
import csv
def ll_to_csv(
    csv_file: str, data: list[list[str]], delimiter: str=","
) -> list[list[str]]:
    """
    Write the data given by data, a list of rows where each row is a list of
    cells, to the CSV file given by csv_file, replacing any existing content.
    No header is added: if the file should have one, it must be the first
    row of data. Returns the same data. Optionally, provide a different
    delimiter from the default ",".
    """
    # newline="" stops the file object from translating the line endings the
    # csv module writes, which would otherwise add a stray "\r" on platforms
    # whose native line ending is "\r\n".
    with open(csv_file, "w", newline="") as fh:
        # Pass the delimiter through so the parameter actually takes effect.
        wr = csv.writer(fh, delimiter=delimiter)
        wr.writerows(data)
    return data
# Round-trip checks for ll_to_csv: read nino34.csv with csv_to_ll, then write
# it back out. Each call (over)writes tmp.csv in the current working
# directory. Row 0 is the header, so the first year is at row 1.
assert len(ll_to_csv("tmp.csv", csv_to_ll("nino34.csv"))) == 910, "CSV round trip"
assert ll_to_csv("tmp.csv", csv_to_ll("nino34.csv"))[1][0] == "1950", "First year preserved"
Note: ipynb files are JSON files, so this notebook itself (filelib.ipynb) is used as the sample file in the examples below.
import json
import typing
def json_to_any(json_file: str) -> typing.Any:
    """
    Parse the JSON file given by json_file and return the Python value it
    describes.
    """
    with open(json_file) as json_fh:
        raw_text = json_fh.read()
    return json.loads(raw_text)
# Sanity checks for json_to_any. They read filelib.ipynb from the current
# working directory; the expected cell count (17) has to be kept in step
# with that file.
assert json_to_any("filelib.ipynb")["cells"][0]["source"][0] == "# FileLib\n", "This ipynb file is JSON"
assert len(json_to_any("filelib.ipynb")["cells"]) == 17, "Total number of cells in this file"
import json
import typing
def any_to_json(json_file: str, data: typing.Any) -> typing.Any:
    """
    Serialize data as JSON into the file given by json_file, replacing any
    existing content, and hand the very same data back to the caller.
    """
    with open(json_file, mode="w") as json_fh:
        json.dump(obj=data, fp=json_fh)
    return data
# Round-trip checks for any_to_json: load filelib.ipynb with json_to_any and
# save it again. Each call (over)writes tmp.json in the current working
# directory.
assert any_to_json("tmp.json", json_to_any("filelib.ipynb"))["cells"][0]["source"][0] == "# FileLib\n", "Same data returned"
assert any_to_json("tmp.json", json_to_any("filelib.ipynb")) == json_to_any("filelib.ipynb"), "JSON round trip"