!wget https://student.cs.uwaterloo.ca/~cs114/src/filelib.ipynb
import csv
def csv_to_dl(csv_file: str, delimiter: str=",") -> dict[str, list[str]]:
    """
    Return a dictionary in which each key corresponds to a list of strings, the
    entire column of data for that field in the CSV file specified by csv_file.
    Optionally, provide a different delimiter from the default ",".

    The first row of the file supplies the field names. A completely empty
    file yields an empty dictionary; a file holding only the header row yields
    every field name mapped to an empty list.
    """
    ret: dict[str, list[str]] = {}
    # newline="" passes line endings to the csv module untouched, so a line
    # break inside a quoted field is read back exactly as it was written.
    with open(csv_file, newline="") as fh:
        rdr = csv.DictReader(fh, delimiter=delimiter)
        # Create every column up front so that a header-only file still
        # reports all of its fields.
        for field in rdr.fieldnames or []:
            ret[field] = []
        for row in rdr:
            for field in row:
                ret[field].append(row[field])
    return ret
# Sanity checks for csv_to_dl; both read "nino34.csv" from the working directory.
assert len(csv_to_dl("nino34.csv")["TOTAL"]) == 909, "Entire file converted into list"
assert csv_to_dl("nino34.csv")["YR"][0] == "1950", "First year is as in the original file"
def sumEveryColumn(filename: str) -> dict[str, float]:
    """
    Sum every column of the CSV file given by filename. Return the sums in a
    dictionary, with each column name associated with a sum.

    Every value is converted with float(), so a column containing text that
    is not a number raises ValueError. A column with no rows sums to 0.0.
    """
    data = csv_to_dl(filename)
    totals: dict[str, float] = {}
    for column, values in data.items():
        # Plain left-to-right accumulation starting from 0.0; the local is
        # deliberately not called "sum" so the builtin stays usable.
        total = 0.0
        for item in values:
            total = total + float(item)
        totals[column] = total
    return totals
# Demo: print the per-column sums of "nino34.csv".
print(sumEveryColumn("nino34.csv"))
# Output: {'YR': 1806525.0, 'MON': 5895.0, 'TOTAL': 24457.69000000002, 'ClimAdjust': 24447.470000000038, 'ANOM': 10.119999999999987}
def txt_to_ls(filename: str) -> list[str]:
    """
    Read the plain text file given by filename and return its lines as a list
    of strings. Line breaks are not stripped: each returned string still ends
    with the newline that terminated it in the file.
    """
    with open(filename) as text_file:
        all_lines = text_file.readlines()
    return all_lines
# Sanity checks for txt_to_ls; both read "a-tale-of-two-cities.txt" from the
# working directory and compare specific lines, newline included.
assert txt_to_ls("a-tale-of-two-cities.txt")[10] == "Title: A Tale of Two Cities\n", "Title line"
assert txt_to_ls("a-tale-of-two-cities.txt")[106] == "It was the best of times, it was the worst of times, it was the age of\n", "Famous first line"
def ls_to_txt(filename: str, lines: list[str], add_newline: bool=True) -> list[str]:
    """
    Write a list of strings, given by lines, to a text file, given by filename.
    If add_newline is True, the default, then a newline is added after each
    string. Returns the list of strings written, with the newlines if
    applicable.

    The list passed in is never modified: with add_newline=True a new list is
    built and returned, and with add_newline=False the same list object is
    written and returned as-is. Any existing file content is replaced.
    """
    if add_newline:
        lines = [line + "\n" for line in lines]
    with open(filename, "w") as fh:
        fh.writelines(lines)
    return lines
# Sanity checks for ls_to_txt; each call (over)writes "tmp.txt" in the working directory.
assert ls_to_txt("tmp.txt", ["Hello"])[0] == "Hello\n", "Newline added by default"
assert ls_to_txt("tmp.txt", ["Hello"], add_newline=False)[0] == "Hello", "Newline not added with add_newline=False"
def reverseLines(
    outputFile: str, inputFile: str
) -> None:
    """
    Read the file given by inputFile and write it to outputFile with the
    characters of every line reversed. The lines themselves stay in their
    original order.

    Leading and trailing whitespace (including the line break) is stripped
    from each line before it is reversed, and every line written ends with a
    single newline.
    """
    reversed_lines = [
        line.strip()[::-1] + "\n" for line in txt_to_ls(inputFile)
    ]
    # The newline is already attached above, so ls_to_txt must not add another.
    ls_to_txt(outputFile, reversed_lines, add_newline=False)
# Demo: write a character-reversed copy of "a-tale-of-two-cities.txt".
reverseLines("seitic-owt-fo-elat-a.txt", "a-tale-of-two-cities.txt")
def lineInfo(
    outputFile: str, inputFile: str
) -> None:
    """
    Read in the file given by inputFile, and for each line, write out some
    information about that line to outputFile.

    Each output line reports the length of the input line and the line's
    text reversed. The length is measured on the line as read, so it counts
    the line break; the reversed text is taken after stripping leading and
    trailing whitespace.
    """
    info_lines = [
        f"Line length: {len(line)}. Line reversed: {line.strip()[::-1]}\n"
        for line in txt_to_ls(inputFile)
    ]
    # Each entry already ends with a newline, so ls_to_txt must not add another.
    ls_to_txt(outputFile, info_lines, add_newline=False)
# Demo: write per-line information about "a-tale-of-two-cities.txt" to "info-atotc.txt".
lineInfo("info-atotc.txt", "a-tale-of-two-cities.txt")
import csv
def csv_to_ld(csv_file: str, delimiter: str=",") -> list[dict[str, str]]:
    """
    Return a list corresponding to the rows in the CSV file specified by
    csv_file, as dictionaries. Optionally, provide a different delimiter from
    the default ",".

    The first row of the file supplies the dictionary keys; it is not part of
    the returned list. A file with no data rows yields an empty list.
    """
    # newline="" passes line endings to the csv module untouched, so a line
    # break inside a quoted field is read back exactly as it was written.
    with open(csv_file, newline="") as fh:
        return list(csv.DictReader(fh, delimiter=delimiter))
# Sanity checks for csv_to_ld; both read "nino34.csv" from the working directory.
assert len(csv_to_ld("nino34.csv")) == 909, "Entire file converted into list"
assert csv_to_ld("nino34.csv")[0]["YR"] == "1950", "First year is as in the original file"
import csv
import typing
def ld_to_csv(
    csv_file: str, data: list[dict[str, typing.Any]], delimiter: str=","
) -> list[dict[str, typing.Any]]:
    """
    Write the data given by data as a list of dictionaries to the CSV file
    given by csv_file. Returns the same data. Optionally, provide a different
    delimiter from the default ",".

    The column names, and their order, are the keys of the first dictionary.
    An empty list has no column names to offer, so it produces an empty file.
    Any existing file content is replaced.
    """
    # newline="" lets the csv module control the line endings itself, so no
    # extra carriage return is inserted on platforms that translate "\n".
    with open(csv_file, "w", newline="") as fh:
        if data:
            wr = csv.DictWriter(fh, list(data[0]), delimiter=delimiter)
            wr.writeheader()
            wr.writerows(data)
    return data
# Round-trip checks for ld_to_csv: read "nino34.csv", write it to "tmp.csv",
# and inspect the data that ld_to_csv hands back.
assert len(ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))) == 909, "CSV round trip"
assert ld_to_csv("tmp.csv", csv_to_ld("nino34.csv"))[0]["YR"] == "1950", "First year preserved"
# Add a "Fahrenheit" column to every row of "nino34.csv" and save the result
# as "nino34f.csv".
nino = csv_to_ld("nino34.csv")
for row in nino:
    # Applies x * 9/5 + 32 to the TOTAL value; this assumes TOTAL is a
    # temperature in degrees Celsius -- TODO confirm against the data source.
    # The result is stored as a string, like every other value in the row.
    row["Fahrenheit"] = str( float(row["TOTAL"])*9/5+32 )
ld_to_csv("nino34f.csv", nino)
# Marker printed once the file has been written.
print("Hi!")
# Output: Hi!
import json
import typing
def json_to_any(json_file: str) -> typing.Any:
    """
    Return JSON data read from the file given by json_file.

    The file is decoded as UTF-8, the standard encoding for JSON, rather than
    with whatever default encoding the platform happens to use. Raises
    json.JSONDecodeError if the content is not valid JSON.
    """
    with open(json_file, encoding="utf-8") as fh:
        return json.load(fh)
# Sanity checks for json_to_any, using the notebook "filelib.ipynb" (itself a
# JSON document) from the working directory.
assert json_to_any("filelib.ipynb")["cells"][0]["source"][0] == "# FileLib\n", "This ipynb file is JSON"
assert len(json_to_any("filelib.ipynb")["cells"]) == 17, "Total number of cells in this file"
import json
import typing
def any_to_json(json_file: str, data: typing.Any) -> typing.Any:
    """
    Serialize data as JSON into the file named by json_file, replacing any
    content the file already had. The data object is handed back to the
    caller unchanged.
    """
    with open(json_file, mode="w") as out:
        json.dump(data, fp=out)
    return data
# Round-trip checks for any_to_json: load "filelib.ipynb", save it to
# "tmp.json", and compare what comes back.
assert any_to_json("tmp.json", json_to_any("filelib.ipynb"))["cells"][0]["source"][0] == "# FileLib\n", "Same data returned"
assert any_to_json("tmp.json", json_to_any("filelib.ipynb")) == json_to_any("filelib.ipynb"), "JSON round trip"
# Demo: replace the source of the first cell of "exercise-l15.ipynb" and save
# the edited notebook under a new name.
ipynb = json_to_any("exercise-l15.ipynb")
ipynb["cells"][0]["source"] = ["print('Hello!')"]
any_to_json("modified.ipynb", ipynb)
# Marker printed once the modified notebook has been written.
print("Hi")
# Output: Hi
def words(line: str) -> list[str]:
    """
    Split line into words, and return them as a list.

    The text is lowercased first. Only the letters "a" through "z" count as
    part of a word; every other character (digits, punctuation, whitespace,
    accented letters, ...) acts as a separator. Text without any such letter
    yields an empty list.
    """
    # Build the cleaned-up text with one join instead of growing a string one
    # character at a time, which gets slow on book-sized input.
    letters = "".join(
        ch if "a" <= ch <= "z" else " " for ch in line.lower()
    )
    # split() with no argument already ignores leading/trailing blanks and
    # treats runs of blanks as a single separator.
    return letters.split()
# Demo: split a sample sentence into lowercase words.
print(words("It was the best of times, it was the worst of times."))
# Output: ['it', 'was', 'the', 'best', 'of', 'times', 'it', 'was', 'the', 'worst', 'of', 'times']
def distribution(lst: typing.Sequence) -> dict[typing.Any, int]:
    """
    Count how many times each value occurs in lst. Return the counts in a
    dictionary, with each distinct value associated with its number of
    occurrences.

    The values must be hashable. Keys appear in the order in which each value
    is first encountered. An empty input yields an empty dictionary.
    """
    counts: dict[typing.Any, int] = {}
    for val in lst:
        # get() supplies 0 for a value seen for the first time.
        counts[val] = counts.get(val, 0) + 1
    return counts
def txt_to_str(filename: str) -> str:
    """
    Read the whole text file named by filename and return everything in it
    as one string.
    """
    with open(filename) as text_file:
        content = text_file.read()
    return content
# Sanity checks for txt_to_str on "a-tale-of-two-cities.txt".
# NOTE(review): the first slice starts at index 1, skipping the file's first
# character -- presumably a byte-order mark; verify against the file.
assert txt_to_str("a-tale-of-two-cities.txt")[1:53] == "The Project Gutenberg eBook of A Tale of Two Cities\n", "Successfully read"
assert txt_to_str("a-tale-of-two-cities.txt")[-59:] == "subscribe to our email newsletter to hear about new eBooks.", "One long string"
def wordDistributionCSV(outFilename: str, inFilename: str) -> None:
    """
    Count how often every word occurs in the text file inFilename and save
    the counts to outFilename as a CSV with the columns "Word" and
    "# of appearances", most frequent word first.
    """
    # Step 1: read the text and break it into words
    text = txt_to_str(inFilename)
    counts = distribution(words(text))
    # Step 2: order the distinct words from most to least frequent
    ranked = sorted(counts, key=counts.get, reverse=True)
    # Step 3: build one CSV row per word and write them out
    rows = [
        {"Word": word, "# of appearances": counts[word]}
        for word in ranked
    ]
    ld_to_csv(outFilename, rows)
# Demo: write the word counts of "a-tale-of-two-cities.txt" to "a-tale-of-many-words.csv".
wordDistributionCSV("a-tale-of-many-words.csv", "a-tale-of-two-cities.txt")