diff --git a/_quarto.yml b/_quarto.yml
index 9777cf9f..64d3bd2f 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -226,6 +226,31 @@ website:
- text: ' 6.1. Exercises'
href: modules/module7/module7-19-using_the_python_debugger.qmd
- href: modules/module7/module7-20-what_did_we_just_learn.qmd
+ - section: "**Module 8: A Slice of NumPy and Advanced Data Wrangling**"
+ contents:
+ - href: modules/module8/module8-00-module_learning_outcomes.qmd
+ - href: modules/module8/module8-01-numpy_and_1d_arrays.qmd
+ - text: ' 1.1. Exercises'
+ href: modules/module8/module8-02-numpy_and_array_questions.qmd
+ - href: modules/module8/module8-05-multi-dimensional_arrays.qmd
+ - text: ' 2.1. Exercises'
+ href: modules/module8/module8-06-make_that_array.qmd
+ - href: modules/module8/module8-11-working_with_null_values.qmd
+ - text: ' 3.1. Exercises'
+ href: modules/module8/module8-12-finding_and_dropping_null_values_questions.qmd
+ - href: modules/module8/module8-16-working_with_dates_and_time.qmd
+ - text: ' 4.1. Exercises'
+ href: modules/module8/module8-17-datetime_quesitons.qmd
+ - href: modules/module8/module8-19-introduction_to_working_with_strings.qmd
+ - text: ' 5.1. Exercises'
+ href: modules/module8/module8-20-string_questions.qmd
+ - href: modules/module8/module8-23-more_advanced_string_processing.qmd
+ - text: ' 6.1. Exercises'
+ href: modules/module8/module8-24-advanced_string_questions.qmd
+ - href: modules/module8/module8-27-what_did_we_just_learn.qmd
+ - section: "**Module Closing Remarks**"
+ contents:
+ - href: modules/module9/module9-00-congratulations.qmd
# Since we are declaring options for two formats here (html and revealjs)
# each qmd file needs to include a yaml block including which format to use for that file.
diff --git a/data/cycling_data.csv b/data/cycling_data.csv
new file mode 100644
index 00000000..e69bcadb
--- /dev/null
+++ b/data/cycling_data.csv
@@ -0,0 +1,34 @@
+Date,Name,Type,Time,Distance,Comments
+Sep-10-2019 17:13,Afternoon Ride,Ride,2084,12.62,Rain
+Sep-11-2019 06:52,Morning Ride,Ride,2531,13.03,rain
+Sep-11-2019 17:23,Afternoon Ride,Ride,1863,12.52,Wet road but nice whether
+Sep-12-2019 07:06,Morning Ride,Ride,2192,12.84,Stopped for photo of sunrise
+Sep-12-2019 17:28,Afternoon Ride,Ride,1891,12.48,Tired by the end of the week.
+Sep-17-2019 06:57,Morning Ride,Ride,2272,12.45,Rested after the weekend!
+Sep-17-2019 17:15,Afternoon Ride,Ride,1973,12.45,Legs feeling strong!
+Sep-18-2019 06:43,Morning Ride,Ride,2285,12.6,Raining
+Sep-19-2019 06:49,Morning Ride,Ride,2903,14.57,Thankfully not raining today!
+Sep-18-2019 17:15,Afternoon Ride,Ride,2101,12.48,Pumped up tires
+Sep-19-2019 17:30,Afternoon Ride,Ride,48062,12.48,Feeling good
+Sep-20-2019 06:52,Morning Ride,Ride,2090,12.59,Getting colder which is nice
+Sep-20-2019 18:02,Afternoon Ride,Ride,2961,12.81,Feeling good
+Sep-24-2019 06:50,Morning Ride,Ride,2462,12.68,Rested after the weekend!
+Sep-24-2019 17:35,Afternoon Ride,Ride,2076,12.47,"Oiled chain, bike feels smooth"
+Sep-25-2019 06:41,Morning Ride,Ride,2321,12.68,Bike feeling much smoother
+Sep-25-2019 17:07,Afternoon Ride,Ride,1775,12.1,Feeling really tired
+Sep-26-2019 06:35,Morning Ride,Ride,2124,12.65,Stopped for photo of sunrise
+Sep-26-2019 17:13,Afternoon Ride,Ride,1860,12.52,raining
+Sep-27-2019 06:42,Morning Ride,Ride,2350,12.91,Detour around trucks at Jericho
+Sep-27-2019 18:00,Afternoon Ride,Ride,1712,12.47,Tired by the end of the week
+Oct-01-2019 06:53,Morning Ride,Ride,2118,12.71,Rested after the weekend!
+Oct-01-2019 17:15,Afternoon Ride,Ride,1732,,Legs feeling strong!
+Oct-02-2019 06:45,Morning Ride,Ride,2222,12.82,Beautiful morning! Feeling fit
+Oct-02-2019 17:13,Afternoon Ride,Ride,1756,,A little tired today but good weather
+Oct-03-2019 06:46,Morning Ride,Ride,2134,13.06,Bit tired today but good weather
+Oct-03-2019 17:45,Afternoon Ride,Ride,1724,12.52,Feeling good
+Oct-04-2019 06:47,Morning Ride,Ride,2182,12.68,Wet road
+Oct-04-2019 18:08,Afternoon Ride,Ride,1870,12.63,"Very tired, riding into the wind"
+Oct-10-2019 07:55,Morning Ride,Ride,2149,12.7,Really cold! But feeling good
+Oct-10-2019 18:10,Afternoon Ride,Ride,1841,12.59,Feeling good after a holiday break!
+Oct-11-2019 07:47,Morning Ride,Ride,2463,12.79,Stopped for photo of sunrise
+Oct-11-2019 18:16,Afternoon Ride,Ride,1843,11.79,"Bike feeling tight, needs an oil and pump"
\ No newline at end of file
diff --git a/data/cycling_data_dirty.csv b/data/cycling_data_dirty.csv
new file mode 100644
index 00000000..ce31884b
--- /dev/null
+++ b/data/cycling_data_dirty.csv
@@ -0,0 +1,34 @@
+Date,Name,Type,Time,Distance,Comments
+2019-09-09 7:13,Morning Ride,Ride,2084,12.62,Rain
+2019-09-09 20:52,Afternoon Ride,Ride,2531,13.03,rain
+2019-09-10 7:23,Morning Ride,Ride,1863,,Wet road but nice weather
+2019-09-10 21:06,Afternoon Ride,Ride,2192,12.84,Stopped for photo of sunrise
+2019-09-11 7:28,Morning Ride,Ride,1891,12.48,Tired by the end of the week
+2019-09-15 20:57,Afternoon Ride,Ride,2272,12.45,Rested after the weekend!
+2019-09-16 7:15,Morning Ride,Ride,1973,12.45,Legs feeling strong!
+2019-09-16 20:43,Afternoon Ride,Ride,2285,12.6,Raining
+2019-09-17 20:49,Afternoon Ride,Ride,2903,14.57,Raining today
+2019-09-17 7:15,Morning Ride,Ride,2101,12.48,Pumped up tires
+2019-09-18 7:30,Afternoon Ride,Ride,48062,12.48,Feeling good
+2019-09-18 20:52,Afternoon Ride,Ride,2090,12.59,Getting colder which is nice
+2019-09-19 8:02,Morning Ride,Ride,2961,12.81,Feeling good
+2019-09-22 20:50,Afternoon Ride,Ride,2462,12.68,Rested after the weekend!
+2019-09-23 7:35,Morning Ride,Ride,2076,12.47,"Oiled chain, bike feels smooth"
+2019-09-23 20:41,Afternoon Ride,Ride,2321,12.68,Bike feeling much smoother
+2019-09-24 7:07,Morning Ride,Ride,1775,12.1,Feeling really tired
+2019-09-24 20:35,Afternoon Ride,Ride,2124,12.65,Stopped for photo of sunrise
+2019-09-25 7:13,Morning Ride,Ride,1860,12.52,raining
+2019-09-25 20:42,Afternoon Ride,Ride,2350,12.91,Detour around trucks at Jericho
+2019-09-26 8:00,Morning Ride,Ride,1712,12.47,Tired by the end of the week
+2019-09-29 20:53,Afternoon Ride,Ride,2118,12.71,Rested after the weekend!
+2019-09-30 7:15,Morning Ride,Ride,1732,,Legs feeling strong!
+2019-09-30 20:45,Afternoon Ride,Ride,2222,12.82,Beautiful morning! Feeling fit
+2019-10-01 7:13,Morning Ride,Ride,1756,,A little tired today but good weather
+2019-10-01 20:46,Afternoon Ride,Ride,2134,13.06,Bit tired today but good weather
+2019-10-02 7:45,Morning Ride,Ride,1724,12.52,Feeling good
+2019-10-02 20:47,Afternoon Ride,Ride,2182,12.68,Wet road
+2019-10-03 8:08,Morning Ride,Ride,1870,12.63,"Very tired, riding into the wind"
+2019-10-08 20:55,Afternoon Ride,Ride,2149,12.7,Really cold! But feeling good
+2019-10-09 7:10,Morning Ride,Ride,1841,12.59,Feeling good after a holiday break!
+2019-10-09 20:47,Afternoon Ride,Ride,2463,12.79,Stopped for photo of sunrise
+2019-10-10 7:16,Morning Ride,Ride,1843,11.79,"Bike feeling tight, needs an oil and pump"
\ No newline at end of file
diff --git a/data/cycling_data_split_time.csv b/data/cycling_data_split_time.csv
new file mode 100644
index 00000000..e80c8be1
--- /dev/null
+++ b/data/cycling_data_split_time.csv
@@ -0,0 +1,34 @@
+Year,Month,Day,Clock,Name,Type,Time,Distance,Comments
+2019,Sep,10, 17:13:04,Afternoon Ride,Ride,2084,12.62,Rain
+2019,Sep,11, 06:52:18,Morning Ride,Ride,2531,13.03,rain
+2019,Sep,11, 17:23:50,Afternoon Ride,Ride,1863,12.52,Wet road but nice weather
+2019,Sep,12, 07:06:19,Morning Ride,Ride,2192,12.84,Stopped for photo of sunrise
+2019,Sep,12, 17:28:05,Afternoon Ride,Ride,1891,12.48,Tired by the end of the week
+2019,Sep,17, 06:57:48,Morning Ride,Ride,2272,12.45,Rested after the weekend!
+2019,Sep,17, 17:15:47,Afternoon Ride,Ride,1973,12.45,Legs feeling strong!
+2019,Sep,18, 06:43:34,Morning Ride,Ride,2285,12.6,Raining
+2019,Sep,19, 06:49:53,Morning Ride,Ride,2903,14.57,Raining today
+2019,Sep,18, 17:15:52,Afternoon Ride,Ride,2101,12.48,Pumped up tires
+2019,Sep,19, 17:30:01,Afternoon Ride,Ride,48062,12.48,Feeling good
+2019,Sep,20, 06:52:09,Morning Ride,Ride,2090,12.59,Getting colder which is nice
+2019,Sep,20, 18:02:05,Afternoon Ride,Ride,2961,12.81,Feeling good
+2019,Sep,24, 06:50:41,Morning Ride,Ride,2462,12.68,Rested after the weekend!
+2019,Sep,24, 17:35:42,Afternoon Ride,Ride,2076,12.47,"Oiled chain, bike feels smooth"
+2019,Sep,25, 06:41:24,Morning Ride,Ride,2321,12.68,Bike feeling much smoother
+2019,Sep,25, 17:07:21,Afternoon Ride,Ride,1775,12.1,Feeling really tired
+2019,Sep,26, 06:35:41,Morning Ride,Ride,2124,12.65,Stopped for photo of sunrise
+2019,Sep,26, 17:13:33,Afternoon Ride,Ride,1860,12.52,raining
+2019,Sep,27, 06:42:43,Morning Ride,Ride,2350,12.91,Detour around trucks at Jericho
+2019,Sep,27, 18:00:18,Afternoon Ride,Ride,1712,12.47,Tired by the end of the week
+2019,Sep,1, 06:53:52,Morning Ride,Ride,2118,12.71,Rested after the weekend!
+2019,Oct,1, 17:15:07,Afternoon Ride,Ride,1732,NaN,Legs feeling strong!
+2019,Oct,2, 06:45:55,Morning Ride,Ride,2222,12.82,Beautiful morning! Feeling fit
+2019,Oct,2, 17:13:09,Afternoon Ride,Ride,1756,NaN,A little tired today but good weather
+2019,Oct,3, 06:46:06,Morning Ride,Ride,2134,13.06,Bit tired today but good weather
+2019,Oct,3, 17:45:22,Afternoon Ride,Ride,1724,12.52,Feeling good
+2019,Oct,4, 06:47:36,Morning Ride,Ride,2182,12.68,Wet road
+2019,Oct,4, 18:08:08,Afternoon Ride,Ride,1870,12.63,"Very tired, riding into the wind"
+2019,Oct,10, 07:55:40,Morning Ride,Ride,2149,12.7,Really cold! But feeling good
+2019,Oct,10, 18:10:31,Afternoon Ride,Ride,1841,12.59,Feeling good after a holiday break!
+2019,Oct,11, 07:47:14,Morning Ride,Ride,2463,12.79,Stopped for photo of sunrise
+2019,Oct,11, 18:16:57,Afternoon Ride,Ride,1843,11.79,"Bike feeling tight, needs an oil and pump"
\ No newline at end of file
diff --git a/modules/module8/module8-00-module_learning_outcomes.qmd b/modules/module8/module8-00-module_learning_outcomes.qmd
new file mode 100644
index 00000000..39233956
--- /dev/null
+++ b/modules/module8/module8-00-module_learning_outcomes.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 0. Module Learning Outcomes
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/module8-01-numpy_and_1d_arrays.qmd b/modules/module8/module8-01-numpy_and_1d_arrays.qmd
new file mode 100644
index 00000000..4736ece7
--- /dev/null
+++ b/modules/module8/module8-01-numpy_and_1d_arrays.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 1. NumPy and 1D Arrays
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/module8-02-numpy_and_array_questions.qmd b/modules/module8/module8-02-numpy_and_array_questions.qmd
new file mode 100644
index 00000000..3255a800
--- /dev/null
+++ b/modules/module8/module8-02-numpy_and_array_questions.qmd
@@ -0,0 +1,259 @@
+---
+format: live-html
+---
+
+
+
+# 1.1. Exercises
+
+## NumPy and Array Questions
+
+
+
+
+
+
+
+```python
+array([ 0,  5, 10, 15, 20, 25, 30])
+```
+
+
+
+
+## More NumPy
+
+
+
+
+
+## NumPy Practice
+
+**Instructions:**
+Running a coding exercise for the first time could take a bit of time for everything to load. Be patient, it could take a few minutes.
+
+**When you see `____` in a coding exercise, replace it with what you assume to be the correct code. Run it and see if you obtain the desired output. Submit your code to validate if you were correct.**
+
+_**Make sure you remove the hash (`#`) symbol in the coding portions of this question. We have commented them so that the line won't execute and you can test your code after each step.**_
+
+Let's explore how Python compares lists and arrays.
+
+**Tasks:**
+
+- Create 2 lists of the same length and save each as objects named `a_list` and `b_list`.
+- Using Boolean operators, what is outputted when you test to see if they are equal?
+
+```{pyodide}
+#| setup: true
+#| exercise: numpy_practice_list
+import pandas as pd
+```
+
+```{pyodide}
+#| exercise: numpy_practice_list
+import numpy as np
+
+# Create 2 lists of the same length, but containing different elements
+# Save each as objects named a_list and b_list
+____
+
+____
+
+
+# Using boolean operators, what is outputted when you test to see if they are equal?
+____
+```
+
+```{pyodide}
+#| exercise: numpy_practice_list
+#| check: true
+from src.utils import print_correct_msg
+
+assert result == False, "Do your lists contain different elements?"
+print_correct_msg()
+```
+
+:::: { .hint exercise="numpy_practice_list"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `[]` or `list()` notation?
+- Are you using `==` to check if the lists are equal?
+
+:::
+::::
+
+:::: { .solution exercise="numpy_practice_list" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import numpy as np
+
+# Create 2 lists of the same length, but containing different elements
+# Save each as objects named a_list and b_list
+a_list = [1, 2, 5, 7]
+
+b_list = [2, 4, 7, 7]
+
+
+# Using boolean operators, what is outputted when you test to see if they are equal?
+a_list == b_list
+```
+
+:::
+::::
+
+
+
+
+
+
+
+
+Now let's do the same exercises using arrays.
+
+**Tasks:**
+
+- Create 2 arrays of the same length and save each as objects named `a_array` and `b_array`.
+- Using Boolean operators, what is outputted when you test to see if they are equal?
+
+```{pyodide}
+#| exercise: numpy_practice_array
+import numpy as np
+
+# Create 2 arrays of the same length, but containing different elements
+# Save each as objects named a_array and b_array
+____
+
+____
+
+
+# Using boolean operators, what is outputted when you test to see if they are equal?
+____
+```
+
+```{pyodide}
+#| exercise: numpy_practice_array
+#| check: true
+import numpy as np
+from src.utils import print_correct_msg
+
+assert isinstance(result, np.ndarray), "Are you creating an array?"
+assert all(isinstance(x, np.bool_) for x in result), "Are you using `==` to check if the arrays are equal?"
+print_correct_msg()
+```
+
+:::: { .hint exercise="numpy_practice_array"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `np.array()` with parentheses to make your arrays?
+- Are you using `==` to check if the arrays are equal?
+
+:::
+::::
+
+:::: { .solution exercise="numpy_practice_array" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import numpy as np
+
+# Create 2 arrays of the same length, but containing different elements
+# Save each as objects named a_array and b_array
+a_array = np.array((1, 2, 5, 7))
+
+b_array = np.array((2, 4, 7, 7))
+
+
+# Using boolean operators, what is outputted when you test to see if they are equal?
+a_array == b_array
+```
+
+:::
+::::
+
+
+
+
+
+
+This is an example of how useful arrays can be for numerical computation! Comparing each element of two lists would take more code and more time to get the same result.
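+
+To make that concrete, here is a minimal sketch (reusing the example values from the solutions above) contrasting the two approaches:
+
+```python
+import numpy as np
+
+a_list = [1, 2, 5, 7]
+b_list = [2, 4, 7, 7]
+
+# `==` on lists compares the lists as whole objects and returns a single bool
+a_list == b_list                            # False
+
+# An element-wise comparison of lists needs a comprehension
+[a == b for a, b in zip(a_list, b_list)]    # [False, False, False, True]
+
+# With arrays, `==` is element-wise by default
+np.array(a_list) == np.array(b_list)        # array([False, False, False,  True])
+```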
\ No newline at end of file
diff --git a/modules/module8/module8-05-multi-dimensional_arrays.qmd b/modules/module8/module8-05-multi-dimensional_arrays.qmd
new file mode 100644
index 00000000..74c01df8
--- /dev/null
+++ b/modules/module8/module8-05-multi-dimensional_arrays.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 2. Multi-dimensional Arrays
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/module8-06-make_that_array.qmd b/modules/module8/module8-06-make_that_array.qmd
new file mode 100644
index 00000000..38b3672e
--- /dev/null
+++ b/modules/module8/module8-06-make_that_array.qmd
@@ -0,0 +1,370 @@
+---
+format: live-html
+---
+
+
+
+# 2.1. Exercises
+
+## Make that Array
+
+```python
+np.ones((4, 3))
+```
+
+
+
+
+## Shape, Size, and Dimension
+
+```python
+array([[ 0,  1,  2,  3,  4,  5,  6,  7],
+       [ 8,  9, 10, 11, 12, 13, 14, 15]])
+
+```
+
+
+
+
+```python
+array([[[ 0,  1,  2,  3,  4],
+        [ 5,  6,  7,  8,  9]],
+
+       [[10, 11, 12, 13, 14],
+        [15, 16, 17, 18, 19]],
+
+       [[20, 21, 22, 23, 24],
+        [25, 26, 27, 28, 29]]])
+```
+
+
+
+
+
+
+
+## More Arrays Questions
+
+Use the following array named `hurray` to answer the next set of questions.
+
+```python
+array([[ 0,  1,  2],
+       [ 3,  4,  5],
+       [ 6,  7,  8],
+       [ 9, 10, 11],
+       [12, 13, 14],
+       [15, 16, 17]])
+```
+
+```python
+# Question 1
+array([[0, 1, 2],
+       [3, 4, 5],
+       [6, 7, 8]])
+```
+
+
+
+
+```python
+# Question 2
+array([[10, 11],
+       [13, 14]])
+```
+
+
+
+
+```python
+# Question 3
+array([[ 0,  3,  6,  9, 12, 15],
+       [ 1,  4,  7, 10, 13, 16],
+       [ 2,  5,  8, 11, 14, 17]])
+```
+
+
+
+
+
+## Coding questions
+
+**Instructions:**
+Running a coding exercise for the first time could take a bit of time for everything to load. Be patient, it could take a few minutes.
+
+**When you see `____` in a coding exercise, replace it with what you assume to be the correct code. Run it and see if you obtain the desired output. Submit your code to validate if you were correct.**
+
+_**Make sure you remove the hash (`#`) symbol in the coding portions of this question. We have commented them so that the line won't execute and you can test your code after each step.**_
+
+
+### Making an Array
+
+Let's make an array and find its size and dimension.
+
+**Tasks:**
+
+- Create an array named `arr1` that contains only elements with values 1 and a shape of (3,5).
+- Save the dimension and size of `arr1` in objects named `arr1_dim` and `arr1_size` respectively.
+
+```{pyodide}
+#| setup: true
+#| exercise: making_an_array
+import pandas as pd
+```
+
+```{pyodide}
+#| exercise: making_an_array
+import numpy as np
+
+# Create an array that contains only elements with values 1 with a shape of (3,5)
+# Save it as an object named arr1
+____ = ____
+
+____
+
+# Save the dimension and size of `arr1` in objects
+# named `arr1_dim` and `arr1_size` respectively
+# ____ = ____
+# ____ = ____
+
+# result = {
+# "arr1_dim": arr1_dim,
+# "arr1_size": arr1_size
+# }
+# result
+```
+
+```{pyodide}
+#| exercise: making_an_array
+#| check: true
+from src.utils import print_correct_msg
+
+assert isinstance(result, dict), "result should be a dict."
+assert result["arr1_dim"] == 2, "The dimensions of your array are incorrect. Make sure you are creating a 2D array."
+assert result["arr1_size"] == 15, "The size of your array is incorrect. Make sure you are creating a 3 by 5 array."
+print_correct_msg()
+```
+
+:::: { .hint exercise="making_an_array"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `.ones((3,5))`?
+- Are you using `.ndim` and `.size`?
+- Are you saving your objects as the correct names?
+
+:::
+::::
+
+:::: { .solution exercise="making_an_array" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import numpy as np
+
+# Create an array that contains only elements with values 1 with a shape of (3,5)
+# Save it as an object named arr1
+arr1 = np.ones((3,5))
+
+arr1
+
+# Save the dimension and size of `arr1` in objects
+# named `arr1_dim` and `arr1_size` respectively
+arr1_dim = arr1.ndim
+arr1_size = arr1.size
+
+result = {
+ "arr1_dim": arr1_dim,
+ "arr1_size": arr1_size
+}
+result
+```
+
+:::
+::::
+
+
+
+### Array Practice
+
+Let's make a new array and transform it by slicing and transposing.
+
+**Tasks:**
+
+- Create an array named `arr2` using `np.linspace()` with 6 equally spaced values from 1 to 16 and a shape of (2,3) (You'll need `.reshape()` for this!).
+- Transpose the array and name it `arr2t`.
+- Finally, slice the new object `arr2t` so it only includes the values 10, 13 and 16. Save this as an object named `sliced_arr2t`.
+
+```{pyodide}
+#| exercise: array_practice
+import numpy as np
+
+# Create an array named arr2 using np.linspace() with 6 equally
+# spaced values from 1 to 16 and a shape of (2,3)
+____ = ____
+____
+
+# Transpose the array and name it arr2t
+# ____ = ____
+# ____
+
+# Finally slice the new object `arr2t` so it only includes the values 10, 13 and 16.
+# Save this as an object named sliced_arr2t
+# ____ = ____
+# ____
+
+# result = {
+# "arr2": arr2,
+# "arr2t": arr2t,
+# "sliced_arr2t": sliced_arr2t
+# }
+# result
+```
+
+```{pyodide}
+#| exercise: array_practice
+#| check: true
+from src.utils import print_correct_msg
+
+assert isinstance(result, dict), "result should be a dict."
+assert result["arr2"].shape == (2, 3), "The dimensions of your array are incorrect. Make sure you are creating a 2D array using the 'reshape()' function."
+assert result["arr2t"].shape == (3, 2), "The dimensions of the transposed array are incorrect. Make sure you are transposing the array properly."
+assert result["sliced_arr2t"].shape == (3,), "The dimensions of the sliced array are incorrect. Make sure you are only slicing the required values."
+assert sum(result["sliced_arr2t"]) == 39.0, "The values in the sliced array are incorrect. Are you slicing properly?"
+print_correct_msg()
+```
+
+:::: { .hint exercise="array_practice"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `.reshape()` to change the dimension of np.linspace()?
+- Are you using `.T`?
+- Are you slicing with `[:,1]`?
+
+:::
+::::
+
+:::: { .solution exercise="array_practice" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import numpy as np
+
+# Create an array named arr2 using np.linspace() with 6 equally
+# spaced values from 1 to 16 and a shape of (2,3)
+arr2 = np.linspace(1,16,6).reshape(2,3)
+
+# Transpose the array and name it arr2t
+arr2t = arr2.T
+
+# Finally slice the new object `arr2t` so it only includes the values 10, 13 and 16
+# Save this as an object named sliced_arr2t
+sliced_arr2t = arr2t[:,1]
+
+result = {
+ "arr2": arr2,
+ "arr2t": arr2t,
+ "sliced_arr2t": sliced_arr2t
+}
+result
+```
+
+:::
+::::
\ No newline at end of file
diff --git a/modules/module8/module8-11-working_with_null_values.qmd b/modules/module8/module8-11-working_with_null_values.qmd
new file mode 100644
index 00000000..a8a5fef5
--- /dev/null
+++ b/modules/module8/module8-11-working_with_null_values.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 3. Working with Null Values
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/module8-12-finding_and_dropping_null_values_questions.qmd b/modules/module8/module8-12-finding_and_dropping_null_values_questions.qmd
new file mode 100644
index 00000000..a09943d7
--- /dev/null
+++ b/modules/module8/module8-12-finding_and_dropping_null_values_questions.qmd
@@ -0,0 +1,306 @@
+---
+format: live-html
+---
+
+
+
+# 3.1. Exercises
+
+## Finding and Dropping Null Values Questions
+
+You run `.info()` on the `fruit_salad` dataframe and get the following output.
+
+```out
+<class 'pandas.core.frame.DataFrame'>
+RangeIndex: 10 entries, 0 to 9
+Data columns (total 8 columns):
+ #   Column         Non-Null Count  Dtype
+---  ------         --------------  -----
+ 0   name           10 non-null     object
+ 1   colour         10 non-null     object
+ 2   location       10 non-null     object
+ 3   seed           10 non-null     bool
+ 4   shape          9 non-null      object
+ 5   sweetness      10 non-null     bool
+ 6   water_content  8 non-null      float64
+ 7   weight         10 non-null     int64
+dtypes: bool(2), float64(1), int64(1), object(4)
+memory usage: 628.0+ bytes
+```
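+
+For reference, a complementary way to surface the same information is to count the null values per column directly (a minimal sketch, assuming the dataframe is loaded as `fruit_salad`):
+
+```python
+# Null values per column; the complement of the non-null counts shown above
+fruit_salad.isnull().sum()
+```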
+
+
+
+
+```out
+     name  height  diameter   age  flowering
+0  Cherry    15.0         2  12.0       True
+1     Fir    20.0         4   4.0      False
+2  Willow    25.0         3   2.0       True
+3     Oak     NaN         2   NaN      False
+4     Oak    10.0         5   6.0        NaN
+```
+
+
+
+
+## Filling Methods
+
+Use the `forest` dataframe below to answer the next 2 questions:
+
+```out
+     name  height  diameter   age  flowering
+0  Cherry    15.0         2  12.0       True
+1     Fir    20.0         4   4.0      False
+2  Willow    25.0         3   2.0       True
+3     Oak     NaN         2   3.0      False
+4     Oak    10.0         5   6.0      False
+```
+
+```out
+# Question 1
+     name  height  diameter  age  flowering
+0  Cherry    15.0         2   12       True
+1     Fir    20.0         4    4      False
+2  Willow    25.0         3    2       True
+3     Oak    17.5         2    3      False
+4     Oak    10.0         5    6      False
+```
+
+
+
+
+```out
+# Question 2
+     name  height  diameter  age  flowering
+0  Cherry    15.0         2   12       True
+1     Fir    20.0         4    4      False
+2  Willow    25.0         3    2       True
+3     Oak    10.0         2    3      False
+4     Oak    10.0         5    6      False
+```
+
+
+
+
+## Coding questions
+
+**Instructions:**
+Running a coding exercise for the first time could take a bit of time for everything to load. Be patient, it could take a few minutes.
+
+**When you see `____` in a coding exercise, replace it with what you assume to be the correct code. Run it and see if you obtain the desired output. Submit your code to validate if you were correct.**
+
+_**Make sure you remove the hash (`#`) symbol in the coding portions of this question. We have commented them so that the line won't execute and you can test your code after each step.**_
+
+
+### Practice Filling Null Values
+
+Let's replace the missing values in the `canucks` dataframe with the mean salary.
+
+**Tasks:**
+
+- Replace the `NaN` values in the dataframe with the mean salary value.
+- Save this as a new dataframe named `canucks_altered`.
+- Display the `canucks_altered` dataframe.
+
+```{pyodide}
+#| exercise: practice_filling_null_values
+import pandas as pd
+
+canucks = pd.read_csv('data/canucks.csv')
+
+# Replace the null values in the dataframe with the mean salary value
+# Save this as a new dataframe named canucks_altered
+____ = ____
+
+# Display the canucks_altered dataframe
+# ____
+```
+
+```{pyodide}
+#| exercise: practice_filling_null_values
+#| check: true
+from src.utils import print_correct_msg
+
+assert isinstance(result, pd.DataFrame), "We are expecting a dataframe."
+assert result['Salary'].isnull().sum() == 0, "There are still some missing values in the 'Salary' column. Are you filling the missing values?"
+assert result.loc[20, "Salary"] == 3166250.0, "Are you replacing NaN with the mean salary value?"
+print_correct_msg()
+```
+
+:::: { .hint exercise="practice_filling_null_values"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `.fillna()`?
+- Are you using the argument `value=canucks['Salary'].mean()`?
+
+:::
+::::
+
+:::: { .solution exercise="practice_filling_null_values" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import pandas as pd
+
+canucks = pd.read_csv('data/canucks.csv')
+
+# Replace the null values in the dataframe with the mean salary value
+# Save this as a new dataframe named canucks_altered
+canucks_altered = canucks.fillna(value=canucks['Salary'].mean())
+
+# Display the canucks_altered dataframe
+canucks_altered
+```
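+
+Note that calling `.fillna()` on the whole dataframe fills the missing values in *every* column with the salary mean, not just `Salary`. A sketch of an alternative that targets only that column (not required by the exercise):
+
+```python
+canucks_altered = canucks.assign(Salary=canucks['Salary'].fillna(canucks['Salary'].mean()))
+```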
+
+:::
+::::
+
+
+
+### Practice Identifying Null Values
+
+Let's practice using `.isnull()` in our data processing with the `canucks` dataset from earlier in this course.
+
+**Tasks:**
+
+- Identify any columns with null values in the `canucks` dataframe with `.info()` and save this as `canucks_info`.
+- Create a new column in the dataframe named `Wealth` where all the values equal `"comfortable"`.
+- Name the new dataframe `canucks_comf`.
+- Do conditional value replacement, where if the value in the `Salary` column is null, we replace `"comfortable"` with `"unknown"`.
+- Display the new `canucks_comf` dataframe.
+
+```{pyodide}
+#| exercise: practice_identifying_null_values
+import pandas as pd
+
+canucks = pd.read_csv('data/canucks.csv')
+
+# Identify any columns with null values with .info()
+# Save this dataframe as canucks_info
+____ = ____
+____
+
+# Create a new column in the dataframe named Wealth
+# where all the values equal "comfortable"
+# Name the new dataframe canucks_comf
+____ = ____
+____
+
+# Do conditional replacement, where if the value in the salary column is null,
+# we replace "comfortable" with "unknown"
+____ = ____
+____
+```
+
+```{pyodide}
+#| exercise: practice_identifying_null_values
+#| check: true
+from src.utils import print_correct_msg
+
+assert isinstance(result, pd.DataFrame), "We are expecting a dataframe."
+assert "Wealth" in list(result.columns), "Make sure you are creating a new column 'Wealth' in the canucks dataframe."
+assert "comfortable" in set(result['Wealth']), "Make sure you are setting the values in the 'Wealth' column to 'comfortable'."
+assert "unknown" in set(result['Wealth']), "Make sure you are setting the null values in the 'Wealth' column to 'unknown'."
+assert sum(result['Wealth'] == "unknown") == 2, "The number of 'unknown' values is incorrect. Make sure you are replacing all the null values with 'unknown'."
+print_correct_msg()
+```
+
+:::: { .hint exercise="practice_identifying_null_values"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `canucks.info()`?
+- Are you creating `canucks_comf` with `canucks.assign(Wealth = "comfortable")`?
+- Are you using `.loc[]` to replace the values in the `Wealth` column?
+- Are you using `canucks_comf['Salary'].isnull()` as your condition in `.loc[]`?
+
+:::
+::::
+
+:::: { .solution exercise="practice_identifying_null_values" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import pandas as pd
+
+canucks = pd.read_csv('data/canucks.csv')
+
+# Identify any columns with null values with .info()
+# Save this dataframe as canucks_info
+canucks_info = canucks.info()
+canucks_info
+
+# Create a new column in the dataframe named Wealth
+# where all the values equal "comfortable"
+# Name the new dataframe canucks_comf
+canucks_comf = canucks.assign(Wealth = "comfortable")
+canucks_comf
+
+# Do conditional replacement, where if the value in the salary column is null,
+# we replace "comfortable" with "unknown"
+canucks_comf.loc[canucks_comf['Salary'].isnull(), "Wealth"] = "unknown"
+canucks_comf
+```
+
+:::
+::::
\ No newline at end of file
diff --git a/modules/module8/module8-16-working_with_dates_and_time.qmd b/modules/module8/module8-16-working_with_dates_and_time.qmd
new file mode 100644
index 00000000..48aed7e2
--- /dev/null
+++ b/modules/module8/module8-16-working_with_dates_and_time.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 4. Working with Dates and Time
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/module8-17-datetime_quesitons.qmd b/modules/module8/module8-17-datetime_quesitons.qmd
new file mode 100644
index 00000000..01e3577d
--- /dev/null
+++ b/modules/module8/module8-17-datetime_quesitons.qmd
@@ -0,0 +1,173 @@
+---
+format: live-html
+---
+
+
+
+# 4.1. Exercises
+
+## Datetime Questions
+
+
+
+
+
+
+
+
+
+
+
+## Practice Processing Dates
+
+**Instructions:**
+Running a coding exercise for the first time could take a bit of time for everything to load. Be patient, it could take a few minutes.
+
+**When you see `____` in a coding exercise, replace it with what you assume to be the correct code. Run it and see if you obtain the desired output. Submit your code to validate if you were correct.**
+
+_**Make sure you remove the hash (`#`) symbol in the coding portions of this question. We have commented them so that the line won't execute and you can test your code after each step.**_
+
+
+Use the output of the following code chunk to help answer the next question.
+
+```{pyodide}
+import pandas as pd
+
+# The data
+hockey_players = pd.read_csv('data/canucks.csv')
+hockey_players
+```
+
+Let's read in the data, parse a datetime column, and find the hockey team's oldest and youngest players.
+
+**Tasks:**
+
+- Read in the `canucks.csv` file from the data folder and parse the `Birth Date` column. Save this as an object named `canucks`.
+- Find the oldest player's date of birth and save the Timestamp as `oldest`.
+- Find the youngest player's date of birth and save the Timestamp as `youngest`.
+- Find the age difference between the two players in years to 2 decimal places. Save this as an object named `age_range`.
+- Display `age_range`.
+
+
+```{pyodide}
+#| exercise: practice_processing_dates
+import pandas as pd
+
+DAYS_PER_YEAR = 365.25
+
+# Read in the canucks.csv file from the data folder and parse the "Birth Date" column
+# You may want to specify date_format="%d-%b-%y"
+# Save this as an object named canucks
+____ = ____
+
+# Find the oldest player's date of birth
+# Save the Timestamp as oldest
+____ = ____
+
+# Find the youngest player's date of birth
+# Save the Timestamp as youngest
+# ____ = ____
+
+# Find the age difference between the two players in number of years to 2 decimal places
+# Save this as an object named age_range
+# ____ = ____
+
+# Display age_range
+# ____
+```
+
+```{pyodide}
+#| exercise: practice_processing_dates
+#| check: true
+from src.utils import print_correct_msg
+
+assert isinstance(result, float), "Are you returning age_range?"
+assert len(str(result).split(".")[1]) == 2, "Please round to 2 decimal places."
+assert result == 14.24, "The value for 'age_range' is incorrect. Are you computing the 'oldest' and 'youngest' correctly?"
+print_correct_msg()
+```
+
+:::: { .hint exercise="practice_processing_dates"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using the argument `parse_dates` while reading in the data?
+- The oldest player has the `min()` date of birth.
+- The youngest player has the `max()` date of birth.
+- Are you subtracting the min value from the max value?
+- Are you rounding to 2 decimal places?
+
+:::
+::::
+
+:::: { .solution exercise="practice_processing_dates" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import pandas as pd
+
+DAYS_PER_YEAR = 365.25
+
+# Read in the canucks.csv file from the data folder and parse the "Birth Date" column
+# Save this as an object named canucks
+canucks = pd.read_csv('data/canucks.csv', parse_dates=['Birth Date'], date_format="%d-%b-%y")
+
+# Find the oldest player's date of birth
+# Save the Timestamp as oldest
+oldest = canucks['Birth Date'].min()
+
+# Find the youngest player's date of birth
+# Save the Timestamp as youngest
+youngest = canucks['Birth Date'].max()
+
+# Find the age difference between the two players in number of years to 2 decimal places
+# Save this as an object named age_range
+age_range = round((youngest - oldest).days/DAYS_PER_YEAR, 2)
+
+# Display age_range
+age_range
+```
+
+:::
+::::
\ No newline at end of file
diff --git a/modules/module8/module8-19-introduction_to_working_with_strings.qmd b/modules/module8/module8-19-introduction_to_working_with_strings.qmd
new file mode 100644
index 00000000..4b04bb48
--- /dev/null
+++ b/modules/module8/module8-19-introduction_to_working_with_strings.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 5. Introduction to Working with Strings
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/module8-20-string_questions.qmd b/modules/module8/module8-20-string_questions.qmd
new file mode 100644
index 00000000..743e5646
--- /dev/null
+++ b/modules/module8/module8-20-string_questions.qmd
@@ -0,0 +1,192 @@
+---
+format: live-html
+---
+
+
+
+# 5.1. Exercises
+
+## String Questions
+
+
+
+
+
+
+
+## Identify the String Code
+
+```out
+           name   colour  location   seed  shape  sweetness  water_content  weight
+0         apple      red    canada   True  round       True             84     100
+1        banana   yellow    mexico  False   long       True             75     120
+2    cantaloupe   orange     spain   True  round       True             90    1360
+3  dragon-fruit  magenta     china   True  round      False             96     600
+4    elderberry   purple   austria  False  round       True             80       5
+5           fig   purple    turkey  False   oval      False             78      40
+6         guava    green    mexico   True   oval       True             83     450
+7   huckleberry     blue    canada   True  round       True             73       5
+8          kiwi    brown     china   True  round       True             80      76
+9         lemon   yellow    mexico  False   oval      False             83      65
+
+```
+
+{fig-align="center" fig-alt="404 image" width="80%"}
+
+
+
+
+
+## Practice Handling Strings
+
+**Instructions:**
+Running a coding exercise for the first time could take a bit of time for everything to load. Be patient, it could take a few minutes.
+
+**When you see `____` in a coding exercise, replace it with what you assume to be the correct code. Run it and see if you obtain the desired output. Submit your code to validate if you were correct.**
+
+_**Make sure you remove the hash (`#`) symbol in the coding portions of this question. We have commented them so that the line won't execute and you can test your code after each step.**_
+
+Use the output of the following code chunk to help answer the next question.
+
+```{pyodide}
+import pandas as pd
+
+# The data
+hockey_players = pd.read_csv('data/canucks.csv')
+hockey_players
+```
+
+Let's transform some of the columns in your canucks dataset. Let's also see how many of the players have multiple `T`'s in their name.
+
+**Tasks:**
+
+- Convert the `Position` and `Country` columns into uppercase and save this in a dataframe named `canucks_upper`.
+- Create a new column in the `canucks_upper` dataframe named `number_ts` where you count the total number of times the letter T (lowercase or uppercase) appears in their name.
+- Save this dataframe named as `canucks_upper_ts`.
+- How many players have multiple T's in their name?
+
+```{pyodide}
+#| exercise: practice_handling_strings
+import pandas as pd
+
+canucks = pd.read_csv('data/canucks.csv', parse_dates=['Birth Date'], date_format="%d-%b-%y")
+
+# Convert the Position and Country columns into uppercase
+# Save this in a dataframe named canucks_upper
+# ____ = ____
+
+# Create a new column in the canucks_upper dataframe named number_ts
+# where you count the total number of times the letter T
+# (lowercase or uppercase) appears in their name
+# Save this dataframe named as canucks_upper_ts
+# ____ = ____
+
+# How many players have more than 1 letter T in their name?
+# ____ = ____
+```
+
+```{pyodide}
+#| exercise: practice_handling_strings
+#| check: true
+from src.utils import print_correct_msg
+
+assert isinstance(result, int), "Are you calculating the number of players having more than 1 letter T in their name?"
+assert result == 6, "Check your calculation."
+print_correct_msg()
+```
+
+:::: { .hint exercise="practice_handling_strings"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `.assign` and `.upper()`?
+- You will have to convert the names to lowercase with `.lower()` first and then use `.count('t')`.
+- The number of players with `number_ts` greater than one can be calculated by filtering on that column and getting the `.shape` of the dataframe.
+
+:::
+::::
+
+:::: { .solution exercise="practice_handling_strings" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import pandas as pd
+
+canucks = pd.read_csv('data/canucks.csv', parse_dates=['Birth Date'], date_format="%d-%b-%y")
+
+# Convert the Position and Country columns into uppercase
+# Save this in a dataframe named canucks_upper
+canucks_upper = canucks.assign(Position = canucks['Position'].str.upper(),
+ Country = canucks['Country'].str.upper())
+
+# Create a new column in the canucks_upper dataframe named number_ts
+# where you count the total number of times the letter T
+# (lowercase or uppercase) appears in their name
+# Save this dataframe named as canucks_upper_ts
+canucks_upper_ts = canucks_upper.assign(number_ts=canucks_upper['Player'].str.lower().str.count('t'))
+
+# How many players have more than 1 letter T in their name?
+canucks_upper_ts[canucks_upper_ts['number_ts'] > 1].shape[0]
+```
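+
+As a side note, because `True` counts as 1 when summed, the same count can be written as a one-line sketch:
+
+```python
+# Booleans sum as 1s, giving the number of players with more than one T
+(canucks_upper_ts['number_ts'] > 1).sum()
+```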
+
+:::
+::::
+
+
+
+
+
\ No newline at end of file
diff --git a/modules/module8/module8-23-more_advanced_string_processing.qmd b/modules/module8/module8-23-more_advanced_string_processing.qmd
new file mode 100644
index 00000000..8077c8df
--- /dev/null
+++ b/modules/module8/module8-23-more_advanced_string_processing.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 6. More Advanced String Processing
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/module8-24-advanced_string_questions.qmd b/modules/module8/module8-24-advanced_string_questions.qmd
new file mode 100644
index 00000000..2d6724c6
--- /dev/null
+++ b/modules/module8/module8-24-advanced_string_questions.qmd
@@ -0,0 +1,196 @@
+---
+format: live-html
+---
+
+
+
+# 6.1. Exercises
+
+## Advanced String Questions
+
+
+
+
+
+
+
+## Strings
+
+```out
+           name   colour  location   seed  shape  sweetness  water_content  weight
+0         apple      red    canada   True  round       True             84     100
+1        banana   yellow    mexico  False   long       True             75     120
+2    cantaloupe   orange     spain   True  round       True             90    1360
+3  dragon-fruit  magenta     china   True  round      False             96     600
+4    elderberry   purple   austria  False  round       True             80       5
+5           fig   purple    turkey  False   oval      False             78      40
+6         guava    green    mexico   True   oval       True             83     450
+7   huckleberry     blue    canada   True  round       True             73       5
+8          kiwi    brown     china   True  round       True             80      76
+9         lemon   yellow    mexico  False   oval      False             83      65
+
+```
+
+{fig-align="center" fig-alt="404 image" width="80%"}
+
+
+
+
+
+## Processing Strings in a Dataframe
+
+**Instructions:**
+Running a coding exercise for the first time could take a bit of time for everything to load. Be patient, it could take a few minutes.
+
+**When you see `____` in a coding exercise, replace it with what you assume to be the correct code. Run it and see if you obtain the desired output. Submit your code to validate if you were correct.**
+
+_**Make sure you remove the hash (`#`) symbol in the coding portions of this question. We have commented them so that the line won't execute and you can test your code after each step.**_
+
+
+Use the `lego` dataframe to help answer the next question.
+
+```{pyodide}
+import pandas as pd
+
+# The data
+lego = pd.read_csv('data/lego-sets.csv')
+lego
+```
+
+Let's practice using the verbs we learned from the lesson to process a string column in our Lego dataset.
+
+**Tasks:**
+
+- Convert the `name` column in the `lego` dataset to lowercase and overwrite the dataframe by saving it as an object named `lego`.
+- Filter the dataset to find all the Lego sets that contain `"weetabix"` in the `name` column.
+- Save this as an object named `lego_weetabix`.
+- Replace the word `"weetabix"` in the `name` column of the `lego_weetabix` dataframe with the string `"cereal-brand"`.
+- Save this in an object called `lego_cereal`.
+- If the row contains the word `"promotional"` in the `name` column, change the entire value to `"cereal-brand freebie"`.
+- Display `lego_cereal`.
+
+```{pyodide}
+#| exercise: processing_strings_in_a_dataframe
+import pandas as pd
+
+lego = pd.read_csv('data/lego-sets.csv')
+
+# Convert the name column in the lego dataset to lowercase and
+# overwrite the dataframe by saving it as an object named lego
+____ = ____
+____
+
+# Filter the dataset to find all the lego sets that contain "weetabix" in the name column
+# Save this as an object named lego_weetabix
+# ____ = ____
+# ____
+
+# Replace the word "weetabix" in the name column of the lego_weetabix dataframe
+# with the string "cereal-brand"
+# Save this in an object called lego_cereal
+# ____ = ____
+
+# If the row contains the word "promotional" in the name column,
+# change the entire value to "cereal-brand freebie"
+# ____ = ____
+
+# Display lego_cereal
+# ____
+```
+
+```{pyodide}
+#| exercise: processing_strings_in_a_dataframe
+#| check: true
+from src.utils import print_correct_msg
+
+assert isinstance(result, pd.DataFrame), "We are expecting a dataframe."
+assert any("cereal-brand" in s for s in list(result['name'])), "Make sure you are replacing 'weetabix' with 'cereal-brand'."
+assert not any("promotional" in s for s in list(result['name'])), "Make sure you are replacing 'promotional' with 'cereal-brand freebie'."
+print_correct_msg()
+```
+
+:::: { .hint exercise="processing_strings_in_a_dataframe"}
+::: { .callout-note collapse="false"}
+
+## Hint 1
+
+- Are you using `.assign()` and `.lower()` to convert the name column to lowercase?
+- Are you using `str.contains('weetabix')` to filter all the rows?
+- You should be using `assign()` with `str.replace('weetabix', 'cereal-brand')` to replace "weetabix".
+- You should be using `.loc[]` and `str.contains('promotional')` (and a single `=`) to assign new row values.
+
+:::
+::::
+
+:::: { .solution exercise="processing_strings_in_a_dataframe" }
+::: { .callout-tip collapse="false"}
+
+## Fully worked solution:
+
+```{pyodide}
+import pandas as pd
+
+lego = pd.read_csv('data/lego-sets.csv')
+
+# Convert the name column in the lego dataset to lowercase and
+# overwrite the dataframe by saving it as an object named lego
+lego = lego.assign(name = lego['name'].str.lower())
+lego
+
+# Filter the dataset to find all the lego sets that contain "weetabix" in the name column
+# Save this as an object named lego_weetabix
+lego_weetabix = lego[lego['name'].str.contains('weetabix')]
+lego_weetabix
+
+# Replace the word "weetabix" in the name column of the lego_weetabix dataframe
+# with the string "cereal-brand"
+# Save this in an object called lego_cereal
+lego_cereal = lego_weetabix.assign(name = lego_weetabix['name'].str.replace('weetabix', 'cereal-brand'))
+
+# If the row contains the word "promotional" in the name column,
+# change the entire value to "cereal-brand freebie"
+lego_cereal.loc[lego_cereal['name'].str.contains('promotional'), 'name'] = 'cereal-brand freebie'
+
+# Display lego_cereal
+lego_cereal
+```
+
+:::
+::::
\ No newline at end of file
diff --git a/modules/module8/module8-27-what_did_we_just_learn.qmd b/modules/module8/module8-27-what_did_we_just_learn.qmd
new file mode 100644
index 00000000..d1180d69
--- /dev/null
+++ b/modules/module8/module8-27-what_did_we_just_learn.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 7. What Did We Just Learn?
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module8/slides/module8_00.qmd b/modules/module8/slides/module8_00.qmd
new file mode 100644
index 00000000..3d8b94a2
--- /dev/null
+++ b/modules/module8/slides/module8_00.qmd
@@ -0,0 +1,26 @@
+---
+format: revealjs
+title: Module Learning Outcomes
+title-slide-attributes:
+ data-notes: |
+ In this module you will learn about `numpy` arrays and more advanced wrangling techniques such as handling columns with dates and strings.
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+## Module Learning Outcomes
+
+By the end of the module, students are expected to:
+
+- Use [NumPy](https://numpy.org/) to create ndarrays with `np.array()` and from functions such as `np.arange()`, `np.linspace()` and `np.ones()`.
+- Describe the shape, dimension and size of an array.
+- Identify null values in a dataframe and manage them by removing them using [`.dropna()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html) or replacing them using [`.fillna()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html).
+- Manipulate non-standard date/time formats into standard Pandas datetime using [`pd.to_datetime()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html).
+- Find, and replace text from a dataframe using verbs such as [`.replace()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.replace.html) and [`.contains()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.contains.html).
+
+
+
+# Let's Start!
\ No newline at end of file
diff --git a/modules/module8/slides/module8_01.qmd b/modules/module8/slides/module8_01.qmd
new file mode 100644
index 00000000..1d09dc2f
--- /dev/null
+++ b/modules/module8/slides/module8_01.qmd
@@ -0,0 +1,491 @@
+---
+format: revealjs
+title: NumPy and 1D Arrays
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+
+```{python}
+import numpy as np
+```
+
+
+:::{.notes}
+Although we have not formally introduced you to NumPy, the name may sound familiar since we've been subtly hinting at its existence for a little while now.
+
+In the last Module, we've had you import this library for practice.
+
+So what is NumPy?
+:::
+
+---
+
+## What is NumPy?
+
+**NumPy** -> **"Numerical Python extensions"**.
+
+NumPy offers:
+
+- Arrays
+- Mathematical Constants
+- Mathematical Functions
+
+
+:::{.notes}
+The name NumPy is derived from **"Numerical Python extensions"**.
+
+NumPy is a Python library used primarily for computing involving numbers. It is especially useful as it provides a multidimensional array object, called an ***array***.
+
+In addition, NumPy also offers numerous other mathematical functions used in the domain of Linear Algebra and Calculus.
+:::
+
+---
+
+## So What is an Array?
+
+```{python}
+my_list = [1, 2, 3, 4, 5]
+my_list
+```
+
+
+
+```{python}
+my_array = np.array((1, 2, 3, 4, 5))
+my_array
+```
+
+
+
+```{python}
+type(my_array)
+```
+
+
+:::{.notes}
+A NumPy array is somewhat like a list.
+
+They are considered their own data type.
+
+We can see this by using `type` on an array.
+:::
+
+---
+
+```{python}
+my_list = [1,"hi"]
+```
+
+
+
+```{python}
+my_array = np.array((1, "hi"))
+my_array
+```
+
+
+:::{.notes}
+Soon, we'll start to see that although lists and arrays appear quite similar, they have some key differences.
+
+A list can contain multiple data types, while an array cannot.
+
+In this case, `1` was converted to a `'1'` in quotations, which signifies that it is now a string.
+:::
+
+---
+
+## Creating 1D Arrays
+
+```{python}
+my_array = np.array([1, 2, 3, 4])
+my_array
+```
+
+
+
+```{python}
+np.zeros(10)
+```
+
+
+
+```{python}
+np.ones(4)
+```
+
+
+:::{.notes}
+We can make arrays from lists as well as tuples.
+
+There are also several built-in NumPy functions that create different arrays with patterns and requirements.
+
+`np.zeros()` will create an array containing `0` for each element, and the input argument specifies the size.
+
+Here we specified 10, so our array has 10 elements.
+
+Similarly, `np.ones()` does the same thing except with an array of elements with `1` values.
+
+Now we've specified 4 as the input, and so this array has 4 elements.
+:::
+
+---
+
+```{python}
+np.arange(5)
+```
+
+
+
+```{python}
+np.arange(0, 10, 2)
+```
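+
+For completeness, a two-argument call gives just the start and stop values, using the default step size of 1:
+
+```{python}
+# Start at 2, stop before 8, with the default step of 1
+np.arange(2, 8)
+```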
+
+
+:::{.notes}
+Like `range()`, `np.arange()` can take 1, 2 or 3 input arguments and produces an array in much the same way that `range()` produces a sequence.
+
+If there are 3 input arguments, the first 2 are where the interval values start and stop respectively, and the third argument gives the step size between values.
+:::
+
+---
+
+```{python}
+np.linspace(1,5,10)
+```
+
+
+
+```{python}
+np.random.rand(5)
+```
+
+
+:::{.notes}
+
+`np.linspace()` will produce an array whose number of elements is given by the 3rd argument, with values evenly spaced between the first 2 arguments.
+
+For example, this code will produce 10 equally spaced values from 1 to 5.
+
+Notice that the elements of `np.linspace()` arrays default to type `float`.
+
+We can also produce an array with random values using `np.random.rand()`.
+
+Here, we have random numbers uniformly distributed from 0 to 1.
+:::
+
+---
+
+## Elementwise operations
+
+```{python}
+array1 = np.ones(4)
+array1
+```
+
+
+
+```{python}
+array2 = array1 + 1
+array2
+```
+
+
+
+```{python}
+array1 + array2
+```
+
+
+
+```{python}
+array1 * array2
+```
+
+
+:::{.notes}
+Let's talk about how operations are calculated with arrays.
+
+We discussed that array and lists are similar but not quite the same.
+
+Arrays are designed for mathematical convenience, so they operate in an element-wise manner.
+
+When we do operations, the operation is done to each element in the array.
+
+If we add 1 to our array, 1 is added to each element in the array.
+
+If we add two arrays together, the elements at identical index positions are added.
+
+Similarly, if we multiply two arrays together, the elements at each position are multiplied together.
+:::
+
+---
+
+```{python}
+list_1 = [1, 1, 1, 1]
+```
+
+
+
+```{python}
+# | eval: false
+list_1 + 1
+```
+
+```out
+TypeError: can only concatenate list (not "int") to list
+
+Detailed traceback:
+  File "<stdin>", line 1, in <module>
+```
+
+
+:::{.notes}
+This is much more convenient than using a list.
+
+We can't simply add 1 to a list. Instead, we get an error.
+:::
+
+---
+
+```{python}
+list_1 = [1, 1, 1, 1]
+
+list_2 = [elem + 1 for elem in list_1]
+list_2
+```
+
+
+
+```{python}
+list_3 = []
+
+for index in range(len(list_1)):
+ list_3.append(list_1[index] + list_2[index])
+
+list_3
+```
+
+
+:::{.notes}
+If we wanted the same operations done with lists, we would have to use a loop or list comprehension to obtain the same results.
+:::
+
+---
+
+## Slicing and Indexing 1D Arrays
+
+```{python}
+arr = np.arange(10)
+arr
+```
+
+
+
+```{python}
+arr[7]
+```
+
+
+
+```{python}
+arr[2:6]
+```
+
+
+
+```{python}
+arr[-1]
+```
+
+
+:::{.notes}
+When it comes to slicing, 1D arrays are sliced in the same manner that lists are.
+
+We can obtain an individual location by putting the index position in square brackets.
+
+And just like slicing dataframes with `.iloc[]`, when we want an interval of values, the first value in the bracket is included, and the last value is excluded.
+
+To obtain elements from right to left, we use negative integers.
+:::
+
+---
+
+## Boolean Indexing
+
+```{python}
+grade_array = np.array([98, 87, 103, 92, 67, 107, 78, 104, 85, 105])
+grade_array
+```
+
+
+
+```{python}
+threshold = grade_array > 100
+threshold
+```
+
+
+
+```{python}
+grade_array[threshold] = 100
+grade_array
+```
+
+
+:::{.notes}
+Let's now explore Boolean indexing.
+
+Let's take a 1D array that consists of 10 elements.
+
+Remember that when we do most operations, it occurs in an element-wise manner.
+
+Perhaps we are grading exams that contain bonus marks.
+
+The maximum allowed mark on the exam is 100%, so we must cap the grades: any mark greater than 100 is set to 100. First, we check which values are greater than 100.
+
+This produces an array containing Boolean values, which we store in the object `threshold`.
+
+The first and second elements are `False` since neither 98 nor 87 is larger than 100. However, the 3rd element is `True` since 103 is larger than 100.
+
+We can now replace all the values that have a `True` Boolean with a new value; in this case, let's assign them 100, the maximum allowed grade.
+:::
+
+---
+
+```{python}
+new_grade_array = np.array([98, 87, 103, 92, 67, 107, 78, 104, 85, 105])
+new_grade_array
+```
+
+
+
+```{python}
+new_grade_array[new_grade_array > 100] = 100
+new_grade_array
+```
+
+
+:::{.notes}
+We could also shorten the whole process and avoid making `threshold` by using the following code.
+
+You'll notice that we use similar filtering square bracket notation that we did using pandas!
+:::
+
+---
+
+## Why NumPy?
+
+```{python}
+# | include: false
+cereal = pd.read_csv('data/cereal.csv')
+```
+
+```{python}
+cereal.head()
+```
+
+
+
+```{python}
+type(cereal.loc[3,'calories'])
+```
+
+
+
+```{python}
+cereal['calories'].to_numpy()
+```
+
+
+:::{.notes}
+So why, NumPy?
+
+Lists are often used for a similar purpose to arrays, but they are slow to process.
+
+Because of this, NumPy arrays are used to create many other structures.
+
+In fact, let's refresh ourselves on certain values in a dataframe.
+
+Remember when we obtained the data type of a specific value in a dataframe?
+
+We obtained this `numpy.int64` type, which we originally ignored.
+
+This is because a pandas dataframe is built off of a multidimensional (2D specifically) array!
+
+We will explain more about multidimensional arrays in the next set of slides.
+
+We can actually convert an entire pandas column into an array pretty easily using `.to_numpy()`.
+:::
+
+---
+
+## NumPy Constants and Functions
+
+{fig-alt="404 image"}
+
+```{python}
+np.pi
+```
+
+
+
+{fig-alt="404 image"}
+
+```{python}
+np.inf
+```
+
+
+
+{fig-alt="404 image"}
+
+```{python}
+np.e
+```
+
+
+:::{.notes}
+NumPy also offers an assortment of handy mathematical constants and functions.
+:::
+
+---
+
+## NumPy Functions
+
+```{python}
+np.prod([2, 3, 1])
+```
+
+
+
+```{python}
+np.diff([2, 5, 20])
+```
+
+
+
+```{python}
+np.log10(100)
+```
+
+
+
+The full list of mathematical functions is available in the NumPy documentation.
+
+
+:::{.notes}
+NumPy's functions include but are not limited to:
+
+- `np.prod()` which calculates the product of values in an array
+- `np.diff()` which calculates the difference between consecutive elements (the left element subtracted from the right element)
+- And other functions such as `np.log()` or trigonometric ones like `np.sin()`
+:::
+
+
+# Let’s apply what we learned!
\ No newline at end of file
diff --git a/modules/module8/slides/module8_05.qmd b/modules/module8/slides/module8_05.qmd
new file mode 100644
index 00000000..68b64e32
--- /dev/null
+++ b/modules/module8/slides/module8_05.qmd
@@ -0,0 +1,297 @@
+---
+format: revealjs
+title: Multi-dimensional Arrays
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+## Creating 2D Arrays
+
+```{python}
+my_array = np.array((1, 2, 3, 4))
+my_array
+```
+
+
+
+```{python}
+list_2d = [[1, 2], [3, 4], [5, 6]]
+array_2d = np.array(list_2d)
+array_2d
+```
+
+
+:::{.notes}
+We saw in the last set of slides that we can create 1D arrays using a number of different functions such as `np.array()`.
+
+We can also use the same functions to make multi-dimensional arrays, which are indicated by the multiple sets of square brackets `[[ ]]`.
+
+Our 1D arrays have only a single set of square brackets, whereas multi-dimensional arrays have multiple sets.
+:::
+
+---
+
+```{python}
+np.zeros((3,4))
+```
+
+
+
+```{python}
+np.random.rand(4, 2)
+```
+
+
+
+```{python}
+np.arange(0,12).reshape(3,4)
+```
+
+
+:::{.notes}
+Some of the functions that we use to create arrays let us specify multiple dimensions.
+
+`np.zeros()` is a function that accepts a tuple with the shape of our desired array.
+
+In this case, an array with 3 rows and 4 columns.
+
+In contrast to `np.zeros()`, `np.random.rand()` accepts multiple numeric values that correspond to the array's shape.
+
+So in the example, an array with 4 rows and 2 columns.
+
+We can also use the verb `.reshape()` to transform a 1D array into a multi-dimensional array (see the sketch after these notes for a handy shortcut).
+:::
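+
+One small convenience worth noting (a sketch of standard NumPy behaviour, not covered in the notes above): `.reshape()` can infer a single dimension for us if we pass `-1`.
+
+```{python}
+# NumPy infers the second dimension (4) from the array's total size.
+np.arange(0, 12).reshape(3, -1)
+```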
+
+---
+
+## Array Shapes
+
+{width="75%" fig-align="center" fig-alt="404 image"}
+
+- `.ndim`
+- `.shape`
+- `.size`
+
+
+:::{.notes}
+We saw how to make multi-dimensional arrays, but an array's dimension is not the same thing as its shape.
+
+Here are three main array nouns we need to know to understand the characteristics of an array:
+
+- `.ndim`: the number of dimensions of an array
+
+- `.shape`: the number of elements in each dimension (like calling `len()` on each dimension)
+
+- `.size`: the total number of elements in an array (i.e., the product of `.shape`)
+:::
+
+---
+
+```{python}
+array1 = np.ones(4)
+array1
+```
+
+
+
+```{python}
+array1.ndim
+```
+
+
+
+```{python}
+array1.shape
+```
+
+
+
+```{python}
+array1.size
+```
+
+
+:::{.notes}
+`array1` is an example of a 1D array.
+
+We can use `.ndim` to check the number of dimensions, and just as we suspected, it is 1.
+
+We use `.shape` to find the number of elements in each dimension.
+
+This returns a tuple with only 1 value, which represents the 1 dimension. This value gives the number of elements in the dimension.
+
+Finally, `.size` will return the ***total*** number of values in the array.
+:::
+
+---
+
+```{python}
+array_2d = np.ones((3, 2))
+array_2d
+```
+
+
+
+```{python}
+array_2d.ndim
+```
+
+
+
+```{python}
+array_2d.shape
+```
+
+
+
+```{python}
+array_2d.size
+```
+
+
+:::{.notes}
+Let's try this again with a 2D array.
+
+We can confirm the number of dimensions with `ndim`.
+
+Here we have a 2-dimensional array as expected.
+
+The shape of the array now consists of two elements, one for each dimension.
+
+The size is the product of the values in `.shape`.
+:::
+
+---
+
+```{python}
+array_2d.shape
+```
+
+
+
+```{python}
+len(array_2d.shape)
+```
+
+
+
+```{python}
+np.prod(array_2d.shape)
+```
+
+
+:::{.notes}
+If we have the `.shape` of the array, we can get both the `.ndim` of the array with `len()`,
+
+as well as the size by taking the product of the elements.
+:::
+
+---
+
+## Indexing and Slicing 2D arrays
+
+```{python}
+arr2 = np.arange(0,12).reshape(3,4)
+arr2
+```
+
+
+
+```{python}
+arr2[1, 2]
+```
+
+
+
+```{python}
+arr2[1][2]
+```
+
+
+:::{.notes}
+Slicing 2D arrays can be compared to slicing pandas dataframes (without the `.iloc[]`).
+
+Let's say we want to select `6`. It's located in row 1 and column 2 (remember that indexing starts at 0).
+
+We could also do the same thing by putting the row and column index in separate square brackets, but it's not recommended since it first creates an intermediate array for the row.
+:::
+
+---
+
+```{python}
+# | echo: false
+arr2
+```
+
+
+
+```{python}
+arr2[2]
+```
+
+
+
+```{python}
+arr2[:,2]
+```
+
+
+
+```{python}
+arr2[:2,1:]
+```
+
+
+:::{.notes}
+If we want a complete row of the array, we can specify with a single number.
+
+Here, we select the last row at index 2 that has the elements 8, 9, 10, and 11.
+
+If we only want a single column, we can use the same syntax we used with `.iloc[]`.
+
+This code selects the column at index 2, with the values 2, 6, and 10.
+
+We can obtain specific slices by using a colon as well.
+
+If we only wanted the first 2 rows and the last 3 columns, we could do the following.
+:::
+
+---
+
+```{python}
+# | echo: false
+arr2
+```
+
+
+
+```{python}
+arr2.T
+```
+
+
+
+```{python}
+
+arr2[1,1] = 77777
+arr2
+```
+
+
+:::{.notes}
+If we want to ***transpose*** our array we can use the verb, `.T`.
+
+This converts the rows to columns and the columns to rows.
+
+We can replace values in an array by specifying the element we wish to replace in square brackets on the left side of the assignment operator and our new desired value on the right of it.
+
+Here we can see that the value 5 was replaced with 77,777.
+:::
+
+
+# Let’s apply what we learned!
\ No newline at end of file
diff --git a/modules/module8/slides/module8_11.qmd b/modules/module8/slides/module8_11.qmd
new file mode 100644
index 00000000..62baeb9a
--- /dev/null
+++ b/modules/module8/slides/module8_11.qmd
@@ -0,0 +1,277 @@
+---
+format: revealjs
+title: Working with Null Values
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+
+**Null**: The human-readable term for a value that is missing from the dataframe.
+
+```{python}
+np.nan
+```
+
+Missing values are sometimes referred to as `NA` values.
+
+In this course, we generally refer to them as both ***null*** and `NaN` values.
+
+
+:::{.notes}
+In the real world of data analysis, it's uncommon that we have a perfect dataset ready to be used. In fact, in most cases, cleaning and wrangling data will be an ongoing and time-consuming project. No matter how complete or well planned a database may seem, a data analyst will almost always encounter ***null*** values.
+
+A "null" is the human-readable term for a value that is missing from the dataframe. Remember in Module 4 when we discussed `NaN` being of type `float`? Python translates null values in numerical columns to `NaN`, a constant that comes from the NumPy library (a tiny demonstration of its odd behaviour follows these notes).
+
+Missing values are sometimes referred to as `NA` values because of how they are handled in other programming languages.
+
+This is reflected in some of the names of the functions we use to handle them.
+
+In this course, we generally refer to them as both ***null*** and `NaN` values.
+:::
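+
+A tiny demonstration of why we need dedicated functions to detect missing values: `NaN` is, by definition, not equal even to itself.
+
+```{python}
+# This evaluates to False, so plain equality checks cannot find NaN values.
+np.nan == np.nan
+```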
+
+---
+
+## Info on missing values
+
+```{python}
+# | include: false
+cereal = pd.read_csv('data/cereal.csv').loc[:,["name", "mfr", "calories", "fat", "fiber", "rating"]]
+```
+
+```{python}
+cereal.info()
+```
+
+
+:::{.notes}
+A good rule of thumb when conducting an analysis is to check early on how complete the dataset is.
+
+`.info()` is similar to `.dtypes`, but in addition to the dtype of each column, it includes the total number of non-null values contained in each column.
+
+Let's try it out on a subset of our `cereal` dataset.
+
+Here we see the total number of rows at the top with `RangeIndex: 77 entries, 0 to 76`.
+
+The `Non-Null Count` column specifies the number of non-null values.
+
+In this case, we have a complete dataframe with zero null values for each column.
+:::
+
+---
+
+```{python}
+# | include: false
+cycling = pd.read_csv('data/cycling_data_dirty.csv', parse_dates =["Date"])
+```
+
+```{python}
+cycling
+```
+
+
+:::{.notes}
+Let's take a look at a case where we are not so lucky. `cycling` is a subset of a dataset containing the trips that Tomas Beuzen, a UBC postdoc, made cycling to campus and back during the fall 2019 semester.
+:::
+
+---
+
+```{python}
+cycling.info()
+```
+
+
+:::{.notes}
+Using `.info()` with this new data, we get the following.
+
+We can see that there is a total of 33 entries (rows).
+
+We see that the `Distance` column only contains 30 non-null values out of a possible 33.
+
+This would mean that 3 values are missing from this column.
+:::
+
+---
+
+
+
+```{python}
+cycling['Distance'].isnull()
+```
+
+:::{.notes}
+We can use `.isnull()` on a particular column to obtain a Boolean series indicating whether each row's value is null (summing this series, as sketched below, gives a quick count).
+
+:::
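+
+A quick sketch building on the Boolean series above: since `True` counts as 1, summing it counts the null values directly.
+
+```{python}
+# Count the missing values in the Distance column.
+cycling['Distance'].isnull().sum()
+```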
+
+---
+
+```{python}
+cycling[cycling['Distance'].isnull()]
+```
+
+
+
+```{python}
+cycling[cycling.isnull().any(axis=1)]
+```
+
+
+:::{.notes}
+We can pair `.isnull()` with our filtering method to obtain the rows that contain null values in the `Distance` column of the dataframe.
+
+Here, we see the 3 rows of our dataframe that contain null values.
+
+If we wanted to filter all the rows that contain null values and not just in the `Distance` column, we could use the verb `.any()` on the full dataframe.
+
+We only have `NaN` values in the `Distance` column, so the same 3 rows are outputted as before.
+:::
+
+---
+
+We will be discussing the following 2 simple ways of working with missing values:
+
+- `.dropna()`
+- `.fillna()`
+
+
+:::{.notes}
+Now that we have identified that our dataframe contains null values, what can we do about them?
+:::
+
+---
+
+## Dropping Null Values
+
+```{python}
+trips_removed = cycling.dropna()
+trips_removed
+```
+
+
+:::{.notes}
+The easiest and simplest way of handling null values is to remove those rows from the dataset.
+
+In a fashion similar to dropping columns, we can drop rows if they contain a `NaN` value.
+
+It's important that we take some necessary precautions and not drop a large portion of the data.
+
+In our example above, to remove the 3 rows we identified as containing `NaN` values, we do the following.
+
+Notice that index 2 was removed, and we only have 30 rows in our dataframe now.
+:::
+
+---
+
+```{python}
+cycling.dropna(subset=['Type'])
+```
+
+
+:::{.notes}
+By default, all the rows with `NaN` values in any column will be considered when dropping rows; however, if we only want to drop rows with `NaN` values in certain columns, we can use the `subset` argument.
+
+Since in this code we are subsetting on the column `Type`, which has no `NaN` values, no rows were dropped from the dataframe, and we still have 33 rows.
+:::
+
+---
+
+```{python}
+cycling.dropna(subset=['Distance'])
+```
+
+
+:::{.notes}
+The rows do get dropped when we subset on the `Distance` column.
+
+Alternatively, if a column is missing a large portion of its data, the best option may be to drop that column instead of the rows with missing values, as sketched below.
+
+This keeps more of your data than dropping a large number of rows would.
+:::
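+
+A minimal sketch of that alternative, using the `axis` argument of `.dropna()` (here it would remove the `Distance` column entirely, since it is the only one containing `NaN` values):
+
+```{python}
+# Drop columns (rather than rows) that contain any NaN values.
+cycling.dropna(axis='columns').head()
+```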
+
+---
+
+## Replacing Null Values
+
+```{python}
+cycling_zero_fill = cycling.fillna(value=0)
+cycling_zero_fill
+```
+
+
+:::{.notes}
+Alternatively, if we have a small dataset and we don't want to rid ourselves of any data, we may prefer to replace `NaN` with a particular value.
+
+We can do so with `.fillna()`.
+
+Perhaps the distance is missing from the data because Tom didn't cycle that particular day. Replacing the `NaN` value with 0, in this case, would make sense.
+
+Index 2 now has a `Distance` of `0.00`.
+:::
+
+---
+
+```{python}
+cycling['Distance'].mean().round(2)
+```
+
+
+
+```{python}
+cycling_mean_fill = cycling.fillna(value=cycling['Distance'].mean().round(2))
+cycling_mean_fill
+```
+
+
+:::{.notes}
+Maybe a better decision would be to replace the missing values in `Distance` with the column mean, so we don't introduce artificially extreme values like 0.
+
+First, we calculate the mean rounded to 2 decimal places (12.67) and pass it to the `value` argument of the `.fillna()` verb.
+
+We can now see the value in `Distance` for index 2 change to `12.67`.
+:::
+
+---
+
+```{python}
+cycling.fillna(method='bfill')
+```
+
+
+:::{.notes}
+We could also fill using certain methods.
+
+***"bfill"*** uses the next valid row observation to fill the `NaN`:
+
+Index 2 adopts the distance value of `12.84` from index 3.
+:::
+
+---
+
+```{python}
+cycling.fillna(method='ffill')
+```
+
+
+:::{.notes}
+The `method` argument value ***"ffill"*** propagates the last valid observation forward to the next.
+
+Here, we see that index 2 adopts the value `13.03` from index 1.
+
+`bfill` and `ffill` are methods usually adopted when dealing with columns organized by date.
+
+This way, an observation can adopt a similar value to those near it.
+
+We will explore date columns in the next slide deck.
+
+Remember, these are only a few methods that can be used in simple situations.
+
+In some scenarios, more complex methods of handling missing values may be needed for effective analysis; one such option is sketched after these notes.
+:::
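+
+As one example of a more sophisticated option (a sketch, not a recommendation for every dataset), pandas offers `.interpolate()`, which estimates each missing value from its neighbours, linearly by default:
+
+```{python}
+# Fill missing Distance values by interpolating between
+# the surrounding observations.
+cycling['Distance'].interpolate().head()
+```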
+
+
+# Let’s apply what we learned!
\ No newline at end of file
diff --git a/modules/module8/slides/module8_16.qmd b/modules/module8/slides/module8_16.qmd
new file mode 100644
index 00000000..ffd12bc0
--- /dev/null
+++ b/modules/module8/slides/module8_16.qmd
@@ -0,0 +1,503 @@
+---
+format: revealjs
+title: Working with Dates and Time
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+
+
+{width="100%" fig-align="center" fig-alt="404 image"}
+
+
+:::{.notes}
+It wasn't too long ago, in Module 4, that we briefly mentioned two other column dtypes, `datetime64` and `timedelta64[ns]`.
+
+Dates and times can be a bit tricky and require a specific data type so that analysis can be done correctly.
+:::
+
+---
+
+```{python}
+# | include: false
+cycling = pd.read_csv('data/cycling_data.csv')
+```
+
+```{python}
+cycling.head()
+```
+
+
+
+```{python}
+cycling.dtypes
+```
+
+
+:::{.notes}
+Let's take our cycling dataset as an example.
+
+Our date column in our cycling dataframe currently has a dtype value of `object`.
+:::
+
+---
+
+```{python}
+cycling.sort_values('Date').head(15)
+```
+
+
+:::{.notes}
+When we try to sort these values, Python doesn't recognize the day or month values and sorts the rows as plain strings, in an order that is not temporal.
+
+We can see that the sorted result starts with October 1st, 2019, followed by September 10th, 2019, and then September 11th, 2019.
+
+Python is sorting the strings alphabetically ("Oct" comes before "Sep"), not chronologically.
+:::
+
+---
+
+```{python}
+cycling.head()
+```
+
+
+
+```{python}
+dates = (cycling['Date'].str.split(' ', expand=True)
+ .rename(columns = {0:'Date',
+ 1:'Time'}))
+dates.head()
+```
+
+
+:::{.notes}
+We can try parsing dates ourselves, but that can be difficult and time-consuming.
+
+To demonstrate this, let's make an attempt at parsing the `Date` column in our `cycling` dataframe, which currently has an `object` dtype.
+
+First, we would need to split the column, separating the date and the time, and rename the labels 0 and 1 to `Date` and `Time`, respectively.
+:::
+
+---
+
+```{python}
+dates = (dates['Date'].str.split('-', expand=True).rename(columns = {0:'Month',
+ 1:'Day',
+ 2:'Year'}))
+dates.head()
+```
+
+
+:::{.notes}
+Once again, we need to split the date column using `str.split()`, which we learned in Module 4, and separate it into columns for the month, day, and year.
+:::
+
+---
+
+```{python}
+dates.iloc[0,1]
+```
+
+
+
+```{python}
+type(dates.iloc[0,1])
+```
+
+
+:::{.notes}
+Currently, the values in `dates` are of type `str`, so we would not be able to sort them in a temporal manner.
+:::
+
+---
+
+```{python}
+cycling_dates = (cycling.assign(Year = dates['Year'].astype(int),
+ Month = dates['Month'],
+ Day = dates['Day'].astype(int))
+ )
+cycling_dates.head(3)
+```
+
+
+
+```{python}
+cycling_dates = cycling_dates.loc[:, ['Year', 'Month', 'Day', 'Name',
+ 'Type', 'Time', 'Distance', 'Comments']]
+cycling_dates.head(3)
+```
+
+
+:::{.notes}
+We convert the `Year` and `Day` columns to integer values and add them, together with `Month`, to the `cycling_dates` dataframe.
+
+We are then going to select and reorder the columns in the dataframe, so the new date columns are on the left side.
+:::
+
+---
+
+```{python}
+cycling_dates.sort_values(['Year', 'Month', 'Day'])
+```
+
+
+:::{.notes}
+Now we try to sort them, but how do we sort the `Month` column?
+
+It now incorrectly sorts the rows, listing the October rows before September.
+
+It must be quite evident that we really don't want to do it this way, right?
+
+There are a lot of limitations, and we haven't yet separated the time.
+
+Calculating the time between dates now can also be extremely difficult.
+
+(The differing number of days in months is a contributing factor.)
+
+Thankfully we don't have to do it this way.
+
+Pandas has some built-in functions that will make our lives much easier.
+
+By the end of this slide deck, we will be able to answer the question of *what was Tom's longest time between rides*.
+
+This is a question that, without pandas, would have taken hours instead of minutes to answer.
+:::
+
+---
+
+## Pandas parse_dates
+
+```{python}
+cycling = pd.read_csv('data/cycling_data.csv')
+cycling.head(3)
+```
+
+
+
+```{python}
+cycling.dtypes
+```
+
+
+:::{.notes}
+Remember how Pandas is built using the NumPy library?
+
+Well, in a similar way, Pandas datetime verbs are built using Python's built-in `datetime` library.
+
+We can parse our data at the same time as we read in our dataframe using the argument `parse_dates`.
+
+Originally the `Date` column adopts a dtype of `object` when the data is read in.
+:::
+
+---
+
+```{python}
+cycling_dates = pd.read_csv('data/cycling_data.csv', parse_dates = ['Date'])
+cycling_dates.head()
+```
+
+
+
+```{python }
+cycling_dates.dtypes
+```
+
+
+:::{.notes}
+Using the `parse_dates` argument with `pd.read_csv()` transforms the column so that it now adopts a `datetime64` dtype.
+:::
+
+---
+
+```{python}
+cycling_dates.sort_values('Date')
+```
+
+
+:::{.notes}
+Now that we have a datetime column expressing when Tom began each ride, we can properly sort our dataframe in a temporal manner:
+:::
+
+---
+
+```{python}
+pd.read_csv('data/cycling_data_split_time.csv').head()
+```
+
+
+
+```{python}
+(pd.read_csv('data/cycling_data_split_time.csv',
+ parse_dates={'Date': ['Year', 'Month', 'Day', 'Clock']})
+ .head())
+```
+
+
+:::{.notes}
+As another example, our date data may be split between multiple columns.
+
+We can combine the `Year`, `Month`, `Day`, and `Clock` columns into a single datetime column by using a dictionary within the `parse_dates` argument.
+
+The dictionary key indicates the new column name, and the dictionary value is a list with the multiple date columns to combine.
+:::
+
+---
+
+```{python}
+cycling = pd.read_csv('data/cycling_data.csv')
+cycling.head()
+```
+
+
+
+```{python}
+cycling.dtypes
+```
+
+
+:::{.notes}
+What if we need to convert a column into dtype `datetime` after reading in our data?
+
+That's not a problem! We have `pd.to_datetime()` to transform columns of an already existing dataframe.
+
+Let's use our original `cycling` dataframe where `Date` is still of dtype `object`.
+:::
+
+---
+
+
+```{python}
+new_cycling = cycling.assign(Date = pd.to_datetime(cycling['Date']))
+new_cycling.head()
+```
+
+
+
+```{python}
+new_cycling.dtypes
+```
+
+
+:::{.notes}
+To convert `Date` to a datetime dtype, we use `pd.to_datetime()` and `assign()`.
+
+Now in the `new_cycling` dataframe, we see that the column `Date` is now of type `datetime64[ns]`
+
+Ok, but what if I don't want the full datetime value, and I want a column with only a portion of it, like the month or year?
+:::
+
+---
+
+## Pandas datetime tools
+
+- `.dt.day_name()` for the day of the week:
+
+```{python}
+new_cycling['Date'].dt.day_name().head(3)
+```
+
+
+
+```{python}
+new_cycling.assign(weekday = new_cycling['Date'].dt.day_name()).head(3)
+```
+
+
+:::{.notes}
+No worries, we can add a new column to our dataframe in a similar way as before, but now we extract a portion of the `datetime` column by using one of the many pandas datetime tools.
+
+Here are a couple of examples:
+
+- `.dt.day_name()` for the day of the week, which we can pair this with `.assign()` to add this as a column in the dataframe.
+:::
+
+---
+
+```{python}
+new_cycling['Date'].dt.day.head()
+```
+
+
+
+```{python}
+new_cycling.assign(day = new_cycling['Date'].dt.day).head()
+```
+
+:::{.notes}
+- `.dt.day` for the day which we can again use with `.assign()` to add it to our dataframe.
+:::
+
+---
+
+Here are some of the most common useful datetime tools:
+
+- `.dt.year`
+- `.dt.month`
+- `.dt.month_name()`
+- `.dt.day`
+- `.dt.day_name()`
+- `.dt.hour`
+- `.dt.minute`
+
+For a full list, refer to the attributes and methods section of the Timestamp documentation.
+
+
+:::{.notes}
+There is some inconsistency with these verbs. You can see that some use parentheses `()` and some do not.
+
+The `.dt` accessor can only be used on a pandas Series. We can extract the day, month, year, hour, or minute from a single datetime value using the same nouns but omitting the `.dt`.
+
+Let's see how that's possible.
+:::
+
+---
+
+```{python}
+new_cycling.head()
+```
+
+
+
+If we select the `Date` value at row 1 of our `new_cycling` dataframe, you'll notice that it outputs something called a `Timestamp`.
+
+```{python}
+timestamp_ex = new_cycling.loc[1,'Date']
+timestamp_ex
+```
+
+
+:::{.notes}
+If we select the `Date` value at row 1 of our `new_cycling` dataframe, you'll notice that it outputs something called a `Timestamp`.
+
+This is a pandas data type.
+:::
+
+---
+
+```{python}
+timestamp_ex
+```
+
+
+
+```{python}
+timestamp_ex.month_name()
+```
+
+
+
+```{python}
+timestamp_ex.day
+```
+
+
+
+```{python}
+timestamp_ex.hour
+```
+
+
+:::{.notes}
+Timestamps show a snapshot of when an event has occurred. Timestamps are complete with both dates and times. If the date and time are not available in your original data, Python will fill in any temporal unknowns with default values (often with `00:00:00` for time, if only the date was provided).
+
+To obtain the month name, day, or hour from the Timestamp, we can use the same nouns in the previous slide without `.dt`.
+
+Here we get the `.month_name()`, the `.day` and the `.hour` of a single value by using the same tools as before but omitting the `.dt`.
+:::
+
+---
+
+## .diff()
+
+```{python}
+cycling_intervals = new_cycling['Date'].sort_values().diff()
+cycling_intervals
+```
+
+
+:::{.notes}
+In our analysis, it might be important to know how frequently events occur and the time between them.
+
+`.diff()` is a useful function for that.
+
+This outputs a pandas Series with the time that elapsed between rows. As you can see, there was a 10 hour and 22 minute gap between Tom's third and fourth bike rides. Wow, that's a long workday!
+
+Here, you'll now notice a new dtype at the bottom of our new pandas Series named `timedelta64`.
+:::
+
+---
+
+## timedelta
+
+```{python}
+cycling_intervals[1]
+```
+
+
+
+```{python}
+cycling_intervals[1].seconds
+```
+
+
+
+```{python}
+sec_per_hour = 60 * 60
+cycling_intervals[1].seconds / sec_per_hour
+```
+
+
+:::{.notes}
+Unlike a `Timestamp` that represents a snapshot in time, `timedelta` represents a duration or an interval of time.
+
+Here we can obtain the time between 2 trips.
+
+Measurements can only be extracted from a timedelta object using the `days`, `seconds`, and `microseconds` nouns.
+
+Here we obtain the number of seconds.
+
+We can convert them into other units using simple operations.
+
+In this case, we convert to hours by dividing by the number of seconds in an hour (a `total_seconds()` shortcut is sketched after these notes).
+:::
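+
+One shortcut worth knowing (a sketch using the same interval as above): timedeltas also offer a `total_seconds()` verb that returns the full duration in seconds, avoiding manual unit bookkeeping.
+
+```{python}
+# The whole interval expressed in hours, via total_seconds().
+cycling_intervals[1].total_seconds() / 3600
+```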
+
+---
+
+```{python}
+cycling_intervals.max()
+```
+
+
+
+```{python}
+cycling_intervals.min()
+```
+
+
+
+```{python}
+interval_range = cycling_intervals.max() - cycling_intervals.min()
+interval_range
+```
+
+
+:::{.notes}
+Timedelta objects have a lot of functionality.
+
+We can use summary statistic verbs with them.
+
+For example, we can calculate the maximum amount of time between rides.
+
+As well as the minimum.
+
+We can also do a simple operation with them, like finding the range.
+:::
+
+
+# Let’s apply what we learned!
\ No newline at end of file
diff --git a/modules/module8/slides/module8_19.qmd b/modules/module8/slides/module8_19.qmd
new file mode 100644
index 00000000..1631cf43
--- /dev/null
+++ b/modules/module8/slides/module8_19.qmd
@@ -0,0 +1,313 @@
+---
+format: revealjs
+title: Introduction to Working with Strings
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+## Recap
+
+Let's first remind ourselves of some of the methods we've already learned such as:
+
+- `.upper()`
+- `.lower()`
+- `.count()`
+- `.split()`
+
+
+```{python}
+instrument = 'Violin'
+instrument
+```
+
+
+
+```{python}
+instrument.upper()
+```
+
+
+:::{.notes}
+Although we have already introduced you to strings to a certain degree, processing and working with this data type is an area that will require a substantial amount of learning.
+
+In this course, we will only scratch the surface when it comes to strings.
+
+That being said, we do hope to provide you with an adequate foundation in string processing.
+
+Let's first remind ourselves of some of the methods we've already learned such as:
+
+- `.upper()`
+- `.lower()`
+- `.count()`
+- `.split()`
+
+When we work with a single string, we can simply call the function on the object name.
+
+For example, if our string object name was `instrument`.
+
+We could convert to all uppercase characters with `instrument.upper()`.
+:::
+
+---
+
+```{python}
+instrument.lower()
+```
+
+
+
+```{python}
+instrument.count('i')
+```
+
+
+
+```{python}
+instrument.split('i')
+```
+
+
+:::{.notes}
+Or convert the string to lowercase with `instrument.lower()`.
+
+We could count the number of occurrences of the letter "i" using `instrument.count('i')`.
+
+And split a string on a specified character (for example, in this case, "i") using the code `instrument.split('i')`.
+:::
+
+---
+
+## Processing String Columns
+
+```{python}
+# | include: false
+cycling = pd.read_csv('data/cycling_data.csv')
+```
+
+```{python}
+cycling
+```
+
+:::{.notes}
+The only problem is that when we work with data, we will be applying these transformations, not to a single string, but to a whole column of them.
+
+We saw back in Module 4 that string data is represented in a pandas dataframe using the dtype `object`.
+
+This is the default dtype given to columns that have a mix of different data types or that pandas cannot identify as any other dtype.
+
+Let's bring back our `cycling` dataframe to demonstrate how to work with columns of this dtype.
+
+:::
+
+---
+
+```{python}
+# | include: false
+cycling = pd.read_csv('data/cycling_data.csv')
+```
+
+```{python}
+upper_cycle = cycling.assign(Comments = cycling['Comments'].str.upper())
+upper_cycle.head()
+```
+
+
+
+```{python}
+rain_cycle = upper_cycle.assign(Rain = upper_cycle['Comments'].str.count('RAIN'))
+rain_cycle.head()
+```
+
+
+:::{.notes}
+Remember when we discussed datetime columns and applied time functions to a whole column by adding `.dt` before the function?
+
+We can use that same syntax style when applying string transformations to entire columns, but this time using `.str`.
+
+Perhaps we want the entire `Comments` column from our `cycling` dataframe in uppercase.
+
+We can use `.assign()` and `.upper()` paired with `.str` to transform the column.
+
+Not too shabby!
+
+How about we add a new column that counts the number of times "RAIN" appears in each comment of `upper_cycle`?
+
+Again, we use `.str.count('RAIN')`.
+:::
+
+---
+
+```{python}
+upper_cycle['Comments'].str.split(expand=True)
+```
+
+
+:::{.notes}
+We've also seen this syntax when we used `str.split()` in Module 4 when we learned about splitting our columns.
+
+Here we split up every word in the `Comments` column and created a new column for each.
+:::
+
+---
+
+```{python}
+"My favorite colour" + "is Blue"
+```
+
+
+
+```{python}
+combined_cycle = cycling.assign(Distance_str = cycling['Distance'].astype('str') + ' km')
+combined_cycle.head()
+```
+
+
+:::{.notes}
+Another operation that we've lightly touched on is the concatenation of strings. For instance, when we add 2 strings together:
+
+This can be implemented in dataframes too, by concatenating a column of `str` values with another `str` to create a new column:
+:::
+
+---
+
+```{python}
+upper_cycle.head(3)
+```
+
+
+
+```{python}
+cap_cycle = upper_cycle.assign(Comments = upper_cycle['Comments'].str.capitalize())
+cap_cycle.head(3)
+```
+
+
+
+```{python}
+cap_cycle = upper_cycle.assign(Comments = upper_cycle['Comments'].str.title())
+cap_cycle.head(3)
+```
+
+
+:::{.notes}
+A new function we haven't discussed but is quite similar to `.upper()` and `.lower()` is `.capitalize()`, which capitalizes only the first letter of the string.
+
+Another is `.title()`, which capitalizes the first letter of every word in a string.
+:::
+
+---
+
+## Strip
+
+`.strip()`
+
+
+```{python}
+"Sunshine" == " Sunshine "
+```
+
+
+
+```{python}
+string1 = " Sunshine "
+new_string1 = string1.strip()
+new_string1
+```
+
+
+
+```{python}
+"Sunshine" == new_string1
+```
+
+
+:::{.notes}
+One function that might not seem that pertinent but is extremely useful is `.strip()`.
+
+`.strip()` removes characters from the start and end of a string, with the default being spaces (one-sided variants are sketched after these notes).
+
+For Example:
+
+To us, reading "Sunshine" and " Sunshine " are the same thing, but to Python, they are quite different because of the blank space surrounding it.
+
+The blank space on either side of a string often needs to be removed depending on the analysis.
+
+We can remove them in the example above using `.strip()`.
+:::
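+
+A short sketch of the one-sided variants: the standard Python methods `.lstrip()` and `.rstrip()` remove characters from only the left or right side, respectively.
+
+```{python}
+# Strip whitespace from one side at a time.
+string1.lstrip(), string1.rstrip()
+```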
+
+---
+
+```{python}
+cycling.head()
+```
+
+
+
+```{python}
+cycling[cycling['Comments'] == 'Rain']
+```
+
+
+:::{.notes}
+This can be especially frustrating when we are trying to filter dataframes.
+
+Let's try to filter our data to find rows where the value for the `Comments` column is "Rain".
+
+We can see that index 0 should match our filter, but pandas does not recognize it because of the trailing blank space.
+
+No rows are outputted. That's because there is a blank space following "Rain".
+:::
+
+---
+
+```{python}
+stripped_cycling = cycling.assign(Comments = cycling['Comments'].str.strip())
+stripped_cycling.head()
+```
+
+
+
+```{python}
+stripped_cycling[stripped_cycling['Comments'] == 'Rain']
+```
+
+
+:::{.notes}
+Let's now strip our column using `.strip()` and assigning the changes to the `Comments` column of a dataframe named `stripped_cycling`.
+
+Since we are using `.strip()` with a dataframe column, we have to add `.str`.
+
+This time, when we filter on `Rain` in our new `stripped_cycling` dataframe, pandas returns the matching row!
+
+Ahh, that's much better!
+:::
+
+---
+
+```{python}
+stripped_cycling.tail(5)
+```
+
+
+
+```{python}
+stripped_cycling['Comments'].str.strip("!").tail()
+```
+
+
+:::{.notes}
+We are not limited to stripping the values of white space. We can also strip any other character. Let's try punctuation!
+
+We can see that index 30 has a value of `Feeling good after a holiday break!` in the `Comments` column.
+
+After using `str.strip('!')`, we can see that it no longer has the exclamation mark!
+:::
+
+
+# Let’s apply what we learned!
\ No newline at end of file
diff --git a/modules/module8/slides/module8_23.qmd b/modules/module8/slides/module8_23.qmd
new file mode 100644
index 00000000..5df51340
--- /dev/null
+++ b/modules/module8/slides/module8_23.qmd
@@ -0,0 +1,175 @@
+---
+format: revealjs
+title: More Advanced String Processing
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+## Replace
+
+```{python}
+cycling = pd.read_csv('data/cycling_data.csv')
+cycling.head(10)
+```
+
+
+:::{.notes}
+Ok, we have an idea of how to do some fairly standard string processing; however, it's time we dive a little deeper.
+
+There are **MANY** different functions, but we'll concentrate on a couple here that we will use often, and provide a list of several others that will be useful in future string processing adventures.
+
+Just like in regular text, there will be times in your data analysis where you will want to replace some of the text within a string.
+
+That's where `.replace()` comes in.
+
+We usually like our data to be consistent; however, consistency is not always present, even in the best of dataframes.
+
+Let's take a look at our cycling dataset.
+:::
+
+---
+
+```{python}
+cycling_lower = cycling.assign(Comments = cycling['Comments'].str.lower())
+cycling_lower.head(9)
+```
+
+
+:::{.notes}
+Before we do anything, let's convert this whole column to lowercase to make our life easier.
+
+This means we only need to replace a single version of each word instead of accounting for every possible capitalization.
+:::
+
+---
+
+```{python}
+# | echo: false
+cycling_lower.head()
+```
+
+
+
+```{python}
+cycling_rain = cycling_lower.assign(Comments = cycling_lower['Comments'].str.replace('whether', 'weather'))
+cycling_rain.head()
+```
+
+
+:::{.notes}
+You'll notice in the third row, the word "whether" should have been spelled "weather".
+
+Let's replace this word in the entire dataset with the correct spelling of "weather".
+
+With `.replace()`, the first argument is the text we are identifying, and the second is what we are replacing it with.
+
+We can see that the word `whether` has now been replaced with the more appropriate spelling of `weather`.
+:::
+
+---
+
+## Contains
+
+```{python}
+cycling_lower['Comments'].str.contains('rain')
+```
+
+
+
+```{python}
+cycling_lower[cycling_lower['Comments'].str.contains('rain')]
+```
+
+
+:::{.notes}
+`.contains()` can be used to filter our dataframe.
+
+Instead of checking if a column equals an exact value, we can check if a pattern is ***contained*** in the column.
+
+For example, what if we want all the rows whose comments mention "rain"?
+
+This will return a pandas Series with Boolean values.
+
+We can use this as a condition to filter our dataframe and obtain all the rows that contain the string "rain".
+
+If we wanted to, we could use this subset of data to see if our cyclist Tom was slower, on average, on days that it rained! A quick sketch of that comparison follows these notes.
+:::
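+
+A quick sketch of that comparison (a rough check, not a rigorous analysis), reusing the Boolean filter from above:
+
+```{python}
+# Compare the mean ride Time on rainy-comment days with the overall mean.
+rainy = cycling_lower[cycling_lower['Comments'].str.contains('rain')]
+rainy['Time'].mean(), cycling_lower['Time'].mean()
+```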
+
+---
+
+```{python}
+# | include: false
+rain_before = cycling_lower[cycling_lower['Comments'].str.contains('rain')]
+```
+
+
+
+```{python}
+cycling_lower.loc[cycling_lower['Comments'].str.contains('rain'), 'Comments'] = 'rained'
+```
+
+
+:::{.notes}
+`.replace()` can be somewhat limiting since, as we saw, it only replaces the specific strings we specify.
+
+What if we want to replace any value in the entire dataframe that contains the word `"rain"` with the word `"rained"`?
+
+We actually know how to do this!
+
+We can pair our `.contains()` function with conditional value replacement using `.loc[]`!
+
+We learned about conditional value replacement back in Module 2. Let's see what this looks like.
+
+First, we call our dataframe, and inside `.loc[]` we put our condition as the first argument and the column we wish to change or create as the second.
+
+Next, on the right side of the assignment operator, we specify the new value we wish to assign to the rows that meet the condition.
+
+Let's replace all values that contain `"rain"` with `"rained"`.
+
+What does `cycling_lower` look like now?
+:::
+
+---
+
+The rows originally filtered with "rain" in the dataset:
+
+```{python}
+# | echo: false
+rain_before
+```
+
+
+
+Have now been replaced with "rained" in the `Comments` column:
+
+```{python}
+cycling_lower[cycling_lower['Comments'] == 'rained']
+```
+
+
+:::{.notes}
+The rows originally filtered with "rain" in the dataset have now been replaced with "rained" in the `Comments` column.
+
+Rows 0 and 1, which both had values of `"rain"`, and rows 7 and 8, which were `"raining"` and `"thankfully not raining today!"` respectively, have now all been changed to just `"rained"`.
+:::
+
+---
+
+**Additional String Documentation**
+
+
+:::{.notes}
+There are quite a few other string methods that are available, but this should get you started.
+
+See the documentation here for a table of some of the other string processing possibilities available.
+:::
+
+
+# Let’s apply what we learned!
\ No newline at end of file
diff --git a/modules/module8/slides/module8_27.qmd b/modules/module8/slides/module8_27.qmd
new file mode 100644
index 00000000..5dd6a3de
--- /dev/null
+++ b/modules/module8/slides/module8_27.qmd
@@ -0,0 +1,39 @@
+---
+format: revealjs
+title: What Did We Learn and What to Expect in Assignment 8
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+## Summary
+
+Students are now expected to be able to:
+
+- Use NumPy to create ndarrays with `np.array()` and from functions such as `np.arange()`, `np.linspace()` and `np.ones()`.
+- Describe the shape, dimension and size of an array.
+- Identify null values in a dataframe and manage them by removing them using `.dropna()` or replacing them using `.fillna()`.
+- Manipulate non-standard date/time formats into standard Pandas datetime using `pd.to_datetime()`.
+- Find and replace text in a dataframe using verbs such as `.replace()`.
+
+
+:::{.notes}
+The assignment will concentrate on the learning objectives as well as building knowledge on existing concepts.
+:::
+
+---
+
+## Attribution
+
+The cereal dataset:
+
+ “[80 Cereals](https://www.kaggle.com/crawford/80-cereals/)” (c) by [Chris Crawford](https://www.linkedin.com/in/crawforc3/) is licensed
+under [Creative Commons Attribution-ShareAlike 3.0 Unported](http://creativecommons.org/licenses/by-sa/3.0/)
+
+
+
+# On to Assignment 8!
diff --git a/modules/module9/module9-00-congratulations.qmd b/modules/module9/module9-00-congratulations.qmd
new file mode 100644
index 00000000..82959dec
--- /dev/null
+++ b/modules/module9/module9-00-congratulations.qmd
@@ -0,0 +1,29 @@
+---
+format:
+ html:
+ page-layout: full
+---
+
+# 0. Congratulations!
+
+::: {.panel-tabset .nav-pills}
+
+## Video
+
+
+
+## Slides
+
+
+
+:::
diff --git a/modules/module9/slides/module9_00.qmd b/modules/module9/slides/module9_00.qmd
new file mode 100644
index 00000000..b0c4f16d
--- /dev/null
+++ b/modules/module9/slides/module9_00.qmd
@@ -0,0 +1,71 @@
+---
+format: revealjs
+title: Congratulations on completing Programming in Python for Data Science!
+title-slide-attributes:
+ data-notes: |
+---
+
+```{python}
+# | echo: false
+%run src/utils.py
+```
+
+
+## You did it!
+
+We covered a lot of ground, but you managed to finish all 8 modules. It's important to celebrate this success.
+
+
+---
+
+## Attribution
+
+- Material from UBC's DSCI 511: Python Programming for Data Science by Tom Beuzen.
+
+- The cereal dataset: 80 Cereals(c) by Chris Crawford is licensed under Creative Commons Attribution-ShareAlike 3.0 Unported.
+
+
+---
+
+## Special Thanks
+
+Not only did you put in a lot of work by completing this course, but many people did too by helping make this course possible.
+
+Special thanks must be attributed to the following individuals and organizations (in alphabetical order):
+
+- Tom Beuzen
+- Tim Head
+- Ines Montani
+- Joel Ostblom
+- Elijah Willie
+
+- The Master of Data Science Program at UBC
+- Our friends, family and colleagues for supporting us and cheering us on!
+
+
+---
+
+## About Us
+
+
+
+### Mike Gelbart
+- Website: www.mikegelbart.com
+- Twitter: @MikeGelbart
+
+
+
+### Tiffany Timbers
+
+- Website: www.tiffanytimbers.com
+- Twitter: @TiffanyTimbers
+
+
+
+### Hayley Boyce
+- Website: www.hayleyfboyce.com
+- Twitter: @HayleyFBoyce
+
+
+
+# Good luck on applying your new skills!
\ No newline at end of file