Session-2-5-Examples

This page shows the source code for Session-2-5-Examples.py with teaching notes, PURPOSE comments, and VS Code instructions.

Source File Session-2-5-Examples.py
Folder Chapter-2-Basic-Sessions

import sys
sys.stdout.reconfigure(encoding='utf-8')

"""
Session 5: File Handling & Data Analysis - Code Examples
Introduction to Python Course
"""

import csv
from collections import Counter

# ============================================
# PART 1: Reading Text Files
# ============================================
# Part 1 banner: the title framed by two 60-character rules, one item per line.
print("=" * 60, "PART 1: Reading Text Files", "=" * 60, sep="\n")
# ======================================================
# WHAT THIS SECTION DOES:
# 1) PURPOSE: File handling lets your program read data that
#    persists between runs - text files, logs, config files.
#    Python uses the built-in open() function plus the 'with'
#    statement to safely open, use, and close files. Three
#    reading methods cover all real-world needs:
#    .read()      -> entire file as one string
#    iteration    -> one line at a time (memory efficient)
#    .readlines() -> all lines as a list
#
# 2) VS CODE: Run each Example block with Shift+Enter.
#    Example 1 creates sample.txt on first run (via the except
#    block), then reads it. Examples 2 and 3 reuse that file.
#    After running, open the Explorer panel in VS Code to see
#    the new .txt file appear in your folder.
# ======================================================

# ------------------------------------------------------
# HOW open() AND THE 'with' STATEMENT WORK
# ------------------------------------------------------
#
#   open() takes a filename and a MODE character:
#       "r"  -> READ   (file must exist; default mode)
#       "w"  -> WRITE  (creates new OR overwrites existing)
#       "a"  -> APPEND (creates new OR adds to end of existing)
#
#   Without 'with', you must close the file manually:
#       file = open("data.txt", "r")
#       content = file.read()
#       file.close()              <- easy to forget!
#
#   WITH 'with', Python closes the file automatically
#   even if an error occurs inside the block:
#       with open("data.txt", "r") as file:
#           content = file.read()
#       # file is closed here automatically - ALWAYS use 'with'
#
#   READING METHODS:
#       file.read()         -> returns entire file as ONE string
#       file.readlines()    -> returns a LIST of lines (each with \n)
#       for line in file:   -> iterates one line at a time
#                              (best for large files - no memory overload)
#
#   .strip()  removes leading/trailing whitespace including \n
#   enumerate(file, 1) gives (1, line1), (2, line2), ... starting at 1
# ------------------------------------------------------

# Example 1: Reading entire file at once
print("\n--- Example 1: Read entire file ---")
# ======================================================
# WHAT: Opens sample.txt for reading. If the file does not
#       exist (first run), the except block catches the
#       FileNotFoundError, creates the file, then reads it.
#       file.read() returns the entire contents as one string.
#       Only the open/read sits inside 'try', and the result is
#       printed once at the end whichever path produced it.
#       encoding="utf-8" makes Python decode the file the same
#       way on every operating system. Without it open() uses
#       the platform's locale encoding, which on Windows is
#       often not UTF-8 - so accented letters or emoji typed
#       into sample.txt from an editor that saves UTF-8 (the
#       VS Code default) could come out garbled.
# TRY:  After running once, open sample.txt in VS Code
#       (Explorer panel) and add a 4th line manually.
#       Run this block again and see the new line appear.
# ======================================================
try:
    with open("sample.txt", "r", encoding="utf-8") as file:   # "r" = read mode
        content = file.read()                # entire file as one string
except FileNotFoundError:
    print("File not found - creating sample.txt first...")
    with open("sample.txt", "w", encoding="utf-8") as file:   # "w" = write mode (creates file)
        file.write("Hello, this is line 1.\n")
        file.write("This is line 2.\n")
        file.write("And this is line 3.\n")

    with open("sample.txt", "r", encoding="utf-8") as file:
        content = file.read()

print(content)                               # same print for both paths

# Example 2: Reading line by line
print("\n--- Example 2: Read line by line ---")
# ======================================================
# WHAT: Iterating over the file object gives one line per
#       iteration - Python does NOT load the whole file into
#       memory at once. This is the right choice for large files.
#       enumerate(file, 1) adds a counter starting at 1.
#       .strip() removes the invisible \n at the end of each line.
#       encoding="utf-8" decodes the file the same way on every
#       operating system instead of relying on the platform's
#       locale encoding, so non-English characters added to
#       sample.txt in a UTF-8 editor are read back correctly.
# TRY:  Remove .strip() from the print and run again.
#       Notice the blank line after each output - that is the \n.
# ======================================================
with open("sample.txt", "r", encoding="utf-8") as file:
    for line_number, line in enumerate(file, 1):   # 1 = start counting from 1
        print(f"Line {line_number}: {line.strip()}")  # .strip() removes the \n

# Example 3: Reading into a list
print("\n--- Example 3: Read all lines into a list ---")
# ======================================================
# WHAT: .readlines() loads ALL lines into a Python list.
#       Each item in the list is one line including its \n.
#       Use when you need to access lines by index or loop
#       through them multiple times.
#       encoding="utf-8" decodes the file the same way on every
#       operating system instead of using the platform default.
#       The 'if lines' check protects lines[0]: indexing an
#       empty list raises IndexError, which would happen if
#       sample.txt had been emptied by hand.
# TRY:  Print lines[-1].strip() to get the LAST line of the file.
#       Print lines[1].strip() to get the SECOND line.
# ======================================================
with open("sample.txt", "r", encoding="utf-8") as file:
    lines = file.readlines()               # list: ["line1\n", "line2\n", ...]

# The list stays available after the file is closed.
print(f"Total lines: {len(lines)}")
if lines:                                  # an empty list is "falsy"
    print(f"First line:  {lines[0].strip()}")   # index 0 = first
else:
    print("First line:  (file is empty)")


# ============================================
# PART 2: Writing to Text Files
# ============================================
# Part 2 banner: the empty first item produces the blank line before the top rule.
print("", "=" * 60, "PART 2: Writing to Text Files", "=" * 60, sep="\n")
# ======================================================
# WHAT THIS SECTION DOES:
# 1) PURPOSE: Writing files lets your program SAVE results
#    that survive after the program closes - reports, logs,
#    exports. The key decision every time: do you want to
#    OVERWRITE the file ("w") or ADD to the end ("a")?
#
# 2) VS CODE: Run each Example with Shift+Enter. After each
#    write operation, open the file in VS Code's Explorer to
#    see the actual file content change on disk.
# ======================================================

# ------------------------------------------------------
# WRITE MODE vs APPEND MODE
# ------------------------------------------------------
#
#   "w" (WRITE / OVERWRITE):
#       - Creates the file if it does not exist
#       - If the file ALREADY EXISTS, it is WIPED and replaced
#       - Use when you want a fresh file every run
#
#   "a" (APPEND):
#       - Creates the file if it does not exist
#       - If the file ALREADY EXISTS, new content is added
#         to the END - existing content is preserved
#       - Use for log files, running totals, ongoing records
#
#   WRITING METHODS:
#       file.write("text")         writes a string (you add \n manually)
#       file.writelines(list)      writes each item in a list
#
#   IMPORTANT: write() does NOT add a newline automatically.
#   You must add \n yourself: file.write("hello\n")
#   Without \n, the next write() continues on the SAME line.
# ------------------------------------------------------

# Example 1: Writing (overwrite mode)
print("\n--- Example 1: Writing to file (overwrite) ---")
# ======================================================
# WHAT: Opening output.txt with "w" always starts from an empty
#       file: it is created if missing and wiped if it exists.
#       Inside the 'with' block each write() continues where the
#       previous one stopped, so the \n at the end of every
#       string is what puts the next text on its own line.
# TRY:  Run this block TWICE and open output.txt after each run.
#       It holds the same 3 lines both times because "w" starts
#       over on every run - nothing accumulates.
# ======================================================
numbers_line = f"Numbers: {[1, 2, 3, 4, 5]}\n"    # the list is shown as [1, 2, 3, 4, 5]
with open("output.txt", "w") as out_file:         # "w" = start from an empty file
    out_file.write("This is the first line.\n")   # \n ends the line in the file
    out_file.write("This is the second line.\n")
    out_file.write(numbers_line)
print("Written to 'output.txt'")

# Example 2: Appending to file
print("\n--- Example 2: Appending to file ---")
# ======================================================
# WHAT: Mode "a" opens output.txt positioned at its end, so the
#       two new lines are added after whatever is already there.
#       writelines() writes every string in the list, one after
#       the other; like write() it adds no \n of its own.
#       The file is then reopened in "r" mode to show the result.
# TRY:  Run this block THREE times in a row, then open
#       output.txt. The appended lines appear three times
#       because "a" keeps the old content and adds to it.
# ======================================================
appended_lines = [
    "This line was appended.\n",
    "And so was this one.\n",
]
with open("output.txt", "a") as out_file:    # "a" = keep content, add at the end
    out_file.writelines(appended_lines)
print("Appended to 'output.txt'")

print("\nContents of 'output.txt':")
with open("output.txt", "r") as in_file:
    current_text = in_file.read()
print(current_text)

# Example 3: Writing a list to file
print("\n--- Example 3: Writing a list to file ---")
# ======================================================
# WHAT: The loop visits the list once and writes one line per
#       item. The f-string f"{item}\n" joins the item and the
#       newline that write() does not add by itself.
#       The same loop works for saving any list of strings.
# TRY:  Add "Butter" and "Cheese" to shopping_list, run again,
#       then open shopping_list.txt to see all items.
# ======================================================
shopping_list = ["Apples", "Bananas", "Milk", "Bread", "Eggs"]
with open("shopping_list.txt", "w") as list_file:
    for item in shopping_list:
        list_file.write(f"{item}\n")    # one item per line
print("Shopping list written to 'shopping_list.txt'")


# ============================================
# PART 3: Working with CSV Files
# ============================================
# Part 3 banner: the empty first item produces the blank line before the top rule.
print("", "=" * 60, "PART 3: Working with CSV Files", "=" * 60, sep="\n")
# ======================================================
# WHAT THIS SECTION DOES:
# 1) PURPOSE: CSV (Comma-Separated Values) is the universal
#    format for tabular data - spreadsheets, databases, exports
#    from Excel. Python's built-in 'csv' module handles the
#    quoting, escaping, and parsing automatically so you never
#    have to split commas manually.
#    Two styles of reading/writing:
#    List style   : rows as plain lists  (csv.reader / csv.writer)
#    Dict style   : rows as dictionaries (csv.DictReader / csv.DictWriter)
#    Dict style is usually better - column names make the code readable.
#
# 2) VS CODE: Run all four Examples together with Shift+Enter.
#    Open employees.csv in VS Code after Example 1 - you'll see
#    the comma-separated columns. Then open it in Excel/Sheets
#    to see it as a proper table.
# ======================================================

# ------------------------------------------------------
# HOW CSV FILES WORK
# ------------------------------------------------------
#
#   A CSV file looks like this on disk:
#       name,age,department,salary
#       Alice,28,Engineering,75000
#       Bob,35,Marketing,65000
#
#   LIST STYLE (csv.reader / csv.writer):
#       Each row is a plain Python list:
#       ["Alice", "28", "Engineering", "75000"]
#       Access by index: row[0] = "Alice", row[3] = "75000"
#       NOTE: ALL values come back as STRINGS - you must
#       convert numbers yourself: int(row[1]), float(row[3])
#
#   DICT STYLE (csv.DictReader / csv.DictWriter):
#       Each row is a dictionary keyed by the header row:
#       {"name": "Alice", "age": "28", "salary": "75000"}
#       Access by name: row["name"], row["salary"]
#       Much more readable in code - recommended for real use.
#
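#   newline='' in open() is REQUIRED when writing CSV on Windows
#   to prevent blank lines between rows. The csv documentation
#   recommends passing it when READING CSV files as well, so that
#   line breaks inside quoted fields are handled correctly.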
#
#   next(reader) skips one row (used to skip the header row
#   when using csv.reader so you don't process header as data).
# ------------------------------------------------------

# Example 1: Create a sample CSV file
print("\n--- Example 1: Creating a CSV file ---")
# ======================================================
# WHAT: 'employees' is a list of lists whose first sublist is
#       the header row; here it is built by putting the header
#       in front of the data rows.
#       writer.writerows() writes the whole list in one call.
#       newline='' prevents Windows from inserting blank lines.
# TRY:  After running, open employees.csv in VS Code's text
#       editor - you can see the raw commas. Then open it in
#       Excel or Google Sheets to see it as a table.
# ======================================================
header_row = ["name", "age", "department", "salary"]
staff_rows = [
    ["Alice", 28, "Engineering", 75000],
    ["Bob", 35, "Marketing", 65000],
    ["Charlie", 42, "Sales", 70000],
    ["Diana", 31, "Engineering", 80000],
    ["Eve", 29, "HR", 60000],
]
employees = [header_row] + staff_rows          # header first, then the data

with open("employees.csv", "w", newline='') as csv_file:   # newline='' required!
    csv.writer(csv_file).writerows(employees)              # every row in one call
print("Created 'employees.csv'")

# Example 2: Reading CSV as lists
print("\n--- Example 2: Reading CSV as lists ---")
# ======================================================
# WHAT: csv.reader returns each row as a list of strings.
#       next(reader) reads (and discards) the header row so
#       the for loop only processes data rows.
#       All values are strings even if they look like numbers:
#       row[1] is "28" not 28 - use int(row[1]) if you need math.
#       newline='' is passed when READING too: the csv module
#       documentation asks for it so that the csv module, not
#       open(), decides how line endings are handled (this
#       matters when a quoted field contains a line break).
# TRY:  Print type(row[1]) inside the loop.
#       You will see <class 'str'> - not int.
# ======================================================
with open("employees.csv", "r", newline='') as file:
    reader = csv.reader(file)
    header = next(reader)              # reads & skips the header row
    print(f"Header: {header}")
    print("\nEmployee data:")
    for row in reader:                 # each row is a list of strings
        print(f"  {row[0]}: Age {row[1]}, {row[2]} dept, ${row[3]}")

# Example 3: Reading CSV as dictionaries
print("\n--- Example 3: Reading CSV as dictionaries ---")
# ======================================================
# WHAT: csv.DictReader automatically uses the first row as
#       the dictionary keys. Each subsequent row becomes a dict.
#       row['department'] is far clearer than row[2].
#       The header row is consumed automatically - no next() needed.
#       newline='' is passed when READING too: the csv module
#       documentation asks for it so that the csv module, not
#       open(), decides how line endings are handled.
# TRY:  Change the filter to "Marketing" or "Sales" to see
#       different departments. Then remove the if entirely to
#       print all employees as dictionaries.
# ======================================================
with open("employees.csv", "r", newline='') as file:
    reader = csv.DictReader(file)       # header row becomes dict keys
    print("\nEngineering employees:")
    for row in reader:                  # each row is a dict
        if row['department'] == "Engineering":
            print(f"  {row['name']}: ${row['salary']}")

# Example 4: Writing dictionaries to CSV
print("\n--- Example 4: Writing dictionaries to CSV ---")
# ======================================================
# WHAT: csv.DictWriter is given 'fieldnames', the list of dict
#       keys in the order the columns should appear.
#       writeheader() writes those names as the first row.
#       writerows() then writes one row per dictionary.
# TRY:  Add a third employee dict to new_employees and run.
#       Open new_employees.csv to verify the third row appears.
# ======================================================
new_employees = [
    {"name": "Frank", "age": 26, "department": "IT", "salary": 72000},
    {"name": "Grace", "age": 33, "department": "Finance", "salary": 78000},
]
fieldnames = ["name", "age", "department", "salary"]   # defines column order

with open("new_employees.csv", "w", newline='') as csv_file:
    dict_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    dict_writer.writeheader()                # header row first
    dict_writer.writerows(new_employees)     # then every data row
print("Created 'new_employees.csv'")


# ============================================
# PART 4: Data Analysis with CSV
# ============================================
# Part 4 banner: the empty first item produces the blank line before the top rule.
print("", "=" * 60, "PART 4: Data Analysis with CSV", "=" * 60, sep="\n")
# ======================================================
# WHAT THIS SECTION DOES:
# 1) PURPOSE: Combines file reading with the data analysis
#    patterns from Session 3 (lists, dicts, loops). A real
#    sales dataset is created, loaded into a list of dicts,
#    then analysed five ways: total revenue, best seller,
#    average sale, breakdown by category, and top 5 by price.
#    This is exactly what data analysts do with Python daily.
#
# 2) VS CODE: Run the entire Part 4 block together with
#    Shift+Enter - the CSV must be created before it is read.
#    Study each analysis step individually and match the
#    printed output to the code that produced it.
# ======================================================

# ------------------------------------------------------
# KEY PATTERNS USED IN THIS SECTION
# ------------------------------------------------------
#
#   GENERATOR EXPRESSION inside sum() - compact total:
#       sum(float(s['amount']) * int(s['quantity']) for s in sales)
#       Same as a for loop that multiplies and accumulates,
#       written in one line. float() and int() convert strings
#       to numbers (remember: CSV values are always strings).
#
#   Counter() from the collections module:
#       Counter(["apple","apple","banana"]) -> {"apple":2, "banana":1}
#       .most_common(1) returns the [(item, count)] of the top item.
#       Perfect for "what is the most frequent value" questions.
#
#   sorted() with a lambda key:
#       sorted(sales, key=lambda x: float(x['amount']), reverse=True)
#       lambda x: float(x['amount']) is a tiny function that tells
#       sorted() WHAT to sort by (the 'amount' field).
#       reverse=True = highest first (descending order).
#
#   dict.get(key, default):
#       categories.get(cat, 0) returns 0 if cat not yet in the dict.
#       Used here to safely accumulate totals without KeyError.
# ------------------------------------------------------

# Create sample sales data
# The first sublist is the header row; every other sublist is one
# sale: date, product, category, unit price ("amount") and quantity.
print("\n--- Creating sample sales data ---")
sales_data = [
    ["date",       "product",  "category",    "amount",  "quantity"],
    ["2024-01-15", "Laptop",   "Electronics", 1299.99,   1],
    ["2024-01-15", "Mouse",    "Electronics",   29.99,   2],
    ["2024-01-15", "Keyboard", "Electronics",   79.99,   1],
    ["2024-01-16", "Desk",     "Furniture",    399.99,   1],
    ["2024-01-16", "Chair",    "Furniture",    199.99,   2],
    ["2024-01-16", "Lamp",     "Furniture",     45.50,   3],
    ["2024-01-17", "Laptop",   "Electronics", 1299.99,   1],
    ["2024-01-17", "Monitor",  "Electronics",  249.99,   1],
    ["2024-01-17", "Webcam",   "Electronics",   89.99,   2],
    ["2024-01-18", "Notebook", "Stationery",     5.99,   5],
    ["2024-01-18", "Pen",      "Stationery",     1.99,  10],
    ["2024-01-18", "Folder",   "Stationery",     3.49,   8],
    ["2024-01-19", "Laptop",   "Electronics", 1299.99,   1],
    ["2024-01-19", "Mouse",    "Electronics",   29.99,   1],
    ["2024-01-19", "Monitor",  "Electronics",  249.99,   2],
    ["2024-01-20", "Desk",     "Furniture",    399.99,   1],
    ["2024-01-20", "Chair",    "Furniture",    199.99,   1],
    ["2024-01-20", "Cabinet",  "Furniture",    299.99,   1],
    ["2024-01-21", "Notebook", "Stationery",     5.99,   3],
    ["2024-01-21", "Pen",      "Stationery",     1.99,   5]
]

with open("sales.csv", "w", newline='') as file:
    writer = csv.writer(file)
    writer.writerows(sales_data)
print("Created 'sales.csv'")

# Load all sales into a list of dicts for analysis
# newline='' is passed when READING too: the csv module documentation
# asks for it so that the csv module, not open(), decides how line
# endings are handled.
print("\n--- Analyzing Sales Data ---")
with open("sales.csv", "r", newline='') as file:
    reader = csv.DictReader(file)
    sales = list(reader)               # each row is a dict; list() collects them all

# Analysis 1: Total revenue
# ======================================================
# WHAT: A generator expression inside sum() produces one number
#       per sale - amount times quantity - and sum() adds them.
#       float() turns the text "1299.99" into the number 1299.99
#       int() turns the text "1" into the number 1
#       The :,.2f format shows a comma between thousands and
#       exactly 2 decimal places.
# ======================================================
total_revenue = sum(
    float(s['amount']) * int(s['quantity'])   # value of one CSV row
    for s in sales
)
revenue_text = f"{total_revenue:,.2f}"
print(f"\n1. Total Revenue: ${revenue_text}")

# Analysis 2: Best selling product
# ======================================================
# WHAT: Collect the product name of every sale, then let Counter
#       count how many times each name appears.
#       .most_common(1) returns a one-item list: [(product, count)].
#       [0] takes that (product, count) tuple out of the list and
#       tuple unpacking splits it into the name and the count.
# TRY:  Change most_common(1) to most_common(3) and print
#       top_entries to see the top 3.
# ======================================================
products = [sale['product'] for sale in sales]
product_counts = Counter(products)             # name -> number of rows
top_entries = product_counts.most_common(1)    # [(product, count)]
best_seller = top_entries[0]                   # (product, count) tuple
best_name, best_count = best_seller
print(f"2. Best Seller: {best_name} ({best_count} sales)")

# Analysis 3: Average sale amount
# The Analysis 1 total divided by the number of rows loaded from sales.csv.
sale_count = len(sales)
avg_sale = total_revenue / sale_count
print(f"3. Average Sale: ${avg_sale:.2f}")

# Analysis 4: Sales by category
# ======================================================
# WHAT: The categories dict maps each category name to its
#       running total.
#       .get(cat, 0) gives 0 the first time a category appears,
#       so the addition works without a KeyError.
#       sorted(..., key=lambda pair: pair[1], reverse=True) orders
#       the (category, total) pairs by the total, highest first.
#       lambda pair: pair[1] means "sort by the second element".
# TRY:  Change reverse=True to reverse=False to sort lowest first.
# ======================================================
print("\n4. Sales by Category:")
categories = {}
for sale in sales:
    cat = sale['category']
    line_total = float(sale['amount']) * int(sale['quantity'])
    categories[cat] = categories.get(cat, 0) + line_total   # add to running total

ranked_categories = sorted(
    categories.items(),
    key=lambda pair: pair[1],      # the total is the second element
    reverse=True,                  # highest first
)
for category, total in ranked_categories:
    print(f"   {category}: ${total:,.2f}")

# Analysis 5: Top 5 most expensive items
# ======================================================
# WHAT: sorted() with a lambda that extracts the 'amount' field
#       and converts to float for numeric comparison.
#       The same product is sold on several days, so the sorted
#       rows contain repeats (the three Laptop rows come first).
#       The loop keeps only the first - highest priced - row of
#       each product, using a set to remember which products
#       are already listed, so every ranked item is different.
#       [:5] slices the de-duplicated list to get only the top 5.
#       enumerate(..., 1) adds ranking numbers 1-5.
# TRY:  Change [:5] to [:3] for a top-3 list instead.
# ======================================================
print("\n5. Top 5 Most Expensive Items:")
sales_by_price = sorted(sales,
                         key=lambda x: float(x['amount']),  # sort by amount
                         reverse=True)                       # highest first

unique_by_price = []          # one row per product, still highest price first
listed_products = set()       # product names already in unique_by_price
for sale in sales_by_price:
    if sale['product'] not in listed_products:
        listed_products.add(sale['product'])
        unique_by_price.append(sale)

for i, sale in enumerate(unique_by_price[:5], 1):           # top 5 with rank
    print(f"   {i}. {sale['product']}: ${float(sale['amount']):.2f}")


# ============================================
# PART 5: Practical Examples
# ============================================
# Part 5 banner: the empty first item produces the blank line before the top rule.
print("", "=" * 60, "PART 5: Practical Examples", "=" * 60, sep="\n")
# ======================================================
# WHAT THIS SECTION DOES:
# 1) PURPOSE: Three complete mini-programs that combine file
#    reading, writing, and data analysis into realistic tools:
#    - Word frequency counter : reads text, counts every word
#    - Log file analyser      : classifies and reports log lines
#    - Data filter + export   : filters CSV rows and saves results
#    These are the kinds of scripts Python professionals write
#    every day for reporting, monitoring, and data pipelines.
#
# 2) VS CODE: Run each Example separately with Shift+Enter.
#    After each one, open the output file(s) in VS Code to
#    see what the program produced on disk.
# ======================================================

# Example 1: Word Frequency Counter
print("\n--- Example 1: Word Frequency Counter ---")
# ======================================================
# WHAT: Four steps working together:
#   1. Write sample text to a file
#   2. Read it back line by line
#   3. For each line: lowercase, strip whitespace, split into words
#   4. For each word: strip punctuation, count with dict
#   .lower()     normalises case so "Python" and "python" count as one
#   .strip('.,!?;:"\'()') removes punctuation at word edges, including
#                quotes, brackets, colons and semicolons, which are
#                common in text copied from books and articles
#   .get(word, 0) + 1 is the standard "count occurrences" dict pattern
#   sorted(..., key=lambda x: x[1], reverse=True) sorts by count
#   encoding="utf-8" is used for both the write and the read so any
#                character can be saved; the platform's default
#                encoding (often not UTF-8 on Windows) can raise an
#                error on characters it cannot represent
# TRY:  Change the text to a paragraph from any book or article
#       and see which words dominate.
# ======================================================
sample_text = """
Python is an amazing programming language.
Python is easy to learn and powerful to use.
Many developers love Python for its simplicity.
Python has a large community of developers.
"""

with open("sample_text.txt", "w", encoding="utf-8") as file:
    file.write(sample_text)

word_count = {}
with open("sample_text.txt", "r", encoding="utf-8") as file:
    for line in file:
        words = line.lower().strip().split()    # lowercase, trim, split on spaces
        for word in words:
            word = word.strip('.,!?;:"\'()')    # remove edge punctuation
            if word:                            # skip empty strings
                word_count[word] = word_count.get(word, 0) + 1  # count it

print("\nTop 10 Most Common Words:")
sorted_words = sorted(word_count.items(),
                       key=lambda x: x[1],      # sort by count (second element)
                       reverse=True)             # most frequent first
for i, (word, count) in enumerate(sorted_words[:10], 1):
    print(f"  {i}. '{word}': {count} times")

# Example 2: Log File Analyser
print("\n--- Example 2: Log File Analyser ---")
# ======================================================
# WHAT: A log analysis pattern: count entries per level.
#   1. Write sample log entries to application.log
#   2. Read the file and take the level from its own field
#   3. Count each level and collect ERROR lines for a detail report
#   Every entry looks like "DATE TIME LEVEL: message", so after
#   .split() the level is the third piece (index 2) with a ':' at
#   the end. Reading that field is safer than testing
#   '"INFO" in line': a substring test would also match a level
#   name that only appears inside the message text.
#   'level in log_stats' checks whether the dict has that key.
# TRY:  Add "CRITICAL": 0 to the log_stats dict and a "CRITICAL"
#       entry to log_entries - it is counted with no loop changes.
#       Then add a print line for it in the report.
# ======================================================
log_entries = [
    "2024-01-15 10:23:45 INFO: Application started",
    "2024-01-15 10:24:12 INFO: User login: alice@example.com",
    "2024-01-15 10:25:33 ERROR: Database connection failed",
    "2024-01-15 10:26:01 WARNING: Slow query detected",
    "2024-01-15 10:27:15 INFO: User login: bob@example.com",
    "2024-01-15 10:28:42 ERROR: File not found: config.ini",
    "2024-01-15 10:29:18 INFO: Cache cleared",
    "2024-01-15 10:30:55 ERROR: Invalid API key",
    "2024-01-15 10:31:22 WARNING: High memory usage detected",
    "2024-01-15 10:32:40 INFO: Backup completed successfully"
]

with open("application.log", "w") as file:
    for entry in log_entries:
        file.write(entry + "\n")

log_stats = {"INFO": 0, "WARNING": 0, "ERROR": 0}  # counters start at 0
errors    = []                                       # collects error lines

with open("application.log", "r") as file:
    for line in file:
        fields = line.split(maxsplit=3)   # [date, time, "LEVEL:", message]
        if len(fields) < 3:
            continue                      # blank or incomplete line - skip it
        level = fields[2].rstrip(":")     # "ERROR:" -> "ERROR"
        if level in log_stats:            # only count the levels we track
            log_stats[level] += 1
            if level == "ERROR":
                errors.append(line.strip())   # save full error line for report

print("\nLog Analysis:")
print(f"  INFO messages:    {log_stats['INFO']}")
print(f"  WARNING messages: {log_stats['WARNING']}")
print(f"  ERROR messages:   {log_stats['ERROR']}")
print("\nError details:")
for error in errors:
    print(f"  - {error}")

# Example 3: Data Filtering and Export
print("\n--- Example 3: Data Filtering and Export ---")
# ======================================================
# WHAT: A complete ETL mini-pipeline (Extract, Transform, Load):
#   EXTRACT  : read rows from sales.csv
#   TRANSFORM: calculate total_value, keep only high-value rows
#   LOAD     : write filtered rows to a new CSV file
#   This pattern - filter a large dataset and export the subset -
#   is one of the most common real-world Python tasks.
#   f"{value:.2f}" formats the number to 2 decimal places as a string
#   before storing it in the dict so it saves nicely to CSV.
#   newline='' is passed when READING too: the csv module
#   documentation asks for it so that the csv module, not open(),
#   decides how line endings are handled.
#   The else branch reports when no row passes the filter, so a
#   high threshold does not make the program finish silently.
# TRY:  Change the threshold from 200 to 500 and re-run.
#       Fewer rows will qualify - check the output file count.
# ======================================================
high_value_sales = []
with open("sales.csv", "r", newline='') as file:
    reader = csv.DictReader(file)
    for row in reader:
        total_value = float(row['amount']) * int(row['quantity'])
        if total_value >= 200:                           # filter threshold
            row['total_value'] = f"{total_value:.2f}"   # add calculated column
            high_value_sales.append(row)

if high_value_sales:
    with open("high_value_sales.csv", "w", newline='') as file:
        fieldnames = ["date", "product", "category", "amount", "quantity", "total_value"]
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(high_value_sales)

    print(f"Exported {len(high_value_sales)} high-value sales to 'high_value_sales.csv'")
    total_filtered = sum(float(s['total_value']) for s in high_value_sales)
    print(f"Total value of high-value sales: ${total_filtered:,.2f}")
else:
    # Nothing qualified: say so instead of printing nothing at all.
    print("No sales met the threshold - 'high_value_sales.csv' was not written")


# ============================================
# SUMMARY
# ============================================
# Closing report. Each entry below is printed on its own line, in order;
# entries that start with "\n" are preceded by a blank line.
summary_lines = [
    "\n" + "=" * 60,
    "SESSION 5 COMPLETE!",
    "=" * 60,
    "\nFiles created in this session:",
    "  1. sample.txt            - Basic text file",
    "  2. output.txt            - Text output with append demo",
    "  3. shopping_list.txt     - List written to file",
    "  4. employees.csv         - Employee data (list style)",
    "  5. new_employees.csv     - Employees (dict style)",
    "  6. sales.csv             - Sales transaction data",
    "  7. sample_text.txt       - Text for word counting",
    "  8. application.log       - Log file for analysis",
    "  9. high_value_sales.csv  - Filtered sales data",
    "\nKey skills learned:",
    "  - Reading and writing text files with 'with open()'",
    "  - Difference between write mode 'w' and append mode 'a'",
    "  - Working with CSV files using csv.reader and DictReader",
    "  - Writing CSV with csv.writer and DictWriter",
    "  - Data analysis: totals, averages, grouping, sorting",
    "  - Using lambda as a sort key",
    "  - Counter() for frequency counting",
    "  - Real-world patterns: word counter, log analyser, ETL filter",
    "\nGreat job completing Session 5!",
    "=" * 60,
]
for summary_line in summary_lines:
    print(summary_line)