Session-3-3-Examples

This page shows the source code for Session-3-3-Examples.py in browser-friendly HTML format. It was generated automatically from the original Python file.

Source File Session-3-3-Examples.py
Folder Chapter-3-Advanced-Sessions
"""
Advanced Python - Session 3: Working with APIs & Web Scraping
Code Examples and Projects

Note: Install required packages:
pip install requests beautifulsoup4 lxml
"""

import requests
from bs4 import BeautifulSoup
import json
import time
from datetime import datetime
import csv

# ============================================
# PART 1: HTTP Basics & Requests Library
# ============================================
print("=" * 60)
print("PART 1: HTTP Basics & Requests")
print("=" * 60)

# NOTE: every request in this part passes an explicit timeout. requests
# applies no timeout by default, so an unresponsive server would otherwise
# hang the script forever. ValueError is caught next to RequestException
# because older requests releases raise a plain ValueError subclass when a
# body is not valid JSON.

# Example 1: Basic GET request
print("\n--- Example 1: Basic GET Request ---")
try:
    # Using JSONPlaceholder - a free fake API for testing
    response = requests.get(
        'https://jsonplaceholder.typicode.com/posts/1',
        timeout=10
    )

    print(f"Status Code: {response.status_code}")
    print(f"Response Time: {response.elapsed.total_seconds()}s")
    print("\nResponse Data:")
    print(json.dumps(response.json(), indent=2))
except (requests.exceptions.RequestException, ValueError) as e:
    print(f"Error: {e}")

# Example 2: GET with parameters
print("\n--- Example 2: GET with Parameters ---")
try:
    # `params` is encoded into the query string (?userId=1)
    params = {'userId': 1}
    response = requests.get(
        'https://jsonplaceholder.typicode.com/posts',
        params=params,
        timeout=10
    )

    posts = response.json()
    print(f"Found {len(posts)} posts for user 1")
    # Guard against an empty result so indexing cannot raise IndexError
    if posts:
        print(f"First post title: {posts[0]['title']}")
except (requests.exceptions.RequestException, ValueError) as e:
    print(f"Error: {e}")

# Example 3: POST request
print("\n--- Example 3: POST Request ---")
try:
    new_post = {
        'title': 'My New Post',
        'body': 'This is the content of my post',
        'userId': 1
    }

    # `json=` serializes the dict and sets the JSON Content-Type header
    response = requests.post(
        'https://jsonplaceholder.typicode.com/posts',
        json=new_post,
        timeout=10
    )

    print(f"Status Code: {response.status_code}")
    print("Created post:")
    print(json.dumps(response.json(), indent=2))
except (requests.exceptions.RequestException, ValueError) as e:
    print(f"Error: {e}")

# Example 4: Response object attributes
print("\n--- Example 4: Response Object ---")
try:
    response = requests.get(
        'https://jsonplaceholder.typicode.com/users/1',
        timeout=10
    )

    print(f"URL: {response.url}")
    print(f"Status Code: {response.status_code}")
    print(f"Headers: {dict(list(response.headers.items())[:3])}...")  # First 3 headers
    print(f"Encoding: {response.encoding}")
    print(f"Response Time: {response.elapsed.total_seconds()}s")
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# ============================================
# PART 2: Working with REST APIs
# ============================================
print("\n" + "=" * 60)
print("PART 2: Working with REST APIs")
print("=" * 60)

# Example 1: Complete CRUD operations
print("\n--- Example 1: CRUD Operations ---")

class APIClient:
    """Simple JSON REST client (demonstrated against JSONPlaceholder).

    Every public method sends exactly one HTTP request, raises
    ``requests.exceptions.HTTPError`` when the server answers with a
    4xx/5xx status, and otherwise returns the result described on the
    method.
    """

    def __init__(self, base_url, timeout=10):
        """Store the API root and the per-request timeout.

        Args:
            base_url: Root URL of the API. A trailing slash is tolerated.
            timeout: Seconds to wait for the server on each request.
                requests has no default timeout, so one is always passed
                to keep a dead server from blocking the caller forever.
        """
        self.base_url = base_url
        self.timeout = timeout

    def _url(self, endpoint, resource_id=None):
        """Build the URL for an endpoint, optionally for a single resource."""
        # Normalize the joint so 'https://host/' + '/posts' cannot yield '//'
        base = str(self.base_url).rstrip('/')
        path = str(endpoint).lstrip('/')
        if resource_id is None:
            return f"{base}/{path}"
        return f"{base}/{path}/{resource_id}"

    def _request(self, method, url, **kwargs):
        """Send one request with the client timeout and fail on HTTP errors."""
        response = requests.request(method, url, timeout=self.timeout, **kwargs)
        response.raise_for_status()
        return response

    def get_all(self, endpoint):
        """GET all resources of an endpoint and return the decoded JSON."""
        return self._request('GET', self._url(endpoint)).json()

    def get_one(self, endpoint, resource_id):
        """GET a single resource and return the decoded JSON."""
        return self._request('GET', self._url(endpoint, resource_id)).json()

    def create(self, endpoint, data):
        """POST ``data`` as JSON to create a resource; return the decoded JSON."""
        return self._request('POST', self._url(endpoint), json=data).json()

    def update(self, endpoint, resource_id, data):
        """PUT ``data`` as JSON to replace a resource; return the decoded JSON."""
        return self._request(
            'PUT', self._url(endpoint, resource_id), json=data
        ).json()

    def delete(self, endpoint, resource_id):
        """DELETE a resource and return the HTTP status code."""
        return self._request(
            'DELETE', self._url(endpoint, resource_id)
        ).status_code

# Walk through each CRUD verb once against the public demo API
client = APIClient('https://jsonplaceholder.typicode.com')

try:
    # Read one post
    post = client.get_one('posts', 1)
    print(f"Retrieved: {post['title']}")

    # Create a post
    new_post = dict(title='New Post', body='Content', userId=1)
    created = client.create('posts', new_post)
    print(f"Created post with ID: {created['id']}")

    # Replace post 1
    updated_data = dict(title='Updated Title', body='Updated', userId=1)
    updated = client.update('posts', 1, updated_data)
    print(f"Updated: {updated['title']}")

    # Remove post 1
    status = client.delete('posts', 1)
    print(f"Delete status code: {status}")

except requests.exceptions.RequestException as e:
    # Any transport or HTTP-status failure from the calls above ends up here
    print(f"API Error: {e}")

# Example 2: Error handling
print("\n--- Example 2: Error Handling ---")

def safe_api_call(url, timeout=5):
    """GET ``url`` and report the outcome as a dict instead of raising.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``{'success': True, 'data': <decoded JSON>}`` on success, otherwise
        ``{'success': False, 'error': <message>}``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

    # Most specific handlers first; RequestException is the catch-all base.
    except requests.exceptions.Timeout:
        return {'success': False, 'error': 'Request timed out'}

    except requests.exceptions.ConnectionError:
        return {'success': False, 'error': 'Connection failed'}

    except requests.exceptions.HTTPError as e:
        return {'success': False, 'error': f'HTTP error: {e}'}

    except requests.exceptions.RequestException as e:
        return {'success': False, 'error': f'Request error: {e}'}

    # Decode in a separate try block. Newer requests releases raise a JSON
    # error that is also a RequestException, so a handler placed after the
    # RequestException clause above would never run. ValueError covers both
    # the old (json.JSONDecodeError) and the new exception type.
    try:
        data = response.json()
    except ValueError:
        return {'success': False, 'error': 'Invalid JSON response'}

    return {'success': True, 'data': data}

# Try the helper on a URL that is expected to work
result = safe_api_call('https://jsonplaceholder.typicode.com/posts/1')
if not result['success']:
    print(f"Error: {result['error']}")
else:
    print(f"Success! Title: {result['data']['title']}")

# Example 3: Pagination
print("\n--- Example 3: Handling Pagination ---")

def get_all_pages(base_url, endpoint, per_page=10, max_pages=3, timeout=10):
    """Fetch consecutive pages of a paginated endpoint into one list.

    Pages are requested with the ``_page`` / ``_limit`` query parameters,
    starting at page 1, until the server returns an empty page, the page
    cap is reached, or a request fails.

    Args:
        base_url: Root URL of the API.
        endpoint: Collection path appended to ``base_url``.
        per_page: Number of items requested per page.
        max_pages: Safety cap on the number of pages fetched (default 3,
            the demo limit). Pass ``None`` to fetch until the data runs out.
        timeout: Seconds to wait for each request.

    Returns:
        A list with the items of every page fetched successfully; items
        gathered before an error are kept.
    """
    all_data = []
    page = 1

    while max_pages is None or page <= max_pages:
        try:
            params = {'_page': page, '_limit': per_page}
            response = requests.get(
                f"{base_url}/{endpoint}", params=params, timeout=timeout
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError: non-JSON body on older requests releases
            print(f"Error on page {page}: {e}")
            break

        if not data:  # No more data
            break

        all_data.extend(data)
        print(f"Fetched page {page}: {len(data)} items")
        page += 1

    return all_data

# Collect the first pages of the posts collection
all_posts = get_all_pages(
    'https://jsonplaceholder.typicode.com',
    'posts',
)
print(f"Total posts fetched: {len(all_posts)}")

# ============================================
# PART 3: API Authentication
# ============================================
print("\n" + "=" * 60)
print("PART 3: API Authentication")
print("=" * 60)

# Example 1: API Key in headers (most common)
print("\n--- Example 1: API Key Authentication ---")

def fetch_with_api_key(url, api_key, timeout=10):
    """GET ``url`` with the API key sent in the ``X-API-Key`` header.

    Args:
        url: Address to fetch.
        api_key: Key placed in the ``X-API-Key`` request header.
        timeout: Seconds to wait for the server. requests has no default
            timeout, so one is always passed.

    Returns:
        The decoded JSON body, or ``None`` if the request failed, the server
        answered with an error status, or the body was not valid JSON (the
        error is printed).
    """
    headers = {
        'X-API-Key': api_key,
        'Content-Type': 'application/json'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: non-JSON body on older requests releases
        print(f"Error: {e}")
        return None

# Demo: JSONPlaceholder answers regardless of the key that is sent
data = fetch_with_api_key(
    'https://jsonplaceholder.typicode.com/posts/1',
    'demo_key',
)
if data:
    print(f"Fetched: {data['title']}")

# Example 2: Bearer token authentication
print("\n--- Example 2: Bearer Token Authentication ---")

class AuthenticatedClient:
    """API client that sends a Bearer token with every request.

    A single ``requests.Session`` carries the ``Authorization`` and
    ``Content-Type`` headers, so they are set once rather than per call.
    """

    def __init__(self, base_url, token, timeout=10):
        """Create the session and attach the authentication headers.

        Args:
            base_url: Root URL of the API.
            token: Bearer token placed in the ``Authorization`` header.
            timeout: Seconds to wait for the server on each request.
                requests has no default timeout, so one is always passed.
        """
        self.base_url = base_url
        self.token = token
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json'
        })

    def get(self, endpoint):
        """GET ``endpoint`` and return the decoded JSON.

        Raises:
            requests.exceptions.HTTPError: on a 4xx/5xx response.
        """
        url = f"{self.base_url}/{endpoint}"
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def post(self, endpoint, data):
        """POST ``data`` as JSON to ``endpoint`` and return the decoded JSON.

        Raises:
            requests.exceptions.HTTPError: on a 4xx/5xx response.
        """
        url = f"{self.base_url}/{endpoint}"
        response = self.session.post(url, json=data, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

# Demo: one authenticated GET; the success message lives in the else branch
auth_client = AuthenticatedClient('https://jsonplaceholder.typicode.com', 'demo_token')
try:
    data = auth_client.get('posts/1')
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")
else:
    print(f"Authenticated request: {data['title']}")

# ============================================
# PART 4: Web Scraping with Beautiful Soup
# ============================================
print("\n" + "=" * 60)
print("PART 4: Web Scraping with Beautiful Soup")
print("=" * 60)

# Example 1: Basic HTML parsing
print("\n--- Example 1: Basic HTML Parsing ---")

# Inline document used by the parsing examples, so no network is needed
html_doc = """
<html>
<head><title>Sample Page</title></head>
<body>
    <div class="container">
        <h1 id="main-title">Welcome to Web Scraping</h1>
        <p class="intro">This is a sample paragraph.</p>
        <ul class="links">
            <li><a href="https://example.com/page1">Page 1</a></li>
            <li><a href="https://example.com/page2">Page 2</a></li>
            <li><a href="https://example.com/page3">Page 3</a></li>
        </ul>
        <div class="article">
            <h2>Article Title</h2>
            <p class="content">Article content goes here.</p>
            <span class="author">By John Doe</span>
        </div>
    </div>
</body>
</html>
"""

soup = BeautifulSoup(html_doc, 'html.parser')

# Attribute access (soup.title, soup.h1) yields the first matching tag
print(f"Title: {soup.title.string}")
print(f"H1: {soup.h1.get_text()}")
print(f"First link: {soup.find('a')['href']}")

# find_all collects every <a> tag in document order
links = soup.find_all('a')
print(f"\nAll links ({len(links)}):")
for link in links:
    print(f"  - {link.get_text()}: {link.get('href')}")

# Example 2: CSS selectors
print("\n--- Example 2: CSS Selectors ---")

# select() returns a list, so [0] picks the first match

# By class
intro = soup.select('.intro')[0]
print(f"Intro text: {intro.get_text()}")

# By ID
main_title = soup.select('#main-title')[0]
print(f"Main title: {main_title.get_text()}")

# Descendant selector: p.content inside div.article
article_content = soup.select('div.article p.content')[0]
print(f"Article content: {article_content.get_text()}")

# Example 3: Extracting structured data
print("\n--- Example 3: Extracting Structured Data ---")

def extract_articles(html):
    """Collect title, content and author from each ``div.article`` in ``html``.

    Blocks without both an ``<h2>`` and a ``p.content`` are skipped; a
    missing ``span.author`` yields the author ``'Unknown'``.
    """
    parsed = BeautifulSoup(html, 'html.parser')
    found = []

    for block in parsed.find_all('div', class_='article'):
        heading = block.find('h2')
        body = block.find('p', class_='content')
        byline = block.find('span', class_='author')

        # Title and content are mandatory; skip incomplete blocks early
        if not (heading and body):
            continue

        if byline:
            author = byline.get_text(strip=True)
        else:
            author = 'Unknown'

        found.append({
            'title': heading.get_text(strip=True),
            'content': body.get_text(strip=True),
            'author': author
        })

    return found

# Run the extractor on the inline sample document
articles = extract_articles(html_doc)
print(f"Extracted {len(articles)} articles:")
for article in articles:
    print(f"  - {article['title']} by {article['author']}")

# Example 4: Scraping a real website (quotes)
print("\n--- Example 4: Scraping Quotes ---")

def scrape_quotes(url):
    """Scrape quotes from a quotes.toscrape.com style page.

    Args:
        url: Page to download.

    Returns:
        A list of ``{'text', 'author', 'tags'}`` dicts, one per
        ``div.quote`` block that contains both a ``span.text`` and a
        ``small.author``. Returns ``[]`` if the download fails (the error
        is printed).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Python Scraper Tutorial)'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error scraping: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    quotes = []
    for quote in soup.find_all('div', class_='quote'):
        text_elem = quote.find('span', class_='text')
        author_elem = quote.find('small', class_='author')

        # find() returns None when the element is missing. Skip malformed
        # blocks instead of crashing with AttributeError, matching the
        # guard used by parse_quotes.
        if text_elem is None or author_elem is None:
            continue

        quotes.append({
            'text': text_elem.get_text(),
            'author': author_elem.get_text(),
            'tags': [tag.get_text() for tag in quote.find_all('a', class_='tag')]
        })

    return quotes

# Download the front page and preview the first three quotes
quotes = scrape_quotes('http://quotes.toscrape.com/')
if quotes:
    print(f"Scraped {len(quotes)} quotes:")
    preview = quotes[:3]  # Show first 3
    for i, quote in enumerate(preview, start=1):
        print(f"\n{i}. {quote['text']}")
        print(f"   - {quote['author']}")
        print(f"   Tags: {', '.join(quote['tags'])}")

# ============================================
# PART 5: Best Practices
# ============================================
print("\n" + "=" * 60)
print("PART 5: Best Practices")
print("=" * 60)

# Example 1: Rate limiting
print("\n--- Example 1: Rate Limiting ---")

class RateLimitedScraper:
    """Fetcher that keeps at least ``delay`` seconds between requests."""

    def __init__(self, delay=1, timeout=10):
        """Configure the scraper.

        Args:
            delay: Minimum number of seconds between two requests.
            timeout: Seconds to wait for the server on each request.
                requests has no default timeout, so one is always passed.
        """
        self.delay = delay
        self.timeout = timeout
        # Wall-clock time of the previous attempt; 0 means "never", so the
        # first fetch is not delayed.
        self.last_request = 0

    def fetch(self, url):
        """Fetch ``url``, sleeping first if the previous request was too recent.

        Returns:
            The ``requests.Response``, or ``None`` if the request raised a
            ``RequestException`` (the error is printed).
        """
        # Wait if necessary
        elapsed = time.time() - self.last_request
        if elapsed < self.delay:
            time.sleep(self.delay - elapsed)

        try:
            return requests.get(url, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            print(f"Error: {e}")
            return None
        finally:
            # Record the attempt even when it failed. Otherwise a failing
            # server would be retried without any delay.
            self.last_request = time.time()

# Three back-to-back fetches; every fetch after the first waits out the delay
scraper = RateLimitedScraper(delay=1)
print("Fetching URLs with rate limiting...")
for i in range(3):
    start = time.time()
    response = scraper.fetch(f'https://jsonplaceholder.typicode.com/posts/{i+1}')
    if not response:
        continue
    print(f"  Request {i+1} completed in {time.time() - start:.2f}s")

# Example 2: Retry logic with exponential backoff
print("\n--- Example 2: Retry with Exponential Backoff ---")

def fetch_with_retry(url, max_retries=3, timeout=5):
    """GET ``url``, retrying failures with exponential backoff.

    Args:
        url: Address to fetch.
        max_retries: Total number of attempts; must be at least 1.
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        The successful ``requests.Response``.

    Raises:
        ValueError: if ``max_retries`` is smaller than 1. Without this check
            the loop would never run and the function would silently
            return ``None``.
        requests.exceptions.RequestException: the error of the last attempt
            once all attempts have failed.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return response

        except requests.exceptions.RequestException:
            if attempt == max_retries - 1:
                print(f"Failed after {max_retries} attempts")
                raise

            # Waits double after each failed attempt: 1s, 2s, 4s, ...
            wait_time = 2 ** attempt
            print(f"Attempt {attempt + 1} failed. Retrying in {wait_time}s...")
            time.sleep(wait_time)

# Demo: the success message lives in the else branch of the try
try:
    response = fetch_with_retry('https://jsonplaceholder.typicode.com/posts/1')
except requests.exceptions.RequestException as e:
    print(f"All retries failed: {e}")
else:
    print(f"Success! Status: {response.status_code}")

# Example 3: Caching responses
print("\n--- Example 3: Response Caching ---")

class CachedAPIClient:
    """API client that keeps decoded responses in memory for a limited time."""

    def __init__(self, cache_duration=300, timeout=10):  # 5 minutes
        """Configure the cache.

        Args:
            cache_duration: Seconds a cached response stays valid.
            timeout: Seconds to wait for the server on a cache miss.
                requests has no default timeout, so one is always passed.
        """
        # Maps url -> (decoded JSON, time.time() when it was stored)
        self.cache = {}
        self.cache_duration = cache_duration
        self.timeout = timeout

    def get(self, url):
        """Return the JSON for ``url``, from the cache when still fresh.

        Raises:
            requests.exceptions.HTTPError: if the server answers with a
                4xx/5xx status. Such responses are never cached.
        """
        # Check cache
        if url in self.cache:
            data, timestamp = self.cache[url]
            age = time.time() - timestamp

            if age < self.cache_duration:
                print(f"  Cache HIT (age: {age:.1f}s)")
                return data

            print("  Cache EXPIRED")
            # Drop the stale entry so it cannot linger if the refetch fails
            del self.cache[url]
        else:
            print("  Cache MISS")

        # Fetch and cache. Check the status first so an error page is not
        # stored and served as if it were good data.
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()
        self.cache[url] = (data, time.time())
        return data

# Ask for the same URL twice within the cache lifetime
cached_client = CachedAPIClient(cache_duration=10)
url = 'https://jsonplaceholder.typicode.com/posts/1'

# 1st call: nothing cached yet, so this goes to the network
data1 = cached_client.get(url)
print(f"Title: {data1['title']}")

# 2nd call: served from the cache
data2 = cached_client.get(url)
print(f"Title: {data2['title']}")

# ============================================
# PROJECT 1: Weather API Client
# ============================================
print("\n" + "=" * 60)
print("PROJECT 1: Weather API Client")
print("=" * 60)

class WeatherClient:
    """Client for a weather API (simulated with JSONPlaceholder for the demo)."""

    def __init__(self, api_key=None):
        """Store the optional API key and the demo base URL."""
        self.api_key = api_key
        self.base_url = 'https://jsonplaceholder.typicode.com'
        self.cache = {}

    def get_weather(self, city_id):
        """Get current weather for ``city_id`` (simulated).

        One real HTTP request is made so the network path is exercised, but
        the returned values are generated locally from ``city_id``.

        Returns:
            A dict with the keys ``city``, ``temperature``, ``conditions``,
            ``humidity`` and ``timestamp``, or ``None`` if the request
            failed (the error is printed).
        """
        url = f"{self.base_url}/posts/{city_id}"

        try:
            response = requests.get(url, timeout=5)
            response.raise_for_status()

            # In a real API this body would be the weather data. Here it is
            # decoded only to confirm the server sent valid JSON.
            response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError: non-JSON body on older requests releases
            print(f"Error fetching weather: {e}")
            return None

        # Simulate weather data structure
        return {
            'city': f"City {city_id}",
            'temperature': 20 + (city_id % 10),
            'conditions': 'Sunny',
            'humidity': 60,
            'timestamp': datetime.now().isoformat()
        }

    def save_to_csv(self, weather_data, filename='weather_data.csv'):
        """Append one weather record to a CSV file, adding the header if needed.

        Args:
            weather_data: Dict with the keys ``city``, ``temperature``,
                ``conditions``, ``humidity`` and ``timestamp``.
            filename: Target CSV path; created if it does not exist.
        """
        fieldnames = ['city', 'temperature', 'conditions', 'humidity', 'timestamp']

        # UTF-8 is set explicitly so the output does not depend on the
        # platform default encoding.
        with open(filename, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)

            # Append mode starts at the end of the file, so position 0 means
            # the file is new or empty and still needs its header row.
            if f.tell() == 0:
                writer.writeheader()

            writer.writerow(weather_data)

# Demo weather client
weather_client = WeatherClient()

print("\nFetching weather data:")
saved_count = 0  # records actually written to the CSV
for city_id in range(1, 4):
    weather = weather_client.get_weather(city_id)
    if weather:
        print(f"  {weather['city']}: {weather['temperature']}°C, {weather['conditions']}")
        weather_client.save_to_csv(weather)
        saved_count += 1

# Only report success when something was written; get_weather returns None
# on failure, in which case nothing is saved.
if saved_count:
    print("Weather data saved to weather_data.csv")
else:
    print("No weather data was saved")

# ============================================
# PROJECT 2: Web Scraper with CSV Export
# ============================================
print("\n" + "=" * 60)
print("PROJECT 2: Web Scraper with CSV Export")
print("=" * 60)

class WebScraper:
    """Reusable page scraper that can write its results to a CSV file."""

    def __init__(self, delay=1):
        """Open a session with a custom User-Agent and store the request delay."""
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Python Tutorial Scraper)'
        })
        self.session = session
        self.delay = delay

    def scrape_page(self, url, parser_func):
        """Download ``url`` and return ``parser_func(soup)``; ``[]`` on request errors."""
        try:
            # Pause before every request as a simple rate limit
            time.sleep(self.delay)
            page = self.session.get(url, timeout=10)
            page.raise_for_status()
            return parser_func(BeautifulSoup(page.text, 'html.parser'))
        except requests.exceptions.RequestException as e:
            print(f"Error scraping {url}: {e}")
        return []

    def scrape_multiple(self, urls, parser_func):
        """Scrape each URL in order and return all results in one list."""
        collected = []

        for i, url in enumerate(urls, start=1):
            print(f"Scraping {i}/{len(urls)}: {url}")
            collected.extend(self.scrape_page(url, parser_func))

        return collected

    def save_to_csv(self, data, filename, fieldnames):
        """Write ``data`` (a list of dicts) to ``filename``, replacing any old file."""
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            out = csv.DictWriter(f, fieldnames=fieldnames)
            out.writeheader()
            out.writerows(data)

        print(f"Saved {len(data)} items to {filename}")

def parse_quotes(soup):
    """Turn each complete ``div.quote`` in ``soup`` into a quote/author dict."""
    results = []
    for block in soup.find_all('div', class_='quote'):
        text_node = block.find('span', class_='text')
        author_node = block.find('small', class_='author')

        # Both parts are required; skip blocks that lack either one
        if not text_node or not author_node:
            continue

        results.append({
            'quote': text_node.get_text(),
            'author': author_node.get_text()
        })

    return results

# Demo: scrape one page and export the result
scraper = WebScraper(delay=1)
urls = ['http://quotes.toscrape.com/']

print("\nScraping quotes:")
quotes_data = scraper.scrape_multiple(urls, parse_quotes)
if quotes_data:
    # Nothing is written when the scrape came back empty
    scraper.save_to_csv(
        quotes_data,
        'scraped_quotes.csv',
        ['quote', 'author'],
    )
    print(f"Total quotes scraped: {len(quotes_data)}")

print("\n" + "=" * 60)
print("Advanced Session 3 Completed!")
print("Master APIs and web scraping!")
print("=" * 60)