# NOTE: This page shows the source code for Session-3-3-Examples.py in browser-friendly HTML format. It was generated automatically from the original Python file.
"""
Advanced Python - Session 3: Working with APIs & Web Scraping
Code Examples and Projects
Note: Install required packages:
pip install requests beautifulsoup4 lxml
"""
import requests
from bs4 import BeautifulSoup
import json
import time
from datetime import datetime
import csv
# ============================================
# PART 1: HTTP Basics & Requests Library
# ============================================
print("=" * 60)
print("PART 1: HTTP Basics & Requests")
print("=" * 60)

# Example 1: Basic GET request
print("\n--- Example 1: Basic GET Request ---")
try:
    # Using JSONPlaceholder - a free fake API for testing.
    # A timeout is passed on every call so a stalled server cannot hang the script.
    response = requests.get(
        'https://jsonplaceholder.typicode.com/posts/1',
        timeout=10,
    )
    print(f"Status Code: {response.status_code}")
    print(f"Response Time: {response.elapsed.total_seconds()}s")
    print("\nResponse Data:")
    print(json.dumps(response.json(), indent=2))
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# Example 2: GET with parameters
print("\n--- Example 2: GET with Parameters ---")
try:
    params = {'userId': 1}
    response = requests.get(
        'https://jsonplaceholder.typicode.com/posts',
        params=params,
        timeout=10,
    )
    posts = response.json()
    print(f"Found {len(posts)} posts for user 1")
    # Guard the index: an empty result list would otherwise raise IndexError,
    # which the except clause below does not catch.
    if posts:
        print(f"First post title: {posts[0]['title']}")
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# Example 3: POST request
print("\n--- Example 3: POST Request ---")
try:
    new_post = {
        'title': 'My New Post',
        'body': 'This is the content of my post',
        'userId': 1,
    }
    # json= serializes the dict as the request body.
    response = requests.post(
        'https://jsonplaceholder.typicode.com/posts',
        json=new_post,
        timeout=10,
    )
    print(f"Status Code: {response.status_code}")
    print("Created post:")
    print(json.dumps(response.json(), indent=2))
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# Example 4: Response object attributes
print("\n--- Example 4: Response Object ---")
try:
    response = requests.get(
        'https://jsonplaceholder.typicode.com/users/1',
        timeout=10,
    )
    print(f"URL: {response.url}")
    print(f"Status Code: {response.status_code}")
    print(f"Headers: {dict(list(response.headers.items())[:3])}...")  # First 3 headers
    print(f"Encoding: {response.encoding}")
    print(f"Response Time: {response.elapsed.total_seconds()}s")
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# ============================================
# PART 2: Working with REST APIs
# ============================================
print("\n" + "=" * 60)
print("PART 2: Working with REST APIs")
print("=" * 60)

# Example 1: Complete CRUD operations
print("\n--- Example 1: CRUD Operations ---")
class APIClient:
    """Simple API client for JSONPlaceholder.

    Each method issues one HTTP request, calls ``raise_for_status()`` so
    4xx/5xx responses raise ``requests.exceptions.HTTPError``, and returns
    the decoded JSON body (``delete`` returns the status code instead).

    Args:
        base_url: Root URL of the API; a trailing slash is tolerated.
        timeout: Seconds to wait on each request before giving up.
    """

    def __init__(self, base_url, timeout=10):
        self.base_url = base_url
        self.timeout = timeout

    def _url(self, endpoint, resource_id=None):
        """Build the URL for a collection or for one resource in it."""
        # Strip a trailing slash so 'https://host/' does not yield 'https://host//posts'.
        base = self.base_url.rstrip('/')
        if resource_id is None:
            return f"{base}/{endpoint}"
        return f"{base}/{endpoint}/{resource_id}"

    def get_all(self, endpoint):
        """GET all resources"""
        response = requests.get(self._url(endpoint), timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_one(self, endpoint, resource_id):
        """GET single resource"""
        response = requests.get(
            self._url(endpoint, resource_id), timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def create(self, endpoint, data):
        """POST - Create resource"""
        response = requests.post(
            self._url(endpoint), json=data, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def update(self, endpoint, resource_id, data):
        """PUT - Update resource"""
        response = requests.put(
            self._url(endpoint, resource_id), json=data, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def delete(self, endpoint, resource_id):
        """DELETE resource; returns the HTTP status code."""
        response = requests.delete(
            self._url(endpoint, resource_id), timeout=self.timeout
        )
        response.raise_for_status()
        return response.status_code
# Exercise each CRUD method of the client against the demo API
client = APIClient('https://jsonplaceholder.typicode.com')
try:
    # Read
    post = client.get_one('posts', 1)
    print(f"Retrieved: {post['title']}")

    # Create
    new_post = dict(title='New Post', body='Content', userId=1)
    created = client.create('posts', new_post)
    print(f"Created post with ID: {created['id']}")

    # Update
    updated_data = dict(title='Updated Title', body='Updated', userId=1)
    updated = client.update('posts', 1, updated_data)
    print(f"Updated: {updated['title']}")

    # Delete
    status = client.delete('posts', 1)
    print(f"Delete status code: {status}")
except requests.exceptions.RequestException as e:
    # Any transport failure or HTTP error status from the steps above lands here
    print(f"API Error: {e}")

# Example 2: Error handling
print("\n--- Example 2: Error Handling ---")
def safe_api_call(url, timeout=5):
    """Make a GET request and report the outcome as a dict instead of raising.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait before the request is abandoned.

    Returns:
        ``{'success': True, 'data': <decoded JSON>}`` on success, otherwise
        ``{'success': False, 'error': <message>}`` describing what failed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.Timeout:
        return {'success': False, 'error': 'Request timed out'}
    except requests.exceptions.ConnectionError:
        return {'success': False, 'error': 'Connection failed'}
    except requests.exceptions.HTTPError as e:
        return {'success': False, 'error': f'HTTP error: {e}'}
    except requests.exceptions.RequestException as e:
        return {'success': False, 'error': f'Request error: {e}'}

    # Decode in a separate try block. The JSON error raised by response.json()
    # is a ValueError, but in current requests it is also a RequestException,
    # so a shared try block would report it as a generic "Request error".
    try:
        data = response.json()
    except ValueError:
        return {'success': False, 'error': 'Invalid JSON response'}
    return {'success': True, 'data': data}
# Try the wrapper against a known-good endpoint
result = safe_api_call('https://jsonplaceholder.typicode.com/posts/1')
if not result['success']:
    print(f"Error: {result['error']}")
else:
    print(f"Success! Title: {result['data']['title']}")

# Example 3: Pagination
print("\n--- Example 3: Handling Pagination ---")
def get_all_pages(base_url, endpoint, per_page=10, max_pages=3, timeout=10):
    """Fetch successive pages of a paginated endpoint and concatenate them.

    Pages are requested with the ``_page`` / ``_limit`` query parameters,
    starting at page 1, until a page comes back empty, a request fails, or
    ``max_pages`` pages have been fetched.

    Args:
        base_url: Root URL of the API.
        endpoint: Path segment appended to ``base_url``.
        per_page: Value sent as ``_limit``.
        max_pages: Upper bound on pages fetched (the demo safety limit);
            ``None`` removes the bound.
        timeout: Seconds to wait on each request.

    Returns:
        A list with the items of every page fetched before stopping. On a
        request error the error is printed and the items gathered so far
        are returned.
    """
    all_data = []
    page = 1
    while max_pages is None or page <= max_pages:
        try:
            response = requests.get(
                f"{base_url}/{endpoint}",
                params={'_page': page, '_limit': per_page},
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error on page {page}: {e}")
            break
        if not data:  # No more data
            break
        all_data.extend(data)
        print(f"Fetched page {page}: {len(data)} items")
        page += 1
    return all_data
# Pull the demo's pages of posts and report how many came back
all_posts = get_all_pages('https://jsonplaceholder.typicode.com', 'posts')
print(f"Total posts fetched: {len(all_posts)}")

# ============================================
# PART 3: API Authentication
# ============================================
print(f"\n{'=' * 60}")
print("PART 3: API Authentication")
print('=' * 60)

# Example 1: API Key in headers (most common)
print("\n--- Example 1: API Key Authentication ---")
def fetch_with_api_key(url, api_key, timeout=10):
    """Make a GET request authenticated with an API key header.

    Args:
        url: Address to fetch.
        api_key: Value sent in the ``X-API-Key`` header.
        timeout: Seconds to wait before giving up, so a stalled server
            cannot block the caller forever.

    Returns:
        The decoded JSON body, or ``None`` after printing the error if the
        request fails or returns an error status.
    """
    headers = {
        'X-API-Key': api_key,
        'Content-Type': 'application/json',
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None
# Demo: JSONPlaceholder answers regardless of the key that is sent
demo_url = 'https://jsonplaceholder.typicode.com/posts/1'
data = fetch_with_api_key(demo_url, 'demo_key')
if data:
    print(f"Fetched: {data['title']}")

# Example 2: Bearer token authentication
print("\n--- Example 2: Bearer Token Authentication ---")
class AuthenticatedClient:
    """API client with token authentication.

    A single ``requests.Session`` carries the ``Authorization: Bearer``
    and ``Content-Type`` headers on every request made through it.

    Args:
        base_url: Root URL that endpoints are appended to.
        token: Bearer token placed in the ``Authorization`` header.
        timeout: Seconds to wait on each request before giving up.
    """

    def __init__(self, base_url, token, timeout=10):
        self.base_url = base_url
        self.token = token
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json',
        })

    def get(self, endpoint):
        """Make authenticated GET request; returns the decoded JSON body."""
        url = f"{self.base_url}/{endpoint}"
        # The timeout is passed on each call rather than stored on the session.
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def post(self, endpoint, data):
        """Make authenticated POST request with ``data`` sent as JSON."""
        url = f"{self.base_url}/{endpoint}"
        response = self.session.post(url, json=data, timeout=self.timeout)
        response.raise_for_status()
        return response.json()
# Demo of the token-authenticated client
auth_client = AuthenticatedClient('https://jsonplaceholder.typicode.com', 'demo_token')
try:
    data = auth_client.get('posts/1')
    print(f"Authenticated request: {data['title']}")
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

# ============================================
# PART 4: Web Scraping with Beautiful Soup
# ============================================
print(f"\n{'=' * 60}")
print("PART 4: Web Scraping with Beautiful Soup")
print('=' * 60)

# Example 1: Basic HTML parsing
print("\n--- Example 1: Basic HTML Parsing ---")
# Inline document shared by the parsing examples that follow
html_doc = """
<html>
<head><title>Sample Page</title></head>
<body>
<div class="container">
<h1 id="main-title">Welcome to Web Scraping</h1>
<p class="intro">This is a sample paragraph.</p>
<ul class="links">
<li><a href="https://example.com/page1">Page 1</a></li>
<li><a href="https://example.com/page2">Page 2</a></li>
<li><a href="https://example.com/page3">Page 3</a></li>
</ul>
<div class="article">
<h2>Article Title</h2>
<p class="content">Article content goes here.</p>
<span class="author">By John Doe</span>
</div>
</div>
</body>
</html>
"""
soup = BeautifulSoup(html_doc, 'html.parser')

# Direct tag access and single-element lookup
page_title = soup.title.string
print(f"Title: {page_title}")
heading_text = soup.h1.get_text()
print(f"H1: {heading_text}")
first_anchor = soup.find('a')
print(f"First link: {first_anchor['href']}")

# Every anchor in the document
links = soup.find_all('a')
print(f"\nAll links ({len(links)}):")
for link in links:
    label = link.get_text()
    target = link.get('href')
    print(f" - {label}: {target}")

# Example 2: CSS selectors
print("\n--- Example 2: CSS Selectors ---")
# By class: take the first match
intro_matches = soup.select('.intro')
intro = intro_matches[0]
print(f"Intro text: {intro.get_text()}")
# By ID
title_matches = soup.select('#main-title')
main_title = title_matches[0]
print(f"Main title: {main_title.get_text()}")
# Descendant selector: p.content inside div.article
content_matches = soup.select('div.article p.content')
article_content = content_matches[0]
print(f"Article content: {article_content.get_text()}")

# Example 3: Extracting structured data
print("\n--- Example 3: Extracting Structured Data ---")
def extract_articles(html):
    """Return one dict per ``div.article`` found in *html*.

    Each dict has ``title`` (from the ``h2``), ``content`` (from
    ``p.content``) and ``author`` (from ``span.author``, or ``'Unknown'``
    when that element is absent). Articles missing a title or content
    element are skipped.
    """
    parsed = BeautifulSoup(html, 'html.parser')
    records = []
    for block in parsed.find_all('div', class_='article'):
        heading = block.find('h2')
        body = block.find('p', class_='content')
        if not (heading and body):
            continue
        byline = block.find('span', class_='author')
        if byline:
            author = byline.get_text(strip=True)
        else:
            author = 'Unknown'
        records.append(dict(
            title=heading.get_text(strip=True),
            content=body.get_text(strip=True),
            author=author,
        ))
    return records
# Run the extractor over the inline sample document
articles = extract_articles(html_doc)
article_count = len(articles)
print(f"Extracted {article_count} articles:")
for article in articles:
    print(f" - {article['title']} by {article['author']}")

# Example 4: Scraping a real website (quotes)
print("\n--- Example 4: Scraping Quotes ---")
def scrape_quotes(url):
    """Scrape quotes from quotes.toscrape.com.

    Args:
        url: Page to download and parse.

    Returns:
        A list of ``{'text', 'author', 'tags'}`` dicts, one per
        ``div.quote`` that has both a text and an author element. Returns
        ``[]`` after printing the error if the download fails.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Python Scraper Tutorial)'
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error scraping: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    quotes = []
    for quote in soup.find_all('div', class_='quote'):
        text_elem = quote.find('span', class_='text')
        author_elem = quote.find('small', class_='author')
        # find() returns None for a missing element; skip the incomplete
        # quote instead of failing on None.get_text().
        if text_elem is None or author_elem is None:
            continue
        quotes.append({
            'text': text_elem.get_text(),
            'author': author_elem.get_text(),
            'tags': [tag.get_text() for tag in quote.find_all('a', class_='tag')],
        })
    return quotes
# Fetch the front page and preview a few results
quotes = scrape_quotes('http://quotes.toscrape.com/')
if quotes:
    print(f"Scraped {len(quotes)} quotes:")
    preview = quotes[:3]  # Show first 3
    for i, quote in enumerate(preview, start=1):
        tag_list = ', '.join(quote['tags'])
        print(f"\n{i}. {quote['text']}")
        print(f" - {quote['author']}")
        print(f" Tags: {tag_list}")

# ============================================
# PART 5: Best Practices
# ============================================
print(f"\n{'=' * 60}")
print("PART 5: Best Practices")
print('=' * 60)

# Example 1: Rate limiting
print("\n--- Example 1: Rate Limiting ---")
class RateLimitedScraper:
    """Scraper that leaves at least ``delay`` seconds between requests.

    Args:
        delay: Minimum number of seconds between consecutive requests.
        timeout: Seconds to wait on each request before giving up.
    """

    def __init__(self, delay=1, timeout=10):
        self.delay = delay
        self.timeout = timeout
        # -inf means "no request made yet", so the first fetch never waits.
        self.last_request = float('-inf')

    def _throttle(self):
        """Sleep for whatever remains of ``delay`` since the last request."""
        # Use the monotonic clock: wall-clock adjustments must not shorten
        # or lengthen the wait.
        remaining = self.delay - (time.monotonic() - self.last_request)
        if remaining > 0:
            time.sleep(remaining)

    def fetch(self, url):
        """Fetch URL with rate limiting.

        Returns:
            The ``requests`` response, or ``None`` after printing the error
            if the request raised.
        """
        self._throttle()
        try:
            return requests.get(url, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            print(f"Error: {e}")
            return None
        finally:
            # Record failed attempts too, so retries after an error are
            # rate-limited as well.
            self.last_request = time.monotonic()
# Three consecutive fetches, each timed including any rate-limit wait
scraper = RateLimitedScraper(delay=1)
print("Fetching URLs with rate limiting...")
for i in range(3):
    start = time.time()
    response = scraper.fetch(f'https://jsonplaceholder.typicode.com/posts/{i+1}')
    if not response:
        continue
    print(f" Request {i+1} completed in {time.time() - start:.2f}s")

# Example 2: Retry logic with exponential backoff
print("\n--- Example 2: Retry with Exponential Backoff ---")
def fetch_with_retry(url, max_retries=3):
    """Fetch *url*, retrying failed attempts with exponential backoff.

    Args:
        url: Address to fetch.
        max_retries: Total number of attempts; must be at least 1.

    Returns:
        The successful ``requests`` response.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        requests.exceptions.RequestException: Re-raised from the final
            attempt when every attempt failed.
    """
    # With zero attempts the loop body would never run and the function
    # would silently return None; reject that up front.
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=5)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException:
            if attempt == max_retries - 1:
                print(f"Failed after {max_retries} attempts")
                raise
            wait_time = 2 ** attempt  # 1s, 2s, 4s, ...
            print(f"Attempt {attempt + 1} failed. Retrying in {wait_time}s...")
            time.sleep(wait_time)
# Demo: a single call that retries internally
retry_url = 'https://jsonplaceholder.typicode.com/posts/1'
try:
    response = fetch_with_retry(retry_url)
    print(f"Success! Status: {response.status_code}")
except requests.exceptions.RequestException as e:
    print(f"All retries failed: {e}")

# Example 3: Caching responses
print("\n--- Example 3: Response Caching ---")
class CachedAPIClient:
    """API client that caches decoded JSON responses per URL.

    Args:
        cache_duration: Seconds a cached entry stays valid (default 5 minutes).
        timeout: Seconds to wait on each request before giving up.
    """

    def __init__(self, cache_duration=300, timeout=10):
        # Maps url -> (decoded JSON, time.monotonic() value when stored)
        self.cache = {}
        self.cache_duration = cache_duration
        self.timeout = timeout

    def get(self, url):
        """Return the JSON for *url*, served from cache while still fresh.

        Raises:
            requests.exceptions.HTTPError: If the server answers with an
                error status; such responses are never cached.
        """
        entry = self.cache.get(url)
        if entry is None:
            print(" Cache MISS")
        else:
            data, timestamp = entry
            # Monotonic clock: entry age is unaffected by wall-clock changes.
            age = time.monotonic() - timestamp
            if age < self.cache_duration:
                print(f" Cache HIT (age: {age:.1f}s)")
                return data
            print(" Cache EXPIRED")

        # Fetch and cache, but only after confirming success so an error
        # body is not stored and replayed for the whole cache duration.
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()
        self.cache[url] = (data, time.monotonic())
        return data
# Two identical requests through a short-lived cache
cached_client = CachedAPIClient(cache_duration=10)
url = 'https://jsonplaceholder.typicode.com/posts/1'

# First call goes to the network (cache miss)
data1 = cached_client.get(url)
print(f"Title: {data1['title']}")

# Second call is expected to come from the cache (cache hit)
data2 = cached_client.get(url)
print(f"Title: {data2['title']}")

# ============================================
# PROJECT 1: Weather API Client
# ============================================
print(f"\n{'=' * 60}")
print("PROJECT 1: Weather API Client")
print('=' * 60)
class WeatherClient:
    """Client for weather API (using JSONPlaceholder as demo)."""

    def __init__(self, api_key=None):
        self.api_key = api_key
        self.base_url = 'https://jsonplaceholder.typicode.com'
        # Not read by any method in this class; kept so the attribute stays available.
        self.cache = {}

    def get_weather(self, city_id):
        """Get current weather (simulated).

        Requests ``/posts/<city_id>`` from the demo API, then builds a
        fixed-shape weather record locally.

        Args:
            city_id: Integer identifier, used in the URL and to derive the
                simulated temperature.

        Returns:
            A dict with ``city``, ``temperature``, ``conditions``,
            ``humidity`` and ``timestamp`` keys, or ``None`` after printing
            the error if the request fails.
        """
        url = f"{self.base_url}/posts/{city_id}"
        try:
            response = requests.get(url, timeout=5)
            response.raise_for_status()
            # In a real API this body would be the actual weather data. The
            # demo only decodes it; the values below are simulated.
            response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching weather: {e}")
            return None

        # Simulate weather data structure
        return {
            'city': f"City {city_id}",
            'temperature': 20 + (city_id % 10),
            'conditions': 'Sunny',
            'humidity': 60,
            'timestamp': datetime.now().isoformat(),
        }

    def save_to_csv(self, weather_data, filename='weather_data.csv'):
        """Append one weather record to a CSV file.

        The header row is written whenever the file is empty at open time,
        which covers both a newly created file and an existing empty one.

        Args:
            weather_data: Dict with the keys produced by ``get_weather``.
            filename: Path of the CSV file to append to.
        """
        fieldnames = ['city', 'temperature', 'conditions', 'humidity', 'timestamp']
        with open(filename, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            # Append mode starts at end of file, so position 0 means no content yet.
            if f.tell() == 0:
                writer.writeheader()
            writer.writerow(weather_data)
# Demo weather client
weather_client = WeatherClient()
print("\nFetching weather data:")
saved_count = 0
for city_id in range(1, 4):
    weather = weather_client.get_weather(city_id)
    if weather:
        print(f" {weather['city']}: {weather['temperature']}°C, {weather['conditions']}")
        weather_client.save_to_csv(weather)
        saved_count += 1

# Only claim the file was written when at least one record was saved.
if saved_count:
    print("Weather data saved to weather_data.csv")
else:
    print("No weather data was saved")

# ============================================
# PROJECT 2: Web Scraper with CSV Export
# ============================================
print("\n" + "=" * 60)
print("PROJECT 2: Web Scraper with CSV Export")
print("=" * 60)
class WebScraper:
    """Session-based page scraper that can export its results as CSV."""

    def __init__(self, delay=1):
        # delay: seconds slept before every page request
        self.delay = delay
        self.session = requests.Session()
        self.session.headers.update(
            {'User-Agent': 'Mozilla/5.0 (Python Tutorial Scraper)'}
        )

    def scrape_page(self, url, parser_func):
        """Download *url*, parse it, and return ``parser_func(soup)``.

        Returns an empty list after printing the error when the request
        fails or returns an error status.
        """
        time.sleep(self.delay)  # Rate limiting
        try:
            page = self.session.get(url, timeout=10)
            page.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error scraping {url}: {e}")
            return []
        document = BeautifulSoup(page.text, 'html.parser')
        return parser_func(document)

    def scrape_multiple(self, urls, parser_func):
        """Scrape each URL in turn and return all results in one list."""
        collected = []
        position = 0
        for url in urls:
            position += 1
            print(f"Scraping {position}/{len(urls)}: {url}")
            collected.extend(self.scrape_page(url, parser_func))
        return collected

    def save_to_csv(self, data, filename, fieldnames):
        """Write *data* (a list of dicts) to *filename* with a header row."""
        with open(filename, 'w', newline='', encoding='utf-8') as out:
            csv_writer = csv.DictWriter(out, fieldnames=fieldnames)
            csv_writer.writeheader()
            for record in data:
                csv_writer.writerow(record)
        print(f"Saved {len(data)} items to {filename}")
def parse_quotes(soup):
    """Turn every ``div.quote`` in *soup* into a ``{'quote', 'author'}`` dict.

    Blocks lacking either the ``span.text`` or the ``small.author``
    element are left out of the result.
    """
    rows = []
    for block in soup.find_all('div', class_='quote'):
        text_node = block.find('span', class_='text')
        author_node = block.find('small', class_='author')
        if not (text_node and author_node):
            continue
        rows.append(dict(
            quote=text_node.get_text(),
            author=author_node.get_text(),
        ))
    return rows
# Demo: scrape the quotes front page and export it
scraper = WebScraper(delay=1)
urls = ['http://quotes.toscrape.com/']
print("\nScraping quotes:")
quotes_data = scraper.scrape_multiple(urls, parse_quotes)
if quotes_data:
    csv_columns = ['quote', 'author']
    scraper.save_to_csv(quotes_data, 'scraped_quotes.csv', csv_columns)
    print(f"Total quotes scraped: {len(quotes_data)}")

# Closing banner
print(f"\n{'=' * 60}")
print("Advanced Session 3 Completed!")
print("Master APIs and web scraping!")
print('=' * 60)