Source code for dsci524_group29_webscraping.save_data

# save_data.py
# author: Hui Tang
# date: 2025-01-16

import os
import json
import csv

def save_data(data, format='csv', destination='output.csv'):
    """
    Saves the extracted data into a CSV or JSON file.

    Parameters:
        data (list or dict): The data to be saved.
            - For 'csv', it must be a non-empty list of dictionaries where
              each dictionary represents a row.
            - For 'json', it can be either a list or a dictionary.
        format (str, optional): The format in which to save the data.
            Options are:
            - 'csv': Saves the data as a CSV file. The keys of the first
              dictionary become the column headers.
            - 'json': Saves the data as a JSON file, serialized with a
              4-space indentation for readability.
            Default is 'csv'.
        destination (str, optional): The file path to save the data. Can be
            a bare file name (e.g., 'output.csv') or a full path
            (e.g., '/path/to/output.csv'). Default is 'output.csv'.

    Returns:
        str: The absolute path to the saved file.

    Raises:
        FileNotFoundError: If the directory part of `destination` does not
            exist. This is checked first, before the format or the data.
        ValueError: If the format is unsupported, or if the data structure
            is incompatible with the format (including an empty list for
            'csv', which has no row to take the headers from).
        Exception: If an error occurs while writing the file. The original
            error is attached as `__cause__`. For 'csv' this includes rows
            containing keys that are absent from the first row.

    Examples:
        # Save data as a CSV file
        save_data([{"name": "Alice", "age": 25}, {"name": "Bob", "age": 30}],
                  format='csv', destination='data.csv')

        # Save data as a JSON file
        save_data({"name": "Alice", "age": 25},
                  format='json', destination='data.json')

    Notes:
        - The directory specified in the destination path must exist; it is
          not created automatically.
        - For 'csv', the first dictionary in the list determines the column
          headers; rows missing one of those keys get an empty cell.
        - Files are written as UTF-8 regardless of the platform default.
    """
    # Validate the destination directory before anything is written.
    dir_path = os.path.dirname(destination)
    if dir_path and not os.path.exists(dir_path):
        raise FileNotFoundError(f"The directory {dir_path} does not exist.")

    if format == 'csv':
        if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
            raise ValueError("For CSV, data must be a list of dictionaries.")
        # An empty list passes the check above but offers no header row.
        # Reject it here so the destination file is not truncated first.
        if not data:
            raise ValueError("For CSV, data must be a non-empty list of dictionaries.")

        try:
            # newline='' lets the csv module control line endings itself.
            with open(destination, mode='w', newline='', encoding='utf-8') as file:
                writer = csv.DictWriter(file, fieldnames=list(data[0]))
                writer.writeheader()
                writer.writerows(data)
        except Exception as e:
            # Keep the generic Exception type callers already catch, but
            # chain the cause so the real error stays visible in tracebacks.
            raise Exception(f"Failed to save CSV data: {e}") from e

    elif format == 'json':
        if not isinstance(data, (list, dict)):
            raise ValueError("For JSON, data must be a list or a dictionary.")

        try:
            with open(destination, mode='w', encoding='utf-8') as file:
                json.dump(data, file, indent=4)
        except Exception as e:
            raise Exception(f"Failed to save JSON data: {e}") from e

    else:
        raise ValueError("Unsupported format. Use 'csv' or 'json'.")

    return os.path.abspath(destination)