Dense vs Sparse

Dense vs Sparse#

In this notebook, you will explore the difference in performance between dense and sparse matrix storage. There is a task to partially implement dense and sparse matrix assembly. After you have comppleted those that, the code in the remainder of the notebook performs the comparison for you and gives you some results to inspect.

\(\text{Task 1.1:}\)

Run the code below. The different finite element meshes that are used for this exercise are visualized.

import numpy as np
import scipy.sparse as sp
import time
import matplotlib.pyplot as plt
import os
from urllib.request import urlretrieve

# these imports are local py-files
import mesh_utils
import benchmark_operations

mesh_files = ['mesh080.msh', 'mesh040.msh', 'mesh020.msh', 'mesh010.msh']

for filename in mesh_files:
    if not os.path.isfile(filename):
        print(f"Downloading {filename}...")
        urlretrieve('https://github.com/TUDelft-MUDE/source-files/raw/main/file/PA2.2/'+filename, filename)

mesh_utils.plot_meshes(mesh_files)

\(\text{Task 1.2:}\)

Complete the implementation for dense and sparse matrix assembly

def assemble_dense_matrix(elements, n_nodes, n_repeats=1):
    """Assemble dense matrix with np.ix_"""
    element_matrix = np.ones((3, 3))
    
    # Time multiple runs for better statistics
    times = []
    M = None
    
    for _ in range(n_repeats):
        M = np.zeros((n_nodes, n_nodes))
        start_time = time.time()
        for e in elements:
            ### YOUR CODE HERE ###
        times.append(time.time() - start_time)
    
    assembly_time = np.min(times)
    return M, assembly_time

def assemble_sparse_matrix(elements, n_nodes, n_repeats=1):
    """Assemble sparse matrix to CSR format"""
    Me = np.ones((3, 3))  # Element matrix
    
    # Time multiple runs for better statistics
    times = []
    M_sparse = None
    
    for _ in range(n_repeats):
        start_time = time.time()
        
        # Pre-allocate arrays for sparse matrix construction
        n_elements = len(elements)
        n_entries = n_elements * 9  # 3x3 entries per element
        
        row_indices = np.zeros(n_entries, dtype=np.int32)
        col_indices = np.zeros(n_entries, dtype=np.int32)
        data = np.zeros(n_entries)
        
        # Fill the arrays
        for elem_idx, e in enumerate(elements):
            base_idx = elem_idx * 9
            for i in range(3):
                for j in range(3):
                    idx = base_idx + i * 3 + j
                    row_indices[idx] = ### YOUR CODE HERE ###
                    col_indices[idx] = ### YOUR CODE HERE ###
                    data[idx] = ### YOUR CODE HERE ###
        
        # Create sparse matrix and sum duplicates
        M_sparse = sp.csr_array((data, (row_indices, col_indices)), shape=(n_nodes, n_nodes))
        times.append(time.time() - start_time)
    
    assembly_time = np.min(times)
    return M_sparse, assembly_time

\(\text{Task 1.3:}\)

Run the code below. The performance of the sparse and dense implementations is compared here. Note that part of the operations are defined in the file benchmark_operations.py. For robustness of the comparison, tasks are repeated a number of times, where the number depends on how long the task takes to perform.

def run_performance_comparison(mesh_files):
    """Main function to run the performance comparison"""
    results = []
    
    print("=== Dense vs Sparse Matrix Performance Comparison ===\n")
    
    for mesh_file in mesh_files:
        try:
            print(f"Processing {mesh_file}...")
            n, elements = mesh_utils.read_gmsh(mesh_file)
            n_nodes = np.max(elements) + 1
            
            # Determine number of repeats based on expected operation time
            # More repeats for smaller, faster operations
            if n_nodes < 1000:
                assembly_repeats = 20
                matvec_repeats = 50
            elif n_nodes < 10000:
                assembly_repeats = 10
                matvec_repeats = 20
            else:
                assembly_repeats = 5
                matvec_repeats = 10
            
            # Dense matrix assembly
            print(f"  Assembling dense matrix ({assembly_repeats} runs)...")
            M_dense, dense_assembly_time = assemble_dense_matrix(elements, n_nodes, assembly_repeats)
            
            # Sparse matrix assembly
            print(f"  Assembling sparse matrix ({assembly_repeats} runs)...")
            M_sparse, sparse_assembly_time = assemble_sparse_matrix(elements, n_nodes, assembly_repeats)
            
            print(f"  Assembly - Dense: {dense_assembly_time:.4f}s, Sparse: {sparse_assembly_time:.4f}s")
            print(f"  Assembly speedup: {dense_assembly_time/sparse_assembly_time:.1f}x")
            
            # Benchmark operations
            result = benchmark_operations.run(M_dense, M_sparse, n_nodes, matvec_repeats, 1)
            result['dense_assembly_time'] = dense_assembly_time
            result['sparse_assembly_time'] = sparse_assembly_time
            
            # Convert numpy values to Python native types for JSON serialization
            for key, value in result.items():
                if hasattr(value, 'item'):  # numpy scalar
                    result[key] = value.item()
                elif not isinstance(value, (int, float, str, bool)):
                    result[key] = float(value)
            
            results.append(result)
            
            print()
            
        except FileNotFoundError:
            print(f"  File {mesh_file} not found, skipping...")
            continue
        except Exception as e:
            print(f"  Error processing {mesh_file}: {e}")
            continue
    
    return results

results = run_performance_comparison(mesh_files)

\(\text{Task 1.4:}\)

Run the code below. Results are visualized. Notice the log-scale in the plots! Ask yourself the following questions

For most operations, the slope of the plots is different, resulting in an increasing speedup factor for the sparse implementation for big matrices. For assembly, however, dense and sparse implementations give the same slope. Why is the overhead associated with the dense matrix relatively small for assembly?
In this comparison, we go only to moderate meshes. Suppose that observations on performance can be extrapolated and that you want to solve a problem with a mesh with a million nodes. How much memory would be approximately required for storing a dense or sparse matrix? How much time would it take to solve a system of equations with dense or sparse matrix representation?

def plot_results(results):
    """Create performance comparison plots"""
    if not results:
        print("No results to plot!")
        return
    
    fig = plt.figure(figsize=(12, 12))
    
    n_nodes = [r['n_nodes'] for r in results]
    
    # Memory usage comparison
    ax1 = plt.subplot(3, 2, 1)
    dense_mem = [r['dense_memory'] for r in results]
    sparse_mem = [r['sparse_memory'] for r in results]
    
    ax1.loglog(n_nodes, dense_mem, 'o-', label='Dense')
    ax1.loglog(n_nodes, sparse_mem, 's-', label='Sparse')
    ax1.set_xlabel('Number of Nodes')
    ax1.set_ylabel('Memory Usage (MB)')
    ax1.set_title('Memory Usage Comparison')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Matrix-vector multiplication time
    ax2 = plt.subplot(3, 2, 2)
    dense_matvec = [r['dense_matvec_time'] for r in results]
    sparse_matvec = [r['sparse_matvec_time'] for r in results]
    
    ax2.loglog(n_nodes, dense_matvec, 'o-', label='Dense')
    ax2.loglog(n_nodes, sparse_matvec, 's-', label='Sparse')
    ax2.set_xlabel('Number of Nodes')
    ax2.set_ylabel('Time (seconds)')
    ax2.set_title('Matrix × Vector Time')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # Assembly time comparison
    ax3 = plt.subplot(3, 2, 3)
    dense_assembly = [r['dense_assembly_time'] for r in results]
    sparse_assembly = [r['sparse_assembly_time'] for r in results]
    
    ax3.loglog(n_nodes, dense_assembly, 'o-', label='Dense')
    ax3.loglog(n_nodes, sparse_assembly, 's-', label='Sparse')
    ax3.set_xlabel('Number of Nodes')
    ax3.set_ylabel('Time (seconds)')
    ax3.set_title('Matrix Assembly Time')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # Solve time comparison
    ax4 = plt.subplot(3, 2, 4)
    dense_solve = [r['dense_solve_time'] for r in results]
    sparse_solve = [r['sparse_solve_time'] for r in results]

    ax4.loglog(n_nodes, dense_solve, 'o-', label='Dense')
    ax4.loglog(n_nodes, sparse_solve, 's-', label='Sparse')
    ax4.set_xlabel('Number of Nodes')
    ax4.set_ylabel('Time (seconds)')
    ax4.set_title('Linear System Solve Time')
    ax4.legend()
    ax4.grid(True, alpha=0.3)
    
    # Speedup factors
    ax5 = plt.subplot(3, 2, 5)
    matvec_speedup = [d/s for d, s in zip(dense_matvec, sparse_matvec)]
    assembly_speedup = [d/s for d, s in zip(dense_assembly, sparse_assembly)]
    solve_speedup = [d/s for d, s in zip(dense_solve, sparse_solve)]
    
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    ax5.loglog(n_nodes, matvec_speedup, 'o-', label='Matrix × Vector', color=colors[2])
    ax5.loglog(n_nodes, assembly_speedup, 's-', label='Assembly', color=colors[3])
    ax5.loglog(n_nodes, solve_speedup, 'v-', label='Solve', color=colors[4])

    ax5.set_xlabel('Number of Nodes')
    ax5.set_ylabel('Speedup Factor')
    ax5.set_title('Performance Speedup (Sparse vs Dense)')
    ax5.legend()
    ax5.grid(True, alpha=0.3)
    
    # Sparsity pattern
    ax6 = plt.subplot(3, 2, 6)
    sparsity = [r['sparsity'] for r in results]
    ax6.semilogx(n_nodes, sparsity, 'o-', color=colors[5])
    ax6.set_xlabel('Number of Nodes')
    ax6.set_ylabel('Sparsity (%)')
    ax6.set_title('Matrix Sparsity')
    ax6.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

plot_results(results)

By Frans van der Meer and Tom van Woudenberg, Delft University of Technology. CC BY 4.0, more info on the Credits page of Workbook.