SQL Subqueries
Subqueries (also called inner queries or nested queries) are queries embedded within other SQL statements. They allow you to perform complex operations by breaking them down into logical steps.
Types of Subqueries
- Scalar Subquery: Returns a single value
- Row Subquery: Returns a single row
- Column Subquery: Returns a single column
- Table Subquery: Returns a result table
Sample Tables for Examples
-- Employees table
CREATE TABLE employees (
employee_id INT PRIMARY KEY,
first_name VARCHAR(50),
last_name VARCHAR(50),
salary DECIMAL(10,2),
department_id INT,
hire_date DATE
);
-- Departments table
CREATE TABLE departments (
department_id INT PRIMARY KEY,
department_name VARCHAR(50),
location_id INT
);
-- Sample data
INSERT INTO departments VALUES
(10, 'Accounting', 100),
(20, 'Marketing', 200),
(30, 'IT', 300);
INSERT INTO employees VALUES
(101, 'John', 'Doe', 75000, 10, '2020-01-15'),
(102, 'Jane', 'Smith', 85000, 20, '2019-05-22'),
(103, 'Robert', 'Johnson', 90000, 10, '2018-11-03'),
(104, 'Emily', 'Davis', 65000, 30, '2021-02-28'),
(105, 'Michael', 'Brown', 80000, 20, '2020-07-19');
Subquery in WHERE Clause
-- Employees who earn more than the average salary
SELECT first_name, last_name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
-- Employees in the same department as 'John Doe'
SELECT first_name, last_name, department_id
FROM employees
WHERE department_id = (
SELECT department_id
FROM employees
WHERE first_name = 'John' AND last_name = 'Doe'
);
Subquery in FROM Clause
-- Average salary by department
SELECT d.department_name, avg_sal.avg_salary
FROM departments d
JOIN (
SELECT department_id, AVG(salary) AS avg_salary
FROM employees
GROUP BY department_id
) avg_sal ON d.department_id = avg_sal.department_id;
-- Top 3 highest earners
SELECT first_name, last_name, salary
FROM (
SELECT first_name, last_name, salary
FROM employees
ORDER BY salary DESC
LIMIT 3
) AS top_earners;
Subquery in SELECT Clause
-- Employee count per department in each employee's row
SELECT
e.first_name,
e.last_name,
e.department_id,
(SELECT COUNT(*)
FROM employees
WHERE department_id = e.department_id) AS dept_count
FROM employees e;
-- Difference from department average salary
SELECT
first_name,
last_name,
salary,
salary - (SELECT AVG(salary)
FROM employees e2
WHERE e2.department_id = e1.department_id) AS diff_from_avg
FROM employees e1;
Correlated Subqueries
Subqueries that reference columns from the outer query.
-- Employees who earn more than their department average
SELECT e1.first_name, e1.last_name, e1.salary
FROM employees e1
WHERE salary > (
SELECT AVG(salary)
FROM employees e2
WHERE e2.department_id = e1.department_id
);
-- Departments with above-average employee counts
SELECT d.department_name
FROM departments d
WHERE (
SELECT COUNT(*)
FROM employees e
WHERE e.department_id = d.department_id
) > (SELECT AVG(emp_count)
FROM (
SELECT COUNT(*) AS emp_count
FROM employees
GROUP BY department_id
) counts);
EXISTS Operator
-- Departments that have employees
SELECT department_name
FROM departments d
WHERE EXISTS (
SELECT 1
FROM employees e
WHERE e.department_id = d.department_id
);
-- Employees who have been with the company longer than their manager
SELECT e1.first_name, e1.last_name
FROM employees e1
WHERE EXISTS (
SELECT 1
FROM employees e2
WHERE e2.employee_id = e1.manager_id
AND e1.hire_date < e2.hire_date
);
IN Operator with Subqueries
-- Employees in departments located in location 100 or 200
SELECT first_name, last_name
FROM employees
WHERE department_id IN (
SELECT department_id
FROM departments
WHERE location_id IN (100, 200)
);
-- Employees not in the top 5 highest salaries
SELECT first_name, last_name, salary
FROM employees
WHERE employee_id NOT IN (
SELECT employee_id
FROM employees
ORDER BY salary DESC
LIMIT 5
);
Subqueries with ANY/SOME/ALL
-- Employees earning more than any IT employee
SELECT first_name, last_name, salary
FROM employees
WHERE salary > ANY (
SELECT salary
FROM employees
WHERE department_id = 30
);
-- Employees earning more than all Marketing employees
SELECT first_name, last_name, salary
FROM employees
WHERE salary > ALL (
SELECT salary
FROM employees
WHERE department_id = 20
);
Common Table Expressions (CTEs)
CTEs provide a more readable alternative to subqueries.
-- Using WITH clause for CTE
WITH dept_avg AS (
SELECT department_id, AVG(salary) AS avg_salary
FROM employees
GROUP BY department_id
)
SELECT e.first_name, e.last_name, e.salary, d.avg_salary
FROM employees e
JOIN dept_avg d ON e.department_id = d.department_id
WHERE e.salary > d.avg_salary;
-- Recursive CTE example (employee hierarchy)
WITH RECURSIVE emp_hierarchy AS (
-- Base case
SELECT employee_id, first_name, last_name, manager_id, 1 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case
SELECT e.employee_id, e.first_name, e.last_name, e.manager_id, eh.level + 1
FROM employees e
JOIN emp_hierarchy eh ON e.manager_id = eh.employee_id
)
SELECT * FROM emp_hierarchy ORDER BY level;
Performance Considerations
- Correlated subqueries can be slow - consider JOINs instead
- Use EXISTS instead of IN for large datasets
- CTEs often perform better than nested subqueries
- Ensure proper indexing on join/filter columns