Set Structures

Set data structures store unique elements and provide efficient operations for insertion, deletion, and searching. They are essential for solving problems involving unique elements and membership testing.

What are Sets?

A set is a collection of unique elements. In programming, sets are useful when you need to:

Common Set Use Cases:

  • Remove duplicates from data
  • Check if an element exists quickly
  • Maintain sorted unique elements
  • Perform mathematical set operations

STL set

The STL set stores unique elements in sorted order using a balanced binary search tree:


#include <set>
#include <iostream>

int main() {
    set<int> s;
    
    // Insert elements
    s.insert(5);
    s.insert(2);
    s.insert(8);
    s.insert(2); // Duplicate - won't be added
    
    // Size and check
    cout << "Size: " << s.size() << endl; // Output: 3
    
    // Check if element exists
    if (s.find(5) != s.end()) {
        cout << "5 found in set" << endl;
    }
    
    // Alternative existence check
    if (s.count(8) > 0) {
        cout << "8 exists in set" << endl;
    }
    
    // Iterate (elements are in sorted order)
    for (int x : s) {
        cout << x << " "; // Output: 2 5 8
    }
    
    // Remove element
    s.erase(5);
    
    return 0;
}
                    

STL multiset

Multiset allows duplicate elements while maintaining sorted order:


#include <set>

int main() {
    multiset<int> ms;
    
    // Insert duplicates
    ms.insert(3);
    ms.insert(1);
    ms.insert(3);
    ms.insert(2);
    ms.insert(3);
    
    cout << "Size: " << ms.size() << endl; // Output: 5
    cout << "Count of 3: " << ms.count(3) << endl; // Output: 3
    
    // Iterate
    for (int x : ms) {
        cout << x << " "; // Output: 1 2 3 3 3
    }
    
    // Remove one occurrence
    ms.erase(ms.find(3)); // Removes only one 3
    cout << "After erasing one 3: " << ms.count(3) << endl; // Output: 2
    
    // Remove all occurrences
    ms.erase(3); // Removes all remaining 3s
    
    return 0;
}
                    

Unordered Set

For faster average operations, use unordered_set (hash table based):


#include <unordered_set>

int main() {
    unordered_set<int> us;
    
    // Insert elements (O(1) average)
    us.insert(10);
    us.insert(20);
    us.insert(15);
    
    // Fast lookup (O(1) average)
    if (us.find(15) != us.end()) {
        cout << "15 found!" << endl;
    }
    
    // Elements are not sorted
    for (int x : us) {
        cout << x << " "; // Order is not guaranteed
    }
    
    return 0;
}
                    

Set Operations

STL provides algorithms for mathematical set operations:


#include <set>
#include <algorithm>
#include <vector>

int main() {
    set<int> set1 = {1, 2, 3, 4, 5};
    set<int> set2 = {3, 4, 5, 6, 7};
    vector<int> result;
    
    // Union (all elements from both sets)
    set_union(set1.begin(), set1.end(),
              set2.begin(), set2.end(),
              back_inserter(result));
    // result: {1, 2, 3, 4, 5, 6, 7}
    
    result.clear();
    
    // Intersection (common elements)
    set_intersection(set1.begin(), set1.end(),
                     set2.begin(), set2.end(),
                     back_inserter(result));
    // result: {3, 4, 5}
    
    result.clear();
    
    // Difference (elements in set1 but not in set2)
    set_difference(set1.begin(), set1.end(),
                   set2.begin(), set2.end(),
                   back_inserter(result));
    // result: {1, 2}
    
    return 0;
}
                    

Custom Comparators

You can define custom sorting criteria for sets:


#include <set>
#include <string>

// Custom comparator for descending order
struct GreaterInt {
    bool operator()(int a, int b) const {
        return a > b;
    }
};

// Custom comparator for strings by length
struct CompareByLength {
    bool operator()(const string& a, const string& b) const {
        if (a.length() != b.length()) {
            return a.length() < b.length();
        }
        return a < b; // If same length, lexicographic order
    }
};

int main() {
    // Set with custom comparator
    set<int, GreaterInt> descendingSet;
    descendingSet.insert(3);
    descendingSet.insert(1);
    descendingSet.insert(4);
    
    for (int x : descendingSet) {
        cout << x << " "; // Output: 4 3 1
    }
    
    // Set of strings sorted by length
    set<string, CompareByLength> lengthSet;
    lengthSet.insert("cat");
    lengthSet.insert("elephant");
    lengthSet.insert("a");
    lengthSet.insert("dog");
    
    for (const string& s : lengthSet) {
        cout << s << " "; // Output: a cat dog elephant
    }
    
    return 0;
}
                    

Performance Comparison

Time Complexities:

  • set/multiset: O(log n) for insert, find, erase
  • unordered_set: O(1) average, O(n) worst case
  • When to use set: Need sorted order, stable performance
  • When to use unordered_set: Don't need order, want faster operations

Practical Examples

Remove Duplicates from Array


vector<int> removeDuplicates(vector<int>& nums) {
    set<int> uniqueSet(nums.begin(), nums.end());
    return vector<int>(uniqueSet.begin(), uniqueSet.end());
}
                    

Find Missing Numbers


vector<int> findMissing(const vector<int>& nums, int maxVal) {
    set<int> present(nums.begin(), nums.end());
    vector<int> missing;
    
    for (int i = 1; i <= maxVal; i++) {
        if (present.find(i) == present.end()) {
            missing.push_back(i);
        }
    }
    
    return missing;
}
                    

Common Pitfalls

Watch Out For

  • Iterator invalidation: Modifying set while iterating can be dangerous
  • Custom comparator consistency: Must define strict weak ordering
  • Hash collisions: unordered_set can degrade to O(n) in worst case
  • Memory overhead: Sets have more overhead than simple arrays

CodeForces Challenge Problems

Practice set operations:

Easy

Unique Elements

Practice using sets to handle unique element problems.

Sets STL
Solve Problem