package Set::Functional; use 5.006; use Exporter qw{import}; =head1 NAME Set::Functional - set operations for functional programming =head1 VERSION Version 1.0 =cut our $VERSION = '1.01'; our @EXPORT_OK = qw{ setify setify_by cartesian difference difference_by disjoint disjoint_by distinct distinct_by intersection intersection_by symmetric_difference symmetric_difference_by union union_by is_disjoint is_disjoint_by is_equal is_equal_by is_equivalent is_equivalent_by is_pairwise_disjoint is_pairwise_disjoint_by is_proper_subset is_proper_subset_by is_proper_superset is_proper_superset_by is_subset is_subset_by is_superset is_superset_by }; our %EXPORT_TAGS = (all => \@EXPORT_OK); =head1 SYNOPSIS This module provides basic set operations for native lists. The primary goal is to take advantage of Perl's native functional programming capabilities while relying solely on Pure Perl constructs to perform the set operations as fast as possible. All of these techniques have been benchmarked against other common Perl idioms to determine the optimal solution. These benchmarks can be found in this package (shortly). Each function is provided in two forms. The first form always expects simple flat data structures of defined elements. The second form expects a BLOCK (refered to as a choice function) to evaluate each member of the list to a defined value to determine how the element is a set member. These can be identified by the suffix "_by". None of these functions check definedness inline so as to eliminate the costly O(n) operation. All functions have been prototyped to give them a native Perl-ish look and feel. Example usage: use Set::Functional ':all'; # Set Creation my @deduped_numbers = setify(1 .. 10, 2 .. 11); my @deduped_objects_by_name = setify_by { $_->{name} } ({name => 'fred'}, {name => 'bob'}, {name => 'fred'}); # Set Operation my @all_permutations = cartesian \@arr1, \@arr2, \@arr3, \@arr4; my @only_arr1_elements = difference \@arr1, \@arr2, \@arr3, \@arr4; my @only_arr1_elements_by_name = difference_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4; my @unique_per_set = disjoint \@arr1, \@arr2, \@arr3, \@arr4; my @unique_per_set_by_name = disjoint_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4; my @unique_elements = distinct \@arr1, \@arr2, \@arr3, \@arr4; my @unique_elements_by_name = distinct_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4; my @shared_elements = intersection \@arr1, \@arr2, \@arr3, \@arr4; my @shared_elements_by_name = intersection_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4; my @odd_occuring_elements = symmetric_difference \@arr1, \@arr2, \@arr3, \@arr4; my @odd_occuring_elements_by_name = symmetric_difference_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4; my @all_elements = union \@arr1, \@arr2, \@arr3, \@arr4; my @all_elements_by_name = union_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4; # Set Predicates my $is_all_of_arr1_distinct_from_arr2 = is_disjoint \@arr1, \@arr2; my $is_all_of_arr1_distinct_from_arr2_by_name = is_disjoint_by { $_->{name} } \@arr1, \@arr2; my $is_arr1_the_same_as_arr2 = is_equal \@arr1, \@arr2; my $is_arr1_the_same_as_arr2_by_name = is_equal_by { $_->{name} } \@arr1, \@arr2; my $are_all_sets_mutually_unique = is_pairwise_disjoint \@arr1, \@arr2, \@arr3, \@arr4; my $are_all_sets_mutually_unique_by_name = is_pairwise_disjoint_by { $_->{name} } \@arr1, \@arr2, \@arr3, \@arr4; my $is_all_of_arr1_in_arr2_but_not_the_same_as_arr2 = is_proper_subset \@arr1, \@arr2; my $is_all_of_arr1_in_arr2_but_not_the_same_as_arr2_by_name = is_proper_subset_by { $_->{name} } \@arr1, \@arr2; my $is_all_of_arr1_in_arr2 = is_subset \@arr1, \@arr2; my $is_all_of_arr1_in_arr2_by_name = is_subset_by { $_->{name} } \@arr1, \@arr2; my $is_all_of_arr2_in_arr1_but_not_the_same_as_arr1 = is_proper_superset \@arr1, \@arr2; my $is_all_of_arr2_in_arr1_but_not_the_same_as_arr1_by_name = is_proper_superset_by { $_->{name} } \@arr1, \@arr2; my $is_all_of_arr2_in_arr1 = is_superset \@arr1, \@arr2; my $is_all_of_arr2_in_arr1_by_name = is_superset_by { $_->{name} } \@arr1, \@arr2; =head1 CONSTRUCTORS =cut =head2 setify(@) Given a list, return a new set. Order is not guaranteed. setify 1 .. 10, 6 .. 15 => 1 .. 15 =cut sub setify(@) { my %set; undef @set{@_} if @_; return keys %set; } =head2 setify_by(&@) Given a choice function and a list, return a new set defined by the choice function. Order is not guaranteed. =cut sub setify_by(&@){ my $func = shift; my %set; @set{ map { $func->($_) } @_ } = @_ if @_; return values %set; } =head1 OPERATORS =cut =head2 cartesian(@) Given multiple set references, return multiple sets containing all permutations of one element from each set. If the empty set is provided, the empty set is returned. If no sets are provided then none are returned. cartesian [1 .. 3], [1 .. 2] => [1,1],[1,2],[2,1],[2,2],[3,1],[3,2] =cut sub cartesian(@) { return unless @_; my @results; my $repetitions = 1; ($repetitions *= @$_) || return [] for @_; $#results = $repetitions - 1; for my $idx (0 .. $#results) { $repetitions = @results; $results[$idx] = [map { $_->[int($idx/($repetitions /= @$_)) % @$_] } @_]; } return @results; } =head2 difference(@) Given multiple set references, return a new set with all the elements in the first set that don't exist in subsequent sets. difference [1 .. 10], [6 .. 15] => 1 .. 5 =cut sub difference(@) { my $first = shift; return unless $first && @$first; my %set; undef @set{@$first}; do { delete @set{@$_} if @$_ } for @_; return keys %set; } =head2 difference_by(&@) Given a choice function and multiple set references, return a new set with all the elements in the first set that don't exist in subsequent sets according to the choice function. =cut sub difference_by(&@) { my $func = shift; my $first = shift; return unless $first && @$first; my %set; @set{ map { $func->($_) } @$first } = @$first; do { delete @set{ map { $func->($_) } @$_ } if @$_ } for @_; return values %set; } =head2 disjoint(@) Given multiple set references, return corresponding sets containing all the elements from the original set that exist in any set exactly once. disjoint [1 .. 10], [6 .. 15] => [1 .. 5], [11 .. 15] =cut sub disjoint(@) { my %element_to_count; do { ++$element_to_count{$_} for @$_ } for @_; return map { [grep { $element_to_count{$_} == 1 } @$_] } @_; } =head2 disjoint_by(&@) Given a choice function and multiple set references, return corresponding sets containing all the elements from the original set that exist in any set exactly once according to the choice function. =cut sub disjoint_by(&@) { my $func = shift; my %key_to_count; do { ++$key_to_count{$func->($_)} for @$_ } for @_; return map { [grep { $key_to_count{$func->($_)} == 1 } @$_] } @_; } =head2 distinct(@) Given multiple set references, return a new set containing all the elements that exist in any set exactly once. distinct [1 .. 10], [6 .. 15] => 1 .. 5, 11 .. 15 =cut sub distinct(@) { my %element_to_count; do { ++$element_to_count{$_} for @$_ } for @_; return grep { $element_to_count{$_} == 1 } keys %element_to_count; } =head2 distinct_by(&@) Given a choice function and multiple set references, return a new set containing all the elements that exist in any set exactly once according to the choice function. =cut sub distinct_by(&@) { my $func = shift; my %key_to_count; for (@_) { for (@$_) { my $key = $func->($_); $key_to_count{$key} = exists $key_to_count{$key} ? undef : $_; } } return grep { defined } values %key_to_count; } =head2 intersection(@) Given multiple set references, return a new set containing all the elements that exist in all sets. intersection [1 .. 10], [6 .. 15] => 6 .. 10 =cut sub intersection(@) { my $first = shift; return unless $first && @$first; my %set; undef @set{@$first}; for (@_) { my @int = grep { exists $set{$_} } @$_; return unless @int; %set = (); undef @set{@int}; } return keys %set; } =head2 intersection_by(&@) Given a choice function and multiple set references, return a new set containing all the elements that exist in all sets according to the choice function. =cut sub intersection_by(&@) { my $func = shift; my $first = shift; return unless $first && @$first; my %set; @set{ map { $func->($_) } @$first } = @$first; for (@_) { my @int = grep { exists $set{$func->($_)} } @$_; return unless @int; %set = (); @set{ map { $func->($_) } @int } = @int; } return values %set; } =head2 symmetric_difference(@) Given multiple set references, return a new set containing all the elements that exist an odd number of times across all sets. symmetric_difference [1 .. 10], [6 .. 15], [4, 8, 12] => 1 .. 5, 8, 11 .. 15 =cut sub symmetric_difference(@) { my $count; my %element_to_count; do { ++$element_to_count{$_} for @$_ } for @_; return grep { $element_to_count{$_} % 2 } keys %element_to_count; } =head2 symmetric_difference_by(&@) Given a choice function and multiple set references, return a new set containing all the elements that exist an odd number of times across all sets according to the choice function. =cut sub symmetric_difference_by(&@) { my $func = shift; my $count; my %key_to_count; do { ++$key_to_count{$func->($_)} for @$_ } for @_; return map { grep { $count = delete $key_to_count{$func->($_)}; defined($count) && $count % 2 } @$_ } @_; } =head2 union(@) Given multiple set references, return a new set containing all the elements that exist in any set. union [1 .. 10], [6 .. 15] => 1 .. 15 =cut sub union(@) { my %set; do { undef @set{@$_} if @$_ } for @_; return keys %set; } =head2 union_by(&@) Given a choice function and multiple set references, return a new set containing all the elements that exist in any set according to the choice function. =cut sub union_by(&@) { my $func = shift; my %set; do { @set{ map { $func->($_) } @$_ } = @$_ if @$_ } for @_; return values %set; } =head1 PREDICATES =cut =head2 is_disjoint($$) Given two set references, return true if both sets contain none of the same values. is_disjoint [1 .. 5], [6 .. 10] => true is_disjoint [1 .. 6], [4 .. 10] => false =cut sub is_disjoint($$) { my @set = &intersection(@_[0,1]); return ! @set; } =head2 is_disjoint_by(&$$) Given a choice function and two sets references, return true if both sets contain none of the same values according to the choice function. =cut sub is_disjoint_by(&$$) { my @set = &intersection_by(@_[0,1,2]); return ! @set; } =head2 is_equal($$) Given two set references, return true if both sets contain all the same values. Aliased by is_equivalent. is_equal [1 .. 5], [1 .. 5] => true is_equal [1 .. 10], [6 .. 15] => false =cut sub is_equal($$) { my @set = &intersection(@_[0,1]); return @set == @{$_[0]} && @set == @{$_[1]}; } *is_equivalent = \&is_equal; =head2 is_equal_by(&$$) Given a choice function and two sets references, return true if both sets contain all the same values according to the choice function. Aliased by is_equivalent_by. =cut sub is_equal_by(&$$) { my @set = &intersection_by(@_[0,1,2]); return @set == @{$_[1]} && @set == @{$_[2]}; } *is_equivalent_by = \&is_equal_by; =head2 is_pairwise_disjoint(@) Given multiple set references, return true if every set is disjoint from every other set. is_pairwise_disjoint [1 .. 5], [6 .. 10], [11 .. 15] => true is_pairwise_disjoint [1 .. 5], [6 .. 10], [11 .. 15], [3 .. 8] => false =cut sub is_pairwise_disjoint(@) { my @sets = &disjoint(@_); do { return 0 if @{$sets[$_]} != @{$_[$_]} } for 0 .. $#sets; return 1; } =head2 is_pairwise_disjoint_by(&@) Given a choice function and multiple set references, return true if every set is disjoint from every other set according to the choice function. =cut sub is_pairwise_disjoint_by(&@) { my @sets = &disjoint_by((shift), @_); do { return 0 if @{$sets[$_]} != @{$_[$_]} } for 0 .. $#sets; return 1; } =head2 is_proper_subset($$) Given two set references, return true if the first set is fully contained by but is not equivalent to the second. is_proper_subset [1 .. 5], [1 .. 10] => true is_proper_subset [1 .. 5], [1 .. 5] => false =cut sub is_proper_subset($$) { my @set = &intersection(@_[0,1]); return @set == @{$_[0]} && @set != @{$_[1]}; } =head2 is_proper_subset_by(&$$) Given a choice function and two set references, return true if the first set is fully contained by but is not equivalent to the second according to the choice function. =cut sub is_proper_subset_by(&$$) { my @set = &intersection_by(@_[0,1,2]); return @set == @{$_[1]} && @set != @{$_[2]}; } =head2 is_proper_superset($$) Given two set references, return true if the first set fully contains but is not equivalent to the second. is_proper_superset [1 .. 10], [1 .. 5] => true is_proper_superset [1 .. 5], [1 .. 5] => false =cut sub is_proper_superset($$) { my @set = &intersection(@_[0,1]); return @set != @{$_[0]} && @set == @{$_[1]}; } =head2 is_proper_superset_by(&$$) Given a choice function and two set references, return true if the first set fully contains but is not equivalent to the second according to the choice function. =cut sub is_proper_superset_by(&$$) { my @set = &intersection_by(@_[0,1,2]); return @set != @{$_[1]} && @set == @{$_[2]}; } =head2 is_subset($$) Given two set references, return true if the first set is fully contained by the second. is_subset [1 .. 5], [1 .. 10] => true is_subset [1 .. 5], [1 .. 5] => true is_subset [1 .. 5], [2 .. 11] => false =cut sub is_subset($$) { my @set = &intersection(@_[0,1]); return @set == @{$_[0]}; } =head2 is_subset_by(&$$) Given a choice function and two set references, return true if the first set is fully contained by the second according to the choice function. =cut sub is_subset_by(&$$) { my @set = &intersection_by(@_[0,1,2]); return @set == @{$_[1]}; } =head2 is_superset($$) Given two set references, return true if the first set fully contains the second. is_superset [1 .. 10], [1 .. 5] => true is_superset [1 .. 5], [1 .. 5] => true is_subset [1 .. 5], [2 .. 11] => false =cut sub is_superset($$) { my @set = &intersection(@_[0,1]); return @set == @{$_[1]}; } =head2 is_superset_by(&$$) Given a choice function and two set references, return true if the first set fully contains the second according to the choice function. =cut sub is_superset_by(&$$) { my @set = &intersection_by(@_[0,1,2]); return @set == @{$_[2]}; } =head1 AUTHOR Aaron Cohen, C<< <aarondcohen at gmail.com> >> Special thanks to: L<Logan Bell|http://metacpan.org/author/logie> L<Thomas Whaples|https://github.com/twhaples> =head1 BUGS Please report any bugs or feature requests to C<bug-set-functional at rt.cpan.org>, or through the web interface at L<https://github.com/aarondcohen/Set-Functional/issues>. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 TODO =over 4 =item * Add SEE ALSO section =back =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc Set::Functional You can also look for information at: =over 4 =item * Official GitHub Repo L<https://github.com/aarondcohen/Set-Functional> =item * GitHub's Issue Tracker (report bugs here) L<https://github.com/aarondcohen/Set-Functional/issues> =item * CPAN Ratings L<http://cpanratings.perl.org/d/Set-Functional> =item * Official CPAN Page L<http://search.cpan.org/dist/Set-Functional/> =back =head1 LICENSE AND COPYRIGHT Copyright 2011-2013 Aaron Cohen. This program is free software; you can redistribute it and/or modify it under the terms of either: the GNU General Public License as published by the Free Software Foundation; or the Artistic License. See http://dev.perl.org/licenses/ for more information. =cut 1; # End of Set::Functional