// Copyright 2015, Tobias Hermann and the FunctionalPlus contributors. // https://github.com/Dobiasd/FunctionalPlus // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #pragma once #include #include #include #include #include #include #include #include namespace fplus { // API search type: group_by : (((a, a) -> Bool), [a]) -> [[a]] // fwd bind count: 1 // Arrange the elements into groups using a given predicate. // Only groups of consecutive elements are formed. // For a version scanning the whole container see group_globally_by. // group_by((==), [1,2,2,2,3,2,2,4,5,5]) == [[1],[2,2,2],[3],[2,2],[4],[5,5]] // BinaryPredicate p is a (not neccessarily transitive) connectivity check. // O(n) template > ContainerOut group_by(BinaryPredicate p, const ContainerIn& xs) { // ContainerOut is not deduced to // SameContNewType(ContainerIn, ContainerIn) // here, since ContainerIn could be a std::string. internal::check_binary_predicate_for_container(); static_assert(std::is_same::value, "Containers do not match."); ContainerOut result; if (is_empty(xs)) return result; typedef typename ContainerOut::value_type InnerContainerOut; *internal::get_back_inserter(result) = InnerContainerOut(1, xs.front()); for (auto it = ++std::begin(xs); it != std::end(xs); ++it) { if (internal::invoke(p, result.back().back(), *it)) *internal::get_back_inserter(result.back()) = *it; else *internal::get_back_inserter(result) = InnerContainerOut(1, *it); } return result; } // API search type: group_on : ((a -> b), [a]) -> [[a]] // fwd bind count: 1 // Arrange elements equal after applying a transformer into groups. // Only groups of consecutive elements are formed. // For a version scanning the whole container see group_globally_on. // group_on((mod 10), [12,22,34]) == [[12,22],[34]] // O(n) template auto group_on(F f, const ContainerIn& xs) { return group_by(is_equal_by(f), xs); } // API search type: group_on_labeled : ((a -> b), [a]) -> [(b, [a])] // fwd bind count: 1 // Arrange elements equal after applying a transformer into groups, // adding the transformation result as a label to the group. // Only groups of consecutive elements are formed. // For a version scanning the whole container see group_globally_on_labeled. // group_on_labeled((mod 10), [12,22,34]) == [(2,[12,22]), (4,[34])] // O(n) template auto group_on_labeled(F f, const ContainerIn& xs) { const auto group = [](auto f1, const auto& xs1) { return group_by(f1, xs1); }; return internal::group_on_labeled_impl(group, f, xs); } // API search type: group : [a] -> [[a]] // fwd bind count: 0 // Arrange equal elements into groups. // Only groups of consecutive elements are formed. // For a version scanning the whole container see group_globally. // group([1,2,2,2,3,2,2,4,5,5]) == [[1],[2,2,2],[3],[2,2],[4],[5,5]] // O(n) template > ContainerOut group(const ContainerIn& xs) { static_assert(std::is_same::value, "Containers do not match."); typedef typename ContainerIn::value_type T; auto pred = [](const T& x, const T& y) { return x == y; }; return group_by(pred, xs); } // API search type: group_globally_by : (((a, a) -> Bool), [a]) -> [[a]] // fwd bind count: 1 // Arrange equal elements into groups. // group_globally_by((==), [1,2,2,2,3,2,2,4,5,5]) // == [[1],[2,2,2,2,2],[3],[4],[5,5]] // BinaryPredicate p is a // transitive (whenever p(x,y) and p(y,z), then also p(x,z)) equality check. // O(n^2) // If you need O(n*log(n)), sort and then use group_by template > ContainerOut group_globally_by(BinaryPredicate p, const ContainerIn& xs) { internal::check_binary_predicate_for_container(); static_assert(std::is_same::value, "Containers do not match."); typedef typename ContainerOut::value_type InnerContainerOut; ContainerOut result; for (const auto& x : xs) { bool found = false; for (auto& ys : result) { if (internal::invoke(p, x, ys.back())) { *internal::get_back_inserter(ys) = x; found = true; break; } } if (!found) { *internal::get_back_inserter(result) = InnerContainerOut(1, x); } } return result; } // API search type: group_globally_on : ((a -> b), [a]) -> [[a]] // fwd bind count: 1 // Arrange elements equal after applying a transformer into groups. // group_globally_on((mod 10), [12,34,22]) == [[12,22],[34]] // O(n^2) // If you need O(n*log(n)), sort and then use group_on template auto group_globally_on(F f, const ContainerIn& xs) { return group_globally_by(is_equal_by(f), xs); } // API search type: group_globally_on_labeled : ((a -> b), [a]) -> [(b, [a])] // fwd bind count: 1 // Arrange elements equal after applying a transformer into groups, // adding the transformation result as a label to the group. // group_globally_on_labeled((mod 10), [12,34,22]) == [(2,[12,22]),(4, [34])] // O(n^2) // If you need O(n*log(n)), sort and then use group_on_labeled template auto group_globally_on_labeled(F f, const ContainerIn& xs) { const auto group = [](auto f1, const auto& xs1) { return group_globally_by(f1, xs1); }; return internal::group_on_labeled_impl(group, f, xs); } // API search type: group_globally : [a] -> [[a]] // fwd bind count: 0 // Arrange equal elements into groups. // group_globally([1,2,2,2,3,2,2,4,5,5]) == [[1],[2,2,2,2,2],[3],[4],[5,5]] // O(n^2) // If you need O(n*log(n)), sort and then use group template > ContainerOut group_globally(const ContainerIn& xs) { static_assert(std::is_same::value, "Containers do not match."); typedef typename ContainerIn::value_type T; auto pred = [](const T& x, const T& y) { return x == y; }; return group_globally_by(pred, xs); } // API search type: cluster_by : (((a, a) -> Bool), [a]) -> [[a]] // fwd bind count: 1 // Groups connected components, stable regarding initial order. // cluster_by(\x y -> abs (y - x) <= 3), [2,3,6,4,12,11,20,23,8,4]) // == [[2,3,6,4,12,11,8,4],[20,23]] // BinaryPredicate p is a connectivity check, being // a) commutative (p(x,y) = p(y,x)) // b) reflexive (p(x,x) = true) // c) not neccessarily transitive, but can be // O(n^2), memory complexity also O(n^2) template > ContainerOut cluster_by(BinaryPredicate p, const ContainerIn& xs) { internal::check_binary_predicate_for_container(); static_assert(std::is_same::value, "Containers do not match."); typedef std::vector bools; bools zero_filled_row(size_of_cont(xs), 0); // adjecency matrix typedef std::vector boolss; boolss adj_mat(size_of_cont(xs), zero_filled_row); for (const auto& idx_and_val_y : enumerate(xs)) { auto idx_y = idx_and_val_y.first; auto val_y = idx_and_val_y.second; for (const auto& idx_and_val_x : enumerate(xs)) { auto idx_x = idx_and_val_x.first; auto val_x = idx_and_val_x.second; if (internal::invoke(p, val_y, val_x)) { adj_mat[idx_y][idx_x] = 1; } } } bools already_used = zero_filled_row; auto is_already_used = [&](std::size_t i) -> bool { return already_used[i] != 0; }; typedef std::vector idxs; typedef std::vector idxss; auto bools_to_idxs = [](const bools& activations) -> idxs { auto unsigned_char_to_bool = [](unsigned char x) { return x != 0; }; return find_all_idxs_by(unsigned_char_to_bool, activations); }; idxss idx_clusters; std::function process_idx = [&](std::size_t idx) -> void { auto connected_idxs = bools_to_idxs(adj_mat[idx]); auto new_connected_idxs = drop_if(is_already_used, connected_idxs); if (is_empty(new_connected_idxs)) { return; } idx_clusters.back() = append(idx_clusters.back(), new_connected_idxs); for (const auto& new_idx : new_connected_idxs) { already_used[new_idx] = 1; } for (const auto& new_idx : new_connected_idxs) { process_idx(new_idx); } }; typedef typename ContainerOut::value_type InnerContainerOut; for (const auto& idx : all_idxs(xs)) { if (is_already_used(idx)) { continue; } *internal::get_back_inserter(idx_clusters) = idxs(); *internal::get_back_inserter(idx_clusters.back()) = idx; already_used[idx] = 1; process_idx(idx); } typedef typename ContainerIn::value_type T; auto idx_to_val = [&](std::size_t idx) -> T { return elem_at_idx(idx, xs); }; auto idxs_to_vals = [&](const idxs& val_idxs) -> InnerContainerOut { return transform_convert(idx_to_val, sort(val_idxs)); }; return transform_convert(idxs_to_vals, idx_clusters); } // API search type: split_by : ((a -> Bool), Bool, [a]) -> [[a]] // fwd bind count: 2 // Split a sequence at every element fulfilling a predicate. // The splitting elements are discarded. // split_by(is_even, true, [1,3,2,2,5,5,3,6,7,9]) == [[1,3],[],[5,5,3],[7,9]] // also known as split_when // O(n) template > ContainerOut split_by (UnaryPredicate pred, bool allow_empty, const ContainerIn& xs) { internal::check_unary_predicate_for_container(); static_assert(std::is_same::value, "Containers do not match."); if (allow_empty && is_empty(xs)) { return {{}}; } ContainerOut result; auto itOut = internal::get_back_inserter(result); auto start = std::begin(xs); while (start != std::end(xs)) { const auto stop = std::find_if(start, std::end(xs), pred); if (start != stop || allow_empty) { *itOut = { start, stop }; } if (stop == std::end(xs)) { break; } start = internal::add_to_iterator(stop); if (allow_empty && start == std::end(xs)) { *itOut = {}; } } return result; } // API search type: split_by_keep_separators : ((a -> Bool), [a]) -> [[a]] // fwd bind count: 1 // Split a sequence at every element fulfilling a predicate. // The splitting elements are kept. // split_by_keep_separators(is_even, true, [1,3,2,2,5,5,3,6,7,9]) // == [[1,3],[2],[2,5,5,3],[6,7,9]] // O(n) template > ContainerOut split_by_keep_separators (UnaryPredicate pred, const ContainerIn& xs) { internal::check_unary_predicate_for_container(); static_assert(std::is_same::value, "Containers do not match."); ContainerOut result; if (is_empty(xs)) return result; auto itOut = internal::get_back_inserter(result); auto start = std::begin(xs); while (start != std::end(xs)) { const auto stop = std::find_if( internal::add_to_iterator(start), std::end(xs), pred); *itOut = { start, stop }; if (stop == std::end(xs)) { break; } start = stop; } return result; } // API search type: split : (a, Bool, [a]) -> [[a]] // fwd bind count: 2 // Split a sequence at every element equal to x. // The splitting elements are discarded. // split(0, true, [1,3,2,0,0,6,0,7,5]) == [[1,3,2],[],[6],[7,5]] // O(n) template auto split(const T& x, bool allow_empty, const ContainerIn& xs) { return split_by(is_equal_to(x), allow_empty, xs); } // API search type: split_one_of : ([a], Bool, [a]) -> [[a]] // fwd bind count: 2 // Split a sequence at every element present in delimiters. // The splitting elements are discarded. // Also known as split_words_by_many. // split_one_of([0,3], true [1,3,2,0,0,6,0,7,5]) == [[1],[2],[],[6],[7,5]] // split_one_of(" o", false, "How are u?") == ["H","w","are","u?"] // O(n) template auto split_one_of( const ContainerDelims delimiters, bool allow_empty, const ContainerIn& xs) { const auto pred = [&](const typename ContainerIn::value_type& x) -> bool { return is_elem_of(x, delimiters); }; return split_by(pred, allow_empty, xs); } // API search type: split_keep_separators : ((a -> Bool), [a]) -> [[a]] // fwd bind count: 1 // Split a sequence at every element equal to x. // The splitting elements are kept. // split_keep_separators(2, true, [1,3,2,2,5,5,3,2,7,9]) // == [[1,3],[2],[2,5,5,3],[6,7,9]] // O(n) template auto split_keep_separators(const T& x, const ContainerIn& xs) { return split_by_keep_separators(is_equal_to(x), xs); } // API search type: split_at_idx : (Int, [a]) -> ([a], [a]) // fwd bind count: 1 // Split a sequence at a specific position. // split_at_idx(2, [0,1,2,3,4]) == ([0,1],[2,3,4]) template std::pair split_at_idx (std::size_t idx, const Container& xs) { assert(idx <= size_of_cont(xs)); return make_pair(get_segment(0, idx, xs), get_segment(idx, size_of_cont(xs), xs)); } // API search type: insert_at_idx : (Int, a, [a]) -> [a] // fwd bind count: 2 // Insert an element into a sequence at a specific position. // insert_at_idx(2, 0, [1,2,3,4]) == [1,2,0,3,4]. template Container insert_at_idx(std::size_t idx, const T& x, const Container& xs) { const auto splitted = split_at_idx(idx, xs); return concat(std::vector( { splitted.first, singleton_seq(x), splitted.second })); } // API search type: partition : ((a -> Bool), [a]) -> ([a], [a]) // fwd bind count: 1 // Split a sequence into two groups. // The first group contains all elements fulfilling the predicate. // The second group contains the remaining elements. // partition(is_even, [0,1,1,3,7,2,3,4]) == ([0,2,4],[1,1,3,7,3]) template std::pair partition (UnaryPredicate pred, const Container& xs) { internal::check_unary_predicate_for_container(); Container matching; Container notMatching; auto itOutMatching = internal::get_back_inserter(matching); auto itOutNotMatching = internal::get_back_inserter(notMatching); for (const auto& x : xs) { if (internal::invoke(pred, x)) *itOutMatching = x; else *itOutNotMatching = x; } return make_pair(matching, notMatching); } // API search type: split_at_idxs : ([Int], [a]) -> [[a]] // fwd bind count: 1 // Split a sequence at specific indices. // split_at_idxs([2,5], [0,1,2,3,4,5,6,7]) == [[0,1],[2,3,4],[5,6,7]] // split_at_idxs([2,5,5], [0,1,2,3,4,5,6,7]) == [[0,1],[2,3,4],[],[5,6,7]] template > ContainerOut split_at_idxs(const ContainerIdxs& idxsIn, const ContainerIn& xs) { static_assert(std::is_same::value, "Indices must be std::size_t"); static_assert(std::is_same::value, "Containers do not match."); ContainerIdxs idxStartC = {0}; ContainerIdxs idxEndC = {size_of_cont(xs)}; std::vector containerIdxss = {idxStartC, idxsIn, idxEndC}; auto idxs = concat(containerIdxss); auto idxsClean = sort(idxs); ContainerOut result; internal::prepare_container(result, size_of_cont(idxsClean) - 1); auto itOut = internal::get_back_inserter(result); auto idxPairs = overlapping_pairs(idxsClean); for (const auto& idxPair : idxPairs) { *itOut = get_segment(idxPair.first, idxPair.second, xs); } return result; } // API search type: split_every : (Int, [a]) -> [[a]] // fwd bind count: 1 // Split a sequence every n elements. // split_every(3, [0,1,2,3,4,5,6,7]) == [[0,1,2],[3,4,5],[6,7]] // Also known as chunk or chunks. template > ContainerOut split_every(std::size_t n, const ContainerIn& xs) { return split_at_idxs< std::vector, ContainerIn, ContainerOut>( numbers_step( n, size_of_cont(xs), n), xs); } // API search type: split_by_token : ([a], Bool, [a]) -> [[a]] // fwd bind count: 2 // Split a sequence at every segment matching a token. // split_by_token(", ", true, "foo, bar, baz") == ["foo", "bar", "baz"] template > ContainerOut split_by_token(const ContainerIn& token, bool allow_empty, const ContainerIn& xs) { static_assert(std::is_same::value, "Containers do not match."); const auto token_begins = find_all_instances_of_token_non_overlapping(token, xs); const auto token_ends = transform(add_to(size_of_cont(token)), token_begins); assert(is_sorted(interweave(token_begins, token_ends))); typedef std::vector idx_vec; const auto segments = zip( fplus::append(idx_vec(1, 0), token_ends), fplus::append(token_begins, idx_vec(1, size_of_cont(xs)))); ContainerOut result; auto itOut = internal::get_back_inserter(result); for (const auto& segment : segments) { if (segment.first != segment.second || allow_empty) *itOut = get_segment(segment.first, segment.second, xs); } return result; } // API search type: run_length_encode_by : (((a, a) -> Bool), [a]) -> [(Int, a)] // fwd bind count: 1 // RLE using a specific binary predicate as equality check. // run_length_encode_by((==),[1,2,2,2,2,3,3,2)) == [(1,1),(4,2),(2,3),(1,2)] template >> ContainerOut run_length_encode_by(BinaryPredicate pred, const ContainerIn& xs) { internal::check_binary_predicate_for_container(); ContainerOut result; auto groups = group_by(pred, xs); auto group_to_pair = [](const ContainerIn& group) -> std::pair { return std::make_pair(size_of_cont(group), group.front()); }; return transform(group_to_pair, groups); } // API search type: run_length_encode : [a] -> [(Int, a)] // fwd bind count: 0 // RLE. // run_length_encode([1,2,2,2,2,3,3,2)) == [(1,1),(4,2),(2,3),(1,2)] template auto run_length_encode(const ContainerIn& xs) { return run_length_encode_by(is_equal, xs); } // API search type: run_length_decode : [(Int, a)] -> [a] // fwd bind count: 0 // Inverse operation to run_length_encode. // run_length_decode([(1,1),(4,2),(2,3),(1,2)]) == [1,2,2,2,2,3,3,2) template auto run_length_decode(const ContainerIn& pairs) { static_assert(std::is_convertible::value, "Count type must be convertible to std::size_t."); const auto pair_to_vec = [](const Pair& p) { return replicate(p.first, p.second); }; return concat(transform(pair_to_vec, pairs)); } // API search type: span : ((a -> Bool), [a]) -> ([a], [a]) // fwd bind count: 1 // span, applied to a predicate p and a list xs, // returns a tuple where first element is longest prefix (possibly empty) // of xs of elements that satisfy p // and second element is the remainder of the list. // span(is_even, [0,2,4,5,6,7,8]) == ([0,2,4], [5,6,7,8]) template std::pair span(UnaryPredicate pred, const Container& xs) { auto maybeIdx = find_first_idx_by(logical_not(pred), xs); return { take(just_with_default(size_of_cont(xs), maybeIdx), xs), drop(just_with_default(size_of_cont(xs), maybeIdx), xs) }; } // API search type: divvy : (Int, Int, [a]) -> [[a]] // fwd bind count: 2 // Generates subsequences overlapping with a specific step. // divvy(5, 2, [0,1,2,3,4,5,6,7,8,9]) == [[0,1,2,3,4],[2,3,4,5,6],[4,5,6,7,8]] // divvy(length, 1, xs) is also known as aperture // divvy(1, step, xs) is also known as stride // (but withouts the nested lists in the result) template > ContainerOut divvy(std::size_t length, std::size_t step, const ContainerIn& xs) { assert(length > 0); assert(step > 0); const auto start_idxs = numbers_step( 0, size_of_cont(xs) - (length - 1), step); ContainerOut result; internal::prepare_container(result, size_of_cont(start_idxs)); auto itOut = internal::get_back_inserter(result); for (const auto start_idx : start_idxs) { *itOut = get_segment(start_idx, start_idx + length, xs); } return result; } // API search type: aperture : (Int, [a]) -> [[a]] // fwd bind count: 1 // Generates overlapping subsequences. // aperture(5, [0,1,2,3,4,5,6]) == [[0,1,2,3,4],[1,2,3,4,5],[2,3,4,5,6]] template > ContainerOut aperture(std::size_t length, const ContainerIn& xs) { assert(length > 0); const auto start_idxs = numbers( 0, size_of_cont(xs) - (length - 1)); ContainerOut result; internal::prepare_container(result, size_of_cont(start_idxs)); auto itOut = internal::get_back_inserter(result); for (const auto start_idx : start_idxs) { *itOut = get_segment(start_idx, start_idx + length, xs); } return result; } // API search type: stride : (Int, [a]) -> [a] // fwd bind count: 1 // Keeps every nth element. // stride(3, [0,1,2,3,4,5,6,7]) == [0,3,6] template Container stride(std::size_t step, const Container& xs) { assert(step > 0); Container ys; auto it = internal::get_back_inserter(ys); auto it_in = std::begin(xs); std::size_t i = 0; const auto xs_size = size_of_cont(xs); while(it_in != std::end(xs)) { *it = *it_in; std::size_t increment = std::min(step, xs_size - i); internal::advance_iterator(it_in, increment); i += increment; } return ys; } // API search type: winsorize : (Float, [Float]) -> [Float] // fwd bind count: 1 // Winsorizing // winsorize(0.1, [1,3,4,4,4,4,4,4,6,8]) == [3,3,4,4,4,4,4,4,6,6] template Container winsorize(double trim_ratio, const Container& xs) { if (size_of_cont(xs) < 2) { return xs; } trim_ratio = std::max(trim_ratio, 0.0); const auto xs_sorted = sort(xs); std::size_t amount = floor( trim_ratio * static_cast(size_of_cont(xs_sorted))); amount = std::min(size_of_cont(xs_sorted) / 2, amount); const auto parts = split_at_idxs( std::vector({amount, size_of_cont(xs_sorted) - amount}), xs_sorted); assert(size_of_cont(parts) == 3); typedef typename Container::value_type T; if (is_empty(parts[1])) { return Container(size_of_cont(xs_sorted), median(xs_sorted)); } else { const T lower = parts[1].front(); const T upper = parts[1].back(); const auto result = concat(std::vector({ Container(amount, lower), parts[1], Container(amount, upper)})); assert(size_of_cont(result) == size_of_cont(xs_sorted)); return result; } } // API search type: separate_on : ((a -> b), [a]) -> [[a]] // fwd bind count: 1 // Separate elements equal after applying a transformer into groups. // separate_on((mod 10), [12,22,34]) == [[12,34],[22]] template > ContainerOut separate_on(F f, const ContainerIn& xs) { static_assert(std::is_same::value, "Containers do not match."); ContainerOut result; if (is_empty(xs)) { return result; } const auto groups = group_globally_on(f, xs); bool found = true; auto itOut = internal::get_back_inserter(result); std::size_t index = 0; while (found) { typename ContainerOut::value_type sub_result; found = false; auto itOutInner = internal::get_back_inserter(sub_result); for (auto& group: groups) { if (size_of_cont(group) > index) { *itOutInner = group[index]; found = true; } } if (found) { *itOut = sub_result; ++index; } } return result; } // API search type: separate : [a] -> [[a]] // fwd bind count: 0 // Separate equal elements into groups. // separate([1, 2, 2, 3, 3, 4, 4, 4]) == [[1, 2, 3, 4], [2, 3, 4], [4]] template > ContainerOut separate(const ContainerIn& xs) { static_assert(std::is_same::value, "Containers do not match."); typedef typename ContainerIn::value_type T; return separate_on(identity, xs); } } // namespace fplus