@@ -3303,10 +3303,10 @@ inline constexpr auto __set_algo_cut_off = 1000;
33033303template <class _IsVector , class _ExecutionPolicy , class _RandomAccessIterator1 , class _RandomAccessIterator2 ,
33043304 class _OutputIterator , class _SizeFunction , class _SetOP , class _Compare , class _Proj1 , class _Proj2 >
33053305_OutputIterator
3306- __parallel_set_op (__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
3307- _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
3308- _OutputIterator __result, _SizeFunction __size_func, _SetOP __set_op, _Compare __comp, _Proj1 __proj1 ,
3309- _Proj2 __proj2)
3306+ __parallel_set_op_impl (__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
3307+ _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
3308+ _OutputIterator __result, _SizeFunction __size_func, _SetOP __set_op, _Compare __comp,
3309+ _Proj1 __proj1, _Proj2 __proj2)
33103310{
33113311 using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag;
33123312
@@ -3399,6 +3399,38 @@ __parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA
33993399 });
34003400}
34013401
3402+ // Thin wrapper over __parallel_set_op_impl that always passes the larger input range first, so that the larger
3403+ // range is the one partitioned. If __n1 >= __n2, every argument is forwarded unchanged. Otherwise the two ranges
3404+ // and the two projections are swapped, and __size_func / __set_op are wrapped in lambdas that swap their
3405+ // arguments back, so the leaf operation still sees the caller's original range order. That order matters
3406+ // because, when elements are equivalent, the output is required to use the elements from the first sequence.
3407+ template <class _IsVector , class _ExecutionPolicy , class _RandomAccessIterator1 , class _RandomAccessIterator2 ,
3408+ class _OutputIterator , class _SizeFunction , class _SetOP , class _Compare , class _Proj1 , class _Proj2 >
3409+ _OutputIterator
3410+ __parallel_set_op (__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
3411+ _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
3412+ _OutputIterator __result, _SizeFunction __size_func, _SetOP __set_op, _Compare __comp, _Proj1 __proj1,
3413+ _Proj2 __proj2)
3414+ {
3415+ const auto __n1 = __last1 - __first1;
3416+ const auto __n2 = __last2 - __first2;
3417+
3418+ if (__n1 >= __n2)
3419+ {
3420+ return __parallel_set_op_impl (__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2,
3421+ __last2, __result, __size_func, __set_op, __comp, __proj1, __proj2);
3422+ }
3423+ // range2 is the larger one here: pass it as the impl's first range. The lambdas below capture __size_func and
3424+ // __set_op by reference and swap their arguments back, so the leaf operation keeps the caller's range order.
3425+ return __parallel_set_op_impl (
3426+ __tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __first1, __last1, __result,
3427+ [&__size_func](auto __n, auto __m) { return __size_func (__m, __n); },
3428+ [&__set_op](auto __f2, auto __l2, auto __f1, auto __l1, auto * __res, auto __comp, auto __p2, auto __p1) {
3429+ return __set_op (__f1, __l1, __f2, __l2, __res, __comp, __p1, __p2);
3430+ },
3431+ __comp, __proj2, __proj1);
3432+ }
3433+
34023434// a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference'
34033435template <class _IsVector , class _ExecutionPolicy , class _RandomAccessIterator1 , class _RandomAccessIterator2 ,
34043436 class _OutputIterator , class _SetUnionOp , class _Compare , class _Proj1 , class _Proj2 >
@@ -3681,41 +3713,18 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _
36813713 const _DifferenceType __total_work = __n1 + __n2;
36823714 if (__total_work > __set_algo_cut_off)
36833715 {
3684- return __internal::__except_handler ([&]() {
3685- // Decide which range to partition based on size
3686- if (__n1 >= __n2)
3687- {
3688- return __internal::__parallel_set_op (
3689- __tag, std::forward<_ExecutionPolicy>(__exec), __begin1, __last1, __begin2, __last2, __result,
3690- [](_DifferenceType __n, _DifferenceType __m) { return std::min (__n, __m); },
3691- [](_RandomAccessIterator1 __lmda_first1, _RandomAccessIterator1 __lmda_last1,
3692- _RandomAccessIterator2 __lmda_first2, _RandomAccessIterator2 __lmda_last2, _T* __result,
3693- _Compare __comp, oneapi::dpl::identity, oneapi::dpl::identity) {
3694- return oneapi::dpl::__utils::__set_intersection_construct (
3695- __lmda_first1, __lmda_last1, __lmda_first2, __lmda_last2, __result,
3696- oneapi::dpl::__internal::__op_uninitialized_copy<_ExecutionPolicy>{}, __comp,
3697- oneapi::dpl::identity{}, oneapi::dpl::identity{});
3698- },
3699- __comp, oneapi::dpl::identity{}, oneapi::dpl::identity{});
3700- }
3701- else
3702- {
3703- return __internal::__parallel_set_op (
3704- __tag, std::forward<_ExecutionPolicy>(__exec), __begin2, __last2, __begin1, __last1, __result,
3705- [](_DifferenceType __n, _DifferenceType __m) { return std::min (__n, __m); },
3706- [](_RandomAccessIterator2 __lmda_first2, _RandomAccessIterator2 __lmda_last2,
3707- _RandomAccessIterator1 __lmda_first1, _RandomAccessIterator1 __lmda_last1, _T* __result,
3708- _Compare __comp, oneapi::dpl::identity, oneapi::dpl::identity) {
3709- // Lambda params: __lmda_first1 = iter of range2, __lmda_first2 = iter of range1
3710- // Swap to pass logical range1 first for semantic correctness (must copy from first range)
3711- return oneapi::dpl::__utils::__set_intersection_construct (
3712- __lmda_first1, __lmda_last1, __lmda_first2, __lmda_last2, __result,
3713- oneapi::dpl::__internal::__op_uninitialized_copy<_ExecutionPolicy>{}, __comp,
3714- oneapi::dpl::identity{}, oneapi::dpl::identity{});
3715- },
3716- __comp, oneapi::dpl::identity{}, oneapi::dpl::identity{});
3717- }
3718- });
3716+ return __internal::__parallel_set_op (
3717+ __tag, std::forward<_ExecutionPolicy>(__exec), __begin1, __last1, __begin2, __last2, __result,
3718+ [](_DifferenceType __n, _DifferenceType __m) { return std::min (__n, __m); },
3719+ [](_RandomAccessIterator1 __lmda_first1, _RandomAccessIterator1 __lmda_last1,
3720+ _RandomAccessIterator2 __lmda_first2, _RandomAccessIterator2 __lmda_last2, _T* __result, _Compare __comp,
3721+ oneapi::dpl::identity, oneapi::dpl::identity) {
3722+ return oneapi::dpl::__utils::__set_intersection_construct (
3723+ __lmda_first1, __lmda_last1, __lmda_first2, __lmda_last2, __result,
3724+ oneapi::dpl::__internal::__op_uninitialized_copy<_ExecutionPolicy>{}, __comp,
3725+ oneapi::dpl::identity{}, oneapi::dpl::identity{});
3726+ },
3727+ __comp, oneapi::dpl::identity{}, oneapi::dpl::identity{});
37193728 }
37203729
37213730 // Work too small for parallelization - use serial algorithm
0 commit comments