合并两个有序序列

作者: csdn-whinah 发表日期: 2011年07月16日分类: 算法评论: 0 条阅读次数: 3,707 次

经典写法

合并两个有序序列，太简单了吧？还有专门讨论的必要吗？

这是一个最简单的 Merge 版本：

template<class ForwardIter1, class ForwardIter2, class OutputIter>
void merge(ForwardIter1 first1, ForwardIter1 last1,
           ForwardIter2 first2, ForwardIter2 last2, OutputIter output) {
    for (; first1 != last1 && first2 != last2; ++output) {
        if (*first2 < *first1)
            *output = *first2, ++first2;
        else
            *output = *first1, ++first1;
    }
    for (; first1 != last1; ++first1, ++output)
        *output = *first1;
    for (; first2 != last2; ++first2, ++output)
        *output = *first2;
}

template<class ForwardIter1, class ForwardIter2, class OutputIter>

void merge(ForwardIter1 first1, ForwardIter1 last1,

ForwardIter2 first2, ForwardIter2 last2, OutputIter output) {

for (; first1 != last1 && first2 != last2; ++output) {

if (*first2 < *first1)

*output = *first2, ++first2;

else

*output = *first1, ++first1;

}

for (; first1 != last1; ++first1, ++output)

*output = *first1;

for (; first2 != last2; ++first2, ++output)

*output = *first2;

}

看似简单？为什么循环里面判断是：

        if (*first2 < *first1)
            *output = *first2, ++first2;
        else
            *output = *first1, ++first1;

if (*first2 < *first1)

*output = *first2, ++first2;

else

*output = *first1, ++first1;

而不是：

        if (*first1 < *first2)
            *output = *first1, ++first1;
        else
            *output = *first2, ++first2;

if (*first1 < *first2)

*output = *first1, ++first1;

else

*output = *first2, ++first2;

答案：排序的稳定性，有序序列可能存在相等的元素，对于相等的元素，在 output 中，前者可以保证 seq1 的实例在 seq2 中的之前，而后者反了过来，虽说仍然是确定的，但不符合人的直觉。

为什么要两个不同类的 ForwardIter ？只是付出了很小的努力，我们就达到了 merge 不同 iterator 的效用（注意，是效用，不是目标！），从理论上讲，ForwardIter1, ForwardIter2, OutputIter 完全可以是不同的 iterator，并且，他们所指向的元素类型也可以不同，只要在相应的操作符下兼容即可。

应用

如此，最正常的应用莫过于于 merge_sort 了：

template<class RandInputIter, class OutputIter>
OutputIter
merge_sort_loop(RandInputIter first, RandInputIter last, OutputIter output) {
    ptrdiff_t len = last - first;
    if (len <= 1) {
        if (len == 1)
            *output = *first, ++output;
        return output;
    }
    else {
        ptrdiff_t mid = len / 2;
        OutputIter lasto1 = merge_sort_loop(first      , first + mid, output);
        OutputIter lasto2 = merge_sort_loop(first + mid, last       , lasto1);
        merge(output, lasto1, lasto1, lasto2, first);
        std::copy(first, last, output);
        return lasto2;
    }
}

template<class RandIter>
void merge_sort(RandIter first, RandIter last) {
    typedef typename std::iterator_traits<RandIter>::value_type val_t;
    boost::scoped_array<val_t> tmp(new val_t[last - first]); // exception safe
    merge_sort_loop(first, last, tmp.get());
}

template<class RandInputIter, class OutputIter>

OutputIter

merge_sort_loop(RandInputIter first, RandInputIter last, OutputIter output) {

ptrdiff_t len = last - first;

if (len <= 1) {

if (len == 1)

*output = *first, ++output;

return output;

}

else {

ptrdiff_t mid = len / 2;

OutputIter lasto1 = merge_sort_loop(first , first + mid, output);

OutputIter lasto2 = merge_sort_loop(first + mid, last , lasto1);

merge(output, lasto1, lasto1, lasto2, first);

std::copy(first, last, output);

return lasto2;

}

template<class RandIter>

void merge_sort(RandIter first, RandIter last) {

typedef typename std::iterator_traits<RandIter>::value_type val_t;

boost::scoped_array<val_t> tmp(new val_t[last - first]); // exception safe

merge_sort_loop(first, last, tmp.get());

}

在 merge_sort_loop 中，output 不必是 RandomAccessIterator ！（stl 里面有 stable_sort/inplace_merge，效率和内存使用比这里的 merge_sort 都要低，生产系统中还是应该用 stl 。）

这里说的只是经典的 merge 写法，有没有其它写法呢？只要仔细想，肯定有！

另一种写法

template<class ForwardIter1, class ForwardIter2, class OutputIter>
OutputIter
merge2(ForwardIter1 first1, ForwardIter1 last1,
       ForwardIter2 first2, ForwardIter2 last2, OutputIter output) {
    for (; first1 != last1 && first2 != last2; ++output) {
        if (*first2 < *first1) {
            do { // why use do .. while ?
                *output = *first2;
                 ++output; ++first2;
            } while (first2 != last2 && *first2 < *first1);
            *output = *first1; ++first1;
        } else {
            do { // why use do .. while ?
                *output = *first1;
                 ++output; ++first1;
            } while (first1 != last1 && !(*first2 < *first1)); // Important
            *output = *first2, ++first2;
        }
    }
    for (; first1 != last1; ++first1, ++output)
        *output = *first1;
    for (; first2 != last2; ++first2, ++output)
        *output = *first2;
    return output;
}

template<class ForwardIter1, class ForwardIter2, class OutputIter>

OutputIter

merge2(ForwardIter1 first1, ForwardIter1 last1,

ForwardIter2 first2, ForwardIter2 last2, OutputIter output) {

for (; first1 != last1 && first2 != last2; ++output) {

if (*first2 < *first1) {

do { // why use do .. while ?

*output = *first2;

++output; ++first2;

} while (first2 != last2 && *first2 < *first1);

*output = *first1; ++first1;

} else {

do { // why use do .. while ?

*output = *first1;

++output; ++first1;

} while (first1 != last1 && !(*first2 < *first1)); // Important

*output = *first2, ++first2;

}

for (; first1 != last1; ++first1, ++output)

*output = *first1;

for (; first2 != last2; ++first2, ++output)

*output = *first2;

return output;

}

注意，为了实现稳定性，Important 行的条件是：

while (first1 != last1 && !(*first2 < *first1))

1	while (first1 != last1 && !(first2 < first1))

而非

while (first1 != last1 && *first1 < *first2)

1	while (first1 != last1 && first1 < first2)

这种写法有什么优势呢？搞那么复杂？

这里有一道题目：原地合并两个单向链表，返回新的表头。

简单生硬地套用经典写法：

Node* mergelist(Node* a, Node* b) {
    if (NULL == a) return b;
    if (NULL == b) return a;
    Node* c = b->key < a->key ? b : a; // note:stablize
    do { // why use do .. while ?
        if (b->key < a->key) { // note:stablize
            Node* tmp = b->next;
            b->next = a;
            b = tmp;
        } else {
            Node* tmp = a->next;
            a->next = b;
            a = tmp;
        }
    } while (a && b);
    return c;
}

Node* mergelist(Node* a, Node* b) {

if (NULL == a) return b;

if (NULL == b) return a;

Node* c = b->key < a->key ? b : a; // note:stablize

do { // why use do .. while ?

if (b->key < a->key) { // note:stablize

Node* tmp = b->next;

b->next = a;

b = tmp;

} else {

Node* tmp = a->next;

a->next = b;

a = tmp;

}

} while (a && b);

return c;

}

有问题吗？具体啥问题，慢慢找，不难找！

套用新写法：

Node* mergelist2(Node* a, Node* b) {
    if (NULL == a) return b;
    if (NULL == b) return a;
    Node* c = b->key < a->key ? b : a; // note:stablize
    do { // why use do .. while ?
        Node* prev;
        if (b->key < a->key) { // note:stablize
            do {        
                prev = b;       
                b = b->next;    
            } while (b && b->key < a->key); // stablize 
            prev->next = a;
        } else {
            do {        
                prev = a;       
                a = a->next;    
            } while (a && !(b->key < a->key)); // stablize 
            prev->next = b;
        }       
    } while (a && b); 
    return c;
}

Node* mergelist2(Node* a, Node* b) {

if (NULL == a) return b;

if (NULL == b) return a;

Node* c = b->key < a->key ? b : a; // note:stablize

do { // why use do .. while ?

Node* prev;

if (b->key < a->key) { // note:stablize

do {

prev = b;

b = b->next;

} while (b && b->key < a->key); // stablize

prev->next = a;

} else {

do {

prev = a;

a = a->next;

} while (a && !(b->key < a->key)); // stablize

prev->next = b;

}

} while (a && b);

return c;

}

其它（可能的困惑）

!(y<x)

可以看到，程序中很多地方使用了 ! (y < x) 的写法，而非 x <= y，为什么？

一方面，stl 对 Comparable 的最小定义是 operator<，一个类型，如果定义了 operator<，那么它可以在直接使用所有stl的排序/堆/查找算法，还有 set/map, multiset/multimap 容器。但是 std::find 等在非排序序列上操作的算法不算。

虽然，直观看，!(y < x) 和 x <= y 等价，然而，在数学定义上，两者并不等价，我们可以举出一些反例，但这些反例多多少看上去并不太“自然”。我正在努力寻找“很自然”的反例。

boost 提供了一系列模板类来从 operator< 推导其它操作符： ==, !=, <=, >, >=，原理上都很简单。

do {…} while

一般情况下， while (expr) {… } 和 do {…} while (expr) 等价，如果循环之前 expr 为 true 的话，在这种情况下，我们应该用 do {… } while，主要原因有两点：

少了一次对 expr 的求值
减少了一次非条件（绝对）跳转，和至少一次条件跳转（参考编译原理关于代码生成的章节，或者直接看编译器生成的汇编码）

当然，如果循环执行的次数很多的话，这个效率的损失微乎其微，但是，如果循环的执行次数很少，就像这本文例子中的，内循环的平均执行次数少于2次，这个多余的开销就相当可观了！