数据结构——最大索引堆(C++和Java实现)

在上一篇博客中，记录了优先队列——堆这个数据结构的实现，并且关于堆的性质我也在上文中介绍过，堆能用来进行排序，堆排序具有快速(复杂度O(NlogN))，稳定的特点，尤其是非常稳定，因此适用于某些需要排序稳定性的场合。

但是呢，普通的二叉堆有两个缺陷：

在对的元素体积非常大的情况下，经常性的移动元素是低效的。
如果在堆的使用过程中，堆中的元素的值要改变，则普通堆对此无能为力，简单的说，如果一个元素如果进入堆之后，它的值就不能改变了，否则会影响堆的性质。

第一个缺陷还能用类似于指针排序的技术解决，但是第二个缺陷不采用特殊的技术是没有办法解决的，然而在一些场合，堆中元素的值确实需要改变。于是乎，索引堆这个数据结构就在这里应运而生了。

所谓索引堆，简单的说，就是在堆里头存放的不是数据，而是数据所在的数组的索引，也就是下标，根据数据的某种优先级来调整各个元素对应的下标在堆中的位置。本质上来说，索引堆也是堆，提供堆的接口。

那么接下来，我们就来尝试用C++和Java两种语言来实现索引堆，注释在代码中写的比较详细。

C++版如下:

#include <iostream>

using namespace std;

template<typename Item>
class IndexMaxHeap {

private:
    Item *data;
    int *indexes;
    int *reverse;
    int count;
    int capacity;

    void shiftUp( int k ) {
        while (k > 1 && data[ indexes[k/2] ] < data[ indexes[k] ]) {
            swap( indexes[k/2], indexes[k] );
            reverse[indexes[k / 2]] = k / 2;
            reverse[indexes[k]] = k;
            k /= 2;
        }
    }

    void shiftDown( int k ) {
        while (2*k <= count) {
            int j = 2 * k; // 在此轮循环中,data[k]和data[j]交换位置
            if (j + 1 <= count && data[ indexes[j] ] < data[ indexes[j+1] ])
                // data[j] 是 data[2*k]和data[2*k+1]中的最大值
                j += 1;
            if (data[ indexes[k] ] > data[ indexes[j] ])
                break;
            swap( indexes[k], indexes[j] );
            reverse[ indexes[k] ] = k;
            reverse[ indexes[j] ] = j;
            k = j;
        }
    }

public:
    // 构造函数, 构造一个空的索引堆, 可容纳capacity个元素
    IndexMaxHeap(int capacity) {
        data = new Item[capacity + 1];
        indexes = new int[capacity + 1];
        reverse = new int[capacity + 1];
        for (int i = 0; i <= capacity; i++) {
            reverse[i] = 0;
        }
        count = 0;
        this->capacity = capacity;
    }

    IndexMaxHeap(Item arr[], int n) {
        data = new Item[n+1];
        capacity = n + 1;
        for (int i = 0; i < n; i++)
            data[i+1] = arr[i];
        count = n;
        for (int i = count / 2; i >= 1; i--)
            shiftDown(i);
    }

    ~IndexMaxHeap() {
        delete[] data;
        delete[] indexes;
        delete[] reverse;
    }

    // 返回堆中的元素个数
    int size() {
        return count;
    }

    // 返回一个布尔值, 表示堆中是否为空
    bool isEmpty() {
        return count == 0;
    }

    // 向最大索引堆中插入一个新的元素, 新元素的索引为i, 元素为item
    // 传入的i对用户而言,是从0索引的
    void insert(int i, Item item) {
        assert(count + 1 <= capacity);
        assert(i + 1 >= 1 && i + 1 <= capacity);

        // 再插入一个新元素前,还需要保证索引i所在的位置是没有元素的。
        assert( !contain(i) );

        i += 1;
        data[i] = item;
        indexes[count+1] = i;
        reverse[i] = count + 1;

        count++;
        shiftUp( count );
    }

    // 从最大堆中取出堆顶元素, 即堆中所存储的最大数据
    Item extractMax() {
        assert(count > 0);

        Item ret = data[ indexes[1] ];

        swap( indexes[1], indexes[count] );
        reverse[ indexes[1] ] = 1;
        reverse[ indexes[count] ] = 0;
        count--;

        shiftDown( 1 );

        return ret;
    }

    int extractMaxIndex() {
        assert( count > 0 );
        int ret = indexes[1] - 1;
        swap( indexes[1], indexes[count] );
        reverse[ indexes[1] ] = 1;
        reverse[ indexes[count] ] = 0;
        count--;
        shiftDown(1);
        return ret;
    }

    bool contain( int i ) {
        assert( i + 1 >= 1 && i + 1 <= capacity);
        return reverse[i+1] != 0;
    }

    Item getItem( int i ) {
        assert(contain(i));
        return data[i + 1];
    }

    void change( int i, Item newItem ) {

        assert(contain(i));

        i += 1;
        data[i] = newItem;

        // 找到indexes[j] = i, j 表示 data[i]在堆中的位置
        // 之后shiftUp(j), 再shiftDown(j)
        // for ( int j = 1; j <= count; j++ ) {
        //     if (indexes[j] == i) {
        //         shiftUp(j);
        //         shiftDown(j);
        //         return;
        //     }
        // }

        // 有了 reverse 之后,
        // 我们可以非常简单的通过reverse直接定位索引i在indexes中的位置
        int j = reverse[i];
        shiftUp(j);
        shiftDown(j);
    }

    // 获取最大堆中的堆顶元素
    Item getMax(){
        assert( count > 0 );
        return data[1];
    }

    // 测试索引堆中的索引数组index和反向数组reverse
    // 注意:这个测试在向堆中插入元素以后, 不进行extract操作有效
    bool testIndexesAndReverseIndexes(){

        int *copyIndexes = new int[count+1];
        int *copyReverseIndexes = new int[count+1];

        for( int i = 0 ; i <= count ; i ++ ){
            copyIndexes[i] = indexes[i];
            copyReverseIndexes[i] = reverse[i];
        }

        copyIndexes[0] = copyReverseIndexes[0] = 0;
        std::sort(copyIndexes, copyIndexes + count + 1);
        std::sort(copyReverseIndexes, copyReverseIndexes + count + 1);

        // 在对索引堆中的索引和反向索引进行排序后,
        // 两个数组都应该正好是1...count这count个索引
        bool res = true;
        for( int i = 1 ; i <= count ; i ++ )
            if( copyIndexes[i-1] + 1 != copyIndexes[i] ||
                    copyReverseIndexes[i-1] + 1 != copyReverseIndexes[i] ){
                res = false;
                break;
            }

        delete[] copyIndexes;
        delete[] copyReverseIndexes;

        if( !res ){
            cout<<"Error!"<<endl;
            return false;
        }

        for( int i = 1 ; i <= count ; i ++ )
            if( reverse[ indexes[i] ] != i ){
                cout<<"Error 2"<<endl;
                return false;
            }

        return true;
    }
};

Java版本的代码如下:

// 最大索引堆
public class IndexMaxHeap<Item extends Comparable> {

    protected Item[] data;    // 最大索引堆中的数据
    protected int[] indexes;  // 最大索引堆中的索引, indexes[x] = i 表示索引i在x的位置
    protected int[] reverse;  // 最大索引堆中的反向索引, reverse[i] = x 表示索引i在x的位置
    protected int count;
    protected int capacity;

    // 构造函数, 构造一个空堆, 可容纳capacity个元素
    public IndexMaxHeap(int capacity) {
        data = (Item[]) new Comparable[capacity + 1];
        indexes = new int[capacity + 1];
        reverse = new int[capacity + 1];
        for (int i = 0; i <= capacity; i++) {
            reverse[i] = 0;
        }
        count = 0;
        this.capacity = capacity;
    }

    // 返回索引堆中的元素个数
    public int size() {
        return count;
    }

    // 返回一个布尔值, 表示索引堆中是否为空
    public boolean isEmpty() {
        return count == 0;
    }

    // 向最大索引堆中插入一个新的元素, 新元素的索引为i, 元素为item
    // 传入的i对用户而言,是从0索引的
    public void insert(int i, Item item) {
        assert (count + 1 <= capacity);
        assert (i + 1 >= 1 && i + 1 <= capacity);

        // 再插入一个新元素前,还需要保证索引i所在的位置是没有元素的。
        assert ( !contain(i) );

        i += 1;
        data[i] = item;
        indexes[count + 1] = i;
        reverse[i] = count + 1;

        count++;
        shiftUp( count );
    }

    // 从最大索引堆中取出堆顶元素, 即索引堆中所存储的最大数据
    public Item extractMax() {
        assert (count > 0);

        Item ret = data[ indexes[1] ];
        swap(indexes, 1, count);
        reverse[ indexes[1] ] = 1;
        reverse[ indexes[count] ] = 0;
        count--;

        shiftDown(1);

        return ret;
    }

    // 从最大索引堆中取出堆顶元素的索引
    int extractMaxIndex() {
        assert (count > 0);

        int ret = indexes[1] - 1;
        swap(indexes, 1, count);
        reverse[ indexes[1] ] = 1;
        reverse[ indexes[count] ] = 0;
        count--;
        shiftDown(1);
        return ret;
    }

    // 看索引i所在的位置是否存在元素
    private boolean contain(int i) {
        assert (i + 1 >= 1 && i + 1 <= capacity);
        return reverse[i+1] != 0;
    }

    private void shiftUp(int k) {
        while (k > 1 && data[ indexes[k / 2] ].compareTo( data[ indexes[k] ] ) < 0) {
            swap(indexes, k / 2, k);
            k /= 2;
        }
    }

    private void shiftDown(int k) {
        while (2*k <= count) {
            int j = 2 * k;
            if (j + 1 <= count && data[ indexes[j] ].compareTo( data[ indexes[j+1] ] ) < 0) {
                j += 1;
            }

            if (data[ indexes[k] ].compareTo(data[ indexes[j] ]) > 0) {
                break;
            }

            swap(indexes, k, j);
            k = j;
        }
    }

    // 交换索引堆中的索引i和j
    // 由于有了反向索引reverse数组，
    // indexes数组发生改变以后， 相应的就需要维护reverse数组
    private void swap(int[] arr, int i, int j) {
        int t = arr[i];
        arr[i] = arr[j];
        arr[j] = t;

        reverse[ indexes[i] ] = i;
        reverse[ indexes[j] ] = j;
    }

    // 获取最大索引堆中索引为i的元素
    public Item getItem(int i) {
        assert (contain(i));
        return data[i + 1];
    }

    // 将最大索引堆中索引为i的元素修改为newItem
    public void change(int i, Item newItem) {
        assert(contain(i));

        i += 1;
        data[i] = newItem;

        int j = reverse[i];
        shiftUp(j);
        shiftDown(j);
    }

    // 获取最大索引堆中的堆顶元素
    Item getMax() {
        assert (count > 0);
        return data[1];
    }

    // 测试索引堆中的索引数组index和反向数组reverse
    // 注意:这个测试在向堆中插入元素以后, 不进行extract操作有效
    public boolean testIndexes(){

        int[] copyIndexes = new int[count+1];
        int[] copyReverseIndexes = new int[count+1];

        for( int i = 0 ; i <= count ; i ++ ) {
            copyIndexes[i] = indexes[i];
            copyReverseIndexes[i] = reverse[i];
        }

        copyIndexes[0] = 0;
        copyReverseIndexes[0] = 0;
        Arrays.sort(copyIndexes);
        Arrays.sort(copyReverseIndexes);

        // 在对索引堆中的索引和反向索引进行排序后,
        // 两个数组都应该正好是1...count这count个索引
        boolean res = true;
        for( int i = 1 ; i <= count ; i ++ )
            if( copyIndexes[i-1] + 1 != copyIndexes[i] ||
                    copyReverseIndexes[i-1] + 1 != copyReverseIndexes[i] ){
                res = false;
                break;
            }

        if( !res ){
            System.out.println("Error!");
            return false;
        }

        return true;
    }

    public static void main(String[] args) {
        int N = 1000000;
        IndexMaxHeap<Integer> indexMaxHeap = new IndexMaxHeap<Integer>(N);
        for (int i = 0; i < N; i++) {
            indexMaxHeap.insert(i, (int) (Math.random() * N));
        }

        assert indexMaxHeap.testIndexes();
    }

}

李晓的博客

数据结构——最大索引堆(C++和Java实现)

Search

Table of Contents