Heapsort

Heapsort

The heap structure on n nodes is the binary tree with nodes 0,1,...,n-1 such that the parent of each node j other than 0 is the floor of (j-1)/2 (and such that 0 is the root of the tree).

Warning: A number of books use a different convention. There, the nodes of the heap structure are 1,2,...,n, the parent of each node j other than 1 is the floor of j/2, and 1 is the root.

An array of records a[i] indexed by nodes i of a heap structure is a max-heap if its keys satisfy the max-heap condition,

cmp(&a[i],&a[j])>=0 whenever i is an ancestor of j.

Now consider the following two problems:

Given an array a[0],a[1],...,a[n-1] of records and an index inc such that
the array arises from a max-heap by increasing the value of a[inc].key,
permute the n records so that the resulting array is again a max-heap;
Given an array a[0],a[1],...,a[n-1] of records and an index dec such that
the array arises from a max-heap by decreasing the value of a[dec].key,
permute the n records so that the resulting array is again a max-heap.

Problem 1 can be solved by a call of bubbleup(a, n, inc, a[inc]) and problem 2 can be solved by a call of siftdown(a, n, dec, a[dec]) with bubbleup and siftdown defined as follows:

void bubbleup(record a[], int n, int vacant, record missing)
{
  int parent=(vacant-1)/2;

  while(vacant>0)
    {
      /* the actual value of a[vacant] is irrelevant;
         if   a[vacant] were overwritten with missing, 
	 then the max-heap condition would be satisfied 
	      everywhere except possibly at j=vacant */
          
      if(cmp(&a[parent],&missing)<0)
	{
	  a[vacant]=a[parent], vacant=parent, parent=(vacant-1)/2;
	}
      else
	break;
    }
  a[vacant]=missing;
}


void siftdown(record a[], int n, int vacant, record missing)
{
  int child=2*(vacant+1);

  while(child<n)
    {
      /* the actual value of a[vacant] is irrelevant;
         if   a[vacant] were overwritten with missing, 
	 then the max-heap condition would be satisfied 
	      everywhere except possibly at i=vacant */
          
      if(cmp(&a[child],&a[child-1])<0) 
	child--;
      if(cmp(&a[child],&missing)>0)
        {
          a[vacant]=a[child], vacant=child, child=2*(vacant+1);
        }
      else
	break;
    }

  if((child==n) && cmp(&a[n-1],&missing)>0)
    {
      a[vacant]=a[n-1], vacant=n-1;
    }

  a[vacant]=missing;
}

The version of Heapsort designed by Williams uses bubbleup and siftdown as follows:

void makeheap(record a[], int n)
{
  int k;
  
  for(k=1; k<n; k++) 
    bubbleup(a, n, k, a[k]);
}

void hsort(record a[], int n)
{
  int k;
  record temp;

  makeheap(a,n);

  for(k=n-1; k>0; k--) 
    {
      /* now a[0],a[1],    ...,a[k] is a heap and 
             a[k+1],a[k+2],...,a[n] is sorted      */

      temp=a[k], a[k]=a[0];
      siftdown(a, k, 0, temp);
    }
}

A very nice animation of this version has been provided by Duane J. Jarc at the George Washington University School of Engineering and Applied Science.

The version of Heapsort designed by Floyd improves on this theme in two ways. The first improvement is the iterated use of siftdown (rather than bubbleup) in makeheap, which yields makeheap with the worst-case running time in O(n):

void makeheap(record a[], int n)
{
  int k;

  for(k=n/2-1; k>=0; k--) 
    siftdown(a, n, k, a[k]);
}

A very nice animation of the resulting version of Heapsort has been provided by Systems Research Center at Compaq.

To get an inkling of how the speed of this version compares with that of Quicksort, click on the square just below the heading "Heap Sort (by Jason Harrison)" and on the square just below the heading "Quick Sort (by James Gosling)" in the Sorting Algorithms Demo provided by Jason Harrison. (These two applets can be run simultaneously.)

The second of Floyd's improvements is the following modification of siftdown.

void siftdown(record a[], int n, int vacant, record missing)
{
  int memo=vacant;
  int child, parent;

  child=2*(vacant+1);
  while(child<n)
    {
      if(cmp(&a[child],&a[child-1])<0) 
	child--;
      a[vacant]=a[child], vacant=child, child=2*(vacant+1);
    }
  if(child==n)
    {
      a[vacant]=a[n-1], vacant=n-1;
    }
  
  parent=(vacant-1)/2;
  while(vacant>memo)
    {
      if(cmp(&a[parent],&missing)<0)
        {
          a[vacant]=a[parent], vacant=parent, parent=(vacant-1)/2;
        }
      else
        break;
    }
  a[vacant]=missing;
}

Here is the "accelerated Heapsort" that stems from Floyd's second improvement:

void hsort(record a[], int n)
{
  int k, drop, last;
  record temp;

  makeheap(a,n);

  drop = floor_of_lg(n-1, &last);
  for(k=n-1; k>0; k--) 
    {
      temp=a[k], a[k]=a[0];
      siftdown(a, k, 0, temp, drop);
      if (k==last)
	{
	  drop--, last/=2;
	}
    }
}

void makeheap(record a[], int n)
{
  int k, drop, first;
  
  drop=1, first=n/2-1;
  for(k=first; k>=0; k--)
    { 
      if(k==(first-1)/2)
	{
	  drop++, first=k;
	}
      siftdown(a, n, k, a[k], drop);
    }
}

void siftdown(record a[], int n, int vacant, record missing, int drop)
{
  int memo=vacant;
  int child, parent;
  int count, next_peek;

  count=0, next_peek=(drop+1)/2;

  child=2*(vacant+1);
  while(child<n)
    {
      if(cmp(&a[child],&a[child-1])<0)
        child--;
      a[vacant]=a[child], vacant=child, child=2*(vacant+1);

      count++;
      if (count==next_peek)
	{
	  if(cmp(&a[(vacant-1)/2],&missing)<=0)
	    break;
	  else
	    next_peek=(count+drop+1)/2;	      
	}
    }

  if(child==n)
    {
      a[vacant]=a[n-1], vacant=n-1;
    }
  
  parent=(vacant-1)/2;
  while(vacant>memo)    
    {
      if(cmp(&a[parent],&missing)<0)        
	{
          a[vacant]=a[parent], vacant=parent, parent=(vacant-1)/2;
	}
      else        
	break;
    }
  a[vacant]=missing;
}


int floor_of_lg(int n, int* power)
{
  /***********************************************************
  *  computes the largest power of 2 that is at most n and   *
  *  returns  the exponent in this power:                    *
  *            n =   1   2   3   4   5   6   7   8   9  ...  *
  *        power =   1   2   2   4   4   4   4   8   8  ...  *
  *   exponent k =   0   1   1   2   2   2   2   3   3  ...  *
  ***********************************************************/

  int k=0;
  for(*power=1; 2*(*power)<=n; (*power)*=2)
    k+=1;
  return k;
}

Back to the table of contents of Vašek Chvátal's course notes