Atlanta Custom Software Development 

 
   Search        Code/Page
 

User Login
Email

Password

 

Forgot the Password?
Services
» Web Development
» Maintenance
» Data Integration/BI
» Information Management
Programming
  Database
Automation
OS/Networking
Graphics
Links
Tools
» Regular Expr Tester
» Free Tools

ShuttleMergeSort - An improved MergeSort

Total Hit ( 2400)

Rate this article:     Poor     Excellent 

 Submit Your Question/Comment about this article

Rating


 


Click here to copy the following block
' 2/4/03. The previous version of ShuttleMergeSort failed on very short lists.
' The code below corrects the problem and eliminates a couple of unnecessary
' variables. Sorting times for one million random longs,
' double or strings are 67, 90 and 95 seconds (Excel 2001 / 800 mhz PowerBook /
' MacOS 10.2.3).

' 1/7/03. Here is a 20-25% faster version of MergeSort. The old version
' merged into an auxiliary array, and copied the result back to the primary
' array at the end of each pass. This version plans ahead for an even number
' of passes, and alternates direction each time, first merging to the auxiliary
' array and then back to the primary array. It also replaces recursive calls
' with an explicit stack, and calls to a separate InsertionSort with in-line
' code. Because of the back and forth merging,
' I call this version "ShuttleMergeSort".

' Another frequently proposed optimization for MergeSort is to set runs up in
' alternating directions (low to high, then high to low). This allows
' replacing separate boundary tests for LP and RP with a single test for LP
' crossing RP. I tried this, and it wasn't faster in practice. Probably the
' gain from fewer loop tests was offset by time spent in extra comparisons; in
' the simpler version, when one run is used up, the rest of the other run is
' copied to the output array with no further comparisons. Also,
' the run-alternating version was significantly slower on presorted inputs,
' which often occur in practice.

' QuickSort is still faster for strings (64 sec vs. 95),
' but MergeSort is faster for doubles (90 sec vs. 162) and longs (67 sec vs.
' 116). Given that MergeSort is stable and guaranteed NlogN,
' while QuickSort is unstable and always has an N^2 worst case,
' MergeSort is my choice for a single all-purpose sort.

' The first example below is a pointerized version for strings. It can be
' adapted to doubles by changing the declaration of A() and T. The second
' example is a direct version for longs that can be adapted to integers.

Sub pShuttleMergeSortS(LO As Long, HI As Long, A() As String, P() As Long, _
  Q() As Long)
  'LO and HI point to first and last keys; A() is the buffer of string keys.
  'P() and Q() are buffers of pointers to the keys in A()
  Dim Length As Double  'length of initial runs to be made by InsertionSort
  Dim nRuns As Long    'the number of runs at each stage
  Dim Stack() As Long    'bookkeeping stack for merge passes
  Dim I As Long
  Dim L As Long      'left limit
  Dim R As Long      'right limit
  Dim LP As Long      'left pointer
  Dim RP As Long      'right pointer
  Dim OP As Long      'other pointer
  Dim TMP As String
  Dim Forward As Boolean  'toggle for direction of alternate merge passes
  
  'Calculate how many merge passes will be needed.
  'Each back & forth pair of merges will convert 4N sublists into N.
  Length = 1 + HI - LO
  nRuns = 1
  While Length > 20
    Length = Length / 4
    nRuns = nRuns * 4
  Wend

  'Set up stack to keep track of sublists being merged.
  ReDim Stack(1 To nRuns)
  For I = 1 To nRuns - 1
    Stack(I) = LO + (Length * CDbl(I))
  Next I
  Stack(nRuns) = HI
  
  'Build short runs using low overhead InsertionSort.
  L = LO
  For I = 1 To nRuns
    R = Stack(I)
    For RP = L + 1 To R
      OP = P(RP)
      TMP = A(OP)
      For LP = RP To L + 1 Step -1
        If TMP < A(P(LP - 1)) Then P(LP) = P(LP - 1) Else Exit For
      Next LP
      P(LP) = OP
    Next RP
    L = R + 1
  Next I
  
  'Make back & forth passes of MergeSort until all runs are merged.
  Forward = True
  While nRuns > 1
    R = LO - 1
    If Forward Then
      'Half the passes are forward, merging from P() into Q().
      For I = 2 To nRuns Step 2
        LP = R + 1
        OP = LP
        L = Stack(I - 1)
        RP = L + 1
        R = Stack(I)
        Do
          If A(P(LP)) <= A(P(RP)) Then
            Q(OP) = P(LP)
            OP = OP + 1
            LP = LP + 1
            If LP > L Then
              Do
                Q(OP) = P(RP)
                OP = OP + 1
                RP = RP + 1
              Loop Until RP > R
              Exit Do
            End If
          Else
            Q(OP) = P(RP)
            OP = OP + 1
            RP = RP + 1
            If RP > R Then
              Do
                Q(OP) = P(LP)
                OP = OP + 1
                LP = LP + 1
              Loop Until LP > L
              Exit Do
            End If
          End If
        Loop
        Stack(I \ 2) = R
      Next I
    Else
    'Half the passes are backward, merging from Q() into P().
      For I = 2 To nRuns Step 2
        LP = R + 1
        OP = LP
        L = Stack(I - 1)
        RP = L + 1
        R = Stack(I)
        Do
          If A(Q(LP)) <= A(Q(RP)) Then
            P(OP) = Q(LP)
            OP = OP + 1
            LP = LP + 1
            If LP > L Then
              Do
                P(OP) = Q(RP)
                OP = OP + 1
                RP = RP + 1
              Loop Until RP > R
              Exit Do
            End If
          Else
            P(OP) = Q(RP)
            OP = OP + 1
            RP = RP + 1
            If RP > R Then
              Do
                P(OP) = Q(LP)
                OP = OP + 1
                LP = LP + 1
              Loop Until LP > L
              Exit Do
            End If
          End If
        Loop
        Stack(I \ 2) = R
      Next I
    End If
    'After each merge, we have half as many runs and we switch direction.
    nRuns = nRuns \ 2
    Forward = Not Forward
  Wend
End Sub

Sub ShuttleMergeSortL(LO As Long, HI As Long, A() As Long, B() As Long)
  'LO and HI point to the first and last keys.
  'A() and B() are the primary and auxiliary buffers of keys
  Dim Length As Double  'length of initial runs to be made by InsertionSort
  Dim nRuns As Long    'the number of runs at each stage
  Dim Stack() As Long    'bookkeeping stack for merge passes
  Dim I As Long
  Dim L As Long      'left limit
  Dim R As Long      'right limit
  Dim LP As Long      'left pointer
  Dim RP As Long      'right pointer
  Dim OP As Long      'other pointer
  Dim TMP As String
  Dim Forward As Boolean  'toggle for direction of alternate merge passes
  
  'Calculate how many merge passes will be needed.
  'Each back & forth pair of merges will convert 4N sublists into N.
  Length = 1 + HI - LO
  nRuns = 1
  While Length > 20
    Length = Length / 4
    nRuns = nRuns * 4
  Wend

  'Set up stack to keep track of sublists being merged.
  ReDim Stack(1 To nRuns)
  For I = 1 To nRuns - 1
    Stack(I) = LO + (Length * CDbl(I))
  Next I
  Stack(nRuns) = HI
  
  'Build short runs using low overhead InsertionSort.
  L = LO
  For I = 1 To nRuns
    R = Stack(I)
    For RP = L + 1 To R
      TMP = A(RP)
      For LP = RP To L + 1 Step -1
        If TMP < A(LP - 1) Then A(LP) = A(LP - 1) Else Exit For
      Next LP
      A(LP) = TMP
    Next RP
    L = R + 1
  Next I
  
  'Make back & forth passes of MergeSort until all runs are merged.
  Forward = True
  While nRuns > 1
    R = LO - 1
    If Forward Then
      'Half the passes are forward, merging from P() into Q().
      For I = 2 To nRuns Step 2
        LP = R + 1
        OP = LP
        L = Stack(I - 1)
        RP = L + 1
        R = Stack(I)
        Do
          If A(LP) <= A(RP) Then
            B(OP) = A(LP)
            OP = OP + 1
            LP = LP + 1
            If LP > L Then
              Do
                B(OP) = A(RP)
                OP = OP + 1
                RP = RP + 1
              Loop Until RP > R
              Exit Do
            End If
          Else
            B(OP) = A(RP)
            OP = OP + 1
            RP = RP + 1
            If RP > R Then
              Do
                B(OP) = A(LP)
                OP = OP + 1
                LP = LP + 1
              Loop Until LP > L
              Exit Do
            End If
          End If
        Loop
        Stack(I \ 2) = R
      Next I
    Else
     'Half the passes are backward, merging from Q() into P().
      For I = 2 To nRuns Step 2
        LP = R + 1
        OP = LP
        L = Stack(I - 1)
        RP = L + 1
        R = Stack(I)
        Do
          If B(LP) <= B(RP) Then
            A(OP) = B(LP)
            OP = OP + 1
            LP = LP + 1
            If LP > L Then
              Do
                A(OP) = B(RP)
                OP = OP + 1
                RP = RP + 1
              Loop Until RP > R
              Exit Do
            End If
          Else
            A(OP) = B(RP)
            OP = OP + 1
            RP = RP + 1
            If RP > R Then
              Do
                A(OP) = B(LP)
                OP = OP + 1
                LP = LP + 1
              Loop Until LP > L
              Exit Do
            End If
          End If
        Loop
        Stack(I \ 2) = R
      Next I
    End If
    'After each merge, we have half as many runs and we switch direction.
    nRuns = nRuns \ 2
    Forward = Not Forward
  Wend
End Sub


Submitted By : Nayan Patel  (Member Since : 5/26/2004 12:23:06 PM)

Job Description : He is the moderator of this site and currently working as an independent consultant. He works with VB.net/ASP.net, SQL Server and other MS technologies. He is MCSD.net, MCDBA and MCSE. In his free time he likes to watch funny movies and doing oil painting.
View all (893) submissions by this author  (Birth Date : 7/14/1981 )


Home   |  Comment   |  Contact Us   |  Privacy Policy   |  Terms & Conditions   |  BlogsZappySys

© 2008 BinaryWorld LLC. All rights reserved.