create index concurrently

Simply Speaking

Simply speaking, a technical to reduce the table and index bloat caused by updating, through a link from old tuple(dead tuple) to new tuple if the changed attribute isn’t in any indexes. With such links:

No need to generate a new index entry to represent the new tuple.
Old versions of updated tuples can be actually removed by normal operations instead of vacuum operation only

postgreSQL HOT From ZhiHu

OFFICIAL DOC

For more details , see postgresql/src/backend/access/heap/README.HOT

Main Commits:

282d2a03dd30804b01f8042f640d638c2ee76604 HOT updates
6f10eb21118f1ce72d4dd144c494749e25658d51 Refactor heap_page_prune

HIGH LEVEL DESIGN

TID

HEAP PAGE

Low Level Design

Key functions

heap_page_prune

`heap_page_prune_opt`

Optionally prune and repair fragmentation in the specified page.

heap_page_prune_opt()
{
  minfree = RelationGetTargetPageFreeSpace();
  minfree = Max(minfree, BLCKSZ / 10);
  
  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
  {
    if (!ConditionalLockBufferForCleanup(buffer))
      continue;
    if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
    {
      heap_page_prune();
    }
  }
}

    
  

`heap_page_prune`

Prune and repair fragmentation in the specified page.

heap_page_prune()
{
  for (offnum = FirstOffsetNumber;
       offnum <= maxoff;
       offnum = OffsetNumberNext(offnum))
  {
    heap_prune_chain(&prstate)
  }
  
  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
  {
    heap_page_prune_execute();
  }
     
}

    
  

`heap_prune_chain`

Prune specified line pointer or a HOT chain originating at line pointer.

heap_prune_chain()
{
  /* while not end of the chain */
  for (;;)
  {
    lp = PageGetItemId(dp, offnum);
    /* Unused item obviously isn't part of the chain */
    if (!ItemIdIsUsed(lp))
      break;
      
    if (ItemIdIsRedirected(lp))
    {
      if (nchain > 0)
        break;			/* not at start of chain */
      chainitems[nchain++] = offnum;
      offnum = ItemIdGetRedirect(rootlp);
      continue;
    }
    
    if (ItemIdIsDead(lp))
      break;
    
    htup = (HeapTupleHeader) PageGetItem(dp, lp);
    
    /*
     * Check the tuple XMIN against prior XMAX, if any
     */
    if (TransactionIdIsValid(priorXmax) &&
      !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
      break;

     /* record each item of this chain */
     chainitems[nchain++] = offnum;
    
    switch ((HTSV_Result) prstate->htsv[offnum])
    {
      case: HEAPTUPLE_DEAD:
        tupdead = true;
        break;
      case HEAPTUPLE_RECENTLY_DEAD:
        heap_prune_record_prunable(prstate,
                                    HeapTupleHeaderGetUpdateXid(htup));
      ...
    }
    
  }
  
  if (OffsetNumberIsValid(latestdead))
  {
    for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
    {
      heap_prune_record_unused(prstate, chainitems[i]);
      ndeleted++;
    }
    
    if (i >= nchain) /* The whole chain is dead */
      heap_prune_record_dead(prstate, rootoffnum);
    else						/* Or just redirect */
      heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
  }
}

    
  

heap_prune_record_redirect

`heap_page_prune_execute`

Perform the actual page changes needed by heap_page_prune

void
heap_page_prune_execute()
{
  /* redirect */
  for (int i = 0; i < nredirected; i++)
  {
    ItemIdSetRedirect(fromlp, tooff);
  }
  
  /* dead */
  for (int i = 0; i < ndead; i++)
  {
    ItemIdSetDead(lp);
  }
  
  /* unused */
  for (int i = 0; i < nunused; i++)
  {
    ItemIdSetUnused(lp);
  }
  PageRepairFragmentation();
  page_verify_redirects();
}

    
  

search HOT chain

heap_hot_search_buffer search HOT chain for tuple satisfying snapshot

bool
heap_hot_search_buffer()
{
  blkno = ItemPointerGetBlockNumber(tid);
  offnum = ItemPointerGetOffsetNumber(tid);
  for (;;)
  {
    lp = PageGetItemId(page, offnum);
    heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
  }
}

    
  

create index concurrently

Simply Speaking

related posts

HIGH LEVEL DESIGN

TID

Low Level Design

Key functions

heap_page_prune

heap_page_prune_opt

heap_page_prune

heap_prune_chain

heap_page_prune_execute

search HOT chain

`heap_page_prune_opt`

`heap_page_prune`

`heap_prune_chain`

`heap_page_prune_execute`