The listing continues inside SH_GROW, where the entries of the old bucket array are copied into the newly allocated one:

    /*
     * Copy entries from the old data to newdata. We theoretically could use
     * SH_INSERT here, to avoid code duplication, but that's more general than
     * we need. We neither want tb->members increased, nor do we need to do
     * deal with deleted elements, nor do we need to compare keys. So a
     * special-cased implementation is lot faster. As resizing can be time
     * consuming and frequent, that's worthwhile to optimize.
     *
     * To be able to simply move entries over, we have to start not at the
     * first bucket (i.e olddata[0]), but find the first bucket that's either
     * empty, or is occupied by an entry at its optimal position. Such a
     * bucket has to exist in any table with a load factor under 1, as not all
     * buckets are occupied, i.e. there always has to be an empty bucket. By
     * starting at such a bucket we can move the entries to the larger table,
     * without having to deal with conflicts.
     */

    /* search for the first element in the hash that's not wrapped around */
    for (i = 0; i < oldsize; i++)
    {
        SH_ELEMENT_TYPE *oldentry = &olddata[i];
        uint32      hash;
        uint32      optimal;

        if (oldentry->status != SH_STATUS_IN_USE)
        {
            startelem = i;
            break;
        }

        hash = SH_ENTRY_HASH(tb, oldentry);
        optimal = SH_INITIAL_BUCKET(tb, hash);

        if (optimal == i)
        {
            startelem = i;
            break;
        }
    }

    /* and copy all elements in the old table */
    copyelem = startelem;
    for (i = 0; i < oldsize; i++)
    {
        SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];

        if (oldentry->status == SH_STATUS_IN_USE)
        {
            uint32      hash;
            uint32      startelem;
            uint32      curelem;
            SH_ELEMENT_TYPE *newentry;

            hash = SH_ENTRY_HASH(tb, oldentry);
            startelem = SH_INITIAL_BUCKET(tb, hash);
            curelem = startelem;

            /* find empty element to put data into */
            while (true)
            {
                newentry = &newdata[curelem];

                if (newentry->status == SH_STATUS_EMPTY)
                    break;

                curelem = SH_NEXT(tb, curelem, startelem);
            }

            /* copy entry to new slot */
            memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
        }

        /* can't use SH_NEXT here, would use new size */
        copyelem++;
        if (copyelem >= oldsize)
            copyelem = 0;
    }

    SH_FREE(tb, olddata);
}

/*
 * Initialize iterator.
 */
SH_SCOPE void
SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
{
    int         i;
    uint64      startelem = PG_UINT64_MAX;

    /*
     * Search for the first empty element. As deletions during iterations are
     * supported, we want to start/end at an element that cannot be affected
     * by elements being shifted.
     */
    for (i = 0; i < tb->size; i++)
    {
        SH_ELEMENT_TYPE *entry = &tb->data[i];

        if (entry->status != SH_STATUS_IN_USE)
        {
            startelem = i;
            break;
        }
    }

    Assert(startelem < SH_MAX_SIZE);

    /*
     * Iterate backwards, that allows the current element to be deleted, even
     * if there are backward shifts
     */
    iter->cur = startelem;
    iter->end = iter->cur;
    iter->done = false;
}

/*
 * Initialize iterator to a specific bucket. That's really only useful for
 * cases where callers are partially iterating over the hashspace, and that
 * iteration deletes and inserts elements based on visited entries. Doing that
 * repeatedly could lead to an unbalanced keyspace when always starting at the
 * same position.
 */
SH_SCOPE void
SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
{
    /*
     * Iterate backwards, that allows the current element to be deleted, even
     * if there are backward shifts.
     */
    iter->cur = at & tb->sizemask;  /* ensure at is within a valid range */
    iter->end = iter->cur;
    iter->done = false;
}

/*
 * Iterate over all entries in the hash-table. Return the next occupied entry,
 * or NULL if done.
 *
 * During iteration the current entry in the hash table may be deleted,
 * without leading to elements being skipped or returned twice. Additionally
 * the rest of the table may be modified (i.e. there can be insertions or
 * deletions), but if so, there's neither a guarantee that all nodes are
 * visited at least once, nor a guarantee that a node is visited at most once.
 */
SH_SCOPE SH_ELEMENT_TYPE *
SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
{
    while (!iter->done)
    {
        SH_ELEMENT_TYPE *elem;

        elem = &tb->data[iter->cur];

        /* next element in backward direction */
        iter->cur = (iter->cur - 1) & tb->sizemask;

        if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
            iter->done = true;
        if (elem->status == SH_STATUS_IN_USE)
        {
            return elem;
        }
    }

    return NULL;
}

/*
 * Report some statistics about the state of the hashtable. For
 * debugging/profiling purposes only.
 */
SH_SCOPE void
SH_STAT(SH_TYPE * tb)
{
    uint32      max_chain_length = 0;
    uint32      total_chain_length = 0;
    double      avg_chain_length;
    double      fillfactor;
    uint32      i;

    uint32     *collisions = palloc0(tb->size * sizeof(uint32));
    uint32      total_collisions = 0;
    uint32      max_collisions = 0;
    double      avg_collisions;

    for (i = 0; i < tb->size; i++)
    {
        uint32      hash;
        uint32      optimal;
        uint32      dist;
        SH_ELEMENT_TYPE *elem;

        elem = &tb->data[i];

        if (elem->status != SH_STATUS_IN_USE)
            continue;

        hash = SH_ENTRY_HASH(tb, elem);
        optimal = SH_INITIAL_BUCKET(tb, hash);
        dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);

        if (dist > max_chain_length)
            max_chain_length = dist;
        total_chain_length += dist;

        collisions[optimal]++;
    }

    for (i = 0; i < tb->size; i++)
    {
        uint32      curcoll = collisions[i];

        if (curcoll == 0)
            continue;

        /* single contained element is not a collision */
        curcoll--;
        total_collisions += curcoll;
        if (curcoll > max_collisions)
            max_collisions = curcoll;
    }

    if (tb->members > 0)
    {
        fillfactor = tb->members / ((double) tb->size);
        avg_chain_length = ((double) total_chain_length) / tb->members;
        avg_collisions = ((double) total_collisions) / tb->members;
    }
    else
    {
        fillfactor = 0;
        avg_chain_length = 0;
        avg_collisions = 0;
    }

    elog(LOG, "size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
         tb->size, tb->members, fillfactor, total_chain_length,
         max_chain_length, avg_chain_length, total_collisions,
         max_collisions, avg_collisions);
}

#endif                          /* SH_DEFINE */

/* undefine external parameters, so next hash table can be defined */
#undef SH_PREFIX
#undef SH_KEY_TYPE
#undef SH_KEY
#undef SH_ELEMENT_TYPE
#undef SH_HASH_KEY
#undef SH_SCOPE
#undef SH_DECLARE
#undef SH_DEFINE
#undef SH_GET_HASH
#undef SH_STORE_HASH
#undef SH_USE_NONDEFAULT_ALLOCATOR

/* undefine locally declared macros */
#undef SH_MAKE_PREFIX
#undef SH_MAKE_NAME
#undef SH_MAKE_NAME_
#undef SH_FILLFACTOR
#undef SH_MAX_FILLFACTOR
#undef SH_GROW_MAX_DIB
#undef SH_GROW_MAX_MOVE
#undef SH_GROW_MIN_FILLFACTOR
#undef SH_MAX_SIZE

/* types */
#undef SH_TYPE
#undef SH_STATUS
#undef SH_STATUS_EMPTY
#undef SH_STATUS_IN_USE
#undef SH_ITERATOR

/* external function names */
#undef SH_CREATE
#undef SH_DESTROY
#undef SH_RESET
#undef SH_INSERT
#undef SH_DELETE
#undef SH_LOOKUP
#undef SH_GROW
#undef SH_START_ITERATE
#undef SH_START_ITERATE_AT
#undef SH_ITERATE
#undef SH_ALLOCATE
#undef SH_FREE
#undef SH_STAT

/* internal function names */
#undef SH_COMPUTE_PARAMETERS
#undef SH_COMPARE_KEYS
#undef SH_INITIAL_BUCKET
#undef SH_NEXT
#undef SH_PREV
#undef SH_DISTANCE_FROM_OPTIMAL
#undef SH_ENTRY_HASH

III. Tracking and analysis
Taking tuplehash_insert as an example, this article analyzes how simplehash inserts an entry into the hash table.
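For context on where tuplehash_insert comes from: it is generated from the simplehash.h template shown above. Roughly, execnodes.h declares the tuplehash_* types and prototypes, and execGrouping.c emits the definitions by setting the SH_* parameter macros before including the header. The sketch below is reconstructed from memory of the PostgreSQL sources; the exact macro list may differ slightly between versions:

    /* in src/include/nodes/execnodes.h: declare the tuplehash_* types and functions */
    #define SH_PREFIX tuplehash
    #define SH_ELEMENT_TYPE TupleHashEntryData
    #define SH_KEY_TYPE MinimalTuple
    #define SH_SCOPE extern
    #define SH_DECLARE
    #include "lib/simplehash.h"

    /* in src/backend/executor/execGrouping.c: emit the function definitions */
    #define SH_PREFIX tuplehash
    #define SH_ELEMENT_TYPE TupleHashEntryData
    #define SH_KEY_TYPE MinimalTuple
    #define SH_KEY firstTuple
    #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
    #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
    #define SH_SCOPE extern
    #define SH_STORE_HASH
    #define SH_GET_HASH(tb, a) a->hash
    #define SH_DEFINE
    #include "lib/simplehash.h"

With SH_PREFIX set to tuplehash, SH_MAKE_NAME turns SH_INSERT into tuplehash_insert, SH_TYPE into tuplehash_hash, and so on.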
Test script
-- disable parallel
set max_parallel_workers_per_gather=0;
select bh,avg(c1),min(c1),max(c2) from t_agg_simple group by bh;
Tracking and analysis
(gdb) b tuplehash_insert
Breakpoint 1 at 0x6d2a27: file ../src/include/lib/simplehash.h, line 490.
(gdb)
Input parameters
(gdb) p *tb
$1 = {size = 256, members = 0, sizemask = 255, grow_threshold = 230, data = 0x1cc2a10, ctx = 0x1c9b320, private_data = 0x1cb88a0}
(gdb)
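The fields printed by gdb are those of the table header structure that simplehash.h generates for each instantiation; for tuplehash the generated tuplehash_hash type looks roughly like the sketch below (field comments are my own paraphrase of simplehash.h, not the original ones):

    typedef struct tuplehash_hash
    {
        uint64      size;           /* size of the bucket array, a power of two */
        uint32      members;        /* number of occupied buckets */
        uint32      sizemask;       /* size - 1, masks a hash into a bucket number */
        uint32      grow_threshold; /* grow once members reaches this boundary */
        TupleHashEntryData *data;   /* the bucket array itself */
        MemoryContext ctx;          /* memory context used for allocations */
        void       *private_data;   /* caller data passed at create time */
    } tuplehash_hash;

Note that private_data (0x1cb88a0) is the same pointer as the hashtable argument seen later in LookupTupleHashEntry, which is presumably how the key-hashing and key-comparison callbacks get back to the executor's TupleHashTable state.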
Check whether the table needs to grow
(gdb) n
497             insertdist = 0;
(gdb) 
507             if (unlikely(tb->members >= tb->grow_threshold))
(gdb) p tb->members
$2 = 0
(gdb) p tb->grow_threshold
$3 = 230
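Line 507 is the growth check at the top of the insert routine: the table is doubled before probing once members reaches grow_threshold. The threshold itself is the table size times the fill factor (0.9 in simplehash.h, treated as an assumption here), which is why a 256-bucket table shows 230. A small standalone check of that arithmetic (illustrative only, not PostgreSQL code):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * simplehash grows the table when members reaches grow_threshold, and
     * grow_threshold is derived from the table size and the fill factor.
     */
    int
    main(void)
    {
        uint64_t    size = 256;
        uint32_t    members = 0;
        uint32_t    grow_threshold = (uint32_t) ((double) size * 0.9);

        printf("grow_threshold = %u, grow now: %s\n",
               grow_threshold, members >= grow_threshold ? "yes" : "no");
        /* prints: grow_threshold = 230, grow now: no */
        return 0;
    }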
Perform the insert, starting the bucket search at the optimal position
Get the bucket array (a TupleHashEntryData * pointer) and initialize the start element and the current element
(gdb) n
523             data = tb->data;
(gdb) 
524             startelem = SH_INITIAL_BUCKET(tb, hash);
(gdb) p *data
$4 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) n
525             curelem = startelem;
(gdb) p startelem
$5 = 114
(gdb) p hash
$6 = 443809650
(gdb)
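SH_INITIAL_BUCKET masks the hash with sizemask, so the start bucket is just the low bits of the hash, and the value 114 printed above can be recomputed directly. A standalone check (illustrative only):

    #include <stdio.h>
    #include <stdint.h>

    /* SH_INITIAL_BUCKET is effectively "hash & tb->sizemask" */
    int
    main(void)
    {
        uint32_t    sizemask = 255;         /* size 256 - 1 */
        uint32_t    hash = 443809650u;      /* hash of the first group key */

        printf("startelem = %u\n", hash & sizemask);
        /* prints: startelem = 114 */
        return 0;
    }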
Enter the loop to find a free bucket and perform the insert
(gdb) n
531             SH_ELEMENT_TYPE *entry = &data[curelem];
(gdb) n
534             if (entry->status == SH_STATUS_EMPTY)
(gdb) p *entry
$7 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) p *data
$8 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) p data[255]
$9 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) n
536             tb->members++;
(gdb) 
537             entry->SH_KEY = key;
(gdb) p *tb
$10 = {size = 256, members = 1, sizemask = 255, grow_threshold = 230, data = 0x1cc2a10, ctx = 0x1c9b320, private_data = 0x1cb88a0}
(gdb) n
539             SH_GET_HASH(tb, entry) = hash;
(gdb) 
541             entry->status = SH_STATUS_IN_USE;
(gdb) p *entry
$11 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 443809650}
(gdb) n
542             *found = false;
(gdb) 
543             return entry;
(gdb) p *entry
$12 = {firstTuple = 0x0, additional = 0x0, status = 1, hash = 443809650}
(gdb)
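The lines stepped through above are the empty-bucket branch of the insert: claim the bucket, store the key and the hash, mark the entry in use, and report *found = false so the caller knows a new group was created. A self-contained toy version of just that branch (simplified stand-ins, not PostgreSQL code):

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    /* toy stand-in for the tuplehash entry */
    typedef struct ToyEntry
    {
        const char *key;            /* stands in for entry->firstTuple */
        int         status;         /* 0 = empty, 1 = in use */
        uint32_t    hash;
    } ToyEntry;

    static ToyEntry *
    toy_claim(ToyEntry *entry, uint32_t *members, const char *key,
              uint32_t hash, bool *found)
    {
        if (entry->status == 0)     /* SH_STATUS_EMPTY */
        {
            (*members)++;           /* tb->members++ */
            entry->key = key;       /* entry->SH_KEY = key */
            entry->hash = hash;     /* SH_GET_HASH(tb, entry) = hash */
            entry->status = 1;      /* SH_STATUS_IN_USE */
            *found = false;
            return entry;
        }
        *found = true;              /* occupied; collision handling omitted */
        return entry;
    }

    int
    main(void)
    {
        ToyEntry    bucket114 = {0};
        uint32_t    members = 0;
        bool        found;

        toy_claim(&bucket114, &members, "GZ01", 443809650u, &found);
        printf("members=%u status=%d hash=%u found=%d\n",
               members, bucket114.status, bucket114.hash, (int) found);
        /* prints: members=1 status=1 hash=443809650 found=0 */
        return 0;
    }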
The function call completes and returns the entry
(gdb) n
652     }
(gdb) 
LookupTupleHashEntry (hashtable=0x1cb88a0, slot=0x1c9d248, isnew=0x7ffd1348e797) at execGrouping.c:303
303             if (found)
(gdb)
Back to LookupTupleHashEntry
(gdb) 
LookupTupleHashEntry (hashtable=0x1cb88a0, slot=0x1c9d248, isnew=0x7ffd1348e797) at execGrouping.c:303
303             if (found)
(gdb) n
311                     *isnew = true;
(gdb) 
313                     entry->additional = NULL;
(gdb) 
314                     MemoryContextSwitchTo(hashtable->tablecxt);
(gdb) 
316                     entry->firstTuple = ExecCopySlotMinimalTuple(slot);
(gdb) 
324             MemoryContextSwitchTo(oldContext);
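Since *found came back false, LookupTupleHashEntry treats this as a brand-new group: it reports *isnew = true, clears the per-group payload pointer, and copies the slot's minimal tuple into the hash table's memory context so the key survives beyond the current input tuple. A hedged paraphrase of the lines stepped above, not a verbatim copy of execGrouping.c:

    entry = tuplehash_insert(hashtable->hashtab, key, &found);

    if (found)
    {
        /* pre-existing group, nothing to initialize */
        *isnew = false;
    }
    else
    {
        /* new group: set up the entry inside the table's memory context */
        *isnew = true;
        entry->additional = NULL;
        MemoryContextSwitchTo(hashtable->tablecxt);
        entry->firstTuple = ExecCopySlotMinimalTuple(slot);
    }
    MemoryContextSwitchTo(oldContext);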
View tuple data
(gdb) p *entry
$13 = {firstTuple = 0x1cb2498, additional = 0x0, status = 1, hash = 443809650}
(gdb) x/7x entry->firstTuple->t_bits
0x1cb24a7:      0x00    0x0b    0x47    0x5a    0x30    0x31    0x7e
(gdb) x/7c entry->firstTuple->t_bits
0x1cb24a7:      0 '\000'        11 '\v' 71 'G'  90 'Z'  48 '0'  49 '1'  126 '~'
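The dumped bytes are the stored group key: after the first 0x00 byte, 0x0b is a 1-byte varlena header ((0x0b >> 1) = 5 bytes total, so 4 data bytes), giving the text 'GZ01' for this group's bh value (the second group below shows 'GZ02'). A standalone snippet that decodes it the same way PostgreSQL's VARSIZE_1B / VARDATA_1B macros would on a little-endian machine (byte values copied from the gdb dump, the helper itself is only illustrative):

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        /* bytes after the first 0x00 of the dump */
        uint8_t     buf[] = {0x0b, 0x47, 0x5a, 0x30, 0x31};

        if (buf[0] & 0x01)          /* 1-byte varlena header? */
        {
            int         total = (buf[0] >> 1) & 0x7F;   /* length incl. header */

            printf("short varlena, %d data bytes: %.*s\n",
                   total - 1, total - 1, (char *) (buf + 1));
            /* prints: short varlena, 4 data bytes: GZ01 */
        }
        return 0;
    }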
The next call; this time there is a collision
(gdb) c
Continuing.

Breakpoint 1, tuplehash_insert (tb=0x1cb8730, key=0x0, found=0x7ffd1348e757) at ../src/include/lib/simplehash.h:490
490             uint32 hash = SH_HASH_KEY(tb, key);
(gdb) n
497             insertdist = 0;
(gdb) p hash
$15 = 4237773170
(gdb) n
507             if (unlikely(tb->members >= tb->grow_threshold))
(gdb) 
523             data = tb->data;
(gdb) 
524             startelem = SH_INITIAL_BUCKET(tb, hash);
(gdb) p data[0]
$16 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) n
525             curelem = startelem;
(gdb) 
531             SH_ELEMENT_TYPE *entry = &data[curelem];
(gdb) p startelem
$17 = 114
(gdb) p curelem
$18 = 114
(gdb) p data[curelem]
$19 = {firstTuple = 0x1cb2498, additional = 0x1cb24d0, status = 1, hash = 443809650}
(gdb) n
534             if (entry->status == SH_STATUS_EMPTY)
(gdb) 
554             if (SH_COMPARE_KEYS(tb, hash, key, entry))
(gdb) 
561             curhash = SH_ENTRY_HASH(tb, entry);
(gdb) 
562             curoptimal = SH_INITIAL_BUCKET(tb, curhash);
(gdb) p curhash
$20 = 443809650
(gdb) n
563             curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
(gdb) 
565             if (insertdist > curdist)
(gdb) p curoptimal
$21 = 114
(gdb) p curdist
$22 = 0
(gdb) n
634             curelem = SH_NEXT(tb, curelem, startelem);
(gdb) p insertdist
$23 = 0
(gdb) n
635             insertdist++;
(gdb) p curelem
$24 = 115
(gdb) n
645             if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
(gdb) 
651     }
(gdb) 
531             SH_ELEMENT_TYPE *entry = &data[curelem];
(gdb) 
534             if (entry->status == SH_STATUS_EMPTY)
(gdb) 
536             tb->members++;
(gdb) 
537             entry->SH_KEY = key;
(gdb) 
539             SH_GET_HASH(tb, entry) = hash;
(gdb) 
541             entry->status = SH_STATUS_IN_USE;
(gdb) 
542             *found = false;
(gdb) 
543             return entry;
(gdb) p *entry
$25 = {firstTuple = 0x0, additional = 0x0, status = 1, hash = 4237773170}
(gdb)
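Both keys hash to start bucket 114 (4237773170 & 255 = 114), so the second insert finds the bucket occupied by the 'GZ01' group, compares keys, checks how far the resident entry is from its optimal bucket (the robin-hood bookkeeping on lines 561-565), and advances with SH_NEXT to the free bucket 115. The toy program below reproduces that probing arithmetic with the two traced hash values; it ignores simplehash's robin-hood displacement and growth logic and is not PostgreSQL code:

    #include <stdio.h>
    #include <stdint.h>

    #define TABLE_SIZE 256

    int
    main(void)
    {
        uint32_t    status[TABLE_SIZE] = {0};   /* 0 = empty, 1 = in use */
        uint32_t    sizemask = TABLE_SIZE - 1;
        uint32_t    hashes[] = {443809650u, 4237773170u};   /* from the trace */

        for (int i = 0; i < 2; i++)
        {
            uint32_t    bucket = hashes[i] & sizemask;  /* SH_INITIAL_BUCKET */
            uint32_t    dist = 0;

            while (status[bucket] == 1)         /* occupied: probe onwards */
            {
                bucket = (bucket + 1) & sizemask;   /* SH_NEXT, wraps around */
                dist++;
            }
            status[bucket] = 1;
            printf("hash %u -> start %u, placed at %u (distance %u)\n",
                   hashes[i], hashes[i] & sizemask, bucket, dist);
        }
        /* prints: hash 443809650 -> start 114, placed at 114 (distance 0)
         *         hash 4237773170 -> start 114, placed at 115 (distance 1) */
        return 0;
    }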
Go back to LookupTupleHashEntry and view the tuple
(gdb) 
LookupTupleHashEntry (hashtable=0x1cb88a0, slot=0x1c9d248, isnew=0x7ffd1348e797) at execGrouping.c:303
303             if (found)
(gdb) 
311                     *isnew = true;
(gdb) 
313                     entry->additional = NULL;
(gdb) 
314                     MemoryContextSwitchTo(hashtable->tablecxt);
(gdb) 
316                     entry->firstTuple = ExecCopySlotMinimalTuple(slot);
(gdb) 
324             MemoryContextSwitchTo(oldContext);
(gdb) p *entry
$26 = {firstTuple = 0x1cb2580, additional = 0x0, status = 1, hash = 4237773170}
(gdb) p *entry->firstTuple
$27 = {t_len = 21, mt_padding = "\000\000\000", t_infomask2 = 1, t_infomask = 2, t_hoff = 24 '\030', t_bits = 0x1cb258f ""}
(gdb) x/7x entry->firstTuple->t_bits
0x1cb258f:      0x00    0x0b    0x47    0x5a    0x30    0x32    0x7e
(gdb) x/7c entry->firstTuple->t_bits
0x1cb258f:      0 '\000'        11 '\v' 71 'G'  90 'Z'  48 '0'  50 '2'  126 '~'
(gdb)

This concludes the study of "what is in the simplehash.h file of PostgreSQL". I hope it has resolved your doubts; combining theory with practice is the best way to learn, so go and try it yourself.