Network Security Internet Technology Development Database Servers Mobile Phone Android Software Apple Software Computer Software News IT Information

In addition to Weibo, there is also WeChat

Please pay attention

WeChat public account

Shulou

Analysis of the walsender background process in PostgreSQL

2025-01-16 Update From: SLTechnology News&Howtos shulou NAV: SLTechnology News&Howtos > Database >

Share

Shulou(Shulou.com)05/31 Report--

< numParams; i++) paramTypes[i] = pq_getmsgint(&input_message, 4); } pq_getmsgend(&input_message); //执行解析 exec_parse_message(query_string, stmt_name, paramTypes, numParams); } break; case 'B': /* bind */ //------------- 绑定 forbidden_in_wal_sender(firstchar); /* Set statement_timestamp() */ SetCurrentStatementStartTimestamp(); /* * this message is complex enough that it seems best to put * the field extraction out-of-line * 该消息看起来比较复杂,看起来最好的做法是提取字段 */ exec_bind_message(&input_message); break; case 'E': /* execute */ { //------------ 执行 const char *portal_name; int max_rows; forbidden_in_wal_sender(firstchar); /* Set statement_timestamp() */ SetCurrentStatementStartTimestamp(); portal_name = pq_getmsgstring(&input_message); max_rows = pq_getmsgint(&input_message, 4); pq_getmsgend(&input_message); exec_execute_message(portal_name, max_rows); } break; case 'F': /* fastpath function call */ //----------- 函数调用 forbidden_in_wal_sender(firstchar); /* Set statement_timestamp() */ SetCurrentStatementStartTimestamp(); /* Report query to various monitoring facilities. */ pgstat_report_activity(STATE_FASTPATH, NULL); set_ps_display("", false); /* start an xact for this function invocation */ start_xact_command(); /* * Note: we may at this point be inside an aborted * transaction. We can't throw error for that until we've * finished reading the function-call message, so * HandleFunctionRequest() must check for it after doing so. * Be careful not to do anything that assumes we're inside a * valid transaction here. 
*/ /* switch back to message context */ MemoryContextSwitchTo(MessageContext); HandleFunctionRequest(&input_message); /* commit the function-invocation transaction */ finish_xact_command(); send_ready_for_query = true; break; case 'C': /* close */ { //---------- 关闭 int close_type; const char *close_target; forbidden_in_wal_sender(firstchar); close_type = pq_getmsgbyte(&input_message); close_target = pq_getmsgstring(&input_message); pq_getmsgend(&input_message); switch (close_type) { case 'S': if (close_target[0] != '\0') DropPreparedStatement(close_target, false); else { /* special-case the unnamed statement */ drop_unnamed_stmt(); } break; case 'P': { Portal portal; portal = GetPortalByName(close_target); if (PortalIsValid(portal)) PortalDrop(portal, false); } break; default: ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid CLOSE message subtype %d", close_type))); break; } if (whereToSendOutput == DestRemote) pq_putemptymessage('3'); /* CloseComplete */ } break; case 'D': /* describe */ { //------------- 描述比如\d等命令 int describe_type; const char *describe_target; forbidden_in_wal_sender(firstchar); /* Set statement_timestamp() (needed for xact) */ SetCurrentStatementStartTimestamp(); describe_type = pq_getmsgbyte(&input_message); describe_target = pq_getmsgstring(&input_message); pq_getmsgend(&input_message); switch (describe_type) { case 'S': exec_describe_statement_message(describe_target); break; case 'P': exec_describe_portal_message(describe_target); break; default: ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid DESCRIBE message subtype %d", describe_type))); break; } } break; case 'H': /* flush */ //--------- flush 刷新 pq_getmsgend(&input_message); if (whereToSendOutput == DestRemote) pq_flush(); break; case 'S': /* sync */ //---------- Sync 同步 pq_getmsgend(&input_message); finish_xact_command(); send_ready_for_query = true; break; /* * 'X' means that the frontend is closing down the socket. 
EOF * means unexpected loss of frontend connection. Either way, * perform normal shutdown. */ case 'X': case EOF: /* * Reset whereToSendOutput to prevent ereport from attempting * to send any more messages to client. */ if (whereToSendOutput == DestRemote) whereToSendOutput = DestNone; /* * NOTE: if you are tempted to add more code here, DON'T! * Whatever you had in mind to do should be set up as an * on_proc_exit or on_shmem_exit callback, instead. Otherwise * it will fail to be called during other backend-shutdown * scenarios. */ proc_exit(0); case 'd': /* copy data */ case 'c': /* copy done */ case 'f': /* copy fail */ /* * Accept but ignore these messages, per protocol spec; we * probably got here because a COPY failed, and the frontend * is still sending data. */ break; default: ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid frontend message type %d", firstchar))); } } /* end of input-reading loop */}三、跟踪分析 在主节点上用gdb跟踪postmaster,在PostgresMain上设置断点后启动standby节点,进入断点 [xdb@localhost ~]$ ps -ef|grep postgrexdb 1263 1 0 14:20 pts/0 00:00:00 /appdb/xdb/pg11.2/bin/postgres(gdb) b PostgresMainBreakpoint 1 at 0x8bf9df: file postgres.c, line 3660.(gdb) set follow-fork-mode child(gdb) cContinuing.[New process 1332][Thread debugging using libthread_db enabled]Using host libthread_db library "/lib64/libthread_db.so.1".[Switching to Thread 0x7fb3885d98c0 (LWP 1332)]Breakpoint 1, PostgresMain (argc=1, argv=0x1aa4c78, dbname=0x1aa4b68 "", username=0x1aa4b40 "replicator") at postgres.c:36603660 volatile bool send_ready_for_query = true;(gdb) 1.初始化相关变量 注意变量IsUnderPostmaster,如为T则表示该进程为postmaster的子进程 (gdb) p *argv$1 = 0xc27715 "postgres"(gdb) n3661 bool disable_idle_in_transaction_timeout = false;(gdb) 3664 if (!IsUnderPostmaster)(gdb) p IsUnderPostmaster$2 = true 2.初始化进程信息,设置进程状态,初始化GUC参数 (gdb) n3667 SetProcessingMode(InitProcessing);(gdb) 3672 if (!IsUnderPostmaster)(gdb) p InitProcessing$3 = InitProcessing 3.解析命令行参数并作相关校验 (gdb) n3678 
process_postgres_switches(argc, argv, PGC_POSTMASTER, &dbname);(gdb) 3681 if (dbname == NULL)(gdb) p dbname$4 = 0x1aa4b68 ""(gdb) p username$5 = 0x1aa4b40 "replicator"(gdb) n3692 if (!IsUnderPostmaster)(gdb) 4.如为walsender进程,则调用WalSndSignals初始化,否则执行其他信号初始化 3712 if (am_walsender)(gdb) 3713 WalSndSignals();(gdb) 5.初始化BlockSig/UnBlockSig/StartupBlockSig (gdb) 3751 pqinitmask();(gdb) 3753 if (IsUnderPostmaster)(gdb) 3756 sigdelset(&BlockSig, SIGQUIT);(gdb) (gdb) 3759 PG_SETMASK(&BlockSig); /* block everything except SIGQUIT */(gdb) 6.非子进程(仍为postmaster进程),则检查数据库路径/切换路径/创建锁定文件等操作 N/A 7.调用BaseInit执行基本的初始化 3785 BaseInit();(gdb) 8.调用InitProcess/InitPostgres初始化进程 3797 InitProcess();(gdb) 3801 PG_SETMASK(&UnBlockSig);(gdb) 3810 InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, false);(gdb) 9.重置内存上下文,处理加载库和前后台消息交互等 (gdb) 3819 if (PostmasterContext)(gdb) 3821 MemoryContextDelete(PostmasterContext);(gdb) P PostmasterContext$6 = (MemoryContext) 0x1a78c60(gdb) P *PostmasterContext$7 = {type = T_AllocSetContext, isReset = false, allowInCritSection = false, methods = 0xc93260 , parent = 0x1a73aa0, firstchild = 0x1a9a700, prevchild = 0x1a7ac70, nextchild = 0x1a75ab0, name = 0xc2622a "Postmaster", ident = 0x0, reset_cbs = 0x0}(gdb) n3822 PostmasterContext = NULL;(gdb) 3825 SetProcessingMode(NormalProcessing);(gdb) 3831 BeginReportingGUCOptions();(gdb) 3837 if (IsUnderPostmaster && Log_disconnections)(gdb) p Log_disconnections$8 = false(gdb) p$9 = false(gdb) n3841 if (am_walsender)(gdb) 3842 InitWalSender();(gdb) 3848 process_session_preload_libraries();(gdb) 3853 if (whereToSendOutput == DestRemote)(gdb) 3857 pq_beginmessage(&buf, 'K');(gdb) 3858 pq_sendint32(&buf, (int32) MyProcPid);(gdb) 3859 pq_sendint32(&buf, (int32) MyCancelKey);(gdb) 3860 pq_endmessage(&buf);(gdb) 3865 if (whereToSendOutput == DestDebug)(gdb) 10.初始化内存上下文 (gdb) 3874 MessageContext = AllocSetContextCreate(TopMemoryContext,(gdb) 3884 row_description_context = AllocSetContextCreate(TopMemoryContext,(gdb) 
3887 MemoryContextSwitchTo(row_description_context);(gdb) 3888 initStringInfo(&row_description_buf);(gdb) 3889 MemoryContextSwitchTo(TopMemoryContext);(gdb) 3894 if (!IsUnderPostmaster)(gdb) 3919 if (sigsetjmp(local_sigjmp_buf, 1) != 0)(gdb) 4027 PG_exception_stack = &local_sigjmp_buf;(gdb) 4029 if (!ignore_till_sync)(gdb) 4030 send_ready_for_query = true; /* initially, or after error */(gdb) 11.进入主循环 11.1切换至MessageContext上下文 (gdb) 4042 doing_extended_query_message = false;(gdb) 4048 MemoryContextSwitchTo(MessageContext);(gdb) 4049 MemoryContextResetAndDeleteChildren(MessageContext); 11.2初始化输入的消息 (gdb) 4051 initStringInfo(&input_message);(gdb) 4057 InvalidateCatalogSnapshotConditionally();(gdb) p input_message$10 = {data = 0x1a78d78 "", len = 0, maxlen = 1024, cursor = 0}(gdb) 11.3给客户端发送可以执行查询等消息 (gdb) n4072 if (send_ready_for_query)(gdb) p send_ready_for_query$12 = true(gdb) n4074 if (IsAbortedTransactionBlockState())(gdb) 4087 else if (IsTransactionOrTransactionBlock())(gdb) 4102 ProcessCompletedNotifies();(gdb) 4103 pgstat_report_stat(false);(gdb) 4105 set_ps_display("idle", false);(gdb) 4106 pgstat_report_activity(STATE_IDLE, NULL);(gdb) 4109 ReadyForQuery(whereToSendOutput);(gdb) 4110 send_ready_for_query = false;(gdb) 11.4读取命令 命令是IDENTIFY_SYSTEM,判断系统标识是否OK firstchar ->

ASCII 81 -> the letter 'Q'

(gdb)
4119    DoingCommandRead = true;
(gdb)
4124    firstchar = ReadCommand(&input_message);
(gdb)
4135    CHECK_FOR_INTERRUPTS();
(gdb) p input_message
$13 = {data = 0x1a78d78 "IDENTIFY_SYSTEM", len = 16, maxlen = 1024, cursor = 0}
(gdb) p firstchar
$14 = 81
(gdb)
$15 = 81
(gdb) n
4136    DoingCommandRead = false;
(gdb)
4141    if (disable_idle_in_transaction_timeout)
(gdb)
4151    if (ConfigReloadPending)
(gdb)
4161    if (ignore_till_sync && firstchar != EOF)
(gdb)

11.5 Perform the relevant action according to the command type

For a walsender process, the command is executed by exec_replication_command.

(gdb)
4164    switch (firstchar)
(gdb)
4171    SetCurrentStatementStartTimestamp();
(gdb)
4173    query_string = pq_getmsgstring(&input_message);
(gdb)
4174    pq_getmsgend(&input_message);
(gdb) p query_string
$16 = 0x1a78d78 "IDENTIFY_SYSTEM"
(gdb) n
4176    if (am_walsender)
(gdb)
4178    if (!exec_replication_command(query_string))
(gdb)
4184    send_ready_for_query = true;
(gdb)
4186    break;
(gdb)
4411    } /* end of input-reading loop */
(gdb)

The loop continues and receives the next command; the second command is START_REPLICATION.

(gdb)
4124    firstchar = ReadCommand(&input_message);
(gdb)
4135    CHECK_FOR_INTERRUPTS();
(gdb) p input_message
$18 = {data = 0x1a78d78 "START_REPLICATION 0/5D000000 TIMELINE 16", len = 41, maxlen = 1024, cursor = 0}
(gdb) p firstchar
$19 = 81
...
4164    switch (firstchar)
(gdb) n
4171    SetCurrentStatementStartTimestamp();
(gdb)
4173    query_string = pq_getmsgstring(&input_message);
(gdb)
4174    pq_getmsgend(&input_message);
(gdb)
4176    if (am_walsender)
(gdb) p query_string
$20 = 0x1a78d78 "START_REPLICATION 0/5D000000 TIMELINE 16"
(gdb) p input_message
$21 = {data = 0x1a78d78 "START_REPLICATION 0/5D000000 TIMELINE 16", len = 41, maxlen = 1024, cursor = 41}
(gdb) n
4178    if (!exec_replication_command(query_string))
(gdb)

After replication has started, connect to the database on the master node with psql and execute an SQL statement. The child process (the walsender being traced) receives the corresponding signal and handles it.

Execute script

[xdb@localhost ~]$ psql -d testdb
psql (11.2)
Type "help" for help.

testdb=# drop table T1;

Child process output

(gdb)
Program received signal SIGUSR1, User defined signal 1.
0x00007fb38696c903 in __epoll_wait_nocancel () from /lib64/libc.so.6
(gdb)
Single stepping until exit from function __epoll_wait_nocancel,
which has no line number information.
procsignal_sigusr1_handler (postgres_signal_arg=32766) at procsignal.c:262
262     {
(gdb) n
263         int save_errno = errno;
(gdb)
Program received signal SIGTRAP, Trace/breakpoint trap.
0x00007fb3881eecd0 in __errno_location () from /lib64/libpthread.so.0
(gdb)
Single stepping until exit from function __errno_location,
which has no line number information.
procsignal_sigusr1_handler (postgres_signal_arg=10) at procsignal.c:265
265         if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT))
(gdb)

DONE!

After the debugging session ends and gdb exits, the psql session crashes :(

[xdb@localhost ~]$ psql -d testdb
psql (11.2)
Type "help" for help.

testdb=# drop table T1;
WARNING:  terminating connection because of crash of another server process
DETAIL:  The postmaster has commanded this server process to roll back the current transaction and exit, because another server process exited abnormally and possibly corrupted shared memory.
HINT:  In a moment you should be able to reconnect to the database and repeat your command.
server closed the connection unexpectedly
        This probably means the server terminated abnormally
        before or while processing the request.
The connection to the server was lost. Attempting reset: Failed.
!>

This concludes the study of the PostgreSQL walsender background process. I hope it has resolved your doubts. Combining theory with practice helps you learn better, so go and try it yourself! If you want to learn more about related topics, please keep following this website; the editor will continue working to bring you more practical articles.

Subscribe to "Shulou Technology Information" to get the latest news, interesting stories and hot topics in the IT industry, and to keep up with the newest Internet news, technology news and IT industry trends.

Views: 0

*The comments in the above article only represent the author's personal views and do not represent the views and positions of this website. If you have more insights, please feel free to contribute and share.

Share To

Database

Wechat

© 2024 shulou.com SLNews company. All rights reserved.

12
Report