• postgresql源码学习(41)—— 崩溃恢复③ - 日志来源


    一、 日志来源有哪些?

           前篇中获取到了恢复起点,即开始回放日志的位置,后面我们就可以开始读取并应用日志了。不过在此之前还有一个问题,从哪里获取WAL日志呢?

           第一篇中提到过,Startup进程的3大作用——崩溃恢复、从库日志应用、PITR,对于不同的用途,也有不同的日志来源。

           pg维护一个专门的状态机(称作state machine)表示待应用日志的来源,用于在不同时段,从不同的日志源获取WAL日志。对应代码如下(在xlog.c):

    1. /*
    2. * Codes indicating where we got a WAL file from during recovery, or where
    3. * to attempt to get one.
    4. */
    5. typedef enum
    6. {
    7. XLOG_FROM_ANY = 0, /* request to read WAL from any source */
    8. XLOG_FROM_ARCHIVE, /* restored using restore_command,从归档日志中获取 */
    9. XLOG_FROM_PG_WAL, /* existing file in pg_wal,从pg_wal目录获取 */
    10. XLOG_FROM_STREAM /* streamed from primary,备库从主库获取 */
    11. } XLogSource;
    12. /* human-readable names for XLogSources, for debugging output */
    13. static const char *const xlogSourceNames[] = {"any", "archive", "pg_wal", "stream"};

    二、 WaitForWALToBecomeAvailable函数

           pg会根据当前状态确定初始日志源,当日志源读取发生错误,或者状态发生改变时,会切换到其他日志源。这个功能对应函数是 WaitForWALToBecomeAvailable(在xlog.c文件)

    1. 主要参数

    • fetching_ckpt:若为true,说明正在读取检查点记录,并且在此之后应准备从RedoStartLSN开始读取WAL日志(If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should prepare to read WAL starting from RedoStartLSN after this.)
    • RecPtr:不一定是指向我们感兴趣的记录开始位置,也可能指向页头或段头('RecPtr' might not point to the beginning of the record we're interested in, it might also point to the page or segment header.)
    • tliRecPtr:如果RecPtr指向页头或段头,则tliRecPtr指向我们感兴趣的WAL记录位置。它用于决定流复制从哪个时间线获取WAL日志(In that case, 'tliRecPtr' is the position of the WAL record we're interested in. It is used to decide which timeline to stream the requested WAL from.)

    2. 返回值

    • 如果不是从库模式,且记录不是立即可用的,函数返回false。
    • 如果是从库模式,则一直等到记录可用
    • 当请求的日志记录状态变为可用时,该函数打开包含该记录的文件,并返回true
    • 当遇到从库模式结束(end of standby mode,即用户将从库提升为主库),且无更多可用WAL日志时,返回false
    1. /*
    2. * Open the WAL segment containing WAL location 'RecPtr'.
    3. */
    4. static bool
    5. WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
    6. bool fetching_ckpt, XLogRecPtr tliRecPtr)
    7. {
    8. static TimestampTz last_fail_time = 0;
    9. TimestampTz now;
    10. bool streaming_reply_sent = false;
    11. /* 首先初始化currentSource */
    12. /* 如果不是从归档获取日志(即当前在做崩溃恢复) */
    13. if (!InArchiveRecovery)
    14. /* 当前日志源设置为从pg_wal目录直接读取wal日志 */
    15. currentSource = XLOG_FROM_PG_WAL;
    16. /* 如果是从归档获取日志(即当前在做PITR或为从库) */
    17. else if (currentSource == XLOG_FROM_ANY ||
    18. (!StandbyMode && currentSource == XLOG_FROM_STREAM))
    19. {
    20. /* 当前日志源设置为从归档读取日志 */
    21. lastSourceFailed = false;
    22. currentSource = XLOG_FROM_ARCHIVE;
    23. }
    24. for (;;)
    25. {
    26. XLogSource oldSource = currentSource;
    27. bool startWalReceiver = false;
    28. /*
    29. * 循环检查,如果在日志读取中发生了错误,则考虑开始切换日志源
    30. */
    31. if (lastSourceFailed)
    32. {
    33. /* 判断当前日志源 */
    34. switch (currentSource)
    35. {
    36. case XLOG_FROM_ARCHIVE:
    37. case XLOG_FROM_PG_WAL:
    38. /*
    39. * 检查是否存在trigger文件。注意pg只会在遇到报错时检查该项,因此当你创建trigger文件时,pg仍然会在主从切换前尽可能多地应用归档和pg_wal中的日志。
    40. * 若是从库且存在trigger文件,关闭WalReceiver进程,提升为主库,函数返回false(对应前面关于返回值介绍的第4条)
    41. */
    42. if (StandbyMode && CheckForStandbyTrigger())
    43. {
    44. ShutdownWalRcv();
    45. return false;
    46. }
    47. /*
    48. * Not in standby mode, and we've now tried the archive and pg_wal.
    49. * 若非从库,且记录不是立即可用的,函数返回false(对应前面关于返回值介绍的第1条)
    50. */
    51. if (!StandbyMode)
    52. return false;
    53. /*
    54. * Move to XLOG_FROM_STREAM state, and set to start a walreceiver if necessary. 如果上面两种情况都不符合,说明当前是从库且未检查到trigger文件。则日志源设为XLOG_FROM_STREAM,并启动WalReceiver进程,退出switch语句。
    55. */
    56. currentSource = XLOG_FROM_STREAM;
    57. startWalReceiver = true;
    58. break;
    59. /* 如果在日志源为XLOG_FROM_STREAM时发生报错 */
    60. case XLOG_FROM_STREAM:
    61. /* 首先这种情况只可能在从库发生 */
    62. Assert(StandbyMode);
    63. /*
    64. * 在退出 XLOG_FROM_STREAM 状态前,确保WalReceiver进程已关闭,避免覆盖从归档中还原的WAL日志
    65. */
    66. if (WalRcvStreaming())
    67. ShutdownWalRcv();
    68. /*
    69. * Before we sleep, re-scan for possible new timelines if
    70. * we were requested to recover to the latest timeline.
    71. * 如果我们被请求恢复到最新时间线,则在sleep之前重新扫描是否出现了新的时间线。如果发现了新的时间线,则进入XLOG_FROM_ARCHIVE状态重新开始,退出switch语句
    72. */
    73. if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
    74. {
    75. if (rescanLatestTimeLine())
    76. {
    77. currentSource = XLOG_FROM_ARCHIVE;
    78. break;
    79. }
    80. }
    81. /* XLOG_FROM_STREAM is the last state in our state
    82. * machine, so we've exhausted all the options for
    83. * obtaining the requested WAL. We're going to loop back
    84. * and retry from the archive, but if it hasn't been long
    85. * since last attempt, sleep wal_retrieve_retry_interval
    86. * milliseconds to avoid busy-waiting.
    87. * XLOG_FROM_STREAM是状态机的最终状态,走到这里说明已经穷尽了所有可能获取WAL日志的日志来源。接下来将回到循环开头,重新尝试从归档中获取日志。但如果距离上次尝试的时间还不够长,我们会先休眠wal_retrieve_retry_interval 参数指定的毫秒数,避免忙等待(busy-waiting)。
    88. */
    89. now = GetCurrentTimestamp();
    90. if (!TimestampDifferenceExceeds(last_fail_time, now,
    91. wal_retrieve_retry_interval))
    92. {
    93. long wait_time;
    94. wait_time = wal_retrieve_retry_interval -
    95. TimestampDifferenceMilliseconds(last_fail_time, now);
    96. (void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
    97. WL_LATCH_SET | WL_TIMEOUT |
    98. WL_EXIT_ON_PM_DEATH,
    99. wait_time,
    100. WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL);
    101. ResetLatch(&XLogCtl->recoveryWakeupLatch);
    102. now = GetCurrentTimestamp();
    103. /* Handle interrupt signals of startup process */
    104. HandleStartupProcInterrupts();
    105. }
    106. /* 记录错误时间,从XLOG_FROM_ARCHIVE状态重新开始,退出switch语句 */
    107. last_fail_time = now;
    108. currentSource = XLOG_FROM_ARCHIVE;
    109. break;
    110. default:
    111. elog(ERROR, "unexpected WAL source %d", currentSource);
    112. }
    113. }
    114. /* 如果没有遇到错误,且日志来源为pg_wal */
    115. else if (currentSource == XLOG_FROM_PG_WAL)
    116. {
    117. /*
    118. * We just successfully read a file in pg_wal. We prefer files in
    119. * the archive over ones in pg_wal, so try the next file again
    120. * from the archive first.
    121. * 说明成功从pg_wal中读取到了文件。在PITR或从库模式下,我们更倾向于从归档中获取日志,因此修改日志来源,下一个日志尝试从归档中获取。
    122. */
    123. if (InArchiveRecovery)
    124. currentSource = XLOG_FROM_ARCHIVE;
    125. }
    126. /* 如果新旧日志源不相等,记一个debug信息,说明日志源修改过 */
    127. if (currentSource != oldSource)
    128. elog(DEBUG2, "switched WAL source from %s to %s after %s",
    129. xlogSourceNames[oldSource], xlogSourceNames[currentSource],
    130. lastSourceFailed ? "failure" : "success");
    131. /*
    132. * We've now handled possible failure. Try to read from the chosen
    133. * source. 至此已处理完可能的失败情况,下面尝试从选定的日志源读取(此部分代码略)。
    134. */
    135. }

    参考

    《PostgreSQL技术内幕:事务处理深度探索》第4章

  • 相关阅读:
    IDEA中添加servlet模板
    白酒:中国的酒文化的传承与发扬
    26 mysql 索引的存储更新删除
    开关电源EMC整改案例-辐射骚扰
    基于视觉显著性的车载单目相机自运动估计及前车尺度估计方法
    【Python】Numpy生成等差数组
    【算法1-3】暴力枚举——组合的输出
    LeetCode每日一题——522. 最长特殊序列 II
    LeetCode - Medium - 62. Unique Paths
    Real closed field
  • 原文地址:https://blog.csdn.net/Hehuyi_In/article/details/126494891