ここでは、HMMの最尤推定で省略した計算過程について記述します。
\(Q\left(\boldsymbol{\theta}, \boldsymbol{\theta}^{\text {old }}\right)\)
$$
\begin{aligned}
p(\mathbf{X}, \mathbf{Z} | \boldsymbol{\theta})&=p\left(\mathbf{z}_{1} | \boldsymbol{\pi}\right)\left[\prod_{n=2}^{N} p\left(\mathbf{z}_{n} | \mathbf{z}_{n-1}, \mathbf{A}\right)\right] \prod_{m=1}^{N} p\left(\mathbf{x}_{m} | \mathbf{z}_{m}, \boldsymbol{\phi}\right)\qquad (13.10)\\
&=\prod_{k=1}^{K} \pi_{k}^{z_{1 k}}\left[ \prod_{n=2}^{N}\prod_{k=1}^{K} \prod_{j=1}^{K} A_{j k}^{z_{n-1, j} z_{n k}} \right] \prod_{n=1}^{N}\prod_{d=1}^{D} \prod_{k=1}^{K} \phi_{d k}^{x_{nd} z_{nk}}\\
\therefore\ln p(\mathbf{X}, \mathbf{Z} | \boldsymbol{\theta})
&=\sum_{k=1}^Kz_{1k}\ln\pi_k + \sum_{n=2}^N\sum_{k=1}^K\sum_{j=1}^Kz_{n-1,j}z_{nk}\ln A_{jk} + \sum_{n=1}^N\sum_{d=1}^D\sum_{k=1}^Kx_{nd}z_{nk}\ln\phi_{dk}
\end{aligned}
$$
であるから、これを \((13.12)\) に代入して、
$$
\begin{aligned}
Q\left(\boldsymbol{\theta}, \boldsymbol{\theta}^{\text {old }}\right)=&\sum_{\mathbf{Z}} p\left(\mathbf{Z} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right) \ln p(\mathbf{X}, \mathbf{Z} | \boldsymbol{\theta})\qquad (13.12)\\
=& \sum_{\mathbf{Z}} p\left(\mathbf{Z} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right)\sum_{k=1}^Kz_{1k}\ln\pi_k+\sum_{\mathbf{Z}} p\left(\mathbf{Z} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right)\sum_{n=2}^N\sum_{k=1}^K\sum_{j=1}^Kz_{n-1,j}z_{nk}\ln A_{jk}\\
&+\sum_{\mathbf{Z}} p\left(\mathbf{Z} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right)\sum_{n=1}^N\sum_{d=1}^D\sum_{k=1}^Kx_{nd}z_{nk}\ln\phi_{dk}
\end{aligned}
$$
ここで、\(\sum_{\mathbf{Z}} = \sum_{\mathbf{z_1},\mathbf{z_2},\ldots,\mathbf{z_N}}\) なので、それぞれ関係の無い部分を周辺化でき、
$$
\begin{aligned}
Q\left(\boldsymbol{\theta}, \boldsymbol{\theta}^{\text {old }}\right)=& \sum_{k=1}^K\sum_{\mathbf{z_1}} p\left(\mathbf{z_1} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right)z_{1k}\ln\pi_k+\sum_{n=2}^N\sum_{k=1}^K\sum_{j=1}^K\sum_{\mathbf{z_{n-1},z_n}}p\left(\mathbf{z_{n-1},z_n} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right)z_{n-1,j}z_{nk}\ln A_{jk}\\
&+\sum_{n=1}^N\sum_{d=1}^D\sum_{k=1}^K\sum_{\mathbf{z_n}}p\left(\mathbf{z_n} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right)x_{nd}z_{nk}\ln\phi_{dk}\\
\end{aligned}$$
となります。また、\(\gamma,\xi\) の定義を用いることで
$$\begin{aligned}
Q\left(\boldsymbol{\theta}, \boldsymbol{\theta}^{\text {old }}\right)=& \sum_{k=1}^K\left(\sum_{\mathbf{z_1}} \gamma\left(\mathbf{z}_1\right)z_{1k}\right)\ln\pi_k+\sum_{n=2}^N\sum_{k=1}^K\sum_{j=1}^K\left(\sum_{\mathbf{z_{n-1},z_n}}\xi\left(\mathbf{z_{n-1},z_{n}}\right) z_{n-1,j}z_{nk}\right)\ln A_{jk}\\
&+\sum_{n=1}^N\sum_{d=1}^D\sum_{k=1}^K\left(\sum_{\mathbf{z_n}}\gamma\left(\mathbf{z}_n\right)x_{nd}z_{nk}\right)\ln\phi_{dk}
\end{aligned}
$$
と書き直せます。この時、潜在変数が離散なので、\(\gamma,\xi\) はそれぞれ和が \(1\) となる \(K,\left(K\times K\right)\) 個の非負の数の集合を用いて以下のように記述できます。
$$
\begin{aligned} \gamma\left(z_{n k}\right) &=\sum_{\mathbf{z}} \gamma(\mathbf{z}) z_{n k} &(13.15)\\ \xi\left(z_{n-1, j}, z_{n k}\right) &=\sum_{\mathbf{z}} \gamma(\mathbf{z}) z_{n-1, j} z_{n k} &(13.16)\end{aligned}
$$
したがって、これらを用いて上記の式を書き直すと、
$$
\begin{aligned} Q\left(\boldsymbol{\theta}, \boldsymbol{\theta}^{\mathrm{old}}\right)=& \sum_{k=1}^{K} \gamma\left(z_{1 k}\right) \ln \pi_{k}+\sum_{n=2}^{N} \sum_{j=1}^{K} \sum_{k=1}^{K} \xi\left(z_{n-1, j}, z_{n k}\right) \ln A_{j k} \\ &+\sum_{n=1}^{N} \sum_{k=1}^{K}\gamma\left(z_{n k}\right) \sum_{d=1}^Dx_{nd} \ln \phi_{d k} \end{aligned}\qquad (13.17)
$$
Maximization step
ここでは、上記の \(Q\left(\boldsymbol{\theta}, \boldsymbol{\theta}^{\mathrm{old}}\right)\) 関数を各パラメータ \(\boldsymbol{\theta}\) に関して最大化する際のラグランジュの未定乗数法を書き下します。
\(\boldsymbol{\pi}\)
$$
\begin{aligned}
L\left(\boldsymbol{\pi},\boldsymbol{\theta}, \boldsymbol{\theta}^{\text { old }}\right) &= \sum_{k=1}^{K} \gamma\left(z_{1 k}\right) \ln \pi_{k} + \lambda_1\left(\sum_{k=1}^K\pi_k - 1\right)\\
\frac{\partial L}{\partial \pi_k}&=\frac{\gamma\left(z_{1 k}\right)}{\pi_k} + \lambda_1 = 0\quad \therefore \pi_k = -\frac{\gamma\left(z_{1 k}\right)}{\lambda_1}\\
\frac{\partial L}{\partial \lambda_1}&=\sum_{k=1}^K\pi_k - 1 = 0\quad \therefore\lambda_1 = -\sum_{k=1}^K\gamma\left(z_{1 k}\right)\\
\therefore\pi_k^{\star} &= \frac{\gamma\left(z_{1 k}\right)}{\sum_{j=1}^{K} \gamma\left(z_{1 j}\right)}
\end{aligned}
$$
\(\boldsymbol{A}\)
$$\begin{aligned}
L\left(\boldsymbol{A},\boldsymbol{\theta}, \boldsymbol{\theta}^{\text { old }}\right)
& = \sum_{n=2}^{N} \sum_{j=1}^{K} \sum_{k=1}^{K} \xi\left(z_{n-1, j}, z_{n k}\right) \ln A_{j k} + \sum_{j=1}^K\lambda_{2,j}\left(\sum_{k=1}^K A_{jk} - 1\right)\\
\frac{\partial L}{\partial A_{jk}} &= \sum_{n=2}^{N}\frac{\xi\left(z_{n-1, j}, z_{n k}\right)}{A_{jk}} + \lambda_{2,j} = 0\quad \therefore A_{jk} = -\frac{\sum_{n=2}^N\xi\left(z_{n-1, j}, z_{n k}\right)}{\lambda_{2,j}}\\
\frac{\partial L}{\partial \lambda_{2,j}} &= \sum_{k=1}^K A_{jk} - 1 = 0\quad \therefore \lambda_{2,j} = -\sum_{k=1}^K\sum_{n=2}^N\xi\left(z_{n-1, j}, z_{n k}\right)\\
\therefore A_{jk}^{\star}&=\frac{\sum_{n=2}^{N} \xi\left(z_{n-1, j}, z_{n k}\right)}{\sum_{l=1}^{K} \sum_{n=2}^{N} \xi\left(z_{n-1, j}, z_{n l}\right)}
\end{aligned}$$
\(\boldsymbol{\phi}\)
$$\begin{aligned}
L\left(\boldsymbol{\phi},\boldsymbol{\theta}, \boldsymbol{\theta}^{\text { old }}\right)
& = \sum_{n=1}^{N} \sum_{k=1}^{K}\gamma\left(z_{n k}\right) \sum_{i=1}^Dx_{ni} \ln \phi_{i k} + \sum_{k=1}^K\lambda_{3,k}\left( \sum_{i=1}^D\phi_{ik} - 1\right)\\
\frac{\partial L}{\partial\phi_{ik}} &= \frac{\sum_{n=1}^N\gamma\left(z_{n k}\right)x_{ni}}{\phi_{ik}} + \lambda_{3,k}=0\quad \therefore \phi_{ik} = -\frac{\sum_{n=1}^N\gamma\left(z_{n k}\right)x_{ni}}{\lambda_{3,k}}\\
\frac{\partial L}{\partial\lambda_{3,k}} &= \sum_{i=1}^D\phi_{ik} - 1 = 0 \quad \therefore \lambda_{3,k} = -\sum_{i=1}^D\sum_{n=1}^N\gamma\left(z_{n k}\right)x_{ni} = -\sum_{i=1}^D\gamma\left(z_{n k}\right)\\
\therefore \phi_{ik}^{\star} &= \frac{\sum_{n=1}^N\gamma\left(z_{n k}\right)x_{ni}}{\sum_{i=1}^D\gamma\left(z_{n k}\right)}
\end{aligned}$$
Expectation step
$$
\begin{aligned}
\gamma\left(\mathbf{z}_{n}\right) &=p\left(\mathbf{z}_{n} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right) &(13.13)\\ \xi\left(\mathbf{z}_{n-1}, \mathbf{z}_{n}\right) &=p\left(\mathbf{z}_{n-1}, \mathbf{z}_{n} | \mathbf{X}, \boldsymbol{\theta}^{\text {old }}\right) &(13.14)\\
\alpha\left(\mathbf{z}_{n}\right) & \equiv p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n}, \mathbf{z}_{n}\right) & (13.34)\\
\beta\left(\mathbf{z}_{n}\right) & \equiv p\left(\mathbf{x}_{n+1}, \ldots, \mathbf{x}_{N} | \mathbf{z}_{n}\right) & (13.35)\\
\end{aligned}
$$
forward-algorithm \((\alpha)\)
$$\begin{aligned}
\alpha\left(\mathbf{z}_{n}\right)
& =p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n}, \mathbf{z}_{n}\right) \\
& =p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n} | \mathbf{z}_{n}\right) p\left(\mathbf{z}_{n}\right) \\
& =p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n-1} | \mathbf{z}_{n}\right) p\left(\mathbf{z}_{n}\right) \quad (\because \text{conditional independence})\\
& =p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n-1}, \mathbf{z}_{n}\right) \\
& =p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) \sum_{\mathbf{z}_{n-1}} p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n-1}, \mathbf{z}_{n-1}, \mathbf{z}_{n}\right) \quad (\because \text{demarginalization})\\
& =p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) \sum_{\mathbf{z}_{n-1}} p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n-1}, \mathbf{z}_{n} | \mathbf{z}_{n-1}\right) p\left(\mathbf{z}_{n-1}\right) \\
& =p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) \sum_{\mathbf{z}_{n-1}} p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n-1} | \mathbf{z}_{n-1}\right) p\left(\mathbf{z}_{n} | \mathbf{z}_{n-1}\right) p\left(\mathbf{z}_{n-1}\right) \quad (\because \text{conditional independence})\\
& =p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) \sum_{\mathbf{z}_{n-1}} p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n-1}, \mathbf{z}_{n-1}\right) p\left(\mathbf{z}_{n} | \mathbf{z}_{n-1}\right) \\
& =p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) \sum_{\mathbf{z}_{n-1}} \alpha(\mathbf{z}_{n-1})p(\mathbf{z} | \mathbf{z}_{n-1})\\
\end{aligned}$$
backward-algorithm \((\beta)\)
$$\begin{aligned}
\beta\left(\mathbf{z}_{n}\right)
& =p\left(\mathbf{x}_{n+1}, \ldots, \mathbf{x}_{N} | \mathbf{z}_{n}\right) \\
& =\sum_{\mathbf{z}_{n+1}} p\left(\mathbf{x}_{n+1}, \ldots, \mathbf{x}_{N}, \mathbf{z}_{n+1} | \mathbf{z}_{n}\right) \quad (\because \text{demarginalization})\\
& =\sum_{\mathbf{z}_{n+1}} p\left(\mathbf{x}_{n+1}, \ldots, \mathbf{x}_{N} | \mathbf{z}_{n}, \mathbf{z}_{n+1}\right) p\left(\mathbf{z}_{n+1} | \mathbf{z}_{n}\right) \\
& =\sum_{\mathbf{z}_{n+1}} p\left(\mathbf{x}_{n+1}, \ldots, \mathbf{x}_{N} | \mathbf{z}_{n+1}\right) p\left(\mathbf{z}_{n+1} | \mathbf{z}_{n}\right) \quad (\because \text{conditional independence})\\
& =\sum_{\mathbf{z}_{n+1}} p\left(\mathbf{x}_{n+2}, \ldots, \mathbf{x}_{N} | \mathbf{z}_{n+1}\right) p\left(\mathbf{x}_{n+1} | \mathbf{z}_{n+1}\right) p\left(\mathbf{z}_{n+1} | \mathbf{z}_{n}\right) \quad (\because \text{conditional independence})\\
& =\sum_{\mathbf{z}_{n+1}} \beta(\mathbf{z}_{n+1})p(\mathbf{x}_{n+1}|\mathbf{z}_{n+1})p(\mathbf{z}_{n+1}|\mathbf{z}_n)\qquad (13.38)
\end{aligned}$$
\(\gamma,\xi\)
$$
\begin{aligned}
\gamma\left(\mathbf{z}_{n}\right)
&=p\left(\mathbf{z}_{n} | \mathbf{X}\right)=\frac{p\left(\mathbf{X} | \mathbf{z}_{n}\right) p\left(\mathbf{z}_{n}\right)}{p(\mathbf{X})}\quad (\because \text{Bayes' theorem}) &(13.32)\\
&=\frac{p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n}, \mathbf{z}_{n}\right) p\left(\mathbf{x}_{n+1}, \ldots, \mathbf{x}_{N} | \mathbf{z}_{n}\right)}{p(\mathbf{X})} \quad (\because \text{conditional independence})\\
&= \frac{\alpha\left(\mathbf{z}_{n}\right) \beta\left(\mathbf{z}_{n}\right)}{p(\mathbf{X})} & (13.33)
\end{aligned}
$$
$$\begin{aligned}
\xi\left(\mathbf{z}_{n-1}, \mathbf{z}_{n}\right)
&=p\left(\mathbf{z}_{n-1}, \mathbf{z}_{n} | \mathbf{X}\right) \\
&=\frac{p(\mathbf{X} | \mathbf{z}_{n-1}, \mathbf{z}_{n}) p\left(\mathbf{z}_{n-1}, \mathbf{z}_{n}\right)}{p(\mathbf{X})} \quad (\because \text{Bayes' theorem})\\
&=\frac{p\left(\mathbf{x}_{1}, \ldots, \mathbf{x}_{n-1} | \mathbf{z}_{n-1}\right) p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) p\left(\mathbf{x}_{n+1}, \ldots, \mathbf{x}_{N} | \mathbf{z}_{n}\right) p\left(\mathbf{z}_{n} | \mathbf{z}_{n-1}\right) p\left(\mathbf{z}_{n-1}\right)}{p(\mathbf{X})} \quad (\because \text{conditional independence})\\
&=\frac{\alpha\left(\mathbf{z}_{n-1}\right) p\left(\mathbf{x}_{n} | \mathbf{z}_{n}\right) p\left(\mathbf{z}_{n} | \mathbf{z}_{n-1}\right) \beta\left(\mathbf{x}_{n}\right)}{p(\mathbf{X})}\qquad (13.43)
\end{aligned}$$