% Journal article record (repository export), Applied Acoustics vol. 156, 2019.
% The author field holds each person once, in Latin script. The kanji forms
% and katakana readings are kept in the style-ignored fields author_ja and
% yomi so that styles do not print them as additional authors.
% The abstract is stored in the abstract field and the file MIME type in the
% ignored format field, so neither is printed as a bibliography note.
@article{oai:muroran-it.repo.nii.ac.jp:00010063,
  author    = {Kobayashi, Yosuke and Kondo, Kazuhiro},
  title     = {{Japanese} speech intelligibility estimation and prediction using objective intelligibility indices under noisy and reverberant conditions},
  journal   = {Applied Acoustics},
  volume    = {156},
  year      = {2019},
  month     = dec,
  abstract  = {Objective measures of intelligibility are preferable to subjective ones in the evaluation of speech systems used in real environments. In this study, subjective evaluations of eight types of indoor noise environments were used to compare four intelligibility indices to objectively evaluate Japanese speech intelligibility. These indices were as follows: short-time objective intelligibility (STOI), which has been widely used in recent years; speech intelligibility prediction based on mutual information (SIMI), which is derived from STOI; extended STOI (ESTOI), which is an improved version of STOI; and frequency weighted segmental signal to noise ratio (fwSNRseg), which incorporates both time and frequency components. These indices were subjectively evaluated in the eight noisy environments included in the corpus and environments for noisy speech recognition 4 (CENSREC-4) dataset using the familiarity-controlled word lists 2007 (FW07) as the speech data for the intelligibility evaluations. The results of the subjective evaluation of the four indices were then used to train predictive intelligibility estimation models. We evaluated the model performance using cross validation, which involved repeated training of seven of the eight environments and predicting the speech intelligibility under the remaining one environment. In the simulation results, the prediction accuracy of the SIMI index was significantly higher than that of the other indices, with a root mean squared error of 0.160 and a correlation coefficient of 0.934.},
  author_ja = {小林, 洋介 and 近藤, 和弘},
  yomi      = {コバヤシ, ヨウスケ and コンドウ, カズヒロ},
  format    = {application/pdf},
}