This paper proposes a novel algorithm to jointly determine the structure and the parameters of a posteriori probability model based on neural networks (NNs). It makes use of well-known ideas of pruning, splitting, and merging neural components and takes advantage of the probabilistic interpretation of these components. The algorithm, so called a posteriori probability model selection (PPMS), is applied to an NN architecture called the generalized softmax perceptron (GSP) whose outputs can be understood as probabilities although results shown can be extended to more general network architectures. Learning rules are derived from the application of the expectation-maximization algorithm to the GSP-PPMS structure. Simulation results show the advantages of the proposed algorithm with respect to other schemes. {\textcopyright} 2005 IEEE.

}, keywords = {Algorithms, Automated, Biological, Breast Neoplasms, Computer simulation, Computer-Assisted, Computing Methodologies, Decision Support Techniques, Diagnosis, Estimation, Expectation-maximization, Generalized Softmax Perceptron (GSP), Humans, Mathematical models, Model selection, Models, Neural Networks (Computer), Neural networks, Numerical Analysis, Objective function, Pattern recognition, Posterior probability, Probability, Statistical, Stochastic Processes, algorithm, article, artificial neural network, automated pattern recognition, biological model, breast tumor, classification, cluster analysis, computer analysis, computer assisted diagnosis, decision support system, evaluation, human, mathematical computing, methodology, statistical model, statistics}, issn = {10459227}, doi = {10.1109/TNN.2005.849826}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-23044459586\&partnerID=40\&md5=f00e7d86a625cfc466373a2a938276d0}, author = {J I Arribas and Jes{\'u}s Cid-Sueiro} }

% Conventions for the entries below: citation keys are kept exactly as
% exported so existing citations keep resolving; DOIs are stored bare
% (no resolver prefix); page ranges use a double hyphen; author names
% use the "Last, First" form.

% Conference paper (2003): PPMS with the GSP architecture applied to
% microcalcification detection in mammograms.
@inproceedings{arribas2003neural,
  author       = {Arribas, J I and Alberola-Lopez, Carlos and Mateos-Marcos, A and Cid-Sueiro, Jes{\'u}s},
  title        = {Neural posterior probabilities for microcalcification detection in breast cancer diagnoses},
  booktitle    = {Proceedings of the First International IEEE EMBS Conference on Neural Engineering},
  year         = {2003},
  pages        = {660--663},
  publisher    = {IEEE},
  organization = {IEEE},
  abstract     = {We apply the a Posteriori Probability Model Selection (PPMS) algorithm with the help of Generalized Softmax Perceptron (GSP) neural architecture in order to obtain estimates of the posterior class probabilities at its outputs, in the binary problem of microcalcification detection in a hospital digitalized mammogram database. We first detect windowed images with high probability to belong to the class microcalcification is present, then we locally segment the shape of the calcifications, and finally show the segmented microcalcifications to the radiologist. The segmented images together with the posterior probabilities for each window image can be employed as a valuable information to help predicting a breast diagnosis, in order to distinguish between benignant calcium deposit and malignant accumulation, that is, breast carcinoma.},
  doi          = {10.1109/CNE.2003.1196915},
  url          = {https://ieeexplore.ieee.org/abstract/document/1196915},
}

% Journal article (1999): cost functions for estimating a posteriori
% probabilities in multiclass problems.
@article{409,
  author   = {Cid-Sueiro, Jes{\'u}s and Arribas, J I and Urban-Munoz, S and Figueiras-Vidal, A R},
  title    = {Cost functions to estimate a posteriori probabilities in multiclass problems},
  journal  = {IEEE Transactions on Neural Networks},
  volume   = {10},
  year     = {1999},
  pages    = {645--656},
  abstract = {The problem of designing cost functions to estimate a posteriori probabilities in multiclass problems is addressed in this paper. We establish necessary and sufficient conditions that these costs must satisfy in one-class one-output networks whose outputs are consistent with probability laws. We focus our attention on a particular subset of the corresponding cost functions; those which verify two usually interesting properties: symmetry and separability (well-known cost functions, such as the quadratic cost or the cross entropy are particular cases in this subset). Finally, we present a universal stochastic gradient learning rule for single-layer networks, in the sense of minimizing a general version of these cost functions for a wide family of nonlinear activation functions.},
  keywords = {Cost functions, Estimation, Functions, Learning algorithms, Multiclass problems, Neural networks, Pattern recognition, Probability, Problem solving, Random processes, Stochastic gradient learning rule},
  issn     = {1045-9227},
  doi      = {10.1109/72.761724},
  url      = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0032643080\&partnerID=40\&md5=d528195bd6ec84531e59ddd2ececcd46},
}

% Conference paper (1999): JNDDE with partial likelihood estimation for
% time series problems. No DOI is recorded for this entry.
@inproceedings{412,
  author       = {Arribas, J I and Cid-Sueiro, Jes{\'u}s and Adali, T and Ni, H and Wang, B and Figueiras-Vidal, A R},
  title        = {Estimates of constrained multi-class a posteriori probabilities in time series problems with neural networks},
  booktitle    = {Proceedings of the International Joint Conference on Neural Networks},
  year         = {1999},
  publisher    = {IEEE, United States},
  organization = {IEEE, United States},
  address      = {Washington, DC, USA},
  abstract     = {In time series problems, where time ordering is a crucial issue, the use of Partial Likelihood Estimation (PLE) represents a specially suitable method for the estimation of parameters in the model. We propose a new general supervised neural network algorithm, Joint Network and Data Density Estimation (JNDDE), that employs PLE to approximate conditional probability density functions for multi-class classification problems. The logistic regression analysis is generalized to multiple class problems with softmax regression neural network used to model the a-posteriori probabilities such that they are approximated by the network outputs. Constraints to the network architecture, as well as to the model of data, are imposed, resulting in both a flexible network architecture and distribution modeling. We consider application of JNDDE to channel equalization and present simulation results.},
  keywords     = {Approximation theory, Computer simulation, Constraint theory, Data structures, Joint network-data density estimation (JNDDE), Mathematical models, Multi-class a posteriori probabilities, Neural networks, Partial likelihood estimation (PLE), Probability density function, Regression analysis},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0033325263\&partnerID=40\&md5=8c6134020b0b2a9c5ab05b131c070b88},
}

% Conference paper (1999): JNDDE applied to logistic, softmax and
% generalized softmax perceptrons with Gaussian mixture densities.
@inproceedings{411,
  author       = {Arribas, J I and Cid-Sueiro, Jes{\'u}s and Adali, T and Figueiras-Vidal, A R},
  title        = {Neural architectures for parametric estimation of a posteriori probabilities by constrained conditional density functions},
  booktitle    = {Neural Networks for Signal Processing - Proceedings of the IEEE Workshop},
  year         = {1999},
  publisher    = {IEEE, Piscataway, NJ, United States},
  organization = {IEEE, Piscataway, NJ, United States},
  address      = {Madison, WI, USA},
  abstract     = {A new approach to the estimation of {\textquoteleft}a posteriori{\textquoteright} class probabilities using neural networks, the Joint Network and Data Density Estimation (JNDDE), is presented in this paper. It is based on the estimation of the conditional data density functions, with some restrictions imposed by the classifier structure; the Bayes{\textquoteright} rule is used to obtain the {\textquoteleft}a posteriori{\textquoteright} probabilities from these densities. The proposed method is applied to three different network structures: the logistic perceptron (for the binary case), the softmax perceptron (for multi-class problems) and a generalized softmax perceptron (that can be used to map arbitrarily complex probability functions). Gaussian mixture models are used for the conditional densities. The method has the advantage of establishing a distinction between the network parameters and the model parameters. Complexity on any of them can be fixed as desired. Maximum Likelihood gradient-based rules for the estimation of the parameters can be obtained. It is shown that JNDDE exhibits a more robust convergence characteristics than other methods of a posteriori probability estimation, such as those based on the minimization of a Strict Sense Bayesian (SSB) cost function.},
  keywords     = {Asymptotic stability, Constraint theory, Data structures, Gaussian mixture models, Joint network and data density estimation, Mathematical models, Maximum likelihood estimation, Neural networks, Probability},
  doi          = {10.1109/NNSP.1999.788145},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0033321049\&partnerID=40\&md5=7967fa377810cc0c3e6a4d9020024b80},
}

% Conference paper (1999): JNDDE for maximum-likelihood estimation of
% multi-class conditional densities. The acronym in the title is braced
% so sentence-casing styles do not lowercase it.
@inproceedings{410,
  author       = {Arribas, J I and Cid-Sueiro, Jes{\'u}s and Adali, T and Figueiras-Vidal, A R},
  title        = {Neural networks to estimate {ML} multi-class constrained conditional probability density functions},
  booktitle    = {Proceedings of the International Joint Conference on Neural Networks},
  year         = {1999},
  publisher    = {IEEE, United States},
  organization = {IEEE, United States},
  address      = {Washington, DC, USA},
  abstract     = {In this paper, a new algorithm, the Joint Network and Data Density Estimation (JNDDE), is proposed to estimate the {\textquoteleft}a posteriori{\textquoteright} probabilities of the targets with neural networks in multiple classes problems. It is based on the estimation of conditional density functions for each class with some restrictions or constraints imposed by the classifier structure and the use Bayes rule to force the a posteriori probabilities at the output of the network, known here as a implicit set. The method is applied to train perceptrons by means of Gaussian mixture inputs, as a particular example for the Generalized Softmax Perceptron (GSP) network. The method has the advantage of providing a clear distinction between the network architecture and the model of the data constraints, giving network parameters or weights on one side and data over parameters on the other. MLE stochastic gradient based rules are obtained for JNDDE. This algorithm can be applied to hybrid labeled and unlabeled learning in a natural fashion.},
  keywords     = {Generalized softmax perceptron (GSP) network, Joint network and data density estimation (JNDDE), Mathematical models, Maximum likelihood estimation, Neural networks, Probability density function, Random processes},
  doi          = {10.1109/IJCNN.1999.831174},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0033326060\&partnerID=40\&md5=bb38c144dac0872f3a467dc12170e6b6},
}