% Peshave et al. (2022), Annual Conference of the PHM Society, vol. 14, no. 1.
% `month` uses the predefined `oct` macro so that styles render the month name themselves.
@article{2022:peshave:phmconf_eval_vec_repr,
  author    = {Peshave, Akshay and Aggour, Kareem and Ali, Asma and Mulwad, Varish and Dixit, Sharad and Saxena, Abhinav},
  title     = {Evaluating Vector Representations of Short Text Data for Automating Recommendations of Maintenance Cases},
  journal   = {Annual Conference of the PHM Society},
  volume    = {14},
  number    = {1},
  year      = {2022},
  month     = oct,
  day       = {28},
  date      = {2022-10-28},
  publisher = {PHM Society},
  address   = {Nashville, TN, USA},
  issn      = {2325-0178},
  doi       = {10.36001/phmconf.2022.v14i1.3196},
  url       = {https://doi.org/10.36001/phmconf.2022.v14i1.3196},
  type      = {article},
  id        = {2022:peshave:phmconf_eval_vec_repr},
}
Evaluating Vector Representations of Short Text Data for Automating Recommendations of Maintenance Cases
ABSTRACT: This paper presents our efforts towards developing a prescriptive maintenance system that integrates with and enhances state-of-the-art asset performance management software available in the industry. The goal of prescriptive maintenance is to analyze the behavior of an asset, assess its condition, and recommend specific actions to maximize the utility of that asset. Specifically, this work evaluates three approaches of different complexities for vectorization of short-text maintenance case titles for kNN-based recommendation of cases relevant to a new input case title. Industrial text must first be vectorized to build automated and/or machine learning-based prediction and recommendation models. The choice of vectorization methods heavily dictates how the language gets modeled and consequently impacts the performance of downstream prediction and recommendation models. The objective of the nearest neighbor case recommendations is to reduce manual Subject Matter Expert (SME) effort and increase consistency of recommended maintenance actions on industrial assets by reusing actions performed on the identified nearest neighbor cases from past maintenance work. Four models based on three text vectorization approaches are evaluated, quantitatively and qualitatively, using real data from a large variety of utility customers from the energy domain. A single-tier (WVEC-1tier) and a three-tier (WVEC-3tier) approach that represent case titles in word-based vector spaces each significantly outperform a more complex bag-of-phrases topic vector space-based approach (TVEC-K-topics). We present our findings and challenges identified so far in building such a recommendation system.