IGLC.net EXPORT DATE: 19 June 2026

@inproceedings{Sabek2026,
  author          = {Sabek, Mohamed and Mei, Qipei and Lee, Gaang and Golabchi, Ali and Gonzalez, Vicente},
  title           = {The {Lean Construction Visual Taxonomy} ({LCVT}): Bridging the Semantic Gap},
  booktitle       = {Proceedings of the 34th Annual Conference of the International Group for Lean Construction ({IGLC} 34)},
  editor          = {Hamzeh, Farook and Poshdar, Mani and Garcia-Lopez, Nelly P.},
  year            = {2026},
  pages           = {14--25},
  address         = {Singapore, Singapore},
  issn            = {2789-0015},
  doi             = {10.24928/2026/0151},
  url             = {http://www.iglc.net/papers/details/2468},
  language        = {English},
  affiliation     = {PhD Candidate, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, sabek@ualberta.ca, orcid.org/0009-0005-2906-9874 ; Assistant Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, qipei@ualberta.ca, ; Assistant Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, gaang@ualberta.ca, ; Adjunct Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, alireza1@ualberta.ca, ; Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, vagonzal@ualberta.ca, orcid.org/0000-0003-3408-3863},
  abstract        = {The architecture, engineering, and construction (AEC) industry faces productivity stagnation due to ineffective production flow management. Although Lean Construction (LC) aims to minimize waste, manual monitoring lacks the high-frequency data required for timely control. Computer Vision (CV) offers automated monitoring but suffers from a ``Semantic Gap,'' where models detect low-level objects but fail to interpret high-level Lean states (e.g., ``waiting'').
                     This study proposes the Lean Construction Visual Taxonomy (LCVT), a three-level hierarchical framework---Category, Indicator, Visual Definition---grounded in Transformation-Flow-Value (TFV) theory. Crucially, the LCVT provides standardized class definitions to guide ``zero-shot'' prompt engineering in Vision-Language Models (VLMs). By injecting formal L3 definitions that address entity types, temporal thresholds (e.g., stationary $>$60~s), and spatial context into VLM models such as GPT-4o and Gemini 2.5, the framework enables sophisticated, lean reasoning without the need for massive custom-labeled datasets. Pilot validation achieved a 0.946 mAP in distinguishing state-dependent equipment loads. By formalizing the visual signatures of waste, the LCVT establishes the data infrastructure necessary for proactive, VLM-driven decision support in construction AI.},
  author_keywords = {AI, transformation-flow-value, computer vision, taxonomy, visual management},
  document_type   = {Conference Paper},
  source          = {IGLC},
}