IGLC.net
EXPORT DATE: 19 June 2026

% Sabek et al. (2026), conference paper in the IGLC 34 proceedings.
% Non-standard fields (affiliation, abstract, author_keywords, language,
% document_type, source) are kept from the export as annotations.
@inproceedings{Sabek2026,
  author          = {Sabek, Mohamed and Mei, Qipei and Lee, Gaang and Golabchi, Ali and Gonzalez, Vicente},
  editor          = {Hamzeh, Farook and Poshdar, Mani and Garcia-Lopez, Nelly P.},
  title           = {The {Lean Construction Visual Taxonomy} ({LCVT}): bridging the semantic gap},
  booktitle       = {Proceedings of the 34th Annual Conference of the International Group for Lean Construction ({IGLC} 34)},
  year            = {2026},
  pages           = {14--25},
  address         = {Singapore, Singapore},
  issn            = {2789-0015},
  doi             = {10.24928/2026/0151},
  url             = {http://www.iglc.net/papers/details/2468},
  affiliation     = {PhD Candidate, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, sabek@ualberta.ca, orcid.org/0009-0005-2906-9874; Assistant Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, qipei@ualberta.ca; Assistant Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, gaang@ualberta.ca; Adjunct Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, alireza1@ualberta.ca; Professor, Department of Civil and Environmental Engineering, University of Alberta, Edmonton, Canada, vagonzal@ualberta.ca, orcid.org/0000-0003-3408-3863},
  abstract        = {The architecture, engineering, and construction (AEC) industry faces productivity stagnation due to ineffective production flow management. Although Lean Construction (LC) aims to minimize waste, manual monitoring lacks the high-frequency data required for timely control. Computer Vision (CV) offers automated monitoring but suffers from a ``Semantic Gap,'' where models detect low-level objects but fail to interpret high-level Lean states (e.g., ``waiting''). This study proposes the Lean Construction Visual Taxonomy (LCVT), a three-level hierarchical framework--Category, Indicator, Visual Definition grounded in Transformation-Flow-Value (TFV) theory. Crucially, the LCVT provides standardized class definitions to guide ``zero-shot'' prompt engineering in Vision-Language Models (VLMs). By injecting formal L3 definitions that address entity types, temporal thresholds (e.g., stationary $>$60 s), and spatial context into VLM models such as GPT-4o and Gemini 2.5, the framework enables sophisticated, lean reasoning without the need for massive custom-labeled datasets. Pilot validation achieved a 0.946 mAP in distinguishing state-dependent equipment loads. By formalizing the visual signatures of waste, the LCVT establishes the data infrastructure necessary for proactive, VLM-driven decision support in construction AI.},
  author_keywords = {AI, transformation-flow-value, computer vision, taxonomy, visual management},
  language        = {English},
  document_type   = {Conference Paper},
  source          = {IGLC},
}