{"id":93,"date":"2026-03-18T18:40:15","date_gmt":"2026-03-18T18:40:15","guid":{"rendered":"https:\/\/medlearn.imperial.ac.uk\/innovation\/?page_id=93"},"modified":"2026-04-09T13:55:56","modified_gmt":"2026-04-09T13:55:56","slug":"abg-procedure-vision-dataset","status":"publish","type":"page","link":"https:\/\/medlearn.imperial.ac.uk\/innovation\/projects\/abg-procedure-vision-dataset\/","title":{"rendered":"ABG Procedure Vision Dataset"},"content":{"rendered":"\n<!-- BANNER -->\n<div style=\"background:#001220;\">\n  <svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" viewBox=\"0 0 1400 460\" style=\"width:100%;display:block;\">\n  <defs>\n    <linearGradient id=\"abgBg\" x1=\"0\" y1=\"0\" x2=\"1\" y2=\"1\">\n      <stop offset=\"0%\" stop-color=\"#001220\"\/>\n      <stop offset=\"100%\" stop-color=\"#003060\"\/>\n    <\/linearGradient>\n    <linearGradient id=\"abgAccent\" x1=\"0\" y1=\"0\" x2=\"0\" y2=\"1\">\n      <stop offset=\"0%\" stop-color=\"#4A90D9\" stop-opacity=\"0.3\"\/>\n      <stop offset=\"100%\" stop-color=\"#4A90D9\" stop-opacity=\"0\"\/>\n    <\/linearGradient>\n  <\/defs>\n  <rect width=\"1400\" height=\"460\" fill=\"url(#abgBg)\"\/>\n  <!-- Scan lines -->\n  <g opacity=\"0.07\">\n    <line x1=\"0\" y1=\"46\" x2=\"1400\" y2=\"46\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line x1=\"0\" y1=\"92\" x2=\"1400\" y2=\"92\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line x1=\"0\" y1=\"138\" x2=\"1400\" y2=\"138\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line x1=\"0\" y1=\"184\" x2=\"1400\" y2=\"184\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line x1=\"0\" y1=\"230\" x2=\"1400\" y2=\"230\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line x1=\"0\" y1=\"276\" x2=\"1400\" y2=\"276\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line x1=\"0\" y1=\"322\" x2=\"1400\" y2=\"322\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line x1=\"0\" y1=\"368\" x2=\"1400\" y2=\"368\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n    <line 
x1=\"0\" y1=\"414\" x2=\"1400\" y2=\"414\" stroke=\"#4A90D9\" stroke-width=\"0.5\"\/>\n  <\/g>\n  <!-- FOV frame \u2014 Quest passthrough look -->\n  <rect x=\"120\" y=\"60\" width=\"520\" height=\"340\" rx=\"18\" fill=\"none\" stroke=\"#4A90D9\" stroke-width=\"2\" opacity=\"0.35\"\/>\n  <rect x=\"120\" y=\"60\" width=\"520\" height=\"340\" rx=\"18\" fill=\"url(#abgAccent)\" opacity=\"0.4\"\/>\n  <!-- Corner brackets -->\n  <polyline points=\"120,105 120,60 165,60\" fill=\"none\" stroke=\"#7DC4FF\" stroke-width=\"3\" opacity=\"0.7\"\/>\n  <polyline points=\"595,60 640,60 640,105\" fill=\"none\" stroke=\"#7DC4FF\" stroke-width=\"3\" opacity=\"0.7\"\/>\n  <polyline points=\"120,355 120,400 165,400\" fill=\"none\" stroke=\"#7DC4FF\" stroke-width=\"3\" opacity=\"0.7\"\/>\n  <polyline points=\"595,400 640,400 640,355\" fill=\"none\" stroke=\"#7DC4FF\" stroke-width=\"3\" opacity=\"0.7\"\/>\n  <!-- Manikin arm outline inside FOV -->\n  <ellipse cx=\"380\" cy=\"290\" rx=\"130\" ry=\"38\" fill=\"#0056a3\" fill-opacity=\"0.18\" stroke=\"#4A90D9\" stroke-width=\"1\" stroke-opacity=\"0.4\"\/>\n  <!-- Puncture site dot -->\n  <circle cx=\"360\" cy=\"280\" r=\"10\" fill=\"none\" stroke=\"#FF6B6B\" stroke-width=\"2.5\" opacity=\"0.8\"\/>\n  <circle cx=\"360\" cy=\"280\" r=\"4\" fill=\"#FF6B6B\" opacity=\"0.7\"\/>\n  <!-- Bounding box labels -->\n  <!-- procedural_hand -->\n  <rect x=\"200\" y=\"200\" width=\"160\" height=\"120\" rx=\"3\" fill=\"none\" stroke=\"#00CC88\" stroke-width=\"1.8\" opacity=\"0.75\"\/>\n  <rect x=\"200\" y=\"195\" width=\"120\" height=\"16\" rx=\"2\" fill=\"#00CC88\" opacity=\"0.85\"\/>\n  <text x=\"208\" y=\"207\" font-family=\"monospace,Arial\" font-size=\"9\" fill=\"#fff\">procedural_hand<\/text>\n  <!-- stabilizing_hand -->\n  <rect x=\"380\" y=\"230\" width=\"150\" height=\"100\" rx=\"3\" fill=\"none\" stroke=\"#FFB300\" stroke-width=\"1.8\" opacity=\"0.75\"\/>\n  <rect x=\"380\" y=\"225\" width=\"120\" height=\"16\" rx=\"2\" fill=\"#FFB300\" 
opacity=\"0.85\"\/>\n  <text x=\"388\" y=\"237\" font-family=\"monospace,Arial\" font-size=\"9\" fill=\"#111\">stabilizing_hand<\/text>\n  <!-- puncture_site -->\n  <rect x=\"330\" y=\"255\" width=\"72\" height=\"55\" rx=\"3\" fill=\"none\" stroke=\"#FF6B6B\" stroke-width=\"1.8\" opacity=\"0.75\"\/>\n  <rect x=\"330\" y=\"250\" width=\"84\" height=\"16\" rx=\"2\" fill=\"#FF6B6B\" opacity=\"0.85\"\/>\n  <text x=\"338\" y=\"262\" font-family=\"monospace,Arial\" font-size=\"9\" fill=\"#fff\">puncture_site<\/text>\n  <!-- needle_syringe -->\n  <rect x=\"220\" y=\"170\" width=\"110\" height=\"60\" rx=\"3\" fill=\"none\" stroke=\"#9B59F5\" stroke-width=\"1.8\" opacity=\"0.75\"\/>\n  <rect x=\"220\" y=\"165\" width=\"96\" height=\"16\" rx=\"2\" fill=\"#9B59F5\" opacity=\"0.85\"\/>\n  <text x=\"228\" y=\"177\" font-family=\"monospace,Arial\" font-size=\"9\" fill=\"#fff\">needle_syringe<\/text>\n  <!-- alcohol_wipe -->\n  <rect x=\"450\" y=\"160\" width=\"90\" height=\"50\" rx=\"3\" fill=\"none\" stroke=\"#4A90D9\" stroke-width=\"1.8\" opacity=\"0.75\"\/>\n  <rect x=\"450\" y=\"155\" width=\"76\" height=\"16\" rx=\"2\" fill=\"#4A90D9\" opacity=\"0.85\"\/>\n  <text x=\"458\" y=\"167\" font-family=\"monospace,Arial\" font-size=\"9\" fill=\"#fff\">alcohol_wipe<\/text>\n  <!-- Right panel: Roboflow \/ YOLO stats -->\n  <rect x=\"800\" y=\"80\" width=\"480\" height=\"300\" rx=\"10\" fill=\"#ffffff\" fill-opacity=\"0.04\" stroke=\"#4A90D9\" stroke-width=\"1\" stroke-opacity=\"0.3\"\/>\n  <text x=\"830\" y=\"116\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#7DC4FF\" opacity=\"0.7\" letter-spacing=\"2\">DATASET METRICS<\/text>\n  <!-- Class pills -->\n  <rect x=\"830\" y=\"128\" width=\"100\" height=\"20\" rx=\"4\" fill=\"#00CC88\" opacity=\"0.25\"\/>\n  <text x=\"880\" y=\"142\" font-family=\"Arial,sans-serif\" font-size=\"9\" fill=\"#00CC88\" text-anchor=\"middle\">procedural_hand<\/text>\n  <rect x=\"940\" y=\"128\" width=\"110\" height=\"20\" rx=\"4\" fill=\"#FFB300\" 
opacity=\"0.25\"\/>\n  <text x=\"995\" y=\"142\" font-family=\"Arial,sans-serif\" font-size=\"9\" fill=\"#FFB300\" text-anchor=\"middle\">stabilizing_hand<\/text>\n  <rect x=\"1060\" y=\"128\" width=\"90\" height=\"20\" rx=\"4\" fill=\"#FF6B6B\" opacity=\"0.25\"\/>\n  <text x=\"1105\" y=\"142\" font-family=\"Arial,sans-serif\" font-size=\"9\" fill=\"#FF6B6B\" text-anchor=\"middle\">puncture_site<\/text>\n  <rect x=\"830\" y=\"156\" width=\"100\" height=\"20\" rx=\"4\" fill=\"#9B59F5\" opacity=\"0.25\"\/>\n  <text x=\"880\" y=\"170\" font-family=\"Arial,sans-serif\" font-size=\"9\" fill=\"#9B59F5\" text-anchor=\"middle\">needle_syringe<\/text>\n  <rect x=\"940\" y=\"156\" width=\"86\" height=\"20\" rx=\"4\" fill=\"#4A90D9\" opacity=\"0.3\"\/>\n  <text x=\"983\" y=\"170\" font-family=\"Arial,sans-serif\" font-size=\"9\" fill=\"#4A90D9\" text-anchor=\"middle\">alcohol_wipe<\/text>\n  <!-- Stat rows -->\n  <line x1=\"830\" y1=\"192\" x2=\"1250\" y2=\"192\" stroke=\"#4A90D9\" stroke-width=\"0.5\" opacity=\"0.3\"\/>\n  <text x=\"830\" y=\"214\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#ffffff\" opacity=\"0.5\">Model<\/text>\n  <text x=\"970\" y=\"214\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#7DC4FF\" opacity=\"0.8\">YOLOv12s<\/text>\n  <line x1=\"830\" y1=\"224\" x2=\"1250\" y2=\"224\" stroke=\"#4A90D9\" stroke-width=\"0.5\" opacity=\"0.2\"\/>\n  <text x=\"830\" y=\"246\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#ffffff\" opacity=\"0.5\">Capture<\/text>\n  <text x=\"970\" y=\"246\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#7DC4FF\" opacity=\"0.8\">Meta Quest &middot; 1024&#215;1024<\/text>\n  <line x1=\"830\" y1=\"256\" x2=\"1250\" y2=\"256\" stroke=\"#4A90D9\" stroke-width=\"0.5\" opacity=\"0.2\"\/>\n  <text x=\"830\" y=\"278\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#ffffff\" opacity=\"0.5\">Platform<\/text>\n  <text x=\"970\" y=\"278\" font-family=\"Arial,sans-serif\" font-size=\"11\" 
fill=\"#7DC4FF\" opacity=\"0.8\">Roboflow &middot; Local GPU<\/text>\n  <line x1=\"830\" y1=\"288\" x2=\"1250\" y2=\"288\" stroke=\"#4A90D9\" stroke-width=\"0.5\" opacity=\"0.2\"\/>\n  <text x=\"830\" y=\"310\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#ffffff\" opacity=\"0.5\">Classes<\/text>\n  <text x=\"970\" y=\"310\" font-family=\"Arial,sans-serif\" font-size=\"11\" fill=\"#7DC4FF\" opacity=\"0.8\">5 &middot; Oct&ndash;Dec 2025<\/text>\n  <!-- Bottom label -->\n  <text x=\"700\" y=\"432\" font-family=\"Arial,sans-serif\" font-size=\"12\" fill=\"#7DC4FF\" text-anchor=\"middle\" opacity=\"0.45\" letter-spacing=\"3\">ABG PROCEDURE VISION DATASET &mdash; FIRST-PERSON PROCEDURAL AI RESEARCH<\/text>\n<\/svg>\n<\/div>\n\n<div style=\"max-width:960px;margin:0 auto;padding:40px 32px 64px 32px;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;\">\n\n  <!-- BACK + TAGS -->\n  <div style=\"display:flex;align-items:center;gap:12px;margin-bottom:24px;flex-wrap:wrap;\">\n    <a href=\"\/innovation\/projects\/\" style=\"font-size:12px;color:#003E74;text-decoration:none;opacity:0.7;\">&larr; All Projects<\/a>\n    <span style=\"font-size:11px;background:#e8f0fa;color:#003E74;padding:4px 10px;border-radius:12px;font-weight:600;\">Computer Vision<\/span>\n    <span style=\"font-size:11px;background:#e8f0fa;color:#003E74;padding:4px 10px;border-radius:12px;font-weight:600;\">AI \/ ML<\/span>\n    <span style=\"font-size:11px;background:#e8f0fa;color:#003E74;padding:4px 10px;border-radius:12px;font-weight:600;\">Clinical Skills<\/span>\n    <span style=\"font-size:11px;background:#f5f5f5;color:#666;padding:4px 10px;border-radius:12px;\">Research Dataset<\/span>\n  <\/div>\n\n  <h1 style=\"font-size:34px;font-weight:800;color:#001E45;margin:0 0 8px 0;line-height:1.2;\">ABG Procedure Vision Dataset<\/h1>\n  <p style=\"font-size:17px;color:#003E74;margin:0 0 36px 0;font-weight:400;\">A multi-angle annotated vision dataset for arterial blood gas 
procedure recognition &mdash; comprising over 100,000 frames across five camera angles plus a first-person component, built for real-time AI inference in clinical simulation and vision-guided robotics.<\/p>\n\n  <!-- OVERVIEW -->\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">This project documents the creation of what is believed to be one of the first purpose-built computer vision datasets for the <strong>Arterial Blood Gas (ABG) sampling procedure<\/strong>, annotated at the object level and designed for real-time detection. The dataset was captured primarily across <strong>five fixed camera angles<\/strong>, producing over 80,000 annotated frames that give the model varied spatial perspectives on the ABG procedure. An additional <strong>first-person component<\/strong> of approximately 20,000 frames was captured using a <strong>Meta Quest<\/strong> headset, replicating the direct viewpoint of a clinician performing the procedure. This multi-angle approach maximises model robustness for real-world deployment, while the first-person component supports integration directly within XR simulation environments. The dataset is also intended for future use in <strong>vision-guided robot reaching<\/strong> applications, where procedural understanding from multiple viewpoints is a key requirement.<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 40px 0;\">The work was carried out collaboratively with two teaching fellows at Imperial College London over three months (October&ndash;December 2025), combining clinical procedure expertise with machine learning practice. A <strong>YOLOv12s<\/strong> model was trained on the resulting dataset using <strong>Roboflow<\/strong> for annotation management, with training runs executed locally on a laptop GPU. 
The project has since informed a pivot toward robotic procedure guidance, with the dataset now serving as a foundation for ongoing research.<\/p>\n\n  <!-- PROJECT AT A GLANCE -->\n  <h2 style=\"font-size:20px;font-weight:700;color:#001E45;margin:0 0 16px 0;padding-bottom:8px;border-bottom:2px solid #e8eef5;\">Project at a Glance<\/h2>\n  <table style=\"width:100%;border-collapse:collapse;margin-bottom:40px;font-size:14px;\">\n    <tr style=\"border-bottom:1px solid #e8eef5;\">\n      <td style=\"padding:12px 16px;font-weight:600;color:#003E74;width:180px;vertical-align:top;\">Status<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">Dataset complete &mdash; model inference ongoing; robotic application in development<\/td>\n    <\/tr>\n    <tr style=\"border-bottom:1px solid #e8eef5;background:#f9fbfd;\">\n      <td style=\"padding:12px 16px;font-weight:600;color:#003E74;vertical-align:top;\">Period<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">October &ndash; December 2025<\/td>\n    <\/tr>\n    <tr style=\"border-bottom:1px solid #e8eef5;\">\n      <td style=\"padding:12px 16px;font-weight:600;color:#003E74;vertical-align:top;\">Annotation Classes<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">\n        <code style=\"background:#f0f4f8;padding:2px 7px;border-radius:3px;font-size:12px;margin-right:4px;\">alcohol_wipe<\/code>\n        <code style=\"background:#f0f4f8;padding:2px 7px;border-radius:3px;font-size:12px;margin-right:4px;\">needle_syringe<\/code>\n        <code style=\"background:#f0f4f8;padding:2px 7px;border-radius:3px;font-size:12px;margin-right:4px;\">procedural_hand<\/code>\n        <code style=\"background:#f0f4f8;padding:2px 7px;border-radius:3px;font-size:12px;margin-right:4px;\">puncture_site<\/code>\n        <code style=\"background:#f0f4f8;padding:2px 7px;border-radius:3px;font-size:12px;\">stabilizing_hand<\/code>\n      <\/td>\n    <\/tr>\n    <tr style=\"border-bottom:1px solid #e8eef5;background:#f9fbfd;\">\n     
 <td style=\"padding:12px 16px;font-weight:600;color:#003E74;vertical-align:top;\">Capture Method<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">First-person video, Meta Quest headset &middot; 1024 &times; 1024px resolution<\/td>\n    <\/tr>\n    <tr style=\"border-bottom:1px solid #e8eef5;\">\n      <td style=\"padding:12px 16px;font-weight:600;color:#003E74;vertical-align:top;\">Model Trained<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">YOLOv12s &middot; exported to ONNX for deployment &middot; trained on local GPU<\/td>\n    <\/tr>\n    <tr style=\"border-bottom:1px solid #e8eef5;background:#f9fbfd;\">\n      <td style=\"padding:12px 16px;font-weight:600;color:#003E74;vertical-align:top;\">Toolchain<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">Roboflow (annotation &amp; dataset management) &middot; YOLO &middot; Unity Sentis &middot; ONNX<\/td>\n    <\/tr>\n    <tr style=\"border-bottom:1px solid #e8eef5;\">\n      <td style=\"padding:12px 16px;font-weight:600;color:#003E74;vertical-align:top;\">Original Target<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">Real-time inference in Meta Quest XR simulation via Unity Sentis \/ Meta XR Building Blocks<\/td>\n    <\/tr>\n    <tr style=\"background:#f9fbfd;\">\n      <td style=\"padding:12px 16px;font-weight:600;color:#003E74;vertical-align:top;\">Current Direction<\/td>\n      <td style=\"padding:12px 16px;color:#333;\">Robotic ABG guidance &mdash; dataset repurposed as training foundation for robotic procedure recognition<\/td>\n    <\/tr>\n  <\/table>\n\n  <!-- DATASET DESIGN -->\n  <h2 style=\"font-size:20px;font-weight:700;color:#001E45;margin:0 0 16px 0;padding-bottom:8px;border-bottom:2px solid #e8eef5;\">Dataset Design &amp; Annotation<\/h2>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">ABG sampling is a precise, multi-step clinical procedure involving palpation of the radial artery, site preparation, needle insertion, sample 
aspiration, and safe sharps disposal. Each step involves distinct objects and hand configurations that must be reliably distinguished by a computer vision model. The five annotation classes were designed to reflect the procedure&#8217;s key safety-critical elements: the instruments in use (<code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">needle_syringe<\/code>, <code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">alcohol_wipe<\/code>), the anatomical target (<code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">puncture_site<\/code>), and the two functional hand roles (<code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">procedural_hand<\/code>, <code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">stabilizing_hand<\/code>).<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">The first-person video was recorded by an operator wearing a Meta Quest headset in a clinical simulation environment, producing footage with the characteristic wide-angle FOV, natural head-movement blur, and visual clutter (blue tray, red sharps bin, sterile packaging, absorbent drape) typical of a real simulation station. This choice was intentional: a model trained on this perspective generalises directly to inference running on the headset itself, without the domain shift that would arise from a conventional camera rig.<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 40px 0;\">Annotation was managed in <strong>Roboflow<\/strong>, with bounding box labels applied frame by frame. Roboflow&#8217;s dataset versioning, augmentation pipeline, and YOLO export format were used throughout. 
The team annotated procedural footage across the full sequence of ABG steps to ensure class balance and procedural coverage, with particular attention to challenging cases: heavy occlusion of the puncture site by hands, partial syringe visibility, and motion blur during needle handling.<\/p>\n\n  <!-- SYNTHETIC DATA STRATEGY -->\n  <h2 style=\"font-size:20px;font-weight:700;color:#001E45;margin:0 0 16px 0;padding-bottom:8px;border-bottom:2px solid #e8eef5;\">Synthetic Data Augmentation Strategy<\/h2>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">A key research contribution of this project is a structured synthetic image generation strategy developed to augment the real dataset. Using generative AI (DALL-E \/ OpenAI Images API), a prompt framework was designed to produce photorealistic first-person ABG images that visually match the Quest-captured footage &mdash; including lens distortion, motion blur, sensor noise, and the specific objects present on a simulation station.<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">Rather than generating generic clinical images, the prompt strategy was engineered label-first: each generation prompt is written to maximise the visibility and variety of a specific annotation class, while maintaining plausible co-occurrence of other objects. Separate prompt variants target hard cases such as heavy occlusion, partial hand crops, motion blur from head turns, and varying lighting (daylight, tungsten). 
This approach allows systematic construction of a training distribution that addresses known model weaknesses &mdash; such as confusion between <code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">procedural_hand<\/code> and <code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">needle_syringe<\/code> at close range.<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 40px 0;\">The recommended synthetic distribution for a 500-image augmentation batch is approximately 35% needle_syringe-heavy, 20% puncture_site-clear, 15% each for alcohol_wipe, stabilizing_hand, and procedural_hand &mdash; with roughly 25&ndash;30% of the full set comprising &#8220;hard mode&#8221; variants (occlusion, crop, blur, off-centre). Synthetic images are reviewed before annotation and ingested into Roboflow alongside real frames using the same labelling schema.<\/p>\n\n  <!-- MODEL & DEPLOYMENT -->\n  <h2 style=\"font-size:20px;font-weight:700;color:#001E45;margin:0 0 16px 0;padding-bottom:8px;border-bottom:2px solid #e8eef5;\">Model Training &amp; Deployment Pathway<\/h2>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">YOLOv12s was selected for its balance of inference speed and detection accuracy at the scale of objects present in ABG footage &mdash; particularly the small and often occluded <code style=\"background:#f0f4f8;padding:1px 6px;border-radius:3px;font-size:13px;\">puncture_site<\/code> marker and the visually similar hand classes. 
Training was run locally on a laptop GPU at 1024&times;1024 input resolution, with the trained model exported to <strong>ONNX<\/strong> for cross-platform deployment.<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">The initial deployment target was the <strong>Meta XR Building Blocks Object Detection<\/strong> system in Unity, using the Unity Sentis runtime to run the ONNX model on-device via the Quest&#8217;s neural processing capability. The ONNX export includes integrated NMS post-processing (outputs: boxes \/ class_ids \/ scores), reducing the custom pipeline required in Unity. This pathway was explored in detail and documented as part of the research &mdash; including ONNX import into Unity Sentis, Provider Asset configuration, and input resolution alignment between training (1024) and inference (640).<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 40px 0;\">Following a strategic pivot, the model and dataset are now being adapted for a <strong>robotic guidance application<\/strong>, where real-time procedure recognition will inform robotic arm positioning and step-sequencing during simulated ABG. 
The dataset&#8217;s first-person visual characteristics remain highly relevant in this context, as robotic camera placement can be configured to approximate the same viewpoint.<\/p>\n\n  <!-- BLOCKQUOTE -->\n  <blockquote style=\"border-left:4px solid #003E74;background:#f5f8fb;border-radius:0 6px 6px 0;padding:20px 24px;margin:0 0 40px 0;\">\n    <p style=\"font-size:15px;color:#003E74;line-height:1.7;margin:0;font-style:italic;\">&#8220;Creating a labelled vision dataset for a specific clinical procedure, from scratch, in first-person perspective &mdash; this kind of domain-specific annotation work is what bridges the gap between general-purpose AI models and tools that are actually deployable in clinical simulation.&#8221;<\/p>\n  <\/blockquote>\n\n  <!-- TEAM -->\n  <h2 style=\"font-size:20px;font-weight:700;color:#001E45;margin:0 0 16px 0;padding-bottom:8px;border-bottom:2px solid #e8eef5;\">Team &amp; Collaborators<\/h2>\n  <div style=\"display:flex;flex-wrap:wrap;gap:16px;margin-bottom:40px;\">\n    <div style=\"background:#f5f8fb;border-radius:8px;padding:16px 20px;min-width:180px;flex:1;\">\n      <p style=\"font-size:13px;font-weight:700;color:#001E45;margin:0 0 2px 0;\">Dr Risheka Walls<\/p>\n      <p style=\"font-size:12px;color:#666;margin:0;\">Project Lead<\/p>\n    <\/div>\n    <div style=\"background:#f5f8fb;border-radius:8px;padding:16px 20px;min-width:180px;flex:1;\">\n      <p style=\"font-size:13px;font-weight:700;color:#001E45;margin:0 0 2px 0;\">Payal Guha<\/p>\n      <p style=\"font-size:12px;color:#666;margin:0;\">Teaching Fellow<\/p>\n    <\/div>\n    <div style=\"background:#f5f8fb;border-radius:8px;padding:16px 20px;min-width:180px;flex:1;\">\n      <p style=\"font-size:13px;font-weight:700;color:#001E45;margin:0 0 2px 0;\">Oscar L. 
Oglina<\/p>\n      <p style=\"font-size:12px;color:#666;margin:0;\">Teaching Fellow<\/p>\n    <\/div>\n    <div style=\"background:#f5f8fb;border-radius:8px;padding:16px 20px;min-width:180px;flex:1;\">\n      <p style=\"font-size:13px;font-weight:700;color:#001E45;margin:0 0 2px 0;\">Adrian Cowell<\/p>\n      <p style=\"font-size:12px;color:#666;margin:0;\">Innovation Lead &mdash; Technology &amp; Development<\/p>\n    <\/div>\n  <\/div>\n\n  <!-- NEXT STEPS -->\n  <h2 style=\"font-size:20px;font-weight:700;color:#001E45;margin:0 0 16px 0;padding-bottom:8px;border-bottom:2px solid #e8eef5;\">Upcoming Research Directions<\/h2>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 18px 0;\">The robotic guidance application represents the most immediate next phase. Beyond that, several research threads are open: expanding the dataset to cover additional clinical procedures (e.g. venepuncture, cannulation) using the same first-person capture methodology; exploring semi-automatic annotation using the trained model to pre-label new footage; and investigating the use of synthetic data pipelines to build datasets for procedures where real footage is difficult to obtain due to clinical access constraints.<\/p>\n  <p style=\"font-size:15px;color:#333;line-height:1.8;margin:0 0 0 0;\">There is also potential for this dataset and methodology to be shared more broadly &mdash; as a benchmark for medical procedure recognition research, or as a teaching resource for students learning computer vision annotation practices in a clinical context.<\/p>\n\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>procedural_hand stabilizing_hand puncture_site needle_syringe alcohol_wipe DATASET METRICS procedural_hand stabilizing_hand puncture_site needle_syringe alcohol_wipe Model YOLOv12s Capture Meta Quest &middot; 1024&#215;1024 Platform Roboflow &middot; Local GPU Classes 5 &middot; Oct&ndash;Dec 2025 ABG 
[&hellip;]<\/p>\n","protected":false},"author":16,"featured_media":0,"parent":7,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-93","page","type-page","status-publish","hentry"],"_links":{"self":[{"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/pages\/93","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/users\/16"}],"replies":[{"embeddable":true,"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/comments?post=93"}],"version-history":[{"count":5,"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/pages\/93\/revisions"}],"predecessor-version":[{"id":260,"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/pages\/93\/revisions\/260"}],"up":[{"embeddable":true,"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/pages\/7"}],"wp:attachment":[{"href":"https:\/\/medlearn.imperial.ac.uk\/innovation\/wp-json\/wp\/v2\/media?parent=93"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}