From: Jerin Jacob <jer...@marvell.com> Add mldev API specification to standardize and use the machine learning device and inference operations in a vendor-neutral way.
Following operations are abstracted through APIs - ML device capability probe - ML device configuration - ML device queue pair configuration - ML device state management - ML device stat/xstat operations - ML model load/unload/start/stop operations - ML model information probe - ML IO operations to find size for input and output buffers - ML quantize and dequantize operations - ML ops pool creation and free operations - ML device enqueue/dequeue fastpath interference operations Also added programming guide. Signed-off-by: Jerin Jacob <jer...@marvell.com> Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com> --- MAINTAINERS | 5 + doc/api/doxy-api-index.md | 1 + doc/api/doxy-api.conf.in | 1 + doc/guides/prog_guide/img/mldev_flow.svg | 714 ++++++++++++++ doc/guides/prog_guide/index.rst | 1 + doc/guides/prog_guide/mldev.rst | 186 ++++ doc/guides/rel_notes/release_23_03.rst | 5 + lib/meson.build | 1 + lib/mldev/meson.build | 18 + lib/mldev/rte_mldev.c | 8 + lib/mldev/rte_mldev.h | 1119 ++++++++++++++++++++++ lib/mldev/version.map | 7 + 12 files changed, 2066 insertions(+) create mode 100644 doc/guides/prog_guide/img/mldev_flow.svg create mode 100644 doc/guides/prog_guide/mldev.rst create mode 100644 lib/mldev/meson.build create mode 100644 lib/mldev/rte_mldev.c create mode 100644 lib/mldev/rte_mldev.h create mode 100644 lib/mldev/version.map diff --git a/MAINTAINERS b/MAINTAINERS index 3495946d0f..fa91900a20 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -538,6 +538,11 @@ F: drivers/raw/skeleton/ F: app/test/test_rawdev.c F: doc/guides/prog_guide/rawdev.rst +ML device API - EXPERIMENTAL +M: Srikanth Yalavarthi <syalavar...@marvell.com> +F: lib/mldev/ +F: doc/guides/prog_guide/mldev.rst + Memory Pool Drivers ------------------- diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index de488c7abf..a12562977a 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -22,6 +22,7 @@ The public API headers are grouped by topics: [compress](@ref rte_comp.h), [regexdev](@ref rte_regexdev.h), [dmadev](@ref rte_dmadev.h), + [mldev](@ref rte_mldev.h), [eventdev](@ref rte_eventdev.h), [event_eth_rx_adapter](@ref rte_event_eth_rx_adapter.h), [event_eth_tx_adapter](@ref rte_event_eth_tx_adapter.h), diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in index f0886c3bd1..5d6416d3e0 100644 --- a/doc/api/doxy-api.conf.in +++ b/doc/api/doxy-api.conf.in @@ -57,6 +57,7 @@ INPUT = @TOPDIR@/doc/api/doxy-api-index.md \ @TOPDIR@/lib/mempool \ @TOPDIR@/lib/meter \ @TOPDIR@/lib/metrics \ + @TOPDIR@/lib/mldev \ @TOPDIR@/lib/node \ @TOPDIR@/lib/net \ @TOPDIR@/lib/pcapng \ diff --git a/doc/guides/prog_guide/img/mldev_flow.svg b/doc/guides/prog_guide/img/mldev_flow.svg new file mode 100644 index 0000000000..6c5dda14e5 --- /dev/null +++ b/doc/guides/prog_guide/img/mldev_flow.svg @@ -0,0 +1,714 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- SPDX-License-Identifier: BSD-3-Clause --> +<!-- Copyright (c) 2022 Marvell. 
--> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + width="320mm" + height="297mm" + viewBox="0 0 320 297" + version="1.1" + id="svg6899" + inkscape:version="1.2.1 (9c6d41e410, 2022-07-14)" + sodipodi:docname="mldev_flow.svg" + inkscape:export-filename="mldev_flow.png" + inkscape:export-xdpi="96" + inkscape:export-ydpi="96" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg"> + <sodipodi:namedview + id="namedview6901" + pagecolor="#ffffff" + bordercolor="#000000" + borderopacity="0.25" + inkscape:showpageshadow="2" + inkscape:pageopacity="0.0" + inkscape:pagecheckerboard="0" + inkscape:deskcolor="#d1d1d1" + inkscape:document-units="mm" + showgrid="false" + inkscape:connector-spacing="0" + inkscape:lockguides="false" + inkscape:zoom="0.49638341" + inkscape:cx="640.63382" + inkscape:cy="525.80323" + inkscape:window-width="1920" + inkscape:window-height="986" + inkscape:window-x="-11" + inkscape:window-y="-11" + inkscape:window-maximized="1" + inkscape:current-layer="layer1" /> + <defs + id="defs6896"> + <marker + style="overflow:visible" + id="RoundedArrow" + refX="5" + refY="0" + orient="auto-start-reverse" + inkscape:stockid="RoundedArrow" + markerWidth="6.1347523" + markerHeight="5.9304948" + viewBox="0 0 6.1347524 5.9304951" + inkscape:isstock="true" + inkscape:collect="always" + preserveAspectRatio="xMidYMid"> + <path + transform="scale(0.7)" + d="m -0.21114562,-4.1055728 6.42229122,3.21114561 a 1,1 90 0 1 0,1.78885438 L -0.21114562,4.1055728 A 1.236068,1.236068 31.717474 0 1 -2,3 v -6 a 1.236068,1.236068 148.28253 0 1 1.78885438,-1.1055728 z" + style="fill:context-stroke;fill-rule:evenodd;stroke:none" + id="path1367" /> + </marker> + <marker + style="overflow:visible" + id="TriangleStart" + refX="4" + refY="0" + orient="auto-start-reverse" + inkscape:stockid="TriangleStart" + markerWidth="5.3244081" + markerHeight="6.155385" + viewBox="0 0 5.3244081 6.1553851" + inkscape:isstock="true" + inkscape:collect="always" + preserveAspectRatio="xMidYMid"> + <path + transform="scale(0.5)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 5.77,0 -2.88,5 V -5 Z" + id="path135" /> + </marker> + </defs> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + <rect + style="fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" + id="rect39991" + width="312.88394" + height="286.7659" + x="3.5580292" + y="5.1170502" + ry="18.197132" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="m 114.68664,155.38145 h 32.15418" + id="path24358" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-start="#rect24176-1" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#TriangleStart)" + d="m 114.68664,179.58099 h 32.15008" + id="path24360" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-start="#rect24176-1" /> + <path + 
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#TriangleStart)" + d="m 114.68664,203.78389 h 32.15008" + id="path24362" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-start="#rect24176-1" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-start:url(#TriangleStart)" + d="m 114.68664,227.98576 32.14997,0" + id="path24364" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-start="#rect24176-1" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#TriangleStart)" + d="M 146.8367,252.18432 H 114.68664" + id="path24366" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-end="#rect24176-1" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#TriangleStart)" + d="M 146.8367,276.38309 H 114.68664" + id="path24368" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-end="#rect24176-1" /> + <rect + style="fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:2, 1;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24370" + width="18.09137" + height="13.568528" + x="127.27605" + y="208.81961" + ry="2.7394907" + inkscape:connector-avoid="true" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:4, 2;stroke-dashoffset:0;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="m 70.388979,148.58514 -1e-6,-46.3516" + id="path24426" + inkscape:connector-type="polyline" + inkscape:connector-curvature="0" + inkscape:connection-start="#rect24176-1" + inkscape:connection-end="#rect24176" /> + <g + id="g42647"> + <g + id="g31403" + transform="translate(-44.050451,15.173444)"> + <rect + style="fill:#cadae7;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844498;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.68901, 0.844498;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-5-9" + width="99.155487" + height="14.152132" + x="190.88715" + y="229.93475" + ry="2.2479143" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-236.90309" + y="240.37343" + id="text31115" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113" + style="stroke:none;stroke-width:0.75" + x="-236.90309" + y="240.37343">rte_ml_model_update_params()</tspan></text> + </g> + <g + id="g31398" + transform="translate(-44.050451,15.173444)"> + <rect + 
style="fill:#cadae7;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844505;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.68902, 0.844505;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-4" + width="99.155495" + height="14.152357" + x="190.88705" + y="205.73608" + ry="2.2479498" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-212.70453" + y="240.37334" + id="text31115-8" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113-8" + style="stroke:none;stroke-width:0.75" + x="-212.70453" + y="240.37334">rte_ml_model_stop()</tspan></text> + </g> + <g + id="g31408" + transform="translate(-44.050451,15.173444)"> + <rect + style="fill:#cadae7;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844505;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.68901, 0.844505;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-5-2-2" + width="99.155495" + height="14.152359" + x="190.88715" + y="254.13341" + ry="2.2479503" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-261.10187" + y="240.37343" + id="text31115-1" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113-3" + style="stroke:none;stroke-width:0.75" + x="-261.10187" + y="240.37343">rte_ml_model_unload()</tspan></text> + </g> + <g + id="g31393" + transform="translate(-44.050451,15.173444)"> + <rect + style="fill:#cadae7;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844566;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.68914, 0.844566;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-5-2-5" + width="99.155434" + height="14.154394" + x="190.88718" + y="181.53319" + ry="2.2482734" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-188.50266" + y="240.37343" + id="text31115-4" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113-2" + style="stroke:none;stroke-width:0.75" + x="-188.50266" + y="240.37343">rte_ml_model_start()</tspan></text> + </g> + <g + id="g31388" + transform="translate(-44.050451,15.173444)"> + <rect + style="fill:#cadae7;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844565;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.68914, 0.844565;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-5-8" + width="99.155434" + height="14.154395" + x="190.88718" + y="157.33029" + ry="2.2482736" + inkscape:connector-avoid="true" /> + <text + 
xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-164.29976" + y="240.37343" + id="text31115-6" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113-5" + style="stroke:none;stroke-width:0.75" + x="-164.29976" + y="240.37343">rte_ml_model_info_get()</tspan></text> + </g> + <g + id="g31383" + transform="translate(-44.050451,15.173444)"> + <rect + style="fill:#cadae7;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844503;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.689, 0.844503;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-2" + width="99.155495" + height="14.152369" + x="190.89127" + y="133.13176" + ry="2.2479515" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-140.10022" + y="240.37755" + id="text31115-0" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113-35" + style="stroke:none;stroke-width:0.75" + x="-140.10022" + y="240.37755">rte_ml_model_load()</tspan></text> + </g> + </g> + <rect + style="fill:#ffccaa;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844503;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.689, 0.844503;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-2-5" + width="99.155495" + height="14.152369" + x="184.08008" + y="112.15163" + ry="2.2479515" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-119.12009" + y="233.56647" + id="text31115-0-5" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113-35-8" + style="stroke:none;stroke-width:0.75" + x="-119.12009" + y="233.56647">rte_ml_dequeue_burst()</tspan></text> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:3, 1.5;stroke-dashoffset:0;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="m 114.90712,47.649005 h 56.16045" + id="path24248" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-start="#rect24176" + inkscape:connection-end="#rect24200" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:3, 1.5;stroke-dashoffset:0;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="m 171.06762,70.71111 -56.1605,0.0024" + id="path24250" + inkscape:connector-type="polyline" + inkscape:connector-curvature="0" + inkscape:connection-end="#rect24176" + 
inkscape:connection-start="#rect24200-5" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:3, 1.5;stroke-dashoffset:0;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="M 171.06765,93.773951 H 114.90712" + id="path24252" + inkscape:connector-type="orthogonal" + inkscape:connector-curvature="0" + inkscape:connection-end="#rect24176" + inkscape:connection-start="#rect24200-5-2" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:3, 1.5;stroke-dashoffset:0;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="m 215.44396,47.649004 h 36.42795" + id="path24566" + inkscape:connector-type="polyline" + inkscape:connector-curvature="0" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:3, 1.5;stroke-dashoffset:0;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="m 215.444,70.710168 h 36.42791" + id="path24568" + inkscape:connector-type="polyline" + inkscape:connector-curvature="0" /> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:3, 1.5;stroke-dashoffset:0;stroke-opacity:1;marker-start:url(#TriangleStart);marker-end:url(#TriangleStart)" + d="m 215.44395,93.773951 36.42796,-10e-7" + id="path24570" + inkscape:connector-type="polyline" + inkscape:connector-curvature="0" /> + <g + id="g42675"> + <g + id="g31358" + transform="translate(-19.708778,16.231776)"> + <rect + style="fill:#dcf4d3;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.623639;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.24728, 0.623639;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200" + width="44.376362" + height="17.244751" + x="190.77635" + y="22.794853" + ry="2.7391431" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:4, 2;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-31.802492" + y="212.98004" + id="text31256" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31254" + style="stroke-width:0.75" + x="-31.802492" + y="212.98004">Queue Pair 0</tspan></text> + </g> + <g + id="g31353" + transform="translate(-19.708778,16.231776)"> + <rect + style="fill:#dcf4d3;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.623639;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.24728, 0.623639;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-5" + width="44.376362" + height="17.244749" + x="190.7764" + y="45.856018" + ry="2.7391429" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:4, 2;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-54.863655" + y="213.10411" + id="text31256-9" + 
transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31254-9" + style="stroke-width:0.75" + x="-54.863655" + y="213.10411">Queue Pair ..</tspan></text> + </g> + <g + id="g31363" + transform="translate(-19.708778,16.231776)"> + <rect + style="fill:#dcf4d3;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.623731;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.24746, 0.623731;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-5-2" + width="44.37627" + height="17.249832" + x="190.77643" + y="68.917259" + ry="2.7399504" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:4, 2;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-77.927437" + y="213.08859" + id="text31256-5" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31254-8" + style="stroke-width:0.75" + x="-77.927437" + y="213.08859">Queue Pair N</tspan></text> + </g> + </g> + <g + id="g42661"> + <g + id="g31368" + transform="translate(-19.708778,16.231776)" + inkscape:connector-avoid="true"> + <rect + style="fill:#ffeeaa;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.08598;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24479" + width="30.914017" + height="10.84422" + x="271.58066" + y="25.995117" + ry="2.2564735" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:4, 2;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-31.941525" + y="287.03415" + id="text31260" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31258" + style="stroke-width:0.75" + x="-31.941525" + y="287.03415">Core 0</tspan></text> + </g> + <g + id="g31373" + transform="translate(-19.708778,16.231776)"> + <rect + style="fill:#ffeeaa;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.08598;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24479-4" + width="30.914017" + height="10.84422" + x="271.58066" + y="49.056282" + ry="2.2564735" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:4, 2;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-55.00008" + y="287.15549" + id="text31260-0" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31258-7" + style="stroke-width:0.75" + x="-55.00008" + y="287.15549">Core ..</tspan></text> + </g> + <g + id="g31378" + transform="translate(-19.708778,16.231776)" + inkscape:connector-avoid="true"> + <rect + style="fill:#ffeeaa;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.08598;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24479-41" + 
width="30.914017" + height="10.84422" + x="271.58066" + y="72.120064" + ry="2.2564735" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:4, 2;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-78.063866" + y="287.13998" + id="text31260-5" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31258-8" + style="stroke-width:0.75" + x="-78.063866" + y="287.13998">Core N</tspan></text> + </g> + </g> + <rect + style="fill:#ffccaa;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.844503;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:1.689, 0.844503;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24200-2-5-6" + width="99.155495" + height="14.152369" + x="184.08008" + y="13.539296" + ry="2.2479515" + inkscape:connector-avoid="true" /> + <text + xml:space="preserve" + style="font-size:6.35px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.750001;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:3, 1.5;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-20.507757" + y="233.56647" + id="text31115-0-5-7" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan31113-35-8-7" + style="stroke:none;stroke-width:0.75" + x="-20.507757" + y="233.56647">rte_ml_enqueue_burst()</tspan></text> + <path + style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.75;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:2.25, 0.75;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#RoundedArrow)" + d="M 233.65793,27.691665 V 112.15163" + id="path36804" + inkscape:connector-type="polyline" + inkscape:connector-curvature="0" /> + <g + id="g42683"> + <rect + style="fill:#44d7f4;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24176" + width="89.036293" + height="63.036304" + x="25.870831" + y="39.197231" + ry="3.0941005" /> + <text + xml:space="preserve" + style="font-size:11.2889px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.264999;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-49.288273" + y="70.228432" + id="text38896" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan38894" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:11.2889px;font-family:Arial;-inkscape-font-specification:'Arial Bold';stroke-width:0.265" + x="-49.288273" + y="70.228432">Machine</tspan><tspan + sodipodi:role="line" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:11.2889px;font-family:Arial;-inkscape-font-specification:'Arial Bold';stroke-width:0.265" + x="-63.399399" + y="70.228432" + id="tspan38898">Learning</tspan><tspan + sodipodi:role="line" + 
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:11.2889px;font-family:Arial;-inkscape-font-specification:'Arial Bold';stroke-width:0.265" + x="-77.510529" + y="70.228432" + id="tspan38900">Inference</tspan><tspan + sodipodi:role="line" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:11.2889px;font-family:Arial;-inkscape-font-specification:'Arial Bold';stroke-width:0.265" + x="-91.621651" + y="70.228432" + id="tspan38902">Engine</tspan></text> + </g> + <g + id="g42621"> + <rect + style="fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.405;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect24176-1" + width="88.595322" + height="134.59531" + x="26.09132" + y="148.58514" + ry="6.6065331" /> + <g + id="g42601"> + <g + id="g39966" + transform="translate(-60.175145,10.144324)"> + <rect + style="fill:#007cab;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.236524;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect38962" + width="48.620556" + height="21.483501" + x="106.25385" + y="146.14212" + ry="1.9712806" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:Arial;-inkscape-font-specification:'Arial Bold';text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.264999;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-157.3761" + y="130.49591" + id="text39799" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan39797" + style="font-size:6.35px;fill:#000000;stroke-width:0.265" + x="-157.3761" + y="130.49591">Model 0</tspan></text> + </g> + <g + id="g39971" + transform="translate(-60.175151,10.144334)"> + <rect + style="fill:#007cab;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.236524;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect38962-8" + width="48.620556" + height="21.483501" + x="106.25385" + y="178.65079" + ry="1.9712806" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:Arial;-inkscape-font-specification:'Arial Bold';text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.264999;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-189.88477" + y="130.49591" + id="text39799-8" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan39797-1" + style="font-size:6.35px;fill:#000000;stroke-width:0.265" + x="-189.88477" + y="130.49591">Model 1</tspan></text> + </g> + <g + id="g39976" + transform="translate(-60.175145,10.144324)"> + <rect + style="fill:#007cab;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.236524;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect38962-9" + width="48.620556" + height="21.483501" + x="106.25385" + y="211.15947" + ry="1.9712806" /> + <text + xml:space="preserve" + 
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:Arial;-inkscape-font-specification:'Arial Bold';text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.264999;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-222.39345" + y="130.49591" + id="text39799-9" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan39797-8" + style="font-size:6.35px;fill:#000000;stroke-width:0.265" + x="-222.39345" + y="130.49591">Model ..</tspan></text> + </g> + <g + id="g39981" + transform="translate(-60.175145,10.144324)"> + <rect + style="fill:#007cab;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.236524;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + id="rect38962-7" + width="48.620556" + height="21.483501" + x="106.25385" + y="243.66815" + ry="1.9712806" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:Arial;-inkscape-font-specification:'Arial Bold';text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.264999;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-254.90213" + y="130.49591" + id="text39799-90" + transform="rotate(-90)"><tspan + sodipodi:role="line" + id="tspan39797-5" + style="font-size:6.35px;fill:#000000;stroke-width:0.265" + x="-254.90213" + y="130.49591">Model N</tspan></text> + </g> + </g> + </g> + <text + xml:space="preserve" + style="font-size:14.1111px;font-family:Arial;-inkscape-font-specification:Arial;text-align:center;writing-mode:tb-rl;text-anchor:middle;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.264999;stroke-linecap:round;stroke-linejoin:round;stroke-dasharray:none;stroke-dashoffset:0;paint-order:stroke fill markers" + x="-279.79742" + y="275.46826" + id="text38896-4" + transform="rotate(-90)"><tspan + sodipodi:role="line" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:14.1111px;font-family:Arial;-inkscape-font-specification:'Arial Bold';stroke-width:0.265" + x="-279.79742" + y="275.46826" + id="tspan38902-6">mldev</tspan></text> + </g> +</svg> diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst index 8564883018..d7f2a28bdb 100644 --- a/doc/guides/prog_guide/index.rst +++ b/doc/guides/prog_guide/index.rst @@ -30,6 +30,7 @@ Programmer's Guide regexdev dmadev gpudev + mldev rte_security rawdev link_bonding_poll_mode_drv_lib diff --git a/doc/guides/prog_guide/mldev.rst b/doc/guides/prog_guide/mldev.rst new file mode 100644 index 0000000000..a0bd370e72 --- /dev/null +++ b/doc/guides/prog_guide/mldev.rst @@ -0,0 +1,186 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright (c) 2022 Marvell. + +Machine Learning Device Library +=============================== + +The MLDEV library provides a Machine Learning device framework for the management and +provisioning of hardware and software ML poll mode drivers, defining APIs which +support a number of ML operations including device handling and inference processing. +The ML model creation and training is outside of the scope of this library. 
+
+The ML framework is built on the following model:
+
+.. _figure_mldev_work_flow:
+
+.. figure:: img/mldev_flow.*
+
+   Work flow of inference on MLDEV
+
+**ML Device**: A hardware or software-based implementation of the ML device API for
+running inferences using a pre-trained ML model.
+
+**ML Model**: An ML model is an algorithm trained over a dataset. A model consists of
+the procedure/algorithm and the data/pattern required to make predictions on live data.
+Once the model is created and trained outside of the DPDK scope, it can be loaded via
+rte_ml_model_load() and then started using the rte_ml_model_start() API.
+rte_ml_model_params_update() can be used to update the model parameters, such as weights
+and bias, without unloading the model using rte_ml_model_unload().
+
+**ML Inference**: ML inference is the process of feeding data to the model via the
+rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated
+outputs/predictions from the started model.
+
+Design Principles
+-----------------
+
+The MLDEV library follows the same basic principles as those used in DPDK's
+Ethernet Device framework and the Crypto framework. The MLDEV framework provides
+a generic Machine Learning device framework which supports both physical (hardware)
+and virtual (software) ML devices, as well as an ML API to manage and configure ML
+devices. The APIs also support performing ML inference operations through an ML poll
+mode driver.
+
+
+Device Operations
+-----------------
+
+Device Creation
+~~~~~~~~~~~~~~~
+
+Physical ML devices are discovered during the PCI probe/enumeration, through the
+EAL functions which are executed at DPDK initialization, based on their PCI device
+identifier, each unique PCI BDF (bus/bridge, device, function). ML physical devices,
+like other physical devices in DPDK, can be white-listed or black-listed
+using the EAL command line options.
+
+
+Device Identification
+~~~~~~~~~~~~~~~~~~~~~
+
+Each device, whether virtual or physical, is uniquely designated by two
+identifiers:
+
+- A unique device index used to designate the ML device in all functions
+  exported by the MLDEV API.
+
+- A device name used to designate the ML device in console messages, for
+  administration or debugging purposes.
+
+Device Features and Capabilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ML devices may support different feature sets. The ``rte_ml_dev_info_get`` API
+returns the information of the device and its supported features.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~
+
+The configuration of each ML device includes the following operations:
+
+- Allocation of resources, including hardware resources if a physical device.
+- Resetting the device into a well-known default state.
+- Initialization of statistics counters.
+
+The rte_ml_dev_configure API is used to configure an ML device.
+
+.. code-block:: c
+
+   int rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *cfg);
+
+The ``rte_ml_dev_config`` structure is used to pass the configuration parameters
+for the ML device, for example the number of queue pairs, the maximum number of
+models, the maximum size of a model and so on.
+
+Configuration of Queue Pairs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each ML device can be configured with a number of queue pairs.
+Each queue pair is configured using ``rte_ml_dev_queue_pair_setup``.
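+
+A minimal configuration sketch using the above APIs is shown below. The device identifier
+``dev_id`` is assumed to be valid, the model, queue pair and descriptor counts are
+illustrative values only, and error checks are omitted for brevity.
+
+.. code-block:: c
+
+   struct rte_ml_dev_info dev_info;
+   struct rte_ml_dev_config dev_config;
+   struct rte_ml_dev_qp_conf qp_conf;
+
+   /* Query the device limits before configuring it. */
+   rte_ml_dev_info_get(dev_id, &dev_info);
+
+   dev_config.socket_id = rte_ml_dev_socket_id(dev_id);
+   dev_config.nb_models = 1;              /* must not exceed dev_info.max_models */
+   dev_config.nb_queue_pairs = 1;         /* must not exceed dev_info.max_queue_pairs */
+   rte_ml_dev_configure(dev_id, &dev_config);
+
+   /* Set up queue pair 0 and start the device. */
+   qp_conf.nb_desc = dev_info.max_desc;   /* descriptors per queue pair */
+   qp_conf.cb = NULL;                     /* no stop-flush callback */
+   rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY);
+
+   rte_ml_dev_start(dev_id);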
+
+Logical Cores, Memory and Queue Pair Relationships
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Multiple logical cores should never share the same queue pair for enqueuing
+operations or dequeuing operations on the same ML device since this would
+require global locks and hinder performance.
+
+Configuration of Machine Learning models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Pre-trained ML models that are built using external ML compiler / training frameworks
+are used to perform inference operations. These models are configured on an ML device
+in a two-stage process that includes loading the model on an ML device, and starting
+the model to accept inference operations. Inference operations can be queued for a
+model only when the model is in the started state. The model load stage assigns a
+Model ID, which is unique for the model in a driver's context. The Model ID is used
+during all subsequent slow-path and fast-path operations.
+
+Model loading and starting are done through the ``rte_ml_model_load`` and
+``rte_ml_model_start`` functions.
+
+Similarly, stopping and unloading are done through the ``rte_ml_model_stop`` and
+``rte_ml_model_unload`` functions.
+
+The stop and unload functions release the resources allocated for the models.
+Inference tasks cannot be queued for a model that is stopped.
+
+Detailed information related to the model can be retrieved from the driver using the
+function ``rte_ml_model_info_get``. Model information is accessible to the application
+through the ``rte_ml_model_info`` structure. The information available to the user
+includes the details related to the inputs and outputs, and the maximum batch size
+supported by the model.
+
+The user can optionally update the model parameters, such as weights and bias, without
+unloading the model, through the ``rte_ml_model_params_update`` function. A model should
+be in the stopped state to update the parameters. The model has to be started again in
+order to enqueue inference requests after a parameter update.
+
+Enqueue / Dequeue
+~~~~~~~~~~~~~~~~~
+
+The burst enqueue API uses an ML device identifier and a queue pair identifier
+to specify the device queue pair to schedule the processing on. The ``nb_ops``
+parameter is the number of operations to process, which are supplied in the
+``ops`` array of ``rte_ml_op`` structures. The enqueue function returns the
+number of operations it enqueued for processing; a return value equal to
+``nb_ops`` means that all operations have been enqueued.
+
+The dequeue API uses the same format as the enqueue API, but the ``nb_ops`` and
+``ops`` parameters are now used to specify the maximum number of processed
+operations the user wishes to retrieve and the location in which to store them.
+The API call returns the actual number of processed operations; this can never
+be larger than ``nb_ops``.
+
+``rte_ml_op`` provides the required information to the driver to queue an ML inference
+task. The ML op specifies the model to be used and the number of batches to be executed
+in the inference task. Input and output buffer information is specified through the
+structure ``rte_ml_buff_seg``, which supports segmented data. Input is provided through
+``rte_ml_op::input`` and output through ``rte_ml_op::output``. The data pointed to by
+each op should not be released until that op has been dequeued.
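+
+One possible inference loop using these APIs is sketched below. It assumes the device and
+the model have already been started, ``model_id`` is the identifier assigned at model load,
+and ``q_in``/``q_out`` point to quantized input and output buffers of ``in_len``/``out_len``
+bytes prepared by the application; op pool allocation and error handling are left out.
+
+.. code-block:: c
+
+   struct rte_ml_op op = {0};
+   struct rte_ml_op *ops[1] = {&op};
+   uint16_t nb;
+
+   /* A real application would allocate ops from an op pool and may
+    * also need to fill the iova_addr fields of the buffer segments.
+    */
+   op.model_id = model_id;
+   op.nb_batches = 1;
+   op.input.addr = q_in;           /* single, non-segmented input buffer */
+   op.input.length = in_len;
+   op.input.next = NULL;
+   op.output.addr = q_out;         /* buffer to receive the inference output */
+   op.output.length = out_len;
+   op.output.next = NULL;
+
+   /* Submit the op on queue pair 0 and poll until it is returned. */
+   while (rte_ml_enqueue_burst(dev_id, 0, ops, 1) != 1)
+      ;
+   do {
+      nb = rte_ml_dequeue_burst(dev_id, 0, ops, 1);
+   } while (nb == 0);
+
+   if (ops[0]->status != RTE_ML_OP_STATUS_SUCCESS) {
+      struct rte_ml_op_error error;
+
+      rte_ml_op_error_get(dev_id, ops[0], &error);
+   }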
+ + +Quantize and Dequantize +~~~~~~~~~~~~~~~~~~~~~~~ + +Inference operations performed with lower precision types would improve the throughput +and efficiency of the inference execution with a minimal loss of accuracy, which is within +the tolerance limits. Quantization and dequantization is the process of converting data +from a higher precision type to a lower precision type and vice-versa. ML library provides +the functions ``rte_ml_io_quantize`` and ``rte_ml_io_dequantize`` to enable data type +conversions. User needs to provide the address of the quantized and dequantized data +buffers to the functions, along the number of the batches in the buffers. + +For quantization, the dequantized data is assumed to be of the type ``dtype`` provided by +the ``rte_ml_model_info::input`` and the data is converted to ``qtype`` provided by the +``rte_ml_model_info::input``. + +For dequantization, the quantized data is assumed to be of the type ``qtype`` provided by +the ``rte_ml_model_info::output`` and the data is converted to ``dtype`` provided by the +``rte_ml_model_info::output``. + +Size of the buffers required for the input and output can be calculated using the functions +``rte_ml_io_input_size_get`` and ``rte_ml_io_output_size_get``. These functions would get the +buffer sizes for both quantized and dequantized data for the given number of batches. + diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index 1fa101c420..f23b58f416 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -87,6 +87,11 @@ New Features ``rte_event_dev_config::nb_single_link_event_port_queues`` parameter required for eth_rx, eth_tx, crypto and timer eventdev adapters. +* **Added machine learning inference device library.** + + * Added a machine learning inference device framework for management and provision of + hardware and software machine learning inference devices. + Removed Items ------------- diff --git a/lib/meson.build b/lib/meson.build index a90fee31b7..ad91819375 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -63,6 +63,7 @@ libraries = [ 'flow_classify', # flow_classify lib depends on pkt framework table lib 'graph', 'node', + 'mldev', ] optional_libs = [ diff --git a/lib/mldev/meson.build b/lib/mldev/meson.build new file mode 100644 index 0000000000..e378cfca30 --- /dev/null +++ b/lib/mldev/meson.build @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2022 Marvell. + +sources = files( + 'rte_mldev.c', +) + +headers = files( + 'rte_mldev.h', +) + +deps += ['mempool'] + +if get_option('buildtype').contains('debug') + cflags += [ '-DRTE_LIBRTE_ML_DEV_DEBUG' ] +else + cflags += [ '-URTE_LIBRTE_ML_DEV_DEBUG' ] +endif diff --git a/lib/mldev/rte_mldev.c b/lib/mldev/rte_mldev.c new file mode 100644 index 0000000000..70aad4c44b --- /dev/null +++ b/lib/mldev/rte_mldev.c @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2022 Marvell. + */ + +#include <rte_log.h> +#include <rte_mldev.h> + +RTE_LOG_REGISTER_DEFAULT(rte_ml_dev_logtype, INFO); diff --git a/lib/mldev/rte_mldev.h b/lib/mldev/rte_mldev.h new file mode 100644 index 0000000000..b22234730d --- /dev/null +++ b/lib/mldev/rte_mldev.h @@ -0,0 +1,1119 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2022 Marvell. + */ + +#ifndef RTE_MLDEV_H +#define RTE_MLDEV_H + +/** + * @file rte_mldev.h + * + * @warning + * @b EXPERIMENTAL: + * All functions in this file may be changed or removed without prior notice. 
+ * + * ML (Machine Learning) device API. + * + * The ML framework is built on the following model: + * + * + * +-----------------+ rte_ml_[en|de]queue_burst() + * | | | + * | Machine o------+ +--------+ | + * | Learning | | | queue | | +------+ + * | Inference o------+-----o |<===o===>|Core 0| + * | Engine | | | pair 0 | +------+ + * | o----+ | +--------+ + * | | | | + * +-----------------+ | | +--------+ + * ^ | | | queue | +------+ + * | | +-----o |<=======>|Core 1| + * | | | pair 1 | +------+ + * | | +--------+ + * +--------+--------+ | + * | +-------------+ | | +--------+ + * | | Model 0 | | | | queue | +------+ + * | +-------------+ | +-------o |<=======>|Core N| + * | +-------------+ | | pair N | +------+ + * | | Model 1 | | +--------+ + * | +-------------+ | + * | +-------------+ |<------> rte_ml_model_load() + * | | Model .. | |-------> rte_ml_model_info_get() + * | +-------------+ |<------- rte_ml_model_start() + * | +-------------+ |<------- rte_ml_model_stop() + * | | Model N | |<------- rte_ml_model_params_update() + * | +-------------+ |<------- rte_ml_model_unload() + * +-----------------+ + * + * ML Device: A hardware or software-based implementation of ML device API for + * running inferences using a pre-trained ML model. + * + * ML Model: An ML model is an algorithm trained over a dataset. A model consists of + * procedure/algorithm and data/pattern required to make predictions on live data. + * Once the model is created and trained outside of the DPDK scope, the model can be loaded + * via rte_ml_model_load() and then start it using rte_ml_model_start() API. + * The rte_ml_model_params_update() can be used to update the model parameters such as weight + * and bias without unloading the model using rte_ml_model_unload(). + * + * ML Inference: ML inference is the process of feeding data to the model via + * rte_ml_enqueue_burst() API and use rte_ml_dequeue_burst() API to get the calculated + * outputs/predictions from the started model. + * + * In all functions of the ML device API, the ML device is designated by an + * integer >= 0 named as device identifier *dev_id*. + * + * The functions exported by the ML device API to setup a device designated by + * its device identifier must be invoked in the following order: + * + * - rte_ml_dev_configure() + * - rte_ml_dev_queue_pair_setup() + * - rte_ml_dev_start() + * + * A model is required to run the inference operations with the user specified inputs. + * Application needs to invoke the ML model API in the following order before queueing + * inference jobs. + * + * - rte_ml_model_load() + * - rte_ml_model_start() + * + * A model can be loaded on a device only after the device has been configured and can be + * started or stopped only after a device has been started. + * + * The rte_ml_model_info_get() API is provided to retrieve the information related to the model. + * The information would include the shape and type of input and output required for the inference. + * + * Data quantization and dequantization is one of the main aspects in ML domain. This involves + * conversion of input data from a higher precision to a lower precision data type and vice-versa + * for the output. APIs are provided to enable quantization through rte_ml_io_quantize() and + * dequantization through rte_ml_io_dequantize(). These APIs have the capability to handle input + * and output buffers holding data for multiple batches. 
+ * + * Two utility APIs rte_ml_io_input_size_get() and rte_ml_io_output_size_get() can used to get the + * size of quantized and de-quantized multi-batch input and output buffers. + * + * User can optionally update the model parameters with rte_ml_model_params_update() after + * invoking rte_ml_model_stop() API on a given model ID. + * + * The application can invoke, in any order, the functions exported by the ML API to enqueue + * inference jobs and dequeue inference response. + * + * If the application wants to change the device configuration (i.e., call + * rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then application must stop the + * device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then + * the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API + * for the given model. The application does not need to call rte_ml_dev_stop() API for + * any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc. + * + * Once the device is in the start state after invoking rte_ml_dev_start() API and the model is in + * start state after invoking rte_ml_model_start() API, then the application can call + * rte_ml_enqueue_burst() and rte_ml_dequeue_burst() API on the destined device and model ID. + * + * Finally, an application can close an ML device by invoking the rte_ml_dev_close() function. + * + * Typical application utilisation of the ML API will follow the following + * programming flow. + * + * - rte_ml_dev_configure() + * - rte_ml_dev_queue_pair_setup() + * - rte_ml_model_load() + * - rte_ml_dev_start() + * - rte_ml_model_start() + * - rte_ml_model_info_get() + * - rte_ml_enqueue_burst() + * - rte_ml_dequeue_burst() + * - rte_ml_model_stop() + * - rte_ml_model_unload() + * - rte_ml_dev_stop() + * - rte_ml_dev_close() + * + * Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD + * are lock-free functions which assume to not be invoked in parallel on different logical cores + * on the same target object. For instance, the dequeue function of a poll mode driver cannot be + * invoked in parallel on two logical cores to operate on same queue pair. Of course, this function + * can be invoked in parallel by different logical core on different queue pair. + * It is the responsibility of the user application to enforce this rule. + */ + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_mempool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Logging Macro */ +extern int rte_ml_dev_logtype; + +#define RTE_MLDEV_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args) + +#define RTE_ML_STR_MAX 128 +/**< Maximum length of name string */ + +#define RTE_MLDEV_DEFAULT_MAX 32 +/** Maximum number of devices if rte_ml_dev_init() is not called. */ + +/* Device operations */ + +/** + * Initialize the device array before probing devices. If not called, the first device probed would + * initialize the array to a size of RTE_MLDEV_DEFAULT_MAX. + * + * @param dev_max + * Maximum number of devices. + * + * @return + * 0 on success, -rte_errno otherwise: + * - ENOMEM if out of memory + * - EINVAL if 0 size + * - EBUSY if already initialized + */ +__rte_experimental +int +rte_ml_dev_init(size_t dev_max); + +/** + * Get the total number of ML devices that have been successfully initialised. + * + * @return + * - The total number of usable ML devices. 
+ */ +__rte_experimental +uint16_t +rte_ml_dev_count(void); + +/** + * Check if the device is in ready state. + * + * @param dev_id + * The identifier of the device. + * + * @return + * - 0 if device state is not in ready state. + * - 1 if device state is ready state. + */ +__rte_experimental +int +rte_ml_dev_is_valid_dev(int16_t dev_id); + +/** + * Return the NUMA socket to which a device is connected. + * + * @param dev_id + * The identifier of the device. + * + * @return + * - The NUMA socket id to which the device is connected + * - 0 If the socket could not be determined. + * - -EINVAL: if the dev_id value is not valid. + */ +__rte_experimental +int +rte_ml_dev_socket_id(int16_t dev_id); + +/** ML device information */ +struct rte_ml_dev_info { + const char *driver_name; + /**< Driver name */ + uint16_t max_models; + /**< Maximum number of models supported by the device. + * @see struct rte_ml_dev_config::nb_models + */ + uint16_t max_queue_pairs; + /**< Maximum number of queues pairs supported by the device. + * @see struct rte_ml_dev_config::nb_queue_pairs + */ + uint16_t max_desc; + /**< Maximum allowed number of descriptors for queue pair by the device. + * @see struct rte_ml_dev_qp_conf::nb_desc + */ + uint16_t max_segments; + /**< Maximum number of scatter-gather entries supported by the device. + * @see struct rte_ml_buff_seg struct rte_ml_buff_seg::next + */ + uint16_t min_align_size; + /**< Minimum alignment size of IO buffers used by the device. */ +}; + +/** + * Retrieve the information of the device. + * + * @param dev_id + * The identifier of the device. + * @param dev_info + * A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device. + * + * @return + * - 0: Success, driver updates the information of the ML device + * - < 0: Error code returned by the driver info get function. + */ +__rte_experimental +int +rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info); + +/** ML device configuration structure */ +struct rte_ml_dev_config { + int socket_id; + /**< Socket to allocate resources on. */ + uint16_t nb_models; + /**< Number of models to be loaded on the device. + * This value cannot exceed the max_models which is previously provided in + * struct rte_ml_dev_info::max_models + */ + uint16_t nb_queue_pairs; + /**< Number of queue pairs to configure on this device. + * This value cannot exceed the max_models which is previously provided in + * struct rte_ml_dev_info::max_queue_pairs + */ +}; + +/** + * Configure an ML device. + * + * This function must be invoked first before any other function in the API. + * + * ML Device can be re-configured, when in a stopped state. Device cannot be re-configured after + * rte_ml_dev_close() is called. + * + * The caller may use rte_ml_dev_info_get() to get the capability of each resources available for + * this ML device. + * + * @param dev_id + * The identifier of the device to configure. + * @param config + * The ML device configuration structure. + * + * @return + * - 0: Success, device configured. + * - < 0: Error code returned by the driver configuration function. + */ +__rte_experimental +int +rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config); + +/* Forward declaration */ +struct rte_ml_op; + +/**< Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op */ +typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op); + +/** ML device queue pair configuration structure. 
*/ +struct rte_ml_dev_qp_conf { + uint32_t nb_desc; + /**< Number of descriptors per queue pair. + * This value cannot exceed the max_desc which previously provided in + * struct rte_ml_dev_info:max_desc + */ + rte_ml_dev_stop_flush_t cb; + /**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op. + * Value NULL is allowed, in which case callback will not be invoked. + * This function can be used to properly dispose of outstanding ML ops from all + * queue pairs, for example ops containing memory pointers. + * @see rte_ml_dev_stop() + */ +}; + +/** + * Set up a queue pair for a device. This should only be called when the device is stopped. + * + * @param dev_id + * The identifier of the device. + * @param queue_pair_id + * The index of the queue pairs to set up. The value must be in the range [0, nb_queue_pairs - 1] + * previously supplied to rte_ml_dev_configure(). + * @param qp_conf + * The pointer to the configuration data to be used for the queue pair. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated + * for the queue pair. + * + * @return + * - 0: Success, queue pair correctly set up. + * - < 0: Queue pair configuration failed. + */ +__rte_experimental +int +rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id, + const struct rte_ml_dev_qp_conf *qp_conf, int socket_id); + +/** + * Start an ML device. + * + * The device start step consists of setting the configured features and enabling the ML device + * to accept inference jobs. + * + * @param dev_id + * The identifier of the device. + * + * @return + * - 0: Success, device started. + * - <0: Error code of the driver device start function. + */ +__rte_experimental +int +rte_ml_dev_start(int16_t dev_id); + +/** + * Stop an ML device. A stopped device cannot accept inference jobs. + * The device can be restarted with a call to rte_ml_dev_start(). + * + * @param dev_id + * The identifier of the device. + * + * @return + * - 0: Success, device stopped. + * - <0: Error code of the driver device stop function. + */ +__rte_experimental +int +rte_ml_dev_stop(int16_t dev_id); + +/** + * Close an ML device. The device cannot be restarted! + * + * @param dev_id + * The identifier of the device. + * + * @return + * - 0 on successfully closing device. + * - <0 on failure to close device. + */ +__rte_experimental +int +rte_ml_dev_close(int16_t dev_id); + +/** Status of ML operation */ +enum rte_ml_op_status { + RTE_ML_OP_STATUS_SUCCESS = 0, + /**< Operation completed successfully */ + RTE_ML_OP_STATUS_NOT_PROCESSED, + /**< Operation has not yet been processed by the device. */ + RTE_ML_OP_STATUS_ERROR, + /**< Operation completed with error. + * Application can invoke rte_ml_op_error_get() to get PMD specific + * error code if needed. + */ +}; + +/** ML operation's input and output buffer representation as scatter gather list + */ +struct rte_ml_buff_seg { + rte_iova_t iova_addr; + /**< IOVA address of segment buffer. */ + void *addr; + /**< Virtual address of segment buffer. */ + uint32_t length; + /**< Segment length. */ + uint32_t reserved; + /**< Reserved for future use. */ + struct rte_ml_buff_seg *next; + /**< Points to next segment. Value NULL represents the last segment. */ +}; + +/** + * ML Operation. + * + * This structure contains data related to performing an ML operation on the buffers using + * the model specified through model_id. 
+/**
+ * ML Operation.
+ *
+ * This structure contains data related to performing an ML operation on the buffers using
+ * the model specified through model_id.
+ */
+struct rte_ml_op {
+	uint16_t model_id;
+	/**< Model ID to be used for the operation. */
+	uint16_t nb_batches;
+	/**< Number of batches. Minimum value must be one.
+	 * Input buffer must hold inference data for each batch contiguously.
+	 */
+	uint32_t reserved;
+	/**< Reserved for future use. */
+	struct rte_mempool *mempool;
+	/**< Pool from which operation is allocated. */
+	struct rte_ml_buff_seg input;
+	/**< Input buffer to hold the inference data. */
+	struct rte_ml_buff_seg output;
+	/**< Output buffer to hold the inference output by the driver. */
+	RTE_STD_C11
+	union {
+		uint64_t user_u64;
+		/**< User data as uint64_t. */
+		void *user_ptr;
+		/**< User data as void*. */
+	};
+	enum rte_ml_op_status status;
+	/**< Operation status. */
+	uint64_t impl_opaque;
+	/**< Implementation specific opaque value.
+	 * An implementation may use this field to hold an
+	 * implementation specific value to share between
+	 * dequeue and enqueue operations.
+	 * The application should not modify this field.
+	 */
+} __rte_cache_aligned;
+
+/* Enqueue/Dequeue operations */
+
+/**
+ * Enqueue a burst of ML inferences for processing on an ML device.
+ *
+ * The rte_ml_enqueue_burst() function is invoked to place ML inference
+ * operations on the queue *qp_id* of the device designated by its *dev_id*.
+ *
+ * The *nb_ops* parameter is the number of inferences to process which are
+ * supplied in the *ops* array of *rte_ml_op* structures.
+ *
+ * The rte_ml_enqueue_burst() function returns the number of inferences it
+ * actually enqueued for processing. A return value equal to *nb_ops* means that
+ * all operations have been enqueued.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param qp_id
+ *   The index of the queue pair on which inferences are to be enqueued for processing.
+ *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
+ *   rte_ml_dev_configure().
+ * @param ops
+ *   The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
+ *   ML inferences to be processed.
+ * @param nb_ops
+ *   The number of operations to process.
+ *
+ * @return
+ *   The number of inference operations actually enqueued to the ML device.
+ *   The return value can be less than the value of the *nb_ops* parameter when the ML device
+ *   queue is full or if invalid parameters are specified in a *rte_ml_op*.
+ */
+__rte_experimental
+uint16_t
+rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
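+
+/*
+ * Illustrative sketch (not part of the API): preparing and enqueuing a single inference.
+ * The op pool, model identifier and pre-filled quantized input buffer are assumptions for
+ * the example; see rte_ml_op_pool_create() and the IO helpers declared later in this file.
+ *
+ *	static int
+ *	ml_enqueue_one(int16_t dev_id, uint16_t model_id, struct rte_mempool *op_pool,
+ *		       struct rte_ml_buff_seg *in_seg, struct rte_ml_buff_seg *out_seg)
+ *	{
+ *		struct rte_ml_op *op;
+ *
+ *		if (rte_mempool_get(op_pool, (void **)&op) != 0)
+ *			return -1;	// pool exhausted
+ *
+ *		op->model_id = model_id;
+ *		op->nb_batches = 1;	// assumed single batch
+ *		op->mempool = op_pool;
+ *		op->input = *in_seg;	// quantized input, filled by the caller
+ *		op->output = *out_seg;	// buffer for quantized output
+ *
+ *		if (rte_ml_enqueue_burst(dev_id, 0, &op, 1) != 1) {
+ *			rte_mempool_put(op_pool, op);	// queue full, return the op
+ *			return -1;
+ *		}
+ *		return 0;
+ *	}
+ */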
+/**
+ * Dequeue a burst of processed ML inference operations from a queue of the ML device.
+ * The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
+ * in the *ops* array.
+ *
+ * The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
+ * which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
+ *
+ * A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops*
+ * operations, and this is likely to signify that other processed operations remain in the
+ * device's output queue. An application implementing a "retrieve as many processed operations
+ * as possible" policy can check for this specific case and keep invoking the
+ * rte_ml_dequeue_burst() function until a value less than *nb_ops* is returned.
+ *
+ * The rte_ml_dequeue_burst() function does not provide any error notification to avoid
+ * the corresponding overhead.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param qp_id
+ *   The index of the queue pair from which to retrieve processed operations.
+ *   The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
+ *   rte_ml_dev_configure().
+ * @param ops
+ *   The address of an array of pointers to *rte_ml_op* structures that must be large enough to
+ *   store *nb_ops* pointers in it.
+ * @param nb_ops
+ *   The maximum number of inferences to dequeue.
+ *
+ * @return
+ *   The number of operations actually dequeued, which is the number of pointers
+ *   to *rte_ml_op* structures effectively supplied to the *ops* array.
+ */
+__rte_experimental
+uint16_t
+rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
+
+/**
+ * Verbose error structure definition.
+ */
+struct rte_ml_op_error {
+	char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
+	uint64_t errcode; /**< Vendor specific error code. */
+};
+
+/**
+ * Get PMD specific error information for an ML op.
+ *
+ * When an ML operation completes with status RTE_ML_OP_STATUS_ERROR,
+ * this API allows retrieving PMD specific error details.
+ *
+ * @param[in] dev_id
+ *   Device identifier
+ * @param[in] op
+ *   Handle of ML operation
+ * @param[out] error
+ *   Address of structure rte_ml_op_error to be filled
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
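+
+/*
+ * Illustrative sketch (not part of the API): polling for completions and reporting errors.
+ * The burst size of 16 and the use of printf() for reporting are assumptions for the example.
+ *
+ *	static void
+ *	ml_poll_completions(int16_t dev_id, struct rte_mempool *op_pool)
+ *	{
+ *		struct rte_ml_op *ops[16];
+ *		struct rte_ml_op_error err;
+ *		uint16_t nb, i;
+ *
+ *		nb = rte_ml_dequeue_burst(dev_id, 0, ops, 16);
+ *		for (i = 0; i < nb; i++) {
+ *			if (ops[i]->status == RTE_ML_OP_STATUS_ERROR &&
+ *			    rte_ml_op_error_get(dev_id, ops[i], &err) == 0)
+ *				printf("ML op failed: %s (0x%" PRIx64 ")\n",
+ *				       err.message, err.errcode);
+ *			rte_mempool_put(op_pool, ops[i]);	// release the completed op
+ *		}
+ *	}
+ */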
+/* Statistics operations */
+
+/** Device statistics. */
+struct rte_ml_dev_stats {
+	uint64_t enqueued_count;
+	/**< Count of all operations enqueued */
+	uint64_t dequeued_count;
+	/**< Count of all operations dequeued */
+	uint64_t enqueue_err_count;
+	/**< Total error count on operations enqueued */
+	uint64_t dequeue_err_count;
+	/**< Total error count on operations dequeued */
+};
+
+/**
+ * Retrieve the general I/O statistics of a device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param stats
+ *   Pointer to structure to where statistics will be copied.
+ *   On error, this location may or may not have been modified.
+ * @return
+ *   - 0 on success
+ *   - -EINVAL: If invalid parameter pointer is provided.
+ */
+__rte_experimental
+int
+rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
+
+/**
+ * Reset the statistics of a device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ */
+__rte_experimental
+void
+rte_ml_dev_stats_reset(int16_t dev_id);
+
+/**
+ * A name-key lookup element for extended statistics.
+ *
+ * This structure is used to map between names and ID numbers for extended ML device statistics.
+ */
+struct rte_ml_dev_xstats_map {
+	uint16_t id;
+	/**< xstat identifier */
+	char name[RTE_ML_STR_MAX];
+	/**< xstat name */
+};
+
+/**
+ * Retrieve names of extended statistics of an ML device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param[out] xstats_map
+ *   Block of memory to insert ids and names into. Must have capacity for at least *size*
+ *   entries. If set to NULL, the function returns the required capacity.
+ * @param size
+ *   Capacity of xstats_map (number of name-id maps).
+ *
+ * @return
+ *   - Positive value on success:
+ *     - The return value is the number of entries filled in the stats map.
+ *     - If xstats_map is set to NULL, then the required capacity for xstats_map.
+ *   - Negative value on error:
+ *     - -ENODEV: for invalid *dev_id*.
+ *     - -ENOTSUP: if the device doesn't support this function.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_names_get(int16_t dev_id, struct rte_ml_dev_xstats_map *xstats_map,
+			    uint32_t size);
+
+/**
+ * Retrieve the value of a single stat by requesting it by name.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param name
+ *   The stat name to retrieve.
+ * @param stat_id
+ *   If non-NULL, the numerical id of the stat will be returned, so that further requests for
+ *   the stat can be made via rte_ml_dev_xstats_get(), which will be faster as it doesn't need
+ *   to scan the list of names for the stat.
+ * @param[out] value
+ *   Must be non-NULL, retrieved xstat value will be stored in this address.
+ *
+ * @return
+ *   - 0: Successfully retrieved xstat value.
+ *   - -EINVAL: invalid parameters.
+ *   - -ENOTSUP: if not supported.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
+
+/**
+ * Retrieve extended statistics of an ML device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param stat_ids
+ *   The id numbers of the stats to get. The ids can be fetched from the stat position in the
+ *   stat list from rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
+ * @param values
+ *   The values for each stat requested by ID.
+ * @param nb_ids
+ *   The number of stats requested.
+ * @return
+ *   - Positive value: number of stat entries filled into the values array.
+ *   - Negative value on error:
+ *     - -ENODEV: for invalid *dev_id*.
+ *     - -ENOTSUP: if the device doesn't support this function.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_get(int16_t dev_id, const uint16_t *stat_ids, uint64_t *values, uint16_t nb_ids);
+
+/**
+ * Reset the values of the xstats of the selected component in the device.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param stat_ids
+ *   Selects specific statistics to be reset. When NULL, all statistics will be reset.
+ *   If non-NULL, must point to an array of at least *nb_ids* size.
+ * @param nb_ids
+ *   The number of ids available in the *stat_ids* array. Ignored when *stat_ids* is NULL.
+ * @return
+ *   - 0: Successfully reset the statistics to zero.
+ *   - -EINVAL: invalid parameters.
+ *   - -ENOTSUP: if not supported.
+ */
+__rte_experimental
+int
+rte_ml_dev_xstats_reset(int16_t dev_id, const uint16_t *stat_ids, uint16_t nb_ids);
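+
+/*
+ * Illustrative sketch (not part of the API): enumerating extended stats with the two-call
+ * pattern (capacity query, then fill). malloc()/free() and printf() are used here only to
+ * keep the example short.
+ *
+ *	static void
+ *	ml_dump_xstats(int16_t dev_id)
+ *	{
+ *		struct rte_ml_dev_xstats_map *map;
+ *		uint64_t value;
+ *		int n, i;
+ *
+ *		n = rte_ml_dev_xstats_names_get(dev_id, NULL, 0);	// required capacity
+ *		if (n <= 0)
+ *			return;
+ *		map = malloc(sizeof(*map) * n);
+ *		if (map == NULL)
+ *			return;
+ *		n = rte_ml_dev_xstats_names_get(dev_id, map, n);
+ *		for (i = 0; i < n; i++)
+ *			if (rte_ml_dev_xstats_get(dev_id, &map[i].id, &value, 1) == 1)
+ *				printf("%s: %" PRIu64 "\n", map[i].name, value);
+ *		free(map);
+ *	}
+ */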
+/* Utility operations */
+
+/**
+ * Dump internal information about *dev_id* to the FILE* provided in *fd*.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param fd
+ *   A pointer to a file for output.
+ * @return
+ *   - 0: on success.
+ *   - <0: on failure.
+ */
+__rte_experimental
+int
+rte_ml_dev_dump(int16_t dev_id, FILE *fd);
+
+/**
+ * Trigger the ML device self test.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @return
+ *   - 0: Selftest successful.
+ *   - -ENOTSUP: if the device doesn't support selftest.
+ *   - other values < 0 on failure.
+ */
+__rte_experimental
+int
+rte_ml_dev_selftest(int16_t dev_id);
+
+/* Model operations */
+
+/** ML model load parameters
+ *
+ * Parameters required to load an ML model.
+ */
+struct rte_ml_model_params {
+	void *addr;
+	/**< Address of model buffer */
+	size_t size;
+	/**< Size of model buffer */
+};
+
+/**
+ * Load an ML model to the device.
+ *
+ * Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] params
+ *   Parameters for the model to be loaded.
+ * @param[out] model_id
+ *   Identifier of the model loaded.
+ *
+ * @return
+ *   - 0: Success, Model loaded.
+ *   - < 0: Failure, Error code of the model load driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
+
+/**
+ * Unload an ML model from the device.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model to be unloaded.
+ *
+ * @return
+ *   - 0: Success, Model unloaded.
+ *   - < 0: Failure, Error code of the model unload driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_unload(int16_t dev_id, uint16_t model_id);
+
+/**
+ * Start an ML model for the given device ID.
+ *
+ * Start an ML model to accept inference requests.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model to be started.
+ *
+ * @return
+ *   - 0: Success, Model started.
+ *   - < 0: Failure, Error code of the model start driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_start(int16_t dev_id, uint16_t model_id);
+
+/**
+ * Stop an ML model for the given device ID.
+ *
+ * Model stop disables the ML model from being used for inference jobs.
+ * All inference jobs must have been completed before a model stop is attempted.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model to be stopped.
+ *
+ * @return
+ *   - 0: Success, Model stopped.
+ *   - < 0: Failure, Error code of the model stop driver function.
+ */
+__rte_experimental
+int
+rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
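+
+/*
+ * Illustrative sketch (not part of the API): a model lifecycle, assuming the model binary has
+ * already been read into memory by the caller.
+ *
+ *	static int
+ *	ml_run_model(int16_t dev_id, void *model_buf, size_t model_len)
+ *	{
+ *		struct rte_ml_model_params params = {
+ *			.addr = model_buf,
+ *			.size = model_len,
+ *		};
+ *		uint16_t model_id;
+ *
+ *		if (rte_ml_model_load(dev_id, &params, &model_id) != 0)
+ *			return -1;
+ *		if (rte_ml_model_start(dev_id, model_id) != 0) {
+ *			rte_ml_model_unload(dev_id, model_id);
+ *			return -1;
+ *		}
+ *
+ *		// ... enqueue/dequeue inferences against model_id ...
+ *
+ *		rte_ml_model_stop(dev_id, model_id);
+ *		rte_ml_model_unload(dev_id, model_id);
+ *		return 0;
+ *	}
+ */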
+/**
+ * Input and output data types. ML models can operate on reduced precision
+ * datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
+ * This enum is used to represent the lower precision integer and floating point types used
+ * by ML models.
+ */
+enum rte_ml_io_type {
+	RTE_ML_IO_TYPE_UNKNOWN = 0,
+	/**< Invalid or unknown type */
+	RTE_ML_IO_TYPE_INT8,
+	/**< 8-bit integer */
+	RTE_ML_IO_TYPE_UINT8,
+	/**< 8-bit unsigned integer */
+	RTE_ML_IO_TYPE_INT16,
+	/**< 16-bit integer */
+	RTE_ML_IO_TYPE_UINT16,
+	/**< 16-bit unsigned integer */
+	RTE_ML_IO_TYPE_INT32,
+	/**< 32-bit integer */
+	RTE_ML_IO_TYPE_UINT32,
+	/**< 32-bit unsigned integer */
+	RTE_ML_IO_TYPE_FP8,
+	/**< 8-bit floating point number */
+	RTE_ML_IO_TYPE_FP16,
+	/**< IEEE 754 16-bit floating point number */
+	RTE_ML_IO_TYPE_FP32,
+	/**< IEEE 754 32-bit floating point number */
+	RTE_ML_IO_TYPE_BFLOAT16
+	/**< 16-bit brain floating point number. */
+};
+
+/**
+ * Input and output format. This is used to represent the encoding type of multi-dimensional
+ * data used by ML models.
+ */
+enum rte_ml_io_format {
+	RTE_ML_IO_FORMAT_NCHW = 1,
+	/**< Batch size (N) x channels (C) x height (H) x width (W) */
+	RTE_ML_IO_FORMAT_NHWC,
+	/**< Batch size (N) x height (H) x width (W) x channels (C) */
+	RTE_ML_IO_FORMAT_CHWN,
+	/**< Channels (C) x height (H) x width (W) x batch size (N) */
+	RTE_ML_IO_FORMAT_3D,
+	/**< Format to represent 3-dimensional data */
+	RTE_ML_IO_FORMAT_2D,
+	/**< Format to represent matrix data */
+	RTE_ML_IO_FORMAT_1D,
+	/**< Format to represent vector data */
+	RTE_ML_IO_FORMAT_SCALAR,
+	/**< Format to represent scalar data */
+};
+
+/**
+ * Input and output shape. This structure represents the encoding format and dimensions
+ * of the tensor or vector.
+ *
+ * The data can be a 4D / 3D tensor, a matrix, a vector or a scalar. The number of dimensions
+ * used for the data depends on the format. Unused dimensions are to be set to 1.
+ */
+struct rte_ml_io_shape {
+	enum rte_ml_io_format format;
+	/**< Format of the data */
+	uint32_t w;
+	/**< First dimension */
+	uint32_t x;
+	/**< Second dimension */
+	uint32_t y;
+	/**< Third dimension */
+	uint32_t z;
+	/**< Fourth dimension */
+};
+
+/** Input and output data information structure
+ *
+ * Specifies the type and shape of input and output data.
+ */
+struct rte_ml_io_info {
+	char name[RTE_ML_STR_MAX];
+	/**< Name of data */
+	struct rte_ml_io_shape shape;
+	/**< Shape of data */
+	enum rte_ml_io_type qtype;
+	/**< Type of quantized data */
+	enum rte_ml_io_type dtype;
+	/**< Type of de-quantized data */
+};
+
+/** Model information structure */
+struct rte_ml_model_info {
+	char name[RTE_ML_STR_MAX];
+	/**< Model name. */
+	char version[RTE_ML_STR_MAX];
+	/**< Model version */
+	uint16_t model_id;
+	/**< Model ID */
+	uint16_t device_id;
+	/**< Device ID */
+	uint16_t batch_size;
+	/**< Maximum number of batches that the model can process simultaneously */
+	uint32_t nb_inputs;
+	/**< Number of inputs */
+	const struct rte_ml_io_info *input_info;
+	/**< Input info array. Array size is equal to nb_inputs */
+	uint32_t nb_outputs;
+	/**< Number of outputs */
+	const struct rte_ml_io_info *output_info;
+	/**< Output info array. Array size is equal to nb_outputs */
+	uint64_t wb_size;
+	/**< Size of model weights and bias */
+};
+
+/**
+ * Get ML model information.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model created
+ * @param[out] model_info
+ *   Pointer to a model info structure
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
+
+/**
+ * Update the model parameters without unloading the model.
+ *
+ * Update model parameters such as weights and bias without unloading the model.
+ * rte_ml_model_stop() must be called before invoking this API.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model created
+ * @param[in] buffer
+ *   Pointer to the model weights and bias buffer.
+ *   Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
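+
+/*
+ * Illustrative sketch (not part of the API): inspecting the IO layout of a loaded model.
+ * The use of printf() is an assumption for the example.
+ *
+ *	static void
+ *	ml_print_io_info(int16_t dev_id, uint16_t model_id)
+ *	{
+ *		struct rte_ml_model_info info;
+ *		uint32_t i;
+ *
+ *		if (rte_ml_model_info_get(dev_id, model_id, &info) != 0)
+ *			return;
+ *
+ *		printf("model %s (v%s): %u input(s), %u output(s)\n",
+ *		       info.name, info.version, info.nb_inputs, info.nb_outputs);
+ *		for (i = 0; i < info.nb_inputs; i++)
+ *			printf("  input %s: %u x %u x %u x %u\n", info.input_info[i].name,
+ *			       info.input_info[i].shape.w, info.input_info[i].shape.x,
+ *			       info.input_info[i].shape.y, info.input_info[i].shape.z);
+ *	}
+ */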
+/* IO operations */
+
+/**
+ * Get size of quantized and dequantized input buffers.
+ *
+ * Calculate the size of buffers required for quantized and dequantized input data.
+ * This API returns the buffer sizes for the number of batches provided and considers
+ * the alignment requirements of the PMD. Input sizes computed by this API can
+ * be used by the application to allocate buffers.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model created
+ * @param[in] nb_batches
+ *   Number of batches of input to be processed in a single inference job
+ * @param[out] input_qsize
+ *   Quantized input size pointer.
+ *   NULL value is allowed, in which case input_qsize is not calculated by the driver.
+ * @param[out] input_dsize
+ *   Dequantized input size pointer.
+ *   NULL value is allowed, in which case input_dsize is not calculated by the driver.
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
+			 uint64_t *input_qsize, uint64_t *input_dsize);
+
+/**
+ * Get size of quantized and dequantized output buffers.
+ *
+ * Calculate the size of buffers required for quantized and dequantized output data.
+ * This API returns the buffer sizes for the number of batches provided and considers
+ * the alignment requirements of the PMD. Output sizes computed by this API can be used by the
+ * application to allocate buffers.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier of the model created
+ * @param[in] nb_batches
+ *   Number of batches of input to be processed in a single inference job
+ * @param[out] output_qsize
+ *   Quantized output size pointer.
+ *   NULL value is allowed, in which case output_qsize is not calculated by the driver.
+ * @param[out] output_dsize
+ *   Dequantized output size pointer.
+ *   NULL value is allowed, in which case output_dsize is not calculated by the driver.
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
+			  uint64_t *output_qsize, uint64_t *output_dsize);
+
+/**
+ * Quantize input data.
+ *
+ * Quantization converts data from higher precision types to lower precision types to improve
+ * the throughput and efficiency of the model execution with minimal loss of accuracy.
+ * Types of dequantized data and quantized data are specified by the model.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model
+ * @param[in] nb_batches
+ *   Number of batches in the dequantized input buffer
+ * @param[in] dbuffer
+ *   Address of dequantized input data
+ * @param[in] qbuffer
+ *   Address of quantized input data
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,
+		   void *qbuffer);
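+
+/*
+ * Illustrative sketch (not part of the API): sizing and quantizing an input buffer.
+ * rte_malloc()/rte_free() and the single-batch assumption are choices made only for this
+ * example; the alignment value is expected to come from rte_ml_dev_info::min_align_size.
+ *
+ *	static void *
+ *	ml_prepare_input(int16_t dev_id, uint16_t model_id, void *dbuffer, uint16_t align)
+ *	{
+ *		uint64_t qsize, dsize;
+ *		void *qbuffer;
+ *
+ *		if (rte_ml_io_input_size_get(dev_id, model_id, 1, &qsize, &dsize) != 0)
+ *			return NULL;
+ *		qbuffer = rte_malloc("ml_qbuf", qsize, align);
+ *		if (qbuffer == NULL)
+ *			return NULL;
+ *		if (rte_ml_io_quantize(dev_id, model_id, 1, dbuffer, qbuffer) != 0) {
+ *			rte_free(qbuffer);
+ *			return NULL;
+ *		}
+ *		return qbuffer;	// caller wraps this in a rte_ml_buff_seg and frees it later
+ *	}
+ */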
+/**
+ * Dequantize output data.
+ *
+ * Dequantization converts data from a lower precision type to a higher precision type.
+ * Types of quantized and dequantized data are specified by the model.
+ *
+ * @param[in] dev_id
+ *   The identifier of the device.
+ * @param[in] model_id
+ *   Identifier for the model
+ * @param[in] nb_batches
+ *   Number of batches in the dequantized output buffer
+ * @param[in] qbuffer
+ *   Address of quantized output data
+ * @param[in] dbuffer
+ *   Address of dequantized output data
+ *
+ * @return
+ *   - Returns 0 on success
+ *   - Returns negative value on failure
+ */
+__rte_experimental
+int
+rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,
+		     void *dbuffer);
+
+/* ML op pool operations */
+
+/**
+ * Create an ML operation pool
+ *
+ * @param name
+ *   ML operations pool name
+ * @param nb_elts
+ *   Number of elements in pool
+ * @param cache_size
+ *   Number of elements to cache on an lcore, see
+ *   rte_mempool_create() for further details about cache size
+ * @param user_size
+ *   Size of private data to allocate for user with each operation
+ * @param socket_id
+ *   Socket identifier to allocate memory on
+ * @return
+ *   - On success, pointer to mempool
+ *   - On failure, NULL
+ */
+__rte_experimental
+struct rte_mempool *
+rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
+		      uint16_t user_size, int socket_id);
+
+/**
+ * Free an ML operation pool
+ *
+ * @param mempool
+ *   A pointer to the mempool structure.
+ *   If NULL, the function does nothing.
+ */
+__rte_experimental
+void
+rte_ml_op_pool_free(struct rte_mempool *mempool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_MLDEV_H */
diff --git a/lib/mldev/version.map b/lib/mldev/version.map
new file mode 100644
index 0000000000..3793380442
--- /dev/null
+++ b/lib/mldev/version.map
@@ -0,0 +1,7 @@
+EXPERIMENTAL {
+	global:
+
+	rte_ml_dev_logtype;
+
+	local: *;
+};
--
2.39.1