On Tuesday, September 23, 2025 3:50 PM Svyatoslav Ryhel wrote:
> Tue, 23 Sep 2025 at 09:11 Svyatoslav Ryhel <clamo...@gmail.com> wrote:
> >
> > Tue, 23 Sep 2025 at 09:04 Mikko Perttunen <mperttu...@nvidia.com> wrote:
> > >
> > > On Monday, September 22, 2025 4:36 PM Svyatoslav Ryhel wrote:
> > > > Mon, 22 Sep 2025 at 10:27 Mikko Perttunen <mperttu...@nvidia.com> wrote:
> > > > >
> > > > > On Monday, September 22, 2025 3:30 PM Svyatoslav Ryhel wrote:
> > > > > > Mon, 22 Sep 2025 at 09:23 Mikko Perttunen <mperttu...@nvidia.com> wrote:
> > > > > > >
> > > > > > > On Monday, September 22, 2025 2:13 PM Svyatoslav Ryhel wrote:
> > > > > > > > Mon, 22 Sep 2025 at 07:44 Mikko Perttunen <mperttu...@nvidia.com> wrote:
> > > > > > > > >
> > > > > > > > > On Saturday, September 6, 2025 10:53 PM Svyatoslav Ryhel wrote:
> > > > > > > > > > Simplify format align calculations by slightly modifying the
> > > > > > > > > > supported formats structure.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Svyatoslav Ryhel <clamo...@gmail.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/staging/media/tegra-video/tegra20.c | 41 ++++++++-------------
> > > > > > > > > >  1 file changed, 16 insertions(+), 25 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/staging/media/tegra-video/tegra20.c b/drivers/staging/media/tegra-video/tegra20.c
> > > > > > > > > > index 6e0b3b728623..781c4e8ec856 100644
> > > > > > > > > > --- a/drivers/staging/media/tegra-video/tegra20.c
> > > > > > > > > > +++ b/drivers/staging/media/tegra-video/tegra20.c
> > > > > > > > > > @@ -280,20 +280,8 @@ static void tegra20_fmt_align(struct v4l2_pix_format *pix, unsigned int bpp)
> > > > > > > > > >       pix->width  = clamp(pix->width,  TEGRA20_MIN_WIDTH,  TEGRA20_MAX_WIDTH);
> > > > > > > > > >       pix->height = clamp(pix->height, TEGRA20_MIN_HEIGHT, TEGRA20_MAX_HEIGHT);
> > > > > > > > > >
> > > > > > > > > > -     switch (pix->pixelformat) {
> > > > > > > > > > -     case V4L2_PIX_FMT_UYVY:
> > > > > > > > > > -     case V4L2_PIX_FMT_VYUY:
> > > > > > > > > > -     case V4L2_PIX_FMT_YUYV:
> > > > > > > > > > -     case V4L2_PIX_FMT_YVYU:
> > > > > > > > > > -             pix->bytesperline = roundup(pix->width, 2) * 2;
> > > > > > > > > > -             pix->sizeimage = roundup(pix->width, 2) * 2 * pix->height;
> > > > > > > > > > -             break;
> > > > > > > > > > -     case V4L2_PIX_FMT_YUV420:
> > > > > > > > > > -     case V4L2_PIX_FMT_YVU420:
> > > > > > > > > > -             pix->bytesperline = roundup(pix->width, 8);
> > > > > > > > > > -             pix->sizeimage = roundup(pix->width, 8) * pix->height * 3 / 2;
> > > > > > > > > > -             break;
> > > > > > > > > > -     }
> > > > > > > > > > +     pix->bytesperline = DIV_ROUND_UP(pix->width * bpp, 8);
> > > > > > > > >
> > > > > > > > > Assuming the bpp is coming from the format table below, this 
> > > > > > > > > changes the value of bytesperline for planar formats. With 
> > > > > > > > > this it'll be (width * 12) / 8 i.e. width * 3/2, which 
> > > > > > > > > doesn't sound right.
> > > > > > > > >
> > > > > > > >
> > > > > > > > Downstream uses soc_mbus_bytes_per_line() for this calculation,
> > > > > > > > which was deprecated some time ago; here is a fragment:
> > > > > > > >
> > > > > > > > s32 soc_mbus_bytes_per_line(u32 width, const struct soc_mbus_pixelfmt *mf)
> > > > > > > > {
> > > > > > > > 	if (mf->fourcc == V4L2_PIX_FMT_JPEG)
> > > > > > > > 		return 0;
> > > > > > > >
> > > > > > > > 	if (mf->layout != SOC_MBUS_LAYOUT_PACKED)
> > > > > > > > 		return width * mf->bits_per_sample / 8;
> > > > > > > >
> > > > > > > > 	switch (mf->packing) {
> > > > > > > > 	case SOC_MBUS_PACKING_NONE:
> > > > > > > > 		return width * mf->bits_per_sample / 8;
> > > > > > > > 	case SOC_MBUS_PACKING_2X8_PADHI:
> > > > > > > > 	case SOC_MBUS_PACKING_2X8_PADLO:
> > > > > > > > 	case SOC_MBUS_PACKING_EXTEND16:
> > > > > > > > 		return width * 2;
> > > > > > > > 	case SOC_MBUS_PACKING_1_5X8:
> > > > > > > > 		return width * 3 / 2;
> > > > > > > > 	case SOC_MBUS_PACKING_VARIABLE:
> > > > > > > > 		return 0;
> > > > > > > > 	}
> > > > > > > >
> > > > > > > > 	return -EINVAL;
> > > > > > > > }
> > > > > > > >
> > > > > > > > V4L2_PIX_FMT_YUV420 and V4L2_PIX_FMT_YVU420 are classified as
> > > > > > > > SOC_MBUS_PACKING_1_5X8, hence we get width * 3/2.
> > > > > > >
> > > > > > > Googling this brings up the entry
> > > > > > >
> > > > > > > {
> > > > > > >         .code = V4L2_MBUS_FMT_YUYV8_1_5X8,
> > > > > > >         .fmt = {
> > > > > > >                 .fourcc          = V4L2_PIX_FMT_YUV420,
> > > > > > >                 .name            = "YUYV 4:2:0",
> > > > > > >                 .bits_per_sample = 8,
> > > > > > >                 .packing         = SOC_MBUS_PACKING_1_5X8,
> > > > > > >                 .order           = SOC_MBUS_ORDER_LE,
> > > > > > >                 .layout          = SOC_MBUS_LAYOUT_PACKED,
> > > > > > >         },
> > > > > > > }
> > > > > > >
> > > > > > > which matches what you're describing. It doesn't make sense to
> > > > > > > me, since it at the same time specifies PIX_FMT_YUV420 (which is
> > > > > > > planar with 3 planes, as documented by
> > > > > > > include/uapi/linux/videodev2.h), and LAYOUT_PACKED
> > > > > > >
> > > > > > > /**
> > > > > > >  * enum soc_mbus_layout - planes layout in memory
> > > > > > >  * @SOC_MBUS_LAYOUT_PACKED:             color components packed
> > > > > > >  * @SOC_MBUS_LAYOUT_PLANAR_2Y_U_V:      YUV components stored in 3 planes (4:2:2)
> > > > > > >  * @SOC_MBUS_LAYOUT_PLANAR_2Y_C:        YUV components stored in a luma and a
> > > > > > >  *                                      chroma plane (C plane is half the size
> > > > > > >  *                                      of Y plane)
> > > > > > >  * @SOC_MBUS_LAYOUT_PLANAR_Y_C:         YUV components stored in a luma and a
> > > > > > >  *                                      chroma plane (C plane is the same size
> > > > > > >  *                                      as Y plane)
> > > > > > >  */
> > > > > > > enum soc_mbus_layout {
> > > > > > >         SOC_MBUS_LAYOUT_PACKED = 0,
> > > > > > >         SOC_MBUS_LAYOUT_PLANAR_2Y_U_V,
> > > > > > >         SOC_MBUS_LAYOUT_PLANAR_2Y_C,
> > > > > > >         SOC_MBUS_LAYOUT_PLANAR_Y_C,
> > > > > > > };
> > > > > > >
> > > > > > > i.e. non-planar. The code in the driver is handling it as three 
> > > > > > > planes as well, with addresses 
> > > > > > > VB0_BASE_ADDRESS/VB0_BASE_ADDRESS_U/VB0_BASE_ADDRESS_V. Since the 
> > > > > > > planes are separate, there should be no need to have more than 
> > > > > > > 'width' samples per line.
> > > > > > >
> > > > > >
> > > > > > I did not invent this; I have just simplified this calculation from
> > > > > > downstream, and the output values remain the same. I have no cameras
> > > > > > which can output V4L2_PIX_FMT_YUV420 or V4L2_PIX_FMT_YVU420, so I
> > > > > > cannot test whether this works either. Other YUV and RAW formats
> > > > > > were tested on real HW and work perfectly fine.
> > > > >
> > > > > My understanding from the code was that the MEDIA_BUS_FMT_ formats
> > > > > listed in the video format table refer to the input formats from the
> > > > > camera, and the V4L2_PIX_FMT_ formats to the output formats from VI.
> > > > > Hence VI could input UYVY8_2X8 and write to memory in YUV420. The
> > > > > code dealing with V4L2_PIX_FMT_ values seems to be related to the
> > > > > output to memory. Is it possible to test this (your camera -> VI
> > > > > converts to YUV420), or am I mistaken?
> > > > >
> > > >
> > > > The camera I am testing with has no YUV420 options available, and from
> > > > what I can tell there is no way to force VI to output YUV420 unless the
> > > > camera supports it. Any format manipulations should require hooking up
> > > > the ISP, or am I missing something?
> > >
> > > From a quick look at the spec it looks to me like for YUV422 packed input 
> > > formats specifically, VI should be able to convert to YUV420. If that 
> > > were not the case, e.g. 'TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, 
> > > YUV420),' would not make sense anyway as it's talking about both YUV422 
> > > packed input data and then also YUV420.
> > >
> >
> > After additional checking, you are correct: VI should be able to
> > perform YUV422 to YUV420 conversion. One of the reasons why VI is not
> > exposing YUV420 may be the video-centric nature of the driver, so that
> > it exposes only formats supported by both the camera and VI. I will
> > double-check which formats the video device exposes. What exactly
> > should I test?
> >

If you are able to test, I would like to see the following (with a YUV422 input
camera and VI set to output YUV420):
(1) Check that the output image is correct
(2) Check the output image's bytes per line (e.g. with a hex editor)
(3) If the output image's bytes per line is 3/2 * width, try changing it to
1 * width and repeat the test
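
For reference, here is roughly the arithmetic I have in mind, using a
hypothetical 640x480 frame (example numbers only, nothing measured):

	/* patch as posted, YUV420 table entry has bpp = 12: */
	bytesperline = DIV_ROUND_UP(640 * 12, 8)  = 960      /* 3/2 * width */
	sizeimage    = 960 * 480                  = 460800

	/* what I'd expect with three separate planes: */
	bytesperline = 640                                   /* 1 * width */
	sizeimage    = 640 * 480 * 3 / 2          = 460800   /* same total */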

> 
> Alternatively, since the code I propose matches the output of the code
> that was there before, the changes could be applied and revised once
> there is a need. Especially since YUV422 and RAW8/10 work fine and were
> tested. I am not sure there will be many use cases which deliberately
> target YUV420.
> 

Yeah, since it's a pre-existing issue, that makes sense. However, I'd still add
a comment to the bytes per line calculation with a reference to the downstream
code it's based on, noting that it produces an unexpected 3/2 * width for YUV420.
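
Something along these lines, for example (just a sketch, exact wording up to you):

	/*
	 * Matches the downstream soc_mbus_bytes_per_line() calculation:
	 * bytesperline = width * bpp / 8 for every format in the table.
	 * Note that for YUV420/YVU420 (bpp = 12) this yields an unexpected
	 * 3/2 * width per line; kept for now to match downstream behaviour.
	 */
	pix->bytesperline = DIV_ROUND_UP(pix->width * bpp, 8);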

Mikko

> > > >
> > > > > It's certainly possible that the current code is functional -- if
> > > > > bytesperline is set to too large a value and that information flows
> > > > > to userspace, it could still read the buffer. It would just waste
> > > > > memory.
> > > > >
> > > > > >
> > > > > > > >
> > > > > > > > > > +     pix->sizeimage = pix->bytesperline * pix->height;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  /*
> > > > > > > > > > @@ -576,20 +564,23 @@ static const struct tegra_vi_ops tegra20_vi_ops = {
> > > > > > > > > >       .vi_stop_streaming = tegra20_vi_stop_streaming,
> > > > > > > > > >  };
> > > > > > > > > >
> > > > > > > > > > -#define TEGRA20_VIDEO_FMT(MBUS_CODE, BPP, FOURCC)    \
> > > > > > > > > > -{                                                    \
> > > > > > > > > > -     .code    = MEDIA_BUS_FMT_##MBUS_CODE,           \
> > > > > > > > > > -     .bpp     = BPP,                                 \
> > > > > > > > > > -     .fourcc  = V4L2_PIX_FMT_##FOURCC,               \
> > > > > > > > > > +#define TEGRA20_VIDEO_FMT(DATA_TYPE, BIT_WIDTH, MBUS_CODE, BPP, FOURCC)      \
> > > > > > > > > > +{                                                                            \
> > > > > > > > > > +     .img_dt         = TEGRA_IMAGE_DT_##DATA_TYPE,                           \
> > > > > > > > > > +     .bit_width      = BIT_WIDTH,                                            \
> > > > > > > > > > +     .code           = MEDIA_BUS_FMT_##MBUS_CODE,                            \
> > > > > > > > > > +     .bpp            = BPP,                                                  \
> > > > > > > > > > +     .fourcc         = V4L2_PIX_FMT_##FOURCC,                                \
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  static const struct tegra_video_format tegra20_video_formats[] = {
> > > > > > > > > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 2, UYVY),
> > > > > > > > > > -     TEGRA20_VIDEO_FMT(VYUY8_2X8, 2, VYUY),
> > > > > > > > > > -     TEGRA20_VIDEO_FMT(YUYV8_2X8, 2, YUYV),
> > > > > > > > > > -     TEGRA20_VIDEO_FMT(YVYU8_2X8, 2, YVYU),
> > > > > > > > > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YUV420),
> > > > > > > > > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YVU420),
> > > > > > > > > > +     /* YUV422 */
> > > > > > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 16, UYVY),
> > > > > > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, VYUY8_2X8, 16, VYUY),
> > > > > > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YUYV8_2X8, 16, YUYV),
> > > > > > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YVYU8_2X8, 16, YVYU),
> > > > > > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YUV420),
> > > > > > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YVU420),
> > > > > > > > > >  };
> > > > > > > > > >
> > > > > > > > > >  const struct tegra_vi_soc tegra20_vi_soc = {
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > >
> > > > >
> > > > >
> > > > >
> > >
> > >
> > >
> > >



