Re: [PATCH v3 15/22] staging: media: tegra-video: tegra20: simplify format align calculations

Mikko Perttunen Wed, 01 Oct 2025 21:00:35 -0700

On Wednesday, October 1, 2025 4:59 PM Svyatoslav Ryhel wrote:
> ср, 1 жовт. 2025 р. о 10:51 Mikko Perttunen <[email protected]> пише:
> >
> > On Wednesday, October 1, 2025 2:35 PM Svyatoslav Ryhel wrote:
> > > ср, 1 жовт. 2025 р. о 08:07 Svyatoslav Ryhel <[email protected]> пише:
> > > >
> > > > ср, 1 жовт. 2025 р. о 07:38 Mikko Perttunen <[email protected]> 
> > > > пише:
> > > > >
> > > > > On Friday, September 26, 2025 12:16 AM Svyatoslav Ryhel wrote:
> > > > > > Simplify format align calculations by slightly modifying supported 
> > > > > > formats
> > > > > > structure. Adjusted U and V offset calculations for planar formats 
> > > > > > since
> > > > > > YUV420P bits per pixel is 12 (1 full plane for Y + 2 * 1/4 planes 
> > > > > > for U
> > > > > > and V) so stride is width * 3/2, but offset must be calculated with 
> > > > > > plain
> > > > > > width since each plain has stride width * 1. This aligns with 
> > > > > > downstream
> > > > >
> > > > > plane
> > > > >
> > > > > > behavior which uses same approach for offset calculations.
> > > > > >
> > > > > > Signed-off-by: Svyatoslav Ryhel <[email protected]>
> > > > > > ---
> > > > > >  drivers/staging/media/tegra-video/tegra20.c | 58 
> > > > > > +++++++++------------
> > > > > >  drivers/staging/media/tegra-video/vi.h      |  3 +-
> > > > > >  2 files changed, 27 insertions(+), 34 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/staging/media/tegra-video/tegra20.c 
> > > > > > b/drivers/staging/media/tegra-video/tegra20.c
> > > > > > index 7c3ff843235d..b7a39723dfc2 100644
> > > > > > --- a/drivers/staging/media/tegra-video/tegra20.c
> > > > > > +++ b/drivers/staging/media/tegra-video/tegra20.c
> > > > > > @@ -280,20 +280,8 @@ static void tegra20_fmt_align(struct 
> > > > > > v4l2_pix_format *pix, unsigned int bpp)
> > > > > >       pix->width  = clamp(pix->width,  TEGRA20_MIN_WIDTH,  
> > > > > > TEGRA20_MAX_WIDTH);
> > > > > >       pix->height = clamp(pix->height, TEGRA20_MIN_HEIGHT, 
> > > > > > TEGRA20_MAX_HEIGHT);
> > > > > >
> > > > > > -     switch (pix->pixelformat) {
> > > > > > -     case V4L2_PIX_FMT_UYVY:
> > > > > > -     case V4L2_PIX_FMT_VYUY:
> > > > > > -     case V4L2_PIX_FMT_YUYV:
> > > > > > -     case V4L2_PIX_FMT_YVYU:
> > > > > > -             pix->bytesperline = roundup(pix->width, 2) * 2;
> > > > > > -             pix->sizeimage = roundup(pix->width, 2) * 2 * 
> > > > > > pix->height;
> > > > > > -             break;
> > > > > > -     case V4L2_PIX_FMT_YUV420:
> > > > > > -     case V4L2_PIX_FMT_YVU420:
> > > > > > -             pix->bytesperline = roundup(pix->width, 8);
> > > > > > -             pix->sizeimage = roundup(pix->width, 8) * pix->height 
> > > > > > * 3 / 2;
> > > > > > -             break;
> > > > > > -     }
> > > > > > +     pix->bytesperline = DIV_ROUND_UP(pix->width * bpp, 8);
> > > > > > +     pix->sizeimage = pix->bytesperline * pix->height;
> > > > > >  }
> > > > > >
> > > > > >  /*
> > > > > > @@ -305,6 +293,7 @@ static void tegra20_channel_queue_setup(struct 
> > > > > > tegra_vi_channel *chan)
> > > > > >  {
> > > > > >       unsigned int stride = chan->format.bytesperline;
> > > > > >       unsigned int height = chan->format.height;
> > > > > > +     unsigned int width = chan->format.width;
> > > > > >
> > > > > >       chan->start_offset = 0;
> > > > > >
> > > > > > @@ -321,8 +310,8 @@ static void tegra20_channel_queue_setup(struct 
> > > > > > tegra_vi_channel *chan)
> > > > > >
> > > > > >       case V4L2_PIX_FMT_YUV420:
> > > > > >       case V4L2_PIX_FMT_YVU420:
> > > > > > -             chan->addr_offset_u = stride * height;
> > > > > > -             chan->addr_offset_v = chan->addr_offset_u + stride * 
> > > > > > height / 4;
> > > > > > +             chan->addr_offset_u = width * height;
> > > > > > +             chan->addr_offset_v = chan->addr_offset_u + width * 
> > > > > > height / 4;
> > > > > >
> > > > > >               /* For YVU420, we swap the locations of the U and V 
> > > > > > planes. */
> > > > > >               if (chan->format.pixelformat == V4L2_PIX_FMT_YVU420)
> > > > > > @@ -332,14 +321,14 @@ static void 
> > > > > > tegra20_channel_queue_setup(struct tegra_vi_channel *chan)
> > > > > >               chan->start_offset_v = chan->addr_offset_v;
> > > > > >
> > > > > >               if (chan->vflip) {
> > > > > > -                     chan->start_offset   += stride * (height - 1);
> > > > > > -                     chan->start_offset_u += (stride / 2) * 
> > > > > > ((height / 2) - 1);
> > > > > > -                     chan->start_offset_v += (stride / 2) * 
> > > > > > ((height / 2) - 1);
> > > > > > +                     chan->start_offset   += width * (height - 1);
> > > > > > +                     chan->start_offset_u += (width / 2) * 
> > > > > > ((height / 2) - 1);
> > > > > > +                     chan->start_offset_v += (width / 2) * 
> > > > > > ((height / 2) - 1);
> > > > > >               }
> > > > > >               if (chan->hflip) {
> > > > > > -                     chan->start_offset   += stride - 1;
> > > > > > -                     chan->start_offset_u += (stride / 2) - 1;
> > > > > > -                     chan->start_offset_v += (stride / 2) - 1;
> > > > > > +                     chan->start_offset   += width - 1;
> > > > > > +                     chan->start_offset_u += (width / 2) - 1;
> > > > > > +                     chan->start_offset_v += (width / 2) - 1;
> > > > > >               }
> > > > > >               break;
> > > > > >       }
> > > > > > @@ -576,20 +565,23 @@ static const struct tegra_vi_ops 
> > > > > > tegra20_vi_ops = {
> > > > > >       .vi_stop_streaming = tegra20_vi_stop_streaming,
> > > > > >  };
> > > > > >
> > > > > > -#define TEGRA20_VIDEO_FMT(MBUS_CODE, BPP, FOURCC)    \
> > > > > > -{                                                    \
> > > > > > -     .code    = MEDIA_BUS_FMT_##MBUS_CODE,           \
> > > > > > -     .bpp     = BPP,                                 \
> > > > > > -     .fourcc  = V4L2_PIX_FMT_##FOURCC,               \
> > > > > > +#define TEGRA20_VIDEO_FMT(DATA_TYPE, BIT_WIDTH, MBUS_CODE, BPP, 
> > > > > > FOURCC)      \
> > > > > > +{                                                                  
> > > > > >   \
> > > > > > +     .img_dt         = TEGRA_IMAGE_DT_##DATA_TYPE,                 
> > > > > >   \
> > > > > > +     .bit_width      = BIT_WIDTH,                                  
> > > > > >   \
> > > > > > +     .code           = MEDIA_BUS_FMT_##MBUS_CODE,                  
> > > > > >   \
> > > > > > +     .bpp            = BPP,                                        
> > > > > >   \
> > > > > > +     .fourcc         = V4L2_PIX_FMT_##FOURCC,                      
> > > > > >   \
> > > > > >  }
> > > > > >
> > > > > >  static const struct tegra_video_format tegra20_video_formats[] = {
> > > > > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 2, UYVY),
> > > > > > -     TEGRA20_VIDEO_FMT(VYUY8_2X8, 2, VYUY),
> > > > > > -     TEGRA20_VIDEO_FMT(YUYV8_2X8, 2, YUYV),
> > > > > > -     TEGRA20_VIDEO_FMT(YVYU8_2X8, 2, YVYU),
> > > > > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YUV420),
> > > > > > -     TEGRA20_VIDEO_FMT(UYVY8_2X8, 1, YVU420),
> > > > > > +     /* YUV422 */
> > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 16, UYVY),
> > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, VYUY8_2X8, 16, VYUY),
> > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YUYV8_2X8, 16, YUYV),
> > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, YVYU8_2X8, 16, YVYU),
> > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YUV420),
> > > > > > +     TEGRA20_VIDEO_FMT(YUV422_8, 16, UYVY8_2X8, 12, YVU420),
> > > > > >  };
> > > > >
> > > > > Looking at the code, BPP seems to only be used for the line stride 
> > > > > (i.e. bytes per line) calculation. I think we should just make it 8 
> > > > > for the planar formats (possibly with an explaining comment). With 
> > > > > the current code, we end up with 'bytesperline' variables in places 
> > > > > not being the actual bytes per line, which is confusing.
> > > > >
> > > > > Actually, we can then just make the 'bpp' field be bytes per pixel as 
> > > > > it was before to avoid the discrepancy with Tegra210.
> > > > >
> > > >
> > > > No, this code is actually cleaner and in sync with what downstream
> > > > does, Tegra210 bytes per pixel is confusing since it totally neglects
> > > > formats with fractional bytes per pixel, it is impossible to set there
> > > > 3/2 for example, which is used by YUV420.
> > > >
> > > > According to downstream code bytes_per_line =
> > > > soc_mbus_bytes_per_line..., downstream directly name is bytes_per_line
> > > > and soc_mbus_bytes_per_line returns width * 3 / 2 which is correct
> > > > calculation (12 bits). Meanwhile for planar formats Tegra has 3
> > > > different buffers so with offset calculation plain width must be used
> > > > (which matches downstream).
> > > >
> > >
> > > If you mean use of BPP by VI, I can propose removing bytesperline and
> > > sizeimage configuration from VI entirely and leave this to per-SoC
> > > fmt_align function which does this already anyway and guards every
> > > time those values are referred. This way there will be no instances
> > > where "places not being the actual bytes per line" comes true.
> >
> > Without trying myself, I'm not sure what approach is the cleanest. In any 
> > case, the downstream code is just wrong (or incorrectly named), so we 
> > shouldn't defer to it in this matter. I don't see a reason to keep the 
> > value '12' either if it doesn't serve any purpose (admittedly if we changed 
> > it to 8 or 1, 'bpp' would be a confusing name for it, but explainable with 
> > a comment and improve-able later) I don't mind having an if/switch 
> > statement for the planar formats to use a '8' as multiplier instead of '12' 
> > if we need to keep the '12'. But the main thing I want to avoid is a 
> > bytesperline/stride variable that isn't the line stride in bytes.
> >
> 
> I am proposing you a solution, handle bytesperline and sizeimage in
> per-SoC fmt_align function.


Ok, I think that sounds good. It's good to consolidate the calculation in one 
place.

> 
> 12 represents amount of bits used per pixel, 8 for Y plane, 2 for U
> plane and 2 for V plane, total is 12. "but explainable with a comment
> and improve-able later" why then we cannot use 12 with a comment? this
> is all arbitrary. Downstream is not wrong from this perspective, you
> don't take into account that YUV420 is planar and it uses 3 planes a
> whole Y plane and 1/4 of U and V which in total results in wigth + 2 *
> 1/4 width which is width * 3/2

Yes -- but AIUI, the only thing the bpp value is used for the bytesperline 
calculation. When we add the special case for planar formats, which doesn't use 
the bpp value, then the value 12 is never used anywhere. We should at least 
have a comment saying it is unused. (At that point, we could just hardcode the 
bpp values in the fmt_align function -- but I don't mind either way.)

> 
> > >
> > > > > >
> > > > > >  const struct tegra_vi_soc tegra20_vi_soc = {
> > > > > > diff --git a/drivers/staging/media/tegra-video/vi.h 
> > > > > > b/drivers/staging/media/tegra-video/vi.h
> > > > > > index bfadde8858d4..5cbc0606ed6c 100644
> > > > > > --- a/drivers/staging/media/tegra-video/vi.h
> > > > > > +++ b/drivers/staging/media/tegra-video/vi.h
> > > > > > @@ -281,7 +281,8 @@ enum tegra_image_dt {
> > > > > >   * @img_dt: MIPI CSI-2 data type (for CSI-2 only)
> > > > > >   * @bit_width: format width in bits per component (for 
> > > > > > CSI/Tegra210 only)
> > > > > >   * @code: media bus format code
> > > > > > - * @bpp: bytes per pixel (when stored in memory)
> > > > > > + * @bpp: bytes per pixel (when stored in memory) for Tegra210,
> > > > > > + *    bits per pixel for Tegra20/Tegra30
> > > > > >   * @img_fmt: image format (for CSI/Tegra210 only)
> > > > > >   * @fourcc: V4L2 pixel format FCC identifier
> > > > > >   */
> > > > > >
> >
> >
> >
> >

Re: [PATCH v3 15/22] staging: media: tegra-video: tegra20: simplify format align calculations

Reply via email to