Salut

> Btw. this is not everything, the far more biggest issue, is that we cannot
> scan the resource files attached in libs and executables, where those qml
> files are shipped within the binary package. Patches to solve this are very
> welcome, as I don't know how to extract the resources from a binary.


The attached script should decode enough information from embedded compiled QML
to extract imports from executables and libraries.


If you don't want to depend on python3-elftools, you can replace the use of
find_in_elf() with find_in_file() in the function scan_binary_for_imports().


$ ./qmlbytecode.py /usr/bin/kdeconnect-sms
('org.kde.people', None)
('QtQuick', None)
('QtCore', None)
('QtQuick.Dialogs', None)
('org.kde.config', None)
('org.kde.kirigami.delegates', None)
('org.kde.kirigamiaddons.components', None)
('QtMultimedia', None)
('org.kde.kdeconnect.sms', None)
('org.kde.kirigamiaddons.formcard', None)
('QtQuick.Layouts', None)
('org.kde.kdeconnect', None)
('QtQuick.Controls', None)
('org.kde.kirigami', None)

$ ./qmlbytecode.py /usr/lib/x86_64-linux-gnu/qt6/plugins/plasma/applets/org.kde.panel.so
('org.kde.draganddrop', '2.0')
('org.kde.ksvg', '1.0')
('org.kde.plasma.plasmoid', '2.0')
('org.kde.kquickcontrolsaddons', '2.0')
('QtQuick.Window', None)
('org.kde.plasma.core', None)
('QtQuick.Layouts', '1.1')
('QtQuick.Layouts', '1.0')
('org.kde.plasma.components', '3.0')
('org.kde.plasma.extras', '2.0')
('org.kde.kirigami', None)
('QtQuick', '2.15')
('QtQuick', '2.5')
('org.kde.kirigami', '2.20')



Regards,
Olivier


#!/usr/bin/python3
# SPDX-FileCopyrightText: 2026 Olivier Trichet <[email protected]>
# SPDX-License-Identifier: LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only

# Note: this script was written using the code of the class mapping the bytecode of a compiled QML unit.
#       As such, it uses the same licenses.
#  https://github.com/qt/qtdeclarative/blob/0a102bea9f45058d10e3799cea7eb4a0da41ad9d/src/qml/common/qv4compileddata_p.h


'''
Tools to decode the bytecode of QML embedded as a compiled unit in a binary.
'''

import argparse
from elftools.elf.elffile import ELFFile
import pathlib
from typing import Any, Optional
import sys


# Magic bytes searched for to locate a compiled QML unit in a binary.
MAGIC_QML = b"qv4cdata"
# Leading bytes checked to recognise an ELF file.
MAGIC_ELF = b"\x7fELF"

# Sizes, in bytes, of the fixed-width integer fields used by the attribute tables.
SIZEOF_QUINT8 = 1
SIZEOF_QUINT16_LE = 2
SIZEOF_QUINT32_LE = 4
SIZEOF_QINT32_LE = 4

# Byte-order name passed to int.from_bytes() and compared against sys.byteorder.
LITTLE_ENDIAN = 'little'


def _parse_raw(data: bytes) -> bytes:
    """Identity parser: hand the field's bytes back untouched."""
    raw = data
    return raw


def _parse_unsigned_int(data: bytes) -> int:
    """Decode *data* as a little-endian unsigned integer."""
    value = int.from_bytes(data, LITTLE_ENDIAN, signed=False)
    return value


def _parse_signed_int(data: bytes) -> int:
    """Decode *data* as a little-endian signed (two's complement) integer."""
    value = int.from_bytes(data, LITTLE_ENDIAN, signed=True)
    return value


def _sizeof(clazz) -> int:
    """Return the total byte size of the fields listed in ``clazz._attributes``.

    Each entry of ``_attributes`` is a ``(name, size, parser)`` tuple; only the
    size component contributes to the result.
    """
    return sum(field_size for _name, field_size, _parser in clazz._attributes)


# Placeholder so that the annotations of the classes defined before the real
# ByteCodeUnit (further down in this file) can already reference the name.
class ByteCodeUnit:
    pass


class _QmlByteCodeElement:
    """Base class for a fixed-layout record located inside a bytecode unit.

    Subclasses declare ``_attributes``: an ordered list of
    ``(name, size_in_bytes, parser)`` tuples describing the record's fields,
    laid out back to back starting at the element offset.
    """

    def __init__(self, bytecode_unit: ByteCodeUnit, elt_offset: int):
        self._elt_offset = elt_offset
        self._unit = bytecode_unit

    def _get(self, attr_name: str) -> Any:
        """Return the parsed value of the field called *attr_name*.

        Raises ValueError (carrying the field name) when the record declares
        no such field.
        """
        raw = self._unit._bytecode
        cursor = self._elt_offset
        for field_name, field_size, parse in type(self)._attributes:
            if field_name != attr_name:
                # not the requested field: step over it
                cursor += field_size
                continue
            return parse(raw[cursor:cursor + field_size])

        raise ValueError(attr_name)


class String(_QmlByteCodeElement):
    """A string record: a 32-bit 'size' field followed by UTF-16-LE data."""

    _attributes = [
        ('size', SIZEOF_QINT32_LE, _parse_signed_int),
    ]

    def __init__(self, bytecode_unit: ByteCodeUnit, offset: int):
        super().__init__(bytecode_unit, offset)

    def __str__(self):
        """Decode and return the text stored right after the 'size' field."""
        # 'size' counts UTF-16 characters, each of which occupies 2 bytes
        byte_count = self._get('size') * 2
        start = self._elt_offset + _sizeof(String)
        payload = self._unit._bytecode[start:start + byte_count]
        return payload.decode('utf-16-le')


class Import(_QmlByteCodeElement):
    """One import record of a compiled QML unit."""

    _attributes = [
        ('type', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('uriIndex', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('qualifierIndex', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('location', 4, _parse_raw),  # Location
        ('version_A', SIZEOF_QUINT8, _parse_unsigned_int),
        ('version_B', SIZEOF_QUINT8, _parse_unsigned_int),
        ('reserved', 2, _parse_raw)
    ]

    # Values of the 'type' field and the name reported for each of them.
    _TYPE_NAMES = {
        0x1: 'Library',
        0x2: 'File',
        0x3: 'Script',
        0x4: 'InlineComponent',
    }

    def __init__(self, bytecode_unit: ByteCodeUnit, offset: int):
        super().__init__(bytecode_unit, offset)

    def import_type(self) -> Optional[str]:
        """Return the name of the import type, or None for an unknown value."""
        return self._TYPE_NAMES.get(self._get('type'))

    def uri(self) -> Optional[String]:
        """Return the imported URI looked up in the unit's string table.

        None is returned when the unit cannot resolve the index.
        """
        return self._unit._string_at(self._get('uriIndex'))

    def version(self) -> Optional[str]:
        """Return the import version as a dotted string.

        None is returned when either version byte is 0xff.
        """
        version_a = self._get('version_A')
        version_b = self._get('version_B')
        if version_a == 0xff or version_b == 0xff:
            return None
        # The order of the two version bytes depends on byte order.
        # NOTE(review): sys.byteorder is the byte order of the machine running
        # this script, not of the scanned binary - confirm this is intended
        # when scanning binaries built for another architecture.
        if sys.byteorder == LITTLE_ENDIAN:
            return f"{version_b}.{version_a}"
        return f"{version_a}.{version_b}"


class QmlUnit(_QmlByteCodeElement):
    """The QML part of a compiled unit; gives access to its import records."""

    _attributes = [
        ('nImports', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToImports', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('nObjects', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToObjects', SIZEOF_QUINT32_LE, _parse_unsigned_int)
    ]

    def __init__(self, bytecode_unit: ByteCodeUnit, offset: int):
        super().__init__(bytecode_unit, offset)

    def imports(self) -> list[Import]:
        """Return the 'nImports' Import records stored back to back.

        The first record sits 'offsetToImports' bytes after the start of this
        element.
        """
        first = self._elt_offset + self._get('offsetToImports')
        count = self._get('nImports')
        stride = _sizeof(Import)
        return [Import(self._unit, first + idx * stride) for idx in range(count)]


class ByteCodeUnit(_QmlByteCodeElement):
    """Header of a compiled QML unit, plus access to its string table and QML part.

    The instance is its own "unit": fields are read from ``self._bytecode``
    starting at offset 0.
    """

    _attributes = [
        ('magic', 8, _parse_raw),  # char[8]
        ('version', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('reserved', 4, _parse_raw),
        ('sourceTimeStamp', 8, _parse_raw),  # qint64_le
        ('unitSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('md5Checksum', 16, _parse_raw),  # char[16]
        ('dependencyMD5Checksum', 16, _parse_raw),  # char[16]
        ('flags', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('stringTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToStringTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('functionTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToFunctionTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('classTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToClassTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('templateObjectTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToTemplateObjectTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('blockTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToBlockTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('lookupTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToLookupTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('regexpTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToRegexpTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('constantTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToConstantTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('jsClassTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToJSClassTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('translationTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToTranslationTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('localExportEntryTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToLocalExportEntryTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('indirectExportEntryTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToIndirectExportEntryTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('starExportEntryTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToStarExportEntryTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('importEntryTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToImportEntryTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('moduleRequestTableSize', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToModuleRequestTable', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('indexOfRootFunction', 4, _parse_signed_int),
        ('sourceFileIndex', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('finalUrlIndex', SIZEOF_QUINT32_LE, _parse_unsigned_int),
        ('offsetToQmlUnit', SIZEOF_QUINT32_LE, _parse_unsigned_int)
    ]

    def __init__(self, qml_bytecode: bytes):
        """Wrap *qml_bytecode*, which must start at the beginning of a unit.

        Bytes beyond the size declared in the header are dropped.

        Raises ValueError when the data is too short to hold the 'unitSize'
        field or shorter than the declared unit size.
        """
        self._bytecode = qml_bytecode
        super().__init__(self, 0)

        # 28 = the four fields preceding 'unitSize' (8 + 4 + 4 + 8 bytes)
        # plus the 4 bytes of 'unitSize' itself.
        if len(self._bytecode) < 28:
            # Raising a plain string is not allowed in Python 3 (it would
            # surface as an unrelated TypeError), so use a real exception.
            raise ValueError("Invalid bytecode (bytearray length less that minimal size to read unit size)")
        if self.unit_size() > len(self._bytecode):
            raise ValueError("Invalid bytecode (declared size > bytearray length)")
        self._bytecode = self._bytecode[0:self.unit_size()]

    def unit_size(self) -> int:
        """Return the unit size in bytes as declared in the header."""
        return self._get('unitSize')

    def qml_unit(self) -> QmlUnit:
        """Return the QmlUnit located 'offsetToQmlUnit' bytes into the unit."""
        return QmlUnit(self, self._elt_offset + self._get('offsetToQmlUnit'))

    def string_table_size(self) -> int:
        """Return the number of entries of the string table."""
        return self._get('stringTableSize')

    def _string_at(self, idx: int) -> Optional[String]:
        """Return the string table entry number *idx*.

        None is returned when the unit has no string table or when *idx* lies
        beyond the table.
        """
        string_table_offset = self._get('offsetToStringTable')
        string_table_size = self.string_table_size()

        if string_table_offset == 0 or string_table_size == 0:
            return None

        if idx >= string_table_size:
            # Dynamic string: not stored in the bytecode
            return None

        # the string table is an array of offsets to where the strings are stored
        entry_offset = string_table_offset + idx * SIZEOF_QUINT32_LE
        string_offset = _parse_unsigned_int(
            self._bytecode[entry_offset:entry_offset + SIZEOF_QUINT32_LE])

        return String(self, string_offset)


def decode_qml(file_bytes: bytes, offset: int) -> ByteCodeUnit:
    """Decode the compiled QML unit that starts at *offset* in *file_bytes*."""
    unit_bytes = file_bytes[offset:]
    return ByteCodeUnit(unit_bytes)


def find_in_file(filepath: pathlib.Path) -> list[ByteCodeUnit]:
    '''
    Scan a file for compiled QML units.

    The 'qv4cdata' magic string is searched for in the raw bytes of the file.
    An occurrence of the magic string that does not decode to a plausible
    unit is skipped and the search resumes right after it.
    '''
    file_bytes = filepath.read_bytes()
    magic_len = len(MAGIC_QML)

    results = []
    offset = 0
    while (offset := file_bytes.find(MAGIC_QML, offset)) != -1:
        try:
            unit = ByteCodeUnit(file_bytes[offset:])
            unit_size = unit.unit_size()
        except (TypeError, ValueError):
            # The magic string may occur by chance (e.g. as a plain string
            # constant); such a hit must not abort the whole scan. Depending
            # on how the constructor reports invalid data this surfaces as
            # TypeError (a non-exception object was raised) or ValueError.
            unit_size = 0

        if unit_size < magic_len:
            # A unit cannot be smaller than its own magic. Always step past
            # the match: advancing by 0 would find the same offset forever.
            offset += magic_len
            continue

        results.append(unit)
        offset += unit_size

    return results


def find_in_elf(filepath: pathlib.Path) -> list[ByteCodeUnit]:
    '''
    Scan an ELF file for compiled QML units.

    The 'qv4cdata' magic string is searched for in the data of each ELF
    section. This works exactly like find_in_file() as long as the ELF
    sections are not compressed.
    An occurrence of the magic string that does not decode to a plausible
    unit is skipped and the search resumes right after it.
    An empty list is returned (after printing a message) when the file does
    not start with the ELF magic.
    '''
    magic_len = len(MAGIC_QML)

    with filepath.open('rb') as f:
        if f.read(len(MAGIC_ELF)) != MAGIC_ELF:
            print("Not an ELF file")
            return []

        elffile = ELFFile(f)

        results = []
        for section in elffile.iter_sections():
            # Decompress section if necessary (no bin with compression section in Debian?)
            section_data = section.data()

            offset = 0
            while (offset := section_data.find(MAGIC_QML, offset)) != -1:
                try:
                    unit = decode_qml(section_data, offset)
                    unit_size = unit.unit_size()
                except (TypeError, ValueError):
                    # The magic string may occur by chance (e.g. as a plain
                    # string constant); such a hit must not abort the scan.
                    # Depending on how invalid data is reported this surfaces
                    # as TypeError (a non-exception object was raised) or
                    # ValueError.
                    unit_size = 0

                if unit_size < magic_len:
                    # A unit cannot be smaller than its own magic. Always step
                    # past the match: advancing by 0 would loop forever.
                    offset += magic_len
                    continue

                results.append(unit)
                offset += unit_size
        return results


def scan_binary_for_imports(filepath: pathlib.Path) -> set[tuple[str, Optional[str]]]:
    """Collect the library imports of every compiled QML unit found in an ELF file.

    Returns a set of ``(uri, version)`` tuples; ``version`` is None when the
    import record carries no version. Imports that are not of type 'Library'
    or whose URI cannot be resolved are ignored.
    """
    imports = set()
    for unit in find_in_elf(filepath):
        for qml_import in unit.qml_unit().imports():
            if qml_import.import_type() != 'Library':
                continue
            # resolve the URI once instead of once for the test and once for use
            uri = qml_import.uri()
            if uri is not None:
                imports.add((str(uri), qml_import.version()))
    return imports


def main():
    """Command-line entry point: print every import found in the given binary."""
    cli = argparse.ArgumentParser()
    cli.add_argument("filepath", type=pathlib.Path)
    options = cli.parse_args()

    found = scan_binary_for_imports(options.filepath)
    for entry in found:
        print(entry)


# Run the command-line interface only when the file is executed as a script.
if __name__ == "__main__":
    main()
-- 
https://alioth-lists.debian.net/cgi-bin/mailman/listinfo/pkg-kde-talk

Reply via email to