Hi DataFlow team,

We are trying to use ffmpeg to process some video data using dataflow.
In order to do this we need the worker nodes to have ffmpeg installed.

After reading Beam docs I created a setup.py file for my job like this:

#!/usr/bin/python
import subprocess
from distutils.command.build import build as _build
import setuptools

class build(_build):
sub_commands = _build.sub_commands + [('CustomCommands', None)]

class CustomCommands(setuptools.Command):
def initialize_options(self):
pass

def finalize_options(self):
pass

def RunCustomCommand(self, command_list):
p = subprocess.Popen(
command_list,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
stdout_data, _ = p.communicate()
if p.returncode != 0:
raise RuntimeError(
'Command %s failed: exit code: %s' % (
command_list, p.returncode))

def run(self):
for command in CUSTOM_COMMANDS:
self.RunCustomCommand(command)

CUSTOM_COMMANDS = [
['apt-get', 'update'],
['apt-get', 'install', '-y', 'ffmpeg']]
REQUIRED_PACKAGES = [
'boto3==1.11.17',
'ffmpeg-python==0.2.0',
'google-cloud-storage==1.31.0']
setuptools.setup(
name='DataflowJob',
version='0.1',
install_requires=REQUIRED_PACKAGES,
packages=setuptools.find_packages(),
mdclass={
'build': build,
'CustomCommands': CustomCommands})

However, when I run the job I still get an error saying that ffmpeg is not
installed: "No such file or directory: 'ffmpeg'"

Any clue what am I doing wrong?

Thanks so much!

Reply via email to