Sure. Here is our Lua script.

-Paul Edmon-


On 09/13/2018 07:28 AM, Andre Torres wrote:
That's interesting, using AD to maintain UID consistency across all the nodes.
Like Loris, I'm also interested in your Lua script.

-
André

On 13/09/2018, 11:42, "slurm-users on behalf of Loris Bennett" 
<slurm-users-boun...@lists.schedmd.com on behalf of loris.benn...@fu-berlin.de> wrote:

     Hi Paul,

     I'd be interested in seeing your Lua submit script, if you're willing to
     share.

     Until now I had thought that the most elegant way of setting up Slurm
     users would be via a PAM module analogous to pam_mkhomedir, the simplest
     option being to use pam_script.

     However, given that we do have users who somehow never get round to
     submitting a job before their HPC access expires, setting up the Slurm
     account when the first job is submitted seems quite appealing.
     Cheers,

     Loris

     Paul Edmon <ped...@cfa.harvard.edu> writes:

     > So useradd is adding a Linux user, while sacctmgr creates a Slurm user.
     >
     > What we do is run AD for our Linux user management. Then, in our job
     > submit Lua script, we check whether the user has an account in Slurm,
     > and if they don't, we create it.
     >
     > Another way would be to create all your Linux users first and then map
     > them into Slurm using sacctmgr.
     >
     > It really depends on whether your Slurm users are a subset of your
     > regular users or not.
     >
     > -Paul Edmon-
     >
     > On 9/12/2018 12:21 PM, Andre Torres wrote:
     >
     >  Hi all,
     >
     >  I’m new to Slurm and I’m confused about user creation. I have an
     >  installation with 1 login node and 5 compute nodes. If I create a user
     >  across all the nodes with the same uid and gid, I can execute jobs, but
     >  I can’t understand the difference between creating a user with the
     >  “useradd” command and creating one with the “sacctmgr” command:
     >
     >  sacctmgr create account name=test
     >
     >  sacctmgr create user jdoe account=test
     >
     >  Also, is there any way of creating a user on the login node and
     >  replicating it to the compute nodes? What is the best practice for
     >  user creation?
     >
     >  Thanks in advance
     >
     >
     --
     Dr. Loris Bennett (Mr.)
     ZEDAT, Freie Universität Berlin         Email loris.benn...@fu-berlin.de



-- Cache association existence for this many seconds, with the caveat
-- that if you manually remove a user's association, it will take SLURM
-- up to this long to recognize that it's gone.
local ASSOC_CACHE_LIFETIME = 24 * 60 * 60

-- Cache the start time of a downtime (if any) for this many minutes, so we
-- don't have to call 'scontrol show reservations' for every job submission.
local DOWNTIME_RESERVATION_CACHE = 30
-- Demand that jobs end this many minutes before a scheduled downtime.
local SUGGESTED_JOB_END_BEFORE_DOWNTIME = 60

local posix = require "posix"

function in_group(user, group)
	local grent = posix.getgroup(group)
	if grent == nil then
		return false
	end

	-- luaposix stores the member names at integer indices starting at 1,
	-- so start scanning there (starting at 0 would never find anyone).
	local i = 1
	while grent[i] do
		if grent[i] == user then
			return true
		end
		i = i + 1
	end

	return false
end
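-- Example usage (hypothetical user and group names):
--   in_group("jdoe", "cluster_users")  --> true if "jdoe" is listed as a
--   member of the group "cluster_users"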

-- Compatibility: Lua-5.1
-- Got this from: http://lua-users.org/wiki/SplitJoin
function split(str, pat)
   local t = {}  -- NOTE: use {n = 0} in Lua-5.0
   local fpat = "(.-)" .. pat
   local last_end = 1
   local s, e, cap = str:find(fpat, 1)
   while s do
      if s ~= 1 or cap ~= "" then
	 table.insert(t,cap)
      end
      last_end = e+1
      s, e, cap = str:find(fpat, last_end)
   end
   if last_end <= #str then
      cap = str:sub(last_end)
      table.insert(t, cap)
   end
   return t
end
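-- Example: splitting a comma-separated partition list, as slurm_job_submit
-- does below:
--   split("gpu_requeue,serial_requeue", ",")  --> { "gpu_requeue", "serial_requeue" }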

local existing_assocs = {}
local existing_assocs_last_expired = os.time()
function assoc_exists(user, account)
	-- Expire the cache if it's old.
	if existing_assocs_last_expired + ASSOC_CACHE_LIFETIME < os.time() then
		existing_assocs = {}
		existing_assocs_last_expired = os.time()
	end

	if existing_assocs[user .. "\n" .. account] then
		return true
	end

	-- Unfortunately, filehandles returned by io.popen() don't have a
	-- way to return their exitstatuses until lua 5.2.  We should be
	-- reasonably safe here, since if we erroneously conclude the
	-- association doesn't exist, we'll just try to add it.
	--
	-- http://lua-users.org/lists/lua-l/2012-01/msg00364.html
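	--
	-- (On Lua 5.2+, the exit status would be available from fp:close(),
	-- which returns true/nil, "exit"/"signal", and the status code,
	-- mirroring os.execute().)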
	local fp = io.popen(string.format(
		"sacctmgr --parsable2 --noheader list associations " ..
			"format=account users='%s'",
		user
	))
	local found = false
	for line in fp:lines() do
		if line == account then
			found = true
		end
	end
	fp:close()

	if found then
		existing_assocs[user .. "\n" .. account] = true
		return true
	end

	return false
end
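-- Example usage (hypothetical names): assoc_exists("jdoe", "jdoe_lab")
-- returns true if sacctmgr reports an association between user jdoe and
-- account jdoe_lab, consulting the local cache first.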

function ensure_assoc_exists(user, account)
	-- Nothing to do if the user already has the expected association.
	if assoc_exists(user, account) then
		return true
	end

	local fp = io.popen(string.format(
		"sacctmgr --parsable2 --noheader list associations " ..
			"format=account users='%s'",
		user
	))

	local oldaccount = nil

	for line in fp:lines() do
		oldaccount = line
	end
	fp:close()

	if oldaccount == nil then
		--Create new group if needed
		local ret = os.execute(string.format(
			"sacctmgr -i create account name='%s' " ..
				"fairshare=45",
			account
		))

		if ret ~= 0 then
			slurm.log_info("sacctmgr failed to add account %s " ..
				"with exit status %d",
				account, ret)
		end

		--Create new users if needed
		ret = os.execute(string.format(
			"sacctmgr -i create user name='%s' defaultaccount='%s' " ..
				"fairshare=parent maxsubmitjobs=10100",
			user, account
		))
		if ret == 0 then
			existing_assocs[user .. "\n" .. account] = true
			slurm.log_info("added association to %s for %s",
				account, user)
			return true
		end

		slurm.log_info("sacctmgr failed to add association to " ..
			"%s for %s with exit status %d",
			account, user, ret)
		return false
	end

	--If we get here we need to reassociate the user, because
	--the user does exist but is in the wrong group.

	--Create new group if needed
	local ret = os.execute(string.format(
		"sacctmgr -i create account name='%s' " ..
			"fairshare=45",
		account
	))

	if ret ~= 0 then
		slurm.log_info("sacctmgr failed to add account %s " ..
			"with exit status %d",
			account, ret)
	end

	--Now move the user to their new group
	ret = os.execute(string.format(
		"sacctmgr -i add user name='%s' defaultaccount='%s'",
		user, account
	))

	if ret == 0 then
		slurm.log_info("added association to %s for %s",
			account, user)
	else
		slurm.log_info("sacctmgr failed to add association to " ..
			"%s for %s with exit status %d",
			account, user, ret)
	end

	--Removing the old group
	ret = os.execute(string.format(
		"sacctmgr -i delete user name='%s' account='%s'",
		user, oldaccount
	))

	if ret == 0 then
		slurm.log_info("removed association of %s for %s",
			oldaccount, user)
	else
		slurm.log_info("sacctmgr failed to remove account %s " ..
			"with exit status %d",
			oldaccount, ret)
	end

	--Setting other parameters for the user
	ret = os.execute(string.format(
		"sacctmgr -i modify user where name='%s' " ..
		"cluster=odyssey defaultaccount='%s' set " ..
		"fairshare=parent maxsubmitjobs=10100",
		user, account
	))

	if ret == 0 then
		existing_assocs[user .. "\n" .. account] = true
		slurm.log_info("added association to %s for %s",
			account, user)
		return true
	end

	slurm.log_info("sacctmgr failed to modify user %s " ..
		"with exit status %d",
		user, ret)
	return false
end

_downtime_start = -1
_downtime_start_last_fetched = -1
function downtime_reservation_start()
	if _downtime_start_last_fetched + DOWNTIME_RESERVATION_CACHE * 60 >= os.time() then
		-- Return the unexpired cached value.
		return _downtime_start
	end

	local fp = io.popen("scontrol -o show reservations downtime")
	for line in fp:lines() do
		if line:sub(1, string.len("ReservationName=downtime ")) == "ReservationName=downtime " then
			local result, count = line:gsub(".* StartTime=(%d+-%d+-%d+)T(%d+:%d+:%d+) .*", "%1 %2")
			if count > 0 then
				_downtime_start_last_fetched = os.time()
				-- el6's lua-posix is too old to have strptime(),
				-- so we'll use date(1) to parse times instead.
				_downtime_start = tonumber(io.popen(
					string.format("date -d '%s' +%%s", result)):read())
				fp:close()
				return _downtime_start

				-- parsed, _ = posix.strptime(result, "%Y-%m-%d %H:%M:%S")
				-- -- luaposix puts the day of the month in 'monthday', but
				-- -- os.time() wants it in 'day'.
				-- parsed['day'] = parsed['monthday']
				-- return os.time(parsed)
			end
		end
	end

	-- No scheduled downtime.
	fp:close()
	_downtime_start_last_fetched = os.time()
	_downtime_start = -1
	return _downtime_start
end
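-- Example: if a reservation named "downtime" exists with (hypothetically)
-- StartTime=2018-09-20T08:00:00, downtime_reservation_start() returns the
-- corresponding Unix epoch time; with no such reservation it returns -1.
-- The result is cached for DOWNTIME_RESERVATION_CACHE minutes.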

function seconds_to_slurm_timespec(seconds)
	local days = math.floor(seconds / 60 / 60 / 24)
	local leftover = seconds - (days * 60 * 60 * 24)

	local hours = math.floor(leftover / 60 / 60)
	leftover = (leftover - (hours * 60 * 60))

	local minutes = math.floor(leftover / 60)

	return string.format("%02i-%02i:%02i", days, hours, minutes)
end
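-- Example: seconds_to_slurm_timespec(90061) --> "01-01:01"
-- (1 day, 1 hour, 1 minute; leftover seconds are truncated)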

function slurm_timespec_to_seconds(timespec)
	local days, hours, minutes, seconds

	minutes = timespec:match("^(%d+)$")
	if minutes then
		return tonumber(timespec) * 60
	end

	minutes, seconds = timespec:match("^(%d+):(%d+)$")
	if minutes and seconds then
		return minutes * 60 + seconds
	end

	hours, minutes, seconds = timespec:match("^(%d+):(%d+):(%d+)$")
	if hours and minutes and seconds then
		return hours * 60 * 60 + minutes * 60 + seconds
	end

	days, hours = timespec:match("^(%d+)-(%d+)$")
	if days and hours then
		return days * 24 * 60 * 60 + hours * 60 * 60
	end

	days, hours, minutes = timespec:match("^(%d+)-(%d+):(%d+)$")
	if days and hours and minutes then
		return days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60
	end

	days, hours, minutes, seconds = timespec:match("^(%d+)-(%d+):(%d+):(%d+)$")
	if days and hours and minutes and seconds then
		return days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60 + seconds
	end

	return -1
end
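-- Examples of the Slurm time formats parsed above:
--   slurm_timespec_to_seconds("90")          --> 5400   (bare minutes)
--   slurm_timespec_to_seconds("2-12:30:00")  --> 217800 (days-hours:minutes:seconds)
--   slurm_timespec_to_seconds("garbage")     --> -1     (unparseable)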

function friendly_error_if_time_limit_intersects_downtime(time_limit)
	local reservation_start = downtime_reservation_start()
	if reservation_start < 0 then
		-- No scheduled downtime.
		return 0
	end

	local now = os.time(os.date("*t"))
	-- time_limit is in minutes.
	local job_end_time = now + time_limit * 60

	if job_end_time <= reservation_start then
		-- The job is scheduled to end before the downtime.
		return 0
	end

	-- Suggest a runtime that ends SUGGESTED_JOB_END_BEFORE_DOWNTIME
	-- minutes before the downtime, so slurmctld has time to process
	-- all the RPCs before the downtime actually starts.
	--
	-- This also gives the user a little leeway to update their job
	-- with the suggested downtime, so the suggested time isn't
	-- rejected if they take a few minutes to update their submission.
	local secs_until_downtime = reservation_start -
		now - SUGGESTED_JOB_END_BEFORE_DOWNTIME * 60
	local suggested_timespec = seconds_to_slurm_timespec(
		secs_until_downtime)

	local downtime_start = posix.localtime(reservation_start)
	slurm.log_user(string.format([[

==============================================================

Your job has not been submitted.


The Odyssey cluster has a scheduled maintenance downtime
starting at %04d-%02d-%02d %02d:%02d:%02d %s.

Your job will not end before the downtime. Please specify
a shorter time limit for your job, such as:

   -t %s

This will give your job the most possible time to run before
the downtime. If your job does not finish before the downtime
starts, it will be terminated then.
==============================================================
]],
		downtime_start.year, downtime_start.month,
		downtime_start.monthday, downtime_start.hour,
		downtime_start.min, downtime_start.sec,
		os.date("%Z"), suggested_timespec))
	return 2051
--	return slurm.ESLURM_INVALID_TIME_LIMIT
end

-- This function checks that the gres field is filled in, to make sure there is an actual request for GRES resources.
-- Otherwise it rejects the job and sends a message to the user. gres is the job's gres string, type is the type of GRES to check against,
-- part is the partition we want to check, and newpart is the partition the user should use if they don't have that GRES defined.
function gres_check(gres, type, part, newpart)
	if gres == nil then
		gres = "none"
	end
	if not string.match(gres, type) then
		slurm.log_user("You must request a %s using the --gres option to use the %s partition; if you have CPU work for this hardware, please use %s", type, part, newpart)
		-- Return a nonzero code so the caller can reject the job.
		return 2052
	end
	return 0
end
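-- Example (hypothetical values): for a job submitted to gpu_requeue with no
-- GRES request, gres_check(nil, "gpu", "gpu_requeue", "serial_requeue") logs
-- a message directing the user to serial_requeue and returns 2052.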

function slurm_job_submit(job_desc, part_list, submit_uid)

	-- Need to exclude root as root is not subject to this.
	if job_desc.user_id == 0 then
		return 0
	end

	-- Silently override --exclusive, forcing jobs to share
	-- nodes if other resources allow.
	if job_desc.shared == 0 then
		job_desc.shared = 1
	end

	local submit_user = posix.getpasswd(submit_uid)
	local submit_user_primary_group = posix.getgroup(submit_user.gid).name
	ensure_assoc_exists(submit_user.name, submit_user_primary_group)

	-- Per RT 1185187, we gate the gpgpu and aagk80 partitions so that only jobs that request GPUs get access.
	-- This table maps each gated partition to the partition the user should use if they don't request GPUs.
	local gres_part = {["gpu_requeue"] = "serial_requeue", ["gpgpu"] = "gpgpu_requeue", ["aagk80"] = "aagk80_requeue"}
	local gres_type = {["gpu_requeue"] = "gpu", ["gpgpu"] = "gpu", ["aagk80"] = "gpu"}

	-- Check whether any partition was actually set.
	if job_desc.partition ~= nil then
		-- Get the partitions that the user is requesting.
		local partition = split(job_desc.partition, ",")

		-- Check whether we are submitting to any of the partitions that we have limits on.
		for i, part in ipairs(partition) do

			-- As it turns out, when --mem-per-cpu is invoked, Slurm takes the value for mem-per-cpu
			-- and adds 2147483647 to it (half of 4294967294, the largest int). We don't want to permit
			-- --mem-per-cpu for these nodes, as those jobs can submit to normal hardware. Also, we can't
			-- know a priori whether someone is using mem-per-cpu versus mem, as they may just be asking
			-- for a ridiculous amount of memory. Suffice it to say this puts an upper limit on memory
			-- requests for bigmem at 2 PB.
			if part == "bigmem" then
				if job_desc.pn_min_memory < 250000 or job_desc.pn_min_memory > 2147483646 then
					slurm.log_user("You must request more than 250GB for jobs in the bigmem partition")
					return 2052
				end
			end

			if part == "ncf_bigmem" then
				if job_desc.pn_min_memory < 30000 or job_desc.pn_min_memory > 2147483646 then
					slurm.log_user("You must request more than 30GB for jobs in the ncf_bigmem partition")
					return 2052
				end
			end

			if part == "interact" then
				slurm.log_user("The interact partition has been replaced with the test partition.  Please use the test partition.")
				return 2052
			end

			-- Check GRES requests; reject the job if the required GRES is missing.
			if gres_part[part] then
				local rc = gres_check(job_desc.gres, gres_type[part], part, gres_part[part])
				if rc ~= 0 then
					return rc
				end
			end
		end
	end

	--Needs to be fixed for the new lua bindings that allow us to query reservations
	--return friendly_error_if_time_limit_intersects_downtime(job_desc.time_limit)
	return 0
end

function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)

	return 0
end

slurm.log_info("initialized")

return slurm.SUCCESS
