On 03/18/2015 09:41 AM, Max Reitz wrote:
On 2015-03-17 at 19:27, John Snow wrote:On 03/17/2015 03:49 PM, Max Reitz wrote:On 2015-03-04 at 23:15, John Snow wrote:This patch actually implements the transactional callback system for the drive_backup transaction. (1) We manually pick up a reference to the bitmap if present to allow its cleanup to be delayed until after all drive_backup jobs launched by the transaction have fully completed. (2) We create a functional closure that envelops the original drive_backup callback, to be able to intercept the completion status and return code for the job. (3) We add the drive_backup_cb method for the drive_backup action, which unpacks the completion information and invokes the final cleanup. (4) backup_transaction_complete will perform the final cleanup on the backup job. (5) In the case of transaction cancellation, drive_backup_cb is still responsible for cleaning up the mess we may have already made. Signed-off-by: John Snow <js...@redhat.com> --- block/backup.c | 9 +++++++ blockdev.c | 64 ++++++++++++++++++++++++++++++++++++++--------- include/block/block_int.h | 8 ++++++ 3 files changed, 69 insertions(+), 12 deletions(-) diff --git a/block/backup.c b/block/backup.c index 4332df4..3673fb0 100644 --- a/block/backup.c +++ b/block/backup.c @@ -233,6 +233,15 @@ typedef struct { int ret; } BackupCompleteData; +void backup_transaction_complete(BlockJob *job, int ret) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + + if (s->sync_bitmap) { + bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL); + } +} + static void backup_complete(BlockJob *job, void *opaque) { BackupBlockJob *s = container_of(job, BackupBlockJob, common); diff --git a/blockdev.c b/blockdev.c index 9e3b9e9..3ff14a7 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque, int ret) typedef void (CallbackFn)(void *opaque, int ret); -/* Temporary. Removed in the next patch. */ -CallbackFn *new_transaction_wrapper(BlkTransactionState *common, - void *opaque, - void (*callback)(void *, int), - void **new_opaque); - -void undo_transaction_wrapper(BlkTransactionState *common); - /** * Create a new transactional callback wrapper. * @@ -1389,7 +1381,7 @@ void undo_transaction_wrapper(BlkTransactionState *common); * * @return The callback to be used instead of @callback. */ -CallbackFn *new_transaction_wrapper(BlkTransactionState *common, +static CallbackFn *new_transaction_wrapper(BlkTransactionState *common, void *opaque, CallbackFn *callback, void **new_opaque) @@ -1416,7 +1408,7 @@ CallbackFn *new_transaction_wrapper(BlkTransactionState *common, /** * Undo any actions performed by the above call. */ -void undo_transaction_wrapper(BlkTransactionState *common) +static void undo_transaction_wrapper(BlkTransactionState *common) { BlkTransactionList *btl = common->list; BlkTransactionState *bts; @@ -1449,6 +1441,7 @@ void undo_transaction_wrapper(BlkTransactionState *common) blk_put_transaction_state(common); } +static void block_job_cb(void *opaque, int ret); static void _drive_backup(const char *device, const char *target, bool has_format, const char *format, enum MirrorSyncMode sync, @@ -1767,6 +1760,9 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp) BlockDriverState *bs; DriveBackup *backup; Error *local_err = NULL; + CallbackFn *cb; + void *opaque; + BdrvDirtyBitmap *bmap = NULL; assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); backup = common->action->drive_backup; @@ -1777,6 +1773,19 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp) return; } + /* BackupBlockJob is opaque to us, so look up the bitmap ourselves */ + if (backup->has_bitmap) { + bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); + if (!bmap) { + error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); + return; + } + } + + /* Create our transactional callback wrapper, + and register that we'd like to call .cb() later. */ + cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque); + /* AioContext is released in .clean() */ state->aio_context = bdrv_get_aio_context(bs); aio_context_acquire(state->aio_context); @@ -1789,7 +1798,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp) backup->has_bitmap, backup->bitmap, backup->has_on_source_error, backup->on_source_error, backup->has_on_target_error, backup->on_target_error, - NULL, NULL, + cb, opaque, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -1798,6 +1807,11 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp) state->bs = bs; state->job = state->bs->job; + /* Keep the job alive until .cb(), too. */ + block_job_incref(state->job); + if (bmap) { + bdrv_dirty_bitmap_incref(bmap); + } } static void drive_backup_abort(BlkTransactionState *common) @@ -1809,6 +1823,10 @@ static void drive_backup_abort(BlkTransactionState *common) if (bs && bs->job && bs->job == state->job) { block_job_cancel_sync(bs->job); } + + /* Undo any callback actions we may have done. Putting down references + * obtained during prepare() is handled by cb(). */ + undo_transaction_wrapper(common); } static void drive_backup_clean(BlkTransactionState *common) @@ -1820,6 +1838,27 @@ static void drive_backup_clean(BlkTransactionState *common) } } +static void drive_backup_cb(BlkTransactionState *common) +{ + BlkTransactionData *btd = common->opaque; + BlockDriverState *bs = btd->opaque; + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + + assert(state->bs == bs); + if (bs->job) { + assert(state->job == bs->job); + } + + state->aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(state->aio_context); + + /* Note: We also have the individual job's return code in btd->ret */ + backup_transaction_complete(state->job, common->list->status); + block_job_decref(state->job); + + aio_context_release(state->aio_context); +} + typedef struct BlockdevBackupState { BlkTransactionState common; BlockDriverState *bs; @@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = { .instance_size = sizeof(DriveBackupState), .prepare = drive_backup_prepare, .abort = drive_backup_abort, - .clean = drive_backup_clean + .clean = drive_backup_clean, + .cb = drive_backup_cb }, [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { .instance_size = sizeof(BlockdevBackupState), diff --git a/include/block/block_int.h b/include/block/block_int.h index e0d5561..731684d 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockCompletionFunc *cb, void *opaque, Error **errp); +/* + * backup_transaction_complete + * @job The BackupJob that was associated with a transactions/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no structure named "BackupJob", but this looks like there might be one)+ * @ret Amalgamated return code for the entire transactionHm. The call to this function you're introducing in this patch will probably stay the only one so there won't be anyone who'll have to worry about what this means, but if there was, they probably won't reach a conclusive result. I know what it means because I've seen patch 3 (right now it means "everything OR-ed together so it's 0 for success or some non-zero (maybe positive, maybe negative, depending on whether you have an even or an odd number of errors, and depending on whether the jobs return negative values for errors or not) for error"), but I wouldn't be able to infer it from this. At the least you should add that 0 means success and everything else means error (if you take my suggestion for patch 3, it would be 0 for success and -errno for error, where that error number is one of the errors encountered). Other than that, looks good (as far as I can tell with my still limited insights into patch 3). MaxIt's my opinion that this patch should give you insight into patch #3, instead of the other way around. This patch is useful for demonstrating the general flow, because I kept all drive_backup specific concerns strictly separated from patch #3.It only truly helps me understand patch 3 under the assumption that this patch is correct. For reviewing, I cannot really take that assumption. ;-) I mean, it does help me with the interfaces patch 3 offers, but it still doesn't help me with the objects introduced in patch 3, for instance, because those are all contained in patch 3 itself. And if I cannot verify patch 3 on its own, I cannot really verify that what this patch does is correct in regards to the interfaces offered there.I'll write a more comprehensive document for the docs/ folder soon, but the general shape of the idea is "The cleanup actions defined in the .cb() method will be executed after every job in the transactional group has completed."Right, again, assuming your implementation is correct, but questioning that is part of the natural reviewing process. :-)
Yes, that's fine! I am just trying to help you unravel this strange loop (ha ha) and/or try to understand what is not necessarily clear with the information as presented.
But there's some fine print: "'every job' refers only to jobs that have the capability to register a post-transaction callback, which currently means only drive_backup." The general approach to this is, mechanically: (1) Extend the life of objects of interest with reference counts, including Jobs, Bitmaps, and BlkTransactionStates. (2) "steal" the callback from a BlockJob and, when invoked, update our management information for the transaction group in BlkTransactionList. (3) Once all jobs we have sent out return, execute the .cb() methods as indicated in the BlkTransactionList. So, if you were adding a callback to a different QMP transaction: - Look at new_transaction_wrapper; you'll use this to bump up various references used internally for this whole system, and it'll keep qmp_transaction from being able to delete the list and state objects. This function will give to you (as a gimmick) a new callback and opaque data structure pointer for you to give to the job that you are starting. I obfuscate this just a bit to make it clear that you should be using this function every time to help manage the transactional state. - Now, the job will run on its own happy way, When it finishes, it will call transaction_callback, which is the function that "intercepts" the callbacks, and dispatches the original enveloped callbacks. It ditches the original data we used to know how to call the original callback, and begins storing completion information for jobs instead. - transaction_callback will call blk_transaction_complete to store completion info for one job. In turn, it calls put_blk_transaction_list to decrement the pending jobs count (the "jobs" refcount) and once that hits zero ... - All of the callback mechanisms associated with each BlockTransactionState are run via blk_run_transaction_callbacks. - This is where drive_backup_cb is going to get invoked again, and then we will splinter back out into backup-specific concerns, with Jobs and Bitmaps and what not.Yes, I can see the control flow; my main problem lies in the three different structs introduced in patch 3. I'll tell you why I'm confused by them:
Paydirt!
First, there is BlkTransactionList. Sounds to me like it should be a list of transactions. Judging from its members, it is rather a list of operations ("actions") for a single transaction, however. Apparently, there is a double-use for the word "transaction": I'd consider a transaction to be the thing started by the QMP command which is a group of atomic operations. However, that one structure is called BlkTransactionState, but it apparently this is only the state of a single operation in the transaction. Pre-existing, but doesn't make it better. You added comments for its list entry members, with the first one stating "All transactions in the current group" and the second being a subset of that. However, again, I consider these to be operations and not transactions. The group is the transaction.
Ah, yes. Good points. We need to decide on consistent terminology. Transaction: The entire group. Action: One particular command within a transaction group. Action Op: One particular stage of one command (e.g. prepare, commit)BlkTransactionState is an existing structure that does indeed refer instead to one particular action's state, not that of the entire transaction. This can be changed to be clearer. Better names might be BlkTransactionActionState or BlkActionState.
The BlkTransactionList here is indeed intended to be a list for the entire transaction, so it is actually a "list of actions" and not, perhaps as its name implies, a list of transactions. (We have no data structures with a scope this vast.)
The BlkTransactionList is an object that is created once per transaction group (once per qmp_transaction()) and keeps a count of how many jobs we've launched (and thus how many to expect to return to us.)
Once a job completes, it adds that *action's state* (BlkTransactionState) to its list. Once all jobs sent out have returned, it will iterate this list and execute their .cb() actions.
So perhaps an amendment would be: typedef struct BlkTransactionList { int pending_jobs; int transaction_rc; QLIST(...) completed_actions; } BlkTransactionList; to make this part clearer.
So we have that adding to my confusion. Furthermore, I think we don't need to list entry members (entry and list_entry) in BlkTransactionState. I think it should be fine to drop the snap_bdrv_states list in qmp_transaction() and just add all BlkTransactionStates to the BlkTransactionList (or however you name it if you rename it) in the preparation while () loop there. I don't think we need to add them only in blk_transaction_complete() (and we should drop that from there if it's been done in qmp_transaction() already, of course). Then we can really just use that list generally as *the* list of operations in a transaction.
Yes, there's not necessarily a strong reason to keep two lists.One of the existing reasons, though, is that not all transactions launch jobs or have been modified to work with this system yet, so we definitely don't want to consider all of these entries in THIS system just yet.
So one of the simplifications two lists makes is that we don't have to worry about "unsupported actions" making it into the list. Everything in *my* list can be processed without further thought.
We can also just add more state into the BlkTransactionState (the ActionState) to help us distinguish, and then we can get rid of both lists.
A state machine tag or something might be useful in that way.
Second, there is BlkTransactionData. From the name I cannot judge at all how it is supposed to differ from BlkTransactionState, because, well, it contains some data about a transaction... Maybe, because I consider BlkTransactionState to be misnamed, BlkTransactionData actually contains data about the transaction and not about a single operation? But from looking at the members, I have no idea. It has "opaque" and "ret"... I don't know how that is vital data to the transaction itself (and I don't think it is, it is rather data for the completion callback, I guess?), it only looks like some sort of pattern I have seen for AIO callbacks and the like.
BlkTransactionData was meant to contain completion info about a "job" that was launched by an action. I tried to avoid tying it explicitly to a BlockJob -- it can be any action with a callback, really.
Anyway, this structure contains "Data that was delivered to the original callback." In our case here, it's data that would've gone to block_job_complete.
So it contains an opaque pointer (A BDS, we secretly know) and a return code for that individual action's task.
Third, there is BlkTransactionWrapper. I suppose it wraps a transaction...? Or maybe at least an operation, because apparently I can no longer be sure whether a transaction is a transaction or an operation... But there is no transaction or even a single operation in there. It's just a structure describing a callback. I don't know how that is a "transaction wrapper"...
We have achieved maximum Strawberry.The BlkTransactionWrapper was meant to wrap any data necessary to be able to deliver the original callback we're stealing.
So in our case, it's for *one action's callback* and in this one instance, it's being used to hold the data needed to manually invoke the block_job_complete callback.
So both BTD and BTW here contain callback related information for one particular action.
They are differentiated by "Wrapper" having a "Before we execute the original callback" context, and "Data" having a "We've already executed the original callback" context.
Can they be combined into one structure? Yes, as long as we keep a tag that lets us know what state we're in.
Can they be inlined into "BlkTransactionState"? Yes -- but I opted to keep it in a separate glob to not clutter up the BlkTransactionState with values only useful for an optional feature.
Is that wise? Maybe, maybe not.I'm open to names. The most semantically accurate would be BlkTransactionActionCallbackData if we combined BTD/BTW to be one structure. It's a mouthful, though.
So, I don't understand any of the names given to the objects, and while I might understand what they are used for by looking at them, I have a *really* hard time understanding the code that uses them, because if I don't stare at their definitions all the time, I will immediately forget what they contain and what they are used for because I don't understand the names at all (and there is nothing which explicitly tells me what they are used for either). The fact that all of them are starting with BlkTransaction makes mixing them up even easier.
Understood.
From the point of view of the "transaction action developer," the interface to the new feature is found via the "new_transaction_wrapper" function, But some care must be taken to make sure this callback actually gets used once it has been requested. The whole thing can be aborted with the "undo_transaction_wrapper" function. All of the other functions that got added to blockdev.c in Patch #3 are there to assist these two "interface" functions in doing what they claim to. Everything in patch 4 and 5 just adds more reference count flexibility to things that are only of interest to drive_backup. Patch 6 allows us to inject our special callback wrapper into the qmp_drive_backup function. Clear as mud?Control flow and concept, clear. Implementation, still very muddy, sorry... Max--js+ */ +void backup_transaction_complete(BlockJob *job, int ret); + + void blk_dev_change_media_cb(BlockBackend *blk, bool load); bool blk_dev_has_removable_media(BlockBackend *blk); void blk_dev_eject_request(BlockBackend *blk, bool force);
-- —js