Here’s an update: I sticked to the idea of using collector; it categorizes models from a deletion perspective into:
- fast_deletes - models that can be easily deleted, without checking for other relations - update_fields - field that have SET_NULL, i.e. relations to the customer objects that are only updated - … all the rest So I kept the structure from Collector, looping the three categories mentioned above. It works ok; for each instance, you’ll have the check instance._meta.get_fields() to find the field related to the old customer record. Fixing GenericRelations require the most code, because you’ll have to find the content type, object id (and maybe content object) fields that are related to the old customer record. Moreover, you’ll have to be aware that you may violate foreign key constraints when replacing the old id. My implementation is attached, if it’s interesting for anybody. Have a nice weekend y’all jools jools schrieb am Freitag, 21. Januar 2022 um 16:05:58 UTC+1: > Dear Django users, > > here’s an interesting case and I’m curious whether somebody can point me > in the right direction. > > Here’s the problem: > My company has a database of customers. Over the years, many duplicates > have been created. For cleaning up duplicates, I’d like to have a > search-and-replace functionality. > > I.e.: Replace all references in the database to customer <old> with > customer <new>. > The database schema has quite a bit of complexity, so I’m aiming to find a > very generic solution. > > Approach: > Django has a bit of functionality to find all references to an existing > object, namely django.db.models.deletion.Collector and I’m using it to > find all references. > > Though, the “replace” logic seems quite hard to get right: > > - It has to keep parent links intact > - It has to recognize references in parent models (Customer model is > derived from the concrete model Actor) > - It has to recognize generic relations, built with Django’s content > types > > > My stub implementation comes below. > > > - Has anybody else implemented sth like this, e.g. in helper library? > - Do you think the approach is right? > - What would you differently? > - Any caveats that you know of? > > Best regards > Jools > > Stub implementation: > > from django.db.models.deletion import Collector, ProtectedError, > RestrictedError > > > def replace_object(old_obj, new_obj, using='default'): > """ > Replace an old object with a new object throughout the database. > """ > collector = Collector(using) > > try: > collector.collect([old_obj]) > except ProtectedError: > pass > except RestrictedError: > pass > > for model, obj in collector.instances_with_model(): > for field_opts in obj._meta.get_fields(): > if __is_qualified(field_opts, new_obj): > setattr(obj, field_opts.name, new_obj) > obj.save() > > def __is_qualified(field_opts, new_obj): > if not hasattr(field_opts, 'related_model'): > return False > > # This check is wrong for multi-table inheritance, I suppose? > if field_opts.related_model != new_obj.__class__: > return False > if hasattr(field_opts, 'parent_link') and field_opts.parent_link is > True: > return False > > return True > > -- You received this message because you are subscribed to the Google Groups "Django users" group. To unsubscribe from this group and stop receiving emails from it, send an email to django-users+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/django-users/1a094625-ab1c-4763-81f2-d8c77ffab870n%40googlegroups.com.
from django.contrib.contenttypes.fields import GenericRelation, GenericForeignKey from django.contrib.contenttypes.models import ContentType from django.db.models import sql from django.db.models.deletion import Collector from django.db.utils import IntegrityError class LogMessage: def __init__(self, instance, message=""): self._instance = instance self._message = message def __str__(self): try: model_name = self._instance._meta.verbose_name except AttributeError: model_name = self._instance.__class__.__name__ return f'{model_name}, {str(self._instance)}, {self._message}' class ObjectReplacer: def __init__(self): self._errors = [] self._updates = [] def replace_object(self, old_obj, new_obj, using='default'): collector = Collector(using) collector.collect([old_obj], keep_parents=True) self.__update_fields(collector, new_obj) self.__update_fast_deletes(collector, new_obj) self.__update_the_rest(collector, old_obj, new_obj) self.show_updates() self.show_errors() def __update_fields(self, collector, new_obj): for model, instances_for_fieldvalues in collector.field_updates.items(): for (field, value), instances in instances_for_fieldvalues.items(): if self.__update_batch(model, [obj.pk for obj in instances], field.name, new_obj.pk, collector.using): self.add_updates([LogMessage(obj) for obj in instances]) def __update_fast_deletes(self, collector, new_obj): for qs in collector.fast_deletes: print(qs) return [] def __update_the_rest(self, collector, old_obj, new_obj): # delete instances for model, instances in collector.data.items(): # Ok, django provides us with model and instances. # We’ll need to find the referencing field and update with the new PK. # print(model, instances) for instance in instances: for field_opts in instance._meta.get_fields(include_hidden=True): if hasattr(field_opts, 'related_model') and field_opts.related_model == old_obj.__class__: if instance == old_obj: # Don’t attempt to change the old object itself. continue if hasattr(field_opts, 'field') and isinstance(field_opts.field, GenericRelation): self.__update_generic_relation(instance, collector, old_obj, new_obj) else: self.__simple_update(model, instance, collector, new_obj, field_opts) def __simple_update(self, model, instance, collector, new_obj, field_opts): if self.__update_batch(model, [instance.pk], field_opts.name, new_obj.pk, collector.using): self.add_update(LogMessage(instance)) def __update_generic_relation(self, instance, collector, old_obj, new_obj): # If it’s a generic relation, we search the fields for a GenericForeignKey field. # This will give us information about the desired content type and object id. for gen_field_opts in instance._meta.get_fields(): if isinstance(gen_field_opts, GenericForeignKey): ct_for_this_field = getattr(instance, gen_field_opts.ct_field) desired_ct = ContentType.objects.get_for_model(old_obj) if ct_for_this_field == desired_ct: if self.__update_batch(instance.__class__, [instance.pk], getattr(gen_field_opts, 'fk_field'), new_obj.pk, collector.using): self.add_update(LogMessage(instance)) return def __update_batch(self, model, pks, field_name, new_pk, using): query = sql.UpdateQuery(model) try: query.update_batch(pks, {field_name: new_pk}, using) except IntegrityError as error: self.add_errors([LogMessage(None, str(error))]) return False return True def add_error(self, log_message): self._errors.append(log_message) def add_errors(self, log_messages): self._errors += log_messages def add_update(self, log_message): self._updates.append(log_message) def add_updates(self, log_messages): self._updates += log_messages def show_updates(self): print("Updated:") for log_msg in self._updates: print("\t", str(log_msg)) def show_errors(self): print("Errors:") for log_msg in self._errors: print("\t", str(log_msg))