Re: Predicting an object over an pretrained model is not working

Thomas Passin via Python-list Tue, 30 Jul 2024 15:05:35 -0700

On 7/30/2024 4:49 PM, marc nicole wrote:

OK, but how's the probability of small_ball greater than others? I can't find it anyway, what's its value?

It's your code. I wouldn't know. I suppose it's represented somewhere in all those parameters. You need to understand what those function calls are returning. It's documented somewhere, right?

And you really do need to know the probabilities of the competing images because otherwise you won't know how confident you can be that the identification is a strong one.

Le mar. 30 juil. 2024 à 21:37, Thomas Passin via Python-list <python-list@python.org <mailto:python-list@python.org>> a écrit :


    On 7/30/2024 2:18 PM, marc nicole via Python-list wrote:
     > Hello all,
     >
     > I want to predict an object by given as input an image and want
    to have my
     > model be able to predict the label. I have trained a model using
    tensorflow
     > based on annotated database where the target object to predict
    was added to
     > the pretrained model. the code I am using is the following where
    I set the
     > target object image as input and want to have the prediction output:
     >
     >
     >
     >
     >
     >
     >
     >
     > class MultiObjectDetection():
     >
     >      def __init__(self, classes_name):
     >
     >          self._classes_name = classes_name
     >          self._num_classes = len(classes_name)
     >
     >          self._common_params = {'image_size': 448, 'num_classes':
     > self._num_classes,
     >                  'batch_size':1}
     >          self._net_params = {'cell_size': 7, 'boxes_per_cell':2,
     > 'weight_decay': 0.0005}
     >          self._net = YoloTinyNet(self._common_params,
    self._net_params,
     > test=True)
     >
     >      def predict_object(self, image):
     >          predicts = self._net.inference(image)
     >          return predicts
     >
     >      def process_predicts(self, resized_img, predicts, thresh=0.2):
     >          """
     >          process the predicts of object detection with one image
    input.
     >
     >          Args:
     >              resized_img: resized source image.
     >              predicts: output of the model.
     >              thresh: thresh of bounding box confidence.
     >          Return:
     >              predicts_dict: {"stick": [[x1, y1, x2, y2, scores1],
    [...]]}.
     >          """
     >          cls_num = self._num_classes
     >          bbx_per_cell = self._net_params["boxes_per_cell"]
     >          cell_size = self._net_params["cell_size"]
     >          img_size = self._common_params["image_size"]
     >          p_classes = predicts[0, :, :, 0:cls_num]
     >          C = predicts[0, :, :, cls_num:cls_num+bbx_per_cell] # two
     > bounding boxes in one cell.
     >          coordinate = predicts[0, :, :, cls_num+bbx_per_cell:] # all
     > bounding boxes position.
     >
     >          p_classes = np.reshape(p_classes, (cell_size, cell_size,
    1, cls_num))
     >          C = np.reshape(C, (cell_size, cell_size, bbx_per_cell, 1))
     >
     >          P = C * p_classes # confidencefor all classes of all
    bounding
     > boxes (cell_size, cell_size, bounding_box_num, class_num) = (7, 7, 2,
     > 1).
     >
     >          predicts_dict = {}
     >          for i in range(cell_size):
     >              for j in range(cell_size):
     >                  temp_data = np.zeros_like(P, np.float32)
     >                  temp_data[i, j, :, :] = P[i, j, :, :]
     >                  position = np.argmax(temp_data) # refer to the class
     > num (with maximum confidence) for every bounding box.
     >                  index = np.unravel_index(position, P.shape)
     >
     >                  if P[index] > thresh:
     >                      class_num = index[-1]
     >                      coordinate = np.reshape(coordinate, (cell_size,
     > cell_size, bbx_per_cell, 4)) # (cell_size, cell_size,
     > bbox_num_per_cell, coordinate)[xmin, ymin, xmax, ymax]
     >                      max_coordinate = coordinate[index[0],
    index[1], index[2], :]
     >
     >                      xcenter = max_coordinate[0]
     >                      ycenter = max_coordinate[1]
     >                      w = max_coordinate[2]
     >                      h = max_coordinate[3]
     >
     >                      xcenter = (index[1] + xcenter) *
    (1.0*img_size /cell_size)
     >                      ycenter = (index[0] + ycenter) *
    (1.0*img_size /cell_size)
     >
     >                      w = w * img_size
     >                      h = h * img_size
     >                      xmin = 0 if (xcenter - w/2.0 < 0) else
    (xcenter - w/2.0)
     >                      ymin = 0 if (xcenter - w/2.0 < 0) else
    (ycenter - h/2.0)
     >                      xmax = resized_img.shape[0] if (xmin + w) >
     > resized_img.shape[0] else (xmin + w)
     >                      ymax = resized_img.shape[1] if (ymin + h) >
     > resized_img.shape[1] else (ymin + h)
     >
     >                      class_name = self._classes_name[class_num]
     >                      predicts_dict.setdefault(class_name, [])
     >                      predicts_dict[class_name].append([int(xmin),
     > int(ymin), int(xmax), int(ymax), P[index]])
     >
     >          return predicts_dict
     >
     >      def non_max_suppress(self, predicts_dict, threshold=0.5):
     >          """
     >          implement non-maximum supression on predict bounding boxes.
     >          Args:
     >              predicts_dict: {"stick": [[x1, y1, x2, y2, scores1],
    [...]]}.
     >              threshhold: iou threshold
     >          Return:
     >              predicts_dict processed by non-maximum suppression
     >          """
     >          for object_name, bbox in predicts_dict.items():
     >              bbox_array = np.array(bbox, dtype=np.float)
     >              x1, y1, x2, y2, scores = bbox_array[:,0],
    bbox_array[:,1],
     > bbox_array[:,2], bbox_array[:,3], bbox_array[:,4]
     >              areas = (x2-x1+1) * (y2-y1+1)
     >              order = scores.argsort()[::-1]
     >              keep = []
     >              while order.size > 0:
     >                  i = order[0]
     >                  keep.append(i)
     >                  xx1 = np.maximum(x1[i], x1[order[1:]])
     >                  yy1 = np.maximum(y1[i], y1[order[1:]])
     >                  xx2 = np.minimum(x2[i], x2[order[1:]])
     >                  yy2 = np.minimum(y2[i], y2[order[1:]])
     >                  inter = np.maximum(0.0, xx2-xx1+1) *
    np.maximum(0.0, yy2-yy1+1)
     >                  iou = inter/(areas[i]+areas[order[1:]]-inter)
     >                  indexs = np.where(iou<=threshold)[0]
     >                  order = order[indexs+1]
     >              bbox = bbox_array[keep]
     >              predicts_dict[object_name] = bbox.tolist()
     >              predicts_dict = predicts_dict
     >          return predicts_dict
     >
     >
     >
     > class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle",
     > "bus", "car", "cat", "chair", "cow", "diningtable",
     >                     "dog", "horse", "motorbike", "person",
     > "pottedplant", "sheep", "sofa", "train", "tvmonitor",
     >                     "small_ball"]
     > modelFile = ('models\\train\\model.ckpt-0')
     > track_object = "small_ball"print("object detection and tracking...")
     >
     > multiObjectDetect = MultiObjectDetection(IP, class_names)
     > image = tf.placeholder(tf.float32, (1, 448, 448, 3))
     > object_predicts = multiObjectDetect.predict_object(image)
     >
     >
     >
     > sess = tf.Session()
     > saver = tf.train.Saver(multiObjectDetect._net.trainable_collection)
     >
     >
     > saver.restore(sess, modelFile)
     >
     > index = 0while 1:
     >
     >      src_img = cv2.imread("./weirdobject.jpg")
     >      resized_img = cv2.resize(src_img, (448, 448))
     >
     >      np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
     >      np_img = np_img.astype(np.float32)
     >      np_img = np_img / 255.0 * 2 - 1
     >      np_img = np.reshape(np_img, (1, 448, 448, 3))
     >
     >
     >      np_predict = sess.run(object_predicts, feed_dict={image:
    np_img})
     >      predicts_dict =
    multiObjectDetect.process_predicts(resized_img, np_predict)
     >      predicts_dict =
    multiObjectDetect.non_max_suppress(predicts_dict)
     >
     >      print ("predict dict = ", predicts_dict)
     >
     >
     >
     >
     >
     >
     >
     > The problem with this code is that the predicts_dict returns:
     >
     >
     >
     > predict dict =  {'sheep': [[233.0, 92.0, 448.0, -103.0,
     > 5.3531270027160645], [167.0, 509.0, 209.0, 101.0, 4.947688579559326],
     > [0.0, 0.0, 448.0, 431.0, 3.393721580505371]], 'horse': [[374.0, 33.0,
     > 282.0, 448.0, 5.277851581573486], [135.0, 688.0, -33.0, -14.0,
     > 3.5144259929656982], [1.0, 117.0, 112.0, -138.0, 2.656987190246582]],
     > 'bicycle': [[461.0, 781.0, 154.0, -381.0, 5.918102741241455], [70.0,
     > 344.0, 391.0, -138.0, 3.031444787979126], [378.0, 497.0, 46.0, 149.0,
     > 2.7629122734069824], [541.0, 583.0, 69.0, 307.0, 2.7170517444610596],
     > [323.0, 22.0, 336.0, 448.0, 1.608760952949524]], 'bottle': [[390.0,
     > 218.0, -199.0, 448.0, 4.582971096038818], [0.0, 0.0, 448.0, -410.0,
     > 0.9097045063972473]], 'sofa': [[346.0, 102.0, 323.0, -38.0,
     > 2.371835947036743]], 'dog': [[319.0, 254.0, -282.0, 373.0,
     > 4.022889137268066]], 'cat': [[63.0, -195.0, 365.0, -92.0,
     > 3.5134828090667725]], 'person': [[22.0, -122.0, 154.0, 448.0,
     > 3.927537441253662], [350.0, 155.0, -36.0, -445.0, 2.679833173751831],
     > [119.0, 416.0, -43.0, 292.0, 0.9529445171356201], [251.0, 445.0,
     > 225.0, 188.0, 0.9001350402832031]], 'train': [[329.0, 485.0, -24.0,
     > -235.0, 2.7050414085388184], [483.0, 362.0, 237.0, -86.0,
     > 2.555817127227783], [13.0, 365.0, 373.0, 448.0, 0.6229299902915955]],
     > 'small_ball': [[217.0, 737.0, 448.0, -315.0, 1.739920973777771],
     > [117.0, 283.0, 153.0, 122.0, 1.5690066814422607]], 'boat': [[164.0,
     > 805.0, 34.0, -169.0, 4.972668170928955], [0.0, 0.0, 397.0, 69.0,
     > 2.353729486465454], [302.0, 605.0, 15.0, -22.0, 2.0259625911712646]],
     > 'aeroplane': [[470.0, 616.0, -305.0, -37.0, 3.431873321533203], [0.0,
     > 0.0, 448.0, -72.0, 2.836672306060791]], 'bus': [[0.0, 0.0, -101.0,
     > -280.0, 1.2078320980072021]], 'pottedplant': [[620.0, -268.0, -124.0,
     > 418.0, 2.158564805984497], [0.0, 0.0, 448.0, -779.0,
     > 1.6623022556304932]], 'tvmonitor': [[0.0, 0.0, 448.0, 85.0,
     > 3.238999128341675], [240.0, 772.0, 200.0, 91.0, 1.7443398237228394],
     > [546.0, 155.0, 448.0, 448.0, 1.1334525346755981], [107.0, 441.0,
     > 432.0, 219.0, 0.5971617698669434]], 'chair': [[470.0, -187.0, 106.0,
     > 235.0, 3.8548083305358887], [524.0, 740.0, -103.0, 99.0,
     > 3.636549234390259], [0.0, 0.0, 275.0, -325.0, 3.0997846126556396],
     > [711.0, -231.0, -146.0, 392.0, 2.205275535583496]], 'diningtable':
     > [[138.0, -310.0, 111.0, 448.0, 4.660728931427002], [317.0, -66.0,
     > 313.0, 6.0, 4.535496234893799], [0.0, 0.0, -41.0, 175.0,
     > 1.8571208715438843], [21.0, -92.0, 76.0, 172.0, 1.2035608291625977],
     > [0.0, 0.0, 448.0, -250.0, 1.00322687625885]], 'car': [[312.0, 232.0,
     > 132.0, 309.0, 3.205225706100464], [514.0, -76.0, 218.0, 448.0,
     > 1.4289973974227905], [0.0, 0.0, 448.0, 142.0, 0.7124998569488525]]}
     >
     >
     > WHile I expect only the dict to contain the small_ball key
     >
     >
     >
     > How's that is possible? where's the prediction output? How to fix
    the code?

    Without trying to figure out all that code, why would you expect only
    results for a single key?  An ML system is going to compute
    probabilities and parameters for all objects it knows about (presumably
    subject to some threshold).

-- https://mail.python.org/mailman/listinfo/python-list

    <https://mail.python.org/mailman/listinfo/python-list>


--
https://mail.python.org/mailman/listinfo/python-list

Re: Predicting an object over an pretrained model is not working

Reply via email to