Wednesday, June 14, 2017

Finding closest pair of points in two disjoint sets of points

The finding nearest pair of points in two disjoint sets of points (or Polygons made of points) can be found in O(nlogn) computational complexity (with O(n) memory complexity).

With ordinary tree or graph search, solving this is not possible because we do not know the starting and ending points in advance. The k-d binary search tree can used to solve this problem as it can be used to navigate inside the two dimensional space rather easily. 

This is infact a last resort to solve this problem if we do not make any assumptions about the distribution of the points. If we can make some assumptions about the points, e.g. if they are not nested, lie on either side of one set etc, then the problem becomes more simpler and can be solved with relative less complexity. 

K – Dimensional binary tree divides the dimensional space with each of its node with its X or Y (..) coordinate so while navigating it one doesn’t has to navigate through all of its nodes.

This is an example of the two sets of points:





















#include <iostream>

using namespace std;

int const k=2; // the number of dimensions
double min_distance = 10000; // set a large default value, in this example all distance will be shorter than this. 

double distance(int arr[], int arr2[])
{
 return sqrt(pow(arr2[0] - arr[0], 2) + pow(arr2[1] - arr[1], 2));
 
}

struct Node {
 int point[k];
 Node *left, *right;
 Node()
 {
  left = right = NULL;  

 }
};

// A method to create a node of K D tree
struct Node* newNode(int arr[])
{
 struct Node* temp = new Node;

 for (int i = 0; i<k; i++)
  temp->point[i] = arr[i];

 return temp;
}

Node * insertNode(Node * node, int arr[], int d)
{
 if (node == NULL)
  return newNode(arr);

 int dim = d%k;


 if (node->point[dim] > arr[dim])
 {
  

  node->left = insertNode(node->left, arr, dim + 1);
 }
 else
 {
 
  node->right = insertNode(node->right, arr, dim + 1);
 }

 return node;
}
Node * Nearest=NULL;

Node * FindnearestNode(Node * head1, int arr[], int d)
{
 // if empty tree, return
 if (head1 == NULL)
  return NULL;

 // check for each tree. 
   if (min_distance > distance(head1->point, arr))
 {
  min_distance = distance(head1->point, arr);
  Nearest = head1;
 }

 if (head1->left == NULL && head1->right == NULL)
  return head1;

 // findout current dimension, in this case it either x or y i.e. 0 or 1
 int dim = d%k;

 // navigate through the tree as if inserting to a new member (to remain to the nearest member in closeness). in the path for insert it will find the nearest member. 
 if (head1->right && head1->point[dim] < arr[dim])
  return FindnearestNode(head1->right, arr, d+1);
 else if(head1->left && head1->point[dim] > arr[dim] )
  return FindnearestNode(head1->left, arr, d+1);

 return Nearest;
}


int main()
{
 int const an = 10;
 int const bn = 10;

 int ax[an] = { 34,55,11,79,77,65,3,9,5,66 };
 int ay[an] = { 5, 6, 7, 9, 32,3,15,7,10,35 };

 int bx[bn] = { 5,35,4,41,32,64,41,54,87,3 };
 int by[bn] = { 23,33,17,15,32,22,33,23,21,32 };

  

 Node * head1=NULL;
 Node * head2 = NULL;


  
 double Final_Min_Distance = min_distance;

 // fill the K-D trees with the two dimensional data in two trees.
 for (int i = 0; i < an; i++)
 {
  int temp[k];
  temp[0] = ax[i];
  temp[1] = ay[i];

  head1=insertNode(head1, temp, 0);
  temp[0] = bx[i];
  temp[1] = by[i];

  head2=insertNode(head2, temp, 0);


 }
 Node * AnearB=NULL;

 Node * BnearA = NULL;



 min_distance = 1000;
   Final_Min_Distance = min_distance;
 for (int i = 0; i < an; i++)
 {
  int temp[k];
  temp[0] = bx[i];
  temp[1] = by[i];

  
  Node * Nearer2 = FindnearestNode(head1, temp, 0);
  if (Final_Min_Distance > min_distance)
  {
   BnearA = Nearer2;
   Final_Min_Distance = min_distance;
  }
  cout << " distance of B (" << temp[0] << "," << temp[1] << ") to nearest A (" << BnearA->point[0] << "," << BnearA->point[1] << ") distance:" << Final_Min_Distance << endl;
  min_distance = 1000;


 }
 cout << "Minimum Distance is " << Final_Min_Distance<<endl<<endl;


 min_distance = 1000;
 Final_Min_Distance = min_distance;
 for (int i = 0; i < an; i++)
 {
  int temp[k];
  temp[0] = ax[i];
  temp[1] = ay[i];


  Node * Nearer2 = FindnearestNode(head2, temp, 0);
  if (Final_Min_Distance > min_distance)
  {
   AnearB = Nearer2;
   Final_Min_Distance = min_distance;
  }
  cout << " distance of A (" << temp[0] << "," << temp[1] << ") to nearest B (" << AnearB->point[0] << "," << AnearB->point[1] << ") distance:" << Final_Min_Distance << endl;
  min_distance = 1000;


 }
 cout << "Minimum Distance is " << Final_Min_Distance;



 system("pause");

}

here is the result:




Computational Complexity to build the trees: O(n)
Memory Complexity to build the trees: O(n)
Computational Complexity to navigate through all members of set A or B: O(n)
Computational Complexity to search the tree to find the nearest neighbor points: O(logn)



So the final computational complexity would be : O(n)+O(n)+O(logn) = O(nlogn)

No comments: