# Distance Measures in Data Science with Algorithms

## Distance Measures in data science with algorithms

### 1. Euclidean Distance:

import numpy as np

def euclidean_distance(p1, p2):
return np.sqrt(np.sum((p1 - p2) ** 2))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Euclidean distance:", euclidean_distance(point1, point2))

#clcoding.com
Euclidean distance: 2.8284271247461903

### 2. Manhattan Distance:

import numpy as np

def manhattan_distance(p1, p2):
return np.sum(np.abs(p1 - p2))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Manhattan distance:", manhattan_distance(point1, point2))

#clcoding.com
Manhattan distance: 4

### 3. Cosine Similarity:

from scipy.spatial import distance

def cosine_similarity(p1, p2):
return 1 - distance.cosine(p1, p2)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Cosine similarity:", cosine_similarity(point1, point2))

#clcoding.com
Cosine similarity: 0.9838699100999074

### 4. Minkowski Distance:

import numpy as np

def minkowski_distance(p1, p2, r):
return np.power(np.sum(np.power(np.abs(p1 - p2), r)), 1/r)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Minkowski distance:", minkowski_distance(point1, point2, 3))

#clcoding.com
Minkowski distance: 2.5198420997897464

### 5. Chebyshev Distance:

import numpy as np

def chebyshev_distance(p1, p2):
return np.max(np.abs(p1 - p2))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Chebyshev distance:", chebyshev_distance(point1, point2))

#clcoding.com
Chebyshev distance: 2

### 6. Hamming Distance:

import jellyfish

def hamming_distance(s1, s2):
return jellyfish.hamming_distance(s1, s2)

# Example usage
string1 = "hello"
string2 = "hallo"
print("Hamming distance:", hamming_distance(string1, string2))

#clcoding.com
Hamming distance: 1

### 7. Jaccard Similarity:

def jaccard_similarity(s1, s2):
set1 = set(s1)
set2 = set(s2)
intersection = set1.intersection(set2)
union = set1.union(set2)
return len(intersection) / len(union)

# Example usage
string1 = "hello"
string2 = "hallo"
print("Jaccard similarity:", jaccard_similarity(string1, string2))

#clcoding.com
Jaccard similarity: 0.6

### 8. Sørensen-Dice Index:

def sorensen_dice_index(s1, s2):
set1 = set(s1)
set2 = set(s2)
intersection = set1.intersection(set2)
return (2 * len(intersection)) / (len(set1) + len(set2))

# Example usage
string1 = "hello"
string2 = "hallo"
print("Sørensen-Dice index:", sorensen_dice_index(string1, string2))

#clcoding.com
Sørensen-Dice index: 0.75

### 9. Haversine Distance:

def haversine_distance(lat1, lon1, lat2, lon2):
R = 6371.0  # Radius of the earth in km
a = np.sin(dLat / 2)**2 + np.cos(np.deg2rad(lat1)) * np.cos(np.deg2rad(lat2)) * np.sin(dLon / 2)**2
c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
return R * c

# Example usage
print("Haversine distance:", haversine_distance(51.5074, 0.1278, 40.7128, -74.0060))

#clcoding.com
Input In [14]
a = np.sin(dLat / 2)**2 + np.cos(np.deg2rad(lat1)) *
^
SyntaxError: invalid syntax

### 10. Mahalanobis Distance:

from scipy.spatial.distance import cdist

def mahalanobis_distance(X, Y):
return cdist(X.reshape(1,-1), Y.reshape(1,-1), 'mahalanobis', VI=np.cov(X))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Mahalanobis distance:", mahalanobis_distance(point1, point2))

#clcoding.com
Mahalanobis distance: [[1.41421356]]

### 11. Pearson Correlation:

from scipy.stats import pearsonr

def pearson_correlation(X, Y):
return pearsonr(X, Y)[0]

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Pearson correlation:", pearson_correlation(point1, point2))

#clcoding.com
Pearson correlation: 1.0

### 12. Squared Euclidean Distance:

def squared_euclidean_distance(X, Y):
return euclidean_distance(X, Y)**2

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Squared Euclidean distance:", squared_euclidean_distance(point1, point2))

#clcoding.com
Squared Euclidean distance: 8.000000000000002

### 13. Jensen-Shannon Divergence:

def jensen_shannon_divergence(X, Y):
M = 0.5 * (X + Y)
return np.sqrt(0.5 * (rel_entr(X, M).sum() + rel_entr(Y, M).sum()))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Jensen-Shannon divergence:", jensen_shannon_divergence(point1, point2))

#clcoding.com
Jensen-Shannon divergence: 0.6569041853099059

### 14. Chi-Square Distance:

def chi_square_distance(X, Y):
X = X / np.sum(X)
Y = Y / np.sum(Y)
return np.sum((X - Y) ** 2 / (X + Y))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Chi-Square distance:", chi_square_distance(point1, point2))

#clcoding.com
Chi-Square distance: 0.01923076923076923

### 15. Spearman Correlation:

from scipy.stats import spearmanr

def spearman_correlation(X, Y):
return spearmanr(X, Y)[0]

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Spearman correlation:", spearman_correlation(point1, point2))

#clcoding.com
Spearman correlation: 0.9999999999999999

### 16. Canberra Distance:

from scipy.spatial.distance import canberra

def canberra_distance(X, Y):
return canberra(X, Y)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Canberra distance:", canberra_distance(point1, point2))

#clcoding.com
Canberra distance: 0.8333333333333333