Tuesday 30 January 2024

Distance Measures in Data Science with Algorithms

Distance Measures in Data Science with Algorithms

1. Euclidean Distance:

import numpy as np

def euclidean_distance(p1, p2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        p1: Coordinates of the first point; a NumPy array or any array-like
            (list, tuple).
        p2: Coordinates of the second point, broadcast-compatible with ``p1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # Coerce first: subtracting two plain lists/tuples raises TypeError.
    diff = np.asarray(p1) - np.asarray(p2)
    return np.sqrt(np.sum(diff ** 2))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Euclidean distance:", euclidean_distance(point1, point2))

#clcoding.com
Euclidean distance: 2.8284271247461903


2. Manhattan Distance:

import numpy as np

def manhattan_distance(p1, p2):
    """Return the Manhattan (L1, city-block) distance between two points.

    Args:
        p1: Coordinates of the first point; a NumPy array or any array-like
            (list, tuple).
        p2: Coordinates of the second point, broadcast-compatible with ``p1``.

    Returns:
        The sum of the absolute coordinate differences.
    """
    # Coerce first: subtracting two plain lists/tuples raises TypeError.
    diff = np.asarray(p1) - np.asarray(p2)
    return np.sum(np.abs(diff))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Manhattan distance:", manhattan_distance(point1, point2))

#clcoding.com
Manhattan distance: 4



3. Cosine Similarity:

from scipy.spatial import distance

def cosine_similarity(p1, p2):
    """Return the cosine similarity of two vectors.

    The value is obtained by subtracting SciPy's cosine distance
    (``scipy.spatial.distance.cosine``) from 1.
    """
    cosine_gap = distance.cosine(p1, p2)
    return 1 - cosine_gap

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Cosine similarity:", cosine_similarity(point1, point2))

#clcoding.com
Cosine similarity: 0.9838699100999074

4. Minkowski Distance:

import numpy as np

def minkowski_distance(p1, p2, r):
    """Return the Minkowski distance of order ``r`` between two points.

    Args:
        p1: Coordinates of the first point; a NumPy array or any array-like.
        p2: Coordinates of the second point, broadcast-compatible with ``p1``.
        r: Order of the norm. ``r=1`` gives the Manhattan distance, ``r=2``
            the Euclidean distance, and ``r=float("inf")`` the Chebyshev
            distance.

    Returns:
        ``(sum(|p1 - p2| ** r)) ** (1 / r)``, or the largest absolute
        coordinate difference when ``r`` is positive infinity.
    """
    gaps = np.abs(np.asarray(p1) - np.asarray(p2))
    # The general formula degenerates for r = inf (inf ** 0 evaluates to 1),
    # so return the limiting value, the maximum gap, explicitly.
    if r == float("inf"):
        return np.max(gaps)
    return np.power(np.sum(np.power(gaps, r)), 1 / r)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Minkowski distance:", minkowski_distance(point1, point2, 3))

#clcoding.com
Minkowski distance: 2.5198420997897464



5. Chebyshev Distance:

import numpy as np

def chebyshev_distance(p1, p2):
    """Return the Chebyshev (L-infinity) distance between two points.

    Args:
        p1: Coordinates of the first point; a NumPy array or any array-like
            (list, tuple).
        p2: Coordinates of the second point, broadcast-compatible with ``p1``.

    Returns:
        The largest absolute coordinate difference.
    """
    # Coerce first: subtracting two plain lists/tuples raises TypeError.
    gaps = np.abs(np.asarray(p1) - np.asarray(p2))
    return np.max(gaps)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Chebyshev distance:", chebyshev_distance(point1, point2))

#clcoding.com
Chebyshev distance: 2


6. Hamming Distance:

import jellyfish

def hamming_distance(s1, s2):
    """Return the Hamming distance between ``s1`` and ``s2``.

    Thin wrapper: both arguments are forwarded to
    ``jellyfish.hamming_distance`` and its result is returned unchanged.
    """
    result = jellyfish.hamming_distance(s1, s2)
    return result

# Example usage
string1 = "hello"
string2 = "hallo"
print("Hamming distance:", hamming_distance(string1, string2))

#clcoding.com
Hamming distance: 1



7. Jaccard Similarity:

def jaccard_similarity(s1, s2):
    """Return the Jaccard similarity of the element sets of two iterables.

    Args:
        s1: First iterable of hashable items (e.g. a string of characters).
        s2: Second iterable of hashable items.

    Returns:
        ``|A ∩ B| / |A ∪ B|`` as a float in [0, 1], where A and B are the
        sets of distinct items in ``s1`` and ``s2``. Two empty inputs are
        treated as identical and yield 1.0.
    """
    set1 = set(s1)
    set2 = set(s2)
    union = set1 | set2
    # Both inputs empty: avoid 0/0 and report them as identical.
    if not union:
        return 1.0
    return len(set1 & set2) / len(union)

# Example usage
string1 = "hello"
string2 = "hallo"
print("Jaccard similarity:", jaccard_similarity(string1, string2))

#clcoding.com
Jaccard similarity: 0.6

8. Sørensen-Dice Index:

def sorensen_dice_index(s1, s2):
    """Return the Sørensen-Dice index of the element sets of two iterables.

    Args:
        s1: First iterable of hashable items (e.g. a string of characters).
        s2: Second iterable of hashable items.

    Returns:
        ``2 * |A ∩ B| / (|A| + |B|)`` as a float in [0, 1], where A and B are
        the sets of distinct items in ``s1`` and ``s2``. Two empty inputs are
        treated as identical and yield 1.0.
    """
    set1 = set(s1)
    set2 = set(s2)
    total_size = len(set1) + len(set2)
    # Both inputs empty: avoid 0/0 and report them as identical.
    if total_size == 0:
        return 1.0
    return (2 * len(set1 & set2)) / total_size

# Example usage
string1 = "hello"
string2 = "hallo"
print("Sørensen-Dice index:", sorensen_dice_index(string1, string2))

#clcoding.com
Sørensen-Dice index: 0.75



9. Haversine Distance:

def haversine_distance(lat1, lon1, lat2, lon2, radius=6371.0):
    """Return the great-circle distance between two points on a sphere.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees (east positive).
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees (east positive).
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 6371.0, the radius of the earth in km.

    Returns:
        The haversine distance along the sphere's surface.
    """
    phi1 = np.deg2rad(lat1)
    phi2 = np.deg2rad(lat2)
    half_dlat = np.deg2rad(lat2 - lat1) / 2
    half_dlon = np.deg2rad(lon2 - lon1) / 2
    a = np.sin(half_dlat) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlon) ** 2
    # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal
    # points), which would make sqrt(1 - a) NaN; clamp before the square roots.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius * c

# Example usage
print("Haversine distance:", haversine_distance(51.5074, 0.1278, 40.7128, -74.0060))

#clcoding.com
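  Input In [14]
    a = np.sin(dLat / 2)**2 + np.cos(np.deg2rad(lat1)) *
                                                         ^
SyntaxError: invalid syntax

Note: this SyntaxError was captured from a run in which the expression for `a` was broken across two lines right after the trailing `*`, without a line continuation. The function as listed above does not contain that break and parses correctly.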

10. Mahalanobis Distance:

from scipy.spatial.distance import cdist

def mahalanobis_distance(X, Y, VI=None):
    """Return the Mahalanobis distance between two points.

    Args:
        X: First point, a 1-D array-like of length n.
        Y: Second point, a 1-D array-like of length n.
        VI: Inverse of the (n, n) covariance matrix of the underlying data.
            It cannot be estimated from the two points themselves, so when it
            is omitted the identity matrix is used and the result equals the
            Euclidean distance.

    Returns:
        A (1, 1) NumPy array holding ``sqrt((X - Y) @ VI @ (X - Y))``.

    Raises:
        ValueError: If ``VI`` is given but is not an (n, n) matrix.
    """
    row_x = np.asarray(X, dtype=float).reshape(1, -1)
    row_y = np.asarray(Y, dtype=float).reshape(1, -1)
    n_dims = row_x.shape[1]
    if VI is None:
        # np.cov of a single 1-D point is a scalar variance, not an inverse
        # covariance matrix, so it must not be passed to cdist as VI.
        VI = np.eye(n_dims)
    else:
        VI = np.ascontiguousarray(VI, dtype=float)
        if VI.shape != (n_dims, n_dims):
            raise ValueError(
                f"VI must have shape ({n_dims}, {n_dims}), got {VI.shape}"
            )
    return cdist(row_x, row_y, 'mahalanobis', VI=VI)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Mahalanobis distance:", mahalanobis_distance(point1, point2))

#clcoding.com
Mahalanobis distance: [[1.41421356]]



11. Pearson Correlation:

from scipy.stats import pearsonr

def pearson_correlation(X, Y):
    """Return the Pearson correlation coefficient of ``X`` and ``Y``.

    Calls ``scipy.stats.pearsonr`` and returns only the first element of its
    result (the coefficient), discarding the rest.
    """
    outcome = pearsonr(X, Y)
    return outcome[0]

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Pearson correlation:", pearson_correlation(point1, point2))

#clcoding.com
Pearson correlation: 1.0

12. Squared Euclidean Distance:

def squared_euclidean_distance(X, Y):
    """Return the squared Euclidean distance between two points.

    Args:
        X: Coordinates of the first point; a NumPy array or any array-like.
        Y: Coordinates of the second point, broadcast-compatible with ``X``.

    Returns:
        The sum of squared coordinate differences.
    """
    # Sum the squares directly: taking a square root and squaring it again
    # only adds rounding error (e.g. 8.000000000000002 instead of 8).
    diff = np.asarray(X) - np.asarray(Y)
    return np.sum(diff ** 2)

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Squared Euclidean distance:", squared_euclidean_distance(point1, point2))

#clcoding.com
Squared Euclidean distance: 8.000000000000002



13. Jensen-Shannon Divergence:

def jensen_shannon_divergence(X, Y):
    """Return the square root of the Jensen-Shannon divergence of X and Y.

    The square root of the divergence is commonly called the Jensen-Shannon
    distance. Relative entropies are computed with ``scipy.special.rel_entr``
    (natural logarithm). The inputs are used as given; no normalization is
    applied here, so pass probability vectors if a true Jensen-Shannon value
    is wanted.

    Args:
        X: First vector; a NumPy array or any array-like.
        Y: Second vector, broadcast-compatible with ``X``.

    Returns:
        ``sqrt(0.5 * (sum(rel_entr(X, M)) + sum(rel_entr(Y, M))))`` with
        ``M = 0.5 * (X + Y)``.
    """
    # Imported locally because the surrounding file never imports rel_entr,
    # which made the original call fail with NameError.
    from scipy.special import rel_entr

    # Coerce so that plain lists work (`list + list` would concatenate).
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    M = 0.5 * (X + Y)
    return np.sqrt(0.5 * (rel_entr(X, M).sum() + rel_entr(Y, M).sum()))

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Jensen-Shannon divergence:", jensen_shannon_divergence(point1, point2))

#clcoding.com
Jensen-Shannon divergence: 0.6569041853099059

14. Chi-Square Distance:

def chi_square_distance(X, Y):
    """Return the chi-square distance between two histograms.

    Each input is first scaled to sum to 1; the distance is then
    ``sum((x - y) ** 2 / (x + y))`` over all bins.

    Args:
        X: First histogram; a NumPy array or any array-like of counts/weights.
        Y: Second histogram with the same shape as ``X``.

    Returns:
        The chi-square distance as a float. Bins that are empty in both
        histograms contribute nothing.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    X = X / np.sum(X)
    Y = Y / np.sum(Y)
    numerator = (X - Y) ** 2
    denominator = X + Y
    # A bin that is zero in both histograms would give 0/0 = NaN and poison
    # the whole sum; such bins carry no difference, so leave them out.
    occupied = denominator != 0
    return np.sum(numerator[occupied] / denominator[occupied])

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Chi-Square distance:", chi_square_distance(point1, point2))

#clcoding.com
Chi-Square distance: 0.01923076923076923



15. Spearman Correlation:

from scipy.stats import spearmanr

def spearman_correlation(X, Y):
    """Return the Spearman rank correlation coefficient of ``X`` and ``Y``.

    Calls ``scipy.stats.spearmanr`` and returns only the first element of its
    result (the coefficient), discarding the rest.
    """
    outcome = spearmanr(X, Y)
    return outcome[0]

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Spearman correlation:", spearman_correlation(point1, point2))

#clcoding.com
Spearman correlation: 0.9999999999999999

16. Canberra Distance:

from scipy.spatial.distance import canberra

def canberra_distance(X, Y):
    """Return the Canberra distance between ``X`` and ``Y``.

    Thin wrapper: both arguments are forwarded to
    ``scipy.spatial.distance.canberra`` and its result is returned unchanged.
    """
    result = canberra(X, Y)
    return result

# Example usage
point1 = np.array([1, 2])
point2 = np.array([3, 4])
print("Canberra distance:", canberra_distance(point1, point2))

#clcoding.com
Canberra distance: 0.8333333333333333



0 Comments:

Post a Comment

Popular Posts

Categories

AI (27) Android (24) AngularJS (1) Assembly Language (2) aws (17) Azure (7) BI (10) book (4) Books (117) C (77) C# (12) C++ (82) Course (62) Coursera (179) coursewra (1) Cybersecurity (22) data management (11) Data Science (95) Data Structures (6) Deep Learning (9) Django (6) Downloads (3) edx (2) Engineering (14) Excel (13) Factorial (1) Finance (5) flutter (1) FPL (17) Google (19) Hadoop (3) HTML&CSS (46) IBM (25) IoT (1) IS (25) Java (92) Leet Code (4) Machine Learning (44) Meta (18) MICHIGAN (5) microsoft (3) Pandas (3) PHP (20) Projects (29) Python (748) Python Coding Challenge (221) Questions (2) R (70) React (6) Scripting (1) security (3) Selenium Webdriver (2) Software (17) SQL (40) UX Research (1) web application (8)

Followers

Person climbing a staircase. Learn Data Science from Scratch: online program with 21 courses