Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
c9bf8cc
Added .gitignore
asta-valdorf Mar 10, 2026
660d2ef
Added project requirements
asta-valdorf Mar 10, 2026
19cca14
code for feature extraction
Mar 30, 2026
d4c78ed
code for feature extraction
Mar 30, 2026
6092a9c
Code for making csv file with features
Mar 30, 2026
c03a303
Changed notation for cancerous to binary
JuliusMichelsen Mar 31, 2026
9c572f9
Added .astype(int) to cancerous or not for features.csv
JuliusMichelsen Apr 8, 2026
914756f
updated names for features
Apr 8, 2026
6bbf72c
Delete src/feature_A.py
emmdupont Apr 8, 2026
cd02bf1
Delete src/feature_B.py
emmdupont Apr 8, 2026
0f09133
Changed feature_extraction.py, with 2 different feature_A and feature_B
JuliusMichelsen Apr 8, 2026
3c9d3cf
Fixed small spelling mistakes, and changed feats parameter names
JuliusMichelsen Apr 8, 2026
e88ed6e
small fix
JuliusMichelsen Apr 8, 2026
903ce7c
Updated feature extractions codes
Apr 9, 2026
c662ed3
Add hair and pen marks detection and removal code
asta-valdorf Apr 15, 2026
9993ce2
Added script for k-NN base-classifier
JuliusMichelsen Apr 17, 2026
5193d70
Added graphs and confusion matrix for k-NN base-classifier
JuliusMichelsen Apr 17, 2026
95eb1cc
Changed location for graphs and confusion matrix form k-NN base-clas.
JuliusMichelsen Apr 17, 2026
2a0f1a3
resolving merge conflicts
asta-valdorf Apr 19, 2026
874145c
pushed hair, pen marks and preprocessing files and renamed files
asta-valdorf Apr 21, 2026
d62b530
deleted random file
asta-valdorf Apr 21, 2026
35ce64d
Added and changed comments
JuliusMichelsen Apr 22, 2026
ef22730
Changed comments a bit
JuliusMichelsen Apr 22, 2026
5298f24
Inserted DT in classifer notebook and changed location of notebook
Apr 22, 2026
27ede9b
improved hair and pen analysis
asta-valdorf Apr 23, 2026
af199e8
improved hair and pen analysis
asta-valdorf Apr 23, 2026
0109753
Fixing small grammar in feautre A2 and B2
JuliusMichelsen Apr 29, 2026
8f94615
Fixed some grammar in feature code
JuliusMichelsen Apr 29, 2026
8bd09e0
removed unused featrue files and updated feature_extraction
emgeerthsen Apr 30, 2026
57e62f7
Added progress bar to feature_extraction code
Apr 30, 2026
7750691
Removed unused code
emgeerthsen Apr 30, 2026
6aa22d1
Removed normalization
May 5, 2026
50e4f7d
new CSV files
May 5, 2026
7e18e8a
Added needed imports
Enet-itu May 5, 2026
e759525
Adding the logistic regression
Enet-itu May 5, 2026
1d88305
adding visual to logistic regression
Enet-itu May 6, 2026
c0a1b9e
Cleaning up bugs
Enet-itu May 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
Empty file added .gitignore
Empty file.
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"python-envs.defaultEnvManager": "ms-python.python:conda",
"python-envs.defaultPackageManager": "ms-python.python:conda"
}
978 changes: 978 additions & 0 deletions base_classifiers.ipynb

Large diffs are not rendered by default.

Binary file added data/.DS_Store
Binary file not shown.
2,104 changes: 2,104 additions & 0 deletions data/base_features.csv

Large diffs are not rendered by default.

2,104 changes: 2,104 additions & 0 deletions data/clean_features.csv

Large diffs are not rendered by default.

163 changes: 163 additions & 0 deletions feature_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Code taken from exercise session 05_feature_extraction
import numpy as np
from skimage.transform import resize
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
import os

from src.feature_A import asymmetry_np_centroid
from src.feature_B import border_irregularity
from src.feature_C import get_multicolor_rate2

# Module-level dataset root.
# NOTE(review): features_csv takes its own data_path argument and the call at the
# bottom of this file passes "data/", so this value appears to be unused here — confirm.
data_path = "../data/"

def features_csv(meta_data, data_path):
    """
    Extracts ABC features from skin lesion images and masks and saves them into a CSV file.

    Loads images and the corresponding masks based on the given metadata.
    Only images with existing masks are used. For each image we extract the following features
    (the names are the column names written to the CSV):
        - asymmetry_np_centroid: A2, asymmetry score based on centroid of lesion
        - border_contours: B2, border irregularity score
        - color: C, multicolor rate

    The CSV also contains the columns "img_id" and "cancerous" and is written to
    "clean_features.csv" inside data_path.

    Args:
        meta_data (str): Filename of dataset (e.g. "metadata.csv").
        data_path (str): Root path to the location of the dataset. It is expected to contain
            the metadata file and the sub-folders "imgs_clean/" and "masks/".

    Returns:
        None: The CSV file is written directly into the given data_path.

    Notes:
        Code takes 20-30 minutes to run.
        NOTE(review): nothing in this function catches errors from the feature functions;
        a feature only shows up as an empty slot (NaN) in the CSV if the feature function
        itself returns NaN — confirm against src/feature_A, src/feature_B and src/feature_C.
    """

    def mask_filename(image_id):
        """Returns the mask file name that belongs to an image file name."""
        # Only the file name is rewritten, so a ".png" elsewhere in the path is left alone
        return image_id.replace(".png", "_mask.png")

    def load_image_and_mask(image_id, data_path=data_path):
        """
        Loads single skin lesion image and the corresponding mask.

        Args:
            image_id (str): Image id taken from the metadata.
            data_path (str): Root path to the location of the dataset.

        Returns:
            im: np.ndarray of the loaded image
            mask: np.ndarray of the loaded mask, with the same height and width as the image
        """
        # os.path.join works whether or not data_path ends with a path separator
        file_im = os.path.join(data_path, "imgs_clean", image_id)
        file_mask = os.path.join(data_path, "masks", mask_filename(image_id))

        im = plt.imread(file_im)
        mask = plt.imread(file_mask)

        if mask.ndim == 3:
            mask = mask[..., 0]  # Converts RGB mask into grayscale, by only taking first channel

        if im.shape[:2] != mask.shape[:2]:
            # Resize mask dimension to image if they are different
            mask = resize(mask, im.shape[:2], anti_aliasing=False)

        return im, mask

    def load_metadata(meta_data, data_path):
        """
        Loads the metadata as a pandas dataframe.

        Creates new column, which defines whether a lesion is cancerous or benign in binary values,
        and keeps only the rows whose mask file exists on disk.

        Args:
            meta_data (str): Filename of the dataset (e.g. "metadata.csv")
            data_path (str): Root path to the location of the dataset.

        Returns:
            Dataframe from metadata with column defining cancerous or benign
        """
        metadata = pd.read_csv(os.path.join(data_path, meta_data))
        total_rows = len(metadata)  # remembered here so the file is not read a second time

        # Labels a cancerous lesion with 1 and a benign lesion with 0
        metadata["cancerous"] = metadata["diagnostic"].isin(["BCC", "MEL", "SCC"]).astype(int)

        # Filter to only rows where the mask file actually exists
        mask_dir = os.path.join(data_path, "masks")
        has_mask = metadata["img_id"].apply(
            lambda img_id: os.path.exists(os.path.join(mask_dir, mask_filename(img_id)))
        ).astype(bool)  # keeps the selector boolean even when the metadata has no rows
        metadata = metadata[has_mask]

        print(f"Found {len(metadata)} images with masks out of {total_rows} total")

        return metadata

    def return_features(row):
        """
        Extracts ABC features for a single skin lesion image and the corresponding mask.
        Stores the features in a dictionary

        Args:
            row (pd.Series): A single row from metadata containing "img_id" and "cancerous"

        Returns:
            feats (dict): A dictionary with the image id, the label and all extracted features
        """
        img_id = row["img_id"]
        im, mask = load_image_and_mask(img_id, data_path)

        return {
            "img_id": img_id,
            "cancerous": row["cancerous"],
            "asymmetry_np_centroid": asymmetry_np_centroid(mask),  # A2 - Centroid (center of lesion)
            "border_contours": border_irregularity(mask),  # B2 - Border irregularity
            "color": get_multicolor_rate2(im, mask),  # C - Difference between dominant colors
        }

    def make_csv(df, output_dir="output/"):
        """
        Outputs the CSV file with the features computed for the skin lesions

        Args:
            df (pd.DataFrame): The metadata rows to extract features for
            output_dir (str): The folder the final CSV file should be written to

        Returns:
            None: "clean_features.csv" is written into output_dir
        """
        output_path = os.path.join(output_dir, "clean_features.csv")

        # One feature dictionary per metadata row, with a progress bar
        results = [
            return_features(row)
            for _, row in tqdm(df.iterrows(), total=len(df), desc="Extracting features")
        ]

        pd.DataFrame(results).to_csv(output_path, index=False)

    df = load_metadata(meta_data, data_path)
    # Write next to the dataset, as documented, instead of a hard-coded "data/" folder
    make_csv(df, data_path)

# Runs the full feature extraction for "metadata.csv" in the "data/" folder.
# NOTE(review): this call is not behind an `if __name__ == "__main__":` guard, so it
# presumably also runs whenever this module is imported — confirm that is intended.
features_csv("metadata.csv" , data_path = "data/")
112 changes: 112 additions & 0 deletions hair_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import cv2
import numpy as np
from pathlib import Path

# Hair detection and removal for light and dark hair.
# Images are handled in three groups based on the estimated hair coverage:
# negligible coverage (skipped), moderate coverage and heavy coverage.

def hair_coverage(img_gray, kernel_size=9, threshold=10) -> float:
    """
    Estimate the fraction of the image covered by (dark) hair.

    A blackhat transform with an elliptical structuring element is thresholded,
    median-filtered, and the share of non-zero pixels is returned.

    Args:
        img_gray: Grayscale image handed to cv2.morphologyEx
            (presumably a 2D uint8 array — TODO confirm against callers).
        kernel_size: First entry of the size tuple given to
            cv2.getStructuringElement; the second entry is fixed at 3.
        threshold: Blackhat responses above this value count as hair.

    Returns:
        float: Number of hair pixels divided by rows * columns of img_gray.
    """
    struct_elem = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, 3))

    # Blackhat only: tophat would also pick up light spots and overestimate the coverage
    dark_response = cv2.morphologyEx(img_gray, cv2.MORPH_BLACKHAT, struct_elem)

    # Keep the pixels whose blackhat response exceeds the threshold
    binary = cv2.threshold(dark_response, threshold, 255, cv2.THRESH_BINARY)[1]

    # A 3x3 median filter removes small isolated detections
    cleaned = cv2.medianBlur(binary, 3)

    n_rows, n_cols = img_gray.shape[0], img_gray.shape[1]
    return np.count_nonzero(cleaned) / (n_rows * n_cols)


def removeHair_auto(img_org, img_gray, lesion_mask=None):
    """
    Detect hair and inpaint it, with parameters chosen from the hair coverage.

    - Coverage below 0.05: nothing is removed and a copy of img_org is returned.
    - Coverage below 0.2: the smaller kernel, thresholds and inpaint radius are used.
    - Otherwise: the larger kernel, thresholds and inpaint radius are used.

    Blackhat (dark hair) and tophat (light hair) responses are thresholded, and the
    light mask is only used when it has more than 1.2 times as many pixels as the dark one.

    Args:
        img_org: Image that is inpainted with cv2.inpaint.
        img_gray: Grayscale version of the image used for hair detection.
        lesion_mask: Not used by this function.

    Returns:
        tuple: (mode, response, mask, img_out, coverage, black_score, top_score) where
        mode is "skip", "light" or "dark". For "skip", response and mask are None
        and both scores are 0.
    """
    coverage = hair_coverage(img_gray)

    # Negligible hair: hand back an untouched copy
    if coverage < 0.05:
        return "skip", None, None, img_org.copy(), coverage, 0, 0

    # Presets are (kernel_size, black_threshold, top_threshold, radius)
    moderate_preset = (9, 11, 20, 3)
    heavy_preset = (11, 15, 25, 5)
    preset = moderate_preset if coverage < 0.2 else heavy_preset
    kernel_size, black_threshold, top_threshold, radius = preset

    # Structuring element for hair enhancement
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, 3))

    # Morphological responses: blackhat for dark hair, tophat for light hair
    blackhat = cv2.morphologyEx(img_gray, cv2.MORPH_BLACKHAT, kernel)
    tophat = cv2.morphologyEx(img_gray, cv2.MORPH_TOPHAT, kernel)

    # Binary masks of the responses above their thresholds
    black_mask = cv2.threshold(blackhat, black_threshold, 255, cv2.THRESH_BINARY)[1]
    top_mask = cv2.threshold(tophat, top_threshold, 255, cv2.THRESH_BINARY)[1]

    black_score = np.count_nonzero(black_mask)
    top_score = np.count_nonzero(top_mask)

    # Light mode has to win clearly, otherwise dark hair is assumed
    light_wins = top_score > 1.2 * black_score
    if light_wins:
        mode, response, mask = "light", tophat, top_mask
    else:
        mode, response, mask = "dark", blackhat, black_mask

    # Fill the detected hair pixels from their surroundings
    img_out = cv2.inpaint(img_org, mask, radius, cv2.INPAINT_NS)

    return mode, response, mask, img_out, coverage, black_score, top_score


def hair_removal(img_bgr):
    """
    Complete hair removal pipeline.

    Converts the image to grayscale with cv2.COLOR_BGR2GRAY, runs removeHair_auto
    on it and returns the result.

    Args:
        img_bgr: Image to clean (converted with a BGR-to-gray conversion).

    Returns:
        The input image itself when removeHair_auto reports "skip",
        otherwise the inpainted image.
    """
    # Grayscale version for the morphological processing
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Only the mode and the output image are needed from the seven returned values
    mode, _response, _mask, cleaned, _coverage, _black_score, _top_score = \
        removeHair_auto(img_bgr, gray)

    return img_bgr if mode == "skip" else cleaned



Loading