{ "cells": [ { "cell_type": "markdown", "id": "f8bdf84d-e0e6-4c70-acdf-ed27d3db8f4c", "metadata": {}, "source": [ "### Sonification: Turning Artwork into Music for Accessibility\n", "\n", "Before running the code, additional modules may need to be installed; you can install these by typing \"pip install [module name]\" into the terminal. (For example, it is likely you will need to type \"pip install pretty_midi\".) " ] }, { "cell_type": "code", "execution_count": 3, "id": "ec819042-4dc7-4d3e-a5c3-4aa69f3349e0", "metadata": {}, "outputs": [], "source": [ "# ==============================\n", "# CELL 1: Import Required Modules\n", "# ==============================\n", "\n", "# Image Processing\n", "from PIL import Image\n", "import numpy as np\n", "import colorsys\n", "import cv2\n", "\n", "# Music / Notation\n", "from music21 import stream, note, chord, metadata, key, tempo, dynamics, instrument, expressions, meter\n", "\n", "# MIDI + Audio Rendering\n", "import pretty_midi\n", "import soundfile as sf\n", "\n", "# Audio Effects\n", "from pedalboard import Pedalboard, Reverb, Chorus\n", "from pedalboard.io import AudioFile\n", "\n", "# Utility\n", "import random\n", "import copy" ] }, { "cell_type": "markdown", "id": "be91f7cf-140c-42ee-881f-e4c1dbf3c975", "metadata": {}, "source": [ "The following cell is the user configuration panel, which is where you can choose the instruments, key, tempo etc. of the piece. Make sure the image file is saved in the same folder as this Python file. \n", "\n", "After you are happy with your choices, you can run all cells and the output files will be produced!" ] }, { "cell_type": "code", "execution_count": 5, "id": "6e181892-e51a-48b6-82f4-227bf46d375f", "metadata": {}, "outputs": [], "source": [ "# ======================================\n", "# CELL 2: USER CONFIGURATION PANEL\n", "# ======================================\n", "\n", "# --- IMAGE ---\n", "IMAGE_PATH = \"scream.jpg\" # Name of file you want an image of\n", "\n", "# --- OUTPUT FILES ---\n", "OUTPUT_XML = \"scream.musicxml\" # Name of musicXML file which can then be opened in Musescore or other reading software\n", "OUTPUT_MIDI = \"screammidi.mid\"\n", "OUTPUT_WAV = \"screamwav.wav\"\n", "OUTPUT_WAV_EFFECTS = \"screameffectswav.wav\"\n", "\n", "# --- PIECE NAME ---\n", "PIECE_NAME = \"Scream\"\n", "\n", "# --- MUSICAL STYLE ---\n", "STYLE = \"cinematic\" # User interface - change to either \"classical\" or \"cinematic\"\n", "\n", "# --- CADENCE SETTINGS ---\n", "USE_SUSPENSION = True\n", "\n", "# --- MUSICAL SETTINGS ---\n", "KEY_SIGNATURE = \"Am\" # User interface - change\n", "TEMPO_BPM = 70 # User interface - change\n", "# time signature included here?\n", "\n", "# --- STRUCTURE ---\n", "NO_OF_SLICES = 150 # User interface - change - affects length of piece\n", "REPEAT_AABA = True # something to do with structure - doesn't currently work\n", "\n", "# --- MELODY SETTINGS ---\n", "BASE_OCTAVE = 4 # User interface - change - octave you're centered around - middle C is 4\n", "REGISTER_SPAN = 2 # User interface - change\n", "\n", "# --- INSTRUMENT SELECTION ---\n", "selected_melody_instrument = instrument.Flute() # List: Violin, Viola, Flute, Oboe, Clarinet, Bassoon, Cello, Harp\n", "selected_counter_instrument = instrument.Oboe()\n", "selected_harmony_instrument = instrument.Piano()\n", "selected_bass_instrument = instrument.Violoncello()\n", "\n", "# --- AUDIO SETTINGS ---\n", "ADD_EFFECTS = True\n", "REVERB_AMOUNT = 0.4\n", "CHORUS_AMOUNT = 0.3" ] }, { "cell_type": "code", "execution_count": 6, "id": "dd59266f-3c5f-4144-b3d4-4bb82032cb6f", "metadata": {}, "outputs": [], "source": [ "# ======================================\n", "# CELL 3: Instrument Ranges + Utilities\n", "# ======================================\n", "\n", "INSTRUMENT_RANGES = {\n", " \"Violin\": (55, 103),\n", " \"Viola\": (48, 88),\n", " \"Violoncello\": (36, 76),\n", " \"Flute\": (60, 96),\n", " \"Oboe\": (58, 91),\n", " \"Clarinet\": (50, 94),\n", " \"Bassoon\": (34, 75),\n", " \"Piano\": (21, 108)\n", "}\n", "\n", "def clamp_to_range(pitch, instrument_obj):\n", " # ensures pitch stays playable\n", " name = instrument_obj.instrumentName\n", " low, high = INSTRUMENT_RANGES.get(name, (21,108)) # retrieving the allowed range\n", "\n", " while pitch.midi < low:\n", " pitch.octave += 1\n", " while pitch.midi > high:\n", " pitch.octave -= 1\n", "\n", " return pitch" ] }, { "cell_type": "code", "execution_count": 7, "id": "097e91e7-eb31-4691-b60c-9d966d1e6449", "metadata": {}, "outputs": [], "source": [ "# ======================================\n", "# CELL 4: IMAGE ANALYSIS: Extract Structured Colour Data\n", "# ======================================\n", "\n", "def analyse_image(image_path):\n", "\n", " img = Image.open(image_path).convert(\"RGB\") # open and convert to RGB, each pixel has three values\n", " width, height = img.size\n", " pixels = np.array(img)\n", "\n", " NUMBER_OF_SLICES = min(NO_OF_SLICES, width)\n", " slice_width = width // NUMBER_OF_SLICES # dividing the image vertically into slices\n", "\n", " rgb_data = []\n", " brightness = []\n", " saturation = []\n", " vertical_positions = []\n", "\n", " total_r = 0\n", " total_g = 0\n", " total_b = 0 # used to determine overall dominant colour\n", "\n", " for i in range(NUMBER_OF_SLICES):\n", "\n", " x_start = i * slice_width\n", " x_end = x_start + slice_width # determines boundaries of slices\n", "\n", " section = pixels[:, x_start:x_end, :] # taking the vertical slice\n", "\n", " avg_color = section.mean(axis=(0,1))\n", " r, g, b = avg_color\n", "\n", " rgb_data.append((r, g, b)) # storing the average colour of each slice\n", "\n", " # Accumulate totals for overall colour detection\n", " total_r += r\n", " total_g += g\n", " total_b += b\n", "\n", " # Brightness\n", " brightness.append(np.mean(avg_color) / 255) # brightness is the average of RGB values, then normalised\n", "\n", " # Saturation\n", " r_n, g_n, b_n = r/255, g/255, b/255 # normalising RGB values\n", " _, sat, _ = colorsys.rgb_to_hsv(r_n, g_n, b_n) # converting to HSV, this extracts colour intensity\n", " saturation.append(sat)\n", "\n", " # Y position for melodic contour\n", " vertical_profile = section.mean(axis=2).mean(axis=1)\n", " y_index = np.argmax(vertical_profile)\n", " vertical_positions.append(y_index)\n", "\n", " return rgb_data, brightness, saturation, vertical_positions, height, total_r, total_g, total_b" ] }, { "cell_type": "code", "execution_count": 8, "id": "16df0197-66d5-4b93-89c6-cb56c68e8a0a", "metadata": {}, "outputs": [], "source": [ "# ======================================\n", "# CELL 5: OVERALL COLOUR + CADENCE LOGIC\n", "# ======================================\n", "\n", "def determine_cadence_from_image(total_r, total_g, total_b):\n", "\n", " # store totals in dictionary for sorting\n", " totals = {\"Red\": total_r, \"Green\": total_g, \"Blue\": total_b}\n", "\n", " # sort colours by dominance\n", " sorted_colours = sorted(totals.items(), key=lambda x: x[1], reverse=True)\n", "\n", " # extracting these rankings\n", " dominant_colour = sorted_colours[0][0]\n", " second_colour = sorted_colours[1][0]\n", " weakest_colour = sorted_colours[2][0]\n", "\n", " # extracting corresponding strength value of each colour\n", " dominant_value = sorted_colours[0][1]\n", " second_value = sorted_colours[1][1]\n", " weakest_value = sorted_colours[2][1]\n", "\n", " # detect interrupted cadence case - 2 strong colours and one significantly weaker\n", " if abs(dominant_value - second_value) < 0.05 * dominant_value:\n", " cadence_type = \"Interrupted (V-VI)\"\n", " cadence_degrees = [5, 6]\n", "\n", " # otherwise, we select the type of cadence based on most dominant colour\n", " else:\n", " if dominant_colour == \"Red\":\n", " cadence_type = \"Perfect (V-I)\"\n", " cadence_degrees = [5, 1]\n", "\n", " elif dominant_colour == \"Blue\":\n", " cadence_type = \"Plagal (IV-I)\"\n", " cadence_degrees = [4, 1]\n", "\n", " else: # Green dominant\n", " cadence_type = \"Imperfect (ends on V)\"\n", " cadence_degrees = [1, 5]\n", "\n", " print(\"Overall dominant colour:\", dominant_colour)\n", " print(\"Cadence selected:\", cadence_type)\n", "\n", " return cadence_type, cadence_degrees" ] }, { "cell_type": "code", "execution_count": 9, "id": "f994380b-9a1e-4657-8fff-2edcd85dd656", "metadata": {}, "outputs": [], "source": [ "# ======================================\n", "# CELL 6: RGB → Tonal Scale Degree\n", "# ======================================\n", "\n", "def rgb_to_degree(r, g, b):\n", "\n", " total = r + g + b + 1e-6 # calculate proportional weight of each colour\n", " r_w = r / total\n", " g_w = g / total\n", " b_w = b / total\n", "\n", " if STYLE == \"classical\":\n", " # Strong tonal hierarchy\n", " if r_w > 0.45:\n", " return 1 # tonic\n", " elif g_w > 0.45:\n", " return 3 # mediant\n", " elif b_w > 0.45:\n", " return 5 # dominant\n", " else:\n", " return random.choice([2,4,6,7])\n", "\n", " elif STYLE == \"cinematic\":\n", " # More modal / open feeling\n", " return random.choice([1,2,3,5,6])" ] }, { "cell_type": "code", "execution_count": 10, "id": "21e6e819-4bbe-4106-ad1d-9f518293b8b9", "metadata": {}, "outputs": [], "source": [ "# ======================================\n", "# CELL 7: Generate Score (with Cadence)\n", "# ======================================\n", "\n", "def generate_score():\n", "\n", " # --- Analyse Image --- #\n", " (rgb_data, brightness, saturation, vertical_positions, height, total_r, total_g, total_b) = analyse_image(IMAGE_PATH)\n", "\n", " cadence_type, cadence_degrees = determine_cadence_from_image(total_r, total_g, total_b)\n", "\n", " # --- Initialise score and Metadata --- #\n", "\n", " score = stream.Score()\n", " score.insert(0, metadata.Metadata())\n", " score.insert(0, tempo.MetronomeMark(number=TEMPO_BPM))\n", " score.insert(0, key.Key(KEY_SIGNATURE))\n", " score.insert(0, meter.TimeSignature(\"4/4\"))\n", "\n", " tonal_key = key.Key(KEY_SIGNATURE)\n", " scale = tonal_key.getScale() # generating the scale for the chosen key\n", " scale_pitches = [scale.pitchFromDegree(d) for d in range(1, 8)] # stores the pitches of the scale\n", "\n", " score.metadata.title = PIECE_NAME\n", "\n", " # ---------------------------\n", " # PARTS SETUP\n", " # ---------------------------\n", " melody = stream.Part()\n", " melody.insert(0, selected_melody_instrument)\n", "\n", " counter = stream.Part()\n", " counter.insert(0, selected_counter_instrument)\n", "\n", " harmony = stream.Part()\n", " harmony.insert(0, selected_harmony_instrument)\n", "\n", " bass = stream.Part()\n", " bass.insert(0, selected_bass_instrument)\n", "\n", "\n", " # ---------------------------\n", " # RHYTHM ENGINE\n", " # ---------------------------\n", " rhythm_patterns = [\n", " [1, 0.5, 0.5],\n", " [0.75, 0.25, 0.5],\n", " [0.5, 0.5, 0.5, 0.5],\n", " [1.5, 0.5],\n", " [0.25, 0.25, 0.5, 1],\n", " [0.5, 1, 0.5],\n", " [0.25, 0.75, 0.5]\n", " ]\n", "\n", " current_pattern = random.choice(rhythm_patterns)\n", " pattern_index = 0\n", " elapsed_time = 0\n", " current_dynamic = None\n", " current_bar = 1 # tracks harmonic bars\n", "\n", " # harmony_interval = 4\n", " # next_harmony_time = harmony_interval\n", "\n", " # ---------------------------\n", " # MAIN COMPOSITION LOOP\n", " # ---------------------------\n", " for i in range(NO_OF_SLICES):\n", "\n", " r, g, b = rgb_data[i] # average colour of the slice\n", "\n", " # pitch derived from RGB proportions\n", " degree = rgb_to_degree(r, g, b) # converts colour to degree of scale\n", " pitch = scale.pitchFromDegree(degree) # converts degree to pitch\n", "\n", " # melodic contour from vertical positions\n", " octave_shift = int((1 - vertical_positions[i] / height) * REGISTER_SPAN)\n", " pitch.octave = BASE_OCTAVE + octave_shift\n", " pitch = clamp_to_range(pitch, selected_melody_instrument) # ensures notes are within instrument range\n", "\n", " # ---- Rhythm selection ----\n", " duration = current_pattern[pattern_index]\n", " pattern_index += 1\n", " if pattern_index >= len(current_pattern):\n", " current_pattern = random.choice(rhythm_patterns)\n", " pattern_index = 0\n", "\n", " # ---- Melody ----\n", " if random.random() < 0.12:\n", " m_obj = note.Rest(quarterLength=duration)\n", " else:\n", " m_obj = note.Note(pitch, quarterLength=duration)\n", " melody.append(m_obj)\n", "\n", " # ---- Countermelody (3rd below) ----\n", " counter_degree = (degree - 2 - 1) % 7 + 1\n", " counter_pitch = scale.pitchFromDegree(counter_degree)\n", " counter_pitch.octave = pitch.octave\n", " if counter_pitch.midi >= pitch.midi:\n", " counter_pitch.octave -= 1\n", " counter_note = note.Note(counter_pitch, quarterLength=duration)\n", " counter.append(counter_note)\n", "\n", " # ---- Dynamics ----\n", " if brightness[i] < 0.3:\n", " dyn_mark = \"p\"\n", " elif brightness[i] < 0.5:\n", " dyn_mark = \"mp\"\n", " elif brightness[i] < 0.7:\n", " dyn_mark = \"mf\"\n", " else:\n", " dyn_mark = \"f\"\n", "\n", " if dyn_mark != current_dynamic:\n", " melody.insert(melody.highestTime, dynamics.Dynamic(dyn_mark))\n", " counter.insert(counter.highestTime, dynamics.Dynamic(dyn_mark))\n", " harmony.insert(harmony.highestTime, dynamics.Dynamic(dyn_mark))\n", " bass.insert(bass.highestTime, dynamics.Dynamic(dyn_mark))\n", " current_dynamic = dyn_mark\n", "\n", " elapsed_time += duration\n", "\n", " # ---------------------------\n", " # HARMONY\n", " # ---------------------------\n", "\n", " if elapsed_time >= current_bar * 4:\n", "\n", " sat = saturation[i]\n", "\n", " if sat < 0.3:\n", " chord_degrees = [degree, (degree+2-1)%7+1, (degree+4-1)%7+1]\n", "\n", " elif sat < 0.6:\n", " chord_degrees = [degree, (degree+2-1)%7+1, (degree+4-1)%7+1, (degree+6-1)%7+1]\n", "\n", " else:\n", " chord_degrees = [degree, (degree+2-1)%7+1, (degree+4-1)%7+1, (degree+6-1)%7+1]\n", "\n", " chord_pitches = [scale.pitchFromDegree(d) for d in chord_degrees]\n", "\n", "\n", " # First chord: dotted minim (3 beats)\n", " harm1 = chord.Chord(chord_pitches)\n", " harm1.quarterLength = 3\n", " harmony.append(harm1)\n", "\n", " # Second chord: same harmony, crotchet (1 beat)\n", " harm2 = chord.Chord(chord_pitches)\n", " harm2.quarterLength = 1\n", " harmony.append(harm2)\n", "\n", " # Bass mirrors harmony root\n", " root_pitch = scale.pitchFromDegree(degree)\n", " root_pitch.octave = 2\n", " bass1 = note.Note(root_pitch, quarterLength=3)\n", " bass2 = note.Note(root_pitch, quarterLength=1)\n", " bass.append(bass1)\n", " bass.append(bass2)\n", "\n", " current_bar += 1 # advance harmonic bar\n", "\n", "\n", "\n", " # ---------------------------------\n", " # ALIGN BEFORE CADENCE\n", " # ---------------------------------\n", " parts = [melody, counter, harmony, bass]\n", " max_time = max(p.highestTime for p in parts)\n", "\n", " # Force cadence to start at next full bar\n", " remainder = max_time % 4\n", " if remainder != 0:\n", " padding = 4 - remainder\n", " else:\n", " padding = 0\n", "\n", " for p in parts:\n", " if p.highestTime < max_time:\n", " p.append(note.Rest(quarterLength=max_time - p.highestTime))\n", " if padding > 0:\n", " p.append(note.Rest(quarterLength=padding))\n", " # ---------------------------------\n", " # FINAL CADENCE\n", " # ---------------------------------\n", "\n", " base_tempo = TEMPO_BPM\n", " cadence_start = max(p.highestTime for p in [melody, counter, harmony, bass])\n", " melody.insert(cadence_start, expressions.TextExpression(\"rit.\"))\n", "\n", " # ---- Melody anticipation ----\n", " anticipation = note.Note(scale_pitches[4]) # Dominant\n", " anticipation.quarterLength = 4\n", " melody.append(anticipation)\n", "\n", " final_melody = note.Note(scale_pitches[4]) # Tonic\n", " final_melody.quarterLength = 4\n", " final_melody.expressions.append(expressions.Fermata())\n", " melody.append(final_melody)\n", "\n", " # ---- Countermelody resolution ----\n", "\n", " final_counter = note.Note(scale_pitches[3])\n", " final_counter.quarterLength = 4\n", " counter.append(final_counter)\n", "\n", "\n", " step1 = note.Note(scale_pitches[3])\n", " step1.quarterLength = 1\n", " step2 = note.Note(scale_pitches[2])\n", " step2.quarterLength = 1\n", " step3 = note.Note(scale_pitches[1])\n", " step3.quarterLength = 1\n", " resolution = note.Note(scale_pitches[2])\n", " resolution.quarterLength = 1\n", " resolution.expressions.append(expressions.Fermata())\n", "\n", " counter.append(step1)\n", " counter.append(step2)\n", " counter.append(step3)\n", " counter.append(resolution)\n", "\n", " current_time = cadence_start\n", "\n", " for i, d in enumerate(cadence_degrees):\n", "\n", " # Gradual tempo reduction\n", " rit_value = base_tempo * (0.9 ** (i + 1))\n", " score.insert(current_time, tempo.MetronomeMark(number=rit_value))\n", "\n", " root = scale.pitchFromDegree(d)\n", " final_chord = chord.Chord([\n", " root,\n", " scale.pitchFromDegree((d+2-1)%7+1),\n", " scale.pitchFromDegree((d+4-1)%7+1)\n", " ])\n", "\n", " final_chord.quarterLength = 4\n", " if d == 1:\n", " final_chord.expressions.append(expressions.Fermata())\n", " harmony.append(final_chord)\n", "\n", "\n", " bass_note = note.Note(root)\n", " bass_note.octave = 2\n", " bass_note.quarterLength = 4\n", " if d == 1:\n", " bass_note.expressions.append(expressions.Fermata())\n", " bass.append(bass_note)\n", "\n", "\n", " # ---------------------------\n", " # FINAL ALIGNMENT CHECK\n", " # ---------------------------\n", "\n", " parts = [melody, counter, harmony, bass]\n", " max_time = max(p.highestTime for p in parts)\n", "\n", " for p in parts:\n", " remaining = max_time - p.highestTime\n", " if remaining > 0:\n", " p.append(note.Rest(quarterLength=remaining))\n", "\n", " # ---------------------------\n", " # ADD PARTS TO SCORE\n", " # ---------------------------\n", " score.append(melody)\n", " score.append(counter)\n", " score.append(harmony)\n", " score.append(bass)\n", "\n", " return score" ] }, { "cell_type": "code", "execution_count": 11, "id": "0d6c1e8f-5aa1-40fc-ad0e-8ce7ac3941a6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overall dominant colour: Red\n", "Cadence selected: Perfect (V-I)\n", "MusicXML and MIDI successfully created.\n" ] } ], "source": [ "# ======================================\n", "# CELL 8: Export Symbolic Files\n", "# ======================================\n", "\n", "score = generate_score()\n", "\n", "score.write(\"musicxml\", OUTPUT_XML)\n", "score.write(\"midi\", OUTPUT_MIDI)\n", "\n", "print(\"MusicXML and MIDI successfully created.\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "dde21125-c6be-4d13-8835-37a5ca6fc047", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Audio rendering complete.\n" ] } ], "source": [ "# ======================================\n", "# CELL 9: Convert MIDI → WAV\n", "# ======================================\n", "\n", "# load MIDI\n", "midi_data = pretty_midi.PrettyMIDI(OUTPUT_MIDI)\n", "\n", "# assign proper General MIDI programs\n", "for inst in midi_data.instruments:\n", " try:\n", " inst.program = pretty_midi.instrument_name_to_program(inst.name)\n", " except:\n", " inst.program = 0\n", "\n", "audio = midi_data.synthesize(fs=44100, wave=np.sin)\n", "\n", "Audio = audio / np.max(np.abs(audio)) #normalise\n", "\n", "sf.write(OUTPUT_WAV, Audio, 44100)\n", "\n", "print(\"Audio rendering complete.\")" ] }, { "cell_type": "code", "execution_count": 13, "id": "ada26330-fabc-4bca-8add-cbe53653da58", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Effects applied.\n" ] } ], "source": [ "# ======================================\n", "# CELL 10: Add Reverb / Chorus\n", "# ======================================\n", "\n", "if ADD_EFFECTS:\n", "\n", " board = Pedalboard([\n", " Chorus(depth=CHORUS_AMOUNT),\n", " Reverb(room_size=REVERB_AMOUNT)\n", " ])\n", "\n", " with AudioFile(OUTPUT_WAV) as f:\n", " audio = f.read(f.frames)\n", " effected = board(audio, f.samplerate)\n", "\n", " with AudioFile(OUTPUT_WAV_EFFECTS, 'w', f.samplerate, effected.shape[0]) as f:\n", " f.write(effected)\n", "\n", " print(\"Effects applied.\")" ] }, { "cell_type": "markdown", "id": "ffad5c9f-1ad7-4cc0-a562-4abd4e744644", "metadata": {}, "source": [ "There will be 4 output files in the same folder the image file was saved in. The two .wav files can be downloaded to listen to as audio files, and the .xml file can be opened in MuseScore or similar compositional software to view the score and individual parts. " ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:base] *", "language": "python", "name": "conda-base-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }