-
Notifications
You must be signed in to change notification settings - Fork 0
/
TPF_ModeloCNN_CargaImágenes y RtaVoz
1 lines (1 loc) · 172 KB
/
TPF_ModeloCNN_CargaImágenes y RtaVoz
1
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7,"status":"ok","timestamp":1717957537201,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"rVomi3VaoeJy","outputId":"71a4794a-8ad4-4a71-8345-5ace6910c764"},"outputs":[{"output_type":"stream","name":"stdout","text":["Failed to load (likely expired) https://storage.googleapis.com/kaggle-data-sets/111880/269359/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240602%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240602T202631Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=4f332d0fb3ca065027acca0dfe4b89ce077bf569d7c62d4d61c94aa9c74d50cf95cd3ed9c4c39b3c439852afdfcd0113f67f9db54f9ce449dddbd33648272bee38c3425f043c86dc547cbff27b6e284abcf1825dba293ef27b10807ed74c3cf727a0a7ce4507fc73a97e756d9ba7d8c0241eeaa59e629ec3c13a2348c7d75d6527432a4b3f4f83449b33814565207bade8b4960ee4a6b2a0863570a5ee38e73171ee2b0f51753f3100fddd56c92f0a8d3e437ca7899bff73501a5eb09a940791a528bdc225b53d6efda312da31823dc9bbc51d88fc3fb0d5799b2ea1bba59d5fb4e39431e82f1b9d8e818be30e4866b8e32e29c8d5eb8c79cb90e7997dad0747 to path /kaggle/input/intel-image-classification\n","Data source import complete.\n"]}],"source":["\n","# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES\n","# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,\n","# THEN FEEL FREE TO DELETE THIS CELL.\n","# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON\n","# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR\n","# NOTEBOOK.\n","\n","import os\n","import sys\n","from tempfile import NamedTemporaryFile\n","from urllib.request import urlopen\n","from urllib.parse import unquote, urlparse\n","from urllib.error import HTTPError\n","from zipfile import ZipFile\n","import tarfile\n","import shutil\n","\n","CHUNK_SIZE = 40960\n","DATA_SOURCE_MAPPING = 'intel-image-classification:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F111880%2F269359%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240602%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240602T202631Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D4f332d0fb3ca065027acca0dfe4b89ce077bf569d7c62d4d61c94aa9c74d50cf95cd3ed9c4c39b3c439852afdfcd0113f67f9db54f9ce449dddbd33648272bee38c3425f043c86dc547cbff27b6e284abcf1825dba293ef27b10807ed74c3cf727a0a7ce4507fc73a97e756d9ba7d8c0241eeaa59e629ec3c13a2348c7d75d6527432a4b3f4f83449b33814565207bade8b4960ee4a6b2a0863570a5ee38e73171ee2b0f51753f3100fddd56c92f0a8d3e437ca7899bff73501a5eb09a940791a528bdc225b53d6efda312da31823dc9bbc51d88fc3fb0d5799b2ea1bba59d5fb4e39431e82f1b9d8e818be30e4866b8e32e29c8d5eb8c79cb90e7997dad0747'\n","\n","KAGGLE_INPUT_PATH='/kaggle/input'\n","KAGGLE_WORKING_PATH='/kaggle/working'\n","KAGGLE_SYMLINK='kaggle'\n","\n","!umount /kaggle/input/ 2> /dev/null\n","shutil.rmtree('/kaggle/input', ignore_errors=True)\n","os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)\n","os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)\n","\n","try:\n"," os.symlink(KAGGLE_INPUT_PATH, os.path.join(\"..\", 'input'), target_is_directory=True)\n","except FileExistsError:\n"," pass\n","try:\n"," os.symlink(KAGGLE_WORKING_PATH, os.path.join(\"..\", 'working'), target_is_directory=True)\n","except FileExistsError:\n"," pass\n","\n","for data_source_mapping in DATA_SOURCE_MAPPING.split(','):\n"," directory, download_url_encoded = data_source_mapping.split(':')\n"," download_url = unquote(download_url_encoded)\n"," filename = urlparse(download_url).path\n"," destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)\n"," try:\n"," with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:\n"," total_length = fileres.headers['content-length']\n"," print(f'Downloading {directory}, {total_length} bytes compressed')\n"," dl = 0\n"," data = fileres.read(CHUNK_SIZE)\n"," while len(data) > 0:\n"," dl += len(data)\n"," tfile.write(data)\n"," done = int(50 * dl / int(total_length))\n"," sys.stdout.write(f\"\\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded\")\n"," sys.stdout.flush()\n"," data = fileres.read(CHUNK_SIZE)\n"," if filename.endswith('.zip'):\n"," with ZipFile(tfile) as zfile:\n"," zfile.extractall(destination_path)\n"," else:\n"," with tarfile.open(tfile.name) as tarfile:\n"," tarfile.extractall(destination_path)\n"," print(f'\\nDownloaded and uncompressed: {directory}')\n"," except HTTPError as e:\n"," print(f'Failed to load (likely expired) {download_url} to path {destination_path}')\n"," continue\n"," except OSError as e:\n"," print(f'Failed to load {download_url} to path {destination_path}')\n"," continue\n","\n","print('Data source import complete.')\n"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":28440,"status":"ok","timestamp":1717957571683,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"Te-u8k8MUyv0","outputId":"02fc7d0b-386a-407c-ce34-2e9a62aca312"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"markdown","metadata":{"id":"L248uxhQqjtW"},"source":["# **INSTITUTO SUPERIOR POLITÉCNICO CÓRDOBA**\n","\n","# **Módulo Científico de Datos**\n","### **TPF: Clasificación de Imágenes utilizando Redes Neuronales Convulcionales**\n","\n"," ### **Grupo 18:**\n"," Viviana Farabollini\n"," Mariano Ledezma\n"," Natalia Lamia\n","\n"]},{"cell_type":"markdown","metadata":{"id":"MU_bxoMHqsNq"},"source":["\n","Para realizar el presente trabajo utilizamos el dataset proporcionado por \"Kaggle Intel Image Classification\" del siguiente link: https://www.kaggle.com/puneet6060/intel-image-classification/data\n","\n","\n","El mismo posee 3 carpetas las cuales contienen:\n","\n","1. Carpeta de entrenamiento:\n"," + 6 carpetas con las imagenes para entrenar.\n","2. Carpeta de test:\n"," + 6 carpetas con las imagenes para testear.\n","3. Carpeta de predicción:\n"," + Imagenes para que el modelo prediga su clasificación.\n"," \n","## **Objetivo:**\n","\n","* Desarrollar y entrenar un modelo de Red Neuronal Convolucional (CNN) para clasificar y predecir imágenes organizadas en una estructura grid, con el objetivo de facilitar la futura construcción de una aplicación para una agencia de turismo.\n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"gSdYmlmlQq7G"},"source":["#**Importación de librerías**"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"tWs2ya5kVFAn","executionInfo":{"status":"ok","timestamp":1717957582448,"user_tz":180,"elapsed":4229,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"}}},"outputs":[],"source":["# Procedemos a importar las librerias necesarias para desarrollar el proyecto.\n","\n","import numpy as np\n","import os\n","from tensorflow.keras import Sequential\n","from sklearn.metrics import confusion_matrix\n","from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Dense, Flatten, Reshape, Conv2DTranspose, UpSampling2D, Input,MaxPooling2D\n","from tensorflow.keras import Model\n","import seaborn as sn\n","from tensorflow.keras.optimizers import SGD\n","import warnings\n","warnings.filterwarnings(\"ignore\")\n","\n","from keras.preprocessing.image import ImageDataGenerator\n","import matplotlib.pyplot as plt"]},{"cell_type":"markdown","metadata":{"id":"XeaCihe3Q1KF"},"source":["# **Carpeta contenedora del DATASET**"]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":671,"status":"ok","timestamp":1717957591431,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"aVnIFHgRVyJ2","outputId":"2e4f58f7-83bd-4de9-a69e-09ea332b9984"},"outputs":[{"output_type":"stream","name":"stdout","text":["[Errno 2] No such file or directory: '/kaggle/input/intel-image-classification'\n","/content\n"]}],"source":["# Nos desplazamos por el Drive hasta la carpeta contenedora del DataSet.\n","%cd '/kaggle/input/intel-image-classification'"]},{"cell_type":"markdown","metadata":{"id":"TXfsE3XXRDWF"},"source":["# **Definición de Rutas**"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"GgHxhvsxWEtR"},"outputs":[],"source":["# Definimos las rutas donde se alojan los datos.\n","train = 'seg_train/seg_train/'\n","validation = 'seg_test/seg_test/'\n","test = 'seg_pred/'"]},{"cell_type":"markdown","metadata":{"id":"gcUr8MIrczT_"},"source":["# **Definición del Generador de Datos**"]},{"cell_type":"markdown","metadata":{"id":"417qjXAqRLpV"},"source":["## **Preparación de imágenes para entrenamiento, validación y prueba**"]},{"cell_type":"markdown","metadata":{"id":"uCeE53YSRlyb"},"source":["Con la importación de la clase 'ImageDataGenerator' de Keras, se generan lotes de datos de imágenes con aumentos y transformaciones en tiempo real.\n","\n"," **Creación de *instancias* de ImageDataGenerator:**\n","\n"," Se crean tres instancias para los conjuntos de datos: *entrenamiento, validación y prueba*. El argumento rescale=1./255 normaliza los valores de los píxeles de las imágenes al rango [0, 1] (originalmente los valores de los píxeles están en el rango [0, 255]).\n","\n","\n","**Configuración del generador de datos de entrenamiento:**\n","\n","*train:* Directorio donde se encuentran las imágenes de entrenamiento.\n","\n","*batch_size=32*: Las imágenes se procesan en lotes de 32.\n","\n","*target_size=(150, 150):* Las imágenes se redimensionan a 150x150 píxeles.\n","\n","*class_mode='categorical'*: Se asume que las etiquetas de las imágenes son categóricas (más de dos clases).\n","\n","\n","**Configuración del generador de datos de validación:**\n","\n","*validation:* Directorio donde se encuentran las imágenes de validación.\n","\n","*batch_size=32:* Las imágenes se procesan en lotes de 32.\n","\n","*shuffle=False:* Las imágenes no se mezclan, lo cual es útil para mantener un orden predecible durante la evaluación.\n","\n","*target_size=(150, 150):* Las imágenes se redimensionan a 150x150 píxeles.\n","\n","*class_mode='categorical':* Se asume que las etiquetas de las imágenes son categóricas.\n","\n","**Configuración del generador de datos de prueba:**\n","\n","*test:* Directorio donde se encuentran las imágenes de prueba.\n","\n","*target_size=(150,150):* Las imágenes se redimensionan a 150x150 píxeles.\n","\n","*batch_size=1:* Las imágenes se procesan una a una, lo cual puede ser útil para la evaluación precisa de cada imagen de prueba.\n","\n","Así, en el siguiente script se prepara los datos de imágenes para el modelo de red neuronal, asegurando que todas las imágenes tengan el mismo tamaño y estén normalizadas, lo que es crucial para un entrenamiento eficaz del modelo.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1186,"status":"ok","timestamp":1717467104186,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"MUODYc1uYrTB","outputId":"6ef4a1b5-5b1d-4510-b49d-159f06f9c17f"},"outputs":[{"name":"stdout","output_type":"stream","text":["Found 14034 images belonging to 6 classes.\n","Found 3000 images belonging to 6 classes.\n","Found 7301 images belonging to 1 classes.\n"]}],"source":["# Utilizamos la libreria proporcionada por Keras para leer las imagenes del DataSet.\n","\n","train_datagen = ImageDataGenerator(rescale = 1./255)\n","\n","validation_datagen = ImageDataGenerator(rescale = 1./255)\n","\n","train_generator = train_datagen.flow_from_directory(train,\n"," batch_size=32,\n"," target_size = (150, 150),\n"," class_mode='categorical')\n","\n","validation_generator = validation_datagen.flow_from_directory(validation,\n"," batch_size=32,\n"," shuffle=False,\n"," target_size = (150, 150),\n"," class_mode='categorical')\n","\n","\n","test_datagen = ImageDataGenerator(rescale = 1./255)\n","test_generator = test_datagen.flow_from_directory(test, target_size = (150, 150), batch_size = 1)"]},{"cell_type":"markdown","metadata":{"id":"dEHUm8XUWiZK"},"source":["## **Clases**"]},{"cell_type":"markdown","metadata":{"id":"_rtzVIF9XEFS"},"source":["**Nombres de las clases**\n","\n","*train_generator.class_indices:* Esta propiedad del objeto train_generator devuelve un diccionario que asigna nombres de clases a sus respectivos índices. Por ejemplo, {'buildings': 0, 'forest': 1 ......etc.}.\n","\n","Se imprime el diccionario de nombres de clases y sus índices, lo cual es útil para verificar que las clases han sido correctamente identificadas y asignadas.\n","\n","*list(class_names.keys()):* convierte las claves del diccionario class_names (que son los nombres de las clases) en una lista.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":338,"status":"ok","timestamp":1717467112897,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"5y-i6yRKZOJv","outputId":"f8c5f824-49cf-421a-91ff-892c94b3281e"},"outputs":[{"name":"stdout","output_type":"stream","text":["{'buildings': 0, 'forest': 1, 'glacier': 2, 'mountain': 3, 'sea': 4, 'street': 5}\n"]}],"source":["# Obtenemos los nombres de las clases.\n","class_names = train_generator.class_indices\n","print(class_names)\n","label=list(class_names.keys())"]},{"cell_type":"markdown","metadata":{"id":"j6vlAxh4dK5I"},"source":["# **Visualización de los Datos**"]},{"cell_type":"markdown","metadata":{"id":"MxPDN8ElYFIO"},"source":["## **Iteración sobre el generador de datos de entrenamiento**"]},{"cell_type":"markdown","metadata":{"id":"RDdSt1jVZQy-"},"source":["Utilizando las bibliotecas *matplotlib.pyplot* y *numpy*\n","\n","Se un bucle que itera sobre los lotes de datos generados por *train_generator*. En cada iteración, *img_batch* contiene un lote de imágenes y *label_batch* contiene el correspondiente lote de etiquetas.\n","\n","Se imprime la forma del lote de imágenes. Por ejemplo, si *batch_size=32* y las imágenes se redimensionan a 150x150 píxeles con 3 canales de color (RGB), la forma será (32, 150, 150, 3).\n","\n","Se crea una nueva figura de Matplotlib con un tamaño especificado de 20x10 pulgadas, que será utilizada para mostrar las imágenes del lote.\n","\n","Se construye un bucle que itera sobre las 32 imágenes en el lote.\n","\n","*sub = plt.subplot(4, 8, ix + 1):* Crea un subgráfico en la figura con una cuadrícula de 4 filas y 8 columnas. ix + 1 indica la posición del subgráfico.\n","\n","*plt.imshow(img_batch[ix]):* Muestra la imagen ix del lote.\n","\n","*plt.xticks([])* y *plt.yticks([]):* Elimina las marcas de los ejes x e y para que la visualización sea más clara.\n","\n","*plt.xlabel(label[np.argmax(label_batch[ix])]):* Muestra el nombre de la clase como la etiqueta del eje x. *np.argmax(label_batch[ix])* obtiene el índice de la clase con la mayor probabilidad para la imagen ix, y label se usa para obtener el nombre de la clase correspondiente.\n","\n","De este modo, se puede visualizar un lote de imágenes junto con sus etiquetas, asegurando que el generador de datos funciona correctamente y que las imágenes están etiquetadas de manera adecuada; y, se puede verificar visualmente que las imágenes han sido correctamente preprocesadas (redimensionadas y normalizadas).\n","\n","\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":629,"output_embedded_package_id":"1k8FB-nffu1I1fd9TH9NDOM7I4mmmKOCU"},"executionInfo":{"elapsed":5239,"status":"ok","timestamp":1717467125486,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"sOhcAoWrZTi_","outputId":"48e3c631-cad6-4156-e2f7-c9056015bd02"},"outputs":[{"output_type":"display_data","data":{"text/plain":"Output hidden; open in https://colab.research.google.com to view."},"metadata":{}}],"source":["# Imprimimos algunas imagenes que se utilizarán en el entrenamiento con su respectivo rótuolo.\n","for img_batch,label_batch in train_generator:\n"," print(img_batch.shape)\n"," plt.figure(figsize=(20,10))\n"," for ix in range(32):\n"," sub = plt.subplot(4, 8, ix + 1)\n"," plt.imshow(img_batch[ix])\n"," plt.xticks([])\n"," plt.yticks([])\n"," plt.xlabel(label[np.argmax(label_batch[ix])])\n"," break"]},{"cell_type":"markdown","metadata":{"id":"IkrM3fnpcQR4"},"source":["# **Creación del Modelo**"]},{"cell_type":"markdown","metadata":{"id":"irLaiVhoepSV"},"source":["## **Definición de la función *build_model***\n","\n","Esta función acepta un parámetro input_shape con un valor predeterminado de (150, 150, 3), que representa el tamaño de entrada de las imágenes (150x150 píxeles y 3 canales de color RGB).\n","\n","## **Inicialización del Modelo Secuencial**\n","\n","Se crea una instancia del modelo secuencial de Keras, lo que permite agregar capas de la red neuronal de manera lineal.\n","\n","* *Primer capa convulcional:* Agrega una capa convolucional con 64 filtros, un tamaño de kernel de 3x3, un paso (stride) de 2x2, relleno (padding) 'same' para conservar el tamaño de la entrada, y la función de activación ReLU. Esta capa también especifica el tamaño de entrada input_shape.\n","\n","* *Segunda capa convulcional:* Agrega una segunda capa convolucional con 128 filtros y las mismas configuraciones de kernel, stride, padding y activación que la primera capa, excepto el input_shape.\n","\n","* *Capa de Max Pooling:* Agrega una capa de max pooling con tamaño de pool predeterminado (2x2), que reduce la dimensión espacial de la salida de la capa convolucional anterior.\n","\n","* *Tercer capa convulcional:* Agrega una tercera capa convolucional con 256 filtros y las mismas configuraciones de kernel, stride, padding y activación.\n","\n","* *Cuarta capa convulsional:* Agrega una cuarta capa convolucional con 512 filtros y las mismas configuraciones de kernel, stride, padding y activación.\n","\n","* *Otra capa de Max Pooling:* Agrega otra capa de max pooling para reducir nuevamente la dimensión espacial de la salida.\n","\n","* *Aplanar la salida:* Aplana la salida tridimensional de la última capa convolucional en un vector unidimensional para poder conectarla a las capas completamente conectadas (densas).\n","\n","* *Primer capa densa:* Agrega una capa de dropout con una tasa del 20% para prevenir el sobreajuste, y una capa densa con 1048 neuronas y la función de activación ReLU.\n","\n","* *Segunda capa densa:* Agrega otra capa de dropout con una tasa del 20%, y una capa densa con tantas neuronas como clases en class_names, utilizando la función de activación softmax para obtener probabilidades de pertenencia a cada clase.\n","\n","* *Compilación del modelo:* usando el optimizador Adam, la función de pérdida de entropía cruzada categórica (adecuada para clasificación multiclase) y la métrica de precisión para evaluar el rendimiento del modelo.\n","\n","* *Devolución del modelo construido y compilado*\n","\n","En este script se define una función para construir y compilar una red neuronal convolucional (CNN) con varias capas convolucionales, de pooling, aplanamiento, dropout y densas. El modelo está diseñado para aceptar imágenes de tamaño 150x150 con 3 canales de color y clasificar imágenes en tantas categorías como clases haya en class_names.\n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"GaFfKDsNZoLf"},"outputs":[],"source":["# Definimos el método que creará la arquitectura de la red neuronal.\n","def build_model(input_shape = (150, 150, 3)):\n"," Model = Sequential()\n"," Model.add(Conv2D(64, kernel_size = (3, 3), strides = (2, 2), padding = 'same', activation = 'relu', input_shape = input_shape))\n"," Model.add(Conv2D(128, kernel_size = (3, 3), strides = (2, 2), padding = 'same', activation = 'relu'))\n"," Model.add(MaxPool2D())\n"," Model.add(Conv2D(256, kernel_size = (3, 3), strides = (2, 2), padding = 'same', activation = 'relu'))\n"," Model.add(Conv2D(512, kernel_size = (3, 3), strides = (2, 2), padding = 'same', activation = 'relu'))\n"," Model.add(MaxPool2D())\n"," Model.add(Flatten())\n"," Model.add(Dropout(0.2))\n"," Model.add(Dense(1048, activation = 'relu'))\n"," Model.add(Dropout(0.2))\n"," Model.add(Dense(len(class_names), activation = 'softmax'))\n","\n"," Model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])\n","\n"," return Model"]},{"cell_type":"markdown","metadata":{"id":"FYH6IGk2ddrr"},"source":["# **Entrenamiento del Modelo**"]},{"cell_type":"markdown","metadata":{"id":"Nbngzh34iVOi"},"source":["## **Creación de la red neuronal para ser entrenada**\n","\n","Se llama a la función *build_model*, y ésta devuelve un modelo de red neuronal convolucional (CNN) ya compilado con la arquitectura definida anteriormente.\n","\n","*model.summary():* imprime un resumen detallado de la arquitectura del modelo en la consola y proporciona información sobre cada capa del modelo, incluyendo el tipo de capa, la forma de la salida de cada capa, y el número de parámetros que se pueden entrenar (weights).\n","\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":642,"status":"ok","timestamp":1717467140744,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"AV0bdlogZysZ","outputId":"f343b06f-072d-4d60-fd92-c1972d5e088e"},"outputs":[{"name":"stdout","output_type":"stream","text":["Model: \"sequential\"\n","_________________________________________________________________\n"," Layer (type) Output Shape Param # \n","=================================================================\n"," conv2d (Conv2D) (None, 75, 75, 64) 1792 \n"," \n"," conv2d_1 (Conv2D) (None, 38, 38, 128) 73856 \n"," \n"," max_pooling2d (MaxPooling2 (None, 19, 19, 128) 0 \n"," D) \n"," \n"," conv2d_2 (Conv2D) (None, 10, 10, 256) 295168 \n"," \n"," conv2d_3 (Conv2D) (None, 5, 5, 512) 1180160 \n"," \n"," max_pooling2d_1 (MaxPoolin (None, 2, 2, 512) 0 \n"," g2D) \n"," \n"," flatten (Flatten) (None, 2048) 0 \n"," \n"," dropout (Dropout) (None, 2048) 0 \n"," \n"," dense (Dense) (None, 1048) 2147352 \n"," \n"," dropout_1 (Dropout) (None, 1048) 0 \n"," \n"," dense_1 (Dense) (None, 6) 6294 \n"," \n","=================================================================\n","Total params: 3704622 (14.13 MB)\n","Trainable params: 3704622 (14.13 MB)\n","Non-trainable params: 0 (0.00 Byte)\n","_________________________________________________________________\n"]}],"source":["# Creamos la red neuronal para que se entrene utilizando el TPU.\n","model = build_model()\n","model.summary()"]},{"cell_type":"markdown","metadata":{"id":"6t87SrSZkiYv"},"source":["# **Entrenamiento del modelo**"]},{"cell_type":"markdown","metadata":{"id":"OboCYhFtkmMa"},"source":["Se entrena el modelo CNN utilizando el generador de datos de entrenamiento *train_generator* durante 30 epochs (iterations over the entire dataset).\n","\n","*shuffle=True* asegura que los datos se mezclen aleatoriamente en cada epoch, lo que ayuda a generalizar mejor y evita que el modelo aprenda patrones específicos del orden de los datos.\n","\n","*validation_data = validation_generator* especifica el generador de datos de validación que se utilizará para evaluar el modelo después de cada epoch de entrenamiento. Esto permite monitorear la performance del modelo en datos no vistos durante el entrenamiento.\n","\n","*hist* almacena el historial del entrenamiento, que incluye información sobre la pérdida y la precisión del modelo en cada epoch tanto para los datos de entrenamiento como de validación. Esto puede ser útil para la visualización y el análisis posterior del rendimiento del modelo."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true,"base_uri":"https://localhost:8080/"},"id":"IWTt_VfnZ55l","outputId":"2b66b542-5275-4a5a-d7a4-a789c2381697"},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1/30\n","439/439 [==============================] - 486s 1s/step - loss: 0.9954 - accuracy: 0.5950 - val_loss: 0.7793 - val_accuracy: 0.6823\n","Epoch 2/30\n","439/439 [==============================] - 495s 1s/step - loss: 0.6855 - accuracy: 0.7480 - val_loss: 0.6350 - val_accuracy: 0.7583\n","Epoch 3/30\n","439/439 [==============================] - 477s 1s/step - loss: 0.5333 - accuracy: 0.8090 - val_loss: 0.5215 - val_accuracy: 0.8080\n","Epoch 4/30\n","439/439 [==============================] - 482s 1s/step - loss: 0.4447 - accuracy: 0.8427 - val_loss: 0.4878 - val_accuracy: 0.8240\n","Epoch 5/30\n","439/439 [==============================] - 472s 1s/step - loss: 0.3552 - accuracy: 0.8744 - val_loss: 0.4725 - val_accuracy: 0.8320\n","Epoch 6/30\n","439/439 [==============================] - 473s 1s/step - loss: 0.2900 - accuracy: 0.8944 - val_loss: 0.4861 - val_accuracy: 0.8363\n","Epoch 7/30\n","439/439 [==============================] - 468s 1s/step - loss: 0.2240 - accuracy: 0.9197 - val_loss: 0.5342 - val_accuracy: 0.8277\n","Epoch 8/30\n","213/439 [=============>................] - ETA: 3:49 - loss: 0.1702 - accuracy: 0.9375"]}],"source":["# Entrenamos el modelo.\n","hist = model.fit_generator(train_generator, epochs = 30, shuffle=True, validation_data = validation_generator)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Oe-Jl83maMrd"},"outputs":[],"source":["# Guardamos los parametros del modelo para futuras pruebas.\n","model.save_weights('/kaggle/working/model_weights_cnn.h5')"]},{"cell_type":"markdown","metadata":{"id":"FD1yqF3Ymttk"},"source":["# **Evaluación del Modelo**"]},{"cell_type":"markdown","metadata":{"id":"SRBN8-mLmyfz"},"source":["## **Evaluación del desempeño durante el entrenamiento**\n","\n","Se generan y visualizan las gráficas históricas de la pérdida y la precisión (loss - accuracy). Estas gráficas son esenciales para entender si el modelo está aprendiendo correctamente o si está experimentando problemas como el sobreajuste.\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":408},"executionInfo":{"elapsed":985,"status":"ok","timestamp":1717366724967,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"vwyByWzLaQ8h","outputId":"45268363-c211-4a5b-f2b4-3790dfe48400"},"outputs":[{"data":{"text/plain":["<matplotlib.legend.Legend at 0x7b2020512410>"]},"execution_count":13,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"\n","text/plain":["<Figure size 1000x400 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["# Observamos la grafica historica de la perdida y el nivel de predicción.\n","plt.figure(figsize = (10, 4))\n","plt.plot(hist.history['loss'], label = 'train_loss')\n","plt.plot(hist.history['accuracy'], label = 'train_accuracy')\n","plt.title('training plots')\n","plt.legend()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5kHXyj1daw7n"},"outputs":[],"source":["# Cargamos los parametros de las neuronas.\n","#model.load_weights('model_weights.h5')"]},{"cell_type":"markdown","metadata":{"id":"FpNn-x0onrY3"},"source":["## **Evaluación en el conjunto de prueba**\n","\n","Se evalúa el modelo utilizando el conjunto de datos de prueba. Proporcionando métricas de desempeño (loss y accuracy) en datos que el modelo no ha visto durante el entrenamiento, lo que es crucial para entender su capacidad de generalización.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3017,"status":"ok","timestamp":1717366732465,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"ptlPWVp5a4YZ","outputId":"4b13bb81-37ac-4530-a1a0-3d65b326121e"},"outputs":[{"name":"stdout","output_type":"stream","text":["94/94 [==============================] - 3s 29ms/step - loss: 1.1895 - accuracy: 0.8113\n"]}],"source":["#loss, accuracy = model.evaluate(test_generator)\n","\n","error = model.evaluate(validation_generator)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PmXG121_a90j"},"outputs":[],"source":["# Preparamos los datos para graficarlos en una matriz de confusión.\n","predictions = model.predict_generator(validation_generator)\n","predicted_classes = np.argmax(predictions, axis=1)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":452},"executionInfo":{"elapsed":1086,"status":"ok","timestamp":1717366743349,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"CVoDn3nubCyX","outputId":"e0341a53-e413-4401-aea4-6ab8a05b7294"},"outputs":[{"data":{"image/png":"\n","text/plain":["<Figure size 640x480 with 2 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["# Graficamos la matriz de confusión.\n","CM = confusion_matrix(validation_generator.classes,predicted_classes)\n","ax = plt.axes()\n","sn.heatmap(CM, annot=True,\n"," annot_kws={\"size\": 10},\n"," fmt=\"d\",#mostrar números de manera no exponencial.\n"," xticklabels=class_names,\n"," yticklabels=class_names,\n"," linewidths=.5,\n"," ax = ax)\n","ax.set_title('Confusion matrix')\n","plt.show()"]},{"cell_type":"markdown","metadata":{"id":"Mns71vGwo3VA"},"source":["## **Puesta a Prueba del modelo**\n","\n","Se configura la visualización de imágenes, se definene filas y columnas para la cuadrícula de subplots para mostrar 32 imágenes (4x8).\n","\n","Se itera sobre los lotes de imágenes del generador de prueba.\n","\n","Se muestran las imágenes.\n","\n","Se hace una predicción para las imágenes: *model.predict(img_batch)* genera una predicción para la imagen.\n","*np.argmax(pred)* obtiene el índice de la clase con la mayor probabilidad.\n","*np.max(pred) * 100* calcula la probabilidad máxima y la convierte en un porcentaje.\n","\n","Se etiqueta en el eje *y* con la probabilidad de la predicción y en el eje *x* con el nombre de la clase predicha.\n","\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"h-wD2LuNbLt-"},"outputs":[],"source":["# Ponemos a prueba el modelo con los datos para testear.\n","index = 1\n","plt.figure(figsize=(20,10))\n","row = 4\n","col = 8\n","for img_batch,_ in test_generator:\n"," sub = plt.subplot(row, col, index)\n"," plt.imshow(img_batch[0])\n"," plt.xticks([])\n"," plt.yticks([])\n"," pred = model.predict(img_batch)\n"," class_key = np.argmax(pred)\n"," prob = np.max(pred) * 100\n"," plt.ylabel('{:.2f}%'.format(prob))\n"," plt.xlabel(label[np.argmax(pred)])\n"," index = index + 1\n"," if index > row * col:\n"," break"]},{"cell_type":"markdown","metadata":{"id":"BpWF1eXKriNX"},"source":["## **Devolución de predicción por Voz**"]},{"cell_type":"markdown","metadata":{"id":"bITmcGktsqYW"},"source":["*model.predict(img_batch)* genera una predicción para la imagen img_batch. Esto dará un vector de probabilidades para cada clase.\n","\n","*np.argmax(pred)* obtenie el índice de la clase con la mayor probabilidad en el vector de predicciones pred. Este índice corresponde a la clase predicha por el modelo.\n","\n","*np.max(pred) * 100* calcula la probabilidad máxima entre todas las clases predichas y la convierte en un porcentaje, proporcionando la confianza del modelo en su predicción.\n","\n","Así se generar un mensaje de voz que anuncia la clase predicha y su probabilidad asociada proporcionando una retroalimentación auditiva sobre las predicciones del modelo. Pensado para futuras aplicaciones de asistencia o accesibilidad."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":6698,"status":"ok","timestamp":1717366778200,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"2c5wkbPmYZ67","outputId":"521a542e-d156-445e-8145-958d39410361"},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting gTTS\n"," Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)\n","Requirement already satisfied: requests<3,>=2.27 in /usr/local/lib/python3.10/dist-packages (from gTTS) (2.31.0)\n","Requirement already satisfied: click<8.2,>=7.1 in /usr/local/lib/python3.10/dist-packages (from gTTS) (8.1.7)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gTTS) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gTTS) (3.7)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gTTS) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gTTS) (2024.2.2)\n","Installing collected packages: gTTS\n","Successfully installed gTTS-2.5.1\n"]}],"source":["#Instalamos e importamos la librería necesaria\n","!pip install gTTS\n","from gtts import gTTS\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":308,"status":"ok","timestamp":1717367506209,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"axqoYdItY0kS","outputId":"d3cb3625-634a-44e4-a0e7-c8b65aa63a6e"},"outputs":[{"name":"stdout","output_type":"stream","text":["1/1 [==============================] - 0s 26ms/step\n"]}],"source":["# Clasificación de la imagen\n","pred = model.predict(img_batch)\n","class_key = np.argmax(pred)\n","prob = np.max(pred) * 100\n","class_name = label[class_key]\n","\n","# Generación del texto a hablar\n","text_to_speak = f'La imagen ha sido clasificada como {class_name} con una probabilidad del {prob:.2f}%.'\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8uB1WsRZYkFi"},"outputs":[],"source":["# Generación del archivo de audio\n","tts = gTTS(text=text_to_speak, lang='es') # 'es' para español\n","tts.save('/kaggle/working/result_audio.mp3')\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":75},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1717367530351,"user":{"displayName":"Viviana Farabollini","userId":"17648384242142966058"},"user_tz":180},"id":"kwBwK-W2Ymdq","outputId":"6fe10f43-6c29-42ed-d377-f9e96ea73b1d"},"outputs":[{"data":{"text/html":["\n"," <audio controls=\"controls\" autoplay=\"autoplay\">\n"," <source src=\"data:audio/mpeg;base64,\" type=\"audio/mpeg\" />\n"," Your browser does not support the audio element.\n"," </audio>\n"," "],"text/plain":["<IPython.lib.display.Audio object>"]},"execution_count":26,"metadata":{},"output_type":"execute_result"}],"source":["from IPython.display import Audio\n","\n","# Reproducción del audio\n","Audio(\"/kaggle/working/result_audio.mp3\", autoplay=True)\n"]},{"cell_type":"markdown","metadata":{"id":"0hHxN9reSfic"},"source":["# **CARGA DE POSIBLES IMÁGENES DE USUARIOS Y SUGERENCIA POR VOZ**"]},{"cell_type":"markdown","metadata":{"id":"E4O5MpH0Tlpz"},"source":["Después de definir y correr el modelo de clasificación y predicción presentamos un script que supone cómo podría utilizarse el mismo para realizar sugerencias de visitas turísticas a usuarios que carguen una o varias imágenes de su interés.\n","\n","Para ello:\n","\n","* Definimos un diccionario *'info_categorias'* que contiene información sobre diferentes categorías de imágenes.\n","* Definimos la función *'predict_image_from_url'* para cargar una imagen desde una URL, preprocesarla y hacer una predicción utilizando un modelo preentrenado.\n","* Definimos las categorías (*class_names*) y se proporcionan las URLs de las imágenes a clasificar.\n","* Iteramos sobre cada URL de imagen en la lista img_urls. Podríamos sólo solicitar una URL al usuario. Creamos una lista para evaluar las sugerencias que se realizan. Para cada imagen, se realiza una predicción utilizando la *función predict_image_from_url*.\n","* Si la predicción es exitosa, se obtiene la clase predicha y se genera un texto basado en la información de la categoría.\n","* Utilizamos la biblioteca gTTS para convertir el texto en voz en español.\n","* Se muestra la imagen original junto con la predicción y se reproduce el audio con la respuesta por voz. Se espera un tiempo para que el audio termine antes de continuar con la siguiente imagen.\n"]},{"cell_type":"code","source":["# Clasificación de la imagen y respuesta por voz\n","\n","from gtts import gTTS\n","from IPython.display import Audio, display\n","import requests\n","from PIL import Image, UnidentifiedImageError\n","from io import BytesIO\n","import numpy as np\n","import matplotlib.pyplot as plt\n","import time\n","\n","# Diccionario con información sobre cada categoría\n","info_categorias = {\n"," \"buildings\": \"te sugiero visitar Buenos Aires, con su arquitectura impresionante y sitios históricos como el Teatro Colón y el Obelisco.\",\n"," \"forest\": \"te recomiendo los Parques Nacionales de la Patagonia, como el Parque Nacional Los Alerces, conocido por sus frondosos bosques.\",\n"," \"glacier\": \"El mejor lugar es el Parque Nacional Los Glaciares en la Patagonia, donde puedes ver el impresionante Glaciar Perito Moreno.\",\n"," \"mountain\": \"No puedes perderte el Aconcagua en Mendoza, la montaña más alta de América.\",\n"," \"sea\": \"Visita Mar del Plata o la Península Valdés, donde puedes disfrutar de playas y avistamiento de ballenas.\",\n"," \"street\": \"Para calles pintorescas, pasea por el barrio de San Telmo en Buenos Aires, con sus calles empedradas y mercados de antigüedades.\"\n","}\n","\n","# Función para cargar una imagen desde una URL y predecir su clase\n","def predict_image_from_url(model, img_url, class_names):\n"," try:\n"," response = requests.get(img_url)\n"," response.raise_for_status() # Verificar si la solicitud fue exitosa\n","\n"," img = Image.open(BytesIO(response.content))\n","\n"," # Mantener una copia de la imagen original\n"," img_original = img.copy()\n","\n"," # Redimensionar la imagen a 150x150 para el modelo\n"," img = img.resize((150, 150))\n"," img_array = np.array(img) / 255.0\n"," img_array = np.expand_dims(img_array, axis=0)\n","\n"," prediction = model.predict(img_array)\n"," predicted_class = np.argmax(prediction)\n"," return predicted_class, prediction[0][predicted_class], img_original\n","\n"," except (requests.exceptions.RequestException, UnidentifiedImageError) as e:\n"," print(f\"Error al procesar la imagen desde la URL {img_url}: {e}\")\n"," return None, None, None\n","\n","class_names = [\"buildings\", \"forest\", \"glacier\", \"mountain\", \"sea\", \"street\"]\n","\n","# URL de la imagen que el usuario quiere clasificar\n","img_url = \"https://www.shutterstock.com/image-photo/beautiful-view-sea-on-atlantic-600nw-253604740.jpg\"\n","\n","# Realizar la predicción\n","predicted_class, confidence, img = predict_image_from_url(model, img_url, class_names)\n","\n","if predicted_class is not None:\n"," class_name = class_names[predicted_class]\n","\n"," # Obtener la información de la categoría predicha\n"," info_categoria_predicha = info_categorias[class_name]\n","\n"," # Generar el texto a hablar con la información de la categoría predicha\n"," text_to_speak = f'Sugerencia turística: {info_categoria_predicha}.'\n","\n"," # Generar el archivo de audio\n"," tts = gTTS(text=text_to_speak, lang='es') # 'es' para español\n"," ruta_audio_respuesta = '/kaggle/working/result_audio.mp3'\n"," tts.save(ruta_audio_respuesta)\n","\n"," # Reproducir el archivo de audio\n"," audio = Audio(ruta_audio_respuesta, autoplay=True)\n"," display(audio)\n","\n"," # Mostrar la imagen original y la predicción con proporciones correctas\n"," plt.imshow(img)\n"," plt.title(f\"Predicción: {class_name} - Confianza: {confidence * 100:.2f}%\")\n"," plt.axis('off')\n"," plt.show()\n","\n"," # Esperar a que termine el audio antes de continuar\n"," duration = len(text_to_speak.split()) / 2 # Aproximación de la duración del audio en segundos\n"," time.sleep(duration + 1) # Esperar un poco más para asegurarse de que el audio termine\n","else:\n"," print(f\"No se pudo procesar la imagen desde la URL {img_url}.\")\n"],"metadata":{"id":"pfgy2iBscEam"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"vrN8SgbD0pkk"},"outputs":[],"source":["# Clasificación de la imagen y respuesta por voz\n","\n","# Función para cargar una lista de URLs de imágenes y predecir su clase\n","\n","\n","# Lista de URLs de las imágenes que el usuario quiere clasificar\n","img_urls = [\n"," \"https://www.shutterstock.com/image-photo/beautiful-view-sea-on-atlantic-600nw-253604740.jpg\",\n"," \"https://media.tacdn.com/media/attractions-splice-spp-674x446/07/1a/70/bf.jpg\",\n"," \"https://media.tacdn.com/media/attractions-splice-spp-674x446/06/e4/dd/04.jpg\",\n"," \"https://assets.turismocity.com/cdn-cgi/image/format=auto,width=500,fit=scale-down/playas%20arg%20-%20punta%20perdices.jpg\",\n"," \"https://www.verbuenosaires.com/wp-content/uploads/2019/05/San-Telmo-buenos-aires.jpg\",\n"," \"https://101lugaresincreibles.com/wp-content/uploads/2016/05/hornocal-jujuy-2-e1463151261703.jpg\",\n"," \"https://fotografias.lasexta.com/clipping/cmsimages02/2019/03/05/417B5470-7EA5-4440-B015-1A0E8C30F6B9/69.jpg?crop=960,540,x0,y89&width=1280&height=720&optimize=low&format=jpg\",\n"," \"https://www.shutterstock.com/image-photo/street-intersection-buildings-buenos-aires-600nw-1940554291.jpg\",\n"," \"https://hips.hearstapps.com/hmg-prod/images/centro-cultural-kirchner-buenos-aires-1560761469.jpg\",\n"," \"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT8sFj9UZxNRm3wq1M-cOybxCpVPbm7KlahQA&s\",\n"," \"https://st1.uvnimg.com/dims4/default/1d90d8b/2147483647/thumbnail/1024x576%3E/quality/75/?url=https%3A%2F%2Fuvn-brightspot.s3.amazonaws.com%2Fassets%2Fvixes%2F6%2F6-pueblos-argentinos-escondidos-entre-montanas-3.jpg\"\n","]\n","\n","#Iteramos por las lista para automatizar las predicciones de las imágenes cargadas\n","\n","for img_url in img_urls:\n"," # Realizar la predicción\n"," predicted_class, confidence, img = predict_image_from_url(model, img_url, class_names)\n","\n"," if predicted_class is not None:\n"," class_name = class_names[predicted_class]\n","\n"," # Obtener la información de la categoría predicha\n"," info_categoria_predicha = info_categorias[class_name]\n","\n"," # Generar el texto a hablar con la información de la categoría predicha\n"," text_to_speak = f'Sugerencia turística: {info_categoria_predicha}.'\n","\n"," # Generar el archivo de audio\n"," tts = gTTS(text=text_to_speak, lang='es') # 'es' para español\n"," ruta_audio_respuesta = '/kaggle/working/result_audio.mp3'\n"," tts.save(ruta_audio_respuesta)\n","\n"," # Reproducir el archivo de audio\n"," audio = Audio(ruta_audio_respuesta, autoplay=True)\n"," display(audio)\n","\n"," # Mostrar la imagen original y la predicción con proporciones correctas\n"," plt.imshow(img)\n"," plt.title(f\"Predicción: {class_name} - Confianza: {confidence * 100:.2f}%\")\n"," plt.axis('off')\n"," plt.show()\n","\n"," # Esperar a que termine el audio antes de continuar\n"," duration = len(text_to_speak.split()) / 2 # Aproximación de la duración del audio en segundos\n"," time.sleep(duration + 1) # Esperar un poco más para asegurarse de que el audio termine\n"," else:\n"," print(f\"No se pudo procesar la imagen desde la URL {img_url}.\")\n"]}],"metadata":{"accelerator":"GPU","colab":{"gpuType":"T4","provenance":[{"file_id":"https://storage.googleapis.com/kaggle-colab-exported-notebooks/notebook9fb2c2d731-eb3afd85-4561-4c1b-8e6a-ba5c9085044c.ipynb?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com/20240602/auto/storage/goog4_request&X-Goog-Date=20240602T202631Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=25f4ab10d63714aa65324e1e882748f82cf9364f89296d7ae2364e411942faf9e3cfebd540c901fcdb01d689f6a54b6449a26c3dc2ba89698413c915bc0223c1cc1aa37807a12e6076f393e97c40b9cff16fc1dfdaee5d2a094a15763dce9217c0124611861805f5f13f95a1bf02a9fb3fba88d603849dd1f42e87846b66d52253da0633b6cff1167077acdbd7e002baa3a61a653c685e9f9884108a00bd5b27189068c2566821125d3c78ee13ef2aaa2041b33ea1494c2e6dfa1e21ace11b66e58b5cb925a1242218936db18f0ad3f6edbb1b8c7484a35cec03ee4e4c980f450effadc2ede8aa1a890ea817cee3a6bd9c8d6f6bbc051e3f456bf96f2a8fa07e","timestamp":1717363885291},{"file_id":"https://storage.googleapis.com/kaggle-colab-exported-notebooks/notebook9fb2c2d731-b26af7bf-e8d7-4825-a680-af3fad861324.ipynb?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com/20240508/auto/storage/goog4_request&X-Goog-Date=20240508T221517Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=905a1ad44753d1e3785ebf0049f1295cceca5123d69e3e91232a1b1564a4f349b969801d8b49c326b7e2536331014cab6719ff472c5f7d9680b55f7bc67c30032aa785e5af90a49856c836e9cb07f19fd005a31d94b38d398c4aa85f22f6b64e30f6f73c64e06583d18088bf26870ed0ce386dcfc5db201af916f00e4f44e72ecf5cc3b45b9d335be28de06c6597b741aaebd645c2af0e203d15c4cc08b3d1f31c9cdc01038d1096b10ba4421edcdb737b0a8d4cc5b907a54ddfcfe066494fb5ed3976d6cdd711fb2750c7be42e10da096628f21e37ce527fc6a6c61c9d47603a8e7c0f9c84821b3fda040ebe9e77b951b81f1450a2f7f4691e3529f4e744e0f","timestamp":1715208670720},{"file_id":"https://storage.googleapis.com/kaggle-colab-exported-notebooks/notebookd612d5e298-89b0d249-d53b-4445-9e8f-7c2c51d97208.ipynb?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com/20240503/auto/storage/goog4_request&X-Goog-Date=20240503T001326Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=74fbc6f2674dab00f66444cb9cbd3dc8e0b3aca71315419b67b46b93b402c3b1fe21f979d803ea239fcacdca8033eb72717286eec7252cf5d61b5b4f3d12caaa2cb915573f6cf28433b334b1d6a47c9a8414f9652358b775472baf67a85d72d41751fe75a99b74f5f63b18d0aa0ace650f38420c8b6c574f50ecaf62564a8c55c78843e20e9df6f0a87ee8211f6188bbc67b45c89f91788f548799237b786a05f3bc411e4e5f570bb89c7ac919b76ce8e551b6eb8875084090b05b506b11d29bc116cf9b0e5ce061a20cd24bf25cb93c34ba384b48089ccd578947c7836b7193b4572d886c2e10da65e19be841a892b76b5518aeb371c92961856e4e424e2ed3","timestamp":1714705850123}]},"kaggle":{"accelerator":"none","dataSources":[{"datasetId":111880,"sourceId":269359,"sourceType":"datasetVersion"}],"dockerImageVersionId":30698,"isGpuEnabled":false,"isInternetEnabled":false,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":0}