Convert CSV Notion to XLSX
Convert CSV Notion to XLSX

Convert CSV Notion to XLSX

April 02, 2023
CSV to XLSX

Converting CSV files to XLSX format can be incredibly useful for data analysis, reporting, and sharing information. This tutorial will guide you through the process of converting CSV files to XLSX using Python, whether you need to convert a single file or multiple files in a folder.

Prerequisites

Before we begin, make sure you have Python installed on your system. You will also need to install the pandas and openpyxl libraries. You can install them using pip:

pip install pandas openpyxl

Step 1: Convert a Single CSV File to XLSX

Let's start by converting a single CSV file to XLSX format. Create a Python script named convert_single.py with the following content:

import pandas as pd

def convert_csv_to_xlsx(csv_file, xlsx_file):
    # Read the CSV file
    data = pd.read_csv(csv_file)
    
    # Save as XLSX file
    data.to_excel(xlsx_file, index=False)
    print(f"File {csv_file} successfully converted to {xlsx_file}")

# Example usage
convert_csv_to_xlsx('example.csv', 'example.xlsx')

In this script:

  • The convert_csv_to_xlsx function reads the CSV file and saves it as an XLSX file.
  • The pd.read_csv() function is used to read the CSV file.
  • The data.to_excel() function saves the DataFrame as an XLSX file.

Step 2: Convert Multiple CSV Files in a Folder to XLSX

To convert multiple CSV files in a folder, create a Python script named convert_folder.py with the following content:

import os
import pandas as pd

def clean_data(text):
    """Clean the text data by removing URLs, parentheses, and leading/trailing spaces."""
    text = text.lstrip()  # Remove leading spaces
    text = re.sub(r'\s*\(https?://[^\)]+\)', '', text)  # Remove URLs in parentheses
    text = text.replace('(', '').replace(')', '')  # Remove parentheses
    text = text.replace('PT ', 'PT. ')  # Replace "PT " with "PT. "
    text = text.replace('CV ', 'CV. ')  # Replace "CV " with "CV. "
    return text

def convert_files(input_folder, output_folder):
    # Ensure the output folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterate through all CSV files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith('.csv'):
            csv_file_path = os.path.join(input_folder, file_name)
            xlsx_file_name = file_name.replace('.csv', '.xlsx')
            xlsx_file_path = os.path.join(output_folder, xlsx_file_name)

            # Read the CSV file
            data = pd.read_csv(csv_file_path)

            # Clean the data
            data = data.apply(lambda col: col.map(lambda x: clean_data(str(x)) if isinstance(x, str) else x))

            # Save as XLSX file
            data.to_excel(xlsx_file_path, index=False)
            print(f"File {csv_file_path} successfully converted to {xlsx_file_path}")

# Example usage
convert_files('csv_folder', 'xlsx_folder')

In this script:

  • The convert_files function iterates through all CSV files in the specified input folder and converts each one to XLSX format.
  • The os.makedirs() function ensures that the output folder exists.
  • The os.listdir() function lists all files in the input folder.
  • The pd.read_csv() and data.to_excel() functions are used to read and save the files, respectively.

Step 3: Combine Both Approaches in a Single Script

You can also combine both functionalities into a single script. Create a script named convert_csv.py:

import os
import sys
import json
import pandas as pd
import re
from colorama import init, Fore, Style

def resource_path(relative_path):
    """Get the absolute path to the resource, works for development and PyInstaller."""
    try:
        base_path = sys._MEIPASS
    except AttributeError:
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)

def clean_data(text):
    """Clean the text data by removing URLs, parentheses, and leading/trailing spaces."""
    text = text.lstrip()  # Remove leading spaces
    text = re.sub(r'\s*\(https?://[^\)]+\)', '', text)  # Remove URLs in parentheses
    text = text.replace('(', '').replace(')', '')  # Remove parentheses
    text = text.replace('PT ', 'PT. ')  # Replace "PT " with "PT. "
    text = text.replace('CV ', 'CV. ')  # Replace "CV " with "CV. "
    return text

def convert_files(input_folder, output_folder):
    # Ensure the output folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterate through all CSV files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith('.csv'):
            csv_file_path = os.path.join(input_folder, file_name)
            xlsx_file_name = file_name.replace('.csv', '.xlsx')
            xlsx_file_path = os.path.join(output_folder, xlsx_file_name)

            # Read the CSV file
            data = pd.read_csv(csv_file_path)

            # Clean the data
            data = data.apply(lambda col: col.map(lambda x: clean_data(str(x)) if isinstance(x, str) else x))

            # Save as XLSX file
            data.to_excel(xlsx_file_path, index=False)
            print(f"{Fore.GREEN}File {csv_file_path} successfully converted to {xlsx_file_path}.{Style.RESET_ALL}")

def main():
    # Initialize colorama
    init(autoreset=True)

    # Path to config.json
    config_path = resource_path('config.json')
    
    # Read the configuration file
    try:
        with open(config_path, 'r') as config_file:
            config = json.load(config_file)
    except FileNotFoundError:
        print(f"{Fore.RED}Config file not found at {config_path}{Style.RESET_ALL}")
        input("Press Enter to exit...")
        return

    input_folder = config.get('input_folder')
    output_folder = config.get('output_folder')

    if not input_folder or not output_folder:
        print(f"{Fore.RED}Config file must contain 'input_folder' and 'output_folder'{Style.RESET_ALL}")
        input("Press Enter to exit...")
        return

    # Paths to input and output folders
    input_folder_path = os.path.join(os.path.dirname(sys.executable), input_folder)
    output_folder_path = os.path.join(os.path.dirname(sys.executable), output_folder)
    
    # Check if the input folder exists
    if os.path.exists(input_folder_path):
        print(f"{Fore.BLUE}Input folder found at {input_folder_path}.{Style.RESET_ALL}")
        convert_files(input_folder_path, output_folder_path)
    else:
        print(f"{Fore.RED}Input folder not found at {input_folder_path}.{Style.RESET_ALL}")

    print("\n" * 2)  # Add two blank lines
    input("Press Enter to exit...")

if __name__ == "__main__":
    main()

Configuration File

Create a config.json file to specify the input and output folders:

{
    "input_folder": "csv_folder",
    "output_folder": "xlsx_folder"
}

Running the Script

Place your CSV files in the csv_folder and run the script using:

python convert_csv.py

Creating an Executable

You can also create an executable using PyInstaller. First, install PyInstaller:

pip install pyinstaller

Then, create the executable:

pyinstaller --add-data "config.json;." --onefile convert_csv.py

This will generate an executable file in the dist directory. Place your csv_folder and config.json in the same directory as the executable and run it to perform the conversion.

Fix Code

import os
import sys
import json
import pandas as pd
import re
from colorama import init, Fore, Style

def resource_path(relative_path):
    """ Dapatkan path absolut ke resource, bekerja untuk Dev dan untuk PyInstaller """
    try:
        # PyInstaller mengatur path _MEIPASS
        base_path = sys._MEIPASS
    except AttributeError:
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)

def clean_data(text):
    """ Hapus URL dalam tanda kurung, tanda kurung, spasi sebelum tanda kurung, dan spasi di depan kolom """
    text = text.lstrip()  # Hilangkan spasi di depan teks
    text = re.sub(r'\s*\(https?://[^\)]+\)', '', text)  # Hapus URL dalam tanda kurung beserta spasi sebelumnya
    text = text.replace('(', '').replace(')', '')  # Hapus tanda kurung
    text = text.replace('PT ', 'PT. ')  # Ganti "PT " dengan "PT. "
    text = text.replace('CV ', 'CV. ')  # Ganti "CV " dengan "CV. "
    return text

def main():
    print(f"{Fore.GREEN} L M Y D Converter - CSV Notion to Xlsx")
    print('------------------------------------------------------')
    print("\n" * 1)  # Tambahkan dua baris kosong
    # Inisialisasi colorama
    init(autoreset=True)
    
    # Path ke config.json
    config_path = os.path.join(os.path.dirname(sys.executable), 'config.json')
    
    # Baca file konfigurasi
    try:
        with open(config_path, 'r') as config_file:
            config = json.load(config_file)
    except FileNotFoundError:
        print(f"{Fore.RED}Config file tidak ditemukan di {config_path}{Style.RESET_ALL}")
        input("Tekan Enter untuk menutup...")
        return

    input_csv = config.get('input_csv')
    output_xlsx = config.get('output_xlsx')

    if not input_csv or not output_xlsx:
        print(f"{Fore.RED}Config file harus berisi 'input_csv' dan 'output_xlsx'{Style.RESET_ALL}")
        input("Tekan Enter untuk menutup...")
        return

    # Path relatif ke file CSV
    csv_file_path = os.path.join(os.path.dirname(sys.executable), input_csv)
    
    # Periksa apakah file ada di path yang diberikan
    if os.path.exists(csv_file_path):
        print(f"{Fore.BLUE}File {input_csv} ditemukan di {csv_file_path}.{Style.RESET_ALL}")
        
        # Baca file CSV
        data = pd.read_csv(csv_file_path)
        
        # Bersihkan data
        data = data.apply(lambda col: col.map(lambda x: clean_data(str(x)) if isinstance(x, str) else x))
        
        # Simpan sebagai file Excel
        excel_file_path = os.path.join(os.path.dirname(sys.executable), output_xlsx)
        data.to_excel(excel_file_path, index=False)
        
        print(f"{Fore.GREEN}File berhasil dikonversi dan disimpan sebagai {output_xlsx}.{Style.RESET_ALL}")
    else:
        print(f"{Fore.RED}File {input_csv} tidak ditemukan di {csv_file_path}.{Style.RESET_ALL}")
        
    # Tambahkan pesan terima kasih sebelum menunggu input
    print("\n" * 1)
    print('------------------------------------------------------------------------')
    print("\n" * 1)  # Tambahkan dua baris kosong
    print(f"{Fore.GREEN}Terima kasih sudah menggunakan aplikasi LMYD Convert CSV Notion to XLSX.{Style.RESET_ALL}")
    print("\n" * 1)
    input("Tekan Enter untuk menutup...")

if __name__ == "__main__":
    main()
{
    "input_csv": "INQ2024.csv",
    "output_xlsx": "INQ2024.xlsx"
}
import os
import sys
import json
import pandas as pd
import re
from colorama import init, Fore, Style

def resource_path(relative_path):
    """ Dapatkan path absolut ke resource, bekerja untuk Dev dan untuk PyInstaller """
    try:
        # PyInstaller mengatur path _MEIPASS
        base_path = sys._MEIPASS
    except AttributeError:
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)

def clean_data(text):
    """ Hapus URL dalam tanda kurung, tanda kurung, spasi sebelum tanda kurung, dan spasi di depan kolom """
    text = text.lstrip()  # Hilangkan spasi di depan teks
    text = re.sub(r'\s*\(https?://[^\)]+\)', '', text)  # Hapus URL dalam tanda kurung beserta spasi sebelumnya
    text = text.replace('(', '').replace(')', '')  # Hapus tanda kurung
    text = text.replace('PT ', 'PT. ')  # Ganti "PT " dengan "PT. "
    text = text.replace('CV ', 'CV. ')  # Ganti "CV " dengan "CV. "
    return text

def convert_files(input_folder, output_folder):
    # Pastikan folder output ada
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterasi melalui semua file CSV di folder input
    for file_name in os.listdir(input_folder):
        if file_name.endswith('.csv'):
            csv_file_path = os.path.join(input_folder, file_name)
            xlsx_file_name = file_name.replace('.csv', '.xlsx')
            xlsx_file_path = os.path.join(output_folder, xlsx_file_name)

            # Baca file CSV
            data = pd.read_csv(csv_file_path)

            # Bersihkan data
            data = data.apply(lambda col: col.map(lambda x: clean_data(str(x)) if isinstance(x, str) else x))

            # Simpan sebagai file Excel
            data.to_excel(xlsx_file_path, index=False)
            print(f"{Fore.GREEN}File {csv_file_path} berhasil dikonversi dan disimpan sebagai {xlsx_file_path}.{Style.RESET_ALL}")

def main():
    print(f"{Fore.GREEN} L M Y D Converter - CSV Notion to Xlsx")
    print('------------------------------------------------------')
    print("\n" * 1)  # Tambahkan dua baris kosong
    # Inisialisasi colorama
    init(autoreset=True)

    # Path ke config.json
    config_path = resource_path('config.json')
    
    # Baca file konfigurasi
    try:
        with open(config_path, 'r') as config_file:
            config = json.load(config_file)
    except FileNotFoundError:
        print(f"{Fore.RED}Config file tidak ditemukan di {config_path}{Style.RESET_ALL}")
        input("Tekan Enter untuk menutup...")
        return

    input_folder = config.get('input_folder')
    output_folder = config.get('output_folder')

    if not input_folder or not output_folder:
        print(f"{Fore.RED}Config file harus berisi 'input_folder' dan 'output_folder'{Style.RESET_ALL}")
        input("Tekan Enter untuk menutup...")
        return

    # Path relatif ke folder input dan output
    input_folder_path = os.path.join(os.path.dirname(sys.executable), input_folder)
    output_folder_path = os.path.join(os.path.dirname(sys.executable), output_folder)
    
    # Periksa apakah folder input ada
    if os.path.exists(input_folder_path):
        print(f"{Fore.BLUE}Folder input ditemukan di {input_folder_path}.{Style.RESET_ALL}")
        convert_files(input_folder_path, output_folder_path)
    else:
        print(f"{Fore.RED}Folder input tidak ditemukan di {input_folder_path}.{Style.RESET_ALL}")

    print("\n" * 1)
    print('------------------------------------------------------------------------')
    print("\n" * 1)  # Tambahkan dua baris kosong
    print('Terima kasih sudah menggunakan aplikasi LMYD Convert CSV Notion to XLSX.')
    print("\n" * 1)
    input("Tekan Enter untuk menutup...")

if __name__ == "__main__":
    main()
{
    "input_folder": "csv_files",
    "output_folder": "xlsx_files"
}

Conclusion

By following these steps, you can easily convert CSV files to XLSX format using Python. Whether you need to convert a single file or an entire folder, these scripts provide a flexible and efficient solution. You can also customize the scripts to suit your specific needs.

For more detailed information, refer to the pandas documentation and the PyInstaller documentation.