Rekap Data Excel
Rekap Data Excel

Rekap Data Excel

Juli 21, 2024
xlsx with pyhton

Introduction

In this tutorial, we will learn how to perform a sales recap based on various criteria using Python and Pandas. We will use an Excel file containing sales data with multiple sheets representing different months. We will also create a flexible configuration to specify the desired type of recap.

Preparation

Make sure you have installed the pandas library to read and process Excel files. You can install it using pip:

pip install pandas

Configuration File

Create a config.json file that stores the configuration for the Excel file name, recap type, groupby columns, and specific months.

{
"file_path": "path_to_file.xlsx",
"rekap_type": "per_month",  // "all" for recap of all months, "per_month" for recap of a specific month, "range" for recap from month to month
"groupby_columns": ["Sales", "Brand"],  // Columns used for groupby
"per_month": 7  // If "rekap_type" is "per_month", specify the desired month (e.g., 7 for July)
}

Python Script

Here is the complete Python script to read the configuration and perform the sales recap:

import pandas as pd
import json

# Function to read configuration from JSON file
def load_config(config_path):
with open(config_path, 'r') as config_file:
config = json.load(config_file)
return config

# Function to read data from Excel file based on configuration
def read_excel_sheets(file_path, sheet_range=None):
all_sheets = pd.read_excel(file_path, sheet_name=None)
sheet_names = list(all_sheets.keys())

if sheet_range:
start, end = sheet_range
selected_sheets = {sheet_name: all_sheets[sheet_name] for sheet_name in sheet_names[start-1:end]}
else:
selected_sheets = all_sheets

df = pd.concat(selected_sheets.values(), ignore_index=True)
return df

# Function to perform sales recap
def rekap_penjualan(df, groupby_columns):
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce')
df.dropna(subset=['Amount', 'Quantity'], inplace=True)
rekap = df.groupby(groupby_columns).agg({'Amount': 'sum', 'Quantity': 'sum'}).reset_index()
return rekap

# Main function to run the recap based on configuration
def main(config_path):
config = load_config(config_path)
file_path = config["file_path"]
rekap_type = config["rekap_type"]
groupby_columns = config["groupby_columns"]

if rekap_type == "all":
df = read_excel_sheets(file_path)
rekap = rekap_penjualan(df, groupby_columns)
rekap.to_excel('sales_recap_all_months.xlsx', index=False)
print("Sales recap for all months has been saved to 'sales_recap_all_months.xlsx'")
elif rekap_type == "per_month":
month = config["per_month"]
df = read_excel_sheets(file_path, sheet_range=(month, month))
rekap = rekap_penjualan(df, groupby_columns)
rekap.to_excel(f'sales_recap_month_{month}.xlsx', index=False)
print(f"Sales recap for month {month} has been saved to 'sales_recap_month_{month}.xlsx'")
elif rekap_type == "range":
start_month = config["range"]["start"]
end_month = config["range"]["end"]
df = read_excel_sheets(file_path, sheet_range=(start_month, end_month))
rekap = rekap_penjualan(df, groupby_columns)
rekap.to_excel(f'sales_recap_month_{start_month}_to_{end_month}.xlsx', index=False)
print(f"Sales recap from month {start_month} to {end_month} has been saved to 'sales_recap_month_{start_month}_to_{end_month}.xlsx'")
else:
print("Unknown recap type in configuration.")

if __name__ == "__main__":
config_path = 'config.json'  # Path to configuration file
main(config_path)

Code Explanation

  • load_config(): Reads the configuration from the JSON file.
  • read_excel_sheets(): Reads sheets from the Excel file based on the configuration.
  • rekap_penjualan(): Converts the 'Amount' and 'Quantity' columns to numeric, cleans the data, and performs the recap based on the specified columns in the configuration.
  • main(): The main function that runs the recap based on the configuration.

Other Codes

{
    "file_path": "path_to_file.xlsx",
    "rekap_type": "all",  // "all" untuk rekap semua bulan, "per_month" untuk rekap per bulan, "range" untuk rekap dari bulan ke bulan
    "groupby_column": "Sales",  // Kolom yang digunakan untuk groupby
    "per_month": 7,       // Jika "rekap_type" adalah "per_month", tentukan bulan yang diinginkan (misalnya 7 untuk Juli)
    "range": {            // Jika "rekap_type" adalah "range", tentukan rentang bulan
        "start": 1,       // Bulan mulai (misalnya 1 untuk Januari)
        "end": 6          // Bulan akhir (misalnya 6 untuk Juni)
    }
}
import pandas as pd
import json

# Fungsi untuk membaca konfigurasi dari file JSON
def load_config(config_path):
    with open(config_path, 'r') as config_file:
        config = json.load(config_file)
    return config

# Fungsi untuk membaca data dari file Excel berdasarkan konfigurasi
def read_excel_sheets(file_path, sheet_range=None):
    all_sheets = pd.read_excel(file_path, sheet_name=None)
    sheet_names = list(all_sheets.keys())
    
    if sheet_range:
        start, end = sheet_range
        selected_sheets = {sheet_name: all_sheets[sheet_name] for sheet_name in sheet_names[start-1:end]}
    else:
        selected_sheets = all_sheets

    df = pd.concat(selected_sheets.values(), ignore_index=True)
    return df

# Fungsi untuk melakukan rekap penjualan
def rekap_penjualan(df, groupby_column):
    df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
    df.dropna(subset=['Amount'], inplace=True)
    rekap = df.groupby(groupby_column)['Amount'].sum().reset_index()
    return rekap

# Fungsi utama untuk menjalankan rekapitulasi berdasarkan konfigurasi
def main(config_path):
    config = load_config(config_path)
    file_path = config["file_path"]
    rekap_type = config["rekap_type"]
    groupby_column = config["groupby_column"]

    if rekap_type == "all":
        df = read_excel_sheets(file_path)
        rekap = rekap_penjualan(df, groupby_column)
        rekap.to_excel('rekap_penjualan_semua_bulan.xlsx', index=False)
        print("Rekap penjualan semua bulan telah disimpan ke 'rekap_penjualan_semua_bulan.xlsx'")
    elif rekap_type == "per_month":
        month = config["per_month"]
        df = read_excel_sheets(file_path, sheet_range=(month, month))
        rekap = rekap_penjualan(df, groupby_column)
        rekap.to_excel(f'rekap_penjualan_bulan_{month}.xlsx', index=False)
        print(f"Rekap penjualan bulan {month} telah disimpan ke 'rekap_penjualan_bulan_{month}.xlsx'")
    elif rekap_type == "range":
        start_month = config["range"]["start"]
        end_month = config["range"]["end"]
        df = read_excel_sheets(file_path, sheet_range=(start_month, end_month))
        rekap = rekap_penjualan(df, groupby_column)
        rekap.to_excel(f'rekap_penjualan_bulan_{start_month}_sampai_{end_month}.xlsx', index=False)
        print(f"Rekap penjualan dari bulan {start_month} sampai {end_month} telah disimpan ke 'rekap_penjualan_bulan_{start_month}_sampai_{end_month}.xlsx'")
    else:
        print("Jenis rekap tidak dikenal dalam konfigurasi.")

if __name__ == "__main__":
    config_path = 'config.json'  # Path ke file konfigurasi
    main(config_path)
import pandas as pd

try:
    # Gantilah 'path_to_file.xlsx' dengan path file Excel Anda
    file_path = 'path_to_file.xlsx'

    # Membaca sheet ke-7 (bulan Juli) dalam file Excel
    all_sheets = pd.read_excel(file_path, sheet_name=None)
    sheet_names = list(all_sheets.keys())

    # Memastikan bahwa sheet ke-7 ada
    if len(sheet_names) >= 7:
        july_sheet_name = sheet_names[6]  # Sheet ke-7
        df = all_sheets[july_sheet_name]

        # Cetak nama kolom untuk memastikan nama kolom yang benar
        print("Nama kolom dalam DataFrame:", df.columns.tolist())

        # Menampilkan beberapa baris pertama untuk memeriksa data
        print(df.head())

        # Mengubah kolom 'Amount' menjadi numerik, memaksa kesalahan menjadi NaN
        df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')

        # Menghapus baris dengan nilai NaN di kolom 'Amount'
        df.dropna(subset=['Amount'], inplace=True)

        # Rekap penjualan berdasarkan Sales
        rekap_penjualan = df.groupby('Sales')['Amount'].sum().reset_index()

        # Menyimpan hasil rekapitulasi ke file baru
        rekap_penjualan.to_excel('rekap_penjualan_berdasarkan_sales_juli.xlsx', index=False)

        print("Rekap penjualan berdasarkan Sales telah disimpan ke 'rekap_penjualan_berdasarkan_sales_juli.xlsx'")
    else:
        print("Sheet ke-7 tidak ditemukan dalam file Excel.")

except KeyError as e:
    print(f"Kesalahan: Kolom {e} tidak ditemukan dalam DataFrame. Periksa kembali nama kolom dalam file Excel Anda.")
except Exception as e:
    print(f"Kesalahan: {e}")
import pandas as pd

try:
    # Gantilah 'path_to_file.xlsx' dengan path file Excel Anda
    file_path = 'path_to_file.xlsx'

    # Membaca semua sheet dalam file Excel
    all_sheets = pd.read_excel(file_path, sheet_name=None)

    # Membatasi hanya hingga sheet ke-6 (bulan Juni)
    sheet_names = list(all_sheets.keys())[:6]

    # Menggabungkan sheet ke-1 hingga ke-6 menjadi satu DataFrame
    selected_sheets = {sheet_name: all_sheets[sheet_name] for sheet_name in sheet_names}
    df = pd.concat(selected_sheets.values(), ignore_index=True)

    # Cetak nama kolom untuk memastikan nama kolom yang benar
    print("Nama kolom dalam DataFrame:", df.columns.tolist())

    # Menampilkan beberapa baris pertama untuk memeriksa data
    print(df.head())

    # Mengubah kolom 'Amount' menjadi numerik, memaksa kesalahan menjadi NaN
    df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')

    # Menghapus baris dengan nilai NaN di kolom 'Amount'
    df.dropna(subset=['Amount'], inplace=True)

    # Rekap penjualan berdasarkan Sales
    rekap_penjualan = df.groupby('Sales')['Amount'].sum().reset_index()

    # Menyimpan hasil rekapitulasi ke file baru
    rekap_penjualan.to_excel('rekap_penjualan_berdasarkan_sales_juni.xlsx', index=False)

    print("Rekap penjualan berdasarkan Sales telah disimpan ke 'rekap_penjualan_berdasarkan_sales_juni.xlsx'")

except KeyError as e:
    print(f"Kesalahan: Kolom {e} tidak ditemukan dalam DataFrame. Periksa kembali nama kolom dalam file Excel Anda.")
except Exception as e:
    print(f"Kesalahan: {e}")
import pandas as pd

try:
    # Gantilah 'path_to_file.xlsx' dengan path file Excel Anda
    file_path = 'path_to_file.xlsx'

    # Membaca semua sheet dalam file Excel
    all_sheets = pd.read_excel(file_path, sheet_name=None)

    # Menggabungkan semua sheet menjadi satu DataFrame
    df = pd.concat(all_sheets.values(), ignore_index=True)

    # Cetak nama kolom untuk memastikan nama kolom yang benar
    print("Nama kolom dalam DataFrame:", df.columns.tolist())

    # Menampilkan beberapa baris pertama untuk memeriksa data
    print(df.head())

    # Mengubah kolom 'Amount' menjadi numerik, memaksa kesalahan menjadi NaN
    df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')

    # Menghapus baris dengan nilai NaN di kolom 'Amount'
    df.dropna(subset=['Amount'], inplace=True)

    # Rekap penjualan berdasarkan merek barang
    rekap_penjualan = df.groupby('Brand')['Amount'].sum().reset_index()

    # Menyimpan hasil rekapitulasi ke file baru
    rekap_penjualan.to_excel('rekap_penjualan_berdasarkan_merk.xlsx', index=False)

    print("Rekap penjualan berdasarkan merek barang telah disimpan ke 'rekap_penjualan_berdasarkan_merk.xlsx'")

except KeyError as e:
    print(f"Kesalahan: Kolom {e} tidak ditemukan dalam DataFrame. Periksa kembali nama kolom dalam file Excel Anda.")
except Exception as e:
    print(f"Kesalahan: {e}")

Conclusion

By following this tutorial, you have learned how to perform a sales recap based on various criteria using Python and Pandas. You have also learned how to create a flexible configuration to specify the desired type of recap. With this configuration, you can perform a sales recap based on the Sales and Brand columns, including the number of units sold and the total Amount.