Rekap Data Excel
Juli 21, 2024 • •
Introduction
In this tutorial, we will learn how to perform a sales recap based on various criteria using Python and Pandas. We will use an Excel file containing sales data with multiple sheets representing different months. We will also create a flexible configuration to specify the desired type of recap.
Preparation
Make sure you have installed the pandas library to read and process Excel files. You can install it using pip:
pip install pandas
Configuration File
Create a config.json file that stores the configuration for the Excel file name, recap type, groupby columns, and specific months.
{
"file_path": "path_to_file.xlsx",
"rekap_type": "per_month", // "all" for recap of all months, "per_month" for recap of a specific month, "range" for recap from month to month
"groupby_columns": ["Sales", "Brand"], // Columns used for groupby
"per_month": 7 // If "rekap_type" is "per_month", specify the desired month (e.g., 7 for July)
}
Python Script
Here is the complete Python script to read the configuration and perform the sales recap:
import pandas as pd
import json
# Function to read configuration from JSON file
def load_config(config_path):
with open(config_path, 'r') as config_file:
config = json.load(config_file)
return config
# Function to read data from Excel file based on configuration
def read_excel_sheets(file_path, sheet_range=None):
all_sheets = pd.read_excel(file_path, sheet_name=None)
sheet_names = list(all_sheets.keys())
if sheet_range:
start, end = sheet_range
selected_sheets = {sheet_name: all_sheets[sheet_name] for sheet_name in sheet_names[start-1:end]}
else:
selected_sheets = all_sheets
df = pd.concat(selected_sheets.values(), ignore_index=True)
return df
# Function to perform sales recap
def rekap_penjualan(df, groupby_columns):
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce')
df.dropna(subset=['Amount', 'Quantity'], inplace=True)
rekap = df.groupby(groupby_columns).agg({'Amount': 'sum', 'Quantity': 'sum'}).reset_index()
return rekap
# Main function to run the recap based on configuration
def main(config_path):
config = load_config(config_path)
file_path = config["file_path"]
rekap_type = config["rekap_type"]
groupby_columns = config["groupby_columns"]
if rekap_type == "all":
df = read_excel_sheets(file_path)
rekap = rekap_penjualan(df, groupby_columns)
rekap.to_excel('sales_recap_all_months.xlsx', index=False)
print("Sales recap for all months has been saved to 'sales_recap_all_months.xlsx'")
elif rekap_type == "per_month":
month = config["per_month"]
df = read_excel_sheets(file_path, sheet_range=(month, month))
rekap = rekap_penjualan(df, groupby_columns)
rekap.to_excel(f'sales_recap_month_{month}.xlsx', index=False)
print(f"Sales recap for month {month} has been saved to 'sales_recap_month_{month}.xlsx'")
elif rekap_type == "range":
start_month = config["range"]["start"]
end_month = config["range"]["end"]
df = read_excel_sheets(file_path, sheet_range=(start_month, end_month))
rekap = rekap_penjualan(df, groupby_columns)
rekap.to_excel(f'sales_recap_month_{start_month}_to_{end_month}.xlsx', index=False)
print(f"Sales recap from month {start_month} to {end_month} has been saved to 'sales_recap_month_{start_month}_to_{end_month}.xlsx'")
else:
print("Unknown recap type in configuration.")
if __name__ == "__main__":
config_path = 'config.json' # Path to configuration file
main(config_path)
Code Explanation
-
load_config(): Reads the configuration from the JSON file. -
read_excel_sheets(): Reads sheets from the Excel file based on the configuration. -
rekap_penjualan(): Converts the 'Amount' and 'Quantity' columns to numeric, cleans the data, and performs the recap based on the specified columns in the configuration. -
main(): The main function that runs the recap based on the configuration.
Other Codes
{
"file_path": "path_to_file.xlsx",
"rekap_type": "all", // "all" untuk rekap semua bulan, "per_month" untuk rekap per bulan, "range" untuk rekap dari bulan ke bulan
"groupby_column": "Sales", // Kolom yang digunakan untuk groupby
"per_month": 7, // Jika "rekap_type" adalah "per_month", tentukan bulan yang diinginkan (misalnya 7 untuk Juli)
"range": { // Jika "rekap_type" adalah "range", tentukan rentang bulan
"start": 1, // Bulan mulai (misalnya 1 untuk Januari)
"end": 6 // Bulan akhir (misalnya 6 untuk Juni)
}
}
import pandas as pd
import json
# Fungsi untuk membaca konfigurasi dari file JSON
def load_config(config_path):
with open(config_path, 'r') as config_file:
config = json.load(config_file)
return config
# Fungsi untuk membaca data dari file Excel berdasarkan konfigurasi
def read_excel_sheets(file_path, sheet_range=None):
all_sheets = pd.read_excel(file_path, sheet_name=None)
sheet_names = list(all_sheets.keys())
if sheet_range:
start, end = sheet_range
selected_sheets = {sheet_name: all_sheets[sheet_name] for sheet_name in sheet_names[start-1:end]}
else:
selected_sheets = all_sheets
df = pd.concat(selected_sheets.values(), ignore_index=True)
return df
# Fungsi untuk melakukan rekap penjualan
def rekap_penjualan(df, groupby_column):
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
df.dropna(subset=['Amount'], inplace=True)
rekap = df.groupby(groupby_column)['Amount'].sum().reset_index()
return rekap
# Fungsi utama untuk menjalankan rekapitulasi berdasarkan konfigurasi
def main(config_path):
config = load_config(config_path)
file_path = config["file_path"]
rekap_type = config["rekap_type"]
groupby_column = config["groupby_column"]
if rekap_type == "all":
df = read_excel_sheets(file_path)
rekap = rekap_penjualan(df, groupby_column)
rekap.to_excel('rekap_penjualan_semua_bulan.xlsx', index=False)
print("Rekap penjualan semua bulan telah disimpan ke 'rekap_penjualan_semua_bulan.xlsx'")
elif rekap_type == "per_month":
month = config["per_month"]
df = read_excel_sheets(file_path, sheet_range=(month, month))
rekap = rekap_penjualan(df, groupby_column)
rekap.to_excel(f'rekap_penjualan_bulan_{month}.xlsx', index=False)
print(f"Rekap penjualan bulan {month} telah disimpan ke 'rekap_penjualan_bulan_{month}.xlsx'")
elif rekap_type == "range":
start_month = config["range"]["start"]
end_month = config["range"]["end"]
df = read_excel_sheets(file_path, sheet_range=(start_month, end_month))
rekap = rekap_penjualan(df, groupby_column)
rekap.to_excel(f'rekap_penjualan_bulan_{start_month}_sampai_{end_month}.xlsx', index=False)
print(f"Rekap penjualan dari bulan {start_month} sampai {end_month} telah disimpan ke 'rekap_penjualan_bulan_{start_month}_sampai_{end_month}.xlsx'")
else:
print("Jenis rekap tidak dikenal dalam konfigurasi.")
if __name__ == "__main__":
config_path = 'config.json' # Path ke file konfigurasi
main(config_path)
import pandas as pd
try:
# Gantilah 'path_to_file.xlsx' dengan path file Excel Anda
file_path = 'path_to_file.xlsx'
# Membaca sheet ke-7 (bulan Juli) dalam file Excel
all_sheets = pd.read_excel(file_path, sheet_name=None)
sheet_names = list(all_sheets.keys())
# Memastikan bahwa sheet ke-7 ada
if len(sheet_names) >= 7:
july_sheet_name = sheet_names[6] # Sheet ke-7
df = all_sheets[july_sheet_name]
# Cetak nama kolom untuk memastikan nama kolom yang benar
print("Nama kolom dalam DataFrame:", df.columns.tolist())
# Menampilkan beberapa baris pertama untuk memeriksa data
print(df.head())
# Mengubah kolom 'Amount' menjadi numerik, memaksa kesalahan menjadi NaN
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
# Menghapus baris dengan nilai NaN di kolom 'Amount'
df.dropna(subset=['Amount'], inplace=True)
# Rekap penjualan berdasarkan Sales
rekap_penjualan = df.groupby('Sales')['Amount'].sum().reset_index()
# Menyimpan hasil rekapitulasi ke file baru
rekap_penjualan.to_excel('rekap_penjualan_berdasarkan_sales_juli.xlsx', index=False)
print("Rekap penjualan berdasarkan Sales telah disimpan ke 'rekap_penjualan_berdasarkan_sales_juli.xlsx'")
else:
print("Sheet ke-7 tidak ditemukan dalam file Excel.")
except KeyError as e:
print(f"Kesalahan: Kolom {e} tidak ditemukan dalam DataFrame. Periksa kembali nama kolom dalam file Excel Anda.")
except Exception as e:
print(f"Kesalahan: {e}")
import pandas as pd
try:
# Gantilah 'path_to_file.xlsx' dengan path file Excel Anda
file_path = 'path_to_file.xlsx'
# Membaca semua sheet dalam file Excel
all_sheets = pd.read_excel(file_path, sheet_name=None)
# Membatasi hanya hingga sheet ke-6 (bulan Juni)
sheet_names = list(all_sheets.keys())[:6]
# Menggabungkan sheet ke-1 hingga ke-6 menjadi satu DataFrame
selected_sheets = {sheet_name: all_sheets[sheet_name] for sheet_name in sheet_names}
df = pd.concat(selected_sheets.values(), ignore_index=True)
# Cetak nama kolom untuk memastikan nama kolom yang benar
print("Nama kolom dalam DataFrame:", df.columns.tolist())
# Menampilkan beberapa baris pertama untuk memeriksa data
print(df.head())
# Mengubah kolom 'Amount' menjadi numerik, memaksa kesalahan menjadi NaN
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
# Menghapus baris dengan nilai NaN di kolom 'Amount'
df.dropna(subset=['Amount'], inplace=True)
# Rekap penjualan berdasarkan Sales
rekap_penjualan = df.groupby('Sales')['Amount'].sum().reset_index()
# Menyimpan hasil rekapitulasi ke file baru
rekap_penjualan.to_excel('rekap_penjualan_berdasarkan_sales_juni.xlsx', index=False)
print("Rekap penjualan berdasarkan Sales telah disimpan ke 'rekap_penjualan_berdasarkan_sales_juni.xlsx'")
except KeyError as e:
print(f"Kesalahan: Kolom {e} tidak ditemukan dalam DataFrame. Periksa kembali nama kolom dalam file Excel Anda.")
except Exception as e:
print(f"Kesalahan: {e}")
import pandas as pd
try:
# Gantilah 'path_to_file.xlsx' dengan path file Excel Anda
file_path = 'path_to_file.xlsx'
# Membaca semua sheet dalam file Excel
all_sheets = pd.read_excel(file_path, sheet_name=None)
# Menggabungkan semua sheet menjadi satu DataFrame
df = pd.concat(all_sheets.values(), ignore_index=True)
# Cetak nama kolom untuk memastikan nama kolom yang benar
print("Nama kolom dalam DataFrame:", df.columns.tolist())
# Menampilkan beberapa baris pertama untuk memeriksa data
print(df.head())
# Mengubah kolom 'Amount' menjadi numerik, memaksa kesalahan menjadi NaN
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
# Menghapus baris dengan nilai NaN di kolom 'Amount'
df.dropna(subset=['Amount'], inplace=True)
# Rekap penjualan berdasarkan merek barang
rekap_penjualan = df.groupby('Brand')['Amount'].sum().reset_index()
# Menyimpan hasil rekapitulasi ke file baru
rekap_penjualan.to_excel('rekap_penjualan_berdasarkan_merk.xlsx', index=False)
print("Rekap penjualan berdasarkan merek barang telah disimpan ke 'rekap_penjualan_berdasarkan_merk.xlsx'")
except KeyError as e:
print(f"Kesalahan: Kolom {e} tidak ditemukan dalam DataFrame. Periksa kembali nama kolom dalam file Excel Anda.")
except Exception as e:
print(f"Kesalahan: {e}")
Conclusion
By following this tutorial, you have learned how to perform a sales recap based on various criteria using Python and Pandas. You have also learned how to create a flexible configuration to specify the desired type of recap. With this configuration, you can perform a sales recap based on the Sales and Brand columns, including the number of units sold and the total Amount.