"""Flask app that groups vendor rows of an uploaded CSV by fuzzy similarity.

Rows are first grouped by vendor name, then sub-grouped by address within
each name group. The result is written to ``<OUTPUT_FOLDER>/output.csv`` and
served from ``/downloads/output.csv``.
"""

import os

import pandas as pd
from flask import (
    Flask,
    abort,
    redirect,
    render_template,
    request,
    send_file,
    url_for,
)
from fuzzywuzzy import fuzz

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['OUTPUT_FOLDER'] = 'output'

# Input columns concatenated (in this order) into the address matching key.
ADDRESS_COLUMNS = ('Address (street)', 'Postal code', 'City', 'Country', 'Region')
# Input column used as the name matching key.
NAME_COLUMN = 'Vendor Name'
# Fixed name of the generated file inside OUTPUT_FOLDER.
OUTPUT_FILENAME = 'output.csv'

# Last generated output, passed to the index template; None until a file has
# been processed. Module-level state: shared by every request in the process.
output_file = None


def _sequential_groups(values, threshold, partitions=None):
    """Number runs of consecutive similar strings, starting at 1.

    Each value is compared with the one directly before it using
    ``fuzz.ratio``; a score strictly greater than ``threshold`` keeps the
    value in the current group, anything else opens the next group.

    When ``partitions`` is given (one label per value), a change of label
    between neighbours always starts a new run and resets the numbering to 1,
    so values are never compared across partitions.

    Returns a list of ints with the same length as ``values``.
    """
    groups = []
    counter = 0
    for idx, value in enumerate(values):
        starts_partition = idx == 0 or (
            partitions is not None and partitions[idx] != partitions[idx - 1]
        )
        if starts_partition:
            counter = 1
        elif fuzz.ratio(values[idx - 1], value) <= threshold:
            counter += 1
        groups.append(counter)
    return groups


def _safe_filename(filename):
    """Return the bare file name of a client-supplied path, or None if unusable.

    Directory components (with either slash style) are stripped so the upload
    cannot be written outside the upload folder.
    """
    name = os.path.basename((filename or '').replace('\\', '/'))
    if name in ('', '.', '..'):
        return None
    return name


def process_csv(input_path, *, name_threshold=80, address_threshold=70):
    """Group the rows of a vendor CSV and write the result to the output folder.

    The CSV must contain the columns in ``ADDRESS_COLUMNS`` and
    ``NAME_COLUMN``. Rows are sorted by lower-cased name and neighbouring
    rows whose ``fuzz.ratio`` exceeds ``name_threshold`` share a
    ``name_based_group``. Within each name group rows are sorted by
    lower-cased address and sub-grouped the same way using
    ``address_threshold`` (``address_based_group``). ``Group`` combines both
    as ``Group_<name group>_<address group>``.

    Args:
        input_path: Path of the CSV file to read.
        name_threshold: Name score that must be exceeded to stay in a group.
        address_threshold: Address score that must be exceeded to stay in a
            sub-group.

    Returns:
        The path of the written output CSV.

    Raises:
        KeyError: If a required column is missing from the input.

    Side effects:
        Creates the output folder if needed, overwrites the output CSV and
        sets the module-level ``output_file`` to ``'output.csv'``.
    """
    global output_file

    df = pd.read_csv(input_path)

    missing = [col for col in ADDRESS_COLUMNS + (NAME_COLUMN,) if col not in df.columns]
    if missing:
        raise KeyError(
            'Input CSV is missing required column(s): {}'.format(', '.join(missing))
        )

    # Build the matching keys. Every address part is separated by '-'
    # (the original omitted the separator between Country and Region).
    address = df[ADDRESS_COLUMNS[0]].astype(str)
    for column in ADDRESS_COLUMNS[1:]:
        address = address + '-' + df[column].astype(str)
    df['Address'] = address.str.lower()
    df['Name'] = df[NAME_COLUMN].astype(str).str.lower()

    # Pass 1: neighbours in name order. Every row, including the last one, is
    # scored against its predecessor.
    df = df.sort_values(['Name']).reset_index(drop=True)
    name_groups = _sequential_groups(df['Name'].tolist(), name_threshold)
    df['name_based_group'] = name_groups

    # Pass 2: neighbours in address order inside each name group. The scores
    # are computed after this sort, and numbering restarts at 1 whenever the
    # name group changes so sub-group numbers cannot leak between name groups.
    df = df.sort_values(['name_based_group', 'Address']).reset_index(drop=True)
    name_groups = df['name_based_group'].tolist()
    address_groups = _sequential_groups(
        df['Address'].tolist(), address_threshold, partitions=name_groups
    )
    df['address_based_group'] = address_groups

    # Concatenate for unique group name.
    df['Group'] = [
        'Group_{}_{}'.format(name_group, address_group)
        for name_group, address_group in zip(name_groups, address_groups)
    ]

    # The matching keys are helper columns only; keep them out of the output.
    df = df.drop(columns=['Address', 'Name'])

    os.makedirs(app.config['OUTPUT_FOLDER'], exist_ok=True)
    output_path = os.path.join(app.config['OUTPUT_FOLDER'], OUTPUT_FILENAME)
    df.to_csv(output_path, index=False)

    output_file = 'output.csv'
    return output_path


@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Show the upload form and, on POST, process the uploaded CSV.

    A successful upload stores the output path in the module-level
    ``output_file`` and redirects back to this view. Unusable file names and
    CSVs that cannot be processed are answered with HTTP 400.
    """
    global output_file

    if request.method == 'POST':
        upload = request.files.get('file')
        if upload:
            # The file name is client-controlled: never join it unsanitised.
            safe_name = _safe_filename(upload.filename)
            if safe_name is None:
                abort(400, description='Invalid file name.')

            os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
            file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
            upload.save(file_path)

            try:
                output_file = process_csv(file_path)
            except (KeyError, ValueError) as exc:
                # KeyError: required column missing. ValueError covers pandas
                # parser/empty-data errors and undecodable input.
                abort(
                    400,
                    description='Could not process uploaded CSV: {}'.format(exc),
                )
            return redirect(url_for('upload_file'))

    return render_template('index.html', output_file=output_file)


@app.route('/downloads/output.csv')
def download_file():
    """Send the most recently generated output CSV as an attachment.

    Responds with HTTP 404 when no output has been generated yet.
    """
    # Resolve against the working directory, which is where process_csv
    # wrote the file, instead of leaving a relative path to send_file.
    path = os.path.abspath(
        os.path.join(app.config['OUTPUT_FOLDER'], OUTPUT_FILENAME)
    )
    if not os.path.isfile(path):
        abort(404)
    return send_file(path, as_attachment=True)


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and reloader;
    # keep this for local development only.
    app.run(debug=True)