| | import gradio as gr |
| | import numpy as np |
| | import pandas as pd |
| | import matplotlib.pyplot as plt |
| | import seaborn as sns |
# Global seaborn theme: dark grid background with light dotted grid lines.
sns.set_style("darkgrid",
              {"grid.color": ".6",
               "grid.linestyle": ":"})
| | import category_encoders as ce |
| | from sklearn.decomposition import TruncatedSVD |
| | from sklearn.feature_extraction.text import TfidfVectorizer |
| | from sklearn.metrics.pairwise import cosine_similarity |
| | from sklearn.preprocessing import LabelEncoder |
| | from sklearn.preprocessing import OneHotEncoder |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
def predict_match(name, body_profile_type):
    """Return the best-matching username for a demo user profile.

    Only ``body_profile_type`` (a numeric body-profile code) varies; every
    other feature is a hard-coded demo value.

    NOTE(review): ``name`` is accepted for the Gradio interface but is not
    used anywhere in the matching logic — confirm whether that is intended.
    """
    # Feature insertion order matters: it must mirror the column layout the
    # SVD model was fitted on, so keep this dict ordering intact.
    profile = {
        'age': 22.0,
        'status': 1.0,
        'sex': 0.0,
        'height': 60.0,
        'smokes': 1.0,
        'new_languages': 2.0,
        'body_profile': float(body_profile_type),
        'education_level': 4.0,
        'dropped_out': 0.0,
        'bio': 'I am a foodie and traveller. But sometimes like to sit alone in a corner and read a good fiction.',
        'location_preference': 2.0,
        'num_languages': 2.0,
        'drinks_encoded': 0.0,
        'drugs_encoded': 0.0,
        'location_new_york': 0.0,
        'location_northern_california': 1.0,
        'location_southern_california': 0.0,
        'job_encoded': 4.0,
        'pets_0': 1.0,
        'pets_1': 1.0,
        'pets_2': 1.0,
        'pets_3': 1.0,
    }

    # Vectorise the bio with the globally fitted TF-IDF model so the text
    # columns line up with the training feature table.
    bio_features = pd.DataFrame(
        tfidf.transform([profile['bio']]).toarray(), columns=feature_names)

    # Numeric features (bio text column removed) + TF-IDF term columns.
    features = pd.DataFrame(profile, index=[0]).drop("bio", axis=1)
    features = pd.concat([features, bio_features], axis=1)

    return recommendOne(features)
| |
|
def greet_test(name, str2):
    """Sanity-check endpoint: echo both inputs back in a greeting string."""
    return f"Hello {name}!! str2={str2}"
| |
|
| | |
| | |
# Load the raw profile dataset.
# NOTE(review): the path is relative to the working directory — confirm
# the CSV ships alongside this script.
tinder_df = pd.read_csv("tinder_data.csv")

# Number of languages spoken: 'language' holds a comma-separated list,
# so comma count + 1 is the language count (NaN rows stay NaN).
tinder_df['num_languages'] = tinder_df['language']\
    .str.count(',') + 1
tinder_df.drop(["language"], axis=1, inplace=True)

# Ordinal strength for how far afield the user will match.
place_type_strength = {
    'anywhere': 1.0,
    'same state': 2.0,
    'same city': 2.5
}

tinder_df['location_preference'] = \
    tinder_df['location_preference']\
    .apply(lambda x: place_type_strength[x])

# Two-valued text columns mapped straight to 0/1.
two_unique_values_column = {
    'sex': {'f': 1, 'm': 0},
    'dropped_out': {'no': 0, 'yes': 1}
}

tinder_df.replace(two_unique_values_column,
                  inplace=True)

# Relationship status collapsed to two ordinal levels:
# 2.0 = available for matching, 1.0 = not really available.
status_type_strength = {
    'single': 2.0,
    'available': 2.0,
    'seeing someone': 1.0,
    'married': 1.0
}
tinder_df['status'] = tinder_df['status']\
    .apply(lambda x:
           status_type_strength[x])
| |
|
| | |
# The orientation column is not used as a model feature — drop it.
# Fix: the previous code label-encoded 'orientation' and then immediately
# dropped the column, so the fit/transform was dead work with no effect
# on the final feature set.
tinder_df.drop("orientation", axis=1, inplace=True)
| |
|
# Collapse the fine-grained drinking answers into three buckets.
drinking_habit = {
    'socially': 'sometimes',
    'rarely': 'sometimes',
    'not at all': 'do not drink',
    'often': 'drinks often',
    'very often': 'drinks often',
    'desperately': 'drinks often'
}
tinder_df['drinks'] = tinder_df['drinks']\
    .apply(lambda x:
           drinking_habit[x])

# One shared LabelEncoder fitted over BOTH habit columns flattened
# together, so identical answers get the same numeric code in either
# column.
habit_encoder = LabelEncoder()
habit_encoder.fit(tinder_df[['drinks', 'drugs']]
                  .values.reshape(-1))

tinder_df['drinks_encoded'] = \
    habit_encoder.transform(tinder_df['drinks'])
tinder_df['drugs_encoded'] = \
    habit_encoder.transform(tinder_df['drugs'])

# Raw text columns are replaced by the encoded versions above.
tinder_df.drop(["drinks", "drugs"], axis=1,
               inplace=True)
| |
|
# Known cities per region; anything unlisted defaults to northern_california.
region_dict = {'southern_california': ['los angeles',
                                       'san diego', 'hacienda heights',
                                       'north hollywood', 'phoenix'],
               'new_york': ['brooklyn',
                            'new york']}


def get_region(city):
    """Map a city name (case-insensitive) to its region key.

    Returns "northern_california" for any city not listed in region_dict.
    """
    target = city.lower()
    for region, cities in region_dict.items():
        if any(target == c.lower() for c in cities):
            return region
    return "northern_california"
| |
|
| |
|
# Keep only the city (text before the first ", ") and map it to a region.
tinder_df['location'] = tinder_df['location']\
    .str.split(', ')\
    .str[0].apply(get_region)

# One-hot encode the three regions into location_* indicator columns.
location_encoder = OneHotEncoder()

location_encoded = location_encoder.fit_transform(
    tinder_df[['location']])

# Dense indicator frame with sklearn-generated column names
# (location_new_york, location_northern_california, ...).
location_encoded_df = pd.DataFrame(location_encoded.toarray(),
                                   columns=location_encoder.
                                   get_feature_names_out(['location']))

tinder_df = pd.concat([tinder_df, location_encoded_df], axis=1)

# Raw text column replaced by the indicator columns above.
tinder_df.drop(["location"], axis=1, inplace=True)

# Label-encode job titles into a single numeric column.
job_encoder = LabelEncoder()

job_encoder.fit(tinder_df['job'])

tinder_df['job_encoded'] = job_encoder.\
    transform(tinder_df['job'])

tinder_df.drop('job', axis=1, inplace=True)
| |
|
# Smoking collapsed to a binary flag: 1.0 = non-smoker, 0 = any smoking.
smokes = {
    'no': 1.0,
    'sometimes': 0,
    'yes': 0,
    'when drinking': 0,
    'trying to quit': 0
}
tinder_df['smokes'] = tinder_df['smokes']\
    .apply(lambda x: smokes[x])

# Binary-encode the multi-valued pets column into pets_0..pets_3 bit
# columns (category_encoders BinaryEncoder).
bin_enc = ce.BinaryEncoder(cols=['pets'])

pet_enc = bin_enc.fit_transform(tinder_df['pets'])

tinder_df = pd.concat([tinder_df, pet_enc], axis=1)

# Raw pets column replaced by the bit columns above.
tinder_df.drop("pets", axis=1, inplace=True)
| |
|
| | |
# Label-encode the language the user wants to learn.
# Fix: this encoder was previously named `location_encoder`, which
# silently rebound (shadowed) the OneHotEncoder used for the location
# columns above — misleading and a latent hazard for any later use of
# that name.
language_encoder = LabelEncoder()
tinder_df['new_languages'] = language_encoder.fit_transform(
    tinder_df['new_languages'])

# Label-encode body profile. The numeric `body_profile_type` input that
# predict_match() receives is interpreted against this mapping.
le = LabelEncoder()
tinder_df["body_profile"] = le.fit_transform(tinder_df["body_profile"])
| |
|
| | |
# TF-IDF model over the free-text bios; fitted once here and reused for
# new users in predict_match().
tfidf = TfidfVectorizer(stop_words='english')

tfidf_matrix = tfidf.fit_transform(tinder_df['bio'])

# Column labels for the TF-IDF matrix, in true column order.
# Fix: the previous code used `tfidf.vocabulary_`, a {term: column_index}
# dict whose iteration order is insertion order, NOT column order — using
# it as DataFrame `columns` mislabels every TF-IDF column.
# get_feature_names_out() returns the terms sorted to match the matrix.
feature_names = tfidf.get_feature_names_out()

tfidf_df = pd.DataFrame(tfidf_matrix.toarray(),
                        columns=feature_names)

# Final training feature table: numeric profile features + TF-IDF term
# columns; identifier/text columns are excluded from the model input.
tinder_dfs = tinder_df.drop(["bio", "user_id",
                             "username"], axis=1)
tinder_dfs = pd.concat([tinder_dfs,
                        tfidf_df], axis=1)
| |
|
| | |
# Reduce the high-dimensional feature table to 100 latent dimensions for
# similarity search.
svd = TruncatedSVD(n_components=100)

# Latent representation of every profile in the dataset; rows align with
# tinder_df's row order.
svd_matrix = svd.fit_transform(tinder_dfs)

# Profile-vs-profile similarity matrix.
# NOTE(review): not referenced by recommend()/recommendOne(), which
# recompute similarities against svd_matrix directly — possibly unused.
cosine_sim = cosine_similarity(svd_matrix)
| |
|
def recommend(user_df, num_recommendations=5):
    """Return usernames of the dataset profiles most similar to user_df.

    Parameters:
        user_df: single-row DataFrame with the same feature layout that
            `svd` was fitted on (numeric features + TF-IDF columns).
        num_recommendations: how many usernames to return.

    Returns a pandas Series of usernames, best match first.
    """
    # Project the query profile into the fitted SVD latent space.
    user_vec = svd.transform(user_df)

    # Similarity of the query against every profile in the dataset.
    sims = cosine_similarity(user_vec, svd_matrix)[0]

    # Fix: the previous code sliced sim_scores[1:n+1], skipping rank 0.
    # That idiom excludes self-similarity when the query row is part of
    # the similarity matrix — but here user_df is a NEW profile, so the
    # old code silently discarded the single best match.
    top_indices = np.argsort(sims)[::-1][:num_recommendations]

    return tinder_df['username'].iloc[top_indices]
| |
|
def recommendOne(user_df, num_recommendations=1):
    """Return the username of the single best match for user_df.

    Parameters:
        user_df: single-row DataFrame with the same feature layout that
            `svd` was fitted on.
        num_recommendations: kept for interface compatibility; only the
            top match is ever returned.

    Returns the best-matching username as a plain string.
    """
    # Project the query profile into the fitted SVD latent space.
    user_vec = svd.transform(user_df)

    # Similarity of the query against every profile in the dataset.
    sims = cosine_similarity(user_vec, svd_matrix)[0]

    # Fix: the previous code skipped rank 0 (sim_scores[1:...]), which is
    # only correct when the query row is inside the similarity matrix.
    # user_df is a NEW profile, so take the true best match.
    best_idx = int(np.argmax(sims))

    return tinder_df['username'].iloc[best_idx]
| |
|
| | |
| |
|
# Gradio UI: two text inputs (name, numeric body-profile code) -> the
# recommended username. Launches a local web server when run as a script.
iface = gr.Interface(fn=predict_match, inputs=["text", "text"], outputs="text")
iface.launch()
| |
|