KEMBAR78
Data Science | PDF | Computer Programming
0% found this document useful (0 votes)
14 views7 pages

Data Science

The document contains several Python functions addressing various problems, including counting unique identical strings, finding the maximum repeating sequence, determining the longest alternating subsequence, and calculating maximum subarray cost. It also includes functions for analyzing sales data, such as revenue generation, sales percentages, and identifying unsold products in cities. Additionally, a simple Flask web application is provided for collecting user feedback.

Uploaded by

shdh78253
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
14 views7 pages

Data Science

The document contains several Python functions addressing various problems, including counting unique identical strings, finding the maximum repeating sequence, determining the longest alternating subsequence, and calculating maximum subarray cost. It also includes functions for analyzing sales data, such as revenue generation, sales percentages, and identifying unsold products in cities. Additionally, a simple Flask web application is provided for collecting user feedback.

Uploaded by

shdh78253
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

# Unique identical strings

from typing import List

from collections import defaultdict

def solution(strs: List[str]) -> int:
    """Count unordered pairs of strings built from the same set of characters.

    Two strings are treated as identical when they contain exactly the same
    distinct characters, regardless of order or repetition (for example
    "good" and "god").

    Args:
        strs: The strings to compare.

    Returns:
        The number of pairs of positions whose strings share a character set.
        An empty list yields 0.
    """
    # Tally how many strings map to each canonical character-set key.
    normalized_count = defaultdict(int)
    for s in strs:
        # Sorting the distinct characters gives one stable key per set.
        normalized = ''.join(sorted(set(s)))
        normalized_count[normalized] += 1

    # A group of k matching strings contributes C(k, 2) = k * (k - 1) / 2
    # pairs; the formula is already 0 for k == 1, so no guard is needed.
    return sum(
        count * (count - 1) // 2 for count in normalized_count.values()
    )

# Example usage:

# "good"/"god" share {d, g, o} and "bac"/"aabc" share {a, b, c}: two pairs.
strs1 = ["good", "god", "yarm", "bac", "aabc"]

print(solution(strs1)) # Expected output: 2

# Every string here has a different character set, so no pair matches.
strs2 = ["cba", "nba", "dba"]

print(solution(strs2)) # Expected output: 0

# Max Repeating sequence

def maxRepeating(sequence: str, word: str) -> int:
    """Return the largest k for which ``word`` repeated k times occurs in ``sequence``.

    Args:
        sequence: The text to search in.
        word: The unit that is repeated back to back.

    Returns:
        The maximum number of consecutive repetitions of ``word`` found as a
        substring of ``sequence``; 0 when ``word`` does not occur at all or
        when ``word`` is empty.
    """
    # An empty word is a substring of every string, so the loop below would
    # never terminate; treat it as "no repetitions".
    if not word:
        return 0

    count = 0
    # Grow the repeated pattern by one copy until it no longer fits.
    while word * (count + 1) in sequence:
        count += 1
    return count

# "ab" occurs twice in a row ("abab") inside "ababc", so this call returns 2;
# the value is neither printed nor stored here.
maxRepeating("ababc", word= "ab")


# Longest Alternating string

def longest_alternating_subsequence(X):
    """Return the number of runs of equal adjacent values in ``X``.

    Every position whose value differs from its predecessor adds one to the
    result. For a sequence over two values (such as 0/1) this equals the
    length of the longest alternating subsequence.

    Args:
        X: An indexable sequence of comparable values.

    Returns:
        0 for an empty sequence, otherwise 1 plus the number of adjacent
        positions that hold different values.
    """
    if not X:
        return 0

    # The first element always counts; each later change of value extends
    # the alternating subsequence by one.
    changes = sum(1 for i in range(1, len(X)) if X[i] != X[i - 1])
    return 1 + changes

# Examples

# Every neighbour differs, so the whole list alternates.
X1 = [0, 1, 0, 1, 0]

# A single element is the shortest non-empty case.
X2 = [0]

print(longest_alternating_subsequence(X1)) # Output: 5

print(longest_alternating_subsequence(X2)) # Output: 1

#Maximum Subarray Cost

def max_subarray_cost(arr):
    """Return the square of the largest sum of any non-empty contiguous subarray.

    Args:
        arr: A sequence of numbers.

    Returns:
        0 for an empty input; otherwise the maximum contiguous-subarray sum,
        squared.

    NOTE(review): only the largest sum is squared. If the intended cost is
    the squared sum of any subarray, a strongly negative subarray could
    score higher. Confirm against the problem statement.
    """
    if not arr:
        return 0

    max_sum = float('-inf')
    current_sum = 0
    for num in arr:
        current_sum += num
        # Record the best sum before any reset so that an all-negative
        # input still reports its largest single element.
        max_sum = max(max_sum, current_sum)
        # A negative running total can only hurt a later subarray.
        if current_sum < 0:
            current_sum = 0

    return max_sum ** 2

# Best contiguous sum is 1 (any single 1), and 1 squared is 1.
arr1 = [1, -1, 1, -1, 1]

# The whole list sums to 6, and 6 squared is 36.
arr2 = [1, 2, 3]

print(max_subarray_cost(arr1)) # Output: 1
print(max_subarray_cost(arr2)) # Output: 36

# Longest Common Prefix

def longest_common_prefix(input_string_lst):
    """Return the longest prefix shared by every string in the list.

    Args:
        input_string_lst: A list of strings.

    Returns:
        The longest common leading substring, or "" when the list is empty
        or the strings share no prefix.
    """
    if not input_string_lst:
        return ""

    # Start from the first string and shrink the candidate as needed.
    prefix = input_string_lst[0]
    for string in input_string_lst[1:]:
        # Drop trailing characters until the candidate fits this string.
        # The loop always stops: every string starts with "".
        while not string.startswith(prefix):
            prefix = prefix[:-1]
        if not prefix:
            # Nothing in common; later strings cannot bring it back.
            return ""
    return prefix

# All three words begin with "re" and diverge at the third character.
input_strings = ["relation", "renation", "restation"]

print("Longest Common Prefix:", longest_common_prefix(input_strings))

# Longest Common Postfix

def longest_common_postfix(input_string_lst):
    """Return the longest suffix shared by every string in the list.

    Args:
        input_string_lst: A list of strings.

    Returns:
        The longest common trailing substring, or "" when the list is empty
        or the strings share no suffix.
    """
    if not input_string_lst:
        return ""

    # Start from the first string and shrink the candidate as needed.
    suffix = input_string_lst[0]
    for string in input_string_lst[1:]:
        # Drop leading characters until the candidate fits this string.
        # The loop always stops: every string ends with "".
        while not string.endswith(suffix):
            suffix = suffix[1:]
        if not suffix:
            # Nothing in common; later strings cannot bring it back.
            return ""
    return suffix

# All three words end with "ation"; "nation" rules out anything longer.
input_strings = ["relation", "nation", "station"]

print("Longest Common Postfix:", longest_common_postfix(input_strings))


# EDA Feature Engineering

# Question 1: How much revenue was generated from the three top-selling product
# categories?

# Strip currency symbols and thousands separators so 'Sales' is numeric.
# The cleaned column has to live on `df`, the frame aggregated below.
# NOTE(review): the original cleaned `sales_data`; assumed to be the same data
# as `df` - confirm against the loading code.
df['Sales'] = df['Sales'].replace(r'[\$,]', '', regex=True).astype(float)

# Total sales per category, keeping only the three largest totals.
top_categories_revenue = df.groupby('Product Category')['Sales'].sum().nlargest(3)

print("Revenue from top 3 product categories:\n", top_categories_revenue)

print("Total revenue from top 3 categories: $", top_categories_revenue.sum())

# Question 2: What percentage of overall Sales does each category contribute in
# 2019?

# Parse the order dates so the year can be extracted.
df['Order Date'] = pd.to_datetime(df['Order Date'])

# Keep only the orders placed in 2019.
df_2019 = df[df['Order Date'].dt.year == 2019]

category_sales_2019 = df_2019.groupby('Product Category')['Sales'].sum()

total_sales_2019 = category_sales_2019.sum()

# Each category's share of the 2019 total, expressed as a percentage.
category_sales_percentage = (category_sales_2019 / total_sales_2019) * 100

print("Percentage of overall sales by category in 2019:\n", category_sales_percentage)

# Question 3: Which product category shows the widest price range of products?

# Remove "$" and "," so the prices can be converted to floats. The pattern is
# a raw string because "\$" is not a valid escape in a normal string literal.
df['Price Each'] = df['Price Each'].replace({r'\$': '', ',': ''}, regex=True).astype(float)

# Spread between the most and least expensive product in each category.
price_range = df.groupby('Product Category')['Price Each'].agg(lambda x: x.max() - x.min())

widest_price_range_category = price_range.idxmax()

print("Category with widest price range:", widest_price_range_category)

print("Price range for", widest_price_range_category, ":", price_range.max(), "$")

# Question 4: Plot the Sales trend for iPhone on a Monthly basis.

import matplotlib.pyplot as plt

# Bucket every order into its calendar month.
# Relies on 'Order Date' already holding datetimes (the .dt accessor needs it).
df['Month'] = df['Order Date'].dt.to_period('M')

# Monthly sales total for rows whose product is exactly 'iPhone'.
iphone_sales = (
    df[df['Product'] == 'iPhone']
    .groupby('Month')['Sales']
    .sum()
)

iphone_sales.plot(
    kind='line',
    marker='o',
    figsize=(10, 6),
    title="Monthly iPhone Sales Trend",
)
plt.xlabel("Month")
plt.ylabel("Sales")
plt.show()
# Question 5: For every city, list the products that have not yet been sold in that city.

# Every product that appears anywhere in the data.
all_products = set(df['Product'].unique())

# The set of products sold in each city.
city_products = df.groupby('City')['Product'].apply(set)

# Set difference: products in the catalogue that the city has never sold.
unsold_products = {city: all_products - products for city, products in city_products.items()}

for city, products in unsold_products.items():
    print(f"Unsold products in {city}: {products}")

# Question 6: List the top 3 cities in terms of the number of orders for each time of the day.

# Count orders for every (time of day, city) combination.
# NOTE(review): 'Time of Day' is not derived in the visible code; presumably
# it is created elsewhere - confirm.
top_cities_time_of_day = (
    df.groupby(['Time of Day', 'City'])['OrderID']
    .count()
    .reset_index(name='Order Count')
)

# Within each time of day keep the three cities with the highest counts,
# then flatten the index that the per-group apply produces.
top_cities = (
    top_cities_time_of_day
    .groupby('Time of Day')
    .apply(lambda grp: grp.nlargest(3, 'Order Count'))
    .reset_index(drop=True)
)

print("Top 3 cities for each time of day:\n", top_cities)

# Question 7: Find the top 3 customers by Sales.

# Sum sales per customer and keep the three largest totals.
top_customers = (
    df.groupby('Customer_ID')['Sales']
    .sum()
    .nlargest(3)
)

print("Top 3 customers by sales:\n", top_customers)

# Simple Web Page

from flask import Flask, request

from markupsafe import escape

# Application object that the route and error-handler decorators below register against.
app = Flask(__name__)

# Homepage route
@app.route('/')
def index():
    """Serve the feedback form.

    The form POSTs the fields ``name``, ``email`` and ``feedback`` to
    ``/submit``.
    """
    return '''
    <form action="/submit" method="post">
        Name: <input type="text" name="name"><br>
        Email: <input type="text" name="email"><br>
        Feedback: <textarea name="feedback"></textarea><br>
        <input type="submit" value="Submit">
    </form>
    '''

# Feedback submission handler
@app.route('/submit', methods=['POST'])
def submit():
    """Validate the posted feedback form and echo it back as HTML.

    Returns:
        A thank-you page showing the HTML-escaped name, email and feedback,
        or a plain message with HTTP status 400 when any field is missing or
        blank.
    """
    # .get() with a default keeps a missing field on the same validation path
    # as an empty one, instead of falling through to a generic 400 page.
    # Whitespace-only input is treated as empty.
    name = request.form.get('name', '').strip()
    email = request.form.get('email', '').strip()
    feedback = request.form.get('feedback', '').strip()

    if not (name and email and feedback):
        # Invalid submission: report it as a client error, not a 200.
        return "All Fields Required. Kindly fill the Name , Email and feedback", 400

    # Escape user input before embedding it in HTML to prevent script injection.
    name = escape(name)
    email = escape(email)
    feedback = escape(feedback)

    return f'''
    <h1>Thank You for Your Feedback!</h1>
    <p><strong>Name:</strong> {name}</p>
    <p><strong>Email:</strong> {email}</p>
    <p><strong>Feedback:</strong> {feedback}</p>
    '''

@app.errorhandler(404)
def page_not_found(e):
    """Render the custom page for unknown URLs.

    Args:
        e: The error passed in by Flask; not used.

    Returns:
        The error page together with the 404 status code. Returning only the
        body would send the page with status 200.
    """
    error_page = '''
    <h1> 404 page not found </h1>
    '''
    return error_page, 404

if __name__ == '__main__':
    # Start the development server only when this file is run as a script.
    # debug=True turns on the reloader and interactive debugger; keep it for
    # local development only.
    app.run(debug=True)

You might also like