Hanukkah of Data

Author

François Michonneau

Published

March 1, 2023

Intro

Let’s get familiar with the data.

import ibis
from ibis import _
print("Using ibis version: ", ibis.__version__)
ibis.options.interactive = True

con = ibis.sqlite.connect("noahs.sqlite")

customers = con.table('customers')
products = con.table('products')
orders = con.table('orders')
orders_items = con.table('orders_items')

customers.head()
Using ibis version:  4.1.0
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ customerid  name              address          citystatezip             birthdate   phone        ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ int32stringstringstringstringstring       │
├────────────┼──────────────────┼─────────────────┼─────────────────────────┼────────────┼──────────────┤
│       1001 │ Jack Quinn       │ 201 E Park St   │ Los Angeles, CA 91343   │ 1960-05-14 │ 805-287-8515 │
│       1002 │ David Powell     │ 224C Tysens Ln  │ Staten Island, NY 10306 │ 1978-04-04 │ 516-768-1652 │
│       1003 │ Carrie Green     │ 1608 W 53rd Way │ Tampa, FL 33614         │ 1969-01-21 │ 727-209-0470 │
│       1004 │ Steven Miller    │ 178½ E 7th St   │ Manhattan, NY 10009     │ 1953-08-17 │ 607-941-9563 │
│       1005 │ Christine Powers │ 270 W 242nd St  │ Bronx, NY 10463         │ 1983-06-06 │ 212-759-9043 │
└────────────┴──────────────────┴─────────────────┴─────────────────────────┴────────────┴──────────────┘
products.head()
/home/francois/.local/lib/python3.10/site-packages/ibis/backends/base/sql/alchemy/__init__.py:125: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.
  yield con.execute(*args, **kwargs)
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ sku      desc                                     wholesale_cost ┃
┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
│ stringstringdecimal(10, 2) │
├─────────┼─────────────────────────────────────────┼────────────────┤
│ DLI0002 │ Smoked Whitefish Sandwich               │           9.33 │
│ PET0005 │ Vegan Cat Food, Turkey & Chicken        │           4.35 │
│ HOM0018 │ Power Radio (red)                       │          21.81 │
│ KIT0034 │ Azure Ladle                             │           2.81 │
│ PET0041 │ Gluten-free Cat Food, Pumpkin & Pumpkin │           4.60 │
└─────────┴─────────────────────────────────────────┴────────────────┘
orders.head()
/home/francois/.local/lib/python3.10/site-packages/ibis/backends/base/sql/alchemy/__init__.py:125: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.
  yield con.execute(*args, **kwargs)
TypeError: must be real number, not str

TypeError: must be real number, not str
orders_items.head()
/home/francois/.local/lib/python3.10/site-packages/ibis/backends/base/sql/alchemy/__init__.py:125: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.
  yield con.execute(*args, **kwargs)
┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ orderid  sku      qty    unit_price     ┃
┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━┩
│ int32stringint32decimal(10, 2) │
├─────────┼─────────┼───────┼────────────────┤
│    1001 │ COL0820 │     125.52 │
│    1002 │ TOY8907 │     112.92 │
│    1002 │ KIT5813 │     17.99 │
│    1002 │ KIT3981 │     27.21 │
│    1003 │ KIT7098 │     112.53 │
└─────────┴─────────┴───────┴────────────────┘

There is definitely something weird with the data types provided in the SQLite database, so let’s use pyarrow to create parquet files from the CSV and check that the data types are what is expected.

from pyarrow import csv
import pyarrow.parquet as pq

customers = csv.read_csv("noahs-customers.csv")
pq.write_table(customers, "noahs-customers.parquet")
orders = csv.read_csv("noahs-orders.csv")
pq.write_table(orders, "noahs-orders.parquet")
products = csv.read_csv("noahs-products.csv")
pq.write_table(products, "noahs-products.parquet")
items_orders = csv.read_csv("noahs-orders_items.csv")
pq.write_table(items_orders, "noahs-orders_items.parquet")
con =  ibis.connect("duckdb://")
customers = con.read_parquet('noahs-customers.parquet')
products = con.read_parquet('noahs-products.parquet')
orders_items = con.read_parquet('noahs-orders_items.parquet')
orders = con.read_parquet('noahs-orders.parquet')

Day 1

We need to convert the last names of the customers and find one that matches their phone number.

(
    customers
    .mutate(last_name = _.name.re_extract(r"(.+)\s(.+)", 2).lower())
    .filter(_.last_name.length() == 10)
    .mutate(converted = _.last_name.re_replace("[abc]", "2")
                        .re_replace("[def]", "3")
                        .re_replace("[ghi]", "4")
                        .re_replace("[jkl]", "5")
                        .re_replace("[mno]", "6")
                        .re_replace("[pqrs]","7")
                        .re_replace("[tuv]", "8")
                        .re_replace("[wxyz]","9")
           )
    .mutate(edit_phone = _.phone.re_replace("-", ""))
    .filter(_.edit_phone == _.converted)
    .select(_.name, _.phone, _.converted, _.edit_phone)
)
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┓
┃ name            phone         converted   edit_phone ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━┩
│ stringstringstringstring     │
├────────────────┼──────────────┼────────────┼────────────┤
│ Sam Guttenberg │ 488-836-2374 │ 4888362374 │ 4888362374 │
└────────────────┴──────────────┴────────────┴────────────┘

Day 2

We are looking for customers who have bought coffee/bagels, in 2017, that have the initals JD.

(
    products.filter(_.desc.lower().contains("coffee, drip"))
)
┏━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ sku      desc          wholesale_cost ┃
┡━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
│ stringstringfloat64        │
├─────────┼──────────────┼────────────────┤
│ DLI1464 │ Coffee, Drip │           6.21 │
└─────────┴──────────────┴────────────────┘
(
    products.filter(_.desc.lower().contains("bagel"))
)
┏━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ sku      desc           wholesale_cost ┃
┡━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
│ stringstringfloat64        │
├─────────┼───────────────┼────────────────┤
│ BKY4234 │ Caraway Bagel │           6.23 │
│ BKY5887 │ Sesame Bagel  │           6.38 │
└─────────┴───────────────┴────────────────┘
orders_items.head()
┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
┃ orderid  sku      qty    unit_price ┃
┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
│ int64stringint64float64    │
├─────────┼─────────┼───────┼────────────┤
│    1001 │ COL0820 │     125.52 │
│    1002 │ TOY8907 │     112.92 │
│    1002 │ KIT5813 │     17.99 │
│    1002 │ KIT3981 │     27.21 │
│    1003 │ KIT7098 │     112.53 │
└─────────┴─────────┴───────┴────────────┘
(
    orders_items.filter(_.sku.isin(['DLI1464', 'BKY4234', 'BKY5887']))
    .select(_.orderid)
    .distinct()
    .left_join(orders, _.orderid == orders.orderid)
    .select(_.orderid_x, _.customerid, _.ordered, _.total)
    .mutate(year = _.ordered.strftime('%Y'))
    .filter(_.year == '2017')
    .left_join(customers, _.customerid == customers.customerid)
    .select(_.orderid_x, _.total, _.customerid_y, _.name, _.phone, _.address, _.citystatezip)
    .filter(_.name.re_search('J(.+)\sD(.+)'))
)
┏━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ orderid_x  total    customerid_y  name          phone         address           citystatezip              ┃
┡━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ int64float64int64stringstringstringstring                    │
├───────────┼─────────┼──────────────┼──────────────┼──────────────┼──────────────────┼───────────────────────────┤
│      740933.224164 │ Jeremy Davis │ 212-771-8924 │ 134-10 Foch Blvd │ South Ozone Park, NY 114… │
└───────────┴─────────┴──────────────┴──────────────┴──────────────┴──────────────────┴───────────────────────────┘
orders_items.filter(_.orderid == 7409)
┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
┃ orderid  sku      qty    unit_price ┃
┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
│ int64stringint64float64    │
├─────────┼─────────┼───────┼────────────┤
│    7409 │ DLI1464 │     17.73 │
│    7409 │ KIT5861 │     112.53 │
│    7409 │ HOM8601 │     14.48 │
│    7409 │ BKY5887 │     18.48 │
└─────────┴─────────┴───────┴────────────┘

Day 3

We are looking for someone born in the year of the dog (comes back every 12 years: 2030, 2018, 2006, 1994, 1982, 1970, 1958), is an Aries (born between March 21 and April 19), and lives in South Ozone Park (see previous answer).

# Aries born in the year of the Dog
# year of the dog comes back every 12 years: 2030, 2018, 2006, 1994, 1982, 1970, 1958
# Aries: March 21 to Apr 19


# range for the birth days
customers.filter((_.birthdate == _.birthdate.min()) | (_.birthdate == _.birthdate.max()))
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ customerid  name           address           citystatezip        birthdate   phone        ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ int64stringstringstringdatestring       │
├────────────┼───────────────┼──────────────────┼────────────────────┼────────────┼──────────────┤
│       2658 │ Timothy Evans │ 335½ 20th St     │ Brooklyn, NY 11215 │ 1935-02-14 │ 838-950-3946 │
│       7499 │ Barry Barnes  │ 2758 Mermaid Ave │ Brooklyn, NY 11224 │ 2000-12-30 │ 516-305-8067 │
└────────────┴───────────────┴──────────────────┴────────────────────┴────────────┴──────────────┘
dog_years = ["1994", "1982", "1970", "1958", "1936"]

(
    customers
    .mutate(year = _.birthdate.year())
    .filter(_.year.isin(dog_years))
    .filter(
        ((_.birthdate.month() == 3) & (_.birthdate.day() >= 21)) |
        ((_.birthdate.month() == 4) & (_.birthdate.day() <= 19))
    )
    .filter(_.citystatezip.ilike("south ozone park%"))
)
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━┓
┃ customerid  name          address          citystatezip                birthdate   phone         year  ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━┩
│ int64stringstringstringdatestringint32 │
├────────────┼──────────────┼─────────────────┼────────────────────────────┼────────────┼──────────────┼───────┤
│       2274 │ Brent Nguyen │ 109-19 110th St │ South Ozone Park, NY 11420 │ 1958-03-25 │ 516-636-7397 │  1958 │
└────────────┴──────────────┴─────────────────┴────────────────────────────┴────────────┴──────────────┴───────┘

Day 4

We are looking for someone who buys regularly baked good between 4 and 5am.

(
    products.filter(_.sku.startswith("BKY"))
    .left_join(orders_items, orders_items.sku == _.sku)
    .select(_.orderid)
    .left_join(orders, orders.orderid == _.orderid)
    .filter((_.ordered.hour() >= 4) & (_.ordered.hour() < 5))
    .group_by(_.customerid)
    .aggregate(n_visits = _.count())
    .filter(_.n_visits == _.n_visits.max())
    .join(customers, customers.customerid == _.customerid)
)
┏━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ customerid  n_visits  name              address           citystatezip     birthdate   phone        ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ int64int64stringstringstringdatestring       │
├────────────┼──────────┼──────────────────┼──────────────────┼─────────────────┼────────────┼──────────────┤
│       537510 │ Christina Booker │ 1127 Grinnell Pl │ Bronx, NY 10474 │ 1981-01-08 │ 718-649-9036 │
└────────────┴──────────┴──────────────────┴──────────────────┴─────────────────┴────────────┴──────────────┘

Day 5

We are looking for someone who buys a lot of cat food for their old cats and lives in Queens Village.

(
    products.filter(_.desc.lower().contains("senior cat"))
)
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ sku      desc                                           wholesale_cost ┃
┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
│ stringstringfloat64        │
├─────────┼───────────────────────────────────────────────┼────────────────┤
│ PET0096 │ Wet Senior Cat Food, Turkey & Shrimp          │           3.11 │
│ PET0304 │ Gluten-free Senior Cat Food, Chicken & Turkey │           4.24 │
│ PET0925 │ Senior Cat Food                               │           3.84 │
│ PET0977 │ Vegan Senior Cat Food, Chicken & Beef         │           4.36 │
│ PET1090 │ Vegan Senior Cat Food, Tuna & Duck            │           4.39 │
│ PET1220 │ Dry Senior Cat Food, Salmon & Shrimp          │           3.51 │
│ PET1536 │ Dry Senior Cat Food, Chicken & Chicken        │           2.26 │
│ PET1849 │ Vegan Senior Cat Food, Duck & Pumpkin         │           4.65 │
│ PET1909 │ Vegan Senior Cat Food, Chicken & Salmon       │           4.60 │
│ PET1919 │ Gluten-free Senior Cat Food, Shrimp & Tuna    │           4.23 │
│  │
└─────────┴───────────────────────────────────────────────┴────────────────┘
## let's find out who buys senior cat food
(
    products.filter(_.desc.lower().contains("senior cat"))
    .left_join(orders_items, orders_items.sku == _.sku)
    .select(_.orderid)
    .left_join(orders, orders.orderid == _.orderid)
    .select(_.customerid)
    .distinct()
    .left_join(customers, customers.customerid == _.customerid)
    .filter(_.citystatezip.contains("Queens Village"))
)
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ customerid_x  customerid_y  name        address          citystatezip            birthdate   phone        ┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ int64int64stringstringstringdatestring       │
├──────────────┼──────────────┼────────────┼─────────────────┼────────────────────────┼────────────┼──────────────┤
│         54865486 │ Sam House  │ 220-8 111th Ave │ Queens Village, NY 11… │ 1955-11-10 │ 607-836-2966 │
│         76757675 │ Anita Koch │ 106-51 214th St │ Queens Village, NY 11… │ 1955-11-14 │ 315-492-7411 │
└──────────────┴──────────────┴────────────┴─────────────────┴────────────────────────┴────────────┴──────────────┘
## it's not quite enough, let's compare how much they buy
(
    products.filter(_.desc.lower().contains("senior cat"))
    .left_join(orders_items, orders_items.sku == _.sku)
    .select(_.orderid)
    .left_join(orders, orders.orderid == _.orderid)
    .group_by(_.customerid)
    .aggregate(_.total.sum().name("total_catfood"))
    .left_join(customers, customers.customerid == _.customerid)
    .filter(_.citystatezip.contains("Queens Village"))
    .select(_.customerid_x, _.name, _.total_catfood, _.phone)    
)
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ customerid_x  name        total_catfood  phone        ┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ int64stringfloat64string       │
├──────────────┼────────────┼───────────────┼──────────────┤
│         5486 │ Sam House  │        231.88 │ 607-836-2966 │
│         7675 │ Anita Koch │        699.98 │ 315-492-7411 │
└──────────────┴────────────┴───────────────┴──────────────┘

Day 6

We are looking for someone who minimizes the profit on their purchases at Noah’s Market.

(
    orders_items
    .left_join(products, products.sku == _.sku)
    .mutate(profit = _.unit_price - _.wholesale_cost)
    .group_by(_.orderid)
    .aggregate(_.profit.sum().name("total_profit_per_order"))
    .left_join(orders, orders.orderid == _.orderid)
    .group_by(_.customerid)
    .aggregate(_.total_profit_per_order.sum().name("total_profit_per_customer"))
    .filter(_.total_profit_per_customer == _.total_profit_per_customer.min())
    .join(customers, customers.customerid == _.customerid)
    .select(_.customerid, _.name, _.phone, _.total_profit_per_customer)
)
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ customerid  name            phone         total_profit_per_customer ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ int64stringstringfloat64                   │
├────────────┼────────────────┼──────────────┼───────────────────────────┤
│       8342 │ Emily Randolph │ 914-868-0316 │                   -313.16 │
└────────────┴────────────────┴──────────────┴───────────────────────────┘

Day 7

We are looking for people who are buying the same product but in different colors (colors are indicated in parentheses in the descriptions), and that have made purchases one after the other (they were following each other in line).

(
    products
    .filter(_.desc.re_search('.+\s\(.+\)$')) ## colors are noted in parenthesis
    .left_join(orders_items, orders_items.sku == _.sku)
    .select(_.orderid, _.desc)
    .left_join(orders, orders.orderid == _.orderid)
    .order_by(_.ordered)
    .mutate(
        lag_customerid = _.customerid.lag(),
        lag_ordered = _.ordered.lag()
    )
    .filter(_.lag_customerid == 8342)
    .select(_.orderid_x, _.desc, _.customerid, _.ordered, _.lag_customerid, _.lag_ordered)
)
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ orderid_x  desc                       customerid  ordered              lag_customerid  lag_ordered         ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ int64stringint64timestampint64timestamp           │
├───────────┼───────────────────────────┼────────────┼─────────────────────┼────────────────┼─────────────────────┤
│     12625 │ Automatic Mixer (white)   │       18652017-05-28 09:15:4783422017-05-28 09:15:06 │
│     86372 │ Electric Machine (purple) │       88352019-06-01 12:50:2583422019-06-01 12:50:16 │
│     94040 │ Electric Toaster (blue)   │       83422019-08-18 14:20:2883422019-08-18 14:20:28 │
│     94044 │ Noah's Gift Box (green)   │      108802019-08-18 14:57:1983422019-08-18 14:20:28 │
│     96223 │ Super Widget (white)      │      119422019-09-09 16:53:2283422019-09-09 16:30:05 │
│    154876 │ Electric Crockpot (purpl… │       83422021-04-17 19:52:2883422021-04-17 19:52:28 │
│    154879 │ Jigsaw Puzzle (Moses and… │       24762021-04-17 20:22:3283422021-04-17 19:52:28 │
│    200952 │ Noah's Jersey (puce)      │       78802022-07-26 09:48:3283422022-07-26 08:18:05 │
└───────────┴───────────────────────────┴────────────┴─────────────────────┴────────────────┴─────────────────────┘
# I need to figure out how to do the time difference because here I'm doing it by eye
# and ideally we need to compare the content of their orders and remove lines from the same order
# It's probably not in 2017 because that was the beginning of the story
customers.filter(_.customerid == 8835)
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ customerid  name            address        citystatezip             birthdate   phone        ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ int64stringstringstringdatestring       │
├────────────┼────────────────┼───────────────┼─────────────────────────┼────────────┼──────────────┤
│       8835 │ Jonathan Adams │ 644 Targee St │ Staten Island, NY 10304 │ 1975-08-26 │ 315-618-5263 │
└────────────┴────────────────┴───────────────┴─────────────────────────┴────────────┴──────────────┘

Day 8

We are looking for someone who has purchased lots of collectible (prefix with COL in the products table).

(
    products.filter(_.sku.lower().contains("col"))
    .left_join(orders_items, orders_items.sku == _.sku)
    .select(_.orderid, _.sku_x)
    .left_join(orders, orders.orderid == _.orderid)
    .group_by(_.customerid)
    .aggregate(n_collect = _.count())
    .order_by(ibis.desc(_.n_collect))
    .left_join(customers, customers.customerid == _.customerid)
    .select(_.name, _.phone, _.n_collect)
)
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃ name               phone         n_collect ┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ stringstringint64     │
├───────────────────┼──────────────┼───────────┤
│ Travis Bartlett   │ 929-906-5980 │       111 │
│ Justin Jimenez    │ 516-722-4758 │        31 │
│ Joshua Smith      │ 914-243-3254 │        30 │
│ Tyler Hickman     │ 585-307-8945 │        27 │
│ Rachel Richardson │ 212-778-9867 │        26 │
│ Kim Yang          │ 516-399-6228 │        26 │
│ Joshua Parks      │ 680-594-9693 │        26 │
│ Amber Porter      │ 838-307-4383 │        26 │
│ Chelsea Marsh     │ 315-565-2714 │        26 │
│ Jennifer Daniels  │ 516-994-3023 │        26 │
│  │
└───────────────────┴──────────────┴───────────┘