Commit
adding workflows and tests
wesmadrigal committed Feb 28, 2024
1 parent 2e26ed9 commit 3fc2d41
Showing 13 changed files with 259 additions and 13 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/actions.yml
@@ -0,0 +1,29 @@
name: graphreduce_actions

env:
  DOCKER_IMAGE_NAME: graphreduce
  DOCKER_IMAGE_TAG: latest

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          python setup.py install
      - name: Python client tests
        run: |
          pytest test_graph_reduce.py -s
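The test step invokes `pytest test_graph_reduce.py -s`, while the test file added in this commit lives at tests/test_graph_reduce.py, so the workflow's path assumes pytest is run from a directory where the file is discoverable. A minimal local stand-in for the CI test step (a sketch, not part of this commit; the tests/ path is an assumption based on the file layout below):

#!/usr/bin/env python
# Hypothetical local equivalent of the workflow's test step.
import sys
import pytest

if __name__ == "__main__":
    # mirrors `pytest test_graph_reduce.py -s` from the workflow,
    # pointed at the tests/ directory this commit adds
    sys.exit(pytest.main(["tests/test_graph_reduce.py", "-s"]))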
31 changes: 19 additions & 12 deletions graphreduce/context.py
@@ -10,11 +10,15 @@
import pandas as pd
import dask.dataframe as dd
import pyspark
from structlog import get_logger

# internal
from graphreduce.node import GraphReduceNode


logger = get_logger('graphreduce.context')


def method_requires (
        nodes: typing.List[GraphReduceNode] = [],
        checkpoint: bool = False,
@@ -48,19 +52,22 @@ def newfunc(inst, *args, **kwargs):
            else:
                df = inst.df

            # Some libraries don't like a ton of underscores
            # So rename the function
            fname = func.__name__
            fname = fname.replace('_', '-')
            name = f"{inst.__class__.__name__}_{fname}.{inst.fmt}"
            # checkpoint
            inst._storage_client.offload(
                df,
                name
            )
            path = inst._storage_client.get_path(name)
            # reload
            inst.df = inst._storage_client.load(path)
            if hasattr(inst, '_checkpoints') and fname in inst._checkpoints:
                return res
            else:
                name = f"{inst.__class__.__name__}_{fname}.{inst.fmt}"
                # checkpoint
                inst._storage_client.offload(
                    df,
                    name
                )
                path = inst._storage_client.get_path(name)
                # reload
                inst.df = inst._storage_client.load(path)

                # add function to list of checkpoints.
                inst._checkpoints.append(fname)
            return res
        return newfunc
    return wrapit
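The net effect of this change: the first time a decorated method runs, the node's dataframe is offloaded through the storage client, reloaded, and the method name is recorded in `_checkpoints`; repeat calls with the same name skip the offload/reload round-trip. A stripped-down sketch of the same pattern (a standalone illustration, not the library's actual decorator):

import functools

def checkpoint_once(func):
    # Offload/reload a node's dataframe only on the first call per method.
    @functools.wraps(func)
    def newfunc(inst, *args, **kwargs):
        res = func(inst, *args, **kwargs)
        fname = func.__name__.replace('_', '-')
        if fname not in getattr(inst, '_checkpoints', []):
            name = f"{inst.__class__.__name__}_{fname}.{inst.fmt}"
            inst._storage_client.offload(inst.df, name)
            # reload from the checkpointed path
            inst.df = inst._storage_client.load(
                inst._storage_client.get_path(name)
            )
            inst._checkpoints.append(fname)
        return res
    return newfunc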
24 changes: 24 additions & 0 deletions graphreduce/node.py
@@ -37,6 +37,9 @@ class GraphReduceNode(metaclass=abc.ABCMeta):
    label_period_unit : typing.Optional[PeriodUnit]
    label_field: typing.Optional[str]
    storage_client: typing.Optional[StorageClient]
    batch_features: typing.List
    online_features: typing.List
    on_demand_features: typing.List

    def __init__ (
            self,
@@ -58,6 +61,7 @@ def __init__ (
            spark_sqlctx : pyspark.sql.SQLContext = None,
            columns : list = [],
            storage_client: typing.Optional[StorageClient] = None,
            checkpoints: list = [],
    ):
        """
        Constructor
@@ -108,6 +112,8 @@ def __init__ (
        self._storage_client = storage_client
        # List of merged neighbor classes.
        self._merged = []
        # List of checkpoints.
        self._checkpoints = []

        if not self.date_key:
            logger.warning(f"no `date_key` set for {self}")
@@ -129,6 +135,7 @@ def __str__ (
"""
return f"<GraphReduceNode: fpath={self.fpath} fmt={self.fmt}>"



    def do_data (
            self
@@ -466,6 +473,23 @@ def prep_for_labels (
        return self.df


    def online_features (
            self,
    ):
        """
        Define online features.
        """
        pass

    def on_demand_features (
            self,
    ):
        """
        Define on demand features for this node.
        """
        pass




class DynamicNode(GraphReduceNode):
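Both new hooks are no-ops on the base class, so existing nodes are unaffected; subclasses opt in by overriding them. A minimal sketch of a subclass using the hooks (the class and feature names here are hypothetical, not from this commit):

from graphreduce.node import DynamicNode

class CustomerNode(DynamicNode):
    # illustrative only: features a node might expose for online serving
    def online_features(self):
        return ['cust_id', 'cust_name']

    # illustrative only: features computed at request time
    def on_demand_features(self):
        return ['cust_days_since_last_order']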
2 changes: 1 addition & 1 deletion setup.py
@@ -17,7 +17,7 @@

setuptools.setup(
    name="graphreduce",
    version = "1.5.9",
    version = "1.6.2",
    url="https://github.com/wesmadrigal/graphreduce",
    packages = setuptools.find_packages(exclude=[ "docs", "examples" ]),
    install_requires = [
Empty file added tests/__init__.py
3 changes: 3 additions & 0 deletions tests/data/cust.csv
@@ -0,0 +1,3 @@
id,name
1,wes
2,john
17 changes: 17 additions & 0 deletions tests/data/notification_interactions.csv
@@ -0,0 +1,17 @@
id,notification_id,interaction_type_id,ts
1000,101,1500,2022-08-06
1001,101,1600,2022-08-07
1002,101,1700,2022-08-08
1003,102,1500,2023-01-01
1004,102,1600,2023-01-02
1005,102,1700,2023-01-03
1006,103,1500,2023-05-05
1007,103,1600,2023-05-06
1008,103,1700,2023-05-07
1009,104,1500,2023-06-01
1010,105,1500,2023-06-02
1011,106,1500,2023-06-03
1012,107,1500,2023-06-04
1013,108,1500,2023-06-15
1014,109,1500,2023-06-16
1015,110,1500,2023-06-17
16 changes: 16 additions & 0 deletions tests/data/notifications.csv
@@ -0,0 +1,16 @@
id,customer_id,ts
101,1,2022-08-05
102,1,2023-01-01
103,1,2023-05-05
104,1,2023-06-01
105,1,2023-06-02
106,1,2023-06-23
107,2,2022-09-05
108,2,2022-11-01
109,2,2023-01-01
110,2,2023-07-01
111,1,2023-07-15
112,1,2023-07-18
113,1,2023-08-01
114,2,2023-07-18
115,2,2023-08-01
27 changes: 27 additions & 0 deletions tests/data/order_events.csv
@@ -0,0 +1,27 @@
id,order_id,event_id
1,1,1
2,1,2
3,1,3
4,1,4
5,1,5
6,1,6
7,1,7
8,1,8
9,1,9
10,1,10
11,2,11
12,2,12
13,2,13
14,2,14
15,2,15
16,2,16
17,2,17
18,2,18
19,2,19
20,2,20
21,3,21
22,3,22
23,3,23
24,3,24
25,3,25
26,3,26
17 changes: 17 additions & 0 deletions tests/data/order_products.csv
@@ -0,0 +1,17 @@
id,order_id,product_id
1,1,1
2,1,2
3,1,3
4,1,4
5,2,1
6,2,2
7,2,3
8,2,4
9,3,1
10,3,2
11,3,3
12,3,4
13,4,1
14,4,2
15,4,3
16,4,4
8 changes: 8 additions & 0 deletions tests/data/orders.csv
@@ -0,0 +1,8 @@
id,customer_id,ts,amount
1,1,2023-05-12,10
2,1,2023-06-01,11.5
3,2,2023-01-01,100
4,2,2022-08-05,150
5,1,2023-07-01,325
6,2,2023-07-02,23
7,1,2023-07-14,12000
5 changes: 5 additions & 0 deletions tests/data/products.csv
@@ -0,0 +1,5 @@
id,name,category
1,butter,food
2,body armor,defense
3,flour,food
4,catnip,pets
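Together the fixtures form a small relational schema: cust is the parent entity; notifications and orders reference it through customer_id; notification_interactions references notifications; order_events and order_products reference orders, with order_products also pointing at products. A quick pandas sanity check of the parent-child keys (a sketch assuming the tests/data layout above):

import pandas as pd

cust = pd.read_csv('tests/data/cust.csv')
orders = pd.read_csv('tests/data/orders.csv')
notifications = pd.read_csv('tests/data/notifications.csv')

# every child row should reference an existing customer
assert orders['customer_id'].isin(cust['id']).all()
assert notifications['customer_id'].isin(cust['id']).all()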
93 changes: 93 additions & 0 deletions tests/test_graph_reduce.py
@@ -0,0 +1,93 @@
#!/usr/bin/env python

import os

from graphreduce.node import GraphReduceNode, DynamicNode
from graphreduce.graph_reduce import GraphReduce
from graphreduce.enum import ComputeLayerEnum, PeriodUnit, StorageFormatEnum, ProviderEnum


data_path = '/'.join(os.path.abspath(__file__).split('/')[0:-1]) + '/data'


def test_node_instance():
    node = DynamicNode(
        fpath=os.path.join(data_path, 'cust.csv'),
        fmt='csv',
        prefix='cust',
        compute_layer=ComputeLayerEnum.pandas,
        date_key=None
    )
    assert isinstance(node, DynamicNode)


def test_get_data():
    node = DynamicNode(
        fpath=os.path.join(data_path, 'cust.csv'),
        fmt='csv',
        prefix='cust',
        compute_layer=ComputeLayerEnum.pandas,
        date_key=None
    )
    node.do_data()
    print(node.df)
    assert len(node.df) == 2


def test_filter_data():
    node = DynamicNode(
        fpath=os.path.join(data_path, 'cust.csv'),
        fmt='csv',
        prefix='cust',
        compute_layer=ComputeLayerEnum.pandas,
        date_key=None
    )
    node.do_data()
    node.do_filters()
    print(node.df)
    assert len(node.df) == 2


def test_multi_node():

    cust_node = DynamicNode(
        fpath=os.path.join(data_path, 'cust.csv'),
        fmt='csv',
        prefix='cust',
        date_key=None
    )

    order_node = DynamicNode(
        fpath=os.path.join(data_path, 'orders.csv'),
        fmt='csv',
        prefix='ord',
        date_key='ts'
    )

    gr = GraphReduce(
        parent_node=cust_node,
        fmt='csv',
        compute_layer=ComputeLayerEnum.pandas,
        dynamic_propagation=True,
        compute_period_val=730
    )
    gr.add_node(cust_node)
    gr.add_node(order_node)

    assert len(gr) == 2

    gr.add_entity_edge(
        parent_node=cust_node,
        relation_node=order_node,
        parent_key='id',
        relation_key='customer_id',
        relation_type='parent_child',
        reduce=True
    )

    gr.do_transformations()
    print(gr.parent_node.df)
    assert len(gr.parent_node.df) == 2
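The expected row count in test_multi_node can be worked out from the fixtures: cust.csv has two customers and every orders.csv row joins to one of them, so the reduced parent frame keeps exactly two rows. A rough pandas equivalent of the join-and-aggregate GraphReduce performs here (a sketch of the expectation with illustrative aggregate names, not the library's internals):

import pandas as pd

cust = pd.read_csv('tests/data/cust.csv')
orders = pd.read_csv('tests/data/orders.csv')

# roll the child entity up to the parent key ...
agg = orders.groupby('customer_id').agg(
    ord_count=('id', 'count'),
    ord_amount_sum=('amount', 'sum'),
).reset_index()

# ... then left-join back onto the parent: one row per customer
out = cust.merge(agg, left_on='id', right_on='customer_id', how='left')
assert len(out) == 2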


