From 5cd4ff9d0183233bc4bc079e7c181372eef636e7 Mon Sep 17 00:00:00 2001
From: Ty Dunn
Date: Mon, 5 Jun 2023 13:27:09 +0200
Subject: add transform to dlt pipeline recipe

---
 .../recipes/AddTransformRecipe/README.md           |   0
 .../continuedev/recipes/AddTransformRecipe/main.py |  23 +++++
 .../recipes/AddTransformRecipe/steps.py            | 105 +++++++++++++++++++++
 3 files changed, 128 insertions(+)
 create mode 100644 continuedev/src/continuedev/recipes/AddTransformRecipe/README.md
 create mode 100644 continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
 create mode 100644 continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py

(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md b/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
new file mode 100644
index 00000000..974336cf
--- /dev/null
+++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
@@ -0,0 +1,23 @@
+from textwrap import dedent
+
+from ...core.main import Step
+from ...core.sdk import ContinueSDK
+from ...steps.core.core import WaitForUserInputStep
+from ...steps.main import MessageStep
+from .steps import SetupPipelineStep, ValidatePipelineStep
+
+
+class AddTransformRecipe(Step):
+    hide: bool = True
+
+    async def run(self, sdk: ContinueSDK):
+        await sdk.run_step(
+            MessageStep(message=dedent("""\
+                This recipe will walk you through the process of adding a transform to a dlt pipeline for your chosen data source. With the help of Continue, you will:
+                - X
+                - Y
+                - Z""")) >>
+            WaitForUserInputStep(prompt="What API do you want to load data from?") >>
+            SetupPipelineStep(api_description="WeatherAPI.com API") >>
+            ValidatePipelineStep()
+        )
diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py
new file mode 100644
index 00000000..c6059627
--- /dev/null
+++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py
@@ -0,0 +1,105 @@
+from textwrap import dedent
+
+from ...steps.main import MessageStep
+from ...core.sdk import Models
+from ...core.observation import DictObservation
+from ...models.filesystem_edit import AddFile
+from ...core.main import Step
+from ...core.sdk import ContinueSDK
+
+
+"""
+https://dlthub.com/docs/general-usage/resource#filter-transform-and-pivot-data
+
+Using chess pipeline we show how to add map and filter Python transforms.
+Example: https://dlthub.com/docs/customizations/customizing-pipelines/renaming_columns
+- dlt init chess duckdb
+- python chess.py
+- write a transform function: ideas for transform functions: using chess Python library decode the moves OR filter out certain games
+- use add_map or add_filter
+- run python and streamlit app
+"""
+
+class SetupPipelineStep(Step):
+    hide: bool = True
+    name: str = "Setup dlt Pipeline"
+
+    api_description: str  # e.g. 
"I want to load data from the weatherapi.com API" + + async def describe(self, models: Models): + return dedent(f"""\ + This step will create a new dlt pipeline that loads data from an API, as per your request: + {self.api_description} + """) + + async def run(self, sdk: ContinueSDK): + source_name = (await sdk.models.gpt35()).complete( + f"Write a snake_case name for the data source described by {self.api_description}: ").strip() + filename = f'{source_name}.py' + + # running commands to get started when creating a new dlt pipeline + await sdk.run([ + 'python3 -m venv env', + 'source env/bin/activate', + 'pip install dlt', + f'dlt init {source_name} duckdb', + 'Y', + 'pip install -r requirements.txt' + ]) + + # editing the resource function to call the requested API + await sdk.edit_file( + filename=filename, + prompt=f'Edit the resource function to call the API described by this: {self.api_description}' + ) + + # wait for user to put API key in secrets.toml + await sdk.ide.setFileOpen(await sdk.ide.getWorkspaceDirectory() + "/.dlt/secrets.toml") + await sdk.wait_for_user_confirmation("If this service requires an API key, please add it to the `secrets.toml` file and then press `Continue`") + return DictObservation(values={"source_name": source_name}) + + +class ValidatePipelineStep(Step): + hide: bool = True + + async def run(self, sdk: ContinueSDK): + source_name = sdk.history.last_observation().values["source_name"] + filename = f'{source_name}.py' + + await sdk.run_step(MessageStep(message=dedent("""\ + This step will validate that your dlt pipeline is working as expected: + - Test that the API call works + - Load the data into a local DuckDB instance + - Write a query to view the data + """))) + + # test that the API call works + await sdk.run(f'python3 {filename}') + + # remove exit() from the main main function + await sdk.edit_file( + filename=filename, + prompt='Remove exit() from the main function' + ) + + # load the data into the DuckDB instance + await sdk.run(f'python3 {filename}') + + table_name = f"{source_name}.{source_name}_resource" + tables_query_code = dedent(f'''\ + import duckdb + + # connect to DuckDB instance + conn = duckdb.connect(database="{source_name}.duckdb") + + # get table names + rows = conn.execute("SELECT * FROM {table_name};").fetchall() + + # print table names + for row in rows: + print(row) + ''') + + query_filename = (await sdk.ide.getWorkspaceDirectory()) + "/query.py" + await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code)) + await sdk.run('env/bin/python3 query.py') -- cgit v1.2.3-70-g09d2 From 8d67b7ea38811957e23d3b566e5c0fce394588d5 Mon Sep 17 00:00:00 2001 From: Ty Dunn Date: Mon, 5 Jun 2023 17:59:35 +0200 Subject: more add transform progress --- .../recipes/AddTransformRecipe/README.md | 3 + .../continuedev/recipes/AddTransformRecipe/main.py | 11 +- .../recipes/AddTransformRecipe/steps.py | 194 +++++++++++++++++---- 3 files changed, 165 insertions(+), 43 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md b/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md index e69de29b..9ad49a5f 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md @@ -0,0 +1,3 @@ +# AddTransformRecipe + +Uses the Chess.com API example to show how to add map and filter Python transforms to a dlt pipeline. 
\ No newline at end of file diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py index 974336cf..0fd96930 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py @@ -4,7 +4,7 @@ from ...core.main import Step from ...core.sdk import ContinueSDK from ...steps.core.core import WaitForUserInputStep from ...steps.main import MessageStep -from .steps import SetupPipelineStep, ValidatePipelineStep +from .steps import SetUpChessPipelineStep, AddTransformStep class AddTransformRecipe(Step): @@ -13,11 +13,12 @@ class AddTransformRecipe(Step): async def run(self, sdk: ContinueSDK): await sdk.run_step( MessageStep(message=dedent("""\ - This recipe will walk you through the process of adding a transform to a dlt pipeline for your chosen data source. With the help of Continue, you will: + This recipe will walk you through the process of adding a transform to a dlt pipeline that uses the chess.com API source. With the help of Continue, you will: - X - Y - Z""")) >> - WaitForUserInputStep(prompt="What API do you want to load data from?") >> - SetupPipelineStep(api_description="WeatherAPI.com API") >> - ValidatePipelineStep() + + SetUpChessPipelineStep() >> + WaitForUserInputStep(prompt="How do you want to transform the chess.com API data before loading it? For example, you could use the `python-chess` library to decode the moves or filter out certain games") >> + AddTransformStep() ) diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py index c6059627..8ab3eda1 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py @@ -11,7 +11,7 @@ from ...core.sdk import ContinueSDK """ https://dlthub.com/docs/general-usage/resource#filter-transform-and-pivot-data -Using chess pipeline we show how to add map and filter Python transforms. + Example: https://dlthub.com/docs/customizations/customizing-pipelines/renaming_columns - dlt init chess duckdb - python chess.py @@ -20,57 +20,44 @@ Example: https://dlthub.com/docs/customizations/customizing-pipelines/renaming_c - run python and streamlit app """ -class SetupPipelineStep(Step): +class SetUpChessPipelineStep(Step): hide: bool = True - name: str = "Setup dlt Pipeline" + name: str = "Setup Chess.com API dlt Pipeline" api_description: str # e.g. "I want to load data from the weatherapi.com API" async def describe(self, models: Models): return dedent(f"""\ - This step will create a new dlt pipeline that loads data from an API, as per your request: - {self.api_description} + This step will create a new dlt pipeline that loads data from the chess.com API. 
""") async def run(self, sdk: ContinueSDK): - source_name = (await sdk.models.gpt35()).complete( - f"Write a snake_case name for the data source described by {self.api_description}: ").strip() - filename = f'{source_name}.py' + + filename = 'chess.py' # running commands to get started when creating a new dlt pipeline await sdk.run([ 'python3 -m venv env', 'source env/bin/activate', 'pip install dlt', - f'dlt init {source_name} duckdb', + 'dlt init chess duckdb', 'Y', 'pip install -r requirements.txt' ]) - # editing the resource function to call the requested API - await sdk.edit_file( - filename=filename, - prompt=f'Edit the resource function to call the API described by this: {self.api_description}' - ) - # wait for user to put API key in secrets.toml - await sdk.ide.setFileOpen(await sdk.ide.getWorkspaceDirectory() + "/.dlt/secrets.toml") - await sdk.wait_for_user_confirmation("If this service requires an API key, please add it to the `secrets.toml` file and then press `Continue`") - return DictObservation(values={"source_name": source_name}) - - -class ValidatePipelineStep(Step): +class AddTransformStep(Step): hide: bool = True async def run(self, sdk: ContinueSDK): - source_name = sdk.history.last_observation().values["source_name"] + source_name = 'chess' filename = f'{source_name}.py' await sdk.run_step(MessageStep(message=dedent("""\ - This step will validate that your dlt pipeline is working as expected: - - Test that the API call works + This step will customize your resource function with a transform of your choice: + - Add a filter or map transformation depending on your request - Load the data into a local DuckDB instance - - Write a query to view the data + - Open up a Streamlit app for you to view the data """))) # test that the API call works @@ -86,19 +73,150 @@ class ValidatePipelineStep(Step): await sdk.run(f'python3 {filename}') table_name = f"{source_name}.{source_name}_resource" - tables_query_code = dedent(f'''\ - import duckdb - - # connect to DuckDB instance - conn = duckdb.connect(database="{source_name}.duckdb") - - # get table names - rows = conn.execute("SELECT * FROM {table_name};").fetchall() - - # print table names - for row in rows: - print(row) - ''') + examples = dedent(f"""\ + + Task: Use either the `add_map` or `add_filter` function to transform the data. + + Below you will find some docs page that will help you understand this task. + + # Customize resources + ## Filter, transform and pivot data + + You can attach any number of transformations that are evaluated on item per item basis to your resource. The available transformation types: + + - map - transform the data item (resource.add_map) + - filter - filter the data item (resource.add_filter) + - yield map - a map that returns iterator (so single row may generate many rows - resource.add_yield_map) + + Example: We have a resource that loads a list of users from an api endpoint. We want to customize it so: + + we remove users with user_id == "me" + we anonymize user data + Here's our resource: + ```python + import dlt + + @dlt.resource(write_disposition="replace") + def users(): + ... + users = requests.get(...) + ... + yield users + ``` + + Here's our script that defines transformations and loads the data. 
+ ```python + from pipedrive import users + + def anonymize_user(user_data): + user_data["user_id"] = hash_str(user_data["user_id"]) + user_data["user_email"] = hash_str(user_data["user_email"]) + return user_data + + # add the filter and anonymize function to users resource and enumerate + for user in users().add_filter(lambda user: user["user_id"] != "me").add_map(anonymize_user): + print(user) + ``` + + Here is a more complex example of a filter transformation: + + # Renaming columns + ## Renaming columns by replacing the special characters + + In the example below, we create a dummy source with special characters in the name. We then write a function that we intend to apply to the resource to modify its output (i.e. replacing the German umlaut): replace_umlauts_in_dict_keys. + ```python + import dlt + + # create a dummy source with umlauts (special characters) in key names (um) + @dlt.source + def dummy_source(prefix: str = None): + @dlt.resource + def dummy_data(): + for _ in range(100): + yield {f'Objekt_{_}':{'Größe':_, 'Äquivalenzprüfung':True}} + return dummy_data(), + + def replace_umlauts_in_dict_keys(d): + # Replaces umlauts in dictionary keys with standard characters. + umlaut_map = {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', 'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue'} + result = {} + for k, v in d.items(): + new_key = ''.join(umlaut_map.get(c, c) for c in k) + if isinstance(v, dict): + result[new_key] = replace_umlauts_in_dict_keys(v) + else: + result[new_key] = v + return result + + # We can add the map function to the resource + + # 1. Create an instance of the source so you can edit it. + data_source = dummy_source() + + # 2. Modify this source instance's resource + data_source = data_source.dummy_data().add_map(replace_umlauts_in_dict_keys) + + # 3. Inspect your result + for row in data_source: + print(row) + + # {'Objekt_0': {'Groesse': 0, 'Aequivalenzpruefung': True}} + # ... + ``` + + Here is a more complex example of a map transformation: + + # Pseudonymizing columns + ## Pseudonymizing (or anonymizing) columns by replacing the special characters + Pseudonymization is a deterministic way to hide personally identifiable info (PII), enabling us to consistently achieve the same mapping. If instead you wish to anonymize, you can delete the data, or replace it with a constant. In the example below, we create a dummy source with a PII column called "name", which we replace with deterministic hashes (i.e. replacing the German umlaut). + + ```python + import dlt + import hashlib + + @dlt.source + def dummy_source(prefix: str = None): + @dlt.resource + def dummy_data(): + for _ in range(3): + yield {'id':_, 'name': f'Jane Washington {_}'} + return dummy_data(), + + def pseudonymize_name(doc): + Pseudonmyisation is a deterministic type of PII-obscuring + Its role is to allow identifying users by their hash, without revealing the underlying info. 
+ + # add a constant salt to generate + salt = 'WI@N57%zZrmk#88c' + salted_string = doc['name'] + salt + sh = hashlib.sha256() + sh.update(salted_string.encode()) + hashed_string = sh.digest().hex() + doc['name'] = hashed_string + return doc + + # run it as is + for row in dummy_source().dummy_data().add_map(pseudonymize_name): + print(row) + + #{'id': 0, 'name': '96259edb2b28b48bebce8278c550e99fbdc4a3fac8189e6b90f183ecff01c442'} + #{'id': 1, 'name': '92d3972b625cbd21f28782fb5c89552ce1aa09281892a2ab32aee8feeb3544a1'} + #{'id': 2, 'name': '443679926a7cff506a3b5d5d094dc7734861352b9e0791af5d39db5a7356d11a'} + + # Or create an instance of the data source, modify the resource and run the source. + + # 1. Create an instance of the source so you can edit it. + data_source = dummy_source() + # 2. Modify this source instance's resource + data_source = data_source.dummy_data().add_map(replace_umlauts_in_dict_keys) + # 3. Inspect your result + for row in data_source: + print(row) + + pipeline = dlt.pipeline(pipeline_name='example', destination='bigquery', dataset_name='normalized_data') + load_info = pipeline.run(data_source) + ``` + """) query_filename = (await sdk.ide.getWorkspaceDirectory()) + "/query.py" await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code)) -- cgit v1.2.3-70-g09d2 From 0be241fe44edcc35c79835ac59971d60869d1c34 Mon Sep 17 00:00:00 2001 From: Ty Dunn Date: Tue, 6 Jun 2023 17:34:49 +0200 Subject: getting to first version --- .../recipes/AddTransformRecipe/README.md | 7 ++- .../continuedev/recipes/AddTransformRecipe/main.py | 12 ++-- .../recipes/AddTransformRecipe/steps.py | 70 ++++++++-------------- 3 files changed, 38 insertions(+), 51 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md b/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md index 9ad49a5f..d735e0cd 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md @@ -1,3 +1,8 @@ # AddTransformRecipe -Uses the Chess.com API example to show how to add map and filter Python transforms to a dlt pipeline. \ No newline at end of file +Uses the Chess.com API example to show how to add map and filter Python transforms to a dlt pipeline. + +Background +- https://dlthub.com/docs/general-usage/resource#filter-transform-and-pivot-data +- https://dlthub.com/docs/customizations/customizing-pipelines/renaming_columns +- https://dlthub.com/docs/customizations/customizing-pipelines/pseudonymizing_columns \ No newline at end of file diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py index 0fd96930..2a0736dd 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py @@ -14,11 +14,11 @@ class AddTransformRecipe(Step): await sdk.run_step( MessageStep(message=dedent("""\ This recipe will walk you through the process of adding a transform to a dlt pipeline that uses the chess.com API source. With the help of Continue, you will: - - X - - Y - - Z""")) >> - + - Set up a dlt pipeline for the chess.com API + - Add a filter or map transform to the pipeline + - Run the pipeline and view the transformed data in a Streamlit app + - """)) >> SetUpChessPipelineStep() >> - WaitForUserInputStep(prompt="How do you want to transform the chess.com API data before loading it? 
For example, you could use the `python-chess` library to decode the moves or filter out certain games") >> - AddTransformStep() + WaitForUserInputStep(prompt="How do you want to transform the Chess.com API data before loading it? For example, you could use the `python-chess` library to decode the moves or filter out certain games") >> + AddTransformStep(transform_description="Use the `python-chess` library to decode the moves in the game data") # Ask Nate how to not hardcode this here ) diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py index 8ab3eda1..46ddbed5 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py @@ -8,24 +8,10 @@ from ...core.main import Step from ...core.sdk import ContinueSDK -""" -https://dlthub.com/docs/general-usage/resource#filter-transform-and-pivot-data - - -Example: https://dlthub.com/docs/customizations/customizing-pipelines/renaming_columns -- dlt init chess duckdb -- python chess.py -- write a transform function: ideas for transform functions: using chess Python library decode the moves OR filter out certain games -- use add_map or add_filter -- run python and streamlit app -""" - class SetUpChessPipelineStep(Step): hide: bool = True name: str = "Setup Chess.com API dlt Pipeline" - api_description: str # e.g. "I want to load data from the weatherapi.com API" - async def describe(self, models: Models): return dedent(f"""\ This step will create a new dlt pipeline that loads data from the chess.com API. @@ -49,6 +35,8 @@ class SetUpChessPipelineStep(Step): class AddTransformStep(Step): hide: bool = True + transform_description: str # e.g. "Use the `python-chess` library to decode the moves in the game data" + async def run(self, sdk: ContinueSDK): source_name = 'chess' filename = f'{source_name}.py' @@ -60,43 +48,29 @@ class AddTransformStep(Step): - Open up a Streamlit app for you to view the data """))) - # test that the API call works - await sdk.run(f'python3 {filename}') - - # remove exit() from the main main function - await sdk.edit_file( - filename=filename, - prompt='Remove exit() from the main function' - ) - - # load the data into the DuckDB instance - await sdk.run(f'python3 {filename}') + prompt = dedent(f"""\ + Task: Write a transform function using the description below and then use `add_map` or `add_filter` from the `dlt` library to attach it a resource. - table_name = f"{source_name}.{source_name}_resource" - examples = dedent(f"""\ - - Task: Use either the `add_map` or `add_filter` function to transform the data. + Description: {self.transform_description} - Below you will find some docs page that will help you understand this task. + Here are some docs pages that will help you better understand how to use `dlt`. # Customize resources ## Filter, transform and pivot data You can attach any number of transformations that are evaluated on item per item basis to your resource. The available transformation types: - - map - transform the data item (resource.add_map) - filter - filter the data item (resource.add_filter) - yield map - a map that returns iterator (so single row may generate many rows - resource.add_yield_map) - Example: We have a resource that loads a list of users from an api endpoint. 
We want to customize it so: - - we remove users with user_id == "me" - we anonymize user data + Example: We have a resource that loads a list of users from an api endpoint. We want to customize it so: + - we remove users with user_id == 'me' + - we anonymize user data Here's our resource: ```python import dlt - @dlt.resource(write_disposition="replace") + @dlt.resource(write_disposition='replace') def users(): ... users = requests.get(...) @@ -109,12 +83,12 @@ class AddTransformStep(Step): from pipedrive import users def anonymize_user(user_data): - user_data["user_id"] = hash_str(user_data["user_id"]) - user_data["user_email"] = hash_str(user_data["user_email"]) + user_data['user_id'] = hash_str(user_data['user_id']) + user_data['user_email'] = hash_str(user_data['user_email']) return user_data # add the filter and anonymize function to users resource and enumerate - for user in users().add_filter(lambda user: user["user_id"] != "me").add_map(anonymize_user): + for user in users().add_filter(lambda user: user['user_id'] != 'me').add_map(anonymize_user): print(user) ``` @@ -168,7 +142,7 @@ class AddTransformStep(Step): # Pseudonymizing columns ## Pseudonymizing (or anonymizing) columns by replacing the special characters - Pseudonymization is a deterministic way to hide personally identifiable info (PII), enabling us to consistently achieve the same mapping. If instead you wish to anonymize, you can delete the data, or replace it with a constant. In the example below, we create a dummy source with a PII column called "name", which we replace with deterministic hashes (i.e. replacing the German umlaut). + Pseudonymization is a deterministic way to hide personally identifiable info (PII), enabling us to consistently achieve the same mapping. If instead you wish to anonymize, you can delete the data, or replace it with a constant. In the example below, we create a dummy source with a PII column called 'name', which we replace with deterministic hashes (i.e. replacing the German umlaut). 
```python import dlt @@ -216,8 +190,16 @@ class AddTransformStep(Step): pipeline = dlt.pipeline(pipeline_name='example', destination='bigquery', dataset_name='normalized_data') load_info = pipeline.run(data_source) ``` - """) + """) + + # edit the pipeline to add a tranform function and attach it to a resource + await sdk.edit_file( + filename=filename, + prompt=prompt + ) + + # run the pipeline and load the data + await sdk.run(f'python3 {filename}') - query_filename = (await sdk.ide.getWorkspaceDirectory()) + "/query.py" - await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code)) - await sdk.run('env/bin/python3 query.py') + # run a streamlit app to show the data + await sdk.run(f'dlt pipeline {source_name} show') \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 86369432eb6d35727f87fffa4a79646a85bb5498 Mon Sep 17 00:00:00 2001 From: Ty Dunn Date: Wed, 7 Jun 2023 13:29:53 +0200 Subject: initial structure --- .../recipes/DeployPipelineAirflowRecipe/README.md | 0 .../recipes/DeployPipelineAirflowRecipe/main.py | 37 +++++ .../recipes/DeployPipelineAirflowRecipe/steps.py | 152 +++++++++++++++++++++ 3 files changed, 189 insertions(+) create mode 100644 continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/README.md create mode 100644 continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py create mode 100644 continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/README.md b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/README.md new file mode 100644 index 00000000..e69de29b diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py new file mode 100644 index 00000000..d7cd03db --- /dev/null +++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py @@ -0,0 +1,37 @@ +from textwrap import dedent + +from ...core.main import Step +from ...core.sdk import ContinueSDK +from ...steps.core.core import WaitForUserInputStep +from ...steps.main import MessageStep +from .steps import SetupPipelineStep, ValidatePipelineStep + + +# https://github.com/dlt-hub/dlt-deploy-template/blob/master/airflow-composer/dag_template.py +# https://www.notion.so/dlthub/Deploy-a-pipeline-with-Airflow-245fd1058652479494307ead0b5565f3 +# 1. What verified pipeline do you want to deploy with Airflow? +# 2. Set up selected verified pipeline +# 3. Deploy selected verified pipeline with Airflow +# 4. Set up Airflow locally? + +class DeployPipelineAirflowRecipe(Step): + hide: bool = True + + async def run(self, sdk: ContinueSDK): + text_observation = await sdk.run_step( + MessageStep(name="Building your first dlt pipeline", message=dedent("""\ + This recipe will walk you through the process of creating a dlt pipeline for your chosen data source. With the help of Continue, you will: + - Create a Python virtual environment with dlt installed + - Run `dlt init` to generate a pipeline template + - Write the code to call the API + - Add any required API keys to the `secrets.toml` file + - Test that the API call works + - Load the data into a local DuckDB instance + - Write a query to view the data""")) >> + WaitForUserInputStep( + prompt="What API do you want to load data from? (e.g. 
weatherapi.com, chess.com)") + ) + await sdk.run_step( + SetupPipelineStep(api_description=text_observation.text) >> + ValidatePipelineStep() + ) diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py new file mode 100644 index 00000000..c32ae923 --- /dev/null +++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py @@ -0,0 +1,152 @@ +import os +import subprocess +from textwrap import dedent +import time + +from ...models.main import Range +from ...models.filesystem import RangeInFile +from ...steps.main import MessageStep +from ...core.sdk import Models +from ...core.observation import DictObservation, InternalErrorObservation +from ...models.filesystem_edit import AddFile, FileEdit +from ...core.main import Step +from ...core.sdk import ContinueSDK + +AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)" + + +class SetupPipelineStep(Step): + hide: bool = True + name: str = "Setup dlt Pipeline" + + api_description: str # e.g. "I want to load data from the weatherapi.com API" + + async def describe(self, models: Models): + return dedent(f"""\ + This step will create a new dlt pipeline that loads data from an API, as per your request: + {self.api_description} + """) + + async def run(self, sdk: ContinueSDK): + sdk.context.set("api_description", self.api_description) + + source_name = (await sdk.models.gpt35()).complete( + f"Write a snake_case name for the data source described by {self.api_description}: ").strip() + filename = f'{source_name}.py' + + # running commands to get started when creating a new dlt pipeline + await sdk.run([ + 'python3 -m venv env', + 'source env/bin/activate', + 'pip install dlt', + f'dlt init {source_name} duckdb\n\rY', + 'pip install -r requirements.txt' + ], description=dedent(f"""\ + Running the following commands: + - `python3 -m venv env`: Create a Python virtual environment + - `source env/bin/activate`: Activate the virtual environment + - `pip install dlt`: Install dlt + - `dlt init {source_name} duckdb`: Create a new dlt pipeline called {source_name} that loads data into a local DuckDB instance + - `pip install -r requirements.txt`: Install the Python dependencies for the pipeline"""), name="Setup Python environment") + + # editing the resource function to call the requested API + await sdk.ide.highlightCode(RangeInFile(filepath=os.path.join(await sdk.ide.getWorkspaceDirectory(), filename), range=Range.from_shorthand(15, 0, 29, 0)), "#00ff0022") + + # sdk.set_loading_message("Writing code to call the API...") + await sdk.edit_file( + filename=filename, + prompt=f'Edit the resource function to call the API described by this: {self.api_description}. 
Do not move or remove the exit() call in __main__.', + name=f"Edit the resource function to call the API {AI_ASSISTED_STRING}" + ) + + time.sleep(1) + + # wait for user to put API key in secrets.toml + await sdk.ide.setFileOpen(await sdk.ide.getWorkspaceDirectory() + "/.dlt/secrets.toml") + await sdk.wait_for_user_confirmation("If this service requires an API key, please add it to the `secrets.toml` file and then press `Continue`") + + sdk.context.set("source_name", source_name) + + +class ValidatePipelineStep(Step): + hide: bool = True + + async def run(self, sdk: ContinueSDK): + workspace_dir = await sdk.ide.getWorkspaceDirectory() + source_name = sdk.context.get("source_name") + filename = f'{source_name}.py' + + # await sdk.run_step(MessageStep(name="Validate the pipeline", message=dedent("""\ + # Next, we will validate that your dlt pipeline is working as expected: + # - Test that the API call works + # - Load the data into a local DuckDB instance + # - Write a query to view the data + # """))) + + # test that the API call works + output = await sdk.run(f'python3 {filename}', name="Test the pipeline", description=f"Running `python3 {filename}` to test loading data from the API") + + # If it fails, return the error + if "Traceback" in output: + output = "Traceback" + output.split("Traceback")[-1] + file_content = await sdk.ide.readFile(os.path.join(workspace_dir, filename)) + suggestion = (await sdk.models.gpt35()).complete(dedent(f"""\ + ```python + {file_content} + ``` + This above code is a dlt pipeline that loads data from an API. The function with the @resource decorator is responsible for calling the API and returning the data. While attempting to run the pipeline, the following error occurred: + + ```ascii + {output} + ``` + + This is a brief summary of the error followed by a suggestion on how it can be fixed by editing the resource function:""")) + + api_documentation_url = (await sdk.models.gpt35()).complete(dedent(f"""\ + The API I am trying to call is the '{sdk.context.get('api_description')}'. I tried calling it in the @resource function like this: + ```python + {file_content} + ``` + What is the URL for the API documentation that will help me learn how to make this call? 
Please format in markdown so I can click the link."""))
+
+            sdk.raise_exception(
+                title=f"Error while running pipeline.\nFix the resource function in {filename} and rerun this step", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=dedent(f"""\
+                {suggestion}
+
+                {api_documentation_url}
+
+                After you've fixed the code, click the retry button at the top of the Validate Pipeline step above.""")))
+
+        # remove exit() from the main function
+        await sdk.run_step(MessageStep(name="Remove early exit() from main function", message="Remove the early exit() from the main function now that we are done testing and want the pipeline to load the data into DuckDB."))
+
+        contents = await sdk.ide.readFile(os.path.join(workspace_dir, filename))
+        replacement = "\n".join(
+            list(filter(lambda line: line.strip() != "exit()", contents.split("\n"))))
+        await sdk.ide.applyFileSystemEdit(FileEdit(
+            filepath=os.path.join(workspace_dir, filename),
+            replacement=replacement,
+            range=Range.from_entire_file(contents)
+        ))
+
+        # load the data into the DuckDB instance
+        await sdk.run(f'python3 {filename}', name="Load data into DuckDB", description=f"Running python3 {filename} to load data into DuckDB")
+
+        table_name = f"{source_name}.{source_name}_resource"
+        tables_query_code = dedent(f'''\
+            import duckdb
+
+            # connect to DuckDB instance
+            conn = duckdb.connect(database="{source_name}.duckdb")
+
+            # get table names
+            rows = conn.execute("SELECT * FROM {table_name};").fetchall()
+
+            # print table names
+            for row in rows:
+                print(row)
+            ''')
+
+        query_filename = os.path.join(workspace_dir, "query.py")
+        await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code), name="Add query.py file", description="Adding a file called `query.py` to the workspace that will run a test query on the DuckDB instance")
+        await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected")
-- 
cgit v1.2.3-70-g09d2


From c84eae1885489ec7b07e0bb0eea1bac36f40c181 Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Wed, 7 Jun 2023 17:00:00 -0400
Subject: quick look over on transform recipe

---
 continuedev/src/continuedev/core/policy.py         |   5 +-
 .../AddTransformRecipe/dlt_transform_docs.md       | 135 ++++++++++++++++
 .../continuedev/recipes/AddTransformRecipe/main.py |  13 +-
 .../recipes/AddTransformRecipe/steps.py            | 173 +++------------------
 .../src/continuedev/steps/steps_on_startup.py      |   5 +-
 5 files changed, 173 insertions(+), 158 deletions(-)
 create mode 100644 continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md

(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py
index 8aea8de7..c3f1d188 100644
--- a/continuedev/src/continuedev/core/policy.py
+++ b/continuedev/src/continuedev/core/policy.py
@@ -3,6 +3,7 @@ from typing import List, Tuple, Type
 from ..steps.chroma import AnswerQuestionChroma, EditFileChroma, CreateCodebaseIndexChroma
 from ..steps.steps_on_startup import StepsOnStartupStep
 from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe
+from ..recipes.AddTransformRecipe.main import AddTransformRecipe
 from .main import Step, Validator, History, Policy
 from .observation import Observation, TracebackObservation, UserInputObservation
 from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeStep, 
FasterEditHighlightedCodeStep, StarCoderEditHighlightedCodeStep, MessageStep, EmptyStep, SetupContinueWorkspaceStep @@ -28,8 +29,10 @@ class DemoPolicy(Policy): # This could be defined with ObservationTypePolicy. Ergonomics not right though. if "/pytest" in observation.user_input.lower(): return WritePytestsRecipe(instructions=observation.user_input) - elif "/dlt" in observation.user_input.lower() or " dlt" in observation.user_input.lower(): + elif "/dlt" in observation.user_input.lower(): return CreatePipelineRecipe() + elif "/transform" in observation.user_input.lower(): + return AddTransformRecipe() elif "/comment" in observation.user_input.lower(): return CommentCodeStep() elif "/ask" in observation.user_input: diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md b/continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md new file mode 100644 index 00000000..658b285f --- /dev/null +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md @@ -0,0 +1,135 @@ +# Customize resources +## Filter, transform and pivot data + +You can attach any number of transformations that are evaluated on item per item basis to your resource. The available transformation types: +- map - transform the data item (resource.add_map) +- filter - filter the data item (resource.add_filter) +- yield map - a map that returns iterator (so single row may generate many rows - resource.add_yield_map) + +Example: We have a resource that loads a list of users from an api endpoint. We want to customize it so: +- we remove users with user_id == 'me' +- we anonymize user data +Here's our resource: +```python +import dlt + +@dlt.resource(write_disposition='replace') +def users(): + ... + users = requests.get(...) + ... + yield users +``` + +Here's our script that defines transformations and loads the data. +```python +from pipedrive import users + +def anonymize_user(user_data): + user_data['user_id'] = hash_str(user_data['user_id']) + user_data['user_email'] = hash_str(user_data['user_email']) + return user_data + +# add the filter and anonymize function to users resource and enumerate +for user in users().add_filter(lambda user: user['user_id'] != 'me').add_map(anonymize_user): +print(user) +``` + +Here is a more complex example of a filter transformation: + + # Renaming columns + ## Renaming columns by replacing the special characters + + In the example below, we create a dummy source with special characters in the name. We then write a function that we intend to apply to the resource to modify its output (i.e. replacing the German umlaut): replace_umlauts_in_dict_keys. + ```python + import dlt + + # create a dummy source with umlauts (special characters) in key names (um) + @dlt.source + def dummy_source(prefix: str = None): + @dlt.resource + def dummy_data(): + for _ in range(100): + yield {f'Objekt_{_}':{'Größe':_, 'Äquivalenzprüfung':True}} + return dummy_data(), + + def replace_umlauts_in_dict_keys(d): + # Replaces umlauts in dictionary keys with standard characters. + umlaut_map = {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', 'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue'} + result = {} + for k, v in d.items(): + new_key = ''.join(umlaut_map.get(c, c) for c in k) + if isinstance(v, dict): + result[new_key] = replace_umlauts_in_dict_keys(v) + else: + result[new_key] = v + return result + + # We can add the map function to the resource + + # 1. Create an instance of the source so you can edit it. + data_source = dummy_source() + + # 2. 
Modify this source instance's resource
+    data_source = data_source.dummy_data().add_map(replace_umlauts_in_dict_keys)
+
+    # 3. Inspect your result
+    for row in data_source:
+        print(row)
+
+    # {'Objekt_0': {'Groesse': 0, 'Aequivalenzpruefung': True}}
+    # ...
+    ```
+
+Here is a more complex example of a map transformation:
+
+# Pseudonymizing columns
+## Pseudonymizing (or anonymizing) columns by replacing the special characters
+Pseudonymization is a deterministic way to hide personally identifiable info (PII), enabling us to consistently achieve the same mapping. If instead you wish to anonymize, you can delete the data, or replace it with a constant. In the example below, we create a dummy source with a PII column called 'name', which we replace with deterministic hashes.
+
+```python
+import dlt
+import hashlib
+
+@dlt.source
+def dummy_source(prefix: str = None):
+    @dlt.resource
+    def dummy_data():
+        for _ in range(3):
+            yield {'id':_, 'name': f'Jane Washington {_}'}
+    return dummy_data(),
+
+def pseudonymize_name(doc):
+    """Pseudonymisation is a deterministic type of PII-obscuring.
+    Its role is to allow identifying users by their hash, without revealing the underlying info."""
+
+    # add a constant salt to generate
+    salt = 'WI@N57%zZrmk#88c'
+    salted_string = doc['name'] + salt
+    sh = hashlib.sha256()
+    sh.update(salted_string.encode())
+    hashed_string = sh.digest().hex()
+    doc['name'] = hashed_string
+    return doc
+
+    # run it as is
+    for row in dummy_source().dummy_data().add_map(pseudonymize_name):
+        print(row)
+
+    #{'id': 0, 'name': '96259edb2b28b48bebce8278c550e99fbdc4a3fac8189e6b90f183ecff01c442'}
+    #{'id': 1, 'name': '92d3972b625cbd21f28782fb5c89552ce1aa09281892a2ab32aee8feeb3544a1'}
+    #{'id': 2, 'name': '443679926a7cff506a3b5d5d094dc7734861352b9e0791af5d39db5a7356d11a'}
+
+    # Or create an instance of the data source, modify the resource and run the source.
+
+    # 1. Create an instance of the source so you can edit it.
+    data_source = dummy_source()
+    # 2. Modify this source instance's resource
+    data_source = data_source.dummy_data().add_map(replace_umlauts_in_dict_keys)
+    # 3. Inspect your result
+    for row in data_source:
+        print(row)
+
+    pipeline = dlt.pipeline(pipeline_name='example', destination='bigquery', dataset_name='normalized_data')
+    load_info = pipeline.run(data_source)
+```
\ No newline at end of file
diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
index 2a0736dd..5e05b587 100644
--- a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
@@ -11,14 +11,17 @@ class AddTransformRecipe(Step):
     hide: bool = True
 
     async def run(self, sdk: ContinueSDK):
-        await sdk.run_step(
+        text_observation = await sdk.run_step(
             MessageStep(message=dedent("""\
                 This recipe will walk you through the process of adding a transform to a dlt pipeline that uses the chess.com API source. With the help of Continue, you will:
                 - Set up a dlt pipeline for the chess.com API
                 - Add a filter or map transform to the pipeline
-                - Run the pipeline and view the transformed data in a Streamlit app
-                - """)) >>
+                - Run the pipeline and view the transformed data in a Streamlit app"""), name="Add transformation to a dlt pipeline") >>
             SetUpChessPipelineStep() >>
-            WaitForUserInputStep(prompt="How do you want to transform the Chess.com API data before loading it? 
For example, you could use the `python-chess` library to decode the moves or filter out certain games") >> - AddTransformStep(transform_description="Use the `python-chess` library to decode the moves in the game data") # Ask Nate how to not hardcode this here + WaitForUserInputStep( + prompt="How do you want to transform the Chess.com API data before loading it? For example, you could use the `python-chess` library to decode the moves or filter out certain games") + ) + await sdk.run_step( + AddTransformStep( + transform_description=text_observation.text) ) diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py index 46ddbed5..f7f5a43b 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py @@ -1,3 +1,4 @@ +import os from textwrap import dedent from ...steps.main import MessageStep @@ -7,35 +8,39 @@ from ...models.filesystem_edit import AddFile from ...core.main import Step from ...core.sdk import ContinueSDK +AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)" + class SetUpChessPipelineStep(Step): hide: bool = True name: str = "Setup Chess.com API dlt Pipeline" async def describe(self, models: Models): - return dedent(f"""\ - This step will create a new dlt pipeline that loads data from the chess.com API. - """) + return "This step will create a new dlt pipeline that loads data from the chess.com API." async def run(self, sdk: ContinueSDK): - filename = 'chess.py' - # running commands to get started when creating a new dlt pipeline await sdk.run([ 'python3 -m venv env', 'source env/bin/activate', 'pip install dlt', - 'dlt init chess duckdb', - 'Y', + 'dlt --non-interactive init chess duckdb', 'pip install -r requirements.txt' - ]) + ], name="Set up Python environment", description=dedent(f"""\ + Running the following commands: + - `python3 -m venv env`: Create a Python virtual environment + - `source env/bin/activate`: Activate the virtual environment + - `pip install dlt`: Install dlt + - `dlt init chess duckdb`: Create a new dlt pipeline called "chess" that loads data into a local DuckDB instance + - `pip install -r requirements.txt`: Install the Python dependencies for the pipeline""")) class AddTransformStep(Step): hide: bool = True - transform_description: str # e.g. "Use the `python-chess` library to decode the moves in the game data" + # e.g. "Use the `python-chess` library to decode the moves in the game data" + transform_description: str async def run(self, sdk: ContinueSDK): source_name = 'chess' @@ -45,8 +50,10 @@ class AddTransformStep(Step): This step will customize your resource function with a transform of your choice: - Add a filter or map transformation depending on your request - Load the data into a local DuckDB instance - - Open up a Streamlit app for you to view the data - """))) + - Open up a Streamlit app for you to view the data"""), name="Write transformation function")) + + with open(os.path.join(os.path.dirname(__file__), 'dlt_transform_docs.md')) as f: + dlt_transform_docs = f.read() prompt = dedent(f"""\ Task: Write a transform function using the description below and then use `add_map` or `add_filter` from the `dlt` library to attach it a resource. @@ -55,151 +62,17 @@ class AddTransformStep(Step): Here are some docs pages that will help you better understand how to use `dlt`. 
- # Customize resources - ## Filter, transform and pivot data - - You can attach any number of transformations that are evaluated on item per item basis to your resource. The available transformation types: - - map - transform the data item (resource.add_map) - - filter - filter the data item (resource.add_filter) - - yield map - a map that returns iterator (so single row may generate many rows - resource.add_yield_map) - - Example: We have a resource that loads a list of users from an api endpoint. We want to customize it so: - - we remove users with user_id == 'me' - - we anonymize user data - Here's our resource: - ```python - import dlt - - @dlt.resource(write_disposition='replace') - def users(): - ... - users = requests.get(...) - ... - yield users - ``` - - Here's our script that defines transformations and loads the data. - ```python - from pipedrive import users - - def anonymize_user(user_data): - user_data['user_id'] = hash_str(user_data['user_id']) - user_data['user_email'] = hash_str(user_data['user_email']) - return user_data - - # add the filter and anonymize function to users resource and enumerate - for user in users().add_filter(lambda user: user['user_id'] != 'me').add_map(anonymize_user): - print(user) - ``` - - Here is a more complex example of a filter transformation: - - # Renaming columns - ## Renaming columns by replacing the special characters - - In the example below, we create a dummy source with special characters in the name. We then write a function that we intend to apply to the resource to modify its output (i.e. replacing the German umlaut): replace_umlauts_in_dict_keys. - ```python - import dlt - - # create a dummy source with umlauts (special characters) in key names (um) - @dlt.source - def dummy_source(prefix: str = None): - @dlt.resource - def dummy_data(): - for _ in range(100): - yield {f'Objekt_{_}':{'Größe':_, 'Äquivalenzprüfung':True}} - return dummy_data(), - - def replace_umlauts_in_dict_keys(d): - # Replaces umlauts in dictionary keys with standard characters. - umlaut_map = {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', 'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue'} - result = {} - for k, v in d.items(): - new_key = ''.join(umlaut_map.get(c, c) for c in k) - if isinstance(v, dict): - result[new_key] = replace_umlauts_in_dict_keys(v) - else: - result[new_key] = v - return result - - # We can add the map function to the resource - - # 1. Create an instance of the source so you can edit it. - data_source = dummy_source() - - # 2. Modify this source instance's resource - data_source = data_source.dummy_data().add_map(replace_umlauts_in_dict_keys) - - # 3. Inspect your result - for row in data_source: - print(row) - - # {'Objekt_0': {'Groesse': 0, 'Aequivalenzpruefung': True}} - # ... - ``` - - Here is a more complex example of a map transformation: - - # Pseudonymizing columns - ## Pseudonymizing (or anonymizing) columns by replacing the special characters - Pseudonymization is a deterministic way to hide personally identifiable info (PII), enabling us to consistently achieve the same mapping. If instead you wish to anonymize, you can delete the data, or replace it with a constant. In the example below, we create a dummy source with a PII column called 'name', which we replace with deterministic hashes (i.e. replacing the German umlaut). 
- - ```python - import dlt - import hashlib - - @dlt.source - def dummy_source(prefix: str = None): - @dlt.resource - def dummy_data(): - for _ in range(3): - yield {'id':_, 'name': f'Jane Washington {_}'} - return dummy_data(), - - def pseudonymize_name(doc): - Pseudonmyisation is a deterministic type of PII-obscuring - Its role is to allow identifying users by their hash, without revealing the underlying info. - - # add a constant salt to generate - salt = 'WI@N57%zZrmk#88c' - salted_string = doc['name'] + salt - sh = hashlib.sha256() - sh.update(salted_string.encode()) - hashed_string = sh.digest().hex() - doc['name'] = hashed_string - return doc - - # run it as is - for row in dummy_source().dummy_data().add_map(pseudonymize_name): - print(row) - - #{'id': 0, 'name': '96259edb2b28b48bebce8278c550e99fbdc4a3fac8189e6b90f183ecff01c442'} - #{'id': 1, 'name': '92d3972b625cbd21f28782fb5c89552ce1aa09281892a2ab32aee8feeb3544a1'} - #{'id': 2, 'name': '443679926a7cff506a3b5d5d094dc7734861352b9e0791af5d39db5a7356d11a'} - - # Or create an instance of the data source, modify the resource and run the source. - - # 1. Create an instance of the source so you can edit it. - data_source = dummy_source() - # 2. Modify this source instance's resource - data_source = data_source.dummy_data().add_map(replace_umlauts_in_dict_keys) - # 3. Inspect your result - for row in data_source: - print(row) - - pipeline = dlt.pipeline(pipeline_name='example', destination='bigquery', dataset_name='normalized_data') - load_info = pipeline.run(data_source) - ``` - """) + {dlt_transform_docs}""") # edit the pipeline to add a tranform function and attach it to a resource await sdk.edit_file( filename=filename, - prompt=prompt + prompt=prompt, + name=f"Writing transform function {AI_ASSISTED_STRING}" ) # run the pipeline and load the data - await sdk.run(f'python3 {filename}') + await sdk.run(f'python3 {filename}', name="Run the pipeline", description=f"Running `python3 {filename}` to load the data into a local DuckDB instance") # run a streamlit app to show the data - await sdk.run(f'dlt pipeline {source_name} show') \ No newline at end of file + await sdk.run(f'dlt pipeline {source_name} show', name="Show data in a Streamlit app", description=f"Running `dlt pipeline {source_name} show` to show the data in a Streamlit app, where you can view and play with the data.") diff --git a/continuedev/src/continuedev/steps/steps_on_startup.py b/continuedev/src/continuedev/steps/steps_on_startup.py index cd40ff56..63dedd82 100644 --- a/continuedev/src/continuedev/steps/steps_on_startup.py +++ b/continuedev/src/continuedev/steps/steps_on_startup.py @@ -1,11 +1,12 @@ from ..core.main import ContinueSDK, Models, Step from .main import UserInputStep from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe - +from ..recipes.AddTransformRecipe.main import AddTransformRecipe step_name_to_step_class = { "UserInputStep": UserInputStep, - "CreatePipelineRecipe": CreatePipelineRecipe + "CreatePipelineRecipe": CreatePipelineRecipe, + "AddTransformRecipe": AddTransformRecipe } -- cgit v1.2.3-70-g09d2 From c3b24a89105d22a5fa01400b7c9d5494a2d3ffc5 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Wed, 7 Jun 2023 17:03:57 -0400 Subject: added /airflow command and config string mapping --- continuedev/src/continuedev/core/policy.py | 5 ++++- continuedev/src/continuedev/steps/steps_on_startup.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/core/policy.py 
b/continuedev/src/continuedev/core/policy.py index 8aea8de7..d87a3582 100644 --- a/continuedev/src/continuedev/core/policy.py +++ b/continuedev/src/continuedev/core/policy.py @@ -3,6 +3,7 @@ from typing import List, Tuple, Type from ..steps.chroma import AnswerQuestionChroma, EditFileChroma, CreateCodebaseIndexChroma from ..steps.steps_on_startup import StepsOnStartupStep from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe +from ..recipes.DeployPipelineAirflowRecipe.main import DeployPipelineAirflowRecipe from .main import Step, Validator, History, Policy from .observation import Observation, TracebackObservation, UserInputObservation from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeStep, FasterEditHighlightedCodeStep, StarCoderEditHighlightedCodeStep, MessageStep, EmptyStep, SetupContinueWorkspaceStep @@ -28,8 +29,10 @@ class DemoPolicy(Policy): # This could be defined with ObservationTypePolicy. Ergonomics not right though. if "/pytest" in observation.user_input.lower(): return WritePytestsRecipe(instructions=observation.user_input) - elif "/dlt" in observation.user_input.lower() or " dlt" in observation.user_input.lower(): + elif "/dlt" in observation.user_input.lower(): return CreatePipelineRecipe() + elif "/airflow" in observation.user_input.lower(): + return DeployPipelineAirflowRecipe() elif "/comment" in observation.user_input.lower(): return CommentCodeStep() elif "/ask" in observation.user_input: diff --git a/continuedev/src/continuedev/steps/steps_on_startup.py b/continuedev/src/continuedev/steps/steps_on_startup.py index cd40ff56..b1376e8a 100644 --- a/continuedev/src/continuedev/steps/steps_on_startup.py +++ b/continuedev/src/continuedev/steps/steps_on_startup.py @@ -1,11 +1,13 @@ from ..core.main import ContinueSDK, Models, Step from .main import UserInputStep from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe +from ..recipes.DeployPipelineAirflowRecipe.main import DeployPipelineAirflowRecipe step_name_to_step_class = { "UserInputStep": UserInputStep, - "CreatePipelineRecipe": CreatePipelineRecipe + "CreatePipelineRecipe": CreatePipelineRecipe, + "DeployPipelineAirflowRecipe": DeployPipelineAirflowRecipe } -- cgit v1.2.3-70-g09d2 From 7db0178fb8de64731de403e3916e6fb303ee64d3 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Fri, 9 Jun 2023 14:35:21 -0400 Subject: touching up transform recipe, chat context --- continuedev/src/continuedev/core/abstract_sdk.py | 12 ++++++-- continuedev/src/continuedev/core/autopilot.py | 1 + continuedev/src/continuedev/core/main.py | 24 ++++++++++++++- continuedev/src/continuedev/core/policy.py | 3 +- continuedev/src/continuedev/core/sdk.py | 10 +++++- continuedev/src/continuedev/libs/llm/__init__.py | 6 ++-- .../src/continuedev/libs/llm/hf_inference_api.py | 4 ++- continuedev/src/continuedev/libs/llm/openai.py | 15 +++++---- .../continuedev/recipes/AddTransformRecipe/main.py | 2 +- .../recipes/AddTransformRecipe/steps.py | 19 +++++++++--- .../recipes/CreatePipelineRecipe/main.py | 2 +- .../recipes/CreatePipelineRecipe/steps.py | 4 +-- continuedev/src/continuedev/server/ide.py | 2 +- continuedev/src/continuedev/steps/core/core.py | 36 ++++++++++++++++++++-- continuedev/src/continuedev/steps/main.py | 13 +------- docs/docs/walkthroughs/create-a-recipe.md | 4 +-- 16 files changed, 117 insertions(+), 40 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/core/abstract_sdk.py b/continuedev/src/continuedev/core/abstract_sdk.py index 1c800875..417971cd 100644 
--- a/continuedev/src/continuedev/core/abstract_sdk.py +++ b/continuedev/src/continuedev/core/abstract_sdk.py @@ -1,10 +1,10 @@ -from abc import ABC, abstractmethod +from abc import ABC, abstractmethod, abstractproperty from typing import Coroutine, List, Union from .config import ContinueConfig from ..models.filesystem_edit import FileSystemEdit from .observation import Observation -from .main import History, Step +from .main import ChatMessage, History, Step, ChatMessageRole """ @@ -83,3 +83,11 @@ class AbstractContinueSDK(ABC): @abstractmethod def set_loading_message(self, message: str): pass + + @abstractmethod + def add_chat_context(self, content: str, role: ChatMessageRole = "assistant"): + pass + + @abstractproperty + def chat_context(self) -> List[ChatMessage]: + pass diff --git a/continuedev/src/continuedev/core/autopilot.py b/continuedev/src/continuedev/core/autopilot.py index b82e1fef..c979d53a 100644 --- a/continuedev/src/continuedev/core/autopilot.py +++ b/continuedev/src/continuedev/core/autopilot.py @@ -35,6 +35,7 @@ class Autopilot(ContinueBaseModel): class Config: arbitrary_types_allowed = True + keep_untouched = (cached_property,) def get_full_state(self) -> FullState: return FullState(history=self.history, active=self._active, user_input_queue=self._main_user_input_queue) diff --git a/continuedev/src/continuedev/core/main.py b/continuedev/src/continuedev/core/main.py index 37d80de3..19b36a6a 100644 --- a/continuedev/src/continuedev/core/main.py +++ b/continuedev/src/continuedev/core/main.py @@ -1,10 +1,18 @@ -from typing import Callable, Coroutine, Dict, Generator, List, Tuple, Union +from textwrap import dedent +from typing import Callable, Coroutine, Dict, Generator, List, Literal, Tuple, Union from ..models.main import ContinueBaseModel from pydantic import validator from ..libs.llm import LLM from .observation import Observation +ChatMessageRole = Literal["assistant", "user", "system"] + + +class ChatMessage(ContinueBaseModel): + role: ChatMessageRole + content: str + class HistoryNode(ContinueBaseModel): """A point in history, a list of which make up History""" @@ -12,12 +20,25 @@ class HistoryNode(ContinueBaseModel): observation: Union[Observation, None] depth: int + def to_chat_messages(self) -> List[ChatMessage]: + return self.step.chat_context + [ChatMessage(role="assistant", content=self.step.description)] + class History(ContinueBaseModel): """A history of steps taken and their results""" timeline: List[HistoryNode] current_index: int + def to_chat_history(self) -> List[ChatMessage]: + msgs = [] + for node in self.timeline: + if not node.step.hide: + msgs += [ + ChatMessage(role="assistant", content=msg) + for msg in node.to_chat_messages() + ] + return msgs + def add_node(self, node: HistoryNode): self.timeline.insert(self.current_index + 1, node) self.current_index += 1 @@ -113,6 +134,7 @@ class Step(ContinueBaseModel): description: Union[str, None] = None system_message: Union[str, None] = None + chat_context: List[ChatMessage] = [] class Config: copy_on_model_validation = False diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py index c3f1d188..7661f0c4 100644 --- a/continuedev/src/continuedev/core/policy.py +++ b/continuedev/src/continuedev/core/policy.py @@ -6,10 +6,11 @@ from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe from ..recipes.AddTransformRecipe.main import AddTransformRecipe from .main import Step, Validator, History, Policy from .observation import Observation,
TracebackObservation, UserInputObservation -from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeStep, FasterEditHighlightedCodeStep, StarCoderEditHighlightedCodeStep, MessageStep, EmptyStep, SetupContinueWorkspaceStep +from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeStep, FasterEditHighlightedCodeStep, StarCoderEditHighlightedCodeStep, EmptyStep, SetupContinueWorkspaceStep from ..recipes.WritePytestsRecipe.main import WritePytestsRecipe from ..recipes.ContinueRecipeRecipe.main import ContinueStepStep from ..steps.comment_code import CommentCodeStep +from ..steps.core.core import MessageStep class DemoPolicy(Policy): diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py index ea90a13a..11127361 100644 --- a/continuedev/src/continuedev/core/sdk.py +++ b/continuedev/src/continuedev/core/sdk.py @@ -14,7 +14,7 @@ from ..libs.llm.hf_inference_api import HuggingFaceInferenceAPI from ..libs.llm.openai import OpenAI from .observation import Observation from ..server.ide_protocol import AbstractIdeProtocolServer -from .main import Context, ContinueCustomException, History, Step +from .main import Context, ContinueCustomException, History, Step, ChatMessage, ChatMessageRole from ..steps.core.core import * @@ -136,3 +136,11 @@ class ContinueSDK(AbstractContinueSDK): def raise_exception(self, message: str, title: str, with_step: Union[Step, None] = None): raise ContinueCustomException(message, title, with_step) + + def add_chat_context(self, content: str, role: ChatMessageRole = "assistant"): + self.history.timeline[self.history.current_index].step.chat_context.append( + ChatMessage(content=content, role=role)) + + @property + def chat_context(self) -> List[ChatMessage]: + return self.history.to_chat_history() diff --git a/continuedev/src/continuedev/libs/llm/__init__.py b/continuedev/src/continuedev/libs/llm/__init__.py index 6bae2222..24fd34be 100644 --- a/continuedev/src/continuedev/libs/llm/__init__.py +++ b/continuedev/src/continuedev/libs/llm/__init__.py @@ -1,4 +1,6 @@ -from typing import Union +from typing import List, Union + +from ...core.main import ChatMessage from ...models.main import AbstractModel from pydantic import BaseModel @@ -6,7 +8,7 @@ from pydantic import BaseModel class LLM(BaseModel): system_message: Union[str, None] = None - def complete(self, prompt: str, **kwargs): + def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs): """Return the completion of the text with the given temperature.""" raise diff --git a/continuedev/src/continuedev/libs/llm/hf_inference_api.py b/continuedev/src/continuedev/libs/llm/hf_inference_api.py index 734da160..1586c620 100644 --- a/continuedev/src/continuedev/libs/llm/hf_inference_api.py +++ b/continuedev/src/continuedev/libs/llm/hf_inference_api.py @@ -1,3 +1,5 @@ +from typing import List +from ...core.main import ChatMessage from ..llm import LLM import requests @@ -9,7 +11,7 @@ class HuggingFaceInferenceAPI(LLM): api_key: str model: str = "bigcode/starcoder" - def complete(self, prompt: str, **kwargs): + def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs): """Return the completion of the text with the given temperature.""" API_URL = f"https://api-inference.huggingface.co/models/{self.model}" headers = { diff --git a/continuedev/src/continuedev/libs/llm/openai.py b/continuedev/src/continuedev/libs/llm/openai.py index 10801465..da8c5caf 100644 --- a/continuedev/src/continuedev/libs/llm/openai.py +++
b/continuedev/src/continuedev/libs/llm/openai.py @@ -1,6 +1,7 @@ import asyncio import time from typing import Any, Dict, Generator, List, Union +from ...core.main import ChatMessage import openai import aiohttp from ..llm import LLM @@ -62,7 +63,7 @@ class OpenAI(LLM): for chunk in generator: yield chunk.choices[0].text - def complete(self, prompt: str, **kwargs) -> str: + def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> str: t1 = time.time() self.completion_count += 1 @@ -70,15 +71,17 @@ class OpenAI(LLM): "frequency_penalty": 0, "presence_penalty": 0, "stream": False} | kwargs if args["model"] == "gpt-3.5-turbo": - messages = [{ - "role": "user", - "content": prompt - }] + messages = [] if self.system_message: - messages.insert(0, { + messages.append({ "role": "system", "content": self.system_message }) + message += [msg.dict() for msg in with_history] + messages.append({ + "role": "user", + "content": prompt + }) resp = openai.ChatCompletion.create( messages=messages, **args, diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py index 5e05b587..e9a998e3 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py @@ -3,7 +3,7 @@ from textwrap import dedent from ...core.main import Step from ...core.sdk import ContinueSDK from ...steps.core.core import WaitForUserInputStep -from ...steps.main import MessageStep +from ...steps.core.core import MessageStep from .steps import SetUpChessPipelineStep, AddTransformStep diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py index f7f5a43b..7bb0fc23 100644 --- a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py @@ -1,7 +1,9 @@ import os from textwrap import dedent -from ...steps.main import MessageStep +from ...models.main import Range +from ...models.filesystem import RangeInFile +from ...steps.core.core import MessageStep from ...core.sdk import Models from ...core.observation import DictObservation from ...models.filesystem_edit import AddFile @@ -26,7 +28,8 @@ class SetUpChessPipelineStep(Step): 'source env/bin/activate', 'pip install dlt', 'dlt --non-interactive init chess duckdb', - 'pip install -r requirements.txt' + 'pip install -r requirements.txt', + 'pip install pandas streamlit' # Needed for the pipeline show step later ], name="Set up Python environment", description=dedent(f"""\ Running the following commands: - `python3 -m venv env`: Create a Python virtual environment @@ -44,7 +47,8 @@ class AddTransformStep(Step): async def run(self, sdk: ContinueSDK): source_name = 'chess' - filename = f'{source_name}.py' + filename = f'{source_name}_pipeline.py' + abs_filepath = os.path.join(sdk.ide.workspace_directory, filename) await sdk.run_step(MessageStep(message=dedent("""\ This step will customize your resource function with a transform of your choice: @@ -52,6 +56,13 @@ class AddTransformStep(Step): - Load the data into a local DuckDB instance - Open up a Streamlit app for you to view the data"""), name="Write transformation function")) + # Open the file and highlight the function to be edited + await sdk.ide.setFileOpen(abs_filepath) + await sdk.ide.highlightCode(range_in_file=RangeInFile( + filepath=abs_filepath, + range=Range.from_shorthand(47, 0, 51, 0) + )) + 
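One fragile spot in the hunk above: `Range.from_shorthand(47, 0, 51, 0)` pins the highlight to fixed line numbers in the generated template, which silently breaks if the template shifts. A hedged sketch of locating the target function by name instead; this is a plain-text scan written for illustration, not the project's actual API:

```python
def range_of_function(contents: str, func_name: str):
    """Return (start_line, end_line) of a top-level def, 0-indexed.

    A crude text scan, assumed good enough for generated dlt templates;
    a real implementation would use the ast module. Raises StopIteration
    if the function is not found.
    """
    lines = contents.splitlines()
    start = next(i for i, line in enumerate(lines)
                 if line.startswith((f"def {func_name}", f"async def {func_name}")))
    end = start + 1
    # The body continues while lines are blank or indented.
    while end < len(lines) and (not lines[end].strip() or lines[end][0] in " \t"):
        end += 1
    return start, end
```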
with open(os.path.join(os.path.dirname(__file__), 'dlt_transform_docs.md')) as f: dlt_transform_docs = f.read() @@ -75,4 +86,4 @@ class AddTransformStep(Step): await sdk.run(f'python3 {filename}', name="Run the pipeline", description=f"Running `python3 {filename}` to load the data into a local DuckDB instance") # run a streamlit app to show the data - await sdk.run(f'dlt pipeline {source_name} show', name="Show data in a Streamlit app", description=f"Running `dlt pipeline {source_name} show` to show the data in a Streamlit app, where you can view and play with the data.") + await sdk.run(f'dlt pipeline {source_name}_pipeline show', name="Show data in a Streamlit app", description=f"Running `dlt pipeline {source_name} show` to show the data in a Streamlit app, where you can view and play with the data.") diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py index 39e1ba42..818168ba 100644 --- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py +++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py @@ -3,7 +3,7 @@ from textwrap import dedent from ...core.main import Step from ...core.sdk import ContinueSDK from ...steps.core.core import WaitForUserInputStep -from ...steps.main import MessageStep +from ...steps.core.core import MessageStep from .steps import SetupPipelineStep, ValidatePipelineStep, RunQueryStep diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py index 3b9a8c85..ea40a058 100644 --- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py @@ -5,7 +5,7 @@ import time from ...models.main import Range from ...models.filesystem import RangeInFile -from ...steps.main import MessageStep +from ...steps.core.core import MessageStep from ...core.sdk import Models from ...core.observation import DictObservation, InternalErrorObservation from ...models.filesystem_edit import AddFile, FileEdit @@ -51,7 +51,7 @@ class SetupPipelineStep(Step): # editing the resource function to call the requested API resource_function_range = Range.from_shorthand(15, 0, 29, 0) - await sdk.ide.highlightCode(RangeInFile(filepath=os.path.join(await sdk.ide.getWorkspaceDirectory(), filename), range=resource_function_range), "#00ff0022") + await sdk.ide.highlightCode(RangeInFile(filepath=os.path.join(await sdk.ide.getWorkspaceDirectory(), filename), range=resource_function_range)) # sdk.set_loading_message("Writing code to call the API...") await sdk.edit_file( diff --git a/continuedev/src/continuedev/server/ide.py b/continuedev/src/continuedev/server/ide.py index 5826f15f..f4ea1071 100644 --- a/continuedev/src/continuedev/server/ide.py +++ b/continuedev/src/continuedev/server/ide.py @@ -138,7 +138,7 @@ class IdeProtocolServer(AbstractIdeProtocolServer): "sessionId": session_id }) - async def highlightCode(self, range_in_file: RangeInFile, color: str): + async def highlightCode(self, range_in_file: RangeInFile, color: str = "#00ff0022"): await self._send_json("highlightCode", { "rangeInFile": range_in_file.dict(), "color": color diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py index dfd765eb..5117d479 100644 --- a/continuedev/src/continuedev/steps/core/core.py +++ b/continuedev/src/continuedev/steps/core/core.py @@ -1,4 +1,5 @@ # These steps are depended upon 
by ContinueSDK +import os import subprocess from textwrap import dedent from typing import Coroutine, List, Union @@ -23,6 +24,17 @@ class ReversibleStep(Step): raise NotImplementedError +class MessageStep(Step): + name: str = "Message" + message: str + + async def describe(self, models: Models) -> Coroutine[str, None, None]: + return self.message + + async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]: + return TextObservation(text=self.message) + + class FileSystemEditStep(ReversibleStep): edit: FileSystemEdit _diff: Union[EditDiff, None] = None @@ -38,6 +50,13 @@ class FileSystemEditStep(ReversibleStep): # Where and when should file saves happen? +def output_contains_error(output: str) -> bool: + return "Traceback" in output or "SyntaxError" in output + + +AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)" + + class ShellCommandsStep(Step): cmds: List[str] cwd: Union[str, None] = None @@ -50,13 +69,26 @@ class ShellCommandsStep(Step): return f"Error when running shell commands:\n```\n{self._err_text}\n```" cmds_str = "\n".join(self.cmds) - return (await models.gpt35()).complete(f"{cmds_str}\n\nSummarize what was done in these shell commands, using markdown bullet points:") + return models.gpt35.complete(f"{cmds_str}\n\nSummarize what was done in these shell commands, using markdown bullet points:") async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]: cwd = await sdk.ide.getWorkspaceDirectory() if self.cwd is None else self.cwd for cmd in self.cmds: output = await sdk.ide.runCommand(cmd) + if output is not None and output_contains_error(output): + suggestion = sdk.models.gpt35.complete(dedent(f"""\ + While running the command `{cmd}`, the following error occurred: + + ```ascii + {output} + ``` + + This is a brief summary of the error followed by a suggestion on how it can be fixed:"""), with_history=sdk.chat_context) + + sdk.raise_exception( + title="Error while running command", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=suggestion) + ) return TextObservation(text=output) @@ -116,7 +148,7 @@ class Gpt35EditCodeStep(Step): _prompt_and_completion: str = "" async def describe(self, models: Models) -> Coroutine[str, None, None]: - return (await models.gpt35()).complete(f"{self._prompt_and_completion}\n\nPlease give brief a description of the changes made above using markdown bullet points:") + return models.gpt35.complete(f"{self._prompt_and_completion}\n\nPlease give a brief description of the changes made above using markdown bullet points:") async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]: rif_with_contents = [] diff --git a/continuedev/src/continuedev/steps/main.py b/continuedev/src/continuedev/steps/main.py index 81a1e3a9..24335b4f 100644 --- a/continuedev/src/continuedev/steps/main.py +++ b/continuedev/src/continuedev/steps/main.py @@ -212,7 +212,7 @@ class StarCoderEditHighlightedCodeStep(Step): _prompt_and_completion: str = "" async def describe(self, models: Models) -> Coroutine[str, None, None]: - return (await models.gpt35()).complete(f"{self._prompt_and_completion}\n\nPlease give brief a description of the changes made above using markdown bullet points:") + return models.gpt35.complete(f"{self._prompt_and_completion}\n\nPlease give a brief description of the changes made above using markdown bullet points:") async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]: range_in_files = await sdk.ide.getHighlightedCode() @@ -317,17 +317,6 @@
class SolveTracebackStep(Step): return None -class MessageStep(Step): - name: str = "Message" - message: str - - async def describe(self, models: Models) -> Coroutine[str, None, None]: - return self.message - - async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]: - return TextObservation(text=self.message) - - class EmptyStep(Step): hide: bool = True diff --git a/docs/docs/walkthroughs/create-a-recipe.md b/docs/docs/walkthroughs/create-a-recipe.md index 60bfe9a8..3b80df8a 100644 --- a/docs/docs/walkthroughs/create-a-recipe.md +++ b/docs/docs/walkthroughs/create-a-recipe.md @@ -17,8 +17,6 @@ continue/continuedev/src/continuedev/recipes ## 1. Create a step - - ### a. Start by creating a subclass of Step You should first consider what will be the parameters of your recipe. These are defined as attributes in the step, as with `input_file_path: str` below @@ -33,7 +31,7 @@ If you'd like to override the default description of your steps, which is just t - Return a static string - Store state in a class attribute (prepend with a double underscore, which signifies (through Pydantic) that this is not a parameter for the Step, just internal state) during the run method, and then grab this in the describe method. -- Use state in conjunction with the `models` parameter of the describe method to autogenerate a description with a language model. For example, if you'd used an attribute called `__code_written` to store a string representing some code that was written, you could implement describe as `return (await models.gpt35()).complete(f"{self.\_\_code_written}\n\nSummarize the changes made in the above code.")`. +- Use state in conjunction with the `models` parameter of the describe method to autogenerate a description with a language model. For example, if you'd used an attribute called `__code_written` to store a string representing some code that was written, you could implement describe as `return models.gpt35.complete(f"{self.\_\_code_written}\n\nSummarize the changes made in the above code.")`. ## 2. 
Compose steps together into a complete recipe -- cgit v1.2.3-70-g09d2 From 8bc43221b32fda1bffa6157ab335b48d0c605973 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Fri, 9 Jun 2023 23:00:46 -0400 Subject: cleaning up transform pipeline --- continuedev/src/continuedev/core/main.py | 8 +++----- continuedev/src/continuedev/core/sdk.py | 4 ++-- continuedev/src/continuedev/libs/llm/openai.py | 2 +- continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py | 4 ++-- continuedev/src/continuedev/steps/core/core.py | 3 ++- 5 files changed, 10 insertions(+), 11 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/core/main.py b/continuedev/src/continuedev/core/main.py index 19b36a6a..3053e5a1 100644 --- a/continuedev/src/continuedev/core/main.py +++ b/continuedev/src/continuedev/core/main.py @@ -3,7 +3,6 @@ from typing import Callable, Coroutine, Dict, Generator, List, Literal, Tuple, U from ..models.main import ContinueBaseModel from pydantic import validator -from ..libs.llm import LLM from .observation import Observation ChatMessageRole = Literal["assistant", "user", "system"] @@ -21,6 +20,8 @@ class HistoryNode(ContinueBaseModel): depth: int def to_chat_messages(self) -> List[ChatMessage]: + if self.step.description is None: + return self.step.chat_context return self.step.chat_context + [ChatMessage(role="assistant", content=self.step.description)] @@ -33,10 +34,7 @@ class History(ContinueBaseModel): msgs = [] for node in self.timeline: if not node.step.hide: - msgs += [ - ChatMessage(role="assistant", content=msg) - for msg in node.to_chat_messages() - ] + msgs += node.to_chat_messages() return msgs def add_node(self, node: HistoryNode): diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py index 11127361..59bfc0f2 100644 --- a/continuedev/src/continuedev/core/sdk.py +++ b/continuedev/src/continuedev/core/sdk.py @@ -77,9 +77,9 @@ class ContinueSDK(AbstractContinueSDK): async def wait_for_user_confirmation(self, prompt: str): return await self.run_step(WaitForUserConfirmationStep(prompt=prompt)) - async def run(self, commands: Union[List[str], str], cwd: str = None, name: str = None, description: str = None) -> Coroutine[str, None, None]: + async def run(self, commands: Union[List[str], str], cwd: str = None, name: str = None, description: str = None, handle_error: bool = True) -> Coroutine[str, None, None]: commands = commands if isinstance(commands, List) else [commands] - return (await self.run_step(ShellCommandsStep(cmds=commands, cwd=cwd, description=description, **({'name': name} if name else {})))).text + return (await self.run_step(ShellCommandsStep(cmds=commands, cwd=cwd, description=description, handle_error=handle_error, **({'name': name} if name else {})))).text async def edit_file(self, filename: str, prompt: str, name: str = None, description: str = None, range: Range = None): filepath = await self._ensure_absolute_path(filename) diff --git a/continuedev/src/continuedev/libs/llm/openai.py b/continuedev/src/continuedev/libs/llm/openai.py index da8c5caf..6a537afd 100644 --- a/continuedev/src/continuedev/libs/llm/openai.py +++ b/continuedev/src/continuedev/libs/llm/openai.py @@ -77,7 +77,7 @@ class OpenAI(LLM): "role": "system", "content": self.system_message }) - message += [msg.dict() for msg in with_history] + messages += [msg.dict() for msg in with_history] messages.append({ "role": "user", "content": prompt diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py 
b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py index ea40a058..e59cc51c 100644 --- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py @@ -86,7 +86,7 @@ class ValidatePipelineStep(Step): # """))) # test that the API call works - output = await sdk.run(f'python3 {filename}', name="Test the pipeline", description=f"Running `python3 {filename}` to test loading data from the API") + output = await sdk.run(f'python3 {filename}', name="Test the pipeline", description=f"Running `python3 {filename}` to test loading data from the API", handle_error=False) # If it fails, return the error if "Traceback" in output or "SyntaxError" in output: @@ -157,7 +157,7 @@ class RunQueryStep(Step): hide: bool = True async def run(self, sdk: ContinueSDK): - output = await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected") + output = await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected", handle_error=False) if "Traceback" in output or "SyntaxError" in output: suggestion = sdk.models.gpt35.complete(dedent(f"""\ diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py index 5117d479..40e992e7 100644 --- a/continuedev/src/continuedev/steps/core/core.py +++ b/continuedev/src/continuedev/steps/core/core.py @@ -61,6 +61,7 @@ class ShellCommandsStep(Step): cmds: List[str] cwd: Union[str, None] = None name: str = "Run Shell Commands" + handle_error: bool = True _err_text: Union[str, None] = None @@ -76,7 +77,7 @@ class ShellCommandsStep(Step): for cmd in self.cmds: output = await sdk.ide.runCommand(cmd) - if output is not None and output_contains_error(output): + if self.handle_error and output is not None and output_contains_error(output): suggestion = sdk.models.gpt35.complete(dedent(f"""\ While running the command `{cmd}`, the following error occurred: -- cgit v1.2.3-70-g09d2 From 262dffd21c4dac88050926d72b78f7e91a5df75b Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Sat, 10 Jun 2023 07:53:31 -0400 Subject: selection step of airflow recipe --- .../recipes/DeployPipelineAirflowRecipe/main.py | 40 ++++--- .../recipes/DeployPipelineAirflowRecipe/steps.py | 120 +-------------------- .../src/continuedev/steps/input/nl_multiselect.py | 27 +++++ 3 files changed, 57 insertions(+), 130 deletions(-) create mode 100644 continuedev/src/continuedev/steps/input/nl_multiselect.py (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py index d7cd03db..8e7d258d 100644 --- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py +++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py @@ -1,10 +1,11 @@ from textwrap import dedent +from ...steps.input.nl_multiselect import NLMultiselectStep from ...core.main import Step from ...core.sdk import ContinueSDK from ...steps.core.core import WaitForUserInputStep from ...steps.main import MessageStep -from .steps import SetupPipelineStep, ValidatePipelineStep +from .steps import SetupPipelineStep # https://github.com/dlt-hub/dlt-deploy-template/blob/master/airflow-composer/dag_template.py @@ -19,19 +20,30 @@ class 
DeployPipelineAirflowRecipe(Step): async def run(self, sdk: ContinueSDK): text_observation = await sdk.run_step( - MessageStep(name="Building your first dlt pipeline", message=dedent("""\ - This recipe will walk you through the process of creating a dlt pipeline for your chosen data source. With the help of Continue, you will: - - Create a Python virtual environment with dlt installed - - Run `dlt init` to generate a pipeline template - - Write the code to call the API - - Add any required API keys to the `secrets.toml` file - - Test that the API call works - - Load the data into a local DuckDB instance - - Write a query to view the data""")) >> - WaitForUserInputStep( - prompt="What API do you want to load data from? (e.g. weatherapi.com, chess.com)") + MessageStep(name="Deploying a pipeline to Airflow", message=dedent("""\ + This recipe will show you how to deploy a pipeline to Airflow. With the help of Continue, you will: + - Select a dlt-verified pipeline + - Setup the pipeline + - Deploy it to Airflow + - Optionally, setup Airflow locally""")) >> + NLMultiselectStep( + prompt=dedent("""\ + Which verified pipeline do you want to deploy with Airflow? The options are: + - Asana + - Chess.com + - GitHub + - Google Analytics + - Google Sheets + - HubSpot + - Matomo + - Pipedrive + - Shopify + - Strapi + - Zendesk"""), + options=[ + "asana_dlt", "chess", "github", "google_analytics", "google_sheets", "hubspot", "matomo", "pipedrive", "shopify_dlt", "strapi", "zendesk" + ]) ) await sdk.run_step( - SetupPipelineStep(api_description=text_observation.text) >> - ValidatePipelineStep() + SetupPipelineStep(source_name=text_observation.text) >> ) diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py index c32ae923..b69b3adc 100644 --- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py @@ -19,134 +19,22 @@ class SetupPipelineStep(Step): hide: bool = True name: str = "Setup dlt Pipeline" - api_description: str # e.g. 
"I want to load data from the weatherapi.com API" + source_name: str async def describe(self, models: Models): - return dedent(f"""\ - This step will create a new dlt pipeline that loads data from an API, as per your request: - {self.api_description} - """) + pass async def run(self, sdk: ContinueSDK): - sdk.context.set("api_description", self.api_description) - - source_name = (await sdk.models.gpt35()).complete( - f"Write a snake_case name for the data source described by {self.api_description}: ").strip() - filename = f'{source_name}.py' - - # running commands to get started when creating a new dlt pipeline await sdk.run([ 'python3 -m venv env', 'source env/bin/activate', 'pip install dlt', - f'dlt init {source_name} duckdb\n\rY', + f'dlt --non-interactive init {self.source_name} duckdb', 'pip install -r requirements.txt' ], description=dedent(f"""\ Running the following commands: - `python3 -m venv env`: Create a Python virtual environment - `source env/bin/activate`: Activate the virtual environment - `pip install dlt`: Install dlt - - `dlt init {source_name} duckdb`: Create a new dlt pipeline called {source_name} that loads data into a local DuckDB instance + - `dlt init {self.source_name} duckdb`: Create a new dlt pipeline called {self.source_name} that loads data into a local DuckDB instance - `pip install -r requirements.txt`: Install the Python dependencies for the pipeline"""), name="Setup Python environment") - - # editing the resource function to call the requested API - await sdk.ide.highlightCode(RangeInFile(filepath=os.path.join(await sdk.ide.getWorkspaceDirectory(), filename), range=Range.from_shorthand(15, 0, 29, 0)), "#00ff0022") - - # sdk.set_loading_message("Writing code to call the API...") - await sdk.edit_file( - filename=filename, - prompt=f'Edit the resource function to call the API described by this: {self.api_description}. Do not move or remove the exit() call in __main__.', - name=f"Edit the resource function to call the API {AI_ASSISTED_STRING}" - ) - - time.sleep(1) - - # wait for user to put API key in secrets.toml - await sdk.ide.setFileOpen(await sdk.ide.getWorkspaceDirectory() + "/.dlt/secrets.toml") - await sdk.wait_for_user_confirmation("If this service requires an API key, please add it to the `secrets.toml` file and then press `Continue`") - - sdk.context.set("source_name", source_name) - - -class ValidatePipelineStep(Step): - hide: bool = True - - async def run(self, sdk: ContinueSDK): - workspace_dir = await sdk.ide.getWorkspaceDirectory() - source_name = sdk.context.get("source_name") - filename = f'{source_name}.py' - - # await sdk.run_step(MessageStep(name="Validate the pipeline", message=dedent("""\ - # Next, we will validate that your dlt pipeline is working as expected: - # - Test that the API call works - # - Load the data into a local DuckDB instance - # - Write a query to view the data - # """))) - - # test that the API call works - output = await sdk.run(f'python3 {filename}', name="Test the pipeline", description=f"Running `python3 {filename}` to test loading data from the API") - - # If it fails, return the error - if "Traceback" in output: - output = "Traceback" + output.split("Traceback")[-1] - file_content = await sdk.ide.readFile(os.path.join(workspace_dir, filename)) - suggestion = (await sdk.models.gpt35()).complete(dedent(f"""\ - ```python - {file_content} - ``` - This above code is a dlt pipeline that loads data from an API. The function with the @resource decorator is responsible for calling the API and returning the data. 
While attempting to run the pipeline, the following error occurred: - - ```ascii - {output} - ``` - - This is a brief summary of the error followed by a suggestion on how it can be fixed by editing the resource function:""")) - - api_documentation_url = (await sdk.models.gpt35()).complete(dedent(f"""\ - The API I am trying to call is the '{sdk.context.get('api_description')}'. I tried calling it in the @resource function like this: - ```python - {file_content} - ``` - What is the URL for the API documentation that will help me learn how to make this call? Please format in markdown so I can click the link.""")) - - sdk.raise_exception( - title=f"Error while running pipeline.\nFix the resource function in {filename} and rerun this step", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=dedent(f"""\ - {suggestion} - - {api_documentation_url} - - After you've fixed the code, click the retry button at the top of the Validate Pipeline step above."""))) - - # remove exit() from the main main function - await sdk.run_step(MessageStep(name="Remove early exit() from main function", message="Remove the early exit() from the main function now that we are done testing and want the pipeline to load the data into DuckDB.")) - - contents = await sdk.ide.readFile(os.path.join(workspace_dir, filename)) - replacement = "\n".join( - list(filter(lambda line: line.strip() != "exit()", contents.split("\n")))) - await sdk.ide.applyFileSystemEdit(FileEdit( - filepath=os.path.join(workspace_dir, filename), - replacement=replacement, - range=Range.from_entire_file(contents) - )) - - # load the data into the DuckDB instance - await sdk.run(f'python3 {filename}', name="Load data into DuckDB", description=f"Running python3 {filename} to load data into DuckDB") - - table_name = f"{source_name}.{source_name}_resource" - tables_query_code = dedent(f'''\ - import duckdb - - # connect to DuckDB instance - conn = duckdb.connect(database="{source_name}.duckdb") - - # get table names - rows = conn.execute("SELECT * FROM {table_name};").fetchall() - - # print table names - for row in rows: - print(row) - ''') - - query_filename = os.path.join(workspace_dir, "query.py") - await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code), name="Add query.py file", description="Adding a file called `query.py` to the workspace that will run a test query on the DuckDB instance") - await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected") diff --git a/continuedev/src/continuedev/steps/input/nl_multiselect.py b/continuedev/src/continuedev/steps/input/nl_multiselect.py new file mode 100644 index 00000000..c3c832f5 --- /dev/null +++ b/continuedev/src/continuedev/steps/input/nl_multiselect.py @@ -0,0 +1,27 @@ +from typing import List, Union +from ..core.core import WaitForUserInputStep +from ...core.main import Step +from ...core.sdk import ContinueSDK + + +class NLMultiselectStep(Step): + hide: bool = True + + prompt: str + options: List[str] + + async def run(self, sdk: ContinueSDK): + user_response = (await sdk.run_step(WaitForUserInputStep(prompt=self.prompt))).text + + def extract_option(text: str) -> Union[str, None]: + for option in self.options: + if option in text: + return option + return None + + first_try = extract_option(user_response.lower()) + if first_try is not None: + return first_try + + gpt_parsed = await 
sdk.models.gpt35.complete(f"These are the available options are: [{', '.join(self.options)}]. The user requested {user_response}. This is the exact string from the options array that they selected:") + return extract_option(gpt_parsed) or self.options[0] -- cgit v1.2.3-70-g09d2 From cddbfc5b727712968488418dc217e83a55d04acd Mon Sep 17 00:00:00 2001 From: Tyler Dunn Date: Sun, 11 Jun 2023 18:18:33 -0700 Subject: initial approach --- .../src/continuedev/recipes/DDtoBQRecipe/README.md | 3 + .../DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md | 108 +++++++++++++++++++++ .../src/continuedev/recipes/DDtoBQRecipe/main.py | 27 ++++++ .../src/continuedev/recipes/DDtoBQRecipe/steps.py | 90 +++++++++++++++++ 4 files changed, 228 insertions(+) create mode 100644 continuedev/src/continuedev/recipes/DDtoBQRecipe/README.md create mode 100644 continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md create mode 100644 continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py create mode 100644 continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/README.md b/continuedev/src/continuedev/recipes/DDtoBQRecipe/README.md new file mode 100644 index 00000000..c4981e56 --- /dev/null +++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/README.md @@ -0,0 +1,3 @@ +# DDtoBQRecipe + +Move from using DuckDB to Google BigQuery as the destination for your `dlt` pipeline \ No newline at end of file diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md b/continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md new file mode 100644 index 00000000..fce9a083 --- /dev/null +++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md @@ -0,0 +1,108 @@ +--- +title: "Share a dataset: duckdb -> BigQuery" +description: Share a local dataset by moving it to BigQuery +keywords: [how to, share a dataset] +--- + +# Share a dataset: duckdb -> BigQuery +In previous walkthroughs you used the local stack to create and run your pipeline. This saved you the headache of setting up cloud account, credentials and often also money. Our choice for local "warehouse" is `duckdb`, fast, feature rich and working everywhere. However at some point you want to move to production or share the results with your colleagues. The local `duckdb` file is not sufficient for that! Let's move the dataset to BigQuery now. + +## 1. Replace the "destination" argument with "bigquery" +```python +if __name__=='__main__': + + # below we replaced "duckdb" in the "destination" argument with "bigquery" + pipeline = dlt.pipeline(pipeline_name='weatherapi', destination='bigquery', dataset_name='weatherapi_data') +``` +And that's it regarding the code modifications! If you run the script, `dlt` will create identical dataset you had in `duckdb` but in BigQuery. + +## 2. Enable access to BigQuery and obtain credentials. +Please [follow those steps](../destinations/bigquery.md) to enable `dlt` to write data to BigQuery. + +## 3. Add credentials to secrets.toml +Please add the following section to your `secrets.toml` file, use the credentials obtained from the previous step +```toml +[destination.bigquery.credentials] +location = "US" # change the location of the data +project_id = "project_id" # please set me up! +private_key = "private_key" # please set me up! +client_email = "client_email" # please set me up! +``` + +## 4. 
Run the pipeline again +``` +python weatherapi.py +``` +Head on to the next section if you see exceptions! + +## 5. Troubleshoot exceptions + +### Credentials Missing: ConfigFieldMissingException + +You'll see this exception if `dlt` cannot find your bigquery credentials. In the exception below, all of them ('project_id', 'private_key', 'client_email') are missing. The exception also gives you the list of all configuration lookups performed - [here we explain how to read such a list](run-a-pipeline.md#missing-secret-or-configuration-values). +``` +dlt.common.configuration.exceptions.ConfigFieldMissingException: Following fields are missing: ['project_id', 'private_key', 'client_email'] in configuration with spec GcpServiceAccountCredentials + for field "project_id" config providers and keys were tried in following order: + In Environment Variables key WEATHERAPI__DESTINATION__BIGQUERY__CREDENTIALS__PROJECT_ID was not found. + In Environment Variables key WEATHERAPI__DESTINATION__CREDENTIALS__PROJECT_ID was not found. +``` +The most common cases for the exception: +1. The secrets are not in `secrets.toml` at all +2. They are placed in the wrong section. For example, the fragment below will not work: +```toml +[destination.bigquery] +project_id = "project_id" # please set me up! +``` +3. You run the pipeline script from a **different** folder from the one in which it is saved. For example, `python weatherapi_demo/weatherapi.py` will run the script from the `weatherapi_demo` folder while the current working directory is the folder above. This prevents `dlt` from finding `weatherapi_demo/.dlt/secrets.toml` and filling in credentials. + +### Placeholders still in secrets.toml +Here BigQuery complains that the format of the `private_key` is incorrect. In practice, this most often happens when you forget to replace the placeholders in `secrets.toml` with real values. + +``` + +Connection with BigQuerySqlClient to dataset name weatherapi_data failed. Please check if you configured the credentials at all and provided the right credentials values. You can be also denied access or your internet connection may be down. The actual reason given is: No key could be detected. +``` + +### BigQuery not enabled +[You must enable the BigQuery API.](https://console.cloud.google.com/apis/dashboard) +``` + +403 POST https://bigquery.googleapis.com/bigquery/v2/projects/bq-walkthrough/jobs?prettyPrint=false: BigQuery API has not been used in project 364286133232 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/bigquery.googleapis.com/overview?project=364286133232 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry. + +Location: EU +Job ID: a5f84253-3c10-428b-b2c8-1a09b22af9b2 + [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Google developers console API activation', 'url': 'https://console.developers.google.com/apis/api/bigquery.googleapis.com/overview?project=364286133232'}]}, {'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'SERVICE_DISABLED', 'domain': 'googleapis.com', 'metadata': {'service': 'bigquery.googleapis.com', 'consumer': 'projects/364286133232'}}] + ``` + +### Lack of permissions to create jobs +Add `BigQuery Job User` as described in the [destination page](../destinations/bigquery.md).
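Before rerunning the pipeline after a permissions fix, it can help to verify the service account directly; the error sample this guards against appears just below. A short sketch using the `google-cloud-bigquery` client, assuming `pip install google-cloud-bigquery` and that the same service-account key from `secrets.toml` is also available as a JSON file (the file name here is hypothetical):

```python
from google.cloud import bigquery
from google.oauth2 import service_account

# Path to the service-account key file is an assumption for this sketch.
creds = service_account.Credentials.from_service_account_file("sa_key.json")
client = bigquery.Client(credentials=creds, project=creds.project_id)

# A trivial query exercises bigquery.jobs.create; a failure here reproduces
# the "Access Denied" errors shown in this section without a full pipeline run.
print(list(client.query("SELECT 1").result()))
```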
+``` + +403 POST https://bigquery.googleapis.com/bigquery/v2/projects/bq-walkthrough/jobs?prettyPrint=false: Access Denied: Project bq-walkthrough: User does not have bigquery.jobs.create permission in project bq-walkthrough. + +Location: EU +Job ID: c1476d2c-883c-43f7-a5fe-73db195e7bcd +``` + +### Lack of permissions to query/write data
Add `BigQuery Data Editor` as described in the [destination page](../destinations/bigquery.md). +``` + +403 Access Denied: Table bq-walkthrough:weatherapi_data._dlt_loads: User does not have permission to query table bq-walkthrough:weatherapi_data._dlt_loads, or perhaps it does not exist in location EU. + +Location: EU +Job ID: 299a92a3-7761-45dd-a433-79fdeb0c1a46 +``` + +### Lack of billing / BigQuery in sandbox mode +`dlt` does not support BigQuery when the project has no billing enabled. If you see a stack trace where the following warning appears: +``` + +403 Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. DML queries are not allowed in the free tier. Set up a billing account to remove this restriction. +``` +or + +``` +2023-06-08 16:16:26,769|[WARNING ]|8096|dlt|load.py|complete_jobs:198|Job for weatherapi_resource_83b8ac9e98_4_jsonl retried in load 1686233775.932288 with message {"error_result":{"reason":"billingNotEnabled","message":"Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. Table expiration time must be less than 60 days while in sandbox mode."},"errors":[{"reason":"billingNotEnabled","message":"Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. Table expiration time must be less than 60 days while in sandbox mode."}],"job_start":"2023-06-08T14:16:26.850000Z","job_end":"2023-06-08T14:16:26.850000Z","job_id":"weatherapi_resource_83b8ac9e98_4_jsonl"} +``` +you must enable billing. \ No newline at end of file diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py new file mode 100644 index 00000000..4aabdfdf --- /dev/null +++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py @@ -0,0 +1,27 @@ +from textwrap import dedent + +from ...core.main import Step +from ...core.sdk import ContinueSDK +from ...steps.core.core import WaitForUserInputStep +from ...steps.main import MessageStep +from .steps import SetUpChessPipelineStep, SwitchDestinationStep + +# Based on the following guide: +# https://github.com/dlt-hub/dlt/pull/392 + +class DDtoBQRecipeRecipe(Step): + hide: bool = True + + async def run(self, sdk: ContinueSDK): + text_observation = await sdk.run_step( + MessageStep(name="Move from using DuckDB to Google BigQuery as the destination", message=dedent("""\ + This recipe will walk you through the process of moving from using DuckDB to Google BigQuery as the destination for your dlt pipeline.
With the help of Continue, you will: + - Set up a dlt pipeline for the chess.com API + - Switch destination from DuckDB to Google BigQuery + - Add BigQuery credentials to your secrets.toml file + - Run the pipeline again to load data to BigQuery""")) + ) + await sdk.run_step( + SetUpChessPipelineStep() >> + SwitchDestinationStep() + ) \ No newline at end of file diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py new file mode 100644 index 00000000..4a835e1a --- /dev/null +++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py @@ -0,0 +1,90 @@ +import os +import subprocess +from textwrap import dedent +import time + +from ...models.main import Range +from ...models.filesystem import RangeInFile +from ...steps.main import MessageStep +from ...core.sdk import Models +from ...core.observation import DictObservation, InternalErrorObservation +from ...models.filesystem_edit import AddFile, FileEdit +from ...core.main import Step +from ...core.sdk import ContinueSDK + +AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)" + +class SetUpChessPipelineStep(Step): + hide: bool = True + name: str = "Setup Chess.com API dlt Pipeline" + + async def describe(self, models: Models): + return "This step will create a new dlt pipeline that loads data from the chess.com API." + + async def run(self, sdk: ContinueSDK): + + # running commands to get started when creating a new dlt pipeline + await sdk.run([ + 'python3 -m venv env', + 'source env/bin/activate', + 'pip install dlt', + 'dlt --non-interactive init chess duckdb', + 'pip install -r requirements.txt', + ], name="Set up Python environment", description=dedent(f"""\ + Running the following commands: + - `python3 -m venv env`: Create a Python virtual environment + - `source env/bin/activate`: Activate the virtual environment + - `pip install dlt`: Install dlt + - `dlt init chess duckdb`: Create a new dlt pipeline called "chess" that loads data into a local DuckDB instance + - `pip install -r requirements.txt`: Install the Python dependencies for the pipeline""")) + + +class SwitchDestinationStep(Step): + hide: bool = True + + async def run(self, sdk: ContinueSDK): + + # Switch destination from DuckDB to Google BigQuery + filename = 'chess.py' + prompt = 'Replace the "destination" argument with "bigquery"' + + ## edit the pipeline to replace the "destination" argument with "bigquery" + await sdk.edit_file( + filename=filename, + prompt=prompt, + name=f'Replacing the "destination" argument with "bigquery" {AI_ASSISTED_STRING}' + ) + + # Add BigQuery credentials to your secrets.toml file + template = dedent(f"""\ + [destination.bigquery.credentials] + location = "US" # change the location of the data + project_id = "project_id" # please set me up! + private_key = "private_key" # please set me up! + client_email = "client_email" # please set me up!""") + + ## wait for user to put API key in secrets.toml + await sdk.ide.setFileOpen(await sdk.ide.getWorkspaceDirectory() + "/.dlt/secrets.toml") + ## append template to bottom of secrets.toml + await sdk.wait_for_user_confirmation("Please add your GCP credentials to `secrets.toml` file and then press `Continue`") + + # Run the pipeline again to load data to BigQuery + output = await sdk.run('env/bin/python3 chess.py', name="Load data to BigQuery", description="Running `env/bin/python3 chess.py` to load data to Google BigQuery") + + ## TODO: REPLACE WITH APPROACH TO HELPING WITH THINGS MENTIONED IN `## 5.
Troubleshoot exceptions` + if "Traceback" in output or "SyntaxError" in output: + suggestion = sdk.models.gpt35.complete(dedent(f"""\ + ```python + {await sdk.ide.readFile(os.path.join(sdk.ide.workspace_directory, "chess.py"))} + ``` + The above code is a dlt pipeline that loads data into BigQuery. While attempting to run the pipeline, the following error occurred: + + ```ascii + {output} + ``` + + This is a brief summary of the error followed by a suggestion on how it can be fixed:""")) + + sdk.raise_exception( + title="Error while running pipeline", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=suggestion) + ) -- cgit v1.2.3-70-g09d2 From 78a0299d9ed21b7cbee5e64f35eb37c79fe77371 Mon Sep 17 00:00:00 2001 From: Nate Sesti <sestinj@gmail.com> Date: Sun, 11 Jun 2023 19:02:34 -0700 Subject: working on airflow --- .../recipes/DeployPipelineAirflowRecipe/main.py | 7 +++-- .../recipes/DeployPipelineAirflowRecipe/steps.py | 36 ++++++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py index 8e7d258d..fbd6e11d 100644 --- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py +++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py @@ -5,7 +5,7 @@ from ...core.main import Step from ...core.sdk import ContinueSDK from ...steps.core.core import WaitForUserInputStep from ...steps.main import MessageStep -from .steps import SetupPipelineStep +from .steps import SetupPipelineStep, DeployAirflowStep # https://github.com/dlt-hub/dlt-deploy-template/blob/master/airflow-composer/dag_template.py @@ -19,7 +19,7 @@ class DeployPipelineAirflowRecipe(Step): hide: bool = True async def run(self, sdk: ContinueSDK): - text_observation = await sdk.run_step( + source_name = await sdk.run_step( MessageStep(name="Deploying a pipeline to Airflow", message=dedent("""\ This recipe will show you how to deploy a pipeline to Airflow.
With the help of Continue, you will: - Select a dlt-verified pipeline @@ -45,5 +45,6 @@ class DeployPipelineAirflowRecipe(Step): ]) ) await sdk.run_step( - SetupPipelineStep(source_name=text_observation.text) >> + SetupPipelineStep(source_name=source_name) >> + DeployAirflowStep(source_name=source_name) ) diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py index b69b3adc..6a912f0c 100644 --- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py @@ -3,6 +3,7 @@ import subprocess from textwrap import dedent import time +from ...steps.core.core import WaitForUserInputStep from ...models.main import Range from ...models.filesystem import RangeInFile from ...steps.main import MessageStep @@ -38,3 +39,38 @@ class SetupPipelineStep(Step): - `pip install dlt`: Install dlt - `dlt init {self.source_name} duckdb`: Create a new dlt pipeline called {self.source_name} that loads data into a local DuckDB instance - `pip install -r requirements.txt`: Install the Python dependencies for the pipeline"""), name="Setup Python environment") + + +class DeployAirflowStep(Step): + hide: bool = True + source_name: str + + async def run(self, sdk: ContinueSDK): + + # Run dlt command to deploy pipeline to Airflow + await sdk.run([ + f'dlt --non-interactive deploy {self.source_name}_pipeline.py airflow-composer', + ], description="Running `dlt deploy airflow` to deploy the dlt pipeline to Airflow", name="Deploy dlt pipeline to Airflow") + + # Modify the DAG file + directory = await sdk.ide.getWorkspaceDirectory() + filepath = os.path.join( + directory, f"dags/dag_{self.source_name}_pipeline.py") + + # TODO: Find and replace in file step. + old_file_contents = await sdk.ide.readFile(filepath) + file_contents = old_file_contents.replace("pipeline_name", f"{self.source_name}_pipeline").replace( + "dataset_name", f"{self.source_name}_dataset") + await sdk.apply_filesystem_edit(FileEdit(filepath=filepath, range=Range.from_entire_file(filepath, old_file_contents), replacement=file_contents)) + + # Prompt the user for the DAG schedule + response = await sdk.run_step(WaitForUserInputStep(prompt="When would you like this Airflow DAG to run? (e.g. every day, every Monday, every 1st of the month, etc.)", name="Set DAG Schedule")) + edit_dag_range = Range.from_shorthand(18, 0, 23, 0) + await sdk.ide.highlightCode(range_in_file=RangeInFile(filepath=filepath, range=edit_dag_range)) + await sdk.edit_file(filepath, prompt=f"Edit the DAG so that it runs at the following schedule: '{response}'", + range=edit_dag_range) + + # Tell the user to check the schedule and fill in owner, email, other default_args + await sdk.run_step(MessageStep(message="Fill in the owner, email, and other default_args in the DAG file with your own personal information.", name="Fill in default_args")) + + # Run the DAG locally ?? 
-- cgit v1.2.3-70-g09d2 From 49f3ba8b252ef736eea23747ae3768b504e000c6 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Sun, 11 Jun 2023 22:22:46 -0700 Subject: finished airflow recipe --- .../recipes/DeployPipelineAirflowRecipe/steps.py | 16 ++++++------- .../src/continuedev/steps/find_and_replace.py | 26 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 continuedev/src/continuedev/steps/find_and_replace.py (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py index 6a912f0c..ce910252 100644 --- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py +++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py @@ -12,6 +12,7 @@ from ...core.observation import DictObservation, InternalErrorObservation from ...models.filesystem_edit import AddFile, FileEdit from ...core.main import Step from ...core.sdk import ContinueSDK +from ...steps.find_and_replace import FindAndReplaceStep AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)" @@ -57,17 +58,16 @@ class DeployAirflowStep(Step): filepath = os.path.join( directory, f"dags/dag_{self.source_name}_pipeline.py") - # TODO: Find and replace in file step. - old_file_contents = await sdk.ide.readFile(filepath) - file_contents = old_file_contents.replace("pipeline_name", f"{self.source_name}_pipeline").replace( - "dataset_name", f"{self.source_name}_dataset") - await sdk.apply_filesystem_edit(FileEdit(filepath=filepath, range=Range.from_entire_file(filepath, old_file_contents), replacement=file_contents)) + # Replace the pipeline name and dataset name + await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="'pipeline_name'", replacement=f"'{self.source_name}_pipeline'")) + await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="'dataset_name'", replacement=f"'{self.source_name}_data'")) + await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="pipeline_or_source_script", replacement=f"{self.source_name}_pipeline")) # Prompt the user for the DAG schedule - response = await sdk.run_step(WaitForUserInputStep(prompt="When would you like this Airflow DAG to run? (e.g. every day, every Monday, every 1st of the month, etc.)", name="Set DAG Schedule")) edit_dag_range = Range.from_shorthand(18, 0, 23, 0) - await sdk.ide.highlightCode(range_in_file=RangeInFile(filepath=filepath, range=edit_dag_range)) - await sdk.edit_file(filepath, prompt=f"Edit the DAG so that it runs at the following schedule: '{response}'", + await sdk.ide.highlightCode(range_in_file=RangeInFile(filepath=filepath, range=edit_dag_range), color="#33993333") + response = await sdk.run_step(WaitForUserInputStep(prompt="When would you like this Airflow DAG to run? (e.g. 
From 49f3ba8b252ef736eea23747ae3768b504e000c6 Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Sun, 11 Jun 2023 22:22:46 -0700
Subject: finished airflow recipe

---
 .../recipes/DeployPipelineAirflowRecipe/steps.py   | 16 ++++++-------
 .../src/continuedev/steps/find_and_replace.py      | 26 ++++++++++++++++++++++
 2 files changed, 34 insertions(+), 8 deletions(-)
 create mode 100644 continuedev/src/continuedev/steps/find_and_replace.py
(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
index 6a912f0c..ce910252 100644
--- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
@@ -12,6 +12,7 @@ from ...core.observation import DictObservation, InternalErrorObservation
 from ...models.filesystem_edit import AddFile, FileEdit
 from ...core.main import Step
 from ...core.sdk import ContinueSDK
+from ...steps.find_and_replace import FindAndReplaceStep
 
 AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)"
 
@@ -57,17 +58,16 @@ class DeployAirflowStep(Step):
         filepath = os.path.join(
             directory, f"dags/dag_{self.source_name}_pipeline.py")
 
-        # TODO: Find and replace in file step.
-        old_file_contents = await sdk.ide.readFile(filepath)
-        file_contents = old_file_contents.replace("pipeline_name", f"{self.source_name}_pipeline").replace(
-            "dataset_name", f"{self.source_name}_dataset")
-        await sdk.apply_filesystem_edit(FileEdit(filepath=filepath, range=Range.from_entire_file(filepath, old_file_contents), replacement=file_contents))
+        # Replace the pipeline name and dataset name
+        await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="'pipeline_name'", replacement=f"'{self.source_name}_pipeline'"))
+        await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="'dataset_name'", replacement=f"'{self.source_name}_data'"))
+        await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="pipeline_or_source_script", replacement=f"{self.source_name}_pipeline"))
 
         # Prompt the user for the DAG schedule
-        response = await sdk.run_step(WaitForUserInputStep(prompt="When would you like this Airflow DAG to run? (e.g. every day, every Monday, every 1st of the month, etc.)", name="Set DAG Schedule"))
         edit_dag_range = Range.from_shorthand(18, 0, 23, 0)
-        await sdk.ide.highlightCode(range_in_file=RangeInFile(filepath=filepath, range=edit_dag_range))
-        await sdk.edit_file(filepath, prompt=f"Edit the DAG so that it runs at the following schedule: '{response}'",
+        await sdk.ide.highlightCode(range_in_file=RangeInFile(filepath=filepath, range=edit_dag_range), color="#33993333")
+        response = await sdk.run_step(WaitForUserInputStep(prompt="When would you like this Airflow DAG to run? (e.g. every day, every Monday, every 1st of the month, etc.)"))
+        await sdk.edit_file(filepath, prompt=f"Edit the DAG so that it runs at the following schedule: '{response.text}'",
                             range=edit_dag_range)
 
         # Tell the user to check the schedule and fill in owner, email, other default_args
diff --git a/continuedev/src/continuedev/steps/find_and_replace.py b/continuedev/src/continuedev/steps/find_and_replace.py
new file mode 100644
index 00000000..78511b27
--- /dev/null
+++ b/continuedev/src/continuedev/steps/find_and_replace.py
@@ -0,0 +1,26 @@
+from ..models.filesystem_edit import FileEdit, Range
+from ..core.main import Models, Step
+from ..core.sdk import ContinueSDK
+
+
+class FindAndReplaceStep(Step):
+    name: str = "Find and replace"
+    filepath: str
+    pattern: str
+    replacement: str
+
+    async def describe(self, models: Models):
+        return f"Replace all instances of `{self.pattern}` with `{self.replacement}` in `{self.filepath}`"
+
+    async def run(self, sdk: ContinueSDK):
+        file_content = await sdk.ide.readFile(self.filepath)
+        while self.pattern in file_content:
+            start_index = file_content.index(self.pattern)
+            end_index = start_index + len(self.pattern)
+            await sdk.ide.applyFileSystemEdit(FileEdit(
+                filepath=self.filepath,
+                range=Range.from_indices(file_content, start_index, end_index),
+                replacement=self.replacement
+            ))
+            file_content = file_content[:start_index] + \
+                self.replacement + file_content[end_index:]

-- cgit v1.2.3-70-g09d2
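The `FindAndReplaceStep` introduced above performs a literal, non-regex substitution and repeats it until the pattern no longer occurs. Stripped of the SDK, the loop reduces to the sketch below, with the `FileEdit` application collapsed into a comment:

```python
def find_and_replace(file_content: str, pattern: str, replacement: str) -> str:
    # Mirrors FindAndReplaceStep.run without the IDE round-trip.
    # Caution: this loops forever if `pattern` is a substring of `replacement`.
    while pattern in file_content:
        start_index = file_content.index(pattern)
        end_index = start_index + len(pattern)
        # the real step applies a FileEdit over (start_index, end_index) here
        file_content = file_content[:start_index] + replacement + file_content[end_index:]
    return file_content


print(find_and_replace("pipeline = dlt.pipeline('pipeline_name')", "'pipeline_name'", "'chess_pipeline'"))
# pipeline = dlt.pipeline('chess_pipeline')
```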
From 18cfe5c697fe1ecf22edd99c72372756279594d7 Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Sun, 11 Jun 2023 22:48:34 -0700
Subject: cleaning bq recipe

---
 continuedev/src/continuedev/core/autopilot.py      |  1 +
 continuedev/src/continuedev/core/policy.py         |  3 ++
 .../DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md    | 59 +++++++---------------
 .../src/continuedev/recipes/DDtoBQRecipe/main.py   |  2 +-
 .../src/continuedev/recipes/DDtoBQRecipe/steps.py  | 35 ++++++++-----
 .../src/continuedev/steps/steps_on_startup.py      |  4 +-
 6 files changed, 48 insertions(+), 56 deletions(-)
(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/core/autopilot.py b/continuedev/src/continuedev/core/autopilot.py
index b82e1fef..c979d53a 100644
--- a/continuedev/src/continuedev/core/autopilot.py
+++ b/continuedev/src/continuedev/core/autopilot.py
@@ -35,6 +35,7 @@ class Autopilot(ContinueBaseModel):
 
     class Config:
         arbitrary_types_allowed = True
+        keep_untouched = (cached_property,)
 
     def get_full_state(self) -> FullState:
         return FullState(history=self.history, active=self._active, user_input_queue=self._main_user_input_queue)
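The `keep_untouched` line above is a pydantic v1 detail worth spelling out: without it, pydantic's metaclass would try to process `functools.cached_property` descriptors on the model. A minimal illustration, with invented field and property names (pydantic v1 API; the option was renamed `ignored_types` in v2):

```python
from functools import cached_property

from pydantic import BaseModel


class Example(BaseModel):
    class Config:
        arbitrary_types_allowed = True
        keep_untouched = (cached_property,)  # leave cached_property descriptors alone

    items: list = []

    @cached_property
    def total(self) -> int:
        # computed once per instance, then served from the instance __dict__
        return len(self.items)
```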
diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py
index 8aea8de7..8612d834 100644
--- a/continuedev/src/continuedev/core/policy.py
+++ b/continuedev/src/continuedev/core/policy.py
@@ -9,6 +9,7 @@ from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeSte
 from ..recipes.WritePytestsRecipe.main import WritePytestsRecipe
 from ..recipes.ContinueRecipeRecipe.main import ContinueStepStep
 from ..steps.comment_code import CommentCodeStep
+from ..recipes.DDtoBQRecipe.main import DDtoBQRecipeRecipe
 
 
 class DemoPolicy(Policy):
@@ -30,6 +31,8 @@ class DemoPolicy(Policy):
             return WritePytestsRecipe(instructions=observation.user_input)
         elif "/dlt" in observation.user_input.lower() or " dlt" in observation.user_input.lower():
             return CreatePipelineRecipe()
+        elif "/ddtobq" in observation.user_input.lower():
+            return DDtoBQRecipeRecipe()
         elif "/comment" in observation.user_input.lower():
             return CommentCodeStep()
         elif "/ask" in observation.user_input:
diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md b/continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md
index fce9a083..eb68e117 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/dlt_duckdb_to_bigquery_docs.md
@@ -1,61 +1,28 @@
----
-title: "Share a dataset: duckdb -> BigQuery"
-description: Share a local dataset by moving it to BigQuery
-keywords: [how to, share a dataset]
----
-
-# Share a dataset: duckdb -> BigQuery
-In previous walkthroughs you used the local stack to create and run your pipeline. This saved you the headache of setting up cloud account, credentials and often also money. Our choice for local "warehouse" is `duckdb`, fast, feature rich and working everywhere. However at some point you want to move to production or share the results with your colleagues. The local `duckdb` file is not sufficient for that! Let's move the dataset to BigQuery now.
-
-## 1. Replace the "destination" argument with "bigquery"
-```python
-if __name__=='__main__':
-
-    # below we replaced "duckdb" in the "destination" argument with "bigquery"
-    pipeline = dlt.pipeline(pipeline_name='weatherapi', destination='bigquery', dataset_name='weatherapi_data')
-```
-And that's it regarding the code modifications! If you run the script, `dlt` will create identical dataset you had in `duckdb` but in BigQuery.
-
-## 2. Enable access to BigQuery and obtain credentials.
-Please [follow those steps](../destinations/bigquery.md) to enable `dlt` to write data to BigQuery.
-
-## 3. Add credentials to secrets.toml
-Please add the following section to your `secrets.toml` file, use the credentials obtained from the previous step
-```toml
-[destination.bigquery.credentials]
-location = "US" # change the location of the data
-project_id = "project_id" # please set me up!
-private_key = "private_key" # please set me up!
-client_email = "client_email" # please set me up!
-```
-
-## 4. Run the pipeline again
-```
-python weatherapi.py
-```
-Head on to the next section if you see exceptions!
-
-## 5. Troubleshoot exceptions
-
 ### Credentials Missing: ConfigFieldMissingException
 
 You'll see this exception if `dlt` cannot find your BigQuery credentials. In the exception below all of them ('project_id', 'private_key', 'client_email') are missing. The exception also gives you the list of all configuration lookups performed - [here we explain how to read such a list](run-a-pipeline.md#missing-secret-or-configuration-values).
+
 ```
 dlt.common.configuration.exceptions.ConfigFieldMissingException: Following fields are missing: ['project_id', 'private_key', 'client_email'] in configuration with spec GcpServiceAccountCredentials
     for field "project_id" config providers and keys were tried in following order:
         In Environment Variables key WEATHERAPI__DESTINATION__BIGQUERY__CREDENTIALS__PROJECT_ID was not found.
         In Environment Variables key WEATHERAPI__DESTINATION__CREDENTIALS__PROJECT_ID was not found.
 ```
+
 The most common cases for the exception:
+
 1. The secrets are not in `secrets.toml` at all.
 2. They are placed in the wrong section. For example, the fragment below will not work:
+
 ```toml
 [destination.bigquery]
 project_id = "project_id" # please set me up!
 ```
+
 3. You run the pipeline script from a **different** folder than the one where it is saved. For example, `python weatherapi_demo/weatherapi.py` runs the script from the `weatherapi_demo` folder while the current working directory is the folder above. This prevents `dlt` from finding `weatherapi_demo/.dlt/secrets.toml` and filling in the credentials.
 
 ### Placeholders still in secrets.toml
+
 Here BigQuery complains that the format of the `private_key` is incorrect. In practice, this most often happens if you forgot to replace the placeholders in `secrets.toml` with real values
 
 ```
@@ -64,7 +31,9 @@ Connection with BigQuerySqlClient to dataset name weatherapi_data failed. Please
 ```
 
 ### Bigquery not enabled
+
 [You must enable the BigQuery API.](https://console.cloud.google.com/apis/dashboard)
+
 ```
 403 POST https://bigquery.googleapis.com/bigquery/v2/projects/bq-walkthrough/jobs?prettyPrint=false: BigQuery API has not been used in project 364286133232 before or it is disabled. Enable it by visiting https://console.developers.google.com/apis/api/bigquery.googleapis.com/overview?project=364286133232 then retry. If you enabled this API recently, wait a few minutes for the action to propagate to our systems and retry.
@@ -72,10 +41,12 @@ Connection with BigQuerySqlClient to dataset name weatherapi_data failed. Please
 Location: EU
 Job ID: a5f84253-3c10-428b-b2c8-1a09b22af9b2
 [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Google developers console API activation', 'url': 'https://console.developers.google.com/apis/api/bigquery.googleapis.com/overview?project=364286133232'}]}, {'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'SERVICE_DISABLED', 'domain': 'googleapis.com', 'metadata': {'service': 'bigquery.googleapis.com', 'consumer': 'projects/364286133232'}}]
- ```
+```
 
 ### Lack of permissions to create jobs
+
 Add `BigQuery Job User` as described in the [destination page](../destinations/bigquery.md).
+
 ```
 403 POST https://bigquery.googleapis.com/bigquery/v2/projects/bq-walkthrough/jobs?prettyPrint=false: Access Denied: Project bq-walkthrough: User does not have bigquery.jobs.create permission in project bq-walkthrough.
@@ -85,7 +56,9 @@ Job ID: c1476d2c-883c-43f7-a5fe-73db195e7bcd
 ```
 
 ### Lack of permissions to query/write data
+
 Add `BigQuery Data Editor` as described in the [destination page](../destinations/bigquery.md).
+
 ```
 403 Access Denied: Table bq-walkthrough:weatherapi_data._dlt_loads: User does not have permission to query table bq-walkthrough:weatherapi_data._dlt_loads, or perhaps it does not exist in location EU.
@@ -95,14 +68,18 @@ Job ID: 299a92a3-7761-45dd-a433-79fdeb0c1a46
 ```
 
 ### Lack of billing / BigQuery in sandbox mode
+
 `dlt` does not support BigQuery when the project has no billing enabled. If you see a stack trace where the following warning appears:
+
 ```
 403 Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. DML queries are not allowed in the free tier. Set up a billing account to remove this restriction.
 ```
+
 or
+
 ```
 2023-06-08 16:16:26,769|[WARNING ]|8096|dlt|load.py|complete_jobs:198|Job for weatherapi_resource_83b8ac9e98_4_jsonl retried in load 1686233775.932288 with message {"error_result":{"reason":"billingNotEnabled","message":"Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. Table expiration time must be less than 60 days while in sandbox mode."},"errors":[{"reason":"billingNotEnabled","message":"Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. Table expiration time must be less than 60 days while in sandbox mode."}],"job_start":"2023-06-08T14:16:26.850000Z","job_end":"2023-06-08T14:16:26.850000Z","job_id":"weatherapi_resource_83b8ac9e98_4_jsonl"}
 ```
-you must enable the billing.
\ No newline at end of file
+
+you must enable the billing.
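The `ConfigFieldMissingException` section above doubles as a reference for dlt's lookup order: environment variables whose names mirror the `secrets.toml` path are tried before the run fails. So, as an alternative to editing `secrets.toml`, the credentials can presumably be injected through the environment. A sketch, with placeholder values (the `WEATHERAPI` prefix comes from the pipeline name in the exception text):

```python
import os

# Names follow the keys listed in the exception message above.
os.environ["WEATHERAPI__DESTINATION__BIGQUERY__CREDENTIALS__PROJECT_ID"] = "my-gcp-project"
os.environ["WEATHERAPI__DESTINATION__BIGQUERY__CREDENTIALS__PRIVATE_KEY"] = "-----BEGIN PRIVATE KEY-----\n..."
os.environ["WEATHERAPI__DESTINATION__BIGQUERY__CREDENTIALS__CLIENT_EMAIL"] = "svc@my-gcp-project.iam.gserviceaccount.com"
```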
diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
index 4aabdfdf..1cb12ff3 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
@@ -4,7 +4,7 @@ from ...core.main import Step
 from ...core.sdk import ContinueSDK
 from ...steps.core.core import WaitForUserInputStep
 from ...steps.main import MessageStep
-from .steps import SetupPipelineStep, ValidatePipelineStep, RunQueryStep
+from .steps import SetUpChessPipelineStep, SwitchDestinationStep
 
 # Based on the following guide:
 # https://github.com/dlt-hub/dlt/pull/392
diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
index 4a835e1a..395cbbc8 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
@@ -45,15 +45,8 @@ class SwitchDestinationStep(Step):
 
     async def run(self, sdk: ContinueSDK):
         # Switch destination from DuckDB to Google BigQuery
-        filename = 'chess.py'
-        prompt = 'Replace the "destination" argument with "bigquery"'
-
-        ## edit the pipeline to add a transform function and attach it to a resource
-        await sdk.edit_file(
-            filename=filename,
-            prompt=prompt,
-            name=f'Replacing the "destination" argument with "bigquery" {AI_ASSISTED_STRING}'
-        )
+        filepath = os.path.join(sdk.ide.workspace_directory, 'chess.py')
+        await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="destination='duckdb'", replacement="destination='bigquery'"))
 
         # Add BigQuery credentials to your secrets.toml file
         template = dedent(f"""\
@@ -63,16 +56,28 @@ class SwitchDestinationStep(Step):
             private_key = "private_key" # please set me up!
             client_email = "client_email" # please set me up!""")
 
-        ## wait for user to put API key in secrets.toml
-        await sdk.ide.setFileOpen(await sdk.ide.getWorkspaceDirectory() + "/.dlt/secrets.toml")
-        ## append template to bottom of secrets.toml
+        # append the credentials template to the bottom of secrets.toml and open it
+        secrets_path = os.path.join(
+            sdk.ide.workspace_directory, "/.dlt/secrets.toml")
+        await sdk.ide.setFileOpen(secrets_path)
+        await sdk.append_to_file(secrets_path, template)
+
+        # wait for the user to fill in their GCP credentials
         await sdk.wait_for_user_confirmation("Please add your GCP credentials to `secrets.toml` file and then press `Continue`")
+
+
+class LoadDataStep(Step):
+    name: str = "Load data to BigQuery"
+    hide: bool = True
+
+    async def run(self, sdk: ContinueSDK):
         # Run the pipeline again to load data to BigQuery
         output = await sdk.run('env/bin/python3 chess.py', name="Load data to BigQuery", description="Running `env/bin/python3 chess.py` to load data to Google BigQuery")
 
-        ## TODO: REPLACE WITH APPROACH TO HELPING WITH THINGS MENTIONED IN `## 5. Troubleshoot exceptions`
         if "Traceback" in output or "SyntaxError" in output:
+            with open(os.path.join(__file__, "dlt_duckdb_to_bigquery_docs.md"), "r") as f:
+                docs = f.read()
+
             suggestion = sdk.models.gpt35.complete(dedent(f"""\
                 ```python
                 {await sdk.ide.readFile(os.path.join(sdk.ide.workspace_directory, "query.py"))}
                 ```
@@ -83,6 +88,10 @@ class SwitchDestinationStep(Step):
                 {output}
                 ```
 
+                Here is documentation describing common errors and their causes/solutions:
+
+                {docs}
+
                 This is a brief summary of the error followed by a suggestion on how it can be fixed:"""))
 
         sdk.raise_exception(
diff --git a/continuedev/src/continuedev/steps/steps_on_startup.py b/continuedev/src/continuedev/steps/steps_on_startup.py
index cd40ff56..ba793425 100644
--- a/continuedev/src/continuedev/steps/steps_on_startup.py
+++ b/continuedev/src/continuedev/steps/steps_on_startup.py
@@ -1,11 +1,13 @@
 from ..core.main import ContinueSDK, Models, Step
 from .main import UserInputStep
 from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe
+from ..recipes.DDtoBQRecipe.main import DDtoBQRecipeRecipe
 
 
 step_name_to_step_class = {
-    "UserInputStep": UserInputStep,
-    "CreatePipelineRecipe": CreatePipelineRecipe
+    "UserInputStep": UserInputStep,
+    "CreatePipelineRecipe": CreatePipelineRecipe,
+    "DDtoBQRecipeRecipe": DDtoBQRecipeRecipe
 }

-- cgit v1.2.3-70-g09d2
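`LoadDataStep` above pairs the captured pipeline output with the bundled troubleshooting doc before asking the model for a fix, a small retrieval-augmented prompt. The standalone sketch below shows the same assembly; the function name is illustrative, and the traceback trimming anticipates the refinement made in the next commit:

```python
def build_error_prompt(output: str, docs: str) -> str:
    # Keep only the final traceback so the prompt stays within the context window.
    if "Traceback" in output:
        output = "Traceback" + output.split("Traceback")[-1]
    return (
        "When trying to load data into BigQuery, the following error occurred:\n\n"
        f"```ascii\n{output}\n```\n\n"
        "Here is documentation describing common errors and their causes/solutions:\n\n"
        f"{docs}\n\n"
        "This is a brief summary of the error followed by a suggestion on how it can be fixed:"
    )
```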
From db3cf46933e180b1896b701185378e86b7bfca26 Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Sun, 11 Jun 2023 23:25:04 -0700
Subject: finished bq recipe

---
 .../src/continuedev/recipes/DDtoBQRecipe/main.py       | 14 +++++++-------
 .../src/continuedev/recipes/DDtoBQRecipe/steps.py      | 17 +++++++++--------
 continuedev/src/continuedev/steps/find_and_replace.py  |  3 ++-
 3 files changed, 18 insertions(+), 16 deletions(-)
(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
index 1cb12ff3..cd1ff1b9 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
@@ -4,24 +4,24 @@ from ...core.main import Step
 from ...core.sdk import ContinueSDK
 from ...steps.core.core import WaitForUserInputStep
 from ...steps.main import MessageStep
-from .steps import SetUpChessPipelineStep, SwitchDestinationStep
+from .steps import SetUpChessPipelineStep, SwitchDestinationStep, LoadDataStep
 
 # Based on the following guide:
 # https://github.com/dlt-hub/dlt/pull/392
 
+
 class DDtoBQRecipeRecipe(Step):
     hide: bool = True
 
     async def run(self, sdk: ContinueSDK):
-        text_observation = await sdk.run_step(
+        await sdk.run_step(
             MessageStep(name="Move from using DuckDB to Google BigQuery as the destination", message=dedent("""\
                 This recipe will walk you through the process of moving from using DuckDB to Google BigQuery as the destination for your dlt pipeline. With the help of Continue, you will:
                 - Set up a dlt pipeline for the chess.com API
                 - Switch destination from DuckDB to Google BigQuery
                 - Add BigQuery credentials to your secrets.toml file
-                - Run the pipeline again to load data to BigQuery"""))
-        )
-        await sdk.run_step(
+                - Run the pipeline again to load data to BigQuery""")) >>
             SetUpChessPipelineStep() >>
-            SwitchDestinationStep()
-        )
\ No newline at end of file
+            SwitchDestinationStep() >>
+            LoadDataStep()
+        )
diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
index 395cbbc8..c7e5d095 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
@@ -3,6 +3,7 @@ import subprocess
 from textwrap import dedent
 import time
 
+from ...steps.find_and_replace import FindAndReplaceStep
 from ...models.main import Range
 from ...models.filesystem import RangeInFile
 from ...steps.main import MessageStep
@@ -14,6 +15,7 @@ from ...core.sdk import ContinueSDK
 
 AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)"
 
+
 class SetUpChessPipelineStep(Step):
     hide: bool = True
     name: str = "Setup Chess.com API dlt Pipeline"
@@ -45,7 +47,8 @@ class SwitchDestinationStep(Step):
 
     async def run(self, sdk: ContinueSDK):
         # Switch destination from DuckDB to Google BigQuery
-        filepath = os.path.join(sdk.ide.workspace_directory, 'chess.py')
+        filepath = os.path.join(
+            sdk.ide.workspace_directory, 'chess_pipeline.py')
         await sdk.run_step(FindAndReplaceStep(filepath=filepath, pattern="destination='duckdb'", replacement="destination='bigquery'"))
 
         # Add BigQuery credentials to your secrets.toml file
@@ -58,7 +61,7 @@ class SwitchDestinationStep(Step):
 
         # wait for user to put API key in secrets.toml
         secrets_path = os.path.join(
-            sdk.ide.workspace_directory, "/.dlt/secrets.toml")
+            sdk.ide.workspace_directory, ".dlt/secrets.toml")
         await sdk.ide.setFileOpen(secrets_path)
         await sdk.append_to_file(secrets_path, template)
 
@@ -72,17 +75,15 @@ class LoadDataStep(Step):
 
     async def run(self, sdk: ContinueSDK):
         # Run the pipeline again to load data to BigQuery
-        output = await sdk.run('env/bin/python3 chess.py', name="Load data to BigQuery", description="Running `env/bin/python3 chess.py` to load data to Google BigQuery")
+        output = await sdk.run('env/bin/python3 chess_pipeline.py', name="Load data to BigQuery", description="Running `env/bin/python3 chess_pipeline.py` to load data to Google BigQuery")
 
         if "Traceback" in output or "SyntaxError" in output:
-            with open(os.path.join(__file__, "dlt_duckdb_to_bigquery_docs.md"), "r") as f:
+            with open(os.path.join(os.path.dirname(__file__), "dlt_duckdb_to_bigquery_docs.md"), "r") as f:
                 docs = f.read()
+            output = "Traceback" + output.split("Traceback")[-1]
             suggestion = sdk.models.gpt35.complete(dedent(f"""\
-                ```python
-                {await sdk.ide.readFile(os.path.join(sdk.ide.workspace_directory, "query.py"))}
-                ```
-                This above code is a query that runs on the DuckDB instance. While attempting to run the query, the following error occurred:
+                When trying to load data into BigQuery, the following error occurred:
 
                 ```ascii
                 {output}
diff --git a/continuedev/src/continuedev/steps/find_and_replace.py b/continuedev/src/continuedev/steps/find_and_replace.py
index 78511b27..c9654867 100644
--- a/continuedev/src/continuedev/steps/find_and_replace.py
+++ b/continuedev/src/continuedev/steps/find_and_replace.py
@@ -19,7 +19,8 @@ class FindAndReplaceStep(Step):
             end_index = start_index + len(self.pattern)
             await sdk.ide.applyFileSystemEdit(FileEdit(
                 filepath=self.filepath,
-                range=Range.from_indices(file_content, start_index, end_index),
+                range=Range.from_indices(
+                    file_content, start_index, end_index - 1),
                 replacement=self.replacement
             ))
             file_content = file_content[:start_index] + \

-- cgit v1.2.3-70-g09d2
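Worth pausing on the `>>` chaining that `DDtoBQRecipeRecipe.run` now uses: one composite step replaces the separate `run_step` calls. The Continue core's actual `__rshift__` implementation is not part of these patches, so the following is only a toy model of the implied semantics:

```python
from typing import List


class Step:
    async def run(self, sdk) -> None:
        raise NotImplementedError

    def __rshift__(self, other: "Step") -> "SequentialStep":
        # step_a >> step_b builds a composite that runs them in order
        return SequentialStep([self, other])


class SequentialStep(Step):
    def __init__(self, steps: List[Step]):
        self.steps = steps

    def __rshift__(self, other: Step) -> "SequentialStep":
        return SequentialStep(self.steps + [other])

    async def run(self, sdk) -> None:
        for step in self.steps:
            await step.run(sdk)
```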
From bcfaac2491b5888a425f3cc46ff7efa5048e9c79 Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Mon, 12 Jun 2023 00:11:45 -0700
Subject: patches

---
 .../recipes/DeployPipelineAirflowRecipe/main.py    |   2 +-
 .../recipes/DeployPipelineAirflowRecipe/steps.py   |   2 +-
 .../src/continuedev/steps/steps_on_startup.py      |   5 +++--
 extension/package-lock.json                        |   4 ++--
 extension/package.json                             |   2 +-
 .../scripts/continuedev-0.1.1-py3-none-any.whl     | Bin 59335 -> 69544 bytes
 extension/src/terminal/terminalEmulator.ts         |   2 +-
 7 files changed, 9 insertions(+), 8 deletions(-)
(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py
index fbd6e11d..f3601c2d 100644
--- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/main.py
@@ -4,7 +4,7 @@ from ...steps.input.nl_multiselect import NLMultiselectStep
 from ...core.main import Step
 from ...core.sdk import ContinueSDK
 from ...steps.core.core import WaitForUserInputStep
-from ...steps.main import MessageStep
+from ...steps.core.core import MessageStep
 from .steps import SetupPipelineStep, DeployAirflowStep
 
 
diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
index ce910252..97e16e82 100644
--- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
@@ -6,7 +6,7 @@ import time
 from ...steps.core.core import WaitForUserInputStep
 from ...models.main import Range
 from ...models.filesystem import RangeInFile
-from ...steps.main import MessageStep
+from ...steps.core.core import MessageStep
 from ...core.sdk import Models
 from ...core.observation import DictObservation, InternalErrorObservation
 from ...models.filesystem_edit import AddFile, FileEdit
diff --git a/continuedev/src/continuedev/steps/steps_on_startup.py b/continuedev/src/continuedev/steps/steps_on_startup.py
index fbdbbcff..2586eb03 100644
--- a/continuedev/src/continuedev/steps/steps_on_startup.py
+++ b/continuedev/src/continuedev/steps/steps_on_startup.py
@@ -2,14 +2,15 @@ from ..core.main import ContinueSDK, Models, Step
 from .main import UserInputStep
 from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe
 from ..recipes.DeployPipelineAirflowRecipe.main import DeployPipelineAirflowRecipe
-
+from ..recipes.DDtoBQRecipe.main import DDtoBQRecipe
 from ..recipes.AddTransformRecipe.main import AddTransformRecipe
 
 
 step_name_to_step_class = {
     "UserInputStep": UserInputStep,
     "CreatePipelineRecipe": CreatePipelineRecipe,
     "DeployPipelineAirflowRecipe": DeployPipelineAirflowRecipe,
-    "AddTransformRecipe": AddTransformRecipe
+    "AddTransformRecipe": AddTransformRecipe,
+    "DDtoBQRecipe": DDtoBQRecipe
 }
diff --git a/extension/package-lock.json b/extension/package-lock.json
index 061b6342..c86d3955 100644
--- a/extension/package-lock.json
+++ b/extension/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "continue",
-  "version": "0.0.25",
+  "version": "0.0.28",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "continue",
-      "version": "0.0.25",
+      "version": "0.0.28",
       "license": "Apache-2.0",
       "dependencies": {
         "@electron/rebuild": "^3.2.10",
diff --git a/extension/package.json b/extension/package.json
index 66ade224..052fe6ff 100644
--- a/extension/package.json
+++ b/extension/package.json
@@ -14,7 +14,7 @@
   "displayName": "Continue",
   "pricing": "Free",
   "description": "Refine code 10x faster",
-  "version": "0.0.25",
+  "version": "0.0.28",
   "publisher": "Continue",
   "engines": {
     "vscode": "^1.74.0"
diff --git a/extension/scripts/continuedev-0.1.1-py3-none-any.whl b/extension/scripts/continuedev-0.1.1-py3-none-any.whl
index 4c89d23e..c5a66516 100644
Binary files a/extension/scripts/continuedev-0.1.1-py3-none-any.whl and b/extension/scripts/continuedev-0.1.1-py3-none-any.whl differ
diff --git a/extension/src/terminal/terminalEmulator.ts b/extension/src/terminal/terminalEmulator.ts
index 67b47e2f..b3031baf 100644
--- a/extension/src/terminal/terminalEmulator.ts
+++ b/extension/src/terminal/terminalEmulator.ts
@@ -74,7 +74,7 @@ export class CapturedTerminal {
       if (
         lines.length > 0 &&
         (lines[lines.length - 1].includes("bash-") ||
-          lines[lines.length - 1].includes("(main)")) &&
+          lines[lines.length - 1].includes(") $ ")) &&
         lines[lines.length - 1].includes("$")
       ) {
         resolve(this.dataBuffer);

-- cgit v1.2.3-70-g09d2
From a2298cf46317d777c9baebe14ff012e754a59508 Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Mon, 12 Jun 2023 00:36:43 -0700
Subject: patches

---
 continuedev/src/continuedev/core/policy.py         |   4 ++--
 .../src/continuedev/recipes/DDtoBQRecipe/main.py   |   2 +-
 .../recipes/DeployPipelineAirflowRecipe/steps.py   |  16 ++++++++++++++++
 .../src/continuedev/steps/steps_on_startup.py      |   4 ++--
 extension/package-lock.json                        |   4 ++--
 extension/package.json                             |   2 +-
 .../scripts/continuedev-0.1.1-py3-none-any.whl     | Bin 69544 -> 74791 bytes
 7 files changed, 24 insertions(+), 8 deletions(-)
(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py
index 667b4546..8e43bf55 100644
--- a/continuedev/src/continuedev/core/policy.py
+++ b/continuedev/src/continuedev/core/policy.py
@@ -11,7 +11,7 @@ from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeSte
 from ..recipes.WritePytestsRecipe.main import WritePytestsRecipe
 from ..recipes.ContinueRecipeRecipe.main import ContinueStepStep
 from ..steps.comment_code import CommentCodeStep
-from ..recipes.DDtoBQRecipe.main import DDtoBQRecipeRecipe
+from ..recipes.DDtoBQRecipe.main import DDtoBQRecipe
 from ..steps.core.core import MessageStep
 
 
@@ -35,7 +35,7 @@ class DemoPolicy(Policy):
         elif "/dlt" in observation.user_input.lower():
             return CreatePipelineRecipe()
         elif "/ddtobq" in observation.user_input.lower():
-            return DDtoBQRecipeRecipe()
+            return DDtoBQRecipe()
         elif "/airflow" in observation.user_input.lower():
             return DeployPipelineAirflowRecipe()
         elif "/transform" in observation.user_input.lower():
diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
index cd1ff1b9..99bfa55d 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
@@ -10,7 +10,7 @@ from .steps import SetUpChessPipelineStep, SwitchDestinationStep, LoadDataStep
 # https://github.com/dlt-hub/dlt/pull/392
 
 
-class DDtoBQRecipeRecipe(Step):
+class DDtoBQRecipe(Step):
     hide: bool = True
 
     async def run(self, sdk: ContinueSDK):
diff --git a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
index 97e16e82..4a128786 100644
--- a/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/DeployPipelineAirflowRecipe/steps.py
@@ -42,6 +42,22 @@ class SetupPipelineStep(Step):
             - `pip install -r requirements.txt`: Install the Python dependencies for the pipeline"""), name="Setup Python environment")
 
 
+class RunPipelineStep(Step):
+    hide: bool = True
+    name: str = "Run dlt Pipeline"
+
+    source_name: str
+
+    async def describe(self, models: Models):
+        pass
+
+    async def run(self, sdk: ContinueSDK):
+        await sdk.run([
+            f'python3 {self.source_name}_pipeline.py',
+        ], description=dedent(f"""\
+            Running the command `python3 {self.source_name}_pipeline.py` to run the pipeline:"""), name="Run dlt pipeline")
+
+
 class DeployAirflowStep(Step):
     hide: bool = True
     source_name: str
diff --git a/continuedev/src/continuedev/steps/steps_on_startup.py b/continuedev/src/continuedev/steps/steps_on_startup.py
index 80a57f76..eae8b558 100644
--- a/continuedev/src/continuedev/steps/steps_on_startup.py
+++ b/continuedev/src/continuedev/steps/steps_on_startup.py
@@ -1,7 +1,7 @@
 from ..core.main import ContinueSDK, Models, Step
 from .main import UserInputStep
 from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe
-from ..recipes.DDtoBQRecipe.main import DDtoBQRecipeRecipe
+from ..recipes.DDtoBQRecipe.main import DDtoBQRecipe
 from ..recipes.DeployPipelineAirflowRecipe.main import DeployPipelineAirflowRecipe
 from ..recipes.DDtoBQRecipe.main import DDtoBQRecipe
 from ..recipes.AddTransformRecipe.main import AddTransformRecipe
@@ -9,7 +9,7 @@ from ..recipes.AddTransformRecipe.main import AddTransformRecipe
 step_name_to_step_class = {
     "UserInputStep": UserInputStep,
     "CreatePipelineRecipe": CreatePipelineRecipe,
-    "DDtoBQRecipeRecipe": DDtoBQRecipeRecipe,
+    "DDtoBQRecipe": DDtoBQRecipe,
     "DeployPipelineAirflowRecipe": DeployPipelineAirflowRecipe,
     "AddTransformRecipe": AddTransformRecipe,
     "DDtoBQRecipe": DDtoBQRecipe
 }
diff --git a/extension/package-lock.json b/extension/package-lock.json
index c86d3955..7462b5be 100644
--- a/extension/package-lock.json
+++ b/extension/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "continue",
-  "version": "0.0.28",
+  "version": "0.0.31",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "continue",
-      "version": "0.0.28",
+      "version": "0.0.31",
       "license": "Apache-2.0",
       "dependencies": {
         "@electron/rebuild": "^3.2.10",
diff --git a/extension/package.json b/extension/package.json
index 052fe6ff..bc91e1c5 100644
--- a/extension/package.json
+++ b/extension/package.json
@@ -14,7 +14,7 @@
   "displayName": "Continue",
   "pricing": "Free",
   "description": "Refine code 10x faster",
-  "version": "0.0.28",
+  "version": "0.0.31",
   "publisher": "Continue",
   "engines": {
     "vscode": "^1.74.0"
diff --git a/extension/scripts/continuedev-0.1.1-py3-none-any.whl b/extension/scripts/continuedev-0.1.1-py3-none-any.whl
index c5a66516..42f3d4a3 100644
Binary files a/extension/scripts/continuedev-0.1.1-py3-none-any.whl and b/extension/scripts/continuedev-0.1.1-py3-none-any.whl differ

-- cgit v1.2.3-70-g09d2
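The `/ddtobq`, `/airflow`, and `/transform` checks above extend DemoPolicy's growing `elif` chain. A table-driven equivalent would keep the routing in one place; here is a sketch, with the Step classes stubbed rather than imported:

```python
from typing import Optional


class Step: ...
class CreatePipelineRecipe(Step): ...
class DDtoBQRecipe(Step): ...
class DeployPipelineAirflowRecipe(Step): ...
class AddTransformRecipe(Step): ...


SLASH_COMMANDS = {
    "/dlt": CreatePipelineRecipe,
    "/ddtobq": DDtoBQRecipe,
    "/airflow": DeployPipelineAirflowRecipe,
    "/transform": AddTransformRecipe,
}


def route(user_input: str) -> Optional[Step]:
    lowered = user_input.lower()
    for command, step_class in SLASH_COMMANDS.items():
        if command in lowered:
            return step_class()
    return None  # fall through to the policy's default step
```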
From eb9bb7862a16dec01230b97ebbcb572eec462f18 Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Mon, 12 Jun 2023 10:21:04 -0700
Subject: patches

---
 continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py  | 2 +-
 continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py | 2 +-
 continuedev/src/continuedev/steps/core/core.py            | 2 +-
 continuedev/src/continuedev/steps/find_and_replace.py     | 1 +
 extension/package-lock.json                               | 4 ++--
 extension/package.json                                    | 2 +-
 6 files changed, 7 insertions(+), 6 deletions(-)
(limited to 'continuedev/src')

diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
index 99bfa55d..1ae84310 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/main.py
@@ -3,7 +3,7 @@ from textwrap import dedent
 from ...core.main import Step
 from ...core.sdk import ContinueSDK
 from ...steps.core.core import WaitForUserInputStep
-from ...steps.main import MessageStep
+from ...steps.core.core import MessageStep
 from .steps import SetUpChessPipelineStep, SwitchDestinationStep, LoadDataStep
 
 # Based on the following guide:
diff --git a/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
index c7e5d095..5cf89ccf 100644
--- a/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/DDtoBQRecipe/steps.py
@@ -6,7 +6,7 @@ import time
 from ...steps.find_and_replace import FindAndReplaceStep
 from ...models.main import Range
 from ...models.filesystem import RangeInFile
-from ...steps.main import MessageStep
+from ...steps.core.core import MessageStep
 from ...core.sdk import Models
 from ...core.observation import DictObservation, InternalErrorObservation
 from ...models.filesystem_edit import AddFile, FileEdit
diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py
index 40e992e7..53df65cc 100644
--- a/continuedev/src/continuedev/steps/core/core.py
+++ b/continuedev/src/continuedev/steps/core/core.py
@@ -85,7 +85,7 @@ class ShellCommandsStep(Step):
                 {output}
                 ```
 
-                This is a brief summary of the error followed by a suggestion on how it can be fixed:"""), with_context=sdk.chat_context)
+                This is a brief summary of the error followed by a suggestion on how it can be fixed:"""), with_history=sdk.chat_context)
 
         sdk.raise_exception(
             title="Error while running query", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=suggestion)
diff --git a/continuedev/src/continuedev/steps/find_and_replace.py b/continuedev/src/continuedev/steps/find_and_replace.py
index c9654867..fec33997 100644
--- a/continuedev/src/continuedev/steps/find_and_replace.py
+++ b/continuedev/src/continuedev/steps/find_and_replace.py
@@ -25,3 +25,4 @@ class FindAndReplaceStep(Step):
             ))
             file_content = file_content[:start_index] + \
                 self.replacement + file_content[end_index:]
+        await sdk.ide.saveFile(self.filepath)
diff --git a/extension/package-lock.json b/extension/package-lock.json
index 7462b5be..aebd0803 100644
--- a/extension/package-lock.json
+++ b/extension/package-lock.json
@@ -1,12 +1,12 @@
 {
"name": "continue", - "version": "0.0.31", + "version": "0.0.34", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "continue", - "version": "0.0.31", + "version": "0.0.34", "license": "Apache-2.0", "dependencies": { "@electron/rebuild": "^3.2.10", diff --git a/extension/package.json b/extension/package.json index bc91e1c5..1d2fd995 100644 --- a/extension/package.json +++ b/extension/package.json @@ -14,7 +14,7 @@ "displayName": "Continue", "pricing": "Free", "description": "Refine code 10x faster", - "version": "0.0.31", + "version": "0.0.34", "publisher": "Continue", "engines": { "vscode": "^1.74.0" -- cgit v1.2.3-70-g09d2