From bf388e23c268e9eeb086bc5bdd2264f50508c457 Mon Sep 17 00:00:00 2001 From: William Liu Date: Fri, 10 Jun 2016 22:27:37 -0400 Subject: [PATCH 01/17] Playing around with pyglet library --- pyglet/helloworld.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 pyglet/helloworld.py diff --git a/pyglet/helloworld.py b/pyglet/helloworld.py new file mode 100644 index 0000000..723353c --- /dev/null +++ b/pyglet/helloworld.py @@ -0,0 +1,31 @@ +import pyglet +from pyglet.window import key +from pyglet.window import mouse + + +window = pyglet.window.Window() +label = pyglet.text.Label( + 'Hello world', x=window.width//2, y=window.height//2, + anchor_x='center', anchor_y='center') + + +@window.event +def on_draw(): + window.clear() + label.draw() + + +@window.event +def on_key_press(symbol, modifiers): + """ Detect Key Press """ + if symbol == key.A: + print "The 'A' key was pressed." + elif symbol == key.LEFT: + print "The left arrow key was pressed." + elif symbol == key.RIGHT: + print "The right arrow key was pressed." + elif symbol == key.ENTER: + print "The enter key was pressed" + + +pyglet.app.run() From cf5c302a514ffd60fcb213eb9a1937ebfa932ce4 Mon Sep 17 00:00:00 2001 From: Will Liu Date: Sun, 15 Oct 2017 10:37:39 -0600 Subject: [PATCH 02/17] Fix requirements.txt --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 283aea4..b6286b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ PyYAML==3.10 -UNKNOWN==0.0.0 beautifulsoup4==4.3.2 chardet==2.2.1 colorama==0.2.7 @@ -12,7 +11,6 @@ lxml==3.2.4 matplotlib==1.3.1 mechanize==0.2.5 mock==1.0.1 -mysql-connector-python==1.0.12 nltk==2.0.4 numpy==1.7.1 openpyxl==1.7.0 From d68a38f58a5b911e992bfaebe56fe95b9aefaa03 Mon Sep 17 00:00:00 2001 From: Will Liu Date: Tue, 1 May 2018 06:35:07 -0600 Subject: [PATCH 03/17] Sample Jenkins Job --- jenkins/test_build.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 jenkins/test_build.py diff --git a/jenkins/test_build.py b/jenkins/test_build.py new file mode 100644 index 0000000..7f8e8f9 --- /dev/null +++ b/jenkins/test_build.py @@ -0,0 +1,18 @@ +from jenkinsapi.jenkins import Jenkins + + +jenkins = Jenkins('http://localhost:8078', username='admin', password='admin') +job_name = 'django_unit_tests' +job = jenkins[job_name] + +queue_item = job.invoke() +print('Building ... ') + +queue_item.block_until_complete() + +build = job.get_last_completed_build() + +print('Last build number was: ', build.get_number()) +print('Last build result was: ', build.get_status()) +#print('Last build based on branch: ', build.get_revision_branch()) + From fbb6a1bd200807018931d2d65576cfc5b8f07a5c Mon Sep 17 00:00:00 2001 From: William Liu Date: Wed, 2 May 2018 17:24:59 -0600 Subject: [PATCH 04/17] Add example rabbitmq --- rabbitmq/example_producer_consumer.py | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 rabbitmq/example_producer_consumer.py diff --git a/rabbitmq/example_producer_consumer.py b/rabbitmq/example_producer_consumer.py new file mode 100644 index 0000000..fa0ab60 --- /dev/null +++ b/rabbitmq/example_producer_consumer.py @@ -0,0 +1,37 @@ +import pika + + +credentials = pika.PlainCredentials('guest', 'guest') +parameters = pika.ConnectionParameters(host="localhost", + credentials=credentials, + heartbeat_interval=600) + + +def test_simple_producer_consumer(): + print("Creating Connection, testing simple producer and consumer") + + # Create Connection + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.queue_declare(queue='hello-queue') + + # Publish to Queue + print("About to publish to queue") + channel.basic_publish(exchange='', routing_key='hello-queue', body='test') # publish to default exchange + print "After Channel Publish" + + # Consume from Queue and run callback + channel.basic_consume(callback, queue='hello-queue', no_ack=True) # acknowledge + print(' [*] Waiting for messages. To exit press CTRL+C') + channel.start_consuming() + connection.close() + + +def callback(ch, method, properties, body): + """ Callback from Consumer """ + print(" [x] Received %r" % body) + + +if __name__ == '__main__': + test_simple_producer_consumer() + From 4d7fe39246ff2c5997e9059a1e8741b451ba7769 Mon Sep 17 00:00:00 2001 From: William Liu Date: Wed, 2 May 2018 17:25:34 -0600 Subject: [PATCH 05/17] Add avro examples --- avro/example.py | 19 +++++++++++++++++++ avro/user.avsc | 11 +++++++++++ avro/users.avro | Bin 0 -> 313 bytes 3 files changed, 30 insertions(+) create mode 100644 avro/example.py create mode 100644 avro/user.avsc create mode 100644 avro/users.avro diff --git a/avro/example.py b/avro/example.py new file mode 100644 index 0000000..2e43fc1 --- /dev/null +++ b/avro/example.py @@ -0,0 +1,19 @@ +import pdb + +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + + +schema = avro.schema.Parse(open("user.avsc").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) + +reader.close() diff --git a/avro/user.avsc b/avro/user.avsc new file mode 100644 index 0000000..fac4b1d --- /dev/null +++ b/avro/user.avsc @@ -0,0 +1,11 @@ +{ + + "namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} diff --git a/avro/users.avro b/avro/users.avro new file mode 100644 index 0000000000000000000000000000000000000000..de4e0b4267604678f7e2b1792e54e71abf204c5a GIT binary patch literal 313 zcmeZI%3@>@Nh~YM*GtY%NloU+E6vFf1M`cMGg5OCcQIEhl~fj_Dp@Hg6{RNU7o{la zC@7_6rskv+1I3~dvc)AunR)3T*}TMDuzE0EtD^u{6Rnh)R{~Z6v_&ZvNoiVQS$L+#krJvdB%^U^OwP~A2kL_vT3nEr40c3nMPhD2PO2WrPsol7Ee2V`z+;)` rW#byu)8O{&Y|yLgdMr9TjyaXZ#fc0JEKF=psd)^13~WWIDd_3|-g;(X literal 0 HcmV?d00001 From 5fce2d6865acdde8bf843e5544c0885714f8f62e Mon Sep 17 00:00:00 2001 From: William Liu Date: Wed, 2 May 2018 17:26:00 -0600 Subject: [PATCH 06/17] Sample logstash message --- logging/sample_msg_to_logstash.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 logging/sample_msg_to_logstash.py diff --git a/logging/sample_msg_to_logstash.py b/logging/sample_msg_to_logstash.py new file mode 100644 index 0000000..370fd74 --- /dev/null +++ b/logging/sample_msg_to_logstash.py @@ -0,0 +1,27 @@ +# Send a sample message to logstash + +import socket +import json +import sys + +HOST = 'localhost' +PORT = 5000 + +try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +except socket.error, msg: + sys.stderr.write("[ERROR] %s\n" % msg[1]) + sys.exit(1) + +try: + sock.connect((HOST, PORT)) +except socket.error, msg: + sys.stderr.write("[ERROR] %s\n" % msg[1]) + sys.exit(2) + +msg = {'@message': 'python test message, hello there', '@tags': ['python', 'test']} + +sock.send(json.dumps(msg)) + +sock.close() +sys.exit(0) From 0075bec983c816d6a3d644249e67359a39b41875 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 15 May 2018 18:49:46 -0600 Subject: [PATCH 07/17] Sample boto3 w/ s3 --- boto/amazon_boto3_sample.py | 62 +++++++++++++++++++++++++++++ {Fabric => fabric}/sample_fabric.py | 0 2 files changed, 62 insertions(+) create mode 100644 boto/amazon_boto3_sample.py rename {Fabric => fabric}/sample_fabric.py (100%) diff --git a/boto/amazon_boto3_sample.py b/boto/amazon_boto3_sample.py new file mode 100644 index 0000000..e5241fb --- /dev/null +++ b/boto/amazon_boto3_sample.py @@ -0,0 +1,62 @@ +""" +Fabfile runs functions as commands with 'fab' then the function name + +Usage: + fab + +Examples: + fab print_me + +Assumes: + Your environment files and variables are setup: + e.g. AWS_DEFAULT_REGION + And/or your ~/.aws/config and credentials file is setup with: + config + [default] + region=us-west-2 + credentials + [default] + aws_access_key_id='' + aws_secret_access_key='' + +Example .bashrc / .bash_profile alias + alias print_me="fab -f /home/will/GitHub/python-examples/fabric/fabfile.py print_me" +""" + +import os + +import boto3 + + +AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'us-west-2') + + +def print_me(): + """ Run 'fab print_me' to test fab works locally """ + print("Hey") + + +def list_buckets(): + """ List all buckets in a S3 """ + s3_client = boto3.client('s3') + response = s3_client.list_buckets() + return response['Buckets'] + + +def create_bucket(bucket_name): + """ Create a bucket """ + s3_client = boto3.client('s3') + response = s3_client.create_bucket(Bucket=bucket_name, + CreateBucketConfiguration={'LocationConstraint': AWS_DEFAULT_REGION}) + return response + + +def get_files_from_bucket(bucket_name): + s3_resource = boto3.resource('s3') + my_bucket = s3_resource.Bucket(bucket_name) + for s3_file in my_bucket.objects.all(): + print(s3_file.key) + + +if __name__ == '__main__': + pass diff --git a/Fabric/sample_fabric.py b/fabric/sample_fabric.py similarity index 100% rename from Fabric/sample_fabric.py rename to fabric/sample_fabric.py From 2cae1ccd22328e3c2141ce07cb4934fc29de1fb7 Mon Sep 17 00:00:00 2001 From: William Liu Date: Fri, 18 May 2018 12:04:46 -0600 Subject: [PATCH 08/17] Selenium Test --- selenium/simple_search.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 selenium/simple_search.py diff --git a/selenium/simple_search.py b/selenium/simple_search.py new file mode 100644 index 0000000..a5888b6 --- /dev/null +++ b/selenium/simple_search.py @@ -0,0 +1,15 @@ +from selenium import webdriver +from selenium.webdriver.common.keys import Keys + + +#driver = webdriver.Firefox() +driver = webdriver.Chrome() +driver.get("http://www.python.org") +assert "Python" in driver.title +elem = driver.find_element_by_name("q") +elem.clear() +elem.send_keys("pycon") +elem.send_keys(Keys.RETURN) +assert "No results found." not in driver.page_source +driver.close() + From 5493bf8944f7c0dcbe0a502eea94c2e2f583e750 Mon Sep 17 00:00:00 2001 From: Will Liu Date: Mon, 4 Jun 2018 19:24:06 -0600 Subject: [PATCH 09/17] Add fabric cd --- fabric/sample_fabric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fabric/sample_fabric.py b/fabric/sample_fabric.py index ef9b776..cfaee71 100644 --- a/fabric/sample_fabric.py +++ b/fabric/sample_fabric.py @@ -18,7 +18,7 @@ """ #import logging # debug output Paramiko prints -from fabric.api import run +from fabric.api import run, cd #For debugging, prints Paramiko's debug statements to #standard error stream From 8ef55a2c178cc3b280d30003df458f5c0bfd0afe Mon Sep 17 00:00:00 2001 From: William Liu Date: Fri, 17 Aug 2018 14:08:22 -0600 Subject: [PATCH 10/17] Boto3 notes --- boto/amazon_boto3_sample.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/boto/amazon_boto3_sample.py b/boto/amazon_boto3_sample.py index e5241fb..6d0bc9f 100644 --- a/boto/amazon_boto3_sample.py +++ b/boto/amazon_boto3_sample.py @@ -40,6 +40,13 @@ def list_buckets(): """ List all buckets in a S3 """ s3_client = boto3.client('s3') response = s3_client.list_buckets() + buckets = [bucket['Name'] for bucket in response['Buckets']] + print(buckets) + + response = s3_client.get_bucket_location('my_bucket') + print(response) + #{'LocationConstraint': 'us-west-1', 'ResponseMetadata': {'HTTPStatusCode': 200, 'RetryAttempts': 0, 'HostId': '', ... + return response['Buckets'] @@ -48,6 +55,7 @@ def create_bucket(bucket_name): s3_client = boto3.client('s3') response = s3_client.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={'LocationConstraint': AWS_DEFAULT_REGION}) + # Make sure to check that your ~/.aws/config default region is pointed to the same region as this call return response From e1b8d0fc4484b95987dbc330c839555c498d4497 Mon Sep 17 00:00:00 2001 From: William Liu Date: Fri, 17 Aug 2018 15:26:22 -0600 Subject: [PATCH 11/17] boto put and get file from s3 --- boto/amazon_boto3_sample.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/boto/amazon_boto3_sample.py b/boto/amazon_boto3_sample.py index 6d0bc9f..d678d20 100644 --- a/boto/amazon_boto3_sample.py +++ b/boto/amazon_boto3_sample.py @@ -59,12 +59,24 @@ def create_bucket(bucket_name): return response -def get_files_from_bucket(bucket_name): +def get_all_files_from_bucket(bucket_name): s3_resource = boto3.resource('s3') my_bucket = s3_resource.Bucket(bucket_name) for s3_file in my_bucket.objects.all(): print(s3_file.key) +def get_file_from_bucket(bucket_name, filename): + s3_resource = boto3.resource('s3') + response = s3_resource.Bucket('my_bucket_name').download_file('my_remote_file.txt', '/home/will/my_local_file.txt') + print(response) + + +def put_file_into_bucket(bucket_name, filename): + s3_resource = boto3.resource('s3') + response = s3_resource.Bucket('my_bucket_name').upload_file('/home/will/my_local_file.txt', 'my_remote_file.txt') + print(response) + + if __name__ == '__main__': pass From 51921ac9f8309a2799c08f721603f01efcee83d0 Mon Sep 17 00:00:00 2001 From: William Liu Date: Sat, 27 Oct 2018 10:46:16 -0600 Subject: [PATCH 12/17] Parquet Example --- pyarrow/deathstar.parquet | Bin 0 -> 9092 bytes pyarrow/parquet_example.py | 186 ++++++++++++++++++ .../69a1f1872d854c39a69ff24c0820889c.parquet | Bin 0 -> 8027 bytes .../7026c2af982a4564a92c7602b43976cf.parquet | Bin 0 -> 8027 bytes .../94d8ecfeb23642c4a73779e0427ce653.parquet | Bin 0 -> 8103 bytes .../9ef633fb96d04eebb9a20c845a620b97.parquet | Bin 0 -> 8027 bytes 6 files changed, 186 insertions(+) create mode 100644 pyarrow/deathstar.parquet create mode 100644 pyarrow/parquet_example.py create mode 100644 pyarrow/starships/year=2014/month=12/day=10/69a1f1872d854c39a69ff24c0820889c.parquet create mode 100644 pyarrow/starships/year=2014/month=12/day=10/7026c2af982a4564a92c7602b43976cf.parquet create mode 100644 pyarrow/starships/year=2014/month=12/day=10/94d8ecfeb23642c4a73779e0427ce653.parquet create mode 100644 pyarrow/starships/year=2014/month=12/day=10/9ef633fb96d04eebb9a20c845a620b97.parquet diff --git a/pyarrow/deathstar.parquet b/pyarrow/deathstar.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c94f66a6198a283ca8dcbec4fd70ca9bd969de65 GIT binary patch literal 9092 zcmdT~UuYaf7~kDoFKL>_CT6x9d!?JR8KWk-Ke@yvO0gltKeSp*d{N4=y_+;Edwb{Y zUTv&`h!3SmDI!ve_#h&sNS>q*BI1LHPer8oBi1b0k2NA#7*}1*D+1@qY8Zo4~ z+3%bAe!uy4GV}W;J$~e=42q!vwC5;NP;6L|;vH2tT7 zSK6u5PKqom5`Ilipn=iZm=qt?V#gjix{4H4QC;;VoWp&IIcdTl2m>Hg5@C)s^zd%5 zWzr2_0_g50x7Pe9P`TU_P#G*3)l=ns!KfGo)7|h$U6a*MWc-eTzmX}i=4dU!8C@0K z#p&*v=mp!1zCnztBM|NN9U5;4MbF8*ZUJF<}b2dAZ56u=q2u=tX`7w4?FR-orz$4&{Cy|dep$m(m8@OPOtx9oW9&# zc7Aq7AQ)$M&Hoh0o!cE`E(YXY@R+?Wt8dEq$^iat7fn}75XRszTI384oY@DqY&w&h z%4Daq+11Pg3p29|x!iPaHamN0{uzPRzBZk;c1~de=l28?h|?GDC-=OfzM3%C@H_q&u|0(eJQVV#* zEn=T?*Be&(&As^Jz1}Ks4#POSCZqE*Pr~BOBZtBKSP7%)OA^+poic<~(gQ|~{tza7aV(gyS~AY$4YyQw zD(mo)?NfQDLi{boTdW`J_=4^&RvwRW5Vpgmm8p#WWc4Jh2}@rzT-PG{iVI7>Yzy4> zwdt(2a|(00axj?7j#9ZuEbpzc-SD?0ek)12w+q=8kIglKbp9(bnCl1I5*%vlbaqu? zYW;yvYiJ9^FW(aokJ(0vc)Z_})emHR72{vWDer~=jlmczVY@h#n}O3$U=(L|&Hogb zY~C9%>8}`$13Lx**+iuwI~cD|;L8(~PfIAqLqf_?0HuPI3I?B=EXbIbt9KRks)9e7#5eAzx#|?ZBLSA( z$I%l!5#txfz!rM6aym|GUObJ*Iu6k~cM;O@b~Z>yPpxWsIoMRxYYP4{jekhd9CQwQ z8^|mVP5Sg(D(|ViweacV906Q@sEdq8Xp_9C^gYaDYWFx;}uGcd$c~@5~y8!D4;f!hia%u&g3m} znppXCKHu}PsR?mcB_009g&6*y)g-w4k^=wxN*;Hqxp7xY z`F6!G{tt8}-cB)xBNkvdYSDzF6*~Ce6esF8W2z#JCg9{X?4sdlKcTZpCVvWLt5&j| zBvvusYAq4h zC>pK-J?)xhK|i}zs%+$4xC*1A%O{^B1(%NZfw`k*n*}J{lP^!|0ao+h1VmKPA2EJ# z?H>Z(?2H<@=|VpYUFZ-o19od4f*d$HA;L-^u2~{;xo2(X1P#^l)_I4P*AA*T6xXwM z0^QmYW7p>*T40YB&%3)8;p*mS(S#dl5jnI=v@F>Dvj`p7qs42yz!gzjy-{O_-KB=H zxK+Q1`Oy2&5O{8m7R@^jgu{Vbqs0oYDaGkE`y$5m?kqzv>-J7~YgXBC=W;Ye%YMi` zO{ZXU2`vJP{!Rq8wnaj%E`s{TXp!hmoCw9Oov=(hJgLmylv3|nO+>1jBZPu$3=zmT zMhD}It!7asDn3|oU1Czr$aEpQVI!WntV^~q9=@#s_-vBcp!VQTO;i&;y}CSgh$?_B zYsC{UOB(zNRiCdBcdAgS=)R2AGt;xvnW>d0jvQZk^vPAYDN0CEm)}xsM3Rof|G)Vg DX%dNb literal 0 HcmV?d00001 diff --git a/pyarrow/parquet_example.py b/pyarrow/parquet_example.py new file mode 100644 index 0000000..3861de3 --- /dev/null +++ b/pyarrow/parquet_example.py @@ -0,0 +1,186 @@ +import pdb + +import arrow +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq +import requests +from pandas import Series +from pandas.io.json import json_normalize + + +def get_api_endpoint(url: str) -> pd.DataFrame: + """ Get API endpoint and convert to DataFrame + Example: + url: https://swapi.co/api/starships/9/ + data: + +(Pdb) print(d) + {'MGLT': '10', 'cargo_capacity': '1000000000000', + 'films': ['https://swapi.co/api/films/1/'], + 'manufacturer': 'Imperial Department of Military Research, Sienar Fleet Systems', + 'starship_class': 'Deep Space Mobile Battlestation', + 'created': '2014-12-10T16:36:50.509000Z', + 'model': 'DS-1 Orbital Battle Station', + 'url': 'https://swapi.co/api/starships/9/', + 'consumables': '3 years', 'hyperdrive_rating': '4.0', + 'crew': '342953', 'name': 'Death Star', 'max_atmosphering_speed': 'n/a', + 'edited': '2014-12-22T17:35:44.452589Z', 'length': '120000', + 'pilots': [], 'cost_in_credits': '1000000000000', 'passengers': '843342'} + df: + df.columns + Index(['MGLT', 'cargo_capacity', 'consumables', 'cost_in_credits', 'created', + 'crew', 'edited', 'films', 'hyperdrive_rating', 'length', + 'manufacturer', 'max_atmosphering_speed', 'model', 'name', 'passengers', + 'pilots', 'starship_class', 'url'], + dtype='object') + +(Pdb) df.head() + MGLT cargo_capacity consumables cost_in_credits \ + 0 10 1000000000000 3 years 1000000000000 ... + """ + r = requests.get(url) + d = r.json() + return pd.DataFrame(dict([(k, Series(v)) for k, v in d.items()])) + + +def df_add_partition_columns(df, date_field): + """ Return a dataframe with new columns used for partitioning by datetime + Example: 2018-03-04T14:12:15.653Z returns with new df columns of 'year', 'month', day' + """ + df[date_field] = df[date_field].map(lambda t: pd.to_datetime(t, format="%Y-%m-%dT%H:%M:%S.%fZ")) + df['year'], df['month'], df['day'] = df[date_field].apply(lambda x: x.year), df[date_field].apply(lambda x: x.month), df[date_field].apply(lambda x: x.day) + return df + + +def df_to_parquet_table(df: pd.DataFrame) -> pa.Table: + """ Convert DataFrame to Pyarrow Table + Example: + pyarrow.Table + MGLT: string + cargo_capacity: string + consumables: string + cost_in_credits: string + created: string + crew: string + edited: string + films: string + hyperdrive_rating: string + length: string + manufacturer: string + max_atmosphering_speed: string + model: string + name: string + passengers: string + pilots: double + starship_class: string + url: string + __index_level_0__: int64 + metadata + -------- + {b'pandas': b'{"columns": [{"field_name": "MGLT", "pandas_type": "unicode", "m' + b'etadata": null, "name": "MGLT", "numpy_type": "object"}, {"field' + b'_name": "cargo_capacity", "pandas_type": "unicode", "metadata": ' + b'null, "name": "cargo_capacity", "numpy_type": "object"}, {"field' + b'_name": "consumables", "pandas_type": "unicode", "metadata": nul' + b'l, "name": "consumables", "numpy_type": "object"}, {"field_name"' + b': "cost_in_credits", "pandas_type": "unicode", "metadata": null,' + b' "name": "cost_in_credits", "numpy_type": "object"}, {"field_nam' + b'e": "created", "pandas_type": "unicode", "metadata": null, "name' + b'": "created", "numpy_type": "object"}, {"field_name": "crew", "p' + b'andas_type": "unicode", "metadata": null, "name": "crew", "numpy' + b'_type": "object"}, {"field_name": "edited", "pandas_type": "unic' + b'ode", "metadata": null, "name": "edited", "numpy_type": "object"' + b'}, {"field_name": "films", "pandas_type": "unicode", "metadata":' + b' null, "name": "films", "numpy_type": "object"}, {"field_name": ' + b'"hyperdrive_rating", "pandas_type": "unicode", "metadata": null,' + b' "name": "hyperdrive_rating", "numpy_type": "object"}, {"field_n' + b'ame": "length", "pandas_type": "unicode", "metadata": null, "nam' + b'e": "length", "numpy_type": "object"}, {"field_name": "manufactu' + b'rer", "pandas_type": "unicode", "metadata": null, "name": "manuf' + b'acturer", "numpy_type": "object"}, {"field_name": "max_atmospher' + b'ing_speed", "pandas_type": "unicode", "metadata": null, "name": ' + b'"max_atmosphering_speed", "numpy_type": "object"}, {"field_name"' + b': "model", "pandas_type": "unicode", "metadata": null, "name": "' + b'model", "numpy_type": "object"}, {"field_name": "name", "pandas_' + b'type": "unicode", "metadata": null, "name": "name", "numpy_type"' + b': "object"}, {"field_name": "passengers", "pandas_type": "unicod' + b'e", "metadata": null, "name": "passengers", "numpy_type": "objec' + b't"}, {"field_name": "pilots", "pandas_type": "float64", "metadat' + b'a": null, "name": "pilots", "numpy_type": "float64"}, {"field_na' + b'me": "starship_class", "pandas_type": "unicode", "metadata": nul' + b'l, "name": "starship_class", "numpy_type": "object"}, {"field_na' + b'me": "url", "pandas_type": "unicode", "metadata": null, "name": ' + b'"url", "numpy_type": "object"}, {"field_name": "__index_level_0_' + b'_", "pandas_type": "int64", "metadata": null, "name": null, "num' + b'py_type": "int64"}], "column_indexes": [{"field_name": null, "pa' + b'ndas_type": "unicode", "metadata": {"encoding": "UTF-8"}, "name"' + b': null, "numpy_type": "object"}], "pandas_version": "0.22.0", "i' + b'ndex_columns": ["__index_level_0__"]}'} + """ + pyarrow_deathstar_table = pa.Table.from_pandas(df) # Create PyArrow Table from Pandas DF + print(pyarrow_deathstar_table) + pq.write_table(pyarrow_deathstar_table, 'deathstar.parquet') # Convert PyArrow Table to Parquet Table / File + parquet_table = pq.read_table('deathstar.parquet') # Read back Parquet File as a Table + parquet_table = pq.ParquetFile('deathstar.parquet') # Read back Parquet File as a ParquetFile for finer-grained read and write + print(parquet_table.metadata) + # + # created_by: parquet-cpp version 1.4.1-SNAPSHOT + # num_columns: 19 + # num_rows: 1 + # num_row_groups: 1 + # format_version: 1.0 + # serialized_size: 4574 + + print(parquet_table.schema) + # + #MGLT: BYTE_ARRAY UTF8 + #cargo_capacity: BYTE_ARRAY UTF8 + #consumables: BYTE_ARRAY UTF8 + #cost_in_credits: BYTE_ARRAY UTF8 + #created: BYTE_ARRAY UTF8 + #crew: BYTE_ARRAY UTF8 + #edited: BYTE_ARRAY UTF8 + #films: BYTE_ARRAY UTF8 + #hyperdrive_rating: BYTE_ARRAY UTF8 + #length: BYTE_ARRAY UTF8 + #manufacturer: BYTE_ARRAY UTF8 + #max_atmosphering_speed: BYTE_ARRAY UTF8 + #model: BYTE_ARRAY UTF8 + #name: BYTE_ARRAY UTF8 + #passengers: BYTE_ARRAY UTF8 + #pilots: DOUBLE + #starship_class: BYTE_ARRAY UTF8 + #url: BYTE_ARRAY UTF8 + #__index_level_0__: INT64 + return parquet_table + + +def write_parquet_table_as_partitioned_dataset(parquet_file) -> pq.ParquetDataset: + """ Write a parquet table as a parititioned dataset (i.e. multiple Parquet files) + An example of a dataset partitioned by year and month on disk might look like: + dataset_name/ + year=2018/ + month=09/ + 0.parq + 1.parq + month=10/ + 0.parq + 1.parq + """ + parquet_table = pq.read_table(parquet_file) # Read back Parquet File as a Table + #pq.write_to_dataset(parquet_table, root_path='starships', partition_cols=['created']) + pq.write_to_dataset(parquet_table, root_path='starships', partition_cols=['year', 'month', 'day'], flavor='spark') + dataset = pq.ParquetDataset('starships') + return dataset + + +if __name__ == '__main__': + + # Basics of get request, save to DataFrame, PyArrow Table, Parquet File + df_deathstar = get_api_endpoint('https://swapi.co/api/starships/9/') + df_deathstar = df_add_partition_columns(df_deathstar, 'created') + parquet_deathstar_table = df_to_parquet_table(df_deathstar) + + # Write to and Read from Partitioned Datasets + write_parquet_table_as_partitioned_dataset('deathstar.parquet') + print("Done") diff --git a/pyarrow/starships/year=2014/month=12/day=10/69a1f1872d854c39a69ff24c0820889c.parquet b/pyarrow/starships/year=2014/month=12/day=10/69a1f1872d854c39a69ff24c0820889c.parquet new file mode 100644 index 0000000000000000000000000000000000000000..798be3047548876ac4f6eb97b1bc933572aa639f GIT binary patch literal 8027 zcmdT}O>7fK6yDu9&JRgSLU!4JDVr^srjW!xaZ-{4Dp8cSh(D3^0z%ftp2S2y*JM@-wekOo`{M8u}j=~SQNxSAIAmz1&-sQ5s?%7L}>(D#h@4r$wD-O z|4FEo>rikff#(GdT;xMy*T6)83l7MEBkvzx5Cut)Z0R!)2koIrZrB}&1|V0e2q;qT zd)>$ujg;idaL_*wwJ@|ak;w+iQk zt|acDbhiw*A=|jJ0=2xw5N&fCWct96d)(@g+mg(hRw1J;8qji>UE`%y9;^qz-2i4r zofN%oLh`P+d;H!=W-U9Rn~7u|rgWQdB-y*!2A%}#%-&Y9pJuN;^dK@d`&&mV$E2>E zw#daNCV3A#Lt?-!vNl9Yvti0f+*Mw>$%CgI;7&)#8}B_y+;GvWXC>+E7iCJXb59UY zZag+MG0qSS61(hvGUP6GdzlL$a%UZ8U-Hs*9^C2z&$r-oRX8CG4u~_9!R~WAku4U9 z#z&*E(O7ID`tJ1j#B@A97N3Ys?4LTx(Art0vs|r_3E=WpFMuGPa5uS^1?idqp7emf zyKxBBd{zY{!On@8%7`DCEdta;b@o%cURT!a`k2= zNdFr+2UjD+S-DQGXBHx{7Le7 z_NG^IFrUinXJ8_)*}A#xaQ;;Kf(L(!;J%0{7hMr}rpBW&yxua|4HCQTelk2B$sUiO z0nKGQT~g)i+rhoSKFW2&SD8D;#33Ha)u7qFDk&LuQEleKbb zG|A?2!pcE+NpXtxo&v5YPO(ydgafl2&d!fUm1FruR1=0WquI6rm3bSLe%55T?X1#S zuGYxJa%+zlOKT>Zf`&6|WHo%Qf*UI4Ud?4yJQ8b2ROnZtC*Fx{vuIOGd0v_+fxvJ&ACIOP&L*h}I zh@q>4$mYAXQaV<6irtL|YYvfXcVW`;Rm@9AOCfJKIam{UaHPB})gn zs*t`C#YEp&T|!%2r9fZHc0y`=$!|Ak@Fc2DevF%mPwV=rdPtfRD5j6WS2bzBX0~tp!@<2iKcpW5cm%y2iVMg9lXxHSi1hEBya;Ew9#m0O0(p(ZRgcw z)+l65vgt!homNhrSC@2Xq!L(zqj$4l7$b_Bfwq>?Yz*wCmLeuk@;{*{Ef{eX;#+t(wom7sZbczIkdoyyMgd&G~XSd2?`l`e1n% zLd_1Z#Iql+b30TI=8}=s?8!LpTCpIOw9xgCTsrwJQRN5FdVpVLVXW}(6EHgIHX9VX z^g&|oJnF&KP5&Gh?QVv#N^!&uxmPKXI4X;K+MBa8=*kgY6=NwC8H>fnBI*TBMiZL< tqyX*FWG<(;J(TF!#8`B6{^-Hu^B){rK*#?O$8GGF`=OWPj-&rp{}(vG-7f$D literal 0 HcmV?d00001 diff --git a/pyarrow/starships/year=2014/month=12/day=10/7026c2af982a4564a92c7602b43976cf.parquet b/pyarrow/starships/year=2014/month=12/day=10/7026c2af982a4564a92c7602b43976cf.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b242a4d4dedb0a9b2a971e090e44d6d57b08e4ff GIT binary patch literal 8027 zcmdT}PiPcZ7@wJJHh)Y~6EovP*E*S+*Hn}2pX{b42dN>Xt@sarn6@f#(GdT;xMy*FYk`1qbB7k@pYJi-IIbj`SIbgZ9t_H|#x#9zdZ~5m2Pw z_qvfS8Y%f=K)cJ_a``PoWu?WZ(v#AxrCc(l<+YUVEW4!c@X~!Ad@X=qc}%Q4+PdI` zt|acDbhiw*A=|jJ3^mJUh_-nSnO-pD9=H1Bwxn{VUC3$+2DDvf*LZ1#2WtUvH-MSZ zAjN2#P`vByKEF3oIonC{E|)!Qoe)9ST{9z-T*e(PxEn9{XV zk6gTBiubTHBnG@DYeS?o2d3S^UFD^lJb2mx?sSCw=lv^*8!krmt)!g&qD<*^?g`?T z8;?yU#u0IjWQ#?j z@zH2(G!~nWzB@IZn2N{8;)z&d|Kv%A*3K$}%=vnn7-c3PaJ$qs(L3)wP`leeeBVf(C>*T+)1F!~5D>PFTMN5f3IyQ&VCtGk($ z*S7g7x4M|_OS>URXnOdu5B$=LGp-)A1}4e6N9?0nx5+KY79V5h))r)x@h^twPm<5G zH?y3FR@&0fz@())y1C?X{#5#c2Y-s-zKAIoLlJnU$D=Vk-!jz=61(huGCUs1K98UQ z%_S#O(&cO0!M*KnmzRej94_+WWu6vcdhOr=Wd1OVQqeU-nSpuDaCn_HW!8YuQRbFy2WHvn_dsLuIEc0Pk% zvbmJB^U#}8+-AL}fGdjItTZ0sz)XiTbE8q^n6-dYtJ^>RiGTwpZh{$ zrzZ`9dELl)a%EmR2_EbPxA%tJ2axuh6%WypzjbLh3Niy>UYSDz5h_P=3p#2p(qr}p zEw;PH19GhaOr8E2_3PAwU@beN=aVS|Rm`o`H-dCq0Cz{g<9)bRwc^(#K&pF4JW2~O zbafEfDlV;*jvbz4SL4B&kI1!$Fy;6v=9iR-=Rt4#f03Joa(=e_5pZJHkU;iYL&%M9*6ntvxpbdFZR@o?wg*rMDyxV3z4%zk(uX`% zNNig(BzFLXmqa#n!T9=&EJ55SKKy%P1vn~&DkD+hSWWHWlawL zXYv#@t}emr7j>XP`2`vhP4(&^@E=GHu$k*Rc%5spbp1_9;08iyq0!8=X3MMF&a0`M zQOKHP(TBD=rJOpinuTnBIq9GcJGxhM3m?OjqmC#lR^`ZI77PR3E$YxnC(CVF=xAxp z(a@;#Dl}8MG}=MoARo=oj_y}4pvMSyF?zw&(I}{1IE{RSOnod=d|ZoJmGLNAOPF|k zHv%3(RBL3cmY>J6sI0%q`2LY=1@LlbvysaVQ!Ak}0~%p~x(7OX7EZ)Znf7T6=XIFs zot#!+K#o+gqb4#9k6W>h*N#+VMLgG96Rg1KIhD+wHxIT)#{z9h6OZpgVxiNvW85{n>Dr<_4Yj_9Zui>Js~EH)NVFK{w)TAvi4 rGn&fh6>o$R9ZQTwN9T?nJU;iqv3a!p4{_Y9eRDtba@=wBZ}ooxR`}g7 literal 0 HcmV?d00001 diff --git a/pyarrow/starships/year=2014/month=12/day=10/94d8ecfeb23642c4a73779e0427ce653.parquet b/pyarrow/starships/year=2014/month=12/day=10/94d8ecfeb23642c4a73779e0427ce653.parquet new file mode 100644 index 0000000000000000000000000000000000000000..41625682e5223fe8e2faa0b65e3063af256d5bb4 GIT binary patch literal 8103 zcmdT}U1$_n6rP!EHb2JH#LRTktaY-Q+gS6nyV<112dNU$R{X=HFN!Ra%}p|8c6Pfn ztEN^cMT&?IB0)+irHD^beDtA+ls-tGN-0$OP;BUflv1P*rIg}B&z(Crvy+?bPIfh= zA!cXJJ@A2r z{8M05Xwit0B#M%NE{QR@t$Q>gM7!0<(Kn7v%aS4~j`BWAD}vG-8x#7ysW1hM#bv~> zI$qxjvSgy{hlAZ37*>O?9GTA=d@}7>y*QuGWc7lcHJnA4)?HD#FQS_gFk2Q0wV=IK z<|$!Ceu)v@(%%HK5p5CcMVq7A+vRp$R6Y^W-x1*U zb%bzZW>)NNLI6K+_8GpA&D&1Kurk>qo;4iGl1A`W6R`Xx8Zv_0X z8U-$V&1^(ndEx^2s5K@>yku(vTDpU0-QZmlm5)XAR}0X3&=T`4_)m(^zhc*y(vWt^ zDkIpsJ4%)~of;n<;V4F_T=hOVdY^9fqZa{s=Ui@AMP*4u_u7El7SaGZj@e|XqFrx%Q&YACBp=;xp}7SS;Y;Y|%K2GezAota+F9d&-9*x~c%_M=~M3VvG{tay^+M`!5&a zD3z<;C&%PZ)n^hlu{G~3lx6#p3i!(}y4fByL|I%CF`f z^rDl)mZRn8w4;V;IC^nWJBe-Bce4uv+LVDUy{Ju^7(3e3qV3=uza1Z_&z@T6d@f)Q zclZHo%js5WPR}}}A}+dNTW(N(Yd|-8AZ)*Pld#nZUoU;SOkB2F!#bBK4Ios>AI{}_ zy}rvzPCuW~om}27EWk2b^BKE>tDl`$|SDB2sT-vc|p` z?devKO~5g?K>RCm&b9huYS}YK{o7Z3>JdxNVVC(WQTakdj}S1wi3syD6XoV8FK*-5 zmmY!hkmD4ka@G6fxIEtDb7?K;whem+hH{FOjr6GeXb&*@zMn8!WsEWrWa$XNl5&-l z5s;(}jF^wr*&-0Bq!(Sl*-y)tH6IfX9m1(?UL#@s1TnNG@wS>ShF z-U((VFfM3QP(EVqXnw|kB%`@zuajy!t4vUFq1O^BKsFQ{a1 zOX!~gaAawK#H$)YRSGo1hvj1|6n)ovLAK^`%Lv)S@@#L#b%G zQMe;1cO|rxgecrjkSJ6)90Hq0B2c@6$o=7RzL|}vD^DEthogS&+L3{}dKRD0nD{I< zGl@*bb%a*2PsqjTej2LSwXF89Aq72I>VDcaDyUIAyMS}Lj0Cx@Pog)nJ{Tr1t8PS0 zTz(T_vemUekw?G;E4ur-Jo}`|Ws|&mk-gGNioNmG3LEjN2lg5`8<}vO(}NAu?U=o} zI~;o{+hT~Q6`b%k0xu{mi|N;jO(V%go{7`^Y6*(e9qWF&ePg#@^vtiPsQw!zdiGa7 z^gOOZsh&|E`Ed&!PrJn^F3qx{EYaXK59_T0;LxVS+aI z^heM4#8#GGRwSW22FHTqziYsT_BJK^0cn-dOq&dlr1SlUJK z4nW%E;W5ipgZrU3AspZu+hOqj$OEspF|L|dGd^5Y!-Ep#a7cy~46Ijy1XE{4DHu|& zyAi*52GWkUJjzO(IIl?bs0QtjV7?Z*5XTmjJgG%LFkolu8XH;ui%KAdByourrt~6f{pmOAe)nl7myn4xX5L^Y}D031Wg! P+pq9LhajAQ|DyjFzlYym literal 0 HcmV?d00001 diff --git a/pyarrow/starships/year=2014/month=12/day=10/9ef633fb96d04eebb9a20c845a620b97.parquet b/pyarrow/starships/year=2014/month=12/day=10/9ef633fb96d04eebb9a20c845a620b97.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c37a70dbf585420129c415b1e87951e1cef3adec GIT binary patch literal 8027 zcmdT}O>7fK6yDu9&JRgSLU!4JDI1qeQ%K^U*eS^Yl_*ME#Ggod0U>K+PvWiCYj<}I zq!g*DI3QFZgb+ee)k9TPD5}y+#Q`DILl2w~st~72?TJHGp&U@v1FAZ+v*Wcr#wK`4 z)hLN~-rikbf#(GdT;xMy*FZAB1qbB7k@pYJi-IIbw)7cDfcDS?H|%yq9Z;yS2q;qT zd)>$ui2vveM#F=}D{RQa+Vd3u;=kmmN~KdFdVxz81i*JSJ8dttvR7 zD~UTO-7Ujy$TqGlL)EkxqHV52rVk9cN39;YE$O^r6?5u>4lRe-HC|fb!FmAP31DV4 zNGaMT6z_Vw$M21F-m+7gkxH8|quGQbsou>t@HqIA)!Qoe)9ST{?nftQe(PxEn9{XV zk6e0Uiua&1BnI3`)`m!_Hq1DMyUI&9dGMqI-0lc@{k^`>>+2YYy zVl);XjmPI>?@o;;rxJ;=L^7V-KY5a&wX;rRxt^0r;PO^4fgm1mx44%D>6!o@_kh2< zaSHWf)&(TVPK&cN*}>0uAzRkA3)WO5Vx3hB+E_XtK_8JtP0v}8ScFM;SKa1veK(Wx z`Zh1+RtM7=v>SqimWLnvz%RWxe- zoud8TBK^?(59Bf*_ljgot{rF zduE8QyzLPW7-|kWyuan8?|HBaz|$ejyDUI?Fv5yiF7_wJ(eB4E3KF~Qelkqf-tm}p z7F5eZD+WZe2}?t=F+LauE5n#iRVc!P6NSSF6bn);7|`S#DJjtjtUcRgM}cxkf9?y3 zovt(p7BoHY%9VNPB)Gp9tnLjt9gy~%6%WypzjbLh3Niy>L777W5h_RW3mTeSWR6)c zm}0vdbdVc0VCwYGs8^>R1Z!GZt&mFVsA5j7z7eEV0o)k@kM`kOHHzPm0IBXF@hC0C z(A7a?tGTsOI#y(o-Hituc99!5VaoAU+$%>*(bSz9tO?R>0X&R?ClOqO#$jIqnbe^| zpCXg<#mx0~>XA!N4Dl;TPoMUtkaEtzb15C3fqE*MN;zo|rTSe0{56i0YOR>VHLR$e zC=5LBPjYh5N@rnC6#*x94GH9|HH6&wrRuap&E@9|YOB`v*d9P3sH`69cjIFvOFOx$ zkiHVdMBi9lLOrfhpsy7>AvL~Wx1esa0Mj>TRiiIu3kx=Nq9oh}z%2@$!Q>B$cEb}3 zKjWm?vEn3QAT;oN+eXf{z~_S}X!fiK{65S9{|{i`6^~6|6HY5&bB+g~DRmECS(AhR znLGu}t4r|uMIC5Teu1V$)6420@E=GHu$lWhc%6H(bp1_9;08iyqfw2FYRPNc&WFiv zpU&&WoMDBhlv80w0fbMFC}Gkibp{Dxbbvt(!_l#Ld@LG9zOvru!&tgwH@;29&P5IC znN+nk2W>T@+A3;k6m=c7Gm5#wa>_=x1h{v8;bWM#aeo(>I*NvtMyHzag%QQWs=O{` zQCYvw_}-0Z4e&~*-^dkrowd-J20k+&7n(JYJx3!%G!vk$<={l(lwqC5p{homHmDkQ zB=TzFI@U&@8=GR}W|2kn?Ed08*Lo~m$?Pv0b2-N%*KFc13vxBbLVI?9@yhSESk!8- z-`KUTnQF0kQ9Gac;H#n<@YL)tnzJ1+n*+7_i{;(mu<2F$e8zRIb829g?2Ygi^}K3N zByfnT^^kfhcEQRMBMZ#>8xh#p5{ZtcEU0Vr70Fc$mCAyZEvRgbU^Sz5eTn@77CuDW zkXsSxD5R3^sKvvzl>wg*Ljy?`bd`*S%}4XIqx80g!X8qX&=AeQ<0Z9sffd_iEqV54{|B9Q|AUU(8h9F8}}l literal 0 HcmV?d00001 From 31e80b21759fc90d9c1c113d47319c9fb491bd50 Mon Sep 17 00:00:00 2001 From: William Liu Date: Sat, 15 Dec 2018 08:26:38 -0700 Subject: [PATCH 13/17] Add quickselect algorithm --- algorithms/quick_select.py | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 algorithms/quick_select.py diff --git a/algorithms/quick_select.py b/algorithms/quick_select.py new file mode 100644 index 0000000..223e8ab --- /dev/null +++ b/algorithms/quick_select.py @@ -0,0 +1,62 @@ +""" Python 3 implementation of a quickselect algorithm """ +from typing import List +import unittest +import random + + +class Solution: + + def quickselect(self, items, item_index): + if items is None or len(items) < 1: + return None + + if item_index < 0 or item_index > len(items) - 1: + raise IndexError() + + return self.select(items, 0, len(items) - 1, item_index) + + def select(self, lst, l, r, index): + # base case + if r == l: + return lst[l] + + # choose random pivot + pivot_index = random.randint(l, r) + + # move pivot to beginning of list + lst[l], lst[pivot_index] = lst[pivot_index], lst[l] + + # partition + i = l + for j in range(l+1, r+1): + if lst[j] < lst[l]: + i += 1 + lst[i], lst[j] = lst[j], lst[i] + + # move pivot to correct location + lst[i], lst[l] = lst[l], lst[i] + + # recursively partition one side only + if index == i: + return lst[i] + elif index < i: + return self.select(lst, l, i-1, index) + else: + return self.select(lst, i+1, r, index) + + +class SolutionTest(unittest.TestCase): + + def test_quickselect(self): + s = Solution() + response = s.quickselect([12, 2, 4, 3, 5], 2) + assert response == 4 + + response = s.quickselect([12, 2, 4, 3, 5], 0) + assert response == 2 + + + + +if __name__ == '__main__': + unittest.main() From 7922c573cbf1c3c88ceb025b32f576b83cc871ab Mon Sep 17 00:00:00 2001 From: William Liu Date: Sun, 16 Dec 2018 05:12:11 -0700 Subject: [PATCH 14/17] Fix quick sort algorithm --- algorithms/quick_sort.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/algorithms/quick_sort.py b/algorithms/quick_sort.py index b8bf974..e4a7e9d 100644 --- a/algorithms/quick_sort.py +++ b/algorithms/quick_sort.py @@ -1,4 +1,5 @@ -""" Quick Sort """ +""" Quick Sort in Python3 """ +import pdb def quickSort(mylist): @@ -12,6 +13,7 @@ def quickSortHelper(mylist, first, last): quickSortHelper(mylist, splitpoint+1, last) def partition(mylist, first, last): + #import pdb; pdb.set_trace() pivotvalue = mylist[first] leftmark = first+1 @@ -29,17 +31,15 @@ def partition(mylist, first, last): if rightmark < leftmark: done = True else: - # swap mylist[leftmark], mylist[rightmark] = mylist[rightmark], mylist[leftmark] - # swap - mylist[leftmark], mylist[rightmark] = mylist[rightmark], mylist[leftmark] - + mylist[first], mylist[rightmark] = mylist[rightmark], mylist[first] + return rightmark if __name__ == '__main__': - mylist = [54,26,93,17,77,31,44,55,20] - print "Original: ", mylist + mylist = [54, 26, 93, 17, 77, 31, 44, 55, 20] + print("Original: ", mylist) quickSort(mylist) - print "Quick Sorted: ", mylist \ No newline at end of file + print("Quick Sorted: ", mylist) From 910907f6891ce0b6a35f4ee56ecf737663700c5d Mon Sep 17 00:00:00 2001 From: William Liu Date: Sun, 16 Dec 2018 06:59:14 -0700 Subject: [PATCH 15/17] Update quicksort for Python3, fix bugs --- algorithms/quick_sort.py | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/algorithms/quick_sort.py b/algorithms/quick_sort.py index e4a7e9d..bce0341 100644 --- a/algorithms/quick_sort.py +++ b/algorithms/quick_sort.py @@ -1,38 +1,64 @@ -""" Quick Sort in Python3 """ +""" Quick Sort in Python3 +Quick sort uses divide and conquer to gain the same advantages as merge sort, +with the benefit of using less storage, but at the cost of a worse worst case runtime +O(n^2) if the pivot values are bad. +""" import pdb +from typing import List def quickSort(mylist): + """ Initialize our recursive function """ quickSortHelper(mylist, 0, len(mylist)-1) def quickSortHelper(mylist, first, last): - if first < last: + """ Recursive function to split up """ + if first < last: # check if need to sort still + splitpoint = partition(mylist, first, last) + # now that we know our splitpoint, we can then recursively run quicksort on the list's bottom half and top half quickSortHelper(mylist, first, splitpoint-1) quickSortHelper(mylist, splitpoint+1, last) def partition(mylist, first, last): - #import pdb; pdb.set_trace() - pivotvalue = mylist[first] + """ Partition Process, made up of: + * Pick a pivot value (i.e. what we'll compare our unsorted numbers to) + Based off this value, we'll compare our unsorted values and either move + our items to the left of the pivot or to the right of the pivot. + * """ + pivotvalue = mylist[first] # get the first value as pivotvalue - leftmark = first+1 + leftmark = first + 1 rightmark = last done = False while not done: + # Go from leftmost side onwards (to right) and try to find a value + # that is greater than the pivot value (i.e. left side of pivot should be + # smaller values than pivot value, if we found one that is greater, we + # stop at leftmark, saying we need to do a swap to the right side) while leftmark <= rightmark and mylist[leftmark] <= pivotvalue: leftmark += 1 - while mylist[rightmark] >= pivotvalue and rightmark >= leftmark: + # Go from rightmost side inwards (to left) and try to find a value + # that is less than the pivot value (i.e. right side of pivot should be + # greater values than pivot value, if we found one that is smaller, we + # stop at rightmark, saying we need to do a swap to the left side) + while rightmark >= leftmark and mylist[rightmark] >= pivotvalue: rightmark -= 1 if rightmark < leftmark: - done = True + done = True # we're done sorting through this list because we've crossed else: + # we have a swap between a value in the left list and a value in the right list mylist[leftmark], mylist[rightmark] = mylist[rightmark], mylist[leftmark] + # Once rightmark is less than leftmark, then rightmark is now the split point. + # That means what we picked as the pivot value can now be exchanged with the + # contents of the split point and the pivot value is now in the correct place + # Note: remember that our pivot value was the first value in our list mylist[first], mylist[rightmark] = mylist[rightmark], mylist[first] return rightmark From 0264e6c7347caab9fb02c59362ba5fa69e24279d Mon Sep 17 00:00:00 2001 From: William Liu Date: Thu, 20 Dec 2018 20:41:10 -0700 Subject: [PATCH 16/17] Update merge sort for python3 --- algorithms/merge_sort.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/algorithms/merge_sort.py b/algorithms/merge_sort.py index eb044cd..77cbb24 100644 --- a/algorithms/merge_sort.py +++ b/algorithms/merge_sort.py @@ -2,14 +2,14 @@ def mergeSort(mylist): - print "Splitting", mylist + print("Splitting", mylist) if len(mylist) > 1: mid = len(mylist) // 2 lefthalf = mylist[:mid] - print "Left half ", lefthalf + print("Left half ", lefthalf) righthalf = mylist[mid:] - print "Right half ", righthalf + print("Right half ", righthalf) mergeSort(lefthalf) mergeSort(righthalf) @@ -42,11 +42,11 @@ def mergeSort(mylist): j += 1 k += 1 - print "Merging", mylist + print("Merging", mylist) if __name__ == '__main__': - mylist = [54,26,93,17,77,31,44,55,20] - print "Original: ", mylist + mylist = [54, 26, 93, 17, 77, 31, 44, 55, 20] + print("Original: ", mylist) mergeSort(mylist) - print "Merge Sorted: ", mylist \ No newline at end of file + print("Merge Sorted: ", mylist) From 731113038b6574e3ede72cc61921b0aee8e3bafa Mon Sep 17 00:00:00 2001 From: William Liu Date: Sun, 2 Oct 2022 10:44:41 -0600 Subject: [PATCH 17/17] Add parenthesis on prints --- algorithms/insertion_sort.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/algorithms/insertion_sort.py b/algorithms/insertion_sort.py index cf4fd5f..332d6b0 100644 --- a/algorithms/insertion_sort.py +++ b/algorithms/insertion_sort.py @@ -2,7 +2,7 @@ def insertionSort(mylist): for index in range(1, len(mylist)): - print "Index is ", index # 1, 2, 3, 4, 5, 6, 7, 8; this is the outer loop + print("Index is ", index) # 1, 2, 3, 4, 5, 6, 7, 8; this is the outer loop # setup first case (only one item) currentvalue = mylist[index] @@ -17,6 +17,6 @@ def insertionSort(mylist): if __name__ == '__main__': mylist = [54,26,93,17,77,31,44,55,20] - print "Original: ", mylist + print("Original: ", mylist) insertionSort(mylist) - print "Insertion Sorted: ", mylist \ No newline at end of file + print("Insertion Sorted: ", mylist) \ No newline at end of file