Add Python project examples

+ Neural network CLI + Hidden Markov Model CLI + K-Means clustering CLI + Linear regression CLI + Screenshots, updated README instructions
2022-02-06 13:39:26 -05:00
parent bbbf404340
commit 940d035638
22 changed files with 2523 additions and 0 deletions
--- a/python/linear-regression/README.md
+++ b/python/linear-regression/README.md
@@ -0,0 +1,65 @@
+Install the required dependencies for the matplotlib GUI frontend and all other pip packages for this project
+
+```bash
+sudo apt install python3-tk
+python3.9 -m pip install -r requirements.txt
+```
+
+Given a set of `(X, Y)` data points as `[(X, Y), .., (X, Y)]`, determine the 
+best-fitting line, and then use that line to predict the dependent `Y` 
+value for an independent `GIVEN_X` value. 
+
+```bash
+python3.9 linear-regression.py -h
+usage: linear-regression.py [-h] [--silent] [--file [FILE_PATH]] [GIVEN_X] [X,Y ...]
+
+Find most fitting line plot for given data points and predict value given some X
+
+positional arguments:
+  GIVEN_X               Value for X for prediction using linear regression
+                            (default: '4.5')
+                                
+  X,Y                   A list of data points separated by spaces as: x,y x,y x,y ...
+                            (default: '[(1, 3), (2, 7), (3, 5), (4, 9), (5, 11), (6, 12), (7, 15)]')
+                                
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --silent              When this flag is set, line plot visualization will not be shown
+                            (default: 'False')
+                                
+  --file [FILE_PATH], -f [FILE_PATH]
+                        Optionally provide file for data to be read from. Each point must be on it's own line with format x,y 
+```
+
+Running linear regression program
+```bash
+python3.9 linear-regression.py --file ./input.txt --silent
+Finding fitting line plot for given data [(1, 3), (2, 7), (3, 5), (4, 9), (5, 11), (6, 12), (7, 15)]
+points_avg: (5.117647058823529, 5.235294117647059)
+variance: (241.76470588235296, 193.05882352941177)
+sigma: (3.887196176892422, 3.4736402333270258)
+covariance: 0.8455882352941174
+correlation: 0.0626235432924427
+Our line Y = BX + A must pass through the point (5.117647058823529, 5.235294117647059)
+Y = (0.05596107055961069)X + 4.9489051094890515
+For X = 4.5, Y is predicted to be 5.200729927007299 
+```
+
+By default, the following linear regression is calculated and displayed 
+```bash
+python3.9 linear-regression.py
+
+
+Finding fitting line plot for given data [(1, 3), (2, 7), (3, 5), (4, 9), (5, 11), (6, 12), (7, 15)]
+points_avg: (4.0, 8.857142857142858)
+variance: (28.0, 104.85714285714286)
+sigma: (2.160246899469287, 4.180453381654971)
+covariance: 8.666666666666666
+correlation: 0.9596775116832306
+Our line Y = BX + A must pass through the point (4.0, 8.857142857142858)
+Y = (1.8571428571428565)X + 1.4285714285714315
+For X = 4.5, Y is predicted to be 9.785714285714285
+```
+
+![](screenshot.png)
--- a/python/linear-regression/input.txt
+++ b/python/linear-regression/input.txt
@@ -0,0 +1,17 @@
+1,2
+2,3
+2,2
+5,6
+6,7
+6,8
+7,11
+1,1
+2,6
+4,8
+6,1
+3,2
+15,5
+10,2
+2,10
+11,4
+4,11
--- a/python/linear-regression/linear-regression.py
+++ b/python/linear-regression/linear-regression.py
@@ -0,0 +1,198 @@
+################################################################################
+# Author: Shaun Reed                                                           #
+# About: Linear regression CLI                                                 #
+# Contact: shaunrd0@gmail.com  | URL: www.shaunreed.com  | GitHub: shaunrd0    #
+################################################################################
+
+from ast import literal_eval
+from matplotlib import pyplot as plt
+from typing import List
+import argparse
+import math
+import numpy as np
+import sys
+
+
+################################################################################
+# Commandline Argument Parser
+################################################################################
+
+# ==============================================================================
+
+def init_parser():
+    """
+    Builds the argparse parser that defines the linear regression CLI
+
+    :return: ArgumentParser with GIVEN_X and X,Y positionals plus the --silent and --file options
+    """
+    # Every argument that reports a default ends its help text with this same suffix
+    default_suffix = "    (default: '%(default)s')\n        "
+
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.RawTextHelpFormatter,
+        description='Find most fitting line plot for given data points and predict value given some X',
+    )
+
+    # Positional: the X value that a Y prediction is made for
+    cli.add_argument(
+        'given', metavar='GIVEN_X', nargs='?', type=float, default=4.5,
+        help="Value for X for prediction using linear regression\n" + default_suffix,
+    )
+
+    # Positional: any number of x,y pairs, each converted by point()
+    cli.add_argument(
+        'data', metavar='X,Y', nargs='*', type=point,
+        default=[(1, 3), (2, 7), (3, 5), (4, 9), (5, 11), (6, 12), (7, 15)],
+        help="A list of data points separated by spaces as: x,y x,y x,y ...\n" + default_suffix,
+    )
+
+    # Flag: suppresses the line plot visualization
+    cli.add_argument(
+        '--silent', action='store_true', default=False,
+        help="When this flag is set, line plot visualization will not be shown\n" + default_suffix,
+    )
+
+    # Option: argparse opens the given path itself (type=open), so the parsed value is a file object
+    cli.add_argument(
+        '--file', '-f', metavar='FILE_PATH', nargs='?', type=open,
+        help="Optionally provide file for data to be read from. "
+             "Each point must be on it's own line with format x,y \n        ",
+    )
+    return cli
+
+
+def point(arg):
+    """
+    Helper function for parsing x,y points provided through argparse CLI
+
+    :param arg: A single argument passed to an option or positional argument
+    :return: A tuple (x, y) representing a data point
+    :raises argparse.ArgumentTypeError: If arg is not exactly two numbers in x,y format
+    """
+    try:
+        x, y = literal_eval(arg)
+    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+        # literal_eval rejects malformed text; the unpacking rejects anything that is not a pair.
+        # Only these errors are translated so KeyboardInterrupt / SystemExit still propagate.
+        raise argparse.ArgumentTypeError("Please provide data points in x,y format") from None
+    # literal_eval also accepts strings, lists, booleans, etc. which would only fail later in the math
+    for value in (x, y):
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            raise argparse.ArgumentTypeError("Please provide data points in x,y format")
+    return x, y
+
+
+################################################################################
+# Linear Regression Calculation
+################################################################################
+
+# ==============================================================================
+
+def points_average(data):
+    """
+    Computes the mean X and the mean Y over a list of points [(x, y), (x, y), ...]
+
+    :param data: List [(x, y), (x, y), ...]
+    :return: Tuple (x, y) holding the average X and average Y as floats
+    """
+    total_x = sum(pair[0] for pair in data)
+    total_y = sum(pair[1] for pair in data)
+    return float(total_x / len(data)), float(total_y / len(data))
+
+
+def points_variance(data, points_avg):
+    """
+    Sums the squared deviations from the average, separately for X and for Y
+    The sums are returned as-is; they are not divided by the number of points
+
+    :param data: List of data points [(x, y), (x, y), ...]
+    :param points_avg: Average (x, y) position for the list of points in data
+    :return: Tuple (x, y) of the summed squared deviations for X and for Y
+    """
+    spread_x = sum(math.pow(pt[0] - points_avg[0], 2) for pt in data)
+    spread_y = sum(math.pow(pt[1] - points_avg[1], 2) for pt in data)
+    return spread_x, spread_y
+
+
+def points_covariance(data, points_avg):
+    """
+    Computes the covariance between X and Y for the data set
+    The summed products of deviations are divided by (number of points - 1)
+
+    :param data: List of data points [(x, y), (x, y), ...]
+    :param points_avg: Tuple of average X, Y for data set list
+    :return: Single float value representing covariance
+    """
+    products = [(pt[0] - points_avg[0]) * (pt[1] - points_avg[1]) for pt in data]
+    return float(sum(products) / (len(data) - 1))
+
+
+def show_regression(data, beta, alpha, given=None):
+    """
+    Shows the linear regression in the matplotlib subplot
+    Line drawn with Y = BX + A
+    Also prints and plots the predicted Y for the given X
+
+    :param data: Data to show on the scatter plot
+    :param beta: Value for B in the line equation
+    :param alpha: Value for A in the line equation
+    :param given: X value to predict Y for; when omitted, the GIVEN_X stored in the
+        module-level `context` set by main() is used
+    """
+    if given is None:
+        # Backward compatible fallback for callers that pass only (data, beta, alpha)
+        given = context.given
+    predicted = beta * given + alpha
+
+    dataX, dataY = zip(*data)
+    # Extend the drawn line one unit beyond the smallest and largest X in the data
+    scaleX = np.linspace(min(dataX) - 1, max(dataX) + 1, 100)
+    scaleY = beta * scaleX + alpha
+    plt.plot(scaleX, scaleY, c='g')
+    plt.scatter(dataX, dataY, c='k')
+    print(f'For X = {given}, Y is predicted to be {predicted} ')
+    # Highlight the predicted point in a different color than the input data
+    plt.scatter(given, predicted, c='#e6e600')
+    plt.show()
+
+
+################################################################################
+# Main
+################################################################################
+
+# ==============================================================================
+
+def main(args: List[str]):
+    """
+    Runs the linear regression CLI: parses arguments, prints the statistics for the data set,
+    prints the fitted line Y = BX + A and the predicted Y for GIVEN_X, and shows the plot
+    unless --silent is set
+
+    :param args: Full argument vector; args[0] is skipped as the program name
+    """
+    parser = init_parser()
+    global context
+    context = parser.parse_args(args[1:])
+    if context.file:  # If a file was provided, use that data instead
+        # Close the handle argparse opened once every line is read; blank lines are skipped
+        with context.file as data_file:
+            rows = [literal_eval(line) for line in map(str.strip, data_file) if line]
+        context.data = [(float(x), float(y)) for x, y in rows]
+    # Report the data only after the file is loaded so the message shows what is actually fitted
+    print(f'Finding fitting line plot for given data {context.data}')
+
+    # Sample statistics below divide by (n - 1), so a single point cannot be fitted
+    if len(context.data) < 2:
+        parser.error('At least two data points are required for linear regression')
+
+    # Find the average for the data X and Y points
+    data_avg = points_average(context.data)
+    print(f'points_avg: {data_avg}')
+
+    # Find the summed squared deviations for the data X and Y points
+    data_variance = points_variance(context.data, data_avg)
+    print(f'variance: {data_variance}')
+
+    # Find the standard deviations for X and Y values
+    sample_size = len(context.data) - 1
+    data_sigma = (math.sqrt(float(data_variance[0] / sample_size)),
+                  math.sqrt(float(data_variance[1] / sample_size)))
+    print(f'sigma: {data_sigma}')
+
+    # The correlation divides by sigma_x * sigma_y, so both must be non-zero
+    if 0.0 in data_sigma:
+        parser.error('Correlation is undefined when all X values or all Y values are identical')
+
+    # Find the covariance between X, Y within data set
+    data_covariance = points_covariance(context.data, data_avg)
+    print(f'covariance: {data_covariance}')
+
+    # Find correlation between X, Y within data set
+    data_correlation = (1.0/math.prod(data_sigma)) * data_covariance
+    print(f'correlation: {data_correlation}')
+
+    # Find equation for linear regression for the given data set
+    print(f'Our line Y = BX + A must pass through the point {data_avg}')
+    data_beta = data_correlation * float(data_sigma[1] / data_sigma[0])
+    data_alpha = data_avg[1] - data_beta * data_avg[0]
+    print(f'Y = ({data_beta})X + {data_alpha}')
+
+    # Predicts the Y value, given the X value provided through the CLI
+    if context.silent:
+        # show_regression() prints the prediction itself; print it here when the plot is skipped
+        print(f'For X = {context.given}, Y is predicted to be {data_beta * context.given + data_alpha} ')
+    else:
+        # Show the final graph produced by linear regression calculations
+        show_regression(context.data, data_beta, data_alpha)
+
+
+# Script entry point: run the CLI with the process arguments and exit with main()'s return value
+if __name__ == "__main__":
+    exit_status = main(sys.argv)
+    sys.exit(exit_status)
--- a/python/linear-regression/screenshot.png
+++ b/python/linear-regression/screenshot.png