Return to Snippet

Revision: 36849
at November 28, 2010 15:53 by ge01f


Initial Code
# Red Eye Monitor (REM) by Geoff Howland
#
#   Suite Package: For use with REM Package system, which is a generic system
#     for creating runnable code, services, web pages and RPC handlers.
#

# Package information
package:
  name: "Red Eye Monitor"
  version: "2010.11.22.00"

  # Stability ladder, lowest to highest:
  #   unstable - development is occurring
  #   testing  - development has stopped
  #   accepted - more than N (10 medium?) sites confirm running
  #   stable   - more than 10 days without new bug reports
  #   proven   - more than 90(?) days without new bug reports
  #TODO(g): "accepted" will be judged by me initially: there is no user base
  #   and no integrated bug tracking issue tests yet, so all of this is
  #   manual until automation for it arrives.
  stability: unstable

  short: rem
  author: "Geoff Howland"
  maintainer: "Geoff Howland"
  contributors: []
  info: "Comprehensive cloud automation for lazy control freaks"
  website: "http://redeyemon.wordpress.com/"

  # A Suite package is a high-level, master-application type package: it is
  #   meant to be a collection of packages and web stuff.
  type: suite


# This is the script that launches the State scripts, and runs the State
#   Evaluator if necessary.
#NOTE(g): This is a standard launcher, which can be used on any package.  If
#   you want to customize it, rename it to something like "name_launcher.py"
#   so it's obvious you have modified it and it is not the default launcher.
launcher: launcher.py


# This is the State Evaluator script for this package.  If the state is
#   left blank ("") it will be run on mounting the package.  If a state script
#   finishes running without setting a new state, the evaluator is run to
#   determine the next state.
#TODO(g): Is this generic or specific?  It's just going to process the states?
#   If so, then it is generic, and isn't needed here.  The Package module
#   can do it all, and just RUN the script that's in the current system...
#   This changes things though, but making a handler custom would be a pain,
#   no one will want to do it...
#TODO(g): Generic State Evaluator will simply set the state to "active"
#DEFAULT:#state evaluator: scripts/state_evaluator.py
state evaluator: rem_state_evaluator.py


# Paths to use when working with this package
paths:
  # Where our scripts live; this is a sandbox for some security.
  #NOTE(g): Only scripts get their own path, because they have an execution
  #   component, so a dedicated location aids in sandboxing their runs.
  #TODO(g):SECURITY: There is a lot to do here later, for now do it simply
  script: package/rem/package/scripts/

  # Base package path prefix: data, static and other paths work off this.
  #NOTE(g): Done this way because I think the paths to the data files look
  #   clearest like this.  Scripts are usually prefixed by something that says
  #   "script" in the key, but data files may have lots of interesting names.
  #   Keeping the "data/" prefix in the path adds information: it shows this
  #   is a data request, and not a block.  Without it, I think it's hard to
  #   read.
  base: package/rem/package/



# Modules are not run, but they exist as named plug-ins to the package.
#   Using modules allows a flexible and universal method of making resource
#   groups for the package state handler.  Use them for including groups of
#   scripts, or groups of data sources, or whatever your package might need.
#   Because they are just a data container for your scripts to use, they are
#   completely free for any purpose.  The advantage of this is that accessing
#   resources is the same from any package script, and any package script can
#   access the resources of any other package module by:
#     GetPackageModule(group, name)
#TODO(g): Implement: GetPackageModule(group, name)
#TODO(g):SECURITY:HIGH: Add filters to allow/deny modules to outside sources,
#   and name specific sources for allow/deny.  ACLs.  Make general library,
#   so sharedstate/locks/counters and message queues can also use ACLs.
#   This is of primary concern for RPC calls, and an ACL should be able to
#   specify not allowing a DynamicRPC or other calls to access data. (LATER)
modules:
  # Module Groups are used to give name spaces to the elements in the module.
  #   Each module entry below carries a `remote` flag and a list of scripts.
  #NOTE(review): `remote` presumably marks monitors that check a remote
  #   target (only "local" sets it to false) -- confirm against the consumer.
  #TODO(g):REMOVE: These are monitoring modules, but it's an example
  monitors:
    ping:
      remote: true
      script:
        - monitors/ping.py
    snmp:
      remote: true
      script:
        - monitors/snmp.py
    tcp:
      remote: true
      script:
        - monitors/tcp.py
    http:
      remote: true
      script:
        - monitors/http.py
    local:
      remote: false
      script:
        - monitors/local.py
    collector:
      remote: true
      script:
        - monitors/collector.py


# Scripts to process the results of module scripts, keyed by module group
#NOTE(g): This will be passed along to the Job Scheduler Suite.
#   This allows us to have custom processing for results from modules that
#   we register with the Job Scheduler to run
module result processors:
  monitors: scripts/process/monitor_processor.py


# Alternative method (to "module result processors"):
# A module result specification, which defines how to deal with the results,
#   in the standard way that I deal with results.  This is good for
#   monitoring type situations.  Keyed by module group, like the processors.
module result specifications:
  monitors: data/monitor/monitor_result_processor.yaml


# HTTP, RPC stuff
#TODO(g): Come up with a better name than "communication", it sucks
#NOTE(g): Is "web" much better?  I liked more generic better...
communication:
  # Will look in the static path for files referenced by relative path if they
  #   are not caught by HTTP path matches.
  static:
    path: static/html/
  
  # HTTP page entries that are NOT user defined.
  #NOTE(g): User defined pages live in: ./data/web/user_pages.yaml
  http:
    # Show internals page
    #TODO(g): This could be better, all the way around...
    show:
      run:
        - script: scripts/web_demo/show.py

    # Admin page, rendered with the simple.html template
    #TODO(g): Move this to the dropStar Suite Package
    admin:
      run:
        - script: scripts/web_demo/admin.py
      template:
        path: static/html/simple.html

    # Widgets page, rendered with the simple.html template
    #TODO(g): Move this to the dropStar Suite Package
    widgets:
      run:
        - script: scripts/web_demo/widgets.py
      template:
        path: static/html/simple.html

    # Import all content in this YAML file as pages for this package
    #TODO(g): Move this to the dropStar Suite Package
    __load: data/web/user_pages.yaml
  
  
  # RPC calls: JSON data over XmlHttpRequest JS call (via web).
  #   Each entry maps an RPC call name to the script(s) it runs.
  rpc:
    #TODO(g): Clean up the cruft in this file, half of these arent used any more

    # The empty call name runs the same script as "Search"
    "":
      run:
        - script: scripts/web_demo/search.py

    Search:
      run:
        - script: scripts/web_demo/search.py

    # Reload specified 'widgets'.  Given as a list or as a space delimited
    #   string: widget names have no spaces.
    #TODO(g): This one is universal?  Could using specific ones to keep
    #   context/position be a good practice?
    ReloadWidgets:
      run:
        - script: scripts/admin/reload_widgets.py

    #TODO(g): This one is universal?  Could using specific ones to keep
    #   context/position be a good practice?
    DynamicRPC:
      run:
        #TODO(g): This will need to use the PACKAGE name (mounted name) to
        #   access the proper script now, since packages can be dynamically
        #   mounted anywhere...
        - script: scripts/dynamic/dynamic_rpc.py
    
    
    # Monitoring: host and graph views
    #TODO(g): Use DynamicRPC and get rid of these...
    MonitorHostList:
      run:
        - script: scripts/monitor_admin/host_list.py

    MonitorHostView:
      run:
        - script: scripts/monitor_admin/host_view.py

    MonitorGraphList:
      run:
        - script: scripts/monitor_admin/graph_list.py

    MonitorGraphView:
      run:
        - script: scripts/monitor_admin/graph_view.py

    MonitorGraphViewDialog:
      run:
        - script: scripts/monitor_admin/graph_view_dialog.py

    # Monitoring: manage monitors.  The "manage" entry currently just runs
    #   the create-monitor dialog.
    #TODO(g): Later this will do more things, for now it's a single purpose menu
    #TODO(g): Use DynamicRPC and get rid of these...
    MonitorManageMonitors:
      run:
        - script: scripts/monitor_admin/create_monitor_dialog.py

    CreateMonitor:
      run:
        - script: scripts/monitor_admin/create_monitor.py

    DeleteMonitorDialog:
      run:
        - script: scripts/monitor_admin/delete_monitor_dialog.py

    DeleteMonitorConfirmed:
      run:
        - script: scripts/monitor_admin/delete_monitor.py

    # Monitoring: manage hosts.  The "manage" entry currently just runs the
    #   create-host dialog.
    #TODO(g): Later this will do more things, for now it's a single purpose menu
    #TODO(g): Use DynamicRPC and get rid of these...
    MonitorManageHosts:
      run:
        - script: scripts/monitor_admin/create_host_dialog.py

    CreateMonitorHost:
      run:
        - script: scripts/monitor_admin/create_host.py

    DeleteMonitorHostDialog:
      run:
        - script: scripts/monitor_admin/delete_host_dialog.py

    DeleteMonitorHostConfirmed:
      run:
        - script: scripts/monitor_admin/delete_host.py

    # Monitoring: dashboards
    CreateDashboardDialog:
      run:
        - script: scripts/monitor_admin/create_dashboard_dialog.py

    CreateDashboardConfirmed:
      run:
        - script: scripts/monitor_admin/create_dashboard.py

    # Monitoring: alert visualizer
    AlertVisualizerView:
      run:
        - script: scripts/monitor_admin/alert_visualizer.py

    AlertVisualizerDialog:
      run:
        - script: scripts/monitor_admin/alert_visualizer_dialog.py

    # Monitoring: work on many selected hosts at once
    #TODO(g): Use DynamicRPC and get rid of these...
    MonitorAddDefaultMonitorsToHosts:
      run:
        - script: scripts/monitor_admin/selected_hosts_add_default_monitors.py

    MonitorAddDefaultMonitorsToHostsDialog:
      run:
        - script: scripts/monitor_admin/selected_hosts_add_default_monitors_dialog.py

    MonitorDeleteSelectedHosts:
      run:
        - script: scripts/monitor_admin/selected_hosts_delete.py

    MonitorDeleteSelectedHostsDialog:
      run:
        - script: scripts/monitor_admin/selected_hosts_delete_dialog.py

    # Mother's RPC
    GetHost:
      run:
        - script: scripts/mother/get_host.py

    GetAllHosts:
      run:
        - script: scripts/mother/get_all_hosts.py


    # Page and widget administration calls
    #TODO(g): Move to dropStar Suite Package
    #TODO(g): Use DynamicRPC and get rid of these...
    UpdateAdmin:
      run:
        - script: scripts/web_demo/admin.py

    UpdateWidgets:
      run:
        - script: scripts/web_demo/widgets.py

    CreatePageDialog:
      run:
        - script: scripts/admin/create_page_dialog.py

    CreatePage:
      run:
        - script: scripts/admin/create_page.py

    CreateWidgetDialog:
      run:
        - script: scripts/admin/create_widget_dialog.py

    CreateWidget:
      run:
        - script: scripts/admin/create_widget.py

    CollectFieldSet:
      run:
        - script: scripts/web_demo/collect_fieldset.py

    ViewPageWidgets:
      run:
        - script: scripts/admin/view_page_widgets.py

    EditPageDialog:
      run:
        - script: scripts/admin/edit_page_dialog.py

    # Saving an edited page runs the same script as CreatePage
    EditPageSave:
      run:
        - script: scripts/admin/create_page.py

    DeletePageDialog:
      run:
        - script: scripts/admin/delete_page_dialog.py

    DeletePageConfirmed:
      run:
        - script: scripts/admin/delete_page.py

    ClonePageDialog:
      run:
        - script: scripts/admin/clone_page_dialog.py

    ClonePage:
      run:
        - script: scripts/admin/clone_page.py

    # The edit-widget dialog runs the same script as CreateWidgetDialog
    EditWidgetDialog:
      run:
        - script: scripts/admin/create_widget_dialog.py

    EditWidgetSave:
      run:
        - script: scripts/admin/edit_widget_save.py

    DeleteWidgetDialog:
      run:
        - script: scripts/admin/delete_widget_dialog.py

    DeleteWidgetConfirmed:
      run:
        - script: scripts/admin/delete_widget.py

    #TODO(g): Later this may do more, for now it just creates pages...
    #TODO(g): Use DynamicRPC and get rid of these...
    ManagePages:
      run:
        - script: scripts/admin/create_page_dialog.py

    #TODO(g): Later this may do more, for now it just creates widgets...
    #TODO(g): Use DynamicRPC and get rid of these...
    ManageWidgets:
      run:
        - script: scripts/admin/create_widget_dialog.py
  
  
  


# State machine for this package
state machine:
  # Starting state for this package
  state: initial

  # These contexts will be available as the active state data, when they are
  #   set.  Every context has the same shape:
  #     script run times / script completed times - number of times the
  #       context's script has run or completed
  #     script.platform.xplat - the cross platform run block
  #NOTE(review): the entries below start with "script/" while the other
  #   script references in this file start with "scripts/" -- confirm this
  #   prefix is intended.
  context:
    # Startup context
    initial:
      script run times: 0
      script completed times: 0
      script:
        platform:
          xplat:
            - script: script/rem/initial.py

    # Active context
    active:
      script run times: 0
      script completed times: 0
      script:
        platform:
          xplat:
            - script: script/rem/active.py

    # Shutdown context
    shutdown:
      script run times: 0
      script completed times: 0
      script:
        platform:
          xplat:
            #TODO(g): Could send regional collectors information about the
            #   shutdown, including the NEW mother to use (if this is mother),
            #   and why this is being shut down, so they can log it and know
            #   what is going on...
            - script: script/rem/shutdown.py


## Public key of author, verify that this package was produced by the author
##TODO(g): ...
#pubkey: null
#
## Signed application data, SHA1 sum of things that matter...
##TODO(g): ...
#signed: null


#TODO(g): What to do about this thing?  GLOBAL!  Move it to global shared state!
## Sites that are served by this dropSTAR instance
#data:
#  time series path: /timeseries/


# These packages should be ON this machine, but arent MOUNTED by this package
#TODO(g): Can pass args into the required packages, for use in interfacing
#   with this package?  Could automate connectors or something, ways to
#   interface or modify/filter data...
requires packages: {}
  
# Mount these packages
#TODO(g): Mount options, "mount as" for different package name, things
#   access packages by their names, so the default name is important, but a
#   "mount as" could provide an alternative way to use that package.
#   Could also provide an over-ride package handler, to use instead of the
#   script the package itself specifies...
#   Could attach additional monitors, or specify the logging target, and
#   things like that...
#TODO(g): LOGGING TARGETS!  This is the right place to assign them, then
#   use them automatically in the ProcessBlock code, passed down with the
#   request_state information, or something.
mount packages:
  # The Package Mounter...  Really, in the package?  How does THIS package
  #   get mounted?  Seems like it needs to be a ProcessBlock type feature...
  #   Or better yet, just IN a procbloc.
  MonitorPackage: {}
  WebStuff: {}


# Jobs are different than the state machine.  They can be run as cron, or
#   against the boolean result of a test-script (with cron), or can be
#   invoked over RPC.  Their status/results/duration are associated with the
#   state machine, since they are made to operate on the state machine.
jobs:
  # Store monitor results in the "monitor.results.store" queue
  #NOTE(g): This allows us to separate processing I/O from other tasks, win!
  #NOTE(review): the freebsd and xplat entries are currently identical.
  monitor_storage:
    platform:
      freebsd:
        - script: scripts/monitor/queue_storage.py
          interval: 5
      xplat:
        - script: scripts/monitor/queue_storage.py
          interval: 5

  #TODO(g): Change this to pulling data out of the "monitor.results.analyze" queue...
  alert_sla_monitoring:
    platform:
      xplat:
        - script: scripts/monitor/alert_sla.py
          interval: 5

  #TODO(g): Conditionally start SLA monitoring! (Test with global lock)
  alert_sla_outage_handler:
    platform:
      xplat:
        - script: scripts/monitor/alert_sla_outage.py
          interval: 30


#TODO(g): Move all this stuff into jobs or the state machine
#
##TODO(g): Conditionally start node monitors! (Test with global lock)
#run workers:
#  central_monitoring:
#    - script: scripts/monitor/node/node_monitor.py
#    
#    #TODO(g): Run workers needs to be reformatted...
#    #minimum: 5
#    #maximum: 20
#    #work:
#    #  - script: scripts/monitor/node/node_monitor.py
#
#
#run_simultaneous:
#  #TODO(g): Conditionally start web server? (Test with global lock)
#  #
#  #   Should CONDITIONALLY start this, if any package mounted has http/rpc stuff
#  httpd:
#    - script: scripts/httpd/__init__.py
#  
#  
#  #TODO(g): Run shared_state_sync automatically if any packages have
#  #   "load state" data...
#  
#  # Synchronize our shared state system, saving any deferred writes
#  shared_state_sync:
#    - script: scripts/admin/shared_state_sync.py
#
#  
#  #TODO(g): Conditionally start SLA monitoring!  Uses the Job Scheduler...
#  


# --- Only load these for the REM central server: Mother ---


# Load sharedstate buckets, so that running state can have persistence
#NOTE(g):Specifies the path to load the state.  If a "%s" is present, then
#   parse the %s string for the key name, and store the contents of each
#   YAML file into a key in the bucket "__timeseries.ts".
#   If the "%s" were not present, it would simply load the contents of this
#   YAML file into the keys of YAML data.  If the YAML data was not in
#   dict format, then it would store the data wholly in the key "default".
#NOTE(g): When any of this data is changed, it will be automatically saved
#   back into the specified files, as the save files are registered against
#   the bucket names in "__sharedstate.save.registered" private bucket.
load state:

  # --- SPLIT: The below stuff is for the GUI/web system, and should be in a dropStar Suite of its own! --- #

  # Load up each of the page widgets, into its page.ui.widgets key (on page)
  ui.page.widgets: data/web/user_page_widgets/%s.yaml

  # User created pages
  ui.pages: data/web/user_pages.yaml

  # HTML Templates
  ui.templates: data/web/html_templates.yaml

  # Dynamic Widgets: Used to dynamically generate page content from complex
  #   comprehensive data descriptions
  ui.dynamic_widgets: data/web/dynamic_widgets/%s.yaml

  # --- SPLIT: GUI from REM stuff.  Above is GUI and should go into a DropStar Suite of its own! --- #

  # Load up each of the monitors of each host (key is host's FQDN)
  monitors.hosts: data/monitor/hosts.yaml

  # Host Groups
  monitors.host_groups: data/monitor/host_groups.yaml

  # Monitoring: Alerts: key=alert name
  #NOTE(g): SLAs are stored in alert['sla'] = {}.  They are alert specific,
  #   so cannot be keyed on their own name in their own file, without effort.
  monitors.alerts: data/monitor/alerts.yaml

  # Monitoring: Roles: key=role name
  monitors.roles: data/monitor/roles.yaml

  # Monitoring: Contacts: key=contact user name
  monitors.contacts: data/monitor/contacts.yaml

  # Monitoring: Silences: dict of dicts (should be list of dicts)
  monitors.silences: data/monitor/silences.yaml

  # Monitoring: Shifts: key=shift name
  monitors.shifts: data/monitor/shifts.yaml

  # Monitoring: Shift Filters: key=shift filter name
  monitors.shift_filters: data/monitor/shift_filters.yaml

  # Monitoring: Outages: dict of dicts: Active outages
  monitors.outages: data/monitor/outages.yaml

  # Monitoring: Outage Groups: dict of dicts, Active Outage Groups
  monitors.outage_groups: data/monitor/outage_groups.yaml

  # Monitoring: Outage Groups: dict of dicts, Historical Outage Groups
  monitors.outage_groups.history: data/monitor/outage_groups_history.yaml

  # Monitoring: Outages: dict of dicts: Historical outages (completed),
  #   "unack" set (presumably not yet acknowledged -- confirm)
  monitors.outages.history.unack: data/monitor/outages_history_unack.yaml

  # Monitoring: Outages: dict of dicts: Historical outages (completed),
  #   "ack" set (presumably acknowledged -- confirm)
  monitors.outages.history.ack: data/monitor/outages_history_ack.yaml

  # Monitoring: Notifications: dict of dicts: History of emails/SMSs/etc
  monitors.notifications: data/monitor/alert_notifications.yaml

  # Monitoring: Dashboard: dict of dicts
  monitors.dashboard: data/monitor/dashboard.yaml

  # Monitoring: Globals: dict of values
  monitors.globals: data/monitor/globals.yaml

  # Keep track of when we last rendered graphs, to ensure we dont do them
  #   all again immediately on restart
  #NOTE(g): If you're wondering why startup takes a lot of time, this is why.
  #TODO(g): Cant I remove this now?  Graphing is now client side, I dont think
  #   this is required anymore...  Even if we keep them, we dont need to save it
  #__timeseries.last_rendered: data/monitor/timeseries_last_rendered.yaml
  


#TODO(g): I kind of like this specification model, where I could give it a
#   full name, or use the path and key, but I have to explicitly save it.
#   This way there can be 10000s of locks in the system, but only the specified
#   ones are stored, and are done so by package that uses them.
#NOTE(g): Use namespaces with dot separation to ensure package separation.
#   Not stomping on your data is up to you!
#TODO(g): Add test with warning/error if packages try to use the same variables
#   in their usage.
#load locks:
#   monitors.lock01: data/monitor/locks/%s.yaml
#   monitors.lock02: data/monitor/locks/%s.yaml
#   monitors.lock03: data/monitor/locks/%s.yaml

#TODO(g): Remove this once the individual lock method is done, this doesnt work
#   in a packaged environment...  Leaving temporarily only for design history.
#######TODO(g): Store all the locks?
#######TODO(g): Does it ever make sense to store SOME locks?  SOME counters?
#######   It seems to me they shouldnt be that plentiful, and they, together, are
#######   the shared state.  Clear them if you want, but save them all for peace
#######   of mind that they can be trusted to come back.
#######TODO(g): Figure out the delays on saves, ESPECIALLY for counters, which
#######   by their definition are meant to change in a frequent manner, linear
#######   or exponential to requests
#######load locks: data/monitor/locks.yaml


# Load the stored counters
#NOTE(g): Being listed here, they will automatically be registered to be
#   saved, without any delays, so these files should always be accurate in
#   terms of the latest counter values
#TODO(g):DESIGN: Switch this to just specify a directory?  Then the counters
#   can be specified in one line.  This way is just more typing...
#   Per file has the benefit of them working without any starting files,
#   and also not saving counters that are used in the code, but not specified
#   to be saved.  The method of one entry per counter is more explicit...
load counters:
  #TODO(g): Should these be moved into the controllers for this data?
  #   Maybe the rest of them should be too...
  monitors.outages: data/monitor/counters/monitors.outages
  monitors.outage_groups: data/monitor/counters/monitors.outage_groups
  monitors.notifications: data/monitor/counters/monitors.notifications

Initial URL


Initial Description
With comments, and not stripped, version in development.  Not Python, YAML.

Initial Title
REM Package Example

Initial Tags


Initial Language
Python