Revision: 31113
Updated Code
at August 30, 2010 07:42 by wware
Updated Code
"""Setting up a web service for sharing numerical data
This came up in the context of people doing quantified-self experiments
but I think it can be valuable in several contexts, if done well. General
principles:
(1) Data should be as self-documenting as possible, via semantic
markup or something similar.
(2) Simultaneously, there should be as few a priori constraints as
possible on how data is formatted.
(3) Data should be human-readable, or easily rendered human-readable.
(4) There should be some provision for controlling access to data.
So I think it makes sense to offer data in a JSON format.
To support ease of understanding data semantically, data objects may
refer to a descriptor object. If they don't, they should contain their
own descriptive information.
To avoid confusion between different objects, every object with any
likelihood of persistence (the exception being a request template sent
to the web service) should include a randomly generated 128-bit UUID.
An alternative to the UUID would be a permanently-assigned URI, in
keeping with the Semantic Web way of doing things. It might make sense
that any object shared publicly would have a URI.
Privacy control can be done a few different ways. Obviously a server
could operate behind a firewall and the system could be set up to
prohibit data sharing beyond the firewall. But for data that can't
just live behind a firewall there should be some more nuanced
provision for controlling privacy.
A web service should allow users to:
(1) Post descriptor and data objects on the server.
(2) Specify who is allowed to fetch a particular object.
(a) Create groups of the users on the system.
(b) Use a set-union of groups and individual users to specify
who is allowed to fetch an object. (See example below, where
an individual is "Bob Smith" and a group is "weatherbuffs".)
(3) Fetch data and descriptor objects using a template.
"""
import json
import pprint
import datetime
import urllib
########### Some preliminaries ##########################
#
# No server implementation exists yet at the time of writing.
# The helpers that follow illustrate how client code is expected
# to reach the service once there is one.
SERVER_URL = "http://127.0.0.1/data-server/"
def httpPostRequest(params):
    """POST ``params`` to the data server and return the decoded JSON reply.

    ``params`` is anything ``urllib.urlencode`` accepts (a mapping or a
    sequence of key/value pairs).  It is sent form-encoded as the request
    body to ``SERVER_URL`` and the response body is parsed with
    ``json.loads``.  Errors raised while opening the URL, reading the
    response, or decoding the JSON propagate to the caller.

    NOTE(review): urlencode converts every value with str(), so nested
    lists/dicts (such as 'samples' or 'format') are transmitted as their
    Python string form -- confirm the server expects that, or JSON-encode
    such values before posting.
    """
    f = urllib.urlopen(SERVER_URL, urllib.urlencode(params))
    try:
        data = f.read()
    finally:
        # Release the connection even when reading the response fails.
        f.close()
    return json.loads(data)
def httpGetRequest(params):
    """GET from the data server using ``params`` and return the decoded JSON.

    ``params`` is anything ``urllib.urlencode`` accepts (a mapping or a
    sequence of key/value pairs).  It is form-encoded and appended to
    ``SERVER_URL`` as the query string; the response body is parsed with
    ``json.loads``.  Errors raised while opening the URL, reading the
    response, or decoding the JSON propagate to the caller.
    """
    f = urllib.urlopen(SERVER_URL + '?' + urllib.urlencode(params))
    try:
        data = f.read()
    finally:
        # Release the connection even when reading the response fails.
        f.close()
    return json.loads(data)
# One hour as a timedelta, for readable arithmetic on datetimes.
HOUR = datetime.timedelta(minutes=60)
########### Descriptors and data #####################
def generateId():
    """Return a new random identifier as a 32-character lowercase hex string.

    The value is a version-4 (random) UUID, i.e. a 128-bit identifier drawn
    from the operating system's random source.  The previous approach of
    hashing ``repr(random.random())`` could never carry more entropy than a
    single float from the non-cryptographic generator, whatever the digest
    length.
    """
    # Local import, in keeping with how this helper pulled in its
    # dependencies before; the module-level imports stay untouched.
    import uuid
    return uuid.uuid4().hex
# Descriptor object: carries the descriptive information for the data
# objects that point at it through its id.
descriptor = dict(
    id=generateId(),  # a URI could also work here
    summary='A sequence of temperature samples in time',
    creator='Will Ware <[email protected]>',
    format=dict(
        units='fahrenheit',
        period=1,  # seconds
    ),
)
# Naive local time, no tzinfo attached.
# Open question: how to handle time zones? UTC?
now = datetime.datetime.now()

# Data object: one batch of samples, pointing at the descriptor by id.
data = {
    "timestamp": now.strftime("%Y/%m/%d %H:%M:%S.%f"),
    "latitude": 42.0,
    "longitude": -71.0,
    "id": generateId(),
    "descriptor": descriptor["id"],
    "creator": "Will Ware <[email protected]>",
    "experiment": "Outdoor temperature near Will's house",
    "summary": "Anything summary-wise more specific than the descriptor",
    "samples": [82.0, 85.0, 83.0, 84.0, 86.0],
    # individual users and groups permitted to fetch this object
    "visible-to": ["Bob Smith", "weatherbuffs"],
}
# Show both objects, separated by a blank line.
print(pprint.pformat(descriptor))
print('')
print(pprint.pformat(data))

# Disabled until a server exists: upload the descriptor object first,
# then the data object.
if False:
    for payload in (descriptor, data):
        httpPostRequest(payload)
#################################################
# Query template: selects every data object that is less than two hours
# old, was created by Will, and uses the descriptor defined earlier.
# The "__gt" suffix presumably mirrors Django's field-lookup naming,
# since the server will probably be written in Django.
template = dict(
    timestamp__gt=(now - 2 * HOUR).strftime('%Y/%m/%d %H:%M:%S.%f'),
    creator='Will Ware <[email protected]>',
    descriptor=descriptor['id'],
)

print('')
print(pprint.pformat(template))

# Disabled until a server exists: run the query and show the reply.
if False:
    print(httpGetRequest(template))
Revision: 31112
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at August 30, 2010 07:36 by wware
Initial Code
import json
import pprint
import datetime
import urllib
########### Some preliminaries ##########################
#
# No server implementation exists yet at the time of writing.
# The helpers that follow illustrate how client code is expected
# to reach the service once there is one.
SERVER_URL = "http://127.0.0.1/data-server/"
def httpPostRequest(params):
    """POST ``params`` to the data server and return the decoded JSON reply.

    ``params`` is anything ``urllib.urlencode`` accepts (a mapping or a
    sequence of key/value pairs).  It is sent form-encoded as the request
    body to ``SERVER_URL`` and the response body is parsed with
    ``json.loads``.  Errors raised while opening the URL, reading the
    response, or decoding the JSON propagate to the caller.

    NOTE(review): urlencode converts every value with str(), so nested
    lists/dicts (such as 'samples' or 'format') are transmitted as their
    Python string form -- confirm the server expects that, or JSON-encode
    such values before posting.
    """
    f = urllib.urlopen(SERVER_URL, urllib.urlencode(params))
    try:
        data = f.read()
    finally:
        # Release the connection even when reading the response fails.
        f.close()
    return json.loads(data)
def httpGetRequest(params):
    """GET from the data server using ``params`` and return the decoded JSON.

    ``params`` is anything ``urllib.urlencode`` accepts (a mapping or a
    sequence of key/value pairs).  It is form-encoded and appended to
    ``SERVER_URL`` as the query string; the response body is parsed with
    ``json.loads``.  Errors raised while opening the URL, reading the
    response, or decoding the JSON propagate to the caller.
    """
    f = urllib.urlopen(SERVER_URL + '?' + urllib.urlencode(params))
    try:
        data = f.read()
    finally:
        # Release the connection even when reading the response fails.
        f.close()
    return json.loads(data)
# One hour as a timedelta, for readable arithmetic on datetimes.
HOUR = datetime.timedelta(minutes=60)
########### Descriptors and data #####################
def generateId():
    """Return a new random identifier as a 32-character lowercase hex string.

    The value is a version-4 (random) UUID, i.e. a 128-bit identifier drawn
    from the operating system's random source.  The previous approach of
    hashing ``repr(random.random())`` could never carry more entropy than a
    single float from the non-cryptographic generator, whatever the digest
    length.
    """
    # Local import, in keeping with how this helper pulled in its
    # dependencies before; the module-level imports stay untouched.
    import uuid
    return uuid.uuid4().hex
# Descriptor object: carries the descriptive information for the data
# objects that point at it through its id.
descriptor = dict(
    id=generateId(),  # a URI could also work here
    summary='A sequence of temperature samples in time',
    creator='Will Ware <[email protected]>',
    format=dict(
        units='fahrenheit',
        period=1,  # seconds
    ),
)
# Naive local time, no tzinfo attached.
# Open question: how to handle time zones? UTC?
now = datetime.datetime.now()

# Data object: one batch of samples, pointing at the descriptor by id.
data = {
    "timestamp": now.strftime("%Y/%m/%d %H:%M:%S.%f"),
    "latitude": 42.0,
    "longitude": -71.0,
    "id": generateId(),
    "descriptor": descriptor["id"],
    "creator": "Will Ware <[email protected]>",
    "experiment": "Outdoor temperature near Will's house",
    "summary": "Anything summary-wise more specific than the descriptor",
    "samples": [82.0, 85.0, 83.0, 84.0, 86.0],
    # individual users and groups permitted to fetch this object
    "visible-to": ["Bob Smith", "weatherbuffs"],
}
# Show both objects, separated by a blank line.
print(pprint.pformat(descriptor))
print('')
print(pprint.pformat(data))

# Disabled until a server exists: upload the descriptor object first,
# then the data object.
if False:
    for payload in (descriptor, data):
        httpPostRequest(payload)
#################################################
# Query template: selects every data object that is less than two hours
# old, was created by Will, and uses the descriptor defined earlier.
# The "__gt" suffix presumably mirrors Django's field-lookup naming,
# since the server will probably be written in Django.
template = dict(
    timestamp__gt=(now - 2 * HOUR).strftime('%Y/%m/%d %H:%M:%S.%f'),
    creator='Will Ware <[email protected]>',
    descriptor=descriptor['id'],
)

print('')
print(pprint.pformat(template))

# Disabled until a server exists: run the query and show the reply.
if False:
    print(httpGetRequest(template))
Initial URL
http://edison.thinktrylearn.com/experiments/show/198
Initial Description
Initial Title
Puttering with a proposal for a web API for numerical data sharing
Initial Tags
Initial Language
Python