defining system data structures

Post Reply
User avatar

Topic author
imiller
Master
Posts: 151
Joined: Fri Jun 28, 2019 8:45 am
Reputation: 0
Location: South Tyneside, UK
Status: Offline
Contact:

defining system data structures

Post by imiller » Fri Apr 26, 2024 4:44 am

when dabbling in lua I wrote the following little thing that reports ages of files on a disk.
As you can see, it defines various file system data structures and uses those definitions when scanning indexf.sys.
How would I do definitions like this in python?

Code: Select all

--
-- file_report - scans all file headers and gives histograms of revision and backup dates
--

RMS=require("vms.rmsdef")
rms=require("vms.rms")
sys=require("vms.sys")
rec=require("vms.rec")
DSC=require("vms.descrip")
PRV=require("vms.prvdef")
SS=require("vms.ssdef")

local diskname = ""

-- getmsg - get the error message for a condition value or return an empty string
--
-- msgid   : condition value handed to sys.getmsg (with flags argument 0)
-- returns : the message text when sys.getmsg reports SS._NORMAL, otherwise ""
function getmsg(msgid)
    -- keep the results local so they do not leak into the global environment
    -- (the third value returned by sys.getmsg is not needed here)
    local sts, msg = sys.getmsg(msgid, 0)
    if sts == SS._NORMAL then
        return msg
    end
    return ""
end

-- idtoasc - translate an identifier value to its name
--
-- id      : identifier value handed to sys.idtoasc (starting context 0)
-- returns : the name returned by sys.idtoasc when the status has its low bit
--           set, otherwise ""
function idtoasc(id)
    -- keep every result local; only the status, the name and the context are used
    local sts, nambuf, _, _, ctxt = sys.idtoasc(id, 0)

    -- hand the context back; the status of this call is deliberately ignored
    -- NOTE(review): presumably this releases the rights database context - confirm
    sys.finish_rdb(ctxt)

    if (sts & 1) ~= 0 then
        return nambuf
    end
    return ""
end

-- daysb4
--  how many days before now is the specified VMS time
--
-- time    : time value as read from a file header
-- returns : the leading number of the delta-time string produced by
--           sys.asctim(time - now), or 99999 when that conversion fails or
--           yields no number
-- NOTE(review): a time later than "now" gives a positive difference; how
-- sys.asctim formats that is not visible here - confirm future-dated files
-- land in a sensible bucket.
function daysb4(time)
    -- all intermediates are local so nothing leaks into the global environment
    local _, now = sys.gettim()

    -- convert the difference to a delta time string
    local sts, time_since = sys.asctim(time - now, 0)
    if (sts & 1) == 0 then
        -- if the times are more than 9999 days apart then that's not a valid delta time
        return 99999
    end

    -- the days part is the first run of digits; guard against a string without one
    local digits = type(time_since) == "string" and string.match(time_since, "%d+")
    local days_since = digits and tonumber(digits)
    if days_since == nil then
        return 99999
    end
    return days_since
end

-- spairs
-- iterate over a table in ascending key order
-- [ found on the lua help web site ]
--
-- t       : table to traverse
-- returns : an iterator function yielding key, value pairs; the keys are
--           snapshotted and sorted with table.sort when spairs is called
function spairs(t)
    -- snapshot the keys so they can be sorted
    local ordered = {}
    for key, _ in pairs(t) do
        table.insert(ordered, key)
    end
    table.sort(ordered)

    -- closure walks the sorted key list one position per call
    local pos = 0
    return function()
        pos = pos + 1
        local key = ordered[pos]
        if key then
            return key, t[key]
        end
    end
end

-- display_histogram - print each bucket of a histogram as a percentage of the total
--
-- table : histogram mapping a numeric bucket key to a count
--         (note: this parameter name hides the standard `table` library
--          inside this function; nothing in the body needs the library)
-- Prints one line per bucket, in ascending key order.
function display_histogram(table)

    -- determine the total to enable the calculation of %
    local total = 0
    for _, count in pairs(table) do
        total = total + count
    end

    for bucket, count in spairs(table) do
        -- an empty histogram would otherwise divide 0 by 0 and print nan
        local percent = 0.0
        if total ~= 0 then
            percent = 100.0 * (count / total)
        end
        print(string.format("%5d : %2.1f%%", bucket, percent))
    end
end

-- update_histogram - count one sample in the first bucket whose key is >= days
--
-- days  : sample value
-- table : histogram mapping a numeric bucket key to a count; keys are visited
--         in ascending order, so each key acts as an inclusive upper limit
-- A sample larger than every key is not counted.
function update_histogram(days,table)
    for limit, count in spairs(table) do
        if days <= limit then
            table[limit] = count + 1
            return
        end
    end
end

-- define the fixed parts of a file header
-- Each entry is registered with rec.addint in the order listed; the number in
-- the comment is the index later used with rec.getint(r, index).
local r = rec.new()
do
    local BYTE, WORD, LONG = DSC.K_DTYPE_BU, DSC.K_DTYPE_WU, DSC.K_DTYPE_LU

    -- { data type, byte offset within the header record }
    local fh2_fields = {
        { BYTE,   0 },  -- 0)  fh2$b_idoffset   - ident area offset in words
        { BYTE,   1 },  -- 1)  fh2$b_mpoffset   - map area offset in words
        { BYTE,   2 },  -- 2)  fh2$b_acoffset   -
        { BYTE,   3 },  -- 3)  fh2$b_rsoffset   -
        { BYTE,   6 },  -- 4)  fh2$b_strucver   - file structure version
        { BYTE,   7 },  -- 5)  fh2$b_struclev   - file structure level
        { WORD,   8 },  -- 6)  fh2$w_fid_num    - fid number
        { WORD,  10 },  -- 7)  fh2$w_fid_seq    - fid seq
        { WORD,  12 },  -- 8)  fh2$w_fid_rvn    - fid rvn
        { WORD,  20 },  -- 9)  fh2$w_recattr    - record attributes
        { LONG,  52 },  -- 10) fh2$l_filechar   - file characteristics
        { WORD,  56 },  -- 11) fh2$w_recprot
        { BYTE,  58 },  -- 12) fh2$b_map_inuse  - number of words in use in map area
        { WORD,  60 },  -- 13) fh2$w_uicmember  - file owner uic member
        { WORD,  62 },  -- 14) fh2$w_uicgroup   - file owner uic group
        { WORD,  64 },  -- 15) fh2$w_fileprot   - file protection
        { LONG,  76 },  -- 16) fh2$l_highwater  - file highwater mark
        { WORD, 510 },  -- 17) fh2$w_checksum   - file header checksum
        { WORD,   4 },  -- 18) fh2$w_seg_num    - file extension segment number. >0 in Extension headers
    }

    for _, field in ipairs(fh2_fields) do
        rec.addint(r, field[1], 0, field[2])
    end
end

--

local sts   -- status of the most recent rms call
local fp    -- handle returned by rms.open for the index file

-- Histograms of file ages. Each key is an inclusive upper limit in days (see
-- update_histogram); 99999 is the value daysb4 returns when it cannot produce
-- a day count.
local revhistogram = {[0]=0,[7]=0,[30]=0,[365]=0,[1000]=0,[5000]=0,[8000]=0,[10000]=0,[99999]=0}
local bakhistogram = {[0]=0,[1]=0,[2]=0,[3]=0,[4]=0,[5]=0,[6]=0,[7]=0,[31]=0,[365]=0,[10000]=0,[99999]=0}

--
-- main
--

-- if an argument was supplied then use it as the name of the disk to report on
if arg and #arg > 0 then
    diskname = arg[1]
else
-- ask for the disk name
    print("Name of disk device to report on?")
    diskname = io.stdin:read()
    if diskname == nil then
        -- end of input: read() returned nil, which would otherwise raise an
        -- error in the string concatenation below
        print("No disk name supplied")
        os.exit() -- give up
    end
end

print("reporting on",diskname.."[000000]INDEXF.SYS")

-- Open the index file to scan the file headers - shared read - important not to lock it
-- NOTE(review): `null` is not assigned anywhere in this script; unless one of
-- the vms modules defines it, it evaluates to nil - confirm
sts, fp = rms.open(diskname.."[000000]INDEXF.SYS", null, "g", "u")

if sts ~= RMS._NORMAL then
   print(getmsg(sts))
   os.exit() -- give up
end

-- sequentially read through the index file until the end
--
-- Every record is loaded into the fixed file header layout `r`, checked for
-- plausibility, and for each accepted header the revision and backup dates
-- from the ident area are counted in revhistogram / bakhistogram.

local rn = 0    -- records read from the index file
local rv = 0    -- records accepted as valid file headers

-- size in bytes of fi5$t_filename; longer names continue in fi5$t_filenamext
local FI5_S_FILENAME = 44

local rbuf, rlen
sts, rbuf, rlen = rms.get(fp, nil)
while sts == RMS._NORMAL
do
    rec.load(r,rbuf,rlen)       -- load the record buffer into the fixed part of the file header structure
    rn = rn + 1

    local idoffset = rec.getint(r,0)
    local mpoffset = rec.getint(r,1)
    local acoffset = rec.getint(r,2)
    local rsoffset = rec.getint(r,3)
    local structver = rec.getint(r,4)
    local structlev = rec.getint(r,5)
    local fid_num = rec.getint(r,6)
    local filechar = rec.getint(r,10)
    local seg_num = rec.getint(r,18)

    -- the following are only referenced by the disabled (--*) debug prints
    local fid_seq = rec.getint(r,7)
    local fid_rvn = rec.getint(r,8)
    local recattr = rec.getint(r,9)
    local map_inuse = rec.getint(r,12)
    local uic_mem = rec.getint(r,13)
    local uic_grp = rec.getint(r,14)

--  is a valid file header?
    local invalid_header = 0 -- number of reasons the header is bad

    if seg_num > 0 then
        invalid_header = invalid_header + 1     -- it's an extension header so not interesting
    end
    if (filechar & 0x8000) ~= 0 then            -- if MARKDEL flag set
        invalid_header = invalid_header + 1     -- don't bother with deleted files
    end
    if idoffset < 30 then
        invalid_header = invalid_header + 1
    end
    if idoffset > mpoffset then
        invalid_header = invalid_header + 1
    end
    if acoffset > 0 then
        if mpoffset > acoffset then
            invalid_header = invalid_header + 1
        end
        if acoffset > rsoffset then
            invalid_header = invalid_header + 1
        end
    end
    if (structlev ~= 2) and (structlev ~= 5) then
        invalid_header = invalid_header + 1
    end
    if structver < 1 then
        invalid_header = invalid_header + 1
    end
    if fid_num == 0 then
        invalid_header = invalid_header + 1
    end

    if invalid_header == 0 then
--      valid file header
        rv = rv + 1

--*     print(string.format("file header %d, structver %d, structlvl %d, fid (%d,%d), uic [%o,%o] map inuse %d idoffset %d",
--*             rn, structver, structlev, fid_num, fid_seq, uic_grp, uic_mem, map_inuse, idoffset))

--*     print(string.format("uic [%o,%o] [%s]",uic_grp, uic_mem, idtoasc(uic_grp*65536 + uic_mem)))

        -- the ident area starts idoffset*2 bytes from the start of the record
        local idbase = idoffset * 2
        local filename, revision, credate, revdate, expdate, bakdate

        if (structlev == 2) then

--          ODS2

            local id2 = rec.new()
            rec.addstr(id2, null,          20,  0+idbase)   -- 0) fi2$t_filename    - filename
            rec.addint(id2, DSC.K_DTYPE_WU, 0, 20+idbase)   -- 1) fi2$w_revision    - file header revision count
            rec.addint(id2, DSC.K_DTYPE_QU, 0, 22+idbase)   -- 2) fi2$q_credate     - file creation date
            rec.addint(id2, DSC.K_DTYPE_QU, 0, 30+idbase)   -- 3) fi2$q_revdate     - file revision date
            -- the four quadword dates are consecutive, so expdate sits at 38
            -- (it was previously registered at 30, on top of revdate)
            rec.addint(id2, DSC.K_DTYPE_QU, 0, 38+idbase)   -- 4) fi2$q_expdate     - file expiration date
            rec.addint(id2, DSC.K_DTYPE_QU, 0, 46+idbase)   -- 5) fi2$q_bakdate     - file backup date

            rec.load(id2,rbuf,rlen) -- load the data from the record into the ODS2 ID Header structure

            filename = rec.getstr(id2,0)
            revision = rec.getint(id2,1)
            credate = rec.getint(id2,2)
--*         sts2, credate_s = sys.asctim(credate,0)
            revdate = rec.getint(id2,3)
--*         sts2, revdate_s = sys.asctim(revdate,0)
            expdate = rec.getint(id2,4)
--*         sts2, expdate_s = sys.asctim(expdate,0)
            bakdate = rec.getint(id2,5)
--*         sts2, bakdate_s = sys.asctim(bakdate,0)

--*         print(string.format("filename %20.20s revision %d credate %s revdate %s expdate %s bakdate %s",
--*             filename,revision,credate_s,revdate_s,expdate_s,bakdate_s))

            rec.delete(id2)

        elseif (structlev == 5) then

--          ODS5

            local id5 = rec.new()
            rec.addint(id5, DSC.K_DTYPE_BU, 0,  0+idbase) -- 0) fi5$b_control
            rec.addint(id5, DSC.K_DTYPE_BU, 0,  1+idbase) -- 1) fi5$b_namelen
            rec.addint(id5, DSC.K_DTYPE_WU, 0,  2+idbase) -- 2) fi5$w_revision
            rec.addint(id5, DSC.K_DTYPE_QU, 0,  4+idbase) -- 3) fi5$q_credate    - file creation date
            rec.addint(id5, DSC.K_DTYPE_QU, 0, 12+idbase) -- 4) fi5$q_revdate    - file revision date
            rec.addint(id5, DSC.K_DTYPE_QU, 0, 20+idbase) -- 5) fi5$q_expdate    - file expiration date
            rec.addint(id5, DSC.K_DTYPE_QU, 0, 28+idbase) -- 6) fi5$q_bakdate    - file backup date
            rec.addint(id5, DSC.K_DTYPE_QU, 0, 36+idbase) -- 7) fi5$q_accdate    - file access date - 0 if not enabled
            rec.addint(id5, DSC.K_DTYPE_QU, 0, 44+idbase) -- 8) fi5$q_attdate    - file attributes update date
            rec.addstr(id5, null, FI5_S_FILENAME, 76+idbase) --  9) fi5$t_filename
            rec.addstr(id5, null, 204,  120+idbase)          -- 10) fi5$t_filenamext

            rec.load(id5,rbuf,rlen)     -- load the data into the ODS5 ID Header structure

            -- control byte: low two bits give the name type, mask 4 is the
            -- fixed-length flag (neither is used further at present).
            -- A masked value is 0 or 4, so the flag is tested against 0.
            local control = rec.getint(id5, 0)
            local nametype = control & 3
            local fixed_length = (control & 4) ~= 0

            local namelen = rec.getint(id5, 1)

            if namelen > FI5_S_FILENAME then
                filename = rec.getstr(id5,9) .. string.sub(rec.getstr(id5,10),1,namelen - FI5_S_FILENAME)
            else
                filename = rec.getstr(id5,9)
            end

            revision = rec.getint(id5,2)
            credate = rec.getint(id5,3)
--*         sts2, credate_s = sys.asctim(credate,0)
            revdate = rec.getint(id5,4)
            local attdate = rec.getint(id5,8)
            if attdate > revdate then   -- if file attributes updated later than file contents
                revdate = attdate       -- use the newer date for revdate
            end
--*         sts2, revdate_s = sys.asctim(revdate,0)
            bakdate = rec.getint(id5,6)
--*         sts2, bakdate_s = sys.asctim(bakdate,0)

--*         print(string.format("filename %20.20s revision %d credate %s revdate %s bakdate %s",
--*             filename,revision,credate_s,revdate_s,bakdate_s))

            rec.delete(id5)
        end

        -- structlev is 2 or 5 here (checked above), so both dates are set
        local revdateb4 = daysb4(revdate)
--*     print(string.format("filename %20.20s revdate %s days %s",filename,revdate_s,revdateb4))
        update_histogram(revdateb4,revhistogram)

        local bakdateb4 = daysb4(bakdate)
--*     print(string.format("filename %20.20s bakdate %s days %s",filename,bakdate_s,bakdateb4))
        update_histogram(bakdateb4,bakhistogram)
    end

    -- NOTE(review): the first read passes nil as the second argument and this
    -- one passes the record structure r - confirm which one rms.get expects
    sts, rbuf, rlen = rms.get(fp, r) -- next
end
-- end of main loop

-- The read loop stops on the first status other than RMS._NORMAL; anything
-- except end-of-file is reported.
if not (sts == RMS._NORMAL or sts == RMS._EOF) then
   print(getmsg(sts))
end

-- Release the file header record structure
rec.delete(r)

-- Close the index file and report a failure to do so
local close_sts = rms.close(fp)
if close_sts ~= RMS._NORMAL then
   print(getmsg(close_sts))
end

-- Summary line followed by the two histograms
print(("File date report for %s with %d headers read and %d headers found valid "):format(diskname, rn, rv))

local reports = {
    { "revision date histogram\nDays : filecount", revhistogram },
    { "backup date histogram\nDays : filecount",   bakhistogram },
}
for _, report in ipairs(reports) do
    print(report[1])
    display_histogram(report[2])
end
Ian Miller
[ personal opinion only. usual disclaimers apply. Do not taunt happy fun ball ].

User avatar

arne_v
Master
Posts: 356
Joined: Fri Apr 17, 2020 7:31 pm
Reputation: 0
Location: Rhode Island, USA
Status: Offline
Contact:

Re: defining system data structures

Post by arne_v » Fri Apr 26, 2024 9:26 am

First you need the definitions.

I assume that the Lua porter (Hoff I believe) has added those as part of the port.

If you are lucky then the VMS Python also got the definitions - otherwise you will need to add them yourself.

And then you need to read using the definitions.

Two different approaches:
1) read the file as a seekable binary stream and use offsets and lengths to pick the fields out
2) somehow define a native struct and use that to read records

#1 should be pretty straightforward in Python as well.

#2 is more tricky - I believe that CPython has a construct module with a Struct type that can be used - I know how to do it in Jython.

Sorry for the somewhat vague answer.

If you tell me a little more about what you want and which platform and which Python then I may try and produce an actual example.

Added in 4 hours 33 minutes 6 seconds:
To find the values then they should be in SYS$LIBRARY:LIB.MLB and there are descriptions in "File System Internals".
Arne
arne@vajhoej.dk
VMS user since 1986

User avatar

Topic author
imiller
Master
Posts: 151
Joined: Fri Jun 28, 2019 8:45 am
Reputation: 0
Location: South Tyneside, UK
Status: Offline
Contact:

Re: defining system data structures

Post by imiller » Mon Apr 29, 2024 9:34 am

being a Plain Old C programmer then the Struct module looks like the way to go for me.
I know how to dig the definitions out of lib.mlb and starlet.mlb as necessary.

I'm trying to get the hang of python. It's a higher level language than I'm used to :)
Ian Miller
[ personal opinion only. usual disclaimers apply. Do not taunt happy fun ball ].

User avatar

arne_v
Master
Posts: 356
Joined: Fri Apr 17, 2020 7:31 pm
Reputation: 0
Location: Rhode Island, USA
Status: Offline
Contact:

Re: defining system data structures

Post by arne_v » Mon Apr 29, 2024 9:47 pm

Just for fun I made a quick try.

I am using the construct module, which is part of the old JFP Python. It should work fine on Python 3.10 but I can not get pip working on VMS x86-64.

Code: Select all

# List the file names stored in INDEXF.SYS using the (legacy API) construct module.
#
# Limitations kept from the original demo:
# * only the fields needed to find the file names are defined
# * invalid file headers and deleted files are not filtered out
from construct import Bytes, String, Struct, ULInt8, ULInt16

BLOCK_SIZE = 512
IDENTAREA2_SIZE = 20 + 34 + 66        # bytes covered by identarea2
IDENTAREA5_SIZE = 1 + 1 + 74 + 44 + 204  # bytes covered by identarea5

# All integers are unsigned and explicitly little-endian: the counts and
# lengths read here are never negative, and a signed byte would turn a name
# length of 128 or more into a negative slice bound.
home = Struct('home', Bytes('other1', 14),
                      ULInt16('cluster'),
                      Bytes('other2', 16),
                      ULInt16('ibmapsize'),
                      Bytes('other3', 478))

filehdr = Struct('filehdr', ULInt8('idoffset'),      # ident area offset in words
                            Bytes('other1', 5),
                            ULInt8('strucLevMinor'),
                            ULInt8('strucLevMajor'),
                            Bytes('other2', 72))

identarea2 = Struct('identarea2', String('fileName', 20),
                                  Bytes('other', 34),
                                  String('fileNameExt', 66))

identarea5 = Struct('identarea5', ULInt8('other1'),
                                  ULInt8('nameLen'),
                                  Bytes('other2', 74),
                                  String('fileName', 44),
                                  String('fileNameExt', 204))

# "with" closes the file even when a parse error is raised
with open('/disk2/000000/indexf.sys', 'rb') as f:
    bootrec = f.read(BLOCK_SIZE)
    homerec = f.read(BLOCK_SIZE)
    homeblk = home.parse(homerec)
    # number of blocks that precede the first file header
    startfiles = 4 * homeblk.cluster + homeblk.ibmapsize
    for i in range(startfiles - 2):     # two blocks have already been read
        skiprec = f.read(BLOCK_SIZE)
    n = 0
    while True:
        frec = f.read(BLOCK_SIZE)
        if len(frec) < BLOCK_SIZE:      # end of file or truncated last block
            break
        fhdr = filehdr.parse(frec[0:80])
        ident = fhdr.idoffset * 2       # ident area offset in bytes
        if fhdr.strucLevMajor == 2 and ident + IDENTAREA2_SIZE <= BLOCK_SIZE:
            ia2 = identarea2.parse(frec[ident:ident + IDENTAREA2_SIZE])
            fnm = (ia2.fileName + ia2.fileNameExt).rstrip()
            print(fnm)
        if fhdr.strucLevMajor == 5 and ident + IDENTAREA5_SIZE <= BLOCK_SIZE:
            ia5 = identarea5.parse(frec[ident:ident + IDENTAREA5_SIZE])
            fnm = (ia5.fileName + ia5.fileNameExt)[:ia5.nameLen]
            print(fnm)
        n = n + 1
print('%d files' % (n))
Note that:
* I have not defined all fields - lots of work to be done
* I do not test for invalid file headers or deleted files
* idoffset is assumed to be 80 always

So not a complete solution - just a demo.
Arne
arne@vajhoej.dk
VMS user since 1986


sergey_vorfolomeev
VSI Expert
Master
Posts: 102
Joined: Thu Aug 22, 2019 12:17 am
Reputation: 0
Status: Offline

Re: defining system data structures

Post by sergey_vorfolomeev » Tue Apr 30, 2024 4:52 am


User avatar

Topic author
imiller
Master
Posts: 151
Joined: Fri Jun 28, 2019 8:45 am
Reputation: 0
Location: South Tyneside, UK
Status: Offline
Contact:

Re: defining system data structures

Post by imiller » Tue Apr 30, 2024 4:58 am

thanks,
I shall have a play with that
Ian Miller
[ personal opinion only. usual disclaimers apply. Do not taunt happy fun ball ].

User avatar

arne_v
Master
Posts: 356
Joined: Fri Apr 17, 2020 7:31 pm
Reputation: 0
Location: Rhode Island, USA
Status: Offline
Contact:

Re: defining system data structures

Post by arne_v » Fri May 03, 2024 2:46 pm

If anyone likes Java/Groovy/Jython/Scala/Kotlin then I have INDEXF record definitions that are complete for file header and ident area (but not for map areas).

https://www.vajhoej.dk/arne/opensource/vms/

vmsindexf*.zip

Added in 5 hours 17 minutes 5 seconds:
sergey_vorfolomeev wrote:
Tue Apr 30, 2024 4:52 am
Python struct is suitable:

https://docs.python.org/3/library/struct.html
True.

The same code as for the construct example:

Code: Select all

"""List the file names stored in an INDEXF.SYS using only the standard library.

Limitations kept from the original demo:
* only the fields needed to find the file names are defined
* invalid file headers and deleted files are not filtered out
"""
import struct
import sys
from collections import namedtuple

BLOCK_SIZE = 512
DEFAULT_INDEXF = '/disk2/000000/indexf.sys'

# '<' selects little-endian with no alignment padding, so the layouts do not
# depend on the host.  Counts and lengths are unsigned ('H', 'B'): a signed
# name length would go negative for names of 128 bytes or more.
homefmt = struct.Struct('<14sH16sH478s')
homeclz = namedtuple('home', 'other1,cluster,other2,ibmapsize,other3')
filehdrfmt = struct.Struct('<B5sBB72s')
filehdrclz = namedtuple('filehdr', 'idoffset,other1,strucLevMinor,strucLevMajor,other2')
identarea2fmt = struct.Struct('<20s34s66s')
identarea2clz = namedtuple('identarea2', 'fileName,other,fileNameExt')
identarea5fmt = struct.Struct('<BB74s44s204s')
identarea5clz = namedtuple('identarea5', 'other1,nameLen,other2,fileName,fileNameExt')


def parse_home(homerec):
    """Unpack the 512-byte home block into a ``home`` named tuple."""
    return homeclz._make(homefmt.unpack(homerec))


def extract_filename(frec):
    """Return the file name stored in one file header block.

    frec is the raw header block.  The ident area is located through the
    header's own offset byte (counted in 16-bit words).  Returns None when the
    structure level is neither 2 nor 5, or when the ident area would not fit
    inside frec.
    """
    if len(frec) < filehdrfmt.size:
        return None
    fhdr = filehdrclz._make(filehdrfmt.unpack(frec[:filehdrfmt.size]))
    ident = fhdr.idoffset * 2
    if fhdr.strucLevMajor == 2:
        area = frec[ident:ident + identarea2fmt.size]
        if len(area) < identarea2fmt.size:
            return None
        ia2 = identarea2clz._make(identarea2fmt.unpack(area))
        return (ia2.fileName + ia2.fileNameExt).decode('iso-8859-1').rstrip()
    if fhdr.strucLevMajor == 5:
        area = frec[ident:ident + identarea5fmt.size]
        if len(area) < identarea5fmt.size:
            return None
        ia5 = identarea5clz._make(identarea5fmt.unpack(area))
        return (ia5.fileName + ia5.fileNameExt).decode('iso-8859-1')[:ia5.nameLen]
    return None


def main(path=DEFAULT_INDEXF):
    """Print every file name found in the index file at path, then the block count."""
    n = 0
    # "with" closes the file even when unpacking raises
    with open(path, 'rb') as f:
        f.read(BLOCK_SIZE)                      # boot block, not needed
        homeblk = parse_home(f.read(BLOCK_SIZE))
        # number of blocks that precede the first file header
        startfiles = 4 * homeblk.cluster + homeblk.ibmapsize
        for _ in range(startfiles - 2):         # two blocks have already been read
            f.read(BLOCK_SIZE)
        while True:
            frec = f.read(BLOCK_SIZE)
            if len(frec) < BLOCK_SIZE:          # end of file or truncated last block
                break
            fnm = extract_filename(frec)
            if fnm is not None:
                print(fnm)
            n = n + 1
    print('%d files' % (n))


if __name__ == '__main__':
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_INDEXF)
Added in 6 minutes 47 seconds:
Builtin is obviously better than a module that needs to be installed via pip.

But I do think that the construct code is more readable.
Arne
arne@vajhoej.dk
VMS user since 1986

Post Reply