Skip to content

Proper procedure for importing a custom rule to detect social security #'s #274

@munntjlx

Description

@munntjlx

I have been trying to add a new custom rule (below). I was wondering whether there is a 'negative' option, i.e. if the input contains this regex, DON'T match, since certain types of numbers are invalid. We tried putting the rule in the 'custom' rules directory and then compiling, as well as in the 'default' rules. It gets logged to the SQLite database, but the reporting seems to have an 'off by one' error since we added the new rule. I suspect the indices are misaligned. Is there a good process for referencing custom rules (RTFM) or something?

- name: US Social Security Number (Basic)
  id: np.ssn.1
  pattern: |
    (?x)
    \b
    \d{3}         # Area number
    -
    \d{2}         # Group number
    -
    \d{4}         # Serial number
    \b

This one works, but it has the misalignment problem and produces the following error when trying to run a report (when we put it in the default rules and re-compile):

The application panicked (crashed).
Message:  index out of bounds: the len is 0 but the index is 0
Location: crates/noseyparker-cli/src/cmd_report/human_format.rs:85
Run with COLORBT_SHOW_HIDDEN=1 environment variable to disable frame filtering.
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ BACKTRACE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
                              ⋮ 14 frames hidden ⋮                              
15: core::panicking::panic_bounds_check::he315898ba5b8216d
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/core/src/panicking.rs:273
16: index<noseyparker::match_type::Group>
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/core/src/slice/index.rs:274
17: index<noseyparker::match_type::Group, usize>
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/core/src/slice/index.rs:16
18: index<[noseyparker::match_type::Group; 1], usize>
    at /Users/shahj3/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/smallvec-1.15.0/src/lib.rs:2059
    2057 │ 
    2058 │     fn index(&self, index: I) -> &I::Output {
    2059 >         &(**self)[index]
    2060 │     }
    2061 │ }
19: fmt
    at /Users/shahj3/src/POCs/noseyparker/crates/noseyparker-cli/src/cmd_report/human_format.rs:85
      83 │         } else {
      84 │             let group_heading = reporter.style_heading("Group:".into());
      85 >             write_group(group_heading, &gs[0])?;
      86 │         }
      87 │ 
20: core::fmt::rt::Argument::fmt::hc0b28dad2d7b7ba8
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/core/src/fmt/rt.rs:184
21: core::fmt::write::hbc92919d8e8f9a96
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/core/src/fmt/mod.rs:1481
22: default_write_fmt<std::io::buffered::bufwriter::BufWriter<std::io::stdio::Stdout>>
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/io/mod.rs:639
23: write_fmt<dyn std::io::Write>
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/io/impls.rs:227
24: human_format<alloc::boxed::Box<dyn std::io::Write, alloc::alloc::Global>>
    at /Users/shahj3/src/POCs/noseyparker/crates/noseyparker-cli/src/cmd_report/human_format.rs:17
      15 │                 self.style_id(&finding.metadata.finding_id),
      16 │             )?;
      17 >             writeln!(&mut writer, "{}", PrettyFinding(self, &finding))?;
      18 │         }
      19 │         Ok(())
25: report<alloc::boxed::Box<dyn std::io::Write, alloc::alloc::Global>>
    at /Users/shahj3/src/POCs/noseyparker/crates/noseyparker-cli/src/cmd_report.rs:195
     193 │     fn report<W: std::io::Write>(&self, format: Self::Format, writer: W) -> Result<()> {
     194 │         match format {
     195 >             ReportOutputFormat::Human => self.human_format(writer),
     196 │             ReportOutputFormat::Json => self.json_format(writer),
     197 │             ReportOutputFormat::Jsonl => self.jsonl_format(writer),
26: run
    at /Users/shahj3/src/POCs/noseyparker/crates/noseyparker-cli/src/cmd_report.rs:72
      70 │         styles,
      71 │     };
      72 >     reporter.report(args.output_args.format, output)
      73 │ }
      74 │ 
27: try_main
    at /Users/shahj3/src/POCs/noseyparker/crates/noseyparker-cli/src/main.rs:118
     116 │         args::Command::Scan(args) => cmd_scan::run(global_args, args),
     117 │         args::Command::Summarize(args) => cmd_summarize::run(global_args, args),
     118 >         args::Command::Report(args) => cmd_report::run(global_args, args),
     119 │         args::Command::Annotations(args) => cmd_annotations::run(global_args, args),
     120 │         args::Command::Generate(args) => cmd_generate::run(global_args, args),
28: main
    at /Users/shahj3/src/POCs/noseyparker/crates/noseyparker-cli/src/main.rs:126
     124 │ fn main() {
     125 │     let args = &CommandLineArgs::parse_args();
     126 >     if let Err(e) = try_main(args) {
     127 │         // Use the more verbose format that includes a backtrace when running with -vv or higher,
     128 │         // otherwise use a more compact one-line error format.
29: call_once<fn(), ()>
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/core/src/ops/function.rs:250
30: __rust_begin_short_backtrace<fn(), ()>
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/sys/backtrace.rs:152
31: {closure#0}<()>
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/rt.rs:199
32: core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once::h05562602ade80278
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/core/src/ops/function.rs:284
33: std::panicking::try::do_call::ha5656d49188e7f80
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/panicking.rs:589
34: std::panicking::try::h65274237ad573fed
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/panicking.rs:552
35: std::panic::catch_unwind::hfba9e8aea6ea1d44
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/panic.rs:359
36: std::rt::lang_start_internal::{{closure}}::ha5e635a952a4146a
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/rt.rs:168
37: std::panicking::try::do_call::h5c24223eef24357f
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/panicking.rs:589
38: std::panicking::try::h5942a7c8254326a3
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/panicking.rs:552
39: std::panic::catch_unwind::h9b9d0c41a72d924b
    at /rustc/17067e9ac6d7ecb70e50f92c1944e545188d2359/library/std/src/panic.rs:359
40: std::rt::lang_start_internal::hdff9e551ec0db2ea

The original rule, with the negative examples (YAML):

rules:

- name: US Social Security Number (Basic)
  id: np.ssn.1
  pattern: |
    (?x)
    \b
    \d{3}         # Area number
    -
    \d{2}         # Group number
    -
    \d{4}         # Serial number
    \b
  examples:
    - "123-45-6789"
    - "078-05-1120"
  negative_examples:
    - "000-12-3456"
    - "666-45-6789"
    - "900-12-3456"
    - "123-00-6789"
    - "123-45-0000"
  categories: [identifier, fuzzy]
  description: >
    A U.S. Social Security Number (SSN) was detected. SSNs are sensitive personal identifiers
    and should be protected to prevent identity theft or unauthorized use.
  references:
    - https://www.ssa.gov/kc/SSAFactSheet--IssuingSSNs.html

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions